bio-polyploid-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +67 -0
- data/README +21 -0
- data/Rakefile +61 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +133 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +15 -0
- data/bin/find_best_blat_hit.rb +32 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +155 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/polymarker.rb +219 -0
- data/bin/snps_between_bams.rb +106 -0
- data/bio-polyploid-tools.gemspec +139 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +698 -0
- data/lib/bio/BIOExtensions.rb +186 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
- data/lib/bio/PolyploidTools/SNP.rb +681 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
- data/lib/bio/SAMToolsExtensions.rb +284 -0
- data/lib/bio/db/exonerate.rb +272 -0
- data/lib/bio/db/fastadb.rb +164 -0
- data/lib/bio/db/primer3.rb +673 -0
- data/lib/bioruby-polyploid-tools.rb +25 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/Test3Aspecific.csv +1 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +51 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +53 -0
- data/test/test_snp_parsing.rb +40 -0
- metadata +201 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
#Module to hold the information about the fasta file
|
|
2
|
+
|
|
3
|
+
module Bio::DB::Fasta
|
|
4
|
+
# Ordered collection of fasta index entries. Entries are kept in insertion
# order and can also be looked up by their id.
class Index
  include Enumerable
  attr_reader :entries

  def initialize
    @entries = []
    @entries_map = Hash.new
  end

  # Appends +entry+ and registers it under +entry.id+.
  # This doesn't validate if you are adding the same entry twice: the list
  # keeps both, while the id lookup keeps only the last one added.
  def <<(entry)
    @entries << entry
    @entries_map[entry.id] = entry
  end

  # Yields every entry in insertion order. Enumerable is built on this method.
  def each(&block)
    @entries.each(&block)
  end

  # Number of entries stored.
  def length
    @entries.length
  end

  # Returns a new Index just with the specified range, as if it was an Array.
  # The return object is of type Index. +args+ is handed to Array#[]; a single
  # position gives an Index with one entry and a position out of range gives an
  # empty Index.
  def [](args)
    selected = @entries[args]
    picked = case selected
             when nil then []
             when Array then selected
             else [selected]
             end
    new_index = Index.new
    picked.each { |entry| new_index << entry }
    new_index
  end

  # Returns the entry registered under the id +entry+, or nil when unknown.
  def region_for_entry(entry)
    @entries_map[entry]
  end
end
|
|
42
|
+
|
|
43
|
+
# One sequence of the fasta index: its id and its length.
class Entry
  attr_reader :id, :length

  # +length+ is converted with to_i, so a numeric string is accepted.
  def initialize(id, length)
    @id = id
    @length = length.to_i
  end

  # Builds a forward Region for this entry that starts at 0 and ends at
  # the entry length.
  def get_full_region
    Region.new.tap do |whole|
      whole.entry = id
      whole.start = 0
      whole.end = @length
      whole.orientation = :forward
    end
  end

  # Same as get_full_region.
  def to_region
    get_full_region
  end
end
|
|
64
|
+
|
|
65
|
+
#Class to wrap a region of a chromosome
|
|
66
|
+
# Class to wrap a region of a chromosome: the entry name, the start and end
# coordinates and the orientation (:forward or :reverse).
class Region
  attr_accessor :entry, :start, :end, :orientation

  # Formats the region as "entry:start-end".
  def to_s
    "#{@entry}:#{@start}-#{@end}"
  end

  # Parses a string with the format "entry:start-end". Single quotes are
  # removed before parsing. The orientation is :reverse when the end is
  # smaller than the start, :forward otherwise.
  #
  # Raises FastaDBException when the string does not have exactly one ":"
  # separated pair whose second part is exactly one "-" separated pair.
  def self.parse_region(reg_str)
    string = reg_str.delete("'")
    fields_1 = string.split(":")
    # Only look at the coordinates once we know they are present, so that a
    # malformed string is reported as an invalid region.
    fields_2 = fields_1.length == 2 ? fields_1[1].split("-") : []
    raise FastaDBException.new(), "Invalid region. #{string}" if fields_1.length != 2 || fields_2.length != 2

    reg = Region.new
    reg.entry = fields_1[0]
    reg.start = fields_2[0].to_i
    reg.end = fields_2[1].to_i
    reg.orientation = reg.end < reg.start ? :reverse : :forward
    reg
  end

  # Difference between end and start. It is negative for regions whose end
  # is smaller than their start.
  def size
    @end - @start
  end
end
|
|
98
|
+
|
|
99
|
+
# Error raised by the classes of this module (invalid regions, missing or
# unusable fasta index, sequences that can not be fetched).
class FastaDBException < StandardError
end
|
|
100
|
+
|
|
101
|
+
#Class that holds the fasta file. It is used as a database. It heavily relies on samtools.
|
|
102
|
+
# Fasta file used as a sequence database. The index handle comes from
# Bio::DB::SAM::Tools (fai_load / fai_build / fai_fetch / fai_destroy).
class FastaFile

  attr_reader :index, :fasta_path

  # Finalizer registered for every instance. It currently does nothing.
  def self.finalize(id)
    #id.close()
    #puts "Finalizing #{id} at #{Time.new}"
  end

  # Loads the index of +fasta_filename+. When the index can not be loaded it
  # is built once and loaded again.
  #
  # Raises FastaDBException when the path is nil or when the index is still
  # unavailable after building it.
  def initialize(fasta_filename)
    @fasta_path = fasta_filename
    raise FastaDBException.new(), "No path for the refernce fasta file. " if @fasta_path.nil?
    @fasta_index = Bio::DB::SAM::Tools.fai_load(@fasta_path)
    if missing_index?
      $stderr.puts "Generating index for: " + @fasta_path
      Bio::DB::SAM::Tools.fai_build(@fasta_path)
      @fasta_index = Bio::DB::SAM::Tools.fai_load(@fasta_path)
      raise FastaDBException.new(), "Unable to generate fasta index for: " + @fasta_path if missing_index?
    end
    ObjectSpace.define_finalizer(self, self.class.method(:finalize).to_proc)
  end

  # Reads "<fasta_path>.fai" into an Index, one Entry per line, using the
  # first two tab separated columns as id and length. The file is read only
  # once; later calls reuse the Index. Returns the number of entries.
  def load_fai_entries()
    return @index.length if @index
    @index = Index.new
    # File.foreach closes the file when it is done.
    File.foreach(@fasta_path + ".fai") do |line|
      fields = line.split("\t")
      @index << Entry.new(fields[0], fields[1])
    end
    @index.length
  end

  # Releases the index handle, if there is one.
  def close()
    Bio::DB::SAM::Tools.fai_destroy(@fasta_index) unless missing_index?
    @fasta_index = nil
  end

  #The region needs to have a method to_region or a method to_s that has the format "chromosome:start-end" as in samtools
  #
  # The fetched string is wrapped with Bio::Sequence.auto and reverse
  # complemented when the orientation is :reverse. The orientation is read from
  # +region+ when it has one, otherwise from the object returned by to_region;
  # without any orientation the sequence is returned as fetched.
  #
  # Raises FastaDBException when there is no index or nothing was fetched.
  def fetch_sequence(region)
    raise FastaDBException.new(), "No fasta index for " if missing_index?
    target = region.respond_to?(:to_region) ? region.to_region : region
    query = target.to_s

    len = FFI::MemoryPointer.new :int
    str = Bio::DB::SAM::Tools.fai_fetch(@fasta_index, query, len)
    raise FastaDBException.new(), "Unable to get sequence for reference: " + query if str.nil?
    reference = Bio::Sequence.auto(str)

    oriented = [region, target].find { |candidate| candidate.respond_to?(:orientation) }
    reference.reverse_complement!() if oriented && oriented.orientation == :reverse
    reference
  end

  private

  # True when there is no usable index handle.
  def missing_index?
    @fasta_index.nil? || @fasta_index.null?
  end
end
|
|
164
|
+
end
|
|
@@ -0,0 +1,673 @@
|
|
|
1
|
+
|
|
2
|
+
module Bio::DB::Primer3
|
|
3
|
+
# Error raised by this module when primer3 fails or its output can not be
# interpreted.
class Primer3Exception < RuntimeError; end
|
|
5
|
+
|
|
6
|
+
# Runs "primer3_core" feeding it the content of the file opts[:in] on its
# standard input and writes what it prints to the file opts[:out].
#
# Raises Primer3Exception, including the captured standard error, when the
# exit status is not 0.
def self.run(opts={})
  puts "Primer3.run running..."

  input_path = opts[:in]
  output_path = opts[:out]
  primer3_input = File.read(input_path)
  captured_out = ''
  captured_err = ''
  status = systemu "primer3_core", 0=>primer3_input, 1=>captured_out, 2=>captured_err
  unless status.exitstatus == 0
    raise Primer3Exception.new(), "Error running primer3. Command line was 'primer3_core'\nPrimer3 STDERR was:\n#{captured_err}"
  end
  File.open(output_path, 'w') { |out| out.write(captured_out) }
end
|
|
20
|
+
|
|
21
|
+
# A SNP between two lines, together with the primer3 records designed for it.
# primers_line_1 / primers_line_2 hold every record with primers for each line
# and primer3_line_1 / primer3_line_2 the smallest one (see Primer3Record#<=>).
class SNP

  attr_accessor :gene, :original, :position, :snp, :chromosome, :line_1, :line_2
  attr_accessor :primer3_line_1, :primer3_line_2, :template_length
  attr_accessor :primers_line_1, :primers_line_2
  attr_accessor :used_contigs
  attr_accessor :snp_from
  attr_accessor :regions
  attr_accessor :primer3_errors

  def initialize
    @primers_line_1 = SortedSet.new
    @primers_line_2 = SortedSet.new
    @regions = SortedSet.new
  end

  # NOTE(review): the text ends with a literal "}" after the line name. It
  # looks accidental, but it is kept because the users of this string are
  # not visible here.
  def line_1_name
    "#{gene}:#{position}#{original}>#{snp} #{line_1}}"
  end

  # Same format as line_1_name, for the second line.
  def line_2_name
    "#{gene}:#{position}#{original}>#{snp} #{line_2}}"
  end

  # "gene:<original><position><snp>:<chromosome>". The chromosome comes from
  # snp_from when it is set and from this object otherwise.
  def to_s
    "#{gene}:#{original}#{position}#{snp}:#{source_chromosome}"
  end

  # Looks for +primer+ among the left primers of every record, first in line 1
  # and then in line 2. Returns its melting temperature or "NA".
  def find_left_primer_temp(primer)
    [primers_line_1, primers_line_2].each do |records|
      records.each do |pr|
        tm = pr.find_left_tm(primer)
        return tm if tm
      end
    end
    "NA"
  end

  # First record of line 1 whose left primer, with the allele swapped, has a
  # known melting temperature. nil when there is none.
  def find_primer_pair_first
    primers_line_1.find { |pr| find_left_primer_temp(pr.left_primer_snp(self)) != "NA" }
  end

  # Same as find_primer_pair_first, for the records of line 2.
  def find_primer_pair_second
    primers_line_2.find { |pr| find_left_primer_temp(pr.left_primer_snp(self)) != "NA" }
  end

  # Comma separated row: gene, SNP, template length, chromosome, number of
  # regions and regions joined with "|". When there are primers it continues
  # with polymorphism, line 1 primer, line 2 primer, common primer, type,
  # orientation, the three melting temperatures in the same order, which line
  # the pair was selected from and the product size.
  def print_primers
    #TODO: Retrieve error messages
    values = [gene, "#{original}#{position}#{snp}", template_length,
              source_chromosome, regions.size, regions.to_a.join("|")]

    if primer3_line_1 && primer3_line_2
      values << primer3_line_1.polymorphism
      values.concat(paired_primer_columns)
    elsif primer3_line_1
      values << primer3_line_1.polymorphism
      values.concat(line_1_columns(primer3_line_1, primer3_line_1.left_primer_snp(self), "NA", "first+"))
    elsif primer3_line_2
      values << primer3_line_2.polymorphism
      values.concat(line_2_columns(primer3_line_2, primer3_line_2.left_primer_snp(self), "NA", "second+"))
    end
    values.join(",")
  end

  # Parses "gene,original,position,snp". The position is converted with to_i
  # and both bases are upcased. The argument is not modified.
  def self.parse(reg_str)
    snp = SNP.new
    snp.gene, snp.original, snp.position, snp.snp = reg_str.chomp.split(",")
    snp.position = snp.position.to_i
    # Missing fields stay nil so that incomplete lines can still be inspected.
    snp.original = snp.original.upcase if snp.original
    snp.snp = snp.snp.upcase if snp.snp
    snp
  end

  # Yields one SNP per line of +filename+, skipping the lines whose position
  # is not greater than 0.
  def self.parse_file(filename)
    File.open(filename) do |f|
      f.each_line do |line|
        snp = SNP.parse(line)
        yield snp if snp.position > 0
      end
    end
  end

  # Registers a primer3 record. Records with a primer error are only stored in
  # primer3_errors. Otherwise the template of the matching line is remembered
  # and, when left primers were returned, the record is added to the set of its
  # line and kept as the best of the line if it is smaller than the current one.
  #
  # Raises Primer3Exception when the record belongs to neither line.
  def add_record(primer3record)
    @primer3_errors ||= []
    @template_length = primer3record.sequence_template.size
    unless primer3record.primer_error.nil?
      primer3_errors << primer3record
      return
    end

    from_line_1 = primer3record.line == @line_1
    unless from_line_1 || primer3record.line == @line_2
      raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
    end

    if from_line_1
      @line_1_template = primer3record.sequence_template
    else
      @line_2_template = primer3record.sequence_template
    end
    return unless primer3record.primer_left_num_returned.to_i > 0

    if from_line_1
      primers_line_1 << primer3record
      @primer3_line_1 = primer3record if !@primer3_line_1 || @primer3_line_1 > primer3record
    else
      # Records of the second line go to their own set, which is the one
      # find_primer_pair_second searches.
      primers_line_2 << primer3record
      @primer3_line_2 = primer3record if !@primer3_line_2 || @primer3_line_2 > primer3record
    end
  end

  private

  # Chromosome used in to_s and print_primers.
  def source_chromosome
    snp_from ? snp_from.chromosome : chromosome
  end

  # Primer columns when +record+ was designed on line 1: its own left primer
  # goes first and +other_primer+ (line 2) second.
  def line_1_columns(record, other_primer, other_tm, label)
    pair = record.shortest_pair
    [record.left_primer, other_primer, record.right_primer,
     record.type.to_s, record.orientation.to_s,
     pair.left.tm, other_tm, pair.right.tm,
     label, pair.product_size]
  end

  # Primer columns when +record+ was designed on line 2: +other_primer+
  # (line 1) goes first and the left primer of the record second.
  def line_2_columns(record, other_primer, other_tm, label)
    pair = record.shortest_pair
    [other_primer, record.left_primer, record.right_primer,
     record.type.to_s, record.orientation.to_s,
     other_tm, pair.left.tm, pair.right.tm,
     label, pair.product_size]
  end

  # Primer columns when both lines have a best record. The best record of a
  # line is used when the primer at the same coordinates on the other line has
  # a melting temperature; otherwise the remaining records are searched.
  def paired_primer_columns
    primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
    primer_2_tm = find_left_primer_temp(primer_2)
    primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
    primer_1_tm = find_left_primer_temp(primer_1)

    if primer3_line_1 < primer3_line_2 && primer_2_tm != "NA"
      line_1_columns(primer3_line_1, primer_2, primer_2_tm, "first")
    elsif primer_1_tm != "NA"
      line_2_columns(primer3_line_2, primer_1, primer_1_tm, "second")
    else
      candidate_primer_columns
    end
  end

  # Primer columns taken from the first usable record of each line. Returns an
  # empty list when no line has one.
  def candidate_primer_columns
    first_candidate = find_primer_pair_first
    second_candidate = find_primer_pair_second

    if first_candidate
      primer_2 = primer3_line_2.left_primer_with_coordinates(first_candidate.left_coordinates, first_candidate.orientation)
      primer_2_tm = find_left_primer_temp(primer_2)
    end
    if second_candidate
      primer_1 = primer3_line_1.left_primer_with_coordinates(second_candidate.left_coordinates, second_candidate.orientation)
      primer_1_tm = find_left_primer_temp(primer_1)
    end

    if first_candidate && second_candidate && first_candidate < second_candidate
      line_1_columns(first_candidate, primer_2, primer_2_tm, "first")
    elsif second_candidate
      line_2_columns(second_candidate, primer_1, primer_1_tm, "second")
    elsif first_candidate
      # Same column order as every other pair selected from the first line.
      line_1_columns(first_candidate, primer_2, primer_2_tm, "first")
    else
      []
    end
  end
end
|
|
262
|
+
|
|
263
|
+
# One record of a primer3 output file. The raw KEY=value pairs are kept in
# +properties+ (downcased symbol keys) and can be read as methods. The
# sequence id is expected to look like:
#CL3339Contig1:T509C AvocetS chromosome_specific exon 4D forward
# NOTE(review): parse_header reads seven space separated fields (snp, line,
# type, exon/intron, polymorphism, chromosome, orientation) while the example
# above has six; confirm the format against the code writing the input.
class Primer3Record
  include Comparable
  attr_accessor :properties, :polymorphism

  def initialize
    @properties = Hash.new
  end

  # Primer pair with the smallest size; the first one wins a tie. nil when
  # the record has no pairs. parse_blocks has to be called before.
  def shortest_pair
    @shortest_pair ||= @primerPairs.min_by { |pair| pair.size }
  end

  # Value of the primer_error property, or nil when there is none.
  def primer_error
    @properties[:primer_error] || nil
  end

  # Exposes every property as a method with the same name. Unknown names are
  # reported on $stderr, with the available properties, and raise
  # NoMethodError.
  def method_missing(method_name, *args)
    return @properties[method_name] if @properties[method_name]
    $stderr.puts "Missing #{method_name}"
    $stderr.puts @properties.inspect
    raise NoMethodError.new("undefined method `#{method_name}' for #{self.class}", method_name)
  end

  # Keeps respond_to? in sync with method_missing.
  def respond_to_missing?(method_name, include_private = false)
    (@properties[method_name] ? true : false) || super
  end

  # Melting temperature of the left primer whose sequence is +primer+, looking
  # at indexes 0 to size - 1. nil when no left primer has that sequence.
  def find_left_tm(primer)
    size.times do |i|
      return @properties[:"primer_left_#{i}_tm"] if @properties[:"primer_left_#{i}_sequence"] == primer
    end
    nil
  end

  # Orders the records by snp, then by type (specific, semispecific,
  # nonspecific), then exons before non exons and finally by product length.
  def <=>(anOther)
    ret = snp <=> anOther.snp
    return ret if ret != 0

    #Sorting by the types. Records with the same type fall through to the
    #next criteria.
    if type == :chromosome_specific
      return -1 if anOther.type != :chromosome_specific
    elsif type == :chromosome_semispecific
      return 1 if anOther.type == :chromosome_specific
      return -1 if anOther.type == :chromosome_nonspecific
    elsif type == :chromosome_nonspecific
      return 1 if anOther.type != :chromosome_nonspecific
    end

    #Sorting if it is in intron or not This will give priority
    #to the cases when we know for sure the sequence from the line
    #and reduce the chances of getting messed with a short indel
    if self.exon?
      return -1 unless anOther.exon?
    elsif anOther.exon?
      return 1
    end

    #Sorting for how long the product is, the shorter, the better
    product_length <=> anOther.product_length
  end

  # Splits "a,b" and converts the first two fields to integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # Coordinates of the left primer of the shortest pair.
  def left_coordinates
    #@left_coordinates = parse_coordinates(self.primer_left_0) unless @left_coordinates
    @left_coordinates = shortest_pair.left.coordinates
    @left_coordinates
  end

  # Coordinates of the right primer of the shortest pair, with the first value
  # replaced by first - second + 1.
  # NOTE(review): the replacement is done in place on the array returned by
  # the primer, so the primer reports the converted value afterwards.
  def right_coordinates
    unless @right_coordinates
      @right_coordinates = shortest_pair.right.coordinates
      @right_coordinates[0] = @right_coordinates[0] - @right_coordinates[1] + 1
    end
    @right_coordinates
  end

  # Sequence of the left primer of the shortest pair.
  def left_primer
    #@left_primer = self.sequence_template[left_coordinates[0],left_coordinates[1]] unless @left_primer
    @left_primer = shortest_pair.left.sequence
    @left_primer
  end

  # Copy of the left primer with its last base switched to the other allele of
  # +snp+. For records in reverse orientation the alleles are complemented.
  #
  # Raises Primer3Exception when the orientation is neither :forward nor
  # :reverse, or when the primer ends in a base that is not an allele.
  def left_primer_snp(snp)
    tmp_primer = left_primer.dup
    case self.orientation
    when :forward
      base_original = snp.original
      base_snp = snp.snp
    when :reverse
      base_original = reverse_complement_string(snp.original)
      base_snp = reverse_complement_string(snp.snp)
    else
      raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
    end

    if tmp_primer[-1] == base_original
      tmp_primer[-1] = base_snp
    elsif tmp_primer[-1] == base_snp
      tmp_primer[-1] = base_original
    else
      raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
    end
    tmp_primer
  end

  # Slice of the template at +coordinates+ (start, length). The template is
  # reverse complemented first when +other_orientation+ differs from the
  # orientation of this record.
  def left_primer_with_coordinates(coordinates, other_orientation)
    seq = self.sequence_template
    seq = reverse_complement_string(seq) if self.orientation != other_orientation
    seq[coordinates[0], coordinates[1]]
  end

  # Reverse complement of a string of IUPAC nucleotide codes; the case of
  # every letter is kept.
  def reverse_complement_string(sequenc_str)
    complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
    complement.reverse!
  end

  # Right primer computed from the template: the reverse complement of the
  # slice at right_coordinates. The value is computed once and reused.
  def right_primer_delete
    @right_primer ||= reverse_complement_string(self.sequence_template[right_coordinates[0], right_coordinates[1]])
  end

  # Sequence of the right primer of the shortest pair.
  def right_primer
    shortest_pair.right.sequence
  end

  # Size of the shortest pair.
  def product_length
    shortest_pair.size
  end

  # First field of the header.
  def snp
    parse_header unless @snp
    @snp
  end

  # Splits the sequence id on spaces and fills the header fields. The type and
  # the orientation are stored as symbols.
  def parse_header
    #puts "Parsing header: '#{self.sequence_id}'"
    @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = self.sequence_id.split(" ")
    @type = @type.to_sym
    # exon? reads @exon, so the flag has to be stored there.
    @exon = @in ? @in.to_sym == :exon : false
    @in = @exon if @in
    @homeologous = @polymorphism.to_sym == :homeologous
    @parsed = true
    @orientation = @orientation.to_sym
  end

  def orientation
    parse_header unless @parsed
    @orientation
  end

  def chromosome
    parse_header unless @parsed
    @chromosome
  end

  # True when the polymorphism field of the header is "homeologous".
  def homeologous?
    parse_header unless @parsed
    @homeologous
  end

  def type
    parse_header unless @parsed
    @type
  end

  # True when the fourth field of the header is "exon".
  def exon?
    parse_header unless @parsed
    @exon
  end

  def line
    parse_header unless @parsed
    @line
  end

  # Number of primer pairs returned, 0 when the property is missing.
  def size
    @properties[:primer_pair_num_returned].to_i
  end

  # Builds one PrimerPair per returned pair.
  def parse_blocks
    @primerPairs = Array.new(size) { |i| PrimerPair.new(self, i) }
  end

  # Reads a primer3 output file and yields every record once its closing "="
  # line is found. The text before the first "=" of a line is the property
  # name and the rest its value. Empty lines are ignored.
  def self.parse_file(filename)
    File.open(filename) do |f|
      record = Primer3Record.new
      f.each_line do |line|
        line.chomp!
        if line == "="
          record.parse_blocks
          yield record
          record = Primer3Record.new
        else
          tokens = line.split("=")
          next if tokens.empty?
          # The value may contain "=", so everything after the name is joined back.
          record.properties[tokens[0].downcase.to_sym] = tokens.drop(1).join("=")
        end
      end
    end
  end
end
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
# One primer of a pair. Its values are stored by name with set_value and read
# back as methods with the same name.
class Primer
  attr_accessor :pair

  def initialize
    @values = Hash.new
  end

  # Returns the value stored under the name of the called method. A stored
  # false is returned as is.
  #
  # Raises NoMethodError, listing the available names, when nothing is stored
  # under that name.
  def method_missing(m, *args, &block)
    stored = @values[m.to_s]
    return stored unless stored.nil?
    raise NoMethodError, "There's no method called #{m}, available: #{@values.keys.to_s}."
  end

  # Keeps respond_to? in sync with method_missing.
  def respond_to_missing?(m, include_private = false)
    !@values[m.to_s].nil? || super
  end

  # Stores +value+ under +key+. Use string keys: readers look the name up as
  # a string.
  def set_value(key, value)
    @values[key] = value
  end
end
|
|
551
|
+
|
|
552
|
+
# Primer pair number +index+ of a record. The properties of the record named
# primer_left_<index>... and primer_right_<index>... go to the left and right
# Primer; any other property whose third "_" separated token is the index is
# kept in the pair and can be read as a method (for example product_size).
class PrimerPair

  attr_reader :record
  attr_reader :left, :right

  # Splits "a,b" and converts the first two fields to integers.
  def parse_coordinates(str)
    coords = str.split(',')
    coords[0] = coords[0].to_i
    coords[1] = coords[1].to_i
    coords
  end

  # Product size as an integer.
  def size
    product_size.to_i
  end

  # Raises Primer3Exception when +index+ is not smaller than the size of the
  # record or when one of the two primers has no property in the record.
  def initialize(record, index)
    raise Primer3Exception.new(), "Index #{index} is greater than the number of records" unless index < record.size
    @record = record
    @left = Primer.new
    @right = Primer.new
    @values = Hash.new

    [@left, @right].each do |primer|
      primer.set_value("added", false)
      primer.pair = self
    end

    index_s = index.to_s
    record.properties.each do |key, value|
      tokens = key.to_s.split("_")
      store_property(tokens, value) if tokens.size > 2 && tokens[2] == index_s
    end

    raise Primer3Exception.new(), "The pair is not complete (l:#{left.added}, r:#{right.added})" if @left.added == false || @right.added == false
  end

  # Returns the pair level value stored under the name of the called method.
  #
  # Raises NoMethodError, listing the available names, when there is none.
  def method_missing(m, *args, &block)
    return @values[m.to_s] if @values[m.to_s]
    raise NoMethodError, "There's no method called #{m}. Available methods: #{@values.keys.to_s}"
  end

  # Keeps respond_to? in sync with method_missing.
  def respond_to_missing?(m, include_private = false)
    (@values[m.to_s] ? true : false) || super
  end

  private

  # Stores one property of this pair. +tokens+ is the property name split on
  # "_": the second token selects the left or the right primer and the tokens
  # after the index make the name of the value.
  def store_property(tokens, value)
    primer = case tokens[1]
             when "left" then @left
             when "right" then @right
             end
    if primer.nil?
      @values[tokens[3..6].join("_")] = value
      return
    end

    primer.set_value("added", true)
    if tokens.size == 3
      # primer_<side>_<index> holds the coordinates of the primer.
      primer.set_value("coordinates", parse_coordinates(value))
    else
      # Every value but the sequence is numeric.
      to_add = tokens[3] == "sequence" ? value : value.to_f
      primer.set_value(tokens[3..6].join("_"), to_add)
    end
  end
end
|
|
616
|
+
|
|
617
|
+
# Collects the SNPs between two lines, indexed by SNP#to_s, and attaches the
# primer3 records to them.
class KASPContainer

  attr_accessor :line_1, :line_2
  attr_accessor :snp_hash

  # Adds every SNP that SNP.parse_file yields for +filename+, assigning the
  # two lines of the container to it.
  def add_snp_file(filename)
    @snp_hash ||= Hash.new
    SNP.parse_file(filename) do |snp|
      @snp_hash[snp.to_s] = snp
      snp.line_1 = @line_1
      snp.line_2 = @line_2
    end
  end

  # Registers a new SNP with the gene, alleles and position of +snp_in+ and the
  # lines of the container. +snp_in+ is kept as snp_from and the regions are
  # the target regions of its exon list, as strings. Returns the new SNP.
  def add_snp(snp_in)
    @snp_hash ||= Hash.new
    snp = SNP.new
    snp.gene = snp_in.gene
    snp.original = snp_in.original
    snp.position = snp_in.position
    snp.snp = snp_in.snp
    snp.line_1 = @line_1
    snp.line_2 = @line_2
    snp.snp_from = snp_in
    snp.regions = snp_in.exon_list.values.map { |exon| exon.target_region.to_s }
    @snp_hash[snp.to_s] = snp
    snp
  end

  # Adds every record of the primer3 output +filename+ to its SNP, which is
  # looked up as "<record snp>:<record chromosome>".
  #
  # Raises Primer3Exception when a record belongs to a SNP that was not added.
  def add_primers_file(filename)
    Primer3Record.parse_file(filename) do |primer3record|
      key = "#{primer3record.snp.to_s}:#{primer3record.chromosome}"
      current_snp = @snp_hash ? @snp_hash[key] : nil
      raise Primer3Exception.new "No SNP was added for the primer3 record '#{key}'" if current_snp.nil?
      current_snp.add_record(primer3record)
    end
  end

  # One line per SNP, as formatted by SNP#print_primers.
  def print_primers
    snp_hash.values.inject("") { |out, snp| out << snp.print_primers << "\n" }
  end
end
|
|
671
|
+
|
|
672
|
+
end
|
|
673
|
+
|