bio-polyploid-tools 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +67 -0
- data/README +21 -0
- data/Rakefile +61 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +133 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +15 -0
- data/bin/find_best_blat_hit.rb +32 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +155 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/polymarker.rb +219 -0
- data/bin/snps_between_bams.rb +106 -0
- data/bio-polyploid-tools.gemspec +139 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +698 -0
- data/lib/bio/BIOExtensions.rb +186 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
- data/lib/bio/PolyploidTools/SNP.rb +681 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
- data/lib/bio/SAMToolsExtensions.rb +284 -0
- data/lib/bio/db/exonerate.rb +272 -0
- data/lib/bio/db/fastadb.rb +164 -0
- data/lib/bio/db/primer3.rb +673 -0
- data/lib/bioruby-polyploid-tools.rb +25 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/Test3Aspecific.csv +1 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +51 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +53 -0
- data/test/test_snp_parsing.rb +40 -0
- metadata +201 -0
@@ -0,0 +1,155 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'pathname'
|
5
|
+
require 'bio-samtools'
|
6
|
+
|
7
|
+
require 'set'
|
8
|
+
|
9
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
10
|
+
$: << File.expand_path('.')
|
11
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
12
|
+
require path
|
13
|
+
|
14
|
+
|
15
|
+
#@snp_map=Hash.new
|
16
|
+
|
17
|
+
# ExonContainer variant used by this script: SNPs are read straight from a
# file and stored per gene in @snp_map, without an alignment step.
class HomokaryotContainer < Bio::PolyploidTools::ExonContainer

  # Reads +filename+ line by line, parses every line with
  # Bio::PolyploidTools::SNP.parse and stores the SNPs whose position is
  # greater than zero in @snp_map, grouped by snp.gene.
  #
  # filename::      path of the file with one SNP per line
  # chromosome::    value assigned to snp.chromosome
  # snp_in::        value assigned to snp.snp_in
  # original_name:: value assigned to snp.original_name
  def add_snp_file(filename, chromosome, snp_in, original_name)
    flanking_size = 100
    File.open(filename) do |f|
      f.each_line do |line|
        snp = Bio::PolyploidTools::SNP.parse(line)
        snp.flanking_size = flanking_size
        # SNPs with a non-positive position are not stored.
        next unless snp.position > 0

        snp.container     = self
        snp.chromosome    = chromosome
        snp.snp_in        = snp_in
        snp.original_name = original_name
        snp.use_reference = true
        (@snp_map[snp.gene] ||= []) << snp
      end
    end
  end

  # Writes to +file+ the string returned by snp.primer_3_string for every
  # stored SNP; empty strings are skipped.
  #
  # file::              IO-like object that responds to #puts
  # target_chromosome:: not used by this implementation; each SNP's own
  #                     chromosome is passed to primer_3_string instead
  # parental::          forwarded to snp.primer_3_string
  def print_primer_3_exons(file, target_chromosome, parental)
    @snp_map.each do |_gene, snp_array|
      snp_array.each do |snp|
        string = snp.primer_3_string(snp.chromosome, parental)
        file.puts string if string.size > 0
      end
    end
  end
end
|
52
|
+
|
53
|
+
# Reopens Bio::PolyploidTools::SNP to override how the aligned position and
# the aligned sequences are obtained in this script. The helpers used here
# (local_position, parental_sequences, original, snp) are defined elsewhere
# in the SNP class.
class Bio::PolyploidTools::SNP

  # Note: this is evaluated in the class body, so it sets an instance
  # variable on the class object itself, not on SNP instances.
  @aligned = false

  # The aligned SNP position is the local position.
  def aligned_snp_position
    local_position
  end

  # Takes the parental sequences and writes the +original+ base into
  # sequence "A" and the +snp+ base into sequence "B" at local_position.
  # The result is stored in @aligned_sequences and returned.
  def aligned_sequences
    @aligned_sequences = parental_sequences
    { "A" => original, "B" => snp }.each do |parent, base|
      @aligned_sequences[parent][local_position] = base
    end
    @aligned_sequences
  end
end
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
# Usage: <script> SNP_FILE [REFERENCE_FASTA]
snp_file = ARGV[0]
reference_file = ARGV[1]

snp_in = "A"
original_name = "B"
snps = []

#0. Load the fasta index (only when a reference file was given)
fasta_reference_db = nil
if reference_file
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(reference_file)
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{reference_file}"
end

#1. Read all the SNP files
#All the SNPs should be on the same chromosome as the first SNP.
chromosome = nil
File.open(snp_file) do |f|
  f.each_line do |line|
    snp = nil
    if ARGV.size == 1 # List with sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)
    elsif ARGV.size == 2 # List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      # The template is the full reference entry named after the SNP's gene.
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    else
      # Was spelled `rise`, which failed with NoMethodError instead of
      # reporting the problem.
      raise Bio::DB::Exonerate::ExonerateException, "Wrong number of arguments. "
    end
    raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil?

    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp

    # The first SNP fixes the chromosome; any later mismatch is an error.
    chromosome ||= snp.chromosome
    raise Bio::DB::Exonerate::ExonerateException, "All the snps should come from the same chromosome" if chromosome != snp.chromosome
  end
end
|
113
|
+
|
114
|
+
|
115
|
+
# Chromosome name used for every SNP loaded by this script.
target_chromosome = "PST130"

container = HomokaryotContainer.new
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
container.gene_models(reference_file)

# All the files produced by this run go into a time-stamped folder next to
# the SNP file. Dir.mkdir raises if the folder already exists.
output_folder = "#{snp_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}/"
Dir.mkdir(output_folder)
primer_3_input = "#{output_folder}primer_3_input_temp"
primer_3_output = "#{output_folder}primer_3_output_temp"
output_primers = "#{output_folder}primers.csv"
primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')

container.add_snp_file(snp_file, target_chromosome, snp_in, original_name)

# Global primer3 settings followed by one record per SNP. The block form
# closes the file even if writing a record raises.
File.open(primer_3_input, "w") do |file|
  file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
  file.puts("PRIMER_MAX_SIZE=25")
  file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
  file.puts("PRIMER_LIBERAL_BASE=1")
  file.puts("PRIMER_NUM_RETURN=5")
  file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")

  container.print_primer_3_exons(file, target_chromosome, snp_in)
end

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})

#2. Pick the best primer and make the primer3 output
kasp_container = Bio::DB::Primer3::KASPContainer.new
# NOTE(review): line_1/line_2 are assigned in the opposite order to the CSV
# header columns below (snp_in first) - confirm this is intended.
kasp_container.line_1 = original_name
kasp_container.line_2 = snp_in

snps.each do |snp|
  kasp_container.add_snp(snp)
end

kasp_container.add_primers_file(primer_3_output)
header = "Marker,SNP,RegionSize,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio'
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
6
|
+
$: << File.expand_path('.')
|
7
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
8
|
+
require path
|
9
|
+
|
10
|
+
|
11
|
+
# Prints +msg+ to standard output, prefixed with the current time
# formatted as "YYYY-MM-DD HH:MM:SS.mmm".
def log(msg)
  timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
  $stdout.puts "#{timestamp}: #{msg}"
end
|
15
|
+
|
16
|
+
markers = nil

# Command line options: :chromosome (upper-cased), :reference and :map.
options = {}
OptionParser.new do |opts|
  # Use the name the script was invoked with instead of a copied banner.
  opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"

  opts.on("-c", "--chromosome CHR", "chromosome (1A, 3B, etc)") do |o|
    options[:chromosome] = o.upcase
  end
  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
    options[:reference] = o
  end
  opts.on("-m", "--map CSV", "File with the map and sequence \n Header: INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE") do |o|
    options[:map] = o
  end
end.parse!
#reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
reference = options[:reference]
# The reference is mandatory. This used to be `throw raise Exception.new(), ...`,
# which raised a bare Exception and never reached the `throw`.
raise ArgumentError, "Reference has to be provided" unless reference
|
37
|
+
|
38
|
+
# Chromosome requested on the command line; it selects the markers and
# names every file written below.
chr = options[:chromosome]

map = Bio::PolyploidTools::ArmMap.new
map.chromosome = chr
map.global_reference(reference)

log "Reading markers file"
# Only the markers on the requested chromosome are kept, keyed by snp_name.
Bio::PolyploidTools::Marker.parse(options[:map]) do |marker|
  map.markers[marker.snp_name] = marker if chr == marker.chr
end

# Files written to the current directory.
fasta_tmp        = "markers_#{chr}.fa"
contigs_tmp      = "contigs_#{chr}.fa"
aln_tmp          = "align_#{chr}.psl"
contigs_map      = "contigs_map_#{chr}.fa"
map_with_contigs = "contigs_map_#{chr}.csv"

#1. Prints the sequences to print according to the chromosome to search
log "Writing markers: #{fasta_tmp}"
map.print_fasta_markers(fasta_tmp)

log "Writing contigs: #{contigs_tmp}"
map.print_fasta_contigs_from_reference(contigs_tmp)

log "Aligning markers #{aln_tmp}"
map.align_markers(aln_tmp)

log "printing contigs with markers #{contigs_map}"
map.print_fasta_contigs_for_markers(contigs_map)

log "printing map with contigs #{map_with_contigs}"
map.print_map_with_contigs(map_with_contigs)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#This uses the map output from map_markers_to_contigs.rb
|
4
|
+
#You need a reference with the name of the contigs, containing the chromosome
|
5
|
+
#arm and a list of sequences to map. The algorithm creates a smaller reference
|
6
|
+
#file, so the search only spans across the contigs in the region. This should
|
7
|
+
#allow the use of a more refined mapping algorithm.
|
8
|
+
require 'bio'
|
9
|
+
require 'optparse'
|
10
|
+
|
11
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
12
|
+
$: << File.expand_path('.')
|
13
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
14
|
+
require path
|
15
|
+
|
16
|
+
|
17
|
+
# Writes +msg+ to standard output preceded by a time stamp with
# millisecond resolution ("YYYY-MM-DD HH:MM:SS.mmm: msg").
def log(msg)
  now = Time.now
  $stdout.puts "#{now.strftime("%Y-%m-%d %H:%M:%S.%L")}: #{msg}"
end
|
21
|
+
|
22
|
+
markers = nil

# Command line options: :chromosome (upper-cased), :reference and :map.
options = {}
OptionParser.new do |opts|
  # Use the name the script was invoked with instead of a copied banner.
  opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"

  opts.on("-c", "--chromosome CHR", "chromosome (1A, 3B, etc)") do |o|
    options[:chromosome] = o.upcase
  end
  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
    options[:reference] = o
  end
  opts.on("-m", "--map CSV", "File with the map and sequence \n Header: INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE") do |o|
    options[:map] = o
  end
end.parse!
#reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
reference = options[:reference]
# The reference is mandatory. This used to be `throw raise Exception.new(), ...`,
# which raised a bare Exception and never reached the `throw`.
raise ArgumentError, "Reference has to be provided" unless reference
|
data/bin/polymarker.rb
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'pathname'
|
5
|
+
require 'bio-samtools'
|
6
|
+
require 'optparse'
|
7
|
+
require 'set'
|
8
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
9
|
+
$: << File.expand_path('.')
|
10
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
11
|
+
require path
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
# Defaults; each one can be overridden from the command line except
# :chunks, :bucket_size and :bucket, which have no switch here.
options = {
  :path_to_contigs => "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa",
  :chunks => 1,
  :bucket_size => 0,
  :bucket => 1,
  :model => "est2genome"
}

OptionParser.new do |opts|
  opts.banner = "Usage: polymarker.rb [options]"

  opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
    options[:path_to_contigs] = o
  end

  opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
    options[:marker_list] = o
  end

  opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
    options[:snp_list] = o
  end

  opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
    options[:reference] = o
  end

  opts.on("-o", "--output FOLDER", "Output folder") do |o|
    options[:output_folder] = o
  end

  opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
    options[:model] = o
  end
end.parse!

# Fail fast on missing inputs. Without these checks the script creates an
# output folder and only then crashes while opening or parsing the list.
unless options[:marker_list] || options[:snp_list]
  raise OptionParser::MissingArgument, "--marker_list or --snp_list has to be provided"
end
if options[:snp_list] && !options[:marker_list] && !options[:reference]
  raise OptionParser::MissingArgument, "--snp_list requires --reference"
end
|
51
|
+
|
52
|
+
# Echo the parsed options and whatever is left in ARGV.
p options
p ARGV

#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
#TODO: Make all this parameters

path_to_contigs = options[:path_to_contigs]
model           = options[:model]
fasta_reference = options[:reference]

snp_in = "A"
original_name = "B"

#test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv"
# The SNP list wins over the marker list when both are given.
test_file = options[:snp_list] || options[:marker_list]

# Use the folder given with --output, or a time-stamped one next to the input.
# Dir.mkdir raises if the folder already exists.
output_folder = options[:output_folder] || "#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
Dir.mkdir(output_folder)

#TODO Make this tmp files
temp_fasta_query = "#{output_folder}/to_align.fa"
temp_contigs     = "#{output_folder}/contigs_tmp.fa"
exonerate_file   = "#{output_folder}/exonerate_tmp.tab"
primer_3_input   = "#{output_folder}/primer_3_input_temp"
primer_3_output  = "#{output_folder}/primer_3_output_temp"
exons_filename   = "#{output_folder}/exons_genes_and_contigs.fa"
output_primers   = "#{output_folder}/primers.csv"

primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')

# Alignments must have an identity above this value to be used.
min_identity = 90
snps = []
|
86
|
+
|
87
|
+
#0. Load the fasta index (only when --reference was given)
fasta_reference_db = nil
if fasta_reference
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{fasta_reference}"
end

#1. Read all the SNP files
# (The check that all the SNPs are on the same chromosome is disabled here.)
File.open(test_file) do |f|
  f.each_line do |line|
    snp = nil
    if options[:marker_list] # List with sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)
    elsif options[:snp_list] && options[:reference] # List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      # The template is the full reference entry named after the SNP's gene.
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    else
      # Was spelled `rise`, which failed with NoMethodError; the message now
      # names the options that are actually missing.
      raise Bio::DB::Exonerate::ExonerateException, "Wrong arguments: provide --marker_list, or --snp_list together with --reference. "
    end
    raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil?

    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp
  end
end
|
120
|
+
|
121
|
+
#1.1 Close fasta file
|
122
|
+
#fasta_reference_db.close() if fasta_reference_db
|
123
|
+
#2. Generate all the fasta files
# Each gene is written once, even if several SNPs share it. The block form
# closes the file even if snp.to_fasta raises.
written_seqs = Set.new
File.open(temp_fasta_query, "w") do |fasta_out|
  snps.each do |snp|
    # Set#add? returns nil when the gene was already written.
    fasta_out.puts snp.to_fasta if written_seqs.add?(snp.gene)
  end
end
|
134
|
+
|
135
|
+
#3. Run exonerate on each of the possible chromosomes for the SNP
target = path_to_contigs
puts target

fasta_file = Bio::DB::Fasta::FastaFile.new(target)
fasta_file.load_fai_entries

found_contigs = Set.new
# The block form closes both output files even if the alignment raises.
File.open(exonerate_file, "w") do |exo_f|
  File.open(temp_contigs, "w") do |contigs_f|
    Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
      next unless aln.identity > min_identity

      exo_f.puts aln.line

      # We only add once each contig. Should reduce the size of the output file.
      next unless found_contigs.add?(aln.target_id)

      entry = fasta_file.index.region_for_entry(aln.target_id)
      # The message used to interpolate an undefined `target_id`, and the
      # exception class was not qualified with its namespace.
      raise Bio::DB::Exonerate::ExonerateException, "Entry not found! #{aln.target_id}. Make sure that the #{target}.fai was generated properly." if entry.nil?

      region = entry.get_full_region
      seq = fasta_file.fetch_sequence(region)
      contigs_f.puts(">#{aln.target_id}\n#{seq}")
    end
  end
end
|
164
|
+
|
165
|
+
#4. Load all the results from exonerate and get the input filename for primer3
|
166
|
+
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more clever
arm_selection_first_two = lambda do |contig_name|
  contig_name[0, 2]
end

#Function to parse stuff like: IWGSC_CSS_1AL_scaff_110
# Returns the first two characters of the third '_'-separated field ("1A" in
# the example above). Raises ArgumentError when the name has fewer than three
# fields, instead of failing with NoMethodError on nil.
arm_selection_embl = lambda do |contig_name|
  arm_field = contig_name.split('_')[2]
  raise ArgumentError, "Unable to find the chromosome arm in contig name '#{contig_name}'" if arm_field.nil?

  arm_field[0, 2]
end
|
177
|
+
|
178
|
+
container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = 100
container.gene_models(temp_fasta_query)
container.chromosomes(temp_contigs)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})

# Every SNP shares the container and its flanking size.
snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  container.add_snp(snp)
end
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_embl, :min_identity=>min_identity})

# The block form closes each file even if writing raises.
File.open(exons_filename, "w") do |file|
  container.print_fasta_snp_exones(file)
end

# Global primer3 settings followed by the records printed by the container.
File.open(primer_3_input, "w") do |file|
  file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
  file.puts("PRIMER_MAX_SIZE=25")
  file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
  file.puts("PRIMER_LIBERAL_BASE=1")
  file.puts("PRIMER_NUM_RETURN=5")
  file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
  container.print_primer_3_exons(file, nil, snp_in)
end

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})

#5. Pick the best primer and make the primer3 output
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = snp_in
kasp_container.line_2 = original_name

snps.each do |snp|
  kasp_container.add_snp(snp)
end

kasp_container.add_primers_file(primer_3_output)
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
219
|
+
|