bio-polyploid-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +67 -0
- data/README +21 -0
- data/Rakefile +61 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +133 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +15 -0
- data/bin/find_best_blat_hit.rb +32 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +155 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/polymarker.rb +219 -0
- data/bin/snps_between_bams.rb +106 -0
- data/bio-polyploid-tools.gemspec +139 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +698 -0
- data/lib/bio/BIOExtensions.rb +186 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
- data/lib/bio/PolyploidTools/SNP.rb +681 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
- data/lib/bio/SAMToolsExtensions.rb +284 -0
- data/lib/bio/db/exonerate.rb +272 -0
- data/lib/bio/db/fastadb.rb +164 -0
- data/lib/bio/db/primer3.rb +673 -0
- data/lib/bioruby-polyploid-tools.rb +25 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/Test3Aspecific.csv +1 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +51 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +53 -0
- data/test/test_snp_parsing.rb +40 -0
- metadata +201 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'bio'
|
|
3
|
+
require 'rubygems'
|
|
4
|
+
require 'pathname'
|
|
5
|
+
require 'bio-samtools'
|
|
6
|
+
|
|
7
|
+
require 'set'
|
|
8
|
+
|
|
9
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
|
10
|
+
$: << File.expand_path('.')
|
|
11
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
|
12
|
+
require path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
#@snp_map=Hash.new
|
|
16
|
+
|
|
17
|
+
class HomokaryotContainer < Bio::PolyploidTools::ExonContainer
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def add_snp_file(filename, chromosome, snp_in, original_name)
|
|
21
|
+
flanking_size = 100
|
|
22
|
+
File.open(filename) do | f |
|
|
23
|
+
f.each_line do | line |
|
|
24
|
+
snp = Bio::PolyploidTools::SNP.parse(line)
|
|
25
|
+
snp.flanking_size = flanking_size
|
|
26
|
+
if snp.position > 0
|
|
27
|
+
snp.container = self
|
|
28
|
+
snp.chromosome = chromosome
|
|
29
|
+
snp.snp_in = snp_in
|
|
30
|
+
snp.original_name = original_name
|
|
31
|
+
snp.use_reference = true
|
|
32
|
+
snp.container = self
|
|
33
|
+
@snp_map[snp.gene] = Array.new unless @snp_map[snp.gene]
|
|
34
|
+
@snp_map[snp.gene] << snp
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def print_primer_3_exons (file, target_chromosome , parental )
|
|
43
|
+
@snp_map.each do | gene, snp_array|
|
|
44
|
+
snp_array.each do |snp|
|
|
45
|
+
string = snp.primer_3_string( snp.chromosome, parental )
|
|
46
|
+
file.puts string if string.size > 0
|
|
47
|
+
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
class Bio::PolyploidTools::SNP
|
|
54
|
+
|
|
55
|
+
@aligned = false
|
|
56
|
+
|
|
57
|
+
def aligned_snp_position
|
|
58
|
+
return local_position
|
|
59
|
+
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def aligned_sequences
|
|
63
|
+
|
|
64
|
+
@aligned_sequences = parental_sequences
|
|
65
|
+
@aligned_sequences["A"][local_position] = original
|
|
66
|
+
@aligned_sequences["B"][local_position] = snp
|
|
67
|
+
return @aligned_sequences
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
snp_file = ARGV[0]
|
|
76
|
+
reference_file = ARGV[1]
|
|
77
|
+
|
|
78
|
+
snp_in="A"
|
|
79
|
+
original_name="B"
|
|
80
|
+
snps = Array.new
|
|
81
|
+
|
|
82
|
+
#0. Load the fasta index
|
|
83
|
+
fasta_reference_db = nil
|
|
84
|
+
if reference_file
|
|
85
|
+
fasta_reference_db = Bio::DB::Fasta::FastaFile.new(reference_file)
|
|
86
|
+
fasta_reference_db.load_fai_entries
|
|
87
|
+
p "Fasta reference: #{reference_file}"
|
|
88
|
+
end
|
|
89
|
+
#1. Read all the SNP files
|
|
90
|
+
#All the SNPs should be on the same chromosome as the first SNP.
|
|
91
|
+
chromosome = nil
|
|
92
|
+
File.open(snp_file) do | f |
|
|
93
|
+
f.each_line do | line |
|
|
94
|
+
# p line.chomp!
|
|
95
|
+
snp = nil
|
|
96
|
+
if ARGV.size == 1 #List with Sequence
|
|
97
|
+
snp = Bio::PolyploidTools::SNPSequence.parse(line)
|
|
98
|
+
elsif ARGV.size == 2 #List and fasta file
|
|
99
|
+
snp = Bio::PolyploidTools::SNP.parse(line)
|
|
100
|
+
region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
|
|
101
|
+
snp.template_sequence = fasta_reference_db.fetch_sequence(region)
|
|
102
|
+
else
|
|
103
|
+
raise Bio::DB::Exonerate::ExonerateException, "Wrong number of arguments. " # neither 1 (list with sequences) nor 2 (list + fasta) arguments were given
|
|
104
|
+
end
|
|
105
|
+
raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil? # the parser produced nothing for this line
|
|
106
|
+
snp.snp_in = snp_in
|
|
107
|
+
snp.original_name = original_name
|
|
108
|
+
snps << snp
|
|
109
|
+
chromosome = snp.chromosome unless chromosome
|
|
110
|
+
raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
container = HomokaryotContainer.new
|
|
116
|
+
container.add_parental({:name=>snp_in})
|
|
117
|
+
container.add_parental({:name=>original_name})
|
|
118
|
+
container.gene_models(reference_file)
|
|
119
|
+
|
|
120
|
+
output_folder="#{snp_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}/"
|
|
121
|
+
Dir.mkdir(output_folder)
|
|
122
|
+
primer_3_input="#{output_folder}primer_3_input_temp"
|
|
123
|
+
primer_3_output="#{output_folder}primer_3_output_temp"
|
|
124
|
+
container.add_snp_file(snp_file, "PST130", snp_in, original_name)
|
|
125
|
+
primer_3_config=File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
|
|
126
|
+
output_primers="#{output_folder}primers.csv"
|
|
127
|
+
|
|
128
|
+
file = File.open(primer_3_input, "w")
|
|
129
|
+
file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
|
|
130
|
+
file.puts("PRIMER_MAX_SIZE=25")
|
|
131
|
+
file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
|
|
132
|
+
file.puts("PRIMER_LIBERAL_BASE=1")
|
|
133
|
+
file.puts("PRIMER_NUM_RETURN=5")
|
|
134
|
+
file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
container.print_primer_3_exons(file, "PST130",snp_in)
|
|
138
|
+
|
|
139
|
+
file.close
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})
|
|
143
|
+
|
|
144
|
+
#2. Pick the best primer and make the primer3 output
|
|
145
|
+
kasp_container=Bio::DB::Primer3::KASPContainer.new
|
|
146
|
+
kasp_container.line_1=original_name
|
|
147
|
+
kasp_container.line_2=snp_in
|
|
148
|
+
|
|
149
|
+
snps.each do |snp|
|
|
150
|
+
kasp_container.add_snp(snp)
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
kasp_container.add_primers_file(primer_3_output)
|
|
154
|
+
header = "Marker,SNP,RegionSize,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
|
|
155
|
+
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'bio'
|
|
3
|
+
require 'optparse'
|
|
4
|
+
|
|
5
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
|
6
|
+
$: << File.expand_path('.')
|
|
7
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
|
8
|
+
require path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def log(msg)
|
|
12
|
+
time=Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
|
|
13
|
+
puts "#{time}: #{msg}"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
markers = nil
|
|
17
|
+
|
|
18
|
+
options = {}
|
|
19
|
+
OptionParser.new do |opts|
|
|
20
|
+
|
|
21
|
+
opts.banner = "Usage: map_markers_to_contigs.rb [options]" # usage line shown by --help; names this script
|
|
22
|
+
|
|
23
|
+
opts.on("-c", "--chromosome CHR", "chromosome (1A, 3B, etc)") do |o|
|
|
24
|
+
options[:chromosome] = o.upcase
|
|
25
|
+
end
|
|
26
|
+
opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
|
|
27
|
+
options[:reference] = o
|
|
28
|
+
end
|
|
29
|
+
opts.on("-m", "--map CSV", "File with the map and sequence \n Header: INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE") do |o|
|
|
30
|
+
options[:map] = o
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
end.parse!
|
|
34
|
+
#reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
|
|
35
|
+
reference = options[:reference] if options[:reference]
|
|
36
|
+
raise ArgumentError, "Reference has to be provided" unless reference # --reference is mandatory; stop before building the map
|
|
37
|
+
|
|
38
|
+
map = Bio::PolyploidTools::ArmMap.new
|
|
39
|
+
map.chromosome = options[:chromosome]
|
|
40
|
+
map.global_reference(reference)
|
|
41
|
+
log "Reading markers file"
|
|
42
|
+
Bio::PolyploidTools::Marker.parse(options[:map]) do |marker|
|
|
43
|
+
if options[:chromosome] == marker.chr
|
|
44
|
+
map.markers[marker.snp_name] = marker
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
fasta_tmp="markers_#{options[:chromosome]}.fa"
|
|
51
|
+
contigs_tmp="contigs_#{options[:chromosome]}.fa"
|
|
52
|
+
aln_tmp="align_#{options[:chromosome]}.psl"
|
|
53
|
+
contigs_map="contigs_map_#{options[:chromosome]}.fa"
|
|
54
|
+
map_with_contigs="contigs_map_#{options[:chromosome]}.csv"
|
|
55
|
+
|
|
56
|
+
#1. Prints the sequences to print according to the chromosome to search
|
|
57
|
+
log "Writing markers: #{fasta_tmp}"
|
|
58
|
+
map.print_fasta_markers(fasta_tmp)
|
|
59
|
+
log "Writing contigs: #{contigs_tmp}"
|
|
60
|
+
map.print_fasta_contigs_from_reference(contigs_tmp)
|
|
61
|
+
log "Aligning markers #{aln_tmp}"
|
|
62
|
+
map.align_markers(aln_tmp)
|
|
63
|
+
log "printing contigs with markers #{contigs_map}"
|
|
64
|
+
map.print_fasta_contigs_for_markers(contigs_map)
|
|
65
|
+
log "printing map with contigs #{map_with_contigs}"
|
|
66
|
+
map.print_map_with_contigs(map_with_contigs)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
#This uses the map output from map_markers_to_contigs.rb
|
|
4
|
+
#You need a reference with the name of the contigs, containing the chromosome
|
|
5
|
+
#arm and a list of sequences to map. The algorithm creates a smaller reference
|
|
6
|
+
#file, so the search only spans across the contigs in the region. This should
|
|
7
|
+
#allow the use of a refined mapping algorithm.
|
|
8
|
+
require 'bio'
|
|
9
|
+
require 'optparse'
|
|
10
|
+
|
|
11
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
|
12
|
+
$: << File.expand_path('.')
|
|
13
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
|
14
|
+
require path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def log(msg)
|
|
18
|
+
time=Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
|
|
19
|
+
puts "#{time}: #{msg}"
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
markers = nil
|
|
23
|
+
|
|
24
|
+
options = {}
|
|
25
|
+
OptionParser.new do |opts|
|
|
26
|
+
|
|
27
|
+
opts.banner = "Usage: markers_in_region.rb [options]" # usage line shown by --help; names this script
|
|
28
|
+
|
|
29
|
+
opts.on("-c", "--chromosome CHR", "chromosome (1A, 3B, etc)") do |o|
|
|
30
|
+
options[:chromosome] = o.upcase
|
|
31
|
+
end
|
|
32
|
+
opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
|
|
33
|
+
options[:reference] = o
|
|
34
|
+
end
|
|
35
|
+
opts.on("-m", "--map CSV", "File with the map and sequence \n Header: INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE") do |o|
|
|
36
|
+
options[:map] = o
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
end.parse!
|
|
40
|
+
#reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
|
|
41
|
+
reference = options[:reference] if options[:reference]
|
|
42
|
+
raise ArgumentError, "Reference has to be provided" unless reference # --reference is mandatory for this script
|
data/bin/polymarker.rb
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'bio'
|
|
3
|
+
require 'rubygems'
|
|
4
|
+
require 'pathname'
|
|
5
|
+
require 'bio-samtools'
|
|
6
|
+
require 'optparse'
|
|
7
|
+
require 'set'
|
|
8
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
|
9
|
+
$: << File.expand_path('.')
|
|
10
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
|
11
|
+
require path
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
options = {}
|
|
17
|
+
options[:path_to_contigs] = "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa"
|
|
18
|
+
options[:chunks] = 1
|
|
19
|
+
options[:bucket_size] = 0
|
|
20
|
+
options[:bucket] = 1
|
|
21
|
+
options[:model] = "est2genome"
|
|
22
|
+
OptionParser.new do |opts|
|
|
23
|
+
opts.banner = "Usage: polymarker.rb [options]"
|
|
24
|
+
|
|
25
|
+
opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
|
|
26
|
+
options[:path_to_contigs] = o
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
|
|
30
|
+
options[:marker_list] = o
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
|
|
34
|
+
options[:snp_list] = o
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
|
|
38
|
+
options[:reference] = o
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
opts.on("-o", "--output FOLDER", "Output folder") do |o|
|
|
42
|
+
options[:output_folder] = o
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
|
|
46
|
+
options[:model] = o
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
end.parse!
|
|
51
|
+
|
|
52
|
+
p options
|
|
53
|
+
p ARGV
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
|
|
57
|
+
#TODO: Make all of these parameters
|
|
58
|
+
|
|
59
|
+
path_to_contigs=options[:path_to_contigs]
|
|
60
|
+
|
|
61
|
+
snp_in="A"
|
|
62
|
+
original_name="B"
|
|
63
|
+
fasta_reference = nil
|
|
64
|
+
#test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv"
|
|
65
|
+
test_file=options[:marker_list]
|
|
66
|
+
test_file=options[:snp_list] if options[:snp_list]
|
|
67
|
+
fasta_reference = options[:reference]
|
|
68
|
+
output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
|
|
69
|
+
output_folder= options[:output_folder] if options[:output_folder]
|
|
70
|
+
Dir.mkdir(output_folder)
|
|
71
|
+
#TODO Make this tmp files
|
|
72
|
+
temp_fasta_query="#{output_folder}/to_align.fa"
|
|
73
|
+
temp_contigs="#{output_folder}/contigs_tmp.fa"
|
|
74
|
+
exonerate_file="#{output_folder}/exonerate_tmp.tab"
|
|
75
|
+
primer_3_input="#{output_folder}/primer_3_input_temp"
|
|
76
|
+
primer_3_output="#{output_folder}/primer_3_output_temp"
|
|
77
|
+
exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
|
|
78
|
+
output_primers="#{output_folder}/primers.csv"
|
|
79
|
+
|
|
80
|
+
primer_3_config=File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
|
|
81
|
+
model=options[:model]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
min_identity= 90
|
|
85
|
+
snps = Array.new
|
|
86
|
+
|
|
87
|
+
#0. Load the fasta index
|
|
88
|
+
fasta_reference_db = nil
|
|
89
|
+
if fasta_reference
|
|
90
|
+
fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
|
|
91
|
+
fasta_reference_db.load_fai_entries
|
|
92
|
+
p "Fasta reference: #{fasta_reference}"
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
#1. Read all the SNP files
|
|
97
|
+
#All the SNPs should be on the same chromosome as the first SNP.
|
|
98
|
+
#chromosome = nil
|
|
99
|
+
File.open(test_file) do | f |
|
|
100
|
+
f.each_line do | line |
|
|
101
|
+
# p line.chomp!
|
|
102
|
+
snp = nil
|
|
103
|
+
if options[:marker_list] #List with Sequence
|
|
104
|
+
snp = Bio::PolyploidTools::SNPSequence.parse(line)
|
|
105
|
+
elsif options[:snp_list] and options[:reference] #List and fasta file
|
|
106
|
+
snp = Bio::PolyploidTools::SNP.parse(line)
|
|
107
|
+
region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
|
|
108
|
+
snp.template_sequence = fasta_reference_db.fetch_sequence(region)
|
|
109
|
+
else
|
|
110
|
+
raise Bio::DB::Exonerate::ExonerateException, "Wrong number of arguments. " # no --marker_list, and not both --snp_list and --reference
|
|
111
|
+
end
|
|
112
|
+
raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil? # the parser produced nothing for this line
|
|
113
|
+
snp.snp_in = snp_in
|
|
114
|
+
snp.original_name = original_name
|
|
115
|
+
snps << snp
|
|
116
|
+
# chromosome = snp.chromosome unless chromosome
|
|
117
|
+
# raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
#1.1 Close fasta file
|
|
122
|
+
#fasta_reference_db.close() if fasta_reference_db
|
|
123
|
+
#2. Generate all the fasta files
|
|
124
|
+
|
|
125
|
+
written_seqs = Set.new
|
|
126
|
+
file = File.open(temp_fasta_query, "w")
|
|
127
|
+
snps.each do |snp|
|
|
128
|
+
unless written_seqs.include?(snp.gene)
|
|
129
|
+
written_seqs << snp.gene
|
|
130
|
+
file.puts snp.to_fasta
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
file.close
|
|
134
|
+
|
|
135
|
+
#3. Run exonerate on each of the possible chromosomes for the SNP
|
|
136
|
+
#puts chromosome
|
|
137
|
+
#chr_group = chromosome[0]
|
|
138
|
+
exo_f = File.open(exonerate_file, "w")
|
|
139
|
+
contigs_f = File.open(temp_contigs, "w")
|
|
140
|
+
filename=path_to_contigs
|
|
141
|
+
puts filename
|
|
142
|
+
target=filename
|
|
143
|
+
|
|
144
|
+
fasta_file = Bio::DB::Fasta::FastaFile.new(target)
|
|
145
|
+
fasta_file.load_fai_entries
|
|
146
|
+
|
|
147
|
+
found_cointigs = Set.new
|
|
148
|
+
Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
|
|
149
|
+
if aln.identity > min_identity
|
|
150
|
+
exo_f.puts aln.line
|
|
151
|
+
unless found_cointigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
|
|
152
|
+
found_cointigs.add(aln.target_id)
|
|
153
|
+
entry = fasta_file.index.region_for_entry(aln.target_id)
|
|
154
|
+
raise Bio::DB::Exonerate::ExonerateException, "Entry not found! #{aln.target_id}. Make sure that the #{target}.fai was generated properly." if entry.nil? # aligned contig is absent from the fasta index of the target file
|
|
155
|
+
region = entry.get_full_region
|
|
156
|
+
seq = fasta_file.fetch_sequence(region)
|
|
157
|
+
contigs_f.puts(">#{aln.target_id}\n#{seq}")
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
exo_f.close()
|
|
163
|
+
contigs_f.close()
|
|
164
|
+
|
|
165
|
+
#4. Load all the results from exonerate and get the input filename for primer3
|
|
166
|
+
#Custom arm selection function that only uses the first two characters. Maybe
|
|
167
|
+
#we want to make it a bit more clever
|
|
168
|
+
arm_selection_first_two = lambda do | contig_name |
|
|
169
|
+
ret = contig_name[0,2]
|
|
170
|
+
return ret
|
|
171
|
+
end
|
|
172
|
+
#Function to parse stuff like: IWGSC_CSS_1AL_scaff_110
|
|
173
|
+
arm_selection_embl = lambda do | contig_name|
|
|
174
|
+
ret = contig_name.split('_')[2][0,2]
|
|
175
|
+
return ret
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
container= Bio::PolyploidTools::ExonContainer.new
|
|
179
|
+
container.flanking_size=100
|
|
180
|
+
container.gene_models(temp_fasta_query)
|
|
181
|
+
container.chromosomes(temp_contigs)
|
|
182
|
+
container.add_parental({:name=>snp_in})
|
|
183
|
+
container.add_parental({:name=>original_name})
|
|
184
|
+
snps.each do |snp|
|
|
185
|
+
snp.container = container
|
|
186
|
+
snp.flanking_size = container.flanking_size
|
|
187
|
+
container.add_snp(snp)
|
|
188
|
+
end
|
|
189
|
+
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_embl, :min_identity=>min_identity})
|
|
190
|
+
|
|
191
|
+
file = File.open(exons_filename, "w")
|
|
192
|
+
container.print_fasta_snp_exones(file)
|
|
193
|
+
file.close
|
|
194
|
+
|
|
195
|
+
file = File.open(primer_3_input, "w")
|
|
196
|
+
file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
|
|
197
|
+
file.puts("PRIMER_MAX_SIZE=25")
|
|
198
|
+
file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
|
|
199
|
+
file.puts("PRIMER_LIBERAL_BASE=1")
|
|
200
|
+
file.puts("PRIMER_NUM_RETURN=5")
|
|
201
|
+
file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
|
|
202
|
+
container.print_primer_3_exons(file, nil, snp_in)
|
|
203
|
+
file.close
|
|
204
|
+
|
|
205
|
+
Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})
|
|
206
|
+
|
|
207
|
+
#5. Pick the best primer and make the primer3 output
|
|
208
|
+
kasp_container=Bio::DB::Primer3::KASPContainer.new
|
|
209
|
+
kasp_container.line_1=snp_in
|
|
210
|
+
kasp_container.line_2=original_name
|
|
211
|
+
|
|
212
|
+
snps.each do |snp|
|
|
213
|
+
kasp_container.add_snp(snp)
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
kasp_container.add_primers_file(primer_3_output)
|
|
217
|
+
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
|
|
218
|
+
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
|
219
|
+
|