bio-polymarker 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +24 -0
- data/Gemfile +23 -0
- data/README.md +205 -0
- data/Rakefile +61 -0
- data/SECURITY.md +16 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +128 -0
- data/bin/blast_triads.rb +166 -0
- data/bin/blast_triads_promoters.rb +192 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +69 -0
- data/bin/filter_exonerate_by_identity.rb +38 -0
- data/bin/find_best_blat_hit.rb +33 -0
- data/bin/find_best_exonerate.rb +17 -0
- data/bin/get_longest_hsp_blastx_triads.rb +66 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +183 -0
- data/bin/mafft_triads.rb +120 -0
- data/bin/mafft_triads_promoters.rb +403 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/marker_to_vcf.rb +241 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/mask_triads.rb +169 -0
- data/bin/polymarker.rb +410 -0
- data/bin/polymarker_capillary.rb +443 -0
- data/bin/polymarker_deletions.rb +350 -0
- data/bin/snp_position_to_polymarker.rb +101 -0
- data/bin/snps_between_bams.rb +107 -0
- data/bin/tag_stats.rb +75 -0
- data/bin/vcfLineToTable.rb +56 -0
- data/bin/vcfToPolyMarker.rb +82 -0
- data/bio-polymarker.gemspec +227 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +465 -0
- data/lib/bio/BIOExtensions.rb +153 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/Mask.rb +116 -0
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
- data/lib/bio/PolyploidTools/SNP.rb +804 -0
- data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
- data/lib/bio/db/blast.rb +114 -0
- data/lib/bio/db/exonerate.rb +333 -0
- data/lib/bio/db/primer3.rb +820 -0
- data/lib/bio-polymarker.rb +28 -0
- data/test/data/7B_amplicon_test.fa +12 -0
- data/test/data/7B_amplicon_test.fa.fai +1 -0
- data/test/data/7B_amplicon_test_reference.fa +110 -0
- data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
- data/test/data/7B_marker_test.txt +1 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_blast.tab +4 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_contigs.fa.fai +4 -0
- data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
- data/test/data/BS00068396_51_contigs.fa.nin +0 -0
- data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
- data/test/data/BS00068396_51_contigs.nhr +0 -0
- data/test/data/BS00068396_51_contigs.nin +0 -0
- data/test/data/BS00068396_51_contigs.nsq +0 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_for_polymarker.txt +1 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/PST130_7067.csv +1 -0
- data/test/data/PST130_7067.fa +2 -0
- data/test/data/PST130_7067.fa.fai +1 -0
- data/test/data/PST130_7067.fa.ndb +0 -0
- data/test/data/PST130_7067.fa.nhr +0 -0
- data/test/data/PST130_7067.fa.nin +0 -0
- data/test/data/PST130_7067.fa.not +0 -0
- data/test/data/PST130_7067.fa.nsq +0 -0
- data/test/data/PST130_7067.fa.ntf +0 -0
- data/test/data/PST130_7067.fa.nto +0 -0
- data/test/data/PST130_reverse_primer.csv +1 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/S22380157.vcf +67 -0
- data/test/data/S58861868/LIB1716.bam +0 -0
- data/test/data/S58861868/LIB1716.sam +651 -0
- data/test/data/S58861868/LIB1719.bam +0 -0
- data/test/data/S58861868/LIB1719.sam +805 -0
- data/test/data/S58861868/LIB1721.bam +0 -0
- data/test/data/S58861868/LIB1721.sam +1790 -0
- data/test/data/S58861868/LIB1722.bam +0 -0
- data/test/data/S58861868/LIB1722.sam +1271 -0
- data/test/data/S58861868/S58861868.fa +16 -0
- data/test/data/S58861868/S58861868.fa.fai +1 -0
- data/test/data/S58861868/S58861868.vcf +76 -0
- data/test/data/S58861868/header.txt +9 -0
- data/test/data/S58861868/merged.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam.bai +0 -0
- data/test/data/Test3Aspecific.csv +2 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/bfr_out_test.csv +5 -0
- data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
- data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
- data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
- data/test/data/headerMergeed.txt +9 -0
- data/test/data/headerS2238015 +1 -0
- data/test/data/mergedLibs.bam +0 -0
- data/test/data/mergedLibsReheader.bam +0 -0
- data/test/data/mergedLibsSorted.bam +0 -0
- data/test/data/mergedLibsSorted.bam.bai +0 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/primer_3_input_header_test +5 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/some_tests/some_tests.csv +201 -0
- data/test/data/test_from_mutant.csv +3 -0
- data/test/data/test_iselect.csv +196 -0
- data/test/data/test_iselect_reference.fa +1868 -0
- data/test/data/test_iselect_reference.fa.fai +934 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +135 -0
- data/test/test_blast.rb +47 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +48 -0
- data/test/test_integration.rb +76 -0
- data/test/test_snp_parsing.rb +121 -0
- data/test/test_wrong_selection.sh +5 -0
- metadata +356 -0
data/bin/polymarker.rb
ADDED
@@ -0,0 +1,410 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'pathname'
|
5
|
+
require 'bio-samtools-wrapper'
|
6
|
+
require 'optparse'
|
7
|
+
require 'set'
|
8
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
9
|
+
$: << File.expand_path('.')
|
10
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
|
11
|
+
require path
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
# Checks that every input file named in the parsed options can be read.
#
# Only the keys listed below are inspected. Options that were not supplied
# (nil) are skipped, and array values are flattened, so a key may hold
# several paths.
#
# @param o [Hash] options; reads :path_to_contigs, :marker_list, :snp_list,
#   :mutant_list and :reference
# @return [Array<String>] the paths that were checked
# @raise [IOError] for the first path that is missing or not readable
def validate_files(o)
  keys = [:path_to_contigs, :marker_list, :snp_list, :mutant_list, :reference]
  keys.map { |key| o[key] }.flatten.compact.each do |f|
    # readable? is false for missing files too, so this covers the old
    # existence check and also matches the wording of the error message.
    raise IOError, "Unable to read #{f}" unless File.readable?(f)
  end
end
|
26
|
+
|
27
|
+
# Default settings. Most of them can be overridden by the command-line
# switches registered on the OptionParser further down.
options = {
  # NOTE(review): absolute path, presumably only valid on the original
  # authors' systems; override with --contigs.
  :path_to_contigs => "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa",
  # NOTE(review): :chunks, :bucket_size and :bucket are not read anywhere else
  # in this script — confirm whether a library consumes them.
  :chunks => 1,
  :bucket_size => 0,
  :bucket => 1,
  :model => "est2genome",
  :arm_selection => Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene"),
  # Replaced after parsing by the upper bound of primer_product_size_range.
  :flanking_size => 150,
  :variation_free_region => 0,
  :extract_found_contigs => false,
  :genomes_count => 3,
  :min_identity => 90,
  :scoring => :genome_specific,
  # false means "not given"; it is replaced by :path_to_contigs after parsing.
  :database => false,
  :filter_best => false,
  :aligner => :blast,
  :max_hits => 8,
  :max_specific_primers => 20,
  :primer_3_preferences => {
    :primer_product_size_range => "50-150",
    :primer_max_size => 25,
    :primer_lib_ambiguity_codes_consensus => 1,
    :primer_liberal_base => 1,
    :primer_num_return => 5,
    :primer_explain_flag => 1,
    # Resolved relative to this script's directory. The previous string
    # concatenation had no separator after dirname, which gave a wrong
    # directory when the script was started as `ruby polymarker.rb`.
    :primer_thermodynamic_parameters_path => File.expand_path('../conf/primer3_config', File.dirname(__FILE__)) + '/'
  }
}
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
# Command-line interface. Each switch stores its value in the `options` hash
# built above; `parse!` consumes the recognised switches from ARGV.
OptionParser.new do |opts|
  opts.banner = "Usage: polymarker.rb [options]"

  opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
    options[:path_to_contigs] = o
  end

  opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
    options[:marker_list] = o
  end

  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
    options[:genomes_count] = o.to_i
  end

  opts.on("-b", "--filter_best", "If set, only keep the best alignment for each chromosome") do
    options[:filter_best] = true
  end

  opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
    options[:snp_list] = o
  end

  opts.on("-t", "--mutant_list FILE", "File with the list of positions with mutation and the mutation line.\nrequires --reference to get the sequence using a position") do |o|
    options[:mutant_list] = o
  end

  opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
    options[:reference] = o
  end

  opts.on("-i", "--min_identity INT", "Minimum identity to consider a hit (default 90)") do |o|
    options[:min_identity] = o.to_i
  end

  opts.on("-o", "--output FOLDER", "Output folder") do |o|
    options[:output_folder] = o
  end

  opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
    options[:model] = o
  end

  opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
    options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
  end

  # The preferences file is layered on top of the built-in primer3 defaults.
  opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o|
    options[:primer_3_preferences] = Bio::DB::Primer3.read_primer_preferences(o, options[:primer_3_preferences])
  end

  opts.on("-v", "--variation_free_region INT", "If present, avoid generating the common primer if there are homoeologous SNPs within the specified distance") do |o|
    options[:variation_free_region] = o.to_i
  end

  opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do
    options[:extract_found_contigs] = true
  end

  opts.on("-P", "--primers_to_order", "If present, save a separate file with the primers with the KASP tails") do
    #TODO: have a string with the tails, optional.
    options[:primers_to_order] = true
  end

  # Long form only: "-H" used to be registered here as well as on --max_hits.
  # OptionParser keeps the later registration for a duplicated short switch,
  # so "-H" already meant --max_hits and never reached this handler.
  opts.on("--het_dels", "If present, change the scoring to give priority to: semi-specific, specific, non-specific") do
    options[:scoring] = :het_dels
  end

  opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: #{options[:aligner]}") do |o|
    raise "Invalid aligner" unless %w[exonerate blast].include?(o)
    options[:aligner] = o.to_sym
  end

  opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
    options[:database] = o
  end

  opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
    options[:max_hits] = o.to_i
  end

  opts.on("-S", "--max_specific_primers INT", "Maximum number of candidate primers to attempt to design. Default: #{options[:max_specific_primers]} ") do |o|
    options[:max_specific_primers] = o.to_i
  end
end.parse!
|
145
|
+
|
146
|
+
|
147
|
+
# Fail early if any of the input files is missing.
validate_files(options)

# Without an explicit --database, the contigs path doubles as the database.
options[:database] ||= options[:path_to_contigs]

# The flanking region is sized to the largest requested PCR product, taken
# from the "MIN-MAX" product size range in the primer3 preferences.
product_size_range = options[:primer_3_preferences][:primer_product_size_range]
if product_size_range
  bounds = product_size_range.split("-")
  smallest = bounds[0].to_i
  largest = bounds[1].to_i
  unless largest > smallest
    raise Bio::DB::Exonerate::ExonerateException.new "Range #{product_size_range} is invalid!"
  end
  options[:flanking_size] = largest
end
|
160
|
+
|
161
|
+
#p options
|
162
|
+
#p ARGV
|
163
|
+
|
164
|
+
|
165
|
+
#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
|
166
|
+
#TODO: Make all these parameters
|
167
|
+
|
168
|
+
path_to_contigs = options[:path_to_contigs]

# Names given to the two lines being compared; they are passed to the
# containers and appear in the CSV header.
original_name = "A"
snp_in = "B"

# Input list. If several were given, the mutant list takes precedence over the
# SNP list, which takes precedence over the marker list.
test_file = options[:mutant_list] || options[:snp_list] || options[:marker_list]
fasta_reference = options[:reference]

# Every intermediate and final file goes into the output folder, which is
# named after the input list and a timestamp unless --output was given.
output_folder = options[:output_folder] || "#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
Dir.mkdir(output_folder) unless Dir.exist?(output_folder)

#TODO: make these temporary files
in_output_folder = ->(name) { "#{output_folder}/#{name}" }
temp_fasta_query = in_output_folder.call("to_align.fa")
temp_contigs     = in_output_folder.call("contigs_tmp.fa")
exonerate_file   = in_output_folder.call("exonerate_tmp.tab")
primer_3_input   = in_output_folder.call("primer_3_input_temp")
primer_3_output  = in_output_folder.call("primer_3_output_temp")
exons_filename   = in_output_folder.call("exons_genes_and_contigs.fa")
output_primers   = in_output_folder.call("primers.csv")
output_to_order  = in_output_folder.call("primers_to_order.csv")
min_identity = options[:min_identity]

# Progress log appended to by write_status.
@status_file = in_output_folder.call("status.txt")

primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
model = options[:model]
|
197
|
+
|
198
|
+
# Appends one "<timestamp>,<status>" line to the file named by @status_file.
#
# The block form of File.open closes the handle even if the write raises.
#
# @param status [String] message to record
# @return [nil]
def write_status(status)
  File.open(@status_file, "a") { |f| f.puts "#{Time.now},#{status}" }
end
|
203
|
+
|
204
|
+
# Records the reason in the status file and restores the default handler for
# the given signal, so that the follow-up action is not intercepted again.
log_and_reset = lambda do |signal, verb|
  write_status "ERROR: Job #{verb}. Please try a small number of primers."
  Signal.trap("SIG#{signal}", "DEFAULT") # restore handler
end

# On ABRT: log, then re-send the signal with the default handler in place.
# NOTE(review): pid 0 presumably addresses the whole process group, not just
# this process — confirm that is intended.
Signal.trap("ABRT") do
  log_and_reset.call("ABRT", "aborted")
  Process.kill("ABRT", 0)
end

# On TERM: log, then leave through a regular exit.
Signal.trap("TERM") do
  log_and_reset.call("TERM", "terminated")
  exit
end
|
215
|
+
|
216
|
+
snps = Array.new
|
217
|
+
|
218
|
+
begin
|
219
|
+
|
220
|
+
write_status "Loading Reference"
#0. Load the fasta index (only when --reference was given)
fasta_reference_db = nil
if fasta_reference
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
  fasta_reference_db.load_fai_entries
  write_status "Fasta reference: #{fasta_reference}"
end

#1. Read all the SNP files
# The parser for each line depends on which list option was supplied. For the
# two position-based formats the sequence is fetched from the reference.
write_status "Reading SNPs"
File.foreach(test_file) do |line|
  snp = nil
  if options[:marker_list] #List with Sequence
    snp = Bio::PolyploidTools::SNPSequence.parse(line)
  elsif options[:snp_list] && options[:reference] #List and fasta file
    snp = Bio::PolyploidTools::SNP.parse(line)
    entry = fasta_reference_db.index.region_for_entry(snp.gene)
    if entry
      snp.template_sequence = fasta_reference_db.fetch_sequence(entry.get_full_region)
    else
      write_status "WARN: Unable to find entry for #{snp.gene}"
    end
  elsif options[:mutant_list] && options[:reference] #List and fasta file
    snp = Bio::PolyploidTools::SNPMutant.parse(line)
    entry = fasta_reference_db.index.region_for_entry(snp.contig)
    if entry
      snp.full_sequence = fasta_reference_db.fetch_sequence(entry.get_full_region)
    else
      # Report the contig, which is the name that was actually looked up.
      write_status "WARN: Unable to find entry for #{snp.contig}"
    end
  else
    raise Bio::DB::Exonerate::ExonerateException, "Wrong number of arguments. "
  end
  raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil?

  snp.max_hits = options[:max_hits]
  snp.genomes_count = options[:genomes_count]
  snp.snp_in = snp_in
  snp.original_name = original_name
  # Entries without a position are reported on stderr and skipped.
  if snp.position
    snps << snp
  else
    $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
  end
end
|
271
|
+
|
272
|
+
#1.1 Close fasta file
|
273
|
+
#fasta_reference_db.close() if fasta_reference_db
|
274
|
+
#2. Generate all the fasta files
|
275
|
+
# Write the sequences to align, one FASTA record per distinct gene name. The
# block form closes the file even if serialising a record raises.
write_status "Writing sequences to align"
written_seqs = Set.new
File.open(temp_fasta_query, "w") do |fasta_out|
  snps.each do |snp|
    # Set#add? returns nil when the gene was already written.
    fasta_out.puts snp.to_fasta if written_seqs.add?(snp.gene)
  end
end
|
285
|
+
|
286
|
+
#3. Run exonerate on each of the possible chromosomes for the SNP
|
287
|
+
#puts chromosome
|
288
|
+
#chr_group = chromosome[0]
|
289
|
+
write_status "Searching markers in genome"

# Accepted alignment lines go to exo_f. The sequences of the contigs that
# were hit are saved as well, but only on request (--extract_found_contigs).
exo_f = File.open(exonerate_file, "w")
contigs_f = options[:extract_found_contigs] ? File.open(temp_contigs, "w") : nil

# The contigs file is both the alignment target and the indexed FASTA used
# to look up the contigs that were hit.
target = filename = path_to_contigs
fasta_file = Bio::DB::Fasta::FastaFile.new(fasta: target)
fasta_file.load_fai_entries

# Target ids seen so far, shared by every do_align call.
found_contigs = Set.new
|
301
|
+
|
302
|
+
|
303
|
+
# Records one alignment hit and, the first time its target contig is seen,
# checks that the contig is indexed and optionally exports its sequence.
#
# Hits whose identity is not strictly greater than +min_identity+ are ignored.
#
# @param aln alignment record responding to #identity, #line and #target_id
# @param exo_f [IO] open handle receiving the raw line of each accepted hit
# @param found_contigs [Set] target ids already handled; updated in place
# @param min_identity [Numeric] exclusive lower bound for aln.identity
# @param fasta_file indexed FASTA of the targets (responds to #index and
#   #fetch_sequence)
# @param options [Hash] reads :extract_found_contigs and :path_to_contigs
# @param contigs_f [IO, nil] receives a FASTA record for each new contig when
#   options[:extract_found_contigs] is set
# @raise [Bio::DB::Exonerate::ExonerateException] if the target id is missing
#   from the FASTA index
def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options, contigs_f: nil)
  return unless aln.identity > min_identity

  exo_f.puts aln.line
  # Each contig is handled only once; this keeps the contigs file small.
  return unless found_contigs.add?(aln.target_id)

  entry = fasta_file.index.region_for_entry(aln.target_id)
  if entry.nil?
    # The exception class is fully qualified and the message names the contigs
    # file; the previous code referenced an undefined constant and an
    # undefined local here, so this path ended in a NameError.
    raise Bio::DB::Exonerate::ExonerateException, "Entry not found! #{aln.target_id}. Make sure that the #{options[:path_to_contigs]}.fai was generated properly."
  end
  return unless options[:extract_found_contigs]

  region = entry.get_full_region
  seq = fasta_file.fetch_sequence(region)
  contigs_f.puts(">#{aln.target_id}\n#{seq}")
end
|
319
|
+
|
320
|
+
# Every hit reported by the selected aligner is handed to do_align, which
# filters it by identity and records it.
record_hit = proc do |aln|
  do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options, contigs_f: contigs_f)
end

case options[:aligner]
when :blast
  # BLAST searches the database given with --database.
  Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}, &record_hit)
when :exonerate
  # exonerate aligns against the contigs file itself.
  Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}, &record_hit)
end

# Closed exactly once: a second close raises IOError on Rubies before 2.3.
exo_f.close
contigs_f.close if options[:extract_found_contigs]
|
334
|
+
|
335
|
+
#4. Load all the results from exonerate and get the input filename for primer3
|
336
|
+
#Custom arm selection function that only uses the first two characters. Maybe
|
337
|
+
#we want to make it a bit more clever
|
338
|
+
write_status "Reading best alignment on each chromosome"

# The container gets the query sequences, the target contigs, the two line
# names and every SNP before the alignments are loaded into it.
container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = options[:flanking_size]
container.gene_models(temp_fasta_query)
container.chromosomes(target)
[snp_in, original_name].each do |line_name|
  container.add_parental({:name => line_name})
end
container.max_hits = options[:max_hits]

snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  snp.variation_free_region = options[:variation_free_region]
  container.add_snp(snp)
end

alignment_settings = {
  :exonerate_file => exonerate_file,
  :arm_selection => options[:arm_selection],
  :min_identity => min_identity,
  :filter_best => options[:filter_best]
}
container.add_alignments(alignment_settings)
|
359
|
+
|
360
|
+
|
361
|
+
#4.1 generating primer3 file
|
362
|
+
# Both files are written with block-form File.open so that the handles are
# closed even if the container raises while printing.
write_status "Finding genome-specific positions"
File.open(exons_filename, "w") { |io| container.print_fasta_snp_exones(io) }

write_status "Running primer3"
# The block's value (the result of print_primer_3_exons) is kept, because it
# decides below whether primer3 is run at all.
added_exons = File.open(primer_3_input, "w") do |io|
  Bio::DB::Primer3.prepare_input_file(io, options[:primer_3_preferences])
  container.print_primer_3_exons(io, nil, snp_in, max_specific_primers: options[:max_specific_primers])
end

# Skip primer3 when nothing was written for it to process.
Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0
|
375
|
+
|
376
|
+
#5. Pick the best primer and make the primer3 output
|
377
|
+
#5. Pick the best primer and make the primer3 output
write_status "Selecting best primers"
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = original_name
kasp_container.line_2 = snp_in

# --het_dels replaces the container's scores so that semi-specific primers
# rank above non-specific ones, which rank above specific ones.
if options[:scoring] == :het_dels
  kasp_container.scores = Hash.new
  {
    :chromosome_specific => 0,
    :chromosome_semispecific => 1000,
    :chromosome_nonspecific => 100
  }.each do |category, score|
    kasp_container.scores[category] = score
  end
end

snps.each { |snp| kasp_container.add_snp(snp) }

# There is no primer3 output to read when no input was written for it.
kasp_container.add_primers_file(primer_3_output) if added_exons > 0

header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,total_hits"
File.open(output_primers, 'w') do |csv|
  csv.write("#{header}\n#{kasp_container.print_primers}")
end
# NOTE(review): written unconditionally; options[:primers_to_order] is never
# read in this script — confirm whether the file should depend on -P.
File.open(output_to_order, "w") do |csv|
  csv.write(kasp_container.print_primers_with_tails())
end

write_status "DONE"
|
404
|
+
rescue StandardError => e
|
405
|
+
write_status "ERROR\t#{e.message}"
|
406
|
+
raise e
|
407
|
+
rescue Exception => e
|
408
|
+
write_status "ERROR\t#{e.message}"
|
409
|
+
raise e
|
410
|
+
end
|