bio-polymarker 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
data/bin/polymarker.rb ADDED
@@ -0,0 +1,410 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools-wrapper'
6
+ require 'optparse'
7
+ require 'set'
8
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
9
+ $: << File.expand_path('.')
10
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
11
+ require path
12
+
13
+
14
+
15
# Checks that every input file named in the options can actually be read.
#
# Reads the :path_to_contigs, :marker_list, :snp_list, :mutant_list and
# :reference entries of +o+. Entries that are nil are skipped and entries
# that are arrays are flattened, so each path is checked individually.
#
# The check uses File.readable? (false for missing paths as well as for
# paths the current user cannot read) so that the "Unable to read" message
# is accurate and permission problems are reported before the pipeline
# starts, rather than midway through.
#
# @param o [Hash] parsed command-line options
# @return [Array<String>] the list of paths that were checked
# @raise [IOError] if any of the paths cannot be read
def validate_files(o)
  keys = %i[path_to_contigs marker_list snp_list mutant_list reference]
  keys.map { |key| o[key] }.flatten.compact.each do |f|
    raise IOError, "Unable to read #{f}" unless File.readable?(f)
  end
end
26
+
27
# Default settings; every entry can be overridden from the command line.
options = {
  path_to_contigs: "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa",
  chunks: 1,
  bucket_size: 0,
  bucket: 1,
  model: "est2genome",
  arm_selection: Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene"),
  flanking_size: 150,
  variation_free_region: 0,
  extract_found_contigs: false,
  genomes_count: 3,
  min_identity: 90,
  scoring: :genome_specific,
  database: false, # replaced by :path_to_contigs after parsing when not given
  filter_best: false,
  aligner: :blast,
  max_hits: 8,
  max_specific_primers: 20
}

# Preferences forwarded to primer3.
# The thermodynamic parameters live in <script dir>/../conf/primer3_config/.
# The path separator before '..' is required: without it the directory name
# and '..' are fused into a single path component ("bin..") and the expanded
# path points one level too high.
options[:primer_3_preferences] = {
  :primer_product_size_range => "50-150",
  :primer_max_size => 25,
  :primer_lib_ambiguity_codes_consensus => 1,
  :primer_liberal_base => 1,
  :primer_num_return => 5,
  :primer_explain_flag => 1,
  :primer_thermodynamic_parameters_path => File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config/') + '/'
}
54
+
55
+
56
+
57
# Command-line interface. Each switch stores its value in the `options` hash.
#
# Note on -H: it used to be registered for both --het_dels and --max_hits.
# OptionParser maps a short switch to the most recently registered
# definition, so -H was always parsed as --max_hits while the help text
# advertised it for both. -H is now declared only for --max_hits (the
# behaviour that was already in effect); --het_dels is reachable through its
# long name.
OptionParser.new do |opts|
  opts.banner = "Usage: polymarker.rb [options]"

  opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
    options[:path_to_contigs] = o
  end

  opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
    options[:marker_list] = o
  end

  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
    options[:genomes_count] = o.to_i
  end

  opts.on("-b", "--filter_best", "If set, only keep the best alignment for each chromosome") do
    options[:filter_best] = true
  end

  opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
    options[:snp_list] = o
  end

  opts.on("-t", "--mutant_list FILE",
          "File with the list of positions with mutation and the mutation line.\n" \
          "requires --reference to get the sequence using a position") do |o|
    options[:mutant_list] = o
  end

  opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
    options[:reference] = o
  end

  opts.on("-i", "--min_identity INT", "Minimum identity to consider a hit (default 90)") do |o|
    options[:min_identity] = o.to_i
  end

  opts.on("-o", "--output FOLDER", "Output folder") do |o|
    options[:output_folder] = o
  end

  opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
    options[:model] = o
  end

  opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
    options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
  end

  # The preferences read from FILE are combined with the current defaults by
  # read_primer_preferences, which receives both.
  opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o|
    options[:primer_3_preferences] = Bio::DB::Primer3.read_primer_preferences(o, options[:primer_3_preferences])
  end

  opts.on("-v", "--variation_free_region INT", "If present, avoid generating the common primer if there are homoeologous SNPs within the specified distance") do |o|
    options[:variation_free_region] = o.to_i
  end

  opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do
    options[:extract_found_contigs] = true
  end

  opts.on("-P", "--primers_to_order", "If present, save a separate file with the primers with the KASP tails") do
    # TODO: have a string with the tails, optional.
    options[:primers_to_order] = true
  end

  opts.on("--het_dels", "If present, change the scoring to give priority to: semi-specific, specific, non-specific") do
    options[:scoring] = :het_dels
  end

  opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: #{options[:aligner]}") do |o|
    raise "Invalid aligner" unless %w[exonerate blast].include?(o)

    options[:aligner] = o.to_sym
  end

  opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
    options[:database] = o
  end

  opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
    options[:max_hits] = o.to_i
  end

  opts.on("-S", "--max_specific_primers INT", "Maximum number of candidate primers to attempt to design. Default: #{options[:max_specific_primers]} ") do |o|
    options[:max_specific_primers] = o.to_i
  end
end.parse!
145
+
146
+
147
# Fail early if any of the input files is missing.
validate_files(options)

# Without an explicit --database the contigs file doubles as the database.
options[:database] ||= options[:path_to_contigs]

# The flanking region is sized after the upper bound of the primer3 product
# size range ("MIN-MAX"). A missing bound is read as 0 by to_i.
product_range = options[:primer_3_preferences][:primer_product_size_range]
if product_range
  lower_text, upper_text = product_range.split("-")
  lower = lower_text.to_i
  upper = upper_text.to_i
  if upper <= lower
    raise Bio::DB::Exonerate::ExonerateException, "Range #{product_range} is invalid!"
  end

  options[:flanking_size] = upper
end
160
+
161
+ #p options
162
+ #p ARGV
163
+
164
+
165
+ #TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
166
+ #TODO: Make all this parameters
167
+
168
path_to_contigs = options[:path_to_contigs]

# Labels given to the two alleles/parents throughout the run.
original_name = "A"
snp_in = "B"

# Input list: a mutant list takes precedence over a SNP list, which takes
# precedence over a marker list. Stays nil when none was supplied.
test_file = options[:mutant_list] || options[:snp_list] || options[:marker_list]
fasta_reference = options[:reference]

# Output folder: the one requested with --output, otherwise a timestamped
# folder named after the input list.
default_folder = "#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
output_folder = options[:output_folder] || default_folder
Dir.mkdir(output_folder) unless Dir.exist?(output_folder)

# TODO: make these temporary files
temp_fasta_query = "#{output_folder}/to_align.fa"
temp_contigs     = "#{output_folder}/contigs_tmp.fa"
exonerate_file   = "#{output_folder}/exonerate_tmp.tab"
primer_3_input   = "#{output_folder}/primer_3_input_temp"
primer_3_output  = "#{output_folder}/primer_3_output_temp"
exons_filename   = "#{output_folder}/exons_genes_and_contigs.fa"
output_primers   = "#{output_folder}/primers.csv"
output_to_order  = "#{output_folder}/primers_to_order.csv"
min_identity     = options[:min_identity]

# Progress log appended to by write_status.
@status_file = "#{output_folder}/status.txt"

primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
model = options[:model]
197
+
198
# Appends a "<timestamp>,<status>" line to the status file.
#
# The file named by @status_file is opened in append mode for each call.
# The block form of File.open guarantees the handle is closed even if the
# write raises.
#
# @param status [String] message to record
# @return [nil]
def write_status(status)
  File.open(@status_file, "a") do |f|
    f.puts "#{Time.now},#{status}"
  end
end
203
+
204
# On SIGABRT: log the failure, put the default handler back and send the
# signal again so the default action takes place.
# NOTE(review): pid 0 addresses the caller's whole process group under POSIX
# kill semantics - confirm that is intended rather than Process.pid.
trap("ABRT") do
  write_status "ERROR: Job aborted. Please try a small number of primers."
  trap("SIGABRT", "DEFAULT") # restore handler
  Process.kill("ABRT", 0)
end

# On SIGTERM: log the failure, put the default handler back and exit.
trap("TERM") do
  write_status "ERROR: Job terminated. Please try a small number of primers."
  trap("SIGTERM", "DEFAULT") # restore handler
  exit
end

# Markers parsed from the input list.
snps = []
217
+
218
+ begin
219
+
220
write_status "Loading Reference"
# 0. Load the fasta index (only when a reference was supplied).
fasta_reference_db = nil
if fasta_reference
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({ :fasta => fasta_reference })
  fasta_reference_db.load_fai_entries
  write_status "Fasta reference: #{fasta_reference}"
end

# 1. Read all the SNPs, one per line of the input list.
write_status "Reading SNPs"
File.foreach(test_file) do |line|
  snp =
    if options[:marker_list]
      # The list already carries the sequence.
      Bio::PolyploidTools::SNPSequence.parse(line)
    elsif options[:snp_list] && options[:reference]
      # Positions only: the template sequence comes from the reference.
      parsed = Bio::PolyploidTools::SNP.parse(line)
      entry = fasta_reference_db.index.region_for_entry(parsed.gene)
      if entry
        parsed.template_sequence = fasta_reference_db.fetch_sequence(entry.get_full_region)
      else
        write_status "WARN: Unable to find entry for #{parsed.gene}"
      end
      parsed
    elsif options[:mutant_list] && options[:reference]
      # Mutant positions: the full contig sequence comes from the reference.
      parsed = Bio::PolyploidTools::SNPMutant.parse(line)
      entry = fasta_reference_db.index.region_for_entry(parsed.contig)
      if entry
        parsed.full_sequence = fasta_reference_db.fetch_sequence(entry.get_full_region)
      else
        write_status "WARN: Unable to find entry for #{parsed.gene}"
      end
      parsed
    else
      raise Bio::DB::Exonerate::ExonerateException, "Wrong number of arguments. "
    end

  raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil?

  snp.max_hits = options[:max_hits]
  snp.genomes_count = options[:genomes_count]
  snp.snp_in = snp_in
  snp.original_name = original_name

  # Entries without a position are reported and left out.
  if snp.position
    snps << snp
  else
    $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
  end
end
271
+
272
+ #1.1 Close fasta file
273
+ #fasta_reference_db.close() if fasta_reference_db
274
+ #2. Generate all the fasta files
275
# 2. Write the query FASTA: one record per distinct gene name (the first SNP
#    seen for each gene provides the record). The block form closes the file
#    even when to_fasta raises.
write_status "Writing sequences to align"
File.open(temp_fasta_query, "w") do |query_out|
  snps.uniq { |snp| snp.gene }.each do |snp|
    query_out.puts snp.to_fasta
  end
end

# 3. Open the outputs of the alignment step. Both handles stay open because
#    do_align writes to them; they are closed once the aligner has finished.
write_status "Searching markers in genome"
exo_f = File.open(exonerate_file, "w")
# Only created when --extract_found_contigs was requested.
contigs_f = options[:extract_found_contigs] ? File.open(temp_contigs, "w") : nil
filename = path_to_contigs
target = filename

# Index of the contigs file, used to look up the contigs that are hit.
fasta_file = Bio::DB::Fasta::FastaFile.new(fasta: target)
fasta_file.load_fai_entries

# Contigs already seen in an accepted alignment.
found_contigs = Set.new
302
+
303
# Records one alignment if it is above the identity threshold.
#
# The alignment line is written to +exo_f+. The first time a target contig
# is seen it is added to +found_contigs+ and looked up in the index of
# +fasta_file+; when options[:extract_found_contigs] is set its full sequence
# is also written to +contigs_f+ as a FASTA record.
#
# The "entry not found" error used to reference an unqualified
# ExonerateException and an undefined local, so it surfaced as a NameError.
# It now raises the intended exception and names the contigs file from
# options[:path_to_contigs].
#
# @param aln alignment responding to identity, line and target_id
# @param exo_f [IO] output for the accepted alignment lines
# @param found_contigs [Set] target ids already processed (updated in place)
# @param min_identity [Numeric] alignments must be strictly above this value
# @param fasta_file indexed fasta responding to index and fetch_sequence
# @param options [Hash] reads :extract_found_contigs and :path_to_contigs
# @param contigs_f [IO, nil] output for the contig sequences; required when
#   options[:extract_found_contigs] is set
# @raise [Bio::DB::Exonerate::ExonerateException] if the target contig is
#   not in the fasta index
def do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options, contigs_f: nil)
  return unless aln.identity > min_identity

  exo_f.puts aln.line

  # Each contig is handled only once; this keeps the contigs file small.
  return unless found_contigs.add?(aln.target_id)

  entry = fasta_file.index.region_for_entry(aln.target_id)
  if entry.nil?
    raise Bio::DB::Exonerate::ExonerateException,
          "Entry not found! #{aln.target_id}. Make sure that the #{options[:path_to_contigs]}.fai was generated properly."
  end
  return unless options[:extract_found_contigs]

  seq = fasta_file.fetch_sequence(entry.get_full_region)
  contigs_f.puts(">#{aln.target_id}\n#{seq}")
end
319
+
320
# Run the selected aligner; every alignment it yields is handed to do_align.
case options[:aligner]
when :blast
  blast_args = { :query => temp_fasta_query, :target => options[:database], :model => model, :max_hits => options[:max_hits] }
  Bio::DB::Blast.align(blast_args) do |aln|
    do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options, contigs_f: contigs_f)
  end
when :exonerate
  exonerate_args = { :query => temp_fasta_query, :target => target, :model => model }
  Bio::DB::Exonerate.align(exonerate_args) do |aln|
    do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options, contigs_f: contigs_f)
  end
end

# Close the outputs exactly once. The alignment file used to be closed
# twice, which raises IOError on Ruby versions where closing an already
# closed stream is an error.
exo_f.close
contigs_f.close if options[:extract_found_contigs]
334
+
335
+ #4. Load all the results from exonerate and get the input filename for primer3
336
+ #Custom arm selection function that only uses the first two characters. Maybe
337
+ #we want to make it a bit more cleaver
338
# 4. Load the alignments and prepare the primer3 input.
write_status "Reading best alignment on each chromosome"

container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = options[:flanking_size]
container.gene_models(temp_fasta_query)
container.chromosomes(target)
[snp_in, original_name].each do |parent|
  container.add_parental({ :name => parent })
end
container.max_hits = options[:max_hits]

snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  snp.variation_free_region = options[:variation_free_region]
  container.add_snp(snp)
end

container.add_alignments({
  :exonerate_file => exonerate_file,
  :arm_selection => options[:arm_selection],
  :min_identity => min_identity,
  :filter_best => options[:filter_best]
})

# 4.1 Generate the primer3 input. Both files are written through the block
#     form of File.open so the handles are closed even if the container
#     raises while printing.
write_status "Finding genome-specific positions"
File.open(exons_filename, "w") do |exons_out|
  container.print_fasta_snp_exones(exons_out)
end

write_status "Running primer3"
# print_primer_3_exons returns the count that decides whether primer3 runs.
added_exons = File.open(primer_3_input, "w") do |primer3_in|
  Bio::DB::Primer3.prepare_input_file(primer3_in, options[:primer_3_preferences])
  container.print_primer_3_exons(primer3_in, nil, snp_in, max_specific_primers: options[:max_specific_primers])
end

# primer3 is only run when there is something to design.
Bio::DB::Primer3.run({ :in => primer_3_input, :out => primer_3_output }) if added_exons > 0
375
+
376
+ #5. Pick the best primer and make the primer3 output
377
write_status "Selecting best primers"
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = original_name
kasp_container.line_2 = snp_in

# --het_dels scoring: semi-specific first, then non-specific, then specific.
if options[:scoring] == :het_dels
  kasp_container.scores = {}
  {
    :chromosome_specific => 0,
    :chromosome_semispecific => 1000,
    :chromosome_nonspecific => 100
  }.each do |primer_class, score|
    kasp_container.scores[primer_class] = score
  end
end

snps.each { |snp| kasp_container.add_snp(snp) }

# The primer3 output only exists when primer3 was run.
kasp_container.add_primers_file(primer_3_output) if added_exons > 0

header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,total_hits"
File.open(output_primers, 'w') do |primers_out|
  primers_out.write("#{header}\n#{kasp_container.print_primers}")
end
File.open(output_to_order, "w") do |order_out|
  order_out.write(kasp_container.print_primers_with_tails())
end

write_status "DONE"
404
+ rescue StandardError => e
405
+ write_status "ERROR\t#{e.message}"
406
+ raise e
407
+ rescue Exception => e
408
+ write_status "ERROR\t#{e.message}"
409
+ raise e
410
+ end