bio-polymarker 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
data/bin/polymarker.rb ADDED
@@ -0,0 +1,410 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools-wrapper'
6
+ require 'optparse'
7
+ require 'set'
8
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
9
+ $: << File.expand_path('.')
10
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
11
+ require path
12
+
13
+
14
+
15
# Checks that every input file named in the parsed options can be read.
#
# Inspects :path_to_contigs, :marker_list, :snp_list, :mutant_list and
# :reference. Options that were not supplied (nil) are skipped.
#
# @param o [Hash] the options hash built from the defaults and the command line
# @return [Array<String>] the paths that were checked
# @raise [IOError] for the first path that is missing, unreadable or a directory
def validate_files(o)
  candidates = [
    o[:path_to_contigs],
    o[:marker_list],
    o[:snp_list],
    o[:mutant_list],
    o[:reference]
  ].flatten.compact

  candidates.each do |f|
    # File.exist? alone also accepts directories and files we have no
    # permission to read, which then fail much later with a worse message.
    next if File.readable?(f) && !File.directory?(f)

    raise IOError, "Unable to read #{f}"
  end
end
26
+
27
# Default settings. Every value can be overridden by the command-line
# switches declared in the OptionParser block that follows.
options = {
  # Absolute default location of the contigs; pass --contigs to use another file.
  path_to_contigs: "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa",
  chunks: 1,
  bucket_size: 0,
  bucket: 1,
  model: "est2genome",
  arm_selection: Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene"),
  # Replaced after parsing by the upper bound of :primer_product_size_range.
  flanking_size: 150,
  variation_free_region: 0,
  extract_found_contigs: false,
  genomes_count: 3,
  min_identity: 90,
  scoring: :genome_specific,
  # false means "not given"; the contigs path is used as the database instead.
  database: false,
  filter_best: false,
  aligner: :blast,
  max_hits: 8,
  max_specific_primers: 20,
  primer_3_preferences: {
    primer_product_size_range: "50-150",
    primer_max_size: 25,
    primer_lib_ambiguity_codes_consensus: 1,
    primer_liberal_base: 1,
    primer_num_return: 5,
    primer_explain_flag: 1,
    # Resolve conf/primer3_config relative to this script's directory. The
    # previous "dirname + '../../conf/...'" concatenation had no separator and
    # pointed inside bin/ when the script was started as `ruby polymarker.rb`.
    primer_thermodynamic_parameters_path: File.expand_path('../conf/primer3_config', File.dirname(__FILE__)) + '/'
  }
}
54
+
55
+
56
+
57
# Command-line interface. Each switch overwrites the matching entry in
# +options+; parse! removes the recognised switches from ARGV.
OptionParser.new do |opts|
  opts.banner = "Usage: polymarker.rb [options]"

  opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
    options[:path_to_contigs] = o
  end

  opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
    options[:marker_list] = o
  end

  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
    options[:genomes_count] = o.to_i
  end

  opts.on("-b", "--filter_best", "If set, only keep the best alignment for each chromosome") do
    options[:filter_best] = true
  end

  opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
    options[:snp_list] = o
  end

  opts.on("-t", "--mutant_list FILE", "File with the list of positions with mutation and the mutation line.\n\
    requires --reference to get the sequence using a position") do |o|
    options[:mutant_list] = o
  end

  opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
    options[:reference] = o
  end

  opts.on("-i", "--min_identity INT", "Minimum identity to consider a hit (default 90)") do |o|
    options[:min_identity] = o.to_i
  end

  opts.on("-o", "--output FOLDER", "Output folder") do |o|
    options[:output_folder] = o
  end

  opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
    options[:model] = o
  end

  opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
    options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
  end

  # The file's preferences are merged on top of the built-in defaults.
  opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o|
    options[:primer_3_preferences] = Bio::DB::Primer3.read_primer_preferences(o, options[:primer_3_preferences])
  end

  opts.on("-v", "--variation_free_region INT", "If present, avoid generating the common primer if there are homoeologous SNPs within the specified distance") do |o|
    options[:variation_free_region] = o.to_i
  end

  opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do
    options[:extract_found_contigs] = true
  end

  opts.on("-P", "--primers_to_order", "If present, save a separate file with the primers with the KASP tails") do
    #TODO: have a string with the tails, optional.
    options[:primers_to_order] = true
  end

  # Long form only. "-H" is also declared by --max_hits below and OptionParser
  # resolves a duplicated short switch to the last definition, so "-H" never
  # reached this handler even though the help text advertised it.
  opts.on("--het_dels", "If present, change the scoring to give priority to: semi-specific, specific, non-specific") do
    options[:scoring] = :het_dels
  end

  opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: #{options[:aligner]}") do |o|
    raise "Invalid aligner" unless %w[exonerate blast].include?(o)
    options[:aligner] = o.to_sym
  end

  opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
    options[:database] = o
  end

  opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
    options[:max_hits] = o.to_i
  end

  opts.on("-S", "--max_specific_primers INT", "Maximum number of candidate primers to attempt to design. Default: #{options[:max_specific_primers]} ") do |o|
    options[:max_specific_primers] = o.to_i
  end
end.parse!
145
+
146
+
147
# Fail early when an input file named on the command line cannot be used.
validate_files(options)

# Without --database the contigs path doubles as the BLAST database prefix.
options[:database] ||= options[:path_to_contigs]

# The flanking region is sized to the largest requested PCR product.
range = options[:primer_3_preferences][:primer_product_size_range]
if range
  bounds = range.split("-")
  min = bounds[0].to_i
  max = bounds[1].to_i
  raise Bio::DB::Exonerate::ExonerateException.new "Range #{range} is invalid!" unless max > min
  options[:flanking_size] = max
end

#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
#TODO: Make all this parameters

path_to_contigs = options[:path_to_contigs]

# Labels used for the two parental lines throughout the run.
original_name = "A"
snp_in = "B"

# Input list precedence: --mutant_list, then --snp_list, then --marker_list.
test_file = options[:mutant_list] || options[:snp_list] || options[:marker_list]
# Stop here instead of creating a stray "_primer_design_*" folder and failing
# later on File.open(nil).
unless test_file
  raise OptionParser::MissingArgument, "one of --marker_list, --snp_list or --mutant_list"
end
fasta_reference = options[:reference]

# Default output folder is derived from the input list plus a timestamp.
output_folder = options[:output_folder] ||
                "#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
Dir.mkdir(output_folder) unless Dir.exist?(output_folder)

#TODO Make this tmp files
temp_fasta_query = "#{output_folder}/to_align.fa"
temp_contigs     = "#{output_folder}/contigs_tmp.fa"
exonerate_file   = "#{output_folder}/exonerate_tmp.tab"   # alignment hits, whichever aligner is used
primer_3_input   = "#{output_folder}/primer_3_input_temp"
primer_3_output  = "#{output_folder}/primer_3_output_temp"
exons_filename   = "#{output_folder}/exons_genes_and_contigs.fa"
output_primers   = "#{output_folder}/primers.csv"
output_to_order  = "#{output_folder}/primers_to_order.csv"
min_identity     = options[:min_identity]

# Progress log appended to by write_status.
@status_file = "#{output_folder}/status.txt"

model = options[:model]
197
+
198
# Appends one "<timestamp>,<status>" line to the file named by @status_file.
#
# The block form of File.open guarantees the handle is closed even when the
# write raises.
#
# @param status [String] message to record
# @return [nil]
def write_status(status)
  File.open(@status_file, "a") do |f|
    f.puts "#{Time.now.to_s},#{status}"
  end
end
203
+
204
# On SIGABRT: record the failure, restore the default handler and re-send the
# signal so the process still terminates the default way.
Signal.trap("ABRT") do
  write_status "ERROR: Job aborted. Please try a small number of primers."
  Signal.trap("SIGABRT", "DEFAULT") # restore handler
  # Target this process only: pid 0 would signal every process in the group.
  Process.kill("ABRT", Process.pid)
end

# On SIGTERM: record the failure, restore the default handler and exit.
Signal.trap("TERM") do
  write_status "ERROR: Job terminated. Please try a small number of primers."
  Signal.trap("SIGTERM", "DEFAULT") # restore handler
  exit
end
215
+
216
+ snps = Array.new
217
+
218
+ begin
219
+
220
write_status "Loading Reference"
# 0. Load the fasta index (only when --reference was supplied)
fasta_reference_db = nil
if fasta_reference
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
  fasta_reference_db.load_fai_entries
  write_status "Fasta reference: #{fasta_reference}"
end

# 1. Read all the SNP files: one marker per line of the input list.
write_status "Reading SNPs"
File.open(test_file) do |f|
  f.each_line do |line|
    snp = nil
    if options[:marker_list] # List with Sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)
    elsif options[:snp_list] && options[:reference] # List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      entry = fasta_reference_db.index.region_for_entry(snp.gene)
      if entry
        # Reuse the entry found above instead of querying the index again.
        snp.template_sequence = fasta_reference_db.fetch_sequence(entry.get_full_region)
      else
        write_status "WARN: Unable to find entry for #{snp.gene}"
      end
    elsif options[:mutant_list] && options[:reference] # List and fasta file
      snp = Bio::PolyploidTools::SNPMutant.parse(line)
      entry = fasta_reference_db.index.region_for_entry(snp.contig)
      if entry
        snp.full_sequence = fasta_reference_db.fetch_sequence(entry.get_full_region)
      else
        # Report the key that was actually looked up (the contig).
        write_status "WARN: Unable to find entry for #{snp.contig}"
      end
    else
      raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
    end
    raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp.nil?

    snp.max_hits = options[:max_hits]
    snp.genomes_count = options[:genomes_count]
    snp.snp_in = snp_in
    snp.original_name = original_name
    # Entries without a position are reported on stderr and skipped.
    if snp.position
      snps << snp
    else
      $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
    end
  end
end
271
+
272
#1.1 Close fasta file
#fasta_reference_db.close() if fasta_reference_db
#2. Generate all the fasta files
write_status "Writing sequences to align"
written_seqs = Set.new
# Block form closes the query file even if a marker fails to serialise.
File.open(temp_fasta_query, "w") do |query_f|
  snps.each do |snp|
    # Set#add? returns nil for a gene that was already written.
    query_f.puts snp.to_fasta if written_seqs.add?(snp.gene)
  end
end

#3. Run exonerate on each of the possible chromosomes for the SNP
write_status "Searching markers in genome"
# Both handles stay open for the alignment step that follows.
exo_f = File.open(exonerate_file, "w")
contigs_f = nil
contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
target = path_to_contigs

# Index of the contigs file, used to look up the targets hit by the aligner.
fasta_file = Bio::DB::Fasta::FastaFile.new(fasta: target)
fasta_file.load_fai_entries

found_contigs = Set.new
301
+
302
+
303
# Records one alignment hit when its identity is above the threshold.
#
# The hit line is always written to +exo_f+. The first time a target contig is
# seen it is added to +found_contigs+ and looked up in the contigs index; when
# options[:extract_found_contigs] is set its full sequence is also appended to
# +contigs_f+ as a FASTA record.
#
# @param aln            alignment responding to identity, target_id and line
# @param exo_f          writable IO receiving the accepted alignment lines
# @param found_contigs  [Set] target ids already processed (mutated)
# @param min_identity   [Numeric] hits must be strictly above this value
# @param fasta_file     indexed contigs file (index.region_for_entry, fetch_sequence)
# @param options        [Hash] reads :extract_found_contigs and :path_to_contigs
# @param contigs_f      writable IO for extracted contigs; needed only when
#                       options[:extract_found_contigs] is set
# @raise [Bio::DB::Exonerate::ExonerateException] when the target is missing
#   from the contigs index
def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options, contigs_f: nil)
  return unless aln.identity > min_identity

  exo_f.puts aln.line
  # We only add once each contig. Should reduce the size of the output file.
  return if found_contigs.include?(aln.target_id)

  found_contigs.add(aln.target_id)
  entry = fasta_file.index.region_for_entry(aln.target_id)
  if entry.nil?
    # Fully qualified class and the real contigs path: the previous message
    # referenced an undefined constant and an undefined local, so this branch
    # raised NameError instead of the intended error.
    raise Bio::DB::Exonerate::ExonerateException,
          "Entry not found! #{aln.target_id}. Make sure that the #{options[:path_to_contigs]}.fai was generated properly."
  end
  return unless options[:extract_found_contigs]

  seq = fasta_file.fetch_sequence(entry.get_full_region)
  contigs_f.puts(">#{aln.target_id}\n#{seq}")
end
319
+
320
# Run the selected aligner; every hit is filtered and recorded by do_align.
# The ensure clause closes the output files exactly once, also when the
# aligner raises.
begin
  case options[:aligner]
  when :blast
    Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
      do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options, contigs_f: contigs_f)
    end
  when :exonerate
    Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
      do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options, contigs_f: contigs_f)
    end
  end
ensure
  exo_f.close
  # contigs_f is only opened when --extract_found_contigs was given.
  contigs_f.close if contigs_f
end
334
+
335
#4. Load all the results from exonerate and get the input filename for primer3
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more clever
write_status "Reading best alignment on each chromosome"

container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = options[:flanking_size]
container.gene_models(temp_fasta_query)
container.chromosomes(target)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
container.max_hits = options[:max_hits]

# Attach every marker to the container, sharing its flanking size.
snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  snp.variation_free_region = options[:variation_free_region]
  container.add_snp(snp)
end

container.add_alignments({
  :exonerate_file=>exonerate_file,
  :arm_selection=>options[:arm_selection],
  :min_identity=>min_identity,
  :filter_best=>options[:filter_best]})

#4.1 generating primer3 file
write_status "Finding genome-specific positions"
# Block form closes each file even when the container raises while printing.
File.open(exons_filename, "w") do |exons_f|
  container.print_fasta_snp_exones(exons_f)
end
write_status "Running primer3"

# File.open returns the block's value: what print_primer_3_exons returned.
added_exons = File.open(primer_3_input, "w") do |p3_in|
  Bio::DB::Primer3.prepare_input_file(p3_in, options[:primer_3_preferences])
  container.print_primer_3_exons(p3_in, nil, snp_in, max_specific_primers: options[:max_specific_primers])
end

# primer3 is skipped when nothing was added to its input file.
Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0
375
+
376
#5. Pick the best primer and make the primer3 output
write_status "Selecting best primers"
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = original_name
kasp_container.line_2 = snp_in

# --het_dels replaces the container's score table with these three weights.
if options[:scoring] == :het_dels
  het_del_scores = Hash.new
  het_del_scores[:chromosome_specific] = 0
  het_del_scores[:chromosome_semispecific] = 1000
  het_del_scores[:chromosome_nonspecific] = 100
  kasp_container.scores = het_del_scores
end

snps.each { |snp| kasp_container.add_snp(snp) }

# The primer3 output is only read when primer3 had input to work on.
kasp_container.add_primers_file(primer_3_output) if added_exons > 0

header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,total_hits"
File.open(output_primers, 'w') do |f|
  f.write("#{header}\n#{kasp_container.print_primers}")
end
# NOTE(review): this file is written regardless of --primers_to_order; the
# flag is stored in options but not consulted here.
File.open(output_to_order, "w") do |io|
  io.write(kasp_container.print_primers_with_tails())
end

write_status "DONE"
404
+ rescue StandardError => e
405
+ write_status "ERROR\t#{e.message}"
406
+ raise e
407
+ rescue Exception => e
408
+ write_status "ERROR\t#{e.message}"
409
+ raise e
410
+ end