bio-polyploid-tools 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. checksums.yaml +5 -5
  2. data/.travis.yml +17 -0
  3. data/Gemfile +10 -7
  4. data/README.md +44 -0
  5. data/Rakefile +14 -14
  6. data/VERSION +1 -1
  7. data/bin/bfr.rb +2 -2
  8. data/bin/blast_triads.rb +166 -0
  9. data/bin/blast_triads_promoters.rb +192 -0
  10. data/bin/find_homoeologue_variations.rb +385 -0
  11. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  12. data/bin/hexaploid_primers.rb +2 -2
  13. data/bin/homokaryot_primers.rb +2 -2
  14. data/bin/mafft_triads.rb +120 -0
  15. data/bin/mafft_triads_promoters.rb +403 -0
  16. data/bin/polymarker.rb +73 -17
  17. data/bin/polymarker_capillary.rb +416 -0
  18. data/bin/snp_position_to_polymarker.rb +5 -3
  19. data/bin/snps_between_bams.rb +0 -29
  20. data/bin/vcfLineToTable.rb +56 -0
  21. data/bio-polyploid-tools.gemspec +74 -32
  22. data/lib/bio/BFRTools.rb +1 -0
  23. data/lib/bio/PolyploidTools/ChromosomeArm.rb +2 -6
  24. data/lib/bio/PolyploidTools/ExonContainer.rb +31 -8
  25. data/lib/bio/PolyploidTools/NoSNPSequence.rb +286 -0
  26. data/lib/bio/PolyploidTools/PrimerRegion.rb +9 -1
  27. data/lib/bio/PolyploidTools/SNP.rb +58 -18
  28. data/lib/bio/PolyploidTools/SNPMutant.rb +5 -3
  29. data/lib/bio/db/blast.rb +112 -0
  30. data/lib/bio/db/exonerate.rb +4 -5
  31. data/lib/bio/db/primer3.rb +83 -14
  32. data/test/data/BS00068396_51_blast.tab +4 -0
  33. data/test/data/BS00068396_51_contigs.nhr +0 -0
  34. data/test/data/BS00068396_51_contigs.nin +0 -0
  35. data/test/data/BS00068396_51_contigs.nsq +0 -0
  36. data/test/data/BS00068396_51_for_polymarker.fa +1 -0
  37. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  38. data/test/data/S22380157.vcf +67 -0
  39. data/test/data/S58861868/LIB1716.bam +0 -0
  40. data/test/data/S58861868/LIB1716.sam +651 -0
  41. data/test/data/S58861868/LIB1719.bam +0 -0
  42. data/test/data/S58861868/LIB1719.sam +805 -0
  43. data/test/data/S58861868/LIB1721.bam +0 -0
  44. data/test/data/S58861868/LIB1721.sam +1790 -0
  45. data/test/data/S58861868/LIB1722.bam +0 -0
  46. data/test/data/S58861868/LIB1722.sam +1271 -0
  47. data/test/data/S58861868/S58861868.fa +16 -0
  48. data/test/data/S58861868/S58861868.fa.fai +1 -0
  49. data/test/data/S58861868/S58861868.vcf +76 -0
  50. data/test/data/S58861868/header.txt +9 -0
  51. data/test/data/S58861868/merged.bam +0 -0
  52. data/test/data/S58861868/merged_reheader.bam +0 -0
  53. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  54. data/test/data/bfr_out_test.csv +5 -5
  55. data/test/data/headerMergeed.txt +9 -0
  56. data/test/data/headerS2238015 +1 -0
  57. data/test/data/mergedLibs.bam +0 -0
  58. data/test/data/mergedLibsReheader.bam +0 -0
  59. data/test/data/mergedLibsSorted.bam +0 -0
  60. data/test/data/mergedLibsSorted.bam.bai +0 -0
  61. data/test/test_bfr.rb +26 -34
  62. data/test/test_blast.rb +47 -0
  63. data/test/test_exonearate.rb +4 -9
  64. data/test/test_snp_parsing.rb +42 -22
  65. metadata +81 -20
  66. data/Gemfile.lock +0 -67
@@ -14,6 +14,7 @@ arm_selection_functions = Hash.new;
14
14
 
15
15
 
16
16
  arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
17
+ contig_name.gsub!(/chr/,"")
17
18
  ret = contig_name[0,2]
18
19
  return ret
19
20
  end
@@ -43,7 +44,6 @@ arm_selection_functions[:scaffold] = lambda do | contig_name |
43
44
  end
44
45
 
45
46
  def validate_files(o)
46
-
47
47
  [
48
48
  o[:path_to_contigs],
49
49
  o[:marker_list],
@@ -51,7 +51,7 @@ def validate_files(o)
51
51
  o[:mutant_list],
52
52
  o[:reference]
53
53
  ].flatten.compact.each do |f|
54
- raise IOError "Unable to read #{f}" unless File.exists? f
54
+ raise IOError.new "Unable to read #{f}" unless File.exists? f
55
55
  end
56
56
  end
57
57
 
@@ -67,6 +67,10 @@ options[:variation_free_region] = 0
67
67
  options[:extract_found_contigs] = false
68
68
  options[:genomes_count] = 3
69
69
  options[:min_identity] = 90
70
+ options[:scoring] = :genome_specific
71
+ options[:database] = false
72
+ options[:aligner] = :exonerate
73
+
70
74
 
71
75
  options[:primer_3_preferences] = {
72
76
  :primer_product_size_range => "50-150" ,
@@ -119,7 +123,19 @@ OptionParser.new do |opts|
119
123
  end
120
124
 
121
125
  opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two|scaffold", "Function to decide the chromome arm") do |o|
122
- options[:arm_selection] = arm_selection_functions[o.to_sym];
126
+ tmp_str = o
127
+ arr = o.split(",")
128
+ if arr.size == 2
129
+ options[:arm_selection] = lambda do |contig_name|
130
+ separator, field = arr
131
+ field = field.to_i
132
+ ret = contig_name.split(separator)[field]
133
+ return ret
134
+ end
135
+ else
136
+ options[:arm_selection] = arm_selection_functions[o.to_sym];
137
+ end
138
+
123
139
  end
124
140
 
125
141
  opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o|
@@ -139,12 +155,26 @@ OptionParser.new do |opts|
139
155
  options[:primers_to_order] = true
140
156
  end
141
157
 
142
-
158
+ opts.on("-H", "--het_dels", "If present, change the scoring to give priority to: semi-specific, specific, non-specific") do
159
+ options[:scoring] = :het_dels
160
+ end
161
+
162
+ opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: exonerate") do |o|
163
+ raise "Invalid aligner" unless o == "exonerate" or o == "blast"
164
+ options[:aligner] = o.to_sym
165
+ end
166
+
167
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
168
+ options[:database] = o
169
+ end
143
170
  end.parse!
144
171
 
145
172
 
146
173
  validate_files(options)
147
174
 
175
+ options[:database] = options[:path_to_contigs] unless options[:database]
176
+
177
+
148
178
  if options[:primer_3_preferences][:primer_product_size_range]
149
179
  range = options[:primer_3_preferences][:primer_product_size_range]
150
180
  range_arr = range.split("-")
@@ -208,7 +238,7 @@ fasta_reference_db = nil
208
238
  if fasta_reference
209
239
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
210
240
  fasta_reference_db.load_fai_entries
211
- p "Fasta reference: #{fasta_reference}"
241
+ write_status "Fasta reference: #{fasta_reference}"
212
242
  end
213
243
 
214
244
  #1. Read all the SNP files
@@ -239,9 +269,9 @@ File.open(test_file) do | f |
239
269
  write_status "WARN: Unable to find entry for #{snp.gene}"
240
270
  end
241
271
  else
242
- rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
272
+ raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
243
273
  end
244
- rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
274
+ raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
245
275
 
246
276
  snp.genomes_count = options[:genomes_count]
247
277
  snp.snp_in = snp_in
@@ -251,9 +281,6 @@ File.open(test_file) do | f |
251
281
  else
252
282
  $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
253
283
  end
254
-
255
- # chromosome = snp.chromosome unless chromosome
256
- # raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
257
284
  end
258
285
  end
259
286
 
@@ -278,26 +305,43 @@ write_status "Searching markers in genome"
278
305
  exo_f = File.open(exonerate_file, "w")
279
306
  contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
280
307
  filename=path_to_contigs
281
- puts filename
308
+ #puts filename
282
309
  target=filename
283
310
 
284
311
  fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
285
312
  fasta_file.load_fai_entries
286
313
 
287
314
  found_contigs = Set.new
288
- Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
315
+
316
+
317
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
289
318
  if aln.identity > min_identity
290
319
  exo_f.puts aln.line
291
320
  unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
292
321
  found_contigs.add(aln.target_id)
293
322
  entry = fasta_file.index.region_for_entry(aln.target_id)
294
323
  raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
295
- region = entry.get_full_region
296
- seq = fasta_file.fetch_sequence(region)
297
- contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
324
+ if options[:extract_found_contigs]
325
+ region = entry.get_full_region
326
+ seq = fasta_file.fetch_sequence(region)
327
+ contigs_f.puts(">#{aln.target_id}\n#{seq}")
328
+ end
298
329
  end
299
330
  end
331
+
300
332
  end
333
+
334
+ Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
335
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
336
+ end if options[:aligner] == :blast
337
+
338
+ Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
339
+ do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
340
+ end if options[:aligner] == :exonerate
341
+
342
+ exo_f.close()
343
+
344
+
301
345
 
302
346
  exo_f.close()
303
347
  contigs_f.close() if options[:extract_found_contigs]
@@ -314,6 +358,7 @@ container.gene_models(temp_fasta_query)
314
358
  container.chromosomes(target)
315
359
  container.add_parental({:name=>snp_in})
316
360
  container.add_parental({:name=>original_name})
361
+
317
362
  snps.each do |snp|
318
363
  snp.container = container
319
364
  snp.flanking_size = container.flanking_size
@@ -337,15 +382,26 @@ file.close
337
382
 
338
383
  Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0
339
384
 
340
-
341
385
  #5. Pick the best primer and make the primer3 output
342
386
  write_status "Selecting best primers"
343
387
  kasp_container=Bio::DB::Primer3::KASPContainer.new
388
+
389
+
390
+
344
391
  kasp_container.line_1= original_name
345
392
  kasp_container.line_2= snp_in
346
393
 
394
+ if options[:scoring] == :het_dels
395
+ kasp_container.scores = Hash.new
396
+ kasp_container.scores[:chromosome_specific] = 0
397
+ kasp_container.scores[:chromosome_semispecific] = 1000
398
+ kasp_container.scores[:chromosome_nonspecific] = 100
399
+ end
400
+
347
401
  snps.each do |snp|
348
- kasp_container.add_snp(snp)
402
+ snpk = kasp_container.add_snp(snp)
403
+
404
+
349
405
  end
350
406
 
351
407
  kasp_container.add_primers_file(primer_3_output) if added_exons > 0
@@ -0,0 +1,416 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'bio-samtools'
4
+ require 'pathname'
5
+ require 'optparse'
6
+
7
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
8
+ $: << File.expand_path('.')
9
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
10
+ require path
11
+
12
+ def log(msg)
13
+ time=Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
14
+ puts "#{time}: #{msg}"
15
+ end
16
+
17
+
18
+
19
+ #reference='wheat_6x_ty_mm_mutations_10mutants_for_validations/scaffolds_with_mm.fa'
20
+ #markers='wheat_6x_ty_mm_mutations_10mutants_for_validations/CadMulitMap.fa'
21
+ #output_folder='wheat_6x_ty_mm_mutations_10mutants_for_validations/PolyMarker'
22
+
23
+ options = Hash.new
24
+
25
+ options[:primer_3_preferences] = {
26
+ :primer_product_size_range => "100-900" ,
27
+ :primer_max_size => 25 ,
28
+ :primer_lib_ambiguity_codes_consensus => 1,
29
+ :primer_liberal_base => 1,
30
+ :primer_min_left_three_prime_distance => 5,
31
+ :primer_min_right_three_prime_distance => 5,
32
+ :primer_num_return =>1,
33
+ :primer_explain_flag => 1,
34
+ :primer_thermodynamic_parameters_path=>File.expand_path(File.dirname(__FILE__) + '../../conf/primer3_config/') + '/'
35
+ }
36
+ options[:genomes_count] = 3
37
+ options[:allow_non_specific] = false
38
+
39
+ OptionParser.new do |opts|
40
+ opts.banner = "Usage: polymarker_capillary.rb [options]"
41
+
42
+ opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
43
+ options[:reference] = o
44
+ end
45
+
46
+ opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome should match the names to the entries in the fasta files as it is used as main target") do |o|
47
+ options[:markers] = o
48
+ end
49
+
50
+ opts.on("-o", "--output_folder FOLDER", "Path to a folder where the outputs are going to be stored") do |o|
51
+ options[:output_folder] = o
52
+ end
53
+ opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
54
+ options[:genomes_count] = o.to_i
55
+ end
56
+ opts.on("-a", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
57
+ options[:allow_non_specific] = true
58
+ end
59
+
60
+ end.parse!
61
+
62
+
63
+ #puts options.inspect
64
+ reference = options[:reference]
65
+ markers = options[:markers]
66
+ output_folder = options[:output_folder]
67
+ allow_non_specific = options[:allow_non_specific]
68
+ log "Output folder: #{output_folder}"
69
+ exonerate_file="#{output_folder}/exonerate_tmp.tab"
70
+ Dir.mkdir(output_folder)
71
+
72
+ module Bio::PolyploidTools
73
+
74
+
75
+
76
+ class SequenceToAmplify < SNP
77
+
78
+ def self.select_chromosome(contig_name)
79
+
80
+ arr = contig_name.split('_')
81
+ ret = "U"
82
+ ret = arr[2][0,2] if arr.size >= 3
83
+ ret = "3B" if arr.size == 2 and arr[0] == "v443"
84
+ ret = arr[0][0,2] if arr.size == 1
85
+ return ret
86
+ end
87
+
88
+ attr_accessor :sequence_original
89
+ attr_accessor :rstart
90
+ attr_accessor :rend
91
+ attr_accessor :includeNoSpecific
92
+ #Format:
93
+ #A fasta entry with the id: contig:start-end
94
+ #The sequence can be prodcued with samtools faidx
95
+ def self.parse(fasta_entry)
96
+
97
+ snp = SequenceToAmplify.new
98
+ match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
99
+
100
+ rName = Regexp.last_match(:rname)
101
+ rStart = Regexp.last_match(:rstart).to_i
102
+ rEnd = Regexp.last_match(:rend).to_i
103
+ snp.gene = fasta_entry.definition
104
+ #snp.chromosome=rName
105
+
106
+ snp.chromosome=select_chromosome(rName)
107
+ #puts "#{rName}: #{snp.chromosome}"
108
+ snp.sequence_original = fasta_entry.seq
109
+ snp.template_sequence = fasta_entry.seq.upcase
110
+ snp.snp_in = "B"
111
+ snp.rstart = rStart
112
+ snp.rend = rEnd
113
+
114
+ snp.position = 100
115
+ snp.original = snp.sequence_original[snp.position]
116
+
117
+ tmp = Bio::Sequence::NA.new(snp.original)
118
+ rev = tmp.complement
119
+ snp.snp = rev
120
+ snp.exon_list = Hash.new()
121
+ snp
122
+ end
123
+
124
+ def primer_3_all_strings(target_chromosome, parental)
125
+ #puts target_chromosome
126
+ #puts parental
127
+ #puts aligned_sequences.to_fasta
128
+ pr = primer_region(target_chromosome, parental )
129
+ primer_3_propertes = Array.new
130
+
131
+ seq_original = String.new(pr.sequence)
132
+ #puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
133
+ return primer_3_propertes if seq_original.size < primer_3_min_seq_length
134
+ return primer_3_propertes unless pr.snp_pos == 500
135
+ #puts "Sequence origina: #{ self.original}"
136
+ #puts pr.to_fasta
137
+ #puts "Postion: #{pr.snp_pos}"
138
+ seq_original[pr.snp_pos] = self.original
139
+ seq_original_reverse = reverse_complement_string(seq_original)
140
+
141
+ seq_snp = String.new(pr.sequence)
142
+ seq_snp[pr.snp_pos] = self.snp
143
+ seq_snp_reverse = reverse_complement_string(seq_snp)
144
+
145
+ rev_pos = seq_snp.size - position
146
+
147
+ if pr.homoeologous
148
+ snp_type = "homoeologous"
149
+ else
150
+ snp_type = "non-homoeologous"
151
+ end
152
+ left_pos = Array.new
153
+ right_pos = Array.new
154
+ l_pos = pr.snp_pos
155
+ pr.chromosome_specific.shuffle.each {|pos| left_pos << pos if pos < l_pos - 50 }
156
+ pr.chromosome_specific.shuffle.each {|pos| right_pos << pos if pos > l_pos + 50}
157
+
158
+ pr.crhomosome_specific_intron.shuffle.each {|pos| left_pos << pos if pos < l_pos - 50}
159
+ pr.crhomosome_specific_intron.shuffle.each {|pos| right_pos << pos if pos > l_pos + 50}
160
+
161
+ prepareLRPrimers(left_pos, right_pos, "chromosome_specific" , snp_type,seq_original, primer_3_propertes)
162
+ if includeNoSpecific and (right_pos.size == 0 or right_pos.size == 0)
163
+ left_pos = Array.new
164
+ right_pos = Array.new
165
+ l_pos = pr.snp_pos
166
+ pr.almost_chromosome_specific.each {|pos| left_pos << pos if pos < l_pos - 50 }
167
+ pr.almost_chromosome_specific.each {|pos| right_pos << pos if pos > l_pos + 50}
168
+
169
+ pr.almost_crhomosome_specific_intron.each {|pos| left_pos << pos if pos < l_pos - 50}
170
+ pr.almost_crhomosome_specific_intron.each {|pos| right_pos << pos if pos > l_pos + 50}
171
+
172
+ prepareLRPrimers(left_pos, right_pos, "chromosome_semispecific" ,snp_type, seq_original, primer_3_propertes)
173
+ args = {
174
+ :name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_nonspecific exon #{snp_type} #{chromosome}",
175
+ :left_pos => 350,
176
+ :extra_f=>"SEQUENCE_TARGET=350,400\n",
177
+ :extra_r=>"SEQUENCE_TARGET=350,400\n",
178
+ :sequence=>seq_original}
179
+ str = return_primer_3_string(args)
180
+
181
+ primer_3_propertes << str
182
+ end
183
+ primer_3_propertes
184
+ end
185
+
186
+ def prepareLRPrimers(left_pos, right_pos, type , snp_type, seq_original,primer_3_propertes)
187
+ count = 0
188
+ left_pos.each do |l|
189
+ right_pos.each do |r|
190
+ args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} #{type} exon #{snp_type} #{chromosome}",
191
+ :left_pos => l,
192
+ :right_pos => r,
193
+ :sequence=>seq_original}
194
+
195
+ primer_3_propertes << return_primer_3_string(args)
196
+ count += 1
197
+ # return if count > 25
198
+ end
199
+ end
200
+ end
201
+
202
+ def parental_sequences
203
+ return @parental_sequences if @parental_sequences
204
+ gene_region = self.covered_region
205
+ local_pos_in_gene = self.position
206
+
207
+ @parental_sequences = Bio::Alignment::SequenceHash.new
208
+ container.parents.each do |name, bam|
209
+ seq = self.sequence_original.clone.downcase
210
+
211
+ if name == self.snp_in
212
+ #puts self.snp
213
+ seq[local_pos_in_gene] = self.snp
214
+ else
215
+ #puts self.original
216
+ seq[local_pos_in_gene] = self.original
217
+ end
218
+ seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
219
+ @parental_sequences [name] = seq
220
+ #puts name
221
+ #puts self.snp_in
222
+ #puts seq
223
+ end
224
+ @parental_sequences
225
+ end
226
+ end
227
+ end
228
+
229
+
230
+ snps = Array.new
231
+ file = Bio::FastaFormat.open(markers)
232
+ file.each do |entry|
233
+
234
+ begin
235
+ tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry)
236
+ snps << tmp if tmp
237
+ rescue
238
+ $stderr.puts "Unable to generate the marker for: #{entry.definition}"
239
+ end
240
+
241
+ end
242
+ file.close
243
+
244
+
245
+
246
+ exo_f = File.open(exonerate_file, "w")
247
+ target=reference
248
+
249
+ fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
250
+ fasta_file.load_fai_entries
251
+ min_identity = 95
252
+ found_contigs = Set.new
253
+
254
+ Bio::DB::Exonerate.align({:query=>markers, :target=>reference, :model=>'ungapped'}) do |aln|
255
+ if aln.identity > min_identity
256
+ exo_f.puts aln.line
257
+ #puts aln.line
258
+ unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
259
+ found_contigs.add(aln.target_id)
260
+ entry = fasta_file.index.region_for_entry(aln.target_id)
261
+ raise Exception.new, "Entry not found! #{aln.target_id}. Make sure that the #{reference}.fai was generated properly." if entry == nil
262
+ end
263
+ end
264
+ end
265
+ exo_f.close
266
+
267
+ arm_selection_functions = Hash.new
268
+
269
+ arm_selection_functions[:full_scaffold] = lambda do | contig_name |
270
+ return contig_name
271
+ end
272
+
273
+ #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
274
+ #Or the first two characters in the contig name, to deal with
275
+ #pseudomolecules that start with headers like: "1A"
276
+ #And with the cases when 3B is named with the prefix: v443
277
+ arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
278
+
279
+ arr = contig_name.split('_')
280
+ ret = "U"
281
+ ret = arr[2][0,2] if arr.size >= 3
282
+ ret = "3B" if arr.size == 2 and arr[0] == "v443"
283
+ ret = arr[0][0,2] if arr.size == 1
284
+ return ret
285
+ end
286
+
287
+
288
+
289
+ container= Bio::PolyploidTools::ExonContainer.new
290
+ container.flanking_size=500
291
+ container.gene_models(markers)
292
+ container.chromosomes(target)
293
+ container.add_parental({:name=>"A"})
294
+ container.add_parental({:name=>"B"})
295
+ snps.each do |snp|
296
+ snp.snp_in = "B"
297
+ snp.container = container
298
+ snp.flanking_size = container.flanking_size
299
+ snp.genomes_count = options[:genomes_count]
300
+ snp.includeNoSpecific = allow_non_specific
301
+ container.add_snp(snp)
302
+ end
303
+ container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_functions[:arm_selection_embl] , :min_identity=>min_identity})
304
+
305
+
306
+
307
+ exons_filename="#{output_folder}/localAlignment.fa"
308
+ file = File.open(exons_filename, "w")
309
+ container.print_fasta_snp_exones(file)
310
+ file.close
311
+
312
+
313
+
314
+ primer_3_input ="#{output_folder}/primer3_input.txt"
315
+ primer_3_output ="#{output_folder}/primer3_output.txt"
316
+
317
+
318
+
319
+ file = File.open(primer_3_input, "w")
320
+ snp_in="B"
321
+ Bio::DB::Primer3.prepare_input_file(file, options[:primer_3_preferences])
322
+ added_exons = container.print_primer_3_exons(file, nil, snp_in)
323
+ file.close
324
+
325
+ Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0
326
+
327
+ masks_output = "#{output_folder}/masks_designed.fa"
328
+ output_file = "#{output_folder}/primers.csv"
329
+ file = File.open(masks_output, "w")
330
+ out = File.open(output_file, "w")
331
+
332
+ class Bio::DB::Primer3::Primer3Record
333
+ attr_accessor :primerPairs
334
+ end
335
+
336
+ printed_counts = Hash.new(0)
337
+ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
338
+ #puts primer3record.inspect
339
+ next if primer3record.primer_left_num_returned.to_i == 0
340
+
341
+ seq_id = primer3record.sequence_id
342
+ printed_counts[seq_id] += 1
343
+ next if printed_counts[seq_id] > 10
344
+ excluded = "-"
345
+ exArr = excluded.split(",")
346
+ st = exArr[0].to_i
347
+ ed = exArr[1].to_i
348
+ tot = ed + st
349
+
350
+ excluded="#{st}-#{tot}"
351
+ seq_len = primer3record.sequence_template.length
352
+ printed = 0
353
+
354
+ sequence_template = primer3record.sequence_template
355
+ sequence_mask = "-" * st
356
+ sequence_mask << "*" * ed
357
+ sequence_mask << "-" * (seq_len - sequence_mask.length)
358
+
359
+ file.puts ">#{seq_id}\n#{sequence_template}"
360
+ file.puts ">#{seq_id}:mask\n#{sequence_mask}"
361
+ #puts "FDFDS"
362
+
363
+ #puts primer3record.primerPairs
364
+
365
+ primer3record.primerPairs.each do |p|
366
+ #puts p.inspect
367
+ printed += 1
368
+ lArr = p.left.coordinates
369
+ lArr[1] = lArr[0] + lArr[1]
370
+ rArr = p.right.coordinates
371
+ rArr[1] = rArr[0] - rArr[1]
372
+ toPrint = Array.new
373
+ toPrint << seq_id.split(" ")
374
+ #toPrint << seq_len
375
+ toPrint << p.product_size
376
+ toPrint << lArr.join("-")
377
+ toPrint << p.left.tm
378
+ toPrint << p.left.sequence
379
+ toPrint << rArr.join("-")
380
+ toPrint << p.right.tm
381
+ toPrint << p.right.sequence
382
+
383
+ middle = 501
384
+ toPrint << lArr[0]
385
+ toPrint << rArr[0]
386
+ toPrint << middle - lArr[0]
387
+ toPrint << rArr[0] - middle
388
+ #Start End LeftDistance RightDistance
389
+
390
+ out.puts toPrint.join(",")
391
+
392
+ sequence_primers = sequence_mask.clone
393
+ a = lArr[0]
394
+ b = lArr[1] - 1
395
+ #puts sequence_template[a..b]
396
+ sequence_primers[a..b] = sequence_template[a..b]
397
+ b = rArr[0]
398
+ a = rArr[1] + 1
399
+
400
+ sequence_primers[a..b] = sequence_template[a..b]
401
+
402
+ file.puts ">#{seq_id}:primerPair:#{printed}\n#{sequence_primers}"
403
+ end
404
+
405
+ if printed == 0
406
+ toPrint = Array.new
407
+ toPrint << seq_id.split(" ")
408
+ toPrint << excluded
409
+ toPrint << seq_len
410
+ out.puts toPrint.join(",")
411
+ end
412
+
413
+ end
414
+ out.close
415
+ file.close
416
+