bio-polyploid-tools 0.7.3 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +17 -0
  3. data/Gemfile +10 -7
  4. data/README.md +44 -0
  5. data/Rakefile +14 -14
  6. data/VERSION +1 -1
  7. data/bin/bfr.rb +2 -2
  8. data/bin/blast_triads.rb +166 -0
  9. data/bin/blast_triads_promoters.rb +192 -0
  10. data/bin/find_homoeologue_variations.rb +385 -0
  11. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  12. data/bin/hexaploid_primers.rb +2 -2
  13. data/bin/homokaryot_primers.rb +2 -2
  14. data/bin/mafft_triads.rb +120 -0
  15. data/bin/mafft_triads_promoters.rb +403 -0
  16. data/bin/polymarker.rb +73 -17
  17. data/bin/polymarker_capillary.rb +416 -0
  18. data/bin/snp_position_to_polymarker.rb +5 -3
  19. data/bin/snps_between_bams.rb +0 -29
  20. data/bin/vcfLineToTable.rb +56 -0
  21. data/bio-polyploid-tools.gemspec +74 -32
  22. data/lib/bio/BFRTools.rb +1 -0
  23. data/lib/bio/PolyploidTools/ChromosomeArm.rb +2 -6
  24. data/lib/bio/PolyploidTools/ExonContainer.rb +31 -8
  25. data/lib/bio/PolyploidTools/NoSNPSequence.rb +286 -0
  26. data/lib/bio/PolyploidTools/PrimerRegion.rb +9 -1
  27. data/lib/bio/PolyploidTools/SNP.rb +58 -18
  28. data/lib/bio/PolyploidTools/SNPMutant.rb +5 -3
  29. data/lib/bio/db/blast.rb +112 -0
  30. data/lib/bio/db/exonerate.rb +4 -5
  31. data/lib/bio/db/primer3.rb +83 -14
  32. data/test/data/BS00068396_51_blast.tab +4 -0
  33. data/test/data/BS00068396_51_contigs.nhr +0 -0
  34. data/test/data/BS00068396_51_contigs.nin +0 -0
  35. data/test/data/BS00068396_51_contigs.nsq +0 -0
  36. data/test/data/BS00068396_51_for_polymarker.fa +1 -0
  37. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  38. data/test/data/S22380157.vcf +67 -0
  39. data/test/data/S58861868/LIB1716.bam +0 -0
  40. data/test/data/S58861868/LIB1716.sam +651 -0
  41. data/test/data/S58861868/LIB1719.bam +0 -0
  42. data/test/data/S58861868/LIB1719.sam +805 -0
  43. data/test/data/S58861868/LIB1721.bam +0 -0
  44. data/test/data/S58861868/LIB1721.sam +1790 -0
  45. data/test/data/S58861868/LIB1722.bam +0 -0
  46. data/test/data/S58861868/LIB1722.sam +1271 -0
  47. data/test/data/S58861868/S58861868.fa +16 -0
  48. data/test/data/S58861868/S58861868.fa.fai +1 -0
  49. data/test/data/S58861868/S58861868.vcf +76 -0
  50. data/test/data/S58861868/header.txt +9 -0
  51. data/test/data/S58861868/merged.bam +0 -0
  52. data/test/data/S58861868/merged_reheader.bam +0 -0
  53. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  54. data/test/data/bfr_out_test.csv +5 -5
  55. data/test/data/headerMergeed.txt +9 -0
  56. data/test/data/headerS2238015 +1 -0
  57. data/test/data/mergedLibs.bam +0 -0
  58. data/test/data/mergedLibsReheader.bam +0 -0
  59. data/test/data/mergedLibsSorted.bam +0 -0
  60. data/test/data/mergedLibsSorted.bam.bai +0 -0
  61. data/test/test_bfr.rb +26 -34
  62. data/test/test_blast.rb +47 -0
  63. data/test/test_exonearate.rb +4 -9
  64. data/test/test_snp_parsing.rb +42 -22
  65. metadata +81 -20
  66. data/Gemfile.lock +0 -67
@@ -14,6 +14,7 @@ arm_selection_functions = Hash.new;
14
14
 
15
15
 
16
16
  arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
17
+ contig_name.gsub!(/chr/,"")
17
18
  ret = contig_name[0,2]
18
19
  return ret
19
20
  end
@@ -43,7 +44,6 @@ arm_selection_functions[:scaffold] = lambda do | contig_name |
43
44
  end
44
45
 
45
46
  def validate_files(o)
46
-
47
47
  [
48
48
  o[:path_to_contigs],
49
49
  o[:marker_list],
@@ -51,7 +51,7 @@ def validate_files(o)
51
51
  o[:mutant_list],
52
52
  o[:reference]
53
53
  ].flatten.compact.each do |f|
54
- raise IOError "Unable to read #{f}" unless File.exists? f
54
+ raise IOError.new "Unable to read #{f}" unless File.exists? f
55
55
  end
56
56
  end
57
57
 
@@ -67,6 +67,10 @@ options[:variation_free_region] = 0
67
67
  options[:extract_found_contigs] = false
68
68
  options[:genomes_count] = 3
69
69
  options[:min_identity] = 90
70
+ options[:scoring] = :genome_specific
71
+ options[:database] = false
72
+ options[:aligner] = :exonerate
73
+
70
74
 
71
75
  options[:primer_3_preferences] = {
72
76
  :primer_product_size_range => "50-150" ,
@@ -119,7 +123,19 @@ OptionParser.new do |opts|
119
123
  end
120
124
 
121
125
  opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two|scaffold", "Function to decide the chromome arm") do |o|
122
- options[:arm_selection] = arm_selection_functions[o.to_sym];
126
+ tmp_str = o
127
+ arr = o.split(",")
128
+ if arr.size == 2
129
+ options[:arm_selection] = lambda do |contig_name|
130
+ separator, field = arr
131
+ field = field.to_i
132
+ ret = contig_name.split(separator)[field]
133
+ return ret
134
+ end
135
+ else
136
+ options[:arm_selection] = arm_selection_functions[o.to_sym];
137
+ end
138
+
123
139
  end
124
140
 
125
141
  opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o|
@@ -139,12 +155,26 @@ OptionParser.new do |opts|
139
155
  options[:primers_to_order] = true
140
156
  end
141
157
 
142
-
158
+ opts.on("-H", "--het_dels", "If present, change the scoring to give priority to: semi-specific, specific, non-specific") do
159
+ options[:scoring] = :het_dels
160
+ end
161
+
162
+ opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: exonerate") do |o|
163
+ raise "Invalid aligner" unless o == "exonerate" or o == "blast"
164
+ options[:aligner] = o.to_sym
165
+ end
166
+
167
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
168
+ options[:database] = o
169
+ end
143
170
  end.parse!
144
171
 
145
172
 
146
173
  validate_files(options)
147
174
 
175
+ options[:database] = options[:path_to_contigs] unless options[:database]
176
+
177
+
148
178
  if options[:primer_3_preferences][:primer_product_size_range]
149
179
  range = options[:primer_3_preferences][:primer_product_size_range]
150
180
  range_arr = range.split("-")
@@ -208,7 +238,7 @@ fasta_reference_db = nil
208
238
  if fasta_reference
209
239
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
210
240
  fasta_reference_db.load_fai_entries
211
- p "Fasta reference: #{fasta_reference}"
241
+ write_status "Fasta reference: #{fasta_reference}"
212
242
  end
213
243
 
214
244
  #1. Read all the SNP files
@@ -239,9 +269,9 @@ File.open(test_file) do | f |
239
269
  write_status "WARN: Unable to find entry for #{snp.gene}"
240
270
  end
241
271
  else
242
- rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
272
+ raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
243
273
  end
244
- rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
274
+ raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
245
275
 
246
276
  snp.genomes_count = options[:genomes_count]
247
277
  snp.snp_in = snp_in
@@ -251,9 +281,6 @@ File.open(test_file) do | f |
251
281
  else
252
282
  $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
253
283
  end
254
-
255
- # chromosome = snp.chromosome unless chromosome
256
- # raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
257
284
  end
258
285
  end
259
286
 
@@ -278,26 +305,43 @@ write_status "Searching markers in genome"
278
305
  exo_f = File.open(exonerate_file, "w")
279
306
  contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
280
307
  filename=path_to_contigs
281
- puts filename
308
+ #puts filename
282
309
  target=filename
283
310
 
284
311
  fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
285
312
  fasta_file.load_fai_entries
286
313
 
287
314
  found_contigs = Set.new
288
- Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
315
+
316
+
317
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
289
318
  if aln.identity > min_identity
290
319
  exo_f.puts aln.line
291
320
  unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
292
321
  found_contigs.add(aln.target_id)
293
322
  entry = fasta_file.index.region_for_entry(aln.target_id)
294
323
  raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
295
- region = entry.get_full_region
296
- seq = fasta_file.fetch_sequence(region)
297
- contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
324
+ if options[:extract_found_contigs]
325
+ region = entry.get_full_region
326
+ seq = fasta_file.fetch_sequence(region)
327
+ contigs_f.puts(">#{aln.target_id}\n#{seq}")
328
+ end
298
329
  end
299
330
  end
331
+
300
332
  end
333
+
334
+ Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
335
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
336
+ end if options[:aligner] == :blast
337
+
338
+ Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
339
+ do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
340
+ end if options[:aligner] == :exonerate
341
+
342
+ exo_f.close()
343
+
344
+
301
345
 
302
346
  exo_f.close()
303
347
  contigs_f.close() if options[:extract_found_contigs]
@@ -314,6 +358,7 @@ container.gene_models(temp_fasta_query)
314
358
  container.chromosomes(target)
315
359
  container.add_parental({:name=>snp_in})
316
360
  container.add_parental({:name=>original_name})
361
+
317
362
  snps.each do |snp|
318
363
  snp.container = container
319
364
  snp.flanking_size = container.flanking_size
@@ -337,15 +382,26 @@ file.close
337
382
 
338
383
  Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0
339
384
 
340
-
341
385
  #5. Pick the best primer and make the primer3 output
342
386
  write_status "Selecting best primers"
343
387
  kasp_container=Bio::DB::Primer3::KASPContainer.new
388
+
389
+
390
+
344
391
  kasp_container.line_1= original_name
345
392
  kasp_container.line_2= snp_in
346
393
 
394
+ if options[:scoring] == :het_dels
395
+ kasp_container.scores = Hash.new
396
+ kasp_container.scores[:chromosome_specific] = 0
397
+ kasp_container.scores[:chromosome_semispecific] = 1000
398
+ kasp_container.scores[:chromosome_nonspecific] = 100
399
+ end
400
+
347
401
  snps.each do |snp|
348
- kasp_container.add_snp(snp)
402
+ snpk = kasp_container.add_snp(snp)
403
+
404
+
349
405
  end
350
406
 
351
407
  kasp_container.add_primers_file(primer_3_output) if added_exons > 0
@@ -0,0 +1,416 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'bio-samtools'
4
+ require 'pathname'
5
+ require 'optparse'
6
+
7
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
8
+ $: << File.expand_path('.')
9
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
10
+ require path
11
+
12
+ def log(msg)
13
+ time=Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
14
+ puts "#{time}: #{msg}"
15
+ end
16
+
17
+
18
+
19
+ #reference='wheat_6x_ty_mm_mutations_10mutants_for_validations/scaffolds_with_mm.fa'
20
+ #markers='wheat_6x_ty_mm_mutations_10mutants_for_validations/CadMulitMap.fa'
21
+ #output_folder='wheat_6x_ty_mm_mutations_10mutants_for_validations/PolyMarker'
22
+
23
+ options = Hash.new
24
+
25
+ options[:primer_3_preferences] = {
26
+ :primer_product_size_range => "100-900" ,
27
+ :primer_max_size => 25 ,
28
+ :primer_lib_ambiguity_codes_consensus => 1,
29
+ :primer_liberal_base => 1,
30
+ :primer_min_left_three_prime_distance => 5,
31
+ :primer_min_right_three_prime_distance => 5,
32
+ :primer_num_return =>1,
33
+ :primer_explain_flag => 1,
34
+ :primer_thermodynamic_parameters_path=>File.expand_path(File.dirname(__FILE__) + '../../conf/primer3_config/') + '/'
35
+ }
36
+ options[:genomes_count] = 3
37
+ options[:allow_non_specific] = false
38
+
39
+ OptionParser.new do |opts|
40
+ opts.banner = "Usage: polymarker_capillary.rb [options]"
41
+
42
+ opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
43
+ options[:reference] = o
44
+ end
45
+
46
+ opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome should match the names to the entries in the fasta files as it is used as main target") do |o|
47
+ options[:markers] = o
48
+ end
49
+
50
+ opts.on("-o", "--output_folder FOLDER", "Path to a folder where the outputs are going to be stored") do |o|
51
+ options[:output_folder] = o
52
+ end
53
+ opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
54
+ options[:genomes_count] = o.to_i
55
+ end
56
+ opts.on("-a", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
57
+ options[:allow_non_specific] = true
58
+ end
59
+
60
+ end.parse!
61
+
62
+
63
+ #puts options.inspect
64
+ reference = options[:reference]
65
+ markers = options[:markers]
66
+ output_folder = options[:output_folder]
67
+ allow_non_specific = options[:allow_non_specific]
68
+ log "Output folder: #{output_folder}"
69
+ exonerate_file="#{output_folder}/exonerate_tmp.tab"
70
+ Dir.mkdir(output_folder)
71
+
72
+ module Bio::PolyploidTools
73
+
74
+
75
+
76
+ class SequenceToAmplify < SNP
77
+
78
+ def self.select_chromosome(contig_name)
79
+
80
+ arr = contig_name.split('_')
81
+ ret = "U"
82
+ ret = arr[2][0,2] if arr.size >= 3
83
+ ret = "3B" if arr.size == 2 and arr[0] == "v443"
84
+ ret = arr[0][0,2] if arr.size == 1
85
+ return ret
86
+ end
87
+
88
+ attr_accessor :sequence_original
89
+ attr_accessor :rstart
90
+ attr_accessor :rend
91
+ attr_accessor :includeNoSpecific
92
+ #Format:
93
+ #A fasta entry with the id: contig:start-end
94
+ #The sequence can be prodcued with samtools faidx
95
+ def self.parse(fasta_entry)
96
+
97
+ snp = SequenceToAmplify.new
98
+ match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
99
+
100
+ rName = Regexp.last_match(:rname)
101
+ rStart = Regexp.last_match(:rstart).to_i
102
+ rEnd = Regexp.last_match(:rend).to_i
103
+ snp.gene = fasta_entry.definition
104
+ #snp.chromosome=rName
105
+
106
+ snp.chromosome=select_chromosome(rName)
107
+ #puts "#{rName}: #{snp.chromosome}"
108
+ snp.sequence_original = fasta_entry.seq
109
+ snp.template_sequence = fasta_entry.seq.upcase
110
+ snp.snp_in = "B"
111
+ snp.rstart = rStart
112
+ snp.rend = rEnd
113
+
114
+ snp.position = 100
115
+ snp.original = snp.sequence_original[snp.position]
116
+
117
+ tmp = Bio::Sequence::NA.new(snp.original)
118
+ rev = tmp.complement
119
+ snp.snp = rev
120
+ snp.exon_list = Hash.new()
121
+ snp
122
+ end
123
+
124
+ def primer_3_all_strings(target_chromosome, parental)
125
+ #puts target_chromosome
126
+ #puts parental
127
+ #puts aligned_sequences.to_fasta
128
+ pr = primer_region(target_chromosome, parental )
129
+ primer_3_propertes = Array.new
130
+
131
+ seq_original = String.new(pr.sequence)
132
+ #puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
133
+ return primer_3_propertes if seq_original.size < primer_3_min_seq_length
134
+ return primer_3_propertes unless pr.snp_pos == 500
135
+ #puts "Sequence origina: #{ self.original}"
136
+ #puts pr.to_fasta
137
+ #puts "Postion: #{pr.snp_pos}"
138
+ seq_original[pr.snp_pos] = self.original
139
+ seq_original_reverse = reverse_complement_string(seq_original)
140
+
141
+ seq_snp = String.new(pr.sequence)
142
+ seq_snp[pr.snp_pos] = self.snp
143
+ seq_snp_reverse = reverse_complement_string(seq_snp)
144
+
145
+ rev_pos = seq_snp.size - position
146
+
147
+ if pr.homoeologous
148
+ snp_type = "homoeologous"
149
+ else
150
+ snp_type = "non-homoeologous"
151
+ end
152
+ left_pos = Array.new
153
+ right_pos = Array.new
154
+ l_pos = pr.snp_pos
155
+ pr.chromosome_specific.shuffle.each {|pos| left_pos << pos if pos < l_pos - 50 }
156
+ pr.chromosome_specific.shuffle.each {|pos| right_pos << pos if pos > l_pos + 50}
157
+
158
+ pr.crhomosome_specific_intron.shuffle.each {|pos| left_pos << pos if pos < l_pos - 50}
159
+ pr.crhomosome_specific_intron.shuffle.each {|pos| right_pos << pos if pos > l_pos + 50}
160
+
161
+ prepareLRPrimers(left_pos, right_pos, "chromosome_specific" , snp_type,seq_original, primer_3_propertes)
162
+ if includeNoSpecific and (right_pos.size == 0 or right_pos.size == 0)
163
+ left_pos = Array.new
164
+ right_pos = Array.new
165
+ l_pos = pr.snp_pos
166
+ pr.almost_chromosome_specific.each {|pos| left_pos << pos if pos < l_pos - 50 }
167
+ pr.almost_chromosome_specific.each {|pos| right_pos << pos if pos > l_pos + 50}
168
+
169
+ pr.almost_crhomosome_specific_intron.each {|pos| left_pos << pos if pos < l_pos - 50}
170
+ pr.almost_crhomosome_specific_intron.each {|pos| right_pos << pos if pos > l_pos + 50}
171
+
172
+ prepareLRPrimers(left_pos, right_pos, "chromosome_semispecific" ,snp_type, seq_original, primer_3_propertes)
173
+ args = {
174
+ :name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_nonspecific exon #{snp_type} #{chromosome}",
175
+ :left_pos => 350,
176
+ :extra_f=>"SEQUENCE_TARGET=350,400\n",
177
+ :extra_r=>"SEQUENCE_TARGET=350,400\n",
178
+ :sequence=>seq_original}
179
+ str = return_primer_3_string(args)
180
+
181
+ primer_3_propertes << str
182
+ end
183
+ primer_3_propertes
184
+ end
185
+
186
+ def prepareLRPrimers(left_pos, right_pos, type , snp_type, seq_original,primer_3_propertes)
187
+ count = 0
188
+ left_pos.each do |l|
189
+ right_pos.each do |r|
190
+ args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} #{type} exon #{snp_type} #{chromosome}",
191
+ :left_pos => l,
192
+ :right_pos => r,
193
+ :sequence=>seq_original}
194
+
195
+ primer_3_propertes << return_primer_3_string(args)
196
+ count += 1
197
+ # return if count > 25
198
+ end
199
+ end
200
+ end
201
+
202
+ def parental_sequences
203
+ return @parental_sequences if @parental_sequences
204
+ gene_region = self.covered_region
205
+ local_pos_in_gene = self.position
206
+
207
+ @parental_sequences = Bio::Alignment::SequenceHash.new
208
+ container.parents.each do |name, bam|
209
+ seq = self.sequence_original.clone.downcase
210
+
211
+ if name == self.snp_in
212
+ #puts self.snp
213
+ seq[local_pos_in_gene] = self.snp
214
+ else
215
+ #puts self.original
216
+ seq[local_pos_in_gene] = self.original
217
+ end
218
+ seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
219
+ @parental_sequences [name] = seq
220
+ #puts name
221
+ #puts self.snp_in
222
+ #puts seq
223
+ end
224
+ @parental_sequences
225
+ end
226
+ end
227
+ end
228
+
229
+
230
+ snps = Array.new
231
+ file = Bio::FastaFormat.open(markers)
232
+ file.each do |entry|
233
+
234
+ begin
235
+ tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry)
236
+ snps << tmp if tmp
237
+ rescue
238
+ $stderr.puts "Unable to generate the marker for: #{entry.definition}"
239
+ end
240
+
241
+ end
242
+ file.close
243
+
244
+
245
+
246
+ exo_f = File.open(exonerate_file, "w")
247
+ target=reference
248
+
249
+ fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
250
+ fasta_file.load_fai_entries
251
+ min_identity = 95
252
+ found_contigs = Set.new
253
+
254
+ Bio::DB::Exonerate.align({:query=>markers, :target=>reference, :model=>'ungapped'}) do |aln|
255
+ if aln.identity > min_identity
256
+ exo_f.puts aln.line
257
+ #puts aln.line
258
+ unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
259
+ found_contigs.add(aln.target_id)
260
+ entry = fasta_file.index.region_for_entry(aln.target_id)
261
+ raise Exception.new, "Entry not found! #{aln.target_id}. Make sure that the #{reference}.fai was generated properly." if entry == nil
262
+ end
263
+ end
264
+ end
265
+ exo_f.close
266
+
267
+ arm_selection_functions = Hash.new
268
+
269
+ arm_selection_functions[:full_scaffold] = lambda do | contig_name |
270
+ return contig_name
271
+ end
272
+
273
+ #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
274
+ #Or the first two characters in the contig name, to deal with
275
+ #pseudomolecules that start with headers like: "1A"
276
+ #And with the cases when 3B is named with the prefix: v443
277
+ arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
278
+
279
+ arr = contig_name.split('_')
280
+ ret = "U"
281
+ ret = arr[2][0,2] if arr.size >= 3
282
+ ret = "3B" if arr.size == 2 and arr[0] == "v443"
283
+ ret = arr[0][0,2] if arr.size == 1
284
+ return ret
285
+ end
286
+
287
+
288
+
289
+ container= Bio::PolyploidTools::ExonContainer.new
290
+ container.flanking_size=500
291
+ container.gene_models(markers)
292
+ container.chromosomes(target)
293
+ container.add_parental({:name=>"A"})
294
+ container.add_parental({:name=>"B"})
295
+ snps.each do |snp|
296
+ snp.snp_in = "B"
297
+ snp.container = container
298
+ snp.flanking_size = container.flanking_size
299
+ snp.genomes_count = options[:genomes_count]
300
+ snp.includeNoSpecific = allow_non_specific
301
+ container.add_snp(snp)
302
+ end
303
+ container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_functions[:arm_selection_embl] , :min_identity=>min_identity})
304
+
305
+
306
+
307
+ exons_filename="#{output_folder}/localAlignment.fa"
308
+ file = File.open(exons_filename, "w")
309
+ container.print_fasta_snp_exones(file)
310
+ file.close
311
+
312
+
313
+
314
+ primer_3_input ="#{output_folder}/primer3_input.txt"
315
+ primer_3_output ="#{output_folder}/primer3_output.txt"
316
+
317
+
318
+
319
+ file = File.open(primer_3_input, "w")
320
+ snp_in="B"
321
+ Bio::DB::Primer3.prepare_input_file(file, options[:primer_3_preferences])
322
+ added_exons = container.print_primer_3_exons(file, nil, snp_in)
323
+ file.close
324
+
325
+ Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0
326
+
327
+ masks_output = "#{output_folder}/masks_designed.fa"
328
+ output_file = "#{output_folder}/primers.csv"
329
+ file = File.open(masks_output, "w")
330
+ out = File.open(output_file, "w")
331
+
332
+ class Bio::DB::Primer3::Primer3Record
333
+ attr_accessor :primerPairs
334
+ end
335
+
336
+ printed_counts = Hash.new(0)
337
+ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
338
+ #puts primer3record.inspect
339
+ next if primer3record.primer_left_num_returned.to_i == 0
340
+
341
+ seq_id = primer3record.sequence_id
342
+ printed_counts[seq_id] += 1
343
+ next if printed_counts[seq_id] > 10
344
+ excluded = "-"
345
+ exArr = excluded.split(",")
346
+ st = exArr[0].to_i
347
+ ed = exArr[1].to_i
348
+ tot = ed + st
349
+
350
+ excluded="#{st}-#{tot}"
351
+ seq_len = primer3record.sequence_template.length
352
+ printed = 0
353
+
354
+ sequence_template = primer3record.sequence_template
355
+ sequence_mask = "-" * st
356
+ sequence_mask << "*" * ed
357
+ sequence_mask << "-" * (seq_len - sequence_mask.length)
358
+
359
+ file.puts ">#{seq_id}\n#{sequence_template}"
360
+ file.puts ">#{seq_id}:mask\n#{sequence_mask}"
361
+ #puts "FDFDS"
362
+
363
+ #puts primer3record.primerPairs
364
+
365
+ primer3record.primerPairs.each do |p|
366
+ #puts p.inspect
367
+ printed += 1
368
+ lArr = p.left.coordinates
369
+ lArr[1] = lArr[0] + lArr[1]
370
+ rArr = p.right.coordinates
371
+ rArr[1] = rArr[0] - rArr[1]
372
+ toPrint = Array.new
373
+ toPrint << seq_id.split(" ")
374
+ #toPrint << seq_len
375
+ toPrint << p.product_size
376
+ toPrint << lArr.join("-")
377
+ toPrint << p.left.tm
378
+ toPrint << p.left.sequence
379
+ toPrint << rArr.join("-")
380
+ toPrint << p.right.tm
381
+ toPrint << p.right.sequence
382
+
383
+ middle = 501
384
+ toPrint << lArr[0]
385
+ toPrint << rArr[0]
386
+ toPrint << middle - lArr[0]
387
+ toPrint << rArr[0] - middle
388
+ #Start End LeftDistance RightDistance
389
+
390
+ out.puts toPrint.join(",")
391
+
392
+ sequence_primers = sequence_mask.clone
393
+ a = lArr[0]
394
+ b = lArr[1] - 1
395
+ #puts sequence_template[a..b]
396
+ sequence_primers[a..b] = sequence_template[a..b]
397
+ b = rArr[0]
398
+ a = rArr[1] + 1
399
+
400
+ sequence_primers[a..b] = sequence_template[a..b]
401
+
402
+ file.puts ">#{seq_id}:primerPair:#{printed}\n#{sequence_primers}"
403
+ end
404
+
405
+ if printed == 0
406
+ toPrint = Array.new
407
+ toPrint << seq_id.split(" ")
408
+ toPrint << excluded
409
+ toPrint << seq_len
410
+ out.puts toPrint.join(",")
411
+ end
412
+
413
+ end
414
+ out.close
415
+ file.close
416
+