bio-polyploid-tools 0.7.3 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +17 -0
  3. data/Gemfile +10 -7
  4. data/README.md +44 -0
  5. data/Rakefile +14 -14
  6. data/VERSION +1 -1
  7. data/bin/bfr.rb +2 -2
  8. data/bin/blast_triads.rb +166 -0
  9. data/bin/blast_triads_promoters.rb +192 -0
  10. data/bin/find_homoeologue_variations.rb +385 -0
  11. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  12. data/bin/hexaploid_primers.rb +2 -2
  13. data/bin/homokaryot_primers.rb +2 -2
  14. data/bin/mafft_triads.rb +120 -0
  15. data/bin/mafft_triads_promoters.rb +403 -0
  16. data/bin/polymarker.rb +73 -17
  17. data/bin/polymarker_capillary.rb +416 -0
  18. data/bin/snp_position_to_polymarker.rb +5 -3
  19. data/bin/snps_between_bams.rb +0 -29
  20. data/bin/vcfLineToTable.rb +56 -0
  21. data/bio-polyploid-tools.gemspec +74 -32
  22. data/lib/bio/BFRTools.rb +1 -0
  23. data/lib/bio/PolyploidTools/ChromosomeArm.rb +2 -6
  24. data/lib/bio/PolyploidTools/ExonContainer.rb +31 -8
  25. data/lib/bio/PolyploidTools/NoSNPSequence.rb +286 -0
  26. data/lib/bio/PolyploidTools/PrimerRegion.rb +9 -1
  27. data/lib/bio/PolyploidTools/SNP.rb +58 -18
  28. data/lib/bio/PolyploidTools/SNPMutant.rb +5 -3
  29. data/lib/bio/db/blast.rb +112 -0
  30. data/lib/bio/db/exonerate.rb +4 -5
  31. data/lib/bio/db/primer3.rb +83 -14
  32. data/test/data/BS00068396_51_blast.tab +4 -0
  33. data/test/data/BS00068396_51_contigs.nhr +0 -0
  34. data/test/data/BS00068396_51_contigs.nin +0 -0
  35. data/test/data/BS00068396_51_contigs.nsq +0 -0
  36. data/test/data/BS00068396_51_for_polymarker.fa +1 -0
  37. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  38. data/test/data/S22380157.vcf +67 -0
  39. data/test/data/S58861868/LIB1716.bam +0 -0
  40. data/test/data/S58861868/LIB1716.sam +651 -0
  41. data/test/data/S58861868/LIB1719.bam +0 -0
  42. data/test/data/S58861868/LIB1719.sam +805 -0
  43. data/test/data/S58861868/LIB1721.bam +0 -0
  44. data/test/data/S58861868/LIB1721.sam +1790 -0
  45. data/test/data/S58861868/LIB1722.bam +0 -0
  46. data/test/data/S58861868/LIB1722.sam +1271 -0
  47. data/test/data/S58861868/S58861868.fa +16 -0
  48. data/test/data/S58861868/S58861868.fa.fai +1 -0
  49. data/test/data/S58861868/S58861868.vcf +76 -0
  50. data/test/data/S58861868/header.txt +9 -0
  51. data/test/data/S58861868/merged.bam +0 -0
  52. data/test/data/S58861868/merged_reheader.bam +0 -0
  53. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  54. data/test/data/bfr_out_test.csv +5 -5
  55. data/test/data/headerMergeed.txt +9 -0
  56. data/test/data/headerS2238015 +1 -0
  57. data/test/data/mergedLibs.bam +0 -0
  58. data/test/data/mergedLibsReheader.bam +0 -0
  59. data/test/data/mergedLibsSorted.bam +0 -0
  60. data/test/data/mergedLibsSorted.bam.bai +0 -0
  61. data/test/test_bfr.rb +26 -34
  62. data/test/test_blast.rb +47 -0
  63. data/test/test_exonearate.rb +4 -9
  64. data/test/test_snp_parsing.rb +42 -22
  65. metadata +81 -20
  66. data/Gemfile.lock +0 -67
@@ -63,9 +63,9 @@ File.open(test_file) do | f |
63
63
  region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
64
64
  snp.template_sequence = fasta_reference_db.fetch_sequence(region)
65
65
  else
66
- rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
66
+ raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
67
67
  end
68
- rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
68
+ raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
69
69
  snp.snp_in = snp_in
70
70
  snp.original_name = original_name
71
71
  snps << snp
@@ -109,9 +109,9 @@ File.open(snp_file) do | f |
109
109
  region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
110
110
  snp.template_sequence = fasta_reference_db.fetch_sequence(region)
111
111
  else
112
- rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
112
+ raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
113
113
  end
114
- rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
114
+ raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
115
115
  snp.snp_in = snp_in
116
116
  snp.original_name = original_name
117
117
  snps << snp
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+ require 'bio'
4
+ require 'csv'
5
+ require 'bio-blastxmlparser'
6
+ require 'fileutils'
7
+ require 'tmpdir'
8
+
9
+
10
# Default settings for the script; every value can be overridden on the
# command line unless noted otherwise.
options = {}
options[:identity] = 50
options[:min_bases] = 200
options[:split_token] = "-"
# NOTE(review): Dir.mktmpdir creates a directory immediately and nothing in
# this script removes it again.
options[:tmp_folder] = Dir.mktmpdir
options[:program] = "blastn"
options[:random_sample] = 0

# The parser is kept in a local so it can be inspected or re-used.
# The short flag "-s" used to be registered twice (--cds and --split_token);
# OptionParser keeps only the last registration of a short flag, so "-s"
# resolved to --split_token and --cds had no working short form. "-s" keeps
# that effective meaning and --cds gets its own short flag "-c".
parser = OptionParser.new do |opts|

  opts.banner = "Usage: mafft_triads.rb [options]"

  opts.on("-i", "--identity FLOAT", "Minimum percentage identity") do |o|
    options[:identity] = o.to_f
  end

  opts.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |o|
    options[:triads] = o
  end

  opts.on("-f", "--pep FILE" , "FASTA file containing all the possible peptide sequences. ") do |o|
    options[:pep] = o
  end

  opts.on("-c", "--cds FILE" , "FASTA file containing all the possible CDS sequences. ") do |o|
    options[:cds] = o
  end

  opts.on("-s", "--split_token CHAR", "Character used to split the sequence name. The name will be everything before this token on the name of the sequences") do |o|
    options[:split_token] = o
  end

end

# Consume the recognised switches from ARGV.
parser.parse!
43
+
44
+
45
# Aligns the given sequences with the external "mafft" program through
# Bio::MAFFT and returns the alignment taken from the resulting report.
#
# sequences_to_align - the collection handed to Bio::MAFFT#query_align.
def peptide_alignment(sequences_to_align)
  mafft_args = ['--maxiterate', '1000', '--localpair', '--quiet']
  aligner = Bio::MAFFT.new("mafft", mafft_args)
  aligner.query_align(sequences_to_align).alignment
end
51
+
52
+
53
# Reads a FASTA file and keeps, for every gene, the longest entry seen.
# The gene name is the part of the entry id before the first occurrence of
# split_token.
#
# fasta_path  - path of the FASTA file to read.
# split_token - separator used to derive the gene name from the entry id.
#
# Returns [hash of gene name => longest entry, number of entries read].
def load_longest_sequences(fasta_path, split_token)
  longest = Hash.new
  entries_read = 0
  Bio::FlatFile.open(Bio::FastaFormat, fasta_path) do |fasta_file|
    fasta_file.each do |entry|
      gene_name = entry.entry_id.split(split_token)[0]
      current = longest[gene_name]
      # First entry for the gene, or a longer one than the entry kept so far.
      longest[gene_name] = entry if !current || entry.length > current.length
      entries_read += 1
    end
  end
  [longest, entries_read]
end

split_token = options[:split_token]

pep_seq, pep_seq_count = load_longest_sequences(options[:pep], split_token)
$stderr.puts "#Loaded #{pep_seq.length} genes from #{pep_seq_count} pep_seq"

cds_seq, cds_seq_count = load_longest_sequences(options[:cds], split_token)
$stderr.puts "#Loaded #{cds_seq.length} genes from #{cds_seq_count} cds_seq"


$stderr.puts "TMP dir: #{options[:tmp_folder]}"
81
+
82
# Writes every key/value pair of `sequences` to `filename` as a FASTA
# record: a ">key" header line followed by the value on the next line.
#
# sequences - any object responding to each_pair (name, sequence).
# filename  - path of the file to create or overwrite.
#
# Returns nil.
def write_fasta_from_hash(sequences, filename)
  # Block form guarantees the handle is closed even if a write raises.
  File.open(filename, "w") do |out|
    sequences.each_pair do |chromosome, exon_seq|
      out.puts ">#{chromosome}\n#{exon_seq}\n"
    end
  end
  nil
end
90
+
91
+
92
# For every triad row: align the three peptide sequences with MAFFT and
# save the peptide alignment and the matching (unaligned) CDS sequences
# under alignments/<group_id / 100>/.
CSV.foreach(options[:triads], headers: true) do |row|
  triad = row['group_id']
  genes = [row['A'], row['B'], row['D']]

  to_align = Bio::Alignment::SequenceHash.new
  genes.each { |gene| to_align[gene] = pep_seq[gene] }

  cds_seqs = Bio::Alignment::SequenceHash.new
  genes.each { |gene| cds_seqs[gene] = cds_seq[gene].to_biosequence }

  # Integer division spreads the output over one sub-folder per hundred ids.
  cent_triad = triad.to_i / 100
  folder = "alignments/#{cent_triad}/"
  FileUtils.mkdir_p folder

  pep_aln = peptide_alignment(to_align)

  write_fasta_from_hash(pep_aln, "#{folder}/#{triad}.pep.fa")
  write_fasta_from_hash(cds_seqs, "#{folder}/#{triad}.cds.fa")
end
@@ -0,0 +1,403 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+ require 'bio'
4
+ require 'csv'
5
+ require 'bio-blastxmlparser'
6
+ require 'fileutils'
7
+ require 'tmpdir'
8
+
9
+
10
# Default settings for the script; every value can be overridden on the
# command line.
options = {}
options[:identity] = 50
options[:min_bases] = 200
options[:split_token] = "-"
options[:output_folder] = "."
options[:program] = "blastn"
options[:random_sample] = 0

# The parser is kept in a local so it can be inspected or re-used.
parser = OptionParser.new do |opts|

  # The banner used to name a different script ("filter_blat.rb").
  opts.banner = "Usage: mafft_triads_promoters.rb [options]"

  opts.on("-i", "--identity FLOAT", "Minimum percentage identity") do |o|
    options[:identity] = o.to_f
  end
  opts.on("-c", "--min_bases int", "Minimum alignment length (default 200)") do |o|
    options[:min_bases] = o.to_i
  end

  opts.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |o|
    options[:triads] = o
  end

  opts.on("-f", "--sequences FILE" , "FASTA file containing all the possible sequences. ") do |o|
    options[:fasta] = o
  end

  opts.on("-s", "--split_token CHAR", "Character used to split the sequence name. The name will be everything before this token on the name of the sequences") do |o|
    options[:split_token] = o
  end

  opts.on("-p", "--program blastn|blastp", "The program to use in the alignments. Currently only supported blastn and blastp") do |o|
    options[:program] = o
  end

  opts.on("-o", "--output_folder DIR", "Folder to save the output") do |o|
    options[:output_folder] = o
  end

end

# Consume the recognised switches from ARGV.
parser.parse!
51
+
52
# Re-opens BioRuby's alignment mixin to add column-wise scoring helpers.
# Every method here relies on the including object responding to `keys`,
# `[]` and `each_pair` (as a hash of name => sequence does).
#
# NOTE: len, sum_of_all_pairs and sum_of_identities are memoized in instance
# variables, so they are not refreshed if the alignment is modified later.
module Bio::Alignment::EnumerableExtension

  # Returns a new Bio::Alignment::SequenceHash holding, for every sequence,
  # the slice str[start, length]. A nil sequence is stored as "".
  def cut_alignment(start, length)
    a = Bio::Alignment::SequenceHash.new
    a.set_all_property(get_all_property)
    each_pair do |key, str|
      seq = str.nil? ? "" : str[start, length]
      a.store(key, seq)
    end
    a
  end

  # Scans the columns keeping a running sum_of_pair score that restarts
  # whenever it drops below zero, and remembers the stretch that reached the
  # highest running score.
  #
  # Returns [start, length, len - start - length, len - start] of that
  # stretch (all zeros plus len when no column ever scores above zero).
  def best_block
    best_start = 0
    best_score = 0
    best_length = 0
    current_start = 0
    current_score = 0
    current_length = 0

    each_base_alignment_with_index do |bases, i|
      current_start = i if current_length == 0
      current_length += 1
      current_score += sum_of_pair(bases)
      if current_score > best_score
        best_score = current_score
        best_length = current_length
        best_start = current_start
      end

      # A negative running score ends the current stretch.
      if current_score < 0
        current_length = 0
        current_score = 0
      end
    end

    [best_start, best_length, len - best_start - best_length, len - best_start]
  end

  # Yields, for every column index i in 0...len, the array of the i-th
  # element of each sequence (in `keys` order) together with i.
  def each_base_alignment_with_index
    names = self.keys
    len.times do |i|
      column = names.map { |chr| self[chr][i] }
      yield column, i
    end
  end

  # Yields the array of elements of every column, without the index.
  # (An earlier, shadowed duplicate definition of this method was removed;
  # Ruby only ever used this one.)
  def each_base_alignment
    each_base_alignment_with_index do |column, _i|
      yield column
    end
  end

  # Sum of sum_of_pair over all columns. Memoized.
  def sum_of_all_pairs
    return @sum_of_all_pairs if @sum_of_all_pairs
    @sum_of_all_pairs = 0
    each_base_alignment do |bases|
      @sum_of_all_pairs += sum_of_pair(bases)
    end
    @sum_of_all_pairs
  end

  # Sum of s_o_i (identical pairs) over all columns. Memoized.
  def sum_of_identities
    return @sum_of_identities if @sum_of_identities
    @sum_of_identities = 0
    each_base_alignment do |bases|
      @sum_of_identities += s_o_i(bases)
    end
    @sum_of_identities
  end

  # Length of the first sequence, or 0 when there is no first key or its
  # sequence is nil. Memoized.
  def len
    return @len if @len
    first_name = self.keys[0]
    @len = 0
    @len = self[first_name].length if first_name and self[first_name] != nil
    @len
  end

  # Number of unordered sequence pairs: n * (n - 1) / 2.
  def pairwise_comparaisons
    n = self.keys.size
    n * (n - 1) / 2
  end

  # sum_of_identities divided by len * pairwise_comparaisons, as a Float.
  def identity
    max_score = len * pairwise_comparaisons
    sum_of_identities.to_f / max_score
  end

  # sum_of_all_pairs divided by len * pairwise_comparaisons, as a Float.
  def normalized_sum_of_all_pairs
    max_score = len * pairwise_comparaisons
    sum_of_all_pairs.to_f / max_score
  end

  # Scores one column by comparing every unordered pair of its elements:
  #   both "-", both "N" or both "n"  ->  0
  #   exactly one of them "-"         -> -2
  #   equal                           -> +1
  #   different                       -> -1
  #
  # bases - indexable collection with the elements of one column.
  def sum_of_pair(bases)
    last = bases.length - 1
    total = 0
    0.upto(last) do |i|
      (i + 1).upto(last) do |j|
        x = bases[i]
        y = bases[j]
        if (x == "-" && y == "-") || (x == "N" && y == "N") || (x == "n" && y == "n")
          # Gap/gap and N/N pairs are neutral.
        elsif x == "-" || y == "-"
          total -= 2
        elsif x == y
          total += 1
        else
          total -= 1
        end
      end
    end
    total
  end

  # Counts the unordered pairs of elements in the column that are equal
  # (gap/gap and N/N pairs count as equal here).
  def s_o_i(bases)
    last = bases.length - 1
    total = 0
    0.upto(last) do |i|
      (i + 1).upto(last) do |j|
        total += 1 if bases[i] == bases[j]
      end
    end
    total
  end

  # Scores sliding windows of the alignment.
  #
  # Window end positions are 0, offset, 2*offset, ... (up to len), each
  # shifted by len % offset, visited from the highest to the lowest; every
  # window is cut_alignment(end - window_size, window_size).
  #
  # Returns one row per window:
  #   [i * offset, i * offset + window_size, sum_of_all_pairs,
  #    normalized_sum_of_all_pairs, sum_of_identities, identity]
  # where i is the position of the window in that visiting order.
  # NOTE(review): for windows near the start, end - window_size is negative,
  # which String#[] interprets relative to the end of the string (or returns
  # nil) - confirm this is intended.
  def window_identities(window_size=100, offset=25)
    window_ends = (0..len).step(offset).to_a.map { |pos| pos + len % offset }.reverse
    window_ends.each_with_index.map do |window_end, i|
      window = cut_alignment(window_end - window_size, window_size)
      [
        i * offset,
        i * offset + window_size,
        window.sum_of_all_pairs,
        window.normalized_sum_of_all_pairs,
        window.sum_of_identities,
        window.identity
      ]
    end
  end
end
223
+
224
# Aligns the given sequences with the external "mafft" program through
# Bio::MAFFT and returns the alignment taken from the resulting report.
#
# If any of the values is nil nothing is aligned and the argument itself is
# returned unchanged.
#
# sequences_to_align - object responding to each_value, handed as is to
#                      Bio::MAFFT#query_align.
def promoter_alignment(sequences_to_align)
  sequences_to_align.each_value { |seq| return sequences_to_align if seq.nil? }

  # Alternative settings that were tried:
  #   ['--maxiterate', '1000', '--ep', '0', '--genafpair', '--quiet']
  mafft_options = ['--maxiterate', '1000', '--localpair', '--quiet']
  # The MAFFT wrapper is created once and reused on later calls.
  @mafft ||= Bio::MAFFT.new("mafft", mafft_options)
  @mafft.query_align(sequences_to_align).alignment
end
234
+
235
# Writes every key/value pair of `sequences` to `filename` as a FASTA
# record: a ">key" header line followed by the value on the next line.
#
# sequences - any object responding to each_pair (name, sequence).
# filename  - path of the file to create or overwrite.
#
# Returns nil.
def write_fasta_from_hash(sequences, filename)
  # Block form guarantees the handle is closed even if a write raises.
  File.open(filename, "w") do |out|
    sequences.each_pair do |chromosome, exon_seq|
      out.puts ">#{chromosome}\n#{exon_seq}\n"
    end
  end
  nil
end
242
+
243
# Finds the longest stretch of the alignment in which every sequence is
# covered (has no "-"), tolerating gapped runs of up to max_gap columns
# inside the stretch.
#
# aln     - object responding to keys and [] (name => aligned sequence);
#           the length of the first sequence is used as alignment length.
# max_gap - a run of more than this many consecutive gapped columns ends
#           the current stretch.
#
# Returns [start, length, len - start - length, len - start], where length
# includes the gapped columns counted inside the stretch. When no covered
# column exists start is -1 and length is 0.
def get_longest_aln(aln, max_gap: 10)
  names = aln.keys
  len = 0
  len = aln[names[0]].length if names[0] and aln[names[0]] != nil
  total_alignments = names.size

  longest_start = -1
  longest_length = 0
  longest_gaps = 0
  current_start = -1
  current_length = 0   # covered columns in the current stretch
  current_gap = 0      # length of the gapped run currently being crossed
  gaps = 0             # gapped columns seen since the current stretch began

  len.times do |i|
    covered = names.count { |chr| aln[chr][i] != "-" }
    if covered == total_alignments
      current_start = i if current_length == 0
      current_length += 1
      current_gap = 0
    else
      gaps += 1
      current_gap += 1
    end

    if current_length > longest_length
      longest_length = current_length
      longest_start = current_start
      # Trailing gaps still being crossed are not part of the stretch.
      longest_gaps = gaps - current_gap
    end

    if current_gap > max_gap
      current_length = 0
      gaps = 0
    end
  end

  longest_length += longest_gaps
  [longest_start, longest_length, len - longest_start - longest_length, len - longest_start]
end
288
+
289
+ split_token = options[:split_token]
290
+
291
# Reads a FASTA file and keeps, for every gene, the longest entry seen.
# The gene name is the part of the entry id before the first occurrence of
# split_token.
#
# fasta_path  - path of the FASTA file to read.
# split_token - separator used to derive the gene name from the entry id.
#
# Returns [hash of gene name => longest entry, number of entries read].
def read_alignments(fasta_path, split_token)
  longest_by_gene = Hash.new
  entries_read = 0
  Bio::FlatFile.open(Bio::FastaFormat, fasta_path) do |fasta_file|
    fasta_file.each do |entry|
      gene_name = entry.entry_id.split(split_token)[0]
      kept = longest_by_gene[gene_name]
      longest_by_gene[gene_name] = entry if !kept || entry.length > kept.length
      entries_read += 1
    end
  end
  [longest_by_gene, entries_read]
end
305
+
306
# Main script: for every triad, align (or reload) the three sequences,
# locate the longest gap-tolerant covered region and the best scoring
# block, and write summary statistics, sliding-window statistics and the
# FASTA files of the alignments.
sequences, sequence_count = read_alignments(options[:fasta], split_token)

$stderr.puts "#Loaded #{sequences.length} genes from #{sequence_count} sequences"
output_folder = options[:output_folder]

FileUtils.mkdir_p output_folder
summary_file = "#{output_folder}/identities.txt"
long_table_file = "#{output_folder}/sliding_window_identities.txt"

out = File.open(summary_file, "w")
long_table = File.open(long_table_file, "w")

begin
  # Array#join flattens nested arrays, so the nested header groups end up
  # as plain tab separated columns.
  header = ["triad", "total_aln_length"]
  header << ["longest_start", "longest_length", "longest_start_from_CDS","longest_end_from_CDS", "longest_sum_of_all_pairs","longest_norm_sum_of_all_pairs","longest_sum_of_identities", "longest_identity"]
  header << ["best_start", "best_length" , "best_start_from_CDS","best_end_from_CDS", "best_sum_of_all_pairs","best_norm_sum_of_all_pairs","best_sum_of_identities", "best_identity"]
  out.puts header.join("\t")
  long_table.puts ["triad", "type", "start_from_CDS", "end_from_cds" , "sum_of_all_pairs","norm_sum_of_all_pairs","sum_of_identities", "identity"].join("\t")

  CSV.foreach(options[:triads], headers: true) do |row|
    a = row['A']
    b = row['B']
    d = row['D']
    triad = row['group_id']

    # Integer division spreads the output over one sub-folder per hundred ids.
    cent_triad = triad.to_i / 100
    folder = "#{output_folder}/prom_aln/#{cent_triad}/"
    save_prom = "#{folder}/#{triad}.prom.fa"

    to_align = Bio::Alignment::SequenceHash.new
    to_align[a] = sequences[a]
    to_align[b] = sequences[b]
    to_align[d] = sequences[d]

    # Reuse an alignment saved by a previous run instead of calling MAFFT.
    if File.file?(save_prom)
      saved_entries, _saved_count = read_alignments(save_prom, split_token)
      prom_aln = Bio::Alignment.new(saved_entries)
    else
      prom_aln = promoter_alignment(to_align)
    end

    print_arr = [triad, prom_aln.len]

    # Longest region in which all sequences are covered.
    aln_stats = get_longest_aln(prom_aln)
    print_arr << aln_stats
    cut_seqs = prom_aln.cut_alignment(aln_stats[0], aln_stats[1])

    print_arr << cut_seqs.sum_of_all_pairs
    print_arr << cut_seqs.normalized_sum_of_all_pairs
    print_arr << cut_seqs.sum_of_identities
    print_arr << cut_seqs.identity

    # Best scoring block according to the sum-of-pairs score.
    best_aln_stats = prom_aln.best_block
    best_aln_cut = prom_aln.cut_alignment(best_aln_stats[0], best_aln_stats[1])

    print_arr << best_aln_stats

    print_arr << best_aln_cut.sum_of_all_pairs
    print_arr << best_aln_cut.normalized_sum_of_all_pairs
    print_arr << best_aln_cut.sum_of_identities
    print_arr << best_aln_cut.identity

    # Sliding-window statistics for the two cuts and the full alignment.
    [
      ["cut_longest_region", cut_seqs],
      ["cut_best_region", best_aln_cut],
      ["full_promoter", prom_aln]
    ].each do |type, alignment|
      alignment.window_identities.each do |e|
        long_table.puts [triad, type, e].flatten.join("\t")
      end
    end

    out.puts print_arr.join("\t")

    FileUtils.mkdir_p folder

    write_fasta_from_hash(prom_aln, save_prom) unless File.file?(save_prom)

    # This guard used to test save_prom, which always exists at this point
    # (it was either found above or has just been written), so the cut
    # alignment was never saved. Test the cut file itself instead.
    save_prom_cut = "#{folder}/#{triad}.prom.cut.fa"
    write_fasta_from_hash(cut_seqs, save_prom_cut) unless File.file?(save_prom_cut)

    save_prom_cut_best = "#{folder}/#{triad}.prom.cut.best.fa"
    write_fasta_from_hash(best_aln_cut, save_prom_cut_best)
  end
ensure
  # Close both reports even when a triad fails half way through.
  long_table.close
  out.close
end