bacterial-annotator 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,15 +6,19 @@
6
6
  # version: 0.0.1
7
7
  # licence:
8
8
 
9
+ require 'json'
10
+ require 'zlib'
9
11
 
10
12
  class SequenceSynteny
11
13
 
12
14
  attr_reader :query_file, :subject_file, :aln_hits, :query_sequences, :subject_sequences
13
15
 
14
- def initialize query_file, subject_file, name, pidentity, min_coverage, type
16
+ def initialize root, outdir, query_file, subject_file, name, pidentity, min_coverage, type
17
+
18
+ @root = root
19
+ @outdir = outdir
15
20
  @query_file = query_file
16
21
  @subject_file = subject_file
17
-
18
22
  @query_sequences = get_sequences(query_file)
19
23
  @subject_sequences = get_sequences(subject_file)
20
24
 
@@ -28,22 +32,47 @@ class SequenceSynteny
28
32
 
29
33
 
30
34
  # get sequences name with length in hash
31
- def get_sequences seq_file
35
+ def get_sequences raw_file
32
36
 
33
37
  sequences = {}
34
- flat = Bio::FlatFile.auto("#{seq_file}")
35
- flat.each_entry do |s|
36
- s_name = s.definition.chomp.split(" ")[0]
37
- sequences[s_name] = {}
38
- properties = s.definition.chomp.split(";")
39
- partial = false
40
- if properties.length >= 2 and properties[1].include? "partial"
41
- partial = (properties[1].gsub("partial=","").include? '1')
38
+
39
+ if raw_file.include?(".dmnd")
40
+
41
+ seq_info_file = raw_file.gsub(".dmnd",".json.gz")
42
+
43
+ json_genes = {}
44
+ Zlib::GzipReader.open(seq_info_file) {|gz|
45
+ json_genes = JSON.parse(gz.read)
46
+ }
47
+
48
+ json_genes.each do |gene|
49
+
50
+ sequences[gene["cluster_id"]] = {}
51
+ sequences[gene["cluster_id"]][:length] = gene["consensus_length"].to_f
52
+ sequences[gene["cluster_id"]][:conserved] = false
53
+ sequences[gene["cluster_id"]][:contig] = gene["cluster_id"].split("_")[0..-2].join("_") if gene["cluster_id"].include? "_"
54
+
42
55
  end
43
- sequences[s_name][:partial] = partial
44
- sequences[s_name][:length] = s.seq.length
45
- sequences[s_name][:conserved] = false
46
- sequences[s_name][:contig] = s_name.split("_")[0..-2].join("_") if s_name.include? "_"
56
+
57
+ else
58
+
59
+ seq_file = raw_file
60
+ flat = Bio::FlatFile.auto("#{seq_file}")
61
+ flat.each_entry do |s|
62
+ s_name = s.definition.chomp.split(" ")[0]
63
+ sequences[s_name] = {}
64
+ properties = s.definition.chomp.split(";")
65
+ partial = false
66
+ if properties.length >= 2 and properties[1].include? "partial"
67
+ partial = (properties[1].gsub("partial=","").include? '1')
68
+ end
69
+ sequences[s_name][:partial] = partial
70
+ sequences[s_name][:length] = s.seq.length
71
+ sequences[s_name][:conserved] = false
72
+ sequences[s_name][:contig] = s_name.split("_")[0..-2].join("_") if s_name.include? "_"
73
+
74
+ end
75
+
47
76
  end
48
77
 
49
78
  sequences
@@ -51,14 +80,42 @@ class SequenceSynteny
51
80
  end
52
81
 
53
82
  # run blat on the subject and query files (adds -prot when @type is "prot")
54
- def run_blat root, outdir
55
- base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity} > /dev/null 2>&1"
83
+ def run_blat
84
+ base_cmd = "#{@root}/blat.linux -out=blast8 -minIdentity=#{@pidentity} > /dev/null 2>&1"
56
85
  if @type == "prot"
57
- system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
86
+ system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{@outdir}/#{@name}.blat8.tsv")
87
+ else
88
+ system("#{base_cmd} #{@subject_file} #{@query_file} #{@outdir}/#{@name}.blat8.tsv")
89
+ end
90
+ @aln_file = "#{@outdir}/#{@name}.blat8.tsv"
91
+ # extract_hits
92
+ end # end of method
93
+
94
+ # run fasta36 when @type is "prot", glsearch36 otherwise
95
+ def run_fasta36
96
+ if @type == "prot"
97
+ system("#{@root}/fasta36.linux -T 1 -b 3 -E 1e-40 -m 8 #{@query_file} #{@subject_file} > #{@outdir}/#{@name}.fasta36.tsv")
98
+ else
99
+ system("#{@root}/glsearch36.linux -T 1 -b 12 -E 1e-40 -m 8 #{@query_file} #{@subject_file} > #{@outdir}/#{@name}.fasta36.tsv")
100
+ end
101
+ @aln_file_fasta36 = "#{@outdir}/#{@name}.fasta36.tsv"
102
+ # extract_hits
103
+ end # end of method
104
+
105
+ # run diamond blastp when @type is "prot" (no alignment is run otherwise)
106
+ def run_diamond
107
+ if @type == "prot"
108
+ if subject_file.include? ".dmnd"
109
+ db_file = subject_file
110
+ else
111
+ system("#{@root}/diamond.linux makedb --db #{subject_file} --in #{subject_file} > /dev/null 2>&1")
112
+ db_file = subject_file
113
+ end
114
+ system("#{@root}/diamond.linux blastp --db #{db_file} -q #{query_file} -o #{@outdir}/#{@name}.diamond.tsv -f 6 > /dev/null 2>&1")
58
115
  else
59
- system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
116
+ # system("#{@root}/glsearch36.linux -b 3 -E 1e-25 -m 8 #{@subject_file} #{@query_file} > #{@outdir}/#{@name}.fasta36.tsv")
60
117
  end
61
- @aln_file = "#{outdir}/#{@name}.blat8.tsv"
118
+ @aln_file = "#{@outdir}/#{@name}.diamond.tsv"
62
119
  # extract_hits
63
120
  end # end of method
64
121
 
@@ -23,6 +23,8 @@ class BacterialAnnotator
23
23
  @root = root
24
24
  @options = options
25
25
 
26
+ abort if ! @options.has_key? :input
27
+
26
28
  @minlength = @options[:minlength].to_i
27
29
  @options[:minlength] = @options[:minlength].to_i
28
30
  @options[:pidentity] = @options[:pidentity].to_f
@@ -44,12 +46,25 @@ class BacterialAnnotator
44
46
  end
45
47
  Dir.mkdir(@options[:outdir])
46
48
 
47
- @query_fasta = SequenceFasta.new(@options[:input], @options[:meta])
49
+ @query_fasta = SequenceFasta.new(@root,
50
+ options[:outdir],
51
+ @options[:input],
52
+ @options[:meta])
48
53
 
49
54
  @with_refence_genome = false
55
+ @with_db = false
50
56
  if @options.has_key? :refgenome
51
57
  @with_refence_genome = true
52
- @ref_genome = SequenceAnnotation.new(@options[:refgenome], @options[:outdir])
58
+ @ref_genome = SequenceAnnotation.new(@root,
59
+ @options[:outdir],
60
+ @options[:refgenome],
61
+ "refGbk")
62
+ elsif @options[:mergem]
63
+ @with_db = true
64
+ @ref_genome = SequenceAnnotation.new(@root,
65
+ @options[:outdir],
66
+ @options[:mergem],
67
+ "db")
53
68
  end
54
69
 
55
70
  @with_external_db = false
@@ -76,52 +91,12 @@ class BacterialAnnotator
76
91
 
77
92
  end # end of method
78
93
 
79
- # Prepare files for the annotation
80
- # Will run prodigal on the query and prepare reference genome files
81
- def prepare_files_for_annotation
82
- print "# Running Prodigal on your genome.."
83
- start_time = Time.now
84
- @query_fasta.run_prodigal @root, @options[:outdir]
85
- end_time = Time.now
86
- c_time = Helper.sec2str(end_time - start_time)
87
- print "done (#{c_time})\n"
88
- if @with_refence_genome
89
- @ref_genome.write_cds_to_file @options[:outdir]
90
- @ref_genome.write_rna_to_file @options[:outdir]
91
- # puts "Successfully loaded #{@ref_genome.gbk.definition}"
92
- end
93
- end # end of method
94
-
95
-
96
- def run_reference_synteny_prot
97
-
98
- ref_synteny_prot = SequenceSynteny.new(@query_fasta.annotation_files[:proteins], @ref_genome.cds_file,
99
- "Prot-Ref", @options[:pidentity], @options[:pcoverage], "prot")
100
-
101
- print "# Running alignment with Reference Genome CDS (blat).."
102
- start_time = Time.now
103
- ref_synteny_prot.run_blat @root, @options[:outdir]
104
- end_time = Time.now
105
- c_time = Helper.sec2str(end_time - start_time)
106
- print "done (#{c_time})\n"
107
-
108
- ref_synteny_prot.extract_hits :refgenome
109
-
110
- ref_synteny_prot.query_sequences.each do |k,v|
111
- if v.has_key? :homology
112
- @contig_annotations_cds[v[:contig]] = [] if ! @contig_annotations_cds.has_key? v[:contig]
113
- @contig_annotations_cds[v[:contig]] << k
114
- end
115
- end
116
-
117
- ref_synteny_prot
118
-
119
- end
120
-
121
94
 
122
95
  # run_alignment of reference genome proteins and the query
123
96
  def run_annotation
124
97
 
98
+ prepare_files_for_annotation
99
+
125
100
  # process reference genome synteny
126
101
  if @with_refence_genome # Annotation with the Reference Genome
127
102
 
@@ -153,23 +128,69 @@ class BacterialAnnotator
153
128
  dump_ref_synteny_to_file
154
129
 
155
130
  # run RNA annotation
156
- @rna_synteny = SequenceSynteny.new(@query_fasta.fasta_file, @ref_genome.rna_file,
157
- "RNA-Ref", @options[:pidentity], @options[:pcoverage], "dna")
131
+ @rna_synteny = SequenceSynteny.new(@root,
132
+ @options[:outdir],
133
+ @query_fasta.fasta_file,
134
+ @ref_genome.rna_file,
135
+ "RNA-Ref",
136
+ @options[:pidentity],
137
+ @options[:pcoverage],
138
+ "dna")
139
+
158
140
  print "# Running alignment with Reference Genome RNA (blat).."
159
141
  start_time = Time.now
160
- @rna_synteny.run_blat @root, @options[:outdir]
142
+ @rna_synteny.run_blat
161
143
  end_time = Time.now
162
144
  c_time = Helper.sec2str(end_time-start_time)
163
145
  print "done (#{c_time})\n"
146
+
147
+ # # takes too long
148
+ # print "# Running alignment with Reference Genome RNA (fasta36).."
149
+ # start_time = Time.now
150
+ # @rna_synteny.run_fasta36
151
+ # end_time = Time.now
152
+ # c_time = Helper.sec2str(end_time-start_time)
153
+ # print "done (#{c_time})\n"
154
+
164
155
  @rna_synteny.extract_hits_dna :rna
165
156
  @contig_annotations_rna = {}
166
157
  @query_fasta.annotation_files[:contigs].each_with_index do |contig, contig_index|
167
158
  @contig_annotations_rna[contig] = @rna_synteny.get_annotation_for_contig contig
168
159
  end
169
160
 
161
+
162
+ elsif @with_db
163
+
164
+ @prot_synteny_refgenome = run_mergem_synteny_prot
165
+ # iterate over each contig
166
+ # discard short contig
167
+ # cumulate statistics of homolog CDS
168
+ @query_fasta.annotation_files[:contigs].each_with_index do |contig, contig_index|
169
+
170
+ # Skip short contigs
171
+ if @query_fasta.annotation_files[:contigs_length][contig_index] < @minlength
172
+ @annotation_stats[:short_contigs] << contig
173
+ next
174
+ end
175
+
176
+ remaining_cds = cumulate_annotation_stats_reference contig
177
+
178
+ if remaining_cds != []
179
+ @contig_foreign_cds[contig] = remaining_cds
180
+ end
181
+
182
+ end
183
+
184
+ # dump foreign proteins to file
185
+ foreign_cds_file = dump_cds
186
+
187
+ # dump reference CDS synteny to file
188
+ dump_ref_synteny_to_file
189
+
190
+
170
191
  else # no reference genome
171
192
 
172
- # no reference genome .. will process all the CDS
193
+ # no reference genome .. will process all the CDS as foreign for the external db
173
194
  foreign_cds_file = @query_fasta.annotation_files[:proteins]
174
195
 
175
196
  end
@@ -187,6 +208,99 @@ class BacterialAnnotator
187
208
  end # end of method
188
209
 
189
210
 
211
+ # Prepare files for the annotation
212
+ # Runs prodigal on the query and prints the elapsed time
213
+ def prepare_files_for_annotation
214
+ print "# Running Prodigal on your genome.."
215
+ start_time = Time.now
216
+ @query_fasta.run_prodigal
217
+ end_time = Time.now
218
+ c_time = Helper.sec2str(end_time - start_time)
219
+ print "done (#{c_time})\n"
220
+ end # end of method
221
+
222
+
223
+ def run_mergem_synteny_prot
224
+
225
+
226
+ ref_synteny_prot = SequenceSynteny.new(@root,
227
+ @options[:outdir],
228
+ @query_fasta.annotation_files[:proteins],
229
+ @ref_genome.cds_file,
230
+ "Prot-Ref",
231
+ @options[:pidentity],
232
+ @options[:pcoverage],
233
+ "prot")
234
+
235
+ print "# Running alignment with Reference Genome CDS (diamond).."
236
+ start_time = Time.now
237
+ ref_synteny_prot.run_diamond
238
+ end_time = Time.now
239
+ c_time = Helper.sec2str(end_time - start_time)
240
+ print "done (#{c_time})\n"
241
+
242
+ ref_synteny_prot.extract_hits :refgenome
243
+
244
+ ref_synteny_prot.query_sequences.each do |k,v|
245
+ if v.has_key? :homology
246
+ @contig_annotations_cds[v[:contig]] = [] if ! @contig_annotations_cds.has_key? v[:contig]
247
+ @contig_annotations_cds[v[:contig]] << k
248
+ end
249
+ end
250
+
251
+ ref_synteny_prot
252
+
253
+
254
+ end
255
+
256
+
257
+
258
+ def run_reference_synteny_prot
259
+
260
+ ref_synteny_prot = SequenceSynteny.new(@root,
261
+ @options[:outdir],
262
+ @query_fasta.annotation_files[:proteins],
263
+ @ref_genome.cds_file,
264
+ "Prot-Ref",
265
+ @options[:pidentity],
266
+ @options[:pcoverage],
267
+ "prot")
268
+
269
+ print "# Running alignment with Reference Genome CDS (diamond).."
270
+ start_time = Time.now
271
+ ref_synteny_prot.run_diamond
272
+ end_time = Time.now
273
+ c_time = Helper.sec2str(end_time - start_time)
274
+ print "done (#{c_time})\n"
275
+
276
+ # print "# Running alignment with Reference Genome CDS (blat).."
277
+ # start_time = Time.now
278
+ # ref_synteny_prot.run_blat
279
+ # end_time = Time.now
280
+ # c_time = Helper.sec2str(end_time - start_time)
281
+ # print "done (#{c_time})\n"
282
+
283
+ # print "# Running alignment with Reference Genome CDS (fasta36).."
284
+ # start_time = Time.now
285
+ # ref_synteny_prot.run_fasta36
286
+ # end_time = Time.now
287
+ # c_time = Helper.sec2str(end_time - start_time)
288
+ # print "done (#{c_time})\n"
289
+
290
+ ref_synteny_prot.extract_hits :refgenome
291
+
292
+ ref_synteny_prot.query_sequences.each do |k,v|
293
+ if v.has_key? :homology
294
+ @contig_annotations_cds[v[:contig]] = [] if ! @contig_annotations_cds.has_key? v[:contig]
295
+ @contig_annotations_cds[v[:contig]] << k
296
+ end
297
+ end
298
+
299
+ ref_synteny_prot
300
+
301
+ end
302
+
303
+
190
304
  # Finishing the annotation of the remaining CDS
191
305
  def finish_annotation remaining_cds_file
192
306
 
@@ -194,15 +308,25 @@ class BacterialAnnotator
194
308
  if @options.has_key? :external_db # from an external DB
195
309
 
196
310
  db_file = @options[:external_db]
197
- ref_cds = extract_externaldb_prot_info db_file
198
-
199
- @externaldb_synteny = SequenceSynteny.new(remaining_cds_file, db_file,
200
- "Prot-ExternalDB", @options[:pidentity],
201
- @options[:pcoverage], "prot")
311
+ ref_cds = SequenceAnnotation.new(@root,
312
+ @options[:outdir],
313
+ db_file,
314
+ "fasta")
315
+
316
+ # ref_cds = extract_externaldb_prot_info db_file
317
+
318
+ @externaldb_synteny = SequenceSynteny.new(@root,
319
+ @options[:outdir],
320
+ remaining_cds_file,
321
+ db_file,
322
+ "Prot-ExternalDB",
323
+ @options[:pidentity],
324
+ @options[:pcoverage],
325
+ "prot")
202
326
 
203
327
  print "# Running BLAT alignment with External Database.."
204
328
  start_time = Time.now
205
- @externaldb_synteny.run_blat @root, @options[:outdir]
329
+ @externaldb_synteny.run_blat
206
330
  end_time = Time.now
207
331
  c_time = Helper.sec2str(end_time-start_time)
208
332
  print "done (#{c_time})\n"
@@ -228,18 +352,18 @@ class BacterialAnnotator
228
352
  # note = "Protein homology (#{v[:pId]}% identity) with gi:#{hit_gi}"
229
353
  cov_query = (v[:homology][:cov_query]*100).round(2)
230
354
  cov_subject = (v[:homology][:cov_subject]*100).round(2)
231
- note = "Protein homology (AA identity: #{v[:homology][:pId]}%; coverage (q,s): #{cov_query}%,#{cov_subject}%) with #{ref_cds[hit_gi][:prot_id]}"
232
- inference = "similar to AA sequence:#{ref_cds[hit_gi][:db_source]}:#{ref_cds[hit_gi][:prot_id]}"
355
+ note = "Protein homology (AA identity: #{v[:homology][:pId]}%; coverage (q,s): #{cov_query}%,#{cov_subject}%) with #{ref_cds.coding_seq[hit_gi][:prot_id]}"
356
+ inference = "similar to AA sequence:#{ref_cds.coding_seq[hit_gi][:db_source]}:#{ref_cds.coding_seq[hit_gi][:prot_id]}"
233
357
 
234
- if ref_cds[hit_gi][:org] != ""
235
- note += " from #{ref_cds[hit_gi][:org]}"
358
+ if ref_cds.coding_seq[hit_gi][:org] != ""
359
+ note += " from #{ref_cds.coding_seq[hit_gi][:org]}"
236
360
  end
237
361
 
238
362
  @contig_annotations_externaldb[contig_of_protein][v[:homology][:hits][0]] = {
239
- product: ref_cds[hit_gi][:product],
363
+ product: ref_cds.coding_seq[hit_gi][:product],
240
364
  feature: "cds",
241
365
  gene: nil,
242
- prot_id: ref_cds[hit_gi][:prot_id],
366
+ prot_id: ref_cds.coding_seq[hit_gi][:prot_id],
243
367
  locustag: nil,
244
368
  note: note,
245
369
  inference: inference
@@ -260,7 +384,10 @@ class BacterialAnnotator
260
384
  @contig_annotations_cds.each do |contig, contig_prots|
261
385
 
262
386
  gbk_path = @query_fasta.annotation_files[:gbk_path]
263
- gbk_to_annotate = SequenceAnnotation.new("#{gbk_path}/#{contig}.gbk", "#{gbk_path}")
387
+ gbk_to_annotate = SequenceAnnotation.new(@root,
388
+ "#{gbk_path}",
389
+ "#{gbk_path}/#{contig}.gbk",
390
+ "newGbk")
264
391
 
265
392
  if @with_external_db and @with_refence_genome
266
393
  gbk_to_annotate.add_annotation_ref_synteny_prot(
@@ -273,6 +400,11 @@ class BacterialAnnotator
273
400
  @externaldb_synteny.query_sequences,
274
401
  @contig_annotations_externaldb[contig]
275
402
  )
403
+ elsif @with_db
404
+ gbk_to_annotate.add_annotation_ref_synteny_prot(
405
+ @prot_synteny_refgenome.query_sequences,
406
+ @ref_genome.coding_seq
407
+ )
276
408
  else
277
409
  gbk_to_annotate.add_annotation_ref_synteny_prot(
278
410
  @prot_synteny_refgenome.query_sequences,
@@ -286,7 +418,7 @@ class BacterialAnnotator
286
418
  gbk_to_annotate.add_annotations @contig_annotations_rna[contig], "new"
287
419
  end
288
420
 
289
- gbk_to_annotate.save_genbank_to_file gbk_path
421
+ gbk_to_annotate.save_genbank_to_file
290
422
 
291
423
  end
292
424
  end_time = Time.now
@@ -579,9 +711,14 @@ class BacterialAnnotator
579
711
  partial = ref_annotated[ref_v[:protId]][:partial]
580
712
  end
581
713
 
714
+ _locus_tag = ref_v[:locustag] || ""
715
+ _seq_len = "NA"
716
+ # _seq_len = ref_v[:bioseq].seq.length.to_s if ! ref_v[:bioseq].nil?
717
+ _seq_len = ref_v[:length].to_s if ! ref_v[:length].nil?
718
+
582
719
  synteny_file.write(ref_v[:protId])
583
- synteny_file.write("\t"+ref_v[:locustag])
584
- synteny_file.write("\t"+ref_v[:bioseq].seq.length.to_s)
720
+ synteny_file.write("\t"+_locus_tag)
721
+ synteny_file.write("\t"+_seq_len)
585
722
  synteny_file.write("\t"+coverage_ref.to_s)
586
723
  synteny_file.write("\t"+pId.to_s)
587
724
  synteny_file.write("\t"+gene)
@@ -34,15 +34,31 @@ class BacterialComparator
34
34
  min_pid = min_pid/100
35
35
  end
36
36
 
37
+ @aln_opt = options[:align].downcase
38
+ @run_phylo = 0
39
+ if options[:phylogeny] == 1
40
+ @bootstrap = options[:bootstrap]
41
+ @run_phylo = 1
42
+ end
43
+
37
44
  @ref_prot = get_ref_prot
38
45
  @synteny = read_prot_synteny
39
46
  @stats = extract_syntenic_fasta min_cov, min_pid
40
47
 
41
48
  end
42
49
 
50
+
51
+ def run_comparison
52
+
53
+ run_mafft_aln
54
+ run_raxml_phylo if @run_phylo != 0
55
+
56
+ end
57
+
58
+
43
59
  def read_prot_synteny
44
60
 
45
- print "# Reading genome synteny files - from genome annotations.."
61
+ puts "# Reading genome synteny files START.."
46
62
  start_time = Time.now
47
63
  synteny = {}
48
64
  @genomes_list.each do |g|
@@ -65,7 +81,8 @@ class BacterialComparator
65
81
  end
66
82
  end_time = Time.now
67
83
  c_time = Helper.sec2str(end_time-start_time)
68
- print "done (#{c_time})\n"
84
+
85
+ puts "# Reading genome synteny files [DONE] (in #{c_time})"
69
86
 
70
87
  synteny
71
88
 
@@ -146,7 +163,7 @@ class BacterialComparator
146
163
  # extract and dump multifasta for syntenic genes and proteins
147
164
  def extract_syntenic_fasta min_cov, min_pid
148
165
 
149
- print "# Extracting Proteins and Genes multifasta.."
166
+ puts "# Extracting Proteins and Genes multifasta START.."
150
167
  start_time = Time.now
151
168
 
152
169
  nb_of_syntenic = 0
@@ -216,14 +233,13 @@ class BacterialComparator
216
233
 
217
234
  end_time = Time.now
218
235
  c_time = Helper.sec2str(end_time-start_time)
219
- print "done (#{c_time})\n"
236
+ puts "# Extracting Proteins and Genes multifasta [DONE] (in #{c_time})"
220
237
 
221
238
  stats[:nb_of_syntenic] = nb_of_syntenic
222
239
  #puts " Syntenic genes : " + nb_of_syntenic.to_s + " / " + @ref_prot.length.to_s
223
240
 
224
241
  end
225
242
 
226
-
227
243
  def mafft_align f
228
244
 
229
245
  trying = 0
@@ -252,7 +268,7 @@ class BacterialComparator
252
268
 
253
269
  def mafft_align_all_pep
254
270
 
255
- print "# Sequence alignments - conserved single proteins a.a. (MAFFT).."
271
+ puts "# Sequence alignments - individual proteins a.a. (MAFFT) START.."
256
272
  start_time = Time.now
257
273
 
258
274
  ori_dir = Dir.pwd
@@ -277,7 +293,7 @@ class BacterialComparator
277
293
 
278
294
  end_time = Time.now
279
295
  c_time = Helper.sec2str(end_time-start_time)
280
- print "done (#{c_time})\n"
296
+ puts "# Sequence alignments - individual proteins a.a. (MAFFT) [DONE] (in #{c_time})"
281
297
 
282
298
  # FIXME ugly hack to find out the reference genome
283
299
  ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
@@ -290,7 +306,7 @@ class BacterialComparator
290
306
 
291
307
  def mafft_align_all_dna
292
308
 
293
- print "# Sequence alignments - conserved single genes dna (MAFFT).."
309
+ puts "# Sequence alignments - individual genes dna (MAFFT) START.."
294
310
  start_time = Time.now
295
311
 
296
312
  ori_dir = Dir.pwd
@@ -313,12 +329,12 @@ class BacterialComparator
313
329
  }
314
330
  end
315
331
 
316
- # ugly hack to find out the reference genome
332
+ # ugly hack to find out the reference genome FIXME
317
333
  ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
318
334
 
319
335
  end_time = Time.now
320
336
  c_time = Helper.sec2str(end_time-start_time)
321
- print "done (#{c_time})\n"
337
+ puts "# Sequence alignments - individual genes dna (MAFFT) [DONE] (in #{c_time})"
322
338
 
323
339
  concat_alignments "align-genes-dna.all.fasta", ref_id
324
340
 
@@ -377,21 +393,21 @@ class BacterialComparator
377
393
 
378
394
  end
379
395
 
380
- def mafft_aln aln_opt
396
+ def run_mafft_aln
381
397
 
382
- if aln_opt == "both"
398
+ if @aln_opt == "both"
383
399
  mafft_align_all_pep
384
400
  mafft_align_all_dna
385
- elsif aln_opt == "prot"
401
+ elsif @aln_opt == "prot"
386
402
  mafft_align_all_pep
387
- elsif aln_opt == "dna"
403
+ elsif @aln_opt == "dna"
388
404
  mafft_align_all_dna
389
405
  end
390
406
 
391
407
  end
392
408
 
393
409
  def raxml_tree_dna bt
394
- print "# Genes DNA tree creation (RAXML).."
410
+ puts "# Genes DNA tree creation (RAXML) START.."
395
411
  start_time = Time.now
396
412
  ori_dir = Dir.pwd
397
413
  Dir.chdir(@outdir)
@@ -405,11 +421,11 @@ class BacterialComparator
405
421
  Dir.chdir(ori_dir)
406
422
  end_time = Time.now
407
423
  c_time = Helper.sec2str(end_time-start_time)
408
- print "done (#{c_time})\n"
424
+ puts "# Genes DNA tree creation (RAXML) [DONE] (in #{c_time})"
409
425
  end
410
426
 
411
427
  def raxml_tree_pep bt
412
- print "# Proteins AA tree creation (RAXML).."
428
+ puts "# Proteins AA tree creation (RAXML) START.."
413
429
  start_time = Time.now
414
430
  ori_dir = Dir.pwd
415
431
  Dir.chdir(@outdir)
@@ -423,18 +439,18 @@ class BacterialComparator
423
439
  Dir.chdir(ori_dir)
424
440
  end_time = Time.now
425
441
  c_time = Helper.sec2str(end_time-start_time)
426
- print "done (#{c_time})\n"
442
+ puts "# Proteins AA tree creation (RAXML) [DONE] (in #{c_time})"
427
443
  end
428
444
 
429
- def raxml_tree aln_opt, bt
445
+ def run_raxml_phylo
430
446
 
431
- if aln_opt == "both"
432
- raxml_tree_dna bt
433
- raxml_tree_pep bt
434
- elsif aln_opt == "prot"
435
- raxml_tree_pep bt
436
- elsif aln_opt == "dna"
437
- raxml_tree_dna bt
447
+ if @aln_opt == "both"
448
+ raxml_tree_dna @bootstrap
449
+ raxml_tree_pep @bootstrap
450
+ elsif @aln_opt == "prot"
451
+ raxml_tree_pep @bootstrap
452
+ elsif @aln_opt == "dna"
453
+ raxml_tree_dna @bootstrap
438
454
  end
439
455
 
440
456
  end