bacterial-annotator 0.7.0 → 0.7.1

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -6,15 +6,19 @@
6
6
  # version: 0.0.1
7
7
  # licence:
8
8
 
9
+ require 'json'
10
+ require 'zlib'
9
11
 
10
12
  class SequenceSynteny
11
13
 
12
14
  attr_reader :query_file, :subject_file, :aln_hits, :query_sequences, :subject_sequences
13
15
 
14
- def initialize query_file, subject_file, name, pidentity, min_coverage, type
16
+ def initialize root, outdir, query_file, subject_file, name, pidentity, min_coverage, type
17
+
18
+ @root = root
19
+ @outdir = outdir
15
20
  @query_file = query_file
16
21
  @subject_file = subject_file
17
-
18
22
  @query_sequences = get_sequences(query_file)
19
23
  @subject_sequences = get_sequences(subject_file)
20
24
 
@@ -28,22 +32,47 @@ class SequenceSynteny
28
32
 
29
33
 
30
34
  # get sequences name with length in hash
31
- def get_sequences seq_file
35
+ def get_sequences raw_file
32
36
 
33
37
  sequences = {}
34
- flat = Bio::FlatFile.auto("#{seq_file}")
35
- flat.each_entry do |s|
36
- s_name = s.definition.chomp.split(" ")[0]
37
- sequences[s_name] = {}
38
- properties = s.definition.chomp.split(";")
39
- partial = false
40
- if properties.length >= 2 and properties[1].include? "partial"
41
- partial = (properties[1].gsub("partial=","").include? '1')
38
+
39
+ if raw_file.include?(".dmnd")
40
+
41
+ seq_info_file = raw_file.gsub(".dmnd",".json.gz")
42
+
43
+ json_genes = {}
44
+ Zlib::GzipReader.open(seq_info_file) {|gz|
45
+ json_genes = JSON.parse(gz.read)
46
+ }
47
+
48
+ json_genes.each do |gene|
49
+
50
+ sequences[gene["cluster_id"]] = {}
51
+ sequences[gene["cluster_id"]][:length] = gene["consensus_length"].to_f
52
+ sequences[gene["cluster_id"]][:conserved] = false
53
+ sequences[gene["cluster_id"]][:contig] = gene["cluster_id"].split("_")[0..-2].join("_") if gene["cluster_id"].include? "_"
54
+
42
55
  end
43
- sequences[s_name][:partial] = partial
44
- sequences[s_name][:length] = s.seq.length
45
- sequences[s_name][:conserved] = false
46
- sequences[s_name][:contig] = s_name.split("_")[0..-2].join("_") if s_name.include? "_"
56
+
57
+ else
58
+
59
+ seq_file = raw_file
60
+ flat = Bio::FlatFile.auto("#{seq_file}")
61
+ flat.each_entry do |s|
62
+ s_name = s.definition.chomp.split(" ")[0]
63
+ sequences[s_name] = {}
64
+ properties = s.definition.chomp.split(";")
65
+ partial = false
66
+ if properties.length >= 2 and properties[1].include? "partial"
67
+ partial = (properties[1].gsub("partial=","").include? '1')
68
+ end
69
+ sequences[s_name][:partial] = partial
70
+ sequences[s_name][:length] = s.seq.length
71
+ sequences[s_name][:conserved] = false
72
+ sequences[s_name][:contig] = s_name.split("_")[0..-2].join("_") if s_name.include? "_"
73
+
74
+ end
75
+
47
76
  end
48
77
 
49
78
  sequences
@@ -51,14 +80,42 @@ class SequenceSynteny
51
80
  end
52
81
 
53
82
  # run blat on proteins
54
- def run_blat root, outdir
55
- base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity} > /dev/null 2>&1"
83
+ def run_blat
84
+ base_cmd = "#{@root}/blat.linux -out=blast8 -minIdentity=#{@pidentity} > /dev/null 2>&1"
56
85
  if @type == "prot"
57
- system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
86
+ system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{@outdir}/#{@name}.blat8.tsv")
87
+ else
88
+ system("#{base_cmd} #{@subject_file} #{@query_file} #{@outdir}/#{@name}.blat8.tsv")
89
+ end
90
+ @aln_file = "#{@outdir}/#{@name}.blat8.tsv"
91
+ # extract_hits
92
+ end # end of method
93
+
94
+ # run fasta36 on proteins
95
+ def run_fasta36
96
+ if @type == "prot"
97
+ system("#{@root}/fasta36.linux -T 1 -b 3 -E 1e-40 -m 8 #{@query_file} #{@subject_file} > #{@outdir}/#{@name}.fasta36.tsv")
98
+ else
99
+ system("#{@root}/glsearch36.linux -T 1 -b 12 -E 1e-40 -m 8 #{@query_file} #{@subject_file} > #{@outdir}/#{@name}.fasta36.tsv")
100
+ end
101
+ @aln_file_fasta36 = "#{@outdir}/#{@name}.fasta36.tsv"
102
+ # extract_hits
103
+ end # end of method
104
+
105
+ # run diamond on proteins
106
+ def run_diamond
107
+ if @type == "prot"
108
+ if subject_file.include? ".dmnd"
109
+ db_file = subject_file
110
+ else
111
+ system("#{@root}/diamond.linux makedb --db #{subject_file} --in #{subject_file} > /dev/null 2>&1")
112
+ db_file = subject_file
113
+ end
114
+ system("#{@root}/diamond.linux blastp --db #{db_file} -q #{query_file} -o #{@outdir}/#{@name}.diamond.tsv -f 6 > /dev/null 2>&1")
58
115
  else
59
- system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
116
+ # system("#{@root}/glsearch36.linux -b 3 -E 1e-25 -m 8 #{@subject_file} #{@query_file} > #{@outdir}/#{@name}.fasta36.tsv")
60
117
  end
61
- @aln_file = "#{outdir}/#{@name}.blat8.tsv"
118
+ @aln_file = "#{@outdir}/#{@name}.diamond.tsv"
62
119
  # extract_hits
63
120
  end # end of method
64
121
 
@@ -23,6 +23,8 @@ class BacterialAnnotator
23
23
  @root = root
24
24
  @options = options
25
25
 
26
+ abort if ! @options.has_key? :input
27
+
26
28
  @minlength = @options[:minlength].to_i
27
29
  @options[:minlength] = @options[:minlength].to_i
28
30
  @options[:pidentity] = @options[:pidentity].to_f
@@ -44,12 +46,25 @@ class BacterialAnnotator
44
46
  end
45
47
  Dir.mkdir(@options[:outdir])
46
48
 
47
- @query_fasta = SequenceFasta.new(@options[:input], @options[:meta])
49
+ @query_fasta = SequenceFasta.new(@root,
50
+ options[:outdir],
51
+ @options[:input],
52
+ @options[:meta])
48
53
 
49
54
  @with_refence_genome = false
55
+ @with_db = false
50
56
  if @options.has_key? :refgenome
51
57
  @with_refence_genome = true
52
- @ref_genome = SequenceAnnotation.new(@options[:refgenome], @options[:outdir])
58
+ @ref_genome = SequenceAnnotation.new(@root,
59
+ @options[:outdir],
60
+ @options[:refgenome],
61
+ "refGbk")
62
+ elsif @options[:mergem]
63
+ @with_db = true
64
+ @ref_genome = SequenceAnnotation.new(@root,
65
+ @options[:outdir],
66
+ @options[:mergem],
67
+ "db")
53
68
  end
54
69
 
55
70
  @with_external_db = false
@@ -76,52 +91,12 @@ class BacterialAnnotator
76
91
 
77
92
  end # end of method
78
93
 
79
- # Prepare files for the annotation
80
- # Will run prodigal on the query and prepare reference genome files
81
- def prepare_files_for_annotation
82
- print "# Running Prodigal on your genome.."
83
- start_time = Time.now
84
- @query_fasta.run_prodigal @root, @options[:outdir]
85
- end_time = Time.now
86
- c_time = Helper.sec2str(end_time - start_time)
87
- print "done (#{c_time})\n"
88
- if @with_refence_genome
89
- @ref_genome.write_cds_to_file @options[:outdir]
90
- @ref_genome.write_rna_to_file @options[:outdir]
91
- # puts "Successfully loaded #{@ref_genome.gbk.definition}"
92
- end
93
- end # end of method
94
-
95
-
96
- def run_reference_synteny_prot
97
-
98
- ref_synteny_prot = SequenceSynteny.new(@query_fasta.annotation_files[:proteins], @ref_genome.cds_file,
99
- "Prot-Ref", @options[:pidentity], @options[:pcoverage], "prot")
100
-
101
- print "# Running alignment with Reference Genome CDS (blat).."
102
- start_time = Time.now
103
- ref_synteny_prot.run_blat @root, @options[:outdir]
104
- end_time = Time.now
105
- c_time = Helper.sec2str(end_time - start_time)
106
- print "done (#{c_time})\n"
107
-
108
- ref_synteny_prot.extract_hits :refgenome
109
-
110
- ref_synteny_prot.query_sequences.each do |k,v|
111
- if v.has_key? :homology
112
- @contig_annotations_cds[v[:contig]] = [] if ! @contig_annotations_cds.has_key? v[:contig]
113
- @contig_annotations_cds[v[:contig]] << k
114
- end
115
- end
116
-
117
- ref_synteny_prot
118
-
119
- end
120
-
121
94
 
122
95
  # run_alignment of reference genome proteins and the query
123
96
  def run_annotation
124
97
 
98
+ prepare_files_for_annotation
99
+
125
100
  # process reference genome synteny
126
101
  if @with_refence_genome # Annotation with the Reference Genome
127
102
 
@@ -153,23 +128,69 @@ class BacterialAnnotator
153
128
  dump_ref_synteny_to_file
154
129
 
155
130
  # run RNA annotation
156
- @rna_synteny = SequenceSynteny.new(@query_fasta.fasta_file, @ref_genome.rna_file,
157
- "RNA-Ref", @options[:pidentity], @options[:pcoverage], "dna")
131
+ @rna_synteny = SequenceSynteny.new(@root,
132
+ @options[:outdir],
133
+ @query_fasta.fasta_file,
134
+ @ref_genome.rna_file,
135
+ "RNA-Ref",
136
+ @options[:pidentity],
137
+ @options[:pcoverage],
138
+ "dna")
139
+
158
140
  print "# Running alignment with Reference Genome RNA (blat).."
159
141
  start_time = Time.now
160
- @rna_synteny.run_blat @root, @options[:outdir]
142
+ @rna_synteny.run_blat
161
143
  end_time = Time.now
162
144
  c_time = Helper.sec2str(end_time-start_time)
163
145
  print "done (#{c_time})\n"
146
+
147
+ # # takes too long
148
+ # print "# Running alignment with Reference Genome RNA (fasta36).."
149
+ # start_time = Time.now
150
+ # @rna_synteny.run_fasta36
151
+ # end_time = Time.now
152
+ # c_time = Helper.sec2str(end_time-start_time)
153
+ # print "done (#{c_time})\n"
154
+
164
155
  @rna_synteny.extract_hits_dna :rna
165
156
  @contig_annotations_rna = {}
166
157
  @query_fasta.annotation_files[:contigs].each_with_index do |contig, contig_index|
167
158
  @contig_annotations_rna[contig] = @rna_synteny.get_annotation_for_contig contig
168
159
  end
169
160
 
161
+
162
+ elsif @with_db
163
+
164
+ @prot_synteny_refgenome = run_mergem_synteny_prot
165
+ # iterate over each contig
166
+ # discard short contig
167
+ # cumulate statistics of homolog CDS
168
+ @query_fasta.annotation_files[:contigs].each_with_index do |contig, contig_index|
169
+
170
+ # Skip short contigs
171
+ if @query_fasta.annotation_files[:contigs_length][contig_index] < @minlength
172
+ @annotation_stats[:short_contigs] << contig
173
+ next
174
+ end
175
+
176
+ remaining_cds = cumulate_annotation_stats_reference contig
177
+
178
+ if remaining_cds != []
179
+ @contig_foreign_cds[contig] = remaining_cds
180
+ end
181
+
182
+ end
183
+
184
+ # dump foreign proteins to file
185
+ foreign_cds_file = dump_cds
186
+
187
+ # dump reference CDS synteny to file
188
+ dump_ref_synteny_to_file
189
+
190
+
170
191
  else # no reference genome
171
192
 
172
- # no reference genome .. will process all the CDS
193
+ # no reference genome .. will process all the CDS as foreign for the external db
173
194
  foreign_cds_file = @query_fasta.annotation_files[:proteins]
174
195
 
175
196
  end
@@ -187,6 +208,99 @@ class BacterialAnnotator
187
208
  end # end of method
188
209
 
189
210
 
211
+ # Prepare files for the annotation
212
+ # Will run prodigal on the query and prepare reference genome files
213
+ def prepare_files_for_annotation
214
+ print "# Running Prodigal on your genome.."
215
+ start_time = Time.now
216
+ @query_fasta.run_prodigal
217
+ end_time = Time.now
218
+ c_time = Helper.sec2str(end_time - start_time)
219
+ print "done (#{c_time})\n"
220
+ end # end of method
221
+
222
+
223
+ def run_mergem_synteny_prot
224
+
225
+
226
+ ref_synteny_prot = SequenceSynteny.new(@root,
227
+ @options[:outdir],
228
+ @query_fasta.annotation_files[:proteins],
229
+ @ref_genome.cds_file,
230
+ "Prot-Ref",
231
+ @options[:pidentity],
232
+ @options[:pcoverage],
233
+ "prot")
234
+
235
+ print "# Running alignment with Reference Genome CDS (diamond).."
236
+ start_time = Time.now
237
+ ref_synteny_prot.run_diamond
238
+ end_time = Time.now
239
+ c_time = Helper.sec2str(end_time - start_time)
240
+ print "done (#{c_time})\n"
241
+
242
+ ref_synteny_prot.extract_hits :refgenome
243
+
244
+ ref_synteny_prot.query_sequences.each do |k,v|
245
+ if v.has_key? :homology
246
+ @contig_annotations_cds[v[:contig]] = [] if ! @contig_annotations_cds.has_key? v[:contig]
247
+ @contig_annotations_cds[v[:contig]] << k
248
+ end
249
+ end
250
+
251
+ ref_synteny_prot
252
+
253
+
254
+ end
255
+
256
+
257
+
258
+ def run_reference_synteny_prot
259
+
260
+ ref_synteny_prot = SequenceSynteny.new(@root,
261
+ @options[:outdir],
262
+ @query_fasta.annotation_files[:proteins],
263
+ @ref_genome.cds_file,
264
+ "Prot-Ref",
265
+ @options[:pidentity],
266
+ @options[:pcoverage],
267
+ "prot")
268
+
269
+ print "# Running alignment with Reference Genome CDS (diamond).."
270
+ start_time = Time.now
271
+ ref_synteny_prot.run_diamond
272
+ end_time = Time.now
273
+ c_time = Helper.sec2str(end_time - start_time)
274
+ print "done (#{c_time})\n"
275
+
276
+ # print "# Running alignment with Reference Genome CDS (blat).."
277
+ # start_time = Time.now
278
+ # ref_synteny_prot.run_blat
279
+ # end_time = Time.now
280
+ # c_time = Helper.sec2str(end_time - start_time)
281
+ # print "done (#{c_time})\n"
282
+
283
+ # print "# Running alignment with Reference Genome CDS (fasta36).."
284
+ # start_time = Time.now
285
+ # ref_synteny_prot.run_fasta36
286
+ # end_time = Time.now
287
+ # c_time = Helper.sec2str(end_time - start_time)
288
+ # print "done (#{c_time})\n"
289
+
290
+ ref_synteny_prot.extract_hits :refgenome
291
+
292
+ ref_synteny_prot.query_sequences.each do |k,v|
293
+ if v.has_key? :homology
294
+ @contig_annotations_cds[v[:contig]] = [] if ! @contig_annotations_cds.has_key? v[:contig]
295
+ @contig_annotations_cds[v[:contig]] << k
296
+ end
297
+ end
298
+
299
+ ref_synteny_prot
300
+
301
+ end
302
+
303
+
190
304
  # Finishing the annotation of the remaining CDS
191
305
  def finish_annotation remaining_cds_file
192
306
 
@@ -194,15 +308,25 @@ class BacterialAnnotator
194
308
  if @options.has_key? :external_db # from an external DB
195
309
 
196
310
  db_file = @options[:external_db]
197
- ref_cds = extract_externaldb_prot_info db_file
198
-
199
- @externaldb_synteny = SequenceSynteny.new(remaining_cds_file, db_file,
200
- "Prot-ExternalDB", @options[:pidentity],
201
- @options[:pcoverage], "prot")
311
+ ref_cds = SequenceAnnotation.new(@root,
312
+ @options[:outdir],
313
+ db_file,
314
+ "fasta")
315
+
316
+ # ref_cds = extract_externaldb_prot_info db_file
317
+
318
+ @externaldb_synteny = SequenceSynteny.new(@root,
319
+ @options[:outdir],
320
+ remaining_cds_file,
321
+ db_file,
322
+ "Prot-ExternalDB",
323
+ @options[:pidentity],
324
+ @options[:pcoverage],
325
+ "prot")
202
326
 
203
327
  print "# Running BLAT alignment with External Database.."
204
328
  start_time = Time.now
205
- @externaldb_synteny.run_blat @root, @options[:outdir]
329
+ @externaldb_synteny.run_blat
206
330
  end_time = Time.now
207
331
  c_time = Helper.sec2str(end_time-start_time)
208
332
  print "done (#{c_time})\n"
@@ -228,18 +352,18 @@ class BacterialAnnotator
228
352
  # note = "Protein homology (#{v[:pId]}% identity) with gi:#{hit_gi}"
229
353
  cov_query = (v[:homology][:cov_query]*100).round(2)
230
354
  cov_subject = (v[:homology][:cov_subject]*100).round(2)
231
- note = "Protein homology (AA identity: #{v[:homology][:pId]}%; coverage (q,s): #{cov_query}%,#{cov_subject}%) with #{ref_cds[hit_gi][:prot_id]}"
232
- inference = "similar to AA sequence:#{ref_cds[hit_gi][:db_source]}:#{ref_cds[hit_gi][:prot_id]}"
355
+ note = "Protein homology (AA identity: #{v[:homology][:pId]}%; coverage (q,s): #{cov_query}%,#{cov_subject}%) with #{ref_cds.coding_seq[hit_gi][:prot_id]}"
356
+ inference = "similar to AA sequence:#{ref_cds.coding_seq[hit_gi][:db_source]}:#{ref_cds.coding_seq[hit_gi][:prot_id]}"
233
357
 
234
- if ref_cds[hit_gi][:org] != ""
235
- note += " from #{ref_cds[hit_gi][:org]}"
358
+ if ref_cds.coding_seq[hit_gi][:org] != ""
359
+ note += " from #{ref_cds.coding_seq[hit_gi][:org]}"
236
360
  end
237
361
 
238
362
  @contig_annotations_externaldb[contig_of_protein][v[:homology][:hits][0]] = {
239
- product: ref_cds[hit_gi][:product],
363
+ product: ref_cds.coding_seq[hit_gi][:product],
240
364
  feature: "cds",
241
365
  gene: nil,
242
- prot_id: ref_cds[hit_gi][:prot_id],
366
+ prot_id: ref_cds.coding_seq[hit_gi][:prot_id],
243
367
  locustag: nil,
244
368
  note: note,
245
369
  inference: inference
@@ -260,7 +384,10 @@ class BacterialAnnotator
260
384
  @contig_annotations_cds.each do |contig, contig_prots|
261
385
 
262
386
  gbk_path = @query_fasta.annotation_files[:gbk_path]
263
- gbk_to_annotate = SequenceAnnotation.new("#{gbk_path}/#{contig}.gbk", "#{gbk_path}")
387
+ gbk_to_annotate = SequenceAnnotation.new(@root,
388
+ "#{gbk_path}",
389
+ "#{gbk_path}/#{contig}.gbk",
390
+ "newGbk")
264
391
 
265
392
  if @with_external_db and @with_refence_genome
266
393
  gbk_to_annotate.add_annotation_ref_synteny_prot(
@@ -273,6 +400,11 @@ class BacterialAnnotator
273
400
  @externaldb_synteny.query_sequences,
274
401
  @contig_annotations_externaldb[contig]
275
402
  )
403
+ elsif @with_db
404
+ gbk_to_annotate.add_annotation_ref_synteny_prot(
405
+ @prot_synteny_refgenome.query_sequences,
406
+ @ref_genome.coding_seq
407
+ )
276
408
  else
277
409
  gbk_to_annotate.add_annotation_ref_synteny_prot(
278
410
  @prot_synteny_refgenome.query_sequences,
@@ -286,7 +418,7 @@ class BacterialAnnotator
286
418
  gbk_to_annotate.add_annotations @contig_annotations_rna[contig], "new"
287
419
  end
288
420
 
289
- gbk_to_annotate.save_genbank_to_file gbk_path
421
+ gbk_to_annotate.save_genbank_to_file
290
422
 
291
423
  end
292
424
  end_time = Time.now
@@ -579,9 +711,14 @@ class BacterialAnnotator
579
711
  partial = ref_annotated[ref_v[:protId]][:partial]
580
712
  end
581
713
 
714
+ _locus_tag = ref_v[:locustag] || ""
715
+ _seq_len = "NA"
716
+ # _seq_len = ref_v[:bioseq].seq.length.to_s if ! ref_v[:bioseq].nil?
717
+ _seq_len = ref_v[:length].to_s if ! ref_v[:length].nil?
718
+
582
719
  synteny_file.write(ref_v[:protId])
583
- synteny_file.write("\t"+ref_v[:locustag])
584
- synteny_file.write("\t"+ref_v[:bioseq].seq.length.to_s)
720
+ synteny_file.write("\t"+_locus_tag)
721
+ synteny_file.write("\t"+_seq_len)
585
722
  synteny_file.write("\t"+coverage_ref.to_s)
586
723
  synteny_file.write("\t"+pId.to_s)
587
724
  synteny_file.write("\t"+gene)
@@ -34,15 +34,31 @@ class BacterialComparator
34
34
  min_pid = min_pid/100
35
35
  end
36
36
 
37
+ @aln_opt = options[:align].downcase
38
+ @run_phylo = 0
39
+ if options[:phylogeny] == 1
40
+ @bootstrap = options[:bootstrap]
41
+ @run_phylo = 1
42
+ end
43
+
37
44
  @ref_prot = get_ref_prot
38
45
  @synteny = read_prot_synteny
39
46
  @stats = extract_syntenic_fasta min_cov, min_pid
40
47
 
41
48
  end
42
49
 
50
+
51
+ def run_comparison
52
+
53
+ run_mafft_aln
54
+ run_raxml_phylo if @run_phylo != 0
55
+
56
+ end
57
+
58
+
43
59
  def read_prot_synteny
44
60
 
45
- print "# Reading genome synteny files - from genome annotations.."
61
+ puts "# Reading genome synteny files START.."
46
62
  start_time = Time.now
47
63
  synteny = {}
48
64
  @genomes_list.each do |g|
@@ -65,7 +81,8 @@ class BacterialComparator
65
81
  end
66
82
  end_time = Time.now
67
83
  c_time = Helper.sec2str(end_time-start_time)
68
- print "done (#{c_time})\n"
84
+
85
+ puts "# Reading genome synteny files [DONE] (in #{c_time})"
69
86
 
70
87
  synteny
71
88
 
@@ -146,7 +163,7 @@ class BacterialComparator
146
163
  # extract and dump multifasta for syntenic genes and proteins
147
164
  def extract_syntenic_fasta min_cov, min_pid
148
165
 
149
- print "# Extracting Proteins and Genes multifasta.."
166
+ puts "# Extracting Proteins and Genes multifasta START.."
150
167
  start_time = Time.now
151
168
 
152
169
  nb_of_syntenic = 0
@@ -216,14 +233,13 @@ class BacterialComparator
216
233
 
217
234
  end_time = Time.now
218
235
  c_time = Helper.sec2str(end_time-start_time)
219
- print "done (#{c_time})\n"
236
+ puts "# Extracting Proteins and Genes multifasta [DONE] (in #{c_time})"
220
237
 
221
238
  stats[:nb_of_syntenic] = nb_of_syntenic
222
239
  #puts " Syntenic genes : " + nb_of_syntenic.to_s + " / " + @ref_prot.length.to_s
223
240
 
224
241
  end
225
242
 
226
-
227
243
  def mafft_align f
228
244
 
229
245
  trying = 0
@@ -252,7 +268,7 @@ class BacterialComparator
252
268
 
253
269
  def mafft_align_all_pep
254
270
 
255
- print "# Sequence alignments - conserved single proteins a.a. (MAFFT).."
271
+ puts "# Sequence alignments - individual proteins a.a. (MAFFT) START.."
256
272
  start_time = Time.now
257
273
 
258
274
  ori_dir = Dir.pwd
@@ -277,7 +293,7 @@ class BacterialComparator
277
293
 
278
294
  end_time = Time.now
279
295
  c_time = Helper.sec2str(end_time-start_time)
280
- print "done (#{c_time})\n"
296
+ puts "# Sequence alignments - individual proteins a.a. (MAFFT) [DONE] (in #{c_time})"
281
297
 
282
298
  # FIXME ugly hack to find out the reference genome
283
299
  ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
@@ -290,7 +306,7 @@ class BacterialComparator
290
306
 
291
307
  def mafft_align_all_dna
292
308
 
293
- print "# Sequence alignments - conserved single genes dna (MAFFT).."
309
+ puts "# Sequence alignments - individual genes dna (MAFFT) START.."
294
310
  start_time = Time.now
295
311
 
296
312
  ori_dir = Dir.pwd
@@ -313,12 +329,12 @@ class BacterialComparator
313
329
  }
314
330
  end
315
331
 
316
- # ugly hack to find out the reference genome
332
+ # ugly hack to find out the reference genome FIXME
317
333
  ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
318
334
 
319
335
  end_time = Time.now
320
336
  c_time = Helper.sec2str(end_time-start_time)
321
- print "done (#{c_time})\n"
337
+ puts "# Sequence alignments - individual genes dna (MAFFT) [DONE] (in #{c_time})"
322
338
 
323
339
  concat_alignments "align-genes-dna.all.fasta", ref_id
324
340
 
@@ -377,21 +393,21 @@ class BacterialComparator
377
393
 
378
394
  end
379
395
 
380
- def mafft_aln aln_opt
396
+ def run_mafft_aln
381
397
 
382
- if aln_opt == "both"
398
+ if @aln_opt == "both"
383
399
  mafft_align_all_pep
384
400
  mafft_align_all_dna
385
- elsif aln_opt == "prot"
401
+ elsif @aln_opt == "prot"
386
402
  mafft_align_all_pep
387
- elsif aln_opt == "dna"
403
+ elsif @aln_opt == "dna"
388
404
  mafft_align_all_dna
389
405
  end
390
406
 
391
407
  end
392
408
 
393
409
  def raxml_tree_dna bt
394
- print "# Genes DNA tree creation (RAXML).."
410
+ puts "# Genes DNA tree creation (RAXML) START.."
395
411
  start_time = Time.now
396
412
  ori_dir = Dir.pwd
397
413
  Dir.chdir(@outdir)
@@ -405,11 +421,11 @@ class BacterialComparator
405
421
  Dir.chdir(ori_dir)
406
422
  end_time = Time.now
407
423
  c_time = Helper.sec2str(end_time-start_time)
408
- print "done (#{c_time})\n"
424
+ puts "# Genes DNA tree creation (RAXML) [DONE] (in #{c_time})"
409
425
  end
410
426
 
411
427
  def raxml_tree_pep bt
412
- print "# Proteins AA tree creation (RAXML).."
428
+ puts "# Proteins AA tree creation (RAXML) START.."
413
429
  start_time = Time.now
414
430
  ori_dir = Dir.pwd
415
431
  Dir.chdir(@outdir)
@@ -423,18 +439,18 @@ class BacterialComparator
423
439
  Dir.chdir(ori_dir)
424
440
  end_time = Time.now
425
441
  c_time = Helper.sec2str(end_time-start_time)
426
- print "done (#{c_time})\n"
442
+ puts "# Proteins AA tree creation (RAXML) [DONE] (in #{c_time})"
427
443
  end
428
444
 
429
- def raxml_tree aln_opt, bt
445
+ def run_raxml_phylo
430
446
 
431
- if aln_opt == "both"
432
- raxml_tree_dna bt
433
- raxml_tree_pep bt
434
- elsif aln_opt == "prot"
435
- raxml_tree_pep bt
436
- elsif aln_opt == "dna"
437
- raxml_tree_dna bt
447
+ if @aln_opt == "both"
448
+ raxml_tree_dna @bootstrap
449
+ raxml_tree_pep @bootstrap
450
+ elsif @aln_opt == "prot"
451
+ raxml_tree_pep @bootstrap
452
+ elsif @aln_opt == "dna"
453
+ raxml_tree_dna @bootstrap
438
454
  end
439
455
 
440
456
  end