bacterial-annotator 0.7.1 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/bacterial-annotator +4 -1
- data/lib/bacterial-annotator.rb +1 -1
- data/lib/bacterial-comparator.rb +79 -22
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c90c64fd59297a06dd9b50def86f8ba9305b23e
|
4
|
+
data.tar.gz: '008eec9088d41505c8c9ffbbc097ce4cee5b5c3b'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7904dc06b85e1dca28bab9fbb5a84d2919da714990775df637ed27f504f25218eeffd264146fd1082e2a44e1188b2faf6e2a0ce923183eb88caf15595cf26500
|
7
|
+
data.tar.gz: 1c3a09f9952e5750faad5fa5c5c27492967dcebcb6f2fa5a8fd89ef79f5a074d2e6e4f4eeb299729304328777af8e752a1747409ee24150ce1f96354243a0f38
|
data/bin/bacterial-annotator
CHANGED
@@ -12,7 +12,7 @@ require 'bacterial-annotator'
|
|
12
12
|
require 'bacterial-comparator'
|
13
13
|
require 'bacterial-identificator'
|
14
14
|
|
15
|
-
VERSION = "0.7.1"
|
15
|
+
VERSION = "0.8.0"
|
16
16
|
|
17
17
|
def print_version
|
18
18
|
version = "Bacterial Annotator - Version #{VERSION}\n\n"
|
@@ -189,6 +189,7 @@ def parseOptions_compare
|
|
189
189
|
options[:genomes_list] = []
|
190
190
|
options[:concat] = 0
|
191
191
|
options[:phylogeny] = 0
|
192
|
+
options[:software] = "fasttree"
|
192
193
|
options[:bootstrap] = 100
|
193
194
|
|
194
195
|
while x = ARGV.shift
|
@@ -208,6 +209,8 @@ def parseOptions_compare
|
|
208
209
|
options[:concat] = ARGV.shift
|
209
210
|
when "--phylogeny"
|
210
211
|
options[:phylogeny] = 1
|
212
|
+
when "--software"
|
213
|
+
options[:software] = ARGV.shift
|
211
214
|
when "--bootstrap"
|
212
215
|
options[:bootstrap] = (ARGV.shift).to_i
|
213
216
|
when "--help", "-h"
|
data/lib/bacterial-annotator.rb
CHANGED
data/lib/bacterial-comparator.rb
CHANGED
@@ -20,11 +20,17 @@ class BacterialComparator
|
|
20
20
|
def initialize options, root
|
21
21
|
|
22
22
|
@root = root
|
23
|
-
@outdir = options[:outdir]
|
23
|
+
@outdir = File.expand_path(File.dirname(options[:outdir]))+"/#{options[:outdir]}"
|
24
24
|
Dir.mkdir(@outdir) if ! Dir.exists? @outdir
|
25
25
|
@genomes_list = options[:genomes_list]
|
26
26
|
@proc = options[:proc].to_i
|
27
27
|
@phylo_nb_genes = options[:phylo_nb_genes]
|
28
|
+
if ["fasttree","raxml"].include? options[:software]
|
29
|
+
@software = options[:software]
|
30
|
+
else
|
31
|
+
@software = "fasttree"
|
32
|
+
end
|
33
|
+
|
28
34
|
min_cov = options[:min_cov].to_f
|
29
35
|
min_pid = options[:pidentity].to_f
|
30
36
|
if min_cov > 1
|
@@ -51,7 +57,8 @@ class BacterialComparator
|
|
51
57
|
def run_comparison
|
52
58
|
|
53
59
|
run_mafft_aln
|
54
|
-
|
60
|
+
|
61
|
+
run_phylo if @run_phylo != 0
|
55
62
|
|
56
63
|
end
|
57
64
|
|
@@ -119,7 +126,7 @@ class BacterialComparator
|
|
119
126
|
|
120
127
|
def build_multifasta synteny_list
|
121
128
|
|
122
|
-
pep_out_dir = "
|
129
|
+
pep_out_dir = "#{@outdir}/align-genes-pep"
|
123
130
|
|
124
131
|
ref_proteins = load_genome_cds(Dir["#{@genomes_list[0]}/*.pep"][0])
|
125
132
|
synteny_list.each do |k,v|
|
@@ -139,7 +146,7 @@ class BacterialComparator
|
|
139
146
|
|
140
147
|
end
|
141
148
|
|
142
|
-
dna_out_dir = "
|
149
|
+
dna_out_dir = "#{@outdir}/align-genes-dna"
|
143
150
|
ref_genes = load_genome_cds(Dir["#{@genomes_list[0]}/*.dna"][0])
|
144
151
|
synteny_list.each do |k,v|
|
145
152
|
dna_out = File.open(dna_out_dir+"/#{k}.dna", "w")
|
@@ -220,8 +227,8 @@ class BacterialComparator
|
|
220
227
|
|
221
228
|
fout.close
|
222
229
|
|
223
|
-
pep_out_dir = "
|
224
|
-
dna_out_dir = "
|
230
|
+
pep_out_dir = "#{@outdir}/align-genes-pep"
|
231
|
+
dna_out_dir = "#{@outdir}/align-genes-dna"
|
225
232
|
Dir.mkdir(pep_out_dir) if ! Dir.exists? pep_out_dir
|
226
233
|
Dir.mkdir(dna_out_dir) if ! Dir.exists? dna_out_dir
|
227
234
|
|
@@ -296,9 +303,11 @@ class BacterialComparator
|
|
296
303
|
puts "# Sequence alignments - individual proteins a.a. (MAFFT) [DONE] (in #{c_time})"
|
297
304
|
|
298
305
|
# FIXME ugly hack to find out the reference genome
|
299
|
-
|
306
|
+
Dir.chdir("#{@outdir}")
|
307
|
+
Dir.chdir("../")
|
308
|
+
ref_id = Dir["#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
|
300
309
|
|
301
|
-
concat_alignments "align-genes-pep.all.fasta", ref_id
|
310
|
+
concat_alignments "#{@outdir}/align-genes-pep.all.fasta", ref_id
|
302
311
|
|
303
312
|
Dir.chdir(ori_dir)
|
304
313
|
|
@@ -330,13 +339,17 @@ class BacterialComparator
|
|
330
339
|
end
|
331
340
|
|
332
341
|
# ugly hack to find out the reference genome FIXME
|
333
|
-
|
342
|
+
Dir.chdir("#{@outdir}")
|
343
|
+
Dir.chdir("../")
|
344
|
+
|
345
|
+
ref_id = Dir["#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
|
346
|
+
# ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
|
334
347
|
|
335
348
|
end_time = Time.now
|
336
349
|
c_time = Helper.sec2str(end_time-start_time)
|
337
350
|
puts "# Sequence alignments - individual genes dna (MAFFT) [DONE] (in #{c_time})"
|
338
351
|
|
339
|
-
concat_alignments "align-genes-dna.all.fasta", ref_id
|
352
|
+
concat_alignments "#{@outdir}/align-genes-dna.all.fasta", ref_id
|
340
353
|
|
341
354
|
Dir.chdir(ori_dir)
|
342
355
|
|
@@ -345,15 +358,17 @@ class BacterialComparator
|
|
345
358
|
|
346
359
|
def concat_alignments outfile, ref_id
|
347
360
|
|
348
|
-
if File.exists?("
|
361
|
+
if File.exists?("#{outfile}") and File.size("#{outfile}") > 0
|
349
362
|
puts "..Alignment concatenated file already exists, skipping."
|
350
363
|
return
|
351
364
|
end
|
352
365
|
|
353
|
-
fout = File.open("
|
366
|
+
fout = File.open("#{outfile}", "w")
|
354
367
|
|
355
368
|
seq = ""
|
356
|
-
|
369
|
+
aln_dir = outfile.split(".")[0..-3].join(".")
|
370
|
+
|
371
|
+
Dir["#{aln_dir}/*.aln"].each do |f|
|
357
372
|
flat = Bio::FlatFile.auto(f)
|
358
373
|
ref_seq = flat.entries[0]
|
359
374
|
flat.close
|
@@ -366,7 +381,7 @@ class BacterialComparator
|
|
366
381
|
|
367
382
|
for i in 1..@genomes_list.length
|
368
383
|
seq = ""
|
369
|
-
Dir["
|
384
|
+
Dir["#{aln_dir}/*.aln"].each do |f|
|
370
385
|
flat = Bio::FlatFile.auto(f)
|
371
386
|
j=0
|
372
387
|
flat.each_entry do |entry|
|
@@ -442,15 +457,57 @@ class BacterialComparator
|
|
442
457
|
puts "# Proteins AA tree creation (RAXML) [DONE] (in #{c_time})"
|
443
458
|
end
|
444
459
|
|
445
|
-
def run_raxml_phylo
|
446
460
|
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
461
|
+
def fasttree_tree_dna bt
|
462
|
+
puts "# Genes DNA tree creation (FastTree) START.."
|
463
|
+
start_time = Time.now
|
464
|
+
ori_dir = Dir.pwd
|
465
|
+
Dir.chdir(@outdir)
|
466
|
+
Dir.mkdir("tree-genes-dna") if ! Dir.exists?("tree-genes-dna")
|
467
|
+
current_dir = Dir.pwd
|
468
|
+
cmd = system("export OMP_NUM_THREADS=#{@proc} && #{@root}/fasttree.linux -nosupport -fastest -nt -gtr align-genes-dna.all.fasta > tree-genes-dna.nwk")
|
469
|
+
Dir.chdir(ori_dir)
|
470
|
+
end_time = Time.now
|
471
|
+
c_time = Helper.sec2str(end_time-start_time)
|
472
|
+
puts "# Genes DNA tree creation (FastTree) [DONE] (in #{c_time})"
|
473
|
+
end
|
474
|
+
|
475
|
+
|
476
|
+
def fasttree_tree_pep bt
|
477
|
+
puts "# Proteins AA tree creation (FastTree) START.."
|
478
|
+
start_time = Time.now
|
479
|
+
ori_dir = Dir.pwd
|
480
|
+
Dir.chdir(@outdir)
|
481
|
+
Dir.mkdir("tree-genes-pep") if ! Dir.exists?("tree-genes-pep")
|
482
|
+
current_dir = Dir.pwd
|
483
|
+
cmd = system("export OMP_NUM_THREADS=#{@proc} && #{@root}/fasttree.linux -nosupport -fastest align-genes-pep.all.fasta > tree-proteins-aa.nwk")
|
484
|
+
Dir.chdir(ori_dir)
|
485
|
+
end_time = Time.now
|
486
|
+
c_time = Helper.sec2str(end_time-start_time)
|
487
|
+
puts "# Proteins AA tree creation (FastTree) [DONE] (in #{c_time})"
|
488
|
+
end
|
489
|
+
|
490
|
+
|
491
|
+
def run_phylo
|
492
|
+
|
493
|
+
if @software == "raxml"
|
494
|
+
if @aln_opt == "both"
|
495
|
+
raxml_tree_dna @bootstrap
|
496
|
+
raxml_tree_pep @bootstrap
|
497
|
+
elsif @aln_opt == "prot"
|
498
|
+
raxml_tree_pep @bootstrap
|
499
|
+
elsif @aln_opt == "dna"
|
500
|
+
raxml_tree_dna @bootstrap
|
501
|
+
end
|
502
|
+
elsif @software == "fasttree"
|
503
|
+
if @aln_opt == "both"
|
504
|
+
fasttree_tree_dna @bootstrap
|
505
|
+
fasttree_tree_pep @bootstrap
|
506
|
+
elsif @aln_opt == "prot"
|
507
|
+
fasttree_tree_pep @bootstrap
|
508
|
+
elsif @aln_opt == "dna"
|
509
|
+
fasttree_tree_dna @bootstrap
|
510
|
+
end
|
454
511
|
end
|
455
512
|
|
456
513
|
end
|