bacterial-annotator 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/bacterial-annotator +4 -1
- data/lib/bacterial-annotator.rb +1 -1
- data/lib/bacterial-comparator.rb +79 -22
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c90c64fd59297a06dd9b50def86f8ba9305b23e
|
4
|
+
data.tar.gz: '008eec9088d41505c8c9ffbbc097ce4cee5b5c3b'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7904dc06b85e1dca28bab9fbb5a84d2919da714990775df637ed27f504f25218eeffd264146fd1082e2a44e1188b2faf6e2a0ce923183eb88caf15595cf26500
|
7
|
+
data.tar.gz: 1c3a09f9952e5750faad5fa5c5c27492967dcebcb6f2fa5a8fd89ef79f5a074d2e6e4f4eeb299729304328777af8e752a1747409ee24150ce1f96354243a0f38
|
data/bin/bacterial-annotator
CHANGED
@@ -12,7 +12,7 @@ require 'bacterial-annotator'
|
|
12
12
|
require 'bacterial-comparator'
|
13
13
|
require 'bacterial-identificator'
|
14
14
|
|
15
|
-
VERSION = "0.
|
15
|
+
VERSION = "0.8.0"
|
16
16
|
|
17
17
|
def print_version
|
18
18
|
version = "Bacterial Annotator - Version #{VERSION}\n\n"
|
@@ -189,6 +189,7 @@ def parseOptions_compare
|
|
189
189
|
options[:genomes_list] = []
|
190
190
|
options[:concat] = 0
|
191
191
|
options[:phylogeny] = 0
|
192
|
+
options[:software] = "fasttree"
|
192
193
|
options[:bootstrap] = 100
|
193
194
|
|
194
195
|
while x = ARGV.shift
|
@@ -208,6 +209,8 @@ def parseOptions_compare
|
|
208
209
|
options[:concat] = ARGV.shift
|
209
210
|
when "--phylogeny"
|
210
211
|
options[:phylogeny] = 1
|
212
|
+
when "--software"
|
213
|
+
options[:software] = ARGV.shift
|
211
214
|
when "--bootstrap"
|
212
215
|
options[:bootstrap] = (ARGV.shift).to_i
|
213
216
|
when "--help", "-h"
|
data/lib/bacterial-annotator.rb
CHANGED
data/lib/bacterial-comparator.rb
CHANGED
@@ -20,11 +20,17 @@ class BacterialComparator
|
|
20
20
|
def initialize options, root
|
21
21
|
|
22
22
|
@root = root
|
23
|
-
@outdir = options[:outdir]
|
23
|
+
@outdir = File.expand_path(File.dirname(options[:outdir]))+"/#{options[:outdir]}"
|
24
24
|
Dir.mkdir(@outdir) if ! Dir.exists? @outdir
|
25
25
|
@genomes_list = options[:genomes_list]
|
26
26
|
@proc = options[:proc].to_i
|
27
27
|
@phylo_nb_genes = options[:phylo_nb_genes]
|
28
|
+
if ["fasttree","raxml"].include? options[:software]
|
29
|
+
@software = options[:software]
|
30
|
+
else
|
31
|
+
@software = "fasttree"
|
32
|
+
end
|
33
|
+
|
28
34
|
min_cov = options[:min_cov].to_f
|
29
35
|
min_pid = options[:pidentity].to_f
|
30
36
|
if min_cov > 1
|
@@ -51,7 +57,8 @@ class BacterialComparator
|
|
51
57
|
def run_comparison
|
52
58
|
|
53
59
|
run_mafft_aln
|
54
|
-
|
60
|
+
|
61
|
+
run_phylo if @run_phylo != 0
|
55
62
|
|
56
63
|
end
|
57
64
|
|
@@ -119,7 +126,7 @@ class BacterialComparator
|
|
119
126
|
|
120
127
|
def build_multifasta synteny_list
|
121
128
|
|
122
|
-
pep_out_dir = "
|
129
|
+
pep_out_dir = "#{@outdir}/align-genes-pep"
|
123
130
|
|
124
131
|
ref_proteins = load_genome_cds(Dir["#{@genomes_list[0]}/*.pep"][0])
|
125
132
|
synteny_list.each do |k,v|
|
@@ -139,7 +146,7 @@ class BacterialComparator
|
|
139
146
|
|
140
147
|
end
|
141
148
|
|
142
|
-
dna_out_dir = "
|
149
|
+
dna_out_dir = "#{@outdir}/align-genes-dna"
|
143
150
|
ref_genes = load_genome_cds(Dir["#{@genomes_list[0]}/*.dna"][0])
|
144
151
|
synteny_list.each do |k,v|
|
145
152
|
dna_out = File.open(dna_out_dir+"/#{k}.dna", "w")
|
@@ -220,8 +227,8 @@ class BacterialComparator
|
|
220
227
|
|
221
228
|
fout.close
|
222
229
|
|
223
|
-
pep_out_dir = "
|
224
|
-
dna_out_dir = "
|
230
|
+
pep_out_dir = "#{@outdir}/align-genes-pep"
|
231
|
+
dna_out_dir = "#{@outdir}/align-genes-dna"
|
225
232
|
Dir.mkdir(pep_out_dir) if ! Dir.exists? pep_out_dir
|
226
233
|
Dir.mkdir(dna_out_dir) if ! Dir.exists? dna_out_dir
|
227
234
|
|
@@ -296,9 +303,11 @@ class BacterialComparator
|
|
296
303
|
puts "# Sequence alignments - individual proteins a.a. (MAFFT) [DONE] (in #{c_time})"
|
297
304
|
|
298
305
|
# FIXME ugly hack to find out the reference genome
|
299
|
-
|
306
|
+
Dir.chdir("#{@outdir}")
|
307
|
+
Dir.chdir("../")
|
308
|
+
ref_id = Dir["#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
|
300
309
|
|
301
|
-
concat_alignments "align-genes-pep.all.fasta", ref_id
|
310
|
+
concat_alignments "#{@outdir}/align-genes-pep.all.fasta", ref_id
|
302
311
|
|
303
312
|
Dir.chdir(ori_dir)
|
304
313
|
|
@@ -330,13 +339,17 @@ class BacterialComparator
|
|
330
339
|
end
|
331
340
|
|
332
341
|
# ugly hack to find out the reference genome FIXME
|
333
|
-
|
342
|
+
Dir.chdir("#{@outdir}")
|
343
|
+
Dir.chdir("../")
|
344
|
+
|
345
|
+
ref_id = Dir["#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
|
346
|
+
# ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
|
334
347
|
|
335
348
|
end_time = Time.now
|
336
349
|
c_time = Helper.sec2str(end_time-start_time)
|
337
350
|
puts "# Sequence alignments - individual genes dna (MAFFT) [DONE] (in #{c_time})"
|
338
351
|
|
339
|
-
concat_alignments "align-genes-dna.all.fasta", ref_id
|
352
|
+
concat_alignments "#{@outdir}/align-genes-dna.all.fasta", ref_id
|
340
353
|
|
341
354
|
Dir.chdir(ori_dir)
|
342
355
|
|
@@ -345,15 +358,17 @@ class BacterialComparator
|
|
345
358
|
|
346
359
|
def concat_alignments outfile, ref_id
|
347
360
|
|
348
|
-
if File.exists?("
|
361
|
+
if File.exists?("#{outfile}") and File.size("#{outfile}") > 0
|
349
362
|
puts "..Alignment concatenated file already exists, skipping."
|
350
363
|
return
|
351
364
|
end
|
352
365
|
|
353
|
-
fout = File.open("
|
366
|
+
fout = File.open("#{outfile}", "w")
|
354
367
|
|
355
368
|
seq = ""
|
356
|
-
|
369
|
+
aln_dir = outfile.split(".")[0..-3].join(".")
|
370
|
+
|
371
|
+
Dir["#{aln_dir}/*.aln"].each do |f|
|
357
372
|
flat = Bio::FlatFile.auto(f)
|
358
373
|
ref_seq = flat.entries[0]
|
359
374
|
flat.close
|
@@ -366,7 +381,7 @@ class BacterialComparator
|
|
366
381
|
|
367
382
|
for i in 1..@genomes_list.length
|
368
383
|
seq = ""
|
369
|
-
Dir["
|
384
|
+
Dir["#{aln_dir}/*.aln"].each do |f|
|
370
385
|
flat = Bio::FlatFile.auto(f)
|
371
386
|
j=0
|
372
387
|
flat.each_entry do |entry|
|
@@ -442,15 +457,57 @@ class BacterialComparator
|
|
442
457
|
puts "# Proteins AA tree creation (RAXML) [DONE] (in #{c_time})"
|
443
458
|
end
|
444
459
|
|
445
|
-
def run_raxml_phylo
|
446
460
|
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
461
|
+
def fasttree_tree_dna bt
|
462
|
+
puts "# Genes DNA tree creation (FastTree) START.."
|
463
|
+
start_time = Time.now
|
464
|
+
ori_dir = Dir.pwd
|
465
|
+
Dir.chdir(@outdir)
|
466
|
+
Dir.mkdir("tree-genes-dna") if ! Dir.exists?("tree-genes-dna")
|
467
|
+
current_dir = Dir.pwd
|
468
|
+
cmd = system("export OMP_NUM_THREADS=#{@proc} && #{@root}/fasttree.linux -nosupport -fastest -nt -gtr align-genes-dna.all.fasta > tree-genes-dna.nwk")
|
469
|
+
Dir.chdir(ori_dir)
|
470
|
+
end_time = Time.now
|
471
|
+
c_time = Helper.sec2str(end_time-start_time)
|
472
|
+
puts "# Genes DNA tree creation (FastTree) [DONE] (in #{c_time})"
|
473
|
+
end
|
474
|
+
|
475
|
+
|
476
|
+
def fasttree_tree_pep bt
|
477
|
+
puts "# Proteins AA tree creation (FastTree) START.."
|
478
|
+
start_time = Time.now
|
479
|
+
ori_dir = Dir.pwd
|
480
|
+
Dir.chdir(@outdir)
|
481
|
+
Dir.mkdir("tree-genes-pep") if ! Dir.exists?("tree-genes-pep")
|
482
|
+
current_dir = Dir.pwd
|
483
|
+
cmd = system("export OMP_NUM_THREADS=#{@proc} && #{@root}/fasttree.linux -nosupport -fastest align-genes-pep.all.fasta > tree-proteins-aa.nwk")
|
484
|
+
Dir.chdir(ori_dir)
|
485
|
+
end_time = Time.now
|
486
|
+
c_time = Helper.sec2str(end_time-start_time)
|
487
|
+
puts "# Proteins AA tree creation (FastTree) [DONE] (in #{c_time})"
|
488
|
+
end
|
489
|
+
|
490
|
+
|
491
|
+
def run_phylo
|
492
|
+
|
493
|
+
if @software == "raxml"
|
494
|
+
if @aln_opt == "both"
|
495
|
+
raxml_tree_dna @bootstrap
|
496
|
+
raxml_tree_pep @bootstrap
|
497
|
+
elsif @aln_opt == "prot"
|
498
|
+
raxml_tree_pep @bootstrap
|
499
|
+
elsif @aln_opt == "dna"
|
500
|
+
raxml_tree_dna @bootstrap
|
501
|
+
end
|
502
|
+
elsif @software == "fasttree"
|
503
|
+
if @aln_opt == "both"
|
504
|
+
fasttree_tree_dna @bootstrap
|
505
|
+
fasttree_tree_pep @bootstrap
|
506
|
+
elsif @aln_opt == "prot"
|
507
|
+
fasttree_tree_pep @bootstrap
|
508
|
+
elsif @aln_opt == "dna"
|
509
|
+
fasttree_tree_dna @bootstrap
|
510
|
+
end
|
454
511
|
end
|
455
512
|
|
456
513
|
end
|