bacterial-annotator 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ba_raxml +46 -0
- data/bin/bacterial-annotator +17 -10
- data/lib/bacterial-comparator.rb +81 -10
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76bdda1755e5721c74d1d2207ce007d9a3b55410
|
4
|
+
data.tar.gz: 00adfe5d0405d2b7193be6b1e9a54826e2096946
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 399f59cfe68cc7fe8abdfa822c04825ff5168dbb7a6ab4bd73dcf01d539ac47d3e6dd983fe2df0539a0f3096333d26778c1e3bd4bf96a6225a103f62377066d6
|
7
|
+
data.tar.gz: 7519118185f6e197cd8985e06c9fddded3a1d3c36ce00a6b9684b0b4257de00b27283043fe4167438089c0f3b60962749d4fa8378816e9fc1928b7e617e99b4d
|
data/bin/ba_raxml
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# author: maxime déraspe
|
4
|
+
# email: maxime@deraspe.net
|
5
|
+
# review:
|
6
|
+
# date: 15-02-24
|
7
|
+
# version: 0.01
|
8
|
+
# licence:
|
9
|
+
|
10
|
+
require 'open-uri'
|
11
|
+
|
12
|
+
ROOT_path = File.dirname(__FILE__)
|
13
|
+
# raxml_url = https://github.com/stamatak/standard-RAxML/archive/master.zip
|
14
|
+
# Install RAXML on the user's system
|
15
|
+
def installRaxml
|
16
|
+
|
17
|
+
begin
|
18
|
+
resp = open("https://github.com/stamatak/standard-RAxML/archive/master.zip")
|
19
|
+
open("#{ROOT_path}/master.zip", "wb") do |file|
|
20
|
+
file.write(resp.read)
|
21
|
+
end
|
22
|
+
Dir.chdir("#{ROOT_path}/")
|
23
|
+
`unzip master.zip`
|
24
|
+
Dir.chdir("#{ROOT_path}/standard-RAxML-master")
|
25
|
+
`make -f Makefile.PTHREADS.gcc`
|
26
|
+
`rm *.o`
|
27
|
+
`cp #{ROOT_path}/standard-RAxML-master/raxmlHPC-PTHREADS #{ROOT_path}/raxml.linux`
|
28
|
+
File.chmod(0755, "#{ROOT_path}/raxml.linux")
|
29
|
+
rescue
|
30
|
+
abort "Problem in stalling RAXML, aborting"
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
# Install RAXML if not already installed
|
37
|
+
if ! File.exists? "#{ROOT_path}/raxml.linux"
|
38
|
+
|
39
|
+
puts "Installing RAXML git master.."
|
40
|
+
puts "See https://github.com/stamatak/standard-RAxML"
|
41
|
+
puts "License GPLv3 : https://github.com/stamatak/standard-RAxML/blob/master/gpl-3.0.txt"
|
42
|
+
installRaxml
|
43
|
+
puts "RAXML successfully installed in #{ROOT_path}/standard-RAxML-master"
|
44
|
+
puts ""
|
45
|
+
|
46
|
+
end
|
data/bin/bacterial-annotator
CHANGED
@@ -132,9 +132,11 @@ compare [OPTIONS]
|
|
132
132
|
|
133
133
|
//Alignment (MAFFT)
|
134
134
|
--align [dna|prot|both] by default align only proteins
|
135
|
-
--concat <nb of genes
|
135
|
+
--concat <nb of genes> by default 0=all
|
136
136
|
|
137
|
-
//
|
137
|
+
//Phylogeny (RAXML)
|
138
|
+
--phylogeny will build phylogenetic tree from the alignments files (pep or dna)
|
139
|
+
--bootstrap <nb of bootstrap> by default 100
|
138
140
|
|
139
141
|
OEM
|
140
142
|
|
@@ -153,6 +155,9 @@ def parseOptions_compare
|
|
153
155
|
options[:proc] = 2
|
154
156
|
options[:align] = "prot"
|
155
157
|
options[:genomes_list] = []
|
158
|
+
options[:concat] = 0
|
159
|
+
options[:phylogeny] = 0
|
160
|
+
options[:bootstrap] = 100
|
156
161
|
|
157
162
|
while x = ARGV.shift
|
158
163
|
|
@@ -167,6 +172,12 @@ def parseOptions_compare
|
|
167
172
|
options[:proc] = ARGV.shift
|
168
173
|
when "--align"
|
169
174
|
options[:align] = ARGV.shift
|
175
|
+
when "--concat"
|
176
|
+
options[:concat] = ARGV.shift
|
177
|
+
when "--phylogeny"
|
178
|
+
options[:phylogeny] = 1
|
179
|
+
when "--bootstrap"
|
180
|
+
options[:bootstrap] = (ARGV.shift).to_i
|
170
181
|
when "--help", "-h"
|
171
182
|
usage_compare
|
172
183
|
abort
|
@@ -190,6 +201,7 @@ if ARGV.size > 1
|
|
190
201
|
system("ba_prodigal")
|
191
202
|
system("ba_blat")
|
192
203
|
system("ba_mafft")
|
204
|
+
system("ba_raxml")
|
193
205
|
|
194
206
|
options = {}
|
195
207
|
genomes_list = []
|
@@ -223,14 +235,9 @@ if ARGV.size > 1
|
|
223
235
|
ARGV.shift
|
224
236
|
options = parseOptions_compare
|
225
237
|
bcomp = BacterialComparator.new(options, ROOT)
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
elsif options[:align].downcase == "prot"
|
230
|
-
bcomp.mafft_align_all_pep
|
231
|
-
elsif options[:align].downcase == "dna"
|
232
|
-
bcomp.mafft_align_all_dna
|
233
|
-
end
|
238
|
+
aln_opt = options[:align].downcase
|
239
|
+
bcomp.mafft_aln aln_opt
|
240
|
+
bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
|
234
241
|
|
235
242
|
else
|
236
243
|
|
data/lib/bacterial-comparator.rb
CHANGED
@@ -84,8 +84,8 @@ class BacterialComparator
|
|
84
84
|
|
85
85
|
def build_multifasta ref_prot, synteny
|
86
86
|
|
87
|
-
pep_out_dir = "./#{@outdir}/genes-
|
88
|
-
dna_out_dir = "./#{@outdir}/genes-
|
87
|
+
pep_out_dir = "./#{@outdir}/align-genes-pep"
|
88
|
+
dna_out_dir = "./#{@outdir}/align-genes-dna"
|
89
89
|
|
90
90
|
# create multifasta by syntenic proteins (pep)
|
91
91
|
if ! File.exists? pep_out_dir+"/#{ref_prot}.pep"
|
@@ -160,8 +160,8 @@ class BacterialComparator
|
|
160
160
|
|
161
161
|
fout.close
|
162
162
|
|
163
|
-
pep_out_dir = "./#{@outdir}/genes-
|
164
|
-
dna_out_dir = "./#{@outdir}/genes-
|
163
|
+
pep_out_dir = "./#{@outdir}/align-genes-pep"
|
164
|
+
dna_out_dir = "./#{@outdir}/align-genes-dna"
|
165
165
|
Dir.mkdir(pep_out_dir) if ! Dir.exists? pep_out_dir
|
166
166
|
Dir.mkdir(dna_out_dir) if ! Dir.exists? dna_out_dir
|
167
167
|
|
@@ -202,8 +202,10 @@ class BacterialComparator
|
|
202
202
|
|
203
203
|
|
204
204
|
def mafft_align_all_pep
|
205
|
+
|
205
206
|
puts "# MAFFT multialign all protein sequences.."
|
206
|
-
Dir.
|
207
|
+
ori_dir = Dir.pwd
|
208
|
+
Dir.chdir("#{@outdir}/align-genes-pep/")
|
207
209
|
|
208
210
|
is_done = 1
|
209
211
|
if Dir["*.pep"].length == Dir["*.aln"].length
|
@@ -220,16 +222,19 @@ class BacterialComparator
|
|
220
222
|
Parallel.map(Dir["*.pep"], in_processes: @proc) { |f|
|
221
223
|
mafft_align f
|
222
224
|
}
|
225
|
+
else
|
226
|
+
puts "..Prot alignment files already exists, skipping."
|
223
227
|
end
|
224
228
|
|
225
|
-
concat_alignments "genes-
|
226
|
-
Dir.chdir(
|
229
|
+
concat_alignments "align-genes-pep.all.fasta"
|
230
|
+
Dir.chdir(ori_dir)
|
227
231
|
|
228
232
|
end
|
229
233
|
|
230
234
|
def mafft_align_all_dna
|
231
235
|
puts "# MAFFT multialign all gene sequences.."
|
232
|
-
Dir.
|
236
|
+
ori_dir = Dir.pwd
|
237
|
+
Dir.chdir("#{@outdir}/align-genes-dna/")
|
233
238
|
|
234
239
|
is_done = 1
|
235
240
|
if Dir["*.dna"].length == Dir["*.aln"].length
|
@@ -246,16 +251,23 @@ class BacterialComparator
|
|
246
251
|
Parallel.map(Dir["*.dna"], in_processes: @proc) { |f|
|
247
252
|
mafft_align f
|
248
253
|
}
|
254
|
+
else
|
255
|
+
puts "..Gene alignment files already exists, skipping."
|
249
256
|
end
|
250
257
|
|
251
|
-
concat_alignments "genes-
|
252
|
-
Dir.chdir(
|
258
|
+
concat_alignments "align-genes-dna.all.fasta"
|
259
|
+
Dir.chdir(ori_dir)
|
253
260
|
|
254
261
|
end
|
255
262
|
|
256
263
|
|
257
264
|
def concat_alignments outfile
|
258
265
|
|
266
|
+
if File.exists?("../#{outfile}") and File.size("../#{outfile}") > 0
|
267
|
+
puts "..Alignment concatenated file already exists, skipping."
|
268
|
+
return
|
269
|
+
end
|
270
|
+
|
259
271
|
fout = File.open("../#{outfile}", "w")
|
260
272
|
|
261
273
|
ref_id = Dir["../../#{@genomes_list[0]}/*.pep"][0].gsub(/.*\//,"").gsub(".pep","")
|
@@ -297,5 +309,64 @@ class BacterialComparator
|
|
297
309
|
|
298
310
|
end
|
299
311
|
|
312
|
+
def mafft_aln aln_opt
|
313
|
+
|
314
|
+
if aln_opt == "both"
|
315
|
+
mafft_align_all_pep
|
316
|
+
mafft_align_all_dna
|
317
|
+
elsif aln_opt == "prot"
|
318
|
+
mafft_align_all_pep
|
319
|
+
elsif aln_opt == "dna"
|
320
|
+
mafft_align_all_dna
|
321
|
+
end
|
322
|
+
|
323
|
+
end
|
324
|
+
|
325
|
+
|
326
|
+
def raxml_tree_dna bt
|
327
|
+
|
328
|
+
# DNA tree
|
329
|
+
puts "# RAXML DNA tree creation.. "
|
330
|
+
ori_dir = Dir.pwd
|
331
|
+
Dir.chdir(@outdir)
|
332
|
+
Dir.mkdir("tree-genes-dna") if ! Dir.exists?("tree-genes-dna")
|
333
|
+
current_dir = Dir.pwd
|
334
|
+
tree_dir = "#{current_dir}/tree-genes-dna"
|
335
|
+
cmd = system("#{@root}/raxml.linux -T #{@proc} -f d -N #{bt} -s align-genes-dna.all.fasta -m GTRGAMMA -p 123454321 -n DnaTree -w #{tree_dir}")
|
336
|
+
cmd = system("cat #{tree_dir}/RAxML_result.DnaTree.RUN.* >> #{tree_dir}/RAxML_result.BS")
|
337
|
+
cmd = system("#{@root}/raxml.linux -T 3 -f b -z #{tree_dir}/RAxML_result.BS -t #{tree_dir}/RAxML_bestTree.DnaTree -m GTRGAMMA -n DNA_BS_TREE -w #{tree_dir}")
|
338
|
+
cmd = system("ln -s #{tree_dir}/RAxML_bipartitionsBranchLabels.DNA_BS_TREE #{tree_dir}/../")
|
339
|
+
Dir.chdir(ori_dir)
|
340
|
+
end
|
341
|
+
|
342
|
+
def raxml_tree_pep bt
|
343
|
+
|
344
|
+
# Prot tree
|
345
|
+
puts "# RAXML Protein tree creation.. "
|
346
|
+
ori_dir = Dir.pwd
|
347
|
+
Dir.chdir(@outdir)
|
348
|
+
Dir.mkdir("tree-genes-pep") if ! Dir.exists?("tree-genes-pep")
|
349
|
+
current_dir = Dir.pwd
|
350
|
+
tree_dir = "#{current_dir}/tree-genes-pep"
|
351
|
+
cmd = system("#{@root}/raxml.linux -T #{@proc} -f d -N #{bt} -s align-genes-pep.all.fasta -m PROTGAMMAAUTO -p 123454321 -n PepTree -w #{tree_dir}")
|
352
|
+
cmd = system("cat #{tree_dir}/RAxML_result.PepTree.RUN.* >> #{tree_dir}/RAxML_result.BS")
|
353
|
+
cmd = system("#{@root}/raxml.linux -T 3 -f b -z #{tree_dir}/RAxML_result.BS -t #{tree_dir}/RAxML_bestTree.PepTree -m PROTGAMMAAUTO -n PEP_BS_TREE -w #{tree_dir}")
|
354
|
+
cmd = system("ln -s #{tree_dir}/RAxML_bipartitionsBranchLabels.PEP_BS_TREE #{tree_dir}/../")
|
355
|
+
Dir.chdir(ori_dir)
|
356
|
+
end
|
357
|
+
|
358
|
+
|
359
|
+
def raxml_tree aln_opt, bt
|
360
|
+
|
361
|
+
if aln_opt == "both"
|
362
|
+
raxml_tree_dna bt
|
363
|
+
raxml_tree_pep bt
|
364
|
+
elsif aln_opt == "prot"
|
365
|
+
raxml_tree_pep bt
|
366
|
+
elsif aln_opt == "dna"
|
367
|
+
raxml_tree_dna bt
|
368
|
+
end
|
369
|
+
|
370
|
+
end
|
300
371
|
|
301
372
|
end # end of Class
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
@@ -78,12 +78,14 @@ executables:
|
|
78
78
|
- ba_prodigal
|
79
79
|
- ba_blat
|
80
80
|
- ba_mafft
|
81
|
+
- ba_raxml
|
81
82
|
extensions: []
|
82
83
|
extra_rdoc_files: []
|
83
84
|
files:
|
84
85
|
- bin/ba_blat
|
85
86
|
- bin/ba_mafft
|
86
87
|
- bin/ba_prodigal
|
88
|
+
- bin/ba_raxml
|
87
89
|
- bin/bacterial-annotator
|
88
90
|
- lib/bacterial-annotator.rb
|
89
91
|
- lib/bacterial-annotator/fasta-manip.rb
|