bacterial-annotator 0.3.3 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ba_raxml +46 -0
- data/bin/bacterial-annotator +17 -10
- data/lib/bacterial-comparator.rb +81 -10
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76bdda1755e5721c74d1d2207ce007d9a3b55410
|
4
|
+
data.tar.gz: 00adfe5d0405d2b7193be6b1e9a54826e2096946
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 399f59cfe68cc7fe8abdfa822c04825ff5168dbb7a6ab4bd73dcf01d539ac47d3e6dd983fe2df0539a0f3096333d26778c1e3bd4bf96a6225a103f62377066d6
|
7
|
+
data.tar.gz: 7519118185f6e197cd8985e06c9fddded3a1d3c36ce00a6b9684b0b4257de00b27283043fe4167438089c0f3b60962749d4fa8378816e9fc1928b7e617e99b4d
|
data/bin/ba_raxml
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# author: maxime déraspe
|
4
|
+
# email: maxime@deraspe.net
|
5
|
+
# review:
|
6
|
+
# date: 15-02-24
|
7
|
+
# version: 0.01
|
8
|
+
# licence:
|
9
|
+
|
10
|
+
require 'open-uri'
|
11
|
+
|
12
|
+
ROOT_path = File.dirname(__FILE__)
|
13
|
+
# raxml_url = https://github.com/stamatak/standard-RAxML/archive/master.zip
|
14
|
+
# Install RAXML on the user system
|
15
|
+
# Download the RAxML sources from GitHub, build the PTHREADS binary and
# install it as "#{ROOT_path}/raxml.linux" (made executable, mode 0755).
#
# Aborts the script with an error message when any step (download, unzip,
# make, copy) fails. The caller's working directory is left untouched.
def installRaxml

  url = "https://github.com/stamatak/standard-RAxML/archive/master.zip"
  archive = "#{ROOT_path}/master.zip"
  src_dir = "#{ROOT_path}/standard-RAxML-master"

  begin

    # Kernel#open stopped accepting URLs in Ruby 3.0; URI.open is the open-uri
    # entry point there, while older rubies only offer the Kernel#open hook.
    resp = URI.respond_to?(:open) ? URI.open(url) : open(url)
    begin
      File.open(archive, "wb") { |file| file.write(resp.read) }
    ensure
      resp.close
    end

    # Block form of chdir restores the previous directory afterwards.
    Dir.chdir(ROOT_path) do
      # -o overwrites a previous extraction instead of prompting on stdin
      system("unzip", "-o", "master.zip", out: File::NULL) || raise("unzip failed")
    end

    Dir.chdir(src_dir) do
      # system() reports a non-zero exit status, which backticks silently drop
      system("make", "-f", "Makefile.PTHREADS.gcc", out: File::NULL) || raise("make failed")
      File.delete(*Dir["*.o"])
    end

    IO.copy_stream("#{src_dir}/raxmlHPC-PTHREADS", "#{ROOT_path}/raxml.linux")
    File.chmod(0755, "#{ROOT_path}/raxml.linux")

  rescue => e
    abort "Problem installing RAXML, aborting (#{e.message})"
  end

end
|
34
|
+
|
35
|
+
|
36
|
+
# Install RAXML if it is not already installed next to this script.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
unless File.exist? "#{ROOT_path}/raxml.linux"

  puts "Installing RAXML git master.."
  puts "See https://github.com/stamatak/standard-RAxML"
  puts "License GPLv3 : https://github.com/stamatak/standard-RAxML/blob/master/gpl-3.0.txt"
  installRaxml
  puts "RAXML successfully installed in #{ROOT_path}/standard-RAxML-master"
  puts ""

end
|
data/bin/bacterial-annotator
CHANGED
@@ -132,9 +132,11 @@ compare [OPTIONS]
|
|
132
132
|
|
133
133
|
//Alignment (MAFFT)
|
134
134
|
--align [dna|prot|both] by default align only proteins
|
135
|
-
--concat <nb of genes
|
135
|
+
--concat <nb of genes> by default 0=all
|
136
136
|
|
137
|
-
//
|
137
|
+
//Phylogeny (RAXML)
|
138
|
+
--phylogeny will build phylogenetic tree from the alignments files (pep or dna)
|
139
|
+
--bootstrap <nb of bootstrap> by default 100
|
138
140
|
|
139
141
|
OEM
|
140
142
|
|
@@ -153,6 +155,9 @@ def parseOptions_compare
|
|
153
155
|
options[:proc] = 2
|
154
156
|
options[:align] = "prot"
|
155
157
|
options[:genomes_list] = []
|
158
|
+
options[:concat] = 0
|
159
|
+
options[:phylogeny] = 0
|
160
|
+
options[:bootstrap] = 100
|
156
161
|
|
157
162
|
while x = ARGV.shift
|
158
163
|
|
@@ -167,6 +172,12 @@ def parseOptions_compare
|
|
167
172
|
options[:proc] = ARGV.shift
|
168
173
|
when "--align"
|
169
174
|
options[:align] = ARGV.shift
|
175
|
+
when "--concat"
|
176
|
+
options[:concat] = ARGV.shift
|
177
|
+
when "--phylogeny"
|
178
|
+
options[:phylogeny] = 1
|
179
|
+
when "--bootstrap"
|
180
|
+
options[:bootstrap] = (ARGV.shift).to_i
|
170
181
|
when "--help", "-h"
|
171
182
|
usage_compare
|
172
183
|
abort
|
@@ -190,6 +201,7 @@ if ARGV.size > 1
|
|
190
201
|
system("ba_prodigal")
|
191
202
|
system("ba_blat")
|
192
203
|
system("ba_mafft")
|
204
|
+
system("ba_raxml")
|
193
205
|
|
194
206
|
options = {}
|
195
207
|
genomes_list = []
|
@@ -223,14 +235,9 @@ if ARGV.size > 1
|
|
223
235
|
ARGV.shift
|
224
236
|
options = parseOptions_compare
|
225
237
|
bcomp = BacterialComparator.new(options, ROOT)
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
elsif options[:align].downcase == "prot"
|
230
|
-
bcomp.mafft_align_all_pep
|
231
|
-
elsif options[:align].downcase == "dna"
|
232
|
-
bcomp.mafft_align_all_dna
|
233
|
-
end
|
238
|
+
aln_opt = options[:align].downcase
|
239
|
+
bcomp.mafft_aln aln_opt
|
240
|
+
bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
|
234
241
|
|
235
242
|
else
|
236
243
|
|
data/lib/bacterial-comparator.rb
CHANGED
@@ -84,8 +84,8 @@ class BacterialComparator
|
|
84
84
|
|
85
85
|
def build_multifasta ref_prot, synteny
|
86
86
|
|
87
|
-
pep_out_dir = "./#{@outdir}/genes-
|
88
|
-
dna_out_dir = "./#{@outdir}/genes-
|
87
|
+
pep_out_dir = "./#{@outdir}/align-genes-pep"
|
88
|
+
dna_out_dir = "./#{@outdir}/align-genes-dna"
|
89
89
|
|
90
90
|
# create multifasta by syntenic proteins (pep)
|
91
91
|
if ! File.exists? pep_out_dir+"/#{ref_prot}.pep"
|
@@ -160,8 +160,8 @@ class BacterialComparator
|
|
160
160
|
|
161
161
|
fout.close
|
162
162
|
|
163
|
-
pep_out_dir = "./#{@outdir}/genes-
|
164
|
-
dna_out_dir = "./#{@outdir}/genes-
|
163
|
+
pep_out_dir = "./#{@outdir}/align-genes-pep"
|
164
|
+
dna_out_dir = "./#{@outdir}/align-genes-dna"
|
165
165
|
Dir.mkdir(pep_out_dir) if ! Dir.exists? pep_out_dir
|
166
166
|
Dir.mkdir(dna_out_dir) if ! Dir.exists? dna_out_dir
|
167
167
|
|
@@ -202,8 +202,10 @@ class BacterialComparator
|
|
202
202
|
|
203
203
|
|
204
204
|
def mafft_align_all_pep
|
205
|
+
|
205
206
|
puts "# MAFFT multialign all protein sequences.."
|
206
|
-
Dir.
|
207
|
+
ori_dir = Dir.pwd
|
208
|
+
Dir.chdir("#{@outdir}/align-genes-pep/")
|
207
209
|
|
208
210
|
is_done = 1
|
209
211
|
if Dir["*.pep"].length == Dir["*.aln"].length
|
@@ -220,16 +222,19 @@ class BacterialComparator
|
|
220
222
|
Parallel.map(Dir["*.pep"], in_processes: @proc) { |f|
|
221
223
|
mafft_align f
|
222
224
|
}
|
225
|
+
else
|
226
|
+
puts "..Prot alignment files already exists, skipping."
|
223
227
|
end
|
224
228
|
|
225
|
-
concat_alignments "genes-
|
226
|
-
Dir.chdir(
|
229
|
+
concat_alignments "align-genes-pep.all.fasta"
|
230
|
+
Dir.chdir(ori_dir)
|
227
231
|
|
228
232
|
end
|
229
233
|
|
230
234
|
def mafft_align_all_dna
|
231
235
|
puts "# MAFFT multialign all gene sequences.."
|
232
|
-
Dir.
|
236
|
+
ori_dir = Dir.pwd
|
237
|
+
Dir.chdir("#{@outdir}/align-genes-dna/")
|
233
238
|
|
234
239
|
is_done = 1
|
235
240
|
if Dir["*.dna"].length == Dir["*.aln"].length
|
@@ -246,16 +251,23 @@ class BacterialComparator
|
|
246
251
|
Parallel.map(Dir["*.dna"], in_processes: @proc) { |f|
|
247
252
|
mafft_align f
|
248
253
|
}
|
254
|
+
else
|
255
|
+
puts "..Gene alignment files already exists, skipping."
|
249
256
|
end
|
250
257
|
|
251
|
-
concat_alignments "genes-
|
252
|
-
Dir.chdir(
|
258
|
+
concat_alignments "align-genes-dna.all.fasta"
|
259
|
+
Dir.chdir(ori_dir)
|
253
260
|
|
254
261
|
end
|
255
262
|
|
256
263
|
|
257
264
|
def concat_alignments outfile
|
258
265
|
|
266
|
+
if File.exists?("../#{outfile}") and File.size("../#{outfile}") > 0
|
267
|
+
puts "..Alignment concatenated file already exists, skipping."
|
268
|
+
return
|
269
|
+
end
|
270
|
+
|
259
271
|
fout = File.open("../#{outfile}", "w")
|
260
272
|
|
261
273
|
ref_id = Dir["../../#{@genomes_list[0]}/*.pep"][0].gsub(/.*\//,"").gsub(".pep","")
|
@@ -297,5 +309,64 @@ class BacterialComparator
|
|
297
309
|
|
298
310
|
end
|
299
311
|
|
312
|
+
# Dispatch the MAFFT alignment step according to the --align option.
#
# aln_opt:: "prot" (proteins only), "dna" (genes only) or "both"
#           (proteins first, then genes). The command-line caller passes the
#           value already downcased.
#
# Any other value runs no alignment; this is reported on stderr so that a
# mistyped option does not pass unnoticed.
def mafft_aln aln_opt

  case aln_opt
  when "both"
    mafft_align_all_pep
    mafft_align_all_dna
  when "prot"
    mafft_align_all_pep
  when "dna"
    mafft_align_all_dna
  else
    warn "..Unknown alignment option '#{aln_opt}' (expected dna, prot or both), no alignment done."
  end

end
|
324
|
+
|
325
|
+
|
326
|
+
# Build the DNA phylogenetic tree with RAxML from the concatenated alignment.
#
# Works inside @outdir: reads "align-genes-dna.all.fasta", writes every RAxML
# output into "tree-genes-dna/" and links the final labelled tree
# (RAxML_bipartitionsBranchLabels.DNA_BS_TREE) back into @outdir.
#
# bt:: value handed to RAxML's -N option (the --bootstrap command-line value)
#
# Uses @root (directory holding the raxml.linux binary) and @proc (thread
# count, used for both RAxML invocations). A failing step is reported on
# stderr and stops the pipeline, because each later step consumes the files
# written by the previous one.
def raxml_tree_dna bt

  puts "# RAXML DNA tree creation.. "
  raxml = "#{@root}/raxml.linux"
  threads = @proc.to_s

  # Block form restores the caller's working directory, even on an exception.
  Dir.chdir(@outdir) do

    tree_dir = "#{Dir.pwd}/tree-genes-dna"
    Dir.mkdir(tree_dir) if ! Dir.exist?(tree_dir)

    all_trees = "#{tree_dir}/RAxML_result.BS"
    final_tree = "#{tree_dir}/RAxML_bipartitionsBranchLabels.DNA_BS_TREE"

    if File.exist?(final_tree) && File.size(final_tree) > 0
      # Same convention as the alignment steps: never redo finished work.
      puts "..DNA tree already exists, skipping."
    else
      # Argument lists (no shell) keep paths containing spaces intact.
      search = [raxml, "-T", threads, "-f", "d", "-N", bt.to_s,
                "-s", "align-genes-dna.all.fasta", "-m", "GTRGAMMA",
                "-p", "123454321", "-n", "DnaTree", "-w", tree_dir]
      if ! system(*search)
        warn "..RAXML tree search failed, DNA tree not built."
        next
      end

      # Rewrite (never append to) the collected trees so that a rerun cannot
      # duplicate entries left by a previous attempt.
      File.open(all_trees, "w") do |fout|
        Dir["#{tree_dir}/RAxML_result.DnaTree.RUN.*"].sort.each do |run_file|
          fout.write(File.read(run_file))
        end
      end

      label = [raxml, "-T", threads, "-f", "b", "-z", all_trees,
               "-t", "#{tree_dir}/RAxML_bestTree.DnaTree", "-m", "GTRGAMMA",
               "-n", "DNA_BS_TREE", "-w", tree_dir]
      if ! system(*label)
        warn "..RAXML support values computation failed, DNA tree not built."
        next
      end
    end

    # Expose the final tree at the top of the output directory; replace a
    # link left by an earlier run instead of failing on it.
    link = "#{Dir.pwd}/RAxML_bipartitionsBranchLabels.DNA_BS_TREE"
    File.delete(link) if File.symlink?(link) || File.exist?(link)
    File.symlink(final_tree, link)

  end

end
|
341
|
+
|
342
|
+
# Build the protein phylogenetic tree with RAxML from the concatenated
# alignment.
#
# Works inside @outdir: reads "align-genes-pep.all.fasta", writes every RAxML
# output into "tree-genes-pep/" and links the final labelled tree
# (RAxML_bipartitionsBranchLabels.PEP_BS_TREE) back into @outdir.
#
# bt:: value handed to RAxML's -N option (the --bootstrap command-line value)
#
# Uses @root (directory holding the raxml.linux binary) and @proc (thread
# count, used for both RAxML invocations). A failing step is reported on
# stderr and stops the pipeline, because each later step consumes the files
# written by the previous one.
def raxml_tree_pep bt

  puts "# RAXML Protein tree creation.. "
  raxml = "#{@root}/raxml.linux"
  threads = @proc.to_s

  # Block form restores the caller's working directory, even on an exception.
  Dir.chdir(@outdir) do

    tree_dir = "#{Dir.pwd}/tree-genes-pep"
    Dir.mkdir(tree_dir) if ! Dir.exist?(tree_dir)

    all_trees = "#{tree_dir}/RAxML_result.BS"
    final_tree = "#{tree_dir}/RAxML_bipartitionsBranchLabels.PEP_BS_TREE"

    if File.exist?(final_tree) && File.size(final_tree) > 0
      # Same convention as the alignment steps: never redo finished work.
      puts "..Protein tree already exists, skipping."
    else
      # Argument lists (no shell) keep paths containing spaces intact.
      search = [raxml, "-T", threads, "-f", "d", "-N", bt.to_s,
                "-s", "align-genes-pep.all.fasta", "-m", "PROTGAMMAAUTO",
                "-p", "123454321", "-n", "PepTree", "-w", tree_dir]
      if ! system(*search)
        warn "..RAXML tree search failed, protein tree not built."
        next
      end

      # Rewrite (never append to) the collected trees so that a rerun cannot
      # duplicate entries left by a previous attempt.
      File.open(all_trees, "w") do |fout|
        Dir["#{tree_dir}/RAxML_result.PepTree.RUN.*"].sort.each do |run_file|
          fout.write(File.read(run_file))
        end
      end

      label = [raxml, "-T", threads, "-f", "b", "-z", all_trees,
               "-t", "#{tree_dir}/RAxML_bestTree.PepTree", "-m", "PROTGAMMAAUTO",
               "-n", "PEP_BS_TREE", "-w", tree_dir]
      if ! system(*label)
        warn "..RAXML support values computation failed, protein tree not built."
        next
      end
    end

    # Expose the final tree at the top of the output directory; replace a
    # link left by an earlier run instead of failing on it.
    link = "#{Dir.pwd}/RAxML_bipartitionsBranchLabels.PEP_BS_TREE"
    File.delete(link) if File.symlink?(link) || File.exist?(link)
    File.symlink(final_tree, link)

  end

end
|
357
|
+
|
358
|
+
|
359
|
+
# Dispatch the RAxML tree building step according to the --align option.
#
# aln_opt:: "dna" (gene tree), "prot" (protein tree) or "both"
#           (gene tree first, then protein tree)
# bt::      value forwarded unchanged to raxml_tree_dna / raxml_tree_pep
#
# Any other aln_opt builds no tree; this is reported on stderr so that a
# mistyped option does not pass unnoticed.
def raxml_tree aln_opt, bt

  case aln_opt
  when "both"
    raxml_tree_dna bt
    raxml_tree_pep bt
  when "prot"
    raxml_tree_pep bt
  when "dna"
    raxml_tree_dna bt
  else
    warn "..Unknown alignment option '#{aln_opt}' (expected dna, prot or both), no tree built."
  end

end
|
300
371
|
|
301
372
|
end # end of Class
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
@@ -78,12 +78,14 @@ executables:
|
|
78
78
|
- ba_prodigal
|
79
79
|
- ba_blat
|
80
80
|
- ba_mafft
|
81
|
+
- ba_raxml
|
81
82
|
extensions: []
|
82
83
|
extra_rdoc_files: []
|
83
84
|
files:
|
84
85
|
- bin/ba_blat
|
85
86
|
- bin/ba_mafft
|
86
87
|
- bin/ba_prodigal
|
88
|
+
- bin/ba_raxml
|
87
89
|
- bin/bacterial-annotator
|
88
90
|
- lib/bacterial-annotator.rb
|
89
91
|
- lib/bacterial-annotator/fasta-manip.rb
|