bacterial-annotator 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6a97539de7b1dff0da1ac8edaef6e0e10949c74e
4
- data.tar.gz: 09fa573de375d54339adf3a8800318a49c059ba5
3
+ metadata.gz: 76bdda1755e5721c74d1d2207ce007d9a3b55410
4
+ data.tar.gz: 00adfe5d0405d2b7193be6b1e9a54826e2096946
5
5
  SHA512:
6
- metadata.gz: 608daa4501f95457e6d412fdc43d0672e74acd4ecf91186ea57da00614033e569688ca2d6a82f89f4266bc62da2ca20d7703a9ace0d04f131de57025c93a7685
7
- data.tar.gz: 6ac5946ce49c23bfeb0d32dbc38c41516df960ab73af5f6a1a99689a08f7076ecffb2e361cf4f087d11dd1dcb74858ab35c84aaac687d9a8b28112eee765c05e
6
+ metadata.gz: 399f59cfe68cc7fe8abdfa822c04825ff5168dbb7a6ab4bd73dcf01d539ac47d3e6dd983fe2df0539a0f3096333d26778c1e3bd4bf96a6225a103f62377066d6
7
+ data.tar.gz: 7519118185f6e197cd8985e06c9fddded3a1d3c36ce00a6b9684b0b4257de00b27283043fe4167438089c0f3b60962749d4fa8378816e9fc1928b7e617e99b4d
data/bin/ba_raxml ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ # author: maxime déraspe
4
+ # email: maxime@deraspe.net
5
+ # review:
6
+ # date: 15-02-24
7
+ # version: 0.01
8
+ # licence:
9
+
10
+ require 'open-uri'
11
+
12
+ ROOT_path = File.dirname(__FILE__)
13
+ # raxml_url = https://github.com/stamatak/standard-RAxML/archive/master.zip
14
+ # Install RAXML on the user system
15
+ def installRaxml
16
+
17
+ begin
18
+ resp = open("https://github.com/stamatak/standard-RAxML/archive/master.zip")
19
+ open("#{ROOT_path}/master.zip", "wb") do |file|
20
+ file.write(resp.read)
21
+ end
22
+ Dir.chdir("#{ROOT_path}/")
23
+ `unzip master.zip`
24
+ Dir.chdir("#{ROOT_path}/standard-RAxML-master")
25
+ `make -f Makefile.PTHREADS.gcc`
26
+ `rm *.o`
27
+ `cp #{ROOT_path}/standard-RAxML-master/raxmlHPC-PTHREADS #{ROOT_path}/raxml.linux`
28
+ File.chmod(0755, "#{ROOT_path}/raxml.linux")
29
+ rescue
30
+ abort "Problem in stalling RAXML, aborting"
31
+ end
32
+
33
+ end
34
+
35
+
36
+ # Install RAXML if not already installed
37
+ if ! File.exists? "#{ROOT_path}/raxml.linux"
38
+
39
+ puts "Installing RAXML git master.."
40
+ puts "See https://github.com/stamatak/standard-RAxML"
41
+ puts "License GPLv3 : https://github.com/stamatak/standard-RAxML/blob/master/gpl-3.0.txt"
42
+ installRaxml
43
+ puts "RAXML successfully installed in #{ROOT_path}/standard-RAxML-master"
44
+ puts ""
45
+
46
+ end
@@ -132,9 +132,11 @@ compare [OPTIONS]
132
132
 
133
133
  //Alignment (MAFFT)
134
134
  --align [dna|prot|both] by default align only proteins
135
- --concat <nb of genes | all> by default all
135
+ --concat <nb of genes> by default 0=all
136
136
 
137
- //Phylo (RAXML)
137
+ //Phylogeny (RAXML)
138
+ --phylogeny will build phylogenetic tree from the alignments files (pep or dna)
139
+ --bootstrap <nb of bootstrap> by default 100
138
140
 
139
141
  OEM
140
142
 
@@ -153,6 +155,9 @@ def parseOptions_compare
153
155
  options[:proc] = 2
154
156
  options[:align] = "prot"
155
157
  options[:genomes_list] = []
158
+ options[:concat] = 0
159
+ options[:phylogeny] = 0
160
+ options[:bootstrap] = 100
156
161
 
157
162
  while x = ARGV.shift
158
163
 
@@ -167,6 +172,12 @@ def parseOptions_compare
167
172
  options[:proc] = ARGV.shift
168
173
  when "--align"
169
174
  options[:align] = ARGV.shift
175
+ when "--concat"
176
+ options[:concat] = ARGV.shift
177
+ when "--phylogeny"
178
+ options[:phylogeny] = 1
179
+ when "--bootstrap"
180
+ options[:bootstrap] = (ARGV.shift).to_i
170
181
  when "--help", "-h"
171
182
  usage_compare
172
183
  abort
@@ -190,6 +201,7 @@ if ARGV.size > 1
190
201
  system("ba_prodigal")
191
202
  system("ba_blat")
192
203
  system("ba_mafft")
204
+ system("ba_raxml")
193
205
 
194
206
  options = {}
195
207
  genomes_list = []
@@ -223,14 +235,9 @@ if ARGV.size > 1
223
235
  ARGV.shift
224
236
  options = parseOptions_compare
225
237
  bcomp = BacterialComparator.new(options, ROOT)
226
- if options[:align].downcase == "both"
227
- bcomp.mafft_align_all_pep
228
- bcomp.mafft_align_all_dna
229
- elsif options[:align].downcase == "prot"
230
- bcomp.mafft_align_all_pep
231
- elsif options[:align].downcase == "dna"
232
- bcomp.mafft_align_all_dna
233
- end
238
+ aln_opt = options[:align].downcase
239
+ bcomp.mafft_aln aln_opt
240
+ bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
234
241
 
235
242
  else
236
243
 
@@ -84,8 +84,8 @@ class BacterialComparator
84
84
 
85
85
  def build_multifasta ref_prot, synteny
86
86
 
87
- pep_out_dir = "./#{@outdir}/genes-align-pep"
88
- dna_out_dir = "./#{@outdir}/genes-align-dna"
87
+ pep_out_dir = "./#{@outdir}/align-genes-pep"
88
+ dna_out_dir = "./#{@outdir}/align-genes-dna"
89
89
 
90
90
  # create multifasta by syntenic proteins (pep)
91
91
  if ! File.exists? pep_out_dir+"/#{ref_prot}.pep"
@@ -160,8 +160,8 @@ class BacterialComparator
160
160
 
161
161
  fout.close
162
162
 
163
- pep_out_dir = "./#{@outdir}/genes-align-pep"
164
- dna_out_dir = "./#{@outdir}/genes-align-dna"
163
+ pep_out_dir = "./#{@outdir}/align-genes-pep"
164
+ dna_out_dir = "./#{@outdir}/align-genes-dna"
165
165
  Dir.mkdir(pep_out_dir) if ! Dir.exists? pep_out_dir
166
166
  Dir.mkdir(dna_out_dir) if ! Dir.exists? dna_out_dir
167
167
 
@@ -202,8 +202,10 @@ class BacterialComparator
202
202
 
203
203
 
204
204
  def mafft_align_all_pep
205
+
205
206
  puts "# MAFFT multialign all protein sequences.."
206
- Dir.chdir("#{@outdir}/genes-align-pep/")
207
+ ori_dir = Dir.pwd
208
+ Dir.chdir("#{@outdir}/align-genes-pep/")
207
209
 
208
210
  is_done = 1
209
211
  if Dir["*.pep"].length == Dir["*.aln"].length
@@ -220,16 +222,19 @@ class BacterialComparator
220
222
  Parallel.map(Dir["*.pep"], in_processes: @proc) { |f|
221
223
  mafft_align f
222
224
  }
225
+ else
226
+ puts "..Prot alignment files already exists, skipping."
223
227
  end
224
228
 
225
- concat_alignments "genes-align-pep.concat.fasta"
226
- Dir.chdir("../../")
229
+ concat_alignments "align-genes-pep.all.fasta"
230
+ Dir.chdir(ori_dir)
227
231
 
228
232
  end
229
233
 
230
234
  def mafft_align_all_dna
231
235
  puts "# MAFFT multialign all gene sequences.."
232
- Dir.chdir("#{@outdir}/genes-align-dna/")
236
+ ori_dir = Dir.pwd
237
+ Dir.chdir("#{@outdir}/align-genes-dna/")
233
238
 
234
239
  is_done = 1
235
240
  if Dir["*.dna"].length == Dir["*.aln"].length
@@ -246,16 +251,23 @@ class BacterialComparator
246
251
  Parallel.map(Dir["*.dna"], in_processes: @proc) { |f|
247
252
  mafft_align f
248
253
  }
254
+ else
255
+ puts "..Gene alignment files already exists, skipping."
249
256
  end
250
257
 
251
- concat_alignments "genes-align-dna.concat.fasta"
252
- Dir.chdir("../../")
258
+ concat_alignments "align-genes-dna.all.fasta"
259
+ Dir.chdir(ori_dir)
253
260
 
254
261
  end
255
262
 
256
263
 
257
264
  def concat_alignments outfile
258
265
 
266
+ if File.exists?("../#{outfile}") and File.size("../#{outfile}") > 0
267
+ puts "..Alignment concatenated file already exists, skipping."
268
+ return
269
+ end
270
+
259
271
  fout = File.open("../#{outfile}", "w")
260
272
 
261
273
  ref_id = Dir["../../#{@genomes_list[0]}/*.pep"][0].gsub(/.*\//,"").gsub(".pep","")
@@ -297,5 +309,64 @@ class BacterialComparator
297
309
 
298
310
  end
299
311
 
312
+ def mafft_aln aln_opt
313
+
314
+ if aln_opt == "both"
315
+ mafft_align_all_pep
316
+ mafft_align_all_dna
317
+ elsif aln_opt == "prot"
318
+ mafft_align_all_pep
319
+ elsif aln_opt == "dna"
320
+ mafft_align_all_dna
321
+ end
322
+
323
+ end
324
+
325
+
326
+ def raxml_tree_dna bt
327
+
328
+ # DNA tree
329
+ puts "# RAXML DNA tree creation.. "
330
+ ori_dir = Dir.pwd
331
+ Dir.chdir(@outdir)
332
+ Dir.mkdir("tree-genes-dna") if ! Dir.exists?("tree-genes-dna")
333
+ current_dir = Dir.pwd
334
+ tree_dir = "#{current_dir}/tree-genes-dna"
335
+ cmd = system("#{@root}/raxml.linux -T #{@proc} -f d -N #{bt} -s align-genes-dna.all.fasta -m GTRGAMMA -p 123454321 -n DnaTree -w #{tree_dir}")
336
+ cmd = system("cat #{tree_dir}/RAxML_result.DnaTree.RUN.* >> #{tree_dir}/RAxML_result.BS")
337
+ cmd = system("#{@root}/raxml.linux -T 3 -f b -z #{tree_dir}/RAxML_result.BS -t #{tree_dir}/RAxML_bestTree.DnaTree -m GTRGAMMA -n DNA_BS_TREE -w #{tree_dir}")
338
+ cmd = system("ln -s #{tree_dir}/RAxML_bipartitionsBranchLabels.DNA_BS_TREE #{tree_dir}/../")
339
+ Dir.chdir(ori_dir)
340
+ end
341
+
342
+ def raxml_tree_pep bt
343
+
344
+ # Prot tree
345
+ puts "# RAXML Protein tree creation.. "
346
+ ori_dir = Dir.pwd
347
+ Dir.chdir(@outdir)
348
+ Dir.mkdir("tree-genes-pep") if ! Dir.exists?("tree-genes-pep")
349
+ current_dir = Dir.pwd
350
+ tree_dir = "#{current_dir}/tree-genes-pep"
351
+ cmd = system("#{@root}/raxml.linux -T #{@proc} -f d -N #{bt} -s align-genes-pep.all.fasta -m PROTGAMMAAUTO -p 123454321 -n PepTree -w #{tree_dir}")
352
+ cmd = system("cat #{tree_dir}/RAxML_result.PepTree.RUN.* >> #{tree_dir}/RAxML_result.BS")
353
+ cmd = system("#{@root}/raxml.linux -T 3 -f b -z #{tree_dir}/RAxML_result.BS -t #{tree_dir}/RAxML_bestTree.PepTree -m PROTGAMMAAUTO -n PEP_BS_TREE -w #{tree_dir}")
354
+ cmd = system("ln -s #{tree_dir}/RAxML_bipartitionsBranchLabels.PEP_BS_TREE #{tree_dir}/../")
355
+ Dir.chdir(ori_dir)
356
+ end
357
+
358
+
359
+ def raxml_tree aln_opt, bt
360
+
361
+ if aln_opt == "both"
362
+ raxml_tree_dna bt
363
+ raxml_tree_pep bt
364
+ elsif aln_opt == "prot"
365
+ raxml_tree_pep bt
366
+ elsif aln_opt == "dna"
367
+ raxml_tree_dna bt
368
+ end
369
+
370
+ end
300
371
 
301
372
  end # end of Class
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
@@ -78,12 +78,14 @@ executables:
78
78
  - ba_prodigal
79
79
  - ba_blat
80
80
  - ba_mafft
81
+ - ba_raxml
81
82
  extensions: []
82
83
  extra_rdoc_files: []
83
84
  files:
84
85
  - bin/ba_blat
85
86
  - bin/ba_mafft
86
87
  - bin/ba_prodigal
88
+ - bin/ba_raxml
87
89
  - bin/bacterial-annotator
88
90
  - lib/bacterial-annotator.rb
89
91
  - lib/bacterial-annotator/fasta-manip.rb