bacterial-annotator 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ba_raxml +3 -2
- data/bin/bacterial-annotator +35 -9
- data/lib/bacterial-annotator.rb +4 -0
- data/lib/bacterial-annotator/sequence-synteny.rb +2 -53
- data/lib/bacterial-comparator.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 713193368d710e1a3f78b0279657fab96ded5586
|
4
|
+
data.tar.gz: 82a924362e32d116b029429a00e55a45aa3f8dc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d853a09c8cfbb5e9bfc4525acf4d222ed8c15d601c7b40c0f68d21c724419f773dbe354e161940a04138a59c75c49743165f290f0f4acc1e4a7b15a7d5b9680
|
7
|
+
data.tar.gz: 3772047f7326e06f3cafd73f8776318001b3e53b446a92e144ec30725dd22fc41cb13c08a5f4cb4cc638bbc6b728f4c7af8b8375755f58b44ec42940f589cd6c
|
data/bin/ba_raxml
CHANGED
@@ -10,17 +10,18 @@
|
|
10
10
|
require 'open-uri'
|
11
11
|
|
12
12
|
ROOT_path = File.dirname(__FILE__)
|
13
|
-
# raxml_url = https://github.com/stamatak/standard-RAxML/archive/master.zip
|
13
|
+
# raxml_url = https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip
|
14
14
|
# Install RAXML on the user system
|
15
15
|
def installRaxml
|
16
16
|
|
17
17
|
begin
|
18
|
-
resp = open("https://github.com/stamatak/standard-RAxML/archive/master.zip")
|
18
|
+
resp = open("https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip")
|
19
19
|
open("#{ROOT_path}/master.zip", "wb") do |file|
|
20
20
|
file.write(resp.read)
|
21
21
|
end
|
22
22
|
Dir.chdir("#{ROOT_path}/")
|
23
23
|
`unzip master.zip && rm master.zip`
|
24
|
+
`mv standard-RAxML-8.2.11 standard-RAxML-master`
|
24
25
|
Dir.chdir("#{ROOT_path}/standard-RAxML-master")
|
25
26
|
`make -f Makefile.PTHREADS.gcc`
|
26
27
|
`rm *.o`
|
data/bin/bacterial-annotator
CHANGED
@@ -11,6 +11,21 @@
|
|
11
11
|
require 'bacterial-annotator'
|
12
12
|
require 'bacterial-comparator'
|
13
13
|
|
14
|
+
VERSION = "0.6.1"
|
15
|
+
|
16
|
+
def print_version
|
17
|
+
version = "Bacterial Annotator - Version #{VERSION}\n\n"
|
18
|
+
version += "Packaged with : \n"
|
19
|
+
version += " -- Prodigal v2.6.2\n"
|
20
|
+
version += " -- BLAT standalone version\n"
|
21
|
+
version += " -- MAFFT v7.222\n"
|
22
|
+
version += " -- fasta36 v36.3.8d\n"
|
23
|
+
version += " -- RAxML v8.2.11\n"
|
24
|
+
version += " -- FastTree v2.1.10\n"
|
25
|
+
puts version
|
26
|
+
end
|
27
|
+
|
28
|
+
|
14
29
|
|
15
30
|
# Usage message to print to CLI
|
16
31
|
def usage
|
@@ -29,6 +44,8 @@ bacterial-annotator [annotate | compare] [OPTIONS]
|
|
29
44
|
|
30
45
|
--help/-h Print this !
|
31
46
|
|
47
|
+
--version/-v Version of the Bacterial-Annotator
|
48
|
+
|
32
49
|
OEM
|
33
50
|
|
34
51
|
end
|
@@ -47,7 +64,7 @@ annotate [OPTIONS]
|
|
47
64
|
--name/-n <name> Sample name
|
48
65
|
|
49
66
|
// MERGEM-based Annotation (Recommended)
|
50
|
-
--db MERGEM database directory
|
67
|
+
--db/-d <directory> MERGEM database directory
|
51
68
|
|
52
69
|
// Reference-Based Annotation
|
53
70
|
--refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
|
@@ -56,8 +73,8 @@ annotate [OPTIONS]
|
|
56
73
|
Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
|
57
74
|
>gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
|
58
75
|
>sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
|
59
|
-
--pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.
|
60
|
-
--pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.
|
76
|
+
--pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
|
77
|
+
--pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
|
61
78
|
.. otherwise hint for a non-functional protein
|
62
79
|
--minlength <length> Minimum contig length for annotation [default=500]
|
63
80
|
|
@@ -74,8 +91,8 @@ def parseOptions_annotate
|
|
74
91
|
|
75
92
|
# default options
|
76
93
|
options[:outdir] = "BAnnotation"
|
77
|
-
options[:pidentity] =
|
78
|
-
options[:pcoverage] =
|
94
|
+
options[:pidentity] = 80
|
95
|
+
options[:pcoverage] = 80
|
79
96
|
options[:minlength] = 500
|
80
97
|
options[:meta] = 0
|
81
98
|
|
@@ -84,6 +101,10 @@ def parseOptions_annotate
|
|
84
101
|
case x.downcase
|
85
102
|
when "--input", "-i"
|
86
103
|
options[:input] = ARGV.shift
|
104
|
+
when "--name", "-n"
|
105
|
+
options[:name] = ARGV.shift
|
106
|
+
when "--db", "-d"
|
107
|
+
options[:mergem] = ARGV.shift
|
87
108
|
when "--refgenome", "-g"
|
88
109
|
options[:refgenome] = ARGV.shift
|
89
110
|
when "--outdir", "-o"
|
@@ -101,8 +122,12 @@ def parseOptions_annotate
|
|
101
122
|
when "--externaldb"
|
102
123
|
options[:external_db] = ARGV.shift
|
103
124
|
when "--help", "-h"
|
125
|
+
|
104
126
|
usage_annotate
|
105
127
|
abort
|
128
|
+
when "--version", "-v"
|
129
|
+
print_version
|
130
|
+
abort
|
106
131
|
end
|
107
132
|
|
108
133
|
end
|
@@ -188,12 +213,11 @@ def parseOptions_compare
|
|
188
213
|
|
189
214
|
end
|
190
215
|
|
191
|
-
|
192
216
|
########
|
193
217
|
# MAIN #
|
194
218
|
########
|
195
219
|
|
196
|
-
if ARGV.size > 1
|
220
|
+
if ARGV.size >= 1
|
197
221
|
|
198
222
|
ROOT = File.dirname(__FILE__)
|
199
223
|
|
@@ -242,11 +266,13 @@ if ARGV.size > 1
|
|
242
266
|
bcomp.mafft_aln aln_opt
|
243
267
|
bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
|
244
268
|
|
245
|
-
|
269
|
+
elsif ARGV[0] == "--version" or ARGV[0] == "-v"
|
246
270
|
|
247
|
-
|
271
|
+
print_version
|
248
272
|
abort
|
249
273
|
|
274
|
+
else
|
275
|
+
usage
|
250
276
|
end
|
251
277
|
|
252
278
|
|
data/lib/bacterial-annotator.rb
CHANGED
@@ -30,6 +30,10 @@ class BacterialAnnotator
|
|
30
30
|
@options[:pcoverage] = @options[:pcoverage].to_f
|
31
31
|
@options[:pcoverage] = @options[:pcoverage] / 100 if @options[:pcoverage] > 1.00
|
32
32
|
|
33
|
+
if ! @options.has_key? :name
|
34
|
+
@options[:name] = @options[:input].gsub(/.fasta|.fa|.fna/,"")
|
35
|
+
end
|
36
|
+
|
33
37
|
if File.exists? (@options[:outdir])
|
34
38
|
if ! options.has_key? :force
|
35
39
|
abort "Output directory already exist ! Choose another one or use -f to overwrite"
|
@@ -39,7 +39,6 @@ class SequenceSynteny
|
|
39
39
|
partial = false
|
40
40
|
if properties.length >= 2 and properties[1].include? "partial"
|
41
41
|
partial = (properties[1].gsub("partial=","")=='01')
|
42
|
-
puts "partial:" + partial.to_s
|
43
42
|
end
|
44
43
|
sequences[s_name][:partial] = partial
|
45
44
|
sequences[s_name][:length] = s.seq.length
|
@@ -54,9 +53,10 @@ class SequenceSynteny
|
|
54
53
|
# run blat on proteins
|
55
54
|
def run_blat root, outdir
|
56
55
|
base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity}"
|
57
|
-
system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
|
58
56
|
if @type == "prot"
|
59
57
|
system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
|
58
|
+
else
|
59
|
+
system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
|
60
60
|
end
|
61
61
|
@aln_file = "#{outdir}/#{@name}.blat8.tsv"
|
62
62
|
# extract_hits
|
@@ -153,57 +153,6 @@ class SequenceSynteny
|
|
153
153
|
end # end of method
|
154
154
|
|
155
155
|
|
156
|
-
# Extract Hit from blast8 file and save it in hash
|
157
|
-
# contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
|
158
|
-
def extract_hits_prodigal mode
|
159
|
-
|
160
|
-
@aln_hits = {}
|
161
|
-
feature = ""
|
162
|
-
File.open(@aln_file,"r") do |fread|
|
163
|
-
while l = fread.gets
|
164
|
-
lA = l.chomp!.split("\t")
|
165
|
-
key = lA[0]
|
166
|
-
if mode == :refgenome
|
167
|
-
hit = lA[1]
|
168
|
-
feature = "cds"
|
169
|
-
elsif mode == :externaldb
|
170
|
-
hit = lA[1].chomp.split("|")[3]
|
171
|
-
feature = "cds"
|
172
|
-
end
|
173
|
-
next if lA[2].to_f < @pidentity
|
174
|
-
if ! @aln_hits.has_key? key
|
175
|
-
@aln_hits[key] = {
|
176
|
-
pId: lA[2].to_f.round(2),
|
177
|
-
evalue: lA[10],
|
178
|
-
score: lA[11].to_f,
|
179
|
-
hits: [hit],
|
180
|
-
length: [lA[3].to_i],
|
181
|
-
query_location: [[lA[6].to_i,lA[7].to_i]],
|
182
|
-
subject_location: [[lA[8].to_i,lA[9].to_i]],
|
183
|
-
feature: feature
|
184
|
-
}
|
185
|
-
elsif lA[11].to_f > @aln_hits[key][:score]
|
186
|
-
@aln_hits[key] = {
|
187
|
-
pId: lA[2].to_f.round(2),
|
188
|
-
evalue: lA[10],
|
189
|
-
score: lA[11].to_f,
|
190
|
-
hits: [hit],
|
191
|
-
length: [lA[3].to_i],
|
192
|
-
query_location: [[lA[6].to_i,lA[7].to_i]],
|
193
|
-
subject_location: [[lA[8].to_i,lA[9].to_i]],
|
194
|
-
feature: feature
|
195
|
-
}
|
196
|
-
elsif lA[11].to_f == @aln_hits[key][:score]
|
197
|
-
@aln_hits[key][:hits] << hit
|
198
|
-
@aln_hits[key][:length] << lA[3].to_i
|
199
|
-
@aln_hits[key][:query_location] << [lA[6].to_i,lA[7].to_i]
|
200
|
-
@aln_hits[key][:subject_location] << [lA[8].to_i,lA[9].to_i]
|
201
|
-
end
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
end # end of method
|
206
|
-
|
207
156
|
# Extract Hit from blast8 file and save it in hash
|
208
157
|
# prpa PA0668.4|rRNA|23S 99.97 2891 1 0 705042 707932 1 2891 0.0e+00 5671.0
|
209
158
|
def extract_hits_dna mode
|
data/lib/bacterial-comparator.rb
CHANGED
@@ -247,7 +247,7 @@ class BacterialComparator
|
|
247
247
|
puts "..Prot alignment files already exists, skipping."
|
248
248
|
end
|
249
249
|
|
250
|
-
# ugly hack to find out the reference genome
|
250
|
+
# FIXME ugly hack to find out the reference genome
|
251
251
|
ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
|
252
252
|
|
253
253
|
concat_alignments "align-genes-pep.all.fasta", ref_id
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|