bacterial-annotator 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ba_raxml +3 -2
- data/bin/bacterial-annotator +35 -9
- data/lib/bacterial-annotator.rb +4 -0
- data/lib/bacterial-annotator/sequence-synteny.rb +2 -53
- data/lib/bacterial-comparator.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 713193368d710e1a3f78b0279657fab96ded5586
|
4
|
+
data.tar.gz: 82a924362e32d116b029429a00e55a45aa3f8dc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d853a09c8cfbb5e9bfc4525acf4d222ed8c15d601c7b40c0f68d21c724419f773dbe354e161940a04138a59c75c49743165f290f0f4acc1e4a7b15a7d5b9680
|
7
|
+
data.tar.gz: 3772047f7326e06f3cafd73f8776318001b3e53b446a92e144ec30725dd22fc41cb13c08a5f4cb4cc638bbc6b728f4c7af8b8375755f58b44ec42940f589cd6c
|
data/bin/ba_raxml
CHANGED
@@ -10,17 +10,18 @@
|
|
10
10
|
require 'open-uri'
|
11
11
|
|
12
12
|
ROOT_path = File.dirname(__FILE__)
|
13
|
-
# raxml_url = https://github.com/stamatak/standard-RAxML/archive/
|
13
|
+
# raxml_url = https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip
|
14
14
|
# Install RAXML on the user system
|
15
15
|
def installRaxml
|
16
16
|
|
17
17
|
begin
|
18
|
-
resp = open("https://github.com/stamatak/standard-RAxML/archive/
|
18
|
+
resp = open("https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip")
|
19
19
|
open("#{ROOT_path}/master.zip", "wb") do |file|
|
20
20
|
file.write(resp.read)
|
21
21
|
end
|
22
22
|
Dir.chdir("#{ROOT_path}/")
|
23
23
|
`unzip master.zip && rm master.zip`
|
24
|
+
`mv standard-RAxML-8.2.11 standard-RAxML-master`
|
24
25
|
Dir.chdir("#{ROOT_path}/standard-RAxML-master")
|
25
26
|
`make -f Makefile.PTHREADS.gcc`
|
26
27
|
`rm *.o`
|
data/bin/bacterial-annotator
CHANGED
@@ -11,6 +11,21 @@
|
|
11
11
|
require 'bacterial-annotator'
|
12
12
|
require 'bacterial-comparator'
|
13
13
|
|
14
|
+
VERSION = "0.6.1"
|
15
|
+
|
16
|
+
def print_version
|
17
|
+
version = "Bacterial Annotator - Version #{VERSION}\n\n"
|
18
|
+
version += "Packaged with : \n"
|
19
|
+
version += " -- Prodigal v2.6.2\n"
|
20
|
+
version += " -- BLAT standalone version\n"
|
21
|
+
version += " -- MAFFT v7.222\n"
|
22
|
+
version += " -- fasta36 v36.3.8d\n"
|
23
|
+
version += " -- RAxML v8.2.11\n"
|
24
|
+
version += " -- FastTree v2.1.10\n"
|
25
|
+
puts version
|
26
|
+
end
|
27
|
+
|
28
|
+
|
14
29
|
|
15
30
|
# Usage message to print to CLI
|
16
31
|
def usage
|
@@ -29,6 +44,8 @@ bacterial-annotator [annotate | compare] [OPTIONS]
|
|
29
44
|
|
30
45
|
--help/-h Print this !
|
31
46
|
|
47
|
+
--version/-v Version of the Bacterial-Annotator
|
48
|
+
|
32
49
|
OEM
|
33
50
|
|
34
51
|
end
|
@@ -47,7 +64,7 @@ annotate [OPTIONS]
|
|
47
64
|
--name/-n <name> Sample name
|
48
65
|
|
49
66
|
// MERGEM-based Annotation (Recommended)
|
50
|
-
--db MERGEM database directory
|
67
|
+
--db/-d <directory> MERGEM database directory
|
51
68
|
|
52
69
|
// Reference-Based Annotation
|
53
70
|
--refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
|
@@ -56,8 +73,8 @@ annotate [OPTIONS]
|
|
56
73
|
Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
|
57
74
|
>gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
|
58
75
|
>sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
|
59
|
-
--pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.
|
60
|
-
--pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.
|
76
|
+
--pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
|
77
|
+
--pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
|
61
78
|
.. otherwise hint for a non-functional protein
|
62
79
|
--minlength <length> Minimum contig length for annotation [default=500]
|
63
80
|
|
@@ -74,8 +91,8 @@ def parseOptions_annotate
|
|
74
91
|
|
75
92
|
# default options
|
76
93
|
options[:outdir] = "BAnnotation"
|
77
|
-
options[:pidentity] =
|
78
|
-
options[:pcoverage] =
|
94
|
+
options[:pidentity] = 80
|
95
|
+
options[:pcoverage] = 80
|
79
96
|
options[:minlength] = 500
|
80
97
|
options[:meta] = 0
|
81
98
|
|
@@ -84,6 +101,10 @@ def parseOptions_annotate
|
|
84
101
|
case x.downcase
|
85
102
|
when "--input", "-i"
|
86
103
|
options[:input] = ARGV.shift
|
104
|
+
when "--name", "-n"
|
105
|
+
options[:name] = ARGV.shift
|
106
|
+
when "--db", "-d"
|
107
|
+
options[:mergem] = ARGV.shift
|
87
108
|
when "--refgenome", "-g"
|
88
109
|
options[:refgenome] = ARGV.shift
|
89
110
|
when "--outdir", "-o"
|
@@ -101,8 +122,12 @@ def parseOptions_annotate
|
|
101
122
|
when "--externaldb"
|
102
123
|
options[:external_db] = ARGV.shift
|
103
124
|
when "--help", "-h"
|
125
|
+
|
104
126
|
usage_annotate
|
105
127
|
abort
|
128
|
+
when "--version", "-v"
|
129
|
+
print_version
|
130
|
+
abort
|
106
131
|
end
|
107
132
|
|
108
133
|
end
|
@@ -188,12 +213,11 @@ def parseOptions_compare
|
|
188
213
|
|
189
214
|
end
|
190
215
|
|
191
|
-
|
192
216
|
########
|
193
217
|
# MAIN #
|
194
218
|
########
|
195
219
|
|
196
|
-
if ARGV.size
|
220
|
+
if ARGV.size >= 1
|
197
221
|
|
198
222
|
ROOT = File.dirname(__FILE__)
|
199
223
|
|
@@ -242,11 +266,13 @@ if ARGV.size > 1
|
|
242
266
|
bcomp.mafft_aln aln_opt
|
243
267
|
bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
|
244
268
|
|
245
|
-
|
269
|
+
elsif ARGV[0] == "--version" or ARGV[0] == "-v"
|
246
270
|
|
247
|
-
|
271
|
+
print_version
|
248
272
|
abort
|
249
273
|
|
274
|
+
else
|
275
|
+
usage
|
250
276
|
end
|
251
277
|
|
252
278
|
|
data/lib/bacterial-annotator.rb
CHANGED
@@ -30,6 +30,10 @@ class BacterialAnnotator
|
|
30
30
|
@options[:pcoverage] = @options[:pcoverage].to_f
|
31
31
|
@options[:pcoverage] = @options[:pcoverage] / 100 if @options[:pcoverage] > 1.00
|
32
32
|
|
33
|
+
if ! @options.has_key? :name
|
34
|
+
@options[:name] = @options[:input].gsub(/.fasta|.fa|.fna/,"")
|
35
|
+
end
|
36
|
+
|
33
37
|
if File.exists? (@options[:outdir])
|
34
38
|
if ! options.has_key? :force
|
35
39
|
abort "Output directory already exist ! Choose another one or use -f to overwrite"
|
@@ -39,7 +39,6 @@ class SequenceSynteny
|
|
39
39
|
partial = false
|
40
40
|
if properties.length >= 2 and properties[1].include? "partial"
|
41
41
|
partial = (properties[1].gsub("partial=","")=='01')
|
42
|
-
puts "partial:" + partial.to_s
|
43
42
|
end
|
44
43
|
sequences[s_name][:partial] = partial
|
45
44
|
sequences[s_name][:length] = s.seq.length
|
@@ -54,9 +53,10 @@ class SequenceSynteny
|
|
54
53
|
# run blat on proteins
|
55
54
|
def run_blat root, outdir
|
56
55
|
base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity}"
|
57
|
-
system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
|
58
56
|
if @type == "prot"
|
59
57
|
system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
|
58
|
+
else
|
59
|
+
system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
|
60
60
|
end
|
61
61
|
@aln_file = "#{outdir}/#{@name}.blat8.tsv"
|
62
62
|
# extract_hits
|
@@ -153,57 +153,6 @@ class SequenceSynteny
|
|
153
153
|
end # end of method
|
154
154
|
|
155
155
|
|
156
|
-
# Extract Hit from blast8 file and save it in hash
|
157
|
-
# contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
|
158
|
-
def extract_hits_prodigal mode
|
159
|
-
|
160
|
-
@aln_hits = {}
|
161
|
-
feature = ""
|
162
|
-
File.open(@aln_file,"r") do |fread|
|
163
|
-
while l = fread.gets
|
164
|
-
lA = l.chomp!.split("\t")
|
165
|
-
key = lA[0]
|
166
|
-
if mode == :refgenome
|
167
|
-
hit = lA[1]
|
168
|
-
feature = "cds"
|
169
|
-
elsif mode == :externaldb
|
170
|
-
hit = lA[1].chomp.split("|")[3]
|
171
|
-
feature = "cds"
|
172
|
-
end
|
173
|
-
next if lA[2].to_f < @pidentity
|
174
|
-
if ! @aln_hits.has_key? key
|
175
|
-
@aln_hits[key] = {
|
176
|
-
pId: lA[2].to_f.round(2),
|
177
|
-
evalue: lA[10],
|
178
|
-
score: lA[11].to_f,
|
179
|
-
hits: [hit],
|
180
|
-
length: [lA[3].to_i],
|
181
|
-
query_location: [[lA[6].to_i,lA[7].to_i]],
|
182
|
-
subject_location: [[lA[8].to_i,lA[9].to_i]],
|
183
|
-
feature: feature
|
184
|
-
}
|
185
|
-
elsif lA[11].to_f > @aln_hits[key][:score]
|
186
|
-
@aln_hits[key] = {
|
187
|
-
pId: lA[2].to_f.round(2),
|
188
|
-
evalue: lA[10],
|
189
|
-
score: lA[11].to_f,
|
190
|
-
hits: [hit],
|
191
|
-
length: [lA[3].to_i],
|
192
|
-
query_location: [[lA[6].to_i,lA[7].to_i]],
|
193
|
-
subject_location: [[lA[8].to_i,lA[9].to_i]],
|
194
|
-
feature: feature
|
195
|
-
}
|
196
|
-
elsif lA[11].to_f == @aln_hits[key][:score]
|
197
|
-
@aln_hits[key][:hits] << hit
|
198
|
-
@aln_hits[key][:length] << lA[3].to_i
|
199
|
-
@aln_hits[key][:query_location] << [lA[6].to_i,lA[7].to_i]
|
200
|
-
@aln_hits[key][:subject_location] << [lA[8].to_i,lA[9].to_i]
|
201
|
-
end
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
end # end of method
|
206
|
-
|
207
156
|
# Extract Hit from blast8 file and save it in hash
|
208
157
|
# prpa PA0668.4|rRNA|23S 99.97 2891 1 0 705042 707932 1 2891 0.0e+00 5671.0
|
209
158
|
def extract_hits_dna mode
|
data/lib/bacterial-comparator.rb
CHANGED
@@ -247,7 +247,7 @@ class BacterialComparator
|
|
247
247
|
puts "..Prot alignment files already exists, skipping."
|
248
248
|
end
|
249
249
|
|
250
|
-
# ugly hack to find out the reference genome
|
250
|
+
# FIXME ugly hack to find out the reference genome
|
251
251
|
ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
|
252
252
|
|
253
253
|
concat_alignments "align-genes-pep.all.fasta", ref_id
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|