bacterial-annotator 0.9.1 → 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ba_diamond +4 -6
- data/lib/bacterial-annotator/sequence-annotation.rb +79 -65
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6b26cd37618fedd557b8d9f7fe2c27a3b95dee5404d7751a8fb10ddcb63edc8
|
4
|
+
data.tar.gz: 2129f8372c3a48654cf3bdf6b75eccce3148b45f0e15191f3a555ff0769f9bd4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2400660df26bab507b5f9a3310105514d415a4950a06fdb7a3cbc9a4de2b4268d77c3725a13507e19c92fcdcc688f2bda4184aeadb88afd4a444addce1117fb9
|
7
|
+
data.tar.gz: 9caef8af954e6de43ed3eb2cd5a729065bafd2451aa5c73e00e60b400cb849666a53fb1e39d549f4b33925edbfea33f209a966da7f64d91ac1aa3d8b75e5f4d2
|
data/bin/ba_diamond
CHANGED
@@ -2,28 +2,27 @@
|
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
# author: maxime déraspe
|
4
4
|
# email: maximilien1er@gmail.com
|
5
|
-
# review:
|
5
|
+
# review:
|
6
6
|
# date: 17-08-24
|
7
7
|
# version: 0.01
|
8
|
-
# licence:
|
8
|
+
# licence:
|
9
9
|
|
10
10
|
require 'open-uri'
|
11
11
|
|
12
12
|
ROOT_path = File.dirname(__FILE__)
|
13
|
-
# diamond URL = "https://github.com/bbuchfink/diamond/releases/download/
|
13
|
+
# diamond URL = "https://github.com/bbuchfink/diamond/releases/download/v2.0.15/diamond-linux64.tar.gz"
|
14
14
|
|
15
15
|
# Install diamond on the user system
|
16
16
|
def installDiamond
|
17
17
|
|
18
18
|
begin
|
19
|
-
resp = open("https://github.com/bbuchfink/diamond/releases/download/
|
19
|
+
resp = open("https://github.com/bbuchfink/diamond/releases/download/v2.0.15/diamond-linux64.tar.gz")
|
20
20
|
open("#{ROOT_path}/diamond-linux64.tar.gz", "wb") do |file|
|
21
21
|
file.write(resp.read)
|
22
22
|
end
|
23
23
|
Dir.chdir("#{ROOT_path}/")
|
24
24
|
`tar xvf diamond-linux64.tar.gz; rm diamond-linux64.tar.gz`
|
25
25
|
`mv diamond ./diamond.linux`
|
26
|
-
`rm diamond_manual.pdf`
|
27
26
|
File.chmod(0755, "#{ROOT_path}/diamond.linux")
|
28
27
|
rescue
|
29
28
|
abort "Problem installing Diamond, aborting"
|
@@ -43,4 +42,3 @@ if ! File.exists? "#{ROOT_path}/diamond.linux"
|
|
43
42
|
puts ""
|
44
43
|
|
45
44
|
end
|
46
|
-
|
@@ -3,7 +3,7 @@
|
|
3
3
|
# email: maximilien1er@gmail.com
|
4
4
|
# date: 15-02-24
|
5
5
|
# version: 0.0.1
|
6
|
-
# licence:
|
6
|
+
# licence:
|
7
7
|
|
8
8
|
require 'json'
|
9
9
|
require 'zlib'
|
@@ -104,7 +104,16 @@ class SequenceAnnotation
|
|
104
104
|
if flat_gbk.dbclass != Bio::GenBank
|
105
105
|
abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
|
106
106
|
else
|
107
|
-
|
107
|
+
|
108
|
+
@gbk_all = []
|
109
|
+
@bioseq_all = []
|
110
|
+
flat_gbk.each_entry do |e|
|
111
|
+
@gbk_all << e
|
112
|
+
@bioseq_all << e.to_biosequence
|
113
|
+
end
|
114
|
+
|
115
|
+
@gbk = @gbk_all[0]
|
116
|
+
|
108
117
|
end
|
109
118
|
|
110
119
|
@bioseq = @gbk.to_biosequence
|
@@ -221,41 +230,45 @@ class SequenceAnnotation
|
|
221
230
|
|
222
231
|
if @coding_seq.empty?
|
223
232
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
237
|
-
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
238
|
-
|
239
|
-
dna = get_DNA(ft,@bioseq)
|
240
|
-
pep = dna.translate
|
241
|
-
pepBioSeq = Bio::Sequence.auto(pep)
|
242
|
-
dnaBioSeq = Bio::Sequence.auto(dna)
|
243
|
-
|
244
|
-
if protId.strip == ""
|
245
|
-
protId = locustag
|
246
|
-
end
|
233
|
+
@gbk_all.each_with_index do |gb, gb_i|
|
234
|
+
|
235
|
+
# Iterate over each CDS
|
236
|
+
gb.each_cds do |ft|
|
237
|
+
ftH = ft.to_hash
|
238
|
+
loc = ft.locations
|
239
|
+
gene = []
|
240
|
+
product = []
|
241
|
+
protId = ""
|
242
|
+
if ftH.has_key? "pseudo"
|
243
|
+
next
|
244
|
+
end
|
247
245
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
locustag
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
246
|
+
gene = ftH["gene"] if !ftH["gene"].nil?
|
247
|
+
product = ftH["product"] if !ftH["product"].nil?
|
248
|
+
protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
249
|
+
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
250
|
+
|
251
|
+
dna = get_DNA(ft,@bioseq_all[gb_i])
|
252
|
+
pep = dna.translate
|
253
|
+
pepBioSeq = Bio::Sequence.auto(pep)
|
254
|
+
dnaBioSeq = Bio::Sequence.auto(dna)
|
255
|
+
|
256
|
+
if protId.strip == ""
|
257
|
+
protId = locustag
|
258
|
+
end
|
258
259
|
|
260
|
+
@coding_seq[protId] = {
|
261
|
+
protId: protId,
|
262
|
+
location: loc,
|
263
|
+
locustag: locustag,
|
264
|
+
gene: gene[0],
|
265
|
+
product: product[0],
|
266
|
+
bioseq: pepBioSeq,
|
267
|
+
bioseq_gene: dnaBioSeq,
|
268
|
+
length: pepBioSeq.length
|
269
|
+
}
|
270
|
+
|
271
|
+
end
|
259
272
|
end
|
260
273
|
|
261
274
|
end
|
@@ -270,43 +283,45 @@ class SequenceAnnotation
|
|
270
283
|
if @rna_seq.empty?
|
271
284
|
|
272
285
|
@rna_seq = {}
|
273
|
-
@gbk.features do |ft|
|
274
286
|
|
275
|
-
|
287
|
+
@gbk_all.each_with_index do |gb, gb_i|
|
288
|
+
gb.features do |ft|
|
276
289
|
|
277
|
-
|
278
|
-
loc = ft.locations
|
279
|
-
# seqBeg = loc[0].from.to_s
|
280
|
-
# seqEnd = loc[0].to.to_s
|
281
|
-
# strand = loc[0].strand.to_s
|
282
|
-
if ftH.has_key? "pseudo"
|
283
|
-
next
|
284
|
-
end
|
285
|
-
# gene = ftH["gene"] if !ftH["gene"].nil?
|
286
|
-
# protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
287
|
-
product = ""
|
290
|
+
next if ! ft.feature.to_s.include? "RNA"
|
288
291
|
|
289
|
-
|
290
|
-
|
291
|
-
#
|
292
|
-
|
292
|
+
ftH = ft.to_hash
|
293
|
+
loc = ft.locations
|
294
|
+
# seqBeg = loc[0].from.to_s
|
295
|
+
# seqEnd = loc[0].to.to_s
|
296
|
+
# strand = loc[0].strand.to_s
|
297
|
+
if ftH.has_key? "pseudo"
|
298
|
+
next
|
299
|
+
end
|
300
|
+
# gene = ftH["gene"] if !ftH["gene"].nil?
|
301
|
+
# protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
302
|
+
product = ""
|
293
303
|
|
294
|
-
|
304
|
+
if !ftH["product"].nil?
|
305
|
+
product = ftH["product"][0]
|
306
|
+
# puts ftH["product"].join(",") + "---" + ftH["product"][0]
|
307
|
+
end
|
295
308
|
|
296
|
-
|
297
|
-
dna = get_DNA(ft,@bioseq)
|
298
|
-
dnaBioSeq = Bio::Sequence.auto(dna)
|
309
|
+
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
299
310
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
locustag: locustag,
|
304
|
-
product: product,
|
305
|
-
bioseq_gene: dnaBioSeq
|
306
|
-
}
|
311
|
+
# puts "#{@accession}\t#{seqBeg}\t#{seqEnd}\t#{strand}\t#{protId}\t#{locustag}\t#{gene[0]}\t#{product[0]}"
|
312
|
+
dna = get_DNA(ft,@bioseq_all[gb_i])
|
313
|
+
dnaBioSeq = Bio::Sequence.auto(dna)
|
307
314
|
|
308
|
-
|
315
|
+
@rna_seq[locustag] = {
|
316
|
+
type: ft.feature.to_s,
|
317
|
+
location: loc,
|
318
|
+
locustag: locustag,
|
319
|
+
product: product,
|
320
|
+
bioseq_gene: dnaBioSeq
|
321
|
+
}
|
309
322
|
|
323
|
+
end
|
324
|
+
end
|
310
325
|
end
|
311
326
|
|
312
327
|
@rna_seq
|
@@ -622,4 +637,3 @@ class SequenceAnnotation
|
|
622
637
|
|
623
638
|
|
624
639
|
end # end of Class
|
625
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.1
|
4
|
+
version: 0.9.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|