bacterial-annotator 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ba_diamond +0 -1
- data/lib/bacterial-annotator/sequence-annotation.rb +79 -65
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6b26cd37618fedd557b8d9f7fe2c27a3b95dee5404d7751a8fb10ddcb63edc8
|
4
|
+
data.tar.gz: 2129f8372c3a48654cf3bdf6b75eccce3148b45f0e15191f3a555ff0769f9bd4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2400660df26bab507b5f9a3310105514d415a4950a06fdb7a3cbc9a4de2b4268d77c3725a13507e19c92fcdcc688f2bda4184aeadb88afd4a444addce1117fb9
|
7
|
+
data.tar.gz: 9caef8af954e6de43ed3eb2cd5a729065bafd2451aa5c73e00e60b400cb849666a53fb1e39d549f4b33925edbfea33f209a966da7f64d91ac1aa3d8b75e5f4d2
|
data/bin/ba_diamond
CHANGED
@@ -23,7 +23,6 @@ def installDiamond
|
|
23
23
|
Dir.chdir("#{ROOT_path}/")
|
24
24
|
`tar xvf diamond-linux64.tar.gz; rm diamond-linux64.tar.gz`
|
25
25
|
`mv diamond ./diamond.linux`
|
26
|
-
`rm diamond_manual.pdf`
|
27
26
|
File.chmod(0755, "#{ROOT_path}/diamond.linux")
|
28
27
|
rescue
|
29
28
|
abort "Problem installing Diamond, aborting"
|
@@ -3,7 +3,7 @@
|
|
3
3
|
# email: maximilien1er@gmail.com
|
4
4
|
# date: 15-02-24
|
5
5
|
# version: 0.0.1
|
6
|
-
# licence:
|
6
|
+
# licence:
|
7
7
|
|
8
8
|
require 'json'
|
9
9
|
require 'zlib'
|
@@ -104,7 +104,16 @@ class SequenceAnnotation
|
|
104
104
|
if flat_gbk.dbclass != Bio::GenBank
|
105
105
|
abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
|
106
106
|
else
|
107
|
-
|
107
|
+
|
108
|
+
@gbk_all = []
|
109
|
+
@bioseq_all = []
|
110
|
+
flat_gbk.each_entry do |e|
|
111
|
+
@gbk_all << e
|
112
|
+
@bioseq_all << e.to_biosequence
|
113
|
+
end
|
114
|
+
|
115
|
+
@gbk = @gbk_all[0]
|
116
|
+
|
108
117
|
end
|
109
118
|
|
110
119
|
@bioseq = @gbk.to_biosequence
|
@@ -221,41 +230,45 @@ class SequenceAnnotation
|
|
221
230
|
|
222
231
|
if @coding_seq.empty?
|
223
232
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
237
|
-
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
238
|
-
|
239
|
-
dna = get_DNA(ft,@bioseq)
|
240
|
-
pep = dna.translate
|
241
|
-
pepBioSeq = Bio::Sequence.auto(pep)
|
242
|
-
dnaBioSeq = Bio::Sequence.auto(dna)
|
243
|
-
|
244
|
-
if protId.strip == ""
|
245
|
-
protId = locustag
|
246
|
-
end
|
233
|
+
@gbk_all.each_with_index do |gb, gb_i|
|
234
|
+
|
235
|
+
# Iterate over each CDS
|
236
|
+
gb.each_cds do |ft|
|
237
|
+
ftH = ft.to_hash
|
238
|
+
loc = ft.locations
|
239
|
+
gene = []
|
240
|
+
product = []
|
241
|
+
protId = ""
|
242
|
+
if ftH.has_key? "pseudo"
|
243
|
+
next
|
244
|
+
end
|
247
245
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
locustag
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
246
|
+
gene = ftH["gene"] if !ftH["gene"].nil?
|
247
|
+
product = ftH["product"] if !ftH["product"].nil?
|
248
|
+
protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
249
|
+
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
250
|
+
|
251
|
+
dna = get_DNA(ft,@bioseq_all[gb_i])
|
252
|
+
pep = dna.translate
|
253
|
+
pepBioSeq = Bio::Sequence.auto(pep)
|
254
|
+
dnaBioSeq = Bio::Sequence.auto(dna)
|
255
|
+
|
256
|
+
if protId.strip == ""
|
257
|
+
protId = locustag
|
258
|
+
end
|
258
259
|
|
260
|
+
@coding_seq[protId] = {
|
261
|
+
protId: protId,
|
262
|
+
location: loc,
|
263
|
+
locustag: locustag,
|
264
|
+
gene: gene[0],
|
265
|
+
product: product[0],
|
266
|
+
bioseq: pepBioSeq,
|
267
|
+
bioseq_gene: dnaBioSeq,
|
268
|
+
length: pepBioSeq.length
|
269
|
+
}
|
270
|
+
|
271
|
+
end
|
259
272
|
end
|
260
273
|
|
261
274
|
end
|
@@ -270,43 +283,45 @@ class SequenceAnnotation
|
|
270
283
|
if @rna_seq.empty?
|
271
284
|
|
272
285
|
@rna_seq = {}
|
273
|
-
@gbk.features do |ft|
|
274
286
|
|
275
|
-
|
287
|
+
@gbk_all.each_with_index do |gb, gb_i|
|
288
|
+
gb.features do |ft|
|
276
289
|
|
277
|
-
|
278
|
-
loc = ft.locations
|
279
|
-
# seqBeg = loc[0].from.to_s
|
280
|
-
# seqEnd = loc[0].to.to_s
|
281
|
-
# strand = loc[0].strand.to_s
|
282
|
-
if ftH.has_key? "pseudo"
|
283
|
-
next
|
284
|
-
end
|
285
|
-
# gene = ftH["gene"] if !ftH["gene"].nil?
|
286
|
-
# protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
287
|
-
product = ""
|
290
|
+
next if ! ft.feature.to_s.include? "RNA"
|
288
291
|
|
289
|
-
|
290
|
-
|
291
|
-
#
|
292
|
-
|
292
|
+
ftH = ft.to_hash
|
293
|
+
loc = ft.locations
|
294
|
+
# seqBeg = loc[0].from.to_s
|
295
|
+
# seqEnd = loc[0].to.to_s
|
296
|
+
# strand = loc[0].strand.to_s
|
297
|
+
if ftH.has_key? "pseudo"
|
298
|
+
next
|
299
|
+
end
|
300
|
+
# gene = ftH["gene"] if !ftH["gene"].nil?
|
301
|
+
# protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
302
|
+
product = ""
|
293
303
|
|
294
|
-
|
304
|
+
if !ftH["product"].nil?
|
305
|
+
product = ftH["product"][0]
|
306
|
+
# puts ftH["product"].join(",") + "---" + ftH["product"][0]
|
307
|
+
end
|
295
308
|
|
296
|
-
|
297
|
-
dna = get_DNA(ft,@bioseq)
|
298
|
-
dnaBioSeq = Bio::Sequence.auto(dna)
|
309
|
+
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
299
310
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
locustag: locustag,
|
304
|
-
product: product,
|
305
|
-
bioseq_gene: dnaBioSeq
|
306
|
-
}
|
311
|
+
# puts "#{@accession}\t#{seqBeg}\t#{seqEnd}\t#{strand}\t#{protId}\t#{locustag}\t#{gene[0]}\t#{product[0]}"
|
312
|
+
dna = get_DNA(ft,@bioseq_all[gb_i])
|
313
|
+
dnaBioSeq = Bio::Sequence.auto(dna)
|
307
314
|
|
308
|
-
|
315
|
+
@rna_seq[locustag] = {
|
316
|
+
type: ft.feature.to_s,
|
317
|
+
location: loc,
|
318
|
+
locustag: locustag,
|
319
|
+
product: product,
|
320
|
+
bioseq_gene: dnaBioSeq
|
321
|
+
}
|
309
322
|
|
323
|
+
end
|
324
|
+
end
|
310
325
|
end
|
311
326
|
|
312
327
|
@rna_seq
|
@@ -622,4 +637,3 @@ class SequenceAnnotation
|
|
622
637
|
|
623
638
|
|
624
639
|
end # end of Class
|
625
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|