bacterial-annotator 0.9.2 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ba_diamond +0 -1
- data/lib/bacterial-annotator/sequence-annotation.rb +79 -65
- data/lib/bacterial-annotator/sequence-synteny.rb +3 -3
- data/lib/bacterial-comparator.rb +22 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6873b45085524788e9ea2c82b29c5e3cfb6998c6cd7ff1f16d56dc64378c910e
|
4
|
+
data.tar.gz: 7b02f34f8ffef086ade18526db05b5fffdeabee339a4808ec09181a7cb108b53
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b01f90fe7bc53e79a4245fbd64830a8fa967447e4a609715feef1ec21a68583531b9d1120631234204215bfa3bf6cfdd0b311704f030cf4cdf08d670fcabcf8
|
7
|
+
data.tar.gz: 15f9a395c37249bf937052d1917c89a42122a720bb1beddd1c4b2bddef3883111cb7e2e883da7757ccd9170eb3675499bbb70ac8dcfc62d4e722b9434549c92c
|
data/bin/ba_diamond
CHANGED
@@ -23,7 +23,6 @@ def installDiamond
|
|
23
23
|
Dir.chdir("#{ROOT_path}/")
|
24
24
|
`tar xvf diamond-linux64.tar.gz; rm diamond-linux64.tar.gz`
|
25
25
|
`mv diamond ./diamond.linux`
|
26
|
-
`rm diamond_manual.pdf`
|
27
26
|
File.chmod(0755, "#{ROOT_path}/diamond.linux")
|
28
27
|
rescue
|
29
28
|
abort "Problem installing Diamond, aborting"
|
data/lib/bacterial-annotator/sequence-annotation.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# email: maximilien1er@gmail.com
|
4
4
|
# date: 15-02-24
|
5
5
|
# version: 0.0.1
|
6
|
-
# licence:
|
6
|
+
# licence:
|
7
7
|
|
8
8
|
require 'json'
|
9
9
|
require 'zlib'
|
@@ -104,7 +104,16 @@ class SequenceAnnotation
|
|
104
104
|
if flat_gbk.dbclass != Bio::GenBank
|
105
105
|
abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
|
106
106
|
else
|
107
|
-
|
107
|
+
|
108
|
+
@gbk_all = []
|
109
|
+
@bioseq_all = []
|
110
|
+
flat_gbk.each_entry do |e|
|
111
|
+
@gbk_all << e
|
112
|
+
@bioseq_all << e.to_biosequence
|
113
|
+
end
|
114
|
+
|
115
|
+
@gbk = @gbk_all[0]
|
116
|
+
|
108
117
|
end
|
109
118
|
|
110
119
|
@bioseq = @gbk.to_biosequence
|
@@ -221,41 +230,45 @@ class SequenceAnnotation
|
|
221
230
|
|
222
231
|
if @coding_seq.empty?
|
223
232
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
237
|
-
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
238
|
-
|
239
|
-
dna = get_DNA(ft,@bioseq)
|
240
|
-
pep = dna.translate
|
241
|
-
pepBioSeq = Bio::Sequence.auto(pep)
|
242
|
-
dnaBioSeq = Bio::Sequence.auto(dna)
|
243
|
-
|
244
|
-
if protId.strip == ""
|
245
|
-
protId = locustag
|
246
|
-
end
|
233
|
+
@gbk_all.each_with_index do |gb, gb_i|
|
234
|
+
|
235
|
+
# Iterate over each CDS
|
236
|
+
gb.each_cds do |ft|
|
237
|
+
ftH = ft.to_hash
|
238
|
+
loc = ft.locations
|
239
|
+
gene = []
|
240
|
+
product = []
|
241
|
+
protId = ""
|
242
|
+
if ftH.has_key? "pseudo"
|
243
|
+
next
|
244
|
+
end
|
247
245
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
locustag
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
246
|
+
gene = ftH["gene"] if !ftH["gene"].nil?
|
247
|
+
product = ftH["product"] if !ftH["product"].nil?
|
248
|
+
protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
249
|
+
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
250
|
+
|
251
|
+
dna = get_DNA(ft,@bioseq_all[gb_i])
|
252
|
+
pep = dna.translate
|
253
|
+
pepBioSeq = Bio::Sequence.auto(pep)
|
254
|
+
dnaBioSeq = Bio::Sequence.auto(dna)
|
255
|
+
|
256
|
+
if protId.strip == ""
|
257
|
+
protId = locustag
|
258
|
+
end
|
258
259
|
|
260
|
+
@coding_seq[protId] = {
|
261
|
+
protId: protId,
|
262
|
+
location: loc,
|
263
|
+
locustag: locustag,
|
264
|
+
gene: gene[0],
|
265
|
+
product: product[0],
|
266
|
+
bioseq: pepBioSeq,
|
267
|
+
bioseq_gene: dnaBioSeq,
|
268
|
+
length: pepBioSeq.length
|
269
|
+
}
|
270
|
+
|
271
|
+
end
|
259
272
|
end
|
260
273
|
|
261
274
|
end
|
@@ -270,43 +283,45 @@ class SequenceAnnotation
|
|
270
283
|
if @rna_seq.empty?
|
271
284
|
|
272
285
|
@rna_seq = {}
|
273
|
-
@gbk.features do |ft|
|
274
286
|
|
275
|
-
|
287
|
+
@gbk_all.each_with_index do |gb, gb_i|
|
288
|
+
gb.features do |ft|
|
276
289
|
|
277
|
-
|
278
|
-
loc = ft.locations
|
279
|
-
# seqBeg = loc[0].from.to_s
|
280
|
-
# seqEnd = loc[0].to.to_s
|
281
|
-
# strand = loc[0].strand.to_s
|
282
|
-
if ftH.has_key? "pseudo"
|
283
|
-
next
|
284
|
-
end
|
285
|
-
# gene = ftH["gene"] if !ftH["gene"].nil?
|
286
|
-
# protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
287
|
-
product = ""
|
290
|
+
next if ! ft.feature.to_s.include? "RNA"
|
288
291
|
|
289
|
-
|
290
|
-
|
291
|
-
#
|
292
|
-
|
292
|
+
ftH = ft.to_hash
|
293
|
+
loc = ft.locations
|
294
|
+
# seqBeg = loc[0].from.to_s
|
295
|
+
# seqEnd = loc[0].to.to_s
|
296
|
+
# strand = loc[0].strand.to_s
|
297
|
+
if ftH.has_key? "pseudo"
|
298
|
+
next
|
299
|
+
end
|
300
|
+
# gene = ftH["gene"] if !ftH["gene"].nil?
|
301
|
+
# protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
|
302
|
+
product = ""
|
293
303
|
|
294
|
-
|
304
|
+
if !ftH["product"].nil?
|
305
|
+
product = ftH["product"][0]
|
306
|
+
# puts ftH["product"].join(",") + "---" + ftH["product"][0]
|
307
|
+
end
|
295
308
|
|
296
|
-
|
297
|
-
dna = get_DNA(ft,@bioseq)
|
298
|
-
dnaBioSeq = Bio::Sequence.auto(dna)
|
309
|
+
locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
|
299
310
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
locustag: locustag,
|
304
|
-
product: product,
|
305
|
-
bioseq_gene: dnaBioSeq
|
306
|
-
}
|
311
|
+
# puts "#{@accession}\t#{seqBeg}\t#{seqEnd}\t#{strand}\t#{protId}\t#{locustag}\t#{gene[0]}\t#{product[0]}"
|
312
|
+
dna = get_DNA(ft,@bioseq_all[gb_i])
|
313
|
+
dnaBioSeq = Bio::Sequence.auto(dna)
|
307
314
|
|
308
|
-
|
315
|
+
@rna_seq[locustag] = {
|
316
|
+
type: ft.feature.to_s,
|
317
|
+
location: loc,
|
318
|
+
locustag: locustag,
|
319
|
+
product: product,
|
320
|
+
bioseq_gene: dnaBioSeq
|
321
|
+
}
|
309
322
|
|
323
|
+
end
|
324
|
+
end
|
310
325
|
end
|
311
326
|
|
312
327
|
@rna_seq
|
@@ -622,4 +637,3 @@ class SequenceAnnotation
|
|
622
637
|
|
623
638
|
|
624
639
|
end # end of Class
|
625
|
-
|
data/lib/bacterial-annotator/sequence-synteny.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
# author: maxime déraspe
|
3
3
|
# email: maximilien1er@gmail.com
|
4
|
-
# review:
|
4
|
+
# review:
|
5
5
|
# date: 15-02-24
|
6
6
|
# version: 0.0.1
|
7
|
-
# licence:
|
7
|
+
# licence:
|
8
8
|
|
9
9
|
require 'json'
|
10
10
|
require 'zlib'
|
@@ -111,7 +111,7 @@ class SequenceSynteny
|
|
111
111
|
system("#{@root}/diamond.linux makedb --db #{subject_file} --in #{subject_file} > /dev/null 2>&1")
|
112
112
|
db_file = subject_file
|
113
113
|
end
|
114
|
-
system("#{@root}/diamond.linux blastp --db #{db_file} -q #{query_file} -o #{@outdir}/#{@name}.diamond.tsv -f 6 > /dev/null 2>&1")
|
114
|
+
system("#{@root}/diamond.linux blastp --masking none --db #{db_file} -q #{query_file} -o #{@outdir}/#{@name}.diamond.tsv -f 6 > /dev/null 2>&1")
|
115
115
|
else
|
116
116
|
# system("#{@root}/glsearch36.linux -b 3 -E 1e-25 -m 8 #{@subject_file} #{@query_file} > #{@outdir}/#{@name}.fasta36.tsv")
|
117
117
|
end
|
data/lib/bacterial-comparator.rb
CHANGED
@@ -45,6 +45,9 @@ class BacterialComparator
|
|
45
45
|
min_pid = min_pid/100
|
46
46
|
end
|
47
47
|
|
48
|
+
@min_cov = min_cov
|
49
|
+
@min_pid = min_pid
|
50
|
+
|
48
51
|
@aln_opt = options[:align].downcase
|
49
52
|
@run_phylo = 0
|
50
53
|
if options[:phylogeny] == 1
|
@@ -592,8 +595,8 @@ class BacterialComparator
|
|
592
595
|
query_prot_file,
|
593
596
|
ref_prot_file,
|
594
597
|
"Prot-Ref",
|
595
|
-
|
596
|
-
|
598
|
+
@min_cov,
|
599
|
+
@min_cov,
|
597
600
|
"prot")
|
598
601
|
|
599
602
|
print "# Running alignment with Reference Genome CDS (diamond).."
|
@@ -613,6 +616,7 @@ class BacterialComparator
|
|
613
616
|
next if ! syn_val.has_key? :homology
|
614
617
|
next if syn_val[:homology][:assert_cutoff].inject(:+) < 3
|
615
618
|
next if ref_annotated.has_key? syn_val[:homology][:hits][0] and ref_annotated[syn_val[:homology][:hits][0]][:partial] == 0
|
619
|
+
|
616
620
|
ref_annotated[syn_val[:homology][:hits][0]] = {
|
617
621
|
key: prot,
|
618
622
|
pId: syn_val[:homology][:pId],
|
@@ -622,6 +626,7 @@ class BacterialComparator
|
|
622
626
|
length: syn_val[:homology][:length][0],
|
623
627
|
partial: (syn_val[:partial] ? 1 : 0)
|
624
628
|
}
|
629
|
+
|
625
630
|
# ref_annotated[syn_val[:homology][:hits][0]] = {
|
626
631
|
# key: prot,
|
627
632
|
# pId: syn_val[:homology][:pId],
|
@@ -631,6 +636,7 @@ class BacterialComparator
|
|
631
636
|
# length: syn_val[:homology][:length][0],
|
632
637
|
# partial: (syn_val[:partial] ? 1 : 0)
|
633
638
|
# }
|
639
|
+
|
634
640
|
end
|
635
641
|
|
636
642
|
# print ref_annotated
|
@@ -642,13 +648,21 @@ class BacterialComparator
|
|
642
648
|
coverage_query = ""
|
643
649
|
query_length = ""
|
644
650
|
pId = ""
|
651
|
+
|
645
652
|
if ref_annotated[ref_v[:protId]] != nil
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
653
|
+
|
654
|
+
if ref_annotated[ref_v[:protId]][:pId] >= @min_pid and
|
655
|
+
ref_annotated[ref_v[:protId]][:cov_query] >= @min_cov and
|
656
|
+
ref_annotated[ref_v[:protId]][:cov_subject] >= @min_cov
|
657
|
+
|
658
|
+
gene = ref_annotated[ref_v[:protId]][:key]
|
659
|
+
coverage_ref = ref_annotated[ref_v[:protId]][:cov_subject]
|
660
|
+
query_length = query_lengths[ref_annotated[ref_v[:protId]][:key]]
|
661
|
+
coverage_query = ref_annotated[ref_v[:protId]][:cov_query]
|
662
|
+
pId = ref_annotated[ref_v[:protId]][:pId]
|
663
|
+
partial = ref_annotated[ref_v[:protId]][:partial]
|
664
|
+
end
|
665
|
+
|
652
666
|
end
|
653
667
|
|
654
668
|
_locus_tag = ref_v[:locustag] || ""
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.2
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|