bacterial-annotator 0.9.2 → 0.9.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b90fb0d1c0bc3d82a10706a7d6d2c0554099b94ce8ec27f63d4ebcad0ec11f04
4
- data.tar.gz: c98732d57ad75290b6f0584ee559de98f8676ed0d94e5f4e9886f152269bf75e
3
+ metadata.gz: 6873b45085524788e9ea2c82b29c5e3cfb6998c6cd7ff1f16d56dc64378c910e
4
+ data.tar.gz: 7b02f34f8ffef086ade18526db05b5fffdeabee339a4808ec09181a7cb108b53
5
5
  SHA512:
6
- metadata.gz: eab03371ef5f9fbf75b694f58ba4108adb38f6316131a96efc38a0c938fed727f3583f80e3f97604103c5c25f0d271fa9bcb397670b16bfd1bb2b930ca47be83
7
- data.tar.gz: 6e48a1c67bd2add06ca3772bab0f734050a28476e05a9879f23fbf6df1cd88a314aee21c469a8226623978fefcd1b0d62cd240960b4097c55802395a891d5ae9
6
+ metadata.gz: 4b01f90fe7bc53e79a4245fbd64830a8fa967447e4a609715feef1ec21a68583531b9d1120631234204215bfa3bf6cfdd0b311704f030cf4cdf08d670fcabcf8
7
+ data.tar.gz: 15f9a395c37249bf937052d1917c89a42122a720bb1beddd1c4b2bddef3883111cb7e2e883da7757ccd9170eb3675499bbb70ac8dcfc62d4e722b9434549c92c
data/bin/ba_diamond CHANGED
@@ -23,7 +23,6 @@ def installDiamond
23
23
  Dir.chdir("#{ROOT_path}/")
24
24
  `tar xvf diamond-linux64.tar.gz; rm diamond-linux64.tar.gz`
25
25
  `mv diamond ./diamond.linux`
26
- `rm diamond_manual.pdf`
27
26
  File.chmod(0755, "#{ROOT_path}/diamond.linux")
28
27
  rescue
29
28
  abort "Problem installing Diamond, aborting"
@@ -3,7 +3,7 @@
3
3
  # email: maximilien1er@gmail.com
4
4
  # date: 15-02-24
5
5
  # version: 0.0.1
6
- # licence:
6
+ # licence:
7
7
 
8
8
  require 'json'
9
9
  require 'zlib'
@@ -104,7 +104,16 @@ class SequenceAnnotation
104
104
  if flat_gbk.dbclass != Bio::GenBank
105
105
  abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
106
106
  else
107
- @gbk = flat_gbk.next_entry
107
+
108
+ @gbk_all = []
109
+ @bioseq_all = []
110
+ flat_gbk.each_entry do |e|
111
+ @gbk_all << e
112
+ @bioseq_all << e.to_biosequence
113
+ end
114
+
115
+ @gbk = @gbk_all[0]
116
+
108
117
  end
109
118
 
110
119
  @bioseq = @gbk.to_biosequence
@@ -221,41 +230,45 @@ class SequenceAnnotation
221
230
 
222
231
  if @coding_seq.empty?
223
232
 
224
- # Iterate over each CDS
225
- @gbk.each_cds do |ft|
226
- ftH = ft.to_hash
227
- loc = ft.locations
228
- gene = []
229
- product = []
230
- protId = ""
231
- if ftH.has_key? "pseudo"
232
- next
233
- end
234
- gene = ftH["gene"] if !ftH["gene"].nil?
235
- product = ftH["product"] if !ftH["product"].nil?
236
- protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
237
- locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
238
-
239
- dna = get_DNA(ft,@bioseq)
240
- pep = dna.translate
241
- pepBioSeq = Bio::Sequence.auto(pep)
242
- dnaBioSeq = Bio::Sequence.auto(dna)
243
-
244
- if protId.strip == ""
245
- protId = locustag
246
- end
233
+ @gbk_all.each_with_index do |gb, gb_i|
234
+
235
+ # Iterate over each CDS
236
+ gb.each_cds do |ft|
237
+ ftH = ft.to_hash
238
+ loc = ft.locations
239
+ gene = []
240
+ product = []
241
+ protId = ""
242
+ if ftH.has_key? "pseudo"
243
+ next
244
+ end
247
245
 
248
- @coding_seq[protId] = {
249
- protId: protId,
250
- location: loc,
251
- locustag: locustag,
252
- gene: gene[0],
253
- product: product[0],
254
- bioseq: pepBioSeq,
255
- bioseq_gene: dnaBioSeq,
256
- length: pepBioSeq.length
257
- }
246
+ gene = ftH["gene"] if !ftH["gene"].nil?
247
+ product = ftH["product"] if !ftH["product"].nil?
248
+ protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
249
+ locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
250
+
251
+ dna = get_DNA(ft,@bioseq_all[gb_i])
252
+ pep = dna.translate
253
+ pepBioSeq = Bio::Sequence.auto(pep)
254
+ dnaBioSeq = Bio::Sequence.auto(dna)
255
+
256
+ if protId.strip == ""
257
+ protId = locustag
258
+ end
258
259
 
260
+ @coding_seq[protId] = {
261
+ protId: protId,
262
+ location: loc,
263
+ locustag: locustag,
264
+ gene: gene[0],
265
+ product: product[0],
266
+ bioseq: pepBioSeq,
267
+ bioseq_gene: dnaBioSeq,
268
+ length: pepBioSeq.length
269
+ }
270
+
271
+ end
259
272
  end
260
273
 
261
274
  end
@@ -270,43 +283,45 @@ class SequenceAnnotation
270
283
  if @rna_seq.empty?
271
284
 
272
285
  @rna_seq = {}
273
- @gbk.features do |ft|
274
286
 
275
- next if ! ft.feature.to_s.include? "RNA"
287
+ @gbk_all.each_with_index do |gb, gb_i|
288
+ gb.features do |ft|
276
289
 
277
- ftH = ft.to_hash
278
- loc = ft.locations
279
- # seqBeg = loc[0].from.to_s
280
- # seqEnd = loc[0].to.to_s
281
- # strand = loc[0].strand.to_s
282
- if ftH.has_key? "pseudo"
283
- next
284
- end
285
- # gene = ftH["gene"] if !ftH["gene"].nil?
286
- # protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
287
- product = ""
290
+ next if ! ft.feature.to_s.include? "RNA"
288
291
 
289
- if !ftH["product"].nil?
290
- product = ftH["product"][0]
291
- # puts ftH["product"].join(",") + "---" + ftH["product"][0]
292
- end
292
+ ftH = ft.to_hash
293
+ loc = ft.locations
294
+ # seqBeg = loc[0].from.to_s
295
+ # seqEnd = loc[0].to.to_s
296
+ # strand = loc[0].strand.to_s
297
+ if ftH.has_key? "pseudo"
298
+ next
299
+ end
300
+ # gene = ftH["gene"] if !ftH["gene"].nil?
301
+ # protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
302
+ product = ""
293
303
 
294
- locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
304
+ if !ftH["product"].nil?
305
+ product = ftH["product"][0]
306
+ # puts ftH["product"].join(",") + "---" + ftH["product"][0]
307
+ end
295
308
 
296
- # puts "#{@accession}\t#{seqBeg}\t#{seqEnd}\t#{strand}\t#{protId}\t#{locustag}\t#{gene[0]}\t#{product[0]}"
297
- dna = get_DNA(ft,@bioseq)
298
- dnaBioSeq = Bio::Sequence.auto(dna)
309
+ locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
299
310
 
300
- @rna_seq[locustag] = {
301
- type: ft.feature.to_s,
302
- location: loc,
303
- locustag: locustag,
304
- product: product,
305
- bioseq_gene: dnaBioSeq
306
- }
311
+ # puts "#{@accession}\t#{seqBeg}\t#{seqEnd}\t#{strand}\t#{protId}\t#{locustag}\t#{gene[0]}\t#{product[0]}"
312
+ dna = get_DNA(ft,@bioseq_all[gb_i])
313
+ dnaBioSeq = Bio::Sequence.auto(dna)
307
314
 
308
- end
315
+ @rna_seq[locustag] = {
316
+ type: ft.feature.to_s,
317
+ location: loc,
318
+ locustag: locustag,
319
+ product: product,
320
+ bioseq_gene: dnaBioSeq
321
+ }
309
322
 
323
+ end
324
+ end
310
325
  end
311
326
 
312
327
  @rna_seq
@@ -622,4 +637,3 @@ class SequenceAnnotation
622
637
 
623
638
 
624
639
  end # end of Class
625
-
@@ -1,10 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # author: maxime déraspe
3
3
  # email: maximilien1er@gmail.com
4
- # review:
4
+ # review:
5
5
  # date: 15-02-24
6
6
  # version: 0.0.1
7
- # licence:
7
+ # licence:
8
8
 
9
9
  require 'json'
10
10
  require 'zlib'
@@ -111,7 +111,7 @@ class SequenceSynteny
111
111
  system("#{@root}/diamond.linux makedb --db #{subject_file} --in #{subject_file} > /dev/null 2>&1")
112
112
  db_file = subject_file
113
113
  end
114
- system("#{@root}/diamond.linux blastp --db #{db_file} -q #{query_file} -o #{@outdir}/#{@name}.diamond.tsv -f 6 > /dev/null 2>&1")
114
+ system("#{@root}/diamond.linux blastp --masking none --db #{db_file} -q #{query_file} -o #{@outdir}/#{@name}.diamond.tsv -f 6 > /dev/null 2>&1")
115
115
  else
116
116
  # system("#{@root}/glsearch36.linux -b 3 -E 1e-25 -m 8 #{@subject_file} #{@query_file} > #{@outdir}/#{@name}.fasta36.tsv")
117
117
  end
@@ -45,6 +45,9 @@ class BacterialComparator
45
45
  min_pid = min_pid/100
46
46
  end
47
47
 
48
+ @min_cov = min_cov
49
+ @min_pid = min_pid
50
+
48
51
  @aln_opt = options[:align].downcase
49
52
  @run_phylo = 0
50
53
  if options[:phylogeny] == 1
@@ -592,8 +595,8 @@ class BacterialComparator
592
595
  query_prot_file,
593
596
  ref_prot_file,
594
597
  "Prot-Ref",
595
- 0.80,
596
- 0.80,
598
+ @min_cov,
599
+ @min_cov,
597
600
  "prot")
598
601
 
599
602
  print "# Running alignment with Reference Genome CDS (diamond).."
@@ -613,6 +616,7 @@ class BacterialComparator
613
616
  next if ! syn_val.has_key? :homology
614
617
  next if syn_val[:homology][:assert_cutoff].inject(:+) < 3
615
618
  next if ref_annotated.has_key? syn_val[:homology][:hits][0] and ref_annotated[syn_val[:homology][:hits][0]][:partial] == 0
619
+
616
620
  ref_annotated[syn_val[:homology][:hits][0]] = {
617
621
  key: prot,
618
622
  pId: syn_val[:homology][:pId],
@@ -622,6 +626,7 @@ class BacterialComparator
622
626
  length: syn_val[:homology][:length][0],
623
627
  partial: (syn_val[:partial] ? 1 : 0)
624
628
  }
629
+
625
630
  # ref_annotated[syn_val[:homology][:hits][0]] = {
626
631
  # key: prot,
627
632
  # pId: syn_val[:homology][:pId],
@@ -631,6 +636,7 @@ class BacterialComparator
631
636
  # length: syn_val[:homology][:length][0],
632
637
  # partial: (syn_val[:partial] ? 1 : 0)
633
638
  # }
639
+
634
640
  end
635
641
 
636
642
  # print ref_annotated
@@ -642,13 +648,21 @@ class BacterialComparator
642
648
  coverage_query = ""
643
649
  query_length = ""
644
650
  pId = ""
651
+
645
652
  if ref_annotated[ref_v[:protId]] != nil
646
- gene = ref_annotated[ref_v[:protId]][:key]
647
- coverage_ref = ref_annotated[ref_v[:protId]][:cov_subject]
648
- query_length = query_lengths[ref_annotated[ref_v[:protId]][:key]]
649
- coverage_query = ref_annotated[ref_v[:protId]][:cov_query]
650
- pId = ref_annotated[ref_v[:protId]][:pId]
651
- partial = ref_annotated[ref_v[:protId]][:partial]
653
+
654
+ if ref_annotated[ref_v[:protId]][:pId] >= @min_pid and
655
+ ref_annotated[ref_v[:protId]][:cov_query] >= @min_cov and
656
+ ref_annotated[ref_v[:protId]][:cov_subject] >= @min_cov
657
+
658
+ gene = ref_annotated[ref_v[:protId]][:key]
659
+ coverage_ref = ref_annotated[ref_v[:protId]][:cov_subject]
660
+ query_length = query_lengths[ref_annotated[ref_v[:protId]][:key]]
661
+ coverage_query = ref_annotated[ref_v[:protId]][:cov_query]
662
+ pId = ref_annotated[ref_v[:protId]][:pId]
663
+ partial = ref_annotated[ref_v[:protId]][:partial]
664
+ end
665
+
652
666
  end
653
667
 
654
668
  _locus_tag = ref_v[:locustag] || ""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ version: 0.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-05 00:00:00.000000000 Z
11
+ date: 2023-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio