bacterial-annotator 0.9.2 → 0.9.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b90fb0d1c0bc3d82a10706a7d6d2c0554099b94ce8ec27f63d4ebcad0ec11f04
4
- data.tar.gz: c98732d57ad75290b6f0584ee559de98f8676ed0d94e5f4e9886f152269bf75e
3
+ metadata.gz: 6873b45085524788e9ea2c82b29c5e3cfb6998c6cd7ff1f16d56dc64378c910e
4
+ data.tar.gz: 7b02f34f8ffef086ade18526db05b5fffdeabee339a4808ec09181a7cb108b53
5
5
  SHA512:
6
- metadata.gz: eab03371ef5f9fbf75b694f58ba4108adb38f6316131a96efc38a0c938fed727f3583f80e3f97604103c5c25f0d271fa9bcb397670b16bfd1bb2b930ca47be83
7
- data.tar.gz: 6e48a1c67bd2add06ca3772bab0f734050a28476e05a9879f23fbf6df1cd88a314aee21c469a8226623978fefcd1b0d62cd240960b4097c55802395a891d5ae9
6
+ metadata.gz: 4b01f90fe7bc53e79a4245fbd64830a8fa967447e4a609715feef1ec21a68583531b9d1120631234204215bfa3bf6cfdd0b311704f030cf4cdf08d670fcabcf8
7
+ data.tar.gz: 15f9a395c37249bf937052d1917c89a42122a720bb1beddd1c4b2bddef3883111cb7e2e883da7757ccd9170eb3675499bbb70ac8dcfc62d4e722b9434549c92c
data/bin/ba_diamond CHANGED
@@ -23,7 +23,6 @@ def installDiamond
23
23
  Dir.chdir("#{ROOT_path}/")
24
24
  `tar xvf diamond-linux64.tar.gz; rm diamond-linux64.tar.gz`
25
25
  `mv diamond ./diamond.linux`
26
- `rm diamond_manual.pdf`
27
26
  File.chmod(0755, "#{ROOT_path}/diamond.linux")
28
27
  rescue
29
28
  abort "Problem installing Diamond, aborting"
@@ -3,7 +3,7 @@
3
3
  # email: maximilien1er@gmail.com
4
4
  # date: 15-02-24
5
5
  # version: 0.0.1
6
- # licence:
6
+ # licence:
7
7
 
8
8
  require 'json'
9
9
  require 'zlib'
@@ -104,7 +104,16 @@ class SequenceAnnotation
104
104
  if flat_gbk.dbclass != Bio::GenBank
105
105
  abort "Aborting : The input #{gbk_file} is not a valid genbank file !"
106
106
  else
107
- @gbk = flat_gbk.next_entry
107
+
108
+ @gbk_all = []
109
+ @bioseq_all = []
110
+ flat_gbk.each_entry do |e|
111
+ @gbk_all << e
112
+ @bioseq_all << e.to_biosequence
113
+ end
114
+
115
+ @gbk = @gbk_all[0]
116
+
108
117
  end
109
118
 
110
119
  @bioseq = @gbk.to_biosequence
@@ -221,41 +230,45 @@ class SequenceAnnotation
221
230
 
222
231
  if @coding_seq.empty?
223
232
 
224
- # Iterate over each CDS
225
- @gbk.each_cds do |ft|
226
- ftH = ft.to_hash
227
- loc = ft.locations
228
- gene = []
229
- product = []
230
- protId = ""
231
- if ftH.has_key? "pseudo"
232
- next
233
- end
234
- gene = ftH["gene"] if !ftH["gene"].nil?
235
- product = ftH["product"] if !ftH["product"].nil?
236
- protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
237
- locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
238
-
239
- dna = get_DNA(ft,@bioseq)
240
- pep = dna.translate
241
- pepBioSeq = Bio::Sequence.auto(pep)
242
- dnaBioSeq = Bio::Sequence.auto(dna)
243
-
244
- if protId.strip == ""
245
- protId = locustag
246
- end
233
+ @gbk_all.each_with_index do |gb, gb_i|
234
+
235
+ # Iterate over each CDS
236
+ gb.each_cds do |ft|
237
+ ftH = ft.to_hash
238
+ loc = ft.locations
239
+ gene = []
240
+ product = []
241
+ protId = ""
242
+ if ftH.has_key? "pseudo"
243
+ next
244
+ end
247
245
 
248
- @coding_seq[protId] = {
249
- protId: protId,
250
- location: loc,
251
- locustag: locustag,
252
- gene: gene[0],
253
- product: product[0],
254
- bioseq: pepBioSeq,
255
- bioseq_gene: dnaBioSeq,
256
- length: pepBioSeq.length
257
- }
246
+ gene = ftH["gene"] if !ftH["gene"].nil?
247
+ product = ftH["product"] if !ftH["product"].nil?
248
+ protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
249
+ locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
250
+
251
+ dna = get_DNA(ft,@bioseq_all[gb_i])
252
+ pep = dna.translate
253
+ pepBioSeq = Bio::Sequence.auto(pep)
254
+ dnaBioSeq = Bio::Sequence.auto(dna)
255
+
256
+ if protId.strip == ""
257
+ protId = locustag
258
+ end
258
259
 
260
+ @coding_seq[protId] = {
261
+ protId: protId,
262
+ location: loc,
263
+ locustag: locustag,
264
+ gene: gene[0],
265
+ product: product[0],
266
+ bioseq: pepBioSeq,
267
+ bioseq_gene: dnaBioSeq,
268
+ length: pepBioSeq.length
269
+ }
270
+
271
+ end
259
272
  end
260
273
 
261
274
  end
@@ -270,43 +283,45 @@ class SequenceAnnotation
270
283
  if @rna_seq.empty?
271
284
 
272
285
  @rna_seq = {}
273
- @gbk.features do |ft|
274
286
 
275
- next if ! ft.feature.to_s.include? "RNA"
287
+ @gbk_all.each_with_index do |gb, gb_i|
288
+ gb.features do |ft|
276
289
 
277
- ftH = ft.to_hash
278
- loc = ft.locations
279
- # seqBeg = loc[0].from.to_s
280
- # seqEnd = loc[0].to.to_s
281
- # strand = loc[0].strand.to_s
282
- if ftH.has_key? "pseudo"
283
- next
284
- end
285
- # gene = ftH["gene"] if !ftH["gene"].nil?
286
- # protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
287
- product = ""
290
+ next if ! ft.feature.to_s.include? "RNA"
288
291
 
289
- if !ftH["product"].nil?
290
- product = ftH["product"][0]
291
- # puts ftH["product"].join(",") + "---" + ftH["product"][0]
292
- end
292
+ ftH = ft.to_hash
293
+ loc = ft.locations
294
+ # seqBeg = loc[0].from.to_s
295
+ # seqEnd = loc[0].to.to_s
296
+ # strand = loc[0].strand.to_s
297
+ if ftH.has_key? "pseudo"
298
+ next
299
+ end
300
+ # gene = ftH["gene"] if !ftH["gene"].nil?
301
+ # protId = ftH["protein_id"][0] if !ftH["protein_id"].nil?
302
+ product = ""
293
303
 
294
- locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
304
+ if !ftH["product"].nil?
305
+ product = ftH["product"][0]
306
+ # puts ftH["product"].join(",") + "---" + ftH["product"][0]
307
+ end
295
308
 
296
- # puts "#{@accession}\t#{seqBeg}\t#{seqEnd}\t#{strand}\t#{protId}\t#{locustag}\t#{gene[0]}\t#{product[0]}"
297
- dna = get_DNA(ft,@bioseq)
298
- dnaBioSeq = Bio::Sequence.auto(dna)
309
+ locustag = ftH["locus_tag"][0] if !ftH["locus_tag"].nil?
299
310
 
300
- @rna_seq[locustag] = {
301
- type: ft.feature.to_s,
302
- location: loc,
303
- locustag: locustag,
304
- product: product,
305
- bioseq_gene: dnaBioSeq
306
- }
311
+ # puts "#{@accession}\t#{seqBeg}\t#{seqEnd}\t#{strand}\t#{protId}\t#{locustag}\t#{gene[0]}\t#{product[0]}"
312
+ dna = get_DNA(ft,@bioseq_all[gb_i])
313
+ dnaBioSeq = Bio::Sequence.auto(dna)
307
314
 
308
- end
315
+ @rna_seq[locustag] = {
316
+ type: ft.feature.to_s,
317
+ location: loc,
318
+ locustag: locustag,
319
+ product: product,
320
+ bioseq_gene: dnaBioSeq
321
+ }
309
322
 
323
+ end
324
+ end
310
325
  end
311
326
 
312
327
  @rna_seq
@@ -622,4 +637,3 @@ class SequenceAnnotation
622
637
 
623
638
 
624
639
  end # end of Class
625
-
@@ -1,10 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # author: maxime déraspe
3
3
  # email: maximilien1er@gmail.com
4
- # review:
4
+ # review:
5
5
  # date: 15-02-24
6
6
  # version: 0.0.1
7
- # licence:
7
+ # licence:
8
8
 
9
9
  require 'json'
10
10
  require 'zlib'
@@ -111,7 +111,7 @@ class SequenceSynteny
111
111
  system("#{@root}/diamond.linux makedb --db #{subject_file} --in #{subject_file} > /dev/null 2>&1")
112
112
  db_file = subject_file
113
113
  end
114
- system("#{@root}/diamond.linux blastp --db #{db_file} -q #{query_file} -o #{@outdir}/#{@name}.diamond.tsv -f 6 > /dev/null 2>&1")
114
+ system("#{@root}/diamond.linux blastp --masking none --db #{db_file} -q #{query_file} -o #{@outdir}/#{@name}.diamond.tsv -f 6 > /dev/null 2>&1")
115
115
  else
116
116
  # system("#{@root}/glsearch36.linux -b 3 -E 1e-25 -m 8 #{@subject_file} #{@query_file} > #{@outdir}/#{@name}.fasta36.tsv")
117
117
  end
@@ -45,6 +45,9 @@ class BacterialComparator
45
45
  min_pid = min_pid/100
46
46
  end
47
47
 
48
+ @min_cov = min_cov
49
+ @min_pid = min_pid
50
+
48
51
  @aln_opt = options[:align].downcase
49
52
  @run_phylo = 0
50
53
  if options[:phylogeny] == 1
@@ -592,8 +595,8 @@ class BacterialComparator
592
595
  query_prot_file,
593
596
  ref_prot_file,
594
597
  "Prot-Ref",
595
- 0.80,
596
- 0.80,
598
+ @min_cov,
599
+ @min_cov,
597
600
  "prot")
598
601
 
599
602
  print "# Running alignment with Reference Genome CDS (diamond).."
@@ -613,6 +616,7 @@ class BacterialComparator
613
616
  next if ! syn_val.has_key? :homology
614
617
  next if syn_val[:homology][:assert_cutoff].inject(:+) < 3
615
618
  next if ref_annotated.has_key? syn_val[:homology][:hits][0] and ref_annotated[syn_val[:homology][:hits][0]][:partial] == 0
619
+
616
620
  ref_annotated[syn_val[:homology][:hits][0]] = {
617
621
  key: prot,
618
622
  pId: syn_val[:homology][:pId],
@@ -622,6 +626,7 @@ class BacterialComparator
622
626
  length: syn_val[:homology][:length][0],
623
627
  partial: (syn_val[:partial] ? 1 : 0)
624
628
  }
629
+
625
630
  # ref_annotated[syn_val[:homology][:hits][0]] = {
626
631
  # key: prot,
627
632
  # pId: syn_val[:homology][:pId],
@@ -631,6 +636,7 @@ class BacterialComparator
631
636
  # length: syn_val[:homology][:length][0],
632
637
  # partial: (syn_val[:partial] ? 1 : 0)
633
638
  # }
639
+
634
640
  end
635
641
 
636
642
  # print ref_annotated
@@ -642,13 +648,21 @@ class BacterialComparator
642
648
  coverage_query = ""
643
649
  query_length = ""
644
650
  pId = ""
651
+
645
652
  if ref_annotated[ref_v[:protId]] != nil
646
- gene = ref_annotated[ref_v[:protId]][:key]
647
- coverage_ref = ref_annotated[ref_v[:protId]][:cov_subject]
648
- query_length = query_lengths[ref_annotated[ref_v[:protId]][:key]]
649
- coverage_query = ref_annotated[ref_v[:protId]][:cov_query]
650
- pId = ref_annotated[ref_v[:protId]][:pId]
651
- partial = ref_annotated[ref_v[:protId]][:partial]
653
+
654
+ if ref_annotated[ref_v[:protId]][:pId] >= @min_pid and
655
+ ref_annotated[ref_v[:protId]][:cov_query] >= @min_cov and
656
+ ref_annotated[ref_v[:protId]][:cov_subject] >= @min_cov
657
+
658
+ gene = ref_annotated[ref_v[:protId]][:key]
659
+ coverage_ref = ref_annotated[ref_v[:protId]][:cov_subject]
660
+ query_length = query_lengths[ref_annotated[ref_v[:protId]][:key]]
661
+ coverage_query = ref_annotated[ref_v[:protId]][:cov_query]
662
+ pId = ref_annotated[ref_v[:protId]][:pId]
663
+ partial = ref_annotated[ref_v[:protId]][:partial]
664
+ end
665
+
652
666
  end
653
667
 
654
668
  _locus_tag = ref_v[:locustag] || ""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ version: 0.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-05 00:00:00.000000000 Z
11
+ date: 2023-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio