bacterial-annotator 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,95 +9,130 @@
9
9
  require 'bio'
10
10
  require 'fileutils'
11
11
 
12
- require 'bacterial-annotator/genbank-manip'
13
- require 'bacterial-annotator/fasta-manip'
14
- require 'bacterial-annotator/synteny-manip'
15
- require 'bacterial-annotator/remote-ncbi'
12
+ require 'bacterial-annotator/sequence-fasta'
13
+ require 'bacterial-annotator/sequence-annotation'
14
+ require 'bacterial-annotator/sequence-synteny'
15
+
16
16
 
17
17
  class BacterialAnnotator
18
18
 
19
19
  # Initialize BacterialAnnotator
20
- # options[:input], options[:refgenome], ROOT, options[:outdir], options)
20
+ # options, ROOT (path)
21
21
  def initialize options, root
22
22
 
23
23
  @root = root
24
24
  @options = options
25
- @outdir = @options[:outdir]
26
25
 
27
26
  @minlength = @options[:minlength].to_i
28
- @pidentity = @options[:pidentity].to_f
29
- @pidentity = @pidentity * 100 if @pidentity < 1.00
27
+ @options[:minlength] = @options[:minlength].to_i
28
+ @options[:pidentity] = @options[:pidentity].to_f
29
+ @options[:pidentity] = @options[:pidentity] * 100 if @options[:pidentity] <= 1.00
30
+ @options[:pcoverage] = @options[:pcoverage].to_f
31
+ @options[:pcoverage] = @options[:pcoverage] / 100 if @options[:pcoverage] > 1.00
30
32
 
31
- if File.exists? (@outdir)
33
+ if File.exists? (@options[:outdir])
32
34
  if ! options.has_key? :force
33
35
  abort "Output directory already exist ! Choose another one or use -f to overwrite"
34
36
  else
35
- puts "Overwriting output directory #{@outdir}"
36
- FileUtils.remove_dir(@outdir, :force=>true)
37
+ puts "Overwriting output directory #{@options[:outdir]}"
38
+ FileUtils.remove_dir(@options[:outdir], :force=>true)
37
39
  end
38
40
  end
39
- Dir.mkdir(@outdir)
41
+ Dir.mkdir(@options[:outdir])
40
42
 
41
- @fasta = FastaManip.new(@options[:input], @options[:meta])
43
+ @query_fasta = SequenceFasta.new(@options[:input], @options[:meta])
42
44
 
43
45
  @with_refence_genome = false
44
46
  if @options.has_key? :refgenome
45
47
  @with_refence_genome = true
46
- @refgenome = GenbankManip.new(@options[:refgenome], @outdir)
48
+ @ref_genome = SequenceAnnotation.new(@options[:refgenome], @options[:outdir])
47
49
  end
48
50
 
51
+ @with_external_db = false
52
+ @with_external_db = true if @options.has_key? :external_db
53
+
49
54
  @prot_synteny = nil
50
- @annotation_stats = {by_contigs: {},
51
- annotated_cds: 0,
52
- total_cds: 0,
53
- foreign_contigs: [],
54
- synteny_contigs: [],
55
- short_contigs: []}
55
+ @annotation_stats = {
56
+ by_contigs: {},
57
+ annotated_cds: 0,
58
+ flagged_cds: [],
59
+ total_cds: 0,
60
+ foreign_contigs: [],
61
+ synteny_contigs: [],
62
+ short_contigs: []
63
+ }
56
64
 
57
65
  @contig_foreign_cds = {}
66
+
58
67
  @contig_annotations = {}
59
68
 
69
+ @contig_annotations_externaldb = {}
70
+
71
+ @contig_annotations_cds = {}
72
+
60
73
  end # end of method
61
74
 
62
75
  # Prepare files for the annotation
63
76
  # Will run prodigal on the query and prepare reference genome files
64
77
  def prepare_files_for_annotation
65
78
  puts "\nRunning Prodigal on your genome.."
66
- @fasta.run_prodigal @root, @outdir
79
+ @query_fasta.run_prodigal @root, @options[:outdir]
67
80
  puts "Prodigal done."
68
81
  if @with_refence_genome
69
- @refgenome.write_cds_to_file @outdir
70
- @refgenome.write_rna_to_file @outdir
71
- puts "Successfully loaded #{@refgenome.gbk.definition}"
82
+ @ref_genome.write_cds_to_file @options[:outdir]
83
+ @ref_genome.write_rna_to_file @options[:outdir]
84
+ puts "Successfully loaded #{@ref_genome.gbk.definition}"
72
85
  end
73
86
  end # end of method
74
87
 
88
+
89
+ def run_reference_synteny_prot
90
+
91
+ ref_synteny_prot = SequenceSynteny.new(@query_fasta.annotation_files[:proteins], @ref_genome.cds_file,
92
+ "Prot-Ref", @options[:pidentity], @options[:pcoverage], "prot")
93
+
94
+ ref_synteny_prot.run_blat @root, @options[:outdir]
95
+
96
+ ref_synteny_prot.extract_hits :refgenome
97
+
98
+ fdebug = File.open("debug-synteny.tsv", "w")
99
+
100
+ ref_synteny_prot.query_sequences.each do |k,v|
101
+ if v.has_key? :homology
102
+ @contig_annotations_cds[v[:contig]] = [] if ! @contig_annotations_cds.has_key? v[:contig]
103
+ @contig_annotations_cds[v[:contig]] << k
104
+ fdebug.write("#{v[:contig]}\t#{k}\t#{v[:homology][:pId]}\t#{v[:homology][:cov_query]}\t#{v[:homology][:cov_subject]}\t#{v[:homology][:hits].join(',')}\t#{@ref_genome.coding_seq[v[:homology][:hits][0]][:locustag]}\t#{@ref_genome.coding_seq[v[:homology][:hits][0]][:product]}\t#{v[:homology][:assert_cutoff].join(',')}\n")
105
+ else
106
+ fdebug.write("#{v[:contig]} #{k} NONE...\n")
107
+ end
108
+ end
109
+ fdebug.close
110
+
111
+ ref_synteny_prot
112
+
113
+ end
114
+
115
+
75
116
  # run_alignment of reference genome proteins and the query
76
117
  def run_annotation
77
118
 
78
119
  # process reference genome synteny
79
120
  if @with_refence_genome # Annotation with the Reference Genome
80
121
 
81
- # run CDS annotation
82
- puts "\nRunning BLAT alignment with Reference Genome CDS.."
83
- @prot_synteny = SyntenyManip.new(@fasta.prodigal_files[:proteins], @refgenome.cds_file, "Prot-Ref", @pidentity, "prot")
84
- @prot_synteny.run_blat @root, @outdir
85
- @prot_synteny.extract_hits_prodigal :refgenome
122
+ @prot_synteny_refgenome = run_reference_synteny_prot
86
123
 
87
- @fasta.prodigal_files[:contigs].each_with_index do |contig, contig_index|
124
+ # iterate over each contig
125
+ # discard short contig
126
+ # cumulate statistics of homolog CDS
127
+ @query_fasta.annotation_files[:contigs].each_with_index do |contig, contig_index|
88
128
 
89
129
  # Skip short contigs
90
- if @fasta.prodigal_files[:contigs_length][contig_index] < @minlength
130
+ if @query_fasta.annotation_files[:contigs_length][contig_index] < @minlength
91
131
  @annotation_stats[:short_contigs] << contig
92
132
  next
93
133
  end
94
134
 
95
- contig_prots = @fasta.prodigal_files[:prot_ids_by_contig][contig]
96
- # contig_to_annotate = contig_prots[0].split("_")[0..-2].join("_")
97
- # contig_prot_annotations = @prot_synteny.get_annotation_for_contig contig_prots, @refgenome.coding_seq
98
- @contig_annotations[contig] = @prot_synteny.get_annotation_for_contig contig, contig_prots, @refgenome.coding_seq
99
-
100
- remaining_cds = cumulate_annotation_stats_reference contig, @contig_annotations[contig]
135
+ remaining_cds = cumulate_annotation_stats_reference contig
101
136
 
102
137
  if ! remaining_cds.empty?
103
138
  @contig_foreign_cds[contig] = remaining_cds
@@ -113,18 +148,19 @@ class BacterialAnnotator
113
148
 
114
149
  # run RNA annotation
115
150
  puts "\nRunning BLAT alignment with Reference Genome RNA.."
116
- @rna_synteny = SyntenyManip.new(@fasta.fasta_file, @refgenome.rna_file, "RNA-Ref", @pidentity, "dna")
117
- @rna_synteny.run_blat @root, @outdir
151
+ @rna_synteny = SequenceSynteny.new(@query_fasta.fasta_file, @ref_genome.rna_file,
152
+ "RNA-Ref", @options[:pidentity], @options[:pcoverage], "dna")
153
+ @rna_synteny.run_blat @root, @options[:outdir]
118
154
  @rna_synteny.extract_hits_dna :rna
119
155
  @contig_annotations_rna = {}
120
- @fasta.prodigal_files[:contigs].each_with_index do |contig, contig_index|
156
+ @query_fasta.annotation_files[:contigs].each_with_index do |contig, contig_index|
121
157
  @contig_annotations_rna[contig] = @rna_synteny.get_annotation_for_contig contig
122
158
  end
123
159
 
124
160
  else # no reference genome
125
161
 
126
162
  # no reference genome .. will process all the CDS
127
- foreign_cds_file = @fasta.prodigal_files[:proteins]
163
+ foreign_cds_file = @query_fasta.annotation_files[:proteins]
128
164
 
129
165
  end
130
166
 
@@ -135,7 +171,7 @@ class BacterialAnnotator
135
171
  parse_genbank_files
136
172
 
137
173
  puts "\nPrinting Statistics.."
138
- print_stats "#{@outdir}/Annotation-Stats.txt"
174
+ print_stats "#{@options[:outdir]}"
139
175
 
140
176
 
141
177
  end # end of method
@@ -150,84 +186,48 @@ class BacterialAnnotator
150
186
  db_file = @options[:external_db]
151
187
  ref_cds = extract_externaldb_prot_info db_file
152
188
 
153
- externaldb_synteny = SyntenyManip.new(remaining_cds_file, db_file, "Prot-ExternalDB", @pidentity)
189
+ @externaldb_synteny = SequenceSynteny.new(remaining_cds_file, db_file,
190
+ "Prot-ExternalDB", @options[:pidentity],
191
+ @options[:pcoverage], "prot")
192
+
154
193
  puts "\nRunning BLAT alignment with External Database.."
155
- externaldb_synteny.run_blat @root, @outdir
156
- externaldb_synteny.extract_hits_prodigal :externaldb
194
+ @externaldb_synteny.run_blat @root, @options[:outdir]
195
+ @externaldb_synteny.extract_hits :externaldb
196
+
197
+ @externaldb_synteny.query_sequences.each do |k, v|
157
198
 
158
- externaldb_synteny.aln_hits.each do |k,v|
159
199
  contig_of_protein = k.split("_")[0..-2].join("_")
160
200
 
161
- if ! @contig_annotations.has_key? contig_of_protein
162
- @contig_annotations[contig_of_protein] = {}
201
+ if ! @contig_annotations_externaldb.has_key? contig_of_protein
202
+ @contig_annotations_externaldb[contig_of_protein] = {}
163
203
  end
164
204
 
165
- hit_gi = v[:hits][0]
205
+ next if ! v.has_key? :homology
206
+
207
+ @contig_annotations_cds[contig_of_protein] << k
208
+
209
+ hit_gi = v[:homology][:hits][0]
166
210
 
167
211
  # note = "Protein homology (#{v[:pId]}% identity) with gi:#{hit_gi}"
168
- note = "Protein homology (#{v[:pId]}% identity) with #{hit_gi}"
212
+ cov_query = (v[:homology][:cov_query]*100).round(2)
213
+ cov_subject = (v[:homology][:cov_subject]*100).round(2)
214
+ note = "Protein homology (AA identity: #{v[:homology][:pId]}%; coverage (q,s): #{cov_query}%,#{cov_subject}%) with #{ref_cds[hit_gi][:prot_id]}"
215
+ inference = "similar to AA sequence:#{ref_cds[hit_gi][:db_source]}:#{ref_cds[hit_gi][:prot_id]}"
169
216
 
170
217
  if ref_cds[hit_gi][:org] != ""
171
218
  note += " from #{ref_cds[hit_gi][:org]}"
172
219
  end
173
- @contig_annotations[contig_of_protein][k] = {product: ref_cds[hit_gi][:product],
174
- feature: "cds",
175
- gene: nil,
176
- locustag: nil,
177
- note: note}
178
-
179
- end
180
-
181
-
182
- elsif @options.has_key? :remote_db # from a remote DB
183
-
184
- # do it by chunk to avoid NCBI CPU exceeding limit
185
- cds_files = split_remaining_cds_file remaining_cds_file
186
- @remotedb = @options[:remote_db]
187
220
 
188
- puts "\n# NCBI Blast on #{@remotedb}"
189
-
190
- cds_files.each do |cds_file|
191
-
192
- # remotedb = @options[:remote_db]
193
- valid = true
194
- begin
195
- # puts "\nNCBI blast on #{@remotedb} for #{cds_file}"
196
- ncbiblast = RemoteNCBI.new(@remotedb,
197
- cds_file,
198
- "#{cds_file}.#{@remotedb}.xml",
199
- @pidentity)
200
- rescue
201
- valid = false
202
- end
203
-
204
- # ncbi blast didn't worked out
205
- if !valid
206
- puts "Problem NCBI blast for foreign proteins"
207
- else
208
- ncbiblast.extract_blast_results
209
- if ! ncbiblast.aln_hits
210
- puts "Didn't produce the annotation for #{cds_file}"
211
- next
212
- end
213
- ncbiblast.aln_hits.each do |k,v|
214
- contig_of_protein = k.split("_")[0..-2].join("_")
215
- if ! @contig_annotations.has_key? contig_of_protein
216
- @contig_annotations[contig_of_protein] = {}
217
- end
218
- # note = "Protein homology (#{v[:pId]}% identity) with gi:#{v[:hits][0][:gi]}"
219
- note = "Protein homology (#{v[:pId]}% identity) with gi:#{v[:hits][0][:accession]}"
220
- if v[:hits][0][:org] != ""
221
- note += " from #{v[:hits][0][:org]}"
222
- end
223
- @contig_annotations[contig_of_protein][k] = {product: v[:hits][0][:product],
224
- feature: "cds",
225
- gene: nil,
226
- locustag: nil,
227
- note: note}
228
- end
221
+ @contig_annotations_externaldb[contig_of_protein][v[:homology][:hits][0]] = {
222
+ product: ref_cds[hit_gi][:product],
223
+ feature: "cds",
224
+ gene: nil,
225
+ prot_id: ref_cds[hit_gi][:prot_id],
226
+ locustag: nil,
227
+ note: note,
228
+ inference: inference
229
+ }
229
230
 
230
- end
231
231
 
232
232
  end
233
233
 
@@ -240,12 +240,24 @@ class BacterialAnnotator
240
240
  def parse_genbank_files
241
241
 
242
242
  puts "\nParsing annotation into genbank files.."
243
- @contig_annotations.each do |contig, contig_prot_annotations|
244
- gbk_path = @fasta.prodigal_files[:gbk_path]
245
- gbk_to_annotate = GenbankManip.new("#{gbk_path}/#{contig}.gbk", "#{gbk_path}")
246
- reference_locus = nil
247
- reference_locus = @refgenome.gbk.locus if @with_refence_genome
248
- gbk_to_annotate.add_annotations contig_prot_annotations, "inplace", reference_locus
243
+ @contig_annotations_cds.each do |contig, contig_prots|
244
+
245
+ gbk_path = @query_fasta.annotation_files[:gbk_path]
246
+ gbk_to_annotate = SequenceAnnotation.new("#{gbk_path}/#{contig}.gbk", "#{gbk_path}")
247
+
248
+ if @with_external_db
249
+ gbk_to_annotate.add_annotation_ref_synteny_prot(
250
+ (@prot_synteny_refgenome.query_sequences.merge(@externaldb_synteny.query_sequences)),
251
+ @contig_annotations_externaldb[contig].merge(@ref_genome.coding_seq),
252
+ @options[:refgenome].gsub(/.gb.*/,"")
253
+ )
254
+ else
255
+ gbk_to_annotate.add_annotation_ref_synteny_prot(
256
+ @prot_synteny_refgenome.query_sequences,
257
+ @ref_genome.coding_seq,
258
+ @options[:refgenome].gsub(/.gb.*/,"")
259
+ )
260
+ end
249
261
 
250
262
  if @contig_annotations_rna.has_key? contig
251
263
  # puts "RNA annotation"
@@ -261,34 +273,65 @@ class BacterialAnnotator
261
273
 
262
274
  # cumulate the stats for the synteny
263
275
  # return : unannotated cds array
264
- def cumulate_annotation_stats_reference contig, contig_prots_ann
276
+ # def cumulate_annotation_stats_reference contig, contig_prots_ann
277
+ def cumulate_annotation_stats_reference contig
265
278
 
266
279
  remaining_cds = []
267
- contig_prots = @fasta.prodigal_files[:prot_ids_by_contig][contig]
280
+ contig_prots = @query_fasta.annotation_files[:prot_ids_by_contig][contig]
268
281
 
269
282
  @annotation_stats[:total_cds] += contig_prots.length if contig_prots
270
- contig_prots_ann.each do |k,v|
271
- if v != nil
272
- @annotation_stats[:annotated_cds] += 1
283
+
284
+ # count contig as foreign if no cds homolog in reference genome
285
+ if @contig_annotations_cds.has_key? contig and
286
+ @contig_annotations_cds[contig].length > 0
287
+ @annotation_stats[:synteny_contigs] << contig
288
+ else
289
+ @annotation_stats[:foreign_contigs] << contig
290
+ return
291
+ end
292
+
293
+ contig_prots.each do |prot|
294
+
295
+ if @contig_annotations_cds[contig].include? prot
296
+
297
+ if @prot_synteny_refgenome.query_sequences[prot].has_key? :homology and
298
+ @prot_synteny_refgenome.query_sequences[prot][:homology][:hits].length > 0
299
+
300
+ assert_sum = @prot_synteny_refgenome.query_sequences[prot][:homology][:assert_cutoff].inject(:+)
301
+ if assert_sum > 2
302
+ @annotation_stats[:annotated_cds] += 1
303
+ else
304
+ flag = "#{prot}"
305
+ flag += "\t#{@prot_synteny_refgenome.query_sequences[prot][:homology][:assert_cutoff].join(',')}"
306
+ flag += "\t#{@prot_synteny_refgenome.query_sequences[prot][:homology][:pId]}"
307
+ flag += "\t#{(@prot_synteny_refgenome.query_sequences[prot][:homology][:cov_query]*100).round(2)}"
308
+ flag += "\t#{(@prot_synteny_refgenome.query_sequences[prot][:homology][:cov_subject]*100).round(2)}"
309
+ @annotation_stats[:flagged_cds] << flag
310
+ end
311
+
312
+ else
313
+
314
+ puts "No " + prot
315
+
316
+ end
317
+
273
318
  else
274
- remaining_cds << k
319
+
320
+ remaining_cds << prot
321
+
275
322
  end
276
- end
277
323
 
278
- # Annotated Contigs
279
- if contig_prots_ann.keys.length < 1
280
- @annotation_stats[:foreign_contigs] << contig
281
- else
282
- @annotation_stats[:synteny_contigs] << contig
283
324
  end
284
325
 
285
326
  remaining_cds
327
+
286
328
  end # end of method
287
329
 
288
330
 
289
331
  # print statistics to file
290
- def print_stats file
332
+ def print_stats file_dir
291
333
 
334
+ file = file_dir + "/Annotation-Stats.txt"
292
335
  total_nb_contigs = @annotation_stats[:foreign_contigs].length +
293
336
  @annotation_stats[:synteny_contigs].length +
294
337
  @annotation_stats[:short_contigs].length
@@ -307,25 +350,34 @@ class BacterialAnnotator
307
350
 
308
351
  fopen.write("#CDS annotations based on reference genomes\n")
309
352
  fopen.write("Annotated CDS :\t\t\t" + @annotation_stats[:annotated_cds].to_s + "\n")
353
+ fopen.write("Flagged CDS :\t\t\t" + @annotation_stats[:flagged_cds].length.to_s + "\n")
310
354
  fopen.write("Total CDS :\t\t\t" + @annotation_stats[:total_cds].to_s + "\n")
311
355
  fopen.write("% CDS annotated :\t\t" + (p_cds_annotated*100).round(2).to_s + "\n")
312
356
  fopen.write("\n")
313
357
 
314
358
  end
315
359
 
360
+ file_flagged_cds = file_dir + "/Prot-flagged.tsv"
361
+ File.open(file_flagged_cds, "w") do |fopen|
362
+ fopen.write("CDS locus\tAssertion-CutOff\tAA Identity\tCovQuery(%)\tCovSubject(%)\n")
363
+ @annotation_stats[:flagged_cds].each do |fcds|
364
+ fopen.write("#{fcds}\n")
365
+ end
366
+ end
367
+
316
368
  end # end of method
317
369
 
318
370
 
319
371
  # dump cds to file for blast
320
372
  def dump_cds
321
373
 
322
- cds_outfile = File.open("#{@outdir}/Proteins-foreign.fa","w")
374
+ cds_outfile = File.open("#{@options[:outdir]}/Proteins-foreign.fa","w")
323
375
  foreign_cds = []
324
376
  @contig_foreign_cds.each_value do |v|
325
377
  foreign_cds.push(*v)
326
378
  end
327
379
  inprot = false
328
- File.open(@fasta.prodigal_files[:proteins]) do |fprot|
380
+ File.open(@query_fasta.annotation_files[:proteins]) do |fprot|
329
381
  while l=fprot.gets
330
382
  if l[0] == ">"
331
383
  inprot = false
@@ -340,7 +392,7 @@ class BacterialAnnotator
340
392
  end
341
393
  end
342
394
  cds_outfile.close
343
- return "#{@outdir}/Proteins-foreign.fa"
395
+ return "#{@options[:outdir]}/Proteins-foreign.fa"
344
396
 
345
397
  end # end of method
346
398
 
@@ -363,25 +415,46 @@ class BacterialAnnotator
363
415
  if l[0] == ">"
364
416
 
365
417
  lA = l.chomp.split("|")
366
- key_gi = lA[1]
418
+ #key_gi = lA[1]
419
+ key_gi = l.split(" ")[0][1..-1]
367
420
  product_long = lA[-1]
368
421
 
369
422
  organism = ""
370
423
  product = ""
424
+ db_source = "[DBSource]"
371
425
 
372
426
  if product_long.include? " [" and product_long.include? "]" # NCBI
373
427
  organism = product_long[/\[.*?\]/]
374
428
  product = product_long.split(" [")[0].strip
375
- elsif product_long.include? "OS="
429
+ elsif product_long.include? "OS=" # Swissprot / TrEMBL
376
430
  product_tmp = product.split("OS=")
377
431
  organism = product_tmp[1].split(/[A-Z][A-Z]=/)[0].strip
378
432
  product = product_tmp[0].strip
379
- elsif product_long.include? "[A-Z][A-Z]="
433
+ elsif product_long.include? "[A-Z][A-Z]=" # NCBI
380
434
  product = product_long.split(/[A-Z][A-Z]=/)[0].strip
435
+ else
436
+ product = product_long
381
437
  end
438
+
382
439
  org = organism.gsub("[","").gsub("]","")
440
+
383
441
  product.lstrip!
384
- ref_cds[key_gi] = {product: product, org: org}
442
+ prot_id = nil
443
+
444
+ if key_gi.count("|") == 4
445
+ if lA[2] == "ref"
446
+ db_source = "RefSeq"
447
+ end
448
+ prot_id = lA[3]
449
+ elsif key_gi.count("|") == 2
450
+ if lA[0].include? == "sp" or
451
+ lA[0].include? == "tr"
452
+ db_source = "UniProtKB"
453
+ end
454
+ prot_id = lA[1]
455
+ end
456
+
457
+ ref_cds[key_gi] = {product: product, org: org, prot_id: prot_id, db_source: db_source}
385
458
 
386
459
  end
387
460
 
@@ -398,7 +471,7 @@ class BacterialAnnotator
398
471
  def split_remaining_cds_file file
399
472
 
400
473
  cds_files = []
401
- outdir = "#{@outdir}/Protein-foreign.split"
474
+ outdir = "#{@options[:outdir]}/Protein-foreign.split"
402
475
 
403
476
  Dir.mkdir(outdir) if ! Dir.exists? outdir
404
477
 
@@ -429,22 +502,20 @@ class BacterialAnnotator
429
502
 
430
503
  end # end of method
431
504
 
432
- # will reference CDS synteny to file
505
+ # will dump reference CDS synteny to file
433
506
  def dump_ref_synteny_to_file
434
507
 
435
508
  # Iterate over each Ref protein and print syntheny
436
- synteny_file = File.open("#{@outdir}/Prot-Synteny.tsv","w")
509
+ synteny_file = File.open("#{@options[:outdir]}/Prot-Synteny.tsv","w")
437
510
  synteny_file.write("RefLocusTag\tRefProtID\tRefLength\tRefCoverage\tIdentity\tQueryGene\tQueryLength\tQueryCoverage\n")
438
511
  ref_annotated = {}
439
- @contig_annotations.each do |contig,prot_annotations|
512
+ @contig_annotations.each do |contig, prot_annotations|
440
513
  prot_annotations.each do |key,prot|
441
- # p key
442
- # p prot
443
514
  ref_annotated[prot[:protId]] = {key: key, length: prot[:length], pId: prot[:pId]} if prot != nil
444
515
  end
445
516
  end
446
517
 
447
- @refgenome.coding_seq.each do |ref_k, ref_v|
518
+ @ref_genome.coding_seq.each do |ref_k, ref_v|
448
519
 
449
520
  gene = ""
450
521
  coverage_ref = ""
@@ -454,7 +525,7 @@ class BacterialAnnotator
454
525
  if ref_annotated[ref_v[:protId]] != nil
455
526
  gene = ref_annotated[ref_v[:protId]][:key]
456
527
  coverage_ref = (ref_annotated[ref_v[:protId]][:length].to_f/ref_v[:bioseq].seq.length.to_f).round(2)
457
- query_length = @fasta.prodigal_files[:prot_ids_length][gene]
528
+ query_length = @query_fasta.annotation_files[:prot_ids_length][gene]
458
529
  coverage_query = (ref_annotated[ref_v[:protId]][:length].to_f/query_length.to_f).round(2)
459
530
  pId = ref_annotated[ref_v[:protId]][:pId]
460
531
  end
@@ -377,6 +377,7 @@ class BacterialComparator
377
377
  cmd = system("#{@root}/raxml.linux -T 3 -f b -z #{tree_dir}/RAxML_result.BS -t #{tree_dir}/RAxML_bestTree.PepTree -m PROTGAMMAAUTO -n PEP_BS_TREE -w #{tree_dir}")
378
378
  cmd = system("ln -s #{tree_dir}/RAxML_bipartitionsBranchLabels.PEP_BS_TREE #{tree_dir}/../")
379
379
  Dir.chdir(ori_dir)
380
+
380
381
  end
381
382
 
382
383
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-14 00:00:00.000000000 Z
11
+ date: 2017-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -88,10 +88,9 @@ files:
88
88
  - bin/ba_raxml
89
89
  - bin/bacterial-annotator
90
90
  - lib/bacterial-annotator.rb
91
- - lib/bacterial-annotator/fasta-manip.rb
92
- - lib/bacterial-annotator/genbank-manip.rb
93
- - lib/bacterial-annotator/remote-ncbi.rb
94
- - lib/bacterial-annotator/synteny-manip.rb
91
+ - lib/bacterial-annotator/sequence-annotation.rb
92
+ - lib/bacterial-annotator/sequence-fasta.rb
93
+ - lib/bacterial-annotator/sequence-synteny.rb
95
94
  - lib/bacterial-comparator.rb
96
95
  homepage: http://rubygems.org/gems/bacterial-annotator
97
96
  licenses: