bacterial-annotator 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,95 +9,130 @@
9
9
  require 'bio'
10
10
  require 'fileutils'
11
11
 
12
- require 'bacterial-annotator/genbank-manip'
13
- require 'bacterial-annotator/fasta-manip'
14
- require 'bacterial-annotator/synteny-manip'
15
- require 'bacterial-annotator/remote-ncbi'
12
+ require 'bacterial-annotator/sequence-fasta'
13
+ require 'bacterial-annotator/sequence-annotation'
14
+ require 'bacterial-annotator/sequence-synteny'
15
+
16
16
 
17
17
  class BacterialAnnotator
18
18
 
19
19
  # Initialize BacterialAnnotator
20
- # options[:input], options[:refgenome], ROOT, options[:outdir], options)
20
+ # options, ROOT (path)
21
21
  def initialize options, root
22
22
 
23
23
  @root = root
24
24
  @options = options
25
- @outdir = @options[:outdir]
26
25
 
27
26
  @minlength = @options[:minlength].to_i
28
- @pidentity = @options[:pidentity].to_f
29
- @pidentity = @pidentity * 100 if @pidentity < 1.00
27
+ @options[:minlength] = @options[:minlength].to_i
28
+ @options[:pidentity] = @options[:pidentity].to_f
29
+ @options[:pidentity] = @options[:pidentity] * 100 if @options[:pidentity] <= 1.00
30
+ @options[:pcoverage] = @options[:pcoverage].to_f
31
+ @options[:pcoverage] = @options[:pcoverage] / 100 if @options[:pcoverage] > 1.00
30
32
 
31
- if File.exists? (@outdir)
33
+ if File.exists? (@options[:outdir])
32
34
  if ! options.has_key? :force
33
35
  abort "Output directory already exist ! Choose another one or use -f to overwrite"
34
36
  else
35
- puts "Overwriting output directory #{@outdir}"
36
- FileUtils.remove_dir(@outdir, :force=>true)
37
+ puts "Overwriting output directory #{@options[:outdir]}"
38
+ FileUtils.remove_dir(@options[:outdir], :force=>true)
37
39
  end
38
40
  end
39
- Dir.mkdir(@outdir)
41
+ Dir.mkdir(@options[:outdir])
40
42
 
41
- @fasta = FastaManip.new(@options[:input], @options[:meta])
43
+ @query_fasta = SequenceFasta.new(@options[:input], @options[:meta])
42
44
 
43
45
  @with_refence_genome = false
44
46
  if @options.has_key? :refgenome
45
47
  @with_refence_genome = true
46
- @refgenome = GenbankManip.new(@options[:refgenome], @outdir)
48
+ @ref_genome = SequenceAnnotation.new(@options[:refgenome], @options[:outdir])
47
49
  end
48
50
 
51
+ @with_external_db = false
52
+ @with_external_db = true if @options.has_key? :external_db
53
+
49
54
  @prot_synteny = nil
50
- @annotation_stats = {by_contigs: {},
51
- annotated_cds: 0,
52
- total_cds: 0,
53
- foreign_contigs: [],
54
- synteny_contigs: [],
55
- short_contigs: []}
55
+ @annotation_stats = {
56
+ by_contigs: {},
57
+ annotated_cds: 0,
58
+ flagged_cds: [],
59
+ total_cds: 0,
60
+ foreign_contigs: [],
61
+ synteny_contigs: [],
62
+ short_contigs: []
63
+ }
56
64
 
57
65
  @contig_foreign_cds = {}
66
+
58
67
  @contig_annotations = {}
59
68
 
69
+ @contig_annotations_externaldb = {}
70
+
71
+ @contig_annotations_cds = {}
72
+
60
73
  end # end of method
61
74
 
62
75
  # Prepare files for the annotation
63
76
  # Will run prodigal on the query and prepare reference genome files
64
77
  def prepare_files_for_annotation
65
78
  puts "\nRunning Prodigal on your genome.."
66
- @fasta.run_prodigal @root, @outdir
79
+ @query_fasta.run_prodigal @root, @options[:outdir]
67
80
  puts "Prodigal done."
68
81
  if @with_refence_genome
69
- @refgenome.write_cds_to_file @outdir
70
- @refgenome.write_rna_to_file @outdir
71
- puts "Successfully loaded #{@refgenome.gbk.definition}"
82
+ @ref_genome.write_cds_to_file @options[:outdir]
83
+ @ref_genome.write_rna_to_file @options[:outdir]
84
+ puts "Successfully loaded #{@ref_genome.gbk.definition}"
72
85
  end
73
86
  end # end of method
74
87
 
88
+
89
+ def run_reference_synteny_prot
90
+
91
+ ref_synteny_prot = SequenceSynteny.new(@query_fasta.annotation_files[:proteins], @ref_genome.cds_file,
92
+ "Prot-Ref", @options[:pidentity], @options[:pcoverage], "prot")
93
+
94
+ ref_synteny_prot.run_blat @root, @options[:outdir]
95
+
96
+ ref_synteny_prot.extract_hits :refgenome
97
+
98
+ fdebug = File.open("debug-synteny.tsv", "w")
99
+
100
+ ref_synteny_prot.query_sequences.each do |k,v|
101
+ if v.has_key? :homology
102
+ @contig_annotations_cds[v[:contig]] = [] if ! @contig_annotations_cds.has_key? v[:contig]
103
+ @contig_annotations_cds[v[:contig]] << k
104
+ fdebug.write("#{v[:contig]}\t#{k}\t#{v[:homology][:pId]}\t#{v[:homology][:cov_query]}\t#{v[:homology][:cov_subject]}\t#{v[:homology][:hits].join(',')}\t#{@ref_genome.coding_seq[v[:homology][:hits][0]][:locustag]}\t#{@ref_genome.coding_seq[v[:homology][:hits][0]][:product]}\t#{v[:homology][:assert_cutoff].join(',')}\n")
105
+ else
106
+ fdebug.write("#{v[:contig]} #{k} NONE...\n")
107
+ end
108
+ end
109
+ fdebug.close
110
+
111
+ ref_synteny_prot
112
+
113
+ end
114
+
115
+
75
116
  # run_alignment of reference genome proteins and the query
76
117
  def run_annotation
77
118
 
78
119
  # process reference genome synteny
79
120
  if @with_refence_genome # Annotation with the Reference Genome
80
121
 
81
- # run CDS annotation
82
- puts "\nRunning BLAT alignment with Reference Genome CDS.."
83
- @prot_synteny = SyntenyManip.new(@fasta.prodigal_files[:proteins], @refgenome.cds_file, "Prot-Ref", @pidentity, "prot")
84
- @prot_synteny.run_blat @root, @outdir
85
- @prot_synteny.extract_hits_prodigal :refgenome
122
+ @prot_synteny_refgenome = run_reference_synteny_prot
86
123
 
87
- @fasta.prodigal_files[:contigs].each_with_index do |contig, contig_index|
124
+ # iterate over each contig
125
+ # discard short contig
126
+ # cumulate statistics of homolog CDS
127
+ @query_fasta.annotation_files[:contigs].each_with_index do |contig, contig_index|
88
128
 
89
129
  # Skip short contigs
90
- if @fasta.prodigal_files[:contigs_length][contig_index] < @minlength
130
+ if @query_fasta.annotation_files[:contigs_length][contig_index] < @minlength
91
131
  @annotation_stats[:short_contigs] << contig
92
132
  next
93
133
  end
94
134
 
95
- contig_prots = @fasta.prodigal_files[:prot_ids_by_contig][contig]
96
- # contig_to_annotate = contig_prots[0].split("_")[0..-2].join("_")
97
- # contig_prot_annotations = @prot_synteny.get_annotation_for_contig contig_prots, @refgenome.coding_seq
98
- @contig_annotations[contig] = @prot_synteny.get_annotation_for_contig contig, contig_prots, @refgenome.coding_seq
99
-
100
- remaining_cds = cumulate_annotation_stats_reference contig, @contig_annotations[contig]
135
+ remaining_cds = cumulate_annotation_stats_reference contig
101
136
 
102
137
  if ! remaining_cds.empty?
103
138
  @contig_foreign_cds[contig] = remaining_cds
@@ -113,18 +148,19 @@ class BacterialAnnotator
113
148
 
114
149
  # run RNA annotation
115
150
  puts "\nRunning BLAT alignment with Reference Genome RNA.."
116
- @rna_synteny = SyntenyManip.new(@fasta.fasta_file, @refgenome.rna_file, "RNA-Ref", @pidentity, "dna")
117
- @rna_synteny.run_blat @root, @outdir
151
+ @rna_synteny = SequenceSynteny.new(@query_fasta.fasta_file, @ref_genome.rna_file,
152
+ "RNA-Ref", @options[:pidentity], @options[:pcoverage], "dna")
153
+ @rna_synteny.run_blat @root, @options[:outdir]
118
154
  @rna_synteny.extract_hits_dna :rna
119
155
  @contig_annotations_rna = {}
120
- @fasta.prodigal_files[:contigs].each_with_index do |contig, contig_index|
156
+ @query_fasta.annotation_files[:contigs].each_with_index do |contig, contig_index|
121
157
  @contig_annotations_rna[contig] = @rna_synteny.get_annotation_for_contig contig
122
158
  end
123
159
 
124
160
  else # no reference genome
125
161
 
126
162
  # no reference genome .. will process all the CDS
127
- foreign_cds_file = @fasta.prodigal_files[:proteins]
163
+ foreign_cds_file = @query_fasta.annotation_files[:proteins]
128
164
 
129
165
  end
130
166
 
@@ -135,7 +171,7 @@ class BacterialAnnotator
135
171
  parse_genbank_files
136
172
 
137
173
  puts "\nPrinting Statistics.."
138
- print_stats "#{@outdir}/Annotation-Stats.txt"
174
+ print_stats "#{@options[:outdir]}"
139
175
 
140
176
 
141
177
  end # end of method
@@ -150,84 +186,48 @@ class BacterialAnnotator
150
186
  db_file = @options[:external_db]
151
187
  ref_cds = extract_externaldb_prot_info db_file
152
188
 
153
- externaldb_synteny = SyntenyManip.new(remaining_cds_file, db_file, "Prot-ExternalDB", @pidentity)
189
+ @externaldb_synteny = SequenceSynteny.new(remaining_cds_file, db_file,
190
+ "Prot-ExternalDB", @options[:pidentity],
191
+ @options[:pcoverage], "prot")
192
+
154
193
  puts "\nRunning BLAT alignment with External Database.."
155
- externaldb_synteny.run_blat @root, @outdir
156
- externaldb_synteny.extract_hits_prodigal :externaldb
194
+ @externaldb_synteny.run_blat @root, @options[:outdir]
195
+ @externaldb_synteny.extract_hits :externaldb
196
+
197
+ @externaldb_synteny.query_sequences.each do |k, v|
157
198
 
158
- externaldb_synteny.aln_hits.each do |k,v|
159
199
  contig_of_protein = k.split("_")[0..-2].join("_")
160
200
 
161
- if ! @contig_annotations.has_key? contig_of_protein
162
- @contig_annotations[contig_of_protein] = {}
201
+ if ! @contig_annotations_externaldb.has_key? contig_of_protein
202
+ @contig_annotations_externaldb[contig_of_protein] = {}
163
203
  end
164
204
 
165
- hit_gi = v[:hits][0]
205
+ next if ! v.has_key? :homology
206
+
207
+ @contig_annotations_cds[contig_of_protein] << k
208
+
209
+ hit_gi = v[:homology][:hits][0]
166
210
 
167
211
  # note = "Protein homology (#{v[:pId]}% identity) with gi:#{hit_gi}"
168
- note = "Protein homology (#{v[:pId]}% identity) with #{hit_gi}"
212
+ cov_query = (v[:homology][:cov_query]*100).round(2)
213
+ cov_subject = (v[:homology][:cov_subject]*100).round(2)
214
+ note = "Protein homology (AA identity: #{v[:homology][:pId]}%; coverage (q,s): #{cov_query}%,#{cov_subject}%) with #{ref_cds[hit_gi][:prot_id]}"
215
+ inference = "similar to AA sequence:#{ref_cds[hit_gi][:db_source]}:#{ref_cds[hit_gi][:prot_id]}"
169
216
 
170
217
  if ref_cds[hit_gi][:org] != ""
171
218
  note += " from #{ref_cds[hit_gi][:org]}"
172
219
  end
173
- @contig_annotations[contig_of_protein][k] = {product: ref_cds[hit_gi][:product],
174
- feature: "cds",
175
- gene: nil,
176
- locustag: nil,
177
- note: note}
178
-
179
- end
180
-
181
-
182
- elsif @options.has_key? :remote_db # from a remote DB
183
-
184
- # do it by chunk to avoid NCBI CPU exceeding limit
185
- cds_files = split_remaining_cds_file remaining_cds_file
186
- @remotedb = @options[:remote_db]
187
220
 
188
- puts "\n# NCBI Blast on #{@remotedb}"
189
-
190
- cds_files.each do |cds_file|
191
-
192
- # remotedb = @options[:remote_db]
193
- valid = true
194
- begin
195
- # puts "\nNCBI blast on #{@remotedb} for #{cds_file}"
196
- ncbiblast = RemoteNCBI.new(@remotedb,
197
- cds_file,
198
- "#{cds_file}.#{@remotedb}.xml",
199
- @pidentity)
200
- rescue
201
- valid = false
202
- end
203
-
204
- # ncbi blast didn't worked out
205
- if !valid
206
- puts "Problem NCBI blast for foreign proteins"
207
- else
208
- ncbiblast.extract_blast_results
209
- if ! ncbiblast.aln_hits
210
- puts "Didn't produce the annotation for #{cds_file}"
211
- next
212
- end
213
- ncbiblast.aln_hits.each do |k,v|
214
- contig_of_protein = k.split("_")[0..-2].join("_")
215
- if ! @contig_annotations.has_key? contig_of_protein
216
- @contig_annotations[contig_of_protein] = {}
217
- end
218
- # note = "Protein homology (#{v[:pId]}% identity) with gi:#{v[:hits][0][:gi]}"
219
- note = "Protein homology (#{v[:pId]}% identity) with gi:#{v[:hits][0][:accession]}"
220
- if v[:hits][0][:org] != ""
221
- note += " from #{v[:hits][0][:org]}"
222
- end
223
- @contig_annotations[contig_of_protein][k] = {product: v[:hits][0][:product],
224
- feature: "cds",
225
- gene: nil,
226
- locustag: nil,
227
- note: note}
228
- end
221
+ @contig_annotations_externaldb[contig_of_protein][v[:homology][:hits][0]] = {
222
+ product: ref_cds[hit_gi][:product],
223
+ feature: "cds",
224
+ gene: nil,
225
+ prot_id: ref_cds[hit_gi][:prot_id],
226
+ locustag: nil,
227
+ note: note,
228
+ inference: inference
229
+ }
229
230
 
230
- end
231
231
 
232
232
  end
233
233
 
@@ -240,12 +240,24 @@ class BacterialAnnotator
240
240
  def parse_genbank_files
241
241
 
242
242
  puts "\nParsing annotation into genbank files.."
243
- @contig_annotations.each do |contig, contig_prot_annotations|
244
- gbk_path = @fasta.prodigal_files[:gbk_path]
245
- gbk_to_annotate = GenbankManip.new("#{gbk_path}/#{contig}.gbk", "#{gbk_path}")
246
- reference_locus = nil
247
- reference_locus = @refgenome.gbk.locus if @with_refence_genome
248
- gbk_to_annotate.add_annotations contig_prot_annotations, "inplace", reference_locus
243
+ @contig_annotations_cds.each do |contig, contig_prots|
244
+
245
+ gbk_path = @query_fasta.annotation_files[:gbk_path]
246
+ gbk_to_annotate = SequenceAnnotation.new("#{gbk_path}/#{contig}.gbk", "#{gbk_path}")
247
+
248
+ if @with_external_db
249
+ gbk_to_annotate.add_annotation_ref_synteny_prot(
250
+ (@prot_synteny_refgenome.query_sequences.merge(@externaldb_synteny.query_sequences)),
251
+ @contig_annotations_externaldb[contig].merge(@ref_genome.coding_seq),
252
+ @options[:refgenome].gsub(/.gb.*/,"")
253
+ )
254
+ else
255
+ gbk_to_annotate.add_annotation_ref_synteny_prot(
256
+ @prot_synteny_refgenome.query_sequences,
257
+ @ref_genome.coding_seq,
258
+ @options[:refgenome].gsub(/.gb.*/,"")
259
+ )
260
+ end
249
261
 
250
262
  if @contig_annotations_rna.has_key? contig
251
263
  # puts "RNA annotation"
@@ -261,34 +273,65 @@ class BacterialAnnotator
261
273
 
262
274
  # cumulate the stats for the synteny
263
275
  # return : unannotated cds array
264
- def cumulate_annotation_stats_reference contig, contig_prots_ann
276
+ # def cumulate_annotation_stats_reference contig, contig_prots_ann
277
+ def cumulate_annotation_stats_reference contig
265
278
 
266
279
  remaining_cds = []
267
- contig_prots = @fasta.prodigal_files[:prot_ids_by_contig][contig]
280
+ contig_prots = @query_fasta.annotation_files[:prot_ids_by_contig][contig]
268
281
 
269
282
  @annotation_stats[:total_cds] += contig_prots.length if contig_prots
270
- contig_prots_ann.each do |k,v|
271
- if v != nil
272
- @annotation_stats[:annotated_cds] += 1
283
+
284
+ # count contig as foreign if no cds homolog in reference genome
285
+ if @contig_annotations_cds.has_key? contig and
286
+ @contig_annotations_cds[contig].length > 0
287
+ @annotation_stats[:synteny_contigs] << contig
288
+ else
289
+ @annotation_stats[:foreign_contigs] << contig
290
+ return
291
+ end
292
+
293
+ contig_prots.each do |prot|
294
+
295
+ if @contig_annotations_cds[contig].include? prot
296
+
297
+ if @prot_synteny_refgenome.query_sequences[prot].has_key? :homology and
298
+ @prot_synteny_refgenome.query_sequences[prot][:homology][:hits].length > 0
299
+
300
+ assert_sum = @prot_synteny_refgenome.query_sequences[prot][:homology][:assert_cutoff].inject(:+)
301
+ if assert_sum > 2
302
+ @annotation_stats[:annotated_cds] += 1
303
+ else
304
+ flag = "#{prot}"
305
+ flag += "\t#{@prot_synteny_refgenome.query_sequences[prot][:homology][:assert_cutoff].join(',')}"
306
+ flag += "\t#{@prot_synteny_refgenome.query_sequences[prot][:homology][:pId]}"
307
+ flag += "\t#{(@prot_synteny_refgenome.query_sequences[prot][:homology][:cov_query]*100).round(2)}"
308
+ flag += "\t#{(@prot_synteny_refgenome.query_sequences[prot][:homology][:cov_subject]*100).round(2)}"
309
+ @annotation_stats[:flagged_cds] << flag
310
+ end
311
+
312
+ else
313
+
314
+ puts "No " + prot
315
+
316
+ end
317
+
273
318
  else
274
- remaining_cds << k
319
+
320
+ remaining_cds << prot
321
+
275
322
  end
276
- end
277
323
 
278
- # Annotated Contigs
279
- if contig_prots_ann.keys.length < 1
280
- @annotation_stats[:foreign_contigs] << contig
281
- else
282
- @annotation_stats[:synteny_contigs] << contig
283
324
  end
284
325
 
285
326
  remaining_cds
327
+
286
328
  end # end of method
287
329
 
288
330
 
289
331
  # print statistics to file
290
- def print_stats file
332
+ def print_stats file_dir
291
333
 
334
+ file = file_dir + "/Annotation-Stats.txt"
292
335
  total_nb_contigs = @annotation_stats[:foreign_contigs].length +
293
336
  @annotation_stats[:synteny_contigs].length +
294
337
  @annotation_stats[:short_contigs].length
@@ -307,25 +350,34 @@ class BacterialAnnotator
307
350
 
308
351
  fopen.write("#CDS annotations based on reference genomes\n")
309
352
  fopen.write("Annotated CDS :\t\t\t" + @annotation_stats[:annotated_cds].to_s + "\n")
353
+ fopen.write("Flagged CDS :\t\t\t" + @annotation_stats[:flagged_cds].length.to_s + "\n")
310
354
  fopen.write("Total CDS :\t\t\t" + @annotation_stats[:total_cds].to_s + "\n")
311
355
  fopen.write("% CDS annotated :\t\t" + (p_cds_annotated*100).round(2).to_s + "\n")
312
356
  fopen.write("\n")
313
357
 
314
358
  end
315
359
 
360
+ file_flagged_cds = file_dir + "/Prot-flagged.tsv"
361
+ File.open(file_flagged_cds, "w") do |fopen|
362
+ fopen.write("CDS locus\tAssertion-CutOff\tAA Identity\tCovQuery(%)\tCovSubject(%)\n")
363
+ @annotation_stats[:flagged_cds].each do |fcds|
364
+ fopen.write("#{fcds}\n")
365
+ end
366
+ end
367
+
316
368
  end # end of method
317
369
 
318
370
 
319
371
  # dump cds to file for blast
320
372
  def dump_cds
321
373
 
322
- cds_outfile = File.open("#{@outdir}/Proteins-foreign.fa","w")
374
+ cds_outfile = File.open("#{@options[:outdir]}/Proteins-foreign.fa","w")
323
375
  foreign_cds = []
324
376
  @contig_foreign_cds.each_value do |v|
325
377
  foreign_cds.push(*v)
326
378
  end
327
379
  inprot = false
328
- File.open(@fasta.prodigal_files[:proteins]) do |fprot|
380
+ File.open(@query_fasta.annotation_files[:proteins]) do |fprot|
329
381
  while l=fprot.gets
330
382
  if l[0] == ">"
331
383
  inprot = false
@@ -340,7 +392,7 @@ class BacterialAnnotator
340
392
  end
341
393
  end
342
394
  cds_outfile.close
343
- return "#{@outdir}/Proteins-foreign.fa"
395
+ return "#{@options[:outdir]}/Proteins-foreign.fa"
344
396
 
345
397
  end # end of method
346
398
 
@@ -363,25 +415,46 @@ class BacterialAnnotator
363
415
  if l[0] == ">"
364
416
 
365
417
  lA = l.chomp.split("|")
366
- key_gi = lA[1]
418
+ #key_gi = lA[1]
419
+ key_gi = l.split(" ")[0][1..-1]
367
420
  product_long = lA[-1]
368
421
 
369
422
  organism = ""
370
423
  product = ""
424
+ db_source = "[DBSource]"
371
425
 
372
426
  if product_long.include? " [" and product_long.include? "]" # NCBI
373
427
  organism = product_long[/\[.*?\]/]
374
428
  product = product_long.split(" [")[0].strip
375
- elsif product_long.include? "OS="
429
+ elsif product_long.include? "OS=" # Swissprot / TrEMBL
376
430
  product_tmp = product.split("OS=")
377
431
  organism = product_tmp[1].split(/[A-Z][A-Z]=/)[0].strip
378
432
  product = product_tmp[0].strip
379
- elsif product_long.include? "[A-Z][A-Z]="
433
+ elsif product_long.include? "[A-Z][A-Z]=" # NCBI
380
434
  product = product_long.split(/[A-Z][A-Z]=/)[0].strip
435
+ else
436
+ product = product_long
381
437
  end
438
+
382
439
  org = organism.gsub("[","").gsub("]","")
440
+
383
441
  product.lstrip!
384
- ref_cds[key_gi] = {product: product, org: org}
442
+ prot_id = nil
443
+
444
+ if key_gi.count("|") == 4
445
+ if lA[2] == "ref"
446
+ db_source = "RefSeq"
447
+ end
448
+ prot_id = lA[3]
449
+ elsif key_gi.count("|") == 2
450
+ if lA[0].include? == "sp" or
451
+ lA[0].include? == "tr"
452
+ db_source = "UniProtKB"
453
+ end
454
+ prot_id = lA[1]
455
+ end
456
+
457
+ ref_cds[key_gi] = {product: product, org: org, prot_id: prot_id, db_source: db_source}
385
458
 
386
459
  end
387
460
 
@@ -398,7 +471,7 @@ class BacterialAnnotator
398
471
  def split_remaining_cds_file file
399
472
 
400
473
  cds_files = []
401
- outdir = "#{@outdir}/Protein-foreign.split"
474
+ outdir = "#{@options[:outdir]}/Protein-foreign.split"
402
475
 
403
476
  Dir.mkdir(outdir) if ! Dir.exists? outdir
404
477
 
@@ -429,22 +502,20 @@ class BacterialAnnotator
429
502
 
430
503
  end # end of method
431
504
 
432
- # will reference CDS synteny to file
505
+ # will dump reference CDS synteny to file
433
506
  def dump_ref_synteny_to_file
434
507
 
435
508
  # Iterate over each Ref protein and print syntheny
436
- synteny_file = File.open("#{@outdir}/Prot-Synteny.tsv","w")
509
+ synteny_file = File.open("#{@options[:outdir]}/Prot-Synteny.tsv","w")
437
510
  synteny_file.write("RefLocusTag\tRefProtID\tRefLength\tRefCoverage\tIdentity\tQueryGene\tQueryLength\tQueryCoverage\n")
438
511
  ref_annotated = {}
439
- @contig_annotations.each do |contig,prot_annotations|
512
+ @contig_annotations.each do |contig, prot_annotations|
440
513
  prot_annotations.each do |key,prot|
441
- # p key
442
- # p prot
443
514
  ref_annotated[prot[:protId]] = {key: key, length: prot[:length], pId: prot[:pId]} if prot != nil
444
515
  end
445
516
  end
446
517
 
447
- @refgenome.coding_seq.each do |ref_k, ref_v|
518
+ @ref_genome.coding_seq.each do |ref_k, ref_v|
448
519
 
449
520
  gene = ""
450
521
  coverage_ref = ""
@@ -454,7 +525,7 @@ class BacterialAnnotator
454
525
  if ref_annotated[ref_v[:protId]] != nil
455
526
  gene = ref_annotated[ref_v[:protId]][:key]
456
527
  coverage_ref = (ref_annotated[ref_v[:protId]][:length].to_f/ref_v[:bioseq].seq.length.to_f).round(2)
457
- query_length = @fasta.prodigal_files[:prot_ids_length][gene]
528
+ query_length = @query_fasta.annotation_files[:prot_ids_length][gene]
458
529
  coverage_query = (ref_annotated[ref_v[:protId]][:length].to_f/query_length.to_f).round(2)
459
530
  pId = ref_annotated[ref_v[:protId]][:pId]
460
531
  end
@@ -377,6 +377,7 @@ class BacterialComparator
377
377
  cmd = system("#{@root}/raxml.linux -T 3 -f b -z #{tree_dir}/RAxML_result.BS -t #{tree_dir}/RAxML_bestTree.PepTree -m PROTGAMMAAUTO -n PEP_BS_TREE -w #{tree_dir}")
378
378
  cmd = system("ln -s #{tree_dir}/RAxML_bipartitionsBranchLabels.PEP_BS_TREE #{tree_dir}/../")
379
379
  Dir.chdir(ori_dir)
380
+
380
381
  end
381
382
 
382
383
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-14 00:00:00.000000000 Z
11
+ date: 2017-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -88,10 +88,9 @@ files:
88
88
  - bin/ba_raxml
89
89
  - bin/bacterial-annotator
90
90
  - lib/bacterial-annotator.rb
91
- - lib/bacterial-annotator/fasta-manip.rb
92
- - lib/bacterial-annotator/genbank-manip.rb
93
- - lib/bacterial-annotator/remote-ncbi.rb
94
- - lib/bacterial-annotator/synteny-manip.rb
91
+ - lib/bacterial-annotator/sequence-annotation.rb
92
+ - lib/bacterial-annotator/sequence-fasta.rb
93
+ - lib/bacterial-annotator/sequence-synteny.rb
95
94
  - lib/bacterial-comparator.rb
96
95
  homepage: http://rubygems.org/gems/bacterial-annotator
97
96
  licenses: