bacterial-annotator 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 12f3850a1e1eb738530d8cdb31c002b3ec34419c
4
- data.tar.gz: d765574dd7200b59aecbfe1f7c589d1a1ea15527
3
+ metadata.gz: befd57ce78f0c186da1553c7372c3aa6faeb9d90
4
+ data.tar.gz: 5e37d6a7e579a1e9e428deb9864e4a9d5ea9f057
5
5
  SHA512:
6
- metadata.gz: 1f8f63582170adc0d2efb47dd655364c46822ba600778638226278d07d7b06ec2ae838058a7ee3ceb3b8d9b2668587711995b94c1fcdd7fdb6816f8384283bb1
7
- data.tar.gz: 3a210bef0d5c0112add5867997894ce893f20ccdd867b72fa34b4d5a55ec9821e6e878e3e6168917705bdfbdeec12c4c70cc1cdb93e570b650c6eca28b18ee61
6
+ metadata.gz: a9a9766113cef56ae7ed35749cd5fbc10d746aa82e403596dccd0c5e7946786b136a69e19c74a4cece73549b0f1a8de077a8c106fc0e2310f7b000dc6cbad962
7
+ data.tar.gz: 00a0c5cf815252fa45ffae194318f730cf69e27dd53643659fb06d2dac131a3de881cbad2595b43df6ba5014be75cef8b337e6710afb46e42b420fdd1cf9b178
data/bin/ba_prodigal CHANGED
@@ -31,7 +31,7 @@ def installProdigal
31
31
  rescue
32
32
  abort "Problem installing Prodigal, aborting"
33
33
  end
34
-
34
+
35
35
  end
36
36
 
37
37
 
@@ -46,22 +46,21 @@ annotate [OPTIONS]
46
46
  --force/-f Force to overwrite the output directory
47
47
 
48
48
  // Dataset
49
- --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
50
- --guessref Will guess the best reference genome to use for the annotation.
49
+ --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
50
+ --guessref Will guess the best reference genome to use for the annotation.
51
51
 
52
- --remotedb <remote_database> [nr|refseq|swissprot]
53
- Complete the annotation of remaining CDS with a remote NCBI BLAST
54
- Can be very slow, better to use an external database !
55
-
56
- --externaldb <proteins fasta_file>
57
- Complete or do the annotation of remaining CDS with this database (a protein fasta file).
52
+ --externaldb <proteins fasta_file>
53
+ Finish or do a complete annotation with this sequence database (a protein fasta file).
58
54
  Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
59
55
  >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
60
56
  >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
61
57
 
62
58
  // Other options
63
- --pidentity Minimum percentage identity to incorporate a CDS annotation [default=0.7]
64
- --minlength Minimum contig length for annotation [default=500]
59
+ --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.7]
60
+ --pcoverage <% coverage> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.7]
61
+ .. otherwise hint for a non-functional protein
62
+
63
+ --minlength <length> Minimum contig length for annotation [default=500]
65
64
 
66
65
  --meta Better for metagenome and plasmid annotations because of disparate codon usage [default=off]
67
66
 
@@ -77,6 +76,7 @@ def parseOptions_annotate
77
76
  # default options
78
77
  options[:outdir] = "BAnnotation"
79
78
  options[:pidentity] = 70
79
+ options[:pcoverage] = 70
80
80
  options[:minlength] = 500
81
81
  options[:meta] = 0
82
82
 
@@ -95,10 +95,10 @@ def parseOptions_annotate
95
95
  options[:minlength] = ARGV.shift
96
96
  when "--pidentity"
97
97
  options[:pidentity] = ARGV.shift
98
+ when "--pcoverage"
99
+ options[:pcoverage] = ARGV.shift
98
100
  when "--meta"
99
101
  options[:meta] = 1
100
- when "--remotedb"
101
- options[:remote_db] = ARGV.shift
102
102
  when "--externaldb"
103
103
  options[:external_db] = ARGV.shift
104
104
  when "--help", "-h"
@@ -204,7 +204,7 @@ if ARGV.size > 1
204
204
  system("ba_raxml")
205
205
 
206
206
  options = {}
207
- genomes_list = []
207
+ genomes_list = [] # TODO multiple input genomes
208
208
 
209
209
  if ARGV[0] == "annotate"
210
210
 
@@ -217,7 +217,6 @@ if ARGV.size > 1
217
217
 
218
218
  # Check Options
219
219
  if ! options.has_key? :refgenome and
220
- ! options.has_key? :remote_db and
221
220
  ! options.has_key? :external_db
222
221
  puts "You didn't provide a reference genome or a database for the annotation !"
223
222
  elsif ! options.has_key? :input
@@ -1,14 +1,13 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # author: maxime déraspe
3
3
  # email: maximilien1er@gmail.com
4
- # review:
5
4
  # date: 15-02-24
6
5
  # version: 0.0.1
7
6
  # licence:
8
7
 
9
8
 
10
9
 
11
- class GenbankManip
10
+ class SequenceAnnotation
12
11
 
13
12
  attr_accessor :gbk, :coding_seq, :cds_file, :rna_file
14
13
 
@@ -67,13 +66,16 @@ class GenbankManip
67
66
  protId = locustag
68
67
  end
69
68
 
70
- @coding_seq[protId] = {protId: protId,
71
- location: loc,
72
- locustag: locustag,
73
- gene: gene[0],
74
- product: product[0],
75
- bioseq: pepBioSeq,
76
- bioseq_gene: dnaBioSeq}
69
+ @coding_seq[protId] = {
70
+ protId: protId,
71
+ location: loc,
72
+ locustag: locustag,
73
+ gene: gene[0],
74
+ product: product[0],
75
+ bioseq: pepBioSeq,
76
+ bioseq_gene: dnaBioSeq,
77
+ bioseq_len: pepBioSeq.length
78
+ }
77
79
  end
78
80
 
79
81
  end
@@ -110,11 +112,13 @@ class GenbankManip
110
112
  dna = get_DNA(ft,@bioseq)
111
113
  dnaBioSeq = Bio::Sequence.auto(dna)
112
114
 
113
- @rna_seq[locustag] = {type: ft.feature.to_s,
114
- location: loc,
115
- locustag: locustag,
116
- product: product,
117
- bioseq_gene: dnaBioSeq}
115
+ @rna_seq[locustag] = {
116
+ type: ft.feature.to_s,
117
+ location: loc,
118
+ locustag: locustag,
119
+ product: product,
120
+ bioseq_gene: dnaBioSeq
121
+ }
118
122
 
119
123
  end
120
124
 
@@ -125,7 +129,6 @@ class GenbankManip
125
129
  end
126
130
 
127
131
 
128
-
129
132
  # Print CDS to files
130
133
  # RETURN : cds_file path
131
134
  def write_cds_to_file outdir
@@ -174,12 +177,108 @@ class GenbankManip
174
177
  end
175
178
 
176
179
 
180
# Annotate the CDS features of @gbk from the best reference protein found by
# the synteny step.
#
# CDS features are numbered in file order and identified as
# "<@gbk.definition>_<n>". For each one, the first entry of
# synteny_prot[id][:homology][:hits] is looked up in +annotations+ and the
# qualifiers of the CDS are rebuilt from that record.
#
# synteny_prot - Hash keyed by protein id; entries are used only when they
#                carry a :homology Hash (:hits, :pId, :cov_query, :cov_subject
#                are read).
# annotations  - Hash keyed by reference protein id; :locustag, :gene,
#                :product, :note and :inference are read from each record.
# ref_genome   - name interpolated in the generated note (default: nil).
#
# RETURN : the feature list that was iterated (@gbk.features)
def add_annotation_ref_synteny_prot synteny_prot, annotations, ref_genome=nil

  contig = @gbk.definition
  prot_iterator = 0

  @gbk.features.each do |cds|

    next if cds.feature != "CDS"

    prot_iterator += 1
    prot_id = contig + "_" + prot_iterator.to_s

    # Every CDS loses its previous qualifiers, including the ones that are
    # skipped below because no annotation is available.
    # NOTE(review): confirm that un-annotated CDS are meant to end up empty.
    cds.qualifiers = []

    next unless synteny_prot.has_key?(prot_id) &&
                synteny_prot[prot_id].has_key?(:homology)

    homology = synteny_prot[prot_id][:homology]
    reference_prot_id = homology[:hits][0]

    # a missing key and a key stored with a nil record are both "no hit";
    # the nil record used to raise NoMethodError further down
    hit = annotations[reference_prot_id]
    if hit.nil?
      puts "no hit for #{prot_id}"
      next
    end

    locus = hit[:locustag]
    gene = hit[:gene]
    product = hit[:product]
    note = hit[:note]
    inference = hit[:inference]
    pId = homology[:pId]
    cov_query = (homology[:cov_query]*100).round(2)
    cov_subject = (homology[:cov_subject]*100).round(2)

    ftArray = [Bio::Feature::Qualifier.new('locus_tag', "#{prot_id}")]
    ftArray.push(Bio::Feature::Qualifier.new('gene', gene)) unless gene.nil?
    ftArray.push(Bio::Feature::Qualifier.new('product', product)) unless product.nil?

    # a locus tag on the reference record means the hit comes from a
    # reference genome: document the correspondence and its evidence
    unless locus.nil?
      ftArray.push(Bio::Feature::Qualifier.new('note', "corresponds to #{locus} locus (AA identity: #{pId}%; coverage(q,s): #{cov_query}%,#{cov_subject}%) from #{ref_genome}"))

      # ids containing an underscore are labelled RefSeq, all others INSD
      db_source = reference_prot_id.include?("_") ? "RefSeq" : "INSD"
      ftArray.push(Bio::Feature::Qualifier.new('inference', "similar to AA sequence:#{db_source}:#{reference_prot_id}"))
    end

    ftArray.push(Bio::Feature::Qualifier.new('note', note)) unless note.nil?
    ftArray.push(Bio::Feature::Qualifier.new('inference', inference)) unless inference.nil?

    cds.qualifiers = ftArray

  end

end
272
+
273
+
177
274
  # add annotation to a genbank file produced by prodigal
178
275
  def add_annotations annotations, mode, reference_locus=nil
179
276
 
180
277
  # nb_of_added_ft = 0
181
278
  i = 0
182
279
 
280
+ fdebug = File.open("debug-add-annotation.txt","w")
281
+
183
282
  contig = @gbk.definition
184
283
 
185
284
  if mode == "inplace"
@@ -195,9 +294,19 @@ class GenbankManip
195
294
  i += 1
196
295
  prot_id = contig+"_"+i.to_s
197
296
  hit = nil
198
- hit = annotations[prot_id] if annotations.has_key? prot_id
297
+
298
+ if annotations.has_key? prot_id
299
+ hit = annotations[prot_id]
300
+ else
301
+ puts "no hit for #{prot_id}"
302
+ next
303
+ end
199
304
 
200
305
  if hit != nil
306
+
307
+ fdebug.write(hit)
308
+ fdebug.write("\n")
309
+
201
310
  locus, gene, product, note = nil
202
311
  locus = hit[:locustag]
203
312
  gene = hit[:gene]
@@ -271,6 +380,8 @@ class GenbankManip
271
380
 
272
381
  end
273
382
 
383
+ fdebug.close
384
+
274
385
  end
275
386
 
276
387
 
@@ -315,3 +426,4 @@ class GenbankManip
315
426
 
316
427
 
317
428
  end # end of Class
429
+
@@ -8,29 +8,35 @@
8
8
 
9
9
 
10
10
 
11
- class FastaManip
11
+ class SequenceFasta
12
12
 
13
- attr_reader :fasta_flat, :fasta_file, :prodigal_files
13
+ attr_reader :fasta_flat, :fasta_file, :annotation_files
14
14
 
15
15
  # Initialize fasta holder
16
16
  def initialize fasta_file, meta
17
17
 
18
18
  @fasta_file = fasta_file
19
19
  @fasta_flat = Bio::FlatFile.auto(@fasta_file)
20
- @meta = meta
21
- @prodigal_files = nil
22
- @single_fasta = nil
23
- @seq_info = nil
24
20
 
25
21
  if @fasta_flat.dbclass != Bio::FastaFormat
26
22
  abort "Aborting : The input sequence is not a fasta file !"
27
23
  end
28
24
 
25
+ # @contigs = extract_contigs(@fasta_flat)
26
+
27
+ @meta = meta
28
+
29
+ @annotation_files = nil
30
+ @single_fasta = nil
31
+ @seq_info = nil
32
+
29
33
  end
30
34
 
35
+
31
36
  # Run prodigal on the genome to annotate
32
37
  def run_prodigal root, outdir
33
- @prodigal_files = {}
38
+
39
+ @annotation_files = {}
34
40
  Dir.mkdir "#{outdir}" if ! Dir.exists? "#{outdir}"
35
41
  if @meta
36
42
  system("#{root}/prodigal.linux -p meta -i #{@fasta_file} -a #{outdir}/Proteins.fa -d #{outdir}/Genes.fa -o #{outdir}/Genbanks.gbk -q")
@@ -38,30 +44,34 @@ class FastaManip
38
44
  system("#{root}/prodigal.linux -i #{@fasta_file} -a #{outdir}/Proteins.fa -d #{outdir}/Genes.fa -o #{outdir}/Genbanks.gbk -q")
39
45
  end
40
46
 
41
- @prodigal_files = {multiGBK: "#{outdir}/Genbanks.gbk",
42
- contigs: [],
43
- contigs_length: [],
44
- genes: "#{outdir}/Genes.fa",
45
- proteins: "#{outdir}/Proteins.fa",
46
- prot_ids_by_contig: {},
47
- fasta_path: "#{outdir}/single-fasta/",
48
- gbk_path: "#{outdir}/single-genbank/"}
47
+ @annotation_files = {
48
+ multiGBK: "#{outdir}/Genbanks.gbk",
49
+ contigs: [],
50
+ contigs_length: [],
51
+ genes: "#{outdir}/Genes.fa",
52
+ proteins: "#{outdir}/Proteins.fa",
53
+ prot_ids_by_contig: {},
54
+ fasta_path: "#{outdir}/single-fasta/",
55
+ gbk_path: "#{outdir}/single-genbank/"
56
+ }
57
+
49
58
  split_fasta outdir
50
59
  split_genbank outdir, "#{outdir}/Genbanks.gbk"
51
60
  extract_cds_names
52
- @prodigal_files
61
+ @annotation_files
62
+
53
63
  end
54
64
 
55
65
 
56
- # Split Multi Genbanks file
66
+ # Split Multi Fasta file
57
67
  # RETURN : array of fasta files
58
68
  def split_fasta outdir
59
69
  @single_fasta = {}
60
70
  Dir.mkdir("#{outdir}/single-fasta") if ! Dir.exists?("#{outdir}/single-fasta")
61
71
  @fasta_flat.each_entry do |seq|
62
72
  file_name = seq.definition.chomp.split(" ")[0]
63
- @prodigal_files[:contigs] << "#{file_name}"
64
- @prodigal_files[:contigs_length] << seq.seq.length
73
+ @annotation_files[:contigs] << "#{file_name}"
74
+ @annotation_files[:contigs_length] << seq.seq.length
65
75
  File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
66
76
  fwrite.write(seq)
67
77
  end
@@ -108,7 +118,6 @@ class FastaManip
108
118
  outseq = "ORIGIN\n"
109
119
  # puts "ORIGIN"
110
120
 
111
- ntNum = 0
112
121
  sequence = seq.seq.downcase
113
122
 
114
123
  nt_left = true
@@ -144,7 +153,7 @@ class FastaManip
144
153
 
145
154
  prot_ids = {}
146
155
  prot_length = {}
147
- flatfile = Bio::FlatFile.auto(@prodigal_files[:proteins])
156
+ flatfile = Bio::FlatFile.auto(@annotation_files[:proteins])
148
157
 
149
158
  flatfile.each_entry do |entry|
150
159
  prot_id = entry.definition.split(" ")[0]
@@ -163,8 +172,8 @@ class FastaManip
163
172
  prot_array.sort! { |a,b| a.split("_")[-1].to_i <=> b.split("_")[-1].to_i }
164
173
  end
165
174
 
166
- @prodigal_files[:prot_ids_by_contig] = prot_ids
167
- @prodigal_files[:prot_ids_length] = prot_length
175
+ @annotation_files[:prot_ids_by_contig] = prot_ids
176
+ @annotation_files[:prot_ids_length] = prot_length
168
177
 
169
178
  end
170
179
 
@@ -7,20 +7,43 @@
7
7
  # licence:
8
8
 
9
9
 
10
+ class SequenceSynteny
10
11
 
11
- class SyntenyManip
12
+ attr_reader :query_file, :subject_file, :aln_hits, :query_sequences, :subject_sequences
12
13
 
13
- attr_reader :query_file, :subject_file, :aln_hits
14
-
15
- def initialize query_file, subject_file, name, pidentity, type
14
+ def initialize query_file, subject_file, name, pidentity, min_coverage, type
16
15
  @query_file = query_file
17
16
  @subject_file = subject_file
17
+
18
+ @query_sequences = get_sequences(query_file)
19
+ @subject_sequences = get_sequences(subject_file)
20
+
18
21
  @name = name
19
22
  @pidentity = pidentity
23
+ @min_coverage = min_coverage
20
24
  @aln_file = nil
21
25
  @type = type
26
+
22
27
  end # end of initialize
23
28
 
29
+
30
# Index the entries of a sequence file by name.
#
# The name of an entry is the first whitespace-separated token of its
# definition line. Each name maps to a Hash holding:
#   :length    - length of the entry sequence
#   :conserved - always false here
#   :contig    - the name without its last "_"-separated field
#                (only set when the name contains an underscore)
#
# seq_file - path of the sequence file to read
#
# RETURN : hash of sequence name => info hash
def get_sequences seq_file

  sequences = {}

  # block form so that the file handle is released once every entry is read
  Bio::FlatFile.auto(seq_file.to_s) do |flat|
    flat.each_entry do |entry|

      seq_name = entry.definition.chomp.split(" ")[0]

      # an entry with an empty definition line has no usable name
      next if seq_name.nil?

      info = { length: entry.seq.length, conserved: false }
      info[:contig] = seq_name.split("_")[0..-2].join("_") if seq_name.include? "_"
      sequences[seq_name] = info

    end
  end

  sequences

end
46
+
24
47
  # run blat on proteins
25
48
  def run_blat root, outdir
26
49
  base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity}"
@@ -32,9 +55,98 @@ class SyntenyManip
32
55
  # extract_hits
33
56
  end # end of method
34
57
 
58
+
59
# Extract the hits of the blast8 (tab separated) file @aln_file and store the
# best one(s) of every query in @query_sequences[query][:homology].
#
# Example of input line:
# contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
#
# Columns used: 0 query id, 1 subject id, 2 % identity, 3 alignment length,
# 6-7 query start/end, 8-9 subject start/end, 10 e-value, 11 score.
#
# For each query the highest scoring line wins; lines with the same score as
# the current best are appended to its :hits, :length, :query_location and
# :subject_location lists. Coverage is the alignment length divided by the
# sequence length stored in @query_sequences / @subject_sequences.
# :assert_cutoff is [identity, query coverage, subject coverage] with 1 when
# the value reaches the cutoff and 0 when it is under it.
# NOTE(review): coverages are fractions (alignment length / sequence length);
# @min_coverage is presumably expressed on the same scale - confirm at the
# call site.
#
# mode - :refgenome or :externaldb (the hit id is read the same way for both)
#
# RETURN : nil
def extract_hits mode

  File.foreach(@aln_file) do |line|

    # chomp (not chomp!) : chomp! returns nil when the last line of the file
    # has no trailing newline
    lA = line.chomp.split("\t")
    next if lA.empty?

    unless [:refgenome, :externaldb].include? mode
      raise ArgumentError, "unknown mode: #{mode.inspect}"
    end

    key = lA[0]
    hit = lA[1]
    feature = "cds"
    score = lA[11].to_f

    # compute coverage based on sequences length
    cov_query = (lA[3].to_f/@query_sequences[key][:length]).round(2)
    cov_subject = (lA[3].to_f/@subject_sequences[hit][:length]).round(2)

    # assert cutoff on identity and coverage
    # 1 -> pass cutoff, 0 under cutoff
    assert_cutoff = [
      lA[2].to_f < @pidentity ? 0 : 1,
      cov_query < @min_coverage ? 0 : 1,
      cov_subject < @min_coverage ? 0 : 1
    ]

    homology = @query_sequences[key][:homology]

    # first hit for the query, or a better score than the one recorded
    if homology.nil? || score > homology[:score]

      @query_sequences[key][:conserved] = true
      # the subject table is indexed by the hit id, not by the query id
      @subject_sequences[hit][:conserved] = true

      @query_sequences[key][:homology] = {
        pId: lA[2].to_f.round(2),
        cov_query: cov_query,
        cov_subject: cov_subject,
        evalue: lA[10],
        score: score,
        hits: [hit],
        length: [lA[3].to_i],
        query_location: [[lA[6].to_i,lA[7].to_i]],
        subject_location: [[lA[8].to_i,lA[9].to_i]],
        feature: feature,
        assert_cutoff: assert_cutoff
      }
      # NOTE(review): this replaces the queries already listed for the
      # subject; kept as is, confirm it is intended.
      @subject_sequences[hit][:hits] = [key]

    # query already got at least 1 hit and score == last_score
    elsif score == homology[:score]

      homology[:hits] << hit
      homology[:length] << lA[3].to_i
      homology[:query_location] << [lA[6].to_i,lA[7].to_i]
      homology[:subject_location] << [lA[8].to_i,lA[9].to_i]
      (@subject_sequences[hit][:hits] ||= []) << key

    end

  end

end # end of method
145
+
146
+
35
147
  # Extract Hit from blast8 file and save it in hash
36
148
  # contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
37
- def extract_hits_prodigal mode, ref_cds=nil
149
+ def extract_hits_prodigal mode
38
150
 
39
151
  @aln_hits = {}
40
152
  feature = ""
@@ -49,8 +161,8 @@ class SyntenyManip
49
161
  hit = lA[1].chomp.split("|")[3]
50
162
  feature = "cds"
51
163
  end
164
+ next if lA[2].to_f < @pidentity
52
165
  if ! @aln_hits.has_key? key
53
- next if lA[2].to_f < @pidentity
54
166
  @aln_hits[key] = {
55
167
  pId: lA[2].to_f.round(2),
56
168
  evalue: lA[10],
@@ -99,10 +211,12 @@ class SyntenyManip
99
211
  feature = hit_split[1]
100
212
  product = hit_split[2]
101
213
  end
214
+ next if lA[2].to_f < @pidentity
102
215
  if ! @aln_hits.has_key? key
103
- next if lA[2].to_f < @pidentity
104
216
  @aln_hits[key] = {
105
217
  pId: lA[2].to_f.round(2),
218
+ # cov_query: (@query_sequences[key][:length]/lA[3].to_f).round(2),
219
+ # cov_subject: (@subject_sequences[hit][:length]/lA[3].to_f).round(2),
106
220
  evalue: lA[10],
107
221
  score: lA[11].to_f,
108
222
  hits: [hit],
@@ -115,6 +229,8 @@ class SyntenyManip
115
229
  elsif lA[11].to_f > @aln_hits[key][:score]
116
230
  @aln_hits[key] = {
117
231
  pId: lA[2].to_f.round(2),
232
+ # cov_query: (@query_sequences[key][:length]/lA[3].to_f).round(2),
233
+ # cov_subject: (@subject_sequences[hit][:length]/lA[3].to_f).round(2),
118
234
  evalue: lA[10],
119
235
  score: lA[11].to_f,
120
236
  hits: [hit],
@@ -135,7 +251,7 @@ class SyntenyManip
135
251
  end
136
252
  end
137
253
 
138
- prune_aln_hits @aln_hits
254
+ # prune_aln_hits @aln_hits
139
255
 
140
256
  end # end of method
141
257
 
@@ -178,6 +294,10 @@ class SyntenyManip
178
294
  annotations[p][:length] = @aln_hits[p][:length][hit_index]
179
295
  i+=1
180
296
 
297
+ File.open("debug-annotation-by-contig.txt","a") do |fout|
298
+ fout.write("#{p} #{@aln_hits[p][:pId]} #{@aln_hits[p][:cov_query]} #{@aln_hits[p][:cov_subject]} #{ref_cds[h][:product]}\n")
299
+ end
300
+
181
301
  else
182
302
 
183
303
  annotations[p] = nil