bacterial-annotator 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 12f3850a1e1eb738530d8cdb31c002b3ec34419c
4
- data.tar.gz: d765574dd7200b59aecbfe1f7c589d1a1ea15527
3
+ metadata.gz: befd57ce78f0c186da1553c7372c3aa6faeb9d90
4
+ data.tar.gz: 5e37d6a7e579a1e9e428deb9864e4a9d5ea9f057
5
5
  SHA512:
6
- metadata.gz: 1f8f63582170adc0d2efb47dd655364c46822ba600778638226278d07d7b06ec2ae838058a7ee3ceb3b8d9b2668587711995b94c1fcdd7fdb6816f8384283bb1
7
- data.tar.gz: 3a210bef0d5c0112add5867997894ce893f20ccdd867b72fa34b4d5a55ec9821e6e878e3e6168917705bdfbdeec12c4c70cc1cdb93e570b650c6eca28b18ee61
6
+ metadata.gz: a9a9766113cef56ae7ed35749cd5fbc10d746aa82e403596dccd0c5e7946786b136a69e19c74a4cece73549b0f1a8de077a8c106fc0e2310f7b000dc6cbad962
7
+ data.tar.gz: 00a0c5cf815252fa45ffae194318f730cf69e27dd53643659fb06d2dac131a3de881cbad2595b43df6ba5014be75cef8b337e6710afb46e42b420fdd1cf9b178
data/bin/ba_prodigal CHANGED
@@ -31,7 +31,7 @@ def installProdigal
31
31
  rescue
32
32
  abort "Problem installing Prodigal, aborting"
33
33
  end
34
-
34
+
35
35
  end
36
36
 
37
37
 
@@ -46,22 +46,21 @@ annotate [OPTIONS]
46
46
  --force/-f Force to overwrite the output directory
47
47
 
48
48
  // Dataset
49
- --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
50
- --guessref Will guess the best reference genome to use for the annotation.
49
+ --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
50
+ --guessref Will guess the best reference genome to use for the annotation.
51
51
 
52
- --remotedb <remote_database> [nr|refseq|swissprot]
53
- Complete the annotation of remaining CDS with a remote NCBI BLAST
54
- Can be very slow, better to use an external database !
55
-
56
- --externaldb <proteins fasta_file>
57
- Complete or do the annotation of remaining CDS with this database (a protein fasta file).
52
+ --externaldb <proteins fasta_file>
53
+ Finish or do a complete annotation with this sequence database (a protein fasta file).
58
54
  Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
59
55
  >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
60
56
  >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
61
57
 
62
58
  // Other options
63
- --pidentity Minimum percentage identity to incorporate a CDS annotation [default=0.7]
64
- --minlength Minimum contig length for annotation [default=500]
59
+ --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.7]
60
+ --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.7]
61
+ .. otherwise hint for a non-functional protein
62
+
63
+ --minlength <length> Minimum contig length for annotation [default=500]
65
64
 
66
65
  --meta Better for metagenome and plasmid annotations because of disparate codon usage [default=off]
67
66
 
@@ -77,6 +76,7 @@ def parseOptions_annotate
77
76
  # default options
78
77
  options[:outdir] = "BAnnotation"
79
78
  options[:pidentity] = 70
79
+ options[:pcoverage] = 70
80
80
  options[:minlength] = 500
81
81
  options[:meta] = 0
82
82
 
@@ -95,10 +95,10 @@ def parseOptions_annotate
95
95
  options[:minlength] = ARGV.shift
96
96
  when "--pidentity"
97
97
  options[:pidentity] = ARGV.shift
98
+ when "--pcoverage"
99
+ options[:pcoverage] = ARGV.shift
98
100
  when "--meta"
99
101
  options[:meta] = 1
100
- when "--remotedb"
101
- options[:remote_db] = ARGV.shift
102
102
  when "--externaldb"
103
103
  options[:external_db] = ARGV.shift
104
104
  when "--help", "-h"
@@ -204,7 +204,7 @@ if ARGV.size > 1
204
204
  system("ba_raxml")
205
205
 
206
206
  options = {}
207
- genomes_list = []
207
+ genomes_list = [] # TODO multiple input genomes
208
208
 
209
209
  if ARGV[0] == "annotate"
210
210
 
@@ -217,7 +217,6 @@ if ARGV.size > 1
217
217
 
218
218
  # Check Options
219
219
  if ! options.has_key? :refgenome and
220
- ! options.has_key? :remote_db and
221
220
  ! options.has_key? :external_db
222
221
  puts "You didn't provide a reference genome or a database for the annotation !"
223
222
  elsif ! options.has_key? :input
@@ -1,14 +1,13 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # author: maxime déraspe
3
3
  # email: maximilien1er@gmail.com
4
- # review:
5
4
  # date: 15-02-24
6
5
  # version: 0.0.1
7
6
  # licence:
8
7
 
9
8
 
10
9
 
11
- class GenbankManip
10
+ class SequenceAnnotation
12
11
 
13
12
  attr_accessor :gbk, :coding_seq, :cds_file, :rna_file
14
13
 
@@ -67,13 +66,16 @@ class GenbankManip
67
66
  protId = locustag
68
67
  end
69
68
 
70
- @coding_seq[protId] = {protId: protId,
71
- location: loc,
72
- locustag: locustag,
73
- gene: gene[0],
74
- product: product[0],
75
- bioseq: pepBioSeq,
76
- bioseq_gene: dnaBioSeq}
69
+ @coding_seq[protId] = {
70
+ protId: protId,
71
+ location: loc,
72
+ locustag: locustag,
73
+ gene: gene[0],
74
+ product: product[0],
75
+ bioseq: pepBioSeq,
76
+ bioseq_gene: dnaBioSeq,
77
+ bioseq_len: pepBioSeq.length
78
+ }
77
79
  end
78
80
 
79
81
  end
@@ -110,11 +112,13 @@ class GenbankManip
110
112
  dna = get_DNA(ft,@bioseq)
111
113
  dnaBioSeq = Bio::Sequence.auto(dna)
112
114
 
113
- @rna_seq[locustag] = {type: ft.feature.to_s,
114
- location: loc,
115
- locustag: locustag,
116
- product: product,
117
- bioseq_gene: dnaBioSeq}
115
+ @rna_seq[locustag] = {
116
+ type: ft.feature.to_s,
117
+ location: loc,
118
+ locustag: locustag,
119
+ product: product,
120
+ bioseq_gene: dnaBioSeq
121
+ }
118
122
 
119
123
  end
120
124
 
@@ -125,7 +129,6 @@ class GenbankManip
125
129
  end
126
130
 
127
131
 
128
-
129
132
  # Print CDS to files
130
133
  # RETURN : cds_file path
131
134
  def write_cds_to_file outdir
@@ -174,12 +177,108 @@ class GenbankManip
174
177
  end
175
178
 
176
179
 
180
+ # add annotation from reference prot synteny
181
+ def add_annotation_ref_synteny_prot synteny_prot, annotations, ref_genome=nil
182
+
183
+ contig = @gbk.definition
184
+
185
+ prot_iterator = 0
186
+ @gbk.features.each_with_index do |cds, ft_index|
187
+
188
+ next if cds.feature != "CDS"
189
+
190
+ prot_iterator+=1
191
+ prot_id = contig+"_"+prot_iterator.to_s
192
+
193
+ ftArray = []
194
+ cds.qualifiers = []
195
+
196
+ hit = nil
197
+
198
+ next if ! synteny_prot.has_key? prot_id or
199
+ ! synteny_prot[prot_id].has_key? :homology
200
+
201
+ # puts "#{annotations.keys}"
202
+ if annotations.has_key? synteny_prot[prot_id][:homology][:hits][0]
203
+ hit = annotations[synteny_prot[prot_id][:homology][:hits][0]]
204
+ # puts hit
205
+ else
206
+ puts "no hit for #{prot_id}"
207
+ next
208
+ end
209
+
210
+ # hit = annotations[synteny_prot[prot_id][:homology][:hits][0]]
211
+
212
+ if synteny_prot.has_key? prot_id
213
+
214
+ locus, gene, product, note, inference = nil
215
+ locus = hit[:locustag]
216
+ gene = hit[:gene]
217
+ product = hit[:product]
218
+ note = hit[:note]
219
+ inference = hit[:inference]
220
+ pId = synteny_prot[prot_id][:homology][:pId]
221
+ cov_query = (synteny_prot[prot_id][:homology][:cov_query]*100).round(2)
222
+ cov_subject = (synteny_prot[prot_id][:homology][:cov_subject]*100).round(2)
223
+ reference_prot_id = synteny_prot[prot_id][:homology][:hits][0]
224
+
225
+ qLocusTag = Bio::Feature::Qualifier.new('locus_tag', "#{prot_id}")
226
+ ftArray.push(qLocusTag)
227
+
228
+ if gene != nil
229
+ qGene = Bio::Feature::Qualifier.new('gene', gene)
230
+ ftArray.push(qGene)
231
+ end
232
+
233
+ if product != nil
234
+ qProd = Bio::Feature::Qualifier.new('product', product)
235
+ ftArray.push(qProd)
236
+ end
237
+
238
+ # check if there is a reference genome.. reference_locus shouldn't be nil in that case
239
+ if locus != nil
240
+ qNote = Bio::Feature::Qualifier.new('note', "corresponds to #{locus} locus (AA identity: #{pId}%; coverage(q,s): #{cov_query}%,#{cov_subject}%) from #{ref_genome}")
241
+ ftArray.push(qNote)
242
+
243
+ db_source = "[DBSource]"
244
+ if reference_prot_id.include? "_"
245
+ db_source = "RefSeq"
246
+ else
247
+ db_source = "INSD"
248
+ end
249
+ qInference = Bio::Feature::Qualifier.new('inference', "similar to AA sequence:#{db_source}:#{reference_prot_id}")
250
+ ftArray.push(qInference)
251
+
252
+ end
253
+
254
+ if note != nil
255
+ qNote = Bio::Feature::Qualifier.new('note', note)
256
+ ftArray.push(qNote)
257
+ end
258
+
259
+ if inference != nil
260
+ qInference = Bio::Feature::Qualifier.new('inference', inference)
261
+ ftArray.push(qInference)
262
+ end
263
+
264
+ end
265
+
266
+ cds.qualifiers = ftArray
267
+
268
+ end
269
+
270
+
271
+ end
272
+
273
+
177
274
  # add annotation to a genbank file produced by prodigal
178
275
  def add_annotations annotations, mode, reference_locus=nil
179
276
 
180
277
  # nb_of_added_ft = 0
181
278
  i = 0
182
279
 
280
+ fdebug = File.open("debug-add-annotation.txt","w")
281
+
183
282
  contig = @gbk.definition
184
283
 
185
284
  if mode == "inplace"
@@ -195,9 +294,19 @@ class GenbankManip
195
294
  i += 1
196
295
  prot_id = contig+"_"+i.to_s
197
296
  hit = nil
198
- hit = annotations[prot_id] if annotations.has_key? prot_id
297
+
298
+ if annotations.has_key? prot_id
299
+ hit = annotations[prot_id]
300
+ else
301
+ puts "no hit for #{prot_id}"
302
+ next
303
+ end
199
304
 
200
305
  if hit != nil
306
+
307
+ fdebug.write(hit)
308
+ fdebug.write("\n")
309
+
201
310
  locus, gene, product, note = nil
202
311
  locus = hit[:locustag]
203
312
  gene = hit[:gene]
@@ -271,6 +380,8 @@ class GenbankManip
271
380
 
272
381
  end
273
382
 
383
+ fdebug.close
384
+
274
385
  end
275
386
 
276
387
 
@@ -315,3 +426,4 @@ class GenbankManip
315
426
 
316
427
 
317
428
  end # end of Class
429
+
@@ -8,29 +8,35 @@
8
8
 
9
9
 
10
10
 
11
- class FastaManip
11
+ class SequenceFasta
12
12
 
13
- attr_reader :fasta_flat, :fasta_file, :prodigal_files
13
+ attr_reader :fasta_flat, :fasta_file, :annotation_files
14
14
 
15
15
  # Initialize fasta holder
16
16
  def initialize fasta_file, meta
17
17
 
18
18
  @fasta_file = fasta_file
19
19
  @fasta_flat = Bio::FlatFile.auto(@fasta_file)
20
- @meta = meta
21
- @prodigal_files = nil
22
- @single_fasta = nil
23
- @seq_info = nil
24
20
 
25
21
  if @fasta_flat.dbclass != Bio::FastaFormat
26
22
  abort "Aborting : The input sequence is not a fasta file !"
27
23
  end
28
24
 
25
+ # @contigs = extract_contigs(@fasta_flat)
26
+
27
+ @meta = meta
28
+
29
+ @annotation_files = nil
30
+ @single_fasta = nil
31
+ @seq_info = nil
32
+
29
33
  end
30
34
 
35
+
31
36
  # Run prodigal on the genome to annotate
32
37
  def run_prodigal root, outdir
33
- @prodigal_files = {}
38
+
39
+ @annotation_files = {}
34
40
  Dir.mkdir "#{outdir}" if ! Dir.exists? "#{outdir}"
35
41
  if @meta
36
42
  system("#{root}/prodigal.linux -p meta -i #{@fasta_file} -a #{outdir}/Proteins.fa -d #{outdir}/Genes.fa -o #{outdir}/Genbanks.gbk -q")
@@ -38,30 +44,34 @@ class FastaManip
38
44
  system("#{root}/prodigal.linux -i #{@fasta_file} -a #{outdir}/Proteins.fa -d #{outdir}/Genes.fa -o #{outdir}/Genbanks.gbk -q")
39
45
  end
40
46
 
41
- @prodigal_files = {multiGBK: "#{outdir}/Genbanks.gbk",
42
- contigs: [],
43
- contigs_length: [],
44
- genes: "#{outdir}/Genes.fa",
45
- proteins: "#{outdir}/Proteins.fa",
46
- prot_ids_by_contig: {},
47
- fasta_path: "#{outdir}/single-fasta/",
48
- gbk_path: "#{outdir}/single-genbank/"}
47
+ @annotation_files = {
48
+ multiGBK: "#{outdir}/Genbanks.gbk",
49
+ contigs: [],
50
+ contigs_length: [],
51
+ genes: "#{outdir}/Genes.fa",
52
+ proteins: "#{outdir}/Proteins.fa",
53
+ prot_ids_by_contig: {},
54
+ fasta_path: "#{outdir}/single-fasta/",
55
+ gbk_path: "#{outdir}/single-genbank/"
56
+ }
57
+
49
58
  split_fasta outdir
50
59
  split_genbank outdir, "#{outdir}/Genbanks.gbk"
51
60
  extract_cds_names
52
- @prodigal_files
61
+ @annotation_files
62
+
53
63
  end
54
64
 
55
65
 
56
- # Split Multi Genbanks file
66
+ # Split Multi Fasta file
57
67
  # RETURN : array of fasta files
58
68
  def split_fasta outdir
59
69
  @single_fasta = {}
60
70
  Dir.mkdir("#{outdir}/single-fasta") if ! Dir.exists?("#{outdir}/single-fasta")
61
71
  @fasta_flat.each_entry do |seq|
62
72
  file_name = seq.definition.chomp.split(" ")[0]
63
- @prodigal_files[:contigs] << "#{file_name}"
64
- @prodigal_files[:contigs_length] << seq.seq.length
73
+ @annotation_files[:contigs] << "#{file_name}"
74
+ @annotation_files[:contigs_length] << seq.seq.length
65
75
  File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
66
76
  fwrite.write(seq)
67
77
  end
@@ -108,7 +118,6 @@ class FastaManip
108
118
  outseq = "ORIGIN\n"
109
119
  # puts "ORIGIN"
110
120
 
111
- ntNum = 0
112
121
  sequence = seq.seq.downcase
113
122
 
114
123
  nt_left = true
@@ -144,7 +153,7 @@ class FastaManip
144
153
 
145
154
  prot_ids = {}
146
155
  prot_length = {}
147
- flatfile = Bio::FlatFile.auto(@prodigal_files[:proteins])
156
+ flatfile = Bio::FlatFile.auto(@annotation_files[:proteins])
148
157
 
149
158
  flatfile.each_entry do |entry|
150
159
  prot_id = entry.definition.split(" ")[0]
@@ -163,8 +172,8 @@ class FastaManip
163
172
  prot_array.sort! { |a,b| a.split("_")[-1].to_i <=> b.split("_")[-1].to_i }
164
173
  end
165
174
 
166
- @prodigal_files[:prot_ids_by_contig] = prot_ids
167
- @prodigal_files[:prot_ids_length] = prot_length
175
+ @annotation_files[:prot_ids_by_contig] = prot_ids
176
+ @annotation_files[:prot_ids_length] = prot_length
168
177
 
169
178
  end
170
179
 
@@ -7,20 +7,43 @@
7
7
  # licence:
8
8
 
9
9
 
10
+ class SequenceSynteny
10
11
 
11
- class SyntenyManip
12
+ attr_reader :query_file, :subject_file, :aln_hits, :query_sequences, :subject_sequences
12
13
 
13
- attr_reader :query_file, :subject_file, :aln_hits
14
-
15
- def initialize query_file, subject_file, name, pidentity, type
14
+ def initialize query_file, subject_file, name, pidentity, min_coverage, type
16
15
  @query_file = query_file
17
16
  @subject_file = subject_file
17
+
18
+ @query_sequences = get_sequences(query_file)
19
+ @subject_sequences = get_sequences(subject_file)
20
+
18
21
  @name = name
19
22
  @pidentity = pidentity
23
+ @min_coverage = min_coverage
20
24
  @aln_file = nil
21
25
  @type = type
26
+
22
27
  end # end of initialize
23
28
 
29
+
30
+ # get sequences name with length in hash
31
+ def get_sequences seq_file
32
+
33
+ sequences = {}
34
+ flat = Bio::FlatFile.auto("#{seq_file}")
35
+ flat.each_entry do |s|
36
+ s_name = s.definition.chomp.split(" ")[0]
37
+ sequences[s_name] = {}
38
+ sequences[s_name][:length] = s.seq.length
39
+ sequences[s_name][:conserved] = false
40
+ sequences[s_name][:contig] = s_name.split("_")[0..-2].join("_") if s_name.include? "_"
41
+ end
42
+
43
+ sequences
44
+
45
+ end
46
+
24
47
  # run blat on proteins
25
48
  def run_blat root, outdir
26
49
  base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity}"
@@ -32,9 +55,98 @@ class SyntenyManip
32
55
  # extract_hits
33
56
  end # end of method
34
57
 
58
+
59
+ # Extract Hit from blast8 file and save it in hash
60
+ # contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
61
+ def extract_hits mode
62
+
63
+ feature = ""
64
+ File.open(@aln_file,"r") do |fread|
65
+ while l = fread.gets
66
+
67
+ lA = l.chomp!.split("\t")
68
+ key = lA[0]
69
+
70
+ # extraction of hit id depends on mode ..
71
+ if mode == :refgenome
72
+ hit = lA[1]
73
+ feature = "cds"
74
+ elsif mode == :externaldb
75
+ # hit = lA[1].chomp.split("|")[3]
76
+ hit = lA[1]
77
+ feature = "cds"
78
+ end
79
+
80
+ # compute coverage based on sequences length
81
+ cov_query = (lA[3].to_f/@query_sequences[key][:length]).round(2)
82
+ cov_subject = (lA[3].to_f/@subject_sequences[hit][:length]).round(2)
83
+
84
+ # assert cutoff on identity and coverage
85
+ # 1 -> pass cutoff, 0 under cutoff
86
+ assert_cutoff = [1,1,1]
87
+ assert_cutoff[0] = 0 if lA[2].to_f < @pidentity
88
+ assert_cutoff[1] = 0 if cov_query < @min_coverage
89
+ assert_cutoff[2] = 0 if cov_subject < @min_coverage
90
+
91
+ # first hit for query
92
+ if ! @query_sequences[key].has_key? :homology
93
+ @query_sequences[key][:conserved] = true
94
+ @subject_sequences[key][:conserved] = true
95
+ @query_sequences[key][:homology] = {
96
+ pId: lA[2].to_f.round(2),
97
+ cov_query: cov_query,
98
+ cov_subject: cov_subject,
99
+ evalue: lA[10],
100
+ score: lA[11].to_f,
101
+ hits: [hit],
102
+ length: [lA[3].to_i],
103
+ query_location: [[lA[6].to_i,lA[7].to_i]],
104
+ subject_location: [[lA[8].to_i,lA[9].to_i]],
105
+ feature: feature,
106
+ assert_cutoff: assert_cutoff
107
+ }
108
+ @subject_sequences[hit][:hits] = [key]
109
+
110
+ # query already got at least 1 hit and new_score > last_score
111
+ elsif lA[11].to_f > @query_sequences[key][:homology][:score]
112
+ @query_sequences[key][:conserved] = true
113
+ @subject_sequences[key][:conserved] = true
114
+ @query_sequences[key][:homology] = {
115
+ pId: lA[2].to_f.round(2),
116
+ cov_query: cov_query,
117
+ cov_subject: cov_subject,
118
+ evalue: lA[10],
119
+ score: lA[11].to_f,
120
+ hits: [hit],
121
+ length: [lA[3].to_i],
122
+ query_location: [[lA[6].to_i,lA[7].to_i]],
123
+ subject_location: [[lA[8].to_i,lA[9].to_i]],
124
+ feature: feature,
125
+ assert_cutoff: assert_cutoff
126
+ }
127
+ @subject_sequences[hit][:hits] = [key]
128
+
129
+ # query already got at least 1 hit and score == last_score
130
+ elsif lA[11].to_f == @query_sequences[key][:homology][:score]
131
+ @query_sequences[key][:homology][:hits] << hit
132
+ @query_sequences[key][:homology][:length] << lA[3].to_i
133
+ @query_sequences[key][:homology][:query_location] << [lA[6].to_i,lA[7].to_i]
134
+ @query_sequences[key][:homology][:subject_location] << [lA[8].to_i,lA[9].to_i]
135
+ if @subject_sequences[hit].has_key? :hits
136
+ @subject_sequences[hit][:hits] << key
137
+ else
138
+ @subject_sequences[hit][:hits] = [key]
139
+ end
140
+ end
141
+ end
142
+ end
143
+
144
+ end # end of method
145
+
146
+
35
147
  # Extract Hit from blast8 file and save it in hash
36
148
  # contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
37
- def extract_hits_prodigal mode, ref_cds=nil
149
+ def extract_hits_prodigal mode
38
150
 
39
151
  @aln_hits = {}
40
152
  feature = ""
@@ -49,8 +161,8 @@ class SyntenyManip
49
161
  hit = lA[1].chomp.split("|")[3]
50
162
  feature = "cds"
51
163
  end
164
+ next if lA[2].to_f < @pidentity
52
165
  if ! @aln_hits.has_key? key
53
- next if lA[2].to_f < @pidentity
54
166
  @aln_hits[key] = {
55
167
  pId: lA[2].to_f.round(2),
56
168
  evalue: lA[10],
@@ -99,10 +211,12 @@ class SyntenyManip
99
211
  feature = hit_split[1]
100
212
  product = hit_split[2]
101
213
  end
214
+ next if lA[2].to_f < @pidentity
102
215
  if ! @aln_hits.has_key? key
103
- next if lA[2].to_f < @pidentity
104
216
  @aln_hits[key] = {
105
217
  pId: lA[2].to_f.round(2),
218
+ # cov_query: (@query_sequences[key][:length]/lA[3].to_f).round(2),
219
+ # cov_subject: (@subject_sequences[hit][:length]/lA[3].to_f).round(2),
106
220
  evalue: lA[10],
107
221
  score: lA[11].to_f,
108
222
  hits: [hit],
@@ -115,6 +229,8 @@ class SyntenyManip
115
229
  elsif lA[11].to_f > @aln_hits[key][:score]
116
230
  @aln_hits[key] = {
117
231
  pId: lA[2].to_f.round(2),
232
+ # cov_query: (@query_sequences[key][:length]/lA[3].to_f).round(2),
233
+ # cov_subject: (@subject_sequences[hit][:length]/lA[3].to_f).round(2),
118
234
  evalue: lA[10],
119
235
  score: lA[11].to_f,
120
236
  hits: [hit],
@@ -135,7 +251,7 @@ class SyntenyManip
135
251
  end
136
252
  end
137
253
 
138
- prune_aln_hits @aln_hits
254
+ # prune_aln_hits @aln_hits
139
255
 
140
256
  end # end of method
141
257
 
@@ -178,6 +294,10 @@ class SyntenyManip
178
294
  annotations[p][:length] = @aln_hits[p][:length][hit_index]
179
295
  i+=1
180
296
 
297
+ File.open("debug-annotation-by-contig.txt","a") do |fout|
298
+ fout.write("#{p} #{@aln_hits[p][:pId]} #{@aln_hits[p][:cov_query]} #{@aln_hits[p][:cov_subject]} #{ref_cds[h][:product]}\n")
299
+ end
300
+
181
301
  else
182
302
 
183
303
  annotations[p] = nil