bacterial-annotator 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2620736bb062558a4f44ee488f0c670945f42c85
4
- data.tar.gz: 0ebfcfe4f935415818ade300927148a6cdcccff1
3
+ metadata.gz: 4dbc3b5e016a3f24ba82ab525a0c8daa9617afec
4
+ data.tar.gz: 7c8ba281632d84d7131cf1a27e143f596e76f414
5
5
  SHA512:
6
- metadata.gz: ea81aa3caad269501af23e31b5d6fa9157b1a291da6467b31b74faaf207038255c5f42cd50f8c2c560304c84754af00bbdaf710120c0a8c9c03b9f029134fb18
7
- data.tar.gz: a89e07dc87b7c219f664be43fb17154ae7c40d19ffc23fdd84ae0ff89dc3d66867ba8cd8121c01b7c2a4b8fa14db7f2fbf65f03ac04e2c502bca180a31babd1d
6
+ metadata.gz: 73f72513dc9964c46c73ef39477a9b5148b010d68449ff270f5b15f556a5e56e027a9ddb80149b2ce44e12c09466ac11267c118c97c8ea8892eb582d220588cf
7
+ data.tar.gz: 0a92c0ceec70a12500a8dcf4433d76eff7edf2e4fe7ca180151efc016ee81a3e7a6279cc633a6adaaec5936425908c3a4299742ae1571bc1970017a058a8dedb
@@ -64,7 +64,7 @@ def parseOptions
64
64
  options[:pidentity] = 70
65
65
  options[:minlength] = 500
66
66
  options[:meta] = 0
67
-
67
+
68
68
  while x = ARGV.shift
69
69
 
70
70
  case x.downcase
@@ -113,7 +113,7 @@ if ARGV.size > 1
113
113
  abort "exiting blat is missing"
114
114
  end
115
115
 
116
- # Check Options
116
+ # Check Options
117
117
  if ! options.has_key? :refgenome and ! options.has_key? :remote_db and ! options.has_key? :external_db
118
118
  puts "You didn't provide a reference genome or a database for the annotation !"
119
119
  elsif ! options.has_key? :input
@@ -106,7 +106,46 @@ class BacterialAnnotator
106
106
  # dump foreign proteins to file
107
107
  foreign_cds_file = dump_cds
108
108
 
109
- else
109
+ # Iterate over each Ref protein and print synteny
110
+ synteny_file = File.open("#{@outdir}/Prot-Synteny.tsv","w")
111
+ synteny_file.write("RefLocusTag\tRefProtID\tRefLength\tRefCoverage\tIdentity\tQueryGene\tQueryLength\tQueryCoverage\n")
112
+ ref_annotated = {}
113
+ @contig_annotations.each do |contig,prot_annotations|
114
+ prot_annotations.each do |key,prot|
115
+ # p key
116
+ # p prot
117
+ ref_annotated[prot[:protId]] = {key: key, length: prot[:length], pId: prot[:pId]} if prot != nil
118
+ end
119
+ end
120
+
121
+ @refgenome.coding_seq.each do |ref_k, ref_v|
122
+ gene = ""
123
+ coverage_ref = ""
124
+ coverage_query = ""
125
+ query_length = ""
126
+ pId = ""
127
+ if ref_annotated[ref_v[:protId]] != nil
128
+ gene = ref_annotated[ref_v[:protId]][:key]
129
+ coverage_ref = (ref_annotated[ref_v[:protId]][:length].to_f/ref_v[:bioseq].seq.length.to_f).round(2)
130
+ query_length = @fasta.prodigal_files[:prot_ids_length][gene]
131
+ coverage_query = (ref_annotated[ref_v[:protId]][:length].to_f/query_length.to_f).round(2)
132
+ pId = ref_annotated[ref_v[:protId]][:pId]
133
+ end
134
+
135
+ synteny_file.write(ref_v[:protId])
136
+ synteny_file.write("\t"+ref_v[:locustag])
137
+ synteny_file.write("\t"+ref_v[:bioseq].seq.length.to_s)
138
+ synteny_file.write("\t"+coverage_ref.to_s)
139
+ synteny_file.write("\t"+pId.to_s)
140
+ synteny_file.write("\t"+gene)
141
+ synteny_file.write("\t"+query_length.to_s)
142
+ synteny_file.write("\t"+coverage_query.to_s)
143
+ synteny_file.write("\n")
144
+
145
+ end
146
+ synteny_file.close
147
+
148
+ else # no reference genome
110
149
 
111
150
  # no reference genome .. will process all the CDS
112
151
  foreign_cds_file = @fasta.prodigal_files[:proteins]
@@ -122,12 +161,14 @@ class BacterialAnnotator
122
161
  puts "\nPrinting Statistics.."
123
162
  print_stats "#{@outdir}/Annotation-Stats.txt"
124
163
 
164
+
125
165
  end # end of method
126
166
 
127
167
 
128
168
  # Finishing the annotation of the remaining CDS
129
169
  def finish_annotation remaining_cds_file
130
170
 
171
+ # only finish the annotation with an external DB
131
172
  if @options.has_key? :external_db # from an external DB
132
173
 
133
174
  db_file = @options[:external_db]
@@ -20,6 +20,7 @@ class FastaManip
20
20
  @meta = meta
21
21
  @prodigal_files = nil
22
22
  @single_fasta = nil
23
+ @seq_info = nil
23
24
 
24
25
  if @fasta_flat.dbclass != Bio::FastaFormat
25
26
  abort "Aborting : The input sequence is not a fasta file !"
@@ -61,7 +62,7 @@ class FastaManip
61
62
  file_name = seq.definition.chomp.split(" ")[0]
62
63
  @prodigal_files[:contigs] << "#{file_name}"
63
64
  @prodigal_files[:contigs_length] << seq.seq.length
64
- File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
65
+ File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
65
66
  fwrite.write(seq)
66
67
  end
67
68
  @single_fasta[file_name] = seq
@@ -132,17 +133,19 @@ class FastaManip
132
133
  end
133
134
 
134
135
  end
135
-
136
+
136
137
  return outseq, sequence.length
137
138
 
138
139
  end
139
140
 
140
141
 
141
- # extract protein and gene names
142
+ # extract protein and gene names from prodigal... with contig numbering
142
143
  def extract_cds_names
143
144
 
144
145
  prot_ids = {}
146
+ prot_length = {}
145
147
  flatfile = Bio::FlatFile.auto(@prodigal_files[:proteins])
148
+
146
149
  flatfile.each_entry do |entry|
147
150
  prot_id = entry.definition.split(" ")[0]
148
151
  contig = prot_id.split("_")[0..-2].join("_")
@@ -150,6 +153,10 @@ class FastaManip
150
153
  prot_ids[contig] = []
151
154
  end
152
155
  prot_ids[contig] << prot_id
156
+
157
+ # puts "Prodigal length : " + entry.seq.length.to_s
158
+ prot_length[prot_id] = entry.seq.length-1 # minus the stop codon
159
+
153
160
  end
154
161
 
155
162
  prot_ids.each do |k,prot_array|
@@ -157,10 +164,10 @@ class FastaManip
157
164
  end
158
165
 
159
166
  @prodigal_files[:prot_ids_by_contig] = prot_ids
167
+ @prodigal_files[:prot_ids_length] = prot_length
160
168
 
161
169
  end
162
170
 
163
-
164
171
  private :extract_cds_names # :split_fasta, :split_genbank
165
172
 
166
173
  end
@@ -25,7 +25,7 @@ class GenbankManip
25
25
  flat_gbk = Bio::FlatFile.auto(@gbk_file)
26
26
 
27
27
  # Check if gbk is valid
28
- if flat_gbk.dbclass != Bio::GenBank
28
+ if flat_gbk.dbclass != Bio::GenBank
29
29
  abort "Aborting : The input #{@gbk_file} is not a valid genbank file !"
30
30
  else
31
31
  @gbk = flat_gbk.next_entry
@@ -67,10 +67,11 @@ class GenbankManip
67
67
  pepBioSeq = Bio::Sequence.auto(pep)
68
68
 
69
69
  if protId.strip == ""
70
- protId = locustag
70
+ protId = locustag
71
71
  end
72
72
 
73
- @coding_seq[protId] = {location: loc,
73
+ @coding_seq[protId] = {protId: protId,
74
+ location: loc,
74
75
  locustag: locustag,
75
76
  gene: gene[0],
76
77
  product: product[0],
@@ -113,7 +114,7 @@ class GenbankManip
113
114
 
114
115
  contig = @gbk.definition
115
116
 
116
- # iterate through
117
+ # iterate through
117
118
  @gbk.features.each_with_index do |cds, ft_index|
118
119
 
119
120
  next if cds.feature != "CDS"
@@ -150,7 +151,7 @@ class GenbankManip
150
151
 
151
152
  # check if there is a reference genome.. reference_locus shouldn't be nil in that case
152
153
  if locus != nil
153
- qNote = Bio::Feature::Qualifier.new('note', "correspond to #{locus} locus (#{pId}% identity) from #{reference_locus.entry_id}")
154
+ qNote = Bio::Feature::Qualifier.new('note', "corresponds to #{locus} locus (#{pId}% identity) from #{reference_locus.entry_id}")
154
155
  ftArray.push(qNote)
155
156
  end
156
157
 
@@ -180,7 +181,7 @@ class GenbankManip
180
181
  ###################
181
182
  # Private Methods #
182
183
  ###################
183
-
184
+
184
185
  # Fct: Get dna sequence
185
186
  def get_DNA (cds, seq)
186
187
  loc = cds.locations
@@ -208,5 +209,5 @@ class GenbankManip
208
209
 
209
210
  private :fetch_ncbi_genome, :get_DNA
210
211
 
211
-
212
+
212
213
  end # end of Class
@@ -83,7 +83,7 @@ class RemoteNCBI
83
83
  end
84
84
 
85
85
  end
86
-
86
+
87
87
  rescue
88
88
  try += 1
89
89
  puts "#{try} POST try for #{f}"
@@ -24,7 +24,7 @@ class SyntenyManip
24
24
  def run_blat root, outdir
25
25
  system("#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
26
26
  @aln_file = "#{outdir}/#{@name}.blat8.tsv"
27
- # extract_hits
27
+ # extract_hits
28
28
  end # end of method
29
29
 
30
30
  # Extract Hit from blast8 file and save it in hash
@@ -45,21 +45,28 @@ class SyntenyManip
45
45
  next if lA[2].to_f < @pidentity
46
46
  @aln_hits[key] = {
47
47
  pId: lA[2].to_f.round(2),
48
- length: lA[3].to_i,
49
48
  evalue: lA[10],
50
49
  score: lA[11].to_f,
51
- hits: [hit]
50
+ hits: [hit],
51
+ length: [lA[3].to_i],
52
+ query_location: [[lA[6].to_i,lA[7].to_i]],
53
+ subject_location: [[lA[8].to_i,lA[9].to_i]]
52
54
  }
53
55
  elsif lA[11].to_f > @aln_hits[key][:score]
54
56
  @aln_hits[key] = {
55
57
  pId: lA[2].to_f.round(2),
56
- length: lA[3].to_i,
57
58
  evalue: lA[10],
58
59
  score: lA[11].to_f,
59
- hits: [hit]
60
+ hits: [hit],
61
+ length: [lA[3].to_i],
62
+ query_location: [[lA[6].to_i,lA[7].to_i]],
63
+ subject_location: [[lA[8].to_i,lA[9].to_i]]
60
64
  }
61
65
  elsif lA[11].to_f == @aln_hits[key][:score]
62
66
  @aln_hits[key][:hits] << hit
67
+ @aln_hits[key][:length] << lA[3].to_i
68
+ @aln_hits[key][:query_location] << [lA[6].to_i,lA[7].to_i]
69
+ @aln_hits[key][:subject_location] << [lA[8].to_i,lA[9].to_i]
63
70
  end
64
71
  end
65
72
  end
@@ -102,6 +109,7 @@ class SyntenyManip
102
109
  hit = ref_cds[h]
103
110
  annotations[p] = hit
104
111
  annotations[p][:pId] = @aln_hits[p][:pId]
112
+ annotations[p][:length] = @aln_hits[p][:length][hit_index]
105
113
  i+=1
106
114
 
107
115
  else
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-27 00:00:00.000000000 Z
11
+ date: 2016-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -70,7 +70,7 @@ files:
70
70
  - lib/bacterial-annotator/synteny-manip.rb
71
71
  homepage: http://rubygems.org/gems/bacterial-annotator
72
72
  licenses:
73
- - GPLv3
73
+ - GPL-3.0
74
74
  metadata: {}
75
75
  post_install_message:
76
76
  rdoc_options: []
@@ -88,9 +88,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
88
  version: '0'
89
89
  requirements: []
90
90
  rubyforge_project:
91
- rubygems_version: 2.4.5
91
+ rubygems_version: 2.5.1
92
92
  signing_key:
93
93
  specification_version: 4
94
94
  summary: Bacterial Annotator
95
95
  test_files: []
96
- has_rdoc: