bacterial-annotator 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2620736bb062558a4f44ee488f0c670945f42c85
4
- data.tar.gz: 0ebfcfe4f935415818ade300927148a6cdcccff1
3
+ metadata.gz: 4dbc3b5e016a3f24ba82ab525a0c8daa9617afec
4
+ data.tar.gz: 7c8ba281632d84d7131cf1a27e143f596e76f414
5
5
  SHA512:
6
- metadata.gz: ea81aa3caad269501af23e31b5d6fa9157b1a291da6467b31b74faaf207038255c5f42cd50f8c2c560304c84754af00bbdaf710120c0a8c9c03b9f029134fb18
7
- data.tar.gz: a89e07dc87b7c219f664be43fb17154ae7c40d19ffc23fdd84ae0ff89dc3d66867ba8cd8121c01b7c2a4b8fa14db7f2fbf65f03ac04e2c502bca180a31babd1d
6
+ metadata.gz: 73f72513dc9964c46c73ef39477a9b5148b010d68449ff270f5b15f556a5e56e027a9ddb80149b2ce44e12c09466ac11267c118c97c8ea8892eb582d220588cf
7
+ data.tar.gz: 0a92c0ceec70a12500a8dcf4433d76eff7edf2e4fe7ca180151efc016ee81a3e7a6279cc633a6adaaec5936425908c3a4299742ae1571bc1970017a058a8dedb
@@ -64,7 +64,7 @@ def parseOptions
64
64
  options[:pidentity] = 70
65
65
  options[:minlength] = 500
66
66
  options[:meta] = 0
67
-
67
+
68
68
  while x = ARGV.shift
69
69
 
70
70
  case x.downcase
@@ -113,7 +113,7 @@ if ARGV.size > 1
113
113
  abort "exiting blat is missing"
114
114
  end
115
115
 
116
- # Check Options
116
+ # Check Options
117
117
  if ! options.has_key? :refgenome and ! options.has_key? :remote_db and ! options.has_key? :external_db
118
118
  puts "You didn't provide a reference genome or a database for the annotation !"
119
119
  elsif ! options.has_key? :input
@@ -106,7 +106,46 @@ class BacterialAnnotator
106
106
  # dump foreign proteins to file
107
107
  foreign_cds_file = dump_cds
108
108
 
109
- else
109
+ # Iterate over each Ref protein and print synteny
110
+ synteny_file = File.open("#{@outdir}/Prot-Synteny.tsv","w")
111
+ synteny_file.write("RefLocusTag\tRefProtID\tRefLength\tRefCoverage\tIdentity\tQueryGene\tQueryLength\tQueryCoverage\n")
112
+ ref_annotated = {}
113
+ @contig_annotations.each do |contig,prot_annotations|
114
+ prot_annotations.each do |key,prot|
115
+ # p key
116
+ # p prot
117
+ ref_annotated[prot[:protId]] = {key: key, length: prot[:length], pId: prot[:pId]} if prot != nil
118
+ end
119
+ end
120
+
121
+ @refgenome.coding_seq.each do |ref_k, ref_v|
122
+ gene = ""
123
+ coverage_ref = ""
124
+ coverage_query = ""
125
+ query_length = ""
126
+ pId = ""
127
+ if ref_annotated[ref_v[:protId]] != nil
128
+ gene = ref_annotated[ref_v[:protId]][:key]
129
+ coverage_ref = (ref_annotated[ref_v[:protId]][:length].to_f/ref_v[:bioseq].seq.length.to_f).round(2)
130
+ query_length = @fasta.prodigal_files[:prot_ids_length][gene]
131
+ coverage_query = (ref_annotated[ref_v[:protId]][:length].to_f/query_length.to_f).round(2)
132
+ pId = ref_annotated[ref_v[:protId]][:pId]
133
+ end
134
+
135
+ synteny_file.write(ref_v[:protId])
136
+ synteny_file.write("\t"+ref_v[:locustag])
137
+ synteny_file.write("\t"+ref_v[:bioseq].seq.length.to_s)
138
+ synteny_file.write("\t"+coverage_ref.to_s)
139
+ synteny_file.write("\t"+pId.to_s)
140
+ synteny_file.write("\t"+gene)
141
+ synteny_file.write("\t"+query_length.to_s)
142
+ synteny_file.write("\t"+coverage_query.to_s)
143
+ synteny_file.write("\n")
144
+
145
+ end
146
+ synteny_file.close
147
+
148
+ else # no reference genome
110
149
 
111
150
  # no reference genome .. will process all the CDS
112
151
  foreign_cds_file = @fasta.prodigal_files[:proteins]
@@ -122,12 +161,14 @@ class BacterialAnnotator
122
161
  puts "\nPrinting Statistics.."
123
162
  print_stats "#{@outdir}/Annotation-Stats.txt"
124
163
 
164
+
125
165
  end # end of method
126
166
 
127
167
 
128
168
  # Finishing the annotation of the remaining CDS
129
169
  def finish_annotation remaining_cds_file
130
170
 
171
+ # only finish the annotation with an external DB
131
172
  if @options.has_key? :external_db # from an external DB
132
173
 
133
174
  db_file = @options[:external_db]
@@ -20,6 +20,7 @@ class FastaManip
20
20
  @meta = meta
21
21
  @prodigal_files = nil
22
22
  @single_fasta = nil
23
+ @seq_info = nil
23
24
 
24
25
  if @fasta_flat.dbclass != Bio::FastaFormat
25
26
  abort "Aborting : The input sequence is not a fasta file !"
@@ -61,7 +62,7 @@ class FastaManip
61
62
  file_name = seq.definition.chomp.split(" ")[0]
62
63
  @prodigal_files[:contigs] << "#{file_name}"
63
64
  @prodigal_files[:contigs_length] << seq.seq.length
64
- File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
65
+ File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
65
66
  fwrite.write(seq)
66
67
  end
67
68
  @single_fasta[file_name] = seq
@@ -132,17 +133,19 @@ class FastaManip
132
133
  end
133
134
 
134
135
  end
135
-
136
+
136
137
  return outseq, sequence.length
137
138
 
138
139
  end
139
140
 
140
141
 
141
- # extract protein and gene names
142
+ # extract protein and gene names from prodigal... with contig numbering
142
143
  def extract_cds_names
143
144
 
144
145
  prot_ids = {}
146
+ prot_length = {}
145
147
  flatfile = Bio::FlatFile.auto(@prodigal_files[:proteins])
148
+
146
149
  flatfile.each_entry do |entry|
147
150
  prot_id = entry.definition.split(" ")[0]
148
151
  contig = prot_id.split("_")[0..-2].join("_")
@@ -150,6 +153,10 @@ class FastaManip
150
153
  prot_ids[contig] = []
151
154
  end
152
155
  prot_ids[contig] << prot_id
156
+
157
+ # puts "Prodigal length : " + entry.seq.length.to_s
158
+ prot_length[prot_id] = entry.seq.length-1 # minus the stop codon
159
+
153
160
  end
154
161
 
155
162
  prot_ids.each do |k,prot_array|
@@ -157,10 +164,10 @@ class FastaManip
157
164
  end
158
165
 
159
166
  @prodigal_files[:prot_ids_by_contig] = prot_ids
167
+ @prodigal_files[:prot_ids_length] = prot_length
160
168
 
161
169
  end
162
170
 
163
-
164
171
  private :extract_cds_names # :split_fasta, :split_genbank
165
172
 
166
173
  end
@@ -25,7 +25,7 @@ class GenbankManip
25
25
  flat_gbk = Bio::FlatFile.auto(@gbk_file)
26
26
 
27
27
  # Check if gbk is valid
28
- if flat_gbk.dbclass != Bio::GenBank
28
+ if flat_gbk.dbclass != Bio::GenBank
29
29
  abort "Aborting : The input #{@gbk_file} is not a valid genbank file !"
30
30
  else
31
31
  @gbk = flat_gbk.next_entry
@@ -67,10 +67,11 @@ class GenbankManip
67
67
  pepBioSeq = Bio::Sequence.auto(pep)
68
68
 
69
69
  if protId.strip == ""
70
- protId = locustag
70
+ protId = locustag
71
71
  end
72
72
 
73
- @coding_seq[protId] = {location: loc,
73
+ @coding_seq[protId] = {protId: protId,
74
+ location: loc,
74
75
  locustag: locustag,
75
76
  gene: gene[0],
76
77
  product: product[0],
@@ -113,7 +114,7 @@ class GenbankManip
113
114
 
114
115
  contig = @gbk.definition
115
116
 
116
- # iterate through
117
+ # iterate through
117
118
  @gbk.features.each_with_index do |cds, ft_index|
118
119
 
119
120
  next if cds.feature != "CDS"
@@ -150,7 +151,7 @@ class GenbankManip
150
151
 
151
152
  # check if there is a reference genome.. reference_locus shouldn't be nil in that case
152
153
  if locus != nil
153
- qNote = Bio::Feature::Qualifier.new('note', "correspond to #{locus} locus (#{pId}% identity) from #{reference_locus.entry_id}")
154
+ qNote = Bio::Feature::Qualifier.new('note', "corresponds to #{locus} locus (#{pId}% identity) from #{reference_locus.entry_id}")
154
155
  ftArray.push(qNote)
155
156
  end
156
157
 
@@ -180,7 +181,7 @@ class GenbankManip
180
181
  ###################
181
182
  # Private Methods #
182
183
  ###################
183
-
184
+
184
185
  # Fct: Get dna sequence
185
186
  def get_DNA (cds, seq)
186
187
  loc = cds.locations
@@ -208,5 +209,5 @@ class GenbankManip
208
209
 
209
210
  private :fetch_ncbi_genome, :get_DNA
210
211
 
211
-
212
+
212
213
  end # end of Class
@@ -83,7 +83,7 @@ class RemoteNCBI
83
83
  end
84
84
 
85
85
  end
86
-
86
+
87
87
  rescue
88
88
  try += 1
89
89
  puts "#{try} POST try for #{f}"
@@ -24,7 +24,7 @@ class SyntenyManip
24
24
  def run_blat root, outdir
25
25
  system("#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
26
26
  @aln_file = "#{outdir}/#{@name}.blat8.tsv"
27
- # extract_hits
27
+ # extract_hits
28
28
  end # end of method
29
29
 
30
30
  # Extract Hit from blast8 file and save it in hash
@@ -45,21 +45,28 @@ class SyntenyManip
45
45
  next if lA[2].to_f < @pidentity
46
46
  @aln_hits[key] = {
47
47
  pId: lA[2].to_f.round(2),
48
- length: lA[3].to_i,
49
48
  evalue: lA[10],
50
49
  score: lA[11].to_f,
51
- hits: [hit]
50
+ hits: [hit],
51
+ length: [lA[3].to_i],
52
+ query_location: [[lA[6].to_i,lA[7].to_i]],
53
+ subject_location: [[lA[8].to_i,lA[9].to_i]]
52
54
  }
53
55
  elsif lA[11].to_f > @aln_hits[key][:score]
54
56
  @aln_hits[key] = {
55
57
  pId: lA[2].to_f.round(2),
56
- length: lA[3].to_i,
57
58
  evalue: lA[10],
58
59
  score: lA[11].to_f,
59
- hits: [hit]
60
+ hits: [hit],
61
+ length: [lA[3].to_i],
62
+ query_location: [[lA[6].to_i,lA[7].to_i]],
63
+ subject_location: [[lA[8].to_i,lA[9].to_i]]
60
64
  }
61
65
  elsif lA[11].to_f == @aln_hits[key][:score]
62
66
  @aln_hits[key][:hits] << hit
67
+ @aln_hits[key][:length] << lA[3].to_i
68
+ @aln_hits[key][:query_location] << [lA[6].to_i,lA[7].to_i]
69
+ @aln_hits[key][:subject_location] << [lA[8].to_i,lA[9].to_i]
63
70
  end
64
71
  end
65
72
  end
@@ -102,6 +109,7 @@ class SyntenyManip
102
109
  hit = ref_cds[h]
103
110
  annotations[p] = hit
104
111
  annotations[p][:pId] = @aln_hits[p][:pId]
112
+ annotations[p][:length] = @aln_hits[p][:length][hit_index]
105
113
  i+=1
106
114
 
107
115
  else
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-27 00:00:00.000000000 Z
11
+ date: 2016-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -70,7 +70,7 @@ files:
70
70
  - lib/bacterial-annotator/synteny-manip.rb
71
71
  homepage: http://rubygems.org/gems/bacterial-annotator
72
72
  licenses:
73
- - GPLv3
73
+ - GPL-3.0
74
74
  metadata: {}
75
75
  post_install_message:
76
76
  rdoc_options: []
@@ -88,9 +88,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
88
  version: '0'
89
89
  requirements: []
90
90
  rubyforge_project:
91
- rubygems_version: 2.4.5
91
+ rubygems_version: 2.5.1
92
92
  signing_key:
93
93
  specification_version: 4
94
94
  summary: Bacterial Annotator
95
95
  test_files: []
96
- has_rdoc: