bacterial-annotator 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/bacterial-annotator +2 -2
- data/lib/bacterial-annotator.rb +42 -1
- data/lib/bacterial-annotator/fasta-manip.rb +11 -4
- data/lib/bacterial-annotator/genbank-manip.rb +8 -7
- data/lib/bacterial-annotator/remote-ncbi.rb +1 -1
- data/lib/bacterial-annotator/synteny-manip.rb +13 -5
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4dbc3b5e016a3f24ba82ab525a0c8daa9617afec
|
4
|
+
data.tar.gz: 7c8ba281632d84d7131cf1a27e143f596e76f414
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73f72513dc9964c46c73ef39477a9b5148b010d68449ff270f5b15f556a5e56e027a9ddb80149b2ce44e12c09466ac11267c118c97c8ea8892eb582d220588cf
|
7
|
+
data.tar.gz: 0a92c0ceec70a12500a8dcf4433d76eff7edf2e4fe7ca180151efc016ee81a3e7a6279cc633a6adaaec5936425908c3a4299742ae1571bc1970017a058a8dedb
|
data/bin/bacterial-annotator
CHANGED
@@ -64,7 +64,7 @@ def parseOptions
|
|
64
64
|
options[:pidentity] = 70
|
65
65
|
options[:minlength] = 500
|
66
66
|
options[:meta] = 0
|
67
|
-
|
67
|
+
|
68
68
|
while x = ARGV.shift
|
69
69
|
|
70
70
|
case x.downcase
|
@@ -113,7 +113,7 @@ if ARGV.size > 1
|
|
113
113
|
abort "exiting blat is missing"
|
114
114
|
end
|
115
115
|
|
116
|
-
# Check Options
|
116
|
+
# Check Options
|
117
117
|
if ! options.has_key? :refgenome and ! options.has_key? :remote_db and ! options.has_key? :external_db
|
118
118
|
puts "You didn't provide a reference genome or a database for the annotation !"
|
119
119
|
elsif ! options.has_key? :input
|
data/lib/bacterial-annotator.rb
CHANGED
@@ -106,7 +106,46 @@ class BacterialAnnotator
|
|
106
106
|
# dump foreign proteins to file
|
107
107
|
foreign_cds_file = dump_cds
|
108
108
|
|
109
|
-
|
109
|
+
# Iterate over each Ref protein and print synteny
|
110
|
+
synteny_file = File.open("#{@outdir}/Prot-Synteny.tsv","w")
|
111
|
+
synteny_file.write("RefLocusTag\tRefProtID\tRefLength\tRefCoverage\tIdentity\tQueryGene\tQueryLength\tQueryCoverage\n")
|
112
|
+
ref_annotated = {}
|
113
|
+
@contig_annotations.each do |contig,prot_annotations|
|
114
|
+
prot_annotations.each do |key,prot|
|
115
|
+
# p key
|
116
|
+
# p prot
|
117
|
+
ref_annotated[prot[:protId]] = {key: key, length: prot[:length], pId: prot[:pId]} if prot != nil
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
@refgenome.coding_seq.each do |ref_k, ref_v|
|
122
|
+
gene = ""
|
123
|
+
coverage_ref = ""
|
124
|
+
coverage_query = ""
|
125
|
+
query_length = ""
|
126
|
+
pId = ""
|
127
|
+
if ref_annotated[ref_v[:protId]] != nil
|
128
|
+
gene = ref_annotated[ref_v[:protId]][:key]
|
129
|
+
coverage_ref = (ref_annotated[ref_v[:protId]][:length].to_f/ref_v[:bioseq].seq.length.to_f).round(2)
|
130
|
+
query_length = @fasta.prodigal_files[:prot_ids_length][gene]
|
131
|
+
coverage_query = (ref_annotated[ref_v[:protId]][:length].to_f/query_length.to_f).round(2)
|
132
|
+
pId = ref_annotated[ref_v[:protId]][:pId]
|
133
|
+
end
|
134
|
+
|
135
|
+
synteny_file.write(ref_v[:protId])
|
136
|
+
synteny_file.write("\t"+ref_v[:locustag])
|
137
|
+
synteny_file.write("\t"+ref_v[:bioseq].seq.length.to_s)
|
138
|
+
synteny_file.write("\t"+coverage_ref.to_s)
|
139
|
+
synteny_file.write("\t"+pId.to_s)
|
140
|
+
synteny_file.write("\t"+gene)
|
141
|
+
synteny_file.write("\t"+query_length.to_s)
|
142
|
+
synteny_file.write("\t"+coverage_query.to_s)
|
143
|
+
synteny_file.write("\n")
|
144
|
+
|
145
|
+
end
|
146
|
+
synteny_file.close
|
147
|
+
|
148
|
+
else # no reference genome
|
110
149
|
|
111
150
|
# no reference genome .. will process all the CDS
|
112
151
|
foreign_cds_file = @fasta.prodigal_files[:proteins]
|
@@ -122,12 +161,14 @@ class BacterialAnnotator
|
|
122
161
|
puts "\nPrinting Statistics.."
|
123
162
|
print_stats "#{@outdir}/Annotation-Stats.txt"
|
124
163
|
|
164
|
+
|
125
165
|
end # end of method
|
126
166
|
|
127
167
|
|
128
168
|
# Finishing the annotation of the remaining CDS
|
129
169
|
def finish_annotation remaining_cds_file
|
130
170
|
|
171
|
+
# only finish the annotation with an external DB
|
131
172
|
if @options.has_key? :external_db # from an external DB
|
132
173
|
|
133
174
|
db_file = @options[:external_db]
|
@@ -20,6 +20,7 @@ class FastaManip
|
|
20
20
|
@meta = meta
|
21
21
|
@prodigal_files = nil
|
22
22
|
@single_fasta = nil
|
23
|
+
@seq_info = nil
|
23
24
|
|
24
25
|
if @fasta_flat.dbclass != Bio::FastaFormat
|
25
26
|
abort "Aborting : The input sequence is not a fasta file !"
|
@@ -61,7 +62,7 @@ class FastaManip
|
|
61
62
|
file_name = seq.definition.chomp.split(" ")[0]
|
62
63
|
@prodigal_files[:contigs] << "#{file_name}"
|
63
64
|
@prodigal_files[:contigs_length] << seq.seq.length
|
64
|
-
File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
|
65
|
+
File.open("#{outdir}/single-fasta/#{file_name}.fasta", "w") do |fwrite|
|
65
66
|
fwrite.write(seq)
|
66
67
|
end
|
67
68
|
@single_fasta[file_name] = seq
|
@@ -132,17 +133,19 @@ class FastaManip
|
|
132
133
|
end
|
133
134
|
|
134
135
|
end
|
135
|
-
|
136
|
+
|
136
137
|
return outseq, sequence.length
|
137
138
|
|
138
139
|
end
|
139
140
|
|
140
141
|
|
141
|
-
# extract protein and gene names
|
142
|
+
# extract protein and gene names from prodigal... with contig numbering
|
142
143
|
def extract_cds_names
|
143
144
|
|
144
145
|
prot_ids = {}
|
146
|
+
prot_length = {}
|
145
147
|
flatfile = Bio::FlatFile.auto(@prodigal_files[:proteins])
|
148
|
+
|
146
149
|
flatfile.each_entry do |entry|
|
147
150
|
prot_id = entry.definition.split(" ")[0]
|
148
151
|
contig = prot_id.split("_")[0..-2].join("_")
|
@@ -150,6 +153,10 @@ class FastaManip
|
|
150
153
|
prot_ids[contig] = []
|
151
154
|
end
|
152
155
|
prot_ids[contig] << prot_id
|
156
|
+
|
157
|
+
# puts "Prodigal length : " + entry.seq.length.to_s
|
158
|
+
prot_length[prot_id] = entry.seq.length-1 # minus the stop codon
|
159
|
+
|
153
160
|
end
|
154
161
|
|
155
162
|
prot_ids.each do |k,prot_array|
|
@@ -157,10 +164,10 @@ class FastaManip
|
|
157
164
|
end
|
158
165
|
|
159
166
|
@prodigal_files[:prot_ids_by_contig] = prot_ids
|
167
|
+
@prodigal_files[:prot_ids_length] = prot_length
|
160
168
|
|
161
169
|
end
|
162
170
|
|
163
|
-
|
164
171
|
private :extract_cds_names # :split_fasta, :split_genbank
|
165
172
|
|
166
173
|
end
|
@@ -25,7 +25,7 @@ class GenbankManip
|
|
25
25
|
flat_gbk = Bio::FlatFile.auto(@gbk_file)
|
26
26
|
|
27
27
|
# Check if gbk is valid
|
28
|
-
if flat_gbk.dbclass != Bio::GenBank
|
28
|
+
if flat_gbk.dbclass != Bio::GenBank
|
29
29
|
abort "Aborting : The input #{@gbk_file} is not a valid genbank file !"
|
30
30
|
else
|
31
31
|
@gbk = flat_gbk.next_entry
|
@@ -67,10 +67,11 @@ class GenbankManip
|
|
67
67
|
pepBioSeq = Bio::Sequence.auto(pep)
|
68
68
|
|
69
69
|
if protId.strip == ""
|
70
|
-
protId = locustag
|
70
|
+
protId = locustag
|
71
71
|
end
|
72
72
|
|
73
|
-
@coding_seq[protId] = {
|
73
|
+
@coding_seq[protId] = {protId: protId,
|
74
|
+
location: loc,
|
74
75
|
locustag: locustag,
|
75
76
|
gene: gene[0],
|
76
77
|
product: product[0],
|
@@ -113,7 +114,7 @@ class GenbankManip
|
|
113
114
|
|
114
115
|
contig = @gbk.definition
|
115
116
|
|
116
|
-
# iterate through
|
117
|
+
# iterate through
|
117
118
|
@gbk.features.each_with_index do |cds, ft_index|
|
118
119
|
|
119
120
|
next if cds.feature != "CDS"
|
@@ -150,7 +151,7 @@ class GenbankManip
|
|
150
151
|
|
151
152
|
# check if there is a reference genome.. reference_locus shouldn't be nil in that case
|
152
153
|
if locus != nil
|
153
|
-
qNote = Bio::Feature::Qualifier.new('note', "
|
154
|
+
qNote = Bio::Feature::Qualifier.new('note', "corresponds to #{locus} locus (#{pId}% identity) from #{reference_locus.entry_id}")
|
154
155
|
ftArray.push(qNote)
|
155
156
|
end
|
156
157
|
|
@@ -180,7 +181,7 @@ class GenbankManip
|
|
180
181
|
###################
|
181
182
|
# Private Methods #
|
182
183
|
###################
|
183
|
-
|
184
|
+
|
184
185
|
# Fct: Get dna sequence
|
185
186
|
def get_DNA (cds, seq)
|
186
187
|
loc = cds.locations
|
@@ -208,5 +209,5 @@ class GenbankManip
|
|
208
209
|
|
209
210
|
private :fetch_ncbi_genome, :get_DNA
|
210
211
|
|
211
|
-
|
212
|
+
|
212
213
|
end # end of Class
|
@@ -24,7 +24,7 @@ class SyntenyManip
|
|
24
24
|
def run_blat root, outdir
|
25
25
|
system("#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
|
26
26
|
@aln_file = "#{outdir}/#{@name}.blat8.tsv"
|
27
|
-
# extract_hits
|
27
|
+
# extract_hits
|
28
28
|
end # end of method
|
29
29
|
|
30
30
|
# Extract Hit from blast8 file and save it in hash
|
@@ -45,21 +45,28 @@ class SyntenyManip
|
|
45
45
|
next if lA[2].to_f < @pidentity
|
46
46
|
@aln_hits[key] = {
|
47
47
|
pId: lA[2].to_f.round(2),
|
48
|
-
length: lA[3].to_i,
|
49
48
|
evalue: lA[10],
|
50
49
|
score: lA[11].to_f,
|
51
|
-
hits: [hit]
|
50
|
+
hits: [hit],
|
51
|
+
length: [lA[3].to_i],
|
52
|
+
query_location: [[lA[6].to_i,lA[7].to_i]],
|
53
|
+
subject_location: [[lA[8].to_i,lA[9].to_i]]
|
52
54
|
}
|
53
55
|
elsif lA[11].to_f > @aln_hits[key][:score]
|
54
56
|
@aln_hits[key] = {
|
55
57
|
pId: lA[2].to_f.round(2),
|
56
|
-
length: lA[3].to_i,
|
57
58
|
evalue: lA[10],
|
58
59
|
score: lA[11].to_f,
|
59
|
-
hits: [hit]
|
60
|
+
hits: [hit],
|
61
|
+
length: [lA[3].to_i],
|
62
|
+
query_location: [[lA[6].to_i,lA[7].to_i]],
|
63
|
+
subject_location: [[lA[8].to_i,lA[9].to_i]]
|
60
64
|
}
|
61
65
|
elsif lA[11].to_f == @aln_hits[key][:score]
|
62
66
|
@aln_hits[key][:hits] << hit
|
67
|
+
@aln_hits[key][:length] << lA[3].to_i
|
68
|
+
@aln_hits[key][:query_location] << [lA[6].to_i,lA[7].to_i]
|
69
|
+
@aln_hits[key][:subject_location] << [lA[8].to_i,lA[9].to_i]
|
63
70
|
end
|
64
71
|
end
|
65
72
|
end
|
@@ -102,6 +109,7 @@ class SyntenyManip
|
|
102
109
|
hit = ref_cds[h]
|
103
110
|
annotations[p] = hit
|
104
111
|
annotations[p][:pId] = @aln_hits[p][:pId]
|
112
|
+
annotations[p][:length] = @aln_hits[p][:length][hit_index]
|
105
113
|
i+=1
|
106
114
|
|
107
115
|
else
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|
@@ -70,7 +70,7 @@ files:
|
|
70
70
|
- lib/bacterial-annotator/synteny-manip.rb
|
71
71
|
homepage: http://rubygems.org/gems/bacterial-annotator
|
72
72
|
licenses:
|
73
|
-
-
|
73
|
+
- GPL-3.0
|
74
74
|
metadata: {}
|
75
75
|
post_install_message:
|
76
76
|
rdoc_options: []
|
@@ -88,9 +88,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
88
88
|
version: '0'
|
89
89
|
requirements: []
|
90
90
|
rubyforge_project:
|
91
|
-
rubygems_version: 2.
|
91
|
+
rubygems_version: 2.5.1
|
92
92
|
signing_key:
|
93
93
|
specification_version: 4
|
94
94
|
summary: Bacterial Annotator
|
95
95
|
test_files: []
|
96
|
-
has_rdoc:
|