bacterial-annotator 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/ba_blat +60 -0
- data/bin/ba_prodigal +45 -0
- data/bin/bacterial-annotator +131 -0
- data/lib/bacterial-annotator.rb +406 -0
- data/lib/bacterial-annotator/fasta-manip.rb +166 -0
- data/lib/bacterial-annotator/genbank-manip.rb +208 -0
- data/lib/bacterial-annotator/remote-ncbi.rb +200 -0
- data/lib/bacterial-annotator/synteny-manip.rb +188 -0
- metadata +96 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# author: maxime déraspe
|
|
3
|
+
# email: maxime@deraspe.net
|
|
4
|
+
# review:
|
|
5
|
+
# date: 15-02-24
|
|
6
|
+
# version: 0.0.1
|
|
7
|
+
# licence:
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Aligns query proteins against subject proteins with blat, keeps the best
# scoring hit(s) per query, and resolves ties between equally scored hits by
# looking at the locus tags of neighbouring proteins on the same contig.
class SyntenyManip

  attr_reader :query_file, :subject_file, :aln_hits

  # query_file / subject_file : paths handed to blat as query and database
  # name                      : basename used for the blat output file
  # pidentity                 : minimum percent identity (compared numerically
  #                             against column 3 of the blast8 output)
  def initialize(query_file, subject_file, name, pidentity)
    @query_file = query_file
    @subject_file = subject_file
    @name = name
    @pidentity = pidentity
    @aln_file = nil
    @aln_hits = nil # filled in by extract_hits
  end

  # Run blat on proteins and remember where the blast8 output was written.
  #
  # root   : directory that contains the `blat.linux` executable
  # outdir : directory receiving "<name>.blat8.tsv"
  #
  # Returns the path of the alignment file. The exit status of blat is not
  # checked here.
  def run_blat(root, outdir)
    @aln_file = "#{outdir}/#{@name}.blat8.tsv"
    # Arguments are passed as a list so that paths containing spaces or shell
    # metacharacters reach blat untouched instead of being parsed by a shell.
    system("#{root}/blat.linux",
           "-out=blast8",
           "-minIdentity=#{@pidentity}",
           "-prot",
           @subject_file.to_s,
           @query_file.to_s,
           @aln_file)
    @aln_file
  end

  # Extract hits from the blast8 file and store them in @aln_hits.
  #
  # Example input line (tab separated):
  #   contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
  #
  # mode : :refgenome  -> the hit id is column 2 as-is
  #        :externaldb -> the hit id is the second "|"-separated token of
  #                       column 2
  #        any other value leaves the hit id nil
  #
  # For every query (column 1) only the highest scoring hit is kept; hits
  # with exactly the same score are accumulated in :hits.
  # NOTE(review): the identity threshold is only applied to the first hit
  # seen for a query, as before; later higher-scoring hits are not re-checked.
  def extract_hits(mode)
    @aln_hits = {}

    File.foreach(@aln_file) do |line|
      # chomp (not chomp!) so a final line without a newline is still parsed
      fields = line.chomp.split("\t")
      next if fields.empty? # skip blank lines

      key = fields[0]
      hit = hit_id_for(mode, fields[1])
      pid = fields[2].to_f
      score = fields[11].to_f
      current = @aln_hits[key]

      if current.nil?
        next if pid < @pidentity
        @aln_hits[key] = build_hit(fields, pid, score, hit)
      elsif score > current[:score]
        @aln_hits[key] = build_hit(fields, pid, score, hit)
      elsif score == current[:score]
        current[:hits] << hit
      end
    end
  end

  # Get the annotations for the proteins of one contig (reference genome).
  #
  # prots_to_annotate : protein ids shaped "<contig>_<number>"; the contig is
  #                     taken from the first element
  # ref_cds           : hash mapping a hit id to its reference CDS record
  #
  # Returns a hash { protein id => ref_cds record or nil }. Proteins without
  # any alignment hit map to nil. An empty or nil list yields {}.
  def get_annotation_for_contig(prots_to_annotate, ref_cds)
    return {} if prots_to_annotate.nil? || prots_to_annotate.empty?

    contig_to_annotate = contig_of(prots_to_annotate[0])

    # proteins of this contig that have hits, ordered by position on the contig
    prots = @aln_hits.keys.select { |k| contig_of(k) == contig_to_annotate }
    prots.sort_by! { |k| k.split("_")[-1].to_i }

    # Look positions up by name rather than counting matches, so the index
    # given to choose_best_hit always designates the protein being annotated.
    position = {}
    prots.each_with_index { |prot, idx| position[prot] = idx }

    annotations = {}
    prots_to_annotate.each do |p|
      entry = @aln_hits[p]
      if entry.nil?
        annotations[p] = nil
        next
      end

      hit_index = 0
      if entry[:hits].length > 1 && position.key?(p)
        hit_index = choose_best_hit(position[p], prots, ref_cds)
      end
      annotations[p] = ref_cds[entry[:hits][hit_index]]
    end

    annotations
  end

  # Choose the best hit among equally scored hits, based on neighbour hits.
  #
  # i       : index in prots of the protein whose hits are ambiguous
  # prots   : protein ids of one contig, sorted by position
  # ref_cds : hash mapping a hit id to a record exposing :locustag
  #
  # Walks outwards from i (forward neighbour first, then backward, at
  # increasing distance) until a neighbour with exactly one hit is found, and
  # returns the index of the candidate hit whose locus tag number is closest
  # to that neighbour's. Returns 0 when no such neighbour exists.
  def choose_best_hit(i, prots, ref_cds)
    candidates = @aln_hits[prots[i]][:hits].map { |h| locus_number(ref_cds, h) }

    offset = 1
    loop do
      neighbours = []
      neighbours << (i + offset) if (i + offset) < prots.length
      neighbours << (i - offset) if (i - offset) >= 0
      break if neighbours.empty? # both ends of the contig reached

      neighbours.each do |j|
        anchor = unique_hit_locus(prots[j], ref_cds)
        return closest_index(candidates, anchor) if anchor
      end

      offset += 1
    end

    0
  end

  private

  # Hit identifier for the given mode from the subject column.
  def hit_id_for(mode, subject)
    if mode == :refgenome
      subject
    elsif mode == :externaldb
      subject.to_s.chomp.split("|")[1]
    end
  end

  # Hash stored in @aln_hits for the current best hit of a query.
  def build_hit(fields, pid, score, hit)
    {
      pId: pid,
      length: fields[3].to_i,
      evalue: fields[10],
      score: score,
      hits: [hit]
    }
  end

  # Contig part of a protein id: everything before the last "_".
  def contig_of(prot)
    prot.split("_")[0..-2].join("_")
  end

  # Digits of the last "_"-separated part of a hit's locus tag, as an integer.
  def locus_number(ref_cds, hit)
    ref_cds[hit][:locustag].downcase.split("_")[-1].to_s.gsub(/[a-z]/, "").to_i
  end

  # Locus number of prot's hit when it has exactly one hit, nil otherwise.
  def unique_hit_locus(prot, ref_cds)
    hits = @aln_hits[prot][:hits]
    return nil unless hits.length == 1
    locus_number(ref_cds, hits[0])
  end

  # Index of the candidate number closest to anchor (first one wins on ties).
  def closest_index(candidates, anchor)
    return 0 if candidates.empty?
    candidates.each_with_index.min_by { |num, _idx| (num - anchor).abs }.last
  end

end # end of class
|
metadata
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: bacterial-annotator
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Maxime Deraspe
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2015-02-24 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: bio
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.4'
|
|
20
|
+
- - ">="
|
|
21
|
+
- !ruby/object:Gem::Version
|
|
22
|
+
version: 1.4.3
|
|
23
|
+
type: :runtime
|
|
24
|
+
prerelease: false
|
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
26
|
+
requirements:
|
|
27
|
+
- - "~>"
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
version: '1.4'
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 1.4.3
|
|
33
|
+
- !ruby/object:Gem::Dependency
|
|
34
|
+
name: mechanize
|
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '2.7'
|
|
40
|
+
- - ">="
|
|
41
|
+
- !ruby/object:Gem::Version
|
|
42
|
+
version: 2.7.3
|
|
43
|
+
type: :runtime
|
|
44
|
+
prerelease: false
|
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
46
|
+
requirements:
|
|
47
|
+
- - "~>"
|
|
48
|
+
- !ruby/object:Gem::Version
|
|
49
|
+
version: '2.7'
|
|
50
|
+
- - ">="
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: 2.7.3
|
|
53
|
+
description: Annotate bacterial genomes from a draft or complete genome based on a
|
|
54
|
+
reference genome.
|
|
55
|
+
email: maxime@deraspe.net
|
|
56
|
+
executables:
|
|
57
|
+
- bacterial-annotator
|
|
58
|
+
- ba_prodigal
|
|
59
|
+
- ba_blat
|
|
60
|
+
extensions: []
|
|
61
|
+
extra_rdoc_files: []
|
|
62
|
+
files:
|
|
63
|
+
- bin/ba_blat
|
|
64
|
+
- bin/ba_prodigal
|
|
65
|
+
- bin/bacterial-annotator
|
|
66
|
+
- lib/bacterial-annotator.rb
|
|
67
|
+
- lib/bacterial-annotator/fasta-manip.rb
|
|
68
|
+
- lib/bacterial-annotator/genbank-manip.rb
|
|
69
|
+
- lib/bacterial-annotator/remote-ncbi.rb
|
|
70
|
+
- lib/bacterial-annotator/synteny-manip.rb
|
|
71
|
+
homepage: http://rubygems.org/gems/bacterial-annotator
|
|
72
|
+
licenses:
|
|
73
|
+
- GPLv3
|
|
74
|
+
metadata: {}
|
|
75
|
+
post_install_message:
|
|
76
|
+
rdoc_options: []
|
|
77
|
+
require_paths:
|
|
78
|
+
- lib
|
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
80
|
+
requirements:
|
|
81
|
+
- - ">="
|
|
82
|
+
- !ruby/object:Gem::Version
|
|
83
|
+
version: '0'
|
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - ">="
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '0'
|
|
89
|
+
requirements: []
|
|
90
|
+
rubyforge_project:
|
|
91
|
+
rubygems_version: 2.4.5
|
|
92
|
+
signing_key:
|
|
93
|
+
specification_version: 4
|
|
94
|
+
summary: Bacterial Annotator
|
|
95
|
+
test_files: []
|
|
96
|
+
has_rdoc:
|