bacterial-annotator 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,188 @@
1
+ # -*- coding: utf-8 -*-
2
+ # author: maxime déraspe
3
+ # email: maxime@deraspe.net
4
+ # review:
5
+ # date: 15-02-24
6
+ # version: 0.0.1
7
+ # licence:
8
+
9
+
10
+
11
# Runs a BLAT protein alignment between a query and a subject file, parses
# the blast8 tabular output, and uses the order of neighbouring proteins on a
# contig (synteny) to choose between equally scored reference hits.
class SyntenyManip

  # Number of tab-separated columns expected in a blast8 record, e.g.
  # contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
  BLAST8_COLUMNS = 12

  attr_reader :query_file, :subject_file, :aln_hits

  # query_file   - path of the query protein file handed to blat
  # subject_file - path of the subject protein file handed to blat
  # name         - base name used for the alignment output file
  # pidentity    - minimum percent identity (passed to blat and used as a
  #                filter in #extract_hits)
  def initialize(query_file, subject_file, name, pidentity)
    @query_file = query_file
    @subject_file = subject_file
    @name = name
    @pidentity = pidentity
    @aln_file = nil
  end

  # Run blat on proteins and remember where the blast8 output was written.
  #
  # root   - directory that contains the `blat.linux` executable
  # outdir - directory receiving `<name>.blat8.tsv`
  #
  # Returns the path of the alignment file. The exit status of blat is not
  # checked (unchanged from the previous behaviour).
  def run_blat(root, outdir)
    output = "#{outdir}/#{@name}.blat8.tsv"
    # Argument-vector form: no shell is involved, so paths containing spaces
    # or shell metacharacters are passed to blat verbatim.
    system("#{root}/blat.linux",
           "-out=blast8",
           "-minIdentity=#{@pidentity}",
           "-prot",
           @subject_file.to_s,
           @query_file.to_s,
           output)
    @aln_file = output
  end

  # Extract the hits from the blast8 file and store them in @aln_hits,
  # keyed by query protein id. For each query only the best-scoring hit(s)
  # are kept; hits sharing the best score are accumulated in :hits.
  #
  # mode - :refgenome  keeps the subject id as is,
  #        :externaldb keeps the second "|"-separated token of the subject id.
  #        Any other value stores nil as the hit id.
  #
  # Returns nil; read the result through #aln_hits.
  def extract_hits(mode)
    @aln_hits = {}

    File.foreach(@aln_file) do |line|
      # Non-destructive chomp: `chomp!` returns nil when the line has no
      # trailing newline (typically the last line of the file).
      fields = line.chomp.split("\t")
      # Skip blank or truncated records instead of failing on nil fields.
      next if fields.length < BLAST8_COLUMNS

      key = fields[0]
      hit = case mode
            when :refgenome then fields[1]
            when :externaldb then fields[1].split("|")[1]
            end
      pid = fields[2].to_f
      score = fields[11].to_f
      current = @aln_hits[key]

      if current.nil?
        # The identity threshold is only enforced on the first record seen
        # for a query (same as before).
        next if pid < @pidentity
        @aln_hits[key] = build_hit(fields, pid, score, hit)
      elsif score > current[:score]
        @aln_hits[key] = build_hit(fields, pid, score, hit)
      elsif score == current[:score]
        current[:hits] << hit
      end
    end

    nil
  end

  # Get the annotations of a contig's proteins from a reference genome.
  #
  # prots_to_annotate - protein ids named "<contig>_<index>"; the contig is
  #                     taken from the first id
  # ref_cds           - hash mapping a hit id to its reference annotation
  #
  # Returns a hash { protein id => ref_cds entry, or nil when the protein has
  # no alignment hit }. Returns {} for a nil or empty protein list.
  def get_annotation_for_contig(prots_to_annotate, ref_cds)
    return {} if prots_to_annotate.nil? || prots_to_annotate.empty?

    contig_to_annotate = contig_name(prots_to_annotate[0])

    # Proteins of this contig that have hits, sorted by their appearance in
    # the contig.
    prots = @aln_hits.keys.select { |k| contig_name(k) == contig_to_annotate }
    prots.sort! { |a, b| a.split("_")[-1].to_i <=> b.split("_")[-1].to_i }

    # Look up each protein's real position in `prots` rather than relying on
    # a running counter, which is only right when the caller passes every
    # protein of the contig in order.
    positions = {}
    prots.each_with_index { |prot, idx| positions[prot] = idx }

    annotations = {}
    prots_to_annotate.each do |p|
      unless @aln_hits.has_key?(p)
        annotations[p] = nil
        next
      end

      hits = @aln_hits[p][:hits]
      hit_index = 0
      if hits.length > 1 && positions.has_key?(p)
        hit_index = choose_best_hit(positions[p], prots, ref_cds)
      end
      annotations[p] = ref_cds[hits[hit_index]]
    end

    annotations
  end

  # Choose the best hit of an ambiguous protein based on neighbour hits.
  #
  # Walks outwards from position i (forward neighbour first, then backward,
  # at increasing distance) until a neighbour with a single hit is found, and
  # returns the index of the candidate hit whose locus tag number is closest
  # to that neighbour's.
  #
  # i       - index in `prots` of the protein to disambiguate
  # prots   - protein ids of one contig, in contig order, all in @aln_hits
  # ref_cds - hash mapping a hit id to a hash holding a :locustag string
  #
  # Returns an index into @aln_hits[prots[i]][:hits]; 0 when no neighbour has
  # a unique hit.
  def choose_best_hit(i, prots, ref_cds)
    candidate_tags = @aln_hits[prots[i]][:hits].map { |h| locus_tag_number(ref_cds, h) }

    offset = 1
    loop do
      # The last protein (index prots.length - 1) is a valid forward
      # neighbour.
      fwd_in_range = (i + offset) < prots.length
      bcw_in_range = (i - offset) >= 0
      # Keep searching while at least one direction still has proteins, so
      # proteins near a contig end can still use the other side.
      break unless fwd_in_range || bcw_in_range

      neighbours = []
      neighbours << prots[i + offset] if fwd_in_range
      neighbours << prots[i - offset] if bcw_in_range

      neighbours.each do |neighbour|
        neighbour_hits = @aln_hits[neighbour][:hits]
        next unless neighbour_hits.length == 1 # ambiguous neighbours can't anchor

        anchor = locus_tag_number(ref_cds, neighbour_hits[0])
        return closest_tag_index(candidate_tags, anchor)
      end

      offset += 1
    end

    0
  end

  private

  # Build the record stored in @aln_hits for one blast8 line.
  def build_hit(fields, pid, score, hit)
    {
      pId: pid,
      length: fields[3].to_i,
      evalue: fields[10],
      score: score,
      hits: [hit]
    }
  end

  # Contig part of a protein id: everything before the last "_".
  def contig_name(prot_id)
    prot_id.split("_")[0..-2].join("_")
  end

  # Numeric part of the last "_"-separated token of a hit's locus tag.
  def locus_tag_number(ref_cds, hit)
    ref_cds[hit][:locustag].downcase.split("_")[-1].gsub(/[a-z]/, "").to_i
  end

  # Index of the tag number closest to `anchor`; the first one wins on ties.
  # No upper bound is placed on the distance.
  def closest_tag_index(tags, anchor)
    return 0 if tags.empty?

    tags.each_with_index.min_by { |tag, _idx| (tag - anchor).abs }.last
  end

end # end of class
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bacterial-annotator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Maxime Deraspe
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bio
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.4'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.4.3
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.4'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.4.3
33
+ - !ruby/object:Gem::Dependency
34
+ name: mechanize
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.7'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 2.7.3
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '2.7'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 2.7.3
53
+ description: Annotate bacterial genomes from a draft or complete genome based on a
54
+ reference genome.
55
+ email: maxime@deraspe.net
56
+ executables:
57
+ - bacterial-annotator
58
+ - ba_prodigal
59
+ - ba_blat
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - bin/ba_blat
64
+ - bin/ba_prodigal
65
+ - bin/bacterial-annotator
66
+ - lib/bacterial-annotator.rb
67
+ - lib/bacterial-annotator/fasta-manip.rb
68
+ - lib/bacterial-annotator/genbank-manip.rb
69
+ - lib/bacterial-annotator/remote-ncbi.rb
70
+ - lib/bacterial-annotator/synteny-manip.rb
71
+ homepage: http://rubygems.org/gems/bacterial-annotator
72
+ licenses:
73
+ - GPLv3
74
+ metadata: {}
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 2.4.5
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Bacterial Annotator
95
+ test_files: []
96
+ has_rdoc: