bacterial-annotator 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/bacterial-annotator +39 -29
- data/lib/bacterial-annotator/sequence-annotation.rb +209 -30
- data/lib/bacterial-annotator/sequence-fasta.rb +21 -18
- data/lib/bacterial-annotator/sequence-synteny.rb +77 -20
- data/lib/bacterial-annotator.rb +201 -64
- data/lib/bacterial-comparator.rb +42 -26
- data/lib/bacterial-identificator.rb +86 -13
- metadata +3 -3
@@ -10,6 +10,9 @@ require 'bio'
|
|
10
10
|
require 'fileutils'
|
11
11
|
require 'parallel'
|
12
12
|
require 'helper'
|
13
|
+
require 'json'
|
14
|
+
require 'pp'
|
15
|
+
|
13
16
|
|
14
17
|
class BacterialIdentificator
|
15
18
|
|
@@ -20,15 +23,27 @@ class BacterialIdentificator
|
|
20
23
|
# Stores the run configuration for an identification job.
#
# options - Hash of settings; the keys read here are:
#           :mash_file   - first argument given to `mash dist` (see mash_genome)
#           :genome_list - collection of genomes; each entry is passed to
#                          mash_genome by run_identification
#           :proc        - thread count, coerced with to_i (nil becomes 0)
#           :output      - output format name used by print_output
# root    - directory that contains the `mash.linux` executable
def initialize(options, root)
  @root = root
  @mash_file = options[:mash_file]
  @genome_list = options[:genome_list]
  @proc = options[:proc].to_i
  @output = options[:output]
end
|
32
|
+
|
33
|
+
|
34
|
+
# Runs mash_genome on every entry of @genome_list, stores the results in
# @genome_hits (genome => hits) and then prints them with print_output.
#
# The work is distributed with Parallel.map using @proc threads. The hits
# are collected from Parallel.map's return value, which keeps the input
# order, so worker threads never write to a shared Hash.
def run_identification
  hits_per_genome = Parallel.map(@genome_list, in_threads: @proc) do |genome|
    mash_genome(genome)
  end

  # Pair every genome with its own result; a genome listed twice keeps the
  # result of its last occurrence, as a plain Hash assignment would.
  @genome_hits = @genome_list.zip(hits_per_genome).to_h

  print_output
end
|
33
48
|
|
34
49
|
|
@@ -37,28 +52,86 @@ class BacterialIdentificator
|
|
37
52
|
# Reference-ID, Query-ID, Mash-distance, P-value, and Matching-hashes
|
38
53
|
# fields = ["hit","query","distance","pvalue","match"]
|
39
54
|
|
40
|
-
results_raw = `#{@root}/mash.linux dist #{@
|
55
|
+
results_raw = `#{@root}/mash.linux dist #{@mash_file} #{genome}`
|
41
56
|
results = []
|
42
57
|
|
43
58
|
results_raw.split("\n").each do |l|
|
44
59
|
lA = l.chomp.split("\t")
|
45
60
|
next if lA[-1].split("/")[0] == '0' # no match
|
46
|
-
results << lA
|
61
|
+
results << (lA[0..0] + lA[2..-1])
|
62
|
+
end
|
63
|
+
|
64
|
+
results_sorted = results.sort {|a,b| a[1] <=> b[1]}
|
65
|
+
|
66
|
+
return results_sorted
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
# consensus species model
|
71
|
+
# Adds up, per reference, the scores of all hits found in @genome_hits.
#
# Each hit is an Array whose element 0 is the reference id and whose
# element 3 is a "matching/total" string (the Matching-hashes column kept by
# mash_genome); the integer before the "/" is used as the score.
#
# Returns a Hash of reference id => summed score, ordered from the lowest
# total to the highest.
def consensus_reference
  totals = Hash.new(0)

  @genome_hits.each_value do |hits|
    hits.each do |hit|
      totals[hit[0]] += hit[3].split("/")[0].to_i
    end
  end

  # Array#to_h builds a plain Hash, so the counting default of 0 is not
  # carried over to callers.
  totals.sort_by { |_reference, total| total }.to_h
end
|
48
87
|
|
49
|
-
|
88
|
+
# print the hits as CSV, JSON, or tab-separated text (default)
|
89
|
+
# Writes the content of @genome_hits to standard output.
#
# The format is chosen from @output, compared case-insensitively:
#   "csv"  - one line per hit: genome followed by the hit fields, comma-separated
#   "json" - pretty-printed document with a "genomes" map (each hit labelled
#            "hit", "distance", "e-value", "score") and the result of summary
#   other  - one line per hit, tab-separated (also used when @output is nil)
def print_output
  # to_s keeps a missing format from raising NoMethodError on nil.
  case @output.to_s.downcase
  when "csv"
    @genome_hits.each do |genome, hits|
      hits.each { |hit| puts "#{genome},#{hit.join(',')}" }
    end
  when "json"
    fields = ["hit", "distance", "e-value", "score"]
    labelled_hits = {}
    @genome_hits.each do |genome, hits|
      labelled_hits[genome] = hits.map { |hit| Hash[fields.zip(hit)] }
    end
    puts JSON.pretty_generate({genomes: labelled_hits, summary: summary})
  else
    @genome_hits.each do |genome, hits|
      hits.each { |hit| puts "#{genome}\t#{hit.join("\t")}" }
    end
  end
end
|
59
117
|
|
118
|
+
# Builds the population-level summary included in the JSON output.
#
# Returns a Hash with:
#   :consensus   - reference id with the highest total in consensus_reference,
#                  or nil when there is no hit at all
#   :genome_hits - reference id => number of genomes whose first hit is that
#                  reference (mash_genome returns hits sorted on the distance
#                  field, so the first hit is the top-ranked one)
def summary
  top_hit_counts = {}

  @genome_hits.each_value do |hits|
    top_hit = hits.first
    next if top_hit.nil? # genome without any hit: nothing to count

    reference = top_hit[0]
    top_hit_counts[reference] = top_hit_counts.fetch(reference, 0) + 1
  end

  # consensus_reference is ordered from lowest to highest total, so the
  # consensus is the entry with the maximum total, not the first one.
  best = consensus_reference.max_by { |_reference, total| total }

  {
    consensus: best && best[0],
    genome_hits: top_hit_counts
  }
end
|
62
135
|
|
63
|
-
end
|
64
136
|
|
137
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bacterial-annotator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maxime Deraspe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-04-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|
@@ -124,7 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
124
|
version: '0'
|
125
125
|
requirements: []
|
126
126
|
rubyforge_project:
|
127
|
-
rubygems_version: 2.
|
127
|
+
rubygems_version: 2.6.14
|
128
128
|
signing_key:
|
129
129
|
specification_version: 4
|
130
130
|
summary: Bacterial Annotator
|