bacterial-annotator 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,6 +10,9 @@ require 'bio'
10
10
  require 'fileutils'
11
11
  require 'parallel'
12
12
  require 'helper'
13
+ require 'json'
14
+ require 'pp'
15
+
13
16
 
14
17
  class BacterialIdentificator
15
18
 
@@ -20,15 +23,27 @@ class BacterialIdentificator
20
23
  def initialize options, root
21
24
 
22
25
  @root = root
23
- @db_path = options[:database]
24
- @genomes_list = options[:genomes_list]
26
+ @mash_file = options[:mash_file]
27
+ @genome_list = options[:genome_list]
25
28
  @proc = options[:proc].to_i
26
- p @genomes_list
29
+ @output=options[:output]
30
+
31
+ end
32
+
33
+
34
+ def run_identification
35
+
36
+ @genome_hits = {}
37
+ @genome_list.each do |g|
38
+ @genome_hits[g] = []
39
+ end
27
40
 
28
- @genomes_list.each do |g|
29
- mash_genome g
41
+ Parallel.map(@genome_list, in_threads: @proc) do |g|
42
+ @genome_hits[g] = mash_genome g
30
43
  end
31
44
 
45
+ print_output
46
+
32
47
  end
33
48
 
34
49
 
@@ -37,28 +52,86 @@ class BacterialIdentificator
37
52
  # Reference-ID, Query-ID, Mash-distance, P-value, and Matching-hashes
38
53
  # fields = ["hit","query","distance","pvalue","match"]
39
54
 
40
- results_raw = `#{@root}/mash.linux dist #{@db_path}/species-sequences.msh #{genome}`
55
+ results_raw = `#{@root}/mash.linux dist #{@mash_file} #{genome}`
41
56
  results = []
42
57
 
43
58
  results_raw.split("\n").each do |l|
44
59
  lA = l.chomp.split("\t")
45
60
  next if lA[-1].split("/")[0] == '0' # no match
46
- results << lA
61
+ results << (lA[0..0] + lA[2..-1])
62
+ end
63
+
64
+ results_sorted = results.sort {|a,b| a[1] <=> b[1]}
65
+
66
+ return results_sorted
67
+
68
+ end
69
+
70
+ # consensus species model
71
+ def consensus_reference
72
+
73
+ all_hits = {}
74
+ @genome_hits.each do |g, hits|
75
+ hits.each do |h|
76
+ score = h[3].split("/")[0].to_i
77
+ if ! all_hits.has_key? h[0]
78
+ all_hits[h[0]] = score
79
+ else
80
+ all_hits[h[0]] += score
81
+ end
82
+ end
47
83
  end
84
+ return all_hits.sort_by { |k,v| v }.to_h
85
+
86
+ end
48
87
 
49
- results_sorted = results.sort {|a,b| a[2] <=> b[2]}
88
+ # print json
89
+ def print_output
50
90
 
51
- File.open("#{genome}.msh_dist", "w") do |fout|
52
- results_sorted.each do |f|
53
- fout.write(f.join("\t"))
54
- fout.write("\n")
91
+ case @output.downcase
92
+ when "csv"
93
+ @genome_hits.each do |g, hits|
94
+ hits.each do |h|
95
+ puts "#{g},#{h.join(',')}"
96
+ end
97
+ end
98
+ when "json"
99
+ new_genome_hits = {}
100
+ @genome_hits.each do |g, hits|
101
+ new_genome_hits[g] = []
102
+ hits.each do |h|
103
+ new_genome_hits[g].push(Hash[["hit","distance","e-value","score"].zip(h)])
104
+ end
105
+ end
106
+ puts JSON.pretty_generate({genomes: new_genome_hits, summary: summary})
107
+ else
108
+ @genome_hits.each do |g, hits|
109
+ hits.each do |h|
110
+ out = h.join("\t")
111
+ puts "#{g}\t#{out}"
112
+ end
55
113
  end
56
114
  end
57
115
 
58
116
  end
59
117
 
118
+ def summary
60
119
 
120
+ genome_hit_association = {}
61
121
 
122
+ @genome_hits.each do |g, hits|
123
+ genome_hit_association[hits[0][0]] = 0 if ! genome_hit_association.has_key? hits[0][0]
124
+ genome_hit_association[hits[0][0]] += 1
125
+ end
126
+
127
+ population = {
128
+ consensus: consensus_reference.first[0],
129
+ genome_hits: genome_hit_association
130
+ }
131
+
132
+ return population
133
+
134
+ end
62
135
 
63
- end
64
136
 
137
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-06 00:00:00.000000000 Z
11
+ date: 2018-04-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -124,7 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  version: '0'
125
125
  requirements: []
126
126
  rubyforge_project:
127
- rubygems_version: 2.5.1
127
+ rubygems_version: 2.6.14
128
128
  signing_key:
129
129
  specification_version: 4
130
130
  summary: Bacterial Annotator