bacterial-annotator 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,9 @@ require 'bio'
10
10
  require 'fileutils'
11
11
  require 'parallel'
12
12
  require 'helper'
13
+ require 'json'
14
+ require 'pp'
15
+
13
16
 
14
17
  class BacterialIdentificator
15
18
 
@@ -20,15 +23,27 @@ class BacterialIdentificator
20
23
  def initialize options, root
21
24
 
22
25
  @root = root
23
- @db_path = options[:database]
24
- @genomes_list = options[:genomes_list]
26
+ @mash_file = options[:mash_file]
27
+ @genome_list = options[:genome_list]
25
28
  @proc = options[:proc].to_i
26
- p @genomes_list
29
+ @output=options[:output]
30
+
31
+ end
32
+
33
+
34
+ def run_identification
35
+
36
+ @genome_hits = {}
37
+ @genome_list.each do |g|
38
+ @genome_hits[g] = []
39
+ end
27
40
 
28
- @genomes_list.each do |g|
29
- mash_genome g
41
+ Parallel.map(@genome_list, in_threads: @proc) do |g|
42
+ @genome_hits[g] = mash_genome g
30
43
  end
31
44
 
45
+ print_output
46
+
32
47
  end
33
48
 
34
49
 
@@ -37,28 +52,86 @@ class BacterialIdentificator
37
52
  # Reference-ID, Query-ID, Mash-distance, P-value, and Matching-hashes
38
53
  # fields = ["hit","query","distance","pvalue","match"]
39
54
 
40
- results_raw = `#{@root}/mash.linux dist #{@db_path}/species-sequences.msh #{genome}`
55
+ results_raw = `#{@root}/mash.linux dist #{@mash_file} #{genome}`
41
56
  results = []
42
57
 
43
58
  results_raw.split("\n").each do |l|
44
59
  lA = l.chomp.split("\t")
45
60
  next if lA[-1].split("/")[0] == '0' # no match
46
- results << lA
61
+ results << (lA[0..0] + lA[2..-1])
62
+ end
63
+
64
+ results_sorted = results.sort {|a,b| a[1] <=> b[1]}
65
+
66
+ return results_sorted
67
+
68
+ end
69
+
70
+ # consensus species model
71
+ def consensus_reference
72
+
73
+ all_hits = {}
74
+ @genome_hits.each do |g, hits|
75
+ hits.each do |h|
76
+ score = h[3].split("/")[0].to_i
77
+ if ! all_hits.has_key? h[0]
78
+ all_hits[h[0]] = score
79
+ else
80
+ all_hits[h[0]] += score
81
+ end
82
+ end
47
83
  end
84
+ return all_hits.sort_by { |k,v| v }.to_h
85
+
86
+ end
48
87
 
49
- results_sorted = results.sort {|a,b| a[2] <=> b[2]}
88
+ # print the hits as CSV, JSON, or tab-separated text
89
+ def print_output
50
90
 
51
- File.open("#{genome}.msh_dist", "w") do |fout|
52
- results_sorted.each do |f|
53
- fout.write(f.join("\t"))
54
- fout.write("\n")
91
+ case @output.downcase
92
+ when "csv"
93
+ @genome_hits.each do |g, hits|
94
+ hits.each do |h|
95
+ puts "#{g},#{h.join(',')}"
96
+ end
97
+ end
98
+ when "json"
99
+ new_genome_hits = {}
100
+ @genome_hits.each do |g, hits|
101
+ new_genome_hits[g] = []
102
+ hits.each do |h|
103
+ new_genome_hits[g].push(Hash[["hit","distance","e-value","score"].zip(h)])
104
+ end
105
+ end
106
+ puts JSON.pretty_generate({genomes: new_genome_hits, summary: summary})
107
+ else
108
+ @genome_hits.each do |g, hits|
109
+ hits.each do |h|
110
+ out = h.join("\t")
111
+ puts "#{g}\t#{out}"
112
+ end
55
113
  end
56
114
  end
57
115
 
58
116
  end
59
117
 
118
+ def summary
60
119
 
120
+ genome_hit_association = {}
61
121
 
122
+ @genome_hits.each do |g, hits|
123
+ genome_hit_association[hits[0][0]] = 0 if ! genome_hit_association.has_key? hits[0][0]
124
+ genome_hit_association[hits[0][0]] += 1
125
+ end
126
+
127
+ population = {
128
+ consensus: consensus_reference.first[0],
129
+ genome_hits: genome_hit_association
130
+ }
131
+
132
+ return population
133
+
134
+ end
62
135
 
63
- end
64
136
 
137
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-06 00:00:00.000000000 Z
11
+ date: 2018-04-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -124,7 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  version: '0'
125
125
  requirements: []
126
126
  rubyforge_project:
127
- rubygems_version: 2.5.1
127
+ rubygems_version: 2.6.14
128
128
  signing_key:
129
129
  specification_version: 4
130
130
  summary: Bacterial Annotator