miga-base 0.7.3.1 → 0.7.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +25 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +5 -2
- data/lib/miga/common/with_daemon_class.rb +1 -1
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +48 -53
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -3
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +24 -21
- data/utils/distance/pipeline.rb +23 -10
- data/utils/distance/runner.rb +20 -16
- data/utils/distance/temporal.rb +1 -3
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
data/utils/distance/base.rb
CHANGED
data/utils/distance/commands.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
|
2
1
|
module MiGA::DistanceRunner::Commands
|
3
2
|
# Estimates or calculates AAI against +target+
|
4
3
|
def aai(target)
|
5
4
|
# Check if the request makes sense
|
6
5
|
return nil if target.nil? || target.result(:essential_genes).nil?
|
6
|
+
|
7
7
|
# Check if it's been calculated
|
8
8
|
y = stored_value(target, :aai)
|
9
9
|
return y unless y.nil? || y.zero?
|
10
|
+
|
10
11
|
# Try hAAI (except in clade projects)
|
11
12
|
unless @ref_project.is_clade?
|
12
13
|
y = haai(target)
|
@@ -14,24 +15,27 @@ module MiGA::DistanceRunner::Commands
|
|
14
15
|
end
|
15
16
|
# Full AAI
|
16
17
|
aai_cmd(
|
17
|
-
|
18
|
-
|
18
|
+
tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
|
19
|
+
dataset.name, target.name, tmp_dbs[:aai]
|
20
|
+
).tap { checkpoint :aai }
|
19
21
|
end
|
20
22
|
|
21
23
|
##
|
22
24
|
# Estimates AAI against +target+ using hAAI
|
23
25
|
def haai(target)
|
24
26
|
return nil if opts[:haai_p] == 'no'
|
27
|
+
|
25
28
|
haai = aai_cmd(tmp_file('ess_genes.fa'),
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
+
target.result(:essential_genes).file_path(:ess_genes),
|
30
|
+
dataset.name, target.name, tmp_dbs[:haai],
|
31
|
+
aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
|
29
32
|
checkpoint :haai
|
30
33
|
return nil if haai.nil? || haai.zero? || haai > 90.0
|
31
|
-
|
34
|
+
|
35
|
+
aai = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
|
32
36
|
SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
|
33
37
|
conn.execute 'insert into aai values(?, ?, ?, 0, 0, 0)',
|
34
|
-
|
38
|
+
[dataset.name, target.name, aai]
|
35
39
|
end
|
36
40
|
checkpoint :aai
|
37
41
|
aai
|
@@ -44,13 +48,16 @@ module MiGA::DistanceRunner::Commands
|
|
44
48
|
t = tmp_file('largecontigs.fa')
|
45
49
|
r = target.result(:assembly)
|
46
50
|
return nil if r.nil? || !File.size?(t)
|
51
|
+
|
47
52
|
# Check if it's been calculated
|
48
53
|
y = stored_value(target, :ani)
|
49
54
|
return y unless y.nil? || y.zero?
|
55
|
+
|
50
56
|
# Run it
|
51
57
|
ani_cmd(
|
52
|
-
|
53
|
-
|
58
|
+
t, r.file_path(:largecontigs),
|
59
|
+
dataset.name, target.name, tmp_dbs[:ani]
|
60
|
+
).tap { checkpoint :ani }
|
54
61
|
end
|
55
62
|
|
56
63
|
##
|
@@ -74,7 +81,7 @@ module MiGA::DistanceRunner::Commands
|
|
74
81
|
|
75
82
|
##
|
76
83
|
# Execute an ANI command
|
77
|
-
def ani_cmd(f1, f2, n1, n2, db, o={})
|
84
|
+
def ani_cmd(f1, f2, n1, n2, db, o = {})
|
78
85
|
o = opts.merge(o)
|
79
86
|
v = nil
|
80
87
|
if o[:ani_p] == 'fastani'
|
@@ -83,7 +90,7 @@ module MiGA::DistanceRunner::Commands
|
|
83
90
|
unless out.empty?
|
84
91
|
SQLite3::Database.new(db) do |conn|
|
85
92
|
conn.execute 'insert into ani values(?, ?, ?, 0, ?, ?)',
|
86
|
-
|
93
|
+
[n1, n2, out[2], out[3], out[4]]
|
87
94
|
end
|
88
95
|
end
|
89
96
|
v = out[2]
|
data/utils/distance/database.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'sqlite3'
|
3
2
|
|
4
3
|
module MiGA::DistanceRunner::Database
|
@@ -9,7 +8,7 @@ module MiGA::DistanceRunner::Database
|
|
9
8
|
@dbs = {}
|
10
9
|
@tmp_dbs = {}
|
11
10
|
@db_counts = {}
|
12
|
-
{haai: :aai, aai: :aai, ani: :ani}.each do |m, t|
|
11
|
+
{ haai: :aai, aai: :aai, ani: :ani }.each do |m, t|
|
13
12
|
@db_counts[m] = 0
|
14
13
|
@dbs[m] = for_ref ? ref_db(m) : query_db(m)
|
15
14
|
# Remove if corrupt
|
@@ -25,9 +24,9 @@ module MiGA::DistanceRunner::Database
|
|
25
24
|
# Initialize if it doesn't exist
|
26
25
|
SQLite3::Database.new(dbs[m]) do |conn|
|
27
26
|
conn.execute "create table if not exists #{t}(" +
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
"seq1 varchar(256), seq2 varchar(256), " +
|
28
|
+
"#{t} float, sd float, n int, omega int" +
|
29
|
+
")"
|
31
30
|
end unless File.size? dbs[m]
|
32
31
|
# Copy over to (local) temporals
|
33
32
|
@tmp_dbs[m] = tmp_file("#{m}.db")
|
@@ -38,16 +37,17 @@ module MiGA::DistanceRunner::Database
|
|
38
37
|
##
|
39
38
|
# Path to the database +metric+ for +dataset_name+ in +project+
|
40
39
|
# (assumes that +dataset_name+ is a reference dataset)
|
41
|
-
def ref_db(metric, dataset_name=nil)
|
40
|
+
def ref_db(metric, dataset_name = nil)
|
42
41
|
dataset_name ||= dataset.name
|
43
|
-
b =
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
42
|
+
b =
|
43
|
+
case metric
|
44
|
+
when :haai
|
45
|
+
"01.haai/#{dataset_name}.db"
|
46
|
+
when :aai
|
47
|
+
"02.aai/#{dataset_name}.db"
|
48
|
+
when :ani
|
49
|
+
"03.ani/#{dataset_name}.db"
|
50
|
+
end
|
51
51
|
File.expand_path(b, home)
|
52
52
|
end
|
53
53
|
|
@@ -63,13 +63,14 @@ module MiGA::DistanceRunner::Database
|
|
63
63
|
def stored_value(target, metric)
|
64
64
|
# Check if self.dataset -> target is done (previous run)
|
65
65
|
y = value_from_db(dataset.name, target.name, tmp_dbs[metric], metric)
|
66
|
-
return y unless y.nil?
|
66
|
+
return y unless y.nil? || y.zero?
|
67
67
|
|
68
68
|
# Check if self.dataset <- target is done (another thread)
|
69
|
-
if dataset.is_ref?
|
69
|
+
if dataset.is_ref? && project.path == ref_project.path
|
70
70
|
y = data_from_db(
|
71
|
-
target.name, dataset.name, ref_db(metric, target.name), metric
|
72
|
-
|
71
|
+
target.name, dataset.name, ref_db(metric, target.name), metric
|
72
|
+
)
|
73
|
+
unless y.nil? || y.first.nil? || y.first.zero?
|
73
74
|
# Store a copy
|
74
75
|
data_to_db(dataset.name, target.name, tmp_dbs[metric], metric, y)
|
75
76
|
return y.first
|
@@ -94,7 +95,8 @@ module MiGA::DistanceRunner::Database
|
|
94
95
|
SQLite3::Database.new(db) do |conn|
|
95
96
|
y = conn.execute(
|
96
97
|
"select #{metric}, sd, n, omega from #{metric} where seq1=? and seq2=?",
|
97
|
-
[n1, n2]
|
98
|
+
[n1, n2]
|
99
|
+
).first
|
98
100
|
end if File.size? db
|
99
101
|
y
|
100
102
|
end
|
@@ -105,7 +107,8 @@ module MiGA::DistanceRunner::Database
|
|
105
107
|
SQLite3::Database.new(db) do |conn|
|
106
108
|
conn.execute(
|
107
109
|
"insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
|
108
|
-
"values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
|
110
|
+
"values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
|
111
|
+
)
|
109
112
|
end
|
110
113
|
checkpoint metric
|
111
114
|
end
|
@@ -114,7 +117,7 @@ module MiGA::DistanceRunner::Database
|
|
114
117
|
# Iterates for each entry in +db+
|
115
118
|
def foreach_in_db(db, metric, &blk)
|
116
119
|
SQLite3::Database.new(db) do |conn|
|
117
|
-
conn.execute("select * from #{metric}").each{ |r| blk[r] }
|
120
|
+
conn.execute("select * from #{metric}").each { |r| blk[r] }
|
118
121
|
end
|
119
122
|
end
|
120
123
|
end
|
data/utils/distance/pipeline.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
-
|
2
1
|
# High-end pipelines for DistanceRunner
|
3
2
|
module MiGA::DistanceRunner::Pipeline
|
4
|
-
|
5
3
|
# Recursively classify the dataset, returning an Array with two entries:
|
6
4
|
# classification and cluster number
|
7
5
|
def classify(clades, classif, metric, result_fh, val_cls = nil)
|
8
6
|
dir = File.expand_path(classif, clades)
|
9
7
|
med = File.expand_path('miga-project.medoids', dir)
|
10
|
-
return [classif,val_cls] unless File.size? med
|
8
|
+
return [classif, val_cls] unless File.size? med
|
9
|
+
|
11
10
|
max_val = 0
|
12
11
|
val_med = ''
|
13
12
|
val_cls = nil
|
@@ -30,11 +29,22 @@ module MiGA::DistanceRunner::Pipeline
|
|
30
29
|
classify(clades, classif, metric, result_fh, val_cls)
|
31
30
|
end
|
32
31
|
|
32
|
+
# Run distances against datasets listed in metadata's +:dist_req+
|
33
|
+
def distances_by_request(metric)
|
34
|
+
return unless dataset.metadata[:dist_req]
|
35
|
+
|
36
|
+
$stderr.puts 'Running distances by request'
|
37
|
+
dataset.metadata[:dist_req].each do |target|
|
38
|
+
ds = ref_project.dataset(target) and send(metric, ds)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
33
42
|
# Builds a tree with all visited medoids from any classification level
|
34
43
|
def build_medoids_tree(metric)
|
35
44
|
$stderr.puts "Building medoids tree (metric = #{metric})"
|
36
45
|
db = query_db(metric)
|
37
46
|
return unless File.size? db
|
47
|
+
|
38
48
|
out_base = File.expand_path(dataset.name, home)
|
39
49
|
ds_matrix = "#{out_base}.txt"
|
40
50
|
ds_matrix_fh = File.open(ds_matrix, 'w')
|
@@ -43,7 +53,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
43
53
|
seq2 = []
|
44
54
|
foreach_in_db(db, metric) do |r|
|
45
55
|
seq2 << r[0]
|
46
|
-
ds_matrix_fh.puts r[0,3].join("\t")
|
56
|
+
ds_matrix_fh.puts r[0, 3].join("\t")
|
47
57
|
end
|
48
58
|
# Find all values among visited datasets in ref_project
|
49
59
|
ref_r = ref_project.result("#{metric}_distances") or return
|
@@ -51,7 +61,8 @@ module MiGA::DistanceRunner::Pipeline
|
|
51
61
|
fh.each_line do |ln|
|
52
62
|
r = ln.chomp.split("\t")
|
53
63
|
next unless seq2.include?(r[1]) or seq2.include?(r[2])
|
54
|
-
|
64
|
+
|
65
|
+
ds_matrix_fh.puts r[1, 3].join("\t")
|
55
66
|
end
|
56
67
|
end
|
57
68
|
ds_matrix_fh.close
|
@@ -74,11 +85,12 @@ module MiGA::DistanceRunner::Pipeline
|
|
74
85
|
dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
|
75
86
|
cr = dataset.closest_relatives(1, from_ref_project)
|
76
87
|
return if cr.nil? or cr.empty?
|
88
|
+
|
77
89
|
tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
|
78
90
|
|
79
91
|
# Run the test for each rank
|
80
92
|
tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
|
81
|
-
r = tax_test.map do |k,v|
|
93
|
+
r = tax_test.map do |k, v|
|
82
94
|
sig = ''
|
83
95
|
[0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
|
84
96
|
[MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
@@ -97,12 +109,13 @@ module MiGA::DistanceRunner::Pipeline
|
|
97
109
|
|
98
110
|
# Transfer the taxonomy to the current dataset
|
99
111
|
def transfer_taxonomy(tax)
|
100
|
-
$stderr.puts
|
112
|
+
$stderr.puts 'Transferring taxonomy'
|
101
113
|
return if tax.nil?
|
114
|
+
|
102
115
|
pval = (project.metadata[:tax_pvalue] || 0.05).to_f
|
103
|
-
tax_a = tax
|
104
|
-
|
105
|
-
|
116
|
+
tax_a = tax
|
117
|
+
.select { |i| i[1] != '?' && i[2] <= pval }
|
118
|
+
.map { |i| i[0, 2].join(':') }
|
106
119
|
dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
|
107
120
|
dataset.save
|
108
121
|
end
|
data/utils/distance/runner.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
|
2
1
|
require_relative 'base.rb'
|
3
2
|
require_relative 'temporal.rb'
|
4
3
|
require_relative 'database.rb'
|
5
4
|
require_relative 'commands.rb'
|
6
5
|
require_relative 'pipeline.rb'
|
7
6
|
|
8
|
-
|
9
7
|
class MiGA::DistanceRunner
|
10
|
-
|
11
8
|
include MiGA::DistanceRunner::Temporal
|
12
9
|
include MiGA::DistanceRunner::Database
|
13
10
|
include MiGA::DistanceRunner::Commands
|
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
|
|
16
13
|
attr_reader :project, :ref_project, :dataset, :opts, :home
|
17
14
|
attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
|
18
15
|
|
19
|
-
def initialize(project_path, dataset_name, opts_hash={})
|
16
|
+
def initialize(project_path, dataset_name, opts_hash = {})
|
20
17
|
@opts = opts_hash
|
21
18
|
@project = MiGA::Project.load(project_path) or
|
22
19
|
raise "No project at #{project_path}"
|
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
|
|
30
27
|
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
31
28
|
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
32
29
|
end
|
33
|
-
@opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
|
30
|
+
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
34
31
|
if opts[:run_taxonomy] and project.metadata[:ref_project]
|
35
32
|
ref_path = project.metadata[:ref_project]
|
36
33
|
@home = File.expand_path('05.taxonomy', @home)
|
@@ -60,6 +57,7 @@ class MiGA::DistanceRunner
|
|
60
57
|
def go!
|
61
58
|
$stderr.puts "Launching analysis"
|
62
59
|
return if dataset.is_multi?
|
60
|
+
|
63
61
|
Dir.mktmpdir do |tmp_dir|
|
64
62
|
@tmp = tmp_dir
|
65
63
|
create_temporals
|
@@ -69,25 +67,26 @@ class MiGA::DistanceRunner
|
|
69
67
|
|
70
68
|
# Launch analysis for reference datasets
|
71
69
|
def go_ref!
|
72
|
-
$stderr.puts
|
70
|
+
$stderr.puts 'Launching analysis for reference dataset'
|
73
71
|
# Initialize databases
|
74
72
|
initialize_dbs! true
|
75
73
|
|
76
74
|
# first-come-first-serve traverse
|
77
75
|
ref_project.each_dataset do |ds|
|
78
76
|
next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
|
77
|
+
|
79
78
|
puts "[ #{Time.now} ] #{ds.name}"
|
80
79
|
ani_after_aai(ds)
|
81
80
|
end
|
82
81
|
|
83
82
|
# Finalize
|
84
|
-
[
|
83
|
+
%i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
85
84
|
end
|
86
85
|
|
87
86
|
##
|
88
87
|
# Launch analysis for query datasets
|
89
88
|
def go_query!
|
90
|
-
$stderr.puts
|
89
|
+
$stderr.puts 'Launching analysis for query dataset'
|
91
90
|
# Check if project is ready
|
92
91
|
tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
|
93
92
|
res = ref_project.result(tsk[0])
|
@@ -95,6 +94,7 @@ class MiGA::DistanceRunner
|
|
95
94
|
|
96
95
|
# Initialize the databases
|
97
96
|
initialize_dbs! false
|
97
|
+
distances_by_request(tsk[1])
|
98
98
|
# Calculate the classification-informed AAI/ANI traverse
|
99
99
|
results = File.expand_path("#{dataset.name}.#{tsk[1]}-medoids.tsv", home)
|
100
100
|
fh = File.open(results, 'w')
|
@@ -104,14 +104,17 @@ class MiGA::DistanceRunner
|
|
104
104
|
# Calculate all the AAIs/ANIs against the lowest subclade (if classified)
|
105
105
|
par_dir = File.dirname(File.expand_path(classif, res.dir))
|
106
106
|
par = File.expand_path('miga-project.classif', par_dir)
|
107
|
-
closest = {dataset: nil, ani: 0.0}
|
107
|
+
closest = { dataset: nil, ani: 0.0 }
|
108
108
|
if File.size? par
|
109
109
|
File.open(par, 'r') do |fh|
|
110
110
|
fh.each_line do |ln|
|
111
111
|
r = ln.chomp.split("\t")
|
112
112
|
next unless r[1].to_i == val_cls
|
113
|
+
|
113
114
|
ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
|
114
|
-
|
115
|
+
unless ani.nil? || ani < closest[:ani]
|
116
|
+
closest = { ds: r[0], ani: ani }
|
117
|
+
end
|
115
118
|
end
|
116
119
|
end
|
117
120
|
end
|
@@ -119,22 +122,23 @@ class MiGA::DistanceRunner
|
|
119
122
|
# Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
|
120
123
|
cl_path = res.file_path :clades_ani95
|
121
124
|
if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
|
122
|
-
File.foreach(cl_path)
|
123
|
-
|
124
|
-
|
125
|
-
|
125
|
+
File.foreach(cl_path)
|
126
|
+
.map { |i| i.chomp.split(',') }
|
127
|
+
.find(lambda { [] }) { |i| i.include? closest[:ds] }
|
128
|
+
.each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
|
126
129
|
end
|
127
130
|
|
128
131
|
# Finalize
|
129
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
132
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
130
133
|
build_medoids_tree(tsk[1])
|
131
134
|
transfer_taxonomy(tax_test)
|
132
135
|
end
|
133
136
|
|
134
137
|
# Launch analysis for taxonomy jobs
|
135
138
|
def go_taxonomy!
|
136
|
-
$stderr.puts
|
139
|
+
$stderr.puts 'Launching taxonomy analysis'
|
137
140
|
return unless project.metadata[:ref_project]
|
141
|
+
|
138
142
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
139
143
|
end
|
140
144
|
end
|
data/utils/distance/temporal.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
|
2
1
|
require 'tmpdir'
|
3
2
|
require 'zlib'
|
4
3
|
|
5
4
|
module MiGA::DistanceRunner::Temporal
|
6
|
-
|
7
5
|
# Copy input files to the (local) temporal folder
|
8
6
|
def create_temporals
|
9
7
|
rf = {
|
@@ -43,7 +41,7 @@ module MiGA::DistanceRunner::Temporal
|
|
43
41
|
def checkpoint!(metric)
|
44
42
|
$stderr.puts "Checkpoint (metric = #{metric})"
|
45
43
|
SQLite3::Database.new(tmp_dbs[metric]) do |conn|
|
46
|
-
conn.execute("select count(*) from #{metric
|
44
|
+
conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
|
47
45
|
end
|
48
46
|
FileUtils.cp(tmp_dbs[metric], dbs[metric])
|
49
47
|
@db_counts[metric] = 0
|
data/utils/distances.rb
CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
|
|
4
4
|
|
5
5
|
dataset = ARGV.shift
|
6
6
|
project = ARGV.shift
|
7
|
-
opts = Hash[
|
7
|
+
opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
|
8
8
|
runner = MiGA::DistanceRunner.new(dataset, project, opts)
|
9
9
|
runner.go!
|
data/utils/domain-ess-genes.rb
CHANGED
@@ -6,10 +6,10 @@ domain = ARGV.shift
|
|
6
6
|
|
7
7
|
def quality(hsh)
|
8
8
|
q = {}
|
9
|
-
q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
|
10
|
-
q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
|
11
|
-
q[:cmp] = 100.0*q[:found].to_f/hsh.size
|
12
|
-
q[:cnt] = 100.0*q[:multi].to_f/hsh.size
|
9
|
+
q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
|
10
|
+
q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
|
11
|
+
q[:cmp] = 100.0 * q[:found].to_f / hsh.size
|
12
|
+
q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
|
13
13
|
q
|
14
14
|
end
|
15
15
|
|
@@ -39,7 +39,7 @@ end
|
|
39
39
|
# Find expected genes for domain
|
40
40
|
n_dom = Hash[
|
41
41
|
`HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
|
42
|
-
|
42
|
+
.chomp.split("\n").map { |i| i.split("\t") }
|
43
43
|
]
|
44
44
|
l_dom = n_dom.keys
|
45
45
|
cnt_dom = {}
|
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
|
|
54
54
|
ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
|
55
55
|
if q[:multi] > 0
|
56
56
|
ofh.puts "! Multiple copies: "
|
57
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
|
57
|
+
cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
|
58
58
|
end
|
59
59
|
if q[:found] < cnt_dom.size
|
60
60
|
ofh.puts "! Missing genes: "
|
61
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
|
61
|
+
cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
|
62
62
|
end
|
63
63
|
end
|