miga-base 0.7.3.0 → 0.7.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +21 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +6 -3
- data/lib/miga/common/with_daemon_class.rb +3 -2
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +34 -25
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -4
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +25 -21
- data/utils/distance/pipeline.rb +16 -10
- data/utils/distance/runner.rb +19 -13
- data/utils/distance/temporal.rb +7 -4
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
data/utils/distance/base.rb
CHANGED
data/utils/distance/commands.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
|
2
1
|
module MiGA::DistanceRunner::Commands
|
3
2
|
# Estimates or calculates AAI against +target+
|
4
3
|
def aai(target)
|
5
4
|
# Check if the request makes sense
|
6
5
|
return nil if target.nil? || target.result(:essential_genes).nil?
|
6
|
+
|
7
7
|
# Check if it's been calculated
|
8
8
|
y = stored_value(target, :aai)
|
9
9
|
return y unless y.nil? || y.zero?
|
10
|
+
|
10
11
|
# Try hAAI (except in clade projects)
|
11
12
|
unless @ref_project.is_clade?
|
12
13
|
y = haai(target)
|
@@ -14,24 +15,27 @@ module MiGA::DistanceRunner::Commands
|
|
14
15
|
end
|
15
16
|
# Full AAI
|
16
17
|
aai_cmd(
|
17
|
-
|
18
|
-
|
18
|
+
tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
|
19
|
+
dataset.name, target.name, tmp_dbs[:aai]
|
20
|
+
).tap { checkpoint :aai }
|
19
21
|
end
|
20
22
|
|
21
23
|
##
|
22
24
|
# Estimates AAI against +target+ using hAAI
|
23
25
|
def haai(target)
|
24
26
|
return nil if opts[:haai_p] == 'no'
|
27
|
+
|
25
28
|
haai = aai_cmd(tmp_file('ess_genes.fa'),
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
+
target.result(:essential_genes).file_path(:ess_genes),
|
30
|
+
dataset.name, target.name, tmp_dbs[:haai],
|
31
|
+
aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
|
29
32
|
checkpoint :haai
|
30
33
|
return nil if haai.nil? || haai.zero? || haai > 90.0
|
31
|
-
|
34
|
+
|
35
|
+
aai = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
|
32
36
|
SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
|
33
37
|
conn.execute 'insert into aai values(?, ?, ?, 0, 0, 0)',
|
34
|
-
|
38
|
+
[dataset.name, target.name, aai]
|
35
39
|
end
|
36
40
|
checkpoint :aai
|
37
41
|
aai
|
@@ -44,13 +48,16 @@ module MiGA::DistanceRunner::Commands
|
|
44
48
|
t = tmp_file('largecontigs.fa')
|
45
49
|
r = target.result(:assembly)
|
46
50
|
return nil if r.nil? || !File.size?(t)
|
51
|
+
|
47
52
|
# Check if it's been calculated
|
48
53
|
y = stored_value(target, :ani)
|
49
54
|
return y unless y.nil? || y.zero?
|
55
|
+
|
50
56
|
# Run it
|
51
57
|
ani_cmd(
|
52
|
-
|
53
|
-
|
58
|
+
t, r.file_path(:largecontigs),
|
59
|
+
dataset.name, target.name, tmp_dbs[:ani]
|
60
|
+
).tap { checkpoint :ani }
|
54
61
|
end
|
55
62
|
|
56
63
|
##
|
@@ -74,7 +81,7 @@ module MiGA::DistanceRunner::Commands
|
|
74
81
|
|
75
82
|
##
|
76
83
|
# Execute an ANI command
|
77
|
-
def ani_cmd(f1, f2, n1, n2, db, o={})
|
84
|
+
def ani_cmd(f1, f2, n1, n2, db, o = {})
|
78
85
|
o = opts.merge(o)
|
79
86
|
v = nil
|
80
87
|
if o[:ani_p] == 'fastani'
|
@@ -83,7 +90,7 @@ module MiGA::DistanceRunner::Commands
|
|
83
90
|
unless out.empty?
|
84
91
|
SQLite3::Database.new(db) do |conn|
|
85
92
|
conn.execute 'insert into ani values(?, ?, ?, 0, ?, ?)',
|
86
|
-
|
93
|
+
[n1, n2, out[2], out[3], out[4]]
|
87
94
|
end
|
88
95
|
end
|
89
96
|
v = out[2]
|
data/utils/distance/database.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
|
2
1
|
require 'sqlite3'
|
3
2
|
|
4
3
|
module MiGA::DistanceRunner::Database
|
5
4
|
##
|
6
5
|
# Check for corrupt files and create empty databases
|
7
6
|
def initialize_dbs!(for_ref)
|
7
|
+
$stderr.puts "Initializing databases (for_ref = #{for_ref})"
|
8
8
|
@dbs = {}
|
9
9
|
@tmp_dbs = {}
|
10
10
|
@db_counts = {}
|
11
|
-
{haai: :aai, aai: :aai, ani: :ani}.each do |m, t|
|
11
|
+
{ haai: :aai, aai: :aai, ani: :ani }.each do |m, t|
|
12
12
|
@db_counts[m] = 0
|
13
13
|
@dbs[m] = for_ref ? ref_db(m) : query_db(m)
|
14
14
|
# Remove if corrupt
|
@@ -24,9 +24,9 @@ module MiGA::DistanceRunner::Database
|
|
24
24
|
# Initialize if it doesn't exist
|
25
25
|
SQLite3::Database.new(dbs[m]) do |conn|
|
26
26
|
conn.execute "create table if not exists #{t}(" +
|
27
|
-
|
28
|
-
|
29
|
-
|
27
|
+
"seq1 varchar(256), seq2 varchar(256), " +
|
28
|
+
"#{t} float, sd float, n int, omega int" +
|
29
|
+
")"
|
30
30
|
end unless File.size? dbs[m]
|
31
31
|
# Copy over to (local) temporals
|
32
32
|
@tmp_dbs[m] = tmp_file("#{m}.db")
|
@@ -37,16 +37,17 @@ module MiGA::DistanceRunner::Database
|
|
37
37
|
##
|
38
38
|
# Path to the database +metric+ for +dataset_name+ in +project+
|
39
39
|
# (assumes that +dataset_name+ is a reference dataset)
|
40
|
-
def ref_db(metric, dataset_name=nil)
|
40
|
+
def ref_db(metric, dataset_name = nil)
|
41
41
|
dataset_name ||= dataset.name
|
42
|
-
b =
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
b =
|
43
|
+
case metric
|
44
|
+
when :haai
|
45
|
+
"01.haai/#{dataset_name}.db"
|
46
|
+
when :aai
|
47
|
+
"02.aai/#{dataset_name}.db"
|
48
|
+
when :ani
|
49
|
+
"03.ani/#{dataset_name}.db"
|
50
|
+
end
|
50
51
|
File.expand_path(b, home)
|
51
52
|
end
|
52
53
|
|
@@ -62,13 +63,14 @@ module MiGA::DistanceRunner::Database
|
|
62
63
|
def stored_value(target, metric)
|
63
64
|
# Check if self.dataset -> target is done (previous run)
|
64
65
|
y = value_from_db(dataset.name, target.name, tmp_dbs[metric], metric)
|
65
|
-
return y unless y.nil?
|
66
|
+
return y unless y.nil? || y.zero?
|
66
67
|
|
67
68
|
# Check if self.dataset <- target is done (another thread)
|
68
|
-
if dataset.is_ref?
|
69
|
+
if dataset.is_ref? && project.path == ref_project.path
|
69
70
|
y = data_from_db(
|
70
|
-
target.name, dataset.name, ref_db(metric, target.name), metric
|
71
|
-
|
71
|
+
target.name, dataset.name, ref_db(metric, target.name), metric
|
72
|
+
)
|
73
|
+
unless y.nil? || y.first.nil? || y.first.zero?
|
72
74
|
# Store a copy
|
73
75
|
data_to_db(dataset.name, target.name, tmp_dbs[metric], metric, y)
|
74
76
|
return y.first
|
@@ -93,7 +95,8 @@ module MiGA::DistanceRunner::Database
|
|
93
95
|
SQLite3::Database.new(db) do |conn|
|
94
96
|
y = conn.execute(
|
95
97
|
"select #{metric}, sd, n, omega from #{metric} where seq1=? and seq2=?",
|
96
|
-
[n1, n2]
|
98
|
+
[n1, n2]
|
99
|
+
).first
|
97
100
|
end if File.size? db
|
98
101
|
y
|
99
102
|
end
|
@@ -104,7 +107,8 @@ module MiGA::DistanceRunner::Database
|
|
104
107
|
SQLite3::Database.new(db) do |conn|
|
105
108
|
conn.execute(
|
106
109
|
"insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
|
107
|
-
"values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
|
110
|
+
"values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
|
111
|
+
)
|
108
112
|
end
|
109
113
|
checkpoint metric
|
110
114
|
end
|
@@ -113,7 +117,7 @@ module MiGA::DistanceRunner::Database
|
|
113
117
|
# Iterates for each entry in +db+
|
114
118
|
def foreach_in_db(db, metric, &blk)
|
115
119
|
SQLite3::Database.new(db) do |conn|
|
116
|
-
conn.execute("select * from #{metric}").each{ |r| blk[r] }
|
120
|
+
conn.execute("select * from #{metric}").each { |r| blk[r] }
|
117
121
|
end
|
118
122
|
end
|
119
123
|
end
|
data/utils/distance/pipeline.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
-
|
2
1
|
# High-end pipelines for DistanceRunner
|
3
2
|
module MiGA::DistanceRunner::Pipeline
|
4
|
-
|
5
3
|
# Recursively classify the dataset, returning an Array with two entries:
|
6
4
|
# classification and cluster number
|
7
|
-
def classify(clades, classif, metric, result_fh, val_cls=nil)
|
5
|
+
def classify(clades, classif, metric, result_fh, val_cls = nil)
|
8
6
|
dir = File.expand_path(classif, clades)
|
9
7
|
med = File.expand_path('miga-project.medoids', dir)
|
10
|
-
return [classif,val_cls] unless File.size? med
|
8
|
+
return [classif, val_cls] unless File.size? med
|
9
|
+
|
11
10
|
max_val = 0
|
12
11
|
val_med = ''
|
13
12
|
val_cls = nil
|
@@ -32,8 +31,10 @@ module MiGA::DistanceRunner::Pipeline
|
|
32
31
|
|
33
32
|
# Builds a tree with all visited medoids from any classification level
|
34
33
|
def build_medoids_tree(metric)
|
34
|
+
$stderr.puts "Building medoids tree (metric = #{metric})"
|
35
35
|
db = query_db(metric)
|
36
36
|
return unless File.size? db
|
37
|
+
|
37
38
|
out_base = File.expand_path(dataset.name, home)
|
38
39
|
ds_matrix = "#{out_base}.txt"
|
39
40
|
ds_matrix_fh = File.open(ds_matrix, 'w')
|
@@ -42,7 +43,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
42
43
|
seq2 = []
|
43
44
|
foreach_in_db(db, metric) do |r|
|
44
45
|
seq2 << r[0]
|
45
|
-
ds_matrix_fh.puts r[0,3].join("\t")
|
46
|
+
ds_matrix_fh.puts r[0, 3].join("\t")
|
46
47
|
end
|
47
48
|
# Find all values among visited datasets in ref_project
|
48
49
|
ref_r = ref_project.result("#{metric}_distances") or return
|
@@ -50,7 +51,8 @@ module MiGA::DistanceRunner::Pipeline
|
|
50
51
|
fh.each_line do |ln|
|
51
52
|
r = ln.chomp.split("\t")
|
52
53
|
next unless seq2.include?(r[1]) or seq2.include?(r[2])
|
53
|
-
|
54
|
+
|
55
|
+
ds_matrix_fh.puts r[1, 3].join("\t")
|
54
56
|
end
|
55
57
|
end
|
56
58
|
ds_matrix_fh.close
|
@@ -61,6 +63,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
61
63
|
|
62
64
|
# Tests taxonomy
|
63
65
|
def tax_test
|
66
|
+
$stderr.puts "Testing taxonomy | opts = #{opts}"
|
64
67
|
# Get taxonomy of closest relative
|
65
68
|
from_ref_project = (project != ref_project)
|
66
69
|
res_dir = from_ref_project ?
|
@@ -72,11 +75,12 @@ module MiGA::DistanceRunner::Pipeline
|
|
72
75
|
dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
|
73
76
|
cr = dataset.closest_relatives(1, from_ref_project)
|
74
77
|
return if cr.nil? or cr.empty?
|
78
|
+
|
75
79
|
tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
|
76
80
|
|
77
81
|
# Run the test for each rank
|
78
82
|
tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
|
79
|
-
r = tax_test.map do |k,v|
|
83
|
+
r = tax_test.map do |k, v|
|
80
84
|
sig = ''
|
81
85
|
[0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
|
82
86
|
[MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
@@ -95,11 +99,13 @@ module MiGA::DistanceRunner::Pipeline
|
|
95
99
|
|
96
100
|
# Transfer the taxonomy to the current dataset
|
97
101
|
def transfer_taxonomy(tax)
|
102
|
+
$stderr.puts "Transferring taxonomy"
|
98
103
|
return if tax.nil?
|
104
|
+
|
99
105
|
pval = (project.metadata[:tax_pvalue] || 0.05).to_f
|
100
|
-
tax_a = tax
|
101
|
-
|
102
|
-
|
106
|
+
tax_a = tax
|
107
|
+
.select { |i| i[1] != '?' && i[2] <= pval }
|
108
|
+
.map { |i| i[0, 2].join(':') }
|
103
109
|
dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
|
104
110
|
dataset.save
|
105
111
|
end
|
data/utils/distance/runner.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
|
2
1
|
require_relative 'base.rb'
|
3
2
|
require_relative 'temporal.rb'
|
4
3
|
require_relative 'database.rb'
|
5
4
|
require_relative 'commands.rb'
|
6
5
|
require_relative 'pipeline.rb'
|
7
6
|
|
8
|
-
|
9
7
|
class MiGA::DistanceRunner
|
10
|
-
|
11
8
|
include MiGA::DistanceRunner::Temporal
|
12
9
|
include MiGA::DistanceRunner::Database
|
13
10
|
include MiGA::DistanceRunner::Commands
|
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
|
|
16
13
|
attr_reader :project, :ref_project, :dataset, :opts, :home
|
17
14
|
attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
|
18
15
|
|
19
|
-
def initialize(project_path, dataset_name, opts_hash={})
|
16
|
+
def initialize(project_path, dataset_name, opts_hash = {})
|
20
17
|
@opts = opts_hash
|
21
18
|
@project = MiGA::Project.load(project_path) or
|
22
19
|
raise "No project at #{project_path}"
|
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
|
|
30
27
|
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
31
28
|
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
32
29
|
end
|
33
|
-
@opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
|
30
|
+
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
34
31
|
if opts[:run_taxonomy] and project.metadata[:ref_project]
|
35
32
|
ref_path = project.metadata[:ref_project]
|
36
33
|
@home = File.expand_path('05.taxonomy', @home)
|
@@ -53,11 +50,14 @@ class MiGA::DistanceRunner
|
|
53
50
|
@opts[:ani_p] ||= 'blast+'
|
54
51
|
@opts[:distances_checkpoint] ||= 10
|
55
52
|
@opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
|
53
|
+
$stderr.puts "Options: #{opts}"
|
56
54
|
end
|
57
55
|
|
58
56
|
# Launch the appropriate analysis
|
59
57
|
def go!
|
58
|
+
$stderr.puts "Launching analysis"
|
60
59
|
return if dataset.is_multi?
|
60
|
+
|
61
61
|
Dir.mktmpdir do |tmp_dir|
|
62
62
|
@tmp = tmp_dir
|
63
63
|
create_temporals
|
@@ -67,23 +67,26 @@ class MiGA::DistanceRunner
|
|
67
67
|
|
68
68
|
# Launch analysis for reference datasets
|
69
69
|
def go_ref!
|
70
|
+
$stderr.puts "Launching analysis for reference dataset"
|
70
71
|
# Initialize databases
|
71
72
|
initialize_dbs! true
|
72
73
|
|
73
74
|
# first-come-first-serve traverse
|
74
75
|
ref_project.each_dataset do |ds|
|
75
76
|
next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
|
77
|
+
|
76
78
|
puts "[ #{Time.now} ] #{ds.name}"
|
77
79
|
ani_after_aai(ds)
|
78
80
|
end
|
79
81
|
|
80
82
|
# Finalize
|
81
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
83
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
82
84
|
end
|
83
85
|
|
84
86
|
##
|
85
87
|
# Launch analysis for query datasets
|
86
88
|
def go_query!
|
89
|
+
$stderr.puts "Launching analysis for query dataset"
|
87
90
|
# Check if project is ready
|
88
91
|
tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
|
89
92
|
res = ref_project.result(tsk[0])
|
@@ -100,14 +103,15 @@ class MiGA::DistanceRunner
|
|
100
103
|
# Calculate all the AAIs/ANIs against the lowest subclade (if classified)
|
101
104
|
par_dir = File.dirname(File.expand_path(classif, res.dir))
|
102
105
|
par = File.expand_path('miga-project.classif', par_dir)
|
103
|
-
closest = {dataset: nil, ani: 0.0}
|
106
|
+
closest = { dataset: nil, ani: 0.0 }
|
104
107
|
if File.size? par
|
105
108
|
File.open(par, 'r') do |fh|
|
106
109
|
fh.each_line do |ln|
|
107
110
|
r = ln.chomp.split("\t")
|
108
111
|
next unless r[1].to_i == val_cls
|
112
|
+
|
109
113
|
ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
|
110
|
-
closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
|
114
|
+
closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
|
111
115
|
end
|
112
116
|
end
|
113
117
|
end
|
@@ -115,21 +119,23 @@ class MiGA::DistanceRunner
|
|
115
119
|
# Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
|
116
120
|
cl_path = res.file_path :clades_ani95
|
117
121
|
if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
|
118
|
-
File.foreach(cl_path)
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
+
File.foreach(cl_path)
|
123
|
+
.map { |i| i.chomp.split(',') }
|
124
|
+
.find(lambda { [] }) { |i| i.include? closest[:ds] }
|
125
|
+
.each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
|
122
126
|
end
|
123
127
|
|
124
128
|
# Finalize
|
125
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
129
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
126
130
|
build_medoids_tree(tsk[1])
|
127
131
|
transfer_taxonomy(tax_test)
|
128
132
|
end
|
129
133
|
|
130
134
|
# Launch analysis for taxonomy jobs
|
131
135
|
def go_taxonomy!
|
136
|
+
$stderr.puts "Launching taxonomy analysis"
|
132
137
|
return unless project.metadata[:ref_project]
|
138
|
+
|
133
139
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
134
140
|
end
|
135
141
|
end
|
data/utils/distance/temporal.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
|
-
|
2
1
|
require 'tmpdir'
|
3
2
|
require 'zlib'
|
4
3
|
|
5
4
|
module MiGA::DistanceRunner::Temporal
|
6
|
-
|
7
5
|
# Copy input files to the (local) temporal folder
|
8
6
|
def create_temporals
|
9
|
-
rf = {
|
7
|
+
rf = {
|
8
|
+
essential_genes: :ess_genes,
|
9
|
+
cds: :proteins,
|
10
|
+
assembly: :largecontigs
|
11
|
+
}
|
10
12
|
rf.each do |res, file|
|
11
13
|
r = dataset.result(res)
|
12
14
|
f = r.nil? ? nil : r.file_path(file)
|
@@ -37,8 +39,9 @@ module MiGA::DistanceRunner::Temporal
|
|
37
39
|
|
38
40
|
# Copies temporal databases back to the MiGA Project
|
39
41
|
def checkpoint!(metric)
|
42
|
+
$stderr.puts "Checkpoint (metric = #{metric})"
|
40
43
|
SQLite3::Database.new(tmp_dbs[metric]) do |conn|
|
41
|
-
conn.execute("select count(*) from #{metric
|
44
|
+
conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
|
42
45
|
end
|
43
46
|
FileUtils.cp(tmp_dbs[metric], dbs[metric])
|
44
47
|
@db_counts[metric] = 0
|
data/utils/distances.rb
CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
|
|
4
4
|
|
5
5
|
dataset = ARGV.shift
|
6
6
|
project = ARGV.shift
|
7
|
-
opts = Hash[
|
7
|
+
opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
|
8
8
|
runner = MiGA::DistanceRunner.new(dataset, project, opts)
|
9
9
|
runner.go!
|
data/utils/domain-ess-genes.rb
CHANGED
@@ -6,10 +6,10 @@ domain = ARGV.shift
|
|
6
6
|
|
7
7
|
def quality(hsh)
|
8
8
|
q = {}
|
9
|
-
q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
|
10
|
-
q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
|
11
|
-
q[:cmp] = 100.0*q[:found].to_f/hsh.size
|
12
|
-
q[:cnt] = 100.0*q[:multi].to_f/hsh.size
|
9
|
+
q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
|
10
|
+
q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
|
11
|
+
q[:cmp] = 100.0 * q[:found].to_f / hsh.size
|
12
|
+
q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
|
13
13
|
q
|
14
14
|
end
|
15
15
|
|
@@ -39,7 +39,7 @@ end
|
|
39
39
|
# Find expected genes for domain
|
40
40
|
n_dom = Hash[
|
41
41
|
`HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
|
42
|
-
|
42
|
+
.chomp.split("\n").map { |i| i.split("\t") }
|
43
43
|
]
|
44
44
|
l_dom = n_dom.keys
|
45
45
|
cnt_dom = {}
|
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
|
|
54
54
|
ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
|
55
55
|
if q[:multi] > 0
|
56
56
|
ofh.puts "! Multiple copies: "
|
57
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
|
57
|
+
cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
|
58
58
|
end
|
59
59
|
if q[:found] < cnt_dom.size
|
60
60
|
ofh.puts "! Missing genes: "
|
61
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
|
61
|
+
cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
|
62
62
|
end
|
63
63
|
end
|