miga-base 0.7.3.0 → 0.7.7.0
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +21 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +6 -3
- data/lib/miga/common/with_daemon_class.rb +3 -2
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +34 -25
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -4
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +25 -21
- data/utils/distance/pipeline.rb +16 -10
- data/utils/distance/runner.rb +19 -13
- data/utils/distance/temporal.rb +7 -4
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
data/utils/distance/base.rb
CHANGED
data/utils/distance/commands.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
|
2
1
|
module MiGA::DistanceRunner::Commands
|
3
2
|
# Estimates or calculates AAI against +target+
|
4
3
|
def aai(target)
|
5
4
|
# Check if the request makes sense
|
6
5
|
return nil if target.nil? || target.result(:essential_genes).nil?
|
6
|
+
|
7
7
|
# Check if it's been calculated
|
8
8
|
y = stored_value(target, :aai)
|
9
9
|
return y unless y.nil? || y.zero?
|
10
|
+
|
10
11
|
# Try hAAI (except in clade projects)
|
11
12
|
unless @ref_project.is_clade?
|
12
13
|
y = haai(target)
|
@@ -14,24 +15,27 @@ module MiGA::DistanceRunner::Commands
|
|
14
15
|
end
|
15
16
|
# Full AAI
|
16
17
|
aai_cmd(
|
17
|
-
|
18
|
-
|
18
|
+
tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
|
19
|
+
dataset.name, target.name, tmp_dbs[:aai]
|
20
|
+
).tap { checkpoint :aai }
|
19
21
|
end
|
20
22
|
|
21
23
|
##
|
22
24
|
# Estimates AAI against +target+ using hAAI
|
23
25
|
def haai(target)
|
24
26
|
return nil if opts[:haai_p] == 'no'
|
27
|
+
|
25
28
|
haai = aai_cmd(tmp_file('ess_genes.fa'),
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
+
target.result(:essential_genes).file_path(:ess_genes),
|
30
|
+
dataset.name, target.name, tmp_dbs[:haai],
|
31
|
+
aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
|
29
32
|
checkpoint :haai
|
30
33
|
return nil if haai.nil? || haai.zero? || haai > 90.0
|
31
|
-
|
34
|
+
|
35
|
+
aai = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
|
32
36
|
SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
|
33
37
|
conn.execute 'insert into aai values(?, ?, ?, 0, 0, 0)',
|
34
|
-
|
38
|
+
[dataset.name, target.name, aai]
|
35
39
|
end
|
36
40
|
checkpoint :aai
|
37
41
|
aai
|
@@ -44,13 +48,16 @@ module MiGA::DistanceRunner::Commands
|
|
44
48
|
t = tmp_file('largecontigs.fa')
|
45
49
|
r = target.result(:assembly)
|
46
50
|
return nil if r.nil? || !File.size?(t)
|
51
|
+
|
47
52
|
# Check if it's been calculated
|
48
53
|
y = stored_value(target, :ani)
|
49
54
|
return y unless y.nil? || y.zero?
|
55
|
+
|
50
56
|
# Run it
|
51
57
|
ani_cmd(
|
52
|
-
|
53
|
-
|
58
|
+
t, r.file_path(:largecontigs),
|
59
|
+
dataset.name, target.name, tmp_dbs[:ani]
|
60
|
+
).tap { checkpoint :ani }
|
54
61
|
end
|
55
62
|
|
56
63
|
##
|
@@ -74,7 +81,7 @@ module MiGA::DistanceRunner::Commands
|
|
74
81
|
|
75
82
|
##
|
76
83
|
# Execute an ANI command
|
77
|
-
def ani_cmd(f1, f2, n1, n2, db, o={})
|
84
|
+
def ani_cmd(f1, f2, n1, n2, db, o = {})
|
78
85
|
o = opts.merge(o)
|
79
86
|
v = nil
|
80
87
|
if o[:ani_p] == 'fastani'
|
@@ -83,7 +90,7 @@ module MiGA::DistanceRunner::Commands
|
|
83
90
|
unless out.empty?
|
84
91
|
SQLite3::Database.new(db) do |conn|
|
85
92
|
conn.execute 'insert into ani values(?, ?, ?, 0, ?, ?)',
|
86
|
-
|
93
|
+
[n1, n2, out[2], out[3], out[4]]
|
87
94
|
end
|
88
95
|
end
|
89
96
|
v = out[2]
|
data/utils/distance/database.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
|
2
1
|
require 'sqlite3'
|
3
2
|
|
4
3
|
module MiGA::DistanceRunner::Database
|
5
4
|
##
|
6
5
|
# Check for corrupt files and create empty databases
|
7
6
|
def initialize_dbs!(for_ref)
|
7
|
+
$stderr.puts "Initializing databases (for_ref = #{for_ref})"
|
8
8
|
@dbs = {}
|
9
9
|
@tmp_dbs = {}
|
10
10
|
@db_counts = {}
|
11
|
-
{haai: :aai, aai: :aai, ani: :ani}.each do |m, t|
|
11
|
+
{ haai: :aai, aai: :aai, ani: :ani }.each do |m, t|
|
12
12
|
@db_counts[m] = 0
|
13
13
|
@dbs[m] = for_ref ? ref_db(m) : query_db(m)
|
14
14
|
# Remove if corrupt
|
@@ -24,9 +24,9 @@ module MiGA::DistanceRunner::Database
|
|
24
24
|
# Initialize if it doesn't exist
|
25
25
|
SQLite3::Database.new(dbs[m]) do |conn|
|
26
26
|
conn.execute "create table if not exists #{t}(" +
|
27
|
-
|
28
|
-
|
29
|
-
|
27
|
+
"seq1 varchar(256), seq2 varchar(256), " +
|
28
|
+
"#{t} float, sd float, n int, omega int" +
|
29
|
+
")"
|
30
30
|
end unless File.size? dbs[m]
|
31
31
|
# Copy over to (local) temporals
|
32
32
|
@tmp_dbs[m] = tmp_file("#{m}.db")
|
@@ -37,16 +37,17 @@ module MiGA::DistanceRunner::Database
|
|
37
37
|
##
|
38
38
|
# Path to the database +metric+ for +dataset_name+ in +project+
|
39
39
|
# (assumes that +dataset_name+ is a reference dataset)
|
40
|
-
def ref_db(metric, dataset_name=nil)
|
40
|
+
def ref_db(metric, dataset_name = nil)
|
41
41
|
dataset_name ||= dataset.name
|
42
|
-
b =
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
b =
|
43
|
+
case metric
|
44
|
+
when :haai
|
45
|
+
"01.haai/#{dataset_name}.db"
|
46
|
+
when :aai
|
47
|
+
"02.aai/#{dataset_name}.db"
|
48
|
+
when :ani
|
49
|
+
"03.ani/#{dataset_name}.db"
|
50
|
+
end
|
50
51
|
File.expand_path(b, home)
|
51
52
|
end
|
52
53
|
|
@@ -62,13 +63,14 @@ module MiGA::DistanceRunner::Database
|
|
62
63
|
def stored_value(target, metric)
|
63
64
|
# Check if self.dataset -> target is done (previous run)
|
64
65
|
y = value_from_db(dataset.name, target.name, tmp_dbs[metric], metric)
|
65
|
-
return y unless y.nil?
|
66
|
+
return y unless y.nil? || y.zero?
|
66
67
|
|
67
68
|
# Check if self.dataset <- target is done (another thread)
|
68
|
-
if dataset.is_ref?
|
69
|
+
if dataset.is_ref? && project.path == ref_project.path
|
69
70
|
y = data_from_db(
|
70
|
-
target.name, dataset.name, ref_db(metric, target.name), metric
|
71
|
-
|
71
|
+
target.name, dataset.name, ref_db(metric, target.name), metric
|
72
|
+
)
|
73
|
+
unless y.nil? || y.first.nil? || y.first.zero?
|
72
74
|
# Store a copy
|
73
75
|
data_to_db(dataset.name, target.name, tmp_dbs[metric], metric, y)
|
74
76
|
return y.first
|
@@ -93,7 +95,8 @@ module MiGA::DistanceRunner::Database
|
|
93
95
|
SQLite3::Database.new(db) do |conn|
|
94
96
|
y = conn.execute(
|
95
97
|
"select #{metric}, sd, n, omega from #{metric} where seq1=? and seq2=?",
|
96
|
-
[n1, n2]
|
98
|
+
[n1, n2]
|
99
|
+
).first
|
97
100
|
end if File.size? db
|
98
101
|
y
|
99
102
|
end
|
@@ -104,7 +107,8 @@ module MiGA::DistanceRunner::Database
|
|
104
107
|
SQLite3::Database.new(db) do |conn|
|
105
108
|
conn.execute(
|
106
109
|
"insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
|
107
|
-
"values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
|
110
|
+
"values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
|
111
|
+
)
|
108
112
|
end
|
109
113
|
checkpoint metric
|
110
114
|
end
|
@@ -113,7 +117,7 @@ module MiGA::DistanceRunner::Database
|
|
113
117
|
# Iterates for each entry in +db+
|
114
118
|
def foreach_in_db(db, metric, &blk)
|
115
119
|
SQLite3::Database.new(db) do |conn|
|
116
|
-
conn.execute("select * from #{metric}").each{ |r| blk[r] }
|
120
|
+
conn.execute("select * from #{metric}").each { |r| blk[r] }
|
117
121
|
end
|
118
122
|
end
|
119
123
|
end
|
data/utils/distance/pipeline.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
-
|
2
1
|
# High-end pipelines for DistanceRunner
|
3
2
|
module MiGA::DistanceRunner::Pipeline
|
4
|
-
|
5
3
|
# Recursively classify the dataset, returning an Array with two entries:
|
6
4
|
# classification and cluster number
|
7
|
-
def classify(clades, classif, metric, result_fh, val_cls=nil)
|
5
|
+
def classify(clades, classif, metric, result_fh, val_cls = nil)
|
8
6
|
dir = File.expand_path(classif, clades)
|
9
7
|
med = File.expand_path('miga-project.medoids', dir)
|
10
|
-
return [classif,val_cls] unless File.size? med
|
8
|
+
return [classif, val_cls] unless File.size? med
|
9
|
+
|
11
10
|
max_val = 0
|
12
11
|
val_med = ''
|
13
12
|
val_cls = nil
|
@@ -32,8 +31,10 @@ module MiGA::DistanceRunner::Pipeline
|
|
32
31
|
|
33
32
|
# Builds a tree with all visited medoids from any classification level
|
34
33
|
def build_medoids_tree(metric)
|
34
|
+
$stderr.puts "Building medoids tree (metric = #{metric})"
|
35
35
|
db = query_db(metric)
|
36
36
|
return unless File.size? db
|
37
|
+
|
37
38
|
out_base = File.expand_path(dataset.name, home)
|
38
39
|
ds_matrix = "#{out_base}.txt"
|
39
40
|
ds_matrix_fh = File.open(ds_matrix, 'w')
|
@@ -42,7 +43,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
42
43
|
seq2 = []
|
43
44
|
foreach_in_db(db, metric) do |r|
|
44
45
|
seq2 << r[0]
|
45
|
-
ds_matrix_fh.puts r[0,3].join("\t")
|
46
|
+
ds_matrix_fh.puts r[0, 3].join("\t")
|
46
47
|
end
|
47
48
|
# Find all values among visited datasets in ref_project
|
48
49
|
ref_r = ref_project.result("#{metric}_distances") or return
|
@@ -50,7 +51,8 @@ module MiGA::DistanceRunner::Pipeline
|
|
50
51
|
fh.each_line do |ln|
|
51
52
|
r = ln.chomp.split("\t")
|
52
53
|
next unless seq2.include?(r[1]) or seq2.include?(r[2])
|
53
|
-
|
54
|
+
|
55
|
+
ds_matrix_fh.puts r[1, 3].join("\t")
|
54
56
|
end
|
55
57
|
end
|
56
58
|
ds_matrix_fh.close
|
@@ -61,6 +63,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
61
63
|
|
62
64
|
# Tests taxonomy
|
63
65
|
def tax_test
|
66
|
+
$stderr.puts "Testing taxonomy | opts = #{opts}"
|
64
67
|
# Get taxonomy of closest relative
|
65
68
|
from_ref_project = (project != ref_project)
|
66
69
|
res_dir = from_ref_project ?
|
@@ -72,11 +75,12 @@ module MiGA::DistanceRunner::Pipeline
|
|
72
75
|
dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
|
73
76
|
cr = dataset.closest_relatives(1, from_ref_project)
|
74
77
|
return if cr.nil? or cr.empty?
|
78
|
+
|
75
79
|
tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
|
76
80
|
|
77
81
|
# Run the test for each rank
|
78
82
|
tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
|
79
|
-
r = tax_test.map do |k,v|
|
83
|
+
r = tax_test.map do |k, v|
|
80
84
|
sig = ''
|
81
85
|
[0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
|
82
86
|
[MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
@@ -95,11 +99,13 @@ module MiGA::DistanceRunner::Pipeline
|
|
95
99
|
|
96
100
|
# Transfer the taxonomy to the current dataset
|
97
101
|
def transfer_taxonomy(tax)
|
102
|
+
$stderr.puts "Transferring taxonomy"
|
98
103
|
return if tax.nil?
|
104
|
+
|
99
105
|
pval = (project.metadata[:tax_pvalue] || 0.05).to_f
|
100
|
-
tax_a = tax
|
101
|
-
|
102
|
-
|
106
|
+
tax_a = tax
|
107
|
+
.select { |i| i[1] != '?' && i[2] <= pval }
|
108
|
+
.map { |i| i[0, 2].join(':') }
|
103
109
|
dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
|
104
110
|
dataset.save
|
105
111
|
end
|
data/utils/distance/runner.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
|
2
1
|
require_relative 'base.rb'
|
3
2
|
require_relative 'temporal.rb'
|
4
3
|
require_relative 'database.rb'
|
5
4
|
require_relative 'commands.rb'
|
6
5
|
require_relative 'pipeline.rb'
|
7
6
|
|
8
|
-
|
9
7
|
class MiGA::DistanceRunner
|
10
|
-
|
11
8
|
include MiGA::DistanceRunner::Temporal
|
12
9
|
include MiGA::DistanceRunner::Database
|
13
10
|
include MiGA::DistanceRunner::Commands
|
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
|
|
16
13
|
attr_reader :project, :ref_project, :dataset, :opts, :home
|
17
14
|
attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
|
18
15
|
|
19
|
-
def initialize(project_path, dataset_name, opts_hash={})
|
16
|
+
def initialize(project_path, dataset_name, opts_hash = {})
|
20
17
|
@opts = opts_hash
|
21
18
|
@project = MiGA::Project.load(project_path) or
|
22
19
|
raise "No project at #{project_path}"
|
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
|
|
30
27
|
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
31
28
|
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
32
29
|
end
|
33
|
-
@opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
|
30
|
+
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
34
31
|
if opts[:run_taxonomy] and project.metadata[:ref_project]
|
35
32
|
ref_path = project.metadata[:ref_project]
|
36
33
|
@home = File.expand_path('05.taxonomy', @home)
|
@@ -53,11 +50,14 @@ class MiGA::DistanceRunner
|
|
53
50
|
@opts[:ani_p] ||= 'blast+'
|
54
51
|
@opts[:distances_checkpoint] ||= 10
|
55
52
|
@opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
|
53
|
+
$stderr.puts "Options: #{opts}"
|
56
54
|
end
|
57
55
|
|
58
56
|
# Launch the appropriate analysis
|
59
57
|
def go!
|
58
|
+
$stderr.puts "Launching analysis"
|
60
59
|
return if dataset.is_multi?
|
60
|
+
|
61
61
|
Dir.mktmpdir do |tmp_dir|
|
62
62
|
@tmp = tmp_dir
|
63
63
|
create_temporals
|
@@ -67,23 +67,26 @@ class MiGA::DistanceRunner
|
|
67
67
|
|
68
68
|
# Launch analysis for reference datasets
|
69
69
|
def go_ref!
|
70
|
+
$stderr.puts "Launching analysis for reference dataset"
|
70
71
|
# Initialize databases
|
71
72
|
initialize_dbs! true
|
72
73
|
|
73
74
|
# first-come-first-serve traverse
|
74
75
|
ref_project.each_dataset do |ds|
|
75
76
|
next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
|
77
|
+
|
76
78
|
puts "[ #{Time.now} ] #{ds.name}"
|
77
79
|
ani_after_aai(ds)
|
78
80
|
end
|
79
81
|
|
80
82
|
# Finalize
|
81
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
83
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
82
84
|
end
|
83
85
|
|
84
86
|
##
|
85
87
|
# Launch analysis for query datasets
|
86
88
|
def go_query!
|
89
|
+
$stderr.puts "Launching analysis for query dataset"
|
87
90
|
# Check if project is ready
|
88
91
|
tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
|
89
92
|
res = ref_project.result(tsk[0])
|
@@ -100,14 +103,15 @@ class MiGA::DistanceRunner
|
|
100
103
|
# Calculate all the AAIs/ANIs against the lowest subclade (if classified)
|
101
104
|
par_dir = File.dirname(File.expand_path(classif, res.dir))
|
102
105
|
par = File.expand_path('miga-project.classif', par_dir)
|
103
|
-
closest = {dataset: nil, ani: 0.0}
|
106
|
+
closest = { dataset: nil, ani: 0.0 }
|
104
107
|
if File.size? par
|
105
108
|
File.open(par, 'r') do |fh|
|
106
109
|
fh.each_line do |ln|
|
107
110
|
r = ln.chomp.split("\t")
|
108
111
|
next unless r[1].to_i == val_cls
|
112
|
+
|
109
113
|
ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
|
110
|
-
closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
|
114
|
+
closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
|
111
115
|
end
|
112
116
|
end
|
113
117
|
end
|
@@ -115,21 +119,23 @@ class MiGA::DistanceRunner
|
|
115
119
|
# Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
|
116
120
|
cl_path = res.file_path :clades_ani95
|
117
121
|
if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
|
118
|
-
File.foreach(cl_path)
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
+
File.foreach(cl_path)
|
123
|
+
.map { |i| i.chomp.split(',') }
|
124
|
+
.find(lambda { [] }) { |i| i.include? closest[:ds] }
|
125
|
+
.each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
|
122
126
|
end
|
123
127
|
|
124
128
|
# Finalize
|
125
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
129
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
126
130
|
build_medoids_tree(tsk[1])
|
127
131
|
transfer_taxonomy(tax_test)
|
128
132
|
end
|
129
133
|
|
130
134
|
# Launch analysis for taxonomy jobs
|
131
135
|
def go_taxonomy!
|
136
|
+
$stderr.puts "Launching taxonomy analysis"
|
132
137
|
return unless project.metadata[:ref_project]
|
138
|
+
|
133
139
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
134
140
|
end
|
135
141
|
end
|
data/utils/distance/temporal.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
|
-
|
2
1
|
require 'tmpdir'
|
3
2
|
require 'zlib'
|
4
3
|
|
5
4
|
module MiGA::DistanceRunner::Temporal
|
6
|
-
|
7
5
|
# Copy input files to the (local) temporal folder
|
8
6
|
def create_temporals
|
9
|
-
rf = {
|
7
|
+
rf = {
|
8
|
+
essential_genes: :ess_genes,
|
9
|
+
cds: :proteins,
|
10
|
+
assembly: :largecontigs
|
11
|
+
}
|
10
12
|
rf.each do |res, file|
|
11
13
|
r = dataset.result(res)
|
12
14
|
f = r.nil? ? nil : r.file_path(file)
|
@@ -37,8 +39,9 @@ module MiGA::DistanceRunner::Temporal
|
|
37
39
|
|
38
40
|
# Copies temporal databases back to the MiGA Project
|
39
41
|
def checkpoint!(metric)
|
42
|
+
$stderr.puts "Checkpoint (metric = #{metric})"
|
40
43
|
SQLite3::Database.new(tmp_dbs[metric]) do |conn|
|
41
|
-
conn.execute("select count(*) from #{metric
|
44
|
+
conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
|
42
45
|
end
|
43
46
|
FileUtils.cp(tmp_dbs[metric], dbs[metric])
|
44
47
|
@db_counts[metric] = 0
|
data/utils/distances.rb
CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
|
|
4
4
|
|
5
5
|
dataset = ARGV.shift
|
6
6
|
project = ARGV.shift
|
7
|
-
opts = Hash[
|
7
|
+
opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
|
8
8
|
runner = MiGA::DistanceRunner.new(dataset, project, opts)
|
9
9
|
runner.go!
|
data/utils/domain-ess-genes.rb
CHANGED
@@ -6,10 +6,10 @@ domain = ARGV.shift
|
|
6
6
|
|
7
7
|
def quality(hsh)
|
8
8
|
q = {}
|
9
|
-
q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
|
10
|
-
q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
|
11
|
-
q[:cmp] = 100.0*q[:found].to_f/hsh.size
|
12
|
-
q[:cnt] = 100.0*q[:multi].to_f/hsh.size
|
9
|
+
q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
|
10
|
+
q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
|
11
|
+
q[:cmp] = 100.0 * q[:found].to_f / hsh.size
|
12
|
+
q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
|
13
13
|
q
|
14
14
|
end
|
15
15
|
|
@@ -39,7 +39,7 @@ end
|
|
39
39
|
# Find expected genes for domain
|
40
40
|
n_dom = Hash[
|
41
41
|
`HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
|
42
|
-
|
42
|
+
.chomp.split("\n").map { |i| i.split("\t") }
|
43
43
|
]
|
44
44
|
l_dom = n_dom.keys
|
45
45
|
cnt_dom = {}
|
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
|
|
54
54
|
ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
|
55
55
|
if q[:multi] > 0
|
56
56
|
ofh.puts "! Multiple copies: "
|
57
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
|
57
|
+
cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
|
58
58
|
end
|
59
59
|
if q[:found] < cnt_dom.size
|
60
60
|
ofh.puts "! Missing genes: "
|
61
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
|
61
|
+
cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
|
62
62
|
end
|
63
63
|
end
|