miga-base 0.7.4.0 → 0.7.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +28 -20
- data/lib/miga/cli/action/doctor/base.rb +29 -6
- data/lib/miga/cli/action/edit.rb +1 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +34 -29
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +3 -3
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +21 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +24 -17
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +5 -2
- data/lib/miga/common/with_daemon_class.rb +1 -1
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +51 -35
- data/lib/miga/daemon/base.rb +0 -2
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +27 -13
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +33 -12
- data/lib/miga/result.rb +34 -25
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +36 -25
- data/lib/miga/tax_dist.rb +6 -3
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +73 -101
- data/test/dataset_test.rb +58 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -39
- data/test/remote_dataset_test.rb +20 -28
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +0 -2
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +24 -21
- data/utils/distance/pipeline.rb +12 -9
- data/utils/distance/runner.rb +14 -13
- data/utils/distance/temporal.rb +1 -3
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +4 -2
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +3 -2
data/test/tax_dist_test.rb
CHANGED
@@ -2,7 +2,6 @@ require 'test_helper'
|
|
2
2
|
require 'miga/tax_dist'
|
3
3
|
|
4
4
|
class TaxDistTest < Test::Unit::TestCase
|
5
|
-
|
6
5
|
def test_aai_path
|
7
6
|
assert_path_exist(MiGA::TaxDist.aai_path(:intax))
|
8
7
|
assert_path_exist(MiGA::TaxDist.aai_path(:novel))
|
@@ -54,5 +53,4 @@ class TaxDistTest < Test::Unit::TestCase
|
|
54
53
|
close_intax = MiGA::TaxDist.aai_taxtest(99.0, :intax, engine: :blast)
|
55
54
|
assert_equal(:s, close_intax[:probably][0])
|
56
55
|
end
|
57
|
-
|
58
56
|
end
|
data/test/tax_index_test.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'test_helper'
|
2
|
+
require 'miga'
|
2
3
|
require 'miga/tax_index'
|
3
4
|
|
4
5
|
class TaxIndexTest < Test::Unit::TestCase
|
6
|
+
include TestHelper
|
5
7
|
|
6
8
|
def test_initialization
|
7
9
|
ti = MiGA::TaxIndex.new
|
@@ -9,12 +11,8 @@ class TaxIndexTest < Test::Unit::TestCase
|
|
9
11
|
end
|
10
12
|
|
11
13
|
def test_dataset
|
12
|
-
|
13
|
-
|
14
|
-
FileUtils.touch(File.expand_path('.miga_rc', ENV["MIGA_HOME"]))
|
15
|
-
FileUtils.touch(File.expand_path('.miga_daemon.json', ENV["MIGA_HOME"]))
|
16
|
-
p = MiGA::Project.new(File.expand_path('project1', $tmp))
|
17
|
-
d = p.add_dataset('dataset1')
|
14
|
+
initialize_miga_home
|
15
|
+
d = dataset
|
18
16
|
|
19
17
|
ti = MiGA::TaxIndex.new
|
20
18
|
assert_empty(ti.datasets)
|
@@ -24,9 +22,6 @@ class TaxIndexTest < Test::Unit::TestCase
|
|
24
22
|
ti << d
|
25
23
|
assert_equal(1, ti.datasets.size, 'index should have one dataset')
|
26
24
|
assert_equal(1, ti.root.datasets_count)
|
27
|
-
ensure
|
28
|
-
FileUtils.rm_rf $tmp
|
29
|
-
ENV["MIGA_HOME"] = nil
|
30
25
|
end
|
31
26
|
|
32
27
|
def test_to_json
|
@@ -40,5 +35,4 @@ class TaxIndexTest < Test::Unit::TestCase
|
|
40
35
|
ti = MiGA::TaxIndex.new
|
41
36
|
assert_equal("root:biota: 0\n", ti.to_tab)
|
42
37
|
end
|
43
|
-
|
44
38
|
end
|
data/test/taxonomy_test.rb
CHANGED
@@ -2,7 +2,6 @@ require 'test_helper'
|
|
2
2
|
require 'miga/taxonomy'
|
3
3
|
|
4
4
|
class TaxonomyTest < Test::Unit::TestCase
|
5
|
-
|
6
5
|
def test_ranks
|
7
6
|
assert_respond_to(MiGA::Taxonomy, :KNOWN_RANKS)
|
8
7
|
assert_include(MiGA::Taxonomy.KNOWN_RANKS, :s)
|
@@ -14,7 +13,7 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
14
13
|
def test_json
|
15
14
|
txt = 'k:Fantasia f:Dragonaceae s:Dragonia_azura'
|
16
15
|
js = '{"json_class":"MiGA::Taxonomy","str":"' + txt + '"}'
|
17
|
-
tx = JSON.parse(js, {symbolize_names: false, create_additions: true})
|
16
|
+
tx = JSON.parse(js, { symbolize_names: false, create_additions: true })
|
18
17
|
assert_equal(MiGA::Taxonomy, tx.class)
|
19
18
|
assert_equal('Dragonaceae', tx[:f])
|
20
19
|
assert_equal(js, tx.to_json)
|
@@ -25,7 +24,7 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
25
24
|
tx = MiGA::Taxonomy.new(txt)
|
26
25
|
assert_equal(txt, tx.to_s)
|
27
26
|
assert_equal(
|
28
|
-
[[:k, 'Fantasia'],[:f, 'Dragonaceae'],[:s, 'Dragonia azura']],
|
27
|
+
[[:k, 'Fantasia'], [:f, 'Dragonaceae'], [:s, 'Dragonia azura']],
|
29
28
|
tx.sorted_ranks
|
30
29
|
)
|
31
30
|
assert_equal('Irrealis', tx.namespace)
|
@@ -46,7 +45,7 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
46
45
|
end
|
47
46
|
|
48
47
|
def test_init_methods
|
49
|
-
tx = MiGA::Taxonomy.new({k: 'Mascot', c: 'Cereal', s: 'Melvin'})
|
48
|
+
tx = MiGA::Taxonomy.new({ k: 'Mascot', c: 'Cereal', s: 'Melvin' })
|
50
49
|
assert_equal('k:Mascot c:Cereal s:Melvin', tx.to_s)
|
51
50
|
tx = MiGA::Taxonomy.new('Mascot College Buzz', 'k c s')
|
52
51
|
assert_equal('k:Mascot c:College s:Buzz', tx.to_s)
|
@@ -56,7 +55,7 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
56
55
|
end
|
57
56
|
|
58
57
|
def test_rank_order
|
59
|
-
tx = MiGA::Taxonomy.new({k: 'Mascot', s: 'Melvin', c: 'Cereal'})
|
58
|
+
tx = MiGA::Taxonomy.new({ k: 'Mascot', s: 'Melvin', c: 'Cereal' })
|
60
59
|
assert_equal([:d, nil], tx.highest(true))
|
61
60
|
assert_equal([:k, 'Mascot'], tx.highest)
|
62
61
|
assert_equal([:ds, nil], tx.lowest(true))
|
@@ -65,7 +64,7 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
65
64
|
|
66
65
|
def test_alternative
|
67
66
|
tx = MiGA::Taxonomy.new('ns:a s:Arnie', nil,
|
68
|
-
|
67
|
+
['ns:b s:Bernie', 'ns:c s:Cornie', 's:Darnie'])
|
69
68
|
# Fields
|
70
69
|
assert_equal('ns:a s:Arnie', tx.to_s)
|
71
70
|
assert_equal([[:s, 'Arnie']], tx.sorted_ranks)
|
@@ -77,7 +76,7 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
77
76
|
assert_equal(3, tx.alternative.size)
|
78
77
|
# JSON
|
79
78
|
js = tx.to_json
|
80
|
-
tx_js = JSON.parse(js, {symbolize_names: false, create_additions: true})
|
79
|
+
tx_js = JSON.parse(js, { symbolize_names: false, create_additions: true })
|
81
80
|
assert_equal(tx.to_s, tx_js.to_s)
|
82
81
|
assert_equal(tx.alternative(2).to_s, tx_js.alternative(2).to_s)
|
83
82
|
assert_equal(tx.alternative.size, tx_js.alternative.size)
|
@@ -94,7 +93,7 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
94
93
|
|
95
94
|
def test_reset
|
96
95
|
tx = MiGA::Taxonomy.new('ns:Letters d:Latin s:A', nil,
|
97
|
-
|
96
|
+
['ns:Words d:English s:A', 'ns:Music d:Tone s:A'])
|
98
97
|
# Reset
|
99
98
|
assert_equal(2, tx.alternative.size)
|
100
99
|
assert_equal('Letters', tx.namespace)
|
@@ -112,5 +111,4 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
112
111
|
tx.add_alternative(MiGA::Taxonomy.new('ns:Letters d:Unicode s:A'))
|
113
112
|
assert_equal('ns:Letters d:Unicode s:A', tx.to_s)
|
114
113
|
end
|
115
|
-
|
116
114
|
end
|
data/test/test_helper.rb
CHANGED
@@ -11,7 +11,6 @@ require 'stringio'
|
|
11
11
|
# Kernel extensions tp capture +$stdout+ and +$stderr+ based on
|
12
12
|
# http://thinkingdigitally.com/archive/capturing-output-from-puts-in-ruby/
|
13
13
|
module Kernel
|
14
|
-
|
15
14
|
def capture_stdout
|
16
15
|
out = StringIO.new
|
17
16
|
$stdout = out
|
@@ -29,5 +28,47 @@ module Kernel
|
|
29
28
|
ensure
|
30
29
|
$stderr = STDERR
|
31
30
|
end
|
31
|
+
end
|
32
32
|
|
33
|
+
module TestHelper
|
34
|
+
def teardown
|
35
|
+
@tmpdir ||= nil
|
36
|
+
FileUtils.rm_rf tmpdir unless @tmpdir.nil?
|
37
|
+
ENV['MIGA_HOME'] = nil
|
38
|
+
end
|
39
|
+
|
40
|
+
def declare_remote_access
|
41
|
+
omit_if(ENV['REMOTE_TESTS'].nil?, 'Remote access is error-prone')
|
42
|
+
end
|
43
|
+
|
44
|
+
def declare_forks
|
45
|
+
omit_if(!ENV['JRUBY_TESTS'].nil?, 'JRuby doesn\'t implement fork')
|
46
|
+
end
|
47
|
+
|
48
|
+
def tmpdir
|
49
|
+
@tmpdir ||= Dir.mktmpdir
|
50
|
+
end
|
51
|
+
|
52
|
+
def tmpfile(name)
|
53
|
+
File.join(tmpdir, name)
|
54
|
+
end
|
55
|
+
|
56
|
+
def initialize_miga_home(daemon = '{}')
|
57
|
+
ENV['MIGA_HOME'] = tmpdir
|
58
|
+
FileUtils.touch(File.join(ENV['MIGA_HOME'], '.miga_rc'))
|
59
|
+
File.open(File.join(ENV['MIGA_HOME'], '.miga_daemon.json'), 'w') do |fh|
|
60
|
+
fh.puts daemon
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def project(i = 0)
|
65
|
+
@project ||= {}
|
66
|
+
i = "project#{i}" unless i.is_a? String
|
67
|
+
@project[i] ||= MiGA::Project.new(tmpfile(i))
|
68
|
+
end
|
69
|
+
|
70
|
+
def dataset(project_i = 0, n = 0)
|
71
|
+
n = "dataset#{n}" unless n.is_a? String
|
72
|
+
project(project_i).dataset(n) || project(project_i).add_dataset(n)
|
73
|
+
end
|
33
74
|
end
|
data/test/with_daemon_test.rb
CHANGED
@@ -2,15 +2,7 @@ require 'test_helper'
|
|
2
2
|
require 'miga/common/with_daemon'
|
3
3
|
|
4
4
|
class WithDaemonTest < Test::Unit::TestCase
|
5
|
-
|
6
|
-
def setup
|
7
|
-
$jruby_tests = !ENV['JRUBY_TESTS'].nil?
|
8
|
-
$tmp = Dir.mktmpdir
|
9
|
-
end
|
10
|
-
|
11
|
-
def teardown
|
12
|
-
FileUtils.rm_rf $tmp
|
13
|
-
end
|
5
|
+
include TestHelper
|
14
6
|
|
15
7
|
class TestWithDaemon < MiGA::MiGA
|
16
8
|
include MiGA::Common::WithDaemon
|
@@ -40,7 +32,7 @@ class WithDaemonTest < Test::Unit::TestCase
|
|
40
32
|
puts(*o)
|
41
33
|
end
|
42
34
|
end
|
43
|
-
|
35
|
+
|
44
36
|
class TestWithDaemon2 < TestWithDaemon
|
45
37
|
def daemon_loop
|
46
38
|
puts 'I am 2.0!'
|
@@ -54,20 +46,20 @@ class WithDaemonTest < Test::Unit::TestCase
|
|
54
46
|
end
|
55
47
|
|
56
48
|
def test_with_daemon
|
57
|
-
d = TestWithDaemon.new(
|
49
|
+
d = TestWithDaemon.new(tmpdir)
|
58
50
|
assert_respond_to(d, :pid_file)
|
59
51
|
assert_respond_to(d.class, :daemon_home)
|
60
52
|
assert_nil(d.loop_i)
|
61
53
|
end
|
62
54
|
|
63
55
|
def test_daemon_run
|
64
|
-
d = TestWithDaemon2.new(
|
56
|
+
d = TestWithDaemon2.new(tmpdir)
|
65
57
|
capture_stdout { d.run }
|
66
58
|
assert_path_not_exist(d.pid_file)
|
67
59
|
end
|
68
60
|
|
69
61
|
def test_daemmon_status
|
70
|
-
d = TestWithDaemon.new(
|
62
|
+
d = TestWithDaemon.new(tmpdir)
|
71
63
|
out = capture_stdout { d.status }.string
|
72
64
|
assert_match(/Not running/, out)
|
73
65
|
|
@@ -84,11 +76,11 @@ class WithDaemonTest < Test::Unit::TestCase
|
|
84
76
|
end
|
85
77
|
|
86
78
|
def test_daemon_operations
|
87
|
-
d = TestWithDaemon.new(
|
79
|
+
d = TestWithDaemon.new(tmpdir)
|
88
80
|
FileUtils.touch(d.output_file)
|
89
81
|
assert_not_predicate(d, :active?)
|
90
82
|
|
91
|
-
|
83
|
+
declare_forks
|
92
84
|
capture_stdout do
|
93
85
|
pid = d.start
|
94
86
|
assert_gt(pid, 0)
|
@@ -112,7 +104,7 @@ class WithDaemonTest < Test::Unit::TestCase
|
|
112
104
|
end
|
113
105
|
|
114
106
|
def test_termination_file
|
115
|
-
d = TestWithDaemon2.new(
|
107
|
+
d = TestWithDaemon2.new(tmpdir)
|
116
108
|
assert { !d.termination_file?(nil) }
|
117
109
|
FileUtils.touch(d.terminate_file)
|
118
110
|
err = capture_stdout do
|
@@ -124,19 +116,19 @@ class WithDaemonTest < Test::Unit::TestCase
|
|
124
116
|
end
|
125
117
|
|
126
118
|
def test_process_alive
|
127
|
-
d = TestWithDaemon2.new(
|
119
|
+
d = TestWithDaemon2.new(tmpdir)
|
128
120
|
assert { d.process_alive?(Process.pid) }
|
129
121
|
assert { !d.process_alive?(1e9) }
|
130
122
|
end
|
131
123
|
|
132
124
|
def test_declare_alive_loop
|
133
|
-
d = TestWithDaemon.new(
|
125
|
+
d = TestWithDaemon.new(tmpfile('nope'))
|
134
126
|
assert_equal(:no_home, d.declare_alive_loop)
|
135
127
|
|
136
|
-
d = TestWithDaemon.new(
|
128
|
+
d = TestWithDaemon.new(tmpdir)
|
137
129
|
assert_equal(:no_process_alive, d.declare_alive_loop(1e9))
|
138
130
|
|
139
|
-
|
131
|
+
declare_forks
|
140
132
|
FileUtils.touch(d.terminate_file)
|
141
133
|
child = fork { sleep(3) }
|
142
134
|
capture_stdout do
|
@@ -145,12 +137,12 @@ class WithDaemonTest < Test::Unit::TestCase
|
|
145
137
|
end
|
146
138
|
|
147
139
|
def test_write_alive_file
|
148
|
-
d = TestWithDaemon.new(
|
140
|
+
d = TestWithDaemon.new(tmpfile('nope'))
|
149
141
|
assert_not_predicate(d, :active?)
|
150
142
|
assert_raise { d.write_alive_file }
|
151
143
|
assert_not_predicate(d, :active?)
|
152
144
|
|
153
|
-
d = TestWithDaemon.new(
|
145
|
+
d = TestWithDaemon.new(tmpdir)
|
154
146
|
assert_not_predicate(d, :active?)
|
155
147
|
d.write_alive_file
|
156
148
|
assert_predicate(d, :active?)
|
data/utils/cleanup-databases.rb
CHANGED
@@ -10,23 +10,24 @@ p = MiGA::Project.load(ARGV[0])
|
|
10
10
|
ds_names = p.dataset_names
|
11
11
|
thr = ARGV[1].to_i
|
12
12
|
|
13
|
-
pc = [0] + (1
|
14
|
-
$stderr.puts (('.'*9 + '|')*10) + ' 100%'
|
13
|
+
pc = [0] + (1..100).map { |i| ds_names.size * i / 100 }
|
14
|
+
$stderr.puts (('.' * 9 + '|') * 10) + ' 100%'
|
15
15
|
|
16
|
-
(0
|
16
|
+
(0..thr - 1).each do |t|
|
17
17
|
fork do
|
18
18
|
ds_names.each_with_index do |i, idx|
|
19
|
-
while t == 0 and idx+1 > pc.first
|
19
|
+
while t == 0 and idx + 1 > pc.first
|
20
20
|
$stderr.print '#'
|
21
21
|
pc.shift
|
22
22
|
end
|
23
23
|
next unless (idx % thr) == t
|
24
|
+
|
24
25
|
d = p.dataset(i)
|
25
26
|
next unless d.is_ref? and d.is_active?
|
27
|
+
|
26
28
|
d.cleanup_distances!
|
27
29
|
end
|
28
30
|
end
|
29
31
|
end
|
30
32
|
Process.waitall
|
31
33
|
$stderr.puts ' Done'
|
32
|
-
|
data/utils/distance/base.rb
CHANGED
data/utils/distance/commands.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
|
2
1
|
module MiGA::DistanceRunner::Commands
|
3
2
|
# Estimates or calculates AAI against +target+
|
4
3
|
def aai(target)
|
5
4
|
# Check if the request makes sense
|
6
5
|
return nil if target.nil? || target.result(:essential_genes).nil?
|
6
|
+
|
7
7
|
# Check if it's been calculated
|
8
8
|
y = stored_value(target, :aai)
|
9
9
|
return y unless y.nil? || y.zero?
|
10
|
+
|
10
11
|
# Try hAAI (except in clade projects)
|
11
12
|
unless @ref_project.is_clade?
|
12
13
|
y = haai(target)
|
@@ -14,24 +15,27 @@ module MiGA::DistanceRunner::Commands
|
|
14
15
|
end
|
15
16
|
# Full AAI
|
16
17
|
aai_cmd(
|
17
|
-
|
18
|
-
|
18
|
+
tmp_file('proteins.fa'), target.result(:cds).file_path(:proteins),
|
19
|
+
dataset.name, target.name, tmp_dbs[:aai]
|
20
|
+
).tap { checkpoint :aai }
|
19
21
|
end
|
20
22
|
|
21
23
|
##
|
22
24
|
# Estimates AAI against +target+ using hAAI
|
23
25
|
def haai(target)
|
24
26
|
return nil if opts[:haai_p] == 'no'
|
27
|
+
|
25
28
|
haai = aai_cmd(tmp_file('ess_genes.fa'),
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
+
target.result(:essential_genes).file_path(:ess_genes),
|
30
|
+
dataset.name, target.name, tmp_dbs[:haai],
|
31
|
+
aai_save_rbm: 'no-save-rbm', aai_p: opts[:haai_p])
|
29
32
|
checkpoint :haai
|
30
33
|
return nil if haai.nil? || haai.zero? || haai > 90.0
|
31
|
-
|
34
|
+
|
35
|
+
aai = 100.0 - Math.exp(2.435076 + 0.4275193 * Math.log(100.0 - haai))
|
32
36
|
SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
|
33
37
|
conn.execute 'insert into aai values(?, ?, ?, 0, 0, 0)',
|
34
|
-
|
38
|
+
[dataset.name, target.name, aai]
|
35
39
|
end
|
36
40
|
checkpoint :aai
|
37
41
|
aai
|
@@ -44,13 +48,16 @@ module MiGA::DistanceRunner::Commands
|
|
44
48
|
t = tmp_file('largecontigs.fa')
|
45
49
|
r = target.result(:assembly)
|
46
50
|
return nil if r.nil? || !File.size?(t)
|
51
|
+
|
47
52
|
# Check if it's been calculated
|
48
53
|
y = stored_value(target, :ani)
|
49
54
|
return y unless y.nil? || y.zero?
|
55
|
+
|
50
56
|
# Run it
|
51
57
|
ani_cmd(
|
52
|
-
|
53
|
-
|
58
|
+
t, r.file_path(:largecontigs),
|
59
|
+
dataset.name, target.name, tmp_dbs[:ani]
|
60
|
+
).tap { checkpoint :ani }
|
54
61
|
end
|
55
62
|
|
56
63
|
##
|
@@ -74,7 +81,7 @@ module MiGA::DistanceRunner::Commands
|
|
74
81
|
|
75
82
|
##
|
76
83
|
# Execute an ANI command
|
77
|
-
def ani_cmd(f1, f2, n1, n2, db, o={})
|
84
|
+
def ani_cmd(f1, f2, n1, n2, db, o = {})
|
78
85
|
o = opts.merge(o)
|
79
86
|
v = nil
|
80
87
|
if o[:ani_p] == 'fastani'
|
@@ -83,7 +90,7 @@ module MiGA::DistanceRunner::Commands
|
|
83
90
|
unless out.empty?
|
84
91
|
SQLite3::Database.new(db) do |conn|
|
85
92
|
conn.execute 'insert into ani values(?, ?, ?, 0, ?, ?)',
|
86
|
-
|
93
|
+
[n1, n2, out[2], out[3], out[4]]
|
87
94
|
end
|
88
95
|
end
|
89
96
|
v = out[2]
|
data/utils/distance/database.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'sqlite3'
|
3
2
|
|
4
3
|
module MiGA::DistanceRunner::Database
|
@@ -9,7 +8,7 @@ module MiGA::DistanceRunner::Database
|
|
9
8
|
@dbs = {}
|
10
9
|
@tmp_dbs = {}
|
11
10
|
@db_counts = {}
|
12
|
-
{haai: :aai, aai: :aai, ani: :ani}.each do |m, t|
|
11
|
+
{ haai: :aai, aai: :aai, ani: :ani }.each do |m, t|
|
13
12
|
@db_counts[m] = 0
|
14
13
|
@dbs[m] = for_ref ? ref_db(m) : query_db(m)
|
15
14
|
# Remove if corrupt
|
@@ -25,9 +24,9 @@ module MiGA::DistanceRunner::Database
|
|
25
24
|
# Initialize if it doesn't exist
|
26
25
|
SQLite3::Database.new(dbs[m]) do |conn|
|
27
26
|
conn.execute "create table if not exists #{t}(" +
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
"seq1 varchar(256), seq2 varchar(256), " +
|
28
|
+
"#{t} float, sd float, n int, omega int" +
|
29
|
+
")"
|
31
30
|
end unless File.size? dbs[m]
|
32
31
|
# Copy over to (local) temporals
|
33
32
|
@tmp_dbs[m] = tmp_file("#{m}.db")
|
@@ -38,16 +37,17 @@ module MiGA::DistanceRunner::Database
|
|
38
37
|
##
|
39
38
|
# Path to the database +metric+ for +dataset_name+ in +project+
|
40
39
|
# (assumes that +dataset_name+ is a reference dataset)
|
41
|
-
def ref_db(metric, dataset_name=nil)
|
40
|
+
def ref_db(metric, dataset_name = nil)
|
42
41
|
dataset_name ||= dataset.name
|
43
|
-
b =
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
42
|
+
b =
|
43
|
+
case metric
|
44
|
+
when :haai
|
45
|
+
"01.haai/#{dataset_name}.db"
|
46
|
+
when :aai
|
47
|
+
"02.aai/#{dataset_name}.db"
|
48
|
+
when :ani
|
49
|
+
"03.ani/#{dataset_name}.db"
|
50
|
+
end
|
51
51
|
File.expand_path(b, home)
|
52
52
|
end
|
53
53
|
|
@@ -63,13 +63,14 @@ module MiGA::DistanceRunner::Database
|
|
63
63
|
def stored_value(target, metric)
|
64
64
|
# Check if self.dataset -> target is done (previous run)
|
65
65
|
y = value_from_db(dataset.name, target.name, tmp_dbs[metric], metric)
|
66
|
-
return y unless y.nil?
|
66
|
+
return y unless y.nil? || y.zero?
|
67
67
|
|
68
68
|
# Check if self.dataset <- target is done (another thread)
|
69
|
-
if dataset.is_ref?
|
69
|
+
if dataset.is_ref? && project.path == ref_project.path
|
70
70
|
y = data_from_db(
|
71
|
-
target.name, dataset.name, ref_db(metric, target.name), metric
|
72
|
-
|
71
|
+
target.name, dataset.name, ref_db(metric, target.name), metric
|
72
|
+
)
|
73
|
+
unless y.nil? || y.first.nil? || y.first.zero?
|
73
74
|
# Store a copy
|
74
75
|
data_to_db(dataset.name, target.name, tmp_dbs[metric], metric, y)
|
75
76
|
return y.first
|
@@ -94,7 +95,8 @@ module MiGA::DistanceRunner::Database
|
|
94
95
|
SQLite3::Database.new(db) do |conn|
|
95
96
|
y = conn.execute(
|
96
97
|
"select #{metric}, sd, n, omega from #{metric} where seq1=? and seq2=?",
|
97
|
-
[n1, n2]
|
98
|
+
[n1, n2]
|
99
|
+
).first
|
98
100
|
end if File.size? db
|
99
101
|
y
|
100
102
|
end
|
@@ -105,7 +107,8 @@ module MiGA::DistanceRunner::Database
|
|
105
107
|
SQLite3::Database.new(db) do |conn|
|
106
108
|
conn.execute(
|
107
109
|
"insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
|
108
|
-
"values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
|
110
|
+
"values (?, ?, ?, ?, ?, ?)", [n1, n2] + data
|
111
|
+
)
|
109
112
|
end
|
110
113
|
checkpoint metric
|
111
114
|
end
|
@@ -114,7 +117,7 @@ module MiGA::DistanceRunner::Database
|
|
114
117
|
# Iterates for each entry in +db+
|
115
118
|
def foreach_in_db(db, metric, &blk)
|
116
119
|
SQLite3::Database.new(db) do |conn|
|
117
|
-
conn.execute("select * from #{metric}").each{ |r| blk[r] }
|
120
|
+
conn.execute("select * from #{metric}").each { |r| blk[r] }
|
118
121
|
end
|
119
122
|
end
|
120
123
|
end
|