miga-base 0.7.5.0 → 0.7.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/miga/cli/action/classify_wf.rb +3 -1
- data/lib/miga/cli/action/edit.rb +9 -6
- data/lib/miga/cli/action/init.rb +21 -14
- data/lib/miga/cli/action/quality_wf.rb +4 -1
- data/lib/miga/cli/action/stats.rb +6 -2
- data/lib/miga/cli/action/summary.rb +6 -1
- data/lib/miga/cli/action/wf.rb +14 -3
- data/lib/miga/cli/objects_helper.rb +3 -1
- data/lib/miga/common/format.rb +4 -2
- data/lib/miga/daemon.rb +5 -3
- data/lib/miga/dataset.rb +5 -1
- data/lib/miga/dataset/base.rb +1 -1
- data/lib/miga/dataset/hooks.rb +4 -4
- data/lib/miga/dataset/result.rb +1 -1
- data/lib/miga/project/hooks.rb +4 -3
- data/lib/miga/remote_dataset/download.rb +3 -1
- data/lib/miga/result.rb +18 -30
- data/lib/miga/result/stats.rb +30 -17
- data/lib/miga/version.rb +2 -2
- data/scripts/distances.bash +6 -1
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/daemon_test.rb +1 -1
- data/test/dataset_test.rb +8 -1
- data/test/remote_dataset_test.rb +7 -5
- data/utils/adapters.fa +13 -0
- data/utils/distance/pipeline.rb +11 -1
- data/utils/distance/runner.rb +8 -5
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30d61eb5847a28c4d83a43e6e47ff0738bb819218e677b9aa43de158b441e0ae
|
4
|
+
data.tar.gz: 46338ae15353b71fb6e7eff390c8bb976d1c11fc296bb83f33aab8ffba1a3fa8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 488e7888039bb9e08e7c257fdd7cb0cf34340766f73b9b46c28b332072f5f207f5dcfb0df08c98b27f5640158d3a14ae6d0f5ab19d78f3e9e1ef44e381d34e74
|
7
|
+
data.tar.gz: 992cfc225eeb2f4b8017260e66c67bc9df39a79757ecdb65e01766079e103385259c1eeb447fdc5e6b769990f96256d384dc4adddf184693a3bc94b8323b9a5a
|
data/README.md
CHANGED
@@ -41,6 +41,7 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
|
|
41
41
|
collaboration between [Kostas Lab][kostas] at the Georgia Institute of
|
42
42
|
Technology and [RDP][rdp] at Michigan State University.
|
43
43
|
|
44
|
+
See also the [complete list of contributors](manual/part1/contributors.md).
|
44
45
|
|
45
46
|
# License
|
46
47
|
|
@@ -42,7 +42,9 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
42
42
|
'--no-summaries',
|
43
43
|
'Do not generate intermediate step summaries'
|
44
44
|
) { |v| cli[:summaries] = v }
|
45
|
-
opts_for_wf(
|
45
|
+
opts_for_wf(
|
46
|
+
opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
|
47
|
+
)
|
46
48
|
end
|
47
49
|
end
|
48
50
|
|
data/lib/miga/cli/action/edit.rb
CHANGED
@@ -17,18 +17,21 @@ class MiGA::Cli::Action::Edit < MiGA::Cli::Action
|
|
17
17
|
'Activate dataset; requires -D'
|
18
18
|
) { |v| cli[:activate] = v }
|
19
19
|
opt.on(
|
20
|
-
'--inactivate',
|
21
|
-
'Inactivate dataset; requires -D'
|
22
|
-
|
20
|
+
'--inactivate [reason]',
|
21
|
+
'Inactivate dataset; requires -D',
|
22
|
+
'The argument is optional: reason to inactivate dataset'
|
23
|
+
) { |v| cli[:activate] = false ; cli[:reason] = v }
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
26
27
|
def perform
|
27
28
|
obj = cli.load_project_or_dataset
|
28
29
|
unless cli[:activate].nil?
|
29
|
-
cli.ensure_par(
|
30
|
-
|
31
|
-
|
30
|
+
cli.ensure_par(
|
31
|
+
{ dataset: '-D' },
|
32
|
+
'%<name>s is mandatory with --[in-]activate: please provide %<flag>s'
|
33
|
+
)
|
34
|
+
cli[:activate] ? obj.activate! : obj.inactivate!(cli[:reason])
|
32
35
|
end
|
33
36
|
cli.add_metadata(obj)
|
34
37
|
obj.save
|
data/lib/miga/cli/action/init.rb
CHANGED
@@ -66,8 +66,10 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def run_r_cmd(cli, paths, cmd)
|
69
|
-
run_cmd(
|
70
|
-
|
69
|
+
run_cmd(
|
70
|
+
cli,
|
71
|
+
"echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1"
|
72
|
+
)
|
71
73
|
end
|
72
74
|
|
73
75
|
def test_r_package(cli, paths, pkg)
|
@@ -81,16 +83,21 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
81
83
|
end
|
82
84
|
|
83
85
|
def test_ruby_gem(cli, paths, pkg)
|
84
|
-
run_cmd(
|
85
|
-
|
86
|
+
run_cmd(
|
87
|
+
cli,
|
88
|
+
"#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null"
|
89
|
+
)
|
86
90
|
$?.success?
|
87
91
|
end
|
88
92
|
|
89
93
|
def install_ruby_gem(cli, paths, pkg)
|
90
94
|
gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
|
91
|
-
run_cmd(
|
95
|
+
run_cmd(
|
96
|
+
cli,
|
97
|
+
"#{paths['ruby'].shellescape} \
|
92
98
|
-r rubygems -r rubygems/gem_runner \
|
93
|
-
-e #{gem_cmd.shellescape} 2>&1"
|
99
|
+
-e #{gem_cmd.shellescape} 2>&1"
|
100
|
+
)
|
94
101
|
end
|
95
102
|
|
96
103
|
def list_requirements
|
@@ -99,7 +106,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
99
106
|
'no', %w(yes no)
|
100
107
|
) == 'yes'
|
101
108
|
cli.puts ''
|
102
|
-
req_path = File.
|
109
|
+
req_path = File.join(MiGA.root_path, 'utils', 'requirements.txt')
|
103
110
|
File.open(req_path, 'r') do |fh|
|
104
111
|
fh.each_line { |ln| cli.puts ln }
|
105
112
|
end
|
@@ -205,18 +212,18 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
205
212
|
def check_additional_files(paths)
|
206
213
|
if cli[:mytaxa]
|
207
214
|
cli.puts 'Looking for MyTaxa databases:'
|
208
|
-
mt = File.dirname paths[
|
215
|
+
mt = File.dirname paths['MyTaxa']
|
209
216
|
cli.print 'Looking for scores... '
|
210
217
|
unless Dir.exist?(File.expand_path('db', mt))
|
211
|
-
cli.puts "no
|
218
|
+
cli.puts "no\nExecute 'python2 #{mt}/utils/download_db.py'"
|
212
219
|
exit(1)
|
213
220
|
end
|
214
|
-
cli.puts 'yes
|
221
|
+
cli.puts 'yes'
|
215
222
|
cli.print 'Looking for diamond db... '
|
216
223
|
unless File.exist?(File.expand_path('AllGenomes.faa.dmnd', mt))
|
217
|
-
cli.puts "no
|
224
|
+
cli.puts "no\nDownload " \
|
218
225
|
"'http://enve-omics.ce.gatech.edu/data/public_mytaxa/" \
|
219
|
-
"AllGenomes.faa.dmnd' into #{mt}
|
226
|
+
"AllGenomes.faa.dmnd' into #{mt}"
|
220
227
|
exit(1)
|
221
228
|
end
|
222
229
|
cli.puts ''
|
@@ -228,7 +235,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
228
235
|
%w(ape cluster vegan).each do |pkg|
|
229
236
|
cli.print "Testing #{pkg}... "
|
230
237
|
if test_r_package(cli, paths, pkg)
|
231
|
-
cli.puts 'yes
|
238
|
+
cli.puts 'yes'
|
232
239
|
else
|
233
240
|
cli.puts 'no, installing'
|
234
241
|
cli.print '' + install_r_package(cli, paths, pkg)
|
@@ -245,7 +252,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
245
252
|
%w(sqlite3 daemons json).each do |pkg|
|
246
253
|
cli.print "Testing #{pkg}... "
|
247
254
|
if test_ruby_gem(cli, paths, pkg)
|
248
|
-
cli.puts 'yes
|
255
|
+
cli.puts 'yes'
|
249
256
|
else
|
250
257
|
cli.puts 'no, installing'
|
251
258
|
# This hackey mess is meant to ensure the test and installation are done
|
@@ -15,7 +15,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
|
|
15
15
|
'-m', '--mytaxa-scan',
|
16
16
|
'Perform MyTaxa scan analysis'
|
17
17
|
) { |v| cli[:mytaxa] = v }
|
18
|
-
opts_for_wf(
|
18
|
+
opts_for_wf(
|
19
|
+
opt, 'Input genome assemblies (nucleotides, FastA)',
|
20
|
+
qual: false
|
21
|
+
)
|
19
22
|
end
|
20
23
|
end
|
21
24
|
|
@@ -14,12 +14,16 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
14
14
|
) { |v| cli[:key] = v }
|
15
15
|
opt.on(
|
16
16
|
'--compute-and-save',
|
17
|
-
'Compute and
|
17
|
+
'Compute and save the statistics'
|
18
18
|
) { |v| cli[:compute] = v }
|
19
19
|
opt.on(
|
20
20
|
'--try-load',
|
21
21
|
'Check if stat exists instead of computing on --compute-and-save'
|
22
22
|
) { |v| cli[:try_load] = v }
|
23
|
+
opt.on(
|
24
|
+
'--ignore-empty',
|
25
|
+
'If the result does not exist, exit without throwing exceptions'
|
26
|
+
) { |v| cli[:ignore_result_empty] = v }
|
23
27
|
end
|
24
28
|
end
|
25
29
|
|
@@ -27,7 +31,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
27
31
|
if cli[:try_load] && !r[:stats].nil? && !r[:stats].empty?
|
28
32
|
cli[:compute] = false
|
29
33
|
end
|
30
|
-
r = cli.load_result
|
34
|
+
r = cli.load_result or return
|
31
35
|
if cli[:compute]
|
32
36
|
cli.say 'Computing statistics'
|
33
37
|
r.compute_stats
|
@@ -26,6 +26,10 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
26
26
|
'--with-units',
|
27
27
|
'Include units in each cell'
|
28
28
|
) { |v| cli[:units] = v }
|
29
|
+
opt.on(
|
30
|
+
'--compute-and-save',
|
31
|
+
'Compute and save the statistics if not yet available'
|
32
|
+
) { |v| cli[:compute] = v }
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
@@ -34,7 +38,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
34
38
|
ds = cli.load_and_filter_datasets
|
35
39
|
cli.say 'Loading results'
|
36
40
|
stats = ds.map do |d|
|
37
|
-
r = d.
|
41
|
+
r = d.result(cli[:result])
|
42
|
+
r.compute_stats if cli[:compute] && !r.nil? && r[:stats].empty?
|
38
43
|
s = r.nil? ? {} : r[:stats]
|
39
44
|
s.tap { |i| i[:dataset] = d.name }
|
40
45
|
end
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -15,7 +15,7 @@ module MiGA::Cli::Action::Wf
|
|
15
15
|
|
16
16
|
def opts_for_wf(opt, files_desc, params = {})
|
17
17
|
{
|
18
|
-
multi: false, cleanup: true, project_type: false, ncbi: true
|
18
|
+
multi: false, cleanup: true, project_type: false, ncbi: true, qual: true
|
19
19
|
}.each { |k, v| params[k] = v if params[k].nil? }
|
20
20
|
opt.on(
|
21
21
|
'-o', '--out_dir PATH',
|
@@ -40,6 +40,13 @@ module MiGA::Cli::Action::Wf
|
|
40
40
|
'Only download complete genomes, not drafts'
|
41
41
|
) { |v| cli[:ncbi_draft] = v }
|
42
42
|
end
|
43
|
+
if params[:qual]
|
44
|
+
opt.on(
|
45
|
+
'--min-qual FLOAT', Float,
|
46
|
+
'Minimum genome quality to include in analysis',
|
47
|
+
'By default: 50.0'
|
48
|
+
) { |v| cli[:min_qual] = v }
|
49
|
+
end
|
43
50
|
if params[:cleanup]
|
44
51
|
opt.on(
|
45
52
|
'-c', '--clean',
|
@@ -89,6 +96,10 @@ module MiGA::Cli::Action::Wf
|
|
89
96
|
end
|
90
97
|
|
91
98
|
def opts_for_wf_distances(opt)
|
99
|
+
opt.on('--sensitive', 'Alias to: --aai-p blast+ --ani-p blast+') do
|
100
|
+
cli[:aai_p] = 'blast+'
|
101
|
+
cli[:ani_p] = 'blast+'
|
102
|
+
end
|
92
103
|
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
|
93
104
|
cli[:aai_p] = 'diamond'
|
94
105
|
cli[:ani_p] = 'fastani'
|
@@ -121,7 +132,7 @@ module MiGA::Cli::Action::Wf
|
|
121
132
|
]) unless MiGA::Project.exist? cli[:outdir]
|
122
133
|
# Define project metadata
|
123
134
|
p = cli.load_project(:outdir, '-o')
|
124
|
-
[
|
135
|
+
%i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
|
125
136
|
p_metadata[:type] = cli[:project_type]
|
126
137
|
transfer_metadata(p, p_metadata)
|
127
138
|
# Download datasets
|
@@ -155,7 +166,7 @@ module MiGA::Cli::Action::Wf
|
|
155
166
|
'-P', cli[:outdir],
|
156
167
|
'-r', r,
|
157
168
|
'-o', File.expand_path("#{r}.tsv", cli[:outdir]),
|
158
|
-
'--tab'
|
169
|
+
'--tab', '--ref', '--active'
|
159
170
|
])
|
160
171
|
end
|
161
172
|
end
|
@@ -80,7 +80,9 @@ module MiGA::Cli::ObjectsHelper
|
|
80
80
|
raise "Unsupported result for #{klass}: #{self[:result]}"
|
81
81
|
end
|
82
82
|
r = obj.add_result(self[:result], false)
|
83
|
-
|
83
|
+
if r.nil? && !self[:ignore_result_empty]
|
84
|
+
raise "Cannot load result: #{self[:result]}"
|
85
|
+
end
|
84
86
|
|
85
87
|
@objects[:result] = r
|
86
88
|
end
|
data/lib/miga/common/format.rb
CHANGED
@@ -90,7 +90,8 @@ module MiGA::Common::Format
|
|
90
90
|
end
|
91
91
|
fh.close
|
92
92
|
|
93
|
-
o = { n: l.size, tot: l.inject(:+), max: l.max }
|
93
|
+
o = { n: l.size, tot: l.inject(0, :+), max: l.max }
|
94
|
+
return o if o[:tot].zero?
|
94
95
|
o[:avg] = o[:tot].to_f / l.size
|
95
96
|
o[:var] = l.map { |a| a**2 }.inject(:+).to_f / l.size - o[:avg]**2
|
96
97
|
o[:sd] = Math.sqrt o[:var]
|
@@ -106,7 +107,8 @@ module MiGA::Common::Format
|
|
106
107
|
break if pos >= thr
|
107
108
|
end
|
108
109
|
o[:med] = o[:n].even? ?
|
109
|
-
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
|
110
|
+
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
|
111
|
+
l[(o[:n] - 1) / 2]
|
110
112
|
end
|
111
113
|
o
|
112
114
|
end
|
data/lib/miga/daemon.rb
CHANGED
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
72
72
|
say '-----------------------------------'
|
73
73
|
say 'MiGA:%s launched' % project.name
|
74
74
|
say '-----------------------------------'
|
75
|
+
recalculate_status!
|
75
76
|
load_status
|
76
77
|
say 'Configuration options:'
|
77
78
|
say @runopts.to_s
|
@@ -99,6 +100,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
99
100
|
end
|
100
101
|
|
101
102
|
def recalculate_status!
|
103
|
+
say 'Recalculating status for all datasets'
|
102
104
|
project.each_dataset(&:recalculate_status)
|
103
105
|
end
|
104
106
|
|
@@ -158,8 +160,8 @@ class MiGA::Daemon < MiGA::MiGA
|
|
158
160
|
end
|
159
161
|
|
160
162
|
##
|
161
|
-
# Traverse datasets, and returns boolean indicating if at any
|
162
|
-
# are incomplete
|
163
|
+
# Traverse datasets, and returns boolean indicating if at any reference
|
164
|
+
# datasets are incomplete
|
163
165
|
def check_datasets
|
164
166
|
l_say(2, 'Checking datasets')
|
165
167
|
o = false
|
@@ -167,7 +169,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
167
169
|
next unless ds.status == :incomplete
|
168
170
|
next if ds.next_preprocessing(false).nil?
|
169
171
|
|
170
|
-
o = true
|
172
|
+
o = true if ds.ref?
|
171
173
|
queue_job(:d, ds)
|
172
174
|
end
|
173
175
|
o
|
data/lib/miga/dataset.rb
CHANGED
@@ -97,7 +97,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
97
97
|
|
98
98
|
##
|
99
99
|
# Inactivate a dataset. This halts automated processing by the daemon
|
100
|
-
|
100
|
+
#
|
101
|
+
# If given, the +reason+ string is saved as a metadata +:warn+ entry
|
102
|
+
def inactivate!(reason = nil)
|
103
|
+
metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
|
101
104
|
metadata[:inactive] = true
|
102
105
|
metadata.save
|
103
106
|
pull_hook :on_inactivate
|
@@ -107,6 +110,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
107
110
|
# Activate a dataset. This removes the +:inactive+ flag
|
108
111
|
def activate!
|
109
112
|
metadata[:inactive] = nil
|
113
|
+
metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
|
110
114
|
metadata.save
|
111
115
|
pull_hook :on_activate
|
112
116
|
end
|
data/lib/miga/dataset/base.rb
CHANGED
@@ -35,8 +35,8 @@ module MiGA::Dataset::Base
|
|
35
35
|
mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
|
36
36
|
mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
|
37
37
|
# Distances (for single-species datasets)
|
38
|
-
distances: '09.distances',
|
39
38
|
taxonomy: '09.distances/05.taxonomy',
|
39
|
+
distances: '09.distances',
|
40
40
|
# General statistics
|
41
41
|
stats: '90.stats'
|
42
42
|
}
|
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -52,15 +52,15 @@ module MiGA::Dataset::Hooks
|
|
52
52
|
end
|
53
53
|
|
54
54
|
##
|
55
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
-
# object (
|
55
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
+
# dataset, project, project_name, miga, object (if defined for the event)
|
57
57
|
# - +hook_args+: +[cmd]+
|
58
58
|
# - +event_args+: +[object (optional)]+
|
59
59
|
def hook_run_cmd(hook_args, event_args)
|
60
60
|
Process.wait(
|
61
61
|
spawn hook_args.first.miga_variables(
|
62
|
-
dataset: name, project: project.path,
|
63
|
-
object: event_args.first
|
62
|
+
dataset: name, project: project.path, project_name: project.name,
|
63
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
64
64
|
)
|
65
65
|
)
|
66
66
|
end
|
data/lib/miga/dataset/result.rb
CHANGED
data/lib/miga/project/hooks.rb
CHANGED
@@ -26,14 +26,15 @@ module MiGA::Project::Hooks
|
|
26
26
|
end
|
27
27
|
|
28
28
|
##
|
29
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
-
# object (
|
29
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
+
# project, project_name, miga, object (if defined by the event)
|
31
31
|
# - +hook_args+: +[cmd]+
|
32
32
|
# - +event_args+: +[object (optional)]+
|
33
33
|
def hook_run_cmd(hook_args, event_args)
|
34
34
|
Process.wait(
|
35
35
|
spawn hook_args.first.miga_variables(
|
36
|
-
project: path,
|
36
|
+
project: path, project_name: name,
|
37
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
37
38
|
)
|
38
39
|
)
|
39
40
|
end
|
@@ -94,11 +94,13 @@ class MiGA::RemoteDataset
|
|
94
94
|
@timeout_try = 0
|
95
95
|
begin
|
96
96
|
DEBUG 'GET: ' + url
|
97
|
-
open(
|
97
|
+
URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
|
98
98
|
rescue => e
|
99
99
|
@timeout_try += 1
|
100
100
|
raise e if @timeout_try >= 3
|
101
101
|
|
102
|
+
sleep 5 # <- For: 429 Too Many Requests
|
103
|
+
DEBUG "RETRYING after: #{e}"
|
102
104
|
retry
|
103
105
|
end
|
104
106
|
doc
|
data/lib/miga/result.rb
CHANGED
@@ -45,10 +45,6 @@ class MiGA::Result < MiGA::MiGA
|
|
45
45
|
# Hash with the result metadata
|
46
46
|
attr_reader :data
|
47
47
|
|
48
|
-
##
|
49
|
-
# Array of MiGA::Result objects nested within the result (if any)
|
50
|
-
attr_reader :results
|
51
|
-
|
52
48
|
##
|
53
49
|
# Load or create the MiGA::Result described by the JSON file +path+
|
54
50
|
def initialize(path)
|
@@ -78,9 +74,9 @@ class MiGA::Result < MiGA::MiGA
|
|
78
74
|
when :json
|
79
75
|
@path
|
80
76
|
when :start
|
81
|
-
@path.sub(/\.json$/,
|
77
|
+
@path.sub(/\.json$/, '.start')
|
82
78
|
when :done
|
83
|
-
@path.sub(/\.json$/,
|
79
|
+
@path.sub(/\.json$/, '.done')
|
84
80
|
end
|
85
81
|
end
|
86
82
|
|
@@ -134,7 +130,7 @@ class MiGA::Result < MiGA::MiGA
|
|
134
130
|
##
|
135
131
|
# Initialize and #save empty result
|
136
132
|
def create
|
137
|
-
@data = { created: Time.now.to_s,
|
133
|
+
@data = { created: Time.now.to_s, stats: {}, files: {} }
|
138
134
|
save
|
139
135
|
end
|
140
136
|
|
@@ -156,19 +152,20 @@ class MiGA::Result < MiGA::MiGA
|
|
156
152
|
def load
|
157
153
|
@data = MiGA::Json.parse(path)
|
158
154
|
@data[:files] ||= {}
|
159
|
-
@results = (self[:results] || []).map { |rs| MiGA::Result.new rs }
|
160
155
|
end
|
161
156
|
|
162
157
|
##
|
163
158
|
# Remove result, including all associated files
|
164
159
|
def remove!
|
165
|
-
each_file
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
160
|
+
each_file { |file| FileUtils.rm_rf(File.join(dir, file)) }
|
161
|
+
unlink
|
162
|
+
end
|
163
|
+
|
164
|
+
# Unlink result by removing the .done and .start timestamps and the
|
165
|
+
# .json descriptor, but don't remove any other associated files
|
166
|
+
def unlink
|
167
|
+
%i(start done).each do |i|
|
168
|
+
f = path(i) and File.exists?(f) and File.unlink(f)
|
172
169
|
end
|
173
170
|
File.unlink path
|
174
171
|
end
|
@@ -182,28 +179,19 @@ class MiGA::Result < MiGA::MiGA
|
|
182
179
|
# Note that multiple files may have the same symbol (file_sym), since
|
183
180
|
# arrays of files are supported.
|
184
181
|
def each_file(&blk)
|
182
|
+
return to_enum(:each_file) unless block_given?
|
183
|
+
|
185
184
|
@data[:files] ||= {}
|
186
185
|
self[:files].each do |k, files|
|
187
186
|
files = [files] unless files.kind_of? Array
|
188
187
|
files.each do |file|
|
189
188
|
case blk.arity
|
190
|
-
when 1
|
191
|
-
|
192
|
-
when
|
193
|
-
|
194
|
-
when 3
|
195
|
-
blk.call(k, file, File.expand_path(file, dir))
|
196
|
-
else
|
197
|
-
raise "Wrong number of arguments: #{blk.arity} for 1..3"
|
189
|
+
when 1; blk.call(file)
|
190
|
+
when 2; blk.call(k, file)
|
191
|
+
when 3; blk.call(k, file, File.expand_path(file, dir))
|
192
|
+
else; raise "Wrong number of arguments: #{blk.arity} for 1..3"
|
198
193
|
end
|
199
194
|
end
|
200
195
|
end
|
201
196
|
end
|
202
|
-
|
203
|
-
##
|
204
|
-
# Add the MiGA::Result +result+ as part of the current result
|
205
|
-
def add_result(result)
|
206
|
-
@data[:results] << result.path
|
207
|
-
save
|
208
|
-
end
|
209
197
|
end
|
data/lib/miga/result/stats.rb
CHANGED
@@ -8,6 +8,7 @@ module MiGA::Result::Stats
|
|
8
8
|
# (Re-)calculate and save the statistics for the result
|
9
9
|
def compute_stats
|
10
10
|
method = :"compute_stats_#{key}"
|
11
|
+
MiGA::MiGA.DEBUG "Result(#{key}).compute_stats"
|
11
12
|
stats = self.respond_to?(method, true) ? send(method) : nil
|
12
13
|
unless stats.nil?
|
13
14
|
self[:stats] = stats
|
@@ -109,20 +110,8 @@ module MiGA::Result::Stats
|
|
109
110
|
end
|
110
111
|
end
|
111
112
|
else
|
112
|
-
#
|
113
|
-
|
114
|
-
%w[Archaea Bacteria].include?(tax[:d]) &&
|
115
|
-
file_path(:raw_report).nil?
|
116
|
-
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
117
|
-
rep = file_path(:report)
|
118
|
-
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
119
|
-
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
120
|
-
$stderr.print `#{rc} ruby '#{scr}' \
|
121
|
-
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
122
|
-
add_file(:raw_report, "#{source.name}.ess/log")
|
123
|
-
add_file(:report, "#{source.name}.ess/log.domain")
|
124
|
-
end
|
125
|
-
# Extract/compute quality values
|
113
|
+
# Estimate quality metrics
|
114
|
+
fix_essential_genes_by_domain
|
126
115
|
stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
|
127
116
|
File.open(file_path(:report), 'r') do |fh|
|
128
117
|
fh.each_line do |ln|
|
@@ -131,6 +120,8 @@ module MiGA::Result::Stats
|
|
131
120
|
end
|
132
121
|
end
|
133
122
|
end
|
123
|
+
|
124
|
+
# Determine qualitative range
|
134
125
|
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
135
126
|
source.metadata[:quality] =
|
136
127
|
case stats[:quality]
|
@@ -140,6 +131,12 @@ module MiGA::Result::Stats
|
|
140
131
|
else; :low
|
141
132
|
end
|
142
133
|
source.save
|
134
|
+
|
135
|
+
# Inactivate low-quality datasets
|
136
|
+
min_qual = (project.metadata[:min_qual] || 50)
|
137
|
+
if min_qual != 'no' && stats[:quality] < min_qual
|
138
|
+
source.inactivate! 'Low genome quality'
|
139
|
+
end
|
143
140
|
end
|
144
141
|
stats
|
145
142
|
end
|
@@ -168,12 +165,28 @@ module MiGA::Result::Stats
|
|
168
165
|
stats[:aai] = [$2.to_f, '%']
|
169
166
|
3.times { fh.gets }
|
170
167
|
fh.each_line do |ln|
|
171
|
-
|
172
|
-
break if row.empty?
|
168
|
+
next unless ln.chomp =~ /^\s*(\S+)\s+(.+)\s+([0-9\.e-]+)\s+\**\s*$/
|
173
169
|
|
174
|
-
stats[:"#{
|
170
|
+
stats[:"#{$1}_pvalue"] = $3.to_f unless $1 == 'root'
|
175
171
|
end
|
176
172
|
end
|
177
173
|
stats
|
178
174
|
end
|
175
|
+
|
176
|
+
# Fix estimates based on essential genes based on taxonomy
|
177
|
+
def fix_essential_genes_by_domain
|
178
|
+
return if (tax = source.metadata[:tax]).nil? ||
|
179
|
+
!%w[Archaea Bacteria].include?(tax[:d]) ||
|
180
|
+
file_path(:raw_report)
|
181
|
+
|
182
|
+
MiGA::MiGA.DEBUG "Fixing essential genes by domain"
|
183
|
+
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
184
|
+
rep = file_path(:report)
|
185
|
+
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
186
|
+
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
187
|
+
$stderr.print `#{rc} ruby '#{scr}' \
|
188
|
+
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
189
|
+
add_file(:raw_report, "#{source.name}.ess/log")
|
190
|
+
add_file(:report, "#{source.name}.ess/log.domain")
|
191
|
+
end
|
179
192
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -8,7 +8,7 @@ module MiGA
|
|
8
8
|
# - Float representing the major.minor version.
|
9
9
|
# - Integer representing gem releases of the current version.
|
10
10
|
# - Integer representing minor changes that require new version number.
|
11
|
-
VERSION = [0.7,
|
11
|
+
VERSION = [0.7, 10, 0]
|
12
12
|
|
13
13
|
##
|
14
14
|
# Nickname for the current major.minor version.
|
@@ -16,7 +16,7 @@ module MiGA
|
|
16
16
|
|
17
17
|
##
|
18
18
|
# Date of the current gem release.
|
19
|
-
VERSION_DATE = Date.new(2020,
|
19
|
+
VERSION_DATE = Date.new(2020, 6, 29)
|
20
20
|
|
21
21
|
##
|
22
22
|
# Reference of MiGA.
|
data/scripts/distances.bash
CHANGED
@@ -9,7 +9,12 @@ cd "$PROJECT/data/09.distances"
|
|
9
9
|
# Initialize
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
|
-
#
|
12
|
+
# Check quality first
|
13
|
+
miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
|
14
|
+
inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
|
15
|
+
[[ "$inactive" == "true" ]] && exit
|
16
|
+
|
17
|
+
# Run distances
|
13
18
|
ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
|
14
19
|
|
15
20
|
# Finalize
|
data/scripts/project_stats.bash
CHANGED
data/scripts/stats.bash
CHANGED
@@ -14,7 +14,7 @@ miga date > "$DATASET.start"
|
|
14
14
|
# Calculate statistics
|
15
15
|
for i in raw_reads trimmed_fasta assembly cds essential_genes ssu distances taxonomy ; do
|
16
16
|
echo "# $i"
|
17
|
-
miga stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
|
17
|
+
miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
|
18
18
|
done
|
19
19
|
|
20
20
|
# Finalize
|
data/test/daemon_test.rb
CHANGED
@@ -93,7 +93,7 @@ class DaemonTest < Test::Unit::TestCase
|
|
93
93
|
0 => /-{20}\n/,
|
94
94
|
1 => /MiGA:#{p.name} launched/,
|
95
95
|
2 => /-{20}\n/,
|
96
|
-
|
96
|
+
6 => /Probing running jobs\n/
|
97
97
|
}.each { |k, v| assert_match(v, l[k], "unexpected line: #{k}") }
|
98
98
|
ensure
|
99
99
|
begin
|
data/test/dataset_test.rb
CHANGED
@@ -185,11 +185,18 @@ class DatasetTest < Test::Unit::TestCase
|
|
185
185
|
d = dataset
|
186
186
|
assert_equal(:incomplete, d.status)
|
187
187
|
assert_predicate(d, :active?)
|
188
|
-
d.inactivate!
|
188
|
+
d.inactivate! 'Too annoying'
|
189
189
|
assert_equal(:inactive, d.status)
|
190
|
+
assert_equal('Inactive: Too annoying', d.metadata[:warn])
|
190
191
|
assert_not_predicate(d, :active?)
|
191
192
|
d.activate!
|
192
193
|
assert_equal(:incomplete, d.status)
|
194
|
+
assert_nil(d.metadata[:warn])
|
193
195
|
assert_predicate(d, :active?)
|
194
196
|
end
|
197
|
+
|
198
|
+
def test_preprocessing_tasks
|
199
|
+
assert_include(MiGA::Dataset.PREPROCESSING_TASKS, :cds)
|
200
|
+
assert_include(MiGA::Dataset.PREPROCESSING_TASKS, :taxonomy)
|
201
|
+
end
|
195
202
|
end
|
data/test/remote_dataset_test.rb
CHANGED
@@ -31,13 +31,15 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
31
31
|
assert_equal(MiGA::Taxonomy, tx.class, msg)
|
32
32
|
assert_equal('Lentivirus', tx[:g], msg)
|
33
33
|
assert_equal(
|
34
|
-
'ns:ncbi
|
35
|
-
'g:Lentivirus
|
34
|
+
'ns:ncbi k:Pararnavirae p:Artverviricota c:Revtraviricetes ' \
|
35
|
+
'o:Ortervirales f:Retroviridae g:Lentivirus ' \
|
36
|
+
's:Human_immunodeficiency_virus_2',
|
36
37
|
tx.to_s, msg
|
37
38
|
)
|
38
39
|
assert_equal(
|
39
|
-
'ns:ncbi d: k: p: c:
|
40
|
-
'
|
40
|
+
'ns:ncbi d: k:Pararnavirae p:Artverviricota c:Revtraviricetes ' \
|
41
|
+
'o:Ortervirales f:Retroviridae g:Lentivirus ' \
|
42
|
+
's:Human_immunodeficiency_virus_2 ssp: str: ds:',
|
41
43
|
tx.to_s(true), msg
|
42
44
|
)
|
43
45
|
assert_equal('ncbi', tx.namespace, msg)
|
@@ -99,7 +101,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
99
101
|
|
100
102
|
def test_ref_type_status
|
101
103
|
declare_remote_access
|
102
|
-
rd = MiGA::RemoteDataset.new('
|
104
|
+
rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
|
103
105
|
assert { !rd.get_metadata[:is_type] }
|
104
106
|
assert { rd.get_metadata[:is_ref_type] }
|
105
107
|
end
|
data/utils/adapters.fa
CHANGED
@@ -300,3 +300,16 @@ AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
|
|
300
300
|
AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
|
301
301
|
>TruSeq3_UniversalAdapter
|
302
302
|
AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA
|
303
|
+
|
304
|
+
>Nextera_PE_PrefixNX/1
|
305
|
+
AGATGTGTATAAGAGACAG
|
306
|
+
>Nextera_PE_PrefixNX/2
|
307
|
+
AGATGTGTATAAGAGACAG
|
308
|
+
>Nextera_PE_Trans1
|
309
|
+
TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG
|
310
|
+
>Nextera_PE_Trans1_rc
|
311
|
+
CTGTCTCTTATACACATCTGACGCTGCCGACGA
|
312
|
+
>Nextera_PE_Trans2
|
313
|
+
GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG
|
314
|
+
>Nextera_PE_Trans2_rc
|
315
|
+
CTGTCTCTTATACACATCTCCGAGCCCACGAGAC
|
data/utils/distance/pipeline.rb
CHANGED
@@ -29,6 +29,16 @@ module MiGA::DistanceRunner::Pipeline
|
|
29
29
|
classify(clades, classif, metric, result_fh, val_cls)
|
30
30
|
end
|
31
31
|
|
32
|
+
# Run distances against datasets listed in metadata's +:dist_req+
|
33
|
+
def distances_by_request(metric)
|
34
|
+
return unless dataset.metadata[:dist_req]
|
35
|
+
|
36
|
+
$stderr.puts 'Running distances by request'
|
37
|
+
dataset.metadata[:dist_req].each do |target|
|
38
|
+
ds = ref_project.dataset(target) and send(metric, ds)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
32
42
|
# Builds a tree with all visited medoids from any classification level
|
33
43
|
def build_medoids_tree(metric)
|
34
44
|
$stderr.puts "Building medoids tree (metric = #{metric})"
|
@@ -99,7 +109,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
99
109
|
|
100
110
|
# Transfer the taxonomy to the current dataset
|
101
111
|
def transfer_taxonomy(tax)
|
102
|
-
$stderr.puts
|
112
|
+
$stderr.puts 'Transferring taxonomy'
|
103
113
|
return if tax.nil?
|
104
114
|
|
105
115
|
pval = (project.metadata[:tax_pvalue] || 0.05).to_f
|
data/utils/distance/runner.rb
CHANGED
@@ -67,7 +67,7 @@ class MiGA::DistanceRunner
|
|
67
67
|
|
68
68
|
# Launch analysis for reference datasets
|
69
69
|
def go_ref!
|
70
|
-
$stderr.puts
|
70
|
+
$stderr.puts 'Launching analysis for reference dataset'
|
71
71
|
# Initialize databases
|
72
72
|
initialize_dbs! true
|
73
73
|
|
@@ -80,13 +80,13 @@ class MiGA::DistanceRunner
|
|
80
80
|
end
|
81
81
|
|
82
82
|
# Finalize
|
83
|
-
[
|
83
|
+
%i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
84
84
|
end
|
85
85
|
|
86
86
|
##
|
87
87
|
# Launch analysis for query datasets
|
88
88
|
def go_query!
|
89
|
-
$stderr.puts
|
89
|
+
$stderr.puts 'Launching analysis for query dataset'
|
90
90
|
# Check if project is ready
|
91
91
|
tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
|
92
92
|
res = ref_project.result(tsk[0])
|
@@ -94,6 +94,7 @@ class MiGA::DistanceRunner
|
|
94
94
|
|
95
95
|
# Initialize the databases
|
96
96
|
initialize_dbs! false
|
97
|
+
distances_by_request(tsk[1])
|
97
98
|
# Calculate the classification-informed AAI/ANI traverse
|
98
99
|
results = File.expand_path("#{dataset.name}.#{tsk[1]}-medoids.tsv", home)
|
99
100
|
fh = File.open(results, 'w')
|
@@ -111,7 +112,9 @@ class MiGA::DistanceRunner
|
|
111
112
|
next unless r[1].to_i == val_cls
|
112
113
|
|
113
114
|
ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
|
114
|
-
|
115
|
+
unless ani.nil? || ani < closest[:ani]
|
116
|
+
closest = { ds: r[0], ani: ani }
|
117
|
+
end
|
115
118
|
end
|
116
119
|
end
|
117
120
|
end
|
@@ -133,7 +136,7 @@ class MiGA::DistanceRunner
|
|
133
136
|
|
134
137
|
# Launch analysis for taxonomy jobs
|
135
138
|
def go_taxonomy!
|
136
|
-
$stderr.puts
|
139
|
+
$stderr.puts 'Launching taxonomy analysis'
|
137
140
|
return unless project.metadata[:ref_project]
|
138
141
|
|
139
142
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1.
|
47
|
+
version: '1.3'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1.
|
54
|
+
version: '1.3'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -529,7 +529,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
|
|
529
529
|
licenses:
|
530
530
|
- Artistic-2.0
|
531
531
|
metadata: {}
|
532
|
-
post_install_message:
|
532
|
+
post_install_message:
|
533
533
|
rdoc_options:
|
534
534
|
- lib
|
535
535
|
- README.md
|
@@ -550,8 +550,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
550
550
|
- !ruby/object:Gem::Version
|
551
551
|
version: '0'
|
552
552
|
requirements: []
|
553
|
-
rubygems_version: 3.
|
554
|
-
signing_key:
|
553
|
+
rubygems_version: 3.1.2
|
554
|
+
signing_key:
|
555
555
|
specification_version: 4
|
556
556
|
summary: MiGA
|
557
557
|
test_files: []
|