miga-base 0.7.5.0 → 0.7.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/miga/cli/action/classify_wf.rb +3 -1
- data/lib/miga/cli/action/edit.rb +9 -6
- data/lib/miga/cli/action/init.rb +21 -14
- data/lib/miga/cli/action/quality_wf.rb +4 -1
- data/lib/miga/cli/action/stats.rb +6 -2
- data/lib/miga/cli/action/summary.rb +6 -1
- data/lib/miga/cli/action/wf.rb +14 -3
- data/lib/miga/cli/objects_helper.rb +3 -1
- data/lib/miga/common/format.rb +4 -2
- data/lib/miga/daemon.rb +5 -3
- data/lib/miga/dataset.rb +5 -1
- data/lib/miga/dataset/base.rb +1 -1
- data/lib/miga/dataset/hooks.rb +4 -4
- data/lib/miga/dataset/result.rb +1 -1
- data/lib/miga/project/hooks.rb +4 -3
- data/lib/miga/remote_dataset/download.rb +3 -1
- data/lib/miga/result.rb +18 -30
- data/lib/miga/result/stats.rb +30 -17
- data/lib/miga/version.rb +2 -2
- data/scripts/distances.bash +6 -1
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/daemon_test.rb +1 -1
- data/test/dataset_test.rb +8 -1
- data/test/remote_dataset_test.rb +7 -5
- data/utils/adapters.fa +13 -0
- data/utils/distance/pipeline.rb +11 -1
- data/utils/distance/runner.rb +8 -5
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30d61eb5847a28c4d83a43e6e47ff0738bb819218e677b9aa43de158b441e0ae
|
4
|
+
data.tar.gz: 46338ae15353b71fb6e7eff390c8bb976d1c11fc296bb83f33aab8ffba1a3fa8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 488e7888039bb9e08e7c257fdd7cb0cf34340766f73b9b46c28b332072f5f207f5dcfb0df08c98b27f5640158d3a14ae6d0f5ab19d78f3e9e1ef44e381d34e74
|
7
|
+
data.tar.gz: 992cfc225eeb2f4b8017260e66c67bc9df39a79757ecdb65e01766079e103385259c1eeb447fdc5e6b769990f96256d384dc4adddf184693a3bc94b8323b9a5a
|
data/README.md
CHANGED
@@ -41,6 +41,7 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
|
|
41
41
|
collaboration between [Kostas Lab][kostas] at the Georgia Institute of
|
42
42
|
Technology and [RDP][rdp] at Michigan State University.
|
43
43
|
|
44
|
+
See also the [complete list of contributors](manual/part1/contributors.md).
|
44
45
|
|
45
46
|
# License
|
46
47
|
|
@@ -42,7 +42,9 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
42
42
|
'--no-summaries',
|
43
43
|
'Do not generate intermediate step summaries'
|
44
44
|
) { |v| cli[:summaries] = v }
|
45
|
-
opts_for_wf(
|
45
|
+
opts_for_wf(
|
46
|
+
opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
|
47
|
+
)
|
46
48
|
end
|
47
49
|
end
|
48
50
|
|
data/lib/miga/cli/action/edit.rb
CHANGED
@@ -17,18 +17,21 @@ class MiGA::Cli::Action::Edit < MiGA::Cli::Action
|
|
17
17
|
'Activate dataset; requires -D'
|
18
18
|
) { |v| cli[:activate] = v }
|
19
19
|
opt.on(
|
20
|
-
'--inactivate',
|
21
|
-
'Inactivate dataset; requires -D'
|
22
|
-
|
20
|
+
'--inactivate [reason]',
|
21
|
+
'Inactivate dataset; requires -D',
|
22
|
+
'The argument is optional: reason to inactivate dataset'
|
23
|
+
) { |v| cli[:activate] = false ; cli[:reason] = v }
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
26
27
|
def perform
|
27
28
|
obj = cli.load_project_or_dataset
|
28
29
|
unless cli[:activate].nil?
|
29
|
-
cli.ensure_par(
|
30
|
-
|
31
|
-
|
30
|
+
cli.ensure_par(
|
31
|
+
{ dataset: '-D' },
|
32
|
+
'%<name>s is mandatory with --[in-]activate: please provide %<flag>s'
|
33
|
+
)
|
34
|
+
cli[:activate] ? obj.activate! : obj.inactivate!(cli[:reason])
|
32
35
|
end
|
33
36
|
cli.add_metadata(obj)
|
34
37
|
obj.save
|
data/lib/miga/cli/action/init.rb
CHANGED
@@ -66,8 +66,10 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def run_r_cmd(cli, paths, cmd)
|
69
|
-
run_cmd(
|
70
|
-
|
69
|
+
run_cmd(
|
70
|
+
cli,
|
71
|
+
"echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1"
|
72
|
+
)
|
71
73
|
end
|
72
74
|
|
73
75
|
def test_r_package(cli, paths, pkg)
|
@@ -81,16 +83,21 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
81
83
|
end
|
82
84
|
|
83
85
|
def test_ruby_gem(cli, paths, pkg)
|
84
|
-
run_cmd(
|
85
|
-
|
86
|
+
run_cmd(
|
87
|
+
cli,
|
88
|
+
"#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null"
|
89
|
+
)
|
86
90
|
$?.success?
|
87
91
|
end
|
88
92
|
|
89
93
|
def install_ruby_gem(cli, paths, pkg)
|
90
94
|
gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
|
91
|
-
run_cmd(
|
95
|
+
run_cmd(
|
96
|
+
cli,
|
97
|
+
"#{paths['ruby'].shellescape} \
|
92
98
|
-r rubygems -r rubygems/gem_runner \
|
93
|
-
-e #{gem_cmd.shellescape} 2>&1"
|
99
|
+
-e #{gem_cmd.shellescape} 2>&1"
|
100
|
+
)
|
94
101
|
end
|
95
102
|
|
96
103
|
def list_requirements
|
@@ -99,7 +106,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
99
106
|
'no', %w(yes no)
|
100
107
|
) == 'yes'
|
101
108
|
cli.puts ''
|
102
|
-
req_path = File.
|
109
|
+
req_path = File.join(MiGA.root_path, 'utils', 'requirements.txt')
|
103
110
|
File.open(req_path, 'r') do |fh|
|
104
111
|
fh.each_line { |ln| cli.puts ln }
|
105
112
|
end
|
@@ -205,18 +212,18 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
205
212
|
def check_additional_files(paths)
|
206
213
|
if cli[:mytaxa]
|
207
214
|
cli.puts 'Looking for MyTaxa databases:'
|
208
|
-
mt = File.dirname paths[
|
215
|
+
mt = File.dirname paths['MyTaxa']
|
209
216
|
cli.print 'Looking for scores... '
|
210
217
|
unless Dir.exist?(File.expand_path('db', mt))
|
211
|
-
cli.puts "no
|
218
|
+
cli.puts "no\nExecute 'python2 #{mt}/utils/download_db.py'"
|
212
219
|
exit(1)
|
213
220
|
end
|
214
|
-
cli.puts 'yes
|
221
|
+
cli.puts 'yes'
|
215
222
|
cli.print 'Looking for diamond db... '
|
216
223
|
unless File.exist?(File.expand_path('AllGenomes.faa.dmnd', mt))
|
217
|
-
cli.puts "no
|
224
|
+
cli.puts "no\nDownload " \
|
218
225
|
"'http://enve-omics.ce.gatech.edu/data/public_mytaxa/" \
|
219
|
-
"AllGenomes.faa.dmnd' into #{mt}
|
226
|
+
"AllGenomes.faa.dmnd' into #{mt}"
|
220
227
|
exit(1)
|
221
228
|
end
|
222
229
|
cli.puts ''
|
@@ -228,7 +235,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
228
235
|
%w(ape cluster vegan).each do |pkg|
|
229
236
|
cli.print "Testing #{pkg}... "
|
230
237
|
if test_r_package(cli, paths, pkg)
|
231
|
-
cli.puts 'yes
|
238
|
+
cli.puts 'yes'
|
232
239
|
else
|
233
240
|
cli.puts 'no, installing'
|
234
241
|
cli.print '' + install_r_package(cli, paths, pkg)
|
@@ -245,7 +252,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
245
252
|
%w(sqlite3 daemons json).each do |pkg|
|
246
253
|
cli.print "Testing #{pkg}... "
|
247
254
|
if test_ruby_gem(cli, paths, pkg)
|
248
|
-
cli.puts 'yes
|
255
|
+
cli.puts 'yes'
|
249
256
|
else
|
250
257
|
cli.puts 'no, installing'
|
251
258
|
# This hackey mess is meant to ensure the test and installation are done
|
@@ -15,7 +15,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
|
|
15
15
|
'-m', '--mytaxa-scan',
|
16
16
|
'Perform MyTaxa scan analysis'
|
17
17
|
) { |v| cli[:mytaxa] = v }
|
18
|
-
opts_for_wf(
|
18
|
+
opts_for_wf(
|
19
|
+
opt, 'Input genome assemblies (nucleotides, FastA)',
|
20
|
+
qual: false
|
21
|
+
)
|
19
22
|
end
|
20
23
|
end
|
21
24
|
|
@@ -14,12 +14,16 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
14
14
|
) { |v| cli[:key] = v }
|
15
15
|
opt.on(
|
16
16
|
'--compute-and-save',
|
17
|
-
'Compute and
|
17
|
+
'Compute and save the statistics'
|
18
18
|
) { |v| cli[:compute] = v }
|
19
19
|
opt.on(
|
20
20
|
'--try-load',
|
21
21
|
'Check if stat exists instead of computing on --compute-and-save'
|
22
22
|
) { |v| cli[:try_load] = v }
|
23
|
+
opt.on(
|
24
|
+
'--ignore-empty',
|
25
|
+
'If the result does not exist, exit without throwing exceptions'
|
26
|
+
) { |v| cli[:ignore_result_empty] = v }
|
23
27
|
end
|
24
28
|
end
|
25
29
|
|
@@ -27,7 +31,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
27
31
|
if cli[:try_load] && !r[:stats].nil? && !r[:stats].empty?
|
28
32
|
cli[:compute] = false
|
29
33
|
end
|
30
|
-
r = cli.load_result
|
34
|
+
r = cli.load_result or return
|
31
35
|
if cli[:compute]
|
32
36
|
cli.say 'Computing statistics'
|
33
37
|
r.compute_stats
|
@@ -26,6 +26,10 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
26
26
|
'--with-units',
|
27
27
|
'Include units in each cell'
|
28
28
|
) { |v| cli[:units] = v }
|
29
|
+
opt.on(
|
30
|
+
'--compute-and-save',
|
31
|
+
'Compute and save the statistics if not yet available'
|
32
|
+
) { |v| cli[:compute] = v }
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
@@ -34,7 +38,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
34
38
|
ds = cli.load_and_filter_datasets
|
35
39
|
cli.say 'Loading results'
|
36
40
|
stats = ds.map do |d|
|
37
|
-
r = d.
|
41
|
+
r = d.result(cli[:result])
|
42
|
+
r.compute_stats if cli[:compute] && !r.nil? && r[:stats].empty?
|
38
43
|
s = r.nil? ? {} : r[:stats]
|
39
44
|
s.tap { |i| i[:dataset] = d.name }
|
40
45
|
end
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -15,7 +15,7 @@ module MiGA::Cli::Action::Wf
|
|
15
15
|
|
16
16
|
def opts_for_wf(opt, files_desc, params = {})
|
17
17
|
{
|
18
|
-
multi: false, cleanup: true, project_type: false, ncbi: true
|
18
|
+
multi: false, cleanup: true, project_type: false, ncbi: true, qual: true
|
19
19
|
}.each { |k, v| params[k] = v if params[k].nil? }
|
20
20
|
opt.on(
|
21
21
|
'-o', '--out_dir PATH',
|
@@ -40,6 +40,13 @@ module MiGA::Cli::Action::Wf
|
|
40
40
|
'Only download complete genomes, not drafts'
|
41
41
|
) { |v| cli[:ncbi_draft] = v }
|
42
42
|
end
|
43
|
+
if params[:qual]
|
44
|
+
opt.on(
|
45
|
+
'--min-qual FLOAT', Float,
|
46
|
+
'Minimum genome quality to include in analysis',
|
47
|
+
'By default: 50.0'
|
48
|
+
) { |v| cli[:min_qual] = v }
|
49
|
+
end
|
43
50
|
if params[:cleanup]
|
44
51
|
opt.on(
|
45
52
|
'-c', '--clean',
|
@@ -89,6 +96,10 @@ module MiGA::Cli::Action::Wf
|
|
89
96
|
end
|
90
97
|
|
91
98
|
def opts_for_wf_distances(opt)
|
99
|
+
opt.on('--sensitive', 'Alias to: --aai-p blast+ --ani-p blast+') do
|
100
|
+
cli[:aai_p] = 'blast+'
|
101
|
+
cli[:ani_p] = 'blast+'
|
102
|
+
end
|
92
103
|
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
|
93
104
|
cli[:aai_p] = 'diamond'
|
94
105
|
cli[:ani_p] = 'fastani'
|
@@ -121,7 +132,7 @@ module MiGA::Cli::Action::Wf
|
|
121
132
|
]) unless MiGA::Project.exist? cli[:outdir]
|
122
133
|
# Define project metadata
|
123
134
|
p = cli.load_project(:outdir, '-o')
|
124
|
-
[
|
135
|
+
%i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
|
125
136
|
p_metadata[:type] = cli[:project_type]
|
126
137
|
transfer_metadata(p, p_metadata)
|
127
138
|
# Download datasets
|
@@ -155,7 +166,7 @@ module MiGA::Cli::Action::Wf
|
|
155
166
|
'-P', cli[:outdir],
|
156
167
|
'-r', r,
|
157
168
|
'-o', File.expand_path("#{r}.tsv", cli[:outdir]),
|
158
|
-
'--tab'
|
169
|
+
'--tab', '--ref', '--active'
|
159
170
|
])
|
160
171
|
end
|
161
172
|
end
|
@@ -80,7 +80,9 @@ module MiGA::Cli::ObjectsHelper
|
|
80
80
|
raise "Unsupported result for #{klass}: #{self[:result]}"
|
81
81
|
end
|
82
82
|
r = obj.add_result(self[:result], false)
|
83
|
-
|
83
|
+
if r.nil? && !self[:ignore_result_empty]
|
84
|
+
raise "Cannot load result: #{self[:result]}"
|
85
|
+
end
|
84
86
|
|
85
87
|
@objects[:result] = r
|
86
88
|
end
|
data/lib/miga/common/format.rb
CHANGED
@@ -90,7 +90,8 @@ module MiGA::Common::Format
|
|
90
90
|
end
|
91
91
|
fh.close
|
92
92
|
|
93
|
-
o = { n: l.size, tot: l.inject(:+), max: l.max }
|
93
|
+
o = { n: l.size, tot: l.inject(0, :+), max: l.max }
|
94
|
+
return o if o[:tot].zero?
|
94
95
|
o[:avg] = o[:tot].to_f / l.size
|
95
96
|
o[:var] = l.map { |a| a**2 }.inject(:+).to_f / l.size - o[:avg]**2
|
96
97
|
o[:sd] = Math.sqrt o[:var]
|
@@ -106,7 +107,8 @@ module MiGA::Common::Format
|
|
106
107
|
break if pos >= thr
|
107
108
|
end
|
108
109
|
o[:med] = o[:n].even? ?
|
109
|
-
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
|
110
|
+
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
|
111
|
+
l[(o[:n] - 1) / 2]
|
110
112
|
end
|
111
113
|
o
|
112
114
|
end
|
data/lib/miga/daemon.rb
CHANGED
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
72
72
|
say '-----------------------------------'
|
73
73
|
say 'MiGA:%s launched' % project.name
|
74
74
|
say '-----------------------------------'
|
75
|
+
recalculate_status!
|
75
76
|
load_status
|
76
77
|
say 'Configuration options:'
|
77
78
|
say @runopts.to_s
|
@@ -99,6 +100,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
99
100
|
end
|
100
101
|
|
101
102
|
def recalculate_status!
|
103
|
+
say 'Recalculating status for all datasets'
|
102
104
|
project.each_dataset(&:recalculate_status)
|
103
105
|
end
|
104
106
|
|
@@ -158,8 +160,8 @@ class MiGA::Daemon < MiGA::MiGA
|
|
158
160
|
end
|
159
161
|
|
160
162
|
##
|
161
|
-
# Traverse datasets, and returns boolean indicating if at any
|
162
|
-
# are incomplete
|
163
|
+
# Traverse datasets, and returns boolean indicating if at any reference
|
164
|
+
# datasets are incomplete
|
163
165
|
def check_datasets
|
164
166
|
l_say(2, 'Checking datasets')
|
165
167
|
o = false
|
@@ -167,7 +169,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
167
169
|
next unless ds.status == :incomplete
|
168
170
|
next if ds.next_preprocessing(false).nil?
|
169
171
|
|
170
|
-
o = true
|
172
|
+
o = true if ds.ref?
|
171
173
|
queue_job(:d, ds)
|
172
174
|
end
|
173
175
|
o
|
data/lib/miga/dataset.rb
CHANGED
@@ -97,7 +97,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
97
97
|
|
98
98
|
##
|
99
99
|
# Inactivate a dataset. This halts automated processing by the daemon
|
100
|
-
|
100
|
+
#
|
101
|
+
# If given, the +reason+ string is saved as a metadata +:warn+ entry
|
102
|
+
def inactivate!(reason = nil)
|
103
|
+
metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
|
101
104
|
metadata[:inactive] = true
|
102
105
|
metadata.save
|
103
106
|
pull_hook :on_inactivate
|
@@ -107,6 +110,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
107
110
|
# Activate a dataset. This removes the +:inactive+ flag
|
108
111
|
def activate!
|
109
112
|
metadata[:inactive] = nil
|
113
|
+
metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
|
110
114
|
metadata.save
|
111
115
|
pull_hook :on_activate
|
112
116
|
end
|
data/lib/miga/dataset/base.rb
CHANGED
@@ -35,8 +35,8 @@ module MiGA::Dataset::Base
|
|
35
35
|
mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
|
36
36
|
mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
|
37
37
|
# Distances (for single-species datasets)
|
38
|
-
distances: '09.distances',
|
39
38
|
taxonomy: '09.distances/05.taxonomy',
|
39
|
+
distances: '09.distances',
|
40
40
|
# General statistics
|
41
41
|
stats: '90.stats'
|
42
42
|
}
|
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -52,15 +52,15 @@ module MiGA::Dataset::Hooks
|
|
52
52
|
end
|
53
53
|
|
54
54
|
##
|
55
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
-
# object (
|
55
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
+
# dataset, project, project_name, miga, object (if defined for the event)
|
57
57
|
# - +hook_args+: +[cmd]+
|
58
58
|
# - +event_args+: +[object (optional)]+
|
59
59
|
def hook_run_cmd(hook_args, event_args)
|
60
60
|
Process.wait(
|
61
61
|
spawn hook_args.first.miga_variables(
|
62
|
-
dataset: name, project: project.path,
|
63
|
-
object: event_args.first
|
62
|
+
dataset: name, project: project.path, project_name: project.name,
|
63
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
64
64
|
)
|
65
65
|
)
|
66
66
|
end
|
data/lib/miga/dataset/result.rb
CHANGED
data/lib/miga/project/hooks.rb
CHANGED
@@ -26,14 +26,15 @@ module MiGA::Project::Hooks
|
|
26
26
|
end
|
27
27
|
|
28
28
|
##
|
29
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
-
# object (
|
29
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
+
# project, project_name, miga, object (if defined by the event)
|
31
31
|
# - +hook_args+: +[cmd]+
|
32
32
|
# - +event_args+: +[object (optional)]+
|
33
33
|
def hook_run_cmd(hook_args, event_args)
|
34
34
|
Process.wait(
|
35
35
|
spawn hook_args.first.miga_variables(
|
36
|
-
project: path,
|
36
|
+
project: path, project_name: name,
|
37
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
37
38
|
)
|
38
39
|
)
|
39
40
|
end
|
@@ -94,11 +94,13 @@ class MiGA::RemoteDataset
|
|
94
94
|
@timeout_try = 0
|
95
95
|
begin
|
96
96
|
DEBUG 'GET: ' + url
|
97
|
-
open(
|
97
|
+
URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
|
98
98
|
rescue => e
|
99
99
|
@timeout_try += 1
|
100
100
|
raise e if @timeout_try >= 3
|
101
101
|
|
102
|
+
sleep 5 # <- For: 429 Too Many Requests
|
103
|
+
DEBUG "RETRYING after: #{e}"
|
102
104
|
retry
|
103
105
|
end
|
104
106
|
doc
|
data/lib/miga/result.rb
CHANGED
@@ -45,10 +45,6 @@ class MiGA::Result < MiGA::MiGA
|
|
45
45
|
# Hash with the result metadata
|
46
46
|
attr_reader :data
|
47
47
|
|
48
|
-
##
|
49
|
-
# Array of MiGA::Result objects nested within the result (if any)
|
50
|
-
attr_reader :results
|
51
|
-
|
52
48
|
##
|
53
49
|
# Load or create the MiGA::Result described by the JSON file +path+
|
54
50
|
def initialize(path)
|
@@ -78,9 +74,9 @@ class MiGA::Result < MiGA::MiGA
|
|
78
74
|
when :json
|
79
75
|
@path
|
80
76
|
when :start
|
81
|
-
@path.sub(/\.json$/,
|
77
|
+
@path.sub(/\.json$/, '.start')
|
82
78
|
when :done
|
83
|
-
@path.sub(/\.json$/,
|
79
|
+
@path.sub(/\.json$/, '.done')
|
84
80
|
end
|
85
81
|
end
|
86
82
|
|
@@ -134,7 +130,7 @@ class MiGA::Result < MiGA::MiGA
|
|
134
130
|
##
|
135
131
|
# Initialize and #save empty result
|
136
132
|
def create
|
137
|
-
@data = { created: Time.now.to_s,
|
133
|
+
@data = { created: Time.now.to_s, stats: {}, files: {} }
|
138
134
|
save
|
139
135
|
end
|
140
136
|
|
@@ -156,19 +152,20 @@ class MiGA::Result < MiGA::MiGA
|
|
156
152
|
def load
|
157
153
|
@data = MiGA::Json.parse(path)
|
158
154
|
@data[:files] ||= {}
|
159
|
-
@results = (self[:results] || []).map { |rs| MiGA::Result.new rs }
|
160
155
|
end
|
161
156
|
|
162
157
|
##
|
163
158
|
# Remove result, including all associated files
|
164
159
|
def remove!
|
165
|
-
each_file
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
160
|
+
each_file { |file| FileUtils.rm_rf(File.join(dir, file)) }
|
161
|
+
unlink
|
162
|
+
end
|
163
|
+
|
164
|
+
# Unlink result by removing the .done and .start timestamps and the
|
165
|
+
# .json descriptor, but don't remove any other associated files
|
166
|
+
def unlink
|
167
|
+
%i(start done).each do |i|
|
168
|
+
f = path(i) and File.exists?(f) and File.unlink(f)
|
172
169
|
end
|
173
170
|
File.unlink path
|
174
171
|
end
|
@@ -182,28 +179,19 @@ class MiGA::Result < MiGA::MiGA
|
|
182
179
|
# Note that multiple files may have the same symbol (file_sym), since
|
183
180
|
# arrays of files are supported.
|
184
181
|
def each_file(&blk)
|
182
|
+
return to_enum(:each_file) unless block_given?
|
183
|
+
|
185
184
|
@data[:files] ||= {}
|
186
185
|
self[:files].each do |k, files|
|
187
186
|
files = [files] unless files.kind_of? Array
|
188
187
|
files.each do |file|
|
189
188
|
case blk.arity
|
190
|
-
when 1
|
191
|
-
|
192
|
-
when
|
193
|
-
|
194
|
-
when 3
|
195
|
-
blk.call(k, file, File.expand_path(file, dir))
|
196
|
-
else
|
197
|
-
raise "Wrong number of arguments: #{blk.arity} for 1..3"
|
189
|
+
when 1; blk.call(file)
|
190
|
+
when 2; blk.call(k, file)
|
191
|
+
when 3; blk.call(k, file, File.expand_path(file, dir))
|
192
|
+
else; raise "Wrong number of arguments: #{blk.arity} for 1..3"
|
198
193
|
end
|
199
194
|
end
|
200
195
|
end
|
201
196
|
end
|
202
|
-
|
203
|
-
##
|
204
|
-
# Add the MiGA::Result +result+ as part of the current result
|
205
|
-
def add_result(result)
|
206
|
-
@data[:results] << result.path
|
207
|
-
save
|
208
|
-
end
|
209
197
|
end
|
data/lib/miga/result/stats.rb
CHANGED
@@ -8,6 +8,7 @@ module MiGA::Result::Stats
|
|
8
8
|
# (Re-)calculate and save the statistics for the result
|
9
9
|
def compute_stats
|
10
10
|
method = :"compute_stats_#{key}"
|
11
|
+
MiGA::MiGA.DEBUG "Result(#{key}).compute_stats"
|
11
12
|
stats = self.respond_to?(method, true) ? send(method) : nil
|
12
13
|
unless stats.nil?
|
13
14
|
self[:stats] = stats
|
@@ -109,20 +110,8 @@ module MiGA::Result::Stats
|
|
109
110
|
end
|
110
111
|
end
|
111
112
|
else
|
112
|
-
#
|
113
|
-
|
114
|
-
%w[Archaea Bacteria].include?(tax[:d]) &&
|
115
|
-
file_path(:raw_report).nil?
|
116
|
-
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
117
|
-
rep = file_path(:report)
|
118
|
-
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
119
|
-
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
120
|
-
$stderr.print `#{rc} ruby '#{scr}' \
|
121
|
-
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
122
|
-
add_file(:raw_report, "#{source.name}.ess/log")
|
123
|
-
add_file(:report, "#{source.name}.ess/log.domain")
|
124
|
-
end
|
125
|
-
# Extract/compute quality values
|
113
|
+
# Estimate quality metrics
|
114
|
+
fix_essential_genes_by_domain
|
126
115
|
stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
|
127
116
|
File.open(file_path(:report), 'r') do |fh|
|
128
117
|
fh.each_line do |ln|
|
@@ -131,6 +120,8 @@ module MiGA::Result::Stats
|
|
131
120
|
end
|
132
121
|
end
|
133
122
|
end
|
123
|
+
|
124
|
+
# Determine qualitative range
|
134
125
|
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
135
126
|
source.metadata[:quality] =
|
136
127
|
case stats[:quality]
|
@@ -140,6 +131,12 @@ module MiGA::Result::Stats
|
|
140
131
|
else; :low
|
141
132
|
end
|
142
133
|
source.save
|
134
|
+
|
135
|
+
# Inactivate low-quality datasets
|
136
|
+
min_qual = (project.metadata[:min_qual] || 50)
|
137
|
+
if min_qual != 'no' && stats[:quality] < min_qual
|
138
|
+
source.inactivate! 'Low genome quality'
|
139
|
+
end
|
143
140
|
end
|
144
141
|
stats
|
145
142
|
end
|
@@ -168,12 +165,28 @@ module MiGA::Result::Stats
|
|
168
165
|
stats[:aai] = [$2.to_f, '%']
|
169
166
|
3.times { fh.gets }
|
170
167
|
fh.each_line do |ln|
|
171
|
-
|
172
|
-
break if row.empty?
|
168
|
+
next unless ln.chomp =~ /^\s*(\S+)\s+(.+)\s+([0-9\.e-]+)\s+\**\s*$/
|
173
169
|
|
174
|
-
stats[:"#{
|
170
|
+
stats[:"#{$1}_pvalue"] = $3.to_f unless $1 == 'root'
|
175
171
|
end
|
176
172
|
end
|
177
173
|
stats
|
178
174
|
end
|
175
|
+
|
176
|
+
# Fix estimates based on essential genes based on taxonomy
|
177
|
+
def fix_essential_genes_by_domain
|
178
|
+
return if (tax = source.metadata[:tax]).nil? ||
|
179
|
+
!%w[Archaea Bacteria].include?(tax[:d]) ||
|
180
|
+
file_path(:raw_report)
|
181
|
+
|
182
|
+
MiGA::MiGA.DEBUG "Fixing essential genes by domain"
|
183
|
+
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
184
|
+
rep = file_path(:report)
|
185
|
+
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
186
|
+
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
187
|
+
$stderr.print `#{rc} ruby '#{scr}' \
|
188
|
+
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
189
|
+
add_file(:raw_report, "#{source.name}.ess/log")
|
190
|
+
add_file(:report, "#{source.name}.ess/log.domain")
|
191
|
+
end
|
179
192
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -8,7 +8,7 @@ module MiGA
|
|
8
8
|
# - Float representing the major.minor version.
|
9
9
|
# - Integer representing gem releases of the current version.
|
10
10
|
# - Integer representing minor changes that require new version number.
|
11
|
-
VERSION = [0.7,
|
11
|
+
VERSION = [0.7, 10, 0]
|
12
12
|
|
13
13
|
##
|
14
14
|
# Nickname for the current major.minor version.
|
@@ -16,7 +16,7 @@ module MiGA
|
|
16
16
|
|
17
17
|
##
|
18
18
|
# Date of the current gem release.
|
19
|
-
VERSION_DATE = Date.new(2020,
|
19
|
+
VERSION_DATE = Date.new(2020, 6, 29)
|
20
20
|
|
21
21
|
##
|
22
22
|
# Reference of MiGA.
|
data/scripts/distances.bash
CHANGED
@@ -9,7 +9,12 @@ cd "$PROJECT/data/09.distances"
|
|
9
9
|
# Initialize
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
|
-
#
|
12
|
+
# Check quality first
|
13
|
+
miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
|
14
|
+
inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
|
15
|
+
[[ "$inactive" == "true" ]] && exit
|
16
|
+
|
17
|
+
# Run distances
|
13
18
|
ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
|
14
19
|
|
15
20
|
# Finalize
|
data/scripts/project_stats.bash
CHANGED
data/scripts/stats.bash
CHANGED
@@ -14,7 +14,7 @@ miga date > "$DATASET.start"
|
|
14
14
|
# Calculate statistics
|
15
15
|
for i in raw_reads trimmed_fasta assembly cds essential_genes ssu distances taxonomy ; do
|
16
16
|
echo "# $i"
|
17
|
-
miga stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
|
17
|
+
miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
|
18
18
|
done
|
19
19
|
|
20
20
|
# Finalize
|
data/test/daemon_test.rb
CHANGED
@@ -93,7 +93,7 @@ class DaemonTest < Test::Unit::TestCase
|
|
93
93
|
0 => /-{20}\n/,
|
94
94
|
1 => /MiGA:#{p.name} launched/,
|
95
95
|
2 => /-{20}\n/,
|
96
|
-
|
96
|
+
6 => /Probing running jobs\n/
|
97
97
|
}.each { |k, v| assert_match(v, l[k], "unexpected line: #{k}") }
|
98
98
|
ensure
|
99
99
|
begin
|
data/test/dataset_test.rb
CHANGED
@@ -185,11 +185,18 @@ class DatasetTest < Test::Unit::TestCase
|
|
185
185
|
d = dataset
|
186
186
|
assert_equal(:incomplete, d.status)
|
187
187
|
assert_predicate(d, :active?)
|
188
|
-
d.inactivate!
|
188
|
+
d.inactivate! 'Too annoying'
|
189
189
|
assert_equal(:inactive, d.status)
|
190
|
+
assert_equal('Inactive: Too annoying', d.metadata[:warn])
|
190
191
|
assert_not_predicate(d, :active?)
|
191
192
|
d.activate!
|
192
193
|
assert_equal(:incomplete, d.status)
|
194
|
+
assert_nil(d.metadata[:warn])
|
193
195
|
assert_predicate(d, :active?)
|
194
196
|
end
|
197
|
+
|
198
|
+
def test_preprocessing_tasks
|
199
|
+
assert_include(MiGA::Dataset.PREPROCESSING_TASKS, :cds)
|
200
|
+
assert_include(MiGA::Dataset.PREPROCESSING_TASKS, :taxonomy)
|
201
|
+
end
|
195
202
|
end
|
data/test/remote_dataset_test.rb
CHANGED
@@ -31,13 +31,15 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
31
31
|
assert_equal(MiGA::Taxonomy, tx.class, msg)
|
32
32
|
assert_equal('Lentivirus', tx[:g], msg)
|
33
33
|
assert_equal(
|
34
|
-
'ns:ncbi
|
35
|
-
'g:Lentivirus
|
34
|
+
'ns:ncbi k:Pararnavirae p:Artverviricota c:Revtraviricetes ' \
|
35
|
+
'o:Ortervirales f:Retroviridae g:Lentivirus ' \
|
36
|
+
's:Human_immunodeficiency_virus_2',
|
36
37
|
tx.to_s, msg
|
37
38
|
)
|
38
39
|
assert_equal(
|
39
|
-
'ns:ncbi d: k: p: c:
|
40
|
-
'
|
40
|
+
'ns:ncbi d: k:Pararnavirae p:Artverviricota c:Revtraviricetes ' \
|
41
|
+
'o:Ortervirales f:Retroviridae g:Lentivirus ' \
|
42
|
+
's:Human_immunodeficiency_virus_2 ssp: str: ds:',
|
41
43
|
tx.to_s(true), msg
|
42
44
|
)
|
43
45
|
assert_equal('ncbi', tx.namespace, msg)
|
@@ -99,7 +101,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
99
101
|
|
100
102
|
def test_ref_type_status
|
101
103
|
declare_remote_access
|
102
|
-
rd = MiGA::RemoteDataset.new('
|
104
|
+
rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
|
103
105
|
assert { !rd.get_metadata[:is_type] }
|
104
106
|
assert { rd.get_metadata[:is_ref_type] }
|
105
107
|
end
|
data/utils/adapters.fa
CHANGED
@@ -300,3 +300,16 @@ AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
|
|
300
300
|
AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
|
301
301
|
>TruSeq3_UniversalAdapter
|
302
302
|
AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA
|
303
|
+
|
304
|
+
>Nextera_PE_PrefixNX/1
|
305
|
+
AGATGTGTATAAGAGACAG
|
306
|
+
>Nextera_PE_PrefixNX/2
|
307
|
+
AGATGTGTATAAGAGACAG
|
308
|
+
>Nextera_PE_Trans1
|
309
|
+
TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG
|
310
|
+
>Nextera_PE_Trans1_rc
|
311
|
+
CTGTCTCTTATACACATCTGACGCTGCCGACGA
|
312
|
+
>Nextera_PE_Trans2
|
313
|
+
GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG
|
314
|
+
>Nextera_PE_Trans2_rc
|
315
|
+
CTGTCTCTTATACACATCTCCGAGCCCACGAGAC
|
data/utils/distance/pipeline.rb
CHANGED
@@ -29,6 +29,16 @@ module MiGA::DistanceRunner::Pipeline
|
|
29
29
|
classify(clades, classif, metric, result_fh, val_cls)
|
30
30
|
end
|
31
31
|
|
32
|
+
# Run distances against datasets listed in metadata's +:dist_req+
|
33
|
+
def distances_by_request(metric)
|
34
|
+
return unless dataset.metadata[:dist_req]
|
35
|
+
|
36
|
+
$stderr.puts 'Running distances by request'
|
37
|
+
dataset.metadata[:dist_req].each do |target|
|
38
|
+
ds = ref_project.dataset(target) and send(metric, ds)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
32
42
|
# Builds a tree with all visited medoids from any classification level
|
33
43
|
def build_medoids_tree(metric)
|
34
44
|
$stderr.puts "Building medoids tree (metric = #{metric})"
|
@@ -99,7 +109,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
99
109
|
|
100
110
|
# Transfer the taxonomy to the current dataset
|
101
111
|
def transfer_taxonomy(tax)
|
102
|
-
$stderr.puts
|
112
|
+
$stderr.puts 'Transferring taxonomy'
|
103
113
|
return if tax.nil?
|
104
114
|
|
105
115
|
pval = (project.metadata[:tax_pvalue] || 0.05).to_f
|
data/utils/distance/runner.rb
CHANGED
@@ -67,7 +67,7 @@ class MiGA::DistanceRunner
|
|
67
67
|
|
68
68
|
# Launch analysis for reference datasets
|
69
69
|
def go_ref!
|
70
|
-
$stderr.puts
|
70
|
+
$stderr.puts 'Launching analysis for reference dataset'
|
71
71
|
# Initialize databases
|
72
72
|
initialize_dbs! true
|
73
73
|
|
@@ -80,13 +80,13 @@ class MiGA::DistanceRunner
|
|
80
80
|
end
|
81
81
|
|
82
82
|
# Finalize
|
83
|
-
[
|
83
|
+
%i[haai aai ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
84
84
|
end
|
85
85
|
|
86
86
|
##
|
87
87
|
# Launch analysis for query datasets
|
88
88
|
def go_query!
|
89
|
-
$stderr.puts
|
89
|
+
$stderr.puts 'Launching analysis for query dataset'
|
90
90
|
# Check if project is ready
|
91
91
|
tsk = ref_project.is_clade? ? [:subclades, :ani] : [:clade_finding, :aai]
|
92
92
|
res = ref_project.result(tsk[0])
|
@@ -94,6 +94,7 @@ class MiGA::DistanceRunner
|
|
94
94
|
|
95
95
|
# Initialize the databases
|
96
96
|
initialize_dbs! false
|
97
|
+
distances_by_request(tsk[1])
|
97
98
|
# Calculate the classification-informed AAI/ANI traverse
|
98
99
|
results = File.expand_path("#{dataset.name}.#{tsk[1]}-medoids.tsv", home)
|
99
100
|
fh = File.open(results, 'w')
|
@@ -111,7 +112,9 @@ class MiGA::DistanceRunner
|
|
111
112
|
next unless r[1].to_i == val_cls
|
112
113
|
|
113
114
|
ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
|
114
|
-
|
115
|
+
unless ani.nil? || ani < closest[:ani]
|
116
|
+
closest = { ds: r[0], ani: ani }
|
117
|
+
end
|
115
118
|
end
|
116
119
|
end
|
117
120
|
end
|
@@ -133,7 +136,7 @@ class MiGA::DistanceRunner
|
|
133
136
|
|
134
137
|
# Launch analysis for taxonomy jobs
|
135
138
|
def go_taxonomy!
|
136
|
-
$stderr.puts
|
139
|
+
$stderr.puts 'Launching taxonomy analysis'
|
137
140
|
return unless project.metadata[:ref_project]
|
138
141
|
|
139
142
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1.
|
47
|
+
version: '1.3'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1.
|
54
|
+
version: '1.3'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -529,7 +529,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
|
|
529
529
|
licenses:
|
530
530
|
- Artistic-2.0
|
531
531
|
metadata: {}
|
532
|
-
post_install_message:
|
532
|
+
post_install_message:
|
533
533
|
rdoc_options:
|
534
534
|
- lib
|
535
535
|
- README.md
|
@@ -550,8 +550,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
550
550
|
- !ruby/object:Gem::Version
|
551
551
|
version: '0'
|
552
552
|
requirements: []
|
553
|
-
rubygems_version: 3.
|
554
|
-
signing_key:
|
553
|
+
rubygems_version: 3.1.2
|
554
|
+
signing_key:
|
555
555
|
specification_version: 4
|
556
556
|
summary: MiGA
|
557
557
|
test_files: []
|