miga-base 0.7.9.0 → 0.7.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/miga/cli/action/classify_wf.rb +3 -1
- data/lib/miga/cli/action/edit.rb +9 -6
- data/lib/miga/cli/action/quality_wf.rb +4 -1
- data/lib/miga/cli/action/wf.rb +10 -3
- data/lib/miga/daemon.rb +5 -3
- data/lib/miga/dataset.rb +5 -1
- data/lib/miga/dataset/base.rb +1 -1
- data/lib/miga/dataset/hooks.rb +4 -4
- data/lib/miga/dataset/result.rb +1 -1
- data/lib/miga/project/hooks.rb +4 -3
- data/lib/miga/remote_dataset/download.rb +2 -1
- data/lib/miga/result.rb +3 -1
- data/lib/miga/result/stats.rb +28 -14
- data/lib/miga/version.rb +2 -2
- data/scripts/distances.bash +6 -1
- data/test/daemon_test.rb +1 -1
- data/test/dataset_test.rb +3 -1
- data/test/remote_dataset_test.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30d61eb5847a28c4d83a43e6e47ff0738bb819218e677b9aa43de158b441e0ae
|
4
|
+
data.tar.gz: 46338ae15353b71fb6e7eff390c8bb976d1c11fc296bb83f33aab8ffba1a3fa8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 488e7888039bb9e08e7c257fdd7cb0cf34340766f73b9b46c28b332072f5f207f5dcfb0df08c98b27f5640158d3a14ae6d0f5ab19d78f3e9e1ef44e381d34e74
|
7
|
+
data.tar.gz: 992cfc225eeb2f4b8017260e66c67bc9df39a79757ecdb65e01766079e103385259c1eeb447fdc5e6b769990f96256d384dc4adddf184693a3bc94b8323b9a5a
|
data/README.md
CHANGED
@@ -41,6 +41,7 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
|
|
41
41
|
collaboration between [Kostas Lab][kostas] at the Georgia Institute of
|
42
42
|
Technology and [RDP][rdp] at Michigan State University.
|
43
43
|
|
44
|
+
See also the [complete list of contributors](manual/part1/contributors.md).
|
44
45
|
|
45
46
|
# License
|
46
47
|
|
@@ -42,7 +42,9 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
42
42
|
'--no-summaries',
|
43
43
|
'Do not generate intermediate step summaries'
|
44
44
|
) { |v| cli[:summaries] = v }
|
45
|
-
opts_for_wf(
|
45
|
+
opts_for_wf(
|
46
|
+
opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
|
47
|
+
)
|
46
48
|
end
|
47
49
|
end
|
48
50
|
|
data/lib/miga/cli/action/edit.rb
CHANGED
@@ -17,18 +17,21 @@ class MiGA::Cli::Action::Edit < MiGA::Cli::Action
|
|
17
17
|
'Activate dataset; requires -D'
|
18
18
|
) { |v| cli[:activate] = v }
|
19
19
|
opt.on(
|
20
|
-
'--inactivate',
|
21
|
-
'Inactivate dataset; requires -D'
|
22
|
-
|
20
|
+
'--inactivate [reason]',
|
21
|
+
'Inactivate dataset; requires -D',
|
22
|
+
'The argument is optional: reason to inactivate dataset'
|
23
|
+
) { |v| cli[:activate] = false ; cli[:reason] = v }
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
26
27
|
def perform
|
27
28
|
obj = cli.load_project_or_dataset
|
28
29
|
unless cli[:activate].nil?
|
29
|
-
cli.ensure_par(
|
30
|
-
|
31
|
-
|
30
|
+
cli.ensure_par(
|
31
|
+
{ dataset: '-D' },
|
32
|
+
'%<name>s is mandatory with --[in-]activate: please provide %<flag>s'
|
33
|
+
)
|
34
|
+
cli[:activate] ? obj.activate! : obj.inactivate!(cli[:reason])
|
32
35
|
end
|
33
36
|
cli.add_metadata(obj)
|
34
37
|
obj.save
|
@@ -15,7 +15,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
|
|
15
15
|
'-m', '--mytaxa-scan',
|
16
16
|
'Perform MyTaxa scan analysis'
|
17
17
|
) { |v| cli[:mytaxa] = v }
|
18
|
-
opts_for_wf(
|
18
|
+
opts_for_wf(
|
19
|
+
opt, 'Input genome assemblies (nucleotides, FastA)',
|
20
|
+
qual: false
|
21
|
+
)
|
19
22
|
end
|
20
23
|
end
|
21
24
|
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -15,7 +15,7 @@ module MiGA::Cli::Action::Wf
|
|
15
15
|
|
16
16
|
def opts_for_wf(opt, files_desc, params = {})
|
17
17
|
{
|
18
|
-
multi: false, cleanup: true, project_type: false, ncbi: true
|
18
|
+
multi: false, cleanup: true, project_type: false, ncbi: true, qual: true
|
19
19
|
}.each { |k, v| params[k] = v if params[k].nil? }
|
20
20
|
opt.on(
|
21
21
|
'-o', '--out_dir PATH',
|
@@ -40,6 +40,13 @@ module MiGA::Cli::Action::Wf
|
|
40
40
|
'Only download complete genomes, not drafts'
|
41
41
|
) { |v| cli[:ncbi_draft] = v }
|
42
42
|
end
|
43
|
+
if params[:qual]
|
44
|
+
opt.on(
|
45
|
+
'--min-qual FLOAT', Float,
|
46
|
+
'Minimum genome quality to include in analysis',
|
47
|
+
'By default: 50.0'
|
48
|
+
) { |v| cli[:min_qual] = v }
|
49
|
+
end
|
43
50
|
if params[:cleanup]
|
44
51
|
opt.on(
|
45
52
|
'-c', '--clean',
|
@@ -125,7 +132,7 @@ module MiGA::Cli::Action::Wf
|
|
125
132
|
]) unless MiGA::Project.exist? cli[:outdir]
|
126
133
|
# Define project metadata
|
127
134
|
p = cli.load_project(:outdir, '-o')
|
128
|
-
[
|
135
|
+
%i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
|
129
136
|
p_metadata[:type] = cli[:project_type]
|
130
137
|
transfer_metadata(p, p_metadata)
|
131
138
|
# Download datasets
|
@@ -159,7 +166,7 @@ module MiGA::Cli::Action::Wf
|
|
159
166
|
'-P', cli[:outdir],
|
160
167
|
'-r', r,
|
161
168
|
'-o', File.expand_path("#{r}.tsv", cli[:outdir]),
|
162
|
-
'--tab'
|
169
|
+
'--tab', '--ref', '--active'
|
163
170
|
])
|
164
171
|
end
|
165
172
|
end
|
data/lib/miga/daemon.rb
CHANGED
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
72
72
|
say '-----------------------------------'
|
73
73
|
say 'MiGA:%s launched' % project.name
|
74
74
|
say '-----------------------------------'
|
75
|
+
recalculate_status!
|
75
76
|
load_status
|
76
77
|
say 'Configuration options:'
|
77
78
|
say @runopts.to_s
|
@@ -99,6 +100,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
99
100
|
end
|
100
101
|
|
101
102
|
def recalculate_status!
|
103
|
+
say 'Recalculating status for all datasets'
|
102
104
|
project.each_dataset(&:recalculate_status)
|
103
105
|
end
|
104
106
|
|
@@ -158,8 +160,8 @@ class MiGA::Daemon < MiGA::MiGA
|
|
158
160
|
end
|
159
161
|
|
160
162
|
##
|
161
|
-
# Traverse datasets, and returns boolean indicating if at any
|
162
|
-
# are incomplete
|
163
|
+
# Traverse datasets, and returns boolean indicating if at any reference
|
164
|
+
# datasets are incomplete
|
163
165
|
def check_datasets
|
164
166
|
l_say(2, 'Checking datasets')
|
165
167
|
o = false
|
@@ -167,7 +169,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
167
169
|
next unless ds.status == :incomplete
|
168
170
|
next if ds.next_preprocessing(false).nil?
|
169
171
|
|
170
|
-
o = true
|
172
|
+
o = true if ds.ref?
|
171
173
|
queue_job(:d, ds)
|
172
174
|
end
|
173
175
|
o
|
data/lib/miga/dataset.rb
CHANGED
@@ -97,7 +97,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
97
97
|
|
98
98
|
##
|
99
99
|
# Inactivate a dataset. This halts automated processing by the daemon
|
100
|
-
|
100
|
+
#
|
101
|
+
# If given, the +reason+ string is saved as a metadata +:warn+ entry
|
102
|
+
def inactivate!(reason = nil)
|
103
|
+
metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
|
101
104
|
metadata[:inactive] = true
|
102
105
|
metadata.save
|
103
106
|
pull_hook :on_inactivate
|
@@ -107,6 +110,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
107
110
|
# Activate a dataset. This removes the +:inactive+ flag
|
108
111
|
def activate!
|
109
112
|
metadata[:inactive] = nil
|
113
|
+
metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
|
110
114
|
metadata.save
|
111
115
|
pull_hook :on_activate
|
112
116
|
end
|
data/lib/miga/dataset/base.rb
CHANGED
@@ -35,8 +35,8 @@ module MiGA::Dataset::Base
|
|
35
35
|
mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
|
36
36
|
mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
|
37
37
|
# Distances (for single-species datasets)
|
38
|
-
distances: '09.distances',
|
39
38
|
taxonomy: '09.distances/05.taxonomy',
|
39
|
+
distances: '09.distances',
|
40
40
|
# General statistics
|
41
41
|
stats: '90.stats'
|
42
42
|
}
|
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -52,15 +52,15 @@ module MiGA::Dataset::Hooks
|
|
52
52
|
end
|
53
53
|
|
54
54
|
##
|
55
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
-
# object (
|
55
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
+
# dataset, project, project_name, miga, object (if defined for the event)
|
57
57
|
# - +hook_args+: +[cmd]+
|
58
58
|
# - +event_args+: +[object (optional)]+
|
59
59
|
def hook_run_cmd(hook_args, event_args)
|
60
60
|
Process.wait(
|
61
61
|
spawn hook_args.first.miga_variables(
|
62
|
-
dataset: name, project: project.path,
|
63
|
-
object: event_args.first
|
62
|
+
dataset: name, project: project.path, project_name: project.name,
|
63
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
64
64
|
)
|
65
65
|
)
|
66
66
|
end
|
data/lib/miga/dataset/result.rb
CHANGED
data/lib/miga/project/hooks.rb
CHANGED
@@ -26,14 +26,15 @@ module MiGA::Project::Hooks
|
|
26
26
|
end
|
27
27
|
|
28
28
|
##
|
29
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
-
# object (
|
29
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
+
# project, project_name, miga, object (if defined by the event)
|
31
31
|
# - +hook_args+: +[cmd]+
|
32
32
|
# - +event_args+: +[object (optional)]+
|
33
33
|
def hook_run_cmd(hook_args, event_args)
|
34
34
|
Process.wait(
|
35
35
|
spawn hook_args.first.miga_variables(
|
36
|
-
project: path,
|
36
|
+
project: path, project_name: name,
|
37
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
37
38
|
)
|
38
39
|
)
|
39
40
|
end
|
@@ -94,12 +94,13 @@ class MiGA::RemoteDataset
|
|
94
94
|
@timeout_try = 0
|
95
95
|
begin
|
96
96
|
DEBUG 'GET: ' + url
|
97
|
-
open(
|
97
|
+
URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
|
98
98
|
rescue => e
|
99
99
|
@timeout_try += 1
|
100
100
|
raise e if @timeout_try >= 3
|
101
101
|
|
102
102
|
sleep 5 # <- For: 429 Too Many Requests
|
103
|
+
DEBUG "RETRYING after: #{e}"
|
103
104
|
retry
|
104
105
|
end
|
105
106
|
doc
|
data/lib/miga/result.rb
CHANGED
@@ -164,7 +164,9 @@ class MiGA::Result < MiGA::MiGA
|
|
164
164
|
# Unlink result by removing the .done and .start timestamps and the
|
165
165
|
# .json descriptor, but don't remove any other associated files
|
166
166
|
def unlink
|
167
|
-
%i(start done).each
|
167
|
+
%i(start done).each do |i|
|
168
|
+
f = path(i) and File.exists?(f) and File.unlink(f)
|
169
|
+
end
|
168
170
|
File.unlink path
|
169
171
|
end
|
170
172
|
|
data/lib/miga/result/stats.rb
CHANGED
@@ -8,6 +8,7 @@ module MiGA::Result::Stats
|
|
8
8
|
# (Re-)calculate and save the statistics for the result
|
9
9
|
def compute_stats
|
10
10
|
method = :"compute_stats_#{key}"
|
11
|
+
MiGA::MiGA.DEBUG "Result(#{key}).compute_stats"
|
11
12
|
stats = self.respond_to?(method, true) ? send(method) : nil
|
12
13
|
unless stats.nil?
|
13
14
|
self[:stats] = stats
|
@@ -109,20 +110,8 @@ module MiGA::Result::Stats
|
|
109
110
|
end
|
110
111
|
end
|
111
112
|
else
|
112
|
-
#
|
113
|
-
|
114
|
-
%w[Archaea Bacteria].include?(tax[:d]) &&
|
115
|
-
file_path(:raw_report).nil?
|
116
|
-
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
117
|
-
rep = file_path(:report)
|
118
|
-
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
119
|
-
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
120
|
-
$stderr.print `#{rc} ruby '#{scr}' \
|
121
|
-
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
122
|
-
add_file(:raw_report, "#{source.name}.ess/log")
|
123
|
-
add_file(:report, "#{source.name}.ess/log.domain")
|
124
|
-
end
|
125
|
-
# Extract/compute quality values
|
113
|
+
# Estimate quality metrics
|
114
|
+
fix_essential_genes_by_domain
|
126
115
|
stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
|
127
116
|
File.open(file_path(:report), 'r') do |fh|
|
128
117
|
fh.each_line do |ln|
|
@@ -131,6 +120,8 @@ module MiGA::Result::Stats
|
|
131
120
|
end
|
132
121
|
end
|
133
122
|
end
|
123
|
+
|
124
|
+
# Determine qualitative range
|
134
125
|
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
135
126
|
source.metadata[:quality] =
|
136
127
|
case stats[:quality]
|
@@ -140,6 +131,12 @@ module MiGA::Result::Stats
|
|
140
131
|
else; :low
|
141
132
|
end
|
142
133
|
source.save
|
134
|
+
|
135
|
+
# Inactivate low-quality datasets
|
136
|
+
min_qual = (project.metadata[:min_qual] || 50)
|
137
|
+
if min_qual != 'no' && stats[:quality] < min_qual
|
138
|
+
source.inactivate! 'Low genome quality'
|
139
|
+
end
|
143
140
|
end
|
144
141
|
stats
|
145
142
|
end
|
@@ -175,4 +172,21 @@ module MiGA::Result::Stats
|
|
175
172
|
end
|
176
173
|
stats
|
177
174
|
end
|
175
|
+
|
176
|
+
# Fix estimates based on essential genes based on taxonomy
|
177
|
+
def fix_essential_genes_by_domain
|
178
|
+
return if (tax = source.metadata[:tax]).nil? ||
|
179
|
+
!%w[Archaea Bacteria].include?(tax[:d]) ||
|
180
|
+
file_path(:raw_report)
|
181
|
+
|
182
|
+
MiGA::MiGA.DEBUG "Fixing essential genes by domain"
|
183
|
+
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
184
|
+
rep = file_path(:report)
|
185
|
+
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
186
|
+
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
187
|
+
$stderr.print `#{rc} ruby '#{scr}' \
|
188
|
+
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
189
|
+
add_file(:raw_report, "#{source.name}.ess/log")
|
190
|
+
add_file(:report, "#{source.name}.ess/log.domain")
|
191
|
+
end
|
178
192
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -8,7 +8,7 @@ module MiGA
|
|
8
8
|
# - Float representing the major.minor version.
|
9
9
|
# - Integer representing gem releases of the current version.
|
10
10
|
# - Integer representing minor changes that require new version number.
|
11
|
-
VERSION = [0.7,
|
11
|
+
VERSION = [0.7, 10, 0]
|
12
12
|
|
13
13
|
##
|
14
14
|
# Nickname for the current major.minor version.
|
@@ -16,7 +16,7 @@ module MiGA
|
|
16
16
|
|
17
17
|
##
|
18
18
|
# Date of the current gem release.
|
19
|
-
VERSION_DATE = Date.new(2020, 6,
|
19
|
+
VERSION_DATE = Date.new(2020, 6, 29)
|
20
20
|
|
21
21
|
##
|
22
22
|
# Reference of MiGA.
|
data/scripts/distances.bash
CHANGED
@@ -9,7 +9,12 @@ cd "$PROJECT/data/09.distances"
|
|
9
9
|
# Initialize
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
|
-
#
|
12
|
+
# Check quality first
|
13
|
+
miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
|
14
|
+
inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
|
15
|
+
[[ "$inactive" == "true" ]] && exit
|
16
|
+
|
17
|
+
# Run distances
|
13
18
|
ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
|
14
19
|
|
15
20
|
# Finalize
|
data/test/daemon_test.rb
CHANGED
@@ -93,7 +93,7 @@ class DaemonTest < Test::Unit::TestCase
|
|
93
93
|
0 => /-{20}\n/,
|
94
94
|
1 => /MiGA:#{p.name} launched/,
|
95
95
|
2 => /-{20}\n/,
|
96
|
-
|
96
|
+
6 => /Probing running jobs\n/
|
97
97
|
}.each { |k, v| assert_match(v, l[k], "unexpected line: #{k}") }
|
98
98
|
ensure
|
99
99
|
begin
|
data/test/dataset_test.rb
CHANGED
@@ -185,11 +185,13 @@ class DatasetTest < Test::Unit::TestCase
|
|
185
185
|
d = dataset
|
186
186
|
assert_equal(:incomplete, d.status)
|
187
187
|
assert_predicate(d, :active?)
|
188
|
-
d.inactivate!
|
188
|
+
d.inactivate! 'Too annoying'
|
189
189
|
assert_equal(:inactive, d.status)
|
190
|
+
assert_equal('Inactive: Too annoying', d.metadata[:warn])
|
190
191
|
assert_not_predicate(d, :active?)
|
191
192
|
d.activate!
|
192
193
|
assert_equal(:incomplete, d.status)
|
194
|
+
assert_nil(d.metadata[:warn])
|
193
195
|
assert_predicate(d, :active?)
|
194
196
|
end
|
195
197
|
|
data/test/remote_dataset_test.rb
CHANGED
@@ -101,7 +101,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
101
101
|
|
102
102
|
def test_ref_type_status
|
103
103
|
declare_remote_access
|
104
|
-
rd = MiGA::RemoteDataset.new('
|
104
|
+
rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
|
105
105
|
assert { !rd.get_metadata[:is_type] }
|
106
106
|
assert { rd.get_metadata[:is_ref_type] }
|
107
107
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -529,7 +529,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
|
|
529
529
|
licenses:
|
530
530
|
- Artistic-2.0
|
531
531
|
metadata: {}
|
532
|
-
post_install_message:
|
532
|
+
post_install_message:
|
533
533
|
rdoc_options:
|
534
534
|
- lib
|
535
535
|
- README.md
|
@@ -550,8 +550,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
550
550
|
- !ruby/object:Gem::Version
|
551
551
|
version: '0'
|
552
552
|
requirements: []
|
553
|
-
rubygems_version: 3.
|
554
|
-
signing_key:
|
553
|
+
rubygems_version: 3.1.2
|
554
|
+
signing_key:
|
555
555
|
specification_version: 4
|
556
556
|
summary: MiGA
|
557
557
|
test_files: []
|