miga-base 0.7.23.0 → 0.7.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/Rakefile +1 -0
- data/lib/miga/cli/action/add.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +12 -11
- data/lib/miga/cli/action/derep_wf.rb +3 -9
- data/lib/miga/cli/action/edit.rb +0 -1
- data/lib/miga/cli/action/find.rb +1 -1
- data/lib/miga/cli/action/generic.rb +1 -1
- data/lib/miga/cli/action/get.rb +7 -2
- data/lib/miga/cli/action/ncbi_get.rb +1 -1
- data/lib/miga/cli/action/new.rb +15 -9
- data/lib/miga/cli/action/option.rb +44 -0
- data/lib/miga/cli/action/quality_wf.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +1 -1
- data/lib/miga/cli/action/tax_test.rb +1 -1
- data/lib/miga/cli/action/wf.rb +32 -30
- data/lib/miga/cli/base.rb +1 -0
- data/lib/miga/cli/objects_helper.rb +23 -18
- data/lib/miga/common.rb +1 -1
- data/lib/miga/common/with_option.rb +83 -0
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/dataset/base.rb +20 -2
- data/lib/miga/dataset/result.rb +1 -1
- data/lib/miga/metadata.rb +25 -13
- data/lib/miga/project/base.rb +82 -2
- data/lib/miga/project/result.rb +4 -4
- data/lib/miga/result/stats.rb +2 -2
- data/lib/miga/version.rb +2 -2
- data/scripts/essential_genes.bash +1 -2
- data/scripts/ogs.bash +2 -3
- data/test/dataset_test.rb +5 -5
- data/test/with_option_test.rb +115 -0
- data/utils/cleanup-databases.rb +1 -2
- data/utils/distance/commands.rb +2 -2
- data/utils/distance/database.rb +1 -1
- data/utils/distance/pipeline.rb +2 -4
- data/utils/distance/runner.rb +15 -23
- data/utils/index_metadata.rb +1 -2
- data/utils/subclade/runner.rb +9 -10
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82ca84b468df712ab38eba74071c26e8dcb1d87335764dde2ba86075abc8c8d5
|
4
|
+
data.tar.gz: 36583e298fc020faa5cc16af2f6a3fcc88c30619a4f529fd6f03772f6c437bb9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d78a3709fecba4594d8dc9a2351f217b3605022237e424eb1a194ccf62ddf2059eb1df274f68a15cda74936dfece3f2123485483773172cc62cfba9ff454c5d2
|
7
|
+
data.tar.gz: fbf11b04c062701b204f4ba93425dea6f8ff1492fe48f7182472e65b16fccbde98e86d396a79cf51c6e410a246c3b9e9509043eaf952b320191ec086e2efa573
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
data/lib/miga/cli/action/add.rb
CHANGED
@@ -65,8 +65,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
65
65
|
next if d.nil?
|
66
66
|
|
67
67
|
copy_file_to_project(file, file_type, d, p)
|
68
|
-
|
69
|
-
d.save
|
68
|
+
cli.add_metadata(d)
|
70
69
|
p.add_dataset(d.name)
|
71
70
|
res = d.first_preprocessing(true)
|
72
71
|
cli.say " result: #{res}"
|
@@ -49,23 +49,24 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
49
49
|
def perform
|
50
50
|
# Input data
|
51
51
|
ref_db = reference_db
|
52
|
-
|
53
|
-
|
54
|
-
.map { |i| ["run_#{i}", false] }
|
52
|
+
norun = %w[
|
53
|
+
project_stats haai_distances aai_distances ani_distances clade_finding
|
55
54
|
]
|
56
|
-
p_metadata
|
57
|
-
|
58
|
-
|
59
|
-
|
55
|
+
p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
|
56
|
+
p = create_project(
|
57
|
+
:assembly,
|
58
|
+
p_metadata,
|
59
|
+
run_ssu: false, run_mytaxa_scan: false, run_distances: false
|
60
|
+
)
|
61
|
+
p.set_option(:ref_project, ref_db.path)
|
62
|
+
p.set_option(:tax_pvalue, cli[:pvalue], true)
|
60
63
|
# Run
|
61
64
|
run_daemon
|
62
65
|
summarize(%w[cds assembly essential_genes]) if cli[:summaries]
|
63
66
|
summarize(['taxonomy'])
|
64
67
|
cli.say "Summary: classification"
|
65
|
-
|
66
|
-
|
67
|
-
'-o', File.expand_path('classification.tsv', cli[:outdir])
|
68
|
-
])
|
68
|
+
ofile = File.expand_path('classification.tsv', cli[:outdir])
|
69
|
+
call_cli(['ls', '-P', cli[:outdir], '-m', 'tax', '--tab', '-o', ofile])
|
69
70
|
cleanup
|
70
71
|
end
|
71
72
|
|
@@ -52,17 +52,11 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
52
52
|
# Input data
|
53
53
|
p = create_project(
|
54
54
|
:assembly,
|
55
|
-
{
|
56
|
-
run_project_stats: false,
|
57
|
-
run_clades: false,
|
58
|
-
gsp_metric: cli[:metric],
|
59
|
-
:"gsp_#{cli[:metric]}" => cli[:threshold]
|
60
|
-
},
|
55
|
+
{ run_project_stats: false, run_clades: false },
|
61
56
|
{ run_mytaxa_scan: false, run_ssu: false }
|
62
57
|
)
|
63
|
-
|
64
|
-
|
65
|
-
end
|
58
|
+
p.set_option(:gsp_metric, cli[:metric])
|
59
|
+
p.set_option(:"gsp_#{cli[:metric]}", cli[:threshold])
|
66
60
|
|
67
61
|
# Run
|
68
62
|
run_daemon
|
data/lib/miga/cli/action/edit.rb
CHANGED
data/lib/miga/cli/action/find.rb
CHANGED
@@ -20,7 +20,7 @@ class MiGA::Cli::Action::Generic < MiGA::Cli::Action
|
|
20
20
|
opt.on(
|
21
21
|
'-v', '--version',
|
22
22
|
'Show MiGA version'
|
23
|
-
) { puts MiGA::MiGA.
|
23
|
+
) { puts MiGA::MiGA.FULL_VERSION; exit }
|
24
24
|
opt.on(
|
25
25
|
'-V', '--long-version',
|
26
26
|
'Show complete MiGA version'
|
data/lib/miga/cli/action/get.rb
CHANGED
@@ -6,8 +6,9 @@ require 'miga/remote_dataset'
|
|
6
6
|
|
7
7
|
class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
8
8
|
def parse_cli
|
9
|
-
cli.defaults = {
|
10
|
-
|
9
|
+
cli.defaults = {
|
10
|
+
query: false, universe: :ncbi, db: :nuccore, get_md: false, only_md: false
|
11
|
+
}
|
11
12
|
cli.parse do |opt|
|
12
13
|
cli.opt_object(opt, [:project, :dataset, :dataset_type])
|
13
14
|
opt.on(
|
@@ -141,6 +142,10 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
141
142
|
|
142
143
|
def create_dataset(sub_cli, p, rd)
|
143
144
|
sub_cli.say 'Creating dataset'
|
145
|
+
if Dataset.exist?(p, sub_cli[:dataset])
|
146
|
+
raise "Dataset already exists: #{sub_cli[:dataset]}"
|
147
|
+
end
|
148
|
+
|
144
149
|
dummy_d = Dataset.new(p, sub_cli[:dataset])
|
145
150
|
md = sub_cli.add_metadata(dummy_d).metadata.data
|
146
151
|
md[:metadata_only] = true if cli[:only_md]
|
@@ -269,7 +269,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
269
269
|
else
|
270
270
|
cli.say ' Creating dataset'
|
271
271
|
rd.save_to(p, name, !cli[:query], body[:md])
|
272
|
-
cli.add_metadata(p.add_dataset(name))
|
272
|
+
cli.add_metadata(p.add_dataset(name))
|
273
273
|
end
|
274
274
|
end
|
275
275
|
end
|
data/lib/miga/cli/action/new.rb
CHANGED
@@ -24,6 +24,11 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
|
|
24
24
|
'Use faster identity engines (Diamond-AAI and FastANI)',
|
25
25
|
'Equivalent to: -m aai_p=diamond,ani_p=fastani'
|
26
26
|
) { |v| cli[:fast] = v }
|
27
|
+
opt.on(
|
28
|
+
'--sensitive',
|
29
|
+
'Use more sensitive identity engines (BLAST+)',
|
30
|
+
'Equivalent to: -m aai_p=blast+,ani_p=blast+'
|
31
|
+
) { |v| cli[:sensitive] = v }
|
27
32
|
opt.on(
|
28
33
|
'-m', '--metadata STRING',
|
29
34
|
'Metadata as key-value pairs separated by = and delimited by comma',
|
@@ -35,20 +40,21 @@ class MiGA::Cli::Action::New < MiGA::Cli::Action
|
|
35
40
|
def perform
|
36
41
|
cli.ensure_type(MiGA::Project)
|
37
42
|
cli.ensure_par(project: '-P')
|
38
|
-
unless
|
39
|
-
|
40
|
-
raise "You must initialize MiGA before creating the first project.\n" +
|
41
|
-
'Please use "miga init".'
|
43
|
+
unless MiGA::MiGA.initialized?
|
44
|
+
raise 'MiGA has not been initialized, please use "miga init" first'
|
42
45
|
end
|
43
46
|
cli.say "Creating project: #{cli[:project]}"
|
44
|
-
raise 'Project already exists, aborting
|
47
|
+
raise 'Project already exists, aborting' if Project.exist?(cli[:project])
|
45
48
|
|
46
49
|
p = Project.new(cli[:project], false)
|
47
50
|
p = cli.add_metadata(p)
|
48
|
-
|
49
|
-
|
50
|
-
p.
|
51
|
+
|
52
|
+
if cli[:sensitive]
|
53
|
+
p.set_option(:aai_p, 'blast+')
|
54
|
+
p.set_option(:ani_p, 'blast+')
|
55
|
+
elsif cli[:fast]
|
56
|
+
p.set_option(:aai_p, 'diamond')
|
57
|
+
p.set_option(:ani_p, 'fastani')
|
51
58
|
end
|
52
|
-
p.save
|
53
59
|
end
|
54
60
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'miga/cli/action'
|
4
|
+
|
5
|
+
##
|
6
|
+
# CLI: `miga option`
|
7
|
+
class MiGA::Cli::Action::Option < MiGA::Cli::Action
|
8
|
+
def parse_cli
|
9
|
+
cli.parse do |opt|
|
10
|
+
cli.opt_object(opt, %i[project dataset_opt])
|
11
|
+
opt.on(
|
12
|
+
'-k', '--key STRING',
|
13
|
+
'Option name to get or set (by default, all options are printed)'
|
14
|
+
) { |v| cli[:key] = v }
|
15
|
+
opt.on(
|
16
|
+
'--value STRING',
|
17
|
+
'Value of the option to set (by default, option value is not changed)',
|
18
|
+
'Recognized tokens: nil, true, false'
|
19
|
+
) { |v| cli[:value] = v }
|
20
|
+
opt.on(
|
21
|
+
'-o', '--output PATH',
|
22
|
+
'Create output file instead of returning to STDOUT'
|
23
|
+
) { |v| cli[:output] = v }
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def perform
|
28
|
+
unless cli[:value].nil?
|
29
|
+
cli.ensure_par(
|
30
|
+
{ key: '-k' },
|
31
|
+
'%<name>s is mandatory when --value is set: please provide %<flag>s'
|
32
|
+
)
|
33
|
+
end
|
34
|
+
obj = cli.load_project_or_dataset
|
35
|
+
io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
|
36
|
+
if cli[:key].nil?
|
37
|
+
cli.table(%w[Key Value], obj.all_options.to_a, io)
|
38
|
+
else
|
39
|
+
obj.set_option(cli[:key], cli[:value], true) unless cli[:value].nil?
|
40
|
+
io.puts obj.option(cli[:key])
|
41
|
+
end
|
42
|
+
io.close unless cli[:output].nil?
|
43
|
+
end
|
44
|
+
end
|
@@ -21,10 +21,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
|
|
21
21
|
|
22
22
|
def perform
|
23
23
|
# Input data
|
24
|
-
|
25
|
-
|
26
|
-
.map { |i| ["run_#{i}", false] }
|
24
|
+
norun = %w[
|
25
|
+
project_stats haai_distances aai_distances ani_distances clade_finding
|
27
26
|
]
|
27
|
+
p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
|
28
28
|
d_metadata = { run_distances: false }
|
29
29
|
d_metadata[:run_mytaxa_scan] = false unless cli[:mytaxa]
|
30
30
|
p = create_project(:assembly, p_metadata, d_metadata)
|
@@ -47,7 +47,7 @@ class MiGA::Cli::Action::TaxDist < MiGA::Cli::Action
|
|
47
47
|
|
48
48
|
def read_distances
|
49
49
|
p = cli.load_project
|
50
|
-
cli[:metric] ||= p.
|
50
|
+
cli[:metric] ||= p.clade? ? 'ani' : 'aai'
|
51
51
|
res_n = "#{cli[:metric]}_distances"
|
52
52
|
cli.say "Reading distances: 1-#{cli[:metric].upcase}"
|
53
53
|
res = p.result(res_n)
|
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action
|
|
59
59
|
cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
|
60
60
|
p = cli.load_project
|
61
61
|
if cli[:ref_project]
|
62
|
-
if (ref = p.
|
62
|
+
if (ref = p.option(:ref_project)).nil?
|
63
63
|
raise '--ref-project requested but no reference project has been set'
|
64
64
|
end
|
65
65
|
if (q = MiGA::Project.load(ref)).nil?
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -9,7 +9,7 @@ module MiGA::Cli::Action::Wf
|
|
9
9
|
cli.defaults = {
|
10
10
|
clean: false, regexp: MiGA::Cli.FILE_REGEXP,
|
11
11
|
project_type: :genomes, dataset_type: :popgenome,
|
12
|
-
ncbi_draft: true, min_qual:
|
12
|
+
ncbi_draft: true, min_qual: MiGA::Project.OPTIONS[:min_qual][:default]
|
13
13
|
}
|
14
14
|
end
|
15
15
|
|
@@ -125,32 +125,34 @@ module MiGA::Cli::Action::Wf
|
|
125
125
|
dataset_type: '--dataset-type'
|
126
126
|
)
|
127
127
|
# Create empty project
|
128
|
-
call_cli(
|
129
|
-
|
130
|
-
|
131
|
-
'-t', cli[:project_type]
|
132
|
-
]) unless MiGA::Project.exist? cli[:outdir]
|
128
|
+
call_cli(
|
129
|
+
['new', '-P', cli[:outdir], '-t', cli[:project_type]]
|
130
|
+
) unless MiGA::Project.exist? cli[:outdir]
|
133
131
|
# Define project metadata
|
134
132
|
p = cli.load_project(:outdir, '-o')
|
135
|
-
%i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
|
136
133
|
p_metadata[:type] = cli[:project_type]
|
137
134
|
transfer_metadata(p, p_metadata)
|
135
|
+
%i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
|
136
|
+
p.set_option(i, cli[i])
|
137
|
+
end
|
138
138
|
# Download datasets
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
139
|
+
unless cli[:ncbi_taxon].nil?
|
140
|
+
what = cli[:ncbi_draft] ? '--all' : '--complete'
|
141
|
+
call_cli(
|
142
|
+
['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
|
143
|
+
)
|
144
|
+
end
|
145
145
|
# Add datasets
|
146
|
-
call_cli(
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
146
|
+
call_cli(
|
147
|
+
[
|
148
|
+
'add',
|
149
|
+
'--ignore-dups',
|
150
|
+
'-P', cli[:outdir],
|
151
|
+
'-t', cli[:dataset_type],
|
152
|
+
'-i', stage,
|
153
|
+
'-R', cli[:regexp]
|
154
|
+
] + cli.files
|
155
|
+
) unless cli.files.empty?
|
154
156
|
# Define datasets metadata
|
155
157
|
p.load
|
156
158
|
d_metadata[:type] = cli[:dataset_type]
|
@@ -161,13 +163,13 @@ module MiGA::Cli::Action::Wf
|
|
161
163
|
def summarize(which = %w[cds assembly essential_genes ssu])
|
162
164
|
which.each do |r|
|
163
165
|
cli.say "Summary: #{r}"
|
164
|
-
call_cli(
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
166
|
+
call_cli(
|
167
|
+
[
|
168
|
+
'summary',
|
169
|
+
'-P', cli[:outdir], '-r', r, '--tab', '--ref', '--active',
|
170
|
+
'-o', File.join(cli[:outdir], "#{r}.tsv")
|
171
|
+
]
|
172
|
+
)
|
171
173
|
end
|
172
174
|
call_cli(['browse', '-P', cli[:outdir]])
|
173
175
|
end
|
@@ -193,14 +195,14 @@ module MiGA::Cli::Action::Wf
|
|
193
195
|
cmd += ['--max-jobs', cli[:jobs]] unless cli[:jobs].nil?
|
194
196
|
cmd += ['--ppn', cli[:threads]] unless cli[:threads].nil?
|
195
197
|
cwd = Dir.pwd
|
196
|
-
call_cli
|
198
|
+
call_cli(cmd)
|
197
199
|
Dir.chdir(cwd)
|
198
200
|
end
|
199
201
|
|
200
202
|
def transfer_metadata(obj, md)
|
201
203
|
# Clear old metadata
|
202
204
|
obj.metadata.each do |k, v|
|
203
|
-
obj.metadata[k] = nil if k.to_s =~ /^run_/ || k
|
205
|
+
obj.metadata[k] = nil if k.to_s =~ /^run_/ || obj.option?(k)
|
204
206
|
end
|
205
207
|
# Transfer and save
|
206
208
|
md.each { |k, v| obj.metadata[k] = v }
|
data/lib/miga/cli/base.rb
CHANGED
@@ -34,6 +34,7 @@ module MiGA::Cli::Base
|
|
34
34
|
next_step: 'Return the next task to run in a dataset or project',
|
35
35
|
# Objects (Datasets or Projects)
|
36
36
|
edit: 'Edit the metadata of a dataset or project',
|
37
|
+
option: 'Get or set options of a dataset or project',
|
37
38
|
# System
|
38
39
|
init: 'Initialize MiGA to process new projects',
|
39
40
|
daemon: 'Control the daemon of a MiGA project',
|
@@ -57,12 +57,12 @@ module MiGA::Cli::ObjectsHelper
|
|
57
57
|
ds.select! do |d|
|
58
58
|
advance('Datasets:', k += 1, n, false)
|
59
59
|
o = true
|
60
|
-
o &&= (d.
|
61
|
-
o &&= (d.
|
62
|
-
o &&= (self[:multi] ? d.
|
63
|
-
|
64
|
-
|
65
|
-
|
60
|
+
o &&= (d.ref? == self[:ref]) unless self[:ref].nil?
|
61
|
+
o &&= (d.active? == self[:active]) unless self[:active].nil?
|
62
|
+
o &&= (self[:multi] ? d.multi? : d.nonmulti?) unless self[:multi].nil?
|
63
|
+
unless self[:taxonomy].nil?
|
64
|
+
o &&= !d.metadata[:tax].nil? && d.metadata[:tax].in?(self[:taxonomy])
|
65
|
+
end
|
66
66
|
o
|
67
67
|
end
|
68
68
|
say ''
|
@@ -90,22 +90,27 @@ module MiGA::Cli::ObjectsHelper
|
|
90
90
|
def add_metadata(obj, cli = self)
|
91
91
|
raise "Unsupported object: #{obj.class}" unless obj.respond_to? :metadata
|
92
92
|
|
93
|
-
cli[:metadata].split(',').each do |pair|
|
93
|
+
(cli[:metadata] || '').split(',').each do |pair|
|
94
94
|
(k, v) = pair.split('=')
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
95
|
+
if obj.option?(k)
|
96
|
+
obj.set_option(k, v, true)
|
97
|
+
else
|
98
|
+
case v
|
99
|
+
when 'true'; v = true
|
100
|
+
when 'false'; v = false
|
101
|
+
when 'nil'; v = nil
|
102
|
+
end
|
103
|
+
if k == '_step'
|
104
|
+
obj.metadata["_try_#{v}"] ||= 0
|
105
|
+
obj.metadata["_try_#{v}"] += 1
|
106
|
+
end
|
107
|
+
obj.metadata[k] = v
|
103
108
|
end
|
104
|
-
|
105
|
-
|
106
|
-
[:type, :name, :user, :description, :comments].each do |k|
|
109
|
+
end
|
110
|
+
%i[type name user description comments].each do |k|
|
107
111
|
obj.metadata[k] = cli[k] unless cli[k].nil?
|
108
112
|
end
|
113
|
+
obj.save
|
109
114
|
obj
|
110
115
|
end
|
111
116
|
end
|