miga-base 0.4.3.0 → 0.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e370d282f1b28480765e1b91fcb7d8921d12baa31d22db1318975a1c2a79e19a
|
4
|
+
data.tar.gz: e7fb3941fd3381e0e9696a2c577aeb157657335e56434e7c6d6650be7ba45e98
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4642a212e1b4021e211fd144b515ff49e9ddb7a9b2292430553307a7ae165e4d8d5e6fd8426757f15ea6e70f4c3efbb055e0439497172cc1f91186d522c82635
|
7
|
+
data.tar.gz: 8d5d3ded3c03e56505572102110a4bca4b84d06b2e73bcf208856610a4cd6e60092ce6d54d47dcef2c8acf85f1cce5f8461097e8699344df6738ed8493215112
|
data/README.md
CHANGED
@@ -23,7 +23,7 @@ For additional information on MiGA, visit:
|
|
23
23
|
If you're like us, you probably want to see sofware in action from the get go.
|
24
24
|
You have two options:
|
25
25
|
|
26
|
-
1. Get a
|
26
|
+
1. Get a peek on MiGA using [MiGA Online][miga-online].
|
27
27
|
2. Install the [requirements](manual/part2/requirements.md) and follow the
|
28
28
|
[installation instructions](manual/part2/installation.md). Once you have MiGA
|
29
29
|
installed, you can [deploy some examples](manual/part4.md).
|
data/lib/miga/cli.rb
CHANGED
@@ -9,7 +9,11 @@ require 'optparse'
|
|
9
9
|
class MiGA::Cli < MiGA::MiGA
|
10
10
|
|
11
11
|
require 'miga/cli/base'
|
12
|
+
require 'miga/cli/opt_helper'
|
13
|
+
require 'miga/cli/objects_helper'
|
12
14
|
require 'miga/cli/action'
|
15
|
+
include MiGA::Cli::OptHelper
|
16
|
+
include MiGA::Cli::ObjectsHelper
|
13
17
|
|
14
18
|
##
|
15
19
|
# Task to execute, a symbol
|
@@ -71,41 +75,68 @@ class MiGA::Cli < MiGA::MiGA
|
|
71
75
|
end
|
72
76
|
|
73
77
|
##
|
74
|
-
#
|
78
|
+
# Print +par+, ensuring new line at the end.
|
79
|
+
# If the first parameter is +IO+, the output is sent there,
|
80
|
+
# otherwise it's sent to +$stdout+
|
75
81
|
def puts(*par)
|
76
|
-
|
82
|
+
io = par.first.is_a?(IO) ? par.shift : $stdout
|
83
|
+
io.puts(*par)
|
77
84
|
end
|
78
85
|
|
79
86
|
##
|
80
|
-
#
|
87
|
+
# Print +par+.
|
88
|
+
# If the first parameter is +IO+, the output is sent there,
|
89
|
+
# otherwise it's sent to +$stdout+
|
81
90
|
def print(*par)
|
82
|
-
|
91
|
+
io = par.first.is_a?(IO) ? par.shift : $stdout
|
92
|
+
io.print(*par)
|
83
93
|
end
|
84
94
|
|
85
95
|
##
|
86
|
-
# Display a table with headers +header+ and contents +values+, both Array
|
87
|
-
|
88
|
-
|
96
|
+
# Display a table with headers +header+ and contents +values+, both Array.
|
97
|
+
# The output is printed to +io+
|
98
|
+
def table(header, values, io = $stdout)
|
99
|
+
self.puts(io, MiGA.tabulate(header, values, self[:tabular]))
|
89
100
|
end
|
90
101
|
|
91
102
|
##
|
92
|
-
#
|
103
|
+
# Print +par+ ensuring new line at the end, iff --verbose.
|
93
104
|
# Date/time each line.
|
105
|
+
# If the first parameter is +IO+, the output is sent there,
|
106
|
+
# otherwise it's sent to +$stderr+
|
94
107
|
def say(*par)
|
95
108
|
return unless self[:verbose]
|
96
109
|
par.map! { |i| "[#{Time.now}] #{i}" }
|
97
|
-
|
110
|
+
io = par.first.is_a?(IO) ? par.shift : $stderr
|
111
|
+
io.puts(*par)
|
98
112
|
end
|
99
113
|
|
100
114
|
##
|
101
|
-
# Reports the advance of a task at +step+ (String), the +n+ out of +total
|
115
|
+
# Reports the advance of a task at +step+ (String), the +n+ out of +total+.
|
116
|
+
# The advance is reported in powers of 1,024 if +bin+ is true, or powers of
|
117
|
+
# 1,000 otherwise.
|
102
118
|
# The report goes to $stderr iff --verborse
|
103
|
-
def advance(step, n = 0, total = nil)
|
119
|
+
def advance(step, n = 0, total = nil, bin = true)
|
104
120
|
return unless self[:verbose]
|
105
|
-
adv = total.nil? ? '' :
|
121
|
+
adv = total.nil? ? '' :
|
122
|
+
('%.1f%% (%s/%s)' % [100 * n / total,
|
123
|
+
num_suffix(n, bin), num_suffix(total, bin)])
|
106
124
|
$stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
|
107
125
|
end
|
108
126
|
|
127
|
+
def num_suffix(n, bin = false)
|
128
|
+
p = ''
|
129
|
+
{T: 4, G: 3, M: 2, K: 1}.each do |k,x|
|
130
|
+
v = (bin ? 1024 : 1e3) ** x
|
131
|
+
if n > v
|
132
|
+
n = '%.1f' % (n / v)
|
133
|
+
p = k
|
134
|
+
break
|
135
|
+
end
|
136
|
+
end
|
137
|
+
"#{n}#{p}"
|
138
|
+
end
|
139
|
+
|
109
140
|
##
|
110
141
|
# Ask a question +question+ to the user (requires +#interactive = true+)
|
111
142
|
# The +default+ is used if the answer is empty
|
@@ -188,135 +219,6 @@ class MiGA::Cli < MiGA::MiGA
|
|
188
219
|
end
|
189
220
|
end
|
190
221
|
|
191
|
-
##
|
192
|
-
# Send MiGA's banner to OptionParser +opt+
|
193
|
-
def banner(opt)
|
194
|
-
usage = "Usage: miga #{action.name}"
|
195
|
-
usage += ' {operation}' if expect_operation
|
196
|
-
usage += ' [options]'
|
197
|
-
usage += ' {FILES...}' if expect_files
|
198
|
-
opt.banner = "\n#{task_description}\n\n#{usage}\n"
|
199
|
-
opt.separator ''
|
200
|
-
end
|
201
|
-
|
202
|
-
##
|
203
|
-
# Common options at the end of most actions, passed to OptionParser +opt+
|
204
|
-
# No action is performed if +#opt_common = false+ is passed
|
205
|
-
# Executes only once, unless +#opt_common = true+ is passed between calls
|
206
|
-
def opt_common(opt)
|
207
|
-
return unless @opt_common
|
208
|
-
opt.on(
|
209
|
-
'--auto',
|
210
|
-
'Accept all defaults as answers'
|
211
|
-
){ |v| cli[:auto] = v } if interactive
|
212
|
-
opt.on(
|
213
|
-
'-v', '--verbose',
|
214
|
-
'Print additional information to STDERR'
|
215
|
-
){ |v| self[:verbose] = v }
|
216
|
-
opt.on(
|
217
|
-
'-d', '--debug INT', Integer,
|
218
|
-
'Print debugging information to STDERR (1: debug, 2: trace)'
|
219
|
-
){ |v| (v > 1) ? MiGA.DEBUG_TRACE_ON : MiGA.DEBUG_ON }
|
220
|
-
opt.on(
|
221
|
-
'-h', '--help',
|
222
|
-
'Display this screen'
|
223
|
-
){ puts opt ; exit }
|
224
|
-
opt.separator ''
|
225
|
-
self.opt_common = false
|
226
|
-
end
|
227
|
-
|
228
|
-
##
|
229
|
-
# Options to load an object passed to OptionParser +opt+, as determined
|
230
|
-
# by +what+ an Array with any combination of:
|
231
|
-
# - :project To require a project
|
232
|
-
# - :dataset To require a dataset
|
233
|
-
# - :dataset_opt To allow (optionally) a dataset
|
234
|
-
# - :dataset_type To allow (optionally) a type of dataset
|
235
|
-
# - :dataset_type_req To require a type of dataset
|
236
|
-
# - :project_type To allow (optionally) a type of project
|
237
|
-
# - :project_type_req To require a type of project
|
238
|
-
# - :result To require a type of project or dataset result
|
239
|
-
# - :result_dataset To require a type of dataset result
|
240
|
-
# - :result_project To require a type of project result
|
241
|
-
def opt_object(opt, what = [:project, :dataset])
|
242
|
-
opt.on(
|
243
|
-
'-P', '--project PATH',
|
244
|
-
'(Mandatory) Path to the project'
|
245
|
-
){ |v| self[:project] = v } if what.include? :project
|
246
|
-
opt.on(
|
247
|
-
'-D', '--dataset STRING',
|
248
|
-
(what.include?(:dataset) ? '(Mandatory) ' : '') + 'Name of the dataset'
|
249
|
-
){ |v| self[:dataset] = v } if what.include? :dataset or
|
250
|
-
what.include? :dataset_opt
|
251
|
-
opt.on(
|
252
|
-
'-D', '--dataset STRING',
|
253
|
-
'Name of the dataset'
|
254
|
-
){ |v| self[:dataset] = v } if what.include? :dataset_opt
|
255
|
-
opt.on(
|
256
|
-
'-t', '--type STRING',
|
257
|
-
(what.include?(:dataset_type_req) ? '(Mandatory) ' : '') +
|
258
|
-
'Type of dataset. Recognized types include:',
|
259
|
-
*Dataset.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}" }
|
260
|
-
){ |v| self[:type] = v.downcase.to_sym } if what.include? :dataset_type or
|
261
|
-
what.include? :dataset_type_req
|
262
|
-
opt.on(
|
263
|
-
'-t', '--type STRING',
|
264
|
-
(what.include?(:project_type_req) ? '(Mandatory) ' : '') +
|
265
|
-
'Type of project. Recognized types include:',
|
266
|
-
*Project.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
|
267
|
-
){ |v| self[:type] = v.downcase.to_sym } if what.include? :project_type or
|
268
|
-
what.include? :project_type_req
|
269
|
-
opt.on(
|
270
|
-
'-r', '--result STRING',
|
271
|
-
'(Mandatory) Name of the result',
|
272
|
-
'Recognized names for dataset-specific results include:',
|
273
|
-
*Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
|
274
|
-
'Recognized names for project-wide results include:',
|
275
|
-
*Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
|
276
|
-
){ |v| self[:result] = v.downcase.to_sym } if what.include? :result
|
277
|
-
opt.on(
|
278
|
-
'-r', '--result STRING',
|
279
|
-
'(Mandatory) Name of the result, one of:',
|
280
|
-
*Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
|
281
|
-
){ |v| self[:result] = v.downcase.to_sym } if what.include? :result_dataset
|
282
|
-
opt.on(
|
283
|
-
'-r', '--result STRING',
|
284
|
-
'(Mandatory) Name of the result, one of:',
|
285
|
-
*Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
|
286
|
-
){ |v| self[:result] = v.downcase.to_sym } if what.include? :result_project
|
287
|
-
end
|
288
|
-
|
289
|
-
##
|
290
|
-
# Options to filter a list of datasets passed to OptionParser +opt+,
|
291
|
-
# as determined by +what+ an Array with any combination of:
|
292
|
-
# - :ref To filter by reference (--ref) or query (--no-ref)
|
293
|
-
# - :multi To filter by multiple (--multi) or single (--no-multi) species
|
294
|
-
# - :active To filter by active (--active) or inactive (--no-active)
|
295
|
-
# - :taxonomy To filter by taxonomy (--taxonomy)
|
296
|
-
# The "k-th" filter (--dataset-k) is always included
|
297
|
-
def opt_filter_datasets(opt, what = [:ref, :multi, :active, :taxonomy])
|
298
|
-
opt.on(
|
299
|
-
'--[no-]ref',
|
300
|
-
'Use only reference (or only non-reference) datasets'
|
301
|
-
){ |v| self[:ref] = v } if what.include? :ref
|
302
|
-
opt.on(
|
303
|
-
'--[no-]multi',
|
304
|
-
'Use only multi-species (or only single-species) datasets'
|
305
|
-
){ |v| self[:multi] = v } if what.include? :multi
|
306
|
-
opt.on(
|
307
|
-
'--[no-]active',
|
308
|
-
'Use only active (or inactive) datasets'
|
309
|
-
){ |v| self[:active] = v } if what.include? :active
|
310
|
-
opt.on(
|
311
|
-
'-t', '--taxonomy RANK:TAXON',
|
312
|
-
'Filter by taxonomy'
|
313
|
-
){ |v| self[:taxonomy] = Taxonomy.new(v) } if what.include? :taxonomy
|
314
|
-
opt.on(
|
315
|
-
'--dataset-k INTEGER', Integer,
|
316
|
-
'Use only the k-th dataset in the list'
|
317
|
-
){ |v| self[:dataset_k] = v }
|
318
|
-
end
|
319
|
-
|
320
222
|
##
|
321
223
|
# Ensure that these parameters have been passed to the CLI, as defined by
|
322
224
|
# +par+, a Hash with object names as keys and parameter flag as values.
|
@@ -336,88 +238,6 @@ class MiGA::Cli < MiGA::MiGA
|
|
336
238
|
end
|
337
239
|
end
|
338
240
|
|
339
|
-
##
|
340
|
-
# Get the project defined in the CLI by parameter +name+ and +flag+
|
341
|
-
def load_project(name = :project, flag = '-P')
|
342
|
-
return @objects[name] unless @objects[name].nil?
|
343
|
-
ensure_par(name => flag)
|
344
|
-
say "Loading project: #{self[name]}"
|
345
|
-
@objects[name] = Project.load(self[name])
|
346
|
-
raise "Cannot load project: #{self[name]}" if @objects[name].nil?
|
347
|
-
@objects[name]
|
348
|
-
end
|
349
|
-
|
350
|
-
##
|
351
|
-
# Load the dataset defined in the CLI
|
352
|
-
# If +silent=true+, it allows failures silently
|
353
|
-
def load_dataset(silent = false)
|
354
|
-
return @objects[:dataset] unless @objects[:dataset].nil?
|
355
|
-
ensure_par(dataset: '-D')
|
356
|
-
@objects[:dataset] = load_project.dataset(self[:dataset])
|
357
|
-
if !silent && @objects[:dataset].nil?
|
358
|
-
raise "Cannot load dataset: #{self[:dataset]}"
|
359
|
-
end
|
360
|
-
return @objects[:dataset]
|
361
|
-
end
|
362
|
-
|
363
|
-
##
|
364
|
-
# Load an a project or (if defined) a dataset
|
365
|
-
def load_project_or_dataset
|
366
|
-
self[:dataset].nil? ? load_project : load_dataset
|
367
|
-
end
|
368
|
-
|
369
|
-
##
|
370
|
-
# Load and filter a list of datasets as requested in the CLI
|
371
|
-
# If +silent=true+, it allows failures silently
|
372
|
-
def load_and_filter_datasets(silent = false)
|
373
|
-
return @objects[:filtered_datasets] unless @objects[:filtered_datasets].nil?
|
374
|
-
say 'Listing datasets'
|
375
|
-
ds = self[:dataset].nil? ?
|
376
|
-
load_project.datasets : [load_dataset(silent)].compact
|
377
|
-
ds.select! { |d| d.is_ref? == self[:ref] } unless self[:ref].nil?
|
378
|
-
ds.select! { |d| d.is_active? == self[:active] } unless self[:active].nil?
|
379
|
-
ds.select! do |d|
|
380
|
-
self[:multi] ? d.is_multi? : d.is_nonmulti?
|
381
|
-
end unless self[:multi].nil?
|
382
|
-
ds.select! do |d|
|
383
|
-
(not d.metadata[:tax].nil?) && d.metadata[:tax].in?(self[:taxonomy])
|
384
|
-
end unless self[:taxonomy].nil?
|
385
|
-
ds = ds.values_at(self[:dataset_k]-1) unless self[:dataset_k].nil?
|
386
|
-
@objects[:filtered_datasets] = ds
|
387
|
-
end
|
388
|
-
|
389
|
-
def load_result
|
390
|
-
return @objects[:result] unless @objects[:result].nil?
|
391
|
-
ensure_par(result: '-r')
|
392
|
-
obj = load_project_or_dataset
|
393
|
-
if obj.class.RESULT_DIRS[self[:result]].nil?
|
394
|
-
raise "Unsupported result for #{obj.class.to_s.gsub(/.*::/,'')}: #{self[:result]}"
|
395
|
-
end
|
396
|
-
r = obj.add_result(self[:result], false)
|
397
|
-
raise "Cannot load result: #{self[:result]}" if r.nil?
|
398
|
-
@objects[:result] = r
|
399
|
-
end
|
400
|
-
|
401
|
-
def add_metadata(obj, cli = self)
|
402
|
-
cli[:metadata].split(',').each do |pair|
|
403
|
-
(k,v) = pair.split('=')
|
404
|
-
case v
|
405
|
-
when 'true'; v = true
|
406
|
-
when 'false'; v = false
|
407
|
-
when 'nil'; v = nil
|
408
|
-
end
|
409
|
-
if k == '_step'
|
410
|
-
obj.metadata["_try_#{v}"] ||= 0
|
411
|
-
obj.metadata["_try_#{v}"] += 1
|
412
|
-
end
|
413
|
-
obj.metadata[k] = v
|
414
|
-
end unless cli[:metadata].nil?
|
415
|
-
[:type, :name, :user, :description, :comments].each do |k|
|
416
|
-
obj.metadata[k] = cli[k] unless cli[k].nil?
|
417
|
-
end
|
418
|
-
obj
|
419
|
-
end
|
420
|
-
|
421
241
|
##
|
422
242
|
# Task description
|
423
243
|
def task_description
|
data/lib/miga/cli/action/add.rb
CHANGED
@@ -7,8 +7,10 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
7
7
|
|
8
8
|
def parse_cli
|
9
9
|
cli.expect_files = true
|
10
|
-
cli.defaults = {
|
11
|
-
|
10
|
+
cli.defaults = {
|
11
|
+
ref: true, ignore_dups: false,
|
12
|
+
regexp: MiGA::Cli.FILE_REGEXP
|
13
|
+
}
|
12
14
|
cli.parse do |opt|
|
13
15
|
opt.separator 'You can create multiple datasets with a single command; ' \
|
14
16
|
'simply pass all the files at the end: {FILES...}'
|
@@ -19,92 +21,53 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
19
21
|
opt.on(
|
20
22
|
'-q', '--query',
|
21
23
|
'Register the dataset as a query, not a reference dataset'
|
22
|
-
|
24
|
+
) { |v| cli[:ref] = !v }
|
23
25
|
opt.on(
|
24
26
|
'-d', '--description STRING',
|
25
27
|
'Description of the dataset'
|
26
|
-
|
27
|
-
opt.on(
|
28
|
+
) { |v| cli[:description] = v }
|
29
|
+
opt.on(
|
30
|
+
'-c', '--comments STRING',
|
28
31
|
'Comments on the dataset'
|
29
|
-
|
30
|
-
opt.on(
|
32
|
+
) { |v| cli[:comments] = v }
|
33
|
+
opt.on(
|
34
|
+
'-m', '--metadata STRING',
|
31
35
|
'Metadata as key-value pairs separated by = and delimited by comma',
|
32
36
|
'Values are saved as strings except for booleans (true / false) or nil'
|
33
|
-
|
37
|
+
) { |v| cli[:metadata] = v }
|
34
38
|
opt.on(
|
35
39
|
'-R', '--name-regexp REGEXP', Regexp,
|
36
40
|
'Regular expression indicating how to extract the name from the path',
|
37
41
|
"By default: '#{cli[:regexp]}'"
|
38
|
-
|
42
|
+
) { |v| cli[:regexp] = v }
|
43
|
+
opt.on(
|
44
|
+
'--prefix STRING',
|
45
|
+
'Prefix to all the dataset names'
|
46
|
+
) { |v| cli[:prefix] = v }
|
39
47
|
opt.on(
|
40
48
|
'-i', '--input-type STRING',
|
41
49
|
'Type of input data, one of the following:',
|
42
|
-
*self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}
|
43
|
-
|
50
|
+
*self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
|
51
|
+
) { |v| cli[:input_type] = v.downcase.to_sym }
|
44
52
|
opt.on(
|
45
53
|
'--ignore-dups',
|
46
54
|
'Continue with a warning if a dataset already exists'
|
47
|
-
|
55
|
+
) { |v| cli[:ignore_dups] = v }
|
48
56
|
end
|
49
57
|
end
|
50
58
|
|
51
59
|
def perform
|
52
60
|
p = cli.load_project
|
53
|
-
files =
|
54
|
-
file_type = nil
|
55
|
-
if files.empty?
|
56
|
-
cli.ensure_par({dataset: '-D'},
|
57
|
-
'dataset is mandatory (-D) unless files are provided')
|
58
|
-
cli.ensure_type(Dataset)
|
59
|
-
files = [nil]
|
60
|
-
else
|
61
|
-
raise 'Please specify input type (-i).' if cli[:input_type].nil?
|
62
|
-
file_type = self.class.INPUT_TYPES[cli[:input_type]]
|
63
|
-
raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
|
64
|
-
raise 'Some files are duplicated, files must be unique.' if
|
65
|
-
files.size != files.uniq.size
|
66
|
-
if cli[:input_type].to_s =~ /_paired$/
|
67
|
-
raise 'Odd number of files incompatible with input type.' if files.size.odd?
|
68
|
-
files = Hash[*files].to_a
|
69
|
-
else
|
70
|
-
files = files.map{ |i| [i] }
|
71
|
-
end
|
72
|
-
raise 'The dataset name (-D) can only be specified with one input file.' if
|
73
|
-
files.size > 1 && !cli[:dataset].nil?
|
74
|
-
end
|
61
|
+
files, file_type = get_files_and_type
|
75
62
|
|
76
63
|
cli.say 'Creating datasets:'
|
77
64
|
files.each do |file|
|
78
|
-
|
79
|
-
if
|
80
|
-
|
81
|
-
m = cli[:regexp].match(ref_file)
|
82
|
-
raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
|
83
|
-
name = m[1].miga_name
|
84
|
-
end
|
85
|
-
if Dataset.exist?(p, name)
|
86
|
-
msg = "Dataset already exists: #{name}."
|
87
|
-
cli[:ignore_dups] ? (warn(msg); next) : raise(msg)
|
88
|
-
end
|
89
|
-
|
90
|
-
cli.say "o #{name}"
|
91
|
-
d = Dataset.new(p, name, cli[:ref])
|
92
|
-
raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
|
93
|
-
|
94
|
-
unless file.nil?
|
95
|
-
r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
|
96
|
-
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
97
|
-
file_type[2].each_with_index do |ext, i|
|
98
|
-
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
99
|
-
FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
|
100
|
-
cli.say " file: #{file[i]}"
|
101
|
-
end
|
102
|
-
File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
|
103
|
-
end
|
104
|
-
|
65
|
+
d = create_dataset(file, p)
|
66
|
+
next if d.nil?
|
67
|
+
copy_file_to_project(file, file_type, d, p)
|
105
68
|
d = cli.add_metadata(d)
|
106
69
|
d.save
|
107
|
-
p.add_dataset(name)
|
70
|
+
p.add_dataset(d.name)
|
108
71
|
res = d.first_preprocessing(true)
|
109
72
|
cli.say " result: #{res}"
|
110
73
|
end
|
@@ -136,4 +99,70 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
136
99
|
@@INPUT_TYPES
|
137
100
|
end
|
138
101
|
end
|
102
|
+
|
103
|
+
private
|
104
|
+
|
105
|
+
def get_files_and_type
|
106
|
+
files = cli.files
|
107
|
+
file_type = nil
|
108
|
+
if files.empty?
|
109
|
+
cli.ensure_par({dataset: '-D'},
|
110
|
+
'dataset is mandatory (-D) unless files are provided')
|
111
|
+
cli.ensure_type(Dataset)
|
112
|
+
files = [nil]
|
113
|
+
else
|
114
|
+
raise 'Please specify input type (-i).' if cli[:input_type].nil?
|
115
|
+
file_type = self.class.INPUT_TYPES[cli[:input_type]]
|
116
|
+
raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
|
117
|
+
raise 'Some files are duplicated, files must be unique.' if
|
118
|
+
files.size != files.uniq.size
|
119
|
+
if cli[:input_type].to_s =~ /_paired$/
|
120
|
+
if files.size.odd?
|
121
|
+
raise 'Odd number of files incompatible with input type.'
|
122
|
+
end
|
123
|
+
files = Hash[*files].to_a
|
124
|
+
else
|
125
|
+
files = files.map{ |i| [i] }
|
126
|
+
end
|
127
|
+
if files.size > 1 && !cli[:dataset].nil?
|
128
|
+
raise 'The dataset name (-D) can only be specified with one input file.'
|
129
|
+
end
|
130
|
+
end
|
131
|
+
[files, file_type]
|
132
|
+
end
|
133
|
+
|
134
|
+
def create_dataset(file, p)
|
135
|
+
name = cli[:dataset]
|
136
|
+
if name.nil?
|
137
|
+
ref_file = file.is_a?(Array) ? file.first : file
|
138
|
+
m = cli[:regexp].match(ref_file)
|
139
|
+
raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
|
140
|
+
name = cli[:prefix].to_s + m[1].miga_name
|
141
|
+
end
|
142
|
+
if Dataset.exist?(p, name)
|
143
|
+
msg = "Dataset already exists: #{name}."
|
144
|
+
if cli[:ignore_dups]
|
145
|
+
warn(msg)
|
146
|
+
return nil
|
147
|
+
else
|
148
|
+
raise(msg)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
cli.say "o #{name}"
|
152
|
+
d = Dataset.new(p, name, cli[:ref])
|
153
|
+
raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
|
154
|
+
d
|
155
|
+
end
|
156
|
+
|
157
|
+
def copy_file_to_project(file, file_type, d, p)
|
158
|
+
return if file.nil?
|
159
|
+
r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
|
160
|
+
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
161
|
+
file_type[2].each_with_index do |ext, i|
|
162
|
+
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
163
|
+
FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
|
164
|
+
cli.say " file: #{file[i]}"
|
165
|
+
end
|
166
|
+
File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
|
167
|
+
end
|
139
168
|
end
|