miga-base 0.4.3.0 → 0.5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e370d282f1b28480765e1b91fcb7d8921d12baa31d22db1318975a1c2a79e19a
|
4
|
+
data.tar.gz: e7fb3941fd3381e0e9696a2c577aeb157657335e56434e7c6d6650be7ba45e98
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4642a212e1b4021e211fd144b515ff49e9ddb7a9b2292430553307a7ae165e4d8d5e6fd8426757f15ea6e70f4c3efbb055e0439497172cc1f91186d522c82635
|
7
|
+
data.tar.gz: 8d5d3ded3c03e56505572102110a4bca4b84d06b2e73bcf208856610a4cd6e60092ce6d54d47dcef2c8acf85f1cce5f8461097e8699344df6738ed8493215112
|
data/README.md
CHANGED
@@ -23,7 +23,7 @@ For additional information on MiGA, visit:
|
|
23
23
|
If you're like us, you probably want to see software in action from the get go.
|
24
24
|
You have two options:
|
25
25
|
|
26
|
-
1. Get a
|
26
|
+
1. Get a peek on MiGA using [MiGA Online][miga-online].
|
27
27
|
2. Install the [requirements](manual/part2/requirements.md) and follow the
|
28
28
|
[installation instructions](manual/part2/installation.md). Once you have MiGA
|
29
29
|
installed, you can [deploy some examples](manual/part4.md).
|
data/lib/miga/cli.rb
CHANGED
@@ -9,7 +9,11 @@ require 'optparse'
|
|
9
9
|
class MiGA::Cli < MiGA::MiGA
|
10
10
|
|
11
11
|
require 'miga/cli/base'
|
12
|
+
require 'miga/cli/opt_helper'
|
13
|
+
require 'miga/cli/objects_helper'
|
12
14
|
require 'miga/cli/action'
|
15
|
+
include MiGA::Cli::OptHelper
|
16
|
+
include MiGA::Cli::ObjectsHelper
|
13
17
|
|
14
18
|
##
|
15
19
|
# Task to execute, a symbol
|
@@ -71,41 +75,68 @@ class MiGA::Cli < MiGA::MiGA
|
|
71
75
|
end
|
72
76
|
|
73
77
|
##
|
74
|
-
#
|
78
|
+
# Print +par+, ensuring new line at the end.
|
79
|
+
# If the first parameter is +IO+, the output is sent there,
|
80
|
+
# otherwise it's sent to +$stdout+
|
75
81
|
def puts(*par)
|
76
|
-
|
82
|
+
io = par.first.is_a?(IO) ? par.shift : $stdout
|
83
|
+
io.puts(*par)
|
77
84
|
end
|
78
85
|
|
79
86
|
##
|
80
|
-
#
|
87
|
+
# Print +par+.
|
88
|
+
# If the first parameter is +IO+, the output is sent there,
|
89
|
+
# otherwise it's sent to +$stdout+
|
81
90
|
def print(*par)
|
82
|
-
|
91
|
+
io = par.first.is_a?(IO) ? par.shift : $stdout
|
92
|
+
io.print(*par)
|
83
93
|
end
|
84
94
|
|
85
95
|
##
|
86
|
-
# Display a table with headers +header+ and contents +values+, both Array
|
87
|
-
|
88
|
-
|
96
|
+
# Display a table with headers +header+ and contents +values+, both Array.
|
97
|
+
# The output is printed to +io+
|
98
|
+
def table(header, values, io = $stdout)
|
99
|
+
self.puts(io, MiGA.tabulate(header, values, self[:tabular]))
|
89
100
|
end
|
90
101
|
|
91
102
|
##
|
92
|
-
#
|
103
|
+
# Print +par+ ensuring new line at the end, iff --verbose.
|
93
104
|
# Date/time each line.
|
105
|
+
# If the first parameter is +IO+, the output is sent there,
|
106
|
+
# otherwise it's sent to +$stderr+
|
94
107
|
def say(*par)
|
95
108
|
return unless self[:verbose]
|
96
109
|
par.map! { |i| "[#{Time.now}] #{i}" }
|
97
|
-
|
110
|
+
io = par.first.is_a?(IO) ? par.shift : $stderr
|
111
|
+
io.puts(*par)
|
98
112
|
end
|
99
113
|
|
100
114
|
##
|
101
|
-
# Reports the advance of a task at +step+ (String), the +n+ out of +total
|
115
|
+
# Reports the advance of a task at +step+ (String), the +n+ out of +total+.
|
116
|
+
# The advance is reported in powers of 1,024 if +bin+ is true, or powers of
|
117
|
+
# 1,000 otherwise.
|
102
118
|
# The report goes to $stderr iff --verbose
|
103
|
-
def advance(step, n = 0, total = nil)
|
119
|
+
def advance(step, n = 0, total = nil, bin = true)
|
104
120
|
return unless self[:verbose]
|
105
|
-
adv = total.nil? ? '' :
|
121
|
+
adv = total.nil? ? '' :
|
122
|
+
('%.1f%% (%s/%s)' % [100 * n / total,
|
123
|
+
num_suffix(n, bin), num_suffix(total, bin)])
|
106
124
|
$stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
|
107
125
|
end
|
108
126
|
|
127
|
+
def num_suffix(n, bin = false)
|
128
|
+
p = ''
|
129
|
+
{T: 4, G: 3, M: 2, K: 1}.each do |k,x|
|
130
|
+
v = (bin ? 1024 : 1e3) ** x
|
131
|
+
if n > v
|
132
|
+
n = '%.1f' % (n / v)
|
133
|
+
p = k
|
134
|
+
break
|
135
|
+
end
|
136
|
+
end
|
137
|
+
"#{n}#{p}"
|
138
|
+
end
|
139
|
+
|
109
140
|
##
|
110
141
|
# Ask a question +question+ to the user (requires +#interactive = true+)
|
111
142
|
# The +default+ is used if the answer is empty
|
@@ -188,135 +219,6 @@ class MiGA::Cli < MiGA::MiGA
|
|
188
219
|
end
|
189
220
|
end
|
190
221
|
|
191
|
-
##
|
192
|
-
# Send MiGA's banner to OptionParser +opt+
|
193
|
-
def banner(opt)
|
194
|
-
usage = "Usage: miga #{action.name}"
|
195
|
-
usage += ' {operation}' if expect_operation
|
196
|
-
usage += ' [options]'
|
197
|
-
usage += ' {FILES...}' if expect_files
|
198
|
-
opt.banner = "\n#{task_description}\n\n#{usage}\n"
|
199
|
-
opt.separator ''
|
200
|
-
end
|
201
|
-
|
202
|
-
##
|
203
|
-
# Common options at the end of most actions, passed to OptionParser +opt+
|
204
|
-
# No action is performed if +#opt_common = false+ is passed
|
205
|
-
# Executes only once, unless +#opt_common = true+ is passed between calls
|
206
|
-
def opt_common(opt)
|
207
|
-
return unless @opt_common
|
208
|
-
opt.on(
|
209
|
-
'--auto',
|
210
|
-
'Accept all defaults as answers'
|
211
|
-
){ |v| cli[:auto] = v } if interactive
|
212
|
-
opt.on(
|
213
|
-
'-v', '--verbose',
|
214
|
-
'Print additional information to STDERR'
|
215
|
-
){ |v| self[:verbose] = v }
|
216
|
-
opt.on(
|
217
|
-
'-d', '--debug INT', Integer,
|
218
|
-
'Print debugging information to STDERR (1: debug, 2: trace)'
|
219
|
-
){ |v| (v > 1) ? MiGA.DEBUG_TRACE_ON : MiGA.DEBUG_ON }
|
220
|
-
opt.on(
|
221
|
-
'-h', '--help',
|
222
|
-
'Display this screen'
|
223
|
-
){ puts opt ; exit }
|
224
|
-
opt.separator ''
|
225
|
-
self.opt_common = false
|
226
|
-
end
|
227
|
-
|
228
|
-
##
|
229
|
-
# Options to load an object passed to OptionParser +opt+, as determined
|
230
|
-
# by +what+ an Array with any combination of:
|
231
|
-
# - :project To require a project
|
232
|
-
# - :dataset To require a dataset
|
233
|
-
# - :dataset_opt To allow (optionally) a dataset
|
234
|
-
# - :dataset_type To allow (optionally) a type of dataset
|
235
|
-
# - :dataset_type_req To require a type of dataset
|
236
|
-
# - :project_type To allow (optionally) a type of project
|
237
|
-
# - :project_type_req To require a type of project
|
238
|
-
# - :result To require a type of project or dataset result
|
239
|
-
# - :result_dataset To require a type of dataset result
|
240
|
-
# - :result_project To require a type of project result
|
241
|
-
def opt_object(opt, what = [:project, :dataset])
|
242
|
-
opt.on(
|
243
|
-
'-P', '--project PATH',
|
244
|
-
'(Mandatory) Path to the project'
|
245
|
-
){ |v| self[:project] = v } if what.include? :project
|
246
|
-
opt.on(
|
247
|
-
'-D', '--dataset STRING',
|
248
|
-
(what.include?(:dataset) ? '(Mandatory) ' : '') + 'Name of the dataset'
|
249
|
-
){ |v| self[:dataset] = v } if what.include? :dataset or
|
250
|
-
what.include? :dataset_opt
|
251
|
-
opt.on(
|
252
|
-
'-D', '--dataset STRING',
|
253
|
-
'Name of the dataset'
|
254
|
-
){ |v| self[:dataset] = v } if what.include? :dataset_opt
|
255
|
-
opt.on(
|
256
|
-
'-t', '--type STRING',
|
257
|
-
(what.include?(:dataset_type_req) ? '(Mandatory) ' : '') +
|
258
|
-
'Type of dataset. Recognized types include:',
|
259
|
-
*Dataset.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}" }
|
260
|
-
){ |v| self[:type] = v.downcase.to_sym } if what.include? :dataset_type or
|
261
|
-
what.include? :dataset_type_req
|
262
|
-
opt.on(
|
263
|
-
'-t', '--type STRING',
|
264
|
-
(what.include?(:project_type_req) ? '(Mandatory) ' : '') +
|
265
|
-
'Type of project. Recognized types include:',
|
266
|
-
*Project.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
|
267
|
-
){ |v| self[:type] = v.downcase.to_sym } if what.include? :project_type or
|
268
|
-
what.include? :project_type_req
|
269
|
-
opt.on(
|
270
|
-
'-r', '--result STRING',
|
271
|
-
'(Mandatory) Name of the result',
|
272
|
-
'Recognized names for dataset-specific results include:',
|
273
|
-
*Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
|
274
|
-
'Recognized names for project-wide results include:',
|
275
|
-
*Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
|
276
|
-
){ |v| self[:result] = v.downcase.to_sym } if what.include? :result
|
277
|
-
opt.on(
|
278
|
-
'-r', '--result STRING',
|
279
|
-
'(Mandatory) Name of the result, one of:',
|
280
|
-
*Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
|
281
|
-
){ |v| self[:result] = v.downcase.to_sym } if what.include? :result_dataset
|
282
|
-
opt.on(
|
283
|
-
'-r', '--result STRING',
|
284
|
-
'(Mandatory) Name of the result, one of:',
|
285
|
-
*Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
|
286
|
-
){ |v| self[:result] = v.downcase.to_sym } if what.include? :result_project
|
287
|
-
end
|
288
|
-
|
289
|
-
##
|
290
|
-
# Options to filter a list of datasets passed to OptionParser +opt+,
|
291
|
-
# as determined by +what+ an Array with any combination of:
|
292
|
-
# - :ref To filter by reference (--ref) or query (--no-ref)
|
293
|
-
# - :multi To filter by multiple (--multi) or single (--no-multi) species
|
294
|
-
# - :active To filter by active (--active) or inactive (--no-active)
|
295
|
-
# - :taxonomy To filter by taxonomy (--taxonomy)
|
296
|
-
# The "k-th" filter (--dataset-k) is always included
|
297
|
-
def opt_filter_datasets(opt, what = [:ref, :multi, :active, :taxonomy])
|
298
|
-
opt.on(
|
299
|
-
'--[no-]ref',
|
300
|
-
'Use only reference (or only non-reference) datasets'
|
301
|
-
){ |v| self[:ref] = v } if what.include? :ref
|
302
|
-
opt.on(
|
303
|
-
'--[no-]multi',
|
304
|
-
'Use only multi-species (or only single-species) datasets'
|
305
|
-
){ |v| self[:multi] = v } if what.include? :multi
|
306
|
-
opt.on(
|
307
|
-
'--[no-]active',
|
308
|
-
'Use only active (or inactive) datasets'
|
309
|
-
){ |v| self[:active] = v } if what.include? :active
|
310
|
-
opt.on(
|
311
|
-
'-t', '--taxonomy RANK:TAXON',
|
312
|
-
'Filter by taxonomy'
|
313
|
-
){ |v| self[:taxonomy] = Taxonomy.new(v) } if what.include? :taxonomy
|
314
|
-
opt.on(
|
315
|
-
'--dataset-k INTEGER', Integer,
|
316
|
-
'Use only the k-th dataset in the list'
|
317
|
-
){ |v| self[:dataset_k] = v }
|
318
|
-
end
|
319
|
-
|
320
222
|
##
|
321
223
|
# Ensure that these parameters have been passed to the CLI, as defined by
|
322
224
|
# +par+, a Hash with object names as keys and parameter flag as values.
|
@@ -336,88 +238,6 @@ class MiGA::Cli < MiGA::MiGA
|
|
336
238
|
end
|
337
239
|
end
|
338
240
|
|
339
|
-
##
|
340
|
-
# Get the project defined in the CLI by parameter +name+ and +flag+
|
341
|
-
def load_project(name = :project, flag = '-P')
|
342
|
-
return @objects[name] unless @objects[name].nil?
|
343
|
-
ensure_par(name => flag)
|
344
|
-
say "Loading project: #{self[name]}"
|
345
|
-
@objects[name] = Project.load(self[name])
|
346
|
-
raise "Cannot load project: #{self[name]}" if @objects[name].nil?
|
347
|
-
@objects[name]
|
348
|
-
end
|
349
|
-
|
350
|
-
##
|
351
|
-
# Load the dataset defined in the CLI
|
352
|
-
# If +silent=true+, it allows failures silently
|
353
|
-
def load_dataset(silent = false)
|
354
|
-
return @objects[:dataset] unless @objects[:dataset].nil?
|
355
|
-
ensure_par(dataset: '-D')
|
356
|
-
@objects[:dataset] = load_project.dataset(self[:dataset])
|
357
|
-
if !silent && @objects[:dataset].nil?
|
358
|
-
raise "Cannot load dataset: #{self[:dataset]}"
|
359
|
-
end
|
360
|
-
return @objects[:dataset]
|
361
|
-
end
|
362
|
-
|
363
|
-
##
|
364
|
-
# Load a project or (if defined) a dataset
|
365
|
-
def load_project_or_dataset
|
366
|
-
self[:dataset].nil? ? load_project : load_dataset
|
367
|
-
end
|
368
|
-
|
369
|
-
##
|
370
|
-
# Load and filter a list of datasets as requested in the CLI
|
371
|
-
# If +silent=true+, it allows failures silently
|
372
|
-
def load_and_filter_datasets(silent = false)
|
373
|
-
return @objects[:filtered_datasets] unless @objects[:filtered_datasets].nil?
|
374
|
-
say 'Listing datasets'
|
375
|
-
ds = self[:dataset].nil? ?
|
376
|
-
load_project.datasets : [load_dataset(silent)].compact
|
377
|
-
ds.select! { |d| d.is_ref? == self[:ref] } unless self[:ref].nil?
|
378
|
-
ds.select! { |d| d.is_active? == self[:active] } unless self[:active].nil?
|
379
|
-
ds.select! do |d|
|
380
|
-
self[:multi] ? d.is_multi? : d.is_nonmulti?
|
381
|
-
end unless self[:multi].nil?
|
382
|
-
ds.select! do |d|
|
383
|
-
(not d.metadata[:tax].nil?) && d.metadata[:tax].in?(self[:taxonomy])
|
384
|
-
end unless self[:taxonomy].nil?
|
385
|
-
ds = ds.values_at(self[:dataset_k]-1) unless self[:dataset_k].nil?
|
386
|
-
@objects[:filtered_datasets] = ds
|
387
|
-
end
|
388
|
-
|
389
|
-
def load_result
|
390
|
-
return @objects[:result] unless @objects[:result].nil?
|
391
|
-
ensure_par(result: '-r')
|
392
|
-
obj = load_project_or_dataset
|
393
|
-
if obj.class.RESULT_DIRS[self[:result]].nil?
|
394
|
-
raise "Unsupported result for #{obj.class.to_s.gsub(/.*::/,'')}: #{self[:result]}"
|
395
|
-
end
|
396
|
-
r = obj.add_result(self[:result], false)
|
397
|
-
raise "Cannot load result: #{self[:result]}" if r.nil?
|
398
|
-
@objects[:result] = r
|
399
|
-
end
|
400
|
-
|
401
|
-
def add_metadata(obj, cli = self)
|
402
|
-
cli[:metadata].split(',').each do |pair|
|
403
|
-
(k,v) = pair.split('=')
|
404
|
-
case v
|
405
|
-
when 'true'; v = true
|
406
|
-
when 'false'; v = false
|
407
|
-
when 'nil'; v = nil
|
408
|
-
end
|
409
|
-
if k == '_step'
|
410
|
-
obj.metadata["_try_#{v}"] ||= 0
|
411
|
-
obj.metadata["_try_#{v}"] += 1
|
412
|
-
end
|
413
|
-
obj.metadata[k] = v
|
414
|
-
end unless cli[:metadata].nil?
|
415
|
-
[:type, :name, :user, :description, :comments].each do |k|
|
416
|
-
obj.metadata[k] = cli[k] unless cli[k].nil?
|
417
|
-
end
|
418
|
-
obj
|
419
|
-
end
|
420
|
-
|
421
241
|
##
|
422
242
|
# Task description
|
423
243
|
def task_description
|
data/lib/miga/cli/action/add.rb
CHANGED
@@ -7,8 +7,10 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
7
7
|
|
8
8
|
def parse_cli
|
9
9
|
cli.expect_files = true
|
10
|
-
cli.defaults = {
|
11
|
-
|
10
|
+
cli.defaults = {
|
11
|
+
ref: true, ignore_dups: false,
|
12
|
+
regexp: MiGA::Cli.FILE_REGEXP
|
13
|
+
}
|
12
14
|
cli.parse do |opt|
|
13
15
|
opt.separator 'You can create multiple datasets with a single command; ' \
|
14
16
|
'simply pass all the files at the end: {FILES...}'
|
@@ -19,92 +21,53 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
19
21
|
opt.on(
|
20
22
|
'-q', '--query',
|
21
23
|
'Register the dataset as a query, not a reference dataset'
|
22
|
-
|
24
|
+
) { |v| cli[:ref] = !v }
|
23
25
|
opt.on(
|
24
26
|
'-d', '--description STRING',
|
25
27
|
'Description of the dataset'
|
26
|
-
|
27
|
-
opt.on(
|
28
|
+
) { |v| cli[:description] = v }
|
29
|
+
opt.on(
|
30
|
+
'-c', '--comments STRING',
|
28
31
|
'Comments on the dataset'
|
29
|
-
|
30
|
-
opt.on(
|
32
|
+
) { |v| cli[:comments] = v }
|
33
|
+
opt.on(
|
34
|
+
'-m', '--metadata STRING',
|
31
35
|
'Metadata as key-value pairs separated by = and delimited by comma',
|
32
36
|
'Values are saved as strings except for booleans (true / false) or nil'
|
33
|
-
|
37
|
+
) { |v| cli[:metadata] = v }
|
34
38
|
opt.on(
|
35
39
|
'-R', '--name-regexp REGEXP', Regexp,
|
36
40
|
'Regular expression indicating how to extract the name from the path',
|
37
41
|
"By default: '#{cli[:regexp]}'"
|
38
|
-
|
42
|
+
) { |v| cli[:regexp] = v }
|
43
|
+
opt.on(
|
44
|
+
'--prefix STRING',
|
45
|
+
'Prefix to all the dataset names'
|
46
|
+
) { |v| cli[:prefix] = v }
|
39
47
|
opt.on(
|
40
48
|
'-i', '--input-type STRING',
|
41
49
|
'Type of input data, one of the following:',
|
42
|
-
*self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}
|
43
|
-
|
50
|
+
*self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
|
51
|
+
) { |v| cli[:input_type] = v.downcase.to_sym }
|
44
52
|
opt.on(
|
45
53
|
'--ignore-dups',
|
46
54
|
'Continue with a warning if a dataset already exists'
|
47
|
-
|
55
|
+
) { |v| cli[:ignore_dups] = v }
|
48
56
|
end
|
49
57
|
end
|
50
58
|
|
51
59
|
def perform
|
52
60
|
p = cli.load_project
|
53
|
-
files =
|
54
|
-
file_type = nil
|
55
|
-
if files.empty?
|
56
|
-
cli.ensure_par({dataset: '-D'},
|
57
|
-
'dataset is mandatory (-D) unless files are provided')
|
58
|
-
cli.ensure_type(Dataset)
|
59
|
-
files = [nil]
|
60
|
-
else
|
61
|
-
raise 'Please specify input type (-i).' if cli[:input_type].nil?
|
62
|
-
file_type = self.class.INPUT_TYPES[cli[:input_type]]
|
63
|
-
raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
|
64
|
-
raise 'Some files are duplicated, files must be unique.' if
|
65
|
-
files.size != files.uniq.size
|
66
|
-
if cli[:input_type].to_s =~ /_paired$/
|
67
|
-
raise 'Odd number of files incompatible with input type.' if files.size.odd?
|
68
|
-
files = Hash[*files].to_a
|
69
|
-
else
|
70
|
-
files = files.map{ |i| [i] }
|
71
|
-
end
|
72
|
-
raise 'The dataset name (-D) can only be specified with one input file.' if
|
73
|
-
files.size > 1 && !cli[:dataset].nil?
|
74
|
-
end
|
61
|
+
files, file_type = get_files_and_type
|
75
62
|
|
76
63
|
cli.say 'Creating datasets:'
|
77
64
|
files.each do |file|
|
78
|
-
|
79
|
-
if
|
80
|
-
|
81
|
-
m = cli[:regexp].match(ref_file)
|
82
|
-
raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
|
83
|
-
name = m[1].miga_name
|
84
|
-
end
|
85
|
-
if Dataset.exist?(p, name)
|
86
|
-
msg = "Dataset already exists: #{name}."
|
87
|
-
cli[:ignore_dups] ? (warn(msg); next) : raise(msg)
|
88
|
-
end
|
89
|
-
|
90
|
-
cli.say "o #{name}"
|
91
|
-
d = Dataset.new(p, name, cli[:ref])
|
92
|
-
raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
|
93
|
-
|
94
|
-
unless file.nil?
|
95
|
-
r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
|
96
|
-
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
97
|
-
file_type[2].each_with_index do |ext, i|
|
98
|
-
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
99
|
-
FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
|
100
|
-
cli.say " file: #{file[i]}"
|
101
|
-
end
|
102
|
-
File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
|
103
|
-
end
|
104
|
-
|
65
|
+
d = create_dataset(file, p)
|
66
|
+
next if d.nil?
|
67
|
+
copy_file_to_project(file, file_type, d, p)
|
105
68
|
d = cli.add_metadata(d)
|
106
69
|
d.save
|
107
|
-
p.add_dataset(name)
|
70
|
+
p.add_dataset(d.name)
|
108
71
|
res = d.first_preprocessing(true)
|
109
72
|
cli.say " result: #{res}"
|
110
73
|
end
|
@@ -136,4 +99,70 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
136
99
|
@@INPUT_TYPES
|
137
100
|
end
|
138
101
|
end
|
102
|
+
|
103
|
+
private
|
104
|
+
|
105
|
+
def get_files_and_type
|
106
|
+
files = cli.files
|
107
|
+
file_type = nil
|
108
|
+
if files.empty?
|
109
|
+
cli.ensure_par({dataset: '-D'},
|
110
|
+
'dataset is mandatory (-D) unless files are provided')
|
111
|
+
cli.ensure_type(Dataset)
|
112
|
+
files = [nil]
|
113
|
+
else
|
114
|
+
raise 'Please specify input type (-i).' if cli[:input_type].nil?
|
115
|
+
file_type = self.class.INPUT_TYPES[cli[:input_type]]
|
116
|
+
raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
|
117
|
+
raise 'Some files are duplicated, files must be unique.' if
|
118
|
+
files.size != files.uniq.size
|
119
|
+
if cli[:input_type].to_s =~ /_paired$/
|
120
|
+
if files.size.odd?
|
121
|
+
raise 'Odd number of files incompatible with input type.'
|
122
|
+
end
|
123
|
+
files = Hash[*files].to_a
|
124
|
+
else
|
125
|
+
files = files.map{ |i| [i] }
|
126
|
+
end
|
127
|
+
if files.size > 1 && !cli[:dataset].nil?
|
128
|
+
raise 'The dataset name (-D) can only be specified with one input file.'
|
129
|
+
end
|
130
|
+
end
|
131
|
+
[files, file_type]
|
132
|
+
end
|
133
|
+
|
134
|
+
def create_dataset(file, p)
|
135
|
+
name = cli[:dataset]
|
136
|
+
if name.nil?
|
137
|
+
ref_file = file.is_a?(Array) ? file.first : file
|
138
|
+
m = cli[:regexp].match(ref_file)
|
139
|
+
raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
|
140
|
+
name = cli[:prefix].to_s + m[1].miga_name
|
141
|
+
end
|
142
|
+
if Dataset.exist?(p, name)
|
143
|
+
msg = "Dataset already exists: #{name}."
|
144
|
+
if cli[:ignore_dups]
|
145
|
+
warn(msg)
|
146
|
+
return nil
|
147
|
+
else
|
148
|
+
raise(msg)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
cli.say "o #{name}"
|
152
|
+
d = Dataset.new(p, name, cli[:ref])
|
153
|
+
raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
|
154
|
+
d
|
155
|
+
end
|
156
|
+
|
157
|
+
def copy_file_to_project(file, file_type, d, p)
|
158
|
+
return if file.nil?
|
159
|
+
r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
|
160
|
+
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
161
|
+
file_type[2].each_with_index do |ext, i|
|
162
|
+
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
163
|
+
FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
|
164
|
+
cli.say " file: #{file[i]}"
|
165
|
+
end
|
166
|
+
File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
|
167
|
+
end
|
139
168
|
end
|