miga-base 0.4.3.0 → 0.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f547a13f78444c6d3f7149bb111d3a4444af885ef085fef913c9516fac706215
4
- data.tar.gz: 0ae132567bd8e677bfc7e0bafaaeb28138499b1e720302e47f711364f790f020
3
+ metadata.gz: e370d282f1b28480765e1b91fcb7d8921d12baa31d22db1318975a1c2a79e19a
4
+ data.tar.gz: e7fb3941fd3381e0e9696a2c577aeb157657335e56434e7c6d6650be7ba45e98
5
5
  SHA512:
6
- metadata.gz: f174c56c991a3055d347f0450aca196e8c6f4813bb21d18f0bcaca528df40fe76e4ea497ff36d58886817d26fbcb2281459a5866260040de975022e572f6869b
7
- data.tar.gz: ff4777abf6dab9bd2800e86533840817a77d01b3724ed51d9a77d0dac4ec1506ca407420b615f5e16fd3548af5b58fd56a3e922a448f2d75052d814cdb504ae3
6
+ metadata.gz: 4642a212e1b4021e211fd144b515ff49e9ddb7a9b2292430553307a7ae165e4d8d5e6fd8426757f15ea6e70f4c3efbb055e0439497172cc1f91186d522c82635
7
+ data.tar.gz: 8d5d3ded3c03e56505572102110a4bca4b84d06b2e73bcf208856610a4cd6e60092ce6d54d47dcef2c8acf85f1cce5f8461097e8699344df6738ed8493215112
data/README.md CHANGED
@@ -23,7 +23,7 @@ For additional information on MiGA, visit:
23
23
  If you're like us, you probably want to see software in action from the get go.
24
24
  You have two options:
25
25
 
26
- 1. Get a peak on MiGA using [MiGA Online][miga-online].
26
+ 1. Get a peek on MiGA using [MiGA Online][miga-online].
27
27
  2. Install the [requirements](manual/part2/requirements.md) and follow the
28
28
  [installation instructions](manual/part2/installation.md). Once you have MiGA
29
29
  installed, you can [deploy some examples](manual/part4.md).
@@ -9,7 +9,11 @@ require 'optparse'
9
9
  class MiGA::Cli < MiGA::MiGA
10
10
 
11
11
  require 'miga/cli/base'
12
+ require 'miga/cli/opt_helper'
13
+ require 'miga/cli/objects_helper'
12
14
  require 'miga/cli/action'
15
+ include MiGA::Cli::OptHelper
16
+ include MiGA::Cli::ObjectsHelper
13
17
 
14
18
  ##
15
19
  # Task to execute, a symbol
@@ -71,41 +75,68 @@ class MiGA::Cli < MiGA::MiGA
71
75
  end
72
76
 
73
77
  ##
74
- # Send +par+ to $stdout, ensuring new line at the end
78
+ # Print +par+, ensuring new line at the end.
79
+ # If the first parameter is +IO+, the output is sent there,
80
+ # otherwise it's sent to +$stdout+
75
81
  def puts(*par)
76
- $stdout.puts(*par)
82
+ io = par.first.is_a?(IO) ? par.shift : $stdout
83
+ io.puts(*par)
77
84
  end
78
85
 
79
86
  ##
80
- # Send +par+ to $stdout as is
87
+ # Print +par+.
88
+ # If the first parameter is +IO+, the output is sent there,
89
+ # otherwise it's sent to +$stdout+
81
90
  def print(*par)
82
- $stdout.print(*par)
91
+ io = par.first.is_a?(IO) ? par.shift : $stdout
92
+ io.print(*par)
83
93
  end
84
94
 
85
95
  ##
86
- # Display a table with headers +header+ and contents +values+, both Array
87
- def table(header, values)
88
- self.puts MiGA.tabulate(header, values, self[:tabular])
96
+ # Display a table with headers +header+ and contents +values+, both Array.
97
+ # The output is printed to +io+
98
+ def table(header, values, io = $stdout)
99
+ self.puts(io, MiGA.tabulate(header, values, self[:tabular]))
89
100
  end
90
101
 
91
102
  ##
92
- # Send +par+ to $stderr (ensuring new line at the end), iff --verbose.
103
+ # Print +par+ ensuring new line at the end, iff --verbose.
93
104
  # Date/time each line.
105
+ # If the first parameter is +IO+, the output is sent there,
106
+ # otherwise it's sent to +$stderr+
94
107
  def say(*par)
95
108
  return unless self[:verbose]
96
109
  par.map! { |i| "[#{Time.now}] #{i}" }
97
- $stderr.puts(*par)
110
+ io = par.first.is_a?(IO) ? par.shift : $stderr
111
+ io.puts(*par)
98
112
  end
99
113
 
100
114
  ##
101
- # Reports the advance of a task at +step+ (String), the +n+ out of +total+
115
+ # Reports the advance of a task at +step+ (String), the +n+ out of +total+.
116
+ # The advance is reported in powers of 1,024 if +bin+ is true, or powers of
117
+ # 1,000 otherwise.
102
118
  # The report goes to $stderr iff --verbose
103
- def advance(step, n = 0, total = nil)
119
+ def advance(step, n = 0, total = nil, bin = true)
104
120
  return unless self[:verbose]
105
- adv = total.nil? ? '' : ('%.1f%% (%d/%d)' % [n/total, n, total])
121
+ adv = total.nil? ? '' :
122
+ ('%.1f%% (%s/%s)' % [100 * n / total,
123
+ num_suffix(n, bin), num_suffix(total, bin)])
106
124
  $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
107
125
  end
108
126
 
127
+ def num_suffix(n, bin = false)
128
+ p = ''
129
+ {T: 4, G: 3, M: 2, K: 1}.each do |k,x|
130
+ v = (bin ? 1024 : 1e3) ** x
131
+ if n > v
132
+ n = '%.1f' % (n / v)
133
+ p = k
134
+ break
135
+ end
136
+ end
137
+ "#{n}#{p}"
138
+ end
139
+
109
140
  ##
110
141
  # Ask a question +question+ to the user (requires +#interactive = true+)
111
142
  # The +default+ is used if the answer is empty
@@ -188,135 +219,6 @@ class MiGA::Cli < MiGA::MiGA
188
219
  end
189
220
  end
190
221
 
191
- ##
192
- # Send MiGA's banner to OptionParser +opt+
193
- def banner(opt)
194
- usage = "Usage: miga #{action.name}"
195
- usage += ' {operation}' if expect_operation
196
- usage += ' [options]'
197
- usage += ' {FILES...}' if expect_files
198
- opt.banner = "\n#{task_description}\n\n#{usage}\n"
199
- opt.separator ''
200
- end
201
-
202
- ##
203
- # Common options at the end of most actions, passed to OptionParser +opt+
204
- # No action is performed if +#opt_common = false+ is passed
205
- # Executes only once, unless +#opt_common = true+ is passed between calls
206
- def opt_common(opt)
207
- return unless @opt_common
208
- opt.on(
209
- '--auto',
210
- 'Accept all defaults as answers'
211
- ){ |v| cli[:auto] = v } if interactive
212
- opt.on(
213
- '-v', '--verbose',
214
- 'Print additional information to STDERR'
215
- ){ |v| self[:verbose] = v }
216
- opt.on(
217
- '-d', '--debug INT', Integer,
218
- 'Print debugging information to STDERR (1: debug, 2: trace)'
219
- ){ |v| (v > 1) ? MiGA.DEBUG_TRACE_ON : MiGA.DEBUG_ON }
220
- opt.on(
221
- '-h', '--help',
222
- 'Display this screen'
223
- ){ puts opt ; exit }
224
- opt.separator ''
225
- self.opt_common = false
226
- end
227
-
228
- ##
229
- # Options to load an object passed to OptionParser +opt+, as determined
230
- # by +what+ an Array with any combination of:
231
- # - :project To require a project
232
- # - :dataset To require a dataset
233
- # - :dataset_opt To allow (optionally) a dataset
234
- # - :dataset_type To allow (optionally) a type of dataset
235
- # - :dataset_type_req To require a type of dataset
236
- # - :project_type To allow (optionally) a type of project
237
- # - :project_type_req To require a type of project
238
- # - :result To require a type of project or dataset result
239
- # - :result_dataset To require a type of dataset result
240
- # - :result_project To require a type of project result
241
- def opt_object(opt, what = [:project, :dataset])
242
- opt.on(
243
- '-P', '--project PATH',
244
- '(Mandatory) Path to the project'
245
- ){ |v| self[:project] = v } if what.include? :project
246
- opt.on(
247
- '-D', '--dataset STRING',
248
- (what.include?(:dataset) ? '(Mandatory) ' : '') + 'Name of the dataset'
249
- ){ |v| self[:dataset] = v } if what.include? :dataset or
250
- what.include? :dataset_opt
251
- opt.on(
252
- '-D', '--dataset STRING',
253
- 'Name of the dataset'
254
- ){ |v| self[:dataset] = v } if what.include? :dataset_opt
255
- opt.on(
256
- '-t', '--type STRING',
257
- (what.include?(:dataset_type_req) ? '(Mandatory) ' : '') +
258
- 'Type of dataset. Recognized types include:',
259
- *Dataset.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}" }
260
- ){ |v| self[:type] = v.downcase.to_sym } if what.include? :dataset_type or
261
- what.include? :dataset_type_req
262
- opt.on(
263
- '-t', '--type STRING',
264
- (what.include?(:project_type_req) ? '(Mandatory) ' : '') +
265
- 'Type of project. Recognized types include:',
266
- *Project.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
267
- ){ |v| self[:type] = v.downcase.to_sym } if what.include? :project_type or
268
- what.include? :project_type_req
269
- opt.on(
270
- '-r', '--result STRING',
271
- '(Mandatory) Name of the result',
272
- 'Recognized names for dataset-specific results include:',
273
- *Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
274
- 'Recognized names for project-wide results include:',
275
- *Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
276
- ){ |v| self[:result] = v.downcase.to_sym } if what.include? :result
277
- opt.on(
278
- '-r', '--result STRING',
279
- '(Mandatory) Name of the result, one of:',
280
- *Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
281
- ){ |v| self[:result] = v.downcase.to_sym } if what.include? :result_dataset
282
- opt.on(
283
- '-r', '--result STRING',
284
- '(Mandatory) Name of the result, one of:',
285
- *Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
286
- ){ |v| self[:result] = v.downcase.to_sym } if what.include? :result_project
287
- end
288
-
289
- ##
290
- # Options to filter a list of datasets passed to OptionParser +opt+,
291
- # as determined by +what+ an Array with any combination of:
292
- # - :ref To filter by reference (--ref) or query (--no-ref)
293
- # - :multi To filter by multiple (--multi) or single (--no-multi) species
294
- # - :active To filter by active (--active) or inactive (--no-active)
295
- # - :taxonomy To filter by taxonomy (--taxonomy)
296
- # The "k-th" filter (--dataset-k) is always included
297
- def opt_filter_datasets(opt, what = [:ref, :multi, :active, :taxonomy])
298
- opt.on(
299
- '--[no-]ref',
300
- 'Use only reference (or only non-reference) datasets'
301
- ){ |v| self[:ref] = v } if what.include? :ref
302
- opt.on(
303
- '--[no-]multi',
304
- 'Use only multi-species (or only single-species) datasets'
305
- ){ |v| self[:multi] = v } if what.include? :multi
306
- opt.on(
307
- '--[no-]active',
308
- 'Use only active (or inactive) datasets'
309
- ){ |v| self[:active] = v } if what.include? :active
310
- opt.on(
311
- '-t', '--taxonomy RANK:TAXON',
312
- 'Filter by taxonomy'
313
- ){ |v| self[:taxonomy] = Taxonomy.new(v) } if what.include? :taxonomy
314
- opt.on(
315
- '--dataset-k INTEGER', Integer,
316
- 'Use only the k-th dataset in the list'
317
- ){ |v| self[:dataset_k] = v }
318
- end
319
-
320
222
  ##
321
223
  # Ensure that these parameters have been passed to the CLI, as defined by
322
224
  # +par+, a Hash with object names as keys and parameter flag as values.
@@ -336,88 +238,6 @@ class MiGA::Cli < MiGA::MiGA
336
238
  end
337
239
  end
338
240
 
339
- ##
340
- # Get the project defined in the CLI by parameter +name+ and +flag+
341
- def load_project(name = :project, flag = '-P')
342
- return @objects[name] unless @objects[name].nil?
343
- ensure_par(name => flag)
344
- say "Loading project: #{self[name]}"
345
- @objects[name] = Project.load(self[name])
346
- raise "Cannot load project: #{self[name]}" if @objects[name].nil?
347
- @objects[name]
348
- end
349
-
350
- ##
351
- # Load the dataset defined in the CLI
352
- # If +silent=true+, it allows failures silently
353
- def load_dataset(silent = false)
354
- return @objects[:dataset] unless @objects[:dataset].nil?
355
- ensure_par(dataset: '-D')
356
- @objects[:dataset] = load_project.dataset(self[:dataset])
357
- if !silent && @objects[:dataset].nil?
358
- raise "Cannot load dataset: #{self[:dataset]}"
359
- end
360
- return @objects[:dataset]
361
- end
362
-
363
- ##
364
- # Load an a project or (if defined) a dataset
365
- def load_project_or_dataset
366
- self[:dataset].nil? ? load_project : load_dataset
367
- end
368
-
369
- ##
370
- # Load and filter a list of datasets as requested in the CLI
371
- # If +silent=true+, it allows failures silently
372
- def load_and_filter_datasets(silent = false)
373
- return @objects[:filtered_datasets] unless @objects[:filtered_datasets].nil?
374
- say 'Listing datasets'
375
- ds = self[:dataset].nil? ?
376
- load_project.datasets : [load_dataset(silent)].compact
377
- ds.select! { |d| d.is_ref? == self[:ref] } unless self[:ref].nil?
378
- ds.select! { |d| d.is_active? == self[:active] } unless self[:active].nil?
379
- ds.select! do |d|
380
- self[:multi] ? d.is_multi? : d.is_nonmulti?
381
- end unless self[:multi].nil?
382
- ds.select! do |d|
383
- (not d.metadata[:tax].nil?) && d.metadata[:tax].in?(self[:taxonomy])
384
- end unless self[:taxonomy].nil?
385
- ds = ds.values_at(self[:dataset_k]-1) unless self[:dataset_k].nil?
386
- @objects[:filtered_datasets] = ds
387
- end
388
-
389
- def load_result
390
- return @objects[:result] unless @objects[:result].nil?
391
- ensure_par(result: '-r')
392
- obj = load_project_or_dataset
393
- if obj.class.RESULT_DIRS[self[:result]].nil?
394
- raise "Unsupported result for #{obj.class.to_s.gsub(/.*::/,'')}: #{self[:result]}"
395
- end
396
- r = obj.add_result(self[:result], false)
397
- raise "Cannot load result: #{self[:result]}" if r.nil?
398
- @objects[:result] = r
399
- end
400
-
401
- def add_metadata(obj, cli = self)
402
- cli[:metadata].split(',').each do |pair|
403
- (k,v) = pair.split('=')
404
- case v
405
- when 'true'; v = true
406
- when 'false'; v = false
407
- when 'nil'; v = nil
408
- end
409
- if k == '_step'
410
- obj.metadata["_try_#{v}"] ||= 0
411
- obj.metadata["_try_#{v}"] += 1
412
- end
413
- obj.metadata[k] = v
414
- end unless cli[:metadata].nil?
415
- [:type, :name, :user, :description, :comments].each do |k|
416
- obj.metadata[k] = cli[k] unless cli[k].nil?
417
- end
418
- obj
419
- end
420
-
421
241
  ##
422
242
  # Task description
423
243
  def task_description
@@ -7,8 +7,10 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
7
7
 
8
8
  def parse_cli
9
9
  cli.expect_files = true
10
- cli.defaults = {ref: true, ignore_dups: false,
11
- regexp: /^(?:.*\/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?i:\.f[nastq]+)?$/}
10
+ cli.defaults = {
11
+ ref: true, ignore_dups: false,
12
+ regexp: MiGA::Cli.FILE_REGEXP
13
+ }
12
14
  cli.parse do |opt|
13
15
  opt.separator 'You can create multiple datasets with a single command; ' \
14
16
  'simply pass all the files at the end: {FILES...}'
@@ -19,92 +21,53 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
19
21
  opt.on(
20
22
  '-q', '--query',
21
23
  'Register the dataset as a query, not a reference dataset'
22
- ){ |v| cli[:ref] = !v }
24
+ ) { |v| cli[:ref] = !v }
23
25
  opt.on(
24
26
  '-d', '--description STRING',
25
27
  'Description of the dataset'
26
- ){ |v| cli[:description] = v }
27
- opt.on('-c', '--comments STRING',
28
+ ) { |v| cli[:description] = v }
29
+ opt.on(
30
+ '-c', '--comments STRING',
28
31
  'Comments on the dataset'
29
- ){ |v| cli[:comments] = v }
30
- opt.on('-m', '--metadata STRING',
32
+ ) { |v| cli[:comments] = v }
33
+ opt.on(
34
+ '-m', '--metadata STRING',
31
35
  'Metadata as key-value pairs separated by = and delimited by comma',
32
36
  'Values are saved as strings except for booleans (true / false) or nil'
33
- ){ |v| cli[:metadata] = v }
37
+ ) { |v| cli[:metadata] = v }
34
38
  opt.on(
35
39
  '-R', '--name-regexp REGEXP', Regexp,
36
40
  'Regular expression indicating how to extract the name from the path',
37
41
  "By default: '#{cli[:regexp]}'"
38
- ){ |v| cli[:regexp] = v }
42
+ ) { |v| cli[:regexp] = v }
43
+ opt.on(
44
+ '--prefix STRING',
45
+ 'Prefix to all the dataset names'
46
+ ) { |v| cli[:prefix] = v }
39
47
  opt.on(
40
48
  '-i', '--input-type STRING',
41
49
  'Type of input data, one of the following:',
42
- *self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}." }
43
- ){ |v| cli[:input_type] = v.downcase.to_sym }
50
+ *self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
51
+ ) { |v| cli[:input_type] = v.downcase.to_sym }
44
52
  opt.on(
45
53
  '--ignore-dups',
46
54
  'Continue with a warning if a dataset already exists'
47
- ){ |v| cli[:ignore_dups] = v }
55
+ ) { |v| cli[:ignore_dups] = v }
48
56
  end
49
57
  end
50
58
 
51
59
  def perform
52
60
  p = cli.load_project
53
- files = cli.files
54
- file_type = nil
55
- if files.empty?
56
- cli.ensure_par({dataset: '-D'},
57
- 'dataset is mandatory (-D) unless files are provided')
58
- cli.ensure_type(Dataset)
59
- files = [nil]
60
- else
61
- raise 'Please specify input type (-i).' if cli[:input_type].nil?
62
- file_type = self.class.INPUT_TYPES[cli[:input_type]]
63
- raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
64
- raise 'Some files are duplicated, files must be unique.' if
65
- files.size != files.uniq.size
66
- if cli[:input_type].to_s =~ /_paired$/
67
- raise 'Odd number of files incompatible with input type.' if files.size.odd?
68
- files = Hash[*files].to_a
69
- else
70
- files = files.map{ |i| [i] }
71
- end
72
- raise 'The dataset name (-D) can only be specified with one input file.' if
73
- files.size > 1 && !cli[:dataset].nil?
74
- end
61
+ files, file_type = get_files_and_type
75
62
 
76
63
  cli.say 'Creating datasets:'
77
64
  files.each do |file|
78
- name = cli[:dataset]
79
- if name.nil?
80
- ref_file = file.is_a?(Array) ? file.first : file
81
- m = cli[:regexp].match(ref_file)
82
- raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
83
- name = m[1].miga_name
84
- end
85
- if Dataset.exist?(p, name)
86
- msg = "Dataset already exists: #{name}."
87
- cli[:ignore_dups] ? (warn(msg); next) : raise(msg)
88
- end
89
-
90
- cli.say "o #{name}"
91
- d = Dataset.new(p, name, cli[:ref])
92
- raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
93
-
94
- unless file.nil?
95
- r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
96
- r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
97
- file_type[2].each_with_index do |ext, i|
98
- gz = file[i] =~ /\.gz/ ? '.gz' : ''
99
- FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
100
- cli.say " file: #{file[i]}"
101
- end
102
- File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
103
- end
104
-
65
+ d = create_dataset(file, p)
66
+ next if d.nil?
67
+ copy_file_to_project(file, file_type, d, p)
105
68
  d = cli.add_metadata(d)
106
69
  d.save
107
- p.add_dataset(name)
70
+ p.add_dataset(d.name)
108
71
  res = d.first_preprocessing(true)
109
72
  cli.say " result: #{res}"
110
73
  end
@@ -136,4 +99,70 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
136
99
  @@INPUT_TYPES
137
100
  end
138
101
  end
102
+
103
+ private
104
+
105
+ def get_files_and_type
106
+ files = cli.files
107
+ file_type = nil
108
+ if files.empty?
109
+ cli.ensure_par({dataset: '-D'},
110
+ 'dataset is mandatory (-D) unless files are provided')
111
+ cli.ensure_type(Dataset)
112
+ files = [nil]
113
+ else
114
+ raise 'Please specify input type (-i).' if cli[:input_type].nil?
115
+ file_type = self.class.INPUT_TYPES[cli[:input_type]]
116
+ raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
117
+ raise 'Some files are duplicated, files must be unique.' if
118
+ files.size != files.uniq.size
119
+ if cli[:input_type].to_s =~ /_paired$/
120
+ if files.size.odd?
121
+ raise 'Odd number of files incompatible with input type.'
122
+ end
123
+ files = Hash[*files].to_a
124
+ else
125
+ files = files.map{ |i| [i] }
126
+ end
127
+ if files.size > 1 && !cli[:dataset].nil?
128
+ raise 'The dataset name (-D) can only be specified with one input file.'
129
+ end
130
+ end
131
+ [files, file_type]
132
+ end
133
+
134
+ def create_dataset(file, p)
135
+ name = cli[:dataset]
136
+ if name.nil?
137
+ ref_file = file.is_a?(Array) ? file.first : file
138
+ m = cli[:regexp].match(ref_file)
139
+ raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
140
+ name = cli[:prefix].to_s + m[1].miga_name
141
+ end
142
+ if Dataset.exist?(p, name)
143
+ msg = "Dataset already exists: #{name}."
144
+ if cli[:ignore_dups]
145
+ warn(msg)
146
+ return nil
147
+ else
148
+ raise(msg)
149
+ end
150
+ end
151
+ cli.say "o #{name}"
152
+ d = Dataset.new(p, name, cli[:ref])
153
+ raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
154
+ d
155
+ end
156
+
157
+ def copy_file_to_project(file, file_type, d, p)
158
+ return if file.nil?
159
+ r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
160
+ r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
161
+ file_type[2].each_with_index do |ext, i|
162
+ gz = file[i] =~ /\.gz/ ? '.gz' : ''
163
+ FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
164
+ cli.say " file: #{file[i]}"
165
+ end
166
+ File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
167
+ end
139
168
  end