miga-base 0.4.3.0 → 0.5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f547a13f78444c6d3f7149bb111d3a4444af885ef085fef913c9516fac706215
4
- data.tar.gz: 0ae132567bd8e677bfc7e0bafaaeb28138499b1e720302e47f711364f790f020
3
+ metadata.gz: e370d282f1b28480765e1b91fcb7d8921d12baa31d22db1318975a1c2a79e19a
4
+ data.tar.gz: e7fb3941fd3381e0e9696a2c577aeb157657335e56434e7c6d6650be7ba45e98
5
5
  SHA512:
6
- metadata.gz: f174c56c991a3055d347f0450aca196e8c6f4813bb21d18f0bcaca528df40fe76e4ea497ff36d58886817d26fbcb2281459a5866260040de975022e572f6869b
7
- data.tar.gz: ff4777abf6dab9bd2800e86533840817a77d01b3724ed51d9a77d0dac4ec1506ca407420b615f5e16fd3548af5b58fd56a3e922a448f2d75052d814cdb504ae3
6
+ metadata.gz: 4642a212e1b4021e211fd144b515ff49e9ddb7a9b2292430553307a7ae165e4d8d5e6fd8426757f15ea6e70f4c3efbb055e0439497172cc1f91186d522c82635
7
+ data.tar.gz: 8d5d3ded3c03e56505572102110a4bca4b84d06b2e73bcf208856610a4cd6e60092ce6d54d47dcef2c8acf85f1cce5f8461097e8699344df6738ed8493215112
data/README.md CHANGED
@@ -23,7 +23,7 @@ For additional information on MiGA, visit:
23
23
  If you're like us, you probably want to see sofware in action from the get go.
24
24
  You have two options:
25
25
 
26
- 1. Get a peak on MiGA using [MiGA Online][miga-online].
26
+ 1. Get a peek on MiGA using [MiGA Online][miga-online].
27
27
  2. Install the [requirements](manual/part2/requirements.md) and follow the
28
28
  [installation instructions](manual/part2/installation.md). Once you have MiGA
29
29
  installed, you can [deploy some examples](manual/part4.md).
@@ -9,7 +9,11 @@ require 'optparse'
9
9
  class MiGA::Cli < MiGA::MiGA
10
10
 
11
11
  require 'miga/cli/base'
12
+ require 'miga/cli/opt_helper'
13
+ require 'miga/cli/objects_helper'
12
14
  require 'miga/cli/action'
15
+ include MiGA::Cli::OptHelper
16
+ include MiGA::Cli::ObjectsHelper
13
17
 
14
18
  ##
15
19
  # Task to execute, a symbol
@@ -71,41 +75,68 @@ class MiGA::Cli < MiGA::MiGA
71
75
  end
72
76
 
73
77
  ##
74
- # Send +par+ to $stdout, ensuring new line at the end
78
+ # Print +par+, ensuring new line at the end.
79
+ # If the first parameter is +IO+, the output is sent there,
80
+ # otherwise it's sent to +$stdout+
75
81
  def puts(*par)
76
- $stdout.puts(*par)
82
+ io = par.first.is_a?(IO) ? par.shift : $stdout
83
+ io.puts(*par)
77
84
  end
78
85
 
79
86
  ##
80
- # Send +par+ to $stdout as is
87
+ # Print +par+.
88
+ # If the first parameter is +IO+, the output is sent there,
89
+ # otherwise it's sent to +$stdout+
81
90
  def print(*par)
82
- $stdout.print(*par)
91
+ io = par.first.is_a?(IO) ? par.shift : $stdout
92
+ io.print(*par)
83
93
  end
84
94
 
85
95
  ##
86
- # Display a table with headers +header+ and contents +values+, both Array
87
- def table(header, values)
88
- self.puts MiGA.tabulate(header, values, self[:tabular])
96
+ # Display a table with headers +header+ and contents +values+, both Array.
97
+ # The output is printed to +io+
98
+ def table(header, values, io = $stdout)
99
+ self.puts(io, MiGA.tabulate(header, values, self[:tabular]))
89
100
  end
90
101
 
91
102
  ##
92
- # Send +par+ to $stderr (ensuring new line at the end), iff --verbose.
103
+ # Print +par+ ensuring new line at the end, iff --verbose.
93
104
  # Date/time each line.
105
+ # If the first parameter is +IO+, the output is sent there,
106
+ # otherwise it's sent to +$stderr+
94
107
  def say(*par)
95
108
  return unless self[:verbose]
96
109
  par.map! { |i| "[#{Time.now}] #{i}" }
97
- $stderr.puts(*par)
110
+ io = par.first.is_a?(IO) ? par.shift : $stderr
111
+ io.puts(*par)
98
112
  end
99
113
 
100
114
  ##
101
- # Reports the advance of a task at +step+ (String), the +n+ out of +total+
115
+ # Reports the advance of a task at +step+ (String), the +n+ out of +total+.
116
+ # The advance is reported in powers of 1,024 if +bin+ is true, or powers of
117
+ # 1,000 otherwise.
102
118
  # The report goes to $stderr iff --verborse
103
- def advance(step, n = 0, total = nil)
119
+ def advance(step, n = 0, total = nil, bin = true)
104
120
  return unless self[:verbose]
105
- adv = total.nil? ? '' : ('%.1f%% (%d/%d)' % [n/total, n, total])
121
+ adv = total.nil? ? '' :
122
+ ('%.1f%% (%s/%s)' % [100 * n / total,
123
+ num_suffix(n, bin), num_suffix(total, bin)])
106
124
  $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
107
125
  end
108
126
 
127
+ def num_suffix(n, bin = false)
128
+ p = ''
129
+ {T: 4, G: 3, M: 2, K: 1}.each do |k,x|
130
+ v = (bin ? 1024 : 1e3) ** x
131
+ if n > v
132
+ n = '%.1f' % (n / v)
133
+ p = k
134
+ break
135
+ end
136
+ end
137
+ "#{n}#{p}"
138
+ end
139
+
109
140
  ##
110
141
  # Ask a question +question+ to the user (requires +#interactive = true+)
111
142
  # The +default+ is used if the answer is empty
@@ -188,135 +219,6 @@ class MiGA::Cli < MiGA::MiGA
188
219
  end
189
220
  end
190
221
 
191
- ##
192
- # Send MiGA's banner to OptionParser +opt+
193
- def banner(opt)
194
- usage = "Usage: miga #{action.name}"
195
- usage += ' {operation}' if expect_operation
196
- usage += ' [options]'
197
- usage += ' {FILES...}' if expect_files
198
- opt.banner = "\n#{task_description}\n\n#{usage}\n"
199
- opt.separator ''
200
- end
201
-
202
- ##
203
- # Common options at the end of most actions, passed to OptionParser +opt+
204
- # No action is performed if +#opt_common = false+ is passed
205
- # Executes only once, unless +#opt_common = true+ is passed between calls
206
- def opt_common(opt)
207
- return unless @opt_common
208
- opt.on(
209
- '--auto',
210
- 'Accept all defaults as answers'
211
- ){ |v| cli[:auto] = v } if interactive
212
- opt.on(
213
- '-v', '--verbose',
214
- 'Print additional information to STDERR'
215
- ){ |v| self[:verbose] = v }
216
- opt.on(
217
- '-d', '--debug INT', Integer,
218
- 'Print debugging information to STDERR (1: debug, 2: trace)'
219
- ){ |v| (v > 1) ? MiGA.DEBUG_TRACE_ON : MiGA.DEBUG_ON }
220
- opt.on(
221
- '-h', '--help',
222
- 'Display this screen'
223
- ){ puts opt ; exit }
224
- opt.separator ''
225
- self.opt_common = false
226
- end
227
-
228
- ##
229
- # Options to load an object passed to OptionParser +opt+, as determined
230
- # by +what+ an Array with any combination of:
231
- # - :project To require a project
232
- # - :dataset To require a dataset
233
- # - :dataset_opt To allow (optionally) a dataset
234
- # - :dataset_type To allow (optionally) a type of dataset
235
- # - :dataset_type_req To require a type of dataset
236
- # - :project_type To allow (optionally) a type of project
237
- # - :project_type_req To require a type of project
238
- # - :result To require a type of project or dataset result
239
- # - :result_dataset To require a type of dataset result
240
- # - :result_project To require a type of project result
241
- def opt_object(opt, what = [:project, :dataset])
242
- opt.on(
243
- '-P', '--project PATH',
244
- '(Mandatory) Path to the project'
245
- ){ |v| self[:project] = v } if what.include? :project
246
- opt.on(
247
- '-D', '--dataset STRING',
248
- (what.include?(:dataset) ? '(Mandatory) ' : '') + 'Name of the dataset'
249
- ){ |v| self[:dataset] = v } if what.include? :dataset or
250
- what.include? :dataset_opt
251
- opt.on(
252
- '-D', '--dataset STRING',
253
- 'Name of the dataset'
254
- ){ |v| self[:dataset] = v } if what.include? :dataset_opt
255
- opt.on(
256
- '-t', '--type STRING',
257
- (what.include?(:dataset_type_req) ? '(Mandatory) ' : '') +
258
- 'Type of dataset. Recognized types include:',
259
- *Dataset.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}" }
260
- ){ |v| self[:type] = v.downcase.to_sym } if what.include? :dataset_type or
261
- what.include? :dataset_type_req
262
- opt.on(
263
- '-t', '--type STRING',
264
- (what.include?(:project_type_req) ? '(Mandatory) ' : '') +
265
- 'Type of project. Recognized types include:',
266
- *Project.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
267
- ){ |v| self[:type] = v.downcase.to_sym } if what.include? :project_type or
268
- what.include? :project_type_req
269
- opt.on(
270
- '-r', '--result STRING',
271
- '(Mandatory) Name of the result',
272
- 'Recognized names for dataset-specific results include:',
273
- *Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
274
- 'Recognized names for project-wide results include:',
275
- *Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
276
- ){ |v| self[:result] = v.downcase.to_sym } if what.include? :result
277
- opt.on(
278
- '-r', '--result STRING',
279
- '(Mandatory) Name of the result, one of:',
280
- *Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
281
- ){ |v| self[:result] = v.downcase.to_sym } if what.include? :result_dataset
282
- opt.on(
283
- '-r', '--result STRING',
284
- '(Mandatory) Name of the result, one of:',
285
- *Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
286
- ){ |v| self[:result] = v.downcase.to_sym } if what.include? :result_project
287
- end
288
-
289
- ##
290
- # Options to filter a list of datasets passed to OptionParser +opt+,
291
- # as determined by +what+ an Array with any combination of:
292
- # - :ref To filter by reference (--ref) or query (--no-ref)
293
- # - :multi To filter by multiple (--multi) or single (--no-multi) species
294
- # - :active To filter by active (--active) or inactive (--no-active)
295
- # - :taxonomy To filter by taxonomy (--taxonomy)
296
- # The "k-th" filter (--dataset-k) is always included
297
- def opt_filter_datasets(opt, what = [:ref, :multi, :active, :taxonomy])
298
- opt.on(
299
- '--[no-]ref',
300
- 'Use only reference (or only non-reference) datasets'
301
- ){ |v| self[:ref] = v } if what.include? :ref
302
- opt.on(
303
- '--[no-]multi',
304
- 'Use only multi-species (or only single-species) datasets'
305
- ){ |v| self[:multi] = v } if what.include? :multi
306
- opt.on(
307
- '--[no-]active',
308
- 'Use only active (or inactive) datasets'
309
- ){ |v| self[:active] = v } if what.include? :active
310
- opt.on(
311
- '-t', '--taxonomy RANK:TAXON',
312
- 'Filter by taxonomy'
313
- ){ |v| self[:taxonomy] = Taxonomy.new(v) } if what.include? :taxonomy
314
- opt.on(
315
- '--dataset-k INTEGER', Integer,
316
- 'Use only the k-th dataset in the list'
317
- ){ |v| self[:dataset_k] = v }
318
- end
319
-
320
222
  ##
321
223
  # Ensure that these parameters have been passed to the CLI, as defined by
322
224
  # +par+, a Hash with object names as keys and parameter flag as values.
@@ -336,88 +238,6 @@ class MiGA::Cli < MiGA::MiGA
336
238
  end
337
239
  end
338
240
 
339
- ##
340
- # Get the project defined in the CLI by parameter +name+ and +flag+
341
- def load_project(name = :project, flag = '-P')
342
- return @objects[name] unless @objects[name].nil?
343
- ensure_par(name => flag)
344
- say "Loading project: #{self[name]}"
345
- @objects[name] = Project.load(self[name])
346
- raise "Cannot load project: #{self[name]}" if @objects[name].nil?
347
- @objects[name]
348
- end
349
-
350
- ##
351
- # Load the dataset defined in the CLI
352
- # If +silent=true+, it allows failures silently
353
- def load_dataset(silent = false)
354
- return @objects[:dataset] unless @objects[:dataset].nil?
355
- ensure_par(dataset: '-D')
356
- @objects[:dataset] = load_project.dataset(self[:dataset])
357
- if !silent && @objects[:dataset].nil?
358
- raise "Cannot load dataset: #{self[:dataset]}"
359
- end
360
- return @objects[:dataset]
361
- end
362
-
363
- ##
364
- # Load an a project or (if defined) a dataset
365
- def load_project_or_dataset
366
- self[:dataset].nil? ? load_project : load_dataset
367
- end
368
-
369
- ##
370
- # Load and filter a list of datasets as requested in the CLI
371
- # If +silent=true+, it allows failures silently
372
- def load_and_filter_datasets(silent = false)
373
- return @objects[:filtered_datasets] unless @objects[:filtered_datasets].nil?
374
- say 'Listing datasets'
375
- ds = self[:dataset].nil? ?
376
- load_project.datasets : [load_dataset(silent)].compact
377
- ds.select! { |d| d.is_ref? == self[:ref] } unless self[:ref].nil?
378
- ds.select! { |d| d.is_active? == self[:active] } unless self[:active].nil?
379
- ds.select! do |d|
380
- self[:multi] ? d.is_multi? : d.is_nonmulti?
381
- end unless self[:multi].nil?
382
- ds.select! do |d|
383
- (not d.metadata[:tax].nil?) && d.metadata[:tax].in?(self[:taxonomy])
384
- end unless self[:taxonomy].nil?
385
- ds = ds.values_at(self[:dataset_k]-1) unless self[:dataset_k].nil?
386
- @objects[:filtered_datasets] = ds
387
- end
388
-
389
- def load_result
390
- return @objects[:result] unless @objects[:result].nil?
391
- ensure_par(result: '-r')
392
- obj = load_project_or_dataset
393
- if obj.class.RESULT_DIRS[self[:result]].nil?
394
- raise "Unsupported result for #{obj.class.to_s.gsub(/.*::/,'')}: #{self[:result]}"
395
- end
396
- r = obj.add_result(self[:result], false)
397
- raise "Cannot load result: #{self[:result]}" if r.nil?
398
- @objects[:result] = r
399
- end
400
-
401
- def add_metadata(obj, cli = self)
402
- cli[:metadata].split(',').each do |pair|
403
- (k,v) = pair.split('=')
404
- case v
405
- when 'true'; v = true
406
- when 'false'; v = false
407
- when 'nil'; v = nil
408
- end
409
- if k == '_step'
410
- obj.metadata["_try_#{v}"] ||= 0
411
- obj.metadata["_try_#{v}"] += 1
412
- end
413
- obj.metadata[k] = v
414
- end unless cli[:metadata].nil?
415
- [:type, :name, :user, :description, :comments].each do |k|
416
- obj.metadata[k] = cli[k] unless cli[k].nil?
417
- end
418
- obj
419
- end
420
-
421
241
  ##
422
242
  # Task description
423
243
  def task_description
@@ -7,8 +7,10 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
7
7
 
8
8
  def parse_cli
9
9
  cli.expect_files = true
10
- cli.defaults = {ref: true, ignore_dups: false,
11
- regexp: /^(?:.*\/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?i:\.f[nastq]+)?$/}
10
+ cli.defaults = {
11
+ ref: true, ignore_dups: false,
12
+ regexp: MiGA::Cli.FILE_REGEXP
13
+ }
12
14
  cli.parse do |opt|
13
15
  opt.separator 'You can create multiple datasets with a single command; ' \
14
16
  'simply pass all the files at the end: {FILES...}'
@@ -19,92 +21,53 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
19
21
  opt.on(
20
22
  '-q', '--query',
21
23
  'Register the dataset as a query, not a reference dataset'
22
- ){ |v| cli[:ref] = !v }
24
+ ) { |v| cli[:ref] = !v }
23
25
  opt.on(
24
26
  '-d', '--description STRING',
25
27
  'Description of the dataset'
26
- ){ |v| cli[:description] = v }
27
- opt.on('-c', '--comments STRING',
28
+ ) { |v| cli[:description] = v }
29
+ opt.on(
30
+ '-c', '--comments STRING',
28
31
  'Comments on the dataset'
29
- ){ |v| cli[:comments] = v }
30
- opt.on('-m', '--metadata STRING',
32
+ ) { |v| cli[:comments] = v }
33
+ opt.on(
34
+ '-m', '--metadata STRING',
31
35
  'Metadata as key-value pairs separated by = and delimited by comma',
32
36
  'Values are saved as strings except for booleans (true / false) or nil'
33
- ){ |v| cli[:metadata] = v }
37
+ ) { |v| cli[:metadata] = v }
34
38
  opt.on(
35
39
  '-R', '--name-regexp REGEXP', Regexp,
36
40
  'Regular expression indicating how to extract the name from the path',
37
41
  "By default: '#{cli[:regexp]}'"
38
- ){ |v| cli[:regexp] = v }
42
+ ) { |v| cli[:regexp] = v }
43
+ opt.on(
44
+ '--prefix STRING',
45
+ 'Prefix to all the dataset names'
46
+ ) { |v| cli[:prefix] = v }
39
47
  opt.on(
40
48
  '-i', '--input-type STRING',
41
49
  'Type of input data, one of the following:',
42
- *self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}." }
43
- ){ |v| cli[:input_type] = v.downcase.to_sym }
50
+ *self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
51
+ ) { |v| cli[:input_type] = v.downcase.to_sym }
44
52
  opt.on(
45
53
  '--ignore-dups',
46
54
  'Continue with a warning if a dataset already exists'
47
- ){ |v| cli[:ignore_dups] = v }
55
+ ) { |v| cli[:ignore_dups] = v }
48
56
  end
49
57
  end
50
58
 
51
59
  def perform
52
60
  p = cli.load_project
53
- files = cli.files
54
- file_type = nil
55
- if files.empty?
56
- cli.ensure_par({dataset: '-D'},
57
- 'dataset is mandatory (-D) unless files are provided')
58
- cli.ensure_type(Dataset)
59
- files = [nil]
60
- else
61
- raise 'Please specify input type (-i).' if cli[:input_type].nil?
62
- file_type = self.class.INPUT_TYPES[cli[:input_type]]
63
- raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
64
- raise 'Some files are duplicated, files must be unique.' if
65
- files.size != files.uniq.size
66
- if cli[:input_type].to_s =~ /_paired$/
67
- raise 'Odd number of files incompatible with input type.' if files.size.odd?
68
- files = Hash[*files].to_a
69
- else
70
- files = files.map{ |i| [i] }
71
- end
72
- raise 'The dataset name (-D) can only be specified with one input file.' if
73
- files.size > 1 && !cli[:dataset].nil?
74
- end
61
+ files, file_type = get_files_and_type
75
62
 
76
63
  cli.say 'Creating datasets:'
77
64
  files.each do |file|
78
- name = cli[:dataset]
79
- if name.nil?
80
- ref_file = file.is_a?(Array) ? file.first : file
81
- m = cli[:regexp].match(ref_file)
82
- raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
83
- name = m[1].miga_name
84
- end
85
- if Dataset.exist?(p, name)
86
- msg = "Dataset already exists: #{name}."
87
- cli[:ignore_dups] ? (warn(msg); next) : raise(msg)
88
- end
89
-
90
- cli.say "o #{name}"
91
- d = Dataset.new(p, name, cli[:ref])
92
- raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
93
-
94
- unless file.nil?
95
- r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
96
- r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
97
- file_type[2].each_with_index do |ext, i|
98
- gz = file[i] =~ /\.gz/ ? '.gz' : ''
99
- FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
100
- cli.say " file: #{file[i]}"
101
- end
102
- File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
103
- end
104
-
65
+ d = create_dataset(file, p)
66
+ next if d.nil?
67
+ copy_file_to_project(file, file_type, d, p)
105
68
  d = cli.add_metadata(d)
106
69
  d.save
107
- p.add_dataset(name)
70
+ p.add_dataset(d.name)
108
71
  res = d.first_preprocessing(true)
109
72
  cli.say " result: #{res}"
110
73
  end
@@ -136,4 +99,70 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
136
99
  @@INPUT_TYPES
137
100
  end
138
101
  end
102
+
103
+ private
104
+
105
+ def get_files_and_type
106
+ files = cli.files
107
+ file_type = nil
108
+ if files.empty?
109
+ cli.ensure_par({dataset: '-D'},
110
+ 'dataset is mandatory (-D) unless files are provided')
111
+ cli.ensure_type(Dataset)
112
+ files = [nil]
113
+ else
114
+ raise 'Please specify input type (-i).' if cli[:input_type].nil?
115
+ file_type = self.class.INPUT_TYPES[cli[:input_type]]
116
+ raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
117
+ raise 'Some files are duplicated, files must be unique.' if
118
+ files.size != files.uniq.size
119
+ if cli[:input_type].to_s =~ /_paired$/
120
+ if files.size.odd?
121
+ raise 'Odd number of files incompatible with input type.'
122
+ end
123
+ files = Hash[*files].to_a
124
+ else
125
+ files = files.map{ |i| [i] }
126
+ end
127
+ if files.size > 1 && !cli[:dataset].nil?
128
+ raise 'The dataset name (-D) can only be specified with one input file.'
129
+ end
130
+ end
131
+ [files, file_type]
132
+ end
133
+
134
+ def create_dataset(file, p)
135
+ name = cli[:dataset]
136
+ if name.nil?
137
+ ref_file = file.is_a?(Array) ? file.first : file
138
+ m = cli[:regexp].match(ref_file)
139
+ raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
140
+ name = cli[:prefix].to_s + m[1].miga_name
141
+ end
142
+ if Dataset.exist?(p, name)
143
+ msg = "Dataset already exists: #{name}."
144
+ if cli[:ignore_dups]
145
+ warn(msg)
146
+ return nil
147
+ else
148
+ raise(msg)
149
+ end
150
+ end
151
+ cli.say "o #{name}"
152
+ d = Dataset.new(p, name, cli[:ref])
153
+ raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
154
+ d
155
+ end
156
+
157
+ def copy_file_to_project(file, file_type, d, p)
158
+ return if file.nil?
159
+ r_dir = Dataset.RESULT_DIRS[ file_type[1] ]
160
+ r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
161
+ file_type[2].each_with_index do |ext, i|
162
+ gz = file[i] =~ /\.gz/ ? '.gz' : ''
163
+ FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
164
+ cli.say " file: #{file[i]}"
165
+ end
166
+ File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
167
+ end
139
168
  end