miga-base 0.4.3.0 → 0.5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -29,49 +29,56 @@ class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action
|
|
29
29
|
raise 'Action not supported for the project or dataset' if cr.nil?
|
30
30
|
raise 'No close relatives found'
|
31
31
|
else
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
if self[:ref_project]
|
36
|
-
if (ref = p.metadata[:ref_project]).nil?
|
37
|
-
raise '--ref-project requested but no reference project has been set'
|
38
|
-
end
|
39
|
-
if (q = MiGA::Project.load(ref)).nil?
|
40
|
-
raise '--ref-project requested but reference project doesn\'t exist'
|
41
|
-
end
|
42
|
-
cr_d = q.dataset(cr[0])
|
43
|
-
else
|
44
|
-
cr_d = p.dataset(cr[0])
|
45
|
-
end
|
46
|
-
tax = cr_d.metadata[:tax] unless cr_d.nil?
|
47
|
-
tax ||= {}
|
32
|
+
query_probability_distributions(d, cr[0])
|
33
|
+
end
|
34
|
+
end
|
48
35
|
|
49
|
-
|
50
|
-
# Intax
|
51
|
-
r = TaxDist.aai_pvalues(cr[1], :intax).map do |k,v|
|
52
|
-
sig = ''
|
53
|
-
[0.5, 0.1, 0.05, 0.01].each{ |i| sig << '*' if v<i }
|
54
|
-
[Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
55
|
-
end
|
56
|
-
cli.puts ''
|
57
|
-
cli.puts 'Taxonomic classification'
|
58
|
-
cli.puts MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
|
59
|
-
end
|
36
|
+
private
|
60
37
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
38
|
+
def query_probability_distributions(d, cr)
|
39
|
+
cli.say 'Querying probability distributions'
|
40
|
+
tax = closest_relative_tax(cr)
|
41
|
+
if %w[intax both].include? cli[:test]
|
42
|
+
r = test_closest_relative(cr, tax, :intax)
|
43
|
+
cli.puts ''
|
44
|
+
cli.puts 'Taxonomic classification'
|
45
|
+
cli.puts MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
|
46
|
+
end
|
47
|
+
if %w[novel both].include? cli[:test]
|
48
|
+
r = test_closest_relative(cr, tax, :novel)
|
49
|
+
r.map! { |i| i.tap { |j| j.delete_at(1) } }
|
50
|
+
cli.puts ''
|
51
|
+
cli.puts 'Taxonomic novelty'
|
52
|
+
cli.puts MiGA.tabulate(%w[Rank P-value Signif.], r)
|
53
|
+
end
|
54
|
+
cli.puts ''
|
55
|
+
cli.puts 'Significance at p-value below: *0.5, **0.1, ***0.05, ****0.01.'
|
56
|
+
end
|
57
|
+
|
58
|
+
def closest_relative_tax(cr)
|
59
|
+
cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
|
60
|
+
p = cli.load_project
|
61
|
+
if cli[:ref_project]
|
62
|
+
if (ref = p.metadata[:ref_project]).nil?
|
63
|
+
raise '--ref-project requested but no reference project has been set'
|
71
64
|
end
|
65
|
+
if (q = MiGA::Project.load(ref)).nil?
|
66
|
+
raise '--ref-project requested but reference project doesn\'t exist'
|
67
|
+
end
|
68
|
+
cr_d = q.dataset(cr[0])
|
69
|
+
else
|
70
|
+
cr_d = p.dataset(cr[0])
|
71
|
+
end
|
72
|
+
tax = cr_d.metadata[:tax] unless cr_d.nil?
|
73
|
+
tax ||= {}
|
74
|
+
tax
|
75
|
+
end
|
72
76
|
|
73
|
-
|
74
|
-
|
77
|
+
def test_closest_relative(cr, tax, test)
|
78
|
+
TaxDist.aai_pvalues(cr[1], test).map do |k,v|
|
79
|
+
sig = ''
|
80
|
+
[0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
|
81
|
+
[Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
75
82
|
end
|
76
83
|
end
|
77
84
|
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
##
|
5
|
+
# Helper module for workflows
|
6
|
+
module MiGA::Cli::Action::Wf
|
7
|
+
def default_opts_for_wf
|
8
|
+
cli.expect_files = true
|
9
|
+
cli.defaults = {
|
10
|
+
clean: false, regexp: MiGA::Cli.FILE_REGEXP,
|
11
|
+
project_type: :genomes, dataset_type: :popgenome,
|
12
|
+
ncbi_draft: true }
|
13
|
+
end
|
14
|
+
|
15
|
+
def opts_for_wf(opt, files_desc, params = {})
|
16
|
+
{
|
17
|
+
multi: false, cleanup: true, project_type: false, ncbi: true
|
18
|
+
}.each { |k, v| params[k] = v if params[k].nil? }
|
19
|
+
opt.on(
|
20
|
+
'-o', '--out_dir PATH',
|
21
|
+
'(Mandatory) Directory to be created with all output data'
|
22
|
+
) { |v| cli[:outdir] = v }
|
23
|
+
opt.separator ''
|
24
|
+
opt.separator " FILES...: #{files_desc}"
|
25
|
+
opt.separator ''
|
26
|
+
opt.separator 'Workflow Control Options'
|
27
|
+
if params[:ncbi]
|
28
|
+
opt.on(
|
29
|
+
'-T', '--ncbi-taxon STRING',
|
30
|
+
'Download all the genomes in NCBI classified as this taxon'
|
31
|
+
) { |v| cli[:ncbi_taxon] = v }
|
32
|
+
opt.on(
|
33
|
+
'--no-draft',
|
34
|
+
'Only download complete genomes, not drafts'
|
35
|
+
) { |v| cli[:ncbi_draft] = v }
|
36
|
+
end
|
37
|
+
if params[:cleanup]
|
38
|
+
opt.on(
|
39
|
+
'-c', '--clean',
|
40
|
+
'Clean all intermediate files after generating the reports'
|
41
|
+
) { |v| cli[:clean] = v }
|
42
|
+
end
|
43
|
+
opt.on(
|
44
|
+
'-R', '--name-regexp REGEXP', Regexp,
|
45
|
+
'Regular expression indicating how to extract the name from the path',
|
46
|
+
"By default: '#{cli[:regexp]}'"
|
47
|
+
) { |v| cli[:regexp] = v }
|
48
|
+
opt.on(
|
49
|
+
'--type STRING',
|
50
|
+
"Type of datasets. By default: #{cli[:dataset_type]}",
|
51
|
+
'Recognized types:',
|
52
|
+
*MiGA::Dataset.KNOWN_TYPES
|
53
|
+
.map do |k, v|
|
54
|
+
"~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
|
55
|
+
end.compact
|
56
|
+
) { |v| cli[:dataset_type] = v.downcase.to_sym }
|
57
|
+
if params[:project_type]
|
58
|
+
opt.on(
|
59
|
+
'--project-type STRING',
|
60
|
+
"Type of project. By default: #{cli[:project_type]}",
|
61
|
+
'Recognized types:',
|
62
|
+
*MiGA::Project.KNOWN_TYPES
|
63
|
+
.map do |k, v|
|
64
|
+
"~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
|
65
|
+
end.compact
|
66
|
+
) { |v| cli[:project_type] = v.downcase.to_sym }
|
67
|
+
end
|
68
|
+
opt.on(
|
69
|
+
'--daemon PATH',
|
70
|
+
'Use custom daemon configuration in JSON format',
|
71
|
+
'By default: ~/.miga_daemon.json'
|
72
|
+
) { |v| cli[:daemon_json] = v }
|
73
|
+
opt.on(
|
74
|
+
'-j', '--jobs INT',
|
75
|
+
'Number of parallel jobs to execute',
|
76
|
+
'By default controlled by the daemon configuration (maxjobs)'
|
77
|
+
) { |v| cli[:jobs] = v.to_i }
|
78
|
+
opt.on(
|
79
|
+
'-t', '--threads INT',
|
80
|
+
'Number of CPUs to use per job',
|
81
|
+
'By default controlled by the daemon configuration (ppn)'
|
82
|
+
) { |v| cli[:threads] = v.to_i }
|
83
|
+
end
|
84
|
+
|
85
|
+
def opts_for_wf_distances(opt)
|
86
|
+
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
|
87
|
+
cli[:aai_p] = 'diamond'
|
88
|
+
cli[:ani_p] = 'fastani'
|
89
|
+
end
|
90
|
+
opt.on(
|
91
|
+
'--haai-p STRING',
|
92
|
+
'hAAI search engine. One of: blast+ (default), blat, diamond, no'
|
93
|
+
) { |v| cli[:haai_p] = v }
|
94
|
+
opt.on(
|
95
|
+
'--aai-p STRING',
|
96
|
+
'AAI search engine. One of: blast+ (default), blat, diamond'
|
97
|
+
) { |v| cli[:aai_p] = v }
|
98
|
+
opt.on(
|
99
|
+
'--ani-p STRING',
|
100
|
+
'ANI search engine. One of: blast+ (default), blat, fastani'
|
101
|
+
) { |v| cli[:ani_p] = v }
|
102
|
+
end
|
103
|
+
|
104
|
+
def create_project(stage, p_metadata = {}, d_metadata = {})
|
105
|
+
cli.ensure_par(
|
106
|
+
outdir: '-o',
|
107
|
+
project_type: '--project-type',
|
108
|
+
dataset_type: '--dataset-type')
|
109
|
+
# Create empty project
|
110
|
+
call_cli([
|
111
|
+
'new',
|
112
|
+
'-P', cli[:outdir],
|
113
|
+
'-t', cli[:project_type],
|
114
|
+
'-m', p_metadata.map{ |k,v| "#{k}=#{v}" }.join(',')
|
115
|
+
])
|
116
|
+
# Download datasets
|
117
|
+
call_cli([
|
118
|
+
'ncbi_get',
|
119
|
+
'-P', cli[:outdir],
|
120
|
+
'-T', cli[:ncbi_taxon],
|
121
|
+
(cli[:ncbi_draft] ? '--all' : '--complete'),
|
122
|
+
'-m', d_metadata.map{ |k,v| "#{k}=#{v}" }.join(',')
|
123
|
+
]) unless cli[:ncbi_taxon].nil?
|
124
|
+
# Add datasets
|
125
|
+
call_cli([
|
126
|
+
'add',
|
127
|
+
'-P', cli[:outdir],
|
128
|
+
'-t', cli[:dataset_type],
|
129
|
+
'-i', stage,
|
130
|
+
'-R', cli[:regexp],
|
131
|
+
'-m', d_metadata.map{ |k,v| "#{k}=#{v}" }.join(',')
|
132
|
+
] + cli.files) unless cli.files.empty?
|
133
|
+
p = MiGA::Project.load(cli[:outdir])
|
134
|
+
raise "Impossible to create project: #{cli[:outdir]}" if p.nil?
|
135
|
+
[:haai_p, :aai_p, :ani_p].each do |i|
|
136
|
+
p.metadata[i] = cli[i] unless cli[i].nil?
|
137
|
+
end
|
138
|
+
p.save
|
139
|
+
p
|
140
|
+
end
|
141
|
+
|
142
|
+
def summarize(which = %w[cds assembly essential_genes ssu])
|
143
|
+
which.each do |r|
|
144
|
+
cli.say "Summary: #{r}"
|
145
|
+
call_cli([
|
146
|
+
'summary',
|
147
|
+
'-P', cli[:outdir],
|
148
|
+
'-r', r,
|
149
|
+
'-o', File.expand_path("#{r}.tsv", cli[:outdir]),
|
150
|
+
'--tab'
|
151
|
+
])
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
def cleanup
|
156
|
+
return unless cli[:clean]
|
157
|
+
cli.say "Cleaning up intermediate files"
|
158
|
+
%w[data daemon metadata miga.project.json].each do |f|
|
159
|
+
FileUtils.rm_rf(File.expand_path(f, cli[:outdir]))
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
def call_cli(cmd)
|
164
|
+
cmd << '-v' if cli[:verbose]
|
165
|
+
MiGA::Cli.new(cmd.map(&:to_s)).launch
|
166
|
+
end
|
167
|
+
|
168
|
+
def run_daemon
|
169
|
+
cmd = ['daemon', 'run', '-P', cli[:outdir], '--shutdown-when-done']
|
170
|
+
cmd += ['--json', cli[:daemon_json]] unless cli[:daemon_json].nil?
|
171
|
+
cmd += ['--max-jobs', cli[:jobs]] unless cli[:jobs].nil?
|
172
|
+
cmd += ['--ppn', cli[:threads]] unless cli[:threads].nil?
|
173
|
+
cwd = Dir.pwd
|
174
|
+
call_cli cmd
|
175
|
+
Dir.chdir(cwd)
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
data/lib/miga/cli/base.rb
CHANGED
@@ -5,11 +5,18 @@ module MiGA::Cli::Base
|
|
5
5
|
|
6
6
|
@@TASK_DESC = {
|
7
7
|
generic: 'MiGA: The Microbial Genomes Atlas',
|
8
|
+
# Workflows
|
9
|
+
quality_wf: 'Evaluate the quality of input genomes',
|
10
|
+
derep_wf: 'Dereplicate a collection of input genomes',
|
11
|
+
classify_wf: 'Classify input genomes against a reference database',
|
12
|
+
preproc_wf: 'Preprocess input genomes or metagenomes',
|
13
|
+
index_wf: 'Generate distance indexing of input genomes',
|
8
14
|
# Projects
|
9
15
|
new: 'Creates an empty MiGA project',
|
10
16
|
about: 'Displays information about a MiGA project',
|
11
17
|
plugins: 'Lists or (un)installs plugins in a MiGA project',
|
12
18
|
doctor: 'Performs consistency checks on a MiGA project',
|
19
|
+
get_db: 'Downloads a pre-indexed database',
|
13
20
|
# Datasets
|
14
21
|
add: 'Creates a dataset in a MiGA project',
|
15
22
|
get: 'Downloads a dataset from public databases into a MiGA project',
|
@@ -43,6 +50,7 @@ module MiGA::Cli::Base
|
|
43
50
|
# Projects
|
44
51
|
create_project: :new,
|
45
52
|
project_info: :about,
|
53
|
+
download: :get_db,
|
46
54
|
# Datasets
|
47
55
|
create_dataset: :add,
|
48
56
|
download_dataset: :get,
|
@@ -76,6 +84,8 @@ module MiGA::Cli::Base
|
|
76
84
|
|
77
85
|
@@EXECS = @@TASK_DESC.keys
|
78
86
|
|
87
|
+
@@FILE_REGEXP = %r{^(?:.*/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?:\.f[nastq]+)?$}i
|
88
|
+
|
79
89
|
end
|
80
90
|
|
81
91
|
class MiGA::Cli < MiGA::MiGA
|
@@ -86,5 +96,6 @@ class MiGA::Cli < MiGA::MiGA
|
|
86
96
|
def TASK_DESC; @@TASK_DESC end
|
87
97
|
def TASK_ALIAS; @@TASK_ALIAS end
|
88
98
|
def EXECS; @@EXECS end
|
99
|
+
def FILE_REGEXP; @@FILE_REGEXP end
|
89
100
|
end
|
90
101
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
module MiGA::Cli::ObjectsHelper
|
5
|
+
##
|
6
|
+
# Get the project defined in the CLI by parameter +name+ and +flag+
|
7
|
+
def load_project(name = :project, flag = '-P')
|
8
|
+
return @objects[name] unless @objects[name].nil?
|
9
|
+
ensure_par(name => flag)
|
10
|
+
say "Loading project: #{self[name]}"
|
11
|
+
@objects[name] = MiGA::Project.load(self[name])
|
12
|
+
raise "Cannot load project: #{self[name]}" if @objects[name].nil?
|
13
|
+
@objects[name]
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Load the dataset defined in the CLI
|
18
|
+
# If +silent=true+, it allows failures silently
|
19
|
+
def load_dataset(silent = false)
|
20
|
+
return @objects[:dataset] unless @objects[:dataset].nil?
|
21
|
+
ensure_par(dataset: '-D')
|
22
|
+
@objects[:dataset] = load_project.dataset(self[:dataset])
|
23
|
+
if !silent && @objects[:dataset].nil?
|
24
|
+
raise "Cannot load dataset: #{self[:dataset]}"
|
25
|
+
end
|
26
|
+
return @objects[:dataset]
|
27
|
+
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Load a project or (if defined) a dataset
|
31
|
+
def load_project_or_dataset
|
32
|
+
self[:dataset].nil? ? load_project : load_dataset
|
33
|
+
end
|
34
|
+
|
35
|
+
##
|
36
|
+
# Load and filter a list of datasets as requested in the CLI
|
37
|
+
# If +silent=true+, it allows failures silently
|
38
|
+
def load_and_filter_datasets(silent = false)
|
39
|
+
return @objects[:filtered_datasets] unless @objects[:filtered_datasets].nil?
|
40
|
+
say 'Listing datasets'
|
41
|
+
ds = self[:dataset].nil? ?
|
42
|
+
load_project.datasets : [load_dataset(silent)].compact
|
43
|
+
ds.select! { |d| d.is_ref? == self[:ref] } unless self[:ref].nil?
|
44
|
+
ds.select! { |d| d.is_active? == self[:active] } unless self[:active].nil?
|
45
|
+
ds.select! do |d|
|
46
|
+
self[:multi] ? d.is_multi? : d.is_nonmulti?
|
47
|
+
end unless self[:multi].nil?
|
48
|
+
ds.select! do |d|
|
49
|
+
(not d.metadata[:tax].nil?) && d.metadata[:tax].in?(self[:taxonomy])
|
50
|
+
end unless self[:taxonomy].nil?
|
51
|
+
ds = ds.values_at(self[:dataset_k]-1) unless self[:dataset_k].nil?
|
52
|
+
@objects[:filtered_datasets] = ds
|
53
|
+
end
|
54
|
+
|
55
|
+
def load_result
|
56
|
+
return @objects[:result] unless @objects[:result].nil?
|
57
|
+
ensure_par(result: '-r')
|
58
|
+
obj = load_project_or_dataset
|
59
|
+
if obj.class.RESULT_DIRS[self[:result]].nil?
|
60
|
+
klass = obj.class.to_s.gsub(/.*::/,'')
|
61
|
+
raise "Unsupported result for #{klass}: #{self[:result]}"
|
62
|
+
end
|
63
|
+
r = obj.add_result(self[:result], false)
|
64
|
+
raise "Cannot load result: #{self[:result]}" if r.nil?
|
65
|
+
@objects[:result] = r
|
66
|
+
end
|
67
|
+
|
68
|
+
def add_metadata(obj, cli = self)
|
69
|
+
cli[:metadata].split(',').each do |pair|
|
70
|
+
(k,v) = pair.split('=')
|
71
|
+
case v
|
72
|
+
when 'true'; v = true
|
73
|
+
when 'false'; v = false
|
74
|
+
when 'nil'; v = nil
|
75
|
+
end
|
76
|
+
if k == '_step'
|
77
|
+
obj.metadata["_try_#{v}"] ||= 0
|
78
|
+
obj.metadata["_try_#{v}"] += 1
|
79
|
+
end
|
80
|
+
obj.metadata[k] = v
|
81
|
+
end unless cli[:metadata].nil?
|
82
|
+
[:type, :name, :user, :description, :comments].each do |k|
|
83
|
+
obj.metadata[k] = cli[k] unless cli[k].nil?
|
84
|
+
end
|
85
|
+
obj
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
@@ -0,0 +1,160 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
module MiGA::Cli::OptHelper
|
5
|
+
##
|
6
|
+
# Send MiGA's banner to OptionParser +opt+
|
7
|
+
def banner(opt)
|
8
|
+
usage = "Usage: miga #{action.name}"
|
9
|
+
usage += ' {operation}' if expect_operation
|
10
|
+
usage += ' [options]'
|
11
|
+
usage += ' {FILES...}' if expect_files
|
12
|
+
opt.banner = "\n#{task_description}\n\n#{usage}\n"
|
13
|
+
opt.separator ''
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Common options at the end of most actions, passed to OptionParser +opt+
|
18
|
+
# No action is performed if +#opt_common = false+ is passed
|
19
|
+
# Executes only once, unless +#opt_common = true+ is passed between calls
|
20
|
+
def opt_common(opt)
|
21
|
+
return unless @opt_common
|
22
|
+
if interactive
|
23
|
+
opt.on(
|
24
|
+
'--auto',
|
25
|
+
'Accept all defaults as answers'
|
26
|
+
) { |v| self[:auto] = v }
|
27
|
+
end
|
28
|
+
opt.on(
|
29
|
+
'-v', '--verbose',
|
30
|
+
'Print additional information to STDERR'
|
31
|
+
) { |v| self[:verbose] = v }
|
32
|
+
opt.on(
|
33
|
+
'-d', '--debug INT', Integer,
|
34
|
+
'Print debugging information to STDERR (1: debug, 2: trace)'
|
35
|
+
) { |v| v > 1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON }
|
36
|
+
opt.on(
|
37
|
+
'-h', '--help',
|
38
|
+
'Display this screen'
|
39
|
+
) do
|
40
|
+
puts opt
|
41
|
+
exit
|
42
|
+
end
|
43
|
+
opt.separator ''
|
44
|
+
self.opt_common = false
|
45
|
+
end
|
46
|
+
|
47
|
+
##
|
48
|
+
# Options to load an object passed to OptionParser +opt+, as determined
|
49
|
+
# by +what+, an Array with any combination of:
|
50
|
+
# - :project To require a project
|
51
|
+
# - :dataset To require a dataset
|
52
|
+
# - :dataset_opt To allow (optionally) a dataset
|
53
|
+
# - :dataset_type To allow (optionally) a type of dataset
|
54
|
+
# - :dataset_type_req To require a type of dataset
|
55
|
+
# - :project_type To allow (optionally) a type of project
|
56
|
+
# - :project_type_req To require a type of project
|
57
|
+
# - :result To require a type of project or dataset result
|
58
|
+
# - :result_dataset To require a type of dataset result
|
59
|
+
# - :result_project To require a type of project result
|
60
|
+
# The options :result, :result_dataset, and :result_project are mutually
|
61
|
+
# exclusive
|
62
|
+
def opt_object(opt, what = [:project, :dataset])
|
63
|
+
what.each do |w|
|
64
|
+
case w
|
65
|
+
when :project
|
66
|
+
opt.on(
|
67
|
+
'-P', '--project PATH',
|
68
|
+
'(Mandatory) Path to the project'
|
69
|
+
) { |v| self[:project] = v }
|
70
|
+
when :dataset, :dataset_opt
|
71
|
+
opt.on(
|
72
|
+
'-D', '--dataset STRING',
|
73
|
+
(w == :dataset ? '(Mandatory) ' : '') + 'Name of the dataset'
|
74
|
+
) { |v| self[:dataset] = v }
|
75
|
+
when :dataset_type, :dataset_type_req, :project_type, :project_type_req
|
76
|
+
obj = w.to_s.gsub(/_.*/, '')
|
77
|
+
klass = Object.const_get("MiGA::#{obj.capitalize}")
|
78
|
+
req = w.to_s =~ /_req$/ ? '(Mandatory) ' : ''
|
79
|
+
opt.on(
|
80
|
+
'-t', '--type STRING',
|
81
|
+
"#{req}Type of #{obj}. Recognized types include:",
|
82
|
+
*klass.KNOWN_TYPES.map { |k, v| "~ #{k}: #{v[:description]}" }
|
83
|
+
) { |v| self[:type] = v.downcase.to_sym }
|
84
|
+
when :result
|
85
|
+
opt.on(
|
86
|
+
'-r', '--result STRING',
|
87
|
+
'(Mandatory) Name of the result',
|
88
|
+
'Recognized names for dataset-specific results include:',
|
89
|
+
*MiGA::Dataset.RESULT_DIRS.keys.map { |n| " ~ #{n}" },
|
90
|
+
'Recognized names for project-wide results include:',
|
91
|
+
*MiGA::Project.RESULT_DIRS.keys.map { |n| " ~ #{n}" }
|
92
|
+
) { |v| self[:result] = v.downcase.to_sym }
|
93
|
+
when :result_dataset
|
94
|
+
opt.on(
|
95
|
+
'-r', '--result STRING',
|
96
|
+
'(Mandatory) Name of the result, one of:',
|
97
|
+
*MiGA::Dataset.RESULT_DIRS.keys.map { |n| " ~ #{n}" }
|
98
|
+
) { |v| self[:result] = v.downcase.to_sym }
|
99
|
+
when :result_project
|
100
|
+
opt.on(
|
101
|
+
'-r', '--result STRING',
|
102
|
+
'(Mandatory) Name of the result, one of:',
|
103
|
+
*MiGA::Project.RESULT_DIRS.keys.map { |n| " ~ #{n}" }
|
104
|
+
) { |v| self[:result] = v.downcase.to_sym }
|
105
|
+
else
|
106
|
+
raise "Internal error: Unrecognized option: #{w}"
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
##
|
112
|
+
# Options to filter a list of datasets passed to OptionParser +opt+,
|
113
|
+
# as determined by +what+, an Array with any combination of:
|
114
|
+
# - :ref To filter by reference (--ref) or query (--no-ref)
|
115
|
+
# - :multi To filter by multiple (--multi) or single (--no-multi) species
|
116
|
+
# - :active To filter by active (--active) or inactive (--no-active)
|
117
|
+
# - :taxonomy To filter by taxonomy (--taxonomy)
|
118
|
+
# The "k-th" filter (--dataset-k) is always included
|
119
|
+
def opt_filter_datasets(opt, what = [:ref, :multi, :active, :taxonomy])
|
120
|
+
what.each do |w|
|
121
|
+
case w
|
122
|
+
when :ref
|
123
|
+
opt.on(
|
124
|
+
'--[no-]ref',
|
125
|
+
'Use only reference (or only non-reference) datasets'
|
126
|
+
) { |v| self[:ref] = v }
|
127
|
+
when :multi
|
128
|
+
opt.on(
|
129
|
+
'--[no-]multi',
|
130
|
+
'Use only multi-species (or only single-species) datasets'
|
131
|
+
) { |v| self[:multi] = v }
|
132
|
+
when :active
|
133
|
+
opt.on(
|
134
|
+
'--[no-]active',
|
135
|
+
'Use only active (or inactive) datasets'
|
136
|
+
) { |v| self[:active] = v }
|
137
|
+
when :taxonomy
|
138
|
+
opt.on(
|
139
|
+
'-t', '--taxonomy RANK:TAXON',
|
140
|
+
'Filter by taxonomy'
|
141
|
+
) { |v| self[:taxonomy] = MiGA::Taxonomy.new(v) }
|
142
|
+
else
|
143
|
+
raise "Internal error: Unrecognized option: #{w}"
|
144
|
+
end
|
145
|
+
end
|
146
|
+
opt.on(
|
147
|
+
'--dataset-k INTEGER', Integer,
|
148
|
+
'Use only the k-th dataset in the list'
|
149
|
+
) { |v| self[:dataset_k] = v }
|
150
|
+
end
|
151
|
+
|
152
|
+
##
|
153
|
+
# Add a flag (true/false) to the OptionParser +opt+ defined by
|
154
|
+
# +flag+ (without --) and +description+, and save it in the CLI as +sym+.
|
155
|
+
# If +sym+ is nil, +flag+ is used as Symbol
|
156
|
+
def opt_flag(opt, flag, description, sym = nil)
|
157
|
+
sym = flag.to_sym if sym.nil?
|
158
|
+
opt.on("--#{flag}", description) { |v| self[sym] = v }
|
159
|
+
end
|
160
|
+
end
|