miga-base 0.4.3.0 → 0.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -29,49 +29,56 @@ class MiGA::Cli::Action::TaxTest < MiGA::Cli::Action
|
|
29
29
|
raise 'Action not supported for the project or dataset' if cr.nil?
|
30
30
|
raise 'No close relatives found'
|
31
31
|
else
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
if self[:ref_project]
|
36
|
-
if (ref = p.metadata[:ref_project]).nil?
|
37
|
-
raise '--ref-project requested but no reference project has been set'
|
38
|
-
end
|
39
|
-
if (q = MiGA::Project.load(ref)).nil?
|
40
|
-
raise '--ref-project requested but reference project doesn\'t exist'
|
41
|
-
end
|
42
|
-
cr_d = q.dataset(cr[0])
|
43
|
-
else
|
44
|
-
cr_d = p.dataset(cr[0])
|
45
|
-
end
|
46
|
-
tax = cr_d.metadata[:tax] unless cr_d.nil?
|
47
|
-
tax ||= {}
|
32
|
+
query_probability_distributions(d, cr[0])
|
33
|
+
end
|
34
|
+
end
|
48
35
|
|
49
|
-
|
50
|
-
# Intax
|
51
|
-
r = TaxDist.aai_pvalues(cr[1], :intax).map do |k,v|
|
52
|
-
sig = ''
|
53
|
-
[0.5, 0.1, 0.05, 0.01].each{ |i| sig << '*' if v<i }
|
54
|
-
[Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
55
|
-
end
|
56
|
-
cli.puts ''
|
57
|
-
cli.puts 'Taxonomic classification'
|
58
|
-
cli.puts MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
|
59
|
-
end
|
36
|
+
private
|
60
37
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
38
|
+
# Print the taxonomy tests selected by cli[:test] ('intax', 'novel', or
# 'both') for the closest relative +cr+, followed by the significance legend.
# +cr+ is handed unchanged to #closest_relative_tax and
# #test_closest_relative. +d+ is accepted but not used by this method.
def query_probability_distributions(d, cr)
  cli.say 'Querying probability distributions'
  tax = closest_relative_tax(cr)
  requested = ->(kind) { [kind, 'both'].include?(cli[:test]) }

  if requested.call('intax')
    rows = test_closest_relative(cr, tax, :intax)
    cli.puts ''
    cli.puts 'Taxonomic classification'
    cli.puts MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], rows)
  end

  if requested.call('novel')
    rows = test_closest_relative(cr, tax, :novel)
    # The novelty table has no taxonomy column: drop index 1 of every row
    rows.each { |row| row.delete_at(1) }
    cli.puts ''
    cli.puts 'Taxonomic novelty'
    cli.puts MiGA.tabulate(%w[Rank P-value Signif.], rows)
  end

  cli.puts ''
  cli.puts 'Significance at p-value below: *0.5, **0.1, ***0.05, ****0.01.'
end
|
57
|
+
|
58
|
+
# Report the closest relative +cr+ (name in cr[0], AAI in cr[1]) and return
# the value stored under metadata[:tax] for that dataset, or an empty Hash
# if the dataset cannot be found or has no taxonomy.
# With cli[:ref_project] set, the dataset is looked up in the project named
# by the current project's metadata[:ref_project]; a RuntimeError is raised
# if that entry is unset or the referenced project cannot be loaded.
def closest_relative_tax(cr)
  cli.puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
  project = cli.load_project
  source = project
  if cli[:ref_project]
    ref_path = project.metadata[:ref_project]
    if ref_path.nil?
      raise '--ref-project requested but no reference project has been set'
    end
    source = MiGA::Project.load(ref_path)
    if source.nil?
      raise '--ref-project requested but reference project doesn\'t exist'
    end
  end
  relative = source.dataset(cr[0])
  return {} if relative.nil?

  relative.metadata[:tax] || {}
end
|
72
76
|
|
73
|
-
|
74
|
-
|
77
|
+
# Build one table row per entry returned by TaxDist.aai_pvalues for the AAI
# in cr[1] and the given +test+. Each row is
# [long rank name, taxon from +tax+ or '?', p-value, significance stars],
# with one star per threshold (0.5, 0.1, 0.05, 0.01) the p-value is below.
def test_closest_relative(cr, tax, test)
  thresholds = [0.5, 0.1, 0.05, 0.01]
  TaxDist.aai_pvalues(cr[1], test).map do |rank, pvalue|
    stars = '*' * thresholds.count { |limit| pvalue < limit }
    [Taxonomy.LONG_RANKS[rank], (tax[rank] || '?'), pvalue, stars]
  end
end
|
77
84
|
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
##
|
5
|
+
# Helper module for workflows
|
6
|
+
module MiGA::Cli::Action::Wf
|
7
|
+
# Mark the CLI as expecting input files and install the default option
# values shared by workflow actions. Returns the defaults Hash.
def default_opts_for_wf
  cli.expect_files = true
  wf_defaults = {}
  wf_defaults[:clean] = false
  wf_defaults[:regexp] = MiGA::Cli.FILE_REGEXP
  wf_defaults[:project_type] = :genomes
  wf_defaults[:dataset_type] = :popgenome
  wf_defaults[:ncbi_draft] = true
  cli.defaults = wf_defaults
end
|
14
|
+
|
15
|
+
# Register the options common to workflow actions on OptionParser +opt+.
# +files_desc+ is the text shown next to "FILES...:" in the help screen.
# +params+ toggles groups of options; unset (nil) keys are filled in place
# with these defaults:
# - :multi (false)        List types flagged as :multi among the known types
# - :cleanup (true)       Offer -c/--clean
# - :project_type (false) Offer --project-type
# - :ncbi (true)          Offer -T/--ncbi-taxon and --no-draft
def opts_for_wf(opt, files_desc, params = {})
  switches = { multi: false, cleanup: true, project_type: false, ncbi: true }
  switches.each_pair do |key, fallback|
    params[key] = fallback if params[key].nil?
  end

  # Help lines for a table of known types, hiding :multi types unless allowed
  type_lines = lambda do |known|
    known
      .reject { |_, v| !params[:multi] && v[:multi] }
      .map { |k, v| "~ #{k}: #{v[:description]}" }
  end

  opt.on(
    '-o', '--out_dir PATH',
    '(Mandatory) Directory to be created with all output data'
  ) { |v| cli[:outdir] = v }
  ['', " FILES...: #{files_desc}", '', 'Workflow Control Options']
    .each { |line| opt.separator line }

  if params[:ncbi]
    opt.on(
      '-T', '--ncbi-taxon STRING',
      'Download all the genomes in NCBI classified as this taxon'
    ) { |v| cli[:ncbi_taxon] = v }
    opt.on(
      '--no-draft',
      'Only download complete genomes, not drafts'
    ) { |v| cli[:ncbi_draft] = v }
  end

  if params[:cleanup]
    opt.on(
      '-c', '--clean',
      'Clean all intermediate files after generating the reports'
    ) { |v| cli[:clean] = v }
  end

  opt.on(
    '-R', '--name-regexp REGEXP', Regexp,
    'Regular expression indicating how to extract the name from the path',
    "By default: '#{cli[:regexp]}'"
  ) { |v| cli[:regexp] = v }

  opt.on(
    '--type STRING',
    "Type of datasets. By default: #{cli[:dataset_type]}",
    'Recognized types:',
    *type_lines.call(MiGA::Dataset.KNOWN_TYPES)
  ) { |v| cli[:dataset_type] = v.downcase.to_sym }

  if params[:project_type]
    opt.on(
      '--project-type STRING',
      "Type of project. By default: #{cli[:project_type]}",
      'Recognized types:',
      *type_lines.call(MiGA::Project.KNOWN_TYPES)
    ) { |v| cli[:project_type] = v.downcase.to_sym }
  end

  opt.on(
    '--daemon PATH',
    'Use custom daemon configuration in JSON format',
    'By default: ~/.miga_daemon.json'
  ) { |v| cli[:daemon_json] = v }
  opt.on(
    '-j', '--jobs INT',
    'Number of parallel jobs to execute',
    'By default controlled by the daemon configuration (maxjobs)'
  ) { |v| cli[:jobs] = v.to_i }
  opt.on(
    '-t', '--threads INT',
    'Number of CPUs to use per job',
    'By default controlled by the daemon configuration (ppn)'
  ) { |v| cli[:threads] = v.to_i }
end
|
84
|
+
|
85
|
+
def opts_for_wf_distances(opt)
|
86
|
+
opt.on('--fast', 'Alias to: --aai-p diamond --ani-p fastani') do
|
87
|
+
cli[:aai_p] = 'diamond'
|
88
|
+
cli[:ani_p] = 'fastani'
|
89
|
+
end
|
90
|
+
opt.on(
|
91
|
+
'--haai-p STRING',
|
92
|
+
'hAAI search engine. One of: blast+ (default), blat, diamond, no'
|
93
|
+
) { |v| cli[:haai_p] = v }
|
94
|
+
opt.on(
|
95
|
+
'--aai-p STRING',
|
96
|
+
'AAI search engine. One of: blast+ (default), blat, diamond'
|
97
|
+
) { |v| cli[:aai_p] = v }
|
98
|
+
opt.on(
|
99
|
+
'--ani-p STRING',
|
100
|
+
'ANI search engine. One of: blast+ (default), blat, fastani'
|
101
|
+
) { |v| cli[:ani_p] = v }
|
102
|
+
end
|
103
|
+
|
104
|
+
# Create the workflow project in cli[:outdir] and populate it:
# 1. `new` creates the project with type cli[:project_type] and +p_metadata+
# 2. `ncbi_get` runs only when cli[:ncbi_taxon] is set
# 3. `add` registers cli.files as +stage+ input, only when files were given
# Both dataset steps attach +d_metadata+. Any of :haai_p, :aai_p, :ani_p set
# in the CLI is then saved in the project metadata.
# Returns the loaded project; raises RuntimeError if it cannot be loaded.
def create_project(stage, p_metadata = {}, d_metadata = {})
  # The dataset type is registered as --type in #opts_for_wf, so that is the
  # flag named here (there is no --dataset-type option in this module)
  cli.ensure_par(
    outdir: '-o',
    project_type: '--project-type',
    dataset_type: '--type'
  )
  # Serialize a metadata Hash as the "k=v,k=v" string taken by -m
  as_pairs = ->(metadata) { metadata.map { |k, v| "#{k}=#{v}" }.join(',') }

  # Create empty project
  call_cli([
    'new',
    '-P', cli[:outdir],
    '-t', cli[:project_type],
    '-m', as_pairs.call(p_metadata)
  ])

  # Download datasets
  unless cli[:ncbi_taxon].nil?
    call_cli([
      'ncbi_get',
      '-P', cli[:outdir],
      '-T', cli[:ncbi_taxon],
      (cli[:ncbi_draft] ? '--all' : '--complete'),
      '-m', as_pairs.call(d_metadata)
    ])
  end

  # Add datasets
  unless cli.files.empty?
    call_cli([
      'add',
      '-P', cli[:outdir],
      '-t', cli[:dataset_type],
      '-i', stage,
      '-R', cli[:regexp],
      '-m', as_pairs.call(d_metadata)
    ] + cli.files)
  end

  project = MiGA::Project.load(cli[:outdir])
  raise "Impossible to create project: #{cli[:outdir]}" if project.nil?

  [:haai_p, :aai_p, :ani_p].each do |engine|
    project.metadata[engine] = cli[engine] unless cli[engine].nil?
  end
  project.save
  project
end
|
141
|
+
|
142
|
+
# Run the `summary` action once per result name in +which+, writing a
# tab-delimited report to "<result>.tsv" resolved against cli[:outdir].
def summarize(which = %w[cds assembly essential_genes ssu])
  which.each do |result|
    cli.say "Summary: #{result}"
    report = File.expand_path("#{result}.tsv", cli[:outdir])
    call_cli(['summary', '-P', cli[:outdir], '-r', result, '-o', report, '--tab'])
  end
end
|
154
|
+
|
155
|
+
# When cli[:clean] is set, recursively delete the entries "data", "daemon",
# "metadata" and "miga.project.json" under cli[:outdir]. Does nothing
# otherwise.
def cleanup
  if cli[:clean]
    cli.say "Cleaning up intermediate files"
    %w[data daemon metadata miga.project.json].each do |entry|
      target = File.expand_path(entry, cli[:outdir])
      FileUtils.rm_rf(target)
    end
  end
end
|
162
|
+
|
163
|
+
# Launch a nested MiGA::Cli with the arguments in +cmd+, each converted with
# #to_s, adding '-v' when the current CLI is verbose. Returns whatever
# MiGA::Cli#launch returns.
# The argument list is built on a copy, so the caller's +cmd+ Array is left
# untouched.
def call_cli(cmd)
  argv = cmd.map(&:to_s)
  argv << '-v' if cli[:verbose]
  MiGA::Cli.new(argv).launch
end
|
167
|
+
|
168
|
+
# Run the project daemon on cli[:outdir] with --shutdown-when-done, passing
# along cli[:daemon_json] (--json), cli[:jobs] (--max-jobs) and
# cli[:threads] (--ppn) when they are set.
# The working directory seen on entry is restored afterwards, also when the
# nested call raises. Returns the result of Dir.chdir, as before.
def run_daemon
  cmd = ['daemon', 'run', '-P', cli[:outdir], '--shutdown-when-done']
  cmd += ['--json', cli[:daemon_json]] unless cli[:daemon_json].nil?
  cmd += ['--max-jobs', cli[:jobs]] unless cli[:jobs].nil?
  cmd += ['--ppn', cli[:threads]] unless cli[:threads].nil?
  cwd = Dir.pwd
  restored = nil
  begin
    call_cli cmd
  ensure
    # NOTE(review): presumably the nested daemon run changes directory,
    # which is why the original code restores it; confirm against the daemon
    restored = Dir.chdir(cwd)
  end
  restored
end
|
177
|
+
|
178
|
+
end
|
data/lib/miga/cli/base.rb
CHANGED
@@ -5,11 +5,18 @@ module MiGA::Cli::Base
|
|
5
5
|
|
6
6
|
@@TASK_DESC = {
|
7
7
|
generic: 'MiGA: The Microbial Genomes Atlas',
|
8
|
+
# Workflows
|
9
|
+
quality_wf: 'Evaluate the quality of input genomes',
|
10
|
+
derep_wf: 'Dereplicate a collection of input genomes',
|
11
|
+
classify_wf: 'Classify input genomes against a reference database',
|
12
|
+
preproc_wf: 'Preprocess input genomes or metagenomes',
|
13
|
+
index_wf: 'Generate distance indexing of input genomes',
|
8
14
|
# Projects
|
9
15
|
new: 'Creates an empty MiGA project',
|
10
16
|
about: 'Displays information about a MiGA project',
|
11
17
|
plugins: 'Lists or (un)installs plugins in a MiGA project',
|
12
18
|
doctor: 'Performs consistency checks on a MiGA project',
|
19
|
+
get_db: 'Downloads a pre-indexed database',
|
13
20
|
# Datasets
|
14
21
|
add: 'Creates a dataset in a MiGA project',
|
15
22
|
get: 'Downloads a dataset from public databases into a MiGA project',
|
@@ -43,6 +50,7 @@ module MiGA::Cli::Base
|
|
43
50
|
# Projects
|
44
51
|
create_project: :new,
|
45
52
|
project_info: :about,
|
53
|
+
download: :get_db,
|
46
54
|
# Datasets
|
47
55
|
create_dataset: :add,
|
48
56
|
download_dataset: :get,
|
@@ -76,6 +84,8 @@ module MiGA::Cli::Base
|
|
76
84
|
|
77
85
|
@@EXECS = @@TASK_DESC.keys
|
78
86
|
|
87
|
+
@@FILE_REGEXP = %r{^(?:.*/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?:\.f[nastq]+)?$}i
|
88
|
+
|
79
89
|
end
|
80
90
|
|
81
91
|
class MiGA::Cli < MiGA::MiGA
|
@@ -86,5 +96,6 @@ class MiGA::Cli < MiGA::MiGA
|
|
86
96
|
def TASK_DESC; @@TASK_DESC end
|
87
97
|
def TASK_ALIAS; @@TASK_ALIAS end
|
88
98
|
def EXECS; @@EXECS end
|
99
|
+
def FILE_REGEXP; @@FILE_REGEXP end
|
89
100
|
end
|
90
101
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
module MiGA::Cli::ObjectsHelper
|
5
|
+
##
|
6
|
+
# Get the project defined in the CLI by parameter +name+ and +flag+
|
7
|
+
# Returns the project cached in @objects[name], loading it first from the
# path in self[name] if needed. +flag+ is the CLI flag reported by
# #ensure_par when the parameter is missing.
# Raises RuntimeError if MiGA::Project.load returns nil.
def load_project(name = :project, flag = '-P')
  cached = @objects[name]
  return cached unless cached.nil?

  ensure_par(name => flag)
  say "Loading project: #{self[name]}"
  loaded = MiGA::Project.load(self[name])
  @objects[name] = loaded
  raise "Cannot load project: #{self[name]}" if loaded.nil?

  loaded
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Load the dataset defined in the CLI
|
18
|
+
# If +silent=true+, it allows failures silently
|
19
|
+
# Returns the dataset cached in @objects[:dataset], looking it up first by
# the name in self[:dataset] within the loaded project if needed.
# A missing dataset raises RuntimeError unless +silent+ is true, in which
# case nil is returned.
def load_dataset(silent = false)
  cached = @objects[:dataset]
  return cached unless cached.nil?

  ensure_par(dataset: '-D')
  found = load_project.dataset(self[:dataset])
  @objects[:dataset] = found
  raise "Cannot load dataset: #{self[:dataset]}" unless silent || !found.nil?

  found
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Load a project or (if defined) a dataset
|
31
|
+
# Returns the dataset when self[:dataset] is set, the project otherwise.
def load_project_or_dataset
  if self[:dataset].nil?
    load_project
  else
    load_dataset
  end
end
|
34
|
+
|
35
|
+
##
|
36
|
+
# Load and filter a list of datasets as requested in the CLI
|
37
|
+
# If +silent=true+, it allows failures silently
|
38
|
+
# Returns the Array of datasets selected by the CLI filters, cached in
# @objects[:filtered_datasets] after the first call.
# The starting list is the single dataset in self[:dataset] if set (an empty
# list if it cannot be loaded and +silent+ is true), or all the datasets in
# the project. It is then narrowed by self[:ref], self[:active],
# self[:multi] and self[:taxonomy] (each skipped when nil), and finally by
# self[:dataset_k], the 1-based position to keep.
# A position beyond the end of the list yields an empty Array rather than
# [nil]. Filters build new Arrays, so the list returned by the project is
# not modified in place.
def load_and_filter_datasets(silent = false)
  return @objects[:filtered_datasets] unless @objects[:filtered_datasets].nil?

  say 'Listing datasets'
  ds = self[:dataset].nil? ?
    load_project.datasets : [load_dataset(silent)].compact
  ds = ds.select { |d| d.is_ref? == self[:ref] } unless self[:ref].nil?
  ds = ds.select { |d| d.is_active? == self[:active] } unless self[:active].nil?
  unless self[:multi].nil?
    ds = ds.select { |d| self[:multi] ? d.is_multi? : d.is_nonmulti? }
  end
  unless self[:taxonomy].nil?
    ds = ds.select do |d|
      (not d.metadata[:tax].nil?) && d.metadata[:tax].in?(self[:taxonomy])
    end
  end
  # values_at pads with nil when the index is out of range; drop the padding
  ds = ds.values_at(self[:dataset_k] - 1).compact unless self[:dataset_k].nil?
  @objects[:filtered_datasets] = ds
end
|
54
|
+
|
55
|
+
# Returns the result named in self[:result] for the loaded project or
# dataset, cached in @objects[:result] after the first call.
# Raises RuntimeError if the name is not a key of the object's class
# RESULT_DIRS, or if #add_result returns nil.
def load_result
  cached = @objects[:result]
  return cached unless cached.nil?

  ensure_par(result: '-r')
  owner = load_project_or_dataset
  if owner.class.RESULT_DIRS[self[:result]].nil?
    # Report the class name without its namespace
    short_name = owner.class.to_s.gsub(/.*::/,'')
    raise "Unsupported result for #{short_name}: #{self[:result]}"
  end
  result = owner.add_result(self[:result], false)
  raise "Cannot load result: #{self[:result]}" if result.nil?

  @objects[:result] = result
end
|
67
|
+
|
68
|
+
# Copy metadata from the CLI options into +obj+ and return +obj+.
# cli[:metadata], when set, is a comma-delimited list of key=value pairs.
# The values 'true', 'false' and 'nil' are stored as true, false and nil;
# a pair without '=' stores nil. Only the first '=' separates key from
# value, so values may themselves contain '='.
# Setting the key '_step' also increments the counter "_try_<value>".
# Finally, any of :type, :name, :user, :description and :comments set in
# +cli+ is copied over.
def add_metadata(obj, cli = self)
  unless cli[:metadata].nil?
    cli[:metadata].split(',').each do |pair|
      # Limit of 2: keep everything after the first '=' as the value
      k, v = pair.split('=', 2)
      case v
      when 'true';  v = true
      when 'false'; v = false
      when 'nil';   v = nil
      end
      if k == '_step'
        obj.metadata["_try_#{v}"] ||= 0
        obj.metadata["_try_#{v}"] += 1
      end
      obj.metadata[k] = v
    end
  end
  [:type, :name, :user, :description, :comments].each do |k|
    obj.metadata[k] = cli[k] unless cli[k].nil?
  end
  obj
end
|
87
|
+
end
|
88
|
+
|
@@ -0,0 +1,160 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
module MiGA::Cli::OptHelper
|
5
|
+
##
|
6
|
+
# Send MiGA's banner to OptionParser +opt+
|
7
|
+
# Sets the banner of +opt+ to the task description followed by the usage
# line, which mentions {operation} and {FILES...} only when the CLI expects
# them, and then appends an empty separator.
def banner(opt)
  parts = ["Usage: miga #{action.name}"]
  parts << '{operation}' if expect_operation
  parts << '[options]'
  parts << '{FILES...}' if expect_files
  usage = parts.join(' ')
  opt.banner = "\n#{task_description}\n\n#{usage}\n"
  opt.separator ''
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Common options at the end of most actions, passed to OptionParser +opt+
|
18
|
+
# No action is performed if +#opt_common = false+ is passed
|
19
|
+
# Executes only once, unless +#opt_common = true+ is passed between calls
|
20
|
+
# Registers --auto (interactive CLIs only), -v/--verbose, -d/--debug and
# -h/--help on +opt+, then switches #opt_common off so that a second call
# adds nothing. Returns immediately while @opt_common is not set.
def opt_common(opt)
  return unless @opt_common

  opt.on('--auto', 'Accept all defaults as answers') do |v|
    self[:auto] = v
  end if interactive
  opt.on('-v', '--verbose', 'Print additional information to STDERR') do |v|
    self[:verbose] = v
  end
  opt.on(
    '-d', '--debug INT', Integer,
    'Print debugging information to STDERR (1: debug, 2: trace)'
  ) do |level|
    if level > 1
      MiGA::MiGA.DEBUG_TRACE_ON
    else
      MiGA::MiGA.DEBUG_ON
    end
  end
  # Help prints the option summary and terminates the process
  opt.on('-h', '--help', 'Display this screen') do
    puts opt
    exit
  end
  opt.separator ''
  self.opt_common = false
end
|
46
|
+
|
47
|
+
##
|
48
|
+
# Options to load an object passed to OptionParser +opt+, as determined
|
49
|
+
# by +what+ an Array with any combination of:
|
50
|
+
# - :project To require a project
|
51
|
+
# - :dataset To require a dataset
|
52
|
+
# - :dataset_opt To allow (optionally) a dataset
|
53
|
+
# - :dataset_type To allow (optionally) a type of dataset
|
54
|
+
# - :dataset_type_req To require a type of dataset
|
55
|
+
# - :project_type To allow (optionally) a type of project
|
56
|
+
# - :project_type_req To require a type of project
|
57
|
+
# - :result To require a type of project or dataset result
|
58
|
+
# - :result_dataset To require a type of dataset result
|
59
|
+
# - :result_project To require a type of project result
|
60
|
+
# The options :result, :result_dataset, and :result_project are mutually
|
61
|
+
# exclusive
|
62
|
+
# Registers on +opt+ one option per entry of +what+:
# -P/--project, -D/--dataset, -t/--type or -r/--result, storing the parsed
# value in self[:project], self[:dataset], self[:type] or self[:result].
# Type and result values are downcased and converted to Symbol.
# Raises RuntimeError for an entry that is not recognized.
def opt_object(opt, what = [:project, :dataset])
  # Help lines listing the result names known to a class
  result_names = ->(klass) { klass.RESULT_DIRS.keys.map { |n| " ~ #{n}" } }

  what.each do |w|
    case w
    when :project
      opt.on(
        '-P', '--project PATH',
        '(Mandatory) Path to the project'
      ) { |v| self[:project] = v }
    when :dataset, :dataset_opt
      prefix = w == :dataset ? '(Mandatory) ' : ''
      opt.on(
        '-D', '--dataset STRING',
        prefix + 'Name of the dataset'
      ) { |v| self[:dataset] = v }
    when :dataset_type, :dataset_type_req, :project_type, :project_type_req
      # "dataset" or "project": the part of the entry before the first "_"
      obj = w.to_s.split('_').first
      klass = Object.const_get("MiGA::#{obj.capitalize}")
      req = w.to_s.end_with?('_req') ? '(Mandatory) ' : ''
      type_lines = klass.KNOWN_TYPES.map { |k, v| "~ #{k}: #{v[:description]}" }
      opt.on(
        '-t', '--type STRING',
        "#{req}Type of #{obj}. Recognized types include:",
        *type_lines
      ) { |v| self[:type] = v.downcase.to_sym }
    when :result
      opt.on(
        '-r', '--result STRING',
        '(Mandatory) Name of the result',
        'Recognized names for dataset-specific results include:',
        *result_names.call(MiGA::Dataset),
        'Recognized names for project-wide results include:',
        *result_names.call(MiGA::Project)
      ) { |v| self[:result] = v.downcase.to_sym }
    when :result_dataset, :result_project
      klass = w == :result_dataset ? MiGA::Dataset : MiGA::Project
      opt.on(
        '-r', '--result STRING',
        '(Mandatory) Name of the result, one of:',
        *result_names.call(klass)
      ) { |v| self[:result] = v.downcase.to_sym }
    else
      raise "Internal error: Unrecognized option: #{w}"
    end
  end
end
|
110
|
+
|
111
|
+
##
|
112
|
+
# Options to filter a list of datasets passed to OptionParser +opt+,
|
113
|
+
# as determined by +what+ an Array with any combination of:
|
114
|
+
# - :ref To filter by reference (--ref) or query (--no-ref)
|
115
|
+
# - :multi To filter by multiple (--multi) or single (--no-multi) species
|
116
|
+
# - :active To filter by active (--active) or inactive (--no-active)
|
117
|
+
# - :taxonomy To filter by taxonomy (--taxonomy)
|
118
|
+
# The "k-th" filter (--dataset-k) is always included
|
119
|
+
# Registers on +opt+ the dataset filters listed in +what+, in the order
# given, followed by --dataset-k, which is always added. Parsed values are
# stored in self[:ref], self[:multi], self[:active], self[:taxonomy] (as a
# MiGA::Taxonomy) and self[:dataset_k] (as an Integer).
# Raises RuntimeError for an entry that is not recognized.
def opt_filter_datasets(opt, what = [:ref, :multi, :active, :taxonomy])
  # Filters that are plain --[no-]NAME switches, with their help text
  switches = {
    ref: 'Use only reference (or only non-reference) datasets',
    multi: 'Use only multi-species (or only single-species) datasets',
    active: 'Use only active (or inactive) datasets'
  }
  what.each do |w|
    if switches.key?(w)
      opt.on("--[no-]#{w}", switches[w]) { |v| self[w] = v }
    elsif w == :taxonomy
      opt.on(
        '-t', '--taxonomy RANK:TAXON',
        'Filter by taxonomy'
      ) { |v| self[:taxonomy] = MiGA::Taxonomy.new(v) }
    else
      raise "Internal error: Unrecognized option: #{w}"
    end
  end
  opt.on(
    '--dataset-k INTEGER', Integer,
    'Use only the k-th dataset in the list'
  ) { |v| self[:dataset_k] = v }
end
|
151
|
+
|
152
|
+
##
|
153
|
+
# Add a flag (true/false) to the OptionParser +opt+ defined by
|
154
|
+
# +flag+ (without --) and +description+, and save it in the CLI as +sym+.
|
155
|
+
# If +sym+ is nil, +flag+ is used as Symbol
|
156
|
+
# Registers --<flag> on +opt+ with the given +description+ and stores the
# parsed value in self[sym], or in self[flag.to_sym] when +sym+ is nil.
def opt_flag(opt, flag, description, sym = nil)
  key = sym.nil? ? flag.to_sym : sym
  opt.on("--#{flag}", description) { |v| self[key] = v }
end
|
160
|
+
end
|