miga-base 0.4.3.0 → 0.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
data/lib/miga/cli/action/get.rb
CHANGED
@@ -67,66 +67,82 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
67
67
|
end
|
68
68
|
|
69
69
|
##
# Run the action: build the list of sub-CLIs (one per requested dataset),
# load the project, and either update metadata or create the dataset for
# each one, skipping entries for which no remote dataset is returned.
def perform
  sub_clis = get_sub_cli
  project = cli.load_project
  sub_clis.each do |sub_cli|
    remote = create_remote_dataset(sub_cli)
    next if remote.nil?

    # Both handlers share the (sub_cli, project, remote) signature
    handler = sub_cli[:get_md] ? :update_metadata : :create_dataset
    send(handler, sub_cli, project, remote)
  end
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
##
# Build the list of CLI objects to process.
#
# Returns +[cli]+ when no +cli[:file]+ was given. Otherwise reads the
# tab-delimited file: the first line is the header (option names without
# the leading dashes) and every following non-blank line becomes one
# +MiGA::Cli+ whose defaults are set to +cli.data+ before +parse_cli+ runs.
#
# Raises RuntimeError if the header contains an unsupported option.
def get_sub_cli
  return [cli] if cli[:file].nil?

  glob = []
  File.open(cli[:file], 'r') do |fh|
    header = nil
    fh.each do |ln|
      row = ln.chomp.split(/\t/)
      if header.nil?
        header = row
        next
      end
      # A blank line carries no dataset; skip it instead of failing later
      next if row.empty?

      argv_i = [name]
      header.each_with_index do |field, k|
        flag = field.downcase
        # Short rows (and trailing empty cells dropped by split) yield nil
        value = row[k]
        case flag
        when *%w[query ignore-dup get-metadata only-metadata]
          # Boolean switches: present only when the cell says "true"
          argv_i << "--#{flag}" if value.to_s.downcase == 'true'
        when *%w[project file verbose help debug]
          raise "Unsupported header: #{field}"
        else
          # An absent or empty cell means the option is not set for this row
          argv_i += ["--#{flag}", value] unless value.nil? || value.empty?
        end
      end
      sub_cli = MiGA::Cli.new(argv_i)
      sub_cli.defaults = cli.data
      sub_cli.action.parse_cli
      glob << sub_cli
    end
  end
  glob
end
|
99
115
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
end
|
116
|
+
##
# Locate the remote dataset described by +sub_cli+.
#
# +project+ is the project used for the duplicate check; when omitted it is
# loaded through +cli.load_project+. (Previously the check read an undefined
# local, so it always received +nil+ as the project.)
#
# Side effect: when +sub_cli[:api_key]+ is set, it is exported as the
# environment variable "<UNIVERSE>_API_KEY".
#
# Returns a RemoteDataset, or +nil+ when +sub_cli[:ignore_dup]+ is set
# (and not +sub_cli[:get_md]+) and the dataset already exists.
def create_remote_dataset(sub_cli, project = nil)
  sub_cli.ensure_par(dataset: '-D', ids: '-I')
  unless sub_cli[:api_key].nil?
    ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
  end

  sub_cli.say "Dataset: #{sub_cli[:dataset]}"
  if sub_cli[:ignore_dup] && !sub_cli[:get_md]
    # Only load the project when the duplicate check actually runs
    project ||= cli.load_project
    return if Dataset.exist?(project, sub_cli[:dataset])
  end

  sub_cli.say 'Locating remote dataset'
  RemoteDataset.new(sub_cli[:ids], sub_cli[:db], sub_cli[:universe])
end
|
114
130
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
+
##
# Refresh the metadata of the dataset named +sub_cli[:dataset]+ in project
# +p+ using the remote dataset +rd+. Does nothing beyond the log message
# when the project returns no such dataset.
def update_metadata(sub_cli, p, rd)
  sub_cli.say 'Updating dataset'
  dataset = p.dataset(sub_cli[:dataset])
  unless dataset.nil?
    metadata = sub_cli.add_metadata(dataset).metadata.data
    rd.update_metadata(dataset, metadata)
  end
end
|
138
|
+
|
139
|
+
##
# Download the remote dataset +rd+ into project +p+ under the name
# +sub_cli[:dataset]+ and register it in the project.
#
# A temporary Dataset object is created only to collect the metadata
# defined through +sub_cli+, and is removed before saving.
# The dataset is saved as reference unless +sub_cli[:query]+ is set.
def create_dataset(sub_cli, p, rd)
  sub_cli.say 'Creating dataset'
  dummy_d = Dataset.new(p, sub_cli[:dataset])
  md = sub_cli.add_metadata(dummy_d).metadata.data
  # Read the flag from sub_cli like every other option here, so that a
  # per-row value is honored when sub-CLIs are built from a file
  md[:metadata_only] = true if sub_cli[:only_md]
  dummy_d.remove!
  rd.save_to(p, sub_cli[:dataset], !sub_cli[:query], md)
  p.add_dataset(sub_cli[:dataset])
end
|
132
148
|
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'net/ftp'
|
6
|
+
require 'digest/md5'
|
7
|
+
|
8
|
+
##
# CLI action that downloads a database archive from a remote FTP host,
# verifies its MD5 digest, unpacks it in a local directory, and records it
# in a local manifest (+_local_manif.json+).
class MiGA::Cli::Action::GetDb < MiGA::Cli::Action

  ##
  # Set the defaults and declare the command-line options
  def parse_cli
    cli.defaults = {
      database: :recommended,
      version: :latest,
      local: File.expand_path('.miga_db', ENV['MIGA_HOME']),
      host: 'ftp://microbial-genomes.org/db',
      pb: true,
      overwrite: true
    }
    cli.parse do |opt|
      opt.on(
        '-n', '--database STRING',
        "Name of the database to download. By default: #{cli[:database]}"
      ) { |v| cli[:database] = v.to_sym }
      opt.on(
        '--db-version STRING',
        "Database version to download. By default: #{cli[:version]}"
      ) { |v| cli[:version] = v.to_sym }
      opt.on(
        '-l', '--local-dir PATH',
        "Local directory to store the database. By default: #{cli[:local]}"
      ) { |v| cli[:local] = v }
      # Store under :host (the key read by remote_connection); it must stay
      # a String because it is handed to URI.parse
      opt.on(
        '-h', '--host STRING',
        "Remote host of the database. By default: #{cli[:host]}"
      ) { |v| cli[:host] = v }
      opt.on(
        '--list',
        'List available databases and exit'
      ) { |v| cli[:list_databases] = v }
      opt.on(
        '--list-versions',
        'List available versions of the database and exit'
      ) { |v| cli[:list_versions] = v }
      opt.on(
        '--no-overwrite',
        'Exit without downloading if the target database already exists'
      ) { |v| cli[:overwrite] = v }
      opt.on('--no-progress', 'Supress progress bars') { |v| cli[:pb] = v }
    end
  end

  ##
  # Connect, read the remote manifest, and then either list the requested
  # information or download, verify, unpack, and register the database
  def perform
    @ftp = remote_connection
    manif = remote_manifest(@ftp)
    cli.puts "# Host: #{manif[:host]}"
    cli.puts "# Manifest last update: #{manif[:last_update]}"
    return if list_databases(manif)

    db = db_requested(manif)
    return if list_versions(db)

    ver = version_requested(db)
    return if check_target

    file = download_file(@ftp, ver[:path])
    check_digest(ver, file)
    unarchive(file)
    register_database(manif, db, ver)
  end

  ##
  # Message displayed when the action is called without arguments
  def empty_action
    cli.puts 'Downloading latest version of the default database'
  end

  ##
  # Close the FTP connection (if one was opened) before completing
  def complete
    @ftp.close unless @ftp.nil?
    super
  end

  private

  ##
  # Open an anonymous FTP session on +cli[:host]+ and change to the path
  # given in that URI. Raises RuntimeError for non-FTP schemes.
  def remote_connection
    cli.say "Connecting to '#{cli[:host]}'"
    uri = URI.parse(cli[:host])
    raise 'Only FTP hosts are supported' unless uri.scheme == 'ftp'

    ftp = Net::FTP.open(uri.host, port: uri.port)
    ftp.login
    ftp.chdir(uri.path)
    ftp
  end

  ##
  # Download the remote +path+ through +ftp+ into +cli[:local]+, reporting
  # progress if +cli[:pb]+. Returns the path of the local file.
  def download_file(ftp, path)
    cli.say "Downloading '#{path}'"
    Dir.mkdir(cli[:local]) unless Dir.exist? cli[:local]
    file = File.expand_path(path, cli[:local])
    filesize = ftp.size(path)
    transferred = 0
    ftp.getbinaryfile(path, file, 1024) do |data|
      if cli[:pb]
        transferred += data.size
        cli.advance("#{path}:", transferred, filesize)
      end
    end
    cli.print "\n" if cli[:pb]
    file
  end

  ##
  # Download and parse the manifest (+_manif.json+) of the remote host
  def remote_manifest(ftp)
    file = download_file(ftp, '_manif.json')
    MiGA::Json.parse(file)
  end

  ##
  # Resolve the aliases +:recommended+ and +:test+ through the manifest and
  # return the manifest entry of the requested database.
  # Raises RuntimeError if the alias or the database is not in the manifest.
  def db_requested(manif)
    [:recommended, :test].each do |n|
      if cli[:database] == n
        raise "This host has no #{n} database" if manif[n].nil?

        cli[:database] = manif[n].to_sym
      end
    end
    db = manif[:databases][cli[:database]]
    raise 'Cannot find database in this host' if db.nil?

    db
  end

  ##
  # Resolve +:latest+ through +db+ and return the entry of the requested
  # version. Raises RuntimeError if the version is not listed.
  def version_requested(db)
    cli[:version] = db[:latest].to_sym if cli[:version] == :latest
    ver = db[:versions][cli[:version]]
    raise 'Cannot find database version' if ver.nil?

    cli.puts "# Database size: #{version_size(ver)}"
    ver
  end

  ##
  # Print the table of databases in +manif+ if +cli[:list_databases]+.
  # Returns +true+ if the list was printed, +false+ otherwise.
  def list_databases(manif)
    return false unless cli[:list_databases]

    cli.puts "# Recommended database: #{manif[:recommended]}"
    cli.puts ''
    cli.table(
      %w[name description latest versions],
      manif[:databases].map do |name, i|
        [name, i[:description], i[:latest], i[:versions].size.to_s]
      end
    )
    true
  end

  ##
  # Print the table of versions of +db+ if +cli[:list_versions]+.
  # Returns +true+ if the list was printed, +false+ otherwise.
  def list_versions(db)
    return false unless cli[:list_versions]

    cli.puts "# Database: #{cli[:database]}"
    cli.puts ''
    cli.table(
      %w[version updated size datasets],
      db[:versions].map do |name, i|
        [name, i[:last_update], version_size(i), i[:datasets]]
      end
    )
    true
  end

  ##
  # Returns +true+ (after a warning) if overwriting is disabled and the
  # target directory already exists, +false+ otherwise
  def check_target
    return false if cli[:overwrite]

    # cli[:database] is a Symbol, which File.expand_path does not accept
    file = File.expand_path(cli[:database].to_s, cli[:local])
    if Dir.exist? file
      warn "The target directory already exists: #{file}"
      true
    else
      false
    end
  end

  ##
  # Compare the MD5 digest of +file+ against +ver[:MD5]+.
  # Raises RuntimeError if they differ.
  def check_digest(ver, file)
    cli.say 'Checking MD5 digest'
    cli.say "Expected: #{ver[:MD5]}"
    dig = Digest::MD5.file(file).hexdigest
    cli.say "Observed: #{dig}"
    raise 'Corrupt file, MD5 does not match' unless dig == ver[:MD5]
  end

  ##
  # Archive and unarchived sizes of version +ver+ as a human-readable String
  def version_size(ver)
    "#{cli.num_suffix(ver[:size], true)} " \
      "(#{cli.num_suffix(ver[:size_unarchived], true)})"
  end

  ##
  # Extract the gzipped tar archive +file+ inside +cli[:local]+.
  # Raises RuntimeError if +tar+ cannot be run or exits with an error.
  def unarchive(file)
    cli.say "Unarchiving #{file}"
    # Argument-list form: no shell is involved, so paths need no quoting
    ok = system('tar', '-zxf', file, '-C', cli[:local])
    raise "Cannot unarchive #{file}" unless ok
  end

  ##
  # Record the downloaded database and version in the local manifest
  # (+_local_manif.json+ in +cli[:local]+), creating it if necessary
  def register_database(manif, db, ver)
    cli.say "Registering database locally"
    local_manif = File.expand_path('_local_manif.json', cli[:local])
    reg = File.exist?(local_manif) ? MiGA::Json.parse(local_manif) : {}
    reg[:last_update] = Time.now.to_s
    reg[:databases] ||= {}
    entry = (reg[:databases][cli[:database]] ||= {})
    entry[:manif_last_update] = manif[:last_update]
    entry[:manif_host] = manif[:host]
    db.each { |k, v| entry[k] = v }
    entry[:local_version] = ver
    MiGA::Json.generate(reg, local_manif)
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
##
# Workflow action that takes genome assemblies as input, creates a
# project from them, runs the daemon, and prints a summary.
class MiGA::Cli::Action::IndexWf < MiGA::Cli::Action
  require 'miga/cli/action/wf'
  include MiGA::Cli::Action::Wf

  ##
  # Set the defaults and declare the command-line options
  def parse_cli
    default_opts_for_wf
    cli.defaults = { mytaxa: false }
    cli.parse do |opt|
      opt.on('-m', '--mytaxa-scan', 'Perform MyTaxa scan analysis') do |v|
        cli[:mytaxa] = v
      end
      opts_for_wf_distances(opt)
      opts_for_wf(
        opt, 'Input genome assemblies (nucleotides, FastA)',
        cleanup: false, project_type: true
      )
    end
  end

  ##
  # Create the project from the input assemblies, then run and summarize
  def perform
    create_project(:assembly, {}, run_mytaxa_scan: cli[:mytaxa])
    run_daemon
    summarize
  end
end
|
data/lib/miga/cli/action/init.rb
CHANGED
@@ -5,38 +5,39 @@ require 'miga/cli/action'
|
|
5
5
|
require 'shellwords'
|
6
6
|
|
7
7
|
class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
8
|
-
|
8
|
+
require 'miga/cli/action/init/daemon_helper'
|
9
|
+
include MiGA::Cli::Action::Init::DaemonHelper
|
10
|
+
|
9
11
|
##
# Mark the CLI as interactive, set the defaults, and declare the
# command-line options of the init action
def parse_cli
  cli.interactive = true
  cli.defaults = {
    mytaxa: nil,
    config: File.expand_path('.miga_modules', ENV['HOME']),
    ask: false,
    auto: false,
    dtype: :bash
  }
  cli.parse do |opt|
    opt.on(
      '-c', '--config PATH',
      'Path to the Bash configuration file',
      "By default: #{cli[:config]}"
    ) { |path| cli[:config] = path }
    opt.on(
      '--[no-]mytaxa',
      'Should I try setting up MyTaxa its dependencies?',
      'By default: interactive (true if --auto)'
    ) { |flag| cli[:mytaxa] = flag }
    opt.on(
      '--daemon-type STRING',
      'Type of daemon launcher, one of: bash, qsub, msub, slurm',
      "By default: interactive (#{cli[:dtype]} if --auto)"
    ) { |type| cli[:dtype] = type.to_sym }
    opt.on(
      '--ask-all',
      'Ask for the location of all software',
      'By default, only the locations missing in PATH are requested'
    ) { |flag| cli[:ask] = flag }
  end
end
|
37
39
|
|
38
40
|
def perform
|
39
|
-
miga = MiGA.root_path
|
40
41
|
cli.puts <<BANNER
|
41
42
|
===[ Welcome to MiGA, the Microbial Genome Atlas ]===
|
42
43
|
|
@@ -44,17 +45,70 @@ I'm the initialization script, and I'll sniff around your computer to
|
|
44
45
|
make sure you have all the requirements for MiGA data processing.
|
45
46
|
|
46
47
|
BANNER
|
48
|
+
list_requirements
|
49
|
+
rc_fh = open_rc_file
|
50
|
+
check_configuration_script rc_fh
|
51
|
+
paths = check_software_requirements rc_fh
|
52
|
+
check_additional_files paths
|
53
|
+
check_r_packages paths
|
54
|
+
check_ruby_gems paths
|
55
|
+
configure_daemon
|
56
|
+
close_rc_file rc_fh
|
57
|
+
cli.puts 'Configuration complete. MiGA is ready to work!'
|
58
|
+
cli.puts ''
|
59
|
+
end
|
60
|
+
|
61
|
+
##
# Intentionally empty: this action does nothing extra when it is called
# without arguments
def empty_action; end
|
63
|
+
|
64
|
+
##
# Run the shell command +cmd+ after sourcing the configuration script
# +cli[:config]+, and return its standard output as a String.
# +cmd+ is passed to the shell verbatim; callers must escape its parts.
def run_cmd(cli, cmd)
  # Escape the path instead of double-quoting it, so that quotes, `$` or
  # backticks in the file name cannot break or alter the command
  `. #{cli[:config].shellescape} && #{cmd}`
end
|
67
|
+
|
68
|
+
##
# Pipe the R code +cmd+ into the R executable registered in +paths['R']+
# (through #run_cmd) and return the combined standard output and error
def run_r_cmd(cli, paths, cmd)
  r_code = cmd.shellescape
  r_bin = paths['R'].shellescape
  run_cmd(cli, "echo #{r_code} | #{r_bin} --vanilla -q 2>&1")
end
|
72
|
+
|
73
|
+
##
# Try to load the R package +pkg+ and return whether the last child
# process exited successfully
def test_r_package(cli, paths, pkg)
  load_stmt = "library('#{pkg}')"
  run_r_cmd(cli, paths, load_stmt)
  $?.success?
end
|
77
|
+
|
78
|
+
##
# Install the R package +pkg+ from the repository hard-coded below and
# return the output of the R session
def install_r_package(cli, paths, pkg)
  run_r_cmd(
    cli, paths,
    "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
  )
end
|
47
82
|
|
83
|
+
##
# Try to require the gem +pkg+ with the Ruby registered in
# +paths['ruby']+ and return whether the last child process exited
# successfully
def test_ruby_gem(cli, paths, pkg)
  ruby_bin = paths['ruby'].shellescape
  gem_name = pkg.shellescape
  run_cmd(cli, "#{ruby_bin} -r #{gem_name} -e '' 2>/dev/null")
  $?.success?
end
|
88
|
+
|
89
|
+
##
# Install the gem +pkg+ (with --user) using the Ruby registered in
# +paths['ruby']+ rather than the Ruby running this action, and return the
# combined standard output and error of the installer
def install_ruby_gem(cli, paths, pkg)
  installer = "Gem::GemRunner.new.run %w(install --user #{pkg})"
  ruby_bin = paths['ruby'].shellescape
  run_cmd(cli, "#{ruby_bin} \
    -r rubygems -r rubygems/gem_runner \
    -e #{installer.shellescape} 2>&1")
end
|
95
|
+
|
96
|
+
##
# Ask the user whether to display the requirements and, if the answer is
# 'yes', print +utils/requirements.txt+ (under MiGA.root_path) line by
# line between two empty lines
def list_requirements
  answer = cli.ask_user(
    'Would you like to see all the requirements before starting?',
    'no', %w(yes no)
  )
  return unless answer == 'yes'

  cli.puts ''
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
  File.foreach(req_path) { |ln| cli.puts ln }
  cli.puts ''
end
|
108
|
+
|
109
|
+
private
|
57
110
|
|
111
|
+
def open_rc_file
|
58
112
|
rc_path = File.expand_path('.miga_rc', ENV['HOME'])
|
59
113
|
if File.exist? rc_path
|
60
114
|
if cli.ask_user(
|
@@ -70,8 +124,10 @@ BANNER
|
|
70
124
|
# `miga init` made this on #{Time.now}
|
71
125
|
|
72
126
|
BASH
|
127
|
+
rc_fh
|
128
|
+
end
|
73
129
|
|
74
|
-
|
130
|
+
def check_configuration_script(rc_fh)
|
75
131
|
unless File.exist? cli[:config]
|
76
132
|
cli[:config] = cli.ask_user(
|
77
133
|
'Is there a script I need to load at startup?',
|
@@ -86,47 +142,62 @@ BASH
|
|
86
142
|
cli[:config] = '/dev/null'
|
87
143
|
end
|
88
144
|
cli.puts ''
|
145
|
+
end
|
89
146
|
|
90
|
-
|
147
|
+
##
# Locate every executable listed in +utils/requirements.txt+ (under
# MiGA.root_path; the first two lines are skipped) and write the required
# PATH setup into the open configuration file handle +rc_fh+.
#
# Entries whose first column ends in "(opt)" are skipped unless
# +cli[:mytaxa]+ is set.
#
# Returns a Hash mapping each executable name to its shell-escaped path.
# NOTE(review): values are stored already escaped, while run_r_cmd and
# test_ruby_gem escape them again; confirm this is intended for paths
# containing special characters.
def check_software_requirements(rc_fh)
  cli.puts 'Looking for requirements:'
  ask_for_mytaxa
  rc_fh.puts 'export MIGA_MYTAXA="no"' unless cli[:mytaxa]
  paths = {}
  rc_fh.puts 'MIGA_PATH=""'
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
  File.open(req_path, 'r') do |fh|
    fh.each_line do |ln|
      # Line count of this handle, unaffected by reads from other streams
      next if fh.lineno < 3

      r = ln.chomp.split(/\t+/)
      next if r[0] =~ /\(opt\)$/ && !cli[:mytaxa]

      cli.print "Testing #{r[0]}#{" (#{r[3]})" if r[3]}... "
      # The handle must be passed on: find_software records custom paths
      path = find_software(r[1], rc_fh)
      paths[r[1]] = File.expand_path(r[1], path).shellescape
    end
  end
  rc_fh.puts 'export PATH="$MIGA_PATH$PATH"'
  cli.puts ''
  paths
end

##
# Ask the user whether MyTaxa modules should be included, unless
# +cli[:mytaxa]+ was already defined
def ask_for_mytaxa
  return unless cli[:mytaxa].nil?

  answer = cli.ask_user('Should I include MyTaxa modules?', 'yes', %w(yes no))
  cli[:mytaxa] = answer == 'yes'
end

##
# Find the directory containing the executable +exec+, asking the user
# when +cli[:ask]+ is set or +which+ cannot find it, and repeating until
# an executable file is found there.
#
# If the accepted directory differs from the one reported by +which+ and
# +rc_fh+ is given, a line prepending it to MIGA_PATH is written to
# +rc_fh+. Returns the directory.
def find_software(exec, rc_fh = nil)
  path = nil
  loop do
    # `which` prints nothing when it fails, and File.dirname('') is '.'
    d_path = File.dirname(run_cmd(cli, "which #{exec.shellescape}"))
    if cli[:ask] || d_path == '.'
      path = cli.ask_user('Where can I find it?', d_path, nil, true)
    else
      path = d_path
      cli.puts path
    end
    if File.executable?(File.expand_path(exec, path))
      if d_path != path && !rc_fh.nil?
        rc_fh.puts "MIGA_PATH=\"#{path}:$MIGA_PATH\" # #{exec}"
      end
      break
    end
    cli.print "I cannot find #{exec} "
  end
  path
end
|
128
199
|
|
129
|
-
|
200
|
+
def check_additional_files(paths)
|
130
201
|
if cli[:mytaxa]
|
131
202
|
cli.puts 'Looking for MyTaxa databases:'
|
132
203
|
mt = File.dirname paths["MyTaxa"]
|
@@ -145,8 +216,9 @@ BASH
|
|
145
216
|
end
|
146
217
|
cli.puts ''
|
147
218
|
end
|
219
|
+
end
|
148
220
|
|
149
|
-
|
221
|
+
def check_r_packages(paths)
|
150
222
|
cli.puts 'Looking for R packages:'
|
151
223
|
%w(enveomics.R ape cluster vegan).each do |pkg|
|
152
224
|
cli.print "Testing #{pkg}... "
|
@@ -161,8 +233,9 @@ BASH
|
|
161
233
|
end
|
162
234
|
end
|
163
235
|
cli.puts ''
|
236
|
+
end
|
164
237
|
|
165
|
-
|
238
|
+
def check_ruby_gems(paths)
|
166
239
|
cli.puts 'Looking for Ruby gems:'
|
167
240
|
%w(sqlite3 daemons json).each do |pkg|
|
168
241
|
cli.print "Testing #{pkg}... "
|
@@ -171,8 +244,8 @@ BASH
|
|
171
244
|
else
|
172
245
|
cli.puts 'no, installing'
|
173
246
|
# This hackey mess is meant to ensure the test and installation are done
|
174
|
-
# on the configuration Ruby, not on the Ruby currently executing the
|
175
|
-
# action
|
247
|
+
# on the configuration Ruby, not on the Ruby currently executing the
|
248
|
+
# init action
|
176
249
|
cli.print install_ruby_gem(cli, paths, pkg)
|
177
250
|
unless test_ruby_gem(cli, paths, pkg)
|
178
251
|
raise "Unable to auto-install Ruby gem: #{pkg}"
|
@@ -180,119 +253,9 @@ BASH
|
|
180
253
|
end
|
181
254
|
end
|
182
255
|
cli.puts ''
|
256
|
+
end
|
183
257
|
|
184
|
-
|
185
|
-
cli.puts 'Default daemon configuration:'
|
186
|
-
daemon_f = File.expand_path('.miga_daemon.json', ENV['HOME'])
|
187
|
-
unless File.exist?(daemon_f) and cli.ask_user(
|
188
|
-
'A template daemon already exists, do you want to preserve it?',
|
189
|
-
'yes', %w(yes no)) == 'yes'
|
190
|
-
v = {created: Time.now.to_s, updated: Time.now.to_s}
|
191
|
-
v[:type] = cli.ask_user(
|
192
|
-
'Please select the type of daemon you want to setup',
|
193
|
-
cli[:dtype], %w(bash qsub msub slurm))
|
194
|
-
case v[:type]
|
195
|
-
when 'bash'
|
196
|
-
v[:latency] = cli.ask_user(
|
197
|
-
'How long should I sleep? (in seconds)', '30').to_i
|
198
|
-
v[:maxjobs] = cli.ask_user(
|
199
|
-
'How many jobs can I launch at once?', '6').to_i
|
200
|
-
v[:ppn] = cli.ask_user(
|
201
|
-
'How many CPUs can I use per job?', '2').to_i
|
202
|
-
cli.puts 'Setting up internal daemon defaults.'
|
203
|
-
cli.puts 'If you don\'t understand this just leave default values:'
|
204
|
-
v[:cmd] = cli.ask_user(
|
205
|
-
"How should I launch tasks?\n %1$s: script path, " \
|
206
|
-
"%2$s: variables, %3$d: CPUs, %4$s: log file, %5$s: task name.\n",
|
207
|
-
"%2$s '%1$s' > '%4$s' 2>&1")
|
208
|
-
v[:var] = cli.ask_user(
|
209
|
-
"How should I pass variables?\n %1$s: keys, %2$s: values.\n",
|
210
|
-
"%1$s=%2$s")
|
211
|
-
v[:varsep] = cli.ask_user(
|
212
|
-
'What should I use to separate variables?', ' ')
|
213
|
-
v[:alive] = cli.ask_user(
|
214
|
-
"How can I know that a process is still alive?\n %1$s: PID, " \
|
215
|
-
"output should be 1 for running and 0 for non-running.\n",
|
216
|
-
"ps -p '%1$s'|tail -n+2|wc -l")
|
217
|
-
v[:kill] = cli.ask_user(
|
218
|
-
"How should I terminate tasks?\n %s: process ID.", "kill -9 '%s'")
|
219
|
-
when 'slurm'
|
220
|
-
queue = cli.ask_user(
|
221
|
-
'What queue should I use?', nil, nil, true)
|
222
|
-
v[:latency] = cli.ask_user(
|
223
|
-
'How long should I sleep? (in seconds)', '150').to_i
|
224
|
-
v[:maxjobs] = cli.ask_user(
|
225
|
-
'How many jobs can I launch at once?', '300').to_i
|
226
|
-
v[:ppn] = cli.ask_user(
|
227
|
-
'How many CPUs can I use per job?', '2').to_i
|
228
|
-
cli.puts 'Setting up internal daemon defaults'
|
229
|
-
cli.puts 'If you don\'t understand this just leave default values:'
|
230
|
-
v[:cmd] = cli.ask_user(
|
231
|
-
"How should I launch tasks?\n %1$s: script path, " \
|
232
|
-
"%2$s: variables, %3$d: CPUs, %4$d: log file, %5$s: task name.\n",
|
233
|
-
"%2$s sbatch --partition='#{queue}' --export=ALL " \
|
234
|
-
"--nodes=1 --ntasks-per-node=%3$d --output='%4$s' " \
|
235
|
-
"--job-name='%5$s' --mem=9G --time=12:00:00 %1$s " \
|
236
|
-
"| perl -pe 's/.* //'")
|
237
|
-
v[:var] = cli.ask_user(
|
238
|
-
"How should I pass variables?\n %1$s: keys, %2$s: values.\n",
|
239
|
-
"%1$s=%2$s")
|
240
|
-
v[:varsep] = cli.ask_user(
|
241
|
-
'What should I use to separate variables?', ' ')
|
242
|
-
v[:alive] = cli.ask_user(
|
243
|
-
"How can I know that a process is still alive?\n %1$s: job id, " \
|
244
|
-
"output should be 1 for running and 0 for non-running.\n",
|
245
|
-
"squeue -h -o %%t -j '%1$s' | grep '^PD\\|R\\|CF\\|CG$' " \
|
246
|
-
"| tail -n 1 | wc -l")
|
247
|
-
v[:kill] = cli.ask_user(
|
248
|
-
"How should I terminate tasks?\n %s: process ID.", "scancel '%s'")
|
249
|
-
else # [qm]sub
|
250
|
-
queue = cli.ask_user('What queue should I use?', nil, nil, true)
|
251
|
-
v[:latency] = cli.ask_user(
|
252
|
-
'How long should I sleep? (in seconds)', '150').to_i
|
253
|
-
v[:maxjobs] = cli.ask_user(
|
254
|
-
'How many jobs can I launch at once?', '300').to_i
|
255
|
-
v[:ppn] = cli.ask_user(
|
256
|
-
'How many CPUs can I use per job?', '2').to_i
|
257
|
-
cli.puts 'Setting up internal daemon defaults.'
|
258
|
-
cli.puts 'If you don\'t understand this just leave default values:'
|
259
|
-
v[:cmd] = cli.ask_user(
|
260
|
-
"How should I launch tasks?\n %1$s: script path, " \
|
261
|
-
"%2$s: variables, %3$d: CPUs, %4$d: log file, %5$s: task name.\n",
|
262
|
-
"#{v[:type]} -q '#{queue}' -v '%2$s' -l nodes=1:ppn=%3$d %1$s " \
|
263
|
-
"-j oe -o '%4$s' -N '%5$s' -l mem=9g -l walltime=12:00:00 " \
|
264
|
-
"| grep .")
|
265
|
-
v[:var] = cli.ask_user(
|
266
|
-
"How should I pass variables?\n %1$s: keys, %2$s: values.\n",
|
267
|
-
"%1$s=%2$s")
|
268
|
-
v[:varsep] = cli.ask_user(
|
269
|
-
'What should I use to separate variables?', ',')
|
270
|
-
if v[:type] == 'qsub'
|
271
|
-
v[:alive] = cli.ask_user(
|
272
|
-
"How can I know that a process is still alive?\n " \
|
273
|
-
"%1$s: job id, output should be 1 for running and " \
|
274
|
-
"0 for non-running.\n",
|
275
|
-
"qstat -f '%1$s'|grep ' job_state ='|perl -pe 's/.*= //'" \
|
276
|
-
"|grep '[^C]'|tail -n1|wc -l|awk '{print $1}'")
|
277
|
-
v[:kill] = cli.ask_user(
|
278
|
-
"How should I terminate tasks?\n %s: process ID.", "qdel '%s'")
|
279
|
-
else # msub
|
280
|
-
v[:alive] = cli.ask_user(
|
281
|
-
"How can I know that a process is still alive?\n " \
|
282
|
-
"%1$s: job id, output should be 1 for running and " \
|
283
|
-
"0 for non-running.\n",
|
284
|
-
"checkjob '%1$s'|grep '^State:'|perl -pe 's/.*: //'" \
|
285
|
-
"|grep 'Deferred\\|Hold\\|Idle\\|Starting\\|Running\\|Blocked'" \
|
286
|
-
"|tail -n1|wc -l|awk '{print $1}'")
|
287
|
-
v[:kill] = cli.ask_user(
|
288
|
-
"How should I terminate tasks?\n %s: process ID.",
|
289
|
-
"canceljob '%s'")
|
290
|
-
end
|
291
|
-
end
|
292
|
-
File.open(daemon_f, 'w') { |fh| fh.puts JSON.pretty_generate(v) }
|
293
|
-
end
|
294
|
-
cli.puts ''
|
295
|
-
|
258
|
+
def close_rc_file(rc_fh)
|
296
259
|
rc_fh.puts <<FOOT
|
297
260
|
|
298
261
|
MIGA_CONFIG_VERSION='#{MiGA::MiGA.VERSION}'
|
@@ -300,44 +263,6 @@ MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
|
|
300
263
|
MIGA_CONFIG_DATE='#{Time.now}'
|
301
264
|
|
302
265
|
FOOT
|
303
|
-
|
304
|
-
cli.puts 'Configuration complete. MiGA is ready to work!'
|
305
|
-
cli.puts ''
|
306
|
-
|
307
|
-
end
|
308
|
-
|
309
|
-
def empty_action
|
310
|
-
end
|
311
|
-
|
312
|
-
def run_cmd(cli, cmd)
|
313
|
-
`. "#{cli[:config]}" && #{cmd}`
|
314
|
-
end
|
315
|
-
|
316
|
-
def run_r_cmd(cli, paths, cmd)
|
317
|
-
run_cmd(cli,
|
318
|
-
"echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1")
|
319
|
-
end
|
320
|
-
|
321
|
-
def test_r_package(cli, paths, pkg)
|
322
|
-
run_r_cmd(cli, paths, "library('#{pkg}')")
|
323
|
-
$?.success?
|
324
|
-
end
|
325
|
-
|
326
|
-
def install_r_package(cli, paths, pkg)
|
327
|
-
r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
|
328
|
-
run_r_cmd(cli, paths, r_cmd)
|
329
|
-
end
|
330
|
-
|
331
|
-
def test_ruby_gem(cli, paths, pkg)
|
332
|
-
run_cmd(cli,
|
333
|
-
"#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null")
|
334
|
-
$?.success?
|
335
|
-
end
|
336
|
-
|
337
|
-
def install_ruby_gem(cli, paths, pkg)
|
338
|
-
gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
|
339
|
-
run_cmd(cli, "#{paths['ruby'].shellescape} \
|
340
|
-
-r rubygems -r rubygems/gem_runner \
|
341
|
-
-e #{gem_cmd.shellescape} 2>&1")
|
266
|
+
rc_fh.close
|
342
267
|
end
|
343
268
|
end
|