miga-base 0.4.3.0 → 0.5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
data/lib/miga/cli/action/get.rb
CHANGED
@@ -67,66 +67,82 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
67
67
|
end
|
68
68
|
|
69
69
|
# Runs the action. For every CLI in the batch, locates the remote dataset
# and then either updates the metadata of the local dataset (when the CLI
# has :get_md set) or creates the dataset in the project.
def perform
  batch = get_sub_cli
  project = cli.load_project
  batch.each do |sub_cli|
    remote = create_remote_dataset(sub_cli)
    next if remote.nil?

    # Both handlers share the same signature, so dispatch by name
    handler = sub_cli[:get_md] ? :update_metadata : :create_dataset
    send(handler, sub_cli, project, remote)
  end
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
# Builds the list of CLI objects to process.
#
# Without a batch file (cli[:file] is nil) the current CLI is the only item.
# Otherwise the file is read as a tab-delimited table whose first row holds
# option names (without leading dashes). Every following non-blank row is
# turned into an independent MiGA::Cli that receives the data of the current
# CLI as defaults.
#
# Returns an Array of CLI objects.
# Raises RuntimeError when a data row is processed and the header names an
# option that cannot be set per row (project, file, verbose, help, debug).
def get_sub_cli
  return [cli] if cli[:file].nil?

  flags = %w[query ignore-dup get-metadata only-metadata]
  forbidden = %w[project file verbose help debug]
  glob = []
  header = nil
  File.foreach(cli[:file]) do |ln|
    # Limit -1 keeps trailing empty cells, so columns stay aligned with the
    # header instead of silently becoming nil
    row = ln.chomp.split(/\t/, -1)
    if header.nil?
      header = row
      next
    end
    next if row.all?(&:empty?) # Blank line: nothing to download

    argv_i = [name]
    header.each_with_index do |field, k|
      option = field.downcase
      raise "Unsupported header: #{field}" if forbidden.include?(option)

      value = row[k]
      if flags.include?(option)
        # Boolean switches are only passed when the cell says "true"
        argv_i << "--#{option}" if value.to_s.downcase == 'true'
      elsif !value.nil?
        # Rows shorter than the header simply leave the option unset
        argv_i += ["--#{option}", value]
      end
    end
    sub_cli = MiGA::Cli.new(argv_i)
    sub_cli.defaults = cli.data
    sub_cli.action.parse_cli
    glob << sub_cli
  end
  glob
end
|
99
115
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
end
|
116
|
+
# Validates the request held by +sub_cli+ and builds the RemoteDataset that
# should be fetched.
#
# sub_cli - CLI providing :dataset, :ids, :db, :universe and, optionally,
#           :api_key, :ignore_dup and :get_md.
# p       - Project used for the duplicate check. Optional: when omitted,
#           the project is loaded from the main CLI.
#
# Returns the RemoteDataset, or nil when :ignore_dup is set (and :get_md is
# not) and a dataset with that name already exists in the project.
def create_remote_dataset(sub_cli, p = nil)
  sub_cli.ensure_par(dataset: '-D', ids: '-I')
  unless sub_cli[:api_key].nil?
    ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
  end

  sub_cli.say "Dataset: #{sub_cli[:dataset]}"
  if sub_cli[:ignore_dup] && !sub_cli[:get_md]
    # Without an explicit project a bare `p` would resolve to Kernel#p and
    # evaluate to nil, so the project must be resolved here
    project = p || cli.load_project
    return if Dataset.exist?(project, sub_cli[:dataset])
  end

  sub_cli.say 'Locating remote dataset'
  RemoteDataset.new(sub_cli[:ids], sub_cli[:db], sub_cli[:universe])
end
|
114
130
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
+
# Refreshes the metadata of the local dataset named sub_cli[:dataset] in
# project +p+ from the remote dataset +rd+, including any metadata passed
# through +sub_cli+. Does nothing (returns nil) if the project has no such
# dataset.
def update_metadata(sub_cli, p, rd)
  sub_cli.say 'Updating dataset'
  dataset = p.dataset(sub_cli[:dataset])
  unless dataset.nil?
    rd.update_metadata(dataset, sub_cli.add_metadata(dataset).metadata.data)
  end
end
|
138
|
+
|
139
|
+
# Downloads the remote dataset +rd+ into project +p+ under the name
# sub_cli[:dataset] and registers it in the project.
#
# The dataset is flagged as :metadata_only when only-metadata was requested
# either for this particular entry (+sub_cli+, e.g. a row of a batch file)
# or globally in the main CLI. It is saved as a reference dataset unless
# sub_cli[:query] is set.
def create_dataset(sub_cli, p, rd)
  sub_cli.say 'Creating dataset'
  # Throw-away dataset, used only to collect the metadata given in the CLI
  dummy_d = Dataset.new(p, sub_cli[:dataset])
  md = sub_cli.add_metadata(dummy_d).metadata.data
  md[:metadata_only] = true if sub_cli[:only_md] || cli[:only_md]
  dummy_d.remove!
  rd.save_to(p, sub_cli[:dataset], !sub_cli[:query], md)
  p.add_dataset(sub_cli[:dataset])
end
|
132
148
|
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
require 'net/ftp'
|
6
|
+
require 'digest/md5'
|
7
|
+
|
8
|
+
# CLI action that downloads a database from a remote FTP host, checks its
# MD5 digest, unpacks it in a local directory and records it in a local
# manifest (+_local_manif.json+).
class MiGA::Cli::Action::GetDb < MiGA::Cli::Action
  # Sets the default values and declares the command-line options.
  def parse_cli
    cli.defaults = {
      database: :recommended,
      version: :latest,
      local: File.expand_path('.miga_db', ENV['MIGA_HOME']),
      host: 'ftp://microbial-genomes.org/db',
      pb: true,
      overwrite: true
    }
    cli.parse do |opt|
      opt.on(
        '-n', '--database STRING',
        "Name of the database to download. By default: #{cli[:database]}"
      ) { |v| cli[:database] = v.to_sym }
      opt.on(
        '--db-version STRING',
        "Database version to download. By default: #{cli[:version]}"
      ) { |v| cli[:version] = v.to_sym }
      opt.on(
        '-l', '--local-dir PATH',
        "Local directory to store the database. By default: #{cli[:local]}"
      ) { |v| cli[:local] = v }
      opt.on(
        '-h', '--host STRING',
        "Remote host of the database. By default: #{cli[:host]}"
      ) { |v| cli[:host] = v } # Must be :host, the key read by remote_connection
      opt.on(
        '--list',
        'List available databases and exit'
      ) { |v| cli[:list_databases] = v }
      opt.on(
        '--list-versions',
        'List available versions of the database and exit'
      ) { |v| cli[:list_versions] = v }
      opt.on(
        '--no-overwrite',
        'Exit without downloading if the target database already exists'
      ) { |v| cli[:overwrite] = v }
      opt.on('--no-progress', 'Supress progress bars') { |v| cli[:pb] = v }
    end
  end

  # Connects to the host, reads the remote manifest and, unless a listing
  # was requested or the target already exists (with --no-overwrite),
  # downloads, verifies, unpacks and registers the requested database.
  def perform
    @ftp = remote_connection
    manif = remote_manifest(@ftp)
    cli.puts "# Host: #{manif[:host]}"
    cli.puts "# Manifest last update: #{manif[:last_update]}"
    return if list_databases(manif)

    db = db_requested(manif)
    return if list_versions(db)

    ver = version_requested(db)
    return if check_target

    file = download_file(@ftp, ver[:path])
    check_digest(ver, file)
    unarchive(file)
    register_database(manif, db, ver)
  end

  # Message displayed when the action is called without arguments.
  def empty_action
    cli.puts 'Downloading latest version of the default database'
  end

  # Closes the FTP connection (if one was opened) before completing.
  def complete
    @ftp.close unless @ftp.nil?
    super
  end

  private

  # Opens an anonymous FTP session on cli[:host] and changes to the path of
  # that URI. Raises RuntimeError for any scheme other than ftp.
  def remote_connection
    cli.say "Connecting to '#{cli[:host]}'"
    uri = URI.parse(cli[:host])
    raise 'Only FTP hosts are supported' unless uri.scheme == 'ftp'

    ftp = Net::FTP.open(uri.host, port: uri.port)
    ftp.login
    ftp.chdir(uri.path)
    ftp
  end

  # Downloads +path+ through +ftp+ into cli[:local] (created if missing),
  # advancing a progress bar unless disabled. Returns the local file path.
  def download_file(ftp, path)
    cli.say "Downloading '#{path}'"
    Dir.mkdir(cli[:local]) unless Dir.exist? cli[:local]
    file = File.expand_path(path, cli[:local])
    filesize = ftp.size(path)
    transferred = 0
    ftp.getbinaryfile(path, file, 1024) do |data|
      if cli[:pb]
        transferred += data.size
        cli.advance("#{path}:", transferred, filesize)
      end
    end
    cli.print "\n" if cli[:pb]
    file
  end

  # Downloads and parses the manifest (+_manif.json+) of the remote host.
  def remote_manifest(ftp)
    file = download_file(ftp, '_manif.json')
    MiGA::Json.parse(file)
  end

  # Resolves the aliases :recommended and :test to the database names given
  # by the manifest, stores the result in cli[:database], and returns the
  # manifest entry of that database. Raises RuntimeError if the alias or the
  # database is not present in the manifest.
  def db_requested(manif)
    [:recommended, :test].each do |n|
      if cli[:database] == n
        raise "This host has no #{n} database" if manif[n].nil?

        cli[:database] = manif[n].to_sym
      end
    end
    db = manif[:databases][cli[:database]]
    raise 'Cannot find database in this host' if db.nil?

    db
  end

  # Resolves :latest to the version named by +db+, stores the result in
  # cli[:version], prints the database size, and returns the manifest entry
  # of that version. Raises RuntimeError if the version does not exist.
  def version_requested(db)
    cli[:version] = db[:latest].to_sym if cli[:version] == :latest
    ver = db[:versions][cli[:version]]
    raise 'Cannot find database version' if ver.nil?

    cli.puts "# Database size: #{version_size(ver)}"
    ver
  end

  # Prints the table of databases in the manifest if --list was passed.
  # Returns true when the listing was printed, false otherwise.
  def list_databases(manif)
    return false unless cli[:list_databases]

    cli.puts "# Recommended database: #{manif[:recommended]}"
    cli.puts ''
    rows = manif[:databases].map do |name, i|
      [name, i[:description], i[:latest], i[:versions].size.to_s]
    end
    cli.table(%w[name description latest versions], rows)
    true
  end

  # Prints the table of versions of +db+ if --list-versions was passed.
  # Returns true when the listing was printed, false otherwise.
  def list_versions(db)
    return false unless cli[:list_versions]

    cli.puts "# Database: #{cli[:database]}"
    cli.puts ''
    rows = db[:versions].map do |name, i|
      [name, i[:last_update], version_size(i), i[:datasets]]
    end
    cli.table(%w[version updated size datasets], rows)
    true
  end

  # With --no-overwrite, checks whether the target directory already exists.
  # Returns true (after a warning) if the download must be skipped.
  def check_target
    return false if cli[:overwrite]

    file = File.expand_path(cli[:database], cli[:local])
    return false unless Dir.exist? file

    warn "The target directory already exists: #{file}"
    true
  end

  # Compares the MD5 digest of +file+ against the one in the manifest entry
  # +ver+. Raises RuntimeError if they differ.
  def check_digest(ver, file)
    cli.say 'Checking MD5 digest'
    cli.say "Expected: #{ver[:MD5]}"
    dig = Digest::MD5.file(file).hexdigest
    cli.say "Observed: #{dig}"
    raise 'Corrupt file, MD5 does not match' unless dig == ver[:MD5]
  end

  # Human-readable size of a version: archived size, with the unarchived
  # size in parentheses.
  def version_size(ver)
    cli.num_suffix(ver[:size], true) + ' (' +
      cli.num_suffix(ver[:size_unarchived], true) + ')'
  end

  # Extracts the gzipped tarball +file+ into cli[:local].
  # Raises RuntimeError if tar cannot be run or exits with an error, so that
  # a failed extraction is never registered as an installed database.
  def unarchive(file)
    cli.say "Unarchiving #{file}"
    # Argument-list form bypasses the shell: paths with spaces, quotes or
    # other metacharacters are passed verbatim
    ok = system('tar', '-zxf', file, '-C', cli[:local])
    raise "Cannot unarchive #{file}" unless ok
  end

  # Records the downloaded database in the local manifest
  # (+_local_manif.json+ in cli[:local]), creating it if necessary: copies
  # the remote entry +db+, the origin of the manifest, and the installed
  # version +ver+.
  def register_database(manif, db, ver)
    cli.say "Registering database locally"
    local_manif = File.expand_path('_local_manif.json', cli[:local])
    reg = File.exist?(local_manif) ? MiGA::Json.parse(local_manif) : {}
    reg[:last_update] = Time.now.to_s
    reg[:databases] ||= {}
    entry = (reg[:databases][cli[:database]] ||= {})
    entry[:manif_last_update] = manif[:last_update]
    entry[:manif_host] = manif[:host]
    db.each { |k, v| entry[k] = v }
    entry[:local_version] = ver
    MiGA::Json.generate(reg, local_manif)
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
# Workflow action that creates a project from genome assemblies, runs the
# daemon on it and summarizes the results.
class MiGA::Cli::Action::IndexWf < MiGA::Cli::Action
  require 'miga/cli/action/wf'
  include MiGA::Cli::Action::Wf

  # Declares the options of the workflow: MyTaxa scan (off by default),
  # the distance options and the general workflow options.
  def parse_cli
    default_opts_for_wf
    cli.defaults = { mytaxa: false }
    cli.parse do |opt|
      opt.on('-m', '--mytaxa-scan', 'Perform MyTaxa scan analysis') do |flag|
        cli[:mytaxa] = flag
      end
      opts_for_wf_distances(opt)
      opts_for_wf(
        opt, 'Input genome assemblies (nucleotides, FastA)',
        cleanup: false, project_type: true
      )
    end
  end

  # Creates the project from the input assemblies, then runs the daemon and
  # summarizes.
  def perform
    # Input data
    create_project(:assembly, {}, run_mytaxa_scan: cli[:mytaxa])
    # Run
    run_daemon
    summarize
  end
end
|
data/lib/miga/cli/action/init.rb
CHANGED
@@ -5,38 +5,39 @@ require 'miga/cli/action'
|
|
5
5
|
require 'shellwords'
|
6
6
|
|
7
7
|
class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
8
|
-
|
8
|
+
require 'miga/cli/action/init/daemon_helper'
|
9
|
+
include MiGA::Cli::Action::Init::DaemonHelper
|
10
|
+
|
9
11
|
# Marks the CLI as interactive, sets the default values and declares the
# command-line options of the init action.
def parse_cli
  cli.interactive = true
  cli.defaults = {
    mytaxa: nil,
    config: File.expand_path('.miga_modules', ENV['HOME']),
    ask: false,
    auto: false,
    dtype: :bash
  }
  cli.parse do |opt|
    opt.on(
      '-c', '--config PATH',
      'Path to the Bash configuration file',
      "By default: #{cli[:config]}"
    ) do |path|
      cli[:config] = path
    end
    opt.on(
      '--[no-]mytaxa',
      'Should I try setting up MyTaxa its dependencies?',
      'By default: interactive (true if --auto)'
    ) do |flag|
      cli[:mytaxa] = flag
    end
    opt.on(
      '--daemon-type STRING',
      'Type of daemon launcher, one of: bash, qsub, msub, slurm',
      "By default: interactive (#{cli[:dtype]} if --auto)"
    ) do |type|
      cli[:dtype] = type.to_sym
    end
    opt.on(
      '--ask-all',
      'Ask for the location of all software',
      'By default, only the locations missing in PATH are requested'
    ) do |flag|
      cli[:ask] = flag
    end
  end
end
|
37
39
|
|
38
40
|
def perform
|
39
|
-
miga = MiGA.root_path
|
40
41
|
cli.puts <<BANNER
|
41
42
|
===[ Welcome to MiGA, the Microbial Genome Atlas ]===
|
42
43
|
|
@@ -44,17 +45,70 @@ I'm the initialization script, and I'll sniff around your computer to
|
|
44
45
|
make sure you have all the requirements for MiGA data processing.
|
45
46
|
|
46
47
|
BANNER
|
48
|
+
list_requirements
|
49
|
+
rc_fh = open_rc_file
|
50
|
+
check_configuration_script rc_fh
|
51
|
+
paths = check_software_requirements rc_fh
|
52
|
+
check_additional_files paths
|
53
|
+
check_r_packages paths
|
54
|
+
check_ruby_gems paths
|
55
|
+
configure_daemon
|
56
|
+
close_rc_file rc_fh
|
57
|
+
cli.puts 'Configuration complete. MiGA is ready to work!'
|
58
|
+
cli.puts ''
|
59
|
+
end
|
60
|
+
|
61
|
+
# Deliberately a no-op; returns nil.
def empty_action; end
|
63
|
+
|
64
|
+
# Runs +cmd+ in a subshell after sourcing the script in cli[:config], and
# returns the captured standard output. +cmd+ is only executed if sourcing
# the script succeeds.
def run_cmd(cli, cmd)
  config_script = cli[:config]
  %x[. "#{config_script}" && #{cmd}]
end
|
67
|
+
|
68
|
+
# Pipes the R code +cmd+ into the R executable at paths['R'] (run with
# --vanilla -q) through #run_cmd, returning its output with standard error
# merged in.
def run_r_cmd(cli, paths, cmd)
  r_code = cmd.shellescape
  r_bin = paths['R'].shellescape
  run_cmd(cli, "echo #{r_code} | #{r_bin} --vanilla -q 2>&1")
end
|
72
|
+
|
73
|
+
# Tries to load the R package +pkg+ through #run_r_cmd.
# Returns whether the last child process exited successfully.
def test_r_package(cli, paths, pkg)
  r_code = "library('#{pkg}')"
  run_r_cmd(cli, paths, r_code)
  status = $?
  status.success?
end
|
77
|
+
|
78
|
+
# Installs the R package +pkg+ from http://cran.rstudio.com/ through
# #run_r_cmd and returns the output of that call.
def install_r_package(cli, paths, pkg)
  run_r_cmd(
    cli, paths,
    "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
  )
end
|
47
82
|
|
83
|
+
# Tries to require the gem +pkg+ with the Ruby executable at paths['ruby']
# (through #run_cmd, discarding standard error).
# Returns whether the last child process exited successfully.
def test_ruby_gem(cli, paths, pkg)
  ruby_bin = paths['ruby'].shellescape
  gem_name = pkg.shellescape
  run_cmd(cli, "#{ruby_bin} -r #{gem_name} -e '' 2>/dev/null")
  status = $?
  status.success?
end
|
88
|
+
|
89
|
+
# Installs the gem +pkg+ (with --user) by invoking Gem::GemRunner on the Ruby
# executable at paths['ruby'], so the gem is installed for that Ruby rather
# than for the one running this code. Returns the output of #run_cmd, with
# standard error merged in.
def install_ruby_gem(cli, paths, pkg)
  gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
  shell_cmd = [
    paths['ruby'].shellescape,
    '-r rubygems -r rubygems/gem_runner',
    "-e #{gem_cmd.shellescape} 2>&1"
  ].join(' ')
  run_cmd(cli, shell_cmd)
end
|
95
|
+
|
96
|
+
# Asks the user whether to display the requirements and, if the answer is
# 'yes', prints utils/requirements.txt (under MiGA.root_path) line by line
# between two empty lines.
def list_requirements
  answer = cli.ask_user(
    'Would you like to see all the requirements before starting?',
    'no', %w(yes no)
  )
  return unless answer == 'yes'

  cli.puts ''
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
  File.foreach(req_path) { |ln| cli.puts ln }
  cli.puts ''
end
|
108
|
+
|
109
|
+
private
|
57
110
|
|
111
|
+
def open_rc_file
|
58
112
|
rc_path = File.expand_path('.miga_rc', ENV['HOME'])
|
59
113
|
if File.exist? rc_path
|
60
114
|
if cli.ask_user(
|
@@ -70,8 +124,10 @@ BANNER
|
|
70
124
|
# `miga init` made this on #{Time.now}
|
71
125
|
|
72
126
|
BASH
|
127
|
+
rc_fh
|
128
|
+
end
|
73
129
|
|
74
|
-
|
130
|
+
def check_configuration_script(rc_fh)
|
75
131
|
unless File.exist? cli[:config]
|
76
132
|
cli[:config] = cli.ask_user(
|
77
133
|
'Is there a script I need to load at startup?',
|
@@ -86,47 +142,62 @@ BASH
|
|
86
142
|
cli[:config] = '/dev/null'
|
87
143
|
end
|
88
144
|
cli.puts ''
|
145
|
+
end
|
89
146
|
|
90
|
-
|
147
|
+
# Locates every program listed in utils/requirements.txt (under
# MiGA.root_path), skipping its first two lines and, unless MyTaxa was
# requested, the entries whose first column ends in "(opt)".
#
# rc_fh - Writable IO of the rc file; receives the MIGA_MYTAXA, MIGA_PATH and
#         PATH lines.
#
# Returns a Hash mapping each executable name to its shell-escaped full path.
# NOTE(review): values are stored already shell-escaped, while helpers such
# as run_r_cmd escape paths['R'] again; confirm that double escaping is
# intended.
def check_software_requirements(rc_fh)
  cli.puts 'Looking for requirements:'
  ask_for_mytaxa
  rc_fh.puts 'export MIGA_MYTAXA="no"' unless cli[:mytaxa]
  paths = {}
  rc_fh.puts 'MIGA_PATH=""'
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
  File.open(req_path, 'r') do |fh|
    fh.each_line do |ln|
      # The first two lines are skipped; fh.lineno is used instead of `$.`
      # because `$.` is global and follows whichever stream was read last
      next if fh.lineno < 3

      r = ln.chomp.split(/\t+/)
      next if r[0] =~ /\(opt\)$/ && !cli[:mytaxa]

      cli.print "Testing #{r[0]}#{" (#{r[3]})" if r[3]}... "
      # The rc handle must be passed explicitly: it is a local of this
      # method and is not visible inside find_software
      path = find_software(r[1], rc_fh)
      paths[r[1]] = File.expand_path(r[1], path).shellescape
    end
  end
  rc_fh.puts 'export PATH="$MIGA_PATH$PATH"'
  cli.puts ''
  paths
end

# Asks the user whether to include MyTaxa modules, unless cli[:mytaxa] was
# already set. Stores true or false in cli[:mytaxa].
def ask_for_mytaxa
  return unless cli[:mytaxa].nil?

  cli[:mytaxa] =
    cli.ask_user(
      'Should I include MyTaxa modules?',
      'yes', %w(yes no)
    ) == 'yes'
end

# Finds the directory that contains the executable +exec+.
#
# The directory reported by `which` is used, unless nothing is found or
# cli[:ask] is set, in which case the user is asked. The question is repeated
# until the directory contains an executable file with that name.
#
# exec  - Name of the executable.
# rc_fh - Optional writable IO of the rc file. When given, and the selected
#         directory differs from the one found by `which`, a line prepending
#         the directory to MIGA_PATH is written to it.
#
# Returns the directory as a String.
def find_software(exec, rc_fh = nil)
  path = nil
  loop do
    d_path = File.dirname(run_cmd(cli, "which #{exec.shellescape}"))
    if cli[:ask] || d_path == '.'
      path = cli.ask_user('Where can I find it?', d_path, nil, true)
    else
      path = d_path
      cli.puts path
    end
    if File.executable?(File.expand_path(exec, path))
      if d_path != path && !rc_fh.nil?
        rc_fh.puts "MIGA_PATH=\"#{path}:$MIGA_PATH\" # #{exec}"
      end
      break
    end
    cli.print "I cannot find #{exec} "
  end
  path
end
|
128
199
|
|
129
|
-
|
200
|
+
def check_additional_files(paths)
|
130
201
|
if cli[:mytaxa]
|
131
202
|
cli.puts 'Looking for MyTaxa databases:'
|
132
203
|
mt = File.dirname paths["MyTaxa"]
|
@@ -145,8 +216,9 @@ BASH
|
|
145
216
|
end
|
146
217
|
cli.puts ''
|
147
218
|
end
|
219
|
+
end
|
148
220
|
|
149
|
-
|
221
|
+
def check_r_packages(paths)
|
150
222
|
cli.puts 'Looking for R packages:'
|
151
223
|
%w(enveomics.R ape cluster vegan).each do |pkg|
|
152
224
|
cli.print "Testing #{pkg}... "
|
@@ -161,8 +233,9 @@ BASH
|
|
161
233
|
end
|
162
234
|
end
|
163
235
|
cli.puts ''
|
236
|
+
end
|
164
237
|
|
165
|
-
|
238
|
+
def check_ruby_gems(paths)
|
166
239
|
cli.puts 'Looking for Ruby gems:'
|
167
240
|
%w(sqlite3 daemons json).each do |pkg|
|
168
241
|
cli.print "Testing #{pkg}... "
|
@@ -171,8 +244,8 @@ BASH
|
|
171
244
|
else
|
172
245
|
cli.puts 'no, installing'
|
173
246
|
# This hackey mess is meant to ensure the test and installation are done
|
174
|
-
# on the configuration Ruby, not on the Ruby currently executing the
|
175
|
-
# action
|
247
|
+
# on the configuration Ruby, not on the Ruby currently executing the
|
248
|
+
# init action
|
176
249
|
cli.print install_ruby_gem(cli, paths, pkg)
|
177
250
|
unless test_ruby_gem(cli, paths, pkg)
|
178
251
|
raise "Unable to auto-install Ruby gem: #{pkg}"
|
@@ -180,119 +253,9 @@ BASH
|
|
180
253
|
end
|
181
254
|
end
|
182
255
|
cli.puts ''
|
256
|
+
end
|
183
257
|
|
184
|
-
|
185
|
-
cli.puts 'Default daemon configuration:'
|
186
|
-
daemon_f = File.expand_path('.miga_daemon.json', ENV['HOME'])
|
187
|
-
unless File.exist?(daemon_f) and cli.ask_user(
|
188
|
-
'A template daemon already exists, do you want to preserve it?',
|
189
|
-
'yes', %w(yes no)) == 'yes'
|
190
|
-
v = {created: Time.now.to_s, updated: Time.now.to_s}
|
191
|
-
v[:type] = cli.ask_user(
|
192
|
-
'Please select the type of daemon you want to setup',
|
193
|
-
cli[:dtype], %w(bash qsub msub slurm))
|
194
|
-
case v[:type]
|
195
|
-
when 'bash'
|
196
|
-
v[:latency] = cli.ask_user(
|
197
|
-
'How long should I sleep? (in seconds)', '30').to_i
|
198
|
-
v[:maxjobs] = cli.ask_user(
|
199
|
-
'How many jobs can I launch at once?', '6').to_i
|
200
|
-
v[:ppn] = cli.ask_user(
|
201
|
-
'How many CPUs can I use per job?', '2').to_i
|
202
|
-
cli.puts 'Setting up internal daemon defaults.'
|
203
|
-
cli.puts 'If you don\'t understand this just leave default values:'
|
204
|
-
v[:cmd] = cli.ask_user(
|
205
|
-
"How should I launch tasks?\n %1$s: script path, " \
|
206
|
-
"%2$s: variables, %3$d: CPUs, %4$s: log file, %5$s: task name.\n",
|
207
|
-
"%2$s '%1$s' > '%4$s' 2>&1")
|
208
|
-
v[:var] = cli.ask_user(
|
209
|
-
"How should I pass variables?\n %1$s: keys, %2$s: values.\n",
|
210
|
-
"%1$s=%2$s")
|
211
|
-
v[:varsep] = cli.ask_user(
|
212
|
-
'What should I use to separate variables?', ' ')
|
213
|
-
v[:alive] = cli.ask_user(
|
214
|
-
"How can I know that a process is still alive?\n %1$s: PID, " \
|
215
|
-
"output should be 1 for running and 0 for non-running.\n",
|
216
|
-
"ps -p '%1$s'|tail -n+2|wc -l")
|
217
|
-
v[:kill] = cli.ask_user(
|
218
|
-
"How should I terminate tasks?\n %s: process ID.", "kill -9 '%s'")
|
219
|
-
when 'slurm'
|
220
|
-
queue = cli.ask_user(
|
221
|
-
'What queue should I use?', nil, nil, true)
|
222
|
-
v[:latency] = cli.ask_user(
|
223
|
-
'How long should I sleep? (in seconds)', '150').to_i
|
224
|
-
v[:maxjobs] = cli.ask_user(
|
225
|
-
'How many jobs can I launch at once?', '300').to_i
|
226
|
-
v[:ppn] = cli.ask_user(
|
227
|
-
'How many CPUs can I use per job?', '2').to_i
|
228
|
-
cli.puts 'Setting up internal daemon defaults'
|
229
|
-
cli.puts 'If you don\'t understand this just leave default values:'
|
230
|
-
v[:cmd] = cli.ask_user(
|
231
|
-
"How should I launch tasks?\n %1$s: script path, " \
|
232
|
-
"%2$s: variables, %3$d: CPUs, %4$d: log file, %5$s: task name.\n",
|
233
|
-
"%2$s sbatch --partition='#{queue}' --export=ALL " \
|
234
|
-
"--nodes=1 --ntasks-per-node=%3$d --output='%4$s' " \
|
235
|
-
"--job-name='%5$s' --mem=9G --time=12:00:00 %1$s " \
|
236
|
-
"| perl -pe 's/.* //'")
|
237
|
-
v[:var] = cli.ask_user(
|
238
|
-
"How should I pass variables?\n %1$s: keys, %2$s: values.\n",
|
239
|
-
"%1$s=%2$s")
|
240
|
-
v[:varsep] = cli.ask_user(
|
241
|
-
'What should I use to separate variables?', ' ')
|
242
|
-
v[:alive] = cli.ask_user(
|
243
|
-
"How can I know that a process is still alive?\n %1$s: job id, " \
|
244
|
-
"output should be 1 for running and 0 for non-running.\n",
|
245
|
-
"squeue -h -o %%t -j '%1$s' | grep '^PD\\|R\\|CF\\|CG$' " \
|
246
|
-
"| tail -n 1 | wc -l")
|
247
|
-
v[:kill] = cli.ask_user(
|
248
|
-
"How should I terminate tasks?\n %s: process ID.", "scancel '%s'")
|
249
|
-
else # [qm]sub
|
250
|
-
queue = cli.ask_user('What queue should I use?', nil, nil, true)
|
251
|
-
v[:latency] = cli.ask_user(
|
252
|
-
'How long should I sleep? (in seconds)', '150').to_i
|
253
|
-
v[:maxjobs] = cli.ask_user(
|
254
|
-
'How many jobs can I launch at once?', '300').to_i
|
255
|
-
v[:ppn] = cli.ask_user(
|
256
|
-
'How many CPUs can I use per job?', '2').to_i
|
257
|
-
cli.puts 'Setting up internal daemon defaults.'
|
258
|
-
cli.puts 'If you don\'t understand this just leave default values:'
|
259
|
-
v[:cmd] = cli.ask_user(
|
260
|
-
"How should I launch tasks?\n %1$s: script path, " \
|
261
|
-
"%2$s: variables, %3$d: CPUs, %4$d: log file, %5$s: task name.\n",
|
262
|
-
"#{v[:type]} -q '#{queue}' -v '%2$s' -l nodes=1:ppn=%3$d %1$s " \
|
263
|
-
"-j oe -o '%4$s' -N '%5$s' -l mem=9g -l walltime=12:00:00 " \
|
264
|
-
"| grep .")
|
265
|
-
v[:var] = cli.ask_user(
|
266
|
-
"How should I pass variables?\n %1$s: keys, %2$s: values.\n",
|
267
|
-
"%1$s=%2$s")
|
268
|
-
v[:varsep] = cli.ask_user(
|
269
|
-
'What should I use to separate variables?', ',')
|
270
|
-
if v[:type] == 'qsub'
|
271
|
-
v[:alive] = cli.ask_user(
|
272
|
-
"How can I know that a process is still alive?\n " \
|
273
|
-
"%1$s: job id, output should be 1 for running and " \
|
274
|
-
"0 for non-running.\n",
|
275
|
-
"qstat -f '%1$s'|grep ' job_state ='|perl -pe 's/.*= //'" \
|
276
|
-
"|grep '[^C]'|tail -n1|wc -l|awk '{print $1}'")
|
277
|
-
v[:kill] = cli.ask_user(
|
278
|
-
"How should I terminate tasks?\n %s: process ID.", "qdel '%s'")
|
279
|
-
else # msub
|
280
|
-
v[:alive] = cli.ask_user(
|
281
|
-
"How can I know that a process is still alive?\n " \
|
282
|
-
"%1$s: job id, output should be 1 for running and " \
|
283
|
-
"0 for non-running.\n",
|
284
|
-
"checkjob '%1$s'|grep '^State:'|perl -pe 's/.*: //'" \
|
285
|
-
"|grep 'Deferred\\|Hold\\|Idle\\|Starting\\|Running\\|Blocked'" \
|
286
|
-
"|tail -n1|wc -l|awk '{print $1}'")
|
287
|
-
v[:kill] = cli.ask_user(
|
288
|
-
"How should I terminate tasks?\n %s: process ID.",
|
289
|
-
"canceljob '%s'")
|
290
|
-
end
|
291
|
-
end
|
292
|
-
File.open(daemon_f, 'w') { |fh| fh.puts JSON.pretty_generate(v) }
|
293
|
-
end
|
294
|
-
cli.puts ''
|
295
|
-
|
258
|
+
def close_rc_file(rc_fh)
|
296
259
|
rc_fh.puts <<FOOT
|
297
260
|
|
298
261
|
MIGA_CONFIG_VERSION='#{MiGA::MiGA.VERSION}'
|
@@ -300,44 +263,6 @@ MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
|
|
300
263
|
MIGA_CONFIG_DATE='#{Time.now}'
|
301
264
|
|
302
265
|
FOOT
|
303
|
-
|
304
|
-
cli.puts 'Configuration complete. MiGA is ready to work!'
|
305
|
-
cli.puts ''
|
306
|
-
|
307
|
-
end
|
308
|
-
|
309
|
-
def empty_action
|
310
|
-
end
|
311
|
-
|
312
|
-
def run_cmd(cli, cmd)
|
313
|
-
`. "#{cli[:config]}" && #{cmd}`
|
314
|
-
end
|
315
|
-
|
316
|
-
def run_r_cmd(cli, paths, cmd)
|
317
|
-
run_cmd(cli,
|
318
|
-
"echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1")
|
319
|
-
end
|
320
|
-
|
321
|
-
def test_r_package(cli, paths, pkg)
|
322
|
-
run_r_cmd(cli, paths, "library('#{pkg}')")
|
323
|
-
$?.success?
|
324
|
-
end
|
325
|
-
|
326
|
-
def install_r_package(cli, paths, pkg)
|
327
|
-
r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
|
328
|
-
run_r_cmd(cli, paths, r_cmd)
|
329
|
-
end
|
330
|
-
|
331
|
-
def test_ruby_gem(cli, paths, pkg)
|
332
|
-
run_cmd(cli,
|
333
|
-
"#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null")
|
334
|
-
$?.success?
|
335
|
-
end
|
336
|
-
|
337
|
-
def install_ruby_gem(cli, paths, pkg)
|
338
|
-
gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
|
339
|
-
run_cmd(cli, "#{paths['ruby'].shellescape} \
|
340
|
-
-r rubygems -r rubygems/gem_runner \
|
341
|
-
-e #{gem_cmd.shellescape} 2>&1")
|
266
|
+
rc_fh.close
|
342
267
|
end
|
343
268
|
end
|