miga-base 0.7.24.0 → 0.7.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/add.rb +9 -6
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/index_wf.rb +4 -2
- data/lib/miga/cli/action/init.rb +60 -59
- data/lib/miga/cli/action/init/files_helper.rb +2 -1
- data/lib/miga/cli/action/preproc_wf.rb +7 -5
- data/lib/miga/cli/action/wf.rb +39 -23
- data/lib/miga/cli/base.rb +16 -5
- data/lib/miga/common/with_option.rb +1 -1
- data/lib/miga/dataset/result.rb +2 -1
- data/lib/miga/project/base.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/essential_genes.bash +17 -1
- data/scripts/miga.bash +8 -2
- data/test/lair_test.rb +1 -2
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/distance/base.rb +9 -0
- data/utils/distance/commands.rb +183 -81
- data/utils/distance/database.rb +68 -9
- data/utils/distance/pipeline.rb +14 -18
- data/utils/distance/runner.rb +16 -30
- data/utils/distance/temporal.rb +4 -2
- data/utils/distances.rb +2 -2
- data/utils/requirements.txt +1 -1
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fefd5ed0ec84f2b49d25243ecaec7e567000c04598605ba8a78d0a866c35e5b1
|
4
|
+
data.tar.gz: b0f473401ccf64d31fcfc0ed19b54a2b84d29c138e3f983a69083659fcfe547e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2520f7169e91e38aed0027e679d0cbc126271a6948ce27916bef9a34299eb50df249568d06bbdb09c5ff13754ec7628d48d7154a18c5bf363628b69d241757c2
|
7
|
+
data.tar.gz: 0e684e321ec13e022f95d0499c2c83fea6230bbcadbaacce226e20423cda9ea7466fac929b579e29b6a50745b243da53b9d1697a5e08876b2d423d577205e269
|
data/lib/miga/cli/action/add.rb
CHANGED
@@ -6,10 +6,7 @@ require 'miga/cli/action'
|
|
6
6
|
class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
7
7
|
def parse_cli
|
8
8
|
cli.expect_files = true
|
9
|
-
cli.defaults = {
|
10
|
-
ref: true, ignore_dups: false,
|
11
|
-
regexp: MiGA::Cli.FILE_REGEXP
|
12
|
-
}
|
9
|
+
cli.defaults = { ref: true, ignore_dups: false }
|
13
10
|
cli.parse do |opt|
|
14
11
|
opt.separator 'You can create multiple datasets with a single command; ' \
|
15
12
|
'simply pass all the files at the end: {FILES...}'
|
@@ -37,7 +34,10 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
37
34
|
opt.on(
|
38
35
|
'-R', '--name-regexp REGEXP', Regexp,
|
39
36
|
'Regular expression indicating how to extract the name from the path',
|
40
|
-
|
37
|
+
'By default for paired files:',
|
38
|
+
"'#{MiGA::Cli.FILE_REGEXP(true)}'",
|
39
|
+
'By default for other files:',
|
40
|
+
"'#{MiGA::Cli.FILE_REGEXP}'"
|
41
41
|
) { |v| cli[:regexp] = v }
|
42
42
|
opt.on(
|
43
43
|
'--prefix STRING',
|
@@ -59,6 +59,9 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
59
59
|
p = cli.load_project
|
60
60
|
files, file_type = get_files_and_type
|
61
61
|
|
62
|
+
paired = cli[:input_type].to_s.include?('_paired')
|
63
|
+
cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
|
64
|
+
|
62
65
|
cli.say 'Creating datasets:'
|
63
66
|
files.each do |file|
|
64
67
|
d = create_dataset(file, p)
|
@@ -166,7 +169,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
166
169
|
file_type[2].each_with_index do |ext, i|
|
167
170
|
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
168
171
|
FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
|
169
|
-
cli.say " file: #{file[i]}"
|
172
|
+
cli.say " file: #{File.basename(file[i])}"
|
170
173
|
end
|
171
174
|
File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
|
172
175
|
end
|
@@ -55,7 +55,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
55
55
|
{ run_project_stats: false, run_clades: false },
|
56
56
|
{ run_mytaxa_scan: false, run_ssu: false }
|
57
57
|
)
|
58
|
-
p.set_option(:gsp_metric, cli[:metric])
|
58
|
+
p.set_option(:gsp_metric, cli[:metric].to_s)
|
59
59
|
p.set_option(:"gsp_#{cli[:metric]}", cli[:threshold])
|
60
60
|
|
61
61
|
# Run
|
@@ -16,8 +16,10 @@ class MiGA::Cli::Action::IndexWf < MiGA::Cli::Action
|
|
16
16
|
'Perform MyTaxa scan analysis'
|
17
17
|
) { |v| cli[:mytaxa] = v }
|
18
18
|
opts_for_wf_distances(opt)
|
19
|
-
opts_for_wf(
|
20
|
-
|
19
|
+
opts_for_wf(
|
20
|
+
opt, 'Input genome assemblies (nucleotides, FastA)',
|
21
|
+
cleanup: false, project_type: true
|
22
|
+
)
|
21
23
|
end
|
22
24
|
end
|
23
25
|
|
data/lib/miga/cli/action/init.rb
CHANGED
@@ -61,8 +61,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
61
61
|
check_configuration_script(rc_fh)
|
62
62
|
paths = check_software_requirements(rc_fh)
|
63
63
|
check_additional_files(paths)
|
64
|
-
|
65
|
-
check_ruby_gems(paths)
|
64
|
+
check_libraries(paths)
|
66
65
|
configure_daemon
|
67
66
|
close_rc_file(rc_fh)
|
68
67
|
cli.puts 'Configuration complete. MiGA is ready to work!'
|
@@ -83,34 +82,6 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
83
82
|
)
|
84
83
|
end
|
85
84
|
|
86
|
-
def test_r_package(cli, paths, pkg)
|
87
|
-
run_r_cmd(cli, paths, "library('#{pkg}')")
|
88
|
-
$?.success?
|
89
|
-
end
|
90
|
-
|
91
|
-
def install_r_package(cli, paths, pkg)
|
92
|
-
r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
|
93
|
-
run_r_cmd(cli, paths, r_cmd)
|
94
|
-
end
|
95
|
-
|
96
|
-
def test_ruby_gem(cli, paths, pkg)
|
97
|
-
run_cmd(
|
98
|
-
cli,
|
99
|
-
"#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null"
|
100
|
-
)
|
101
|
-
$?.success?
|
102
|
-
end
|
103
|
-
|
104
|
-
def install_ruby_gem(cli, paths, pkg)
|
105
|
-
gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
|
106
|
-
run_cmd(
|
107
|
-
cli,
|
108
|
-
"#{paths['ruby'].shellescape} \
|
109
|
-
-r rubygems -r rubygems/gem_runner \
|
110
|
-
-e #{gem_cmd.shellescape} 2>&1"
|
111
|
-
)
|
112
|
-
end
|
113
|
-
|
114
85
|
def list_requirements
|
115
86
|
if cli.ask_user(
|
116
87
|
'Would you like to see all the requirements before starting?',
|
@@ -189,40 +160,70 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
|
|
189
160
|
path
|
190
161
|
end
|
191
162
|
|
192
|
-
def
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
end
|
163
|
+
def check_libraries(paths)
|
164
|
+
req_libraries = {
|
165
|
+
r: %w[ape cluster vegan],
|
166
|
+
ruby: %w[sqlite3 daemons json],
|
167
|
+
python: %w[numpy]
|
168
|
+
}
|
169
|
+
|
170
|
+
req_libraries.each do |language, libraries|
|
171
|
+
cli.puts "Looking for #{language.to_s.capitalize} libraries:"
|
172
|
+
libraries.each do |lib|
|
173
|
+
check_and_install_library(paths, language, lib)
|
204
174
|
end
|
175
|
+
cli.puts ''
|
205
176
|
end
|
206
|
-
cli.puts ''
|
207
177
|
end
|
208
178
|
|
209
|
-
def
|
210
|
-
cli.
|
211
|
-
|
212
|
-
cli.
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
# on the configuration Ruby, not on the Ruby currently executing the
|
219
|
-
# init action
|
220
|
-
cli.print install_ruby_gem(cli, paths, pkg)
|
221
|
-
unless test_ruby_gem(cli, paths, pkg)
|
222
|
-
raise "Unable to auto-install Ruby gem: #{pkg}"
|
223
|
-
end
|
179
|
+
def check_and_install_library(paths, language, library)
|
180
|
+
cli.print "Testing #{library}... "
|
181
|
+
if test_library(cli, paths, language, library)
|
182
|
+
cli.puts 'yes'
|
183
|
+
else
|
184
|
+
cli.puts 'no, installing'
|
185
|
+
cli.print '' + install_library(cli, paths, language, library)
|
186
|
+
unless test_library(cli, paths, language, library)
|
187
|
+
raise "Cannot install #{language.to_s.capitalize} library: #{library}"
|
224
188
|
end
|
225
189
|
end
|
226
|
-
|
190
|
+
end
|
191
|
+
|
192
|
+
def test_library(cli, paths, language, pkg)
|
193
|
+
case language
|
194
|
+
when :r
|
195
|
+
run_r_cmd(cli, paths, "library('#{pkg}')")
|
196
|
+
when :ruby
|
197
|
+
x = "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null"
|
198
|
+
run_cmd(cli, x)
|
199
|
+
when :python
|
200
|
+
x = "#{paths['python3'].shellescape} -c 'import #{pkg}' 2>/dev/null"
|
201
|
+
run_cmd(cli, x)
|
202
|
+
else
|
203
|
+
raise "Unrecognized language: #{language}"
|
204
|
+
end
|
205
|
+
$?.success?
|
206
|
+
end
|
207
|
+
|
208
|
+
def install_library(cli, paths, language, pkg)
|
209
|
+
case language
|
210
|
+
when :r
|
211
|
+
r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
|
212
|
+
run_r_cmd(cli, paths, r_cmd)
|
213
|
+
when :ruby
|
214
|
+
# This hackey mess is meant to ensure the test and installation are done
|
215
|
+
# on the configuration Ruby, not on the Ruby currently executing the
|
216
|
+
# init action
|
217
|
+
gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
|
218
|
+
x = "#{paths['ruby'].shellescape} -r rubygems -r rubygems/gem_runner \
|
219
|
+
-e #{gem_cmd.shellescape} 2>&1"
|
220
|
+
run_cmd(cli, x)
|
221
|
+
when :python
|
222
|
+
x = "#{paths['python3'].shellescape} \
|
223
|
+
-m pip install #{pkg.shellescape} 2>&1"
|
224
|
+
run_cmd(cli, x)
|
225
|
+
else
|
226
|
+
raise "Unrecognized language: #{language}"
|
227
|
+
end
|
227
228
|
end
|
228
229
|
end
|
@@ -26,7 +26,7 @@ module MiGA::Cli::Action::Init::FilesHelper
|
|
26
26
|
def close_rc_file(rc_fh)
|
27
27
|
rc_fh.puts <<~FOOT
|
28
28
|
|
29
|
-
MIGA_CONFIG_VERSION='#{MiGA::MiGA.
|
29
|
+
MIGA_CONFIG_VERSION='#{MiGA::MiGA.FULL_VERSION}'
|
30
30
|
MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
|
31
31
|
MIGA_CONFIG_DATE='#{Time.now}'
|
32
32
|
|
@@ -59,6 +59,7 @@ module MiGA::Cli::Action::Init::FilesHelper
|
|
59
59
|
end
|
60
60
|
check_rdp_classifier if cli[:rdp]
|
61
61
|
check_phyla_lite
|
62
|
+
cli.puts ''
|
62
63
|
end
|
63
64
|
|
64
65
|
def check_mytaxa_scores(paths)
|
@@ -21,18 +21,20 @@ class MiGA::Cli::Action::PreprocWf < MiGA::Cli::Action
|
|
21
21
|
'-m', '--mytaxa_scan',
|
22
22
|
'Perform MyTaxa scan analysis'
|
23
23
|
) { |v| cli[:mytaxa] = v }
|
24
|
-
opts_for_wf(
|
25
|
-
|
24
|
+
opts_for_wf(
|
25
|
+
opt, 'Input files as defined by --input-type',
|
26
|
+
multi: true, cleanup: false, ncbi: false, project_type: true
|
27
|
+
)
|
26
28
|
end
|
27
29
|
end
|
28
30
|
|
29
31
|
def perform
|
30
32
|
# Input data
|
31
33
|
cli.ensure_par(input_type: '-i')
|
32
|
-
|
33
|
-
|
34
|
-
.map { |i| ["run_#{i}", false] }
|
34
|
+
norun = %w[
|
35
|
+
project_stats haai_distances aai_distances ani_distances clade_finding
|
35
36
|
]
|
37
|
+
p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
|
36
38
|
d_metadata = { run_distances: false }
|
37
39
|
unless cli[:mytaxa]
|
38
40
|
d_metadata[:run_mytaxa_scan] = false
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -7,8 +7,7 @@ module MiGA::Cli::Action::Wf
|
|
7
7
|
def default_opts_for_wf
|
8
8
|
cli.expect_files = true
|
9
9
|
cli.defaults = {
|
10
|
-
clean: false,
|
11
|
-
project_type: :genomes, dataset_type: :popgenome,
|
10
|
+
clean: false, project_type: :genomes, dataset_type: :popgenome,
|
12
11
|
ncbi_draft: true, min_qual: MiGA::Project.OPTIONS[:min_qual][:default]
|
13
12
|
}
|
14
13
|
end
|
@@ -56,28 +55,10 @@ module MiGA::Cli::Action::Wf
|
|
56
55
|
opt.on(
|
57
56
|
'-R', '--name-regexp REGEXP', Regexp,
|
58
57
|
'Regular expression indicating how to extract the name from the path',
|
59
|
-
"By default: '#{
|
58
|
+
"By default: '#{MiGA::Cli.FILE_REGEXP}'"
|
60
59
|
) { |v| cli[:regexp] = v }
|
61
|
-
opt
|
62
|
-
|
63
|
-
"Type of datasets. By default: #{cli[:dataset_type]}",
|
64
|
-
'Recognized types:',
|
65
|
-
*MiGA::Dataset.KNOWN_TYPES
|
66
|
-
.map do |k, v|
|
67
|
-
"~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
|
68
|
-
end.compact
|
69
|
-
) { |v| cli[:dataset_type] = v.downcase.to_sym }
|
70
|
-
if params[:project_type]
|
71
|
-
opt.on(
|
72
|
-
'--project-type STRING',
|
73
|
-
"Type of project. By default: #{cli[:project_type]}",
|
74
|
-
'Recognized types:',
|
75
|
-
*MiGA::Project.KNOWN_TYPES
|
76
|
-
.map do |k, v|
|
77
|
-
"~ #{k}: #{v[:description]}" unless !params[:multi] && v[:multi]
|
78
|
-
end.compact
|
79
|
-
) { |v| cli[:project_type] = v.downcase.to_sym }
|
80
|
-
end
|
60
|
+
opt_object_type(opt, :dataset, params[:multi])
|
61
|
+
opt_object_type(opt, :project, params[:multi]) if params[:project_type]
|
81
62
|
opt.on(
|
82
63
|
'--daemon PATH',
|
83
64
|
'Use custom daemon configuration in JSON format',
|
@@ -124,10 +105,14 @@ module MiGA::Cli::Action::Wf
|
|
124
105
|
project_type: '--project-type',
|
125
106
|
dataset_type: '--dataset-type'
|
126
107
|
)
|
108
|
+
paired = cli[:input_type].to_s.include?('_paired')
|
109
|
+
cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
|
110
|
+
|
127
111
|
# Create empty project
|
128
112
|
call_cli(
|
129
113
|
['new', '-P', cli[:outdir], '-t', cli[:project_type]]
|
130
114
|
) unless MiGA::Project.exist? cli[:outdir]
|
115
|
+
|
131
116
|
# Define project metadata
|
132
117
|
p = cli.load_project(:outdir, '-o')
|
133
118
|
p_metadata[:type] = cli[:project_type]
|
@@ -135,6 +120,7 @@ module MiGA::Cli::Action::Wf
|
|
135
120
|
%i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
|
136
121
|
p.set_option(i, cli[i])
|
137
122
|
end
|
123
|
+
|
138
124
|
# Download datasets
|
139
125
|
unless cli[:ncbi_taxon].nil?
|
140
126
|
what = cli[:ncbi_draft] ? '--all' : '--complete'
|
@@ -142,6 +128,7 @@ module MiGA::Cli::Action::Wf
|
|
142
128
|
['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
|
143
129
|
)
|
144
130
|
end
|
131
|
+
|
145
132
|
# Add datasets
|
146
133
|
call_cli(
|
147
134
|
[
|
@@ -153,6 +140,7 @@ module MiGA::Cli::Action::Wf
|
|
153
140
|
'-R', cli[:regexp]
|
154
141
|
] + cli.files
|
155
142
|
) unless cli.files.empty?
|
143
|
+
|
156
144
|
# Define datasets metadata
|
157
145
|
p.load
|
158
146
|
d_metadata[:type] = cli[:dataset_type]
|
@@ -208,4 +196,32 @@ module MiGA::Cli::Action::Wf
|
|
208
196
|
md.each { |k, v| obj.metadata[k] = v }
|
209
197
|
obj.save
|
210
198
|
end
|
199
|
+
|
200
|
+
private
|
201
|
+
|
202
|
+
##
|
203
|
+
# Add option --type or --project-type to +opt+
|
204
|
+
def opt_object_type(opt, obj, multi)
|
205
|
+
conf =
|
206
|
+
case obj
|
207
|
+
when :dataset
|
208
|
+
['type', 'datasets', :dataset_type, MiGA::Dataset]
|
209
|
+
when :project
|
210
|
+
['project-type', 'project', :project_type, MiGA::Project]
|
211
|
+
else
|
212
|
+
raise "Unrecognized object type: #{obj}"
|
213
|
+
end
|
214
|
+
|
215
|
+
options =
|
216
|
+
conf[3].KNOWN_TYPES.map do |k, v|
|
217
|
+
"~ #{k}: #{v[:description]}" unless !multi && v[:multi]
|
218
|
+
end.compact
|
219
|
+
|
220
|
+
opt.on(
|
221
|
+
"--#{conf[0]} STRING",
|
222
|
+
"Type of #{conf[1]}. By default: #{cli[conf[2]]}",
|
223
|
+
'Recognized types:',
|
224
|
+
*options
|
225
|
+
) { |v| cli[conf[2]] = v.downcase.to_sym }
|
226
|
+
end
|
211
227
|
end
|
data/lib/miga/cli/base.rb
CHANGED
@@ -88,19 +88,30 @@ module MiGA::Cli::Base
|
|
88
88
|
@@EXECS = @@TASK_DESC.keys
|
89
89
|
|
90
90
|
@@FILE_REGEXP =
|
91
|
-
%r{^(?:.*/)?(.+?)(\.[A-Z]*(
|
91
|
+
%r{^(?:.*/)?(.+?)(\.[A-Z]*(Reads|Contigs))?(\.f[nastq]+)?(\.gz)?$}i
|
92
|
+
|
93
|
+
@@PAIRED_FILE_REGEXP =
|
94
|
+
%r{^(?:.*/)?(.+?)(\.[A-Z]*([12]|Reads))?(\.f[nastq]+)?(\.gz)?$}i
|
92
95
|
end
|
93
96
|
|
94
97
|
class MiGA::Cli < MiGA::MiGA
|
95
98
|
include MiGA::Cli::Base
|
96
99
|
|
97
100
|
class << self
|
98
|
-
def TASK_DESC
|
101
|
+
def TASK_DESC
|
102
|
+
@@TASK_DESC
|
103
|
+
end
|
99
104
|
|
100
|
-
def TASK_ALIAS
|
105
|
+
def TASK_ALIAS
|
106
|
+
@@TASK_ALIAS
|
107
|
+
end
|
101
108
|
|
102
|
-
def EXECS
|
109
|
+
def EXECS
|
110
|
+
@@EXECS
|
111
|
+
end
|
103
112
|
|
104
|
-
def FILE_REGEXP
|
113
|
+
def FILE_REGEXP(paired = false)
|
114
|
+
paired ? @@PAIRED_FILE_REGEXP : @@FILE_REGEXP
|
115
|
+
end
|
105
116
|
end
|
106
117
|
end
|
data/lib/miga/dataset/result.rb
CHANGED
data/lib/miga/project/base.rb
CHANGED
@@ -132,7 +132,7 @@ module MiGA::Project::Base
|
|
132
132
|
haai_p: {
|
133
133
|
desc: 'Value of aai.rb -p on hAAI', type: String,
|
134
134
|
default: proc { |project| project.clade? ? 'no' : 'blast+' },
|
135
|
-
in: %w[blast+ blast blat diamond no]
|
135
|
+
in: %w[fastaai blast+ blast blat diamond no]
|
136
136
|
},
|
137
137
|
aai_p: {
|
138
138
|
desc: 'Value of aai.rb -p on AAI', default: 'blast+', type: String,
|