miga-base 0.7.3.0 → 0.7.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +21 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +6 -3
- data/lib/miga/common/with_daemon_class.rb +3 -2
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +34 -25
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -4
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +25 -21
- data/utils/distance/pipeline.rb +16 -10
- data/utils/distance/runner.rb +19 -13
- data/utils/distance/temporal.rb +7 -4
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3682f50e3efe936ce751cd83cc7945edddb8e1c3ea6e654c4d54f8ea79efbfcb
|
4
|
+
data.tar.gz: a5bc821d8f1b6f55baf495eea28e8783c86c61ffaca2d486e4589b818a60038f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23e986949f97ae31498b7310eba666f0fc4b5f3e4ab9d38a135b2934db901449dca70ce74830e4353bb60f2196ce2c195b1bfb20400f884494e9766e58ea5214
|
7
|
+
data.tar.gz: 3857008111b8a65b1fbf09442eb3a657789ebf964769c7805485be57298141c363296b7d2491ef3379726344f07892211d10d4c72fc74a8095bc0eaf00d4e873
|
data/lib/miga/cli.rb
CHANGED
@@ -7,7 +7,6 @@ require 'optparse'
|
|
7
7
|
##
|
8
8
|
# MiGA Command Line Interface API.
|
9
9
|
class MiGA::Cli < MiGA::MiGA
|
10
|
-
|
11
10
|
require 'miga/cli/base'
|
12
11
|
require 'miga/cli/opt_helper'
|
13
12
|
require 'miga/cli/objects_helper'
|
@@ -61,7 +60,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
61
60
|
|
62
61
|
def initialize(argv)
|
63
62
|
@data = {}
|
64
|
-
@defaults = {verbose: false, tabular: false}
|
63
|
+
@defaults = { verbose: false, tabular: false }
|
65
64
|
@opt_common = true
|
66
65
|
@objects = {}
|
67
66
|
if argv[0].nil? or argv[0].to_s[0] == '-'
|
@@ -106,6 +105,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
106
105
|
# otherwise it's sent to +$stderr+
|
107
106
|
def say(*par)
|
108
107
|
return unless self[:verbose]
|
108
|
+
|
109
109
|
super(*par)
|
110
110
|
end
|
111
111
|
|
@@ -116,16 +116,17 @@ class MiGA::Cli < MiGA::MiGA
|
|
116
116
|
# The report goes to $stderr iff --verborse
|
117
117
|
def advance(step, n = 0, total = nil, bin = true)
|
118
118
|
return unless self[:verbose]
|
119
|
+
|
119
120
|
adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
|
120
121
|
('%.1f%% (%s/%s)' % [100.0 * n / total,
|
121
|
-
|
122
|
+
num_suffix(n, bin), num_suffix(total, bin)])
|
122
123
|
$stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
|
123
124
|
end
|
124
125
|
|
125
126
|
def num_suffix(n, bin = false)
|
126
127
|
p = ''
|
127
|
-
{T: 4, G: 3, M: 2, K: 1}.each do |k,x|
|
128
|
-
v = (bin ? 1024 : 1e3)
|
128
|
+
{ T: 4, G: 3, M: 2, K: 1 }.each do |k, x|
|
129
|
+
v = (bin ? 1024 : 1e3)**x
|
129
130
|
if n > v
|
130
131
|
n = '%.1f' % (n / v)
|
131
132
|
p = k
|
@@ -160,7 +161,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
160
161
|
##
|
161
162
|
# Set default values in the Hash +hsh+
|
162
163
|
def defaults=(hsh)
|
163
|
-
hsh.each{ |k,v| @defaults[k] = v }
|
164
|
+
hsh.each { |k, v| @defaults[k] = v }
|
164
165
|
end
|
165
166
|
|
166
167
|
##
|
@@ -192,6 +193,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
192
193
|
def launch
|
193
194
|
begin
|
194
195
|
raise "See `miga -h`" if action.nil?
|
196
|
+
|
195
197
|
action.launch
|
196
198
|
rescue => err
|
197
199
|
$stderr.puts "Exception: #{err}"
|
@@ -222,8 +224,8 @@ class MiGA::Cli < MiGA::MiGA
|
|
222
224
|
# +par+, a Hash with object names as keys and parameter flag as values.
|
223
225
|
# If missing, raise an error with message +msg+
|
224
226
|
def ensure_par(req, msg = '%<name>s is mandatory: please provide %<flag>s')
|
225
|
-
req.each do |k,v|
|
226
|
-
raise (msg % {name: k, flag: v}) if self[k].nil?
|
227
|
+
req.each do |k, v|
|
228
|
+
raise (msg % { name: k, flag: v }) if self[k].nil?
|
227
229
|
end
|
228
230
|
end
|
229
231
|
|
data/lib/miga/cli/action.rb
CHANGED
@@ -8,7 +8,6 @@ require 'miga/cli'
|
|
8
8
|
# by MiGA::Cli::Action::* classes. Do not attempt creating directly with +new+,
|
9
9
|
# use instead the MiGA::Cli::Action.load interface.
|
10
10
|
class MiGA::Cli::Action < MiGA::MiGA
|
11
|
-
|
12
11
|
class << self
|
13
12
|
def load(task, cli)
|
14
13
|
require "miga/cli/action/#{task}"
|
@@ -55,8 +54,8 @@ class MiGA::Cli::Action < MiGA::MiGA
|
|
55
54
|
##
|
56
55
|
# Name of the action, as referred to by the CLI
|
57
56
|
def name
|
58
|
-
camel = self.class.to_s.gsub(/.*::/,'')
|
59
|
-
camel.gsub(/(\S)([A-Z])/,'\1_\2').downcase
|
57
|
+
camel = self.class.to_s.gsub(/.*::/, '')
|
58
|
+
camel.gsub(/(\S)([A-Z])/, '\1_\2').downcase
|
60
59
|
end
|
61
60
|
|
62
61
|
##
|
@@ -4,23 +4,22 @@
|
|
4
4
|
require 'miga/cli/action'
|
5
5
|
|
6
6
|
class MiGA::Cli::Action::About < MiGA::Cli::Action
|
7
|
-
|
8
7
|
def parse_cli
|
9
|
-
cli.defaults = {info: false, processing: false, tabular: false}
|
8
|
+
cli.defaults = { info: false, processing: false, tabular: false }
|
10
9
|
cli.parse do |opt|
|
11
10
|
cli.opt_object(opt, [:project])
|
12
11
|
opt.on(
|
13
12
|
'-p', '--processing',
|
14
13
|
'Print information on processing advance'
|
15
|
-
|
14
|
+
) { |v| cli[:processing] = v }
|
16
15
|
opt.on(
|
17
16
|
'-m', '--metadata STRING',
|
18
17
|
'Print name and metadata field only'
|
19
|
-
|
18
|
+
) { |v| cli[:datum] = v }
|
20
19
|
opt.on(
|
21
20
|
'--tab',
|
22
21
|
'Return a tab-delimited table'
|
23
|
-
|
22
|
+
) { |v| cli[:tabular] = v }
|
24
23
|
end
|
25
24
|
end
|
26
25
|
|
@@ -37,7 +36,7 @@ class MiGA::Cli::Action::About < MiGA::Cli::Action
|
|
37
36
|
else
|
38
37
|
cli.puts MiGA.tabulate([:key, :value], p.metadata.data.keys.map do |k|
|
39
38
|
v = p.metadata[k]
|
40
|
-
[k, k
|
39
|
+
[k, k == :datasets ? v.size : v]
|
41
40
|
end, cli[:tabular])
|
42
41
|
end
|
43
42
|
end
|
data/lib/miga/cli/action/add.rb
CHANGED
@@ -4,7 +4,6 @@
|
|
4
4
|
require 'miga/cli/action'
|
5
5
|
|
6
6
|
class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
7
|
-
|
8
7
|
def parse_cli
|
9
8
|
cli.expect_files = true
|
10
9
|
cli.defaults = {
|
@@ -47,7 +46,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
47
46
|
opt.on(
|
48
47
|
'-i', '--input-type STRING',
|
49
48
|
'Type of input data, one of the following:',
|
50
|
-
*self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
|
49
|
+
*self.class.INPUT_TYPES.map { |k, v| "~ #{k}: #{v[0]}" }
|
51
50
|
) { |v| cli[:input_type] = v.downcase.to_sym }
|
52
51
|
opt.on(
|
53
52
|
'--ignore-dups',
|
@@ -64,6 +63,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
64
63
|
files.each do |file|
|
65
64
|
d = create_dataset(file, p)
|
66
65
|
next if d.nil?
|
66
|
+
|
67
67
|
copy_file_to_project(file, file_type, d, p)
|
68
68
|
d = cli.add_metadata(d)
|
69
69
|
d.save
|
@@ -76,22 +76,22 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
76
76
|
@@INPUT_TYPES = {
|
77
77
|
raw_reads_single:
|
78
78
|
['Single raw reads in a single FastQ file',
|
79
|
-
|
79
|
+
:raw_reads, %w[.1.fastq]],
|
80
80
|
raw_reads_paired:
|
81
81
|
['Paired raw reads in two FastQ files',
|
82
|
-
|
82
|
+
:raw_reads, %w[.1.fastq .2.fastq]],
|
83
83
|
trimmed_reads_single:
|
84
84
|
['Single trimmed reads in a single FastA file',
|
85
|
-
|
85
|
+
:trimmed_fasta, %w[.SingleReads.fa]],
|
86
86
|
trimmed_reads_paired:
|
87
87
|
['Paired trimmed reads in two FastA files',
|
88
|
-
|
88
|
+
:trimmed_fasta, %w[.1.fasta .2.fasta]],
|
89
89
|
trimmed_reads_interleaved:
|
90
90
|
['Paired trimmed reads in a single FastA file',
|
91
|
-
|
91
|
+
:trimmed_fasta, %w[.CoupledReads.fa]],
|
92
92
|
assembly:
|
93
93
|
['Assembled contigs or scaffolds in FastA format',
|
94
|
-
|
94
|
+
:assembly, %w[.LargeContigs.fna]]
|
95
95
|
}
|
96
96
|
|
97
97
|
class << self
|
@@ -106,23 +106,26 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
106
106
|
files = cli.files
|
107
107
|
file_type = nil
|
108
108
|
if files.empty?
|
109
|
-
cli.ensure_par({dataset: '-D'},
|
110
|
-
|
109
|
+
cli.ensure_par({ dataset: '-D' },
|
110
|
+
'dataset is mandatory (-D) unless files are provided')
|
111
111
|
cli.ensure_type(Dataset)
|
112
112
|
files = [nil]
|
113
113
|
else
|
114
114
|
raise 'Please specify input type (-i).' if cli[:input_type].nil?
|
115
|
+
|
115
116
|
file_type = self.class.INPUT_TYPES[cli[:input_type]]
|
116
117
|
raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
|
117
118
|
raise 'Some files are duplicated, files must be unique.' if
|
118
119
|
files.size != files.uniq.size
|
120
|
+
|
119
121
|
if cli[:input_type].to_s =~ /_paired$/
|
120
122
|
if files.size.odd?
|
121
123
|
raise 'Odd number of files incompatible with input type.'
|
122
124
|
end
|
125
|
+
|
123
126
|
files = Hash[*files].to_a
|
124
127
|
else
|
125
|
-
files = files.map{ |i| [i] }
|
128
|
+
files = files.map { |i| [i] }
|
126
129
|
end
|
127
130
|
if files.size > 1 && !cli[:dataset].nil?
|
128
131
|
raise 'The dataset name (-D) can only be specified with one input file.'
|
@@ -137,6 +140,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
137
140
|
ref_file = file.is_a?(Array) ? file.first : file
|
138
141
|
m = cli[:regexp].match(ref_file)
|
139
142
|
raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
|
143
|
+
|
140
144
|
name = cli[:prefix].to_s + m[1].miga_name
|
141
145
|
end
|
142
146
|
if Dataset.exist?(p, name)
|
@@ -151,12 +155,14 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
151
155
|
cli.say "o #{name}"
|
152
156
|
d = Dataset.new(p, name, cli[:ref])
|
153
157
|
raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
|
158
|
+
|
154
159
|
d
|
155
160
|
end
|
156
161
|
|
157
162
|
def copy_file_to_project(file, file_type, d, p)
|
158
163
|
return if file.nil?
|
159
|
-
|
164
|
+
|
165
|
+
r_dir = Dataset.RESULT_DIRS[file_type[1]]
|
160
166
|
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
161
167
|
file_type[2].each_with_index do |ext, i|
|
162
168
|
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
@@ -4,15 +4,14 @@
|
|
4
4
|
require 'miga/cli/action'
|
5
5
|
|
6
6
|
class MiGA::Cli::Action::AddResult < MiGA::Cli::Action
|
7
|
-
|
8
7
|
def parse_cli
|
9
|
-
cli.defaults = {force: false}
|
8
|
+
cli.defaults = { force: false }
|
10
9
|
cli.parse do |opt|
|
11
10
|
cli.opt_object(opt, [:project, :dataset_opt, :result])
|
12
11
|
opt.on(
|
13
12
|
'-f', '--force',
|
14
13
|
'Force re-indexing of the result even if it\'s already registered'
|
15
|
-
|
14
|
+
) { |v| cli[:force] = v }
|
16
15
|
end
|
17
16
|
end
|
18
17
|
|
@@ -5,7 +5,6 @@ require 'miga/cli/action'
|
|
5
5
|
require 'rubygems/package'
|
6
6
|
|
7
7
|
class MiGA::Cli::Action::Archive < MiGA::Cli::Action
|
8
|
-
|
9
8
|
def parse_cli
|
10
9
|
cli.parse do |opt|
|
11
10
|
opt.on(
|
@@ -26,6 +25,7 @@ class MiGA::Cli::Action::Archive < MiGA::Cli::Action
|
|
26
25
|
unless cli[:tarball] =~ /\.tar\.gz$/
|
27
26
|
raise 'The tarball path (-o) must have .tar.gz extension'
|
28
27
|
end
|
28
|
+
|
29
29
|
cli[:folder] ||= cli.load_project.name
|
30
30
|
ds = cli.load_and_filter_datasets
|
31
31
|
|
@@ -95,5 +95,4 @@ class MiGA::Cli::Action::Archive < MiGA::Cli::Action
|
|
95
95
|
in_tar = File.join(cli[:folder], rel_path)
|
96
96
|
tar.add_file_simple(in_tar, 0666, string.size) { |fh| fh.write(string) }
|
97
97
|
end
|
98
|
-
|
99
98
|
end
|
@@ -51,24 +51,24 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
51
51
|
ref_db = reference_db
|
52
52
|
p_metadata = Hash[
|
53
53
|
%w[project_stats haai_distances aai_distances ani_distances clade_finding]
|
54
|
-
|
54
|
+
.map { |i| ["run_#{i}", false] }
|
55
55
|
]
|
56
56
|
p_metadata[:ref_project] = ref_db.path
|
57
57
|
p_metadata[:tax_pvalue] = cli[:pvalue]
|
58
58
|
p = create_project(:assembly, p_metadata,
|
59
|
-
|
59
|
+
run_ssu: false, run_mytaxa_scan: false, run_distances: false)
|
60
60
|
# Run
|
61
61
|
run_daemon
|
62
62
|
summarize(%w[cds assembly essential_genes]) if cli[:summaries]
|
63
63
|
summarize(['taxonomy'])
|
64
64
|
cli.say "Summary: classification"
|
65
65
|
call_cli([
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
'ls', '-P', cli[:outdir], '-m', 'tax', '--tab',
|
67
|
+
'-o', File.expand_path('classification.tsv', cli[:outdir])
|
68
|
+
])
|
69
69
|
cleanup
|
70
70
|
end
|
71
|
-
|
71
|
+
|
72
72
|
private
|
73
73
|
|
74
74
|
def reference_db
|
@@ -85,12 +85,14 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
85
85
|
unless File.size? lm_f
|
86
86
|
raise 'No locally listed databases, call "miga get_db" first'
|
87
87
|
end
|
88
|
+
|
88
89
|
cli[:database] = MiGA::Json.parse(lm_f)[:databases].keys.first
|
89
90
|
end
|
90
91
|
ref_db_path = File.expand_path(cli[:database].to_s, cli[:local])
|
91
92
|
end
|
92
93
|
ref_db = MiGA::Project.load(ref_db_path)
|
93
94
|
raise "Cannot locate reference database: #{ref_db_path}" if ref_db.nil?
|
95
|
+
|
94
96
|
cli.say "Reference database: #{ref_db.name}"
|
95
97
|
ref_db
|
96
98
|
end
|
@@ -5,17 +5,17 @@ require 'miga/cli/action'
|
|
5
5
|
require 'miga/daemon'
|
6
6
|
|
7
7
|
class MiGA::Cli::Action::Daemon < MiGA::Cli::Action
|
8
|
-
|
9
8
|
def parse_cli
|
10
|
-
cli.defaults = {daemon_opts: []}
|
9
|
+
cli.defaults = { daemon_opts: [] }
|
11
10
|
cli.expect_operation = true
|
12
11
|
cli.parse do |opt|
|
13
12
|
opt.separator 'Available operations:'
|
14
|
-
{
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
{
|
14
|
+
start: 'Start an instance of the application',
|
15
|
+
stop: 'Start an instance of the application',
|
16
|
+
run: 'Start the application and stay on top',
|
17
|
+
status: 'Show status (PID) of application instances'
|
18
|
+
}.each { |k, v| opt.separator sprintf(' %*s%s', -33, k, v) }
|
19
19
|
opt.separator ''
|
20
20
|
|
21
21
|
opt.separator 'MiGA options:'
|
data/lib/miga/cli/action/date.rb
CHANGED
@@ -44,12 +44,13 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
44
44
|
def perform
|
45
45
|
# Input data
|
46
46
|
p = create_project(:assembly,
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
{ run_project_stats: false, run_clades: false,
|
48
|
+
gsp_metric: cli[:metric], :"gsp_#{cli[:metric]}" => cli[:threshold] },
|
49
|
+
{ run_mytaxa_scan: false, run_ssu: false })
|
50
50
|
unless cli[:threshold] >= 0.0 && cli[:threshold] <= 100.0
|
51
51
|
raise "The threshold of identity must be in the range [0,100]"
|
52
52
|
end
|
53
|
+
|
53
54
|
# Run
|
54
55
|
run_daemon
|
55
56
|
dereplicate(p)
|
@@ -68,7 +69,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
68
69
|
File.open(File.expand_path('genomospecies.tsv', cli[:outdir]), 'w') do |fh|
|
69
70
|
fh.puts "Clade\tRepresentative\tMembers"
|
70
71
|
clades.each_with_index do |i, k|
|
71
|
-
fh.puts ["gsp_#{k+1}", rep[k], i.join(',')].join("\t")
|
72
|
+
fh.puts ["gsp_#{k + 1}", rep[k], i.join(',')].join("\t")
|
72
73
|
end
|
73
74
|
end
|
74
75
|
if cli[:collection]
|
@@ -1,15 +1,15 @@
|
|
1
1
|
# @package MiGA
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
|
-
require 'miga/cli/action'
|
5
|
-
require 'sqlite3'
|
4
|
+
require 'miga/cli/action/doctor/base'
|
6
5
|
|
7
6
|
class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
7
|
+
include MiGA::Cli::Action::Doctor::Base
|
8
8
|
|
9
9
|
def parse_cli
|
10
|
-
@@OPERATIONS.keys.
|
10
|
+
cli.defaults = Hash[@@OPERATIONS.keys.map { |i| [i, true] }]
|
11
11
|
cli.parse do |opt|
|
12
|
-
operation_n = Hash[@@OPERATIONS.map { |k,v| [v[0], k] }]
|
12
|
+
operation_n = Hash[@@OPERATIONS.map { |k, v| [v[0], k] }]
|
13
13
|
cli.opt_object(opt, [:project])
|
14
14
|
opt.on(
|
15
15
|
'--ignore TASK1,TASK2', Array,
|
@@ -21,20 +21,12 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
21
21
|
'Perform only the specified task (see --ignore)'
|
22
22
|
) do |v|
|
23
23
|
op_k = @@OPERATIONS.find { |_, i| i[0] == v.downcase }.first
|
24
|
-
@@OPERATIONS.
|
24
|
+
@@OPERATIONS.each_key { |i| cli[i] = false }
|
25
25
|
cli[op_k] = true
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
def check_sqlite3_database(db_file, metric)
|
31
|
-
SQLite3::Database.new(db_file) do |conn|
|
32
|
-
conn.execute("select count(*) from #{metric}").first
|
33
|
-
end
|
34
|
-
rescue SQLite3::SQLException
|
35
|
-
yield
|
36
|
-
end
|
37
|
-
|
38
30
|
def perform
|
39
31
|
p = cli.load_project
|
40
32
|
@@OPERATIONS.keys.each do |k|
|
@@ -43,7 +35,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
43
35
|
end
|
44
36
|
|
45
37
|
@@OPERATIONS = {
|
46
|
-
|
38
|
+
status: ['status', 'Update metadata status of all datasets'],
|
39
|
+
db: ['databases', 'Check integrity of database files'],
|
47
40
|
dist: ['distances', 'Check distance summary tables'],
|
48
41
|
files: ['files', 'Check for outdated files'],
|
49
42
|
cds: ['cds', 'Check for gzipped genes and proteins'],
|
@@ -52,36 +45,54 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
52
45
|
start: ['start', 'Check for lingering .start files'],
|
53
46
|
tax: ['taxonomy', 'Check for taxonomy consistency (not yet implemented)']
|
54
47
|
}
|
48
|
+
|
55
49
|
class << self
|
50
|
+
##
|
51
|
+
# All supported operations
|
56
52
|
def OPERATIONS
|
57
53
|
@@OPERATIONS
|
58
54
|
end
|
59
55
|
end
|
60
56
|
|
57
|
+
##
|
58
|
+
# Perform status operation with MiGA::Cli +cli+
|
59
|
+
def check_status(cli)
|
60
|
+
cli.say 'Updating metadata status'
|
61
|
+
n, k = cli.load_project.dataset_names.size, 0
|
62
|
+
cli.load_project.each_dataset do |d|
|
63
|
+
cli.advance('Datasets:', k += 1, n, false)
|
64
|
+
d.recalculate_status
|
65
|
+
end
|
66
|
+
cli.say
|
67
|
+
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# Perform databases operation with MiGA::Cli +cli+
|
61
71
|
def check_db(cli)
|
62
|
-
cli.say 'Checking databases
|
72
|
+
cli.say 'Checking integrity of databases'
|
73
|
+
n, k = cli.load_project.dataset_names.size, 0
|
63
74
|
cli.load_project.each_dataset do |d|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
end # each |db_key, metric|
|
76
|
-
end # each |r_key|
|
77
|
-
end # each |d|
|
75
|
+
cli.advance('Datasets:', k += 1, n, false)
|
76
|
+
each_database_file(d) do |db_file, metric, result|
|
77
|
+
check_sqlite3_database(db_file, metric) do
|
78
|
+
cli.say(" > Removing malformed database from #{d.name}:#{result} ")
|
79
|
+
File.unlink(db_file)
|
80
|
+
r = d.result(result) or next
|
81
|
+
[r.path(:done), r.path].each { |f| File.unlink(f) if File.exist?(f) }
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
cli.say
|
78
86
|
end
|
79
87
|
|
88
|
+
##
|
89
|
+
# Perform distances operation with MiGA::Cli +cli+
|
80
90
|
def check_dist(cli)
|
81
91
|
p = cli.load_project
|
82
|
-
[
|
92
|
+
%i[ani aai].each do |dist|
|
83
93
|
res = p.result("#{dist}_distances")
|
84
94
|
next if res.nil?
|
95
|
+
|
85
96
|
cli.say "Checking #{dist} table for consistent datasets"
|
86
97
|
notok, fix = check_dist_eval(cli, p, res)
|
87
98
|
check_dist_fix(cli, p, fix)
|
@@ -89,10 +100,13 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
89
100
|
end
|
90
101
|
end
|
91
102
|
|
103
|
+
##
|
104
|
+
# Perform files operation with MiGA::Cli +cli+
|
92
105
|
def check_files(cli)
|
93
106
|
cli.say 'Looking for outdated files in results'
|
94
|
-
|
95
|
-
|
107
|
+
n, k = cli.load_project.dataset_names.size, 0
|
108
|
+
cli.load_project.each_dataset do |d|
|
109
|
+
cli.advance('Datasets:', k += 1, n, false)
|
96
110
|
d.each_result do |r_k, r|
|
97
111
|
ok = true
|
98
112
|
r.each_file do |_f_sym, _f_rel, f_abs|
|
@@ -102,23 +116,28 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
102
116
|
end
|
103
117
|
end
|
104
118
|
unless ok
|
105
|
-
cli.say " > Registering again #{d.name}:#{r_k}"
|
119
|
+
cli.say " > Registering again #{d.name}:#{r_k} "
|
106
120
|
d.add_result(r_k, true, force: true)
|
107
121
|
sr = d.result(:stats) and sr.remove!
|
108
122
|
end
|
109
123
|
end
|
110
124
|
end
|
125
|
+
cli.say
|
111
126
|
end
|
112
127
|
|
128
|
+
##
|
129
|
+
# Perform cds operation with MiGA::Cli +cli+
|
113
130
|
def check_cds(cli)
|
114
131
|
cli.say 'Looking for unzipped genes or proteins'
|
132
|
+
n, k = cli.load_project.dataset_names.size, 0
|
115
133
|
cli.load_project.each_dataset do |d|
|
134
|
+
cli.advance('Datasets:', k += 1, n, false)
|
116
135
|
res = d.result(:cds) or next
|
117
136
|
changed = false
|
118
|
-
[
|
137
|
+
%i[genes proteins gff3 gff2 tab].each do |f|
|
119
138
|
file = res.file_path(f) or next
|
120
139
|
if file !~ /\.gz/
|
121
|
-
cli.say " > Gzipping #{d.name} #{f}"
|
140
|
+
cli.say " > Gzipping #{d.name} #{f} "
|
122
141
|
cmdo = `gzip -9 '#{file}'`.chomp
|
123
142
|
warn(cmdo) unless cmdo.empty?
|
124
143
|
changed = true
|
@@ -129,13 +148,17 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
129
148
|
sr = d.result(:stats) and sr.remove!
|
130
149
|
end
|
131
150
|
end
|
151
|
+
cli.say
|
132
152
|
end
|
133
153
|
|
154
|
+
##
|
155
|
+
# Perform essential-genes operation with MiGA::Cli +cli+
|
134
156
|
def check_ess(cli)
|
135
157
|
cli.say 'Looking for unarchived essential genes'
|
136
158
|
cli.load_project.each_dataset do |d|
|
137
159
|
res = d.result(:essential_genes)
|
138
160
|
next if res.nil?
|
161
|
+
|
139
162
|
dir = res.file_path(:collection)
|
140
163
|
if dir.nil?
|
141
164
|
cli.say " > Removing #{d.name}:essential_genes"
|
@@ -144,17 +167,21 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
144
167
|
next
|
145
168
|
end
|
146
169
|
next if Dir["#{dir}/*.faa"].empty?
|
170
|
+
|
147
171
|
cli.say " > Fixing #{d.name}"
|
148
172
|
cmdo = `cd '#{dir}' && tar -zcf proteins.tar.gz *.faa && rm *.faa`.chomp
|
149
173
|
warn(cmdo) unless cmdo.empty?
|
150
174
|
end
|
151
175
|
end
|
152
176
|
|
177
|
+
##
|
178
|
+
# Perform mytaxa-scan operation with MiGA::Cli +cli+
|
153
179
|
def check_mts(cli)
|
154
180
|
cli.say 'Looking for unarchived MyTaxa Scan runs'
|
155
181
|
cli.load_project.each_dataset do |d|
|
156
182
|
res = d.result(:mytaxa_scan)
|
157
183
|
next if res.nil?
|
184
|
+
|
158
185
|
dir = res.file_path(:regions)
|
159
186
|
fix = false
|
160
187
|
unless dir.nil?
|
@@ -166,8 +193,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
166
193
|
end
|
167
194
|
fix = true
|
168
195
|
end
|
169
|
-
%
|
170
|
-
file = res.file_path(ext
|
196
|
+
%i[blast mytaxain wintax gene_ids region_ids].each do |ext|
|
197
|
+
file = res.file_path(ext)
|
171
198
|
unless file.nil?
|
172
199
|
FileUtils.rm(file) if File.exist? file
|
173
200
|
fix = true
|
@@ -180,6 +207,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
180
207
|
end
|
181
208
|
end
|
182
209
|
|
210
|
+
##
|
211
|
+
# Perform start operation with MiGA::Cli +cli+
|
183
212
|
def check_start(cli)
|
184
213
|
cli.say 'Looking for legacy .start files lingering'
|
185
214
|
cli.load_project.each_dataset do |d|
|
@@ -192,52 +221,12 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
192
221
|
end
|
193
222
|
end
|
194
223
|
|
224
|
+
##
|
225
|
+
# Perform taxonomy operation with MiGA::Cli +cli+
|
195
226
|
def check_tax(cli)
|
196
|
-
#cli.say 'o Checking for taxonomy/distances consistency'
|
227
|
+
# cli.say 'o Checking for taxonomy/distances consistency'
|
197
228
|
# TODO: Find 95%ANI clusters with entries from different species
|
198
|
-
|
199
|
-
|
200
|
-
private
|
201
|
-
|
202
|
-
def check_dist_eval(cli, p, res)
|
203
|
-
notok = {}
|
204
|
-
fix = {}
|
205
|
-
Zlib::GzipReader.open(res.file_path(:matrix)) do |fh|
|
206
|
-
lineno = 0
|
207
|
-
fh.each_line do |ln|
|
208
|
-
next if (lineno += 1) == 1
|
209
|
-
r = ln.split("\t")
|
210
|
-
next unless [1, 2].map { |i| p.dataset(r[i]).nil? }.any?
|
211
|
-
[1, 2].each do |i|
|
212
|
-
if p.dataset(r[i]).nil?
|
213
|
-
notok[r[i]] = true
|
214
|
-
else
|
215
|
-
fix[r[i]] = true
|
216
|
-
end
|
217
|
-
end
|
218
|
-
end
|
219
|
-
end
|
220
|
-
[notok, fix]
|
221
|
-
end
|
222
|
-
|
223
|
-
def check_dist_fix(cli, p, fix)
|
224
|
-
return if fix.empty?
|
225
|
-
cli.say("- Fixing #{fix.size} datasets")
|
226
|
-
fix.keys.each do |d_n|
|
227
|
-
cli.say " > Fixing #{d_n}."
|
228
|
-
p.dataset(d_n).cleanup_distances!
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
def check_dist_recompute(cli, p, notok)
|
233
|
-
return if notok.empty?
|
234
|
-
cli.say '- Unregistered datasets detected: '
|
235
|
-
if notok.size <= 5
|
236
|
-
notok.keys.each { |i| cli.say " > #{i}" }
|
237
|
-
else
|
238
|
-
cli.say " > #{notok.size}, including #{notok.keys.first}"
|
239
|
-
end
|
240
|
-
cli.say '- Removing tables, recompute'
|
241
|
-
res.remove!
|
229
|
+
# TODO: Find different 95%ANI clusters with genomes from the same species
|
230
|
+
# TODO: Find AAI values too high or too low for each LCA rank
|
242
231
|
end
|
243
232
|
end
|