miga-base 0.7.3.0 → 0.7.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +21 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +6 -3
- data/lib/miga/common/with_daemon_class.rb +3 -2
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +34 -25
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -4
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +25 -21
- data/utils/distance/pipeline.rb +16 -10
- data/utils/distance/runner.rb +19 -13
- data/utils/distance/temporal.rb +7 -4
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3682f50e3efe936ce751cd83cc7945edddb8e1c3ea6e654c4d54f8ea79efbfcb
|
4
|
+
data.tar.gz: a5bc821d8f1b6f55baf495eea28e8783c86c61ffaca2d486e4589b818a60038f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23e986949f97ae31498b7310eba666f0fc4b5f3e4ab9d38a135b2934db901449dca70ce74830e4353bb60f2196ce2c195b1bfb20400f884494e9766e58ea5214
|
7
|
+
data.tar.gz: 3857008111b8a65b1fbf09442eb3a657789ebf964769c7805485be57298141c363296b7d2491ef3379726344f07892211d10d4c72fc74a8095bc0eaf00d4e873
|
data/lib/miga/cli.rb
CHANGED
@@ -7,7 +7,6 @@ require 'optparse'
|
|
7
7
|
##
|
8
8
|
# MiGA Command Line Interface API.
|
9
9
|
class MiGA::Cli < MiGA::MiGA
|
10
|
-
|
11
10
|
require 'miga/cli/base'
|
12
11
|
require 'miga/cli/opt_helper'
|
13
12
|
require 'miga/cli/objects_helper'
|
@@ -61,7 +60,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
61
60
|
|
62
61
|
def initialize(argv)
|
63
62
|
@data = {}
|
64
|
-
@defaults = {verbose: false, tabular: false}
|
63
|
+
@defaults = { verbose: false, tabular: false }
|
65
64
|
@opt_common = true
|
66
65
|
@objects = {}
|
67
66
|
if argv[0].nil? or argv[0].to_s[0] == '-'
|
@@ -106,6 +105,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
106
105
|
# otherwise it's sent to +$stderr+
|
107
106
|
def say(*par)
|
108
107
|
return unless self[:verbose]
|
108
|
+
|
109
109
|
super(*par)
|
110
110
|
end
|
111
111
|
|
@@ -116,16 +116,17 @@ class MiGA::Cli < MiGA::MiGA
|
|
116
116
|
# The report goes to $stderr iff --verbose
|
117
117
|
def advance(step, n = 0, total = nil, bin = true)
|
118
118
|
return unless self[:verbose]
|
119
|
+
|
119
120
|
adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
|
120
121
|
('%.1f%% (%s/%s)' % [100.0 * n / total,
|
121
|
-
|
122
|
+
num_suffix(n, bin), num_suffix(total, bin)])
|
122
123
|
$stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
|
123
124
|
end
|
124
125
|
|
125
126
|
def num_suffix(n, bin = false)
|
126
127
|
p = ''
|
127
|
-
{T: 4, G: 3, M: 2, K: 1}.each do |k,x|
|
128
|
-
v = (bin ? 1024 : 1e3)
|
128
|
+
{ T: 4, G: 3, M: 2, K: 1 }.each do |k, x|
|
129
|
+
v = (bin ? 1024 : 1e3)**x
|
129
130
|
if n > v
|
130
131
|
n = '%.1f' % (n / v)
|
131
132
|
p = k
|
@@ -160,7 +161,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
160
161
|
##
|
161
162
|
# Set default values in the Hash +hsh+
|
162
163
|
def defaults=(hsh)
|
163
|
-
hsh.each{ |k,v| @defaults[k] = v }
|
164
|
+
hsh.each { |k, v| @defaults[k] = v }
|
164
165
|
end
|
165
166
|
|
166
167
|
##
|
@@ -192,6 +193,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
192
193
|
def launch
|
193
194
|
begin
|
194
195
|
raise "See `miga -h`" if action.nil?
|
196
|
+
|
195
197
|
action.launch
|
196
198
|
rescue => err
|
197
199
|
$stderr.puts "Exception: #{err}"
|
@@ -222,8 +224,8 @@ class MiGA::Cli < MiGA::MiGA
|
|
222
224
|
# +par+, a Hash with object names as keys and parameter flag as values.
|
223
225
|
# If missing, raise an error with message +msg+
|
224
226
|
def ensure_par(req, msg = '%<name>s is mandatory: please provide %<flag>s')
|
225
|
-
req.each do |k,v|
|
226
|
-
raise (msg % {name: k, flag: v}) if self[k].nil?
|
227
|
+
req.each do |k, v|
|
228
|
+
raise (msg % { name: k, flag: v }) if self[k].nil?
|
227
229
|
end
|
228
230
|
end
|
229
231
|
|
data/lib/miga/cli/action.rb
CHANGED
@@ -8,7 +8,6 @@ require 'miga/cli'
|
|
8
8
|
# by MiGA::Cli::Action::* classes. Do not attempt creating directly with +new+,
|
9
9
|
# use instead the MiGA::Cli::Action.load interface.
|
10
10
|
class MiGA::Cli::Action < MiGA::MiGA
|
11
|
-
|
12
11
|
class << self
|
13
12
|
def load(task, cli)
|
14
13
|
require "miga/cli/action/#{task}"
|
@@ -55,8 +54,8 @@ class MiGA::Cli::Action < MiGA::MiGA
|
|
55
54
|
##
|
56
55
|
# Name of the action, as referred to by the CLI
|
57
56
|
def name
|
58
|
-
camel = self.class.to_s.gsub(/.*::/,'')
|
59
|
-
camel.gsub(/(\S)([A-Z])/,'\1_\2').downcase
|
57
|
+
camel = self.class.to_s.gsub(/.*::/, '')
|
58
|
+
camel.gsub(/(\S)([A-Z])/, '\1_\2').downcase
|
60
59
|
end
|
61
60
|
|
62
61
|
##
|
@@ -4,23 +4,22 @@
|
|
4
4
|
require 'miga/cli/action'
|
5
5
|
|
6
6
|
class MiGA::Cli::Action::About < MiGA::Cli::Action
|
7
|
-
|
8
7
|
def parse_cli
|
9
|
-
cli.defaults = {info: false, processing: false, tabular: false}
|
8
|
+
cli.defaults = { info: false, processing: false, tabular: false }
|
10
9
|
cli.parse do |opt|
|
11
10
|
cli.opt_object(opt, [:project])
|
12
11
|
opt.on(
|
13
12
|
'-p', '--processing',
|
14
13
|
'Print information on processing advance'
|
15
|
-
|
14
|
+
) { |v| cli[:processing] = v }
|
16
15
|
opt.on(
|
17
16
|
'-m', '--metadata STRING',
|
18
17
|
'Print name and metadata field only'
|
19
|
-
|
18
|
+
) { |v| cli[:datum] = v }
|
20
19
|
opt.on(
|
21
20
|
'--tab',
|
22
21
|
'Return a tab-delimited table'
|
23
|
-
|
22
|
+
) { |v| cli[:tabular] = v }
|
24
23
|
end
|
25
24
|
end
|
26
25
|
|
@@ -37,7 +36,7 @@ class MiGA::Cli::Action::About < MiGA::Cli::Action
|
|
37
36
|
else
|
38
37
|
cli.puts MiGA.tabulate([:key, :value], p.metadata.data.keys.map do |k|
|
39
38
|
v = p.metadata[k]
|
40
|
-
[k, k
|
39
|
+
[k, k == :datasets ? v.size : v]
|
41
40
|
end, cli[:tabular])
|
42
41
|
end
|
43
42
|
end
|
data/lib/miga/cli/action/add.rb
CHANGED
@@ -4,7 +4,6 @@
|
|
4
4
|
require 'miga/cli/action'
|
5
5
|
|
6
6
|
class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
7
|
-
|
8
7
|
def parse_cli
|
9
8
|
cli.expect_files = true
|
10
9
|
cli.defaults = {
|
@@ -47,7 +46,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
47
46
|
opt.on(
|
48
47
|
'-i', '--input-type STRING',
|
49
48
|
'Type of input data, one of the following:',
|
50
|
-
*self.class.INPUT_TYPES.map{ |k,v| "~ #{k}: #{v[0]}" }
|
49
|
+
*self.class.INPUT_TYPES.map { |k, v| "~ #{k}: #{v[0]}" }
|
51
50
|
) { |v| cli[:input_type] = v.downcase.to_sym }
|
52
51
|
opt.on(
|
53
52
|
'--ignore-dups',
|
@@ -64,6 +63,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
64
63
|
files.each do |file|
|
65
64
|
d = create_dataset(file, p)
|
66
65
|
next if d.nil?
|
66
|
+
|
67
67
|
copy_file_to_project(file, file_type, d, p)
|
68
68
|
d = cli.add_metadata(d)
|
69
69
|
d.save
|
@@ -76,22 +76,22 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
76
76
|
@@INPUT_TYPES = {
|
77
77
|
raw_reads_single:
|
78
78
|
['Single raw reads in a single FastQ file',
|
79
|
-
|
79
|
+
:raw_reads, %w[.1.fastq]],
|
80
80
|
raw_reads_paired:
|
81
81
|
['Paired raw reads in two FastQ files',
|
82
|
-
|
82
|
+
:raw_reads, %w[.1.fastq .2.fastq]],
|
83
83
|
trimmed_reads_single:
|
84
84
|
['Single trimmed reads in a single FastA file',
|
85
|
-
|
85
|
+
:trimmed_fasta, %w[.SingleReads.fa]],
|
86
86
|
trimmed_reads_paired:
|
87
87
|
['Paired trimmed reads in two FastA files',
|
88
|
-
|
88
|
+
:trimmed_fasta, %w[.1.fasta .2.fasta]],
|
89
89
|
trimmed_reads_interleaved:
|
90
90
|
['Paired trimmed reads in a single FastA file',
|
91
|
-
|
91
|
+
:trimmed_fasta, %w[.CoupledReads.fa]],
|
92
92
|
assembly:
|
93
93
|
['Assembled contigs or scaffolds in FastA format',
|
94
|
-
|
94
|
+
:assembly, %w[.LargeContigs.fna]]
|
95
95
|
}
|
96
96
|
|
97
97
|
class << self
|
@@ -106,23 +106,26 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
106
106
|
files = cli.files
|
107
107
|
file_type = nil
|
108
108
|
if files.empty?
|
109
|
-
cli.ensure_par({dataset: '-D'},
|
110
|
-
|
109
|
+
cli.ensure_par({ dataset: '-D' },
|
110
|
+
'dataset is mandatory (-D) unless files are provided')
|
111
111
|
cli.ensure_type(Dataset)
|
112
112
|
files = [nil]
|
113
113
|
else
|
114
114
|
raise 'Please specify input type (-i).' if cli[:input_type].nil?
|
115
|
+
|
115
116
|
file_type = self.class.INPUT_TYPES[cli[:input_type]]
|
116
117
|
raise "Unrecognized input type: #{cli[:input_type]}." if file_type.nil?
|
117
118
|
raise 'Some files are duplicated, files must be unique.' if
|
118
119
|
files.size != files.uniq.size
|
120
|
+
|
119
121
|
if cli[:input_type].to_s =~ /_paired$/
|
120
122
|
if files.size.odd?
|
121
123
|
raise 'Odd number of files incompatible with input type.'
|
122
124
|
end
|
125
|
+
|
123
126
|
files = Hash[*files].to_a
|
124
127
|
else
|
125
|
-
files = files.map{ |i| [i] }
|
128
|
+
files = files.map { |i| [i] }
|
126
129
|
end
|
127
130
|
if files.size > 1 && !cli[:dataset].nil?
|
128
131
|
raise 'The dataset name (-D) can only be specified with one input file.'
|
@@ -137,6 +140,7 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
137
140
|
ref_file = file.is_a?(Array) ? file.first : file
|
138
141
|
m = cli[:regexp].match(ref_file)
|
139
142
|
raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
|
143
|
+
|
140
144
|
name = cli[:prefix].to_s + m[1].miga_name
|
141
145
|
end
|
142
146
|
if Dataset.exist?(p, name)
|
@@ -151,12 +155,14 @@ class MiGA::Cli::Action::Add < MiGA::Cli::Action
|
|
151
155
|
cli.say "o #{name}"
|
152
156
|
d = Dataset.new(p, name, cli[:ref])
|
153
157
|
raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
|
158
|
+
|
154
159
|
d
|
155
160
|
end
|
156
161
|
|
157
162
|
def copy_file_to_project(file, file_type, d, p)
|
158
163
|
return if file.nil?
|
159
|
-
|
164
|
+
|
165
|
+
r_dir = Dataset.RESULT_DIRS[file_type[1]]
|
160
166
|
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
161
167
|
file_type[2].each_with_index do |ext, i|
|
162
168
|
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
@@ -4,15 +4,14 @@
|
|
4
4
|
require 'miga/cli/action'
|
5
5
|
|
6
6
|
class MiGA::Cli::Action::AddResult < MiGA::Cli::Action
|
7
|
-
|
8
7
|
def parse_cli
|
9
|
-
cli.defaults = {force: false}
|
8
|
+
cli.defaults = { force: false }
|
10
9
|
cli.parse do |opt|
|
11
10
|
cli.opt_object(opt, [:project, :dataset_opt, :result])
|
12
11
|
opt.on(
|
13
12
|
'-f', '--force',
|
14
13
|
'Force re-indexing of the result even if it\'s already registered'
|
15
|
-
|
14
|
+
) { |v| cli[:force] = v }
|
16
15
|
end
|
17
16
|
end
|
18
17
|
|
@@ -5,7 +5,6 @@ require 'miga/cli/action'
|
|
5
5
|
require 'rubygems/package'
|
6
6
|
|
7
7
|
class MiGA::Cli::Action::Archive < MiGA::Cli::Action
|
8
|
-
|
9
8
|
def parse_cli
|
10
9
|
cli.parse do |opt|
|
11
10
|
opt.on(
|
@@ -26,6 +25,7 @@ class MiGA::Cli::Action::Archive < MiGA::Cli::Action
|
|
26
25
|
unless cli[:tarball] =~ /\.tar\.gz$/
|
27
26
|
raise 'The tarball path (-o) must have .tar.gz extension'
|
28
27
|
end
|
28
|
+
|
29
29
|
cli[:folder] ||= cli.load_project.name
|
30
30
|
ds = cli.load_and_filter_datasets
|
31
31
|
|
@@ -95,5 +95,4 @@ class MiGA::Cli::Action::Archive < MiGA::Cli::Action
|
|
95
95
|
in_tar = File.join(cli[:folder], rel_path)
|
96
96
|
tar.add_file_simple(in_tar, 0666, string.size) { |fh| fh.write(string) }
|
97
97
|
end
|
98
|
-
|
99
98
|
end
|
@@ -51,24 +51,24 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
51
51
|
ref_db = reference_db
|
52
52
|
p_metadata = Hash[
|
53
53
|
%w[project_stats haai_distances aai_distances ani_distances clade_finding]
|
54
|
-
|
54
|
+
.map { |i| ["run_#{i}", false] }
|
55
55
|
]
|
56
56
|
p_metadata[:ref_project] = ref_db.path
|
57
57
|
p_metadata[:tax_pvalue] = cli[:pvalue]
|
58
58
|
p = create_project(:assembly, p_metadata,
|
59
|
-
|
59
|
+
run_ssu: false, run_mytaxa_scan: false, run_distances: false)
|
60
60
|
# Run
|
61
61
|
run_daemon
|
62
62
|
summarize(%w[cds assembly essential_genes]) if cli[:summaries]
|
63
63
|
summarize(['taxonomy'])
|
64
64
|
cli.say "Summary: classification"
|
65
65
|
call_cli([
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
'ls', '-P', cli[:outdir], '-m', 'tax', '--tab',
|
67
|
+
'-o', File.expand_path('classification.tsv', cli[:outdir])
|
68
|
+
])
|
69
69
|
cleanup
|
70
70
|
end
|
71
|
-
|
71
|
+
|
72
72
|
private
|
73
73
|
|
74
74
|
def reference_db
|
@@ -85,12 +85,14 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
85
85
|
unless File.size? lm_f
|
86
86
|
raise 'No locally listed databases, call "miga get_db" first'
|
87
87
|
end
|
88
|
+
|
88
89
|
cli[:database] = MiGA::Json.parse(lm_f)[:databases].keys.first
|
89
90
|
end
|
90
91
|
ref_db_path = File.expand_path(cli[:database].to_s, cli[:local])
|
91
92
|
end
|
92
93
|
ref_db = MiGA::Project.load(ref_db_path)
|
93
94
|
raise "Cannot locate reference database: #{ref_db_path}" if ref_db.nil?
|
95
|
+
|
94
96
|
cli.say "Reference database: #{ref_db.name}"
|
95
97
|
ref_db
|
96
98
|
end
|
@@ -5,17 +5,17 @@ require 'miga/cli/action'
|
|
5
5
|
require 'miga/daemon'
|
6
6
|
|
7
7
|
class MiGA::Cli::Action::Daemon < MiGA::Cli::Action
|
8
|
-
|
9
8
|
def parse_cli
|
10
|
-
cli.defaults = {daemon_opts: []}
|
9
|
+
cli.defaults = { daemon_opts: [] }
|
11
10
|
cli.expect_operation = true
|
12
11
|
cli.parse do |opt|
|
13
12
|
opt.separator 'Available operations:'
|
14
|
-
{
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
{
|
14
|
+
start: 'Start an instance of the application',
|
15
|
+
stop: 'Start an instance of the application',
|
16
|
+
run: 'Start the application and stay on top',
|
17
|
+
status: 'Show status (PID) of application instances'
|
18
|
+
}.each { |k, v| opt.separator sprintf(' %*s%s', -33, k, v) }
|
19
19
|
opt.separator ''
|
20
20
|
|
21
21
|
opt.separator 'MiGA options:'
|
data/lib/miga/cli/action/date.rb
CHANGED
@@ -44,12 +44,13 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
44
44
|
def perform
|
45
45
|
# Input data
|
46
46
|
p = create_project(:assembly,
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
{ run_project_stats: false, run_clades: false,
|
48
|
+
gsp_metric: cli[:metric], :"gsp_#{cli[:metric]}" => cli[:threshold] },
|
49
|
+
{ run_mytaxa_scan: false, run_ssu: false })
|
50
50
|
unless cli[:threshold] >= 0.0 && cli[:threshold] <= 100.0
|
51
51
|
raise "The threshold of identity must be in the range [0,100]"
|
52
52
|
end
|
53
|
+
|
53
54
|
# Run
|
54
55
|
run_daemon
|
55
56
|
dereplicate(p)
|
@@ -68,7 +69,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
68
69
|
File.open(File.expand_path('genomospecies.tsv', cli[:outdir]), 'w') do |fh|
|
69
70
|
fh.puts "Clade\tRepresentative\tMembers"
|
70
71
|
clades.each_with_index do |i, k|
|
71
|
-
fh.puts ["gsp_#{k+1}", rep[k], i.join(',')].join("\t")
|
72
|
+
fh.puts ["gsp_#{k + 1}", rep[k], i.join(',')].join("\t")
|
72
73
|
end
|
73
74
|
end
|
74
75
|
if cli[:collection]
|
@@ -1,15 +1,15 @@
|
|
1
1
|
# @package MiGA
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
|
-
require 'miga/cli/action'
|
5
|
-
require 'sqlite3'
|
4
|
+
require 'miga/cli/action/doctor/base'
|
6
5
|
|
7
6
|
class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
7
|
+
include MiGA::Cli::Action::Doctor::Base
|
8
8
|
|
9
9
|
def parse_cli
|
10
|
-
@@OPERATIONS.keys.
|
10
|
+
cli.defaults = Hash[@@OPERATIONS.keys.map { |i| [i, true] }]
|
11
11
|
cli.parse do |opt|
|
12
|
-
operation_n = Hash[@@OPERATIONS.map { |k,v| [v[0], k] }]
|
12
|
+
operation_n = Hash[@@OPERATIONS.map { |k, v| [v[0], k] }]
|
13
13
|
cli.opt_object(opt, [:project])
|
14
14
|
opt.on(
|
15
15
|
'--ignore TASK1,TASK2', Array,
|
@@ -21,20 +21,12 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
21
21
|
'Perform only the specified task (see --ignore)'
|
22
22
|
) do |v|
|
23
23
|
op_k = @@OPERATIONS.find { |_, i| i[0] == v.downcase }.first
|
24
|
-
@@OPERATIONS.
|
24
|
+
@@OPERATIONS.each_key { |i| cli[i] = false }
|
25
25
|
cli[op_k] = true
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
def check_sqlite3_database(db_file, metric)
|
31
|
-
SQLite3::Database.new(db_file) do |conn|
|
32
|
-
conn.execute("select count(*) from #{metric}").first
|
33
|
-
end
|
34
|
-
rescue SQLite3::SQLException
|
35
|
-
yield
|
36
|
-
end
|
37
|
-
|
38
30
|
def perform
|
39
31
|
p = cli.load_project
|
40
32
|
@@OPERATIONS.keys.each do |k|
|
@@ -43,7 +35,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
43
35
|
end
|
44
36
|
|
45
37
|
@@OPERATIONS = {
|
46
|
-
|
38
|
+
status: ['status', 'Update metadata status of all datasets'],
|
39
|
+
db: ['databases', 'Check integrity of database files'],
|
47
40
|
dist: ['distances', 'Check distance summary tables'],
|
48
41
|
files: ['files', 'Check for outdated files'],
|
49
42
|
cds: ['cds', 'Check for gzipped genes and proteins'],
|
@@ -52,36 +45,54 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
52
45
|
start: ['start', 'Check for lingering .start files'],
|
53
46
|
tax: ['taxonomy', 'Check for taxonomy consistency (not yet implemented)']
|
54
47
|
}
|
48
|
+
|
55
49
|
class << self
|
50
|
+
##
|
51
|
+
# All supported operations
|
56
52
|
def OPERATIONS
|
57
53
|
@@OPERATIONS
|
58
54
|
end
|
59
55
|
end
|
60
56
|
|
57
|
+
##
|
58
|
+
# Perform status operation with MiGA::Cli +cli+
|
59
|
+
def check_status(cli)
|
60
|
+
cli.say 'Updating metadata status'
|
61
|
+
n, k = cli.load_project.dataset_names.size, 0
|
62
|
+
cli.load_project.each_dataset do |d|
|
63
|
+
cli.advance('Datasets:', k += 1, n, false)
|
64
|
+
d.recalculate_status
|
65
|
+
end
|
66
|
+
cli.say
|
67
|
+
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# Perform databases operation with MiGA::Cli +cli+
|
61
71
|
def check_db(cli)
|
62
|
-
cli.say 'Checking databases
|
72
|
+
cli.say 'Checking integrity of databases'
|
73
|
+
n, k = cli.load_project.dataset_names.size, 0
|
63
74
|
cli.load_project.each_dataset do |d|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
end # each |db_key, metric|
|
76
|
-
end # each |r_key|
|
77
|
-
end # each |d|
|
75
|
+
cli.advance('Datasets:', k += 1, n, false)
|
76
|
+
each_database_file(d) do |db_file, metric, result|
|
77
|
+
check_sqlite3_database(db_file, metric) do
|
78
|
+
cli.say(" > Removing malformed database from #{d.name}:#{result} ")
|
79
|
+
File.unlink(db_file)
|
80
|
+
r = d.result(result) or next
|
81
|
+
[r.path(:done), r.path].each { |f| File.unlink(f) if File.exist?(f) }
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
cli.say
|
78
86
|
end
|
79
87
|
|
88
|
+
##
|
89
|
+
# Perform distances operation with MiGA::Cli +cli+
|
80
90
|
def check_dist(cli)
|
81
91
|
p = cli.load_project
|
82
|
-
[
|
92
|
+
%i[ani aai].each do |dist|
|
83
93
|
res = p.result("#{dist}_distances")
|
84
94
|
next if res.nil?
|
95
|
+
|
85
96
|
cli.say "Checking #{dist} table for consistent datasets"
|
86
97
|
notok, fix = check_dist_eval(cli, p, res)
|
87
98
|
check_dist_fix(cli, p, fix)
|
@@ -89,10 +100,13 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
89
100
|
end
|
90
101
|
end
|
91
102
|
|
103
|
+
##
|
104
|
+
# Perform files operation with MiGA::Cli +cli+
|
92
105
|
def check_files(cli)
|
93
106
|
cli.say 'Looking for outdated files in results'
|
94
|
-
|
95
|
-
|
107
|
+
n, k = cli.load_project.dataset_names.size, 0
|
108
|
+
cli.load_project.each_dataset do |d|
|
109
|
+
cli.advance('Datasets:', k += 1, n, false)
|
96
110
|
d.each_result do |r_k, r|
|
97
111
|
ok = true
|
98
112
|
r.each_file do |_f_sym, _f_rel, f_abs|
|
@@ -102,23 +116,28 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
102
116
|
end
|
103
117
|
end
|
104
118
|
unless ok
|
105
|
-
cli.say " > Registering again #{d.name}:#{r_k}"
|
119
|
+
cli.say " > Registering again #{d.name}:#{r_k} "
|
106
120
|
d.add_result(r_k, true, force: true)
|
107
121
|
sr = d.result(:stats) and sr.remove!
|
108
122
|
end
|
109
123
|
end
|
110
124
|
end
|
125
|
+
cli.say
|
111
126
|
end
|
112
127
|
|
128
|
+
##
|
129
|
+
# Perform cds operation with MiGA::Cli +cli+
|
113
130
|
def check_cds(cli)
|
114
131
|
cli.say 'Looking for unzipped genes or proteins'
|
132
|
+
n, k = cli.load_project.dataset_names.size, 0
|
115
133
|
cli.load_project.each_dataset do |d|
|
134
|
+
cli.advance('Datasets:', k += 1, n, false)
|
116
135
|
res = d.result(:cds) or next
|
117
136
|
changed = false
|
118
|
-
[
|
137
|
+
%i[genes proteins gff3 gff2 tab].each do |f|
|
119
138
|
file = res.file_path(f) or next
|
120
139
|
if file !~ /\.gz/
|
121
|
-
cli.say " > Gzipping #{d.name} #{f}"
|
140
|
+
cli.say " > Gzipping #{d.name} #{f} "
|
122
141
|
cmdo = `gzip -9 '#{file}'`.chomp
|
123
142
|
warn(cmdo) unless cmdo.empty?
|
124
143
|
changed = true
|
@@ -129,13 +148,17 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
129
148
|
sr = d.result(:stats) and sr.remove!
|
130
149
|
end
|
131
150
|
end
|
151
|
+
cli.say
|
132
152
|
end
|
133
153
|
|
154
|
+
##
|
155
|
+
# Perform essential-genes operation with MiGA::Cli +cli+
|
134
156
|
def check_ess(cli)
|
135
157
|
cli.say 'Looking for unarchived essential genes'
|
136
158
|
cli.load_project.each_dataset do |d|
|
137
159
|
res = d.result(:essential_genes)
|
138
160
|
next if res.nil?
|
161
|
+
|
139
162
|
dir = res.file_path(:collection)
|
140
163
|
if dir.nil?
|
141
164
|
cli.say " > Removing #{d.name}:essential_genes"
|
@@ -144,17 +167,21 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
144
167
|
next
|
145
168
|
end
|
146
169
|
next if Dir["#{dir}/*.faa"].empty?
|
170
|
+
|
147
171
|
cli.say " > Fixing #{d.name}"
|
148
172
|
cmdo = `cd '#{dir}' && tar -zcf proteins.tar.gz *.faa && rm *.faa`.chomp
|
149
173
|
warn(cmdo) unless cmdo.empty?
|
150
174
|
end
|
151
175
|
end
|
152
176
|
|
177
|
+
##
|
178
|
+
# Perform mytaxa-scan operation with MiGA::Cli +cli+
|
153
179
|
def check_mts(cli)
|
154
180
|
cli.say 'Looking for unarchived MyTaxa Scan runs'
|
155
181
|
cli.load_project.each_dataset do |d|
|
156
182
|
res = d.result(:mytaxa_scan)
|
157
183
|
next if res.nil?
|
184
|
+
|
158
185
|
dir = res.file_path(:regions)
|
159
186
|
fix = false
|
160
187
|
unless dir.nil?
|
@@ -166,8 +193,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
166
193
|
end
|
167
194
|
fix = true
|
168
195
|
end
|
169
|
-
%
|
170
|
-
file = res.file_path(ext
|
196
|
+
%i[blast mytaxain wintax gene_ids region_ids].each do |ext|
|
197
|
+
file = res.file_path(ext)
|
171
198
|
unless file.nil?
|
172
199
|
FileUtils.rm(file) if File.exist? file
|
173
200
|
fix = true
|
@@ -180,6 +207,8 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
180
207
|
end
|
181
208
|
end
|
182
209
|
|
210
|
+
##
|
211
|
+
# Perform start operation with MiGA::Cli +cli+
|
183
212
|
def check_start(cli)
|
184
213
|
cli.say 'Looking for legacy .start files lingering'
|
185
214
|
cli.load_project.each_dataset do |d|
|
@@ -192,52 +221,12 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
|
|
192
221
|
end
|
193
222
|
end
|
194
223
|
|
224
|
+
##
|
225
|
+
# Perform taxonomy operation with MiGA::Cli +cli+
|
195
226
|
def check_tax(cli)
|
196
|
-
#cli.say 'o Checking for taxonomy/distances consistency'
|
227
|
+
# cli.say 'o Checking for taxonomy/distances consistency'
|
197
228
|
# TODO: Find 95%ANI clusters with entries from different species
|
198
|
-
|
199
|
-
|
200
|
-
private
|
201
|
-
|
202
|
-
def check_dist_eval(cli, p, res)
|
203
|
-
notok = {}
|
204
|
-
fix = {}
|
205
|
-
Zlib::GzipReader.open(res.file_path(:matrix)) do |fh|
|
206
|
-
lineno = 0
|
207
|
-
fh.each_line do |ln|
|
208
|
-
next if (lineno += 1) == 1
|
209
|
-
r = ln.split("\t")
|
210
|
-
next unless [1, 2].map { |i| p.dataset(r[i]).nil? }.any?
|
211
|
-
[1, 2].each do |i|
|
212
|
-
if p.dataset(r[i]).nil?
|
213
|
-
notok[r[i]] = true
|
214
|
-
else
|
215
|
-
fix[r[i]] = true
|
216
|
-
end
|
217
|
-
end
|
218
|
-
end
|
219
|
-
end
|
220
|
-
[notok, fix]
|
221
|
-
end
|
222
|
-
|
223
|
-
def check_dist_fix(cli, p, fix)
|
224
|
-
return if fix.empty?
|
225
|
-
cli.say("- Fixing #{fix.size} datasets")
|
226
|
-
fix.keys.each do |d_n|
|
227
|
-
cli.say " > Fixing #{d_n}."
|
228
|
-
p.dataset(d_n).cleanup_distances!
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
def check_dist_recompute(cli, p, notok)
|
233
|
-
return if notok.empty?
|
234
|
-
cli.say '- Unregistered datasets detected: '
|
235
|
-
if notok.size <= 5
|
236
|
-
notok.keys.each { |i| cli.say " > #{i}" }
|
237
|
-
else
|
238
|
-
cli.say " > #{notok.size}, including #{notok.keys.first}"
|
239
|
-
end
|
240
|
-
cli.say '- Removing tables, recompute'
|
241
|
-
res.remove!
|
229
|
+
# TODO: Find different 95%ANI clusters with genomes from the same species
|
230
|
+
# TODO: Find AAI values too high or too low for each LCA rank
|
242
231
|
end
|
243
232
|
end
|