miga-base 0.7.8.0 → 0.7.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/miga/cli/action/browse.rb +213 -0
- data/lib/miga/cli/action/classify_wf.rb +3 -1
- data/lib/miga/cli/action/derep_wf.rb +4 -0
- data/lib/miga/cli/action/edit.rb +9 -6
- data/lib/miga/cli/action/quality_wf.rb +4 -1
- data/lib/miga/cli/action/stats.rb +2 -2
- data/lib/miga/cli/action/summary.rb +6 -1
- data/lib/miga/cli/action/wf.rb +11 -3
- data/lib/miga/cli/base.rb +27 -26
- data/lib/miga/common/format.rb +30 -8
- data/lib/miga/daemon.rb +6 -4
- data/lib/miga/dataset.rb +5 -1
- data/lib/miga/dataset/base.rb +3 -3
- data/lib/miga/dataset/hooks.rb +4 -4
- data/lib/miga/dataset/result.rb +18 -14
- data/lib/miga/lair.rb +1 -1
- data/lib/miga/project/dataset.rb +3 -5
- data/lib/miga/project/hooks.rb +4 -3
- data/lib/miga/remote_dataset/download.rb +2 -1
- data/lib/miga/result.rb +3 -1
- data/lib/miga/result/stats.rb +55 -23
- data/lib/miga/version.rb +2 -2
- data/scripts/cds.bash +0 -1
- data/scripts/distances.bash +6 -1
- data/test/daemon_test.rb +1 -1
- data/test/dataset_test.rb +3 -1
- data/test/project_test.rb +1 -1
- data/test/remote_dataset_test.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f99f8fd530489d42672cdd96123f321725b9437ee4a81e822a07854ec924ad53
|
4
|
+
data.tar.gz: c4d6607a4b6062b45cc94985b8bc920bb25307232851b20436ee4b9cd8a8986b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '03478c56a40e948ad9eb4cb09fbedc72bac331072ccf46ce5468f1f78a08e891260684771e455b92841d8af454fcab997d7d51a68360b8337ffa13c8c2ec88a4'
|
7
|
+
data.tar.gz: e992d10e5de206a85ac425e15c7594629f86e8088ec6f59ba820c5ecaf6c8901af0142455501a1d8af03902a3ca559d0374692e4df9f41acdceaf45d5d750b1f
|
data/README.md
CHANGED
@@ -41,6 +41,7 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
|
|
41
41
|
collaboration between [Kostas Lab][kostas] at the Georgia Institute of
|
42
42
|
Technology and [RDP][rdp] at Michigan State University.
|
43
43
|
|
44
|
+
See also the [complete list of contributors](manual/part1/contributors.md).
|
44
45
|
|
45
46
|
# License
|
46
47
|
|
@@ -0,0 +1,213 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'miga/cli/action'
|
4
|
+
|
5
|
+
# Action: miga browse
|
6
|
+
class MiGA::Cli::Action::Browse < MiGA::Cli::Action
|
7
|
+
def parse_cli
|
8
|
+
cli.parse do |opt|
|
9
|
+
cli.defaults = { open: true }
|
10
|
+
cli.opt_object(opt, [:project])
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def perform
|
15
|
+
p = cli.load_project
|
16
|
+
create_empty_page(p)
|
17
|
+
generate_project_page(p)
|
18
|
+
say 'Creating dataset pages'
|
19
|
+
cli.load_project.each_dataset do |d|
|
20
|
+
generate_dataset_page(p, d)
|
21
|
+
end
|
22
|
+
generate_datasets_index(p)
|
23
|
+
say "Open in your browser: #{File.join(p.path, 'index.html')}"
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
##
|
29
|
+
# Create an empty page with necessary assets for project +p+
|
30
|
+
def create_empty_page(p)
|
31
|
+
say 'Creating project page'
|
32
|
+
FileUtils.mkdir_p(browse_file(p, '.'))
|
33
|
+
%w[favicon-32.png style.css].each do |i|
|
34
|
+
FileUtils.cp(template_file(i), browse_file(p, i))
|
35
|
+
end
|
36
|
+
write_file(p, 'about.html') do
|
37
|
+
build_from_template('about.html', citation: MiGA::MiGA.CITATION)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
##
|
42
|
+
# Create landing page for project +p+
|
43
|
+
def generate_project_page(p)
|
44
|
+
# Redirect page
|
45
|
+
write_file(p, '../index.html') { build_from_template('redirect.html') }
|
46
|
+
|
47
|
+
# Summaries
|
48
|
+
summaries = Dir["#{p.path}/*.tsv"].map do |i|
|
49
|
+
"<li><a href='file://#{i}'>#{File.basename(i)}</a></li>"
|
50
|
+
end.join('')
|
51
|
+
|
52
|
+
# Project index page
|
53
|
+
data = {
|
54
|
+
project_active: 'active',
|
55
|
+
information: format_metadata(p),
|
56
|
+
summaries: summaries.empty? ? 'None' : "<ul>#{summaries}</ul>",
|
57
|
+
results: format_results(p)
|
58
|
+
}
|
59
|
+
write_file(p, 'index.html') { build_from_template('index.html', data) }
|
60
|
+
end
|
61
|
+
|
62
|
+
##
|
63
|
+
# Create page for dataset +d+ within project +p+
|
64
|
+
def generate_dataset_page(p, d)
|
65
|
+
data = {
|
66
|
+
unmiga_name: d.name.unmiga_name,
|
67
|
+
information: format_metadata(d),
|
68
|
+
results: format_results(d)
|
69
|
+
}
|
70
|
+
write_file(p, "d_#{d.name}.html") do
|
71
|
+
build_from_template('dataset.html', data)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
##
|
76
|
+
# Create pages for reference and query dataset indexes
|
77
|
+
def generate_datasets_index(p)
|
78
|
+
say 'Creating index pages'
|
79
|
+
data = format_dataset_index(p)
|
80
|
+
data.each do |k, v|
|
81
|
+
write_file(p, "#{k}_datasets.html") do
|
82
|
+
v[:list] = 'None' if v[:list] == ''
|
83
|
+
build_from_template(
|
84
|
+
'datasets.html',
|
85
|
+
v.merge(:"#{k}_datasets_active" => 'active')
|
86
|
+
)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def format_dataset_index(p)
|
92
|
+
data = {
|
93
|
+
ref: { type_name: 'Reference', list: '' },
|
94
|
+
qry: { type_name: 'Query', list: '' }
|
95
|
+
}
|
96
|
+
p.each_dataset do |d|
|
97
|
+
data[d.ref? ? :ref : :qry][:list] +=
|
98
|
+
"<li><a href='d_#{d.name}.html'>#{d.name.unmiga_name}</a></li>"
|
99
|
+
end
|
100
|
+
data
|
101
|
+
end
|
102
|
+
|
103
|
+
##
|
104
|
+
# Format +obj+ metadata as a table
|
105
|
+
def format_metadata(obj)
|
106
|
+
'<table class="table table-sm table-responsive">' +
|
107
|
+
obj.metadata.data.map do |k, v|
|
108
|
+
case k
|
109
|
+
when /^run_/, :plugins, :user
|
110
|
+
next
|
111
|
+
when :web_assembly_gz
|
112
|
+
v = "<a href='#{v}'>#{v[0..50]}...</a>"
|
113
|
+
when :datasets
|
114
|
+
v = v.size
|
115
|
+
end
|
116
|
+
"<tr><td class='text-right pr-4'><b>#{format_name(k)}</b></td>" \
|
117
|
+
"<td>#{v}</td></tr>"
|
118
|
+
end.compact.join('') +
|
119
|
+
'</table>'
|
120
|
+
end
|
121
|
+
|
122
|
+
##
|
123
|
+
# Format +obj+ results as cards
|
124
|
+
def format_results(obj)
|
125
|
+
o = ''
|
126
|
+
obj.each_result do |key, res|
|
127
|
+
links = format_result_links(res)
|
128
|
+
stats = format_result_stats(res)
|
129
|
+
next unless links || stats
|
130
|
+
name = format_name(key)
|
131
|
+
url_doc =
|
132
|
+
'http://manual.microbial-genomes.org/part5/workflow#' +
|
133
|
+
key.to_s.tr('_', '-')
|
134
|
+
o += <<~CARD
|
135
|
+
<div class="col-md-6 mb-4">
|
136
|
+
<h3>#{name}</h3>
|
137
|
+
<div class='border-left p-3'>
|
138
|
+
#{stats}
|
139
|
+
#{links}
|
140
|
+
</div>
|
141
|
+
<div class='border-top p-2 bg-light'>
|
142
|
+
<a target=_blank href="#{url_doc}" class='p-2'>Learn more</a>
|
143
|
+
</div>
|
144
|
+
</div>
|
145
|
+
CARD
|
146
|
+
end
|
147
|
+
"<div class='row'>#{o}</div>"
|
148
|
+
end
|
149
|
+
|
150
|
+
def format_name(str)
|
151
|
+
str
|
152
|
+
.to_s.unmiga_name
|
153
|
+
.sub(/^./, &:upcase)
|
154
|
+
.gsub(/(Aai|Ani|Ogs|Cds|Ssu| db$| ssu )/, &:upcase)
|
155
|
+
.sub(/Haai/, 'hAAI')
|
156
|
+
.sub(/Mytaxa/, 'MyTaxa')
|
157
|
+
.sub(/ pvalue$/, ' p-value')
|
158
|
+
.sub(/contigs$/, 'Contigs')
|
159
|
+
end
|
160
|
+
|
161
|
+
def format_result_links(res)
|
162
|
+
links = []
|
163
|
+
res.each_file do |key, _|
|
164
|
+
name = format_name(key)
|
165
|
+
links << "<a href='file://#{res.file_path(key)}'>#{name}</a><br/>"
|
166
|
+
end
|
167
|
+
links.empty? ? nil : links.join('')
|
168
|
+
end
|
169
|
+
|
170
|
+
def format_result_stats(res)
|
171
|
+
res.stats.map do |k, v|
|
172
|
+
v = [v, ''] unless v.is_a? Array
|
173
|
+
v[0] = ('%.3g' % v[0]) if v[0].is_a? Float
|
174
|
+
"<b>#{format_name(k)}:</b> #{v[0]}#{v[1]}<br/>"
|
175
|
+
end.join('') + '<br/>' unless res.stats.empty?
|
176
|
+
end
|
177
|
+
|
178
|
+
##
|
179
|
+
# Write +file+ within the browse folder of project +p+ using the passed
|
180
|
+
# block output as content
|
181
|
+
def write_file(p, file)
|
182
|
+
File.open(browse_file(p, file), 'w') { |fh| fh.print yield }
|
183
|
+
end
|
184
|
+
|
185
|
+
##
|
186
|
+
# Use a +template+ file to generate content with a hash of +data+ over the
|
187
|
+
# layout page if +layout+ is true
|
188
|
+
def build_from_template(template, data = {}, layout = true)
|
189
|
+
cont = File.read(template_file(template)).miga_variables(data)
|
190
|
+
return cont unless layout
|
191
|
+
|
192
|
+
build_from_template(
|
193
|
+
'layout.html',
|
194
|
+
data.merge(content: cont, project_name: cli.load_project.name),
|
195
|
+
false
|
196
|
+
)
|
197
|
+
end
|
198
|
+
|
199
|
+
##
|
200
|
+
# Path to the template browse file
|
201
|
+
def template_file(file)
|
202
|
+
File.join(
|
203
|
+
MiGA::MiGA.root_path,
|
204
|
+
'lib', 'miga', 'cli', 'action', 'browse', file
|
205
|
+
)
|
206
|
+
end
|
207
|
+
|
208
|
+
##
|
209
|
+
# Path to the browse file in the project
|
210
|
+
def browse_file(p, file)
|
211
|
+
File.join(p.path, 'browse', file)
|
212
|
+
end
|
213
|
+
end
|
@@ -42,7 +42,9 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
42
42
|
'--no-summaries',
|
43
43
|
'Do not generate intermediate step summaries'
|
44
44
|
) { |v| cli[:summaries] = v }
|
45
|
-
opts_for_wf(
|
45
|
+
opts_for_wf(
|
46
|
+
opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
|
47
|
+
)
|
46
48
|
end
|
47
49
|
end
|
48
50
|
|
@@ -19,6 +19,10 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
19
19
|
'Use Average Amino Acid Identity (AAI) as genome similarity metric',
|
20
20
|
'By default: Use Average Nucleotide Identity (ANI)'
|
21
21
|
) { cli[:metric] = :aai }
|
22
|
+
opt.on(
|
23
|
+
'--ani',
|
24
|
+
'Use Average Nucleotide Identity (ANI) as similarity metric (default)'
|
25
|
+
) { cli[:metric] = :ani }
|
22
26
|
opt.on(
|
23
27
|
'--threshold FLOAT', Float,
|
24
28
|
"Metric threshold (%) to dereplicate. By default: #{cli[:threshold]}"
|
data/lib/miga/cli/action/edit.rb
CHANGED
@@ -17,18 +17,21 @@ class MiGA::Cli::Action::Edit < MiGA::Cli::Action
|
|
17
17
|
'Activate dataset; requires -D'
|
18
18
|
) { |v| cli[:activate] = v }
|
19
19
|
opt.on(
|
20
|
-
'--inactivate',
|
21
|
-
'Inactivate dataset; requires -D'
|
22
|
-
|
20
|
+
'--inactivate [reason]',
|
21
|
+
'Inactivate dataset; requires -D',
|
22
|
+
'The argument is optional: reason to inactivate dataset'
|
23
|
+
) { |v| cli[:activate] = false ; cli[:reason] = v }
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
26
27
|
def perform
|
27
28
|
obj = cli.load_project_or_dataset
|
28
29
|
unless cli[:activate].nil?
|
29
|
-
cli.ensure_par(
|
30
|
-
|
31
|
-
|
30
|
+
cli.ensure_par(
|
31
|
+
{ dataset: '-D' },
|
32
|
+
'%<name>s is mandatory with --[in-]activate: please provide %<flag>s'
|
33
|
+
)
|
34
|
+
cli[:activate] ? obj.activate! : obj.inactivate!(cli[:reason])
|
32
35
|
end
|
33
36
|
cli.add_metadata(obj)
|
34
37
|
obj.save
|
@@ -15,7 +15,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
|
|
15
15
|
'-m', '--mytaxa-scan',
|
16
16
|
'Perform MyTaxa scan analysis'
|
17
17
|
) { |v| cli[:mytaxa] = v }
|
18
|
-
opts_for_wf(
|
18
|
+
opts_for_wf(
|
19
|
+
opt, 'Input genome assemblies (nucleotides, FastA)',
|
20
|
+
qual: false
|
21
|
+
)
|
19
22
|
end
|
20
23
|
end
|
21
24
|
|
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
14
14
|
) { |v| cli[:key] = v }
|
15
15
|
opt.on(
|
16
16
|
'--compute-and-save',
|
17
|
-
'Compute and
|
17
|
+
'Compute and save the statistics'
|
18
18
|
) { |v| cli[:compute] = v }
|
19
19
|
opt.on(
|
20
20
|
'--try-load',
|
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
38
38
|
end
|
39
39
|
if cli[:key].nil?
|
40
40
|
r[:stats].each do |k, v|
|
41
|
-
k_n = k
|
41
|
+
k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
|
42
42
|
cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
|
43
43
|
end
|
44
44
|
else
|
@@ -26,6 +26,10 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
26
26
|
'--with-units',
|
27
27
|
'Include units in each cell'
|
28
28
|
) { |v| cli[:units] = v }
|
29
|
+
opt.on(
|
30
|
+
'--compute-and-save',
|
31
|
+
'Compute and save the statistics if not yet available'
|
32
|
+
) { |v| cli[:compute] = v }
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
@@ -34,7 +38,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
34
38
|
ds = cli.load_and_filter_datasets
|
35
39
|
cli.say 'Loading results'
|
36
40
|
stats = ds.map do |d|
|
37
|
-
r = d.
|
41
|
+
r = d.result(cli[:result])
|
42
|
+
r.compute_stats if cli[:compute] && !r.nil? && r[:stats].empty?
|
38
43
|
s = r.nil? ? {} : r[:stats]
|
39
44
|
s.tap { |i| i[:dataset] = d.name }
|
40
45
|
end
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -15,7 +15,7 @@ module MiGA::Cli::Action::Wf
|
|
15
15
|
|
16
16
|
def opts_for_wf(opt, files_desc, params = {})
|
17
17
|
{
|
18
|
-
multi: false, cleanup: true, project_type: false, ncbi: true
|
18
|
+
multi: false, cleanup: true, project_type: false, ncbi: true, qual: true
|
19
19
|
}.each { |k, v| params[k] = v if params[k].nil? }
|
20
20
|
opt.on(
|
21
21
|
'-o', '--out_dir PATH',
|
@@ -40,6 +40,13 @@ module MiGA::Cli::Action::Wf
|
|
40
40
|
'Only download complete genomes, not drafts'
|
41
41
|
) { |v| cli[:ncbi_draft] = v }
|
42
42
|
end
|
43
|
+
if params[:qual]
|
44
|
+
opt.on(
|
45
|
+
'--min-qual FLOAT', Float,
|
46
|
+
'Minimum genome quality to include in analysis',
|
47
|
+
'By default: 50.0'
|
48
|
+
) { |v| cli[:min_qual] = v }
|
49
|
+
end
|
43
50
|
if params[:cleanup]
|
44
51
|
opt.on(
|
45
52
|
'-c', '--clean',
|
@@ -125,7 +132,7 @@ module MiGA::Cli::Action::Wf
|
|
125
132
|
]) unless MiGA::Project.exist? cli[:outdir]
|
126
133
|
# Define project metadata
|
127
134
|
p = cli.load_project(:outdir, '-o')
|
128
|
-
[
|
135
|
+
%i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
|
129
136
|
p_metadata[:type] = cli[:project_type]
|
130
137
|
transfer_metadata(p, p_metadata)
|
131
138
|
# Download datasets
|
@@ -159,9 +166,10 @@ module MiGA::Cli::Action::Wf
|
|
159
166
|
'-P', cli[:outdir],
|
160
167
|
'-r', r,
|
161
168
|
'-o', File.expand_path("#{r}.tsv", cli[:outdir]),
|
162
|
-
'--tab'
|
169
|
+
'--tab', '--ref', '--active'
|
163
170
|
])
|
164
171
|
end
|
172
|
+
call_cli(['browse', '-P', cli[:outdir]])
|
165
173
|
end
|
166
174
|
|
167
175
|
def cleanup
|
data/lib/miga/cli/base.rb
CHANGED
@@ -11,39 +11,40 @@ module MiGA::Cli::Base
|
|
11
11
|
preproc_wf: 'Preprocess input genomes or metagenomes',
|
12
12
|
index_wf: 'Generate distance indexing of input genomes',
|
13
13
|
# Projects
|
14
|
-
new: '
|
15
|
-
about: '
|
16
|
-
doctor: '
|
17
|
-
get_db: '
|
14
|
+
new: 'Create an empty MiGA project',
|
15
|
+
about: 'Display information about a MiGA project',
|
16
|
+
doctor: 'Perform consistency checks on a MiGA project',
|
17
|
+
get_db: 'Download a pre-indexed database',
|
18
|
+
browse: 'Explore a project locally using a web browser',
|
18
19
|
# Datasets
|
19
|
-
add: '
|
20
|
-
get: '
|
21
|
-
ncbi_get: '
|
22
|
-
rm: '
|
23
|
-
find: '
|
20
|
+
add: 'Create a dataset in a MiGA project',
|
21
|
+
get: 'Download a dataset from public databases into a MiGA project',
|
22
|
+
ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
|
23
|
+
rm: 'Remove a dataset from an MiGA project',
|
24
|
+
find: 'Find unregistered datasets based on result files',
|
24
25
|
ln: 'Link datasets (including results) from one project to another',
|
25
|
-
ls: '
|
26
|
-
archive: '
|
26
|
+
ls: 'List all registered datasets in an MiGA project',
|
27
|
+
archive: 'Generate a tar-ball with all files from select datasets',
|
27
28
|
# Results
|
28
|
-
add_result: '
|
29
|
-
stats: '
|
30
|
-
files: '
|
31
|
-
run: '
|
32
|
-
summary: '
|
33
|
-
next_step: '
|
29
|
+
add_result: 'Register a result',
|
30
|
+
stats: 'Extract statistics for the given result',
|
31
|
+
files: 'List registered files from the results of a dataset or project',
|
32
|
+
run: 'Execute locally one step analysis producing the given result',
|
33
|
+
summary: 'Generate a summary table for the statistics of all datasets',
|
34
|
+
next_step: 'Return the next task to run in a dataset or project',
|
34
35
|
# Objects (Datasets or Projects)
|
35
|
-
edit: '
|
36
|
+
edit: 'Edit the metadata of a dataset or project',
|
36
37
|
# System
|
37
38
|
init: 'Initialize MiGA to process new projects',
|
38
|
-
daemon: '
|
39
|
-
lair: '
|
40
|
-
date: '
|
41
|
-
console: '
|
39
|
+
daemon: 'Control the daemon of a MiGA project',
|
40
|
+
lair: 'Control groups of daemons for several MiGA projects',
|
41
|
+
date: 'Return the current date in standard MiGA format',
|
42
|
+
console: 'Open an IRB console with MiGA',
|
42
43
|
# Taxonomy
|
43
|
-
tax_set: '
|
44
|
-
tax_test: '
|
45
|
-
tax_index: '
|
46
|
-
tax_dist: '
|
44
|
+
tax_set: 'Register taxonomic information for datasets',
|
45
|
+
tax_test: 'Return test of taxonomic distributions for query datasets',
|
46
|
+
tax_index: 'Create a taxonomy-indexed list of the datasets',
|
47
|
+
tax_dist: 'Estimate distributions of distance by taxonomy',
|
47
48
|
}
|
48
49
|
|
49
50
|
@@TASK_ALIAS = {
|
data/lib/miga/common/format.rb
CHANGED
@@ -68,15 +68,20 @@ module MiGA::Common::Format
|
|
68
68
|
# a FastA or FastQ file (supports gzipped files). The +format+ must be a
|
69
69
|
# Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
|
70
70
|
# controlled via the +opts+ Hash. Supported options include:
|
71
|
-
# - +:n50+:
|
72
|
-
# - +:gc+:
|
73
|
-
# - +:x+:
|
71
|
+
# - +:n50+: Include the N50 and the median (in bp)
|
72
|
+
# - +:gc+: Include the G+C content (in %)
|
73
|
+
# - +:x+: Include the undetermined bases content (in %)
|
74
|
+
# - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
|
75
|
+
# See definition used here in DOI:10.1177/117693430700300006
|
74
76
|
def seqs_length(file, format, opts = {})
|
77
|
+
opts[:gc] = true if opts[:skew]
|
75
78
|
fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
|
76
79
|
l = []
|
77
80
|
gc = 0
|
78
81
|
xn = 0
|
79
|
-
|
82
|
+
t = 0
|
83
|
+
c = 0
|
84
|
+
i = 0 # <- Zlib::GzipReader doesn't set `$.`
|
80
85
|
fh.each_line do |ln|
|
81
86
|
i += 1
|
82
87
|
if (format == :fasta and ln =~ /^>/) or
|
@@ -86,16 +91,27 @@ module MiGA::Common::Format
|
|
86
91
|
l[l.size - 1] += ln.chomp.size
|
87
92
|
gc += ln.scan(/[GCgc]/).count if opts[:gc]
|
88
93
|
xn += ln.scan(/[XNxn]/).count if opts[:x]
|
94
|
+
if opts[:skew]
|
95
|
+
t += ln.scan(/[Tt]/).count
|
96
|
+
c += ln.scan(/[Cc]/).count
|
97
|
+
end
|
89
98
|
end
|
90
99
|
end
|
91
100
|
fh.close
|
92
101
|
|
93
|
-
o = { n: l.size, tot: l.inject(:+), max: l.max }
|
102
|
+
o = { n: l.size, tot: l.inject(0, :+), max: l.max }
|
103
|
+
return o if o[:tot].zero?
|
94
104
|
o[:avg] = o[:tot].to_f / l.size
|
95
105
|
o[:var] = l.map { |a| a**2 }.inject(:+).to_f / l.size - o[:avg]**2
|
96
106
|
o[:sd] = Math.sqrt o[:var]
|
97
107
|
o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
|
98
108
|
o[:x] = 100.0 * xn / o[:tot] if opts[:x]
|
109
|
+
if opts[:skew]
|
110
|
+
at = o[:tot] - gc
|
111
|
+
o[:at_skew] = 100.0 * (2 * t - at) / at
|
112
|
+
o[:gc_skew] = 100.0 * (2 * c - gc) / gc
|
113
|
+
end
|
114
|
+
|
99
115
|
if opts[:n50]
|
100
116
|
l.sort!
|
101
117
|
thr = o[:tot] / 2
|
@@ -106,7 +122,8 @@ module MiGA::Common::Format
|
|
106
122
|
break if pos >= thr
|
107
123
|
end
|
108
124
|
o[:med] = o[:n].even? ?
|
109
|
-
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
|
125
|
+
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
|
126
|
+
l[(o[:n] - 1) / 2]
|
110
127
|
end
|
111
128
|
o
|
112
129
|
end
|
@@ -130,9 +147,14 @@ class String
|
|
130
147
|
end
|
131
148
|
|
132
149
|
##
|
133
|
-
# Replace underscores by spaces or
|
150
|
+
# Replace underscores by spaces or other symbols depending on context
|
134
151
|
def unmiga_name
|
135
|
-
gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
|
152
|
+
gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
|
153
|
+
.gsub(/g_c_(content)/, 'G+C \\1')
|
154
|
+
.gsub(/g_c_(skew)/, 'G-C \\1')
|
155
|
+
.gsub(/a_t_(skew)/, 'A-T \\1')
|
156
|
+
.gsub(/x_content/, &:capitalize)
|
157
|
+
.tr('_', ' ')
|
136
158
|
end
|
137
159
|
|
138
160
|
##
|
data/lib/miga/daemon.rb
CHANGED
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
72
72
|
say '-----------------------------------'
|
73
73
|
say 'MiGA:%s launched' % project.name
|
74
74
|
say '-----------------------------------'
|
75
|
+
recalculate_status!
|
75
76
|
load_status
|
76
77
|
say 'Configuration options:'
|
77
78
|
say @runopts.to_s
|
@@ -99,6 +100,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
99
100
|
end
|
100
101
|
|
101
102
|
def recalculate_status!
|
103
|
+
say 'Recalculating status for all datasets'
|
102
104
|
project.each_dataset(&:recalculate_status)
|
103
105
|
end
|
104
106
|
|
@@ -158,8 +160,8 @@ class MiGA::Daemon < MiGA::MiGA
|
|
158
160
|
end
|
159
161
|
|
160
162
|
##
|
161
|
-
# Traverse datasets, and returns boolean indicating if at any
|
162
|
-
# are incomplete
|
163
|
+
# Traverse datasets and return a boolean indicating whether any reference
|
164
|
+
# datasets are incomplete
|
163
165
|
def check_datasets
|
164
166
|
l_say(2, 'Checking datasets')
|
165
167
|
o = false
|
@@ -167,7 +169,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
167
169
|
next unless ds.status == :incomplete
|
168
170
|
next if ds.next_preprocessing(false).nil?
|
169
171
|
|
170
|
-
o = true
|
172
|
+
o = true if ds.ref?
|
171
173
|
queue_job(:d, ds)
|
172
174
|
end
|
173
175
|
o
|
@@ -183,7 +185,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
183
185
|
return if project.dataset_names.empty?
|
184
186
|
|
185
187
|
# Double-check if all datasets are ready
|
186
|
-
return unless project.done_preprocessing?
|
188
|
+
return unless project.done_preprocessing?
|
187
189
|
|
188
190
|
# Queue project-level job
|
189
191
|
to_run = project.next_task(nil, false)
|
data/lib/miga/dataset.rb
CHANGED
@@ -97,7 +97,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
97
97
|
|
98
98
|
##
|
99
99
|
# Inactivate a dataset. This halts automated processing by the daemon
|
100
|
-
|
100
|
+
#
|
101
|
+
# If given, the +reason+ string is saved as a metadata +:warn+ entry
|
102
|
+
def inactivate!(reason = nil)
|
103
|
+
metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
|
101
104
|
metadata[:inactive] = true
|
102
105
|
metadata.save
|
103
106
|
pull_hook :on_inactivate
|
@@ -107,6 +110,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
107
110
|
# Activate a dataset. This removes the +:inactive+ flag
|
108
111
|
def activate!
|
109
112
|
metadata[:inactive] = nil
|
113
|
+
metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
|
110
114
|
metadata.save
|
111
115
|
pull_hook :on_activate
|
112
116
|
end
|
data/lib/miga/dataset/base.rb
CHANGED
@@ -35,8 +35,8 @@ module MiGA::Dataset::Base
|
|
35
35
|
mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
|
36
36
|
mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
|
37
37
|
# Distances (for single-species datasets)
|
38
|
-
distances: '09.distances',
|
39
38
|
taxonomy: '09.distances/05.taxonomy',
|
39
|
+
distances: '09.distances',
|
40
40
|
# General statistics
|
41
41
|
stats: '90.stats'
|
42
42
|
}
|
@@ -66,7 +66,7 @@ module MiGA::Dataset::Base
|
|
66
66
|
@@PREPROCESSING_TASKS = [
|
67
67
|
:raw_reads, :trimmed_reads, :read_quality, :trimmed_fasta,
|
68
68
|
:assembly, :cds, :essential_genes, :ssu, :mytaxa, :mytaxa_scan,
|
69
|
-
:
|
69
|
+
:taxonomy, :distances, :stats
|
70
70
|
]
|
71
71
|
|
72
72
|
##
|
@@ -77,7 +77,7 @@ module MiGA::Dataset::Base
|
|
77
77
|
##
|
78
78
|
# Tasks to be executed only in datasets that are not multi-organism. These
|
79
79
|
# tasks are ignored for multi-organism datasets or for unknown types.
|
80
|
-
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :
|
80
|
+
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :taxonomy, :distances]
|
81
81
|
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map { |i| [i, true] }]
|
82
82
|
|
83
83
|
##
|
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -52,15 +52,15 @@ module MiGA::Dataset::Hooks
|
|
52
52
|
end
|
53
53
|
|
54
54
|
##
|
55
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
-
# object (
|
55
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
+
# dataset, project, project_name, miga, object (if defined for the event)
|
57
57
|
# - +hook_args+: +[cmd]+
|
58
58
|
# - +event_args+: +[object (optional)]+
|
59
59
|
def hook_run_cmd(hook_args, event_args)
|
60
60
|
Process.wait(
|
61
61
|
spawn hook_args.first.miga_variables(
|
62
|
-
dataset: name, project: project.path,
|
63
|
-
object: event_args.first
|
62
|
+
dataset: name, project: project.path, project_name: project.name,
|
63
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
64
64
|
)
|
65
65
|
)
|
66
66
|
end
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -26,15 +26,24 @@ module MiGA::Dataset::Result
|
|
26
26
|
# The values are symbols:
|
27
27
|
# - empty: the dataset has no data
|
28
28
|
# - inactive: the dataset is inactive
|
29
|
+
# - upstream: the task is upstream from the dataset's input
|
29
30
|
# - force: forced to ignore by metadata
|
30
31
|
# - project: incompatible project
|
31
32
|
# - noref: incompatible dataset, only for reference
|
32
33
|
# - multi: incompatible dataset, only for multi
|
33
34
|
# - nonmulti: incompatible dataset, only for nonmulti
|
35
|
+
# - complete: the task is already complete
|
34
36
|
# - execute: do not ignore, execute the task
|
35
37
|
def why_ignore(task)
|
36
|
-
if !
|
38
|
+
if !get_result(task).nil?
|
39
|
+
:complete
|
40
|
+
elsif !active?
|
37
41
|
:inactive
|
42
|
+
elsif first_preprocessing.nil?
|
43
|
+
:empty
|
44
|
+
elsif @@PREPROCESSING_TASKS.index(task) <
|
45
|
+
@@PREPROCESSING_TASKS.index(first_preprocessing)
|
46
|
+
:upstream
|
38
47
|
elsif !metadata["run_#{task}"].nil?
|
39
48
|
metadata["run_#{task}"] ? :execute : :force
|
40
49
|
elsif task == :taxonomy && project.metadata[:ref_project].nil?
|
@@ -56,7 +65,7 @@ module MiGA::Dataset::Result
|
|
56
65
|
# initial input. Passes +save+ to #add_result.
|
57
66
|
def first_preprocessing(save = false)
|
58
67
|
@first_processing ||= @@PREPROCESSING_TASKS.find do |t|
|
59
|
-
!
|
68
|
+
!add_result(t, save).nil?
|
60
69
|
end
|
61
70
|
end
|
62
71
|
|
@@ -70,7 +79,7 @@ module MiGA::Dataset::Result
|
|
70
79
|
false
|
71
80
|
elsif add_result(t, save).nil?
|
72
81
|
if (metadata["_try_#{t}"] || 0) > (project.metadata[:max_try] || 10)
|
73
|
-
inactivate!
|
82
|
+
inactivate! "Too many errors in step #{t}"
|
74
83
|
false
|
75
84
|
else
|
76
85
|
true
|
@@ -121,17 +130,12 @@ module MiGA::Dataset::Result
|
|
121
130
|
# - complete: a task with registered results
|
122
131
|
# - pending: a task queued to be performed
|
123
132
|
def result_status(task)
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
:-
|
131
|
-
elsif ignore_task?(task)
|
132
|
-
:"ignore_#{why_ignore task}"
|
133
|
-
else
|
134
|
-
:pending
|
133
|
+
reason = why_ignore(task)
|
134
|
+
case reason
|
135
|
+
when :upstream; :-
|
136
|
+
when :execute; :pending
|
137
|
+
when :complete; :complete
|
138
|
+
else; :"ignore_#{reason}"
|
135
139
|
end
|
136
140
|
end
|
137
141
|
|
data/lib/miga/lair.rb
CHANGED
data/lib/miga/project/dataset.rb
CHANGED
@@ -134,12 +134,10 @@ module MiGA::Project::Dataset
|
|
134
134
|
##
|
135
135
|
# Are all the datasets in the project preprocessed? Save intermediate results
|
136
136
|
# if +save+ (until the first incomplete dataset is reached).
|
137
|
-
def done_preprocessing?(save =
|
138
|
-
|
139
|
-
|
140
|
-
return false if ds.is_ref? and not ds.done_preprocessing?(save)
|
137
|
+
def done_preprocessing?(save = false)
|
138
|
+
!each_dataset.any? do |d|
|
139
|
+
d.ref? && d.active? && !d.done_preprocessing?(save)
|
141
140
|
end
|
142
|
-
true
|
143
141
|
end
|
144
142
|
|
145
143
|
##
|
data/lib/miga/project/hooks.rb
CHANGED
@@ -26,14 +26,15 @@ module MiGA::Project::Hooks
|
|
26
26
|
end
|
27
27
|
|
28
28
|
##
|
29
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
-
# object (
|
29
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
+
# project, project_name, miga, object (if defined by the event)
|
31
31
|
# - +hook_args+: +[cmd]+
|
32
32
|
# - +event_args+: +[object (optional)]+
|
33
33
|
def hook_run_cmd(hook_args, event_args)
|
34
34
|
Process.wait(
|
35
35
|
spawn hook_args.first.miga_variables(
|
36
|
-
project: path,
|
36
|
+
project: path, project_name: name,
|
37
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
37
38
|
)
|
38
39
|
)
|
39
40
|
end
|
@@ -94,12 +94,13 @@ class MiGA::RemoteDataset
|
|
94
94
|
@timeout_try = 0
|
95
95
|
begin
|
96
96
|
DEBUG 'GET: ' + url
|
97
|
-
open(
|
97
|
+
URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
|
98
98
|
rescue => e
|
99
99
|
@timeout_try += 1
|
100
100
|
raise e if @timeout_try >= 3
|
101
101
|
|
102
102
|
sleep 5 # <- For: 429 Too Many Requests
|
103
|
+
DEBUG "RETRYING after: #{e}"
|
103
104
|
retry
|
104
105
|
end
|
105
106
|
doc
|
data/lib/miga/result.rb
CHANGED
@@ -164,7 +164,9 @@ class MiGA::Result < MiGA::MiGA
|
|
164
164
|
# Unlink result by removing the .done and .start timestamps and the
|
165
165
|
# .json descriptor, but don't remove any other associated files
|
166
166
|
def unlink
|
167
|
-
%i(start done).each
|
167
|
+
%i(start done).each do |i|
|
168
|
+
f = path(i) and File.exists?(f) and File.unlink(f)
|
169
|
+
end
|
168
170
|
File.unlink path
|
169
171
|
end
|
170
172
|
|
data/lib/miga/result/stats.rb
CHANGED
@@ -8,6 +8,7 @@ module MiGA::Result::Stats
|
|
8
8
|
# (Re-)calculate and save the statistics for the result
|
9
9
|
def compute_stats
|
10
10
|
method = :"compute_stats_#{key}"
|
11
|
+
MiGA::MiGA.DEBUG "Result(#{key}).compute_stats"
|
11
12
|
stats = self.respond_to?(method, true) ? send(method) : nil
|
12
13
|
unless stats.nil?
|
13
14
|
self[:stats] = stats
|
@@ -16,32 +17,45 @@ module MiGA::Result::Stats
|
|
16
17
|
self[:stats]
|
17
18
|
end
|
18
19
|
|
20
|
+
##
|
21
|
+
# Access the stats entry of results
|
22
|
+
def stats
|
23
|
+
self[:stats]
|
24
|
+
end
|
25
|
+
|
19
26
|
private
|
20
27
|
|
21
28
|
def compute_stats_raw_reads
|
22
29
|
stats = {}
|
30
|
+
seq_opts = { gc: true, x: true, skew: true }
|
23
31
|
if self[:files][:pair1].nil?
|
24
|
-
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq,
|
32
|
+
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
|
25
33
|
stats = {
|
26
34
|
reads: s[:n],
|
27
35
|
length_average: [s[:avg], 'bp'],
|
28
36
|
length_standard_deviation: [s[:sd], 'bp'],
|
29
37
|
g_c_content: [s[:gc], '%'],
|
30
|
-
x_content: [s[:x], '%']
|
38
|
+
x_content: [s[:x], '%'],
|
39
|
+
g_c_skew: [s[:gc_skew], '%'],
|
40
|
+
a_t_skew: [s[:at_skew], '%']
|
31
41
|
}
|
32
42
|
else
|
33
|
-
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq,
|
34
|
-
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq,
|
43
|
+
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
|
44
|
+
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
|
35
45
|
stats = {
|
36
46
|
read_pairs: s1[:n],
|
37
47
|
forward_length_average: [s1[:avg], 'bp'],
|
38
48
|
forward_length_standard_deviation: [s1[:sd], 'bp'],
|
39
49
|
forward_g_c_content: [s1[:gc], '%'],
|
40
50
|
forward_x_content: [s1[:x], '%'],
|
51
|
+
forward_g_c_skew: [s1[:gc_skew], '%'],
|
52
|
+
forward_a_t_skew: [s1[:at_skew], '%'],
|
41
53
|
reverse_length_average: [s2[:avg], 'bp'],
|
42
54
|
reverse_length_standard_deviation: [s2[:sd], 'bp'],
|
43
55
|
reverse_g_c_content: [s2[:gc], '%'],
|
44
|
-
reverse_x_content: [s2[:x], '%']
|
56
|
+
reverse_x_content: [s2[:x], '%'],
|
57
|
+
reverse_g_c_skew: [s2[:gc_skew], '%'],
|
58
|
+
reverse_a_t_skew: [s2[:at_skew], '%']
|
45
59
|
}
|
46
60
|
end
|
47
61
|
stats
|
@@ -49,19 +63,22 @@ module MiGA::Result::Stats
|
|
49
63
|
|
50
64
|
def compute_stats_trimmed_fasta
|
51
65
|
f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
|
52
|
-
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
|
66
|
+
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
|
53
67
|
{
|
54
68
|
reads: s[:n],
|
55
69
|
length_average: [s[:avg], 'bp'],
|
56
70
|
length_standard_deviation: [s[:sd], 'bp'],
|
57
71
|
g_c_content: [s[:gc], '%'],
|
58
|
-
x_content: [s[:x], '%']
|
72
|
+
x_content: [s[:x], '%'],
|
73
|
+
g_c_skew: [s[:gc_skew], '%'],
|
74
|
+
a_t_skew: [s[:at_skew], '%']
|
59
75
|
}
|
60
76
|
end
|
61
77
|
|
62
78
|
def compute_stats_assembly
|
63
79
|
s = MiGA::MiGA.seqs_length(
|
64
|
-
file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
|
80
|
+
file_path(:largecontigs), :fasta,
|
81
|
+
n50: true, gc: true, x: true, skew: true
|
65
82
|
)
|
66
83
|
{
|
67
84
|
contigs: s[:n],
|
@@ -69,7 +86,9 @@ module MiGA::Result::Stats
|
|
69
86
|
total_length: [s[:tot], 'bp'],
|
70
87
|
longest_sequence: [s[:max], 'bp'],
|
71
88
|
g_c_content: [s[:gc], '%'],
|
72
|
-
x_content: [s[:x], '%']
|
89
|
+
x_content: [s[:x], '%'],
|
90
|
+
g_c_skew: [s[:gc_skew], '%'],
|
91
|
+
a_t_skew: [s[:at_skew], '%']
|
73
92
|
}
|
74
93
|
end
|
75
94
|
|
@@ -109,20 +128,8 @@ module MiGA::Result::Stats
|
|
109
128
|
end
|
110
129
|
end
|
111
130
|
else
|
112
|
-
#
|
113
|
-
|
114
|
-
%w[Archaea Bacteria].include?(tax[:d]) &&
|
115
|
-
file_path(:raw_report).nil?
|
116
|
-
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
117
|
-
rep = file_path(:report)
|
118
|
-
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
119
|
-
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
120
|
-
$stderr.print `#{rc} ruby '#{scr}' \
|
121
|
-
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
122
|
-
add_file(:raw_report, "#{source.name}.ess/log")
|
123
|
-
add_file(:report, "#{source.name}.ess/log.domain")
|
124
|
-
end
|
125
|
-
# Extract/compute quality values
|
131
|
+
# Estimate quality metrics
|
132
|
+
fix_essential_genes_by_domain
|
126
133
|
stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
|
127
134
|
File.open(file_path(:report), 'r') do |fh|
|
128
135
|
fh.each_line do |ln|
|
@@ -131,6 +138,8 @@ module MiGA::Result::Stats
|
|
131
138
|
end
|
132
139
|
end
|
133
140
|
end
|
141
|
+
|
142
|
+
# Determine qualitative range
|
134
143
|
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
135
144
|
source.metadata[:quality] =
|
136
145
|
case stats[:quality]
|
@@ -140,6 +149,12 @@ module MiGA::Result::Stats
|
|
140
149
|
else; :low
|
141
150
|
end
|
142
151
|
source.save
|
152
|
+
|
153
|
+
# Inactivate low-quality datasets
|
154
|
+
min_qual = (project.metadata[:min_qual] || 25)
|
155
|
+
if min_qual != 'no' && stats[:quality] < min_qual
|
156
|
+
source.inactivate! 'Low quality genome'
|
157
|
+
end
|
143
158
|
end
|
144
159
|
stats
|
145
160
|
end
|
@@ -175,4 +190,21 @@ module MiGA::Result::Stats
|
|
175
190
|
end
|
176
191
|
stats
|
177
192
|
end
|
193
|
+
|
194
|
+
# Fix estimates based on essential genes based on taxonomy
|
195
|
+
def fix_essential_genes_by_domain
|
196
|
+
return if (tax = source.metadata[:tax]).nil? ||
|
197
|
+
!%w[Archaea Bacteria].include?(tax[:d]) ||
|
198
|
+
file_path(:raw_report)
|
199
|
+
|
200
|
+
MiGA::MiGA.DEBUG "Fixing essential genes by domain"
|
201
|
+
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
202
|
+
rep = file_path(:report)
|
203
|
+
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
204
|
+
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
205
|
+
$stderr.print `#{rc} ruby '#{scr}' \
|
206
|
+
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
207
|
+
add_file(:raw_report, "#{source.name}.ess/log")
|
208
|
+
add_file(:report, "#{source.name}.ess/log.domain")
|
209
|
+
end
|
178
210
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -8,7 +8,7 @@ module MiGA
|
|
8
8
|
# - Float representing the major.minor version.
|
9
9
|
# - Integer representing gem releases of the current version.
|
10
10
|
# - Integer representing minor changes that require new version number.
|
11
|
-
VERSION = [0.7, 8, 0]
|
11
|
+
VERSION = [0.7, 12, 0]
|
12
12
|
|
13
13
|
##
|
14
14
|
# Nickname for the current major.minor version.
|
@@ -16,7 +16,7 @@ module MiGA
|
|
16
16
|
|
17
17
|
##
|
18
18
|
# Date of the current gem release.
|
19
|
-
VERSION_DATE = Date.new(2020,
|
19
|
+
VERSION_DATE = Date.new(2020, 7, 22)
|
20
20
|
|
21
21
|
##
|
22
22
|
# Reference of MiGA.
|
data/scripts/cds.bash
CHANGED
@@ -20,7 +20,6 @@ fi
|
|
20
20
|
TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" -m type | cut -f 2)
|
21
21
|
case "$TYPE" in
|
22
22
|
metagenome|virome)
|
23
|
-
$CMD -p meta
|
24
23
|
prodigal -a "${DATASET}.faa" -d "${DATASET}.fna" -o "${DATASET}.gff3" \
|
25
24
|
-f gff -q -i "../05.assembly/${DATASET}.LargeContigs.fna" -p meta
|
26
25
|
;;
|
data/scripts/distances.bash
CHANGED
@@ -9,7 +9,12 @@ cd "$PROJECT/data/09.distances"
|
|
9
9
|
# Initialize
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
|
-
#
|
12
|
+
# Check quality first
|
13
|
+
miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
|
14
|
+
inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
|
15
|
+
[[ "$inactive" == "true" ]] && exit
|
16
|
+
|
17
|
+
# Run distances
|
13
18
|
ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
|
14
19
|
|
15
20
|
# Finalize
|
data/test/daemon_test.rb
CHANGED
@@ -93,7 +93,7 @@ class DaemonTest < Test::Unit::TestCase
|
|
93
93
|
0 => /-{20}\n/,
|
94
94
|
1 => /MiGA:#{p.name} launched/,
|
95
95
|
2 => /-{20}\n/,
|
96
|
-
|
96
|
+
6 => /Probing running jobs\n/
|
97
97
|
}.each { |k, v| assert_match(v, l[k], "unexpected line: #{k}") }
|
98
98
|
ensure
|
99
99
|
begin
|
data/test/dataset_test.rb
CHANGED
@@ -185,11 +185,13 @@ class DatasetTest < Test::Unit::TestCase
|
|
185
185
|
d = dataset
|
186
186
|
assert_equal(:incomplete, d.status)
|
187
187
|
assert_predicate(d, :active?)
|
188
|
-
d.inactivate!
|
188
|
+
d.inactivate! 'Too annoying'
|
189
189
|
assert_equal(:inactive, d.status)
|
190
|
+
assert_equal('Inactive: Too annoying', d.metadata[:warn])
|
190
191
|
assert_not_predicate(d, :active?)
|
191
192
|
d.activate!
|
192
193
|
assert_equal(:incomplete, d.status)
|
194
|
+
assert_nil(d.metadata[:warn])
|
193
195
|
assert_predicate(d, :active?)
|
194
196
|
end
|
195
197
|
|
data/test/project_test.rb
CHANGED
@@ -108,7 +108,7 @@ class ProjectTest < Test::Unit::TestCase
|
|
108
108
|
d1 = p1.add_dataset('BAH')
|
109
109
|
assert_not_predicate(p1, :done_preprocessing?)
|
110
110
|
FileUtils.touch(File.join(p1.path, 'data', '90.stats', "#{d1.name}.done"))
|
111
|
-
|
111
|
+
assert { p1.done_preprocessing? true }
|
112
112
|
assert_nil(p1.next_inclade)
|
113
113
|
p1.metadata[:type] = :clade
|
114
114
|
assert_equal(:subclades, p1.next_inclade)
|
data/test/remote_dataset_test.rb
CHANGED
@@ -101,7 +101,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
101
101
|
|
102
102
|
def test_ref_type_status
|
103
103
|
declare_remote_access
|
104
|
-
rd = MiGA::RemoteDataset.new('
|
104
|
+
rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
|
105
105
|
assert { !rd.get_metadata[:is_type] }
|
106
106
|
assert { rd.get_metadata[:is_ref_type] }
|
107
107
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.8.0
|
4
|
+
version: 0.7.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -118,6 +118,7 @@ files:
|
|
118
118
|
- lib/miga/cli/action/add.rb
|
119
119
|
- lib/miga/cli/action/add_result.rb
|
120
120
|
- lib/miga/cli/action/archive.rb
|
121
|
+
- lib/miga/cli/action/browse.rb
|
121
122
|
- lib/miga/cli/action/classify_wf.rb
|
122
123
|
- lib/miga/cli/action/console.rb
|
123
124
|
- lib/miga/cli/action/daemon.rb
|
@@ -529,7 +530,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
|
|
529
530
|
licenses:
|
530
531
|
- Artistic-2.0
|
531
532
|
metadata: {}
|
532
|
-
post_install_message:
|
533
|
+
post_install_message:
|
533
534
|
rdoc_options:
|
534
535
|
- lib
|
535
536
|
- README.md
|
@@ -550,8 +551,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
550
551
|
- !ruby/object:Gem::Version
|
551
552
|
version: '0'
|
552
553
|
requirements: []
|
553
|
-
rubygems_version: 3.
|
554
|
-
signing_key:
|
554
|
+
rubygems_version: 3.1.2
|
555
|
+
signing_key:
|
555
556
|
specification_version: 4
|
556
557
|
summary: MiGA
|
557
558
|
test_files: []
|