miga-base 0.7.8.0 → 0.7.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/miga/cli/action/browse.rb +213 -0
- data/lib/miga/cli/action/classify_wf.rb +3 -1
- data/lib/miga/cli/action/derep_wf.rb +4 -0
- data/lib/miga/cli/action/edit.rb +9 -6
- data/lib/miga/cli/action/quality_wf.rb +4 -1
- data/lib/miga/cli/action/stats.rb +2 -2
- data/lib/miga/cli/action/summary.rb +6 -1
- data/lib/miga/cli/action/wf.rb +11 -3
- data/lib/miga/cli/base.rb +27 -26
- data/lib/miga/common/format.rb +30 -8
- data/lib/miga/daemon.rb +6 -4
- data/lib/miga/dataset.rb +5 -1
- data/lib/miga/dataset/base.rb +3 -3
- data/lib/miga/dataset/hooks.rb +4 -4
- data/lib/miga/dataset/result.rb +18 -14
- data/lib/miga/lair.rb +1 -1
- data/lib/miga/project/dataset.rb +3 -5
- data/lib/miga/project/hooks.rb +4 -3
- data/lib/miga/remote_dataset/download.rb +2 -1
- data/lib/miga/result.rb +3 -1
- data/lib/miga/result/stats.rb +55 -23
- data/lib/miga/version.rb +2 -2
- data/scripts/cds.bash +0 -1
- data/scripts/distances.bash +6 -1
- data/test/daemon_test.rb +1 -1
- data/test/dataset_test.rb +3 -1
- data/test/project_test.rb +1 -1
- data/test/remote_dataset_test.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f99f8fd530489d42672cdd96123f321725b9437ee4a81e822a07854ec924ad53
|
4
|
+
data.tar.gz: c4d6607a4b6062b45cc94985b8bc920bb25307232851b20436ee4b9cd8a8986b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '03478c56a40e948ad9eb4cb09fbedc72bac331072ccf46ce5468f1f78a08e891260684771e455b92841d8af454fcab997d7d51a68360b8337ffa13c8c2ec88a4'
|
7
|
+
data.tar.gz: e992d10e5de206a85ac425e15c7594629f86e8088ec6f59ba820c5ecaf6c8901af0142455501a1d8af03902a3ca559d0374692e4df9f41acdceaf45d5d750b1f
|
data/README.md
CHANGED
@@ -41,6 +41,7 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
|
|
41
41
|
collaboration between [Kostas Lab][kostas] at the Georgia Institute of
|
42
42
|
Technology and [RDP][rdp] at Michigan State University.
|
43
43
|
|
44
|
+
See also the [complete list of contributors](manual/part1/contributors.md).
|
44
45
|
|
45
46
|
# License
|
46
47
|
|
@@ -0,0 +1,213 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'miga/cli/action'
|
4
|
+
|
5
|
+
# Action: miga browse
|
6
|
+
class MiGA::Cli::Action::Browse < MiGA::Cli::Action
|
7
|
+
def parse_cli
|
8
|
+
cli.parse do |opt|
|
9
|
+
cli.defaults = { open: true }
|
10
|
+
cli.opt_object(opt, [:project])
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def perform
|
15
|
+
p = cli.load_project
|
16
|
+
create_empty_page(p)
|
17
|
+
generate_project_page(p)
|
18
|
+
say 'Creating dataset pages'
|
19
|
+
cli.load_project.each_dataset do |d|
|
20
|
+
generate_dataset_page(p, d)
|
21
|
+
end
|
22
|
+
generate_datasets_index(p)
|
23
|
+
say "Open in your browser: #{File.join(p.path, 'index.html')}"
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
##
|
29
|
+
# Create an empty page with necessary assets for project +p+
|
30
|
+
def create_empty_page(p)
|
31
|
+
say 'Creating project page'
|
32
|
+
FileUtils.mkdir_p(browse_file(p, '.'))
|
33
|
+
%w[favicon-32.png style.css].each do |i|
|
34
|
+
FileUtils.cp(template_file(i), browse_file(p, i))
|
35
|
+
end
|
36
|
+
write_file(p, 'about.html') do
|
37
|
+
build_from_template('about.html', citation: MiGA::MiGA.CITATION)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
##
|
42
|
+
# Create landing page for project +p+
|
43
|
+
def generate_project_page(p)
|
44
|
+
# Redirect page
|
45
|
+
write_file(p, '../index.html') { build_from_template('redirect.html') }
|
46
|
+
|
47
|
+
# Summaries
|
48
|
+
summaries = Dir["#{p.path}/*.tsv"].map do |i|
|
49
|
+
"<li><a href='file://#{i}'>#{File.basename(i)}</a></li>"
|
50
|
+
end.join('')
|
51
|
+
|
52
|
+
# Project index page
|
53
|
+
data = {
|
54
|
+
project_active: 'active',
|
55
|
+
information: format_metadata(p),
|
56
|
+
summaries: summaries.empty? ? 'None' : "<ul>#{summaries}</ul>",
|
57
|
+
results: format_results(p)
|
58
|
+
}
|
59
|
+
write_file(p, 'index.html') { build_from_template('index.html', data) }
|
60
|
+
end
|
61
|
+
|
62
|
+
##
|
63
|
+
# Create page for dataset +d+ within project +p+
|
64
|
+
def generate_dataset_page(p, d)
|
65
|
+
data = {
|
66
|
+
unmiga_name: d.name.unmiga_name,
|
67
|
+
information: format_metadata(d),
|
68
|
+
results: format_results(d)
|
69
|
+
}
|
70
|
+
write_file(p, "d_#{d.name}.html") do
|
71
|
+
build_from_template('dataset.html', data)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
##
|
76
|
+
# Create pages for reference and query dataset indexes
|
77
|
+
def generate_datasets_index(p)
|
78
|
+
say 'Creating index pages'
|
79
|
+
data = format_dataset_index(p)
|
80
|
+
data.each do |k, v|
|
81
|
+
write_file(p, "#{k}_datasets.html") do
|
82
|
+
v[:list] = 'None' if v[:list] == ''
|
83
|
+
build_from_template(
|
84
|
+
'datasets.html',
|
85
|
+
v.merge(:"#{k}_datasets_active" => 'active')
|
86
|
+
)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def format_dataset_index(p)
|
92
|
+
data = {
|
93
|
+
ref: { type_name: 'Reference', list: '' },
|
94
|
+
qry: { type_name: 'Query', list: '' }
|
95
|
+
}
|
96
|
+
p.each_dataset do |d|
|
97
|
+
data[d.ref? ? :ref : :qry][:list] +=
|
98
|
+
"<li><a href='d_#{d.name}.html'>#{d.name.unmiga_name}</a></li>"
|
99
|
+
end
|
100
|
+
data
|
101
|
+
end
|
102
|
+
|
103
|
+
##
|
104
|
+
# Format +obj+ metadata as a table
|
105
|
+
def format_metadata(obj)
|
106
|
+
'<table class="table table-sm table-responsive">' +
|
107
|
+
obj.metadata.data.map do |k, v|
|
108
|
+
case k
|
109
|
+
when /^run_/, :plugins, :user
|
110
|
+
next
|
111
|
+
when :web_assembly_gz
|
112
|
+
v = "<a href='#{v}'>#{v[0..50]}...</a>"
|
113
|
+
when :datasets
|
114
|
+
v = v.size
|
115
|
+
end
|
116
|
+
"<tr><td class='text-right pr-4'><b>#{format_name(k)}</b></td>" \
|
117
|
+
"<td>#{v}</td></tr>"
|
118
|
+
end.compact.join('') +
|
119
|
+
'</table>'
|
120
|
+
end
|
121
|
+
|
122
|
+
##
|
123
|
+
# Format +obj+ results as cards
|
124
|
+
def format_results(obj)
|
125
|
+
o = ''
|
126
|
+
obj.each_result do |key, res|
|
127
|
+
links = format_result_links(res)
|
128
|
+
stats = format_result_stats(res)
|
129
|
+
next unless links || stats
|
130
|
+
name = format_name(key)
|
131
|
+
url_doc =
|
132
|
+
'http://manual.microbial-genomes.org/part5/workflow#' +
|
133
|
+
key.to_s.tr('_', '-')
|
134
|
+
o += <<~CARD
|
135
|
+
<div class="col-md-6 mb-4">
|
136
|
+
<h3>#{name}</h3>
|
137
|
+
<div class='border-left p-3'>
|
138
|
+
#{stats}
|
139
|
+
#{links}
|
140
|
+
</div>
|
141
|
+
<div class='border-top p-2 bg-light'>
|
142
|
+
<a target=_blank href="#{url_doc}" class='p-2'>Learn more</a>
|
143
|
+
</div>
|
144
|
+
</div>
|
145
|
+
CARD
|
146
|
+
end
|
147
|
+
"<div class='row'>#{o}</div>"
|
148
|
+
end
|
149
|
+
|
150
|
+
def format_name(str)
|
151
|
+
str
|
152
|
+
.to_s.unmiga_name
|
153
|
+
.sub(/^./, &:upcase)
|
154
|
+
.gsub(/(Aai|Ani|Ogs|Cds|Ssu| db$| ssu )/, &:upcase)
|
155
|
+
.sub(/Haai/, 'hAAI')
|
156
|
+
.sub(/Mytaxa/, 'MyTaxa')
|
157
|
+
.sub(/ pvalue$/, ' p-value')
|
158
|
+
.sub(/contigs$/, 'Contigs')
|
159
|
+
end
|
160
|
+
|
161
|
+
def format_result_links(res)
|
162
|
+
links = []
|
163
|
+
res.each_file do |key, _|
|
164
|
+
name = format_name(key)
|
165
|
+
links << "<a href='file://#{res.file_path(key)}'>#{name}</a><br/>"
|
166
|
+
end
|
167
|
+
links.empty? ? nil : links.join('')
|
168
|
+
end
|
169
|
+
|
170
|
+
def format_result_stats(res)
|
171
|
+
res.stats.map do |k, v|
|
172
|
+
v = [v, ''] unless v.is_a? Array
|
173
|
+
v[0] = ('%.3g' % v[0]) if v[0].is_a? Float
|
174
|
+
"<b>#{format_name(k)}:</b> #{v[0]}#{v[1]}<br/>"
|
175
|
+
end.join('') + '<br/>' unless res.stats.empty?
|
176
|
+
end
|
177
|
+
|
178
|
+
##
|
179
|
+
# Write +file+ within the browse folder of project +p+ using the passed
|
180
|
+
# block output as content
|
181
|
+
def write_file(p, file)
|
182
|
+
File.open(browse_file(p, file), 'w') { |fh| fh.print yield }
|
183
|
+
end
|
184
|
+
|
185
|
+
##
|
186
|
+
# Use a +template+ file to generate content with a hash of +data+ over the
|
187
|
+
# layout page if +layout+ is true
|
188
|
+
def build_from_template(template, data = {}, layout = true)
|
189
|
+
cont = File.read(template_file(template)).miga_variables(data)
|
190
|
+
return cont unless layout
|
191
|
+
|
192
|
+
build_from_template(
|
193
|
+
'layout.html',
|
194
|
+
data.merge(content: cont, project_name: cli.load_project.name),
|
195
|
+
false
|
196
|
+
)
|
197
|
+
end
|
198
|
+
|
199
|
+
##
|
200
|
+
# Path to the template browse file
|
201
|
+
def template_file(file)
|
202
|
+
File.join(
|
203
|
+
MiGA::MiGA.root_path,
|
204
|
+
'lib', 'miga', 'cli', 'action', 'browse', file
|
205
|
+
)
|
206
|
+
end
|
207
|
+
|
208
|
+
##
|
209
|
+
# Path to the browse file in the project
|
210
|
+
def browse_file(p, file)
|
211
|
+
File.join(p.path, 'browse', file)
|
212
|
+
end
|
213
|
+
end
|
@@ -42,7 +42,9 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
|
|
42
42
|
'--no-summaries',
|
43
43
|
'Do not generate intermediate step summaries'
|
44
44
|
) { |v| cli[:summaries] = v }
|
45
|
-
opts_for_wf(
|
45
|
+
opts_for_wf(
|
46
|
+
opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
|
47
|
+
)
|
46
48
|
end
|
47
49
|
end
|
48
50
|
|
@@ -19,6 +19,10 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
|
|
19
19
|
'Use Average Amino Acid Identity (AAI) as genome similarity metric',
|
20
20
|
'By default: Use Average Nucleotide Identity (ANI)'
|
21
21
|
) { cli[:metric] = :aai }
|
22
|
+
opt.on(
|
23
|
+
'--ani',
|
24
|
+
'Use Average Nucleotide Identity (ANI) as similarity metric (default)'
|
25
|
+
) { cli[:metric] = :ani }
|
22
26
|
opt.on(
|
23
27
|
'--threshold FLOAT', Float,
|
24
28
|
"Metric threshold (%) to dereplicate. By default: #{cli[:threshold]}"
|
data/lib/miga/cli/action/edit.rb
CHANGED
@@ -17,18 +17,21 @@ class MiGA::Cli::Action::Edit < MiGA::Cli::Action
|
|
17
17
|
'Activate dataset; requires -D'
|
18
18
|
) { |v| cli[:activate] = v }
|
19
19
|
opt.on(
|
20
|
-
'--inactivate',
|
21
|
-
'Inactivate dataset; requires -D'
|
22
|
-
|
20
|
+
'--inactivate [reason]',
|
21
|
+
'Inactivate dataset; requires -D',
|
22
|
+
'The argument is optional: reason to inactivate dataset'
|
23
|
+
) { |v| cli[:activate] = false ; cli[:reason] = v }
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
26
27
|
def perform
|
27
28
|
obj = cli.load_project_or_dataset
|
28
29
|
unless cli[:activate].nil?
|
29
|
-
cli.ensure_par(
|
30
|
-
|
31
|
-
|
30
|
+
cli.ensure_par(
|
31
|
+
{ dataset: '-D' },
|
32
|
+
'%<name>s is mandatory with --[in-]activate: please provide %<flag>s'
|
33
|
+
)
|
34
|
+
cli[:activate] ? obj.activate! : obj.inactivate!(cli[:reason])
|
32
35
|
end
|
33
36
|
cli.add_metadata(obj)
|
34
37
|
obj.save
|
@@ -15,7 +15,10 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
|
|
15
15
|
'-m', '--mytaxa-scan',
|
16
16
|
'Perform MyTaxa scan analysis'
|
17
17
|
) { |v| cli[:mytaxa] = v }
|
18
|
-
opts_for_wf(
|
18
|
+
opts_for_wf(
|
19
|
+
opt, 'Input genome assemblies (nucleotides, FastA)',
|
20
|
+
qual: false
|
21
|
+
)
|
19
22
|
end
|
20
23
|
end
|
21
24
|
|
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
14
14
|
) { |v| cli[:key] = v }
|
15
15
|
opt.on(
|
16
16
|
'--compute-and-save',
|
17
|
-
'Compute and
|
17
|
+
'Compute and save the statistics'
|
18
18
|
) { |v| cli[:compute] = v }
|
19
19
|
opt.on(
|
20
20
|
'--try-load',
|
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
38
38
|
end
|
39
39
|
if cli[:key].nil?
|
40
40
|
r[:stats].each do |k, v|
|
41
|
-
k_n = k
|
41
|
+
k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
|
42
42
|
cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
|
43
43
|
end
|
44
44
|
else
|
@@ -26,6 +26,10 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
26
26
|
'--with-units',
|
27
27
|
'Include units in each cell'
|
28
28
|
) { |v| cli[:units] = v }
|
29
|
+
opt.on(
|
30
|
+
'--compute-and-save',
|
31
|
+
'Compute and save the statistics if not yet available'
|
32
|
+
) { |v| cli[:compute] = v }
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
@@ -34,7 +38,8 @@ class MiGA::Cli::Action::Summary < MiGA::Cli::Action
|
|
34
38
|
ds = cli.load_and_filter_datasets
|
35
39
|
cli.say 'Loading results'
|
36
40
|
stats = ds.map do |d|
|
37
|
-
r = d.
|
41
|
+
r = d.result(cli[:result])
|
42
|
+
r.compute_stats if cli[:compute] && !r.nil? && r[:stats].empty?
|
38
43
|
s = r.nil? ? {} : r[:stats]
|
39
44
|
s.tap { |i| i[:dataset] = d.name }
|
40
45
|
end
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -15,7 +15,7 @@ module MiGA::Cli::Action::Wf
|
|
15
15
|
|
16
16
|
def opts_for_wf(opt, files_desc, params = {})
|
17
17
|
{
|
18
|
-
multi: false, cleanup: true, project_type: false, ncbi: true
|
18
|
+
multi: false, cleanup: true, project_type: false, ncbi: true, qual: true
|
19
19
|
}.each { |k, v| params[k] = v if params[k].nil? }
|
20
20
|
opt.on(
|
21
21
|
'-o', '--out_dir PATH',
|
@@ -40,6 +40,13 @@ module MiGA::Cli::Action::Wf
|
|
40
40
|
'Only download complete genomes, not drafts'
|
41
41
|
) { |v| cli[:ncbi_draft] = v }
|
42
42
|
end
|
43
|
+
if params[:qual]
|
44
|
+
opt.on(
|
45
|
+
'--min-qual FLOAT', Float,
|
46
|
+
'Minimum genome quality to include in analysis',
|
47
|
+
'By default: 50.0'
|
48
|
+
) { |v| cli[:min_qual] = v }
|
49
|
+
end
|
43
50
|
if params[:cleanup]
|
44
51
|
opt.on(
|
45
52
|
'-c', '--clean',
|
@@ -125,7 +132,7 @@ module MiGA::Cli::Action::Wf
|
|
125
132
|
]) unless MiGA::Project.exist? cli[:outdir]
|
126
133
|
# Define project metadata
|
127
134
|
p = cli.load_project(:outdir, '-o')
|
128
|
-
[
|
135
|
+
%i[haai_p aai_p ani_p ess_coll min_qual].each { |i| p_metadata[i] = cli[i] }
|
129
136
|
p_metadata[:type] = cli[:project_type]
|
130
137
|
transfer_metadata(p, p_metadata)
|
131
138
|
# Download datasets
|
@@ -159,9 +166,10 @@ module MiGA::Cli::Action::Wf
|
|
159
166
|
'-P', cli[:outdir],
|
160
167
|
'-r', r,
|
161
168
|
'-o', File.expand_path("#{r}.tsv", cli[:outdir]),
|
162
|
-
'--tab'
|
169
|
+
'--tab', '--ref', '--active'
|
163
170
|
])
|
164
171
|
end
|
172
|
+
call_cli(['browse', '-P', cli[:outdir]])
|
165
173
|
end
|
166
174
|
|
167
175
|
def cleanup
|
data/lib/miga/cli/base.rb
CHANGED
@@ -11,39 +11,40 @@ module MiGA::Cli::Base
|
|
11
11
|
preproc_wf: 'Preprocess input genomes or metagenomes',
|
12
12
|
index_wf: 'Generate distance indexing of input genomes',
|
13
13
|
# Projects
|
14
|
-
new: '
|
15
|
-
about: '
|
16
|
-
doctor: '
|
17
|
-
get_db: '
|
14
|
+
new: 'Create an empty MiGA project',
|
15
|
+
about: 'Display information about a MiGA project',
|
16
|
+
doctor: 'Perform consistency checks on a MiGA project',
|
17
|
+
get_db: 'Download a pre-indexed database',
|
18
|
+
browse: 'Explore a project locally using a web browser',
|
18
19
|
# Datasets
|
19
|
-
add: '
|
20
|
-
get: '
|
21
|
-
ncbi_get: '
|
22
|
-
rm: '
|
23
|
-
find: '
|
20
|
+
add: 'Create a dataset in a MiGA project',
|
21
|
+
get: 'Download a dataset from public databases into a MiGA project',
|
22
|
+
ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
|
23
|
+
rm: 'Remove a dataset from an MiGA project',
|
24
|
+
find: 'Find unregistered datasets based on result files',
|
24
25
|
ln: 'Link datasets (including results) from one project to another',
|
25
|
-
ls: '
|
26
|
-
archive: '
|
26
|
+
ls: 'List all registered datasets in an MiGA project',
|
27
|
+
archive: 'Generate a tar-ball with all files from select datasets',
|
27
28
|
# Results
|
28
|
-
add_result: '
|
29
|
-
stats: '
|
30
|
-
files: '
|
31
|
-
run: '
|
32
|
-
summary: '
|
33
|
-
next_step: '
|
29
|
+
add_result: 'Register a result',
|
30
|
+
stats: 'Extract statistics for the given result',
|
31
|
+
files: 'List registered files from the results of a dataset or project',
|
32
|
+
run: 'Execute locally one step analysis producing the given result',
|
33
|
+
summary: 'Generate a summary table for the statistics of all datasets',
|
34
|
+
next_step: 'Return the next task to run in a dataset or project',
|
34
35
|
# Objects (Datasets or Projects)
|
35
|
-
edit: '
|
36
|
+
edit: 'Edit the metadata of a dataset or project',
|
36
37
|
# System
|
37
38
|
init: 'Initialize MiGA to process new projects',
|
38
|
-
daemon: '
|
39
|
-
lair: '
|
40
|
-
date: '
|
41
|
-
console: '
|
39
|
+
daemon: 'Control the daemon of a MiGA project',
|
40
|
+
lair: 'Control groups of daemons for several MiGA projects',
|
41
|
+
date: 'Return the current date in standard MiGA format',
|
42
|
+
console: 'Open an IRB console with MiGA',
|
42
43
|
# Taxonomy
|
43
|
-
tax_set: '
|
44
|
-
tax_test: '
|
45
|
-
tax_index: '
|
46
|
-
tax_dist: '
|
44
|
+
tax_set: 'Register taxonomic information for datasets',
|
45
|
+
tax_test: 'Return test of taxonomic distributions for query datasets',
|
46
|
+
tax_index: 'Create a taxonomy-indexed list of the datasets',
|
47
|
+
tax_dist: 'Estimate distributions of distance by taxonomy',
|
47
48
|
}
|
48
49
|
|
49
50
|
@@TASK_ALIAS = {
|
data/lib/miga/common/format.rb
CHANGED
@@ -68,15 +68,20 @@ module MiGA::Common::Format
|
|
68
68
|
# a FastA or FastQ file (supports gzipped files). The +format+ must be a
|
69
69
|
# Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
|
70
70
|
# controlled via the +opts+ Hash. Supported options include:
|
71
|
-
# - +:n50+:
|
72
|
-
# - +:gc+:
|
73
|
-
# - +:x+:
|
71
|
+
# - +:n50+: Include the N50 and the median (in bp)
|
72
|
+
# - +:gc+: Include the G+C content (in %)
|
73
|
+
# - +:x+: Include the undetermined bases content (in %)
|
74
|
+
# - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
|
75
|
+
# See definition used here in DOI:10.1177/117693430700300006
|
74
76
|
def seqs_length(file, format, opts = {})
|
77
|
+
opts[:gc] = true if opts[:skew]
|
75
78
|
fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
|
76
79
|
l = []
|
77
80
|
gc = 0
|
78
81
|
xn = 0
|
79
|
-
|
82
|
+
t = 0
|
83
|
+
c = 0
|
84
|
+
i = 0 # <- Zlib::GzipReader doesn't set `$.`
|
80
85
|
fh.each_line do |ln|
|
81
86
|
i += 1
|
82
87
|
if (format == :fasta and ln =~ /^>/) or
|
@@ -86,16 +91,27 @@ module MiGA::Common::Format
|
|
86
91
|
l[l.size - 1] += ln.chomp.size
|
87
92
|
gc += ln.scan(/[GCgc]/).count if opts[:gc]
|
88
93
|
xn += ln.scan(/[XNxn]/).count if opts[:x]
|
94
|
+
if opts[:skew]
|
95
|
+
t += ln.scan(/[Tt]/).count
|
96
|
+
c += ln.scan(/[Cc]/).count
|
97
|
+
end
|
89
98
|
end
|
90
99
|
end
|
91
100
|
fh.close
|
92
101
|
|
93
|
-
o = { n: l.size, tot: l.inject(:+), max: l.max }
|
102
|
+
o = { n: l.size, tot: l.inject(0, :+), max: l.max }
|
103
|
+
return o if o[:tot].zero?
|
94
104
|
o[:avg] = o[:tot].to_f / l.size
|
95
105
|
o[:var] = l.map { |a| a**2 }.inject(:+).to_f / l.size - o[:avg]**2
|
96
106
|
o[:sd] = Math.sqrt o[:var]
|
97
107
|
o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
|
98
108
|
o[:x] = 100.0 * xn / o[:tot] if opts[:x]
|
109
|
+
if opts[:skew]
|
110
|
+
at = o[:tot] - gc
|
111
|
+
o[:at_skew] = 100.0 * (2 * t - at) / at
|
112
|
+
o[:gc_skew] = 100.0 * (2 * c - gc) / gc
|
113
|
+
end
|
114
|
+
|
99
115
|
if opts[:n50]
|
100
116
|
l.sort!
|
101
117
|
thr = o[:tot] / 2
|
@@ -106,7 +122,8 @@ module MiGA::Common::Format
|
|
106
122
|
break if pos >= thr
|
107
123
|
end
|
108
124
|
o[:med] = o[:n].even? ?
|
109
|
-
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
|
125
|
+
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) :
|
126
|
+
l[(o[:n] - 1) / 2]
|
110
127
|
end
|
111
128
|
o
|
112
129
|
end
|
@@ -130,9 +147,14 @@ class String
|
|
130
147
|
end
|
131
148
|
|
132
149
|
##
|
133
|
-
# Replace underscores by spaces or
|
150
|
+
# Replace underscores by spaces or other symbols depending on context
|
134
151
|
def unmiga_name
|
135
|
-
gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
|
152
|
+
gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
|
153
|
+
.gsub(/g_c_(content)/, 'G+C \\1')
|
154
|
+
.gsub(/g_c_(skew)/, 'G-C \\1')
|
155
|
+
.gsub(/a_t_(skew)/, 'A-T \\1')
|
156
|
+
.gsub(/x_content/, &:capitalize)
|
157
|
+
.tr('_', ' ')
|
136
158
|
end
|
137
159
|
|
138
160
|
##
|
data/lib/miga/daemon.rb
CHANGED
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
72
72
|
say '-----------------------------------'
|
73
73
|
say 'MiGA:%s launched' % project.name
|
74
74
|
say '-----------------------------------'
|
75
|
+
recalculate_status!
|
75
76
|
load_status
|
76
77
|
say 'Configuration options:'
|
77
78
|
say @runopts.to_s
|
@@ -99,6 +100,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
99
100
|
end
|
100
101
|
|
101
102
|
def recalculate_status!
|
103
|
+
say 'Recalculating status for all datasets'
|
102
104
|
project.each_dataset(&:recalculate_status)
|
103
105
|
end
|
104
106
|
|
@@ -158,8 +160,8 @@ class MiGA::Daemon < MiGA::MiGA
|
|
158
160
|
end
|
159
161
|
|
160
162
|
##
|
161
|
-
# Traverse datasets, and returns boolean indicating if at any
|
162
|
-
# are incomplete
|
163
|
+
# Traverse datasets, and returns boolean indicating if at any reference
|
164
|
+
# datasets are incomplete
|
163
165
|
def check_datasets
|
164
166
|
l_say(2, 'Checking datasets')
|
165
167
|
o = false
|
@@ -167,7 +169,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
167
169
|
next unless ds.status == :incomplete
|
168
170
|
next if ds.next_preprocessing(false).nil?
|
169
171
|
|
170
|
-
o = true
|
172
|
+
o = true if ds.ref?
|
171
173
|
queue_job(:d, ds)
|
172
174
|
end
|
173
175
|
o
|
@@ -183,7 +185,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
183
185
|
return if project.dataset_names.empty?
|
184
186
|
|
185
187
|
# Double-check if all datasets are ready
|
186
|
-
return unless project.done_preprocessing?
|
188
|
+
return unless project.done_preprocessing?
|
187
189
|
|
188
190
|
# Queue project-level job
|
189
191
|
to_run = project.next_task(nil, false)
|
data/lib/miga/dataset.rb
CHANGED
@@ -97,7 +97,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
97
97
|
|
98
98
|
##
|
99
99
|
# Inactivate a dataset. This halts automated processing by the daemon
|
100
|
-
|
100
|
+
#
|
101
|
+
# If given, the +reason+ string is saved as a metadata +:warn+ entry
|
102
|
+
def inactivate!(reason = nil)
|
103
|
+
metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
|
101
104
|
metadata[:inactive] = true
|
102
105
|
metadata.save
|
103
106
|
pull_hook :on_inactivate
|
@@ -107,6 +110,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
107
110
|
# Activate a dataset. This removes the +:inactive+ flag
|
108
111
|
def activate!
|
109
112
|
metadata[:inactive] = nil
|
113
|
+
metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
|
110
114
|
metadata.save
|
111
115
|
pull_hook :on_activate
|
112
116
|
end
|
data/lib/miga/dataset/base.rb
CHANGED
@@ -35,8 +35,8 @@ module MiGA::Dataset::Base
|
|
35
35
|
mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
|
36
36
|
mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
|
37
37
|
# Distances (for single-species datasets)
|
38
|
-
distances: '09.distances',
|
39
38
|
taxonomy: '09.distances/05.taxonomy',
|
39
|
+
distances: '09.distances',
|
40
40
|
# General statistics
|
41
41
|
stats: '90.stats'
|
42
42
|
}
|
@@ -66,7 +66,7 @@ module MiGA::Dataset::Base
|
|
66
66
|
@@PREPROCESSING_TASKS = [
|
67
67
|
:raw_reads, :trimmed_reads, :read_quality, :trimmed_fasta,
|
68
68
|
:assembly, :cds, :essential_genes, :ssu, :mytaxa, :mytaxa_scan,
|
69
|
-
:
|
69
|
+
:taxonomy, :distances, :stats
|
70
70
|
]
|
71
71
|
|
72
72
|
##
|
@@ -77,7 +77,7 @@ module MiGA::Dataset::Base
|
|
77
77
|
##
|
78
78
|
# Tasks to be executed only in datasets that are not multi-organism. These
|
79
79
|
# tasks are ignored for multi-organism datasets or for unknown types.
|
80
|
-
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :
|
80
|
+
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :taxonomy, :distances]
|
81
81
|
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map { |i| [i, true] }]
|
82
82
|
|
83
83
|
##
|
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -52,15 +52,15 @@ module MiGA::Dataset::Hooks
|
|
52
52
|
end
|
53
53
|
|
54
54
|
##
|
55
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
-
# object (
|
55
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
56
|
+
# dataset, project, project_name, miga, object (if defined for the event)
|
57
57
|
# - +hook_args+: +[cmd]+
|
58
58
|
# - +event_args+: +[object (optional)]+
|
59
59
|
def hook_run_cmd(hook_args, event_args)
|
60
60
|
Process.wait(
|
61
61
|
spawn hook_args.first.miga_variables(
|
62
|
-
dataset: name, project: project.path,
|
63
|
-
object: event_args.first
|
62
|
+
dataset: name, project: project.path, project_name: project.name,
|
63
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
64
64
|
)
|
65
65
|
)
|
66
66
|
end
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -26,15 +26,24 @@ module MiGA::Dataset::Result
|
|
26
26
|
# The values are symbols:
|
27
27
|
# - empty: the dataset has no data
|
28
28
|
# - inactive: the dataset is inactive
|
29
|
+
# - upstream: the task is upstream from dataset's input
|
29
30
|
# - force: forced to ignore by metadata
|
30
31
|
# - project: incompatible project
|
31
32
|
# - noref: incompatible dataset, only for reference
|
32
33
|
# - multi: incompatible dataset, only for multi
|
33
34
|
# - nonmulti: incompatible dataset, only for nonmulti
|
35
|
+
# - complete: the task is already complete
|
34
36
|
# - execute: do not ignore, execute the task
|
35
37
|
def why_ignore(task)
|
36
|
-
if !
|
38
|
+
if !get_result(task).nil?
|
39
|
+
:complete
|
40
|
+
elsif !active?
|
37
41
|
:inactive
|
42
|
+
elsif first_preprocessing.nil?
|
43
|
+
:empty
|
44
|
+
elsif @@PREPROCESSING_TASKS.index(task) <
|
45
|
+
@@PREPROCESSING_TASKS.index(first_preprocessing)
|
46
|
+
:upstream
|
38
47
|
elsif !metadata["run_#{task}"].nil?
|
39
48
|
metadata["run_#{task}"] ? :execute : :force
|
40
49
|
elsif task == :taxonomy && project.metadata[:ref_project].nil?
|
@@ -56,7 +65,7 @@ module MiGA::Dataset::Result
|
|
56
65
|
# initial input. Passes +save+ to #add_result.
|
57
66
|
def first_preprocessing(save = false)
|
58
67
|
@first_processing ||= @@PREPROCESSING_TASKS.find do |t|
|
59
|
-
!
|
68
|
+
!add_result(t, save).nil?
|
60
69
|
end
|
61
70
|
end
|
62
71
|
|
@@ -70,7 +79,7 @@ module MiGA::Dataset::Result
|
|
70
79
|
false
|
71
80
|
elsif add_result(t, save).nil?
|
72
81
|
if (metadata["_try_#{t}"] || 0) > (project.metadata[:max_try] || 10)
|
73
|
-
inactivate!
|
82
|
+
inactivate! "Too many errors in step #{t}"
|
74
83
|
false
|
75
84
|
else
|
76
85
|
true
|
@@ -121,17 +130,12 @@ module MiGA::Dataset::Result
|
|
121
130
|
# - complete: a task with registered results
|
122
131
|
# - pending: a task queued to be performed
|
123
132
|
def result_status(task)
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
:-
|
131
|
-
elsif ignore_task?(task)
|
132
|
-
:"ignore_#{why_ignore task}"
|
133
|
-
else
|
134
|
-
:pending
|
133
|
+
reason = why_ignore(task)
|
134
|
+
case reason
|
135
|
+
when :upstream; :-
|
136
|
+
when :execute; :pending
|
137
|
+
when :complete; :complete
|
138
|
+
else; :"ignore_#{reason}"
|
135
139
|
end
|
136
140
|
end
|
137
141
|
|
data/lib/miga/lair.rb
CHANGED
data/lib/miga/project/dataset.rb
CHANGED
@@ -134,12 +134,10 @@ module MiGA::Project::Dataset
|
|
134
134
|
##
|
135
135
|
# Are all the datasets in the project preprocessed? Save intermediate results
|
136
136
|
# if +save+ (until the first incomplete dataset is reached).
|
137
|
-
def done_preprocessing?(save =
|
138
|
-
|
139
|
-
|
140
|
-
return false if ds.is_ref? and not ds.done_preprocessing?(save)
|
137
|
+
def done_preprocessing?(save = false)
|
138
|
+
!each_dataset.any? do |d|
|
139
|
+
d.ref? && d.active? && !d.done_preprocessing?(save)
|
141
140
|
end
|
142
|
-
true
|
143
141
|
end
|
144
142
|
|
145
143
|
##
|
data/lib/miga/project/hooks.rb
CHANGED
@@ -26,14 +26,15 @@ module MiGA::Project::Hooks
|
|
26
26
|
end
|
27
27
|
|
28
28
|
##
|
29
|
-
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
-
# object (
|
29
|
+
# Run +cmd+ in the command-line with {{variables}}:
|
30
|
+
# project, project_name, miga, object (if defined by the event)
|
31
31
|
# - +hook_args+: +[cmd]+
|
32
32
|
# - +event_args+: +[object (optional)]+
|
33
33
|
def hook_run_cmd(hook_args, event_args)
|
34
34
|
Process.wait(
|
35
35
|
spawn hook_args.first.miga_variables(
|
36
|
-
project: path,
|
36
|
+
project: path, project_name: name,
|
37
|
+
miga: MiGA::MiGA.root_path, object: event_args.first
|
37
38
|
)
|
38
39
|
)
|
39
40
|
end
|
@@ -94,12 +94,13 @@ class MiGA::RemoteDataset
|
|
94
94
|
@timeout_try = 0
|
95
95
|
begin
|
96
96
|
DEBUG 'GET: ' + url
|
97
|
-
open(
|
97
|
+
URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
|
98
98
|
rescue => e
|
99
99
|
@timeout_try += 1
|
100
100
|
raise e if @timeout_try >= 3
|
101
101
|
|
102
102
|
sleep 5 # <- For: 429 Too Many Requests
|
103
|
+
DEBUG "RETRYING after: #{e}"
|
103
104
|
retry
|
104
105
|
end
|
105
106
|
doc
|
data/lib/miga/result.rb
CHANGED
@@ -164,7 +164,9 @@ class MiGA::Result < MiGA::MiGA
|
|
164
164
|
# Unlink result by removing the .done and .start timestamps and the
|
165
165
|
# .json descriptor, but don't remove any other associated files
|
166
166
|
def unlink
|
167
|
-
%i(start done).each
|
167
|
+
%i(start done).each do |i|
|
168
|
+
f = path(i) and File.exists?(f) and File.unlink(f)
|
169
|
+
end
|
168
170
|
File.unlink path
|
169
171
|
end
|
170
172
|
|
data/lib/miga/result/stats.rb
CHANGED
@@ -8,6 +8,7 @@ module MiGA::Result::Stats
|
|
8
8
|
# (Re-)calculate and save the statistics for the result
|
9
9
|
def compute_stats
|
10
10
|
method = :"compute_stats_#{key}"
|
11
|
+
MiGA::MiGA.DEBUG "Result(#{key}).compute_stats"
|
11
12
|
stats = self.respond_to?(method, true) ? send(method) : nil
|
12
13
|
unless stats.nil?
|
13
14
|
self[:stats] = stats
|
@@ -16,32 +17,45 @@ module MiGA::Result::Stats
|
|
16
17
|
self[:stats]
|
17
18
|
end
|
18
19
|
|
20
|
+
##
|
21
|
+
# Access the stats entry of results
|
22
|
+
def stats
|
23
|
+
self[:stats]
|
24
|
+
end
|
25
|
+
|
19
26
|
private
|
20
27
|
|
21
28
|
def compute_stats_raw_reads
|
22
29
|
stats = {}
|
30
|
+
seq_opts = { gc: true, x: true, skew: true }
|
23
31
|
if self[:files][:pair1].nil?
|
24
|
-
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq,
|
32
|
+
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
|
25
33
|
stats = {
|
26
34
|
reads: s[:n],
|
27
35
|
length_average: [s[:avg], 'bp'],
|
28
36
|
length_standard_deviation: [s[:sd], 'bp'],
|
29
37
|
g_c_content: [s[:gc], '%'],
|
30
|
-
x_content: [s[:x], '%']
|
38
|
+
x_content: [s[:x], '%'],
|
39
|
+
g_c_skew: [s[:gc_skew], '%'],
|
40
|
+
a_t_skew: [s[:at_skew], '%']
|
31
41
|
}
|
32
42
|
else
|
33
|
-
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq,
|
34
|
-
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq,
|
43
|
+
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
|
44
|
+
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
|
35
45
|
stats = {
|
36
46
|
read_pairs: s1[:n],
|
37
47
|
forward_length_average: [s1[:avg], 'bp'],
|
38
48
|
forward_length_standard_deviation: [s1[:sd], 'bp'],
|
39
49
|
forward_g_c_content: [s1[:gc], '%'],
|
40
50
|
forward_x_content: [s1[:x], '%'],
|
51
|
+
forward_g_c_skew: [s1[:gc_skew], '%'],
|
52
|
+
forward_a_t_skew: [s1[:at_skew], '%'],
|
41
53
|
reverse_length_average: [s2[:avg], 'bp'],
|
42
54
|
reverse_length_standard_deviation: [s2[:sd], 'bp'],
|
43
55
|
reverse_g_c_content: [s2[:gc], '%'],
|
44
|
-
reverse_x_content: [s2[:x], '%']
|
56
|
+
reverse_x_content: [s2[:x], '%'],
|
57
|
+
reverse_g_c_skew: [s2[:gc_skew], '%'],
|
58
|
+
reverse_a_t_skew: [s2[:at_skew], '%']
|
45
59
|
}
|
46
60
|
end
|
47
61
|
stats
|
@@ -49,19 +63,22 @@ module MiGA::Result::Stats
|
|
49
63
|
|
50
64
|
def compute_stats_trimmed_fasta
|
51
65
|
f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
|
52
|
-
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
|
66
|
+
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
|
53
67
|
{
|
54
68
|
reads: s[:n],
|
55
69
|
length_average: [s[:avg], 'bp'],
|
56
70
|
length_standard_deviation: [s[:sd], 'bp'],
|
57
71
|
g_c_content: [s[:gc], '%'],
|
58
|
-
x_content: [s[:x], '%']
|
72
|
+
x_content: [s[:x], '%'],
|
73
|
+
g_c_skew: [s[:gc_skew], '%'],
|
74
|
+
a_t_skew: [s[:at_skew], '%']
|
59
75
|
}
|
60
76
|
end
|
61
77
|
|
62
78
|
def compute_stats_assembly
|
63
79
|
s = MiGA::MiGA.seqs_length(
|
64
|
-
file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
|
80
|
+
file_path(:largecontigs), :fasta,
|
81
|
+
n50: true, gc: true, x: true, skew: true
|
65
82
|
)
|
66
83
|
{
|
67
84
|
contigs: s[:n],
|
@@ -69,7 +86,9 @@ module MiGA::Result::Stats
|
|
69
86
|
total_length: [s[:tot], 'bp'],
|
70
87
|
longest_sequence: [s[:max], 'bp'],
|
71
88
|
g_c_content: [s[:gc], '%'],
|
72
|
-
x_content: [s[:x], '%']
|
89
|
+
x_content: [s[:x], '%'],
|
90
|
+
g_c_skew: [s[:gc_skew], '%'],
|
91
|
+
a_t_skew: [s[:at_skew], '%']
|
73
92
|
}
|
74
93
|
end
|
75
94
|
|
@@ -109,20 +128,8 @@ module MiGA::Result::Stats
|
|
109
128
|
end
|
110
129
|
end
|
111
130
|
else
|
112
|
-
#
|
113
|
-
|
114
|
-
%w[Archaea Bacteria].include?(tax[:d]) &&
|
115
|
-
file_path(:raw_report).nil?
|
116
|
-
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
117
|
-
rep = file_path(:report)
|
118
|
-
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
119
|
-
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
120
|
-
$stderr.print `#{rc} ruby '#{scr}' \
|
121
|
-
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
122
|
-
add_file(:raw_report, "#{source.name}.ess/log")
|
123
|
-
add_file(:report, "#{source.name}.ess/log.domain")
|
124
|
-
end
|
125
|
-
# Extract/compute quality values
|
131
|
+
# Estimate quality metrics
|
132
|
+
fix_essential_genes_by_domain
|
126
133
|
stats = { completeness: [0.0, '%'], contamination: [0.0, '%'] }
|
127
134
|
File.open(file_path(:report), 'r') do |fh|
|
128
135
|
fh.each_line do |ln|
|
@@ -131,6 +138,8 @@ module MiGA::Result::Stats
|
|
131
138
|
end
|
132
139
|
end
|
133
140
|
end
|
141
|
+
|
142
|
+
# Determine qualitative range
|
134
143
|
stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
|
135
144
|
source.metadata[:quality] =
|
136
145
|
case stats[:quality]
|
@@ -140,6 +149,12 @@ module MiGA::Result::Stats
|
|
140
149
|
else; :low
|
141
150
|
end
|
142
151
|
source.save
|
152
|
+
|
153
|
+
# Inactivate low-quality datasets
|
154
|
+
min_qual = (project.metadata[:min_qual] || 25)
|
155
|
+
if min_qual != 'no' && stats[:quality] < min_qual
|
156
|
+
source.inactivate! 'Low quality genome'
|
157
|
+
end
|
143
158
|
end
|
144
159
|
stats
|
145
160
|
end
|
@@ -175,4 +190,21 @@ module MiGA::Result::Stats
|
|
175
190
|
end
|
176
191
|
stats
|
177
192
|
end
|
193
|
+
|
194
|
+
# Fix essential-gene-based estimates according to the taxonomic domain
|
195
|
+
def fix_essential_genes_by_domain
|
196
|
+
return if (tax = source.metadata[:tax]).nil? ||
|
197
|
+
!%w[Archaea Bacteria].include?(tax[:d]) ||
|
198
|
+
file_path(:raw_report)
|
199
|
+
|
200
|
+
MiGA::MiGA.DEBUG "Fixing essential genes by domain"
|
201
|
+
scr = "#{MiGA::MiGA.root_path}/utils/domain-ess-genes.rb"
|
202
|
+
rep = file_path(:report)
|
203
|
+
rc_p = File.expand_path('.miga_rc', ENV['HOME'])
|
204
|
+
rc = File.exist?(rc_p) ? ". '#{rc_p}' && " : ''
|
205
|
+
$stderr.print `#{rc} ruby '#{scr}' \
|
206
|
+
'#{rep}' '#{rep}.domain' '#{tax[:d][0]}'`
|
207
|
+
add_file(:raw_report, "#{source.name}.ess/log")
|
208
|
+
add_file(:report, "#{source.name}.ess/log.domain")
|
209
|
+
end
|
178
210
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -8,7 +8,7 @@ module MiGA
|
|
8
8
|
# - Float representing the major.minor version.
|
9
9
|
# - Integer representing gem releases of the current version.
|
10
10
|
# - Integer representing minor changes that require new version number.
|
11
|
-
VERSION = [0.7, 8, 0]
|
11
|
+
VERSION = [0.7, 12, 0]
|
12
12
|
|
13
13
|
##
|
14
14
|
# Nickname for the current major.minor version.
|
@@ -16,7 +16,7 @@ module MiGA
|
|
16
16
|
|
17
17
|
##
|
18
18
|
# Date of the current gem release.
|
19
|
-
VERSION_DATE = Date.new(2020,
|
19
|
+
VERSION_DATE = Date.new(2020, 7, 22)
|
20
20
|
|
21
21
|
##
|
22
22
|
# Reference of MiGA.
|
data/scripts/cds.bash
CHANGED
@@ -20,7 +20,6 @@ fi
|
|
20
20
|
TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" -m type | cut -f 2)
|
21
21
|
case "$TYPE" in
|
22
22
|
metagenome|virome)
|
23
|
-
$CMD -p meta
|
24
23
|
prodigal -a "${DATASET}.faa" -d "${DATASET}.fna" -o "${DATASET}.gff3" \
|
25
24
|
-f gff -q -i "../05.assembly/${DATASET}.LargeContigs.fna" -p meta
|
26
25
|
;;
|
data/scripts/distances.bash
CHANGED
@@ -9,7 +9,12 @@ cd "$PROJECT/data/09.distances"
|
|
9
9
|
# Initialize
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
|
-
#
|
12
|
+
# Check quality first
|
13
|
+
miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
|
14
|
+
inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
|
15
|
+
[[ "$inactive" == "true" ]] && exit
|
16
|
+
|
17
|
+
# Run distances
|
13
18
|
ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
|
14
19
|
|
15
20
|
# Finalize
|
data/test/daemon_test.rb
CHANGED
@@ -93,7 +93,7 @@ class DaemonTest < Test::Unit::TestCase
|
|
93
93
|
0 => /-{20}\n/,
|
94
94
|
1 => /MiGA:#{p.name} launched/,
|
95
95
|
2 => /-{20}\n/,
|
96
|
-
|
96
|
+
6 => /Probing running jobs\n/
|
97
97
|
}.each { |k, v| assert_match(v, l[k], "unexpected line: #{k}") }
|
98
98
|
ensure
|
99
99
|
begin
|
data/test/dataset_test.rb
CHANGED
@@ -185,11 +185,13 @@ class DatasetTest < Test::Unit::TestCase
|
|
185
185
|
d = dataset
|
186
186
|
assert_equal(:incomplete, d.status)
|
187
187
|
assert_predicate(d, :active?)
|
188
|
-
d.inactivate!
|
188
|
+
d.inactivate! 'Too annoying'
|
189
189
|
assert_equal(:inactive, d.status)
|
190
|
+
assert_equal('Inactive: Too annoying', d.metadata[:warn])
|
190
191
|
assert_not_predicate(d, :active?)
|
191
192
|
d.activate!
|
192
193
|
assert_equal(:incomplete, d.status)
|
194
|
+
assert_nil(d.metadata[:warn])
|
193
195
|
assert_predicate(d, :active?)
|
194
196
|
end
|
195
197
|
|
data/test/project_test.rb
CHANGED
@@ -108,7 +108,7 @@ class ProjectTest < Test::Unit::TestCase
|
|
108
108
|
d1 = p1.add_dataset('BAH')
|
109
109
|
assert_not_predicate(p1, :done_preprocessing?)
|
110
110
|
FileUtils.touch(File.join(p1.path, 'data', '90.stats', "#{d1.name}.done"))
|
111
|
-
|
111
|
+
assert { p1.done_preprocessing? true }
|
112
112
|
assert_nil(p1.next_inclade)
|
113
113
|
p1.metadata[:type] = :clade
|
114
114
|
assert_equal(:subclades, p1.next_inclade)
|
data/test/remote_dataset_test.rb
CHANGED
@@ -101,7 +101,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
101
101
|
|
102
102
|
def test_ref_type_status
|
103
103
|
declare_remote_access
|
104
|
-
rd = MiGA::RemoteDataset.new('
|
104
|
+
rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
|
105
105
|
assert { !rd.get_metadata[:is_type] }
|
106
106
|
assert { rd.get_metadata[:is_ref_type] }
|
107
107
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.8.0
|
4
|
+
version: 0.7.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -118,6 +118,7 @@ files:
|
|
118
118
|
- lib/miga/cli/action/add.rb
|
119
119
|
- lib/miga/cli/action/add_result.rb
|
120
120
|
- lib/miga/cli/action/archive.rb
|
121
|
+
- lib/miga/cli/action/browse.rb
|
121
122
|
- lib/miga/cli/action/classify_wf.rb
|
122
123
|
- lib/miga/cli/action/console.rb
|
123
124
|
- lib/miga/cli/action/daemon.rb
|
@@ -529,7 +530,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
|
|
529
530
|
licenses:
|
530
531
|
- Artistic-2.0
|
531
532
|
metadata: {}
|
532
|
-
post_install_message:
|
533
|
+
post_install_message:
|
533
534
|
rdoc_options:
|
534
535
|
- lib
|
535
536
|
- README.md
|
@@ -550,8 +551,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
550
551
|
- !ruby/object:Gem::Version
|
551
552
|
version: '0'
|
552
553
|
requirements: []
|
553
|
-
rubygems_version: 3.
|
554
|
-
signing_key:
|
554
|
+
rubygems_version: 3.1.2
|
555
|
+
signing_key:
|
555
556
|
specification_version: 4
|
556
557
|
summary: MiGA
|
557
558
|
test_files: []
|