miga-base 0.7.2.0 → 0.7.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/common/with_daemon_class.rb +3 -1
- data/lib/miga/dataset.rb +33 -12
- data/lib/miga/dataset/hooks.rb +15 -4
- data/lib/miga/dataset/result.rb +9 -9
- data/lib/miga/dataset/status.rb +25 -0
- data/lib/miga/version.rb +2 -2
- data/test/dataset_test.rb +12 -0
- data/test/result_stats_test.rb +33 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b233f892ba1294bd0959433c443944f267ff9b8c7ec4d220dc4bbacaca985a6
|
4
|
+
data.tar.gz: bdc51401c6680d63872e7aab594eab50dbc500e6662d244a6fa04f6b6ea2587d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96bc61749ae2964656a9d82a2b5b0c74691513af237837960dfe146482a5691dadf7ea8aa958fb5ff35abd1e8ac829c447e219fdf330095e151efd4448470d73
|
7
|
+
data.tar.gz: 299a4806eea3364a0a64d86aa0eabfa5798f27801d7892241e13bc8ec36e0d3325cddc4eb321ec5b0826b498bb86bdf98494e1384e42cfa6441d97940c690b4f
|
@@ -27,6 +27,8 @@ module MiGA::Common::WithDaemonClass
|
|
27
27
|
f = alive_file(path)
|
28
28
|
f = terminated_file(path) unless File.exist? f
|
29
29
|
return nil unless File.exist? f
|
30
|
-
|
30
|
+
c = File.read(f)
|
31
|
+
return nil if c.nil? || c.empty?
|
32
|
+
Time.parse(c)
|
31
33
|
end
|
32
34
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
require 'miga/metadata'
|
5
5
|
require 'miga/dataset/result'
|
6
|
+
require 'miga/dataset/status'
|
6
7
|
require 'miga/dataset/hooks'
|
7
8
|
require 'sqlite3'
|
8
9
|
|
@@ -10,15 +11,15 @@ require 'sqlite3'
|
|
10
11
|
# Dataset representation in MiGA.
|
11
12
|
class MiGA::Dataset < MiGA::MiGA
|
12
13
|
include MiGA::Dataset::Result
|
14
|
+
include MiGA::Dataset::Status
|
13
15
|
include MiGA::Dataset::Hooks
|
14
16
|
|
15
17
|
# Class-level
|
16
18
|
class << self
|
17
|
-
|
18
19
|
##
|
19
20
|
# Does the +project+ already have a dataset with that +name+?
|
20
21
|
def exist?(project, name)
|
21
|
-
|
22
|
+
!project.dataset_names_hash[name].nil?
|
22
23
|
end
|
23
24
|
|
24
25
|
##
|
@@ -26,7 +27,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
26
27
|
def INFO_FIELDS
|
27
28
|
%w(name created updated type ref user description comments)
|
28
29
|
end
|
29
|
-
|
30
30
|
end
|
31
31
|
|
32
32
|
# Instance-level
|
@@ -57,7 +57,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
57
57
|
File.expand_path("metadata/#{name}.json", project.path),
|
58
58
|
metadata
|
59
59
|
]
|
60
|
-
|
60
|
+
unless File.exist? @metadata_future[0]
|
61
|
+
save
|
62
|
+
pull_hook :on_create
|
63
|
+
end
|
61
64
|
end
|
62
65
|
|
63
66
|
##
|
@@ -117,32 +120,52 @@ class MiGA::Dataset < MiGA::MiGA
|
|
117
120
|
|
118
121
|
##
|
119
122
|
# Is this dataset a reference?
|
120
|
-
def
|
123
|
+
def ref? ; !!metadata[:ref] ; end
|
121
124
|
|
122
125
|
##
|
123
126
|
# Is this dataset a query (non-reference)?
|
124
|
-
def
|
127
|
+
def query? ; !metadata[:ref] ; end
|
125
128
|
|
126
129
|
##
|
127
130
|
# Is this dataset known to be multi-organism?
|
128
|
-
def
|
131
|
+
def multi?
|
129
132
|
return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
|
130
133
|
@@KNOWN_TYPES[type][:multi]
|
131
134
|
end
|
132
135
|
|
133
136
|
##
|
134
137
|
# Is this dataset known to be single-organism?
|
135
|
-
def
|
138
|
+
def nonmulti?
|
136
139
|
return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
|
137
140
|
!@@KNOWN_TYPES[type][:multi]
|
138
141
|
end
|
139
142
|
|
140
143
|
##
|
141
144
|
# Is this dataset active?
|
142
|
-
def
|
145
|
+
def active?
|
143
146
|
metadata[:inactive].nil? or !metadata[:inactive]
|
144
147
|
end
|
145
148
|
|
149
|
+
##
|
150
|
+
# Same as +ref?+ for backwards-compatibility
|
151
|
+
alias is_ref? ref?
|
152
|
+
|
153
|
+
##
|
154
|
+
# Same as +query?+ for backwards-compatibility
|
155
|
+
alias is_query? query?
|
156
|
+
|
157
|
+
##
|
158
|
+
# Same as +multi?+ for backwards-compatibility
|
159
|
+
alias is_multi? multi?
|
160
|
+
|
161
|
+
##
|
162
|
+
# Same as +nonmulti?+ for backwards-compatibility
|
163
|
+
alias is_nonmulti? nonmulti?
|
164
|
+
|
165
|
+
##
|
166
|
+
# Same as +active?+ for backwards-compatibility
|
167
|
+
alias is_active? active?
|
168
|
+
|
146
169
|
##
|
147
170
|
# Returns an Array of +how_many+ duples (Arrays) sorted by AAI:
|
148
171
|
# - +0+: A String with the name(s) of the reference dataset.
|
@@ -151,7 +174,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
151
174
|
# +ref_project+ is false (default), and only for reference dataset when
|
152
175
|
# +ref_project+ is true. It returns +nil+ if this analysis is not supported.
|
153
176
|
def closest_relatives(how_many = 1, ref_project = false)
|
154
|
-
return nil if (
|
177
|
+
return nil if (ref? != ref_project) or multi?
|
155
178
|
r = result(ref_project ? :taxonomy : :distances)
|
156
179
|
return nil if r.nil?
|
157
180
|
db = SQLite3::Database.new(r.file_path :aai_db)
|
@@ -159,6 +182,4 @@ class MiGA::Dataset < MiGA::MiGA
|
|
159
182
|
'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
|
160
183
|
'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many])
|
161
184
|
end
|
162
|
-
|
163
185
|
end
|
164
|
-
|
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -4,29 +4,34 @@ require 'miga/common/hooks'
|
|
4
4
|
##
|
5
5
|
# Helper module including specific functions to handle dataset hooks.
|
6
6
|
# Supported events:
|
7
|
+
# - on_create(): When first created
|
7
8
|
# - on_load(): When loaded
|
8
9
|
# - on_save(): When saved
|
9
10
|
# - on_remove(): When removed
|
10
|
-
# - on_inactivate(): When inactivated
|
11
11
|
# - on_activate(): When activated
|
12
|
+
# - on_inactivate(): When inactivated
|
12
13
|
# - on_result_ready(result): When any result is ready, with key +result+
|
13
14
|
# - on_result_ready_{result}(): When +result+ is ready
|
14
15
|
# - on_preprocessing_ready(): When preprocessing is complete
|
15
16
|
# Supported hooks:
|
16
17
|
# - run_lambda(lambda, args...)
|
18
|
+
# - recalculate_status()
|
17
19
|
# - clear_run_counts()
|
18
20
|
# - run_cmd(cmd)
|
19
21
|
# Internal hooks:
|
20
22
|
# - _pull_result_hooks()
|
21
23
|
module MiGA::Dataset::Hooks
|
22
|
-
|
23
24
|
include MiGA::Common::Hooks
|
24
25
|
|
26
|
+
##
|
27
|
+
# Dataset hooks triggered by default
|
25
28
|
def default_hooks
|
26
29
|
{
|
27
|
-
|
30
|
+
on_create: [[:recalculate_status]],
|
31
|
+
on_activate: [[:clear_run_counts], [:recalculate_status]],
|
32
|
+
on_inactivate: [[:recalculate_status]],
|
28
33
|
on_result_ready: [[:_pull_result_hooks]],
|
29
|
-
|
34
|
+
on_preprocessing_ready: [[:clear_run_counts], [:recalculate_status]],
|
30
35
|
}
|
31
36
|
end
|
32
37
|
|
@@ -40,6 +45,12 @@ module MiGA::Dataset::Hooks
|
|
40
45
|
save
|
41
46
|
end
|
42
47
|
|
48
|
+
##
|
49
|
+
# Recalculate the dataset status and save in metadata
|
50
|
+
def hook_recalculate_status(_hook_args, _event_args)
|
51
|
+
recalculate_status
|
52
|
+
end
|
53
|
+
|
43
54
|
##
|
44
55
|
# Run +cmd+ in the command-line with {{variables}}: dataset, project, miga,
|
45
56
|
# object (as defined for the event, if any)
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -33,17 +33,17 @@ module MiGA::Dataset::Result
|
|
33
33
|
# - nonmulti: incompatible dataset, only for nonmulti
|
34
34
|
# - execute: do not ignore, execute the task
|
35
35
|
def why_ignore(task)
|
36
|
-
if !
|
36
|
+
if !active?
|
37
37
|
:inactive
|
38
38
|
elsif !metadata["run_#{task}"].nil?
|
39
39
|
metadata["run_#{task}"] ? :execute : :force
|
40
40
|
elsif task == :taxonomy && project.metadata[:ref_project].nil?
|
41
41
|
:project
|
42
|
-
elsif @@_EXCLUDE_NOREF_TASKS_H[task] && !
|
42
|
+
elsif @@_EXCLUDE_NOREF_TASKS_H[task] && !ref?
|
43
43
|
:noref
|
44
|
-
elsif @@_ONLY_MULTI_TASKS_H[task] && !
|
44
|
+
elsif @@_ONLY_MULTI_TASKS_H[task] && !multi?
|
45
45
|
:multi
|
46
|
-
elsif @@_ONLY_NONMULTI_TASKS_H[task] && !
|
46
|
+
elsif @@_ONLY_NONMULTI_TASKS_H[task] && !nonmulti?
|
47
47
|
:nonmulti
|
48
48
|
else
|
49
49
|
:execute
|
@@ -139,7 +139,7 @@ module MiGA::Dataset::Result
|
|
139
139
|
# the project as reference datasets.
|
140
140
|
def cleanup_distances!
|
141
141
|
r = get_result(:distances)
|
142
|
-
ref = project.datasets.select(&:
|
142
|
+
ref = project.datasets.select(&:ref?).select(&:active?).map(&:name)
|
143
143
|
return if r.nil?
|
144
144
|
%i[haai_db aai_db ani_db].each do |db_type|
|
145
145
|
db = r.file_path(db_type)
|
@@ -294,7 +294,7 @@ module MiGA::Dataset::Result
|
|
294
294
|
##
|
295
295
|
# Add result type +:mytaxa+ at +base+ (no +_opts+ supported)
|
296
296
|
def add_result_mytaxa(base, _opts)
|
297
|
-
if
|
297
|
+
if multi?
|
298
298
|
return nil unless
|
299
299
|
result_files_exist?(base, '.mytaxa') ||
|
300
300
|
result_files_exist?(base, '.nomytaxa.txt')
|
@@ -319,7 +319,7 @@ module MiGA::Dataset::Result
|
|
319
319
|
##
|
320
320
|
# Add result type +:mytaxa_scan+ at +base+ (no +_opts+ supported)
|
321
321
|
def add_result_mytaxa_scan(base, _opts)
|
322
|
-
if
|
322
|
+
if nonmulti?
|
323
323
|
return nil unless
|
324
324
|
result_files_exist?(base, %w[.pdf .mytaxa]) ||
|
325
325
|
result_files_exist?(base, '.nomytaxa.txt')
|
@@ -345,8 +345,8 @@ module MiGA::Dataset::Result
|
|
345
345
|
##
|
346
346
|
# Add result type +:distances+ at +base+ (no +_opts+ supported)
|
347
347
|
def add_result_distances(base, _opts)
|
348
|
-
if
|
349
|
-
if
|
348
|
+
if nonmulti?
|
349
|
+
if ref?
|
350
350
|
add_result_distances_ref(base)
|
351
351
|
else
|
352
352
|
add_result_distances_nonref(base)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
##
|
2
|
+
# Helper module including specific functions for dataset status
|
3
|
+
module MiGA::Dataset::Status
|
4
|
+
##
|
5
|
+
# Returns the status of the dataset. If the status is not yet defined,
|
6
|
+
# it recalculates it and, if +save+ is true, saves it in metadata.
|
7
|
+
# Return values are:
|
8
|
+
# - +:inactive+ The dataset is currently inactive
|
9
|
+
# - +:incomplete+ The dataset is not yet fully processed
|
10
|
+
# - +:complete+ The dataset is fully processed
|
11
|
+
def status(save = false)
|
12
|
+
recalculate_status(save) if metadata[:status].nil?
|
13
|
+
metadata[:status].to_sym
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Identify the current status instead of relying on metadata, and save
|
18
|
+
# it if +save+ is true. Return codes are the same as +status+.
|
19
|
+
def recalculate_status(save = true)
|
20
|
+
metadata[:status] =
|
21
|
+
!active? ? :inactive : done_preprocessing? ? :complete : :incomplete
|
22
|
+
self.save if save
|
23
|
+
metadata[:status].to_sym
|
24
|
+
end
|
25
|
+
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.7,
|
13
|
+
VERSION = [0.7, 3, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2020, 4,
|
21
|
+
VERSION_DATE = Date.new(2020, 4, 22)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/test/dataset_test.rb
CHANGED
@@ -39,6 +39,7 @@ class DatasetTest < Test::Unit::TestCase
|
|
39
39
|
assert_equal('dataset1', $d1.name)
|
40
40
|
assert_predicate($d1, :is_ref?)
|
41
41
|
assert_equal(MiGA::Metadata, $d1.metadata.class)
|
42
|
+
assert_equal(:incomplete, $d1.status)
|
42
43
|
end
|
43
44
|
|
44
45
|
def test_save
|
@@ -181,4 +182,15 @@ class DatasetTest < Test::Unit::TestCase
|
|
181
182
|
end
|
182
183
|
end
|
183
184
|
|
185
|
+
def test_activate
|
186
|
+
d = $d1
|
187
|
+
assert_equal(:incomplete, d.status)
|
188
|
+
assert_predicate(d, :active?)
|
189
|
+
d.inactivate!
|
190
|
+
assert_equal(:inactive, d.status)
|
191
|
+
assert_not_predicate(d, :active?)
|
192
|
+
d.activate!
|
193
|
+
assert_equal(:incomplete, d.status)
|
194
|
+
assert_predicate(d, :active?)
|
195
|
+
end
|
184
196
|
end
|
data/test/result_stats_test.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
require 'miga/project'
|
3
|
+
require 'zlib'
|
3
4
|
|
4
5
|
class ResultStatsTest < Test::Unit::TestCase
|
5
6
|
|
@@ -88,11 +89,14 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
88
89
|
end
|
89
90
|
|
90
91
|
def test_assembly
|
92
|
+
# Prepare result
|
91
93
|
dir = 'data/05.assembly'
|
92
94
|
fa = file_path(dir, '.LargeContigs.fna')
|
93
95
|
File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
|
94
96
|
touch_done(dir)
|
95
97
|
r = $d.add_result(:assembly)
|
98
|
+
|
99
|
+
# Test assertions
|
96
100
|
assert_equal({}, r[:stats])
|
97
101
|
r.compute_stats
|
98
102
|
assert_equal(1, r[:stats][:contigs])
|
@@ -101,11 +105,18 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
101
105
|
end
|
102
106
|
|
103
107
|
def test_cds
|
108
|
+
# Prepare result
|
104
109
|
dir = 'data/06.cds'
|
105
110
|
fa = file_path(dir, '.faa')
|
106
111
|
File.open(fa, 'w') { |fh| fh.puts '>1','M' }
|
112
|
+
gff = file_path(dir, '.gff3.gz')
|
113
|
+
Zlib::GzipWriter.open(gff) do |fh|
|
114
|
+
fh.puts '# Model Data: a=b;transl_table=11;'
|
115
|
+
end
|
107
116
|
touch_done(dir)
|
108
117
|
r = $d.add_result(:cds)
|
118
|
+
|
119
|
+
# Test assertions
|
109
120
|
assert_equal({}, r[:stats])
|
110
121
|
r.compute_stats
|
111
122
|
assert_equal(1, r[:stats][:predicted_proteins])
|
@@ -114,6 +125,28 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
114
125
|
test_assembly
|
115
126
|
r.compute_stats
|
116
127
|
assert_equal([60.0, '%'], r[:stats][:coding_density])
|
128
|
+
assert_equal('11', r[:stats][:codon_table])
|
129
|
+
end
|
130
|
+
|
131
|
+
def test_taxonomy
|
132
|
+
# Prepare result
|
133
|
+
dir = 'data/09.distances/05.taxonomy'
|
134
|
+
FileUtils.touch(file_path(dir, '.aai-medoids.tsv'))
|
135
|
+
FileUtils.touch(file_path(dir, '.aai.db'))
|
136
|
+
File.open(file_path(dir, '.intax.txt'), 'w') do |fh|
|
137
|
+
fh.puts 'Closest relative: dad with AAI: 100.0.'
|
138
|
+
3.times { fh.puts '' }
|
139
|
+
fh.puts ' phylum Abc 0.0 **** '
|
140
|
+
end
|
141
|
+
touch_done(dir)
|
142
|
+
r = $d.add_result(:taxonomy)
|
143
|
+
|
144
|
+
# Test assertions
|
145
|
+
assert_nil(r[:stats][:closest_relative])
|
146
|
+
r.compute_stats
|
147
|
+
assert_equal('dad', r[:stats][:closest_relative])
|
148
|
+
assert_equal([100.0, '%'], r[:stats][:aai])
|
149
|
+
assert_equal(0.0, r[:stats][:phylum_pvalue])
|
117
150
|
end
|
118
151
|
|
119
152
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -167,6 +167,7 @@ files:
|
|
167
167
|
- lib/miga/dataset/base.rb
|
168
168
|
- lib/miga/dataset/hooks.rb
|
169
169
|
- lib/miga/dataset/result.rb
|
170
|
+
- lib/miga/dataset/status.rb
|
170
171
|
- lib/miga/json.rb
|
171
172
|
- lib/miga/lair.rb
|
172
173
|
- lib/miga/metadata.rb
|