miga-base 0.7.2.0 → 0.7.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/common/with_daemon_class.rb +3 -1
- data/lib/miga/dataset.rb +33 -12
- data/lib/miga/dataset/hooks.rb +15 -4
- data/lib/miga/dataset/result.rb +9 -9
- data/lib/miga/dataset/status.rb +25 -0
- data/lib/miga/version.rb +2 -2
- data/test/dataset_test.rb +12 -0
- data/test/result_stats_test.rb +33 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b233f892ba1294bd0959433c443944f267ff9b8c7ec4d220dc4bbacaca985a6
|
4
|
+
data.tar.gz: bdc51401c6680d63872e7aab594eab50dbc500e6662d244a6fa04f6b6ea2587d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96bc61749ae2964656a9d82a2b5b0c74691513af237837960dfe146482a5691dadf7ea8aa958fb5ff35abd1e8ac829c447e219fdf330095e151efd4448470d73
|
7
|
+
data.tar.gz: 299a4806eea3364a0a64d86aa0eabfa5798f27801d7892241e13bc8ec36e0d3325cddc4eb321ec5b0826b498bb86bdf98494e1384e42cfa6441d97940c690b4f
|
data/lib/miga/common/with_daemon_class.rb
CHANGED
@@ -27,6 +27,8 @@ module MiGA::Common::WithDaemonClass
|
|
27
27
|
f = alive_file(path)
|
28
28
|
f = terminated_file(path) unless File.exist? f
|
29
29
|
return nil unless File.exist? f
|
30
|
-
|
30
|
+
c = File.read(f)
|
31
|
+
return nil if c.nil? || c.empty?
|
32
|
+
Time.parse(c)
|
31
33
|
end
|
32
34
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
require 'miga/metadata'
|
5
5
|
require 'miga/dataset/result'
|
6
|
+
require 'miga/dataset/status'
|
6
7
|
require 'miga/dataset/hooks'
|
7
8
|
require 'sqlite3'
|
8
9
|
|
@@ -10,15 +11,15 @@ require 'sqlite3'
|
|
10
11
|
# Dataset representation in MiGA.
|
11
12
|
class MiGA::Dataset < MiGA::MiGA
|
12
13
|
include MiGA::Dataset::Result
|
14
|
+
include MiGA::Dataset::Status
|
13
15
|
include MiGA::Dataset::Hooks
|
14
16
|
|
15
17
|
# Class-level
|
16
18
|
class << self
|
17
|
-
|
18
19
|
##
|
19
20
|
# Does the +project+ already have a dataset with that +name+?
|
20
21
|
def exist?(project, name)
|
21
|
-
|
22
|
+
!project.dataset_names_hash[name].nil?
|
22
23
|
end
|
23
24
|
|
24
25
|
##
|
@@ -26,7 +27,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
26
27
|
def INFO_FIELDS
|
27
28
|
%w(name created updated type ref user description comments)
|
28
29
|
end
|
29
|
-
|
30
30
|
end
|
31
31
|
|
32
32
|
# Instance-level
|
@@ -57,7 +57,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
57
57
|
File.expand_path("metadata/#{name}.json", project.path),
|
58
58
|
metadata
|
59
59
|
]
|
60
|
-
|
60
|
+
unless File.exist? @metadata_future[0]
|
61
|
+
save
|
62
|
+
pull_hook :on_create
|
63
|
+
end
|
61
64
|
end
|
62
65
|
|
63
66
|
##
|
@@ -117,32 +120,52 @@ class MiGA::Dataset < MiGA::MiGA
|
|
117
120
|
|
118
121
|
##
|
119
122
|
# Is this dataset a reference?
|
120
|
-
def is_ref? ; !!metadata[:ref] ; end
|
123
|
+
def ref? ; !!metadata[:ref] ; end
|
121
124
|
|
122
125
|
##
|
123
126
|
# Is this dataset a query (non-reference)?
|
124
|
-
def is_query? ; !metadata[:ref] ; end
|
127
|
+
def query? ; !metadata[:ref] ; end
|
125
128
|
|
126
129
|
##
|
127
130
|
# Is this dataset known to be multi-organism?
|
128
|
-
def is_multi?
|
131
|
+
def multi?
|
129
132
|
return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
|
130
133
|
@@KNOWN_TYPES[type][:multi]
|
131
134
|
end
|
132
135
|
|
133
136
|
##
|
134
137
|
# Is this dataset known to be single-organism?
|
135
|
-
def is_nonmulti?
|
138
|
+
def nonmulti?
|
136
139
|
return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
|
137
140
|
!@@KNOWN_TYPES[type][:multi]
|
138
141
|
end
|
139
142
|
|
140
143
|
##
|
141
144
|
# Is this dataset active?
|
142
|
-
def is_active?
|
145
|
+
def active?
|
143
146
|
metadata[:inactive].nil? or !metadata[:inactive]
|
144
147
|
end
|
145
148
|
|
149
|
+
##
|
150
|
+
# Same as +ref?+ for backwards-compatibility
|
151
|
+
alias is_ref? ref?
|
152
|
+
|
153
|
+
##
|
154
|
+
# Same as +query?+ for backwards-compatibility
|
155
|
+
alias is_query? query?
|
156
|
+
|
157
|
+
##
|
158
|
+
# Same as +multi?+ for backwards-compatibility
|
159
|
+
alias is_multi? multi?
|
160
|
+
|
161
|
+
##
|
162
|
+
# Same as +nonmulti?+ for backwards-compatibility
|
163
|
+
alias is_nonmulti? nonmulti?
|
164
|
+
|
165
|
+
##
|
166
|
+
# Same as +active?+ for backwards-compatibility
|
167
|
+
alias is_active? active?
|
168
|
+
|
146
169
|
##
|
147
170
|
# Returns an Array of +how_many+ duples (Arrays) sorted by AAI:
|
148
171
|
# - +0+: A String with the name(s) of the reference dataset.
|
@@ -151,7 +174,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
151
174
|
# +ref_project+ is false (default), and only for reference dataset when
|
152
175
|
# +ref_project+ is true. It returns +nil+ if this analysis is not supported.
|
153
176
|
def closest_relatives(how_many = 1, ref_project = false)
|
154
|
-
return nil if (is_ref? != ref_project) or is_multi?
|
177
|
+
return nil if (ref? != ref_project) or multi?
|
155
178
|
r = result(ref_project ? :taxonomy : :distances)
|
156
179
|
return nil if r.nil?
|
157
180
|
db = SQLite3::Database.new(r.file_path :aai_db)
|
@@ -159,6 +182,4 @@ class MiGA::Dataset < MiGA::MiGA
|
|
159
182
|
'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
|
160
183
|
'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many])
|
161
184
|
end
|
162
|
-
|
163
185
|
end
|
164
|
-
|
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -4,29 +4,34 @@ require 'miga/common/hooks'
|
|
4
4
|
##
|
5
5
|
# Helper module including specific functions to handle dataset hooks.
|
6
6
|
# Supported events:
|
7
|
+
# - on_create(): When first created
|
7
8
|
# - on_load(): When loaded
|
8
9
|
# - on_save(): When saved
|
9
10
|
# - on_remove(): When removed
|
10
|
-
# - on_inactivate(): When inactivated
|
11
11
|
# - on_activate(): When activated
|
12
|
+
# - on_inactivate(): When inactivated
|
12
13
|
# - on_result_ready(result): When any result is ready, with key +result+
|
13
14
|
# - on_result_ready_{result}(): When +result+ is ready
|
14
15
|
# - on_preprocessing_ready(): When preprocessing is complete
|
15
16
|
# Supported hooks:
|
16
17
|
# - run_lambda(lambda, args...)
|
18
|
+
# - recalculate_status()
|
17
19
|
# - clear_run_counts()
|
18
20
|
# - run_cmd(cmd)
|
19
21
|
# Internal hooks:
|
20
22
|
# - _pull_result_hooks()
|
21
23
|
module MiGA::Dataset::Hooks
|
22
|
-
|
23
24
|
include MiGA::Common::Hooks
|
24
25
|
|
26
|
+
##
|
27
|
+
# Dataset hooks triggered by default
|
25
28
|
def default_hooks
|
26
29
|
{
|
27
|
-
|
30
|
+
on_create: [[:recalculate_status]],
|
31
|
+
on_activate: [[:clear_run_counts], [:recalculate_status]],
|
32
|
+
on_inactivate: [[:recalculate_status]],
|
28
33
|
on_result_ready: [[:_pull_result_hooks]],
|
29
|
-
|
34
|
+
on_preprocessing_ready: [[:clear_run_counts], [:recalculate_status]],
|
30
35
|
}
|
31
36
|
end
|
32
37
|
|
@@ -40,6 +45,12 @@ module MiGA::Dataset::Hooks
|
|
40
45
|
save
|
41
46
|
end
|
42
47
|
|
48
|
+
##
|
49
|
+
# Recalculate the dataset status and save in metadata
|
50
|
+
def hook_recalculate_status(_hook_args, _event_args)
|
51
|
+
recalculate_status
|
52
|
+
end
|
53
|
+
|
43
54
|
##
|
44
55
|
# Run +cmd+ in the command-line with {{variables}}: dataset, project, miga,
|
45
56
|
# object (as defined for the event, if any)
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -33,17 +33,17 @@ module MiGA::Dataset::Result
|
|
33
33
|
# - nonmulti: incompatible dataset, only for nonmulti
|
34
34
|
# - execute: do not ignore, execute the task
|
35
35
|
def why_ignore(task)
|
36
|
-
if !is_active?
|
36
|
+
if !active?
|
37
37
|
:inactive
|
38
38
|
elsif !metadata["run_#{task}"].nil?
|
39
39
|
metadata["run_#{task}"] ? :execute : :force
|
40
40
|
elsif task == :taxonomy && project.metadata[:ref_project].nil?
|
41
41
|
:project
|
42
|
-
elsif @@_EXCLUDE_NOREF_TASKS_H[task] && !is_ref?
|
42
|
+
elsif @@_EXCLUDE_NOREF_TASKS_H[task] && !ref?
|
43
43
|
:noref
|
44
|
-
elsif @@_ONLY_MULTI_TASKS_H[task] && !is_multi?
|
44
|
+
elsif @@_ONLY_MULTI_TASKS_H[task] && !multi?
|
45
45
|
:multi
|
46
|
-
elsif @@_ONLY_NONMULTI_TASKS_H[task] && !is_nonmulti?
|
46
|
+
elsif @@_ONLY_NONMULTI_TASKS_H[task] && !nonmulti?
|
47
47
|
:nonmulti
|
48
48
|
else
|
49
49
|
:execute
|
@@ -139,7 +139,7 @@ module MiGA::Dataset::Result
|
|
139
139
|
# the project as reference datasets.
|
140
140
|
def cleanup_distances!
|
141
141
|
r = get_result(:distances)
|
142
|
-
ref = project.datasets.select(&:
|
142
|
+
ref = project.datasets.select(&:ref?).select(&:active?).map(&:name)
|
143
143
|
return if r.nil?
|
144
144
|
%i[haai_db aai_db ani_db].each do |db_type|
|
145
145
|
db = r.file_path(db_type)
|
@@ -294,7 +294,7 @@ module MiGA::Dataset::Result
|
|
294
294
|
##
|
295
295
|
# Add result type +:mytaxa+ at +base+ (no +_opts+ supported)
|
296
296
|
def add_result_mytaxa(base, _opts)
|
297
|
-
if is_multi?
|
297
|
+
if multi?
|
298
298
|
return nil unless
|
299
299
|
result_files_exist?(base, '.mytaxa') ||
|
300
300
|
result_files_exist?(base, '.nomytaxa.txt')
|
@@ -319,7 +319,7 @@ module MiGA::Dataset::Result
|
|
319
319
|
##
|
320
320
|
# Add result type +:mytaxa_scan+ at +base+ (no +_opts+ supported)
|
321
321
|
def add_result_mytaxa_scan(base, _opts)
|
322
|
-
if is_nonmulti?
|
322
|
+
if nonmulti?
|
323
323
|
return nil unless
|
324
324
|
result_files_exist?(base, %w[.pdf .mytaxa]) ||
|
325
325
|
result_files_exist?(base, '.nomytaxa.txt')
|
@@ -345,8 +345,8 @@ module MiGA::Dataset::Result
|
|
345
345
|
##
|
346
346
|
# Add result type +:distances+ at +base+ (no +_opts+ supported)
|
347
347
|
def add_result_distances(base, _opts)
|
348
|
-
if is_nonmulti?
|
349
|
-
if is_ref?
|
348
|
+
if nonmulti?
|
349
|
+
if ref?
|
350
350
|
add_result_distances_ref(base)
|
351
351
|
else
|
352
352
|
add_result_distances_nonref(base)
|
data/lib/miga/dataset/status.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
##
|
2
|
+
# Helper module including specific functions for dataset status
|
3
|
+
module MiGA::Dataset::Status
|
4
|
+
##
|
5
|
+
# Returns the status of the dataset. If the status is not yet defined,
|
6
|
+
# it recalculates it and, if +save+ is true, saves it in metadata.
|
7
|
+
# Return values are:
|
8
|
+
# - +:inactive+ The dataset is currently inactive
|
9
|
+
# - +:incomplete+ The dataset is not yet fully processed
|
10
|
+
# - +:complete+ The dataset is fully processed
|
11
|
+
def status(save = false)
|
12
|
+
recalculate_status(save) if metadata[:status].nil?
|
13
|
+
metadata[:status].to_sym
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Identify the current status instead of relying on metadata, and save
|
18
|
+
# it if +save+ is true. Return codes are the same as +status+.
|
19
|
+
def recalculate_status(save = true)
|
20
|
+
metadata[:status] =
|
21
|
+
!active? ? :inactive : done_preprocessing? ? :complete : :incomplete
|
22
|
+
self.save if save
|
23
|
+
metadata[:status].to_sym
|
24
|
+
end
|
25
|
+
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.7, 2, 0]
|
13
|
+
VERSION = [0.7, 3, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2020, 4,
|
21
|
+
VERSION_DATE = Date.new(2020, 4, 22)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/test/dataset_test.rb
CHANGED
@@ -39,6 +39,7 @@ class DatasetTest < Test::Unit::TestCase
|
|
39
39
|
assert_equal('dataset1', $d1.name)
|
40
40
|
assert_predicate($d1, :is_ref?)
|
41
41
|
assert_equal(MiGA::Metadata, $d1.metadata.class)
|
42
|
+
assert_equal(:incomplete, $d1.status)
|
42
43
|
end
|
43
44
|
|
44
45
|
def test_save
|
@@ -181,4 +182,15 @@ class DatasetTest < Test::Unit::TestCase
|
|
181
182
|
end
|
182
183
|
end
|
183
184
|
|
185
|
+
def test_activate
|
186
|
+
d = $d1
|
187
|
+
assert_equal(:incomplete, d.status)
|
188
|
+
assert_predicate(d, :active?)
|
189
|
+
d.inactivate!
|
190
|
+
assert_equal(:inactive, d.status)
|
191
|
+
assert_not_predicate(d, :active?)
|
192
|
+
d.activate!
|
193
|
+
assert_equal(:incomplete, d.status)
|
194
|
+
assert_predicate(d, :active?)
|
195
|
+
end
|
184
196
|
end
|
data/test/result_stats_test.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
require 'miga/project'
|
3
|
+
require 'zlib'
|
3
4
|
|
4
5
|
class ResultStatsTest < Test::Unit::TestCase
|
5
6
|
|
@@ -88,11 +89,14 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
88
89
|
end
|
89
90
|
|
90
91
|
def test_assembly
|
92
|
+
# Prepare result
|
91
93
|
dir = 'data/05.assembly'
|
92
94
|
fa = file_path(dir, '.LargeContigs.fna')
|
93
95
|
File.open(fa, 'w') { |fh| fh.puts '>1','ACTAC' }
|
94
96
|
touch_done(dir)
|
95
97
|
r = $d.add_result(:assembly)
|
98
|
+
|
99
|
+
# Test assertions
|
96
100
|
assert_equal({}, r[:stats])
|
97
101
|
r.compute_stats
|
98
102
|
assert_equal(1, r[:stats][:contigs])
|
@@ -101,11 +105,18 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
101
105
|
end
|
102
106
|
|
103
107
|
def test_cds
|
108
|
+
# Prepare result
|
104
109
|
dir = 'data/06.cds'
|
105
110
|
fa = file_path(dir, '.faa')
|
106
111
|
File.open(fa, 'w') { |fh| fh.puts '>1','M' }
|
112
|
+
gff = file_path(dir, '.gff3.gz')
|
113
|
+
Zlib::GzipWriter.open(gff) do |fh|
|
114
|
+
fh.puts '# Model Data: a=b;transl_table=11;'
|
115
|
+
end
|
107
116
|
touch_done(dir)
|
108
117
|
r = $d.add_result(:cds)
|
118
|
+
|
119
|
+
# Test assertions
|
109
120
|
assert_equal({}, r[:stats])
|
110
121
|
r.compute_stats
|
111
122
|
assert_equal(1, r[:stats][:predicted_proteins])
|
@@ -114,6 +125,28 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
114
125
|
test_assembly
|
115
126
|
r.compute_stats
|
116
127
|
assert_equal([60.0, '%'], r[:stats][:coding_density])
|
128
|
+
assert_equal('11', r[:stats][:codon_table])
|
129
|
+
end
|
130
|
+
|
131
|
+
def test_taxonomy
|
132
|
+
# Prepare result
|
133
|
+
dir = 'data/09.distances/05.taxonomy'
|
134
|
+
FileUtils.touch(file_path(dir, '.aai-medoids.tsv'))
|
135
|
+
FileUtils.touch(file_path(dir, '.aai.db'))
|
136
|
+
File.open(file_path(dir, '.intax.txt'), 'w') do |fh|
|
137
|
+
fh.puts 'Closest relative: dad with AAI: 100.0.'
|
138
|
+
3.times { fh.puts '' }
|
139
|
+
fh.puts ' phylum Abc 0.0 **** '
|
140
|
+
end
|
141
|
+
touch_done(dir)
|
142
|
+
r = $d.add_result(:taxonomy)
|
143
|
+
|
144
|
+
# Test assertions
|
145
|
+
assert_nil(r[:stats][:closest_relative])
|
146
|
+
r.compute_stats
|
147
|
+
assert_equal('dad', r[:stats][:closest_relative])
|
148
|
+
assert_equal([100.0, '%'], r[:stats][:aai])
|
149
|
+
assert_equal(0.0, r[:stats][:phylum_pvalue])
|
117
150
|
end
|
118
151
|
|
119
152
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.2.0
|
4
|
+
version: 0.7.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -167,6 +167,7 @@ files:
|
|
167
167
|
- lib/miga/dataset/base.rb
|
168
168
|
- lib/miga/dataset/hooks.rb
|
169
169
|
- lib/miga/dataset/result.rb
|
170
|
+
- lib/miga/dataset/status.rb
|
170
171
|
- lib/miga/json.rb
|
171
172
|
- lib/miga/lair.rb
|
172
173
|
- lib/miga/metadata.rb
|