miga-base 0.7.3.1 → 0.7.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +25 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +5 -2
- data/lib/miga/common/with_daemon_class.rb +1 -1
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +48 -53
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -3
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +24 -21
- data/utils/distance/pipeline.rb +23 -10
- data/utils/distance/runner.rb +20 -16
- data/utils/distance/temporal.rb +1 -3
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
data/lib/miga/daemon/base.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
|
2
1
|
require 'shellwords'
|
3
2
|
|
4
3
|
class MiGA::Daemon < MiGA::MiGA
|
5
4
|
end
|
6
5
|
|
7
6
|
module MiGA::Daemon::Base
|
8
|
-
|
9
7
|
##
|
10
8
|
# Set/get #options, where +k+ is the Symbol of the option and +v+ is the value
|
11
9
|
# (or nil to use as getter). Skips consistency tests if +force+. Returns new
|
@@ -14,19 +12,21 @@ module MiGA::Daemon::Base
|
|
14
12
|
k = k.to_sym
|
15
13
|
unless v.nil?
|
16
14
|
case k
|
17
|
-
when :latency, :maxjobs, :ppn, :format_version
|
15
|
+
when :latency, :maxjobs, :ppn, :format_version, :verbosity
|
18
16
|
v = v.to_i
|
17
|
+
if !force && v == 0 && k != :verbosity
|
18
|
+
raise "Daemon's #{k} cannot be set to zero"
|
19
|
+
end
|
19
20
|
when :shutdown_when_done
|
20
21
|
v = !!v
|
21
22
|
when :nodelist
|
22
23
|
if v =~ /^\$/
|
23
|
-
vv = ENV[v.sub('$','')] or raise "Unset environment variable: #{v}"
|
24
|
+
vv = ENV[v.sub('$', '')] or raise "Unset environment variable: #{v}"
|
24
25
|
v = vv
|
25
26
|
end
|
26
27
|
say "Reading node list: #{v}"
|
27
28
|
v = File.readlines(v).map(&:chomp)
|
28
29
|
end
|
29
|
-
raise "Daemon's #{k} cannot be set to zero." if !force and v == 0
|
30
30
|
@runopts[k] = v
|
31
31
|
end
|
32
32
|
@runopts[k]
|
@@ -34,24 +34,43 @@ module MiGA::Daemon::Base
|
|
34
34
|
|
35
35
|
##
|
36
36
|
# Returns Integer indicating the number of seconds to sleep between checks
|
37
|
-
def latency
|
37
|
+
def latency
|
38
|
+
runopts(:latency)
|
39
|
+
end
|
38
40
|
|
39
41
|
##
|
40
42
|
# Returns Integer indicating the maximum number of concurrent jobs to run
|
41
|
-
def maxjobs
|
43
|
+
def maxjobs
|
44
|
+
runopts(:maxjobs)
|
45
|
+
end
|
42
46
|
|
43
47
|
##
|
44
48
|
# Returns the path to the list of execution hostnames
|
45
|
-
def nodelist
|
49
|
+
def nodelist
|
50
|
+
runopts(:nodelist)
|
51
|
+
end
|
46
52
|
|
47
53
|
##
|
48
54
|
# Returns Integer indicating the number of CPUs per job
|
49
|
-
def ppn
|
55
|
+
def ppn
|
56
|
+
runopts(:ppn)
|
57
|
+
end
|
50
58
|
|
51
59
|
##
|
52
60
|
# Returns Boolean indicating if the daemon should shutdown when processing is
|
53
61
|
# complete
|
54
|
-
def shutdown_when_done?
|
62
|
+
def shutdown_when_done?
|
63
|
+
!!runopts(:shutdown_when_done)
|
64
|
+
end
|
55
65
|
|
66
|
+
##
|
67
|
+
# Returns the level of verbosity for the daemon as an Integer, or 1 if unset.
|
68
|
+
# Verbosity levels are:
|
69
|
+
# 0: No output
|
70
|
+
# 1: General daemon and job information
|
71
|
+
# 2: Same, and indicate when each task is performed (even if nothing happens)
|
72
|
+
# 3: Same, and indicate when each loop begins and ends
|
73
|
+
def verbosity
|
74
|
+
runopts(:verbosity) || 1
|
75
|
+
end
|
56
76
|
end
|
57
|
-
|
data/lib/miga/dataset.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# @package MiGA
|
2
4
|
# @license Artistic-2.0
|
3
5
|
|
@@ -8,7 +10,7 @@ require 'miga/dataset/hooks'
|
|
8
10
|
require 'sqlite3'
|
9
11
|
|
10
12
|
##
|
11
|
-
# Dataset representation in MiGA
|
13
|
+
# Dataset representation in MiGA
|
12
14
|
class MiGA::Dataset < MiGA::MiGA
|
13
15
|
include MiGA::Dataset::Result
|
14
16
|
include MiGA::Dataset::Status
|
@@ -23,20 +25,20 @@ class MiGA::Dataset < MiGA::MiGA
|
|
23
25
|
end
|
24
26
|
|
25
27
|
##
|
26
|
-
# Standard fields of metadata for datasets
|
28
|
+
# Standard fields of metadata for datasets
|
27
29
|
def INFO_FIELDS
|
28
|
-
%w
|
30
|
+
%w[name created updated type ref user description comments]
|
29
31
|
end
|
30
32
|
end
|
31
33
|
|
32
34
|
# Instance-level
|
33
35
|
|
34
36
|
##
|
35
|
-
# MiGA::Project that contains the dataset
|
37
|
+
# MiGA::Project that contains the dataset
|
36
38
|
attr_reader :project
|
37
39
|
|
38
40
|
##
|
39
|
-
# Datasets are uniquely identified by +name+ in a project
|
41
|
+
# Datasets are uniquely identified by +name+ in a project
|
40
42
|
attr_reader :name
|
41
43
|
|
42
44
|
##
|
@@ -45,22 +47,19 @@ class MiGA::Dataset < MiGA::MiGA
|
|
45
47
|
# be treated as reference (true, default) or query (false). Pass any
|
46
48
|
# additional +metadata+ as a Hash.
|
47
49
|
def initialize(project, name, is_ref = true, metadata = {})
|
48
|
-
|
50
|
+
name.miga_name? or
|
49
51
|
raise 'Invalid name, please use only alphanumerics and underscores: ' +
|
50
|
-
|
51
|
-
|
52
|
-
@project = project
|
53
|
-
@name = name
|
54
|
-
@metadata = nil
|
52
|
+
name.to_s
|
53
|
+
@project, @name, @metadata = project, name, nil
|
55
54
|
metadata[:ref] = is_ref
|
56
55
|
@metadata_future = [
|
57
|
-
File.
|
56
|
+
File.join(project.path, 'metadata', "#{name}.json"),
|
58
57
|
metadata
|
59
58
|
]
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
59
|
+
return if File.exist? @metadata_future[0]
|
60
|
+
|
61
|
+
save
|
62
|
+
pull_hook :on_create
|
64
63
|
end
|
65
64
|
|
66
65
|
##
|
@@ -74,7 +73,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
74
73
|
end
|
75
74
|
|
76
75
|
##
|
77
|
-
# Save any changes you've made in the dataset
|
76
|
+
# Save any changes you've made in the dataset
|
78
77
|
def save
|
79
78
|
MiGA.DEBUG "Dataset.metadata: #{metadata.data}"
|
80
79
|
metadata.save
|
@@ -82,61 +81,69 @@ class MiGA::Dataset < MiGA::MiGA
|
|
82
81
|
end
|
83
82
|
|
84
83
|
##
|
85
|
-
# Get the type of dataset as Symbol
|
86
|
-
def type
|
84
|
+
# Get the type of dataset as Symbol
|
85
|
+
def type
|
86
|
+
metadata[:type]
|
87
|
+
end
|
87
88
|
|
88
89
|
##
|
89
90
|
# Delete the dataset with all its contents (including results) and returns
|
90
|
-
# nil
|
91
|
+
# nil
|
91
92
|
def remove!
|
92
|
-
|
93
|
-
|
93
|
+
results.each(&:remove!)
|
94
|
+
metadata.remove!
|
94
95
|
pull_hook :on_remove
|
95
96
|
end
|
96
97
|
|
97
98
|
##
|
98
|
-
# Inactivate a dataset. This halts automated processing by the daemon
|
99
|
+
# Inactivate a dataset. This halts automated processing by the daemon
|
99
100
|
def inactivate!
|
100
|
-
|
101
|
-
|
101
|
+
metadata[:inactive] = true
|
102
|
+
metadata.save
|
102
103
|
pull_hook :on_inactivate
|
103
104
|
end
|
104
105
|
|
105
106
|
##
|
106
|
-
# Activate a dataset. This removes the +:inactive+ flag
|
107
|
+
# Activate a dataset. This removes the +:inactive+ flag
|
107
108
|
def activate!
|
108
|
-
|
109
|
-
|
109
|
+
metadata[:inactive] = nil
|
110
|
+
metadata.save
|
110
111
|
pull_hook :on_activate
|
111
112
|
end
|
112
113
|
|
113
114
|
##
|
114
|
-
# Get standard metadata values for the dataset as Array
|
115
|
+
# Get standard metadata values for the dataset as Array
|
115
116
|
def info
|
116
117
|
MiGA::Dataset.INFO_FIELDS.map do |k|
|
117
|
-
|
118
|
+
k == 'name' ? name : metadata[k]
|
118
119
|
end
|
119
120
|
end
|
120
121
|
|
121
122
|
##
|
122
123
|
# Is this dataset a reference?
|
123
|
-
def ref?
|
124
|
+
def ref?
|
125
|
+
!query?
|
126
|
+
end
|
124
127
|
|
125
128
|
##
|
126
129
|
# Is this dataset a query (non-reference)?
|
127
|
-
def query?
|
130
|
+
def query?
|
131
|
+
!metadata[:ref]
|
132
|
+
end
|
128
133
|
|
129
134
|
##
|
130
135
|
# Is this dataset known to be multi-organism?
|
131
136
|
def multi?
|
132
|
-
return false if metadata[:type].nil?
|
137
|
+
return false if metadata[:type].nil? || @@KNOWN_TYPES[type].nil?
|
138
|
+
|
133
139
|
@@KNOWN_TYPES[type][:multi]
|
134
140
|
end
|
135
141
|
|
136
142
|
##
|
137
143
|
# Is this dataset known to be single-organism?
|
138
144
|
def nonmulti?
|
139
|
-
return false if metadata[:type].nil?
|
145
|
+
return false if metadata[:type].nil? || @@KNOWN_TYPES[type].nil?
|
146
|
+
|
140
147
|
!@@KNOWN_TYPES[type][:multi]
|
141
148
|
end
|
142
149
|
|
@@ -174,12 +181,15 @@ class MiGA::Dataset < MiGA::MiGA
|
|
174
181
|
# +ref_project+ is false (default), and only for reference dataset when
|
175
182
|
# +ref_project+ is true. It returns +nil+ if this analysis is not supported.
|
176
183
|
def closest_relatives(how_many = 1, ref_project = false)
|
177
|
-
return nil if (ref? != ref_project)
|
184
|
+
return nil if (ref? != ref_project) || multi?
|
185
|
+
|
178
186
|
r = result(ref_project ? :taxonomy : :distances)
|
179
187
|
return nil if r.nil?
|
180
|
-
|
188
|
+
|
189
|
+
db = SQLite3::Database.new(r.file_path(:aai_db))
|
181
190
|
db.execute(
|
182
191
|
'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
|
183
|
-
'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many]
|
192
|
+
'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many]
|
193
|
+
)
|
184
194
|
end
|
185
195
|
end
|
data/lib/miga/dataset/base.rb
CHANGED
@@ -2,72 +2,87 @@
|
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
4
|
class MiGA::Dataset < MiGA::MiGA
|
5
|
-
|
6
5
|
# Class-level
|
7
6
|
class << self
|
8
|
-
def RESULT_DIRS
|
9
|
-
|
10
|
-
|
7
|
+
def RESULT_DIRS
|
8
|
+
@@RESULT_DIRS
|
9
|
+
end
|
10
|
+
|
11
|
+
def KNOWN_TYPES
|
12
|
+
@@KNOWN_TYPES
|
13
|
+
end
|
14
|
+
|
15
|
+
def PREPROCESSING_TASKS
|
16
|
+
@@PREPROCESSING_TASKS
|
17
|
+
end
|
11
18
|
end
|
12
|
-
|
13
19
|
end
|
14
20
|
|
15
21
|
module MiGA::Dataset::Base
|
16
|
-
|
17
22
|
##
|
18
|
-
# Directories containing the results from dataset-specific tasks
|
23
|
+
# Directories containing the results from dataset-specific tasks
|
19
24
|
@@RESULT_DIRS = {
|
20
25
|
# Preprocessing
|
21
|
-
raw_reads:
|
22
|
-
|
23
|
-
|
26
|
+
raw_reads: '01.raw_reads',
|
27
|
+
trimmed_reads: '02.trimmed_reads',
|
28
|
+
read_quality: '03.read_quality',
|
29
|
+
trimmed_fasta: '04.trimmed_fasta',
|
30
|
+
assembly: '05.assembly',
|
31
|
+
cds: '06.cds',
|
24
32
|
# Annotation
|
25
|
-
essential_genes:
|
26
|
-
ssu:
|
27
|
-
mytaxa:
|
28
|
-
mytaxa_scan:
|
33
|
+
essential_genes: '07.annotation/01.function/01.essential',
|
34
|
+
ssu: '07.annotation/01.function/02.ssu',
|
35
|
+
mytaxa: '07.annotation/02.taxonomy/01.mytaxa',
|
36
|
+
mytaxa_scan: '07.annotation/03.qa/02.mytaxa_scan',
|
29
37
|
# Distances (for single-species datasets)
|
30
|
-
distances:
|
38
|
+
distances: '09.distances',
|
39
|
+
taxonomy: '09.distances/05.taxonomy',
|
31
40
|
# General statistics
|
32
|
-
stats:
|
41
|
+
stats: '90.stats'
|
33
42
|
}
|
34
43
|
|
35
44
|
##
|
36
|
-
# Supported dataset types
|
45
|
+
# Supported dataset types
|
37
46
|
@@KNOWN_TYPES = {
|
38
|
-
genome: {
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
:multi
|
43
|
-
|
44
|
-
|
45
|
-
|
47
|
+
genome: {
|
48
|
+
description: 'The genome from an isolate', multi: false
|
49
|
+
},
|
50
|
+
scgenome: {
|
51
|
+
description: 'A Single-cell Amplified Genome (SAG)', multi: false
|
52
|
+
},
|
53
|
+
popgenome: {
|
54
|
+
description: 'A Metagenome-Assembled Genome (MAG)', multi: false
|
55
|
+
},
|
56
|
+
metagenome: {
|
57
|
+
description: 'A metagenome (excluding viromes)', multi: true
|
58
|
+
},
|
59
|
+
virome: {
|
60
|
+
description: 'A viral metagenome', multi: true
|
61
|
+
}
|
46
62
|
}
|
47
63
|
|
48
64
|
##
|
49
|
-
# Returns an Array of tasks to be executed before project-wide tasks
|
50
|
-
@@PREPROCESSING_TASKS = [
|
51
|
-
:
|
52
|
-
:
|
53
|
-
|
65
|
+
# Returns an Array of tasks to be executed before project-wide tasks
|
66
|
+
@@PREPROCESSING_TASKS = [
|
67
|
+
:raw_reads, :trimmed_reads, :read_quality, :trimmed_fasta,
|
68
|
+
:assembly, :cds, :essential_genes, :ssu, :mytaxa, :mytaxa_scan,
|
69
|
+
:distances, :taxonomy, :stats
|
70
|
+
]
|
71
|
+
|
54
72
|
##
|
55
73
|
# Tasks to be excluded from query datasets.
|
56
74
|
@@EXCLUDE_NOREF_TASKS = [:mytaxa_scan, :taxonomy]
|
57
|
-
@@_EXCLUDE_NOREF_TASKS_H = Hash[@@EXCLUDE_NOREF_TASKS.map{ |i| [i,true] }]
|
58
|
-
|
75
|
+
@@_EXCLUDE_NOREF_TASKS_H = Hash[@@EXCLUDE_NOREF_TASKS.map { |i| [i, true] }]
|
76
|
+
|
59
77
|
##
|
60
78
|
# Tasks to be executed only in datasets that are not multi-organism. These
|
61
79
|
# tasks are ignored for multi-organism datasets or for unknown types.
|
62
80
|
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances, :taxonomy]
|
63
|
-
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map{ |i| [i,true] }]
|
81
|
+
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map { |i| [i, true] }]
|
64
82
|
|
65
83
|
##
|
66
84
|
# Tasks to be executed only in datasets that are multi-organism. These
|
67
85
|
# tasks are ignored for single-organism datasets or for unknown types.
|
68
86
|
@@ONLY_MULTI_TASKS = [:mytaxa]
|
69
|
-
@@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map{ |i| [i,true] }]
|
70
|
-
|
71
|
-
|
87
|
+
@@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map { |i| [i, true] }]
|
72
88
|
end
|
73
|
-
|
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'miga/common/hooks'
|
3
2
|
|
4
3
|
##
|
@@ -20,7 +19,7 @@ require 'miga/common/hooks'
|
|
20
19
|
# - run_cmd(cmd)
|
21
20
|
# Internal hooks:
|
22
21
|
# - _pull_result_hooks()
|
23
|
-
module MiGA::Dataset::Hooks
|
22
|
+
module MiGA::Dataset::Hooks
|
24
23
|
include MiGA::Common::Hooks
|
25
24
|
|
26
25
|
##
|
@@ -38,7 +37,8 @@ module MiGA::Dataset::Hooks
|
|
38
37
|
##
|
39
38
|
# Clear metadata from run counts
|
40
39
|
def hook_clear_run_counts(_hook_args, _event_args)
|
41
|
-
metadata
|
40
|
+
metadata
|
41
|
+
.data.keys
|
42
42
|
.select { |k| k.to_s =~ /^_try_/ }
|
43
43
|
.each { |k| metadata[k] = nil }
|
44
44
|
metadata[:_step] = nil
|
@@ -72,5 +72,4 @@ module MiGA::Dataset::Hooks
|
|
72
72
|
pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
|
73
73
|
pull_hook(:on_preprocessing_ready) if done_preprocessing?
|
74
74
|
end
|
75
|
-
|
76
75
|
end
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -65,7 +65,7 @@ module MiGA::Dataset::Result
|
|
65
65
|
# Passes +save+ to #add_result.
|
66
66
|
def next_preprocessing(save = false)
|
67
67
|
first = first_preprocessing(save) or return nil
|
68
|
-
@@PREPROCESSING_TASKS[@@PREPROCESSING_TASKS.index(first)
|
68
|
+
@@PREPROCESSING_TASKS[@@PREPROCESSING_TASKS.index(first)..-1].find do |t|
|
69
69
|
if ignore_task? t
|
70
70
|
false
|
71
71
|
elsif add_result(t, save).nil?
|
@@ -95,6 +95,7 @@ module MiGA::Dataset::Result
|
|
95
95
|
def profile_advance(save = false)
|
96
96
|
first_task = first_preprocessing(save)
|
97
97
|
return Array.new(@@PREPROCESSING_TASKS.size, 0) if first_task.nil?
|
98
|
+
|
98
99
|
adv = []
|
99
100
|
state = 0
|
100
101
|
next_task = next_preprocessing(save)
|
@@ -141,13 +142,16 @@ module MiGA::Dataset::Result
|
|
141
142
|
r = get_result(:distances)
|
142
143
|
ref = project.datasets.select(&:ref?).select(&:active?).map(&:name)
|
143
144
|
return if r.nil?
|
145
|
+
|
144
146
|
%i[haai_db aai_db ani_db].each do |db_type|
|
145
147
|
db = r.file_path(db_type)
|
146
148
|
next if db.nil? || !File.size?(db)
|
149
|
+
|
147
150
|
sqlite_db = SQLite3::Database.new db
|
148
151
|
table = db_type[-6..-4]
|
149
152
|
val = sqlite_db.execute "select seq2 from #{table}"
|
150
153
|
next if val.empty?
|
154
|
+
|
151
155
|
(val.map(&:first) - ref).each do |extra|
|
152
156
|
sqlite_db.execute "delete from #{table} where seq2=?", extra
|
153
157
|
end
|
@@ -160,6 +164,7 @@ module MiGA::Dataset::Result
|
|
160
164
|
# Add result type +:raw_reads+ at +base+ (no +_opts+ supported)
|
161
165
|
def add_result_raw_reads(base, _opts)
|
162
166
|
return nil unless result_files_exist?(base, '.1.fastq')
|
167
|
+
|
163
168
|
add_files_to_ds_result(
|
164
169
|
MiGA::Result.new("#{base}.json"), name,
|
165
170
|
if result_files_exist?(base, '.2.fastq')
|
@@ -174,6 +179,7 @@ module MiGA::Dataset::Result
|
|
174
179
|
# Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported)
|
175
180
|
def add_result_trimmed_reads(base, _opts)
|
176
181
|
return nil unless result_files_exist?(base, '.1.clipped.fastq')
|
182
|
+
|
177
183
|
add_files_to_ds_result(
|
178
184
|
MiGA::Result.new("#{base}.json"), name,
|
179
185
|
if result_files_exist?(base, '.2.clipped.fastq')
|
@@ -194,6 +200,7 @@ module MiGA::Dataset::Result
|
|
194
200
|
# Add result type +:read_quality+ at +base+ (no +_opts+ supported)
|
195
201
|
def add_result_read_quality(base, _opts)
|
196
202
|
return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
|
203
|
+
|
197
204
|
add_files_to_ds_result(
|
198
205
|
MiGA::Result.new("#{base}.json"), name,
|
199
206
|
solexaqa: '.solexaqa', fastqc: '.fastqc'
|
@@ -207,6 +214,7 @@ module MiGA::Dataset::Result
|
|
207
214
|
result_files_exist?(base, '.CoupledReads.fa') ||
|
208
215
|
result_files_exist?(base, '.SingleReads.fa') ||
|
209
216
|
result_files_exist?(base, %w[.1.fasta .2.fasta])
|
217
|
+
|
210
218
|
add_files_to_ds_result(
|
211
219
|
MiGA::Result.new("#{base}.json"), name,
|
212
220
|
coupled: '.CoupledReads.fa',
|
@@ -221,6 +229,7 @@ module MiGA::Dataset::Result
|
|
221
229
|
# +is_clean: Boolean+.
|
222
230
|
def add_result_assembly(base, opts)
|
223
231
|
return nil unless result_files_exist?(base, '.LargeContigs.fna')
|
232
|
+
|
224
233
|
r = add_files_to_ds_result(
|
225
234
|
MiGA::Result.new("#{base}.json"), name,
|
226
235
|
largecontigs: '.LargeContigs.fna',
|
@@ -240,6 +249,7 @@ module MiGA::Dataset::Result
|
|
240
249
|
# Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+
|
241
250
|
def add_result_cds(base, opts)
|
242
251
|
return nil unless result_files_exist?(base, %w[.faa])
|
252
|
+
|
243
253
|
r = add_files_to_ds_result(
|
244
254
|
MiGA::Result.new("#{base}.json"), name,
|
245
255
|
proteins: '.faa',
|
@@ -262,6 +272,7 @@ module MiGA::Dataset::Result
|
|
262
272
|
# Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
|
263
273
|
def add_result_essential_genes(base, _opts)
|
264
274
|
return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
|
275
|
+
|
265
276
|
add_files_to_ds_result(
|
266
277
|
MiGA::Result.new("#{base}.json"), name,
|
267
278
|
ess_genes: '.ess.faa',
|
@@ -276,6 +287,7 @@ module MiGA::Dataset::Result
|
|
276
287
|
def add_result_ssu(base, opts)
|
277
288
|
return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
|
278
289
|
return nil unless result_files_exist?(base, '.ssu.fa')
|
290
|
+
|
279
291
|
r = add_files_to_ds_result(
|
280
292
|
MiGA::Result.new("#{base}.json"), name,
|
281
293
|
longest_ssu_gene: '.ssu.fa',
|
@@ -298,6 +310,7 @@ module MiGA::Dataset::Result
|
|
298
310
|
return nil unless
|
299
311
|
result_files_exist?(base, '.mytaxa') ||
|
300
312
|
result_files_exist?(base, '.nomytaxa.txt')
|
313
|
+
|
301
314
|
add_files_to_ds_result(
|
302
315
|
MiGA::Result.new("#{base}.json"), name,
|
303
316
|
mytaxa: '.mytaxa',
|
@@ -323,6 +336,7 @@ module MiGA::Dataset::Result
|
|
323
336
|
return nil unless
|
324
337
|
result_files_exist?(base, %w[.pdf .mytaxa]) ||
|
325
338
|
result_files_exist?(base, '.nomytaxa.txt')
|
339
|
+
|
326
340
|
add_files_to_ds_result(
|
327
341
|
MiGA::Result.new("#{base}.json"), name,
|
328
342
|
nomytaxa: '.nomytaxa.txt',
|
@@ -379,6 +393,7 @@ module MiGA::Dataset::Result
|
|
379
393
|
def add_result_distances_ref(base)
|
380
394
|
pref = File.dirname(base)
|
381
395
|
return nil unless File.exist?("#{pref}/01.haai/#{name}.db")
|
396
|
+
|
382
397
|
MiGA::Result.new("#{base}.json").tap do |r|
|
383
398
|
r.add_files(
|
384
399
|
haai_db: "01.haai/#{name}.db",
|
@@ -394,6 +409,7 @@ module MiGA::Dataset::Result
|
|
394
409
|
return nil unless
|
395
410
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) ||
|
396
411
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
412
|
+
|
397
413
|
add_files_to_ds_result(
|
398
414
|
MiGA::Result.new("#{base}.json"), name,
|
399
415
|
aai_medoids: '.aai-medoids.tsv',
|