miga-base 0.3.1.3 → 0.3.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/stats.rb +6 -4
- data/bin/miga +3 -1
- data/lib/miga/common.rb +26 -18
- data/lib/miga/daemon.rb +59 -54
- data/lib/miga/dataset.rb +17 -171
- data/lib/miga/dataset/base.rb +73 -0
- data/lib/miga/{dataset_result.rb → dataset/result.rb} +105 -1
- data/lib/miga/project.rb +6 -315
- data/lib/miga/project/base.rb +99 -0
- data/lib/miga/project/dataset.rb +148 -0
- data/lib/miga/project/plugins.rb +41 -0
- data/lib/miga/{project_result.rb → project/result.rb} +66 -1
- data/lib/miga/version.rb +1 -1
- data/scripts/cds.bash +7 -0
- data/test/remote_dataset_test.rb +9 -7
- data/utils/distances.rb +16 -0
- data/utils/distances/functions.rb +58 -0
- data/utils/distances/ref-nomulti.rb +2 -0
- metadata +11 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2edb583f3cefe6b6f36a52b50b595e0f0d8be81c
|
4
|
+
data.tar.gz: 11a9c0c45f7144e88c01d5c7d4fc1c1bcda6b8e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75748d35171225261a4fc962ea80aa3ffc64ccf76206203866eb9fc60a3ae6b69c9b362f8e8248b5e7740aff2b63b8021c912f2b0a76950d5a7da04e95ef94b7
|
7
|
+
data.tar.gz: eece83478cfe635f9828eae45807a6bc072c5960a3320d05d154cf2ad8cd117fc0a1d801ec6b2ac18f527b62053bc698ee5f85f0ac4a0b8f5890516383130817
|
data/actions/stats.rb
CHANGED
@@ -111,10 +111,12 @@ if o[:compute]
|
|
111
111
|
end
|
112
112
|
end
|
113
113
|
stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
114
|
+
d.metadata[:quality] = case stats[:quality]
|
115
|
+
when 80..100 ; :excellent
|
116
|
+
when 50..80 ; :high
|
117
|
+
when 20..50 ; :intermediate
|
118
|
+
else ; :low
|
119
|
+
end
|
118
120
|
d.save
|
119
121
|
end
|
120
122
|
when :distances
|
data/bin/miga
CHANGED
@@ -139,6 +139,8 @@ def opt_filter_datasets(opt, o, what=[:ref, :multi, :taxonomy])
|
|
139
139
|
){ |v| o[:multi]=v } if what.include? :multi
|
140
140
|
opt.on("-t", "--taxonomy RANK:TAXON", "Filter by taxonomy."
|
141
141
|
){ |v| o[:taxonomy]=MiGA::Taxonomy.new v } if what.include? :taxonomy
|
142
|
+
opt.on("-k", "--key INTEGER",
|
143
|
+
"Returns only the k-th dataset in the list."){ |v| o[:key]=v.to_i }
|
142
144
|
end
|
143
145
|
|
144
146
|
def opt_require(o, req={project:"-P", dataset:"-D"})
|
@@ -161,7 +163,7 @@ def filter_datasets!(ds, o)
|
|
161
163
|
ds.select! do |d|
|
162
164
|
(not d.metadata[:tax].nil?) and d.metadata[:tax].is_in?(o[:taxonomy])
|
163
165
|
end unless o[:taxonomy].nil?
|
164
|
-
ds
|
166
|
+
o[:key].nil? ? ds : ds.values_at(o[:key]-1)
|
165
167
|
end
|
166
168
|
|
167
169
|
def add_metadata(o, obj)
|
data/lib/miga/common.rb
CHANGED
@@ -80,28 +80,36 @@ class MiGA::MiGA
|
|
80
80
|
##
|
81
81
|
# Cleans a FastA file in place.
|
82
82
|
def self.clean_fasta_file(file)
|
83
|
-
|
83
|
+
tmp_fh = nil
|
84
84
|
begin
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
85
|
+
if (file =~ /\.gz/)
|
86
|
+
tmp_path = Tempfile.new("MiGA.gz").tap{ |i| i.close }.path
|
87
|
+
tmp_fh = Zlib::GzipWriter.open(tmp_path)
|
88
|
+
fh = Zlib::GzipReader.open(file)
|
89
|
+
else
|
90
|
+
tmp_fh = Tempfile.new("MiGA")
|
91
|
+
tmp_path = tmp_fh.path
|
92
|
+
fh = File.open(file, "r")
|
93
|
+
end
|
94
|
+
buffer = ""
|
95
|
+
fh.each_line do |ln|
|
96
|
+
ln.chomp!
|
97
|
+
if ln =~ /^>\s*(\S+)(.*)/
|
98
|
+
(id, df) = [$1, $2]
|
99
|
+
tmp_fh.print buffer.wrap_width(80)
|
100
|
+
buffer = ""
|
101
|
+
tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
|
102
|
+
else
|
103
|
+
buffer << ln.gsub(/[^A-Za-z\.\-]/, "")
|
97
104
|
end
|
98
|
-
tmp.print buffer.wrap_width(80)
|
99
105
|
end
|
100
|
-
|
101
|
-
|
106
|
+
tmp_fh.print buffer.wrap_width(80)
|
107
|
+
tmp_fh.close
|
108
|
+
fh.close
|
109
|
+
FileUtils.cp(tmp_path, file)
|
102
110
|
ensure
|
103
|
-
|
104
|
-
|
111
|
+
tmp_fh.close unless tmp_fh.nil?
|
112
|
+
File.unlink(tmp_path) unless tmp_path.nil?
|
105
113
|
end
|
106
114
|
end
|
107
115
|
|
data/lib/miga/daemon.rb
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
# @package MiGA
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
4
|
+
require 'miga/project'
|
5
|
+
require 'daemons'
|
6
|
+
require 'date'
|
7
7
|
|
8
8
|
##
|
9
9
|
# MiGA Daemons handling job submissions.
|
10
10
|
class MiGA::Daemon < MiGA::MiGA
|
11
|
-
|
11
|
+
|
12
12
|
##
|
13
13
|
# When was the last time a daemon for the MiGA::Project +project+ was seen
|
14
14
|
# active? Returns DateTime.
|
15
15
|
def self.last_alive(project)
|
16
|
-
f = File.expand_path(
|
16
|
+
f = File.expand_path('daemon/alive', project.path)
|
17
17
|
return nil unless File.exist? f
|
18
18
|
DateTime.parse(File.read(f))
|
19
19
|
end
|
20
20
|
|
21
21
|
# Shutdown all spawned daemons before exit.
|
22
22
|
$_MIGA_DAEMON_LAIR = []
|
23
|
-
END { $_MIGA_DAEMON_LAIR.each
|
24
|
-
|
23
|
+
END { $_MIGA_DAEMON_LAIR.each(&:terminate) }
|
24
|
+
|
25
25
|
# MiGA::Project in which the daemon is running.
|
26
26
|
attr_reader :project
|
27
27
|
# Options used to setup the daemon.
|
@@ -40,8 +40,8 @@ class MiGA::Daemon < MiGA::MiGA
|
|
40
40
|
$_MIGA_DAEMON_LAIR << self
|
41
41
|
@project = project
|
42
42
|
@runopts = JSON.parse(
|
43
|
-
|
44
|
-
|
43
|
+
File.read(File.expand_path('daemon/daemon.json', project.path)),
|
44
|
+
symbolize_names: true)
|
45
45
|
@jobs_to_run = []
|
46
46
|
@jobs_running = []
|
47
47
|
@loop_i = -1
|
@@ -57,7 +57,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
57
57
|
##
|
58
58
|
# Returns Hash containing the default options for the daemon.
|
59
59
|
def default_options
|
60
|
-
{ dir_mode: :normal, dir: File.expand_path(
|
60
|
+
{ dir_mode: :normal, dir: File.expand_path('daemon', project.path),
|
61
61
|
multiple: false, log_output: true }
|
62
62
|
end
|
63
63
|
|
@@ -68,16 +68,19 @@ class MiGA::Daemon < MiGA::MiGA
|
|
68
68
|
def runopts(k, v=nil, force=false)
|
69
69
|
k = k.to_sym
|
70
70
|
unless v.nil?
|
71
|
-
|
72
|
-
|
71
|
+
if [:latency, :maxjobs, :ppn].include?(k)
|
72
|
+
v = v.to_i
|
73
|
+
elsif [:shutdown_when_done].include?(k)
|
74
|
+
v = !!v
|
75
|
+
end
|
73
76
|
raise "Daemon's #{k} cannot be set to zero." if !force and v==0
|
74
77
|
@runopts[k] = v
|
75
78
|
end
|
76
79
|
if k==:kill and v.nil?
|
77
80
|
case @runopts[:type].to_s
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
+
when 'bash' then return "kill -9 '%s'"
|
82
|
+
when 'qsub' then return "qdel '%s'"
|
83
|
+
else return "canceljob '%s'"
|
81
84
|
end
|
82
85
|
end
|
83
86
|
@runopts[k]
|
@@ -85,36 +88,36 @@ class MiGA::Daemon < MiGA::MiGA
|
|
85
88
|
|
86
89
|
##
|
87
90
|
# Returns Integer indicating the number of seconds to sleep between checks.
|
88
|
-
def latency() runopts(:latency)
|
91
|
+
def latency() runopts(:latency); end
|
89
92
|
|
90
93
|
##
|
91
94
|
# Returns Integer indicating the maximum number of concurrent jobs to run.
|
92
|
-
def maxjobs() runopts(:maxjobs)
|
95
|
+
def maxjobs() runopts(:maxjobs); end
|
93
96
|
|
94
97
|
##
|
95
98
|
# Returns Integer indicating the number of CPUs per job.
|
96
|
-
def ppn() runopts(:ppn)
|
99
|
+
def ppn() runopts(:ppn); end
|
97
100
|
|
98
101
|
##
|
99
102
|
# Returns Boolean indicating if the daemon should shutdown when processing is
|
100
103
|
# complete.
|
101
|
-
def shutdown_when_done?() !!runopts(:shutdown_when_done)
|
104
|
+
def shutdown_when_done?() !!runopts(:shutdown_when_done); end
|
102
105
|
|
103
106
|
##
|
104
107
|
# Initializes the daemon with +opts+.
|
105
|
-
def start(opts=[]) daemon(
|
108
|
+
def start(opts=[]) daemon('start', opts); end
|
106
109
|
|
107
110
|
##
|
108
111
|
# Stops the daemon with +opts+.
|
109
|
-
def stop(opts=[]) daemon(
|
112
|
+
def stop(opts=[]) daemon('stop', opts); end
|
110
113
|
|
111
114
|
##
|
112
115
|
# Restarts the daemon with +opts+.
|
113
|
-
def restart(opts=[]) daemon(
|
116
|
+
def restart(opts=[]) daemon('restart', opts); end
|
114
117
|
|
115
118
|
##
|
116
119
|
# Returns the status of the daemon with +opts+.
|
117
|
-
def status(opts=[]) daemon(
|
120
|
+
def status(opts=[]) daemon('status', opts); end
|
118
121
|
|
119
122
|
##
|
120
123
|
# Launches the +task+ with options +opts+ (as command-line arguments).
|
@@ -131,7 +134,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
131
134
|
##
|
132
135
|
# Tell the world that you're alive.
|
133
136
|
def declare_alive
|
134
|
-
f = File.open(File.expand_path(
|
137
|
+
f = File.open(File.expand_path('daemon/alive', project.path), 'w')
|
135
138
|
f.print Time.now.to_s
|
136
139
|
f.close
|
137
140
|
end
|
@@ -139,12 +142,12 @@ class MiGA::Daemon < MiGA::MiGA
|
|
139
142
|
##
|
140
143
|
# Report status in a JSON file.
|
141
144
|
def report_status
|
142
|
-
f = File.open(File.expand_path(
|
145
|
+
f = File.open(File.expand_path('daemon/status.json', project.path), 'w')
|
143
146
|
f.print JSON.pretty_generate(
|
144
147
|
jobs_running:@jobs_running, jobs_to_run:@jobs_to_run)
|
145
148
|
f.close
|
146
149
|
end
|
147
|
-
|
150
|
+
|
148
151
|
##
|
149
152
|
# Traverse datasets
|
150
153
|
def check_datasets
|
@@ -164,34 +167,37 @@ class MiGA::Daemon < MiGA::MiGA
|
|
164
167
|
# project-level tasks
|
165
168
|
def check_project
|
166
169
|
return if project.dataset_names.empty?
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
end
|
170
|
+
return unless project.done_preprocessing?(false)
|
171
|
+
to_run = project.next_distances(true)
|
172
|
+
to_run = project.next_inclade(true) if to_run.nil?
|
173
|
+
queue_job(to_run) unless to_run.nil?
|
172
174
|
end
|
173
|
-
|
175
|
+
|
174
176
|
##
|
175
177
|
# Add the task to the internal queue with symbol key +job+. If the task is
|
176
178
|
# dataset-specific, +ds+ specifies the dataset. To submit jobs to the
|
177
179
|
# scheduler (or to bash) see #flush!.
|
178
180
|
def queue_job(job, ds=nil)
|
179
181
|
return nil unless get_job(job, ds).nil?
|
180
|
-
ds_name = (ds.nil? ?
|
181
|
-
say
|
182
|
-
vars = {
|
183
|
-
|
184
|
-
|
182
|
+
ds_name = (ds.nil? ? 'miga-project' : ds.name)
|
183
|
+
say 'Queueing %s:%s' % [ds_name, job]
|
184
|
+
vars = {
|
185
|
+
'PROJECT' => project.path,
|
186
|
+
'RUNTYPE' => runopts(:type),
|
187
|
+
'CORES' => ppn,
|
188
|
+
'MIGA' => MiGA::MiGA.root_path
|
189
|
+
}
|
190
|
+
vars['DATASET'] = ds.name unless ds.nil?
|
185
191
|
log_dir = File.expand_path("daemon/#{job}", project.path)
|
186
192
|
Dir.mkdir(log_dir) unless Dir.exist? log_dir
|
187
193
|
task_name = "#{project.metadata[:name][0..9]}:#{job}:#{ds_name}"
|
188
194
|
to_run = {ds: ds, job: job, task_name: task_name,
|
189
195
|
cmd: sprintf(runopts(:cmd),
|
190
196
|
# 1: script
|
191
|
-
MiGA::MiGA.script_path(job, miga:vars[
|
197
|
+
MiGA::MiGA.script_path(job, miga:vars['MIGA'], project:project),
|
192
198
|
# 2: vars
|
193
|
-
vars.keys.map { |k|
|
194
|
-
|
199
|
+
vars.keys.map { |k| sprintf(runopts(:var), k, vars[k]) }.
|
200
|
+
join(runopts(:varsep)),
|
195
201
|
# 3: CPUs
|
196
202
|
ppn,
|
197
203
|
# 4: log file
|
@@ -213,7 +219,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
213
219
|
end
|
214
220
|
end
|
215
221
|
end
|
216
|
-
|
222
|
+
|
217
223
|
##
|
218
224
|
# Remove finished jobs from the internal queue and launch as many as
|
219
225
|
# possible respecting #maxjobs.
|
@@ -245,9 +251,9 @@ class MiGA::Daemon < MiGA::MiGA
|
|
245
251
|
# Run one loop step. Returns a Boolean indicating if the loop should continue.
|
246
252
|
def in_loop
|
247
253
|
if loop_i == -1
|
248
|
-
say
|
249
|
-
say
|
250
|
-
say
|
254
|
+
say '-----------------------------------'
|
255
|
+
say 'MiGA:%s launched.' % project.name
|
256
|
+
say '-----------------------------------'
|
251
257
|
@loop_i = 0
|
252
258
|
end
|
253
259
|
@loop_i += 1
|
@@ -257,14 +263,14 @@ class MiGA::Daemon < MiGA::MiGA
|
|
257
263
|
check_project
|
258
264
|
flush!
|
259
265
|
if loop_i==4
|
260
|
-
say
|
266
|
+
say 'Housekeeping for sanity'
|
261
267
|
@loop_i = 0
|
262
268
|
purge!
|
263
269
|
end
|
264
270
|
report_status
|
265
271
|
sleep(latency)
|
266
272
|
if shutdown_when_done? and jobs_running.size+jobs_to_run.size == 0
|
267
|
-
say
|
273
|
+
say 'Nothing else to do, shutting down.'
|
268
274
|
return false
|
269
275
|
end
|
270
276
|
true
|
@@ -279,32 +285,32 @@ class MiGA::Daemon < MiGA::MiGA
|
|
279
285
|
##
|
280
286
|
# Terminates a daemon.
|
281
287
|
def terminate
|
282
|
-
say
|
288
|
+
say 'Terminating daemon...'
|
283
289
|
report_status
|
284
290
|
k = runopts(:kill)
|
285
291
|
@jobs_running.each do |i|
|
286
292
|
`#{k % i[:pid]}`
|
287
293
|
puts "Terminating pid:#{i[:pid]} for #{i[:task_name]}"
|
288
294
|
end
|
289
|
-
f = File.expand_path(
|
295
|
+
f = File.expand_path('daemon/alive', project.path)
|
290
296
|
File.unlink(f) if File.exist? f
|
291
297
|
end
|
292
298
|
|
293
299
|
private
|
294
|
-
|
300
|
+
|
295
301
|
def launch_job(job)
|
296
302
|
# Execute job
|
297
|
-
if runopts(:type) ==
|
303
|
+
if runopts(:type) == 'bash'
|
298
304
|
# Local job
|
299
305
|
job[:pid] = spawn job[:cmd]
|
300
|
-
Process.detach job[:pid] unless [nil,
|
306
|
+
Process.detach job[:pid] unless [nil, '', 0].include?(job[:pid])
|
301
307
|
else
|
302
308
|
# Schedule cluster job
|
303
309
|
job[:pid] = `#{job[:cmd]}`.chomp
|
304
310
|
end
|
305
|
-
|
311
|
+
|
306
312
|
# Check if registered
|
307
|
-
if [nil,
|
313
|
+
if [nil, '', 0].include? job[:pid]
|
308
314
|
job[:pid] = nil
|
309
315
|
@jobs_to_run << job
|
310
316
|
say "Unsuccessful #{job[:task_name]}, rescheduling."
|
@@ -313,5 +319,4 @@ class MiGA::Daemon < MiGA::MiGA
|
|
313
319
|
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
314
320
|
end
|
315
321
|
end
|
316
|
-
|
317
322
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -2,85 +2,30 @@
|
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
4
|
require "miga/metadata"
|
5
|
-
require "miga/result"
|
6
|
-
require "miga/dataset_result"
|
5
|
+
require "miga/dataset/result"
|
7
6
|
require "sqlite3"
|
8
7
|
|
9
8
|
##
|
10
9
|
# Dataset representation in MiGA.
|
11
10
|
class MiGA::Dataset < MiGA::MiGA
|
12
11
|
|
13
|
-
include MiGA::
|
12
|
+
include MiGA::Dataset::Result
|
14
13
|
|
15
14
|
# Class-level
|
15
|
+
class << self
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
raw_reads: "01.raw_reads", trimmed_reads: "02.trimmed_reads",
|
23
|
-
read_quality: "03.read_quality", trimmed_fasta: "04.trimmed_fasta",
|
24
|
-
assembly: "05.assembly", cds: "06.cds",
|
25
|
-
# Annotation
|
26
|
-
essential_genes: "07.annotation/01.function/01.essential",
|
27
|
-
ssu: "07.annotation/01.function/02.ssu",
|
28
|
-
mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
|
29
|
-
mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
|
30
|
-
# Distances (for single-species datasets)
|
31
|
-
distances: "09.distances", taxonomy: "09.distances/05.taxonomy",
|
32
|
-
# General statistics
|
33
|
-
stats: "90.stats"
|
34
|
-
}
|
35
|
-
|
36
|
-
##
|
37
|
-
# Supported dataset types.
|
38
|
-
def self.KNOWN_TYPES ; @@KNOWN_TYPES end
|
39
|
-
@@KNOWN_TYPES = {
|
40
|
-
genome: {description: "The genome from an isolate.", multi: false},
|
41
|
-
scgenome: {description: "A Single-cell Genome Amplification (SGA).",
|
42
|
-
multi: false},
|
43
|
-
popgenome: {description: "A population genome (including " +
|
44
|
-
"metagenomic bins).", :multi=>false},
|
45
|
-
metagenome: {description: "A metagenome (excluding viromes).",
|
46
|
-
multi: true},
|
47
|
-
virome: {description: "A viral metagenome.", multi: true}
|
48
|
-
}
|
17
|
+
##
|
18
|
+
# Does the +project+ already have a dataset with that +name+?
|
19
|
+
def exist?(project, name)
|
20
|
+
project.dataset_names.include? name
|
21
|
+
end
|
49
22
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
:mytaxa_scan, :distances, :taxonomy, :stats]
|
56
|
-
|
57
|
-
##
|
58
|
-
# Tasks to be excluded from query datasets.
|
59
|
-
@@EXCLUDE_NOREF_TASKS = [:mytaxa_scan, :taxonomy]
|
60
|
-
@@_EXCLUDE_NOREF_TASKS_H = Hash[@@EXCLUDE_NOREF_TASKS.map{ |i| [i,true] }]
|
23
|
+
##
|
24
|
+
# Standard fields of metadata for datasets.
|
25
|
+
def INFO_FIELDS
|
26
|
+
%w(name created updated type ref user description comments)
|
27
|
+
end
|
61
28
|
|
62
|
-
##
|
63
|
-
# Tasks to be executed only in datasets that are not multi-organism. These
|
64
|
-
# tasks are ignored for multi-organism datasets or for unknown types.
|
65
|
-
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances, :taxonomy]
|
66
|
-
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map{ |i| [i,true] }]
|
67
|
-
|
68
|
-
##
|
69
|
-
# Tasks to be executed only in datasets that are multi-organism. These
|
70
|
-
# tasks are ignored for single-organism datasets or for unknwon types.
|
71
|
-
@@ONLY_MULTI_TASKS = [:mytaxa]
|
72
|
-
@@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map{ |i| [i,true] }]
|
73
|
-
|
74
|
-
##
|
75
|
-
# Does the +project+ already have a dataset with that +name+?
|
76
|
-
def self.exist?(project, name)
|
77
|
-
File.exist? "#{project.path}/metadata/#{name}.json"
|
78
|
-
end
|
79
|
-
|
80
|
-
##
|
81
|
-
# Standard fields of metadata for datasets.
|
82
|
-
def self.INFO_FIELDS
|
83
|
-
%w(name created updated type ref user description comments)
|
84
29
|
end
|
85
30
|
|
86
31
|
# Instance-level
|
@@ -164,79 +109,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
164
109
|
!@@KNOWN_TYPES[type][:multi]
|
165
110
|
end
|
166
111
|
|
167
|
-
##
|
168
|
-
# Get the result MiGA::Result in this dataset identified by the symbol +k+.
|
169
|
-
def result(k)
|
170
|
-
return nil if @@RESULT_DIRS[k.to_sym].nil?
|
171
|
-
MiGA::Result.load(
|
172
|
-
"#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json" )
|
173
|
-
end
|
174
|
-
|
175
|
-
##
|
176
|
-
# Get all the results (Array of MiGA::Result) in this dataset.
|
177
|
-
def results ; @@RESULT_DIRS.keys.map{ |k| result k }.compact ; end
|
178
|
-
|
179
|
-
##
|
180
|
-
# For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result.
|
181
|
-
def each_result(&blk)
|
182
|
-
@@RESULT_DIRS.keys.each do |k|
|
183
|
-
blk.call(k, result(k)) unless result(k).nil?
|
184
|
-
end
|
185
|
-
end
|
186
|
-
|
187
|
-
##
|
188
|
-
# Look for the result with symbol key +result_type+ and register it in the
|
189
|
-
# dataset. If +save+ is false, it doesn't register the result, but it still
|
190
|
-
# returns a result if the expected files are complete. The +opts+ hash
|
191
|
-
# controls result creation (if necessary). Supported values include:
|
192
|
-
# - +is_clean+: A Boolean indicating if the input files are clean.
|
193
|
-
# - +force+: A Boolean indicating if the result must be re-indexed. If true, it
|
194
|
-
# implies save=true.
|
195
|
-
# Returns MiGA::Result or nil.
|
196
|
-
def add_result(result_type, save=true, opts={})
|
197
|
-
dir = @@RESULT_DIRS[result_type]
|
198
|
-
return nil if dir.nil?
|
199
|
-
base = File.expand_path("data/#{dir}/#{name}", project.path)
|
200
|
-
unless opts[:force]
|
201
|
-
r_pre = MiGA::Result.load("#{base}.json")
|
202
|
-
return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
|
203
|
-
end
|
204
|
-
r = File.exist?("#{base}.done") ?
|
205
|
-
self.send("add_result_#{result_type}", base, opts) : nil
|
206
|
-
r.save unless r.nil?
|
207
|
-
r
|
208
|
-
end
|
209
|
-
|
210
|
-
##
|
211
|
-
# Gets a result as MiGA::Result for the datasets with +result_type+. This is
|
212
|
-
# equivalent to +add_result(result_type, false)+.
|
213
|
-
def get_result(result_type) ; add_result(result_type, false) ; end
|
214
|
-
|
215
|
-
##
|
216
|
-
# Returns the key symbol of the first registered result (sorted by the
|
217
|
-
# execution order). This typically corresponds to the result used as the
|
218
|
-
# initial input. Passes +save+ to #add_result.
|
219
|
-
def first_preprocessing(save=false)
|
220
|
-
@@PREPROCESSING_TASKS.find do |t|
|
221
|
-
not ignore_task?(t) and not add_result(t, save).nil?
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
##
|
226
|
-
# Returns the key symbol of the next task that needs to be executed. Passes
|
227
|
-
# +save+ to #add_result.
|
228
|
-
def next_preprocessing(save=false)
|
229
|
-
after_first = false
|
230
|
-
first = first_preprocessing(save)
|
231
|
-
return nil if first.nil?
|
232
|
-
@@PREPROCESSING_TASKS.each do |t|
|
233
|
-
next if ignore_task? t
|
234
|
-
return t if after_first and add_result(t, save).nil?
|
235
|
-
after_first = (after_first or (t==first))
|
236
|
-
end
|
237
|
-
nil
|
238
|
-
end
|
239
|
-
|
240
112
|
##
|
241
113
|
# Should I ignore +task+ for this dataset?
|
242
114
|
def ignore_task?(task)
|
@@ -248,40 +120,13 @@ class MiGA::Dataset < MiGA::MiGA
|
|
248
120
|
[@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?]==pattern )
|
249
121
|
end
|
250
122
|
|
251
|
-
##
|
252
|
-
# Are all the dataset-specific tasks done? Passes +save+ to #add_result.
|
253
|
-
def done_preprocessing?(save=false)
|
254
|
-
!first_preprocessing(save).nil? and next_preprocessing(save).nil?
|
255
|
-
end
|
256
|
-
|
257
|
-
##
|
258
|
-
# Returns an array indicating the stage of each task (sorted by execution
|
259
|
-
# order). The values are integers:
|
260
|
-
# - 0 for an undefined result (a task before the initial input).
|
261
|
-
# - 1 for a registered result (a completed task).
|
262
|
-
# - 2 for a queued result (a task yet to be executed).
|
263
|
-
# It passes +save+ to #add_result
|
264
|
-
def profile_advance(save=false)
|
265
|
-
first_task = first_preprocessing(save)
|
266
|
-
return Array.new(@@PREPROCESSING_TASKS.size, 0) if first_task.nil?
|
267
|
-
adv = []
|
268
|
-
state = 0
|
269
|
-
next_task = next_preprocessing(save)
|
270
|
-
@@PREPROCESSING_TASKS.each do |task|
|
271
|
-
state = 1 if first_task==task
|
272
|
-
state = 2 if !next_task.nil? and next_task==task
|
273
|
-
adv << state
|
274
|
-
end
|
275
|
-
adv
|
276
|
-
end
|
277
|
-
|
278
123
|
##
|
279
124
|
# Returns an Array of +how_many+ duples (Arrays) sorted by AAI:
|
280
125
|
# - +0+: A String with the name(s) of the reference dataset.
|
281
126
|
# - +1+: A Float with the AAI.
|
282
|
-
# This function is currently only supported for query datasets when
|
283
|
-
# (default), and only for reference dataset when
|
284
|
-
# +nil+ if this analysis is not supported.
|
127
|
+
# This function is currently only supported for query datasets when
|
128
|
+
# +ref_project+ is false (default), and only for reference dataset when
|
129
|
+
# +ref_project+ is true. It returns +nil+ if this analysis is not supported.
|
285
130
|
def closest_relatives(how_many=1, ref_project=false)
|
286
131
|
return nil if (is_ref? != ref_project) or is_multi?
|
287
132
|
r = result(ref_project ? :taxonomy : :distances)
|
@@ -292,3 +137,4 @@ class MiGA::Dataset < MiGA::MiGA
|
|
292
137
|
end
|
293
138
|
|
294
139
|
end # class MiGA::Dataset
|
140
|
+
|