miga-base 0.3.1.3 → 0.3.1.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/stats.rb +6 -4
- data/bin/miga +3 -1
- data/lib/miga/common.rb +26 -18
- data/lib/miga/daemon.rb +59 -54
- data/lib/miga/dataset.rb +17 -171
- data/lib/miga/dataset/base.rb +73 -0
- data/lib/miga/{dataset_result.rb → dataset/result.rb} +105 -1
- data/lib/miga/project.rb +6 -315
- data/lib/miga/project/base.rb +99 -0
- data/lib/miga/project/dataset.rb +148 -0
- data/lib/miga/project/plugins.rb +41 -0
- data/lib/miga/{project_result.rb → project/result.rb} +66 -1
- data/lib/miga/version.rb +1 -1
- data/scripts/cds.bash +7 -0
- data/test/remote_dataset_test.rb +9 -7
- data/utils/distances.rb +16 -0
- data/utils/distances/functions.rb +58 -0
- data/utils/distances/ref-nomulti.rb +2 -0
- metadata +11 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2edb583f3cefe6b6f36a52b50b595e0f0d8be81c
|
4
|
+
data.tar.gz: 11a9c0c45f7144e88c01d5c7d4fc1c1bcda6b8e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75748d35171225261a4fc962ea80aa3ffc64ccf76206203866eb9fc60a3ae6b69c9b362f8e8248b5e7740aff2b63b8021c912f2b0a76950d5a7da04e95ef94b7
|
7
|
+
data.tar.gz: eece83478cfe635f9828eae45807a6bc072c5960a3320d05d154cf2ad8cd117fc0a1d801ec6b2ac18f527b62053bc698ee5f85f0ac4a0b8f5890516383130817
|
data/actions/stats.rb
CHANGED
@@ -111,10 +111,12 @@ if o[:compute]
|
|
111
111
|
end
|
112
112
|
end
|
113
113
|
stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
114
|
+
d.metadata[:quality] = case stats[:quality]
|
115
|
+
when 80..100 ; :excellent
|
116
|
+
when 50..80 ; :high
|
117
|
+
when 20..50 ; :intermediate
|
118
|
+
else ; :low
|
119
|
+
end
|
118
120
|
d.save
|
119
121
|
end
|
120
122
|
when :distances
|
data/bin/miga
CHANGED
@@ -139,6 +139,8 @@ def opt_filter_datasets(opt, o, what=[:ref, :multi, :taxonomy])
|
|
139
139
|
){ |v| o[:multi]=v } if what.include? :multi
|
140
140
|
opt.on("-t", "--taxonomy RANK:TAXON", "Filter by taxonomy."
|
141
141
|
){ |v| o[:taxonomy]=MiGA::Taxonomy.new v } if what.include? :taxonomy
|
142
|
+
opt.on("-k", "--key INTEGER",
|
143
|
+
"Returns only the k-th dataset in the list."){ |v| o[:key]=v.to_i }
|
142
144
|
end
|
143
145
|
|
144
146
|
def opt_require(o, req={project:"-P", dataset:"-D"})
|
@@ -161,7 +163,7 @@ def filter_datasets!(ds, o)
|
|
161
163
|
ds.select! do |d|
|
162
164
|
(not d.metadata[:tax].nil?) and d.metadata[:tax].is_in?(o[:taxonomy])
|
163
165
|
end unless o[:taxonomy].nil?
|
164
|
-
ds
|
166
|
+
o[:key].nil? ? ds : ds.values_at(o[:key]-1)
|
165
167
|
end
|
166
168
|
|
167
169
|
def add_metadata(o, obj)
|
data/lib/miga/common.rb
CHANGED
@@ -80,28 +80,36 @@ class MiGA::MiGA
|
|
80
80
|
##
|
81
81
|
# Cleans a FastA file in place.
|
82
82
|
def self.clean_fasta_file(file)
|
83
|
-
|
83
|
+
tmp_fh = nil
|
84
84
|
begin
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
85
|
+
if (file =~ /\.gz/)
|
86
|
+
tmp_path = Tempfile.new("MiGA.gz").tap{ |i| i.close }.path
|
87
|
+
tmp_fh = Zlib::GzipWriter.open(tmp_path)
|
88
|
+
fh = Zlib::GzipReader.open(file)
|
89
|
+
else
|
90
|
+
tmp_fh = Tempfile.new("MiGA")
|
91
|
+
tmp_path = tmp_fh.path
|
92
|
+
fh = File.open(file, "r")
|
93
|
+
end
|
94
|
+
buffer = ""
|
95
|
+
fh.each_line do |ln|
|
96
|
+
ln.chomp!
|
97
|
+
if ln =~ /^>\s*(\S+)(.*)/
|
98
|
+
(id, df) = [$1, $2]
|
99
|
+
tmp_fh.print buffer.wrap_width(80)
|
100
|
+
buffer = ""
|
101
|
+
tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
|
102
|
+
else
|
103
|
+
buffer << ln.gsub(/[^A-Za-z\.\-]/, "")
|
97
104
|
end
|
98
|
-
tmp.print buffer.wrap_width(80)
|
99
105
|
end
|
100
|
-
|
101
|
-
|
106
|
+
tmp_fh.print buffer.wrap_width(80)
|
107
|
+
tmp_fh.close
|
108
|
+
fh.close
|
109
|
+
FileUtils.cp(tmp_path, file)
|
102
110
|
ensure
|
103
|
-
|
104
|
-
|
111
|
+
tmp_fh.close unless tmp_fh.nil?
|
112
|
+
File.unlink(tmp_path) unless tmp_path.nil?
|
105
113
|
end
|
106
114
|
end
|
107
115
|
|
data/lib/miga/daemon.rb
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
# @package MiGA
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
4
|
+
require 'miga/project'
|
5
|
+
require 'daemons'
|
6
|
+
require 'date'
|
7
7
|
|
8
8
|
##
|
9
9
|
# MiGA Daemons handling job submissions.
|
10
10
|
class MiGA::Daemon < MiGA::MiGA
|
11
|
-
|
11
|
+
|
12
12
|
##
|
13
13
|
# When was the last time a daemon for the MiGA::Project +project+ was seen
|
14
14
|
# active? Returns DateTime.
|
15
15
|
def self.last_alive(project)
|
16
|
-
f = File.expand_path(
|
16
|
+
f = File.expand_path('daemon/alive', project.path)
|
17
17
|
return nil unless File.exist? f
|
18
18
|
DateTime.parse(File.read(f))
|
19
19
|
end
|
20
20
|
|
21
21
|
# Shutdown all spawned daemons before exit.
|
22
22
|
$_MIGA_DAEMON_LAIR = []
|
23
|
-
END { $_MIGA_DAEMON_LAIR.each
|
24
|
-
|
23
|
+
END { $_MIGA_DAEMON_LAIR.each(&:terminate) }
|
24
|
+
|
25
25
|
# MiGA::Project in which the daemon is running.
|
26
26
|
attr_reader :project
|
27
27
|
# Options used to setup the daemon.
|
@@ -40,8 +40,8 @@ class MiGA::Daemon < MiGA::MiGA
|
|
40
40
|
$_MIGA_DAEMON_LAIR << self
|
41
41
|
@project = project
|
42
42
|
@runopts = JSON.parse(
|
43
|
-
|
44
|
-
|
43
|
+
File.read(File.expand_path('daemon/daemon.json', project.path)),
|
44
|
+
symbolize_names: true)
|
45
45
|
@jobs_to_run = []
|
46
46
|
@jobs_running = []
|
47
47
|
@loop_i = -1
|
@@ -57,7 +57,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
57
57
|
##
|
58
58
|
# Returns Hash containing the default options for the daemon.
|
59
59
|
def default_options
|
60
|
-
{ dir_mode: :normal, dir: File.expand_path(
|
60
|
+
{ dir_mode: :normal, dir: File.expand_path('daemon', project.path),
|
61
61
|
multiple: false, log_output: true }
|
62
62
|
end
|
63
63
|
|
@@ -68,16 +68,19 @@ class MiGA::Daemon < MiGA::MiGA
|
|
68
68
|
def runopts(k, v=nil, force=false)
|
69
69
|
k = k.to_sym
|
70
70
|
unless v.nil?
|
71
|
-
|
72
|
-
|
71
|
+
if [:latency, :maxjobs, :ppn].include?(k)
|
72
|
+
v = v.to_i
|
73
|
+
elsif [:shutdown_when_done].include?(k)
|
74
|
+
v = !!v
|
75
|
+
end
|
73
76
|
raise "Daemon's #{k} cannot be set to zero." if !force and v==0
|
74
77
|
@runopts[k] = v
|
75
78
|
end
|
76
79
|
if k==:kill and v.nil?
|
77
80
|
case @runopts[:type].to_s
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
+
when 'bash' then return "kill -9 '%s'"
|
82
|
+
when 'qsub' then return "qdel '%s'"
|
83
|
+
else return "canceljob '%s'"
|
81
84
|
end
|
82
85
|
end
|
83
86
|
@runopts[k]
|
@@ -85,36 +88,36 @@ class MiGA::Daemon < MiGA::MiGA
|
|
85
88
|
|
86
89
|
##
|
87
90
|
# Returns Integer indicating the number of seconds to sleep between checks.
|
88
|
-
def latency() runopts(:latency)
|
91
|
+
def latency() runopts(:latency); end
|
89
92
|
|
90
93
|
##
|
91
94
|
# Returns Integer indicating the maximum number of concurrent jobs to run.
|
92
|
-
def maxjobs() runopts(:maxjobs)
|
95
|
+
def maxjobs() runopts(:maxjobs); end
|
93
96
|
|
94
97
|
##
|
95
98
|
# Returns Integer indicating the number of CPUs per job.
|
96
|
-
def ppn() runopts(:ppn)
|
99
|
+
def ppn() runopts(:ppn); end
|
97
100
|
|
98
101
|
##
|
99
102
|
# Returns Boolean indicating if the daemon should shutdown when processing is
|
100
103
|
# complete.
|
101
|
-
def shutdown_when_done?() !!runopts(:shutdown_when_done)
|
104
|
+
def shutdown_when_done?() !!runopts(:shutdown_when_done); end
|
102
105
|
|
103
106
|
##
|
104
107
|
# Initializes the daemon with +opts+.
|
105
|
-
def start(opts=[]) daemon(
|
108
|
+
def start(opts=[]) daemon('start', opts); end
|
106
109
|
|
107
110
|
##
|
108
111
|
# Stops the daemon with +opts+.
|
109
|
-
def stop(opts=[]) daemon(
|
112
|
+
def stop(opts=[]) daemon('stop', opts); end
|
110
113
|
|
111
114
|
##
|
112
115
|
# Restarts the daemon with +opts+.
|
113
|
-
def restart(opts=[]) daemon(
|
116
|
+
def restart(opts=[]) daemon('restart', opts); end
|
114
117
|
|
115
118
|
##
|
116
119
|
# Returns the status of the daemon with +opts+.
|
117
|
-
def status(opts=[]) daemon(
|
120
|
+
def status(opts=[]) daemon('status', opts); end
|
118
121
|
|
119
122
|
##
|
120
123
|
# Launches the +task+ with options +opts+ (as command-line arguments).
|
@@ -131,7 +134,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
131
134
|
##
|
132
135
|
# Tell the world that you're alive.
|
133
136
|
def declare_alive
|
134
|
-
f = File.open(File.expand_path(
|
137
|
+
f = File.open(File.expand_path('daemon/alive', project.path), 'w')
|
135
138
|
f.print Time.now.to_s
|
136
139
|
f.close
|
137
140
|
end
|
@@ -139,12 +142,12 @@ class MiGA::Daemon < MiGA::MiGA
|
|
139
142
|
##
|
140
143
|
# Report status in a JSON file.
|
141
144
|
def report_status
|
142
|
-
f = File.open(File.expand_path(
|
145
|
+
f = File.open(File.expand_path('daemon/status.json', project.path), 'w')
|
143
146
|
f.print JSON.pretty_generate(
|
144
147
|
jobs_running:@jobs_running, jobs_to_run:@jobs_to_run)
|
145
148
|
f.close
|
146
149
|
end
|
147
|
-
|
150
|
+
|
148
151
|
##
|
149
152
|
# Traverse datasets
|
150
153
|
def check_datasets
|
@@ -164,34 +167,37 @@ class MiGA::Daemon < MiGA::MiGA
|
|
164
167
|
# project-level tasks
|
165
168
|
def check_project
|
166
169
|
return if project.dataset_names.empty?
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
end
|
170
|
+
return unless project.done_preprocessing?(false)
|
171
|
+
to_run = project.next_distances(true)
|
172
|
+
to_run = project.next_inclade(true) if to_run.nil?
|
173
|
+
queue_job(to_run) unless to_run.nil?
|
172
174
|
end
|
173
|
-
|
175
|
+
|
174
176
|
##
|
175
177
|
# Add the task to the internal queue with symbol key +job+. If the task is
|
176
178
|
# dataset-specific, +ds+ specifies the dataset. To submit jobs to the
|
177
179
|
# scheduler (or to bash) see #flush!.
|
178
180
|
def queue_job(job, ds=nil)
|
179
181
|
return nil unless get_job(job, ds).nil?
|
180
|
-
ds_name = (ds.nil? ?
|
181
|
-
say
|
182
|
-
vars = {
|
183
|
-
|
184
|
-
|
182
|
+
ds_name = (ds.nil? ? 'miga-project' : ds.name)
|
183
|
+
say 'Queueing %s:%s' % [ds_name, job]
|
184
|
+
vars = {
|
185
|
+
'PROJECT' => project.path,
|
186
|
+
'RUNTYPE' => runopts(:type),
|
187
|
+
'CORES' => ppn,
|
188
|
+
'MIGA' => MiGA::MiGA.root_path
|
189
|
+
}
|
190
|
+
vars['DATASET'] = ds.name unless ds.nil?
|
185
191
|
log_dir = File.expand_path("daemon/#{job}", project.path)
|
186
192
|
Dir.mkdir(log_dir) unless Dir.exist? log_dir
|
187
193
|
task_name = "#{project.metadata[:name][0..9]}:#{job}:#{ds_name}"
|
188
194
|
to_run = {ds: ds, job: job, task_name: task_name,
|
189
195
|
cmd: sprintf(runopts(:cmd),
|
190
196
|
# 1: script
|
191
|
-
MiGA::MiGA.script_path(job, miga:vars[
|
197
|
+
MiGA::MiGA.script_path(job, miga:vars['MIGA'], project:project),
|
192
198
|
# 2: vars
|
193
|
-
vars.keys.map { |k|
|
194
|
-
|
199
|
+
vars.keys.map { |k| sprintf(runopts(:var), k, vars[k]) }.
|
200
|
+
join(runopts(:varsep)),
|
195
201
|
# 3: CPUs
|
196
202
|
ppn,
|
197
203
|
# 4: log file
|
@@ -213,7 +219,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
213
219
|
end
|
214
220
|
end
|
215
221
|
end
|
216
|
-
|
222
|
+
|
217
223
|
##
|
218
224
|
# Remove finished jobs from the internal queue and launch as many as
|
219
225
|
# possible respecting #maxjobs.
|
@@ -245,9 +251,9 @@ class MiGA::Daemon < MiGA::MiGA
|
|
245
251
|
# Run one loop step. Returns a Boolean indicating if the loop should continue.
|
246
252
|
def in_loop
|
247
253
|
if loop_i == -1
|
248
|
-
say
|
249
|
-
say
|
250
|
-
say
|
254
|
+
say '-----------------------------------'
|
255
|
+
say 'MiGA:%s launched.' % project.name
|
256
|
+
say '-----------------------------------'
|
251
257
|
@loop_i = 0
|
252
258
|
end
|
253
259
|
@loop_i += 1
|
@@ -257,14 +263,14 @@ class MiGA::Daemon < MiGA::MiGA
|
|
257
263
|
check_project
|
258
264
|
flush!
|
259
265
|
if loop_i==4
|
260
|
-
say
|
266
|
+
say 'Housekeeping for sanity'
|
261
267
|
@loop_i = 0
|
262
268
|
purge!
|
263
269
|
end
|
264
270
|
report_status
|
265
271
|
sleep(latency)
|
266
272
|
if shutdown_when_done? and jobs_running.size+jobs_to_run.size == 0
|
267
|
-
say
|
273
|
+
say 'Nothing else to do, shutting down.'
|
268
274
|
return false
|
269
275
|
end
|
270
276
|
true
|
@@ -279,32 +285,32 @@ class MiGA::Daemon < MiGA::MiGA
|
|
279
285
|
##
|
280
286
|
# Terminates a daemon.
|
281
287
|
def terminate
|
282
|
-
say
|
288
|
+
say 'Terminating daemon...'
|
283
289
|
report_status
|
284
290
|
k = runopts(:kill)
|
285
291
|
@jobs_running.each do |i|
|
286
292
|
`#{k % i[:pid]}`
|
287
293
|
puts "Terminating pid:#{i[:pid]} for #{i[:task_name]}"
|
288
294
|
end
|
289
|
-
f = File.expand_path(
|
295
|
+
f = File.expand_path('daemon/alive', project.path)
|
290
296
|
File.unlink(f) if File.exist? f
|
291
297
|
end
|
292
298
|
|
293
299
|
private
|
294
|
-
|
300
|
+
|
295
301
|
def launch_job(job)
|
296
302
|
# Execute job
|
297
|
-
if runopts(:type) ==
|
303
|
+
if runopts(:type) == 'bash'
|
298
304
|
# Local job
|
299
305
|
job[:pid] = spawn job[:cmd]
|
300
|
-
Process.detach job[:pid] unless [nil,
|
306
|
+
Process.detach job[:pid] unless [nil, '', 0].include?(job[:pid])
|
301
307
|
else
|
302
308
|
# Schedule cluster job
|
303
309
|
job[:pid] = `#{job[:cmd]}`.chomp
|
304
310
|
end
|
305
|
-
|
311
|
+
|
306
312
|
# Check if registered
|
307
|
-
if [nil,
|
313
|
+
if [nil, '', 0].include? job[:pid]
|
308
314
|
job[:pid] = nil
|
309
315
|
@jobs_to_run << job
|
310
316
|
say "Unsuccessful #{job[:task_name]}, rescheduling."
|
@@ -313,5 +319,4 @@ class MiGA::Daemon < MiGA::MiGA
|
|
313
319
|
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
314
320
|
end
|
315
321
|
end
|
316
|
-
|
317
322
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -2,85 +2,30 @@
|
|
2
2
|
# @license Artistic-2.0
|
3
3
|
|
4
4
|
require "miga/metadata"
|
5
|
-
require "miga/result"
|
6
|
-
require "miga/dataset_result"
|
5
|
+
require "miga/dataset/result"
|
7
6
|
require "sqlite3"
|
8
7
|
|
9
8
|
##
|
10
9
|
# Dataset representation in MiGA.
|
11
10
|
class MiGA::Dataset < MiGA::MiGA
|
12
11
|
|
13
|
-
include MiGA::
|
12
|
+
include MiGA::Dataset::Result
|
14
13
|
|
15
14
|
# Class-level
|
15
|
+
class << self
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
raw_reads: "01.raw_reads", trimmed_reads: "02.trimmed_reads",
|
23
|
-
read_quality: "03.read_quality", trimmed_fasta: "04.trimmed_fasta",
|
24
|
-
assembly: "05.assembly", cds: "06.cds",
|
25
|
-
# Annotation
|
26
|
-
essential_genes: "07.annotation/01.function/01.essential",
|
27
|
-
ssu: "07.annotation/01.function/02.ssu",
|
28
|
-
mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
|
29
|
-
mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
|
30
|
-
# Distances (for single-species datasets)
|
31
|
-
distances: "09.distances", taxonomy: "09.distances/05.taxonomy",
|
32
|
-
# General statistics
|
33
|
-
stats: "90.stats"
|
34
|
-
}
|
35
|
-
|
36
|
-
##
|
37
|
-
# Supported dataset types.
|
38
|
-
def self.KNOWN_TYPES ; @@KNOWN_TYPES end
|
39
|
-
@@KNOWN_TYPES = {
|
40
|
-
genome: {description: "The genome from an isolate.", multi: false},
|
41
|
-
scgenome: {description: "A Single-cell Genome Amplification (SGA).",
|
42
|
-
multi: false},
|
43
|
-
popgenome: {description: "A population genome (including " +
|
44
|
-
"metagenomic bins).", :multi=>false},
|
45
|
-
metagenome: {description: "A metagenome (excluding viromes).",
|
46
|
-
multi: true},
|
47
|
-
virome: {description: "A viral metagenome.", multi: true}
|
48
|
-
}
|
17
|
+
##
|
18
|
+
# Does the +project+ already have a dataset with that +name+?
|
19
|
+
def exist?(project, name)
|
20
|
+
project.dataset_names.include? name
|
21
|
+
end
|
49
22
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
:mytaxa_scan, :distances, :taxonomy, :stats]
|
56
|
-
|
57
|
-
##
|
58
|
-
# Tasks to be excluded from query datasets.
|
59
|
-
@@EXCLUDE_NOREF_TASKS = [:mytaxa_scan, :taxonomy]
|
60
|
-
@@_EXCLUDE_NOREF_TASKS_H = Hash[@@EXCLUDE_NOREF_TASKS.map{ |i| [i,true] }]
|
23
|
+
##
|
24
|
+
# Standard fields of metadata for datasets.
|
25
|
+
def INFO_FIELDS
|
26
|
+
%w(name created updated type ref user description comments)
|
27
|
+
end
|
61
28
|
|
62
|
-
##
|
63
|
-
# Tasks to be executed only in datasets that are not multi-organism. These
|
64
|
-
# tasks are ignored for multi-organism datasets or for unknown types.
|
65
|
-
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances, :taxonomy]
|
66
|
-
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map{ |i| [i,true] }]
|
67
|
-
|
68
|
-
##
|
69
|
-
# Tasks to be executed only in datasets that are multi-organism. These
|
70
|
-
# tasks are ignored for single-organism datasets or for unknwon types.
|
71
|
-
@@ONLY_MULTI_TASKS = [:mytaxa]
|
72
|
-
@@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map{ |i| [i,true] }]
|
73
|
-
|
74
|
-
##
|
75
|
-
# Does the +project+ already have a dataset with that +name+?
|
76
|
-
def self.exist?(project, name)
|
77
|
-
File.exist? "#{project.path}/metadata/#{name}.json"
|
78
|
-
end
|
79
|
-
|
80
|
-
##
|
81
|
-
# Standard fields of metadata for datasets.
|
82
|
-
def self.INFO_FIELDS
|
83
|
-
%w(name created updated type ref user description comments)
|
84
29
|
end
|
85
30
|
|
86
31
|
# Instance-level
|
@@ -164,79 +109,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
164
109
|
!@@KNOWN_TYPES[type][:multi]
|
165
110
|
end
|
166
111
|
|
167
|
-
##
|
168
|
-
# Get the result MiGA::Result in this dataset identified by the symbol +k+.
|
169
|
-
def result(k)
|
170
|
-
return nil if @@RESULT_DIRS[k.to_sym].nil?
|
171
|
-
MiGA::Result.load(
|
172
|
-
"#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json" )
|
173
|
-
end
|
174
|
-
|
175
|
-
##
|
176
|
-
# Get all the results (Array of MiGA::Result) in this dataset.
|
177
|
-
def results ; @@RESULT_DIRS.keys.map{ |k| result k }.compact ; end
|
178
|
-
|
179
|
-
##
|
180
|
-
# For each result executes the 2-ary +blk+ block: key symbol and MiGA::Result.
|
181
|
-
def each_result(&blk)
|
182
|
-
@@RESULT_DIRS.keys.each do |k|
|
183
|
-
blk.call(k, result(k)) unless result(k).nil?
|
184
|
-
end
|
185
|
-
end
|
186
|
-
|
187
|
-
##
|
188
|
-
# Look for the result with symbol key +result_type+ and register it in the
|
189
|
-
# dataset. If +save+ is false, it doesn't register the result, but it still
|
190
|
-
# returns a result if the expected files are complete. The +opts+ hash
|
191
|
-
# controls result creation (if necessary). Supported values include:
|
192
|
-
# - +is_clean+: A Boolean indicating if the input files are clean.
|
193
|
-
# - +force+: A Boolean indicating if the result must be re-indexed. If true, it
|
194
|
-
# implies save=true.
|
195
|
-
# Returns MiGA::Result or nil.
|
196
|
-
def add_result(result_type, save=true, opts={})
|
197
|
-
dir = @@RESULT_DIRS[result_type]
|
198
|
-
return nil if dir.nil?
|
199
|
-
base = File.expand_path("data/#{dir}/#{name}", project.path)
|
200
|
-
unless opts[:force]
|
201
|
-
r_pre = MiGA::Result.load("#{base}.json")
|
202
|
-
return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
|
203
|
-
end
|
204
|
-
r = File.exist?("#{base}.done") ?
|
205
|
-
self.send("add_result_#{result_type}", base, opts) : nil
|
206
|
-
r.save unless r.nil?
|
207
|
-
r
|
208
|
-
end
|
209
|
-
|
210
|
-
##
|
211
|
-
# Gets a result as MiGA::Result for the datasets with +result_type+. This is
|
212
|
-
# equivalent to +add_result(result_type, false)+.
|
213
|
-
def get_result(result_type) ; add_result(result_type, false) ; end
|
214
|
-
|
215
|
-
##
|
216
|
-
# Returns the key symbol of the first registered result (sorted by the
|
217
|
-
# execution order). This typically corresponds to the result used as the
|
218
|
-
# initial input. Passes +save+ to #add_result.
|
219
|
-
def first_preprocessing(save=false)
|
220
|
-
@@PREPROCESSING_TASKS.find do |t|
|
221
|
-
not ignore_task?(t) and not add_result(t, save).nil?
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
##
|
226
|
-
# Returns the key symbol of the next task that needs to be executed. Passes
|
227
|
-
# +save+ to #add_result.
|
228
|
-
def next_preprocessing(save=false)
|
229
|
-
after_first = false
|
230
|
-
first = first_preprocessing(save)
|
231
|
-
return nil if first.nil?
|
232
|
-
@@PREPROCESSING_TASKS.each do |t|
|
233
|
-
next if ignore_task? t
|
234
|
-
return t if after_first and add_result(t, save).nil?
|
235
|
-
after_first = (after_first or (t==first))
|
236
|
-
end
|
237
|
-
nil
|
238
|
-
end
|
239
|
-
|
240
112
|
##
|
241
113
|
# Should I ignore +task+ for this dataset?
|
242
114
|
def ignore_task?(task)
|
@@ -248,40 +120,13 @@ class MiGA::Dataset < MiGA::MiGA
|
|
248
120
|
[@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?]==pattern )
|
249
121
|
end
|
250
122
|
|
251
|
-
##
|
252
|
-
# Are all the dataset-specific tasks done? Passes +save+ to #add_result.
|
253
|
-
def done_preprocessing?(save=false)
|
254
|
-
!first_preprocessing(save).nil? and next_preprocessing(save).nil?
|
255
|
-
end
|
256
|
-
|
257
|
-
##
|
258
|
-
# Returns an array indicating the stage of each task (sorted by execution
|
259
|
-
# order). The values are integers:
|
260
|
-
# - 0 for an undefined result (a task before the initial input).
|
261
|
-
# - 1 for a registered result (a completed task).
|
262
|
-
# - 2 for a queued result (a task yet to be executed).
|
263
|
-
# It passes +save+ to #add_result
|
264
|
-
def profile_advance(save=false)
|
265
|
-
first_task = first_preprocessing(save)
|
266
|
-
return Array.new(@@PREPROCESSING_TASKS.size, 0) if first_task.nil?
|
267
|
-
adv = []
|
268
|
-
state = 0
|
269
|
-
next_task = next_preprocessing(save)
|
270
|
-
@@PREPROCESSING_TASKS.each do |task|
|
271
|
-
state = 1 if first_task==task
|
272
|
-
state = 2 if !next_task.nil? and next_task==task
|
273
|
-
adv << state
|
274
|
-
end
|
275
|
-
adv
|
276
|
-
end
|
277
|
-
|
278
123
|
##
|
279
124
|
# Returns an Array of +how_many+ duples (Arrays) sorted by AAI:
|
280
125
|
# - +0+: A String with the name(s) of the reference dataset.
|
281
126
|
# - +1+: A Float with the AAI.
|
282
|
-
# This function is currently only supported for query datasets when
|
283
|
-
# (default), and only for reference dataset when
|
284
|
-
# +nil+ if this analysis is not supported.
|
127
|
+
# This function is currently only supported for query datasets when
|
128
|
+
# +ref_project+ is false (default), and only for reference dataset when
|
129
|
+
# +ref_project+ is true. It returns +nil+ if this analysis is not supported.
|
285
130
|
def closest_relatives(how_many=1, ref_project=false)
|
286
131
|
return nil if (is_ref? != ref_project) or is_multi?
|
287
132
|
r = result(ref_project ? :taxonomy : :distances)
|
@@ -292,3 +137,4 @@ class MiGA::Dataset < MiGA::MiGA
|
|
292
137
|
end
|
293
138
|
|
294
139
|
end # class MiGA::Dataset
|
140
|
+
|