miga-base 0.2.0.6 → 0.2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE +201 -0
- data/README.md +17 -335
- data/Rakefile +31 -0
- data/actions/add_result +2 -5
- data/actions/add_taxonomy +4 -7
- data/actions/create_dataset +5 -6
- data/actions/create_project +2 -5
- data/actions/daemon +2 -5
- data/actions/download_dataset +88 -58
- data/actions/find_datasets +36 -38
- data/actions/import_datasets +2 -5
- data/actions/index_taxonomy +2 -5
- data/actions/list_datasets +47 -49
- data/actions/list_files +7 -11
- data/actions/unlink_dataset +2 -5
- data/bin/miga +1 -1
- data/lib/miga/common.rb +132 -0
- data/lib/miga/daemon.rb +229 -168
- data/lib/miga/dataset.rb +354 -277
- data/lib/miga/gui.rb +346 -269
- data/lib/miga/metadata.rb +115 -71
- data/lib/miga/project.rb +361 -259
- data/lib/miga/remote_dataset.rb +200 -148
- data/lib/miga/result.rb +150 -99
- data/lib/miga/tax_index.rb +124 -67
- data/lib/miga/taxonomy.rb +129 -100
- data/lib/miga/version.rb +57 -0
- data/lib/miga.rb +2 -77
- data/scripts/_distances_noref_nomulti.bash +2 -0
- data/scripts/_distances_ref_nomulti.bash +2 -0
- data/scripts/aai_distances.bash +1 -0
- data/scripts/ani_distances.bash +1 -0
- data/scripts/assembly.bash +1 -0
- data/scripts/cds.bash +1 -0
- data/scripts/clade_finding.bash +17 -1
- data/scripts/distances.bash +1 -0
- data/scripts/essential_genes.bash +1 -0
- data/scripts/haai_distances.bash +1 -0
- data/scripts/init.bash +2 -0
- data/scripts/mytaxa.bash +1 -0
- data/scripts/mytaxa_scan.bash +1 -0
- data/scripts/ogs.bash +1 -0
- data/scripts/read_quality.bash +1 -0
- data/scripts/ssu.bash +1 -0
- data/scripts/subclades.bash +1 -0
- data/scripts/trimmed_fasta.bash +1 -0
- data/scripts/trimmed_reads.bash +1 -0
- data/test/common_test.rb +82 -0
- data/test/daemon_test.rb +53 -0
- data/test/dataset_test.rb +156 -0
- data/test/jruby_gui_test.rb +20 -0
- data/test/metadata_test.rb +48 -0
- data/test/project_test.rb +54 -0
- data/test/remote_dataset_test.rb +41 -0
- data/test/tax_index_test.rb +44 -0
- data/test/taxonomy_test.rb +36 -0
- data/test/test_helper.rb +32 -0
- metadata +53 -38
data/lib/miga/daemon.rb
CHANGED
@@ -1,178 +1,239 @@
|
|
1
|
-
#
|
2
1
|
# @package MiGA
|
3
|
-
# @
|
4
|
-
# @license artistic license 2.0
|
5
|
-
# @update Nov-12-2015
|
6
|
-
#
|
2
|
+
# @license Artistic-2.0
|
7
3
|
|
8
4
|
require "miga/project"
|
9
5
|
require "daemons"
|
10
6
|
require "date"
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
def get_job(job, ds=nil)
|
128
|
-
if ds==nil
|
129
|
-
(@jobs_to_run + @jobs_running).select do |j|
|
130
|
-
(j[:ds].nil?) and (j[:job]==job)
|
131
|
-
end.first
|
132
|
-
else
|
133
|
-
(@jobs_to_run + @jobs_running).select do |j|
|
134
|
-
(not j[:ds].nil?) and (j[:ds].name==ds.name) and (j[:job]==job)
|
135
|
-
end.first
|
136
|
-
end
|
137
|
-
end
|
138
|
-
def flush!
|
139
|
-
# Check for finished jobs
|
140
|
-
self.jobs_running.select! do |job|
|
141
|
-
r = job[:ds].nil? ?
|
142
|
-
self.project.add_result(job[:job]) :
|
143
|
-
job[:ds].add_result(job[:job])
|
144
|
-
say "Completed pid:#{job[:pid]} for " +
|
145
|
-
"#{job[:ds].nil? ? "" : "#{job[:ds].name}:"}#{job[:job]}" unless
|
146
|
-
r.nil?
|
147
|
-
r.nil?
|
148
|
-
end
|
149
|
-
|
150
|
-
# Avoid single datasets hogging resources
|
151
|
-
@jobs_to_run.rotate! rand(@jobs_to_run.size)
|
152
|
-
|
153
|
-
# Launch as many @jobs_to_run as possible
|
154
|
-
while jobs_running.size < maxjobs
|
155
|
-
break if jobs_to_run.empty?
|
156
|
-
job = self.jobs_to_run.shift
|
157
|
-
if runopts(:type) == "bash"
|
158
|
-
job[:pid] = spawn job[:cmd]
|
159
|
-
Process.detach job[:pid]
|
160
|
-
else
|
161
|
-
job[:pid] = `#{job[:cmd]}`.gsub(/[\n\r]/,"")
|
162
|
-
end
|
163
|
-
@jobs_running << job
|
164
|
-
say "Spawned pid:#{job[:pid]} for " +
|
165
|
-
"#{job[:ds].nil? ? "" : "#{job[:ds].name}:"}#{job[:job]}"
|
166
|
-
end
|
8
|
+
##
|
9
|
+
# MiGA Daemons handling job submissions.
|
10
|
+
class MiGA::Daemon < MiGA::MiGA
|
11
|
+
|
12
|
+
##
|
13
|
+
# When was the last time a daemon for the MiGA::Project +project+ was seen
|
14
|
+
# active? Returns DateTime, or nil if no alive record exists.
|
15
|
+
def self.last_alive(project)
|
16
|
+
f = File.expand_path("daemon/alive", project.path)
|
17
|
+
return nil unless File.size? f
|
18
|
+
DateTime.parse(File.read(f))
|
19
|
+
end
|
20
|
+
|
21
|
+
# MiGA::Project in which the daemon is running.
|
22
|
+
attr_reader :project
|
23
|
+
# Options used to setup the daemon.
|
24
|
+
attr_reader :options
|
25
|
+
# Array of jobs next to be executed.
|
26
|
+
attr_reader :jobs_to_run
|
27
|
+
# Array of jobs currently running.
|
28
|
+
attr_reader :jobs_running
|
29
|
+
|
30
|
+
##
|
31
|
+
# Initialize an inactive daemon for the MiGA::Project +project+. See #daemon
|
32
|
+
# to wake the daemon.
|
33
|
+
def initialize(project)
|
34
|
+
@project = project
|
35
|
+
@runopts = JSON.parse(
|
36
|
+
File.read(File.expand_path("daemon/daemon.json", project.path)),
|
37
|
+
{:symbolize_names=>true})
|
38
|
+
@jobs_to_run = []
|
39
|
+
@jobs_running = []
|
40
|
+
end
|
41
|
+
|
42
|
+
##
|
43
|
+
# When was the last time a daemon for the current project was seen active?
|
44
|
+
# Returns DateTime, or nil if no alive record exists.
|
45
|
+
def last_alive
|
46
|
+
MiGA::Daemon.last_alive project
|
47
|
+
end
|
48
|
+
|
49
|
+
##
|
50
|
+
# Returns Hash containing the default options for the daemon.
|
51
|
+
def default_options
|
52
|
+
{ dir_mode: :normal, dir: File.expand_path("daemon", project.path),
|
53
|
+
multiple: false, log_output: true }
|
54
|
+
end
|
55
|
+
|
56
|
+
##
|
57
|
+
# Set/get run options (loaded from daemon/daemon.json), where +k+ is the Symbol of the option and +v+ is the value
|
58
|
+
# (or nil to use as getter). Returns new value.
|
59
|
+
def runopts(k, v=nil)
|
60
|
+
k = k.to_sym
|
61
|
+
unless v.nil?
|
62
|
+
v = v.to_i if [:latency, :maxjobs, :ppn].include? k
|
63
|
+
raise "Daemon's #{k} cannot be set to zero." if
|
64
|
+
v.is_a? Integer and v==0
|
65
|
+
@runopts[k] = v
|
66
|
+
end
|
67
|
+
@runopts[k]
|
68
|
+
end
|
69
|
+
|
70
|
+
##
|
71
|
+
# Returns Integer indicating the number of seconds to sleep between checks.
|
72
|
+
def latency() runopts(:latency) ; end
|
73
|
+
|
74
|
+
##
|
75
|
+
# Returns Integer indicating the maximum number of concurrent jobs to run.
|
76
|
+
def maxjobs() runopts(:maxjobs) ; end
|
77
|
+
|
78
|
+
##
|
79
|
+
# Returns Integer indicating the number of CPUs per job.
|
80
|
+
def ppn() runopts(:ppn) ; end
|
81
|
+
|
82
|
+
##
|
83
|
+
# Starts the daemon.
|
84
|
+
def start() daemon("start") ; end
|
85
|
+
|
86
|
+
##
|
87
|
+
# Stops the daemon.
|
88
|
+
def stop() daemon("stop") ; end
|
89
|
+
|
90
|
+
##
|
91
|
+
# Restarts the daemon.
|
92
|
+
def restart() daemon("restart") ; end
|
93
|
+
|
94
|
+
##
|
95
|
+
# Returns the status of the daemon.
|
96
|
+
def status() daemon("status") ; end
|
97
|
+
|
98
|
+
##
|
99
|
+
# Launches the +task+ with options +opts+ (as command-line arguments).
|
100
|
+
# Supported tasks include: start, stop, restart, status.
|
101
|
+
def daemon(task, opts=[])
|
102
|
+
options = default_options
|
103
|
+
opts.unshift(task)
|
104
|
+
options[:ARGV] = opts
|
105
|
+
Daemons.run_proc("MiGA:#{project.name}", options) do
|
106
|
+
say "-----------------------------------"
|
107
|
+
say "MiGA:#{project.name} launched."
|
108
|
+
say "-----------------------------------"
|
109
|
+
loop_i = 0
|
110
|
+
loop do
|
111
|
+
loop_i += 1
|
112
|
+
declare_alive
|
113
|
+
check_datasets
|
114
|
+
check_project
|
115
|
+
flush!
|
116
|
+
if loop_i==12
|
117
|
+
say "Housekeeping for sanity"
|
118
|
+
loop_i = 0
|
119
|
+
purge!
|
120
|
+
project.load
|
121
|
+
end
|
122
|
+
sleep(latency)
|
167
123
|
end
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# Tell the world that you're alive
|
129
|
+
def declare_alive
|
130
|
+
f = File.open(File.expand_path("daemon/alive", project.path), "w")
|
131
|
+
f.print Time.now.to_s
|
132
|
+
f.close
|
133
|
+
end
|
134
|
+
|
135
|
+
##
|
136
|
+
# Traverse datasets, queueing the next preprocessing task of each one (if any).
|
137
|
+
def check_datasets
|
138
|
+
project.each_dataset do |ds|
|
139
|
+
to_run = ds.next_preprocessing(true)
|
140
|
+
queue_job(to_run, ds) unless to_run.nil?
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
##
|
145
|
+
# Check if all reference datasets are pre-processed. If yes, check the
|
146
|
+
# project-level tasks
|
147
|
+
def check_project
|
148
|
+
if project.done_preprocessing?(false)
|
149
|
+
to_run = project.next_distances(true)
|
150
|
+
to_run = project.next_inclade(true) if to_run.nil?
|
151
|
+
queue_job(to_run) unless to_run.nil?
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
##
|
156
|
+
# Add the task to the internal queue with symbol key +job+. If the task is
|
157
|
+
# dataset-specific, +ds+ specifies the dataset. To submit jobs to the
|
158
|
+
# scheduler (or to bash) see #flush!.
|
159
|
+
def queue_job(job, ds=nil)
|
160
|
+
return nil unless get_job(job, ds).nil?
|
161
|
+
ds_name = (ds.nil? ? "miga-project" : ds.name)
|
162
|
+
say "Queueing ", ds_name, ":#{job}"
|
163
|
+
vars = { "PROJECT"=>project.path, "RUNTYPE"=>runopts(:type),
|
164
|
+
"CORES"=>ppn, "MIGA"=>MiGA::MiGA.root_path }
|
165
|
+
vars["DATASET"] = ds.name unless ds.nil?
|
166
|
+
log_dir = File.expand_path("daemon/#{job}", project.path)
|
167
|
+
Dir.mkdir(log_dir) unless Dir.exist? log_dir
|
168
|
+
task_name = "#{project.metadata[:name][0..9]}:#{job}:#{ds_name}"
|
169
|
+
to_run = {ds: ds, job: job, task_name: task_name,
|
170
|
+
cmd: sprintf(runopts(:cmd),
|
171
|
+
# 1: script
|
172
|
+
File.expand_path("scripts/#{job}.bash", vars["MIGA"]),
|
173
|
+
# 2: vars
|
174
|
+
vars.keys.map { |k|
|
175
|
+
sprintf(runopts(:var), k, vars[k]) }.join(runopts(:varsep)),
|
176
|
+
# 3: CPUs
|
177
|
+
ppn,
|
178
|
+
# 4: log file
|
179
|
+
File.expand_path("#{ds_name}.log", log_dir),
|
180
|
+
# 5: task name
|
181
|
+
task_name)}
|
182
|
+
@jobs_to_run << to_run
|
183
|
+
end
|
184
|
+
|
185
|
+
##
|
186
|
+
# Get the task with key symbol +job+ in dataset +ds+. For project-wide tasks
|
187
|
+
# let +ds+ be nil.
|
188
|
+
def get_job(job, ds=nil)
|
189
|
+
(jobs_to_run + jobs_running).find do |j|
|
190
|
+
if ds==nil
|
191
|
+
j[:ds].nil? and j[:job]==job
|
192
|
+
else
|
193
|
+
(! j[:ds].nil?) and j[:ds].name==ds.name and j[:job]==job
|
172
194
|
end
|
173
|
-
|
174
|
-
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
##
|
199
|
+
# Remove finished jobs from the internal queue and launch as many as
|
200
|
+
# possible respecting #maxjobs.
|
201
|
+
def flush!
|
202
|
+
# Check for finished jobs
|
203
|
+
@jobs_running.select! do |job|
|
204
|
+
r = (job[:ds].nil? ? project : job[:ds]).add_result(job[:job], false)
|
205
|
+
say "Completed pid:#{job[:pid]} for #{job[:task_name]}." unless r.nil?
|
206
|
+
r.nil?
|
207
|
+
end
|
208
|
+
# Avoid single datasets hogging resources
|
209
|
+
@jobs_to_run.rotate! rand(jobs_to_run.size)
|
210
|
+
# Launch as many +jobs_to_run+ as possible
|
211
|
+
while jobs_running.size < maxjobs
|
212
|
+
break if jobs_to_run.empty?
|
213
|
+
job = @jobs_to_run.shift
|
214
|
+
if runopts(:type) == "bash"
|
215
|
+
job[:pid] = spawn job[:cmd]
|
216
|
+
Process.detach job[:pid]
|
217
|
+
else
|
218
|
+
job[:pid] = `#{job[:cmd]}`.chomp
|
175
219
|
end
|
176
|
-
|
177
|
-
|
220
|
+
@jobs_running << job
|
221
|
+
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
222
|
+
end
|
223
|
+
end
|
178
224
|
|
225
|
+
##
|
226
|
+
# Remove dead jobs.
|
227
|
+
def purge!
|
228
|
+
@jobs_running.select! do |job|
|
229
|
+
`#{sprintf(runopts(:alive), job[:pid])}`.chomp.to_i == 1
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
##
|
234
|
+
# Send a datestamped message to the log.
|
235
|
+
def say(*opts)
|
236
|
+
print "[#{Time.new.inspect}] ", *opts, "\n"
|
237
|
+
end
|
238
|
+
|
239
|
+
end
|