miga-base 0.2.0.6 → 0.2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE +201 -0
  4. data/README.md +17 -335
  5. data/Rakefile +31 -0
  6. data/actions/add_result +2 -5
  7. data/actions/add_taxonomy +4 -7
  8. data/actions/create_dataset +5 -6
  9. data/actions/create_project +2 -5
  10. data/actions/daemon +2 -5
  11. data/actions/download_dataset +88 -58
  12. data/actions/find_datasets +36 -38
  13. data/actions/import_datasets +2 -5
  14. data/actions/index_taxonomy +2 -5
  15. data/actions/list_datasets +47 -49
  16. data/actions/list_files +7 -11
  17. data/actions/unlink_dataset +2 -5
  18. data/bin/miga +1 -1
  19. data/lib/miga/common.rb +132 -0
  20. data/lib/miga/daemon.rb +229 -168
  21. data/lib/miga/dataset.rb +354 -277
  22. data/lib/miga/gui.rb +346 -269
  23. data/lib/miga/metadata.rb +115 -71
  24. data/lib/miga/project.rb +361 -259
  25. data/lib/miga/remote_dataset.rb +200 -148
  26. data/lib/miga/result.rb +150 -99
  27. data/lib/miga/tax_index.rb +124 -67
  28. data/lib/miga/taxonomy.rb +129 -100
  29. data/lib/miga/version.rb +57 -0
  30. data/lib/miga.rb +2 -77
  31. data/scripts/_distances_noref_nomulti.bash +2 -0
  32. data/scripts/_distances_ref_nomulti.bash +2 -0
  33. data/scripts/aai_distances.bash +1 -0
  34. data/scripts/ani_distances.bash +1 -0
  35. data/scripts/assembly.bash +1 -0
  36. data/scripts/cds.bash +1 -0
  37. data/scripts/clade_finding.bash +17 -1
  38. data/scripts/distances.bash +1 -0
  39. data/scripts/essential_genes.bash +1 -0
  40. data/scripts/haai_distances.bash +1 -0
  41. data/scripts/init.bash +2 -0
  42. data/scripts/mytaxa.bash +1 -0
  43. data/scripts/mytaxa_scan.bash +1 -0
  44. data/scripts/ogs.bash +1 -0
  45. data/scripts/read_quality.bash +1 -0
  46. data/scripts/ssu.bash +1 -0
  47. data/scripts/subclades.bash +1 -0
  48. data/scripts/trimmed_fasta.bash +1 -0
  49. data/scripts/trimmed_reads.bash +1 -0
  50. data/test/common_test.rb +82 -0
  51. data/test/daemon_test.rb +53 -0
  52. data/test/dataset_test.rb +156 -0
  53. data/test/jruby_gui_test.rb +20 -0
  54. data/test/metadata_test.rb +48 -0
  55. data/test/project_test.rb +54 -0
  56. data/test/remote_dataset_test.rb +41 -0
  57. data/test/tax_index_test.rb +44 -0
  58. data/test/taxonomy_test.rb +36 -0
  59. data/test/test_helper.rb +32 -0
  60. metadata +53 -38
data/lib/miga/daemon.rb CHANGED
@@ -1,178 +1,239 @@
1
- #
2
1
  # @package MiGA
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license artistic license 2.0
5
- # @update Nov-12-2015
6
- #
2
+ # @license Artistic-2.0
7
3
 
8
4
  require "miga/project"
9
5
  require "daemons"
10
6
  require "date"
11
7
 
12
- module MiGA
13
- class Daemon
14
- def self.last_alive(p)
15
- f = File.expand_path("daemon/alive", p.path)
16
- return nil unless File.size? f
17
- DateTime.parse(File.read(f))
18
- end
19
-
20
- attr_reader :project, :options, :jobs_to_run, :jobs_running
21
- def initialize(p)
22
- @project = p
23
- @runopts = JSON.parse(
24
- File.read(File.expand_path("daemon/daemon.json", project.path)),
25
- {:symbolize_names=>true})
26
- @jobs_to_run = []
27
- @jobs_running = []
28
- end
29
- def last_alive
30
- Daemon.last_alive project
31
- end
32
- def default_options
33
- { dir_mode: :normal, dir: File.expand_path("daemon", project.path),
34
- multiple: false, log_output: true }
35
- end
36
- def runopts(k, v=nil)
37
- k = k.to_sym
38
- unless v.nil?
39
- v = v.to_i if [:latency, :maxjobs, :ppn].include? k
40
- raise "Daemon's #{k} cannot be set to zero." if
41
- v.is_a? Integer and v==0
42
- @runopts[k] = v
43
- end
44
- @runopts[k]
45
- end
46
- def latency() runopts(:latency) ; end
47
- def maxjobs() runopts(:maxjobs) ; end
48
- def ppn() runopts(:ppn) ; end
49
- def start() daemon("start") ; end
50
- def stop() daemon("stop") ; end
51
- def restart() daemon("restart") ; end
52
- def status() daemon("status") ; end
53
- def daemon(task, opts=[])
54
- options = default_options
55
- opts.unshift(task)
56
- options[:ARGV] = opts
57
- Daemons.run_proc("MiGA:#{project.metadata[:name]}", options) do
58
- p = project
59
- say "-----------------------------------"
60
- say "MiGA:#{p.metadata[:name]} launched."
61
- say "-----------------------------------"
62
- loop_i = 0
63
- loop do
64
- # Tell the world you're alive
65
- f = File.open(File.expand_path("daemon/alive", project.path),"w")
66
- f.print Time.now.to_s
67
- f.close
68
- loop_i += 1
69
- # Traverse datasets
70
- p.datasets.each do |ds|
71
- # Inspect preprocessing
72
- to_run = ds.next_preprocessing
73
- # Launch task
74
- queue_job(to_run, ds) unless to_run.nil?
75
- end
76
-
77
- # Check if all the reference datasets are pre-processed.
78
- # If yes, check the project-level tasks
79
- if p.done_preprocessing?
80
- to_run = p.next_distances
81
- to_run = p.next_inclade if to_run.nil?
82
- # Launch task
83
- queue_job(to_run) unless to_run.nil?
84
- end
85
-
86
- # Run jobs
87
- flush!
88
-
89
- # Every 12 loops:
90
- if loop_i==12
91
- say "Housekeeping for sanity"
92
- loop_i = 0
93
- # Check if running jobs are alive
94
- purge!
95
- # Reload project metadata (to add newly created datasets)
96
- project.load
97
- end
98
- sleep(latency)
99
- end
100
- end
101
- end
102
- def queue_job(job, ds=nil)
103
- return nil unless get_job(job, ds).nil?
104
- ds_name = (ds.nil? ? "miga-project" : ds.name)
105
- say "Queueing ", ds_name, ":#{job}"
106
- type = runopts(:type)
107
- vars = {
108
- "PROJECT"=>project.path, "RUNTYPE"=>runopts(:type), "CORES"=>ppn,
109
- "MIGA"=>File.expand_path("../..", File.dirname(__FILE__)) }
110
- vars["DATASET"] = ds.name unless ds.nil?
111
- log_dir = File.expand_path("daemon/#{job}", project.path)
112
- Dir.mkdir log_dir unless Dir.exist? log_dir
113
- to_run = {ds: ds, job: job, cmd: sprintf(runopts(:cmd),
114
- # 1: script
115
- vars["MIGA"] + "/scripts/#{job.to_s}.bash",
116
- # 2: vars
117
- vars.keys.map{|k| sprintf(runopts(:var),k,vars[k])
118
- }.join(runopts(:varsep)),
119
- # 3: CPUs
120
- ppn,
121
- # 4: log file
122
- File.expand_path("#{ds_name}.log", log_dir),
123
- # 5: task name
124
- "#{project.metadata[:name][0..9]}:#{job}:#{ds_name}")}
125
- @jobs_to_run << to_run
126
- end
127
- def get_job(job, ds=nil)
128
- if ds==nil
129
- (@jobs_to_run + @jobs_running).select do |j|
130
- (j[:ds].nil?) and (j[:job]==job)
131
- end.first
132
- else
133
- (@jobs_to_run + @jobs_running).select do |j|
134
- (not j[:ds].nil?) and (j[:ds].name==ds.name) and (j[:job]==job)
135
- end.first
136
- end
137
- end
138
- def flush!
139
- # Check for finished jobs
140
- self.jobs_running.select! do |job|
141
- r = job[:ds].nil? ?
142
- self.project.add_result(job[:job]) :
143
- job[:ds].add_result(job[:job])
144
- say "Completed pid:#{job[:pid]} for " +
145
- "#{job[:ds].nil? ? "" : "#{job[:ds].name}:"}#{job[:job]}" unless
146
- r.nil?
147
- r.nil?
148
- end
149
-
150
- # Avoid single datasets hogging resources
151
- @jobs_to_run.rotate! rand(@jobs_to_run.size)
152
-
153
- # Launch as many @jobs_to_run as possible
154
- while jobs_running.size < maxjobs
155
- break if jobs_to_run.empty?
156
- job = self.jobs_to_run.shift
157
- if runopts(:type) == "bash"
158
- job[:pid] = spawn job[:cmd]
159
- Process.detach job[:pid]
160
- else
161
- job[:pid] = `#{job[:cmd]}`.gsub(/[\n\r]/,"")
162
- end
163
- @jobs_running << job
164
- say "Spawned pid:#{job[:pid]} for " +
165
- "#{job[:ds].nil? ? "" : "#{job[:ds].name}:"}#{job[:job]}"
166
- end
8
+ ##
9
+ # MiGA Daemons handling job submissions.
10
+ class MiGA::Daemon < MiGA::MiGA
11
+
12
+ ##
13
+ # When was the last time a daemon for the MiGA::Project +project+ was seen
14
+ # active? Returns DateTime.
15
+ def self.last_alive(project)
16
+ f = File.expand_path("daemon/alive", project.path)
17
+ return nil unless File.size? f
18
+ DateTime.parse(File.read(f))
19
+ end
20
+
21
+ # MiGA::Project in which the daemon is running.
22
+ attr_reader :project
23
+ # Options used to setup the daemon.
24
+ attr_reader :options
25
+ # Array of jobs next to be executed.
26
+ attr_reader :jobs_to_run
27
+ # Array of jobs currently running.
28
+ attr_reader :jobs_running
29
+
30
+ ##
31
+ # Initialize an inactive daemon for the MiGA::Project +project+. See #daemon
32
+ # to wake the daemon.
33
+ def initialize(project)
34
+ @project = project
35
+ @runopts = JSON.parse(
36
+ File.read(File.expand_path("daemon/daemon.json", project.path)),
37
+ {:symbolize_names=>true})
38
+ @jobs_to_run = []
39
+ @jobs_running = []
40
+ end
41
+
42
+ ##
43
+ # When was the last time a daemon for the current project was seen active?
44
+ # Returns DateTime.
45
+ def last_alive
46
+ MiGA::Daemon.last_alive project
47
+ end
48
+
49
+ ##
50
+ # Returns Hash containing the default options for the daemon.
51
+ def default_options
52
+ { dir_mode: :normal, dir: File.expand_path("daemon", project.path),
53
+ multiple: false, log_output: true }
54
+ end
55
+
56
+ ##
57
+ # Set/get #options, where +k+ is the Symbol of the option and +v+ is the value
58
+ # (or nil to use as getter). Returns new value.
59
+ def runopts(k, v=nil)
60
+ k = k.to_sym
61
+ unless v.nil?
62
+ v = v.to_i if [:latency, :maxjobs, :ppn].include? k
63
+ raise "Daemon's #{k} cannot be set to zero." if
64
+ v.is_a? Integer and v==0
65
+ @runopts[k] = v
66
+ end
67
+ @runopts[k]
68
+ end
69
+
70
+ ##
71
+ # Returns Integer indicating the number of seconds to sleep between checks.
72
+ def latency() runopts(:latency) ; end
73
+
74
+ ##
75
+ # Returns Integer indicating the maximum number of concurrent jobs to run.
76
+ def maxjobs() runopts(:maxjobs) ; end
77
+
78
+ ##
79
+ # Returns Integer indicating the number of CPUs per job.
80
+ def ppn() runopts(:ppn) ; end
81
+
82
+ ##
83
+ # Initializes the daemon.
84
+ def start() daemon("start") ; end
85
+
86
+ ##
87
+ # Stops the daemon.
88
+ def stop() daemon("stop") ; end
89
+
90
+ ##
91
+ # Restarts the daemon.
92
+ def restart() daemon("restart") ; end
93
+
94
+ ##
95
+ # Returns the status of the daemon.
96
+ def status() daemon("status") ; end
97
+
98
+ ##
99
+ # Launches the +task+ with options +opts+ (as command-line arguments).
100
+ # Supported tasks include: start, stop, restart, status.
101
+ def daemon(task, opts=[])
102
+ options = default_options
103
+ opts.unshift(task)
104
+ options[:ARGV] = opts
105
+ Daemons.run_proc("MiGA:#{project.name}", options) do
106
+ say "-----------------------------------"
107
+ say "MiGA:#{project.name} launched."
108
+ say "-----------------------------------"
109
+ loop_i = 0
110
+ loop do
111
+ loop_i += 1
112
+ declare_alive
113
+ check_datasets
114
+ check_project
115
+ flush!
116
+ if loop_i==12
117
+ say "Housekeeping for sanity"
118
+ loop_i = 0
119
+ purge!
120
+ project.load
121
+ end
122
+ sleep(latency)
167
123
  end
168
- def purge!
169
- self.jobs_running.select! do |job|
170
- `#{sprintf(runopts(:alive), job[:pid])}`.chomp.to_i == 1
171
- end
124
+ end
125
+ end
126
+
127
+ ##
128
+ # Tell the world that you're alive
129
+ def declare_alive
130
+ f = File.open(File.expand_path("daemon/alive", project.path), "w")
131
+ f.print Time.now.to_s
132
+ f.close
133
+ end
134
+
135
+ ##
136
+ # Traverse datasets
137
+ def check_datasets
138
+ project.each_dataset do |ds|
139
+ to_run = ds.next_preprocessing(true)
140
+ queue_job(to_run, ds) unless to_run.nil?
141
+ end
142
+ end
143
+
144
+ ##
145
+ # Check if all reference datasets are pre-processed. If yes, check the
146
+ # project-level tasks
147
+ def check_project
148
+ if project.done_preprocessing?(false)
149
+ to_run = project.next_distances(true)
150
+ to_run = project.next_inclade(true) if to_run.nil?
151
+ queue_job(to_run) unless to_run.nil?
152
+ end
153
+ end
154
+
155
+ ##
156
+ # Add the task to the internal queue with symbol key +job+. If the task is
157
+ # dataset-specific, +ds+ specifies the dataset. To submit jobs to the
158
+ # scheduler (or to bash) see #flush!.
159
+ def queue_job(job, ds=nil)
160
+ return nil unless get_job(job, ds).nil?
161
+ ds_name = (ds.nil? ? "miga-project" : ds.name)
162
+ say "Queueing ", ds_name, ":#{job}"
163
+ vars = { "PROJECT"=>project.path, "RUNTYPE"=>runopts(:type),
164
+ "CORES"=>ppn, "MIGA"=>MiGA::MiGA.root_path }
165
+ vars["DATASET"] = ds.name unless ds.nil?
166
+ log_dir = File.expand_path("daemon/#{job}", project.path)
167
+ Dir.mkdir(log_dir) unless Dir.exist? log_dir
168
+ task_name = "#{project.metadata[:name][0..9]}:#{job}:#{ds_name}"
169
+ to_run = {ds: ds, job: job, task_name: task_name,
170
+ cmd: sprintf(runopts(:cmd),
171
+ # 1: script
172
+ File.expand_path("scripts/#{job}.bash", vars["MIGA"]),
173
+ # 2: vars
174
+ vars.keys.map { |k|
175
+ sprintf(runopts(:var), k, vars[k]) }.join(runopts(:varsep)),
176
+ # 3: CPUs
177
+ ppn,
178
+ # 4: log file
179
+ File.expand_path("#{ds_name}.log", log_dir),
180
+ # 5: task name
181
+ task_name)}
182
+ @jobs_to_run << to_run
183
+ end
184
+
185
+ ##
186
+ # Get the task with key symbol +job+ in dataset +ds+. For project-wide tasks
187
+ # let +ds+ be nil.
188
+ def get_job(job, ds=nil)
189
+ (jobs_to_run + jobs_running).find do |j|
190
+ if ds==nil
191
+ j[:ds].nil? and j[:job]==job
192
+ else
193
+ (! j[:ds].nil?) and j[:ds].name==ds.name and j[:job]==job
172
194
  end
173
- def say(*opts)
174
- print "[#{Time.new.inspect}] ", *opts, "\n"
195
+ end
196
+ end
197
+
198
+ ##
199
+ # Remove finished jobs from the internal queue and launch as many as
200
+ # possible respecting #maxjobs.
201
+ def flush!
202
+ # Check for finished jobs
203
+ @jobs_running.select! do |job|
204
+ r = (job[:ds].nil? ? project : job[:ds]).add_result(job[:job], false)
205
+ say "Completed pid:#{job[:pid]} for #{job[:task_name]}." unless r.nil?
206
+ r.nil?
207
+ end
208
+ # Avoid single datasets hogging resources
209
+ @jobs_to_run.rotate! rand(jobs_to_run.size)
210
+ # Launch as many +jobs_to_run+ as possible
211
+ while jobs_running.size < maxjobs
212
+ break if jobs_to_run.empty?
213
+ job = @jobs_to_run.shift
214
+ if runopts(:type) == "bash"
215
+ job[:pid] = spawn job[:cmd]
216
+ Process.detach job[:pid]
217
+ else
218
+ job[:pid] = `#{job[:cmd]}`.chomp
175
219
  end
176
- end
177
- end
220
+ @jobs_running << job
221
+ say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
222
+ end
223
+ end
178
224
 
225
+ ##
226
+ # Remove dead jobs.
227
+ def purge!
228
+ @jobs_running.select! do |job|
229
+ `#{sprintf(runopts(:alive), job[:pid])}`.chomp.to_i == 1
230
+ end
231
+ end
232
+
233
+ ##
234
+ # Send a datestamped message to the log.
235
+ def say(*opts)
236
+ print "[#{Time.new.inspect}] ", *opts, "\n"
237
+ end
238
+
239
+ end