rbbt-util 5.28.7 → 5.28.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/entity.rb +1 -1
  3. data/lib/rbbt/fix_width_table.rb +5 -4
  4. data/lib/rbbt/hpc.rb +2 -2
  5. data/lib/rbbt/persist.rb +9 -4
  6. data/lib/rbbt/persist/tsv/adapter.rb +0 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +5 -3
  8. data/lib/rbbt/resource.rb +12 -6
  9. data/lib/rbbt/resource/path.rb +1 -1
  10. data/lib/rbbt/tsv/dumper.rb +6 -2
  11. data/lib/rbbt/util/R.rb +2 -2
  12. data/lib/rbbt/util/cmd.rb +10 -0
  13. data/lib/rbbt/util/misc/bgzf.rb +1 -1
  14. data/lib/rbbt/util/misc/indiferent_hash.rb +8 -0
  15. data/lib/rbbt/util/misc/inspect.rb +11 -7
  16. data/lib/rbbt/util/named_array.rb +1 -1
  17. data/lib/rbbt/util/open.rb +18 -17
  18. data/lib/rbbt/workflow/accessor.rb +1 -1
  19. data/lib/rbbt/workflow/definition.rb +3 -1
  20. data/lib/rbbt/workflow/integration/ansible.rb +53 -0
  21. data/lib/rbbt/workflow/integration/ansible/workflow.rb +60 -0
  22. data/lib/rbbt/workflow/step.rb +21 -2
  23. data/lib/rbbt/workflow/step/accessor.rb +24 -14
  24. data/lib/rbbt/workflow/step/dependencies.rb +8 -2
  25. data/lib/rbbt/workflow/step/run.rb +22 -19
  26. data/lib/rbbt/workflow/util/archive.rb +2 -0
  27. data/lib/rbbt/workflow/util/orchestrator.rb +49 -11
  28. data/lib/rbbt/workflow/util/provenance.rb +7 -3
  29. data/share/rbbt_commands/ansible +55 -0
  30. data/share/rbbt_commands/purge_job +0 -1
  31. data/share/rbbt_commands/system/status +22 -22
  32. data/share/rbbt_commands/workflow/forget_deps +10 -3
  33. data/share/rbbt_commands/workflow/info +12 -9
  34. data/share/rbbt_commands/workflow/prov +2 -1
  35. data/test/rbbt/association/test_index.rb +6 -6
  36. data/test/rbbt/knowledge_base/test_query.rb +3 -3
  37. data/test/rbbt/knowledge_base/test_registry.rb +1 -1
  38. data/test/rbbt/persist/tsv/test_cdb.rb +0 -7
  39. data/test/rbbt/persist/tsv/test_kyotocabinet.rb +2 -8
  40. data/test/rbbt/persist/tsv/test_leveldb.rb +0 -6
  41. data/test/rbbt/persist/tsv/test_lmdb.rb +0 -6
  42. data/test/rbbt/persist/tsv/test_tokyocabinet.rb +15 -14
  43. data/test/rbbt/test_entity.rb +0 -1
  44. data/test/rbbt/test_knowledge_base.rb +3 -4
  45. data/test/rbbt/test_persist.rb +10 -6
  46. data/test/rbbt/test_workflow.rb +49 -16
  47. data/test/rbbt/tsv/test_accessor.rb +11 -0
  48. data/test/rbbt/tsv/test_attach.rb +0 -2
  49. data/test/rbbt/tsv/test_index.rb +6 -7
  50. data/test/rbbt/tsv/test_manipulate.rb +2 -3
  51. data/test/rbbt/util/R/test_model.rb +2 -1
  52. data/test/rbbt/util/R/test_plot.rb +0 -2
  53. data/test/rbbt/util/concurrency/test_processes.rb +1 -1
  54. data/test/rbbt/util/misc/test_bgzf.rb +11 -7
  55. data/test/rbbt/util/misc/test_lock.rb +0 -1
  56. data/test/rbbt/util/misc/test_multipart_payload.rb +1 -1
  57. data/test/rbbt/util/misc/test_pipes.rb +0 -5
  58. data/test/rbbt/util/test_R.rb +1 -0
  59. data/test/rbbt/util/test_log.rb +4 -6
  60. data/test/rbbt/util/test_misc.rb +0 -2
  61. data/test/rbbt/util/test_open.rb +0 -1
  62. data/test/rbbt/util/test_python.rb +17 -1
  63. data/test/rbbt/workflow/test_remote_workflow.rb +1 -1
  64. data/test/rbbt/workflow/test_step.rb +8 -3
  65. data/test/rbbt/workflow/util/test_orchestrator.rb +155 -18
  66. metadata +5 -4
  67. data/test/rbbt/workflow/remote/test_client.rb +0 -56
@@ -0,0 +1,53 @@
1
+ require_relative 'ansible/workflow'
2
+ require 'rbbt/workflow/usage'
3
+
4
+ module Ansible
5
+ def self.play(playbook, inventory = nil)
6
+ inventory = Rbbt.etc.ansible_inventory.find
7
+ Log.with_severity 0 do
8
+ TmpFile.with_file do |tmp|
9
+ if Hash === playbook
10
+ Open.write(tmp, [playbook].to_yaml)
11
+ playbook = tmp
12
+ end
13
+ CMD.cmd_log("ansible-playbook -i #{inventory} #{playbook}")
14
+ end
15
+ end
16
+ end
17
+
18
+ def self.clean_symbols(hash)
19
+ new = {}
20
+ hash.each do |key,value|
21
+ key = key.to_s
22
+ value = case value
23
+ when Symbol
24
+ value.to_s
25
+ when Hash
26
+ self.clean_symbols(value)
27
+ else
28
+ value
29
+ end
30
+ new[key] = value
31
+ end
32
+ new
33
+ end
34
+
35
+ def self.workflow2playbook(workflow, task, options = {})
36
+ job_options = workflow.get_SOPT(workflow.tasks[task])
37
+
38
+ tasks = workflow.job(task, nil, job_options).exec
39
+
40
+ hosts = options[:hosts] || 'localhost'
41
+
42
+ clean_tasks = tasks.collect{|task| self.clean_symbols task }
43
+ {"hosts" => hosts, "tasks" => clean_tasks}
44
+ end
45
+
46
+ def self.playbook(file, task = nil, options = {})
47
+ task = 'default' if task.nil?
48
+
49
+ workflow = Workflow.require_workflow file
50
+ task = workflow.tasks.keys.last if workflow.tasks[task].nil?
51
+ workflow2playbook workflow, task, options
52
+ end
53
+ end
@@ -0,0 +1,60 @@
1
+ require 'rbbt/workflow'
2
+
3
module Ansible
  # DSL mixin for workflow modules that generate ansible playbooks.
  # Extending a workflow with this module installs a set of helpers that
  # accumulate ansible task definitions into @ans_tasks.
  module AnsibleWorkflow
    # Hook: install the DSL helpers on the workflow object being extended.
    def self.extended(object)
      class << object
        attr_accessor :ans_tasks
      end

      # Core accumulator: prepend a "name" entry (task description when
      # available, otherwise short_path) and append the task definition.
      object.helper :register do |definition|
        desc = task ? task.description : nil
        label = desc || short_path
        entry = {"name" => label}.merge(definition)
        @ans_tasks ||= []
        @ans_tasks << entry
        task
      end

      # Register a task given as a single module/arguments pair,
      # e.g. ans("apt", "name=git").
      object.helper :ans do |key, info|
        register({ key => info})
      end

      # Amend the most recently registered task with one extra attribute.
      object.helper :add do |attribute, info|
        @ans_tasks.last[attribute.to_s] = info
      end

      # Register a plain shell task.
      object.helper :shell do |cmd|
        register({"shell" => cmd.strip})
      end

      # Register a shell task run with privilege escalation (become: yes).
      object.helper :sudo do |cmd|
        register({"shell" => cmd.strip, "become" => 'yes'})
      end

      # Register a shell task that runs inside a singularity image; image and
      # sandbox locations come from rbbt config with hard-coded fallbacks.
      object.helper :singularity do |scmd|
        img = config :singularity_img, :build, :test, :small, :default => '/data/img/singularity/rbbt/rbbt.simg'
        container = config :singularity_container, :build, :test, :small, :default => '/data/img/sandbox/mvazque2/'
        cmd = "singularity exec -C -H '#{container}' '#{img}' #{scmd}"
        register({"shell" => cmd.strip, "name" => short_path})
      end

      # Return the accumulated list of ansible task definitions.
      object.helper :produce_task do
        @ans_tasks
      end
    end

    # Define a workflow task (result type :yaml) that evaluates the play
    # body and returns the tasks of all dependencies followed by this
    # play's own accumulated tasks.
    def play(name = nil, &block)
      name = Misc.snake_case(@description) if name.nil?
      task name => :yaml do |*args|
        self.instance_exec(*args, &block)
        dependencies.map { |dep| dep.load }.inject([], :+) + produce_task
      end
    end

  end
end
60
+
@@ -13,6 +13,8 @@ class Step
13
13
  attr_accessor :relocated
14
14
  attr_accessor :result, :mutex, :seen
15
15
 
16
+ RBBT_DEBUG_CLEAN = ENV["RBBT_DEBUG_CLEAN"] == 'true'
17
+
16
18
  class << self
17
19
  attr_accessor :lock_dir
18
20
 
@@ -109,6 +111,14 @@ class Step
109
111
  @inputs || []
110
112
  end
111
113
 
114
+ def copy_files_dir
115
+ if File.symlink?(self.files_dir)
116
+ realpath = Open.realpath(self.files_dir)
117
+ Open.rm self.files_dir
118
+ Open.cp realpath, self.files_dir
119
+ end
120
+ end
121
+
112
122
  def archive_deps
113
123
  self.set_info :archived_info, archived_info
114
124
  self.set_info :archived_dependencies, info[:dependencies]
@@ -146,6 +156,11 @@ class Step
146
156
  all_inputs
147
157
  end
148
158
 
159
+ def dependencies=(dependencies)
160
+ @dependencies = dependencies
161
+ set_info :dependencies, dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]} if dependencies
162
+ end
163
+
149
164
  def recursive_inputs
150
165
  if NamedArray === inputs
151
166
  i = {}
@@ -441,6 +456,7 @@ class Step
441
456
  status << "not running" if ! done? && ! running?
442
457
  status.unshift " " if status.any?
443
458
  Log.high "Cleaning step: #{path}#{status * " "}"
459
+ Log.stack caller if RBBT_DEBUG_CLEAN
444
460
  abort if ! done? && running?
445
461
  Step.clean(path)
446
462
  self
@@ -455,12 +471,15 @@ class Step
455
471
  return [] if dependencies.nil? or dependencies.empty?
456
472
 
457
473
  new_dependencies = []
474
+ archived_deps = self.info[:archived_info] ? self.info[:archived_info].keys : []
475
+
458
476
  dependencies.each{|step|
459
477
  #next if self.done? && Open.exists?(info_file) && info[:dependencies] && info[:dependencies].select{|task,name,path| path == step.path }.empty?
460
- next if seen.include? step
478
+ next if archived_deps.include? step.path
479
+ next if seen.include? step.path
461
480
  next if self.done? && need_run && ! updatable?
462
481
 
463
- r = step.rec_dependencies(need_run, new_dependencies)
482
+ r = step.rec_dependencies(need_run, new_dependencies.collect{|d| d.path})
464
483
  new_dependencies.concat r
465
484
  new_dependencies << step
466
485
  }
@@ -8,6 +8,16 @@ class Step
8
8
  end
9
9
  end
10
10
 
11
+ def self.serialize_info(info)
12
+ info = info.clean_version if IndiferentHash === info
13
+ INFO_SERIALIZER.dump(info)
14
+ end
15
+
16
+ def self.load_serialized_info(io)
17
+ IndiferentHash.setup(INFO_SERIALIZER.load(io))
18
+ end
19
+
20
+
11
21
  def self.wait_for_jobs(jobs)
12
22
  jobs = [jobs] if Step === jobs
13
23
  begin
@@ -59,7 +69,7 @@ class Step
59
69
  def self.step_info(path)
60
70
  begin
61
71
  Open.open(info_file(path), :mode => 'rb') do |f|
62
- INFO_SERIALIZER.load(f)
72
+ self.load_serialized_info(f)
63
73
  end
64
74
  rescue Exception
65
75
  Log.exception $!
@@ -121,7 +131,7 @@ class Step
121
131
  inputs[name] = value
122
132
  end
123
133
 
124
- if options.include? 'override_dependencies'
134
+ if options && options.include?('override_dependencies')
125
135
  inputs.merge!(:override_dependencies => open[:override_dependencies])
126
136
  input_types = IndiferentHash.setup(input_types.merge(:override_dependencies => :array))
127
137
  end
@@ -188,7 +198,7 @@ class Step
188
198
  info_lock.lock if check_lock and false
189
199
  begin
190
200
  Open.open(info_file, :mode => 'rb') do |file|
191
- INFO_SERIALIZER.load(file) #|| {}
201
+ Step.load_serialized_info(file)
192
202
  end
193
203
  ensure
194
204
  info_lock.unlock if check_lock and false
@@ -204,7 +214,7 @@ class Step
204
214
  Log.debug{"Error loading info file: " + info_file}
205
215
  Log.exception $!
206
216
  Open.rm info_file
207
- Misc.sensiblewrite(info_file, INFO_SERIALIZER.dump({:status => :error, :messages => ["Info file lost"]}))
217
+ Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
208
218
  raise $!
209
219
  end
210
220
  end
@@ -214,8 +224,8 @@ class Step
214
224
  Open.lock(info_file, :lock => info_lock) do
215
225
  i = {:status => :waiting, :pid => Process.pid, :path => path}
216
226
  i[:dependencies] = dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]} if dependencies
217
- @info_cache = i
218
- Misc.sensiblewrite(info_file, INFO_SERIALIZER.dump(i), :force => true, :lock => false)
227
+ Misc.sensiblewrite(info_file, Step.serialize_info(i), :force => true, :lock => false)
228
+ @info_cache = IndiferentHash.setup(i)
219
229
  @info_cache_time = Time.now
220
230
  end
221
231
  end
@@ -227,9 +237,9 @@ class Step
227
237
  Open.lock(info_file, :lock => info_lock) do
228
238
  i = info(false).dup
229
239
  i[key] = value
230
- @info_cache = i
231
- dump = INFO_SERIALIZER.dump(i)
232
- Misc.sensiblewrite(info_file, dump, :force => true, :lock => false)
240
+ dump = Step.serialize_info(i)
241
+ @info_cache = IndiferentHash.setup(i)
242
+ Misc.sensiblewrite(info_file, dump, :force => true, :lock => false) if Open.exists?(info_file)
233
243
  @info_cache_time = Time.now
234
244
  value
235
245
  end
@@ -242,9 +252,9 @@ class Step
242
252
  Open.lock(info_file, :lock => info_lock) do
243
253
  i = info(false)
244
254
  i.merge! hash
245
- @info_cache = i
246
- dump = INFO_SERIALIZER.dump(i)
247
- Misc.sensiblewrite(info_file, dump, :force => true, :lock => false)
255
+ dump = Step.serialize_info(i)
256
+ @info_cache = IndiferentHash.setup(i)
257
+ Misc.sensiblewrite(info_file, dump, :force => true, :lock => false) if Open.exists?(info_file)
248
258
  @info_cache_time = Time.now
249
259
  value
250
260
  end
@@ -439,7 +449,7 @@ class Step
439
449
  rec_dependencies = self.rec_dependencies
440
450
  return [] if rec_dependencies.empty?
441
451
  canfail_paths = self.canfail_paths
442
- dep = rec_dependencies.select{|d| d.task_name.to_s == 'contamination'}.first
452
+
443
453
  dirty_files = rec_dependencies.reject{|dep|
444
454
  (defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dep) ||
445
455
  ! Open.exists?(dep.info_file) ||
@@ -537,7 +547,7 @@ class Step
537
547
  end
538
548
 
539
549
  def file(name)
540
- Path.setup(File.join(files_dir, name.to_s))
550
+ Path.setup(File.join(files_dir, name.to_s), workflow, self)
541
551
  end
542
552
 
543
553
  def save_file(name, content)
@@ -92,7 +92,13 @@ class Step
92
92
  (job.done? && job.dirty?) || (job.error? && job.dirty?) ||
93
93
  (!(job.noinfo? || job.done? || job.error? || job.aborted? || job.running?))
94
94
 
95
- job.clean unless job.resumable? && (job.updated? && ! job.dirty?)
95
+ if ! (job.resumable? && (job.updated? && ! job.dirty?))
96
+ Log.high "About to clean -- status: #{status}, present #{File.exists?(job.path)}, " +
97
+ %w(done? error? recoverable_error? noinfo? updated? dirty? aborted? running? resumable?).
98
+ collect{|v| [v, job.send(v)]*": "} * ", " if RBBT_DEBUG_CLEAN
99
+
100
+ job.clean
101
+ end
96
102
  job.set_info :status, :cleaned
97
103
  end
98
104
 
@@ -121,7 +127,7 @@ class Step
121
127
  end
122
128
 
123
129
  def input_dependencies
124
- inputs.flatten.select{|i| Step === i}
130
+ (inputs.flatten.select{|i| Step === i} + inputs.flatten.select{|dep| Path === dep && Step === dep.resource}.collect{|dep| dep.resource})
125
131
  end
126
132
 
127
133
 
@@ -112,7 +112,7 @@ class Step
112
112
  end
113
113
 
114
114
  def updatable?
115
- (ENV["RBBT_UPDATE_ALL_JOBS"] == 'true' || ( ENV["RBBT_UPDATE"] == "true" && Open.exists?(info_file)) && status != :noinfo && ! (relocated? && done?))
115
+ (ENV["RBBT_UPDATE_ALL_JOBS"] == 'true' || ( ENV["RBBT_UPDATE"] == "true" && Open.exists?(info_file)) && status != :noinfo && ! (relocated? && done?)) || (ENV["RBBT_UPDATE"] && ! (done? && ! Open.exists?(info_file)))
116
116
  end
117
117
 
118
118
  def dependency_checks
@@ -128,7 +128,7 @@ class Step
128
128
  end
129
129
 
130
130
  def input_checks
131
- inputs.select{|i| Step === i }.
131
+ (inputs.select{|i| Step === i } + inputs.select{|i| Path === i && Step === i.resource}.collect{|i| i.resource}).
132
132
  select{|dependency| dependency.updatable? }
133
133
  end
134
134
 
@@ -154,25 +154,28 @@ class Step
154
154
  canfail_paths = self.canfail_paths
155
155
  this_mtime = Open.mtime(self.path) if Open.exists?(self.path)
156
156
 
157
- checks.each do |dep|
158
- next unless dep.updatable?
159
- dep_done = dep.done?
157
+ outdated_time = checks.select{|dep| dep.updatable? && dep.done? && Persist.newer?(path, dep.path) }
158
+ outdated_dep = checks.reject{|dep| dep.done? || (dep.error? && ! dep.recoverable_error? && canfail_paths.include?(dep.path)) }
160
159
 
161
- begin
162
- if this_mtime && dep_done && Open.exists?(dep.path) && (Open.mtime(dep.path) > this_mtime + 1)
163
- outdated_time << dep
164
- end
165
- rescue
166
- end
160
+ #checks.each do |dep|
161
+ # next unless dep.updatable?
162
+ # dep_done = dep.done?
167
163
 
168
- # Is this pointless? this would mean some dep got updated after a later
169
- # dep but but before this one.
170
- #if (! dep.done? && ! canfail_paths.include?(dep.path)) || ! dep.updated?
164
+ # begin
165
+ # if this_mtime && dep_done && Open.exists?(dep.path) && (Open.mtime(dep.path) > this_mtime + 1)
166
+ # outdated_time << dep
167
+ # end
168
+ # rescue
169
+ # end
171
170
 
172
- if (! dep_done && ! canfail_paths.include?(dep.path))
173
- outdated_dep << dep
174
- end
175
- end
171
+ # # Is this pointless? this would mean some dep got updated after a later
172
+ # # dep but but before this one.
173
+ # #if (! dep.done? && ! canfail_paths.include?(dep.path)) || ! dep.updated?
174
+
175
+ # if (! dep_done && ! canfail_paths.include?(dep.path))
176
+ # outdated_dep << dep
177
+ # end
178
+ #end
176
179
 
177
180
  Log.high "Some newer files found: #{Misc.fingerprint outdated_time}" if outdated_time.any?
178
181
  Log.high "Some outdated files found: #{Misc.fingerprint outdated_dep}" if outdated_dep.any?
@@ -215,7 +218,7 @@ class Step
215
218
  no_load = :stream if no_load
216
219
 
217
220
  Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
218
- result_type = @task.result_type
221
+ result_type = @task.result_type if @task
219
222
  result_type = info[:result_type] if result_type.nil?
220
223
  result = Persist.persist "Job", result_type, :file => path, :check => persist_checks, :no_load => no_load do
221
224
  if Step === Step.log_relay_step and not self == Step.log_relay_step
@@ -67,8 +67,10 @@ class Step
67
67
  next unless File.exists?(step.path)
68
68
  job_files << step.path
69
69
  job_files << step.info_file if File.exists?(step.info_file)
70
+ job_files << Step.md5_file(step.path) if File.exists?(Step.md5_file step.path)
70
71
  job_file_dir_content = Dir.glob(step.files_dir + '/**/*')
71
72
  job_files += job_file_dir_content
73
+ job_files << step.files_dir if File.exists?(step.files_dir)
72
74
  rec_dependencies = Set.new
73
75
 
74
76
  next unless recursive
@@ -12,6 +12,7 @@ module Workflow
12
12
  workload.merge!(job_workload(dep))
13
13
  workload[job] += workload[dep]
14
14
  workload[job] << dep
15
+ workload[job].uniq!
15
16
  end
16
17
 
17
18
  job.input_dependencies.each do |dep|
@@ -19,6 +20,7 @@ module Workflow
19
20
  workload.merge!(job_workload(dep))
20
21
  workload[job] += workload[dep]
21
22
  workload[job] << dep
23
+ workload[job].uniq!
22
24
  end
23
25
 
24
26
  workload
@@ -32,7 +34,7 @@ module Workflow
32
34
  return IndiferentHash.setup(rules["defaults"]) unless rules[workflow][task_name]
33
35
 
34
36
  job_rules = IndiferentHash.setup(rules[workflow][task_name])
35
- rules["defaults"].each{|k,v| job_rules[k] ||= v } if rules["defaults"]
37
+ rules["defaults"].each{|k,v| job_rules[k] = v if job_rules[k].nil? } if rules["defaults"]
36
38
  job_rules
37
39
  end
38
40
 
@@ -97,6 +99,7 @@ module Workflow
97
99
 
98
100
  def release_resources(job)
99
101
  if resources_used[job]
102
+ Log.debug "Orchestrator releasing resouces from #{job.path}"
100
103
  resources_used[job].each do |resource,value|
101
104
  next if resource == 'size'
102
105
  resources_requested[resource] -= value.to_i
@@ -140,30 +143,64 @@ module Workflow
140
143
  end
141
144
  end
142
145
 
146
+ def erase_job_dependencies(job, rules, all_jobs, top_level_jobs)
147
+ job.dependencies.each do |dep|
148
+ next if top_level_jobs.include? dep.path
149
+ next unless Orchestrator.job_rules(rules, dep)["erase"].to_s == 'true'
150
+
151
+ dep_path = dep.path
152
+ parents = all_jobs.select do |parent|
153
+ paths = parent.info[:dependencies].nil? ? parent.dependencies.collect{|d| d.path } : parent.info[:dependencies].collect{|d| d.last }
154
+ paths.include? dep_path
155
+ end
156
+
157
+ next unless parents.reject{|parent| parent.done? }.empty?
158
+
159
+ parents.each do |parent|
160
+ Log.high "Erasing #{dep.path} from #{parent.path}"
161
+ parent.archive_deps
162
+ parent.copy_files_dir
163
+ parent.dependencies = parent.dependencies - [dep]
164
+ end
165
+ dep.clean
166
+ end
167
+ end
168
+
143
169
  def process(rules, jobs)
144
170
  begin
145
171
 
146
- workload = jobs.inject({}){|acc,job| acc.merge!(Orchestrator.job_workload(job)) }
172
+ workload = jobs.inject({}) do |acc,job|
173
+ Orchestrator.job_workload(job).each do |j,d|
174
+ acc[j] = d unless acc.keys.collect{|k| k.path }.include? j.path
175
+ end
176
+ acc
177
+ end
178
+ all_jobs = workload.keys
147
179
 
148
- while workload.values.flatten.any?
180
+ top_level_jobs = jobs.collect{|job| job.path }
181
+ while workload.any?
149
182
 
150
183
  candidates = resources_used.keys + Orchestrator.candidates(workload, rules)
151
- raise "No candidates" if candidates.empty?
184
+ raise "No candidates and no running jobs" if candidates.empty?
152
185
 
153
186
  candidates.each do |job|
154
187
  case
155
188
  when (job.error? || job.aborted?)
156
- if job.recoverable_error?
157
- job.clean
158
- raise TryAgain
159
- else
160
- next
189
+ begin
190
+ if job.recoverable_error?
191
+ job.clean
192
+ raise TryAgain
193
+ else
194
+ next
195
+ end
196
+ ensure
197
+ Log.warn "Releases resources from failed job: #{job.path}"
198
+ release_resources(job)
161
199
  end
162
- release_resources(job)
163
200
  when job.done?
164
201
  Log.debug "Orchestrator done #{job.path}"
165
202
  release_resources(job)
166
- raise TryAgain
203
+ erase_job_dependencies(job, rules, all_jobs, top_level_jobs)
167
204
 
168
205
  when job.running?
169
206
  next
@@ -180,6 +217,7 @@ module Workflow
180
217
  next if k.done?
181
218
  new_workload[k] = v.reject{|d| d.done? || (d.error? && ! d.recoverable_error?)}
182
219
  end
220
+ workload = new_workload
183
221
  sleep timer
184
222
  end
185
223
  rescue TryAgain