scout-gear 10.9.0 → 10.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +25 -0
- data/VERSION +1 -1
- data/bin/scout +4 -1
- data/lib/scout/knowledge_base/registry.rb +2 -3
- data/lib/scout/workflow/definition.rb +11 -0
- data/lib/scout/workflow/deployment/local.rb +288 -0
- data/lib/scout/workflow/deployment/orchestrator/batches.rb +130 -0
- data/lib/scout/workflow/deployment/orchestrator/chains.rb +104 -0
- data/lib/scout/workflow/deployment/orchestrator/rules.rb +256 -0
- data/lib/scout/workflow/deployment/orchestrator/workload.rb +67 -0
- data/lib/scout/workflow/deployment/scheduler/job.rb +740 -0
- data/lib/scout/workflow/deployment/scheduler/lfs.rb +125 -0
- data/lib/scout/workflow/deployment/scheduler/pbs.rb +176 -0
- data/lib/scout/workflow/deployment/scheduler/slurm.rb +158 -0
- data/lib/scout/workflow/deployment/scheduler.rb +73 -0
- data/lib/scout/workflow/deployment.rb +10 -1
- data/lib/scout/workflow/exceptions.rb +2 -0
- data/lib/scout/workflow/step/config.rb +3 -0
- data/lib/scout/workflow/step/info.rb +2 -2
- data/lib/scout/workflow/step/progress.rb +52 -0
- data/lib/scout/workflow/step.rb +30 -1
- data/lib/scout/workflow/task.rb +2 -0
- data/scout-gear.gemspec +23 -4
- data/scout_commands/batch/list +1 -1
- data/scout_commands/workflow/cmd +5 -13
- data/scout_commands/workflow/info +1 -1
- data/scout_commands/workflow/task +61 -25
- data/test/scout/workflow/deployment/orchestrator/test_batches.rb +138 -0
- data/test/scout/workflow/deployment/orchestrator/test_chains.rb +171 -0
- data/test/scout/workflow/deployment/orchestrator/test_rules.rb +219 -0
- data/test/scout/workflow/deployment/orchestrator/test_workload.rb +117 -0
- data/test/scout/workflow/deployment/scheduler/test_job.rb +31 -0
- data/test/scout/workflow/deployment/scheduler/test_lfs.rb +32 -0
- data/test/scout/workflow/deployment/scheduler/test_pbs.rb +32 -0
- data/test/scout/workflow/deployment/scheduler/test_slurm.rb +32 -0
- data/test/scout/workflow/deployment/{test_orchestrator.rb → test_local.rb} +161 -33
- data/test/scout/workflow/deployment/test_scheduler.rb +75 -0
- data/test/scout/workflow/deployment/test_trace.rb +1 -1
- data/test/scout/workflow/step/test_progress.rb +27 -0
- data/test/scout/workflow/task/test_inputs.rb +17 -0
- data/test/test_helper.rb +2 -1
- metadata +22 -3
- data/lib/scout/workflow/deployment/orchestrator.rb +0 -292
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f2debf30c7757ca3bd5ce191b99ce173bc31ea86e814933f4f7589a83ef49517
|
|
4
|
+
data.tar.gz: 6ad6353a5e9eeade8169c76d3f0b1501f8f9d02fa1e826538e15e152e79582ad
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bead6f4347748fc7365d6e24da5259417a8f3ea802ae5dbfaebc1056af062fb7af0b463eed3fe9669c7321e701daaf0fcf04245563a288feb682f9e2ff54ef29
|
|
7
|
+
data.tar.gz: 20874d977a84fab65c3c3bc50c96d48976a577f697d8e8db22b83de49b576639d1bae93a6ce604bb34cfdd86ed0fbf24baadd212077992797c2f955162139623
|
data/.vimproject
CHANGED
|
@@ -2,6 +2,10 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
|
2
2
|
Rakefile
|
|
3
3
|
README.md
|
|
4
4
|
chats=chats filter="*"{
|
|
5
|
+
|
|
6
|
+
deploy
|
|
7
|
+
|
|
8
|
+
|
|
5
9
|
debug
|
|
6
10
|
document.rb
|
|
7
11
|
pipes
|
|
@@ -46,10 +50,31 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
|
46
50
|
inputs.rb
|
|
47
51
|
info.rb
|
|
48
52
|
}
|
|
53
|
+
|
|
49
54
|
deployment.rb
|
|
50
55
|
deployment=deployment{
|
|
56
|
+
orchestrator=orchestrator{
|
|
57
|
+
rules.rb
|
|
58
|
+
workload.rb
|
|
59
|
+
chains.rb
|
|
60
|
+
batches.rb
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
local.rb
|
|
64
|
+
|
|
65
|
+
scheduler.rb
|
|
66
|
+
|
|
67
|
+
scheduler=scheduler{
|
|
68
|
+
job.rb
|
|
69
|
+
lfs.rb
|
|
70
|
+
slurm.rb
|
|
71
|
+
pbs.rb
|
|
72
|
+
}
|
|
73
|
+
|
|
51
74
|
queue.rb
|
|
75
|
+
|
|
52
76
|
trace.rb
|
|
77
|
+
|
|
53
78
|
orchestrator.rb
|
|
54
79
|
}
|
|
55
80
|
entity.rb
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
10.9.0
|
|
1
|
+
10.10.1
|
data/bin/scout
CHANGED
|
@@ -90,7 +90,7 @@ end
|
|
|
90
90
|
if options[:log]
|
|
91
91
|
Log.severity = options.delete(:log).to_i
|
|
92
92
|
require 'scout-gear'
|
|
93
|
-
|
|
93
|
+
elsif Log.severity.nil?
|
|
94
94
|
require 'scout-gear'
|
|
95
95
|
|
|
96
96
|
global_severity = Log.get_level(Scout.etc.log_severity.read.strip) if Scout.etc.log_severity.exists?
|
|
@@ -100,8 +100,11 @@ else
|
|
|
100
100
|
global_severity = Log.get_level(Scout.etc.log_severity.read.strip) if Scout.etc.log_severity.exists?
|
|
101
101
|
Log.severity = global_severity.to_i if global_severity
|
|
102
102
|
end
|
|
103
|
+
else
|
|
104
|
+
require 'scout-gear'
|
|
103
105
|
end
|
|
104
106
|
|
|
107
|
+
|
|
105
108
|
if config_keys = options.delete(:config_keys)
|
|
106
109
|
config_keys.split(",").each do |config|
|
|
107
110
|
config = config.strip
|
data/lib/scout/knowledge_base/registry.rb
CHANGED
|
@@ -175,14 +175,13 @@ class KnowledgeBase
|
|
|
175
175
|
|
|
176
176
|
database = if persist_path.exists? and persist_options[:persist] and not persist_options[:update]
|
|
177
177
|
Log.low "Re-opening database #{ name } from #{ Log.fingerprint persist_path }. #{options}"
|
|
178
|
-
Association.database(file, **options.merge(persist_options: persist_options))
|
|
178
|
+
Association.database(file, **options.merge(persist_options: persist_options).except(:undirected))
|
|
179
179
|
else
|
|
180
180
|
options = IndiferentHash.add_defaults options, registered_options if registered_options
|
|
181
|
-
undirected = IndiferentHash.process_options options, :undirected
|
|
182
181
|
raise "Repo #{ name } not found and not registered" if file.nil?
|
|
183
182
|
Log.medium "Opening database #{ name } from #{ Log.fingerprint file }. #{options}"
|
|
184
183
|
file = file.call if Proc === file
|
|
185
|
-
Association.database(file, **options.merge(persist_options: persist_options))
|
|
184
|
+
Association.database(file, **options.merge(persist_options: persist_options).except(:undirected))
|
|
186
185
|
end
|
|
187
186
|
|
|
188
187
|
database.namespace = self.namespace if self.namespace
|
data/lib/scout/workflow/definition.rb
CHANGED
|
@@ -227,4 +227,15 @@ module Workflow
|
|
|
227
227
|
alias export_asynchronous export
|
|
228
228
|
alias export_exec export
|
|
229
229
|
alias export_stream export
|
|
230
|
+
|
|
231
|
+
def include_workflow(workflow)
|
|
232
|
+
workflow.documentation
|
|
233
|
+
self.asynchronous_exports += workflow.asynchronous_exports
|
|
234
|
+
self.synchronous_exports += workflow.synchronous_exports
|
|
235
|
+
self.exec_exports += workflow.exec_exports
|
|
236
|
+
self.stream_exports += workflow.stream_exports
|
|
237
|
+
self.tasks.merge! workflow.tasks
|
|
238
|
+
self.tasks.each{|_,t| t.workflow = workflow }
|
|
239
|
+
self.helpers.merge! workflow.helpers
|
|
240
|
+
end
|
|
230
241
|
end
|
data/lib/scout/workflow/deployment/local.rb
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
require_relative 'orchestrator/batches'
|
|
2
|
+
class Workflow::LocalExecutor
|
|
3
|
+
class NoWork < Exception; end
|
|
4
|
+
|
|
5
|
+
def self.process(*args)
|
|
6
|
+
self.new.process(*args)
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def self.produce(jobs, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1)
|
|
10
|
+
jobs = [jobs] unless Array === jobs
|
|
11
|
+
orchestrator = self.new produce_timer.to_f, cpus: produce_cpus.to_i
|
|
12
|
+
begin
|
|
13
|
+
orchestrator.process(rules, jobs)
|
|
14
|
+
rescue self::NoWork
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def self.produce_dependencies(jobs, tasks, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1)
|
|
19
|
+
jobs = [jobs] unless Array === jobs
|
|
20
|
+
tasks = tasks.collect{|task| (String === task) ? task.to_sym : task }
|
|
21
|
+
|
|
22
|
+
produce_list = []
|
|
23
|
+
jobs.each do |job|
|
|
24
|
+
next if job.done? || job.running?
|
|
25
|
+
job.rec_dependencies.each do |dep|
|
|
26
|
+
task_name = dep.task_name.to_sym
|
|
27
|
+
task_name = task_name.to_sym if String === task_name
|
|
28
|
+
produce_list << dep if tasks.include?(task_name) ||
|
|
29
|
+
tasks.include?(job.task_name.to_s) ||
|
|
30
|
+
tasks.include?(job.full_task_name)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
produce(produce_list, rules, produce_cpus: produce_cpus, produce_timer: produce_timer)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
attr_accessor :available_resources, :resources_requested, :resources_used, :timer
|
|
38
|
+
|
|
39
|
+
def initialize(timer = 5, available_resources = nil)
|
|
40
|
+
available_resources = {:cpus => Etc.nprocessors } if available_resources.nil?
|
|
41
|
+
@timer = timer
|
|
42
|
+
@available_resources = IndiferentHash.setup(available_resources)
|
|
43
|
+
@resources_requested = IndiferentHash.setup({})
|
|
44
|
+
@resources_used = IndiferentHash.setup({})
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def process_batches(batches)
|
|
48
|
+
failed_jobs = []
|
|
49
|
+
|
|
50
|
+
while batches.reject{|b| Workflow::Orchestrator.done_batch?(b) }.any?
|
|
51
|
+
|
|
52
|
+
candidates = Workflow::LocalExecutor.candidates(batches)
|
|
53
|
+
top_level_jobs = candidates.collect{|batch| batch[:top_level] }
|
|
54
|
+
|
|
55
|
+
raise NoWork, "No candidates and no running jobs #{Log.fingerprint batches}" if resources_used.empty? && top_level_jobs.empty?
|
|
56
|
+
|
|
57
|
+
candidates.each do |batch|
|
|
58
|
+
begin
|
|
59
|
+
|
|
60
|
+
job = batch[:top_level]
|
|
61
|
+
|
|
62
|
+
case
|
|
63
|
+
when (job.error? || job.aborted?)
|
|
64
|
+
begin
|
|
65
|
+
if job.recoverable_error?
|
|
66
|
+
if failed_jobs.include?(job)
|
|
67
|
+
Log.warn "Failed twice #{job.path} with recoverable error"
|
|
68
|
+
next
|
|
69
|
+
else
|
|
70
|
+
failed_jobs << job
|
|
71
|
+
job.clean
|
|
72
|
+
raise TryAgain
|
|
73
|
+
end
|
|
74
|
+
else
|
|
75
|
+
Log.warn "Non-recoverable error in #{job.path}"
|
|
76
|
+
next
|
|
77
|
+
end
|
|
78
|
+
ensure
|
|
79
|
+
Log.warn "Releases resources from failed job: #{job.path}"
|
|
80
|
+
release_resources(job)
|
|
81
|
+
end
|
|
82
|
+
when job.done?
|
|
83
|
+
Log.debug "Orchestrator done #{job.path}"
|
|
84
|
+
release_resources(job)
|
|
85
|
+
clear_batch(batches, batch)
|
|
86
|
+
erase_job_dependencies(job, batches)
|
|
87
|
+
when job.running?
|
|
88
|
+
next
|
|
89
|
+
|
|
90
|
+
else
|
|
91
|
+
check_resources(batch) do
|
|
92
|
+
run_batch(batch)
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
rescue TryAgain
|
|
96
|
+
retry
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
batches.each do |batch|
|
|
101
|
+
job = batch[:top_level]
|
|
102
|
+
if job.done? || job.aborted? || job.error?
|
|
103
|
+
job.join if job.done?
|
|
104
|
+
clear_batch(batches, batch)
|
|
105
|
+
release_resources(job)
|
|
106
|
+
erase_job_dependencies(job, batches)
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
sleep timer
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
batches.each{|batch|
|
|
114
|
+
job = batch[:top_level]
|
|
115
|
+
begin
|
|
116
|
+
job.join
|
|
117
|
+
rescue
|
|
118
|
+
Log.warn "Job #{job.short_path} ended with exception #{$!.class.to_s}: #{$!.message}"
|
|
119
|
+
end
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
batches.each{|batch|
|
|
123
|
+
job = batch[:top_level]
|
|
124
|
+
erase_job_dependencies(job, batches) if job.done?
|
|
125
|
+
}
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def process(rules, jobs = nil)
|
|
129
|
+
jobs, rules = rules, {} if jobs.nil?
|
|
130
|
+
jobs = [jobs] if Step === jobs
|
|
131
|
+
|
|
132
|
+
batches = Workflow::Orchestrator.job_batches(rules, jobs)
|
|
133
|
+
batches.each do |batch|
|
|
134
|
+
rules = IndiferentHash.setup batch[:rules]
|
|
135
|
+
rules.delete :erase if jobs.include?(batch[:top_level])
|
|
136
|
+
resources = Workflow::Orchestrator.normalize_resources_from_rules(rules)
|
|
137
|
+
resources = IndiferentHash.add_defaults resources, rules[:default_resources] if rules[:default_resources]
|
|
138
|
+
batch[:resources] = resources
|
|
139
|
+
batch[:rules] = rules
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
process_batches(batches)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def release_resources(job)
|
|
146
|
+
if resources_used[job]
|
|
147
|
+
Log.debug "Orchestrator releasing resouces from #{job.path}"
|
|
148
|
+
resources_used[job].each do |resource,value|
|
|
149
|
+
next if resource == 'size'
|
|
150
|
+
resources_requested[resource] -= value.to_i
|
|
151
|
+
end
|
|
152
|
+
resources_used.delete job
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def check_resources(batch)
|
|
157
|
+
resources = batch[:resources]
|
|
158
|
+
job = batch[:top_level]
|
|
159
|
+
|
|
160
|
+
limit_resources = resources.select do |resource,value|
|
|
161
|
+
value && available_resources[resource] && ((resources_requested[resource] || 0) + value) > available_resources[resource]
|
|
162
|
+
end.collect do |resource,v|
|
|
163
|
+
resource
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
if limit_resources.any?
|
|
167
|
+
Log.debug "Orchestrator waiting on #{job.path} due to #{limit_resources * ", "}"
|
|
168
|
+
else
|
|
169
|
+
|
|
170
|
+
resources_used[job] = resources
|
|
171
|
+
resources.each do |resource,value|
|
|
172
|
+
resources_requested[resource] ||= 0
|
|
173
|
+
resources_requested[resource] += value.to_i
|
|
174
|
+
end
|
|
175
|
+
Log.low "Orchestrator producing #{job.path} with resources #{resources}"
|
|
176
|
+
|
|
177
|
+
return yield
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
def run_batch(batch)
|
|
182
|
+
job, job_rules = batch.values_at :top_level, :rules
|
|
183
|
+
|
|
184
|
+
rules = batch[:rules]
|
|
185
|
+
deploy = rules[:deploy] if rules
|
|
186
|
+
case deploy
|
|
187
|
+
when nil, 'local', :local, :serial, 'serial'
|
|
188
|
+
Scout::Config.with_config do
|
|
189
|
+
job_rules[:config_keys].split(/,\s*/).each do |config|
|
|
190
|
+
Scout::Config.process_config config
|
|
191
|
+
end if job_rules && job_rules[:config_keys]
|
|
192
|
+
|
|
193
|
+
log = job_rules[:log] if job_rules
|
|
194
|
+
log = Log.severity if log.nil?
|
|
195
|
+
Log.with_severity log do
|
|
196
|
+
job.fork
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
when 'batch', 'sched', 'slurm', 'pbs', 'lsf'
|
|
200
|
+
job.init_info
|
|
201
|
+
Workflow::Scheduler.process_batches([batch])
|
|
202
|
+
job.join
|
|
203
|
+
else
|
|
204
|
+
require 'scout/offsite'
|
|
205
|
+
if deploy.end_with?('-batch')
|
|
206
|
+
server = deploy.sub('-batch','')
|
|
207
|
+
OffsiteStep.setup(job, server: server, batch: true)
|
|
208
|
+
else
|
|
209
|
+
OffsiteStep.setup(job, server: deploy)
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
job.produce
|
|
213
|
+
job.join
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
def erase_job_dependencies(job, batches)
|
|
218
|
+
all_jobs = batches.collect{|b| b[:jobs] }.flatten
|
|
219
|
+
top_level_jobs = batches.collect{|b| b[:top_level] }
|
|
220
|
+
|
|
221
|
+
job.dependencies.each do |dep|
|
|
222
|
+
batch = batches.select{|b| b[:jobs].include? dep}.first
|
|
223
|
+
next unless batch
|
|
224
|
+
rules = batch[:rules]
|
|
225
|
+
next unless rules[:erase].to_s == 'true'
|
|
226
|
+
|
|
227
|
+
dep_path = dep.path
|
|
228
|
+
parents = all_jobs.select do |parent|
|
|
229
|
+
parent.rec_dependencies.include?(dep)
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
next if parents.select{|parent| ! parent.done? }.any?
|
|
233
|
+
|
|
234
|
+
parents.each do |parent|
|
|
235
|
+
Log.high "Erasing #{dep.path} from #{parent.path}"
|
|
236
|
+
parent.archive_deps
|
|
237
|
+
parent.copy_linked_files_dir
|
|
238
|
+
parent.dependencies = parent.dependencies - [dep]
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
dep.clean
|
|
242
|
+
end
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
def clear_batch(batches, batch)
|
|
246
|
+
job = batch[:top_level]
|
|
247
|
+
|
|
248
|
+
parents = batches.select do |b|
|
|
249
|
+
b[:deps].include? batch
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
parents.each{|b| b[:deps].delete batch }
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
#{{{ HELPER
|
|
256
|
+
|
|
257
|
+
def self.purge_duplicates(batches)
|
|
258
|
+
seen = Set.new
|
|
259
|
+
batches.select do |batch|
|
|
260
|
+
path = batch[:top_level].path
|
|
261
|
+
if seen.include? path
|
|
262
|
+
false
|
|
263
|
+
else
|
|
264
|
+
seen << path
|
|
265
|
+
true
|
|
266
|
+
end
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
def self.sort_candidates(batches)
|
|
271
|
+
seen = Set.new
|
|
272
|
+
batches.sort_by do |batch|
|
|
273
|
+
- batch[:resources].values.compact.select{|e| Numeric === e }.inject(0.0){|acc,e| acc += e}
|
|
274
|
+
end
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
def self.candidates(batches)
|
|
278
|
+
|
|
279
|
+
leaf_nodes = batches.select{|b| b[:deps].empty? }
|
|
280
|
+
|
|
281
|
+
leaf_nodes.reject!{|b| Workflow::Orchestrator.done_batch?(b) }
|
|
282
|
+
|
|
283
|
+
leaf_nodes = purge_duplicates leaf_nodes
|
|
284
|
+
leaf_nodes = sort_candidates leaf_nodes
|
|
285
|
+
|
|
286
|
+
leaf_nodes
|
|
287
|
+
end
|
|
288
|
+
end
|
data/lib/scout/workflow/deployment/orchestrator/batches.rb
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
require_relative 'rules'
|
|
2
|
+
require_relative 'workload'
|
|
3
|
+
require_relative 'chains'
|
|
4
|
+
|
|
5
|
+
class Workflow::Orchestrator
|
|
6
|
+
|
|
7
|
+
def self.chain_batches(rules, chains, workload)
|
|
8
|
+
chain_rules = parse_chains(rules)
|
|
9
|
+
|
|
10
|
+
batches = []
|
|
11
|
+
jobs = workload.keys
|
|
12
|
+
while job = jobs.pop
|
|
13
|
+
next if job.done?
|
|
14
|
+
matches = chains.select{|name,info| info[:jobs].include? job }
|
|
15
|
+
if matches.any?
|
|
16
|
+
name, info = matches.sort_by do |n, info|
|
|
17
|
+
num_jobs = info[:jobs].length
|
|
18
|
+
total_tasks = chain_rules[n][:tasks].values.flatten.uniq.length
|
|
19
|
+
num_jobs.to_f + 1.0/total_tasks
|
|
20
|
+
end.last
|
|
21
|
+
jobs = jobs - info[:jobs]
|
|
22
|
+
info[:chain] = name
|
|
23
|
+
batch = info
|
|
24
|
+
else
|
|
25
|
+
batch = {:jobs => [job], :top_level => job}
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
chains.delete_if{|n,info| batch[:jobs].include? info[:top_level] }
|
|
29
|
+
|
|
30
|
+
chains.each do |n,info|
|
|
31
|
+
info[:jobs] = info[:jobs] - batch[:jobs]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
chains.delete_if{|n,info| info[:jobs].length < 2 }
|
|
35
|
+
|
|
36
|
+
batches << IndiferentHash.setup(batch)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
batches
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def self.add_batch_deps(batches)
|
|
43
|
+
batches.each do |batch|
|
|
44
|
+
jobs = batch[:jobs]
|
|
45
|
+
all_deps = jobs.collect{|j| job_dependencies(j) }.flatten.uniq - jobs
|
|
46
|
+
|
|
47
|
+
minimum = all_deps.dup
|
|
48
|
+
all_deps.each do |dep|
|
|
49
|
+
minimum -= job_dependencies(dep)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
all_deps = minimum
|
|
53
|
+
deps = all_deps.collect do |d|
|
|
54
|
+
(batches - [batch]).select{|b| b[:jobs].collect(&:path).include? d.path }
|
|
55
|
+
end.flatten.uniq
|
|
56
|
+
batch[:deps] = deps
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
batches
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def self.add_rules_and_consolidate(rules, batches)
|
|
63
|
+
chain_rules = parse_chains(rules)
|
|
64
|
+
|
|
65
|
+
batches.each do |batch|
|
|
66
|
+
job_rules_acc = batch[:jobs].inject(nil) do |acc, p|
|
|
67
|
+
job, deps = p
|
|
68
|
+
workflow = job.workflow
|
|
69
|
+
task_name = job.task_name
|
|
70
|
+
task_rules = task_specific_rules(rules, workflow, task_name)
|
|
71
|
+
acc = accumulate_rules(acc, task_rules.dup)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
if chain = batch[:chain]
|
|
75
|
+
batch[:rules] = merge_rules(chain_rules[chain][:rules].dup, job_rules_acc)
|
|
76
|
+
else
|
|
77
|
+
batch[:rules] = job_rules_acc
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
begin
|
|
82
|
+
batches.each do |batch|
|
|
83
|
+
batch[:deps] = batch[:deps].collect do |dep|
|
|
84
|
+
dep[:target] || dep
|
|
85
|
+
end if batch[:deps]
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
batches.each do |batch|
|
|
89
|
+
next if batch[:top_level].overriden?
|
|
90
|
+
next unless batch[:rules] && batch[:rules][:skip]
|
|
91
|
+
batch[:rules].delete :skip
|
|
92
|
+
next if batch[:deps].nil?
|
|
93
|
+
|
|
94
|
+
if batch[:deps].any?
|
|
95
|
+
batch_dep_jobs = batch[:top_level].rec_dependencies
|
|
96
|
+
target = batch[:deps].select do |target|
|
|
97
|
+
batch_dep_jobs.include?(target[:top_level]) &&
|
|
98
|
+
(batch[:deps] - [target] - target[:deps]).empty?
|
|
99
|
+
end.first
|
|
100
|
+
next if target.nil?
|
|
101
|
+
target[:jobs] = batch[:jobs] + target[:jobs]
|
|
102
|
+
target[:deps] = (target[:deps] + batch[:deps]).uniq - [target]
|
|
103
|
+
target[:top_level] = batch[:top_level]
|
|
104
|
+
target[:rules] = accumulate_rules(target[:rules], batch[:rules])
|
|
105
|
+
batch[:target] = target
|
|
106
|
+
end
|
|
107
|
+
raise TryAgain
|
|
108
|
+
end
|
|
109
|
+
rescue TryAgain
|
|
110
|
+
retry
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
batches.delete_if{|b| b[:target] }
|
|
114
|
+
|
|
115
|
+
batches
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def self.job_batches(rules, jobs)
|
|
119
|
+
jobs = [jobs] unless Array === jobs
|
|
120
|
+
|
|
121
|
+
workload = job_workload(jobs)
|
|
122
|
+
job_chains_map = jobs.inject([]){|acc,job| acc.concat(self.job_chains(rules, job)) }
|
|
123
|
+
|
|
124
|
+
batches = chain_batches(rules, job_chains_map, workload)
|
|
125
|
+
batches = add_batch_deps(batches)
|
|
126
|
+
batches = add_rules_and_consolidate(rules, batches)
|
|
127
|
+
|
|
128
|
+
batches
|
|
129
|
+
end
|
|
130
|
+
end
|
data/lib/scout/workflow/deployment/orchestrator/chains.rb
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
require_relative 'workload'
|
|
2
|
+
|
|
3
|
+
class Workflow::Orchestrator
|
|
4
|
+
def self.check_chains(chains, job)
|
|
5
|
+
return [] if Symbol === job.overriden_task
|
|
6
|
+
matches = []
|
|
7
|
+
chains.each do |name, chain|
|
|
8
|
+
workflow = job.overriden_workflow || job.workflow
|
|
9
|
+
task_name = job.overriden_task || job.task_name
|
|
10
|
+
next unless chain[:tasks].include?(workflow.to_s)
|
|
11
|
+
next unless chain[:tasks][workflow.to_s].include?(task_name.to_s)
|
|
12
|
+
matches << name
|
|
13
|
+
end
|
|
14
|
+
matches
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def self.parse_chains(rules)
|
|
18
|
+
rules = IndiferentHash.setup(rules || {})
|
|
19
|
+
chains = IndiferentHash.setup({})
|
|
20
|
+
|
|
21
|
+
# Rules may contain chains under workflows and/or top-level
|
|
22
|
+
rules.each do |workflow_name, wf_rules|
|
|
23
|
+
next unless wf_rules.is_a?(Hash)
|
|
24
|
+
next unless wf_rules["chains"]
|
|
25
|
+
wf_rules["chains"].each do |name, cr|
|
|
26
|
+
cr = IndiferentHash.setup(cr.dup)
|
|
27
|
+
chain_tasks = cr.delete(:tasks).to_s.split(/,\s*/)
|
|
28
|
+
wf = cr.delete(:workflow) if cr.include?(:workflow)
|
|
29
|
+
|
|
30
|
+
chain_tasks.each do |task|
|
|
31
|
+
chain_workflow, chain_task = task.split('#')
|
|
32
|
+
chain_task, chain_workflow = chain_workflow, wf if chain_task.nil? || chain_task.empty?
|
|
33
|
+
|
|
34
|
+
chains[name] ||= IndiferentHash.setup({:tasks => {}, :rules => cr })
|
|
35
|
+
chains[name][:tasks][chain_workflow] ||= []
|
|
36
|
+
chains[name][:tasks][chain_workflow] << chain_task
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
if rules["chains"]
|
|
42
|
+
rules["chains"].each do |name, cr|
|
|
43
|
+
cr = IndiferentHash.setup(cr.dup)
|
|
44
|
+
chain_tasks = cr.delete(:tasks).to_s.split(/,\s*/)
|
|
45
|
+
wf = cr.delete(:workflow)
|
|
46
|
+
|
|
47
|
+
chain_tasks.each do |task|
|
|
48
|
+
chain_workflow, chain_task = task.split('#')
|
|
49
|
+
chain_task, chain_workflow = chain_workflow, wf if chain_task.nil? || chain_task.empty?
|
|
50
|
+
|
|
51
|
+
chains[name] ||= IndiferentHash.setup({:tasks => {}, :rules => cr })
|
|
52
|
+
chains[name][:tasks][chain_workflow] ||= []
|
|
53
|
+
chains[name][:tasks][chain_workflow] << chain_task
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
chains
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def self.job_chains(rules, job, computed = {})
|
|
62
|
+
key = Log.fingerprint([rules, job.path, job.object_id])
|
|
63
|
+
return computed[key] if computed.has_key?(key)
|
|
64
|
+
|
|
65
|
+
chains = parse_chains(rules)
|
|
66
|
+
matches = check_chains(chains, job)
|
|
67
|
+
dependencies = job_dependencies(job)
|
|
68
|
+
|
|
69
|
+
job_chains = []
|
|
70
|
+
new_job_chains = {}
|
|
71
|
+
dependencies.each do |dep|
|
|
72
|
+
dep_matches = check_chains(chains, dep)
|
|
73
|
+
common = matches & dep_matches
|
|
74
|
+
|
|
75
|
+
dep_chains = job_chains(rules, dep, computed)
|
|
76
|
+
found = []
|
|
77
|
+
dep_chains.each do |match, info|
|
|
78
|
+
if common.include?(match)
|
|
79
|
+
found << match
|
|
80
|
+
new_info = new_job_chains[match] ||= {}
|
|
81
|
+
new_info[:jobs] ||= []
|
|
82
|
+
new_info[:jobs].concat info[:jobs]
|
|
83
|
+
new_info[:top_level] = job
|
|
84
|
+
else
|
|
85
|
+
job_chains << [match, info]
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
(common - found).each do |match|
|
|
90
|
+
info = {}
|
|
91
|
+
info[:jobs] = [job, dep]
|
|
92
|
+
info[:top_level] = job
|
|
93
|
+
job_chains << [match, info]
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
new_job_chains.each do |match, info|
|
|
98
|
+
info[:jobs].prepend job
|
|
99
|
+
job_chains << [match, info]
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
computed[key] = job_chains
|
|
103
|
+
end
|
|
104
|
+
end
|