scout-gear 10.9.0 → 10.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +25 -0
- data/VERSION +1 -1
- data/bin/scout +4 -1
- data/lib/scout/knowledge_base/registry.rb +2 -3
- data/lib/scout/workflow/definition.rb +11 -0
- data/lib/scout/workflow/deployment/local.rb +288 -0
- data/lib/scout/workflow/deployment/orchestrator/batches.rb +130 -0
- data/lib/scout/workflow/deployment/orchestrator/chains.rb +104 -0
- data/lib/scout/workflow/deployment/orchestrator/rules.rb +256 -0
- data/lib/scout/workflow/deployment/orchestrator/workload.rb +67 -0
- data/lib/scout/workflow/deployment/scheduler/job.rb +740 -0
- data/lib/scout/workflow/deployment/scheduler/lfs.rb +125 -0
- data/lib/scout/workflow/deployment/scheduler/pbs.rb +176 -0
- data/lib/scout/workflow/deployment/scheduler/slurm.rb +158 -0
- data/lib/scout/workflow/deployment/scheduler.rb +73 -0
- data/lib/scout/workflow/deployment.rb +10 -1
- data/lib/scout/workflow/exceptions.rb +2 -0
- data/lib/scout/workflow/step/config.rb +3 -0
- data/lib/scout/workflow/step/info.rb +2 -2
- data/lib/scout/workflow/step/progress.rb +52 -0
- data/lib/scout/workflow/step.rb +30 -1
- data/lib/scout/workflow/task.rb +2 -0
- data/scout-gear.gemspec +23 -4
- data/scout_commands/batch/list +1 -1
- data/scout_commands/workflow/cmd +5 -13
- data/scout_commands/workflow/info +1 -1
- data/scout_commands/workflow/task +61 -25
- data/test/scout/workflow/deployment/orchestrator/test_batches.rb +138 -0
- data/test/scout/workflow/deployment/orchestrator/test_chains.rb +171 -0
- data/test/scout/workflow/deployment/orchestrator/test_rules.rb +219 -0
- data/test/scout/workflow/deployment/orchestrator/test_workload.rb +117 -0
- data/test/scout/workflow/deployment/scheduler/test_job.rb +31 -0
- data/test/scout/workflow/deployment/scheduler/test_lfs.rb +32 -0
- data/test/scout/workflow/deployment/scheduler/test_pbs.rb +32 -0
- data/test/scout/workflow/deployment/scheduler/test_slurm.rb +32 -0
- data/test/scout/workflow/deployment/{test_orchestrator.rb → test_local.rb} +161 -33
- data/test/scout/workflow/deployment/test_scheduler.rb +75 -0
- data/test/scout/workflow/deployment/test_trace.rb +1 -1
- data/test/scout/workflow/step/test_progress.rb +27 -0
- data/test/scout/workflow/task/test_inputs.rb +17 -0
- data/test/test_helper.rb +2 -1
- metadata +22 -3
- data/lib/scout/workflow/deployment/orchestrator.rb +0 -292
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
class Workflow::Orchestrator

  # Merge two config_keys specifications (String or Array) into one
  # comma-separated string, de-duplicating tokens. Tokens from new_val come
  # first; the reverse/uniq/reverse dance keeps the LAST occurrence of a
  # duplicated token, so current's copy wins on conflict.
  def self.add_config_keys(current, new_val)
    return new_val if current.nil?
    current = current * ',' if Array === current
    new_val = new_val * ',' if Array === new_val
    (new_val.to_s + ',' + current.to_s).gsub(/,\s*/, ',').split(',').reverse.uniq.reverse * ","
  end

  # Workflow-level defaults: the "defaults" entry under the workflow's name
  # in the rules hash, or {} when absent.
  def self.workflow_rules(rules, workflow)
    rules = IndiferentHash.setup(rules || {})
    wf = workflow.to_s
    return {} if rules[wf].nil?
    return {} if rules[wf]["defaults"].nil?
    IndiferentHash.setup(rules[wf]["defaults"].dup)
  end

  # Merge new_val into current WITHOUT overriding keys current already has,
  # with two exceptions: config_keys are token-merged and defaults are
  # merged recursively. Neither argument is mutated (both are dup'ed).
  def self.merge_rules(current, new_val)
    current = IndiferentHash.setup((current || {}).dup)
    new_val = IndiferentHash.setup((new_val || {}).dup)
    return current if new_val.nil? || new_val.empty?

    new_val.each do |k, value|
      case k.to_s
      when "config_keys"
        current[k] = add_config_keys current["config_keys"], value
      when 'defaults'
        current[k] = merge_rules current[k], value
      else
        next if current.include?(k)
        current[k] = value
      end
    end
    current
  end

  # Accumulate rules across multiple sources (e.g. across jobs in a batch):
  # config_keys are token-merged, cpus takes the max, time budgets are
  # summed, skip is AND-ed (deleted as soon as any source says false);
  # any other key keeps its first-seen value.
  def self.accumulate_rules(current, new_val)
    current = IndiferentHash.setup((current || {}).dup)
    new_val = IndiferentHash.setup((new_val || {}).dup)
    return current if new_val.nil? || new_val.empty?

    new_val.each do |k, value|
      case k.to_s
      when "config_keys"
        current[k] = add_config_keys current["config_keys"], value
      when "cpus"
        # choose max
        vals = [current[k], value].compact.map{|v| v.to_i }
        current[k] = vals.max unless vals.empty?
      when "time"
        # sum time budgets
        t = [current[k], value].compact.inject(0){|acc,tv| acc + Misc.timespan(tv) }
        current[k] = Misc.format_seconds(t)
      when "skip"
        # skip stays true only while every accumulated source agrees
        skip = (current.key?(k) ? current[k] : true) && value
        if skip
          current[k] = true
        else
          current.delete k
        end
      else
        next if current.include?(k)
        current[k] = value
      end
    end
    current
  end

  # Compute task-specific rules: global defaults -> workflow defaults ->
  # task overrides (task entries take precedence since merge_rules does not
  # override existing keys).
  def self.task_specific_rules(rules, workflow, task)
    rules = IndiferentHash.setup(rules || {})
    defaults = IndiferentHash.setup(rules[:defaults] || {})
    wf = workflow.to_s
    tk = task.to_s

    wf_defaults = merge_rules(workflow_rules(rules, wf), defaults)
    return IndiferentHash.setup(wf_defaults.dup) if rules[wf].nil? || rules[wf][tk].nil?

    merge_rules(rules[wf][tk], wf_defaults)
  end

  # Recursive job rules: accumulate down the dependency tree. Jobs that are
  # already done or failed contribute nothing unless force is set.
  def self.job_rules(rules, job, force = false)
    return {} if (job.done? || job.error?) && !force
    jr = task_specific_rules(rules, job.workflow.to_s, job.task_name.to_s)
    job.dependencies.each do |dep|
      jr = accumulate_rules(jr, job_rules(rules, dep))
    end
    jr
  end

  # Keep only numeric-like resource entries for scheduler accounting:
  # Numeric values pass through; numeric-looking strings are coerced to
  # Integer/Float; the bookkeeping key :size is dropped; everything else
  # (e.g. "4G" memory specs) is discarded.
  def self.numeric_resources_only(resources)
    numeric_resources = {}
    resources.each do |k,v|
      next if k.to_s == 'size'
      if Numeric === v
        numeric_resources[k] = v
      elsif v.respond_to?(:to_s) && v.to_s.strip =~ /^\d+(?:\.\d+)?$/
        numeric_resources[k] = v.to_s.include?(".") ? v.to_f : v.to_i
      end
    end
    IndiferentHash.setup(numeric_resources)
  end

  # Build a numeric-only resources hash for scheduling (parallel
  # orchestrator) from the job's accumulated rules, falling back to
  # default_resources from the rules and finally to cpus: 1.
  def self.job_resources(rules, job)
    jr = IndiferentHash.setup(job_rules(rules, job) || {})

    resources = IndiferentHash.setup({})
    # Nested resources
    if jr[:resources].is_a?(Hash)
      jr[:resources].each do |k,v|
        resources[k] = v
      end
    end
    # Top-level aliases
    resources[:cpus] ||= jr[:cpus] if jr.key?(:cpus)
    resources[:IO] ||= jr[:IO] if jr.key?(:IO)
    resources[:io] ||= jr[:io] if jr.key?(:io)
    # Memory settings are ignored for numeric scheduling unless numeric
    resources[:mem] ||= jr[:mem] if jr.key?(:mem)
    resources[:mem_per_cpu] ||= jr[:mem_per_cpu] if jr.key?(:mem_per_cpu)

    # Default resources fallback
    default_resources = rules["default_resources"]
    default_resources ||= rules["defaults"]["resources"] if rules["defaults"]
    default_resources ||= {}
    IndiferentHash.setup(default_resources).each do |k,v|
      resources[k] = v if resources[k].nil?
    end

    # If still empty, use cpus:1 as safe default
    resources = {:cpus => 1} if resources.empty?

    # Only keep numeric-like values for the scheduler summations/accounting
    numeric_resources_only(resources)
  end

  # Build resources hash directly from a rules hash (e.g., consolidated
  # batch rules), with default-resource fallback taken from global_rules.
  def self.resources_from_rules_hash(rules_hash, global_rules = {})
    rules_hash = IndiferentHash.setup(rules_hash || {})
    resources = IndiferentHash.setup({})

    # Nested resources
    if rules_hash[:resources].is_a?(Hash)
      rules_hash[:resources].each{|k,v| resources[k] = v }
    end
    # Top-level cpus/IO
    resources[:cpus] ||= rules_hash[:cpus] if rules_hash.key?(:cpus)
    resources[:IO] ||= rules_hash[:IO] if rules_hash.key?(:IO)
    resources[:io] ||= rules_hash[:io] if rules_hash.key?(:io)
    resources[:mem] ||= rules_hash[:mem] if rules_hash.key?(:mem)
    resources[:mem_per_cpu] ||= rules_hash[:mem_per_cpu] if rules_hash.key?(:mem_per_cpu)

    # Default resources fallback from global rules
    default_resources = global_rules["default_resources"]
    default_resources ||= global_rules["defaults"]["resources"] if global_rules["defaults"]
    default_resources ||= {}
    IndiferentHash.setup(default_resources).each do |k,v|
      resources[k] = v if resources[k].nil?
    end

    # Numeric-only for local scheduling. NOTE: unlike job_resources, no
    # cpus:1 fallback is applied here (preserved from original behavior).
    numeric_resources_only(resources)
  end

  # Helper to extract a resources hash from various rule styles: takes the
  # nested :resources hash and back-fills cpus (from :cpus/:task_cpus,
  # defaulting to 1) and time; nil entries are pruned.
  def self.normalize_resources_from_rules(rules_block)
    return {} if rules_block.nil? || rules_block.empty?
    rules_block = IndiferentHash.setup rules_block

    r = rules_block[:resources] || {}
    r = IndiferentHash.setup r

    r = IndiferentHash.add_defaults r,
      cpus: rules_block[:cpus] || rules_block[:task_cpus] || 1,
      time: rules_block[:time]

    r.delete_if{|k,v| v.nil?}

    IndiferentHash.setup(r)
  end

  # Merge one rule file's hash into an accumulator: new values win, except
  # nested hashes which are merged via merge_rules (current keys kept).
  # Mutates and returns current.
  def self.merge_rule_file(current, new)
    current = IndiferentHash.setup(current)
    new.each do |key,value|
      if current[key].nil?
        current[key] = value
      elsif Hash === value
        current[key] = merge_rules(current[key], value)
      else
        current[key] = value
      end
    end

    current
  end

  # Load rule files (explicit paths or symbolic names resolved under
  # Scout.etc.batch/*.yaml), honoring :import entries recursively, and
  # merge them all into a single rules hash. Missing symbolic files are
  # skipped with a debug log.
  def self.load_rules(rule_files = nil)
    rule_files = [:default] if rule_files.nil?
    rule_files = [rule_files] unless Array === rule_files

    rule_files = rule_files.inject({}) do |acc,file|
      # FIX(review): original mixed `and` with `&&` here; use `&&`
      # consistently to avoid the low-precedence `and` trap.
      if Path.is_filename?(file) && Open.exists?(file) && Path.can_read?(file)
        file_rules = Open.yaml(file)
        raise "Unknown rule file #{file}" unless Hash === file_rules
      else
        orig = file
        file = Scout.etc.batch[file].find_with_extension(:yaml)

        if file.exists?
          file_rules = Open.yaml(file)
        else
          Log.debug "Rule file #{orig} not found"
          next acc
        end
      end

      file_rules = IndiferentHash.setup(file_rules)

      if file_rules[:import]
        imports = file_rules.delete(:import)
        merge_rule_file(file_rules, load_rules(imports))
      end

      merge_rule_file(acc, file_rules)
    end
  end

  # Load the rules relevant for a set of jobs: one symbolic rule file per
  # workflow involved (job workflows plus all recursive dependency
  # workflows), plus the :default rules.
  def self.load_rules_for_job(jobs)
    jobs = [jobs] unless Array === jobs

    deploy_files = jobs.collect do |job|
      job.workflow.to_s
    end.compact

    deploy_files += jobs.collect do |job|
      job.rec_dependencies.collect{|d| d.workflow }.compact.collect(&:to_s).uniq
    end.compact.flatten

    deploy_files << :default

    load_rules(deploy_files)
  end
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
class Workflow::Orchestrator

  # FIX(review): the original file defined self.job_workload TWICE; the
  # first definition (building a path => job map via a heap walk) was dead
  # code, silently overridden by this one. The dead definition has been
  # removed; this is the implementation that was actually in effect.
  #
  # Build a workload map: each pending job maps to the unique list of its
  # recursive pending dependencies (regular and input dependencies alike).
  # Jobs that are done and up to date map to an empty list and are not
  # descended into.
  def self.job_workload(jobs)
    workload = {}
    jobs = [jobs] unless Array === jobs
    jobs.each do |job|
      workload[job] = []
      next if job.done? && job.updated?

      # Regular dependencies first, then input dependencies — same order
      # as the original's two identical loops, folded into one pass.
      (job.dependencies + job.input_dependencies).each do |dep|
        next if dep.done? && dep.updated?
        workload.merge!(job_workload(dep))
        workload[job] += workload[dep]
        workload[job] << dep
        workload[job].uniq!
      end
    end

    workload
  end

  # Pending dependencies of a job: regular plus input dependencies that are
  # not done, or are done but dirty.
  def self.job_dependencies(job)
    (job.dependencies + job.input_dependencies).uniq.select{|d| ! d.done? || d.dirty? }
  end

  # A batch is settled when its top-level job is done, currently running,
  # or failed with a non-recoverable error.
  def self.done_batch?(batch)
    top = batch[:top_level]
    top.done? || top.running? || (top.error? && ! top.recoverable_error?)
  end
end
|