scout-gear 10.11.6 → 10.11.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +16 -2
- data/VERSION +1 -1
- data/bin/scout +10 -10
- data/lib/scout/association/fields.rb +15 -15
- data/lib/scout/association/index.rb +6 -6
- data/lib/scout/association/item.rb +18 -8
- data/lib/scout/association.rb +4 -4
- data/lib/scout/entity/identifiers.rb +5 -5
- data/lib/scout/entity/property.rb +2 -2
- data/lib/scout/entity.rb +1 -1
- data/lib/scout/knowledge_base/description.rb +10 -10
- data/lib/scout/knowledge_base/entity.rb +6 -6
- data/lib/scout/knowledge_base/list.rb +1 -1
- data/lib/scout/knowledge_base/query.rb +4 -4
- data/lib/scout/knowledge_base/registry.rb +6 -6
- data/lib/scout/knowledge_base/traverse.rb +7 -40
- data/lib/scout/persist/engine/fix_width_table.rb +6 -6
- data/lib/scout/persist/engine/packed_index.rb +2 -2
- data/lib/scout/persist/engine/sharder.rb +4 -4
- data/lib/scout/persist/engine/tkrzw.rb +1 -1
- data/lib/scout/persist/engine/tokyocabinet.rb +2 -2
- data/lib/scout/persist/tsv/adapter/fix_width_table.rb +1 -1
- data/lib/scout/persist/tsv/adapter/packed_index.rb +1 -1
- data/lib/scout/persist/tsv/adapter/tkrzw.rb +1 -1
- data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +3 -3
- data/lib/scout/persist/tsv/serialize.rb +3 -3
- data/lib/scout/persist/tsv.rb +1 -1
- data/lib/scout/semaphore.rb +78 -3
- data/lib/scout/tsv/annotation/repo.rb +4 -4
- data/lib/scout/tsv/annotation.rb +2 -2
- data/lib/scout/tsv/attach.rb +7 -7
- data/lib/scout/tsv/change_id/translate.rb +1 -1
- data/lib/scout/tsv/csv.rb +3 -3
- data/lib/scout/tsv/dumper.rb +8 -8
- data/lib/scout/tsv/index.rb +1 -1
- data/lib/scout/tsv/open.rb +3 -3
- data/lib/scout/tsv/stream.rb +2 -2
- data/lib/scout/tsv/traverse.rb +4 -4
- data/lib/scout/tsv/util/filter.rb +9 -9
- data/lib/scout/tsv/util/process.rb +2 -2
- data/lib/scout/tsv/util/reorder.rb +2 -2
- data/lib/scout/tsv/util/select.rb +3 -3
- data/lib/scout/tsv/util/unzip.rb +2 -2
- data/lib/scout/tsv/util.rb +1 -1
- data/lib/scout/tsv.rb +2 -2
- data/lib/scout/work_queue/socket.rb +2 -2
- data/lib/scout/work_queue/worker.rb +4 -4
- data/lib/scout/work_queue.rb +5 -5
- data/lib/scout/workflow/definition.rb +18 -16
- data/lib/scout/workflow/deployment/local.rb +81 -62
- data/lib/scout/workflow/deployment/orchestrator/batches.rb +66 -5
- data/lib/scout/workflow/deployment/orchestrator/chains.rb +47 -30
- data/lib/scout/workflow/deployment/orchestrator/rules.rb +3 -3
- data/lib/scout/workflow/deployment/orchestrator/workload.rb +11 -22
- data/lib/scout/workflow/deployment/scheduler/job.rb +34 -36
- data/lib/scout/workflow/deployment/scheduler/lfs.rb +1 -1
- data/lib/scout/workflow/deployment/scheduler/pbs.rb +4 -4
- data/lib/scout/workflow/deployment/scheduler/slurm.rb +2 -2
- data/lib/scout/workflow/deployment/scheduler.rb +23 -12
- data/lib/scout/workflow/deployment/trace.rb +2 -2
- data/lib/scout/workflow/documentation.rb +4 -4
- data/lib/scout/workflow/export.rb +1 -1
- data/lib/scout/workflow/path.rb +2 -2
- data/lib/scout/workflow/step/children.rb +1 -1
- data/lib/scout/workflow/step/dependencies.rb +36 -3
- data/lib/scout/workflow/step/info.rb +5 -19
- data/lib/scout/workflow/step/inputs.rb +1 -1
- data/lib/scout/workflow/step/progress.rb +2 -2
- data/lib/scout/workflow/step/provenance.rb +4 -4
- data/lib/scout/workflow/step/status.rb +23 -9
- data/lib/scout/workflow/step.rb +19 -17
- data/lib/scout/workflow/task/dependencies.rb +10 -3
- data/lib/scout/workflow/task/info.rb +3 -3
- data/lib/scout/workflow/task/inputs.rb +8 -8
- data/lib/scout/workflow/task.rb +37 -22
- data/lib/scout/workflow/usage.rb +13 -13
- data/lib/scout/workflow/util.rb +1 -1
- data/lib/scout/workflow.rb +6 -6
- data/scout-gear.gemspec +3 -3
- data/scout_commands/alias +1 -1
- data/scout_commands/batch/clean +12 -12
- data/scout_commands/batch/list +26 -25
- data/scout_commands/batch/tail +9 -5
- data/scout_commands/cat +1 -1
- data/scout_commands/doc +2 -2
- data/scout_commands/entity +4 -4
- data/scout_commands/find +1 -1
- data/scout_commands/kb/config +1 -1
- data/scout_commands/kb/entities +1 -1
- data/scout_commands/kb/list +1 -1
- data/scout_commands/kb/query +2 -2
- data/scout_commands/kb/register +1 -1
- data/scout_commands/kb/show +1 -1
- data/scout_commands/kb/traverse +1 -1
- data/scout_commands/log +6 -6
- data/scout_commands/resource/produce +2 -2
- data/scout_commands/resource/sync +1 -1
- data/scout_commands/system/clean +7 -7
- data/scout_commands/system/status +4 -4
- data/scout_commands/template +1 -1
- data/scout_commands/update +1 -1
- data/scout_commands/workflow/info +1 -1
- data/scout_commands/workflow/install +1 -1
- data/scout_commands/workflow/list +2 -2
- data/scout_commands/workflow/process +2 -2
- data/scout_commands/workflow/prov +3 -3
- data/scout_commands/workflow/task +36 -11
- data/scout_commands/workflow/trace +1 -1
- data/scout_commands/workflow/write_info +2 -2
- data/share/templates/command +1 -1
- data/test/scout/association/test_item.rb +5 -0
- data/test/scout/entity/test_property.rb +3 -3
- data/test/scout/knowledge_base/test_description.rb +1 -1
- data/test/scout/knowledge_base/test_traverse.rb +2 -2
- data/test/scout/persist/engine/test_packed_index.rb +6 -6
- data/test/scout/persist/test_tsv.rb +4 -4
- data/test/scout/persist/tsv/adapter/test_packed_index.rb +4 -4
- data/test/scout/persist/tsv/adapter/test_sharder.rb +23 -23
- data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +1 -1
- data/test/scout/persist/tsv/test_serialize.rb +1 -1
- data/test/scout/test_association.rb +1 -1
- data/test/scout/test_tsv.rb +2 -2
- data/test/scout/test_workflow.rb +2 -2
- data/test/scout/tsv/test_annotation.rb +4 -4
- data/test/scout/tsv/test_index.rb +1 -1
- data/test/scout/tsv/test_open.rb +2 -2
- data/test/scout/tsv/test_parser.rb +2 -2
- data/test/scout/tsv/test_stream.rb +1 -1
- data/test/scout/tsv/test_transformer.rb +1 -1
- data/test/scout/tsv/util/test_filter.rb +1 -1
- data/test/scout/tsv/util/test_melt.rb +1 -1
- data/test/scout/tsv/util/test_reorder.rb +1 -1
- data/test/scout/work_queue/test_socket.rb +3 -3
- data/test/scout/work_queue/test_worker.rb +2 -2
- data/test/scout/workflow/deployment/orchestrator/test_batches.rb +13 -3
- data/test/scout/workflow/deployment/orchestrator/test_chains.rb +15 -13
- data/test/scout/workflow/deployment/orchestrator/test_workload.rb +1 -1
- data/test/scout/workflow/deployment/test_local.rb +2 -2
- data/test/scout/workflow/deployment/test_scheduler.rb +1 -2
- data/test/scout/workflow/step/test_children.rb +1 -1
- data/test/scout/workflow/step/test_dependencies.rb +36 -1
- data/test/scout/workflow/step/test_info.rb +3 -35
- data/test/scout/workflow/step/test_load.rb +1 -1
- data/test/scout/workflow/step/test_provenance.rb +1 -1
- data/test/scout/workflow/step/test_status.rb +33 -1
- data/test/scout/workflow/task/test_dependencies.rb +9 -7
- data/test/scout/workflow/task/test_inputs.rb +1 -1
- data/test/scout/workflow/test_definition.rb +1 -1
- data/test/scout/workflow/test_documentation.rb +1 -1
- data/test/scout/workflow/test_entity.rb +2 -2
- data/test/scout/workflow/test_step.rb +13 -13
- data/test/scout/workflow/test_usage.rb +1 -1
- data/test/test_helper.rb +1 -1
- metadata +2 -2
|
@@ -58,47 +58,64 @@ class Workflow::Orchestrator
|
|
|
58
58
|
chains
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
+
def self.add_chain(job_chains, match, info)
|
|
62
|
+
if job_chains[match]
|
|
63
|
+
current = job_chains[match]
|
|
64
|
+
new_info = {}
|
|
65
|
+
new_info[:jobs] = (current[:jobs] + info[:jobs]).uniq
|
|
66
|
+
if current[:top_level].rec_dependencies.include?(info[:top_level]) ||
|
|
67
|
+
current[:top_level].input_dependencies.include?(info[:top_level])
|
|
68
|
+
new_info[:top_level] = current[:top_level]
|
|
69
|
+
else
|
|
70
|
+
new_info[:top_level] = info[:top_level]
|
|
71
|
+
end
|
|
72
|
+
job_chains[match] = new_info
|
|
73
|
+
else
|
|
74
|
+
job_chains[match] = info
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
61
78
|
def self.job_chains(rules, job, computed = {})
|
|
62
|
-
|
|
79
|
+
chains = parse_chains(rules)
|
|
80
|
+
key = Log.fingerprint([job.path, job.object_id, chains])
|
|
63
81
|
return computed[key] if computed.has_key?(key)
|
|
64
82
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
83
|
+
job_chains = check_chains(chains, job)
|
|
84
|
+
job_batches = {}
|
|
85
|
+
new_batches = {}
|
|
86
|
+
job_dependencies(job).each do |dep|
|
|
87
|
+
dep_chains = check_chains(chains, dep)
|
|
88
|
+
common_chains = job_chains & dep_chains
|
|
68
89
|
|
|
69
|
-
|
|
70
|
-
new_job_chains = {}
|
|
71
|
-
dependencies.each do |dep|
|
|
72
|
-
dep_matches = check_chains(chains, dep)
|
|
73
|
-
common = matches & dep_matches
|
|
90
|
+
dep_batches = job_chains(rules, dep, computed)
|
|
74
91
|
|
|
75
|
-
dep_chains = job_chains(rules, dep, computed)
|
|
76
92
|
found = []
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
93
|
+
common_chains.each do |chain|
|
|
94
|
+
info = new_batches[chain]
|
|
95
|
+
info = {top_level: job, jobs: [job]} if info.nil?
|
|
96
|
+
if dep_batches[chain]
|
|
97
|
+
found << chain
|
|
98
|
+
dep_batches[chain].each do |dep_info|
|
|
99
|
+
info[:jobs] += dep_info[:jobs] - info[:jobs]
|
|
100
|
+
end
|
|
84
101
|
else
|
|
85
|
-
|
|
102
|
+
info[:jobs] << dep
|
|
86
103
|
end
|
|
104
|
+
new_batches[chain] = info
|
|
87
105
|
end
|
|
88
106
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
job_chains << [match, info]
|
|
94
|
-
end
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
new_job_chains.each do |match, info|
|
|
98
|
-
info[:jobs].prepend job
|
|
99
|
-
job_chains << [match, info]
|
|
107
|
+
dep_batches.each do |chain,list|
|
|
108
|
+
next if found.include? chain
|
|
109
|
+
job_batches[chain] ||= []
|
|
110
|
+
job_batches[chain].concat list
|
|
100
111
|
end
|
|
112
|
+
end
|
|
101
113
|
|
|
102
|
-
|
|
114
|
+
new_batches.each do |match, info|
|
|
115
|
+
job_batches[match] ||= []
|
|
116
|
+
job_batches[match] << info
|
|
103
117
|
end
|
|
118
|
+
|
|
119
|
+
computed[key] = job_batches
|
|
120
|
+
end
|
|
104
121
|
end
|
|
@@ -47,7 +47,7 @@ class Workflow::Orchestrator
|
|
|
47
47
|
case k.to_s
|
|
48
48
|
when "config_keys"
|
|
49
49
|
current[k] = add_config_keys current["config_keys"], value
|
|
50
|
-
when "cpus
|
|
50
|
+
when "task_cpus", 'cpus'
|
|
51
51
|
# choose max
|
|
52
52
|
vals = [current[k], value].compact.map{|v| v.to_i }
|
|
53
53
|
current[k] = vals.max unless vals.empty?
|
|
@@ -183,7 +183,7 @@ class Workflow::Orchestrator
|
|
|
183
183
|
r = rules_block[:resources] || {}
|
|
184
184
|
r = IndiferentHash.setup r
|
|
185
185
|
|
|
186
|
-
r = IndiferentHash.add_defaults r,
|
|
186
|
+
r = IndiferentHash.add_defaults r,
|
|
187
187
|
cpus: rules_block[:cpus] || rules_block[:task_cpus] || 1,
|
|
188
188
|
time: rules_block[:time]
|
|
189
189
|
|
|
@@ -237,7 +237,7 @@ class Workflow::Orchestrator
|
|
|
237
237
|
merge_rule_file(acc, file_rules)
|
|
238
238
|
end
|
|
239
239
|
end
|
|
240
|
-
|
|
240
|
+
|
|
241
241
|
def self.load_rules_for_job(jobs)
|
|
242
242
|
jobs = [jobs] unless Array === jobs
|
|
243
243
|
|
|
@@ -1,32 +1,21 @@
|
|
|
1
1
|
class Workflow::Orchestrator
|
|
2
2
|
|
|
3
|
-
def self.
|
|
4
|
-
|
|
5
|
-
path_jobs = {}
|
|
3
|
+
def self.prepare_for_execution(job)
|
|
4
|
+
rec_dependencies = job.rec_dependencies(true)
|
|
6
5
|
|
|
7
|
-
|
|
6
|
+
return if rec_dependencies.empty?
|
|
8
7
|
|
|
9
|
-
|
|
10
|
-
path_jobs[job.path] = job
|
|
11
|
-
end
|
|
8
|
+
all_deps = rec_dependencies + [job]
|
|
12
9
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
deps = job_dependencies(j)
|
|
21
|
-
deps.each do |d|
|
|
22
|
-
path_jobs[d.path] ||= d
|
|
10
|
+
all_deps.each do |dep|
|
|
11
|
+
begin
|
|
12
|
+
dep.clean if (dep.error? && dep.recoverable_error?) ||
|
|
13
|
+
dep.aborted? || (dep.done? && ! dep.updated?)
|
|
14
|
+
rescue RbbtException
|
|
15
|
+
Log.exception $!
|
|
16
|
+
next
|
|
23
17
|
end
|
|
24
|
-
|
|
25
|
-
heap.concat deps.collect(&:path)
|
|
26
|
-
heap.uniq!
|
|
27
18
|
end
|
|
28
|
-
|
|
29
|
-
path_jobs
|
|
30
19
|
end
|
|
31
20
|
|
|
32
21
|
def self.job_workload(jobs)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
module SchedulerJob
|
|
2
|
-
@batch_base_dir = File.expand_path(File.join('~/scout-batch'))
|
|
2
|
+
@batch_base_dir = File.expand_path(File.join('~/scout-batch'))
|
|
3
3
|
self.singleton_class.attr_accessor :batch_base_dir
|
|
4
4
|
|
|
5
5
|
module_function
|
|
@@ -27,7 +27,7 @@ module SchedulerJob
|
|
|
27
27
|
|
|
28
28
|
singularity_img, singularity_opt_dir, singularity_ruby_inline, singularity_mounts = options.values_at :singularity_img, :singularity_opt_dir, :singularity_ruby_inline, :singularity_mounts
|
|
29
29
|
|
|
30
|
-
singularity_cmd = %(singularity exec -e -B "#{File.expand_path singularity_opt_dir}":/singularity_opt/ -B "#{File.expand_path singularity_ruby_inline}":"/.singularity_ruby_inline":rw )
|
|
30
|
+
singularity_cmd = %(singularity exec -e -B "#{File.expand_path singularity_opt_dir}":/singularity_opt/ -B "#{File.expand_path singularity_ruby_inline}":"/.singularity_ruby_inline":rw )
|
|
31
31
|
|
|
32
32
|
if singularity_mounts
|
|
33
33
|
singularity_mounts.split(",").each do |mount|
|
|
@@ -37,7 +37,7 @@ module SchedulerJob
|
|
|
37
37
|
|
|
38
38
|
if contain && options[:hardened]
|
|
39
39
|
singularity_cmd << %( -C -H "#{contain}" \
|
|
40
|
-
-B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw
|
|
40
|
+
-B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw
|
|
41
41
|
-B "#{options[:batch_dir]}" \
|
|
42
42
|
-B /scratch/tmp \
|
|
43
43
|
#{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
|
|
@@ -81,17 +81,15 @@ module SchedulerJob
|
|
|
81
81
|
|
|
82
82
|
task = job.task_name
|
|
83
83
|
|
|
84
|
-
if job.
|
|
85
|
-
override_deps = job.
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
end.uniq * ","
|
|
94
|
-
options[:override_deps] = override_deps unless override_deps.empty?
|
|
84
|
+
if job.overriden?
|
|
85
|
+
override_deps = job.recursive_overrider_deps.collect do |dep|
|
|
86
|
+
o_workflow = dep.overriden_workflow || dep.workflow
|
|
87
|
+
o_workflow = o_workflow.name if o_workflow.respond_to?(:name)
|
|
88
|
+
o_task_name = dep.overriden_task || dep.task.name
|
|
89
|
+
name = [o_workflow, o_task_name] * "#"
|
|
90
|
+
[name, dep.path] * "="
|
|
91
|
+
end.uniq * ","
|
|
92
|
+
options[:override_deps] = override_deps unless override_deps.empty?
|
|
95
93
|
end
|
|
96
94
|
|
|
97
95
|
# Save inputs into inputs_dir (only if provided)
|
|
@@ -192,7 +190,7 @@ workflow task #{workflow} #{task} #{cmds}
|
|
|
192
190
|
keys_from_config.each do |key|
|
|
193
191
|
next unless batch_options.include? key
|
|
194
192
|
default_value = Scout::Config.get(key, "batch_#{key}", "batch")
|
|
195
|
-
next if default_value.nil?
|
|
193
|
+
next if default_value.nil?
|
|
196
194
|
IndiferentHash.add_defaults batch_options, default_value
|
|
197
195
|
end
|
|
198
196
|
|
|
@@ -211,7 +209,7 @@ workflow task #{workflow} #{task} #{cmds}
|
|
|
211
209
|
batch_options[:contain] = File.join(contain_base, random_file)
|
|
212
210
|
end
|
|
213
211
|
|
|
214
|
-
batch_options[:sync] ||= "~/.scout/var/jobs"
|
|
212
|
+
batch_options[:sync] ||= "~/.scout/var/jobs"
|
|
215
213
|
batch_options[:wipe_container] ||= 'post'
|
|
216
214
|
end
|
|
217
215
|
|
|
@@ -219,23 +217,23 @@ workflow task #{workflow} #{task} #{cmds}
|
|
|
219
217
|
options[:workdir_all] = batch_options[:contain]
|
|
220
218
|
end
|
|
221
219
|
|
|
222
|
-
IndiferentHash.add_defaults batch_options,
|
|
220
|
+
IndiferentHash.add_defaults batch_options,
|
|
223
221
|
:batch_name => batch_name,
|
|
224
|
-
:inputs_dir => inputs_dir,
|
|
225
|
-
:nodes => 1,
|
|
222
|
+
:inputs_dir => inputs_dir,
|
|
223
|
+
:nodes => 1,
|
|
226
224
|
:step_path => job.path,
|
|
227
225
|
:task_cpus => 1,
|
|
228
|
-
:time => '2min',
|
|
226
|
+
:time => '2min',
|
|
229
227
|
:env => {'JDK_JAVA_OPTIONS' => "-Xms1g -Xmx${MAX_MEMORY}m"},
|
|
230
228
|
:singularity_img => ENV["SINGULARITY_IMG"] || "~/scout.singularity.img",
|
|
231
229
|
:singularity_ruby_inline => ENV["SINGULARITY_RUBY_INLINE"] || "~/.singularity_ruby_inline",
|
|
232
230
|
:singularity_opt_dir => ENV["SINGULARITY_OPT_DIR"] || "~/singularity_opt",
|
|
233
|
-
:workdir => Dir.pwd
|
|
231
|
+
:workdir => Dir.pwd
|
|
234
232
|
|
|
235
233
|
exec_cmd = exec_cmd(job, batch_options)
|
|
236
234
|
scout_cmd = scout_job_exec_cmd(job, options)
|
|
237
235
|
|
|
238
|
-
IndiferentHash.add_defaults batch_options,
|
|
236
|
+
IndiferentHash.add_defaults batch_options,
|
|
239
237
|
:exec_cmd => exec_cmd,
|
|
240
238
|
:scout_cmd => scout_cmd
|
|
241
239
|
|
|
@@ -344,7 +342,7 @@ batch_erase_contain_dir()
|
|
|
344
342
|
function batch_sync_contain_dir(){
|
|
345
343
|
mkdir -p "$(dirname '#{sync}')"
|
|
346
344
|
rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{sync}/" 2>1 >> '#{options[:fsync]}'
|
|
347
|
-
sync_es="$?"
|
|
345
|
+
sync_es="$?"
|
|
348
346
|
echo $sync_es > '#{options[:fsexit]}'
|
|
349
347
|
find '#{sync}' -type l -ls | awk '$13 ~ /^#{sync.gsub('/','\/')}/ { sub("#{source}", "#{sync}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
|
|
350
348
|
}
|
|
@@ -353,7 +351,7 @@ function batch_sync_contain_dir(){
|
|
|
353
351
|
|
|
354
352
|
if options[:env]
|
|
355
353
|
prepare_environment +=<<-EOF
|
|
356
|
-
# Set ENV variables
|
|
354
|
+
# Set ENV variables
|
|
357
355
|
#{options[:env].collect{|n,v| "export #{n}=\"#{v}\"" } * "\n"}
|
|
358
356
|
EOF
|
|
359
357
|
end
|
|
@@ -384,7 +382,7 @@ for tmpd in persist_locks produce_locks R_sockets sensiblewrite sensiblewrit
|
|
|
384
382
|
mkdir -p "#{contain}/.scout/tmp/$tmpd"
|
|
385
383
|
done
|
|
386
384
|
|
|
387
|
-
# Copy environment
|
|
385
|
+
# Copy environment
|
|
388
386
|
cp ~/.scout/etc/environment #{contain}/.scout/etc/
|
|
389
387
|
|
|
390
388
|
# Set search_paths
|
|
@@ -406,7 +404,7 @@ echo "user_scratch: #{scratch_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}"
|
|
|
406
404
|
exec_cmd, job_cmd, task_cpus = options.values_at :exec_cmd, :scout_cmd, :task_cpus
|
|
407
405
|
|
|
408
406
|
script=<<-EOF
|
|
409
|
-
step_path=$(
|
|
407
|
+
step_path=$(
|
|
410
408
|
#{exec_cmd} #{job_cmd} --printpath
|
|
411
409
|
)
|
|
412
410
|
exit_status=$?
|
|
@@ -426,7 +424,7 @@ fi
|
|
|
426
424
|
|
|
427
425
|
if options[:sync]
|
|
428
426
|
sync_environment +=<<-EOF
|
|
429
|
-
if [ $exit_status == '0' ]; then
|
|
427
|
+
if [ $exit_status == '0' ]; then
|
|
430
428
|
batch_sync_contain_dir
|
|
431
429
|
else
|
|
432
430
|
sync_es=$exit_status
|
|
@@ -441,8 +439,8 @@ fi
|
|
|
441
439
|
cleanup_environment = ""
|
|
442
440
|
|
|
443
441
|
cleanup_environment +=<<-EOF if options[:purge_deps]
|
|
444
|
-
if [ $exit_status == '0' ]; then
|
|
445
|
-
#{options[:exec_cmd]} workflow forget_deps --purge --recursive_purge "$step_path" 2>1 >> '#{options[:fsync]}'
|
|
442
|
+
if [ $exit_status == '0' ]; then
|
|
443
|
+
#{options[:exec_cmd]} workflow forget_deps --purge --recursive_purge "$step_path" 2>1 >> '#{options[:fsync]}'
|
|
446
444
|
fi
|
|
447
445
|
EOF
|
|
448
446
|
|
|
@@ -453,7 +451,7 @@ batch_erase_contain_dir
|
|
|
453
451
|
EOF
|
|
454
452
|
elsif options[:wipe_container] == 'post' || options[:wipe_container] == 'both'
|
|
455
453
|
cleanup_environment +=<<-EOF
|
|
456
|
-
if [ $sync_es == '0' -a $empty_contain_dir == 'true' ]; then
|
|
454
|
+
if [ $sync_es == '0' -a $empty_contain_dir == 'true' ]; then
|
|
457
455
|
batch_erase_contain_dir
|
|
458
456
|
fi
|
|
459
457
|
EOF
|
|
@@ -512,7 +510,7 @@ exit $exit_status
|
|
|
512
510
|
env > #{batch_options[:fenv]}
|
|
513
511
|
|
|
514
512
|
# #{Log.color :green, "2. Execute"}
|
|
515
|
-
#{execute}
|
|
513
|
+
#{execute}
|
|
516
514
|
|
|
517
515
|
# #{Log.color :green, "3. Sync and cleanup environment"}
|
|
518
516
|
#{sync_environment}
|
|
@@ -555,13 +553,13 @@ env > #{batch_options[:fenv]}
|
|
|
555
553
|
def run_job(job, options = {})
|
|
556
554
|
system = self.to_s.split("::").last
|
|
557
555
|
|
|
558
|
-
batch_base_dir, clean_batch_job, remove_batch_dir, procpath, tail, batch_dependencies, dry_run, orchestration_rules_file = IndiferentHash.process_options options,
|
|
556
|
+
batch_base_dir, clean_batch_job, remove_batch_dir, procpath, tail, batch_dependencies, dry_run, orchestration_rules_file = IndiferentHash.process_options options,
|
|
559
557
|
:batch_base_dir, :clean_batch_job, :remove_batch_dir, :batch_procpath, :tail, :batch_dependencies, :dry_run, :orchestration_rules,
|
|
560
558
|
:batch_base_dir => SchedulerJob.batch_base_dir
|
|
561
559
|
|
|
562
560
|
if (batch_job = job.info[:batch_job]) && job_queued(batch_job)
|
|
563
561
|
Log.info "Job #{job.short_path} already queued in #{batch_job}"
|
|
564
|
-
return batch_job, batch_dir_for_id(batch_base_dir, batch_job)
|
|
562
|
+
return batch_job, batch_dir_for_id(batch_base_dir, batch_job)
|
|
565
563
|
end
|
|
566
564
|
|
|
567
565
|
if job.running?
|
|
@@ -570,7 +568,7 @@ env > #{batch_options[:fenv]}
|
|
|
570
568
|
if job.info[:batch_job]
|
|
571
569
|
return job.info[:batch_job], batch_dir_for_id(batch_base_dir, batch_job)
|
|
572
570
|
else
|
|
573
|
-
return
|
|
571
|
+
return
|
|
574
572
|
end
|
|
575
573
|
end
|
|
576
574
|
|
|
@@ -582,8 +580,8 @@ env > #{batch_options[:fenv]}
|
|
|
582
580
|
workflows_to_load = job.rec_dependencies.select{|d| Step === d}.collect{|d| d.workflow }.compact.collect(&:to_s) - [workflow.to_s]
|
|
583
581
|
|
|
584
582
|
TmpFile.with_file(nil, remove_batch_dir, :tmpdir => batch_base_dir, :prefix => "#{system}_scout_job-#{workflow.to_s}-#{task_name}-") do |batch_dir|
|
|
585
|
-
IndiferentHash.add_defaults options,
|
|
586
|
-
:batch_dir => batch_dir,
|
|
583
|
+
IndiferentHash.add_defaults options,
|
|
584
|
+
:batch_dir => batch_dir,
|
|
587
585
|
:inputs_dir => File.join(batch_dir, "inputs_dir"),
|
|
588
586
|
:workflows => workflows_to_load.any? ? workflows_to_load.uniq * "," : nil
|
|
589
587
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
require_relative 'job'
|
|
2
2
|
require 'scout'
|
|
3
3
|
|
|
4
|
-
module PBS
|
|
4
|
+
module PBS
|
|
5
5
|
extend SchedulerJob
|
|
6
6
|
|
|
7
7
|
def self.system
|
|
@@ -13,7 +13,7 @@ module PBS
|
|
|
13
13
|
let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
|
|
14
14
|
let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $PBS_CPUS_PER_TASK )"
|
|
15
15
|
MAX_MEMORY="$MAX_MEMORY_DEFAULT"
|
|
16
|
-
[ ! -z $PBS_MEM_PER_CPU ] && let MAX_MEMORY="$PBS_MEM_PER_CPU * $PBS_CPUS_PER_TASK"
|
|
16
|
+
[ ! -z $PBS_MEM_PER_CPU ] && let MAX_MEMORY="$PBS_MEM_PER_CPU * $PBS_CPUS_PER_TASK"
|
|
17
17
|
[ ! -z $PBS_MEM_PER_NODE ] && MAX_MEMORY="$PBS_MEM_PER_NODE"
|
|
18
18
|
export MAX_MEMORY_DEFAULT
|
|
19
19
|
export MAX_MEMORY
|
|
@@ -36,7 +36,7 @@ cd ${PBS_O_WORKDIR}
|
|
|
36
36
|
time = IndiferentHash.process_options options, :time
|
|
37
37
|
nodes = IndiferentHash.process_options options, :nodes
|
|
38
38
|
|
|
39
|
-
# PBS
|
|
39
|
+
# PBS
|
|
40
40
|
place = IndiferentHash.process_options options, :place, :place => 'scatter'
|
|
41
41
|
system = IndiferentHash.process_options options, :partition
|
|
42
42
|
filesystems = IndiferentHash.process_options options, :filesystems
|
|
@@ -45,7 +45,7 @@ cd ${PBS_O_WORKDIR}
|
|
|
45
45
|
|
|
46
46
|
filesystems = filesystems * "," if Array === filesystems
|
|
47
47
|
|
|
48
|
-
# NOT USED
|
|
48
|
+
# NOT USED
|
|
49
49
|
partition = IndiferentHash.process_options options, :partition
|
|
50
50
|
task_cpus = IndiferentHash.process_options options, :task_cpus
|
|
51
51
|
exclusive = IndiferentHash.process_options options, :exclusive
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
require_relative 'job'
|
|
2
2
|
require 'scout'
|
|
3
3
|
|
|
4
|
-
module SLURM
|
|
4
|
+
module SLURM
|
|
5
5
|
|
|
6
6
|
extend SchedulerJob
|
|
7
7
|
|
|
@@ -15,7 +15,7 @@ module SLURM
|
|
|
15
15
|
let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
|
|
16
16
|
let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $SLURM_CPUS_PER_TASK )"
|
|
17
17
|
MAX_MEMORY="$MAX_MEMORY_DEFAULT"
|
|
18
|
-
[ ! -z $SLURM_MEM_PER_CPU ] && let MAX_MEMORY="$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK"
|
|
18
|
+
[ ! -z $SLURM_MEM_PER_CPU ] && let MAX_MEMORY="$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK"
|
|
19
19
|
[ ! -z $SLURM_MEM_PER_NODE ] && MAX_MEMORY="$SLURM_MEM_PER_NODE"
|
|
20
20
|
export MAX_MEMORY_DEFAULT
|
|
21
21
|
export MAX_MEMORY
|
|
@@ -12,31 +12,42 @@ module Workflow::Scheduler
|
|
|
12
12
|
Workflow::Scheduler.process_batches(batches, options)
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
+
def self.produce_single_batch(job, rules = {}, options = {})
|
|
16
|
+
batches = Workflow::Orchestrator.job_batches(rules, job)
|
|
17
|
+
rules = batches.inject({}){|acc,b| acc = Workflow::Orchestrator.accumulate_rules acc, b[:rules] }
|
|
18
|
+
|
|
19
|
+
max_time = batches.collect{|b| b[:rules][:time] }.max
|
|
20
|
+
rules[:time] = options.include?(:time) ? options[:time] : max_time
|
|
21
|
+
rules[:deploy] = :local
|
|
22
|
+
if rules[:exclusive]
|
|
23
|
+
rules[:task_cpus] = nil
|
|
24
|
+
rules[:cpus] = nil
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
batch = {top_level: job, jobs: [job], rules: rules, deps: [] }
|
|
28
|
+
Workflow::Scheduler.process_batches([batch], options)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
|
|
15
32
|
def self.process_batches(batches, process_options = {})
|
|
16
33
|
failed_jobs = []
|
|
17
34
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
sorted = []
|
|
21
|
-
while pending.any?
|
|
22
|
-
leaf_nodes = batches.select{|batch| (batch[:deps] - sorted).empty? }
|
|
23
|
-
sorted.concat(leaf_nodes - sorted)
|
|
24
|
-
pending -= leaf_nodes
|
|
25
|
-
end
|
|
35
|
+
sorted = Workflow::Orchestrator.sort_batches batches
|
|
36
|
+
cleaned = Workflow::Orchestrator.clean_batches sorted
|
|
26
37
|
|
|
27
38
|
batch_system = Scout::Config.get :system, :batch, :scheduler, 'env:BATCH_SYSTEM', default: 'SLURM'
|
|
28
39
|
|
|
29
40
|
batch_ids = {}
|
|
30
|
-
|
|
41
|
+
cleaned.collect do |batch|
|
|
31
42
|
job_options = batch[:rules]
|
|
32
43
|
job_options = IndiferentHash.add_defaults job_options, process_options.dup
|
|
33
44
|
|
|
34
45
|
if batch[:deps].nil?
|
|
35
|
-
batch_dependencies = []
|
|
36
|
-
else
|
|
46
|
+
batch_dependencies = []
|
|
47
|
+
else
|
|
37
48
|
top_jobs = batch[:jobs]
|
|
38
49
|
|
|
39
|
-
batch_dependencies = batch[:deps].collect{|dep|
|
|
50
|
+
batch_dependencies = batch[:deps].collect{|dep|
|
|
40
51
|
dep_target = dep[:top_level]
|
|
41
52
|
id = batch_ids[dep_target].to_s
|
|
42
53
|
|
|
@@ -132,10 +132,10 @@ module Workflow
|
|
|
132
132
|
info[:time] << time
|
|
133
133
|
|
|
134
134
|
report_keys.each do |key|
|
|
135
|
-
info[key] = dep_info[key]
|
|
135
|
+
info[key] = dep_info[key]
|
|
136
136
|
end
|
|
137
137
|
|
|
138
|
-
dep.info[:config_keys].each do |kinfo|
|
|
138
|
+
dep.info[:config_keys].each do |kinfo|
|
|
139
139
|
key, value, tokens = kinfo
|
|
140
140
|
|
|
141
141
|
info[key.to_s] = value if report_keys.include? key.to_s
|
|
@@ -31,7 +31,7 @@ module Workflow
|
|
|
31
31
|
title = doc_parse_first_line doc
|
|
32
32
|
description, task_info = doc_parse_up_to doc, /^# Tasks/i
|
|
33
33
|
task_description, tasks = doc_parse_up_to task_info, /^##/, true
|
|
34
|
-
tasks = doc_parse_chunks tasks, /^## (.*)/
|
|
34
|
+
tasks = doc_parse_chunks tasks, /^## (.*)/
|
|
35
35
|
{:title => title.strip, :description => description.strip, :task_description => task_description.strip, :tasks => tasks}
|
|
36
36
|
end
|
|
37
37
|
|
|
@@ -52,11 +52,11 @@ module Workflow
|
|
|
52
52
|
documentation = Workflow.parse_workflow_doc documentation_markdown
|
|
53
53
|
|
|
54
54
|
if @description && (documentation[:description].nil? || documentation[:description].empty?)
|
|
55
|
-
documentation[:description] = @description
|
|
55
|
+
documentation[:description] = @description
|
|
56
56
|
end
|
|
57
57
|
|
|
58
58
|
if @title && (documentation[:title].nil? || documentation[:title].empty?)
|
|
59
|
-
documentation[:title] = @title
|
|
59
|
+
documentation[:title] = @title
|
|
60
60
|
end
|
|
61
61
|
documentation[:tasks].each do |task, description|
|
|
62
62
|
if task.include? "#"
|
|
@@ -74,7 +74,7 @@ module Workflow
|
|
|
74
74
|
if workflow.tasks.include? task
|
|
75
75
|
workflow.tasks[task].description = description
|
|
76
76
|
else
|
|
77
|
-
Log.low "Documentation for #{ task }, but not a #{ workflow.to_s } task"
|
|
77
|
+
Log.low "Documentation for #{ task }, but not a #{ workflow.to_s } task"
|
|
78
78
|
end
|
|
79
79
|
end
|
|
80
80
|
documentation
|
data/lib/scout/workflow/path.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
class Step
|
|
2
|
+
|
|
2
3
|
def rec_dependencies(connected = false, seen = Set.new)
|
|
3
4
|
@rec_dependencies = {}
|
|
4
5
|
@rec_dependencies[connected] ||= begin
|
|
@@ -34,6 +35,31 @@ class Step
|
|
|
34
35
|
end.compact.uniq
|
|
35
36
|
end
|
|
36
37
|
|
|
38
|
+
def overrider?
|
|
39
|
+
! (overriden_task.nil? && overriden_workflow.nil?)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def overriden?
|
|
43
|
+
@overriden
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def overriden_deps
|
|
47
|
+
dependencies.select{|d| d.overriden? }
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def recursive_overriden_deps
|
|
51
|
+
overriden_deps = self.overriden_deps
|
|
52
|
+
overriden_deps + overriden_deps.collect{|dep| dep.recursive_overriden_deps }
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def overrider_deps
|
|
56
|
+
dependencies.select{|d| d.overrider? }
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def recursive_overrider_deps
|
|
60
|
+
self.rec_dependencies.select{|dep| dep.overrider? }
|
|
61
|
+
end
|
|
62
|
+
|
|
37
63
|
def prepare_dependencies
|
|
38
64
|
inverse_dep = {}
|
|
39
65
|
|
|
@@ -44,7 +70,14 @@ class Step
|
|
|
44
70
|
end
|
|
45
71
|
|
|
46
72
|
next if dep.done?
|
|
47
|
-
|
|
73
|
+
|
|
74
|
+
if dep.error? && ! dep.recoverable_error?
|
|
75
|
+
if dep.canfail?
|
|
76
|
+
next
|
|
77
|
+
else
|
|
78
|
+
raise dep.exception
|
|
79
|
+
end
|
|
80
|
+
end
|
|
48
81
|
|
|
49
82
|
if dep.dependencies
|
|
50
83
|
dep.dependencies.each do |d|
|
|
@@ -74,7 +107,7 @@ class Step
|
|
|
74
107
|
end
|
|
75
108
|
|
|
76
109
|
def run_dependencies
|
|
77
|
-
all_dependencies.each do |dep|
|
|
110
|
+
all_dependencies.each do |dep|
|
|
78
111
|
next if dep.running? || dep.done?
|
|
79
112
|
next if dep.error? && ! dep.recoverable_error?
|
|
80
113
|
|
|
@@ -100,7 +133,7 @@ class Step
|
|
|
100
133
|
end
|
|
101
134
|
|
|
102
135
|
def abort_dependencies
|
|
103
|
-
all_dependencies.each{|dep| dep.abort if dep.running? }
|
|
136
|
+
all_dependencies.each{|dep| dep.abort if dep.running? }
|
|
104
137
|
end
|
|
105
138
|
|
|
106
139
|
def self.wait_for_jobs(jobs, canfail=false)
|