hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a199__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
- hpcflow/_version.py +1 -1
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +5 -0
- hpcflow/sdk/app.py +150 -89
- hpcflow/sdk/cli.py +263 -84
- hpcflow/sdk/cli_common.py +99 -5
- hpcflow/sdk/config/callbacks.py +38 -1
- hpcflow/sdk/config/config.py +102 -13
- hpcflow/sdk/config/errors.py +19 -5
- hpcflow/sdk/config/types.py +3 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +914 -262
- hpcflow/sdk/core/cache.py +76 -34
- hpcflow/sdk/core/command_files.py +14 -128
- hpcflow/sdk/core/commands.py +35 -6
- hpcflow/sdk/core/element.py +122 -50
- hpcflow/sdk/core/errors.py +58 -2
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/loop.py +408 -50
- hpcflow/sdk/core/loop_cache.py +4 -4
- hpcflow/sdk/core/parameters.py +382 -37
- hpcflow/sdk/core/run_dir_files.py +13 -40
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +119 -30
- hpcflow/sdk/core/task_schema.py +68 -0
- hpcflow/sdk/core/test_utils.py +66 -27
- hpcflow/sdk/core/types.py +54 -1
- hpcflow/sdk/core/utils.py +78 -7
- hpcflow/sdk/core/workflow.py +1538 -336
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +7 -0
- hpcflow/sdk/helper/cli.py +1 -0
- hpcflow/sdk/log.py +42 -15
- hpcflow/sdk/persistence/base.py +405 -53
- hpcflow/sdk/persistence/json.py +177 -52
- hpcflow/sdk/persistence/pending.py +237 -69
- hpcflow/sdk/persistence/store_resource.py +3 -2
- hpcflow/sdk/persistence/types.py +15 -4
- hpcflow/sdk/persistence/zarr.py +928 -81
- hpcflow/sdk/submission/jobscript.py +1408 -489
- hpcflow/sdk/submission/schedulers/__init__.py +40 -5
- hpcflow/sdk/submission/schedulers/direct.py +33 -19
- hpcflow/sdk/submission/schedulers/sge.py +51 -16
- hpcflow/sdk/submission/schedulers/slurm.py +44 -16
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/base.py +68 -20
- hpcflow/sdk/submission/shells/bash.py +222 -129
- hpcflow/sdk/submission/shells/powershell.py +200 -150
- hpcflow/sdk/submission/submission.py +852 -119
- hpcflow/sdk/submission/types.py +18 -21
- hpcflow/sdk/typing.py +24 -5
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +19 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +821 -70
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
- hpcflow/tests/unit/test_action.py +176 -0
- hpcflow/tests/unit/test_app.py +20 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +133 -0
- hpcflow/tests/unit/test_config.py +122 -1
- hpcflow/tests/unit/test_element_iteration.py +47 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_loop.py +1332 -27
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_parameter.py +13 -0
- hpcflow/tests/unit/test_persistence.py +190 -8
- hpcflow/tests/unit/test_run.py +109 -3
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_shell.py +20 -0
- hpcflow/tests/unit/test_submission.py +5 -76
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +332 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +142 -2
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +7 -4
- hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
- hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -3,16 +3,18 @@ Model of information submitted to a scheduler.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
|
+
from collections import defaultdict
|
6
7
|
|
7
8
|
import os
|
8
9
|
import shutil
|
9
10
|
import socket
|
10
11
|
import subprocess
|
11
|
-
from textwrap import indent
|
12
|
-
from typing import cast, overload, TYPE_CHECKING
|
12
|
+
from textwrap import dedent, indent
|
13
|
+
from typing import TextIO, cast, overload, TYPE_CHECKING
|
13
14
|
from typing_extensions import override
|
14
15
|
|
15
16
|
import numpy as np
|
17
|
+
from hpcflow.sdk.core import SKIPPED_EXIT_CODE
|
16
18
|
from hpcflow.sdk.core.enums import EARStatus
|
17
19
|
from hpcflow.sdk.core.errors import (
|
18
20
|
JobscriptSubmissionFailure,
|
@@ -21,8 +23,9 @@ from hpcflow.sdk.core.errors import (
|
|
21
23
|
|
22
24
|
from hpcflow.sdk.typing import hydrate
|
23
25
|
from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
|
24
|
-
from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
|
26
|
+
from hpcflow.sdk.core.utils import nth_value, parse_timestamp, current_timestamp
|
25
27
|
from hpcflow.sdk.log import TimeIt
|
28
|
+
from hpcflow.sdk.submission.schedulers import QueuedScheduler
|
26
29
|
from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
|
27
30
|
from hpcflow.sdk.submission.shells import get_shell, DEFAULT_SHELL_NAMES
|
28
31
|
|
@@ -36,32 +39,63 @@ if TYPE_CHECKING:
|
|
36
39
|
from ..core.actions import ElementActionRun
|
37
40
|
from ..core.element import ElementResources
|
38
41
|
from ..core.loop_cache import LoopIndex
|
39
|
-
from ..core.types import JobscriptSubmissionFailureArgs
|
42
|
+
from ..core.types import JobscriptSubmissionFailureArgs, BlockActionKey
|
40
43
|
from ..core.workflow import WorkflowTask, Workflow
|
44
|
+
from ..persistence.base import PersistentStore
|
41
45
|
from .submission import Submission
|
42
46
|
from .shells.base import Shell
|
43
|
-
from .schedulers import Scheduler
|
47
|
+
from .schedulers import Scheduler
|
44
48
|
from .enums import JobscriptElementState
|
45
49
|
from .types import (
|
46
50
|
JobScriptCreationArguments,
|
47
51
|
JobScriptDescriptor,
|
48
|
-
|
52
|
+
ResolvedJobscriptBlockDependencies,
|
49
53
|
SchedulerRef,
|
50
54
|
VersionInfo,
|
51
55
|
)
|
56
|
+
from ..core.cache import ObjectCache
|
57
|
+
from hpcflow.sdk.submission.submission import JOBSCRIPT_SUBMIT_TIME_KEYS
|
58
|
+
|
59
|
+
|
60
|
+
def is_jobscript_array(
    resources: ElementResources, num_elements: int, store: PersistentStore
) -> bool:
    """Return True if a job array should be used for the specified `ElementResources`.

    Parameters
    ----------
    resources
        The resources to inspect; the `scheduler`, `use_job_array` and
        `combine_scripts` attributes are consulted.
    num_elements
        Number of elements in the prospective jobscript. Only used when
        `resources.use_job_array` is `None` (i.e. not explicitly requested or
        forbidden).
    store
        The persistent store; `store._features.EAR_parallelism` indicates whether
        runs may execute in parallel.

    Raises
    ------
    ValueError
        If `use_job_array` is requested with a direct scheduler, or with a store
        that does not support run parallelism.
    """
    if resources.scheduler in ("direct", "direct_posix"):
        # direct execution has no scheduler, so there is no notion of a job array
        if resources.use_job_array:
            raise ValueError(
                f"`use_job_array` not supported by scheduler: {resources.scheduler!r}"
            )
        return False

    if resources.combine_scripts:
        return False

    run_parallelism = store._features.EAR_parallelism
    if resources.use_job_array is None:
        # no explicit preference: use an array only when it is both useful (more
        # than one element) and supported by the store
        return bool(num_elements > 1 and run_parallelism)

    if resources.use_job_array and not run_parallelism:
        raise ValueError(
            f"Store type {store!r} does not support element parallelism, so jobs "
            "cannot be submitted as scheduler arrays."
        )
    return resources.use_job_array
|
52
87
|
|
53
88
|
|
54
89
|
@TimeIt.decorator
|
55
90
|
def generate_EAR_resource_map(
|
56
91
|
task: WorkflowTask,
|
57
92
|
loop_idx: LoopIndex[str, int],
|
93
|
+
cache: ObjectCache,
|
58
94
|
) -> tuple[Sequence[ElementResources], Sequence[int], NDArray, NDArray]:
|
59
95
|
"""
|
60
96
|
Generate an integer array whose rows represent actions and columns represent task
|
61
97
|
elements and whose values index unique resources.
|
62
98
|
"""
|
63
|
-
# TODO: assume single iteration for now; later we will loop over Loop tasks for each
|
64
|
-
# included task and call this func with specific loop indices
|
65
99
|
none_val = -1
|
66
100
|
resources: list[ElementResources] = []
|
67
101
|
resource_hashes: list[int] = []
|
@@ -69,16 +103,16 @@ def generate_EAR_resource_map(
|
|
69
103
|
arr_shape = (task.num_actions, task.num_elements)
|
70
104
|
resource_map = np.empty(arr_shape, dtype=int)
|
71
105
|
EAR_ID_map = np.empty(arr_shape, dtype=int)
|
72
|
-
# EAR_idx_map = np.empty(
|
73
|
-
# shape=arr_shape,
|
74
|
-
# dtype=[("EAR_idx", np.int32), ("run_idx", np.int32), ("iteration_idx", np.int32)],
|
75
|
-
# )
|
76
106
|
resource_map[:] = none_val
|
77
107
|
EAR_ID_map[:] = none_val
|
78
|
-
# EAR_idx_map[:] = (none_val, none_val, none_val) # TODO: add iteration_idx as well
|
79
108
|
|
80
|
-
|
81
|
-
|
109
|
+
assert cache.elements is not None
|
110
|
+
assert cache.iterations is not None
|
111
|
+
|
112
|
+
for elem_id in task.element_IDs:
|
113
|
+
element = cache.elements[elem_id]
|
114
|
+
for iter_ID_i in element.iteration_IDs:
|
115
|
+
iter_i = cache.iterations[iter_ID_i]
|
82
116
|
if iter_i.loop_idx != loop_idx:
|
83
117
|
continue
|
84
118
|
if iter_i.EARs_initialised: # not strictly needed (actions will be empty)
|
@@ -188,12 +222,12 @@ def group_resource_map_into_jobscripts(
|
|
188
222
|
def resolve_jobscript_dependencies(
|
189
223
|
jobscripts: Mapping[int, JobScriptCreationArguments],
|
190
224
|
element_deps: Mapping[int, Mapping[int, Sequence[int]]],
|
191
|
-
) -> Mapping[int, dict[int,
|
225
|
+
) -> Mapping[int, dict[int, ResolvedJobscriptBlockDependencies]]:
|
192
226
|
"""
|
193
227
|
Discover concrete dependencies between jobscripts.
|
194
228
|
"""
|
195
229
|
# first pass is to find the mappings between jobscript elements:
|
196
|
-
jobscript_deps: dict[int, dict[int,
|
230
|
+
jobscript_deps: dict[int, dict[int, ResolvedJobscriptBlockDependencies]] = {}
|
197
231
|
for js_idx, elem_deps in element_deps.items():
|
198
232
|
# keys of new dict are other jobscript indices on which this jobscript (js_idx)
|
199
233
|
# depends:
|
@@ -258,7 +292,9 @@ def resolve_jobscript_dependencies(
|
|
258
292
|
|
259
293
|
|
260
294
|
def _reindex_dependencies(
|
261
|
-
jobscripts: Mapping[int, JobScriptCreationArguments],
|
295
|
+
jobscripts: Mapping[int, JobScriptCreationArguments],
|
296
|
+
from_idx: int,
|
297
|
+
to_idx: int,
|
262
298
|
):
|
263
299
|
for ds_js_idx, ds_js in jobscripts.items():
|
264
300
|
if ds_js_idx <= from_idx:
|
@@ -270,7 +306,7 @@ def _reindex_dependencies(
|
|
270
306
|
|
271
307
|
@TimeIt.decorator
|
272
308
|
def merge_jobscripts_across_tasks(
|
273
|
-
jobscripts: Mapping[int, JobScriptCreationArguments]
|
309
|
+
jobscripts: Mapping[int, JobScriptCreationArguments],
|
274
310
|
) -> Mapping[int, JobScriptCreationArguments]:
|
275
311
|
"""Try to merge jobscripts between tasks.
|
276
312
|
|
@@ -284,55 +320,413 @@ def merge_jobscripts_across_tasks(
|
|
284
320
|
merged: set[int] = set()
|
285
321
|
|
286
322
|
for js_idx, js in jobscripts.items():
|
287
|
-
|
288
|
-
if len(js["dependencies"]) != 1:
|
323
|
+
if not js["dependencies"]:
|
289
324
|
continue
|
290
|
-
deps = js["dependencies"]
|
291
|
-
js_j_idx, dep_info = next(iter(deps.items()))
|
292
|
-
js_j = jobscripts[js_j_idx] # the jobscript we are merging `js` into
|
293
325
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
326
|
+
closest_idx = cast("int", max(js["dependencies"]))
|
327
|
+
closest_js = jobscripts[closest_idx]
|
328
|
+
other_deps = {k: v for k, v in js["dependencies"].items() if k != closest_idx}
|
329
|
+
|
330
|
+
# if all `other_deps` are also found within `closest_js`'s dependencies, then we
|
331
|
+
# can merge `js` into `closest_js`:
|
332
|
+
merge = True
|
333
|
+
for dep_idx, dep_i in other_deps.items():
|
334
|
+
try:
|
335
|
+
if closest_js["dependencies"][dep_idx] != dep_i:
|
336
|
+
merge = False
|
337
|
+
except KeyError:
|
338
|
+
merge = False
|
339
|
+
|
340
|
+
if merge:
|
341
|
+
js_j = closest_js # the jobscript we are merging `js` into
|
342
|
+
js_j_idx = closest_idx
|
343
|
+
dep_info = js["dependencies"][js_j_idx]
|
299
344
|
|
300
|
-
#
|
301
|
-
|
302
|
-
|
345
|
+
# can only merge if resources are the same and is array dependency:
|
346
|
+
if js["resource_hash"] == js_j["resource_hash"] and dep_info["is_array"]:
|
347
|
+
num_loop_idx = len(
|
348
|
+
js_j["task_loop_idx"]
|
349
|
+
) # TODO: should this be: `js_j["task_loop_idx"][0]`?
|
303
350
|
|
304
|
-
|
351
|
+
# append task_insert_IDs
|
352
|
+
js_j["task_insert_IDs"].append(js["task_insert_IDs"][0])
|
353
|
+
js_j["task_loop_idx"].append(js["task_loop_idx"][0])
|
305
354
|
|
306
|
-
|
307
|
-
for k, v in js["task_elements"].items():
|
308
|
-
js_j["task_elements"][k].extend(v)
|
355
|
+
add_acts = [(a, b, num_loop_idx) for a, b, _ in js["task_actions"]]
|
309
356
|
|
310
|
-
|
311
|
-
|
357
|
+
js_j["task_actions"].extend(add_acts)
|
358
|
+
for k, v in js["task_elements"].items():
|
359
|
+
js_j["task_elements"][k].extend(v)
|
312
360
|
|
313
|
-
|
314
|
-
|
361
|
+
# append to elements and elements_idx list
|
362
|
+
js_j["EAR_ID"] = np.vstack((js_j["EAR_ID"], js["EAR_ID"]))
|
315
363
|
|
316
|
-
|
317
|
-
|
364
|
+
# mark this js as defunct
|
365
|
+
merged.add(id(js))
|
366
|
+
|
367
|
+
# update dependencies of any downstream jobscripts that refer to this js
|
368
|
+
_reindex_dependencies(jobscripts, js_idx, js_j_idx)
|
318
369
|
|
319
370
|
# remove is_merged jobscripts:
|
320
371
|
return {k: v for k, v in jobscripts.items() if id(v) not in merged}
|
321
372
|
|
322
373
|
|
323
374
|
@TimeIt.decorator
|
324
|
-
def
|
325
|
-
jobscripts: Mapping[int, JobScriptCreationArguments]
|
326
|
-
) ->
|
327
|
-
"""
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
375
|
+
def resolve_jobscript_blocks(
    jobscripts: Mapping[int, JobScriptCreationArguments],
) -> list[dict[str, Any]]:
    """For contiguous, dependent, non-array jobscripts with identical resource
    requirements, combine into multi-block jobscripts.

    Parameters
    ----------
    jobscripts
        Dict whose values must be dicts with keys "is_array", "resource_hash" and
        "dependencies" (and, optionally, "resources"). The dict is processed in
        iteration order, and each jobscript's dependencies must refer only to
        jobscripts that appear earlier in that order. Note the value dicts are
        modified in place: the "resource_hash", "resources" and "is_array" keys are
        removed, and the integer keys of "dependencies" are replaced by
        `(new jobscript index, block index)` tuples.

    Returns
    -------
    list[dict[str, Any]]
        One dict per new jobscript, with keys "resources" and "is_array" (taken
        from the final block of that jobscript) and "blocks" (the list of
        modified input dicts that make up the jobscript).
    """
    # TODO: not the same type as the input, e.g. dependencies have tuple keys
    js_new: list[list[JobScriptCreationArguments]] = []
    # track new positions by new jobscript index and block index
    new_idx: dict[int, tuple[int, int]] = {}
    # for each new jobscript index, the original jobscript indices it contains
    new_idx_inv: dict[int, list[int]] = defaultdict(list)
    prev_hash = None
    blocks: list[JobScriptCreationArguments] = []
    # recursive dependencies (as new jobscript indices) of each original jobscript
    js_deps_rec: dict[int, set[int]] = {}
    for js_idx, js_i in jobscripts.items():

        # index the in-progress (not yet appended) jobscript will have in `js_new`
        cur_js_idx = len(js_new)
        new_deps_js_j = {
            new_idx[i][0] for i in cast("Sequence[int]", js_i["dependencies"])
        }
        new_deps_js_j_rec = [
            k for i in new_deps_js_j for j in new_idx_inv[i] for k in js_deps_rec[j]
        ]

        js_deps_rec[js_idx] = new_deps_js_j.union(new_deps_js_j_rec)

        # recursive dependencies of js_i (which we're looking to merge), excluding the
        # dependency on the current jobscript:
        js_j_deps_rec_no_cur = js_deps_rec[js_idx] - {cur_js_idx}

        # recursive dependencies of the current jobscript:
        cur_deps_rec = {
            j for i in new_idx_inv[cur_js_idx] for j in js_deps_rec[i] if j != cur_js_idx
        }

        # can we merge js_i into the current jobscript, as far as dependencies are
        # concerned?
        deps_mergable = cur_js_idx in new_deps_js_j
        if deps_mergable and js_j_deps_rec_no_cur:
            deps_mergable = js_j_deps_rec_no_cur == cur_deps_rec

        if js_i["is_array"]:
            # array jobs cannot be merged into the same jobscript

            # append existing block:
            if blocks:
                js_new.append(blocks)
                prev_hash = None
                blocks = []

            new_idx[js_idx] = (len(js_new), 0)
            new_idx_inv[len(js_new)].append(js_idx)
            js_new.append([js_i])
            continue

        if js_idx == 0 or prev_hash is None:
            # (note: zeroth index will always exist)

            # start a new block:
            blocks.append(js_i)
            new_idx[js_idx] = (len(js_new), len(blocks) - 1)
            new_idx_inv[len(js_new)].append(js_idx)

            # set resource hash to compare with the next jobscript
            prev_hash = js_i["resource_hash"]

        elif js_i["resource_hash"] == prev_hash and deps_mergable:
            # merge with previous jobscript by adding another block
            # only merge if this jobscript's dependencies include the current jobscript,
            # and any other dependencies are included in the current jobscript's
            # dependencies
            blocks.append(js_i)
            new_idx[js_idx] = (len(js_new), len(blocks) - 1)
            new_idx_inv[len(js_new)].append(js_idx)

        else:
            # cannot merge, append the new jobscript data:
            js_new.append(blocks)

            # start a new block:
            blocks = [js_i]
            new_idx[js_idx] = (len(js_new), len(blocks) - 1)
            new_idx_inv[len(js_new)].append(js_idx)

            # set resource hash to compare with the next jobscript
            prev_hash = js_i["resource_hash"]

    # append remaining blocks:
    if blocks:
        js_new.append(blocks)

    # re-index dependencies:
    js_new_: list[dict[str, Any]] = []
    for js_new_i in js_new:

        resources = None
        is_array = None
        for block_j in js_new_i:
            # swap each original (integer) dependency key for its new
            # (jobscript index, block index) position
            for k, v in new_idx.items():
                dep_data = block_j["dependencies"].pop(k, None)
                if dep_data:
                    block_j["dependencies"][v] = dep_data

            # these are jobscript-level (not block-level) data; the values of the
            # final block are the ones recorded on the jobscript
            del block_j["resource_hash"]
            resources = block_j.pop("resources", None)
            is_array = block_j.pop("is_array")

        js_new_.append(
            {
                "resources": resources,
                "is_array": is_array,
                "blocks": js_new_i,
            }
        )

    return js_new_
|
504
|
+
|
505
|
+
|
506
|
+
@hydrate
class JobscriptBlock(JSONLike):
    """A rectangular block of element-actions to run within a jobscript.

    Parameters
    ----------
    task_insert_IDs: list[int]
        The task insertion IDs.
    task_actions: list[tuple]
        The actions of the tasks.
        ``task insert ID, action_idx, index into task_loop_idx`` for each ``JS_ACTION_IDX``
    task_elements: dict[int, list[int]]
        The elements of the tasks.
        Maps ``JS_ELEMENT_IDX`` to list of ``TASK_ELEMENT_IDX`` for each ``TASK_INSERT_ID``
    EAR_ID:
        Element action run information.
    task_loop_idx: list[dict]
        Description of what loops are in play.
    dependencies: dict[tuple[int, int], dict]
        Description of dependencies. Keys are tuples of (jobscript index,
        jobscript-block index) of the dependency.
    index: int
        The index of the block within the parent jobscript.
    jobscript: ~hpcflow.app.Jobscript
        The parent jobscript.

    """

    def __init__(
        self,
        index: int,
        task_insert_IDs: list[int],
        task_loop_idx: list[dict[str, int]],
        task_actions: list[tuple[int, int, int]] | None = None,
        task_elements: dict[int, list[int]] | None = None,
        EAR_ID: NDArray | None = None,
        dependencies: (
            dict[tuple[int, int], ResolvedJobscriptBlockDependencies] | None
        ) = None,
        jobscript: Jobscript | None = None,
    ):
        self.jobscript = jobscript
        self._index = index
        self._task_insert_IDs = task_insert_IDs
        self._task_actions = task_actions
        self._task_elements = task_elements
        self._task_loop_idx = task_loop_idx
        self._EAR_ID = EAR_ID
        self._dependencies = dependencies

        # not populated by the `all_EARs` property (which re-computes its result on
        # each access); removed again in `_postprocess_to_dict`
        self._all_EARs = None

    @property
    def index(self) -> int:
        """
        The index of this block within the parent jobscript.
        """
        return self._index

    @property
    def submission(self) -> Submission:
        """
        The submission of the parent jobscript.
        """
        assert self.jobscript is not None
        return self.jobscript.submission

    @property
    def task_insert_IDs(self) -> Sequence[int]:
        """
        The insertion IDs of tasks in this jobscript-block.
        """
        return self._task_insert_IDs

    @property
    @TimeIt.decorator
    def task_actions(self) -> NDArray:
        """
        The IDs of actions of each task in this jobscript-block.
        """
        assert self.jobscript is not None
        # retrieval is delegated to the store, which is also passed the value
        # given at construction (which may be None)
        return self.workflow._store.get_jobscript_block_task_actions_array(
            sub_idx=self.submission.index,
            js_idx=self.jobscript.index,
            blk_idx=self.index,
            task_actions_arr=self._task_actions,
        )

    @property
    @TimeIt.decorator
    def task_elements(self) -> Mapping[int, Sequence[int]]:
        """
        The IDs of elements of each task in this jobscript-block.
        """
        assert self.jobscript is not None
        return self.workflow._store.get_jobscript_block_task_elements_map(
            sub_idx=self.submission.index,
            js_idx=self.jobscript.index,
            blk_idx=self.index,
            task_elems_map=self._task_elements,
        )

    @property
    @TimeIt.decorator
    def EAR_ID(self) -> NDArray:
        """
        The array of EAR IDs in this jobscript-block.
        """
        assert self.jobscript is not None
        return self.workflow._store.get_jobscript_block_run_ID_array(
            sub_idx=self.submission.index,
            js_idx=self.jobscript.index,
            blk_idx=self.index,
            run_ID_arr=self._EAR_ID,
        )

    @property
    @TimeIt.decorator
    def dependencies(
        self,
    ) -> Mapping[tuple[int, int], ResolvedJobscriptBlockDependencies]:
        """
        The dependency descriptor.
        """
        assert self.jobscript is not None
        return self.workflow._store.get_jobscript_block_dependencies(
            sub_idx=self.submission.index,
            js_idx=self.jobscript.index,
            blk_idx=self.index,
            js_dependencies=self._dependencies,
        )

    @property
    def task_loop_idx(self) -> Sequence[Mapping[str, int]]:
        """
        The description of where various task loops are.
        """
        return self._task_loop_idx

    @property
    @TimeIt.decorator
    def num_actions(self) -> int:
        """
        The maximal number of actions in the jobscript-block.
        """
        return self.EAR_ID.shape[0]

    @property
    @TimeIt.decorator
    def num_elements(self) -> int:
        """
        The maximal number of elements in the jobscript-block.
        """
        return self.EAR_ID.shape[1]

    @property
    def workflow(self) -> Workflow:
        """
        The associated workflow.
        """
        assert self.jobscript is not None
        return self.jobscript.workflow

    @property
    @TimeIt.decorator
    def all_EARs(self) -> Sequence[ElementActionRun]:
        """
        Description of EAR information for this jobscript-block.

        These are the runs of the parent jobscript whose IDs appear in this
        block's `EAR_ID` array.
        """
        assert self.jobscript is not None
        jobscript_EARs = self.jobscript.all_EARs
        # `EAR_ID` is retrieved via the store on every access, so look it up once
        # rather than once per run
        block_run_IDs = self.EAR_ID
        return [i for i in jobscript_EARs if i.id_ in block_run_IDs]

    @override
    def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
        dct = super()._postprocess_to_dict(d)
        del dct["_all_EARs"]
        # tuple keys are stored as `[key-as-list, value]` pairs; `from_json_like`
        # performs the reverse transformation
        dct["_dependencies"] = [[list(k), v] for k, v in self.dependencies.items()]
        dct = {k.lstrip("_"): v for k, v in dct.items()}
        dct["EAR_ID"] = cast("NDArray", dct["EAR_ID"]).tolist()
        return dct

    @classmethod
    def from_json_like(cls, json_like, shared_data=None):
        """
        Construct a jobscript-block from its JSON-like representation.

        Note that `json_like` is modified in place: "EAR_ID" is converted to an
        array (if not None), and "dependencies" (if not None) is converted from a
        list of pairs to a dict.
        """
        json_like["EAR_ID"] = (
            np.array(json_like["EAR_ID"]) if json_like["EAR_ID"] is not None else None
        )
        if json_like["dependencies"] is not None:
            # transform list to dict with tuple keys, and transform string keys in
            # `js_element_mapping` to integers:
            deps_processed = {}
            for i in json_like["dependencies"]:
                deps_processed_i = {
                    "js_element_mapping": {
                        int(k): v for k, v in i[1]["js_element_mapping"].items()
                    },
                    "is_array": i[1]["is_array"],
                }
                deps_processed[tuple(i[0])] = deps_processed_i
            json_like["dependencies"] = deps_processed

        return super().from_json_like(json_like, shared_data)

    def _get_EARs_arr(self) -> NDArray:
        """
        Get all associated EAR objects as a 2D array.
        """
        return np.array(self.all_EARs).reshape(self.EAR_ID.shape)

    def get_task_loop_idx_array(self) -> NDArray:
        """
        Get an array of task loop indices.

        The result has the same shape as `EAR_ID`; each row is filled with the
        third item of the corresponding entry in `task_actions`.
        """
        loop_idx = np.empty_like(self.EAR_ID)
        # `task_actions` is retrieved via the store on every access, so look it up
        # once
        task_actions = self.task_actions
        loop_idx[:] = np.array([i[2] for i in task_actions]).reshape(
            (len(task_actions), 1)
        )
        return loop_idx

    @TimeIt.decorator
    def write_EAR_ID_file(self, fp: TextIO):
        """Write a text file with `num_elements` lines and `num_actions` delimited tokens
        per line, representing whether a given EAR must be executed."""
        assert self.jobscript is not None
        # can't specify "open" newline if we pass the file name only, so pass handle:
        np.savetxt(
            fname=fp,
            X=(self.EAR_ID).T,
            fmt="%.0f",
            delimiter=self.jobscript._EAR_files_delimiter,
        )
|
336
730
|
|
337
731
|
|
338
732
|
@hydrate
|
@@ -391,32 +785,25 @@ class Jobscript(JSONLike):
|
|
391
785
|
name="resources",
|
392
786
|
class_name="ElementResources",
|
393
787
|
),
|
788
|
+
ChildObjectSpec(
|
789
|
+
name="blocks",
|
790
|
+
class_name="JobscriptBlock",
|
791
|
+
is_multiple=True,
|
792
|
+
parent_ref="jobscript",
|
793
|
+
),
|
394
794
|
)
|
395
795
|
|
396
|
-
@classmethod
|
397
|
-
def __is_QueuedScheduler(cls, value) -> TypeIs[QueuedScheduler]:
|
398
|
-
return isinstance(value, cls._app.QueuedScheduler)
|
399
|
-
|
400
796
|
def __init__(
|
401
797
|
self,
|
402
|
-
|
403
|
-
|
404
|
-
task_elements: dict[int, list[int]],
|
405
|
-
EAR_ID: NDArray,
|
798
|
+
index: int,
|
799
|
+
is_array: bool,
|
406
800
|
resources: ElementResources,
|
407
|
-
|
408
|
-
|
409
|
-
submit_time: datetime | None = None,
|
801
|
+
blocks: list[JobscriptBlock],
|
802
|
+
at_submit_metadata: dict[str, Any] | None = None,
|
410
803
|
submit_hostname: str | None = None,
|
411
804
|
submit_machine: str | None = None,
|
412
|
-
|
413
|
-
scheduler_job_ID: str | None = None,
|
414
|
-
process_ID: int | None = None,
|
805
|
+
shell_idx: int | None = None,
|
415
806
|
version_info: VersionInfo | None = None,
|
416
|
-
os_name: str | None = None,
|
417
|
-
shell_name: str | None = None,
|
418
|
-
scheduler_name: str | None = None,
|
419
|
-
running: bool | None = None,
|
420
807
|
resource_hash: str | None = None,
|
421
808
|
elements: dict[int, list[int]] | None = None,
|
422
809
|
):
|
@@ -424,74 +811,62 @@ class Jobscript(JSONLike):
|
|
424
811
|
raise AttributeError("resource_hash must not be supplied")
|
425
812
|
if elements is not None:
|
426
813
|
raise AttributeError("elements must not be supplied")
|
427
|
-
self._task_insert_IDs = task_insert_IDs
|
428
|
-
self._task_loop_idx = task_loop_idx
|
429
814
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
815
|
+
if not isinstance(blocks[0], JobscriptBlock):
|
816
|
+
blocks = [
|
817
|
+
JobscriptBlock(**i, index=idx, jobscript=self)
|
818
|
+
for idx, i in enumerate(blocks)
|
819
|
+
]
|
435
820
|
|
436
|
-
self.
|
821
|
+
self._index = index
|
822
|
+
self._blocks = blocks
|
823
|
+
self._at_submit_metadata = at_submit_metadata or {
|
824
|
+
k: None for k in JOBSCRIPT_SUBMIT_TIME_KEYS
|
825
|
+
}
|
826
|
+
self._is_array = is_array
|
437
827
|
self._resources = resources
|
438
|
-
self._dependencies = dependencies
|
439
828
|
|
440
829
|
# assigned on parent `Submission.submit` (or retrieved from persistent store):
|
441
|
-
self._submit_time = submit_time
|
442
830
|
self._submit_hostname = submit_hostname
|
443
831
|
self._submit_machine = submit_machine
|
444
|
-
self.
|
832
|
+
self._shell_idx = shell_idx
|
445
833
|
|
446
|
-
self._scheduler_job_ID = scheduler_job_ID
|
447
|
-
self._process_ID = process_ID
|
448
834
|
self._version_info = version_info
|
449
835
|
|
450
|
-
# assigned as submit-time:
|
451
|
-
# TODO: these should now always be set in `resources` so shouldn't need these:
|
452
|
-
self._os_name = os_name
|
453
|
-
self._shell_name = shell_name
|
454
|
-
self._scheduler_name = scheduler_name
|
455
|
-
|
456
836
|
# assigned by parent Submission
|
457
837
|
self._submission: Submission | None = None
|
458
|
-
# assigned by parent Submission
|
459
|
-
self._index: int | None = None
|
460
838
|
# assigned on first access to `scheduler` property
|
461
839
|
self._scheduler_obj: Scheduler | None = None
|
462
840
|
# assigned on first access to `shell` property
|
463
841
|
self._shell_obj: Shell | None = None
|
464
842
|
# assigned on first access to `submit_time` property
|
465
843
|
self._submit_time_obj: datetime | None = None
|
466
|
-
self._running = running
|
467
844
|
# assigned on first access to `all_EARs` property
|
468
845
|
self._all_EARs: list[ElementActionRun] | None = None
|
469
846
|
|
847
|
+
self._set_parent_refs()
|
848
|
+
|
470
849
|
def __repr__(self) -> str:
|
471
850
|
return (
|
472
851
|
f"{self.__class__.__name__}("
|
473
852
|
f"index={self.index!r}, "
|
474
|
-
f"
|
853
|
+
f"blocks={self.blocks!r}, "
|
475
854
|
f"resources={self.resources!r}, "
|
476
|
-
f"dependencies={self.dependencies!r}"
|
477
855
|
f")"
|
478
856
|
)
|
479
857
|
|
480
858
|
@override
|
481
859
|
def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
|
482
860
|
dct = super()._postprocess_to_dict(d)
|
483
|
-
del dct["_index"]
|
484
861
|
del dct["_scheduler_obj"]
|
485
862
|
del dct["_shell_obj"]
|
486
863
|
del dct["_submit_time_obj"]
|
487
864
|
del dct["_all_EARs"]
|
488
865
|
dct = {k.lstrip("_"): v for k, v in dct.items()}
|
489
|
-
dct["EAR_ID"] = cast("NDArray", dct["EAR_ID"]).tolist()
|
490
866
|
return dct
|
491
867
|
|
492
868
|
@classmethod
|
493
869
|
def from_json_like(cls, json_like, shared_data=None):
|
494
|
-
json_like["EAR_ID"] = np.array(json_like["EAR_ID"])
|
495
870
|
return super().from_json_like(json_like, shared_data)
|
496
871
|
|
497
872
|
@property
|
@@ -499,54 +874,36 @@ class Jobscript(JSONLike):
|
|
499
874
|
"""
|
500
875
|
Alias for the workflow app in job scripts.
|
501
876
|
"""
|
502
|
-
return self.
|
877
|
+
return self.submission.WORKFLOW_APP_ALIAS
|
503
878
|
|
504
879
|
def get_commands_file_name(
|
505
|
-
self,
|
880
|
+
self, block_act_key: BlockActionKey, shell: Shell | None = None
|
506
881
|
) -> str:
|
507
882
|
"""
|
508
883
|
Get the name of a file containing commands for a particular jobscript action.
|
509
884
|
"""
|
510
885
|
return self._app.RunDirAppFiles.get_commands_file_name(
|
511
|
-
|
512
|
-
js_action_idx=js_action_idx,
|
886
|
+
block_act_key,
|
513
887
|
shell=shell or self.shell,
|
514
888
|
)
|
515
889
|
|
516
890
|
@property
|
517
|
-
def
|
518
|
-
|
519
|
-
The insertion IDs of tasks in this jobscript.
|
520
|
-
"""
|
521
|
-
return self._task_insert_IDs
|
522
|
-
|
523
|
-
@property
|
524
|
-
def task_actions(self) -> Sequence[tuple[int, int, int]]:
|
525
|
-
"""
|
526
|
-
The IDs of actions of each task in this jobscript.
|
527
|
-
"""
|
528
|
-
return self._task_actions
|
529
|
-
|
530
|
-
@property
|
531
|
-
def task_elements(self) -> Mapping[int, Sequence[int]]:
|
532
|
-
"""
|
533
|
-
The IDs of elements of each task in this jobscript.
|
534
|
-
"""
|
535
|
-
return self._task_elements
|
891
|
+
def blocks(self) -> Sequence[JobscriptBlock]:
|
892
|
+
return self._blocks
|
536
893
|
|
537
894
|
@property
|
538
|
-
def
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
895
|
+
def at_submit_metadata(self) -> dict[str, Any]:
|
896
|
+
return self.workflow._store.get_jobscript_at_submit_metadata(
|
897
|
+
sub_idx=self.submission.index,
|
898
|
+
js_idx=self.index,
|
899
|
+
metadata_attr=self._at_submit_metadata,
|
900
|
+
)
|
543
901
|
|
544
902
|
@property
|
545
|
-
def all_EAR_IDs(self) ->
|
546
|
-
"""
|
547
|
-
|
548
|
-
|
549
|
-
return self.EAR_ID.flatten()
|
903
|
+
def all_EAR_IDs(self) -> NDArray:
|
904
|
+
"""Return all run IDs of this jobscript (across all blocks), removing missing
|
905
|
+
run IDs (i.e. -1 values)"""
|
906
|
+
return np.concatenate([i.EAR_ID[i.EAR_ID >= 0] for i in self.blocks])
|
550
907
|
|
551
908
|
@property
|
552
909
|
@TimeIt.decorator
|
@@ -554,11 +911,10 @@ class Jobscript(JSONLike):
|
|
554
911
|
"""
|
555
912
|
Description of EAR information for this jobscript.
|
556
913
|
"""
|
557
|
-
|
558
|
-
self._all_EARs = self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)
|
559
|
-
return self._all_EARs
|
914
|
+
return self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)
|
560
915
|
|
561
916
|
@property
|
917
|
+
@TimeIt.decorator
|
562
918
|
def resources(self) -> ElementResources:
|
563
919
|
"""
|
564
920
|
The common resources that this jobscript requires.
|
@@ -566,18 +922,20 @@ class Jobscript(JSONLike):
|
|
566
922
|
return self._resources
|
567
923
|
|
568
924
|
@property
|
569
|
-
|
570
|
-
|
571
|
-
The description of where various task loops are.
|
572
|
-
"""
|
573
|
-
return self._task_loop_idx
|
574
|
-
|
575
|
-
@property
|
576
|
-
def dependencies(self) -> Mapping[int, ResolvedDependencies]:
|
925
|
+
@TimeIt.decorator
|
926
|
+
def dependencies(self) -> Mapping[tuple[int, int], dict[str, bool]]:
|
577
927
|
"""
|
578
|
-
The dependency descriptor.
|
928
|
+
The dependency descriptor, accounting for all blocks within this jobscript.
|
579
929
|
"""
|
580
|
-
|
930
|
+
deps = {}
|
931
|
+
for block in self.blocks:
|
932
|
+
for (js_idx, blk_idx), v in block.dependencies.items():
|
933
|
+
if js_idx == self.index:
|
934
|
+
# block dependency is internal to this jobscript
|
935
|
+
continue
|
936
|
+
else:
|
937
|
+
deps[js_idx, blk_idx] = {"is_array": v["is_array"]}
|
938
|
+
return deps
|
581
939
|
|
582
940
|
@property
|
583
941
|
@TimeIt.decorator
|
@@ -598,14 +956,15 @@ class Jobscript(JSONLike):
|
|
598
956
|
return max((ear.end_time for ear in self.all_EARs if ear.end_time), default=None)
|
599
957
|
|
600
958
|
@property
|
601
|
-
def submit_time(self)
|
959
|
+
def submit_time(self):
|
602
960
|
"""
|
603
961
|
When the jobscript was submitted, if known.
|
604
962
|
"""
|
605
|
-
if self._submit_time_obj is None
|
606
|
-
self.
|
607
|
-
self.
|
608
|
-
|
963
|
+
if self._submit_time_obj is None:
|
964
|
+
if _submit_time := self.at_submit_metadata["submit_time"]:
|
965
|
+
self._submit_time_obj = parse_timestamp(
|
966
|
+
_submit_time, self.workflow.ts_fmt
|
967
|
+
)
|
609
968
|
return self._submit_time_obj
|
610
969
|
|
611
970
|
@property
|
@@ -622,26 +981,30 @@ class Jobscript(JSONLike):
|
|
622
981
|
"""
|
623
982
|
return self._submit_machine
|
624
983
|
|
984
|
+
@property
|
985
|
+
def shell_idx(self):
|
986
|
+
return self._shell_idx
|
987
|
+
|
625
988
|
@property
|
626
989
|
def submit_cmdline(self) -> list[str] | None:
|
627
990
|
"""
|
628
|
-
The command line used to
|
991
|
+
The command line used to submit the jobscript, if known.
|
629
992
|
"""
|
630
|
-
return self.
|
993
|
+
return self.at_submit_metadata["submit_cmdline"]
|
631
994
|
|
632
995
|
@property
|
633
996
|
def scheduler_job_ID(self) -> str | None:
|
634
997
|
"""
|
635
998
|
The job ID from the scheduler, if known.
|
636
999
|
"""
|
637
|
-
return self.
|
1000
|
+
return self.at_submit_metadata["scheduler_job_ID"]
|
638
1001
|
|
639
1002
|
@property
|
640
1003
|
def process_ID(self) -> int | None:
|
641
1004
|
"""
|
642
1005
|
The process ID from direct execution, if known.
|
643
1006
|
"""
|
644
|
-
return self.
|
1007
|
+
return self.at_submit_metadata["process_ID"]
|
645
1008
|
|
646
1009
|
@property
|
647
1010
|
def version_info(self) -> VersionInfo | None:
|
@@ -674,59 +1037,32 @@ class Jobscript(JSONLike):
|
|
674
1037
|
return self.submission.workflow
|
675
1038
|
|
676
1039
|
@property
|
677
|
-
def
|
1040
|
+
def is_array(self) -> bool:
|
678
1041
|
"""
|
679
|
-
|
1042
|
+
Whether to generate an array job.
|
680
1043
|
"""
|
681
|
-
return self.
|
1044
|
+
return self._is_array
|
682
1045
|
|
683
1046
|
@property
|
684
|
-
def
|
1047
|
+
def os_name(self) -> str:
|
685
1048
|
"""
|
686
|
-
The
|
1049
|
+
The name of the OS to use.
|
687
1050
|
"""
|
688
|
-
|
689
|
-
|
690
|
-
@property
|
691
|
-
def is_array(self) -> bool:
|
692
|
-
"""
|
693
|
-
Whether to generate an array job.
|
694
|
-
"""
|
695
|
-
if self.scheduler_name == "direct":
|
696
|
-
return False
|
697
|
-
|
698
|
-
support_EAR_para = self.workflow._store._features.EAR_parallelism
|
699
|
-
if self.resources.use_job_array is None:
|
700
|
-
return self.num_elements > 1 and support_EAR_para
|
701
|
-
if self.resources.use_job_array and not support_EAR_para:
|
702
|
-
raise ValueError(
|
703
|
-
f"Store type {self.workflow._store!r} does not support element "
|
704
|
-
f"parallelism, so jobs cannot be submitted as scheduler arrays."
|
705
|
-
)
|
706
|
-
return self.resources.use_job_array
|
707
|
-
|
708
|
-
@property
|
709
|
-
def os_name(self) -> str:
|
710
|
-
"""
|
711
|
-
The name of the OS to use.
|
712
|
-
"""
|
713
|
-
name = self._os_name or self.resources.os_name
|
714
|
-
assert name is not None
|
715
|
-
return name
|
1051
|
+
assert self.resources.os_name
|
1052
|
+
return self.resources.os_name
|
716
1053
|
|
717
1054
|
@property
|
718
|
-
def shell_name(self) -> str
|
719
|
-
|
720
|
-
|
721
|
-
"""
|
722
|
-
return self._shell_name or self.resources.shell
|
1055
|
+
def shell_name(self) -> str:
|
1056
|
+
assert self.resources.shell
|
1057
|
+
return self.resources.shell
|
723
1058
|
|
724
1059
|
@property
|
725
|
-
def scheduler_name(self) -> str
|
1060
|
+
def scheduler_name(self) -> str:
|
726
1061
|
"""
|
727
1062
|
The name of the scheduler to use.
|
728
1063
|
"""
|
729
|
-
|
1064
|
+
assert self.resources.scheduler
|
1065
|
+
return self.resources.scheduler
|
730
1066
|
|
731
1067
|
def _get_submission_os_args(self) -> dict[str, str]:
|
732
1068
|
return {"linux_release_file": self._app.config.linux_release_file}
|
@@ -784,21 +1120,8 @@ class Jobscript(JSONLike):
|
|
784
1120
|
return f"js_{self.index}_EAR_IDs.txt"
|
785
1121
|
|
786
1122
|
@property
|
787
|
-
def
|
788
|
-
""
|
789
|
-
The name of a file containing run directory names.
|
790
|
-
"""
|
791
|
-
return f"js_{self.index}_run_dirs.txt"
|
792
|
-
|
793
|
-
@property
|
794
|
-
def direct_stdout_file_name(self) -> str:
|
795
|
-
"""File for direct execution stdout."""
|
796
|
-
return f"js_{self.index}_stdout.log"
|
797
|
-
|
798
|
-
@property
|
799
|
-
def direct_stderr_file_name(self) -> str:
|
800
|
-
"""File for direct execution stderr."""
|
801
|
-
return f"js_{self.index}_stderr.log"
|
1123
|
+
def combined_script_indices_file_name(self) -> str:
|
1124
|
+
return f"js_{self.index}_script_indices.txt"
|
802
1125
|
|
803
1126
|
@property
|
804
1127
|
def direct_win_pid_file_name(self) -> str:
|
@@ -810,42 +1133,212 @@ class Jobscript(JSONLike):
|
|
810
1133
|
"""The name of the jobscript file."""
|
811
1134
|
return f"js_{self.index}{self.shell.JS_EXT}"
|
812
1135
|
|
1136
|
+
@property
|
1137
|
+
def jobscript_functions_name(self):
|
1138
|
+
assert self.shell_idx is not None
|
1139
|
+
return self.submission.get_jobscript_functions_name(self.shell, self.shell_idx)
|
1140
|
+
|
813
1141
|
@property
|
814
1142
|
def EAR_ID_file_path(self) -> Path:
|
815
1143
|
"""
|
816
1144
|
The path to the file containing EAR IDs for this jobscript.
|
817
1145
|
"""
|
818
|
-
return self.submission.
|
1146
|
+
return self.submission.js_run_ids_path / self.EAR_ID_file_name
|
819
1147
|
|
820
1148
|
@property
|
821
|
-
def
|
1149
|
+
def combined_script_indices_file_path(self) -> Path:
|
822
1150
|
"""
|
823
|
-
The path to the file containing
|
1151
|
+
The path to the file containing script indices, in the case this is a
|
1152
|
+
``combine_scripts=True`` jobscript.
|
824
1153
|
"""
|
825
|
-
return
|
1154
|
+
return (
|
1155
|
+
self.submission.js_script_indices_path
|
1156
|
+
/ self.combined_script_indices_file_name
|
1157
|
+
)
|
826
1158
|
|
827
1159
|
@property
|
828
1160
|
def jobscript_path(self) -> Path:
|
829
1161
|
"""
|
830
1162
|
The path to the file containing the jobscript file.
|
831
1163
|
"""
|
832
|
-
return self.submission.
|
1164
|
+
return self.submission.js_path / self.jobscript_name
|
833
1165
|
|
834
1166
|
@property
|
835
|
-
def
|
1167
|
+
def jobscript_functions_path(self) -> Path:
|
1168
|
+
"""
|
1169
|
+
The path to the file containing the supporting shell functions."""
|
1170
|
+
assert self.shell_idx is not None
|
1171
|
+
return self.submission.get_jobscript_functions_path(self.shell, self.shell_idx)
|
1172
|
+
|
1173
|
+
@property
|
1174
|
+
def std_path(self) -> Path:
|
1175
|
+
"""Directory in which to store jobscript standard out and error stream files."""
|
1176
|
+
return self.submission.js_std_path / str(self.index)
|
1177
|
+
|
1178
|
+
@property
|
1179
|
+
def direct_std_out_err_path(self) -> Path:
|
1180
|
+
"""File path of combined standard output and error streams.
|
1181
|
+
|
1182
|
+
Notes
|
1183
|
+
-----
|
1184
|
+
This path will only exist if `resources.combine_jobscript_std` is True. Otherwise,
|
1185
|
+
see `direct_stdout_path` and `direct_stderr_path` for the separate stream paths.
|
1186
|
+
|
836
1187
|
"""
|
837
|
-
|
838
|
-
|
1188
|
+
return self.get_std_out_err_path()
|
1189
|
+
|
1190
|
+
@property
|
1191
|
+
def direct_stdout_path(self) -> Path:
|
1192
|
+
"""File path to which the jobscript's standard output is saved, for direct
|
1193
|
+
execution only.
|
1194
|
+
|
1195
|
+
Notes
|
1196
|
+
-----
|
1197
|
+
This returned path will be the same as that from `get_stderr_path` if
|
1198
|
+
`resources.combine_jobscript_std` is True.
|
1199
|
+
|
839
1200
|
"""
|
840
|
-
|
1201
|
+
assert not self.is_scheduled
|
1202
|
+
return self.get_stdout_path()
|
841
1203
|
|
842
1204
|
@property
|
843
1205
|
def direct_stderr_path(self) -> Path:
|
1206
|
+
"""File path to which the jobscript's standard error is saved, for direct
|
1207
|
+
execution only.
|
1208
|
+
|
1209
|
+
Notes
|
1210
|
+
-----
|
1211
|
+
This returned path will be the same as that from `get_stdout_path` if
|
1212
|
+
`resources.combine_jobscript_std` is True.
|
1213
|
+
|
1214
|
+
"""
|
1215
|
+
assert not self.is_scheduled
|
1216
|
+
return self.get_stderr_path()
|
1217
|
+
|
1218
|
+
def __validate_get_std_path_array_idx(self, array_idx: int | None = None):
|
1219
|
+
if array_idx is None and self.is_array:
|
1220
|
+
raise ValueError(
|
1221
|
+
"`array_idx` must be specified, since this jobscript is an array job."
|
1222
|
+
)
|
1223
|
+
elif array_idx is not None and not self.is_array:
|
1224
|
+
raise ValueError(
|
1225
|
+
"`array_idx` should not be specified, since this jobscript is not an "
|
1226
|
+
"array job."
|
1227
|
+
)
|
1228
|
+
|
1229
|
+
def _get_stdout_path(self, array_idx: int | None = None) -> Path:
|
1230
|
+
"""File path to the separate standard output stream.
|
1231
|
+
|
1232
|
+
Notes
|
1233
|
+
-----
|
1234
|
+
This path will only exist if `resources.combine_jobscript_std` is False.
|
1235
|
+
Otherwise, see `get_std_out_err_path` for the combined stream path.
|
1236
|
+
|
844
1237
|
"""
|
845
|
-
|
846
|
-
|
1238
|
+
self.__validate_get_std_path_array_idx(array_idx)
|
1239
|
+
return self.std_path / self.scheduler.get_stdout_filename(
|
1240
|
+
js_idx=self.index, job_ID=self.scheduler_job_ID, array_idx=array_idx
|
1241
|
+
)
|
1242
|
+
|
1243
|
+
def _get_stderr_path(self, array_idx: int | None = None) -> Path:
|
1244
|
+
"""File path to the separate standard error stream.
|
1245
|
+
|
1246
|
+
Notes
|
1247
|
+
-----
|
1248
|
+
This path will only exist if `resources.combine_jobscript_std` is False.
|
1249
|
+
Otherwise, see `get_std_out_err_path` for the combined stream path.
|
1250
|
+
|
847
1251
|
"""
|
848
|
-
|
1252
|
+
self.__validate_get_std_path_array_idx(array_idx)
|
1253
|
+
return self.std_path / self.scheduler.get_stderr_filename(
|
1254
|
+
js_idx=self.index, job_ID=self.scheduler_job_ID, array_idx=array_idx
|
1255
|
+
)
|
1256
|
+
|
1257
|
+
def get_std_out_err_path(self, array_idx: int | None = None) -> Path:
|
1258
|
+
"""File path of combined standard output and error streams.
|
1259
|
+
|
1260
|
+
Notes
|
1261
|
+
-----
|
1262
|
+
This path will only exist if `resources.combine_jobscript_std` is True. Otherwise,
|
1263
|
+
see `get_stdout_path` and `get_stderr_path` for the separate stream paths.
|
1264
|
+
|
1265
|
+
"""
|
1266
|
+
self.__validate_get_std_path_array_idx(array_idx)
|
1267
|
+
return self.std_path / self.scheduler.get_std_out_err_filename(
|
1268
|
+
js_idx=self.index, job_ID=self.scheduler_job_ID, array_idx=array_idx
|
1269
|
+
)
|
1270
|
+
|
1271
|
+
def get_stdout_path(self, array_idx: int | None = None) -> Path:
|
1272
|
+
"""File path to which the jobscript's standard output is saved.
|
1273
|
+
|
1274
|
+
Notes
|
1275
|
+
-----
|
1276
|
+
This returned path will be the same as that from `get_stderr_path` if
|
1277
|
+
`resources.combine_jobscript_std` is True.
|
1278
|
+
|
1279
|
+
"""
|
1280
|
+
if self.resources.combine_jobscript_std:
|
1281
|
+
return self.get_std_out_err_path(array_idx=array_idx)
|
1282
|
+
else:
|
1283
|
+
return self._get_stdout_path(array_idx=array_idx)
|
1284
|
+
|
1285
|
+
def get_stderr_path(self, array_idx: int | None = None) -> Path:
|
1286
|
+
"""File path to which the jobscript's standard error is saved.
|
1287
|
+
|
1288
|
+
Notes
|
1289
|
+
-----
|
1290
|
+
This returned path will be the same as that from `get_stdout_path` if
|
1291
|
+
`resources.combine_jobscript_std` is True.
|
1292
|
+
|
1293
|
+
"""
|
1294
|
+
if self.resources.combine_jobscript_std:
|
1295
|
+
return self.get_std_out_err_path(array_idx=array_idx)
|
1296
|
+
else:
|
1297
|
+
return self._get_stderr_path(array_idx=array_idx)
|
1298
|
+
|
1299
|
+
def get_stdout(self, array_idx: int | None = None) -> str:
|
1300
|
+
"""Retrieve the contents of the standard output stream file.
|
1301
|
+
|
1302
|
+
Notes
|
1303
|
+
-----
|
1304
|
+
In the case of non-array jobscripts, this will return the whole standard output,
|
1305
|
+
even if that includes multiple elements/actions.
|
1306
|
+
|
1307
|
+
"""
|
1308
|
+
return self.workflow.get_text_file(self.get_stdout_path(array_idx))
|
1309
|
+
|
1310
|
+
def get_stderr(self, array_idx: int | None = None) -> str:
|
1311
|
+
"""Retrieve the contents of the standard error stream file.
|
1312
|
+
|
1313
|
+
Notes
|
1314
|
+
-----
|
1315
|
+
In the case of non-array jobscripts, this will return the whole standard error,
|
1316
|
+
even if that includes multiple elements/actions.
|
1317
|
+
|
1318
|
+
"""
|
1319
|
+
return self.workflow.get_text_file(self.get_stderr_path(array_idx))
|
1320
|
+
|
1321
|
+
def print_stdout(self, array_idx: int | None = None) -> None:
|
1322
|
+
"""Print the contents of the standard output stream file.
|
1323
|
+
|
1324
|
+
Notes
|
1325
|
+
-----
|
1326
|
+
In the case of non-array jobscripts, this will print the whole standard output,
|
1327
|
+
even if that includes multiple elements/actions.
|
1328
|
+
|
1329
|
+
"""
|
1330
|
+
print(self.get_stdout(array_idx))
|
1331
|
+
|
1332
|
+
def print_stderr(self, array_idx: int | None = None) -> None:
|
1333
|
+
"""Print the contents of the standard error stream file.
|
1334
|
+
|
1335
|
+
Notes
|
1336
|
+
-----
|
1337
|
+
In the case of non-array jobscripts, this will print the whole standard error,
|
1338
|
+
even if that includes multiple elements/actions.
|
1339
|
+
|
1340
|
+
"""
|
1341
|
+
print(self.get_stderr(array_idx))
|
849
1342
|
|
850
1343
|
@property
|
851
1344
|
def direct_win_pid_file_path(self) -> Path:
|
@@ -853,14 +1346,44 @@ class Jobscript(JSONLike):
|
|
853
1346
|
The path to the file containing PIDs for directly executed commands for this
|
854
1347
|
jobscript. Windows only.
|
855
1348
|
"""
|
856
|
-
return self.submission.
|
1349
|
+
return self.submission.js_win_pids_path / self.direct_win_pid_file_name
|
857
1350
|
|
858
|
-
|
859
|
-
|
1351
|
+
@property
|
1352
|
+
def is_scheduled(self) -> bool:
|
1353
|
+
return self.scheduler_name not in ("direct", "direct_posix")
|
1354
|
+
|
1355
|
+
def _update_at_submit_metadata(
|
1356
|
+
self,
|
1357
|
+
submit_cmdline: list[str] | None = None,
|
1358
|
+
scheduler_job_ID: str | None = None,
|
1359
|
+
process_ID: int | None = None,
|
1360
|
+
submit_time: str | None = None,
|
1361
|
+
):
|
1362
|
+
"""Update persistent store and in-memory record of at-submit metadata for this
|
1363
|
+
jobscript.
|
1364
|
+
|
1365
|
+
"""
|
860
1366
|
self.workflow._store.set_jobscript_metadata(
|
861
1367
|
sub_idx=self.submission.index,
|
862
1368
|
js_idx=self.index,
|
863
|
-
|
1369
|
+
submit_cmdline=submit_cmdline,
|
1370
|
+
scheduler_job_ID=scheduler_job_ID,
|
1371
|
+
process_ID=process_ID,
|
1372
|
+
submit_time=submit_time,
|
1373
|
+
)
|
1374
|
+
|
1375
|
+
if submit_cmdline is not None:
|
1376
|
+
self._at_submit_metadata["submit_cmdline"] = submit_cmdline
|
1377
|
+
if scheduler_job_ID is not None:
|
1378
|
+
self._at_submit_metadata["scheduler_job_ID"] = scheduler_job_ID
|
1379
|
+
if process_ID is not None:
|
1380
|
+
self._at_submit_metadata["process_ID"] = process_ID
|
1381
|
+
if submit_time is not None:
|
1382
|
+
self._at_submit_metadata["submit_time"] = submit_time
|
1383
|
+
|
1384
|
+
def _set_submit_time(self, submit_time: datetime) -> None:
|
1385
|
+
self._update_at_submit_metadata(
|
1386
|
+
submit_time=submit_time.strftime(self.workflow.ts_fmt)
|
864
1387
|
)
|
865
1388
|
|
866
1389
|
def _set_submit_hostname(self, submit_hostname: str) -> None:
|
@@ -879,31 +1402,26 @@ class Jobscript(JSONLike):
|
|
879
1402
|
submit_machine=submit_machine,
|
880
1403
|
)
|
881
1404
|
|
882
|
-
def
|
883
|
-
self.
|
1405
|
+
def _set_shell_idx(self, shell_idx: int) -> None:
|
1406
|
+
self._shell_idx = shell_idx
|
884
1407
|
self.workflow._store.set_jobscript_metadata(
|
885
1408
|
sub_idx=self.submission.index,
|
886
1409
|
js_idx=self.index,
|
887
|
-
|
1410
|
+
shell_idx=shell_idx,
|
888
1411
|
)
|
889
1412
|
|
1413
|
+
def _set_submit_cmdline(self, submit_cmdline: list[str]) -> None:
|
1414
|
+
self._update_at_submit_metadata(submit_cmdline=submit_cmdline)
|
1415
|
+
|
890
1416
|
def _set_scheduler_job_ID(self, job_ID: str) -> None:
|
891
1417
|
"""For scheduled submission only."""
|
892
|
-
self.
|
893
|
-
self.
|
894
|
-
sub_idx=self.submission.index,
|
895
|
-
js_idx=self.index,
|
896
|
-
scheduler_job_ID=job_ID,
|
897
|
-
)
|
1418
|
+
assert self.is_scheduled
|
1419
|
+
self._update_at_submit_metadata(scheduler_job_ID=job_ID)
|
898
1420
|
|
899
1421
|
def _set_process_ID(self, process_ID: int) -> None:
|
900
1422
|
"""For direct submission only."""
|
901
|
-
self.
|
902
|
-
self.
|
903
|
-
sub_idx=self.submission.index,
|
904
|
-
js_idx=self.index,
|
905
|
-
process_ID=process_ID,
|
906
|
-
)
|
1423
|
+
assert not self.is_scheduled
|
1424
|
+
self._update_at_submit_metadata(process_ID=process_ID)
|
907
1425
|
|
908
1426
|
def _set_version_info(self, version_info: VersionInfo) -> None:
|
909
1427
|
self._version_info = version_info
|
@@ -913,145 +1431,38 @@ class Jobscript(JSONLike):
|
|
913
1431
|
version_info=version_info,
|
914
1432
|
)
|
915
1433
|
|
916
|
-
def _set_os_name(self) -> None:
|
917
|
-
"""Set the OS name for this jobscript. This is invoked at submit-time."""
|
918
|
-
self._os_name = self.resources.os_name
|
919
|
-
self.workflow._store.set_jobscript_metadata(
|
920
|
-
sub_idx=self.submission.index,
|
921
|
-
js_idx=self.index,
|
922
|
-
os_name=self._os_name,
|
923
|
-
)
|
924
|
-
|
925
|
-
def _set_shell_name(self) -> None:
|
926
|
-
"""Set the shell name for this jobscript. This is invoked at submit-time."""
|
927
|
-
self._shell_name = self.resources.shell
|
928
|
-
self.workflow._store.set_jobscript_metadata(
|
929
|
-
sub_idx=self.submission.index,
|
930
|
-
js_idx=self.index,
|
931
|
-
shell_name=self._shell_name,
|
932
|
-
)
|
933
|
-
|
934
|
-
def _set_scheduler_name(self) -> None:
|
935
|
-
"""Set the scheduler name for this jobscript. This is invoked at submit-time."""
|
936
|
-
self._scheduler_name = self.resources.scheduler
|
937
|
-
if self._scheduler_name:
|
938
|
-
self.workflow._store.set_jobscript_metadata(
|
939
|
-
sub_idx=self.submission.index,
|
940
|
-
js_idx=self.index,
|
941
|
-
scheduler_name=self._scheduler_name,
|
942
|
-
)
|
943
|
-
|
944
|
-
def get_task_loop_idx_array(self) -> NDArray:
|
945
|
-
"""
|
946
|
-
Get an array of task loop indices.
|
947
|
-
"""
|
948
|
-
loop_idx = np.empty_like(self.EAR_ID)
|
949
|
-
loop_idx[:] = np.array([i[2] for i in self.task_actions]).reshape(
|
950
|
-
(len(self.task_actions), 1)
|
951
|
-
)
|
952
|
-
return loop_idx
|
953
|
-
|
954
|
-
@TimeIt.decorator
|
955
|
-
def write_EAR_ID_file(self):
|
956
|
-
"""Write a text file with `num_elements` lines and `num_actions` delimited tokens
|
957
|
-
per line, representing whether a given EAR must be executed."""
|
958
|
-
|
959
|
-
with self.EAR_ID_file_path.open(mode="wt", newline="\n") as fp:
|
960
|
-
# can't specify "open" newline if we pass the file name only, so pass handle:
|
961
|
-
np.savetxt(
|
962
|
-
fname=fp,
|
963
|
-
X=(self.EAR_ID).T,
|
964
|
-
fmt="%.0f",
|
965
|
-
delimiter=self._EAR_files_delimiter,
|
966
|
-
)
|
967
|
-
|
968
|
-
@TimeIt.decorator
|
969
|
-
def write_element_run_dir_file(self, run_dirs: list[list[Path]]):
|
970
|
-
"""Write a text file with `num_elements` lines and `num_actions` delimited tokens
|
971
|
-
per line, representing the working directory for each EAR.
|
972
|
-
|
973
|
-
We assume a given task element's actions all run in the same directory, but in
|
974
|
-
general a jobscript "element" may cross task boundaries, so we need to provide
|
975
|
-
the directory for each jobscript-element/jobscript-action combination.
|
976
|
-
|
977
|
-
"""
|
978
|
-
run_dirs_paths = self.shell.prepare_element_run_dirs(run_dirs)
|
979
|
-
with self.element_run_dir_file_path.open(mode="wt", newline="\n") as fp:
|
980
|
-
# can't specify "open" newline if we pass the file name only, so pass handle:
|
981
|
-
np.savetxt(
|
982
|
-
fname=fp,
|
983
|
-
X=np.array(run_dirs_paths),
|
984
|
-
fmt="%s",
|
985
|
-
delimiter=self._EAR_files_delimiter,
|
986
|
-
)
|
987
|
-
|
988
1434
|
@TimeIt.decorator
|
989
1435
|
def compose_jobscript(
|
990
1436
|
self,
|
1437
|
+
shell,
|
991
1438
|
deps: dict[int, tuple[str, bool]] | None = None,
|
992
1439
|
os_name: str | None = None,
|
993
|
-
shell_name: str | None = None,
|
994
|
-
os_args: dict[str, Any] | None = None,
|
995
|
-
shell_args: dict[str, Any] | None = None,
|
996
1440
|
scheduler_name: str | None = None,
|
997
1441
|
scheduler_args: dict[str, Any] | None = None,
|
998
1442
|
) -> str:
|
999
1443
|
"""Prepare the jobscript file string."""
|
1000
|
-
|
1001
|
-
os_name = os_name or self.os_name
|
1002
|
-
shell_name = shell_name or self.shell_name
|
1003
1444
|
scheduler_name = scheduler_name or self.scheduler_name
|
1004
|
-
|
1005
|
-
|
1006
|
-
raise RuntimeError(
|
1007
|
-
f"Jobscript {self.index} `os_name` is not yet set. Pass the `os_name` as "
|
1008
|
-
f"a method argument to compose the jobscript for a given `os_name`."
|
1009
|
-
)
|
1010
|
-
if not shell_name:
|
1011
|
-
raise RuntimeError(
|
1012
|
-
f"Jobscript {self.index} `shell_name` is not yet set. Pass the "
|
1013
|
-
f"`shell_name` as a method argument to compose the jobscript for a given "
|
1014
|
-
f"`shell_name`."
|
1015
|
-
)
|
1016
|
-
if not scheduler_name:
|
1017
|
-
scheduler_name = self._app.config.default_scheduler
|
1018
|
-
|
1019
|
-
shell = self._get_shell(
|
1020
|
-
os_name=os_name,
|
1021
|
-
shell_name=shell_name,
|
1022
|
-
os_args=os_args or self._get_submission_os_args(),
|
1023
|
-
shell_args=shell_args or self._get_submission_shell_args(),
|
1024
|
-
)
|
1445
|
+
assert scheduler_name
|
1446
|
+
assert os_name
|
1025
1447
|
scheduler = self._app.get_scheduler(
|
1026
1448
|
scheduler_name=scheduler_name,
|
1027
1449
|
os_name=os_name,
|
1028
1450
|
scheduler_args=scheduler_args or self._get_submission_scheduler_args(),
|
1029
1451
|
)
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
"app_invoc": list(self._app.run_time_info.invocation_command),
|
1045
|
-
"run_log_file": self._app.RunDirAppFiles.get_log_file_name(),
|
1046
|
-
"config_dir": str(self._app.config.config_directory),
|
1047
|
-
"config_invoc_key": self._app.config.config_key,
|
1048
|
-
"workflow_path": self.workflow.path,
|
1049
|
-
"sub_idx": self.submission.index,
|
1050
|
-
"js_idx": self.index,
|
1051
|
-
"EAR_file_name": self.EAR_ID_file_name,
|
1052
|
-
"element_run_dirs_file_path": self.element_run_dir_file_name,
|
1053
|
-
}
|
1054
|
-
)
|
1452
|
+
app_caps = self._app.package_name.upper()
|
1453
|
+
header_args = {
|
1454
|
+
"app_caps": app_caps,
|
1455
|
+
"jobscript_functions_name": self.jobscript_functions_name,
|
1456
|
+
"jobscript_functions_dir": self.submission.JS_FUNCS_DIR_NAME,
|
1457
|
+
"sub_idx": self.submission.index,
|
1458
|
+
"js_idx": self.index,
|
1459
|
+
"run_IDs_file_name": self.EAR_ID_file_name,
|
1460
|
+
"run_IDs_file_dir": self.submission.JS_RUN_IDS_DIR_NAME,
|
1461
|
+
"tmp_dir_name": self.submission.TMP_DIR_NAME,
|
1462
|
+
"log_dir_name": self.submission.LOG_DIR_NAME,
|
1463
|
+
"app_std_dir_name": self.submission.APP_STD_DIR_NAME,
|
1464
|
+
"scripts_dir_name": self.submission.SCRIPTS_DIR_NAME,
|
1465
|
+
}
|
1055
1466
|
|
1056
1467
|
shebang = shell.JS_SHEBANG.format(
|
1057
1468
|
shebang_executable=" ".join(shell.shebang_executable),
|
@@ -1059,14 +1470,15 @@ class Jobscript(JSONLike):
|
|
1059
1470
|
)
|
1060
1471
|
header = shell.JS_HEADER.format(**header_args)
|
1061
1472
|
|
1062
|
-
if
|
1473
|
+
if isinstance(scheduler, QueuedScheduler):
|
1063
1474
|
header = shell.JS_SCHEDULER_HEADER.format(
|
1064
1475
|
shebang=shebang,
|
1065
1476
|
scheduler_options=scheduler.format_options(
|
1066
1477
|
resources=self.resources,
|
1067
|
-
num_elements=self.num_elements,
|
1478
|
+
num_elements=self.blocks[0].num_elements, # only used for array jobs
|
1068
1479
|
is_array=self.is_array,
|
1069
1480
|
sub_idx=self.submission.index,
|
1481
|
+
js_idx=self.index,
|
1070
1482
|
),
|
1071
1483
|
header=header,
|
1072
1484
|
)
|
@@ -1085,32 +1497,104 @@ class Jobscript(JSONLike):
|
|
1085
1497
|
wait_command=wait_cmd,
|
1086
1498
|
)
|
1087
1499
|
|
1088
|
-
main = shell.JS_MAIN.format(
|
1089
|
-
num_actions=self.num_actions,
|
1090
|
-
EAR_files_delimiter=self._EAR_files_delimiter,
|
1091
|
-
workflow_app_alias=self.workflow_app_alias,
|
1092
|
-
commands_file_name=self.get_commands_file_name(r"${JS_act_idx}", shell=shell),
|
1093
|
-
run_stream_file=self._app.RunDirAppFiles.get_std_file_name(),
|
1094
|
-
)
|
1095
|
-
|
1096
1500
|
out = header
|
1097
1501
|
|
1098
|
-
if self.
|
1099
|
-
|
1100
|
-
|
1101
|
-
out += shell.JS_ELEMENT_ARRAY.format(
|
1102
|
-
scheduler_command=scheduler.js_cmd,
|
1103
|
-
scheduler_array_switch=scheduler.array_switch,
|
1104
|
-
scheduler_array_item_var=scheduler.array_item_var,
|
1105
|
-
num_elements=self.num_elements,
|
1106
|
-
main=main,
|
1502
|
+
if self.resources.combine_scripts:
|
1503
|
+
run_cmd = shell.JS_RUN_CMD_COMBINED.format(
|
1504
|
+
workflow_app_alias=self.workflow_app_alias
|
1107
1505
|
)
|
1108
|
-
|
1506
|
+
out += run_cmd + "\n"
|
1109
1507
|
else:
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1508
|
+
run_cmd = shell.JS_RUN_CMD.format(workflow_app_alias=self.workflow_app_alias)
|
1509
|
+
|
1510
|
+
if self.resources.write_app_logs:
|
1511
|
+
run_log_enable_disable = shell.JS_RUN_LOG_PATH_ENABLE.format(
|
1512
|
+
run_log_file_name=self.submission.get_app_log_file_name(
|
1513
|
+
run_ID=shell.format_env_var_get(f"{app_caps}_RUN_ID")
|
1514
|
+
)
|
1515
|
+
)
|
1516
|
+
else:
|
1517
|
+
run_log_enable_disable = shell.JS_RUN_LOG_PATH_DISABLE
|
1518
|
+
|
1519
|
+
block_run = shell.JS_RUN.format(
|
1520
|
+
EAR_files_delimiter=self._EAR_files_delimiter,
|
1521
|
+
app_caps=app_caps,
|
1522
|
+
run_cmd=run_cmd,
|
1523
|
+
sub_tmp_dir=self.submission.tmp_path,
|
1524
|
+
run_log_enable_disable=run_log_enable_disable,
|
1113
1525
|
)
|
1526
|
+
if len(self.blocks) == 1:
|
1527
|
+
# forgo element and action loops if not necessary:
|
1528
|
+
block = self.blocks[0]
|
1529
|
+
if block.num_actions > 1:
|
1530
|
+
block_act = shell.JS_ACT_MULTI.format(
|
1531
|
+
num_actions=block.num_actions,
|
1532
|
+
run_block=indent(block_run, shell.JS_INDENT),
|
1533
|
+
)
|
1534
|
+
else:
|
1535
|
+
block_act = shell.JS_ACT_SINGLE.format(run_block=block_run)
|
1536
|
+
|
1537
|
+
main = shell.JS_MAIN.format(
|
1538
|
+
action=block_act,
|
1539
|
+
app_caps=app_caps,
|
1540
|
+
block_start_elem_idx=0,
|
1541
|
+
)
|
1542
|
+
|
1543
|
+
out += shell.JS_BLOCK_HEADER.format(app_caps=app_caps)
|
1544
|
+
if self.is_array:
|
1545
|
+
if not isinstance(scheduler, QueuedScheduler):
|
1546
|
+
raise Exception("can only schedule arrays of jobs to a queue")
|
1547
|
+
out += shell.JS_ELEMENT_MULTI_ARRAY.format(
|
1548
|
+
scheduler_command=scheduler.js_cmd,
|
1549
|
+
scheduler_array_switch=scheduler.array_switch,
|
1550
|
+
scheduler_array_item_var=scheduler.array_item_var,
|
1551
|
+
num_elements=block.num_elements,
|
1552
|
+
main=main,
|
1553
|
+
)
|
1554
|
+
elif block.num_elements == 1:
|
1555
|
+
out += shell.JS_ELEMENT_SINGLE.format(
|
1556
|
+
block_start_elem_idx=0,
|
1557
|
+
main=main,
|
1558
|
+
)
|
1559
|
+
else:
|
1560
|
+
out += shell.JS_ELEMENT_MULTI_LOOP.format(
|
1561
|
+
block_start_elem_idx=0,
|
1562
|
+
num_elements=block.num_elements,
|
1563
|
+
main=indent(main, shell.JS_INDENT),
|
1564
|
+
)
|
1565
|
+
|
1566
|
+
else:
|
1567
|
+
# use a shell loop for blocks, so always write the inner element and action
|
1568
|
+
# loops:
|
1569
|
+
block_act = shell.JS_ACT_MULTI.format(
|
1570
|
+
num_actions=shell.format_array_get_item("num_actions", "$block_idx"),
|
1571
|
+
run_block=indent(block_run, shell.JS_INDENT),
|
1572
|
+
)
|
1573
|
+
main = shell.JS_MAIN.format(
|
1574
|
+
action=block_act,
|
1575
|
+
app_caps=app_caps,
|
1576
|
+
block_start_elem_idx="$block_start_elem_idx",
|
1577
|
+
)
|
1578
|
+
|
1579
|
+
# only non-array jobscripts will have multiple blocks:
|
1580
|
+
element_loop = shell.JS_ELEMENT_MULTI_LOOP.format(
|
1581
|
+
block_start_elem_idx="$block_start_elem_idx",
|
1582
|
+
num_elements=shell.format_array_get_item(
|
1583
|
+
"num_elements", "$block_idx"
|
1584
|
+
),
|
1585
|
+
main=indent(main, shell.JS_INDENT),
|
1586
|
+
)
|
1587
|
+
out += shell.JS_BLOCK_LOOP.format(
|
1588
|
+
num_elements=shell.format_array(
|
1589
|
+
[i.num_elements for i in self.blocks]
|
1590
|
+
),
|
1591
|
+
num_actions=shell.format_array([i.num_actions for i in self.blocks]),
|
1592
|
+
num_blocks=len(self.blocks),
|
1593
|
+
app_caps=app_caps,
|
1594
|
+
element_loop=indent(element_loop, shell.JS_INDENT),
|
1595
|
+
)
|
1596
|
+
|
1597
|
+
out += shell.JS_FOOTER
|
1114
1598
|
|
1115
1599
|
return out
|
1116
1600
|
|
@@ -1128,70 +1612,40 @@ class Jobscript(JSONLike):
|
|
1128
1612
|
"""
|
1129
1613
|
Write the jobscript to its file.
|
1130
1614
|
"""
|
1615
|
+
os_name = os_name or self.os_name
|
1616
|
+
shell_name = shell_name or self.shell_name
|
1617
|
+
assert os_name
|
1618
|
+
assert shell_name
|
1619
|
+
shell = self._get_shell(
|
1620
|
+
os_name=os_name,
|
1621
|
+
shell_name=shell_name,
|
1622
|
+
os_args=os_args or self._get_submission_os_args(),
|
1623
|
+
shell_args=shell_args or self._get_submission_shell_args(),
|
1624
|
+
)
|
1625
|
+
|
1131
1626
|
js_str = self.compose_jobscript(
|
1132
1627
|
deps=deps,
|
1628
|
+
shell=shell,
|
1133
1629
|
os_name=os_name,
|
1134
|
-
shell_name=shell_name,
|
1135
|
-
os_args=os_args,
|
1136
|
-
shell_args=shell_args,
|
1137
1630
|
scheduler_name=scheduler_name,
|
1138
1631
|
scheduler_args=scheduler_args,
|
1139
1632
|
)
|
1140
1633
|
with self.jobscript_path.open("wt", newline="\n") as fp:
|
1141
1634
|
fp.write(js_str)
|
1142
|
-
return self.jobscript_path
|
1143
|
-
|
1144
|
-
@TimeIt.decorator
|
1145
|
-
def make_artifact_dirs(self) -> list[list[Path]]:
|
1146
|
-
"""
|
1147
|
-
Create the directories that will hold artifacts associated with this jobscript.
|
1148
|
-
"""
|
1149
|
-
EARs_arr = np.array(self.all_EARs).reshape(self.EAR_ID.shape)
|
1150
|
-
task_loop_idx_arr = self.get_task_loop_idx_array()
|
1151
|
-
|
1152
|
-
return [
|
1153
|
-
[
|
1154
|
-
self.__make_action_dir(
|
1155
|
-
EARs_arr[js_act_idx, js_elem_idx],
|
1156
|
-
task_loop_idx_arr[js_act_idx, js_elem_idx].item(),
|
1157
|
-
js_act_idx,
|
1158
|
-
js_elem_idx,
|
1159
|
-
)
|
1160
|
-
for js_act_idx in range(self.num_actions)
|
1161
|
-
]
|
1162
|
-
for js_elem_idx in range(self.num_elements)
|
1163
|
-
]
|
1164
|
-
|
1165
|
-
def __make_action_dir(
|
1166
|
-
self, EAR_i: ElementActionRun, l_idx: int, js_act_idx: int, js_elem_idx: int
|
1167
|
-
) -> Path:
|
1168
|
-
t_iID = EAR_i.task.insert_ID
|
1169
|
-
r_idx = EAR_i.index
|
1170
|
-
loop_idx_i = self.task_loop_idx[l_idx]
|
1171
|
-
task_dir = self.workflow.tasks.get(insert_ID=t_iID).get_dir_name(loop_idx_i)
|
1172
|
-
elem_dir = EAR_i.element.dir_name
|
1173
|
-
|
1174
|
-
EAR_dir = self.workflow.execution_path / task_dir / elem_dir / f"r_{r_idx}"
|
1175
|
-
EAR_dir.mkdir(exist_ok=True, parents=True)
|
1176
1635
|
|
1177
|
-
|
1178
|
-
for path in cast("dict[Any, str]", EAR_i.get("input_files", {})).values():
|
1179
|
-
if path:
|
1180
|
-
shutil.copy(path, EAR_dir)
|
1181
|
-
return EAR_dir.relative_to(self.workflow.path)
|
1636
|
+
return self.jobscript_path
|
1182
1637
|
|
1183
1638
|
@TimeIt.decorator
|
1184
|
-
def _launch_direct_js_win(self) -> int:
|
1639
|
+
def _launch_direct_js_win(self, submit_cmd: list[str]) -> int:
|
1185
1640
|
# this is a "trick" to ensure we always get a fully detached new process (with no
|
1186
1641
|
# parent); the `powershell.exe -Command` process exits after running the inner
|
1187
1642
|
# `Start-Process`, which is where the jobscript is actually invoked. I could not
|
1188
1643
|
# find a way using `subprocess.Popen()` to ensure the new process was fully
|
1189
1644
|
# detached when submitting jobscripts via a Jupyter notebook in Windows.
|
1190
1645
|
|
1191
|
-
assert self.submit_cmdline is not None
|
1192
1646
|
# Note we need powershell.exe for this "launcher process", but the shell used for
|
1193
1647
|
# the jobscript itself need not be powershell.exe
|
1194
|
-
exe_path, arg_list =
|
1648
|
+
exe_path, arg_list = submit_cmd[0], submit_cmd[1:]
|
1195
1649
|
|
1196
1650
|
# note powershell-escaped quotes, in case of spaces in arguments (this seems to
|
1197
1651
|
# work okay even though we might have switch like arguments in this list, like
|
@@ -1223,23 +1677,30 @@ class Jobscript(JSONLike):
|
|
1223
1677
|
return int(self.direct_win_pid_file_path.read_text())
|
1224
1678
|
|
1225
1679
|
@TimeIt.decorator
|
1226
|
-
def _launch_direct_js_posix(self) -> int:
|
1680
|
+
def _launch_direct_js_posix(self, submit_cmd: list[str]) -> int:
|
1227
1681
|
# direct submission; submit jobscript asynchronously:
|
1228
1682
|
# detached process, avoid interrupt signals propagating to the subprocess:
|
1229
|
-
|
1230
|
-
|
1231
|
-
"wt"
|
1232
|
-
) as fp_stdout, self.direct_stderr_path.open("wt") as fp_stderr:
|
1683
|
+
|
1684
|
+
def _launch(fp_stdout: TextIO, fp_stderr: TextIO) -> int:
|
1233
1685
|
# note: Popen copies the file objects, so this works!
|
1234
1686
|
proc = subprocess.Popen(
|
1235
|
-
args=
|
1687
|
+
args=submit_cmd,
|
1236
1688
|
stdout=fp_stdout,
|
1237
1689
|
stderr=fp_stderr,
|
1238
|
-
cwd=self.workflow.path,
|
1690
|
+
cwd=str(self.workflow.path),
|
1239
1691
|
start_new_session=True,
|
1240
1692
|
)
|
1241
1693
|
return proc.pid
|
1242
1694
|
|
1695
|
+
if self.resources.combine_jobscript_std:
|
1696
|
+
with self.direct_std_out_err_path.open("wt") as fp_std:
|
1697
|
+
return _launch(fp_std, fp_std)
|
1698
|
+
else:
|
1699
|
+
with self.direct_stdout_path.open(
|
1700
|
+
"wt"
|
1701
|
+
) as fp_stdout, self.direct_stderr_path.open("wt") as fp_stderr:
|
1702
|
+
return _launch(fp_stdout, fp_stderr)
|
1703
|
+
|
1243
1704
|
@TimeIt.decorator
|
1244
1705
|
def _launch_queued(
|
1245
1706
|
self, submit_cmd: list[str], print_stdout: bool
|
@@ -1271,31 +1732,39 @@ class Jobscript(JSONLike):
|
|
1271
1732
|
# map each dependency jobscript index to the JS ref (job/process ID) and if the
|
1272
1733
|
# dependency is an array dependency:
|
1273
1734
|
deps: dict[int, tuple[str, bool]] = {}
|
1274
|
-
for js_idx, deps_i in self.dependencies.items():
|
1735
|
+
for (js_idx, _), deps_i in self.dependencies.items():
|
1275
1736
|
dep_js_ref, dep_js_is_arr = scheduler_refs[js_idx]
|
1276
1737
|
# only submit an array dependency if both this jobscript and the dependency
|
1277
1738
|
# are array jobs:
|
1278
1739
|
dep_is_arr = deps_i["is_array"] and self.is_array and dep_js_is_arr
|
1279
1740
|
deps[js_idx] = (dep_js_ref, dep_is_arr)
|
1280
1741
|
|
1281
|
-
if
|
1282
|
-
#
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1742
|
+
if self.index > 0:
|
1743
|
+
# prevent this jobscript executing if jobscript parallelism is not available:
|
1744
|
+
use_parallelism = (
|
1745
|
+
self.submission.JS_parallelism is True
|
1746
|
+
or {0: "direct", 1: "scheduled"}[self.is_scheduled]
|
1747
|
+
== self.submission.JS_parallelism
|
1748
|
+
)
|
1749
|
+
if not use_parallelism:
|
1750
|
+
# add fake dependencies to all previously submitted jobscripts to avoid
|
1751
|
+
# simultaneous execution:
|
1752
|
+
for js_idx, (js_ref, _) in scheduler_refs.items():
|
1753
|
+
if js_idx not in deps:
|
1754
|
+
deps[js_idx] = (js_ref, False)
|
1755
|
+
|
1756
|
+
# make directory for jobscripts stdout/err stream files:
|
1757
|
+
self.std_path.mkdir(exist_ok=True)
|
1758
|
+
|
1759
|
+
with self.EAR_ID_file_path.open(mode="wt", newline="\n") as ID_fp:
|
1760
|
+
for block in self.blocks:
|
1761
|
+
block.write_EAR_ID_file(ID_fp)
|
1287
1762
|
|
1288
|
-
run_dirs = self.make_artifact_dirs()
|
1289
|
-
self.write_EAR_ID_file()
|
1290
|
-
self.write_element_run_dir_file(run_dirs)
|
1291
1763
|
js_path = self.shell.prepare_JS_path(self.write_jobscript(deps=deps))
|
1292
1764
|
submit_cmd = self.scheduler.get_submit_command(self.shell, js_path, deps)
|
1293
1765
|
self._app.submission_logger.info(
|
1294
1766
|
f"submitting jobscript {self.index!r} with command: {submit_cmd!r}"
|
1295
1767
|
)
|
1296
|
-
self._set_submit_cmdline(submit_cmd)
|
1297
|
-
self._set_submit_hostname(socket.gethostname())
|
1298
|
-
self._set_submit_machine(self._app.config.get("machine"))
|
1299
1768
|
|
1300
1769
|
err_args: JobscriptSubmissionFailureArgs = {
|
1301
1770
|
"submit_cmd": submit_cmd,
|
@@ -1305,23 +1774,23 @@ class Jobscript(JSONLike):
|
|
1305
1774
|
job_ID: str | None = None
|
1306
1775
|
process_ID: int | None = None
|
1307
1776
|
try:
|
1308
|
-
if
|
1777
|
+
if isinstance(self.scheduler, QueuedScheduler):
|
1309
1778
|
# scheduled submission, wait for submission so we can parse the job ID:
|
1310
1779
|
stdout, stderr = self._launch_queued(submit_cmd, print_stdout)
|
1311
1780
|
err_args["stdout"] = stdout
|
1312
1781
|
err_args["stderr"] = stderr
|
1313
1782
|
else:
|
1314
1783
|
if os.name == "nt":
|
1315
|
-
process_ID = self._launch_direct_js_win()
|
1784
|
+
process_ID = self._launch_direct_js_win(submit_cmd)
|
1316
1785
|
else:
|
1317
|
-
process_ID = self._launch_direct_js_posix()
|
1786
|
+
process_ID = self._launch_direct_js_posix(submit_cmd)
|
1318
1787
|
except Exception as subprocess_exc:
|
1319
1788
|
err_args["subprocess_exc"] = subprocess_exc
|
1320
1789
|
raise JobscriptSubmissionFailure(
|
1321
1790
|
"Failed to execute submit command.", **err_args
|
1322
1791
|
)
|
1323
1792
|
|
1324
|
-
if
|
1793
|
+
if isinstance(self.scheduler, QueuedScheduler):
|
1325
1794
|
# scheduled submission
|
1326
1795
|
if stderr:
|
1327
1796
|
raise JobscriptSubmissionFailure(
|
@@ -1348,13 +1817,15 @@ class Jobscript(JSONLike):
|
|
1348
1817
|
# direct submission
|
1349
1818
|
assert process_ID is not None
|
1350
1819
|
self._set_process_ID(process_ID)
|
1351
|
-
|
1352
|
-
# means this jobscript's process ID must be committed:
|
1353
|
-
self.workflow._store._pending.commit_all()
|
1354
|
-
ref = f"{process_ID}"
|
1820
|
+
ref = str(process_ID)
|
1355
1821
|
|
1822
|
+
self._set_submit_cmdline(submit_cmd)
|
1356
1823
|
self._set_submit_time(current_timestamp())
|
1357
1824
|
|
1825
|
+
# a downstream direct jobscript might need to wait for this jobscript, which
|
1826
|
+
# means this jobscript's process ID must be committed:
|
1827
|
+
self.workflow._store._pending.commit_all()
|
1828
|
+
|
1358
1829
|
return ref
|
1359
1830
|
|
1360
1831
|
@property
|
@@ -1363,41 +1834,35 @@ class Jobscript(JSONLike):
|
|
1363
1834
|
return self.index in self.submission.submitted_jobscripts
|
1364
1835
|
|
1365
1836
|
@property
|
1366
|
-
def scheduler_js_ref(self):
|
1837
|
+
def scheduler_js_ref(self) -> str | None | tuple[int | None, list[str] | None]:
|
1367
1838
|
"""
|
1368
1839
|
The reference to the submitted job for the jobscript.
|
1369
1840
|
"""
|
1370
|
-
if
|
1841
|
+
if isinstance(self.scheduler, QueuedScheduler):
|
1371
1842
|
return self.scheduler_job_ID
|
1372
1843
|
else:
|
1373
1844
|
return (self.process_ID, self.submit_cmdline)
|
1374
1845
|
|
1375
|
-
@property
|
1376
|
-
def scheduler_ref(self) -> SchedulerRef:
|
1377
|
-
"""
|
1378
|
-
The generalised scheduler reference descriptor.
|
1379
|
-
"""
|
1380
|
-
return {"js_refs": [self.scheduler_js_ref], "num_js_elements": self.num_elements}
|
1381
|
-
|
1382
1846
|
@overload
|
1383
1847
|
def get_active_states(
|
1384
1848
|
self, as_json: Literal[False] = False
|
1385
|
-
) -> Mapping[int, JobscriptElementState]:
|
1849
|
+
) -> Mapping[int, Mapping[int, JobscriptElementState]]:
|
1386
1850
|
...
|
1387
1851
|
|
1388
1852
|
@overload
|
1389
|
-
def get_active_states(
|
1853
|
+
def get_active_states(
|
1854
|
+
self, as_json: Literal[True]
|
1855
|
+
) -> Mapping[int, Mapping[int, str]]:
|
1390
1856
|
...
|
1391
1857
|
|
1392
1858
|
@TimeIt.decorator
|
1393
1859
|
def get_active_states(
|
1394
1860
|
self, as_json: bool = False
|
1395
|
-
) -> Mapping[int, JobscriptElementState
|
1861
|
+
) -> Mapping[int, Mapping[int, JobscriptElementState | str]]:
|
1396
1862
|
"""If this jobscript is active on this machine, return the state information from
|
1397
1863
|
the scheduler."""
|
1398
|
-
|
1399
|
-
out:
|
1400
|
-
|
1864
|
+
# this returns: {BLOCK_IDX: {JS_ELEMENT_IDX: STATE}}
|
1865
|
+
out: Mapping[int, Mapping[int, JobscriptElementState]] = {}
|
1401
1866
|
if self.is_submitted:
|
1402
1867
|
self._app.submission_logger.debug(
|
1403
1868
|
"checking if the jobscript is running according to EAR submission "
|
@@ -1407,7 +1872,7 @@ class Jobscript(JSONLike):
|
|
1407
1872
|
not_run_states = EARStatus.get_non_running_submitted_states()
|
1408
1873
|
all_EAR_states = set(ear.status for ear in self.all_EARs)
|
1409
1874
|
self._app.submission_logger.debug(
|
1410
|
-
f"Unique EAR states are: {all_EAR_states!r}"
|
1875
|
+
f"Unique EAR states are: {tuple(i.name for i in all_EAR_states)!r}"
|
1411
1876
|
)
|
1412
1877
|
if all_EAR_states.issubset(not_run_states):
|
1413
1878
|
self._app.submission_logger.debug(
|
@@ -1419,29 +1884,483 @@ class Jobscript(JSONLike):
|
|
1419
1884
|
"Checking if jobscript is running according to the scheduler/process "
|
1420
1885
|
"ID."
|
1421
1886
|
)
|
1422
|
-
out_d = self.scheduler.get_job_state_info(
|
1887
|
+
out_d = self.scheduler.get_job_state_info(js_refs=[self.scheduler_js_ref])
|
1423
1888
|
if out_d:
|
1424
|
-
|
1425
|
-
|
1426
|
-
|
1427
|
-
|
1428
|
-
|
1889
|
+
# remove scheduler ref (should be only one):
|
1890
|
+
assert len(out_d) == 1
|
1891
|
+
out_i = nth_value(cast("dict", out_d), 0)
|
1892
|
+
|
1893
|
+
if self.is_array:
|
1894
|
+
# out_i is a dict keyed by array index; there will be exactly one
|
1895
|
+
# block:
|
1896
|
+
out = {0: out_i}
|
1429
1897
|
else:
|
1430
|
-
|
1898
|
+
# out_i is a single state:
|
1899
|
+
out = {
|
1900
|
+
idx: {i: out_i for i in range(block.num_elements)}
|
1901
|
+
for idx, block in enumerate(self.blocks)
|
1902
|
+
}
|
1431
1903
|
|
1432
1904
|
else:
|
1433
1905
|
raise NotSubmitMachineError()
|
1434
1906
|
|
1435
1907
|
self._app.submission_logger.info(f"Jobscript is {'in' if not out else ''}active.")
|
1436
1908
|
if as_json:
|
1437
|
-
return {
|
1909
|
+
return {
|
1910
|
+
block_idx: {k: v.name for k, v in block_data.items()}
|
1911
|
+
for block_idx, block_data in out.items()
|
1912
|
+
}
|
1438
1913
|
return out
|
1439
1914
|
|
1440
|
-
def
|
1915
|
+
def compose_combined_script(
|
1916
|
+
self, action_scripts: list[list[tuple[str, Path, bool]]]
|
1917
|
+
) -> tuple[str, list[list[int]], list[int], list[int]]:
|
1441
1918
|
"""
|
1442
|
-
|
1919
|
+
Prepare the combined-script file string, if applicable.
|
1443
1920
|
"""
|
1444
|
-
|
1445
|
-
|
1921
|
+
|
1922
|
+
# use an index array for action scripts:
|
1923
|
+
script_names: list[str] = []
|
1924
|
+
requires_dir: list[bool] = []
|
1925
|
+
script_data: dict[str, tuple[int, Path]] = {}
|
1926
|
+
script_indices: list[list[int]] = []
|
1927
|
+
for i in action_scripts:
|
1928
|
+
indices_i: list[int] = []
|
1929
|
+
for name_j, path_j, req_dir_i in i:
|
1930
|
+
if name_j in script_data:
|
1931
|
+
idx = script_data[name_j][0]
|
1932
|
+
else:
|
1933
|
+
idx = len(script_names)
|
1934
|
+
script_names.append(name_j)
|
1935
|
+
requires_dir.append(req_dir_i)
|
1936
|
+
script_data[name_j] = (idx, path_j)
|
1937
|
+
indices_i.append(idx)
|
1938
|
+
script_indices.append(indices_i)
|
1939
|
+
|
1940
|
+
if not self.resources.combine_scripts:
|
1941
|
+
raise TypeError(
|
1942
|
+
f"Jobscript {self.index} is not a `combine_scripts` jobscript."
|
1943
|
+
)
|
1944
|
+
|
1945
|
+
tab_indent = " "
|
1946
|
+
|
1947
|
+
script_funcs_lst: list[str] = []
|
1948
|
+
for act_name, (_, snip_path) in script_data.items():
|
1949
|
+
main_func_name = snip_path.stem
|
1950
|
+
with snip_path.open("rt") as fp:
|
1951
|
+
script_str = fp.read()
|
1952
|
+
script_funcs_lst.append(
|
1953
|
+
dedent(
|
1954
|
+
"""\
|
1955
|
+
def {act_name}(*args, **kwargs):
|
1956
|
+
{script_str}
|
1957
|
+
return {main_func_name}(*args, **kwargs)
|
1958
|
+
"""
|
1959
|
+
).format(
|
1960
|
+
act_name=act_name,
|
1961
|
+
script_str=indent(script_str, tab_indent),
|
1962
|
+
main_func_name=main_func_name,
|
1963
|
+
)
|
1964
|
+
)
|
1965
|
+
|
1966
|
+
app_caps = self._app.package_name.upper()
|
1967
|
+
if self.resources.write_app_logs:
|
1968
|
+
sub_log_path = f'os.environ["{app_caps}_LOG_PATH"]'
|
1969
|
+
else:
|
1970
|
+
sub_log_path = '""'
|
1971
|
+
|
1972
|
+
py_imports = dedent(
|
1973
|
+
"""\
|
1974
|
+
import os
|
1975
|
+
from collections import defaultdict
|
1976
|
+
from pathlib import Path
|
1977
|
+
import traceback
|
1978
|
+
import time
|
1979
|
+
from typing import Dict
|
1980
|
+
|
1981
|
+
import {app_module} as app
|
1982
|
+
|
1983
|
+
from hpcflow.sdk.core.errors import UnsetParameterDataErrorBase
|
1984
|
+
|
1985
|
+
log_path = {log_path}
|
1986
|
+
wk_path = os.getenv("{app_caps}_WK_PATH")
|
1987
|
+
"""
|
1988
|
+
).format(
|
1989
|
+
app_module=self._app.module,
|
1990
|
+
app_caps=app_caps,
|
1991
|
+
log_path=sub_log_path,
|
1992
|
+
)
|
1993
|
+
|
1994
|
+
py_main_block_workflow_load = dedent(
|
1995
|
+
"""\
|
1996
|
+
app.load_config(
|
1997
|
+
log_file_path=log_path,
|
1998
|
+
config_dir=r"{cfg_dir}",
|
1999
|
+
config_key=r"{cfg_invoc_key}",
|
2000
|
+
)
|
2001
|
+
wk = app.Workflow(wk_path)
|
2002
|
+
"""
|
2003
|
+
).format(
|
2004
|
+
cfg_dir=self._app.config.config_directory,
|
2005
|
+
cfg_invoc_key=self._app.config.config_key,
|
2006
|
+
app_caps=app_caps,
|
2007
|
+
)
|
2008
|
+
|
2009
|
+
func_invoc_lines = dedent(
|
2010
|
+
"""\
|
2011
|
+
import pprint
|
2012
|
+
if not run.action.is_OFP and run.action.script_data_out_has_direct:
|
2013
|
+
outputs = func(**func_kwargs)
|
2014
|
+
elif run.action.is_OFP:
|
2015
|
+
out_name = run.action.output_file_parsers[0].output.typ
|
2016
|
+
outputs = {out_name: func(**func_kwargs)}
|
2017
|
+
else:
|
2018
|
+
outputs = {}
|
2019
|
+
func(**func_kwargs)
|
2020
|
+
"""
|
2021
|
+
)
|
2022
|
+
|
2023
|
+
script_funcs = "\n".join(script_funcs_lst)
|
2024
|
+
script_names_str = "[" + ", ".join(f"{i}" for i in script_names) + "]"
|
2025
|
+
main = dedent(
|
2026
|
+
"""\
|
2027
|
+
{py_imports}
|
2028
|
+
|
2029
|
+
sub_std_path = Path(os.environ["{app_caps}_SUB_STD_DIR"], f"js_{js_idx}.txt")
|
2030
|
+
with app.redirect_std_to_file(sub_std_path):
|
2031
|
+
{py_main_block_workflow_load}
|
2032
|
+
|
2033
|
+
with open(os.environ["{app_caps}_RUN_ID_FILE"], mode="r") as fp:
|
2034
|
+
lns = fp.read().strip().split("\\n")
|
2035
|
+
run_IDs = [[int(i) for i in ln.split("{run_ID_delim}")] for ln in lns]
|
2036
|
+
|
2037
|
+
get_all_runs_tic = time.perf_counter()
|
2038
|
+
run_IDs_flat = [j for i in run_IDs for j in i]
|
2039
|
+
runs = wk.get_EARs_from_IDs(run_IDs_flat, as_dict=True)
|
2040
|
+
run_skips : Dict[int, bool] = {{k: v.skip for k, v in runs.items()}}
|
2041
|
+
get_all_runs_toc = time.perf_counter()
|
2042
|
+
|
2043
|
+
with open(os.environ["{app_caps}_SCRIPT_INDICES_FILE"], mode="r") as fp:
|
2044
|
+
lns = fp.read().strip().split("\\n")
|
2045
|
+
section_idx = -1
|
2046
|
+
script_indices = []
|
2047
|
+
for ln in lns:
|
2048
|
+
if ln.startswith("#"):
|
2049
|
+
section_idx += 1
|
2050
|
+
continue
|
2051
|
+
ln_parsed = [int(i) for i in ln.split("{script_idx_delim}")]
|
2052
|
+
if section_idx == 0:
|
2053
|
+
num_elements = ln_parsed
|
2054
|
+
elif section_idx == 1:
|
2055
|
+
num_actions = ln_parsed
|
2056
|
+
else:
|
2057
|
+
script_indices.append(ln_parsed)
|
2058
|
+
|
2059
|
+
port = int(os.environ["{app_caps}_RUN_PORT"])
|
2060
|
+
action_scripts = {script_names}
|
2061
|
+
requires_dir = {requires_dir!r}
|
2062
|
+
run_dirs = wk.get_run_directories()
|
2063
|
+
|
2064
|
+
get_ins_time_fp = open(f"js_{js_idx}_get_inputs_times.txt", "wt")
|
2065
|
+
func_time_fp = open(f"js_{js_idx}_func_times.txt", "wt")
|
2066
|
+
run_time_fp = open(f"js_{js_idx}_run_times.txt", "wt")
|
2067
|
+
set_start_multi_times_fp = open(f"js_{js_idx}_set_start_multi_times.txt", "wt")
|
2068
|
+
set_end_multi_times_fp = open(f"js_{js_idx}_set_end_multi_times.txt", "wt")
|
2069
|
+
save_multi_times_fp = open(f"js_{js_idx}_save_multi_times.txt", "wt")
|
2070
|
+
loop_term_times_fp = open(f"js_{js_idx}_loop_term_times.txt", "wt")
|
2071
|
+
|
2072
|
+
get_all_runs_time = get_all_runs_toc - get_all_runs_tic
|
2073
|
+
print(f"get_all_runs_time: {{get_all_runs_time:.4f}}")
|
2074
|
+
|
2075
|
+
app.logger.info(
|
2076
|
+
f"running {num_blocks} jobscript block(s) in combined jobscript index "
|
2077
|
+
f"{js_idx}."
|
2078
|
+
)
|
2079
|
+
|
2080
|
+
block_start_elem_idx = 0
|
2081
|
+
for block_idx in range({num_blocks}):
|
2082
|
+
|
2083
|
+
app.logger.info(f"running block index {{block_idx}}.")
|
2084
|
+
|
2085
|
+
os.environ["{app_caps}_BLOCK_IDX"] = str(block_idx)
|
2086
|
+
|
2087
|
+
block_run_IDs = [
|
2088
|
+
run_IDs[block_start_elem_idx + i]
|
2089
|
+
for i in range(num_elements[block_idx])
|
2090
|
+
]
|
2091
|
+
|
2092
|
+
for block_act_idx in range(num_actions[block_idx]):
|
2093
|
+
|
2094
|
+
app.logger.info(
|
2095
|
+
f"running block action index {{block_act_idx}} "
|
2096
|
+
f"(in block {{block_idx}})."
|
2097
|
+
)
|
2098
|
+
|
2099
|
+
os.environ["{app_caps}_BLOCK_ACT_IDX"] = str(block_act_idx)
|
2100
|
+
|
2101
|
+
block_act_run_IDs = [i[block_act_idx] for i in block_run_IDs]
|
2102
|
+
|
2103
|
+
block_act_std_path = Path(
|
2104
|
+
os.environ["{app_caps}_SUB_STD_DIR"],
|
2105
|
+
f"js_{js_idx}_blk_{{block_idx}}_blk_act_{{block_act_idx}}.txt",
|
2106
|
+
)
|
2107
|
+
with app.redirect_std_to_file(block_act_std_path):
|
2108
|
+
# set run starts for all runs of the block/action:
|
2109
|
+
block_act_run_dirs = [run_dirs[i] for i in block_act_run_IDs]
|
2110
|
+
block_act_runs = [runs[i] for i in block_act_run_IDs]
|
2111
|
+
|
2112
|
+
block_act_run_IDs_non_skipped = []
|
2113
|
+
block_act_run_dirs_non_skipped = []
|
2114
|
+
for i, j in zip(block_act_run_IDs, block_act_run_dirs):
|
2115
|
+
if not run_skips[i]:
|
2116
|
+
block_act_run_IDs_non_skipped.append(i)
|
2117
|
+
block_act_run_dirs_non_skipped.append(j)
|
2118
|
+
|
2119
|
+
if block_act_run_IDs_non_skipped:
|
2120
|
+
set_start_multi_tic = time.perf_counter()
|
2121
|
+
app.logger.info("setting run starts.")
|
2122
|
+
wk.set_multi_run_starts(block_act_run_IDs_non_skipped, block_act_run_dirs_non_skipped, port)
|
2123
|
+
app.logger.info("finished setting run starts.")
|
2124
|
+
set_start_multi_toc = time.perf_counter()
|
2125
|
+
set_start_multi_time = set_start_multi_toc - set_start_multi_tic
|
2126
|
+
print(f"{{set_start_multi_time:.4f}}", file=set_start_multi_times_fp, flush=True)
|
2127
|
+
|
2128
|
+
all_act_outputs = {{}}
|
2129
|
+
run_end_dat = defaultdict(list)
|
2130
|
+
block_act_key=({js_idx}, block_idx, block_act_idx)
|
2131
|
+
|
2132
|
+
for block_elem_idx in range(num_elements[block_idx]):
|
2133
|
+
|
2134
|
+
js_elem_idx = block_start_elem_idx + block_elem_idx
|
2135
|
+
run_ID = block_act_run_IDs[block_elem_idx]
|
2136
|
+
|
2137
|
+
app.logger.info(
|
2138
|
+
f"run_ID is {{run_ID}}; block element index: {{block_elem_idx}}; "
|
2139
|
+
f"block action index: {{block_act_idx}}; in block {{block_idx}}."
|
2140
|
+
)
|
2141
|
+
|
2142
|
+
if run_ID == -1:
|
2143
|
+
continue
|
2144
|
+
|
2145
|
+
run = runs[run_ID]
|
2146
|
+
|
2147
|
+
skip = run_skips[run_ID]
|
2148
|
+
if skip:
|
2149
|
+
app.logger.info(f"run_ID: {{run_ID}}; run is set to skip; skipping.")
|
2150
|
+
# set run end
|
2151
|
+
run_end_dat[block_act_key].append((run, {skipped_exit_code}, None))
|
2152
|
+
continue
|
2153
|
+
|
2154
|
+
run_tic = time.perf_counter()
|
2155
|
+
|
2156
|
+
os.environ["{app_caps}_BLOCK_ELEM_IDX"] = str(block_elem_idx)
|
2157
|
+
os.environ["{app_caps}_JS_ELEM_IDX"] = str(js_elem_idx)
|
2158
|
+
os.environ["{app_caps}_RUN_ID"] = str(run_ID)
|
2159
|
+
|
2160
|
+
std_path = Path(os.environ["{app_caps}_SUB_STD_DIR"], f"{{run_ID}}.txt")
|
2161
|
+
with app.redirect_std_to_file(std_path):
|
2162
|
+
|
2163
|
+
if {write_app_logs!r}:
|
2164
|
+
new_log_path = Path(
|
2165
|
+
os.environ["{app_caps}_SUB_LOG_DIR"],
|
2166
|
+
f"{run_log_name}",
|
2167
|
+
)
|
2168
|
+
# TODO: this doesn't work!
|
2169
|
+
app.logger.info(
|
2170
|
+
f"run_ID: {{run_ID}}; moving log path to {{new_log_path}}"
|
2171
|
+
)
|
2172
|
+
app.config.log_path = new_log_path
|
2173
|
+
|
2174
|
+
run_dir = run_dirs[run_ID]
|
2175
|
+
|
2176
|
+
script_idx = script_indices[block_idx][block_act_idx]
|
2177
|
+
req_dir = requires_dir[script_idx]
|
2178
|
+
if req_dir:
|
2179
|
+
app.logger.info(f"run_ID: {{run_ID}}; changing to run directory: {{run_dir}}")
|
2180
|
+
os.chdir(run_dir)
|
2181
|
+
|
2182
|
+
# retrieve script inputs:
|
2183
|
+
app.logger.info(f"run_ID: {{run_ID}}; retrieving script inputs.")
|
2184
|
+
get_ins_tic = time.perf_counter()
|
2185
|
+
try:
|
2186
|
+
with run.raise_on_failure_threshold() as unset_params:
|
2187
|
+
app.logger.info(f"run_ID: {{run_ID}}; writing script input files.")
|
2188
|
+
run.write_script_input_files(block_act_key)
|
2189
|
+
|
2190
|
+
app.logger.info(f"run_ID: {{run_ID}}; retrieving funcion kwargs.")
|
2191
|
+
func_kwargs = run.get_py_script_func_kwargs(
|
2192
|
+
raise_on_unset=False,
|
2193
|
+
add_script_files=True,
|
2194
|
+
blk_act_key=block_act_key,
|
2195
|
+
)
|
2196
|
+
app.logger.info(
|
2197
|
+
f"run_ID: {{run_ID}}; script inputs have keys: "
|
2198
|
+
f"{{tuple(func_kwargs.keys())!r}}."
|
2199
|
+
)
|
2200
|
+
except UnsetParameterDataErrorBase:
|
2201
|
+
# not all required parameter data is set, so fail this run:
|
2202
|
+
exit_code = 1
|
2203
|
+
run_end_dat[block_act_key].append((run, exit_code, None))
|
2204
|
+
app.logger.info(
|
2205
|
+
f"run_ID: {{run_ID}}; some parameter data is unset, "
|
2206
|
+
f"so cannot run; setting exit code to 1."
|
2207
|
+
)
|
2208
|
+
continue # don't run the function
|
2209
|
+
|
2210
|
+
get_ins_toc = time.perf_counter()
|
2211
|
+
|
2212
|
+
func = action_scripts[script_idx]
|
2213
|
+
app.logger.info(f"run_ID: {{run_ID}}; function to run is: {{func.__name__}}")
|
2214
|
+
|
2215
|
+
|
2216
|
+
try:
|
2217
|
+
func_tic = time.perf_counter()
|
2218
|
+
app.logger.info(f"run_ID: {{run_ID}}; invoking function.")
|
2219
|
+
{func_invoc_lines}
|
2220
|
+
|
2221
|
+
except Exception:
|
2222
|
+
print(f"Exception caught during execution of script function {{func.__name__}}.")
|
2223
|
+
traceback.print_exc()
|
2224
|
+
exit_code = 1
|
2225
|
+
outputs = {{}}
|
2226
|
+
else:
|
2227
|
+
app.logger.info(f"run_ID: {{run_ID}}; finished function invocation.")
|
2228
|
+
exit_code = 0
|
2229
|
+
finally:
|
2230
|
+
func_toc = time.perf_counter()
|
2231
|
+
|
2232
|
+
with app.redirect_std_to_file(std_path):
|
2233
|
+
# set run end
|
2234
|
+
block_act_key=({js_idx}, block_idx, block_act_idx)
|
2235
|
+
run_end_dat[block_act_key].append((run, exit_code, run_dir))
|
2236
|
+
|
2237
|
+
# store outputs to save at end:
|
2238
|
+
app.logger.info(f"run_ID: {{run_ID}}; setting outputs to save.")
|
2239
|
+
for name_i, out_i in outputs.items():
|
2240
|
+
p_id = run.data_idx[f"outputs.{{name_i}}"]
|
2241
|
+
all_act_outputs[p_id] = out_i
|
2242
|
+
app.logger.info(f"run_ID: {{run_ID}}; finished setting outputs to save.")
|
2243
|
+
|
2244
|
+
if req_dir:
|
2245
|
+
app.logger.info(f"run_ID: {{run_ID}}; changing directory back")
|
2246
|
+
os.chdir(os.environ["{app_caps}_SUB_TMP_DIR"])
|
2247
|
+
|
2248
|
+
if {write_app_logs!r}:
|
2249
|
+
app.logger.info(f"run_ID: {{run_ID}}; moving log path back to " + {sub_log_path!r})
|
2250
|
+
app.config.log_path = {sub_log_path}
|
2251
|
+
|
2252
|
+
run_toc = time.perf_counter()
|
2253
|
+
|
2254
|
+
get_ins_time = get_ins_toc - get_ins_tic
|
2255
|
+
func_time = func_toc - func_tic
|
2256
|
+
run_time = run_toc - run_tic
|
2257
|
+
|
2258
|
+
print(f"{{get_ins_time:.4f}}", file=get_ins_time_fp)
|
2259
|
+
print(f"{{func_time:.4f}}", file=func_time_fp)
|
2260
|
+
print(f"{{run_time:.4f}}", file=run_time_fp)
|
2261
|
+
|
2262
|
+
with app.redirect_std_to_file(block_act_std_path):
|
2263
|
+
|
2264
|
+
if all_act_outputs:
|
2265
|
+
# save outputs of all elements of this action
|
2266
|
+
save_all_tic = time.perf_counter()
|
2267
|
+
app.logger.info(
|
2268
|
+
f"saving outputs of block action index {{block_act_idx}} "
|
2269
|
+
f"in block {{block_idx}}."
|
2270
|
+
)
|
2271
|
+
wk.set_parameter_values(all_act_outputs)
|
2272
|
+
app.logger.info(
|
2273
|
+
f"finished saving outputs of block action index {{block_act_idx}} "
|
2274
|
+
f"in block {{block_idx}}."
|
2275
|
+
)
|
2276
|
+
save_all_toc = time.perf_counter()
|
2277
|
+
save_all_time_i = save_all_toc - save_all_tic
|
2278
|
+
print(f"{{save_all_time_i:.4f}}", file=save_multi_times_fp, flush=True)
|
2279
|
+
|
2280
|
+
all_loop_term_tic = time.perf_counter()
|
2281
|
+
app.logger.info(f"run_ID: {{run_ID}}; checking for loop terminations")
|
2282
|
+
for run_i in block_act_runs:
|
2283
|
+
if not run_skips[run_i.id_]:
|
2284
|
+
skipped_IDs_i = wk._check_loop_termination(run_i)
|
2285
|
+
for skip_ID in skipped_IDs_i:
|
2286
|
+
run_skips[skip_ID] = 2 # SkipReason.LOOP_TERMINATION
|
2287
|
+
if skip_ID in runs:
|
2288
|
+
runs[skip_ID]._skip = 2 # mutates runs within `run_end_dat`
|
2289
|
+
app.logger.info(f"run_ID: {{run_ID}}; finished checking for loop terminations.")
|
2290
|
+
|
2291
|
+
all_loop_term_toc = time.perf_counter()
|
2292
|
+
all_loop_term_time_i = all_loop_term_toc - all_loop_term_tic
|
2293
|
+
print(f"{{all_loop_term_time_i:.4f}}", file=loop_term_times_fp, flush=True)
|
2294
|
+
|
2295
|
+
# set run end for all elements of this action
|
2296
|
+
app.logger.info(f"run_ID: {{run_ID}}; setting run ends.")
|
2297
|
+
set_multi_end_tic = time.perf_counter()
|
2298
|
+
wk.set_multi_run_ends(run_end_dat)
|
2299
|
+
set_multi_end_toc = time.perf_counter()
|
2300
|
+
set_multi_end_time = set_multi_end_toc - set_multi_end_tic
|
2301
|
+
app.logger.info(f"run_ID: {{run_ID}}; finished setting run ends.")
|
2302
|
+
print(f"{{set_multi_end_time:.4f}}", file=set_end_multi_times_fp, flush=True)
|
2303
|
+
|
2304
|
+
block_start_elem_idx += num_elements[block_idx]
|
2305
|
+
|
2306
|
+
get_ins_time_fp.close()
|
2307
|
+
func_time_fp.close()
|
2308
|
+
run_time_fp.close()
|
2309
|
+
set_start_multi_times_fp.close()
|
2310
|
+
set_end_multi_times_fp.close()
|
2311
|
+
save_multi_times_fp.close()
|
2312
|
+
loop_term_times_fp.close()
|
2313
|
+
"""
|
2314
|
+
).format(
|
2315
|
+
py_imports=py_imports,
|
2316
|
+
py_main_block_workflow_load=indent(py_main_block_workflow_load, tab_indent),
|
2317
|
+
app_caps=self._app.package_name.upper(),
|
2318
|
+
script_idx_delim=",", # TODO
|
2319
|
+
script_names=script_names_str,
|
2320
|
+
requires_dir=requires_dir,
|
2321
|
+
num_blocks=len(self.blocks),
|
2322
|
+
run_ID_delim=self._EAR_files_delimiter,
|
2323
|
+
run_log_name=self.submission.get_app_log_file_name(run_ID="{run_ID}"),
|
2324
|
+
js_idx=self.index,
|
2325
|
+
write_app_logs=self.resources.write_app_logs,
|
2326
|
+
sub_log_path=sub_log_path,
|
2327
|
+
skipped_exit_code=SKIPPED_EXIT_CODE,
|
2328
|
+
func_invoc_lines=indent(func_invoc_lines, tab_indent * 4),
|
2329
|
+
)
|
2330
|
+
|
2331
|
+
script = dedent(
|
2332
|
+
"""\
|
2333
|
+
{script_funcs}
|
2334
|
+
if __name__ == "__main__":
|
2335
|
+
{main}
|
2336
|
+
"""
|
2337
|
+
).format(script_funcs=script_funcs, main=indent(main, tab_indent))
|
2338
|
+
|
2339
|
+
num_elems = [i.num_elements for i in self.blocks]
|
2340
|
+
num_acts = [len(i) for i in action_scripts]
|
2341
|
+
|
2342
|
+
return script, script_indices, num_elems, num_acts
|
2343
|
+
|
2344
|
+
def write_script_indices_file(
    self, indices: list[list[int]], num_elems: list[int], num_acts: list[int]
) -> None:
    """
    Write the text file that records, for a `combined_scripts` script, the action
    script index of every block and action.

    The file is written to `self.combined_script_indices_file_path` and consists of
    three commented sections, in this order:

    1. the number of elements per block (one delimited row),
    2. the number of actions per block (one delimited row),
    3. the script indices (one delimited row per item of `indices`).

    Parameters
    ----------
    indices
        Script indices; each inner list becomes one row of the file.
    num_elems
        Number of elements in each block.
    num_acts
        Number of actions in each block.
    """
    sep = ","  # TODO: refactor?

    def _row(values) -> str:
        # one delimited, newline-terminated row
        return sep.join(map(str, values)) + "\n"

    rows = [
        "# number of elements per block:\n",
        _row(num_elems),
        "# number of actions per block:\n",
        _row(num_acts),
        "# script indices:\n",
        *map(_row, indices),
    ]
    with self.combined_script_indices_file_path.open("wt") as fp:
        fp.writelines(rows)
|
2360
|
+
|
2361
|
+
def get_app_std_path(self) -> Path:
    """
    Return the path of this object's ``js_<index>.txt`` file.

    The parent directory is whatever the submission's `get_app_std_path` returns
    for the workflow's submissions path and the submission's index; the file name
    is built from `self.index`.
    """
    submission = self.submission
    app_std_dir = submission.get_app_std_path(
        self.workflow.submissions_path, submission.index
    )
    file_name = f"js_{self.index}.txt"  # TODO: refactor
    return app_std_dir / file_name
|