hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
- hpcflow/_version.py +1 -1
- hpcflow/app.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +26 -15
- hpcflow/sdk/app.py +2192 -768
- hpcflow/sdk/cli.py +506 -296
- hpcflow/sdk/cli_common.py +105 -7
- hpcflow/sdk/config/__init__.py +1 -1
- hpcflow/sdk/config/callbacks.py +115 -43
- hpcflow/sdk/config/cli.py +126 -103
- hpcflow/sdk/config/config.py +674 -318
- hpcflow/sdk/config/config_file.py +131 -95
- hpcflow/sdk/config/errors.py +125 -84
- hpcflow/sdk/config/types.py +148 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +1771 -1059
- hpcflow/sdk/core/app_aware.py +24 -0
- hpcflow/sdk/core/cache.py +139 -79
- hpcflow/sdk/core/command_files.py +263 -287
- hpcflow/sdk/core/commands.py +145 -112
- hpcflow/sdk/core/element.py +828 -535
- hpcflow/sdk/core/enums.py +192 -0
- hpcflow/sdk/core/environment.py +74 -93
- hpcflow/sdk/core/errors.py +455 -52
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +540 -272
- hpcflow/sdk/core/loop.py +751 -347
- hpcflow/sdk/core/loop_cache.py +164 -47
- hpcflow/sdk/core/object_list.py +370 -207
- hpcflow/sdk/core/parameters.py +1100 -627
- hpcflow/sdk/core/rule.py +59 -41
- hpcflow/sdk/core/run_dir_files.py +21 -37
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +1649 -1339
- hpcflow/sdk/core/task_schema.py +308 -196
- hpcflow/sdk/core/test_utils.py +191 -114
- hpcflow/sdk/core/types.py +440 -0
- hpcflow/sdk/core/utils.py +485 -309
- hpcflow/sdk/core/validation.py +82 -9
- hpcflow/sdk/core/workflow.py +2544 -1178
- hpcflow/sdk/core/zarr_io.py +98 -137
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +53 -33
- hpcflow/sdk/helper/cli.py +18 -15
- hpcflow/sdk/helper/helper.py +75 -63
- hpcflow/sdk/helper/watcher.py +61 -28
- hpcflow/sdk/log.py +122 -71
- hpcflow/sdk/persistence/__init__.py +8 -31
- hpcflow/sdk/persistence/base.py +1360 -606
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +38 -0
- hpcflow/sdk/persistence/json.py +568 -188
- hpcflow/sdk/persistence/pending.py +382 -179
- hpcflow/sdk/persistence/store_resource.py +39 -23
- hpcflow/sdk/persistence/types.py +318 -0
- hpcflow/sdk/persistence/utils.py +14 -11
- hpcflow/sdk/persistence/zarr.py +1337 -433
- hpcflow/sdk/runtime.py +44 -41
- hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
- hpcflow/sdk/submission/jobscript.py +1651 -692
- hpcflow/sdk/submission/schedulers/__init__.py +167 -39
- hpcflow/sdk/submission/schedulers/direct.py +121 -81
- hpcflow/sdk/submission/schedulers/sge.py +170 -129
- hpcflow/sdk/submission/schedulers/slurm.py +291 -268
- hpcflow/sdk/submission/schedulers/utils.py +12 -2
- hpcflow/sdk/submission/shells/__init__.py +14 -15
- hpcflow/sdk/submission/shells/base.py +150 -29
- hpcflow/sdk/submission/shells/bash.py +283 -173
- hpcflow/sdk/submission/shells/os_version.py +31 -30
- hpcflow/sdk/submission/shells/powershell.py +228 -170
- hpcflow/sdk/submission/submission.py +1014 -335
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +182 -12
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +27 -6
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +866 -85
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
- hpcflow/tests/unit/test_action.py +262 -75
- hpcflow/tests/unit/test_action_rule.py +9 -4
- hpcflow/tests/unit/test_app.py +33 -6
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +134 -1
- hpcflow/tests/unit/test_command.py +71 -54
- hpcflow/tests/unit/test_config.py +142 -16
- hpcflow/tests/unit/test_config_file.py +21 -18
- hpcflow/tests/unit/test_element.py +58 -62
- hpcflow/tests/unit/test_element_iteration.py +50 -1
- hpcflow/tests/unit/test_element_set.py +29 -19
- hpcflow/tests/unit/test_group.py +4 -2
- hpcflow/tests/unit/test_input_source.py +116 -93
- hpcflow/tests/unit/test_input_value.py +29 -24
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +44 -35
- hpcflow/tests/unit/test_loop.py +1396 -84
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_object_list.py +17 -12
- hpcflow/tests/unit/test_parameter.py +29 -7
- hpcflow/tests/unit/test_persistence.py +237 -42
- hpcflow/tests/unit/test_resources.py +20 -18
- hpcflow/tests/unit/test_run.py +117 -6
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +2 -1
- hpcflow/tests/unit/test_schema_input.py +23 -15
- hpcflow/tests/unit/test_shell.py +23 -2
- hpcflow/tests/unit/test_slurm.py +8 -7
- hpcflow/tests/unit/test_submission.py +38 -89
- hpcflow/tests/unit/test_task.py +352 -247
- hpcflow/tests/unit/test_task_schema.py +33 -20
- hpcflow/tests/unit/test_utils.py +9 -11
- hpcflow/tests/unit/test_value_sequence.py +15 -12
- hpcflow/tests/unit/test_workflow.py +114 -83
- hpcflow/tests/unit/test_workflow_template.py +0 -1
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +334 -1
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +160 -15
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
- hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
- hpcflow/sdk/core/parallel.py +0 -21
- hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -3,58 +3,116 @@ Model of information submitted to a scheduler.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
|
-
import
|
6
|
+
from collections import defaultdict
|
7
7
|
|
8
|
-
from datetime import datetime, timezone
|
9
8
|
import os
|
10
|
-
from pathlib import Path
|
11
9
|
import shutil
|
12
10
|
import socket
|
13
11
|
import subprocess
|
14
|
-
from textwrap import indent
|
15
|
-
from typing import
|
12
|
+
from textwrap import dedent, indent
|
13
|
+
from typing import TextIO, cast, overload, TYPE_CHECKING
|
14
|
+
from typing_extensions import override
|
16
15
|
|
17
16
|
import numpy as np
|
18
|
-
from
|
19
|
-
from hpcflow.sdk import
|
20
|
-
from hpcflow.sdk.core.
|
21
|
-
|
22
|
-
|
17
|
+
from hpcflow.sdk.core import SKIPPED_EXIT_CODE
|
18
|
+
from hpcflow.sdk.core.enums import EARStatus
|
19
|
+
from hpcflow.sdk.core.errors import (
|
20
|
+
JobscriptSubmissionFailure,
|
21
|
+
NotSubmitMachineError,
|
22
|
+
)
|
23
|
+
|
24
|
+
from hpcflow.sdk.typing import hydrate
|
23
25
|
from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
|
26
|
+
from hpcflow.sdk.core.utils import nth_value, parse_timestamp, current_timestamp
|
24
27
|
from hpcflow.sdk.log import TimeIt
|
25
|
-
from hpcflow.sdk.submission.
|
26
|
-
from hpcflow.sdk.submission.schedulers import
|
27
|
-
from hpcflow.sdk.submission.shells import get_shell
|
28
|
+
from hpcflow.sdk.submission.schedulers import QueuedScheduler
|
29
|
+
from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
|
30
|
+
from hpcflow.sdk.submission.shells import get_shell, DEFAULT_SHELL_NAMES
|
31
|
+
|
32
|
+
if TYPE_CHECKING:
|
33
|
+
from collections.abc import Iterable, Iterator, Mapping, Sequence
|
34
|
+
from datetime import datetime
|
35
|
+
from pathlib import Path
|
36
|
+
from typing import Any, ClassVar, Literal
|
37
|
+
from typing_extensions import TypeIs
|
38
|
+
from numpy.typing import NDArray, ArrayLike
|
39
|
+
from ..core.actions import ElementActionRun
|
40
|
+
from ..core.element import ElementResources
|
41
|
+
from ..core.loop_cache import LoopIndex
|
42
|
+
from ..core.types import JobscriptSubmissionFailureArgs, BlockActionKey
|
43
|
+
from ..core.workflow import WorkflowTask, Workflow
|
44
|
+
from ..persistence.base import PersistentStore
|
45
|
+
from .submission import Submission
|
46
|
+
from .shells.base import Shell
|
47
|
+
from .schedulers import Scheduler
|
48
|
+
from .enums import JobscriptElementState
|
49
|
+
from .types import (
|
50
|
+
JobScriptCreationArguments,
|
51
|
+
JobScriptDescriptor,
|
52
|
+
ResolvedJobscriptBlockDependencies,
|
53
|
+
SchedulerRef,
|
54
|
+
VersionInfo,
|
55
|
+
)
|
56
|
+
from ..core.cache import ObjectCache
|
57
|
+
from hpcflow.sdk.submission.submission import JOBSCRIPT_SUBMIT_TIME_KEYS
|
58
|
+
|
59
|
+
|
60
|
+
def is_jobscript_array(
|
61
|
+
resources: ElementResources, num_elements: int, store: PersistentStore
|
62
|
+
) -> bool:
|
63
|
+
"""Return True if a job array should be used for the specified `ElementResources`."""
|
64
|
+
if resources.scheduler in ("direct", "direct_posix"):
|
65
|
+
if resources.use_job_array:
|
66
|
+
raise ValueError(
|
67
|
+
f"`use_job_array` not supported by scheduler: {resources.scheduler!r}"
|
68
|
+
)
|
69
|
+
return False
|
70
|
+
|
71
|
+
if resources.combine_scripts:
|
72
|
+
return False
|
73
|
+
|
74
|
+
run_parallelism = store._features.EAR_parallelism
|
75
|
+
if resources.use_job_array is None:
|
76
|
+
if num_elements > 1 and run_parallelism:
|
77
|
+
return True
|
78
|
+
else:
|
79
|
+
return False
|
80
|
+
else:
|
81
|
+
if resources.use_job_array and not run_parallelism:
|
82
|
+
raise ValueError(
|
83
|
+
f"Store type {store!r} does not support element parallelism, so jobs "
|
84
|
+
f"cannot be submitted as scheduler arrays."
|
85
|
+
)
|
86
|
+
return resources.use_job_array
|
28
87
|
|
29
88
|
|
30
89
|
@TimeIt.decorator
|
31
90
|
def generate_EAR_resource_map(
|
32
|
-
task:
|
33
|
-
loop_idx:
|
34
|
-
|
91
|
+
task: WorkflowTask,
|
92
|
+
loop_idx: LoopIndex[str, int],
|
93
|
+
cache: ObjectCache,
|
94
|
+
) -> tuple[Sequence[ElementResources], Sequence[int], NDArray, NDArray]:
|
35
95
|
"""
|
36
96
|
Generate an integer array whose rows represent actions and columns represent task
|
37
97
|
elements and whose values index unique resources.
|
38
98
|
"""
|
39
|
-
# TODO: assume single iteration for now; later we will loop over Loop tasks for each
|
40
|
-
# included task and call this func with specific loop indices
|
41
99
|
none_val = -1
|
42
|
-
resources = []
|
43
|
-
resource_hashes = []
|
100
|
+
resources: list[ElementResources] = []
|
101
|
+
resource_hashes: list[int] = []
|
44
102
|
|
45
103
|
arr_shape = (task.num_actions, task.num_elements)
|
46
104
|
resource_map = np.empty(arr_shape, dtype=int)
|
47
105
|
EAR_ID_map = np.empty(arr_shape, dtype=int)
|
48
|
-
# EAR_idx_map = np.empty(
|
49
|
-
# shape=arr_shape,
|
50
|
-
# dtype=[("EAR_idx", np.int32), ("run_idx", np.int32), ("iteration_idx", np.int32)],
|
51
|
-
# )
|
52
106
|
resource_map[:] = none_val
|
53
107
|
EAR_ID_map[:] = none_val
|
54
|
-
# EAR_idx_map[:] = (none_val, none_val, none_val) # TODO: add iteration_idx as well
|
55
108
|
|
56
|
-
|
57
|
-
|
109
|
+
assert cache.elements is not None
|
110
|
+
assert cache.iterations is not None
|
111
|
+
|
112
|
+
for elem_id in task.element_IDs:
|
113
|
+
element = cache.elements[elem_id]
|
114
|
+
for iter_ID_i in element.iteration_IDs:
|
115
|
+
iter_i = cache.iterations[iter_ID_i]
|
58
116
|
if iter_i.loop_idx != loop_idx:
|
59
117
|
continue
|
60
118
|
if iter_i.EARs_initialised: # not strictly needed (actions will be empty)
|
@@ -86,52 +144,53 @@ def generate_EAR_resource_map(
|
|
86
144
|
|
87
145
|
@TimeIt.decorator
|
88
146
|
def group_resource_map_into_jobscripts(
|
89
|
-
resource_map:
|
147
|
+
resource_map: ArrayLike,
|
90
148
|
none_val: Any = -1,
|
91
|
-
):
|
149
|
+
) -> tuple[list[JobScriptDescriptor], NDArray]:
|
92
150
|
"""
|
93
151
|
Convert a resource map into a plan for what elements to group together into jobscripts.
|
94
152
|
"""
|
95
|
-
|
96
|
-
resource_idx = np.unique(
|
97
|
-
jobscripts = []
|
98
|
-
allocated = np.zeros_like(
|
99
|
-
js_map = np.ones_like(
|
100
|
-
nones_bool =
|
153
|
+
resource_map_ = np.asanyarray(resource_map)
|
154
|
+
resource_idx = np.unique(resource_map_)
|
155
|
+
jobscripts: list[JobScriptDescriptor] = []
|
156
|
+
allocated = np.zeros_like(resource_map_)
|
157
|
+
js_map = np.ones_like(resource_map_, dtype=float) * np.nan
|
158
|
+
nones_bool: NDArray = resource_map_ == none_val
|
101
159
|
stop = False
|
102
|
-
for act_idx in range(
|
160
|
+
for act_idx in range(resource_map_.shape[0]):
|
103
161
|
for res_i in resource_idx:
|
104
162
|
if res_i == none_val:
|
105
163
|
continue
|
106
164
|
|
107
|
-
if res_i not in
|
165
|
+
if res_i not in resource_map_[act_idx]:
|
108
166
|
continue
|
109
167
|
|
110
|
-
|
111
|
-
diff = np.cumsum(np.abs(np.diff(
|
168
|
+
resource_map_[nones_bool] = res_i
|
169
|
+
diff = np.cumsum(np.abs(np.diff(resource_map_[act_idx:], axis=0)), axis=0)
|
112
170
|
|
113
171
|
elem_bool = np.logical_and(
|
114
|
-
|
172
|
+
resource_map_[act_idx] == res_i, allocated[act_idx] == False
|
115
173
|
)
|
116
174
|
elem_idx = np.where(elem_bool)[0]
|
117
175
|
act_elem_bool = np.logical_and(elem_bool, nones_bool[act_idx] == False)
|
118
|
-
act_elem_idx = np.where(act_elem_bool)
|
176
|
+
act_elem_idx: tuple[NDArray, ...] = np.where(act_elem_bool)
|
119
177
|
|
120
178
|
# add elements from downstream actions:
|
121
179
|
ds_bool = np.logical_and(
|
122
180
|
diff[:, elem_idx] == 0,
|
123
181
|
nones_bool[act_idx + 1 :, elem_idx] == False,
|
124
182
|
)
|
183
|
+
ds_act_idx: NDArray
|
184
|
+
ds_elem_idx: NDArray
|
125
185
|
ds_act_idx, ds_elem_idx = np.where(ds_bool)
|
126
186
|
ds_act_idx += act_idx + 1
|
127
187
|
ds_elem_idx = elem_idx[ds_elem_idx]
|
128
188
|
|
129
|
-
EARs_by_elem
|
189
|
+
EARs_by_elem: dict[int, list[int]] = {
|
190
|
+
k.item(): [act_idx] for k in act_elem_idx[0]
|
191
|
+
}
|
130
192
|
for ds_a, ds_e in zip(ds_act_idx, ds_elem_idx):
|
131
|
-
|
132
|
-
if ds_e_item not in EARs_by_elem:
|
133
|
-
EARs_by_elem[ds_e_item] = []
|
134
|
-
EARs_by_elem[ds_e_item].append(ds_a.item())
|
193
|
+
EARs_by_elem.setdefault(ds_e.item(), []).append(ds_a.item())
|
135
194
|
|
136
195
|
EARs = np.vstack([np.ones_like(act_elem_idx) * act_idx, act_elem_idx])
|
137
196
|
EARs = np.hstack([EARs, np.array([ds_act_idx, ds_elem_idx])])
|
@@ -139,7 +198,7 @@ def group_resource_map_into_jobscripts(
|
|
139
198
|
if not EARs.size:
|
140
199
|
continue
|
141
200
|
|
142
|
-
js = {
|
201
|
+
js: JobScriptDescriptor = {
|
143
202
|
"resources": res_i,
|
144
203
|
"elements": dict(sorted(EARs_by_elem.items(), key=lambda x: x[0])),
|
145
204
|
}
|
@@ -154,18 +213,21 @@ def group_resource_map_into_jobscripts(
|
|
154
213
|
if stop:
|
155
214
|
break
|
156
215
|
|
157
|
-
|
216
|
+
resource_map_[nones_bool] = none_val
|
158
217
|
|
159
218
|
return jobscripts, js_map
|
160
219
|
|
161
220
|
|
162
221
|
@TimeIt.decorator
|
163
|
-
def resolve_jobscript_dependencies(
|
222
|
+
def resolve_jobscript_dependencies(
|
223
|
+
jobscripts: Mapping[int, JobScriptCreationArguments],
|
224
|
+
element_deps: Mapping[int, Mapping[int, Sequence[int]]],
|
225
|
+
) -> Mapping[int, dict[int, ResolvedJobscriptBlockDependencies]]:
|
164
226
|
"""
|
165
227
|
Discover concrete dependencies between jobscripts.
|
166
228
|
"""
|
167
229
|
# first pass is to find the mappings between jobscript elements:
|
168
|
-
jobscript_deps = {}
|
230
|
+
jobscript_deps: dict[int, dict[int, ResolvedJobscriptBlockDependencies]] = {}
|
169
231
|
for js_idx, elem_deps in element_deps.items():
|
170
232
|
# keys of new dict are other jobscript indices on which this jobscript (js_idx)
|
171
233
|
# depends:
|
@@ -182,16 +244,12 @@ def resolve_jobscript_dependencies(jobscripts, element_deps):
|
|
182
244
|
if js_k_idx not in jobscript_deps[js_idx]:
|
183
245
|
jobscript_deps[js_idx][js_k_idx] = {"js_element_mapping": {}}
|
184
246
|
|
185
|
-
|
186
|
-
js_elem_idx_i
|
187
|
-
|
188
|
-
):
|
189
|
-
jobscript_deps[js_idx][js_k_idx]["js_element_mapping"][
|
190
|
-
js_elem_idx_i
|
191
|
-
] = []
|
247
|
+
jobscript_deps[js_idx][js_k_idx]["js_element_mapping"].setdefault(
|
248
|
+
js_elem_idx_i, []
|
249
|
+
)
|
192
250
|
|
193
251
|
# retrieve column index, which is the JS-element index:
|
194
|
-
js_elem_idx_k = np.where(
|
252
|
+
js_elem_idx_k: int = np.where(
|
195
253
|
np.any(js_k["EAR_ID"] == EAR_dep_j, axis=0)
|
196
254
|
)[0][0].item()
|
197
255
|
|
@@ -215,16 +273,16 @@ def resolve_jobscript_dependencies(jobscripts, element_deps):
|
|
215
273
|
js_i_num_js_elements = jobscripts[js_i_idx]["EAR_ID"].shape[1]
|
216
274
|
js_k_num_js_elements = jobscripts[js_k_idx]["EAR_ID"].shape[1]
|
217
275
|
|
218
|
-
is_all_i_elems = list(
|
219
|
-
|
220
|
-
)
|
276
|
+
is_all_i_elems = sorted(set(deps_j["js_element_mapping"])) == list(
|
277
|
+
range(js_i_num_js_elements)
|
278
|
+
)
|
221
279
|
|
222
280
|
is_all_k_single = set(
|
223
281
|
len(i) for i in deps_j["js_element_mapping"].values()
|
224
282
|
) == {1}
|
225
283
|
|
226
|
-
is_all_k_elems =
|
227
|
-
|
284
|
+
is_all_k_elems = sorted(
|
285
|
+
i[0] for i in deps_j["js_element_mapping"].values()
|
228
286
|
) == list(range(js_k_num_js_elements))
|
229
287
|
|
230
288
|
is_arr = is_all_i_elems and is_all_k_single and is_all_k_elems
|
@@ -233,8 +291,23 @@ def resolve_jobscript_dependencies(jobscripts, element_deps):
|
|
233
291
|
return jobscript_deps
|
234
292
|
|
235
293
|
|
294
|
+
def _reindex_dependencies(
|
295
|
+
jobscripts: Mapping[int, JobScriptCreationArguments],
|
296
|
+
from_idx: int,
|
297
|
+
to_idx: int,
|
298
|
+
):
|
299
|
+
for ds_js_idx, ds_js in jobscripts.items():
|
300
|
+
if ds_js_idx <= from_idx:
|
301
|
+
continue
|
302
|
+
deps = ds_js["dependencies"]
|
303
|
+
if from_idx in deps:
|
304
|
+
deps[to_idx] = deps.pop(from_idx)
|
305
|
+
|
306
|
+
|
236
307
|
@TimeIt.decorator
|
237
|
-
def merge_jobscripts_across_tasks(
|
308
|
+
def merge_jobscripts_across_tasks(
|
309
|
+
jobscripts: Mapping[int, JobScriptCreationArguments],
|
310
|
+
) -> Mapping[int, JobScriptCreationArguments]:
|
238
311
|
"""Try to merge jobscripts between tasks.
|
239
312
|
|
240
313
|
This is possible if two jobscripts share the same resources and have an array
|
@@ -242,12 +315,32 @@ def merge_jobscripts_across_tasks(jobscripts: Dict) -> Dict:
|
|
242
315
|
|
243
316
|
"""
|
244
317
|
|
318
|
+
# The set of IDs of dicts that we've merged, allowing us to not keep that info in
|
319
|
+
# the dicts themselves.
|
320
|
+
merged: set[int] = set()
|
321
|
+
|
245
322
|
for js_idx, js in jobscripts.items():
|
246
|
-
|
247
|
-
|
248
|
-
|
323
|
+
if not js["dependencies"]:
|
324
|
+
continue
|
325
|
+
|
326
|
+
closest_idx = cast("int", max(js["dependencies"]))
|
327
|
+
closest_js = jobscripts[closest_idx]
|
328
|
+
other_deps = {k: v for k, v in js["dependencies"].items() if k != closest_idx}
|
329
|
+
|
330
|
+
# if all `other_deps` are also found within `closest_js`'s dependencies, then we
|
331
|
+
# can merge `js` into `closest_js`:
|
332
|
+
merge = True
|
333
|
+
for dep_idx, dep_i in other_deps.items():
|
334
|
+
try:
|
335
|
+
if closest_js["dependencies"][dep_idx] != dep_i:
|
336
|
+
merge = False
|
337
|
+
except KeyError:
|
338
|
+
merge = False
|
339
|
+
|
340
|
+
if merge:
|
341
|
+
js_j = closest_js # the jobscript we are merging `js` into
|
342
|
+
js_j_idx = closest_idx
|
249
343
|
dep_info = js["dependencies"][js_j_idx]
|
250
|
-
js_j = jobscripts[js_j_idx] # the jobscript we are merging `js` into
|
251
344
|
|
252
345
|
# can only merge if resources are the same and is array dependency:
|
253
346
|
if js["resource_hash"] == js_j["resource_hash"] and dep_info["is_array"]:
|
@@ -259,11 +352,7 @@ def merge_jobscripts_across_tasks(jobscripts: Dict) -> Dict:
|
|
259
352
|
js_j["task_insert_IDs"].append(js["task_insert_IDs"][0])
|
260
353
|
js_j["task_loop_idx"].append(js["task_loop_idx"][0])
|
261
354
|
|
262
|
-
add_acts = []
|
263
|
-
for t_act in js["task_actions"]:
|
264
|
-
t_act = copy.copy(t_act)
|
265
|
-
t_act[2] += num_loop_idx
|
266
|
-
add_acts.append(t_act)
|
355
|
+
add_acts = [(a, b, num_loop_idx) for a, b, _ in js["task_actions"]]
|
267
356
|
|
268
357
|
js_j["task_actions"].extend(add_acts)
|
269
358
|
for k, v in js["task_elements"].items():
|
@@ -273,46 +362,374 @@ def merge_jobscripts_across_tasks(jobscripts: Dict) -> Dict:
|
|
273
362
|
js_j["EAR_ID"] = np.vstack((js_j["EAR_ID"], js["EAR_ID"]))
|
274
363
|
|
275
364
|
# mark this js as defunct
|
276
|
-
js
|
365
|
+
merged.add(id(js))
|
277
366
|
|
278
367
|
# update dependencies of any downstream jobscripts that refer to this js
|
279
|
-
|
280
|
-
if ds_js_idx <= js_idx:
|
281
|
-
continue
|
282
|
-
for dep_k_js_idx in list(ds_js["dependencies"].keys()):
|
283
|
-
if dep_k_js_idx == js_idx:
|
284
|
-
jobscripts[ds_js_idx]["dependencies"][js_j_idx] = ds_js[
|
285
|
-
"dependencies"
|
286
|
-
].pop(dep_k_js_idx)
|
368
|
+
_reindex_dependencies(jobscripts, js_idx, js_j_idx)
|
287
369
|
|
288
370
|
# remove is_merged jobscripts:
|
289
|
-
|
290
|
-
|
291
|
-
return jobscripts
|
371
|
+
return {k: v for k, v in jobscripts.items() if id(v) not in merged}
|
292
372
|
|
293
373
|
|
294
374
|
@TimeIt.decorator
|
295
|
-
def
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
375
|
+
def resolve_jobscript_blocks(
|
376
|
+
jobscripts: Mapping[int, JobScriptCreationArguments],
|
377
|
+
) -> list[dict[str, Any]]:
|
378
|
+
"""For contiguous, dependent, non-array jobscripts with identical resource
|
379
|
+
requirements, combine into multi-block jobscripts.
|
380
|
+
|
381
|
+
Parameters
|
382
|
+
----------
|
383
|
+
jobscripts
|
384
|
+
Dict whose values must be dicts with keys "is_array", "resource_hash" and
|
385
|
+
"dependencies".
|
386
|
+
run_parallelism
|
387
|
+
True if the store supports run parallelism
|
388
|
+
|
389
|
+
"""
|
390
|
+
js_new: list[
|
391
|
+
list[JobScriptCreationArguments]
|
392
|
+
] = [] # TODO: not the same type, e.g. dependencies have tuple keys,
|
393
|
+
new_idx: dict[
|
394
|
+
int, tuple[int, int]
|
395
|
+
] = {} # track new positions by new jobscript index and block index
|
396
|
+
new_idx_inv: dict[int, list[int]] = defaultdict(list)
|
397
|
+
prev_hash = None
|
398
|
+
blocks: list[JobScriptCreationArguments] = []
|
399
|
+
js_deps_rec: dict[int, set[int]] = {} # recursive
|
400
|
+
for js_idx, js_i in jobscripts.items():
|
401
|
+
|
402
|
+
cur_js_idx = len(js_new)
|
403
|
+
new_deps_js_j = {
|
404
|
+
new_idx[i][0] for i in cast("Sequence[int]", js_i["dependencies"])
|
405
|
+
}
|
406
|
+
new_deps_js_j_rec = [
|
407
|
+
k for i in new_deps_js_j for j in new_idx_inv[i] for k in js_deps_rec[j]
|
408
|
+
]
|
409
|
+
|
410
|
+
js_deps_rec[js_idx] = new_deps_js_j.union(new_deps_js_j_rec)
|
411
|
+
|
412
|
+
# recursive dependencies of js_i (which we're looking to merge), excluding the
|
413
|
+
# dependency on the current jobscript:
|
414
|
+
js_j_deps_rec_no_cur = js_deps_rec[js_idx] - set([cur_js_idx])
|
415
|
+
|
416
|
+
# recursive dependencies of the current jobscript:
|
417
|
+
cur_deps_rec = {
|
418
|
+
j for i in new_idx_inv[cur_js_idx] for j in js_deps_rec[i] if j != cur_js_idx
|
419
|
+
}
|
420
|
+
|
421
|
+
# can we merge js_i into the current jobscript, as far as dependencies are
|
422
|
+
# concerned?
|
423
|
+
deps_mergable = cur_js_idx in new_deps_js_j
|
424
|
+
if deps_mergable and js_j_deps_rec_no_cur:
|
425
|
+
deps_mergable = js_j_deps_rec_no_cur == cur_deps_rec
|
426
|
+
|
427
|
+
if js_i["is_array"]:
|
428
|
+
# array jobs cannot be merged into the same jobscript
|
429
|
+
|
430
|
+
# append existing block:
|
431
|
+
if blocks:
|
432
|
+
js_new.append(blocks)
|
433
|
+
prev_hash = None
|
434
|
+
blocks = []
|
435
|
+
|
436
|
+
new_idx[js_idx] = (len(js_new), 0)
|
437
|
+
new_idx_inv[len(js_new)].append(js_idx)
|
438
|
+
js_new.append([js_i])
|
439
|
+
continue
|
440
|
+
|
441
|
+
if js_idx == 0 or prev_hash is None:
|
442
|
+
# (note: zeroth index will always exist)
|
443
|
+
|
444
|
+
# start a new block:
|
445
|
+
blocks.append(js_i)
|
446
|
+
new_idx[js_idx] = (len(js_new), len(blocks) - 1)
|
447
|
+
new_idx_inv[len(js_new)].append(js_idx)
|
448
|
+
|
449
|
+
# set resource hash to compare with the next jobscript
|
450
|
+
prev_hash = js_i["resource_hash"]
|
451
|
+
|
452
|
+
elif js_i["resource_hash"] == prev_hash and deps_mergable:
|
453
|
+
# merge with previous jobscript by adding another block
|
454
|
+
# only merge if this jobscript's dependencies include the current jobscript,
|
455
|
+
# and any other dependencies are included in the current jobscript's
|
456
|
+
# dependencies
|
457
|
+
blocks.append(js_i)
|
458
|
+
new_idx[js_idx] = (len(js_new), len(blocks) - 1)
|
459
|
+
new_idx_inv[len(js_new)].append(js_idx)
|
460
|
+
|
461
|
+
else:
|
462
|
+
# cannot merge, append the new jobscript data:
|
463
|
+
js_new.append(blocks)
|
464
|
+
|
465
|
+
# start a new block:
|
466
|
+
blocks = [js_i]
|
467
|
+
new_idx[js_idx] = (len(js_new), len(blocks) - 1)
|
468
|
+
new_idx_inv[len(js_new)].append(js_idx)
|
469
|
+
|
470
|
+
# set resource hash to compare with the next jobscript
|
471
|
+
prev_hash = js_i["resource_hash"]
|
472
|
+
|
473
|
+
# append remaining blocks:
|
474
|
+
if blocks:
|
475
|
+
js_new.append(blocks)
|
476
|
+
prev_hash = None
|
477
|
+
blocks = []
|
478
|
+
|
479
|
+
# re-index dependencies:
|
480
|
+
js_new_: list[dict[str, Any]] = []
|
481
|
+
for js_i_idx, js_new_i in enumerate(js_new):
|
482
|
+
|
483
|
+
resources = None
|
484
|
+
is_array = None
|
485
|
+
for block_j in js_new_i:
|
486
|
+
for k, v in new_idx.items():
|
487
|
+
dep_data = block_j["dependencies"].pop(k, None)
|
488
|
+
if dep_data:
|
489
|
+
block_j["dependencies"][v] = dep_data
|
490
|
+
|
491
|
+
del block_j["resource_hash"]
|
492
|
+
resources = block_j.pop("resources", None)
|
493
|
+
is_array = block_j.pop("is_array")
|
494
|
+
|
495
|
+
js_new_.append(
|
496
|
+
{
|
497
|
+
"resources": resources,
|
498
|
+
"is_array": is_array,
|
499
|
+
"blocks": js_new[js_i_idx],
|
500
|
+
}
|
501
|
+
)
|
502
|
+
|
503
|
+
return js_new_
|
504
|
+
|
505
|
+
|
506
|
+
@hydrate
|
507
|
+
class JobscriptBlock(JSONLike):
|
508
|
+
"""A rectangular block of element-actions to run within a jobscript.
|
509
|
+
|
510
|
+
Parameters
|
511
|
+
----------
|
512
|
+
task_insert_IDs: list[int]
|
513
|
+
The task insertion IDs.
|
514
|
+
task_actions: list[tuple]
|
515
|
+
The actions of the tasks.
|
516
|
+
``task insert ID, action_idx, index into task_loop_idx`` for each ``JS_ACTION_IDX``
|
517
|
+
task_elements: dict[int, list[int]]
|
518
|
+
The elements of the tasks.
|
519
|
+
Maps ``JS_ELEMENT_IDX`` to list of ``TASK_ELEMENT_IDX`` for each ``TASK_INSERT_ID``
|
520
|
+
EAR_ID:
|
521
|
+
Element action run information.
|
522
|
+
task_loop_idx: list[dict]
|
523
|
+
Description of what loops are in play.
|
524
|
+
dependencies: dict[tuple[int, int], dict]
|
525
|
+
Description of dependencies. Keys are tuples of (jobscript index,
|
526
|
+
jobscript-block index) of the dependency.
|
527
|
+
index: int
|
528
|
+
The index of the block within the parent jobscript.
|
529
|
+
jobscript: ~hpcflow.app.Jobscript
|
530
|
+
The parent jobscript.
|
531
|
+
|
532
|
+
"""
|
533
|
+
|
534
|
+
def __init__(
|
535
|
+
self,
|
536
|
+
index: int,
|
537
|
+
task_insert_IDs: list[int],
|
538
|
+
task_loop_idx: list[dict[str, int]],
|
539
|
+
task_actions: list[tuple[int, int, int]] | None = None,
|
540
|
+
task_elements: dict[int, list[int]] | None = None,
|
541
|
+
EAR_ID: NDArray | None = None,
|
542
|
+
dependencies: (
|
543
|
+
dict[tuple[int, int], ResolvedJobscriptBlockDependencies] | None
|
544
|
+
) = None,
|
545
|
+
jobscript: Jobscript | None = None,
|
546
|
+
):
|
547
|
+
self.jobscript = jobscript
|
548
|
+
self._index = index
|
549
|
+
self._task_insert_IDs = task_insert_IDs
|
550
|
+
self._task_actions = task_actions
|
551
|
+
self._task_elements = task_elements
|
552
|
+
self._task_loop_idx = task_loop_idx
|
553
|
+
self._EAR_ID = EAR_ID
|
554
|
+
self._dependencies = dependencies
|
555
|
+
|
556
|
+
self._all_EARs = None # assigned on first access to `all_EARs` property
|
557
|
+
|
558
|
+
@property
|
559
|
+
def index(self) -> int:
|
560
|
+
return self._index
|
561
|
+
|
562
|
+
@property
|
563
|
+
def submission(self) -> Submission:
|
564
|
+
assert self.jobscript is not None
|
565
|
+
return self.jobscript.submission
|
566
|
+
|
567
|
+
@property
|
568
|
+
def task_insert_IDs(self) -> Sequence[int]:
|
569
|
+
"""
|
570
|
+
The insertion IDs of tasks in this jobscript-block.
|
571
|
+
"""
|
572
|
+
return self._task_insert_IDs
|
573
|
+
|
574
|
+
@property
|
575
|
+
@TimeIt.decorator
|
576
|
+
def task_actions(self) -> NDArray:
|
577
|
+
"""
|
578
|
+
The IDs of actions of each task in this jobscript-block.
|
579
|
+
"""
|
580
|
+
assert self.jobscript is not None
|
581
|
+
return self.workflow._store.get_jobscript_block_task_actions_array(
|
582
|
+
sub_idx=self.submission.index,
|
583
|
+
js_idx=self.jobscript.index,
|
584
|
+
blk_idx=self.index,
|
585
|
+
task_actions_arr=self._task_actions,
|
586
|
+
)
|
587
|
+
|
588
|
+
@property
|
589
|
+
@TimeIt.decorator
|
590
|
+
def task_elements(self) -> Mapping[int, Sequence[int]]:
|
591
|
+
"""
|
592
|
+
The IDs of elements of each task in this jobscript-block.
|
593
|
+
"""
|
594
|
+
assert self.jobscript is not None
|
595
|
+
return self.workflow._store.get_jobscript_block_task_elements_map(
|
596
|
+
sub_idx=self.submission.index,
|
597
|
+
js_idx=self.jobscript.index,
|
598
|
+
blk_idx=self.index,
|
599
|
+
task_elems_map=self._task_elements,
|
600
|
+
)
|
312
601
|
|
313
|
-
|
602
|
+
@property
|
603
|
+
@TimeIt.decorator
|
604
|
+
def EAR_ID(self) -> NDArray:
|
605
|
+
"""
|
606
|
+
The array of EAR IDs in this jobscript-block.
|
607
|
+
"""
|
608
|
+
assert self.jobscript is not None
|
609
|
+
return self.workflow._store.get_jobscript_block_run_ID_array(
|
610
|
+
sub_idx=self.submission.index,
|
611
|
+
js_idx=self.jobscript.index,
|
612
|
+
blk_idx=self.index,
|
613
|
+
run_ID_arr=self._EAR_ID,
|
614
|
+
)
|
314
615
|
|
616
|
+
@property
|
617
|
+
@TimeIt.decorator
|
618
|
+
def dependencies(
|
619
|
+
self,
|
620
|
+
) -> Mapping[tuple[int, int], ResolvedJobscriptBlockDependencies]:
|
621
|
+
"""
|
622
|
+
The dependency descriptor.
|
623
|
+
"""
|
624
|
+
assert self.jobscript is not None
|
625
|
+
return self.workflow._store.get_jobscript_block_dependencies(
|
626
|
+
sub_idx=self.submission.index,
|
627
|
+
js_idx=self.jobscript.index,
|
628
|
+
blk_idx=self.index,
|
629
|
+
js_dependencies=self._dependencies,
|
630
|
+
)
|
315
631
|
|
632
|
+
@property
|
633
|
+
def task_loop_idx(self) -> Sequence[Mapping[str, int]]:
|
634
|
+
"""
|
635
|
+
The description of where various task loops are.
|
636
|
+
"""
|
637
|
+
return self._task_loop_idx
|
638
|
+
|
639
|
+
@property
|
640
|
+
@TimeIt.decorator
|
641
|
+
def num_actions(self) -> int:
|
642
|
+
"""
|
643
|
+
The maximal number of actions in the jobscript-block.
|
644
|
+
"""
|
645
|
+
return self.EAR_ID.shape[0]
|
646
|
+
|
647
|
+
@property
|
648
|
+
@TimeIt.decorator
|
649
|
+
def num_elements(self) -> int:
|
650
|
+
"""
|
651
|
+
The maximal number of elements in the jobscript-block.
|
652
|
+
"""
|
653
|
+
return self.EAR_ID.shape[1]
|
654
|
+
|
655
|
+
@property
|
656
|
+
def workflow(self) -> Workflow:
|
657
|
+
"""
|
658
|
+
The associated workflow.
|
659
|
+
"""
|
660
|
+
assert self.jobscript is not None
|
661
|
+
return self.jobscript.workflow
|
662
|
+
|
663
|
+
@property
|
664
|
+
@TimeIt.decorator
|
665
|
+
def all_EARs(self) -> Sequence[ElementActionRun]:
|
666
|
+
"""
|
667
|
+
Description of EAR information for this jobscript-block.
|
668
|
+
"""
|
669
|
+
assert self.jobscript is not None
|
670
|
+
return [i for i in self.jobscript.all_EARs if i.id_ in self.EAR_ID]
|
671
|
+
|
672
|
+
@override
|
673
|
+
def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
|
674
|
+
dct = super()._postprocess_to_dict(d)
|
675
|
+
del dct["_all_EARs"]
|
676
|
+
dct["_dependencies"] = [[list(k), v] for k, v in self.dependencies.items()]
|
677
|
+
dct = {k.lstrip("_"): v for k, v in dct.items()}
|
678
|
+
dct["EAR_ID"] = cast("NDArray", dct["EAR_ID"]).tolist()
|
679
|
+
return dct
|
680
|
+
|
681
|
+
@classmethod
|
682
|
+
def from_json_like(cls, json_like, shared_data=None):
|
683
|
+
json_like["EAR_ID"] = (
|
684
|
+
np.array(json_like["EAR_ID"]) if json_like["EAR_ID"] is not None else None
|
685
|
+
)
|
686
|
+
if json_like["dependencies"] is not None:
|
687
|
+
# transform list to dict with tuple keys, and transform string keys in
|
688
|
+
# `js_element_mapping` to integers:
|
689
|
+
deps_processed = {}
|
690
|
+
for i in json_like["dependencies"]:
|
691
|
+
deps_processed_i = {
|
692
|
+
"js_element_mapping": {
|
693
|
+
int(k): v for k, v in i[1]["js_element_mapping"].items()
|
694
|
+
},
|
695
|
+
"is_array": i[1]["is_array"],
|
696
|
+
}
|
697
|
+
deps_processed[tuple(i[0])] = deps_processed_i
|
698
|
+
json_like["dependencies"] = deps_processed
|
699
|
+
|
700
|
+
return super().from_json_like(json_like, shared_data)
|
701
|
+
|
702
|
+
def _get_EARs_arr(self) -> NDArray:
|
703
|
+
"""
|
704
|
+
Get all associated EAR objects as a 2D array.
|
705
|
+
"""
|
706
|
+
return np.array(self.all_EARs).reshape(self.EAR_ID.shape)
|
707
|
+
|
708
|
+
def get_task_loop_idx_array(self) -> NDArray:
|
709
|
+
"""
|
710
|
+
Get an array of task loop indices.
|
711
|
+
"""
|
712
|
+
loop_idx = np.empty_like(self.EAR_ID)
|
713
|
+
loop_idx[:] = np.array([i[2] for i in self.task_actions]).reshape(
|
714
|
+
(len(self.task_actions), 1)
|
715
|
+
)
|
716
|
+
return loop_idx
|
717
|
+
|
718
|
+
@TimeIt.decorator
|
719
|
+
def write_EAR_ID_file(self, fp: TextIO):
|
720
|
+
"""Write a text file with `num_elements` lines and `num_actions` delimited tokens
|
721
|
+
per line, representing whether a given EAR must be executed."""
|
722
|
+
assert self.jobscript is not None
|
723
|
+
# can't specify "open" newline if we pass the file name only, so pass handle:
|
724
|
+
np.savetxt(
|
725
|
+
fname=fp,
|
726
|
+
X=(self.EAR_ID).T,
|
727
|
+
fmt="%.0f",
|
728
|
+
delimiter=self.jobscript._EAR_files_delimiter,
|
729
|
+
)
|
730
|
+
|
731
|
+
|
732
|
+
@hydrate
|
316
733
|
class Jobscript(JSONLike):
|
317
734
|
"""
|
318
735
|
A group of actions that are submitted together to be executed by the underlying job
|
@@ -348,7 +765,7 @@ class Jobscript(JSONLike):
|
|
348
765
|
The job ID from the scheduler, if known.
|
349
766
|
process_ID: int
|
350
767
|
The process ID of the subprocess, if known.
|
351
|
-
version_info:
|
768
|
+
version_info: dict[str, ...]
|
352
769
|
Version info about the target system.
|
353
770
|
os_name: str
|
354
771
|
The name of the OS.
|
@@ -360,363 +777,319 @@ class Jobscript(JSONLike):
|
|
360
777
|
Whether the jobscript is currently running.
|
361
778
|
"""
|
362
779
|
|
363
|
-
|
364
|
-
|
365
|
-
_workflow_app_alias = "wkflow_app"
|
780
|
+
_EAR_files_delimiter: ClassVar[str] = ":"
|
781
|
+
_workflow_app_alias: ClassVar[str] = "wkflow_app"
|
366
782
|
|
367
|
-
_child_objects = (
|
783
|
+
_child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
|
368
784
|
ChildObjectSpec(
|
369
785
|
name="resources",
|
370
786
|
class_name="ElementResources",
|
371
787
|
),
|
788
|
+
ChildObjectSpec(
|
789
|
+
name="blocks",
|
790
|
+
class_name="JobscriptBlock",
|
791
|
+
is_multiple=True,
|
792
|
+
parent_ref="jobscript",
|
793
|
+
),
|
372
794
|
)
|
373
795
|
|
374
796
|
def __init__(
|
375
797
|
self,
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
scheduler_job_ID: Optional[str] = None,
|
388
|
-
process_ID: Optional[int] = None,
|
389
|
-
version_info: Optional[Tuple[str]] = None,
|
390
|
-
os_name: Optional[str] = None,
|
391
|
-
shell_name: Optional[str] = None,
|
392
|
-
scheduler_name: Optional[str] = None,
|
393
|
-
running: Optional[bool] = None,
|
798
|
+
index: int,
|
799
|
+
is_array: bool,
|
800
|
+
resources: ElementResources,
|
801
|
+
blocks: list[JobscriptBlock],
|
802
|
+
at_submit_metadata: dict[str, Any] | None = None,
|
803
|
+
submit_hostname: str | None = None,
|
804
|
+
submit_machine: str | None = None,
|
805
|
+
shell_idx: int | None = None,
|
806
|
+
version_info: VersionInfo | None = None,
|
807
|
+
resource_hash: str | None = None,
|
808
|
+
elements: dict[int, list[int]] | None = None,
|
394
809
|
):
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
810
|
+
if resource_hash is not None:
|
811
|
+
raise AttributeError("resource_hash must not be supplied")
|
812
|
+
if elements is not None:
|
813
|
+
raise AttributeError("elements must not be supplied")
|
814
|
+
|
815
|
+
if not isinstance(blocks[0], JobscriptBlock):
|
816
|
+
blocks = [
|
817
|
+
JobscriptBlock(**i, index=idx, jobscript=self)
|
818
|
+
for idx, i in enumerate(blocks)
|
819
|
+
]
|
820
|
+
|
821
|
+
self._index = index
|
822
|
+
self._blocks = blocks
|
823
|
+
self._at_submit_metadata = at_submit_metadata or {
|
824
|
+
k: None for k in JOBSCRIPT_SUBMIT_TIME_KEYS
|
825
|
+
}
|
826
|
+
self._is_array = is_array
|
405
827
|
self._resources = resources
|
406
|
-
self._dependencies = dependencies
|
407
828
|
|
408
829
|
# assigned on parent `Submission.submit` (or retrieved form persistent store):
|
409
|
-
self._submit_time = submit_time
|
410
830
|
self._submit_hostname = submit_hostname
|
411
831
|
self._submit_machine = submit_machine
|
412
|
-
self.
|
832
|
+
self._shell_idx = shell_idx
|
413
833
|
|
414
|
-
self._scheduler_job_ID = scheduler_job_ID
|
415
|
-
self._process_ID = process_ID
|
416
834
|
self._version_info = version_info
|
417
835
|
|
418
|
-
# assigned
|
419
|
-
|
420
|
-
|
421
|
-
self.
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
self.
|
426
|
-
|
427
|
-
self.
|
428
|
-
self._submit_time_obj = None # assigned on first access to `submit_time` property
|
429
|
-
self._running = None
|
430
|
-
self._all_EARs = None # assigned on first access to `all_EARs` property
|
836
|
+
# assigned by parent Submission
|
837
|
+
self._submission: Submission | None = None
|
838
|
+
# assigned on first access to `scheduler` property
|
839
|
+
self._scheduler_obj: Scheduler | None = None
|
840
|
+
# assigned on first access to `shell` property
|
841
|
+
self._shell_obj: Shell | None = None
|
842
|
+
# assigned on first access to `submit_time` property
|
843
|
+
self._submit_time_obj: datetime | None = None
|
844
|
+
# assigned on first access to `all_EARs` property
|
845
|
+
self._all_EARs: list[ElementActionRun] | None = None
|
431
846
|
|
432
|
-
|
847
|
+
self._set_parent_refs()
|
848
|
+
|
849
|
+
def __repr__(self) -> str:
|
433
850
|
return (
|
434
851
|
f"{self.__class__.__name__}("
|
435
852
|
f"index={self.index!r}, "
|
436
|
-
f"
|
853
|
+
f"blocks={self.blocks!r}, "
|
437
854
|
f"resources={self.resources!r}, "
|
438
|
-
f"dependencies={self.dependencies!r}"
|
439
855
|
f")"
|
440
856
|
)
|
441
857
|
|
442
|
-
|
443
|
-
|
444
|
-
|
858
|
+
@override
|
859
|
+
def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
|
860
|
+
dct = super()._postprocess_to_dict(d)
|
445
861
|
del dct["_scheduler_obj"]
|
446
862
|
del dct["_shell_obj"]
|
447
863
|
del dct["_submit_time_obj"]
|
448
864
|
del dct["_all_EARs"]
|
449
865
|
dct = {k.lstrip("_"): v for k, v in dct.items()}
|
450
|
-
dct["EAR_ID"] = dct["EAR_ID"].tolist()
|
451
866
|
return dct
|
452
867
|
|
453
868
|
@classmethod
|
454
869
|
def from_json_like(cls, json_like, shared_data=None):
|
455
|
-
json_like["EAR_ID"] = np.array(json_like["EAR_ID"])
|
456
870
|
return super().from_json_like(json_like, shared_data)
|
457
871
|
|
458
872
|
@property
|
459
|
-
def workflow_app_alias(self):
|
873
|
+
def workflow_app_alias(self) -> str:
|
460
874
|
"""
|
461
875
|
Alias for the workflow app in job scripts.
|
462
876
|
"""
|
463
|
-
return self.
|
877
|
+
return self.submission.WORKFLOW_APP_ALIAS
|
464
878
|
|
465
|
-
def get_commands_file_name(
|
879
|
+
def get_commands_file_name(
|
880
|
+
self, block_act_key: BlockActionKey, shell: Shell | None = None
|
881
|
+
) -> str:
|
466
882
|
"""
|
467
883
|
Get the name of a file containing commands for a particular jobscript action.
|
468
884
|
"""
|
469
|
-
return self.
|
470
|
-
|
471
|
-
js_action_idx=js_action_idx,
|
885
|
+
return self._app.RunDirAppFiles.get_commands_file_name(
|
886
|
+
block_act_key,
|
472
887
|
shell=shell or self.shell,
|
473
888
|
)
|
474
889
|
|
475
890
|
@property
|
476
|
-
def
|
477
|
-
|
478
|
-
The insertion IDs of tasks in this jobscript.
|
479
|
-
"""
|
480
|
-
return self._task_insert_IDs
|
481
|
-
|
482
|
-
@property
|
483
|
-
def task_actions(self):
|
484
|
-
"""
|
485
|
-
The IDs of actions of each task in this jobscript.
|
486
|
-
"""
|
487
|
-
return self._task_actions
|
488
|
-
|
489
|
-
@property
|
490
|
-
def task_elements(self):
|
491
|
-
"""
|
492
|
-
The IDs of elements of each task in this jobscript.
|
493
|
-
"""
|
494
|
-
return self._task_elements
|
891
|
+
def blocks(self) -> Sequence[JobscriptBlock]:
|
892
|
+
return self._blocks
|
495
893
|
|
496
894
|
@property
|
497
|
-
def
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
895
|
+
def at_submit_metadata(self) -> dict[str, Any]:
|
896
|
+
return self.workflow._store.get_jobscript_at_submit_metadata(
|
897
|
+
sub_idx=self.submission.index,
|
898
|
+
js_idx=self.index,
|
899
|
+
metadata_attr=self._at_submit_metadata,
|
900
|
+
)
|
502
901
|
|
503
902
|
@property
|
504
|
-
def all_EAR_IDs(self) ->
|
505
|
-
"""
|
506
|
-
|
507
|
-
|
508
|
-
return self.EAR_ID.flatten()
|
903
|
+
def all_EAR_IDs(self) -> NDArray:
|
904
|
+
"""Return all run IDs of this jobscripts (across all blocks), removing missing
|
905
|
+
run IDs (i.e. -1 values)"""
|
906
|
+
return np.concatenate([i.EAR_ID[i.EAR_ID >= 0] for i in self.blocks])
|
509
907
|
|
510
908
|
@property
|
511
909
|
@TimeIt.decorator
|
512
|
-
def all_EARs(self) ->
|
910
|
+
def all_EARs(self) -> Sequence[ElementActionRun]:
|
513
911
|
"""
|
514
912
|
Description of EAR information for this jobscript.
|
515
913
|
"""
|
516
|
-
|
517
|
-
self._all_EARs = self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)
|
518
|
-
return self._all_EARs
|
914
|
+
return self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)
|
519
915
|
|
520
916
|
@property
|
521
|
-
|
917
|
+
@TimeIt.decorator
|
918
|
+
def resources(self) -> ElementResources:
|
522
919
|
"""
|
523
920
|
The common resources that this jobscript requires.
|
524
921
|
"""
|
525
922
|
return self._resources
|
526
923
|
|
527
924
|
@property
|
528
|
-
|
529
|
-
|
530
|
-
The description of where various task loops are.
|
925
|
+
@TimeIt.decorator
|
926
|
+
def dependencies(self) -> Mapping[tuple[int, int], dict[str, bool]]:
|
531
927
|
"""
|
532
|
-
|
533
|
-
|
534
|
-
@property
|
535
|
-
def dependencies(self):
|
928
|
+
The dependency descriptor, accounting for all blocks within this jobscript.
|
536
929
|
"""
|
537
|
-
|
538
|
-
|
539
|
-
|
930
|
+
deps = {}
|
931
|
+
for block in self.blocks:
|
932
|
+
for (js_idx, blk_idx), v in block.dependencies.items():
|
933
|
+
if js_idx == self.index:
|
934
|
+
# block dependency is internal to this jobscript
|
935
|
+
continue
|
936
|
+
else:
|
937
|
+
deps[js_idx, blk_idx] = {"is_array": v["is_array"]}
|
938
|
+
return deps
|
540
939
|
|
541
940
|
@property
|
542
941
|
@TimeIt.decorator
|
543
|
-
def start_time(self):
|
942
|
+
def start_time(self) -> None | datetime:
|
544
943
|
"""The first known start time of any EAR in this jobscript."""
|
545
944
|
if not self.is_submitted:
|
546
|
-
return
|
547
|
-
all_times = [i.start_time for i in self.all_EARs if i.start_time]
|
548
|
-
if all_times:
|
549
|
-
return min(all_times)
|
550
|
-
else:
|
551
945
|
return None
|
946
|
+
return min(
|
947
|
+
(ear.start_time for ear in self.all_EARs if ear.start_time), default=None
|
948
|
+
)
|
552
949
|
|
553
950
|
@property
|
554
951
|
@TimeIt.decorator
|
555
|
-
def end_time(self):
|
952
|
+
def end_time(self) -> None | datetime:
|
556
953
|
"""The last known end time of any EAR in this jobscript."""
|
557
954
|
if not self.is_submitted:
|
558
|
-
return
|
559
|
-
all_times = [i.end_time for i in self.all_EARs if i.end_time]
|
560
|
-
if all_times:
|
561
|
-
return max(all_times)
|
562
|
-
else:
|
563
955
|
return None
|
956
|
+
return max((ear.end_time for ear in self.all_EARs if ear.end_time), default=None)
|
564
957
|
|
565
958
|
@property
|
566
959
|
def submit_time(self):
|
567
960
|
"""
|
568
961
|
When the jobscript was submitted, if known.
|
569
962
|
"""
|
570
|
-
if self._submit_time_obj is None
|
571
|
-
self.
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
)
|
963
|
+
if self._submit_time_obj is None:
|
964
|
+
if _submit_time := self.at_submit_metadata["submit_time"]:
|
965
|
+
self._submit_time_obj = parse_timestamp(
|
966
|
+
_submit_time, self.workflow.ts_fmt
|
967
|
+
)
|
576
968
|
return self._submit_time_obj
|
577
969
|
|
578
970
|
@property
|
579
|
-
def submit_hostname(self):
|
971
|
+
def submit_hostname(self) -> str | None:
|
580
972
|
"""
|
581
973
|
Where the jobscript was submitted, if known.
|
582
974
|
"""
|
583
975
|
return self._submit_hostname
|
584
976
|
|
585
977
|
@property
|
586
|
-
def submit_machine(self):
|
978
|
+
def submit_machine(self) -> str | None:
|
587
979
|
"""
|
588
980
|
Description of what the jobscript was submitted to, if known.
|
589
981
|
"""
|
590
982
|
return self._submit_machine
|
591
983
|
|
592
984
|
@property
|
593
|
-
def
|
985
|
+
def shell_idx(self):
|
986
|
+
return self._shell_idx
|
987
|
+
|
988
|
+
@property
|
989
|
+
def submit_cmdline(self) -> list[str] | None:
|
594
990
|
"""
|
595
|
-
The command line used to
|
991
|
+
The command line used to submit the jobscript, if known.
|
596
992
|
"""
|
597
|
-
return self.
|
993
|
+
return self.at_submit_metadata["submit_cmdline"]
|
598
994
|
|
599
995
|
@property
|
600
|
-
def scheduler_job_ID(self):
|
996
|
+
def scheduler_job_ID(self) -> str | None:
|
601
997
|
"""
|
602
998
|
The job ID from the scheduler, if known.
|
603
999
|
"""
|
604
|
-
return self.
|
1000
|
+
return self.at_submit_metadata["scheduler_job_ID"]
|
605
1001
|
|
606
1002
|
@property
|
607
|
-
def process_ID(self):
|
1003
|
+
def process_ID(self) -> int | None:
|
608
1004
|
"""
|
609
1005
|
The process ID from direct execution, if known.
|
610
1006
|
"""
|
611
|
-
return self.
|
1007
|
+
return self.at_submit_metadata["process_ID"]
|
612
1008
|
|
613
1009
|
@property
|
614
|
-
def version_info(self):
|
1010
|
+
def version_info(self) -> VersionInfo | None:
|
615
1011
|
"""
|
616
1012
|
Version information about the execution environment (OS, etc).
|
617
1013
|
"""
|
618
1014
|
return self._version_info
|
619
1015
|
|
620
1016
|
@property
|
621
|
-
def index(self):
|
1017
|
+
def index(self) -> int:
|
622
1018
|
"""
|
623
1019
|
The index of this jobscript within its parent :py:class:`Submission`.
|
624
1020
|
"""
|
1021
|
+
assert self._index is not None
|
625
1022
|
return self._index
|
626
1023
|
|
627
1024
|
@property
|
628
|
-
def submission(self):
|
1025
|
+
def submission(self) -> Submission:
|
629
1026
|
"""
|
630
1027
|
The parent submission.
|
631
1028
|
"""
|
1029
|
+
assert self._submission is not None
|
632
1030
|
return self._submission
|
633
1031
|
|
634
1032
|
@property
|
635
|
-
def workflow(self):
|
1033
|
+
def workflow(self) -> Workflow:
|
636
1034
|
"""
|
637
1035
|
The workflow this is all on behalf of.
|
638
1036
|
"""
|
639
1037
|
return self.submission.workflow
|
640
1038
|
|
641
1039
|
@property
|
642
|
-
def
|
643
|
-
"""
|
644
|
-
The number of actions in this jobscript.
|
645
|
-
"""
|
646
|
-
return self.EAR_ID.shape[0]
|
647
|
-
|
648
|
-
@property
|
649
|
-
def num_elements(self):
|
650
|
-
"""
|
651
|
-
The number of elements in this jobscript.
|
652
|
-
"""
|
653
|
-
return self.EAR_ID.shape[1]
|
654
|
-
|
655
|
-
@property
|
656
|
-
def is_array(self):
|
1040
|
+
def is_array(self) -> bool:
|
657
1041
|
"""
|
658
1042
|
Whether to generate an array job.
|
659
1043
|
"""
|
660
|
-
|
661
|
-
return False
|
662
|
-
|
663
|
-
support_EAR_para = self.workflow._store._features.EAR_parallelism
|
664
|
-
if self.resources.use_job_array is None:
|
665
|
-
if self.num_elements > 1 and support_EAR_para:
|
666
|
-
return True
|
667
|
-
else:
|
668
|
-
return False
|
669
|
-
else:
|
670
|
-
if self.resources.use_job_array and not support_EAR_para:
|
671
|
-
raise ValueError(
|
672
|
-
f"Store type {self.workflow._store!r} does not support element "
|
673
|
-
f"parallelism, so jobs cannot be submitted as scheduler arrays."
|
674
|
-
)
|
675
|
-
return self.resources.use_job_array
|
1044
|
+
return self._is_array
|
676
1045
|
|
677
1046
|
@property
|
678
|
-
def os_name(self) ->
|
1047
|
+
def os_name(self) -> str:
|
679
1048
|
"""
|
680
1049
|
The name of the OS to use.
|
681
1050
|
"""
|
682
|
-
|
1051
|
+
assert self.resources.os_name
|
1052
|
+
return self.resources.os_name
|
683
1053
|
|
684
1054
|
@property
|
685
|
-
def shell_name(self) ->
|
686
|
-
|
687
|
-
|
688
|
-
"""
|
689
|
-
return self._shell_name or self.resources.shell
|
1055
|
+
def shell_name(self) -> str:
|
1056
|
+
assert self.resources.shell
|
1057
|
+
return self.resources.shell
|
690
1058
|
|
691
1059
|
@property
|
692
|
-
def scheduler_name(self) ->
|
1060
|
+
def scheduler_name(self) -> str:
|
693
1061
|
"""
|
694
1062
|
The name of the scheduler to use.
|
695
1063
|
"""
|
696
|
-
|
1064
|
+
assert self.resources.scheduler
|
1065
|
+
return self.resources.scheduler
|
697
1066
|
|
698
|
-
def _get_submission_os_args(self):
|
699
|
-
return {"linux_release_file": self.
|
1067
|
+
def _get_submission_os_args(self) -> dict[str, str]:
|
1068
|
+
return {"linux_release_file": self._app.config.linux_release_file}
|
700
1069
|
|
701
|
-
def _get_submission_shell_args(self):
|
1070
|
+
def _get_submission_shell_args(self) -> dict[str, Any]:
|
702
1071
|
return self.resources.shell_args
|
703
1072
|
|
704
|
-
def _get_submission_scheduler_args(self):
|
1073
|
+
def _get_submission_scheduler_args(self) -> dict[str, Any]:
|
705
1074
|
return self.resources.scheduler_args
|
706
1075
|
|
707
|
-
def _get_shell(
|
1076
|
+
def _get_shell(
|
1077
|
+
self,
|
1078
|
+
os_name: str,
|
1079
|
+
shell_name: str | None,
|
1080
|
+
os_args: dict[str, Any] | None = None,
|
1081
|
+
shell_args: dict[str, Any] | None = None,
|
1082
|
+
) -> Shell:
|
708
1083
|
"""Get an arbitrary shell, not necessarily associated with submission."""
|
709
|
-
os_args = os_args or {}
|
710
|
-
shell_args = shell_args or {}
|
711
1084
|
return get_shell(
|
712
1085
|
shell_name=shell_name,
|
713
1086
|
os_name=os_name,
|
714
|
-
os_args=os_args,
|
715
|
-
**shell_args,
|
1087
|
+
os_args=os_args or {},
|
1088
|
+
**(shell_args or {}),
|
716
1089
|
)
|
717
1090
|
|
718
1091
|
@property
|
719
|
-
def shell(self):
|
1092
|
+
def shell(self) -> Shell:
|
720
1093
|
"""The shell for composing submission scripts."""
|
721
1094
|
if self._shell_obj is None:
|
722
1095
|
self._shell_obj = self._get_shell(
|
@@ -728,10 +1101,11 @@ class Jobscript(JSONLike):
|
|
728
1101
|
return self._shell_obj
|
729
1102
|
|
730
1103
|
@property
|
731
|
-
def scheduler(self):
|
1104
|
+
def scheduler(self) -> Scheduler:
|
732
1105
|
"""The scheduler that submissions go to from this jobscript."""
|
733
1106
|
if self._scheduler_obj is None:
|
734
|
-
|
1107
|
+
assert self.scheduler_name
|
1108
|
+
self._scheduler_obj = self._app.get_scheduler(
|
735
1109
|
scheduler_name=self.scheduler_name,
|
736
1110
|
os_name=self.os_name,
|
737
1111
|
scheduler_args=self._get_submission_scheduler_args(),
|
@@ -739,93 +1113,279 @@ class Jobscript(JSONLike):
|
|
739
1113
|
return self._scheduler_obj
|
740
1114
|
|
741
1115
|
@property
|
742
|
-
def EAR_ID_file_name(self):
|
1116
|
+
def EAR_ID_file_name(self) -> str:
|
743
1117
|
"""
|
744
1118
|
The name of a file containing EAR IDs.
|
745
1119
|
"""
|
746
1120
|
return f"js_{self.index}_EAR_IDs.txt"
|
747
1121
|
|
748
1122
|
@property
|
749
|
-
def
|
750
|
-
""
|
751
|
-
The name of a file containing run directory names.
|
752
|
-
"""
|
753
|
-
return f"js_{self.index}_run_dirs.txt"
|
754
|
-
|
755
|
-
@property
|
756
|
-
def direct_stdout_file_name(self):
|
757
|
-
"""File for direct execution stdout."""
|
758
|
-
return f"js_{self.index}_stdout.log"
|
1123
|
+
def combined_script_indices_file_name(self) -> str:
|
1124
|
+
return f"js_{self.index}_script_indices.txt"
|
759
1125
|
|
760
1126
|
@property
|
761
|
-
def
|
762
|
-
"""File for direct execution stderr."""
|
763
|
-
return f"js_{self.index}_stderr.log"
|
764
|
-
|
765
|
-
@property
|
766
|
-
def direct_win_pid_file_name(self):
|
1127
|
+
def direct_win_pid_file_name(self) -> str:
|
767
1128
|
"""File for holding the direct execution PID."""
|
768
1129
|
return f"js_{self.index}_pid.txt"
|
769
1130
|
|
770
1131
|
@property
|
771
|
-
def jobscript_name(self):
|
1132
|
+
def jobscript_name(self) -> str:
|
772
1133
|
"""The name of the jobscript file."""
|
773
1134
|
return f"js_{self.index}{self.shell.JS_EXT}"
|
774
1135
|
|
775
1136
|
@property
|
776
|
-
def
|
1137
|
+
def jobscript_functions_name(self):
|
1138
|
+
assert self.shell_idx is not None
|
1139
|
+
return self.submission.get_jobscript_functions_name(self.shell, self.shell_idx)
|
1140
|
+
|
1141
|
+
@property
|
1142
|
+
def EAR_ID_file_path(self) -> Path:
|
777
1143
|
"""
|
778
1144
|
The path to the file containing EAR IDs for this jobscript.
|
779
1145
|
"""
|
780
|
-
return self.submission.
|
1146
|
+
return self.submission.js_run_ids_path / self.EAR_ID_file_name
|
781
1147
|
|
782
1148
|
@property
|
783
|
-
def
|
1149
|
+
def combined_script_indices_file_path(self) -> Path:
|
784
1150
|
"""
|
785
|
-
The path to the file containing
|
1151
|
+
The path to the file containing script indices, in the case this is a
|
1152
|
+
``combine_scripts=True`` jobscript.
|
786
1153
|
"""
|
787
|
-
return
|
1154
|
+
return (
|
1155
|
+
self.submission.js_script_indices_path
|
1156
|
+
/ self.combined_script_indices_file_name
|
1157
|
+
)
|
788
1158
|
|
789
1159
|
@property
|
790
|
-
def jobscript_path(self):
|
1160
|
+
def jobscript_path(self) -> Path:
|
791
1161
|
"""
|
792
1162
|
The path to the file containing the jobscript file.
|
793
1163
|
"""
|
794
|
-
return self.submission.
|
1164
|
+
return self.submission.js_path / self.jobscript_name
|
1165
|
+
|
1166
|
+
@property
|
1167
|
+
def jobscript_functions_path(self) -> Path:
|
1168
|
+
"""
|
1169
|
+
The path to the file containing the supporting shell functions."""
|
1170
|
+
assert self.shell_idx is not None
|
1171
|
+
return self.submission.get_jobscript_functions_path(self.shell, self.shell_idx)
|
1172
|
+
|
1173
|
+
@property
|
1174
|
+
def std_path(self) -> Path:
|
1175
|
+
"""Directory in which to store jobscript standard out and error stream files."""
|
1176
|
+
return self.submission.js_std_path / str(self.index)
|
795
1177
|
|
796
1178
|
@property
|
797
|
-
def
|
1179
|
+
def direct_std_out_err_path(self) -> Path:
|
1180
|
+
"""File path of combined standard output and error streams.
|
1181
|
+
|
1182
|
+
Notes
|
1183
|
+
-----
|
1184
|
+
This path will only exist if `resources.combine_jobscript_std` is True. Otherwise,
|
1185
|
+
see `direct_stdout_path` and `direct_stderr_path` for the separate stream paths.
|
1186
|
+
|
798
1187
|
"""
|
799
|
-
|
800
|
-
|
1188
|
+
return self.get_std_out_err_path()
|
1189
|
+
|
1190
|
+
@property
|
1191
|
+
def direct_stdout_path(self) -> Path:
|
1192
|
+
"""File path to which the jobscript's standard output is saved, for direct
|
1193
|
+
execution only.
|
1194
|
+
|
1195
|
+
Notes
|
1196
|
+
-----
|
1197
|
+
This returned path be the same as that from `get_stderr_path` if
|
1198
|
+
`resources.combine_jobscript_std` is True.
|
1199
|
+
|
801
1200
|
"""
|
802
|
-
|
1201
|
+
assert not self.is_scheduled
|
1202
|
+
return self.get_stdout_path()
|
803
1203
|
|
804
1204
|
@property
|
805
|
-
def direct_stderr_path(self):
|
1205
|
+
def direct_stderr_path(self) -> Path:
|
1206
|
+
"""File path to which the jobscript's standard error is saved, for direct
|
1207
|
+
execution only.
|
1208
|
+
|
1209
|
+
Notes
|
1210
|
+
-----
|
1211
|
+
This returned path be the same as that from `get_stdout_path` if
|
1212
|
+
`resources.combine_jobscript_std` is True.
|
1213
|
+
|
1214
|
+
"""
|
1215
|
+
assert not self.is_scheduled
|
1216
|
+
return self.get_stderr_path()
|
1217
|
+
|
1218
|
+
def __validate_get_std_path_array_idx(self, array_idx: int | None = None):
|
1219
|
+
if array_idx is None and self.is_array:
|
1220
|
+
raise ValueError(
|
1221
|
+
"`array_idx` must be specified, since this jobscript is an array job."
|
1222
|
+
)
|
1223
|
+
elif array_idx is not None and not self.is_array:
|
1224
|
+
raise ValueError(
|
1225
|
+
"`array_idx` should not be specified, since this jobscript is not an "
|
1226
|
+
"array job."
|
1227
|
+
)
|
1228
|
+
|
1229
|
+
def _get_stdout_path(self, array_idx: int | None = None) -> Path:
|
1230
|
+
"""File path to the separate standard output stream.
|
1231
|
+
|
1232
|
+
Notes
|
1233
|
+
-----
|
1234
|
+
This path will only exist if `resources.combine_jobscript_std` is False.
|
1235
|
+
Otherwise, see `get_std_out_err_path` for the combined stream path.
|
1236
|
+
|
1237
|
+
"""
|
1238
|
+
self.__validate_get_std_path_array_idx(array_idx)
|
1239
|
+
return self.std_path / self.scheduler.get_stdout_filename(
|
1240
|
+
js_idx=self.index, job_ID=self.scheduler_job_ID, array_idx=array_idx
|
1241
|
+
)
|
1242
|
+
|
1243
|
+
def _get_stderr_path(self, array_idx: int | None = None) -> Path:
|
1244
|
+
"""File path to the separate standard error stream.
|
1245
|
+
|
1246
|
+
Notes
|
1247
|
+
-----
|
1248
|
+
This path will only exist if `resources.combine_jobscript_std` is False.
|
1249
|
+
Otherwise, see `get_std_out_err_path` for the combined stream path.
|
1250
|
+
|
1251
|
+
"""
|
1252
|
+
self.__validate_get_std_path_array_idx(array_idx)
|
1253
|
+
return self.std_path / self.scheduler.get_stderr_filename(
|
1254
|
+
js_idx=self.index, job_ID=self.scheduler_job_ID, array_idx=array_idx
|
1255
|
+
)
|
1256
|
+
|
1257
|
+
def get_std_out_err_path(self, array_idx: int | None = None) -> Path:
|
1258
|
+
"""File path of combined standard output and error streams.
|
1259
|
+
|
1260
|
+
Notes
|
1261
|
+
-----
|
1262
|
+
This path will only exist if `resources.combine_jobscript_std` is True. Otherwise,
|
1263
|
+
see `get_stdout_path` and `get_stderr_path` for the separate stream paths.
|
1264
|
+
|
1265
|
+
"""
|
1266
|
+
self.__validate_get_std_path_array_idx(array_idx)
|
1267
|
+
return self.std_path / self.scheduler.get_std_out_err_filename(
|
1268
|
+
js_idx=self.index, job_ID=self.scheduler_job_ID, array_idx=array_idx
|
1269
|
+
)
|
1270
|
+
|
1271
|
+
def get_stdout_path(self, array_idx: int | None = None) -> Path:
|
1272
|
+
"""File path to which the jobscript's standard output is saved.
|
1273
|
+
|
1274
|
+
Notes
|
1275
|
+
-----
|
1276
|
+
This returned path be the same as that from `get_stderr_path` if
|
1277
|
+
`resources.combine_jobscript_std` is True.
|
1278
|
+
|
1279
|
+
"""
|
1280
|
+
if self.resources.combine_jobscript_std:
|
1281
|
+
return self.get_std_out_err_path(array_idx=array_idx)
|
1282
|
+
else:
|
1283
|
+
return self._get_stdout_path(array_idx=array_idx)
|
1284
|
+
|
1285
|
+
def get_stderr_path(self, array_idx: int | None = None) -> Path:
|
1286
|
+
"""File path to which the jobscript's standard error is saved.
|
1287
|
+
|
1288
|
+
Notes
|
1289
|
+
-----
|
1290
|
+
This returned path be the same as that from `get_stdout_path` if
|
1291
|
+
`resources.combine_jobscript_std` is True.
|
1292
|
+
|
806
1293
|
"""
|
807
|
-
|
808
|
-
|
1294
|
+
if self.resources.combine_jobscript_std:
|
1295
|
+
return self.get_std_out_err_path(array_idx=array_idx)
|
1296
|
+
else:
|
1297
|
+
return self._get_stderr_path(array_idx=array_idx)
|
1298
|
+
|
1299
|
+
def get_stdout(self, array_idx: int | None = None) -> str:
|
1300
|
+
"""Retrieve the contents of the standard output stream file.
|
1301
|
+
|
1302
|
+
Notes
|
1303
|
+
-----
|
1304
|
+
In the case of non-array jobscripts, this will return the whole standard output,
|
1305
|
+
even if that includes multiple elements/actions.
|
1306
|
+
|
809
1307
|
"""
|
810
|
-
return self.
|
1308
|
+
return self.workflow.get_text_file(self.get_stdout_path(array_idx))
|
1309
|
+
|
1310
|
+
def get_stderr(self, array_idx: int | None = None) -> str:
|
1311
|
+
"""Retrieve the contents of the standard error stream file.
|
1312
|
+
|
1313
|
+
Notes
|
1314
|
+
-----
|
1315
|
+
In the case of non-array jobscripts, this will return the whole standard error,
|
1316
|
+
even if that includes multiple elements/actions.
|
1317
|
+
|
1318
|
+
"""
|
1319
|
+
return self.workflow.get_text_file(self.get_stderr_path(array_idx))
|
1320
|
+
|
1321
|
+
def print_stdout(self, array_idx: int | None = None) -> None:
|
1322
|
+
"""Print the contents of the standard output stream file.
|
1323
|
+
|
1324
|
+
Notes
|
1325
|
+
-----
|
1326
|
+
In the case of non-array jobscripts, this will print the whole standard output,
|
1327
|
+
even if that includes multiple elements/actions.
|
1328
|
+
|
1329
|
+
"""
|
1330
|
+
print(self.get_stdout(array_idx))
|
1331
|
+
|
1332
|
+
def print_stderr(self, array_idx: int | None = None) -> None:
|
1333
|
+
"""Print the contents of the standard error stream file.
|
1334
|
+
|
1335
|
+
Notes
|
1336
|
+
-----
|
1337
|
+
In the case of non-array jobscripts, this will print the whole standard error,
|
1338
|
+
even if that includes multiple elements/actions.
|
1339
|
+
|
1340
|
+
"""
|
1341
|
+
print(self.get_stderr(array_idx))
|
811
1342
|
|
812
1343
|
@property
|
813
|
-
def direct_win_pid_file_path(self):
|
1344
|
+
def direct_win_pid_file_path(self) -> Path:
|
814
1345
|
"""
|
815
1346
|
The path to the file containing PIDs for directly executed commands for this
|
816
1347
|
jobscript. Windows only.
|
817
1348
|
"""
|
818
|
-
return self.submission.
|
1349
|
+
return self.submission.js_win_pids_path / self.direct_win_pid_file_name
|
819
1350
|
|
820
|
-
|
821
|
-
|
822
|
-
self.
|
1351
|
+
@property
|
1352
|
+
def is_scheduled(self) -> bool:
|
1353
|
+
return self.scheduler_name not in ("direct", "direct_posix")
|
1354
|
+
|
1355
|
+
def _update_at_submit_metadata(
|
1356
|
+
self,
|
1357
|
+
submit_cmdline: list[str] | None = None,
|
1358
|
+
scheduler_job_ID: str | None = None,
|
1359
|
+
process_ID: int | None = None,
|
1360
|
+
submit_time: str | None = None,
|
1361
|
+
):
|
1362
|
+
"""Update persistent store and in-memory record of at-submit metadata for this
|
1363
|
+
jobscript.
|
1364
|
+
|
1365
|
+
"""
|
823
1366
|
self.workflow._store.set_jobscript_metadata(
|
824
1367
|
sub_idx=self.submission.index,
|
825
1368
|
js_idx=self.index,
|
1369
|
+
submit_cmdline=submit_cmdline,
|
1370
|
+
scheduler_job_ID=scheduler_job_ID,
|
1371
|
+
process_ID=process_ID,
|
826
1372
|
submit_time=submit_time,
|
827
1373
|
)
|
828
1374
|
|
1375
|
+
if submit_cmdline is not None:
|
1376
|
+
self._at_submit_metadata["submit_cmdline"] = submit_cmdline
|
1377
|
+
if scheduler_job_ID is not None:
|
1378
|
+
self._at_submit_metadata["scheduler_job_ID"] = scheduler_job_ID
|
1379
|
+
if process_ID is not None:
|
1380
|
+
self._at_submit_metadata["process_ID"] = process_ID
|
1381
|
+
if submit_time is not None:
|
1382
|
+
self._at_submit_metadata["submit_time"] = submit_time
|
1383
|
+
|
1384
|
+
def _set_submit_time(self, submit_time: datetime) -> None:
|
1385
|
+
self._update_at_submit_metadata(
|
1386
|
+
submit_time=submit_time.strftime(self.workflow.ts_fmt)
|
1387
|
+
)
|
1388
|
+
|
829
1389
|
def _set_submit_hostname(self, submit_hostname: str) -> None:
|
830
1390
|
self._submit_hostname = submit_hostname
|
831
1391
|
self.workflow._store.set_jobscript_metadata(
|
@@ -842,33 +1402,28 @@ class Jobscript(JSONLike):
|
|
842
1402
|
submit_machine=submit_machine,
|
843
1403
|
)
|
844
1404
|
|
845
|
-
def
|
846
|
-
self.
|
1405
|
+
def _set_shell_idx(self, shell_idx: int) -> None:
|
1406
|
+
self._shell_idx = shell_idx
|
847
1407
|
self.workflow._store.set_jobscript_metadata(
|
848
1408
|
sub_idx=self.submission.index,
|
849
1409
|
js_idx=self.index,
|
850
|
-
|
1410
|
+
shell_idx=shell_idx,
|
851
1411
|
)
|
852
1412
|
|
1413
|
+
def _set_submit_cmdline(self, submit_cmdline: list[str]) -> None:
|
1414
|
+
self._update_at_submit_metadata(submit_cmdline=submit_cmdline)
|
1415
|
+
|
853
1416
|
def _set_scheduler_job_ID(self, job_ID: str) -> None:
|
854
1417
|
"""For scheduled submission only."""
|
855
|
-
self.
|
856
|
-
self.
|
857
|
-
sub_idx=self.submission.index,
|
858
|
-
js_idx=self.index,
|
859
|
-
scheduler_job_ID=job_ID,
|
860
|
-
)
|
1418
|
+
assert self.is_scheduled
|
1419
|
+
self._update_at_submit_metadata(scheduler_job_ID=job_ID)
|
861
1420
|
|
862
|
-
def _set_process_ID(self, process_ID:
|
1421
|
+
def _set_process_ID(self, process_ID: int) -> None:
|
863
1422
|
"""For direct submission only."""
|
864
|
-
self.
|
865
|
-
self.
|
866
|
-
sub_idx=self.submission.index,
|
867
|
-
js_idx=self.index,
|
868
|
-
process_ID=process_ID,
|
869
|
-
)
|
1423
|
+
assert not self.is_scheduled
|
1424
|
+
self._update_at_submit_metadata(process_ID=process_ID)
|
870
1425
|
|
871
|
-
def _set_version_info(self, version_info:
|
1426
|
+
def _set_version_info(self, version_info: VersionInfo) -> None:
|
872
1427
|
self._version_info = version_info
|
873
1428
|
self.workflow._store.set_jobscript_metadata(
|
874
1429
|
sub_idx=self.submission.index,
|
@@ -876,147 +1431,38 @@ class Jobscript(JSONLike):
|
|
876
1431
|
version_info=version_info,
|
877
1432
|
)
|
878
1433
|
|
879
|
-
def _set_os_name(self) -> None:
|
880
|
-
"""Set the OS name for this jobscript. This is invoked at submit-time."""
|
881
|
-
self._os_name = self.resources.os_name
|
882
|
-
self.workflow._store.set_jobscript_metadata(
|
883
|
-
sub_idx=self.submission.index,
|
884
|
-
js_idx=self.index,
|
885
|
-
os_name=self._os_name,
|
886
|
-
)
|
887
|
-
|
888
|
-
def _set_shell_name(self) -> None:
|
889
|
-
"""Set the shell name for this jobscript. This is invoked at submit-time."""
|
890
|
-
self._shell_name = self.resources.shell
|
891
|
-
self.workflow._store.set_jobscript_metadata(
|
892
|
-
sub_idx=self.submission.index,
|
893
|
-
js_idx=self.index,
|
894
|
-
shell_name=self._shell_name,
|
895
|
-
)
|
896
|
-
|
897
|
-
def _set_scheduler_name(self) -> None:
|
898
|
-
"""Set the scheduler name for this jobscript. This is invoked at submit-time."""
|
899
|
-
self._scheduler_name = self.resources.scheduler
|
900
|
-
if self._scheduler_name:
|
901
|
-
self.workflow._store.set_jobscript_metadata(
|
902
|
-
sub_idx=self.submission.index,
|
903
|
-
js_idx=self.index,
|
904
|
-
scheduler_name=self._scheduler_name,
|
905
|
-
)
|
906
|
-
|
907
|
-
def get_task_loop_idx_array(self):
|
908
|
-
"""
|
909
|
-
Get an array of task loop indices.
|
910
|
-
"""
|
911
|
-
loop_idx = np.empty_like(self.EAR_ID)
|
912
|
-
loop_idx[:] = np.array([i[2] for i in self.task_actions]).reshape(
|
913
|
-
(len(self.task_actions), 1)
|
914
|
-
)
|
915
|
-
return loop_idx
|
916
|
-
|
917
|
-
@TimeIt.decorator
|
918
|
-
def write_EAR_ID_file(self):
|
919
|
-
"""Write a text file with `num_elements` lines and `num_actions` delimited tokens
|
920
|
-
per line, representing whether a given EAR must be executed."""
|
921
|
-
|
922
|
-
with self.EAR_ID_file_path.open(mode="wt", newline="\n") as fp:
|
923
|
-
# can't specify "open" newline if we pass the file name only, so pass handle:
|
924
|
-
np.savetxt(
|
925
|
-
fname=fp,
|
926
|
-
X=(self.EAR_ID).T,
|
927
|
-
fmt="%.0f",
|
928
|
-
delimiter=self._EAR_files_delimiter,
|
929
|
-
)
|
930
|
-
|
931
|
-
@TimeIt.decorator
|
932
|
-
def write_element_run_dir_file(self, run_dirs: List[List[Path]]):
|
933
|
-
"""Write a text file with `num_elements` lines and `num_actions` delimited tokens
|
934
|
-
per line, representing the working directory for each EAR.
|
935
|
-
|
936
|
-
We assume a given task element's actions all run in the same directory, but in
|
937
|
-
general a jobscript "element" may cross task boundaries, so we need to provide
|
938
|
-
the directory for each jobscript-element/jobscript-action combination.
|
939
|
-
|
940
|
-
"""
|
941
|
-
run_dirs = self.shell.prepare_element_run_dirs(run_dirs)
|
942
|
-
with self.element_run_dir_file_path.open(mode="wt", newline="\n") as fp:
|
943
|
-
# can't specify "open" newline if we pass the file name only, so pass handle:
|
944
|
-
np.savetxt(
|
945
|
-
fname=fp,
|
946
|
-
X=np.array(run_dirs),
|
947
|
-
fmt="%s",
|
948
|
-
delimiter=self._EAR_files_delimiter,
|
949
|
-
)
|
950
|
-
|
951
1434
|
@TimeIt.decorator
|
952
1435
|
def compose_jobscript(
|
953
1436
|
self,
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
scheduler_name: Optional[str] = None,
|
960
|
-
scheduler_args: Optional[Dict] = None,
|
1437
|
+
shell,
|
1438
|
+
deps: dict[int, tuple[str, bool]] | None = None,
|
1439
|
+
os_name: str | None = None,
|
1440
|
+
scheduler_name: str | None = None,
|
1441
|
+
scheduler_args: dict[str, Any] | None = None,
|
961
1442
|
) -> str:
|
962
1443
|
"""Prepare the jobscript file string."""
|
963
|
-
|
964
|
-
os_name = os_name or self.os_name
|
965
|
-
shell_name = shell_name or self.shell_name
|
966
1444
|
scheduler_name = scheduler_name or self.scheduler_name
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
f"Jobscript {self.index} `os_name` is not yet set. Pass the `os_name` as "
|
971
|
-
f"a method argument to compose the jobscript for a given `os_name`."
|
972
|
-
)
|
973
|
-
if not shell_name:
|
974
|
-
raise RuntimeError(
|
975
|
-
f"Jobscript {self.index} `shell_name` is not yet set. Pass the "
|
976
|
-
f"`shell_name` as a method argument to compose the jobscript for a given "
|
977
|
-
f"`shell_name`."
|
978
|
-
)
|
979
|
-
|
980
|
-
shell = self._get_shell(
|
981
|
-
os_name=os_name,
|
982
|
-
shell_name=shell_name,
|
983
|
-
os_args=os_args or self._get_submission_os_args(),
|
984
|
-
shell_args=shell_args or self._get_submission_shell_args(),
|
985
|
-
)
|
986
|
-
scheduler = self.app.get_scheduler(
|
1445
|
+
assert scheduler_name
|
1446
|
+
assert os_name
|
1447
|
+
scheduler = self._app.get_scheduler(
|
987
1448
|
scheduler_name=scheduler_name,
|
988
1449
|
os_name=os_name,
|
989
1450
|
scheduler_args=scheduler_args or self._get_submission_scheduler_args(),
|
990
1451
|
)
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
header_args = shell.process_JS_header_args(
|
1006
|
-
{
|
1007
|
-
"workflow_app_alias": self.workflow_app_alias,
|
1008
|
-
"env_setup": env_setup,
|
1009
|
-
"app_invoc": app_invoc,
|
1010
|
-
"run_log_file": self.app.RunDirAppFiles.get_log_file_name(),
|
1011
|
-
"config_dir": str(self.app.config.config_directory),
|
1012
|
-
"config_invoc_key": self.app.config.config_key,
|
1013
|
-
"workflow_path": self.workflow.path,
|
1014
|
-
"sub_idx": self.submission.index,
|
1015
|
-
"js_idx": self.index,
|
1016
|
-
"EAR_file_name": self.EAR_ID_file_name,
|
1017
|
-
"element_run_dirs_file_path": self.element_run_dir_file_name,
|
1018
|
-
}
|
1019
|
-
)
|
1452
|
+
app_caps = self._app.package_name.upper()
|
1453
|
+
header_args = {
|
1454
|
+
"app_caps": app_caps,
|
1455
|
+
"jobscript_functions_name": self.jobscript_functions_name,
|
1456
|
+
"jobscript_functions_dir": self.submission.JS_FUNCS_DIR_NAME,
|
1457
|
+
"sub_idx": self.submission.index,
|
1458
|
+
"js_idx": self.index,
|
1459
|
+
"run_IDs_file_name": self.EAR_ID_file_name,
|
1460
|
+
"run_IDs_file_dir": self.submission.JS_RUN_IDS_DIR_NAME,
|
1461
|
+
"tmp_dir_name": self.submission.TMP_DIR_NAME,
|
1462
|
+
"log_dir_name": self.submission.LOG_DIR_NAME,
|
1463
|
+
"app_std_dir_name": self.submission.APP_STD_DIR_NAME,
|
1464
|
+
"scripts_dir_name": self.submission.SCRIPTS_DIR_NAME,
|
1465
|
+
}
|
1020
1466
|
|
1021
1467
|
shebang = shell.JS_SHEBANG.format(
|
1022
1468
|
shebang_executable=" ".join(shell.shebang_executable),
|
@@ -1024,23 +1470,25 @@ class Jobscript(JSONLike):
|
|
1024
1470
|
)
|
1025
1471
|
header = shell.JS_HEADER.format(**header_args)
|
1026
1472
|
|
1027
|
-
if
|
1473
|
+
if isinstance(scheduler, QueuedScheduler):
|
1028
1474
|
header = shell.JS_SCHEDULER_HEADER.format(
|
1029
1475
|
shebang=shebang,
|
1030
1476
|
scheduler_options=scheduler.format_options(
|
1031
1477
|
resources=self.resources,
|
1032
|
-
num_elements=self.num_elements,
|
1478
|
+
num_elements=self.blocks[0].num_elements, # only used for array jobs
|
1033
1479
|
is_array=self.is_array,
|
1034
1480
|
sub_idx=self.submission.index,
|
1481
|
+
js_idx=self.index,
|
1035
1482
|
),
|
1036
1483
|
header=header,
|
1037
1484
|
)
|
1038
1485
|
else:
|
1039
|
-
# the
|
1486
|
+
# the Scheduler (direct submission)
|
1487
|
+
assert isinstance(scheduler, DirectScheduler)
|
1040
1488
|
wait_cmd = shell.get_wait_command(
|
1041
1489
|
workflow_app_alias=self.workflow_app_alias,
|
1042
1490
|
sub_idx=self.submission.index,
|
1043
|
-
deps=deps,
|
1491
|
+
deps=deps or {},
|
1044
1492
|
)
|
1045
1493
|
header = shell.JS_DIRECT_HEADER.format(
|
1046
1494
|
shebang=shebang,
|
@@ -1049,104 +1497,146 @@ class Jobscript(JSONLike):
|
|
1049
1497
|
wait_command=wait_cmd,
|
1050
1498
|
)
|
1051
1499
|
|
1052
|
-
main = shell.JS_MAIN.format(
|
1053
|
-
num_actions=self.num_actions,
|
1054
|
-
EAR_files_delimiter=self._EAR_files_delimiter,
|
1055
|
-
workflow_app_alias=self.workflow_app_alias,
|
1056
|
-
commands_file_name=self.get_commands_file_name(r"${JS_act_idx}", shell=shell),
|
1057
|
-
run_stream_file=self.app.RunDirAppFiles.get_std_file_name(),
|
1058
|
-
)
|
1059
|
-
|
1060
1500
|
out = header
|
1061
1501
|
|
1062
|
-
if self.
|
1063
|
-
|
1064
|
-
|
1065
|
-
scheduler_array_switch=scheduler.array_switch,
|
1066
|
-
scheduler_array_item_var=scheduler.array_item_var,
|
1067
|
-
num_elements=self.num_elements,
|
1068
|
-
main=main,
|
1502
|
+
if self.resources.combine_scripts:
|
1503
|
+
run_cmd = shell.JS_RUN_CMD_COMBINED.format(
|
1504
|
+
workflow_app_alias=self.workflow_app_alias
|
1069
1505
|
)
|
1070
|
-
|
1506
|
+
out += run_cmd + "\n"
|
1071
1507
|
else:
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1508
|
+
run_cmd = shell.JS_RUN_CMD.format(workflow_app_alias=self.workflow_app_alias)
|
1509
|
+
|
1510
|
+
if self.resources.write_app_logs:
|
1511
|
+
run_log_enable_disable = shell.JS_RUN_LOG_PATH_ENABLE.format(
|
1512
|
+
run_log_file_name=self.submission.get_app_log_file_name(
|
1513
|
+
run_ID=shell.format_env_var_get(f"{app_caps}_RUN_ID")
|
1514
|
+
)
|
1515
|
+
)
|
1516
|
+
else:
|
1517
|
+
run_log_enable_disable = shell.JS_RUN_LOG_PATH_DISABLE
|
1518
|
+
|
1519
|
+
block_run = shell.JS_RUN.format(
|
1520
|
+
EAR_files_delimiter=self._EAR_files_delimiter,
|
1521
|
+
app_caps=app_caps,
|
1522
|
+
run_cmd=run_cmd,
|
1523
|
+
sub_tmp_dir=self.submission.tmp_path,
|
1524
|
+
run_log_enable_disable=run_log_enable_disable,
|
1075
1525
|
)
|
1526
|
+
if len(self.blocks) == 1:
|
1527
|
+
# forgo element and action loops if not necessary:
|
1528
|
+
block = self.blocks[0]
|
1529
|
+
if block.num_actions > 1:
|
1530
|
+
block_act = shell.JS_ACT_MULTI.format(
|
1531
|
+
num_actions=block.num_actions,
|
1532
|
+
run_block=indent(block_run, shell.JS_INDENT),
|
1533
|
+
)
|
1534
|
+
else:
|
1535
|
+
block_act = shell.JS_ACT_SINGLE.format(run_block=block_run)
|
1536
|
+
|
1537
|
+
main = shell.JS_MAIN.format(
|
1538
|
+
action=block_act,
|
1539
|
+
app_caps=app_caps,
|
1540
|
+
block_start_elem_idx=0,
|
1541
|
+
)
|
1542
|
+
|
1543
|
+
out += shell.JS_BLOCK_HEADER.format(app_caps=app_caps)
|
1544
|
+
if self.is_array:
|
1545
|
+
if not isinstance(scheduler, QueuedScheduler):
|
1546
|
+
raise Exception("can only schedule arrays of jobs to a queue")
|
1547
|
+
out += shell.JS_ELEMENT_MULTI_ARRAY.format(
|
1548
|
+
scheduler_command=scheduler.js_cmd,
|
1549
|
+
scheduler_array_switch=scheduler.array_switch,
|
1550
|
+
scheduler_array_item_var=scheduler.array_item_var,
|
1551
|
+
num_elements=block.num_elements,
|
1552
|
+
main=main,
|
1553
|
+
)
|
1554
|
+
elif block.num_elements == 1:
|
1555
|
+
out += shell.JS_ELEMENT_SINGLE.format(
|
1556
|
+
block_start_elem_idx=0,
|
1557
|
+
main=main,
|
1558
|
+
)
|
1559
|
+
else:
|
1560
|
+
out += shell.JS_ELEMENT_MULTI_LOOP.format(
|
1561
|
+
block_start_elem_idx=0,
|
1562
|
+
num_elements=block.num_elements,
|
1563
|
+
main=indent(main, shell.JS_INDENT),
|
1564
|
+
)
|
1565
|
+
|
1566
|
+
else:
|
1567
|
+
# use a shell loop for blocks, so always write the inner element and action
|
1568
|
+
# loops:
|
1569
|
+
block_act = shell.JS_ACT_MULTI.format(
|
1570
|
+
num_actions=shell.format_array_get_item("num_actions", "$block_idx"),
|
1571
|
+
run_block=indent(block_run, shell.JS_INDENT),
|
1572
|
+
)
|
1573
|
+
main = shell.JS_MAIN.format(
|
1574
|
+
action=block_act,
|
1575
|
+
app_caps=app_caps,
|
1576
|
+
block_start_elem_idx="$block_start_elem_idx",
|
1577
|
+
)
|
1578
|
+
|
1579
|
+
# only non-array jobscripts will have multiple blocks:
|
1580
|
+
element_loop = shell.JS_ELEMENT_MULTI_LOOP.format(
|
1581
|
+
block_start_elem_idx="$block_start_elem_idx",
|
1582
|
+
num_elements=shell.format_array_get_item(
|
1583
|
+
"num_elements", "$block_idx"
|
1584
|
+
),
|
1585
|
+
main=indent(main, shell.JS_INDENT),
|
1586
|
+
)
|
1587
|
+
out += shell.JS_BLOCK_LOOP.format(
|
1588
|
+
num_elements=shell.format_array(
|
1589
|
+
[i.num_elements for i in self.blocks]
|
1590
|
+
),
|
1591
|
+
num_actions=shell.format_array([i.num_actions for i in self.blocks]),
|
1592
|
+
num_blocks=len(self.blocks),
|
1593
|
+
app_caps=app_caps,
|
1594
|
+
element_loop=indent(element_loop, shell.JS_INDENT),
|
1595
|
+
)
|
1596
|
+
|
1597
|
+
out += shell.JS_FOOTER
|
1076
1598
|
|
1077
1599
|
return out
|
1078
1600
|
|
1079
1601
|
@TimeIt.decorator
|
1080
1602
|
def write_jobscript(
|
1081
1603
|
self,
|
1082
|
-
os_name: str = None,
|
1083
|
-
shell_name: str = None,
|
1084
|
-
deps:
|
1085
|
-
os_args:
|
1086
|
-
shell_args:
|
1087
|
-
scheduler_name:
|
1088
|
-
scheduler_args:
|
1089
|
-
):
|
1604
|
+
os_name: str | None = None,
|
1605
|
+
shell_name: str | None = None,
|
1606
|
+
deps: dict[int, tuple[str, bool]] | None = None,
|
1607
|
+
os_args: dict[str, Any] | None = None,
|
1608
|
+
shell_args: dict[str, Any] | None = None,
|
1609
|
+
scheduler_name: str | None = None,
|
1610
|
+
scheduler_args: dict[str, Any] | None = None,
|
1611
|
+
) -> Path:
|
1090
1612
|
"""
|
1091
1613
|
Write the jobscript to its file.
|
1092
1614
|
"""
|
1615
|
+
os_name = os_name or self.os_name
|
1616
|
+
shell_name = shell_name or self.shell_name
|
1617
|
+
assert os_name
|
1618
|
+
assert shell_name
|
1619
|
+
shell = self._get_shell(
|
1620
|
+
os_name=os_name,
|
1621
|
+
shell_name=shell_name,
|
1622
|
+
os_args=os_args or self._get_submission_os_args(),
|
1623
|
+
shell_args=shell_args or self._get_submission_shell_args(),
|
1624
|
+
)
|
1625
|
+
|
1093
1626
|
js_str = self.compose_jobscript(
|
1094
1627
|
deps=deps,
|
1628
|
+
shell=shell,
|
1095
1629
|
os_name=os_name,
|
1096
|
-
shell_name=shell_name,
|
1097
|
-
os_args=os_args,
|
1098
|
-
shell_args=shell_args,
|
1099
1630
|
scheduler_name=scheduler_name,
|
1100
1631
|
scheduler_args=scheduler_args,
|
1101
1632
|
)
|
1102
1633
|
with self.jobscript_path.open("wt", newline="\n") as fp:
|
1103
1634
|
fp.write(js_str)
|
1104
|
-
return self.jobscript_path
|
1105
1635
|
|
1106
|
-
|
1107
|
-
EARs_arr = np.array(self.all_EARs).reshape(self.EAR_ID.shape)
|
1108
|
-
return EARs_arr
|
1109
|
-
|
1110
|
-
@TimeIt.decorator
|
1111
|
-
def make_artifact_dirs(self):
|
1112
|
-
"""
|
1113
|
-
Create the directories that will hold artifacts associated with this jobscript.
|
1114
|
-
"""
|
1115
|
-
EARs_arr = self._get_EARs_arr()
|
1116
|
-
task_loop_idx_arr = self.get_task_loop_idx_array()
|
1117
|
-
|
1118
|
-
run_dirs = []
|
1119
|
-
for js_elem_idx in range(self.num_elements):
|
1120
|
-
run_dirs_i = []
|
1121
|
-
for js_act_idx in range(self.num_actions):
|
1122
|
-
EAR_i = EARs_arr[js_act_idx, js_elem_idx]
|
1123
|
-
t_iID = EAR_i.task.insert_ID
|
1124
|
-
l_idx = task_loop_idx_arr[js_act_idx, js_elem_idx].item()
|
1125
|
-
r_idx = EAR_i.index
|
1126
|
-
|
1127
|
-
loop_idx_i = self.task_loop_idx[l_idx]
|
1128
|
-
task_dir = self.workflow.tasks.get(insert_ID=t_iID).get_dir_name(
|
1129
|
-
loop_idx_i
|
1130
|
-
)
|
1131
|
-
elem_dir = EAR_i.element.dir_name
|
1132
|
-
run_dir = f"r_{r_idx}"
|
1133
|
-
|
1134
|
-
EAR_dir = Path(self.workflow.execution_path, task_dir, elem_dir, run_dir)
|
1135
|
-
EAR_dir.mkdir(exist_ok=True, parents=True)
|
1136
|
-
|
1137
|
-
# copy (TODO: optionally symlink) any input files:
|
1138
|
-
for name, path in EAR_i.get("input_files", {}).items():
|
1139
|
-
if path:
|
1140
|
-
shutil.copy(path, EAR_dir)
|
1141
|
-
|
1142
|
-
run_dirs_i.append(EAR_dir.relative_to(self.workflow.path))
|
1143
|
-
|
1144
|
-
run_dirs.append(run_dirs_i)
|
1145
|
-
|
1146
|
-
return run_dirs
|
1636
|
+
return self.jobscript_path
|
1147
1637
|
|
1148
1638
|
@TimeIt.decorator
|
1149
|
-
def _launch_direct_js_win(self):
|
1639
|
+
def _launch_direct_js_win(self, submit_cmd: list[str]) -> int:
|
1150
1640
|
# this is a "trick" to ensure we always get a fully detached new process (with no
|
1151
1641
|
# parent); the `powershell.exe -Command` process exits after running the inner
|
1152
1642
|
# `Start-Process`, which is where the jobscript is actually invoked. I could not
|
@@ -1155,7 +1645,7 @@ class Jobscript(JSONLike):
|
|
1155
1645
|
|
1156
1646
|
# Note we need powershell.exe for this "launcher process", but the shell used for
|
1157
1647
|
# the jobscript itself need not be powershell.exe
|
1158
|
-
exe_path, arg_list =
|
1648
|
+
exe_path, arg_list = submit_cmd[0], submit_cmd[1:]
|
1159
1649
|
|
1160
1650
|
# note powershell-escaped quotes, in case of spaces in arguments (this seems to
|
1161
1651
|
# work okay even though we might have switch like arguments in this list, like
|
@@ -1165,243 +1655,712 @@ class Jobscript(JSONLike):
|
|
1165
1655
|
args = [
|
1166
1656
|
"powershell.exe",
|
1167
1657
|
"-Command",
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1175
|
-
f'Set-Content -Path "{self.direct_win_pid_file_path}" -Value $JS_proc.Id'
|
1176
|
-
),
|
1658
|
+
f"$JS_proc = Start-Process "
|
1659
|
+
f'-Passthru -NoNewWindow -FilePath "{exe_path}" '
|
1660
|
+
f'-RedirectStandardOutput "{self.direct_stdout_path}" '
|
1661
|
+
f'-RedirectStandardError "{self.direct_stderr_path}" '
|
1662
|
+
f'-WorkingDirectory "{self.workflow.path}" '
|
1663
|
+
f"-ArgumentList {arg_list_str}; "
|
1664
|
+
f'Set-Content -Path "{self.direct_win_pid_file_path}" -Value $JS_proc.Id',
|
1177
1665
|
]
|
1178
1666
|
|
1179
|
-
self.
|
1667
|
+
self._app.submission_logger.info(
|
1180
1668
|
f"running direct Windows jobscript launcher process: {args!r}"
|
1181
1669
|
)
|
1182
1670
|
# for some reason we still need to create a "detached" process here as well:
|
1183
1671
|
init_proc = subprocess.Popen(
|
1184
1672
|
args=args,
|
1185
|
-
cwd=
|
1186
|
-
creationflags=subprocess
|
1673
|
+
cwd=self.workflow.path,
|
1674
|
+
creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0),
|
1187
1675
|
)
|
1188
1676
|
init_proc.wait() # wait for the process ID file to be written
|
1189
|
-
|
1190
|
-
return process_ID
|
1677
|
+
return int(self.direct_win_pid_file_path.read_text())
|
1191
1678
|
|
1192
1679
|
@TimeIt.decorator
|
1193
|
-
def _launch_direct_js_posix(self) -> int:
|
1680
|
+
def _launch_direct_js_posix(self, submit_cmd: list[str]) -> int:
|
1194
1681
|
# direct submission; submit jobscript asynchronously:
|
1195
1682
|
# detached process, avoid interrupt signals propagating to the subprocess:
|
1196
|
-
with self.direct_stdout_path.open("wt") as fp_stdout:
|
1197
|
-
with self.direct_stderr_path.open("wt") as fp_stderr:
|
1198
|
-
# note: Popen copies the file objects, so this works!
|
1199
|
-
proc = subprocess.Popen(
|
1200
|
-
args=self.submit_cmdline,
|
1201
|
-
stdout=fp_stdout,
|
1202
|
-
stderr=fp_stderr,
|
1203
|
-
cwd=str(self.workflow.path),
|
1204
|
-
start_new_session=True,
|
1205
|
-
)
|
1206
|
-
process_ID = proc.pid
|
1207
1683
|
|
1208
|
-
|
1684
|
+
def _launch(fp_stdout: TextIO, fp_stderr: TextIO) -> int:
|
1685
|
+
# note: Popen copies the file objects, so this works!
|
1686
|
+
proc = subprocess.Popen(
|
1687
|
+
args=submit_cmd,
|
1688
|
+
stdout=fp_stdout,
|
1689
|
+
stderr=fp_stderr,
|
1690
|
+
cwd=str(self.workflow.path),
|
1691
|
+
start_new_session=True,
|
1692
|
+
)
|
1693
|
+
return proc.pid
|
1694
|
+
|
1695
|
+
if self.resources.combine_jobscript_std:
|
1696
|
+
with self.direct_std_out_err_path.open("wt") as fp_std:
|
1697
|
+
return _launch(fp_std, fp_std)
|
1698
|
+
else:
|
1699
|
+
with self.direct_stdout_path.open(
|
1700
|
+
"wt"
|
1701
|
+
) as fp_stdout, self.direct_stderr_path.open("wt") as fp_stderr:
|
1702
|
+
return _launch(fp_stdout, fp_stderr)
|
1703
|
+
|
1704
|
+
@TimeIt.decorator
|
1705
|
+
def _launch_queued(
|
1706
|
+
self, submit_cmd: list[str], print_stdout: bool
|
1707
|
+
) -> tuple[str, str]:
|
1708
|
+
# scheduled submission, wait for submission so we can parse the job ID:
|
1709
|
+
proc = subprocess.run(
|
1710
|
+
args=submit_cmd,
|
1711
|
+
stdout=subprocess.PIPE,
|
1712
|
+
stderr=subprocess.PIPE,
|
1713
|
+
cwd=self.workflow.path,
|
1714
|
+
)
|
1715
|
+
stdout = proc.stdout.decode().strip()
|
1716
|
+
stderr = proc.stderr.decode().strip()
|
1717
|
+
if print_stdout and stdout:
|
1718
|
+
print(stdout)
|
1719
|
+
if stderr:
|
1720
|
+
print(stderr)
|
1721
|
+
return stdout, stderr
|
1209
1722
|
|
1210
1723
|
@TimeIt.decorator
|
1211
1724
|
def submit(
|
1212
1725
|
self,
|
1213
|
-
scheduler_refs:
|
1214
|
-
print_stdout:
|
1726
|
+
scheduler_refs: dict[int, tuple[str, bool]],
|
1727
|
+
print_stdout: bool = False,
|
1215
1728
|
) -> str:
|
1216
1729
|
"""
|
1217
1730
|
Submit the jobscript to the scheduler.
|
1218
1731
|
"""
|
1219
1732
|
# map each dependency jobscript index to the JS ref (job/process ID) and whether the
|
1220
1733
|
# dependency is an array dependency:
|
1221
|
-
deps = {}
|
1222
|
-
for js_idx, deps_i in self.dependencies.items():
|
1734
|
+
deps: dict[int, tuple[str, bool]] = {}
|
1735
|
+
for (js_idx, _), deps_i in self.dependencies.items():
|
1223
1736
|
dep_js_ref, dep_js_is_arr = scheduler_refs[js_idx]
|
1224
1737
|
# only submit an array dependency if both this jobscript and the dependency
|
1225
1738
|
# are array jobs:
|
1226
1739
|
dep_is_arr = deps_i["is_array"] and self.is_array and dep_js_is_arr
|
1227
1740
|
deps[js_idx] = (dep_js_ref, dep_is_arr)
|
1228
1741
|
|
1229
|
-
if
|
1230
|
-
#
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1742
|
+
if self.index > 0:
|
1743
|
+
# prevent this jobscript executing if jobscript parallelism is not available:
|
1744
|
+
use_parallelism = (
|
1745
|
+
self.submission.JS_parallelism is True
|
1746
|
+
or {0: "direct", 1: "scheduled"}[self.is_scheduled]
|
1747
|
+
== self.submission.JS_parallelism
|
1748
|
+
)
|
1749
|
+
if not use_parallelism:
|
1750
|
+
# add fake dependencies to all previously submitted jobscripts to avoid
|
1751
|
+
# simultaneous execution:
|
1752
|
+
for js_idx, (js_ref, _) in scheduler_refs.items():
|
1753
|
+
if js_idx not in deps:
|
1754
|
+
deps[js_idx] = (js_ref, False)
|
1755
|
+
|
1756
|
+
# make directory for jobscripts stdout/err stream files:
|
1757
|
+
self.std_path.mkdir(exist_ok=True)
|
1758
|
+
|
1759
|
+
with self.EAR_ID_file_path.open(mode="wt", newline="\n") as ID_fp:
|
1760
|
+
for block in self.blocks:
|
1761
|
+
block.write_EAR_ID_file(ID_fp)
|
1762
|
+
|
1763
|
+
js_path = self.shell.prepare_JS_path(self.write_jobscript(deps=deps))
|
1241
1764
|
submit_cmd = self.scheduler.get_submit_command(self.shell, js_path, deps)
|
1242
|
-
self.
|
1765
|
+
self._app.submission_logger.info(
|
1243
1766
|
f"submitting jobscript {self.index!r} with command: {submit_cmd!r}"
|
1244
1767
|
)
|
1245
|
-
self._set_submit_cmdline(submit_cmd)
|
1246
|
-
self._set_submit_hostname(socket.gethostname())
|
1247
|
-
self._set_submit_machine(self.app.config.get("machine"))
|
1248
1768
|
|
1249
|
-
err_args = {
|
1769
|
+
err_args: JobscriptSubmissionFailureArgs = {
|
1770
|
+
"submit_cmd": submit_cmd,
|
1250
1771
|
"js_idx": self.index,
|
1251
1772
|
"js_path": js_path,
|
1252
|
-
"subprocess_exc": None,
|
1253
|
-
"job_ID_parse_exc": None,
|
1254
1773
|
}
|
1255
|
-
|
1256
|
-
|
1257
|
-
process_ID = None
|
1774
|
+
job_ID: str | None = None
|
1775
|
+
process_ID: int | None = None
|
1258
1776
|
try:
|
1259
|
-
if
|
1777
|
+
if isinstance(self.scheduler, QueuedScheduler):
|
1260
1778
|
# scheduled submission, wait for submission so we can parse the job ID:
|
1261
|
-
|
1262
|
-
args=submit_cmd,
|
1263
|
-
stdout=subprocess.PIPE,
|
1264
|
-
stderr=subprocess.PIPE,
|
1265
|
-
cwd=str(self.workflow.path),
|
1266
|
-
)
|
1267
|
-
stdout = proc.stdout.decode().strip()
|
1268
|
-
stderr = proc.stderr.decode().strip()
|
1779
|
+
stdout, stderr = self._launch_queued(submit_cmd, print_stdout)
|
1269
1780
|
err_args["stdout"] = stdout
|
1270
1781
|
err_args["stderr"] = stderr
|
1271
|
-
if print_stdout and stdout:
|
1272
|
-
print(stdout)
|
1273
|
-
if stderr:
|
1274
|
-
print(stderr)
|
1275
1782
|
else:
|
1276
1783
|
if os.name == "nt":
|
1277
|
-
process_ID = self._launch_direct_js_win()
|
1784
|
+
process_ID = self._launch_direct_js_win(submit_cmd)
|
1278
1785
|
else:
|
1279
|
-
process_ID = self._launch_direct_js_posix()
|
1280
|
-
|
1786
|
+
process_ID = self._launch_direct_js_posix(submit_cmd)
|
1281
1787
|
except Exception as subprocess_exc:
|
1282
|
-
err_args["message"] = f"Failed to execute submit command."
|
1283
|
-
err_args["submit_cmd"] = submit_cmd
|
1284
|
-
err_args["stdout"] = None
|
1285
|
-
err_args["stderr"] = None
|
1286
1788
|
err_args["subprocess_exc"] = subprocess_exc
|
1287
|
-
raise JobscriptSubmissionFailure(
|
1789
|
+
raise JobscriptSubmissionFailure(
|
1790
|
+
"Failed to execute submit command.", **err_args
|
1791
|
+
)
|
1288
1792
|
|
1289
|
-
if
|
1793
|
+
if isinstance(self.scheduler, QueuedScheduler):
|
1290
1794
|
# scheduled submission
|
1291
1795
|
if stderr:
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1796
|
+
raise JobscriptSubmissionFailure(
|
1797
|
+
"Non-empty stderr from submit command.", **err_args
|
1798
|
+
)
|
1295
1799
|
|
1296
1800
|
try:
|
1297
1801
|
job_ID = self.scheduler.parse_submission_output(stdout)
|
1298
|
-
|
1802
|
+
assert job_ID is not None
|
1299
1803
|
except Exception as job_ID_parse_exc:
|
1300
1804
|
# TODO: maybe handle this differently. If there is no stderr, then the job
|
1301
1805
|
# probably did submit fine, but the issue is just with parsing the job ID
|
1302
1806
|
# (e.g. if the scheduler version was updated and it now outputs
|
1303
1807
|
# differently).
|
1304
|
-
err_args["message"] = "Failed to parse job ID from stdout."
|
1305
|
-
err_args["submit_cmd"] = submit_cmd
|
1306
1808
|
err_args["job_ID_parse_exc"] = job_ID_parse_exc
|
1307
|
-
raise JobscriptSubmissionFailure(
|
1809
|
+
raise JobscriptSubmissionFailure(
|
1810
|
+
"Failed to parse job ID from stdout.", **err_args
|
1811
|
+
)
|
1308
1812
|
|
1309
1813
|
self._set_scheduler_job_ID(job_ID)
|
1310
1814
|
ref = job_ID
|
1311
1815
|
|
1312
1816
|
else:
|
1313
1817
|
# direct submission
|
1818
|
+
assert process_ID is not None
|
1314
1819
|
self._set_process_ID(process_ID)
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1820
|
+
ref = str(process_ID)
|
1821
|
+
|
1822
|
+
self._set_submit_cmdline(submit_cmd)
|
1823
|
+
self._set_submit_time(current_timestamp())
|
1319
1824
|
|
1320
|
-
|
1825
|
+
# a downstream direct jobscript might need to wait for this jobscript, which
|
1826
|
+
# means this jobscript's process ID must be committed:
|
1827
|
+
self.workflow._store._pending.commit_all()
|
1321
1828
|
|
1322
1829
|
return ref
|
1323
1830
|
|
1324
1831
|
@property
|
1325
|
-
def is_submitted(self):
|
1832
|
+
def is_submitted(self) -> bool:
|
1326
1833
|
"""Whether this jobscript has been submitted."""
|
1327
1834
|
return self.index in self.submission.submitted_jobscripts
|
1328
1835
|
|
1329
1836
|
@property
|
1330
|
-
def scheduler_js_ref(self):
|
1837
|
+
def scheduler_js_ref(self) -> str | None | tuple[int | None, list[str] | None]:
|
1331
1838
|
"""
|
1332
1839
|
The reference to the submitted job for the jobscript.
|
1333
1840
|
"""
|
1334
|
-
if isinstance(self.scheduler,
|
1841
|
+
if isinstance(self.scheduler, QueuedScheduler):
|
1335
1842
|
return self.scheduler_job_ID
|
1336
1843
|
else:
|
1337
1844
|
return (self.process_ID, self.submit_cmdline)
|
1338
1845
|
|
1339
|
-
@
|
1340
|
-
def
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1346
|
-
|
1347
|
-
|
1846
|
+
@overload
|
1847
|
+
def get_active_states(
|
1848
|
+
self, as_json: Literal[False] = False
|
1849
|
+
) -> Mapping[int, Mapping[int, JobscriptElementState]]:
|
1850
|
+
...
|
1851
|
+
|
1852
|
+
@overload
|
1853
|
+
def get_active_states(
|
1854
|
+
self, as_json: Literal[True]
|
1855
|
+
) -> Mapping[int, Mapping[int, str]]:
|
1856
|
+
...
|
1348
1857
|
|
1349
1858
|
@TimeIt.decorator
|
1350
1859
|
def get_active_states(
|
1351
1860
|
self, as_json: bool = False
|
1352
|
-
) ->
|
1861
|
+
) -> Mapping[int, Mapping[int, JobscriptElementState | str]]:
|
1353
1862
|
"""If this jobscript is active on this machine, return the state information from
|
1354
1863
|
the scheduler."""
|
1355
|
-
|
1356
|
-
|
1357
|
-
|
1358
|
-
|
1359
|
-
else:
|
1360
|
-
self.app.submission_logger.debug(
|
1864
|
+
# this returns: {BLOCK_IDX: {JS_ELEMENT_IDX: STATE}}
|
1865
|
+
out: Mapping[int, Mapping[int, JobscriptElementState]] = {}
|
1866
|
+
if self.is_submitted:
|
1867
|
+
self._app.submission_logger.debug(
|
1361
1868
|
"checking if the jobscript is running according to EAR submission "
|
1362
1869
|
"states."
|
1363
1870
|
)
|
1364
1871
|
|
1365
1872
|
not_run_states = EARStatus.get_non_running_submitted_states()
|
1366
|
-
all_EAR_states = set(
|
1367
|
-
self.
|
1873
|
+
all_EAR_states = set(ear.status for ear in self.all_EARs)
|
1874
|
+
self._app.submission_logger.debug(
|
1875
|
+
f"Unique EAR states are: {tuple(i.name for i in all_EAR_states)!r}"
|
1876
|
+
)
|
1368
1877
|
if all_EAR_states.issubset(not_run_states):
|
1369
|
-
self.
|
1370
|
-
|
1878
|
+
self._app.submission_logger.debug(
|
1879
|
+
"All jobscript EARs are in a non-running state"
|
1371
1880
|
)
|
1372
|
-
out = {}
|
1373
1881
|
|
1374
|
-
elif self.
|
1375
|
-
self.
|
1882
|
+
elif self._app.config.get("machine") == self.submit_machine:
|
1883
|
+
self._app.submission_logger.debug(
|
1376
1884
|
"Checking if jobscript is running according to the scheduler/process "
|
1377
1885
|
"ID."
|
1378
1886
|
)
|
1379
|
-
|
1380
|
-
if
|
1381
|
-
|
1382
|
-
|
1383
|
-
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1887
|
+
out_d = self.scheduler.get_job_state_info(js_refs=[self.scheduler_js_ref])
|
1888
|
+
if out_d:
|
1889
|
+
# remove scheduler ref (should be only one):
|
1890
|
+
assert len(out_d) == 1
|
1891
|
+
out_i = nth_value(cast("dict", out_d), 0)
|
1892
|
+
|
1893
|
+
if self.is_array:
|
1894
|
+
# out_i is a dict keyed by array index; there will be exactly one
|
1895
|
+
# block:
|
1896
|
+
out = {0: out_i}
|
1897
|
+
else:
|
1898
|
+
# out_i is a single state:
|
1899
|
+
out = {
|
1900
|
+
idx: {i: out_i for i in range(block.num_elements)}
|
1901
|
+
for idx, block in enumerate(self.blocks)
|
1902
|
+
}
|
1388
1903
|
|
1389
1904
|
else:
|
1390
|
-
raise NotSubmitMachineError(
|
1391
|
-
"Cannot get active state of the jobscript because the current machine "
|
1392
|
-
"is not the machine on which the jobscript was submitted."
|
1393
|
-
)
|
1905
|
+
raise NotSubmitMachineError()
|
1394
1906
|
|
1395
|
-
self.
|
1907
|
+
self._app.submission_logger.info(f"Jobscript is {'in' if not out else ''}active.")
|
1908
|
+
if as_json:
|
1909
|
+
return {
|
1910
|
+
block_idx: {k: v.name for k, v in block_data.items()}
|
1911
|
+
for block_idx, block_data in out.items()
|
1912
|
+
}
|
1396
1913
|
return out
|
1397
1914
|
|
1398
|
-
def
|
1915
|
+
def compose_combined_script(
|
1916
|
+
self, action_scripts: list[list[tuple[str, Path, bool]]]
|
1917
|
+
) -> tuple[str, list[list[int]], list[int], list[int]]:
|
1399
1918
|
"""
|
1400
|
-
|
1919
|
+
Prepare the combined-script file string, if applicable.
|
1401
1920
|
"""
|
1402
|
-
|
1403
|
-
|
1921
|
+
|
1922
|
+
# use an index array for action scripts:
|
1923
|
+
script_names: list[str] = []
|
1924
|
+
requires_dir: list[bool] = []
|
1925
|
+
script_data: dict[str, tuple[int, Path]] = {}
|
1926
|
+
script_indices: list[list[int]] = []
|
1927
|
+
for i in action_scripts:
|
1928
|
+
indices_i: list[int] = []
|
1929
|
+
for name_j, path_j, req_dir_i in i:
|
1930
|
+
if name_j in script_data:
|
1931
|
+
idx = script_data[name_j][0]
|
1932
|
+
else:
|
1933
|
+
idx = len(script_names)
|
1934
|
+
script_names.append(name_j)
|
1935
|
+
requires_dir.append(req_dir_i)
|
1936
|
+
script_data[name_j] = (idx, path_j)
|
1937
|
+
indices_i.append(idx)
|
1938
|
+
script_indices.append(indices_i)
|
1939
|
+
|
1940
|
+
if not self.resources.combine_scripts:
|
1941
|
+
raise TypeError(
|
1942
|
+
f"Jobscript {self.index} is not a `combine_scripts` jobscript."
|
1943
|
+
)
|
1944
|
+
|
1945
|
+
tab_indent = " "
|
1946
|
+
|
1947
|
+
script_funcs_lst: list[str] = []
|
1948
|
+
for act_name, (_, snip_path) in script_data.items():
|
1949
|
+
main_func_name = snip_path.stem
|
1950
|
+
with snip_path.open("rt") as fp:
|
1951
|
+
script_str = fp.read()
|
1952
|
+
script_funcs_lst.append(
|
1953
|
+
dedent(
|
1954
|
+
"""\
|
1955
|
+
def {act_name}(*args, **kwargs):
|
1956
|
+
{script_str}
|
1957
|
+
return {main_func_name}(*args, **kwargs)
|
1958
|
+
"""
|
1959
|
+
).format(
|
1960
|
+
act_name=act_name,
|
1961
|
+
script_str=indent(script_str, tab_indent),
|
1962
|
+
main_func_name=main_func_name,
|
1963
|
+
)
|
1964
|
+
)
|
1965
|
+
|
1966
|
+
app_caps = self._app.package_name.upper()
|
1967
|
+
if self.resources.write_app_logs:
|
1968
|
+
sub_log_path = f'os.environ["{app_caps}_LOG_PATH"]'
|
1969
|
+
else:
|
1970
|
+
sub_log_path = '""'
|
1971
|
+
|
1972
|
+
py_imports = dedent(
|
1973
|
+
"""\
|
1974
|
+
import os
|
1975
|
+
from collections import defaultdict
|
1976
|
+
from pathlib import Path
|
1977
|
+
import traceback
|
1978
|
+
import time
|
1979
|
+
from typing import Dict
|
1980
|
+
|
1981
|
+
import {app_module} as app
|
1982
|
+
|
1983
|
+
from hpcflow.sdk.core.errors import UnsetParameterDataErrorBase
|
1984
|
+
|
1985
|
+
log_path = {log_path}
|
1986
|
+
wk_path = os.getenv("{app_caps}_WK_PATH")
|
1987
|
+
"""
|
1988
|
+
).format(
|
1989
|
+
app_module=self._app.module,
|
1990
|
+
app_caps=app_caps,
|
1991
|
+
log_path=sub_log_path,
|
1992
|
+
)
|
1993
|
+
|
1994
|
+
py_main_block_workflow_load = dedent(
|
1995
|
+
"""\
|
1996
|
+
app.load_config(
|
1997
|
+
log_file_path=log_path,
|
1998
|
+
config_dir=r"{cfg_dir}",
|
1999
|
+
config_key=r"{cfg_invoc_key}",
|
2000
|
+
)
|
2001
|
+
wk = app.Workflow(wk_path)
|
2002
|
+
"""
|
2003
|
+
).format(
|
2004
|
+
cfg_dir=self._app.config.config_directory,
|
2005
|
+
cfg_invoc_key=self._app.config.config_key,
|
2006
|
+
app_caps=app_caps,
|
2007
|
+
)
|
2008
|
+
|
2009
|
+
func_invoc_lines = dedent(
|
2010
|
+
"""\
|
2011
|
+
import pprint
|
2012
|
+
if not run.action.is_OFP and run.action.script_data_out_has_direct:
|
2013
|
+
outputs = func(**func_kwargs)
|
2014
|
+
elif run.action.is_OFP:
|
2015
|
+
out_name = run.action.output_file_parsers[0].output.typ
|
2016
|
+
outputs = {out_name: func(**func_kwargs)}
|
2017
|
+
else:
|
2018
|
+
outputs = {}
|
2019
|
+
func(**func_kwargs)
|
2020
|
+
"""
|
2021
|
+
)
|
2022
|
+
|
2023
|
+
script_funcs = "\n".join(script_funcs_lst)
|
2024
|
+
script_names_str = "[" + ", ".join(f"{i}" for i in script_names) + "]"
|
2025
|
+
main = dedent(
|
2026
|
+
"""\
|
2027
|
+
{py_imports}
|
2028
|
+
|
2029
|
+
sub_std_path = Path(os.environ["{app_caps}_SUB_STD_DIR"], f"js_{js_idx}.txt")
|
2030
|
+
with app.redirect_std_to_file(sub_std_path):
|
2031
|
+
{py_main_block_workflow_load}
|
2032
|
+
|
2033
|
+
with open(os.environ["{app_caps}_RUN_ID_FILE"], mode="r") as fp:
|
2034
|
+
lns = fp.read().strip().split("\\n")
|
2035
|
+
run_IDs = [[int(i) for i in ln.split("{run_ID_delim}")] for ln in lns]
|
2036
|
+
|
2037
|
+
get_all_runs_tic = time.perf_counter()
|
2038
|
+
run_IDs_flat = [j for i in run_IDs for j in i]
|
2039
|
+
runs = wk.get_EARs_from_IDs(run_IDs_flat, as_dict=True)
|
2040
|
+
run_skips : Dict[int, bool] = {{k: v.skip for k, v in runs.items()}}
|
2041
|
+
get_all_runs_toc = time.perf_counter()
|
2042
|
+
|
2043
|
+
with open(os.environ["{app_caps}_SCRIPT_INDICES_FILE"], mode="r") as fp:
|
2044
|
+
lns = fp.read().strip().split("\\n")
|
2045
|
+
section_idx = -1
|
2046
|
+
script_indices = []
|
2047
|
+
for ln in lns:
|
2048
|
+
if ln.startswith("#"):
|
2049
|
+
section_idx += 1
|
2050
|
+
continue
|
2051
|
+
ln_parsed = [int(i) for i in ln.split("{script_idx_delim}")]
|
2052
|
+
if section_idx == 0:
|
2053
|
+
num_elements = ln_parsed
|
2054
|
+
elif section_idx == 1:
|
2055
|
+
num_actions = ln_parsed
|
2056
|
+
else:
|
2057
|
+
script_indices.append(ln_parsed)
|
2058
|
+
|
2059
|
+
port = int(os.environ["{app_caps}_RUN_PORT"])
|
2060
|
+
action_scripts = {script_names}
|
2061
|
+
requires_dir = {requires_dir!r}
|
2062
|
+
run_dirs = wk.get_run_directories()
|
2063
|
+
|
2064
|
+
get_ins_time_fp = open(f"js_{js_idx}_get_inputs_times.txt", "wt")
|
2065
|
+
func_time_fp = open(f"js_{js_idx}_func_times.txt", "wt")
|
2066
|
+
run_time_fp = open(f"js_{js_idx}_run_times.txt", "wt")
|
2067
|
+
set_start_multi_times_fp = open(f"js_{js_idx}_set_start_multi_times.txt", "wt")
|
2068
|
+
set_end_multi_times_fp = open(f"js_{js_idx}_set_end_multi_times.txt", "wt")
|
2069
|
+
save_multi_times_fp = open(f"js_{js_idx}_save_multi_times.txt", "wt")
|
2070
|
+
loop_term_times_fp = open(f"js_{js_idx}_loop_term_times.txt", "wt")
|
2071
|
+
|
2072
|
+
get_all_runs_time = get_all_runs_toc - get_all_runs_tic
|
2073
|
+
print(f"get_all_runs_time: {{get_all_runs_time:.4f}}")
|
2074
|
+
|
2075
|
+
app.logger.info(
|
2076
|
+
f"running {num_blocks} jobscript block(s) in combined jobscript index "
|
2077
|
+
f"{js_idx}."
|
2078
|
+
)
|
2079
|
+
|
2080
|
+
block_start_elem_idx = 0
|
2081
|
+
for block_idx in range({num_blocks}):
|
2082
|
+
|
2083
|
+
app.logger.info(f"running block index {{block_idx}}.")
|
2084
|
+
|
2085
|
+
os.environ["{app_caps}_BLOCK_IDX"] = str(block_idx)
|
2086
|
+
|
2087
|
+
block_run_IDs = [
|
2088
|
+
run_IDs[block_start_elem_idx + i]
|
2089
|
+
for i in range(num_elements[block_idx])
|
2090
|
+
]
|
2091
|
+
|
2092
|
+
for block_act_idx in range(num_actions[block_idx]):
|
2093
|
+
|
2094
|
+
app.logger.info(
|
2095
|
+
f"running block action index {{block_act_idx}} "
|
2096
|
+
f"(in block {{block_idx}})."
|
2097
|
+
)
|
2098
|
+
|
2099
|
+
os.environ["{app_caps}_BLOCK_ACT_IDX"] = str(block_act_idx)
|
2100
|
+
|
2101
|
+
block_act_run_IDs = [i[block_act_idx] for i in block_run_IDs]
|
2102
|
+
|
2103
|
+
block_act_std_path = Path(
|
2104
|
+
os.environ["{app_caps}_SUB_STD_DIR"],
|
2105
|
+
f"js_{js_idx}_blk_{{block_idx}}_blk_act_{{block_act_idx}}.txt",
|
2106
|
+
)
|
2107
|
+
with app.redirect_std_to_file(block_act_std_path):
|
2108
|
+
# set run starts for all runs of the block/action:
|
2109
|
+
block_act_run_dirs = [run_dirs[i] for i in block_act_run_IDs]
|
2110
|
+
block_act_runs = [runs[i] for i in block_act_run_IDs]
|
2111
|
+
|
2112
|
+
block_act_run_IDs_non_skipped = []
|
2113
|
+
block_act_run_dirs_non_skipped = []
|
2114
|
+
for i, j in zip(block_act_run_IDs, block_act_run_dirs):
|
2115
|
+
if not run_skips[i]:
|
2116
|
+
block_act_run_IDs_non_skipped.append(i)
|
2117
|
+
block_act_run_dirs_non_skipped.append(j)
|
2118
|
+
|
2119
|
+
if block_act_run_IDs_non_skipped:
|
2120
|
+
set_start_multi_tic = time.perf_counter()
|
2121
|
+
app.logger.info("setting run starts.")
|
2122
|
+
wk.set_multi_run_starts(block_act_run_IDs_non_skipped, block_act_run_dirs_non_skipped, port)
|
2123
|
+
app.logger.info("finished setting run starts.")
|
2124
|
+
set_start_multi_toc = time.perf_counter()
|
2125
|
+
set_start_multi_time = set_start_multi_toc - set_start_multi_tic
|
2126
|
+
print(f"{{set_start_multi_time:.4f}}", file=set_start_multi_times_fp, flush=True)
|
2127
|
+
|
2128
|
+
all_act_outputs = {{}}
|
2129
|
+
run_end_dat = defaultdict(list)
|
2130
|
+
block_act_key=({js_idx}, block_idx, block_act_idx)
|
2131
|
+
|
2132
|
+
for block_elem_idx in range(num_elements[block_idx]):
|
2133
|
+
|
2134
|
+
js_elem_idx = block_start_elem_idx + block_elem_idx
|
2135
|
+
run_ID = block_act_run_IDs[block_elem_idx]
|
2136
|
+
|
2137
|
+
app.logger.info(
|
2138
|
+
f"run_ID is {{run_ID}}; block element index: {{block_elem_idx}}; "
|
2139
|
+
f"block action index: {{block_act_idx}}; in block {{block_idx}}."
|
2140
|
+
)
|
2141
|
+
|
2142
|
+
if run_ID == -1:
|
2143
|
+
continue
|
2144
|
+
|
2145
|
+
run = runs[run_ID]
|
2146
|
+
|
2147
|
+
skip = run_skips[run_ID]
|
2148
|
+
if skip:
|
2149
|
+
app.logger.info(f"run_ID: {{run_ID}}; run is set to skip; skipping.")
|
2150
|
+
# set run end
|
2151
|
+
run_end_dat[block_act_key].append((run, {skipped_exit_code}, None))
|
2152
|
+
continue
|
2153
|
+
|
2154
|
+
run_tic = time.perf_counter()
|
2155
|
+
|
2156
|
+
os.environ["{app_caps}_BLOCK_ELEM_IDX"] = str(block_elem_idx)
|
2157
|
+
os.environ["{app_caps}_JS_ELEM_IDX"] = str(js_elem_idx)
|
2158
|
+
os.environ["{app_caps}_RUN_ID"] = str(run_ID)
|
2159
|
+
|
2160
|
+
std_path = Path(os.environ["{app_caps}_SUB_STD_DIR"], f"{{run_ID}}.txt")
|
2161
|
+
with app.redirect_std_to_file(std_path):
|
2162
|
+
|
2163
|
+
if {write_app_logs!r}:
|
2164
|
+
new_log_path = Path(
|
2165
|
+
os.environ["{app_caps}_SUB_LOG_DIR"],
|
2166
|
+
f"{run_log_name}",
|
2167
|
+
)
|
2168
|
+
# TODO: this doesn't work!
|
2169
|
+
app.logger.info(
|
2170
|
+
f"run_ID: {{run_ID}}; moving log path to {{new_log_path}}"
|
2171
|
+
)
|
2172
|
+
app.config.log_path = new_log_path
|
2173
|
+
|
2174
|
+
run_dir = run_dirs[run_ID]
|
2175
|
+
|
2176
|
+
script_idx = script_indices[block_idx][block_act_idx]
|
2177
|
+
req_dir = requires_dir[script_idx]
|
2178
|
+
if req_dir:
|
2179
|
+
app.logger.info(f"run_ID: {{run_ID}}; changing to run directory: {{run_dir}}")
|
2180
|
+
os.chdir(run_dir)
|
2181
|
+
|
2182
|
+
# retrieve script inputs:
|
2183
|
+
app.logger.info(f"run_ID: {{run_ID}}; retrieving script inputs.")
|
2184
|
+
get_ins_tic = time.perf_counter()
|
2185
|
+
try:
|
2186
|
+
with run.raise_on_failure_threshold() as unset_params:
|
2187
|
+
app.logger.info(f"run_ID: {{run_ID}}; writing script input files.")
|
2188
|
+
run.write_script_input_files(block_act_key)
|
2189
|
+
|
2190
|
+
app.logger.info(f"run_ID: {{run_ID}}; retrieving funcion kwargs.")
|
2191
|
+
func_kwargs = run.get_py_script_func_kwargs(
|
2192
|
+
raise_on_unset=False,
|
2193
|
+
add_script_files=True,
|
2194
|
+
blk_act_key=block_act_key,
|
2195
|
+
)
|
2196
|
+
app.logger.info(
|
2197
|
+
f"run_ID: {{run_ID}}; script inputs have keys: "
|
2198
|
+
f"{{tuple(func_kwargs.keys())!r}}."
|
2199
|
+
)
|
2200
|
+
except UnsetParameterDataErrorBase:
|
2201
|
+
# not all required parameter data is set, so fail this run:
|
2202
|
+
exit_code = 1
|
2203
|
+
run_end_dat[block_act_key].append((run, exit_code, None))
|
2204
|
+
app.logger.info(
|
2205
|
+
f"run_ID: {{run_ID}}; some parameter data is unset, "
|
2206
|
+
f"so cannot run; setting exit code to 1."
|
2207
|
+
)
|
2208
|
+
continue # don't run the function
|
2209
|
+
|
2210
|
+
get_ins_toc = time.perf_counter()
|
2211
|
+
|
2212
|
+
func = action_scripts[script_idx]
|
2213
|
+
app.logger.info(f"run_ID: {{run_ID}}; function to run is: {{func.__name__}}")
|
2214
|
+
|
2215
|
+
|
2216
|
+
try:
|
2217
|
+
func_tic = time.perf_counter()
|
2218
|
+
app.logger.info(f"run_ID: {{run_ID}}; invoking function.")
|
2219
|
+
{func_invoc_lines}
|
2220
|
+
|
2221
|
+
except Exception:
|
2222
|
+
print(f"Exception caught during execution of script function {{func.__name__}}.")
|
2223
|
+
traceback.print_exc()
|
2224
|
+
exit_code = 1
|
2225
|
+
outputs = {{}}
|
2226
|
+
else:
|
2227
|
+
app.logger.info(f"run_ID: {{run_ID}}; finished function invocation.")
|
2228
|
+
exit_code = 0
|
2229
|
+
finally:
|
2230
|
+
func_toc = time.perf_counter()
|
2231
|
+
|
2232
|
+
with app.redirect_std_to_file(std_path):
|
2233
|
+
# set run end
|
2234
|
+
block_act_key=({js_idx}, block_idx, block_act_idx)
|
2235
|
+
run_end_dat[block_act_key].append((run, exit_code, run_dir))
|
2236
|
+
|
2237
|
+
# store outputs to save at end:
|
2238
|
+
app.logger.info(f"run_ID: {{run_ID}}; setting outputs to save.")
|
2239
|
+
for name_i, out_i in outputs.items():
|
2240
|
+
p_id = run.data_idx[f"outputs.{{name_i}}"]
|
2241
|
+
all_act_outputs[p_id] = out_i
|
2242
|
+
app.logger.info(f"run_ID: {{run_ID}}; finished setting outputs to save.")
|
2243
|
+
|
2244
|
+
if req_dir:
|
2245
|
+
app.logger.info(f"run_ID: {{run_ID}}; changing directory back")
|
2246
|
+
os.chdir(os.environ["{app_caps}_SUB_TMP_DIR"])
|
2247
|
+
|
2248
|
+
if {write_app_logs!r}:
|
2249
|
+
app.logger.info(f"run_ID: {{run_ID}}; moving log path back to " + {sub_log_path!r})
|
2250
|
+
app.config.log_path = {sub_log_path}
|
2251
|
+
|
2252
|
+
run_toc = time.perf_counter()
|
2253
|
+
|
2254
|
+
get_ins_time = get_ins_toc - get_ins_tic
|
2255
|
+
func_time = func_toc - func_tic
|
2256
|
+
run_time = run_toc - run_tic
|
2257
|
+
|
2258
|
+
print(f"{{get_ins_time:.4f}}", file=get_ins_time_fp)
|
2259
|
+
print(f"{{func_time:.4f}}", file=func_time_fp)
|
2260
|
+
print(f"{{run_time:.4f}}", file=run_time_fp)
|
2261
|
+
|
2262
|
+
with app.redirect_std_to_file(block_act_std_path):
|
2263
|
+
|
2264
|
+
if all_act_outputs:
|
2265
|
+
# save outputs of all elements of this action
|
2266
|
+
save_all_tic = time.perf_counter()
|
2267
|
+
app.logger.info(
|
2268
|
+
f"saving outputs of block action index {{block_act_idx}} "
|
2269
|
+
f"in block {{block_idx}}."
|
2270
|
+
)
|
2271
|
+
wk.set_parameter_values(all_act_outputs)
|
2272
|
+
app.logger.info(
|
2273
|
+
f"finished saving outputs of block action index {{block_act_idx}} "
|
2274
|
+
f"in block {{block_idx}}."
|
2275
|
+
)
|
2276
|
+
save_all_toc = time.perf_counter()
|
2277
|
+
save_all_time_i = save_all_toc - save_all_tic
|
2278
|
+
print(f"{{save_all_time_i:.4f}}", file=save_multi_times_fp, flush=True)
|
2279
|
+
|
2280
|
+
all_loop_term_tic = time.perf_counter()
|
2281
|
+
app.logger.info(f"run_ID: {{run_ID}}; checking for loop terminations")
|
2282
|
+
for run_i in block_act_runs:
|
2283
|
+
if not run_skips[run_i.id_]:
|
2284
|
+
skipped_IDs_i = wk._check_loop_termination(run_i)
|
2285
|
+
for skip_ID in skipped_IDs_i:
|
2286
|
+
run_skips[skip_ID] = 2 # SkipReason.LOOP_TERMINATION
|
2287
|
+
if skip_ID in runs:
|
2288
|
+
runs[skip_ID]._skip = 2 # mutates runs within `run_end_dat`
|
2289
|
+
app.logger.info(f"run_ID: {{run_ID}}; finished checking for loop terminations.")
|
2290
|
+
|
2291
|
+
all_loop_term_toc = time.perf_counter()
|
2292
|
+
all_loop_term_time_i = all_loop_term_toc - all_loop_term_tic
|
2293
|
+
print(f"{{all_loop_term_time_i:.4f}}", file=loop_term_times_fp, flush=True)
|
2294
|
+
|
2295
|
+
# set run end for all elements of this action
|
2296
|
+
app.logger.info(f"run_ID: {{run_ID}}; setting run ends.")
|
2297
|
+
set_multi_end_tic = time.perf_counter()
|
2298
|
+
wk.set_multi_run_ends(run_end_dat)
|
2299
|
+
set_multi_end_toc = time.perf_counter()
|
2300
|
+
set_multi_end_time = set_multi_end_toc - set_multi_end_tic
|
2301
|
+
app.logger.info(f"run_ID: {{run_ID}}; finished setting run ends.")
|
2302
|
+
print(f"{{set_multi_end_time:.4f}}", file=set_end_multi_times_fp, flush=True)
|
2303
|
+
|
2304
|
+
block_start_elem_idx += num_elements[block_idx]
|
2305
|
+
|
2306
|
+
get_ins_time_fp.close()
|
2307
|
+
func_time_fp.close()
|
2308
|
+
run_time_fp.close()
|
2309
|
+
set_start_multi_times_fp.close()
|
2310
|
+
set_end_multi_times_fp.close()
|
2311
|
+
save_multi_times_fp.close()
|
2312
|
+
loop_term_times_fp.close()
|
2313
|
+
"""
|
2314
|
+
).format(
|
2315
|
+
py_imports=py_imports,
|
2316
|
+
py_main_block_workflow_load=indent(py_main_block_workflow_load, tab_indent),
|
2317
|
+
app_caps=self._app.package_name.upper(),
|
2318
|
+
script_idx_delim=",", # TODO
|
2319
|
+
script_names=script_names_str,
|
2320
|
+
requires_dir=requires_dir,
|
2321
|
+
num_blocks=len(self.blocks),
|
2322
|
+
run_ID_delim=self._EAR_files_delimiter,
|
2323
|
+
run_log_name=self.submission.get_app_log_file_name(run_ID="{run_ID}"),
|
2324
|
+
js_idx=self.index,
|
2325
|
+
write_app_logs=self.resources.write_app_logs,
|
2326
|
+
sub_log_path=sub_log_path,
|
2327
|
+
skipped_exit_code=SKIPPED_EXIT_CODE,
|
2328
|
+
func_invoc_lines=indent(func_invoc_lines, tab_indent * 4),
|
1404
2329
|
)
|
1405
|
-
|
1406
|
-
|
2330
|
+
|
2331
|
+
script = dedent(
|
2332
|
+
"""\
|
2333
|
+
{script_funcs}
|
2334
|
+
if __name__ == "__main__":
|
2335
|
+
{main}
|
2336
|
+
"""
|
2337
|
+
).format(script_funcs=script_funcs, main=indent(main, tab_indent))
|
2338
|
+
|
2339
|
+
num_elems = [i.num_elements for i in self.blocks]
|
2340
|
+
num_acts = [len(i) for i in action_scripts]
|
2341
|
+
|
2342
|
+
return script, script_indices, num_elems, num_acts
|
2343
|
+
|
2344
|
+
def write_script_indices_file(
|
2345
|
+
self, indices: list[list[int]], num_elems: list[int], num_acts: list[int]
|
2346
|
+
) -> None:
|
2347
|
+
"""
|
2348
|
+
Write a text file containing the action script index for each block and action
|
2349
|
+
in a `combined_scripts` script.
|
2350
|
+
"""
|
2351
|
+
delim = "," # TODO: refactor?
|
2352
|
+
with self.combined_script_indices_file_path.open("wt") as fp:
|
2353
|
+
fp.write("# number of elements per block:\n")
|
2354
|
+
fp.write(delim.join(str(i) for i in num_elems) + "\n")
|
2355
|
+
fp.write("# number of actions per block:\n")
|
2356
|
+
fp.write(delim.join(str(i) for i in num_acts) + "\n")
|
2357
|
+
fp.write("# script indices:\n")
|
2358
|
+
for block in indices:
|
2359
|
+
fp.write(delim.join(str(i) for i in block) + "\n")
|
2360
|
+
|
2361
|
+
def get_app_std_path(self) -> Path:
|
2362
|
+
std_dir = self.submission.get_app_std_path(
|
2363
|
+
self.workflow.submissions_path,
|
2364
|
+
self.submission.index,
|
1407
2365
|
)
|
2366
|
+
return std_dir / f"js_{self.index}.txt" # TODO: refactor
|