hpcflow-new2 0.2.0a188__py3-none-any.whl → 0.2.0a190__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +8 -6
- hpcflow/_version.py +1 -1
- hpcflow/app.py +1 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
- hpcflow/sdk/__init__.py +21 -15
- hpcflow/sdk/app.py +2133 -770
- hpcflow/sdk/cli.py +281 -250
- hpcflow/sdk/cli_common.py +6 -2
- hpcflow/sdk/config/__init__.py +1 -1
- hpcflow/sdk/config/callbacks.py +77 -42
- hpcflow/sdk/config/cli.py +126 -103
- hpcflow/sdk/config/config.py +578 -311
- hpcflow/sdk/config/config_file.py +131 -95
- hpcflow/sdk/config/errors.py +112 -85
- hpcflow/sdk/config/types.py +145 -0
- hpcflow/sdk/core/actions.py +1054 -994
- hpcflow/sdk/core/app_aware.py +24 -0
- hpcflow/sdk/core/cache.py +81 -63
- hpcflow/sdk/core/command_files.py +275 -185
- hpcflow/sdk/core/commands.py +111 -107
- hpcflow/sdk/core/element.py +724 -503
- hpcflow/sdk/core/enums.py +192 -0
- hpcflow/sdk/core/environment.py +74 -93
- hpcflow/sdk/core/errors.py +398 -51
- hpcflow/sdk/core/json_like.py +540 -272
- hpcflow/sdk/core/loop.py +380 -334
- hpcflow/sdk/core/loop_cache.py +160 -43
- hpcflow/sdk/core/object_list.py +370 -207
- hpcflow/sdk/core/parameters.py +728 -600
- hpcflow/sdk/core/rule.py +59 -41
- hpcflow/sdk/core/run_dir_files.py +33 -22
- hpcflow/sdk/core/task.py +1546 -1325
- hpcflow/sdk/core/task_schema.py +240 -196
- hpcflow/sdk/core/test_utils.py +126 -88
- hpcflow/sdk/core/types.py +387 -0
- hpcflow/sdk/core/utils.py +410 -305
- hpcflow/sdk/core/validation.py +82 -9
- hpcflow/sdk/core/workflow.py +1192 -1028
- hpcflow/sdk/core/zarr_io.py +98 -137
- hpcflow/sdk/demo/cli.py +46 -33
- hpcflow/sdk/helper/cli.py +18 -16
- hpcflow/sdk/helper/helper.py +75 -63
- hpcflow/sdk/helper/watcher.py +61 -28
- hpcflow/sdk/log.py +83 -59
- hpcflow/sdk/persistence/__init__.py +8 -31
- hpcflow/sdk/persistence/base.py +988 -586
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +38 -0
- hpcflow/sdk/persistence/json.py +408 -153
- hpcflow/sdk/persistence/pending.py +158 -123
- hpcflow/sdk/persistence/store_resource.py +37 -22
- hpcflow/sdk/persistence/types.py +307 -0
- hpcflow/sdk/persistence/utils.py +14 -11
- hpcflow/sdk/persistence/zarr.py +477 -420
- hpcflow/sdk/runtime.py +44 -41
- hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
- hpcflow/sdk/submission/jobscript.py +444 -404
- hpcflow/sdk/submission/schedulers/__init__.py +133 -40
- hpcflow/sdk/submission/schedulers/direct.py +97 -71
- hpcflow/sdk/submission/schedulers/sge.py +132 -126
- hpcflow/sdk/submission/schedulers/slurm.py +263 -268
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/__init__.py +14 -15
- hpcflow/sdk/submission/shells/base.py +102 -29
- hpcflow/sdk/submission/shells/bash.py +72 -55
- hpcflow/sdk/submission/shells/os_version.py +31 -30
- hpcflow/sdk/submission/shells/powershell.py +37 -29
- hpcflow/sdk/submission/submission.py +203 -257
- hpcflow/sdk/submission/types.py +143 -0
- hpcflow/sdk/typing.py +163 -12
- hpcflow/tests/conftest.py +8 -6
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
- hpcflow/tests/scripts/test_main_scripts.py +60 -30
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -4
- hpcflow/tests/unit/test_action.py +86 -75
- hpcflow/tests/unit/test_action_rule.py +9 -4
- hpcflow/tests/unit/test_app.py +13 -6
- hpcflow/tests/unit/test_cli.py +1 -1
- hpcflow/tests/unit/test_command.py +71 -54
- hpcflow/tests/unit/test_config.py +20 -15
- hpcflow/tests/unit/test_config_file.py +21 -18
- hpcflow/tests/unit/test_element.py +58 -62
- hpcflow/tests/unit/test_element_iteration.py +3 -1
- hpcflow/tests/unit/test_element_set.py +29 -19
- hpcflow/tests/unit/test_group.py +4 -2
- hpcflow/tests/unit/test_input_source.py +116 -93
- hpcflow/tests/unit/test_input_value.py +29 -24
- hpcflow/tests/unit/test_json_like.py +44 -35
- hpcflow/tests/unit/test_loop.py +65 -58
- hpcflow/tests/unit/test_object_list.py +17 -12
- hpcflow/tests/unit/test_parameter.py +16 -7
- hpcflow/tests/unit/test_persistence.py +48 -35
- hpcflow/tests/unit/test_resources.py +20 -18
- hpcflow/tests/unit/test_run.py +8 -3
- hpcflow/tests/unit/test_runtime.py +2 -1
- hpcflow/tests/unit/test_schema_input.py +23 -15
- hpcflow/tests/unit/test_shell.py +3 -2
- hpcflow/tests/unit/test_slurm.py +8 -7
- hpcflow/tests/unit/test_submission.py +39 -19
- hpcflow/tests/unit/test_task.py +352 -247
- hpcflow/tests/unit/test_task_schema.py +33 -20
- hpcflow/tests/unit/test_utils.py +9 -11
- hpcflow/tests/unit/test_value_sequence.py +15 -12
- hpcflow/tests/unit/test_workflow.py +114 -83
- hpcflow/tests/unit/test_workflow_template.py +0 -1
- hpcflow/tests/workflows/test_jobscript.py +2 -1
- hpcflow/tests/workflows/test_workflows.py +18 -13
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/METADATA +2 -1
- hpcflow_new2-0.2.0a190.dist-info/RECORD +165 -0
- hpcflow/sdk/core/parallel.py +0 -21
- hpcflow_new2-0.2.0a188.dist-info/RECORD +0 -158
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/entry_points.txt +0 -0
`hpcflow/sdk/submission/submission.py` (+203 -257). (Several removed lines were truncated by the diff renderer at the first changed token, e.g. `-from`, and some are rendered empty; they are reproduced here as extracted.)

```diff
@@ -4,16 +4,12 @@ A collection of submissions to a scheduler, generated from a workflow.
 
 from __future__ import annotations
 from collections import defaultdict
-
-from datetime import datetime, timedelta, timezone
-import enum
 import os
 from pathlib import Path
-from
-from
+from typing import Any, overload, TYPE_CHECKING
+from typing_extensions import override
 
-from hpcflow.sdk import
-from hpcflow.sdk.core.element import ElementResources
+from hpcflow.sdk.typing import hydrate
 from hpcflow.sdk.core.errors import (
     JobscriptSubmissionFailure,
     MissingEnvironmentError,
@@ -24,44 +20,27 @@ from hpcflow.sdk.core.errors import (
 )
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
 from hpcflow.sdk.core.object_list import ObjectListMultipleMatchError
+from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
+from hpcflow.sdk.submission.enums import SubmissionStatus
 from hpcflow.sdk.log import TimeIt
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    days, other = td_str.split("-")
-    days = int(days)
-    hours, mins, secs = [int(i) for i in other.split(":")]
-    return timedelta(days=days, hours=hours, minutes=mins, seconds=secs)
-
-
-class SubmissionStatus(enum.Enum):
-    """
-    The overall status of a submission.
-    """
-
-    #: Not yet submitted.
-    PENDING = 0
-    #: All jobscripts submitted successfully.
-    SUBMITTED = 1
-    #: Some jobscripts submitted successfully.
-    PARTIALLY_SUBMITTED = 2
-
-
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Mapping, Sequence
+    from datetime import datetime
+    from typing import ClassVar, Literal
+    from rich.status import Status
+    from .jobscript import Jobscript
+    from .enums import JobscriptElementState
+    from .schedulers import Scheduler
+    from .shells import Shell
+    from .types import SubmissionPart
+    from ..core.element import ElementActionRun
+    from ..core.environment import Environment
+    from ..core.object_list import EnvironmentsList
+    from ..core.workflow import Workflow
+
+
+@hydrate
 class Submission(JSONLike):
     """
     A collection of jobscripts to be submitted to a scheduler.
```
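Two things are worth noting in the import reshuffle above: the `SubmissionStatus` enum (and a timedelta-parsing helper) leave this module for `hpcflow.sdk.submission.enums` and `hpcflow.sdk.core.utils`, and all type-only imports now sit behind `typing.TYPE_CHECKING`, keeping annotation-only (and potentially circular) dependencies such as `..core.workflow` out of the runtime import graph. A minimal sketch of that gating pattern, with invented module and attribute names:

```python
from __future__ import annotations  # PEP 563: annotations are not evaluated at runtime

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers; never imported at runtime, so a circular
    # or expensive import here costs nothing when this module loads.
    from .jobscript import Jobscript  # hypothetical sibling module


def first_index(jobscripts: list[Jobscript]) -> int:
    # The annotation is a string at runtime, so Jobscript need not be importable.
    return jobscripts[0].index
```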
```diff
@@ -82,7 +61,7 @@ class Submission(JSONLike):
         The execution environments to use.
     """
 
-    _child_objects = (
+    _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
         ChildObjectSpec(
             name="jobscripts",
             class_name="Jobscript",
@@ -98,11 +77,11 @@ class Submission(JSONLike):
     def __init__(
         self,
         index: int,
-        jobscripts:
-        workflow:
-        submission_parts:
-        JS_parallelism:
-        environments:
+        jobscripts: list[Jobscript],
+        workflow: Workflow | None = None,
+        submission_parts: dict[str, list[int]] | None = None,
+        JS_parallelism: bool | None = None,
+        environments: EnvironmentsList | None = None,
     ):
         self._index = index
         self._jobscripts = jobscripts
@@ -110,7 +89,9 @@ class Submission(JSONLike):
         self._JS_parallelism = JS_parallelism
         self._environments = environments
 
-        self._submission_parts_lst
+        self._submission_parts_lst: list[
+            SubmissionPart
+        ] | None = None  # assigned on first access
 
         if workflow:
             #: The workflow this is part of.
```
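`_child_objects` also picks up a `ClassVar` annotation, marking the tuple as belonging to the class rather than to instances, while `__init__` switches to PEP 604 `X | None` optionals. A toy illustration of the distinction (names invented):

```python
from typing import ClassVar


class Widget:
    # Class-level constant shared by all instances; a type checker rejects
    # assignment to `some_widget.kinds` on an instance.
    kinds: ClassVar[tuple[str, ...]] = ("button", "slider")

    def __init__(self, label: str | None = None):
        # Ordinary per-instance attribute; None is an allowed default.
        self.label = label
```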
```diff
@@ -122,37 +103,33 @@ class Submission(JSONLike):
             js._index = js_idx
 
     @TimeIt.decorator
-    def _set_environments(self):
-        filterable = ElementResources.get_env_instance_filterable_attributes()
+    def _set_environments(self) -> None:
+        filterable = self._app.ElementResources.get_env_instance_filterable_attributes()
 
         # map required environments and executable labels to job script indices:
-        req_envs
+        req_envs: dict[
+            tuple[tuple[str, ...], tuple[Any, ...]], dict[str, set[int]]
+        ] = defaultdict(lambda: defaultdict(set))
         for js_idx, js_i in enumerate(self.jobscripts):
             for run in js_i.all_EARs:
-
+                # Alas, mypy can't typecheck the next line if the type is right!
+                # So we use Any to get it to shut up...
+                env_spec_h: Any = tuple(zip(*run.env_spec.items()))  # hashable
                 for exec_label_j in run.action.get_required_executables():
                     req_envs[env_spec_h][exec_label_j].add(js_idx)
-
-
+                # Ensure overall element is present
+                req_envs[env_spec_h]
 
         # check these envs/execs exist in app data:
-        envs = []
+        envs: list[Environment] = []
         for env_spec_h, exec_js in req_envs.items():
             env_spec = dict(zip(*env_spec_h))
-            non_name_spec = {k: v for k, v in env_spec.items() if k != "name"}
-            spec_str = f" with specifiers {non_name_spec!r}" if non_name_spec else ""
-            env_ref = f"{env_spec['name']!r}{spec_str}"
             try:
-                env_i = self.
+                env_i = self._app.envs.get(**env_spec)
             except ObjectListMultipleMatchError:
-                raise MultipleEnvironmentsError(
-                    f"Multiple environments {env_ref} are defined on this machine."
-                )
+                raise MultipleEnvironmentsError(env_spec)
             except ValueError:
-                raise MissingEnvironmentError(
-                    f"The environment {env_ref} is not defined on this machine, so the "
-                    f"submission cannot be created."
-                ) from None
+                raise MissingEnvironmentError(env_spec) from None
             else:
                 if env_i not in envs:
                     envs.append(env_i)
```
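The reworked `_set_environments` keys its mapping on `tuple(zip(*run.env_spec.items()))`, which turns a specifier dict into a hashable `(keys, values)` pair, and `dict(zip(*env_spec_h))` inverts it further down. The round trip in isolation, with made-up specifier values:

```python
spec = {"name": "py_env", "version": "3.11"}  # hypothetical environment specifiers

# zip(*pairs) transposes the (key, value) pairs into a keys tuple and a
# values tuple; together they are hashable and can serve as a dict key:
key = tuple(zip(*spec.items()))
assert key == (("name", "version"), ("py_env", "3.11"))

# Transposing again restores the original mapping:
assert dict(zip(*key)) == spec
```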
```diff
@@ -162,34 +139,28 @@ class Submission(JSONLike):
                     exec_i = env_i.executables.get(exec_i_lab)
                 except ValueError:
                     raise MissingEnvironmentExecutableError(
-
-                        f"executable labelled {exec_i_lab!r}, which is required for this "
-                        f"submission, so the submission cannot be created."
+                        env_spec, exec_i_lab
                     ) from None
 
                 # check matching executable instances exist:
                 for js_idx_j in js_idx_set:
-
-                    filter_exec = {j: getattr(
-
-                    if not exec_instances:
+                    js_res = self.jobscripts[js_idx_j].resources
+                    filter_exec = {j: getattr(js_res, j) for j in filterable}
+                    if not exec_i.filter_instances(**filter_exec):
                         raise MissingEnvironmentExecutableInstanceError(
-
-                            f"{exec_i_lab!r} of environment {env_ref} for jobscript "
-                            f"index {js_idx_j!r} with requested resources "
-                            f"{filter_exec!r}."
+                            env_spec, exec_i_lab, js_idx_j, filter_exec
                         )
 
         # save env definitions to the environments attribute:
-        self._environments = self.
+        self._environments = self._app.EnvironmentsList(envs)
 
-
-
+    @override
+    def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
+        dct = super()._postprocess_to_dict(d)
         del dct["_workflow"]
         del dct["_index"]
         del dct["_submission_parts_lst"]
-
-        return dct
+        return {k.lstrip("_"): v for k, v in dct.items()}
 
     @property
     def index(self) -> int:
@@ -199,14 +170,15 @@ class Submission(JSONLike):
         return self._index
 
     @property
-    def environments(self) ->
+    def environments(self) -> EnvironmentsList:
         """
         The execution environments to use.
         """
+        assert self._environments
         return self._environments
 
     @property
-    def submission_parts(self) ->
+    def submission_parts(self) -> list[SubmissionPart]:
         """
         Description of the parts of this submission.
         """
@@ -216,9 +188,7 @@ class Submission(JSONLike):
         if self._submission_parts_lst is None:
             self._submission_parts_lst = [
                 {
-                    "submit_time":
-                    .replace(tzinfo=timezone.utc)
-                    .astimezone(),
+                    "submit_time": parse_timestamp(dt, self.workflow.ts_fmt),
                     "jobscripts": js_idx,
                 }
                 for dt, js_idx in self._submission_parts.items()
```
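The inline UTC-to-local conversion above is replaced by the `parse_timestamp` helper now imported from `hpcflow.sdk.core.utils`. Judging from the removed lines, the stored value is a naive timestamp string treated as UTC; the sketch below shows the equivalent logic with an illustrative format string (not necessarily the package's actual `ts_fmt`):

```python
from datetime import datetime, timezone

stored = "2024-05-01 12:00:00"  # a naive timestamp string, assumed here to be UTC

# Parse, mark the naive datetime as UTC, then convert to the local timezone:
local = (
    datetime.strptime(stored, "%Y-%m-%d %H:%M:%S")
    .replace(tzinfo=timezone.utc)
    .astimezone()
)
print(local.isoformat())
```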
```diff
@@ -226,116 +196,89 @@ class Submission(JSONLike):
         return self._submission_parts_lst
 
     @TimeIt.decorator
-    def get_start_time(self, submit_time: str) ->
+    def get_start_time(self, submit_time: str) -> datetime | None:
         """Get the start time of a given submission part."""
-
-
-
-
-            if start_time:
-                all_part_starts.append(start_time)
-        if all_part_starts:
-            return min(all_part_starts)
-        else:
-            return None
+        times = (
+            self.jobscripts[i].start_time for i in self._submission_parts[submit_time]
+        )
+        return min((t for t in times if t is not None), default=None)
 
     @TimeIt.decorator
-    def get_end_time(self, submit_time: str) ->
+    def get_end_time(self, submit_time: str) -> datetime | None:
         """Get the end time of a given submission part."""
-
-
-        for i in js_idx:
-            end_time = self.jobscripts[i].end_time
-            if end_time:
-                all_part_ends.append(end_time)
-        if all_part_ends:
-            return max(all_part_ends)
-        else:
-            return None
+        times = (self.jobscripts[i].end_time for i in self._submission_parts[submit_time])
+        return max((t for t in times if t is not None), default=None)
 
     @property
     @TimeIt.decorator
-    def start_time(self):
+    def start_time(self) -> datetime | None:
         """Get the first non-None start time over all submission parts."""
-
-
-
-
-            all_start_times.append(start_i)
-        if all_start_times:
-            return max(all_start_times)
-        else:
-            return None
+        times = (
+            self.get_start_time(submit_time) for submit_time in self._submission_parts
+        )
+        return min((t for t in times if t is not None), default=None)
 
     @property
     @TimeIt.decorator
-    def end_time(self):
+    def end_time(self) -> datetime | None:
         """Get the final non-None end time over all submission parts."""
-
-        for
-            end_i = self.get_end_time(submit_time)
-            if end_i:
-                all_end_times.append(end_i)
-        if all_end_times:
-            return max(all_end_times)
-        else:
-            return None
+        times = (self.get_end_time(submit_time) for submit_time in self._submission_parts)
+        return max((t for t in times if t is not None), default=None)
 
     @property
-    def jobscripts(self) ->
+    def jobscripts(self) -> list[Jobscript]:
         """
         The jobscripts in this submission.
         """
         return self._jobscripts
 
     @property
-    def JS_parallelism(self):
+    def JS_parallelism(self) -> bool | None:
         """
         Whether to exploit jobscript parallelism.
         """
         return self._JS_parallelism
 
     @property
-    def workflow(self) ->
+    def workflow(self) -> Workflow:
         """
         The workflow this is part of.
         """
         return self._workflow
 
     @workflow.setter
-    def workflow(self, wk):
+    def workflow(self, wk: Workflow):
         self._workflow = wk
 
     @property
-    def jobscript_indices(self) ->
+    def jobscript_indices(self) -> tuple[int, ...]:
         """All associated jobscript indices."""
-        return tuple(
+        return tuple(js.index for js in self.jobscripts)
 
     @property
-    def submitted_jobscripts(self) ->
+    def submitted_jobscripts(self) -> tuple[int, ...]:
         """Jobscript indices that have been successfully submitted."""
-        return tuple(j for
+        return tuple(j for sp in self.submission_parts for j in sp["jobscripts"])
 
     @property
-    def outstanding_jobscripts(self) ->
+    def outstanding_jobscripts(self) -> tuple[int, ...]:
         """Jobscript indices that have not yet been successfully submitted."""
-        return tuple(set(self.jobscript_indices)
+        return tuple(set(self.jobscript_indices).difference(self.submitted_jobscripts))
 
     @property
-    def status(self):
+    def status(self) -> SubmissionStatus:
         """
         The status of this submission.
         """
         if not self.submission_parts:
             return SubmissionStatus.PENDING
+        elif set(self.submitted_jobscripts) == set(self.jobscript_indices):
+            return SubmissionStatus.SUBMITTED
         else:
-
-                return SubmissionStatus.SUBMITTED
-            else:
-                return SubmissionStatus.PARTIALLY_SUBMITTED
+            return SubmissionStatus.PARTIALLY_SUBMITTED
 
     @property
-    def needs_submit(self):
+    def needs_submit(self) -> bool:
         """
         Whether this submission needs a submit to be done.
         """
```
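The four time accessors above all collapse an accumulate-then-branch loop into one `min`/`max` call over a generator, with `default=` standing in for the old empty-list branch:

```python
from datetime import datetime

starts = [None, datetime(2024, 1, 2), None, datetime(2024, 1, 1)]

# The inner generator drops the Nones; `default=None` replaces the old
# "if nothing collected, return None" branch (min() over an empty
# sequence would otherwise raise ValueError).
earliest = min((t for t in starts if t is not None), default=None)
assert earliest == datetime(2024, 1, 1)
```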
```diff
@@ -345,71 +288,87 @@ class Submission(JSONLike):
         )
 
     @property
-    def path(self):
+    def path(self) -> Path:
         """
         The path to files associated with this submission.
         """
         return self.workflow.submissions_path / str(self.index)
 
     @property
-    def all_EAR_IDs(self):
+    def all_EAR_IDs(self) -> Iterable[int]:
         """
         The IDs of all EARs in this submission.
         """
-        return
+        return (i for js in self.jobscripts for i in js.all_EAR_IDs)
 
     @property
-    def all_EARs(self):
+    def all_EARs(self) -> Iterable[ElementActionRun]:
         """
         All EARs in this this submission.
         """
-        return
+        return (ear for js in self.jobscripts for ear in js.all_EARs)
 
     @property
     @TimeIt.decorator
-    def EARs_by_elements(self):
+    def EARs_by_elements(self) -> Mapping[int, Mapping[int, Sequence[ElementActionRun]]]:
         """
         All EARs in this submission, grouped by element.
         """
-        task_elem_EARs = defaultdict(
-
-
+        task_elem_EARs: dict[int, dict[int, list[ElementActionRun]]] = defaultdict(
+            lambda: defaultdict(list)
+        )
+        for ear in self.all_EARs:
+            task_elem_EARs[ear.task.index][ear.element.index].append(ear)
         return task_elem_EARs
 
     @property
-    def abort_EARs_file_name(self):
+    def abort_EARs_file_name(self) -> str:
         """
         The name of a file describing what EARs have aborted.
         """
-        return
+        return "abort_EARs.txt"
 
     @property
-    def abort_EARs_file_path(self):
+    def abort_EARs_file_path(self) -> Path:
         """
         The path to the file describing what EARs have aborted in this submission.
         """
         return self.path / self.abort_EARs_file_name
 
+    @overload
+    def get_active_jobscripts(
+        self, as_json: Literal[False] = False
+    ) -> Mapping[int, Mapping[int, JobscriptElementState]]:
+        ...
+
+    @overload
+    def get_active_jobscripts(self, as_json: Literal[True]) -> dict[int, dict[int, str]]:
+        ...
+
     @TimeIt.decorator
     def get_active_jobscripts(
         self, as_json: bool = False
-    ) ->
+    ) -> Mapping[int, Mapping[int, JobscriptElementState]] | dict[int, dict[int, str]]:
         """Get jobscripts that are active on this machine, and their active states."""
         # this returns: {JS_IDX: {JS_ELEMENT_IDX: STATE}}
         # TODO: query the scheduler once for all jobscripts?
-
-
-
-
-
-
+        if as_json:
+            details = (
+                (js.index, js.get_active_states(as_json=True)) for js in self.jobscripts
+            )
+            return {idx: state for idx, state in details if state}
+        else:
+            dets2 = (
+                (js.index, js.get_active_states(as_json=False)) for js in self.jobscripts
+            )
+            return {idx: state for idx, state in dets2 if state}
 
-    def _write_abort_EARs_file(self):
+    def _write_abort_EARs_file(self) -> None:
         with self.abort_EARs_file_path.open(mode="wt", newline="\n") as fp:
             # write a single line for each EAR currently in the workflow:
             fp.write("\n".join("0" for _ in range(self.workflow.num_EARs)) + "\n")
 
-    def _set_run_abort(self, run_ID: int):
+    def _set_run_abort(self, run_ID: int) -> None:
         """Modify the abort runs file to indicate a specified run should be aborted."""
         with self.abort_EARs_file_path.open(mode="rt", newline="\n") as fp:
             lines = fp.read().splitlines()
```
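`get_active_jobscripts` keeps one runtime implementation but gains `@overload` stubs, letting a type checker narrow the return type from the `Literal` value of `as_json`. The pattern, reduced to a self-contained toy:

```python
from typing import Literal, overload


@overload
def fetch(as_json: Literal[True]) -> dict[str, str]: ...
@overload
def fetch(as_json: Literal[False] = False) -> list[int]: ...


def fetch(as_json: bool = False) -> dict[str, str] | list[int]:
    # Only this body exists at runtime; the stubs above are for checkers.
    return {"status": "ok"} if as_json else [1, 2, 3]


as_dict = fetch(as_json=True)  # inferred as dict[str, str]
as_list = fetch()              # inferred as list[int]
```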
```diff
@@ -418,58 +377,82 @@ class Submission(JSONLike):
         # write a new temporary run-abort file:
         tmp_suffix = self.abort_EARs_file_path.suffix + ".tmp"
         tmp = self.abort_EARs_file_path.with_suffix(tmp_suffix)
-        self.
+        self._app.submission_logger.debug(f"Creating temporary run abort file: {tmp!r}.")
         with tmp.open(mode="wt", newline="\n") as fp:
-            fp.write("\n".join(
+            fp.write("\n".join(lines) + "\n")
 
         # atomic rename, overwriting original:
-        self.
+        self._app.submission_logger.debug(
             "Replacing original run abort file with new temporary file."
         )
         os.replace(src=tmp, dst=self.abort_EARs_file_path)
 
     @staticmethod
     def get_unique_schedulers_of_jobscripts(
-        jobscripts:
-    ) ->
+        jobscripts: Iterable[Jobscript],
+    ) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
         """Get unique schedulers and which of the passed jobscripts they correspond to.
 
-        Uniqueness is determines only by the `
+        Uniqueness is determines only by the `QueuedScheduler.unique_properties` tuple.
 
         Parameters
         ----------
         jobscripts: list[~hpcflow.app.Jobscript]
+
+        Returns
+        -------
+        scheduler_mapping
+            Mapping where keys are a sequence of jobscript index descriptors and
+            the values are the scheduler to use for that jobscript.
+            A jobscript index descriptor is a pair of the submission index and the main
+            jobscript index.
         """
-        js_idx = []
-        schedulers = []
+        js_idx: list[list[tuple[int, int]]] = []
+        schedulers: list[Scheduler] = []
 
         # list of tuples of scheduler properties we consider to determine "uniqueness",
         # with the first string being the scheduler type (class name):
-        seen_schedulers =
+        seen_schedulers: dict[tuple, int] = {}
 
         for js in jobscripts:
-            if
-                seen_schedulers.
+            if (
+                sched_idx := seen_schedulers.get(key := js.scheduler.unique_properties)
+            ) is None:
+                seen_schedulers[key] = sched_idx = len(seen_schedulers) - 1
                 schedulers.append(js.scheduler)
                 js_idx.append([])
-            sched_idx = seen_schedulers.index(js.scheduler.unique_properties)
             js_idx[sched_idx].append((js.submission.index, js.index))
 
-
+        return zip(map(tuple, js_idx), schedulers)
 
-
+    @property
+    @TimeIt.decorator
+    def _unique_schedulers(
+        self,
+    ) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
+        return self.get_unique_schedulers_of_jobscripts(self.jobscripts)
 
     @TimeIt.decorator
-    def get_unique_schedulers(self) ->
+    def get_unique_schedulers(self) -> Mapping[tuple[tuple[int, int], ...], Scheduler]:
         """Get unique schedulers and which of this submission's jobscripts they
-        correspond to.
-
+        correspond to.
+
+        Returns
+        -------
+        scheduler_mapping
+            Mapping where keys are a sequence of jobscript index descriptors and
+            the values are the scheduler to use for that jobscript.
+            A jobscript index descriptor is a pair of the submission index and the main
+            jobscript index.
+        """
+        # This is an absurd type; you never use the key as a key
+        return dict(self._unique_schedulers)
 
     @TimeIt.decorator
-    def get_unique_shells(self) ->
+    def get_unique_shells(self) -> Iterable[tuple[tuple[int, ...], Shell]]:
         """Get unique shells and which jobscripts they correspond to."""
-        js_idx = []
-        shells = []
+        js_idx: list[list[int]] = []
+        shells: list[Shell] = []
 
         for js in self.jobscripts:
             if js.shell not in shells:
```
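Scheduler grouping previously re-scanned a list via `seen_schedulers.index(...)` for every jobscript; the new code keys a dict on `unique_properties` and registers first-seen keys with an assignment expression, making each lookup O(1). The same shape, with plain strings standing in for scheduler properties:

```python
items = ["slurm", "sge", "slurm", "direct"]  # stand-ins for unique_properties

seen: dict[str, int] = {}
groups: list[list[int]] = []
for pos, key in enumerate(items):
    if (idx := seen.get(key)) is None:
        # First occurrence: remember its group index and open a new group.
        seen[key] = idx = len(groups)
        groups.append([])
    groups[idx].append(pos)

assert groups == [[0, 2], [1], [3]]
```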
```diff
@@ -478,38 +461,9 @@ class Submission(JSONLike):
             shell_idx = shells.index(js.shell)
             js_idx[shell_idx].append(js.index)
 
-
-
-        return shell_js_idx
+        return zip(map(tuple, js_idx), shells)
 
-    def
-        msg = f"Some jobscripts in submission index {self.index} could not be submitted"
-        if submitted_js_idx:
-            msg += f" (but jobscripts {submitted_js_idx} were submitted successfully):"
-        else:
-            msg += ":"
-
-        msg += "\n"
-        for sub_err in exceptions:
-            msg += (
-                f"Jobscript {sub_err.js_idx} at path: {str(sub_err.js_path)!r}\n"
-                f"Submit command: {sub_err.submit_cmd!r}.\n"
-                f"Reason: {sub_err.message!r}\n"
-            )
-            if sub_err.subprocess_exc is not None:
-                msg += f"Subprocess exception: {sub_err.subprocess_exc}\n"
-            if sub_err.job_ID_parse_exc is not None:
-                msg += f"Subprocess job ID parse exception: {sub_err.job_ID_parse_exc}\n"
-            if sub_err.job_ID_parse_exc is not None:
-                msg += f"Job ID parse exception: {sub_err.job_ID_parse_exc}\n"
-            if sub_err.stdout:
-                msg += f"Submission stdout:\n{indent(sub_err.stdout, ' ')}\n"
-            if sub_err.stderr:
-                msg += f"Submission stderr:\n{indent(sub_err.stderr, ' ')}\n"
-
-        raise SubmissionFailure(message=msg)
-
-    def _append_submission_part(self, submit_time: str, submitted_js_idx: List[int]):
+    def _append_submission_part(self, submit_time: str, submitted_js_idx: list[int]):
         self._submission_parts[submit_time] = submitted_js_idx
         self.workflow._store.add_submission_part(
             sub_idx=self.index,
@@ -520,11 +474,11 @@ class Submission(JSONLike):
     @TimeIt.decorator
     def submit(
         self,
-        status,
-        ignore_errors:
-        print_stdout:
-        add_to_known:
-    ) ->
+        status: Status | None,
+        ignore_errors: bool = False,
+        print_stdout: bool = False,
+        add_to_known: bool = True,
+    ) -> list[int]:
         """Generate and submit the jobscripts of this submission."""
 
         # if JS_parallelism explicitly requested but store doesn't support, raise:
@@ -550,34 +504,28 @@ class Submission(JSONLike):
 
         # get scheduler, shell and OS version information (also an opportunity to fail
         # before trying to submit jobscripts):
-        js_vers_info = {}
-        for js_indices, sched in self.
+        js_vers_info: dict[int, dict[str, str | list[str]]] = {}
+        for js_indices, sched in self._unique_schedulers:
             try:
                 vers_info = sched.get_version_info()
-            except Exception
-                if ignore_errors:
-
-
-                    raise err
+            except Exception:
+                if not ignore_errors:
+                    raise
+                vers_info = {}
             for _, js_idx in js_indices:
                 if js_idx in outstanding:
-
-                    js_vers_info[js_idx] = {}
-                    js_vers_info[js_idx].update(vers_info)
+                    js_vers_info.setdefault(js_idx, {}).update(vers_info)
 
-        for
+        for js_indices_2, shell in self.get_unique_shells():
             try:
                 vers_info = shell.get_version_info()
-            except Exception
-                if ignore_errors:
-
-
-
-            for js_idx in js_indices:
+            except Exception:
+                if not ignore_errors:
+                    raise
+                vers_info = {}
+            for js_idx in js_indices_2:
                 if js_idx in outstanding:
-
-                    js_vers_info[js_idx] = {}
-                    js_vers_info[js_idx].update(vers_info)
+                    js_vers_info.setdefault(js_idx, {}).update(vers_info)
 
         for js_idx, vers_info_i in js_vers_info.items():
             self.jobscripts[js_idx]._set_version_info(vers_info_i)
```
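Version-info merging now goes through `dict.setdefault(js_idx, {}).update(...)` rather than a create-then-update pair: `setdefault` returns the existing inner dict, or inserts and returns a fresh one, so repeated passes (scheduler info, then shell info) merge into a single entry:

```python
js_vers_info: dict[int, dict[str, str]] = {}

# First pass (scheduler details), then a second pass (shell details);
# the values here are purely illustrative:
js_vers_info.setdefault(3, {}).update({"scheduler": "slurm"})
js_vers_info.setdefault(3, {}).update({"shell": "bash"})

assert js_vers_info == {3: {"scheduler": "slurm", "shell": "bash"}}
```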
```diff
@@ -595,9 +543,9 @@ class Submission(JSONLike):
             self._write_abort_EARs_file()
 
         # map jobscript `index` to (scheduler job ID or process ID, is_array):
-        scheduler_refs = {}
-        submitted_js_idx = []
-        errs = []
+        scheduler_refs: dict[int, tuple[str, bool]] = {}
+        submitted_js_idx: list[int] = []
+        errs: list[JobscriptSubmissionFailure] = []
         for js in self.jobscripts:
             # check not previously submitted:
             if js.index not in outstanding:
@@ -622,14 +570,14 @@ class Submission(JSONLike):
                 continue
 
         if submitted_js_idx:
-            dt_str =
+            dt_str = current_timestamp().strftime(self._app._submission_ts_fmt)
             self._append_submission_part(
                 submit_time=dt_str,
                 submitted_js_idx=submitted_js_idx,
             )
             # add a record of the submission part to the known-submissions file
             if add_to_known:
-                self.
+                self._app._add_to_known_submissions(
                     wk_path=self.workflow.path,
                     wk_id=self.workflow.id_,
                     sub_idx=self.index,
@@ -639,7 +587,7 @@ class Submission(JSONLike):
         if errs and not ignore_errors:
             if status:
                 status.stop()
-            self.
+            raise SubmissionFailure(self.index, submitted_js_idx, errs)
 
         len_js = len(submitted_js_idx)
         print(f"Submitted {len_js} jobscript{'s' if len_js > 1 else ''}.")
@@ -647,24 +595,22 @@ class Submission(JSONLike):
         return submitted_js_idx
 
     @TimeIt.decorator
-    def cancel(self):
+    def cancel(self) -> None:
        """
         Cancel the active jobs for this submission's jobscripts.
         """
-        act_js
-        if not act_js:
+        if not (act_js := self.get_active_jobscripts()):
             print("No active jobscripts to cancel.")
             return
-        for js_indices, sched in self.
+        for js_indices, sched in self._unique_schedulers:
             # filter by active jobscripts:
-            js_idx
-            if js_idx:
+            if js_idx := [i[1] for i in js_indices if i[1] in act_js]:
                 print(
                     f"Cancelling jobscripts {js_idx!r} of submission {self.index} of "
                     f"workflow {self.workflow.name!r}."
                 )
                 jobscripts = [self.jobscripts[i] for i in js_idx]
-                sched_refs = [
+                sched_refs = [js.scheduler_js_ref for js in jobscripts]
                 sched.cancel_jobs(js_refs=sched_refs, jobscripts=jobscripts)
             else:
                 print("No active jobscripts to cancel.")
```