hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
- hpcflow/_version.py +1 -1
- hpcflow/app.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +26 -15
- hpcflow/sdk/app.py +2192 -768
- hpcflow/sdk/cli.py +506 -296
- hpcflow/sdk/cli_common.py +105 -7
- hpcflow/sdk/config/__init__.py +1 -1
- hpcflow/sdk/config/callbacks.py +115 -43
- hpcflow/sdk/config/cli.py +126 -103
- hpcflow/sdk/config/config.py +674 -318
- hpcflow/sdk/config/config_file.py +131 -95
- hpcflow/sdk/config/errors.py +125 -84
- hpcflow/sdk/config/types.py +148 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +1771 -1059
- hpcflow/sdk/core/app_aware.py +24 -0
- hpcflow/sdk/core/cache.py +139 -79
- hpcflow/sdk/core/command_files.py +263 -287
- hpcflow/sdk/core/commands.py +145 -112
- hpcflow/sdk/core/element.py +828 -535
- hpcflow/sdk/core/enums.py +192 -0
- hpcflow/sdk/core/environment.py +74 -93
- hpcflow/sdk/core/errors.py +455 -52
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +540 -272
- hpcflow/sdk/core/loop.py +751 -347
- hpcflow/sdk/core/loop_cache.py +164 -47
- hpcflow/sdk/core/object_list.py +370 -207
- hpcflow/sdk/core/parameters.py +1100 -627
- hpcflow/sdk/core/rule.py +59 -41
- hpcflow/sdk/core/run_dir_files.py +21 -37
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +1649 -1339
- hpcflow/sdk/core/task_schema.py +308 -196
- hpcflow/sdk/core/test_utils.py +191 -114
- hpcflow/sdk/core/types.py +440 -0
- hpcflow/sdk/core/utils.py +485 -309
- hpcflow/sdk/core/validation.py +82 -9
- hpcflow/sdk/core/workflow.py +2544 -1178
- hpcflow/sdk/core/zarr_io.py +98 -137
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +53 -33
- hpcflow/sdk/helper/cli.py +18 -15
- hpcflow/sdk/helper/helper.py +75 -63
- hpcflow/sdk/helper/watcher.py +61 -28
- hpcflow/sdk/log.py +122 -71
- hpcflow/sdk/persistence/__init__.py +8 -31
- hpcflow/sdk/persistence/base.py +1360 -606
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +38 -0
- hpcflow/sdk/persistence/json.py +568 -188
- hpcflow/sdk/persistence/pending.py +382 -179
- hpcflow/sdk/persistence/store_resource.py +39 -23
- hpcflow/sdk/persistence/types.py +318 -0
- hpcflow/sdk/persistence/utils.py +14 -11
- hpcflow/sdk/persistence/zarr.py +1337 -433
- hpcflow/sdk/runtime.py +44 -41
- hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
- hpcflow/sdk/submission/jobscript.py +1651 -692
- hpcflow/sdk/submission/schedulers/__init__.py +167 -39
- hpcflow/sdk/submission/schedulers/direct.py +121 -81
- hpcflow/sdk/submission/schedulers/sge.py +170 -129
- hpcflow/sdk/submission/schedulers/slurm.py +291 -268
- hpcflow/sdk/submission/schedulers/utils.py +12 -2
- hpcflow/sdk/submission/shells/__init__.py +14 -15
- hpcflow/sdk/submission/shells/base.py +150 -29
- hpcflow/sdk/submission/shells/bash.py +283 -173
- hpcflow/sdk/submission/shells/os_version.py +31 -30
- hpcflow/sdk/submission/shells/powershell.py +228 -170
- hpcflow/sdk/submission/submission.py +1014 -335
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +182 -12
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +27 -6
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +866 -85
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
- hpcflow/tests/unit/test_action.py +262 -75
- hpcflow/tests/unit/test_action_rule.py +9 -4
- hpcflow/tests/unit/test_app.py +33 -6
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +134 -1
- hpcflow/tests/unit/test_command.py +71 -54
- hpcflow/tests/unit/test_config.py +142 -16
- hpcflow/tests/unit/test_config_file.py +21 -18
- hpcflow/tests/unit/test_element.py +58 -62
- hpcflow/tests/unit/test_element_iteration.py +50 -1
- hpcflow/tests/unit/test_element_set.py +29 -19
- hpcflow/tests/unit/test_group.py +4 -2
- hpcflow/tests/unit/test_input_source.py +116 -93
- hpcflow/tests/unit/test_input_value.py +29 -24
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +44 -35
- hpcflow/tests/unit/test_loop.py +1396 -84
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_object_list.py +17 -12
- hpcflow/tests/unit/test_parameter.py +29 -7
- hpcflow/tests/unit/test_persistence.py +237 -42
- hpcflow/tests/unit/test_resources.py +20 -18
- hpcflow/tests/unit/test_run.py +117 -6
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +2 -1
- hpcflow/tests/unit/test_schema_input.py +23 -15
- hpcflow/tests/unit/test_shell.py +23 -2
- hpcflow/tests/unit/test_slurm.py +8 -7
- hpcflow/tests/unit/test_submission.py +38 -89
- hpcflow/tests/unit/test_task.py +352 -247
- hpcflow/tests/unit/test_task_schema.py +33 -20
- hpcflow/tests/unit/test_utils.py +9 -11
- hpcflow/tests/unit/test_value_sequence.py +15 -12
- hpcflow/tests/unit/test_workflow.py +114 -83
- hpcflow/tests/unit/test_workflow_template.py +0 -1
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +334 -1
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +160 -15
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
- hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
- hpcflow/sdk/core/parallel.py +0 -21
- hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -4,16 +4,19 @@ A collection of submissions to a scheduler, generated from a workflow.
|
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
6
|
from collections import defaultdict
|
7
|
-
|
8
|
-
from datetime import datetime, timedelta, timezone
|
9
|
-
import enum
|
10
|
-
import os
|
7
|
+
import shutil
|
11
8
|
from pathlib import Path
|
9
|
+
import socket
|
12
10
|
from textwrap import indent
|
13
|
-
from typing import
|
11
|
+
from typing import Any, Literal, overload, TYPE_CHECKING
|
12
|
+
from typing_extensions import override
|
13
|
+
import warnings
|
14
|
+
|
14
15
|
|
15
|
-
from hpcflow.sdk import
|
16
|
-
|
16
|
+
from hpcflow.sdk.utils.strings import shorten_list_str
|
17
|
+
import numpy as np
|
18
|
+
|
19
|
+
from hpcflow.sdk.typing import hydrate
|
17
20
|
from hpcflow.sdk.core.errors import (
|
18
21
|
JobscriptSubmissionFailure,
|
19
22
|
MissingEnvironmentError,
|
@@ -21,47 +24,50 @@ from hpcflow.sdk.core.errors import (
|
|
21
24
|
MissingEnvironmentExecutableInstanceError,
|
22
25
|
MultipleEnvironmentsError,
|
23
26
|
SubmissionFailure,
|
27
|
+
OutputFileParserNoOutputError,
|
24
28
|
)
|
25
29
|
from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
|
26
30
|
from hpcflow.sdk.core.object_list import ObjectListMultipleMatchError
|
31
|
+
from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
|
32
|
+
from hpcflow.sdk.submission.enums import SubmissionStatus
|
33
|
+
from hpcflow.sdk.core import RUN_DIR_ARR_DTYPE
|
27
34
|
from hpcflow.sdk.log import TimeIt
|
35
|
+
from hpcflow.sdk.utils.strings import shorten_list_str
|
36
|
+
|
37
|
+
if TYPE_CHECKING:
|
38
|
+
from collections.abc import Iterable, Mapping, Sequence
|
39
|
+
from datetime import datetime
|
40
|
+
from typing import ClassVar, Literal
|
41
|
+
from rich.status import Status
|
42
|
+
from numpy.typing import NDArray
|
43
|
+
from .jobscript import Jobscript
|
44
|
+
from .enums import JobscriptElementState
|
45
|
+
from .schedulers import Scheduler
|
46
|
+
from .shells import Shell
|
47
|
+
from .types import SubmissionPart
|
48
|
+
from ..core.element import ElementActionRun
|
49
|
+
from ..core.environment import Environment
|
50
|
+
from ..core.object_list import EnvironmentsList
|
51
|
+
from ..core.workflow import Workflow
|
52
|
+
from ..core.cache import ObjectCache
|
53
|
+
|
54
|
+
|
55
|
+
# jobscript attributes that are set persistently just after the jobscript has been
|
56
|
+
# submitted to the scheduler:
|
57
|
+
JOBSCRIPT_SUBMIT_TIME_KEYS = (
|
58
|
+
"submit_cmdline",
|
59
|
+
"scheduler_job_ID",
|
60
|
+
"process_ID",
|
61
|
+
"submit_time",
|
62
|
+
)
|
63
|
+
# submission attributes that are set persistently just after all of a submission's
|
64
|
+
# jobscripts have been submitted:
|
65
|
+
SUBMISSION_SUBMIT_TIME_KEYS = {
|
66
|
+
"submission_parts": dict,
|
67
|
+
}
|
28
68
|
|
29
69
|
|
30
|
-
|
31
|
-
"""
|
32
|
-
Convert time delta to string in standard form.
|
33
|
-
"""
|
34
|
-
days, seconds = td.days, td.seconds
|
35
|
-
hours = seconds // (60 * 60)
|
36
|
-
seconds -= hours * (60 * 60)
|
37
|
-
minutes = seconds // 60
|
38
|
-
seconds -= minutes * 60
|
39
|
-
return f"{days}-{hours:02}:{minutes:02}:{seconds:02}"
|
40
|
-
|
41
|
-
|
42
|
-
def timedelta_parse(td_str: str) -> timedelta:
|
43
|
-
"""
|
44
|
-
Parse a string in standard form as a time delta.
|
45
|
-
"""
|
46
|
-
days, other = td_str.split("-")
|
47
|
-
days = int(days)
|
48
|
-
hours, mins, secs = [int(i) for i in other.split(":")]
|
49
|
-
return timedelta(days=days, hours=hours, minutes=mins, seconds=secs)
|
50
|
-
|
51
|
-
|
52
|
-
class SubmissionStatus(enum.Enum):
|
53
|
-
"""
|
54
|
-
The overall status of a submission.
|
55
|
-
"""
|
56
|
-
|
57
|
-
#: Not yet submitted.
|
58
|
-
PENDING = 0
|
59
|
-
#: All jobscripts submitted successfully.
|
60
|
-
SUBMITTED = 1
|
61
|
-
#: Some jobscripts submitted successfully.
|
62
|
-
PARTIALLY_SUBMITTED = 2
|
63
|
-
|
64
|
-
|
70
|
+
@hydrate
|
65
71
|
class Submission(JSONLike):
|
66
72
|
"""
|
67
73
|
A collection of jobscripts to be submitted to a scheduler.
|
@@ -82,7 +88,7 @@ class Submission(JSONLike):
|
|
82
88
|
The execution environments to use.
|
83
89
|
"""
|
84
90
|
|
85
|
-
_child_objects = (
|
91
|
+
_child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
|
86
92
|
ChildObjectSpec(
|
87
93
|
name="jobscripts",
|
88
94
|
class_name="Jobscript",
|
@@ -95,22 +101,39 @@ class Submission(JSONLike):
|
|
95
101
|
),
|
96
102
|
)
|
97
103
|
|
104
|
+
TMP_DIR_NAME = "tmp"
|
105
|
+
LOG_DIR_NAME = "app_logs"
|
106
|
+
APP_STD_DIR_NAME = "app_std"
|
107
|
+
JS_DIR_NAME = "jobscripts"
|
108
|
+
JS_STD_DIR_NAME = "js_std"
|
109
|
+
JS_RUN_IDS_DIR_NAME = "js_run_ids"
|
110
|
+
JS_FUNCS_DIR_NAME = "js_funcs"
|
111
|
+
JS_WIN_PIDS_DIR_NAME = "js_pids"
|
112
|
+
JS_SCRIPT_INDICES_DIR_NAME = "js_script_indices"
|
113
|
+
SCRIPTS_DIR_NAME = "scripts"
|
114
|
+
COMMANDS_DIR_NAME = "commands"
|
115
|
+
WORKFLOW_APP_ALIAS = "wkflow_app"
|
116
|
+
|
98
117
|
def __init__(
|
99
118
|
self,
|
100
119
|
index: int,
|
101
|
-
jobscripts:
|
102
|
-
workflow:
|
103
|
-
|
104
|
-
JS_parallelism:
|
105
|
-
environments:
|
120
|
+
jobscripts: list[Jobscript],
|
121
|
+
workflow: Workflow | None = None,
|
122
|
+
at_submit_metadata: dict[str, Any] | None = None,
|
123
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
124
|
+
environments: EnvironmentsList | None = None,
|
106
125
|
):
|
107
126
|
self._index = index
|
108
127
|
self._jobscripts = jobscripts
|
109
|
-
self.
|
128
|
+
self._at_submit_metadata = at_submit_metadata or {
|
129
|
+
k: v() for k, v in SUBMISSION_SUBMIT_TIME_KEYS.items()
|
130
|
+
}
|
110
131
|
self._JS_parallelism = JS_parallelism
|
111
|
-
self._environments = environments
|
132
|
+
self._environments = environments # assigned by _set_environments
|
112
133
|
|
113
|
-
self._submission_parts_lst
|
134
|
+
self._submission_parts_lst: list[
|
135
|
+
SubmissionPart
|
136
|
+
] | None = None # assigned on first access
|
114
137
|
|
115
138
|
if workflow:
|
116
139
|
#: The workflow this is part of.
|
@@ -118,41 +141,61 @@ class Submission(JSONLike):
|
|
118
141
|
|
119
142
|
self._set_parent_refs()
|
120
143
|
|
121
|
-
|
122
|
-
|
144
|
+
def _ensure_JS_parallelism_set(self):
|
145
|
+
"""Ensure that the JS_parallelism attribute is one of `True`, `False`, `'direct'`
|
146
|
+
or `'scheduled'`.
|
147
|
+
|
148
|
+
Notes
|
149
|
+
-----
|
150
|
+
This method is called after the Submission object is first created in
|
151
|
+
`Workflow._add_submission`.
|
152
|
+
|
153
|
+
"""
|
154
|
+
# if JS_parallelism explicitly requested but store doesn't support, raise:
|
155
|
+
supports_JS_para = self.workflow._store._features.jobscript_parallelism
|
156
|
+
if self.JS_parallelism:
|
157
|
+
# could be: True | "direct" | "scheduled"
|
158
|
+
if not supports_JS_para:
|
159
|
+
# if status:
|
160
|
+
# status.stop()
|
161
|
+
raise ValueError(
|
162
|
+
f"Store type {self.workflow._store!r} does not support jobscript "
|
163
|
+
f"parallelism."
|
164
|
+
)
|
165
|
+
elif self.JS_parallelism is None:
|
166
|
+
# by default only use JS parallelism for scheduled jobscripts:
|
167
|
+
self._JS_parallelism = "scheduled" if supports_JS_para else False
|
123
168
|
|
124
169
|
@TimeIt.decorator
|
125
|
-
def _set_environments(self):
|
126
|
-
filterable = ElementResources.get_env_instance_filterable_attributes()
|
170
|
+
def _set_environments(self) -> None:
|
171
|
+
filterable = self._app.ElementResources.get_env_instance_filterable_attributes()
|
127
172
|
|
128
173
|
# map required environments and executable labels to job script indices:
|
129
|
-
req_envs
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
174
|
+
req_envs: dict[
|
175
|
+
tuple[tuple[str, ...], tuple[Any, ...]], dict[str, set[int]]
|
176
|
+
] = defaultdict(lambda: defaultdict(set))
|
177
|
+
with self.workflow.cached_merged_parameters():
|
178
|
+
# using the cache (for `run.env_spec_hashable` -> `run.resources`) should
|
179
|
+
# significantly speed up this loop, unless a large resources sequence is used:
|
180
|
+
for js_idx, all_EARs_i in enumerate(self.all_EARs_by_jobscript):
|
181
|
+
for run in all_EARs_i:
|
182
|
+
env_spec_h = run.env_spec_hashable
|
183
|
+
for exec_label_j in run.action.get_required_executables():
|
184
|
+
req_envs[env_spec_h][exec_label_j].add(js_idx)
|
185
|
+
# add any environment for which an executable was not required:
|
186
|
+
if env_spec_h not in req_envs:
|
187
|
+
req_envs[env_spec_h]
|
137
188
|
|
138
189
|
# check these envs/execs exist in app data:
|
139
|
-
envs = []
|
190
|
+
envs: list[Environment] = []
|
140
191
|
for env_spec_h, exec_js in req_envs.items():
|
141
|
-
env_spec =
|
142
|
-
non_name_spec = {k: v for k, v in env_spec.items() if k != "name"}
|
143
|
-
spec_str = f" with specifiers {non_name_spec!r}" if non_name_spec else ""
|
144
|
-
env_ref = f"{env_spec['name']!r}{spec_str}"
|
192
|
+
env_spec = self._app.Action.env_spec_from_hashable(env_spec_h)
|
145
193
|
try:
|
146
|
-
env_i = self.
|
194
|
+
env_i = self._app.envs.get(**env_spec)
|
147
195
|
except ObjectListMultipleMatchError:
|
148
|
-
raise MultipleEnvironmentsError(
|
149
|
-
f"Multiple environments {env_ref} are defined on this machine."
|
150
|
-
)
|
196
|
+
raise MultipleEnvironmentsError(env_spec)
|
151
197
|
except ValueError:
|
152
|
-
raise MissingEnvironmentError(
|
153
|
-
f"The environment {env_ref} is not defined on this machine, so the "
|
154
|
-
f"submission cannot be created."
|
155
|
-
) from None
|
198
|
+
raise MissingEnvironmentError(env_spec) from None
|
156
199
|
else:
|
157
200
|
if env_i not in envs:
|
158
201
|
envs.append(env_i)
|
@@ -162,34 +205,28 @@ class Submission(JSONLike):
|
|
162
205
|
exec_i = env_i.executables.get(exec_i_lab)
|
163
206
|
except ValueError:
|
164
207
|
raise MissingEnvironmentExecutableError(
|
165
|
-
|
166
|
-
f"executable labelled {exec_i_lab!r}, which is required for this "
|
167
|
-
f"submission, so the submission cannot be created."
|
208
|
+
env_spec, exec_i_lab
|
168
209
|
) from None
|
169
210
|
|
170
211
|
# check matching executable instances exist:
|
171
212
|
for js_idx_j in js_idx_set:
|
172
|
-
|
173
|
-
filter_exec = {j: getattr(
|
174
|
-
|
175
|
-
if not exec_instances:
|
213
|
+
js_res = self.jobscripts[js_idx_j].resources
|
214
|
+
filter_exec = {j: getattr(js_res, j) for j in filterable}
|
215
|
+
if not exec_i.filter_instances(**filter_exec):
|
176
216
|
raise MissingEnvironmentExecutableInstanceError(
|
177
|
-
|
178
|
-
f"{exec_i_lab!r} of environment {env_ref} for jobscript "
|
179
|
-
f"index {js_idx_j!r} with requested resources "
|
180
|
-
f"{filter_exec!r}."
|
217
|
+
env_spec, exec_i_lab, js_idx_j, filter_exec
|
181
218
|
)
|
182
219
|
|
183
220
|
# save env definitions to the environments attribute:
|
184
|
-
self._environments = self.
|
221
|
+
self._environments = self._app.EnvironmentsList(envs)
|
185
222
|
|
186
|
-
|
187
|
-
|
223
|
+
@override
|
224
|
+
def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
|
225
|
+
dct = super()._postprocess_to_dict(d)
|
188
226
|
del dct["_workflow"]
|
189
227
|
del dct["_index"]
|
190
228
|
del dct["_submission_parts_lst"]
|
191
|
-
|
192
|
-
return dct
|
229
|
+
return {k.lstrip("_"): v for k, v in dct.items()}
|
193
230
|
|
194
231
|
@property
|
195
232
|
def index(self) -> int:
|
@@ -199,26 +236,29 @@ class Submission(JSONLike):
|
|
199
236
|
return self._index
|
200
237
|
|
201
238
|
@property
|
202
|
-
def environments(self) ->
|
239
|
+
def environments(self) -> EnvironmentsList:
|
203
240
|
"""
|
204
241
|
The execution environments to use.
|
205
242
|
"""
|
243
|
+
assert self._environments
|
206
244
|
return self._environments
|
207
245
|
|
208
246
|
@property
|
209
|
-
def
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
247
|
+
def at_submit_metadata(self) -> dict[str, dict[str, Any]]:
|
248
|
+
return self.workflow._store.get_submission_at_submit_metadata(
|
249
|
+
sub_idx=self.index, metadata_attr=self._at_submit_metadata
|
250
|
+
)
|
251
|
+
|
252
|
+
@property
|
253
|
+
def _submission_parts(self) -> dict[str, list[int]]:
|
254
|
+
return self.at_submit_metadata["submission_parts"] or {}
|
215
255
|
|
256
|
+
@property
|
257
|
+
def submission_parts(self) -> list[SubmissionPart]:
|
216
258
|
if self._submission_parts_lst is None:
|
217
259
|
self._submission_parts_lst = [
|
218
260
|
{
|
219
|
-
"submit_time":
|
220
|
-
.replace(tzinfo=timezone.utc)
|
221
|
-
.astimezone(),
|
261
|
+
"submit_time": parse_timestamp(dt, self.workflow.ts_fmt),
|
222
262
|
"jobscripts": js_idx,
|
223
263
|
}
|
224
264
|
for dt, js_idx in self._submission_parts.items()
|
@@ -226,116 +266,89 @@ class Submission(JSONLike):
|
|
226
266
|
return self._submission_parts_lst
|
227
267
|
|
228
268
|
@TimeIt.decorator
|
229
|
-
def get_start_time(self, submit_time: str) ->
|
269
|
+
def get_start_time(self, submit_time: str) -> datetime | None:
|
230
270
|
"""Get the start time of a given submission part."""
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
if start_time:
|
236
|
-
all_part_starts.append(start_time)
|
237
|
-
if all_part_starts:
|
238
|
-
return min(all_part_starts)
|
239
|
-
else:
|
240
|
-
return None
|
271
|
+
times = (
|
272
|
+
self.jobscripts[i].start_time for i in self._submission_parts[submit_time]
|
273
|
+
)
|
274
|
+
return min((t for t in times if t is not None), default=None)
|
241
275
|
|
242
276
|
@TimeIt.decorator
|
243
|
-
def get_end_time(self, submit_time: str) ->
|
277
|
+
def get_end_time(self, submit_time: str) -> datetime | None:
|
244
278
|
"""Get the end time of a given submission part."""
|
245
|
-
|
246
|
-
|
247
|
-
for i in js_idx:
|
248
|
-
end_time = self.jobscripts[i].end_time
|
249
|
-
if end_time:
|
250
|
-
all_part_ends.append(end_time)
|
251
|
-
if all_part_ends:
|
252
|
-
return max(all_part_ends)
|
253
|
-
else:
|
254
|
-
return None
|
279
|
+
times = (self.jobscripts[i].end_time for i in self._submission_parts[submit_time])
|
280
|
+
return max((t for t in times if t is not None), default=None)
|
255
281
|
|
256
282
|
@property
|
257
283
|
@TimeIt.decorator
|
258
|
-
def start_time(self):
|
284
|
+
def start_time(self) -> datetime | None:
|
259
285
|
"""Get the first non-None start time over all submission parts."""
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
all_start_times.append(start_i)
|
265
|
-
if all_start_times:
|
266
|
-
return max(all_start_times)
|
267
|
-
else:
|
268
|
-
return None
|
286
|
+
times = (
|
287
|
+
self.get_start_time(submit_time) for submit_time in self._submission_parts
|
288
|
+
)
|
289
|
+
return min((t for t in times if t is not None), default=None)
|
269
290
|
|
270
291
|
@property
|
271
292
|
@TimeIt.decorator
|
272
|
-
def end_time(self):
|
293
|
+
def end_time(self) -> datetime | None:
|
273
294
|
"""Get the final non-None end time over all submission parts."""
|
274
|
-
|
275
|
-
for
|
276
|
-
end_i = self.get_end_time(submit_time)
|
277
|
-
if end_i:
|
278
|
-
all_end_times.append(end_i)
|
279
|
-
if all_end_times:
|
280
|
-
return max(all_end_times)
|
281
|
-
else:
|
282
|
-
return None
|
295
|
+
times = (self.get_end_time(submit_time) for submit_time in self._submission_parts)
|
296
|
+
return max((t for t in times if t is not None), default=None)
|
283
297
|
|
284
298
|
@property
|
285
|
-
def jobscripts(self) ->
|
299
|
+
def jobscripts(self) -> list[Jobscript]:
|
286
300
|
"""
|
287
301
|
The jobscripts in this submission.
|
288
302
|
"""
|
289
303
|
return self._jobscripts
|
290
304
|
|
291
305
|
@property
|
292
|
-
def JS_parallelism(self):
|
306
|
+
def JS_parallelism(self) -> bool | Literal["direct", "scheduled"] | None:
|
293
307
|
"""
|
294
308
|
Whether to exploit jobscript parallelism.
|
295
309
|
"""
|
296
310
|
return self._JS_parallelism
|
297
311
|
|
298
312
|
@property
|
299
|
-
def workflow(self) ->
|
313
|
+
def workflow(self) -> Workflow:
|
300
314
|
"""
|
301
315
|
The workflow this is part of.
|
302
316
|
"""
|
303
317
|
return self._workflow
|
304
318
|
|
305
319
|
@workflow.setter
|
306
|
-
def workflow(self, wk):
|
320
|
+
def workflow(self, wk: Workflow):
|
307
321
|
self._workflow = wk
|
308
322
|
|
309
323
|
@property
|
310
|
-
def jobscript_indices(self) ->
|
324
|
+
def jobscript_indices(self) -> tuple[int, ...]:
|
311
325
|
"""All associated jobscript indices."""
|
312
|
-
return tuple(
|
326
|
+
return tuple(js.index for js in self.jobscripts)
|
313
327
|
|
314
328
|
@property
|
315
|
-
def submitted_jobscripts(self) ->
|
329
|
+
def submitted_jobscripts(self) -> tuple[int, ...]:
|
316
330
|
"""Jobscript indices that have been successfully submitted."""
|
317
|
-
return tuple(j for
|
331
|
+
return tuple(j for sp in self.submission_parts for j in sp["jobscripts"])
|
318
332
|
|
319
333
|
@property
|
320
|
-
def outstanding_jobscripts(self) ->
|
334
|
+
def outstanding_jobscripts(self) -> tuple[int, ...]:
|
321
335
|
"""Jobscript indices that have not yet been successfully submitted."""
|
322
|
-
return tuple(set(self.jobscript_indices)
|
336
|
+
return tuple(set(self.jobscript_indices).difference(self.submitted_jobscripts))
|
323
337
|
|
324
338
|
@property
|
325
|
-
def status(self):
|
339
|
+
def status(self) -> SubmissionStatus:
|
326
340
|
"""
|
327
341
|
The status of this submission.
|
328
342
|
"""
|
329
343
|
if not self.submission_parts:
|
330
344
|
return SubmissionStatus.PENDING
|
345
|
+
elif set(self.submitted_jobscripts) == set(self.jobscript_indices):
|
346
|
+
return SubmissionStatus.SUBMITTED
|
331
347
|
else:
|
332
|
-
|
333
|
-
return SubmissionStatus.SUBMITTED
|
334
|
-
else:
|
335
|
-
return SubmissionStatus.PARTIALLY_SUBMITTED
|
348
|
+
return SubmissionStatus.PARTIALLY_SUBMITTED
|
336
349
|
|
337
350
|
@property
|
338
|
-
def needs_submit(self):
|
351
|
+
def needs_submit(self) -> bool:
|
339
352
|
"""
|
340
353
|
Whether this submission needs a submit to be done.
|
341
354
|
"""
|
@@ -345,131 +358,695 @@ class Submission(JSONLike):
|
|
345
358
|
)
|
346
359
|
|
347
360
|
@property
|
348
|
-
def
|
361
|
+
def needs_app_log_dir(self) -> bool:
|
349
362
|
"""
|
350
|
-
|
363
|
+
Whether this submission requires an app log directory.
|
351
364
|
"""
|
352
|
-
|
365
|
+
for js in self.jobscripts:
|
366
|
+
if js.resources.write_app_logs:
|
367
|
+
return True
|
368
|
+
return False
|
353
369
|
|
354
370
|
@property
|
355
|
-
def
|
371
|
+
def needs_win_pids_dir(self) -> bool:
|
356
372
|
"""
|
357
|
-
|
373
|
+
Whether this submission requires a directory for process ID files (Windows only).
|
374
|
+
"""
|
375
|
+
for js in self.jobscripts:
|
376
|
+
if js.os_name == "nt":
|
377
|
+
return True
|
378
|
+
return False
|
379
|
+
|
380
|
+
@property
|
381
|
+
def needs_script_indices_dir(self) -> bool:
|
382
|
+
"""
|
383
|
+
Whether this submission requires a directory for combined-script script ID files.
|
384
|
+
"""
|
385
|
+
for js in self.jobscripts:
|
386
|
+
if js.resources.combine_scripts:
|
387
|
+
return True
|
388
|
+
return False
|
389
|
+
|
390
|
+
@classmethod
|
391
|
+
def get_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
392
|
+
"""
|
393
|
+
The directory path to files associated with the specified submission.
|
394
|
+
"""
|
395
|
+
return submissions_path / str(sub_idx)
|
396
|
+
|
397
|
+
@classmethod
|
398
|
+
def get_tmp_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
399
|
+
"""
|
400
|
+
The path to the temporary files directory, for the specified submission.
|
401
|
+
"""
|
402
|
+
return cls.get_path(submissions_path, sub_idx) / cls.TMP_DIR_NAME
|
403
|
+
|
404
|
+
@classmethod
|
405
|
+
def get_app_log_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
406
|
+
"""
|
407
|
+
The path to the app log directory, for the specified
|
408
|
+
submission.
|
409
|
+
"""
|
410
|
+
return cls.get_path(submissions_path, sub_idx) / cls.LOG_DIR_NAME
|
411
|
+
|
412
|
+
@staticmethod
|
413
|
+
def get_app_log_file_name(run_ID: int | str) -> str:
|
414
|
+
"""
|
415
|
+
The app log file name.
|
416
|
+
"""
|
417
|
+
# TODO: consider combine_app_logs argument
|
418
|
+
return f"r_{run_ID}.log"
|
419
|
+
|
420
|
+
@classmethod
|
421
|
+
def get_app_log_file_path(cls, submissions_path: Path, sub_idx: int, run_ID: int):
|
422
|
+
"""
|
423
|
+
The file path to the app log, for the specified submission.
|
424
|
+
"""
|
425
|
+
return (
|
426
|
+
cls.get_path(submissions_path, sub_idx)
|
427
|
+
/ cls.LOG_DIR_NAME
|
428
|
+
/ cls.get_app_log_file_name(run_ID)
|
429
|
+
)
|
430
|
+
|
431
|
+
@classmethod
|
432
|
+
def get_app_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
433
|
+
"""
|
434
|
+
The path to the app standard output and error stream files directory, for the
|
435
|
+
specified submission.
|
436
|
+
"""
|
437
|
+
return cls.get_path(submissions_path, sub_idx) / cls.APP_STD_DIR_NAME
|
438
|
+
|
439
|
+
@classmethod
|
440
|
+
def get_js_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
441
|
+
"""
|
442
|
+
The path to the jobscript files directory, for the specified submission.
|
443
|
+
"""
|
444
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_DIR_NAME
|
445
|
+
|
446
|
+
@classmethod
|
447
|
+
def get_js_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
448
|
+
"""
|
449
|
+
The path to the jobscript standard output and error files directory, for the
|
450
|
+
specified submission.
|
451
|
+
"""
|
452
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_STD_DIR_NAME
|
453
|
+
|
454
|
+
@classmethod
|
455
|
+
def get_js_run_ids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
456
|
+
"""
|
457
|
+
The path to the directory containing jobscript run IDs, for the specified
|
458
|
+
submission.
|
459
|
+
"""
|
460
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_RUN_IDS_DIR_NAME
|
461
|
+
|
462
|
+
@classmethod
|
463
|
+
def get_js_funcs_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
464
|
+
"""
|
465
|
+
The path to the directory containing the shell functions that are invoked within
|
466
|
+
jobscripts and command files, for the specified submission.
|
467
|
+
"""
|
468
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_FUNCS_DIR_NAME
|
469
|
+
|
470
|
+
@classmethod
|
471
|
+
def get_js_win_pids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
472
|
+
"""
|
473
|
+
The path to the directory containing process ID files (Windows only), for the
|
474
|
+
specified submission.
|
475
|
+
"""
|
476
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_WIN_PIDS_DIR_NAME
|
477
|
+
|
478
|
+
@classmethod
|
479
|
+
def get_js_script_indices_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
480
|
+
"""
|
481
|
+
The path to the directory containing script indices for combined-script jobscripts
|
482
|
+
only, for the specified submission.
|
483
|
+
"""
|
484
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_SCRIPT_INDICES_DIR_NAME
|
485
|
+
|
486
|
+
@classmethod
|
487
|
+
def get_scripts_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
488
|
+
"""
|
489
|
+
The path to the directory containing action scripts, for the specified submission.
|
490
|
+
"""
|
491
|
+
return cls.get_path(submissions_path, sub_idx) / cls.SCRIPTS_DIR_NAME
|
492
|
+
|
493
|
+
@classmethod
|
494
|
+
def get_commands_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
495
|
+
"""
|
496
|
+
The path to the directory containing command files, for the specified submission.
|
497
|
+
"""
|
498
|
+
return cls.get_path(submissions_path, sub_idx) / cls.COMMANDS_DIR_NAME
|
499
|
+
|
500
|
+
@property
|
501
|
+
def path(self) -> Path:
|
502
|
+
"""
|
503
|
+
The path to the root directory of this submission.
|
504
|
+
"""
|
505
|
+
return self.get_path(self.workflow.submissions_path, self.index)
|
506
|
+
|
507
|
+
@property
|
508
|
+
def tmp_path(self) -> Path:
|
509
|
+
"""
|
510
|
+
The path to the temporary files directory for this submission.
|
511
|
+
"""
|
512
|
+
return self.get_tmp_path(self.workflow.submissions_path, self.index)
|
513
|
+
|
514
|
+
@property
|
515
|
+
def app_log_path(self) -> Path:
|
516
|
+
"""
|
517
|
+
The path to the app log directory for this submission.
|
518
|
+
"""
|
519
|
+
return self.get_app_log_path(self.workflow.submissions_path, self.index)
|
520
|
+
|
521
|
+
@property
|
522
|
+
def app_std_path(self) -> Path:
|
523
|
+
"""
|
524
|
+
The path to the app standard output and error stream files directory, for
|
525
|
+
this submission.
|
526
|
+
"""
|
527
|
+
return self.get_app_std_path(self.workflow.submissions_path, self.index)
|
528
|
+
|
529
|
+
@property
|
530
|
+
def js_path(self) -> Path:
|
531
|
+
"""
|
532
|
+
The path to the jobscript files directory, for this submission.
|
533
|
+
"""
|
534
|
+
return self.get_js_path(self.workflow.submissions_path, self.index)
|
535
|
+
|
536
|
+
@property
|
537
|
+
def js_std_path(self) -> Path:
|
538
|
+
"""
|
539
|
+
The path to the jobscript standard output and error files directory, for this
|
540
|
+
submission.
|
541
|
+
"""
|
542
|
+
return self.get_js_std_path(self.workflow.submissions_path, self.index)
|
543
|
+
|
544
|
+
@property
|
545
|
+
def js_run_ids_path(self) -> Path:
|
546
|
+
"""
|
547
|
+
The path to the directory containing jobscript run IDs, for this submission.
|
548
|
+
"""
|
549
|
+
return self.get_js_run_ids_path(self.workflow.submissions_path, self.index)
|
550
|
+
|
551
|
+
@property
|
552
|
+
def js_funcs_path(self) -> Path:
|
358
553
|
"""
|
359
|
-
|
554
|
+
The path to the directory containing the shell functions that are invoked within
|
555
|
+
jobscripts and command files, for this submission.
|
556
|
+
"""
|
557
|
+
return self.get_js_funcs_path(self.workflow.submissions_path, self.index)
|
558
|
+
|
559
|
+
@property
|
560
|
+
def js_win_pids_path(self) -> Path:
|
561
|
+
"""
|
562
|
+
The path to the directory containing process ID files (Windows only), for this
|
563
|
+
submission.
|
564
|
+
"""
|
565
|
+
return self.get_js_win_pids_path(self.workflow.submissions_path, self.index)
|
566
|
+
|
567
|
+
@property
|
568
|
+
def js_script_indices_path(self) -> Path:
|
569
|
+
"""
|
570
|
+
The path to the directory containing script indices for combined-script jobscripts
|
571
|
+
only, for this submission.
|
572
|
+
"""
|
573
|
+
return self.get_js_script_indices_path(self.workflow.submissions_path, self.index)
|
574
|
+
|
575
|
+
@property
|
576
|
+
def scripts_path(self) -> Path:
|
577
|
+
"""
|
578
|
+
The path to the directory containing action scripts, for this submission.
|
579
|
+
"""
|
580
|
+
return self.get_scripts_path(self.workflow.submissions_path, self.index)
|
360
581
|
|
361
582
|
@property
|
362
|
-
def
|
583
|
+
def commands_path(self) -> Path:
|
363
584
|
"""
|
364
|
-
|
585
|
+
The path to the directory containing command files, for this submission.
|
365
586
|
"""
|
366
|
-
return
|
587
|
+
return self.get_commands_path(self.workflow.submissions_path, self.index)
|
367
588
|
|
368
589
|
@property
|
369
590
|
@TimeIt.decorator
|
370
|
-
def
|
591
|
+
def all_EAR_IDs(self) -> Iterable[int]:
|
371
592
|
"""
|
372
|
-
|
593
|
+
The IDs of all EARs in this submission.
|
373
594
|
"""
|
374
|
-
|
375
|
-
for i in self.all_EARs:
|
376
|
-
task_elem_EARs[i.task.index][i.element.index].append(i)
|
377
|
-
return task_elem_EARs
|
595
|
+
return (i for js in self.jobscripts for i in js.all_EAR_IDs)
|
378
596
|
|
379
597
|
@property
|
380
|
-
|
598
|
+
@TimeIt.decorator
|
599
|
+
def all_EARs(self) -> Iterable[ElementActionRun]:
|
381
600
|
"""
|
382
|
-
|
601
|
+
All EARs in this submission.
|
383
602
|
"""
|
384
|
-
return
|
603
|
+
return (ear for js in self.jobscripts for ear in js.all_EARs)
|
604
|
+
|
605
|
+
@property
|
606
|
+
@TimeIt.decorator
|
607
|
+
def all_EARs_IDs_by_jobscript(self) -> list[np.ndarray]:
|
608
|
+
return [i.all_EAR_IDs for i in self.jobscripts]
|
385
609
|
|
386
610
|
@property
|
387
|
-
|
611
|
+
@TimeIt.decorator
|
612
|
+
def all_EARs_by_jobscript(self) -> list[list[ElementActionRun]]:
|
613
|
+
ids = [i.all_EAR_IDs for i in self.jobscripts]
|
614
|
+
all_EARs = {i.id_: i for i in self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)}
|
615
|
+
return [[all_EARs[i] for i in js_ids] for js_ids in ids]
|
616
|
+
|
617
|
+
@property
|
618
|
+
@TimeIt.decorator
|
619
|
+
def EARs_by_elements(self) -> Mapping[int, Mapping[int, Sequence[ElementActionRun]]]:
|
388
620
|
"""
|
389
|
-
|
621
|
+
All EARs in this submission, grouped by element.
|
390
622
|
"""
|
391
|
-
|
623
|
+
task_elem_EARs: dict[int, dict[int, list[ElementActionRun]]] = defaultdict(
|
624
|
+
lambda: defaultdict(list)
|
625
|
+
)
|
626
|
+
for ear in self.all_EARs:
|
627
|
+
task_elem_EARs[ear.task.index][ear.element.index].append(ear)
|
628
|
+
return task_elem_EARs
|
629
|
+
|
630
|
+
@property
|
631
|
+
def is_scheduled(self) -> tuple[bool, ...]:
|
632
|
+
"""Return whether each jobscript of this submission uses a scheduler or not."""
|
633
|
+
return tuple(i.is_scheduled for i in self.jobscripts)
|
634
|
+
|
635
|
+
@overload
|
636
|
+
def get_active_jobscripts(
|
637
|
+
self, as_json: Literal[False] = False
|
638
|
+
) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState]]]:
|
639
|
+
...
|
640
|
+
|
641
|
+
@overload
|
642
|
+
def get_active_jobscripts(
|
643
|
+
self, as_json: Literal[True]
|
644
|
+
) -> Mapping[int, Mapping[int, Mapping[int, str]]]:
|
645
|
+
...
|
392
646
|
|
393
647
|
@TimeIt.decorator
|
394
648
|
def get_active_jobscripts(
|
395
|
-
self,
|
396
|
-
|
649
|
+
self,
|
650
|
+
as_json: Literal[True] | Literal[False] = False, # TODO: why can't we use bool?
|
651
|
+
) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState | str]]]:
|
397
652
|
"""Get jobscripts that are active on this machine, and their active states."""
|
398
|
-
# this returns: {JS_IDX: {JS_ELEMENT_IDX: STATE}}
|
653
|
+
# this returns: {JS_IDX: {BLOCK_IDX: {JS_ELEMENT_IDX: STATE}}}
|
399
654
|
# TODO: query the scheduler once for all jobscripts?
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
if
|
404
|
-
|
405
|
-
return out
|
655
|
+
return {
|
656
|
+
js.index: act_states
|
657
|
+
for js in self.jobscripts
|
658
|
+
if (act_states := js.get_active_states(as_json=as_json))
|
659
|
+
}
|
406
660
|
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
self.app.submission_logger.debug(f"Creating temporary run abort file: {tmp!r}.")
|
422
|
-
with tmp.open(mode="wt", newline="\n") as fp:
|
423
|
-
fp.write("\n".join(i for i in lines) + "\n")
|
424
|
-
|
425
|
-
# atomic rename, overwriting original:
|
426
|
-
self.app.submission_logger.debug(
|
427
|
-
"Replacing original run abort file with new temporary file."
|
661
|
+
@TimeIt.decorator
|
662
|
+
def _write_scripts(
|
663
|
+
self, cache: ObjectCache, status: Status | None = None
|
664
|
+
) -> tuple[dict[int, int | None], NDArray, dict[int, list[Path]]]:
|
665
|
+
"""Write to disk all action scripts associated with this submission."""
|
666
|
+
# TODO: rename this method
|
667
|
+
|
668
|
+
# TODO: need to check is_snippet_script is exclusive? i.e. only `script` and no
|
669
|
+
# `commands` in the action?
|
670
|
+
# TODO: scripts must have the same exe and the same environment as well?
|
671
|
+
# TODO: env_spec should be included in jobscript hash if combine_scripts=True ?
|
672
|
+
|
673
|
+
actions_by_schema: dict[str, dict[int, set]] = defaultdict(
|
674
|
+
lambda: defaultdict(set)
|
428
675
|
)
|
429
|
-
|
676
|
+
combined_env_specs = {}
|
677
|
+
|
678
|
+
# task insert IDs and action indices for each combined_scripts jobscript:
|
679
|
+
combined_actions = {}
|
680
|
+
|
681
|
+
cmd_hashes = defaultdict(set)
|
682
|
+
num_runs_tot = sum(len(js.all_EAR_IDs) for js in self.jobscripts)
|
683
|
+
run_indices = np.ones((num_runs_tot, 9), dtype=int) * -1
|
684
|
+
run_inp_files = defaultdict(
|
685
|
+
list
|
686
|
+
) # keys are `run_idx`, values are Paths to copy to run dir
|
687
|
+
run_cmd_file_names: dict[int, int | None] = {} # None if no commands to write
|
688
|
+
run_idx = 0
|
689
|
+
|
690
|
+
if status:
|
691
|
+
status.update(f"Adding new submission: processing run 1/{num_runs_tot}.")
|
692
|
+
|
693
|
+
all_runs = cache.runs
|
694
|
+
assert all_runs is not None
|
695
|
+
runs_ids_by_js = self.all_EARs_IDs_by_jobscript
|
696
|
+
|
697
|
+
with self.workflow.cached_merged_parameters():
|
698
|
+
for js in self.jobscripts:
|
699
|
+
js_idx = js.index
|
700
|
+
js_run_0 = all_runs[runs_ids_by_js[js.index][0]]
|
701
|
+
|
702
|
+
if js.resources.combine_scripts:
|
703
|
+
# this will be one or more snippet scripts that need to be combined into
|
704
|
+
# one script for the whole jobscript
|
705
|
+
|
706
|
+
# need to write one script + one commands file for the whole jobscript
|
707
|
+
|
708
|
+
# env_spec will be the same for all runs of this jobscript:
|
709
|
+
combined_env_specs[js_idx] = js_run_0.env_spec
|
710
|
+
combined_actions[js_idx] = [
|
711
|
+
[j[0:2] for j in i.task_actions] for i in js.blocks
|
712
|
+
]
|
713
|
+
|
714
|
+
for idx, run_id in enumerate(js.all_EAR_IDs):
|
715
|
+
run = all_runs[run_id]
|
716
|
+
|
717
|
+
run_indices[run_idx] = [
|
718
|
+
run.task.insert_ID,
|
719
|
+
run.element.id_,
|
720
|
+
run.element_iteration.id_,
|
721
|
+
run.id_,
|
722
|
+
run.element.index,
|
723
|
+
run.element_iteration.index,
|
724
|
+
run.element_action.action_idx,
|
725
|
+
run.index,
|
726
|
+
int(run.action.requires_dir),
|
727
|
+
]
|
728
|
+
run_idx += 1
|
729
|
+
|
730
|
+
if status and run_idx % 10 == 0:
|
731
|
+
status.update(
|
732
|
+
f"Adding new submission: processing run {run_idx}/{num_runs_tot}."
|
733
|
+
)
|
734
|
+
|
735
|
+
if js.resources.combine_scripts:
|
736
|
+
if idx == 0:
|
737
|
+
# the commands file for a combined jobscript won't have
|
738
|
+
# any parameter data in the command line, so should raise
|
739
|
+
# if something is found to be unset:
|
740
|
+
run.try_write_commands(
|
741
|
+
environments=self.environments,
|
742
|
+
jobscript=js,
|
743
|
+
raise_on_unset=True,
|
744
|
+
)
|
745
|
+
run_cmd_file_names[run.id_] = None
|
746
|
+
|
747
|
+
else:
|
748
|
+
if run.is_snippet_script:
|
749
|
+
actions_by_schema[run.action.task_schema.name][
|
750
|
+
run.element_action.action_idx
|
751
|
+
].add(run.env_spec_hashable)
|
752
|
+
|
753
|
+
if run.action.commands:
|
754
|
+
hash_i = run.get_commands_file_hash()
|
755
|
+
# TODO: could further reduce number of files in the case the data
|
756
|
+
# indices hash is the same: if commands objects are the same and
|
757
|
+
# environment objects are the same, then the files will be the
|
758
|
+
# same, even if runs come from different task schemas/actions...
|
759
|
+
if hash_i not in cmd_hashes:
|
760
|
+
try:
|
761
|
+
run.try_write_commands(
|
762
|
+
environments=self.environments,
|
763
|
+
jobscript=js,
|
764
|
+
)
|
765
|
+
except OutputFileParserNoOutputError:
|
766
|
+
# no commands to write, might be used just for saving
|
767
|
+
# files
|
768
|
+
run_cmd_file_names[run.id_] = None
|
769
|
+
cmd_hashes[hash_i].add(run.id_)
|
770
|
+
else:
|
771
|
+
run_cmd_file_names[run.id_] = None
|
772
|
+
|
773
|
+
if run.action.requires_dir:
|
774
|
+
# TODO: what is type of `path`?
|
775
|
+
for name, path in run.get("input_files", {}).items():
|
776
|
+
if path:
|
777
|
+
run_inp_files[run_idx].append(path)
|
778
|
+
|
779
|
+
for run_ids in cmd_hashes.values():
|
780
|
+
run_ids_srt = sorted(run_ids)
|
781
|
+
root_id = run_ids_srt[0] # used for command file name for this group
|
782
|
+
# TODO: could store multiple IDs to reduce number of files created
|
783
|
+
for run_id_i in run_ids_srt:
|
784
|
+
if run_id_i not in run_cmd_file_names:
|
785
|
+
run_cmd_file_names[run_id_i] = root_id
|
786
|
+
|
787
|
+
if status:
|
788
|
+
status.update("Adding new submission: writing scripts...")
|
789
|
+
|
790
|
+
seen: dict[int, Path] = {}
|
791
|
+
combined_script_data: dict[
|
792
|
+
int, dict[int, list[tuple[str, Path, bool]]]
|
793
|
+
] = defaultdict(lambda: defaultdict(list))
|
794
|
+
for task in self.workflow.tasks:
|
795
|
+
for schema in task.template.schemas:
|
796
|
+
if schema.name in actions_by_schema:
|
797
|
+
for idx, action in enumerate(schema.actions):
|
798
|
+
|
799
|
+
if not action.script:
|
800
|
+
continue
|
801
|
+
|
802
|
+
for env_spec_h in actions_by_schema[schema.name][idx]:
|
803
|
+
|
804
|
+
env_spec = action.env_spec_from_hashable(env_spec_h)
|
805
|
+
name, snip_path, specs = action.get_script_artifact_name(
|
806
|
+
env_spec=env_spec,
|
807
|
+
act_idx=idx,
|
808
|
+
ret_specifiers=True,
|
809
|
+
)
|
810
|
+
script_hash = action.get_script_determinant_hash(specs)
|
811
|
+
script_path = self.scripts_path / name
|
812
|
+
prev_path = seen.get(script_hash)
|
813
|
+
if script_path == prev_path:
|
814
|
+
continue
|
815
|
+
|
816
|
+
elif prev_path:
|
817
|
+
# try to make a symbolic link to the file previously
|
818
|
+
# created:
|
819
|
+
try:
|
820
|
+
script_path.symlink_to(prev_path.name)
|
821
|
+
except OSError:
|
822
|
+
# windows requires admin permission, copy instead:
|
823
|
+
shutil.copy(prev_path, script_path)
|
824
|
+
else:
|
825
|
+
# write script to disk:
|
826
|
+
source_str = action.compose_source(snip_path)
|
827
|
+
if source_str:
|
828
|
+
with script_path.open("wt", newline="\n") as fp:
|
829
|
+
fp.write(source_str)
|
830
|
+
seen[script_hash] = script_path
|
831
|
+
|
832
|
+
# combined script stuff
|
833
|
+
for js_idx, act_IDs in combined_actions.items():
|
834
|
+
for block_idx, act_IDs_i in enumerate(act_IDs):
|
835
|
+
for task_iID, act_idx in act_IDs_i:
|
836
|
+
task = self.workflow.tasks.get(insert_ID=task_iID)
|
837
|
+
schema = task.template.schemas[0] # TODO: multiple schemas
|
838
|
+
action = schema.actions[act_idx]
|
839
|
+
func_name, snip_path = action.get_script_artifact_name(
|
840
|
+
env_spec=combined_env_specs[js_idx],
|
841
|
+
act_idx=act_idx,
|
842
|
+
ret_specifiers=False,
|
843
|
+
include_suffix=False,
|
844
|
+
specs_suffix_delim="_", # can't use "." in function name
|
845
|
+
)
|
846
|
+
combined_script_data[js_idx][block_idx].append(
|
847
|
+
(func_name, snip_path, action.requires_dir)
|
848
|
+
)
|
849
|
+
|
850
|
+
for js_idx, action_scripts in combined_script_data.items():
|
851
|
+
js = self.jobscripts[js_idx]
|
852
|
+
|
853
|
+
script_str, script_indices, num_elems, num_acts = js.compose_combined_script(
|
854
|
+
[i for _, i in sorted(action_scripts.items())]
|
855
|
+
)
|
856
|
+
js.write_script_indices_file(script_indices, num_elems, num_acts)
|
857
|
+
|
858
|
+
script_path = self.scripts_path / f"js_{js_idx}.py" # TODO: refactor name
|
859
|
+
with script_path.open("wt", newline="\n") as fp:
|
860
|
+
fp.write(script_str)
|
861
|
+
|
862
|
+
return run_cmd_file_names, run_indices, run_inp_files
|
863
|
+
|
864
|
+
@TimeIt.decorator
|
865
|
+
def _calculate_run_dir_indices(
|
866
|
+
self,
|
867
|
+
run_indices: np.ndarray,
|
868
|
+
cache: ObjectCache,
|
869
|
+
) -> tuple[np.ndarray, np.ndarray]:
|
870
|
+
|
871
|
+
assert cache.elements is not None
|
872
|
+
assert cache.iterations is not None
|
873
|
+
# get the multiplicities of all tasks, elements, iterations, and runs:
|
874
|
+
wk_num_tasks = self.workflow.num_tasks
|
875
|
+
task_num_elems = {}
|
876
|
+
elem_num_iters = {}
|
877
|
+
iter_num_acts = {}
|
878
|
+
iter_acts_num_runs = {}
|
879
|
+
for task in self.workflow.tasks:
|
880
|
+
elem_IDs = task.element_IDs
|
881
|
+
task_num_elems[task.insert_ID] = len(elem_IDs)
|
882
|
+
for elem_ID in elem_IDs:
|
883
|
+
iter_IDs = cache.elements[elem_ID].iteration_IDs
|
884
|
+
elem_num_iters[elem_ID] = len(iter_IDs)
|
885
|
+
for iter_ID in iter_IDs:
|
886
|
+
run_IDs = cache.iterations[iter_ID].EAR_IDs
|
887
|
+
if run_IDs: # the schema might have no actions
|
888
|
+
iter_num_acts[iter_ID] = len(run_IDs)
|
889
|
+
for act_idx, act_run_IDs in run_IDs.items():
|
890
|
+
iter_acts_num_runs[(iter_ID, act_idx)] = len(act_run_IDs)
|
891
|
+
else:
|
892
|
+
iter_num_acts[iter_ID] = 0
|
893
|
+
|
894
|
+
max_u8 = np.iinfo(np.uint8).max
|
895
|
+
max_u32 = np.iinfo(np.uint32).max
|
896
|
+
MAX_ELEMS_PER_DIR = 1000 # TODO: configurable (add `workflow_defaults` to Config)
|
897
|
+
MAX_ITERS_PER_DIR = 1000
|
898
|
+
requires_dir_idx = np.where(run_indices[:, -1] == 1)[0]
|
899
|
+
run_dir_arr = np.empty(requires_dir_idx.size, dtype=RUN_DIR_ARR_DTYPE)
|
900
|
+
run_ids = np.empty(requires_dir_idx.size, dtype=int)
|
901
|
+
|
902
|
+
elem_depths: dict[int, int] = {}
|
903
|
+
iter_depths: dict[int, int] = {}
|
904
|
+
for idx in range(requires_dir_idx.size):
|
905
|
+
row = run_indices[requires_dir_idx[idx]]
|
906
|
+
t_iID, e_id, i_id, r_id, e_idx, i_idx, a_idx, r_idx = row[:-1]
|
907
|
+
run_ids[idx] = r_id
|
908
|
+
|
909
|
+
num_elems_i = task_num_elems[t_iID]
|
910
|
+
num_iters_i = elem_num_iters[e_id]
|
911
|
+
num_acts_i = iter_num_acts[i_id] # see TODO below
|
912
|
+
num_runs_i = iter_acts_num_runs[(i_id, a_idx)]
|
913
|
+
|
914
|
+
e_depth = 1
|
915
|
+
if num_elems_i == 1:
|
916
|
+
e_idx = max_u32
|
917
|
+
elif num_elems_i > MAX_ELEMS_PER_DIR:
|
918
|
+
if (e_depth := elem_depths.get(t_iID, -1)) == -1:
|
919
|
+
e_depth = int(
|
920
|
+
np.ceil(np.log(num_elems_i) / np.log(MAX_ELEMS_PER_DIR))
|
921
|
+
)
|
922
|
+
elem_depths[t_iID] = e_depth
|
923
|
+
|
924
|
+
# TODO: i_idx should be either MAX or the iteration ID, which will index into
|
925
|
+
# a separate array to get the formatted loop indices e.g.
|
926
|
+
# ("outer_loop_0_inner_loop_9")
|
927
|
+
i_depth = 1
|
928
|
+
if num_iters_i == 1:
|
929
|
+
i_idx = max_u32
|
930
|
+
elif num_iters_i > MAX_ITERS_PER_DIR:
|
931
|
+
if (i_depth := iter_depths.get(e_id, -1)) == -1:
|
932
|
+
i_depth = int(
|
933
|
+
np.ceil(np.log(num_iters_i) / np.log(MAX_ITERS_PER_DIR))
|
934
|
+
)
|
935
|
+
iter_depths[e_id] = i_depth
|
936
|
+
|
937
|
+
a_idx = max_u8 # TODO: for now, always exclude action index dir
|
938
|
+
|
939
|
+
if num_runs_i == 1:
|
940
|
+
r_idx = max_u8
|
941
|
+
|
942
|
+
if wk_num_tasks == 1:
|
943
|
+
t_iID = max_u8
|
944
|
+
|
945
|
+
run_dir_arr[idx] = (t_iID, e_idx, i_idx, a_idx, r_idx, e_depth, i_depth)
|
946
|
+
|
947
|
+
return run_dir_arr, run_ids
|
948
|
+
|
949
|
+
@TimeIt.decorator
|
950
|
+
def _write_execute_dirs(
|
951
|
+
self,
|
952
|
+
run_indices: NDArray,
|
953
|
+
run_inp_files: dict[int, list[Path]],
|
954
|
+
cache: ObjectCache,
|
955
|
+
status: Status | None = None,
|
956
|
+
):
|
957
|
+
|
958
|
+
if status:
|
959
|
+
status.update("Adding new submission: resolving execution directories...")
|
960
|
+
|
961
|
+
run_dir_arr, run_idx = self._calculate_run_dir_indices(run_indices, cache)
|
962
|
+
|
963
|
+
# set run dirs in persistent array:
|
964
|
+
if run_idx.size:
|
965
|
+
self.workflow._store.set_run_dirs(run_dir_arr, run_idx)
|
966
|
+
|
967
|
+
# retrieve run directories as paths. array is not yet committed, so pass in
|
968
|
+
# directly:
|
969
|
+
run_dirs = self.workflow.get_run_directories(dir_indices_arr=run_dir_arr)
|
970
|
+
|
971
|
+
if status:
|
972
|
+
status.update("Adding new submission: making execution directories...")
|
973
|
+
|
974
|
+
# make directories
|
975
|
+
for idx, run_dir in enumerate(run_dirs):
|
976
|
+
assert run_dir
|
977
|
+
run_dir.mkdir(parents=True, exist_ok=True)
|
978
|
+
inp_files_i = run_inp_files.get(run_idx[idx])
|
979
|
+
if inp_files_i:
|
980
|
+
# copy (TODO: optionally symlink) any input files:
|
981
|
+
for path_i in inp_files_i:
|
982
|
+
shutil.copy(path_i, run_dir)
|
430
983
|
|
431
984
|
@staticmethod
def get_unique_schedulers_of_jobscripts(
    jobscripts: Iterable[Jobscript],
) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
    """Get unique schedulers and which of the passed jobscripts they correspond to.

    Uniqueness is determined only by the `QueuedScheduler.unique_properties` tuple.

    Parameters
    ----------
    jobscripts: list[~hpcflow.app.Jobscript]
        The jobscripts whose schedulers are to be grouped.

    Returns
    -------
    scheduler_mapping
        An iterable of pairs, one per unique scheduler, in order of first
        appearance. The first item of each pair is a tuple of jobscript index
        descriptors and the second item is the scheduler to use for those
        jobscripts.
        A jobscript index descriptor is a pair of the submission index and the main
        jobscript index.
    """
    js_idx: list[list[tuple[int, int]]] = []
    schedulers: list[Scheduler] = []

    # maps the tuple of scheduler properties we consider to determine "uniqueness"
    # to the position of that scheduler within `schedulers` (and `js_idx`):
    seen_schedulers: dict[tuple, int] = {}

    for js in jobscripts:
        if (
            sched_idx := seen_schedulers.get(key := js.scheduler.unique_properties)
        ) is None:
            # the new scheduler is about to be appended, so its position is the
            # current length of `schedulers` (NOT `len(seen_schedulers) - 1`, which
            # is evaluated before insertion and so is off by one):
            seen_schedulers[key] = sched_idx = len(schedulers)
            schedulers.append(js.scheduler)
            js_idx.append([])
        js_idx[sched_idx].append((js.submission.index, js.index))

    return zip(map(tuple, js_idx), schedulers)
|
459
1021
|
|
460
|
-
|
1022
|
+
@property
|
1023
|
+
@TimeIt.decorator
|
1024
|
+
def _unique_schedulers(
|
1025
|
+
self,
|
1026
|
+
) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
|
1027
|
+
return self.get_unique_schedulers_of_jobscripts(self.jobscripts)
|
461
1028
|
|
462
1029
|
@TimeIt.decorator
|
463
|
-
def get_unique_schedulers(self) ->
|
1030
|
+
def get_unique_schedulers(self) -> Mapping[tuple[tuple[int, int], ...], Scheduler]:
|
464
1031
|
"""Get unique schedulers and which of this submission's jobscripts they
|
465
|
-
correspond to.
|
466
|
-
|
1032
|
+
correspond to.
|
1033
|
+
|
1034
|
+
Returns
|
1035
|
+
-------
|
1036
|
+
scheduler_mapping
|
1037
|
+
Mapping where keys are a sequence of jobscript index descriptors and
|
1038
|
+
the values are the scheduler to use for that jobscript.
|
1039
|
+
A jobscript index descriptor is a pair of the submission index and the main
|
1040
|
+
jobscript index.
|
1041
|
+
"""
|
1042
|
+
# This is an absurd type; you never use the key as a key
|
1043
|
+
return dict(self._unique_schedulers)
|
467
1044
|
|
468
1045
|
@TimeIt.decorator
|
469
|
-
def get_unique_shells(self) ->
|
1046
|
+
def get_unique_shells(self) -> Iterable[tuple[tuple[int, ...], Shell]]:
|
470
1047
|
"""Get unique shells and which jobscripts they correspond to."""
|
471
|
-
js_idx = []
|
472
|
-
shells = []
|
1048
|
+
js_idx: list[list[int]] = []
|
1049
|
+
shells: list[Shell] = []
|
473
1050
|
|
474
1051
|
for js in self.jobscripts:
|
475
1052
|
if js.shell not in shells:
|
@@ -478,126 +1055,154 @@ class Submission(JSONLike):
|
|
478
1055
|
shell_idx = shells.index(js.shell)
|
479
1056
|
js_idx[shell_idx].append(js.index)
|
480
1057
|
|
481
|
-
|
1058
|
+
return zip(map(tuple, js_idx), shells)
|
482
1059
|
|
483
|
-
|
1060
|
+
def _update_at_submit_metadata(self, submission_parts: dict[str, list[int]]):
|
1061
|
+
"""Update persistent store and in-memory record of at-submit metadata.
|
484
1062
|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
f"Jobscript {sub_err.js_idx} at path: {str(sub_err.js_path)!r}\n"
|
496
|
-
f"Submit command: {sub_err.submit_cmd!r}.\n"
|
497
|
-
f"Reason: {sub_err.message!r}\n"
|
498
|
-
)
|
499
|
-
if sub_err.subprocess_exc is not None:
|
500
|
-
msg += f"Subprocess exception: {sub_err.subprocess_exc}\n"
|
501
|
-
if sub_err.job_ID_parse_exc is not None:
|
502
|
-
msg += f"Subprocess job ID parse exception: {sub_err.job_ID_parse_exc}\n"
|
503
|
-
if sub_err.job_ID_parse_exc is not None:
|
504
|
-
msg += f"Job ID parse exception: {sub_err.job_ID_parse_exc}\n"
|
505
|
-
if sub_err.stdout:
|
506
|
-
msg += f"Submission stdout:\n{indent(sub_err.stdout, ' ')}\n"
|
507
|
-
if sub_err.stderr:
|
508
|
-
msg += f"Submission stderr:\n{indent(sub_err.stderr, ' ')}\n"
|
509
|
-
|
510
|
-
raise SubmissionFailure(message=msg)
|
511
|
-
|
512
|
-
def _append_submission_part(self, submit_time: str, submitted_js_idx: List[int]):
|
513
|
-
self._submission_parts[submit_time] = submitted_js_idx
|
514
|
-
self.workflow._store.add_submission_part(
|
1063
|
+
Notes
|
1064
|
+
-----
|
1065
|
+
Currently there is only one type of at-submit metadata, which is the
|
1066
|
+
submission-parts: a mapping between a string submit-time, and the list of
|
1067
|
+
jobscript indices that were submitted at that submit-time. This method updates
|
1068
|
+
the recorded submission parts to include those passed here.
|
1069
|
+
|
1070
|
+
"""
|
1071
|
+
|
1072
|
+
self.workflow._store.update_at_submit_metadata(
|
515
1073
|
sub_idx=self.index,
|
516
|
-
|
517
|
-
|
1074
|
+
submission_parts=submission_parts,
|
1075
|
+
)
|
1076
|
+
|
1077
|
+
self._at_submit_metadata["submission_parts"].update(submission_parts)
|
1078
|
+
|
1079
|
+
# cache is now invalid:
|
1080
|
+
self._submission_parts_lst = None
|
1081
|
+
|
1082
|
+
def _append_submission_part(self, submit_time: str, submitted_js_idx: list[int]):
|
1083
|
+
self._update_at_submit_metadata(submission_parts={submit_time: submitted_js_idx})
|
1084
|
+
|
1085
|
+
def get_jobscript_functions_name(self, shell: Shell, shell_idx: int) -> str:
|
1086
|
+
"""Get the name of the jobscript functions file for the specified shell."""
|
1087
|
+
return f"js_funcs_{shell_idx}{shell.JS_EXT}"
|
1088
|
+
|
1089
|
+
def get_jobscript_functions_path(self, shell: Shell, shell_idx: int) -> Path:
|
1090
|
+
"""Get the path of the jobscript functions file for the specified shell."""
|
1091
|
+
return self.js_funcs_path / self.get_jobscript_functions_name(shell, shell_idx)
|
1092
|
+
|
1093
|
+
def _compose_functions_file(self, shell: Shell) -> str:
|
1094
|
+
"""Prepare the contents of the jobscript functions file for the specified
|
1095
|
+
shell.
|
1096
|
+
|
1097
|
+
Notes
|
1098
|
+
-----
|
1099
|
+
The functions file includes, at a minimum, a shell function that invokes the app
|
1100
|
+
with provided arguments. This file will be sourced/invoked within all jobscripts
|
1101
|
+
and command files that share the specified shell.
|
1102
|
+
|
1103
|
+
"""
|
1104
|
+
|
1105
|
+
cfg_invocation = self._app.config._file.get_invocation(
|
1106
|
+
self._app.config._config_key
|
1107
|
+
)
|
1108
|
+
env_setup = cfg_invocation["environment_setup"]
|
1109
|
+
if env_setup:
|
1110
|
+
env_setup = indent(env_setup.strip(), shell.JS_ENV_SETUP_INDENT)
|
1111
|
+
env_setup += "\n\n" + shell.JS_ENV_SETUP_INDENT
|
1112
|
+
else:
|
1113
|
+
env_setup = shell.JS_ENV_SETUP_INDENT
|
1114
|
+
app_invoc = list(self._app.run_time_info.invocation_command)
|
1115
|
+
|
1116
|
+
app_caps = self._app.package_name.upper()
|
1117
|
+
func_file_args = shell.process_JS_header_args( # TODO: rename?
|
1118
|
+
{
|
1119
|
+
"workflow_app_alias": self.WORKFLOW_APP_ALIAS,
|
1120
|
+
"env_setup": env_setup,
|
1121
|
+
"app_invoc": app_invoc,
|
1122
|
+
"app_caps": app_caps,
|
1123
|
+
"config_dir": str(self._app.config.config_directory),
|
1124
|
+
"config_invoc_key": self._app.config.config_key,
|
1125
|
+
}
|
518
1126
|
)
|
1127
|
+
out = shell.JS_FUNCS.format(**func_file_args)
|
1128
|
+
return out
|
1129
|
+
|
1130
|
+
def _write_functions_file(self, shell: Shell, shell_idx: int) -> None:
|
1131
|
+
"""Write the jobscript functions file for the specified shell.
|
1132
|
+
|
1133
|
+
Notes
|
1134
|
+
-----
|
1135
|
+
The functions file includes, at a minimum, a shell function that invokes the app
|
1136
|
+
with provided arguments. This file will be sourced/invoked within all jobscripts
|
1137
|
+
and command files that share the specified shell.
|
1138
|
+
|
1139
|
+
"""
|
1140
|
+
js_funcs_str = self._compose_functions_file(shell)
|
1141
|
+
path = self.get_jobscript_functions_path(shell, shell_idx)
|
1142
|
+
with path.open("wt", newline="\n") as fp:
|
1143
|
+
fp.write(js_funcs_str)
|
519
1144
|
|
520
1145
|
@TimeIt.decorator
|
521
1146
|
def submit(
|
522
1147
|
self,
|
523
|
-
status,
|
524
|
-
ignore_errors:
|
525
|
-
print_stdout:
|
526
|
-
add_to_known:
|
527
|
-
) ->
|
1148
|
+
status: Status | None,
|
1149
|
+
ignore_errors: bool = False,
|
1150
|
+
print_stdout: bool = False,
|
1151
|
+
add_to_known: bool = True,
|
1152
|
+
) -> list[int]:
|
528
1153
|
"""Generate and submit the jobscripts of this submission."""
|
529
1154
|
|
530
|
-
#
|
531
|
-
|
532
|
-
if
|
533
|
-
if not supports_JS_para:
|
534
|
-
if status:
|
535
|
-
status.stop()
|
536
|
-
raise ValueError(
|
537
|
-
f"Store type {self.workflow._store!r} does not support jobscript "
|
538
|
-
f"parallelism."
|
539
|
-
)
|
540
|
-
elif self.JS_parallelism is None:
|
541
|
-
self._JS_parallelism = supports_JS_para
|
542
|
-
|
543
|
-
# set os_name and shell_name for each jobscript:
|
544
|
-
for js in self.jobscripts:
|
545
|
-
js._set_os_name()
|
546
|
-
js._set_shell_name()
|
547
|
-
js._set_scheduler_name()
|
1155
|
+
# TODO: support passing list of jobscript indices to submit; this will allow us
|
1156
|
+
# to test a submission with multiple "submission parts". would also need to check
|
1157
|
+
# dependencies if this customised list is passed
|
548
1158
|
|
549
1159
|
outstanding = self.outstanding_jobscripts
|
550
1160
|
|
551
1161
|
# get scheduler, shell and OS version information (also an opportunity to fail
|
552
1162
|
# before trying to submit jobscripts):
|
553
|
-
js_vers_info = {}
|
554
|
-
for js_indices, sched in self.
|
1163
|
+
js_vers_info: dict[int, dict[str, str | list[str]]] = {}
|
1164
|
+
for js_indices, sched in self._unique_schedulers:
|
555
1165
|
try:
|
556
1166
|
vers_info = sched.get_version_info()
|
557
|
-
except Exception
|
558
|
-
if ignore_errors:
|
559
|
-
|
560
|
-
|
561
|
-
raise err
|
1167
|
+
except Exception:
|
1168
|
+
if not ignore_errors:
|
1169
|
+
raise
|
1170
|
+
vers_info = {}
|
562
1171
|
for _, js_idx in js_indices:
|
563
1172
|
if js_idx in outstanding:
|
564
|
-
|
565
|
-
js_vers_info[js_idx] = {}
|
566
|
-
js_vers_info[js_idx].update(vers_info)
|
1173
|
+
js_vers_info.setdefault(js_idx, {}).update(vers_info)
|
567
1174
|
|
568
|
-
|
1175
|
+
js_shell_indices = {}
|
1176
|
+
for shell_idx, (js_indices_2, shell) in enumerate(self.get_unique_shells()):
|
569
1177
|
try:
|
570
1178
|
vers_info = shell.get_version_info()
|
571
|
-
except Exception
|
572
|
-
if ignore_errors:
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
for js_idx in js_indices:
|
1179
|
+
except Exception:
|
1180
|
+
if not ignore_errors:
|
1181
|
+
raise
|
1182
|
+
vers_info = {}
|
1183
|
+
for js_idx in js_indices_2:
|
577
1184
|
if js_idx in outstanding:
|
578
|
-
|
579
|
-
|
580
|
-
js_vers_info[js_idx].update(vers_info)
|
1185
|
+
js_vers_info.setdefault(js_idx, {}).update(vers_info)
|
1186
|
+
js_shell_indices[js_idx] = shell_idx
|
581
1187
|
|
1188
|
+
# write a file containing useful shell functions:
|
1189
|
+
self._write_functions_file(shell, shell_idx)
|
1190
|
+
|
1191
|
+
hostname = socket.gethostname()
|
1192
|
+
machine = self._app.config.get("machine")
|
582
1193
|
for js_idx, vers_info_i in js_vers_info.items():
|
583
|
-
self.jobscripts[js_idx]
|
1194
|
+
js = self.jobscripts[js_idx]
|
1195
|
+
js._set_version_info(vers_info_i)
|
1196
|
+
js._set_submit_hostname(hostname)
|
1197
|
+
js._set_submit_machine(machine)
|
1198
|
+
js._set_shell_idx(js_shell_indices[js_idx])
|
584
1199
|
|
585
|
-
# for direct submission, it's important that os_name/shell_name/scheduler_name
|
586
|
-
# are made persistent now, because `Workflow.write_commands`, which might be
|
587
|
-
# invoked in a new process before submission has completed, needs to know these:
|
588
1200
|
self.workflow._store._pending.commit_all()
|
589
1201
|
|
590
|
-
# TODO: a submission should only be "submitted" once shouldn't it?
|
591
|
-
# no; there could be an IO error (e.g. internet connectivity), so might
|
592
|
-
# need to be able to reattempt submission of outstanding jobscripts.
|
593
|
-
self.path.mkdir(exist_ok=True)
|
594
|
-
if not self.abort_EARs_file_path.is_file():
|
595
|
-
self._write_abort_EARs_file()
|
596
|
-
|
597
1202
|
# map jobscript `index` to (scheduler job ID or process ID, is_array):
|
598
|
-
scheduler_refs = {}
|
599
|
-
submitted_js_idx = []
|
600
|
-
errs = []
|
1203
|
+
scheduler_refs: dict[int, tuple[str, bool]] = {}
|
1204
|
+
submitted_js_idx: list[int] = []
|
1205
|
+
errs: list[JobscriptSubmissionFailure] = []
|
601
1206
|
for js in self.jobscripts:
|
602
1207
|
# check not previously submitted:
|
603
1208
|
if js.index not in outstanding:
|
@@ -605,14 +1210,20 @@ class Submission(JSONLike):
|
|
605
1210
|
|
606
1211
|
# check all dependencies were submitted now or previously:
|
607
1212
|
if not all(
|
608
|
-
|
609
|
-
for
|
1213
|
+
js_idx in submitted_js_idx or js_idx in self.submitted_jobscripts
|
1214
|
+
for js_idx, _ in js.dependencies
|
610
1215
|
):
|
1216
|
+
warnings.warn(
|
1217
|
+
f"Cannot submit jobscript index {js.index} since not all of its "
|
1218
|
+
f"dependencies have been submitted: {js.dependencies!r}"
|
1219
|
+
)
|
611
1220
|
continue
|
612
1221
|
|
613
1222
|
try:
|
614
1223
|
if status:
|
615
|
-
status.update(
|
1224
|
+
status.update(
|
1225
|
+
f"Submitting jobscript {js.index + 1}/{len(self.jobscripts)}..."
|
1226
|
+
)
|
616
1227
|
js_ref_i = js.submit(scheduler_refs, print_stdout=print_stdout)
|
617
1228
|
scheduler_refs[js.index] = (js_ref_i, js.is_array)
|
618
1229
|
submitted_js_idx.append(js.index)
|
@@ -621,15 +1232,21 @@ class Submission(JSONLike):
|
|
621
1232
|
errs.append(err)
|
622
1233
|
continue
|
623
1234
|
|
1235
|
+
# TODO: some way to handle KeyboardInterrupt during submission?
|
1236
|
+
# - stop, and cancel already submitted?
|
1237
|
+
|
624
1238
|
if submitted_js_idx:
|
625
|
-
dt_str =
|
1239
|
+
dt_str = current_timestamp().strftime(self._app._submission_ts_fmt)
|
626
1240
|
self._append_submission_part(
|
627
1241
|
submit_time=dt_str,
|
628
1242
|
submitted_js_idx=submitted_js_idx,
|
629
1243
|
)
|
1244
|
+
# ensure `_submission_parts` is committed
|
1245
|
+
self.workflow._store._pending.commit_all()
|
1246
|
+
|
630
1247
|
# add a record of the submission part to the known-submissions file
|
631
1248
|
if add_to_known:
|
632
|
-
self.
|
1249
|
+
self._app._add_to_known_submissions(
|
633
1250
|
wk_path=self.workflow.path,
|
634
1251
|
wk_id=self.workflow.id_,
|
635
1252
|
sub_idx=self.index,
|
@@ -639,7 +1256,7 @@ class Submission(JSONLike):
|
|
639
1256
|
if errs and not ignore_errors:
|
640
1257
|
if status:
|
641
1258
|
status.stop()
|
642
|
-
self.
|
1259
|
+
raise SubmissionFailure(self.index, submitted_js_idx, errs)
|
643
1260
|
|
644
1261
|
len_js = len(submitted_js_idx)
|
645
1262
|
print(f"Submitted {len_js} jobscript{'s' if len_js > 1 else ''}.")
|
@@ -647,24 +1264,86 @@ class Submission(JSONLike):
|
|
647
1264
|
return submitted_js_idx
|
648
1265
|
|
649
1266
|
@TimeIt.decorator
|
650
|
-
def cancel(self):
|
1267
|
+
def cancel(self) -> None:
|
651
1268
|
"""
|
652
1269
|
Cancel the active jobs for this submission's jobscripts.
|
653
1270
|
"""
|
654
|
-
act_js
|
655
|
-
if not act_js:
|
1271
|
+
if not (act_js := self.get_active_jobscripts()):
|
656
1272
|
print("No active jobscripts to cancel.")
|
657
1273
|
return
|
658
|
-
for js_indices, sched in self.
|
1274
|
+
for js_indices, sched in self._unique_schedulers:
|
659
1275
|
# filter by active jobscripts:
|
660
|
-
js_idx
|
661
|
-
if js_idx:
|
1276
|
+
if js_idx := [i[1] for i in js_indices if i[1] in act_js]:
|
662
1277
|
print(
|
663
|
-
f"Cancelling jobscripts {js_idx
|
664
|
-
f"workflow {self.workflow.name!r}."
|
1278
|
+
f"Cancelling jobscripts {shorten_list_str(js_idx, items=5)} of "
|
1279
|
+
f"submission {self.index} of workflow {self.workflow.name!r}."
|
665
1280
|
)
|
666
1281
|
jobscripts = [self.jobscripts[i] for i in js_idx]
|
667
|
-
sched_refs = [
|
1282
|
+
sched_refs = [js.scheduler_js_ref for js in jobscripts]
|
668
1283
|
sched.cancel_jobs(js_refs=sched_refs, jobscripts=jobscripts)
|
669
1284
|
else:
|
670
1285
|
print("No active jobscripts to cancel.")
|
1286
|
+
|
1287
|
+
@TimeIt.decorator
|
1288
|
+
def get_scheduler_job_IDs(self) -> tuple[str, ...]:
|
1289
|
+
"""Return jobscript scheduler job IDs."""
|
1290
|
+
return tuple(
|
1291
|
+
js_i.scheduler_job_ID
|
1292
|
+
for js_i in self.jobscripts
|
1293
|
+
if js_i.scheduler_job_ID is not None
|
1294
|
+
)
|
1295
|
+
|
1296
|
+
@TimeIt.decorator
|
1297
|
+
def get_process_IDs(self) -> tuple[int, ...]:
|
1298
|
+
"""Return jobscript process IDs."""
|
1299
|
+
return tuple(
|
1300
|
+
js_i.process_ID for js_i in self.jobscripts if js_i.process_ID is not None
|
1301
|
+
)
|
1302
|
+
|
1303
|
+
@TimeIt.decorator
|
1304
|
+
def list_jobscripts(
|
1305
|
+
self,
|
1306
|
+
max_js: int | None = None,
|
1307
|
+
jobscripts: list[int] | None = None,
|
1308
|
+
width: int | None = None,
|
1309
|
+
) -> None:
|
1310
|
+
"""Print a table listing jobscripts and associated information.
|
1311
|
+
|
1312
|
+
Parameters
|
1313
|
+
----------
|
1314
|
+
max_js
|
1315
|
+
Maximum jobscript index to display. This cannot be specified with `jobscripts`.
|
1316
|
+
jobscripts
|
1317
|
+
A list of jobscripts to display. This cannot be specified with `max_js`.
|
1318
|
+
width
|
1319
|
+
Width in characters of the printed table.
|
1320
|
+
|
1321
|
+
"""
|
1322
|
+
self.workflow.list_jobscripts(
|
1323
|
+
sub_idx=self.index, max_js=max_js, jobscripts=jobscripts, width=width
|
1324
|
+
)
|
1325
|
+
|
1326
|
+
@TimeIt.decorator
|
1327
|
+
def list_task_jobscripts(
|
1328
|
+
self,
|
1329
|
+
task_names: list[str] | None = None,
|
1330
|
+
max_js: int | None = None,
|
1331
|
+
width: int | None = None,
|
1332
|
+
) -> None:
|
1333
|
+
"""Print a table listing the jobscripts associated with the specified (or all)
|
1334
|
+
tasks for the specified submission.
|
1335
|
+
|
1336
|
+
Parameters
|
1337
|
+
----------
|
1338
|
+
task_names
|
1339
|
+
List of sub-strings to match to task names. Only matching task names will be
|
1340
|
+
included.
|
1341
|
+
max_js
|
1342
|
+
Maximum jobscript index to display.
|
1343
|
+
width
|
1344
|
+
Width in characters of the printed table.
|
1345
|
+
|
1346
|
+
"""
|
1347
|
+
self.workflow.list_task_jobscripts(
|
1348
|
+
sub_idx=self.index, max_js=max_js, task_names=task_names, width=width
|
1349
|
+
)
|