hpcflow-new2 0.2.0a188__py3-none-any.whl → 0.2.0a190__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +8 -6
- hpcflow/_version.py +1 -1
- hpcflow/app.py +1 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
- hpcflow/sdk/__init__.py +21 -15
- hpcflow/sdk/app.py +2133 -770
- hpcflow/sdk/cli.py +281 -250
- hpcflow/sdk/cli_common.py +6 -2
- hpcflow/sdk/config/__init__.py +1 -1
- hpcflow/sdk/config/callbacks.py +77 -42
- hpcflow/sdk/config/cli.py +126 -103
- hpcflow/sdk/config/config.py +578 -311
- hpcflow/sdk/config/config_file.py +131 -95
- hpcflow/sdk/config/errors.py +112 -85
- hpcflow/sdk/config/types.py +145 -0
- hpcflow/sdk/core/actions.py +1054 -994
- hpcflow/sdk/core/app_aware.py +24 -0
- hpcflow/sdk/core/cache.py +81 -63
- hpcflow/sdk/core/command_files.py +275 -185
- hpcflow/sdk/core/commands.py +111 -107
- hpcflow/sdk/core/element.py +724 -503
- hpcflow/sdk/core/enums.py +192 -0
- hpcflow/sdk/core/environment.py +74 -93
- hpcflow/sdk/core/errors.py +398 -51
- hpcflow/sdk/core/json_like.py +540 -272
- hpcflow/sdk/core/loop.py +380 -334
- hpcflow/sdk/core/loop_cache.py +160 -43
- hpcflow/sdk/core/object_list.py +370 -207
- hpcflow/sdk/core/parameters.py +728 -600
- hpcflow/sdk/core/rule.py +59 -41
- hpcflow/sdk/core/run_dir_files.py +33 -22
- hpcflow/sdk/core/task.py +1546 -1325
- hpcflow/sdk/core/task_schema.py +240 -196
- hpcflow/sdk/core/test_utils.py +126 -88
- hpcflow/sdk/core/types.py +387 -0
- hpcflow/sdk/core/utils.py +410 -305
- hpcflow/sdk/core/validation.py +82 -9
- hpcflow/sdk/core/workflow.py +1192 -1028
- hpcflow/sdk/core/zarr_io.py +98 -137
- hpcflow/sdk/demo/cli.py +46 -33
- hpcflow/sdk/helper/cli.py +18 -16
- hpcflow/sdk/helper/helper.py +75 -63
- hpcflow/sdk/helper/watcher.py +61 -28
- hpcflow/sdk/log.py +83 -59
- hpcflow/sdk/persistence/__init__.py +8 -31
- hpcflow/sdk/persistence/base.py +988 -586
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +38 -0
- hpcflow/sdk/persistence/json.py +408 -153
- hpcflow/sdk/persistence/pending.py +158 -123
- hpcflow/sdk/persistence/store_resource.py +37 -22
- hpcflow/sdk/persistence/types.py +307 -0
- hpcflow/sdk/persistence/utils.py +14 -11
- hpcflow/sdk/persistence/zarr.py +477 -420
- hpcflow/sdk/runtime.py +44 -41
- hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
- hpcflow/sdk/submission/jobscript.py +444 -404
- hpcflow/sdk/submission/schedulers/__init__.py +133 -40
- hpcflow/sdk/submission/schedulers/direct.py +97 -71
- hpcflow/sdk/submission/schedulers/sge.py +132 -126
- hpcflow/sdk/submission/schedulers/slurm.py +263 -268
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/__init__.py +14 -15
- hpcflow/sdk/submission/shells/base.py +102 -29
- hpcflow/sdk/submission/shells/bash.py +72 -55
- hpcflow/sdk/submission/shells/os_version.py +31 -30
- hpcflow/sdk/submission/shells/powershell.py +37 -29
- hpcflow/sdk/submission/submission.py +203 -257
- hpcflow/sdk/submission/types.py +143 -0
- hpcflow/sdk/typing.py +163 -12
- hpcflow/tests/conftest.py +8 -6
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
- hpcflow/tests/scripts/test_main_scripts.py +60 -30
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -4
- hpcflow/tests/unit/test_action.py +86 -75
- hpcflow/tests/unit/test_action_rule.py +9 -4
- hpcflow/tests/unit/test_app.py +13 -6
- hpcflow/tests/unit/test_cli.py +1 -1
- hpcflow/tests/unit/test_command.py +71 -54
- hpcflow/tests/unit/test_config.py +20 -15
- hpcflow/tests/unit/test_config_file.py +21 -18
- hpcflow/tests/unit/test_element.py +58 -62
- hpcflow/tests/unit/test_element_iteration.py +3 -1
- hpcflow/tests/unit/test_element_set.py +29 -19
- hpcflow/tests/unit/test_group.py +4 -2
- hpcflow/tests/unit/test_input_source.py +116 -93
- hpcflow/tests/unit/test_input_value.py +29 -24
- hpcflow/tests/unit/test_json_like.py +44 -35
- hpcflow/tests/unit/test_loop.py +65 -58
- hpcflow/tests/unit/test_object_list.py +17 -12
- hpcflow/tests/unit/test_parameter.py +16 -7
- hpcflow/tests/unit/test_persistence.py +48 -35
- hpcflow/tests/unit/test_resources.py +20 -18
- hpcflow/tests/unit/test_run.py +8 -3
- hpcflow/tests/unit/test_runtime.py +2 -1
- hpcflow/tests/unit/test_schema_input.py +23 -15
- hpcflow/tests/unit/test_shell.py +3 -2
- hpcflow/tests/unit/test_slurm.py +8 -7
- hpcflow/tests/unit/test_submission.py +39 -19
- hpcflow/tests/unit/test_task.py +352 -247
- hpcflow/tests/unit/test_task_schema.py +33 -20
- hpcflow/tests/unit/test_utils.py +9 -11
- hpcflow/tests/unit/test_value_sequence.py +15 -12
- hpcflow/tests/unit/test_workflow.py +114 -83
- hpcflow/tests/unit/test_workflow_template.py +0 -1
- hpcflow/tests/workflows/test_jobscript.py +2 -1
- hpcflow/tests/workflows/test_workflows.py +18 -13
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/METADATA +2 -1
- hpcflow_new2-0.2.0a190.dist-info/RECORD +165 -0
- hpcflow/sdk/core/parallel.py +0 -21
- hpcflow_new2-0.2.0a188.dist-info/RECORD +0 -158
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/submission/jobscript.py

```diff
@@ -3,35 +3,59 @@ Model of information submitted to a scheduler.
 """
 
 from __future__ import annotations
-import copy
 
-from datetime import datetime, timezone
 import os
-from pathlib import Path
 import shutil
 import socket
 import subprocess
 from textwrap import indent
-from typing import
+from typing import cast, overload, TYPE_CHECKING
+from typing_extensions import override
 
 import numpy as np
-from
-from hpcflow.sdk import
-
-
+from hpcflow.sdk.core.enums import EARStatus
+from hpcflow.sdk.core.errors import (
+    JobscriptSubmissionFailure,
+    NotSubmitMachineError,
+)
 
+from hpcflow.sdk.typing import hydrate
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
+from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
 from hpcflow.sdk.log import TimeIt
-from hpcflow.sdk.submission.
-from hpcflow.sdk.submission.
-
+from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
+from hpcflow.sdk.submission.shells import get_shell, DEFAULT_SHELL_NAMES
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Iterator, Mapping, Sequence
+    from datetime import datetime
+    from pathlib import Path
+    from typing import Any, ClassVar, Literal
+    from typing_extensions import TypeIs
+    from numpy.typing import NDArray, ArrayLike
+    from ..core.actions import ElementActionRun
+    from ..core.element import ElementResources
+    from ..core.loop_cache import LoopIndex
+    from ..core.types import JobscriptSubmissionFailureArgs
+    from ..core.workflow import WorkflowTask, Workflow
+    from .submission import Submission
+    from .shells.base import Shell
+    from .schedulers import Scheduler, QueuedScheduler
+    from .enums import JobscriptElementState
+    from .types import (
+        JobScriptCreationArguments,
+        JobScriptDescriptor,
+        ResolvedDependencies,
+        SchedulerRef,
+        VersionInfo,
+    )
 
 
 @TimeIt.decorator
 def generate_EAR_resource_map(
-    task:
-    loop_idx:
-) ->
+    task: WorkflowTask,
+    loop_idx: LoopIndex[str, int],
+) -> tuple[Sequence[ElementResources], Sequence[int], NDArray, NDArray]:
     """
     Generate an integer array whose rows represent actions and columns represent task
     elements and whose values index unique resources.
```
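The restructured imports above hinge on the `if TYPE_CHECKING:` guard: names needed only in annotations are imported for type checkers but never at runtime, which trims import cost and avoids import cycles. A minimal sketch of the pattern, with an illustrative function that is not part of hpcflow:

```python
from __future__ import annotations  # annotations become strings at runtime

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # seen by type checkers only; never imported at runtime
    from pathlib import Path


def job_dir(base: Path, index: int) -> Path:
    # the Path annotation is never evaluated at runtime
    return base / f"js_{index}"
```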
```diff
@@ -39,8 +63,8 @@ def generate_EAR_resource_map(
     # TODO: assume single iteration for now; later we will loop over Loop tasks for each
     # included task and call this func with specific loop indices
     none_val = -1
-    resources = []
-    resource_hashes = []
+    resources: list[ElementResources] = []
+    resource_hashes: list[int] = []
 
     arr_shape = (task.num_actions, task.num_elements)
     resource_map = np.empty(arr_shape, dtype=int)
@@ -86,52 +110,53 @@ def generate_EAR_resource_map(
 
 @TimeIt.decorator
 def group_resource_map_into_jobscripts(
-    resource_map:
+    resource_map: ArrayLike,
     none_val: Any = -1,
-):
+) -> tuple[list[JobScriptDescriptor], NDArray]:
     """
     Convert a resource map into a plan for what elements to group together into jobscripts.
     """
-
-    resource_idx = np.unique(
-    jobscripts = []
-    allocated = np.zeros_like(
-    js_map = np.ones_like(
-    nones_bool =
+    resource_map_ = np.asanyarray(resource_map)
+    resource_idx = np.unique(resource_map_)
+    jobscripts: list[JobScriptDescriptor] = []
+    allocated = np.zeros_like(resource_map_)
+    js_map = np.ones_like(resource_map_, dtype=float) * np.nan
+    nones_bool: NDArray = resource_map_ == none_val
     stop = False
-    for act_idx in range(
+    for act_idx in range(resource_map_.shape[0]):
         for res_i in resource_idx:
             if res_i == none_val:
                 continue
 
-            if res_i not in
+            if res_i not in resource_map_[act_idx]:
                 continue
 
-
-            diff = np.cumsum(np.abs(np.diff(
+            resource_map_[nones_bool] = res_i
+            diff = np.cumsum(np.abs(np.diff(resource_map_[act_idx:], axis=0)), axis=0)
 
             elem_bool = np.logical_and(
-
+                resource_map_[act_idx] == res_i, allocated[act_idx] == False
             )
             elem_idx = np.where(elem_bool)[0]
             act_elem_bool = np.logical_and(elem_bool, nones_bool[act_idx] == False)
-            act_elem_idx = np.where(act_elem_bool)
+            act_elem_idx: tuple[NDArray, ...] = np.where(act_elem_bool)
 
             # add elements from downstream actions:
             ds_bool = np.logical_and(
                 diff[:, elem_idx] == 0,
                 nones_bool[act_idx + 1 :, elem_idx] == False,
             )
+            ds_act_idx: NDArray
+            ds_elem_idx: NDArray
             ds_act_idx, ds_elem_idx = np.where(ds_bool)
             ds_act_idx += act_idx + 1
             ds_elem_idx = elem_idx[ds_elem_idx]
 
-            EARs_by_elem
+            EARs_by_elem: dict[int, list[int]] = {
+                k.item(): [act_idx] for k in act_elem_idx[0]
+            }
             for ds_a, ds_e in zip(ds_act_idx, ds_elem_idx):
-
-                if ds_e_item not in EARs_by_elem:
-                    EARs_by_elem[ds_e_item] = []
-                EARs_by_elem[ds_e_item].append(ds_a.item())
+                EARs_by_elem.setdefault(ds_e.item(), []).append(ds_a.item())
 
             EARs = np.vstack([np.ones_like(act_elem_idx) * act_idx, act_elem_idx])
             EARs = np.hstack([EARs, np.array([ds_act_idx, ds_elem_idx])])
```
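The `EARs_by_elem.setdefault(...)` rewrite above is the standard idiom for "append to a keyed list, creating it on first use". A self-contained sketch with made-up data:

```python
ears_by_elem: dict[int, list[int]] = {0: [2]}

# append 5 under key 1, creating the list if the key is absent:
ears_by_elem.setdefault(1, []).append(5)

# equivalent to the three-line form the diff removes:
#   if 1 not in ears_by_elem:
#       ears_by_elem[1] = []
#   ears_by_elem[1].append(5)

assert ears_by_elem == {0: [2], 1: [5]}
```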
```diff
@@ -139,7 +164,7 @@ def group_resource_map_into_jobscripts(
             if not EARs.size:
                 continue
 
-            js = {
+            js: JobScriptDescriptor = {
                 "resources": res_i,
                 "elements": dict(sorted(EARs_by_elem.items(), key=lambda x: x[0])),
             }
@@ -154,18 +179,21 @@ def group_resource_map_into_jobscripts(
         if stop:
             break
 
-
+    resource_map_[nones_bool] = none_val
 
     return jobscripts, js_map
 
 
 @TimeIt.decorator
-def resolve_jobscript_dependencies(jobscripts, element_deps):
+def resolve_jobscript_dependencies(
+    jobscripts: Mapping[int, JobScriptCreationArguments],
+    element_deps: Mapping[int, Mapping[int, Sequence[int]]],
+) -> Mapping[int, dict[int, ResolvedDependencies]]:
     """
     Discover concrete dependencies between jobscripts.
     """
     # first pass is to find the mappings between jobscript elements:
-    jobscript_deps = {}
+    jobscript_deps: dict[int, dict[int, ResolvedDependencies]] = {}
     for js_idx, elem_deps in element_deps.items():
         # keys of new dict are other jobscript indices on which this jobscript (js_idx)
         # depends:
@@ -182,16 +210,12 @@ def resolve_jobscript_dependencies(jobscripts, element_deps):
             if js_k_idx not in jobscript_deps[js_idx]:
                 jobscript_deps[js_idx][js_k_idx] = {"js_element_mapping": {}}
 
-
-                js_elem_idx_i
-
-            ):
-                jobscript_deps[js_idx][js_k_idx]["js_element_mapping"][
-                    js_elem_idx_i
-                ] = []
+            jobscript_deps[js_idx][js_k_idx]["js_element_mapping"].setdefault(
+                js_elem_idx_i, []
+            )
 
             # retrieve column index, which is the JS-element index:
-            js_elem_idx_k = np.where(
+            js_elem_idx_k: int = np.where(
                 np.any(js_k["EAR_ID"] == EAR_dep_j, axis=0)
             )[0][0].item()
 
@@ -215,16 +239,16 @@ def resolve_jobscript_dependencies(jobscripts, element_deps):
         js_i_num_js_elements = jobscripts[js_i_idx]["EAR_ID"].shape[1]
         js_k_num_js_elements = jobscripts[js_k_idx]["EAR_ID"].shape[1]
 
-        is_all_i_elems = list(
-
-        )
+        is_all_i_elems = sorted(set(deps_j["js_element_mapping"])) == list(
+            range(js_i_num_js_elements)
+        )
 
         is_all_k_single = set(
             len(i) for i in deps_j["js_element_mapping"].values()
         ) == {1}
 
-        is_all_k_elems =
-
+        is_all_k_elems = sorted(
+            i[0] for i in deps_j["js_element_mapping"].values()
         ) == list(range(js_k_num_js_elements))
 
         is_arr = is_all_i_elems and is_all_k_single and is_all_k_elems
```
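The annotations above refer to `ResolvedDependencies` from the new `hpcflow/sdk/submission/types.py` (+143 lines; not shown in this diff). Judging only from the keys used in this file (`js_element_mapping`, `is_array`), it is presumably shaped roughly like the TypedDict below; the real definition lives in `types.py` and may differ:

```python
from typing import TypedDict


class ResolvedDependencies(TypedDict, total=False):
    # jobscript-element index -> the depended-on jobscript's element indices
    js_element_mapping: dict[int, list[int]]
    # whether the dependency can be expressed as a scheduler array dependency
    is_array: bool
```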
```diff
@@ -233,8 +257,21 @@ def resolve_jobscript_dependencies(jobscripts, element_deps):
     return jobscript_deps
 
 
+def _reindex_dependencies(
+    jobscripts: Mapping[int, JobScriptCreationArguments], from_idx: int, to_idx: int
+):
+    for ds_js_idx, ds_js in jobscripts.items():
+        if ds_js_idx <= from_idx:
+            continue
+        deps = ds_js["dependencies"]
+        if from_idx in deps:
+            deps[to_idx] = deps.pop(from_idx)
+
+
 @TimeIt.decorator
-def merge_jobscripts_across_tasks(jobscripts: Dict) -> Dict:
+def merge_jobscripts_across_tasks(
+    jobscripts: Mapping[int, JobScriptCreationArguments]
+) -> Mapping[int, JobScriptCreationArguments]:
     """Try to merge jobscripts between tasks.
 
     This is possible if two jobscripts share the same resources and have an array
@@ -242,77 +279,63 @@ def merge_jobscripts_across_tasks(jobscripts: Dict) -> Dict:
 
     """
 
+    # The set of IDs of dicts that we've merged, allowing us to not keep that info in
+    # the dicts themselves.
+    merged: set[int] = set()
+
     for js_idx, js in jobscripts.items():
         # for now only attempt to merge a jobscript with a single dependency:
-        if len(js["dependencies"])
-
-
-
-
-            # can only merge if resources are the same and is array dependency:
-            if js["resource_hash"] == js_j["resource_hash"] and dep_info["is_array"]:
-                num_loop_idx = len(
-                    js_j["task_loop_idx"]
-                )  # TODO: should this be: `js_j["task_loop_idx"][0]`?
-
-                # append task_insert_IDs
-                js_j["task_insert_IDs"].append(js["task_insert_IDs"][0])
-                js_j["task_loop_idx"].append(js["task_loop_idx"][0])
-
-                add_acts = []
-                for t_act in js["task_actions"]:
-                    t_act = copy.copy(t_act)
-                    t_act[2] += num_loop_idx
-                    add_acts.append(t_act)
-
-                js_j["task_actions"].extend(add_acts)
-                for k, v in js["task_elements"].items():
-                    js_j["task_elements"][k].extend(v)
-
-                # append to elements and elements_idx list
-                js_j["EAR_ID"] = np.vstack((js_j["EAR_ID"], js["EAR_ID"]))
-
-                # mark this js as defunct
-                js["is_merged"] = True
-
-                # update dependencies of any downstream jobscripts that refer to this js
-                for ds_js_idx, ds_js in jobscripts.items():
-                    if ds_js_idx <= js_idx:
-                        continue
-                    for dep_k_js_idx in list(ds_js["dependencies"].keys()):
-                        if dep_k_js_idx == js_idx:
-                            jobscripts[ds_js_idx]["dependencies"][js_j_idx] = ds_js[
-                                "dependencies"
-                            ].pop(dep_k_js_idx)
+        if len(js["dependencies"]) != 1:
+            continue
+        deps = js["dependencies"]
+        js_j_idx, dep_info = next(iter(deps.items()))
+        js_j = jobscripts[js_j_idx]  # the jobscript we are merging `js` into
 
-
-
+        # can only merge if resources are the same and is array dependency:
+        if js["resource_hash"] == js_j["resource_hash"] and dep_info["is_array"]:
+            num_loop_idx = len(
+                js_j["task_loop_idx"]
+            )  # TODO: should this be: `js_j["task_loop_idx"][0]`?
+
+            # append task_insert_IDs
+            js_j["task_insert_IDs"].append(js["task_insert_IDs"][0])
+            js_j["task_loop_idx"].append(js["task_loop_idx"][0])
+
+            add_acts = [(a, b, num_loop_idx) for a, b, _ in js["task_actions"]]
+
+            js_j["task_actions"].extend(add_acts)
+            for k, v in js["task_elements"].items():
+                js_j["task_elements"][k].extend(v)
 
-
+            # append to elements and elements_idx list
+            js_j["EAR_ID"] = np.vstack((js_j["EAR_ID"], js["EAR_ID"]))
+
+            # mark this js as defunct
+            merged.add(id(js))
+
+            # update dependencies of any downstream jobscripts that refer to this js
+            _reindex_dependencies(jobscripts, js_idx, js_j_idx)
+
+    # remove is_merged jobscripts:
+    return {k: v for k, v in jobscripts.items() if id(v) not in merged}
 
 
 @TimeIt.decorator
-def jobscripts_to_list(
+def jobscripts_to_list(
+    jobscripts: Mapping[int, JobScriptCreationArguments]
+) -> Iterator[JobScriptCreationArguments]:
     """Convert the jobscripts dict to a list, normalising jobscript indices so they refer
     to list indices; also remove `resource_hash`."""
-    lst = []
-    for js_idx, js in jobscripts.items():
-        new_idx = len(lst)
+    lst: list[JobScriptCreationArguments] = []
+    for new_idx, (js_idx, js) in enumerate(jobscripts.items()):
         if js_idx != new_idx:
             # need to reindex jobscripts that depend on this one
-
-
-
-            if js_idx in js_j["dependencies"]:
-                jobscripts[js_j_idx]["dependencies"][new_idx] = jobscripts[js_j_idx][
-                    "dependencies"
-                ].pop(js_idx)
-        del jobscripts[js_idx]["resource_hash"]
-        lst.append(js)
-
-    return lst
+            _reindex_dependencies(jobscripts, js_idx, new_idx)
+        del js["resource_hash"]
+        yield js
 
 
+@hydrate
 class Jobscript(JSONLike):
     """
     A group of actions that are submitted together to be executed by the underlying job
```
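Rather than writing an `is_merged` flag into each jobscript dict as the old code did, the rewrite records the `id()` of merged-away dicts in a side set and filters on it once at the end, leaving the dicts themselves untouched. A toy sketch of the idiom:

```python
records = {0: {"name": "a"}, 1: {"name": "b"}, 2: {"name": "c"}}

merged: set[int] = set()
merged.add(id(records[1]))  # mark record 1 as merged away

# drop merged records without ever having mutated them:
kept = {k: v for k, v in records.items() if id(v) not in merged}
assert sorted(kept) == [0, 2]
```

The `id()` values stay valid here because the original mapping keeps every dict alive for the whole pass.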
```diff
@@ -348,7 +371,7 @@ class Jobscript(JSONLike):
         The job ID from the scheduler, if known.
     process_ID: int
         The process ID of the subprocess, if known.
-    version_info:
+    version_info: dict[str, ...]
         Version info about the target system.
     os_name: str
         The name of the OS.
@@ -360,42 +383,51 @@ class Jobscript(JSONLike):
         Whether the jobscript is currently running.
     """
 
-
-
-    _workflow_app_alias = "wkflow_app"
+    _EAR_files_delimiter: ClassVar[str] = ":"
+    _workflow_app_alias: ClassVar[str] = "wkflow_app"
 
-    _child_objects = (
+    _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
         ChildObjectSpec(
             name="resources",
             class_name="ElementResources",
         ),
     )
 
+    @classmethod
+    def __is_QueuedScheduler(cls, value) -> TypeIs[QueuedScheduler]:
+        return isinstance(value, cls._app.QueuedScheduler)
+
     def __init__(
         self,
-        task_insert_IDs:
-        task_actions:
-        task_elements:
+        task_insert_IDs: list[int],
+        task_actions: list[tuple[int, int, int]],
+        task_elements: dict[int, list[int]],
         EAR_ID: NDArray,
-        resources:
-        task_loop_idx:
-        dependencies:
-        submit_time:
-        submit_hostname:
-        submit_machine:
-        submit_cmdline:
-        scheduler_job_ID:
-        process_ID:
-        version_info:
-        os_name:
-        shell_name:
-        scheduler_name:
-        running:
+        resources: ElementResources,
+        task_loop_idx: list[dict[str, int]],
+        dependencies: dict[int, ResolvedDependencies],
+        submit_time: datetime | None = None,
+        submit_hostname: str | None = None,
+        submit_machine: str | None = None,
+        submit_cmdline: list[str] | None = None,
+        scheduler_job_ID: str | None = None,
+        process_ID: int | None = None,
+        version_info: VersionInfo | None = None,
+        os_name: str | None = None,
+        shell_name: str | None = None,
+        scheduler_name: str | None = None,
+        running: bool | None = None,
+        resource_hash: str | None = None,
+        elements: dict[int, list[int]] | None = None,
     ):
+        if resource_hash is not None:
+            raise AttributeError("resource_hash must not be supplied")
+        if elements is not None:
+            raise AttributeError("elements must not be supplied")
         self._task_insert_IDs = task_insert_IDs
         self._task_loop_idx = task_loop_idx
 
-        # [
+        # [ (task insert ID, action_idx, index into task_loop_idx) for each JS_ACTION_IDX ]:
         self._task_actions = task_actions
 
         # {JS_ELEMENT_IDX: [TASK_ELEMENT_IDX for each TASK_INSERT_ID] }:
```
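The new `__is_QueuedScheduler` helper returns `typing_extensions.TypeIs[QueuedScheduler]`, so type checkers narrow the checked value's type wherever the predicate is true. A standalone sketch of the mechanism (the classes here are stand-ins, not the hpcflow ones):

```python
from typing_extensions import TypeIs


class Scheduler:
    pass


class QueuedScheduler(Scheduler):
    array_switch = "--array"


def is_queued(value: Scheduler) -> TypeIs[QueuedScheduler]:
    # returning True tells the checker that `value` is a QueuedScheduler
    return isinstance(value, QueuedScheduler)


def describe(s: Scheduler) -> str:
    if is_queued(s):
        return s.array_switch  # narrowed: attribute access type-checks
    return "direct"
```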
```diff
@@ -421,15 +453,21 @@ class Jobscript(JSONLike):
         self._shell_name = shell_name
         self._scheduler_name = scheduler_name
 
-
-        self.
-
-        self.
-
-        self.
-
-
-
+        # assigned by parent Submission
+        self._submission: Submission | None = None
+        # assigned by parent Submission
+        self._index: int | None = None
+        # assigned on first access to `scheduler` property
+        self._scheduler_obj: Scheduler | None = None
+        # assigned on first access to `shell` property
+        self._shell_obj: Shell | None = None
+        # assigned on first access to `submit_time` property
+        self._submit_time_obj: datetime | None = None
+        self._running = running
+        # assigned on first access to `all_EARs` property
+        self._all_EARs: list[ElementActionRun] | None = None
+
+    def __repr__(self) -> str:
         return (
             f"{self.__class__.__name__}("
             f"index={self.index!r}, "
@@ -439,15 +477,16 @@ class Jobscript(JSONLike):
             f")"
         )
 
-
-
+    @override
+    def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
+        dct = super()._postprocess_to_dict(d)
         del dct["_index"]
         del dct["_scheduler_obj"]
         del dct["_shell_obj"]
         del dct["_submit_time_obj"]
         del dct["_all_EARs"]
         dct = {k.lstrip("_"): v for k, v in dct.items()}
-        dct["EAR_ID"] = dct["EAR_ID"].tolist()
+        dct["EAR_ID"] = cast("NDArray", dct["EAR_ID"]).tolist()
         return dct
 
     @classmethod
```
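`_postprocess_to_dict` is marked with `typing_extensions.override`, which makes a type checker fail if no matching method exists in a base class, catching silent rename drift between `JSONLike` and its subclasses. A minimal sketch:

```python
from typing_extensions import override


class Base:
    def to_dict(self) -> dict:
        return {}


class Child(Base):
    @override  # checker errors here if Base.to_dict is renamed or removed
    def to_dict(self) -> dict:
        d = super().to_dict()
        d["extra"] = 1
        return d
```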
```diff
@@ -456,52 +495,54 @@ class Jobscript(JSONLike):
         return super().from_json_like(json_like, shared_data)
 
     @property
-    def workflow_app_alias(self):
+    def workflow_app_alias(self) -> str:
         """
         Alias for the workflow app in job scripts.
         """
         return self._workflow_app_alias
 
-    def get_commands_file_name(
+    def get_commands_file_name(
+        self, js_action_idx: int | str, shell: Shell | None = None
+    ) -> str:
         """
         Get the name of a file containing commands for a particular jobscript action.
         """
-        return self.
+        return self._app.RunDirAppFiles.get_commands_file_name(
             js_idx=self.index,
             js_action_idx=js_action_idx,
             shell=shell or self.shell,
         )
 
     @property
-    def task_insert_IDs(self):
+    def task_insert_IDs(self) -> Sequence[int]:
         """
         The insertion IDs of tasks in this jobscript.
         """
         return self._task_insert_IDs
 
     @property
-    def task_actions(self):
+    def task_actions(self) -> Sequence[tuple[int, int, int]]:
         """
         The IDs of actions of each task in this jobscript.
         """
         return self._task_actions
 
     @property
-    def task_elements(self):
+    def task_elements(self) -> Mapping[int, Sequence[int]]:
         """
         The IDs of elements of each task in this jobscript.
         """
         return self._task_elements
 
     @property
-    def EAR_ID(self):
+    def EAR_ID(self) -> NDArray:
         """
         The array of EAR IDs.
         """
         return self._EAR_ID
 
     @property
-    def all_EAR_IDs(self) ->
+    def all_EAR_IDs(self) -> Iterable[int]:
         """
         The IDs of all EARs in this jobscript.
         """
@@ -509,7 +550,7 @@ class Jobscript(JSONLike):
 
     @property
     @TimeIt.decorator
-    def all_EARs(self) ->
+    def all_EARs(self) -> Sequence[ElementActionRun]:
         """
         Description of EAR information for this jobscript.
         """
@@ -518,21 +559,21 @@ class Jobscript(JSONLike):
         return self._all_EARs
 
     @property
-    def resources(self):
+    def resources(self) -> ElementResources:
         """
         The common resources that this jobscript requires.
         """
         return self._resources
 
     @property
-    def task_loop_idx(self):
+    def task_loop_idx(self) -> Sequence[Mapping[str, int]]:
         """
         The description of where various task loops are.
         """
         return self._task_loop_idx
 
     @property
-    def dependencies(self):
+    def dependencies(self) -> Mapping[int, ResolvedDependencies]:
         """
         The dependency descriptor.
         """
@@ -540,120 +581,114 @@ class Jobscript(JSONLike):
 
     @property
     @TimeIt.decorator
-    def start_time(self):
+    def start_time(self) -> None | datetime:
         """The first known start time of any EAR in this jobscript."""
         if not self.is_submitted:
-            return
-        all_times = [i.start_time for i in self.all_EARs if i.start_time]
-        if all_times:
-            return min(all_times)
-        else:
             return None
+        return min(
+            (ear.start_time for ear in self.all_EARs if ear.start_time), default=None
+        )
 
     @property
     @TimeIt.decorator
-    def end_time(self):
+    def end_time(self) -> None | datetime:
         """The last known end time of any EAR in this jobscript."""
         if not self.is_submitted:
-            return
-        all_times = [i.end_time for i in self.all_EARs if i.end_time]
-        if all_times:
-            return max(all_times)
-        else:
             return None
+        return max((ear.end_time for ear in self.all_EARs if ear.end_time), default=None)
 
     @property
-    def submit_time(self):
+    def submit_time(self) -> datetime | None:
         """
         When the jobscript was submitted, if known.
         """
-        if self._submit_time_obj is None and self._submit_time:
-            self._submit_time_obj = (
-
-                .replace(tzinfo=timezone.utc)
-                .astimezone()
+        if self._submit_time_obj is None and self._submit_time is not None:
+            self._submit_time_obj = parse_timestamp(
+                self._submit_time, self.workflow.ts_fmt
             )
         return self._submit_time_obj
 
     @property
-    def submit_hostname(self):
+    def submit_hostname(self) -> str | None:
         """
         Where the jobscript was submitted, if known.
         """
         return self._submit_hostname
 
     @property
-    def submit_machine(self):
+    def submit_machine(self) -> str | None:
         """
         Description of what the jobscript was submitted to, if known.
         """
         return self._submit_machine
 
     @property
-    def submit_cmdline(self):
+    def submit_cmdline(self) -> list[str] | None:
         """
         The command line used to do the commit, if known.
         """
         return self._submit_cmdline
 
     @property
-    def scheduler_job_ID(self):
+    def scheduler_job_ID(self) -> str | None:
         """
         The job ID from the scheduler, if known.
         """
         return self._scheduler_job_ID
 
     @property
-    def process_ID(self):
+    def process_ID(self) -> int | None:
         """
         The process ID from direct execution, if known.
         """
         return self._process_ID
 
     @property
-    def version_info(self):
+    def version_info(self) -> VersionInfo | None:
         """
         Version information about the execution environment (OS, etc).
         """
         return self._version_info
 
     @property
-    def index(self):
+    def index(self) -> int:
         """
         The index of this jobscript within its parent :py:class:`Submission`.
         """
+        assert self._index is not None
         return self._index
 
     @property
-    def submission(self):
+    def submission(self) -> Submission:
         """
         The parent submission.
         """
+        assert self._submission is not None
         return self._submission
 
     @property
-    def workflow(self):
+    def workflow(self) -> Workflow:
         """
         The workflow this is all on behalf of.
         """
         return self.submission.workflow
 
     @property
-    def num_actions(self):
+    def num_actions(self) -> int:
         """
         The number of actions in this jobscript.
         """
         return self.EAR_ID.shape[0]
 
     @property
-    def num_elements(self):
+    def num_elements(self) -> int:
         """
         The number of elements in this jobscript.
         """
         return self.EAR_ID.shape[1]
 
     @property
-    def is_array(self):
+    def is_array(self) -> bool:
         """
         Whether to generate an array job.
         """
```
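The rewritten `start_time`/`end_time` use the `default=` argument of `min`/`max` over a generator, replacing the build-a-list-then-branch pattern with one expression. A sketch with invented timestamps:

```python
from datetime import datetime

starts = [datetime(2024, 1, 2), None, datetime(2024, 1, 1)]

# None entries are filtered out; default= covers the "no known times" case:
first = min((t for t in starts if t), default=None)
assert first == datetime(2024, 1, 1)

nothing: list[datetime] = []
assert min((t for t in nothing), default=None) is None
```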
```diff
@@ -662,61 +697,63 @@ class Jobscript(JSONLike):
 
         support_EAR_para = self.workflow._store._features.EAR_parallelism
         if self.resources.use_job_array is None:
-
-
-
-
-
-
-
-                f"Store type {self.workflow._store!r} does not support element "
-                f"parallelism, so jobs cannot be submitted as scheduler arrays."
-            )
-        return self.resources.use_job_array
+            return self.num_elements > 1 and support_EAR_para
+        if self.resources.use_job_array and not support_EAR_para:
+            raise ValueError(
+                f"Store type {self.workflow._store!r} does not support element "
+                f"parallelism, so jobs cannot be submitted as scheduler arrays."
+            )
+        return self.resources.use_job_array
 
     @property
-    def os_name(self) ->
+    def os_name(self) -> str:
         """
         The name of the OS to use.
         """
-
+        name = self._os_name or self.resources.os_name
+        assert name is not None
+        return name
 
     @property
-    def shell_name(self) ->
+    def shell_name(self) -> str | None:
         """
         The name of the shell to use.
         """
         return self._shell_name or self.resources.shell
 
     @property
-    def scheduler_name(self) ->
+    def scheduler_name(self) -> str | None:
         """
         The name of the scheduler to use.
         """
         return self._scheduler_name or self.resources.scheduler
 
-    def _get_submission_os_args(self):
-        return {"linux_release_file": self.
+    def _get_submission_os_args(self) -> dict[str, str]:
+        return {"linux_release_file": self._app.config.linux_release_file}
 
-    def _get_submission_shell_args(self):
+    def _get_submission_shell_args(self) -> dict[str, Any]:
         return self.resources.shell_args
 
-    def _get_submission_scheduler_args(self):
+    def _get_submission_scheduler_args(self) -> dict[str, Any]:
         return self.resources.scheduler_args
 
-    def _get_shell(
+    def _get_shell(
+        self,
+        os_name: str,
+        shell_name: str | None,
+        os_args: dict[str, Any] | None = None,
+        shell_args: dict[str, Any] | None = None,
+    ) -> Shell:
         """Get an arbitrary shell, not necessarily associated with submission."""
-        os_args = os_args or {}
-        shell_args = shell_args or {}
         return get_shell(
             shell_name=shell_name,
             os_name=os_name,
-            os_args=os_args,
-            **shell_args,
+            os_args=os_args or {},
+            **(shell_args or {}),
         )
 
     @property
-    def shell(self):
+    def shell(self) -> Shell:
         """The shell for composing submission scripts."""
         if self._shell_obj is None:
             self._shell_obj = self._get_shell(
@@ -728,10 +765,11 @@ class Jobscript(JSONLike):
         return self._shell_obj
 
     @property
-    def scheduler(self):
+    def scheduler(self) -> Scheduler:
         """The scheduler that submissions go to from this jobscript."""
         if self._scheduler_obj is None:
-
+            assert self.scheduler_name
+            self._scheduler_obj = self._app.get_scheduler(
                 scheduler_name=self.scheduler_name,
                 os_name=self.os_name,
                 scheduler_args=self._get_submission_scheduler_args(),
```
```diff
@@ -739,62 +777,62 @@ class Jobscript(JSONLike):
         return self._scheduler_obj
 
     @property
-    def EAR_ID_file_name(self):
+    def EAR_ID_file_name(self) -> str:
         """
         The name of a file containing EAR IDs.
         """
         return f"js_{self.index}_EAR_IDs.txt"
 
     @property
-    def element_run_dir_file_name(self):
+    def element_run_dir_file_name(self) -> str:
         """
         The name of a file containing run directory names.
         """
         return f"js_{self.index}_run_dirs.txt"
 
     @property
-    def direct_stdout_file_name(self):
+    def direct_stdout_file_name(self) -> str:
         """File for direct execution stdout."""
         return f"js_{self.index}_stdout.log"
 
     @property
-    def direct_stderr_file_name(self):
+    def direct_stderr_file_name(self) -> str:
         """File for direct execution stderr."""
         return f"js_{self.index}_stderr.log"
 
     @property
-    def direct_win_pid_file_name(self):
+    def direct_win_pid_file_name(self) -> str:
         """File for holding the direct execution PID."""
         return f"js_{self.index}_pid.txt"
 
     @property
-    def jobscript_name(self):
+    def jobscript_name(self) -> str:
         """The name of the jobscript file."""
         return f"js_{self.index}{self.shell.JS_EXT}"
 
     @property
-    def EAR_ID_file_path(self):
+    def EAR_ID_file_path(self) -> Path:
         """
         The path to the file containing EAR IDs for this jobscript.
         """
         return self.submission.path / self.EAR_ID_file_name
 
     @property
-    def element_run_dir_file_path(self):
+    def element_run_dir_file_path(self) -> Path:
         """
         The path to the file containing run directory names for this jobscript.
         """
         return self.submission.path / self.element_run_dir_file_name
 
     @property
-    def jobscript_path(self):
+    def jobscript_path(self) -> Path:
         """
         The path to the file containing the jobscript file.
         """
         return self.submission.path / self.jobscript_name
 
     @property
-    def direct_stdout_path(self):
+    def direct_stdout_path(self) -> Path:
         """
         The path to the file containing the stdout from directly executed commands
         for this jobscript.
@@ -802,7 +840,7 @@ class Jobscript(JSONLike):
         return self.submission.path / self.direct_stdout_file_name
 
     @property
-    def direct_stderr_path(self):
+    def direct_stderr_path(self) -> Path:
         """
         The path to the file containing the stderr from directly executed commands
         for this jobscript.
@@ -810,7 +848,7 @@ class Jobscript(JSONLike):
         return self.submission.path / self.direct_stderr_file_name
 
     @property
-    def direct_win_pid_file_path(self):
+    def direct_win_pid_file_path(self) -> Path:
         """
         The path to the file containing PIDs for directly executed commands for this
         jobscript. Windows only.
@@ -818,12 +856,11 @@ class Jobscript(JSONLike):
         return self.submission.path / self.direct_win_pid_file_name
 
     def _set_submit_time(self, submit_time: datetime) -> None:
-        submit_time = submit_time.strftime(self.workflow.ts_fmt)
         self._submit_time = submit_time
         self.workflow._store.set_jobscript_metadata(
             sub_idx=self.submission.index,
             js_idx=self.index,
-            submit_time=submit_time,
+            submit_time=submit_time.strftime(self.workflow.ts_fmt),
         )
 
     def _set_submit_hostname(self, submit_hostname: str) -> None:
@@ -842,7 +879,7 @@ class Jobscript(JSONLike):
             submit_machine=submit_machine,
         )
 
-    def _set_submit_cmdline(self, submit_cmdline:
+    def _set_submit_cmdline(self, submit_cmdline: list[str]) -> None:
         self._submit_cmdline = submit_cmdline
         self.workflow._store.set_jobscript_metadata(
             sub_idx=self.submission.index,
@@ -859,7 +896,7 @@ class Jobscript(JSONLike):
             scheduler_job_ID=job_ID,
         )
 
-    def _set_process_ID(self, process_ID:
+    def _set_process_ID(self, process_ID: int) -> None:
         """For direct submission only."""
         self._process_ID = process_ID
         self.workflow._store.set_jobscript_metadata(
@@ -868,7 +905,7 @@ class Jobscript(JSONLike):
             process_ID=process_ID,
         )
 
-    def _set_version_info(self, version_info:
+    def _set_version_info(self, version_info: VersionInfo) -> None:
         self._version_info = version_info
         self.workflow._store.set_jobscript_metadata(
             sub_idx=self.submission.index,
@@ -904,7 +941,7 @@ class Jobscript(JSONLike):
             scheduler_name=self._scheduler_name,
         )
 
-    def get_task_loop_idx_array(self):
+    def get_task_loop_idx_array(self) -> NDArray:
         """
         Get an array of task loop indices.
         """
@@ -929,7 +966,7 @@ class Jobscript(JSONLike):
         )
 
     @TimeIt.decorator
-    def write_element_run_dir_file(self, run_dirs:
+    def write_element_run_dir_file(self, run_dirs: list[list[Path]]):
         """Write a text file with `num_elements` lines and `num_actions` delimited tokens
         per line, representing the working directory for each EAR.
 
```
```diff
@@ -938,12 +975,12 @@ class Jobscript(JSONLike):
         the directory for each jobscript-element/jobscript-action combination.
 
         """
-
+        run_dirs_paths = self.shell.prepare_element_run_dirs(run_dirs)
         with self.element_run_dir_file_path.open(mode="wt", newline="\n") as fp:
             # can't specify "open" newline if we pass the file name only, so pass handle:
             np.savetxt(
                 fname=fp,
-                X=np.array(
+                X=np.array(run_dirs_paths),
                 fmt="%s",
                 delimiter=self._EAR_files_delimiter,
             )
@@ -951,13 +988,13 @@ class Jobscript(JSONLike):
     @TimeIt.decorator
     def compose_jobscript(
         self,
-        deps:
-        os_name: str = None,
-        shell_name: str = None,
-        os_args:
-        shell_args:
-        scheduler_name:
-        scheduler_args:
+        deps: dict[int, tuple[str, bool]] | None = None,
+        os_name: str | None = None,
+        shell_name: str | None = None,
+        os_args: dict[str, Any] | None = None,
+        shell_args: dict[str, Any] | None = None,
+        scheduler_name: str | None = None,
+        scheduler_args: dict[str, Any] | None = None,
     ) -> str:
         """Prepare the jobscript file string."""
 
@@ -976,6 +1013,8 @@ class Jobscript(JSONLike):
             f"`shell_name` as a method argument to compose the jobscript for a given "
             f"`shell_name`."
             )
+        if not scheduler_name:
+            scheduler_name = self._app.config.default_scheduler
 
         shell = self._get_shell(
             os_name=os_name,
@@ -983,33 +1022,29 @@ class Jobscript(JSONLike):
             os_args=os_args or self._get_submission_os_args(),
             shell_args=shell_args or self._get_submission_shell_args(),
         )
-        scheduler = self.
+        scheduler = self._app.get_scheduler(
             scheduler_name=scheduler_name,
             os_name=os_name,
             scheduler_args=scheduler_args or self._get_submission_scheduler_args(),
         )
 
-        cfg_invocation = self.
-
-
+        cfg_invocation = self._app.config._file.get_invocation(
+            self._app.config._config_key
+        )
+        if env_setup := cfg_invocation["environment_setup"]:
             env_setup = indent(env_setup.strip(), shell.JS_ENV_SETUP_INDENT)
             env_setup += "\n\n" + shell.JS_ENV_SETUP_INDENT
         else:
             env_setup = shell.JS_ENV_SETUP_INDENT
 
-        is_scheduled = True
-        if not isinstance(scheduler, Scheduler):
-            is_scheduled = False
-
-        app_invoc = list(self.app.run_time_info.invocation_command)
         header_args = shell.process_JS_header_args(
             {
                 "workflow_app_alias": self.workflow_app_alias,
                 "env_setup": env_setup,
-                "app_invoc":
-                "run_log_file": self.
-                "config_dir": str(self.
-                "config_invoc_key": self.
+                "app_invoc": list(self._app.run_time_info.invocation_command),
+                "run_log_file": self._app.RunDirAppFiles.get_log_file_name(),
+                "config_dir": str(self._app.config.config_directory),
+                "config_invoc_key": self._app.config.config_key,
                 "workflow_path": self.workflow.path,
                 "sub_idx": self.submission.index,
                 "js_idx": self.index,
```
```diff
@@ -1024,7 +1059,7 @@ class Jobscript(JSONLike):
         )
         header = shell.JS_HEADER.format(**header_args)
 
-        if
+        if self.__is_QueuedScheduler(scheduler):
             header = shell.JS_SCHEDULER_HEADER.format(
                 shebang=shebang,
                 scheduler_options=scheduler.format_options(
@@ -1036,11 +1071,12 @@ class Jobscript(JSONLike):
                 header=header,
             )
         else:
-            # the
+            # the Scheduler (direct submission)
+            assert isinstance(scheduler, DirectScheduler)
             wait_cmd = shell.get_wait_command(
                 workflow_app_alias=self.workflow_app_alias,
                 sub_idx=self.submission.index,
-                deps=deps,
+                deps=deps or {},
             )
             header = shell.JS_DIRECT_HEADER.format(
                 shebang=shebang,
@@ -1054,12 +1090,14 @@ class Jobscript(JSONLike):
             EAR_files_delimiter=self._EAR_files_delimiter,
             workflow_app_alias=self.workflow_app_alias,
             commands_file_name=self.get_commands_file_name(r"${JS_act_idx}", shell=shell),
-            run_stream_file=self.
+            run_stream_file=self._app.RunDirAppFiles.get_std_file_name(),
         )
 
         out = header
 
         if self.is_array:
+            if not self.__is_QueuedScheduler(scheduler):
+                raise Exception("can only schedule arrays of jobs to a queue")
             out += shell.JS_ELEMENT_ARRAY.format(
                 scheduler_command=scheduler.js_cmd,
                 scheduler_array_switch=scheduler.array_switch,
@@ -1079,14 +1117,14 @@ class Jobscript(JSONLike):
     @TimeIt.decorator
     def write_jobscript(
         self,
-        os_name: str = None,
-        shell_name: str = None,
-        deps:
-        os_args:
-        shell_args:
-        scheduler_name:
-        scheduler_args:
-    ):
+        os_name: str | None = None,
+        shell_name: str | None = None,
+        deps: dict[int, tuple[str, bool]] | None = None,
+        os_args: dict[str, Any] | None = None,
+        shell_args: dict[str, Any] | None = None,
+        scheduler_name: str | None = None,
+        scheduler_args: dict[str, Any] | None = None,
+    ) -> Path:
         """
         Write the jobscript to its file.
         """
```
```diff
@@ -1103,56 +1141,54 @@ class Jobscript(JSONLike):
             fp.write(js_str)
         return self.jobscript_path
 
-    def _get_EARs_arr(self):
-        EARs_arr = np.array(self.all_EARs).reshape(self.EAR_ID.shape)
-        return EARs_arr
-
     @TimeIt.decorator
-    def make_artifact_dirs(self):
+    def make_artifact_dirs(self) -> list[list[Path]]:
         """
         Create the directories that will hold artifacts associated with this jobscript.
         """
-        EARs_arr = self.
+        EARs_arr = np.array(self.all_EARs).reshape(self.EAR_ID.shape)
         task_loop_idx_arr = self.get_task_loop_idx_array()
 
-
-
-
-
-
-
-
-                r_idx = EAR_i.index
-
-                loop_idx_i = self.task_loop_idx[l_idx]
-                task_dir = self.workflow.tasks.get(insert_ID=t_iID).get_dir_name(
-                    loop_idx_i
+        return [
+            [
+                self.__make_action_dir(
+                    EARs_arr[js_act_idx, js_elem_idx],
+                    task_loop_idx_arr[js_act_idx, js_elem_idx].item(),
+                    js_act_idx,
+                    js_elem_idx,
                 )
-
-
-
-
-                EAR_dir.mkdir(exist_ok=True, parents=True)
-
-                # copy (TODO: optionally symlink) any input files:
-                for name, path in EAR_i.get("input_files", {}).items():
-                    if path:
-                        shutil.copy(path, EAR_dir)
+                for js_act_idx in range(self.num_actions)
+            ]
+            for js_elem_idx in range(self.num_elements)
+        ]
 
-
+    def __make_action_dir(
+        self, EAR_i: ElementActionRun, l_idx: int, js_act_idx: int, js_elem_idx: int
+    ) -> Path:
+        t_iID = EAR_i.task.insert_ID
+        r_idx = EAR_i.index
+        loop_idx_i = self.task_loop_idx[l_idx]
+        task_dir = self.workflow.tasks.get(insert_ID=t_iID).get_dir_name(loop_idx_i)
+        elem_dir = EAR_i.element.dir_name
 
-
+        EAR_dir = self.workflow.execution_path / task_dir / elem_dir / f"r_{r_idx}"
+        EAR_dir.mkdir(exist_ok=True, parents=True)
 
-
+        # copy (TODO: optionally symlink) any input files:
+        for path in cast("dict[Any, str]", EAR_i.get("input_files", {})).values():
+            if path:
+                shutil.copy(path, EAR_dir)
+        return EAR_dir.relative_to(self.workflow.path)
 
     @TimeIt.decorator
-    def _launch_direct_js_win(self):
+    def _launch_direct_js_win(self) -> int:
         # this is a "trick" to ensure we always get a fully detached new process (with no
         # parent); the `powershell.exe -Command` process exits after running the inner
         # `Start-Process`, which is where the jobscript is actually invoked. I could not
         # find a way using `subprocess.Popen()` to ensure the new process was fully
         # detached when submitting jobscripts via a Jupyter notebook in Windows.
 
+        assert self.submit_cmdline is not None
         # Note we need powershell.exe for this "launcher process", but the shell used for
         # the jobscript itself need not be powershell.exe
         exe_path, arg_list = self.submit_cmdline[0], self.submit_cmdline[1:]
```
```diff
@@ -1165,60 +1201,76 @@ class Jobscript(JSONLike):
         args = [
             "powershell.exe",
             "-Command",
-
-
-
-
-
-
-
-                f'Set-Content -Path "{self.direct_win_pid_file_path}" -Value $JS_proc.Id'
-            ),
+            f"$JS_proc = Start-Process "
+            f'-Passthru -NoNewWindow -FilePath "{exe_path}" '
+            f'-RedirectStandardOutput "{self.direct_stdout_path}" '
+            f'-RedirectStandardError "{self.direct_stderr_path}" '
+            f'-WorkingDirectory "{self.workflow.path}" '
+            f"-ArgumentList {arg_list_str}; "
+            f'Set-Content -Path "{self.direct_win_pid_file_path}" -Value $JS_proc.Id',
         ]
 
-        self.
+        self._app.submission_logger.info(
             f"running direct Windows jobscript launcher process: {args!r}"
         )
         # for some reason we still need to create a "detached" process here as well:
         init_proc = subprocess.Popen(
             args=args,
-            cwd=
-            creationflags=subprocess
+            cwd=self.workflow.path,
+            creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0),
         )
         init_proc.wait()  # wait for the process ID file to be written
-
-        return process_ID
+        return int(self.direct_win_pid_file_path.read_text())
 
     @TimeIt.decorator
     def _launch_direct_js_posix(self) -> int:
         # direct submission; submit jobscript asynchronously:
         # detached process, avoid interrupt signals propagating to the subprocess:
-
-
-
-
-
-
-
-
-
-
-
+        assert self.submit_cmdline is not None
+        with self.direct_stdout_path.open(
+            "wt"
+        ) as fp_stdout, self.direct_stderr_path.open("wt") as fp_stderr:
+            # note: Popen copies the file objects, so this works!
+            proc = subprocess.Popen(
+                args=self.submit_cmdline,
+                stdout=fp_stdout,
+                stderr=fp_stderr,
+                cwd=self.workflow.path,
+                start_new_session=True,
+            )
+            return proc.pid
 
-
+    @TimeIt.decorator
+    def _launch_queued(
+        self, submit_cmd: list[str], print_stdout: bool
+    ) -> tuple[str, str]:
+        # scheduled submission, wait for submission so we can parse the job ID:
+        proc = subprocess.run(
+            args=submit_cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            cwd=self.workflow.path,
+        )
+        stdout = proc.stdout.decode().strip()
+        stderr = proc.stderr.decode().strip()
+        if print_stdout and stdout:
+            print(stdout)
+        if stderr:
+            print(stderr)
+        return stdout, stderr
 
     @TimeIt.decorator
     def submit(
         self,
-        scheduler_refs:
-        print_stdout:
+        scheduler_refs: dict[int, tuple[str, bool]],
+        print_stdout: bool = False,
     ) -> str:
         """
         Submit the jobscript to the scheduler.
         """
         # map each dependency jobscript index to the JS ref (job/process ID) and if the
         # dependency is an array dependency:
-        deps = {}
+        deps: dict[int, tuple[str, bool]] = {}
         for js_idx, deps_i in self.dependencies.items():
             dep_js_ref, dep_js_is_arr = scheduler_refs[js_idx]
             # only submit an array dependency if both this jobscript and the dependency
```
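The new `_launch_direct_js_posix` detaches the child via `start_new_session=True`, which makes the child call `setsid()` so terminal signals (e.g. Ctrl-C) delivered to the submitting process do not reach the jobscript. A reduced sketch of the same launch pattern; the command and file names here are placeholders:

```python
import subprocess
from pathlib import Path


def launch_detached(cmd: list[str], cwd: Path) -> int:
    out, err = cwd / "stdout.log", cwd / "stderr.log"
    with out.open("wt") as fo, err.open("wt") as fe:
        # Popen duplicates the handles, so closing ours afterwards is safe
        proc = subprocess.Popen(
            args=cmd,
            stdout=fo,
            stderr=fe,
            cwd=cwd,
            start_new_session=True,  # child runs setsid(): new session, no controlling tty
        )
    return proc.pid
```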
@@ -1236,93 +1288,77 @@ class Jobscript(JSONLike):
         run_dirs = self.make_artifact_dirs()
         self.write_EAR_ID_file()
         self.write_element_run_dir_file(run_dirs)
-        js_path = self.write_jobscript(deps=deps)
-        js_path = self.shell.prepare_JS_path(js_path)
+        js_path = self.shell.prepare_JS_path(self.write_jobscript(deps=deps))
         submit_cmd = self.scheduler.get_submit_command(self.shell, js_path, deps)
-        self.app.submission_logger.info(
+        self._app.submission_logger.info(
             f"submitting jobscript {self.index!r} with command: {submit_cmd!r}"
         )
         self._set_submit_cmdline(submit_cmd)
         self._set_submit_hostname(socket.gethostname())
-        self._set_submit_machine(self.app.config.get("machine"))
+        self._set_submit_machine(self._app.config.get("machine"))

-        err_args = {
+        err_args: JobscriptSubmissionFailureArgs = {
+            "submit_cmd": submit_cmd,
             "js_idx": self.index,
             "js_path": js_path,
-            "subprocess_exc": None,
-            "job_ID_parse_exc": None,
         }
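`err_args` is now typed (`JobscriptSubmissionFailureArgs`) and, as the hunks below show, the failure message is passed positionally with the accumulated details unpacked as keywords, which removes the need for `None` placeholder entries. A self-contained sketch of the TypedDict-plus-`**kwargs` exception pattern, with stand-in names rather than the hpcflow definitions:

```python
from typing import TypedDict

class SubmissionFailureArgs(TypedDict, total=False):
    # Stand-in for JobscriptSubmissionFailureArgs: details accumulated as
    # the submission proceeds; all keys optional (total=False).
    submit_cmd: list[str]
    js_idx: int
    js_path: str
    stdout: str
    stderr: str

class SubmissionFailure(Exception):
    # Stand-in for JobscriptSubmissionFailure: the message is positional,
    # the details arrive as keyword arguments.
    def __init__(self, message: str, **details):
        super().__init__(message)
        self.details = details

err_args: SubmissionFailureArgs = {
    "submit_cmd": ["sbatch", "js_0.sh"],
    "js_idx": 0,
    "js_path": "js_0.sh",
}
try:
    raise SubmissionFailure("Failed to execute submit command.", **err_args)
except SubmissionFailure as exc:
    print(exc, "| jobscript index:", exc.details["js_idx"])
```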
-
-
-        process_ID = None
+        job_ID: str | None = None
+        process_ID: int | None = None
         try:
-            if
+            if self.__is_QueuedScheduler(self.scheduler):
                 # scheduled submission, wait for submission so we can parse the job ID:
-                proc = subprocess.run(
-                    args=submit_cmd,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    cwd=str(self.workflow.path),
-                )
-                stdout = proc.stdout.decode().strip()
-                stderr = proc.stderr.decode().strip()
+                stdout, stderr = self._launch_queued(submit_cmd, print_stdout)
                 err_args["stdout"] = stdout
                 err_args["stderr"] = stderr
-                if print_stdout and stdout:
-                    print(stdout)
-                if stderr:
-                    print(stderr)
             else:
                 if os.name == "nt":
                     process_ID = self._launch_direct_js_win()
                 else:
                     process_ID = self._launch_direct_js_posix()
-
         except Exception as subprocess_exc:
-            err_args["message"] = f"Failed to execute submit command."
-            err_args["submit_cmd"] = submit_cmd
-            err_args["stdout"] = None
-            err_args["stderr"] = None
             err_args["subprocess_exc"] = subprocess_exc
-            raise JobscriptSubmissionFailure(
+            raise JobscriptSubmissionFailure(
+                "Failed to execute submit command.", **err_args
+            )

-        if
+        if self.__is_QueuedScheduler(self.scheduler):
             # scheduled submission
             if stderr:
-
-
-
+                raise JobscriptSubmissionFailure(
+                    "Non-empty stderr from submit command.", **err_args
+                )

             try:
                 job_ID = self.scheduler.parse_submission_output(stdout)
-
+                assert job_ID is not None
             except Exception as job_ID_parse_exc:
                 # TODO: maybe handle this differently. If there is no stderr, then the job
                 # probably did submit fine, but the issue is just with parsing the job ID
                 # (e.g. if the scheduler version was updated and it now outputs
                 # differently).
-                err_args["message"] = "Failed to parse job ID from stdout."
-                err_args["submit_cmd"] = submit_cmd
                 err_args["job_ID_parse_exc"] = job_ID_parse_exc
-                raise JobscriptSubmissionFailure(
+                raise JobscriptSubmissionFailure(
+                    "Failed to parse job ID from stdout.", **err_args
+                )

             self._set_scheduler_job_ID(job_ID)
             ref = job_ID

         else:
             # direct submission
+            assert process_ID is not None
             self._set_process_ID(process_ID)
             # a downstream direct jobscript might need to wait for this jobscript, which
             # means this jobscript's process ID must be committed:
             self.workflow._store._pending.commit_all()
-            ref = process_ID
+            ref = f"{process_ID}"

-        self._set_submit_time(
+        self._set_submit_time(current_timestamp())

         return ref

     @property
-    def is_submitted(self):
+    def is_submitted(self) -> bool:
         """Whether this jobscript has been submitted."""
         return self.index in self.submission.submitted_jobscripts

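Throughout the rewritten `submit()`, `job_ID` and `process_ID` are declared as optionals and narrowed with `assert ... is not None` before use; static checkers such as mypy treat the assert as a narrowing from `str | None` to `str`. A minimal sketch (the parser is hypothetical):

```python
from __future__ import annotations


def parse_job_id(stdout: str) -> str | None:
    # Hypothetical parser: return None when no job ID can be found.
    return stdout.rsplit(" ", 1)[-1] if stdout else None


job_id: str | None = parse_job_id("Submitted batch job 12345")
assert job_id is not None  # narrows str | None to str for the type checker
print(len(job_id))         # safe: job_id is known here to be a str
```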
@@ -1331,77 +1367,81 @@ class Jobscript(JSONLike):
         """
         The reference to the submitted job for the jobscript.
         """
-        if
+        if self.__is_QueuedScheduler(self.scheduler):
             return self.scheduler_job_ID
         else:
             return (self.process_ID, self.submit_cmdline)

     @property
-    def scheduler_ref(self):
+    def scheduler_ref(self) -> SchedulerRef:
         """
         The generalised scheduler reference descriptor.
         """
-
-
-
-
+        return {"js_refs": [self.scheduler_js_ref], "num_js_elements": self.num_elements}
+
+    @overload
+    def get_active_states(
+        self, as_json: Literal[False] = False
+    ) -> Mapping[int, JobscriptElementState]:
+        ...
+
+    @overload
+    def get_active_states(self, as_json: Literal[True]) -> dict[int, str]:
+        ...

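The two `@overload` stubs added above encode that `as_json` selects the return type: `Literal[False]` yields the enum-valued mapping, while `Literal[True]` yields the JSON-friendly `dict[int, str]`. A runnable sketch of the same pattern with stand-in names:

```python
from __future__ import annotations

from enum import Enum
from typing import Literal, Mapping, overload

class State(Enum):
    running = 1
    finished = 2

@overload
def get_states(as_json: Literal[False] = False) -> Mapping[int, State]: ...
@overload
def get_states(as_json: Literal[True]) -> dict[int, str]: ...

def get_states(as_json: bool = False) -> Mapping[int, State] | dict[int, str]:
    # Stand-in for Jobscript.get_active_states: the flag selects between the
    # enum-valued mapping and a JSON-friendly dict of state names.
    states = {0: State.running, 1: State.finished}
    if as_json:
        return {idx: s.name for idx, s in states.items()}
    return states

print(get_states())              # type checkers see Mapping[int, State]
print(get_states(as_json=True))  # type checkers see dict[int, str]
```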
     @TimeIt.decorator
     def get_active_states(
         self, as_json: bool = False
-    ) ->
+    ) -> Mapping[int, JobscriptElementState] | dict[int, str]:
         """If this jobscript is active on this machine, return the state information from
         the scheduler."""

-
-        out = {}
+        out: dict[int, JobscriptElementState] = {}

-
-        self.app.submission_logger.debug(
+        if self.is_submitted:
+            self._app.submission_logger.debug(
                 "checking if the jobscript is running according to EAR submission "
                 "states."
             )

             not_run_states = EARStatus.get_non_running_submitted_states()
-            all_EAR_states = set(
-            self.
+            all_EAR_states = set(ear.status for ear in self.all_EARs)
+            self._app.submission_logger.debug(
+                f"Unique EAR states are: {all_EAR_states!r}"
+            )
             if all_EAR_states.issubset(not_run_states):
-                self.app.submission_logger.debug(
-
+                self._app.submission_logger.debug(
+                    "All jobscript EARs are in a non-running state"
                 )
-                out = {}

-            elif self.app.config.get("machine") == self.submit_machine:
-                self.app.submission_logger.debug(
+            elif self._app.config.get("machine") == self.submit_machine:
+                self._app.submission_logger.debug(
                     "Checking if jobscript is running according to the scheduler/process "
                     "ID."
                 )
-
-                if
-
+                out_d = self.scheduler.get_job_state_info(**self.scheduler_ref)
+                if out_d:
+                    out_i = out_d[next(iter(out_d))]  # first item only
                     # if value is single-length dict with `None` key, then transform
                     # to one key for each jobscript element:
-                    if
-                    out = {
-
-                    out =
+                    if tuple(out_i) == (None,):
+                        out = {idx: out_i[None] for idx in range(self.num_elements)}
+                    else:
+                        out = cast("Any", out_i)

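The `tuple(out_i) == (None,)` branch handles schedulers that report a single state for a whole non-array job as a `{None: state}` mapping, fanning it out to one entry per jobscript element. A sketch of the fan-out with a hypothetical response value:

```python
# Sketch of the fan-out: a scheduler that reports one state for a whole
# (non-array) job yields a single None key, which is expanded to one entry
# per jobscript element. The response value here is hypothetical.
num_elements = 3
out_i = {None: "RUNNING"}

if tuple(out_i) == (None,):
    out = {idx: out_i[None] for idx in range(num_elements)}
else:
    out = out_i  # already keyed by (array) element index

print(out)  # {0: 'RUNNING', 1: 'RUNNING', 2: 'RUNNING'}
```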
             else:
-                raise NotSubmitMachineError(
-                    "Cannot get active state of the jobscript because the current machine "
-                    "is not the machine on which the jobscript was submitted."
-                )
+                raise NotSubmitMachineError()

-        self.app.submission_logger.info(f"Jobscript is {'in' if not out else ''}active.")
+        self._app.submission_logger.info(f"Jobscript is {'in' if not out else ''}active.")
+        if as_json:
+            return {idx: state.name for idx, state in out.items()}
         return out

-    def cancel(self):
+    def cancel(self) -> None:
         """
         Cancel this jobscript.
         """
-        self.app.submission_logger.info(
+        self._app.submission_logger.info(
             f"Cancelling jobscript {self.index} of submission {self.submission.index}"
         )
-        self.scheduler.
-            js_idx=self.index, sub_idx=self.submission.index, **self.scheduler_ref
-        )
+        self.scheduler.cancel_jobs(**self.scheduler_ref, jobscripts=[self])