hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
- hpcflow/_version.py +1 -1
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +5 -0
- hpcflow/sdk/app.py +166 -92
- hpcflow/sdk/cli.py +263 -84
- hpcflow/sdk/cli_common.py +99 -5
- hpcflow/sdk/config/callbacks.py +38 -1
- hpcflow/sdk/config/config.py +102 -13
- hpcflow/sdk/config/errors.py +19 -5
- hpcflow/sdk/config/types.py +3 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +914 -262
- hpcflow/sdk/core/cache.py +76 -34
- hpcflow/sdk/core/command_files.py +14 -128
- hpcflow/sdk/core/commands.py +35 -6
- hpcflow/sdk/core/element.py +122 -50
- hpcflow/sdk/core/errors.py +58 -2
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/loop.py +408 -50
- hpcflow/sdk/core/loop_cache.py +4 -4
- hpcflow/sdk/core/parameters.py +382 -37
- hpcflow/sdk/core/run_dir_files.py +13 -40
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +119 -30
- hpcflow/sdk/core/task_schema.py +68 -0
- hpcflow/sdk/core/test_utils.py +66 -27
- hpcflow/sdk/core/types.py +54 -1
- hpcflow/sdk/core/utils.py +136 -19
- hpcflow/sdk/core/workflow.py +1587 -356
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +7 -0
- hpcflow/sdk/helper/cli.py +1 -0
- hpcflow/sdk/log.py +42 -15
- hpcflow/sdk/persistence/base.py +405 -53
- hpcflow/sdk/persistence/json.py +177 -52
- hpcflow/sdk/persistence/pending.py +237 -69
- hpcflow/sdk/persistence/store_resource.py +3 -2
- hpcflow/sdk/persistence/types.py +15 -4
- hpcflow/sdk/persistence/zarr.py +928 -81
- hpcflow/sdk/submission/jobscript.py +1408 -489
- hpcflow/sdk/submission/schedulers/__init__.py +40 -5
- hpcflow/sdk/submission/schedulers/direct.py +33 -19
- hpcflow/sdk/submission/schedulers/sge.py +51 -16
- hpcflow/sdk/submission/schedulers/slurm.py +44 -16
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/base.py +68 -20
- hpcflow/sdk/submission/shells/bash.py +222 -129
- hpcflow/sdk/submission/shells/powershell.py +200 -150
- hpcflow/sdk/submission/submission.py +852 -119
- hpcflow/sdk/submission/types.py +18 -21
- hpcflow/sdk/typing.py +24 -5
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +19 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +821 -70
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
- hpcflow/tests/unit/test_action.py +176 -0
- hpcflow/tests/unit/test_app.py +20 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +133 -0
- hpcflow/tests/unit/test_config.py +122 -1
- hpcflow/tests/unit/test_element_iteration.py +47 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_loop.py +1332 -27
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_parameter.py +13 -0
- hpcflow/tests/unit/test_persistence.py +190 -8
- hpcflow/tests/unit/test_run.py +109 -3
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_shell.py +20 -0
- hpcflow/tests/unit/test_submission.py +5 -76
- hpcflow/tests/unit/test_workflow_template.py +31 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +332 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +142 -2
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/METADATA +7 -4
- hpcflow_new2-0.2.0a200.dist-info/RECORD +222 -0
- hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/entry_points.txt +0 -0
@@ -4,10 +4,17 @@ A collection of submissions to a scheduler, generated from a workflow.
|
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
6
|
from collections import defaultdict
|
7
|
-
import
|
7
|
+
import shutil
|
8
8
|
from pathlib import Path
|
9
|
-
|
9
|
+
import socket
|
10
|
+
from textwrap import indent
|
11
|
+
from typing import Any, Literal, overload, TYPE_CHECKING
|
10
12
|
from typing_extensions import override
|
13
|
+
import warnings
|
14
|
+
|
15
|
+
|
16
|
+
from hpcflow.sdk.utils.strings import shorten_list_str
|
17
|
+
import numpy as np
|
11
18
|
|
12
19
|
from hpcflow.sdk.typing import hydrate
|
13
20
|
from hpcflow.sdk.core.errors import (
|
@@ -17,18 +24,22 @@ from hpcflow.sdk.core.errors import (
|
|
17
24
|
MissingEnvironmentExecutableInstanceError,
|
18
25
|
MultipleEnvironmentsError,
|
19
26
|
SubmissionFailure,
|
27
|
+
OutputFileParserNoOutputError,
|
20
28
|
)
|
21
29
|
from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
|
22
30
|
from hpcflow.sdk.core.object_list import ObjectListMultipleMatchError
|
23
31
|
from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
|
24
32
|
from hpcflow.sdk.submission.enums import SubmissionStatus
|
33
|
+
from hpcflow.sdk.core import RUN_DIR_ARR_DTYPE
|
25
34
|
from hpcflow.sdk.log import TimeIt
|
35
|
+
from hpcflow.sdk.utils.strings import shorten_list_str
|
26
36
|
|
27
37
|
if TYPE_CHECKING:
|
28
38
|
from collections.abc import Iterable, Mapping, Sequence
|
29
39
|
from datetime import datetime
|
30
40
|
from typing import ClassVar, Literal
|
31
41
|
from rich.status import Status
|
42
|
+
from numpy.typing import NDArray
|
32
43
|
from .jobscript import Jobscript
|
33
44
|
from .enums import JobscriptElementState
|
34
45
|
from .schedulers import Scheduler
|
@@ -38,6 +49,22 @@ if TYPE_CHECKING:
|
|
38
49
|
from ..core.environment import Environment
|
39
50
|
from ..core.object_list import EnvironmentsList
|
40
51
|
from ..core.workflow import Workflow
|
52
|
+
from ..core.cache import ObjectCache
|
53
|
+
|
54
|
+
|
55
|
+
# jobscript attributes that are set persistently just after the jobscript has been
|
56
|
+
# submitted to the scheduler:
|
57
|
+
JOBSCRIPT_SUBMIT_TIME_KEYS = (
|
58
|
+
"submit_cmdline",
|
59
|
+
"scheduler_job_ID",
|
60
|
+
"process_ID",
|
61
|
+
"submit_time",
|
62
|
+
)
|
63
|
+
# submission attributes that are set persistently just after all of a submission's
|
64
|
+
# jobscripts have been submitted:
|
65
|
+
SUBMISSION_SUBMIT_TIME_KEYS = {
|
66
|
+
"submission_parts": dict,
|
67
|
+
}
|
41
68
|
|
42
69
|
|
43
70
|
@hydrate
|
@@ -74,20 +101,35 @@ class Submission(JSONLike):
|
|
74
101
|
),
|
75
102
|
)
|
76
103
|
|
104
|
+
TMP_DIR_NAME = "tmp"
|
105
|
+
LOG_DIR_NAME = "app_logs"
|
106
|
+
APP_STD_DIR_NAME = "app_std"
|
107
|
+
JS_DIR_NAME = "jobscripts"
|
108
|
+
JS_STD_DIR_NAME = "js_std"
|
109
|
+
JS_RUN_IDS_DIR_NAME = "js_run_ids"
|
110
|
+
JS_FUNCS_DIR_NAME = "js_funcs"
|
111
|
+
JS_WIN_PIDS_DIR_NAME = "js_pids"
|
112
|
+
JS_SCRIPT_INDICES_DIR_NAME = "js_script_indices"
|
113
|
+
SCRIPTS_DIR_NAME = "scripts"
|
114
|
+
COMMANDS_DIR_NAME = "commands"
|
115
|
+
WORKFLOW_APP_ALIAS = "wkflow_app"
|
116
|
+
|
77
117
|
def __init__(
|
78
118
|
self,
|
79
119
|
index: int,
|
80
120
|
jobscripts: list[Jobscript],
|
81
121
|
workflow: Workflow | None = None,
|
82
|
-
|
83
|
-
JS_parallelism: bool | None = None,
|
122
|
+
at_submit_metadata: dict[str, Any] | None = None,
|
123
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
84
124
|
environments: EnvironmentsList | None = None,
|
85
125
|
):
|
86
126
|
self._index = index
|
87
127
|
self._jobscripts = jobscripts
|
88
|
-
self.
|
128
|
+
self._at_submit_metadata = at_submit_metadata or {
|
129
|
+
k: v() for k, v in SUBMISSION_SUBMIT_TIME_KEYS.items()
|
130
|
+
}
|
89
131
|
self._JS_parallelism = JS_parallelism
|
90
|
-
self._environments = environments
|
132
|
+
self._environments = environments # assigned by _set_environments
|
91
133
|
|
92
134
|
self._submission_parts_lst: list[
|
93
135
|
SubmissionPart
|
@@ -99,8 +141,30 @@ class Submission(JSONLike):
|
|
99
141
|
|
100
142
|
self._set_parent_refs()
|
101
143
|
|
102
|
-
|
103
|
-
|
144
|
+
def _ensure_JS_parallelism_set(self):
|
145
|
+
"""Ensure that the JS_parallelism attribute is one of `True`, `False`, `'direct'`
|
146
|
+
or `'scheduled'`.
|
147
|
+
|
148
|
+
Notes
|
149
|
+
-----
|
150
|
+
This method is called after the Submission object is first created in
|
151
|
+
`Workflow._add_submission`.
|
152
|
+
|
153
|
+
"""
|
154
|
+
# if JS_parallelism explicitly requested but store doesn't support, raise:
|
155
|
+
supports_JS_para = self.workflow._store._features.jobscript_parallelism
|
156
|
+
if self.JS_parallelism:
|
157
|
+
# could be: True | "direct" | "scheduled"
|
158
|
+
if not supports_JS_para:
|
159
|
+
# if status:
|
160
|
+
# status.stop()
|
161
|
+
raise ValueError(
|
162
|
+
f"Store type {self.workflow._store!r} does not support jobscript "
|
163
|
+
f"parallelism."
|
164
|
+
)
|
165
|
+
elif self.JS_parallelism is None:
|
166
|
+
# by default only use JS parallelism for scheduled jobscripts:
|
167
|
+
self._JS_parallelism = "scheduled" if supports_JS_para else False
|
104
168
|
|
105
169
|
@TimeIt.decorator
|
106
170
|
def _set_environments(self) -> None:
|
@@ -110,20 +174,22 @@ class Submission(JSONLike):
|
|
110
174
|
req_envs: dict[
|
111
175
|
tuple[tuple[str, ...], tuple[Any, ...]], dict[str, set[int]]
|
112
176
|
] = defaultdict(lambda: defaultdict(set))
|
113
|
-
|
114
|
-
for run
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
177
|
+
with self.workflow.cached_merged_parameters():
|
178
|
+
# using the cache (for `run.env_spec_hashable` -> `run.resources`) should
|
179
|
+
# significantly speed up this loop, unless a large resources sequence is used:
|
180
|
+
for js_idx, all_EARs_i in enumerate(self.all_EARs_by_jobscript):
|
181
|
+
for run in all_EARs_i:
|
182
|
+
env_spec_h = run.env_spec_hashable
|
183
|
+
for exec_label_j in run.action.get_required_executables():
|
184
|
+
req_envs[env_spec_h][exec_label_j].add(js_idx)
|
185
|
+
# add any environment for which an executable was not required:
|
186
|
+
if env_spec_h not in req_envs:
|
187
|
+
req_envs[env_spec_h]
|
122
188
|
|
123
189
|
# check these envs/execs exist in app data:
|
124
190
|
envs: list[Environment] = []
|
125
191
|
for env_spec_h, exec_js in req_envs.items():
|
126
|
-
env_spec =
|
192
|
+
env_spec = self._app.Action.env_spec_from_hashable(env_spec_h)
|
127
193
|
try:
|
128
194
|
env_i = self._app.envs.get(**env_spec)
|
129
195
|
except ObjectListMultipleMatchError:
|
@@ -178,13 +244,17 @@ class Submission(JSONLike):
|
|
178
244
|
return self._environments
|
179
245
|
|
180
246
|
@property
|
181
|
-
def
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
if not self._submission_parts:
|
186
|
-
return []
|
247
|
+
def at_submit_metadata(self) -> dict[str, dict[str, Any]]:
|
248
|
+
return self.workflow._store.get_submission_at_submit_metadata(
|
249
|
+
sub_idx=self.index, metadata_attr=self._at_submit_metadata
|
250
|
+
)
|
187
251
|
|
252
|
+
@property
|
253
|
+
def _submission_parts(self) -> dict[str, list[int]]:
|
254
|
+
return self.at_submit_metadata["submission_parts"] or {}
|
255
|
+
|
256
|
+
@property
|
257
|
+
def submission_parts(self) -> list[SubmissionPart]:
|
188
258
|
if self._submission_parts_lst is None:
|
189
259
|
self._submission_parts_lst = [
|
190
260
|
{
|
@@ -233,7 +303,7 @@ class Submission(JSONLike):
|
|
233
303
|
return self._jobscripts
|
234
304
|
|
235
305
|
@property
|
236
|
-
def JS_parallelism(self) -> bool | None:
|
306
|
+
def JS_parallelism(self) -> bool | Literal["direct", "scheduled"] | None:
|
237
307
|
"""
|
238
308
|
Whether to exploit jobscript parallelism.
|
239
309
|
"""
|
@@ -287,14 +357,237 @@ class Submission(JSONLike):
|
|
287
357
|
SubmissionStatus.PARTIALLY_SUBMITTED,
|
288
358
|
)
|
289
359
|
|
360
|
+
@property
|
361
|
+
def needs_app_log_dir(self) -> bool:
|
362
|
+
"""
|
363
|
+
Whether this submission requires an app log directory.
|
364
|
+
"""
|
365
|
+
for js in self.jobscripts:
|
366
|
+
if js.resources.write_app_logs:
|
367
|
+
return True
|
368
|
+
return False
|
369
|
+
|
370
|
+
@property
|
371
|
+
def needs_win_pids_dir(self) -> bool:
|
372
|
+
"""
|
373
|
+
Whether this submission requires a directory for process ID files (Windows only).
|
374
|
+
"""
|
375
|
+
for js in self.jobscripts:
|
376
|
+
if js.os_name == "nt":
|
377
|
+
return True
|
378
|
+
return False
|
379
|
+
|
380
|
+
@property
|
381
|
+
def needs_script_indices_dir(self) -> bool:
|
382
|
+
"""
|
383
|
+
Whether this submission requires a directory for combined-script script ID files.
|
384
|
+
"""
|
385
|
+
for js in self.jobscripts:
|
386
|
+
if js.resources.combine_scripts:
|
387
|
+
return True
|
388
|
+
return False
|
389
|
+
|
390
|
+
@classmethod
|
391
|
+
def get_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
392
|
+
"""
|
393
|
+
The directory path to files associated with the specified submission.
|
394
|
+
"""
|
395
|
+
return submissions_path / str(sub_idx)
|
396
|
+
|
397
|
+
@classmethod
|
398
|
+
def get_tmp_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
399
|
+
"""
|
400
|
+
The path to the temporary files directory, for the specified submission.
|
401
|
+
"""
|
402
|
+
return cls.get_path(submissions_path, sub_idx) / cls.TMP_DIR_NAME
|
403
|
+
|
404
|
+
@classmethod
|
405
|
+
def get_app_log_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
406
|
+
"""
|
407
|
+
The path to the app log directory, for the specified
|
408
|
+
submission.
|
409
|
+
"""
|
410
|
+
return cls.get_path(submissions_path, sub_idx) / cls.LOG_DIR_NAME
|
411
|
+
|
412
|
+
@staticmethod
|
413
|
+
def get_app_log_file_name(run_ID: int | str) -> str:
|
414
|
+
"""
|
415
|
+
The app log file name.
|
416
|
+
"""
|
417
|
+
# TODO: consider combine_app_logs argument
|
418
|
+
return f"r_{run_ID}.log"
|
419
|
+
|
420
|
+
@classmethod
|
421
|
+
def get_app_log_file_path(cls, submissions_path: Path, sub_idx: int, run_ID: int):
|
422
|
+
"""
|
423
|
+
The file path to the app log, for the specified submission.
|
424
|
+
"""
|
425
|
+
return (
|
426
|
+
cls.get_path(submissions_path, sub_idx)
|
427
|
+
/ cls.LOG_DIR_NAME
|
428
|
+
/ cls.get_app_log_file_name(run_ID)
|
429
|
+
)
|
430
|
+
|
431
|
+
@classmethod
|
432
|
+
def get_app_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
433
|
+
"""
|
434
|
+
The path to the app standard output and error stream files directory, for the
|
435
|
+
specified submission.
|
436
|
+
"""
|
437
|
+
return cls.get_path(submissions_path, sub_idx) / cls.APP_STD_DIR_NAME
|
438
|
+
|
439
|
+
@classmethod
|
440
|
+
def get_js_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
441
|
+
"""
|
442
|
+
The path to the jobscript files directory, for the specified submission.
|
443
|
+
"""
|
444
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_DIR_NAME
|
445
|
+
|
446
|
+
@classmethod
|
447
|
+
def get_js_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
448
|
+
"""
|
449
|
+
The path to the jobscript standard output and error files directory, for the
|
450
|
+
specified submission.
|
451
|
+
"""
|
452
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_STD_DIR_NAME
|
453
|
+
|
454
|
+
@classmethod
|
455
|
+
def get_js_run_ids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
456
|
+
"""
|
457
|
+
The path to the directory containing jobscript run IDs, for the specified
|
458
|
+
submission.
|
459
|
+
"""
|
460
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_RUN_IDS_DIR_NAME
|
461
|
+
|
462
|
+
@classmethod
|
463
|
+
def get_js_funcs_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
464
|
+
"""
|
465
|
+
The path to the directory containing the shell functions that are invoked within
|
466
|
+
jobscripts and command files, for the specified submission.
|
467
|
+
"""
|
468
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_FUNCS_DIR_NAME
|
469
|
+
|
470
|
+
@classmethod
|
471
|
+
def get_js_win_pids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
472
|
+
"""
|
473
|
+
The path to the directory containing process ID files (Windows only), for the
|
474
|
+
specified submission.
|
475
|
+
"""
|
476
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_WIN_PIDS_DIR_NAME
|
477
|
+
|
478
|
+
@classmethod
|
479
|
+
def get_js_script_indices_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
480
|
+
"""
|
481
|
+
The path to the directory containing script indices for combined-script jobscripts
|
482
|
+
only, for the specified submission.
|
483
|
+
"""
|
484
|
+
return cls.get_path(submissions_path, sub_idx) / cls.JS_SCRIPT_INDICES_DIR_NAME
|
485
|
+
|
486
|
+
@classmethod
|
487
|
+
def get_scripts_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
488
|
+
"""
|
489
|
+
The path to the directory containing action scripts, for the specified submission.
|
490
|
+
"""
|
491
|
+
return cls.get_path(submissions_path, sub_idx) / cls.SCRIPTS_DIR_NAME
|
492
|
+
|
493
|
+
@classmethod
|
494
|
+
def get_commands_path(cls, submissions_path: Path, sub_idx: int) -> Path:
|
495
|
+
"""
|
496
|
+
The path to the directory containing command files, for the specified submission.
|
497
|
+
"""
|
498
|
+
return cls.get_path(submissions_path, sub_idx) / cls.COMMANDS_DIR_NAME
|
499
|
+
|
290
500
|
@property
|
291
501
|
def path(self) -> Path:
|
292
502
|
"""
|
293
|
-
The path to
|
503
|
+
The path to the directory containing files associated with this submission.
|
504
|
+
"""
|
505
|
+
return self.get_path(self.workflow.submissions_path, self.index)
|
506
|
+
|
507
|
+
@property
|
508
|
+
def tmp_path(self) -> Path:
|
509
|
+
"""
|
510
|
+
The path to the temporary files directory for this submission.
|
511
|
+
"""
|
512
|
+
return self.get_tmp_path(self.workflow.submissions_path, self.index)
|
513
|
+
|
514
|
+
@property
|
515
|
+
def app_log_path(self) -> Path:
|
516
|
+
"""
|
517
|
+
The path to the app log directory for this submission.
|
518
|
+
"""
|
519
|
+
return self.get_app_log_path(self.workflow.submissions_path, self.index)
|
520
|
+
|
521
|
+
@property
|
522
|
+
def app_std_path(self) -> Path:
|
523
|
+
"""
|
524
|
+
The path to the app standard output and error stream files directory, for
|
525
|
+
this submission.
|
526
|
+
"""
|
527
|
+
return self.get_app_std_path(self.workflow.submissions_path, self.index)
|
528
|
+
|
529
|
+
@property
|
530
|
+
def js_path(self) -> Path:
|
531
|
+
"""
|
532
|
+
The path to the jobscript files directory, for this submission.
|
533
|
+
"""
|
534
|
+
return self.get_js_path(self.workflow.submissions_path, self.index)
|
535
|
+
|
536
|
+
@property
|
537
|
+
def js_std_path(self) -> Path:
|
538
|
+
"""
|
539
|
+
The path to the jobscript standard output and error files directory, for this
|
540
|
+
submission.
|
541
|
+
"""
|
542
|
+
return self.get_js_std_path(self.workflow.submissions_path, self.index)
|
543
|
+
|
544
|
+
@property
|
545
|
+
def js_run_ids_path(self) -> Path:
|
546
|
+
"""
|
547
|
+
The path to the directory containing jobscript run IDs, for this submission.
|
548
|
+
"""
|
549
|
+
return self.get_js_run_ids_path(self.workflow.submissions_path, self.index)
|
550
|
+
|
551
|
+
@property
|
552
|
+
def js_funcs_path(self) -> Path:
|
553
|
+
"""
|
554
|
+
The path to the directory containing the shell functions that are invoked within
|
555
|
+
jobscripts and command files, for this submission.
|
556
|
+
"""
|
557
|
+
return self.get_js_funcs_path(self.workflow.submissions_path, self.index)
|
558
|
+
|
559
|
+
@property
|
560
|
+
def js_win_pids_path(self) -> Path:
|
561
|
+
"""
|
562
|
+
The path to the directory containing process ID files (Windows only), for this
|
563
|
+
submission.
|
564
|
+
"""
|
565
|
+
return self.get_js_win_pids_path(self.workflow.submissions_path, self.index)
|
566
|
+
|
567
|
+
@property
|
568
|
+
def js_script_indices_path(self) -> Path:
|
569
|
+
"""
|
570
|
+
The path to the directory containing script indices for combined-script jobscripts
|
571
|
+
only, for this submission.
|
294
572
|
"""
|
295
|
-
return self.workflow.submissions_path
|
573
|
+
return self.get_js_script_indices_path(self.workflow.submissions_path, self.index)
|
296
574
|
|
297
575
|
@property
|
576
|
+
def scripts_path(self) -> Path:
|
577
|
+
"""
|
578
|
+
The path to the directory containing action scripts, for this submission.
|
579
|
+
"""
|
580
|
+
return self.get_scripts_path(self.workflow.submissions_path, self.index)
|
581
|
+
|
582
|
+
@property
|
583
|
+
def commands_path(self) -> Path:
|
584
|
+
"""
|
585
|
+
The path to the directory containing command files, for this submission.
|
586
|
+
"""
|
587
|
+
return self.get_commands_path(self.workflow.submissions_path, self.index)
|
588
|
+
|
589
|
+
@property
|
590
|
+
@TimeIt.decorator
|
298
591
|
def all_EAR_IDs(self) -> Iterable[int]:
|
299
592
|
"""
|
300
593
|
The IDs of all EARs in this submission.
|
@@ -302,12 +595,25 @@ class Submission(JSONLike):
|
|
302
595
|
return (i for js in self.jobscripts for i in js.all_EAR_IDs)
|
303
596
|
|
304
597
|
@property
|
598
|
+
@TimeIt.decorator
|
305
599
|
def all_EARs(self) -> Iterable[ElementActionRun]:
|
306
600
|
"""
|
307
|
-
All EARs in this
|
601
|
+
All EARs in this submission.
|
308
602
|
"""
|
309
603
|
return (ear for js in self.jobscripts for ear in js.all_EARs)
|
310
604
|
|
605
|
+
@property
|
606
|
+
@TimeIt.decorator
|
607
|
+
def all_EARs_IDs_by_jobscript(self) -> list[np.ndarray]:
|
608
|
+
return [i.all_EAR_IDs for i in self.jobscripts]
|
609
|
+
|
610
|
+
@property
|
611
|
+
@TimeIt.decorator
|
612
|
+
def all_EARs_by_jobscript(self) -> list[list[ElementActionRun]]:
|
613
|
+
ids = [i.all_EAR_IDs for i in self.jobscripts]
|
614
|
+
all_EARs = {i.id_: i for i in self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)}
|
615
|
+
return [[all_EARs[i] for i in js_ids] for js_ids in ids]
|
616
|
+
|
311
617
|
@property
|
312
618
|
@TimeIt.decorator
|
313
619
|
def EARs_by_elements(self) -> Mapping[int, Mapping[int, Sequence[ElementActionRun]]]:
|
@@ -322,70 +628,358 @@ class Submission(JSONLike):
|
|
322
628
|
return task_elem_EARs
|
323
629
|
|
324
630
|
@property
|
325
|
-
def
|
326
|
-
"""
|
327
|
-
|
328
|
-
"""
|
329
|
-
return "abort_EARs.txt"
|
330
|
-
|
331
|
-
@property
|
332
|
-
def abort_EARs_file_path(self) -> Path:
|
333
|
-
"""
|
334
|
-
The path to the file describing what EARs have aborted in this submission.
|
335
|
-
"""
|
336
|
-
return self.path / self.abort_EARs_file_name
|
631
|
+
def is_scheduled(self) -> tuple[bool, ...]:
|
632
|
+
"""Return whether each jobscript of this submission uses a scheduler or not."""
|
633
|
+
return tuple(i.is_scheduled for i in self.jobscripts)
|
337
634
|
|
338
635
|
@overload
|
339
636
|
def get_active_jobscripts(
|
340
637
|
self, as_json: Literal[False] = False
|
341
|
-
) -> Mapping[int, Mapping[int, JobscriptElementState]]:
|
638
|
+
) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState]]]:
|
342
639
|
...
|
343
640
|
|
344
641
|
@overload
|
345
|
-
def get_active_jobscripts(
|
642
|
+
def get_active_jobscripts(
|
643
|
+
self, as_json: Literal[True]
|
644
|
+
) -> Mapping[int, Mapping[int, Mapping[int, str]]]:
|
346
645
|
...
|
347
646
|
|
348
647
|
@TimeIt.decorator
|
349
648
|
def get_active_jobscripts(
|
350
|
-
self,
|
351
|
-
|
649
|
+
self,
|
650
|
+
as_json: Literal[True] | Literal[False] = False, # TODO: why can't we use bool?
|
651
|
+
) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState | str]]]:
|
352
652
|
"""Get jobscripts that are active on this machine, and their active states."""
|
353
|
-
# this returns: {JS_IDX: {JS_ELEMENT_IDX: STATE}}
|
653
|
+
# this returns: {JS_IDX: {BLOCK_IDX: {JS_ELEMENT_IDX: STATE}}}
|
354
654
|
# TODO: query the scheduler once for all jobscripts?
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
)
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
lines[run_ID] = "1"
|
376
|
-
|
377
|
-
# write a new temporary run-abort file:
|
378
|
-
tmp_suffix = self.abort_EARs_file_path.suffix + ".tmp"
|
379
|
-
tmp = self.abort_EARs_file_path.with_suffix(tmp_suffix)
|
380
|
-
self._app.submission_logger.debug(f"Creating temporary run abort file: {tmp!r}.")
|
381
|
-
with tmp.open(mode="wt", newline="\n") as fp:
|
382
|
-
fp.write("\n".join(lines) + "\n")
|
383
|
-
|
384
|
-
# atomic rename, overwriting original:
|
385
|
-
self._app.submission_logger.debug(
|
386
|
-
"Replacing original run abort file with new temporary file."
|
655
|
+
return {
|
656
|
+
js.index: act_states
|
657
|
+
for js in self.jobscripts
|
658
|
+
if (act_states := js.get_active_states(as_json=as_json))
|
659
|
+
}
|
660
|
+
|
661
|
+
@TimeIt.decorator
|
662
|
+
def _write_scripts(
|
663
|
+
self, cache: ObjectCache, status: Status | None = None
|
664
|
+
) -> tuple[dict[int, int | None], NDArray, dict[int, list[Path]]]:
|
665
|
+
"""Write to disk all action scripts associated with this submission."""
|
666
|
+
# TODO: rename this method
|
667
|
+
|
668
|
+
# TODO: need to check is_snippet_script is exclusive? i.e. only `script` and no
|
669
|
+
# `commands` in the action?
|
670
|
+
# TODO: scripts must have the same exe and the same environment as well?
|
671
|
+
# TODO: env_spec should be included in jobscript hash if combine_scripts=True ?
|
672
|
+
|
673
|
+
actions_by_schema: dict[str, dict[int, set]] = defaultdict(
|
674
|
+
lambda: defaultdict(set)
|
387
675
|
)
|
388
|
-
|
676
|
+
combined_env_specs = {}
|
677
|
+
|
678
|
+
# task insert IDs and action indices for each combined_scripts jobscript:
|
679
|
+
combined_actions = {}
|
680
|
+
|
681
|
+
cmd_hashes = defaultdict(set)
|
682
|
+
num_runs_tot = sum(len(js.all_EAR_IDs) for js in self.jobscripts)
|
683
|
+
run_indices = np.ones((num_runs_tot, 9), dtype=int) * -1
|
684
|
+
run_inp_files = defaultdict(
|
685
|
+
list
|
686
|
+
) # keys are `run_idx`, values are Paths to copy to run dir
|
687
|
+
run_cmd_file_names: dict[int, int | None] = {} # None if no commands to write
|
688
|
+
run_idx = 0
|
689
|
+
|
690
|
+
if status:
|
691
|
+
status.update(f"Adding new submission: processing run 1/{num_runs_tot}.")
|
692
|
+
|
693
|
+
all_runs = cache.runs
|
694
|
+
assert all_runs is not None
|
695
|
+
runs_ids_by_js = self.all_EARs_IDs_by_jobscript
|
696
|
+
|
697
|
+
with self.workflow.cached_merged_parameters():
|
698
|
+
for js in self.jobscripts:
|
699
|
+
js_idx = js.index
|
700
|
+
js_run_0 = all_runs[runs_ids_by_js[js.index][0]]
|
701
|
+
|
702
|
+
if js.resources.combine_scripts:
|
703
|
+
# this will be one or more snippet scripts that needs to be combined into
|
704
|
+
# one script for the whole jobscript
|
705
|
+
|
706
|
+
# need to write one script + one commands file for the whole jobscript
|
707
|
+
|
708
|
+
# env_spec will be the same for all runs of this jobscript:
|
709
|
+
combined_env_specs[js_idx] = js_run_0.env_spec
|
710
|
+
combined_actions[js_idx] = [
|
711
|
+
[j[0:2] for j in i.task_actions] for i in js.blocks
|
712
|
+
]
|
713
|
+
|
714
|
+
for idx, run_id in enumerate(js.all_EAR_IDs):
|
715
|
+
run = all_runs[run_id]
|
716
|
+
|
717
|
+
run_indices[run_idx] = [
|
718
|
+
run.task.insert_ID,
|
719
|
+
run.element.id_,
|
720
|
+
run.element_iteration.id_,
|
721
|
+
run.id_,
|
722
|
+
run.element.index,
|
723
|
+
run.element_iteration.index,
|
724
|
+
run.element_action.action_idx,
|
725
|
+
run.index,
|
726
|
+
int(run.action.requires_dir),
|
727
|
+
]
|
728
|
+
run_idx += 1
|
729
|
+
|
730
|
+
if status and run_idx % 10 == 0:
|
731
|
+
status.update(
|
732
|
+
f"Adding new submission: processing run {run_idx}/{num_runs_tot}."
|
733
|
+
)
|
734
|
+
|
735
|
+
if js.resources.combine_scripts:
|
736
|
+
if idx == 0:
|
737
|
+
# the commands file for a combined jobscript won't have
|
738
|
+
# any parameter data in the command line, so should raise
|
739
|
+
# if something is found to be unset:
|
740
|
+
run.try_write_commands(
|
741
|
+
environments=self.environments,
|
742
|
+
jobscript=js,
|
743
|
+
raise_on_unset=True,
|
744
|
+
)
|
745
|
+
run_cmd_file_names[run.id_] = None
|
746
|
+
|
747
|
+
else:
|
748
|
+
if run.is_snippet_script:
|
749
|
+
actions_by_schema[run.action.task_schema.name][
|
750
|
+
run.element_action.action_idx
|
751
|
+
].add(run.env_spec_hashable)
|
752
|
+
|
753
|
+
if run.action.commands:
|
754
|
+
hash_i = run.get_commands_file_hash()
|
755
|
+
# TODO: could further reduce number of files in the case the data
|
756
|
+
# indices hash is the same: if commands objects are the same and
|
757
|
+
# environment objects are the same, then the files will be the
|
758
|
+
# same, even if runs come from different task schemas/actions...
|
759
|
+
if hash_i not in cmd_hashes:
|
760
|
+
try:
|
761
|
+
run.try_write_commands(
|
762
|
+
environments=self.environments,
|
763
|
+
jobscript=js,
|
764
|
+
)
|
765
|
+
except OutputFileParserNoOutputError:
|
766
|
+
# no commands to write, might be used just for saving
|
767
|
+
# files
|
768
|
+
run_cmd_file_names[run.id_] = None
|
769
|
+
cmd_hashes[hash_i].add(run.id_)
|
770
|
+
else:
|
771
|
+
run_cmd_file_names[run.id_] = None
|
772
|
+
|
773
|
+
if run.action.requires_dir:
|
774
|
+
# TODO: what is type of `path`?
|
775
|
+
for name, path in run.get("input_files", {}).items():
|
776
|
+
if path:
|
777
|
+
run_inp_files[run_idx].append(path)
|
778
|
+
|
779
|
+
for run_ids in cmd_hashes.values():
|
780
|
+
run_ids_srt = sorted(run_ids)
|
781
|
+
root_id = run_ids_srt[0] # used for command file name for this group
|
782
|
+
# TODO: could store multiple IDs to reduce number of files created
|
783
|
+
for run_id_i in run_ids_srt:
|
784
|
+
if run_id_i not in run_cmd_file_names:
|
785
|
+
run_cmd_file_names[run_id_i] = root_id
|
786
|
+
|
787
|
+
if status:
|
788
|
+
status.update("Adding new submission: writing scripts...")
|
789
|
+
|
790
|
+
seen: dict[int, Path] = {}
|
791
|
+
combined_script_data: dict[
|
792
|
+
int, dict[int, list[tuple[str, Path, bool]]]
|
793
|
+
] = defaultdict(lambda: defaultdict(list))
|
794
|
+
for task in self.workflow.tasks:
|
795
|
+
for schema in task.template.schemas:
|
796
|
+
if schema.name in actions_by_schema:
|
797
|
+
for idx, action in enumerate(schema.actions):
|
798
|
+
|
799
|
+
if not action.script:
|
800
|
+
continue
|
801
|
+
|
802
|
+
for env_spec_h in actions_by_schema[schema.name][idx]:
|
803
|
+
|
804
|
+
env_spec = action.env_spec_from_hashable(env_spec_h)
|
805
|
+
name, snip_path, specs = action.get_script_artifact_name(
|
806
|
+
env_spec=env_spec,
|
807
|
+
act_idx=idx,
|
808
|
+
ret_specifiers=True,
|
809
|
+
)
|
810
|
+
script_hash = action.get_script_determinant_hash(specs)
|
811
|
+
script_path = self.scripts_path / name
|
812
|
+
prev_path = seen.get(script_hash)
|
813
|
+
if script_path == prev_path:
|
814
|
+
continue
|
815
|
+
|
816
|
+
elif prev_path:
|
817
|
+
# try to make a symbolic link to the file previously
|
818
|
+
# created:
|
819
|
+
try:
|
820
|
+
script_path.symlink_to(prev_path.name)
|
821
|
+
except OSError:
|
822
|
+
# windows requires admin permission, copy instead:
|
823
|
+
shutil.copy(prev_path, script_path)
|
824
|
+
else:
|
825
|
+
# write script to disk:
|
826
|
+
source_str = action.compose_source(snip_path)
|
827
|
+
if source_str:
|
828
|
+
with script_path.open("wt", newline="\n") as fp:
|
829
|
+
fp.write(source_str)
|
830
|
+
seen[script_hash] = script_path
|
831
|
+
|
832
|
+
# combined script stuff
|
833
|
+
for js_idx, act_IDs in combined_actions.items():
|
834
|
+
for block_idx, act_IDs_i in enumerate(act_IDs):
|
835
|
+
for task_iID, act_idx in act_IDs_i:
|
836
|
+
task = self.workflow.tasks.get(insert_ID=task_iID)
|
837
|
+
schema = task.template.schemas[0] # TODO: multiple schemas
|
838
|
+
action = schema.actions[act_idx]
|
839
|
+
func_name, snip_path = action.get_script_artifact_name(
|
840
|
+
env_spec=combined_env_specs[js_idx],
|
841
|
+
act_idx=act_idx,
|
842
|
+
ret_specifiers=False,
|
843
|
+
include_suffix=False,
|
844
|
+
specs_suffix_delim="_", # can't use "." in function name
|
845
|
+
)
|
846
|
+
combined_script_data[js_idx][block_idx].append(
|
847
|
+
(func_name, snip_path, action.requires_dir)
|
848
|
+
)
|
849
|
+
|
850
|
+
for js_idx, action_scripts in combined_script_data.items():
|
851
|
+
js = self.jobscripts[js_idx]
|
852
|
+
|
853
|
+
script_str, script_indices, num_elems, num_acts = js.compose_combined_script(
|
854
|
+
[i for _, i in sorted(action_scripts.items())]
|
855
|
+
)
|
856
|
+
js.write_script_indices_file(script_indices, num_elems, num_acts)
|
857
|
+
|
858
|
+
script_path = self.scripts_path / f"js_{js_idx}.py" # TODO: refactor name
|
859
|
+
with script_path.open("wt", newline="\n") as fp:
|
860
|
+
fp.write(script_str)
|
861
|
+
|
862
|
+
return run_cmd_file_names, run_indices, run_inp_files
|
863
|
+
|
864
|
+
@TimeIt.decorator
|
865
|
+
def _calculate_run_dir_indices(
|
866
|
+
self,
|
867
|
+
run_indices: np.ndarray,
|
868
|
+
cache: ObjectCache,
|
869
|
+
) -> tuple[np.ndarray, np.ndarray]:
|
870
|
+
|
871
|
+
assert cache.elements is not None
|
872
|
+
assert cache.iterations is not None
|
873
|
+
# get the multiplicities of all tasks, elements, iterations, and runs:
|
874
|
+
wk_num_tasks = self.workflow.num_tasks
|
875
|
+
task_num_elems = {}
|
876
|
+
elem_num_iters = {}
|
877
|
+
iter_num_acts = {}
|
878
|
+
iter_acts_num_runs = {}
|
879
|
+
for task in self.workflow.tasks:
|
880
|
+
elem_IDs = task.element_IDs
|
881
|
+
task_num_elems[task.insert_ID] = len(elem_IDs)
|
882
|
+
for elem_ID in elem_IDs:
|
883
|
+
iter_IDs = cache.elements[elem_ID].iteration_IDs
|
884
|
+
elem_num_iters[elem_ID] = len(iter_IDs)
|
885
|
+
for iter_ID in iter_IDs:
|
886
|
+
run_IDs = cache.iterations[iter_ID].EAR_IDs
|
887
|
+
if run_IDs: # the schema might have no actions
|
888
|
+
iter_num_acts[iter_ID] = len(run_IDs)
|
889
|
+
for act_idx, act_run_IDs in run_IDs.items():
|
890
|
+
iter_acts_num_runs[(iter_ID, act_idx)] = len(act_run_IDs)
|
891
|
+
else:
|
892
|
+
iter_num_acts[iter_ID] = 0
|
893
|
+
|
894
|
+
max_u8 = np.iinfo(np.uint8).max
|
895
|
+
max_u32 = np.iinfo(np.uint32).max
|
896
|
+
MAX_ELEMS_PER_DIR = 1000 # TODO: configurable (add `workflow_defaults` to Config)
|
897
|
+
MAX_ITERS_PER_DIR = 1000
|
898
|
+
requires_dir_idx = np.where(run_indices[:, -1] == 1)[0]
|
899
|
+
run_dir_arr = np.empty(requires_dir_idx.size, dtype=RUN_DIR_ARR_DTYPE)
|
900
|
+
run_ids = np.empty(requires_dir_idx.size, dtype=int)
|
901
|
+
|
902
|
+
elem_depths: dict[int, int] = {}
|
903
|
+
iter_depths: dict[int, int] = {}
|
904
|
+
for idx in range(requires_dir_idx.size):
|
905
|
+
row = run_indices[requires_dir_idx[idx]]
|
906
|
+
t_iID, e_id, i_id, r_id, e_idx, i_idx, a_idx, r_idx = row[:-1]
|
907
|
+
run_ids[idx] = r_id
|
908
|
+
|
909
|
+
num_elems_i = task_num_elems[t_iID]
|
910
|
+
num_iters_i = elem_num_iters[e_id]
|
911
|
+
num_acts_i = iter_num_acts[i_id] # see TODO below
|
912
|
+
num_runs_i = iter_acts_num_runs[(i_id, a_idx)]
|
913
|
+
|
914
|
+
e_depth = 1
|
915
|
+
if num_elems_i == 1:
|
916
|
+
e_idx = max_u32
|
917
|
+
elif num_elems_i > MAX_ELEMS_PER_DIR:
|
918
|
+
if (e_depth := elem_depths.get(t_iID, -1)) == -1:
|
919
|
+
e_depth = int(
|
920
|
+
np.ceil(np.log(num_elems_i) / np.log(MAX_ELEMS_PER_DIR))
|
921
|
+
)
|
922
|
+
elem_depths[t_iID] = e_depth
|
923
|
+
|
924
|
+
# TODO: i_idx should be either MAX or the iteration ID, which will index into
|
925
|
+
# a separate array to get the formatted loop indices e.g.
|
926
|
+
# ("outer_loop_0_inner_loop_9")
|
927
|
+
i_depth = 1
|
928
|
+
if num_iters_i == 1:
|
929
|
+
i_idx = max_u32
|
930
|
+
elif num_iters_i > MAX_ITERS_PER_DIR:
|
931
|
+
if (i_depth := iter_depths.get(e_id, -1)) == -1:
|
932
|
+
i_depth = int(
|
933
|
+
np.ceil(np.log(num_iters_i) / np.log(MAX_ITERS_PER_DIR))
|
934
|
+
)
|
935
|
+
iter_depths[e_id] = i_depth
|
936
|
+
|
937
|
+
a_idx = max_u8 # TODO: for now, always exclude action index dir
|
938
|
+
|
939
|
+
if num_runs_i == 1:
|
940
|
+
r_idx = max_u8
|
941
|
+
|
942
|
+
if wk_num_tasks == 1:
|
943
|
+
t_iID = max_u8
|
944
|
+
|
945
|
+
run_dir_arr[idx] = (t_iID, e_idx, i_idx, a_idx, r_idx, e_depth, i_depth)
|
946
|
+
|
947
|
+
return run_dir_arr, run_ids
|
948
|
+
|
949
|
+
@TimeIt.decorator
|
950
|
+
def _write_execute_dirs(
|
951
|
+
self,
|
952
|
+
run_indices: NDArray,
|
953
|
+
run_inp_files: dict[int, list[Path]],
|
954
|
+
cache: ObjectCache,
|
955
|
+
status: Status | None = None,
|
956
|
+
):
|
957
|
+
|
958
|
+
if status:
|
959
|
+
status.update("Adding new submission: resolving execution directories...")
|
960
|
+
|
961
|
+
run_dir_arr, run_idx = self._calculate_run_dir_indices(run_indices, cache)
|
962
|
+
|
963
|
+
# set run dirs in persistent array:
|
964
|
+
if run_idx.size:
|
965
|
+
self.workflow._store.set_run_dirs(run_dir_arr, run_idx)
|
966
|
+
|
967
|
+
# retrieve run directories as paths. array is not yet commited, so pass in
|
968
|
+
# directly:
|
969
|
+
run_dirs = self.workflow.get_run_directories(dir_indices_arr=run_dir_arr)
|
970
|
+
|
971
|
+
if status:
|
972
|
+
status.update("Adding new submission: making execution directories...")
|
973
|
+
|
974
|
+
# make directories
|
975
|
+
for idx, run_dir in enumerate(run_dirs):
|
976
|
+
assert run_dir
|
977
|
+
run_dir.mkdir(parents=True, exist_ok=True)
|
978
|
+
inp_files_i = run_inp_files.get(run_idx[idx])
|
979
|
+
if inp_files_i:
|
980
|
+
# copy (TODO: optionally symlink) any input files:
|
981
|
+
for path_i in inp_files_i:
|
982
|
+
shutil.copy(path_i, run_dir)
|
389
983
|
|
390
984
|
@staticmethod
|
391
985
|
def get_unique_schedulers_of_jobscripts(
|
@@ -393,7 +987,7 @@ class Submission(JSONLike):
|
|
393
987
|
) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
|
394
988
|
"""Get unique schedulers and which of the passed jobscripts they correspond to.
|
395
989
|
|
396
|
-
Uniqueness is
|
990
|
+
Uniqueness is determined only by the `QueuedScheduler.unique_properties` tuple.
|
397
991
|
|
398
992
|
Parameters
|
399
993
|
----------
|
@@ -463,13 +1057,90 @@ class Submission(JSONLike):
|
|
463
1057
|
|
464
1058
|
return zip(map(tuple, js_idx), shells)
|
465
1059
|
|
466
|
-
def
|
467
|
-
|
468
|
-
|
1060
|
+
def _update_at_submit_metadata(self, submission_parts: dict[str, list[int]]):
|
1061
|
+
"""Update persistent store and in-memory record of at-submit metadata.
|
1062
|
+
|
1063
|
+
Notes
|
1064
|
+
-----
|
1065
|
+
Currently there is only one type of at-submit metadata, which is the
|
1066
|
+
submission-parts: a mapping between a string submit-time, and the list of
|
1067
|
+
jobscript indices that were submitted at that submit-time. This method updates
|
1068
|
+
the recorded submission parts to include those passed here.
|
1069
|
+
|
1070
|
+
"""
|
1071
|
+
|
1072
|
+
self.workflow._store.update_at_submit_metadata(
|
469
1073
|
sub_idx=self.index,
|
470
|
-
|
471
|
-
|
1074
|
+
submission_parts=submission_parts,
|
1075
|
+
)
|
1076
|
+
|
1077
|
+
self._at_submit_metadata["submission_parts"].update(submission_parts)
|
1078
|
+
|
1079
|
+
# cache is now invalid:
|
1080
|
+
self._submission_parts_lst = None
|
1081
|
+
|
1082
|
+
def _append_submission_part(self, submit_time: str, submitted_js_idx: list[int]):
|
1083
|
+
self._update_at_submit_metadata(submission_parts={submit_time: submitted_js_idx})
|
1084
|
+
|
1085
|
+
def get_jobscript_functions_name(self, shell: Shell, shell_idx: int) -> str:
|
1086
|
+
"""Get the name of the jobscript functions file for the specified shell."""
|
1087
|
+
return f"js_funcs_{shell_idx}{shell.JS_EXT}"
|
1088
|
+
|
1089
|
+
def get_jobscript_functions_path(self, shell: Shell, shell_idx: int) -> Path:
|
1090
|
+
"""Get the path of the jobscript functions file for the specified shell."""
|
1091
|
+
return self.js_funcs_path / self.get_jobscript_functions_name(shell, shell_idx)
|
1092
|
+
|
1093
|
+
def _compose_functions_file(self, shell: Shell) -> str:
|
1094
|
+
"""Prepare the contents of the jobscript functions file for the specified
|
1095
|
+
shell.
|
1096
|
+
|
1097
|
+
Notes
|
1098
|
+
-----
|
1099
|
+
The functions file includes, at a minimum, a shell function that invokes the app
|
1100
|
+
with provided arguments. This file will be sourced/invoked within all jobscripts
|
1101
|
+
and command files that share the specified shell.
|
1102
|
+
|
1103
|
+
"""
|
1104
|
+
|
1105
|
+
cfg_invocation = self._app.config._file.get_invocation(
|
1106
|
+
self._app.config._config_key
|
472
1107
|
)
|
1108
|
+
env_setup = cfg_invocation["environment_setup"]
|
1109
|
+
if env_setup:
|
1110
|
+
env_setup = indent(env_setup.strip(), shell.JS_ENV_SETUP_INDENT)
|
1111
|
+
env_setup += "\n\n" + shell.JS_ENV_SETUP_INDENT
|
1112
|
+
else:
|
1113
|
+
env_setup = shell.JS_ENV_SETUP_INDENT
|
1114
|
+
app_invoc = list(self._app.run_time_info.invocation_command)
|
1115
|
+
|
1116
|
+
app_caps = self._app.package_name.upper()
|
1117
|
+
func_file_args = shell.process_JS_header_args( # TODO: rename?
|
1118
|
+
{
|
1119
|
+
"workflow_app_alias": self.WORKFLOW_APP_ALIAS,
|
1120
|
+
"env_setup": env_setup,
|
1121
|
+
"app_invoc": app_invoc,
|
1122
|
+
"app_caps": app_caps,
|
1123
|
+
"config_dir": str(self._app.config.config_directory),
|
1124
|
+
"config_invoc_key": self._app.config.config_key,
|
1125
|
+
}
|
1126
|
+
)
|
1127
|
+
out = shell.JS_FUNCS.format(**func_file_args)
|
1128
|
+
return out
|
1129
|
+
|
1130
|
+
def _write_functions_file(self, shell: Shell, shell_idx: int) -> None:
|
1131
|
+
"""Write the jobscript functions file for the specified shell.
|
1132
|
+
|
1133
|
+
Notes
|
1134
|
+
-----
|
1135
|
+
The functions file includes, at a minimum, a shell function that invokes the app
|
1136
|
+
with provided arguments. This file will be sourced/invoked within all jobscripts
|
1137
|
+
and command files that share the specified shell.
|
1138
|
+
|
1139
|
+
"""
|
1140
|
+
js_funcs_str = self._compose_functions_file(shell)
|
1141
|
+
path = self.get_jobscript_functions_path(shell, shell_idx)
|
1142
|
+
with path.open("wt", newline="\n") as fp:
|
1143
|
+
fp.write(js_funcs_str)
|
473
1144
|
|
474
1145
|
@TimeIt.decorator
|
475
1146
|
def submit(
|
@@ -481,24 +1152,9 @@ class Submission(JSONLike):
|
|
481
1152
|
) -> list[int]:
|
482
1153
|
"""Generate and submit the jobscripts of this submission."""
|
483
1154
|
|
484
|
-
#
|
485
|
-
|
486
|
-
if
|
487
|
-
if not supports_JS_para:
|
488
|
-
if status:
|
489
|
-
status.stop()
|
490
|
-
raise ValueError(
|
491
|
-
f"Store type {self.workflow._store!r} does not support jobscript "
|
492
|
-
f"parallelism."
|
493
|
-
)
|
494
|
-
elif self.JS_parallelism is None:
|
495
|
-
self._JS_parallelism = supports_JS_para
|
496
|
-
|
497
|
-
# set os_name and shell_name for each jobscript:
|
498
|
-
for js in self.jobscripts:
|
499
|
-
js._set_os_name()
|
500
|
-
js._set_shell_name()
|
501
|
-
js._set_scheduler_name()
|
1155
|
+
# TODO: support passing list of jobscript indices to submit; this will allow us
|
1156
|
+
# to test a submision with multiple "submission parts". would also need to check
|
1157
|
+
# dependencies if this customised list is passed
|
502
1158
|
|
503
1159
|
outstanding = self.outstanding_jobscripts
|
504
1160
|
|
@@ -516,7 +1172,8 @@ class Submission(JSONLike):
|
|
516
1172
|
if js_idx in outstanding:
|
517
1173
|
js_vers_info.setdefault(js_idx, {}).update(vers_info)
|
518
1174
|
|
519
|
-
|
1175
|
+
js_shell_indices = {}
|
1176
|
+
for shell_idx, (js_indices_2, shell) in enumerate(self.get_unique_shells()):
|
520
1177
|
try:
|
521
1178
|
vers_info = shell.get_version_info()
|
522
1179
|
except Exception:
|
@@ -526,22 +1183,22 @@ class Submission(JSONLike):
|
|
526
1183
|
for js_idx in js_indices_2:
|
527
1184
|
if js_idx in outstanding:
|
528
1185
|
js_vers_info.setdefault(js_idx, {}).update(vers_info)
|
1186
|
+
js_shell_indices[js_idx] = shell_idx
|
529
1187
|
|
1188
|
+
# write a file containing useful shell functions:
|
1189
|
+
self._write_functions_file(shell, shell_idx)
|
1190
|
+
|
1191
|
+
hostname = socket.gethostname()
|
1192
|
+
machine = self._app.config.get("machine")
|
530
1193
|
for js_idx, vers_info_i in js_vers_info.items():
|
531
|
-
self.jobscripts[js_idx]
|
1194
|
+
js = self.jobscripts[js_idx]
|
1195
|
+
js._set_version_info(vers_info_i)
|
1196
|
+
js._set_submit_hostname(hostname)
|
1197
|
+
js._set_submit_machine(machine)
|
1198
|
+
js._set_shell_idx(js_shell_indices[js_idx])
|
532
1199
|
|
533
|
-
# for direct submission, it's important that os_name/shell_name/scheduler_name
|
534
|
-
# are made persistent now, because `Workflow.write_commands`, which might be
|
535
|
-
# invoked in a new process before submission has completed, needs to know these:
|
536
1200
|
self.workflow._store._pending.commit_all()
|
537
1201
|
|
538
|
-
# TODO: a submission should only be "submitted" once shouldn't it?
|
539
|
-
# no; there could be an IO error (e.g. internet connectivity), so might
|
540
|
-
# need to be able to reattempt submission of outstanding jobscripts.
|
541
|
-
self.path.mkdir(exist_ok=True)
|
542
|
-
if not self.abort_EARs_file_path.is_file():
|
543
|
-
self._write_abort_EARs_file()
|
544
|
-
|
545
1202
|
# map jobscript `index` to (scheduler job ID or process ID, is_array):
|
546
1203
|
scheduler_refs: dict[int, tuple[str, bool]] = {}
|
547
1204
|
submitted_js_idx: list[int] = []
|
@@ -553,14 +1210,20 @@ class Submission(JSONLike):
|
|
553
1210
|
|
554
1211
|
# check all dependencies were submitted now or previously:
|
555
1212
|
if not all(
|
556
|
-
|
557
|
-
for
|
1213
|
+
js_idx in submitted_js_idx or js_idx in self.submitted_jobscripts
|
1214
|
+
for js_idx, _ in js.dependencies
|
558
1215
|
):
|
1216
|
+
warnings.warn(
|
1217
|
+
f"Cannot submit jobscript index {js.index} since not all of its "
|
1218
|
+
f"dependencies have been submitted: {js.dependencies!r}"
|
1219
|
+
)
|
559
1220
|
continue
|
560
1221
|
|
561
1222
|
try:
|
562
1223
|
if status:
|
563
|
-
status.update(
|
1224
|
+
status.update(
|
1225
|
+
f"Submitting jobscript {js.index + 1}/{len(self.jobscripts)}..."
|
1226
|
+
)
|
564
1227
|
js_ref_i = js.submit(scheduler_refs, print_stdout=print_stdout)
|
565
1228
|
scheduler_refs[js.index] = (js_ref_i, js.is_array)
|
566
1229
|
submitted_js_idx.append(js.index)
|
@@ -569,12 +1232,18 @@ class Submission(JSONLike):
|
|
569
1232
|
errs.append(err)
|
570
1233
|
continue
|
571
1234
|
|
1235
|
+
# TODO: some way to handle KeyboardInterrupt during submission?
|
1236
|
+
# - stop, and cancel already submitted?
|
1237
|
+
|
572
1238
|
if submitted_js_idx:
|
573
1239
|
dt_str = current_timestamp().strftime(self._app._submission_ts_fmt)
|
574
1240
|
self._append_submission_part(
|
575
1241
|
submit_time=dt_str,
|
576
1242
|
submitted_js_idx=submitted_js_idx,
|
577
1243
|
)
|
1244
|
+
# ensure `_submission_parts` is committed
|
1245
|
+
self.workflow._store._pending.commit_all()
|
1246
|
+
|
578
1247
|
# add a record of the submission part to the known-submissions file
|
579
1248
|
if add_to_known:
|
580
1249
|
self._app._add_to_known_submissions(
|
@@ -606,11 +1275,75 @@ class Submission(JSONLike):
|
|
606
1275
|
# filter by active jobscripts:
|
607
1276
|
if js_idx := [i[1] for i in js_indices if i[1] in act_js]:
|
608
1277
|
print(
|
609
|
-
f"Cancelling jobscripts {js_idx
|
610
|
-
f"workflow {self.workflow.name!r}."
|
1278
|
+
f"Cancelling jobscripts {shorten_list_str(js_idx, items=5)} of "
|
1279
|
+
f"submission {self.index} of workflow {self.workflow.name!r}."
|
611
1280
|
)
|
612
1281
|
jobscripts = [self.jobscripts[i] for i in js_idx]
|
613
1282
|
sched_refs = [js.scheduler_js_ref for js in jobscripts]
|
614
1283
|
sched.cancel_jobs(js_refs=sched_refs, jobscripts=jobscripts)
|
615
1284
|
else:
|
616
1285
|
print("No active jobscripts to cancel.")
|
1286
|
+
|
1287
|
+
@TimeIt.decorator
|
1288
|
+
def get_scheduler_job_IDs(self) -> tuple[str, ...]:
|
1289
|
+
"""Return jobscript scheduler job IDs."""
|
1290
|
+
return tuple(
|
1291
|
+
js_i.scheduler_job_ID
|
1292
|
+
for js_i in self.jobscripts
|
1293
|
+
if js_i.scheduler_job_ID is not None
|
1294
|
+
)
|
1295
|
+
|
1296
|
+
@TimeIt.decorator
|
1297
|
+
def get_process_IDs(self) -> tuple[int, ...]:
|
1298
|
+
"""Return jobscript process IDs."""
|
1299
|
+
return tuple(
|
1300
|
+
js_i.process_ID for js_i in self.jobscripts if js_i.process_ID is not None
|
1301
|
+
)
|
1302
|
+
|
1303
|
+
@TimeIt.decorator
|
1304
|
+
def list_jobscripts(
|
1305
|
+
self,
|
1306
|
+
max_js: int | None = None,
|
1307
|
+
jobscripts: list[int] | None = None,
|
1308
|
+
width: int | None = None,
|
1309
|
+
) -> None:
|
1310
|
+
"""Print a table listing jobscripts and associated information.
|
1311
|
+
|
1312
|
+
Parameters
|
1313
|
+
----------
|
1314
|
+
max_js
|
1315
|
+
Maximum jobscript index to display. This cannot be specified with `jobscripts`.
|
1316
|
+
jobscripts
|
1317
|
+
A list of jobscripts to display. This cannot be specified with `max_js`.
|
1318
|
+
width
|
1319
|
+
Width in characters of the printed table.
|
1320
|
+
|
1321
|
+
"""
|
1322
|
+
self.workflow.list_jobscripts(
|
1323
|
+
sub_idx=self.index, max_js=max_js, jobscripts=jobscripts, width=width
|
1324
|
+
)
|
1325
|
+
|
1326
|
+
@TimeIt.decorator
|
1327
|
+
def list_task_jobscripts(
|
1328
|
+
self,
|
1329
|
+
task_names: list[str] | None = None,
|
1330
|
+
max_js: int | None = None,
|
1331
|
+
width: int | None = None,
|
1332
|
+
) -> None:
|
1333
|
+
"""Print a table listing the jobscripts associated with the specified (or all)
|
1334
|
+
tasks for the specified submission.
|
1335
|
+
|
1336
|
+
Parameters
|
1337
|
+
----------
|
1338
|
+
task_names
|
1339
|
+
List of sub-strings to match to task names. Only matching task names will be
|
1340
|
+
included.
|
1341
|
+
max_js
|
1342
|
+
Maximum jobscript index to display.
|
1343
|
+
width
|
1344
|
+
Width in characters of the printed table.
|
1345
|
+
|
1346
|
+
"""
|
1347
|
+
self.workflow.list_task_jobscripts(
|
1348
|
+
sub_idx=self.index, max_js=max_js, task_names=task_names, width=width
|
1349
|
+
)
|