hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a199__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
- hpcflow/_version.py +1 -1
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +5 -0
- hpcflow/sdk/app.py +150 -89
- hpcflow/sdk/cli.py +263 -84
- hpcflow/sdk/cli_common.py +99 -5
- hpcflow/sdk/config/callbacks.py +38 -1
- hpcflow/sdk/config/config.py +102 -13
- hpcflow/sdk/config/errors.py +19 -5
- hpcflow/sdk/config/types.py +3 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +914 -262
- hpcflow/sdk/core/cache.py +76 -34
- hpcflow/sdk/core/command_files.py +14 -128
- hpcflow/sdk/core/commands.py +35 -6
- hpcflow/sdk/core/element.py +122 -50
- hpcflow/sdk/core/errors.py +58 -2
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/loop.py +408 -50
- hpcflow/sdk/core/loop_cache.py +4 -4
- hpcflow/sdk/core/parameters.py +382 -37
- hpcflow/sdk/core/run_dir_files.py +13 -40
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +119 -30
- hpcflow/sdk/core/task_schema.py +68 -0
- hpcflow/sdk/core/test_utils.py +66 -27
- hpcflow/sdk/core/types.py +54 -1
- hpcflow/sdk/core/utils.py +78 -7
- hpcflow/sdk/core/workflow.py +1538 -336
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +7 -0
- hpcflow/sdk/helper/cli.py +1 -0
- hpcflow/sdk/log.py +42 -15
- hpcflow/sdk/persistence/base.py +405 -53
- hpcflow/sdk/persistence/json.py +177 -52
- hpcflow/sdk/persistence/pending.py +237 -69
- hpcflow/sdk/persistence/store_resource.py +3 -2
- hpcflow/sdk/persistence/types.py +15 -4
- hpcflow/sdk/persistence/zarr.py +928 -81
- hpcflow/sdk/submission/jobscript.py +1408 -489
- hpcflow/sdk/submission/schedulers/__init__.py +40 -5
- hpcflow/sdk/submission/schedulers/direct.py +33 -19
- hpcflow/sdk/submission/schedulers/sge.py +51 -16
- hpcflow/sdk/submission/schedulers/slurm.py +44 -16
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/base.py +68 -20
- hpcflow/sdk/submission/shells/bash.py +222 -129
- hpcflow/sdk/submission/shells/powershell.py +200 -150
- hpcflow/sdk/submission/submission.py +852 -119
- hpcflow/sdk/submission/types.py +18 -21
- hpcflow/sdk/typing.py +24 -5
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +19 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +821 -70
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
- hpcflow/tests/unit/test_action.py +176 -0
- hpcflow/tests/unit/test_app.py +20 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +133 -0
- hpcflow/tests/unit/test_config.py +122 -1
- hpcflow/tests/unit/test_element_iteration.py +47 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_loop.py +1332 -27
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_parameter.py +13 -0
- hpcflow/tests/unit/test_persistence.py +190 -8
- hpcflow/tests/unit/test_run.py +109 -3
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_shell.py +20 -0
- hpcflow/tests/unit/test_submission.py +5 -76
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +332 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +142 -2
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +7 -4
- hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
- hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/core/workflow.py
CHANGED
@@ -4,28 +4,53 @@ Main workflow model.
|
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
6
|
from collections import defaultdict
|
7
|
+
from collections.abc import Callable
|
7
8
|
from contextlib import contextmanager, nullcontext
|
8
9
|
import copy
|
9
10
|
from dataclasses import dataclass, field
|
10
11
|
|
12
|
+
from functools import wraps
|
13
|
+
import os
|
11
14
|
from pathlib import Path
|
12
15
|
import random
|
16
|
+
import shutil
|
13
17
|
import string
|
14
18
|
from threading import Thread
|
15
19
|
import time
|
16
|
-
from typing import overload, cast, TYPE_CHECKING
|
20
|
+
from typing import overload, cast, TYPE_CHECKING, TypeVar
|
21
|
+
from typing_extensions import ParamSpec, Concatenate
|
22
|
+
|
17
23
|
from uuid import uuid4
|
18
24
|
from warnings import warn
|
19
25
|
from fsspec.implementations.local import LocalFileSystem # type: ignore
|
20
26
|
from fsspec.implementations.zip import ZipFileSystem # type: ignore
|
21
27
|
import numpy as np
|
22
28
|
from fsspec.core import url_to_fs # type: ignore
|
29
|
+
from rich import print as rich_print
|
23
30
|
import rich.console
|
31
|
+
import rich.panel
|
32
|
+
import rich.table
|
33
|
+
import rich.text
|
34
|
+
import rich.box
|
35
|
+
|
24
36
|
|
37
|
+
from hpcflow.sdk import app
|
25
38
|
from hpcflow.sdk.typing import hydrate
|
26
|
-
from hpcflow.sdk.
|
39
|
+
from hpcflow.sdk.config.errors import (
|
40
|
+
ConfigNonConfigurableError,
|
41
|
+
UnknownMetaTaskConstitutiveSchema,
|
42
|
+
)
|
43
|
+
from hpcflow.sdk.core import (
|
44
|
+
ALL_TEMPLATE_FORMATS,
|
45
|
+
ABORT_EXIT_CODE,
|
46
|
+
RUN_DIR_ARR_FILL,
|
47
|
+
SKIPPED_EXIT_CODE,
|
48
|
+
NO_COMMANDS_EXIT_CODE,
|
49
|
+
)
|
27
50
|
from hpcflow.sdk.core.app_aware import AppAware
|
28
51
|
from hpcflow.sdk.core.enums import EARStatus
|
52
|
+
from hpcflow.sdk.core.skip_reason import SkipReason
|
53
|
+
from hpcflow.sdk.core.cache import ObjectCache
|
29
54
|
from hpcflow.sdk.core.loop_cache import LoopCache, LoopIndex
|
30
55
|
from hpcflow.sdk.log import TimeIt
|
31
56
|
from hpcflow.sdk.persistence import store_cls_from_str
|
@@ -35,18 +60,22 @@ from hpcflow.sdk.persistence.utils import ask_pw_on_auth_exc, infer_store
|
|
35
60
|
from hpcflow.sdk.submission.jobscript import (
|
36
61
|
generate_EAR_resource_map,
|
37
62
|
group_resource_map_into_jobscripts,
|
38
|
-
|
63
|
+
is_jobscript_array,
|
39
64
|
merge_jobscripts_across_tasks,
|
65
|
+
resolve_jobscript_blocks,
|
40
66
|
resolve_jobscript_dependencies,
|
41
67
|
)
|
42
68
|
from hpcflow.sdk.submission.enums import JobscriptElementState
|
43
69
|
from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
|
70
|
+
from hpcflow.sdk.submission.submission import Submission
|
44
71
|
from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
|
72
|
+
from hpcflow.sdk.utils.strings import shorten_list_str
|
45
73
|
from hpcflow.sdk.core.utils import (
|
46
74
|
read_JSON_file,
|
47
75
|
read_JSON_string,
|
48
76
|
read_YAML_str,
|
49
77
|
read_YAML_file,
|
78
|
+
redirect_std_to_file,
|
50
79
|
replace_items,
|
51
80
|
current_timestamp,
|
52
81
|
normalise_timestamp,
|
@@ -58,6 +87,7 @@ from hpcflow.sdk.core.errors import (
|
|
58
87
|
OutputFileParserNoOutputError,
|
59
88
|
RunNotAbortableError,
|
60
89
|
SubmissionFailure,
|
90
|
+
UnsetParameterDataErrorBase,
|
61
91
|
WorkflowSubmissionFailure,
|
62
92
|
)
|
63
93
|
|
@@ -70,7 +100,7 @@ if TYPE_CHECKING:
|
|
70
100
|
import psutil
|
71
101
|
from rich.status import Status
|
72
102
|
from ..typing import DataIndex, ParamSource, PathLike, TemplateComponents
|
73
|
-
from .actions import ElementActionRun
|
103
|
+
from .actions import ElementActionRun, UnsetParamTracker
|
74
104
|
from .element import Element, ElementIteration
|
75
105
|
from .loop import Loop, WorkflowLoop
|
76
106
|
from .object_list import ObjectList, ResourceList, WorkflowLoopList, WorkflowTaskList
|
@@ -82,6 +112,8 @@ if TYPE_CHECKING:
|
|
82
112
|
Pending,
|
83
113
|
Resources,
|
84
114
|
WorkflowTemplateTaskData,
|
115
|
+
WorkflowTemplateElementSetData,
|
116
|
+
BlockActionKey,
|
85
117
|
)
|
86
118
|
from ..submission.submission import Submission
|
87
119
|
from ..submission.jobscript import (
|
@@ -97,10 +129,15 @@ if TYPE_CHECKING:
|
|
97
129
|
StoreEAR,
|
98
130
|
)
|
99
131
|
from ..persistence.types import TemplateMeta
|
132
|
+
from .json_like import JSONed
|
100
133
|
|
101
134
|
#: Convenience alias
|
102
135
|
_TemplateComponents: TypeAlias = "dict[str, ObjectList[JSONLike]]"
|
103
136
|
|
137
|
+
P = ParamSpec("P")
|
138
|
+
T = TypeVar("T")
|
139
|
+
S = TypeVar("S", bound="Workflow")
|
140
|
+
|
104
141
|
|
105
142
|
@dataclass
|
106
143
|
class _Pathway:
|
@@ -202,6 +239,7 @@ class WorkflowTemplate(JSONLike):
|
|
202
239
|
workflow: Workflow | None = None
|
203
240
|
#: Template-level resources to apply to all tasks as default values.
|
204
241
|
resources: Resources = None
|
242
|
+
config: dict = field(default_factory=lambda: {})
|
205
243
|
#: The execution environments to use.
|
206
244
|
environments: Mapping[str, Mapping[str, Any]] | None = None
|
207
245
|
#: The environment presets to use.
|
@@ -216,6 +254,34 @@ class WorkflowTemplate(JSONLike):
|
|
216
254
|
merge_envs: bool = True
|
217
255
|
|
218
256
|
def __post_init__(self) -> None:
|
257
|
+
|
258
|
+
# TODO: in what scenario is the reindex required? are loops initialised?
|
259
|
+
|
260
|
+
# replace metatasks with tasks
|
261
|
+
new_tasks: list[Task] = []
|
262
|
+
do_reindex = False
|
263
|
+
reindex = {}
|
264
|
+
for task_idx, i in enumerate(self.tasks):
|
265
|
+
if isinstance(i, app.MetaTask):
|
266
|
+
do_reindex = True
|
267
|
+
tasks_from_meta = copy.deepcopy(i.tasks)
|
268
|
+
reindex[task_idx] = [
|
269
|
+
len(new_tasks) + i for i in range(len(tasks_from_meta))
|
270
|
+
]
|
271
|
+
new_tasks.extend(tasks_from_meta)
|
272
|
+
else:
|
273
|
+
reindex[task_idx] = [len(new_tasks)]
|
274
|
+
new_tasks.append(i)
|
275
|
+
if do_reindex:
|
276
|
+
if self.loops:
|
277
|
+
for loop_idx, loop in enumerate(cast("list[dict[str, Any]]", self.loops)):
|
278
|
+
loop["tasks"] = [j for i in loop["tasks"] for j in reindex[i]]
|
279
|
+
term_task = loop.get("termination_task")
|
280
|
+
if term_task is not None:
|
281
|
+
loop["termination_task"] = reindex[term_task][0]
|
282
|
+
|
283
|
+
self.tasks = new_tasks
|
284
|
+
|
219
285
|
resources = self._app.ResourceList.normalise(self.resources)
|
220
286
|
self.resources = resources
|
221
287
|
self._set_parent_refs()
|
@@ -235,6 +301,13 @@ class WorkflowTemplate(JSONLike):
|
|
235
301
|
if self.doc and not isinstance(self.doc, list):
|
236
302
|
self.doc = [self.doc]
|
237
303
|
|
304
|
+
if self.config:
|
305
|
+
# don't do a full validation (which would require loading the config file),
|
306
|
+
# just check all specified keys are configurable:
|
307
|
+
bad_keys = set(self.config) - set(self._app.config_options._configurable_keys)
|
308
|
+
if bad_keys:
|
309
|
+
raise ConfigNonConfigurableError(name=bad_keys)
|
310
|
+
|
238
311
|
@property
|
239
312
|
def _resources(self) -> ResourceList:
|
240
313
|
res = self.resources
|
@@ -324,22 +397,121 @@ class WorkflowTemplate(JSONLike):
|
|
324
397
|
@classmethod
|
325
398
|
@TimeIt.decorator
|
326
399
|
def _from_data(cls, data: dict[str, Any]) -> WorkflowTemplate:
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
400
|
+
def _normalise_task_parametrisation(task_lst: list[WorkflowTemplateTaskData]):
|
401
|
+
"""
|
402
|
+
For each dict in a list of task parametrisations, ensure the `schema` key is
|
403
|
+
a list of values, and ensure `element_sets` are defined.
|
404
|
+
|
405
|
+
This mutates `task_lst`.
|
406
|
+
|
407
|
+
"""
|
408
|
+
# use element_sets if not already:
|
409
|
+
task_dat: WorkflowTemplateTaskData
|
410
|
+
for task_idx, task_dat in enumerate(task_lst):
|
411
|
+
schema = task_dat.pop("schema")
|
412
|
+
schema_list: list = schema if isinstance(schema, list) else [schema]
|
413
|
+
if "element_sets" in task_dat:
|
414
|
+
# just update the schema to a list:
|
415
|
+
task_lst[task_idx]["schema"] = schema_list
|
416
|
+
else:
|
417
|
+
# add a single element set, and update the schema to a list:
|
418
|
+
out_labels = task_dat.pop("output_labels", [])
|
419
|
+
es_dat = cast("WorkflowTemplateElementSetData", task_dat)
|
420
|
+
new_task_dat: WorkflowTemplateTaskData = {
|
421
|
+
"schema": schema_list,
|
422
|
+
"element_sets": [es_dat],
|
423
|
+
"output_labels": out_labels,
|
424
|
+
}
|
425
|
+
task_lst[task_idx] = new_task_dat
|
426
|
+
# move sequences with `paths` (note: plural) to multi_path_sequences:
|
427
|
+
for elem_set in task_lst[task_idx]["element_sets"]:
|
428
|
+
new_mps = []
|
429
|
+
seqs = elem_set.get("sequences", [])
|
430
|
+
seqs = list(seqs) # copy
|
431
|
+
# loop in reverse so indices for pop are valid:
|
432
|
+
for seq_idx, seq_dat in zip(range(len(seqs) - 1, -1, -1), seqs[::-1]):
|
433
|
+
if "paths" in seq_dat: # (note: plural)
|
434
|
+
# move to a multi-path sequence:
|
435
|
+
new_mps.append(elem_set["sequences"].pop(seq_idx))
|
436
|
+
elem_set.setdefault("multi_path_sequences", []).extend(new_mps[::-1])
|
437
|
+
|
438
|
+
meta_tasks = data.pop("meta_tasks", {})
|
439
|
+
if meta_tasks:
|
440
|
+
for i in list(meta_tasks):
|
441
|
+
_normalise_task_parametrisation(meta_tasks[i])
|
442
|
+
new_task_dat: list[WorkflowTemplateTaskData] = []
|
443
|
+
reindex = {}
|
444
|
+
for task_idx, task_dat in enumerate(data["tasks"]):
|
445
|
+
if meta_task_dat := meta_tasks.get(task_dat["schema"]):
|
446
|
+
reindex[task_idx] = [
|
447
|
+
len(new_task_dat) + i for i in range(len(meta_task_dat))
|
448
|
+
]
|
449
|
+
|
450
|
+
all_schema_names = [j for i in meta_task_dat for j in i["schema"]]
|
451
|
+
|
452
|
+
# update any parametrisation provided in the task list:
|
453
|
+
base_data = copy.deepcopy(meta_task_dat)
|
454
|
+
|
455
|
+
# any other keys in `task_dat` should be mappings whose keys are
|
456
|
+
# the schema name (within the meta task) optionally suffixed by
|
457
|
+
# a period and the element set index to which the updates should be
|
458
|
+
# copied (no integer suffix indicates the zeroth element set):
|
459
|
+
for k, v in task_dat.items():
|
460
|
+
if k == "schema":
|
461
|
+
continue
|
462
|
+
|
463
|
+
for elem_set_id, dat in v.items():
|
464
|
+
|
465
|
+
elem_set_id_split = elem_set_id.split(".")
|
466
|
+
try:
|
467
|
+
es_idx = int(elem_set_id_split[-1])
|
468
|
+
schema_name = ".".join(elem_set_id_split[:-1])
|
469
|
+
except ValueError:
|
470
|
+
es_idx = 0
|
471
|
+
schema_name = ".".join(elem_set_id_split)
|
472
|
+
schema_name = schema_name.strip(".")
|
473
|
+
|
474
|
+
# check valid schema name:
|
475
|
+
if schema_name not in all_schema_names:
|
476
|
+
raise UnknownMetaTaskConstitutiveSchema(
|
477
|
+
f"Task schema with objective {schema_name!r} is not "
|
478
|
+
f"part of the meta-task with objective "
|
479
|
+
f"{task_dat['schema']!r}. The constitutive schemas of"
|
480
|
+
f" this meta-task have objectives: "
|
481
|
+
f"{all_schema_names!r}."
|
482
|
+
)
|
483
|
+
|
484
|
+
# copy `dat` to the correct schema and element set in the
|
485
|
+
# meta-task:
|
486
|
+
for s_idx, s in enumerate(base_data):
|
487
|
+
if s["schema"] == [schema_name]:
|
488
|
+
if k == "inputs":
|
489
|
+
# special case; merge inputs
|
490
|
+
base_data[s_idx]["element_sets"][es_idx][
|
491
|
+
k
|
492
|
+
].update(dat)
|
493
|
+
else:
|
494
|
+
# just overwrite
|
495
|
+
base_data[s_idx]["element_sets"][es_idx][k] = dat
|
496
|
+
|
497
|
+
new_task_dat.extend(base_data)
|
498
|
+
|
499
|
+
else:
|
500
|
+
reindex[task_idx] = [len(new_task_dat)]
|
501
|
+
new_task_dat.append(task_dat)
|
502
|
+
|
503
|
+
data["tasks"] = new_task_dat
|
504
|
+
|
505
|
+
if loops := data.get("loops"):
|
506
|
+
for loop_idx, loop in enumerate(loops):
|
507
|
+
loops[loop_idx]["tasks"] = [
|
508
|
+
j for i in loop["tasks"] for j in reindex[i]
|
509
|
+
]
|
510
|
+
term_task = loop.get("termination_task")
|
511
|
+
if term_task is not None:
|
512
|
+
loops[loop_idx]["termination_task"] = reindex[term_task][0]
|
513
|
+
|
514
|
+
_normalise_task_parametrisation(data["tasks"])
|
343
515
|
|
344
516
|
# extract out any template components:
|
345
517
|
# TODO: TypedDict for data
|
@@ -368,7 +540,24 @@ class WorkflowTemplate(JSONLike):
|
|
368
540
|
)
|
369
541
|
cls._app.task_schemas.add_objects(task_schemas, skip_duplicates=True)
|
370
542
|
|
371
|
-
|
543
|
+
if mts_dat := tcs.pop("meta_task_schemas", []):
|
544
|
+
meta_ts = [
|
545
|
+
cls._app.MetaTaskSchema.from_json_like(
|
546
|
+
i, shared_data=cls._app.template_components
|
547
|
+
)
|
548
|
+
for i in mts_dat
|
549
|
+
]
|
550
|
+
cls._app.task_schemas.add_objects(meta_ts, skip_duplicates=True)
|
551
|
+
|
552
|
+
wkt = cls.from_json_like(data, shared_data=cls._app._shared_data)
|
553
|
+
|
554
|
+
# print(f"WorkflowTemplate._from_data: {wkt=!r}")
|
555
|
+
# TODO: what is this for!?
|
556
|
+
# for idx, task in enumerate(wkt.tasks):
|
557
|
+
# if isinstance(task.schema, cls._app.MetaTaskSchema):
|
558
|
+
# print(f"{task=!r}")
|
559
|
+
# wkt.tasks[idx] = cls._app.MetaTask(schema=task.schema, tasks=task.tasks)
|
560
|
+
return wkt
|
372
561
|
|
373
562
|
@classmethod
|
374
563
|
@TimeIt.decorator
|
@@ -571,6 +760,25 @@ class _IterationData:
|
|
571
760
|
idx: int
|
572
761
|
|
573
762
|
|
763
|
+
def load_workflow_config(
|
764
|
+
func: Callable[Concatenate[S, P], T],
|
765
|
+
) -> Callable[Concatenate[S, P], T]:
|
766
|
+
"""Decorator to apply workflow-level config items during execution of a Workflow
|
767
|
+
method."""
|
768
|
+
|
769
|
+
@wraps(func)
|
770
|
+
def wrapped(self: S, *args: P.args, **kwargs: P.kwargs) -> T:
|
771
|
+
|
772
|
+
updates = self.template.config
|
773
|
+
if updates:
|
774
|
+
with self._app.config._with_updates(updates):
|
775
|
+
return func(self, *args, **kwargs)
|
776
|
+
else:
|
777
|
+
return func(self, *args, **kwargs)
|
778
|
+
|
779
|
+
return wrapped
|
780
|
+
|
781
|
+
|
574
782
|
class Workflow(AppAware):
|
575
783
|
"""
|
576
784
|
A concrete workflow.
|
@@ -630,9 +838,18 @@ class Workflow(AppAware):
|
|
630
838
|
self._store = store_cls(self._app, self, self.path, fs)
|
631
839
|
self._in_batch_mode = False # flag to track when processing batch updates
|
632
840
|
|
841
|
+
self._use_merged_parameters_cache = False
|
842
|
+
self._merged_parameters_cache: dict[
|
843
|
+
tuple[str | None, tuple[tuple[str, tuple[int, ...] | int], ...]], Any
|
844
|
+
] = {}
|
845
|
+
|
633
846
|
# store indices of updates during batch update, so we can revert on failure:
|
634
847
|
self._pending = self._get_empty_pending()
|
635
848
|
|
849
|
+
# reassigned within `ElementActionRun.raise_on_failure_threshold` context manager:
|
850
|
+
self._is_tracking_unset: bool = False
|
851
|
+
self._tracked_unset: dict[str, UnsetParamTracker] | None = None
|
852
|
+
|
636
853
|
def reload(self) -> Self:
|
637
854
|
"""Reload the workflow from disk."""
|
638
855
|
return self.__class__(self.url)
|
@@ -743,7 +960,12 @@ class Workflow(AppAware):
|
|
743
960
|
f"{len(template.loops)} ({loop.name!r})"
|
744
961
|
)
|
745
962
|
wk._add_loop(loop, cache=cache, status=status)
|
746
|
-
|
963
|
+
if status:
|
964
|
+
status.update(
|
965
|
+
f"Added {len(template.loops)} loops. "
|
966
|
+
f"Committing to store..."
|
967
|
+
)
|
968
|
+
except (Exception, NotImplementedError):
|
747
969
|
if status:
|
748
970
|
status.stop()
|
749
971
|
raise
|
@@ -820,6 +1042,7 @@ class Workflow(AppAware):
|
|
820
1042
|
ts_name_fmt: str | None = None,
|
821
1043
|
store_kwargs: dict[str, Any] | None = None,
|
822
1044
|
variables: dict[str, str] | None = None,
|
1045
|
+
status: Status | None = None,
|
823
1046
|
) -> Workflow:
|
824
1047
|
"""Generate from a YAML string.
|
825
1048
|
|
@@ -864,6 +1087,7 @@ class Workflow(AppAware):
|
|
864
1087
|
ts_fmt,
|
865
1088
|
ts_name_fmt,
|
866
1089
|
store_kwargs,
|
1090
|
+
status,
|
867
1091
|
)
|
868
1092
|
|
869
1093
|
@classmethod
|
@@ -1066,6 +1290,7 @@ class Workflow(AppAware):
|
|
1066
1290
|
tasks: list[Task] | None = None,
|
1067
1291
|
loops: list[Loop] | None = None,
|
1068
1292
|
resources: Resources = None,
|
1293
|
+
config: dict | None = None,
|
1069
1294
|
path: PathLike | None = None,
|
1070
1295
|
workflow_name: str | None = None,
|
1071
1296
|
overwrite: bool = False,
|
@@ -1089,6 +1314,9 @@ class Workflow(AppAware):
|
|
1089
1314
|
Mapping of action scopes to resource requirements, to be applied to all
|
1090
1315
|
element sets in the workflow. `resources` specified in an element set take
|
1091
1316
|
precedence of those defined here for the whole workflow.
|
1317
|
+
config:
|
1318
|
+
Configuration items that should be set whenever the resulting workflow is
|
1319
|
+
loaded. This includes config items that apply during workflow execution.
|
1092
1320
|
path:
|
1093
1321
|
The directory in which the workflow will be generated. The current directory
|
1094
1322
|
if not specified.
|
@@ -1116,6 +1344,7 @@ class Workflow(AppAware):
|
|
1116
1344
|
tasks=tasks or [],
|
1117
1345
|
loops=loops or [],
|
1118
1346
|
resources=resources,
|
1347
|
+
config=config or {},
|
1119
1348
|
)
|
1120
1349
|
return cls.from_template(
|
1121
1350
|
template,
|
@@ -1248,6 +1477,7 @@ class Workflow(AppAware):
|
|
1248
1477
|
self._store.add_loop(
|
1249
1478
|
loop_template=cast("Mapping", loop_js),
|
1250
1479
|
iterable_parameters=wk_loop.iterable_parameters,
|
1480
|
+
output_parameters=wk_loop.output_parameters,
|
1251
1481
|
parents=wk_loop.parents,
|
1252
1482
|
num_added_iterations=wk_loop.num_added_iterations,
|
1253
1483
|
iter_IDs=iter_IDs,
|
@@ -1275,7 +1505,7 @@ class Workflow(AppAware):
|
|
1275
1505
|
status.update(
|
1276
1506
|
f"{status_prev}: iteration {iter_idx + 2}/{loop.num_iterations}."
|
1277
1507
|
)
|
1278
|
-
new_wk_loop.add_iteration(cache=cache_)
|
1508
|
+
new_wk_loop.add_iteration(cache=cache_, status=status)
|
1279
1509
|
|
1280
1510
|
def add_loop(self, loop: Loop) -> None:
|
1281
1511
|
"""Add a loop to a subset of workflow tasks."""
|
@@ -1360,6 +1590,7 @@ class Workflow(AppAware):
|
|
1360
1590
|
return self._template
|
1361
1591
|
|
1362
1592
|
@property
|
1593
|
+
@TimeIt.decorator
|
1363
1594
|
def tasks(self) -> WorkflowTaskList:
|
1364
1595
|
"""
|
1365
1596
|
The tasks in this workflow.
|
@@ -1410,12 +1641,14 @@ class Workflow(AppAware):
|
|
1410
1641
|
repack_iteration_tuples(loop_dat["num_added_iterations"])
|
1411
1642
|
),
|
1412
1643
|
iterable_parameters=loop_dat["iterable_parameters"],
|
1644
|
+
output_parameters=loop_dat["output_parameters"],
|
1413
1645
|
)
|
1414
1646
|
for idx, loop_dat in self._store.get_loops().items()
|
1415
1647
|
)
|
1416
1648
|
return self._loops
|
1417
1649
|
|
1418
1650
|
@property
|
1651
|
+
@TimeIt.decorator
|
1419
1652
|
def submissions(self) -> list[Submission]:
|
1420
1653
|
"""
|
1421
1654
|
The job submissions done by this workflow.
|
@@ -1587,56 +1820,70 @@ class Workflow(AppAware):
|
|
1587
1820
|
|
1588
1821
|
@TimeIt.decorator
|
1589
1822
|
def get_EARs_from_IDs(
|
1590
|
-
self, ids: Iterable[int] | int
|
1591
|
-
) -> list[ElementActionRun] | ElementActionRun:
|
1823
|
+
self, ids: Iterable[int] | int, as_dict: bool = False
|
1824
|
+
) -> list[ElementActionRun] | dict[int, ElementActionRun] | ElementActionRun:
|
1592
1825
|
"""Get element action run objects from a list of IDs."""
|
1593
1826
|
id_lst = [ids] if isinstance(ids, int) else list(ids)
|
1594
|
-
self._app.persistence_logger.debug(f"get_EARs_from_IDs: id_lst={id_lst!r}")
|
1595
1827
|
|
1596
|
-
|
1597
|
-
store_iters = self.get_store_element_iterations(
|
1598
|
-
ear.elem_iter_ID for ear in store_EARs
|
1599
|
-
)
|
1600
|
-
store_elems = self.get_store_elements(it.element_ID for it in store_iters)
|
1601
|
-
store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
|
1828
|
+
with self._store.cached_load(), self._store.cache_ctx():
|
1602
1829
|
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
lambda: defaultdict(set)
|
1607
|
-
)
|
1830
|
+
self._app.persistence_logger.debug(
|
1831
|
+
f"get_EARs_from_IDs: {len(id_lst)} EARs: {shorten_list_str(id_lst)}."
|
1832
|
+
)
|
1608
1833
|
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
run_idx = it.EAR_IDs[act_idx].index(rn.id_) if it.EAR_IDs is not None else -1
|
1613
|
-
iter_idx = el.iteration_IDs.index(it.id_)
|
1614
|
-
elem_idx = tk.element_IDs.index(el.id_)
|
1615
|
-
index_paths.append(
|
1616
|
-
Workflow._IndexPath3(run_idx, act_idx, iter_idx, elem_idx, tk.index)
|
1834
|
+
store_EARs = self.get_store_EARs(id_lst)
|
1835
|
+
store_iters = self.get_store_element_iterations(
|
1836
|
+
ear.elem_iter_ID for ear in store_EARs
|
1617
1837
|
)
|
1618
|
-
|
1619
|
-
|
1620
|
-
|
1621
|
-
|
1622
|
-
|
1623
|
-
|
1624
|
-
|
1625
|
-
|
1626
|
-
|
1838
|
+
store_elems = self.get_store_elements(it.element_ID for it in store_iters)
|
1839
|
+
store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
|
1840
|
+
|
1841
|
+
# to allow for bulk retrieval of elements/iterations
|
1842
|
+
element_idx_by_task: dict[int, set[int]] = defaultdict(set)
|
1843
|
+
iter_idx_by_task_elem: dict[int, dict[int, set[int]]] = defaultdict(
|
1844
|
+
lambda: defaultdict(set)
|
1845
|
+
)
|
1846
|
+
|
1847
|
+
index_paths: list[Workflow._IndexPath3] = []
|
1848
|
+
for rn, it, el, tk in zip(store_EARs, store_iters, store_elems, store_tasks):
|
1849
|
+
act_idx = rn.action_idx
|
1850
|
+
run_idx = (
|
1851
|
+
it.EAR_IDs[act_idx].index(rn.id_) if it.EAR_IDs is not None else -1
|
1852
|
+
)
|
1853
|
+
iter_idx = el.iteration_IDs.index(it.id_)
|
1854
|
+
elem_idx = tk.element_IDs.index(el.id_)
|
1855
|
+
index_paths.append(
|
1856
|
+
Workflow._IndexPath3(run_idx, act_idx, iter_idx, elem_idx, tk.index)
|
1857
|
+
)
|
1858
|
+
element_idx_by_task[tk.index].add(elem_idx)
|
1859
|
+
iter_idx_by_task_elem[tk.index][elem_idx].add(iter_idx)
|
1860
|
+
|
1861
|
+
# retrieve elements/iterations:
|
1862
|
+
iters = {
|
1863
|
+
task_idx: {
|
1864
|
+
elem_i.index: {
|
1865
|
+
iter_idx: elem_i.iterations[iter_idx]
|
1866
|
+
for iter_idx in iter_idx_by_task_elem[task_idx][elem_i.index]
|
1867
|
+
}
|
1868
|
+
for elem_i in self.tasks[task_idx].elements[list(elem_idxes)]
|
1627
1869
|
}
|
1628
|
-
for
|
1870
|
+
for task_idx, elem_idxes in element_idx_by_task.items()
|
1629
1871
|
}
|
1630
|
-
for task_idx, elem_idxes in element_idx_by_task.items()
|
1631
|
-
}
|
1632
1872
|
|
1633
|
-
|
1634
|
-
|
1635
|
-
|
1636
|
-
|
1637
|
-
|
1638
|
-
|
1639
|
-
|
1873
|
+
result = {}
|
1874
|
+
for path in index_paths:
|
1875
|
+
run = (
|
1876
|
+
iters[path.task][path.elem][path.iter]
|
1877
|
+
.actions[path.act]
|
1878
|
+
.runs[path.run]
|
1879
|
+
)
|
1880
|
+
result[run.id_] = run
|
1881
|
+
|
1882
|
+
if not as_dict:
|
1883
|
+
res_lst = list(result.values())
|
1884
|
+
return res_lst[0] if isinstance(ids, int) else res_lst
|
1885
|
+
|
1886
|
+
return result
|
1640
1887
|
|
1641
1888
|
@TimeIt.decorator
|
1642
1889
|
def get_all_elements(self) -> list[Element]:
|
@@ -1722,6 +1969,20 @@ class Workflow(AppAware):
|
|
1722
1969
|
self._app.persistence_logger.info("exiting batch update")
|
1723
1970
|
self._in_batch_mode = False
|
1724
1971
|
|
1972
|
+
@contextmanager
|
1973
|
+
def cached_merged_parameters(self):
|
1974
|
+
if self._use_merged_parameters_cache:
|
1975
|
+
yield
|
1976
|
+
else:
|
1977
|
+
try:
|
1978
|
+
self._app.logger.debug("entering merged-parameters cache.")
|
1979
|
+
self._use_merged_parameters_cache = True
|
1980
|
+
yield
|
1981
|
+
finally:
|
1982
|
+
self._app.logger.debug("exiting merged-parameters cache.")
|
1983
|
+
self._use_merged_parameters_cache = False
|
1984
|
+
self._merged_parameters_cache = {} # reset the cache
|
1985
|
+
|
1725
1986
|
@classmethod
|
1726
1987
|
def temporary_rename(cls, path: str, fs: AbstractFileSystem) -> str:
|
1727
1988
|
"""Rename an existing same-path workflow (directory) so we can restore it if
|
@@ -1883,7 +2144,7 @@ class Workflow(AppAware):
|
|
1883
2144
|
if template.source_file:
|
1884
2145
|
wk.artifacts_path.mkdir(exist_ok=False)
|
1885
2146
|
src = Path(template.source_file)
|
1886
|
-
wk.artifacts_path.joinpath(src.name)
|
2147
|
+
shutil.copy(src, wk.artifacts_path.joinpath(src.name))
|
1887
2148
|
|
1888
2149
|
return wk
|
1889
2150
|
|
@@ -2193,7 +2454,11 @@ class Workflow(AppAware):
|
|
2193
2454
|
"""
|
2194
2455
|
The total number of job submissions.
|
2195
2456
|
"""
|
2196
|
-
return
|
2457
|
+
return (
|
2458
|
+
len(self._submissions)
|
2459
|
+
if self._submissions is not None
|
2460
|
+
else self._store._get_num_total_submissions()
|
2461
|
+
)
|
2197
2462
|
|
2198
2463
|
@property
|
2199
2464
|
def num_elements(self) -> int:
|
@@ -2276,22 +2541,26 @@ class Workflow(AppAware):
|
|
2276
2541
|
for te in self._store.get_task_elements(task.insert_ID, idx_lst)
|
2277
2542
|
]
|
2278
2543
|
|
2279
|
-
def
|
2280
|
-
|
2544
|
+
def set_EAR_start(
|
2545
|
+
self, run_id: int, run_dir: Path | None, port_number: int | None
|
2546
|
+
) -> None:
|
2547
|
+
"""Set the start time on an EAR."""
|
2548
|
+
self._app.logger.debug(f"Setting start for EAR ID {run_id!r}")
|
2281
2549
|
with self._store.cached_load(), self.batch_update():
|
2282
|
-
self._store.
|
2550
|
+
self._store.set_EAR_start(run_id, run_dir, port_number)
|
2283
2551
|
|
2284
|
-
def
|
2285
|
-
|
2286
|
-
|
2552
|
+
def set_multi_run_starts(
|
2553
|
+
self, run_ids: list[int], run_dirs: list[Path | None], port_number: int
|
2554
|
+
) -> None:
|
2555
|
+
"""Set the start time on multiple runs."""
|
2556
|
+
self._app.logger.debug(f"Setting start for multiple run IDs {run_ids!r}")
|
2287
2557
|
with self._store.cached_load(), self.batch_update():
|
2288
|
-
self._store.
|
2558
|
+
self._store.set_multi_run_starts(run_ids, run_dirs, port_number)
|
2289
2559
|
|
2290
2560
|
def set_EAR_end(
|
2291
2561
|
self,
|
2292
|
-
|
2293
|
-
|
2294
|
-
EAR_ID: int,
|
2562
|
+
block_act_key: BlockActionKey,
|
2563
|
+
run: ElementActionRun,
|
2295
2564
|
exit_code: int,
|
2296
2565
|
) -> None:
|
2297
2566
|
"""Set the end time and exit code on an EAR.
|
@@ -2301,108 +2570,430 @@ class Workflow(AppAware):
|
|
2301
2570
|
|
2302
2571
|
"""
|
2303
2572
|
self._app.logger.debug(
|
2304
|
-
f"Setting end for
|
2573
|
+
f"Setting end for run ID {run.id_!r} with exit code {exit_code!r}."
|
2305
2574
|
)
|
2306
|
-
|
2307
|
-
|
2308
|
-
|
2309
|
-
|
2310
|
-
|
2575
|
+
param_id: int | list[int] | None
|
2576
|
+
with self._store.cached_load(), self.batch_update():
|
2577
|
+
success = exit_code == 0 # TODO more sophisticated success heuristics
|
2578
|
+
if not run.skip:
|
2579
|
+
|
2580
|
+
is_aborted = False
|
2581
|
+
if run.action.abortable and exit_code == ABORT_EXIT_CODE:
|
2311
2582
|
# the point of aborting an EAR is to continue with the workflow:
|
2583
|
+
is_aborted = True
|
2312
2584
|
success = True
|
2313
2585
|
|
2314
|
-
|
2315
|
-
|
2316
|
-
|
2317
|
-
|
2318
|
-
|
2319
|
-
|
2320
|
-
|
2321
|
-
|
2322
|
-
file_paths = inp_file.value()
|
2323
|
-
for path_i in (
|
2324
|
-
file_paths if isinstance(file_paths, list) else [file_paths]
|
2325
|
-
):
|
2326
|
-
self._set_file(
|
2327
|
-
param_id=param_id,
|
2328
|
-
store_contents=True, # TODO: make optional according to IFG
|
2329
|
-
is_input=False,
|
2330
|
-
path=Path(path_i).resolve(),
|
2586
|
+
run_dir = run.get_directory()
|
2587
|
+
if run_dir:
|
2588
|
+
assert isinstance(run_dir, Path)
|
2589
|
+
for IFG_i in run.action.input_file_generators:
|
2590
|
+
inp_file = IFG_i.input_file
|
2591
|
+
self._app.logger.debug(
|
2592
|
+
f"Saving EAR input file: {inp_file.label!r} for EAR ID "
|
2593
|
+
f"{run.id_!r}."
|
2331
2594
|
)
|
2595
|
+
param_id = run.data_idx[f"input_files.{inp_file.label}"]
|
2332
2596
|
|
2333
|
-
|
2334
|
-
|
2597
|
+
file_paths = inp_file.value(directory=run_dir)
|
2598
|
+
for path_i in (
|
2599
|
+
file_paths if isinstance(file_paths, list) else [file_paths]
|
2600
|
+
):
|
2601
|
+
full_path = run_dir.joinpath(path_i)
|
2602
|
+
if not full_path.exists():
|
2603
|
+
self._app.logger.debug(
|
2604
|
+
f"expected input file {path_i!r} does not "
|
2605
|
+
f"exist, so setting run to an error state "
|
2606
|
+
f"(if not aborted)."
|
2607
|
+
)
|
2608
|
+
if not is_aborted and success is True:
|
2609
|
+
# this is unlikely to happen, but could happen
|
2610
|
+
# if the input file is deleted in between
|
2611
|
+
# the input file generator completing and this
|
2612
|
+
# code being run
|
2613
|
+
success = False
|
2614
|
+
exit_code = 1 # TODO more custom exit codes?
|
2615
|
+
else:
|
2616
|
+
self._set_file(
|
2617
|
+
param_id=param_id,
|
2618
|
+
store_contents=True, # TODO: make optional according to IFG
|
2619
|
+
is_input=False,
|
2620
|
+
path=full_path,
|
2621
|
+
)
|
2335
2622
|
|
2336
|
-
|
2337
|
-
|
2338
|
-
|
2339
|
-
|
2340
|
-
|
2341
|
-
|
2342
|
-
|
2343
|
-
|
2344
|
-
|
2345
|
-
|
2346
|
-
|
2347
|
-
|
2348
|
-
for path_i in (
|
2349
|
-
file_paths if isinstance(file_paths, list) else [file_paths]
|
2350
|
-
):
|
2351
|
-
self._set_file(
|
2352
|
-
param_id=param_id_j,
|
2353
|
-
store_contents=True,
|
2354
|
-
is_input=False,
|
2355
|
-
path=Path(path_i).resolve(),
|
2356
|
-
clean_up=(save_file_j in EAR.action.clean_up),
|
2357
|
-
)
|
2623
|
+
if run.action.script_data_out_has_files:
|
2624
|
+
try:
|
2625
|
+
run._param_save(block_act_key, run_dir)
|
2626
|
+
except FileNotFoundError:
|
2627
|
+
self._app.logger.debug(
|
2628
|
+
f"script did not generate an expected output parameter "
|
2629
|
+
f"file (block_act_key={block_act_key!r}), so setting run "
|
2630
|
+
f"to an error state (if not aborted)."
|
2631
|
+
)
|
2632
|
+
if not is_aborted and success is True:
|
2633
|
+
success = False
|
2634
|
+
exit_code = 1 # TODO more custom exit codes?
|
2358
2635
|
|
2359
|
-
|
2360
|
-
for save_file_j in
|
2636
|
+
# Save action-level files: (TODO: refactor with below for OFPs)
|
2637
|
+
for save_file_j in run.action.save_files:
|
2361
2638
|
self._app.logger.debug(
|
2362
|
-
f"Saving
|
2363
|
-
f"{
|
2639
|
+
f"Saving file: {save_file_j.label!r} for EAR ID "
|
2640
|
+
f"{run.id_!r}."
|
2364
2641
|
)
|
2365
|
-
|
2366
|
-
|
2367
|
-
|
2368
|
-
|
2369
|
-
|
2370
|
-
|
2642
|
+
try:
|
2643
|
+
param_id = run.data_idx[f"output_files.{save_file_j.label}"]
|
2644
|
+
except KeyError:
|
2645
|
+
# We might be saving a file that is not a defined
|
2646
|
+
# "output file"; this will avoid saving a reference in the
|
2647
|
+
# parameter data:
|
2648
|
+
param_id = None
|
2649
|
+
|
2650
|
+
file_paths = save_file_j.value(directory=run_dir)
|
2371
2651
|
self._app.logger.debug(
|
2372
|
-
f"Saving
|
2652
|
+
f"Saving output file paths: {file_paths!r}"
|
2373
2653
|
)
|
2654
|
+
|
2374
2655
|
for path_i in (
|
2375
2656
|
file_paths if isinstance(file_paths, list) else [file_paths]
|
2376
2657
|
):
|
2377
|
-
|
2378
|
-
|
2379
|
-
|
2380
|
-
|
2381
|
-
|
2382
|
-
|
2658
|
+
full_path = run_dir.joinpath(path_i)
|
2659
|
+
if not full_path.exists():
|
2660
|
+
self._app.logger.debug(
|
2661
|
+
f"expected file to save {path_i!r} does not "
|
2662
|
+
f"exist, so setting run to an error state "
|
2663
|
+
f"(if not aborted)."
|
2664
|
+
)
|
2665
|
+
if not is_aborted and success is True:
|
2666
|
+
# this is unlikely to happen, but could happen
|
2667
|
+
# if the input file is deleted in between
|
2668
|
+
# the input file generator completing and this
|
2669
|
+
# code being run
|
2670
|
+
success = False
|
2671
|
+
exit_code = 1 # TODO more custom exit codes?
|
2672
|
+
else:
|
2673
|
+
self._set_file(
|
2674
|
+
param_id=param_id,
|
2675
|
+
store_contents=True,
|
2676
|
+
is_input=False,
|
2677
|
+
path=full_path,
|
2678
|
+
clean_up=(save_file_j in run.action.clean_up),
|
2679
|
+
)
|
2680
|
+
|
2681
|
+
for OFP_i in run.action.output_file_parsers:
|
2682
|
+
for save_file_j in OFP_i._save_files:
|
2683
|
+
self._app.logger.debug(
|
2684
|
+
f"Saving EAR output file: {save_file_j.label!r} for EAR ID "
|
2685
|
+
f"{run.id_!r}."
|
2686
|
+
)
|
2687
|
+
try:
|
2688
|
+
param_id = run.data_idx[
|
2689
|
+
f"output_files.{save_file_j.label}"
|
2690
|
+
]
|
2691
|
+
except KeyError:
|
2692
|
+
# We might be saving a file that is not a defined
|
2693
|
+
# "output file"; this will avoid saving a reference in the
|
2694
|
+
# parameter data:
|
2695
|
+
param_id = None
|
2696
|
+
|
2697
|
+
file_paths = save_file_j.value(directory=run_dir)
|
2698
|
+
self._app.logger.debug(
|
2699
|
+
f"Saving EAR output file paths: {file_paths!r}"
|
2383
2700
|
)
|
2384
2701
|
|
2385
|
-
|
2386
|
-
|
2387
|
-
|
2388
|
-
|
2389
|
-
|
2390
|
-
|
2391
|
-
|
2702
|
+
for path_i in (
|
2703
|
+
file_paths
|
2704
|
+
if isinstance(file_paths, list)
|
2705
|
+
else [file_paths]
|
2706
|
+
):
|
2707
|
+
full_path = run_dir.joinpath(path_i)
|
2708
|
+
if not full_path.exists():
|
2709
|
+
self._app.logger.debug(
|
2710
|
+
f"expected output file parser `save_files` file "
|
2711
|
+
f"{path_i!r} does not exist, so setting run "
|
2712
|
+
f"to an error state (if not aborted)."
|
2713
|
+
)
|
2714
|
+
if not is_aborted and success is True:
|
2715
|
+
success = False
|
2716
|
+
exit_code = 1 # TODO more custom exit codes?
|
2717
|
+
else:
|
2718
|
+
self._set_file(
|
2719
|
+
param_id=param_id,
|
2720
|
+
store_contents=True, # TODO: make optional according to OFP
|
2721
|
+
is_input=False,
|
2722
|
+
path=full_path,
|
2723
|
+
clean_up=(save_file_j in OFP_i.clean_up),
|
2724
|
+
)
|
2725
|
+
|
2726
|
+
if (
|
2727
|
+
run.resources.skip_downstream_on_failure
|
2728
|
+
and not success
|
2729
|
+
and run.skip_reason is not SkipReason.LOOP_TERMINATION
|
2730
|
+
):
|
2731
|
+
# loop termination skips are already propagated
|
2732
|
+
for EAR_dep_ID in run.get_dependent_EARs(as_objects=False):
|
2733
|
+
self._app.logger.debug(
|
2734
|
+
f"Setting EAR ID {EAR_dep_ID!r} to skip because it depends on"
|
2735
|
+
f" EAR ID {run.id_!r}, which exited with a non-zero exit code:"
|
2736
|
+
f" {exit_code!r}."
|
2737
|
+
)
|
2738
|
+
self._store.set_EAR_skip(
|
2739
|
+
{EAR_dep_ID: SkipReason.UPSTREAM_FAILURE.value}
|
2740
|
+
)
|
2741
|
+
|
2742
|
+
self._store.set_EAR_end(run.id_, exit_code, success, run.action.requires_dir)
|
2743
|
+
|
2744
|
+
def set_multi_run_ends(
|
2745
|
+
self,
|
2746
|
+
runs: dict[
|
2747
|
+
BlockActionKey,
|
2748
|
+
list[tuple[ElementActionRun, int, Path | None]],
|
2749
|
+
],
|
2750
|
+
) -> None:
|
2751
|
+
"""Set end times and exit codes on multiple runs.
|
2752
|
+
|
2753
|
+
If the exit code is non-zero, also set all downstream dependent runs to be
|
2754
|
+
skipped. Also save any generated input/output files."""
|
2755
|
+
|
2756
|
+
self._app.logger.debug(f"Setting end for multiple run IDs.")
|
2757
|
+
param_id: int | list[int] | None
|
2758
|
+
with self._store.cached_load(), self.batch_update():
|
2759
|
+
run_ids = []
|
2760
|
+
run_dirs = []
|
2761
|
+
exit_codes = []
|
2762
|
+
successes = []
|
2763
|
+
for block_act_key, run_dat in runs.items():
|
2764
|
+
for run, exit_code, run_dir in run_dat:
|
2765
|
+
|
2766
|
+
success = (
|
2767
|
+
exit_code == 0
|
2768
|
+
) # TODO more sophisticated success heuristics
|
2769
|
+
self._app.logger.info(
|
2770
|
+
f"setting end for run {run.id_} with exit_code={exit_code}, "
|
2771
|
+
f"success={success}, skip={run.skip!r}, and skip_reason="
|
2772
|
+
f"{run.skip_reason!r}."
|
2773
|
+
)
|
2774
|
+
if not run.skip:
|
2775
|
+
self._app.logger.info(f"run was not skipped.")
|
2776
|
+
is_aborted = False
|
2777
|
+
if run.action.abortable and exit_code == ABORT_EXIT_CODE:
|
2778
|
+
# the point of aborting an EAR is to continue with the
|
2779
|
+
# workflow:
|
2780
|
+
self._app.logger.info(
|
2781
|
+
"run was abortable and exit code was ABORT_EXIT_CODE,"
|
2782
|
+
" so setting success to True."
|
2783
|
+
)
|
2784
|
+
is_aborted = True
|
2785
|
+
success = True
|
2786
|
+
|
2787
|
+
run_dir = run.get_directory()
|
2788
|
+
if run_dir:
|
2789
|
+
assert isinstance(run_dir, Path)
|
2790
|
+
for IFG_i in run.action.input_file_generators:
|
2791
|
+
self._app.logger.info(f"setting IFG file {IFG_i!r}")
|
2792
|
+
inp_file = IFG_i.input_file
|
2793
|
+
self._app.logger.debug(
|
2794
|
+
f"Saving EAR input file: {inp_file.label!r} for EAR "
|
2795
|
+
f"ID {run.id_!r}."
|
2796
|
+
)
|
2797
|
+
param_id = run.data_idx[f"input_files.{inp_file.label}"]
|
2798
|
+
|
2799
|
+
file_paths = inp_file.value(directory=run_dir)
|
2800
|
+
for path_i in (
|
2801
|
+
file_paths
|
2802
|
+
if isinstance(file_paths, list)
|
2803
|
+
else [file_paths]
|
2804
|
+
):
|
2805
|
+
full_path = run_dir.joinpath(path_i)
|
2806
|
+
if not full_path.exists():
|
2807
|
+
self._app.logger.debug(
|
2808
|
+
f"expected input file {path_i!r} does not "
|
2809
|
+
f"exist, so setting run to an error state "
|
2810
|
+
f"(if not aborted)."
|
2811
|
+
)
|
2812
|
+
if not is_aborted and success is True:
|
2813
|
+
# this is unlikely to happen, but could happen
|
2814
|
+
# if the input file is deleted in between
|
2815
|
+
# the input file generator completing and this
|
2816
|
+
# code being run
|
2817
|
+
success = False
|
2818
|
+
exit_code = 1 # TODO more custom exit codes?
|
2819
|
+
else:
|
2820
|
+
self._set_file(
|
2821
|
+
param_id=param_id,
|
2822
|
+
store_contents=True, # TODO: make optional according to IFG
|
2823
|
+
is_input=False,
|
2824
|
+
path=full_path,
|
2825
|
+
)
|
2826
|
+
|
2827
|
+
if run.action.script_data_out_has_files:
|
2828
|
+
self._app.logger.info(
|
2829
|
+
f"saving script-generated parameters."
|
2830
|
+
)
|
2831
|
+
try:
|
2832
|
+
run._param_save(block_act_key, run_dir)
|
2833
|
+
except FileNotFoundError:
|
2834
|
+
# script did not generate the output parameter file, so
|
2835
|
+
# set a failed exit code (if we did not abort the run):
|
2836
|
+
self._app.logger.debug(
|
2837
|
+
f"script did not generate an expected output "
|
2838
|
+
f"parameter file (block_act_key="
|
2839
|
+
f"{block_act_key!r}), so setting run to an error "
|
2840
|
+
f"state (if not aborted)."
|
2841
|
+
)
|
2842
|
+
if not is_aborted and success is True:
|
2843
|
+
success = False
|
2844
|
+
exit_code = 1 # TODO more custom exit codes?
|
2845
|
+
|
2846
|
+
# Save action-level files: (TODO: refactor with below for OFPs)
|
2847
|
+
for save_file_j in run.action.save_files:
|
2848
|
+
self._app.logger.info(
|
2849
|
+
f"saving action-level file {save_file_j!r}."
|
2850
|
+
)
|
2851
|
+
self._app.logger.debug(
|
2852
|
+
f"Saving file: {save_file_j.label!r} for EAR ID "
|
2853
|
+
f"{run.id_!r}."
|
2854
|
+
)
|
2855
|
+
try:
|
2856
|
+
param_id = run.data_idx[
|
2857
|
+
f"output_files.{save_file_j.label}"
|
2858
|
+
]
|
2859
|
+
except KeyError:
|
2860
|
+
# We might be saving a file that is not a defined
|
2861
|
+
# "output file"; this will avoid saving a reference in
|
2862
|
+
# the parameter data:
|
2863
|
+
param_id = None
|
2864
|
+
|
2865
|
+
file_paths = save_file_j.value(directory=run_dir)
|
2866
|
+
self._app.logger.debug(
|
2867
|
+
f"Saving output file paths: {file_paths!r}"
|
2868
|
+
)
|
2869
|
+
for path_i in (
|
2870
|
+
file_paths
|
2871
|
+
if isinstance(file_paths, list)
|
2872
|
+
else [file_paths]
|
2873
|
+
):
|
2874
|
+
full_path = run_dir.joinpath(path_i)
|
2875
|
+
if not full_path.exists():
|
2876
|
+
self._app.logger.debug(
|
2877
|
+
f"expected file to save {path_i!r} does not "
|
2878
|
+
f"exist, so setting run to an error state "
|
2879
|
+
f"(if not aborted)."
|
2880
|
+
)
|
2881
|
+
if not is_aborted and success is True:
|
2882
|
+
# this is unlikely to happen, but could happen
|
2883
|
+
# if the input file is deleted in between
|
2884
|
+
# the input file generator completing and this
|
2885
|
+
# code being run
|
2886
|
+
success = False
|
2887
|
+
exit_code = 1 # TODO more custom exit codes?
|
2888
|
+
else:
|
2889
|
+
self._set_file(
|
2890
|
+
param_id=param_id,
|
2891
|
+
store_contents=True,
|
2892
|
+
is_input=False,
|
2893
|
+
path=full_path,
|
2894
|
+
clean_up=(save_file_j in run.action.clean_up),
|
2895
|
+
)
|
2896
|
+
|
2897
|
+
for OFP_i in run.action.output_file_parsers:
|
2898
|
+
self._app.logger.info(
|
2899
|
+
f"saving files from OFP: {OFP_i!r}."
|
2900
|
+
)
|
2901
|
+
for save_file_j in OFP_i._save_files:
|
2902
|
+
self._app.logger.debug(
|
2903
|
+
f"Saving EAR output file: {save_file_j.label!r} "
|
2904
|
+
f"for EAR ID {run.id_!r}."
|
2905
|
+
)
|
2906
|
+
try:
|
2907
|
+
param_id = run.data_idx[
|
2908
|
+
f"output_files.{save_file_j.label}"
|
2909
|
+
]
|
2910
|
+
except KeyError:
|
2911
|
+
# We might be saving a file that is not a defined
|
2912
|
+
# "output file"; this will avoid saving a
|
2913
|
+
# reference in the parameter data:
|
2914
|
+
param_id = None
|
2915
|
+
|
2916
|
+
file_paths = save_file_j.value(directory=run_dir)
|
2917
|
+
self._app.logger.debug(
|
2918
|
+
f"Saving EAR output file paths: {file_paths!r}"
|
2919
|
+
)
|
2920
|
+
|
2921
|
+
for path_i in (
|
2922
|
+
file_paths
|
2923
|
+
if isinstance(file_paths, list)
|
2924
|
+
else [file_paths]
|
2925
|
+
):
|
2926
|
+
full_path = run_dir.joinpath(path_i)
|
2927
|
+
if not full_path.exists():
|
2928
|
+
self._app.logger.debug(
|
2929
|
+
f"expected output file parser `save_files` file "
|
2930
|
+
f"{path_i!r} does not exist, so setting run "
|
2931
|
+
f"to an error state (if not aborted)."
|
2932
|
+
)
|
2933
|
+
if not is_aborted and success is True:
|
2934
|
+
success = False
|
2935
|
+
exit_code = (
|
2936
|
+
1 # TODO more custom exit codes?
|
2937
|
+
)
|
2938
|
+
else:
|
2939
|
+
self._set_file(
|
2940
|
+
param_id=param_id,
|
2941
|
+
store_contents=True, # TODO: make optional according to OFP
|
2942
|
+
is_input=False,
|
2943
|
+
path=full_path,
|
2944
|
+
clean_up=(save_file_j in OFP_i.clean_up),
|
2945
|
+
)
|
2946
|
+
|
2947
|
+
else:
|
2948
|
+
self._app.logger.info(
|
2949
|
+
f"run was skipped: reason: {run.skip_reason!r}."
|
2950
|
+
)
|
2951
|
+
|
2952
|
+
if (
|
2953
|
+
run.resources.skip_downstream_on_failure
|
2954
|
+
and not success
|
2955
|
+
and run.skip_reason is not SkipReason.LOOP_TERMINATION
|
2956
|
+
):
|
2957
|
+
# run failed
|
2958
|
+
self._app.logger.info(
|
2959
|
+
"run was not succcess and skip reason was not "
|
2960
|
+
"LOOP_TERMINATION."
|
2961
|
+
)
|
2962
|
+
# loop termination skips are already propagated
|
2963
|
+
for EAR_dep_ID in run.get_dependent_EARs(as_objects=False):
|
2964
|
+
# TODO: `get_dependent_EARs` seems to be stuck in a
|
2965
|
+
# recursion for some workflows
|
2966
|
+
# TODO: this needs to be recursive?
|
2967
|
+
self._app.logger.info(
|
2968
|
+
f"Setting EAR ID {EAR_dep_ID!r} to skip because it "
|
2969
|
+
f"depends on EAR ID {run.id_!r}, which exited with a "
|
2970
|
+
f"non-zero exit code: {exit_code!r}."
|
2971
|
+
)
|
2972
|
+
self._store.set_EAR_skip(
|
2973
|
+
{EAR_dep_ID: SkipReason.UPSTREAM_FAILURE.value}
|
2974
|
+
)
|
2975
|
+
else:
|
2976
|
+
self._app.logger.info(
|
2977
|
+
"`skip_downstream_on_failure` is False, run was "
|
2978
|
+
"succcess, or skip reason was LOOP_TERMINATION."
|
2392
2979
|
)
|
2393
|
-
self._store.set_EAR_skip(EAR_dep_ID)
|
2394
2980
|
|
2395
|
-
|
2981
|
+
run_ids.append(run.id_)
|
2982
|
+
run_dirs.append(run_dir)
|
2983
|
+
exit_codes.append(exit_code)
|
2984
|
+
successes.append(success)
|
2985
|
+
|
2986
|
+
self._store.set_multi_run_ends(run_ids, run_dirs, exit_codes, successes)
|
2396
2987
|
|
2397
|
-
def set_EAR_skip(self,
|
2988
|
+
def set_EAR_skip(self, skip_reasons: dict[int, SkipReason]) -> None:
|
2398
2989
|
"""
|
2399
2990
|
Record that an EAR is to be skipped due to an upstream failure or loop
|
2400
2991
|
termination condition being met.
|
2401
2992
|
"""
|
2402
2993
|
with self._store.cached_load(), self.batch_update():
|
2403
|
-
self._store.set_EAR_skip(
|
2994
|
+
self._store.set_EAR_skip({k: v.value for k, v in skip_reasons.items()})
|
2404
2995
|
|
2405
|
-
def get_EAR_skipped(self, EAR_ID: int) ->
|
2996
|
+
def get_EAR_skipped(self, EAR_ID: int) -> int:
|
2406
2997
|
"""Check if an EAR is to be skipped."""
|
2407
2998
|
with self._store.cached_load():
|
2408
2999
|
return self._store.get_EAR_skipped(EAR_ID)
|
@@ -2421,6 +3012,15 @@ class Workflow(AppAware):
|
|
2421
3012
|
# force commit now:
|
2422
3013
|
self._store._pending.commit_all()
|
2423
3014
|
|
3015
|
+
@TimeIt.decorator
|
3016
|
+
def set_parameter_values(self, values: dict[int, Any], commit: bool = False) -> None:
|
3017
|
+
with self._store.cached_load(), self.batch_update(), self._store.cache_ctx():
|
3018
|
+
self._store.set_parameter_values(values)
|
3019
|
+
|
3020
|
+
if commit:
|
3021
|
+
# force commit now:
|
3022
|
+
self._store._pending.commit_all()
|
3023
|
+
|
2424
3024
|
def set_EARs_initialised(self, iter_ID: int) -> None:
|
2425
3025
|
"""
|
2426
3026
|
Set :py:attr:`~hpcflow.app.ElementIteration.EARs_initialised` to True for the
|
@@ -2549,7 +3149,7 @@ class Workflow(AppAware):
|
|
2549
3149
|
self,
|
2550
3150
|
status: Status | None = None,
|
2551
3151
|
ignore_errors: bool = False,
|
2552
|
-
JS_parallelism: bool | None = None,
|
3152
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
2553
3153
|
print_stdout: bool = False,
|
2554
3154
|
add_to_known: bool = True,
|
2555
3155
|
tasks: Sequence[int] | None = None,
|
@@ -2560,16 +3160,23 @@ class Workflow(AppAware):
|
|
2560
3160
|
if not (pending := [sub for sub in self.submissions if sub.needs_submit]):
|
2561
3161
|
if status:
|
2562
3162
|
status.update("Adding new submission...")
|
2563
|
-
if not (
|
3163
|
+
if not (
|
3164
|
+
new_sub := self._add_submission(
|
3165
|
+
tasks=tasks,
|
3166
|
+
JS_parallelism=JS_parallelism,
|
3167
|
+
status=status,
|
3168
|
+
)
|
3169
|
+
):
|
3170
|
+
if status:
|
3171
|
+
status.stop()
|
2564
3172
|
raise ValueError("No pending element action runs to submit!")
|
2565
3173
|
pending = [new_sub]
|
2566
3174
|
|
2567
|
-
self.submissions_path.mkdir(exist_ok=True, parents=True)
|
2568
3175
|
self.execution_path.mkdir(exist_ok=True, parents=True)
|
2569
3176
|
self.task_artifacts_path.mkdir(exist_ok=True, parents=True)
|
2570
3177
|
|
2571
|
-
#
|
2572
|
-
#
|
3178
|
+
# the submission must be persistent at submit-time, because it will be read by a
|
3179
|
+
# new instance of the app:
|
2573
3180
|
if status:
|
2574
3181
|
status.update("Committing to the store...")
|
2575
3182
|
self._store._pending.commit_all()
|
@@ -2598,7 +3205,7 @@ class Workflow(AppAware):
|
|
2598
3205
|
self,
|
2599
3206
|
*,
|
2600
3207
|
ignore_errors: bool = False,
|
2601
|
-
JS_parallelism: bool | None = None,
|
3208
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
2602
3209
|
print_stdout: bool = False,
|
2603
3210
|
wait: bool = False,
|
2604
3211
|
add_to_known: bool = True,
|
@@ -2614,7 +3221,7 @@ class Workflow(AppAware):
|
|
2614
3221
|
self,
|
2615
3222
|
*,
|
2616
3223
|
ignore_errors: bool = False,
|
2617
|
-
JS_parallelism: bool | None = None,
|
3224
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
2618
3225
|
print_stdout: bool = False,
|
2619
3226
|
wait: bool = False,
|
2620
3227
|
add_to_known: bool = True,
|
@@ -2629,7 +3236,7 @@ class Workflow(AppAware):
|
|
2629
3236
|
self,
|
2630
3237
|
*,
|
2631
3238
|
ignore_errors: bool = False,
|
2632
|
-
JS_parallelism: bool | None = None,
|
3239
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
2633
3240
|
print_stdout: bool = False,
|
2634
3241
|
wait: bool = False,
|
2635
3242
|
add_to_known: bool = True,
|
@@ -2646,9 +3253,12 @@ class Workflow(AppAware):
|
|
2646
3253
|
If True, ignore jobscript submission errors. If False (the default) jobscript
|
2647
3254
|
submission will halt when a jobscript fails to submit.
|
2648
3255
|
JS_parallelism
|
2649
|
-
If True, allow multiple jobscripts to execute simultaneously.
|
2650
|
-
|
2651
|
-
|
3256
|
+
If True, allow multiple jobscripts to execute simultaneously. If
|
3257
|
+
'scheduled'/'direct', only allow simultaneous execution of scheduled/direct
|
3258
|
+
jobscripts. Raises if set to True, 'scheduled', or 'direct', but the store
|
3259
|
+
type does not support the `jobscript_parallelism` feature. If not set,
|
3260
|
+
jobscript parallelism will be used if the store type supports it, for
|
3261
|
+
scheduled jobscripts only.
|
2652
3262
|
print_stdout
|
2653
3263
|
If True, print any jobscript submission standard output, otherwise hide it.
|
2654
3264
|
wait
|
@@ -2679,7 +3289,11 @@ class Workflow(AppAware):
|
|
2679
3289
|
if not self._store.is_submittable:
|
2680
3290
|
raise NotImplementedError("The workflow is not submittable.")
|
2681
3291
|
# commit updates before raising exception:
|
2682
|
-
with
|
3292
|
+
with (
|
3293
|
+
self.batch_update(),
|
3294
|
+
self._store.parameters_metadata_cache(),
|
3295
|
+
self._store.cache_ctx(),
|
3296
|
+
):
|
2683
3297
|
exceptions, submitted_js = self._submit(
|
2684
3298
|
ignore_errors=ignore_errors,
|
2685
3299
|
JS_parallelism=JS_parallelism,
|
@@ -2693,7 +3307,7 @@ class Workflow(AppAware):
|
|
2693
3307
|
raise WorkflowSubmissionFailure(exceptions)
|
2694
3308
|
|
2695
3309
|
if cancel:
|
2696
|
-
self.cancel()
|
3310
|
+
self.cancel(status=status)
|
2697
3311
|
|
2698
3312
|
elif wait:
|
2699
3313
|
self.wait(submitted_js)
|
@@ -2822,14 +3436,16 @@ class Workflow(AppAware):
|
|
2822
3436
|
# keys are task_insert_IDs, values are element indices:
|
2823
3437
|
active_elems: dict[int, set[int]] = defaultdict(set)
|
2824
3438
|
sub = self.submissions[submission_idx]
|
2825
|
-
for js_idx,
|
3439
|
+
for js_idx, block_states in sub.get_active_jobscripts().items():
|
2826
3440
|
js = sub.jobscripts[js_idx]
|
2827
|
-
for
|
2828
|
-
|
2829
|
-
|
2830
|
-
|
2831
|
-
|
2832
|
-
|
3441
|
+
for block_idx, block in enumerate(js.blocks):
|
3442
|
+
states = block_states[block_idx]
|
3443
|
+
for js_elem_idx, state in states.items():
|
3444
|
+
if state is JobscriptElementState.running:
|
3445
|
+
for task_iID, elem_idx in zip(
|
3446
|
+
block.task_insert_IDs, block.task_elements[js_elem_idx]
|
3447
|
+
):
|
3448
|
+
active_elems[task_iID].add(elem_idx)
|
2833
3449
|
|
2834
3450
|
# retrieve Element objects:
|
2835
3451
|
out: list[Element] = []
|
@@ -2862,18 +3478,22 @@ class Workflow(AppAware):
|
|
2862
3478
|
for elem in elems:
|
2863
3479
|
if element_idx is not None and elem.index != element_idx:
|
2864
3480
|
continue
|
2865
|
-
|
2866
|
-
|
2867
|
-
|
2868
|
-
|
2869
|
-
|
2870
|
-
|
3481
|
+
for iter_i in elem.iterations:
|
3482
|
+
for elem_acts in iter_i.actions.values():
|
3483
|
+
for run in elem_acts.runs:
|
3484
|
+
if run.status is EARStatus.running:
|
3485
|
+
out.append(run)
|
3486
|
+
# for a given element and submission, only one run
|
3487
|
+
# may be running at a time:
|
3488
|
+
break
|
2871
3489
|
return out
|
2872
3490
|
|
2873
|
-
def
|
2874
|
-
|
2875
|
-
|
2876
|
-
self.
|
3491
|
+
def _abort_run(self, run: ElementActionRun):
|
3492
|
+
# connect to the ZeroMQ server on the worker node:
|
3493
|
+
self._app.logger.info(f"abort run: {run!r}")
|
3494
|
+
self._app.Executor.send_abort(
|
3495
|
+
hostname=run.run_hostname, port_number=run.port_number
|
3496
|
+
)
|
2877
3497
|
|
2878
3498
|
def abort_run(
|
2879
3499
|
self,
|
@@ -2916,38 +3536,77 @@ class Workflow(AppAware):
|
|
2916
3536
|
run = running[0]
|
2917
3537
|
if not run.action.abortable:
|
2918
3538
|
raise RunNotAbortableError()
|
2919
|
-
self.
|
3539
|
+
self._abort_run(run)
|
2920
3540
|
|
2921
3541
|
@TimeIt.decorator
|
2922
|
-
def cancel(self,
|
3542
|
+
def cancel(self, status: bool = True):
|
2923
3543
|
"""Cancel any running jobscripts."""
|
2924
|
-
|
2925
|
-
|
3544
|
+
status_msg = f"Cancelling jobscripts of workflow {self.path!r}"
|
3545
|
+
# Type hint for mypy
|
3546
|
+
status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
|
3547
|
+
rich.console.Console().status(status_msg) if status else nullcontext()
|
3548
|
+
)
|
3549
|
+
with status_context as status_, self._store.cached_load():
|
3550
|
+
for sub in self.submissions:
|
3551
|
+
sub.cancel()
|
2926
3552
|
|
2927
3553
|
def add_submission(
|
2928
|
-
self,
|
3554
|
+
self,
|
3555
|
+
tasks: list[int] | None = None,
|
3556
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
3557
|
+
force_array: bool = False,
|
3558
|
+
status: bool = True,
|
2929
3559
|
) -> Submission | None:
|
2930
|
-
"""
|
2931
|
-
|
3560
|
+
"""Add a new submission.
|
3561
|
+
|
3562
|
+
Parameters
|
3563
|
+
----------
|
3564
|
+
force_array
|
3565
|
+
Used to force the use of job arrays, even if the scheduler does not support
|
3566
|
+
it. This is provided for testing purposes only.
|
2932
3567
|
"""
|
2933
3568
|
# JS_parallelism=None means guess
|
2934
|
-
|
2935
|
-
|
3569
|
+
# Type hint for mypy
|
3570
|
+
status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
|
3571
|
+
rich.console.Console().status("") if status else nullcontext()
|
3572
|
+
)
|
3573
|
+
with status_context as status_, self._store.cached_load(), self.batch_update():
|
3574
|
+
return self._add_submission(tasks, JS_parallelism, force_array, status_)
|
2936
3575
|
|
2937
3576
|
@TimeIt.decorator
|
3577
|
+
@load_workflow_config
|
2938
3578
|
def _add_submission(
|
2939
|
-
self,
|
3579
|
+
self,
|
3580
|
+
tasks: Sequence[int] | None = None,
|
3581
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
3582
|
+
force_array: bool = False,
|
3583
|
+
status: Status | None = None,
|
2940
3584
|
) -> Submission | None:
|
3585
|
+
"""Add a new submission.
|
3586
|
+
|
3587
|
+
Parameters
|
3588
|
+
----------
|
3589
|
+
force_array
|
3590
|
+
Used to force the use of job arrays, even if the scheduler does not support
|
3591
|
+
it. This is provided for testing purposes only.
|
3592
|
+
"""
|
2941
3593
|
new_idx = self.num_submissions
|
2942
3594
|
_ = self.submissions # TODO: just to ensure `submissions` is loaded
|
3595
|
+
if status:
|
3596
|
+
status.update("Adding new submission: resolving jobscripts...")
|
3597
|
+
|
3598
|
+
cache = ObjectCache.build(self, elements=True, iterations=True, runs=True)
|
3599
|
+
|
2943
3600
|
sub_obj: Submission = self._app.Submission(
|
2944
3601
|
index=new_idx,
|
2945
3602
|
workflow=self,
|
2946
|
-
jobscripts=self.resolve_jobscripts(tasks),
|
3603
|
+
jobscripts=self.resolve_jobscripts(cache, tasks, force_array),
|
2947
3604
|
JS_parallelism=JS_parallelism,
|
2948
3605
|
)
|
3606
|
+
if status:
|
3607
|
+
status.update("Adding new submission: setting environments...")
|
2949
3608
|
sub_obj._set_environments()
|
2950
|
-
all_EAR_ID =
|
3609
|
+
all_EAR_ID = sub_obj.all_EAR_IDs
|
2951
3610
|
if not all_EAR_ID:
|
2952
3611
|
print(
|
2953
3612
|
"There are no pending element action runs, so a new submission was not "
|
@@ -2955,33 +3614,97 @@ class Workflow(AppAware):
|
|
2955
3614
|
)
|
2956
3615
|
return None
|
2957
3616
|
|
3617
|
+
if status:
|
3618
|
+
status.update("Adding new submission: making artifact directories...")
|
3619
|
+
|
3620
|
+
# TODO: a submission should only be "submitted" once shouldn't it?
|
3621
|
+
# no; there could be an IO error (e.g. internet connectivity), so might
|
3622
|
+
# need to be able to reattempt submission of outstanding jobscripts.
|
3623
|
+
self.submissions_path.mkdir(exist_ok=True, parents=True)
|
3624
|
+
sub_obj.path.mkdir(exist_ok=True)
|
3625
|
+
sub_obj.tmp_path.mkdir(exist_ok=True)
|
3626
|
+
sub_obj.app_std_path.mkdir(exist_ok=True)
|
3627
|
+
sub_obj.js_path.mkdir(exist_ok=True) # for jobscripts
|
3628
|
+
sub_obj.js_std_path.mkdir(exist_ok=True) # for stdout/err stream files
|
3629
|
+
sub_obj.js_funcs_path.mkdir(exist_ok=True)
|
3630
|
+
sub_obj.js_run_ids_path.mkdir(exist_ok=True)
|
3631
|
+
sub_obj.scripts_path.mkdir(exist_ok=True)
|
3632
|
+
sub_obj.commands_path.mkdir(exist_ok=True)
|
3633
|
+
|
3634
|
+
if sub_obj.needs_app_log_dir:
|
3635
|
+
sub_obj.app_log_path.mkdir(exist_ok=True)
|
3636
|
+
|
3637
|
+
if sub_obj.needs_win_pids_dir:
|
3638
|
+
sub_obj.js_win_pids_path.mkdir(exist_ok=True)
|
3639
|
+
|
3640
|
+
if sub_obj.needs_script_indices_dir:
|
3641
|
+
sub_obj.js_script_indices_path.mkdir(exist_ok=True)
|
3642
|
+
|
3643
|
+
if status:
|
3644
|
+
status.update("Adding new submission: writing scripts and command files...")
|
3645
|
+
|
3646
|
+
# write scripts and command files where possible to the submission directory:
|
3647
|
+
cmd_file_IDs, run_indices, run_inp_files = sub_obj._write_scripts(cache, status)
|
3648
|
+
|
3649
|
+
sub_obj._write_execute_dirs(run_indices, run_inp_files, cache, status)
|
3650
|
+
|
3651
|
+
if status:
|
3652
|
+
status.update("Adding new submission: updating the store...")
|
3653
|
+
|
2958
3654
|
with self._store.cached_load(), self.batch_update():
|
2959
3655
|
for id_ in all_EAR_ID:
|
2960
|
-
self._store.
|
3656
|
+
self._store.set_run_submission_data(
|
3657
|
+
EAR_ID=id_,
|
3658
|
+
cmds_ID=cmd_file_IDs[id_],
|
3659
|
+
sub_idx=new_idx,
|
3660
|
+
)
|
2961
3661
|
|
3662
|
+
sub_obj._ensure_JS_parallelism_set()
|
2962
3663
|
sub_obj_js, _ = sub_obj.to_json_like()
|
2963
3664
|
assert self._submissions is not None
|
2964
3665
|
self._submissions.append(sub_obj)
|
2965
3666
|
self._pending["submissions"].append(new_idx)
|
2966
3667
|
with self._store.cached_load(), self.batch_update():
|
2967
|
-
self._store.add_submission(new_idx, sub_obj_js)
|
3668
|
+
self._store.add_submission(new_idx, cast("Mapping[str, JSONed]", sub_obj_js))
|
2968
3669
|
|
2969
3670
|
return self.submissions[new_idx]
|
2970
3671
|
|
2971
3672
|
@TimeIt.decorator
|
2972
|
-
def resolve_jobscripts(
|
3673
|
+
def resolve_jobscripts(
|
3674
|
+
self,
|
3675
|
+
cache: ObjectCache,
|
3676
|
+
tasks: Sequence[int] | None = None,
|
3677
|
+
force_array: bool = False,
|
3678
|
+
) -> list[Jobscript]:
|
2973
3679
|
"""
|
2974
|
-
Resolve this workflow to a set of
|
3680
|
+
Resolve this workflow to a set of jobscripts to run for a new submission.
|
3681
|
+
|
3682
|
+
Parameters
|
3683
|
+
----------
|
3684
|
+
force_array
|
3685
|
+
Used to force the use of job arrays, even if the scheduler does not support
|
3686
|
+
it. This is provided for testing purposes only.
|
3687
|
+
|
2975
3688
|
"""
|
2976
|
-
|
2977
|
-
|
3689
|
+
with self._app.config.cached_config():
|
3690
|
+
with self.cached_merged_parameters():
|
3691
|
+
js, element_deps = self._resolve_singular_jobscripts(
|
3692
|
+
cache, tasks, force_array
|
3693
|
+
)
|
3694
|
+
|
3695
|
+
js_deps = resolve_jobscript_dependencies(js, element_deps)
|
2978
3696
|
|
2979
|
-
|
2980
|
-
|
2981
|
-
|
3697
|
+
for js_idx, jsca in js.items():
|
3698
|
+
if js_idx in js_deps:
|
3699
|
+
jsca["dependencies"] = js_deps[js_idx] # type: ignore
|
2982
3700
|
|
2983
|
-
|
2984
|
-
|
3701
|
+
js = merge_jobscripts_across_tasks(js)
|
3702
|
+
|
3703
|
+
# for direct or (non-array scheduled), combine into jobscripts of multiple
|
3704
|
+
# blocks for dependent jobscripts that have the same resource hashes
|
3705
|
+
js_ = resolve_jobscript_blocks(js)
|
3706
|
+
|
3707
|
+
return [self._app.Jobscript(**i, index=idx) for idx, i in enumerate(js_)]
|
2985
3708
|
|
2986
3709
|
def __EAR_obj_map(
|
2987
3710
|
self,
|
@@ -2990,7 +3713,9 @@ class Workflow(AppAware):
|
|
2990
3713
|
task: WorkflowTask,
|
2991
3714
|
task_actions: Sequence[tuple[int, int, int]],
|
2992
3715
|
EAR_map: NDArray,
|
3716
|
+
cache: ObjectCache,
|
2993
3717
|
) -> Mapping[int, ElementActionRun]:
|
3718
|
+
assert cache.runs is not None
|
2994
3719
|
all_EAR_IDs: list[int] = []
|
2995
3720
|
for js_elem_idx, (elem_idx, act_indices) in enumerate(
|
2996
3721
|
js_desc["elements"].items()
|
@@ -3000,11 +3725,14 @@ class Workflow(AppAware):
|
|
3000
3725
|
all_EAR_IDs.append(EAR_ID_i)
|
3001
3726
|
js_act_idx = task_actions.index((task.insert_ID, act_idx, 0))
|
3002
3727
|
jsca["EAR_ID"][js_act_idx][js_elem_idx] = EAR_ID_i
|
3003
|
-
return dict(zip(all_EAR_IDs,
|
3728
|
+
return dict(zip(all_EAR_IDs, (cache.runs[i] for i in all_EAR_IDs)))
|
3004
3729
|
|
3005
3730
|
@TimeIt.decorator
|
3006
3731
|
def _resolve_singular_jobscripts(
|
3007
|
-
self,
|
3732
|
+
self,
|
3733
|
+
cache: ObjectCache,
|
3734
|
+
tasks: Sequence[int] | None = None,
|
3735
|
+
force_array: bool = False,
|
3008
3736
|
) -> tuple[
|
3009
3737
|
Mapping[int, JobScriptCreationArguments],
|
3010
3738
|
Mapping[int, Mapping[int, Sequence[int]]],
|
@@ -3013,6 +3741,12 @@ class Workflow(AppAware):
|
|
3013
3741
|
We arrange EARs into `EARs` and `elements` so we can quickly look up membership
|
3014
3742
|
by EAR idx in the `EARs` dict.
|
3015
3743
|
|
3744
|
+
Parameters
|
3745
|
+
----------
|
3746
|
+
force_array
|
3747
|
+
Used to force the use of job arrays, even if the scheduler does not support
|
3748
|
+
it. This is provided for testing purposes only.
|
3749
|
+
|
3016
3750
|
Returns
|
3017
3751
|
-------
|
3018
3752
|
submission_jobscripts
|
@@ -3025,6 +3759,7 @@ class Workflow(AppAware):
|
|
3025
3759
|
|
3026
3760
|
if self._store.use_cache:
|
3027
3761
|
# pre-cache parameter sources (used in `EAR.get_EAR_dependencies`):
|
3762
|
+
# note: this cache is unrelated to the `cache` argument
|
3028
3763
|
self.get_all_parameter_sources()
|
3029
3764
|
|
3030
3765
|
submission_jobscripts: dict[int, JobScriptCreationArguments] = {}
|
@@ -3034,7 +3769,9 @@ class Workflow(AppAware):
|
|
3034
3769
|
task = self.tasks.get(insert_ID=task_iID)
|
3035
3770
|
if task.index not in task_set:
|
3036
3771
|
continue
|
3037
|
-
res, res_hash, res_map, EAR_map = generate_EAR_resource_map(
|
3772
|
+
res, res_hash, res_map, EAR_map = generate_EAR_resource_map(
|
3773
|
+
task, loop_idx_i, cache
|
3774
|
+
)
|
3038
3775
|
jobscripts, _ = group_resource_map_into_jobscripts(res_map)
|
3039
3776
|
|
3040
3777
|
for js_dat in jobscripts:
|
@@ -3063,6 +3800,11 @@ class Workflow(AppAware):
|
|
3063
3800
|
|
3064
3801
|
new_js_idx = len(submission_jobscripts)
|
3065
3802
|
|
3803
|
+
is_array = force_array or is_jobscript_array(
|
3804
|
+
res[js_dat["resources"]],
|
3805
|
+
EAR_ID_arr.shape[1],
|
3806
|
+
self._store,
|
3807
|
+
)
|
3066
3808
|
js_i: JobScriptCreationArguments = {
|
3067
3809
|
"task_insert_IDs": [task.insert_ID],
|
3068
3810
|
"task_loop_idx": [loop_idx_i],
|
@@ -3072,10 +3814,11 @@ class Workflow(AppAware):
|
|
3072
3814
|
"resources": res[js_dat["resources"]],
|
3073
3815
|
"resource_hash": res_hash[js_dat["resources"]],
|
3074
3816
|
"dependencies": {},
|
3817
|
+
"is_array": is_array,
|
3075
3818
|
}
|
3076
3819
|
|
3077
3820
|
all_EAR_objs = self.__EAR_obj_map(
|
3078
|
-
js_dat, js_i, task, task_actions, EAR_map
|
3821
|
+
js_dat, js_i, task, task_actions, EAR_map, cache
|
3079
3822
|
)
|
3080
3823
|
|
3081
3824
|
for js_elem_idx, (elem_idx, act_indices) in enumerate(
|
@@ -3104,76 +3847,290 @@ class Workflow(AppAware):
|
|
3104
3847
|
|
3105
3848
|
return submission_jobscripts, all_element_deps
|
3106
3849
|
|
3107
|
-
|
3108
|
-
|
3109
|
-
|
3110
|
-
|
3111
|
-
|
3112
|
-
|
3113
|
-
|
3114
|
-
|
3115
|
-
|
3116
|
-
|
3117
|
-
|
3118
|
-
|
3119
|
-
|
3120
|
-
|
3121
|
-
|
3122
|
-
|
3123
|
-
|
3124
|
-
|
3125
|
-
|
3126
|
-
|
3127
|
-
|
3128
|
-
|
3850
|
+
@load_workflow_config
|
3851
|
+
def execute_run(
|
3852
|
+
self,
|
3853
|
+
submission_idx: int,
|
3854
|
+
block_act_key: BlockActionKey,
|
3855
|
+
run_ID: int,
|
3856
|
+
) -> None:
|
3857
|
+
"""Execute commands of a run via a subprocess."""
|
3858
|
+
|
3859
|
+
# CD to submission tmp dir to ensure std streams and exceptions have somewhere
|
3860
|
+
# sensible to go:
|
3861
|
+
os.chdir(Submission.get_tmp_path(self.submissions_path, submission_idx))
|
3862
|
+
|
3863
|
+
sub_str_path = Submission.get_app_std_path(self.submissions_path, submission_idx)
|
3864
|
+
run_std_path = sub_str_path / f"{str(run_ID)}.txt" # TODO: refactor
|
3865
|
+
has_commands = False
|
3866
|
+
|
3867
|
+
# redirect (as much as possible) app-generated stdout/err to a dedicated file:
|
3868
|
+
with redirect_std_to_file(run_std_path):
|
3869
|
+
with self._store.cached_load():
|
3870
|
+
js_idx = cast("int", block_act_key[0])
|
3871
|
+
run = self.get_EARs_from_IDs([run_ID])[0]
|
3872
|
+
run_dir = None
|
3873
|
+
if run.action.requires_dir:
|
3874
|
+
run_dir = run.get_directory()
|
3875
|
+
assert run_dir
|
3876
|
+
self._app.submission_logger.debug(
|
3877
|
+
f"changing directory to run execution directory: {run_dir}."
|
3129
3878
|
)
|
3130
|
-
|
3131
|
-
|
3132
|
-
|
3133
|
-
|
3134
|
-
|
3135
|
-
|
3136
|
-
|
3137
|
-
|
3138
|
-
|
3879
|
+
os.chdir(run_dir)
|
3880
|
+
self._app.submission_logger.debug(f"{run.skip=}; {run.skip_reason=}")
|
3881
|
+
|
3882
|
+
# check if we should skip:
|
3883
|
+
if not run.skip:
|
3884
|
+
|
3885
|
+
try:
|
3886
|
+
with run.raise_on_failure_threshold() as unset_params:
|
3887
|
+
if run.action.script:
|
3888
|
+
run.write_script_input_files(block_act_key)
|
3889
|
+
|
3890
|
+
# write the command file that will be executed:
|
3891
|
+
cmd_file_path = self.ensure_commands_file(
|
3892
|
+
submission_idx, js_idx, run
|
3893
|
+
)
|
3894
|
+
|
3895
|
+
except UnsetParameterDataErrorBase:
|
3896
|
+
# not all required parameter data is set, so fail this run:
|
3897
|
+
self._app.submission_logger.debug(
|
3898
|
+
f"unset parameter threshold satisfied (or any unset "
|
3899
|
+
f"parameters found when trying to write commands file), so "
|
3900
|
+
f"not attempting run. unset_params={unset_params!r}."
|
3901
|
+
)
|
3902
|
+
self.set_EAR_start(run_ID, run_dir, port_number=None)
|
3903
|
+
self._check_loop_termination(run) # not sure if this is required
|
3904
|
+
self.set_EAR_end(
|
3905
|
+
block_act_key=block_act_key,
|
3906
|
+
run=run,
|
3907
|
+
exit_code=1,
|
3908
|
+
)
|
3909
|
+
return
|
3910
|
+
|
3911
|
+
# sufficient parameter data is set so far, but need to pass `unset_params`
|
3912
|
+
# on as an environment variable so it can be appended to and failure
|
3913
|
+
# thresholds can be rechecked if necessary (i.e. in a Python script
|
3914
|
+
# where we also load input parameters "directly")
|
3915
|
+
if unset_params:
|
3916
|
+
self._app.submission_logger.debug(
|
3917
|
+
f"some unset parameters found, but no unset-thresholds met: "
|
3918
|
+
f"unset_params={unset_params!r}."
|
3919
|
+
)
|
3920
|
+
|
3921
|
+
# TODO: pass on unset_params to script as environment variable
|
3922
|
+
|
3923
|
+
if has_commands := bool(cmd_file_path):
|
3924
|
+
|
3925
|
+
assert isinstance(cmd_file_path, Path)
|
3926
|
+
if not cmd_file_path.is_file():
|
3927
|
+
raise RuntimeError(
|
3928
|
+
f"Command file {cmd_file_path!r} does not exist."
|
3929
|
+
)
|
3930
|
+
# prepare subprocess command:
|
3931
|
+
jobscript = self.submissions[submission_idx].jobscripts[js_idx]
|
3932
|
+
cmd = jobscript.shell.get_command_file_launch_command(
|
3933
|
+
str(cmd_file_path)
|
3934
|
+
)
|
3935
|
+
loop_idx_str = ";".join(
|
3936
|
+
f"{k}={v}" for k, v in run.element_iteration.loop_idx.items()
|
3937
|
+
)
|
3938
|
+
app_caps = self._app.package_name.upper()
|
3939
|
+
|
3940
|
+
# TODO: make these optionally set (more difficult to set in combine_script,
|
3941
|
+
# so have the option to turn off) [default ON]
|
3942
|
+
add_env = {
|
3943
|
+
f"{app_caps}_RUN_ID": str(run_ID),
|
3944
|
+
f"{app_caps}_RUN_IDX": str(run.index),
|
3945
|
+
f"{app_caps}_ELEMENT_IDX": str(run.element.index),
|
3946
|
+
f"{app_caps}_ELEMENT_ID": str(run.element.id_),
|
3947
|
+
f"{app_caps}_ELEMENT_ITER_IDX": str(
|
3948
|
+
run.element_iteration.index
|
3949
|
+
),
|
3950
|
+
f"{app_caps}_ELEMENT_ITER_ID": str(run.element_iteration.id_),
|
3951
|
+
f"{app_caps}_ELEMENT_ITER_LOOP_IDX": loop_idx_str,
|
3952
|
+
}
|
3953
|
+
|
3954
|
+
if run.action.script:
|
3955
|
+
if run.is_snippet_script:
|
3956
|
+
script_artifact_name = run.get_script_artifact_name()
|
3957
|
+
script_dir = Path(
|
3958
|
+
os.environ[f"{app_caps}_SUB_SCRIPTS_DIR"]
|
3959
|
+
)
|
3960
|
+
script_name = script_artifact_name
|
3961
|
+
else:
|
3962
|
+
# not a snippet script; expect the script in the run execute
|
3963
|
+
# directory (i.e. created by a previous action)
|
3964
|
+
script_dir = Path.cwd()
|
3965
|
+
script_name = run.action.script
|
3966
|
+
script_name_no_ext = Path(script_name).stem
|
3967
|
+
add_env.update(
|
3968
|
+
{
|
3969
|
+
f"{app_caps}_RUN_SCRIPT_NAME": script_name,
|
3970
|
+
f"{app_caps}_RUN_SCRIPT_NAME_NO_EXT": script_name_no_ext,
|
3971
|
+
f"{app_caps}_RUN_SCRIPT_DIR": str(script_dir),
|
3972
|
+
f"{app_caps}_RUN_SCRIPT_PATH": str(
|
3973
|
+
script_dir / script_name
|
3974
|
+
),
|
3975
|
+
}
|
3976
|
+
)
|
3977
|
+
|
3978
|
+
env = {**dict(os.environ), **add_env}
|
3979
|
+
|
3980
|
+
self._app.submission_logger.debug(
|
3981
|
+
f"Executing run commands via subprocess with command {cmd!r}, and "
|
3982
|
+
f"environment variables as below."
|
3983
|
+
)
|
3984
|
+
for k, v in env.items():
|
3985
|
+
if k.startswith(app_caps):
|
3986
|
+
self._app.submission_logger.debug(f"{k} = {v!r}")
|
3987
|
+
exe = self._app.Executor(cmd, env, self._app.package_name)
|
3988
|
+
port = (
|
3989
|
+
exe.start_zmq_server()
|
3990
|
+
) # start the server so we know the port
|
3991
|
+
|
3992
|
+
try:
|
3993
|
+
self.set_EAR_start(run_ID, run_dir, port)
|
3994
|
+
except:
|
3995
|
+
self._app.submission_logger.error(f"Failed to set run start.")
|
3996
|
+
exe.stop_zmq_server()
|
3997
|
+
raise
|
3998
|
+
|
3999
|
+
# this subprocess may include commands that redirect to the std_stream file (e.g.
|
4000
|
+
# calling the app to save a parameter from a shell command output):
|
4001
|
+
if not run.skip and has_commands:
|
4002
|
+
ret_code = exe.run() # this also shuts down the server
|
4003
|
+
|
4004
|
+
# redirect (as much as possible) app-generated stdout/err to a dedicated file:
|
4005
|
+
with redirect_std_to_file(run_std_path):
|
4006
|
+
if run.skip:
|
4007
|
+
ret_code = SKIPPED_EXIT_CODE
|
4008
|
+
elif not has_commands:
|
4009
|
+
ret_code = NO_COMMANDS_EXIT_CODE
|
4010
|
+
else:
|
4011
|
+
self._check_loop_termination(run)
|
4012
|
+
|
4013
|
+
# set run end:
|
4014
|
+
self.set_EAR_end(
|
4015
|
+
block_act_key=block_act_key,
|
4016
|
+
run=run,
|
4017
|
+
exit_code=ret_code,
|
3139
4018
|
)
|
3140
|
-
|
3141
|
-
|
3142
|
-
|
3143
|
-
|
3144
|
-
|
3145
|
-
|
3146
|
-
|
3147
|
-
|
4019
|
+
|
4020
|
+
def _check_loop_termination(self, run: ElementActionRun) -> set[int]:
|
4021
|
+
"""Check if we need to terminate a loop if this is the last action of the loop
|
4022
|
+
iteration for this element, and set downstream iteration runs to skip."""
|
4023
|
+
|
4024
|
+
elem_iter = run.element_iteration
|
4025
|
+
task = elem_iter.task
|
4026
|
+
check_loops = []
|
4027
|
+
to_skip = set()
|
4028
|
+
for loop_name in elem_iter.loop_idx:
|
4029
|
+
self._app.logger.info(f"checking loop termination of loop {loop_name!r}.")
|
4030
|
+
loop = self.loops.get(loop_name)
|
4031
|
+
if (
|
4032
|
+
loop.template.termination
|
4033
|
+
and task.insert_ID == loop.template.termination_task_insert_ID
|
4034
|
+
and run.element_action.action_idx == max(elem_iter.actions)
|
4035
|
+
):
|
4036
|
+
check_loops.append(loop_name)
|
4037
|
+
# TODO: test with condition actions
|
4038
|
+
if loop.test_termination(elem_iter):
|
4039
|
+
self._app.logger.info(
|
4040
|
+
f"loop {loop_name!r} termination condition met for run "
|
4041
|
+
f"ID {run.id_!r}."
|
3148
4042
|
)
|
3149
|
-
|
3150
|
-
|
3151
|
-
|
4043
|
+
to_skip.update(loop.skip_downstream_iterations(elem_iter))
|
4044
|
+
return to_skip
|
4045
|
+
|
4046
|
+
@load_workflow_config
|
4047
|
+
def execute_combined_runs(self, submission_idx: int, jobscript_idx: int) -> None:
|
4048
|
+
"""Execute a combined script (multiple runs) via a subprocess."""
|
4049
|
+
|
4050
|
+
# CD to submission tmp dir to ensure std streams and exceptions have somewhere
|
4051
|
+
# sensible to go:
|
4052
|
+
os.chdir(Submission.get_tmp_path(self.submissions_path, submission_idx))
|
4053
|
+
|
4054
|
+
sub = self.submissions[submission_idx]
|
4055
|
+
js = sub.jobscripts[jobscript_idx]
|
4056
|
+
|
4057
|
+
app_caps = self._app.package_name.upper()
|
4058
|
+
script_dir = Path(os.environ[f"{app_caps}_SUB_SCRIPTS_DIR"])
|
4059
|
+
script_name = f"js_{jobscript_idx}.py" # TODO: refactor script name
|
4060
|
+
script_path = script_dir / script_name
|
4061
|
+
|
4062
|
+
add_env = {
|
4063
|
+
f"{app_caps}_RUN_SCRIPT_NAME": script_name,
|
4064
|
+
f"{app_caps}_RUN_SCRIPT_NAME_NO_EXT": script_path.stem,
|
4065
|
+
f"{app_caps}_RUN_SCRIPT_DIR": str(script_dir),
|
4066
|
+
f"{app_caps}_RUN_SCRIPT_PATH": str(script_path),
|
4067
|
+
f"{app_caps}_SCRIPT_INDICES_FILE": str(js.combined_script_indices_file_path),
|
4068
|
+
}
|
4069
|
+
env = {**dict(os.environ), **add_env}
|
4070
|
+
|
4071
|
+
# note: unlike in `Workflow.execute_run`, here we can be reasonably sure the
|
4072
|
+
# commands file already exists, because we call `Action.try_write_commands` with
|
4073
|
+
# `raise_on_unset=True` in `Workflow._add_submission` during submission.
|
4074
|
+
|
4075
|
+
# TODO: refactor cmd file name:
|
4076
|
+
cmd_file_path = sub.commands_path / f"js_{jobscript_idx}{js.shell.JS_EXT}"
|
4077
|
+
cmd = js.shell.get_command_file_launch_command(str(cmd_file_path))
|
3152
4078
|
|
3153
|
-
|
4079
|
+
self._app.submission_logger.debug(
|
4080
|
+
f"Executing combined runs via subprocess with command {cmd!r}, and "
|
4081
|
+
f"environment variables as below."
|
4082
|
+
)
|
4083
|
+
for k, v in env.items():
|
4084
|
+
if k.startswith(app_caps):
|
4085
|
+
self._app.submission_logger.debug(f"{k} = {v}")
|
4086
|
+
|
4087
|
+
exe = self._app.Executor(cmd, env, self._app.package_name)
|
4088
|
+
exe.start_zmq_server() # start the server
|
4089
|
+
exe.run() # this also shuts down the server
|
4090
|
+
|
4091
|
+
def ensure_commands_file(
|
3154
4092
|
self,
|
3155
4093
|
submission_idx: int,
|
3156
|
-
|
3157
|
-
|
3158
|
-
|
3159
|
-
|
3160
|
-
"
|
4094
|
+
js_idx: int,
|
4095
|
+
run: ElementActionRun,
|
4096
|
+
) -> Path | bool:
|
4097
|
+
"""Ensure a commands file exists for the specified run."""
|
4098
|
+
self._app.persistence_logger.debug("Workflow.ensure_commands_file")
|
4099
|
+
|
4100
|
+
if run.commands_file_ID is None:
|
4101
|
+
# no commands to write
|
4102
|
+
return False
|
4103
|
+
|
3161
4104
|
with self._store.cached_load():
|
3162
|
-
self.
|
3163
|
-
|
3164
|
-
|
3165
|
-
|
3166
|
-
|
3167
|
-
|
3168
|
-
|
3169
|
-
|
3170
|
-
|
3171
|
-
|
3172
|
-
|
3173
|
-
|
3174
|
-
|
3175
|
-
|
3176
|
-
|
4105
|
+
sub = self.submissions[submission_idx]
|
4106
|
+
jobscript = sub.jobscripts[js_idx]
|
4107
|
+
|
4108
|
+
# check if a commands file already exists, first checking using the run ID:
|
4109
|
+
cmd_file_name = f"{run.id_}{jobscript.shell.JS_EXT}" # TODO: refactor
|
4110
|
+
cmd_file_path = jobscript.submission.commands_path / cmd_file_name
|
4111
|
+
|
4112
|
+
if not cmd_file_path.is_file():
|
4113
|
+
# then check for a file from the "root" run ID (the run ID of a run that
|
4114
|
+
# shares the same commands file):
|
4115
|
+
|
4116
|
+
cmd_file_name = (
|
4117
|
+
f"{run.commands_file_ID}{jobscript.shell.JS_EXT}" # TODO: refactor
|
4118
|
+
)
|
4119
|
+
cmd_file_path = jobscript.submission.commands_path / cmd_file_name
|
4120
|
+
|
4121
|
+
if not cmd_file_path.is_file():
|
4122
|
+
# no file available, so write (using the run ID):
|
4123
|
+
try:
|
4124
|
+
cmd_file_path = run.try_write_commands(
|
4125
|
+
jobscript=jobscript,
|
4126
|
+
environments=sub.environments,
|
4127
|
+
raise_on_unset=True,
|
4128
|
+
)
|
4129
|
+
except OutputFileParserNoOutputError:
|
4130
|
+
# no commands to write, might be used just for saving files
|
4131
|
+
return False
|
4132
|
+
|
4133
|
+
return cmd_file_path
|
3177
4134
|
|
3178
4135
|
def process_shell_parameter_output(
|
3179
4136
|
self, name: str, value: str, EAR_ID: int, cmd_idx: int, stderr: bool = False
|
@@ -3257,9 +4214,11 @@ class Workflow(AppAware):
|
|
3257
4214
|
input_source.task_ref = uniq_names_cur[input_source.task_ref]
|
3258
4215
|
except KeyError:
|
3259
4216
|
raise InvalidInputSourceTaskReference(
|
3260
|
-
|
4217
|
+
f"Input source {input_source.to_string()!r} refers to a missing "
|
4218
|
+
f"or inaccessible task: {input_source.task_ref!r}."
|
3261
4219
|
)
|
3262
4220
|
|
4221
|
+
@TimeIt.decorator
|
3263
4222
|
def get_all_submission_run_IDs(self) -> Iterable[int]:
|
3264
4223
|
"""
|
3265
4224
|
Get the run IDs of all submissions.
|
@@ -3268,68 +4227,6 @@ class Workflow(AppAware):
|
|
3268
4227
|
for sub in self.submissions:
|
3269
4228
|
yield from sub.all_EAR_IDs
|
3270
4229
|
|
3271
|
-
def check_loop_termination(self, loop_name: str, run_ID: int) -> None:
|
3272
|
-
"""Check if a loop should terminate, given the specified completed run, and if so,
|
3273
|
-
set downstream iteration runs to be skipped."""
|
3274
|
-
loop = self.loops.get(loop_name)
|
3275
|
-
elem_iter = self.get_EARs_from_IDs(run_ID).element_iteration
|
3276
|
-
if loop.test_termination(elem_iter):
|
3277
|
-
# run IDs of downstream iterations that can be skipped
|
3278
|
-
to_skip: set[int] = set()
|
3279
|
-
elem_id = elem_iter.element.id_
|
3280
|
-
loop_map = self.get_loop_map() # over all jobscripts
|
3281
|
-
for iter_idx, iter_dat in loop_map[loop_name][elem_id].items():
|
3282
|
-
if iter_idx > elem_iter.index:
|
3283
|
-
to_skip.update(itr_d.id_ for itr_d in iter_dat)
|
3284
|
-
self._app.logger.info(
|
3285
|
-
f"Loop {loop_name!r} termination condition met for run_ID {run_ID!r}."
|
3286
|
-
)
|
3287
|
-
for run_ID in to_skip:
|
3288
|
-
self.set_EAR_skip(run_ID)
|
3289
|
-
|
3290
|
-
def get_loop_map(
|
3291
|
-
self, id_lst: Iterable[int] | None = None
|
3292
|
-
) -> Mapping[str, Mapping[int, Mapping[int, Sequence[_IterationData]]]]:
|
3293
|
-
"""
|
3294
|
-
Get a description of what is going on with looping.
|
3295
|
-
"""
|
3296
|
-
# TODO: test this works across multiple jobscripts
|
3297
|
-
self._app.persistence_logger.debug("Workflow.get_loop_map")
|
3298
|
-
if id_lst is None:
|
3299
|
-
id_lst = self.get_all_submission_run_IDs()
|
3300
|
-
loop_map: dict[str, dict[int, dict[int, list[_IterationData]]]] = defaultdict(
|
3301
|
-
lambda: defaultdict(lambda: defaultdict(list))
|
3302
|
-
)
|
3303
|
-
for EAR in self.get_EARs_from_IDs(id_lst):
|
3304
|
-
for loop_name, iter_idx in EAR.element_iteration.loop_idx.items():
|
3305
|
-
act_idx = EAR.element_action.action_idx
|
3306
|
-
loop_map[loop_name][EAR.element.id_][iter_idx].append(
|
3307
|
-
_IterationData(EAR.id_, act_idx)
|
3308
|
-
)
|
3309
|
-
return loop_map
|
3310
|
-
|
3311
|
-
def get_iteration_final_run_IDs(
|
3312
|
-
self,
|
3313
|
-
id_lst: Iterable[int] | None = None,
|
3314
|
-
) -> Mapping[str, Sequence[int]]:
|
3315
|
-
"""Retrieve the run IDs of those runs that correspond to the final action within
|
3316
|
-
a named loop iteration.
|
3317
|
-
|
3318
|
-
These runs represent the final action of a given element-iteration; this is used to
|
3319
|
-
identify which commands file to append a loop-termination check to.
|
3320
|
-
"""
|
3321
|
-
self._app.persistence_logger.debug("Workflow.get_iteration_final_run_IDs")
|
3322
|
-
|
3323
|
-
loop_map = self.get_loop_map(id_lst)
|
3324
|
-
|
3325
|
-
# find final EARs for each loop:
|
3326
|
-
final_runs: dict[str, list[int]] = defaultdict(list)
|
3327
|
-
for loop_name, dat in loop_map.items():
|
3328
|
-
for elem_dat in dat.values():
|
3329
|
-
for iter_dat in elem_dat.values():
|
3330
|
-
final_runs[loop_name].append(max(iter_dat, key=lambda x: x.idx).id_)
|
3331
|
-
return final_runs
|
3332
|
-
|
3333
4230
|
def rechunk_runs(
|
3334
4231
|
self,
|
3335
4232
|
chunk_size: int | None = None,
|
@@ -3348,7 +4245,7 @@ class Workflow(AppAware):
|
|
3348
4245
|
status: bool = True,
|
3349
4246
|
):
|
3350
4247
|
"""
|
3351
|
-
Reorganise the stored data chunks for
|
4248
|
+
Reorganise the stored data chunks for parameters to be more efficient.
|
3352
4249
|
"""
|
3353
4250
|
self._store.rechunk_parameter_base(
|
3354
4251
|
chunk_size=chunk_size, backup=backup, status=status
|
@@ -3366,6 +4263,311 @@ class Workflow(AppAware):
|
|
3366
4263
|
self.rechunk_runs(chunk_size=chunk_size, backup=backup, status=status)
|
3367
4264
|
self.rechunk_parameter_base(chunk_size=chunk_size, backup=backup, status=status)
|
3368
4265
|
|
4266
|
+
@TimeIt.decorator
|
4267
|
+
def get_run_directories(
|
4268
|
+
self,
|
4269
|
+
run_ids: list[int] | None = None,
|
4270
|
+
dir_indices_arr: np.ndarray | None = None,
|
4271
|
+
) -> list[Path | None]:
|
4272
|
+
""""""
|
4273
|
+
|
4274
|
+
@TimeIt.decorator
|
4275
|
+
def _get_depth_dirs(
|
4276
|
+
item_idx: int,
|
4277
|
+
max_per_dir: int,
|
4278
|
+
max_depth: int,
|
4279
|
+
depth_idx_cache: dict[tuple[int, int], NDArray],
|
4280
|
+
prefix: str,
|
4281
|
+
) -> list[str]:
|
4282
|
+
dirs = []
|
4283
|
+
max_avail_items = max_per_dir**max_depth
|
4284
|
+
for depth_i in range(1, max_depth):
|
4285
|
+
tot_items_per_level = int(max_avail_items / max_per_dir**depth_i)
|
4286
|
+
key = (max_avail_items, tot_items_per_level)
|
4287
|
+
if (depth_idx := depth_idx_cache.get(key)) is None:
|
4288
|
+
depth_idx = np.repeat(
|
4289
|
+
np.arange(max_avail_items / tot_items_per_level, dtype=int),
|
4290
|
+
tot_items_per_level,
|
4291
|
+
)
|
4292
|
+
depth_idx_cache[key] = depth_idx
|
4293
|
+
idx_i = cast("NDArray", depth_idx)[item_idx]
|
4294
|
+
start_idx = idx_i * tot_items_per_level
|
4295
|
+
end_idx = start_idx + tot_items_per_level - 1
|
4296
|
+
dirs.append(f"{prefix}_{start_idx}-{end_idx}")
|
4297
|
+
return dirs
|
4298
|
+
|
4299
|
+
if dir_indices_arr is None: # TODO: document behaviour!
|
4300
|
+
dir_indices_arr = self._store.get_dirs_array()
|
4301
|
+
if run_ids is not None:
|
4302
|
+
dir_indices_arr = dir_indices_arr[run_ids]
|
4303
|
+
|
4304
|
+
# TODO: make these configurable so easier to test!
|
4305
|
+
MAX_ELEMS_PER_DIR = 1000 # TODO: configurable (add `workflow_defaults` to Config)
|
4306
|
+
MAX_ITERS_PER_DIR = 1000
|
4307
|
+
|
4308
|
+
exec_path = self.execution_path
|
4309
|
+
|
4310
|
+
# a fill value means no sub directory should be created
|
4311
|
+
T_FILL, E_FILL, I_FILL, A_FILL, R_FILL, _, _ = RUN_DIR_ARR_FILL
|
4312
|
+
|
4313
|
+
depth_idx_cache: dict[
|
4314
|
+
tuple[int, int], NDArray
|
4315
|
+
] = {} # keys are (max_avail, tot_elems_per_dir_level)
|
4316
|
+
|
4317
|
+
# format run directories:
|
4318
|
+
dirs = []
|
4319
|
+
for dir_data in dir_indices_arr:
|
4320
|
+
|
4321
|
+
# TODO: retrieve task,element,iteration,action,run dir formats from
|
4322
|
+
# (t_iID, act_idx) combo (cached)?
|
4323
|
+
|
4324
|
+
t_iID, e_idx, i_idx, _, r_idx, e_depth, i_depth = dir_data
|
4325
|
+
path_args = []
|
4326
|
+
|
4327
|
+
if t_iID != T_FILL:
|
4328
|
+
path_args.append(f"t_{t_iID}")
|
4329
|
+
|
4330
|
+
if e_idx != E_FILL:
|
4331
|
+
if e_depth > 1:
|
4332
|
+
path_args.extend(
|
4333
|
+
_get_depth_dirs(
|
4334
|
+
item_idx=e_idx,
|
4335
|
+
max_per_dir=MAX_ELEMS_PER_DIR,
|
4336
|
+
max_depth=e_depth,
|
4337
|
+
depth_idx_cache=depth_idx_cache,
|
4338
|
+
prefix="e",
|
4339
|
+
)
|
4340
|
+
)
|
4341
|
+
path_args.append(f"e_{e_idx}")
|
4342
|
+
|
4343
|
+
if i_idx != I_FILL:
|
4344
|
+
if i_depth > 1:
|
4345
|
+
path_args.extend(
|
4346
|
+
_get_depth_dirs(
|
4347
|
+
item_idx=i_idx,
|
4348
|
+
max_per_dir=MAX_ITERS_PER_DIR,
|
4349
|
+
max_depth=i_depth,
|
4350
|
+
depth_idx_cache=depth_idx_cache,
|
4351
|
+
prefix="i",
|
4352
|
+
)
|
4353
|
+
)
|
4354
|
+
path_args.append(f"i_{i_idx}")
|
4355
|
+
|
4356
|
+
if r_idx != R_FILL:
|
4357
|
+
path_args.append(f"r_{r_idx}")
|
4358
|
+
|
4359
|
+
if path_args:
|
4360
|
+
run_dir = exec_path.joinpath(*path_args)
|
4361
|
+
elif e_depth == 1:
|
4362
|
+
run_dir = exec_path
|
4363
|
+
else:
|
4364
|
+
run_dir = None
|
4365
|
+
|
4366
|
+
dirs.append(run_dir)
|
4367
|
+
|
4368
|
+
return dirs
|
4369
|
+
|
4370
|
+
@TimeIt.decorator
|
4371
|
+
def get_scheduler_job_IDs(self) -> tuple[str, ...]:
|
4372
|
+
"""Return jobscript scheduler job IDs from all submissions of this workflow."""
|
4373
|
+
return tuple(
|
4374
|
+
IDs_j for sub_i in self.submissions for IDs_j in sub_i.get_scheduler_job_IDs()
|
4375
|
+
)
|
4376
|
+
|
4377
|
+
@TimeIt.decorator
|
4378
|
+
def get_process_IDs(self) -> tuple[int, ...]:
|
4379
|
+
"""Return jobscript process IDs from all submissions of this workflow."""
|
4380
|
+
return tuple(
|
4381
|
+
IDs_j for sub_i in self.submissions for IDs_j in sub_i.get_process_IDs()
|
4382
|
+
)
|
4383
|
+
|
4384
|
+
@TimeIt.decorator
|
4385
|
+
def list_jobscripts(
|
4386
|
+
self,
|
4387
|
+
sub_idx: int = 0,
|
4388
|
+
max_js: int | None = None,
|
4389
|
+
jobscripts: list[int] | None = None,
|
4390
|
+
width: int | None = None,
|
4391
|
+
) -> None:
|
4392
|
+
"""Print a table listing jobscripts and associated information from the specified
|
4393
|
+
submission.
|
4394
|
+
|
4395
|
+
Parameters
|
4396
|
+
----------
|
4397
|
+
sub_idx
|
4398
|
+
The submission index whose jobscripts are to be displayed.
|
4399
|
+
max_js
|
4400
|
+
Maximum jobscript index to display. This cannot be specified with `jobscripts`.
|
4401
|
+
jobscripts
|
4402
|
+
A list of jobscripts to display. This cannot be specified with `max_js`.
|
4403
|
+
width
|
4404
|
+
Width in characters of the printed table.
|
4405
|
+
"""
|
4406
|
+
|
4407
|
+
with self._store.cached_load():
|
4408
|
+
|
4409
|
+
if max_js is not None and jobscripts is not None:
|
4410
|
+
raise ValueError("Do not specify both `max_js` and `jobscripts`.")
|
4411
|
+
|
4412
|
+
loop_names = [i.name for i in self.loops][::-1]
|
4413
|
+
loop_names_panel: rich.panel.Panel | str = ""
|
4414
|
+
if loop_names:
|
4415
|
+
loop_names_panel = rich.panel.Panel(
|
4416
|
+
"\n".join(f"{idx}: {i}" for idx, i in enumerate(loop_names)),
|
4417
|
+
title="[b]Loops[/b]",
|
4418
|
+
title_align="left",
|
4419
|
+
box=rich.box.SIMPLE,
|
4420
|
+
)
|
4421
|
+
|
4422
|
+
table = rich.table.Table(width=width)
|
4423
|
+
|
4424
|
+
table.add_column("Jobscript", justify="right", style="cyan", no_wrap=True)
|
4425
|
+
table.add_column("Acts, Elms", justify="right", style="green")
|
4426
|
+
table.add_column("Deps.", style="orange3")
|
4427
|
+
table.add_column("Tasks", overflow="fold")
|
4428
|
+
table.add_column("Loops")
|
4429
|
+
|
4430
|
+
sub_js = self.submissions[sub_idx].jobscripts
|
4431
|
+
max_js = max_js if max_js is not None else len(sub_js)
|
4432
|
+
for js in sub_js:
|
4433
|
+
if jobscripts is not None and js.index not in jobscripts:
|
4434
|
+
continue
|
4435
|
+
if js.index > max_js:
|
4436
|
+
break
|
4437
|
+
for blk in js.blocks:
|
4438
|
+
blk_task_actions = blk.task_actions
|
4439
|
+
num_actions = blk_task_actions.shape[0]
|
4440
|
+
|
4441
|
+
if blk.index == 0:
|
4442
|
+
c1 = f"{js.index} - {blk.index}"
|
4443
|
+
else:
|
4444
|
+
c1 = f"{blk.index}"
|
4445
|
+
c3 = f"{num_actions}, {blk.num_elements}"
|
4446
|
+
|
4447
|
+
deps = "; ".join(f"{i[0],i[1]}" for i in blk.dependencies)
|
4448
|
+
|
4449
|
+
for blk_t_idx, t_iID in enumerate(blk.task_insert_IDs):
|
4450
|
+
|
4451
|
+
# loop indices are the same for all actions within a task, so get the
|
4452
|
+
# first `task_action` for this task insert ID:
|
4453
|
+
for i in blk_task_actions:
|
4454
|
+
if i[0] == t_iID:
|
4455
|
+
loop_idx = [
|
4456
|
+
blk.task_loop_idx[i[2]].get(loop_name_i, "-")
|
4457
|
+
for loop_name_i in loop_names
|
4458
|
+
]
|
4459
|
+
break
|
4460
|
+
|
4461
|
+
c2 = self.tasks.get(insert_ID=t_iID).unique_name
|
4462
|
+
|
4463
|
+
if blk_t_idx > 0:
|
4464
|
+
c1 = ""
|
4465
|
+
c3 = ""
|
4466
|
+
deps = ""
|
4467
|
+
|
4468
|
+
table.add_row(
|
4469
|
+
c1, c3, deps, c2, (" | ".join(f"{i}" for i in loop_idx))
|
4470
|
+
)
|
4471
|
+
|
4472
|
+
table.add_section()
|
4473
|
+
|
4474
|
+
group = rich.console.Group(
|
4475
|
+
rich.text.Text(f"Workflow: {self.name}"),
|
4476
|
+
rich.text.Text(f"Submission: {sub_idx}" + ("\n" if loop_names_panel else "")),
|
4477
|
+
loop_names_panel,
|
4478
|
+
table,
|
4479
|
+
)
|
4480
|
+
rich_print(group)
|
4481
|
+
|
4482
|
+
def list_task_jobscripts(
    self,
    sub_idx: int = 0,
    task_names: list[str] | None = None,
    max_js: int | None = None,
    width: int | None = None,
):
    """Print a table listing the jobscripts associated with the specified (or all)
    tasks for the specified submission.

    Parameters
    ----------
    sub_idx
        The submission index whose jobscripts are to be displayed.
    task_names
        List of sub-strings to match to task names. Only matching task names will be
        included.
    max_js
        Maximum jobscript index to display.
    width
        Width in characters of the printed table.
    """
    with self._store.cached_load():
        # loop names are displayed in reverse order of definition:
        loop_names = [i.name for i in self.loops][::-1]
        loop_names_panel: rich.panel.Panel | str = ""
        if loop_names:
            loop_names_panel = rich.panel.Panel(
                "\n".join(f"{idx}: {i}" for idx, i in enumerate(loop_names)),
                title="[b]Loops[/b]",
                title_align="left",
                box=rich.box.SIMPLE,
            )

        sub_js = self.submissions[sub_idx].jobscripts
        all_task_names = {i.insert_ID: i.unique_name for i in self.tasks}

        # filter task names by those matching the specified sub-strings:
        matched = all_task_names
        if task_names:
            matched = {
                k: v
                for k, v in all_task_names.items()
                if any(i in v for i in task_names)
            }

        # map each matched task insert ID to a list of
        # (jobscript index, block index, loop indices) tuples:
        task_jobscripts = defaultdict(list)
        for js in sub_js:
            if max_js is not None and js.index > max_js:
                break
            for blk in js.blocks:
                blk_task_actions = blk.task_actions
                for i in blk.task_insert_IDs:
                    if i in matched:
                        # loop indices are the same for all actions within a task,
                        # so use the first `task_actions` row for this insert ID.
                        # Initialise first, so a task with no matching action row
                        # cannot raise a NameError or silently reuse the stale
                        # `loop_idx` from a previously processed task:
                        loop_idx = ["-"] * len(loop_names)
                        for j in blk_task_actions:
                            if j[0] == i:
                                loop_idx = [
                                    blk.task_loop_idx[j[2]].get(loop_name_i, "-")
                                    for loop_name_i in loop_names
                                ]
                                break
                        task_jobscripts[i].append((js.index, blk.index, loop_idx))

        table = rich.table.Table(width=width)
        table.add_column("Task")
        table.add_column("Jobscripts", style="cyan", no_wrap=True)
        table.add_column("Loops")
        for insert_ID_i, jobscripts_i in task_jobscripts.items():
            for idx, js_j in enumerate(jobscripts_i):
                js_idx, blk_idx, loop_idx = js_j
                table.add_row(
                    # only label the first row of each task's section:
                    matched[insert_ID_i] if idx == 0 else "",
                    f"({js_idx}, {blk_idx})",
                    (" | ".join(f"{i}" for i in loop_idx)),
                )
            table.add_section()

        group = rich.console.Group(
            rich.text.Text(f"Workflow: {self.name}"),
            rich.text.Text(f"Submission: {sub_idx}" + ("\n" if loop_names_panel else "")),
            loop_names_panel,
            table,
        )
        rich_print(group)
|
4566
|
+
|
4567
|
+
def get_text_file(self, path: str | Path) -> str:
    """Read and return, as a string, a text file stored within the workflow.

    Parameters
    ----------
    path
        Location of the text file to retrieve; passed through to the workflow's
        persistent store.
    """
    store = self._store
    return store.get_text_file(path)
|
4570
|
+
|
3369
4571
|
|
3370
4572
|
@dataclass
|
3371
4573
|
class WorkflowBlueprint:
|