hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
- hpcflow/_version.py +1 -1
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +5 -0
- hpcflow/sdk/app.py +166 -92
- hpcflow/sdk/cli.py +263 -84
- hpcflow/sdk/cli_common.py +99 -5
- hpcflow/sdk/config/callbacks.py +38 -1
- hpcflow/sdk/config/config.py +102 -13
- hpcflow/sdk/config/errors.py +19 -5
- hpcflow/sdk/config/types.py +3 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +914 -262
- hpcflow/sdk/core/cache.py +76 -34
- hpcflow/sdk/core/command_files.py +14 -128
- hpcflow/sdk/core/commands.py +35 -6
- hpcflow/sdk/core/element.py +122 -50
- hpcflow/sdk/core/errors.py +58 -2
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/loop.py +408 -50
- hpcflow/sdk/core/loop_cache.py +4 -4
- hpcflow/sdk/core/parameters.py +382 -37
- hpcflow/sdk/core/run_dir_files.py +13 -40
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +119 -30
- hpcflow/sdk/core/task_schema.py +68 -0
- hpcflow/sdk/core/test_utils.py +66 -27
- hpcflow/sdk/core/types.py +54 -1
- hpcflow/sdk/core/utils.py +136 -19
- hpcflow/sdk/core/workflow.py +1587 -356
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +7 -0
- hpcflow/sdk/helper/cli.py +1 -0
- hpcflow/sdk/log.py +42 -15
- hpcflow/sdk/persistence/base.py +405 -53
- hpcflow/sdk/persistence/json.py +177 -52
- hpcflow/sdk/persistence/pending.py +237 -69
- hpcflow/sdk/persistence/store_resource.py +3 -2
- hpcflow/sdk/persistence/types.py +15 -4
- hpcflow/sdk/persistence/zarr.py +928 -81
- hpcflow/sdk/submission/jobscript.py +1408 -489
- hpcflow/sdk/submission/schedulers/__init__.py +40 -5
- hpcflow/sdk/submission/schedulers/direct.py +33 -19
- hpcflow/sdk/submission/schedulers/sge.py +51 -16
- hpcflow/sdk/submission/schedulers/slurm.py +44 -16
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/base.py +68 -20
- hpcflow/sdk/submission/shells/bash.py +222 -129
- hpcflow/sdk/submission/shells/powershell.py +200 -150
- hpcflow/sdk/submission/submission.py +852 -119
- hpcflow/sdk/submission/types.py +18 -21
- hpcflow/sdk/typing.py +24 -5
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +19 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +821 -70
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
- hpcflow/tests/unit/test_action.py +176 -0
- hpcflow/tests/unit/test_app.py +20 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +133 -0
- hpcflow/tests/unit/test_config.py +122 -1
- hpcflow/tests/unit/test_element_iteration.py +47 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_loop.py +1332 -27
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_parameter.py +13 -0
- hpcflow/tests/unit/test_persistence.py +190 -8
- hpcflow/tests/unit/test_run.py +109 -3
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_shell.py +20 -0
- hpcflow/tests/unit/test_submission.py +5 -76
- hpcflow/tests/unit/test_workflow_template.py +31 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +332 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +142 -2
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/METADATA +7 -4
- hpcflow_new2-0.2.0a200.dist-info/RECORD +222 -0
- hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a200.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/core/workflow.py
CHANGED
@@ -4,28 +4,53 @@ Main workflow model.
 
 from __future__ import annotations
 from collections import defaultdict
+from collections.abc import Callable
 from contextlib import contextmanager, nullcontext
 import copy
 from dataclasses import dataclass, field
 
+from functools import wraps
+import os
 from pathlib import Path
 import random
+import shutil
 import string
 from threading import Thread
 import time
-from typing import overload, cast, TYPE_CHECKING
+from typing import overload, cast, TYPE_CHECKING, TypeVar
+from typing_extensions import ParamSpec, Concatenate
+
 from uuid import uuid4
 from warnings import warn
 from fsspec.implementations.local import LocalFileSystem  # type: ignore
 from fsspec.implementations.zip import ZipFileSystem  # type: ignore
 import numpy as np
 from fsspec.core import url_to_fs  # type: ignore
+from rich import print as rich_print
 import rich.console
+import rich.panel
+import rich.table
+import rich.text
+import rich.box
+
 
+from hpcflow.sdk import app
 from hpcflow.sdk.typing import hydrate
-from hpcflow.sdk.
+from hpcflow.sdk.config.errors import (
+    ConfigNonConfigurableError,
+    UnknownMetaTaskConstitutiveSchema,
+)
+from hpcflow.sdk.core import (
+    ALL_TEMPLATE_FORMATS,
+    ABORT_EXIT_CODE,
+    RUN_DIR_ARR_FILL,
+    SKIPPED_EXIT_CODE,
+    NO_COMMANDS_EXIT_CODE,
+)
 from hpcflow.sdk.core.app_aware import AppAware
 from hpcflow.sdk.core.enums import EARStatus
+from hpcflow.sdk.core.skip_reason import SkipReason
+from hpcflow.sdk.core.cache import ObjectCache
 from hpcflow.sdk.core.loop_cache import LoopCache, LoopIndex
 from hpcflow.sdk.log import TimeIt
 from hpcflow.sdk.persistence import store_cls_from_str
@@ -35,18 +60,22 @@ from hpcflow.sdk.persistence.utils import ask_pw_on_auth_exc, infer_store
 from hpcflow.sdk.submission.jobscript import (
     generate_EAR_resource_map,
     group_resource_map_into_jobscripts,
-
+    is_jobscript_array,
     merge_jobscripts_across_tasks,
+    resolve_jobscript_blocks,
     resolve_jobscript_dependencies,
 )
 from hpcflow.sdk.submission.enums import JobscriptElementState
 from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
+from hpcflow.sdk.submission.submission import Submission
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
+from hpcflow.sdk.utils.strings import shorten_list_str
 from hpcflow.sdk.core.utils import (
     read_JSON_file,
     read_JSON_string,
     read_YAML_str,
     read_YAML_file,
+    redirect_std_to_file,
     replace_items,
     current_timestamp,
     normalise_timestamp,
@@ -58,6 +87,7 @@ from hpcflow.sdk.core.errors import (
     OutputFileParserNoOutputError,
     RunNotAbortableError,
     SubmissionFailure,
+    UnsetParameterDataErrorBase,
     WorkflowSubmissionFailure,
 )
 
@@ -70,7 +100,7 @@ if TYPE_CHECKING:
     import psutil
     from rich.status import Status
     from ..typing import DataIndex, ParamSource, PathLike, TemplateComponents
-    from .actions import ElementActionRun
+    from .actions import ElementActionRun, UnsetParamTracker
     from .element import Element, ElementIteration
     from .loop import Loop, WorkflowLoop
     from .object_list import ObjectList, ResourceList, WorkflowLoopList, WorkflowTaskList
@@ -82,6 +112,8 @@ if TYPE_CHECKING:
         Pending,
         Resources,
         WorkflowTemplateTaskData,
+        WorkflowTemplateElementSetData,
+        BlockActionKey,
     )
     from ..submission.submission import Submission
     from ..submission.jobscript import (
@@ -97,10 +129,15 @@ if TYPE_CHECKING:
         StoreEAR,
     )
     from ..persistence.types import TemplateMeta
+    from .json_like import JSONed
 
 #: Convenience alias
 _TemplateComponents: TypeAlias = "dict[str, ObjectList[JSONLike]]"
 
+P = ParamSpec("P")
+T = TypeVar("T")
+S = TypeVar("S", bound="Workflow")
+
 
 @dataclass
 class _Pathway:
@@ -202,6 +239,7 @@ class WorkflowTemplate(JSONLike):
     workflow: Workflow | None = None
     #: Template-level resources to apply to all tasks as default values.
     resources: Resources = None
+    config: dict = field(default_factory=lambda: {})
     #: The execution environments to use.
     environments: Mapping[str, Mapping[str, Any]] | None = None
     #: The environment presets to use.
@@ -216,6 +254,34 @@ class WorkflowTemplate(JSONLike):
     merge_envs: bool = True
 
     def __post_init__(self) -> None:
+
+        # TODO: in what scenario is the reindex required? are loops initialised?
+
+        # replace metatasks with tasks
+        new_tasks: list[Task] = []
+        do_reindex = False
+        reindex = {}
+        for task_idx, i in enumerate(self.tasks):
+            if isinstance(i, app.MetaTask):
+                do_reindex = True
+                tasks_from_meta = copy.deepcopy(i.tasks)
+                reindex[task_idx] = [
+                    len(new_tasks) + i for i in range(len(tasks_from_meta))
+                ]
+                new_tasks.extend(tasks_from_meta)
+            else:
+                reindex[task_idx] = [len(new_tasks)]
+                new_tasks.append(i)
+        if do_reindex:
+            if self.loops:
+                for loop_idx, loop in enumerate(cast("list[dict[str, Any]]", self.loops)):
+                    loop["tasks"] = [j for i in loop["tasks"] for j in reindex[i]]
+                    term_task = loop.get("termination_task")
+                    if term_task is not None:
+                        loop["termination_task"] = reindex[term_task][0]
+
+            self.tasks = new_tasks
+
         resources = self._app.ResourceList.normalise(self.resources)
         self.resources = resources
         self._set_parent_refs()
@@ -235,6 +301,13 @@ class WorkflowTemplate(JSONLike):
         if self.doc and not isinstance(self.doc, list):
             self.doc = [self.doc]
 
+        if self.config:
+            # don't do a full validation (which would require loading the config file),
+            # just check all specified keys are configurable:
+            bad_keys = set(self.config) - set(self._app.config_options._configurable_keys)
+            if bad_keys:
+                raise ConfigNonConfigurableError(name=bad_keys)
+
     @property
     def _resources(self) -> ResourceList:
         res = self.resources
@@ -324,22 +397,121 @@ class WorkflowTemplate(JSONLike):
     @classmethod
     @TimeIt.decorator
     def _from_data(cls, data: dict[str, Any]) -> WorkflowTemplate:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        def _normalise_task_parametrisation(task_lst: list[WorkflowTemplateTaskData]):
+            """
+            For each dict in a list of task parametrisations, ensure the `schema` key is
+            a list of values, and ensure `element_sets` are defined.
+
+            This mutates `task_lst`.
+
+            """
+            # use element_sets if not already:
+            task_dat: WorkflowTemplateTaskData
+            for task_idx, task_dat in enumerate(task_lst):
+                schema = task_dat.pop("schema")
+                schema_list: list = schema if isinstance(schema, list) else [schema]
+                if "element_sets" in task_dat:
+                    # just update the schema to a list:
+                    task_lst[task_idx]["schema"] = schema_list
+                else:
+                    # add a single element set, and update the schema to a list:
+                    out_labels = task_dat.pop("output_labels", [])
+                    es_dat = cast("WorkflowTemplateElementSetData", task_dat)
+                    new_task_dat: WorkflowTemplateTaskData = {
+                        "schema": schema_list,
+                        "element_sets": [es_dat],
+                        "output_labels": out_labels,
+                    }
+                    task_lst[task_idx] = new_task_dat
+                # move sequences with `paths` (note: plural) to multi_path_sequences:
+                for elem_set in task_lst[task_idx]["element_sets"]:
+                    new_mps = []
+                    seqs = elem_set.get("sequences", [])
+                    seqs = list(seqs)  # copy
+                    # loop in reverse so indices for pop are valid:
+                    for seq_idx, seq_dat in zip(range(len(seqs) - 1, -1, -1), seqs[::-1]):
+                        if "paths" in seq_dat:  # (note: plural)
+                            # move to a multi-path sequence:
+                            new_mps.append(elem_set["sequences"].pop(seq_idx))
+                    elem_set.setdefault("multi_path_sequences", []).extend(new_mps[::-1])
+
+        meta_tasks = data.pop("meta_tasks", {})
+        if meta_tasks:
+            for i in list(meta_tasks):
+                _normalise_task_parametrisation(meta_tasks[i])
+            new_task_dat: list[WorkflowTemplateTaskData] = []
+            reindex = {}
+            for task_idx, task_dat in enumerate(data["tasks"]):
+                if meta_task_dat := meta_tasks.get(task_dat["schema"]):
+                    reindex[task_idx] = [
+                        len(new_task_dat) + i for i in range(len(meta_task_dat))
+                    ]
+
+                    all_schema_names = [j for i in meta_task_dat for j in i["schema"]]
+
+                    # update any parametrisation provided in the task list:
+                    base_data = copy.deepcopy(meta_task_dat)
+
+                    # any other keys in `task_dat` should be mappings whose keys are
+                    # the schema name (within the meta task) optionally suffixed by
+                    # a period and the element set index to which the updates should be
+                    # copied (no integer suffix indicates the zeroth element set):
+                    for k, v in task_dat.items():
+                        if k == "schema":
+                            continue
+
+                        for elem_set_id, dat in v.items():
+
+                            elem_set_id_split = elem_set_id.split(".")
+                            try:
+                                es_idx = int(elem_set_id_split[-1])
+                                schema_name = ".".join(elem_set_id_split[:-1])
+                            except ValueError:
+                                es_idx = 0
+                                schema_name = ".".join(elem_set_id_split)
+                            schema_name = schema_name.strip(".")
+
+                            # check valid schema name:
+                            if schema_name not in all_schema_names:
+                                raise UnknownMetaTaskConstitutiveSchema(
+                                    f"Task schema with objective {schema_name!r} is not "
+                                    f"part of the meta-task with objective "
+                                    f"{task_dat['schema']!r}. The constitutive schemas of"
+                                    f" this meta-task have objectives: "
+                                    f"{all_schema_names!r}."
+                                )
+
+                            # copy `dat` to the correct schema and element set in the
+                            # meta-task:
+                            for s_idx, s in enumerate(base_data):
+                                if s["schema"] == [schema_name]:
+                                    if k == "inputs":
+                                        # special case; merge inputs
+                                        base_data[s_idx]["element_sets"][es_idx][
+                                            k
+                                        ].update(dat)
+                                    else:
+                                        # just overwrite
+                                        base_data[s_idx]["element_sets"][es_idx][k] = dat
+
+                    new_task_dat.extend(base_data)
+
+                else:
+                    reindex[task_idx] = [len(new_task_dat)]
+                    new_task_dat.append(task_dat)
+
+            data["tasks"] = new_task_dat
+
+            if loops := data.get("loops"):
+                for loop_idx, loop in enumerate(loops):
+                    loops[loop_idx]["tasks"] = [
+                        j for i in loop["tasks"] for j in reindex[i]
+                    ]
+                    term_task = loop.get("termination_task")
+                    if term_task is not None:
+                        loops[loop_idx]["termination_task"] = reindex[term_task][0]
+
+        _normalise_task_parametrisation(data["tasks"])
 
         # extract out any template components:
         # TODO: TypedDict for data
@@ -368,14 +540,31 @@ class WorkflowTemplate(JSONLike):
         )
         cls._app.task_schemas.add_objects(task_schemas, skip_duplicates=True)
 
-
+        if mts_dat := tcs.pop("meta_task_schemas", []):
+            meta_ts = [
+                cls._app.MetaTaskSchema.from_json_like(
+                    i, shared_data=cls._app.template_components
+                )
+                for i in mts_dat
+            ]
+            cls._app.task_schemas.add_objects(meta_ts, skip_duplicates=True)
+
+        wkt = cls.from_json_like(data, shared_data=cls._app._shared_data)
+
+        # print(f"WorkflowTemplate._from_data: {wkt=!r}")
+        # TODO: what is this for!?
+        # for idx, task in enumerate(wkt.tasks):
+        #     if isinstance(task.schema, cls._app.MetaTaskSchema):
+        #         print(f"{task=!r}")
+        #         wkt.tasks[idx] = cls._app.MetaTask(schema=task.schema, tasks=task.tasks)
+        return wkt
 
     @classmethod
     @TimeIt.decorator
     def from_YAML_string(
         cls,
         string: str,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
     ) -> WorkflowTemplate:
         """Load from a YAML string.
 
@@ -384,7 +573,10 @@ class WorkflowTemplate(JSONLike):
         string
             The YAML string containing the workflow template parametrisation.
         variables
-            String variables to substitute in `string`.
+            String variables to substitute in `string`. Substitutions will be attempted if
+            the YAML string looks to contain variable references (like "<<var:name>>"). If
+            set to `False`, no substitutions will occur, which may result in an invalid
+            workflow template!
         """
         return cls._from_data(read_YAML_str(string, variables=variables))
 
@@ -408,7 +600,7 @@ class WorkflowTemplate(JSONLike):
     def from_YAML_file(
         cls,
         path: PathLike,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
     ) -> WorkflowTemplate:
         """Load from a YAML file.
 
@@ -417,7 +609,10 @@ class WorkflowTemplate(JSONLike):
         path
             The path to the YAML file containing the workflow template parametrisation.
         variables
-            String variables to substitute in the file given by `path`.
+            String variables to substitute in the file given by `path`. Substitutions will
+            be attempted if the YAML file looks to contain variable references (like
+            "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+            result in an invalid workflow template!
 
         """
         cls._app.logger.debug("parsing workflow template from a YAML file")
@@ -431,7 +626,7 @@ class WorkflowTemplate(JSONLike):
     def from_JSON_string(
         cls,
         string: str,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
     ) -> WorkflowTemplate:
         """Load from a JSON string.
 
@@ -440,7 +635,10 @@ class WorkflowTemplate(JSONLike):
         string
             The JSON string containing the workflow template parametrisation.
         variables
-            String variables to substitute in `string`.
+            String variables to substitute in `string`. Substitutions will be attempted if
+            the JSON string looks to contain variable references (like "<<var:name>>"). If
+            set to `False`, no substitutions will occur, which may result in an invalid
+            workflow template!
         """
         return cls._from_data(read_JSON_string(string, variables=variables))
 
@@ -449,7 +647,7 @@ class WorkflowTemplate(JSONLike):
     def from_JSON_file(
         cls,
         path: PathLike,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
     ) -> WorkflowTemplate:
         """Load from a JSON file.
 
@@ -458,7 +656,10 @@ class WorkflowTemplate(JSONLike):
         path
             The path to the JSON file containing the workflow template parametrisation.
         variables
-            String variables to substitute in the file given by `path`.
+            String variables to substitute in the file given by `path`. Substitutions will
+            be attempted if the JSON file looks to contain variable references (like
+            "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+            result in an invalid workflow template!
         """
         cls._app.logger.debug("parsing workflow template from a JSON file")
         data = read_JSON_file(path, variables=variables)
@@ -472,7 +673,7 @@ class WorkflowTemplate(JSONLike):
         cls,
         path: PathLike,
         template_format: Literal["yaml", "json"] | None = None,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
     ) -> WorkflowTemplate:
         """Load from either a YAML or JSON file, depending on the file extension.
 
@@ -484,8 +685,10 @@ class WorkflowTemplate(JSONLike):
             The file format to expect at `path`. One of "json" or "yaml", if specified. By
             default, "yaml".
         variables
-            String variables to substitute in the file given by `path`.
-
+            String variables to substitute in the file given by `path`. Substitutions will
+            be attempted if the file looks to contain variable references (like
+            "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+            result in an invalid workflow template!
         """
         path_ = Path(path or ".")
         fmt = template_format.lower() if template_format else None
@@ -571,6 +774,25 @@ class _IterationData:
     idx: int
 
 
+def load_workflow_config(
+    func: Callable[Concatenate[S, P], T],
+) -> Callable[Concatenate[S, P], T]:
+    """Decorator to apply workflow-level config items during execution of a Workflow
+    method."""
+
+    @wraps(func)
+    def wrapped(self: S, *args: P.args, **kwargs: P.kwargs) -> T:
+
+        updates = self.template.config
+        if updates:
+            with self._app.config._with_updates(updates):
+                return func(self, *args, **kwargs)
+        else:
+            return func(self, *args, **kwargs)
+
+    return wrapped
+
+
 class Workflow(AppAware):
     """
     A concrete workflow.
@@ -630,9 +852,18 @@ class Workflow(AppAware):
         self._store = store_cls(self._app, self, self.path, fs)
         self._in_batch_mode = False  # flag to track when processing batch updates
 
+        self._use_merged_parameters_cache = False
+        self._merged_parameters_cache: dict[
+            tuple[str | None, tuple[tuple[str, tuple[int, ...] | int], ...]], Any
+        ] = {}
+
         # store indices of updates during batch update, so we can revert on failure:
         self._pending = self._get_empty_pending()
 
+        # reassigned within `ElementActionRun.raise_on_failure_threshold` context manager:
+        self._is_tracking_unset: bool = False
+        self._tracked_unset: dict[str, UnsetParamTracker] | None = None
+
     def reload(self) -> Self:
         """Reload the workflow from disk."""
         return self.__class__(self.url)
@@ -743,7 +974,12 @@ class Workflow(AppAware):
                             f"{len(template.loops)} ({loop.name!r})"
                         )
                     wk._add_loop(loop, cache=cache, status=status)
-
+                if status:
+                    status.update(
+                        f"Added {len(template.loops)} loops. "
+                        f"Committing to store..."
+                    )
+        except (Exception, NotImplementedError):
             if status:
                 status.stop()
             raise
@@ -761,7 +997,7 @@ class Workflow(AppAware):
         ts_fmt: str | None = None,
         ts_name_fmt: str | None = None,
         store_kwargs: dict[str, Any] | None = None,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
     ) -> Workflow:
         """Generate from a YAML file.
 
@@ -791,7 +1027,10 @@ class Workflow(AppAware):
         store_kwargs:
             Keyword arguments to pass to the store's `write_empty_workflow` method.
         variables:
-            String variables to substitute in the file given by `YAML_path`.
+            String variables to substitute in the file given by `YAML_path`. Substitutions
+            will be attempted if the YAML file looks to contain variable references (like
+            "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+            result in an invalid workflow template!
         """
         template = cls._app.WorkflowTemplate.from_YAML_file(
             path=YAML_path,
@@ -819,7 +1058,8 @@ class Workflow(AppAware):
         ts_fmt: str | None = None,
         ts_name_fmt: str | None = None,
         store_kwargs: dict[str, Any] | None = None,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
+        status: Status | None = None,
     ) -> Workflow:
         """Generate from a YAML string.
 
@@ -849,7 +1089,10 @@ class Workflow(AppAware):
         store_kwargs:
             Keyword arguments to pass to the store's `write_empty_workflow` method.
         variables:
-            String variables to substitute in the string `YAML_str`.
+            String variables to substitute in the string `YAML_str`. Substitutions will be
+            attempted if the YAML string looks to contain variable references (like
+            "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+            result in an invalid workflow template!
         """
         template = cls._app.WorkflowTemplate.from_YAML_string(
             string=YAML_str,
@@ -864,6 +1107,7 @@ class Workflow(AppAware):
             ts_fmt,
             ts_name_fmt,
             store_kwargs,
+            status,
         )
 
     @classmethod
@@ -877,7 +1121,7 @@ class Workflow(AppAware):
         ts_fmt: str | None = None,
         ts_name_fmt: str | None = None,
         store_kwargs: dict[str, Any] | None = None,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
         status: Status | None = None,
     ) -> Workflow:
         """Generate from a JSON file.
@@ -908,7 +1152,10 @@ class Workflow(AppAware):
         store_kwargs:
             Keyword arguments to pass to the store's `write_empty_workflow` method.
         variables:
-            String variables to substitute in the file given by `JSON_path`.
+            String variables to substitute in the file given by `JSON_path`. Substitutions
+            will be attempted if the JSON file looks to contain variable references (like
+            "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+            result in an invalid workflow template!
         """
         template = cls._app.WorkflowTemplate.from_JSON_file(
             path=JSON_path,
@@ -937,7 +1184,7 @@ class Workflow(AppAware):
         ts_fmt: str | None = None,
         ts_name_fmt: str | None = None,
         store_kwargs: dict[str, Any] | None = None,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
         status: Status | None = None,
     ) -> Workflow:
         """Generate from a JSON string.
@@ -968,7 +1215,10 @@ class Workflow(AppAware):
        store_kwargs:
            Keyword arguments to pass to the store's `write_empty_workflow` method.
        variables:
-            String variables to substitute in the string `JSON_str`.
+            String variables to substitute in the string `JSON_str`. Substitutions will be
+            attempted if the JSON string looks to contain variable references (like
+            "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+            result in an invalid workflow template!
         """
         template = cls._app.WorkflowTemplate.from_JSON_string(
             string=JSON_str,
@@ -999,7 +1249,7 @@ class Workflow(AppAware):
         ts_fmt: str | None = None,
         ts_name_fmt: str | None = None,
         store_kwargs: dict[str, Any] | None = None,
-        variables: dict[str, str] | None = None,
+        variables: dict[str, str] | Literal[False] | None = None,
         status: Status | None = None,
     ) -> Workflow:
         """Generate from either a YAML or JSON file, depending on the file extension.
@@ -1035,6 +1285,9 @@ class Workflow(AppAware):
             Keyword arguments to pass to the store's `write_empty_workflow` method.
         variables:
             String variables to substitute in the file given by `template_path`.
+            Substitutions will be attempted if the file looks to contain variable
+            references (like "<<var:name>>"). If set to `False`, no substitutions will
+            occur, which may result in an invalid workflow template!
         """
         try:
             template = cls._app.WorkflowTemplate.from_file(
@@ -1066,6 +1319,7 @@ class Workflow(AppAware):
         tasks: list[Task] | None = None,
         loops: list[Loop] | None = None,
         resources: Resources = None,
+        config: dict | None = None,
         path: PathLike | None = None,
         workflow_name: str | None = None,
         overwrite: bool = False,
@@ -1089,6 +1343,9 @@ class Workflow(AppAware):
             Mapping of action scopes to resource requirements, to be applied to all
             element sets in the workflow. `resources` specified in an element set take
             precedence of those defined here for the whole workflow.
+        config:
+            Configuration items that should be set whenever the resulting workflow is
+            loaded. This includes config items that apply during workflow execution.
         path:
             The directory in which the workflow will be generated. The current directory
             if not specified.
@@ -1116,6 +1373,7 @@ class Workflow(AppAware):
             tasks=tasks or [],
             loops=loops or [],
             resources=resources,
+            config=config or {},
         )
         return cls.from_template(
             template,
@@ -1248,6 +1506,7 @@ class Workflow(AppAware):
         self._store.add_loop(
             loop_template=cast("Mapping", loop_js),
             iterable_parameters=wk_loop.iterable_parameters,
+            output_parameters=wk_loop.output_parameters,
             parents=wk_loop.parents,
             num_added_iterations=wk_loop.num_added_iterations,
             iter_IDs=iter_IDs,
@@ -1275,7 +1534,7 @@ class Workflow(AppAware):
                     status.update(
                         f"{status_prev}: iteration {iter_idx + 2}/{loop.num_iterations}."
                     )
-                new_wk_loop.add_iteration(cache=cache_)
+                new_wk_loop.add_iteration(cache=cache_, status=status)
 
     def add_loop(self, loop: Loop) -> None:
         """Add a loop to a subset of workflow tasks."""
@@ -1360,6 +1619,7 @@ class Workflow(AppAware):
         return self._template
 
     @property
+    @TimeIt.decorator
     def tasks(self) -> WorkflowTaskList:
         """
         The tasks in this workflow.
@@ -1410,12 +1670,14 @@ class Workflow(AppAware):
                         repack_iteration_tuples(loop_dat["num_added_iterations"])
                     ),
                     iterable_parameters=loop_dat["iterable_parameters"],
+                    output_parameters=loop_dat["output_parameters"],
                 )
                 for idx, loop_dat in self._store.get_loops().items()
             )
         return self._loops
 
     @property
+    @TimeIt.decorator
     def submissions(self) -> list[Submission]:
         """
         The job submissions done by this workflow.
@@ -1587,56 +1849,70 @@ class Workflow(AppAware):
 
     @TimeIt.decorator
     def get_EARs_from_IDs(
-        self, ids: Iterable[int] | int
-    ) -> list[ElementActionRun] | ElementActionRun:
+        self, ids: Iterable[int] | int, as_dict: bool = False
+    ) -> list[ElementActionRun] | dict[int, ElementActionRun] | ElementActionRun:
         """Get element action run objects from a list of IDs."""
         id_lst = [ids] if isinstance(ids, int) else list(ids)
-        self._app.persistence_logger.debug(f"get_EARs_from_IDs: id_lst={id_lst!r}")
 
-
-        store_iters = self.get_store_element_iterations(
-            ear.elem_iter_ID for ear in store_EARs
-        )
-        store_elems = self.get_store_elements(it.element_ID for it in store_iters)
-        store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
+        with self._store.cached_load(), self._store.cache_ctx():
 
-
-
-
-
-
+            self._app.persistence_logger.debug(
+                f"get_EARs_from_IDs: {len(id_lst)} EARs: {shorten_list_str(id_lst)}."
+            )
+
+            store_EARs = self.get_store_EARs(id_lst)
+            store_iters = self.get_store_element_iterations(
+                ear.elem_iter_ID for ear in store_EARs
+            )
+            store_elems = self.get_store_elements(it.element_ID for it in store_iters)
+            store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
 
-
-
-
-
-            iter_idx = el.iteration_IDs.index(it.id_)
-            elem_idx = tk.element_IDs.index(el.id_)
-            index_paths.append(
-                Workflow._IndexPath3(run_idx, act_idx, iter_idx, elem_idx, tk.index)
+            # to allow for bulk retrieval of elements/iterations
+            element_idx_by_task: dict[int, set[int]] = defaultdict(set)
+            iter_idx_by_task_elem: dict[int, dict[int, set[int]]] = defaultdict(
+                lambda: defaultdict(set)
             )
-
-
-
-
-
-
-
-
-
+
+            index_paths: list[Workflow._IndexPath3] = []
+            for rn, it, el, tk in zip(store_EARs, store_iters, store_elems, store_tasks):
+                act_idx = rn.action_idx
+                run_idx = (
+                    it.EAR_IDs[act_idx].index(rn.id_) if it.EAR_IDs is not None else -1
+                )
+                iter_idx = el.iteration_IDs.index(it.id_)
+                elem_idx = tk.element_IDs.index(el.id_)
+                index_paths.append(
+                    Workflow._IndexPath3(run_idx, act_idx, iter_idx, elem_idx, tk.index)
+                )
+                element_idx_by_task[tk.index].add(elem_idx)
+                iter_idx_by_task_elem[tk.index][elem_idx].add(iter_idx)
+
+            # retrieve elements/iterations:
+            iters = {
+                task_idx: {
+                    elem_i.index: {
+                        iter_idx: elem_i.iterations[iter_idx]
+                        for iter_idx in iter_idx_by_task_elem[task_idx][elem_i.index]
+                    }
+                    for elem_i in self.tasks[task_idx].elements[list(elem_idxes)]
                 }
-                for
+                for task_idx, elem_idxes in element_idx_by_task.items()
             }
-            for task_idx, elem_idxes in element_idx_by_task.items()
-        }
 
-
-
-
-
-
-
-
+            result = {}
+            for path in index_paths:
+                run = (
+                    iters[path.task][path.elem][path.iter]
+                    .actions[path.act]
+                    .runs[path.run]
+                )
+                result[run.id_] = run
+
+            if not as_dict:
+                res_lst = list(result.values())
+                return res_lst[0] if isinstance(ids, int) else res_lst
+
+            return result
 
     @TimeIt.decorator
     def get_all_elements(self) -> list[Element]:
@@ -1722,6 +1998,20 @@ class Workflow(AppAware):
         self._app.persistence_logger.info("exiting batch update")
         self._in_batch_mode = False
 
+    @contextmanager
+    def cached_merged_parameters(self):
+        if self._use_merged_parameters_cache:
+            yield
+        else:
+            try:
+                self._app.logger.debug("entering merged-parameters cache.")
+                self._use_merged_parameters_cache = True
+                yield
+            finally:
+                self._app.logger.debug("exiting merged-parameters cache.")
+                self._use_merged_parameters_cache = False
+                self._merged_parameters_cache = {}  # reset the cache
+
     @classmethod
     def temporary_rename(cls, path: str, fs: AbstractFileSystem) -> str:
         """Rename an existing same-path workflow (directory) so we can restore it if
@@ -1883,7 +2173,7 @@ class Workflow(AppAware):
         if template.source_file:
             wk.artifacts_path.mkdir(exist_ok=False)
             src = Path(template.source_file)
-            wk.artifacts_path.joinpath(src.name)
+            shutil.copy(src, wk.artifacts_path.joinpath(src.name))
 
         return wk
 
@@ -2193,7 +2483,11 @@ class Workflow(AppAware):
         """
         The total number of job submissions.
         """
-        return
+        return (
+            len(self._submissions)
+            if self._submissions is not None
+            else self._store._get_num_total_submissions()
+        )
 
     @property
     def num_elements(self) -> int:
@@ -2276,22 +2570,26 @@ class Workflow(AppAware):
             for te in self._store.get_task_elements(task.insert_ID, idx_lst)
         ]
 
-    def
-
+    def set_EAR_start(
+        self, run_id: int, run_dir: Path | None, port_number: int | None
+    ) -> None:
+        """Set the start time on an EAR."""
+        self._app.logger.debug(f"Setting start for EAR ID {run_id!r}")
         with self._store.cached_load(), self.batch_update():
-            self._store.
+            self._store.set_EAR_start(run_id, run_dir, port_number)
 
-    def
-
-
+    def set_multi_run_starts(
+        self, run_ids: list[int], run_dirs: list[Path | None], port_number: int
+    ) -> None:
+        """Set the start time on multiple runs."""
+        self._app.logger.debug(f"Setting start for multiple run IDs {run_ids!r}")
         with self._store.cached_load(), self.batch_update():
-            self._store.
+            self._store.set_multi_run_starts(run_ids, run_dirs, port_number)
 
     def set_EAR_end(
         self,
-
-
-        EAR_ID: int,
+        block_act_key: BlockActionKey,
+        run: ElementActionRun,
         exit_code: int,
     ) -> None:
         """Set the end time and exit code on an EAR.
@@ -2301,108 +2599,430 @@ class Workflow(AppAware):
 
         """
         self._app.logger.debug(
-            f"Setting end for
+            f"Setting end for run ID {run.id_!r} with exit code {exit_code!r}."
         )
-
-
-
-
-
+        param_id: int | list[int] | None
+        with self._store.cached_load(), self.batch_update():
+            success = exit_code == 0  # TODO more sophisticated success heuristics
+            if not run.skip:
+
+                is_aborted = False
+                if run.action.abortable and exit_code == ABORT_EXIT_CODE:
                     # the point of aborting an EAR is to continue with the workflow:
+                    is_aborted = True
                     success = True
 
-
-
-
-
-
-
-
-
-                file_paths = inp_file.value()
-                for path_i in (
-                    file_paths if isinstance(file_paths, list) else [file_paths]
-                ):
-                    self._set_file(
-                        param_id=param_id,
-                        store_contents=True,  # TODO: make optional according to IFG
-                        is_input=False,
-                        path=Path(path_i).resolve(),
+                run_dir = run.get_directory()
+                if run_dir:
+                    assert isinstance(run_dir, Path)
+                for IFG_i in run.action.input_file_generators:
+                    inp_file = IFG_i.input_file
+                    self._app.logger.debug(
+                        f"Saving EAR input file: {inp_file.label!r} for EAR ID "
+                        f"{run.id_!r}."
                     )
+                    param_id = run.data_idx[f"input_files.{inp_file.label}"]
 
-
-
+                    file_paths = inp_file.value(directory=run_dir)
+                    for path_i in (
+                        file_paths if isinstance(file_paths, list) else [file_paths]
+                    ):
+                        full_path = run_dir.joinpath(path_i)
+                        if not full_path.exists():
+                            self._app.logger.debug(
+                                f"expected input file {path_i!r} does not "
+                                f"exist, so setting run to an error state "
+                                f"(if not aborted)."
+                            )
+                            if not is_aborted and success is True:
+                                # this is unlikely to happen, but could happen
+                                # if the input file is deleted in between
+                                # the input file generator completing and this
+                                # code being run
+                                success = False
+                                exit_code = 1  # TODO more custom exit codes?
+                        else:
+                            self._set_file(
+                                param_id=param_id,
+                                store_contents=True,  # TODO: make optional according to IFG
+                                is_input=False,
+                                path=full_path,
+                            )
 
-
-
-
-
-
-
-
-
-
-
-
-
-                for path_i in (
-                    file_paths if isinstance(file_paths, list) else [file_paths]
-                ):
-                    self._set_file(
-                        param_id=param_id_j,
-                        store_contents=True,
-                        is_input=False,
-                        path=Path(path_i).resolve(),
-                        clean_up=(save_file_j in EAR.action.clean_up),
-                    )
+                if run.action.script_data_out_has_files:
+                    try:
+                        run._param_save(block_act_key, run_dir)
+                    except FileNotFoundError:
+                        self._app.logger.debug(
+                            f"script did not generate an expected output parameter "
+                            f"file (block_act_key={block_act_key!r}), so setting run "
+                            f"to an error state (if not aborted)."
+                        )
+                        if not is_aborted and success is True:
+                            success = False
+                            exit_code = 1  # TODO more custom exit codes?
 
-
-                for save_file_j in
+                # Save action-level files: (TODO: refactor with below for OFPs)
+                for save_file_j in run.action.save_files:
                     self._app.logger.debug(
-                        f"Saving
-                        f"{
+                        f"Saving file: {save_file_j.label!r} for EAR ID "
+                        f"{run.id_!r}."
                     )
-
-
-
-
-
-
+                    try:
+                        param_id = run.data_idx[f"output_files.{save_file_j.label}"]
+                    except KeyError:
+                        # We might be saving a file that is not a defined
+                        # "output file"; this will avoid saving a reference in the
+                        # parameter data:
+                        param_id = None
+
+                    file_paths = save_file_j.value(directory=run_dir)
                     self._app.logger.debug(
-                        f"Saving
+                        f"Saving output file paths: {file_paths!r}"
                     )
+
                     for path_i in (
                         file_paths if isinstance(file_paths, list) else [file_paths]
                     ):
-
-
-
-
-
-
+                        full_path = run_dir.joinpath(path_i)
+                        if not full_path.exists():
+                            self._app.logger.debug(
+                                f"expected file to save {path_i!r} does not "
+                                f"exist, so setting run to an error state "
+                                f"(if not aborted)."
+                            )
+                            if not is_aborted and success is True:
+                                # this is unlikely to happen, but could happen
+                                # if the input file is deleted in between
+                                # the input file generator completing and this
+                                # code being run
+                                success = False
+                                exit_code = 1  # TODO more custom exit codes?
+                        else:
+                            self._set_file(
+                                param_id=param_id,
+                                store_contents=True,
+                                is_input=False,
+                                path=full_path,
+                                clean_up=(save_file_j in run.action.clean_up),
+                            )
+
+                for OFP_i in run.action.output_file_parsers:
+                    for save_file_j in OFP_i._save_files:
+                        self._app.logger.debug(
+                            f"Saving EAR output file: {save_file_j.label!r} for EAR ID "
+                            f"{run.id_!r}."
+                        )
+                        try:
+                            param_id = run.data_idx[
+                                f"output_files.{save_file_j.label}"
+                            ]
+                        except KeyError:
+                            # We might be saving a file that is not a defined
+                            # "output file"; this will avoid saving a reference in the
+                            # parameter data:
+                            param_id = None
+
+                        file_paths = save_file_j.value(directory=run_dir)
+                        self._app.logger.debug(
+                            f"Saving EAR output file paths: {file_paths!r}"
                         )
 
-
-
-
-
-
-
-
+                        for path_i in (
+                            file_paths
+                            if isinstance(file_paths, list)
+                            else [file_paths]
+                        ):
+                            full_path = run_dir.joinpath(path_i)
+                            if not full_path.exists():
+                                self._app.logger.debug(
+                                    f"expected output file parser `save_files` file "
+                                    f"{path_i!r} does not exist, so setting run "
+                                    f"to an error state (if not aborted)."
+                                )
+                                if not is_aborted and success is True:
+                                    success = False
+                                    exit_code = 1  # TODO more custom exit codes?
+                            else:
+                                self._set_file(
+                                    param_id=param_id,
+                                    store_contents=True,  # TODO: make optional according to OFP
+                                    is_input=False,
+                                    path=full_path,
+                                    clean_up=(save_file_j in OFP_i.clean_up),
+                                )
+
+            if (
+                run.resources.skip_downstream_on_failure
+                and not success
+                and run.skip_reason is not SkipReason.LOOP_TERMINATION
+            ):
+                # loop termination skips are already propagated
+                for EAR_dep_ID in run.get_dependent_EARs(as_objects=False):
+                    self._app.logger.debug(
+                        f"Setting EAR ID {EAR_dep_ID!r} to skip because it depends on"
+                        f" EAR ID {run.id_!r}, which exited with a non-zero exit code:"
+                        f" {exit_code!r}."
+                    )
+                    self._store.set_EAR_skip(
+                        {EAR_dep_ID: SkipReason.UPSTREAM_FAILURE.value}
+                    )
+
+            self._store.set_EAR_end(run.id_, exit_code, success, run.action.requires_dir)
+
+    def set_multi_run_ends(
+        self,
+        runs: dict[
+            BlockActionKey,
+            list[tuple[ElementActionRun, int, Path | None]],
+        ],
+    ) -> None:
+        """Set end times and exit codes on multiple runs.
+
+        If the exit code is non-zero, also set all downstream dependent runs to be
+        skipped. Also save any generated input/output files."""
+
+        self._app.logger.debug(f"Setting end for multiple run IDs.")
+        param_id: int | list[int] | None
+        with self._store.cached_load(), self.batch_update():
+            run_ids = []
+            run_dirs = []
+            exit_codes = []
+            successes = []
+            for block_act_key, run_dat in runs.items():
+                for run, exit_code, run_dir in run_dat:
+
+                    success = (
+                        exit_code == 0
+                    )  # TODO more sophisticated success heuristics
+                    self._app.logger.info(
+                        f"setting end for run {run.id_} with exit_code={exit_code}, "
+                        f"success={success}, skip={run.skip!r}, and skip_reason="
+                        f"{run.skip_reason!r}."
+                    )
+                    if not run.skip:
+                        self._app.logger.info(f"run was not skipped.")
+                        is_aborted = False
+                        if run.action.abortable and exit_code == ABORT_EXIT_CODE:
+                            # the point of aborting an EAR is to continue with the
+                            # workflow:
+                            self._app.logger.info(
+                                "run was abortable and exit code was ABORT_EXIT_CODE,"
+                                " so setting success to True."
+                            )
+                            is_aborted = True
+                            success = True
+
+                        run_dir = run.get_directory()
+                        if run_dir:
+                            assert isinstance(run_dir, Path)
+                        for IFG_i in run.action.input_file_generators:
+                            self._app.logger.info(f"setting IFG file {IFG_i!r}")
+                            inp_file = IFG_i.input_file
+                            self._app.logger.debug(
+                                f"Saving EAR input file: {inp_file.label!r} for EAR "
+                                f"ID {run.id_!r}."
+                            )
+                            param_id = run.data_idx[f"input_files.{inp_file.label}"]
+
+                            file_paths = inp_file.value(directory=run_dir)
+                            for path_i in (
+                                file_paths
+                                if isinstance(file_paths, list)
+                                else [file_paths]
+                            ):
+                                full_path = run_dir.joinpath(path_i)
+                                if not full_path.exists():
+                                    self._app.logger.debug(
+                                        f"expected input file {path_i!r} does not "
+                                        f"exist, so setting run to an error state "
+                                        f"(if not aborted)."
+                                    )
+                                    if not is_aborted and success is True:
+                                        # this is unlikely to happen, but could happen
+                                        # if the input file is deleted in between
+                                        # the input file generator completing and this
+                                        # code being run
+                                        success = False
+                                        exit_code = 1  # TODO more custom exit codes?
+                                else:
+                                    self._set_file(
+                                        param_id=param_id,
+                                        store_contents=True,  # TODO: make optional according to IFG
+                                        is_input=False,
+                                        path=full_path,
+                                    )
+
+                        if run.action.script_data_out_has_files:
+                            self._app.logger.info(
+                                f"saving script-generated parameters."
+                            )
+                            try:
+                                run._param_save(block_act_key, run_dir)
+                            except FileNotFoundError:
+                                # script did not generate the output parameter file, so
+                                # set a failed exit code (if we did not abort the run):
+                                self._app.logger.debug(
+                                    f"script did not generate an expected output "
+                                    f"parameter file (block_act_key="
+                                    f"{block_act_key!r}), so setting run to an error "
+                                    f"state (if not aborted)."
+                                )
+                                if not is_aborted and success is True:
+                                    success = False
+                                    exit_code = 1  # TODO more custom exit codes?
+
+                        # Save action-level files: (TODO: refactor with below for OFPs)
+                        for save_file_j in run.action.save_files:
+                            self._app.logger.info(
+                                f"saving action-level file {save_file_j!r}."
+                            )
+                            self._app.logger.debug(
+                                f"Saving file: {save_file_j.label!r} for EAR ID "
+                                f"{run.id_!r}."
+                            )
+                            try:
+                                param_id = run.data_idx[
+                                    f"output_files.{save_file_j.label}"
+                                ]
+                            except KeyError:
+                                # We might be saving a file that is not a defined
+                                # "output file"; this will avoid saving a reference in
+                                # the parameter data:
+                                param_id = None
+
+                            file_paths = save_file_j.value(directory=run_dir)
+                            self._app.logger.debug(
+                                f"Saving output file paths: {file_paths!r}"
+                            )
+                            for path_i in (
+                                file_paths
+                                if isinstance(file_paths, list)
+                                else [file_paths]
+                            ):
+                                full_path = run_dir.joinpath(path_i)
+                                if not full_path.exists():
+                                    self._app.logger.debug(
+                                        f"expected file to save {path_i!r} does not "
+                                        f"exist, so setting run to an error state "
+                                        f"(if not aborted)."
+                                    )
+                                    if not is_aborted and success is True:
+                                        # this is unlikely to happen, but could happen
+                                        # if the input file is deleted in between
+                                        # the input file generator completing and this
+                                        # code being run
+                                        success = False
+                                        exit_code = 1  # TODO more custom exit codes?
+                                else:
+                                    self._set_file(
+                                        param_id=param_id,
+                                        store_contents=True,
+                                        is_input=False,
+                                        path=full_path,
+                                        clean_up=(save_file_j in run.action.clean_up),
+                                    )
+
+                        for OFP_i in run.action.output_file_parsers:
+                            self._app.logger.info(
+                                f"saving files from OFP: {OFP_i!r}."
+                            )
+                            for save_file_j in OFP_i._save_files:
+                                self._app.logger.debug(
+                                    f"Saving EAR output file: {save_file_j.label!r} "
+                                    f"for EAR ID {run.id_!r}."
+                                )
+                                try:
+                                    param_id = run.data_idx[
+                                        f"output_files.{save_file_j.label}"
+                                    ]
+                                except KeyError:
+                                    # We might be saving a file that is not a defined
+                                    # "output file"; this will avoid saving a
+                                    # reference in the parameter data:
+                                    param_id = None
+
+                                file_paths = save_file_j.value(directory=run_dir)
+                                self._app.logger.debug(
+                                    f"Saving EAR output file paths: {file_paths!r}"
+                                )
+
+                                for path_i in (
+                                    file_paths
+                                    if isinstance(file_paths, list)
+                                    else [file_paths]
+                                ):
+                                    full_path = run_dir.joinpath(path_i)
+                                    if not full_path.exists():
+                                        self._app.logger.debug(
+                                            f"expected output file parser `save_files` file "
+                                            f"{path_i!r} does not exist, so setting run "
+                                            f"to an error state (if not aborted)."
+                                        )
+                                        if not is_aborted and success is True:
+                                            success = False
+                                            exit_code = (
+                                                1  # TODO more custom exit codes?
+                                            )
+                                    else:
+                                        self._set_file(
+                                            param_id=param_id,
+                                            store_contents=True,  # TODO: make optional according to OFP
+                                            is_input=False,
+                                            path=full_path,
+                                            clean_up=(save_file_j in OFP_i.clean_up),
+                                        )
+
+                    else:
+                        self._app.logger.info(
+                            f"run was skipped: reason: {run.skip_reason!r}."
                         )
-                    self._store.set_EAR_skip(EAR_dep_ID)
 
-
+                    if (
+                        run.resources.skip_downstream_on_failure
+                        and not success
+                        and run.skip_reason is not SkipReason.LOOP_TERMINATION
+                    ):
+                        # run failed
+                        self._app.logger.info(
+                            "run was not succcess and skip reason was not "
+                            "LOOP_TERMINATION."
+                        )
+                        # loop termination skips are already propagated
+                        for EAR_dep_ID in run.get_dependent_EARs(as_objects=False):
+                            # TODO: `get_dependent_EARs` seems to be stuck in a
+                            # recursion for some workflows
+                            # TODO: this needs to be recursive?
+                            self._app.logger.info(
+                                f"Setting EAR ID {EAR_dep_ID!r} to skip because it "
+                                f"depends on EAR ID {run.id_!r}, which exited with a "
+                                f"non-zero exit code: {exit_code!r}."
+                            )
+                            self._store.set_EAR_skip(
+                                {EAR_dep_ID: SkipReason.UPSTREAM_FAILURE.value}
|
+
else:
|
3005
|
+
self._app.logger.info(
|
3006
|
+
"`skip_downstream_on_failure` is False, run was "
|
3007
|
+
"succcess, or skip reason was LOOP_TERMINATION."
|
3008
|
+
)
|
3009
|
+
|
3010
|
+
run_ids.append(run.id_)
|
3011
|
+
run_dirs.append(run_dir)
|
3012
|
+
exit_codes.append(exit_code)
|
3013
|
+
successes.append(success)
|
3014
|
+
|
3015
|
+
self._store.set_multi_run_ends(run_ids, run_dirs, exit_codes, successes)
|
2396
3016
|
|
2397
|
-
def set_EAR_skip(self,
|
3017
|
+
def set_EAR_skip(self, skip_reasons: dict[int, SkipReason]) -> None:
|
2398
3018
|
"""
|
2399
3019
|
Record that an EAR is to be skipped due to an upstream failure or loop
|
2400
3020
|
termination condition being met.
|
2401
3021
|
"""
|
2402
3022
|
with self._store.cached_load(), self.batch_update():
|
2403
|
-
self._store.set_EAR_skip(
|
3023
|
+
self._store.set_EAR_skip({k: v.value for k, v in skip_reasons.items()})
|
2404
3024
|
|
2405
|
-
def get_EAR_skipped(self, EAR_ID: int) ->
|
3025
|
+
def get_EAR_skipped(self, EAR_ID: int) -> int:
|
2406
3026
|
"""Check if an EAR is to be skipped."""
|
2407
3027
|
with self._store.cached_load():
|
2408
3028
|
return self._store.get_EAR_skipped(EAR_ID)
|
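The reworked `set_EAR_skip` above now takes a mapping of EAR IDs to `SkipReason` members instead of a single ID. A minimal usage sketch under stated assumptions: `wf` is an already-loaded `Workflow` instance, the EAR IDs are placeholders, and the import path follows the new `hpcflow/sdk/core/skip_reason.py` module added in this release.

    from hpcflow.sdk.core.skip_reason import SkipReason

    # mark two (hypothetical) EARs as skipped, with different reasons
    wf.set_EAR_skip({12: SkipReason.UPSTREAM_FAILURE, 13: SkipReason.LOOP_TERMINATION})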
@@ -2421,6 +3041,15 @@ class Workflow(AppAware):
             # force commit now:
             self._store._pending.commit_all()
 
+    @TimeIt.decorator
+    def set_parameter_values(self, values: dict[int, Any], commit: bool = False) -> None:
+        with self._store.cached_load(), self.batch_update(), self._store.cache_ctx():
+            self._store.set_parameter_values(values)
+
+            if commit:
+                # force commit now:
+                self._store._pending.commit_all()
+
     def set_EARs_initialised(self, iter_ID: int) -> None:
         """
         Set :py:attr:`~hpcflow.app.ElementIteration.EARs_initialised` to True for the
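The new `set_parameter_values` method accepts a mapping of persistent parameter IDs to values, with an optional immediate commit. A hedged sketch (the parameter IDs and values are illustrative only; `wf` is an already-loaded `Workflow`):

    # overwrite two stored parameter values and force the pending changes to commit now
    wf.set_parameter_values({101: 3.14, 102: [1, 2, 3]}, commit=True)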
@@ -2549,7 +3178,7 @@ class Workflow(AppAware):
         self,
         status: Status | None = None,
         ignore_errors: bool = False,
-        JS_parallelism: bool | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
         print_stdout: bool = False,
         add_to_known: bool = True,
         tasks: Sequence[int] | None = None,
@@ -2560,16 +3189,23 @@ class Workflow(AppAware):
         if not (pending := [sub for sub in self.submissions if sub.needs_submit]):
             if status:
                 status.update("Adding new submission...")
-            if not (
+            if not (
+                new_sub := self._add_submission(
+                    tasks=tasks,
+                    JS_parallelism=JS_parallelism,
+                    status=status,
+                )
+            ):
+                if status:
+                    status.stop()
                 raise ValueError("No pending element action runs to submit!")
             pending = [new_sub]
 
-        self.submissions_path.mkdir(exist_ok=True, parents=True)
         self.execution_path.mkdir(exist_ok=True, parents=True)
         self.task_artifacts_path.mkdir(exist_ok=True, parents=True)
 
-        #
-        #
+        # the submission must be persistent at submit-time, because it will be read by a
+        # new instance of the app:
         if status:
             status.update("Committing to the store...")
         self._store._pending.commit_all()
@@ -2598,7 +3234,7 @@ class Workflow(AppAware):
         self,
         *,
         ignore_errors: bool = False,
-        JS_parallelism: bool | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
         print_stdout: bool = False,
         wait: bool = False,
         add_to_known: bool = True,
@@ -2614,7 +3250,7 @@ class Workflow(AppAware):
         self,
         *,
         ignore_errors: bool = False,
-        JS_parallelism: bool | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
         print_stdout: bool = False,
         wait: bool = False,
         add_to_known: bool = True,
@@ -2629,7 +3265,7 @@ class Workflow(AppAware):
         self,
         *,
         ignore_errors: bool = False,
-        JS_parallelism: bool | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
         print_stdout: bool = False,
         wait: bool = False,
         add_to_known: bool = True,
@@ -2646,9 +3282,12 @@ class Workflow(AppAware):
             If True, ignore jobscript submission errors. If False (the default) jobscript
             submission will halt when a jobscript fails to submit.
         JS_parallelism
-            If True, allow multiple jobscripts to execute simultaneously.
-
-
+            If True, allow multiple jobscripts to execute simultaneously. If
+            'scheduled'/'direct', only allow simultaneous execution of scheduled/direct
+            jobscripts. Raises if set to True, 'scheduled', or 'direct', but the store
+            type does not support the `jobscript_parallelism` feature. If not set,
+            jobscript parallelism will be used if the store type supports it, for
+            scheduled jobscripts only.
         print_stdout
             If True, print any jobscript submission standard output, otherwise hide it.
         wait
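Given the extended `JS_parallelism` docstring above, a submission might be driven as in the sketch below; `wf` is assumed to be a loaded `Workflow`, and whether parallelism is honoured depends on the store's `jobscript_parallelism` feature.

    # allow only scheduled jobscripts to execute simultaneously
    wf.submit(JS_parallelism="scheduled")

    # or disable jobscript parallelism entirely
    wf.submit(JS_parallelism=False)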
@@ -2679,7 +3318,11 @@ class Workflow(AppAware):
         if not self._store.is_submittable:
             raise NotImplementedError("The workflow is not submittable.")
         # commit updates before raising exception:
-        with
+        with (
+            self.batch_update(),
+            self._store.parameters_metadata_cache(),
+            self._store.cache_ctx(),
+        ):
             exceptions, submitted_js = self._submit(
                 ignore_errors=ignore_errors,
                 JS_parallelism=JS_parallelism,
@@ -2693,7 +3336,7 @@ class Workflow(AppAware):
             raise WorkflowSubmissionFailure(exceptions)
 
         if cancel:
-            self.cancel()
+            self.cancel(status=status)
 
         elif wait:
             self.wait(submitted_js)
@@ -2822,14 +3465,16 @@ class Workflow(AppAware):
         # keys are task_insert_IDs, values are element indices:
         active_elems: dict[int, set[int]] = defaultdict(set)
         sub = self.submissions[submission_idx]
-        for js_idx,
+        for js_idx, block_states in sub.get_active_jobscripts().items():
             js = sub.jobscripts[js_idx]
-            for
-
-
-
-
-
+            for block_idx, block in enumerate(js.blocks):
+                states = block_states[block_idx]
+                for js_elem_idx, state in states.items():
+                    if state is JobscriptElementState.running:
+                        for task_iID, elem_idx in zip(
+                            block.task_insert_IDs, block.task_elements[js_elem_idx]
+                        ):
+                            active_elems[task_iID].add(elem_idx)
 
         # retrieve Element objects:
         out: list[Element] = []
@@ -2862,18 +3507,22 @@ class Workflow(AppAware):
         for elem in elems:
             if element_idx is not None and elem.index != element_idx:
                 continue
-
-
-
-
-
-
+            for iter_i in elem.iterations:
+                for elem_acts in iter_i.actions.values():
+                    for run in elem_acts.runs:
+                        if run.status is EARStatus.running:
+                            out.append(run)
+                            # for a given element and submission, only one run
+                            # may be running at a time:
+                            break
         return out
 
-    def
-
-
-        self.
+    def _abort_run(self, run: ElementActionRun):
+        # connect to the ZeroMQ server on the worker node:
+        self._app.logger.info(f"abort run: {run!r}")
+        self._app.Executor.send_abort(
+            hostname=run.run_hostname, port_number=run.port_number
+        )
 
     def abort_run(
         self,
@@ -2916,38 +3565,77 @@ class Workflow(AppAware):
         run = running[0]
         if not run.action.abortable:
             raise RunNotAbortableError()
-        self.
+        self._abort_run(run)
 
     @TimeIt.decorator
-    def cancel(self,
+    def cancel(self, status: bool = True):
         """Cancel any running jobscripts."""
-
-
+        status_msg = f"Cancelling jobscripts of workflow {self.path!r}"
+        # Type hint for mypy
+        status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
+            rich.console.Console().status(status_msg) if status else nullcontext()
+        )
+        with status_context as status_, self._store.cached_load():
+            for sub in self.submissions:
+                sub.cancel()
 
     def add_submission(
-        self,
+        self,
+        tasks: list[int] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        force_array: bool = False,
+        status: bool = True,
     ) -> Submission | None:
-        """
-
+        """Add a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
         """
         # JS_parallelism=None means guess
-
-
+        # Type hint for mypy
+        status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
+            rich.console.Console().status("") if status else nullcontext()
+        )
+        with status_context as status_, self._store.cached_load(), self.batch_update():
+            return self._add_submission(tasks, JS_parallelism, force_array, status_)
 
     @TimeIt.decorator
+    @load_workflow_config
     def _add_submission(
-        self,
+        self,
+        tasks: Sequence[int] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        force_array: bool = False,
+        status: Status | None = None,
    ) -> Submission | None:
+        """Add a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+        """
         new_idx = self.num_submissions
         _ = self.submissions  # TODO: just to ensure `submissions` is loaded
+        if status:
+            status.update("Adding new submission: resolving jobscripts...")
+
+        cache = ObjectCache.build(self, elements=True, iterations=True, runs=True)
+
         sub_obj: Submission = self._app.Submission(
             index=new_idx,
             workflow=self,
-            jobscripts=self.resolve_jobscripts(tasks),
+            jobscripts=self.resolve_jobscripts(cache, tasks, force_array),
             JS_parallelism=JS_parallelism,
         )
+        if status:
+            status.update("Adding new submission: setting environments...")
         sub_obj._set_environments()
-        all_EAR_ID =
+        all_EAR_ID = sub_obj.all_EAR_IDs
         if not all_EAR_ID:
             print(
                 "There are no pending element action runs, so a new submission was not "
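The reworked `cancel` and `add_submission` signatures above suggest usage along these lines (a sketch only; `force_array` is documented as a testing-only option, the task indices are placeholders, and `wf` is an already-loaded `Workflow`):

    # build a submission for the first two tasks without a rich status spinner
    sub = wf.add_submission(tasks=[0, 1], JS_parallelism="direct", status=False)

    # later, cancel any running jobscripts of this workflow
    wf.cancel(status=False)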
@@ -2955,33 +3643,97 @@ class Workflow(AppAware):
             )
             return None
 
+        if status:
+            status.update("Adding new submission: making artifact directories...")
+
+        # TODO: a submission should only be "submitted" once shouldn't it?
+        # no; there could be an IO error (e.g. internet connectivity), so might
+        # need to be able to reattempt submission of outstanding jobscripts.
+        self.submissions_path.mkdir(exist_ok=True, parents=True)
+        sub_obj.path.mkdir(exist_ok=True)
+        sub_obj.tmp_path.mkdir(exist_ok=True)
+        sub_obj.app_std_path.mkdir(exist_ok=True)
+        sub_obj.js_path.mkdir(exist_ok=True)  # for jobscripts
+        sub_obj.js_std_path.mkdir(exist_ok=True)  # for stdout/err stream files
+        sub_obj.js_funcs_path.mkdir(exist_ok=True)
+        sub_obj.js_run_ids_path.mkdir(exist_ok=True)
+        sub_obj.scripts_path.mkdir(exist_ok=True)
+        sub_obj.commands_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_app_log_dir:
+            sub_obj.app_log_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_win_pids_dir:
+            sub_obj.js_win_pids_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_script_indices_dir:
+            sub_obj.js_script_indices_path.mkdir(exist_ok=True)
+
+        if status:
+            status.update("Adding new submission: writing scripts and command files...")
+
+        # write scripts and command files where possible to the submission directory:
+        cmd_file_IDs, run_indices, run_inp_files = sub_obj._write_scripts(cache, status)
+
+        sub_obj._write_execute_dirs(run_indices, run_inp_files, cache, status)
+
+        if status:
+            status.update("Adding new submission: updating the store...")
+
         with self._store.cached_load(), self.batch_update():
             for id_ in all_EAR_ID:
-                self._store.
+                self._store.set_run_submission_data(
+                    EAR_ID=id_,
+                    cmds_ID=cmd_file_IDs[id_],
+                    sub_idx=new_idx,
+                )
 
+        sub_obj._ensure_JS_parallelism_set()
         sub_obj_js, _ = sub_obj.to_json_like()
         assert self._submissions is not None
         self._submissions.append(sub_obj)
         self._pending["submissions"].append(new_idx)
         with self._store.cached_load(), self.batch_update():
-            self._store.add_submission(new_idx, sub_obj_js)
+            self._store.add_submission(new_idx, cast("Mapping[str, JSONed]", sub_obj_js))
 
         return self.submissions[new_idx]
 
     @TimeIt.decorator
-    def resolve_jobscripts(
+    def resolve_jobscripts(
+        self,
+        cache: ObjectCache,
+        tasks: Sequence[int] | None = None,
+        force_array: bool = False,
+    ) -> list[Jobscript]:
         """
-        Resolve this workflow to a set of
+        Resolve this workflow to a set of jobscripts to run for a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+
         """
-
-
+        with self._app.config.cached_config():
+            with self.cached_merged_parameters():
+                js, element_deps = self._resolve_singular_jobscripts(
+                    cache, tasks, force_array
+                )
+
+                js_deps = resolve_jobscript_dependencies(js, element_deps)
 
-
-
-
+                for js_idx, jsca in js.items():
+                    if js_idx in js_deps:
+                        jsca["dependencies"] = js_deps[js_idx]  # type: ignore
 
-
-
+                js = merge_jobscripts_across_tasks(js)
+
+                # for direct or (non-array scheduled), combine into jobscripts of multiple
+                # blocks for dependent jobscripts that have the same resource hashes
+                js_ = resolve_jobscript_blocks(js)
+
+                return [self._app.Jobscript(**i, index=idx) for idx, i in enumerate(js_)]
 
     def __EAR_obj_map(
         self,
@@ -2990,7 +3742,9 @@ class Workflow(AppAware):
         task: WorkflowTask,
         task_actions: Sequence[tuple[int, int, int]],
         EAR_map: NDArray,
+        cache: ObjectCache,
     ) -> Mapping[int, ElementActionRun]:
+        assert cache.runs is not None
         all_EAR_IDs: list[int] = []
         for js_elem_idx, (elem_idx, act_indices) in enumerate(
             js_desc["elements"].items()
@@ -3000,11 +3754,14 @@ class Workflow(AppAware):
                 all_EAR_IDs.append(EAR_ID_i)
                 js_act_idx = task_actions.index((task.insert_ID, act_idx, 0))
                 jsca["EAR_ID"][js_act_idx][js_elem_idx] = EAR_ID_i
-        return dict(zip(all_EAR_IDs,
+        return dict(zip(all_EAR_IDs, (cache.runs[i] for i in all_EAR_IDs)))
 
     @TimeIt.decorator
     def _resolve_singular_jobscripts(
-        self,
+        self,
+        cache: ObjectCache,
+        tasks: Sequence[int] | None = None,
+        force_array: bool = False,
     ) -> tuple[
         Mapping[int, JobScriptCreationArguments],
         Mapping[int, Mapping[int, Sequence[int]]],
@@ -3013,6 +3770,12 @@ class Workflow(AppAware):
         We arrange EARs into `EARs` and `elements` so we can quickly look up membership
         by EAR idx in the `EARs` dict.
 
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+
         Returns
         -------
         submission_jobscripts
@@ -3025,6 +3788,7 @@ class Workflow(AppAware):
 
         if self._store.use_cache:
             # pre-cache parameter sources (used in `EAR.get_EAR_dependencies`):
+            # note: this cache is unrelated to the `cache` argument
             self.get_all_parameter_sources()
 
         submission_jobscripts: dict[int, JobScriptCreationArguments] = {}
@@ -3034,7 +3798,9 @@ class Workflow(AppAware):
             task = self.tasks.get(insert_ID=task_iID)
             if task.index not in task_set:
                 continue
-            res, res_hash, res_map, EAR_map = generate_EAR_resource_map(
+            res, res_hash, res_map, EAR_map = generate_EAR_resource_map(
+                task, loop_idx_i, cache
+            )
             jobscripts, _ = group_resource_map_into_jobscripts(res_map)
 
             for js_dat in jobscripts:
@@ -3063,6 +3829,11 @@ class Workflow(AppAware):
 
                 new_js_idx = len(submission_jobscripts)
 
+                is_array = force_array or is_jobscript_array(
+                    res[js_dat["resources"]],
+                    EAR_ID_arr.shape[1],
+                    self._store,
+                )
                 js_i: JobScriptCreationArguments = {
                     "task_insert_IDs": [task.insert_ID],
                     "task_loop_idx": [loop_idx_i],
@@ -3072,10 +3843,11 @@ class Workflow(AppAware):
                     "resources": res[js_dat["resources"]],
                     "resource_hash": res_hash[js_dat["resources"]],
                     "dependencies": {},
+                    "is_array": is_array,
                 }
 
                 all_EAR_objs = self.__EAR_obj_map(
-                    js_dat, js_i, task, task_actions, EAR_map
+                    js_dat, js_i, task, task_actions, EAR_map, cache
                 )
 
                 for js_elem_idx, (elem_idx, act_indices) in enumerate(
@@ -3104,76 +3876,290 @@ class Workflow(AppAware):
 
         return submission_jobscripts, all_element_deps
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    @load_workflow_config
+    def execute_run(
+        self,
+        submission_idx: int,
+        block_act_key: BlockActionKey,
+        run_ID: int,
+    ) -> None:
+        """Execute commands of a run via a subprocess."""
+
+        # CD to submission tmp dir to ensure std streams and exceptions have somewhere
+        # sensible to go:
+        os.chdir(Submission.get_tmp_path(self.submissions_path, submission_idx))
+
+        sub_str_path = Submission.get_app_std_path(self.submissions_path, submission_idx)
+        run_std_path = sub_str_path / f"{str(run_ID)}.txt"  # TODO: refactor
+        has_commands = False
+
+        # redirect (as much as possible) app-generated stdout/err to a dedicated file:
+        with redirect_std_to_file(run_std_path):
+            with self._store.cached_load():
+                js_idx = cast("int", block_act_key[0])
+                run = self.get_EARs_from_IDs([run_ID])[0]
+                run_dir = None
+                if run.action.requires_dir:
+                    run_dir = run.get_directory()
+                    assert run_dir
+                    self._app.submission_logger.debug(
+                        f"changing directory to run execution directory: {run_dir}."
                     )
-
-
-
-
-
-
-
-
-
+                    os.chdir(run_dir)
+            self._app.submission_logger.debug(f"{run.skip=}; {run.skip_reason=}")
+
+            # check if we should skip:
+            if not run.skip:
+
+                try:
+                    with run.raise_on_failure_threshold() as unset_params:
+                        if run.action.script:
+                            run.write_script_input_files(block_act_key)
+
+                        # write the command file that will be executed:
+                        cmd_file_path = self.ensure_commands_file(
+                            submission_idx, js_idx, run
+                        )
+
+                except UnsetParameterDataErrorBase:
+                    # not all required parameter data is set, so fail this run:
+                    self._app.submission_logger.debug(
+                        f"unset parameter threshold satisfied (or any unset "
+                        f"parameters found when trying to write commands file), so "
+                        f"not attempting run. unset_params={unset_params!r}."
+                    )
+                    self.set_EAR_start(run_ID, run_dir, port_number=None)
+                    self._check_loop_termination(run)  # not sure if this is required
+                    self.set_EAR_end(
+                        block_act_key=block_act_key,
+                        run=run,
+                        exit_code=1,
+                    )
+                    return
+
+                # sufficient parameter data is set so far, but need to pass `unset_params`
+                # on as an environment variable so it can be appended to and failure
+                # thresholds can be rechecked if necessary (i.e. in a Python script
+                # where we also load input parameters "directly")
+                if unset_params:
+                    self._app.submission_logger.debug(
+                        f"some unset parameters found, but no unset-thresholds met: "
+                        f"unset_params={unset_params!r}."
+                    )
+
+                # TODO: pass on unset_params to script as environment variable
+
+                if has_commands := bool(cmd_file_path):
+
+                    assert isinstance(cmd_file_path, Path)
+                    if not cmd_file_path.is_file():
+                        raise RuntimeError(
+                            f"Command file {cmd_file_path!r} does not exist."
+                        )
+                    # prepare subprocess command:
+                    jobscript = self.submissions[submission_idx].jobscripts[js_idx]
+                    cmd = jobscript.shell.get_command_file_launch_command(
+                        str(cmd_file_path)
+                    )
+                    loop_idx_str = ";".join(
+                        f"{k}={v}" for k, v in run.element_iteration.loop_idx.items()
+                    )
+                    app_caps = self._app.package_name.upper()
+
+                    # TODO: make these optionally set (more difficult to set in combine_script,
+                    # so have the option to turn off) [default ON]
+                    add_env = {
+                        f"{app_caps}_RUN_ID": str(run_ID),
+                        f"{app_caps}_RUN_IDX": str(run.index),
+                        f"{app_caps}_ELEMENT_IDX": str(run.element.index),
+                        f"{app_caps}_ELEMENT_ID": str(run.element.id_),
+                        f"{app_caps}_ELEMENT_ITER_IDX": str(
+                            run.element_iteration.index
+                        ),
+                        f"{app_caps}_ELEMENT_ITER_ID": str(run.element_iteration.id_),
+                        f"{app_caps}_ELEMENT_ITER_LOOP_IDX": loop_idx_str,
+                    }
+
+                    if run.action.script:
+                        if run.is_snippet_script:
+                            script_artifact_name = run.get_script_artifact_name()
+                            script_dir = Path(
+                                os.environ[f"{app_caps}_SUB_SCRIPTS_DIR"]
+                            )
+                            script_name = script_artifact_name
+                        else:
+                            # not a snippet script; expect the script in the run execute
+                            # directory (i.e. created by a previous action)
+                            script_dir = Path.cwd()
+                            script_name = run.action.script
+                        script_name_no_ext = Path(script_name).stem
+                        add_env.update(
+                            {
+                                f"{app_caps}_RUN_SCRIPT_NAME": script_name,
+                                f"{app_caps}_RUN_SCRIPT_NAME_NO_EXT": script_name_no_ext,
+                                f"{app_caps}_RUN_SCRIPT_DIR": str(script_dir),
+                                f"{app_caps}_RUN_SCRIPT_PATH": str(
+                                    script_dir / script_name
+                                ),
+                            }
+                        )
+
+                    env = {**dict(os.environ), **add_env}
+
+                    self._app.submission_logger.debug(
+                        f"Executing run commands via subprocess with command {cmd!r}, and "
+                        f"environment variables as below."
+                    )
+                    for k, v in env.items():
+                        if k.startswith(app_caps):
+                            self._app.submission_logger.debug(f"{k} = {v!r}")
+                    exe = self._app.Executor(cmd, env, self._app.package_name)
+                    port = (
+                        exe.start_zmq_server()
+                    )  # start the server so we know the port
+
+                    try:
+                        self.set_EAR_start(run_ID, run_dir, port)
+                    except:
+                        self._app.submission_logger.error(f"Failed to set run start.")
+                        exe.stop_zmq_server()
+                        raise
+
+        # this subprocess may include commands that redirect to the std_stream file (e.g.
+        # calling the app to save a parameter from a shell command output):
+        if not run.skip and has_commands:
+            ret_code = exe.run()  # this also shuts down the server
+
+        # redirect (as much as possible) app-generated stdout/err to a dedicated file:
+        with redirect_std_to_file(run_std_path):
+            if run.skip:
+                ret_code = SKIPPED_EXIT_CODE
+            elif not has_commands:
+                ret_code = NO_COMMANDS_EXIT_CODE
+            else:
+                self._check_loop_termination(run)
+
+            # set run end:
+            self.set_EAR_end(
+                block_act_key=block_act_key,
+                run=run,
+                exit_code=ret_code,
             )
-
-
-
-
-
-
-
-
+
+    def _check_loop_termination(self, run: ElementActionRun) -> set[int]:
+        """Check if we need to terminate a loop if this is the last action of the loop
+        iteration for this element, and set downstream iteration runs to skip."""
+
+        elem_iter = run.element_iteration
+        task = elem_iter.task
+        check_loops = []
+        to_skip = set()
+        for loop_name in elem_iter.loop_idx:
+            self._app.logger.info(f"checking loop termination of loop {loop_name!r}.")
+            loop = self.loops.get(loop_name)
+            if (
+                loop.template.termination
+                and task.insert_ID == loop.template.termination_task_insert_ID
+                and run.element_action.action_idx == max(elem_iter.actions)
+            ):
+                check_loops.append(loop_name)
+                # TODO: test with condition actions
+                if loop.test_termination(elem_iter):
+                    self._app.logger.info(
+                        f"loop {loop_name!r} termination condition met for run "
+                        f"ID {run.id_!r}."
                    )
-
-
-
+                    to_skip.update(loop.skip_downstream_iterations(elem_iter))
+        return to_skip
+
+    @load_workflow_config
+    def execute_combined_runs(self, submission_idx: int, jobscript_idx: int) -> None:
+        """Execute a combined script (multiple runs) via a subprocess."""
+
+        # CD to submission tmp dir to ensure std streams and exceptions have somewhere
+        # sensible to go:
+        os.chdir(Submission.get_tmp_path(self.submissions_path, submission_idx))
+
+        sub = self.submissions[submission_idx]
+        js = sub.jobscripts[jobscript_idx]
+
+        app_caps = self._app.package_name.upper()
+        script_dir = Path(os.environ[f"{app_caps}_SUB_SCRIPTS_DIR"])
+        script_name = f"js_{jobscript_idx}.py"  # TODO: refactor script name
+        script_path = script_dir / script_name
+
+        add_env = {
+            f"{app_caps}_RUN_SCRIPT_NAME": script_name,
+            f"{app_caps}_RUN_SCRIPT_NAME_NO_EXT": script_path.stem,
+            f"{app_caps}_RUN_SCRIPT_DIR": str(script_dir),
+            f"{app_caps}_RUN_SCRIPT_PATH": str(script_path),
+            f"{app_caps}_SCRIPT_INDICES_FILE": str(js.combined_script_indices_file_path),
+        }
+        env = {**dict(os.environ), **add_env}
+
+        # note: unlike in `Workflow.execute_run`, here we can be reasonably sure the
+        # commands file already exists, because we call `Action.try_write_commands` with
+        # `raise_on_unset=True` in `Workflow._add_submission` during submission.
+
+        # TODO: refactor cmd file name:
+        cmd_file_path = sub.commands_path / f"js_{jobscript_idx}{js.shell.JS_EXT}"
+        cmd = js.shell.get_command_file_launch_command(str(cmd_file_path))
 
-
+        self._app.submission_logger.debug(
+            f"Executing combined runs via subprocess with command {cmd!r}, and "
+            f"environment variables as below."
+        )
+        for k, v in env.items():
+            if k.startswith(app_caps):
+                self._app.submission_logger.debug(f"{k} = {v}")
+
+        exe = self._app.Executor(cmd, env, self._app.package_name)
+        exe.start_zmq_server()  # start the server
+        exe.run()  # this also shuts down the server
+
+    def ensure_commands_file(
         self,
         submission_idx: int,
-
-
-
-
-        "
+        js_idx: int,
+        run: ElementActionRun,
+    ) -> Path | bool:
+        """Ensure a commands file exists for the specified run."""
+        self._app.persistence_logger.debug("Workflow.ensure_commands_file")
+
+        if run.commands_file_ID is None:
+            # no commands to write
+            return False
+
         with self._store.cached_load():
-            self.
-
-
-
-
-
-
-
-
-
-
-
-
-
+            sub = self.submissions[submission_idx]
+            jobscript = sub.jobscripts[js_idx]
+
+            # check if a commands file already exists, first checking using the run ID:
+            cmd_file_name = f"{run.id_}{jobscript.shell.JS_EXT}"  # TODO: refactor
+            cmd_file_path = jobscript.submission.commands_path / cmd_file_name
+
+            if not cmd_file_path.is_file():
+                # then check for a file from the "root" run ID (the run ID of a run that
+                # shares the same commands file):
+
+                cmd_file_name = (
+                    f"{run.commands_file_ID}{jobscript.shell.JS_EXT}"  # TODO: refactor
+                )
+                cmd_file_path = jobscript.submission.commands_path / cmd_file_name
+
+            if not cmd_file_path.is_file():
+                # no file available, so write (using the run ID):
+                try:
+                    cmd_file_path = run.try_write_commands(
+                        jobscript=jobscript,
+                        environments=sub.environments,
+                        raise_on_unset=True,
+                    )
+                except OutputFileParserNoOutputError:
+                    # no commands to write, might be used just for saving files
+                    return False
+
+            return cmd_file_path
 
     def process_shell_parameter_output(
         self, name: str, value: str, EAR_ID: int, cmd_idx: int, stderr: bool = False
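`execute_run` above exports run metadata to the subprocess environment, prefixed by the upper-cased package name. A script launched by the run could read it back as in this sketch, which assumes the `HPCFLOW` prefix (apps built on the SDK use their own package name):

    import os

    # metadata injected by execute_run (variable names assume the HPCFLOW prefix)
    run_id = int(os.environ["HPCFLOW_RUN_ID"])
    elem_idx = int(os.environ["HPCFLOW_ELEMENT_IDX"])
    loop_idx = dict(
        kv.split("=")
        for kv in os.environ["HPCFLOW_ELEMENT_ITER_LOOP_IDX"].split(";")
        if kv
    )
    print(f"run {run_id}, element {elem_idx}, loop indices {loop_idx}")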
@@ -3257,9 +4243,11 @@ class Workflow(AppAware):
                 input_source.task_ref = uniq_names_cur[input_source.task_ref]
             except KeyError:
                 raise InvalidInputSourceTaskReference(
-
+                    f"Input source {input_source.to_string()!r} refers to a missing "
+                    f"or inaccessible task: {input_source.task_ref!r}."
                 )
 
+    @TimeIt.decorator
     def get_all_submission_run_IDs(self) -> Iterable[int]:
         """
         Get the run IDs of all submissions.
@@ -3268,68 +4256,6 @@ class Workflow(AppAware):
         for sub in self.submissions:
             yield from sub.all_EAR_IDs
 
-    def check_loop_termination(self, loop_name: str, run_ID: int) -> None:
-        """Check if a loop should terminate, given the specified completed run, and if so,
-        set downstream iteration runs to be skipped."""
-        loop = self.loops.get(loop_name)
-        elem_iter = self.get_EARs_from_IDs(run_ID).element_iteration
-        if loop.test_termination(elem_iter):
-            # run IDs of downstream iterations that can be skipped
-            to_skip: set[int] = set()
-            elem_id = elem_iter.element.id_
-            loop_map = self.get_loop_map()  # over all jobscripts
-            for iter_idx, iter_dat in loop_map[loop_name][elem_id].items():
-                if iter_idx > elem_iter.index:
-                    to_skip.update(itr_d.id_ for itr_d in iter_dat)
-            self._app.logger.info(
-                f"Loop {loop_name!r} termination condition met for run_ID {run_ID!r}."
-            )
-            for run_ID in to_skip:
-                self.set_EAR_skip(run_ID)
-
-    def get_loop_map(
-        self, id_lst: Iterable[int] | None = None
-    ) -> Mapping[str, Mapping[int, Mapping[int, Sequence[_IterationData]]]]:
-        """
-        Get a description of what is going on with looping.
-        """
-        # TODO: test this works across multiple jobscripts
-        self._app.persistence_logger.debug("Workflow.get_loop_map")
-        if id_lst is None:
-            id_lst = self.get_all_submission_run_IDs()
-        loop_map: dict[str, dict[int, dict[int, list[_IterationData]]]] = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(list))
-        )
-        for EAR in self.get_EARs_from_IDs(id_lst):
-            for loop_name, iter_idx in EAR.element_iteration.loop_idx.items():
-                act_idx = EAR.element_action.action_idx
-                loop_map[loop_name][EAR.element.id_][iter_idx].append(
-                    _IterationData(EAR.id_, act_idx)
-                )
-        return loop_map
-
-    def get_iteration_final_run_IDs(
-        self,
-        id_lst: Iterable[int] | None = None,
-    ) -> Mapping[str, Sequence[int]]:
-        """Retrieve the run IDs of those runs that correspond to the final action within
-        a named loop iteration.
-
-        These runs represent the final action of a given element-iteration; this is used to
-        identify which commands file to append a loop-termination check to.
-        """
-        self._app.persistence_logger.debug("Workflow.get_iteration_final_run_IDs")
-
-        loop_map = self.get_loop_map(id_lst)
-
-        # find final EARs for each loop:
-        final_runs: dict[str, list[int]] = defaultdict(list)
-        for loop_name, dat in loop_map.items():
-            for elem_dat in dat.values():
-                for iter_dat in elem_dat.values():
-                    final_runs[loop_name].append(max(iter_dat, key=lambda x: x.idx).id_)
-        return final_runs
-
     def rechunk_runs(
         self,
         chunk_size: int | None = None,
@@ -3348,7 +4274,7 @@ class Workflow(AppAware):
         status: bool = True,
     ):
         """
-        Reorganise the stored data chunks for
+        Reorganise the stored data chunks for parameters to be more efficient.
         """
         self._store.rechunk_parameter_base(
             chunk_size=chunk_size, backup=backup, status=status
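The rechunking helpers touched above can also be invoked directly on a workflow; a hedged sketch (the chunk size is arbitrary, and `wf` is assumed to be a loaded `Workflow`):

    # reorganise stored run and parameter chunks, keeping backups of the originals
    wf.rechunk_runs(chunk_size=1000, backup=True, status=True)
    wf.rechunk_parameter_base(chunk_size=1000, backup=True, status=True)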
@@ -3366,6 +4292,311 @@ class Workflow(AppAware):
         self.rechunk_runs(chunk_size=chunk_size, backup=backup, status=status)
         self.rechunk_parameter_base(chunk_size=chunk_size, backup=backup, status=status)
 
+    @TimeIt.decorator
+    def get_run_directories(
+        self,
+        run_ids: list[int] | None = None,
+        dir_indices_arr: np.ndarray | None = None,
+    ) -> list[Path | None]:
+        """"""
+
+        @TimeIt.decorator
+        def _get_depth_dirs(
+            item_idx: int,
+            max_per_dir: int,
+            max_depth: int,
+            depth_idx_cache: dict[tuple[int, int], NDArray],
+            prefix: str,
+        ) -> list[str]:
+            dirs = []
+            max_avail_items = max_per_dir**max_depth
+            for depth_i in range(1, max_depth):
+                tot_items_per_level = int(max_avail_items / max_per_dir**depth_i)
+                key = (max_avail_items, tot_items_per_level)
+                if (depth_idx := depth_idx_cache.get(key)) is None:
+                    depth_idx = np.repeat(
+                        np.arange(max_avail_items / tot_items_per_level, dtype=int),
+                        tot_items_per_level,
+                    )
+                    depth_idx_cache[key] = depth_idx
+                idx_i = cast("NDArray", depth_idx)[item_idx]
+                start_idx = idx_i * tot_items_per_level
+                end_idx = start_idx + tot_items_per_level - 1
+                dirs.append(f"{prefix}_{start_idx}-{end_idx}")
+            return dirs
+
+        if dir_indices_arr is None:  # TODO: document behaviour!
+            dir_indices_arr = self._store.get_dirs_array()
+            if run_ids is not None:
+                dir_indices_arr = dir_indices_arr[run_ids]
+
+        # TODO: make these configurable so easier to test!
+        MAX_ELEMS_PER_DIR = 1000  # TODO: configurable (add `workflow_defaults` to Config)
+        MAX_ITERS_PER_DIR = 1000
+
+        exec_path = self.execution_path
+
+        # a fill value means no sub directory should be created
+        T_FILL, E_FILL, I_FILL, A_FILL, R_FILL, _, _ = RUN_DIR_ARR_FILL
+
+        depth_idx_cache: dict[
+            tuple[int, int], NDArray
+        ] = {}  # keys are (max_avail, tot_elems_per_dir_level)
+
+        # format run directories:
+        dirs = []
+        for dir_data in dir_indices_arr:
+
+            # TODO: retrieve task,element,iteration,action,run dir formats from
+            # (t_iID, act_idx) combo (cached)?
+
+            t_iID, e_idx, i_idx, _, r_idx, e_depth, i_depth = dir_data
+            path_args = []
+
+            if t_iID != T_FILL:
+                path_args.append(f"t_{t_iID}")
+
+            if e_idx != E_FILL:
+                if e_depth > 1:
+                    path_args.extend(
+                        _get_depth_dirs(
+                            item_idx=e_idx,
+                            max_per_dir=MAX_ELEMS_PER_DIR,
+                            max_depth=e_depth,
+                            depth_idx_cache=depth_idx_cache,
+                            prefix="e",
+                        )
+                    )
+                path_args.append(f"e_{e_idx}")
+
+            if i_idx != I_FILL:
+                if i_depth > 1:
+                    path_args.extend(
+                        _get_depth_dirs(
+                            item_idx=i_idx,
+                            max_per_dir=MAX_ITERS_PER_DIR,
+                            max_depth=i_depth,
+                            depth_idx_cache=depth_idx_cache,
+                            prefix="i",
+                        )
+                    )
+                path_args.append(f"i_{i_idx}")
+
+            if r_idx != R_FILL:
+                path_args.append(f"r_{r_idx}")
+
+            if path_args:
+                run_dir = exec_path.joinpath(*path_args)
+            elif e_depth == 1:
+                run_dir = exec_path
+            else:
+                run_dir = None
+
+            dirs.append(run_dir)
+
+        return dirs
+
+    @TimeIt.decorator
+    def get_scheduler_job_IDs(self) -> tuple[str, ...]:
+        """Return jobscript scheduler job IDs from all submissions of this workflow."""
+        return tuple(
+            IDs_j for sub_i in self.submissions for IDs_j in sub_i.get_scheduler_job_IDs()
+        )
+
+    @TimeIt.decorator
+    def get_process_IDs(self) -> tuple[int, ...]:
+        """Return jobscript process IDs from all submissions of this workflow."""
+        return tuple(
+            IDs_j for sub_i in self.submissions for IDs_j in sub_i.get_process_IDs()
+        )
+
+    @TimeIt.decorator
+    def list_jobscripts(
+        self,
+        sub_idx: int = 0,
+        max_js: int | None = None,
+        jobscripts: list[int] | None = None,
+        width: int | None = None,
+    ) -> None:
+        """Print a table listing jobscripts and associated information from the specified
+        submission.
+
+        Parameters
+        ----------
+        sub_idx
+            The submission index whose jobscripts are to be displayed.
+        max_js
+            Maximum jobscript index to display. This cannot be specified with `jobscripts`.
+        jobscripts
+            A list of jobscripts to display. This cannot be specified with `max_js`.
+        width
+            Width in characters of the printed table.
+        """
+
+        with self._store.cached_load():
+
+            if max_js is not None and jobscripts is not None:
+                raise ValueError("Do not specify both `max_js` and `jobscripts`.")
+
+            loop_names = [i.name for i in self.loops][::-1]
+            loop_names_panel: rich.panel.Panel | str = ""
+            if loop_names:
+                loop_names_panel = rich.panel.Panel(
+                    "\n".join(f"{idx}: {i}" for idx, i in enumerate(loop_names)),
+                    title="[b]Loops[/b]",
+                    title_align="left",
+                    box=rich.box.SIMPLE,
+                )
+
+            table = rich.table.Table(width=width)
+
+            table.add_column("Jobscript", justify="right", style="cyan", no_wrap=True)
+            table.add_column("Acts, Elms", justify="right", style="green")
+            table.add_column("Deps.", style="orange3")
+            table.add_column("Tasks", overflow="fold")
+            table.add_column("Loops")
+
+            sub_js = self.submissions[sub_idx].jobscripts
+            max_js = max_js if max_js is not None else len(sub_js)
+            for js in sub_js:
+                if jobscripts is not None and js.index not in jobscripts:
+                    continue
+                if js.index > max_js:
+                    break
+                for blk in js.blocks:
+                    blk_task_actions = blk.task_actions
+                    num_actions = blk_task_actions.shape[0]
+
+                    if blk.index == 0:
+                        c1 = f"{js.index} - {blk.index}"
+                    else:
+                        c1 = f"{blk.index}"
+                    c3 = f"{num_actions}, {blk.num_elements}"
+
+                    deps = "; ".join(f"{i[0],i[1]}" for i in blk.dependencies)
+
+                    for blk_t_idx, t_iID in enumerate(blk.task_insert_IDs):
+
+                        # loop indices are the same for all actions within a task, so get the
+                        # first `task_action` for this task insert ID:
+                        for i in blk_task_actions:
+                            if i[0] == t_iID:
+                                loop_idx = [
+                                    blk.task_loop_idx[i[2]].get(loop_name_i, "-")
+                                    for loop_name_i in loop_names
+                                ]
+                                break
+
+                        c2 = self.tasks.get(insert_ID=t_iID).unique_name
+
+                        if blk_t_idx > 0:
+                            c1 = ""
+                            c3 = ""
+                            deps = ""
+
+                        table.add_row(
+                            c1, c3, deps, c2, (" | ".join(f"{i}" for i in loop_idx))
+                        )
+
+                    table.add_section()
+
+            group = rich.console.Group(
+                rich.text.Text(f"Workflow: {self.name}"),
+                rich.text.Text(f"Submission: {sub_idx}" + ("\n" if loop_names_panel else "")),
+                loop_names_panel,
+                table,
+            )
+            rich_print(group)
+
+    def list_task_jobscripts(
+        self,
+        sub_idx: int = 0,
+        task_names: list[str] | None = None,
+        max_js: int | None = None,
+        width: int | None = None,
+    ):
+        """Print a table listing the jobscripts associated with the specified (or all)
+        tasks for the specified submission.
+
+        Parameters
+        ----------
+        sub_idx
+            The submission index whose jobscripts are to be displayed.
+        task_names
+            List of sub-strings to match to task names. Only matching task names will be
+            included.
+        max_js
+            Maximum jobscript index to display.
+        width
+            Width in characters of the printed table.
+        """
+
+        with self._store.cached_load():
+            loop_names = [i.name for i in self.loops][::-1]
+            loop_names_panel: rich.panel.Panel | str = ""
+            if loop_names:
+                loop_names_panel = rich.panel.Panel(
+                    "\n".join(f"{idx}: {i}" for idx, i in enumerate(loop_names)),
+                    title="[b]Loops[/b]",
+                    title_align="left",
+                    box=rich.box.SIMPLE,
+                )
+
+            sub_js = self.submissions[sub_idx].jobscripts
+            all_task_names = {i.insert_ID: i.unique_name for i in self.tasks}
+
+            # filter task names by those matching the specified names
+            matched = all_task_names
+            if task_names:
+                matched = {
+                    k: v
+                    for k, v in all_task_names.items()
+                    if any(i in v for i in task_names)
+                }
+
+            task_jobscripts = defaultdict(list)
+            for js in sub_js:
+                if max_js is not None and js.index > max_js:
+                    break
+                for blk in js.blocks:
+                    blk_task_actions = blk.task_actions
+                    for i in blk.task_insert_IDs:
+                        if i in matched:
+                            for j in blk_task_actions:
+                                if j[0] == i:
+                                    loop_idx = [
+                                        blk.task_loop_idx[j[2]].get(loop_name_i, "-")
+                                        for loop_name_i in loop_names
+                                    ]
+                                    break
+                            task_jobscripts[i].append((js.index, blk.index, loop_idx))
+
+            table = rich.table.Table(width=width)
+            table.add_column("Task")
+            table.add_column("Jobscripts", style="cyan", no_wrap=True)
+            table.add_column("Loops")
+            for insert_ID_i, jobscripts_i in task_jobscripts.items():
+                for idx, js_j in enumerate(jobscripts_i):
+                    js_idx, blk_idx, loop_idx = js_j
+                    table.add_row(
+                        matched[insert_ID_i] if idx == 0 else "",
+                        f"({js_idx}, {blk_idx})",
+                        (" | ".join(f"{i}" for i in loop_idx)),
+                    )
+                table.add_section()
+
+            group = rich.console.Group(
+                rich.text.Text(f"Workflow: {self.name}"),
+                rich.text.Text(f"Submission: {sub_idx}" + ("\n" if loop_names_panel else "")),
+                loop_names_panel,
+                table,
+            )
+            rich_print(group)
+
+    def get_text_file(self, path: str | Path) -> str:
+        """Retrieve the contents of a text file stored within the workflow."""
+        return self._store.get_text_file(path)
+
 
 @dataclass
 class WorkflowBlueprint:
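The new inspection helpers added above can be called directly on a workflow handle; a usage sketch with illustrative arguments (the file path passed to `get_text_file` is a placeholder, and `wf` is assumed to be a loaded `Workflow`):

    # tabulate jobscripts, blocks and loop indices for the first submission
    wf.list_jobscripts(sub_idx=0, width=120)

    # restrict the listing to tasks whose names contain "simulate"
    wf.list_task_jobscripts(sub_idx=0, task_names=["simulate"])

    # read a text file stored within the workflow (path is illustrative)
    print(wf.get_text_file("submissions/0/js_0.sh"))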