hpcflow 0.1.15__py3-none-any.whl → 0.2.0a271__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__init__.py +2 -11
- hpcflow/__pyinstaller/__init__.py +5 -0
- hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
- hpcflow/_version.py +1 -1
- hpcflow/app.py +43 -0
- hpcflow/cli.py +2 -461
- hpcflow/data/demo_data_manifest/__init__.py +3 -0
- hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
- hpcflow/data/jinja_templates/test/test_template.txt +8 -0
- hpcflow/data/programs/hello_world/README.md +1 -0
- hpcflow/data/programs/hello_world/hello_world.c +87 -0
- hpcflow/data/programs/hello_world/linux/hello_world +0 -0
- hpcflow/data/programs/hello_world/macos/hello_world +0 -0
- hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
- hpcflow/data/scripts/__init__.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
- hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/generate_t1_file_01.py +7 -0
- hpcflow/data/scripts/import_future_script.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
- hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
- hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/parse_t1_file_01.py +4 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/__init__.py +1 -0
- hpcflow/data/template_components/command_files.yaml +26 -0
- hpcflow/data/template_components/environments.yaml +13 -0
- hpcflow/data/template_components/parameters.yaml +14 -0
- hpcflow/data/template_components/task_schemas.yaml +139 -0
- hpcflow/data/workflows/workflow_1.yaml +5 -0
- hpcflow/examples.ipynb +1037 -0
- hpcflow/sdk/__init__.py +149 -0
- hpcflow/sdk/app.py +4266 -0
- hpcflow/sdk/cli.py +1479 -0
- hpcflow/sdk/cli_common.py +385 -0
- hpcflow/sdk/config/__init__.py +5 -0
- hpcflow/sdk/config/callbacks.py +246 -0
- hpcflow/sdk/config/cli.py +388 -0
- hpcflow/sdk/config/config.py +1410 -0
- hpcflow/sdk/config/config_file.py +501 -0
- hpcflow/sdk/config/errors.py +272 -0
- hpcflow/sdk/config/types.py +150 -0
- hpcflow/sdk/core/__init__.py +38 -0
- hpcflow/sdk/core/actions.py +3857 -0
- hpcflow/sdk/core/app_aware.py +25 -0
- hpcflow/sdk/core/cache.py +224 -0
- hpcflow/sdk/core/command_files.py +814 -0
- hpcflow/sdk/core/commands.py +424 -0
- hpcflow/sdk/core/element.py +2071 -0
- hpcflow/sdk/core/enums.py +221 -0
- hpcflow/sdk/core/environment.py +256 -0
- hpcflow/sdk/core/errors.py +1043 -0
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +809 -0
- hpcflow/sdk/core/loop.py +1320 -0
- hpcflow/sdk/core/loop_cache.py +282 -0
- hpcflow/sdk/core/object_list.py +933 -0
- hpcflow/sdk/core/parameters.py +3371 -0
- hpcflow/sdk/core/rule.py +196 -0
- hpcflow/sdk/core/run_dir_files.py +57 -0
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +3792 -0
- hpcflow/sdk/core/task_schema.py +993 -0
- hpcflow/sdk/core/test_utils.py +538 -0
- hpcflow/sdk/core/types.py +447 -0
- hpcflow/sdk/core/utils.py +1207 -0
- hpcflow/sdk/core/validation.py +87 -0
- hpcflow/sdk/core/values.py +477 -0
- hpcflow/sdk/core/workflow.py +4820 -0
- hpcflow/sdk/core/zarr_io.py +206 -0
- hpcflow/sdk/data/__init__.py +13 -0
- hpcflow/sdk/data/config_file_schema.yaml +34 -0
- hpcflow/sdk/data/config_schema.yaml +260 -0
- hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
- hpcflow/sdk/data/files_spec_schema.yaml +5 -0
- hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
- hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
- hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
- hpcflow/sdk/demo/__init__.py +3 -0
- hpcflow/sdk/demo/cli.py +242 -0
- hpcflow/sdk/helper/__init__.py +3 -0
- hpcflow/sdk/helper/cli.py +137 -0
- hpcflow/sdk/helper/helper.py +300 -0
- hpcflow/sdk/helper/watcher.py +192 -0
- hpcflow/sdk/log.py +288 -0
- hpcflow/sdk/persistence/__init__.py +18 -0
- hpcflow/sdk/persistence/base.py +2817 -0
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +39 -0
- hpcflow/sdk/persistence/json.py +954 -0
- hpcflow/sdk/persistence/pending.py +948 -0
- hpcflow/sdk/persistence/store_resource.py +203 -0
- hpcflow/sdk/persistence/types.py +309 -0
- hpcflow/sdk/persistence/utils.py +73 -0
- hpcflow/sdk/persistence/zarr.py +2388 -0
- hpcflow/sdk/runtime.py +320 -0
- hpcflow/sdk/submission/__init__.py +3 -0
- hpcflow/sdk/submission/enums.py +70 -0
- hpcflow/sdk/submission/jobscript.py +2379 -0
- hpcflow/sdk/submission/schedulers/__init__.py +281 -0
- hpcflow/sdk/submission/schedulers/direct.py +233 -0
- hpcflow/sdk/submission/schedulers/sge.py +376 -0
- hpcflow/sdk/submission/schedulers/slurm.py +598 -0
- hpcflow/sdk/submission/schedulers/utils.py +25 -0
- hpcflow/sdk/submission/shells/__init__.py +52 -0
- hpcflow/sdk/submission/shells/base.py +229 -0
- hpcflow/sdk/submission/shells/bash.py +504 -0
- hpcflow/sdk/submission/shells/os_version.py +115 -0
- hpcflow/sdk/submission/shells/powershell.py +352 -0
- hpcflow/sdk/submission/submission.py +1402 -0
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +194 -0
- hpcflow/sdk/utils/arrays.py +69 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +31 -0
- hpcflow/sdk/utils/strings.py +69 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +123 -0
- hpcflow/tests/data/__init__.py +0 -0
- hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_1.json +10 -0
- hpcflow/tests/data/workflow_1.yaml +5 -0
- hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
- hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
- hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
- hpcflow/tests/programs/test_programs.py +180 -0
- hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +1361 -0
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
- hpcflow/tests/unit/test_action.py +1066 -0
- hpcflow/tests/unit/test_action_rule.py +24 -0
- hpcflow/tests/unit/test_app.py +132 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +172 -0
- hpcflow/tests/unit/test_command.py +377 -0
- hpcflow/tests/unit/test_config.py +195 -0
- hpcflow/tests/unit/test_config_file.py +162 -0
- hpcflow/tests/unit/test_element.py +666 -0
- hpcflow/tests/unit/test_element_iteration.py +88 -0
- hpcflow/tests/unit/test_element_set.py +158 -0
- hpcflow/tests/unit/test_group.py +115 -0
- hpcflow/tests/unit/test_input_source.py +1479 -0
- hpcflow/tests/unit/test_input_value.py +398 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +1247 -0
- hpcflow/tests/unit/test_loop.py +2674 -0
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
- hpcflow/tests/unit/test_object_list.py +116 -0
- hpcflow/tests/unit/test_parameter.py +243 -0
- hpcflow/tests/unit/test_persistence.py +664 -0
- hpcflow/tests/unit/test_resources.py +243 -0
- hpcflow/tests/unit/test_run.py +286 -0
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +9 -0
- hpcflow/tests/unit/test_schema_input.py +372 -0
- hpcflow/tests/unit/test_shell.py +129 -0
- hpcflow/tests/unit/test_slurm.py +39 -0
- hpcflow/tests/unit/test_submission.py +502 -0
- hpcflow/tests/unit/test_task.py +2560 -0
- hpcflow/tests/unit/test_task_schema.py +182 -0
- hpcflow/tests/unit/test_utils.py +616 -0
- hpcflow/tests/unit/test_value_sequence.py +549 -0
- hpcflow/tests/unit/test_values.py +91 -0
- hpcflow/tests/unit/test_workflow.py +827 -0
- hpcflow/tests/unit/test_workflow_template.py +186 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/unit/utils/test_strings.py +97 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +355 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +564 -0
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6794 -0
- hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
- hpcflow-0.2.0a271.dist-info/METADATA +65 -0
- hpcflow-0.2.0a271.dist-info/RECORD +237 -0
- {hpcflow-0.1.15.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
- hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
- hpcflow/api.py +0 -490
- hpcflow/archive/archive.py +0 -307
- hpcflow/archive/cloud/cloud.py +0 -45
- hpcflow/archive/cloud/errors.py +0 -9
- hpcflow/archive/cloud/providers/dropbox.py +0 -427
- hpcflow/archive/errors.py +0 -5
- hpcflow/base_db.py +0 -4
- hpcflow/config.py +0 -233
- hpcflow/copytree.py +0 -66
- hpcflow/data/examples/_config.yml +0 -14
- hpcflow/data/examples/damask/demo/1.run.yml +0 -4
- hpcflow/data/examples/damask/demo/2.process.yml +0 -29
- hpcflow/data/examples/damask/demo/geom.geom +0 -2052
- hpcflow/data/examples/damask/demo/load.load +0 -1
- hpcflow/data/examples/damask/demo/material.config +0 -185
- hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
- hpcflow/data/examples/damask/inputs/load.load +0 -1
- hpcflow/data/examples/damask/inputs/material.config +0 -185
- hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
- hpcflow/data/examples/damask/profiles/damask.yml +0 -4
- hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
- hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
- hpcflow/data/examples/damask/profiles/default.yml +0 -6
- hpcflow/data/examples/thinking.yml +0 -177
- hpcflow/errors.py +0 -2
- hpcflow/init_db.py +0 -37
- hpcflow/models.py +0 -2595
- hpcflow/nesting.py +0 -9
- hpcflow/profiles.py +0 -455
- hpcflow/project.py +0 -81
- hpcflow/scheduler.py +0 -322
- hpcflow/utils.py +0 -103
- hpcflow/validation.py +0 -166
- hpcflow/variables.py +0 -543
- hpcflow-0.1.15.dist-info/METADATA +0 -168
- hpcflow-0.1.15.dist-info/RECORD +0 -45
- hpcflow-0.1.15.dist-info/entry_points.txt +0 -8
- hpcflow-0.1.15.dist-info/top_level.txt +0 -1
- /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
- /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
- /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
|
@@ -0,0 +1,4820 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main workflow model.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from contextlib import contextmanager, nullcontext
|
|
9
|
+
import copy
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
|
|
12
|
+
from functools import wraps
|
|
13
|
+
import os
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
import random
|
|
16
|
+
import shutil
|
|
17
|
+
import string
|
|
18
|
+
from threading import Thread
|
|
19
|
+
import time
|
|
20
|
+
from typing import overload, cast, TYPE_CHECKING, TypeVar
|
|
21
|
+
from typing_extensions import ParamSpec, Concatenate
|
|
22
|
+
|
|
23
|
+
from uuid import uuid4
|
|
24
|
+
from warnings import warn
|
|
25
|
+
from fsspec.implementations.local import LocalFileSystem # type: ignore
|
|
26
|
+
from fsspec.implementations.zip import ZipFileSystem # type: ignore
|
|
27
|
+
import numpy as np
|
|
28
|
+
from fsspec.core import url_to_fs # type: ignore
|
|
29
|
+
from rich import print as rich_print
|
|
30
|
+
import rich.console
|
|
31
|
+
import rich.panel
|
|
32
|
+
import rich.table
|
|
33
|
+
import rich.text
|
|
34
|
+
import rich.box
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
from hpcflow.sdk import app
|
|
38
|
+
from hpcflow.sdk.typing import hydrate
|
|
39
|
+
from hpcflow.sdk.config.errors import (
|
|
40
|
+
ConfigNonConfigurableError,
|
|
41
|
+
UnknownMetaTaskConstitutiveSchema,
|
|
42
|
+
)
|
|
43
|
+
from hpcflow.sdk.core import (
|
|
44
|
+
ALL_TEMPLATE_FORMATS,
|
|
45
|
+
ABORT_EXIT_CODE,
|
|
46
|
+
RUN_DIR_ARR_FILL,
|
|
47
|
+
SKIPPED_EXIT_CODE,
|
|
48
|
+
NO_COMMANDS_EXIT_CODE,
|
|
49
|
+
)
|
|
50
|
+
from hpcflow.sdk.core.app_aware import AppAware
|
|
51
|
+
from hpcflow.sdk.core.enums import EARStatus
|
|
52
|
+
from hpcflow.sdk.core.skip_reason import SkipReason
|
|
53
|
+
from hpcflow.sdk.core.cache import ObjectCache
|
|
54
|
+
from hpcflow.sdk.core.loop_cache import LoopCache, LoopIndex
|
|
55
|
+
from hpcflow.sdk.log import TimeIt
|
|
56
|
+
from hpcflow.sdk.persistence import store_cls_from_str
|
|
57
|
+
from hpcflow.sdk.persistence.defaults import DEFAULT_STORE_FORMAT
|
|
58
|
+
from hpcflow.sdk.persistence.base import TEMPLATE_COMP_TYPES
|
|
59
|
+
from hpcflow.sdk.persistence.utils import ask_pw_on_auth_exc, infer_store
|
|
60
|
+
from hpcflow.sdk.submission.jobscript import (
|
|
61
|
+
generate_EAR_resource_map,
|
|
62
|
+
group_resource_map_into_jobscripts,
|
|
63
|
+
is_jobscript_array,
|
|
64
|
+
merge_jobscripts_across_tasks,
|
|
65
|
+
resolve_jobscript_blocks,
|
|
66
|
+
resolve_jobscript_dependencies,
|
|
67
|
+
)
|
|
68
|
+
from hpcflow.sdk.submission.enums import JobscriptElementState
|
|
69
|
+
from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
|
|
70
|
+
from hpcflow.sdk.submission.submission import Submission
|
|
71
|
+
from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
|
|
72
|
+
from hpcflow.sdk.utils.strings import shorten_list_str
|
|
73
|
+
from hpcflow.sdk.core.utils import (
|
|
74
|
+
read_JSON_file,
|
|
75
|
+
read_JSON_string,
|
|
76
|
+
read_YAML_str,
|
|
77
|
+
read_YAML_file,
|
|
78
|
+
redirect_std_to_file,
|
|
79
|
+
replace_items,
|
|
80
|
+
current_timestamp,
|
|
81
|
+
normalise_timestamp,
|
|
82
|
+
parse_timestamp,
|
|
83
|
+
)
|
|
84
|
+
from hpcflow.sdk.core.errors import (
|
|
85
|
+
InvalidInputSourceTaskReference,
|
|
86
|
+
LoopAlreadyExistsError,
|
|
87
|
+
OutputFileParserNoOutputError,
|
|
88
|
+
RunNotAbortableError,
|
|
89
|
+
SubmissionFailure,
|
|
90
|
+
UnsetParameterDataErrorBase,
|
|
91
|
+
WorkflowSubmissionFailure,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
if TYPE_CHECKING:
|
|
95
|
+
from collections.abc import Iterable, Iterator, Mapping, Sequence
|
|
96
|
+
from contextlib import AbstractContextManager
|
|
97
|
+
from typing import Any, ClassVar, Literal
|
|
98
|
+
from typing_extensions import Self, TypeAlias
|
|
99
|
+
from numpy.typing import NDArray
|
|
100
|
+
import psutil
|
|
101
|
+
from rich.status import Status
|
|
102
|
+
from ..typing import DataIndex, ParamSource, PathLike, TemplateComponents
|
|
103
|
+
from .actions import ElementActionRun, UnsetParamTracker
|
|
104
|
+
from .element import Element, ElementIteration
|
|
105
|
+
from .loop import Loop, WorkflowLoop
|
|
106
|
+
from .object_list import ObjectList, ResourceList, WorkflowLoopList, WorkflowTaskList
|
|
107
|
+
from .parameters import InputSource, ResourceSpec
|
|
108
|
+
from .task import Task, WorkflowTask
|
|
109
|
+
from .types import (
|
|
110
|
+
AbstractFileSystem,
|
|
111
|
+
CreationInfo,
|
|
112
|
+
Pending,
|
|
113
|
+
Resources,
|
|
114
|
+
WorkflowTemplateTaskData,
|
|
115
|
+
WorkflowTemplateElementSetData,
|
|
116
|
+
BlockActionKey,
|
|
117
|
+
)
|
|
118
|
+
from ..submission.submission import Submission
|
|
119
|
+
from ..submission.jobscript import (
|
|
120
|
+
Jobscript,
|
|
121
|
+
JobScriptDescriptor,
|
|
122
|
+
JobScriptCreationArguments,
|
|
123
|
+
)
|
|
124
|
+
from ..persistence.base import (
|
|
125
|
+
StoreElement,
|
|
126
|
+
StoreElementIter,
|
|
127
|
+
StoreTask,
|
|
128
|
+
StoreParameter,
|
|
129
|
+
StoreEAR,
|
|
130
|
+
)
|
|
131
|
+
from ..persistence.types import TemplateMeta
|
|
132
|
+
from .json_like import JSONed
|
|
133
|
+
|
|
134
|
+
#: Convenience alias
|
|
135
|
+
_TemplateComponents: TypeAlias = "dict[str, ObjectList[JSONLike]]"
|
|
136
|
+
|
|
137
|
+
P = ParamSpec("P")
|
|
138
|
+
T = TypeVar("T")
|
|
139
|
+
S = TypeVar("S", bound="Workflow")
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass
|
|
143
|
+
class _Pathway:
|
|
144
|
+
id_: int
|
|
145
|
+
names: LoopIndex[str, int] = field(default_factory=LoopIndex)
|
|
146
|
+
iter_ids: list[int] = field(default_factory=list)
|
|
147
|
+
data_idx: list[DataIndex] = field(default_factory=list)
|
|
148
|
+
|
|
149
|
+
def as_tuple(
|
|
150
|
+
self, *, ret_iter_IDs: bool = False, ret_data_idx: bool = False
|
|
151
|
+
) -> tuple:
|
|
152
|
+
if ret_iter_IDs:
|
|
153
|
+
if ret_data_idx:
|
|
154
|
+
return (self.id_, self.names, tuple(self.iter_ids), tuple(self.data_idx))
|
|
155
|
+
else:
|
|
156
|
+
return (self.id_, self.names, tuple(self.iter_ids))
|
|
157
|
+
else:
|
|
158
|
+
if ret_data_idx:
|
|
159
|
+
return (self.id_, self.names, tuple(self.data_idx))
|
|
160
|
+
else:
|
|
161
|
+
return (self.id_, self.names)
|
|
162
|
+
|
|
163
|
+
def __deepcopy__(self, memo) -> Self:
|
|
164
|
+
return self.__class__(
|
|
165
|
+
self.id_,
|
|
166
|
+
self.names,
|
|
167
|
+
copy.deepcopy(self.iter_ids, memo),
|
|
168
|
+
copy.deepcopy(self.data_idx, memo),
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@dataclass
|
|
173
|
+
@hydrate
|
|
174
|
+
class WorkflowTemplate(JSONLike):
|
|
175
|
+
"""Class to represent initial parametrisation of a {app_name} workflow, with limited
|
|
176
|
+
validation logic.
|
|
177
|
+
|
|
178
|
+
Parameters
|
|
179
|
+
----------
|
|
180
|
+
name:
|
|
181
|
+
A string name for the workflow. By default this name will be used in combination
|
|
182
|
+
with a date-time stamp when generating a persistent workflow from the template.
|
|
183
|
+
tasks: list[~hpcflow.app.Task]
|
|
184
|
+
A list of Task objects to include in the workflow.
|
|
185
|
+
loops: list[~hpcflow.app.Loop]
|
|
186
|
+
A list of Loop objects to include in the workflow.
|
|
187
|
+
workflow:
|
|
188
|
+
The associated concrete workflow.
|
|
189
|
+
resources: dict[str, dict] | list[~hpcflow.app.ResourceSpec] | ~hpcflow.app.ResourceList
|
|
190
|
+
Template-level resources to apply to all tasks as default values. This can be a
|
|
191
|
+
dict that maps action scopes to resources (e.g. `{{"any": {{"num_cores": 2}}}}`)
|
|
192
|
+
or a list of `ResourceSpec` objects, or a `ResourceList` object.
|
|
193
|
+
environments:
|
|
194
|
+
Environment specifiers, keyed by environment name.
|
|
195
|
+
env_presets:
|
|
196
|
+
The environment presets to use.
|
|
197
|
+
source_file:
|
|
198
|
+
The file this was derived from.
|
|
199
|
+
store_kwargs:
|
|
200
|
+
Additional arguments to pass to the persistent data store constructor.
|
|
201
|
+
merge_resources:
|
|
202
|
+
If True, merge template-level `resources` into element set resources. If False,
|
|
203
|
+
template-level resources are ignored.
|
|
204
|
+
merge_envs:
|
|
205
|
+
Whether to merge the environments into task resources.
|
|
206
|
+
"""
|
|
207
|
+
|
|
208
|
+
_validation_schema: ClassVar[str] = "workflow_spec_schema.yaml"
|
|
209
|
+
|
|
210
|
+
_child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
|
|
211
|
+
ChildObjectSpec(
|
|
212
|
+
name="tasks",
|
|
213
|
+
class_name="Task",
|
|
214
|
+
is_multiple=True,
|
|
215
|
+
parent_ref="workflow_template",
|
|
216
|
+
),
|
|
217
|
+
ChildObjectSpec(
|
|
218
|
+
name="loops",
|
|
219
|
+
class_name="Loop",
|
|
220
|
+
is_multiple=True,
|
|
221
|
+
parent_ref="_workflow_template",
|
|
222
|
+
),
|
|
223
|
+
ChildObjectSpec(
|
|
224
|
+
name="resources",
|
|
225
|
+
class_name="ResourceList",
|
|
226
|
+
parent_ref="_workflow_template",
|
|
227
|
+
),
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
#: A string name for the workflow.
|
|
231
|
+
name: str
|
|
232
|
+
#: Documentation information.
|
|
233
|
+
doc: list[str] | str | None = field(repr=False, default=None)
|
|
234
|
+
#: A list of Task objects to include in the workflow.
|
|
235
|
+
tasks: list[Task] = field(default_factory=list)
|
|
236
|
+
#: A list of Loop objects to include in the workflow.
|
|
237
|
+
loops: list[Loop] = field(default_factory=list)
|
|
238
|
+
#: The associated concrete workflow.
|
|
239
|
+
workflow: Workflow | None = None
|
|
240
|
+
#: Template-level resources to apply to all tasks as default values.
|
|
241
|
+
resources: Resources = None
|
|
242
|
+
config: dict = field(default_factory=lambda: {})
|
|
243
|
+
#: Environment specifiers, keyed by environment name.
|
|
244
|
+
environments: Mapping[str, Mapping[str, Any]] | None = None
|
|
245
|
+
#: The environment presets to use.
|
|
246
|
+
env_presets: str | list[str] | None = None
|
|
247
|
+
#: The file this was derived from.
|
|
248
|
+
source_file: str | None = field(default=None, compare=False)
|
|
249
|
+
#: Additional arguments to pass to the persistent data store constructor.
|
|
250
|
+
store_kwargs: dict[str, Any] = field(default_factory=dict)
|
|
251
|
+
#: Whether to merge template-level `resources` into element set resources.
|
|
252
|
+
merge_resources: bool = True
|
|
253
|
+
#: Whether to merge the environments into task resources.
|
|
254
|
+
merge_envs: bool = True
|
|
255
|
+
|
|
256
|
+
def __post_init__(self) -> None:
|
|
257
|
+
|
|
258
|
+
# TODO: in what scenario is the reindex required? are loops initialised?
|
|
259
|
+
|
|
260
|
+
# replace metatasks with tasks
|
|
261
|
+
new_tasks: list[Task] = []
|
|
262
|
+
do_reindex = False
|
|
263
|
+
reindex = {}
|
|
264
|
+
for task_idx, i in enumerate(self.tasks):
|
|
265
|
+
if isinstance(i, app.MetaTask):
|
|
266
|
+
do_reindex = True
|
|
267
|
+
tasks_from_meta = copy.deepcopy(i.tasks)
|
|
268
|
+
reindex[task_idx] = [
|
|
269
|
+
len(new_tasks) + i for i in range(len(tasks_from_meta))
|
|
270
|
+
]
|
|
271
|
+
new_tasks.extend(tasks_from_meta)
|
|
272
|
+
else:
|
|
273
|
+
reindex[task_idx] = [len(new_tasks)]
|
|
274
|
+
new_tasks.append(i)
|
|
275
|
+
if do_reindex:
|
|
276
|
+
if self.loops:
|
|
277
|
+
for loop_idx, loop in enumerate(cast("list[dict[str, Any]]", self.loops)):
|
|
278
|
+
loop["tasks"] = [j for i in loop["tasks"] for j in reindex[i]]
|
|
279
|
+
term_task = loop.get("termination_task")
|
|
280
|
+
if term_task is not None:
|
|
281
|
+
loop["termination_task"] = reindex[term_task][0]
|
|
282
|
+
|
|
283
|
+
self.tasks = new_tasks
|
|
284
|
+
|
|
285
|
+
resources = self._app.ResourceList.normalise(self.resources)
|
|
286
|
+
self.resources = resources
|
|
287
|
+
self._set_parent_refs()
|
|
288
|
+
|
|
289
|
+
# merge template-level `resources` into task element set resources (this mutates
|
|
290
|
+
# `tasks`, and should only happen on creation of the workflow template, not on
|
|
291
|
+
# re-initialisation from a persistent workflow):
|
|
292
|
+
if self.merge_resources:
|
|
293
|
+
for task in self.tasks:
|
|
294
|
+
for element_set in task.element_sets:
|
|
295
|
+
element_set.resources.merge_other(resources)
|
|
296
|
+
self.merge_resources = False
|
|
297
|
+
|
|
298
|
+
if self.merge_envs:
|
|
299
|
+
self._merge_envs_into_task_resources()
|
|
300
|
+
|
|
301
|
+
if self.doc and not isinstance(self.doc, list):
|
|
302
|
+
self.doc = [self.doc]
|
|
303
|
+
|
|
304
|
+
if self.config:
|
|
305
|
+
# don't do a full validation (which would require loading the config file),
|
|
306
|
+
# just check all specified keys are configurable:
|
|
307
|
+
bad_keys = set(self.config) - set(self._app.config_options._configurable_keys)
|
|
308
|
+
if bad_keys:
|
|
309
|
+
raise ConfigNonConfigurableError(name=bad_keys)
|
|
310
|
+
|
|
311
|
+
@property
|
|
312
|
+
def _resources(self) -> ResourceList:
|
|
313
|
+
res = self.resources
|
|
314
|
+
assert isinstance(res, self._app.ResourceList)
|
|
315
|
+
return res
|
|
316
|
+
|
|
317
|
+
def _get_resources_copy(self) -> Iterator[ResourceSpec]:
|
|
318
|
+
"""
|
|
319
|
+
Get a deep copy of the list of resources.
|
|
320
|
+
"""
|
|
321
|
+
memo: dict[int, Any] = {}
|
|
322
|
+
for spec in self._resources:
|
|
323
|
+
yield copy.deepcopy(spec, memo)
|
|
324
|
+
|
|
325
|
+
def _merge_envs_into_task_resources(self) -> None:
|
|
326
|
+
self.merge_envs = False
|
|
327
|
+
|
|
328
|
+
# disallow both `env_presets` and `environments` specifications:
|
|
329
|
+
if self.env_presets and self.environments:
|
|
330
|
+
raise ValueError(
|
|
331
|
+
"Workflow template: specify at most one of `env_presets` and "
|
|
332
|
+
"`environments`."
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
if not isinstance(self.env_presets, list):
|
|
336
|
+
self.env_presets = [self.env_presets] if self.env_presets else []
|
|
337
|
+
|
|
338
|
+
for task in self.tasks:
|
|
339
|
+
# get applicable environments and environment preset names:
|
|
340
|
+
try:
|
|
341
|
+
schema = task.schema
|
|
342
|
+
except ValueError:
|
|
343
|
+
# TODO: consider multiple schemas
|
|
344
|
+
raise NotImplementedError(
|
|
345
|
+
"Cannot merge environment presets into a task without multiple "
|
|
346
|
+
"schemas."
|
|
347
|
+
)
|
|
348
|
+
schema_presets = schema.environment_presets
|
|
349
|
+
app_envs = {act.get_environment_name() for act in schema.actions}
|
|
350
|
+
for es in task.element_sets:
|
|
351
|
+
app_env_specs_i: Mapping[str, Mapping[str, Any]] | None = None
|
|
352
|
+
if not es.environments and not es.env_preset:
|
|
353
|
+
# no task level envs/presets specified, so merge template-level:
|
|
354
|
+
if self.environments:
|
|
355
|
+
app_env_specs_i = {
|
|
356
|
+
k: v for k, v in self.environments.items() if k in app_envs
|
|
357
|
+
}
|
|
358
|
+
if app_env_specs_i:
|
|
359
|
+
self._app.logger.info(
|
|
360
|
+
f"(task {task.name!r}, element set {es.index}): using "
|
|
361
|
+
f"template-level requested `environment` specifiers: "
|
|
362
|
+
f"{app_env_specs_i!r}."
|
|
363
|
+
)
|
|
364
|
+
es.environments = app_env_specs_i
|
|
365
|
+
|
|
366
|
+
elif self.env_presets and schema_presets:
|
|
367
|
+
# take only the first applicable preset:
|
|
368
|
+
for app_preset in self.env_presets:
|
|
369
|
+
if app_preset in schema_presets:
|
|
370
|
+
es.env_preset = app_preset
|
|
371
|
+
app_env_specs_i = schema_presets[app_preset]
|
|
372
|
+
self._app.logger.info(
|
|
373
|
+
f"(task {task.name!r}, element set {es.index}): using "
|
|
374
|
+
f"template-level requested {app_preset!r} "
|
|
375
|
+
f"`env_preset`: {app_env_specs_i!r}."
|
|
376
|
+
)
|
|
377
|
+
break
|
|
378
|
+
|
|
379
|
+
else:
|
|
380
|
+
# no env/preset applicable here (and no env/preset at task level),
|
|
381
|
+
# so apply a default preset if available:
|
|
382
|
+
if app_env_specs_i := (schema_presets or {}).get("", None):
|
|
383
|
+
self._app.logger.info(
|
|
384
|
+
f"(task {task.name!r}, element set {es.index}): setting "
|
|
385
|
+
f"to default (empty-string named) `env_preset`: "
|
|
386
|
+
f"{app_env_specs_i}."
|
|
387
|
+
)
|
|
388
|
+
es.env_preset = ""
|
|
389
|
+
|
|
390
|
+
if app_env_specs_i:
|
|
391
|
+
es.resources.merge_one(
|
|
392
|
+
self._app.ResourceSpec(
|
|
393
|
+
scope="any", environments=app_env_specs_i
|
|
394
|
+
)
|
|
395
|
+
)
|
|
396
|
+
|
|
397
|
+
@classmethod
|
|
398
|
+
@TimeIt.decorator
|
|
399
|
+
def _from_data(cls, data: dict[str, Any]) -> WorkflowTemplate:
|
|
400
|
+
def _normalise_task_parametrisation(task_lst: list[WorkflowTemplateTaskData]):
|
|
401
|
+
"""
|
|
402
|
+
For each dict in a list of task parametrisations, ensure the `schema` key is
|
|
403
|
+
a list of values, and ensure `element_sets` are defined.
|
|
404
|
+
|
|
405
|
+
This mutates `task_lst`.
|
|
406
|
+
|
|
407
|
+
"""
|
|
408
|
+
# use element_sets if not already:
|
|
409
|
+
task_dat: WorkflowTemplateTaskData
|
|
410
|
+
for task_idx, task_dat in enumerate(task_lst):
|
|
411
|
+
schema = task_dat.pop("schema")
|
|
412
|
+
schema_list: list = schema if isinstance(schema, list) else [schema]
|
|
413
|
+
if "element_sets" in task_dat:
|
|
414
|
+
# just update the schema to a list:
|
|
415
|
+
task_lst[task_idx]["schema"] = schema_list
|
|
416
|
+
else:
|
|
417
|
+
# add a single element set, and update the schema to a list:
|
|
418
|
+
out_labels = task_dat.pop("output_labels", [])
|
|
419
|
+
es_dat = cast("WorkflowTemplateElementSetData", task_dat)
|
|
420
|
+
new_task_dat: WorkflowTemplateTaskData = {
|
|
421
|
+
"schema": schema_list,
|
|
422
|
+
"element_sets": [es_dat],
|
|
423
|
+
"output_labels": out_labels,
|
|
424
|
+
}
|
|
425
|
+
task_lst[task_idx] = new_task_dat
|
|
426
|
+
# move sequences with `paths` (note: plural) to multi_path_sequences:
|
|
427
|
+
for elem_set in task_lst[task_idx]["element_sets"]:
|
|
428
|
+
new_mps = []
|
|
429
|
+
seqs = elem_set.get("sequences", [])
|
|
430
|
+
seqs = list(seqs) # copy
|
|
431
|
+
# loop in reverse so indices for pop are valid:
|
|
432
|
+
for seq_idx, seq_dat in zip(range(len(seqs) - 1, -1, -1), seqs[::-1]):
|
|
433
|
+
if "paths" in seq_dat: # (note: plural)
|
|
434
|
+
# move to a multi-path sequence:
|
|
435
|
+
new_mps.append(elem_set["sequences"].pop(seq_idx))
|
|
436
|
+
elem_set.setdefault("multi_path_sequences", []).extend(new_mps[::-1])
|
|
437
|
+
|
|
438
|
+
meta_tasks = data.pop("meta_tasks", {})
|
|
439
|
+
if meta_tasks:
|
|
440
|
+
for i in list(meta_tasks):
|
|
441
|
+
_normalise_task_parametrisation(meta_tasks[i])
|
|
442
|
+
new_task_dat: list[WorkflowTemplateTaskData] = []
|
|
443
|
+
reindex = {}
|
|
444
|
+
for task_idx, task_dat in enumerate(data["tasks"]):
|
|
445
|
+
if meta_task_dat := meta_tasks.get(task_dat["schema"]):
|
|
446
|
+
reindex[task_idx] = [
|
|
447
|
+
len(new_task_dat) + i for i in range(len(meta_task_dat))
|
|
448
|
+
]
|
|
449
|
+
|
|
450
|
+
all_schema_names = [j for i in meta_task_dat for j in i["schema"]]
|
|
451
|
+
|
|
452
|
+
# update any parametrisation provided in the task list:
|
|
453
|
+
base_data = copy.deepcopy(meta_task_dat)
|
|
454
|
+
|
|
455
|
+
# any other keys in `task_dat` should be mappings whose keys are
|
|
456
|
+
# the schema name (within the meta task) optionally suffixed by
|
|
457
|
+
# a period and the element set index to which the updates should be
|
|
458
|
+
# copied (no integer suffix indicates the zeroth element set):
|
|
459
|
+
for k, v in task_dat.items():
|
|
460
|
+
if k == "schema":
|
|
461
|
+
continue
|
|
462
|
+
|
|
463
|
+
for elem_set_id, dat in v.items():
|
|
464
|
+
|
|
465
|
+
elem_set_id_split = elem_set_id.split(".")
|
|
466
|
+
try:
|
|
467
|
+
es_idx = int(elem_set_id_split[-1])
|
|
468
|
+
schema_name = ".".join(elem_set_id_split[:-1])
|
|
469
|
+
except ValueError:
|
|
470
|
+
es_idx = 0
|
|
471
|
+
schema_name = ".".join(elem_set_id_split)
|
|
472
|
+
schema_name = schema_name.strip(".")
|
|
473
|
+
|
|
474
|
+
# check valid schema name:
|
|
475
|
+
if schema_name not in all_schema_names:
|
|
476
|
+
raise UnknownMetaTaskConstitutiveSchema(
|
|
477
|
+
f"Task schema with objective {schema_name!r} is not "
|
|
478
|
+
f"part of the meta-task with objective "
|
|
479
|
+
f"{task_dat['schema']!r}. The constitutive schemas of"
|
|
480
|
+
f" this meta-task have objectives: "
|
|
481
|
+
f"{all_schema_names!r}."
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
# copy `dat` to the correct schema and element set in the
|
|
485
|
+
# meta-task:
|
|
486
|
+
for s_idx, s in enumerate(base_data):
|
|
487
|
+
if s["schema"] == [schema_name]:
|
|
488
|
+
if k == "inputs":
|
|
489
|
+
# special case; merge inputs
|
|
490
|
+
base_data[s_idx]["element_sets"][es_idx][
|
|
491
|
+
k
|
|
492
|
+
].update(dat)
|
|
493
|
+
else:
|
|
494
|
+
# just overwrite
|
|
495
|
+
base_data[s_idx]["element_sets"][es_idx][k] = dat
|
|
496
|
+
|
|
497
|
+
new_task_dat.extend(base_data)
|
|
498
|
+
|
|
499
|
+
else:
|
|
500
|
+
reindex[task_idx] = [len(new_task_dat)]
|
|
501
|
+
new_task_dat.append(task_dat)
|
|
502
|
+
|
|
503
|
+
data["tasks"] = new_task_dat
|
|
504
|
+
|
|
505
|
+
if loops := data.get("loops"):
|
|
506
|
+
for loop_idx, loop in enumerate(loops):
|
|
507
|
+
loops[loop_idx]["tasks"] = [
|
|
508
|
+
j for i in loop["tasks"] for j in reindex[i]
|
|
509
|
+
]
|
|
510
|
+
term_task = loop.get("termination_task")
|
|
511
|
+
if term_task is not None:
|
|
512
|
+
loops[loop_idx]["termination_task"] = reindex[term_task][0]
|
|
513
|
+
|
|
514
|
+
_normalise_task_parametrisation(data["tasks"])
|
|
515
|
+
|
|
516
|
+
# extract out any template components:
|
|
517
|
+
# TODO: TypedDict for data
|
|
518
|
+
tcs: dict[str, list] = data.pop("template_components", {})
|
|
519
|
+
if params_dat := tcs.pop("parameters", []):
|
|
520
|
+
parameters = cls._app.ParametersList.from_json_like(
|
|
521
|
+
params_dat, shared_data=cls._app._shared_data
|
|
522
|
+
)
|
|
523
|
+
cls._app.parameters.add_objects(parameters, skip_duplicates=True)
|
|
524
|
+
|
|
525
|
+
if cmd_files_dat := tcs.pop("command_files", []):
|
|
526
|
+
cmd_files = cls._app.CommandFilesList.from_json_like(
|
|
527
|
+
cmd_files_dat, shared_data=cls._app._shared_data
|
|
528
|
+
)
|
|
529
|
+
cls._app.command_files.add_objects(cmd_files, skip_duplicates=True)
|
|
530
|
+
|
|
531
|
+
if envs_dat := tcs.pop("environments", []):
|
|
532
|
+
envs = cls._app.EnvironmentsList.from_json_like(
|
|
533
|
+
envs_dat, shared_data=cls._app._shared_data
|
|
534
|
+
)
|
|
535
|
+
cls._app.envs.add_objects(envs, skip_duplicates=True)
|
|
536
|
+
|
|
537
|
+
if ts_dat := tcs.pop("task_schemas", []):
|
|
538
|
+
task_schemas = cls._app.TaskSchemasList.from_json_like(
|
|
539
|
+
ts_dat, shared_data=cls._app._shared_data
|
|
540
|
+
)
|
|
541
|
+
cls._app.task_schemas.add_objects(task_schemas, skip_duplicates=True)
|
|
542
|
+
|
|
543
|
+
if mts_dat := tcs.pop("meta_task_schemas", []):
|
|
544
|
+
meta_ts = [
|
|
545
|
+
cls._app.MetaTaskSchema.from_json_like(
|
|
546
|
+
i, shared_data=cls._app.template_components
|
|
547
|
+
)
|
|
548
|
+
for i in mts_dat
|
|
549
|
+
]
|
|
550
|
+
cls._app.task_schemas.add_objects(meta_ts, skip_duplicates=True)
|
|
551
|
+
|
|
552
|
+
wkt = cls.from_json_like(data, shared_data=cls._app._shared_data)
|
|
553
|
+
|
|
554
|
+
# print(f"WorkflowTemplate._from_data: {wkt=!r}")
|
|
555
|
+
# TODO: what is this for!?
|
|
556
|
+
# for idx, task in enumerate(wkt.tasks):
|
|
557
|
+
# if isinstance(task.schema, cls._app.MetaTaskSchema):
|
|
558
|
+
# print(f"{task=!r}")
|
|
559
|
+
# wkt.tasks[idx] = cls._app.MetaTask(schema=task.schema, tasks=task.tasks)
|
|
560
|
+
return wkt
|
|
561
|
+
|
|
562
|
+
@classmethod
|
|
563
|
+
@TimeIt.decorator
|
|
564
|
+
def from_YAML_string(
|
|
565
|
+
cls,
|
|
566
|
+
string: str,
|
|
567
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
568
|
+
) -> WorkflowTemplate:
|
|
569
|
+
"""Load from a YAML string.
|
|
570
|
+
|
|
571
|
+
Parameters
|
|
572
|
+
----------
|
|
573
|
+
string
|
|
574
|
+
The YAML string containing the workflow template parametrisation.
|
|
575
|
+
variables
|
|
576
|
+
String variables to substitute in `string`. Substitutions will be attempted if
|
|
577
|
+
the YAML string looks to contain variable references (like "<<var:name>>"). If
|
|
578
|
+
set to `False`, no substitutions will occur, which may result in an invalid
|
|
579
|
+
workflow template!
|
|
580
|
+
"""
|
|
581
|
+
return cls._from_data(
|
|
582
|
+
read_YAML_str(
|
|
583
|
+
string,
|
|
584
|
+
variables=variables,
|
|
585
|
+
source="(from the inline workflow template definition)",
|
|
586
|
+
)
|
|
587
|
+
)
|
|
588
|
+
|
|
589
|
+
@classmethod
|
|
590
|
+
def _check_name(cls, data: dict[str, Any], path: PathLike) -> None:
|
|
591
|
+
"""Check the workflow template data has a "name" key. If not, add a "name" key,
|
|
592
|
+
using the file path stem.
|
|
593
|
+
|
|
594
|
+
Note: this method mutates `data`.
|
|
595
|
+
|
|
596
|
+
"""
|
|
597
|
+
if "name" not in data and path is not None:
|
|
598
|
+
name = Path(path).stem
|
|
599
|
+
cls._app.logger.info(
|
|
600
|
+
f"using file name stem ({name!r}) as the workflow template name."
|
|
601
|
+
)
|
|
602
|
+
data["name"] = name
|
|
603
|
+
|
|
604
|
+
@classmethod
|
|
605
|
+
@TimeIt.decorator
|
|
606
|
+
def from_YAML_file(
|
|
607
|
+
cls,
|
|
608
|
+
path: PathLike,
|
|
609
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
610
|
+
) -> WorkflowTemplate:
|
|
611
|
+
"""Load from a YAML file.
|
|
612
|
+
|
|
613
|
+
Parameters
|
|
614
|
+
----------
|
|
615
|
+
path
|
|
616
|
+
The path to the YAML file containing the workflow template parametrisation.
|
|
617
|
+
variables
|
|
618
|
+
String variables to substitute in the file given by `path`. Substitutions will
|
|
619
|
+
be attempted if the YAML file looks to contain variable references (like
|
|
620
|
+
"<<var:name>>"). If set to `False`, no substitutions will occur, which may
|
|
621
|
+
result in an invalid workflow template!
|
|
622
|
+
|
|
623
|
+
"""
|
|
624
|
+
cls._app.logger.debug("parsing workflow template from a YAML file")
|
|
625
|
+
data = read_YAML_file(path, variables=variables)
|
|
626
|
+
cls._check_name(data, path)
|
|
627
|
+
data["source_file"] = str(path)
|
|
628
|
+
return cls._from_data(data)
|
|
629
|
+
|
|
630
|
+
@classmethod
|
|
631
|
+
@TimeIt.decorator
|
|
632
|
+
def from_JSON_string(
|
|
633
|
+
cls,
|
|
634
|
+
string: str,
|
|
635
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
636
|
+
) -> WorkflowTemplate:
|
|
637
|
+
"""Load from a JSON string.
|
|
638
|
+
|
|
639
|
+
Parameters
|
|
640
|
+
----------
|
|
641
|
+
string
|
|
642
|
+
The JSON string containing the workflow template parametrisation.
|
|
643
|
+
variables
|
|
644
|
+
String variables to substitute in `string`. Substitutions will be attempted if
|
|
645
|
+
the JSON string looks to contain variable references (like "<<var:name>>"). If
|
|
646
|
+
set to `False`, no substitutions will occur, which may result in an invalid
|
|
647
|
+
workflow template!
|
|
648
|
+
"""
|
|
649
|
+
return cls._from_data(read_JSON_string(string, variables=variables))
|
|
650
|
+
|
|
651
|
+
@classmethod
|
|
652
|
+
@TimeIt.decorator
|
|
653
|
+
def from_JSON_file(
|
|
654
|
+
cls,
|
|
655
|
+
path: PathLike,
|
|
656
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
657
|
+
) -> WorkflowTemplate:
|
|
658
|
+
"""Load from a JSON file.
|
|
659
|
+
|
|
660
|
+
Parameters
|
|
661
|
+
----------
|
|
662
|
+
path
|
|
663
|
+
The path to the JSON file containing the workflow template parametrisation.
|
|
664
|
+
variables
|
|
665
|
+
String variables to substitute in the file given by `path`. Substitutions will
|
|
666
|
+
be attempted if the JSON file looks to contain variable references (like
|
|
667
|
+
"<<var:name>>"). If set to `False`, no substitutions will occur, which may
|
|
668
|
+
result in an invalid workflow template!
|
|
669
|
+
"""
|
|
670
|
+
cls._app.logger.debug("parsing workflow template from a JSON file")
|
|
671
|
+
data = read_JSON_file(path, variables=variables)
|
|
672
|
+
cls._check_name(data, path)
|
|
673
|
+
data["source_file"] = str(path)
|
|
674
|
+
return cls._from_data(data)
|
|
675
|
+
|
|
676
|
+
@classmethod
|
|
677
|
+
@TimeIt.decorator
|
|
678
|
+
def from_file(
|
|
679
|
+
cls,
|
|
680
|
+
path: PathLike,
|
|
681
|
+
template_format: Literal["yaml", "json"] | None = None,
|
|
682
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
683
|
+
) -> WorkflowTemplate:
|
|
684
|
+
"""Load from either a YAML or JSON file, depending on the file extension.
|
|
685
|
+
|
|
686
|
+
Parameters
|
|
687
|
+
----------
|
|
688
|
+
path
|
|
689
|
+
The path to the file containing the workflow template parametrisation.
|
|
690
|
+
template_format
|
|
691
|
+
The file format to expect at `path`. One of "json" or "yaml", if specified. By
|
|
692
|
+
default, "yaml".
|
|
693
|
+
variables
|
|
694
|
+
String variables to substitute in the file given by `path`. Substitutions will
|
|
695
|
+
be attempted if the file looks to contain variable references (like
|
|
696
|
+
"<<var:name>>"). If set to `False`, no substitutions will occur, which may
|
|
697
|
+
result in an invalid workflow template!
|
|
698
|
+
"""
|
|
699
|
+
path_ = Path(path or ".")
|
|
700
|
+
fmt = template_format.lower() if template_format else None
|
|
701
|
+
if fmt == "yaml" or path_.suffix in (".yaml", ".yml"):
|
|
702
|
+
return cls.from_YAML_file(path_, variables=variables)
|
|
703
|
+
elif fmt == "json" or path_.suffix in (".json", ".jsonc"):
|
|
704
|
+
return cls.from_JSON_file(path_, variables=variables)
|
|
705
|
+
else:
|
|
706
|
+
raise ValueError(
|
|
707
|
+
f"Unknown workflow template file extension {path_.suffix!r}. Supported "
|
|
708
|
+
f"template formats are {ALL_TEMPLATE_FORMATS!r}."
|
|
709
|
+
)
|
|
710
|
+
|
|
711
|
+
def _add_empty_task(self, task: Task, new_index: int, insert_ID: int) -> None:
|
|
712
|
+
"""Called by `Workflow._add_empty_task`."""
|
|
713
|
+
assert self.workflow
|
|
714
|
+
new_task_name = self.workflow._get_new_task_unique_name(task, new_index)
|
|
715
|
+
|
|
716
|
+
task._insert_ID = insert_ID
|
|
717
|
+
task._dir_name = f"task_{task.insert_ID}_{new_task_name}"
|
|
718
|
+
task._element_sets = [] # element sets are added to the Task during add_elements
|
|
719
|
+
|
|
720
|
+
task.workflow_template = self
|
|
721
|
+
self.tasks.insert(new_index, task)
|
|
722
|
+
|
|
723
|
+
def _add_empty_loop(self, loop: Loop) -> None:
|
|
724
|
+
"""Called by `Workflow._add_empty_loop`."""
|
|
725
|
+
|
|
726
|
+
assert self.workflow
|
|
727
|
+
if not loop.name:
|
|
728
|
+
existing = {loop.name for loop in self.loops}
|
|
729
|
+
new_idx = len(self.loops)
|
|
730
|
+
while (name := f"loop_{new_idx}") in existing:
|
|
731
|
+
new_idx += 1
|
|
732
|
+
loop._name = name
|
|
733
|
+
elif loop.name in self.workflow.loops.list_attrs():
|
|
734
|
+
raise LoopAlreadyExistsError(loop.name, self.workflow.loops)
|
|
735
|
+
|
|
736
|
+
loop._workflow_template = self
|
|
737
|
+
self.loops.append(loop)
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def resolve_fsspec(
|
|
741
|
+
path: PathLike, **kwargs
|
|
742
|
+
) -> tuple[AbstractFileSystem, str, str | None]:
|
|
743
|
+
"""
|
|
744
|
+
Decide how to handle a particular virtual path.
|
|
745
|
+
|
|
746
|
+
Parameters
|
|
747
|
+
----------
|
|
748
|
+
kwargs
|
|
749
|
+
This can include a `password` key, for connections via SSH.
|
|
750
|
+
|
|
751
|
+
"""
|
|
752
|
+
|
|
753
|
+
path_s = str(path)
|
|
754
|
+
fs: AbstractFileSystem
|
|
755
|
+
if path_s.endswith(".zip"):
|
|
756
|
+
# `url_to_fs` does not seem to work for zip combos e.g. `zip::ssh://`, so we
|
|
757
|
+
# construct a `ZipFileSystem` ourselves and assume it is signified only by the
|
|
758
|
+
# file extension:
|
|
759
|
+
fs, pw = ask_pw_on_auth_exc(
|
|
760
|
+
ZipFileSystem,
|
|
761
|
+
fo=path_s,
|
|
762
|
+
mode="r",
|
|
763
|
+
target_options=kwargs or {},
|
|
764
|
+
add_pw_to="target_options",
|
|
765
|
+
)
|
|
766
|
+
path_s = ""
|
|
767
|
+
|
|
768
|
+
else:
|
|
769
|
+
(fs, path_s), pw = ask_pw_on_auth_exc(url_to_fs, path_s, **kwargs)
|
|
770
|
+
path_s = str(Path(path_s).as_posix())
|
|
771
|
+
if isinstance(fs, LocalFileSystem):
|
|
772
|
+
path_s = str(Path(path_s).resolve())
|
|
773
|
+
|
|
774
|
+
return fs, path_s, pw
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
@dataclass(frozen=True)
|
|
778
|
+
class _IterationData:
|
|
779
|
+
id_: int
|
|
780
|
+
idx: int
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def load_workflow_config(
|
|
784
|
+
func: Callable[Concatenate[S, P], T],
|
|
785
|
+
) -> Callable[Concatenate[S, P], T]:
|
|
786
|
+
"""Decorator to apply workflow-level config items during execution of a Workflow
|
|
787
|
+
method."""
|
|
788
|
+
|
|
789
|
+
@wraps(func)
|
|
790
|
+
def wrapped(self: S, *args: P.args, **kwargs: P.kwargs) -> T:
|
|
791
|
+
|
|
792
|
+
updates = self.template.config
|
|
793
|
+
if updates:
|
|
794
|
+
with self._app.config._with_updates(updates):
|
|
795
|
+
return func(self, *args, **kwargs)
|
|
796
|
+
else:
|
|
797
|
+
return func(self, *args, **kwargs)
|
|
798
|
+
|
|
799
|
+
return wrapped
|
|
800
|
+
|
|
801
|
+
|
|
802
|
+
class Workflow(AppAware):
|
|
803
|
+
"""
|
|
804
|
+
A concrete workflow.
|
|
805
|
+
|
|
806
|
+
Parameters
|
|
807
|
+
----------
|
|
808
|
+
workflow_ref:
|
|
809
|
+
Either the path to a persistent workflow, or an integer that will interpreted
|
|
810
|
+
as the local ID of a workflow submission, as reported by the app `show`
|
|
811
|
+
command.
|
|
812
|
+
store_fmt:
|
|
813
|
+
The format of persistent store to use. Used to select the store manager class.
|
|
814
|
+
fs_kwargs:
|
|
815
|
+
Additional arguments to pass when resolving a virtual workflow reference.
|
|
816
|
+
kwargs:
|
|
817
|
+
For compatibility during pre-stable development phase.
|
|
818
|
+
"""
|
|
819
|
+
|
|
820
|
+
_default_ts_fmt: ClassVar[str] = r"%Y-%m-%d %H:%M:%S.%f"
|
|
821
|
+
_default_ts_name_fmt: ClassVar[str] = r"%Y-%m-%d_%H%M%S"
|
|
822
|
+
_input_files_dir_name: ClassVar[str] = "input_files"
|
|
823
|
+
_exec_dir_name: ClassVar[str] = "execute"
|
|
824
|
+
|
|
825
|
+
def __init__(
|
|
826
|
+
self,
|
|
827
|
+
workflow_ref: str | Path | int,
|
|
828
|
+
store_fmt: str | None = None,
|
|
829
|
+
fs_kwargs: dict[str, Any] | None = None,
|
|
830
|
+
**kwargs,
|
|
831
|
+
):
|
|
832
|
+
if isinstance(workflow_ref, int):
|
|
833
|
+
path = self._app._get_workflow_path_from_local_ID(workflow_ref)
|
|
834
|
+
elif isinstance(workflow_ref, str):
|
|
835
|
+
path = Path(workflow_ref)
|
|
836
|
+
else:
|
|
837
|
+
path = workflow_ref
|
|
838
|
+
|
|
839
|
+
self._app.logger.info(f"loading workflow from path: {path}")
|
|
840
|
+
fs_path = str(path)
|
|
841
|
+
fs, path_s, _ = resolve_fsspec(path, **(fs_kwargs or {}))
|
|
842
|
+
store_fmt = store_fmt or infer_store(fs_path, fs)
|
|
843
|
+
store_cls = store_cls_from_str(store_fmt)
|
|
844
|
+
|
|
845
|
+
self.path = path_s
|
|
846
|
+
|
|
847
|
+
# assigned on first access:
|
|
848
|
+
self._ts_fmt: str | None = None
|
|
849
|
+
self._ts_name_fmt: str | None = None
|
|
850
|
+
self._creation_info: CreationInfo | None = None
|
|
851
|
+
self._name: str | None = None
|
|
852
|
+
self._template: WorkflowTemplate | None = None
|
|
853
|
+
self._template_components: TemplateComponents | None = None
|
|
854
|
+
self._tasks: WorkflowTaskList | None = None
|
|
855
|
+
self._loops: WorkflowLoopList | None = None
|
|
856
|
+
self._submissions: list[Submission] | None = None
|
|
857
|
+
|
|
858
|
+
self._store = store_cls(self._app, self, self.path, fs)
|
|
859
|
+
self._in_batch_mode = False # flag to track when processing batch updates
|
|
860
|
+
|
|
861
|
+
self._use_merged_parameters_cache = False
|
|
862
|
+
self._merged_parameters_cache: dict[
|
|
863
|
+
tuple[str | None, tuple[tuple[str, tuple[int, ...] | int], ...]], Any
|
|
864
|
+
] = {}
|
|
865
|
+
|
|
866
|
+
# store indices of updates during batch update, so we can revert on failure:
|
|
867
|
+
self._pending = self._get_empty_pending()
|
|
868
|
+
|
|
869
|
+
# reassigned within `ElementActionRun.raise_on_failure_threshold` context manager:
|
|
870
|
+
self._is_tracking_unset: bool = False
|
|
871
|
+
self._tracked_unset: dict[str, UnsetParamTracker] | None = None
|
|
872
|
+
|
|
873
|
+
def reload(self) -> Self:
|
|
874
|
+
"""Reload the workflow from disk."""
|
|
875
|
+
return self.__class__(self.url)
|
|
876
|
+
|
|
877
|
+
@property
|
|
878
|
+
def name(self) -> str:
|
|
879
|
+
"""
|
|
880
|
+
The name of the workflow.
|
|
881
|
+
|
|
882
|
+
The workflow name may be different from the template name, as it includes the
|
|
883
|
+
creation date-timestamp if generated.
|
|
884
|
+
"""
|
|
885
|
+
if not self._name:
|
|
886
|
+
self._name = self._store.get_name()
|
|
887
|
+
return self._name
|
|
888
|
+
|
|
889
|
+
@property
|
|
890
|
+
def url(self) -> str:
|
|
891
|
+
"""An fsspec URL for this workflow."""
|
|
892
|
+
if self._store.fs:
|
|
893
|
+
if self._store.fs.protocol == "zip":
|
|
894
|
+
return self._store.fs.of.path
|
|
895
|
+
elif self._store.fs.protocol == ("file", "local"):
|
|
896
|
+
return self.path
|
|
897
|
+
raise NotImplementedError("Only (local) zip and local URLs provided for now.")
|
|
898
|
+
|
|
899
|
+
@property
|
|
900
|
+
def store_format(self) -> str:
|
|
901
|
+
"""
|
|
902
|
+
The format of the workflow's persistent store.
|
|
903
|
+
"""
|
|
904
|
+
return self._store._name
|
|
905
|
+
|
|
906
|
+
@classmethod
|
|
907
|
+
@TimeIt.decorator
|
|
908
|
+
def from_template(
|
|
909
|
+
cls,
|
|
910
|
+
template: WorkflowTemplate,
|
|
911
|
+
path: PathLike = None,
|
|
912
|
+
name: str | None = None,
|
|
913
|
+
name_add_timestamp: bool | None = None,
|
|
914
|
+
name_use_dir: bool | None = None,
|
|
915
|
+
overwrite: bool = False,
|
|
916
|
+
store: str = DEFAULT_STORE_FORMAT,
|
|
917
|
+
ts_fmt: str | None = None,
|
|
918
|
+
ts_name_fmt: str | None = None,
|
|
919
|
+
store_kwargs: dict[str, Any] | None = None,
|
|
920
|
+
status: Status | None = None,
|
|
921
|
+
) -> Workflow:
|
|
922
|
+
"""Generate from a `WorkflowTemplate` object.
|
|
923
|
+
|
|
924
|
+
Parameters
|
|
925
|
+
----------
|
|
926
|
+
template:
|
|
927
|
+
The WorkflowTemplate object to make persistent.
|
|
928
|
+
path:
|
|
929
|
+
The directory in which the workflow will be generated. If not specified, the
|
|
930
|
+
config item `default_workflow_path` will be used; if that is not set, the
|
|
931
|
+
current directory is used.
|
|
932
|
+
name
|
|
933
|
+
The name to use for the workflow. If not provided, the name will be set to
|
|
934
|
+
that of the template (optionally suffixed by a date-timestamp if
|
|
935
|
+
`name_add_timestamp` is True).
|
|
936
|
+
name_add_timestamp
|
|
937
|
+
If True, suffix the name with a date-timestamp. A default value can be set
|
|
938
|
+
with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
|
|
939
|
+
name_use_dir
|
|
940
|
+
If True, and `name_add_timestamp` is also True, the workflow directory name
|
|
941
|
+
will be just the date-timestamp, and will be contained within a parent
|
|
942
|
+
directory corresponding to the workflow name. A default value can be set
|
|
943
|
+
with config the item `workflow_name_use_dir`; otherwise set to `False`.
|
|
944
|
+
overwrite:
|
|
945
|
+
If True and the workflow directory (`path` + `name`) already exists, the
|
|
946
|
+
existing directory will be overwritten.
|
|
947
|
+
store:
|
|
948
|
+
The persistent store to use for this workflow.
|
|
949
|
+
ts_fmt:
|
|
950
|
+
The datetime format to use for storing datetimes. Datetimes are always stored
|
|
951
|
+
in UTC (because Numpy does not store time zone info), so this should not
|
|
952
|
+
include a time zone name.
|
|
953
|
+
ts_name_fmt:
|
|
954
|
+
The datetime format to use when generating the workflow name, where it
|
|
955
|
+
includes a timestamp.
|
|
956
|
+
store_kwargs:
|
|
957
|
+
Keyword arguments to pass to the store's `write_empty_workflow` method.
|
|
958
|
+
"""
|
|
959
|
+
if status:
|
|
960
|
+
status.update("Generating empty workflow...")
|
|
961
|
+
try:
|
|
962
|
+
wk = cls._write_empty_workflow(
|
|
963
|
+
template=template,
|
|
964
|
+
path=path,
|
|
965
|
+
name=name,
|
|
966
|
+
name_add_timestamp=name_add_timestamp,
|
|
967
|
+
name_use_dir=name_use_dir,
|
|
968
|
+
overwrite=overwrite,
|
|
969
|
+
store=store,
|
|
970
|
+
ts_fmt=ts_fmt,
|
|
971
|
+
ts_name_fmt=ts_name_fmt,
|
|
972
|
+
store_kwargs=store_kwargs,
|
|
973
|
+
)
|
|
974
|
+
with wk._store.cached_load(), wk.batch_update(
|
|
975
|
+
is_workflow_creation=True
|
|
976
|
+
), wk._store.cache_ctx():
|
|
977
|
+
for idx, task in enumerate(template.tasks):
|
|
978
|
+
if status:
|
|
979
|
+
status.update(
|
|
980
|
+
f"Adding task {idx + 1}/{len(template.tasks)} "
|
|
981
|
+
f"({task.name!r})..."
|
|
982
|
+
)
|
|
983
|
+
wk._add_task(task)
|
|
984
|
+
if template.loops:
|
|
985
|
+
if status:
|
|
986
|
+
status.update(
|
|
987
|
+
f"Preparing to add {len(template.loops)} loops; building "
|
|
988
|
+
f"cache..."
|
|
989
|
+
)
|
|
990
|
+
|
|
991
|
+
for loop in template.loops:
|
|
992
|
+
loop._validate_against_workflow(wk)
|
|
993
|
+
# TODO: if loop with non-initialisable actions, will fail
|
|
994
|
+
cache = LoopCache.build(workflow=wk, loops=template.loops)
|
|
995
|
+
for idx, loop in enumerate(template.loops):
|
|
996
|
+
if status:
|
|
997
|
+
status.update(
|
|
998
|
+
f"Adding loop {idx + 1}/"
|
|
999
|
+
f"{len(template.loops)} ({loop.name!r})"
|
|
1000
|
+
)
|
|
1001
|
+
wk._add_loop(loop, cache=cache, status=status)
|
|
1002
|
+
if status:
|
|
1003
|
+
status.update(
|
|
1004
|
+
f"Added {len(template.loops)} loops. "
|
|
1005
|
+
f"Committing to store..."
|
|
1006
|
+
)
|
|
1007
|
+
elif status:
|
|
1008
|
+
status.update("Committing to store...")
|
|
1009
|
+
except (Exception, NotImplementedError):
|
|
1010
|
+
if status:
|
|
1011
|
+
status.stop()
|
|
1012
|
+
raise
|
|
1013
|
+
return wk
|
|
1014
|
+
|
|
1015
|
+
    @classmethod
    @TimeIt.decorator
    def from_YAML_file(
        cls,
        YAML_path: PathLike,
        path: PathLike = None,
        name: str | None = None,
        name_add_timestamp: bool | None = None,
        name_use_dir: bool | None = None,
        overwrite: bool = False,
        store: str = DEFAULT_STORE_FORMAT,
        ts_fmt: str | None = None,
        ts_name_fmt: str | None = None,
        store_kwargs: dict[str, Any] | None = None,
        variables: dict[str, str] | Literal[False] | None = None,
    ) -> Workflow:
        """Generate from a YAML file.

        Parameters
        ----------
        YAML_path:
            The path to a workflow template in the YAML file format.
        path:
            The directory in which the workflow will be generated. If not specified, the
            config item `default_workflow_path` will be used; if that is not set, the
            current directory is used.
        name
            The name to use for the workflow. If not provided, the name will be set to
            that of the template (optionally suffixed by a date-timestamp if
            `name_add_timestamp` is True).
        name_add_timestamp
            If True, suffix the name with a date-timestamp. A default value can be set
            with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
        name_use_dir
            If True, and `name_add_timestamp` is also True, the workflow directory name
            will be just the date-timestamp, and will be contained within a parent
            directory corresponding to the workflow name. A default value can be set
            with the config item `workflow_name_use_dir`; otherwise set to `False`.
        overwrite:
            If True and the workflow directory (`path` + `name`) already exists, the
            existing directory will be overwritten.
        store:
            The persistent store to use for this workflow.
        ts_fmt:
            The datetime format to use for storing datetimes. Datetimes are always stored
            in UTC (because Numpy does not store time zone info), so this should not
            include a time zone name.
        ts_name_fmt:
            The datetime format to use when generating the workflow name, where it
            includes a timestamp.
        store_kwargs:
            Keyword arguments to pass to the store's `write_empty_workflow` method.
        variables:
            String variables to substitute in the file given by `YAML_path`. Substitutions
            will be attempted if the YAML file looks to contain variable references (like
            "<<var:name>>"). If set to `False`, no substitutions will occur, which may
            result in an invalid workflow template!
        """
        template = cls._app.WorkflowTemplate.from_YAML_file(
            path=YAML_path,
            variables=variables,
        )
        return cls.from_template(
            template,
            path,
            name,
            name_add_timestamp,
            name_use_dir,
            overwrite,
            store,
            ts_fmt,
            ts_name_fmt,
            store_kwargs,
        )
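
# Illustrative usage sketch of `from_YAML_file` with variable substitution, as
# described above. The file name and variable name are invented; `hf` is the
# assumed application import (e.g. `import hpcflow.app as hf`).
#
#     wk = hf.Workflow.from_YAML_file(
#         "my_template.yaml",
#         variables={"num_repeats": "3"},  # fills "<<var:num_repeats>>" in the file
#     )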
|
|
1089
|
+
|
|
1090
|
+
@classmethod
|
|
1091
|
+
def from_YAML_string(
|
|
1092
|
+
cls,
|
|
1093
|
+
YAML_str: str,
|
|
1094
|
+
path: PathLike = None,
|
|
1095
|
+
name: str | None = None,
|
|
1096
|
+
name_add_timestamp: bool | None = None,
|
|
1097
|
+
name_use_dir: bool | None = None,
|
|
1098
|
+
overwrite: bool = False,
|
|
1099
|
+
store: str = DEFAULT_STORE_FORMAT,
|
|
1100
|
+
ts_fmt: str | None = None,
|
|
1101
|
+
ts_name_fmt: str | None = None,
|
|
1102
|
+
store_kwargs: dict[str, Any] | None = None,
|
|
1103
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
1104
|
+
status: Status | None = None,
|
|
1105
|
+
) -> Workflow:
|
|
1106
|
+
"""Generate from a YAML string.
|
|
1107
|
+
|
|
1108
|
+
Parameters
|
|
1109
|
+
----------
|
|
1110
|
+
YAML_str:
|
|
1111
|
+
The YAML string containing a workflow template parametrisation.
|
|
1112
|
+
path:
|
|
1113
|
+
The directory in which the workflow will be generated. If not specified, the
|
|
1114
|
+
config item `default_workflow_path` will be used; if that is not set, the
|
|
1115
|
+
current directory is used.
|
|
1116
|
+
name
|
|
1117
|
+
The name to use for the workflow. If not provided, the name will be set to
|
|
1118
|
+
that of the template (optionally suffixed by a date-timestamp if
|
|
1119
|
+
`name_add_timestamp` is True).
|
|
1120
|
+
name_add_timestamp
|
|
1121
|
+
If True, suffix the name with a date-timestamp. A default value can be set
|
|
1122
|
+
with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
|
|
1123
|
+
name_use_dir
|
|
1124
|
+
If True, and `name_add_timestamp` is also True, the workflow directory name
|
|
1125
|
+
will be just the date-timestamp, and will be contained within a parent
|
|
1126
|
+
directory corresponding to the workflow name. A default value can be set
|
|
1127
|
+
with the config item `workflow_name_use_dir`; otherwise set to `False`.
|
|
1128
|
+
overwrite:
|
|
1129
|
+
If True and the workflow directory (`path` + `name`) already exists, the
|
|
1130
|
+
existing directory will be overwritten.
|
|
1131
|
+
store:
|
|
1132
|
+
The persistent store to use for this workflow.
|
|
1133
|
+
ts_fmt:
|
|
1134
|
+
The datetime format to use for storing datetimes. Datetimes are always stored
|
|
1135
|
+
in UTC (because Numpy does not store time zone info), so this should not
|
|
1136
|
+
include a time zone name.
|
|
1137
|
+
ts_name_fmt:
|
|
1138
|
+
The datetime format to use when generating the workflow name, where it
|
|
1139
|
+
includes a timestamp.
|
|
1140
|
+
store_kwargs:
|
|
1141
|
+
Keyword arguments to pass to the store's `write_empty_workflow` method.
|
|
1142
|
+
variables:
|
|
1143
|
+
String variables to substitute in the string `YAML_str`. Substitutions will be
|
|
1144
|
+
attempted if the YAML string looks to contain variable references (like
|
|
1145
|
+
"<<var:name>>"). If set to `False`, no substitutions will occur, which may
|
|
1146
|
+
result in an invalid workflow template!
|
|
1147
|
+
"""
|
|
1148
|
+
template = cls._app.WorkflowTemplate.from_YAML_string(
|
|
1149
|
+
string=YAML_str,
|
|
1150
|
+
variables=variables,
|
|
1151
|
+
)
|
|
1152
|
+
return cls.from_template(
|
|
1153
|
+
template,
|
|
1154
|
+
path,
|
|
1155
|
+
name,
|
|
1156
|
+
name_add_timestamp,
|
|
1157
|
+
name_use_dir,
|
|
1158
|
+
overwrite,
|
|
1159
|
+
store,
|
|
1160
|
+
ts_fmt,
|
|
1161
|
+
ts_name_fmt,
|
|
1162
|
+
store_kwargs,
|
|
1163
|
+
status,
|
|
1164
|
+
)
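
# Illustrative usage sketch of `from_YAML_string`, showing the "<<var:name>>"
# reference format the docstring describes. The template content and schema
# name are invented for illustration; `hf` is the assumed application import.
#
#     yaml_str = """
#     name: demo
#     tasks:
#       - schema: my_schema
#         inputs:
#           p1: <<var:p1_value>>
#     """
#     wk = hf.Workflow.from_YAML_string(yaml_str, variables={"p1_value": "42"})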
|
|
1165
|
+
|
|
1166
|
+
@classmethod
|
|
1167
|
+
def from_JSON_file(
|
|
1168
|
+
cls,
|
|
1169
|
+
JSON_path: PathLike,
|
|
1170
|
+
path: PathLike = None,
|
|
1171
|
+
name: str | None = None,
|
|
1172
|
+
name_add_timestamp: bool | None = None,
|
|
1173
|
+
name_use_dir: bool | None = None,
|
|
1174
|
+
overwrite: bool = False,
|
|
1175
|
+
store: str = DEFAULT_STORE_FORMAT,
|
|
1176
|
+
ts_fmt: str | None = None,
|
|
1177
|
+
ts_name_fmt: str | None = None,
|
|
1178
|
+
store_kwargs: dict[str, Any] | None = None,
|
|
1179
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
1180
|
+
status: Status | None = None,
|
|
1181
|
+
) -> Workflow:
|
|
1182
|
+
"""Generate from a JSON file.
|
|
1183
|
+
|
|
1184
|
+
Parameters
|
|
1185
|
+
----------
|
|
1186
|
+
JSON_path:
|
|
1187
|
+
The path to a workflow template in the JSON file format.
|
|
1188
|
+
path:
|
|
1189
|
+
The directory in which the workflow will be generated. If not specified, the
|
|
1190
|
+
config item `default_workflow_path` will be used; if that is not set, the
|
|
1191
|
+
current directory is used.
|
|
1192
|
+
name
|
|
1193
|
+
The name to use for the workflow. If not provided, the name will be set to
|
|
1194
|
+
that of the template (optionally suffixed by a date-timestamp if
|
|
1195
|
+
`name_add_timestamp` is True).
|
|
1196
|
+
name_add_timestamp
|
|
1197
|
+
If True, suffix the name with a date-timestamp. A default value can be set
|
|
1198
|
+
with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
|
|
1199
|
+
name_use_dir
|
|
1200
|
+
If True, and `name_add_timestamp` is also True, the workflow directory name
|
|
1201
|
+
will be just the date-timestamp, and will be contained within a parent
|
|
1202
|
+
directory corresponding to the workflow name. A default value can be set
|
|
1203
|
+
with the config item `workflow_name_use_dir`; otherwise set to `False`.
|
|
1204
|
+
overwrite:
|
|
1205
|
+
If True and the workflow directory (`path` + `name`) already exists, the
|
|
1206
|
+
existing directory will be overwritten.
|
|
1207
|
+
store:
|
|
1208
|
+
The persistent store to use for this workflow.
|
|
1209
|
+
ts_fmt:
|
|
1210
|
+
The datetime format to use for storing datetimes. Datetimes are always stored
|
|
1211
|
+
in UTC (because Numpy does not store time zone info), so this should not
|
|
1212
|
+
include a time zone name.
|
|
1213
|
+
ts_name_fmt:
|
|
1214
|
+
The datetime format to use when generating the workflow name, where it
|
|
1215
|
+
includes a timestamp.
|
|
1216
|
+
store_kwargs:
|
|
1217
|
+
Keyword arguments to pass to the store's `write_empty_workflow` method.
|
|
1218
|
+
variables:
|
|
1219
|
+
String variables to substitute in the file given by `JSON_path`. Substitutions
|
|
1220
|
+
will be attempted if the JSON file looks to contain variable references (like
|
|
1221
|
+
"<<var:name>>"). If set to `False`, no substitutions will occur, which may
|
|
1222
|
+
result in an invalid workflow template!
|
|
1223
|
+
"""
|
|
1224
|
+
template = cls._app.WorkflowTemplate.from_JSON_file(
|
|
1225
|
+
path=JSON_path,
|
|
1226
|
+
variables=variables,
|
|
1227
|
+
)
|
|
1228
|
+
return cls.from_template(
|
|
1229
|
+
template,
|
|
1230
|
+
path,
|
|
1231
|
+
name,
|
|
1232
|
+
name_add_timestamp,
|
|
1233
|
+
name_use_dir,
|
|
1234
|
+
overwrite,
|
|
1235
|
+
store,
|
|
1236
|
+
ts_fmt,
|
|
1237
|
+
ts_name_fmt,
|
|
1238
|
+
store_kwargs,
|
|
1239
|
+
status,
|
|
1240
|
+
)
|
|
1241
|
+
|
|
1242
|
+
@classmethod
|
|
1243
|
+
def from_JSON_string(
|
|
1244
|
+
cls,
|
|
1245
|
+
JSON_str: str,
|
|
1246
|
+
path: PathLike = None,
|
|
1247
|
+
name: str | None = None,
|
|
1248
|
+
name_add_timestamp: bool | None = None,
|
|
1249
|
+
name_use_dir: bool | None = None,
|
|
1250
|
+
overwrite: bool = False,
|
|
1251
|
+
store: str = DEFAULT_STORE_FORMAT,
|
|
1252
|
+
ts_fmt: str | None = None,
|
|
1253
|
+
ts_name_fmt: str | None = None,
|
|
1254
|
+
store_kwargs: dict[str, Any] | None = None,
|
|
1255
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
1256
|
+
status: Status | None = None,
|
|
1257
|
+
) -> Workflow:
|
|
1258
|
+
"""Generate from a JSON string.
|
|
1259
|
+
|
|
1260
|
+
Parameters
|
|
1261
|
+
----------
|
|
1262
|
+
JSON_str:
|
|
1263
|
+
The JSON string containing a workflow template parametrisation.
|
|
1264
|
+
path:
|
|
1265
|
+
The directory in which the workflow will be generated. If not specified, the
|
|
1266
|
+
config item `default_workflow_path` will be used; if that is not set, the
|
|
1267
|
+
current directory is used.
|
|
1268
|
+
name
|
|
1269
|
+
The name to use for the workflow. If not provided, the name will be set to
|
|
1270
|
+
that of the template (optionally suffixed by a date-timestamp if
|
|
1271
|
+
`name_add_timestamp` is True).
|
|
1272
|
+
name_add_timestamp
|
|
1273
|
+
If True, suffix the name with a date-timestamp. A default value can be set
|
|
1274
|
+
with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
|
|
1275
|
+
name_use_dir
|
|
1276
|
+
If True, and `name_add_timestamp` is also True, the workflow directory name
|
|
1277
|
+
will be just the date-timestamp, and will be contained within a parent
|
|
1278
|
+
directory corresponding to the workflow name. A default value can be set
|
|
1279
|
+
with the config item `workflow_name_use_dir`; otherwise set to `False`.
|
|
1280
|
+
overwrite:
|
|
1281
|
+
If True and the workflow directory (`path` + `name`) already exists, the
|
|
1282
|
+
existing directory will be overwritten.
|
|
1283
|
+
store:
|
|
1284
|
+
The persistent store to use for this workflow.
|
|
1285
|
+
ts_fmt:
|
|
1286
|
+
The datetime format to use for storing datetimes. Datetimes are always stored
|
|
1287
|
+
in UTC (because Numpy does not store time zone info), so this should not
|
|
1288
|
+
include a time zone name.
|
|
1289
|
+
ts_name_fmt:
|
|
1290
|
+
The datetime format to use when generating the workflow name, where it
|
|
1291
|
+
includes a timestamp.
|
|
1292
|
+
store_kwargs:
|
|
1293
|
+
Keyword arguments to pass to the store's `write_empty_workflow` method.
|
|
1294
|
+
variables:
|
|
1295
|
+
String variables to substitute in the string `JSON_str`. Substitutions will be
|
|
1296
|
+
attempted if the JSON string looks to contain variable references (like
|
|
1297
|
+
"<<var:name>>"). If set to `False`, no substitutions will occur, which may
|
|
1298
|
+
result in an invalid workflow template!
|
|
1299
|
+
"""
|
|
1300
|
+
template = cls._app.WorkflowTemplate.from_JSON_string(
|
|
1301
|
+
string=JSON_str,
|
|
1302
|
+
variables=variables,
|
|
1303
|
+
)
|
|
1304
|
+
return cls.from_template(
|
|
1305
|
+
template,
|
|
1306
|
+
path,
|
|
1307
|
+
name,
|
|
1308
|
+
name_add_timestamp,
|
|
1309
|
+
name_use_dir,
|
|
1310
|
+
overwrite,
|
|
1311
|
+
store,
|
|
1312
|
+
ts_fmt,
|
|
1313
|
+
ts_name_fmt,
|
|
1314
|
+
store_kwargs,
|
|
1315
|
+
status,
|
|
1316
|
+
)
|
|
1317
|
+
|
|
1318
|
+
@classmethod
|
|
1319
|
+
@TimeIt.decorator
|
|
1320
|
+
def from_file(
|
|
1321
|
+
cls,
|
|
1322
|
+
template_path: PathLike,
|
|
1323
|
+
template_format: Literal["json", "yaml"] | None = None,
|
|
1324
|
+
path: str | None = None,
|
|
1325
|
+
name: str | None = None,
|
|
1326
|
+
name_add_timestamp: bool | None = None,
|
|
1327
|
+
name_use_dir: bool | None = None,
|
|
1328
|
+
overwrite: bool = False,
|
|
1329
|
+
store: str = DEFAULT_STORE_FORMAT,
|
|
1330
|
+
ts_fmt: str | None = None,
|
|
1331
|
+
ts_name_fmt: str | None = None,
|
|
1332
|
+
store_kwargs: dict[str, Any] | None = None,
|
|
1333
|
+
variables: dict[str, str] | Literal[False] | None = None,
|
|
1334
|
+
status: Status | None = None,
|
|
1335
|
+
) -> Workflow:
|
|
1336
|
+
"""Generate from either a YAML or JSON file, depending on the file extension.
|
|
1337
|
+
|
|
1338
|
+
Parameters
|
|
1339
|
+
----------
|
|
1340
|
+
template_path:
|
|
1341
|
+
The path to a template file in YAML or JSON format, and with a ".yml",
|
|
1342
|
+
".yaml", or ".json" extension.
|
|
1343
|
+
template_format:
|
|
1344
|
+
If specified, one of "json" or "yaml". This forces parsing from a particular
|
|
1345
|
+
format regardless of the file extension.
|
|
1346
|
+
path:
|
|
1347
|
+
The directory in which the workflow will be generated. If not specified, the
|
|
1348
|
+
config item `default_workflow_path` will be used; if that is not set, the
|
|
1349
|
+
current directory is used.
|
|
1350
|
+
name
|
|
1351
|
+
The name to use for the workflow. If not provided, the name will be set to
|
|
1352
|
+
that of the template (optionally suffixed by a date-timestamp if
|
|
1353
|
+
`name_add_timestamp` is True).
|
|
1354
|
+
name_add_timestamp
|
|
1355
|
+
If True, suffix the name with a date-timestamp. A default value can be set
|
|
1356
|
+
with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
|
|
1357
|
+
name_use_dir
|
|
1358
|
+
If True, and `name_add_timestamp` is also True, the workflow directory name
|
|
1359
|
+
will be just the date-timestamp, and will be contained within a parent
|
|
1360
|
+
directory corresponding to the workflow name. A default value can be set
|
|
1361
|
+
with the config item `workflow_name_use_dir`; otherwise set to `False`.
|
|
1362
|
+
overwrite:
|
|
1363
|
+
If True and the workflow directory (`path` + `name`) already exists, the
|
|
1364
|
+
existing directory will be overwritten.
|
|
1365
|
+
store:
|
|
1366
|
+
The persistent store to use for this workflow.
|
|
1367
|
+
ts_fmt:
|
|
1368
|
+
The datetime format to use for storing datetimes. Datetimes are always stored
|
|
1369
|
+
in UTC (because Numpy does not store time zone info), so this should not
|
|
1370
|
+
include a time zone name.
|
|
1371
|
+
ts_name_fmt:
|
|
1372
|
+
The datetime format to use when generating the workflow name, where it
|
|
1373
|
+
includes a timestamp.
|
|
1374
|
+
store_kwargs:
|
|
1375
|
+
Keyword arguments to pass to the store's `write_empty_workflow` method.
|
|
1376
|
+
variables:
|
|
1377
|
+
String variables to substitute in the file given by `template_path`.
|
|
1378
|
+
Substitutions will be attempted if the file looks to contain variable
|
|
1379
|
+
references (like "<<var:name>>"). If set to `False`, no substitutions will
|
|
1380
|
+
occur, which may result in an invalid workflow template!
|
|
1381
|
+
"""
|
|
1382
|
+
try:
|
|
1383
|
+
template = cls._app.WorkflowTemplate.from_file(
|
|
1384
|
+
template_path,
|
|
1385
|
+
template_format,
|
|
1386
|
+
variables=variables,
|
|
1387
|
+
)
|
|
1388
|
+
except Exception:
|
|
1389
|
+
if status:
|
|
1390
|
+
status.stop()
|
|
1391
|
+
raise
|
|
1392
|
+
return cls.from_template(
|
|
1393
|
+
template,
|
|
1394
|
+
path,
|
|
1395
|
+
name,
|
|
1396
|
+
name_add_timestamp,
|
|
1397
|
+
name_use_dir,
|
|
1398
|
+
overwrite,
|
|
1399
|
+
store,
|
|
1400
|
+
ts_fmt,
|
|
1401
|
+
ts_name_fmt,
|
|
1402
|
+
store_kwargs,
|
|
1403
|
+
status,
|
|
1404
|
+
)
|
|
1405
|
+
|
|
1406
|
+
@classmethod
|
|
1407
|
+
@TimeIt.decorator
|
|
1408
|
+
def from_template_data(
|
|
1409
|
+
cls,
|
|
1410
|
+
template_name: str,
|
|
1411
|
+
tasks: list[Task] | None = None,
|
|
1412
|
+
loops: list[Loop] | None = None,
|
|
1413
|
+
resources: Resources = None,
|
|
1414
|
+
environments: Mapping[str, Mapping[str, Any]] | None = None,
|
|
1415
|
+
config: dict | None = None,
|
|
1416
|
+
path: PathLike | None = None,
|
|
1417
|
+
workflow_name: str | None = None,
|
|
1418
|
+
name_add_timestamp: bool | None = None,
|
|
1419
|
+
name_use_dir: bool | None = None,
|
|
1420
|
+
overwrite: bool = False,
|
|
1421
|
+
store: str = DEFAULT_STORE_FORMAT,
|
|
1422
|
+
ts_fmt: str | None = None,
|
|
1423
|
+
ts_name_fmt: str | None = None,
|
|
1424
|
+
store_kwargs: dict[str, Any] | None = None,
|
|
1425
|
+
) -> Workflow:
|
|
1426
|
+
"""Generate from the data associated with a WorkflowTemplate object.
|
|
1427
|
+
|
|
1428
|
+
Parameters
|
|
1429
|
+
----------
|
|
1430
|
+
template_name
|
|
1431
|
+
The name to use for the new workflow template, from which the new workflow
|
|
1432
|
+
will be generated.
|
|
1433
|
+
tasks:
|
|
1434
|
+
List of Task objects to add to the new workflow.
|
|
1435
|
+
loops:
|
|
1436
|
+
List of Loop objects to add to the new workflow.
|
|
1437
|
+
resources:
|
|
1438
|
+
            Mapping of action scopes to resource requirements, to be applied to all
            element sets in the workflow. `resources` specified in an element set take
            precedence over those defined here for the whole workflow.
|
|
1441
|
+
environments:
|
|
1442
|
+
Environment specifiers, keyed by environment name.
|
|
1443
|
+
config:
|
|
1444
|
+
Configuration items that should be set whenever the resulting workflow is
|
|
1445
|
+
loaded. This includes config items that apply during workflow execution.
|
|
1446
|
+
path:
|
|
1447
|
+
The directory in which the workflow will be generated. If not specified, the
|
|
1448
|
+
config item `default_workflow_path` will be used; if that is not set, the
|
|
1449
|
+
current directory is used.
|
|
1450
|
+
workflow_name
|
|
1451
|
+
The name to use for the workflow. If not provided, the name will be set to
|
|
1452
|
+
that of the template (optionally suffixed by a date-timestamp if
|
|
1453
|
+
`name_add_timestamp` is True).
|
|
1454
|
+
name_add_timestamp
|
|
1455
|
+
If True, suffix the workflow name with a date-timestamp. A default value can
|
|
1456
|
+
be set with the config item `workflow_name_add_timestamp`; otherwise set to
|
|
1457
|
+
`True`.
|
|
1458
|
+
name_use_dir
|
|
1459
|
+
If True, and `name_add_timestamp` is also True, the workflow directory name
|
|
1460
|
+
will be just the date-timestamp, and will be contained within a parent
|
|
1461
|
+
directory corresponding to the workflow name. A default value can be set
|
|
1462
|
+
with the config item `workflow_name_use_dir`; otherwise set to `False`.
|
|
1463
|
+
overwrite:
|
|
1464
|
+
If True and the workflow directory (`path` + `name`) already exists, the
|
|
1465
|
+
existing directory will be overwritten.
|
|
1466
|
+
store:
|
|
1467
|
+
The persistent store to use for this workflow.
|
|
1468
|
+
ts_fmt:
|
|
1469
|
+
The datetime format to use for storing datetimes. Datetimes are always stored
|
|
1470
|
+
in UTC (because Numpy does not store time zone info), so this should not
|
|
1471
|
+
include a time zone name.
|
|
1472
|
+
ts_name_fmt:
|
|
1473
|
+
The datetime format to use when generating the workflow name, where it
|
|
1474
|
+
includes a timestamp.
|
|
1475
|
+
store_kwargs:
|
|
1476
|
+
Keyword arguments to pass to the store's `write_empty_workflow` method.
|
|
1477
|
+
"""
|
|
1478
|
+
template = cls._app.WorkflowTemplate(
|
|
1479
|
+
template_name,
|
|
1480
|
+
tasks=tasks or [],
|
|
1481
|
+
loops=loops or [],
|
|
1482
|
+
resources=resources,
|
|
1483
|
+
environments=environments,
|
|
1484
|
+
config=config or {},
|
|
1485
|
+
)
|
|
1486
|
+
return cls.from_template(
|
|
1487
|
+
template,
|
|
1488
|
+
path,
|
|
1489
|
+
workflow_name,
|
|
1490
|
+
name_add_timestamp,
|
|
1491
|
+
name_use_dir,
|
|
1492
|
+
overwrite,
|
|
1493
|
+
store,
|
|
1494
|
+
ts_fmt,
|
|
1495
|
+
ts_name_fmt,
|
|
1496
|
+
store_kwargs,
|
|
1497
|
+
)
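
# Illustrative usage sketch of `from_template_data`: the fully programmatic
# entry point, with no template file involved. The task schema and input name
# are assumptions for illustration; `hf` is the assumed application import.
#
#     tasks = [hf.Task(schema=my_schema, inputs={"p1": 101})]  # assumed Task constructor
#     wk = hf.Workflow.from_template_data(
#         template_name="demo",
#         tasks=tasks,
#         path=".",
#     )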
|
|
1498
|
+
|
|
1499
|
+
    @TimeIt.decorator
    def _add_empty_task(
        self,
        task: Task,
        new_index: int | None = None,
    ) -> WorkflowTask:
        if new_index is None:
            new_index = self.num_tasks

        insert_ID = self.num_added_tasks

        # make a copy with persistent schema inputs:
        task_c, _ = task.to_persistent(self, insert_ID)

        # add to the WorkflowTemplate:
        self.template._add_empty_task(task_c, new_index, insert_ID)

        # create and insert a new WorkflowTask:
        self.tasks.add_object(
            self._app.WorkflowTask.new_empty_task(self, task_c, new_index),
            index=new_index,
        )

        # update persistent store:
        task_js, temp_comps_js = task_c.to_json_like()
        assert temp_comps_js is not None
        self._store.add_template_components(temp_comps_js)
        self._store.add_task(new_index, cast("Mapping", task_js))

        # update in-memory workflow template components:
        temp_comps = cast(
            "_TemplateComponents",
            self._app.template_components_from_json_like(temp_comps_js),
        )
        for comp_type, comps in temp_comps.items():
            ol = self.__template_components[comp_type]
            for comp in comps:
                comp._set_hash()
                if comp not in ol:
                    self._pending["template_components"][comp_type].append(
                        ol.add_object(comp, skip_duplicates=False)
                    )

        self._pending["tasks"].append(new_index)
        return self.tasks[new_index]

    @TimeIt.decorator
    def _add_task(self, task: Task, new_index: int | None = None) -> None:
        new_wk_task = self._add_empty_task(task=task, new_index=new_index)
        new_wk_task._add_elements(element_sets=task.element_sets, propagate_to={})

    def add_task(self, task: Task, new_index: int | None = None) -> None:
        """
        Add a task to this workflow.
        """
        with self._store.cached_load(), self.batch_update():
            self._add_task(task, new_index=new_index)

    def add_task_after(self, new_task: Task, task_ref: Task | None = None) -> None:
        """Add a new task after the specified task.

        Parameters
        ----------
        task_ref
            If not given, the new task will be added at the end of the workflow.
        """
        new_index = (
            task_ref.index + 1 if task_ref and task_ref.index is not None else None
        )
        self.add_task(new_task, new_index)
        # TODO: add new downstream elements?

    def add_task_before(self, new_task: Task, task_ref: Task | None = None) -> None:
        """Add a new task before the specified task.

        Parameters
        ----------
        task_ref
            If not given, the new task will be added at the beginning of the workflow.
        """
        new_index = task_ref.index if task_ref else 0
        self.add_task(new_task, new_index)
        # TODO: add new downstream elements?
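
# Illustrative usage sketch of the task-insertion helpers above. `t_new` and
# `t_ref` are assumed `Task` objects, with `t_ref` already part of the workflow
# template so that its `index` is defined.
#
#     wk.add_task(t_new)                         # append at the end
#     wk.add_task_after(t_new, task_ref=t_ref)   # insert directly after t_ref
#     wk.add_task_before(t_new, task_ref=t_ref)  # insert directly before t_ref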
|
|
1582
|
+
|
|
1583
|
+
@TimeIt.decorator
|
|
1584
|
+
def _add_empty_loop(self, loop: Loop, cache: LoopCache) -> WorkflowLoop:
|
|
1585
|
+
"""Add a new loop (zeroth iterations only) to the workflow."""
|
|
1586
|
+
|
|
1587
|
+
new_index = self.num_loops
|
|
1588
|
+
|
|
1589
|
+
# don't modify passed object:
|
|
1590
|
+
loop_c = copy.deepcopy(loop)
|
|
1591
|
+
|
|
1592
|
+
# add to the WorkflowTemplate:
|
|
1593
|
+
self.template._add_empty_loop(loop_c)
|
|
1594
|
+
|
|
1595
|
+
# all these element iterations will be initialised for the new loop:
|
|
1596
|
+
iter_IDs = cache.get_iter_IDs(loop_c)
|
|
1597
|
+
iter_loop_idx = cache.get_iter_loop_indices(iter_IDs)
|
|
1598
|
+
|
|
1599
|
+
# create and insert a new WorkflowLoop:
|
|
1600
|
+
new_loop = self._app.WorkflowLoop.new_empty_loop(
|
|
1601
|
+
index=new_index,
|
|
1602
|
+
workflow=self,
|
|
1603
|
+
template=loop_c,
|
|
1604
|
+
iter_loop_idx=iter_loop_idx,
|
|
1605
|
+
)
|
|
1606
|
+
self.loops.add_object(new_loop)
|
|
1607
|
+
wk_loop = self.loops[new_index]
|
|
1608
|
+
|
|
1609
|
+
# update any child loops of the new loop to include their new parent:
|
|
1610
|
+
for chd_loop in wk_loop.get_child_loops():
|
|
1611
|
+
chd_loop._update_parents(wk_loop)
|
|
1612
|
+
|
|
1613
|
+
loop_js, _ = loop_c.to_json_like()
|
|
1614
|
+
|
|
1615
|
+
# update persistent store:
|
|
1616
|
+
self._store.add_loop(
|
|
1617
|
+
loop_template=cast("Mapping", loop_js),
|
|
1618
|
+
iterable_parameters=wk_loop.iterable_parameters,
|
|
1619
|
+
output_parameters=wk_loop.output_parameters,
|
|
1620
|
+
parents=wk_loop.parents,
|
|
1621
|
+
num_added_iterations=wk_loop.num_added_iterations,
|
|
1622
|
+
iter_IDs=iter_IDs,
|
|
1623
|
+
)
|
|
1624
|
+
|
|
1625
|
+
self._pending["loops"].append(new_index)
|
|
1626
|
+
|
|
1627
|
+
# update cache loop indices:
|
|
1628
|
+
cache.update_loop_indices(new_loop_name=loop_c.name or "", iter_IDs=iter_IDs)
|
|
1629
|
+
|
|
1630
|
+
return wk_loop
|
|
1631
|
+
|
|
1632
|
+
@TimeIt.decorator
|
|
1633
|
+
def _add_loop(
|
|
1634
|
+
self, loop: Loop, cache: LoopCache | None = None, status: Status | None = None
|
|
1635
|
+
) -> None:
|
|
1636
|
+
loop._validate_against_workflow(self)
|
|
1637
|
+
cache_ = cache or LoopCache.build(workflow=self, loops=[loop])
|
|
1638
|
+
new_wk_loop = self._add_empty_loop(loop, cache_)
|
|
1639
|
+
if loop.num_iterations is not None:
|
|
1640
|
+
# fixed number of iterations, so add remaining N > 0 iterations:
|
|
1641
|
+
if status:
|
|
1642
|
+
status_prev = status.status
|
|
1643
|
+
for iter_idx in range(loop.num_iterations - 1):
|
|
1644
|
+
if status:
|
|
1645
|
+
status.update(
|
|
1646
|
+
f"{status_prev}: iteration {iter_idx + 2}/{loop.num_iterations}."
|
|
1647
|
+
)
|
|
1648
|
+
new_wk_loop.add_iteration(cache=cache_, status=status)
|
|
1649
|
+
|
|
1650
|
+
def add_loop(self, loop: Loop) -> None:
|
|
1651
|
+
"""Add a loop to a subset of workflow tasks."""
|
|
1652
|
+
with self._store.cached_load(), self.batch_update():
|
|
1653
|
+
self._add_loop(loop)
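
# Illustrative usage sketch of `add_loop`. The `Loop` constructor arguments
# shown here (name, task subset, iteration count) are assumptions based on the
# attributes used above (`loop.name`, `loop.num_iterations`), not a confirmed
# signature.
#
#     loop = hf.Loop(name="fit_loop", tasks=[0, 1], num_iterations=4)  # assumed signature
#     wk.add_loop(loop)  # validates against the workflow, then adds the iterations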
|
|
1654
|
+
|
|
1655
|
+
@property
|
|
1656
|
+
def creation_info(self) -> CreationInfo:
|
|
1657
|
+
"""
|
|
1658
|
+
The creation descriptor for the workflow.
|
|
1659
|
+
"""
|
|
1660
|
+
if not self._creation_info:
|
|
1661
|
+
info = self._store.get_creation_info()
|
|
1662
|
+
# TODO: using `info.get` for backwards compatibility; can change with next
|
|
1663
|
+
# major release
|
|
1664
|
+
self._creation_info = {
|
|
1665
|
+
"app_info": info["app_info"],
|
|
1666
|
+
"create_time": parse_timestamp(info["create_time"], self.ts_fmt),
|
|
1667
|
+
"id": info["id"],
|
|
1668
|
+
"user_name": info.get("user_name"),
|
|
1669
|
+
"user_orcid": info.get("user_orcid"),
|
|
1670
|
+
"user_affiliations": info.get("user_affiliations"),
|
|
1671
|
+
}
|
|
1672
|
+
return self._creation_info
|
|
1673
|
+
|
|
1674
|
+
@property
|
|
1675
|
+
def id_(self) -> str:
|
|
1676
|
+
"""
|
|
1677
|
+
The ID of this workflow.
|
|
1678
|
+
"""
|
|
1679
|
+
return self.creation_info["id"]
|
|
1680
|
+
|
|
1681
|
+
@property
|
|
1682
|
+
def ts_fmt(self) -> str:
|
|
1683
|
+
"""
|
|
1684
|
+
The timestamp format.
|
|
1685
|
+
"""
|
|
1686
|
+
if not self._ts_fmt:
|
|
1687
|
+
self._ts_fmt = self._store.get_ts_fmt()
|
|
1688
|
+
return self._ts_fmt
|
|
1689
|
+
|
|
1690
|
+
@property
|
|
1691
|
+
def ts_name_fmt(self) -> str:
|
|
1692
|
+
"""
|
|
1693
|
+
The timestamp format for names.
|
|
1694
|
+
"""
|
|
1695
|
+
if not self._ts_name_fmt:
|
|
1696
|
+
self._ts_name_fmt = self._store.get_ts_name_fmt()
|
|
1697
|
+
return self._ts_name_fmt
|
|
1698
|
+
|
|
1699
|
+
@property
|
|
1700
|
+
def template_components(self) -> TemplateComponents:
|
|
1701
|
+
"""
|
|
1702
|
+
The template components used for this workflow.
|
|
1703
|
+
"""
|
|
1704
|
+
if self._template_components is None:
|
|
1705
|
+
with self._store.cached_load():
|
|
1706
|
+
tc_js = self._store.get_template_components()
|
|
1707
|
+
self._template_components = self._app.template_components_from_json_like(
|
|
1708
|
+
tc_js
|
|
1709
|
+
)
|
|
1710
|
+
return self._template_components
|
|
1711
|
+
|
|
1712
|
+
@property
|
|
1713
|
+
def __template_components(self) -> _TemplateComponents:
|
|
1714
|
+
return cast("_TemplateComponents", self.template_components)
|
|
1715
|
+
|
|
1716
|
+
@property
|
|
1717
|
+
def template(self) -> WorkflowTemplate:
|
|
1718
|
+
"""
|
|
1719
|
+
The template that this workflow was made from.
|
|
1720
|
+
"""
|
|
1721
|
+
if self._template is None:
|
|
1722
|
+
with self._store.cached_load():
|
|
1723
|
+
temp_js = self._store.get_template()
|
|
1724
|
+
|
|
1725
|
+
# TODO: insert_ID and id_ are the same thing:
|
|
1726
|
+
for task in cast("list[dict]", temp_js["tasks"]):
|
|
1727
|
+
task.pop("id_", None)
|
|
1728
|
+
|
|
1729
|
+
template = self._app.WorkflowTemplate.from_json_like(
|
|
1730
|
+
temp_js, cast("dict", self.template_components)
|
|
1731
|
+
)
|
|
1732
|
+
template.workflow = self
|
|
1733
|
+
self._template = template
|
|
1734
|
+
|
|
1735
|
+
return self._template
|
|
1736
|
+
|
|
1737
|
+
@property
|
|
1738
|
+
@TimeIt.decorator
|
|
1739
|
+
def tasks(self) -> WorkflowTaskList:
|
|
1740
|
+
"""
|
|
1741
|
+
The tasks in this workflow.
|
|
1742
|
+
"""
|
|
1743
|
+
if self._tasks is None:
|
|
1744
|
+
with self._store.cached_load():
|
|
1745
|
+
all_tasks: Iterable[StoreTask] = self._store.get_tasks()
|
|
1746
|
+
self._tasks = self._app.WorkflowTaskList(
|
|
1747
|
+
self._app.WorkflowTask(
|
|
1748
|
+
workflow=self,
|
|
1749
|
+
template=self.template.tasks[task.index],
|
|
1750
|
+
index=task.index,
|
|
1751
|
+
element_IDs=task.element_IDs,
|
|
1752
|
+
)
|
|
1753
|
+
for task in all_tasks
|
|
1754
|
+
)
|
|
1755
|
+
|
|
1756
|
+
return self._tasks
|
|
1757
|
+
|
|
1758
|
+
@property
|
|
1759
|
+
def loops(self) -> WorkflowLoopList:
|
|
1760
|
+
"""
|
|
1761
|
+
The loops in this workflow.
|
|
1762
|
+
"""
|
|
1763
|
+
|
|
1764
|
+
def repack_iteration_tuples(
|
|
1765
|
+
num_added_iterations: list[list[list[int] | int]],
|
|
1766
|
+
) -> Iterator[tuple[tuple[int, ...], int]]:
|
|
1767
|
+
"""
|
|
1768
|
+
Unpacks a very ugly type from the persistence layer, turning it into
|
|
1769
|
+
something we can process into a dict more easily. This in turn is caused
|
|
1770
|
+
by JSON and Zarr not really understanding tuples as such.
|
|
1771
|
+
"""
|
|
1772
|
+
for item in num_added_iterations:
|
|
1773
|
+
# Convert the outside to a tuple and narrow the inner types
|
|
1774
|
+
key_vec, count = item
|
|
1775
|
+
yield tuple(cast("list[int]", key_vec)), cast("int", count)
|
|
1776
|
+
|
|
1777
|
+
if self._loops is None:
|
|
1778
|
+
with self._store.cached_load():
|
|
1779
|
+
self._loops = self._app.WorkflowLoopList(
|
|
1780
|
+
self._app.WorkflowLoop(
|
|
1781
|
+
index=idx,
|
|
1782
|
+
workflow=self,
|
|
1783
|
+
template=self.template.loops[idx],
|
|
1784
|
+
parents=loop_dat["parents"],
|
|
1785
|
+
num_added_iterations=dict(
|
|
1786
|
+
repack_iteration_tuples(loop_dat["num_added_iterations"])
|
|
1787
|
+
),
|
|
1788
|
+
iterable_parameters=loop_dat["iterable_parameters"],
|
|
1789
|
+
output_parameters=loop_dat["output_parameters"],
|
|
1790
|
+
)
|
|
1791
|
+
for idx, loop_dat in self._store.get_loops().items()
|
|
1792
|
+
)
|
|
1793
|
+
return self._loops
|
|
1794
|
+
|
|
1795
|
+
@property
|
|
1796
|
+
@TimeIt.decorator
|
|
1797
|
+
def submissions(self) -> list[Submission]:
|
|
1798
|
+
"""
|
|
1799
|
+
The job submissions done by this workflow.
|
|
1800
|
+
"""
|
|
1801
|
+
if self._submissions is None:
|
|
1802
|
+
self._app.persistence_logger.debug("loading workflow submissions")
|
|
1803
|
+
with self._store.cached_load():
|
|
1804
|
+
subs: list[Submission] = []
|
|
1805
|
+
for idx, sub_dat in self._store.get_submissions().items():
|
|
1806
|
+
sub = self._app.Submission.from_json_like(
|
|
1807
|
+
{"index": idx, **cast("dict", sub_dat)}
|
|
1808
|
+
)
|
|
1809
|
+
sub.workflow = self
|
|
1810
|
+
subs.append(sub)
|
|
1811
|
+
self._submissions = subs
|
|
1812
|
+
return self._submissions
|
|
1813
|
+
|
|
1814
|
+
@property
|
|
1815
|
+
def num_added_tasks(self) -> int:
|
|
1816
|
+
"""
|
|
1817
|
+
The total number of added tasks.
|
|
1818
|
+
"""
|
|
1819
|
+
return self._store._get_num_total_added_tasks()
|
|
1820
|
+
|
|
1821
|
+
@TimeIt.decorator
|
|
1822
|
+
def get_store_EARs(self, id_lst: Iterable[int]) -> Sequence[StoreEAR]:
|
|
1823
|
+
"""
|
|
1824
|
+
Get the persistent element action runs.
|
|
1825
|
+
"""
|
|
1826
|
+
return self._store.get_EARs(id_lst)
|
|
1827
|
+
|
|
1828
|
+
@TimeIt.decorator
|
|
1829
|
+
def get_store_element_iterations(
|
|
1830
|
+
self, id_lst: Iterable[int]
|
|
1831
|
+
) -> Sequence[StoreElementIter]:
|
|
1832
|
+
"""
|
|
1833
|
+
Get the persistent element iterations.
|
|
1834
|
+
"""
|
|
1835
|
+
return self._store.get_element_iterations(id_lst)
|
|
1836
|
+
|
|
1837
|
+
@TimeIt.decorator
|
|
1838
|
+
def get_store_elements(self, id_lst: Iterable[int]) -> Sequence[StoreElement]:
|
|
1839
|
+
"""
|
|
1840
|
+
Get the persistent elements.
|
|
1841
|
+
"""
|
|
1842
|
+
return self._store.get_elements(id_lst)
|
|
1843
|
+
|
|
1844
|
+
@TimeIt.decorator
|
|
1845
|
+
def get_store_tasks(self, id_lst: Iterable[int]) -> Sequence[StoreTask]:
|
|
1846
|
+
"""
|
|
1847
|
+
Get the persistent tasks.
|
|
1848
|
+
"""
|
|
1849
|
+
return self._store.get_tasks_by_IDs(id_lst)
|
|
1850
|
+
|
|
1851
|
+
def get_element_iteration_IDs_from_EAR_IDs(self, id_lst: Iterable[int]) -> list[int]:
|
|
1852
|
+
"""
|
|
1853
|
+
Get the element iteration IDs of EARs.
|
|
1854
|
+
"""
|
|
1855
|
+
return [ear.elem_iter_ID for ear in self.get_store_EARs(id_lst)]
|
|
1856
|
+
|
|
1857
|
+
def get_element_IDs_from_EAR_IDs(self, id_lst: Iterable[int]) -> list[int]:
|
|
1858
|
+
"""
|
|
1859
|
+
Get the element IDs of EARs.
|
|
1860
|
+
"""
|
|
1861
|
+
iter_IDs = self.get_element_iteration_IDs_from_EAR_IDs(id_lst)
|
|
1862
|
+
return [itr.element_ID for itr in self.get_store_element_iterations(iter_IDs)]
|
|
1863
|
+
|
|
1864
|
+
def get_task_IDs_from_element_IDs(self, id_lst: Iterable[int]) -> list[int]:
|
|
1865
|
+
"""
|
|
1866
|
+
Get the task IDs of elements.
|
|
1867
|
+
"""
|
|
1868
|
+
return [elem.task_ID for elem in self.get_store_elements(id_lst)]
|
|
1869
|
+
|
|
1870
|
+
def get_EAR_IDs_of_tasks(self, id_lst: Iterable[int]) -> list[int]:
|
|
1871
|
+
"""Get EAR IDs belonging to multiple tasks."""
|
|
1872
|
+
return [ear.id_ for ear in self.get_EARs_of_tasks(id_lst)]
|
|
1873
|
+
|
|
1874
|
+
def get_EARs_of_tasks(self, id_lst: Iterable[int]) -> Iterator[ElementActionRun]:
|
|
1875
|
+
"""Get EARs belonging to multiple tasks."""
|
|
1876
|
+
for id_ in id_lst:
|
|
1877
|
+
for elem in self.tasks.get(insert_ID=id_).elements[:]:
|
|
1878
|
+
for iter_ in elem.iterations:
|
|
1879
|
+
yield from iter_.action_runs
|
|
1880
|
+
|
|
1881
|
+
def get_element_iterations_of_tasks(
|
|
1882
|
+
self, id_lst: Iterable[int]
|
|
1883
|
+
) -> Iterator[ElementIteration]:
|
|
1884
|
+
"""Get element iterations belonging to multiple tasks."""
|
|
1885
|
+
for id_ in id_lst:
|
|
1886
|
+
for elem in self.tasks.get(insert_ID=id_).elements[:]:
|
|
1887
|
+
yield from elem.iterations
|
|
1888
|
+
|
|
1889
|
+
@dataclass
|
|
1890
|
+
class _IndexPath1:
|
|
1891
|
+
elem: int
|
|
1892
|
+
task: int
|
|
1893
|
+
|
|
1894
|
+
@TimeIt.decorator
|
|
1895
|
+
def __get_elements_by_task_idx(
|
|
1896
|
+
self, element_idx_by_task: dict[int, set[int]]
|
|
1897
|
+
) -> dict[int, dict[int, Element]]:
|
|
1898
|
+
return {
|
|
1899
|
+
task_idx: {
|
|
1900
|
+
idx: element
|
|
1901
|
+
for idx, element in zip(
|
|
1902
|
+
elem_indices, self.tasks[task_idx].elements[list(elem_indices)]
|
|
1903
|
+
)
|
|
1904
|
+
}
|
|
1905
|
+
for task_idx, elem_indices in element_idx_by_task.items()
|
|
1906
|
+
}
|
|
1907
|
+
|
|
1908
|
+
@TimeIt.decorator
|
|
1909
|
+
def get_elements_from_IDs(self, id_lst: Iterable[int]) -> list[Element]:
|
|
1910
|
+
"""Return element objects from a list of IDs."""
|
|
1911
|
+
|
|
1912
|
+
store_elems = self.get_store_elements(id_lst)
|
|
1913
|
+
store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
|
|
1914
|
+
|
|
1915
|
+
element_idx_by_task: dict[int, set[int]] = defaultdict(set)
|
|
1916
|
+
index_paths: list[Workflow._IndexPath1] = []
|
|
1917
|
+
for elem, task in zip(store_elems, store_tasks):
|
|
1918
|
+
elem_idx = task.element_IDs.index(elem.id_)
|
|
1919
|
+
index_paths.append(Workflow._IndexPath1(elem_idx, task.index))
|
|
1920
|
+
element_idx_by_task[task.index].add(elem_idx)
|
|
1921
|
+
|
|
1922
|
+
elements_by_task = self.__get_elements_by_task_idx(element_idx_by_task)
|
|
1923
|
+
|
|
1924
|
+
return [elements_by_task[path.task][path.elem] for path in index_paths]
|
|
1925
|
+
|
|
1926
|
+
@dataclass
|
|
1927
|
+
class _IndexPath2:
|
|
1928
|
+
iter: int
|
|
1929
|
+
elem: int
|
|
1930
|
+
task: int
|
|
1931
|
+
|
|
1932
|
+
@TimeIt.decorator
|
|
1933
|
+
def get_element_iterations_from_IDs(
|
|
1934
|
+
self, id_lst: Iterable[int]
|
|
1935
|
+
) -> list[ElementIteration]:
|
|
1936
|
+
"""Return element iteration objects from a list of IDs."""
|
|
1937
|
+
|
|
1938
|
+
store_iters = self.get_store_element_iterations(id_lst)
|
|
1939
|
+
store_elems = self.get_store_elements(it.element_ID for it in store_iters)
|
|
1940
|
+
store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
|
|
1941
|
+
|
|
1942
|
+
element_idx_by_task: dict[int, set[int]] = defaultdict(set)
|
|
1943
|
+
|
|
1944
|
+
index_paths: list[Workflow._IndexPath2] = []
|
|
1945
|
+
for itr, elem, task in zip(store_iters, store_elems, store_tasks):
|
|
1946
|
+
iter_idx = elem.iteration_IDs.index(itr.id_)
|
|
1947
|
+
elem_idx = task.element_IDs.index(elem.id_)
|
|
1948
|
+
index_paths.append(Workflow._IndexPath2(iter_idx, elem_idx, task.index))
|
|
1949
|
+
element_idx_by_task[task.index].add(elem_idx)
|
|
1950
|
+
|
|
1951
|
+
elements_by_task = self.__get_elements_by_task_idx(element_idx_by_task)
|
|
1952
|
+
|
|
1953
|
+
return [
|
|
1954
|
+
elements_by_task[path.task][path.elem].iterations[path.iter]
|
|
1955
|
+
for path in index_paths
|
|
1956
|
+
]
|
|
1957
|
+
|
|
1958
|
+
@dataclass
|
|
1959
|
+
class _IndexPath3:
|
|
1960
|
+
run: int
|
|
1961
|
+
act: int
|
|
1962
|
+
iter: int
|
|
1963
|
+
elem: int
|
|
1964
|
+
task: int
|
|
1965
|
+
|
|
1966
|
+
@overload
|
|
1967
|
+
def get_EARs_from_IDs(self, ids: Iterable[int]) -> list[ElementActionRun]: ...
|
|
1968
|
+
|
|
1969
|
+
@overload
|
|
1970
|
+
def get_EARs_from_IDs(self, ids: int) -> ElementActionRun: ...
|
|
1971
|
+
|
|
1972
|
+
@TimeIt.decorator
|
|
1973
|
+
def get_EARs_from_IDs(
|
|
1974
|
+
self, ids: Iterable[int] | int, as_dict: bool = False
|
|
1975
|
+
) -> list[ElementActionRun] | dict[int, ElementActionRun] | ElementActionRun:
|
|
1976
|
+
"""Get element action run objects from a list of IDs."""
|
|
1977
|
+
id_lst = [ids] if isinstance(ids, int) else list(ids)
|
|
1978
|
+
|
|
1979
|
+
with self._store.cached_load(), self._store.cache_ctx():
|
|
1980
|
+
|
|
1981
|
+
self._app.persistence_logger.debug(
|
|
1982
|
+
f"get_EARs_from_IDs: {len(id_lst)} EARs: {shorten_list_str(id_lst)}."
|
|
1983
|
+
)
|
|
1984
|
+
|
|
1985
|
+
store_EARs = self.get_store_EARs(id_lst)
|
|
1986
|
+
store_iters = self.get_store_element_iterations(
|
|
1987
|
+
ear.elem_iter_ID for ear in store_EARs
|
|
1988
|
+
)
|
|
1989
|
+
store_elems = self.get_store_elements(it.element_ID for it in store_iters)
|
|
1990
|
+
store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
|
|
1991
|
+
|
|
1992
|
+
# to allow for bulk retrieval of elements/iterations
|
|
1993
|
+
element_idx_by_task: dict[int, set[int]] = defaultdict(set)
|
|
1994
|
+
iter_idx_by_task_elem: dict[int, dict[int, set[int]]] = defaultdict(
|
|
1995
|
+
lambda: defaultdict(set)
|
|
1996
|
+
)
|
|
1997
|
+
|
|
1998
|
+
index_paths: list[Workflow._IndexPath3] = []
|
|
1999
|
+
for rn, it, el, tk in zip(store_EARs, store_iters, store_elems, store_tasks):
|
|
2000
|
+
act_idx = rn.action_idx
|
|
2001
|
+
run_idx = (
|
|
2002
|
+
it.EAR_IDs[act_idx].index(rn.id_) if it.EAR_IDs is not None else -1
|
|
2003
|
+
)
|
|
2004
|
+
iter_idx = el.iteration_IDs.index(it.id_)
|
|
2005
|
+
elem_idx = tk.element_IDs.index(el.id_)
|
|
2006
|
+
index_paths.append(
|
|
2007
|
+
Workflow._IndexPath3(run_idx, act_idx, iter_idx, elem_idx, tk.index)
|
|
2008
|
+
)
|
|
2009
|
+
element_idx_by_task[tk.index].add(elem_idx)
|
|
2010
|
+
iter_idx_by_task_elem[tk.index][elem_idx].add(iter_idx)
|
|
2011
|
+
|
|
2012
|
+
# retrieve elements/iterations:
|
|
2013
|
+
iters = {
|
|
2014
|
+
task_idx: {
|
|
2015
|
+
elem_i.index: {
|
|
2016
|
+
iter_idx: elem_i.iterations[iter_idx]
|
|
2017
|
+
for iter_idx in iter_idx_by_task_elem[task_idx][elem_i.index]
|
|
2018
|
+
}
|
|
2019
|
+
for elem_i in self.tasks[task_idx].elements[list(elem_idxes)]
|
|
2020
|
+
}
|
|
2021
|
+
for task_idx, elem_idxes in element_idx_by_task.items()
|
|
2022
|
+
}
|
|
2023
|
+
|
|
2024
|
+
result = {}
|
|
2025
|
+
for path in index_paths:
|
|
2026
|
+
run = (
|
|
2027
|
+
iters[path.task][path.elem][path.iter]
|
|
2028
|
+
.actions[path.act]
|
|
2029
|
+
.runs[path.run]
|
|
2030
|
+
)
|
|
2031
|
+
result[run.id_] = run
|
|
2032
|
+
|
|
2033
|
+
if not as_dict:
|
|
2034
|
+
res_lst = list(result.values())
|
|
2035
|
+
return res_lst[0] if isinstance(ids, int) else res_lst
|
|
2036
|
+
|
|
2037
|
+
return result
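
# Illustrative usage sketch of the overloads above: an `int` ID returns a single
# run, an iterable returns a list, and `as_dict=True` returns an ID-keyed dict.
# The IDs are invented for illustration.
#
#     run = wk.get_EARs_from_IDs(0)                        # single ElementActionRun
#     runs = wk.get_EARs_from_IDs([0, 1, 2])               # list of ElementActionRun
#     by_id = wk.get_EARs_from_IDs([0, 1], as_dict=True)   # {run_id: ElementActionRun}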
|
|
2038
|
+
|
|
2039
|
+
@TimeIt.decorator
|
|
2040
|
+
def get_all_elements(self) -> list[Element]:
|
|
2041
|
+
"""
|
|
2042
|
+
Get all elements in the workflow.
|
|
2043
|
+
"""
|
|
2044
|
+
return self.get_elements_from_IDs(range(self.num_elements))
|
|
2045
|
+
|
|
2046
|
+
@TimeIt.decorator
|
|
2047
|
+
def get_all_element_iterations(self) -> list[ElementIteration]:
|
|
2048
|
+
"""
|
|
2049
|
+
Get all iterations in the workflow.
|
|
2050
|
+
"""
|
|
2051
|
+
return self.get_element_iterations_from_IDs(range(self.num_element_iterations))
|
|
2052
|
+
|
|
2053
|
+
@TimeIt.decorator
|
|
2054
|
+
def get_all_EARs(self) -> list[ElementActionRun]:
|
|
2055
|
+
"""
|
|
2056
|
+
Get all runs in the workflow.
|
|
2057
|
+
"""
|
|
2058
|
+
return self.get_EARs_from_IDs(range(self.num_EARs))
|
|
2059
|
+
|
|
2060
|
+
    @contextmanager
    def batch_update(self, is_workflow_creation: bool = False) -> Iterator[None]:
        """A context manager that batches up structural changes to the workflow and
        commits them to disk all together when the context manager exits."""

        if self._in_batch_mode:
            yield
        else:
            try:
                self._app.persistence_logger.info(
                    f"entering batch update (is_workflow_creation={is_workflow_creation!r})"
                )
                self._in_batch_mode = True
                yield

            except Exception:
                self._app.persistence_logger.error("batch update exception!")
                self._in_batch_mode = False
                self._store._pending.reset()

                for task in self.tasks:
                    task._reset_pending_element_IDs()
                    task.template._reset_pending_element_sets()

                for loop in self.loops:
                    loop._reset_pending_num_added_iters()
                    loop._reset_pending_parents()

                self._reject_pending()

                if is_workflow_creation:
                    # creation failed, so no need to keep the newly generated workflow:
                    self._store.delete_no_confirm()
                    self._store.reinstate_replaced_dir()

                raise

            else:
                if self._store._pending:
                    # is_diff = self._store.is_modified_on_disk()
                    # if is_diff:
                    #     raise WorkflowBatchUpdateFailedError(
                    #         f"Workflow modified on disk since it was loaded!"
                    #     )

                    for task in self.tasks:
                        task._accept_pending_element_IDs()
                        task.template._accept_pending_element_sets()

                    for loop in self.loops:
                        loop._accept_pending_num_added_iters()
                        loop._accept_pending_parents()

                    # TODO: handle errors in commit pending?
                    self._store._pending.commit_all()
                    self._accept_pending()

                if is_workflow_creation:
                    self._store.remove_replaced_dir()

                self._app.persistence_logger.info("exiting batch update")
                self._in_batch_mode = False
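
# Illustrative usage sketch of `batch_update`: group several structural edits
# into a single store commit. The context manager is re-entrant (it yields
# immediately when already in batch mode), so the nested `add_task`/`add_loop`
# calls below do not trigger intermediate commits. `t1`, `t2` and `loop` are
# assumed objects.
#
#     with wk.batch_update():
#         wk.add_task(t1)
#         wk.add_task(t2)
#         wk.add_loop(loop)
#     # pending changes are committed here, or rolled back if an exception was raised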
|
|
2122
|
+
|
|
2123
|
+
@contextmanager
|
|
2124
|
+
def cached_merged_parameters(self):
|
|
2125
|
+
if self._use_merged_parameters_cache:
|
|
2126
|
+
yield
|
|
2127
|
+
else:
|
|
2128
|
+
try:
|
|
2129
|
+
self._app.logger.debug("entering merged-parameters cache.")
|
|
2130
|
+
self._use_merged_parameters_cache = True
|
|
2131
|
+
yield
|
|
2132
|
+
finally:
|
|
2133
|
+
self._app.logger.debug("exiting merged-parameters cache.")
|
|
2134
|
+
self._use_merged_parameters_cache = False
|
|
2135
|
+
self._merged_parameters_cache = {} # reset the cache
|
|
2136
|
+
|
|
2137
|
+
@classmethod
|
|
2138
|
+
def temporary_rename(cls, path: str, fs: AbstractFileSystem) -> str:
|
|
2139
|
+
"""Rename an existing same-path workflow (directory) so we can restore it if
|
|
2140
|
+
workflow creation fails.
|
|
2141
|
+
|
|
2142
|
+
Renaming will occur until the successfully completed. This means multiple new
|
|
2143
|
+
paths may be created, where only the final path should be considered the
|
|
2144
|
+
successfully renamed workflow. Other paths will be deleted."""
|
|
2145
|
+
|
|
2146
|
+
all_replaced: list[str] = []
|
|
2147
|
+
|
|
2148
|
+
@cls._app.perm_error_retry()
|
|
2149
|
+
def _temp_rename(path: str, fs: AbstractFileSystem) -> str:
|
|
2150
|
+
temp_ext = "".join(random.choices(string.ascii_letters, k=10))
|
|
2151
|
+
replaced = str(Path(f"{path}.{temp_ext}").as_posix())
|
|
2152
|
+
cls._app.persistence_logger.debug(
|
|
2153
|
+
f"temporary_rename: _temp_rename: {path!r} --> {replaced!r}."
|
|
2154
|
+
)
|
|
2155
|
+
all_replaced.append(replaced)
|
|
2156
|
+
try:
|
|
2157
|
+
fs.rename(path, replaced, recursive=True)
|
|
2158
|
+
except TypeError:
|
|
2159
|
+
# `SFTPFileSystem.rename` has no `recursive` argument:
|
|
2160
|
+
fs.rename(path, replaced)
|
|
2161
|
+
return replaced
|
|
2162
|
+
|
|
2163
|
+
@cls._app.perm_error_retry()
|
|
2164
|
+
def _remove_path(path: str, fs: AbstractFileSystem) -> None:
|
|
2165
|
+
cls._app.persistence_logger.debug(
|
|
2166
|
+
f"temporary_rename: _remove_path: {path!r}."
|
|
2167
|
+
)
|
|
2168
|
+
while fs.exists(path):
|
|
2169
|
+
fs.rm(path, recursive=True)
|
|
2170
|
+
time.sleep(0.5)
|
|
2171
|
+
|
|
2172
|
+
_temp_rename(path, fs)
|
|
2173
|
+
|
|
2174
|
+
for path in all_replaced[:-1]:
|
|
2175
|
+
_remove_path(path, fs)
|
|
2176
|
+
|
|
2177
|
+
return all_replaced[-1]
|
|
2178
|
+
|
|
2179
|
+
@classmethod
|
|
2180
|
+
@TimeIt.decorator
|
|
2181
|
+
def _write_empty_workflow(
|
|
2182
|
+
cls,
|
|
2183
|
+
template: WorkflowTemplate,
|
|
2184
|
+
*,
|
|
2185
|
+
path: PathLike | None = None,
|
|
2186
|
+
name: str | None = None,
|
|
2187
|
+
name_add_timestamp: bool | None = None,
|
|
2188
|
+
name_use_dir: bool | None = None,
|
|
2189
|
+
overwrite: bool | None = False,
|
|
2190
|
+
store: str = DEFAULT_STORE_FORMAT,
|
|
2191
|
+
ts_fmt: str | None = None,
|
|
2192
|
+
ts_name_fmt: str | None = None,
|
|
2193
|
+
fs_kwargs: dict[str, Any] | None = None,
|
|
2194
|
+
store_kwargs: dict[str, Any] | None = None,
|
|
2195
|
+
) -> Workflow:
|
|
2196
|
+
"""
|
|
2197
|
+
Parameters
|
|
2198
|
+
----------
|
|
2199
|
+
template
|
|
2200
|
+
The workflow description to instantiate.
|
|
2201
|
+
path
|
|
2202
|
+
The directory in which the workflow will be generated. If not specified, the
|
|
2203
|
+
config item `default_workflow_path` will be used; if that is not set, the
|
|
2204
|
+
current directory is used.
|
|
2205
|
+
name
|
|
2206
|
+
The name to use for the workflow. If not provided, the name will be set to
|
|
2207
|
+
that of the template (optionally suffixed by a date-timestamp if
|
|
2208
|
+
`name_add_timestamp` is True).
|
|
2209
|
+
name_add_timestamp
|
|
2210
|
+
If True, suffix the name with a date-timestamp. A default value can be set
|
|
2211
|
+
with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
|
|
2212
|
+
name_use_dir
|
|
2213
|
+
If True, and `name_add_timestamp` is also True, the workflow directory name
|
|
2214
|
+
will be just the date-timestamp, and will be contained within a parent
|
|
2215
|
+
directory corresponding to the workflow name. A default value can be set
|
|
2216
|
+
with the config item `workflow_name_use_dir`; otherwise set to `False`.
|
|
2217
|
+
"""
|
|
2218
|
+
|
|
2219
|
+
if name_use_dir is None:
|
|
2220
|
+
# use value from the config if available
|
|
2221
|
+
if (cfg_use_dir := cls._app.config.workflow_name_use_dir) is not None:
|
|
2222
|
+
name_use_dir = cfg_use_dir
|
|
2223
|
+
else:
|
|
2224
|
+
name_use_dir = False
|
|
2225
|
+
|
|
2226
|
+
if name_add_timestamp is None:
|
|
2227
|
+
# use value from the config if available
|
|
2228
|
+
if (cfg_add_ts := cls._app.config.workflow_name_add_timestamp) is not None:
|
|
2229
|
+
name_add_timestamp = cfg_add_ts
|
|
2230
|
+
else:
|
|
2231
|
+
name_add_timestamp = True
|
|
2232
|
+
|
|
2233
|
+
# store all times in UTC, since NumPy doesn't support time zone info:
|
|
2234
|
+
ts_utc = current_timestamp()
|
|
2235
|
+
ts = normalise_timestamp(ts_utc)
|
|
2236
|
+
|
|
2237
|
+
ts_name_fmt = ts_name_fmt or cls._default_ts_name_fmt
|
|
2238
|
+
ts_fmt = ts_fmt or cls._default_ts_fmt
|
|
2239
|
+
|
|
2240
|
+
parent_dir = Path(path or cls._app.config.default_workflow_path or ".")
|
|
2241
|
+
|
|
2242
|
+
wk_name = name or template.name
|
|
2243
|
+
wk_dir_name = wk_name
|
|
2244
|
+
if name_add_timestamp:
|
|
2245
|
+
timestamp = ts.strftime(ts_name_fmt)
|
|
2246
|
+
if name_use_dir:
|
|
2247
|
+
wk_dir_name = timestamp
|
|
2248
|
+
parent_dir = parent_dir.joinpath(wk_name)
|
|
2249
|
+
else:
|
|
2250
|
+
wk_dir_name += f"_{timestamp}"
|
|
2251
|
+
wk_name += f"_{timestamp}"
|
|
2252
|
+
|
|
2253
|
+
fs_kwargs = fs_kwargs or {}
|
|
2254
|
+
fs, _, pw = resolve_fsspec(parent_dir, **fs_kwargs)
|
|
2255
|
+
wk_path = str(parent_dir.joinpath(wk_dir_name))
|
|
2256
|
+
|
|
2257
|
+
replaced_wk = None
|
|
2258
|
+
if fs.exists(wk_path):
|
|
2259
|
+
cls._app.logger.debug("workflow path exists")
|
|
2260
|
+
if overwrite:
|
|
2261
|
+
cls._app.logger.debug("renaming existing workflow path")
|
|
2262
|
+
replaced_wk = cls.temporary_rename(wk_path, fs)
|
|
2263
|
+
else:
|
|
2264
|
+
raise ValueError(
|
|
2265
|
+
f"Path already exists: {wk_path} on file system " f"{fs!r}."
|
|
2266
|
+
)
|
|
2267
|
+
|
|
2268
|
+
class PersistenceGrabber:
|
|
2269
|
+
"""An object to pass to ResourceSpec.make_persistent that pretends to be a
|
|
2270
|
+
Workflow object, so we can pretend to make template-level inputs/resources
|
|
2271
|
+
persistent before the workflow exists."""
|
|
2272
|
+
|
|
2273
|
+
def __init__(self) -> None:
|
|
2274
|
+
self.__ps: list[tuple[Any, ParamSource]] = []
|
|
2275
|
+
|
|
2276
|
+
def _add_parameter_data(self, data: Any, source: ParamSource) -> int:
|
|
2277
|
+
ref = len(self.__ps)
|
|
2278
|
+
self.__ps.append((data, source))
|
|
2279
|
+
return ref
|
|
2280
|
+
|
|
2281
|
+
def get_parameter_data(self, data_idx: int) -> Any:
|
|
2282
|
+
return self.__ps[data_idx - 1][0]
|
|
2283
|
+
|
|
2284
|
+
def check_parameters_exist(self, id_lst: int | list[int]) -> bool:
|
|
2285
|
+
r = range(len(self.__ps))
|
|
2286
|
+
if isinstance(id_lst, int):
|
|
2287
|
+
return id_lst in r
|
|
2288
|
+
else:
|
|
2289
|
+
return all(id_ in r for id_ in id_lst)
|
|
2290
|
+
|
|
2291
|
+
def write_persistence_data_to_workflow(self, workflow: Workflow) -> None:
|
|
2292
|
+
for dat_i, source_i in self.__ps:
|
|
2293
|
+
workflow._add_parameter_data(dat_i, source_i)
|
|
2294
|
+
|
|
2295
|
+
# make template-level inputs/resources think they are persistent:
|
|
2296
|
+
grabber = PersistenceGrabber()
|
|
2297
|
+
param_src: ParamSource = {"type": "workflow_resources"}
|
|
2298
|
+
for res_i_copy in template._get_resources_copy():
|
|
2299
|
+
res_i_copy.make_persistent(grabber, param_src)
|
|
2300
|
+
|
|
2301
|
+
template_js_, template_sh = template.to_json_like(exclude={"tasks", "loops"})
|
|
2302
|
+
template_js: TemplateMeta = {
|
|
2303
|
+
**cast("TemplateMeta", template_js_), # Trust me, bro!
|
|
2304
|
+
"tasks": [],
|
|
2305
|
+
"loops": [],
|
|
2306
|
+
}
|
|
2307
|
+
|
|
2308
|
+
store_kwargs = store_kwargs if store_kwargs else template.store_kwargs
|
|
2309
|
+
store_cls = store_cls_from_str(store)
|
|
2310
|
+
store_cls.write_empty_workflow(
|
|
2311
|
+
app=cls._app,
|
|
2312
|
+
template_js=template_js,
|
|
2313
|
+
template_components_js=template_sh or {},
|
|
2314
|
+
wk_path=wk_path,
|
|
2315
|
+
fs=fs,
|
|
2316
|
+
name=wk_name,
|
|
2317
|
+
replaced_wk=replaced_wk,
|
|
2318
|
+
creation_info={
|
|
2319
|
+
"app_info": cls._app.get_info(),
|
|
2320
|
+
"create_time": ts_utc.strftime(ts_fmt),
|
|
2321
|
+
"id": str(uuid4()),
|
|
2322
|
+
"user_name": cls._app.config.user_name,
|
|
2323
|
+
"user_orcid": cls._app.config.user_orcid,
|
|
2324
|
+
"user_affiliations": cls._app.config.user_affiliations,
|
|
2325
|
+
},
|
|
2326
|
+
ts_fmt=ts_fmt,
|
|
2327
|
+
ts_name_fmt=ts_name_fmt,
|
|
2328
|
+
**store_kwargs,
|
|
2329
|
+
)
|
|
2330
|
+
|
|
2331
|
+
fs_kwargs = {"password": pw, **fs_kwargs}
|
|
2332
|
+
wk = cls(wk_path, store_fmt=store, fs_kwargs=fs_kwargs)
|
|
2333
|
+
|
|
2334
|
+
# actually make template inputs/resources persistent, now the workflow exists:
|
|
2335
|
+
grabber.write_persistence_data_to_workflow(wk)
|
|
2336
|
+
|
|
2337
|
+
if template.source_file:
|
|
2338
|
+
wk.artifacts_path.mkdir(exist_ok=False)
|
|
2339
|
+
src = Path(template.source_file)
|
|
2340
|
+
shutil.copy(src, wk.artifacts_path.joinpath(src.name))
|
|
2341
|
+
|
|
2342
|
+
return wk
|
|
2343
|
+
|
|
2344
|
+
def zip(
|
|
2345
|
+
self,
|
|
2346
|
+
path: str = ".",
|
|
2347
|
+
*,
|
|
2348
|
+
log: str | None = None,
|
|
2349
|
+
overwrite: bool = False,
|
|
2350
|
+
include_execute: bool = False,
|
|
2351
|
+
include_rechunk_backups: bool = False,
|
|
2352
|
+
) -> str:
|
|
2353
|
+
"""
|
|
2354
|
+
Convert the workflow to a zipped form.
|
|
2355
|
+
|
|
2356
|
+
Parameters
|
|
2357
|
+
----------
|
|
2358
|
+
path:
|
|
2359
|
+
Path at which to create the new zipped workflow. If this is an existing
|
|
2360
|
+
directory, the zip file will be created within this directory. Otherwise,
|
|
2361
|
+
this path is assumed to be the full file path to the new zip file.
|
|
2362
|
+
"""
|
|
2363
|
+
return self._store.zip(
|
|
2364
|
+
path=path,
|
|
2365
|
+
log=log,
|
|
2366
|
+
overwrite=overwrite,
|
|
2367
|
+
include_execute=include_execute,
|
|
2368
|
+
include_rechunk_backups=include_rechunk_backups,
|
|
2369
|
+
)
|
|
2370
|
+
|
|
2371
|
+
def unzip(self, path: str = ".", *, log: str | None = None) -> str:
|
|
2372
|
+
"""
|
|
2373
|
+
Convert the workflow to an unzipped form.
|
|
2374
|
+
|
|
2375
|
+
Parameters
|
|
2376
|
+
----------
|
|
2377
|
+
path:
|
|
2378
|
+
Path at which to create the new unzipped workflow. If this is an existing
|
|
2379
|
+
directory, the new workflow directory will be created within this directory.
|
|
2380
|
+
Otherwise, this path will represent the new workflow directory path.
|
|
2381
|
+
"""
|
|
2382
|
+
return self._store.unzip(path=path, log=log)
|
|
2383
|
+
|
|
2384
|
+
def copy(self, path: str | Path = ".") -> Path:
|
|
2385
|
+
"""Copy the workflow to a new path and return the copied workflow path."""
|
|
2386
|
+
return self._store.copy(path)
|
|
2387
|
+
|
|
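For orientation, a minimal usage sketch of the archive/copy methods above (illustrative only: `wk` is assumed to be an already-loaded workflow object, and the target directories are arbitrary):

```python
# Hedged sketch; `wk` and the paths below are assumptions, not part of the diff.
zip_path = wk.zip(path="backups", overwrite=True)  # zip file created inside existing dir "backups"
unzipped = wk.unzip(path="restored")               # new workflow directory created under "restored"
copied = wk.copy(path="copies")                    # plain directory copy; returns the new path
```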
2388
|
+
def delete(self) -> None:
|
|
2389
|
+
"""
|
|
2390
|
+
Delete the persistent data.
|
|
2391
|
+
"""
|
|
2392
|
+
self._store.delete()
|
|
2393
|
+
|
|
2394
|
+
def _delete_no_confirm(self) -> None:
|
|
2395
|
+
self._store.delete_no_confirm()
|
|
2396
|
+
|
|
2397
|
+
def get_parameters(self, id_lst: Iterable[int], **kwargs) -> Sequence[StoreParameter]:
|
|
2398
|
+
"""
|
|
2399
|
+
Get parameters known to the workflow.
|
|
2400
|
+
|
|
2401
|
+
Parameters
|
|
2402
|
+
----------
|
|
2403
|
+
id_lst:
|
|
2404
|
+
The indices of the parameters to retrieve.
|
|
2405
|
+
|
|
2406
|
+
Keyword Arguments
|
|
2407
|
+
-----------------
|
|
2408
|
+
dataset_copy: bool
|
|
2409
|
+
For Zarr stores only. If True, copy arrays as NumPy arrays.
|
|
2410
|
+
"""
|
|
2411
|
+
return self._store.get_parameters(id_lst, **kwargs)
|
|
2412
|
+
|
|
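A hedged sketch of how this retrieval might be called; the parameter IDs are arbitrary and `wk` is an assumed, already-open workflow:

```python
# Illustrative only; parameter IDs 0-2 are assumptions.
params = wk.get_parameters([0, 1, 2])                        # StoreParameter objects
params_np = wk.get_parameters([0, 1, 2], dataset_copy=True)  # Zarr stores: copy arrays as NumPy
```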
2413
|
+
@TimeIt.decorator
|
|
2414
|
+
def get_parameter_sources(self, id_lst: Iterable[int]) -> list[ParamSource]:
|
|
2415
|
+
"""
|
|
2416
|
+
Get parameter sources known to the workflow.
|
|
2417
|
+
"""
|
|
2418
|
+
return self._store.get_parameter_sources(id_lst)
|
|
2419
|
+
|
|
2420
|
+
@TimeIt.decorator
|
|
2421
|
+
def get_parameter_set_statuses(self, id_lst: Iterable[int]) -> list[bool]:
|
|
2422
|
+
"""
|
|
2423
|
+
Get whether some parameters are set.
|
|
2424
|
+
"""
|
|
2425
|
+
return self._store.get_parameter_set_statuses(id_lst)
|
|
2426
|
+
|
|
2427
|
+
@TimeIt.decorator
|
|
2428
|
+
def get_parameter(self, index: int, **kwargs) -> StoreParameter:
|
|
2429
|
+
"""
|
|
2430
|
+
Get a single parameter.
|
|
2431
|
+
|
|
2432
|
+
Parameters
|
|
2433
|
+
----------
|
|
2434
|
+
index:
|
|
2435
|
+
The index of the parameter to retrieve.
|
|
2436
|
+
|
|
2437
|
+
Keyword Arguments
|
|
2438
|
+
-----------------
|
|
2439
|
+
dataset_copy: bool
|
|
2440
|
+
For Zarr stores only. If True, copy arrays as NumPy arrays.
|
|
2441
|
+
"""
|
|
2442
|
+
return self.get_parameters((index,), **kwargs)[0]
|
|
2443
|
+
|
|
2444
|
+
@TimeIt.decorator
|
|
2445
|
+
def get_parameter_data(self, index: int, **kwargs) -> Any:
|
|
2446
|
+
"""
|
|
2447
|
+
Get the data relating to a parameter.
|
|
2448
|
+
"""
|
|
2449
|
+
param = self.get_parameter(index, **kwargs)
|
|
2450
|
+
if param.data is not None:
|
|
2451
|
+
return param.data
|
|
2452
|
+
else:
|
|
2453
|
+
return param.file
|
|
2454
|
+
|
|
2455
|
+
@TimeIt.decorator
|
|
2456
|
+
def get_parameter_source(self, index: int) -> ParamSource:
|
|
2457
|
+
"""
|
|
2458
|
+
Get the source of a particular parameter.
|
|
2459
|
+
"""
|
|
2460
|
+
return self.get_parameter_sources((index,))[0]
|
|
2461
|
+
|
|
2462
|
+
@TimeIt.decorator
|
|
2463
|
+
def is_parameter_set(self, index: int) -> bool:
|
|
2464
|
+
"""
|
|
2465
|
+
Test if a particular parameter is set.
|
|
2466
|
+
"""
|
|
2467
|
+
return self.get_parameter_set_statuses((index,))[0]
|
|
2468
|
+
|
|
2469
|
+
@TimeIt.decorator
|
|
2470
|
+
def get_all_parameters(self, **kwargs) -> list[StoreParameter]:
|
|
2471
|
+
"""
|
|
2472
|
+
Retrieve all persistent parameters.
|
|
2473
|
+
|
|
2474
|
+
Keyword Arguments
|
|
2475
|
+
-----------------
|
|
2476
|
+
dataset_copy: bool
|
|
2477
|
+
For Zarr stores only. If True, copy arrays as NumPy arrays.
|
|
2478
|
+
"""
|
|
2479
|
+
num_params = self._store._get_num_total_parameters()
|
|
2480
|
+
return self._store.get_parameters(range(num_params), **kwargs)
|
|
2481
|
+
|
|
2482
|
+
@TimeIt.decorator
|
|
2483
|
+
def get_all_parameter_sources(self, **kwargs) -> list[ParamSource]:
|
|
2484
|
+
"""Retrieve all persistent parameters sources."""
|
|
2485
|
+
num_params = self._store._get_num_total_parameters()
|
|
2486
|
+
return self._store.get_parameter_sources(range(num_params), **kwargs)
|
|
2487
|
+
|
|
2488
|
+
@TimeIt.decorator
|
|
2489
|
+
def get_all_parameter_data(self, **kwargs) -> dict[int, Any]:
|
|
2490
|
+
"""
|
|
2491
|
+
Retrieve all workflow parameter data.
|
|
2492
|
+
|
|
2493
|
+
Keyword Arguments
|
|
2494
|
+
-----------------
|
|
2495
|
+
dataset_copy: bool
|
|
2496
|
+
For Zarr stores only. If True, copy arrays as NumPy arrays.
|
|
2497
|
+
"""
|
|
2498
|
+
return {
|
|
2499
|
+
param.id_: (param.data if param.data is not None else param.file)
|
|
2500
|
+
for param in self.get_all_parameters(**kwargs)
|
|
2501
|
+
}
|
|
2502
|
+
|
|
2503
|
+
def check_parameters_exist(self, id_lst: int | list[int]) -> bool:
|
|
2504
|
+
"""
|
|
2505
|
+
Check if all the parameters exist.
|
|
2506
|
+
"""
|
|
2507
|
+
if isinstance(id_lst, int):
|
|
2508
|
+
return next(iter(self._store.check_parameters_exist((id_lst,))))
|
|
2509
|
+
return all(self._store.check_parameters_exist(id_lst))
|
|
2510
|
+
|
|
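Taken together, the parameter accessors above might be used as in the sketch below (illustrative only; `wk` and the parameter index are assumptions):

```python
# Illustrative only.
all_data = wk.get_all_parameter_data()  # {parameter ID: data, or file reference when data is unset}
if wk.is_parameter_set(0):
    print(wk.get_parameter_data(0), wk.get_parameter_source(0))
assert wk.check_parameters_exist(list(all_data))
```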
2511
|
+
@TimeIt.decorator
|
|
2512
|
+
def _add_unset_parameter_data(self, source: ParamSource) -> int:
|
|
2513
|
+
# TODO: use this for unset files as well
|
|
2514
|
+
return self._store.add_unset_parameter(source)
|
|
2515
|
+
|
|
2516
|
+
def _add_parameter_data(self, data, source: ParamSource) -> int:
|
|
2517
|
+
return self._store.add_set_parameter(data, source)
|
|
2518
|
+
|
|
2519
|
+
def _add_file(
|
|
2520
|
+
self,
|
|
2521
|
+
*,
|
|
2522
|
+
store_contents: bool,
|
|
2523
|
+
is_input: bool,
|
|
2524
|
+
source: ParamSource,
|
|
2525
|
+
path=None,
|
|
2526
|
+
contents=None,
|
|
2527
|
+
filename: str,
|
|
2528
|
+
) -> int:
|
|
2529
|
+
return self._store.add_file(
|
|
2530
|
+
store_contents=store_contents,
|
|
2531
|
+
is_input=is_input,
|
|
2532
|
+
source=source,
|
|
2533
|
+
path=path,
|
|
2534
|
+
contents=contents,
|
|
2535
|
+
filename=filename,
|
|
2536
|
+
)
|
|
2537
|
+
|
|
2538
|
+
def _set_file(
|
|
2539
|
+
self,
|
|
2540
|
+
param_id: int | list[int] | None,
|
|
2541
|
+
store_contents: bool,
|
|
2542
|
+
is_input: bool,
|
|
2543
|
+
path: Path | str,
|
|
2544
|
+
contents=None,
|
|
2545
|
+
filename: str | None = None,
|
|
2546
|
+
clean_up: bool = False,
|
|
2547
|
+
) -> None:
|
|
2548
|
+
self._store.set_file(
|
|
2549
|
+
param_id=cast("int", param_id),
|
|
2550
|
+
store_contents=store_contents,
|
|
2551
|
+
is_input=is_input,
|
|
2552
|
+
path=path,
|
|
2553
|
+
contents=contents,
|
|
2554
|
+
filename=filename,
|
|
2555
|
+
clean_up=clean_up,
|
|
2556
|
+
)
|
|
2557
|
+
|
|
2558
|
+
@overload
|
|
2559
|
+
def get_task_unique_names(
|
|
2560
|
+
self, map_to_insert_ID: Literal[False] = False
|
|
2561
|
+
) -> Sequence[str]: ...
|
|
2562
|
+
|
|
2563
|
+
@overload
|
|
2564
|
+
def get_task_unique_names(
|
|
2565
|
+
self, map_to_insert_ID: Literal[True]
|
|
2566
|
+
) -> Mapping[str, int]: ...
|
|
2567
|
+
|
|
2568
|
+
def get_task_unique_names(
|
|
2569
|
+
self, map_to_insert_ID: bool = False
|
|
2570
|
+
) -> Sequence[str] | Mapping[str, int]:
|
|
2571
|
+
"""Return the unique names of all workflow tasks.
|
|
2572
|
+
|
|
2573
|
+
Parameters
|
|
2574
|
+
----------
|
|
2575
|
+
map_to_insert_ID : bool
|
|
2576
|
+
If True, return a dict whose values are task insert IDs, otherwise return a
|
|
2577
|
+
list.
|
|
2578
|
+
|
|
2579
|
+
"""
|
|
2580
|
+
names = self._app.Task.get_task_unique_names(self.template.tasks)
|
|
2581
|
+
if map_to_insert_ID:
|
|
2582
|
+
return dict(zip(names, (task.insert_ID for task in self.template.tasks)))
|
|
2583
|
+
else:
|
|
2584
|
+
return names
|
|
2585
|
+
|
|
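A brief sketch of the two return forms (the task names shown are made up):

```python
# Illustrative only; `wk` and the task names are assumptions.
names = wk.get_task_unique_names()                        # e.g. ["generate", "process"]
by_iid = wk.get_task_unique_names(map_to_insert_ID=True)  # e.g. {"generate": 0, "process": 1}
```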
2586
|
+
def _get_new_task_unique_name(self, new_task: Task, new_index: int) -> str:
|
|
2587
|
+
task_templates = list(self.template.tasks)
|
|
2588
|
+
task_templates.insert(new_index, new_task)
|
|
2589
|
+
uniq_names = self._app.Task.get_task_unique_names(task_templates)
|
|
2590
|
+
|
|
2591
|
+
return uniq_names[new_index]
|
|
2592
|
+
|
|
2593
|
+
def _get_empty_pending(self) -> Pending:
|
|
2594
|
+
return {
|
|
2595
|
+
"template_components": {k: [] for k in TEMPLATE_COMP_TYPES},
|
|
2596
|
+
"tasks": [], # list of int
|
|
2597
|
+
"loops": [], # list of int
|
|
2598
|
+
"submissions": [], # list of int
|
|
2599
|
+
}
|
|
2600
|
+
|
|
2601
|
+
def _accept_pending(self) -> None:
|
|
2602
|
+
self._reset_pending()
|
|
2603
|
+
|
|
2604
|
+
def _reset_pending(self) -> None:
|
|
2605
|
+
self._pending = self._get_empty_pending()
|
|
2606
|
+
|
|
2607
|
+
def _reject_pending(self) -> None:
|
|
2608
|
+
"""Revert pending changes to the in-memory representation of the workflow.
|
|
2609
|
+
|
|
2610
|
+
This deletes new tasks, new template component data, new loops, and new
|
|
2611
|
+
submissions. Element additions to existing (non-pending) tasks are separately
|
|
2612
|
+
rejected/accepted by the WorkflowTask object.
|
|
2613
|
+
|
|
2614
|
+
"""
|
|
2615
|
+
for task_idx in self._pending["tasks"][::-1]:
|
|
2616
|
+
# iterate in reverse so the index references are correct
|
|
2617
|
+
self.tasks._remove_object(task_idx)
|
|
2618
|
+
self.template.tasks.pop(task_idx)
|
|
2619
|
+
|
|
2620
|
+
for comp_type, comp_indices in self._pending["template_components"].items():
|
|
2621
|
+
for comp_idx in comp_indices[::-1]:
|
|
2622
|
+
# iterate in reverse so the index references are correct
|
|
2623
|
+
tc = self.__template_components[comp_type]
|
|
2624
|
+
assert hasattr(tc, "_remove_object")
|
|
2625
|
+
tc._remove_object(comp_idx)
|
|
2626
|
+
|
|
2627
|
+
for loop_idx in self._pending["loops"][::-1]:
|
|
2628
|
+
# iterate in reverse so the index references are correct
|
|
2629
|
+
self.loops._remove_object(loop_idx)
|
|
2630
|
+
self.template.loops.pop(loop_idx)
|
|
2631
|
+
|
|
2632
|
+
for sub_idx in self._pending["submissions"][::-1]:
|
|
2633
|
+
# iterate in reverse so the index references are correct
|
|
2634
|
+
assert self._submissions is not None
|
|
2635
|
+
self._submissions.pop(sub_idx)
|
|
2636
|
+
|
|
2637
|
+
self._reset_pending()
|
|
2638
|
+
|
|
2639
|
+
@property
|
|
2640
|
+
def num_tasks(self) -> int:
|
|
2641
|
+
"""
|
|
2642
|
+
The total number of tasks.
|
|
2643
|
+
"""
|
|
2644
|
+
return self._store._get_num_total_tasks()
|
|
2645
|
+
|
|
2646
|
+
@property
|
|
2647
|
+
def num_submissions(self) -> int:
|
|
2648
|
+
"""
|
|
2649
|
+
The total number of job submissions.
|
|
2650
|
+
"""
|
|
2651
|
+
return (
|
|
2652
|
+
len(self._submissions)
|
|
2653
|
+
if self._submissions is not None
|
|
2654
|
+
else self._store._get_num_total_submissions()
|
|
2655
|
+
)
|
|
2656
|
+
|
|
2657
|
+
@property
|
|
2658
|
+
def num_elements(self) -> int:
|
|
2659
|
+
"""
|
|
2660
|
+
The total number of elements.
|
|
2661
|
+
"""
|
|
2662
|
+
return self._store._get_num_total_elements()
|
|
2663
|
+
|
|
2664
|
+
@property
|
|
2665
|
+
def num_element_iterations(self) -> int:
|
|
2666
|
+
"""
|
|
2667
|
+
The total number of element iterations.
|
|
2668
|
+
"""
|
|
2669
|
+
return self._store._get_num_total_elem_iters()
|
|
2670
|
+
|
|
2671
|
+
@property
|
|
2672
|
+
@TimeIt.decorator
|
|
2673
|
+
def num_EARs(self) -> int:
|
|
2674
|
+
"""
|
|
2675
|
+
The total number of element action runs.
|
|
2676
|
+
"""
|
|
2677
|
+
return self._store._get_num_total_EARs()
|
|
2678
|
+
|
|
2679
|
+
@property
|
|
2680
|
+
def num_loops(self) -> int:
|
|
2681
|
+
"""
|
|
2682
|
+
The total number of loops.
|
|
2683
|
+
"""
|
|
2684
|
+
return self._store._get_num_total_loops()
|
|
2685
|
+
|
|
2686
|
+
@property
|
|
2687
|
+
def artifacts_path(self) -> Path:
|
|
2688
|
+
"""
|
|
2689
|
+
Path to artifacts of the workflow (temporary files, etc.).
|
|
2690
|
+
"""
|
|
2691
|
+
# TODO: allow customisation of artifacts path at submission and resources level
|
|
2692
|
+
return Path(self.path) / "artifacts"
|
|
2693
|
+
|
|
2694
|
+
@property
|
|
2695
|
+
def input_files_path(self) -> Path:
|
|
2696
|
+
"""
|
|
2697
|
+
Path to input files for the workflow.
|
|
2698
|
+
"""
|
|
2699
|
+
return self.artifacts_path / self._input_files_dir_name
|
|
2700
|
+
|
|
2701
|
+
@property
|
|
2702
|
+
def submissions_path(self) -> Path:
|
|
2703
|
+
"""
|
|
2704
|
+
Path to submission data for this workflow.
|
|
2705
|
+
"""
|
|
2706
|
+
return self.artifacts_path / "submissions"
|
|
2707
|
+
|
|
2708
|
+
@property
|
|
2709
|
+
def task_artifacts_path(self) -> Path:
|
|
2710
|
+
"""
|
|
2711
|
+
Path to artifacts of tasks.
|
|
2712
|
+
"""
|
|
2713
|
+
return self.artifacts_path / "tasks"
|
|
2714
|
+
|
|
2715
|
+
@property
|
|
2716
|
+
def execution_path(self) -> Path:
|
|
2717
|
+
"""
|
|
2718
|
+
Path to the working directory used for execution.
|
|
2719
|
+
"""
|
|
2720
|
+
return Path(self.path) / self._exec_dir_name
|
|
2721
|
+
|
|
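For reference, a sketch of inspecting these counts and paths on an assumed workflow object `wk`:

```python
# Illustrative only.
print(wk.num_tasks, wk.num_elements, wk.num_element_iterations, wk.num_EARs, wk.num_loops)
print(wk.artifacts_path)   # <workflow path>/artifacts
print(wk.execution_path)   # working directory used during execution
```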
2722
|
+
@TimeIt.decorator
|
|
2723
|
+
def get_task_elements(
|
|
2724
|
+
self,
|
|
2725
|
+
task: WorkflowTask,
|
|
2726
|
+
idx_lst: list[int] | None = None,
|
|
2727
|
+
) -> list[Element]:
|
|
2728
|
+
"""
|
|
2729
|
+
Get the elements of a task.
|
|
2730
|
+
"""
|
|
2731
|
+
return [
|
|
2732
|
+
self._app.Element(
|
|
2733
|
+
task=task, **{k: v for k, v in te.items() if k != "task_ID"}
|
|
2734
|
+
)
|
|
2735
|
+
for te in self._store.get_task_elements(task.insert_ID, idx_lst)
|
|
2736
|
+
]
|
|
2737
|
+
|
|
2738
|
+
def set_EAR_start(
|
|
2739
|
+
self, run_id: int, run_dir: Path | None, port_number: int | None
|
|
2740
|
+
) -> None:
|
|
2741
|
+
"""Set the start time on an EAR."""
|
|
2742
|
+
self._app.logger.debug(f"Setting start for EAR ID {run_id!r}")
|
|
2743
|
+
with self._store.cached_load(), self.batch_update():
|
|
2744
|
+
self._store.set_EAR_start(run_id, run_dir, port_number)
|
|
2745
|
+
|
|
2746
|
+
def set_multi_run_starts(
|
|
2747
|
+
self, run_ids: list[int], run_dirs: list[Path | None], port_number: int
|
|
2748
|
+
) -> None:
|
|
2749
|
+
"""Set the start time on multiple runs."""
|
|
2750
|
+
self._app.logger.debug(f"Setting start for multiple run IDs {run_ids!r}")
|
|
2751
|
+
with self._store.cached_load(), self.batch_update():
|
|
2752
|
+
self._store.set_multi_run_starts(run_ids, run_dirs, port_number)
|
|
2753
|
+
|
|
2754
|
+
def set_EAR_end(
|
|
2755
|
+
self,
|
|
2756
|
+
block_act_key: BlockActionKey,
|
|
2757
|
+
run: ElementActionRun,
|
|
2758
|
+
exit_code: int,
|
|
2759
|
+
) -> None:
|
|
2760
|
+
"""Set the end time and exit code on an EAR.
|
|
2761
|
+
|
|
2762
|
+
If the exit code is non-zero, also set all downstream dependent EARs to be
|
|
2763
|
+
skipped. Also save any generated input/output files.
|
|
2764
|
+
|
|
2765
|
+
"""
|
|
2766
|
+
self._app.logger.debug(
|
|
2767
|
+
f"Setting end for run ID {run.id_!r} with exit code {exit_code!r}."
|
|
2768
|
+
)
|
|
2769
|
+
param_id: int | list[int] | None
|
|
2770
|
+
with self._store.cached_load(), self.batch_update():
|
|
2771
|
+
success = exit_code == 0 # TODO more sophisticated success heuristics
|
|
2772
|
+
if not run.skip:
|
|
2773
|
+
|
|
2774
|
+
is_aborted = False
|
|
2775
|
+
if run.action.abortable and exit_code == ABORT_EXIT_CODE:
|
|
2776
|
+
# the point of aborting an EAR is to continue with the workflow:
|
|
2777
|
+
is_aborted = True
|
|
2778
|
+
success = True
|
|
2779
|
+
|
|
2780
|
+
run_dir = run.get_directory()
|
|
2781
|
+
if run_dir:
|
|
2782
|
+
assert isinstance(run_dir, Path)
|
|
2783
|
+
for IFG_i in run.action.input_file_generators:
|
|
2784
|
+
inp_file = IFG_i.input_file
|
|
2785
|
+
self._app.logger.debug(
|
|
2786
|
+
f"Saving EAR input file: {inp_file.label!r} for EAR ID "
|
|
2787
|
+
f"{run.id_!r}."
|
|
2788
|
+
)
|
|
2789
|
+
param_id = run.data_idx[f"input_files.{inp_file.label}"]
|
|
2790
|
+
|
|
2791
|
+
file_paths = inp_file.value(directory=run_dir)
|
|
2792
|
+
for path_i in (
|
|
2793
|
+
file_paths if isinstance(file_paths, list) else [file_paths]
|
|
2794
|
+
):
|
|
2795
|
+
full_path = run_dir.joinpath(path_i)
|
|
2796
|
+
if not full_path.exists():
|
|
2797
|
+
self._app.logger.debug(
|
|
2798
|
+
f"expected input file {path_i!r} does not "
|
|
2799
|
+
f"exist, so setting run to an error state "
|
|
2800
|
+
f"(if not aborted)."
|
|
2801
|
+
)
|
|
2802
|
+
if not is_aborted and success is True:
|
|
2803
|
+
# this is unlikely to happen, but could happen
|
|
2804
|
+
# if the input file is deleted in between
|
|
2805
|
+
# the input file generator completing and this
|
|
2806
|
+
# code being run
|
|
2807
|
+
success = False
|
|
2808
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
2809
|
+
else:
|
|
2810
|
+
self._set_file(
|
|
2811
|
+
param_id=param_id,
|
|
2812
|
+
store_contents=True, # TODO: make optional according to IFG
|
|
2813
|
+
is_input=False,
|
|
2814
|
+
path=full_path,
|
|
2815
|
+
)
|
|
2816
|
+
|
|
2817
|
+
if run.action.script_data_out_has_files:
|
|
2818
|
+
try:
|
|
2819
|
+
run._param_save("script", block_act_key, run_dir)
|
|
2820
|
+
except FileNotFoundError:
|
|
2821
|
+
self._app.logger.debug(
|
|
2822
|
+
f"script did not generate an expected output parameter "
|
|
2823
|
+
f"file (block_act_key={block_act_key!r}), so setting run "
|
|
2824
|
+
f"to an error state (if not aborted)."
|
|
2825
|
+
)
|
|
2826
|
+
if not is_aborted and success is True:
|
|
2827
|
+
success = False
|
|
2828
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
2829
|
+
|
|
2830
|
+
if run.action.program_data_out_has_files:
|
|
2831
|
+
try:
|
|
2832
|
+
run._param_save("program", block_act_key, run_dir)
|
|
2833
|
+
except FileNotFoundError:
|
|
2834
|
+
self._app.logger.debug(
|
|
2835
|
+
f"program did not generate an expected output parameter "
|
|
2836
|
+
f"file (block_act_key={block_act_key!r}), so setting run "
|
|
2837
|
+
f"to an error state (if not aborted)."
|
|
2838
|
+
)
|
|
2839
|
+
if not is_aborted and success is True:
|
|
2840
|
+
success = False
|
|
2841
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
2842
|
+
|
|
2843
|
+
# Save action-level files: (TODO: refactor with below for OFPs)
|
|
2844
|
+
for save_file_j in run.action.save_files:
|
|
2845
|
+
self._app.logger.debug(
|
|
2846
|
+
f"Saving file: {save_file_j.label!r} for EAR ID "
|
|
2847
|
+
f"{run.id_!r}."
|
|
2848
|
+
)
|
|
2849
|
+
try:
|
|
2850
|
+
param_id = run.data_idx[f"output_files.{save_file_j.label}"]
|
|
2851
|
+
except KeyError:
|
|
2852
|
+
# We might be saving a file that is not a defined
|
|
2853
|
+
# "output file"; this will avoid saving a reference in the
|
|
2854
|
+
# parameter data:
|
|
2855
|
+
param_id = None
|
|
2856
|
+
|
|
2857
|
+
file_paths = save_file_j.value(directory=run_dir)
|
|
2858
|
+
self._app.logger.debug(
|
|
2859
|
+
f"Saving output file paths: {file_paths!r}"
|
|
2860
|
+
)
|
|
2861
|
+
|
|
2862
|
+
for path_i in (
|
|
2863
|
+
file_paths if isinstance(file_paths, list) else [file_paths]
|
|
2864
|
+
):
|
|
2865
|
+
full_path = run_dir.joinpath(path_i)
|
|
2866
|
+
if not full_path.exists():
|
|
2867
|
+
self._app.logger.debug(
|
|
2868
|
+
f"expected file to save {path_i!r} does not "
|
|
2869
|
+
f"exist, so setting run to an error state "
|
|
2870
|
+
f"(if not aborted)."
|
|
2871
|
+
)
|
|
2872
|
+
if not is_aborted and success is True:
|
|
2873
|
+
# this is unlikely to happen, but could happen
|
|
2874
|
+
# if the input file is deleted in between
|
|
2875
|
+
# the input file generator completing and this
|
|
2876
|
+
# code being run
|
|
2877
|
+
success = False
|
|
2878
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
2879
|
+
else:
|
|
2880
|
+
self._set_file(
|
|
2881
|
+
param_id=param_id,
|
|
2882
|
+
store_contents=True,
|
|
2883
|
+
is_input=False,
|
|
2884
|
+
path=full_path,
|
|
2885
|
+
clean_up=(save_file_j in run.action.clean_up),
|
|
2886
|
+
)
|
|
2887
|
+
|
|
2888
|
+
for OFP_i in run.action.output_file_parsers:
|
|
2889
|
+
for save_file_j in OFP_i._save_files:
|
|
2890
|
+
self._app.logger.debug(
|
|
2891
|
+
f"Saving EAR output file: {save_file_j.label!r} for EAR ID "
|
|
2892
|
+
f"{run.id_!r}."
|
|
2893
|
+
)
|
|
2894
|
+
try:
|
|
2895
|
+
param_id = run.data_idx[
|
|
2896
|
+
f"output_files.{save_file_j.label}"
|
|
2897
|
+
]
|
|
2898
|
+
except KeyError:
|
|
2899
|
+
# We might be saving a file that is not a defined
|
|
2900
|
+
# "output file"; this will avoid saving a reference in the
|
|
2901
|
+
# parameter data:
|
|
2902
|
+
param_id = None
|
|
2903
|
+
|
|
2904
|
+
file_paths = save_file_j.value(directory=run_dir)
|
|
2905
|
+
self._app.logger.debug(
|
|
2906
|
+
f"Saving EAR output file paths: {file_paths!r}"
|
|
2907
|
+
)
|
|
2908
|
+
|
|
2909
|
+
for path_i in (
|
|
2910
|
+
file_paths
|
|
2911
|
+
if isinstance(file_paths, list)
|
|
2912
|
+
else [file_paths]
|
|
2913
|
+
):
|
|
2914
|
+
full_path = run_dir.joinpath(path_i)
|
|
2915
|
+
if not full_path.exists():
|
|
2916
|
+
self._app.logger.debug(
|
|
2917
|
+
f"expected output file parser `save_files` file "
|
|
2918
|
+
f"{path_i!r} does not exist, so setting run "
|
|
2919
|
+
f"to an error state (if not aborted)."
|
|
2920
|
+
)
|
|
2921
|
+
if not is_aborted and success is True:
|
|
2922
|
+
success = False
|
|
2923
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
2924
|
+
else:
|
|
2925
|
+
self._set_file(
|
|
2926
|
+
param_id=param_id,
|
|
2927
|
+
store_contents=True, # TODO: make optional according to OFP
|
|
2928
|
+
is_input=False,
|
|
2929
|
+
path=full_path,
|
|
2930
|
+
clean_up=(save_file_j in OFP_i.clean_up),
|
|
2931
|
+
)
|
|
2932
|
+
|
|
2933
|
+
if (
|
|
2934
|
+
run.resources.skip_downstream_on_failure
|
|
2935
|
+
and not success
|
|
2936
|
+
and run.skip_reason is not SkipReason.LOOP_TERMINATION
|
|
2937
|
+
):
|
|
2938
|
+
# loop termination skips are already propagated
|
|
2939
|
+
for EAR_dep_ID in run.get_dependent_EARs(as_objects=False):
|
|
2940
|
+
self._app.logger.debug(
|
|
2941
|
+
f"Setting EAR ID {EAR_dep_ID!r} to skip because it depends on"
|
|
2942
|
+
f" EAR ID {run.id_!r}, which exited with a non-zero exit code:"
|
|
2943
|
+
f" {exit_code!r}."
|
|
2944
|
+
)
|
|
2945
|
+
self._store.set_EAR_skip(
|
|
2946
|
+
{EAR_dep_ID: SkipReason.UPSTREAM_FAILURE.value}
|
|
2947
|
+
)
|
|
2948
|
+
|
|
2949
|
+
self._store.set_EAR_end(run.id_, exit_code, success, run.action.requires_dir)
|
|
2950
|
+
|
|
2951
|
+
def set_multi_run_ends(
|
|
2952
|
+
self,
|
|
2953
|
+
runs: dict[
|
|
2954
|
+
BlockActionKey,
|
|
2955
|
+
list[tuple[ElementActionRun, int, Path | None]],
|
|
2956
|
+
],
|
|
2957
|
+
) -> None:
|
|
2958
|
+
"""Set end times and exit codes on multiple runs.
|
|
2959
|
+
|
|
2960
|
+
If the exit code is non-zero, also set all downstream dependent runs to be
|
|
2961
|
+
skipped. Also save any generated input/output files."""
|
|
2962
|
+
|
|
2963
|
+
self._app.logger.debug("Setting end for multiple run IDs.")
|
|
2964
|
+
param_id: int | list[int] | None
|
|
2965
|
+
with self._store.cached_load(), self.batch_update():
|
|
2966
|
+
run_ids = []
|
|
2967
|
+
run_dirs = []
|
|
2968
|
+
exit_codes = []
|
|
2969
|
+
successes = []
|
|
2970
|
+
for block_act_key, run_dat in runs.items():
|
|
2971
|
+
for run, exit_code, run_dir in run_dat:
|
|
2972
|
+
|
|
2973
|
+
success = (
|
|
2974
|
+
exit_code == 0
|
|
2975
|
+
) # TODO more sophisticated success heuristics
|
|
2976
|
+
self._app.logger.info(
|
|
2977
|
+
f"setting end for run {run.id_} with exit_code={exit_code}, "
|
|
2978
|
+
f"success={success}, skip={run.skip!r}, and skip_reason="
|
|
2979
|
+
f"{run.skip_reason!r}."
|
|
2980
|
+
)
|
|
2981
|
+
if not run.skip:
|
|
2982
|
+
self._app.logger.info("run was not skipped.")
|
|
2983
|
+
is_aborted = False
|
|
2984
|
+
if run.action.abortable and exit_code == ABORT_EXIT_CODE:
|
|
2985
|
+
# the point of aborting an EAR is to continue with the
|
|
2986
|
+
# workflow:
|
|
2987
|
+
self._app.logger.info(
|
|
2988
|
+
"run was abortable and exit code was ABORT_EXIT_CODE,"
|
|
2989
|
+
" so setting success to True."
|
|
2990
|
+
)
|
|
2991
|
+
is_aborted = True
|
|
2992
|
+
success = True
|
|
2993
|
+
|
|
2994
|
+
run_dir = run.get_directory()
|
|
2995
|
+
if run_dir:
|
|
2996
|
+
assert isinstance(run_dir, Path)
|
|
2997
|
+
for IFG_i in run.action.input_file_generators:
|
|
2998
|
+
self._app.logger.info(f"setting IFG file {IFG_i!r}")
|
|
2999
|
+
inp_file = IFG_i.input_file
|
|
3000
|
+
self._app.logger.debug(
|
|
3001
|
+
f"Saving EAR input file: {inp_file.label!r} for EAR "
|
|
3002
|
+
f"ID {run.id_!r}."
|
|
3003
|
+
)
|
|
3004
|
+
param_id = run.data_idx[f"input_files.{inp_file.label}"]
|
|
3005
|
+
|
|
3006
|
+
file_paths = inp_file.value(directory=run_dir)
|
|
3007
|
+
for path_i in (
|
|
3008
|
+
file_paths
|
|
3009
|
+
if isinstance(file_paths, list)
|
|
3010
|
+
else [file_paths]
|
|
3011
|
+
):
|
|
3012
|
+
full_path = run_dir.joinpath(path_i)
|
|
3013
|
+
if not full_path.exists():
|
|
3014
|
+
self._app.logger.debug(
|
|
3015
|
+
f"expected input file {path_i!r} does not "
|
|
3016
|
+
f"exist, so setting run to an error state "
|
|
3017
|
+
f"(if not aborted)."
|
|
3018
|
+
)
|
|
3019
|
+
if not is_aborted and success is True:
|
|
3020
|
+
# this is unlikely to happen, but could happen
|
|
3021
|
+
# if the input file is deleted in between
|
|
3022
|
+
# the input file generator completing and this
|
|
3023
|
+
# code being run
|
|
3024
|
+
success = False
|
|
3025
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
3026
|
+
else:
|
|
3027
|
+
self._set_file(
|
|
3028
|
+
param_id=param_id,
|
|
3029
|
+
store_contents=True, # TODO: make optional according to IFG
|
|
3030
|
+
is_input=False,
|
|
3031
|
+
path=full_path,
|
|
3032
|
+
)
|
|
3033
|
+
|
|
3034
|
+
if run.action.script_data_out_has_files:
|
|
3035
|
+
self._app.logger.info(
|
|
3036
|
+
f"saving script-generated parameters."
|
|
3037
|
+
)
|
|
3038
|
+
try:
|
|
3039
|
+
run._param_save("script", block_act_key, run_dir)
|
|
3040
|
+
except FileNotFoundError:
|
|
3041
|
+
# script did not generate the output parameter file,
|
|
3042
|
+
# so set a failed exit code (if we did not abort the
|
|
3043
|
+
# run):
|
|
3044
|
+
self._app.logger.debug(
|
|
3045
|
+
f"script did not generate an expected output "
|
|
3046
|
+
f"parameter file (block_act_key="
|
|
3047
|
+
f"{block_act_key!r}), so setting run to an error "
|
|
3048
|
+
f"state (if not aborted)."
|
|
3049
|
+
)
|
|
3050
|
+
if not is_aborted and success is True:
|
|
3051
|
+
success = False
|
|
3052
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
3053
|
+
|
|
3054
|
+
if run.action.program_data_out_has_files:
|
|
3055
|
+
self._app.logger.info(
|
|
3056
|
+
f"saving program-generated parameters."
|
|
3057
|
+
)
|
|
3058
|
+
try:
|
|
3059
|
+
run._param_save("program", block_act_key, run_dir)
|
|
3060
|
+
except FileNotFoundError:
|
|
3061
|
+
# program did not generate the output parameter file,
|
|
3062
|
+
# so set a failed exit code (if we did not abort the
|
|
3063
|
+
# run):
|
|
3064
|
+
self._app.logger.debug(
|
|
3065
|
+
f"program did not generate an expected output "
|
|
3066
|
+
f"parameter file (block_act_key="
|
|
3067
|
+
f"{block_act_key!r}), so setting run to an error "
|
|
3068
|
+
f"state (if not aborted)."
|
|
3069
|
+
)
|
|
3070
|
+
if not is_aborted and success is True:
|
|
3071
|
+
success = False
|
|
3072
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
3073
|
+
|
|
3074
|
+
# Save action-level files: (TODO: refactor with below for OFPs)
|
|
3075
|
+
for save_file_j in run.action.save_files:
|
|
3076
|
+
self._app.logger.info(
|
|
3077
|
+
f"saving action-level file {save_file_j!r}."
|
|
3078
|
+
)
|
|
3079
|
+
self._app.logger.debug(
|
|
3080
|
+
f"Saving file: {save_file_j.label!r} for EAR ID "
|
|
3081
|
+
f"{run.id_!r}."
|
|
3082
|
+
)
|
|
3083
|
+
try:
|
|
3084
|
+
param_id = run.data_idx[
|
|
3085
|
+
f"output_files.{save_file_j.label}"
|
|
3086
|
+
]
|
|
3087
|
+
except KeyError:
|
|
3088
|
+
# We might be saving a file that is not a defined
|
|
3089
|
+
# "output file"; this will avoid saving a reference in
|
|
3090
|
+
# the parameter data:
|
|
3091
|
+
param_id = None
|
|
3092
|
+
|
|
3093
|
+
file_paths = save_file_j.value(directory=run_dir)
|
|
3094
|
+
self._app.logger.debug(
|
|
3095
|
+
f"Saving output file paths: {file_paths!r}"
|
|
3096
|
+
)
|
|
3097
|
+
for path_i in (
|
|
3098
|
+
file_paths
|
|
3099
|
+
if isinstance(file_paths, list)
|
|
3100
|
+
else [file_paths]
|
|
3101
|
+
):
|
|
3102
|
+
full_path = run_dir.joinpath(path_i)
|
|
3103
|
+
if not full_path.exists():
|
|
3104
|
+
self._app.logger.debug(
|
|
3105
|
+
f"expected file to save {path_i!r} does not "
|
|
3106
|
+
f"exist, so setting run to an error state "
|
|
3107
|
+
f"(if not aborted)."
|
|
3108
|
+
)
|
|
3109
|
+
if not is_aborted and success is True:
|
|
3110
|
+
# this is unlikely to happen, but could happen
|
|
3111
|
+
# if the input file is deleted in between
|
|
3112
|
+
# the input file generator completing and this
|
|
3113
|
+
# code being run
|
|
3114
|
+
success = False
|
|
3115
|
+
exit_code = 1 # TODO more custom exit codes?
|
|
3116
|
+
else:
|
|
3117
|
+
self._set_file(
|
|
3118
|
+
param_id=param_id,
|
|
3119
|
+
store_contents=True,
|
|
3120
|
+
is_input=False,
|
|
3121
|
+
path=full_path,
|
|
3122
|
+
clean_up=(save_file_j in run.action.clean_up),
|
|
3123
|
+
)
|
|
3124
|
+
|
|
3125
|
+
for OFP_i in run.action.output_file_parsers:
|
|
3126
|
+
self._app.logger.info(
|
|
3127
|
+
f"saving files from OFP: {OFP_i!r}."
|
|
3128
|
+
)
|
|
3129
|
+
for save_file_j in OFP_i._save_files:
|
|
3130
|
+
self._app.logger.debug(
|
|
3131
|
+
f"Saving EAR output file: {save_file_j.label!r} "
|
|
3132
|
+
f"for EAR ID {run.id_!r}."
|
|
3133
|
+
)
|
|
3134
|
+
try:
|
|
3135
|
+
param_id = run.data_idx[
|
|
3136
|
+
f"output_files.{save_file_j.label}"
|
|
3137
|
+
]
|
|
3138
|
+
except KeyError:
|
|
3139
|
+
# We might be saving a file that is not a defined
|
|
3140
|
+
# "output file"; this will avoid saving a
|
|
3141
|
+
# reference in the parameter data:
|
|
3142
|
+
param_id = None
|
|
3143
|
+
|
|
3144
|
+
file_paths = save_file_j.value(directory=run_dir)
|
|
3145
|
+
self._app.logger.debug(
|
|
3146
|
+
f"Saving EAR output file paths: {file_paths!r}"
|
|
3147
|
+
)
|
|
3148
|
+
|
|
3149
|
+
for path_i in (
|
|
3150
|
+
file_paths
|
|
3151
|
+
if isinstance(file_paths, list)
|
|
3152
|
+
else [file_paths]
|
|
3153
|
+
):
|
|
3154
|
+
full_path = run_dir.joinpath(path_i)
|
|
3155
|
+
if not full_path.exists():
|
|
3156
|
+
self._app.logger.debug(
|
|
3157
|
+
f"expected output file parser `save_files` file "
|
|
3158
|
+
f"{path_i!r} does not exist, so setting run "
|
|
3159
|
+
f"to an error state (if not aborted)."
|
|
3160
|
+
)
|
|
3161
|
+
if not is_aborted and success is True:
|
|
3162
|
+
success = False
|
|
3163
|
+
exit_code = (
|
|
3164
|
+
1 # TODO more custom exit codes?
|
|
3165
|
+
)
|
|
3166
|
+
else:
|
|
3167
|
+
self._set_file(
|
|
3168
|
+
param_id=param_id,
|
|
3169
|
+
store_contents=True, # TODO: make optional according to OFP
|
|
3170
|
+
is_input=False,
|
|
3171
|
+
path=full_path,
|
|
3172
|
+
clean_up=(save_file_j in OFP_i.clean_up),
|
|
3173
|
+
)
|
|
3174
|
+
|
|
3175
|
+
else:
|
|
3176
|
+
self._app.logger.info(
|
|
3177
|
+
f"run was skipped: reason: {run.skip_reason!r}."
|
|
3178
|
+
)
|
|
3179
|
+
|
|
3180
|
+
if (
|
|
3181
|
+
run.resources.skip_downstream_on_failure
|
|
3182
|
+
and not success
|
|
3183
|
+
and run.skip_reason is not SkipReason.LOOP_TERMINATION
|
|
3184
|
+
):
|
|
3185
|
+
# run failed
|
|
3186
|
+
self._app.logger.info(
|
|
3187
|
+
"run was not succcess and skip reason was not "
|
|
3188
|
+
"LOOP_TERMINATION."
|
|
3189
|
+
)
|
|
3190
|
+
# loop termination skips are already propagated
|
|
3191
|
+
for EAR_dep_ID in run.get_dependent_EARs(as_objects=False):
|
|
3192
|
+
# TODO: `get_dependent_EARs` seems to be stuck in a
|
|
3193
|
+
# recursion for some workflows
|
|
3194
|
+
# TODO: this needs to be recursive?
|
|
3195
|
+
self._app.logger.info(
|
|
3196
|
+
f"Setting EAR ID {EAR_dep_ID!r} to skip because it "
|
|
3197
|
+
f"depends on EAR ID {run.id_!r}, which exited with a "
|
|
3198
|
+
f"non-zero exit code: {exit_code!r}."
|
|
3199
|
+
)
|
|
3200
|
+
self._store.set_EAR_skip(
|
|
3201
|
+
{EAR_dep_ID: SkipReason.UPSTREAM_FAILURE.value}
|
|
3202
|
+
)
|
|
3203
|
+
else:
|
|
3204
|
+
self._app.logger.info(
|
|
3205
|
+
"`skip_downstream_on_failure` is False, run was "
|
|
3206
|
+
"succcess, or skip reason was LOOP_TERMINATION."
|
|
3207
|
+
)
|
|
3208
|
+
|
|
3209
|
+
run_ids.append(run.id_)
|
|
3210
|
+
run_dirs.append(run_dir)
|
|
3211
|
+
exit_codes.append(exit_code)
|
|
3212
|
+
successes.append(success)
|
|
3213
|
+
|
|
3214
|
+
self._store.set_multi_run_ends(run_ids, run_dirs, exit_codes, successes)
|
|
3215
|
+
|
|
3216
|
+
def set_EAR_skip(self, skip_reasons: dict[int, SkipReason]) -> None:
|
|
3217
|
+
"""
|
|
3218
|
+
Record that an EAR is to be skipped due to an upstream failure or loop
|
|
3219
|
+
termination condition being met.
|
|
3220
|
+
"""
|
|
3221
|
+
with self._store.cached_load(), self.batch_update():
|
|
3222
|
+
self._store.set_EAR_skip({k: v.value for k, v in skip_reasons.items()})
|
|
3223
|
+
|
|
3224
|
+
def get_EAR_skipped(self, EAR_ID: int) -> int:
|
|
3225
|
+
"""Check if an EAR is to be skipped."""
|
|
3226
|
+
with self._store.cached_load():
|
|
3227
|
+
return self._store.get_EAR_skipped(EAR_ID)
|
|
3228
|
+
|
|
3229
|
+
@TimeIt.decorator
|
|
3230
|
+
def set_parameter_value(
|
|
3231
|
+
self, param_id: int | list[int], value: Any, commit: bool = False
|
|
3232
|
+
) -> None:
|
|
3233
|
+
"""
|
|
3234
|
+
Set the value of a parameter.
|
|
3235
|
+
"""
|
|
3236
|
+
with self._store.cached_load(), self.batch_update():
|
|
3237
|
+
self._store.set_parameter_value(cast("int", param_id), value)
|
|
3238
|
+
|
|
3239
|
+
if commit:
|
|
3240
|
+
# force commit now:
|
|
3241
|
+
self._store._pending.commit_all()
|
|
3242
|
+
|
|
3243
|
+
@TimeIt.decorator
|
|
3244
|
+
def set_parameter_values(self, values: dict[int, Any], commit: bool = False) -> None:
|
|
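"""Set the values of multiple parameters in one batch."""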
3245
|
+
with self._store.cached_load(), self.batch_update(), self._store.cache_ctx():
|
|
3246
|
+
self._store.set_parameter_values(values)
|
|
3247
|
+
|
|
3248
|
+
if commit:
|
|
3249
|
+
# force commit now:
|
|
3250
|
+
self._store._pending.commit_all()
|
|
3251
|
+
|
|
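A hedged usage sketch; the parameter IDs and values below are arbitrary assumptions:

```python
# Illustrative only.
wk.set_parameter_value(12, 3.14, commit=True)       # single parameter, committed immediately
wk.set_parameter_values({12: 3.14, 13: [1, 2, 3]})  # batched form; commit deferred unless commit=True
```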
3252
|
+
def set_EARs_initialised(self, iter_ID: int) -> None:
|
|
3253
|
+
"""
|
|
3254
|
+
Set :py:attr:`~hpcflow.app.ElementIteration.EARs_initialised` to True for the
|
|
3255
|
+
specified iteration.
|
|
3256
|
+
"""
|
|
3257
|
+
with self._store.cached_load(), self.batch_update():
|
|
3258
|
+
self._store.set_EARs_initialised(iter_ID)
|
|
3259
|
+
|
|
3260
|
+
def elements(self) -> Iterator[Element]:
|
|
3261
|
+
"""
|
|
3262
|
+
Get the elements of the workflow's tasks.
|
|
3263
|
+
"""
|
|
3264
|
+
for task in self.tasks:
|
|
3265
|
+
for element in task.elements[:]:
|
|
3266
|
+
yield element
|
|
3267
|
+
|
|
3268
|
+
@overload
|
|
3269
|
+
def get_iteration_task_pathway(
|
|
3270
|
+
self,
|
|
3271
|
+
*,
|
|
3272
|
+
ret_iter_IDs: Literal[False] = False,
|
|
3273
|
+
ret_data_idx: Literal[False] = False,
|
|
3274
|
+
) -> Sequence[tuple[int, LoopIndex[str, int]]]: ...
|
|
3275
|
+
|
|
3276
|
+
@overload
|
|
3277
|
+
def get_iteration_task_pathway(
|
|
3278
|
+
self, *, ret_iter_IDs: Literal[False] = False, ret_data_idx: Literal[True]
|
|
3279
|
+
) -> Sequence[tuple[int, LoopIndex[str, int], tuple[Mapping[str, int], ...]]]: ...
|
|
3280
|
+
|
|
3281
|
+
@overload
|
|
3282
|
+
def get_iteration_task_pathway(
|
|
3283
|
+
self, *, ret_iter_IDs: Literal[True], ret_data_idx: Literal[False] = False
|
|
3284
|
+
) -> Sequence[tuple[int, LoopIndex[str, int], tuple[int, ...]]]: ...
|
|
3285
|
+
|
|
3286
|
+
@overload
|
|
3287
|
+
def get_iteration_task_pathway(
|
|
3288
|
+
self, *, ret_iter_IDs: Literal[True], ret_data_idx: Literal[True]
|
|
3289
|
+
) -> Sequence[
|
|
3290
|
+
tuple[int, LoopIndex[str, int], tuple[int, ...], tuple[Mapping[str, int], ...]]
|
|
3291
|
+
]: ...
|
|
3292
|
+
|
|
3293
|
+
@TimeIt.decorator
|
|
3294
|
+
def get_iteration_task_pathway(
|
|
3295
|
+
self, ret_iter_IDs: bool = False, ret_data_idx: bool = False
|
|
3296
|
+
) -> Sequence[tuple]:
|
|
3297
|
+
"""
|
|
3298
|
+
Get the iteration task pathway.
|
|
3299
|
+
"""
|
|
3300
|
+
pathway: list[_Pathway] = []
|
|
3301
|
+
for task in self.tasks:
|
|
3302
|
+
pathway.append(_Pathway(task.insert_ID))
|
|
3303
|
+
|
|
3304
|
+
added_loop_names: set[str] = set()
|
|
3305
|
+
for _ in range(self.num_loops):
|
|
3306
|
+
for loop in self.loops:
|
|
3307
|
+
if loop.name in added_loop_names:
|
|
3308
|
+
continue
|
|
3309
|
+
elif set(loop.parents).issubset(added_loop_names):
|
|
3310
|
+
# add a loop only once its parents have been added:
|
|
3311
|
+
to_add = loop
|
|
3312
|
+
break
|
|
3313
|
+
else:
|
|
3314
|
+
raise RuntimeError(
|
|
3315
|
+
"Failed to find a loop whose parents have already been added to the "
|
|
3316
|
+
"iteration task pathway."
|
|
3317
|
+
)
|
|
3318
|
+
|
|
3319
|
+
iIDs = to_add.task_insert_IDs
|
|
3320
|
+
relevant_idx = (
|
|
3321
|
+
idx for idx, path_i in enumerate(pathway) if path_i.id_ in iIDs
|
|
3322
|
+
)
|
|
3323
|
+
|
|
3324
|
+
for num_add_k, num_add in to_add.num_added_iterations.items():
|
|
3325
|
+
parent_loop_idx = list(zip(to_add.parents, num_add_k))
|
|
3326
|
+
replacement: list[_Pathway] = []
|
|
3327
|
+
repl_idx: list[int] = []
|
|
3328
|
+
for i in range(num_add):
|
|
3329
|
+
for p_idx, path in enumerate(pathway):
|
|
3330
|
+
if path.id_ not in iIDs:
|
|
3331
|
+
continue
|
|
3332
|
+
if all(path.names[k] == v for k, v in parent_loop_idx):
|
|
3333
|
+
new_path = copy.deepcopy(path)
|
|
3334
|
+
new_path.names += {to_add.name: i}
|
|
3335
|
+
repl_idx.append(p_idx)
|
|
3336
|
+
replacement.append(new_path)
|
|
3337
|
+
|
|
3338
|
+
if replacement:
|
|
3339
|
+
pathway = replace_items(
|
|
3340
|
+
pathway, min(repl_idx), max(repl_idx) + 1, replacement
|
|
3341
|
+
)
|
|
3342
|
+
|
|
3343
|
+
added_loop_names.add(to_add.name)
|
|
3344
|
+
|
|
3345
|
+
if added_loop_names != set(loop.name for loop in self.loops):
|
|
3346
|
+
raise RuntimeError(
|
|
3347
|
+
"Not all loops have been considered in the iteration task pathway."
|
|
3348
|
+
)
|
|
3349
|
+
|
|
3350
|
+
if ret_iter_IDs or ret_data_idx:
|
|
3351
|
+
all_iters = self.get_all_element_iterations()
|
|
3352
|
+
for path_i in pathway:
|
|
3353
|
+
i_iters = [
|
|
3354
|
+
iter_j
|
|
3355
|
+
for iter_j in all_iters
|
|
3356
|
+
if (
|
|
3357
|
+
iter_j.task.insert_ID == path_i.id_
|
|
3358
|
+
and iter_j.loop_idx == path_i.names
|
|
3359
|
+
)
|
|
3360
|
+
]
|
|
3361
|
+
if ret_iter_IDs:
|
|
3362
|
+
path_i.iter_ids.extend(elit.id_ for elit in i_iters)
|
|
3363
|
+
if ret_data_idx:
|
|
3364
|
+
path_i.data_idx.extend(elit.get_data_idx() for elit in i_iters)
|
|
3365
|
+
|
|
3366
|
+
return [
|
|
3367
|
+
path.as_tuple(ret_iter_IDs=ret_iter_IDs, ret_data_idx=ret_data_idx)
|
|
3368
|
+
for path in pathway
|
|
3369
|
+
]
|
|
3370
|
+
|
|
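The shape of the returned pathway, sketched with made-up task insert IDs and a hypothetical loop name:

```python
# Illustrative only; "my_loop" and the IDs are assumptions.
pathway = wk.get_iteration_task_pathway()
# e.g. [(0, {}), (1, {"my_loop": 0}), (1, {"my_loop": 1}), (2, {})]
with_ids = wk.get_iteration_task_pathway(ret_iter_IDs=True)
# each tuple gains a tuple of element-iteration IDs as its third item
```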
3371
|
+
@TimeIt.decorator
|
|
3372
|
+
def _submit(
|
|
3373
|
+
self,
|
|
3374
|
+
status: Status | None = None,
|
|
3375
|
+
ignore_errors: bool = False,
|
|
3376
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
|
3377
|
+
print_stdout: bool = False,
|
|
3378
|
+
add_to_known: bool = True,
|
|
3379
|
+
tasks: Sequence[int] | None = None,
|
|
3380
|
+
) -> tuple[Sequence[SubmissionFailure], Mapping[int, Sequence[int]]]:
|
|
3381
|
+
"""Submit outstanding EARs for execution."""
|
|
3382
|
+
|
|
3383
|
+
# generate a new submission if there are no pending submissions:
|
|
3384
|
+
if not (pending := [sub for sub in self.submissions if sub.needs_submit]):
|
|
3385
|
+
if status:
|
|
3386
|
+
status.update("Adding new submission...")
|
|
3387
|
+
if not (
|
|
3388
|
+
new_sub := self._add_submission(
|
|
3389
|
+
tasks=tasks,
|
|
3390
|
+
JS_parallelism=JS_parallelism,
|
|
3391
|
+
status=status,
|
|
3392
|
+
)
|
|
3393
|
+
):
|
|
3394
|
+
if status:
|
|
3395
|
+
status.stop()
|
|
3396
|
+
raise ValueError("No pending element action runs to submit!")
|
|
3397
|
+
pending = [new_sub]
|
|
3398
|
+
|
|
3399
|
+
self.execution_path.mkdir(exist_ok=True, parents=True)
|
|
3400
|
+
self.task_artifacts_path.mkdir(exist_ok=True, parents=True)
|
|
3401
|
+
|
|
3402
|
+
# the submission must be persistent at submit-time, because it will be read by a
|
|
3403
|
+
# new instance of the app:
|
|
3404
|
+
if status:
|
|
3405
|
+
status.update("Committing to the store...")
|
|
3406
|
+
self._store._pending.commit_all()
|
|
3407
|
+
|
|
3408
|
+
# submit all pending submissions:
|
|
3409
|
+
exceptions: list[SubmissionFailure] = []
|
|
3410
|
+
submitted_js: dict[int, list[int]] = {}
|
|
3411
|
+
for sub in pending:
|
|
3412
|
+
try:
|
|
3413
|
+
if status:
|
|
3414
|
+
status.update(f"Preparing submission {sub.index}...")
|
|
3415
|
+
sub_js_idx = sub.submit(
|
|
3416
|
+
status=status,
|
|
3417
|
+
ignore_errors=ignore_errors,
|
|
3418
|
+
print_stdout=print_stdout,
|
|
3419
|
+
add_to_known=add_to_known,
|
|
3420
|
+
)
|
|
3421
|
+
submitted_js[sub.index] = sub_js_idx
|
|
3422
|
+
except SubmissionFailure as exc:
|
|
3423
|
+
exceptions.append(exc)
|
|
3424
|
+
|
|
3425
|
+
return exceptions, submitted_js
|
|
3426
|
+
|
|
3427
|
+
@overload
|
|
3428
|
+
def submit(
|
|
3429
|
+
self,
|
|
3430
|
+
*,
|
|
3431
|
+
ignore_errors: bool = False,
|
|
3432
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
|
3433
|
+
print_stdout: bool = False,
|
|
3434
|
+
wait: bool = False,
|
|
3435
|
+
add_to_known: bool = True,
|
|
3436
|
+
return_idx: Literal[True],
|
|
3437
|
+
tasks: list[int] | None = None,
|
|
3438
|
+
cancel: bool = False,
|
|
3439
|
+
status: bool = True,
|
|
3440
|
+
) -> Mapping[int, Sequence[int]]: ...
|
|
3441
|
+
|
|
3442
|
+
@overload
|
|
3443
|
+
def submit(
|
|
3444
|
+
self,
|
|
3445
|
+
*,
|
|
3446
|
+
ignore_errors: bool = False,
|
|
3447
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
|
3448
|
+
print_stdout: bool = False,
|
|
3449
|
+
wait: bool = False,
|
|
3450
|
+
add_to_known: bool = True,
|
|
3451
|
+
return_idx: Literal[False] = False,
|
|
3452
|
+
tasks: list[int] | None = None,
|
|
3453
|
+
cancel: bool = False,
|
|
3454
|
+
status: bool = True,
|
|
3455
|
+
) -> None: ...
|
|
3456
|
+
|
|
3457
|
+
def submit(
|
|
3458
|
+
self,
|
|
3459
|
+
*,
|
|
3460
|
+
ignore_errors: bool = False,
|
|
3461
|
+
JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
|
|
3462
|
+
print_stdout: bool = False,
|
|
3463
|
+
wait: bool = False,
|
|
3464
|
+
add_to_known: bool = True,
|
|
3465
|
+
return_idx: bool = False,
|
|
3466
|
+
tasks: list[int] | None = None,
|
|
3467
|
+
cancel: bool = False,
|
|
3468
|
+
status: bool = True,
|
|
3469
|
+
) -> Mapping[int, Sequence[int]] | None:
|
|
3470
|
+
"""Submit the workflow for execution.
|
|
3471
|
+
|
|
3472
|
+
Parameters
|
|
3473
|
+
----------
|
|
3474
|
+
ignore_errors
|
|
3475
|
+
If True, ignore jobscript submission errors. If False (the default), jobscript
|
|
3476
|
+
submission will halt when a jobscript fails to submit.
|
|
3477
|
+
JS_parallelism
|
|
3478
|
+
If True, allow multiple jobscripts to execute simultaneously. If
|
|
3479
|
+
'scheduled'/'direct', only allow simultaneous execution of scheduled/direct
|
|
3480
|
+
jobscripts. Raises if set to True, 'scheduled', or 'direct', but the store
|
|
3481
|
+
type does not support the `jobscript_parallelism` feature. If not set,
|
|
3482
|
+
jobscript parallelism will be used if the store type supports it, for
|
|
3483
|
+
scheduled jobscripts only.
|
|
3484
|
+
print_stdout
|
|
3485
|
+
If True, print any jobscript submission standard output, otherwise hide it.
|
|
3486
|
+
wait
|
|
3487
|
+
If True, this command will block until the workflow execution is complete.
|
|
3488
|
+
add_to_known
|
|
3489
|
+
If True, add the submitted submissions to the known-submissions file, which is
|
|
3490
|
+
used by the `show` command to monitor current and recent submissions.
|
|
3491
|
+
return_idx
|
|
3492
|
+
If True, return a dict representing the jobscript indices submitted for each
|
|
3493
|
+
submission.
|
|
3494
|
+
tasks
|
|
3495
|
+
List of task indices to include in the new submission if no submissions
|
|
3496
|
+
already exist. By default all tasks are included if a new submission is
|
|
3497
|
+
created.
|
|
3498
|
+
cancel
|
|
3499
|
+
Immediately cancel the submission. Useful for testing and benchmarking.
|
|
3500
|
+
status
|
|
3501
|
+
If True, display a live status to track submission progress.
|
|
3502
|
+
"""
|
|
3503
|
+
|
|
3504
|
+
# Type hint for mypy
|
|
3505
|
+
status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
|
|
3506
|
+
rich.console.Console().status("Submitting workflow...")
|
|
3507
|
+
if status
|
|
3508
|
+
else nullcontext()
|
|
3509
|
+
)
|
|
3510
|
+
with status_context as status_, self._store.cached_load():
|
|
3511
|
+
if not self._store.is_submittable:
|
|
3512
|
+
raise NotImplementedError("The workflow is not submittable.")
|
|
3513
|
+
# commit updates before raising exception:
|
|
3514
|
+
with (
|
|
3515
|
+
self.batch_update(),
|
|
3516
|
+
self._store.parameters_metadata_cache(),
|
|
3517
|
+
self._store.cache_ctx(),
|
|
3518
|
+
):
|
|
3519
|
+
exceptions, submitted_js = self._submit(
|
|
3520
|
+
ignore_errors=ignore_errors,
|
|
3521
|
+
JS_parallelism=JS_parallelism,
|
|
3522
|
+
print_stdout=print_stdout,
|
|
3523
|
+
status=status_,
|
|
3524
|
+
add_to_known=add_to_known,
|
|
3525
|
+
tasks=tasks,
|
|
3526
|
+
)
|
|
3527
|
+
|
|
3528
|
+
if exceptions:
|
|
3529
|
+
raise WorkflowSubmissionFailure(exceptions)
|
|
3530
|
+
|
|
3531
|
+
if cancel:
|
|
3532
|
+
self.cancel(status=status)
|
|
3533
|
+
|
|
3534
|
+
elif wait:
|
|
3535
|
+
self.wait(submitted_js)
|
|
3536
|
+
|
|
3537
|
+
if return_idx:
|
|
3538
|
+
return submitted_js
|
|
3539
|
+
return None
|
|
3540
|
+
|
|
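A sketch of a typical call; the keyword values are assumptions and `wk` is an existing workflow:

```python
# Illustrative only.
submitted = wk.submit(return_idx=True, add_to_known=True)
# e.g. {0: [0, 1]} -> jobscripts 0 and 1 of submission 0 were submitted
wk.wait(submitted)  # optionally block until those jobscripts have finished
```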
3541
|
+
@staticmethod
|
|
3542
|
+
def __wait_for_direct_jobscripts(jobscripts: list[Jobscript]):
|
|
3543
|
+
"""Wait for the passed direct (i.e. non-scheduled) jobscripts to finish."""
|
|
3544
|
+
|
|
3545
|
+
def callback(proc: psutil.Process) -> None:
|
|
3546
|
+
js = js_pids[proc.pid]
|
|
3547
|
+
assert hasattr(proc, "returncode")
|
|
3548
|
+
# TODO sometimes proc.returncode is None; maybe because multiple wait
|
|
3549
|
+
# calls?
|
|
3550
|
+
print(
|
|
3551
|
+
f"Jobscript {js.index} from submission {js.submission.index} "
|
|
3552
|
+
f"finished with exit code {proc.returncode}."
|
|
3553
|
+
)
|
|
3554
|
+
|
|
3555
|
+
js_pids = {js.process_ID: js for js in jobscripts}
|
|
3556
|
+
process_refs = [
|
|
3557
|
+
(js.process_ID, js.submit_cmdline)
|
|
3558
|
+
for js in jobscripts
|
|
3559
|
+
if js.process_ID and js.submit_cmdline
|
|
3560
|
+
]
|
|
3561
|
+
DirectScheduler.wait_for_jobscripts(process_refs, callback=callback)
|
|
3562
|
+
|
|
3563
|
+
def __wait_for_scheduled_jobscripts(self, jobscripts: list[Jobscript]):
|
|
3564
|
+
"""Wait for the passed scheduled jobscripts to finish."""
|
|
3565
|
+
schedulers = self._app.Submission.get_unique_schedulers_of_jobscripts(jobscripts)
|
|
3566
|
+
threads: list[Thread] = []
|
|
3567
|
+
for js_indices, sched in schedulers:
|
|
3568
|
+
jobscripts_gen = (
|
|
3569
|
+
self.submissions[sub_idx].jobscripts[js_idx]
|
|
3570
|
+
for sub_idx, js_idx in js_indices
|
|
3571
|
+
)
|
|
3572
|
+
job_IDs = [
|
|
3573
|
+
js.scheduler_job_ID
|
|
3574
|
+
for js in jobscripts_gen
|
|
3575
|
+
if js.scheduler_job_ID is not None
|
|
3576
|
+
]
|
|
3577
|
+
threads.append(Thread(target=sched.wait_for_jobscripts, args=(job_IDs,)))
|
|
3578
|
+
|
|
3579
|
+
for thr in threads:
|
|
3580
|
+
thr.start()
|
|
3581
|
+
|
|
3582
|
+
for thr in threads:
|
|
3583
|
+
thr.join()
|
|
3584
|
+
|
|
3585
|
+
def wait(self, sub_js: Mapping[int, Sequence[int]] | None = None):
|
|
3586
|
+
"""Wait for the completion of specified/all submitted jobscripts."""
|
|
3587
|
+
|
|
3588
|
+
# TODO: think about how this might work with remote workflow submission (via SSH)
|
|
3589
|
+
|
|
3590
|
+
# TODO: add a log file to the submission dir where we can log stuff (e.g. starting
|
|
3591
|
+
# a thread...)
|
|
3592
|
+
|
|
3593
|
+
if not sub_js:
|
|
3594
|
+
# find any active jobscripts first:
|
|
3595
|
+
sub_js_: dict[int, list[int]] = defaultdict(list)
|
|
3596
|
+
for sub in self.submissions:
|
|
3597
|
+
sub_js_[sub.index].extend(sub.get_active_jobscripts())
|
|
3598
|
+
sub_js = sub_js_
|
|
3599
|
+
|
|
3600
|
+
js_direct: list[Jobscript] = []
|
|
3601
|
+
js_sched: list[Jobscript] = []
|
|
3602
|
+
for sub_idx, all_js_idx in sub_js.items():
|
|
3603
|
+
for js_idx in all_js_idx:
|
|
3604
|
+
try:
|
|
3605
|
+
js = self.submissions[sub_idx].jobscripts[js_idx]
|
|
3606
|
+
except IndexError:
|
|
3607
|
+
raise ValueError(
|
|
3608
|
+
f"No jobscript with submission index {sub_idx!r} and/or "
|
|
3609
|
+
f"jobscript index {js_idx!r}."
|
|
3610
|
+
)
|
|
3611
|
+
if js.process_ID is not None:
|
|
3612
|
+
js_direct.append(js)
|
|
3613
|
+
elif js.scheduler_job_ID is not None:
|
|
3614
|
+
js_sched.append(js)
|
|
3615
|
+
else:
|
|
3616
|
+
raise RuntimeError(
|
|
3617
|
+
f"Process ID nor scheduler job ID is set for {js!r}."
|
|
3618
|
+
)
|
|
3619
|
+
|
|
3620
|
+
if js_direct or js_sched:
|
|
3621
|
+
# TODO: use a rich console status? how would that appear in stdout though?
|
|
3622
|
+
print("Waiting for workflow submissions to finish...")
|
|
3623
|
+
else:
|
|
3624
|
+
print("No running jobscripts.")
|
|
3625
|
+
return
|
|
3626
|
+
|
|
3627
|
+
try:
|
|
3628
|
+
t_direct = Thread(target=self.__wait_for_direct_jobscripts, args=(js_direct,))
|
|
3629
|
+
t_sched = Thread(
|
|
3630
|
+
target=self.__wait_for_scheduled_jobscripts, args=(js_sched,)
|
|
3631
|
+
)
|
|
3632
|
+
t_direct.start()
|
|
3633
|
+
t_sched.start()
|
|
3634
|
+
|
|
3635
|
+
# without these, KeyboardInterrupt seems to not be caught:
|
|
3636
|
+
while t_direct.is_alive():
|
|
3637
|
+
t_direct.join(timeout=1)
|
|
3638
|
+
|
|
3639
|
+
while t_sched.is_alive():
|
|
3640
|
+
t_sched.join(timeout=1)
|
|
3641
|
+
|
|
3642
|
+
except KeyboardInterrupt:
|
|
3643
|
+
print("No longer waiting (workflow execution will continue).")
|
|
3644
|
+
else:
|
|
3645
|
+
print("Specified submissions have finished.")
|
|
3646
|
+
|
|
3647
|
+
def get_running_elements(
|
|
3648
|
+
self,
|
|
3649
|
+
submission_idx: int = -1,
|
|
3650
|
+
task_idx: int | None = None,
|
|
3651
|
+
task_insert_ID: int | None = None,
|
|
3652
|
+
) -> list[Element]:
|
|
3653
|
+
"""Retrieve elements that are running according to the scheduler."""
|
|
3654
|
+
|
|
3655
|
+
if task_idx is not None and task_insert_ID is not None:
|
|
3656
|
+
raise ValueError("Specify at most one of `task_insert_ID` and `task_idx`.")
|
|
3657
|
+
|
|
3658
|
+
# keys are task_insert_IDs, values are element indices:
|
|
3659
|
+
active_elems: dict[int, set[int]] = defaultdict(set)
|
|
3660
|
+
sub = self.submissions[submission_idx]
|
|
3661
|
+
for js_idx, block_states in sub.get_active_jobscripts().items():
|
|
3662
|
+
js = sub.jobscripts[js_idx]
|
|
3663
|
+
for block_idx, block in enumerate(js.blocks):
|
|
3664
|
+
states = block_states[block_idx]
|
|
3665
|
+
for js_elem_idx, state in states.items():
|
|
3666
|
+
if state is JobscriptElementState.running:
|
|
3667
|
+
for task_iID, elem_idx in zip(
|
|
3668
|
+
block.task_insert_IDs, block.task_elements[js_elem_idx]
|
|
3669
|
+
):
|
|
3670
|
+
active_elems[task_iID].add(elem_idx)
|
|
3671
|
+
|
|
3672
|
+
# retrieve Element objects:
|
|
3673
|
+
out: list[Element] = []
|
|
3674
|
+
for task_iID, elem_idxes in active_elems.items():
|
|
3675
|
+
if task_insert_ID is not None and task_iID != task_insert_ID:
|
|
3676
|
+
continue
|
|
3677
|
+
task = self.tasks.get(insert_ID=task_iID)
|
|
3678
|
+
if task_idx is not None and task_idx != task.index:
|
|
3679
|
+
continue
|
|
3680
|
+
for idx_i in elem_idxes:
|
|
3681
|
+
out.append(task.elements[idx_i])
|
|
3682
|
+
|
|
3683
|
+
return out
|
|
3684
|
+
|
|
3685
|
+
def get_running_runs(
|
|
3686
|
+
self,
|
|
3687
|
+
submission_idx: int = -1,
|
|
3688
|
+
task_idx: int | None = None,
|
|
3689
|
+
task_insert_ID: int | None = None,
|
|
3690
|
+
element_idx: int | None = None,
|
|
3691
|
+
) -> list[ElementActionRun]:
|
|
3692
|
+
"""Retrieve runs that are running according to the scheduler."""
|
|
3693
|
+
|
|
3694
|
+
elems = self.get_running_elements(
|
|
3695
|
+
submission_idx=submission_idx,
|
|
3696
|
+
task_idx=task_idx,
|
|
3697
|
+
task_insert_ID=task_insert_ID,
|
|
3698
|
+
)
|
|
3699
|
+
out = []
|
|
3700
|
+
for elem in elems:
|
|
3701
|
+
if element_idx is not None and elem.index != element_idx:
|
|
3702
|
+
continue
|
|
3703
|
+
for iter_i in elem.iterations:
|
|
3704
|
+
for elem_acts in iter_i.actions.values():
|
|
3705
|
+
for run in elem_acts.runs:
|
|
3706
|
+
if run.status is EARStatus.running:
|
|
3707
|
+
out.append(run)
|
|
3708
|
+
# for a given element and submission, only one run
|
|
3709
|
+
# may be running at a time:
|
|
3710
|
+
break
|
|
3711
|
+
return out
|
|
3712
|
+
|
|
3713
|
+
def _abort_run(self, run: ElementActionRun):
|
|
3714
|
+
# connect to the ZeroMQ server on the worker node:
|
|
3715
|
+
self._app.logger.info(f"abort run: {run!r}")
|
|
3716
|
+
self._app.Executor.send_abort(
|
|
3717
|
+
hostname=run.run_hostname, port_number=run.port_number
|
|
3718
|
+
)
|
|
3719
|
+
|
|
3720
|
+
def abort_run(
|
|
3721
|
+
self,
|
|
3722
|
+
submission_idx: int = -1,
|
|
3723
|
+
task_idx: int | None = None,
|
|
3724
|
+
task_insert_ID: int | None = None,
|
|
3725
|
+
element_idx: int | None = None,
|
|
3726
|
+
):
|
|
3727
|
+
"""Abort the currently running action-run of the specified task/element.
|
|
3728
|
+
|
|
3729
|
+
Parameters
|
|
3730
|
+
----------
|
|
3731
|
+
task_idx
|
|
3732
|
+
The parent task of the run to abort.
|
|
3733
|
+
element_idx
|
|
3734
|
+
For multi-element tasks, the parent element of the run to abort.
|
|
3735
|
+
submission_idx
|
|
3736
|
+
Defaults to the most-recent submission.
|
|
3737
|
+
|
|
3738
|
+
"""
|
|
3739
|
+
running = self.get_running_runs(
|
|
3740
|
+
submission_idx=submission_idx,
|
|
3741
|
+
task_idx=task_idx,
|
|
3742
|
+
task_insert_ID=task_insert_ID,
|
|
3743
|
+
element_idx=element_idx,
|
|
3744
|
+
)
|
|
3745
|
+
if not running:
|
|
3746
|
+
raise ValueError("Specified run is not running.")
|
|
3747
|
+
|
|
3748
|
+
elif len(running) > 1:
|
|
3749
|
+
if element_idx is None:
|
|
3750
|
+
elem_idx = tuple(ear.element.index for ear in running)
|
|
3751
|
+
raise ValueError(
|
|
3752
|
+
f"Multiple elements are running (indices: {elem_idx!r}). Specify "
|
|
3753
|
+
"which element index you want to abort."
|
|
3754
|
+
)
|
|
3755
|
+
else:
|
|
3756
|
+
raise RuntimeError("Multiple running runs.")
|
|
3757
|
+
|
|
3758
|
+
run = running[0]
|
|
3759
|
+
if not run.action.abortable:
|
|
3760
|
+
raise RunNotAbortableError()
|
|
3761
|
+
self._abort_run(run)
|
|
3762
|
+
|
|
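A sketch of monitoring and aborting runs via the methods above (the indices are assumptions):

```python
# Illustrative only.
for run in wk.get_running_runs(submission_idx=-1):
    print(run.id_, run.status)
# abort the running action-run of element 0 in task 0; raises RunNotAbortableError
# if the action was not declared abortable:
wk.abort_run(task_idx=0, element_idx=0)
```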
@@ lines 3763-3797 added @@
+    @TimeIt.decorator
+    def cancel(self, status: bool = True):
+        """Cancel any running jobscripts."""
+        status_msg = f"Cancelling jobscripts of workflow {self.path!r}"
+        # Type hint for mypy
+        status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
+            rich.console.Console().status(status_msg) if status else nullcontext()
+        )
+        with status_context as status_, self._store.cached_load():
+            for sub in self.submissions:
+                sub.cancel()
+
+    def add_submission(
+        self,
+        tasks: list[int] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        force_array: bool = False,
+        status: bool = True,
+    ) -> Submission | None:
+        """Add a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+        """
+        # JS_parallelism=None means guess
+        # Type hint for mypy
+        status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
+            rich.console.Console().status("") if status else nullcontext()
+        )
+        with status_context as status_, self._store.cached_load(), self.batch_update():
+            return self._add_submission(tasks, JS_parallelism, force_array, status_)
+
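The public `add_submission` added above wraps `_add_submission` in store-cache and batch-update contexts and returns `None` when there are no pending element action runs. A usage sketch (illustrative only, not from the package source; `wf` is an assumed, already-loaded `Workflow`):

```python
# Illustrative sketch; `wf` is a hypothetical, already-loaded Workflow object.
# The `index` and `jobscripts` attributes are assumed from the Submission
# constructor arguments shown in the diff.
sub = wf.add_submission(JS_parallelism="scheduled", status=False)
if sub is None:
    print("nothing to submit: no pending element action runs")
else:
    print(f"added submission {sub.index} with {len(sub.jobscripts)} jobscript(s)")
```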
@@ lines 3798-4262 added @@
+    @TimeIt.decorator
+    @load_workflow_config
+    def _add_submission(
+        self,
+        tasks: Sequence[int] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        force_array: bool = False,
+        status: Status | None = None,
+    ) -> Submission | None:
+        """Add a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+        """
+        new_idx = self.num_submissions
+        _ = self.submissions  # TODO: just to ensure `submissions` is loaded
+        if status:
+            status.update("Adding new submission: resolving jobscripts...")
+
+        with self._store.cache_ctx():
+            cache = ObjectCache.build(self, elements=True, iterations=True, runs=True)
+
+        sub_obj: Submission = self._app.Submission(
+            index=new_idx,
+            workflow=self,
+            jobscripts=self.resolve_jobscripts(cache, tasks, force_array),
+            JS_parallelism=JS_parallelism,
+        )
+        if status:
+            status.update("Adding new submission: setting environments...")
+        sub_obj._set_environments()
+        all_EAR_ID = sub_obj.all_EAR_IDs
+        if not all_EAR_ID:
+            print(
+                "There are no pending element action runs, so a new submission was not "
+                "added."
+            )
+            return None
+
+        if status:
+            status.update("Adding new submission: making artifact directories...")
+
+        # TODO: a submission should only be "submitted" once shouldn't it?
+        # no; there could be an IO error (e.g. internet connectivity), so might
+        # need to be able to reattempt submission of outstanding jobscripts.
+        self.submissions_path.mkdir(exist_ok=True, parents=True)
+        sub_obj.path.mkdir(exist_ok=True)
+        sub_obj.tmp_path.mkdir(exist_ok=True)
+        sub_obj.app_std_path.mkdir(exist_ok=True)
+        sub_obj.js_path.mkdir(exist_ok=True)  # for jobscripts
+        sub_obj.js_std_path.mkdir(exist_ok=True)  # for stdout/err stream files
+        sub_obj.js_funcs_path.mkdir(exist_ok=True)
+        sub_obj.js_run_ids_path.mkdir(exist_ok=True)
+        sub_obj.scripts_path.mkdir(exist_ok=True)
+        sub_obj.commands_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_app_log_dir:
+            sub_obj.app_log_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_win_pids_dir:
+            sub_obj.js_win_pids_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_script_indices_dir:
+            sub_obj.js_script_indices_path.mkdir(exist_ok=True)
+
+        if status:
+            status.update("Adding new submission: writing scripts and command files...")
+
+        # write scripts and command files where possible to the submission directory:
+        cmd_file_IDs, run_indices, run_inp_files = sub_obj._write_scripts(cache, status)
+
+        sub_obj._write_execute_dirs(run_indices, run_inp_files, cache, status)
+
+        if status:
+            status.update("Adding new submission: updating the store...")
+
+        with self._store.cached_load(), self.batch_update():
+            for id_ in all_EAR_ID:
+                self._store.set_run_submission_data(
+                    EAR_ID=id_,
+                    cmds_ID=cmd_file_IDs[id_],
+                    sub_idx=new_idx,
+                )
+
+        sub_obj._ensure_JS_parallelism_set()
+        sub_obj_js, _ = sub_obj.to_json_like()
+        assert self._submissions is not None
+        self._submissions.append(sub_obj)
+        self._pending["submissions"].append(new_idx)
+        with self._store.cached_load(), self.batch_update():
+            self._store.add_submission(new_idx, cast("Mapping[str, JSONed]", sub_obj_js))
+
+        return self.submissions[new_idx]
+
+    @TimeIt.decorator
+    def resolve_jobscripts(
+        self,
+        cache: ObjectCache,
+        tasks: Sequence[int] | None = None,
+        force_array: bool = False,
+    ) -> list[Jobscript]:
+        """
+        Resolve this workflow to a set of jobscripts to run for a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+
+        """
+        with self._app.config.cached_config():
+            with self.cached_merged_parameters(), self._store.cache_ctx():
+                js, element_deps = self._resolve_singular_jobscripts(
+                    cache, tasks, force_array
+                )
+
+                js_deps = resolve_jobscript_dependencies(js, element_deps)
+
+                for js_idx, jsca in js.items():
+                    if js_idx in js_deps:
+                        jsca["dependencies"] = js_deps[js_idx]  # type: ignore
+
+                js = merge_jobscripts_across_tasks(js)
+
+                # for direct or (non-array scheduled), combine into jobscripts of multiple
+                # blocks for dependent jobscripts that have the same resource hashes
+                js_ = resolve_jobscript_blocks(js)
+
+                return [self._app.Jobscript(**i, index=idx) for idx, i in enumerate(js_)]
+
+    def __EAR_obj_map(
+        self,
+        js_desc: JobScriptDescriptor,
+        jsca: JobScriptCreationArguments,
+        task: WorkflowTask,
+        task_actions: Sequence[tuple[int, int, int]],
+        EAR_map: NDArray,
+        cache: ObjectCache,
+    ) -> Mapping[int, ElementActionRun]:
+        assert cache.runs is not None
+        all_EAR_IDs: list[int] = []
+        for js_elem_idx, (elem_idx, act_indices) in enumerate(
+            js_desc["elements"].items()
+        ):
+            for act_idx in act_indices:
+                EAR_ID_i: int = EAR_map[act_idx, elem_idx].item()
+                all_EAR_IDs.append(EAR_ID_i)
+                js_act_idx = task_actions.index((task.insert_ID, act_idx, 0))
+                jsca["EAR_ID"][js_act_idx][js_elem_idx] = EAR_ID_i
+        return dict(zip(all_EAR_IDs, (cache.runs[i] for i in all_EAR_IDs)))
+
+    @TimeIt.decorator
+    def _resolve_singular_jobscripts(
+        self,
+        cache: ObjectCache,
+        tasks: Sequence[int] | None = None,
+        force_array: bool = False,
+    ) -> tuple[
+        Mapping[int, JobScriptCreationArguments],
+        Mapping[int, Mapping[int, Sequence[int]]],
+    ]:
+        """
+        We arrange EARs into `EARs` and `elements` so we can quickly look up membership
+        by EAR idx in the `EARs` dict.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+
+        Returns
+        -------
+        submission_jobscripts
+            Information for making each jobscript.
+        all_element_deps
+            For a given jobscript index, for a given jobscript element index within that
+            jobscript, this is a list of EAR IDs dependencies of that element.
+        """
+        task_set = frozenset(tasks if tasks else range(self.num_tasks))
+
+        if self._store.use_cache:
+            # pre-cache parameter sources (used in `EAR.get_EAR_dependencies`):
+            # note: this cache is unrelated to the `cache` argument
+            self.get_all_parameter_sources()
+
+        submission_jobscripts: dict[int, JobScriptCreationArguments] = {}
+        all_element_deps: dict[int, dict[int, list[int]]] = {}
+
+        for task_iID, loop_idx_i in self.get_iteration_task_pathway():
+            task = self.tasks.get(insert_ID=task_iID)
+            if task.index not in task_set:
+                continue
+            res, res_hash, res_map, EAR_map = generate_EAR_resource_map(
+                task, loop_idx_i, cache
+            )
+            jobscripts, _ = group_resource_map_into_jobscripts(res_map)
+
+            for js_dat in jobscripts:
+                # (insert ID, action_idx, index into task_loop_idx):
+                task_actions = sorted(
+                    set(
+                        (task.insert_ID, act_idx_i, 0)
+                        for act_idx in js_dat["elements"].values()
+                        for act_idx_i in act_idx
+                    ),
+                    key=lambda x: x[1],
+                )
+                # Invert the mapping
+                task_actions_inv = {k: idx for idx, k in enumerate(task_actions)}
+                # task_elements: { JS_ELEM_IDX: [TASK_ELEM_IDX for each task insert ID]}
+                task_elements = {
+                    js_elem_idx: [task_elem_idx]
+                    for js_elem_idx, task_elem_idx in enumerate(js_dat["elements"])
+                }
+                EAR_idx_arr_shape = (
+                    len(task_actions),
+                    len(js_dat["elements"]),
+                )
+                EAR_ID_arr = np.empty(EAR_idx_arr_shape, dtype=np.int32)
+                EAR_ID_arr[:] = -1
+
+                new_js_idx = len(submission_jobscripts)
+
+                is_array = force_array or is_jobscript_array(
+                    res[js_dat["resources"]],
+                    EAR_ID_arr.shape[1],
+                    self._store,
+                )
+                js_i: JobScriptCreationArguments = {
+                    "task_insert_IDs": [task.insert_ID],
+                    "task_loop_idx": [loop_idx_i],
+                    "task_actions": task_actions,  # map jobscript actions to task actions
+                    "task_elements": task_elements,  # map jobscript elements to task elements
+                    "EAR_ID": EAR_ID_arr,
+                    "resources": res[js_dat["resources"]],
+                    "resource_hash": res_hash[js_dat["resources"]],
+                    "dependencies": {},
+                    "is_array": is_array,
+                }
+
+                all_EAR_objs = self.__EAR_obj_map(
+                    js_dat, js_i, task, task_actions, EAR_map, cache
+                )
+
+                for js_elem_idx, (elem_idx, act_indices) in enumerate(
+                    js_dat["elements"].items()
+                ):
+                    all_EAR_IDs: list[int] = []
+                    for act_idx in act_indices:
+                        EAR_ID_i: int = EAR_map[act_idx, elem_idx].item()
+                        all_EAR_IDs.append(EAR_ID_i)
+                        js_act_idx = task_actions_inv[task.insert_ID, act_idx, 0]
+                        EAR_ID_arr[js_act_idx][js_elem_idx] = EAR_ID_i
+
+                    # get indices of EARs that this element depends on:
+                    EAR_deps_EAR_idx = [
+                        dep_ear_id
+                        for main_ear_id in all_EAR_IDs
+                        for dep_ear_id in all_EAR_objs[main_ear_id].get_EAR_dependencies()
+                        if dep_ear_id not in EAR_ID_arr
+                    ]
+                    if EAR_deps_EAR_idx:
+                        all_element_deps.setdefault(new_js_idx, {})[
+                            js_elem_idx
+                        ] = EAR_deps_EAR_idx
+
+                submission_jobscripts[new_js_idx] = js_i
+
+        return submission_jobscripts, all_element_deps
+
+    @load_workflow_config
+    def execute_run(
+        self,
+        submission_idx: int,
+        block_act_key: BlockActionKey,
+        run_ID: int,
+    ) -> None:
+        """Execute commands of a run via a subprocess."""
+
+        # CD to submission tmp dir to ensure std streams and exceptions have somewhere
+        # sensible to go:
+        os.chdir(Submission.get_tmp_path(self.submissions_path, submission_idx))
+
+        sub_str_path = Submission.get_app_std_path(self.submissions_path, submission_idx)
+        run_std_path = sub_str_path / f"{str(run_ID)}.txt"  # TODO: refactor
+        has_commands = False
+
+        # redirect (as much as possible) app-generated stdout/err to a dedicated file:
+        with redirect_std_to_file(run_std_path):
+            with self._store.cached_load():
+                js_idx = cast("int", block_act_key[0])
+                run = self.get_EARs_from_IDs([run_ID])[0]
+            run_dir = None
+            if run.action.requires_dir:
+                run_dir = run.get_directory()
+                assert run_dir
+                self._app.submission_logger.debug(
+                    f"changing directory to run execution directory: {run_dir}."
+                )
+                os.chdir(run_dir)
+            self._app.submission_logger.debug(f"{run.skip=}; {run.skip_reason=}")
+
+            # check if we should skip:
+            if not run.skip:
+
+                try:
+                    with run.raise_on_failure_threshold() as unset_params:
+                        if run.action.script:
+                            run.write_script_data_in_files(block_act_key)
+                        if run.action.has_program:
+                            run.write_program_data_in_files(block_act_key)
+
+                        # write the command file that will be executed:
+                        cmd_file_path = self.ensure_commands_file(
+                            submission_idx, js_idx, run
+                        )
+
+                except UnsetParameterDataErrorBase:
+                    # not all required parameter data is set, so fail this run:
+                    self._app.submission_logger.debug(
+                        f"unset parameter threshold satisfied (or any unset "
+                        f"parameters found when trying to write commands file), so "
+                        f"not attempting run. unset_params={unset_params!r}."
+                    )
+                    self.set_EAR_start(run_ID, run_dir, port_number=None)
+                    self._check_loop_termination(run)  # not sure if this is required
+                    self.set_EAR_end(
+                        block_act_key=block_act_key,
+                        run=run,
+                        exit_code=1,
+                    )
+                    return
+
+                # sufficient parameter data is set so far, but need to pass `unset_params`
+                # on as an environment variable so it can be appended to and failure
+                # thresholds can be rechecked if necessary (i.e. in a Python script
+                # where we also load input parameters "directly")
+                if unset_params:
+                    self._app.submission_logger.debug(
+                        f"some unset parameters found, but no unset-thresholds met: "
+                        f"unset_params={unset_params!r}."
+                    )
+
+                # TODO: pass on unset_params to script as environment variable
+
+                if run.action.jinja_template_or_template_path:
+                    # TODO: write Jinja templates in shared submissions directory
+                    run.write_jinja_template()
+
+                if has_commands := bool(cmd_file_path):
+
+                    assert isinstance(cmd_file_path, Path)
+                    if not cmd_file_path.is_file():
+                        raise RuntimeError(
+                            f"Command file {cmd_file_path!r} does not exist."
+                        )
+                    # prepare subprocess command:
+                    jobscript = self.submissions[submission_idx].jobscripts[js_idx]
+                    cmd = jobscript.shell.get_command_file_launch_command(
+                        str(cmd_file_path)
+                    )
+                    loop_idx_str = ";".join(
+                        f"{k}={v}" for k, v in run.element_iteration.loop_idx.items()
+                    )
+                    app_caps = self._app.package_name.upper()
+
+                    # TODO: make these optionally set (more difficult to set in combine_script,
+                    # so have the option to turn off) [default ON]
+                    add_env = {
+                        f"{app_caps}_RUN_ID": str(run_ID),
+                        f"{app_caps}_RUN_IDX": str(run.index),
+                        f"{app_caps}_ELEMENT_IDX": str(run.element.index),
+                        f"{app_caps}_ELEMENT_ID": str(run.element.id_),
+                        f"{app_caps}_ELEMENT_ITER_IDX": str(
+                            run.element_iteration.index
+                        ),
+                        f"{app_caps}_ELEMENT_ITER_ID": str(run.element_iteration.id_),
+                        f"{app_caps}_ELEMENT_ITER_LOOP_IDX": loop_idx_str,
+                    }
+
+                    if run.action.script:
+                        if run.is_snippet_script:
+                            script_artifact_name = run.get_script_artifact_name()
+                            script_dir = Path(
+                                os.environ[f"{app_caps}_SUB_SCRIPTS_DIR"]
+                            )
+                            script_name = script_artifact_name
+                        else:
+                            # not a snippet script; expect the script in the run execute
+                            # directory (i.e. created by a previous action)
+                            script_dir = Path.cwd()
+                            script_name = run.action.script
+                        script_name_no_ext = Path(script_name).stem
+                        add_env.update(
+                            {
+                                f"{app_caps}_RUN_SCRIPT_NAME": script_name,
+                                f"{app_caps}_RUN_SCRIPT_NAME_NO_EXT": script_name_no_ext,
+                                f"{app_caps}_RUN_SCRIPT_DIR": str(script_dir),
+                                f"{app_caps}_RUN_SCRIPT_PATH": str(
+                                    script_dir / script_name
+                                ),
+                            }
+                        )
+                    if program_path := run.program_path_actual:
+                        program_dir = program_path.parent
+                        program_name = program_path.name
+                        program_name_no_ext = program_path.stem
+                        add_env.update(
+                            {
+                                f"{app_caps}_RUN_PROGRAM_NAME": program_name,
+                                f"{app_caps}_RUN_PROGRAM_NAME_NO_EXT": program_name_no_ext,
+                                f"{app_caps}_RUN_PROGRAM_DIR": str(program_dir),
+                                f"{app_caps}_RUN_PROGRAM_PATH": str(program_path),
+                            }
+                        )
+
+                    env = {**dict(os.environ), **add_env}
+
+                    self._app.submission_logger.debug(
+                        f"Executing run commands via subprocess with command {cmd!r}, and "
+                        f"environment variables as below."
+                    )
+                    for k, v in env.items():
+                        if k.startswith(app_caps):
+                            self._app.submission_logger.debug(f"{k} = {v!r}")
+                    exe = self._app.Executor(cmd, env, self._app.package_name)
+                    port = (
+                        exe.start_zmq_server()
+                    )  # start the server so we know the port
+
+                    try:
+                        self.set_EAR_start(run_ID, run_dir, port)
+                    except:
+                        self._app.submission_logger.error(f"Failed to set run start.")
+                        exe.stop_zmq_server()
+                        raise
+
+        # this subprocess may include commands that redirect to the std_stream file (e.g.
+        # calling the app to save a parameter from a shell command output):
+        if not run.skip and has_commands:
+            ret_code = exe.run()  # this also shuts down the server
+
+        # redirect (as much as possible) app-generated stdout/err to a dedicated file:
+        with redirect_std_to_file(run_std_path):
+            if run.skip:
+                ret_code = SKIPPED_EXIT_CODE
+            elif not (has_commands or run.action.jinja_template):
+                ret_code = NO_COMMANDS_EXIT_CODE
+            elif run.action.jinja_template:
+                ret_code = 0
+            else:
+                self._check_loop_termination(run)
+
+            # set run end:
+            self.set_EAR_end(
+                block_act_key=block_act_key,
+                run=run,
+                exit_code=ret_code,
+            )
+
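`execute_run` above exports the run context to the launched command file through environment variables named `{app_caps}_*`, where `app_caps` is the upper-cased application package name. A sketch of a run script consuming them (illustrative only, not from the package source; the `HPCFLOW` prefix is an assumption derived from `package_name.upper()` and may differ for applications built on hpcflow):

```python
# Illustrative sketch of a run script reading the context exported by execute_run().
# The "HPCFLOW" prefix is assumed here; derived applications would use their own name.
import os

run_id = int(os.environ["HPCFLOW_RUN_ID"])
elem_idx = int(os.environ["HPCFLOW_ELEMENT_IDX"])
# loop indices are exported as a single "name=value;name=value" string:
loop_idx = dict(
    item.split("=", 1)
    for item in os.environ["HPCFLOW_ELEMENT_ITER_LOOP_IDX"].split(";")
    if item
)
print(f"run {run_id}: element {elem_idx}, loop indices {loop_idx}")
```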
@@ lines 4263-4507 added @@
+    def _check_loop_termination(self, run: ElementActionRun) -> set[int]:
+        """Check if we need to terminate a loop if this is the last action of the loop
+        iteration for this element, and set downstream iteration runs to skip."""
+
+        elem_iter = run.element_iteration
+        task = elem_iter.task
+        check_loops = []
+        to_skip = set()
+        for loop_name in elem_iter.loop_idx:
+            self._app.logger.info(f"checking loop termination of loop {loop_name!r}.")
+            loop = self.loops.get(loop_name)
+            if (
+                loop.template.termination
+                and task.insert_ID == loop.template.termination_task_insert_ID
+                and run.element_action.action_idx == max(elem_iter.actions)
+            ):
+                check_loops.append(loop_name)
+                # TODO: test with condition actions
+                if loop.test_termination(elem_iter):
+                    self._app.logger.info(
+                        f"loop {loop_name!r} termination condition met for run "
+                        f"ID {run.id_!r}."
+                    )
+                    to_skip.update(loop.skip_downstream_iterations(elem_iter))
+        return to_skip
+
+    @load_workflow_config
+    def execute_combined_runs(self, submission_idx: int, jobscript_idx: int) -> None:
+        """Execute a combined script (multiple runs) via a subprocess."""
+
+        # CD to submission tmp dir to ensure std streams and exceptions have somewhere
+        # sensible to go:
+        os.chdir(Submission.get_tmp_path(self.submissions_path, submission_idx))
+
+        sub = self.submissions[submission_idx]
+        js = sub.jobscripts[jobscript_idx]
+
+        app_caps = self._app.package_name.upper()
+        script_dir = Path(os.environ[f"{app_caps}_SUB_SCRIPTS_DIR"])
+        script_name = f"js_{jobscript_idx}.py"  # TODO: refactor script name
+        script_path = script_dir / script_name
+
+        add_env = {
+            f"{app_caps}_RUN_SCRIPT_NAME": script_name,
+            f"{app_caps}_RUN_SCRIPT_NAME_NO_EXT": script_path.stem,
+            f"{app_caps}_RUN_SCRIPT_DIR": str(script_dir),
+            f"{app_caps}_RUN_SCRIPT_PATH": str(script_path),
+            f"{app_caps}_SCRIPT_INDICES_FILE": str(js.combined_script_indices_file_path),
+        }
+        env = {**dict(os.environ), **add_env}
+
+        # note: unlike in `Workflow.execute_run`, here we can be reasonably sure the
+        # commands file already exists, because we call `Action.try_write_commands` with
+        # `raise_on_unset=True` in `Workflow._add_submission` during submission.
+
+        # TODO: refactor cmd file name:
+        cmd_file_path = sub.commands_path / f"js_{jobscript_idx}{js.shell.JS_EXT}"
+        cmd = js.shell.get_command_file_launch_command(str(cmd_file_path))
+
+        self._app.submission_logger.debug(
+            f"Executing combined runs via subprocess with command {cmd!r}, and "
+            f"environment variables as below."
+        )
+        for k, v in env.items():
+            if k.startswith(app_caps):
+                self._app.submission_logger.debug(f"{k} = {v}")
+
+        exe = self._app.Executor(cmd, env, self._app.package_name)
+        exe.start_zmq_server()  # start the server
+        exe.run()  # this also shuts down the server
+
+    def ensure_commands_file(
+        self,
+        submission_idx: int,
+        js_idx: int,
+        run: ElementActionRun,
+    ) -> Path | bool:
+        """Ensure a commands file exists for the specified run."""
+        self._app.persistence_logger.debug("Workflow.ensure_commands_file")
+
+        if run.commands_file_ID is None:
+            # no commands to write
+            return False
+
+        with self._store.cached_load():
+            sub = self.submissions[submission_idx]
+            jobscript = sub.jobscripts[js_idx]
+
+            # check if a commands file already exists, first checking using the run ID:
+            cmd_file_name = f"{run.id_}{jobscript.shell.JS_EXT}"  # TODO: refactor
+            cmd_file_path = jobscript.submission.commands_path / cmd_file_name
+
+            if not cmd_file_path.is_file():
+                # then check for a file from the "root" run ID (the run ID of a run that
+                # shares the same commands file):
+
+                cmd_file_name = (
+                    f"{run.commands_file_ID}{jobscript.shell.JS_EXT}"  # TODO: refactor
+                )
+                cmd_file_path = jobscript.submission.commands_path / cmd_file_name
+
+                if not cmd_file_path.is_file():
+                    # no file available, so write (using the run ID):
+                    try:
+                        cmd_file_path = run.try_write_commands(
+                            jobscript=jobscript,
+                            environments=sub.environments,
+                            raise_on_unset=True,
+                        )
+                    except OutputFileParserNoOutputError:
+                        # no commands to write, might be used just for saving files
+                        return False
+
+        return cmd_file_path
+
+    def process_shell_parameter_output(
+        self, name: str, value: str, EAR_ID: int, cmd_idx: int, stderr: bool = False
+    ) -> Any:
+        """Process the shell stdout/stderr stream according to the associated Command
+        object."""
+        with self._store.cached_load(), self.batch_update():
+            EAR = self.get_EARs_from_IDs(EAR_ID)
+            command = EAR.action.commands[cmd_idx]
+            return command.process_std_stream(name, value, stderr)
+
+    def save_parameter(
+        self,
+        name: str,
+        value: Any,
+        EAR_ID: int,
+    ):
+        """
+        Save a parameter where an EAR can find it.
+        """
+        self._app.logger.info(f"save parameter {name!r} for EAR_ID {EAR_ID}.")
+        self._app.logger.debug(f"save parameter {name!r} value is {value!r}.")
+        with self._store.cached_load(), self.batch_update():
+            EAR = self.get_EARs_from_IDs(EAR_ID)
+            param_id = EAR.data_idx[name]
+            self.set_parameter_value(param_id, value)
+
+    def show_all_EAR_statuses(self) -> None:
+        """
+        Print a description of the status of every element action run in
+        the workflow.
+        """
+        print(
+            f"{'task':8s} {'element':8s} {'iteration':8s} {'action':8s} "
+            f"{'run':8s} {'sub.':8s} {'exitcode':8s} {'success':8s} {'skip':8s}"
+        )
+        for task in self.tasks:
+            for element in task.elements[:]:
+                for iter_idx, iteration in enumerate(element.iterations):
+                    for act_idx, action_runs in iteration.actions.items():
+                        for run_idx, EAR in enumerate(action_runs.runs):
+                            suc = EAR.success if EAR.success is not None else "-"
+                            if EAR.exit_code is not None:
+                                exc = f"{EAR.exit_code:^8d}"
+                            else:
+                                exc = f"{'-':^8}"
+                            print(
+                                f"{task.insert_ID:^8d} {element.index:^8d} "
+                                f"{iter_idx:^8d} {act_idx:^8d} {run_idx:^8d} "
+                                f"{EAR.status.name.lower():^8s}"
+                                f"{exc}"
+                                f"{suc:^8}"
+                                f"{EAR.skip:^8}"
+                            )
+
+    def _resolve_input_source_task_reference(
+        self, input_source: InputSource, new_task_name: str
+    ) -> None:
+        """Normalise the input source task reference and convert a source to a local type
+        if required."""
+
+        # TODO: test thoroughly!
+
+        if isinstance(input_source.task_ref, str):
+            if input_source.task_ref == new_task_name:
+                if input_source.task_source_type is self._app.TaskSourceType.OUTPUT:
+                    raise InvalidInputSourceTaskReference(input_source)
+                warn(
+                    f"Changing input source {input_source.to_string()!r} to a local "
+                    f"type, since the input source task reference refers to its own "
+                    f"task."
+                )
+                # TODO: add an InputSource source_type setter to reset
+                # task_ref/source_type?
+                input_source.source_type = self._app.InputSourceType.LOCAL
+                input_source.task_ref = None
+                input_source.task_source_type = None
+            else:
+                try:
+                    uniq_names_cur = self.get_task_unique_names(map_to_insert_ID=True)
+                    input_source.task_ref = uniq_names_cur[input_source.task_ref]
+                except KeyError:
+                    raise InvalidInputSourceTaskReference(
+                        input_source, task_ref=input_source.task_ref
+                    )
+
+    @TimeIt.decorator
+    def get_all_submission_run_IDs(self) -> Iterable[int]:
+        """
+        Get the run IDs of all submissions.
+        """
+        self._app.persistence_logger.debug("Workflow.get_all_submission_run_IDs")
+        for sub in self.submissions:
+            yield from sub.all_EAR_IDs
+
+    def rechunk_runs(
+        self,
+        chunk_size: int | None = None,
+        backup: bool = True,
+        status: bool = True,
+    ):
+        """
+        Reorganise the stored data chunks for EARs to be more efficient.
+        """
+        self._store.rechunk_runs(chunk_size=chunk_size, backup=backup, status=status)
+
+    def rechunk_parameter_base(
+        self,
+        chunk_size: int | None = None,
+        backup: bool = True,
+        status: bool = True,
+    ):
+        """
+        Reorganise the stored data chunks for parameters to be more efficient.
+        """
+        self._store.rechunk_parameter_base(
+            chunk_size=chunk_size, backup=backup, status=status
+        )
+
+    def rechunk(
+        self,
+        chunk_size: int | None = None,
+        backup: bool = True,
+        status: bool = True,
+    ):
+        """
+        Rechunk metadata/runs and parameters/base arrays, making them more efficient.
+        """
+        self.rechunk_runs(chunk_size=chunk_size, backup=backup, status=status)
+        self.rechunk_parameter_base(chunk_size=chunk_size, backup=backup, status=status)
+
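The three `rechunk*` methods above delegate to the persistent store. A usage sketch (illustrative only, not from the package source; `wf` is an assumed, already-loaded `Workflow` whose store holds chunked arrays):

```python
# Illustrative sketch; `wf` is a hypothetical, already-loaded Workflow object.
# Consolidate run metadata and base parameter arrays, keeping a backup of the
# originals and showing status output; chunk_size=None leaves the chunk-size
# choice to the store:
wf.rechunk(chunk_size=None, backup=True, status=True)
```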
@@ lines 4508-4723 added @@
+    @TimeIt.decorator
+    def get_run_directories(
+        self,
+        run_ids: list[int] | None = None,
+        dir_indices_arr: np.ndarray | None = None,
+    ) -> list[Path | None]:
+        """"""
+
+        @TimeIt.decorator
+        def _get_depth_dirs(
+            item_idx: int,
+            max_per_dir: int,
+            max_depth: int,
+            depth_idx_cache: dict[tuple[int, int], NDArray],
+            prefix: str,
+        ) -> list[str]:
+            dirs = []
+            max_avail_items = max_per_dir**max_depth
+            for depth_i in range(1, max_depth):
+                tot_items_per_level = int(max_avail_items / max_per_dir**depth_i)
+                key = (max_avail_items, tot_items_per_level)
+                if (depth_idx := depth_idx_cache.get(key)) is None:
+                    depth_idx = np.repeat(
+                        np.arange(max_avail_items / tot_items_per_level, dtype=int),
+                        tot_items_per_level,
+                    )
+                    depth_idx_cache[key] = depth_idx
+                idx_i = cast("NDArray", depth_idx)[item_idx]
+                start_idx = idx_i * tot_items_per_level
+                end_idx = start_idx + tot_items_per_level - 1
+                dirs.append(f"{prefix}_{start_idx}-{end_idx}")
+            return dirs
+
+        if dir_indices_arr is None:  # TODO: document behaviour!
+            dir_indices_arr = self._store.get_dirs_array()
+            if run_ids is not None:
+                dir_indices_arr = dir_indices_arr[run_ids]
+
+        # TODO: make these configurable so easier to test!
+        MAX_ELEMS_PER_DIR = 1000  # TODO: configurable (add `workflow_defaults` to Config)
+        MAX_ITERS_PER_DIR = 1000
+
+        exec_path = self.execution_path
+
+        # a fill value means no sub directory should be created
+        T_FILL, E_FILL, I_FILL, A_FILL, R_FILL, _, _ = RUN_DIR_ARR_FILL
+
+        depth_idx_cache: dict[tuple[int, int], NDArray] = (
+            {}
+        )  # keys are (max_avail, tot_elems_per_dir_level)
+
+        # format run directories:
+        dirs = []
+        for dir_data in dir_indices_arr:
+
+            # TODO: retrieve task,element,iteration,action,run dir formats from
+            # (t_iID, act_idx) combo (cached)?
+
+            t_iID, e_idx, i_idx, _, r_idx, e_depth, i_depth = dir_data
+            path_args = []
+
+            if t_iID != T_FILL:
+                path_args.append(f"t_{t_iID}")
+
+            if e_idx != E_FILL:
+                if e_depth > 1:
+                    path_args.extend(
+                        _get_depth_dirs(
+                            item_idx=e_idx,
+                            max_per_dir=MAX_ELEMS_PER_DIR,
+                            max_depth=e_depth,
+                            depth_idx_cache=depth_idx_cache,
+                            prefix="e",
+                        )
+                    )
+                path_args.append(f"e_{e_idx}")
+
+            if i_idx != I_FILL:
+                if i_depth > 1:
+                    path_args.extend(
+                        _get_depth_dirs(
+                            item_idx=i_idx,
+                            max_per_dir=MAX_ITERS_PER_DIR,
+                            max_depth=i_depth,
+                            depth_idx_cache=depth_idx_cache,
+                            prefix="i",
+                        )
+                    )
+                path_args.append(f"i_{i_idx}")
+
+            if r_idx != R_FILL:
+                path_args.append(f"r_{r_idx}")
+
+            if path_args:
+                run_dir = exec_path.joinpath(*path_args)
+            elif e_depth == 1:
+                run_dir = exec_path
+            else:
+                run_dir = None
+
+            dirs.append(run_dir)
+
+        return dirs
+
+    @TimeIt.decorator
+    def get_scheduler_job_IDs(self) -> tuple[str, ...]:
+        """Return jobscript scheduler job IDs from all submissions of this workflow."""
+        return tuple(
+            IDs_j for sub_i in self.submissions for IDs_j in sub_i.get_scheduler_job_IDs()
+        )
+
+    @TimeIt.decorator
+    def get_process_IDs(self) -> tuple[int, ...]:
+        """Return jobscript process IDs from all submissions of this workflow."""
+        return tuple(
+            IDs_j for sub_i in self.submissions for IDs_j in sub_i.get_process_IDs()
+        )
+
+    @TimeIt.decorator
+    def list_jobscripts(
+        self,
+        sub_idx: int = 0,
+        max_js: int | None = None,
+        jobscripts: list[int] | None = None,
+        width: int | None = None,
+    ) -> None:
+        """Print a table listing jobscripts and associated information from the specified
+        submission.
+
+        Parameters
+        ----------
+        sub_idx
+            The submission index whose jobscripts are to be displayed.
+        max_js
+            Maximum jobscript index to display. This cannot be specified with `jobscripts`.
+        jobscripts
+            A list of jobscripts to display. This cannot be specified with `max_js`.
+        width
+            Width in characters of the printed table.
+        """
+
+        with self._store.cached_load():
+
+            if max_js is not None and jobscripts is not None:
+                raise ValueError("Do not specify both `max_js` and `jobscripts`.")
+
+            loop_names = [i.name for i in self.loops][::-1]
+            loop_names_panel: rich.panel.Panel | str = ""
+            if loop_names:
+                loop_names_panel = rich.panel.Panel(
+                    "\n".join(f"{idx}: {i}" for idx, i in enumerate(loop_names)),
+                    title="[b]Loops[/b]",
+                    title_align="left",
+                    box=rich.box.SIMPLE,
+                )
+
+            table = rich.table.Table(width=width)
+
+            table.add_column("Jobscript", justify="right", style="cyan", no_wrap=True)
+            table.add_column("Acts, Elms", justify="right", style="green")
+            table.add_column("Deps.", style="orange3")
+            table.add_column("Tasks", overflow="fold")
+            table.add_column("Loops")
+
+            sub_js = self.submissions[sub_idx].jobscripts
+            max_js = max_js if max_js is not None else len(sub_js)
+            for js in sub_js:
+                if jobscripts is not None and js.index not in jobscripts:
+                    continue
+                if js.index > max_js:
+                    break
+                for blk in js.blocks:
+                    blk_task_actions = blk.task_actions
+                    num_actions = blk_task_actions.shape[0]
+
+                    if blk.index == 0:
+                        c1 = f"{js.index} - {blk.index}"
+                    else:
+                        c1 = f"{blk.index}"
+                    c3 = f"{num_actions}, {blk.num_elements}"
+
+                    deps = "; ".join(f"{i[0],i[1]}" for i in blk.dependencies)
+
+                    for blk_t_idx, t_iID in enumerate(blk.task_insert_IDs):
+
+                        # loop indices are the same for all actions within a task, so get the
+                        # first `task_action` for this task insert ID:
+                        for i in blk_task_actions:
+                            if i[0] == t_iID:
+                                loop_idx = [
+                                    blk.task_loop_idx[i[2]].get(loop_name_i, "-")
+                                    for loop_name_i in loop_names
+                                ]
+                                break
+
+                        c2 = self.tasks.get(insert_ID=t_iID).unique_name
+
+                        if blk_t_idx > 0:
+                            c1 = ""
+                            c3 = ""
+                            deps = ""
+
+                        table.add_row(
+                            c1, c3, deps, c2, (" | ".join(f"{i}" for i in loop_idx))
+                        )
+
+                table.add_section()
+
+            group = rich.console.Group(
+                rich.text.Text(f"Workflow: {self.name}"),
+                rich.text.Text(f"Submission: {sub_idx}" + ("\n" if loop_names_panel else "")),
+                loop_names_panel,
+                table,
+            )
+            rich_print(group)
+
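`list_jobscripts` above prints a rich table of jobscript blocks, their action/element counts, dependencies, tasks and loop indices for one submission. A usage sketch (illustrative only, not from the package source; `wf` is an assumed, already-loaded `Workflow` with at least one submission):

```python
# Illustrative sketch; `wf` is a hypothetical, already-loaded Workflow object.
# Show jobscripts 0..2 of the first submission in a 100-character-wide table:
wf.list_jobscripts(sub_idx=0, max_js=2, width=100)

# Or list only specific jobscripts (cannot be combined with max_js):
wf.list_jobscripts(jobscripts=[0, 4], width=100)
```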
@@ lines 4724-4820 added @@
+    def list_task_jobscripts(
+        self,
+        sub_idx: int = 0,
+        task_names: list[str] | None = None,
+        max_js: int | None = None,
+        width: int | None = None,
+    ):
+        """Print a table listing the jobscripts associated with the specified (or all)
+        tasks for the specified submission.
+
+        Parameters
+        ----------
+        sub_idx
+            The submission index whose jobscripts are to be displayed.
+        task_names
+            List of sub-strings to match to task names. Only matching task names will be
+            included.
+        max_js
+            Maximum jobscript index to display.
+        width
+            Width in characters of the printed table.
+        """
+
+        with self._store.cached_load():
+            loop_names = [i.name for i in self.loops][::-1]
+            loop_names_panel: rich.panel.Panel | str = ""
+            if loop_names:
+                loop_names_panel = rich.panel.Panel(
+                    "\n".join(f"{idx}: {i}" for idx, i in enumerate(loop_names)),
+                    title="[b]Loops[/b]",
+                    title_align="left",
+                    box=rich.box.SIMPLE,
+                )
+
+            sub_js = self.submissions[sub_idx].jobscripts
+            all_task_names = {i.insert_ID: i.unique_name for i in self.tasks}
+
+            # filter task names by those matching the specified names
+            matched = all_task_names
+            if task_names:
+                matched = {
+                    k: v
+                    for k, v in all_task_names.items()
+                    if any(i in v for i in task_names)
+                }
+
+            task_jobscripts = defaultdict(list)
+            for js in sub_js:
+                if max_js is not None and js.index > max_js:
+                    break
+                for blk in js.blocks:
+                    blk_task_actions = blk.task_actions
+                    for i in blk.task_insert_IDs:
+                        if i in matched:
+                            for j in blk_task_actions:
+                                if j[0] == i:
+                                    loop_idx = [
+                                        blk.task_loop_idx[j[2]].get(loop_name_i, "-")
+                                        for loop_name_i in loop_names
+                                    ]
+                                    break
+                            task_jobscripts[i].append((js.index, blk.index, loop_idx))
+
+            table = rich.table.Table(width=width)
+            table.add_column("Task")
+            table.add_column("Jobscripts", style="cyan", no_wrap=True)
+            table.add_column("Loops")
+            for insert_ID_i, jobscripts_i in task_jobscripts.items():
+                for idx, js_j in enumerate(jobscripts_i):
+                    js_idx, blk_idx, loop_idx = js_j
+                    table.add_row(
+                        matched[insert_ID_i] if idx == 0 else "",
+                        f"({js_idx}, {blk_idx})",
+                        (" | ".join(f"{i}" for i in loop_idx)),
+                    )
+                table.add_section()
+
+            group = rich.console.Group(
+                rich.text.Text(f"Workflow: {self.name}"),
+                rich.text.Text(f"Submission: {sub_idx}" + ("\n" if loop_names_panel else "")),
+                loop_names_panel,
+                table,
+            )
+            rich_print(group)
+
+    def get_text_file(self, path: str | Path) -> str:
+        """Retrieve the contents of a text file stored within the workflow."""
+        return self._store.get_text_file(path)
+
+
+@dataclass
+class WorkflowBlueprint:
+    """Pre-built workflow templates that are simpler to parameterise.
+    (For example, fitting workflows.)"""
+
+    #: The template inside this blueprint.
+    workflow_template: WorkflowTemplate