hpcflow 0.1.9__py3-none-any.whl → 0.2.0a271__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275)
  1. hpcflow/__init__.py +2 -11
  2. hpcflow/__pyinstaller/__init__.py +5 -0
  3. hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
  4. hpcflow/_version.py +1 -1
  5. hpcflow/app.py +43 -0
  6. hpcflow/cli.py +2 -462
  7. hpcflow/data/demo_data_manifest/__init__.py +3 -0
  8. hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
  9. hpcflow/data/jinja_templates/test/test_template.txt +8 -0
  10. hpcflow/data/programs/hello_world/README.md +1 -0
  11. hpcflow/data/programs/hello_world/hello_world.c +87 -0
  12. hpcflow/data/programs/hello_world/linux/hello_world +0 -0
  13. hpcflow/data/programs/hello_world/macos/hello_world +0 -0
  14. hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
  15. hpcflow/data/scripts/__init__.py +1 -0
  16. hpcflow/data/scripts/bad_script.py +2 -0
  17. hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
  18. hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
  19. hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
  20. hpcflow/data/scripts/do_nothing.py +2 -0
  21. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  22. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  23. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  24. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  25. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  26. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  27. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  28. hpcflow/data/scripts/generate_t1_file_01.py +7 -0
  29. hpcflow/data/scripts/import_future_script.py +7 -0
  30. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  31. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  32. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  33. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  34. hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
  35. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  36. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  37. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  38. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  39. hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
  40. hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
  41. hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
  42. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  43. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  44. hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
  45. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
  46. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  47. hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
  48. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
  49. hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
  50. hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
  51. hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
  52. hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
  53. hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
  54. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  55. hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
  56. hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
  57. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  58. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  59. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  60. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  61. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  62. hpcflow/data/scripts/parse_t1_file_01.py +4 -0
  63. hpcflow/data/scripts/script_exit_test.py +5 -0
  64. hpcflow/data/template_components/__init__.py +1 -0
  65. hpcflow/data/template_components/command_files.yaml +26 -0
  66. hpcflow/data/template_components/environments.yaml +13 -0
  67. hpcflow/data/template_components/parameters.yaml +14 -0
  68. hpcflow/data/template_components/task_schemas.yaml +139 -0
  69. hpcflow/data/workflows/workflow_1.yaml +5 -0
  70. hpcflow/examples.ipynb +1037 -0
  71. hpcflow/sdk/__init__.py +149 -0
  72. hpcflow/sdk/app.py +4266 -0
  73. hpcflow/sdk/cli.py +1479 -0
  74. hpcflow/sdk/cli_common.py +385 -0
  75. hpcflow/sdk/config/__init__.py +5 -0
  76. hpcflow/sdk/config/callbacks.py +246 -0
  77. hpcflow/sdk/config/cli.py +388 -0
  78. hpcflow/sdk/config/config.py +1410 -0
  79. hpcflow/sdk/config/config_file.py +501 -0
  80. hpcflow/sdk/config/errors.py +272 -0
  81. hpcflow/sdk/config/types.py +150 -0
  82. hpcflow/sdk/core/__init__.py +38 -0
  83. hpcflow/sdk/core/actions.py +3857 -0
  84. hpcflow/sdk/core/app_aware.py +25 -0
  85. hpcflow/sdk/core/cache.py +224 -0
  86. hpcflow/sdk/core/command_files.py +814 -0
  87. hpcflow/sdk/core/commands.py +424 -0
  88. hpcflow/sdk/core/element.py +2071 -0
  89. hpcflow/sdk/core/enums.py +221 -0
  90. hpcflow/sdk/core/environment.py +256 -0
  91. hpcflow/sdk/core/errors.py +1043 -0
  92. hpcflow/sdk/core/execute.py +207 -0
  93. hpcflow/sdk/core/json_like.py +809 -0
  94. hpcflow/sdk/core/loop.py +1320 -0
  95. hpcflow/sdk/core/loop_cache.py +282 -0
  96. hpcflow/sdk/core/object_list.py +933 -0
  97. hpcflow/sdk/core/parameters.py +3371 -0
  98. hpcflow/sdk/core/rule.py +196 -0
  99. hpcflow/sdk/core/run_dir_files.py +57 -0
  100. hpcflow/sdk/core/skip_reason.py +7 -0
  101. hpcflow/sdk/core/task.py +3792 -0
  102. hpcflow/sdk/core/task_schema.py +993 -0
  103. hpcflow/sdk/core/test_utils.py +538 -0
  104. hpcflow/sdk/core/types.py +447 -0
  105. hpcflow/sdk/core/utils.py +1207 -0
  106. hpcflow/sdk/core/validation.py +87 -0
  107. hpcflow/sdk/core/values.py +477 -0
  108. hpcflow/sdk/core/workflow.py +4820 -0
  109. hpcflow/sdk/core/zarr_io.py +206 -0
  110. hpcflow/sdk/data/__init__.py +13 -0
  111. hpcflow/sdk/data/config_file_schema.yaml +34 -0
  112. hpcflow/sdk/data/config_schema.yaml +260 -0
  113. hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
  114. hpcflow/sdk/data/files_spec_schema.yaml +5 -0
  115. hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
  116. hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
  117. hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
  118. hpcflow/sdk/demo/__init__.py +3 -0
  119. hpcflow/sdk/demo/cli.py +242 -0
  120. hpcflow/sdk/helper/__init__.py +3 -0
  121. hpcflow/sdk/helper/cli.py +137 -0
  122. hpcflow/sdk/helper/helper.py +300 -0
  123. hpcflow/sdk/helper/watcher.py +192 -0
  124. hpcflow/sdk/log.py +288 -0
  125. hpcflow/sdk/persistence/__init__.py +18 -0
  126. hpcflow/sdk/persistence/base.py +2817 -0
  127. hpcflow/sdk/persistence/defaults.py +6 -0
  128. hpcflow/sdk/persistence/discovery.py +39 -0
  129. hpcflow/sdk/persistence/json.py +954 -0
  130. hpcflow/sdk/persistence/pending.py +948 -0
  131. hpcflow/sdk/persistence/store_resource.py +203 -0
  132. hpcflow/sdk/persistence/types.py +309 -0
  133. hpcflow/sdk/persistence/utils.py +73 -0
  134. hpcflow/sdk/persistence/zarr.py +2388 -0
  135. hpcflow/sdk/runtime.py +320 -0
  136. hpcflow/sdk/submission/__init__.py +3 -0
  137. hpcflow/sdk/submission/enums.py +70 -0
  138. hpcflow/sdk/submission/jobscript.py +2379 -0
  139. hpcflow/sdk/submission/schedulers/__init__.py +281 -0
  140. hpcflow/sdk/submission/schedulers/direct.py +233 -0
  141. hpcflow/sdk/submission/schedulers/sge.py +376 -0
  142. hpcflow/sdk/submission/schedulers/slurm.py +598 -0
  143. hpcflow/sdk/submission/schedulers/utils.py +25 -0
  144. hpcflow/sdk/submission/shells/__init__.py +52 -0
  145. hpcflow/sdk/submission/shells/base.py +229 -0
  146. hpcflow/sdk/submission/shells/bash.py +504 -0
  147. hpcflow/sdk/submission/shells/os_version.py +115 -0
  148. hpcflow/sdk/submission/shells/powershell.py +352 -0
  149. hpcflow/sdk/submission/submission.py +1402 -0
  150. hpcflow/sdk/submission/types.py +140 -0
  151. hpcflow/sdk/typing.py +194 -0
  152. hpcflow/sdk/utils/arrays.py +69 -0
  153. hpcflow/sdk/utils/deferred_file.py +55 -0
  154. hpcflow/sdk/utils/hashing.py +16 -0
  155. hpcflow/sdk/utils/patches.py +31 -0
  156. hpcflow/sdk/utils/strings.py +69 -0
  157. hpcflow/tests/api/test_api.py +32 -0
  158. hpcflow/tests/conftest.py +123 -0
  159. hpcflow/tests/data/__init__.py +0 -0
  160. hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
  161. hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
  162. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  163. hpcflow/tests/data/workflow_1.json +10 -0
  164. hpcflow/tests/data/workflow_1.yaml +5 -0
  165. hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
  166. hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
  167. hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
  168. hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
  169. hpcflow/tests/programs/test_programs.py +180 -0
  170. hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
  171. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  172. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
  173. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  174. hpcflow/tests/scripts/test_main_scripts.py +1361 -0
  175. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  176. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  177. hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
  178. hpcflow/tests/unit/test_action.py +1066 -0
  179. hpcflow/tests/unit/test_action_rule.py +24 -0
  180. hpcflow/tests/unit/test_app.py +132 -0
  181. hpcflow/tests/unit/test_cache.py +46 -0
  182. hpcflow/tests/unit/test_cli.py +172 -0
  183. hpcflow/tests/unit/test_command.py +377 -0
  184. hpcflow/tests/unit/test_config.py +195 -0
  185. hpcflow/tests/unit/test_config_file.py +162 -0
  186. hpcflow/tests/unit/test_element.py +666 -0
  187. hpcflow/tests/unit/test_element_iteration.py +88 -0
  188. hpcflow/tests/unit/test_element_set.py +158 -0
  189. hpcflow/tests/unit/test_group.py +115 -0
  190. hpcflow/tests/unit/test_input_source.py +1479 -0
  191. hpcflow/tests/unit/test_input_value.py +398 -0
  192. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  193. hpcflow/tests/unit/test_json_like.py +1247 -0
  194. hpcflow/tests/unit/test_loop.py +2674 -0
  195. hpcflow/tests/unit/test_meta_task.py +325 -0
  196. hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
  197. hpcflow/tests/unit/test_object_list.py +116 -0
  198. hpcflow/tests/unit/test_parameter.py +243 -0
  199. hpcflow/tests/unit/test_persistence.py +664 -0
  200. hpcflow/tests/unit/test_resources.py +243 -0
  201. hpcflow/tests/unit/test_run.py +286 -0
  202. hpcflow/tests/unit/test_run_directories.py +29 -0
  203. hpcflow/tests/unit/test_runtime.py +9 -0
  204. hpcflow/tests/unit/test_schema_input.py +372 -0
  205. hpcflow/tests/unit/test_shell.py +129 -0
  206. hpcflow/tests/unit/test_slurm.py +39 -0
  207. hpcflow/tests/unit/test_submission.py +502 -0
  208. hpcflow/tests/unit/test_task.py +2560 -0
  209. hpcflow/tests/unit/test_task_schema.py +182 -0
  210. hpcflow/tests/unit/test_utils.py +616 -0
  211. hpcflow/tests/unit/test_value_sequence.py +549 -0
  212. hpcflow/tests/unit/test_values.py +91 -0
  213. hpcflow/tests/unit/test_workflow.py +827 -0
  214. hpcflow/tests/unit/test_workflow_template.py +186 -0
  215. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  216. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  217. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  218. hpcflow/tests/unit/utils/test_patches.py +5 -0
  219. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  220. hpcflow/tests/unit/utils/test_strings.py +97 -0
  221. hpcflow/tests/workflows/__init__.py +0 -0
  222. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  223. hpcflow/tests/workflows/test_jobscript.py +355 -0
  224. hpcflow/tests/workflows/test_run_status.py +198 -0
  225. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  226. hpcflow/tests/workflows/test_submission.py +140 -0
  227. hpcflow/tests/workflows/test_workflows.py +564 -0
  228. hpcflow/tests/workflows/test_zip.py +18 -0
  229. hpcflow/viz_demo.ipynb +6794 -0
  230. hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
  231. hpcflow-0.2.0a271.dist-info/METADATA +65 -0
  232. hpcflow-0.2.0a271.dist-info/RECORD +237 -0
  233. {hpcflow-0.1.9.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
  234. hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
  235. hpcflow/api.py +0 -458
  236. hpcflow/archive/archive.py +0 -308
  237. hpcflow/archive/cloud/cloud.py +0 -47
  238. hpcflow/archive/cloud/errors.py +0 -9
  239. hpcflow/archive/cloud/providers/dropbox.py +0 -432
  240. hpcflow/archive/errors.py +0 -5
  241. hpcflow/base_db.py +0 -4
  242. hpcflow/config.py +0 -232
  243. hpcflow/copytree.py +0 -66
  244. hpcflow/data/examples/_config.yml +0 -14
  245. hpcflow/data/examples/damask/demo/1.run.yml +0 -4
  246. hpcflow/data/examples/damask/demo/2.process.yml +0 -29
  247. hpcflow/data/examples/damask/demo/geom.geom +0 -2052
  248. hpcflow/data/examples/damask/demo/load.load +0 -1
  249. hpcflow/data/examples/damask/demo/material.config +0 -185
  250. hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
  251. hpcflow/data/examples/damask/inputs/load.load +0 -1
  252. hpcflow/data/examples/damask/inputs/material.config +0 -185
  253. hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
  254. hpcflow/data/examples/damask/profiles/damask.yml +0 -4
  255. hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
  256. hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
  257. hpcflow/data/examples/damask/profiles/default.yml +0 -6
  258. hpcflow/data/examples/thinking.yml +0 -177
  259. hpcflow/errors.py +0 -2
  260. hpcflow/init_db.py +0 -37
  261. hpcflow/models.py +0 -2549
  262. hpcflow/nesting.py +0 -9
  263. hpcflow/profiles.py +0 -455
  264. hpcflow/project.py +0 -81
  265. hpcflow/scheduler.py +0 -323
  266. hpcflow/utils.py +0 -103
  267. hpcflow/validation.py +0 -167
  268. hpcflow/variables.py +0 -544
  269. hpcflow-0.1.9.dist-info/METADATA +0 -168
  270. hpcflow-0.1.9.dist-info/RECORD +0 -45
  271. hpcflow-0.1.9.dist-info/entry_points.txt +0 -8
  272. hpcflow-0.1.9.dist-info/top_level.txt +0 -1
  273. /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
  274. /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
  275. /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
hpcflow/sdk/core/workflow.py
@@ -0,0 +1,4820 @@
+ """
+ Main workflow model.
+ """
+
+ from __future__ import annotations
+ from collections import defaultdict
+ from collections.abc import Callable
+ from contextlib import contextmanager, nullcontext
+ import copy
+ from dataclasses import dataclass, field
+
+ from functools import wraps
+ import os
+ from pathlib import Path
+ import random
+ import shutil
+ import string
+ from threading import Thread
+ import time
+ from typing import overload, cast, TYPE_CHECKING, TypeVar
+ from typing_extensions import ParamSpec, Concatenate
+
+ from uuid import uuid4
+ from warnings import warn
+ from fsspec.implementations.local import LocalFileSystem  # type: ignore
+ from fsspec.implementations.zip import ZipFileSystem  # type: ignore
+ import numpy as np
+ from fsspec.core import url_to_fs  # type: ignore
+ from rich import print as rich_print
+ import rich.console
+ import rich.panel
+ import rich.table
+ import rich.text
+ import rich.box
+
+
+ from hpcflow.sdk import app
+ from hpcflow.sdk.typing import hydrate
+ from hpcflow.sdk.config.errors import (
+     ConfigNonConfigurableError,
+     UnknownMetaTaskConstitutiveSchema,
+ )
+ from hpcflow.sdk.core import (
+     ALL_TEMPLATE_FORMATS,
+     ABORT_EXIT_CODE,
+     RUN_DIR_ARR_FILL,
+     SKIPPED_EXIT_CODE,
+     NO_COMMANDS_EXIT_CODE,
+ )
+ from hpcflow.sdk.core.app_aware import AppAware
+ from hpcflow.sdk.core.enums import EARStatus
+ from hpcflow.sdk.core.skip_reason import SkipReason
+ from hpcflow.sdk.core.cache import ObjectCache
+ from hpcflow.sdk.core.loop_cache import LoopCache, LoopIndex
+ from hpcflow.sdk.log import TimeIt
+ from hpcflow.sdk.persistence import store_cls_from_str
+ from hpcflow.sdk.persistence.defaults import DEFAULT_STORE_FORMAT
+ from hpcflow.sdk.persistence.base import TEMPLATE_COMP_TYPES
+ from hpcflow.sdk.persistence.utils import ask_pw_on_auth_exc, infer_store
+ from hpcflow.sdk.submission.jobscript import (
+     generate_EAR_resource_map,
+     group_resource_map_into_jobscripts,
+     is_jobscript_array,
+     merge_jobscripts_across_tasks,
+     resolve_jobscript_blocks,
+     resolve_jobscript_dependencies,
+ )
+ from hpcflow.sdk.submission.enums import JobscriptElementState
+ from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
+ from hpcflow.sdk.submission.submission import Submission
+ from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
+ from hpcflow.sdk.utils.strings import shorten_list_str
+ from hpcflow.sdk.core.utils import (
+     read_JSON_file,
+     read_JSON_string,
+     read_YAML_str,
+     read_YAML_file,
+     redirect_std_to_file,
+     replace_items,
+     current_timestamp,
+     normalise_timestamp,
+     parse_timestamp,
+ )
+ from hpcflow.sdk.core.errors import (
+     InvalidInputSourceTaskReference,
+     LoopAlreadyExistsError,
+     OutputFileParserNoOutputError,
+     RunNotAbortableError,
+     SubmissionFailure,
+     UnsetParameterDataErrorBase,
+     WorkflowSubmissionFailure,
+ )
+
+ if TYPE_CHECKING:
+     from collections.abc import Iterable, Iterator, Mapping, Sequence
+     from contextlib import AbstractContextManager
+     from typing import Any, ClassVar, Literal
+     from typing_extensions import Self, TypeAlias
+     from numpy.typing import NDArray
+     import psutil
+     from rich.status import Status
+     from ..typing import DataIndex, ParamSource, PathLike, TemplateComponents
+     from .actions import ElementActionRun, UnsetParamTracker
+     from .element import Element, ElementIteration
+     from .loop import Loop, WorkflowLoop
+     from .object_list import ObjectList, ResourceList, WorkflowLoopList, WorkflowTaskList
+     from .parameters import InputSource, ResourceSpec
+     from .task import Task, WorkflowTask
+     from .types import (
+         AbstractFileSystem,
+         CreationInfo,
+         Pending,
+         Resources,
+         WorkflowTemplateTaskData,
+         WorkflowTemplateElementSetData,
+         BlockActionKey,
+     )
+     from ..submission.submission import Submission
+     from ..submission.jobscript import (
+         Jobscript,
+         JobScriptDescriptor,
+         JobScriptCreationArguments,
+     )
+     from ..persistence.base import (
+         StoreElement,
+         StoreElementIter,
+         StoreTask,
+         StoreParameter,
+         StoreEAR,
+     )
+     from ..persistence.types import TemplateMeta
+     from .json_like import JSONed
+
+ #: Convenience alias
+ _TemplateComponents: TypeAlias = "dict[str, ObjectList[JSONLike]]"
+
+ P = ParamSpec("P")
+ T = TypeVar("T")
+ S = TypeVar("S", bound="Workflow")
+
+
+ @dataclass
+ class _Pathway:
+     id_: int
+     names: LoopIndex[str, int] = field(default_factory=LoopIndex)
+     iter_ids: list[int] = field(default_factory=list)
+     data_idx: list[DataIndex] = field(default_factory=list)
+
+     def as_tuple(
+         self, *, ret_iter_IDs: bool = False, ret_data_idx: bool = False
+     ) -> tuple:
+         if ret_iter_IDs:
+             if ret_data_idx:
+                 return (self.id_, self.names, tuple(self.iter_ids), tuple(self.data_idx))
+             else:
+                 return (self.id_, self.names, tuple(self.iter_ids))
+         else:
+             if ret_data_idx:
+                 return (self.id_, self.names, tuple(self.data_idx))
+             else:
+                 return (self.id_, self.names)
+
+     def __deepcopy__(self, memo) -> Self:
+         return self.__class__(
+             self.id_,
+             self.names,
+             copy.deepcopy(self.iter_ids, memo),
+             copy.deepcopy(self.data_idx, memo),
+         )
+
+
+ @dataclass
+ @hydrate
+ class WorkflowTemplate(JSONLike):
+     """Class to represent initial parametrisation of a {app_name} workflow, with limited
+     validation logic.
+
+     Parameters
+     ----------
+     name:
+         A string name for the workflow. By default this name will be used in combination
+         with a date-time stamp when generating a persistent workflow from the template.
+     tasks: list[~hpcflow.app.Task]
+         A list of Task objects to include in the workflow.
+     loops: list[~hpcflow.app.Loop]
+         A list of Loop objects to include in the workflow.
+     workflow:
+         The associated concrete workflow.
+     resources: dict[str, dict] | list[~hpcflow.app.ResourceSpec] | ~hpcflow.app.ResourceList
+         Template-level resources to apply to all tasks as default values. This can be a
+         dict that maps action scopes to resources (e.g. `{{"any": {{"num_cores": 2}}}}`)
+         or a list of `ResourceSpec` objects, or a `ResourceList` object.
+     environments:
+         Environment specifiers, keyed by environment name.
+     env_presets:
+         The environment presets to use.
+     source_file:
+         The file this was derived from.
+     store_kwargs:
+         Additional arguments to pass to the persistent data store constructor.
+     merge_resources:
+         If True, merge template-level `resources` into element set resources. If False,
+         template-level resources are ignored.
+     merge_envs:
+         Whether to merge the environments into task resources.
+     """
+
+     _validation_schema: ClassVar[str] = "workflow_spec_schema.yaml"
+
+     _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
+         ChildObjectSpec(
+             name="tasks",
+             class_name="Task",
+             is_multiple=True,
+             parent_ref="workflow_template",
+         ),
+         ChildObjectSpec(
+             name="loops",
+             class_name="Loop",
+             is_multiple=True,
+             parent_ref="_workflow_template",
+         ),
+         ChildObjectSpec(
+             name="resources",
+             class_name="ResourceList",
+             parent_ref="_workflow_template",
+         ),
+     )
+
+     #: A string name for the workflow.
+     name: str
+     #: Documentation information.
+     doc: list[str] | str | None = field(repr=False, default=None)
+     #: A list of Task objects to include in the workflow.
+     tasks: list[Task] = field(default_factory=list)
+     #: A list of Loop objects to include in the workflow.
+     loops: list[Loop] = field(default_factory=list)
+     #: The associated concrete workflow.
+     workflow: Workflow | None = None
+     #: Template-level resources to apply to all tasks as default values.
+     resources: Resources = None
+     config: dict = field(default_factory=lambda: {})
+     #: Environment specifiers, keyed by environment name.
+     environments: Mapping[str, Mapping[str, Any]] | None = None
+     #: The environment presets to use.
+     env_presets: str | list[str] | None = None
+     #: The file this was derived from.
+     source_file: str | None = field(default=None, compare=False)
+     #: Additional arguments to pass to the persistent data store constructor.
+     store_kwargs: dict[str, Any] = field(default_factory=dict)
+     #: Whether to merge template-level `resources` into element set resources.
+     merge_resources: bool = True
+     #: Whether to merge the environments into task resources.
+     merge_envs: bool = True
+
+     def __post_init__(self) -> None:
+
+         # TODO: in what scenario is the reindex required? are loops initialised?
+
+         # replace metatasks with tasks
+         new_tasks: list[Task] = []
+         do_reindex = False
+         reindex = {}
+         for task_idx, i in enumerate(self.tasks):
+             if isinstance(i, app.MetaTask):
+                 do_reindex = True
+                 tasks_from_meta = copy.deepcopy(i.tasks)
+                 reindex[task_idx] = [
+                     len(new_tasks) + i for i in range(len(tasks_from_meta))
+                 ]
+                 new_tasks.extend(tasks_from_meta)
+             else:
+                 reindex[task_idx] = [len(new_tasks)]
+                 new_tasks.append(i)
+         if do_reindex:
+             if self.loops:
+                 for loop_idx, loop in enumerate(cast("list[dict[str, Any]]", self.loops)):
+                     loop["tasks"] = [j for i in loop["tasks"] for j in reindex[i]]
+                     term_task = loop.get("termination_task")
+                     if term_task is not None:
+                         loop["termination_task"] = reindex[term_task][0]
+
+         self.tasks = new_tasks
+
+         resources = self._app.ResourceList.normalise(self.resources)
+         self.resources = resources
+         self._set_parent_refs()
+
+         # merge template-level `resources` into task element set resources (this mutates
+         # `tasks`, and should only happen on creation of the workflow template, not on
+         # re-initialisation from a persistent workflow):
+         if self.merge_resources:
+             for task in self.tasks:
+                 for element_set in task.element_sets:
+                     element_set.resources.merge_other(resources)
+             self.merge_resources = False
+
+         if self.merge_envs:
+             self._merge_envs_into_task_resources()
+
+         if self.doc and not isinstance(self.doc, list):
+             self.doc = [self.doc]
+
+         if self.config:
+             # don't do a full validation (which would require loading the config file),
+             # just check all specified keys are configurable:
+             bad_keys = set(self.config) - set(self._app.config_options._configurable_keys)
+             if bad_keys:
+                 raise ConfigNonConfigurableError(name=bad_keys)
+
+     @property
+     def _resources(self) -> ResourceList:
+         res = self.resources
+         assert isinstance(res, self._app.ResourceList)
+         return res
+
+     def _get_resources_copy(self) -> Iterator[ResourceSpec]:
+         """
+         Get a deep copy of the list of resources.
+         """
+         memo: dict[int, Any] = {}
+         for spec in self._resources:
+             yield copy.deepcopy(spec, memo)
+
+     def _merge_envs_into_task_resources(self) -> None:
+         self.merge_envs = False
+
+         # disallow both `env_presets` and `environments` specifications:
+         if self.env_presets and self.environments:
+             raise ValueError(
+                 "Workflow template: specify at most one of `env_presets` and "
+                 "`environments`."
+             )
+
+         if not isinstance(self.env_presets, list):
+             self.env_presets = [self.env_presets] if self.env_presets else []
+
+         for task in self.tasks:
+             # get applicable environments and environment preset names:
+             try:
+                 schema = task.schema
+             except ValueError:
+                 # TODO: consider multiple schemas
+                 raise NotImplementedError(
+                     "Cannot merge environment presets into a task with multiple "
+                     "schemas."
+                 )
+             schema_presets = schema.environment_presets
+             app_envs = {act.get_environment_name() for act in schema.actions}
+             for es in task.element_sets:
+                 app_env_specs_i: Mapping[str, Mapping[str, Any]] | None = None
+                 if not es.environments and not es.env_preset:
+                     # no task level envs/presets specified, so merge template-level:
+                     if self.environments:
+                         app_env_specs_i = {
+                             k: v for k, v in self.environments.items() if k in app_envs
+                         }
+                         if app_env_specs_i:
+                             self._app.logger.info(
+                                 f"(task {task.name!r}, element set {es.index}): using "
+                                 f"template-level requested `environment` specifiers: "
+                                 f"{app_env_specs_i!r}."
+                             )
+                             es.environments = app_env_specs_i
+
+                     elif self.env_presets and schema_presets:
+                         # take only the first applicable preset:
+                         for app_preset in self.env_presets:
+                             if app_preset in schema_presets:
+                                 es.env_preset = app_preset
+                                 app_env_specs_i = schema_presets[app_preset]
+                                 self._app.logger.info(
+                                     f"(task {task.name!r}, element set {es.index}): using "
+                                     f"template-level requested {app_preset!r} "
+                                     f"`env_preset`: {app_env_specs_i!r}."
+                                 )
+                                 break
+
+                     else:
+                         # no env/preset applicable here (and no env/preset at task level),
+                         # so apply a default preset if available:
+                         if app_env_specs_i := (schema_presets or {}).get("", None):
+                             self._app.logger.info(
+                                 f"(task {task.name!r}, element set {es.index}): setting "
+                                 f"to default (empty-string named) `env_preset`: "
+                                 f"{app_env_specs_i}."
+                             )
+                             es.env_preset = ""
+
+                     if app_env_specs_i:
+                         es.resources.merge_one(
+                             self._app.ResourceSpec(
+                                 scope="any", environments=app_env_specs_i
+                             )
+                         )
+
+     @classmethod
+     @TimeIt.decorator
+     def _from_data(cls, data: dict[str, Any]) -> WorkflowTemplate:
+         def _normalise_task_parametrisation(task_lst: list[WorkflowTemplateTaskData]):
+             """
+             For each dict in a list of task parametrisations, ensure the `schema` key is
+             a list of values, and ensure `element_sets` are defined.
+
+             This mutates `task_lst`.
+
+             """
+             # use element_sets if not already:
+             task_dat: WorkflowTemplateTaskData
+             for task_idx, task_dat in enumerate(task_lst):
+                 schema = task_dat.pop("schema")
+                 schema_list: list = schema if isinstance(schema, list) else [schema]
+                 if "element_sets" in task_dat:
+                     # just update the schema to a list:
+                     task_lst[task_idx]["schema"] = schema_list
+                 else:
+                     # add a single element set, and update the schema to a list:
+                     out_labels = task_dat.pop("output_labels", [])
+                     es_dat = cast("WorkflowTemplateElementSetData", task_dat)
+                     new_task_dat: WorkflowTemplateTaskData = {
+                         "schema": schema_list,
+                         "element_sets": [es_dat],
+                         "output_labels": out_labels,
+                     }
+                     task_lst[task_idx] = new_task_dat
+                 # move sequences with `paths` (note: plural) to multi_path_sequences:
+                 for elem_set in task_lst[task_idx]["element_sets"]:
+                     new_mps = []
+                     seqs = elem_set.get("sequences", [])
+                     seqs = list(seqs)  # copy
+                     # loop in reverse so indices for pop are valid:
+                     for seq_idx, seq_dat in zip(range(len(seqs) - 1, -1, -1), seqs[::-1]):
+                         if "paths" in seq_dat:  # (note: plural)
+                             # move to a multi-path sequence:
+                             new_mps.append(elem_set["sequences"].pop(seq_idx))
+                     elem_set.setdefault("multi_path_sequences", []).extend(new_mps[::-1])
+
+         meta_tasks = data.pop("meta_tasks", {})
+         if meta_tasks:
+             for i in list(meta_tasks):
+                 _normalise_task_parametrisation(meta_tasks[i])
+             new_task_dat: list[WorkflowTemplateTaskData] = []
+             reindex = {}
+             for task_idx, task_dat in enumerate(data["tasks"]):
+                 if meta_task_dat := meta_tasks.get(task_dat["schema"]):
+                     reindex[task_idx] = [
+                         len(new_task_dat) + i for i in range(len(meta_task_dat))
+                     ]
+
+                     all_schema_names = [j for i in meta_task_dat for j in i["schema"]]
+
+                     # update any parametrisation provided in the task list:
+                     base_data = copy.deepcopy(meta_task_dat)
+
+                     # any other keys in `task_dat` should be mappings whose keys are
+                     # the schema name (within the meta task) optionally suffixed by
+                     # a period and the element set index to which the updates should be
+                     # copied (no integer suffix indicates the zeroth element set):
+                     for k, v in task_dat.items():
+                         if k == "schema":
+                             continue
+
+                         for elem_set_id, dat in v.items():
+
+                             elem_set_id_split = elem_set_id.split(".")
+                             try:
+                                 es_idx = int(elem_set_id_split[-1])
+                                 schema_name = ".".join(elem_set_id_split[:-1])
+                             except ValueError:
+                                 es_idx = 0
+                                 schema_name = ".".join(elem_set_id_split)
+                             schema_name = schema_name.strip(".")
+
+                             # check valid schema name:
+                             if schema_name not in all_schema_names:
+                                 raise UnknownMetaTaskConstitutiveSchema(
+                                     f"Task schema with objective {schema_name!r} is not "
+                                     f"part of the meta-task with objective "
+                                     f"{task_dat['schema']!r}. The constitutive schemas of"
+                                     f" this meta-task have objectives: "
+                                     f"{all_schema_names!r}."
+                                 )
+
+                             # copy `dat` to the correct schema and element set in the
+                             # meta-task:
+                             for s_idx, s in enumerate(base_data):
+                                 if s["schema"] == [schema_name]:
+                                     if k == "inputs":
+                                         # special case; merge inputs
+                                         base_data[s_idx]["element_sets"][es_idx][
+                                             k
+                                         ].update(dat)
+                                     else:
+                                         # just overwrite
+                                         base_data[s_idx]["element_sets"][es_idx][k] = dat
+
+                     new_task_dat.extend(base_data)
+
+                 else:
+                     reindex[task_idx] = [len(new_task_dat)]
+                     new_task_dat.append(task_dat)
+
+             data["tasks"] = new_task_dat
+
+             if loops := data.get("loops"):
+                 for loop_idx, loop in enumerate(loops):
+                     loops[loop_idx]["tasks"] = [
+                         j for i in loop["tasks"] for j in reindex[i]
+                     ]
+                     term_task = loop.get("termination_task")
+                     if term_task is not None:
+                         loops[loop_idx]["termination_task"] = reindex[term_task][0]
+
+         _normalise_task_parametrisation(data["tasks"])
+
+         # extract out any template components:
+         # TODO: TypedDict for data
+         tcs: dict[str, list] = data.pop("template_components", {})
+         if params_dat := tcs.pop("parameters", []):
+             parameters = cls._app.ParametersList.from_json_like(
+                 params_dat, shared_data=cls._app._shared_data
+             )
+             cls._app.parameters.add_objects(parameters, skip_duplicates=True)
+
+         if cmd_files_dat := tcs.pop("command_files", []):
+             cmd_files = cls._app.CommandFilesList.from_json_like(
+                 cmd_files_dat, shared_data=cls._app._shared_data
+             )
+             cls._app.command_files.add_objects(cmd_files, skip_duplicates=True)
+
+         if envs_dat := tcs.pop("environments", []):
+             envs = cls._app.EnvironmentsList.from_json_like(
+                 envs_dat, shared_data=cls._app._shared_data
+             )
+             cls._app.envs.add_objects(envs, skip_duplicates=True)
+
+         if ts_dat := tcs.pop("task_schemas", []):
+             task_schemas = cls._app.TaskSchemasList.from_json_like(
+                 ts_dat, shared_data=cls._app._shared_data
+             )
+             cls._app.task_schemas.add_objects(task_schemas, skip_duplicates=True)
+
+         if mts_dat := tcs.pop("meta_task_schemas", []):
+             meta_ts = [
+                 cls._app.MetaTaskSchema.from_json_like(
+                     i, shared_data=cls._app.template_components
+                 )
+                 for i in mts_dat
+             ]
+             cls._app.task_schemas.add_objects(meta_ts, skip_duplicates=True)
+
+         wkt = cls.from_json_like(data, shared_data=cls._app._shared_data)
+
+         # print(f"WorkflowTemplate._from_data: {wkt=!r}")
+         # TODO: what is this for!?
+         # for idx, task in enumerate(wkt.tasks):
+         #     if isinstance(task.schema, cls._app.MetaTaskSchema):
+         #         print(f"{task=!r}")
+         #         wkt.tasks[idx] = cls._app.MetaTask(schema=task.schema, tasks=task.tasks)
+         return wkt
+
+     @classmethod
+     @TimeIt.decorator
+     def from_YAML_string(
+         cls,
+         string: str,
+         variables: dict[str, str] | Literal[False] | None = None,
+     ) -> WorkflowTemplate:
+         """Load from a YAML string.
+
+         Parameters
+         ----------
+         string
+             The YAML string containing the workflow template parametrisation.
+         variables
+             String variables to substitute in `string`. Substitutions will be attempted if
+             the YAML string looks to contain variable references (like "<<var:name>>"). If
+             set to `False`, no substitutions will occur, which may result in an invalid
+             workflow template!
+         """
+         return cls._from_data(
+             read_YAML_str(
+                 string,
+                 variables=variables,
+                 source="(from the inline workflow template definition)",
+             )
+         )
+
+     @classmethod
+     def _check_name(cls, data: dict[str, Any], path: PathLike) -> None:
+         """Check the workflow template data has a "name" key. If not, add a "name" key,
+         using the file path stem.
+
+         Note: this method mutates `data`.
+
+         """
+         if "name" not in data and path is not None:
+             name = Path(path).stem
+             cls._app.logger.info(
+                 f"using file name stem ({name!r}) as the workflow template name."
+             )
+             data["name"] = name
+
+     @classmethod
+     @TimeIt.decorator
+     def from_YAML_file(
+         cls,
+         path: PathLike,
+         variables: dict[str, str] | Literal[False] | None = None,
+     ) -> WorkflowTemplate:
+         """Load from a YAML file.
+
+         Parameters
+         ----------
+         path
+             The path to the YAML file containing the workflow template parametrisation.
+         variables
+             String variables to substitute in the file given by `path`. Substitutions will
+             be attempted if the YAML file looks to contain variable references (like
+             "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+             result in an invalid workflow template!
+
+         """
+         cls._app.logger.debug("parsing workflow template from a YAML file")
+         data = read_YAML_file(path, variables=variables)
+         cls._check_name(data, path)
+         data["source_file"] = str(path)
+         return cls._from_data(data)
+
+     @classmethod
+     @TimeIt.decorator
+     def from_JSON_string(
+         cls,
+         string: str,
+         variables: dict[str, str] | Literal[False] | None = None,
+     ) -> WorkflowTemplate:
+         """Load from a JSON string.
+
+         Parameters
+         ----------
+         string
+             The JSON string containing the workflow template parametrisation.
+         variables
+             String variables to substitute in `string`. Substitutions will be attempted if
+             the JSON string looks to contain variable references (like "<<var:name>>"). If
+             set to `False`, no substitutions will occur, which may result in an invalid
+             workflow template!
+         """
+         return cls._from_data(read_JSON_string(string, variables=variables))
+
+     @classmethod
+     @TimeIt.decorator
+     def from_JSON_file(
+         cls,
+         path: PathLike,
+         variables: dict[str, str] | Literal[False] | None = None,
+     ) -> WorkflowTemplate:
+         """Load from a JSON file.
+
+         Parameters
+         ----------
+         path
+             The path to the JSON file containing the workflow template parametrisation.
+         variables
+             String variables to substitute in the file given by `path`. Substitutions will
+             be attempted if the JSON file looks to contain variable references (like
+             "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+             result in an invalid workflow template!
+         """
+         cls._app.logger.debug("parsing workflow template from a JSON file")
+         data = read_JSON_file(path, variables=variables)
+         cls._check_name(data, path)
+         data["source_file"] = str(path)
+         return cls._from_data(data)
+
+     @classmethod
+     @TimeIt.decorator
+     def from_file(
+         cls,
+         path: PathLike,
+         template_format: Literal["yaml", "json"] | None = None,
+         variables: dict[str, str] | Literal[False] | None = None,
+     ) -> WorkflowTemplate:
+         """Load from either a YAML or JSON file, depending on the file extension.
+
+         Parameters
+         ----------
+         path
+             The path to the file containing the workflow template parametrisation.
+         template_format
+             The file format to expect at `path`. One of "json" or "yaml", if specified. By
+             default, "yaml".
+         variables
+             String variables to substitute in the file given by `path`. Substitutions will
+             be attempted if the file looks to contain variable references (like
+             "<<var:name>>"). If set to `False`, no substitutions will occur, which may
+             result in an invalid workflow template!
+         """
+         path_ = Path(path or ".")
+         fmt = template_format.lower() if template_format else None
+         if fmt == "yaml" or path_.suffix in (".yaml", ".yml"):
+             return cls.from_YAML_file(path_, variables=variables)
+         elif fmt == "json" or path_.suffix in (".json", ".jsonc"):
+             return cls.from_JSON_file(path_, variables=variables)
+         else:
+             raise ValueError(
+                 f"Unknown workflow template file extension {path_.suffix!r}. Supported "
+                 f"template formats are {ALL_TEMPLATE_FORMATS!r}."
+             )
+
+     def _add_empty_task(self, task: Task, new_index: int, insert_ID: int) -> None:
+         """Called by `Workflow._add_empty_task`."""
+         assert self.workflow
+         new_task_name = self.workflow._get_new_task_unique_name(task, new_index)
+
+         task._insert_ID = insert_ID
+         task._dir_name = f"task_{task.insert_ID}_{new_task_name}"
+         task._element_sets = []  # element sets are added to the Task during add_elements
+
+         task.workflow_template = self
+         self.tasks.insert(new_index, task)
+
+     def _add_empty_loop(self, loop: Loop) -> None:
+         """Called by `Workflow._add_empty_loop`."""
+
+         assert self.workflow
+         if not loop.name:
+             existing = {loop.name for loop in self.loops}
+             new_idx = len(self.loops)
+             while (name := f"loop_{new_idx}") in existing:
+                 new_idx += 1
+             loop._name = name
+         elif loop.name in self.workflow.loops.list_attrs():
+             raise LoopAlreadyExistsError(loop.name, self.workflow.loops)
+
+         loop._workflow_template = self
+         self.loops.append(loop)
+
+
+ def resolve_fsspec(
+     path: PathLike, **kwargs
+ ) -> tuple[AbstractFileSystem, str, str | None]:
+     """
+     Decide how to handle a particular virtual path.
+
+     Parameters
+     ----------
+     kwargs
+         This can include a `password` key, for connections via SSH.
+
+     """
+
+     path_s = str(path)
+     fs: AbstractFileSystem
+     if path_s.endswith(".zip"):
+         # `url_to_fs` does not seem to work for zip combos e.g. `zip::ssh://`, so we
+         # construct a `ZipFileSystem` ourselves and assume it is signified only by the
+         # file extension:
+         fs, pw = ask_pw_on_auth_exc(
+             ZipFileSystem,
+             fo=path_s,
+             mode="r",
+             target_options=kwargs or {},
+             add_pw_to="target_options",
+         )
+         path_s = ""
+
+     else:
+         (fs, path_s), pw = ask_pw_on_auth_exc(url_to_fs, path_s, **kwargs)
+         path_s = str(Path(path_s).as_posix())
+         if isinstance(fs, LocalFileSystem):
+             path_s = str(Path(path_s).resolve())
+
+     return fs, path_s, pw
+
+
+ @dataclass(frozen=True)
+ class _IterationData:
+     id_: int
+     idx: int
+
+
+ def load_workflow_config(
+     func: Callable[Concatenate[S, P], T],
+ ) -> Callable[Concatenate[S, P], T]:
+     """Decorator to apply workflow-level config items during execution of a Workflow
+     method."""
+
+     @wraps(func)
+     def wrapped(self: S, *args: P.args, **kwargs: P.kwargs) -> T:
+
+         updates = self.template.config
+         if updates:
+             with self._app.config._with_updates(updates):
+                 return func(self, *args, **kwargs)
+         else:
+             return func(self, *args, **kwargs)
+
+     return wrapped
+
+
+ class Workflow(AppAware):
+     """
+     A concrete workflow.
+
+     Parameters
+     ----------
+     workflow_ref:
+         Either the path to a persistent workflow, or an integer that will be interpreted
+         as the local ID of a workflow submission, as reported by the app `show`
+         command.
+     store_fmt:
+         The format of persistent store to use. Used to select the store manager class.
+     fs_kwargs:
+         Additional arguments to pass when resolving a virtual workflow reference.
+     kwargs:
+         For compatibility during pre-stable development phase.
+     """
+
+     _default_ts_fmt: ClassVar[str] = r"%Y-%m-%d %H:%M:%S.%f"
+     _default_ts_name_fmt: ClassVar[str] = r"%Y-%m-%d_%H%M%S"
+     _input_files_dir_name: ClassVar[str] = "input_files"
+     _exec_dir_name: ClassVar[str] = "execute"
+
+     def __init__(
+         self,
+         workflow_ref: str | Path | int,
+         store_fmt: str | None = None,
+         fs_kwargs: dict[str, Any] | None = None,
+         **kwargs,
+     ):
+         if isinstance(workflow_ref, int):
+             path = self._app._get_workflow_path_from_local_ID(workflow_ref)
+         elif isinstance(workflow_ref, str):
+             path = Path(workflow_ref)
+         else:
+             path = workflow_ref
+
+         self._app.logger.info(f"loading workflow from path: {path}")
+         fs_path = str(path)
+         fs, path_s, _ = resolve_fsspec(path, **(fs_kwargs or {}))
+         store_fmt = store_fmt or infer_store(fs_path, fs)
+         store_cls = store_cls_from_str(store_fmt)
+
+         self.path = path_s
+
+         # assigned on first access:
+         self._ts_fmt: str | None = None
+         self._ts_name_fmt: str | None = None
+         self._creation_info: CreationInfo | None = None
+         self._name: str | None = None
+         self._template: WorkflowTemplate | None = None
+         self._template_components: TemplateComponents | None = None
+         self._tasks: WorkflowTaskList | None = None
+         self._loops: WorkflowLoopList | None = None
+         self._submissions: list[Submission] | None = None
+
+         self._store = store_cls(self._app, self, self.path, fs)
+         self._in_batch_mode = False  # flag to track when processing batch updates
+
+         self._use_merged_parameters_cache = False
+         self._merged_parameters_cache: dict[
+             tuple[str | None, tuple[tuple[str, tuple[int, ...] | int], ...]], Any
+         ] = {}
+
+         # store indices of updates during batch update, so we can revert on failure:
+         self._pending = self._get_empty_pending()
+
+         # reassigned within `ElementActionRun.raise_on_failure_threshold` context manager:
+         self._is_tracking_unset: bool = False
+         self._tracked_unset: dict[str, UnsetParamTracker] | None = None
+
+     def reload(self) -> Self:
+         """Reload the workflow from disk."""
+         return self.__class__(self.url)
+
+     @property
+     def name(self) -> str:
+         """
+         The name of the workflow.
+
+         The workflow name may be different from the template name, as it includes the
+         creation date-timestamp if generated.
+         """
+         if not self._name:
+             self._name = self._store.get_name()
+         return self._name
+
+     @property
+     def url(self) -> str:
+         """An fsspec URL for this workflow."""
+         if self._store.fs:
+             if self._store.fs.protocol == "zip":
+                 return self._store.fs.of.path
+             elif self._store.fs.protocol == ("file", "local"):
+                 return self.path
+         raise NotImplementedError("Only (local) zip and local URLs provided for now.")
+
+     @property
+     def store_format(self) -> str:
+         """
+         The format of the workflow's persistent store.
+         """
+         return self._store._name
+
+     @classmethod
+     @TimeIt.decorator
+     def from_template(
+         cls,
+         template: WorkflowTemplate,
+         path: PathLike = None,
+         name: str | None = None,
+         name_add_timestamp: bool | None = None,
+         name_use_dir: bool | None = None,
+         overwrite: bool = False,
+         store: str = DEFAULT_STORE_FORMAT,
+         ts_fmt: str | None = None,
+         ts_name_fmt: str | None = None,
+         store_kwargs: dict[str, Any] | None = None,
+         status: Status | None = None,
+     ) -> Workflow:
+         """Generate from a `WorkflowTemplate` object.
+
+         Parameters
+         ----------
+         template:
+             The WorkflowTemplate object to make persistent.
+         path:
+             The directory in which the workflow will be generated. If not specified, the
+             config item `default_workflow_path` will be used; if that is not set, the
+             current directory is used.
+         name
+             The name to use for the workflow. If not provided, the name will be set to
+             that of the template (optionally suffixed by a date-timestamp if
+             `name_add_timestamp` is True).
+         name_add_timestamp
+             If True, suffix the name with a date-timestamp. A default value can be set
+             with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
+         name_use_dir
+             If True, and `name_add_timestamp` is also True, the workflow directory name
+             will be just the date-timestamp, and will be contained within a parent
+             directory corresponding to the workflow name. A default value can be set
+             with the config item `workflow_name_use_dir`; otherwise set to `False`.
944
+ overwrite:
945
+ If True and the workflow directory (`path` + `name`) already exists, the
946
+ existing directory will be overwritten.
947
+ store:
948
+ The persistent store to use for this workflow.
949
+ ts_fmt:
950
+ The datetime format to use for storing datetimes. Datetimes are always stored
951
+ in UTC (because Numpy does not store time zone info), so this should not
952
+ include a time zone name.
953
+ ts_name_fmt:
954
+ The datetime format to use when generating the workflow name, where it
955
+ includes a timestamp.
956
+ store_kwargs:
957
+ Keyword arguments to pass to the store's `write_empty_workflow` method.
958
+ """
959
+ if status:
960
+ status.update("Generating empty workflow...")
961
+ try:
962
+ wk = cls._write_empty_workflow(
963
+ template=template,
964
+ path=path,
965
+ name=name,
966
+ name_add_timestamp=name_add_timestamp,
967
+ name_use_dir=name_use_dir,
968
+ overwrite=overwrite,
969
+ store=store,
970
+ ts_fmt=ts_fmt,
971
+ ts_name_fmt=ts_name_fmt,
972
+ store_kwargs=store_kwargs,
973
+ )
974
+ with wk._store.cached_load(), wk.batch_update(
975
+ is_workflow_creation=True
976
+ ), wk._store.cache_ctx():
977
+ for idx, task in enumerate(template.tasks):
978
+ if status:
979
+ status.update(
980
+ f"Adding task {idx + 1}/{len(template.tasks)} "
981
+ f"({task.name!r})..."
982
+ )
983
+ wk._add_task(task)
984
+ if template.loops:
985
+ if status:
986
+ status.update(
987
+ f"Preparing to add {len(template.loops)} loops; building "
988
+ f"cache..."
989
+ )
990
+
991
+ for loop in template.loops:
992
+ loop._validate_against_workflow(wk)
993
+ # TODO: if loop with non-initialisable actions, will fail
994
+ cache = LoopCache.build(workflow=wk, loops=template.loops)
995
+ for idx, loop in enumerate(template.loops):
996
+ if status:
997
+ status.update(
998
+ f"Adding loop {idx + 1}/"
999
+ f"{len(template.loops)} ({loop.name!r})"
1000
+ )
1001
+ wk._add_loop(loop, cache=cache, status=status)
1002
+ if status:
1003
+ status.update(
1004
+ f"Added {len(template.loops)} loops. "
1005
+ f"Committing to store..."
1006
+ )
1007
+ elif status:
1008
+ status.update("Committing to store...")
1009
+ except (Exception, NotImplementedError):
1010
+ if status:
1011
+ status.stop()
1012
+ raise
1013
+ return wk
1014
+
1015
+ @classmethod
1016
+ @TimeIt.decorator
1017
+ def from_YAML_file(
1018
+ cls,
1019
+ YAML_path: PathLike,
1020
+ path: PathLike = None,
1021
+ name: str | None = None,
1022
+ name_add_timestamp: bool | None = None,
1023
+ name_use_dir: bool | None = None,
1024
+ overwrite: bool = False,
1025
+ store: str = DEFAULT_STORE_FORMAT,
1026
+ ts_fmt: str | None = None,
1027
+ ts_name_fmt: str | None = None,
1028
+ store_kwargs: dict[str, Any] | None = None,
1029
+ variables: dict[str, str] | Literal[False] | None = None,
1030
+ ) -> Workflow:
1031
+ """Generate from a YAML file.
1032
+
1033
+ Parameters
1034
+ ----------
1035
+ YAML_path:
1036
+ The path to a workflow template in the YAML file format.
1037
+ path:
1038
+ The directory in which the workflow will be generated. If not specified, the
1039
+ config item `default_workflow_path` will be used; if that is not set, the
1040
+ current directory is used.
1041
+ name
1042
+ The name to use for the workflow. If not provided, the name will be set to
1043
+ that of the template (optionally suffixed by a date-timestamp if
1044
+ `name_add_timestamp` is True).
1045
+ name_add_timestamp
1046
+ If True, suffix the name with a date-timestamp. A default value can be set
1047
+ with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
1048
+ name_use_dir
1049
+ If True, and `name_add_timestamp` is also True, the workflow directory name
1050
+ will be just the date-timestamp, and will be contained within a parent
1051
+ directory corresponding to the workflow name. A default value can be set
1052
+ with the config item `workflow_name_use_dir`; otherwise set to `False`.
1053
+ overwrite:
1054
+ If True and the workflow directory (`path` + `name`) already exists, the
1055
+ existing directory will be overwritten.
1056
+ store:
1057
+ The persistent store to use for this workflow.
1058
+ ts_fmt:
1059
+ The datetime format to use for storing datetimes. Datetimes are always stored
1060
+ in UTC (because Numpy does not store time zone info), so this should not
1061
+ include a time zone name.
1062
+ ts_name_fmt:
1063
+ The datetime format to use when generating the workflow name, where it
1064
+ includes a timestamp.
1065
+ store_kwargs:
1066
+ Keyword arguments to pass to the store's `write_empty_workflow` method.
1067
+ variables:
1068
+ String variables to substitute in the file given by `YAML_path`. Substitutions
1069
+ will be attempted if the YAML file looks to contain variable references (like
1070
+ "<<var:name>>"). If set to `False`, no substitutions will occur, which may
1071
+ result in an invalid workflow template!
1072
+ """
1073
+ template = cls._app.WorkflowTemplate.from_YAML_file(
1074
+ path=YAML_path,
1075
+ variables=variables,
1076
+ )
1077
+ return cls.from_template(
1078
+ template,
1079
+ path,
1080
+ name,
1081
+ name_add_timestamp,
1082
+ name_use_dir,
1083
+ overwrite,
1084
+ store,
1085
+ ts_fmt,
1086
+ ts_name_fmt,
1087
+ store_kwargs,
1088
+ )
1089
+
1090
+ @classmethod
1091
+ def from_YAML_string(
1092
+ cls,
1093
+ YAML_str: str,
1094
+ path: PathLike = None,
1095
+ name: str | None = None,
1096
+ name_add_timestamp: bool | None = None,
1097
+ name_use_dir: bool | None = None,
1098
+ overwrite: bool = False,
1099
+ store: str = DEFAULT_STORE_FORMAT,
1100
+ ts_fmt: str | None = None,
1101
+ ts_name_fmt: str | None = None,
1102
+ store_kwargs: dict[str, Any] | None = None,
1103
+ variables: dict[str, str] | Literal[False] | None = None,
1104
+ status: Status | None = None,
1105
+ ) -> Workflow:
1106
+ """Generate from a YAML string.
1107
+
1108
+ Parameters
1109
+ ----------
1110
+ YAML_str:
1111
+ The YAML string containing a workflow template parametrisation.
1112
+ path:
1113
+ The directory in which the workflow will be generated. If not specified, the
1114
+ config item `default_workflow_path` will be used; if that is not set, the
1115
+ current directory is used.
1116
+ name
1117
+ The name to use for the workflow. If not provided, the name will be set to
1118
+ that of the template (optionally suffixed by a date-timestamp if
1119
+ `name_add_timestamp` is True).
1120
+ name_add_timestamp
1121
+ If True, suffix the name with a date-timestamp. A default value can be set
1122
+ with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
1123
+ name_use_dir
1124
+ If True, and `name_add_timestamp` is also True, the workflow directory name
1125
+ will be just the date-timestamp, and will be contained within a parent
1126
+ directory corresponding to the workflow name. A default value can be set
1127
+ with the config item `workflow_name_use_dir`; otherwise set to `False`.
1128
+ overwrite:
1129
+ If True and the workflow directory (`path` + `name`) already exists, the
1130
+ existing directory will be overwritten.
1131
+ store:
1132
+ The persistent store to use for this workflow.
1133
+ ts_fmt:
1134
+ The datetime format to use for storing datetimes. Datetimes are always stored
1135
+ in UTC (because Numpy does not store time zone info), so this should not
1136
+ include a time zone name.
1137
+ ts_name_fmt:
1138
+ The datetime format to use when generating the workflow name, where it
1139
+ includes a timestamp.
1140
+ store_kwargs:
1141
+ Keyword arguments to pass to the store's `write_empty_workflow` method.
1142
+ variables:
1143
+ String variables to substitute in the string `YAML_str`. Substitutions will be
1144
+ attempted if the YAML string looks to contain variable references (like
1145
+ "<<var:name>>"). If set to `False`, no substitutions will occur, which may
1146
+ result in an invalid workflow template!
1147
+ """
1148
+ template = cls._app.WorkflowTemplate.from_YAML_string(
1149
+ string=YAML_str,
1150
+ variables=variables,
1151
+ )
1152
+ return cls.from_template(
1153
+ template,
1154
+ path,
1155
+ name,
1156
+ name_add_timestamp,
1157
+ name_use_dir,
1158
+ overwrite,
1159
+ store,
1160
+ ts_fmt,
1161
+ ts_name_fmt,
1162
+ store_kwargs,
1163
+ status,
1164
+ )
1165
+
1166
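As a usage sketch (assuming the application instance is available as `hf`, e.g. `from hpcflow.app import app as hf`, and that a task schema named `dummy_schema` exists; the template content is illustrative):

    yaml_str = """
    name: my_workflow
    tasks:
    - schema: dummy_schema
      inputs:
        p1: <<var:p1_val>>
    """
    # "<<var:p1_val>>" is substituted from `variables` before parsing:
    wk = hf.Workflow.from_YAML_string(yaml_str, variables={"p1_val": "101"})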
+ @classmethod
1167
+ def from_JSON_file(
1168
+ cls,
1169
+ JSON_path: PathLike,
1170
+ path: PathLike = None,
1171
+ name: str | None = None,
1172
+ name_add_timestamp: bool | None = None,
1173
+ name_use_dir: bool | None = None,
1174
+ overwrite: bool = False,
1175
+ store: str = DEFAULT_STORE_FORMAT,
1176
+ ts_fmt: str | None = None,
1177
+ ts_name_fmt: str | None = None,
1178
+ store_kwargs: dict[str, Any] | None = None,
1179
+ variables: dict[str, str] | Literal[False] | None = None,
1180
+ status: Status | None = None,
1181
+ ) -> Workflow:
1182
+ """Generate from a JSON file.
1183
+
1184
+ Parameters
1185
+ ----------
1186
+ JSON_path:
1187
+ The path to a workflow template in the JSON file format.
1188
+ path:
1189
+ The directory in which the workflow will be generated. If not specified, the
1190
+ config item `default_workflow_path` will be used; if that is not set, the
1191
+ current directory is used.
1192
+            name:
1193
+ The name to use for the workflow. If not provided, the name will be set to
1194
+ that of the template (optionally suffixed by a date-timestamp if
1195
+ `name_add_timestamp` is True).
1196
+            name_add_timestamp:
1197
+ If True, suffix the name with a date-timestamp. A default value can be set
1198
+ with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
1199
+            name_use_dir:
1200
+ If True, and `name_add_timestamp` is also True, the workflow directory name
1201
+ will be just the date-timestamp, and will be contained within a parent
1202
+ directory corresponding to the workflow name. A default value can be set
1203
+ with the config item `workflow_name_use_dir`; otherwise set to `False`.
1204
+ overwrite:
1205
+ If True and the workflow directory (`path` + `name`) already exists, the
1206
+ existing directory will be overwritten.
1207
+ store:
1208
+ The persistent store to use for this workflow.
1209
+ ts_fmt:
1210
+ The datetime format to use for storing datetimes. Datetimes are always stored
1211
+ in UTC (because Numpy does not store time zone info), so this should not
1212
+ include a time zone name.
1213
+ ts_name_fmt:
1214
+ The datetime format to use when generating the workflow name, where it
1215
+ includes a timestamp.
1216
+ store_kwargs:
1217
+ Keyword arguments to pass to the store's `write_empty_workflow` method.
1218
+ variables:
1219
+ String variables to substitute in the file given by `JSON_path`. Substitutions
1220
+              will be attempted if the JSON file appears to contain variable references (like
1221
+ "<<var:name>>"). If set to `False`, no substitutions will occur, which may
1222
+ result in an invalid workflow template!
1223
+ """
1224
+ template = cls._app.WorkflowTemplate.from_JSON_file(
1225
+ path=JSON_path,
1226
+ variables=variables,
1227
+ )
1228
+ return cls.from_template(
1229
+ template,
1230
+ path,
1231
+ name,
1232
+ name_add_timestamp,
1233
+ name_use_dir,
1234
+ overwrite,
1235
+ store,
1236
+ ts_fmt,
1237
+ ts_name_fmt,
1238
+ store_kwargs,
1239
+ status,
1240
+ )
1241
+
1242
+ @classmethod
1243
+ def from_JSON_string(
1244
+ cls,
1245
+ JSON_str: str,
1246
+ path: PathLike = None,
1247
+ name: str | None = None,
1248
+ name_add_timestamp: bool | None = None,
1249
+ name_use_dir: bool | None = None,
1250
+ overwrite: bool = False,
1251
+ store: str = DEFAULT_STORE_FORMAT,
1252
+ ts_fmt: str | None = None,
1253
+ ts_name_fmt: str | None = None,
1254
+ store_kwargs: dict[str, Any] | None = None,
1255
+ variables: dict[str, str] | Literal[False] | None = None,
1256
+ status: Status | None = None,
1257
+ ) -> Workflow:
1258
+ """Generate from a JSON string.
1259
+
1260
+ Parameters
1261
+ ----------
1262
+ JSON_str:
1263
+ The JSON string containing a workflow template parametrisation.
1264
+ path:
1265
+ The directory in which the workflow will be generated. If not specified, the
1266
+ config item `default_workflow_path` will be used; if that is not set, the
1267
+ current directory is used.
1268
+            name:
1269
+ The name to use for the workflow. If not provided, the name will be set to
1270
+ that of the template (optionally suffixed by a date-timestamp if
1271
+ `name_add_timestamp` is True).
1272
+            name_add_timestamp:
1273
+ If True, suffix the name with a date-timestamp. A default value can be set
1274
+ with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
1275
+            name_use_dir:
1276
+ If True, and `name_add_timestamp` is also True, the workflow directory name
1277
+ will be just the date-timestamp, and will be contained within a parent
1278
+ directory corresponding to the workflow name. A default value can be set
1279
+ with the config item `workflow_name_use_dir`; otherwise set to `False`.
1280
+ overwrite:
1281
+ If True and the workflow directory (`path` + `name`) already exists, the
1282
+ existing directory will be overwritten.
1283
+ store:
1284
+ The persistent store to use for this workflow.
1285
+ ts_fmt:
1286
+ The datetime format to use for storing datetimes. Datetimes are always stored
1287
+ in UTC (because Numpy does not store time zone info), so this should not
1288
+ include a time zone name.
1289
+ ts_name_fmt:
1290
+ The datetime format to use when generating the workflow name, where it
1291
+ includes a timestamp.
1292
+ store_kwargs:
1293
+ Keyword arguments to pass to the store's `write_empty_workflow` method.
1294
+ variables:
1295
+ String variables to substitute in the string `JSON_str`. Substitutions will be
1296
+              attempted if the JSON string appears to contain variable references (like
1297
+ "<<var:name>>"). If set to `False`, no substitutions will occur, which may
1298
+ result in an invalid workflow template!
1299
+ """
1300
+ template = cls._app.WorkflowTemplate.from_JSON_string(
1301
+ string=JSON_str,
1302
+ variables=variables,
1303
+ )
1304
+ return cls.from_template(
1305
+ template,
1306
+ path,
1307
+ name,
1308
+ name_add_timestamp,
1309
+ name_use_dir,
1310
+ overwrite,
1311
+ store,
1312
+ ts_fmt,
1313
+ ts_name_fmt,
1314
+ store_kwargs,
1315
+ status,
1316
+ )
1317
+
1318
+ @classmethod
1319
+ @TimeIt.decorator
1320
+ def from_file(
1321
+ cls,
1322
+ template_path: PathLike,
1323
+ template_format: Literal["json", "yaml"] | None = None,
1324
+ path: str | None = None,
1325
+ name: str | None = None,
1326
+ name_add_timestamp: bool | None = None,
1327
+ name_use_dir: bool | None = None,
1328
+ overwrite: bool = False,
1329
+ store: str = DEFAULT_STORE_FORMAT,
1330
+ ts_fmt: str | None = None,
1331
+ ts_name_fmt: str | None = None,
1332
+ store_kwargs: dict[str, Any] | None = None,
1333
+ variables: dict[str, str] | Literal[False] | None = None,
1334
+ status: Status | None = None,
1335
+ ) -> Workflow:
1336
+ """Generate from either a YAML or JSON file, depending on the file extension.
1337
+
1338
+ Parameters
1339
+ ----------
1340
+ template_path:
1341
+ The path to a template file in YAML or JSON format, and with a ".yml",
1342
+ ".yaml", or ".json" extension.
1343
+ template_format:
1344
+ If specified, one of "json" or "yaml". This forces parsing from a particular
1345
+ format regardless of the file extension.
1346
+ path:
1347
+ The directory in which the workflow will be generated. If not specified, the
1348
+ config item `default_workflow_path` will be used; if that is not set, the
1349
+ current directory is used.
1350
+            name:
1351
+ The name to use for the workflow. If not provided, the name will be set to
1352
+ that of the template (optionally suffixed by a date-timestamp if
1353
+ `name_add_timestamp` is True).
1354
+            name_add_timestamp:
1355
+ If True, suffix the name with a date-timestamp. A default value can be set
1356
+ with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
1357
+            name_use_dir:
1358
+ If True, and `name_add_timestamp` is also True, the workflow directory name
1359
+ will be just the date-timestamp, and will be contained within a parent
1360
+ directory corresponding to the workflow name. A default value can be set
1361
+ with the config item `workflow_name_use_dir`; otherwise set to `False`.
1362
+ overwrite:
1363
+ If True and the workflow directory (`path` + `name`) already exists, the
1364
+ existing directory will be overwritten.
1365
+ store:
1366
+ The persistent store to use for this workflow.
1367
+ ts_fmt:
1368
+ The datetime format to use for storing datetimes. Datetimes are always stored
1369
+ in UTC (because Numpy does not store time zone info), so this should not
1370
+ include a time zone name.
1371
+ ts_name_fmt:
1372
+ The datetime format to use when generating the workflow name, where it
1373
+ includes a timestamp.
1374
+ store_kwargs:
1375
+ Keyword arguments to pass to the store's `write_empty_workflow` method.
1376
+ variables:
1377
+ String variables to substitute in the file given by `template_path`.
1378
+              Substitutions will be attempted if the file appears to contain variable
1379
+ references (like "<<var:name>>"). If set to `False`, no substitutions will
1380
+ occur, which may result in an invalid workflow template!
1381
+ """
1382
+ try:
1383
+ template = cls._app.WorkflowTemplate.from_file(
1384
+ template_path,
1385
+ template_format,
1386
+ variables=variables,
1387
+ )
1388
+ except Exception:
1389
+ if status:
1390
+ status.stop()
1391
+ raise
1392
+ return cls.from_template(
1393
+ template,
1394
+ path,
1395
+ name,
1396
+ name_add_timestamp,
1397
+ name_use_dir,
1398
+ overwrite,
1399
+ store,
1400
+ ts_fmt,
1401
+ ts_name_fmt,
1402
+ store_kwargs,
1403
+ status,
1404
+ )
1405
+
1406
+ @classmethod
1407
+ @TimeIt.decorator
1408
+ def from_template_data(
1409
+ cls,
1410
+ template_name: str,
1411
+ tasks: list[Task] | None = None,
1412
+ loops: list[Loop] | None = None,
1413
+ resources: Resources = None,
1414
+ environments: Mapping[str, Mapping[str, Any]] | None = None,
1415
+ config: dict | None = None,
1416
+ path: PathLike | None = None,
1417
+ workflow_name: str | None = None,
1418
+ name_add_timestamp: bool | None = None,
1419
+ name_use_dir: bool | None = None,
1420
+ overwrite: bool = False,
1421
+ store: str = DEFAULT_STORE_FORMAT,
1422
+ ts_fmt: str | None = None,
1423
+ ts_name_fmt: str | None = None,
1424
+ store_kwargs: dict[str, Any] | None = None,
1425
+ ) -> Workflow:
1426
+ """Generate from the data associated with a WorkflowTemplate object.
1427
+
1428
+ Parameters
1429
+ ----------
1430
+            template_name:
1431
+ The name to use for the new workflow template, from which the new workflow
1432
+ will be generated.
1433
+ tasks:
1434
+ List of Task objects to add to the new workflow.
1435
+ loops:
1436
+ List of Loop objects to add to the new workflow.
1437
+ resources:
1438
+ Mapping of action scopes to resource requirements, to be applied to all
1439
+ element sets in the workflow. `resources` specified in an element set take
1440
+              precedence over those defined here for the whole workflow.
1441
+ environments:
1442
+ Environment specifiers, keyed by environment name.
1443
+ config:
1444
+ Configuration items that should be set whenever the resulting workflow is
1445
+ loaded. This includes config items that apply during workflow execution.
1446
+ path:
1447
+ The directory in which the workflow will be generated. If not specified, the
1448
+ config item `default_workflow_path` will be used; if that is not set, the
1449
+ current directory is used.
1450
+            workflow_name:
1451
+ The name to use for the workflow. If not provided, the name will be set to
1452
+ that of the template (optionally suffixed by a date-timestamp if
1453
+ `name_add_timestamp` is True).
1454
+            name_add_timestamp:
1455
+ If True, suffix the workflow name with a date-timestamp. A default value can
1456
+ be set with the config item `workflow_name_add_timestamp`; otherwise set to
1457
+ `True`.
1458
+            name_use_dir:
1459
+ If True, and `name_add_timestamp` is also True, the workflow directory name
1460
+ will be just the date-timestamp, and will be contained within a parent
1461
+ directory corresponding to the workflow name. A default value can be set
1462
+ with the config item `workflow_name_use_dir`; otherwise set to `False`.
1463
+ overwrite:
1464
+ If True and the workflow directory (`path` + `name`) already exists, the
1465
+ existing directory will be overwritten.
1466
+ store:
1467
+ The persistent store to use for this workflow.
1468
+ ts_fmt:
1469
+ The datetime format to use for storing datetimes. Datetimes are always stored
1470
+ in UTC (because Numpy does not store time zone info), so this should not
1471
+ include a time zone name.
1472
+ ts_name_fmt:
1473
+ The datetime format to use when generating the workflow name, where it
1474
+ includes a timestamp.
1475
+ store_kwargs:
1476
+ Keyword arguments to pass to the store's `write_empty_workflow` method.
1477
+ """
1478
+ template = cls._app.WorkflowTemplate(
1479
+ template_name,
1480
+ tasks=tasks or [],
1481
+ loops=loops or [],
1482
+ resources=resources,
1483
+ environments=environments,
1484
+ config=config or {},
1485
+ )
1486
+ return cls.from_template(
1487
+ template,
1488
+ path,
1489
+ workflow_name,
1490
+ name_add_timestamp,
1491
+ name_use_dir,
1492
+ overwrite,
1493
+ store,
1494
+ ts_fmt,
1495
+ ts_name_fmt,
1496
+ store_kwargs,
1497
+ )
1498
+
1499
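A sketch of this in-memory route (again assuming `hf` is the application instance; the schema object `dummy_schema` and the argument values are illustrative, and passing `inputs` as a mapping is assumed here):

    task = hf.Task(schema=dummy_schema, inputs={"p1": 101})  # illustrative arguments
    wk = hf.Workflow.from_template_data(
        template_name="my_workflow",
        tasks=[task],
        path="/tmp/workflows",  # else `default_workflow_path`, else the current directory
    )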
+ @TimeIt.decorator
1500
+ def _add_empty_task(
1501
+ self,
1502
+ task: Task,
1503
+ new_index: int | None = None,
1504
+ ) -> WorkflowTask:
1505
+ if new_index is None:
1506
+ new_index = self.num_tasks
1507
+
1508
+ insert_ID = self.num_added_tasks
1509
+
1510
+ # make a copy with persistent schema inputs:
1511
+ task_c, _ = task.to_persistent(self, insert_ID)
1512
+
1513
+ # add to the WorkflowTemplate:
1514
+ self.template._add_empty_task(task_c, new_index, insert_ID)
1515
+
1516
+ # create and insert a new WorkflowTask:
1517
+ self.tasks.add_object(
1518
+ self._app.WorkflowTask.new_empty_task(self, task_c, new_index),
1519
+ index=new_index,
1520
+ )
1521
+
1522
+ # update persistent store:
1523
+ task_js, temp_comps_js = task_c.to_json_like()
1524
+ assert temp_comps_js is not None
1525
+ self._store.add_template_components(temp_comps_js)
1526
+ self._store.add_task(new_index, cast("Mapping", task_js))
1527
+
1528
+ # update in-memory workflow template components:
1529
+ temp_comps = cast(
1530
+ "_TemplateComponents",
1531
+ self._app.template_components_from_json_like(temp_comps_js),
1532
+ )
1533
+ for comp_type, comps in temp_comps.items():
1534
+ ol = self.__template_components[comp_type]
1535
+ for comp in comps:
1536
+ comp._set_hash()
1537
+ if comp not in ol:
1538
+ self._pending["template_components"][comp_type].append(
1539
+ ol.add_object(comp, skip_duplicates=False)
1540
+ )
1541
+
1542
+ self._pending["tasks"].append(new_index)
1543
+ return self.tasks[new_index]
1544
+
1545
+ @TimeIt.decorator
1546
+ def _add_task(self, task: Task, new_index: int | None = None) -> None:
1547
+ new_wk_task = self._add_empty_task(task=task, new_index=new_index)
1548
+ new_wk_task._add_elements(element_sets=task.element_sets, propagate_to={})
1549
+
1550
+ def add_task(self, task: Task, new_index: int | None = None) -> None:
1551
+ """
1552
+ Add a task to this workflow.
1553
+ """
1554
+ with self._store.cached_load(), self.batch_update():
1555
+ self._add_task(task, new_index=new_index)
1556
+
1557
+ def add_task_after(self, new_task: Task, task_ref: Task | None = None) -> None:
1558
+ """Add a new task after the specified task.
1559
+
1560
+ Parameters
1561
+ ----------
1562
+ task_ref
1563
+ If not given, the new task will be added at the end of the workflow.
1564
+ """
1565
+ new_index = (
1566
+ task_ref.index + 1 if task_ref and task_ref.index is not None else None
1567
+ )
1568
+ self.add_task(new_task, new_index)
1569
+ # TODO: add new downstream elements?
1570
+
1571
+ def add_task_before(self, new_task: Task, task_ref: Task | None = None) -> None:
1572
+ """Add a new task before the specified task.
1573
+
1574
+ Parameters
1575
+ ----------
1576
+ task_ref
1577
+ If not given, the new task will be added at the beginning of the workflow.
1578
+ """
1579
+ new_index = task_ref.index if task_ref else 0
1580
+ self.add_task(new_task, new_index)
1581
+ # TODO: add new downstream elements?
1582
+
1583
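The insertion index therefore resolves as follows (a sketch; `wk` is an open workflow, and `t_new` and `t_ref` are `Task` objects):

    wk.add_task_after(t_new, t_ref)   # inserted at t_ref.index + 1
    wk.add_task_after(t_new)          # appended at the end of the workflow
    wk.add_task_before(t_new, t_ref)  # inserted at t_ref.index
    wk.add_task_before(t_new)         # inserted at index 0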
+ @TimeIt.decorator
1584
+ def _add_empty_loop(self, loop: Loop, cache: LoopCache) -> WorkflowLoop:
1585
+ """Add a new loop (zeroth iterations only) to the workflow."""
1586
+
1587
+ new_index = self.num_loops
1588
+
1589
+ # don't modify passed object:
1590
+ loop_c = copy.deepcopy(loop)
1591
+
1592
+ # add to the WorkflowTemplate:
1593
+ self.template._add_empty_loop(loop_c)
1594
+
1595
+ # all these element iterations will be initialised for the new loop:
1596
+ iter_IDs = cache.get_iter_IDs(loop_c)
1597
+ iter_loop_idx = cache.get_iter_loop_indices(iter_IDs)
1598
+
1599
+ # create and insert a new WorkflowLoop:
1600
+ new_loop = self._app.WorkflowLoop.new_empty_loop(
1601
+ index=new_index,
1602
+ workflow=self,
1603
+ template=loop_c,
1604
+ iter_loop_idx=iter_loop_idx,
1605
+ )
1606
+ self.loops.add_object(new_loop)
1607
+ wk_loop = self.loops[new_index]
1608
+
1609
+ # update any child loops of the new loop to include their new parent:
1610
+ for chd_loop in wk_loop.get_child_loops():
1611
+ chd_loop._update_parents(wk_loop)
1612
+
1613
+ loop_js, _ = loop_c.to_json_like()
1614
+
1615
+ # update persistent store:
1616
+ self._store.add_loop(
1617
+ loop_template=cast("Mapping", loop_js),
1618
+ iterable_parameters=wk_loop.iterable_parameters,
1619
+ output_parameters=wk_loop.output_parameters,
1620
+ parents=wk_loop.parents,
1621
+ num_added_iterations=wk_loop.num_added_iterations,
1622
+ iter_IDs=iter_IDs,
1623
+ )
1624
+
1625
+ self._pending["loops"].append(new_index)
1626
+
1627
+ # update cache loop indices:
1628
+ cache.update_loop_indices(new_loop_name=loop_c.name or "", iter_IDs=iter_IDs)
1629
+
1630
+ return wk_loop
1631
+
1632
+ @TimeIt.decorator
1633
+ def _add_loop(
1634
+ self, loop: Loop, cache: LoopCache | None = None, status: Status | None = None
1635
+ ) -> None:
1636
+ loop._validate_against_workflow(self)
1637
+ cache_ = cache or LoopCache.build(workflow=self, loops=[loop])
1638
+ new_wk_loop = self._add_empty_loop(loop, cache_)
1639
+ if loop.num_iterations is not None:
1640
+ # fixed number of iterations, so add remaining N > 0 iterations:
1641
+ if status:
1642
+ status_prev = status.status
1643
+ for iter_idx in range(loop.num_iterations - 1):
1644
+ if status:
1645
+ status.update(
1646
+ f"{status_prev}: iteration {iter_idx + 2}/{loop.num_iterations}."
1647
+ )
1648
+ new_wk_loop.add_iteration(cache=cache_, status=status)
1649
+
1650
+ def add_loop(self, loop: Loop) -> None:
1651
+ """Add a loop to a subset of workflow tasks."""
1652
+ with self._store.cached_load(), self.batch_update():
1653
+ self._add_loop(loop)
1654
+
1655
+ @property
1656
+ def creation_info(self) -> CreationInfo:
1657
+ """
1658
+ The creation descriptor for the workflow.
1659
+ """
1660
+ if not self._creation_info:
1661
+ info = self._store.get_creation_info()
1662
+ # TODO: using `info.get` for backwards compatibility; can change with next
1663
+ # major release
1664
+ self._creation_info = {
1665
+ "app_info": info["app_info"],
1666
+ "create_time": parse_timestamp(info["create_time"], self.ts_fmt),
1667
+ "id": info["id"],
1668
+ "user_name": info.get("user_name"),
1669
+ "user_orcid": info.get("user_orcid"),
1670
+ "user_affiliations": info.get("user_affiliations"),
1671
+ }
1672
+ return self._creation_info
1673
+
1674
+ @property
1675
+ def id_(self) -> str:
1676
+ """
1677
+ The ID of this workflow.
1678
+ """
1679
+ return self.creation_info["id"]
1680
+
1681
+ @property
1682
+ def ts_fmt(self) -> str:
1683
+ """
1684
+ The timestamp format.
1685
+ """
1686
+ if not self._ts_fmt:
1687
+ self._ts_fmt = self._store.get_ts_fmt()
1688
+ return self._ts_fmt
1689
+
1690
+ @property
1691
+ def ts_name_fmt(self) -> str:
1692
+ """
1693
+ The timestamp format for names.
1694
+ """
1695
+ if not self._ts_name_fmt:
1696
+ self._ts_name_fmt = self._store.get_ts_name_fmt()
1697
+ return self._ts_name_fmt
1698
+
1699
+ @property
1700
+ def template_components(self) -> TemplateComponents:
1701
+ """
1702
+ The template components used for this workflow.
1703
+ """
1704
+ if self._template_components is None:
1705
+ with self._store.cached_load():
1706
+ tc_js = self._store.get_template_components()
1707
+ self._template_components = self._app.template_components_from_json_like(
1708
+ tc_js
1709
+ )
1710
+ return self._template_components
1711
+
1712
+ @property
1713
+ def __template_components(self) -> _TemplateComponents:
1714
+ return cast("_TemplateComponents", self.template_components)
1715
+
1716
+ @property
1717
+ def template(self) -> WorkflowTemplate:
1718
+ """
1719
+ The template that this workflow was made from.
1720
+ """
1721
+ if self._template is None:
1722
+ with self._store.cached_load():
1723
+ temp_js = self._store.get_template()
1724
+
1725
+ # TODO: insert_ID and id_ are the same thing:
1726
+ for task in cast("list[dict]", temp_js["tasks"]):
1727
+ task.pop("id_", None)
1728
+
1729
+ template = self._app.WorkflowTemplate.from_json_like(
1730
+ temp_js, cast("dict", self.template_components)
1731
+ )
1732
+ template.workflow = self
1733
+ self._template = template
1734
+
1735
+ return self._template
1736
+
1737
+ @property
1738
+ @TimeIt.decorator
1739
+ def tasks(self) -> WorkflowTaskList:
1740
+ """
1741
+ The tasks in this workflow.
1742
+ """
1743
+ if self._tasks is None:
1744
+ with self._store.cached_load():
1745
+ all_tasks: Iterable[StoreTask] = self._store.get_tasks()
1746
+ self._tasks = self._app.WorkflowTaskList(
1747
+ self._app.WorkflowTask(
1748
+ workflow=self,
1749
+ template=self.template.tasks[task.index],
1750
+ index=task.index,
1751
+ element_IDs=task.element_IDs,
1752
+ )
1753
+ for task in all_tasks
1754
+ )
1755
+
1756
+ return self._tasks
1757
+
1758
+ @property
1759
+ def loops(self) -> WorkflowLoopList:
1760
+ """
1761
+ The loops in this workflow.
1762
+ """
1763
+
1764
+ def repack_iteration_tuples(
1765
+ num_added_iterations: list[list[list[int] | int]],
1766
+ ) -> Iterator[tuple[tuple[int, ...], int]]:
1767
+ """
1768
+                Unpack the awkward type from the persistence layer into a form we can
1769
+                easily convert to a dict. The awkwardness arises because JSON and Zarr
1770
+                do not really understand tuples as such.
1771
+ """
1772
+ for item in num_added_iterations:
1773
+ # Convert the outside to a tuple and narrow the inner types
1774
+ key_vec, count = item
1775
+ yield tuple(cast("list[int]", key_vec)), cast("int", count)
1776
+
1777
+ if self._loops is None:
1778
+ with self._store.cached_load():
1779
+ self._loops = self._app.WorkflowLoopList(
1780
+ self._app.WorkflowLoop(
1781
+ index=idx,
1782
+ workflow=self,
1783
+ template=self.template.loops[idx],
1784
+ parents=loop_dat["parents"],
1785
+ num_added_iterations=dict(
1786
+ repack_iteration_tuples(loop_dat["num_added_iterations"])
1787
+ ),
1788
+ iterable_parameters=loop_dat["iterable_parameters"],
1789
+ output_parameters=loop_dat["output_parameters"],
1790
+ )
1791
+ for idx, loop_dat in self._store.get_loops().items()
1792
+ )
1793
+ return self._loops
1794
+
1795
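For example, the store-layer value below (nested lists, since tuples do not round-trip through JSON/Zarr) is repacked into a dict keyed by loop-index tuples:

    # inside the property, the helper maps e.g.:
    num_added_iterations = [[[0], 1], [[0, 1], 2]]
    dict(repack_iteration_tuples(num_added_iterations))
    # -> {(0,): 1, (0, 1): 2}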
+ @property
1796
+ @TimeIt.decorator
1797
+ def submissions(self) -> list[Submission]:
1798
+ """
1799
+ The job submissions done by this workflow.
1800
+ """
1801
+ if self._submissions is None:
1802
+ self._app.persistence_logger.debug("loading workflow submissions")
1803
+ with self._store.cached_load():
1804
+ subs: list[Submission] = []
1805
+ for idx, sub_dat in self._store.get_submissions().items():
1806
+ sub = self._app.Submission.from_json_like(
1807
+ {"index": idx, **cast("dict", sub_dat)}
1808
+ )
1809
+ sub.workflow = self
1810
+ subs.append(sub)
1811
+ self._submissions = subs
1812
+ return self._submissions
1813
+
1814
+ @property
1815
+ def num_added_tasks(self) -> int:
1816
+ """
1817
+ The total number of added tasks.
1818
+ """
1819
+ return self._store._get_num_total_added_tasks()
1820
+
1821
+ @TimeIt.decorator
1822
+ def get_store_EARs(self, id_lst: Iterable[int]) -> Sequence[StoreEAR]:
1823
+ """
1824
+ Get the persistent element action runs.
1825
+ """
1826
+ return self._store.get_EARs(id_lst)
1827
+
1828
+ @TimeIt.decorator
1829
+ def get_store_element_iterations(
1830
+ self, id_lst: Iterable[int]
1831
+ ) -> Sequence[StoreElementIter]:
1832
+ """
1833
+ Get the persistent element iterations.
1834
+ """
1835
+ return self._store.get_element_iterations(id_lst)
1836
+
1837
+ @TimeIt.decorator
1838
+ def get_store_elements(self, id_lst: Iterable[int]) -> Sequence[StoreElement]:
1839
+ """
1840
+ Get the persistent elements.
1841
+ """
1842
+ return self._store.get_elements(id_lst)
1843
+
1844
+ @TimeIt.decorator
1845
+ def get_store_tasks(self, id_lst: Iterable[int]) -> Sequence[StoreTask]:
1846
+ """
1847
+ Get the persistent tasks.
1848
+ """
1849
+ return self._store.get_tasks_by_IDs(id_lst)
1850
+
1851
+ def get_element_iteration_IDs_from_EAR_IDs(self, id_lst: Iterable[int]) -> list[int]:
1852
+ """
1853
+ Get the element iteration IDs of EARs.
1854
+ """
1855
+ return [ear.elem_iter_ID for ear in self.get_store_EARs(id_lst)]
1856
+
1857
+ def get_element_IDs_from_EAR_IDs(self, id_lst: Iterable[int]) -> list[int]:
1858
+ """
1859
+ Get the element IDs of EARs.
1860
+ """
1861
+ iter_IDs = self.get_element_iteration_IDs_from_EAR_IDs(id_lst)
1862
+ return [itr.element_ID for itr in self.get_store_element_iterations(iter_IDs)]
1863
+
1864
+ def get_task_IDs_from_element_IDs(self, id_lst: Iterable[int]) -> list[int]:
1865
+ """
1866
+ Get the task IDs of elements.
1867
+ """
1868
+ return [elem.task_ID for elem in self.get_store_elements(id_lst)]
1869
+
1870
+ def get_EAR_IDs_of_tasks(self, id_lst: Iterable[int]) -> list[int]:
1871
+ """Get EAR IDs belonging to multiple tasks."""
1872
+ return [ear.id_ for ear in self.get_EARs_of_tasks(id_lst)]
1873
+
1874
+ def get_EARs_of_tasks(self, id_lst: Iterable[int]) -> Iterator[ElementActionRun]:
1875
+ """Get EARs belonging to multiple tasks."""
1876
+ for id_ in id_lst:
1877
+ for elem in self.tasks.get(insert_ID=id_).elements[:]:
1878
+ for iter_ in elem.iterations:
1879
+ yield from iter_.action_runs
1880
+
1881
+ def get_element_iterations_of_tasks(
1882
+ self, id_lst: Iterable[int]
1883
+ ) -> Iterator[ElementIteration]:
1884
+ """Get element iterations belonging to multiple tasks."""
1885
+ for id_ in id_lst:
1886
+ for elem in self.tasks.get(insert_ID=id_).elements[:]:
1887
+ yield from elem.iterations
1888
+
1889
+ @dataclass
1890
+ class _IndexPath1:
1891
+ elem: int
1892
+ task: int
1893
+
1894
+ @TimeIt.decorator
1895
+ def __get_elements_by_task_idx(
1896
+ self, element_idx_by_task: dict[int, set[int]]
1897
+ ) -> dict[int, dict[int, Element]]:
1898
+ return {
1899
+ task_idx: {
1900
+ idx: element
1901
+ for idx, element in zip(
1902
+ elem_indices, self.tasks[task_idx].elements[list(elem_indices)]
1903
+ )
1904
+ }
1905
+ for task_idx, elem_indices in element_idx_by_task.items()
1906
+ }
1907
+
1908
+ @TimeIt.decorator
1909
+ def get_elements_from_IDs(self, id_lst: Iterable[int]) -> list[Element]:
1910
+ """Return element objects from a list of IDs."""
1911
+
1912
+ store_elems = self.get_store_elements(id_lst)
1913
+ store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
1914
+
1915
+ element_idx_by_task: dict[int, set[int]] = defaultdict(set)
1916
+ index_paths: list[Workflow._IndexPath1] = []
1917
+ for elem, task in zip(store_elems, store_tasks):
1918
+ elem_idx = task.element_IDs.index(elem.id_)
1919
+ index_paths.append(Workflow._IndexPath1(elem_idx, task.index))
1920
+ element_idx_by_task[task.index].add(elem_idx)
1921
+
1922
+ elements_by_task = self.__get_elements_by_task_idx(element_idx_by_task)
1923
+
1924
+ return [elements_by_task[path.task][path.elem] for path in index_paths]
1925
+
1926
+ @dataclass
1927
+ class _IndexPath2:
1928
+ iter: int
1929
+ elem: int
1930
+ task: int
1931
+
1932
+ @TimeIt.decorator
1933
+ def get_element_iterations_from_IDs(
1934
+ self, id_lst: Iterable[int]
1935
+ ) -> list[ElementIteration]:
1936
+ """Return element iteration objects from a list of IDs."""
1937
+
1938
+ store_iters = self.get_store_element_iterations(id_lst)
1939
+ store_elems = self.get_store_elements(it.element_ID for it in store_iters)
1940
+ store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
1941
+
1942
+ element_idx_by_task: dict[int, set[int]] = defaultdict(set)
1943
+
1944
+ index_paths: list[Workflow._IndexPath2] = []
1945
+ for itr, elem, task in zip(store_iters, store_elems, store_tasks):
1946
+ iter_idx = elem.iteration_IDs.index(itr.id_)
1947
+ elem_idx = task.element_IDs.index(elem.id_)
1948
+ index_paths.append(Workflow._IndexPath2(iter_idx, elem_idx, task.index))
1949
+ element_idx_by_task[task.index].add(elem_idx)
1950
+
1951
+ elements_by_task = self.__get_elements_by_task_idx(element_idx_by_task)
1952
+
1953
+ return [
1954
+ elements_by_task[path.task][path.elem].iterations[path.iter]
1955
+ for path in index_paths
1956
+ ]
1957
+
1958
+ @dataclass
1959
+ class _IndexPath3:
1960
+ run: int
1961
+ act: int
1962
+ iter: int
1963
+ elem: int
1964
+ task: int
1965
+
1966
+ @overload
1967
+ def get_EARs_from_IDs(self, ids: Iterable[int]) -> list[ElementActionRun]: ...
1968
+
1969
+ @overload
1970
+ def get_EARs_from_IDs(self, ids: int) -> ElementActionRun: ...
1971
+
1972
+ @TimeIt.decorator
1973
+ def get_EARs_from_IDs(
1974
+ self, ids: Iterable[int] | int, as_dict: bool = False
1975
+ ) -> list[ElementActionRun] | dict[int, ElementActionRun] | ElementActionRun:
1976
+ """Get element action run objects from a list of IDs."""
1977
+ id_lst = [ids] if isinstance(ids, int) else list(ids)
1978
+
1979
+ with self._store.cached_load(), self._store.cache_ctx():
1980
+
1981
+ self._app.persistence_logger.debug(
1982
+ f"get_EARs_from_IDs: {len(id_lst)} EARs: {shorten_list_str(id_lst)}."
1983
+ )
1984
+
1985
+ store_EARs = self.get_store_EARs(id_lst)
1986
+ store_iters = self.get_store_element_iterations(
1987
+ ear.elem_iter_ID for ear in store_EARs
1988
+ )
1989
+ store_elems = self.get_store_elements(it.element_ID for it in store_iters)
1990
+ store_tasks = self.get_store_tasks(el.task_ID for el in store_elems)
1991
+
1992
+ # to allow for bulk retrieval of elements/iterations
1993
+ element_idx_by_task: dict[int, set[int]] = defaultdict(set)
1994
+ iter_idx_by_task_elem: dict[int, dict[int, set[int]]] = defaultdict(
1995
+ lambda: defaultdict(set)
1996
+ )
1997
+
1998
+ index_paths: list[Workflow._IndexPath3] = []
1999
+ for rn, it, el, tk in zip(store_EARs, store_iters, store_elems, store_tasks):
2000
+ act_idx = rn.action_idx
2001
+ run_idx = (
2002
+ it.EAR_IDs[act_idx].index(rn.id_) if it.EAR_IDs is not None else -1
2003
+ )
2004
+ iter_idx = el.iteration_IDs.index(it.id_)
2005
+ elem_idx = tk.element_IDs.index(el.id_)
2006
+ index_paths.append(
2007
+ Workflow._IndexPath3(run_idx, act_idx, iter_idx, elem_idx, tk.index)
2008
+ )
2009
+ element_idx_by_task[tk.index].add(elem_idx)
2010
+ iter_idx_by_task_elem[tk.index][elem_idx].add(iter_idx)
2011
+
2012
+ # retrieve elements/iterations:
2013
+ iters = {
2014
+ task_idx: {
2015
+ elem_i.index: {
2016
+ iter_idx: elem_i.iterations[iter_idx]
2017
+ for iter_idx in iter_idx_by_task_elem[task_idx][elem_i.index]
2018
+ }
2019
+ for elem_i in self.tasks[task_idx].elements[list(elem_idxes)]
2020
+ }
2021
+ for task_idx, elem_idxes in element_idx_by_task.items()
2022
+ }
2023
+
2024
+ result = {}
2025
+ for path in index_paths:
2026
+ run = (
2027
+ iters[path.task][path.elem][path.iter]
2028
+ .actions[path.act]
2029
+ .runs[path.run]
2030
+ )
2031
+ result[run.id_] = run
2032
+
2033
+ if not as_dict:
2034
+ res_lst = list(result.values())
2035
+ return res_lst[0] if isinstance(ids, int) else res_lst
2036
+
2037
+ return result
2038
+
2039
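The overloads mean the return type follows the argument type (sketch; `wk` is an open workflow):

    run = wk.get_EARs_from_IDs(0)                          # a single ElementActionRun
    runs = wk.get_EARs_from_IDs([0, 1, 2])                 # list of ElementActionRun
    by_id = wk.get_EARs_from_IDs([0, 1, 2], as_dict=True)  # dict keyed by run ID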
+ @TimeIt.decorator
2040
+ def get_all_elements(self) -> list[Element]:
2041
+ """
2042
+ Get all elements in the workflow.
2043
+ """
2044
+ return self.get_elements_from_IDs(range(self.num_elements))
2045
+
2046
+ @TimeIt.decorator
2047
+ def get_all_element_iterations(self) -> list[ElementIteration]:
2048
+ """
2049
+ Get all iterations in the workflow.
2050
+ """
2051
+ return self.get_element_iterations_from_IDs(range(self.num_element_iterations))
2052
+
2053
+ @TimeIt.decorator
2054
+ def get_all_EARs(self) -> list[ElementActionRun]:
2055
+ """
2056
+ Get all runs in the workflow.
2057
+ """
2058
+ return self.get_EARs_from_IDs(range(self.num_EARs))
2059
+
2060
+ @contextmanager
2061
+ def batch_update(self, is_workflow_creation: bool = False) -> Iterator[None]:
2062
+ """A context manager that batches up structural changes to the workflow and
2063
+ commits them to disk all together when the context manager exits."""
2064
+
2065
+ if self._in_batch_mode:
2066
+ yield
2067
+ else:
2068
+ try:
2069
+ self._app.persistence_logger.info(
2070
+ f"entering batch update (is_workflow_creation={is_workflow_creation!r})"
2071
+ )
2072
+ self._in_batch_mode = True
2073
+ yield
2074
+
2075
+ except Exception:
2076
+ self._app.persistence_logger.error("batch update exception!")
2077
+ self._in_batch_mode = False
2078
+ self._store._pending.reset()
2079
+
2080
+ for task in self.tasks:
2081
+ task._reset_pending_element_IDs()
2082
+ task.template._reset_pending_element_sets()
2083
+
2084
+ for loop in self.loops:
2085
+ loop._reset_pending_num_added_iters()
2086
+ loop._reset_pending_parents()
2087
+
2088
+ self._reject_pending()
2089
+
2090
+ if is_workflow_creation:
2091
+ # creation failed, so no need to keep the newly generated workflow:
2092
+ self._store.delete_no_confirm()
2093
+ self._store.reinstate_replaced_dir()
2094
+
2095
+ raise
2096
+
2097
+ else:
2098
+ if self._store._pending:
2099
+ # is_diff = self._store.is_modified_on_disk()
2100
+ # if is_diff:
2101
+ # raise WorkflowBatchUpdateFailedError(
2102
+ # f"Workflow modified on disk since it was loaded!"
2103
+ # )
2104
+
2105
+ for task in self.tasks:
2106
+ task._accept_pending_element_IDs()
2107
+ task.template._accept_pending_element_sets()
2108
+
2109
+ for loop in self.loops:
2110
+ loop._accept_pending_num_added_iters()
2111
+ loop._accept_pending_parents()
2112
+
2113
+ # TODO: handle errors in commit pending?
2114
+ self._store._pending.commit_all()
2115
+ self._accept_pending()
2116
+
2117
+ if is_workflow_creation:
2118
+ self._store.remove_replaced_dir()
2119
+
2120
+ self._app.persistence_logger.info("exiting batch update")
2121
+ self._in_batch_mode = False
2122
+
2123
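A sketch of batching several structural changes into a single commit (`task_a`, `task_b` and `loop` are illustrative objects):

    with wk.batch_update():
        wk.add_task(task_a)
        wk.add_task(task_b)
        wk.add_loop(loop)
    # pending changes are committed to the store together on exit; an exception
    # raised inside the block rejects them all instead.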
+ @contextmanager
2124
+ def cached_merged_parameters(self):
2125
+ if self._use_merged_parameters_cache:
2126
+ yield
2127
+ else:
2128
+ try:
2129
+ self._app.logger.debug("entering merged-parameters cache.")
2130
+ self._use_merged_parameters_cache = True
2131
+ yield
2132
+ finally:
2133
+ self._app.logger.debug("exiting merged-parameters cache.")
2134
+ self._use_merged_parameters_cache = False
2135
+ self._merged_parameters_cache = {} # reset the cache
2136
+
2137
+ @classmethod
2138
+ def temporary_rename(cls, path: str, fs: AbstractFileSystem) -> str:
2139
+ """Rename an existing same-path workflow (directory) so we can restore it if
2140
+ workflow creation fails.
2141
+
2142
+        Renaming is retried until it succeeds. This means multiple new
2143
+ paths may be created, where only the final path should be considered the
2144
+ successfully renamed workflow. Other paths will be deleted."""
2145
+
2146
+ all_replaced: list[str] = []
2147
+
2148
+ @cls._app.perm_error_retry()
2149
+ def _temp_rename(path: str, fs: AbstractFileSystem) -> str:
2150
+ temp_ext = "".join(random.choices(string.ascii_letters, k=10))
2151
+ replaced = str(Path(f"{path}.{temp_ext}").as_posix())
2152
+ cls._app.persistence_logger.debug(
2153
+ f"temporary_rename: _temp_rename: {path!r} --> {replaced!r}."
2154
+ )
2155
+ all_replaced.append(replaced)
2156
+ try:
2157
+ fs.rename(path, replaced, recursive=True)
2158
+ except TypeError:
2159
+ # `SFTPFileSystem.rename` has no `recursive` argument:
2160
+ fs.rename(path, replaced)
2161
+ return replaced
2162
+
2163
+ @cls._app.perm_error_retry()
2164
+ def _remove_path(path: str, fs: AbstractFileSystem) -> None:
2165
+ cls._app.persistence_logger.debug(
2166
+ f"temporary_rename: _remove_path: {path!r}."
2167
+ )
2168
+ while fs.exists(path):
2169
+ fs.rm(path, recursive=True)
2170
+ time.sleep(0.5)
2171
+
2172
+ _temp_rename(path, fs)
2173
+
2174
+ for path in all_replaced[:-1]:
2175
+ _remove_path(path, fs)
2176
+
2177
+ return all_replaced[-1]
2178
+
2179
+ @classmethod
2180
+ @TimeIt.decorator
2181
+ def _write_empty_workflow(
2182
+ cls,
2183
+ template: WorkflowTemplate,
2184
+ *,
2185
+ path: PathLike | None = None,
2186
+ name: str | None = None,
2187
+ name_add_timestamp: bool | None = None,
2188
+ name_use_dir: bool | None = None,
2189
+ overwrite: bool | None = False,
2190
+ store: str = DEFAULT_STORE_FORMAT,
2191
+ ts_fmt: str | None = None,
2192
+ ts_name_fmt: str | None = None,
2193
+ fs_kwargs: dict[str, Any] | None = None,
2194
+ store_kwargs: dict[str, Any] | None = None,
2195
+ ) -> Workflow:
2196
+ """
2197
+ Parameters
2198
+ ----------
2199
+ template
2200
+ The workflow description to instantiate.
2201
+ path
2202
+ The directory in which the workflow will be generated. If not specified, the
2203
+ config item `default_workflow_path` will be used; if that is not set, the
2204
+ current directory is used.
2205
+ name
2206
+ The name to use for the workflow. If not provided, the name will be set to
2207
+ that of the template (optionally suffixed by a date-timestamp if
2208
+ `name_add_timestamp` is True).
2209
+ name_add_timestamp
2210
+ If True, suffix the name with a date-timestamp. A default value can be set
2211
+ with the config item `workflow_name_add_timestamp`; otherwise set to `True`.
2212
+ name_use_dir
2213
+ If True, and `name_add_timestamp` is also True, the workflow directory name
2214
+ will be just the date-timestamp, and will be contained within a parent
2215
+ directory corresponding to the workflow name. A default value can be set
2216
+ with the config item `workflow_name_use_dir`; otherwise set to `False`.
2217
+ """
2218
+
2219
+ if name_use_dir is None:
2220
+ # use value from the config if available
2221
+ if (cfg_use_dir := cls._app.config.workflow_name_use_dir) is not None:
2222
+ name_use_dir = cfg_use_dir
2223
+ else:
2224
+ name_use_dir = False
2225
+
2226
+ if name_add_timestamp is None:
2227
+ # use value from the config if available
2228
+ if (cfg_add_ts := cls._app.config.workflow_name_add_timestamp) is not None:
2229
+ name_add_timestamp = cfg_add_ts
2230
+ else:
2231
+ name_add_timestamp = True
2232
+
2233
+ # store all times in UTC, since NumPy doesn't support time zone info:
2234
+ ts_utc = current_timestamp()
2235
+ ts = normalise_timestamp(ts_utc)
2236
+
2237
+ ts_name_fmt = ts_name_fmt or cls._default_ts_name_fmt
2238
+ ts_fmt = ts_fmt or cls._default_ts_fmt
2239
+
2240
+ parent_dir = Path(path or cls._app.config.default_workflow_path or ".")
2241
+
2242
+ wk_name = name or template.name
2243
+ wk_dir_name = wk_name
2244
+ if name_add_timestamp:
2245
+ timestamp = ts.strftime(ts_name_fmt)
2246
+ if name_use_dir:
2247
+ wk_dir_name = timestamp
2248
+ parent_dir = parent_dir.joinpath(wk_name)
2249
+ else:
2250
+ wk_dir_name += f"_{timestamp}"
2251
+ wk_name += f"_{timestamp}"
2252
+
2253
+ fs_kwargs = fs_kwargs or {}
2254
+ fs, _, pw = resolve_fsspec(parent_dir, **fs_kwargs)
2255
+ wk_path = str(parent_dir.joinpath(wk_dir_name))
2256
+
2257
+ replaced_wk = None
2258
+ if fs.exists(wk_path):
2259
+ cls._app.logger.debug("workflow path exists")
2260
+ if overwrite:
2261
+ cls._app.logger.debug("renaming existing workflow path")
2262
+ replaced_wk = cls.temporary_rename(wk_path, fs)
2263
+ else:
2264
+ raise ValueError(
2265
+ f"Path already exists: {wk_path} on file system " f"{fs!r}."
2266
+ )
2267
+
2268
+ class PersistenceGrabber:
2269
+ """An object to pass to ResourceSpec.make_persistent that pretends to be a
2270
+ Workflow object, so we can pretend to make template-level inputs/resources
2271
+ persistent before the workflow exists."""
2272
+
2273
+ def __init__(self) -> None:
2274
+ self.__ps: list[tuple[Any, ParamSource]] = []
2275
+
2276
+ def _add_parameter_data(self, data: Any, source: ParamSource) -> int:
2277
+ ref = len(self.__ps)
2278
+ self.__ps.append((data, source))
2279
+ return ref
2280
+
2281
+ def get_parameter_data(self, data_idx: int) -> Any:
2282
+ return self.__ps[data_idx - 1][0]
2283
+
2284
+ def check_parameters_exist(self, id_lst: int | list[int]) -> bool:
2285
+ r = range(len(self.__ps))
2286
+ if isinstance(id_lst, int):
2287
+ return id_lst in r
2288
+ else:
2289
+ return all(id_ in r for id_ in id_lst)
2290
+
2291
+ def write_persistence_data_to_workflow(self, workflow: Workflow) -> None:
2292
+ for dat_i, source_i in self.__ps:
2293
+ workflow._add_parameter_data(dat_i, source_i)
2294
+
2295
+ # make template-level inputs/resources think they are persistent:
2296
+ grabber = PersistenceGrabber()
2297
+ param_src: ParamSource = {"type": "workflow_resources"}
2298
+ for res_i_copy in template._get_resources_copy():
2299
+ res_i_copy.make_persistent(grabber, param_src)
2300
+
2301
+ template_js_, template_sh = template.to_json_like(exclude={"tasks", "loops"})
2302
+ template_js: TemplateMeta = {
2303
+ **cast("TemplateMeta", template_js_), # Trust me, bro!
2304
+ "tasks": [],
2305
+ "loops": [],
2306
+ }
2307
+
2308
+ store_kwargs = store_kwargs if store_kwargs else template.store_kwargs
2309
+ store_cls = store_cls_from_str(store)
2310
+ store_cls.write_empty_workflow(
2311
+ app=cls._app,
2312
+ template_js=template_js,
2313
+ template_components_js=template_sh or {},
2314
+ wk_path=wk_path,
2315
+ fs=fs,
2316
+ name=wk_name,
2317
+ replaced_wk=replaced_wk,
2318
+ creation_info={
2319
+ "app_info": cls._app.get_info(),
2320
+ "create_time": ts_utc.strftime(ts_fmt),
2321
+ "id": str(uuid4()),
2322
+ "user_name": cls._app.config.user_name,
2323
+ "user_orcid": cls._app.config.user_orcid,
2324
+ "user_affiliations": cls._app.config.user_affiliations,
2325
+ },
2326
+ ts_fmt=ts_fmt,
2327
+ ts_name_fmt=ts_name_fmt,
2328
+ **store_kwargs,
2329
+ )
2330
+
2331
+ fs_kwargs = {"password": pw, **fs_kwargs}
2332
+ wk = cls(wk_path, store_fmt=store, fs_kwargs=fs_kwargs)
2333
+
2334
+ # actually make template inputs/resources persistent, now the workflow exists:
2335
+ grabber.write_persistence_data_to_workflow(wk)
2336
+
2337
+ if template.source_file:
2338
+ wk.artifacts_path.mkdir(exist_ok=False)
2339
+ src = Path(template.source_file)
2340
+ shutil.copy(src, wk.artifacts_path.joinpath(src.name))
2341
+
2342
+ return wk
2343
+
2344
+ def zip(
2345
+ self,
2346
+ path: str = ".",
2347
+ *,
2348
+ log: str | None = None,
2349
+ overwrite: bool = False,
2350
+ include_execute: bool = False,
2351
+ include_rechunk_backups: bool = False,
2352
+ ) -> str:
2353
+ """
2354
+ Convert the workflow to a zipped form.
2355
+
2356
+ Parameters
2357
+ ----------
2358
+ path:
2359
+ Path at which to create the new zipped workflow. If this is an existing
2360
+ directory, the zip file will be created within this directory. Otherwise,
2361
+ this path is assumed to be the full file path to the new zip file.
2362
+ """
2363
+ return self._store.zip(
2364
+ path=path,
2365
+ log=log,
2366
+ overwrite=overwrite,
2367
+ include_execute=include_execute,
2368
+ include_rechunk_backups=include_rechunk_backups,
2369
+ )
2370
+
2371
+ def unzip(self, path: str = ".", *, log: str | None = None) -> str:
2372
+ """
2373
+ Convert the workflow to an unzipped form.
2374
+
2375
+ Parameters
2376
+ ----------
2377
+ path:
2378
+ Path at which to create the new unzipped workflow. If this is an existing
2379
+ directory, the new workflow directory will be created within this directory.
2380
+ Otherwise, this path will represent the new workflow directory path.
2381
+ """
2382
+ return self._store.unzip(path=path, log=log)
2383
+
2384
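A round-trip sketch (paths are illustrative; opening a zipped workflow directly via the `Workflow` constructor is assumed to be supported by the zip-capable stores):

    zip_path = wk.zip("/tmp/archive")   # an existing directory: the zip is created inside it
    wk_zipped = hf.Workflow(zip_path)   # assumption: zip stores open directly
    new_dir = wk_zipped.unzip("/tmp/restored")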
+ def copy(self, path: str | Path = ".") -> Path:
2385
+ """Copy the workflow to a new path and return the copied workflow path."""
2386
+ return self._store.copy(path)
2387
+
2388
+ def delete(self) -> None:
2389
+ """
2390
+ Delete the persistent data.
2391
+ """
2392
+ self._store.delete()
2393
+
2394
+ def _delete_no_confirm(self) -> None:
2395
+ self._store.delete_no_confirm()
2396
+
2397
+ def get_parameters(self, id_lst: Iterable[int], **kwargs) -> Sequence[StoreParameter]:
2398
+ """
2399
+ Get parameters known to the workflow.
2400
+
2401
+        Parameters
2402
+        ----------
2403
+ id_lst:
2404
+ The indices of the parameters to retrieve.
2405
+
2406
+ Keyword Arguments
2407
+ -----------------
2408
+ dataset_copy: bool
2409
+ For Zarr stores only. If True, copy arrays as NumPy arrays.
2410
+ """
2411
+ return self._store.get_parameters(id_lst, **kwargs)
2412
+
2413
+ @TimeIt.decorator
2414
+ def get_parameter_sources(self, id_lst: Iterable[int]) -> list[ParamSource]:
2415
+ """
2416
+ Get parameter sources known to the workflow.
2417
+ """
2418
+ return self._store.get_parameter_sources(id_lst)
2419
+
2420
+ @TimeIt.decorator
2421
+ def get_parameter_set_statuses(self, id_lst: Iterable[int]) -> list[bool]:
2422
+ """
2423
+ Get whether some parameters are set.
2424
+ """
2425
+ return self._store.get_parameter_set_statuses(id_lst)
2426
+
2427
+ @TimeIt.decorator
2428
+ def get_parameter(self, index: int, **kwargs) -> StoreParameter:
2429
+ """
2430
+ Get a single parameter.
2431
+
2432
+        Parameters
2433
+        ----------
2434
+ index:
2435
+ The index of the parameter to retrieve.
2436
+
2437
+ Keyword Arguments
2438
+ -----------------
2439
+ dataset_copy: bool
2440
+ For Zarr stores only. If True, copy arrays as NumPy arrays.
2441
+ """
2442
+ return self.get_parameters((index,), **kwargs)[0]
2443
+
2444
+ @TimeIt.decorator
2445
+ def get_parameter_data(self, index: int, **kwargs) -> Any:
2446
+ """
2447
+ Get the data relating to a parameter.
2448
+ """
2449
+ param = self.get_parameter(index, **kwargs)
2450
+ if param.data is not None:
2451
+ return param.data
2452
+ else:
2453
+ return param.file
2454
+
2455
+ @TimeIt.decorator
2456
+ def get_parameter_source(self, index: int) -> ParamSource:
2457
+ """
2458
+ Get the source of a particular parameter.
2459
+ """
2460
+ return self.get_parameter_sources((index,))[0]
2461
+
2462
+ @TimeIt.decorator
2463
+ def is_parameter_set(self, index: int) -> bool:
2464
+ """
2465
+ Test if a particular parameter is set.
2466
+ """
2467
+ return self.get_parameter_set_statuses((index,))[0]
2468
+
2469
+ @TimeIt.decorator
2470
+ def get_all_parameters(self, **kwargs) -> list[StoreParameter]:
2471
+ """
2472
+ Retrieve all persistent parameters.
2473
+
2474
+ Keyword Arguments
2475
+ -----------------
2476
+ dataset_copy: bool
2477
+ For Zarr stores only. If True, copy arrays as NumPy arrays.
2478
+ """
2479
+ num_params = self._store._get_num_total_parameters()
2480
+ return self._store.get_parameters(range(num_params), **kwargs)
2481
+
2482
+ @TimeIt.decorator
2483
+ def get_all_parameter_sources(self, **kwargs) -> list[ParamSource]:
2484
+ """Retrieve all persistent parameters sources."""
2485
+ num_params = self._store._get_num_total_parameters()
2486
+ return self._store.get_parameter_sources(range(num_params), **kwargs)
2487
+
2488
+ @TimeIt.decorator
2489
+ def get_all_parameter_data(self, **kwargs) -> dict[int, Any]:
2490
+ """
2491
+ Retrieve all workflow parameter data.
2492
+
2493
+ Keyword Arguments
2494
+ -----------------
2495
+ dataset_copy: bool
2496
+ For Zarr stores only. If True, copy arrays as NumPy arrays.
2497
+ """
2498
+ return {
2499
+ param.id_: (param.data if param.data is not None else param.file)
2500
+ for param in self.get_all_parameters(**kwargs)
2501
+ }
2502
+
2503
+ def check_parameters_exist(self, id_lst: int | list[int]) -> bool:
2504
+ """
2505
+ Check if all the parameters exist.
2506
+ """
2507
+ if isinstance(id_lst, int):
2508
+ return next(iter(self._store.check_parameters_exist((id_lst,))))
2509
+ return all(self._store.check_parameters_exist(id_lst))
2510
+
2511
+ @TimeIt.decorator
2512
+ def _add_unset_parameter_data(self, source: ParamSource) -> int:
2513
+ # TODO: use this for unset files as well
2514
+ return self._store.add_unset_parameter(source)
2515
+
2516
+ def _add_parameter_data(self, data, source: ParamSource) -> int:
2517
+ return self._store.add_set_parameter(data, source)
2518
+
2519
+ def _add_file(
2520
+ self,
2521
+ *,
2522
+ store_contents: bool,
2523
+ is_input: bool,
2524
+ source: ParamSource,
2525
+ path=None,
2526
+ contents=None,
2527
+ filename: str,
2528
+ ) -> int:
2529
+ return self._store.add_file(
2530
+ store_contents=store_contents,
2531
+ is_input=is_input,
2532
+ source=source,
2533
+ path=path,
2534
+ contents=contents,
2535
+ filename=filename,
2536
+ )
2537
+
2538
+ def _set_file(
2539
+ self,
2540
+ param_id: int | list[int] | None,
2541
+ store_contents: bool,
2542
+ is_input: bool,
2543
+ path: Path | str,
2544
+ contents=None,
2545
+ filename: str | None = None,
2546
+ clean_up: bool = False,
2547
+ ) -> None:
2548
+ self._store.set_file(
2549
+ param_id=cast("int", param_id),
2550
+ store_contents=store_contents,
2551
+ is_input=is_input,
2552
+ path=path,
2553
+ contents=contents,
2554
+ filename=filename,
2555
+ clean_up=clean_up,
2556
+ )
2557
+
2558
+ @overload
2559
+ def get_task_unique_names(
2560
+ self, map_to_insert_ID: Literal[False] = False
2561
+ ) -> Sequence[str]: ...
2562
+
2563
+ @overload
2564
+ def get_task_unique_names(
2565
+ self, map_to_insert_ID: Literal[True]
2566
+ ) -> Mapping[str, int]: ...
2567
+
2568
+ def get_task_unique_names(
2569
+ self, map_to_insert_ID: bool = False
2570
+ ) -> Sequence[str] | Mapping[str, int]:
2571
+ """Return the unique names of all workflow tasks.
2572
+
2573
+ Parameters
2574
+ ----------
2575
+ map_to_insert_ID : bool
2576
+ If True, return a dict whose values are task insert IDs, otherwise return a
2577
+ list.
2578
+
2579
+ """
2580
+ names = self._app.Task.get_task_unique_names(self.template.tasks)
2581
+ if map_to_insert_ID:
2582
+ return dict(zip(names, (task.insert_ID for task in self.template.tasks)))
2583
+ else:
2584
+ return names
2585
+
2586
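For example (sketch; the names depend on the template's tasks):

    wk.get_task_unique_names()
    # -> ["simulate", "simulate_2"]
    wk.get_task_unique_names(map_to_insert_ID=True)
    # -> {"simulate": 0, "simulate_2": 1}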
+ def _get_new_task_unique_name(self, new_task: Task, new_index: int) -> str:
2587
+ task_templates = list(self.template.tasks)
2588
+ task_templates.insert(new_index, new_task)
2589
+ uniq_names = self._app.Task.get_task_unique_names(task_templates)
2590
+
2591
+ return uniq_names[new_index]
2592
+
2593
+ def _get_empty_pending(self) -> Pending:
2594
+ return {
2595
+ "template_components": {k: [] for k in TEMPLATE_COMP_TYPES},
2596
+ "tasks": [], # list of int
2597
+ "loops": [], # list of int
2598
+ "submissions": [], # list of int
2599
+ }
2600
+
2601
+ def _accept_pending(self) -> None:
2602
+ self._reset_pending()
2603
+
2604
+ def _reset_pending(self) -> None:
2605
+ self._pending = self._get_empty_pending()
2606
+
2607
+ def _reject_pending(self) -> None:
2608
+ """Revert pending changes to the in-memory representation of the workflow.
2609
+
2610
+ This deletes new tasks, new template component data, new loops, and new
2611
+ submissions. Element additions to existing (non-pending) tasks are separately
2612
+ rejected/accepted by the WorkflowTask object.
2613
+
2614
+ """
2615
+ for task_idx in self._pending["tasks"][::-1]:
2616
+ # iterate in reverse so the index references are correct
2617
+ self.tasks._remove_object(task_idx)
2618
+ self.template.tasks.pop(task_idx)
2619
+
2620
+ for comp_type, comp_indices in self._pending["template_components"].items():
2621
+ for comp_idx in comp_indices[::-1]:
2622
+ # iterate in reverse so the index references are correct
2623
+ tc = self.__template_components[comp_type]
2624
+ assert hasattr(tc, "_remove_object")
2625
+ tc._remove_object(comp_idx)
2626
+
2627
+ for loop_idx in self._pending["loops"][::-1]:
2628
+ # iterate in reverse so the index references are correct
2629
+ self.loops._remove_object(loop_idx)
2630
+ self.template.loops.pop(loop_idx)
2631
+
2632
+ for sub_idx in self._pending["submissions"][::-1]:
2633
+ # iterate in reverse so the index references are correct
2634
+ assert self._submissions is not None
2635
+ self._submissions.pop(sub_idx)
2636
+
2637
+ self._reset_pending()
2638
+
2639
+ @property
2640
+ def num_tasks(self) -> int:
2641
+ """
2642
+ The total number of tasks.
2643
+ """
2644
+ return self._store._get_num_total_tasks()
2645
+
2646
+ @property
2647
+ def num_submissions(self) -> int:
2648
+ """
2649
+ The total number of job submissions.
2650
+ """
2651
+ return (
2652
+ len(self._submissions)
2653
+ if self._submissions is not None
2654
+ else self._store._get_num_total_submissions()
2655
+ )
2656
+
2657
+ @property
2658
+ def num_elements(self) -> int:
2659
+ """
2660
+ The total number of elements.
2661
+ """
2662
+ return self._store._get_num_total_elements()
2663
+
2664
+ @property
2665
+ def num_element_iterations(self) -> int:
2666
+ """
2667
+ The total number of element iterations.
2668
+ """
2669
+ return self._store._get_num_total_elem_iters()
2670
+
2671
+ @property
2672
+ @TimeIt.decorator
2673
+ def num_EARs(self) -> int:
2674
+ """
2675
+ The total number of element action runs.
2676
+ """
2677
+ return self._store._get_num_total_EARs()
2678
+
2679
+ @property
2680
+ def num_loops(self) -> int:
2681
+ """
2682
+ The total number of loops.
2683
+ """
2684
+ return self._store._get_num_total_loops()
2685
+
2686
+ @property
2687
+ def artifacts_path(self) -> Path:
2688
+ """
2689
+        Path to artifacts of the workflow (temporary files, etc.).
2690
+ """
2691
+ # TODO: allow customisation of artifacts path at submission and resources level
2692
+ return Path(self.path) / "artifacts"
2693
+
2694
+ @property
2695
+ def input_files_path(self) -> Path:
2696
+ """
2697
+ Path to input files for the workflow.
2698
+ """
2699
+ return self.artifacts_path / self._input_files_dir_name
2700
+
2701
+ @property
2702
+ def submissions_path(self) -> Path:
2703
+ """
2704
+        Path to submission data for this workflow.
2705
+ """
2706
+ return self.artifacts_path / "submissions"
2707
+
2708
+ @property
2709
+ def task_artifacts_path(self) -> Path:
2710
+ """
2711
+ Path to artifacts of tasks.
2712
+ """
2713
+ return self.artifacts_path / "tasks"
2714
+
2715
+ @property
2716
+ def execution_path(self) -> Path:
2717
+ """
2718
+        Path to the working directory for execution.
2719
+ """
2720
+ return Path(self.path) / self._exec_dir_name
2721
+
+    @TimeIt.decorator
+    def get_task_elements(
+        self,
+        task: WorkflowTask,
+        idx_lst: list[int] | None = None,
+    ) -> list[Element]:
+        """
+        Get the elements of a task.
+        """
+        return [
+            self._app.Element(
+                task=task, **{k: v for k, v in te.items() if k != "task_ID"}
+            )
+            for te in self._store.get_task_elements(task.insert_ID, idx_lst)
+        ]
+
+    def set_EAR_start(
+        self, run_id: int, run_dir: Path | None, port_number: int | None
+    ) -> None:
+        """Set the start time on an EAR."""
+        self._app.logger.debug(f"Setting start for EAR ID {run_id!r}")
+        with self._store.cached_load(), self.batch_update():
+            self._store.set_EAR_start(run_id, run_dir, port_number)
+
+    def set_multi_run_starts(
+        self, run_ids: list[int], run_dirs: list[Path | None], port_number: int
+    ) -> None:
+        """Set the start time on multiple runs."""
+        self._app.logger.debug(f"Setting start for multiple run IDs {run_ids!r}")
+        with self._store.cached_load(), self.batch_update():
+            self._store.set_multi_run_starts(run_ids, run_dirs, port_number)
+
+    def set_EAR_end(
+        self,
+        block_act_key: BlockActionKey,
+        run: ElementActionRun,
+        exit_code: int,
+    ) -> None:
+        """Set the end time and exit code on an EAR.
+
+        If the exit code is non-zero, also set all downstream dependent EARs to be
+        skipped. Also save any generated input/output files.
+        """
+        self._app.logger.debug(
+            f"Setting end for run ID {run.id_!r} with exit code {exit_code!r}."
+        )
+        param_id: int | list[int] | None
+        with self._store.cached_load(), self.batch_update():
+            success = exit_code == 0  # TODO: more sophisticated success heuristics
+            if not run.skip:
+                is_aborted = False
+                if run.action.abortable and exit_code == ABORT_EXIT_CODE:
+                    # the point of aborting an EAR is to continue with the workflow:
+                    is_aborted = True
+                    success = True
+
+                run_dir = run.get_directory()
+                if run_dir:
+                    assert isinstance(run_dir, Path)
+                    for IFG_i in run.action.input_file_generators:
+                        inp_file = IFG_i.input_file
+                        self._app.logger.debug(
+                            f"Saving EAR input file: {inp_file.label!r} for EAR ID "
+                            f"{run.id_!r}."
+                        )
+                        param_id = run.data_idx[f"input_files.{inp_file.label}"]
+
+                        file_paths = inp_file.value(directory=run_dir)
+                        for path_i in (
+                            file_paths if isinstance(file_paths, list) else [file_paths]
+                        ):
+                            full_path = run_dir.joinpath(path_i)
+                            if not full_path.exists():
+                                self._app.logger.debug(
+                                    f"expected input file {path_i!r} does not "
+                                    f"exist, so setting run to an error state "
+                                    f"(if not aborted)."
+                                )
+                                if not is_aborted and success is True:
+                                    # this is unlikely to happen, but could happen
+                                    # if the input file is deleted in between
+                                    # the input file generator completing and this
+                                    # code being run
+                                    success = False
+                                    exit_code = 1  # TODO: more custom exit codes?
+                            else:
+                                self._set_file(
+                                    param_id=param_id,
+                                    store_contents=True,  # TODO: make optional according to IFG
+                                    is_input=False,
+                                    path=full_path,
+                                )
+
+                    if run.action.script_data_out_has_files:
+                        try:
+                            run._param_save("script", block_act_key, run_dir)
+                        except FileNotFoundError:
+                            self._app.logger.debug(
+                                f"script did not generate an expected output parameter "
+                                f"file (block_act_key={block_act_key!r}), so setting run "
+                                f"to an error state (if not aborted)."
+                            )
+                            if not is_aborted and success is True:
+                                success = False
+                                exit_code = 1  # TODO: more custom exit codes?
+
+                    if run.action.program_data_out_has_files:
+                        try:
+                            run._param_save("program", block_act_key, run_dir)
+                        except FileNotFoundError:
+                            self._app.logger.debug(
+                                f"program did not generate an expected output parameter "
+                                f"file (block_act_key={block_act_key!r}), so setting run "
+                                f"to an error state (if not aborted)."
+                            )
+                            if not is_aborted and success is True:
+                                success = False
+                                exit_code = 1  # TODO: more custom exit codes?
+
+                    # Save action-level files: (TODO: refactor with below for OFPs)
+                    for save_file_j in run.action.save_files:
+                        self._app.logger.debug(
+                            f"Saving file: {save_file_j.label!r} for EAR ID "
+                            f"{run.id_!r}."
+                        )
+                        try:
+                            param_id = run.data_idx[f"output_files.{save_file_j.label}"]
+                        except KeyError:
+                            # We might be saving a file that is not a defined
+                            # "output file"; this will avoid saving a reference in the
+                            # parameter data:
+                            param_id = None
+
+                        file_paths = save_file_j.value(directory=run_dir)
+                        self._app.logger.debug(
+                            f"Saving output file paths: {file_paths!r}"
+                        )
+
+                        for path_i in (
+                            file_paths if isinstance(file_paths, list) else [file_paths]
+                        ):
+                            full_path = run_dir.joinpath(path_i)
+                            if not full_path.exists():
+                                self._app.logger.debug(
+                                    f"expected file to save {path_i!r} does not "
+                                    f"exist, so setting run to an error state "
+                                    f"(if not aborted)."
+                                )
+                                if not is_aborted and success is True:
+                                    # this is unlikely to happen, but could happen
+                                    # if the file is deleted in between the action
+                                    # completing and this code being run
+                                    success = False
+                                    exit_code = 1  # TODO: more custom exit codes?
+                            else:
+                                self._set_file(
+                                    param_id=param_id,
+                                    store_contents=True,
+                                    is_input=False,
+                                    path=full_path,
+                                    clean_up=(save_file_j in run.action.clean_up),
+                                )
+
+                    for OFP_i in run.action.output_file_parsers:
+                        for save_file_j in OFP_i._save_files:
+                            self._app.logger.debug(
+                                f"Saving EAR output file: {save_file_j.label!r} for EAR ID "
+                                f"{run.id_!r}."
+                            )
+                            try:
+                                param_id = run.data_idx[
+                                    f"output_files.{save_file_j.label}"
+                                ]
+                            except KeyError:
+                                # We might be saving a file that is not a defined
+                                # "output file"; this will avoid saving a reference in the
+                                # parameter data:
+                                param_id = None
+
+                            file_paths = save_file_j.value(directory=run_dir)
+                            self._app.logger.debug(
+                                f"Saving EAR output file paths: {file_paths!r}"
+                            )
+
+                            for path_i in (
+                                file_paths
+                                if isinstance(file_paths, list)
+                                else [file_paths]
+                            ):
+                                full_path = run_dir.joinpath(path_i)
+                                if not full_path.exists():
+                                    self._app.logger.debug(
+                                        f"expected output file parser `save_files` file "
+                                        f"{path_i!r} does not exist, so setting run "
+                                        f"to an error state (if not aborted)."
+                                    )
+                                    if not is_aborted and success is True:
+                                        success = False
+                                        exit_code = 1  # TODO: more custom exit codes?
+                                else:
+                                    self._set_file(
+                                        param_id=param_id,
+                                        store_contents=True,  # TODO: make optional according to OFP
+                                        is_input=False,
+                                        path=full_path,
+                                        clean_up=(save_file_j in OFP_i.clean_up),
+                                    )
+
+            if (
+                run.resources.skip_downstream_on_failure
+                and not success
+                and run.skip_reason is not SkipReason.LOOP_TERMINATION
+            ):
+                # loop termination skips are already propagated
+                for EAR_dep_ID in run.get_dependent_EARs(as_objects=False):
+                    self._app.logger.debug(
+                        f"Setting EAR ID {EAR_dep_ID!r} to skip because it depends on"
+                        f" EAR ID {run.id_!r}, which exited with a non-zero exit code:"
+                        f" {exit_code!r}."
+                    )
+                    self._store.set_EAR_skip(
+                        {EAR_dep_ID: SkipReason.UPSTREAM_FAILURE.value}
+                    )
+
+            self._store.set_EAR_end(run.id_, exit_code, success, run.action.requires_dir)
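Note: a minimal sketch of the success/exit-code decision implemented above. The `ABORT_EXIT_CODE` constant is defined elsewhere in the package; the value 64 here is purely illustrative:

def resolve_success(exit_code: int, abortable: bool, ABORT_EXIT_CODE: int = 64) -> bool:
    success = exit_code == 0
    if abortable and exit_code == ABORT_EXIT_CODE:
        # an aborted EAR is deliberately treated as successful, so the
        # rest of the workflow can continue:
        success = True
    return success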
+
+    def set_multi_run_ends(
+        self,
+        runs: dict[
+            BlockActionKey,
+            list[tuple[ElementActionRun, int, Path | None]],
+        ],
+    ) -> None:
+        """Set end times and exit codes on multiple runs.
+
+        If the exit code is non-zero, also set all downstream dependent runs to be
+        skipped. Also save any generated input/output files."""
+
+        self._app.logger.debug("Setting end for multiple run IDs.")
+        param_id: int | list[int] | None
+        with self._store.cached_load(), self.batch_update():
+            run_ids = []
+            run_dirs = []
+            exit_codes = []
+            successes = []
+            for block_act_key, run_dat in runs.items():
+                for run, exit_code, run_dir in run_dat:
+                    success = (
+                        exit_code == 0
+                    )  # TODO: more sophisticated success heuristics
+                    self._app.logger.info(
+                        f"setting end for run {run.id_} with exit_code={exit_code}, "
+                        f"success={success}, skip={run.skip!r}, and skip_reason="
+                        f"{run.skip_reason!r}."
+                    )
+                    if not run.skip:
+                        self._app.logger.info("run was not skipped.")
+                        is_aborted = False
+                        if run.action.abortable and exit_code == ABORT_EXIT_CODE:
+                            # the point of aborting an EAR is to continue with the
+                            # workflow:
+                            self._app.logger.info(
+                                "run was abortable and exit code was ABORT_EXIT_CODE,"
+                                " so setting success to True."
+                            )
+                            is_aborted = True
+                            success = True
+
+                        run_dir = run.get_directory()
+                        if run_dir:
+                            assert isinstance(run_dir, Path)
+                            for IFG_i in run.action.input_file_generators:
+                                self._app.logger.info(f"setting IFG file {IFG_i!r}")
+                                inp_file = IFG_i.input_file
+                                self._app.logger.debug(
+                                    f"Saving EAR input file: {inp_file.label!r} for EAR "
+                                    f"ID {run.id_!r}."
+                                )
+                                param_id = run.data_idx[f"input_files.{inp_file.label}"]
+
+                                file_paths = inp_file.value(directory=run_dir)
+                                for path_i in (
+                                    file_paths
+                                    if isinstance(file_paths, list)
+                                    else [file_paths]
+                                ):
+                                    full_path = run_dir.joinpath(path_i)
+                                    if not full_path.exists():
+                                        self._app.logger.debug(
+                                            f"expected input file {path_i!r} does not "
+                                            f"exist, so setting run to an error state "
+                                            f"(if not aborted)."
+                                        )
+                                        if not is_aborted and success is True:
+                                            # this is unlikely to happen, but could happen
+                                            # if the input file is deleted in between
+                                            # the input file generator completing and this
+                                            # code being run
+                                            success = False
+                                            exit_code = 1  # TODO: more custom exit codes?
+                                    else:
+                                        self._set_file(
+                                            param_id=param_id,
+                                            store_contents=True,  # TODO: make optional according to IFG
+                                            is_input=False,
+                                            path=full_path,
+                                        )
+
+                            if run.action.script_data_out_has_files:
+                                self._app.logger.info(
+                                    "saving script-generated parameters."
+                                )
+                                try:
+                                    run._param_save("script", block_act_key, run_dir)
+                                except FileNotFoundError:
+                                    # script did not generate the output parameter file,
+                                    # so set a failed exit code (if we did not abort the
+                                    # run):
+                                    self._app.logger.debug(
+                                        f"script did not generate an expected output "
+                                        f"parameter file (block_act_key="
+                                        f"{block_act_key!r}), so setting run to an error "
+                                        f"state (if not aborted)."
+                                    )
+                                    if not is_aborted and success is True:
+                                        success = False
+                                        exit_code = 1  # TODO: more custom exit codes?
+
+                            if run.action.program_data_out_has_files:
+                                self._app.logger.info(
+                                    "saving program-generated parameters."
+                                )
+                                try:
+                                    run._param_save("program", block_act_key, run_dir)
+                                except FileNotFoundError:
+                                    # program did not generate the output parameter file,
+                                    # so set a failed exit code (if we did not abort the
+                                    # run):
+                                    self._app.logger.debug(
+                                        f"program did not generate an expected output "
+                                        f"parameter file (block_act_key="
+                                        f"{block_act_key!r}), so setting run to an error "
+                                        f"state (if not aborted)."
+                                    )
+                                    if not is_aborted and success is True:
+                                        success = False
+                                        exit_code = 1  # TODO: more custom exit codes?
+
+                            # Save action-level files: (TODO: refactor with below for OFPs)
+                            for save_file_j in run.action.save_files:
+                                self._app.logger.info(
+                                    f"saving action-level file {save_file_j!r}."
+                                )
+                                self._app.logger.debug(
+                                    f"Saving file: {save_file_j.label!r} for EAR ID "
+                                    f"{run.id_!r}."
+                                )
+                                try:
+                                    param_id = run.data_idx[
+                                        f"output_files.{save_file_j.label}"
+                                    ]
+                                except KeyError:
+                                    # We might be saving a file that is not a defined
+                                    # "output file"; this will avoid saving a reference in
+                                    # the parameter data:
+                                    param_id = None
+
+                                file_paths = save_file_j.value(directory=run_dir)
+                                self._app.logger.debug(
+                                    f"Saving output file paths: {file_paths!r}"
+                                )
+                                for path_i in (
+                                    file_paths
+                                    if isinstance(file_paths, list)
+                                    else [file_paths]
+                                ):
+                                    full_path = run_dir.joinpath(path_i)
+                                    if not full_path.exists():
+                                        self._app.logger.debug(
+                                            f"expected file to save {path_i!r} does not "
+                                            f"exist, so setting run to an error state "
+                                            f"(if not aborted)."
+                                        )
+                                        if not is_aborted and success is True:
+                                            # this is unlikely to happen, but could happen
+                                            # if the file is deleted in between the action
+                                            # completing and this code being run
+                                            success = False
+                                            exit_code = 1  # TODO: more custom exit codes?
+                                    else:
+                                        self._set_file(
+                                            param_id=param_id,
+                                            store_contents=True,
+                                            is_input=False,
+                                            path=full_path,
+                                            clean_up=(save_file_j in run.action.clean_up),
+                                        )
+
+                            for OFP_i in run.action.output_file_parsers:
+                                self._app.logger.info(
+                                    f"saving files from OFP: {OFP_i!r}."
+                                )
+                                for save_file_j in OFP_i._save_files:
+                                    self._app.logger.debug(
+                                        f"Saving EAR output file: {save_file_j.label!r} "
+                                        f"for EAR ID {run.id_!r}."
+                                    )
+                                    try:
+                                        param_id = run.data_idx[
+                                            f"output_files.{save_file_j.label}"
+                                        ]
+                                    except KeyError:
+                                        # We might be saving a file that is not a defined
+                                        # "output file"; this will avoid saving a
+                                        # reference in the parameter data:
+                                        param_id = None
+
+                                    file_paths = save_file_j.value(directory=run_dir)
+                                    self._app.logger.debug(
+                                        f"Saving EAR output file paths: {file_paths!r}"
+                                    )
+
+                                    for path_i in (
+                                        file_paths
+                                        if isinstance(file_paths, list)
+                                        else [file_paths]
+                                    ):
+                                        full_path = run_dir.joinpath(path_i)
+                                        if not full_path.exists():
+                                            self._app.logger.debug(
+                                                f"expected output file parser `save_files` file "
+                                                f"{path_i!r} does not exist, so setting run "
+                                                f"to an error state (if not aborted)."
+                                            )
+                                            if not is_aborted and success is True:
+                                                success = False
+                                                exit_code = (
+                                                    1  # TODO: more custom exit codes?
+                                                )
+                                        else:
+                                            self._set_file(
+                                                param_id=param_id,
+                                                store_contents=True,  # TODO: make optional according to OFP
+                                                is_input=False,
+                                                path=full_path,
+                                                clean_up=(save_file_j in OFP_i.clean_up),
+                                            )
+
+                    else:
+                        self._app.logger.info(
+                            f"run was skipped: reason: {run.skip_reason!r}."
+                        )
+
+                    if (
+                        run.resources.skip_downstream_on_failure
+                        and not success
+                        and run.skip_reason is not SkipReason.LOOP_TERMINATION
+                    ):
+                        # run failed
+                        self._app.logger.info(
+                            "run was not successful and skip reason was not "
+                            "LOOP_TERMINATION."
+                        )
+                        # loop termination skips are already propagated
+                        for EAR_dep_ID in run.get_dependent_EARs(as_objects=False):
+                            # TODO: `get_dependent_EARs` seems to be stuck in a
+                            # recursion for some workflows
+                            # TODO: this needs to be recursive?
+                            self._app.logger.info(
+                                f"Setting EAR ID {EAR_dep_ID!r} to skip because it "
+                                f"depends on EAR ID {run.id_!r}, which exited with a "
+                                f"non-zero exit code: {exit_code!r}."
+                            )
+                            self._store.set_EAR_skip(
+                                {EAR_dep_ID: SkipReason.UPSTREAM_FAILURE.value}
+                            )
+                    else:
+                        self._app.logger.info(
+                            "`skip_downstream_on_failure` is False, run was "
+                            "successful, or skip reason was LOOP_TERMINATION."
+                        )
+
+                    run_ids.append(run.id_)
+                    run_dirs.append(run_dir)
+                    exit_codes.append(exit_code)
+                    successes.append(success)
+
+            self._store.set_multi_run_ends(run_ids, run_dirs, exit_codes, successes)
+
+    def set_EAR_skip(self, skip_reasons: dict[int, SkipReason]) -> None:
+        """
+        Record that an EAR is to be skipped due to an upstream failure or loop
+        termination condition being met.
+        """
+        with self._store.cached_load(), self.batch_update():
+            self._store.set_EAR_skip({k: v.value for k, v in skip_reasons.items()})
+
+    def get_EAR_skipped(self, EAR_ID: int) -> int:
+        """Check if an EAR is to be skipped."""
+        with self._store.cached_load():
+            return self._store.get_EAR_skipped(EAR_ID)
+
+    @TimeIt.decorator
+    def set_parameter_value(
+        self, param_id: int | list[int], value: Any, commit: bool = False
+    ) -> None:
+        """
+        Set the value of a parameter.
+        """
+        with self._store.cached_load(), self.batch_update():
+            self._store.set_parameter_value(cast("int", param_id), value)
+
+        if commit:
+            # force commit now:
+            self._store._pending.commit_all()
+
+    @TimeIt.decorator
+    def set_parameter_values(self, values: dict[int, Any], commit: bool = False) -> None:
+        """
+        Set the values of multiple parameters.
+        """
+        with self._store.cached_load(), self.batch_update(), self._store.cache_ctx():
+            self._store.set_parameter_values(values)
+
+        if commit:
+            # force commit now:
+            self._store._pending.commit_all()
+
+    def set_EARs_initialised(self, iter_ID: int) -> None:
+        """
+        Set :py:attr:`~hpcflow.app.ElementIteration.EARs_initialised` to True for the
+        specified iteration.
+        """
+        with self._store.cached_load(), self.batch_update():
+            self._store.set_EARs_initialised(iter_ID)
+
+    def elements(self) -> Iterator[Element]:
+        """
+        Get the elements of the workflow's tasks.
+        """
+        for task in self.tasks:
+            for element in task.elements[:]:
+                yield element
+
+    @overload
+    def get_iteration_task_pathway(
+        self,
+        *,
+        ret_iter_IDs: Literal[False] = False,
+        ret_data_idx: Literal[False] = False,
+    ) -> Sequence[tuple[int, LoopIndex[str, int]]]: ...
+
+    @overload
+    def get_iteration_task_pathway(
+        self, *, ret_iter_IDs: Literal[False] = False, ret_data_idx: Literal[True]
+    ) -> Sequence[tuple[int, LoopIndex[str, int], tuple[Mapping[str, int], ...]]]: ...
+
+    @overload
+    def get_iteration_task_pathway(
+        self, *, ret_iter_IDs: Literal[True], ret_data_idx: Literal[False] = False
+    ) -> Sequence[tuple[int, LoopIndex[str, int], tuple[int, ...]]]: ...
+
+    @overload
+    def get_iteration_task_pathway(
+        self, *, ret_iter_IDs: Literal[True], ret_data_idx: Literal[True]
+    ) -> Sequence[
+        tuple[int, LoopIndex[str, int], tuple[int, ...], tuple[Mapping[str, int], ...]]
+    ]: ...
+
+    @TimeIt.decorator
+    def get_iteration_task_pathway(
+        self, ret_iter_IDs: bool = False, ret_data_idx: bool = False
+    ) -> Sequence[tuple]:
+        """
+        Get the iteration task pathway.
+        """
+        pathway: list[_Pathway] = []
+        for task in self.tasks:
+            pathway.append(_Pathway(task.insert_ID))
+
+        added_loop_names: set[str] = set()
+        for _ in range(self.num_loops):
+            for loop in self.loops:
+                if loop.name in added_loop_names:
+                    continue
+                elif set(loop.parents).issubset(added_loop_names):
+                    # add a loop only once its parents have been added:
+                    to_add = loop
+                    break
+            else:
+                raise RuntimeError(
+                    "Failed to find a loop whose parents have already been added to the "
+                    "iteration task pathway."
+                )
+
+            iIDs = to_add.task_insert_IDs
+            relevant_idx = (
+                idx for idx, path_i in enumerate(pathway) if path_i.id_ in iIDs
+            )
+
+            for num_add_k, num_add in to_add.num_added_iterations.items():
+                parent_loop_idx = list(zip(to_add.parents, num_add_k))
+                replacement: list[_Pathway] = []
+                repl_idx: list[int] = []
+                for i in range(num_add):
+                    for p_idx, path in enumerate(pathway):
+                        if path.id_ not in iIDs:
+                            continue
+                        if all(path.names[k] == v for k, v in parent_loop_idx):
+                            new_path = copy.deepcopy(path)
+                            new_path.names += {to_add.name: i}
+                            repl_idx.append(p_idx)
+                            replacement.append(new_path)
+
+                if replacement:
+                    pathway = replace_items(
+                        pathway, min(repl_idx), max(repl_idx) + 1, replacement
+                    )
+
+            added_loop_names.add(to_add.name)
+
+        if added_loop_names != set(loop.name for loop in self.loops):
+            raise RuntimeError(
+                "Not all loops have been considered in the iteration task pathway."
+            )
+
+        if ret_iter_IDs or ret_data_idx:
+            all_iters = self.get_all_element_iterations()
+            for path_i in pathway:
+                i_iters = [
+                    iter_j
+                    for iter_j in all_iters
+                    if (
+                        iter_j.task.insert_ID == path_i.id_
+                        and iter_j.loop_idx == path_i.names
+                    )
+                ]
+                if ret_iter_IDs:
+                    path_i.iter_ids.extend(elit.id_ for elit in i_iters)
+                if ret_data_idx:
+                    path_i.data_idx.extend(elit.get_data_idx() for elit in i_iters)
+
+        return [
+            path.as_tuple(ret_iter_IDs=ret_iter_IDs, ret_data_idx=ret_data_idx)
+            for path in pathway
+        ]
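Note: for orientation, a hedged sketch of the returned pathway. For a hypothetical workflow with two tasks (insert IDs 0 and 1) and a two-iteration loop named "my_loop" over task 1, the default call might return something like the following (`LoopIndex` behaves like a mapping; its exact repr is app-defined):

# wf.get_iteration_task_pathway()
# -> [(0, {}), (1, {"my_loop": 0}), (1, {"my_loop": 1})]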
+
+    @TimeIt.decorator
+    def _submit(
+        self,
+        status: Status | None = None,
+        ignore_errors: bool = False,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        print_stdout: bool = False,
+        add_to_known: bool = True,
+        tasks: Sequence[int] | None = None,
+    ) -> tuple[Sequence[SubmissionFailure], Mapping[int, Sequence[int]]]:
+        """Submit outstanding EARs for execution."""
+
+        # generate a new submission if there are no pending submissions:
+        if not (pending := [sub for sub in self.submissions if sub.needs_submit]):
+            if status:
+                status.update("Adding new submission...")
+            if not (
+                new_sub := self._add_submission(
+                    tasks=tasks,
+                    JS_parallelism=JS_parallelism,
+                    status=status,
+                )
+            ):
+                if status:
+                    status.stop()
+                raise ValueError("No pending element action runs to submit!")
+            pending = [new_sub]
+
+        self.execution_path.mkdir(exist_ok=True, parents=True)
+        self.task_artifacts_path.mkdir(exist_ok=True, parents=True)
+
+        # the submission must be persistent at submit-time, because it will be read by a
+        # new instance of the app:
+        if status:
+            status.update("Committing to the store...")
+        self._store._pending.commit_all()
+
+        # submit all pending submissions:
+        exceptions: list[SubmissionFailure] = []
+        submitted_js: dict[int, list[int]] = {}
+        for sub in pending:
+            try:
+                if status:
+                    status.update(f"Preparing submission {sub.index}...")
+                sub_js_idx = sub.submit(
+                    status=status,
+                    ignore_errors=ignore_errors,
+                    print_stdout=print_stdout,
+                    add_to_known=add_to_known,
+                )
+                submitted_js[sub.index] = sub_js_idx
+            except SubmissionFailure as exc:
+                exceptions.append(exc)
+
+        return exceptions, submitted_js
+
+    @overload
+    def submit(
+        self,
+        *,
+        ignore_errors: bool = False,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        print_stdout: bool = False,
+        wait: bool = False,
+        add_to_known: bool = True,
+        return_idx: Literal[True],
+        tasks: list[int] | None = None,
+        cancel: bool = False,
+        status: bool = True,
+    ) -> Mapping[int, Sequence[int]]: ...
+
+    @overload
+    def submit(
+        self,
+        *,
+        ignore_errors: bool = False,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        print_stdout: bool = False,
+        wait: bool = False,
+        add_to_known: bool = True,
+        return_idx: Literal[False] = False,
+        tasks: list[int] | None = None,
+        cancel: bool = False,
+        status: bool = True,
+    ) -> None: ...
+
+    def submit(
+        self,
+        *,
+        ignore_errors: bool = False,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        print_stdout: bool = False,
+        wait: bool = False,
+        add_to_known: bool = True,
+        return_idx: bool = False,
+        tasks: list[int] | None = None,
+        cancel: bool = False,
+        status: bool = True,
+    ) -> Mapping[int, Sequence[int]] | None:
+        """Submit the workflow for execution.
+
+        Parameters
+        ----------
+        ignore_errors
+            If True, ignore jobscript submission errors. If False (the default),
+            jobscript submission will halt when a jobscript fails to submit.
+        JS_parallelism
+            If True, allow multiple jobscripts to execute simultaneously. If
+            'scheduled'/'direct', only allow simultaneous execution of scheduled/direct
+            jobscripts. Raises if set to True, 'scheduled', or 'direct', but the store
+            type does not support the `jobscript_parallelism` feature. If not set,
+            jobscript parallelism will be used if the store type supports it, for
+            scheduled jobscripts only.
+        print_stdout
+            If True, print any jobscript submission standard output, otherwise hide it.
+        wait
+            If True, this command will block until the workflow execution is complete.
+        add_to_known
+            If True, add the submitted submissions to the known-submissions file, which is
+            used by the `show` command to monitor current and recent submissions.
+        return_idx
+            If True, return a dict representing the jobscript indices submitted for each
+            submission.
+        tasks
+            List of task indices to include in the new submission if no submissions
+            already exist. By default all tasks are included if a new submission is
+            created.
+        cancel
+            Immediately cancel the submission. Useful for testing and benchmarking.
+        status
+            If True, display a live status to track submission progress.
+        """
+
+        # Type hint for mypy
+        status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
+            rich.console.Console().status("Submitting workflow...")
+            if status
+            else nullcontext()
+        )
+        with status_context as status_, self._store.cached_load():
+            if not self._store.is_submittable:
+                raise NotImplementedError("The workflow is not submittable.")
+            # commit updates before raising exception:
+            with (
+                self.batch_update(),
+                self._store.parameters_metadata_cache(),
+                self._store.cache_ctx(),
+            ):
+                exceptions, submitted_js = self._submit(
+                    ignore_errors=ignore_errors,
+                    JS_parallelism=JS_parallelism,
+                    print_stdout=print_stdout,
+                    status=status_,
+                    add_to_known=add_to_known,
+                    tasks=tasks,
+                )
+
+        if exceptions:
+            raise WorkflowSubmissionFailure(exceptions)
+
+        if cancel:
+            self.cancel(status=status)
+
+        elif wait:
+            self.wait(submitted_js)
+
+        if return_idx:
+            return submitted_js
+        return None
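Note: a hedged usage sketch, assuming `wf` is a loaded `Workflow` instance:

# submit everything outstanding, block until done, and inspect what was submitted:
submitted = wf.submit(wait=True, return_idx=True)
# e.g. {0: [0, 1]}: jobscripts 0 and 1 of submission 0 were submitted
for sub_idx, js_indices in submitted.items():
    print(f"submission {sub_idx}: jobscripts {js_indices}")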
+
+    @staticmethod
+    def __wait_for_direct_jobscripts(jobscripts: list[Jobscript]):
+        """Wait for the passed direct (i.e. non-scheduled) jobscripts to finish."""
+
+        def callback(proc: psutil.Process) -> None:
+            js = js_pids[proc.pid]
+            assert hasattr(proc, "returncode")
+            # TODO: sometimes proc.returncode is None; maybe because of multiple wait
+            # calls?
+            print(
+                f"Jobscript {js.index} from submission {js.submission.index} "
+                f"finished with exit code {proc.returncode}."
+            )
+
+        js_pids = {js.process_ID: js for js in jobscripts}
+        process_refs = [
+            (js.process_ID, js.submit_cmdline)
+            for js in jobscripts
+            if js.process_ID and js.submit_cmdline
+        ]
+        DirectScheduler.wait_for_jobscripts(process_refs, callback=callback)
+
+    def __wait_for_scheduled_jobscripts(self, jobscripts: list[Jobscript]):
+        """Wait for the passed scheduled jobscripts to finish."""
+        schedulers = self._app.Submission.get_unique_schedulers_of_jobscripts(jobscripts)
+        threads: list[Thread] = []
+        for js_indices, sched in schedulers:
+            jobscripts_gen = (
+                self.submissions[sub_idx].jobscripts[js_idx]
+                for sub_idx, js_idx in js_indices
+            )
+            job_IDs = [
+                js.scheduler_job_ID
+                for js in jobscripts_gen
+                if js.scheduler_job_ID is not None
+            ]
+            threads.append(Thread(target=sched.wait_for_jobscripts, args=(job_IDs,)))
+
+        for thr in threads:
+            thr.start()
+
+        for thr in threads:
+            thr.join()
+
+    def wait(self, sub_js: Mapping[int, Sequence[int]] | None = None):
+        """Wait for the completion of specified/all submitted jobscripts."""
+
+        # TODO: think about how this might work with remote workflow submission (via SSH)
+
+        # TODO: add a log file to the submission dir where we can log stuff (e.g. starting
+        # a thread...)
+
+        if not sub_js:
+            # find any active jobscripts first:
+            sub_js_: dict[int, list[int]] = defaultdict(list)
+            for sub in self.submissions:
+                sub_js_[sub.index].extend(sub.get_active_jobscripts())
+            sub_js = sub_js_
+
+        js_direct: list[Jobscript] = []
+        js_sched: list[Jobscript] = []
+        for sub_idx, all_js_idx in sub_js.items():
+            for js_idx in all_js_idx:
+                try:
+                    js = self.submissions[sub_idx].jobscripts[js_idx]
+                except IndexError:
+                    raise ValueError(
+                        f"No jobscript with submission index {sub_idx!r} and/or "
+                        f"jobscript index {js_idx!r}."
+                    )
+                if js.process_ID is not None:
+                    js_direct.append(js)
+                elif js.scheduler_job_ID is not None:
+                    js_sched.append(js)
+                else:
+                    raise RuntimeError(
+                        f"Neither process ID nor scheduler job ID is set for {js!r}."
+                    )
+
+        if js_direct or js_sched:
+            # TODO: use a rich console status? how would that appear in stdout though?
+            print("Waiting for workflow submissions to finish...")
+        else:
+            print("No running jobscripts.")
+            return
+
+        try:
+            t_direct = Thread(target=self.__wait_for_direct_jobscripts, args=(js_direct,))
+            t_sched = Thread(
+                target=self.__wait_for_scheduled_jobscripts, args=(js_sched,)
+            )
+            t_direct.start()
+            t_sched.start()
+
+            # without these, KeyboardInterrupt seems to not be caught:
+            while t_direct.is_alive():
+                t_direct.join(timeout=1)
+
+            while t_sched.is_alive():
+                t_sched.join(timeout=1)
+
+        except KeyboardInterrupt:
+            print("No longer waiting (workflow execution will continue).")
+        else:
+            print("Specified submissions have finished.")
+
+    def get_running_elements(
+        self,
+        submission_idx: int = -1,
+        task_idx: int | None = None,
+        task_insert_ID: int | None = None,
+    ) -> list[Element]:
+        """Retrieve elements that are running according to the scheduler."""
+
+        if task_idx is not None and task_insert_ID is not None:
+            raise ValueError("Specify at most one of `task_insert_ID` and `task_idx`.")
+
+        # keys are task_insert_IDs, values are element indices:
+        active_elems: dict[int, set[int]] = defaultdict(set)
+        sub = self.submissions[submission_idx]
+        for js_idx, block_states in sub.get_active_jobscripts().items():
+            js = sub.jobscripts[js_idx]
+            for block_idx, block in enumerate(js.blocks):
+                states = block_states[block_idx]
+                for js_elem_idx, state in states.items():
+                    if state is JobscriptElementState.running:
+                        for task_iID, elem_idx in zip(
+                            block.task_insert_IDs, block.task_elements[js_elem_idx]
+                        ):
+                            active_elems[task_iID].add(elem_idx)
+
+        # retrieve Element objects:
+        out: list[Element] = []
+        for task_iID, elem_idxes in active_elems.items():
+            if task_insert_ID is not None and task_iID != task_insert_ID:
+                continue
+            task = self.tasks.get(insert_ID=task_iID)
+            if task_idx is not None and task_idx != task.index:
+                continue
+            for idx_i in elem_idxes:
+                out.append(task.elements[idx_i])
+
+        return out
+
+    def get_running_runs(
+        self,
+        submission_idx: int = -1,
+        task_idx: int | None = None,
+        task_insert_ID: int | None = None,
+        element_idx: int | None = None,
+    ) -> list[ElementActionRun]:
+        """Retrieve runs that are running according to the scheduler."""
+
+        elems = self.get_running_elements(
+            submission_idx=submission_idx,
+            task_idx=task_idx,
+            task_insert_ID=task_insert_ID,
+        )
+        out = []
+        for elem in elems:
+            if element_idx is not None and elem.index != element_idx:
+                continue
+            for iter_i in elem.iterations:
+                for elem_acts in iter_i.actions.values():
+                    for run in elem_acts.runs:
+                        if run.status is EARStatus.running:
+                            out.append(run)
+                            # for a given element and submission, only one run
+                            # may be running at a time:
+                            break
+        return out
+
+    def _abort_run(self, run: ElementActionRun):
+        # connect to the ZeroMQ server on the worker node:
+        self._app.logger.info(f"abort run: {run!r}")
+        self._app.Executor.send_abort(
+            hostname=run.run_hostname, port_number=run.port_number
+        )
+
+    def abort_run(
+        self,
+        submission_idx: int = -1,
+        task_idx: int | None = None,
+        task_insert_ID: int | None = None,
+        element_idx: int | None = None,
+    ):
+        """Abort the currently running action-run of the specified task/element.
+
+        Parameters
+        ----------
+        task_idx
+            The parent task of the run to abort.
+        element_idx
+            For multi-element tasks, the parent element of the run to abort.
+        submission_idx
+            Defaults to the most-recent submission.
+        """
+        running = self.get_running_runs(
+            submission_idx=submission_idx,
+            task_idx=task_idx,
+            task_insert_ID=task_insert_ID,
+            element_idx=element_idx,
+        )
+        if not running:
+            raise ValueError("Specified run is not running.")
+
+        elif len(running) > 1:
+            if element_idx is None:
+                elem_idx = tuple(ear.element.index for ear in running)
+                raise ValueError(
+                    f"Multiple elements are running (indices: {elem_idx!r}). Specify "
+                    "which element index you want to abort."
+                )
+            else:
+                raise RuntimeError("Multiple running runs.")
+
+        run = running[0]
+        if not run.action.abortable:
+            raise RunNotAbortableError()
+        self._abort_run(run)
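Note: a hedged usage sketch (indices are illustrative). Abort the run currently executing in element 3 of task 0 of the most recent submission; this only succeeds for actions declared abortable:

wf.abort_run(task_idx=0, element_idx=3)  # signals the worker via its ZeroMQ port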
+
+    @TimeIt.decorator
+    def cancel(self, status: bool = True):
+        """Cancel any running jobscripts."""
+        status_msg = f"Cancelling jobscripts of workflow {self.path!r}"
+        # Type hint for mypy
+        status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
+            rich.console.Console().status(status_msg) if status else nullcontext()
+        )
+        with status_context as status_, self._store.cached_load():
+            for sub in self.submissions:
+                sub.cancel()
+
+    def add_submission(
+        self,
+        tasks: list[int] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        force_array: bool = False,
+        status: bool = True,
+    ) -> Submission | None:
+        """Add a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+        """
+        # JS_parallelism=None means guess
+        # Type hint for mypy
+        status_context: AbstractContextManager[Status] | AbstractContextManager[None] = (
+            rich.console.Console().status("") if status else nullcontext()
+        )
+        with status_context as status_, self._store.cached_load(), self.batch_update():
+            return self._add_submission(tasks, JS_parallelism, force_array, status_)
+
+    @TimeIt.decorator
+    @load_workflow_config
+    def _add_submission(
+        self,
+        tasks: Sequence[int] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        force_array: bool = False,
+        status: Status | None = None,
+    ) -> Submission | None:
+        """Add a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+        """
+        new_idx = self.num_submissions
+        _ = self.submissions  # TODO: just to ensure `submissions` is loaded
+        if status:
+            status.update("Adding new submission: resolving jobscripts...")
+
+        with self._store.cache_ctx():
+            cache = ObjectCache.build(self, elements=True, iterations=True, runs=True)
+
+        sub_obj: Submission = self._app.Submission(
+            index=new_idx,
+            workflow=self,
+            jobscripts=self.resolve_jobscripts(cache, tasks, force_array),
+            JS_parallelism=JS_parallelism,
+        )
+        if status:
+            status.update("Adding new submission: setting environments...")
+        sub_obj._set_environments()
+        all_EAR_ID = sub_obj.all_EAR_IDs
+        if not all_EAR_ID:
+            print(
+                "There are no pending element action runs, so a new submission was not "
+                "added."
+            )
+            return None
+
+        if status:
+            status.update("Adding new submission: making artifact directories...")
+
+        # TODO: a submission should only be "submitted" once, shouldn't it?
+        # no; there could be an IO error (e.g. internet connectivity), so might
+        # need to be able to reattempt submission of outstanding jobscripts.
+        self.submissions_path.mkdir(exist_ok=True, parents=True)
+        sub_obj.path.mkdir(exist_ok=True)
+        sub_obj.tmp_path.mkdir(exist_ok=True)
+        sub_obj.app_std_path.mkdir(exist_ok=True)
+        sub_obj.js_path.mkdir(exist_ok=True)  # for jobscripts
+        sub_obj.js_std_path.mkdir(exist_ok=True)  # for stdout/err stream files
+        sub_obj.js_funcs_path.mkdir(exist_ok=True)
+        sub_obj.js_run_ids_path.mkdir(exist_ok=True)
+        sub_obj.scripts_path.mkdir(exist_ok=True)
+        sub_obj.commands_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_app_log_dir:
+            sub_obj.app_log_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_win_pids_dir:
+            sub_obj.js_win_pids_path.mkdir(exist_ok=True)
+
+        if sub_obj.needs_script_indices_dir:
+            sub_obj.js_script_indices_path.mkdir(exist_ok=True)
+
+        if status:
+            status.update("Adding new submission: writing scripts and command files...")
+
+        # write scripts and command files where possible to the submission directory:
+        cmd_file_IDs, run_indices, run_inp_files = sub_obj._write_scripts(cache, status)
+
+        sub_obj._write_execute_dirs(run_indices, run_inp_files, cache, status)
+
+        if status:
+            status.update("Adding new submission: updating the store...")
+
+        with self._store.cached_load(), self.batch_update():
+            for id_ in all_EAR_ID:
+                self._store.set_run_submission_data(
+                    EAR_ID=id_,
+                    cmds_ID=cmd_file_IDs[id_],
+                    sub_idx=new_idx,
+                )
+
+        sub_obj._ensure_JS_parallelism_set()
+        sub_obj_js, _ = sub_obj.to_json_like()
+        assert self._submissions is not None
+        self._submissions.append(sub_obj)
+        self._pending["submissions"].append(new_idx)
+        with self._store.cached_load(), self.batch_update():
+            self._store.add_submission(new_idx, cast("Mapping[str, JSONed]", sub_obj_js))
+
+        return self.submissions[new_idx]
+
+    @TimeIt.decorator
+    def resolve_jobscripts(
+        self,
+        cache: ObjectCache,
+        tasks: Sequence[int] | None = None,
+        force_array: bool = False,
+    ) -> list[Jobscript]:
+        """
+        Resolve this workflow to a set of jobscripts to run for a new submission.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+        """
+        with self._app.config.cached_config():
+            with self.cached_merged_parameters(), self._store.cache_ctx():
+                js, element_deps = self._resolve_singular_jobscripts(
+                    cache, tasks, force_array
+                )
+
+                js_deps = resolve_jobscript_dependencies(js, element_deps)
+
+                for js_idx, jsca in js.items():
+                    if js_idx in js_deps:
+                        jsca["dependencies"] = js_deps[js_idx]  # type: ignore
+
+                js = merge_jobscripts_across_tasks(js)
+
+                # for direct or non-array scheduled jobscripts, combine dependent
+                # jobscripts that have the same resource hashes into jobscripts of
+                # multiple blocks
+                js_ = resolve_jobscript_blocks(js)
+
+        return [self._app.Jobscript(**i, index=idx) for idx, i in enumerate(js_)]
+
+    def __EAR_obj_map(
+        self,
+        js_desc: JobScriptDescriptor,
+        jsca: JobScriptCreationArguments,
+        task: WorkflowTask,
+        task_actions: Sequence[tuple[int, int, int]],
+        EAR_map: NDArray,
+        cache: ObjectCache,
+    ) -> Mapping[int, ElementActionRun]:
+        """Map EAR IDs to their run objects, filling `jsca["EAR_ID"]` along the way."""
+        assert cache.runs is not None
+        all_EAR_IDs: list[int] = []
+        for js_elem_idx, (elem_idx, act_indices) in enumerate(
+            js_desc["elements"].items()
+        ):
+            for act_idx in act_indices:
+                EAR_ID_i: int = EAR_map[act_idx, elem_idx].item()
+                all_EAR_IDs.append(EAR_ID_i)
+                js_act_idx = task_actions.index((task.insert_ID, act_idx, 0))
+                jsca["EAR_ID"][js_act_idx][js_elem_idx] = EAR_ID_i
+        return dict(zip(all_EAR_IDs, (cache.runs[i] for i in all_EAR_IDs)))
+
+    @TimeIt.decorator
+    def _resolve_singular_jobscripts(
+        self,
+        cache: ObjectCache,
+        tasks: Sequence[int] | None = None,
+        force_array: bool = False,
+    ) -> tuple[
+        Mapping[int, JobScriptCreationArguments],
+        Mapping[int, Mapping[int, Sequence[int]]],
+    ]:
+        """
+        We arrange EARs into `EARs` and `elements` so we can quickly look up membership
+        by EAR idx in the `EARs` dict.
+
+        Parameters
+        ----------
+        force_array
+            Used to force the use of job arrays, even if the scheduler does not support
+            it. This is provided for testing purposes only.
+
+        Returns
+        -------
+        submission_jobscripts
+            Information for making each jobscript.
+        all_element_deps
+            For a given jobscript index, for a given jobscript element index within that
+            jobscript, this is a list of the EAR ID dependencies of that element.
+        """
+        task_set = frozenset(tasks if tasks else range(self.num_tasks))
+
+        if self._store.use_cache:
+            # pre-cache parameter sources (used in `EAR.get_EAR_dependencies`):
+            # note: this cache is unrelated to the `cache` argument
+            self.get_all_parameter_sources()
+
+        submission_jobscripts: dict[int, JobScriptCreationArguments] = {}
+        all_element_deps: dict[int, dict[int, list[int]]] = {}
+
+        for task_iID, loop_idx_i in self.get_iteration_task_pathway():
+            task = self.tasks.get(insert_ID=task_iID)
+            if task.index not in task_set:
+                continue
+            res, res_hash, res_map, EAR_map = generate_EAR_resource_map(
+                task, loop_idx_i, cache
+            )
+            jobscripts, _ = group_resource_map_into_jobscripts(res_map)
+
+            for js_dat in jobscripts:
+                # (insert ID, action_idx, index into task_loop_idx):
+                task_actions = sorted(
+                    set(
+                        (task.insert_ID, act_idx_i, 0)
+                        for act_idx in js_dat["elements"].values()
+                        for act_idx_i in act_idx
+                    ),
+                    key=lambda x: x[1],
+                )
+                # Invert the mapping
+                task_actions_inv = {k: idx for idx, k in enumerate(task_actions)}
+                # task_elements: {JS_ELEM_IDX: [TASK_ELEM_IDX for each task insert ID]}
+                task_elements = {
+                    js_elem_idx: [task_elem_idx]
+                    for js_elem_idx, task_elem_idx in enumerate(js_dat["elements"])
+                }
+                EAR_idx_arr_shape = (
+                    len(task_actions),
+                    len(js_dat["elements"]),
+                )
+                EAR_ID_arr = np.empty(EAR_idx_arr_shape, dtype=np.int32)
+                EAR_ID_arr[:] = -1
+
+                new_js_idx = len(submission_jobscripts)
+
+                is_array = force_array or is_jobscript_array(
+                    res[js_dat["resources"]],
+                    EAR_ID_arr.shape[1],
+                    self._store,
+                )
+                js_i: JobScriptCreationArguments = {
+                    "task_insert_IDs": [task.insert_ID],
+                    "task_loop_idx": [loop_idx_i],
+                    "task_actions": task_actions,  # map jobscript actions to task actions
+                    "task_elements": task_elements,  # map jobscript elements to task elements
+                    "EAR_ID": EAR_ID_arr,
+                    "resources": res[js_dat["resources"]],
+                    "resource_hash": res_hash[js_dat["resources"]],
+                    "dependencies": {},
+                    "is_array": is_array,
+                }
+
+                all_EAR_objs = self.__EAR_obj_map(
+                    js_dat, js_i, task, task_actions, EAR_map, cache
+                )
+
+                for js_elem_idx, (elem_idx, act_indices) in enumerate(
+                    js_dat["elements"].items()
+                ):
+                    all_EAR_IDs: list[int] = []
+                    for act_idx in act_indices:
+                        EAR_ID_i: int = EAR_map[act_idx, elem_idx].item()
+                        all_EAR_IDs.append(EAR_ID_i)
+                        js_act_idx = task_actions_inv[task.insert_ID, act_idx, 0]
+                        EAR_ID_arr[js_act_idx][js_elem_idx] = EAR_ID_i
+
+                    # get indices of EARs that this element depends on:
+                    EAR_deps_EAR_idx = [
+                        dep_ear_id
+                        for main_ear_id in all_EAR_IDs
+                        for dep_ear_id in all_EAR_objs[main_ear_id].get_EAR_dependencies()
+                        if dep_ear_id not in EAR_ID_arr
+                    ]
+                    if EAR_deps_EAR_idx:
+                        all_element_deps.setdefault(new_js_idx, {})[
+                            js_elem_idx
+                        ] = EAR_deps_EAR_idx
+
+                submission_jobscripts[new_js_idx] = js_i
+
+        return submission_jobscripts, all_element_deps
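Note: a hedged sketch of the `EAR_ID` array each jobscript carries (shapes and run IDs are illustrative). Rows are jobscript actions, columns are jobscript elements, and -1 marks an action that does not apply to an element:

import numpy as np

# two actions x three elements; IDs would normally come from EAR_map:
EAR_ID_arr = np.empty((2, 3), dtype=np.int32)
EAR_ID_arr[:] = -1
EAR_ID_arr[0] = [10, 11, 12]  # action 0 runs for all three elements
EAR_ID_arr[1, 2] = 13         # action 1 runs only for element 2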
+
+    @load_workflow_config
+    def execute_run(
+        self,
+        submission_idx: int,
+        block_act_key: BlockActionKey,
+        run_ID: int,
+    ) -> None:
+        """Execute commands of a run via a subprocess."""
+
+        # CD to submission tmp dir to ensure std streams and exceptions have somewhere
+        # sensible to go:
+        os.chdir(Submission.get_tmp_path(self.submissions_path, submission_idx))
+
+        sub_str_path = Submission.get_app_std_path(self.submissions_path, submission_idx)
+        run_std_path = sub_str_path / f"{str(run_ID)}.txt"  # TODO: refactor
+        has_commands = False
+
+        # redirect (as much as possible) app-generated stdout/err to a dedicated file:
+        with redirect_std_to_file(run_std_path):
+            with self._store.cached_load():
+                js_idx = cast("int", block_act_key[0])
+                run = self.get_EARs_from_IDs([run_ID])[0]
+                run_dir = None
+                if run.action.requires_dir:
+                    run_dir = run.get_directory()
+                    assert run_dir
+                    self._app.submission_logger.debug(
+                        f"changing directory to run execution directory: {run_dir}."
+                    )
+                    os.chdir(run_dir)
+                self._app.submission_logger.debug(f"{run.skip=}; {run.skip_reason=}")
+
+                # check if we should skip:
+                if not run.skip:
+                    try:
+                        with run.raise_on_failure_threshold() as unset_params:
+                            if run.action.script:
+                                run.write_script_data_in_files(block_act_key)
+                            if run.action.has_program:
+                                run.write_program_data_in_files(block_act_key)
+
+                            # write the command file that will be executed:
+                            cmd_file_path = self.ensure_commands_file(
+                                submission_idx, js_idx, run
+                            )
+
+                    except UnsetParameterDataErrorBase:
+                        # not all required parameter data is set, so fail this run:
+                        self._app.submission_logger.debug(
+                            f"unset parameter threshold satisfied (or any unset "
+                            f"parameters found when trying to write commands file), so "
+                            f"not attempting run. unset_params={unset_params!r}."
+                        )
+                        self.set_EAR_start(run_ID, run_dir, port_number=None)
+                        self._check_loop_termination(run)  # not sure if this is required
+                        self.set_EAR_end(
+                            block_act_key=block_act_key,
+                            run=run,
+                            exit_code=1,
+                        )
+                        return
+
+                    # sufficient parameter data is set so far, but need to pass `unset_params`
+                    # on as an environment variable so it can be appended to and failure
+                    # thresholds can be rechecked if necessary (i.e. in a Python script
+                    # where we also load input parameters "directly")
+                    if unset_params:
+                        self._app.submission_logger.debug(
+                            f"some unset parameters found, but no unset-thresholds met: "
+                            f"unset_params={unset_params!r}."
+                        )
+
+                    # TODO: pass on unset_params to script as environment variable
+
+                    if run.action.jinja_template_or_template_path:
+                        # TODO: write Jinja templates in shared submissions directory
+                        run.write_jinja_template()
+
+                    if has_commands := bool(cmd_file_path):
+                        assert isinstance(cmd_file_path, Path)
+                        if not cmd_file_path.is_file():
+                            raise RuntimeError(
+                                f"Command file {cmd_file_path!r} does not exist."
+                            )
+                        # prepare subprocess command:
+                        jobscript = self.submissions[submission_idx].jobscripts[js_idx]
+                        cmd = jobscript.shell.get_command_file_launch_command(
+                            str(cmd_file_path)
+                        )
+                        loop_idx_str = ";".join(
+                            f"{k}={v}" for k, v in run.element_iteration.loop_idx.items()
+                        )
+                        app_caps = self._app.package_name.upper()
+
+                        # TODO: make these optionally set (more difficult to set in combine_script,
+                        # so have the option to turn off) [default ON]
+                        add_env = {
+                            f"{app_caps}_RUN_ID": str(run_ID),
+                            f"{app_caps}_RUN_IDX": str(run.index),
+                            f"{app_caps}_ELEMENT_IDX": str(run.element.index),
+                            f"{app_caps}_ELEMENT_ID": str(run.element.id_),
+                            f"{app_caps}_ELEMENT_ITER_IDX": str(
+                                run.element_iteration.index
+                            ),
+                            f"{app_caps}_ELEMENT_ITER_ID": str(run.element_iteration.id_),
+                            f"{app_caps}_ELEMENT_ITER_LOOP_IDX": loop_idx_str,
+                        }
+
+                        if run.action.script:
+                            if run.is_snippet_script:
+                                script_artifact_name = run.get_script_artifact_name()
+                                script_dir = Path(
+                                    os.environ[f"{app_caps}_SUB_SCRIPTS_DIR"]
+                                )
+                                script_name = script_artifact_name
+                            else:
+                                # not a snippet script; expect the script in the run execute
+                                # directory (i.e. created by a previous action)
+                                script_dir = Path.cwd()
+                                script_name = run.action.script
+                            script_name_no_ext = Path(script_name).stem
+                            add_env.update(
+                                {
+                                    f"{app_caps}_RUN_SCRIPT_NAME": script_name,
+                                    f"{app_caps}_RUN_SCRIPT_NAME_NO_EXT": script_name_no_ext,
+                                    f"{app_caps}_RUN_SCRIPT_DIR": str(script_dir),
+                                    f"{app_caps}_RUN_SCRIPT_PATH": str(
+                                        script_dir / script_name
+                                    ),
+                                }
+                            )
+                        if program_path := run.program_path_actual:
+                            program_dir = program_path.parent
+                            program_name = program_path.name
+                            program_name_no_ext = program_path.stem
+                            add_env.update(
+                                {
+                                    f"{app_caps}_RUN_PROGRAM_NAME": program_name,
+                                    f"{app_caps}_RUN_PROGRAM_NAME_NO_EXT": program_name_no_ext,
+                                    f"{app_caps}_RUN_PROGRAM_DIR": str(program_dir),
+                                    f"{app_caps}_RUN_PROGRAM_PATH": str(program_path),
+                                }
+                            )
+
+                        env = {**dict(os.environ), **add_env}
+
+                        self._app.submission_logger.debug(
+                            f"Executing run commands via subprocess with command {cmd!r}, and "
+                            f"environment variables as below."
+                        )
+                        for k, v in env.items():
+                            if k.startswith(app_caps):
+                                self._app.submission_logger.debug(f"{k} = {v!r}")
+                        exe = self._app.Executor(cmd, env, self._app.package_name)
+                        port = (
+                            exe.start_zmq_server()
+                        )  # start the server so we know the port
+
+                        try:
+                            self.set_EAR_start(run_ID, run_dir, port)
+                        except:
+                            self._app.submission_logger.error("Failed to set run start.")
+                            exe.stop_zmq_server()
+                            raise
+
+        # this subprocess may include commands that redirect to the std_stream file (e.g.
+        # calling the app to save a parameter from a shell command output):
+        if not run.skip and has_commands:
+            ret_code = exe.run()  # this also shuts down the server
+
+        # redirect (as much as possible) app-generated stdout/err to a dedicated file:
+        with redirect_std_to_file(run_std_path):
+            if run.skip:
+                ret_code = SKIPPED_EXIT_CODE
+            elif not (has_commands or run.action.jinja_template):
+                ret_code = NO_COMMANDS_EXIT_CODE
+            elif run.action.jinja_template:
+                ret_code = 0
+            else:
+                self._check_loop_termination(run)
+
+            # set run end:
+            self.set_EAR_end(
+                block_act_key=block_act_key,
+                run=run,
+                exit_code=ret_code,
+            )
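Note: as a hedged illustration of the environment variables injected above, for an app whose package name is hpcflow (so `app_caps` is "HPCFLOW"), a user script launched via the command file could locate its run like this:

import os

# these variables are set by execute_run before the subprocess starts:
run_id = int(os.environ["HPCFLOW_RUN_ID"])
elem_idx = int(os.environ["HPCFLOW_ELEMENT_IDX"])
loop_idx = dict(
    kv.split("=") for kv in os.environ["HPCFLOW_ELEMENT_ITER_LOOP_IDX"].split(";") if kv
)
print(f"run {run_id}: element {elem_idx}, loop indices {loop_idx}")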
+
4263
+ def _check_loop_termination(self, run: ElementActionRun) -> set[int]:
4264
+ """Check if we need to terminate a loop if this is the last action of the loop
4265
+ iteration for this element, and set downstream iteration runs to skip."""
4266
+
4267
+ elem_iter = run.element_iteration
4268
+ task = elem_iter.task
4269
+ check_loops = []
4270
+ to_skip = set()
4271
+ for loop_name in elem_iter.loop_idx:
4272
+ self._app.logger.info(f"checking loop termination of loop {loop_name!r}.")
4273
+ loop = self.loops.get(loop_name)
4274
+ if (
4275
+ loop.template.termination
4276
+ and task.insert_ID == loop.template.termination_task_insert_ID
4277
+ and run.element_action.action_idx == max(elem_iter.actions)
4278
+ ):
4279
+ check_loops.append(loop_name)
4280
+ # TODO: test with condition actions
4281
+ if loop.test_termination(elem_iter):
4282
+ self._app.logger.info(
4283
+ f"loop {loop_name!r} termination condition met for run "
4284
+ f"ID {run.id_!r}."
4285
+ )
4286
+ to_skip.update(loop.skip_downstream_iterations(elem_iter))
4287
+ return to_skip
4288
+
4289
+ @load_workflow_config
4290
+ def execute_combined_runs(self, submission_idx: int, jobscript_idx: int) -> None:
4291
+ """Execute a combined script (multiple runs) via a subprocess."""
4292
+
4293
+ # CD to submission tmp dir to ensure std streams and exceptions have somewhere
4294
+ # sensible to go:
4295
+ os.chdir(Submission.get_tmp_path(self.submissions_path, submission_idx))
4296
+
4297
+ sub = self.submissions[submission_idx]
4298
+ js = sub.jobscripts[jobscript_idx]
4299
+
4300
+ app_caps = self._app.package_name.upper()
4301
+ script_dir = Path(os.environ[f"{app_caps}_SUB_SCRIPTS_DIR"])
4302
+ script_name = f"js_{jobscript_idx}.py" # TODO: refactor script name
4303
+ script_path = script_dir / script_name
4304
+
4305
+ add_env = {
4306
+ f"{app_caps}_RUN_SCRIPT_NAME": script_name,
4307
+ f"{app_caps}_RUN_SCRIPT_NAME_NO_EXT": script_path.stem,
4308
+ f"{app_caps}_RUN_SCRIPT_DIR": str(script_dir),
4309
+ f"{app_caps}_RUN_SCRIPT_PATH": str(script_path),
4310
+ f"{app_caps}_SCRIPT_INDICES_FILE": str(js.combined_script_indices_file_path),
4311
+ }
4312
+ env = {**dict(os.environ), **add_env}
4313
+
4314
+ # note: unlike in `Workflow.execute_run`, here we can be reasonably sure the
4315
+ # commands file already exists, because we call `Action.try_write_commands` with
4316
+ # `raise_on_unset=True` in `Workflow._add_submission` during submission.
4317
+
4318
+ # TODO: refactor cmd file name:
4319
+ cmd_file_path = sub.commands_path / f"js_{jobscript_idx}{js.shell.JS_EXT}"
4320
+ cmd = js.shell.get_command_file_launch_command(str(cmd_file_path))
4321
+
4322
+ self._app.submission_logger.debug(
4323
+ f"Executing combined runs via subprocess with command {cmd!r}, and "
4324
+ f"environment variables as below."
4325
+ )
4326
+ for k, v in env.items():
4327
+ if k.startswith(app_caps):
4328
+ self._app.submission_logger.debug(f"{k} = {v}")
4329
+
4330
+ exe = self._app.Executor(cmd, env, self._app.package_name)
4331
+ exe.start_zmq_server() # start the server
4332
+ exe.run() # this also shuts down the server
4333
+
4334
+     def ensure_commands_file(
+         self,
+         submission_idx: int,
+         js_idx: int,
+         run: ElementActionRun,
+     ) -> Path | bool:
+         """Ensure a commands file exists for the specified run."""
+         self._app.persistence_logger.debug("Workflow.ensure_commands_file")
+
+         if run.commands_file_ID is None:
+             # no commands to write
+             return False
+
+         with self._store.cached_load():
+             sub = self.submissions[submission_idx]
+             jobscript = sub.jobscripts[js_idx]
+
+             # check if a commands file already exists, first checking using the run ID:
+             cmd_file_name = f"{run.id_}{jobscript.shell.JS_EXT}"  # TODO: refactor
+             cmd_file_path = jobscript.submission.commands_path / cmd_file_name
+
+             if not cmd_file_path.is_file():
+                 # then check for a file from the "root" run ID (the run ID of a run
+                 # that shares the same commands file):
+                 cmd_file_name = (
+                     f"{run.commands_file_ID}{jobscript.shell.JS_EXT}"  # TODO: refactor
+                 )
+                 cmd_file_path = jobscript.submission.commands_path / cmd_file_name
+
+             if not cmd_file_path.is_file():
+                 # no file available, so write (using the run ID):
+                 try:
+                     cmd_file_path = run.try_write_commands(
+                         jobscript=jobscript,
+                         environments=sub.environments,
+                         raise_on_unset=True,
+                     )
+                 except OutputFileParserNoOutputError:
+                     # no commands to write; might be used just for saving files
+                     return False
+
+             return cmd_file_path
+
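`ensure_commands_file` resolves the file in three steps: look for a file named after the run's own ID, fall back to the shared "root" commands-file ID, and only then write a new file. A compact sketch of the same resolve-or-create cascade (the directory layout and writer callback are hypothetical):

    from pathlib import Path
    from typing import Callable

    def resolve_or_create(commands_dir: Path, run_id: int, root_id: int | None,
                          ext: str, write: Callable[[], Path]) -> Path | bool:
        """Return an existing commands file, else write one; False if none is needed."""
        if root_id is None:
            return False  # the run has no commands to execute
        for candidate in (run_id, root_id):
            path = commands_dir / f"{candidate}{ext}"
            if path.is_file():
                return path  # reuse an existing file
        return write()  # nothing found: write a new file under the run's own ID
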
+     def process_shell_parameter_output(
+         self, name: str, value: str, EAR_ID: int, cmd_idx: int, stderr: bool = False
+     ) -> Any:
+         """Process the shell stdout/stderr stream according to the associated
+         Command object."""
+         with self._store.cached_load(), self.batch_update():
+             EAR = self.get_EARs_from_IDs(EAR_ID)
+             command = EAR.action.commands[cmd_idx]
+             return command.process_std_stream(name, value, stderr)
+
+     def save_parameter(
+         self,
+         name: str,
+         value: Any,
+         EAR_ID: int,
+     ):
+         """
+         Save a parameter where an EAR can find it.
+         """
+         self._app.logger.info(f"save parameter {name!r} for EAR_ID {EAR_ID}.")
+         self._app.logger.debug(f"save parameter {name!r} value is {value!r}.")
+         with self._store.cached_load(), self.batch_update():
+             EAR = self.get_EARs_from_IDs(EAR_ID)
+             param_id = EAR.data_idx[name]
+             self.set_parameter_value(param_id, value)
+
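`save_parameter` is a name-to-ID indirection: the run's `data_idx` maps a parameter path to a persistent parameter ID, and the value is written at that ID. A toy model of that indirection, with a dict-backed class standing in for the persistent store:

    class ToyStore:
        """Dict-backed stand-in for the persistent parameter store."""

        def __init__(self) -> None:
            self._params: dict[int, object] = {}

        def set_parameter_value(self, param_id: int, value: object) -> None:
            self._params[param_id] = value

    store = ToyStore()
    data_idx = {"outputs.p1": 42}  # parameter path -> persistent parameter ID
    store.set_parameter_value(data_idx["outputs.p1"], 3.14)
    assert store._params[42] == 3.14
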
+     def show_all_EAR_statuses(self) -> None:
+         """
+         Print a description of the status of every element action run in
+         the workflow.
+         """
+         print(
+             f"{'task':8s} {'element':8s} {'iteration':8s} {'action':8s} "
+             f"{'run':8s} {'status':8s} {'exitcode':8s} {'success':8s} {'skip':8s}"
+         )
+         for task in self.tasks:
+             for element in task.elements[:]:
+                 for iter_idx, iteration in enumerate(element.iterations):
+                     for act_idx, action_runs in iteration.actions.items():
+                         for run_idx, EAR in enumerate(action_runs.runs):
+                             suc = EAR.success if EAR.success is not None else "-"
+                             if EAR.exit_code is not None:
+                                 exc = f"{EAR.exit_code:^8d}"
+                             else:
+                                 exc = f"{'-':^8}"
+                             print(
+                                 f"{task.insert_ID:^8d} {element.index:^8d} "
+                                 f"{iter_idx:^8d} {act_idx:^8d} {run_idx:^8d} "
+                                 f"{EAR.status.name.lower():^8s} "
+                                 f"{exc} "
+                                 f"{suc:^8} "
+                                 f"{EAR.skip:^8}"
+                             )
+
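The status table relies on f-string alignment specifiers (`:^8d`, `:^8s`) to centre each value in an eight-character field, with a single space separating adjacent fields so the rows line up under the header. A self-contained illustration of the technique:

    rows = [(0, 1, "running", 0), (0, 2, "error", 1)]
    print(f"{'task':8s} {'element':8s} {'status':8s} {'exitcode':8s}")
    for task, elem, status, code in rows:
        print(f"{task:^8d} {elem:^8d} {status:^8s} {code:^8d}")
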
+     def _resolve_input_source_task_reference(
+         self, input_source: InputSource, new_task_name: str
+     ) -> None:
+         """Normalise the input source task reference and convert a source to a
+         local type if required."""
+
+         # TODO: test thoroughly!
+
+         if isinstance(input_source.task_ref, str):
+             if input_source.task_ref == new_task_name:
+                 if input_source.task_source_type is self._app.TaskSourceType.OUTPUT:
+                     raise InvalidInputSourceTaskReference(input_source)
+                 warn(
+                     f"Changing input source {input_source.to_string()!r} to a local "
+                     f"type, since the input source task reference refers to its own "
+                     f"task."
+                 )
+                 # TODO: add an InputSource source_type setter to reset
+                 # task_ref/source_type?
+                 input_source.source_type = self._app.InputSourceType.LOCAL
+                 input_source.task_ref = None
+                 input_source.task_source_type = None
+             else:
+                 try:
+                     uniq_names_cur = self.get_task_unique_names(map_to_insert_ID=True)
+                     input_source.task_ref = uniq_names_cur[input_source.task_ref]
+                 except KeyError:
+                     raise InvalidInputSourceTaskReference(
+                         input_source, task_ref=input_source.task_ref
+                     )
+
+     @TimeIt.decorator
+     def get_all_submission_run_IDs(self) -> Iterable[int]:
+         """
+         Get the run IDs of all submissions.
+         """
+         self._app.persistence_logger.debug("Workflow.get_all_submission_run_IDs")
+         for sub in self.submissions:
+             yield from sub.all_EAR_IDs
+
+     def rechunk_runs(
+         self,
+         chunk_size: int | None = None,
+         backup: bool = True,
+         status: bool = True,
+     ):
+         """
+         Reorganise the stored data chunks for EARs to be more efficient.
+         """
+         self._store.rechunk_runs(chunk_size=chunk_size, backup=backup, status=status)
+
+     def rechunk_parameter_base(
+         self,
+         chunk_size: int | None = None,
+         backup: bool = True,
+         status: bool = True,
+     ):
+         """
+         Reorganise the stored data chunks for parameters to be more efficient.
+         """
+         self._store.rechunk_parameter_base(
+             chunk_size=chunk_size, backup=backup, status=status
+         )
+
+     def rechunk(
+         self,
+         chunk_size: int | None = None,
+         backup: bool = True,
+         status: bool = True,
+     ):
+         """
+         Rechunk metadata/runs and parameters/base arrays, making them more efficient.
+         """
+         self.rechunk_runs(chunk_size=chunk_size, backup=backup, status=status)
+         self.rechunk_parameter_base(chunk_size=chunk_size, backup=backup, status=status)
+
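The `rechunk_*` methods delegate to the persistent store. For a Zarr-backed store, rechunking amounts to copying data into a new array with a different chunk shape (larger chunks mean fewer small files or keys to read back). A minimal sketch of that idea with the `zarr` package, assuming a 1-D array; this is not hpcflow's actual store code:

    import zarr

    def rechunk_copy(src_path: str, dst_path: str, chunk_size: int) -> None:
        """Copy a 1-D Zarr array into a new array with the requested chunking."""
        src = zarr.open(src_path, mode="r")
        dst = zarr.open(
            dst_path, mode="w", shape=src.shape, chunks=(chunk_size,), dtype=src.dtype
        )
        dst[:] = src[:]  # bulk copy; data is re-sliced into the new chunks
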
+     @TimeIt.decorator
+     def get_run_directories(
+         self,
+         run_ids: list[int] | None = None,
+         dir_indices_arr: np.ndarray | None = None,
+     ) -> list[Path | None]:
+         """Get the execution directory paths of the specified runs (or all runs)."""
+
+         @TimeIt.decorator
+         def _get_depth_dirs(
+             item_idx: int,
+             max_per_dir: int,
+             max_depth: int,
+             depth_idx_cache: dict[tuple[int, int], NDArray],
+             prefix: str,
+         ) -> list[str]:
+             dirs = []
+             max_avail_items = max_per_dir**max_depth
+             for depth_i in range(1, max_depth):
+                 tot_items_per_level = int(max_avail_items / max_per_dir**depth_i)
+                 key = (max_avail_items, tot_items_per_level)
+                 if (depth_idx := depth_idx_cache.get(key)) is None:
+                     depth_idx = np.repeat(
+                         np.arange(max_avail_items / tot_items_per_level, dtype=int),
+                         tot_items_per_level,
+                     )
+                     depth_idx_cache[key] = depth_idx
+                 idx_i = cast("NDArray", depth_idx)[item_idx]
+                 start_idx = idx_i * tot_items_per_level
+                 end_idx = start_idx + tot_items_per_level - 1
+                 dirs.append(f"{prefix}_{start_idx}-{end_idx}")
+             return dirs
+
+         if dir_indices_arr is None:  # TODO: document behaviour!
+             dir_indices_arr = self._store.get_dirs_array()
+             if run_ids is not None:
+                 dir_indices_arr = dir_indices_arr[run_ids]
+
+         # TODO: make these configurable so easier to test!
+         MAX_ELEMS_PER_DIR = 1000  # TODO: configurable (add `workflow_defaults` to Config)
+         MAX_ITERS_PER_DIR = 1000
+
+         exec_path = self.execution_path
+
+         # a fill value means no sub-directory should be created
+         T_FILL, E_FILL, I_FILL, A_FILL, R_FILL, _, _ = RUN_DIR_ARR_FILL
+
+         # keys are (max_avail, tot_elems_per_dir_level):
+         depth_idx_cache: dict[tuple[int, int], NDArray] = {}
+
+         # format run directories:
+         dirs = []
+         for dir_data in dir_indices_arr:
+
+             # TODO: retrieve task,element,iteration,action,run dir formats from
+             # (t_iID, act_idx) combo (cached)?
+
+             t_iID, e_idx, i_idx, _, r_idx, e_depth, i_depth = dir_data
+             path_args = []
+
+             if t_iID != T_FILL:
+                 path_args.append(f"t_{t_iID}")
+
+             if e_idx != E_FILL:
+                 if e_depth > 1:
+                     path_args.extend(
+                         _get_depth_dirs(
+                             item_idx=e_idx,
+                             max_per_dir=MAX_ELEMS_PER_DIR,
+                             max_depth=e_depth,
+                             depth_idx_cache=depth_idx_cache,
+                             prefix="e",
+                         )
+                     )
+                 path_args.append(f"e_{e_idx}")
+
+             if i_idx != I_FILL:
+                 if i_depth > 1:
+                     path_args.extend(
+                         _get_depth_dirs(
+                             item_idx=i_idx,
+                             max_per_dir=MAX_ITERS_PER_DIR,
+                             max_depth=i_depth,
+                             depth_idx_cache=depth_idx_cache,
+                             prefix="i",
+                         )
+                     )
+                 path_args.append(f"i_{i_idx}")
+
+             if r_idx != R_FILL:
+                 path_args.append(f"r_{r_idx}")
+
+             if path_args:
+                 run_dir = exec_path.joinpath(*path_args)
+             elif e_depth == 1:
+                 run_dir = exec_path
+             else:
+                 run_dir = None
+
+             dirs.append(run_dir)
+
+         return dirs
+
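`_get_depth_dirs` shards element/iteration directories so that no single directory holds more than `max_per_dir` entries: with `max_per_dir=1000` and a depth of 2, element 1234 lands under an `e_1000-1999` parent. The same range arithmetic, standalone and without the NumPy index cache:

    def depth_dirs(item_idx: int, max_per_dir: int, max_depth: int,
                   prefix: str) -> list[str]:
        """Nested range-named dirs keeping each level under max_per_dir entries."""
        dirs = []
        max_avail = max_per_dir**max_depth
        for depth in range(1, max_depth):
            per_level = max_avail // max_per_dir**depth
            start = (item_idx // per_level) * per_level
            dirs.append(f"{prefix}_{start}-{start + per_level - 1}")
        return dirs

    assert depth_dirs(1234, 1000, 2, "e") == ["e_1000-1999"]
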
+     @TimeIt.decorator
+     def get_scheduler_job_IDs(self) -> tuple[str, ...]:
+         """Return jobscript scheduler job IDs from all submissions of this workflow."""
+         return tuple(
+             IDs_j for sub_i in self.submissions for IDs_j in sub_i.get_scheduler_job_IDs()
+         )
+
+     @TimeIt.decorator
+     def get_process_IDs(self) -> tuple[int, ...]:
+         """Return jobscript process IDs from all submissions of this workflow."""
+         return tuple(
+             IDs_j for sub_i in self.submissions for IDs_j in sub_i.get_process_IDs()
+         )
+
+     @TimeIt.decorator
+     def list_jobscripts(
+         self,
+         sub_idx: int = 0,
+         max_js: int | None = None,
+         jobscripts: list[int] | None = None,
+         width: int | None = None,
+     ) -> None:
+         """Print a table listing jobscripts and associated information from the
+         specified submission.
+
+         Parameters
+         ----------
+         sub_idx
+             The submission index whose jobscripts are to be displayed.
+         max_js
+             Maximum jobscript index to display. This cannot be specified with
+             `jobscripts`.
+         jobscripts
+             A list of jobscripts to display. This cannot be specified with `max_js`.
+         width
+             Width in characters of the printed table.
+         """
+
+         with self._store.cached_load():
+
+             if max_js is not None and jobscripts is not None:
+                 raise ValueError("Do not specify both `max_js` and `jobscripts`.")
+
+             loop_names = [i.name for i in self.loops][::-1]
+             loop_names_panel: rich.panel.Panel | str = ""
+             if loop_names:
+                 loop_names_panel = rich.panel.Panel(
+                     "\n".join(f"{idx}: {i}" for idx, i in enumerate(loop_names)),
+                     title="[b]Loops[/b]",
+                     title_align="left",
+                     box=rich.box.SIMPLE,
+                 )
+
+             table = rich.table.Table(width=width)
+
+             table.add_column("Jobscript", justify="right", style="cyan", no_wrap=True)
+             table.add_column("Acts, Elms", justify="right", style="green")
+             table.add_column("Deps.", style="orange3")
+             table.add_column("Tasks", overflow="fold")
+             table.add_column("Loops")
+
+             sub_js = self.submissions[sub_idx].jobscripts
+             max_js = max_js if max_js is not None else len(sub_js)
+             for js in sub_js:
+                 if jobscripts is not None and js.index not in jobscripts:
+                     continue
+                 if js.index > max_js:
+                     break
+                 for blk in js.blocks:
+                     blk_task_actions = blk.task_actions
+                     num_actions = blk_task_actions.shape[0]
+
+                     if blk.index == 0:
+                         c1 = f"{js.index} - {blk.index}"
+                     else:
+                         c1 = f"{blk.index}"
+                     c3 = f"{num_actions}, {blk.num_elements}"
+
+                     deps = "; ".join(f"{i[0],i[1]}" for i in blk.dependencies)
+
+                     for blk_t_idx, t_iID in enumerate(blk.task_insert_IDs):
+
+                         # loop indices are the same for all actions within a task, so
+                         # get the first `task_action` for this task insert ID:
+                         for i in blk_task_actions:
+                             if i[0] == t_iID:
+                                 loop_idx = [
+                                     blk.task_loop_idx[i[2]].get(loop_name_i, "-")
+                                     for loop_name_i in loop_names
+                                 ]
+                                 break
+
+                         c2 = self.tasks.get(insert_ID=t_iID).unique_name
+
+                         if blk_t_idx > 0:
+                             c1 = ""
+                             c3 = ""
+                             deps = ""
+
+                         table.add_row(
+                             c1, c3, deps, c2, (" | ".join(f"{i}" for i in loop_idx))
+                         )
+
+                     table.add_section()
+
+             group = rich.console.Group(
+                 rich.text.Text(f"Workflow: {self.name}"),
+                 rich.text.Text(
+                     f"Submission: {sub_idx}" + ("\n" if loop_names_panel else "")
+                 ),
+                 loop_names_panel,
+                 table,
+             )
+             rich_print(group)
+
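`list_jobscripts` renders its output with the `rich` library: a `Table` with one section per jobscript, grouped under header text and an optional loops `Panel`. A minimal runnable sketch of the same layout with illustrative content:

    import rich.box
    import rich.console
    import rich.panel
    import rich.table
    import rich.text
    from rich import print as rich_print

    table = rich.table.Table(width=60)
    table.add_column("Jobscript", justify="right", style="cyan", no_wrap=True)
    table.add_column("Tasks", overflow="fold")
    table.add_row("0 - 0", "simulate")
    table.add_section()  # horizontal rule between jobscripts
    table.add_row("1 - 0", "fit")

    group = rich.console.Group(
        rich.text.Text("Workflow: demo"),
        rich.panel.Panel("0: my_loop", title="[b]Loops[/b]", box=rich.box.SIMPLE),
        table,
    )
    rich_print(group)
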
+     def list_task_jobscripts(
+         self,
+         sub_idx: int = 0,
+         task_names: list[str] | None = None,
+         max_js: int | None = None,
+         width: int | None = None,
+     ):
+         """Print a table listing the jobscripts associated with the specified (or
+         all) tasks for the specified submission.
+
+         Parameters
+         ----------
+         sub_idx
+             The submission index whose jobscripts are to be displayed.
+         task_names
+             List of sub-strings to match to task names. Only matching task names
+             will be included.
+         max_js
+             Maximum jobscript index to display.
+         width
+             Width in characters of the printed table.
+         """
+
+         with self._store.cached_load():
+             loop_names = [i.name for i in self.loops][::-1]
+             loop_names_panel: rich.panel.Panel | str = ""
+             if loop_names:
+                 loop_names_panel = rich.panel.Panel(
+                     "\n".join(f"{idx}: {i}" for idx, i in enumerate(loop_names)),
+                     title="[b]Loops[/b]",
+                     title_align="left",
+                     box=rich.box.SIMPLE,
+                 )
+
+             sub_js = self.submissions[sub_idx].jobscripts
+             all_task_names = {i.insert_ID: i.unique_name for i in self.tasks}
+
+             # filter task names by those matching the specified names
+             matched = all_task_names
+             if task_names:
+                 matched = {
+                     k: v
+                     for k, v in all_task_names.items()
+                     if any(i in v for i in task_names)
+                 }
+
+             task_jobscripts = defaultdict(list)
+             for js in sub_js:
+                 if max_js is not None and js.index > max_js:
+                     break
+                 for blk in js.blocks:
+                     blk_task_actions = blk.task_actions
+                     for i in blk.task_insert_IDs:
+                         if i in matched:
+                             for j in blk_task_actions:
+                                 if j[0] == i:
+                                     loop_idx = [
+                                         blk.task_loop_idx[j[2]].get(loop_name_i, "-")
+                                         for loop_name_i in loop_names
+                                     ]
+                                     break
+                             task_jobscripts[i].append((js.index, blk.index, loop_idx))
+
+             table = rich.table.Table(width=width)
+             table.add_column("Task")
+             table.add_column("Jobscripts", style="cyan", no_wrap=True)
+             table.add_column("Loops")
+             for insert_ID_i, jobscripts_i in task_jobscripts.items():
+                 for idx, js_j in enumerate(jobscripts_i):
+                     js_idx, blk_idx, loop_idx = js_j
+                     table.add_row(
+                         matched[insert_ID_i] if idx == 0 else "",
+                         f"({js_idx}, {blk_idx})",
+                         (" | ".join(f"{i}" for i in loop_idx)),
+                     )
+                 table.add_section()
+
+             group = rich.console.Group(
+                 rich.text.Text(f"Workflow: {self.name}"),
+                 rich.text.Text(
+                     f"Submission: {sub_idx}" + ("\n" if loop_names_panel else "")
+                 ),
+                 loop_names_panel,
+                 table,
+             )
+             rich_print(group)
+
+     def get_text_file(self, path: str | Path) -> str:
+         """Retrieve the contents of a text file stored within the workflow."""
+         return self._store.get_text_file(path)
+
+
+ @dataclass
+ class WorkflowBlueprint:
+     """Pre-built workflow templates that are simpler to parameterise.
+     (For example, fitting workflows.)"""
+
+     #: The template inside this blueprint.
+     workflow_template: WorkflowTemplate
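`WorkflowBlueprint` simply freezes a pre-built template inside a dataclass; the `#:` comment is the Sphinx convention for documenting the field. A hypothetical illustration with a stub template, since the real `WorkflowTemplate` constructor is not shown here:

    from dataclasses import dataclass

    @dataclass
    class TemplateStub:
        """Hypothetical stand-in for WorkflowTemplate."""
        name: str

    @dataclass
    class BlueprintStub:
        #: The template inside this blueprint.
        workflow_template: TemplateStub

    bp = BlueprintStub(workflow_template=TemplateStub(name="fitting"))
    print(bp.workflow_template.name)  # -> fitting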