hpcflow 0.1.15__py3-none-any.whl → 0.2.0a271__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275)
  1. hpcflow/__init__.py +2 -11
  2. hpcflow/__pyinstaller/__init__.py +5 -0
  3. hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
  4. hpcflow/_version.py +1 -1
  5. hpcflow/app.py +43 -0
  6. hpcflow/cli.py +2 -461
  7. hpcflow/data/demo_data_manifest/__init__.py +3 -0
  8. hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
  9. hpcflow/data/jinja_templates/test/test_template.txt +8 -0
  10. hpcflow/data/programs/hello_world/README.md +1 -0
  11. hpcflow/data/programs/hello_world/hello_world.c +87 -0
  12. hpcflow/data/programs/hello_world/linux/hello_world +0 -0
  13. hpcflow/data/programs/hello_world/macos/hello_world +0 -0
  14. hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
  15. hpcflow/data/scripts/__init__.py +1 -0
  16. hpcflow/data/scripts/bad_script.py +2 -0
  17. hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
  18. hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
  19. hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
  20. hpcflow/data/scripts/do_nothing.py +2 -0
  21. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  22. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  23. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  24. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  25. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  26. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  27. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  28. hpcflow/data/scripts/generate_t1_file_01.py +7 -0
  29. hpcflow/data/scripts/import_future_script.py +7 -0
  30. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  31. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  32. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  33. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  34. hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
  35. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  36. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  37. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  38. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  39. hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
  40. hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
  41. hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
  42. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  43. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  44. hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
  45. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
  46. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  47. hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
  48. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
  49. hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
  50. hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
  51. hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
  52. hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
  53. hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
  54. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  55. hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
  56. hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
  57. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  58. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  59. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  60. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  61. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  62. hpcflow/data/scripts/parse_t1_file_01.py +4 -0
  63. hpcflow/data/scripts/script_exit_test.py +5 -0
  64. hpcflow/data/template_components/__init__.py +1 -0
  65. hpcflow/data/template_components/command_files.yaml +26 -0
  66. hpcflow/data/template_components/environments.yaml +13 -0
  67. hpcflow/data/template_components/parameters.yaml +14 -0
  68. hpcflow/data/template_components/task_schemas.yaml +139 -0
  69. hpcflow/data/workflows/workflow_1.yaml +5 -0
  70. hpcflow/examples.ipynb +1037 -0
  71. hpcflow/sdk/__init__.py +149 -0
  72. hpcflow/sdk/app.py +4266 -0
  73. hpcflow/sdk/cli.py +1479 -0
  74. hpcflow/sdk/cli_common.py +385 -0
  75. hpcflow/sdk/config/__init__.py +5 -0
  76. hpcflow/sdk/config/callbacks.py +246 -0
  77. hpcflow/sdk/config/cli.py +388 -0
  78. hpcflow/sdk/config/config.py +1410 -0
  79. hpcflow/sdk/config/config_file.py +501 -0
  80. hpcflow/sdk/config/errors.py +272 -0
  81. hpcflow/sdk/config/types.py +150 -0
  82. hpcflow/sdk/core/__init__.py +38 -0
  83. hpcflow/sdk/core/actions.py +3857 -0
  84. hpcflow/sdk/core/app_aware.py +25 -0
  85. hpcflow/sdk/core/cache.py +224 -0
  86. hpcflow/sdk/core/command_files.py +814 -0
  87. hpcflow/sdk/core/commands.py +424 -0
  88. hpcflow/sdk/core/element.py +2071 -0
  89. hpcflow/sdk/core/enums.py +221 -0
  90. hpcflow/sdk/core/environment.py +256 -0
  91. hpcflow/sdk/core/errors.py +1043 -0
  92. hpcflow/sdk/core/execute.py +207 -0
  93. hpcflow/sdk/core/json_like.py +809 -0
  94. hpcflow/sdk/core/loop.py +1320 -0
  95. hpcflow/sdk/core/loop_cache.py +282 -0
  96. hpcflow/sdk/core/object_list.py +933 -0
  97. hpcflow/sdk/core/parameters.py +3371 -0
  98. hpcflow/sdk/core/rule.py +196 -0
  99. hpcflow/sdk/core/run_dir_files.py +57 -0
  100. hpcflow/sdk/core/skip_reason.py +7 -0
  101. hpcflow/sdk/core/task.py +3792 -0
  102. hpcflow/sdk/core/task_schema.py +993 -0
  103. hpcflow/sdk/core/test_utils.py +538 -0
  104. hpcflow/sdk/core/types.py +447 -0
  105. hpcflow/sdk/core/utils.py +1207 -0
  106. hpcflow/sdk/core/validation.py +87 -0
  107. hpcflow/sdk/core/values.py +477 -0
  108. hpcflow/sdk/core/workflow.py +4820 -0
  109. hpcflow/sdk/core/zarr_io.py +206 -0
  110. hpcflow/sdk/data/__init__.py +13 -0
  111. hpcflow/sdk/data/config_file_schema.yaml +34 -0
  112. hpcflow/sdk/data/config_schema.yaml +260 -0
  113. hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
  114. hpcflow/sdk/data/files_spec_schema.yaml +5 -0
  115. hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
  116. hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
  117. hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
  118. hpcflow/sdk/demo/__init__.py +3 -0
  119. hpcflow/sdk/demo/cli.py +242 -0
  120. hpcflow/sdk/helper/__init__.py +3 -0
  121. hpcflow/sdk/helper/cli.py +137 -0
  122. hpcflow/sdk/helper/helper.py +300 -0
  123. hpcflow/sdk/helper/watcher.py +192 -0
  124. hpcflow/sdk/log.py +288 -0
  125. hpcflow/sdk/persistence/__init__.py +18 -0
  126. hpcflow/sdk/persistence/base.py +2817 -0
  127. hpcflow/sdk/persistence/defaults.py +6 -0
  128. hpcflow/sdk/persistence/discovery.py +39 -0
  129. hpcflow/sdk/persistence/json.py +954 -0
  130. hpcflow/sdk/persistence/pending.py +948 -0
  131. hpcflow/sdk/persistence/store_resource.py +203 -0
  132. hpcflow/sdk/persistence/types.py +309 -0
  133. hpcflow/sdk/persistence/utils.py +73 -0
  134. hpcflow/sdk/persistence/zarr.py +2388 -0
  135. hpcflow/sdk/runtime.py +320 -0
  136. hpcflow/sdk/submission/__init__.py +3 -0
  137. hpcflow/sdk/submission/enums.py +70 -0
  138. hpcflow/sdk/submission/jobscript.py +2379 -0
  139. hpcflow/sdk/submission/schedulers/__init__.py +281 -0
  140. hpcflow/sdk/submission/schedulers/direct.py +233 -0
  141. hpcflow/sdk/submission/schedulers/sge.py +376 -0
  142. hpcflow/sdk/submission/schedulers/slurm.py +598 -0
  143. hpcflow/sdk/submission/schedulers/utils.py +25 -0
  144. hpcflow/sdk/submission/shells/__init__.py +52 -0
  145. hpcflow/sdk/submission/shells/base.py +229 -0
  146. hpcflow/sdk/submission/shells/bash.py +504 -0
  147. hpcflow/sdk/submission/shells/os_version.py +115 -0
  148. hpcflow/sdk/submission/shells/powershell.py +352 -0
  149. hpcflow/sdk/submission/submission.py +1402 -0
  150. hpcflow/sdk/submission/types.py +140 -0
  151. hpcflow/sdk/typing.py +194 -0
  152. hpcflow/sdk/utils/arrays.py +69 -0
  153. hpcflow/sdk/utils/deferred_file.py +55 -0
  154. hpcflow/sdk/utils/hashing.py +16 -0
  155. hpcflow/sdk/utils/patches.py +31 -0
  156. hpcflow/sdk/utils/strings.py +69 -0
  157. hpcflow/tests/api/test_api.py +32 -0
  158. hpcflow/tests/conftest.py +123 -0
  159. hpcflow/tests/data/__init__.py +0 -0
  160. hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
  161. hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
  162. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  163. hpcflow/tests/data/workflow_1.json +10 -0
  164. hpcflow/tests/data/workflow_1.yaml +5 -0
  165. hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
  166. hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
  167. hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
  168. hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
  169. hpcflow/tests/programs/test_programs.py +180 -0
  170. hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
  171. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  172. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
  173. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  174. hpcflow/tests/scripts/test_main_scripts.py +1361 -0
  175. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  176. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  177. hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
  178. hpcflow/tests/unit/test_action.py +1066 -0
  179. hpcflow/tests/unit/test_action_rule.py +24 -0
  180. hpcflow/tests/unit/test_app.py +132 -0
  181. hpcflow/tests/unit/test_cache.py +46 -0
  182. hpcflow/tests/unit/test_cli.py +172 -0
  183. hpcflow/tests/unit/test_command.py +377 -0
  184. hpcflow/tests/unit/test_config.py +195 -0
  185. hpcflow/tests/unit/test_config_file.py +162 -0
  186. hpcflow/tests/unit/test_element.py +666 -0
  187. hpcflow/tests/unit/test_element_iteration.py +88 -0
  188. hpcflow/tests/unit/test_element_set.py +158 -0
  189. hpcflow/tests/unit/test_group.py +115 -0
  190. hpcflow/tests/unit/test_input_source.py +1479 -0
  191. hpcflow/tests/unit/test_input_value.py +398 -0
  192. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  193. hpcflow/tests/unit/test_json_like.py +1247 -0
  194. hpcflow/tests/unit/test_loop.py +2674 -0
  195. hpcflow/tests/unit/test_meta_task.py +325 -0
  196. hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
  197. hpcflow/tests/unit/test_object_list.py +116 -0
  198. hpcflow/tests/unit/test_parameter.py +243 -0
  199. hpcflow/tests/unit/test_persistence.py +664 -0
  200. hpcflow/tests/unit/test_resources.py +243 -0
  201. hpcflow/tests/unit/test_run.py +286 -0
  202. hpcflow/tests/unit/test_run_directories.py +29 -0
  203. hpcflow/tests/unit/test_runtime.py +9 -0
  204. hpcflow/tests/unit/test_schema_input.py +372 -0
  205. hpcflow/tests/unit/test_shell.py +129 -0
  206. hpcflow/tests/unit/test_slurm.py +39 -0
  207. hpcflow/tests/unit/test_submission.py +502 -0
  208. hpcflow/tests/unit/test_task.py +2560 -0
  209. hpcflow/tests/unit/test_task_schema.py +182 -0
  210. hpcflow/tests/unit/test_utils.py +616 -0
  211. hpcflow/tests/unit/test_value_sequence.py +549 -0
  212. hpcflow/tests/unit/test_values.py +91 -0
  213. hpcflow/tests/unit/test_workflow.py +827 -0
  214. hpcflow/tests/unit/test_workflow_template.py +186 -0
  215. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  216. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  217. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  218. hpcflow/tests/unit/utils/test_patches.py +5 -0
  219. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  220. hpcflow/tests/unit/utils/test_strings.py +97 -0
  221. hpcflow/tests/workflows/__init__.py +0 -0
  222. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  223. hpcflow/tests/workflows/test_jobscript.py +355 -0
  224. hpcflow/tests/workflows/test_run_status.py +198 -0
  225. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  226. hpcflow/tests/workflows/test_submission.py +140 -0
  227. hpcflow/tests/workflows/test_workflows.py +564 -0
  228. hpcflow/tests/workflows/test_zip.py +18 -0
  229. hpcflow/viz_demo.ipynb +6794 -0
  230. hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
  231. hpcflow-0.2.0a271.dist-info/METADATA +65 -0
  232. hpcflow-0.2.0a271.dist-info/RECORD +237 -0
  233. {hpcflow-0.1.15.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
  234. hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
  235. hpcflow/api.py +0 -490
  236. hpcflow/archive/archive.py +0 -307
  237. hpcflow/archive/cloud/cloud.py +0 -45
  238. hpcflow/archive/cloud/errors.py +0 -9
  239. hpcflow/archive/cloud/providers/dropbox.py +0 -427
  240. hpcflow/archive/errors.py +0 -5
  241. hpcflow/base_db.py +0 -4
  242. hpcflow/config.py +0 -233
  243. hpcflow/copytree.py +0 -66
  244. hpcflow/data/examples/_config.yml +0 -14
  245. hpcflow/data/examples/damask/demo/1.run.yml +0 -4
  246. hpcflow/data/examples/damask/demo/2.process.yml +0 -29
  247. hpcflow/data/examples/damask/demo/geom.geom +0 -2052
  248. hpcflow/data/examples/damask/demo/load.load +0 -1
  249. hpcflow/data/examples/damask/demo/material.config +0 -185
  250. hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
  251. hpcflow/data/examples/damask/inputs/load.load +0 -1
  252. hpcflow/data/examples/damask/inputs/material.config +0 -185
  253. hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
  254. hpcflow/data/examples/damask/profiles/damask.yml +0 -4
  255. hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
  256. hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
  257. hpcflow/data/examples/damask/profiles/default.yml +0 -6
  258. hpcflow/data/examples/thinking.yml +0 -177
  259. hpcflow/errors.py +0 -2
  260. hpcflow/init_db.py +0 -37
  261. hpcflow/models.py +0 -2595
  262. hpcflow/nesting.py +0 -9
  263. hpcflow/profiles.py +0 -455
  264. hpcflow/project.py +0 -81
  265. hpcflow/scheduler.py +0 -322
  266. hpcflow/utils.py +0 -103
  267. hpcflow/validation.py +0 -166
  268. hpcflow/variables.py +0 -543
  269. hpcflow-0.1.15.dist-info/METADATA +0 -168
  270. hpcflow-0.1.15.dist-info/RECORD +0 -45
  271. hpcflow-0.1.15.dist-info/entry_points.txt +0 -8
  272. hpcflow-0.1.15.dist-info/top_level.txt +0 -1
  273. /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
  274. /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
  275. /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
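
The hunk below is from hpcflow/sdk/persistence/base.py — the only entry above with 2817 added lines (item 126).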
@@ -0,0 +1,2817 @@
+ """
+ Base persistence models.
+
+ Store* classes represent the element-metadata in the store, in a store-agnostic way.
+ """
+
+ from __future__ import annotations
+ from abc import ABC, abstractmethod
+ from collections import defaultdict
+ import contextlib
+ import copy
+ from dataclasses import dataclass, field
+ import enum
+ from logging import Logger
+ from functools import wraps
+ import os
+ from pathlib import Path
+ import shutil
+ import socket
+ import time
+ from typing import Generic, TypeVar, cast, overload, TYPE_CHECKING
+
+ import numpy as np
+
+ from hpcflow.sdk.core.utils import (
+     flatten,
+     get_in_container,
+     get_relative_path,
+     remap,
+     reshape,
+     set_in_container,
+     normalise_timestamp,
+     parse_timestamp,
+     current_timestamp,
+ )
+ from hpcflow.sdk.core.errors import ParametersMetadataReadOnlyError
+ from hpcflow.sdk.submission.submission import (
+     JOBSCRIPT_SUBMIT_TIME_KEYS,
+     SUBMISSION_SUBMIT_TIME_KEYS,
+ )
+ from hpcflow.sdk.utils.strings import shorten_list_str
+ from hpcflow.sdk.log import TimeIt
+ from hpcflow.sdk.typing import hydrate
+ from hpcflow.sdk.persistence.pending import PendingChanges
+ from hpcflow.sdk.persistence.types import (
+     AnySTask,
+     AnySElement,
+     AnySElementIter,
+     AnySEAR,
+     AnySParameter,
+ )
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
+     from contextlib import AbstractContextManager
+     from datetime import datetime
+     from typing import Any, ClassVar, Final, Literal
+     from typing_extensions import Self, TypeIs
+     from fsspec import AbstractFileSystem  # type: ignore
+     from numpy.typing import NDArray
+     from .pending import CommitResourceMap
+     from .store_resource import StoreResource
+     from .types import (
+         EncodedStoreParameter,
+         File,
+         FileDescriptor,
+         LoopDescriptor,
+         Metadata,
+         ParameterTypes,
+         PersistenceCache,
+         StoreCreationInfo,
+         TemplateMeta,
+         TypeLookup,
+         IterableParam,
+     )
+     from .zarr import ZarrAttrsDict
+     from ..app import BaseApp
+     from ..typing import DataIndex, PathLike, ParamSource
+     from ..core.json_like import JSONed, JSONDocument
+     from ..core.parameters import ParameterValue
+     from ..core.workflow import Workflow
+     from ..submission.types import VersionInfo, ResolvedJobscriptBlockDependencies
+
+ T = TypeVar("T")
+ #: Type of the serialized form.
+ SerFormT = TypeVar("SerFormT")
+ #: Type of the encoding and decoding context.
+ ContextT = TypeVar("ContextT")
+
+ PRIMITIVES = (
+     int,
+     float,
+     str,
+     type(None),
+ )
+
+ TEMPLATE_COMP_TYPES = (
+     "parameters",
+     "command_files",
+     "environments",
+     "task_schemas",
+ )
+
+ PARAM_DATA_NOT_SET: Final[int] = 0
+
+
+ def update_param_source_dict(source: ParamSource, update: ParamSource) -> ParamSource:
+     """
+     Combine two dicts into a new dict that is ordered on its keys.
+     """
+     return cast("ParamSource", dict(sorted({**source, **update}.items())))
+
+
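For illustration, a worked example of the key-ordered merge performed by update_param_source_dict above; the source keys and values here are hypothetical:

    src = {"type": "default", "task_insert_ID": 0}
    upd = {"type": "EAR_output"}
    merged = dict(sorted({**src, **upd}.items()))
    assert merged == {"task_insert_ID": 0, "type": "EAR_output"}  # `upd` wins the clash
    assert list(merged) == ["task_insert_ID", "type"]  # keys are sorted
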
+ def writes_parameter_data(func: Callable):
+     """Decorator function that should wrap `PersistentStore` methods that write
+     parameter-associated data.
+
+     Notes
+     -----
+     This decorator checks that the parameters-metadata cache is not in use; that
+     cache must not be active while parameter-associated data is being written.
+     """
+
+     @wraps(func)
+     def inner(self, *args, **kwargs):
+         if self._use_parameters_metadata_cache:
+             raise ParametersMetadataReadOnlyError(
+                 "Cannot use the `parameters_metadata_cache` when writing parameter-"
+                 "associated data!"
+             )
+         return func(self, *args, **kwargs)
+
+     return inner
+
+
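A minimal sketch of the guard above in action; `_Demo` is a stand-in invented here for illustration, not a real store class:

    class _Demo:
        _use_parameters_metadata_cache = False

        @writes_parameter_data
        def set_param(self, value):
            return value

    d = _Demo()
    d.set_param(1)  # fine: the metadata cache is not in use
    d._use_parameters_metadata_cache = True
    # d.set_param(2)  # now raises ParametersMetadataReadOnlyError
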
+ @dataclass
+ class PersistentStoreFeatures:
+     """
+     Represents the features provided by a persistent store.
+
+     Parameters
+     ----------
+     create:
+         If True, a new workflow can be created using this store.
+     edit:
+         If True, the workflow can be modified.
+     jobscript_parallelism:
+         If True, the store supports workflows running multiple independent jobscripts
+         simultaneously.
+     EAR_parallelism:
+         If True, the store supports workflows running multiple EARs simultaneously.
+     schedulers:
+         If True, the store supports submitting workflows to a scheduler.
+     submission:
+         If True, the store supports submission. If False, the store can be considered to
+         be an archive, which would need transforming to another store type before
+         submission.
+     """
+
+     #: Whether a new workflow can be created using this store.
+     create: bool = False
+     #: Whether the workflow can be modified.
+     edit: bool = False
+     #: Whether the store supports workflows running multiple independent jobscripts
+     #: simultaneously.
+     jobscript_parallelism: bool = False
+     #: Whether the store supports workflows running multiple EARs simultaneously.
+     EAR_parallelism: bool = False
+     #: Whether the store supports submitting workflows to a scheduler.
+     schedulers: bool = False
+     #: Whether the store supports submission. If not, the store can be considered to
+     #: be an archive, which would need transforming to another store type before
+     #: submission.
+     submission: bool = False
+
+
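As a usage sketch, a fully capable store would advertise itself roughly as below; the flag values are illustrative rather than those of any particular hpcflow store:

    full = PersistentStoreFeatures(
        create=True,
        edit=True,
        jobscript_parallelism=True,
        EAR_parallelism=True,
        schedulers=True,
        submission=True,
    )
    archive = PersistentStoreFeatures()  # all flags default to False
    assert not archive.submission  # would need transforming before submission
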
+ @dataclass
+ class StoreTask(Generic[SerFormT]):
+     """
+     Represents a task in a persistent store.
+
+     Parameters
+     ----------
+     id_:
+         The ID of the task.
+     index:
+         The index of the task within its workflow.
+     is_pending:
+         Whether the task has changes not yet persisted.
+     element_IDs:
+         The IDs of elements in the task.
+     task_template:
+         Description of the template for the task.
+     """
+
+     # This would be in the docstring except it renders really wrongly!
+     # Type Parameters
+     # ---------------
+     # SerFormT
+     #     Type of the serialized form.
+
+     #: The ID of the task.
+     id_: int
+     #: The index of the task within its workflow.
+     index: int
+     #: Whether the task has changes not yet persisted.
+     is_pending: bool
+     #: The IDs of elements in the task.
+     element_IDs: list[int]
+     #: Description of the template for the task.
+     task_template: Mapping[str, Any] | None = None
+
+     @abstractmethod
+     def encode(self) -> tuple[int, SerFormT, dict[str, Any]]:
+         """Prepare store task data for the persistent store."""
+
+     @classmethod
+     @abstractmethod
+     def decode(cls, task_dat: SerFormT) -> Self:
+         """Initialise a `StoreTask` from store task data
+
+         Note: the `task_template` is only needed for encoding because it is retrieved as
+         part of the `WorkflowTemplate` so we don't need to load it when decoding.
+
+         """
+
+     @TimeIt.decorator
+     def append_element_IDs(self, pend_IDs: list[int]) -> Self:
+         """Return a copy, with additional element IDs."""
+         return self.__class__(
+             id_=self.id_,
+             index=self.index,
+             is_pending=self.is_pending,
+             element_IDs=[*self.element_IDs, *pend_IDs],
+             task_template=self.task_template,
+         )
+
+
+ @dataclass
+ class StoreElement(Generic[SerFormT, ContextT]):
+     """
+     Represents an element in a persistent store.
+
+     Parameters
+     ----------
+     id_:
+         The ID of the element.
+     is_pending:
+         Whether the element has changes not yet persisted.
+     index:
+         Index of the element within its parent task.
+     es_idx:
+         Index of the element set containing this element.
+     seq_idx:
+         Value sequence index map.
+     src_idx:
+         Data source index map.
+     task_ID:
+         ID of the task that contains this element.
+     iteration_IDs:
+         IDs of element-iterations that belong to this element.
+     """
+
+     # These would be in the docstring except they render really wrongly!
+     # Type Parameters
+     # ---------------
+     # SerFormT
+     #     Type of the serialized form.
+     # ContextT
+     #     Type of the encoding and decoding context.
+
+     #: The ID of the element.
+     id_: int
+     #: Whether the element has changes not yet persisted.
+     is_pending: bool
+     #: Index of the element within its parent task.
+     index: int
+     #: Index of the element set containing this element.
+     es_idx: int
+     #: Value sequence index map.
+     seq_idx: dict[str, int]
+     #: Data source index map.
+     src_idx: dict[str, int]
+     #: ID of the task that contains this element.
+     task_ID: int
+     #: IDs of element-iterations that belong to this element.
+     iteration_IDs: list[int]
+
+     @abstractmethod
+     def encode(self, context: ContextT) -> SerFormT:
+         """Prepare store element data for the persistent store."""
+
+     @classmethod
+     @abstractmethod
+     def decode(cls, elem_dat: SerFormT, context: ContextT) -> Self:
+         """Initialise a `StoreElement` from store element data"""
+
+     def to_dict(self, iters) -> dict[str, Any]:
+         """Prepare data for the user-facing `Element` object."""
+         return {
+             "id_": self.id_,
+             "is_pending": self.is_pending,
+             "index": self.index,
+             "es_idx": self.es_idx,
+             "seq_idx": self.seq_idx,
+             "src_idx": self.src_idx,
+             "iteration_IDs": self.iteration_IDs,
+             "task_ID": self.task_ID,
+             "iterations": iters,
+         }
+
+     @TimeIt.decorator
+     def append_iteration_IDs(self, pend_IDs: Iterable[int]) -> Self:
+         """Return a copy, with additional iteration IDs."""
+         iter_IDs = [*self.iteration_IDs, *pend_IDs]
+         return self.__class__(
+             id_=self.id_,
+             is_pending=self.is_pending,
+             index=self.index,
+             es_idx=self.es_idx,
+             seq_idx=self.seq_idx,
+             src_idx=self.src_idx,
+             task_ID=self.task_ID,
+             iteration_IDs=iter_IDs,
+         )
+
+
+ @dataclass
+ class StoreElementIter(Generic[SerFormT, ContextT]):
+     """
+     Represents an element iteration in a persistent store.
+
+     Parameters
+     ----------
+     id_:
+         The ID of this element iteration.
+     is_pending:
+         Whether the element iteration has changes not yet persisted.
+     element_ID:
+         Which element this is an iteration for.
+     EARs_initialised:
+         Whether EARs have been initialised for this element iteration.
+     EAR_IDs:
+         Maps task schema action indices to EARs by ID.
+     data_idx:
+         Overall data index for the element-iteration, which maps parameter names to
+         parameter data indices.
+     schema_parameters:
+         List of parameters defined by the associated task schema.
+     loop_idx:
+         What loops are being handled here and where they're up to.
+     """
+
+     # These would be in the docstring except they render really wrongly!
+     # Type Parameters
+     # ---------------
+     # SerFormT
+     #     Type of the serialized form.
+     # ContextT
+     #     Type of the encoding and decoding context.
+
+     #: The ID of this element iteration.
+     id_: int
+     #: Whether the element iteration has changes not yet persisted.
+     is_pending: bool
+     #: Which element this is an iteration for.
+     element_ID: int
+     #: Whether EARs have been initialised for this element iteration.
+     EARs_initialised: bool
+     #: Maps task schema action indices to EARs by ID.
+     EAR_IDs: dict[int, list[int]] | None
+     #: Overall data index for the element-iteration, which maps parameter names to
+     #: parameter data indices.
+     data_idx: DataIndex
+     #: List of parameters defined by the associated task schema.
+     schema_parameters: list[str]
+     #: What loops are being handled here and where they're up to.
+     loop_idx: Mapping[str, int] = field(default_factory=dict)
+
+     @abstractmethod
+     def encode(self, context: ContextT) -> SerFormT:
+         """Prepare store element iteration data for the persistent store."""
+
+     @classmethod
+     @abstractmethod
+     def decode(cls, iter_dat: SerFormT, context: ContextT) -> Self:
+         """Initialise a `StoreElementIter` from persistent store element iteration data"""
+
+     def to_dict(self, EARs: dict[int, dict[str, Any]] | None) -> dict[str, Any]:
+         """Prepare data for the user-facing `ElementIteration` object."""
+         return {
+             "id_": self.id_,
+             "is_pending": self.is_pending,
+             "element_ID": self.element_ID,
+             "EAR_IDs": self.EAR_IDs,
+             "data_idx": self.data_idx,
+             "schema_parameters": self.schema_parameters,
+             "EARs": EARs,
+             "EARs_initialised": self.EARs_initialised,
+             "loop_idx": dict(self.loop_idx),
+         }
+
+     @TimeIt.decorator
+     def append_EAR_IDs(self, pend_IDs: Mapping[int, Sequence[int]]) -> Self:
+         """Return a copy, with additional EAR IDs."""
+
+         EAR_IDs = copy.deepcopy(self.EAR_IDs) or {}
+         for act_idx, IDs_i in pend_IDs.items():
+             EAR_IDs.setdefault(act_idx, []).extend(IDs_i)
+
+         return self.__class__(
+             id_=self.id_,
+             is_pending=self.is_pending,
+             element_ID=self.element_ID,
+             EAR_IDs=EAR_IDs,
+             data_idx=self.data_idx,
+             schema_parameters=self.schema_parameters,
+             loop_idx=self.loop_idx,
+             EARs_initialised=self.EARs_initialised,
+         )
+
+     @TimeIt.decorator
+     def update_loop_idx(self, loop_idx: Mapping[str, int]) -> Self:
+         """Return a copy, with the loop index updated."""
+         loop_idx_new = dict(self.loop_idx)
+         loop_idx_new.update(loop_idx)
+         return self.__class__(
+             id_=self.id_,
+             is_pending=self.is_pending,
+             element_ID=self.element_ID,
+             EAR_IDs=self.EAR_IDs,
+             data_idx=self.data_idx,
+             schema_parameters=self.schema_parameters,
+             EARs_initialised=self.EARs_initialised,
+             loop_idx=loop_idx_new,
+         )
+
+     @TimeIt.decorator
+     def set_EARs_initialised(self) -> Self:
+         """Return a copy with `EARs_initialised` set to `True`."""
+         return self.__class__(
+             id_=self.id_,
+             is_pending=self.is_pending,
+             element_ID=self.element_ID,
+             EAR_IDs=self.EAR_IDs,
+             data_idx=self.data_idx,
+             schema_parameters=self.schema_parameters,
+             loop_idx=self.loop_idx,
+             EARs_initialised=True,
+         )
+
+     @TimeIt.decorator
+     def update_data_idx(self: AnySElementIter, data_idx: DataIndex) -> AnySElementIter:
+         """Return a copy with an updated `data_idx`.
+
+         The existing data index is updated, not overwritten.
+
+         """
+         new_data_idx = copy.deepcopy(self.data_idx)
+         new_data_idx.update(data_idx)
+         return self.__class__(
+             id_=self.id_,
+             is_pending=self.is_pending,
+             element_ID=self.element_ID,
+             EAR_IDs=self.EAR_IDs,
+             data_idx=new_data_idx,
+             schema_parameters=self.schema_parameters,
+             loop_idx=self.loop_idx,
+             EARs_initialised=self.EARs_initialised,
+         )
+
+
+ @dataclass
+ class StoreEAR(Generic[SerFormT, ContextT]):
+     """
+     Represents an element action run in a persistent store.
+
+     Parameters
+     ----------
+     id_:
+         The ID of this element action run.
+     is_pending:
+         Whether the element action run has changes not yet persisted.
+     elem_iter_ID:
+         What element iteration owns this EAR.
+     action_idx:
+         The task schema action associated with this EAR.
+     commands_idx:
+         The indices of the commands in the EAR.
+     data_idx:
+         Maps parameter names within this EAR to parameter data indices.
+     submission_idx:
+         Which submission contained this EAR, if known.
+     skip:
+         Whether to skip this EAR.
+     success:
+         Whether this EAR was successful, if known.
+     start_time:
+         When this EAR started, if known.
+     end_time:
+         When this EAR finished, if known.
+     snapshot_start:
+         Snapshot of files at EAR start, if recorded.
+     snapshot_end:
+         Snapshot of files at EAR end, if recorded.
+     exit_code:
+         The exit code of the underlying executable, if known.
+     metadata:
+         Metadata concerning e.g. the state of the EAR.
+     run_hostname:
+         Where this EAR was submitted to run, if known.
+     """
+
+     # These would be in the docstring except they render really wrongly!
+     # Type Parameters
+     # ---------------
+     # SerFormT
+     #     Type of the serialized form.
+     # ContextT
+     #     Type of the encoding and decoding context.
+
+     #: The ID of this element action run.
+     id_: int
+     #: Whether the element action run has changes not yet persisted.
+     is_pending: bool
+     #: What element iteration owns this EAR.
+     elem_iter_ID: int
+     #: The task schema action associated with this EAR.
+     action_idx: int
+     #: The indices of the commands in the EAR.
+     commands_idx: list[int]
+     #: Maps parameter names within this EAR to parameter data indices.
+     data_idx: DataIndex
+     #: Which submission contained this EAR, if known.
+     submission_idx: int | None = None
+     #: Run ID whose commands can be used for this run (may be this run's ID).
+     commands_file_ID: int | None = None
+     #: Whether to skip this EAR.
+     skip: int = 0
+     #: Whether this EAR was successful, if known.
+     success: bool | None = None
+     #: When this EAR started, if known.
+     start_time: datetime | None = None
+     #: When this EAR finished, if known.
+     end_time: datetime | None = None
+     #: Snapshot of files at EAR start, if recorded.
+     snapshot_start: dict[str, Any] | None = None
+     #: Snapshot of files at EAR end, if recorded.
+     snapshot_end: dict[str, Any] | None = None
+     #: The exit code of the underlying executable, if known.
+     exit_code: int | None = None
+     #: Metadata concerning e.g. the state of the EAR.
+     metadata: Metadata | None = None
+     #: Where this EAR was submitted to run, if known.
+     run_hostname: str | None = None
+     port_number: int | None = None
+
+     @staticmethod
+     def _encode_datetime(dt: datetime | None, ts_fmt: str) -> str | None:
+         return dt.strftime(ts_fmt) if dt else None
+
+     @staticmethod
+     def _decode_datetime(dt_str: str | None, ts_fmt: str) -> datetime | None:
+         return parse_timestamp(dt_str, ts_fmt) if dt_str else None
+
+     @abstractmethod
+     def encode(self, ts_fmt: str, context: ContextT) -> SerFormT:
+         """Prepare store EAR data for the persistent store."""
+
+     @classmethod
+     @abstractmethod
+     def decode(cls, EAR_dat: SerFormT, ts_fmt: str, context: ContextT) -> Self:
+         """Initialise a `StoreEAR` from persistent store EAR data"""
+
+     def to_dict(self) -> dict[str, Any]:
+         """Prepare data for the user-facing `ElementActionRun` object."""
+
+         def _process_datetime(dt: datetime | None) -> datetime | None:
+             """We store datetime objects implicitly in UTC, so we need to first make
+             that explicit, and then convert to the local time zone."""
+             return normalise_timestamp(dt) if dt else None
+
+         return {
+             "id_": self.id_,
+             "is_pending": self.is_pending,
+             "elem_iter_ID": self.elem_iter_ID,
+             "action_idx": self.action_idx,
+             "commands_idx": self.commands_idx,
+             "data_idx": self.data_idx,
+             "submission_idx": self.submission_idx,
+             "commands_file_ID": self.commands_file_ID,
+             "success": self.success,
+             "skip": self.skip,
+             "start_time": _process_datetime(self.start_time),
+             "end_time": _process_datetime(self.end_time),
+             "snapshot_start": self.snapshot_start,
+             "snapshot_end": self.snapshot_end,
+             "exit_code": self.exit_code,
+             "metadata": self.metadata,
+             "run_hostname": self.run_hostname,
+             "port_number": self.port_number,
+         }
+
+     @TimeIt.decorator
+     def update(
+         self,
+         submission_idx: int | None = None,
+         commands_file_ID: int | None = None,
+         skip: int | None = None,
+         success: bool | None = None,
+         start_time: datetime | None = None,
+         end_time: datetime | None = None,
+         snapshot_start: dict[str, Any] | None = None,
+         snapshot_end: dict[str, Any] | None = None,
+         exit_code: int | None = None,
+         run_hostname: str | None = None,
+         port_number: int | None = None,
+         data_idx: DataIndex | None = None,
+     ) -> Self:
+         """Return a shallow copy, with specified data updated."""
+
+         sub_idx = submission_idx if submission_idx is not None else self.submission_idx
+         skip = skip if skip is not None else self.skip
+         success = success if success is not None else self.success
+         start_time = start_time if start_time is not None else self.start_time
+         end_time = end_time if end_time is not None else self.end_time
+         snap_s = snapshot_start if snapshot_start is not None else self.snapshot_start
+         snap_e = snapshot_end if snapshot_end is not None else self.snapshot_end
+         exit_code = exit_code if exit_code is not None else self.exit_code
+         run_hn = run_hostname if run_hostname is not None else self.run_hostname
+         port_num = port_number if port_number is not None else self.port_number
+         cmd_file = (
+             commands_file_ID if commands_file_ID is not None else self.commands_file_ID
+         )
+         if data_idx is not None:
+             new_data_idx = copy.deepcopy(self.data_idx)
+             new_data_idx.update(data_idx)
+             data_idx = new_data_idx
+         else:
+             data_idx = self.data_idx
+
+         return self.__class__(
+             id_=self.id_,
+             is_pending=self.is_pending,
+             elem_iter_ID=self.elem_iter_ID,
+             action_idx=self.action_idx,
+             commands_idx=self.commands_idx,
+             data_idx=data_idx,
+             metadata=self.metadata,
+             submission_idx=sub_idx,
+             commands_file_ID=cmd_file,
+             skip=skip,
+             success=success,
+             start_time=start_time,
+             end_time=end_time,
+             snapshot_start=snap_s,
+             snapshot_end=snap_e,
+             exit_code=exit_code,
+             run_hostname=run_hn,
+             port_number=port_num,
+         )
+
+
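A sketch of the copy-on-update pattern shared by these Store* classes: `update` returns a fresh instance rather than mutating in place. `_DemoEAR` and its field values are invented for illustration:

    class _DemoEAR(StoreEAR[dict, None]):
        def encode(self, ts_fmt, context):
            return self.to_dict()

        @classmethod
        def decode(cls, EAR_dat, ts_fmt, context):
            return cls(**EAR_dat)

    run = _DemoEAR(
        id_=0, is_pending=False, elem_iter_ID=3, action_idx=0,
        commands_idx=[], data_idx={},
    )
    done = run.update(success=True, exit_code=0)
    assert done is not run
    assert run.exit_code is None and done.exit_code == 0
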
+ @dataclass
+ @hydrate
+ class StoreParameter:
+     """
+     Represents a parameter in a persistent store.
+
+     Parameters
+     ----------
+     id_:
+         The ID of this parameter.
+     is_pending:
+         Whether the parameter has changes not yet persisted.
+     is_set:
+         Whether the parameter is set.
+     data:
+         Description of the value of the parameter.
+     file:
+         Description of the file this parameter represents.
+     source:
+         Description of where this parameter originated.
+     """
+
+     #: The ID of this parameter.
+     id_: int
+     #: Whether the parameter has changes not yet persisted.
+     is_pending: bool
+     #: Whether the parameter is set.
+     is_set: bool
+     #: Description of the value of the parameter.
+     data: ParameterTypes
+     #: Description of the file this parameter represents.
+     file: File | None
+     #: Description of where this parameter originated.
+     source: ParamSource
+
+     _encoders: ClassVar[dict[type, Callable]] = {}
+     _decoders: ClassVar[dict[str, Callable]] = {}
+     _MAX_DEPTH: ClassVar[int] = 50
+
+     _all_encoders: ClassVar[dict[type, Callable]] = {}
+     _all_decoders: ClassVar[dict[str, Callable]] = {}
+
+     def encode(self, **kwargs) -> dict[str, Any] | int:
+         """Prepare store parameter data for the persistent store."""
+         if self.is_set:
+             if self.file:
+                 return {"file": self.file}
+             else:
+                 return cast("dict", self._encode(obj=self.data, **kwargs))
+         else:
+             return PARAM_DATA_NOT_SET
+
+     @staticmethod
+     def __is_ParameterValue(value) -> TypeIs[ParameterValue]:
+         # avoid circular import of `ParameterValue` until needed...
+         from ..core.parameters import ParameterValue as PV
+
+         return isinstance(value, PV)
+
+     def _encode(
+         self,
+         obj: ParameterTypes,
+         path: list[int] | None = None,
+         type_lookup: TypeLookup | None = None,
+         **kwargs,
+     ) -> EncodedStoreParameter:
+         """Recursive encoder."""
+
+         path = path or []
+         if type_lookup is None:
+             type_lookup = cast("TypeLookup", defaultdict(list))
+
+         if len(path) > self._MAX_DEPTH:
+             raise RuntimeError("I'm in too deep!")
+
+         if self.__is_ParameterValue(obj):
+             encoded = self._encode(
+                 obj=obj.to_dict(),
+                 path=path,
+                 type_lookup=type_lookup,
+                 **kwargs,
+             )
+             data, type_lookup = encoded["data"], encoded["type_lookup"]
+
+         elif isinstance(obj, (list, tuple, set)):
+             data = []
+             for idx, item in enumerate(obj):
+                 encoded = self._encode(
+                     obj=item,
+                     path=[*path, idx],
+                     type_lookup=type_lookup,
+                     **kwargs,
+                 )
+                 item, type_lookup = encoded["data"], encoded["type_lookup"]
+                 assert type_lookup is not None
+                 data.append(item)
+
+             if isinstance(obj, tuple):
+                 type_lookup["tuples"].append(path)
+
+             elif isinstance(obj, set):
+                 type_lookup["sets"].append(path)
+
+         elif isinstance(obj, dict):
+             assert type_lookup is not None
+             data = {}
+             for dct_key, dct_val in obj.items():
+                 encoded = self._encode(
+                     obj=dct_val,
+                     path=[*path, dct_key],
+                     type_lookup=type_lookup,
+                     **kwargs,
+                 )
+                 dct_val, type_lookup = encoded["data"], encoded["type_lookup"]
+                 assert type_lookup is not None
+                 data[dct_key] = dct_val
+
+         elif isinstance(obj, PRIMITIVES):
+             data = obj
+
+         elif type(obj) in self._all_encoders:
+             assert type_lookup is not None
+             data = self._all_encoders[type(obj)](
+                 obj=obj,
+                 path=path,
+                 type_lookup=type_lookup,
+                 root_encoder=self._encode,
+                 **kwargs,
+             )
+
+         elif isinstance(obj, enum.Enum):
+             data = obj.value
+
+         else:
+             raise ValueError(
+                 f"Parameter data with type {type(obj)} cannot be serialised into a "
+                 f"{self.__class__.__name__}: {obj}."
+             )
+
+         return {"data": data, "type_lookup": type_lookup}
+
+     @classmethod
+     def decode(
+         cls,
+         id_: int,
+         data: dict[str, Any] | Literal[0] | None,
+         source: ParamSource,
+         *,
+         path: list[str] | None = None,
+         **kwargs,
+     ) -> Self:
+         """Initialise from persistent store parameter data."""
+         if data and "file" in data:
+             return cls(
+                 id_=id_,
+                 data=None,
+                 file=cast("File", data["file"]),
+                 is_set=True,
+                 source=source,
+                 is_pending=False,
+             )
+         elif not isinstance(data, dict):
+             # parameter is not set
+             return cls(
+                 id_=id_,
+                 data=None,
+                 file=None,
+                 is_set=False,
+                 source=source,
+                 is_pending=False,
+             )
+
+         data_ = cast("EncodedStoreParameter", data)
+         path = path or []
+
+         obj = get_in_container(data_["data"], path)
+
+         for type_, paths in data_["type_lookup"].items():
+             for type_path in paths:
+                 if type_ == "tuples":
+                     try:
+                         rel_path = get_relative_path(type_path, path)
+                     except ValueError:
+                         continue
+                     if rel_path:
+                         set_in_container(
+                             obj, rel_path, tuple(get_in_container(obj, rel_path))
+                         )
+                     else:
+                         obj = tuple(obj)
+                 elif type_ == "sets":
+                     try:
+                         rel_path = get_relative_path(type_path, path)
+                     except ValueError:
+                         continue
+                     if rel_path:
+                         set_in_container(
+                             obj, rel_path, set(get_in_container(obj, rel_path))
+                         )
+                     else:
+                         obj = set(obj)
+                 elif type_ in cls._all_decoders:
+                     obj = cls._all_decoders[type_](
+                         obj=obj,
+                         type_lookup=data_["type_lookup"],
+                         path=path,
+                         **kwargs,
+                     )
+
+         return cls(
+             id_=id_,
+             data=obj,
+             file=None,
+             is_set=True,
+             source=source,
+             is_pending=False,
+         )
+
+     def set_data(self, value: Any) -> Self:
+         """Return a copy, with data set."""
+         if self.is_set:
+             raise RuntimeError(f"Parameter ID {self.id_!r} is already set!")
+         return self.__class__(
+             id_=self.id_,
+             is_set=True,
+             is_pending=self.is_pending,
+             data=value,
+             file=None,
+             source=self.source,
+         )
+
+     def set_file(self, value: File) -> Self:
+         """Return a copy, with file set."""
+         if self.is_set:
+             raise RuntimeError(f"Parameter ID {self.id_!r} is already set!")
+         return self.__class__(
+             id_=self.id_,
+             is_set=True,
+             is_pending=self.is_pending,
+             data=None,
+             file=value,
+             source=self.source,
+         )
+
+     def update_source(self, src: ParamSource) -> Self:
+         """Return a copy, with updated source."""
+         return self.__class__(
+             id_=self.id_,
+             is_set=self.is_set,
+             is_pending=self.is_pending,
+             data=self.data,
+             file=self.file,
+             source=update_param_source_dict(self.source, src),
+         )
+
+
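A hedged round-trip sketch of the encoder above: `_encode` flattens JSON-unfriendly tuples and sets to lists, recording their locations in `type_lookup`, and `decode` rebuilds them from those paths. The parameter value is invented, and this assumes `StoreParameter` can be constructed directly outside a workflow context:

    p = StoreParameter(
        id_=7, is_pending=False, is_set=True,
        data={"a": (1, 2), "b": {3}}, file=None, source={},
    )
    enc = p.encode()
    # enc["data"] == {"a": [1, 2], "b": [3]}
    # enc["type_lookup"] == {"tuples": [["a"]], "sets": [["b"]]}
    p2 = StoreParameter.decode(id_=7, data=enc, source={})
    assert p2.data == {"a": (1, 2), "b": {3}}
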
+ class PersistentStore(
+     ABC, Generic[AnySTask, AnySElement, AnySElementIter, AnySEAR, AnySParameter]
+ ):
+     """
+     An abstract class representing a persistent workflow store.
+
+     Parameters
+     ----------
+     app: App
+         The main hpcflow core.
+     workflow: ~hpcflow.app.Workflow
+         The workflow being persisted.
+     path: pathlib.Path
+         Where to hold the store.
+     fs: fsspec.AbstractFileSystem
+         Optionally, information about how to access the store.
+     """
+
+     # These would be in the docstring except they render really wrongly!
+     # Type Parameters
+     # ---------------
+     # AnySTask: StoreTask
+     #     The type of stored tasks.
+     # AnySElement: StoreElement
+     #     The type of stored elements.
+     # AnySElementIter: StoreElementIter
+     #     The type of stored element iterations.
+     # AnySEAR: StoreEAR
+     #     The type of stored EARs.
+     # AnySParameter: StoreParameter
+     #     The type of stored parameters.
+
+     _name: ClassVar[str]
+
+     @classmethod
+     @abstractmethod
+     def _store_task_cls(cls) -> type[AnySTask]: ...
+
+     @classmethod
+     @abstractmethod
+     def _store_elem_cls(cls) -> type[AnySElement]: ...
+
+     @classmethod
+     @abstractmethod
+     def _store_iter_cls(cls) -> type[AnySElementIter]: ...
+
+     @classmethod
+     @abstractmethod
+     def _store_EAR_cls(cls) -> type[AnySEAR]: ...
+
+     @classmethod
+     @abstractmethod
+     def _store_param_cls(cls) -> type[AnySParameter]: ...
+
+     _resources: dict[str, StoreResource]
+     _features: ClassVar[PersistentStoreFeatures]
+     _res_map: ClassVar[CommitResourceMap]
+
+     def __init__(
+         self,
+         app: BaseApp,
+         workflow: Workflow | None,
+         path: Path | str,
+         fs: AbstractFileSystem | None = None,
+     ):
+         self._app = app
+         self.__workflow = workflow
+         self.path = str(path)
+         self.fs = fs
+
+         self._pending: PendingChanges[
+             AnySTask, AnySElement, AnySElementIter, AnySEAR, AnySParameter
+         ] = PendingChanges(app=app, store=self, resource_map=self._res_map)
+
+         self._resources_in_use: set[tuple[str, str]] = set()
+         self._in_batch_mode = False
+
+         self._use_cache = False
+         self._reset_cache()
+
+         self._use_parameters_metadata_cache: bool = False  # subclass-specific cache
+
+     def _ensure_all_encoders(self):
+         """Ensure app-defined encoders are included in the StoreParameter's encoders
+         map."""
+         param_cls = self._store_param_cls()
+         if not param_cls._all_encoders:
+             param_cls._all_encoders = {
+                 **param_cls._encoders,
+                 **self.workflow._app.encoders().get(self._name, {}),
+             }
+
+     def _ensure_all_decoders(self):
+         """Ensure app-defined decoders are included in the StoreParameter's decoders
+         map."""
+         param_cls = self._store_param_cls()
+         if not param_cls._all_decoders:
+             param_cls._all_decoders = {
+                 **param_cls._decoders,
+                 **self.workflow._app.decoders().get(self._name, {}),
+             }
+
+     @abstractmethod
+     def cached_load(self) -> contextlib.AbstractContextManager[None]:
+         """
+         Perform a load with cache enabled while the ``with``-wrapped code runs.
+         """
+
+     @abstractmethod
+     def get_name(self) -> str:
+         """
+         Get the workflow name.
+         """
+
+     @abstractmethod
+     def get_creation_info(self) -> StoreCreationInfo:
+         """
+         Get the workflow creation data.
+         """
+
+     @abstractmethod
+     def get_ts_fmt(self) -> str:
+         """
+         Get the timestamp format.
+         """
+
+     @abstractmethod
+     def get_ts_name_fmt(self) -> str:
+         """
+         Get the timestamp format for names.
+         """
+
+     @abstractmethod
+     def remove_replaced_dir(self) -> None:
+         """
+         Remove a replaced directory.
+         """
+
+     @abstractmethod
+     def reinstate_replaced_dir(self) -> None:
+         """
+         Reinstate a replaced directory.
+         """
+
+     @abstractmethod
+     def zip(
+         self,
+         path: str = ".",
+         log: str | None = None,
+         overwrite=False,
+         include_execute=False,
+         include_rechunk_backups=False,
+     ) -> str:
+         """
+         Convert this store into archival form.
+         """
+
+     @abstractmethod
+     def unzip(self, path: str = ".", log: str | None = None) -> str:
+         """
+         Convert this store into expanded form.
+         """
+
+     @abstractmethod
+     def rechunk_parameter_base(
+         self,
+         chunk_size: int | None = None,
+         backup: bool = True,
+         status: bool = True,
+     ) -> Any: ...
+
+     @abstractmethod
+     def rechunk_runs(
+         self,
+         chunk_size: int | None = None,
+         backup: bool = True,
+         status: bool = True,
+     ) -> Any: ...
+
+     @abstractmethod
+     def get_dirs_array(self) -> NDArray:
+         """
+         Retrieve the run directories array.
+         """
+
+     @classmethod
+     @abstractmethod
+     def write_empty_workflow(
+         cls,
+         app: BaseApp,
+         *,
+         template_js: TemplateMeta,
+         template_components_js: dict[str, Any],
+         wk_path: str,
+         fs: AbstractFileSystem,
+         name: str,
+         replaced_wk: str | None,
+         creation_info: StoreCreationInfo,
+         ts_fmt: str,
+         ts_name_fmt: str,
+     ) -> None:
+         """
+         Write an empty workflow.
+         """
+
+     @property
+     def workflow(self) -> Workflow:
+         """
+         The workflow this relates to.
+         """
+         assert self.__workflow is not None
+         return self.__workflow
+
+     @property
+     def logger(self) -> Logger:
+         """
+         The logger to use.
+         """
+         return self._app.persistence_logger
+
+     @property
+     def ts_fmt(self) -> str:
+         """
+         The format for timestamps.
+         """
+         return self.workflow.ts_fmt
+
+     @property
+     def has_pending(self) -> bool:
+         """
+         Whether there are any pending changes.
+         """
+         return bool(self._pending)
+
+     @property
+     def is_submittable(self) -> bool:
+         """Does this store support workflow submission?"""
+         return self.fs.__class__.__name__ == "LocalFileSystem"
+
+     @property
+     def use_cache(self) -> bool:
+         """
+         Whether to use a cache.
+         """
+         return self._use_cache
+
+     @property
+     def task_cache(self) -> dict[int, AnySTask]:
+         """Cache for persistent tasks."""
+         return self._cache["tasks"]
+
+     @property
+     def element_cache(self) -> dict[int, AnySElement]:
+         """Cache for persistent elements."""
+         return self._cache["elements"]
+
+     @property
+     def element_iter_cache(self) -> dict[int, AnySElementIter]:
+         """Cache for persistent element iterations."""
+         return self._cache["element_iters"]
+
+     @property
+     def EAR_cache(self) -> dict[int, AnySEAR]:
+         """Cache for persistent EARs."""
+         return self._cache["EARs"]
+
+     @property
+     def num_tasks_cache(self) -> int | None:
+         """Cache for number of persistent tasks."""
+         return self._cache["num_tasks"]
+
+     @num_tasks_cache.setter
+     def num_tasks_cache(self, value: int | None):
+         self._cache["num_tasks"] = value
+
+     @property
+     def num_EARs_cache(self) -> int | None:
+         """Cache for total number of persistent EARs."""
+         return self._cache["num_EARs"]
+
+     @num_EARs_cache.setter
+     def num_EARs_cache(self, value: int | None):
+         self._cache["num_EARs"] = value
+
+     @property
+     def num_params_cache(self) -> int | None:
+         return self._cache["num_params"]
+
+     @num_params_cache.setter
+     def num_params_cache(self, value: int | None):
+         self._cache["num_params"] = value
+
+     @property
+     def param_sources_cache(self) -> dict[int, ParamSource]:
+         """Cache for persistent parameter sources."""
+         return self._cache["param_sources"]
+
+     @property
+     def parameter_cache(self) -> dict[int, AnySParameter]:
+         """Cache for persistent parameters."""
+         return self._cache["parameters"]
+
+     def _reset_cache(self) -> None:
+         self._cache: PersistenceCache[
+             AnySTask, AnySElement, AnySElementIter, AnySEAR, AnySParameter
+         ] = {
+             "tasks": {},
+             "elements": {},
+             "element_iters": {},
+             "EARs": {},
+             "param_sources": {},
+             "num_tasks": None,
+             "parameters": {},
+             "num_EARs": None,
+             "num_params": None,
+         }
+
+     @contextlib.contextmanager
+     def cache_ctx(self) -> Iterator[None]:
+         """Context manager for using the persistent element/iteration/run cache."""
+         if self._use_cache:
+             yield
+         else:
+             self._use_cache = True
+             self._reset_cache()
+             try:
+                 yield
+             finally:
+                 self._use_cache = False
+                 self._reset_cache()
+
+     @contextlib.contextmanager
+     def parameters_metadata_cache(self):
+         """Context manager for using the parameters-metadata cache.
+
+         Notes
+         -----
+         This method can be overridden by a subclass to provide an implementation-specific
+         cache of metadata associated with parameters, or even parameter data itself.
+
+         Using this cache precludes writing/setting parameter data.
+
+         """
+         yield
+
+     @staticmethod
+     def prepare_test_store_from_spec(
+         task_spec: Sequence[
+             Mapping[str, Sequence[Mapping[str, Sequence[Mapping[str, Sequence]]]]]
+         ],
+     ) -> tuple[list[dict], list[dict], list[dict], list[dict]]:
+         """Generate a valid store from a specification in terms of nested
+         elements/iterations/EARs.
+
+         """
+         tasks: list[dict] = []
+         elements: list[dict] = []
+         elem_iters: list[dict] = []
+         EARs: list[dict] = []
+
+         for task_idx, task_i in enumerate(task_spec):
+             elems_i = task_i.get("elements", [])
+             elem_IDs = list(range(len(elements), len(elements) + len(elems_i)))
+
+             for elem_idx, elem_j in enumerate(elems_i):
+                 iters_j = elem_j.get("iterations", [])
+                 iter_IDs = list(range(len(elem_iters), len(elem_iters) + len(iters_j)))
+
+                 for iter_k in iters_j:
+                     EARs_k = iter_k.get("EARs", [])
+                     EAR_IDs = list(range(len(EARs), len(EARs) + len(EARs_k)))
+                     EAR_IDs_dct = {0: EAR_IDs} if EAR_IDs else {}
+
+                     for _ in EARs_k:
+                         EARs.append(
+                             {
+                                 "id_": len(EARs),
+                                 "is_pending": False,
+                                 "elem_iter_ID": len(elem_iters),
+                                 "action_idx": 0,
+                                 "data_idx": {},
+                                 "metadata": {},
+                             }
+                         )
+
+                     elem_iters.append(
+                         {
+                             "id_": len(elem_iters),
+                             "is_pending": False,
+                             "element_ID": len(elements),
+                             "EAR_IDs": EAR_IDs_dct,
+                             "data_idx": {},
+                             "schema_parameters": [],
+                         }
+                     )
+                 elements.append(
+                     {
+                         "id_": len(elements),
+                         "is_pending": False,
+                         "element_idx": elem_idx,
+                         "seq_idx": {},
+                         "src_idx": {},
+                         "task_ID": task_idx,
+                         "iteration_IDs": iter_IDs,
+                     }
+                 )
+             tasks.append(
+                 {
+                     "id_": len(tasks),
+                     "is_pending": False,
+                     "element_IDs": elem_IDs,
+                 }
+             )
+         return (tasks, elements, elem_iters, EARs)
+
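A usage sketch of the nested spec format accepted above: one task holding one element, with a single iteration that has one EAR (the empty dict):

    tasks, elements, iters, EARs = PersistentStore.prepare_test_store_from_spec(
        [{"elements": [{"iterations": [{"EARs": [{}]}]}]}]
    )
    assert tasks[0]["element_IDs"] == [0]
    assert elements[0]["iteration_IDs"] == [0]
    assert iters[0]["EAR_IDs"] == {0: [0]}
    assert EARs[0]["elem_iter_ID"] == 0
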
+     def remove_path(self, path: str | Path) -> None:
+         """Try very hard to delete a directory or file.
+
+         Dropbox (on Windows, at least) seems to try to re-sync files if the parent directory
+         is deleted soon after creation, which is the case on a failed workflow creation (e.g.
+         missing inputs), so in addition to catching PermissionErrors generated when
+         Dropbox has a lock on files, we repeatedly try deleting the directory tree.
+
+         """
+
+         fs = self.fs
+         assert fs is not None
+
+         @self._app.perm_error_retry()
+         def _remove_path(_path: str) -> None:
+             self.logger.debug(f"_remove_path: path={_path}")
+             while fs.exists(_path):
+                 fs.rm(_path, recursive=True)
+                 time.sleep(0.5)
+
+         return _remove_path(str(path))
+
+     def rename_path(self, replaced: str, original: str | Path) -> None:
+         """Revert the replaced workflow path to its original name.
+
+         This happens when new workflow creation fails and there is an existing workflow
+ with the same name; the original workflow which was renamed, must be reverted."""
1362
+
1363
+ fs = self.fs
1364
+ assert fs is not None
1365
+
1366
+ @self._app.perm_error_retry()
1367
+ def _rename_path(_replaced: str, _original: str) -> None:
1368
+ self.logger.debug(f"_rename_path: {_replaced!r} --> {_original!r}.")
1369
+ try:
1370
+ fs.rename(
1371
+ _replaced, _original, recursive=True
1372
+ ) # TODO: why need recursive?
1373
+ except TypeError:
1374
+ # `SFTPFileSystem.rename` has no `recursive` argument:
1375
+ fs.rename(_replaced, _original)
1376
+
1377
+ return _rename_path(str(replaced), str(original))
1378
+
1379
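Both helpers lean on `perm_error_retry`, which, judging from its use here, retries the wrapped function when a `PermissionError` is raised. A rough sketch of the idea (assumed behaviour, not the actual decorator):

    import functools
    import time

    def perm_error_retry(max_tries: int = 10, delay: float = 0.5):
        # Sketch only: retry on PermissionError, e.g. while Dropbox holds a lock.
        def decorator(fn):
            @functools.wraps(fn)
            def wrapper(*args, **kwargs):
                for attempt in range(max_tries):
                    try:
                        return fn(*args, **kwargs)
                    except PermissionError:
                        if attempt == max_tries - 1:
                            raise
                        time.sleep(delay)
            return wrapper
        return decorator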
+ @abstractmethod
+ def _get_num_persistent_tasks(self) -> int: ...
+
+ def _get_num_total_tasks(self) -> int:
+ """Get the total number of persistent and pending tasks."""
+ return self._get_num_persistent_tasks() + len(self._pending.add_tasks)
+
+ @abstractmethod
+ def _get_num_persistent_loops(self) -> int: ...
+
+ def _get_num_total_loops(self) -> int:
+ """Get the total number of persistent and pending loops."""
+ return self._get_num_persistent_loops() + len(self._pending.add_loops)
+
+ @abstractmethod
+ def _get_num_persistent_submissions(self) -> int: ...
+
+ def _get_num_total_submissions(self) -> int:
+ """Get the total number of persistent and pending submissions."""
+ return self._get_num_persistent_submissions() + len(self._pending.add_submissions)
+
+ @abstractmethod
+ def _get_num_persistent_elements(self) -> int: ...
+
+ def _get_num_total_elements(self) -> int:
+ """Get the total number of persistent and pending elements."""
+ return self._get_num_persistent_elements() + len(self._pending.add_elements)
+
+ @abstractmethod
+ def _get_num_persistent_elem_iters(self) -> int: ...
+
+ def _get_num_total_elem_iters(self) -> int:
+ """Get the total number of persistent and pending element iterations."""
+ return self._get_num_persistent_elem_iters() + len(self._pending.add_elem_iters)
+
+ @abstractmethod
+ def _get_num_persistent_EARs(self) -> int: ...
+
+ @TimeIt.decorator
+ def _get_num_total_EARs(self) -> int:
+ """Get the total number of persistent and pending EARs."""
+ return self._get_num_persistent_EARs() + len(self._pending.add_EARs)
+
+ def _get_task_total_num_elements(self, task_ID: int) -> int:
+ """Get the total number of persistent and pending elements of a given task."""
+ return len(self.get_task(task_ID).element_IDs)
+
+ @abstractmethod
+ def _get_num_persistent_parameters(self) -> int: ...
+
+ def _get_num_total_parameters(self) -> int:
+ """Get the total number of persistent and pending parameters."""
+ return self._get_num_persistent_parameters() + len(self._pending.add_parameters)
+
+ def _get_num_total_input_files(self) -> int:
+ """Get the total number of persistent and pending user-supplied input files."""
+ return self._get_num_persistent_input_files() + sum(
+ fd["is_input"] for fd in self._pending.add_files
+ )
+
+ @abstractmethod
+ def _get_num_persistent_added_tasks(self) -> int: ...
+
+ def _get_num_total_added_tasks(self) -> int:
+ """Get the total number of tasks ever added to the workflow."""
+ return self._get_num_persistent_added_tasks() + len(self._pending.add_tasks)
+
+ def _get_num_persistent_input_files(self) -> int:
+ return sum(1 for _ in self.workflow.input_files_path.glob("*"))
+
+ def save(self) -> None:
+ """Commit pending changes to disk, if not in batch-update mode."""
+ if not self.workflow._in_batch_mode:
+ self._pending.commit_all()
+
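Because every mutating method below takes `save=True` by default, callers making many changes can pass `save=False` throughout and commit once at the end; batch-update mode (checked above) defers commits in the same way. A sketch (hypothetical `store`):

    # Hypothetical: stage several elements, then commit all pending changes once:
    for es_idx in range(10):
        store.add_element(task_ID=0, es_idx=es_idx, seq_idx={}, src_idx={}, save=False)
    store.save()  # commits all pending changes (deferred again if in batch-update mode)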
+ def add_template_components(
+ self, temp_comps: Mapping[str, dict], save: bool = True
+ ) -> None:
+ """
+ Add template components to the workflow.
+ """
+ all_tc = self.get_template_components()
+ for name, dat in temp_comps.items():
+ if name in all_tc:
+ for hash_i, dat_i in dat.items():
+ if hash_i not in all_tc[name]:
+ self._pending.add_template_components[name][hash_i] = dat_i
+ else:
+ self._pending.add_template_components[name] = dat
+
+ if save:
+ self.save()
+
+ def add_task(self, idx: int, task_template: Mapping, save: bool = True):
+ """Add a new task to the workflow."""
+ self.logger.debug("Adding store task.")
+ new_ID = self._get_num_total_added_tasks()
+ self._pending.add_tasks[new_ID] = self._store_task_cls()(
+ id_=new_ID,
+ index=idx,
+ task_template=task_template,
+ is_pending=True,
+ element_IDs=[],
+ )
+ if save:
+ self.save()
+ return new_ID
+
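New task IDs come from `_get_num_total_added_tasks`, which counts tasks ever added (persistent plus pending), so IDs stay unique even if tasks are later removed, and are valid before any commit. A worked example of the rule:

    # Sketch of the ID-allocation rule used by add_task (and the other add_* methods):
    num_persistent_added = 3        # tasks ever written to disk: IDs 0, 1, 2
    num_pending = 1                 # one uncommitted task: ID 3
    new_ID = num_persistent_added + num_pending
    assert new_ID == 4              # the next add_task call allocates ID 4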
+ def add_loop(
+ self,
+ loop_template: Mapping[str, Any],
+ iterable_parameters: Mapping[str, IterableParam],
+ output_parameters: Mapping[str, int],
+ parents: Sequence[str],
+ num_added_iterations: Mapping[tuple[int, ...], int],
+ iter_IDs: Iterable[int],
+ save: bool = True,
+ ):
+ """Add a new loop to the workflow."""
+ self.logger.debug("Adding store loop.")
+ new_idx = self._get_num_total_loops()
+ added_iters: list[list[list[int] | int]] = [
+ [list(k), v] for k, v in num_added_iterations.items()
+ ]
+ self._pending.add_loops[new_idx] = {
+ "loop_template": dict(loop_template),
+ "iterable_parameters": cast("dict", iterable_parameters),
+ "output_parameters": cast("dict", output_parameters),
+ "parents": list(parents),
+ "num_added_iterations": added_iters,
+ }
+
+ for i in iter_IDs:
+ self._pending.update_loop_indices[i][loop_template["name"]] = 0
+
+ if save:
+ self.save()
+
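`num_added_iterations` is keyed by loop-index tuples, which cannot serve as JSON keys, so it is stored as `[key-as-list, count]` pairs; the mapping round-trips cleanly:

    num_added_iterations = {(0,): 2, (0, 1): 1}
    added_iters = [[list(k), v] for k, v in num_added_iterations.items()]
    # -> [[[0], 2], [[0, 1], 1]] (JSON-serializable)
    assert {tuple(k): v for k, v in added_iters} == num_added_iterations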
+ @TimeIt.decorator
+ def add_submission(
+ self, sub_idx: int, sub_js: Mapping[str, JSONed], save: bool = True
+ ):
+ """Add a new submission."""
+ self.logger.debug("Adding store submission.")
+ self._pending.add_submissions[sub_idx] = sub_js
+ if save:
+ self.save()
+
+ def add_element_set(self, task_id: int, es_js: Mapping, save: bool = True):
+ """
+ Add an element set to a task.
+ """
+ self._pending.add_element_sets[task_id].append(es_js)
+ if save:
+ self.save()
+
+ def add_element(
+ self,
+ task_ID: int,
+ es_idx: int,
+ seq_idx: dict[str, int],
+ src_idx: dict[str, int],
+ save: bool = True,
+ ) -> int:
+ """Add a new element to a task."""
+ self.logger.debug("Adding store element.")
+ new_ID = self._get_num_total_elements()
+ new_elem_idx = self._get_task_total_num_elements(task_ID)
+ self._pending.add_elements[new_ID] = self._store_elem_cls()(
+ id_=new_ID,
+ is_pending=True,
+ index=new_elem_idx,
+ es_idx=es_idx,
+ seq_idx=seq_idx,
+ src_idx=src_idx,
+ task_ID=task_ID,
+ iteration_IDs=[],
+ )
+ self._pending.add_elem_IDs[task_ID].append(new_ID)
+ if save:
+ self.save()
+ return new_ID
+
+ def add_element_iteration(
+ self,
+ element_ID: int,
+ data_idx: DataIndex,
+ schema_parameters: list[str],
+ loop_idx: Mapping[str, int] | None = None,
+ save: bool = True,
+ ) -> int:
+ """Add a new iteration to an element."""
+ self.logger.debug("Adding store element-iteration.")
+ new_ID = self._get_num_total_elem_iters()
+ self._pending.add_elem_iters[new_ID] = self._store_iter_cls()(
+ id_=new_ID,
+ element_ID=element_ID,
+ is_pending=True,
+ EARs_initialised=False,
+ EAR_IDs=None,
+ data_idx=data_idx,
+ schema_parameters=schema_parameters,
+ loop_idx=loop_idx or {},
+ )
+ self._pending.add_elem_iter_IDs[element_ID].append(new_ID)
+ if save:
+ self.save()
+ return new_ID
+
+ @TimeIt.decorator
+ def add_EAR(
+ self,
+ elem_iter_ID: int,
+ action_idx: int,
+ commands_idx: list[int],
+ data_idx: DataIndex,
+ metadata: Metadata | None = None,
+ save: bool = True,
+ ) -> int:
+ """Add a new EAR to an element iteration."""
+ self.logger.debug("Adding store EAR.")
+ new_ID = self._get_num_total_EARs()
+ self._pending.add_EARs[new_ID] = self._store_EAR_cls()(
+ id_=new_ID,
+ is_pending=True,
+ elem_iter_ID=elem_iter_ID,
+ action_idx=action_idx,
+ commands_idx=commands_idx,
+ data_idx=data_idx,
+ metadata=metadata or {},
+ )
+ self._pending.add_elem_iter_EAR_IDs[elem_iter_ID][action_idx].append(new_ID)
+ if save:
+ self.save()
+ return new_ID
+
+ @TimeIt.decorator
+ def set_run_dirs(
+ self, run_dir_indices: np.ndarray, run_idx: np.ndarray, save: bool = True
+ ):
+ """Set the run directory indices for the given runs."""
+ self.logger.debug(f"Setting {run_idx.size} run directory indices.")
+ self._pending.set_run_dirs.append((run_dir_indices, run_idx))
+ if save:
+ self.save()
+
+ def update_at_submit_metadata(
+ self, sub_idx: int, submission_parts: dict[str, list[int]], save: bool = True
+ ):
+ """
+ Update metadata that is set at submit-time.
+ """
+ if submission_parts:
+ self._pending.update_at_submit_metadata[sub_idx][
+ "submission_parts"
+ ] = submission_parts
+ if save:
+ self.save()
+
+ @TimeIt.decorator
+ def set_run_submission_data(
+ self, EAR_ID: int, cmds_ID: int | None, sub_idx: int, save: bool = True
+ ) -> None:
+ """
+ Set the run submission data, like the submission index for an element action run.
+ """
+ self._pending.set_EAR_submission_data[EAR_ID] = (sub_idx, cmds_ID)
+ if save:
+ self.save()
+
+ def set_EAR_start(
+ self,
+ EAR_ID: int,
+ run_dir: Path | None,
+ port_number: int | None,
+ save: bool = True,
+ ) -> datetime:
+ """
+ Mark an element action run as started.
+ """
+ dt = current_timestamp()
+ ss_js = self._app.RunDirAppFiles.take_snapshot() if run_dir else None
+ run_hostname = socket.gethostname()
+ self._pending.set_EAR_starts[EAR_ID] = (dt, ss_js, run_hostname, port_number)
+ if save:
+ self.save()
+ return dt
+
+ def set_multi_run_starts(
+ self,
+ run_ids: list[int],
+ run_dirs: list[Path | None],
+ port_number: int,
+ save: bool = True,
+ ) -> datetime:
+ """Mark multiple element action runs as started."""
+ dt = current_timestamp()
+ run_hostname = socket.gethostname()
+ run_start_data: dict[int, tuple] = {}
+ for id_i, dir_i in zip(run_ids, run_dirs):
+ ss_js_i = self._app.RunDirAppFiles.take_snapshot(dir_i) if dir_i else None
+ run_start_data[id_i] = (dt, ss_js_i, run_hostname, port_number)
+
+ self._pending.set_EAR_starts.update(run_start_data)
+ if save:
+ self.save()
+ return dt
+
+ def set_EAR_end(
+ self,
+ EAR_ID: int,
+ exit_code: int,
+ success: bool,
+ snapshot: bool,
+ save: bool = True,
+ ) -> datetime:
+ """
+ Mark an element action run as finished.
+ """
+ # TODO: save output files
+ dt = current_timestamp()
+ ss_js = self._app.RunDirAppFiles.take_snapshot() if snapshot else None
+ self._pending.set_EAR_ends[EAR_ID] = (dt, ss_js, exit_code, success)
+ if save:
+ self.save()
+ return dt
+
+ def set_multi_run_ends(
+ self,
+ run_ids: list[int],
+ run_dirs: list[Path | None],
+ exit_codes: list[int],
+ successes: list[bool],
+ save: bool = True,
+ ) -> datetime:
+ """Mark multiple element action runs as finished."""
+ self.logger.info("PersistentStore.set_multi_run_ends.")
+ dt = current_timestamp()
+ run_end_data: dict[int, tuple] = {}
+ for id_i, dir_i, ex_i, sc_i in zip(run_ids, run_dirs, exit_codes, successes):
+ ss_js_i = self._app.RunDirAppFiles.take_snapshot(dir_i) if dir_i else None
+ run_end_data[id_i] = (dt, ss_js_i, ex_i, sc_i)
+
+ self._pending.set_EAR_ends.update(run_end_data)
+ if save:
+ self.save()
+ self.logger.info("PersistentStore.set_multi_run_ends finished.")
+ return dt
+
+ def set_EAR_skip(self, skip_reasons: dict[int, int], save: bool = True) -> None:
+ """
+ Mark element action runs as skipped for the specified reasons.
+ """
+ self._pending.set_EAR_skips.update(skip_reasons)
+ if save:
+ self.save()
+
+ def set_EARs_initialised(self, iter_ID: int, save: bool = True) -> None:
+ """
+ Mark the EARs of an element iteration as initialised.
+ """
+ self._pending.set_EARs_initialised.append(iter_ID)
+ if save:
+ self.save()
+
+ def set_jobscript_metadata(
+ self,
+ sub_idx: int,
+ js_idx: int,
+ version_info: VersionInfo | None = None,
+ submit_time: str | None = None,
+ submit_hostname: str | None = None,
+ submit_machine: str | None = None,
+ shell_idx: int | None = None,
+ submit_cmdline: list[str] | None = None,
+ os_name: str | None = None,
+ shell_name: str | None = None,
+ scheduler_name: str | None = None,
+ scheduler_job_ID: str | None = None,
+ process_ID: int | None = None,
+ save: bool = True,
+ ):
+ """
+ Set the metadata for a job script.
+ """
+ entry = self._pending.set_js_metadata[sub_idx][js_idx]
+ if version_info:
+ entry["version_info"] = version_info
+ if submit_time:
+ entry["submit_time"] = submit_time
+ if submit_hostname:
+ entry["submit_hostname"] = submit_hostname
+ if submit_machine:
+ entry["submit_machine"] = submit_machine
+ if shell_idx is not None:
+ entry["shell_idx"] = shell_idx
+ if submit_cmdline:
+ entry["submit_cmdline"] = submit_cmdline
+ if os_name:
+ entry["os_name"] = os_name
+ if shell_name:
+ entry["shell_name"] = shell_name
+ if scheduler_name:
+ entry["scheduler_name"] = scheduler_name
+ if scheduler_job_ID or process_ID:
+ entry["scheduler_job_ID"] = scheduler_job_ID
+ if process_ID or scheduler_job_ID:
+ entry["process_ID"] = process_ID
+ if save:
+ self.save()
+
+ @writes_parameter_data
+ def _add_parameter(
+ self,
+ is_set: bool,
+ source: ParamSource,
+ data: (
+ ParameterValue | list | tuple | set | dict | int | float | str | None | Any
+ ) = None,
+ file: File | None = None,
+ save: bool = True,
+ ) -> int:
+ """Add a new parameter, optionally with a value and/or an associated file."""
+ self.logger.debug(f"Adding store parameter{' (unset)' if not is_set else ''}.")
+ new_idx = self._get_num_total_parameters()
+ self._pending.add_parameters[new_idx] = self._store_param_cls()(
+ id_=new_idx,
+ is_pending=True,
+ is_set=is_set,
+ data=PARAM_DATA_NOT_SET if not is_set else data,
+ file=file,
+ source=source,
+ )
+ if save:
+ self.save()
+ return new_idx
+
+ def _prepare_set_file(
+ self,
+ store_contents: bool,
+ is_input: bool,
+ path: Path | str,
+ contents: str | None = None,
+ filename: str | None = None,
+ clean_up: bool = False,
+ ) -> File:
+ if filename is None:
+ filename = Path(path).name
+
+ if store_contents:
+ if is_input:
+ new_idx = self._get_num_total_input_files()
+ dst_dir = Path(self.workflow.input_files_path, str(new_idx))
+ dst_path = dst_dir / filename
+ else:
+ # assume path is inside the EAR execution directory; transform that to the
+ # equivalent artifacts directory:
+ exec_sub_path = Path(path).relative_to(self.path)
+ dst_path = Path(
+ self.workflow.task_artifacts_path, *exec_sub_path.parts[1:]
+ )
+ if dst_path.is_file():
+ dst_path = dst_path.with_suffix(dst_path.suffix + "_2") # TODO: better!
+ else:
+ dst_path = Path(path)
+
+ file_param_dat: File = {
+ "store_contents": store_contents,
+ "path": str(dst_path.relative_to(self.path)),
+ }
+ self._pending.add_files.append(
+ {
+ "store_contents": store_contents,
+ "is_input": is_input,
+ "dst_path": str(dst_path),
+ "path": str(path),
+ "contents": contents or "",
+ "clean_up": clean_up,
+ }
+ )
+
+ return file_param_dat
+
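A sketch of the output-file destination transform above, using `pathlib` directly (the `execute` and `artifacts` directory names are illustrative assumptions, not from the source):

    from pathlib import Path

    wf = Path("/wf")                              # assumed workflow root
    path = wf / "execute" / "task_1" / "out.dat"  # hypothetical EAR output file
    exec_sub_path = path.relative_to(wf)          # execute/task_1/out.dat
    # drop the first component and re-root under the artifacts directory:
    dst = Path(wf / "artifacts", *exec_sub_path.parts[1:])
    assert dst == wf / "artifacts" / "task_1" / "out.dat"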
+ def set_file(
+ self,
+ store_contents: bool,
+ is_input: bool,
+ param_id: int | None,
+ path: Path | str,
+ contents: str | None = None,
+ filename: str | None = None,
+ clean_up: bool = False,
+ save: bool = True,
+ ):
+ """
+ Set details of a file, including whether it is associated with a parameter.
+ """
+ self.logger.debug("Setting new file")
+ file_param_dat = self._prepare_set_file(
+ store_contents=store_contents,
+ is_input=is_input,
+ path=path,
+ contents=contents,
+ filename=filename,
+ clean_up=clean_up,
+ )
+ if param_id is not None:
+ self.set_parameter_value(
+ param_id, value=file_param_dat, is_file=True, save=save
+ )
+ if save:
+ self.save()
+
+ def add_file(
+ self,
+ store_contents: bool,
+ is_input: bool,
+ source: ParamSource,
+ path: Path | str,
+ contents: str | None = None,
+ filename: str | None = None,
+ save: bool = True,
+ ):
+ """
+ Add a file that will be associated with a parameter.
+ """
+ self.logger.debug("Adding new file")
+ file_param_dat = self._prepare_set_file(
+ store_contents=store_contents,
+ is_input=is_input,
+ path=path,
+ contents=contents,
+ filename=filename,
+ )
+ p_id = self._add_parameter(
+ file=file_param_dat,
+ is_set=True,
+ source=source,
+ save=save,
+ )
+ if save:
+ self.save()
+ return p_id
+
+ def _append_files(self, files: list[FileDescriptor]):
+ """Add new files to the files or artifacts directories."""
+ for dat in files:
+ if dat["store_contents"]:
+ dst_path = Path(dat["dst_path"])
+ dst_path.parent.mkdir(parents=True, exist_ok=True)
+ if dat["path"] is not None:
+ # copy from source path to destination:
+ shutil.copy(dat["path"], dst_path)
+ if dat["clean_up"]:
+ self.logger.info(f"deleting file {dat['path']}")
+ os.remove(dat["path"])
+ else:
+ # write out text file:
+ with dst_path.open("wt") as fp:
+ fp.write(dat["contents"])
+
+ @writes_parameter_data
+ def add_set_parameter(
+ self,
+ data: ParameterValue | list | tuple | set | dict | int | float | str | Any,
+ source: ParamSource,
+ save: bool = True,
+ ) -> int:
+ """
+ Add a parameter that is set to a value.
+ """
+ return self._add_parameter(data=data, is_set=True, source=source, save=save)
+
+ @writes_parameter_data
+ def add_unset_parameter(self, source: ParamSource, save: bool = True) -> int:
+ """
+ Add a parameter that is not set to any value.
+ """
+ return self._add_parameter(data=None, is_set=False, source=source, save=save)
+
+ @abstractmethod
+ def _set_parameter_values(self, set_parameters: dict[int, tuple[Any, bool]]): ...
+
+ @writes_parameter_data
+ def set_parameter_value(
+ self, param_id: int, value: Any, is_file: bool = False, save: bool = True
+ ):
+ """
+ Set the value of a parameter.
+ """
+ self.logger.debug(
+ f"Setting store parameter ID {param_id} value with type: {type(value)!r}."
+ )
+ self._pending.set_parameters[param_id] = (value, is_file)
+ if save:
+ self.save()
+
+ @writes_parameter_data
+ def set_parameter_values(self, values: dict[int, Any], save: bool = True):
+ """Set multiple non-file parameter values by parameter IDs."""
+ param_ids = values.keys()
+ self.logger.debug(f"Setting multiple store parameter IDs {param_ids!r}.")
+ self._pending.set_parameters.update({k: (v, False) for k, v in values.items()})
+ if save:
+ self.save()
+
+ @TimeIt.decorator
+ @writes_parameter_data
+ def update_param_source(
+ self, param_sources: Mapping[int, ParamSource], save: bool = True
+ ) -> None:
+ """
+ Set the source of a parameter.
+ """
+ self.logger.debug(f"Updating parameter sources with {param_sources!r}.")
+ self._pending.update_param_sources.update(param_sources)
+ if save:
+ self.save()
+
+ def update_loop_num_iters(
+ self,
+ index: int,
+ num_added_iters: Mapping[tuple[int, ...], int],
+ save: bool = True,
+ ) -> None:
+ """
+ Update the number of added iterations for a loop.
+ """
+ self.logger.debug(
+ f"Updating loop {index!r} num added iterations to {num_added_iters!r}."
+ )
+ self._pending.update_loop_num_iters[index] = [
+ [list(k), v] for k, v in num_added_iters.items()
+ ]
+ if save:
+ self.save()
+
+ def update_loop_parents(
+ self,
+ index: int,
+ num_added_iters: Mapping[tuple[int, ...], int],
+ parents: Sequence[str],
+ save: bool = True,
+ ) -> None:
+ """
+ Set the parents of a loop.
+ """
+ self.logger.debug(
+ f"Updating loop {index!r} parents to {parents!r}, and num added iterations "
+ f"to {num_added_iters}."
+ )
+ self._pending.update_loop_num_iters[index] = [
+ [list(k), v] for k, v in num_added_iters.items()
+ ]
+ self._pending.update_loop_parents[index] = list(parents)
+ if save:
+ self.save()
+
+ def update_iter_data_indices(self, data_indices: dict[int, DataIndex]):
+ """Update data indices of one or more iterations."""
+ for k, v in data_indices.items():
+ self._pending.update_iter_data_idx[k].update(v)
+
+ def update_run_data_indices(self, data_indices: dict[int, DataIndex]):
+ """Update data indices of one or more runs."""
+ for k, v in data_indices.items():
+ self._pending.update_run_data_idx[k].update(v)
+
+ def get_template_components(self) -> dict[str, Any]:
+ """Get all template components, including pending."""
+ tc = copy.deepcopy(self._get_persistent_template_components())
+ for typ in TEMPLATE_COMP_TYPES:
+ for hash_i, dat_i in self._pending.add_template_components.get(
+ typ, {}
+ ).items():
+ tc.setdefault(typ, {})[hash_i] = dat_i
+
+ return tc
+
+ @abstractmethod
+ def _get_persistent_template_components(self) -> dict[str, Any]: ...
+
+ def get_template(self) -> dict[str, JSONed]:
+ """
+ Get the workflow template.
+ """
+ return self._get_persistent_template()
+
+ @abstractmethod
+ def _get_persistent_template(self) -> dict[str, JSONed]: ...
+
+ def _get_task_id_to_idx_map(self) -> dict[int, int]:
+ return {task.id_: task.index for task in self.get_tasks()}
+
+ @TimeIt.decorator
+ def get_task(self, task_idx: int) -> AnySTask:
+ """
+ Get a task.
+ """
+ return self.get_tasks()[task_idx]
+
+ def __process_retrieved_tasks(self, tasks: Iterable[AnySTask]) -> list[AnySTask]:
+ """Add pending data to retrieved tasks."""
+ tasks_new: list[AnySTask] = []
+ for task in tasks:
+ # consider pending element IDs:
+ if pend_elems := self._pending.add_elem_IDs.get(task.id_):
+ task = task.append_element_IDs(pend_elems)
+ tasks_new.append(task)
+ return tasks_new
+
+ def __process_retrieved_loops(
+ self, loops: Iterable[tuple[int, LoopDescriptor]]
+ ) -> dict[int, LoopDescriptor]:
+ """Add pending data to retrieved loops."""
+ loops_new: dict[int, LoopDescriptor] = {}
+ for id_, loop_i in loops:
+ if "num_added_iterations" not in loop_i:
+ loop_i["num_added_iterations"] = 1
+ # consider pending changes to num added iterations:
+ if pend_num_iters := self._pending.update_loop_num_iters.get(id_):
+ loop_i["num_added_iterations"] = pend_num_iters
+ # consider pending change to parents:
+ if pend_parents := self._pending.update_loop_parents.get(id_):
+ loop_i["parents"] = pend_parents
+
+ loops_new[id_] = loop_i
+ return loops_new
+
+ @staticmethod
+ def __split_pending(
+ ids: Iterable[int], all_pending: Mapping[int, Any]
+ ) -> tuple[tuple[int, ...], set[int], set[int]]:
+ id_all = tuple(ids)
+ id_set = set(id_all)
+ id_pers = id_set.difference(all_pending)
+ id_pend = id_set.intersection(all_pending)
+ return id_all, id_pers, id_pend
+
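A worked example of the split: the requested order (duplicates included) is preserved in the first return value, while the two sets drive where each ID is fetched from:

    ids = [5, 2, 9, 2]
    pending = {9: "pending-item", 11: "another"}   # keys are the pending IDs
    id_all = tuple(ids)                            # (5, 2, 9, 2): order kept
    id_pers = set(id_all).difference(pending)      # {2, 5}: read from persistent storage
    id_pend = set(id_all).intersection(pending)    # {9}: read from the pending buffer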
+ @abstractmethod
+ def _get_persistent_tasks(self, id_lst: Iterable[int]) -> dict[int, AnySTask]: ...
+
+ def get_tasks_by_IDs(self, ids: Iterable[int]) -> Sequence[AnySTask]:
+ """
+ Get tasks with the given IDs.
+ """
+ # separate pending and persistent IDs:
+
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_tasks)
+ tasks = self._get_persistent_tasks(id_pers) if id_pers else {}
+ tasks.update((id_, self._pending.add_tasks[id_]) for id_ in id_pend)
+
+ # order as requested:
+ return self.__process_retrieved_tasks(tasks[id_] for id_ in ids)
+
+ @TimeIt.decorator
+ def get_tasks(self) -> list[AnySTask]:
+ """Retrieve all tasks, including pending."""
+ tasks = self._get_persistent_tasks(range(self._get_num_persistent_tasks()))
+ tasks.update(self._pending.add_tasks)
+
+ # order by index:
+ return self.__process_retrieved_tasks(
+ sorted(tasks.values(), key=lambda x: x.index)
+ )
+
+ @abstractmethod
+ def _get_persistent_loops(
+ self, id_lst: Iterable[int] | None = None
+ ) -> dict[int, LoopDescriptor]: ...
+
+ def get_loops_by_IDs(self, ids: Iterable[int]) -> dict[int, LoopDescriptor]:
+ """Retrieve loops by index (ID), including pending."""
+
+ # separate pending and persistent IDs:
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_loops)
+
+ loops = self._get_persistent_loops(id_pers) if id_pers else {}
+ loops.update((id_, self._pending.add_loops[id_]) for id_ in id_pend)
+
+ # order as requested:
+ return self.__process_retrieved_loops((id_, loops[id_]) for id_ in ids)
+
+ def get_loops(self) -> dict[int, LoopDescriptor]:
+ """Retrieve all loops, including pending."""
+
+ loops = self._get_persistent_loops()
+ loops.update(self._pending.add_loops)
+
+ # order by index/ID:
+ return self.__process_retrieved_loops(sorted(loops.items()))
+
+ @abstractmethod
+ def _get_persistent_submissions(
+ self, id_lst: Iterable[int] | None = None
+ ) -> dict[int, Mapping[str, JSONed]]: ...
+
+ @TimeIt.decorator
+ def get_submissions(self) -> dict[int, Mapping[str, JSONed]]:
+ """Retrieve all submissions, including pending."""
+
+ subs = self._get_persistent_submissions()
+ subs.update(self._pending.add_submissions)
+
+ # order by index/ID
+ return dict(sorted(subs.items()))
+
+ @TimeIt.decorator
+ def get_submission_at_submit_metadata(
+ self, sub_idx: int, metadata_attr: dict[str, Any] | None
+ ) -> dict[str, Any]:
+ """Retrieve the values of submission attributes that are stored at submit-time.
+
+ Notes
+ -----
+ This method may need to be overridden if these attributes are stored separately
+ from the remainder of the submission attributes.
+
+ """
+ return metadata_attr or {i: None for i in SUBMISSION_SUBMIT_TIME_KEYS}
+
+ @TimeIt.decorator
+ def get_jobscript_at_submit_metadata(
+ self,
+ sub_idx: int,
+ js_idx: int,
+ metadata_attr: dict[str, Any] | None,
+ ) -> dict[str, Any]:
+ """For the specified jobscript, retrieve the values of jobscript-submit-time
+ attributes.
+
+ Notes
+ -----
+ This method may need to be overridden if these jobscript-submit-time attributes
+ are stored separately from the remainder of the jobscript attributes.
+
+ """
+ return metadata_attr or {i: None for i in JOBSCRIPT_SUBMIT_TIME_KEYS}
+
+ @TimeIt.decorator
+ def get_jobscript_block_run_ID_array(
+ self, sub_idx: int, js_idx: int, blk_idx: int, run_ID_arr: NDArray | None
+ ) -> NDArray:
+ """For the specified jobscript-block, retrieve the run ID array.
+
+ Notes
+ -----
+ This method may need to be overridden if these attributes are stored separately
+ from the remainder of the submission attributes.
+
+ """
+ assert run_ID_arr is not None
+ return np.asarray(run_ID_arr)
+
+ @TimeIt.decorator
+ def get_jobscript_block_task_elements_map(
+ self,
+ sub_idx: int,
+ js_idx: int,
+ blk_idx: int,
+ task_elems_map: dict[int, list[int]] | None,
+ ) -> dict[int, list[int]]:
+ """For the specified jobscript-block, retrieve the task-elements mapping.
+
+ Notes
+ -----
+ This method may need to be overridden if these attributes are stored separately
+ from the remainder of the submission attributes.
+
+ """
+ assert task_elems_map is not None
+ return task_elems_map
+
+ @TimeIt.decorator
+ def get_jobscript_block_task_actions_array(
+ self,
+ sub_idx: int,
+ js_idx: int,
+ blk_idx: int,
+ task_actions_arr: NDArray | list[tuple[int, int, int]] | None,
+ ) -> NDArray:
+ """For the specified jobscript-block, retrieve the task-actions array.
+
+ Notes
+ -----
+ This method may need to be overridden if these attributes are stored separately
+ from the remainder of the submission attributes.
+
+ """
+ assert task_actions_arr is not None
+ return np.asarray(task_actions_arr)
+
+ @TimeIt.decorator
+ def get_jobscript_block_dependencies(
+ self,
+ sub_idx: int,
+ js_idx: int,
+ blk_idx: int,
+ js_dependencies: dict[tuple[int, int], ResolvedJobscriptBlockDependencies] | None,
+ ) -> dict[tuple[int, int], ResolvedJobscriptBlockDependencies]:
+ """For the specified jobscript-block, retrieve the dependencies.
+
+ Notes
+ -----
+ This method may need to be overridden if these attributes are stored separately
+ from the remainder of the submission attributes.
+
+ """
+ assert js_dependencies is not None
+ return js_dependencies
+
+ @TimeIt.decorator
+ def get_submissions_by_ID(
+ self, ids: Iterable[int]
+ ) -> dict[int, Mapping[str, JSONed]]:
+ """
+ Get submissions with the given IDs.
+ """
+ # separate pending and persistent IDs:
+ _, id_pers, id_pend = self.__split_pending(ids, self._pending.add_submissions)
+ subs = self._get_persistent_submissions(id_pers) if id_pers else {}
+ subs.update((id_, self._pending.add_submissions[id_]) for id_ in id_pend)
+
+ # order by index/ID
+ return dict(sorted(subs.items()))
+
+ @abstractmethod
+ def _get_persistent_elements(
+ self, id_lst: Iterable[int]
+ ) -> dict[int, AnySElement]: ...
+
+ @TimeIt.decorator
+ def get_elements(self, ids: Iterable[int]) -> Sequence[AnySElement]:
+ """
+ Get elements with the given IDs.
+ """
+ # separate pending and persistent IDs:
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_elements)
+ self.logger.debug(
+ f"PersistentStore.get_elements: {len(ids)} elements: "
+ f"{shorten_list_str(ids)}."
+ )
+ elems = self._get_persistent_elements(id_pers) if id_pers else {}
+ elems.update((id_, self._pending.add_elements[id_]) for id_ in id_pend)
+
+ elems_new: list[AnySElement] = []
+ # order as requested:
+ for elem_i in (elems[id_] for id_ in ids):
+ # consider pending iteration IDs:
+ # TODO: does this consider pending iterations from new loop iterations?
+ if pend_iters := self._pending.add_elem_iter_IDs.get(elem_i.id_):
+ elem_i = elem_i.append_iteration_IDs(pend_iters)
+ elems_new.append(elem_i)
+
+ return elems_new
+
+ @abstractmethod
+ def _get_persistent_element_iters(
+ self, id_lst: Iterable[int]
+ ) -> dict[int, AnySElementIter]: ...
+
+ @TimeIt.decorator
+ def get_element_iterations(self, ids: Iterable[int]) -> Sequence[AnySElementIter]:
+ """
+ Get element iterations with the given IDs.
+ """
+ # separate pending and persistent IDs:
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_elem_iters)
+ self.logger.debug(
+ f"PersistentStore.get_element_iterations: {len(ids)} iterations: "
+ f"{shorten_list_str(ids)}."
+ )
+ iters = self._get_persistent_element_iters(id_pers) if id_pers else {}
+ iters.update((id_, self._pending.add_elem_iters[id_]) for id_ in id_pend)
+
+ iters_new: list[AnySElementIter] = []
+ # order as requested:
+ for iter_i in (iters[id_] for id_ in ids):
+ # consider pending EAR IDs:
+ if pend_EARs := self._pending.add_elem_iter_EAR_IDs.get(iter_i.id_):
+ iter_i = iter_i.append_EAR_IDs(pend_EARs)
+
+ # consider pending loop idx
+ if pend_loop_idx := self._pending.update_loop_indices.get(iter_i.id_):
+ iter_i = iter_i.update_loop_idx(pend_loop_idx)
+
+ # consider pending `EARs_initialised`:
+ if iter_i.id_ in self._pending.set_EARs_initialised:
+ iter_i = iter_i.set_EARs_initialised()
+
+ iters_new.append(iter_i)
+
+ return iters_new
+
+ @abstractmethod
+ def _get_persistent_EARs(self, id_lst: Iterable[int]) -> dict[int, AnySEAR]: ...
+
+ @TimeIt.decorator
+ def get_EARs(self, ids: Iterable[int]) -> Sequence[AnySEAR]:
+ """
+ Get element action runs with the given IDs.
+ """
+ # separate pending and persistent IDs:
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_EARs)
+ self.logger.debug(
+ f"PersistentStore.get_EARs: {len(ids)} EARs: {shorten_list_str(ids)}."
+ )
+ EARs = self._get_persistent_EARs(id_pers) if id_pers else {}
+ EARs.update((id_, self._pending.add_EARs[id_]) for id_ in id_pend)
+
+ EARs_new: list[AnySEAR] = []
+ # order as requested:
+ for EAR_i in (EARs[id_] for id_ in ids):
+ # consider updates:
+ updates: dict[str, Any] = {}
+ if EAR_i.id_ in self._pending.set_EAR_skips:
+ updates["skip"] = True
+ (
+ updates["submission_idx"],
+ updates["commands_file_ID"],
+ ) = self._pending.set_EAR_submission_data.get(EAR_i.id_, (None, None))
+ (
+ updates["start_time"],
+ updates["snapshot_start"],
+ updates["run_hostname"],
+ updates["port_number"],
+ ) = self._pending.set_EAR_starts.get(EAR_i.id_, (None, None, None, None))
+ (
+ updates["end_time"],
+ updates["snapshot_end"],
+ updates["exit_code"],
+ updates["success"],
+ ) = self._pending.set_EAR_ends.get(EAR_i.id_, (None, None, None, None))
+ if any(i is not None for i in updates.values()):
+ EAR_i = EAR_i.update(**updates)
+
+ EARs_new.append(EAR_i)
+
+ return EARs_new
+
+ @TimeIt.decorator
+ def __get_cached_persistent_items(
+ self, id_lst: Iterable[int], cache: dict[int, T]
+ ) -> tuple[dict[int, T], list[int]]:
+ """Get items from the given cache (the caller chooses which cache), returning
+ the cached items plus the IDs that must still be fetched."""
+ if self.use_cache:
+ id_cached = set(id_lst)
+ id_non_cached = sorted(id_cached.difference(cache))
+ id_cached.intersection_update(cache)
+ items = {id_: cache[id_] for id_ in sorted(id_cached)}
+ else:
+ items = {}
+ id_non_cached = list(id_lst)
+ return items, id_non_cached
+
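A worked example of the partition when the cache is enabled: cached IDs are answered from the cache and the sorted remainder is fetched by the caller:

    cache = {1: "a", 3: "c"}
    id_lst = [3, 4, 1, 5]
    id_cached = set(id_lst)
    id_non_cached = sorted(id_cached.difference(cache))     # [4, 5]: still to fetch
    id_cached.intersection_update(cache)                    # {1, 3}
    items = {id_: cache[id_] for id_ in sorted(id_cached)}  # {1: "a", 3: "c"}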
+ def _get_cached_persistent_EARs(
+ self, id_lst: Iterable[int]
+ ) -> tuple[dict[int, AnySEAR], list[int]]:
+ return self.__get_cached_persistent_items(id_lst, self.EAR_cache)
+
+ def _get_cached_persistent_element_iters(
+ self, id_lst: Iterable[int]
+ ) -> tuple[dict[int, AnySElementIter], list[int]]:
+ return self.__get_cached_persistent_items(id_lst, self.element_iter_cache)
+
+ def _get_cached_persistent_elements(
+ self, id_lst: Iterable[int]
+ ) -> tuple[dict[int, AnySElement], list[int]]:
+ return self.__get_cached_persistent_items(id_lst, self.element_cache)
+
+ def _get_cached_persistent_tasks(
+ self, id_lst: Iterable[int]
+ ) -> tuple[dict[int, AnySTask], list[int]]:
+ return self.__get_cached_persistent_items(id_lst, self.task_cache)
+
+ def _get_cached_persistent_param_sources(
+ self, id_lst: Iterable[int]
+ ) -> tuple[dict[int, ParamSource], list[int]]:
+ return self.__get_cached_persistent_items(id_lst, self.param_sources_cache)
+
+ def _get_cached_persistent_parameters(
+ self, id_lst: Iterable[int]
+ ) -> tuple[dict[int, AnySParameter], list[int]]:
+ return self.__get_cached_persistent_items(id_lst, self.parameter_cache)
+
+ def get_EAR_skipped(self, EAR_ID: int) -> int:
+ """
+ Get the skip reason for the element action run with the given ID (zero/falsy
+ if the run was not skipped).
+ """
+ self.logger.debug(f"PersistentStore.get_EAR_skipped: EAR_ID={EAR_ID!r}")
+ return self.get_EARs((EAR_ID,))[0].skip
+
+ @TimeIt.decorator
+ def get_parameters(self, ids: Iterable[int], **kwargs) -> list[AnySParameter]:
+ """
+ Get parameters with the given IDs.
+
+ Parameters
+ ----------
+ ids:
+ The IDs of the parameters to get.
+
+ Keyword Arguments
+ -----------------
+ dataset_copy: bool
+ For Zarr stores only. If True, copy arrays as NumPy arrays.
+ """
+ # separate pending and persistent IDs:
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_parameters)
+ params = (
+ dict(self._get_persistent_parameters(id_pers, **kwargs)) if id_pers else {}
+ )
+ params.update((id_, self._pending.add_parameters[id_]) for id_ in id_pend)
+
+ # order as requested:
+ return [params[id_] for id_ in ids]
+
+ @abstractmethod
+ def _get_persistent_parameters(
+ self, id_lst: Iterable[int], **kwargs
+ ) -> Mapping[int, AnySParameter]: ...
+
+ @TimeIt.decorator
+ def get_parameter_set_statuses(self, ids: Iterable[int]) -> list[bool]:
+ """
+ Get whether the parameters with the given IDs are set.
+ """
+ # separate pending and persistent IDs:
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_parameters)
+ set_status = self._get_persistent_parameter_set_status(id_pers) if id_pers else {}
+ set_status.update(
+ (id_, self._pending.add_parameters[id_].is_set) for id_ in id_pend
+ )
+
+ # order as requested:
+ return [set_status[id_] for id_ in ids]
+
+ @abstractmethod
+ def _get_persistent_parameter_set_status(
+ self, id_lst: Iterable[int]
+ ) -> dict[int, bool]: ...
+
+ @TimeIt.decorator
+ def get_parameter_sources(self, ids: Iterable[int]) -> list[ParamSource]:
+ """
+ Get the sources of the parameters with the given IDs.
+ """
+ # separate pending and persistent IDs:
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_parameters)
+ src = self._get_persistent_param_sources(id_pers) if id_pers else {}
+ src.update((id_, self._pending.add_parameters[id_].source) for id_ in id_pend)
+
+ # order as requested, and consider pending source updates:
+ return [
+ self.__merge_param_source(
+ src[id_i], self._pending.update_param_sources.get(id_i)
+ )
+ for id_i in ids
+ ]
+
+ @staticmethod
+ def __merge_param_source(
+ src_i: ParamSource, pend_src: ParamSource | None
+ ) -> ParamSource:
+ """
+ Helper to merge a second dict in if it is provided.
+ """
+ return {**src_i, **pend_src} if pend_src else src_i
+
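The merge gives pending updates precedence key by key (the source keys below are illustrative only, not from the source):

    src = {"type": "default", "task_insert_ID": 0}
    pend = {"type": "EAR_output"}
    merged = {**src, **pend}    # pending keys win over persistent ones
    assert merged == {"type": "EAR_output", "task_insert_ID": 0}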
+ @abstractmethod
+ def _get_persistent_param_sources(
+ self, id_lst: Iterable[int]
+ ) -> dict[int, ParamSource]: ...
+
+ @TimeIt.decorator
+ def get_task_elements(
+ self,
+ task_id: int,
+ idx_lst: Iterable[int] | None = None,
+ ) -> Iterator[Mapping[str, Any]]:
+ """
+ Get element data by indices within a given task.
+
+ Element iterations and EARs belonging to the elements are included.
+ """
+
+ all_elem_IDs = self.get_task(task_id).element_IDs
+ store_elements = self.get_elements(
+ all_elem_IDs if idx_lst is None else (all_elem_IDs[idx] for idx in idx_lst)
+ )
+ iter_IDs_flat, iter_IDs_lens = flatten(
+ [el.iteration_IDs for el in store_elements]
+ )
+ store_iters = self.get_element_iterations(iter_IDs_flat)
+
+ # retrieve EARs:
+ EARs_dcts = remap(
+ [list((elit.EAR_IDs or {}).values()) for elit in store_iters],
+ lambda ears: [ear.to_dict() for ear in self.get_EARs(ears)],
+ )
+
+ # add EARs to iterations:
+ iters: list[dict[str, Any]] = []
+ for idx, i in enumerate(store_iters):
+ EARs: dict[int, dict[str, Any]] | None = None
+ if i.EAR_IDs is not None:
+ EARs = dict(zip(i.EAR_IDs, cast("Any", EARs_dcts[idx])))
+ iters.append(i.to_dict(EARs))
+
+ # reshape iterations:
+ iters_rs = reshape(iters, iter_IDs_lens)
+
+ # add iterations to elements:
+ for idx, element in enumerate(store_elements):
+ yield element.to_dict(iters_rs[idx])
+
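`flatten`, `reshape` and `remap` are helpers whose semantics are assumed here from their use: `flatten` returns a flat list plus per-sublist lengths, `reshape` inverts it, and `remap` applies a function across nested lists while preserving shape. A sketch of the assumed round-trip:

    nested = [[10, 11], [12]]          # e.g. iteration IDs per element
    flat, lens = [10, 11, 12], [2, 1]  # what flatten(nested) is assumed to return
    regrouped, i = [], 0
    for n in lens:                     # what reshape(flat, lens) is assumed to do
        regrouped.append(flat[i : i + n])
        i += n
    assert regrouped == nested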
+ @abstractmethod
+ def _get_persistent_parameter_IDs(self) -> Iterable[int]: ...
+
+ def check_parameters_exist(self, ids: Sequence[int]) -> Iterator[bool]:
+ """
+ For each parameter ID, return True if it exists, else False.
+ """
+ id_miss = set()
+ if id_not_pend := set(ids).difference(self._pending.add_parameters):
+ id_miss = id_not_pend.difference(self._get_persistent_parameter_IDs())
+ return (id_ not in id_miss for id_ in ids)
+
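The double set-difference means persistent storage is only consulted for IDs that are not pending. A worked example:

    ids = [1, 7, 3]
    pending = {7}                                 # pending parameter IDs
    persistent = {1, 2}                           # IDs in persistent storage
    id_not_pend = set(ids).difference(pending)    # {1, 3}: must check persistence
    id_miss = id_not_pend.difference(persistent)  # {3}: exists nowhere
    assert [i not in id_miss for i in ids] == [True, True, False]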
+ @abstractmethod
+ def _append_tasks(self, tasks: Iterable[AnySTask]) -> None: ...
+
+ @abstractmethod
+ def _append_loops(self, loops: dict[int, LoopDescriptor]) -> None: ...
+
+ @abstractmethod
+ def _append_submissions(self, subs: dict[int, Mapping[str, JSONed]]) -> None: ...
+
+ @abstractmethod
+ def _update_at_submit_metadata(
+ self, at_submit_metadata: dict[int, dict[str, Any]]
+ ) -> None: ...
+
+ @abstractmethod
+ def _append_elements(self, elems: Sequence[AnySElement]) -> None: ...
+
+ @abstractmethod
+ def _append_element_sets(self, task_id: int, es_js: Sequence[Mapping]) -> None: ...
+
+ @abstractmethod
+ def _append_elem_iter_IDs(self, elem_ID: int, iter_IDs: Iterable[int]) -> None: ...
+
+ @abstractmethod
+ def _append_elem_iters(self, iters: Sequence[AnySElementIter]) -> None: ...
+
+ @abstractmethod
+ def _append_elem_iter_EAR_IDs(
+ self, iter_ID: int, act_idx: int, EAR_IDs: Sequence[int]
+ ) -> None: ...
+
+ @abstractmethod
+ def _append_EARs(self, EARs: Sequence[AnySEAR]) -> None: ...
+
+ @abstractmethod
+ def _update_elem_iter_EARs_initialised(self, iter_ID: int) -> None: ...
+
+ @abstractmethod
+ def _update_EAR_submission_data(
+ self, sub_data: Mapping[int, tuple[int, int | None]]
+ ): ...
+
+ @abstractmethod
+ def _update_EAR_start(
+ self,
+ run_starts: dict[int, tuple[datetime, dict[str, Any] | None, str, int | None]],
+ ) -> None: ...
+
+ @abstractmethod
+ def _update_EAR_end(
+ self, run_ends: dict[int, tuple[datetime, dict[str, Any] | None, int, bool]]
+ ) -> None: ...
+
+ @abstractmethod
+ def _update_EAR_skip(self, skips: dict[int, int]) -> None: ...
+
+ @abstractmethod
+ def _update_js_metadata(
+ self, js_meta: dict[int, dict[int, dict[str, Any]]]
+ ) -> None: ...
+
+ @abstractmethod
+ def _append_parameters(self, params: Sequence[AnySParameter]) -> None: ...
+
+ @abstractmethod
+ def _update_template_components(self, tc: dict[str, Any]) -> None: ...
+
+ @abstractmethod
+ def _update_parameter_sources(self, sources: Mapping[int, ParamSource]) -> None: ...
+
+ @abstractmethod
+ def _update_loop_index(self, loop_indices: dict[int, dict[str, int]]) -> None: ...
+
+ @abstractmethod
+ def _update_loop_num_iters(
+ self, index: int, num_iters: list[list[list[int] | int]]
+ ) -> None: ...
+
+ @abstractmethod
+ def _update_loop_parents(self, index: int, parents: list[str]) -> None: ...
+
+ @overload
+ def using_resource(
+ self, res_label: Literal["metadata"], action: str
+ ) -> AbstractContextManager[Metadata]: ...
+
+ @overload
+ def using_resource(
+ self, res_label: Literal["submissions"], action: str
+ ) -> AbstractContextManager[list[dict[str, JSONed]]]: ...
+
+ @overload
+ def using_resource(
+ self, res_label: Literal["parameters"], action: str
+ ) -> AbstractContextManager[dict[str, dict[str, Any]]]: ...
+
+ @overload
+ def using_resource(
+ self, res_label: Literal["runs"], action: str
+ ) -> AbstractContextManager[dict[str, Any]]: ...
+
+ @overload
+ def using_resource(
+ self, res_label: Literal["attrs"], action: str
+ ) -> AbstractContextManager[ZarrAttrsDict]: ...
+
+ @contextlib.contextmanager
+ def using_resource(
+ self,
+ res_label: Literal["metadata", "submissions", "parameters", "attrs", "runs"],
+ action: str,
+ ) -> Iterator[Any]:
+ """Context manager for managing `StoreResource` objects associated with the store."""
+
+ try:
+ res = self._resources[res_label]
+ except KeyError:
+ raise RuntimeError(
+ f"{self.__class__.__name__!r} has no resource named {res_label!r}."
+ ) from None
+
+ key = (res_label, action)
+ if key in self._resources_in_use:
+ # retrieve existing data for this action:
+ yield res.data[action]
+
+ else:
+ try:
+ # "open" the resource, which assigns data for this action, which we yield:
+ res.open(action)
+ self._resources_in_use.add(key)
+ yield res.data[action]
+
+ except Exception as exc:
+ self._resources_in_use.remove(key)
+ raise exc
+
+ else:
+ # "close" the resource, clearing cached data for this action:
+ res.close(action)
+ self._resources_in_use.remove(key)
+
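The overloads only narrow the yielded type per resource label; at runtime the manager is re-entrant per `(res_label, action)` key, so nested uses share the one open resource and only the outermost exit closes it. A hedged usage sketch (the `store` object and the `"update"` action string are assumptions):

    with store.using_resource("metadata", action="update") as md:
        with store.using_resource("metadata", action="update") as md_inner:
            assert md_inner is md      # same open resource; not re-opened
    # the resource is "closed" (its cached data cleared) only at the outer exit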
+ def copy(self, path: PathLike = None) -> Path:
+ """Copy the workflow store.
+
+ This does not work on remote filesystems.
+
+ """
+ assert self.fs is not None
+ if path is None:
+ _path = Path(self.path)
+ path = _path.parent / Path(_path.stem + "_copy" + _path.suffix)
+
+ if self.fs.exists(str(path)):
+ raise ValueError(f"Path already exists: {path}.")
+ else:
+ path = str(path)
+
+ self.fs.copy(self.path, path)
+
+ return Path(self.workflow._store.path).replace(path)
+
+ def delete(self) -> None:
+ """Delete the persistent workflow."""
+ confirm = input(
+ f"Permanently delete the workflow at path {self.path!r}; [y]es or [n]o?"
+ )
+ if confirm.strip().lower() == "y":
+ self.delete_no_confirm()
+
+ def delete_no_confirm(self) -> None:
+ """Permanently delete the workflow data with no confirmation."""
+
+ fs = self.fs
+ assert fs is not None
+
+ @self._app.perm_error_retry()
+ def _delete_no_confirm() -> None:
+ self.logger.debug(f"_delete_no_confirm: {self.path!r}.")
+ fs.rm(self.path, recursive=True)
+
+ return _delete_no_confirm()
+
+ def get_text_file(self, path: str | Path) -> str:
+ """Retrieve the contents of a text file stored within the workflow.
+
+ Parameters
+ ----------
+ path
+ The path to a text file stored within the workflow. This can either be an
+ absolute path or a path that is relative to the workflow root.
+ """
+ path = Path(path)
+ if not path.is_absolute():
+ path = Path(self.path).joinpath(path)
+ if not path.is_file():
+ raise FileNotFoundError(f"File at location {path!r} does not exist.")
+ return path.read_text()
+
+ @abstractmethod
+ def _append_task_element_IDs(self, task_ID: int, elem_IDs: list[int]):
+ raise NotImplementedError
+
+ @abstractmethod
+ def _set_run_dirs(self, run_dir_arr: np.ndarray, run_idx: np.ndarray) -> None: ...
+
+ @abstractmethod
+ def _update_iter_data_indices(
+ self, iter_data_indices: dict[int, DataIndex]
+ ) -> None: ...
+
+ @abstractmethod
+ def _update_run_data_indices(
+ self, run_data_indices: dict[int, DataIndex]
+ ) -> None: ...