hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
  2. hpcflow/_version.py +1 -1
  3. hpcflow/app.py +1 -0
  4. hpcflow/data/scripts/bad_script.py +2 -0
  5. hpcflow/data/scripts/do_nothing.py +2 -0
  6. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  7. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  8. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  11. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  12. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  13. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  15. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  16. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  23. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
  24. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  25. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
  26. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  27. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  28. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  29. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  30. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  31. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  32. hpcflow/data/scripts/script_exit_test.py +5 -0
  33. hpcflow/data/template_components/environments.yaml +1 -1
  34. hpcflow/sdk/__init__.py +26 -15
  35. hpcflow/sdk/app.py +2192 -768
  36. hpcflow/sdk/cli.py +506 -296
  37. hpcflow/sdk/cli_common.py +105 -7
  38. hpcflow/sdk/config/__init__.py +1 -1
  39. hpcflow/sdk/config/callbacks.py +115 -43
  40. hpcflow/sdk/config/cli.py +126 -103
  41. hpcflow/sdk/config/config.py +674 -318
  42. hpcflow/sdk/config/config_file.py +131 -95
  43. hpcflow/sdk/config/errors.py +125 -84
  44. hpcflow/sdk/config/types.py +148 -0
  45. hpcflow/sdk/core/__init__.py +25 -1
  46. hpcflow/sdk/core/actions.py +1771 -1059
  47. hpcflow/sdk/core/app_aware.py +24 -0
  48. hpcflow/sdk/core/cache.py +139 -79
  49. hpcflow/sdk/core/command_files.py +263 -287
  50. hpcflow/sdk/core/commands.py +145 -112
  51. hpcflow/sdk/core/element.py +828 -535
  52. hpcflow/sdk/core/enums.py +192 -0
  53. hpcflow/sdk/core/environment.py +74 -93
  54. hpcflow/sdk/core/errors.py +455 -52
  55. hpcflow/sdk/core/execute.py +207 -0
  56. hpcflow/sdk/core/json_like.py +540 -272
  57. hpcflow/sdk/core/loop.py +751 -347
  58. hpcflow/sdk/core/loop_cache.py +164 -47
  59. hpcflow/sdk/core/object_list.py +370 -207
  60. hpcflow/sdk/core/parameters.py +1100 -627
  61. hpcflow/sdk/core/rule.py +59 -41
  62. hpcflow/sdk/core/run_dir_files.py +21 -37
  63. hpcflow/sdk/core/skip_reason.py +7 -0
  64. hpcflow/sdk/core/task.py +1649 -1339
  65. hpcflow/sdk/core/task_schema.py +308 -196
  66. hpcflow/sdk/core/test_utils.py +191 -114
  67. hpcflow/sdk/core/types.py +440 -0
  68. hpcflow/sdk/core/utils.py +485 -309
  69. hpcflow/sdk/core/validation.py +82 -9
  70. hpcflow/sdk/core/workflow.py +2544 -1178
  71. hpcflow/sdk/core/zarr_io.py +98 -137
  72. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  73. hpcflow/sdk/demo/cli.py +53 -33
  74. hpcflow/sdk/helper/cli.py +18 -15
  75. hpcflow/sdk/helper/helper.py +75 -63
  76. hpcflow/sdk/helper/watcher.py +61 -28
  77. hpcflow/sdk/log.py +122 -71
  78. hpcflow/sdk/persistence/__init__.py +8 -31
  79. hpcflow/sdk/persistence/base.py +1360 -606
  80. hpcflow/sdk/persistence/defaults.py +6 -0
  81. hpcflow/sdk/persistence/discovery.py +38 -0
  82. hpcflow/sdk/persistence/json.py +568 -188
  83. hpcflow/sdk/persistence/pending.py +382 -179
  84. hpcflow/sdk/persistence/store_resource.py +39 -23
  85. hpcflow/sdk/persistence/types.py +318 -0
  86. hpcflow/sdk/persistence/utils.py +14 -11
  87. hpcflow/sdk/persistence/zarr.py +1337 -433
  88. hpcflow/sdk/runtime.py +44 -41
  89. hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
  90. hpcflow/sdk/submission/jobscript.py +1651 -692
  91. hpcflow/sdk/submission/schedulers/__init__.py +167 -39
  92. hpcflow/sdk/submission/schedulers/direct.py +121 -81
  93. hpcflow/sdk/submission/schedulers/sge.py +170 -129
  94. hpcflow/sdk/submission/schedulers/slurm.py +291 -268
  95. hpcflow/sdk/submission/schedulers/utils.py +12 -2
  96. hpcflow/sdk/submission/shells/__init__.py +14 -15
  97. hpcflow/sdk/submission/shells/base.py +150 -29
  98. hpcflow/sdk/submission/shells/bash.py +283 -173
  99. hpcflow/sdk/submission/shells/os_version.py +31 -30
  100. hpcflow/sdk/submission/shells/powershell.py +228 -170
  101. hpcflow/sdk/submission/submission.py +1014 -335
  102. hpcflow/sdk/submission/types.py +140 -0
  103. hpcflow/sdk/typing.py +182 -12
  104. hpcflow/sdk/utils/arrays.py +71 -0
  105. hpcflow/sdk/utils/deferred_file.py +55 -0
  106. hpcflow/sdk/utils/hashing.py +16 -0
  107. hpcflow/sdk/utils/patches.py +12 -0
  108. hpcflow/sdk/utils/strings.py +33 -0
  109. hpcflow/tests/api/test_api.py +32 -0
  110. hpcflow/tests/conftest.py +27 -6
  111. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  112. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  113. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  114. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
  115. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  116. hpcflow/tests/scripts/test_main_scripts.py +866 -85
  117. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  118. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  119. hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
  120. hpcflow/tests/unit/test_action.py +262 -75
  121. hpcflow/tests/unit/test_action_rule.py +9 -4
  122. hpcflow/tests/unit/test_app.py +33 -6
  123. hpcflow/tests/unit/test_cache.py +46 -0
  124. hpcflow/tests/unit/test_cli.py +134 -1
  125. hpcflow/tests/unit/test_command.py +71 -54
  126. hpcflow/tests/unit/test_config.py +142 -16
  127. hpcflow/tests/unit/test_config_file.py +21 -18
  128. hpcflow/tests/unit/test_element.py +58 -62
  129. hpcflow/tests/unit/test_element_iteration.py +50 -1
  130. hpcflow/tests/unit/test_element_set.py +29 -19
  131. hpcflow/tests/unit/test_group.py +4 -2
  132. hpcflow/tests/unit/test_input_source.py +116 -93
  133. hpcflow/tests/unit/test_input_value.py +29 -24
  134. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  135. hpcflow/tests/unit/test_json_like.py +44 -35
  136. hpcflow/tests/unit/test_loop.py +1396 -84
  137. hpcflow/tests/unit/test_meta_task.py +325 -0
  138. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  139. hpcflow/tests/unit/test_object_list.py +17 -12
  140. hpcflow/tests/unit/test_parameter.py +29 -7
  141. hpcflow/tests/unit/test_persistence.py +237 -42
  142. hpcflow/tests/unit/test_resources.py +20 -18
  143. hpcflow/tests/unit/test_run.py +117 -6
  144. hpcflow/tests/unit/test_run_directories.py +29 -0
  145. hpcflow/tests/unit/test_runtime.py +2 -1
  146. hpcflow/tests/unit/test_schema_input.py +23 -15
  147. hpcflow/tests/unit/test_shell.py +23 -2
  148. hpcflow/tests/unit/test_slurm.py +8 -7
  149. hpcflow/tests/unit/test_submission.py +38 -89
  150. hpcflow/tests/unit/test_task.py +352 -247
  151. hpcflow/tests/unit/test_task_schema.py +33 -20
  152. hpcflow/tests/unit/test_utils.py +9 -11
  153. hpcflow/tests/unit/test_value_sequence.py +15 -12
  154. hpcflow/tests/unit/test_workflow.py +114 -83
  155. hpcflow/tests/unit/test_workflow_template.py +0 -1
  156. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  157. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  158. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  159. hpcflow/tests/unit/utils/test_patches.py +5 -0
  160. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  161. hpcflow/tests/workflows/__init__.py +0 -0
  162. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  163. hpcflow/tests/workflows/test_jobscript.py +334 -1
  164. hpcflow/tests/workflows/test_run_status.py +198 -0
  165. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  166. hpcflow/tests/workflows/test_submission.py +140 -0
  167. hpcflow/tests/workflows/test_workflows.py +160 -15
  168. hpcflow/tests/workflows/test_zip.py +18 -0
  169. hpcflow/viz_demo.ipynb +6587 -3
  170. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
  171. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  172. hpcflow/sdk/core/parallel.py +0 -21
  173. hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
  174. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  175. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  176. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -3,38 +3,88 @@ Base persistence models.
3
3
 
4
4
  Store* classes represent the element-metadata in the store, in a store-agnostic way.
5
5
  """
6
- from __future__ import annotations
7
- from abc import ABC
8
6
 
7
+ from __future__ import annotations
8
+ from abc import ABC, abstractmethod
9
9
  import contextlib
10
10
  import copy
11
11
  from dataclasses import dataclass, field
12
- from datetime import datetime, timezone
13
12
  import enum
13
+ from logging import Logger
14
+ from functools import wraps
14
15
  import os
15
16
  from pathlib import Path
16
- import re
17
17
  import shutil
18
18
  import socket
19
19
  import time
20
- from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, TypeVar, Union
20
+ from typing import Generic, TypeVar, cast, overload, TYPE_CHECKING
21
+
22
+ import numpy as np
21
23
 
22
24
  from hpcflow.sdk.core.utils import (
23
25
  flatten,
24
26
  get_in_container,
25
27
  get_relative_path,
28
+ remap,
26
29
  reshape,
27
30
  set_in_container,
28
- JSONLikeDirSnapShot,
31
+ normalise_timestamp,
32
+ parse_timestamp,
33
+ current_timestamp,
29
34
  )
35
+ from hpcflow.sdk.core.errors import ParametersMetadataReadOnlyError
36
+ from hpcflow.sdk.submission.submission import (
37
+ JOBSCRIPT_SUBMIT_TIME_KEYS,
38
+ SUBMISSION_SUBMIT_TIME_KEYS,
39
+ )
40
+ from hpcflow.sdk.utils.strings import shorten_list_str
30
41
  from hpcflow.sdk.log import TimeIt
42
+ from hpcflow.sdk.typing import hydrate
31
43
  from hpcflow.sdk.persistence.pending import PendingChanges
44
+ from hpcflow.sdk.persistence.types import (
45
+ AnySTask,
46
+ AnySElement,
47
+ AnySElementIter,
48
+ AnySEAR,
49
+ AnySParameter,
50
+ )
32
51
 
33
- AnySTask = TypeVar("AnySTask", bound="StoreTask")
34
- AnySElement = TypeVar("AnySElement", bound="StoreElement")
35
- AnySElementIter = TypeVar("AnySElementIter", bound="StoreElementIter")
36
- AnySEAR = TypeVar("AnySEAR", bound="StoreEAR")
37
- AnySParameter = TypeVar("AnySParameter", bound="StoreParameter")
52
+ if TYPE_CHECKING:
53
+ from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
54
+ from contextlib import AbstractContextManager
55
+ from datetime import datetime
56
+ from typing import Any, ClassVar, Final, Literal
57
+ from typing_extensions import Self, TypeIs
58
+ from fsspec import AbstractFileSystem # type: ignore
59
+ from numpy.typing import NDArray
60
+ from .pending import CommitResourceMap
61
+ from .store_resource import StoreResource
62
+ from .types import (
63
+ EncodedStoreParameter,
64
+ File,
65
+ FileDescriptor,
66
+ LoopDescriptor,
67
+ Metadata,
68
+ ParameterTypes,
69
+ PersistenceCache,
70
+ StoreCreationInfo,
71
+ TemplateMeta,
72
+ TypeLookup,
73
+ IterableParam,
74
+ )
75
+ from .zarr import ZarrAttrsDict
76
+ from ..app import BaseApp
77
+ from ..typing import DataIndex, PathLike, ParamSource
78
+ from ..core.json_like import JSONed, JSONDocument
79
+ from ..core.parameters import ParameterValue
80
+ from ..core.workflow import Workflow
81
+ from ..submission.types import VersionInfo, ResolvedJobscriptBlockDependencies
82
+
83
+ T = TypeVar("T")
84
+ #: Type of the serialized form.
85
+ SerFormT = TypeVar("SerFormT")
86
+ #: Type of the encoding and decoding context.
87
+ ContextT = TypeVar("ContextT")
38
88
 
39
89
  PRIMITIVES = (
40
90
  int,
@@ -50,14 +100,36 @@ TEMPLATE_COMP_TYPES = (
50
100
  "task_schemas",
51
101
  )
52
102
 
53
- PARAM_DATA_NOT_SET = 0
103
+ PARAM_DATA_NOT_SET: Final[int] = 0
54
104
 
55
105
 
56
- def update_param_source_dict(source, update):
106
+ def update_param_source_dict(source: ParamSource, update: ParamSource) -> ParamSource:
57
107
  """
58
108
  Combine two dicts into a new dict that is ordered on its keys.
59
109
  """
60
- return dict(sorted({**source, **update}.items()))
110
+ return cast("ParamSource", dict(sorted({**source, **update}.items())))
111
+
112
+
113
+ def writes_parameter_data(func: Callable):
114
+ """Decorator function that should wrap `PersistentStore` methods that write
115
+ parameter-associated data.
116
+
117
+ Notes
118
+ -----
119
+ This decorator checks that the parameters-metadata cache is not in use, which should
120
+ not be used during writing of parameter-associated data.
121
+ """
122
+
123
+ @wraps(func)
124
+ def inner(self, *args, **kwargs):
125
+ if self._use_parameters_metadata_cache:
126
+ raise ParametersMetadataReadOnlyError(
127
+ "Cannot use the `parameters_metadata_cache` when writing parameter-"
128
+ "associated data!"
129
+ )
130
+ return func(self, *args, **kwargs)
131
+
132
+ return inner
61
133
 
62
134
 
63
135
  @dataclass
@@ -102,7 +174,7 @@ class PersistentStoreFeatures:
102
174
 
103
175
 
104
176
  @dataclass
105
- class StoreTask:
177
+ class StoreTask(Generic[SerFormT]):
106
178
  """
107
179
  Represents a task in a persistent store.
108
180
 
@@ -120,6 +192,12 @@ class StoreTask:
120
192
  Description of the template for the task.
121
193
  """
122
194
 
195
+ # This would be in the docstring except it renders really wrongly!
196
+ # Type Parameters
197
+ # ---------------
198
+ # SerFormT
199
+ # Type of the serialized form.
200
+
123
201
  #: The ID of the task.
124
202
  id_: int
125
203
  #: The index of the task within its workflow.
@@ -127,41 +205,38 @@ class StoreTask:
127
205
  #: Whether the task has changes not yet persisted.
128
206
  is_pending: bool
129
207
  #: The IDs of elements in the task.
130
- element_IDs: List[int]
208
+ element_IDs: list[int]
131
209
  #: Description of the template for the task.
132
- task_template: Optional[Dict] = None
210
+ task_template: Mapping[str, Any] | None = None
133
211
 
134
- def encode(self) -> Tuple[int, Dict, Dict]:
212
+ @abstractmethod
213
+ def encode(self) -> tuple[int, SerFormT, dict[str, Any]]:
135
214
  """Prepare store task data for the persistent store."""
136
- wk_task = {"id_": self.id_, "element_IDs": self.element_IDs}
137
- task = {"id_": self.id_, **self.task_template}
138
- return self.index, wk_task, task
139
215
 
140
216
  @classmethod
141
- def decode(cls, task_dat: Dict) -> StoreTask:
217
+ @abstractmethod
218
+ def decode(cls, task_dat: SerFormT) -> Self:
142
219
  """Initialise a `StoreTask` from store task data
143
220
 
144
221
  Note: the `task_template` is only needed for encoding because it is retrieved as
145
222
  part of the `WorkflowTemplate` so we don't need to load it when decoding.
146
223
 
147
224
  """
148
- return cls(is_pending=False, **task_dat)
149
225
 
150
226
  @TimeIt.decorator
151
- def append_element_IDs(self: AnySTask, pend_IDs: List[int]) -> AnySTask:
227
+ def append_element_IDs(self, pend_IDs: list[int]) -> Self:
152
228
  """Return a copy, with additional element IDs."""
153
- elem_IDs = self.element_IDs[:] + pend_IDs
154
229
  return self.__class__(
155
230
  id_=self.id_,
156
231
  index=self.index,
157
232
  is_pending=self.is_pending,
158
- element_IDs=elem_IDs,
233
+ element_IDs=[*self.element_IDs, *pend_IDs],
159
234
  task_template=self.task_template,
160
235
  )
161
236
 
162
237
 
163
238
  @dataclass
164
- class StoreElement:
239
+ class StoreElement(Generic[SerFormT, ContextT]):
165
240
  """
166
241
  Represents an element in a persistent store.
167
242
 
@@ -185,6 +260,14 @@ class StoreElement:
185
260
  IDs of element-iterations that belong to this element.
186
261
  """
187
262
 
263
+ # These would be in the docstring except they render really wrongly!
264
+ # Type Parameters
265
+ # ---------------
266
+ # SerFormT
267
+ # Type of the serialized form.
268
+ # ContextT
269
+ # Type of the encoding and decoding context.
270
+
188
271
  #: The ID of the element.
189
272
  id_: int
190
273
  #: Whether the element has changes not yet persisted.
@@ -194,26 +277,24 @@ class StoreElement:
194
277
  #: Index of the element set containing this element.
195
278
  es_idx: int
196
279
  #: Value sequence index map.
197
- seq_idx: Dict[str, int]
280
+ seq_idx: dict[str, int]
198
281
  #: Data source index map.
199
- src_idx: Dict[str, int]
282
+ src_idx: dict[str, int]
200
283
  #: ID of the task that contains this element.
201
284
  task_ID: int
202
285
  #: IDs of element-iterations that belong to this element.
203
- iteration_IDs: List[int]
286
+ iteration_IDs: list[int]
204
287
 
205
- def encode(self) -> Dict:
288
+ @abstractmethod
289
+ def encode(self, context: ContextT) -> SerFormT:
206
290
  """Prepare store element data for the persistent store."""
207
- dct = self.__dict__
208
- del dct["is_pending"]
209
- return dct
210
291
 
211
292
  @classmethod
212
- def decode(cls, elem_dat: Dict) -> StoreElement:
293
+ @abstractmethod
294
+ def decode(cls, elem_dat: SerFormT, context: ContextT) -> Self:
213
295
  """Initialise a `StoreElement` from store element data"""
214
- return cls(is_pending=False, **elem_dat)
215
296
 
216
- def to_dict(self, iters):
297
+ def to_dict(self, iters) -> dict[str, Any]:
217
298
  """Prepare data for the user-facing `Element` object."""
218
299
  return {
219
300
  "id_": self.id_,
@@ -228,9 +309,9 @@ class StoreElement:
228
309
  }
229
310
 
230
311
  @TimeIt.decorator
231
- def append_iteration_IDs(self: AnySElement, pend_IDs: List[int]) -> AnySElement:
312
+ def append_iteration_IDs(self, pend_IDs: Iterable[int]) -> Self:
232
313
  """Return a copy, with additional iteration IDs."""
233
- iter_IDs = self.iteration_IDs[:] + pend_IDs
314
+ iter_IDs = [*self.iteration_IDs, *pend_IDs]
234
315
  return self.__class__(
235
316
  id_=self.id_,
236
317
  is_pending=self.is_pending,
@@ -244,7 +325,7 @@ class StoreElement:
244
325
 
245
326
 
246
327
  @dataclass
247
- class StoreElementIter:
328
+ class StoreElementIter(Generic[SerFormT, ContextT]):
248
329
  """
249
330
  Represents an element iteration in a persistent store.
250
331
 
@@ -269,6 +350,14 @@ class StoreElementIter:
269
350
  What loops are being handled here and where they're up to.
270
351
  """
271
352
 
353
+ # These would be in the docstring except they render really wrongly!
354
+ # Type Parameters
355
+ # ---------------
356
+ # SerFormT
357
+ # Type of the serialized form.
358
+ # ContextT
359
+ # Type of the encoding and decoding context.
360
+
272
361
  #: The ID of this element iteration.
273
362
  id_: int
274
363
  #: Whether the element iteration has changes not yet persisted.
@@ -278,34 +367,25 @@ class StoreElementIter:
278
367
  #: Whether EARs have been initialised for this element iteration.
279
368
  EARs_initialised: bool
280
369
  #: Maps task schema action indices to EARs by ID.
281
- EAR_IDs: Dict[int, List[int]]
370
+ EAR_IDs: dict[int, list[int]] | None
282
371
  #: Overall data index for the element-iteration, which maps parameter names to
283
372
  #: parameter data indices.
284
- data_idx: Dict[str, int]
373
+ data_idx: DataIndex
285
374
  #: List of parameters defined by the associated task schema.
286
- schema_parameters: List[str]
375
+ schema_parameters: list[str]
287
376
  #: What loops are being handled here and where they're up to.
288
- loop_idx: Dict[str, int] = field(default_factory=dict)
377
+ loop_idx: Mapping[str, int] = field(default_factory=dict)
289
378
 
290
- def encode(self) -> Dict:
379
+ @abstractmethod
380
+ def encode(self, context: ContextT) -> SerFormT:
291
381
  """Prepare store element iteration data for the persistent store."""
292
- dct = self.__dict__
293
- del dct["is_pending"]
294
- return dct
295
382
 
296
383
  @classmethod
297
- def decode(cls, iter_dat: Dict) -> StoreElementIter:
384
+ @abstractmethod
385
+ def decode(cls, iter_dat: SerFormT, context: ContextT) -> Self:
298
386
  """Initialise a `StoreElementIter` from persistent store element iteration data"""
299
387
 
300
- iter_dat = copy.deepcopy(iter_dat) # to avoid mutating; can we avoid this?
301
-
302
- # cast JSON string keys to integers:
303
- for act_idx in list((iter_dat["EAR_IDs"] or {}).keys()):
304
- iter_dat["EAR_IDs"][int(act_idx)] = iter_dat["EAR_IDs"].pop(act_idx)
305
-
306
- return cls(is_pending=False, **iter_dat)
307
-
308
- def to_dict(self, EARs):
388
+ def to_dict(self, EARs: dict[int, dict[str, Any]] | None) -> dict[str, Any]:
309
389
  """Prepare data for the user-facing `ElementIteration` object."""
310
390
  return {
311
391
  "id_": self.id_,
@@ -316,20 +396,16 @@ class StoreElementIter:
316
396
  "schema_parameters": self.schema_parameters,
317
397
  "EARs": EARs,
318
398
  "EARs_initialised": self.EARs_initialised,
319
- "loop_idx": self.loop_idx,
399
+ "loop_idx": dict(self.loop_idx),
320
400
  }
321
401
 
322
402
  @TimeIt.decorator
323
- def append_EAR_IDs(
324
- self: AnySElementIter, pend_IDs: Dict[int, List[int]]
325
- ) -> AnySElementIter:
403
+ def append_EAR_IDs(self, pend_IDs: Mapping[int, Sequence[int]]) -> Self:
326
404
  """Return a copy, with additional EAR IDs."""
327
405
 
328
406
  EAR_IDs = copy.deepcopy(self.EAR_IDs) or {}
329
407
  for act_idx, IDs_i in pend_IDs.items():
330
- if act_idx not in EAR_IDs:
331
- EAR_IDs[act_idx] = []
332
- EAR_IDs[act_idx].extend(IDs_i)
408
+ EAR_IDs.setdefault(act_idx, []).extend(IDs_i)
333
409
 
334
410
  return self.__class__(
335
411
  id_=self.id_,
@@ -343,11 +419,9 @@ class StoreElementIter:
343
419
  )
344
420
 
345
421
  @TimeIt.decorator
346
- def update_loop_idx(
347
- self: AnySElementIter, loop_idx: Dict[str, int]
348
- ) -> AnySElementIter:
422
+ def update_loop_idx(self, loop_idx: Mapping[str, int]) -> Self:
349
423
  """Return a copy, with the loop index updated."""
350
- loop_idx_new = copy.deepcopy(self.loop_idx)
424
+ loop_idx_new = dict(self.loop_idx)
351
425
  loop_idx_new.update(loop_idx)
352
426
  return self.__class__(
353
427
  id_=self.id_,
@@ -361,7 +435,7 @@ class StoreElementIter:
361
435
  )
362
436
 
363
437
  @TimeIt.decorator
364
- def set_EARs_initialised(self: AnySElementIter) -> AnySElementIter:
438
+ def set_EARs_initialised(self) -> Self:
365
439
  """Return a copy with `EARs_initialised` set to `True`."""
366
440
  return self.__class__(
367
441
  id_=self.id_,
@@ -374,9 +448,29 @@ class StoreElementIter:
374
448
  EARs_initialised=True,
375
449
  )
376
450
 
451
+ @TimeIt.decorator
452
+ def update_data_idx(self: AnySElementIter, data_idx: DataIndex) -> AnySElementIter:
453
+ """Return a copy with an updated `data_idx`.
454
+
455
+ The existing data index is updated, not overwritten.
456
+
457
+ """
458
+ new_data_idx = copy.deepcopy(self.data_idx)
459
+ new_data_idx.update(data_idx)
460
+ return self.__class__(
461
+ id_=self.id_,
462
+ is_pending=self.is_pending,
463
+ element_ID=self.element_ID,
464
+ EAR_IDs=self.EAR_IDs,
465
+ data_idx=new_data_idx,
466
+ schema_parameters=self.schema_parameters,
467
+ loop_idx=self.loop_idx,
468
+ EARs_initialised=self.EARs_initialised,
469
+ )
470
+
377
471
 
378
472
  @dataclass
379
- class StoreEAR:
473
+ class StoreEAR(Generic[SerFormT, ContextT]):
380
474
  """
381
475
  Represents an element action run in a persistent store.
382
476
 
@@ -416,6 +510,14 @@ class StoreEAR:
416
510
  Where this EAR was submitted to run, if known.
417
511
  """
418
512
 
513
+ # These would be in the docstring except they render really wrongly!
514
+ # Type Parameters
515
+ # ---------------
516
+ # SerFormT
517
+ # Type of the serialized form.
518
+ # ContextT
519
+ # Type of the encoding and decoding context.
520
+
419
521
  #: The ID of this element action run.
420
522
  id_: int
421
523
  #: Whether the element action run has changes not yet persisted.
@@ -425,74 +527,57 @@ class StoreEAR:
425
527
  #: The task schema action associated with this EAR.
426
528
  action_idx: int
427
529
  #: The indices of the commands in the EAR.
428
- commands_idx: List[int]
530
+ commands_idx: list[int]
429
531
  #: Maps parameter names within this EAR to parameter data indices.
430
- data_idx: Dict[str, int]
532
+ data_idx: DataIndex
431
533
  #: Which submission contained this EAR, if known.
432
- submission_idx: Optional[int] = None
534
+ submission_idx: int | None = None
535
+ #: Run ID whose commands can be used for this run (may be this run's ID).
536
+ commands_file_ID: int | None = None
433
537
  #: Whether to skip this EAR.
434
- skip: Optional[bool] = False
538
+ skip: int = 0
435
539
  #: Whether this EAR was successful, if known.
436
- success: Optional[bool] = None
540
+ success: bool | None = None
437
541
  #: When this EAR started, if known.
438
- start_time: Optional[datetime] = None
542
+ start_time: datetime | None = None
439
543
  #: When this EAR finished, if known.
440
- end_time: Optional[datetime] = None
544
+ end_time: datetime | None = None
441
545
  #: Snapshot of files at EAR start, if recorded.
442
- snapshot_start: Optional[Dict] = None
546
+ snapshot_start: dict[str, Any] | None = None
443
547
  #: Snapshot of files at EAR end, if recorded.
444
- snapshot_end: Optional[Dict] = None
548
+ snapshot_end: dict[str, Any] | None = None
445
549
  #: The exit code of the underlying executable, if known.
446
- exit_code: Optional[int] = None
550
+ exit_code: int | None = None
447
551
  #: Metadata concerning e.g. the state of the EAR.
448
- metadata: Dict[str, Any] = None
552
+ metadata: Metadata | None = None
449
553
  #: Where this EAR was submitted to run, if known.
450
- run_hostname: Optional[str] = None
554
+ run_hostname: str | None = None
555
+ port_number: int | None = None
451
556
 
452
557
  @staticmethod
453
- def _encode_datetime(dt: Union[datetime, None], ts_fmt: str) -> str:
558
+ def _encode_datetime(dt: datetime | None, ts_fmt: str) -> str | None:
454
559
  return dt.strftime(ts_fmt) if dt else None
455
560
 
456
561
  @staticmethod
457
- def _decode_datetime(dt_str: Union[str, None], ts_fmt: str) -> datetime:
458
- return datetime.strptime(dt_str, ts_fmt) if dt_str else None
562
+ def _decode_datetime(dt_str: str | None, ts_fmt: str) -> datetime | None:
563
+ return parse_timestamp(dt_str, ts_fmt) if dt_str else None
459
564
 
460
- def encode(self, ts_fmt: str) -> Dict:
565
+ @abstractmethod
566
+ def encode(self, ts_fmt: str, context: ContextT) -> SerFormT:
461
567
  """Prepare store EAR data for the persistent store."""
462
- return {
463
- "id_": self.id_,
464
- "elem_iter_ID": self.elem_iter_ID,
465
- "action_idx": self.action_idx,
466
- "commands_idx": self.commands_idx,
467
- "data_idx": self.data_idx,
468
- "submission_idx": self.submission_idx,
469
- "success": self.success,
470
- "skip": self.skip,
471
- "start_time": self._encode_datetime(self.start_time, ts_fmt),
472
- "end_time": self._encode_datetime(self.end_time, ts_fmt),
473
- "snapshot_start": self.snapshot_start,
474
- "snapshot_end": self.snapshot_end,
475
- "exit_code": self.exit_code,
476
- "metadata": self.metadata,
477
- "run_hostname": self.run_hostname,
478
- }
479
568
 
480
569
  @classmethod
481
- def decode(cls, EAR_dat: Dict, ts_fmt: str) -> StoreEAR:
570
+ @abstractmethod
571
+ def decode(cls, EAR_dat: SerFormT, ts_fmt: str, context: ContextT) -> Self:
482
572
  """Initialise a `StoreEAR` from persistent store EAR data"""
483
- # don't want to mutate EAR_dat:
484
- EAR_dat = copy.deepcopy(EAR_dat)
485
- EAR_dat["start_time"] = cls._decode_datetime(EAR_dat["start_time"], ts_fmt)
486
- EAR_dat["end_time"] = cls._decode_datetime(EAR_dat["end_time"], ts_fmt)
487
- return cls(is_pending=False, **EAR_dat)
488
573
 
489
- def to_dict(self) -> Dict:
574
+ def to_dict(self) -> dict[str, Any]:
490
575
  """Prepare data for the user-facing `ElementActionRun` object."""
491
576
 
492
- def _process_datetime(dt: datetime) -> datetime:
577
+ def _process_datetime(dt: datetime | None) -> datetime | None:
493
578
  """We store datetime objects implicitly in UTC, so we need to first make
494
579
  that explicit, and then convert to the local time zone."""
495
- return dt.replace(tzinfo=timezone.utc).astimezone() if dt else None
580
+ return normalise_timestamp(dt) if dt else None
496
581
 
497
582
  return {
498
583
  "id_": self.id_,
@@ -502,6 +587,7 @@ class StoreEAR:
502
587
  "commands_idx": self.commands_idx,
503
588
  "data_idx": self.data_idx,
504
589
  "submission_idx": self.submission_idx,
590
+ "commands_file_ID": self.commands_file_ID,
505
591
  "success": self.success,
506
592
  "skip": self.skip,
507
593
  "start_time": _process_datetime(self.start_time),
@@ -511,21 +597,25 @@ class StoreEAR:
511
597
  "exit_code": self.exit_code,
512
598
  "metadata": self.metadata,
513
599
  "run_hostname": self.run_hostname,
600
+ "port_number": self.port_number,
514
601
  }
515
602
 
516
603
  @TimeIt.decorator
517
604
  def update(
518
605
  self,
519
- submission_idx: Optional[int] = None,
520
- skip: Optional[bool] = None,
521
- success: Optional[bool] = None,
522
- start_time: Optional[datetime] = None,
523
- end_time: Optional[datetime] = None,
524
- snapshot_start: Optional[Dict] = None,
525
- snapshot_end: Optional[Dict] = None,
526
- exit_code: Optional[int] = None,
527
- run_hostname: Optional[str] = None,
528
- ) -> AnySEAR:
606
+ submission_idx: int | None = None,
607
+ commands_file_ID: int | None = None,
608
+ skip: int | None = None,
609
+ success: bool | None = None,
610
+ start_time: datetime | None = None,
611
+ end_time: datetime | None = None,
612
+ snapshot_start: dict[str, Any] | None = None,
613
+ snapshot_end: dict[str, Any] | None = None,
614
+ exit_code: int | None = None,
615
+ run_hostname: str | None = None,
616
+ port_number: int | None = None,
617
+ data_idx: DataIndex | None = None,
618
+ ) -> Self:
529
619
  """Return a shallow copy, with specified data updated."""
530
620
 
531
621
  sub_idx = submission_idx if submission_idx is not None else self.submission_idx
@@ -537,6 +627,16 @@ class StoreEAR:
537
627
  snap_e = snapshot_end if snapshot_end is not None else self.snapshot_end
538
628
  exit_code = exit_code if exit_code is not None else self.exit_code
539
629
  run_hn = run_hostname if run_hostname is not None else self.run_hostname
630
+ port_num = port_number if port_number is not None else self.port_number
631
+ cmd_file = (
632
+ commands_file_ID if commands_file_ID is not None else self.commands_file_ID
633
+ )
634
+ if data_idx is not None:
635
+ new_data_idx = copy.deepcopy(self.data_idx)
636
+ new_data_idx.update(data_idx)
637
+ data_idx = new_data_idx
638
+ else:
639
+ data_idx = self.data_idx
540
640
 
541
641
  return self.__class__(
542
642
  id_=self.id_,
@@ -544,9 +644,10 @@ class StoreEAR:
544
644
  elem_iter_ID=self.elem_iter_ID,
545
645
  action_idx=self.action_idx,
546
646
  commands_idx=self.commands_idx,
547
- data_idx=self.data_idx,
647
+ data_idx=data_idx,
548
648
  metadata=self.metadata,
549
649
  submission_idx=sub_idx,
650
+ commands_file_ID=cmd_file,
550
651
  skip=skip,
551
652
  success=success,
552
653
  start_time=start_time,
@@ -555,10 +656,12 @@ class StoreEAR:
555
656
  snapshot_end=snap_e,
556
657
  exit_code=exit_code,
557
658
  run_hostname=run_hn,
659
+ port_number=port_num,
558
660
  )
559
661
 
560
662
 
561
663
  @dataclass
664
+ @hydrate
562
665
  class StoreParameter:
563
666
  """
564
667
  Represents a parameter in a persistent store.
@@ -586,47 +689,60 @@ class StoreParameter:
586
689
  #: Whether the parameter is set.
587
690
  is_set: bool
588
691
  #: Description of the value of the parameter.
589
- data: Any
692
+ data: ParameterTypes
590
693
  #: Description of the file this parameter represents.
591
- file: Dict
694
+ file: File | None
592
695
  #: Description of where this parameter originated.
593
- source: Dict
696
+ source: ParamSource
594
697
 
595
- _encoders = {}
596
- _decoders = {}
698
+ _encoders: ClassVar[dict[type, Callable]] = {}
699
+ _decoders: ClassVar[dict[str, Callable]] = {}
700
+ _MAX_DEPTH: ClassVar[int] = 50
597
701
 
598
- def encode(self, **kwargs) -> Dict:
702
+ def encode(self, **kwargs) -> dict[str, Any] | int:
599
703
  """Prepare store parameter data for the persistent store."""
600
704
  if self.is_set:
601
705
  if self.file:
602
706
  return {"file": self.file}
603
707
  else:
604
- return self._encode(obj=self.data, **kwargs)
708
+ return cast("dict", self._encode(obj=self.data, **kwargs))
605
709
  else:
606
710
  return PARAM_DATA_NOT_SET
607
711
 
712
+ @staticmethod
713
+ def __is_ParameterValue(value) -> TypeIs[ParameterValue]:
714
+ # avoid circular import of `ParameterValue` until needed...
715
+ from ..core.parameters import ParameterValue as PV
716
+
717
+ return isinstance(value, PV)
718
+
719
+ def _init_type_lookup(self) -> TypeLookup:
720
+ return cast(
721
+ "TypeLookup",
722
+ {
723
+ "tuples": [],
724
+ "sets": [],
725
+ **{k: [] for k in self._decoders},
726
+ },
727
+ )
728
+
608
729
  def _encode(
609
730
  self,
610
- obj: Any,
611
- path: Optional[List] = None,
612
- type_lookup: Optional[Dict] = None,
731
+ obj: ParameterTypes,
732
+ path: list[int] | None = None,
733
+ type_lookup: TypeLookup | None = None,
613
734
  **kwargs,
614
- ) -> Dict:
735
+ ) -> EncodedStoreParameter:
615
736
  """Recursive encoder."""
616
737
 
617
738
  path = path or []
618
739
  if type_lookup is None:
619
- type_lookup = {
620
- "tuples": [],
621
- "sets": [],
622
- **{k: [] for k in self._decoders.keys()},
623
- }
740
+ type_lookup = self._init_type_lookup()
624
741
 
625
- if len(path) > 50:
742
+ if len(path) > self._MAX_DEPTH:
626
743
  raise RuntimeError("I'm in too deep!")
627
744
 
628
- if any("ParameterValue" in i.__name__ for i in obj.__class__.__mro__):
629
- # TODO: not nice; did this to avoid circular import of `ParameterValue`
745
+ if self.__is_ParameterValue(obj):
630
746
  encoded = self._encode(
631
747
  obj=obj.to_dict(),
632
748
  path=path,
@@ -640,11 +756,12 @@ class StoreParameter:
640
756
  for idx, item in enumerate(obj):
641
757
  encoded = self._encode(
642
758
  obj=item,
643
- path=path + [idx],
759
+ path=[*path, idx],
644
760
  type_lookup=type_lookup,
645
761
  **kwargs,
646
762
  )
647
763
  item, type_lookup = encoded["data"], encoded["type_lookup"]
764
+ assert type_lookup is not None
648
765
  data.append(item)
649
766
 
650
767
  if isinstance(obj, tuple):
@@ -654,21 +771,24 @@ class StoreParameter:
654
771
  type_lookup["sets"].append(path)
655
772
 
656
773
  elif isinstance(obj, dict):
774
+ assert type_lookup is not None
657
775
  data = {}
658
776
  for dct_key, dct_val in obj.items():
659
777
  encoded = self._encode(
660
778
  obj=dct_val,
661
- path=path + [dct_key],
779
+ path=[*path, dct_key],
662
780
  type_lookup=type_lookup,
663
781
  **kwargs,
664
782
  )
665
783
  dct_val, type_lookup = encoded["data"], encoded["type_lookup"]
784
+ assert type_lookup is not None
666
785
  data[dct_key] = dct_val
667
786
 
668
787
  elif isinstance(obj, PRIMITIVES):
669
788
  data = obj
670
789
 
671
790
  elif type(obj) in self._encoders:
791
+ assert type_lookup is not None
672
792
  data = self._encoders[type(obj)](
673
793
  obj=obj,
674
794
  path=path,
@@ -691,22 +811,23 @@ class StoreParameter:
691
811
  def decode(
692
812
  cls,
693
813
  id_: int,
694
- data: Union[None, Dict],
695
- source: Dict,
696
- path: Optional[List[str]] = None,
814
+ data: dict[str, Any] | Literal[0] | None,
815
+ source: ParamSource,
816
+ *,
817
+ path: list[str] | None = None,
697
818
  **kwargs,
698
- ) -> Any:
819
+ ) -> Self:
699
820
  """Initialise from persistent store parameter data."""
700
821
  if data and "file" in data:
701
822
  return cls(
702
823
  id_=id_,
703
824
  data=None,
704
- file=data["file"],
825
+ file=cast("File", data["file"]),
705
826
  is_set=True,
706
827
  source=source,
707
828
  is_pending=False,
708
829
  )
709
- elif data == PARAM_DATA_NOT_SET:
830
+ elif not isinstance(data, dict):
710
831
  # parameter is not set
711
832
  return cls(
712
833
  id_=id_,
@@ -717,11 +838,12 @@ class StoreParameter:
717
838
  is_pending=False,
718
839
  )
719
840
 
841
+ data_ = cast("EncodedStoreParameter", data)
720
842
  path = path or []
721
843
 
722
- obj = get_in_container(data["data"], path)
844
+ obj = get_in_container(data_["data"], path)
723
845
 
724
- for tuple_path in data["type_lookup"]["tuples"]:
846
+ for tuple_path in data_["type_lookup"]["tuples"]:
725
847
  try:
726
848
  rel_path = get_relative_path(tuple_path, path)
727
849
  except ValueError:
@@ -731,7 +853,7 @@ class StoreParameter:
731
853
  else:
732
854
  obj = tuple(obj)
733
855
 
734
- for set_path in data["type_lookup"]["sets"]:
856
+ for set_path in data_["type_lookup"]["sets"]:
735
857
  try:
736
858
  rel_path = get_relative_path(set_path, path)
737
859
  except ValueError:
@@ -744,7 +866,7 @@ class StoreParameter:
744
866
  for data_type in cls._decoders:
745
867
  obj = cls._decoders[data_type](
746
868
  obj=obj,
747
- type_lookup=data["type_lookup"],
869
+ type_lookup=data_["type_lookup"],
748
870
  path=path,
749
871
  **kwargs,
750
872
  )
@@ -758,7 +880,7 @@ class StoreParameter:
758
880
  is_pending=False,
759
881
  )
760
882
 
761
- def set_data(self, value: Any) -> None:
883
+ def set_data(self, value: Any) -> Self:
762
884
  """Return a copy, with data set."""
763
885
  if self.is_set:
764
886
  raise RuntimeError(f"Parameter ID {self.id_!r} is already set!")
@@ -771,7 +893,7 @@ class StoreParameter:
771
893
  source=self.source,
772
894
  )
773
895
 
774
- def set_file(self, value: Any) -> None:
896
+ def set_file(self, value: File) -> Self:
775
897
  """Return a copy, with file set."""
776
898
  if self.is_set:
777
899
  raise RuntimeError(f"Parameter ID {self.id_!r} is already set!")
@@ -784,20 +906,21 @@ class StoreParameter:
784
906
  source=self.source,
785
907
  )
786
908
 
787
- def update_source(self, src: Dict) -> None:
909
+ def update_source(self, src: ParamSource) -> Self:
788
910
  """Return a copy, with updated source."""
789
- new_src = update_param_source_dict(self.source, src)
790
911
  return self.__class__(
791
912
  id_=self.id_,
792
913
  is_set=self.is_set,
793
914
  is_pending=self.is_pending,
794
915
  data=self.data,
795
916
  file=self.file,
796
- source=new_src,
917
+ source=update_param_source_dict(self.source, src),
797
918
  )
798
919
 
799
920
 
800
- class PersistentStore(ABC):
921
+ class PersistentStore(
922
+ ABC, Generic[AnySTask, AnySElement, AnySElementIter, AnySEAR, AnySParameter]
923
+ ):
801
924
  """
802
925
  An abstract class representing a persistent workflow store.
803
926
 
@@ -813,35 +936,194 @@ class PersistentStore(ABC):
813
936
  Optionally, information about how to access the store.
814
937
  """
815
938
 
816
- _store_task_cls = StoreTask
817
- _store_elem_cls = StoreElement
818
- _store_iter_cls = StoreElementIter
819
- _store_EAR_cls = StoreEAR
820
- _store_param_cls = StoreParameter
939
+ # These would be in the docstring except they render really wrongly!
940
+ # Type Parameters
941
+ # ---------------
942
+ # AnySTask: StoreTask
943
+ # The type of stored tasks.
944
+ # AnySElement: StoreElement
945
+ # The type of stored elements.
946
+ # AnySElementIter: StoreElementIter
947
+ # The type of stored element iterations.
948
+ # AnySEAR: StoreEAR
949
+ # The type of stored EARs.
950
+ # AnySParameter: StoreParameter
951
+ # The type of stored parameters.
952
+
953
+ _name: ClassVar[str]
954
+
955
+ @classmethod
956
+ @abstractmethod
957
+ def _store_task_cls(cls) -> type[AnySTask]:
958
+ ...
959
+
960
+ @classmethod
961
+ @abstractmethod
962
+ def _store_elem_cls(cls) -> type[AnySElement]:
963
+ ...
964
+
965
+ @classmethod
966
+ @abstractmethod
967
+ def _store_iter_cls(cls) -> type[AnySElementIter]:
968
+ ...
821
969
 
822
- _resources = {}
970
+ @classmethod
971
+ @abstractmethod
972
+ def _store_EAR_cls(cls) -> type[AnySEAR]:
973
+ ...
823
974
 
824
- def __init__(self, app, workflow, path, fs=None) -> None:
825
- self.app = app
826
- self.workflow = workflow
827
- self.path = path
975
+ @classmethod
976
+ @abstractmethod
977
+ def _store_param_cls(cls) -> type[AnySParameter]:
978
+ ...
979
+
980
+ _resources: dict[str, StoreResource]
981
+ _features: ClassVar[PersistentStoreFeatures]
982
+ _res_map: ClassVar[CommitResourceMap]
983
+
984
+ def __init__(
985
+ self,
986
+ app: BaseApp,
987
+ workflow: Workflow | None,
988
+ path: Path | str,
989
+ fs: AbstractFileSystem | None = None,
990
+ ):
991
+ self._app = app
992
+ self.__workflow = workflow
993
+ self.path = str(path)
828
994
  self.fs = fs
829
995
 
830
- self._pending = PendingChanges(app=app, store=self, resource_map=self._res_map)
996
+ self._pending: PendingChanges[
997
+ AnySTask, AnySElement, AnySElementIter, AnySEAR, AnySParameter
998
+ ] = PendingChanges(app=app, store=self, resource_map=self._res_map)
831
999
 
832
- self._resources_in_use = set()
1000
+ self._resources_in_use: set[tuple[str, str]] = set()
833
1001
  self._in_batch_mode = False
834
1002
 
835
1003
  self._use_cache = False
836
- self._cache = None
837
1004
  self._reset_cache()
838
1005
 
1006
+ self._use_parameters_metadata_cache: bool = False # subclass-specific cache
1007
+
1008
+ @abstractmethod
1009
+ def cached_load(self) -> contextlib.AbstractContextManager[None]:
1010
+ """
1011
+ Perform a load with cache enabled while the ``with``-wrapped code runs.
1012
+ """
1013
+
1014
+ @abstractmethod
1015
+ def get_name(self) -> str:
1016
+ """
1017
+ Get the workflow name.
1018
+ """
1019
+
1020
+ @abstractmethod
1021
+ def get_creation_info(self) -> StoreCreationInfo:
1022
+ """
1023
+ Get the workflow creation data.
1024
+ """
1025
+
1026
+ @abstractmethod
1027
+ def get_ts_fmt(self) -> str:
1028
+ """
1029
+ Get the timestamp format.
1030
+ """
1031
+
1032
+ @abstractmethod
1033
+ def get_ts_name_fmt(self) -> str:
1034
+ """
1035
+ Get the timestamp format for names.
1036
+ """
1037
+
1038
+ @abstractmethod
1039
+ def remove_replaced_dir(self) -> None:
1040
+ """
1041
+ Remove a replaced directory.
1042
+ """
1043
+
1044
+ @abstractmethod
1045
+ def reinstate_replaced_dir(self) -> None:
1046
+ """
1047
+ Reinstate a replaced directory.
1048
+ """
1049
+
1050
+ @abstractmethod
1051
+ def zip(
1052
+ self,
1053
+ path: str = ".",
1054
+ log: str | None = None,
1055
+ overwrite=False,
1056
+ include_execute=False,
1057
+ include_rechunk_backups=False,
1058
+ ) -> str:
1059
+ """
1060
+ Convert this store into archival form.
1061
+ """
1062
+
1063
+ @abstractmethod
1064
+ def unzip(self, path: str = ".", log: str | None = None) -> str:
1065
+ """
1066
+ Convert this store into expanded form.
1067
+ """
1068
+
1069
+ @abstractmethod
1070
+ def rechunk_parameter_base(
1071
+ self,
1072
+ chunk_size: int | None = None,
1073
+ backup: bool = True,
1074
+ status: bool = True,
1075
+ ) -> Any:
1076
+ ...
1077
+
1078
+ @abstractmethod
1079
+ def rechunk_runs(
1080
+ self,
1081
+ chunk_size: int | None = None,
1082
+ backup: bool = True,
1083
+ status: bool = True,
1084
+ ) -> Any:
1085
+ ...
1086
+
1087
+ @abstractmethod
1088
+ def get_dirs_array(self) -> NDArray:
1089
+ """
1090
+ Retrieve the run directories array.
1091
+ """
1092
+
1093
+ @classmethod
1094
+ @abstractmethod
1095
+ def write_empty_workflow(
1096
+ cls,
1097
+ app: BaseApp,
1098
+ *,
1099
+ template_js: TemplateMeta,
1100
+ template_components_js: dict[str, Any],
1101
+ wk_path: str,
1102
+ fs: AbstractFileSystem,
1103
+ name: str,
1104
+ replaced_wk: str | None,
1105
+ creation_info: StoreCreationInfo,
1106
+ ts_fmt: str,
1107
+ ts_name_fmt: str,
1108
+ ) -> None:
1109
+ """
1110
+ Write an empty workflow.
1111
+ """
1112
+
1113
+ @property
1114
+ def workflow(self) -> Workflow:
1115
+ """
1116
+ The workflow this relates to.
1117
+ """
1118
+ assert self.__workflow is not None
1119
+ return self.__workflow
1120
+
839
1121
  @property
840
- def logger(self):
1122
+ def logger(self) -> Logger:
841
1123
  """
842
1124
  The logger to use.
843
1125
  """
844
- return self.app.persistence_logger
1126
+ return self._app.persistence_logger
845
1127
 
846
1128
  @property
847
1129
  def ts_fmt(self) -> str:
@@ -851,74 +1133,84 @@ class PersistentStore(ABC):
851
1133
  return self.workflow.ts_fmt
852
1134
 
853
1135
  @property
854
- def has_pending(self):
1136
+ def has_pending(self) -> bool:
855
1137
  """
856
1138
  Whether there are any pending changes.
857
1139
  """
858
1140
  return bool(self._pending)
859
1141
 
860
1142
  @property
861
- def is_submittable(self):
1143
+ def is_submittable(self) -> bool:
862
1144
  """Does this store support workflow submission?"""
863
1145
  return self.fs.__class__.__name__ == "LocalFileSystem"
864
1146
 
865
1147
  @property
866
- def use_cache(self):
1148
+ def use_cache(self) -> bool:
867
1149
  """
868
1150
  Whether to use a cache.
869
1151
  """
870
1152
  return self._use_cache
871
1153
 
872
1154
  @property
873
- def task_cache(self):
1155
+ def task_cache(self) -> dict[int, AnySTask]:
874
1156
  """Cache for persistent tasks."""
875
1157
  return self._cache["tasks"]
876
1158
 
877
1159
  @property
878
- def element_cache(self):
1160
+ def element_cache(self) -> dict[int, AnySElement]:
879
1161
  """Cache for persistent elements."""
880
1162
  return self._cache["elements"]
881
1163
 
882
1164
  @property
883
- def element_iter_cache(self):
1165
+ def element_iter_cache(self) -> dict[int, AnySElementIter]:
884
1166
  """Cache for persistent element iterations."""
885
1167
  return self._cache["element_iters"]
886
1168
 
887
1169
  @property
888
- def EAR_cache(self):
1170
+ def EAR_cache(self) -> dict[int, AnySEAR]:
889
1171
  """Cache for persistent EARs."""
890
1172
  return self._cache["EARs"]
891
1173
 
892
1174
  @property
893
- def num_tasks_cache(self):
1175
+ def num_tasks_cache(self) -> int | None:
894
1176
  """Cache for number of persistent tasks."""
895
1177
  return self._cache["num_tasks"]
896
1178
 
1179
+ @num_tasks_cache.setter
1180
+ def num_tasks_cache(self, value: int | None):
1181
+ self._cache["num_tasks"] = value
1182
+
897
1183
  @property
898
- def num_EARs_cache(self):
1184
+ def num_EARs_cache(self) -> int | None:
899
1185
  """Cache for total number of persistent EARs."""
900
1186
  return self._cache["num_EARs"]
901
1187
 
1188
+ @num_EARs_cache.setter
1189
+ def num_EARs_cache(self, value: int | None):
1190
+ self._cache["num_EARs"] = value
1191
+
902
1192
  @property
903
- def param_sources_cache(self):
1193
+ def num_params_cache(self) -> int | None:
1194
+ return self._cache["num_params"]
1195
+
1196
+ @num_params_cache.setter
1197
+ def num_params_cache(self, value: int | None):
1198
+ self._cache["num_params"] = value
1199
+
1200
+ @property
1201
+ def param_sources_cache(self) -> dict[int, ParamSource]:
904
1202
  """Cache for persistent parameter sources."""
905
1203
  return self._cache["param_sources"]
906
1204
 
907
1205
  @property
908
- def parameter_cache(self):
1206
+ def parameter_cache(self) -> dict[int, AnySParameter]:
909
1207
  """Cache for persistent parameters."""
910
1208
  return self._cache["parameters"]
911
1209
 
912
- @num_tasks_cache.setter
913
- def num_tasks_cache(self, value):
914
- self._cache["num_tasks"] = value
915
-
916
- @num_EARs_cache.setter
917
- def num_EARs_cache(self, value):
918
- self._cache["num_EARs"] = value
919
-
920
- def _reset_cache(self):
921
- self._cache = {
1210
+ def _reset_cache(self) -> None:
1211
+ self._cache: PersistenceCache[
1212
+ AnySTask, AnySElement, AnySElementIter, AnySEAR, AnySParameter
1213
+ ] = {
922
1214
  "tasks": {},
923
1215
  "elements": {},
924
1216
  "element_iters": {},
@@ -927,10 +1219,11 @@ class PersistentStore(ABC):
927
1219
  "num_tasks": None,
928
1220
  "parameters": {},
929
1221
  "num_EARs": None,
1222
+ "num_params": None,
930
1223
  }
931
1224
 
932
1225
  @contextlib.contextmanager
933
- def cache_ctx(self):
1226
+ def cache_ctx(self) -> Iterator[None]:
934
1227
  """Context manager for using the persistent element/iteration/run cache."""
935
1228
  self._use_cache = True
936
1229
  try:
@@ -939,16 +1232,34 @@ class PersistentStore(ABC):
939
1232
  self._use_cache = False
940
1233
  self._reset_cache()
941
1234
 
1235
+ @contextlib.contextmanager
1236
+ def parameters_metadata_cache(self):
1237
+ """Context manager for using the parameters-metadata cache.
1238
+
1239
+ Notes
1240
+ -----
1241
+ This method can be overridden by a subclass to provide an implementation-specific
1242
+ cache of metadata associated with parameters, or even parameter data itself.
1243
+
1244
+ Using this cache precludes writing/setting parameter data.
1245
+
1246
+ """
1247
+ yield
1248
+
942
1249
  @staticmethod
943
- def prepare_test_store_from_spec(task_spec):
1250
+ def prepare_test_store_from_spec(
1251
+ task_spec: Sequence[
1252
+ Mapping[str, Sequence[Mapping[str, Sequence[Mapping[str, Sequence]]]]]
1253
+ ],
1254
+ ) -> tuple[list[dict], list[dict], list[dict], list[dict]]:
944
1255
  """Generate a valid store from a specification in terms of nested
945
1256
  elements/iterations/EARs.
946
1257
 
947
1258
  """
948
- tasks = []
949
- elements = []
950
- elem_iters = []
951
- EARs = []
1259
+ tasks: list[dict] = []
1260
+ elements: list[dict] = []
1261
+ elem_iters: list[dict] = []
1262
+ EARs: list[dict] = []
952
1263
 
953
1264
  for task_idx, task_i in enumerate(task_spec):
954
1265
  elems_i = task_i.get("elements", [])
@@ -965,47 +1276,47 @@ class PersistentStore(ABC):
965
1276
 
966
1277
  for _ in EARs_k:
967
1278
  EARs.append(
968
- dict(
969
- id_=len(EARs),
970
- is_pending=False,
971
- elem_iter_ID=len(elem_iters),
972
- action_idx=0,
973
- data_idx={},
974
- metadata={},
975
- )
1279
+ {
1280
+ "id_": len(EARs),
1281
+ "is_pending": False,
1282
+ "elem_iter_ID": len(elem_iters),
1283
+ "action_idx": 0,
1284
+ "data_idx": {},
1285
+ "metadata": {},
1286
+ }
976
1287
  )
977
1288
 
978
1289
  elem_iters.append(
979
- dict(
980
- id_=len(elem_iters),
981
- is_pending=False,
982
- element_ID=len(elements),
983
- EAR_IDs=EAR_IDs_dct,
984
- data_idx={},
985
- schema_parameters=[],
986
- )
1290
+ {
1291
+ "id_": len(elem_iters),
1292
+ "is_pending": False,
1293
+ "element_ID": len(elements),
1294
+ "EAR_IDs": EAR_IDs_dct,
1295
+ "data_idx": {},
1296
+ "schema_parameters": [],
1297
+ }
987
1298
  )
988
1299
  elements.append(
989
- dict(
990
- id_=len(elements),
991
- is_pending=False,
992
- element_idx=elem_idx,
993
- seq_idx={},
994
- src_idx={},
995
- task_ID=task_idx,
996
- iteration_IDs=iter_IDs,
997
- )
1300
+ {
1301
+ "id_": len(elements),
1302
+ "is_pending": False,
1303
+ "element_idx": elem_idx,
1304
+ "seq_idx": {},
1305
+ "src_idx": {},
1306
+ "task_ID": task_idx,
1307
+ "iteration_IDs": iter_IDs,
1308
+ }
998
1309
  )
999
1310
  tasks.append(
1000
- dict(
1001
- id_=len(tasks),
1002
- is_pending=False,
1003
- element_IDs=elem_IDs,
1004
- )
1311
+ {
1312
+ "id_": len(tasks),
1313
+ "is_pending": False,
1314
+ "element_IDs": elem_IDs,
1315
+ }
1005
1316
  )
1006
1317
  return (tasks, elements, elem_iters, EARs)
1007
1318
 
1008
- def remove_path(self, path: str, fs) -> None:
1319
+ def remove_path(self, path: str | Path) -> None:
1009
1320
  """Try very hard to delete a directory or file.
1010
1321
 
1011
1322
  Dropbox (on Windows, at least) seems to try to re-sync files if the parent directory
@@ -1015,83 +1326,126 @@ class PersistentStore(ABC):
1015
1326
 
1016
1327
  """
1017
1328
 
1018
- @self.app.perm_error_retry()
1019
- def _remove_path(path: str, fs) -> None:
1020
- self.logger.debug(f"_remove_path: path={path}")
1021
- while fs.exists(path):
1022
- fs.rm(path, recursive=True)
1329
+ fs = self.fs
1330
+ assert fs is not None
1331
+
1332
+ @self._app.perm_error_retry()
1333
+ def _remove_path(_path: str) -> None:
1334
+ self.logger.debug(f"_remove_path: path={_path}")
1335
+ while fs.exists(_path):
1336
+ fs.rm(_path, recursive=True)
1023
1337
  time.sleep(0.5)
1024
1338
 
1025
- return _remove_path(path, fs)
1339
+ return _remove_path(str(path))
1026
1340
 
1027
- def rename_path(self, replaced: str, original: str, fs) -> None:
1341
+ def rename_path(self, replaced: str, original: str | Path) -> None:
1028
1342
  """Revert the replaced workflow path to its original name.
1029
1343
 
1030
1344
  This happens when new workflow creation fails and there is an existing workflow
1031
1345
  with the same name; the original workflow which was renamed, must be reverted."""
1032
1346
 
1033
- @self.app.perm_error_retry()
1034
- def _rename_path(replaced: str, original: str, fs) -> None:
1035
- self.logger.debug(f"_rename_path: {replaced!r} --> {original!r}.")
1347
+ fs = self.fs
1348
+ assert fs is not None
1349
+
1350
+ @self._app.perm_error_retry()
1351
+ def _rename_path(_replaced: str, _original: str) -> None:
1352
+ self.logger.debug(f"_rename_path: {_replaced!r} --> {_original!r}.")
1036
1353
  try:
1037
- fs.rename(replaced, original, recursive=True) # TODO: why need recursive?
1354
+ fs.rename(
1355
+ _replaced, _original, recursive=True
1356
+ ) # TODO: why need recursive?
1038
1357
  except TypeError:
1039
1358
  # `SFTPFileSystem.rename` has no `recursive` argument:
1040
- fs.rename(replaced, original)
1359
+ fs.rename(_replaced, _original)
1360
+
1361
+ return _rename_path(str(replaced), str(original))
1041
1362
 
1042
- return _rename_path(replaced, original, fs)
1363
+ @abstractmethod
1364
+ def _get_num_persistent_tasks(self) -> int:
1365
+ ...
1043
1366
 
1044
- def _get_num_total_tasks(self):
1367
+ def _get_num_total_tasks(self) -> int:
1045
1368
  """Get the total number of persistent and pending tasks."""
1046
1369
  return self._get_num_persistent_tasks() + len(self._pending.add_tasks)
1047
1370
 
1048
- def _get_num_total_loops(self):
1371
+ @abstractmethod
1372
+ def _get_num_persistent_loops(self) -> int:
1373
+ ...
1374
+
1375
+ def _get_num_total_loops(self) -> int:
1049
1376
  """Get the total number of persistent and pending loops."""
1050
1377
  return self._get_num_persistent_loops() + len(self._pending.add_loops)
1051
1378
 
1052
- def _get_num_total_submissions(self):
1379
+ @abstractmethod
1380
+ def _get_num_persistent_submissions(self) -> int:
1381
+ ...
1382
+
1383
+ def _get_num_total_submissions(self) -> int:
1053
1384
  """Get the total number of persistent and pending submissions."""
1054
1385
  return self._get_num_persistent_submissions() + len(self._pending.add_submissions)
1055
1386
 
1056
- def _get_num_total_elements(self):
1387
+ @abstractmethod
1388
+ def _get_num_persistent_elements(self) -> int:
1389
+ ...
1390
+
1391
+ def _get_num_total_elements(self) -> int:
1057
1392
  """Get the total number of persistent and pending elements."""
1058
1393
  return self._get_num_persistent_elements() + len(self._pending.add_elements)
1059
1394
 
1060
- def _get_num_total_elem_iters(self):
1395
+ @abstractmethod
1396
+ def _get_num_persistent_elem_iters(self) -> int:
1397
+ ...
1398
+
1399
+ def _get_num_total_elem_iters(self) -> int:
1061
1400
  """Get the total number of persistent and pending element iterations."""
1062
1401
  return self._get_num_persistent_elem_iters() + len(self._pending.add_elem_iters)
1063
1402
 
1403
+ @abstractmethod
1404
+ def _get_num_persistent_EARs(self) -> int:
1405
+ ...
1406
+
1064
1407
  @TimeIt.decorator
1065
- def _get_num_total_EARs(self):
1408
+ def _get_num_total_EARs(self) -> int:
1066
1409
  """Get the total number of persistent and pending EARs."""
1067
1410
  return self._get_num_persistent_EARs() + len(self._pending.add_EARs)
1068
1411
 
1069
- def _get_task_total_num_elements(self, task_ID: int):
1412
+ def _get_task_total_num_elements(self, task_ID: int) -> int:
1070
1413
  """Get the total number of persistent and pending elements of a given task."""
1071
1414
  return len(self.get_task(task_ID).element_IDs)
1072
1415
 
1073
- def _get_num_total_parameters(self):
1416
+ @abstractmethod
1417
+ def _get_num_persistent_parameters(self) -> int:
1418
+ ...
1419
+
1420
+ def _get_num_total_parameters(self) -> int:
1074
1421
  """Get the total number of persistent and pending parameters."""
1075
1422
  return self._get_num_persistent_parameters() + len(self._pending.add_parameters)
1076
1423
 
1077
- def _get_num_total_input_files(self):
1424
+ def _get_num_total_input_files(self) -> int:
1078
1425
  """Get the total number of persistent and pending user-supplied input files."""
1079
- num_pend_inp_files = len([i for i in self._pending.add_files if i["is_input"]])
1080
- return self._get_num_persistent_input_files() + num_pend_inp_files
1426
+ return self._get_num_persistent_input_files() + sum(
1427
+ fd["is_input"] for fd in self._pending.add_files
1428
+ )
1429
+
1430
+ @abstractmethod
1431
+ def _get_num_persistent_added_tasks(self) -> int:
1432
+ ...
1081
1433
 
1082
- def _get_num_total_added_tasks(self):
1434
+ def _get_num_total_added_tasks(self) -> int:
1083
1435
  """Get the total number of tasks ever added to the workflow."""
1084
1436
  return self._get_num_persistent_added_tasks() + len(self._pending.add_tasks)
1085
1437
 
1086
- def _get_num_persistent_input_files(self):
1087
- return len(list(self.workflow.input_files_path.glob("*")))
1438
+ def _get_num_persistent_input_files(self) -> int:
1439
+ return sum(1 for _ in self.workflow.input_files_path.glob("*"))
1088
1440
 
1089
- def save(self):
1441
+ def save(self) -> None:
1090
1442
  """Commit pending changes to disk, if not in batch-update mode."""
1091
1443
  if not self.workflow._in_batch_mode:
1092
1444
  self._pending.commit_all()
1093
1445
 
1094
- def add_template_components(self, temp_comps: Dict, save: bool = True) -> None:
1446
+ def add_template_components(
1447
+ self, temp_comps: Mapping[str, dict], save: bool = True
1448
+ ) -> None:
1095
1449
  """
1096
1450
  Add template components to the workflow.
1097
1451
  """
@@ -1107,11 +1461,11 @@ class PersistentStore(ABC):
1107
1461
  if save:
1108
1462
  self.save()
1109
1463
 
1110
- def add_task(self, idx: int, task_template: Dict, save: bool = True):
1464
+ def add_task(self, idx: int, task_template: Mapping, save: bool = True):
1111
1465
  """Add a new task to the workflow."""
1112
- self.logger.debug(f"Adding store task.")
1466
+ self.logger.debug("Adding store task.")
1113
1467
  new_ID = self._get_num_total_added_tasks()
1114
- self._pending.add_tasks[new_ID] = self._store_task_cls(
1468
+ self._pending.add_tasks[new_ID] = self._store_task_cls()(
1115
1469
  id_=new_ID,
1116
1470
  index=idx,
1117
1471
  task_template=task_template,
@@ -1124,39 +1478,45 @@ class PersistentStore(ABC):
1124
1478
 
1125
1479
  def add_loop(
1126
1480
  self,
1127
- loop_template: Dict,
1128
- iterable_parameters,
1129
- parents: List[str],
1130
- num_added_iterations: Dict[Tuple[int], int],
1131
- iter_IDs: List[int],
1481
+ loop_template: Mapping[str, Any],
1482
+ iterable_parameters: Mapping[str, IterableParam],
1483
+ output_parameters: Mapping[str, int],
1484
+ parents: Sequence[str],
1485
+ num_added_iterations: Mapping[tuple[int, ...], int],
1486
+ iter_IDs: Iterable[int],
1132
1487
  save: bool = True,
1133
1488
  ):
1134
1489
  """Add a new loop to the workflow."""
1135
- self.logger.debug(f"Adding store loop.")
1490
+ self.logger.debug("Adding store loop.")
1136
1491
  new_idx = self._get_num_total_loops()
1137
- added_iters = [[list(k), v] for k, v in num_added_iterations.items()]
1492
+ added_iters: list[list[list[int] | int]] = [
1493
+ [list(k), v] for k, v in num_added_iterations.items()
1494
+ ]
1138
1495
  self._pending.add_loops[new_idx] = {
1139
- "loop_template": loop_template,
1140
- "iterable_parameters": iterable_parameters,
1141
- "parents": parents,
1496
+ "loop_template": dict(loop_template),
1497
+ "iterable_parameters": cast("dict", iterable_parameters),
1498
+ "output_parameters": cast("dict", output_parameters),
1499
+ "parents": list(parents),
1142
1500
  "num_added_iterations": added_iters,
1143
1501
  }
1144
1502
 
1145
1503
  for i in iter_IDs:
1146
- self._pending.update_loop_indices[i].update({loop_template["name"]: 0})
1504
+ self._pending.update_loop_indices[i][loop_template["name"]] = 0
1147
1505
 
1148
1506
  if save:
1149
1507
  self.save()
1150
1508
 
1151
1509
  @TimeIt.decorator
1152
- def add_submission(self, sub_idx: int, sub_js: Dict, save: bool = True):
1510
+ def add_submission(
1511
+ self, sub_idx: int, sub_js: Mapping[str, JSONed], save: bool = True
1512
+ ):
1153
1513
  """Add a new submission."""
1154
- self.logger.debug(f"Adding store submission.")
1514
+ self.logger.debug("Adding store submission.")
1155
1515
  self._pending.add_submissions[sub_idx] = sub_js
1156
1516
  if save:
1157
1517
  self.save()
1158
1518
 
1159
- def add_element_set(self, task_id: int, es_js: Dict, save: bool = True):
1519
+ def add_element_set(self, task_id: int, es_js: Mapping, save: bool = True):
1160
1520
  """
1161
1521
  Add an element set to a task.
1162
1522
  """
@@ -1165,13 +1525,18 @@ class PersistentStore(ABC):
1165
1525
  self.save()
1166
1526
 
1167
1527
  def add_element(
1168
- self, task_ID: int, es_idx: int, seq_idx: Dict, src_idx: Dict, save: bool = True
1169
- ):
1528
+ self,
1529
+ task_ID: int,
1530
+ es_idx: int,
1531
+ seq_idx: dict[str, int],
1532
+ src_idx: dict[str, int],
1533
+ save: bool = True,
1534
+ ) -> int:
1170
1535
  """Add a new element to a task."""
1171
- self.logger.debug(f"Adding store element.")
1536
+ self.logger.debug("Adding store element.")
1172
1537
  new_ID = self._get_num_total_elements()
1173
1538
  new_elem_idx = self._get_task_total_num_elements(task_ID)
1174
- self._pending.add_elements[new_ID] = self._store_elem_cls(
1539
+ self._pending.add_elements[new_ID] = self._store_elem_cls()(
1175
1540
  id_=new_ID,
1176
1541
  is_pending=True,
1177
1542
  index=new_elem_idx,
@@ -1189,15 +1554,15 @@ class PersistentStore(ABC):
1189
1554
  def add_element_iteration(
1190
1555
  self,
1191
1556
  element_ID: int,
1192
- data_idx: Dict,
1193
- schema_parameters: List[str],
1194
- loop_idx: Optional[Dict] = None,
1557
+ data_idx: DataIndex,
1558
+ schema_parameters: list[str],
1559
+ loop_idx: Mapping[str, int] | None = None,
1195
1560
  save: bool = True,
1196
1561
  ) -> int:
1197
1562
  """Add a new iteration to an element."""
1198
- self.logger.debug(f"Adding store element-iteration.")
1563
+ self.logger.debug("Adding store element-iteration.")
1199
1564
  new_ID = self._get_num_total_elem_iters()
1200
- self._pending.add_elem_iters[new_ID] = self._store_iter_cls(
1565
+ self._pending.add_elem_iters[new_ID] = self._store_iter_cls()(
1201
1566
  id_=new_ID,
1202
1567
  element_ID=element_ID,
1203
1568
  is_pending=True,
@@ -1217,80 +1582,143 @@ class PersistentStore(ABC):
1217
1582
  self,
1218
1583
  elem_iter_ID: int,
1219
1584
  action_idx: int,
1220
- commands_idx: List[int],
1221
- data_idx: Dict,
1222
- metadata: Dict,
1585
+ commands_idx: list[int],
1586
+ data_idx: DataIndex,
1587
+ metadata: Metadata | None = None,
1223
1588
  save: bool = True,
1224
1589
  ) -> int:
1225
1590
  """Add a new EAR to an element iteration."""
1226
- self.logger.debug(f"Adding store EAR.")
1591
+ self.logger.debug("Adding store EAR.")
1227
1592
  new_ID = self._get_num_total_EARs()
1228
- self._pending.add_EARs[new_ID] = self._store_EAR_cls(
1593
+ self._pending.add_EARs[new_ID] = self._store_EAR_cls()(
1229
1594
  id_=new_ID,
1230
1595
  is_pending=True,
1231
1596
  elem_iter_ID=elem_iter_ID,
1232
1597
  action_idx=action_idx,
1233
1598
  commands_idx=commands_idx,
1234
1599
  data_idx=data_idx,
1235
- metadata=metadata,
1600
+ metadata=metadata or {},
1236
1601
  )
1237
1602
  self._pending.add_elem_iter_EAR_IDs[elem_iter_ID][action_idx].append(new_ID)
1238
1603
  if save:
1239
1604
  self.save()
1240
1605
  return new_ID
1241
1606
 
1242
- def add_submission_part(
1243
- self, sub_idx: int, dt_str: str, submitted_js_idx: List[int], save: bool = True
1607
+ @TimeIt.decorator
1608
+ def set_run_dirs(
1609
+ self, run_dir_indices: np.ndarray, run_idx: np.ndarray, save: bool = True
1610
+ ):
1611
+ self.logger.debug(f"Setting {run_idx.size} run directory indices.")
1612
+ self._pending.set_run_dirs.append((run_dir_indices, run_idx))
1613
+ if save:
1614
+ self.save()
1615
+
1616
+ def update_at_submit_metadata(
1617
+ self, sub_idx: int, submission_parts: dict[str, list[int]], save: bool = True
1244
1618
  ):
1245
1619
  """
1246
- Add a submission part.
1620
+ Update metadata that is set at submit-time.
1247
1621
  """
1248
- self._pending.add_submission_parts[sub_idx][dt_str] = submitted_js_idx
1622
+ if submission_parts:
1623
+ self._pending.update_at_submit_metadata[sub_idx][
1624
+ "submission_parts"
1625
+ ] = submission_parts
1249
1626
  if save:
1250
1627
  self.save()
1251
1628
 
1252
1629
  @TimeIt.decorator
1253
- def set_EAR_submission_index(
1254
- self, EAR_ID: int, sub_idx: int, save: bool = True
1630
+ def set_run_submission_data(
1631
+ self, EAR_ID: int, cmds_ID: int | None, sub_idx: int, save: bool = True
1255
1632
  ) -> None:
1256
1633
  """
1257
- Set the submission index for an element action run.
1634
+ Set the run submission data, like the submission index for an element action run.
1258
1635
  """
1259
- self._pending.set_EAR_submission_indices[EAR_ID] = sub_idx
1636
+ self._pending.set_EAR_submission_data[EAR_ID] = (sub_idx, cmds_ID)
1260
1637
  if save:
1261
1638
  self.save()
1262
1639
 
1263
- def set_EAR_start(self, EAR_ID: int, save: bool = True) -> datetime:
1640
+ def set_EAR_start(
1641
+ self,
1642
+ EAR_ID: int,
1643
+ run_dir: Path | None,
1644
+ port_number: int | None,
1645
+ save: bool = True,
1646
+ ) -> datetime:
1264
1647
  """
1265
1648
  Mark an element action run as started.
1266
1649
  """
1267
- dt = datetime.utcnow()
1268
- ss_js = self.app.RunDirAppFiles.take_snapshot()
1650
+ dt = current_timestamp()
1651
+ ss_js = self._app.RunDirAppFiles.take_snapshot() if run_dir else None
1652
+ run_hostname = socket.gethostname()
1653
+ self._pending.set_EAR_starts[EAR_ID] = (dt, ss_js, run_hostname, port_number)
1654
+ if save:
1655
+ self.save()
1656
+ return dt
1657
+
1658
+ def set_multi_run_starts(
1659
+ self,
1660
+ run_ids: list[int],
1661
+ run_dirs: list[Path | None],
1662
+ port_number: int,
1663
+ save: bool = True,
1664
+ ) -> datetime:
1665
+ dt = current_timestamp()
1269
1666
  run_hostname = socket.gethostname()
1270
- self._pending.set_EAR_starts[EAR_ID] = (dt, ss_js, run_hostname)
1667
+ run_start_data: dict[int, tuple] = {}
1668
+ for id_i, dir_i in zip(run_ids, run_dirs):
1669
+ ss_js_i = self._app.RunDirAppFiles.take_snapshot(dir_i) if dir_i else None
1670
+ run_start_data[id_i] = (dt, ss_js_i, run_hostname, port_number)
1671
+
1672
+ self._pending.set_EAR_starts.update(run_start_data)
1271
1673
  if save:
1272
1674
  self.save()
1273
1675
  return dt
1274
1676
 
1275
1677
  def set_EAR_end(
1276
- self, EAR_ID: int, exit_code: int, success: bool, save: bool = True
1678
+ self,
1679
+ EAR_ID: int,
1680
+ exit_code: int,
1681
+ success: bool,
1682
+ snapshot: bool,
1683
+ save: bool = True,
1277
1684
  ) -> datetime:
1278
1685
  """
1279
1686
  Mark an element action run as finished.
1280
1687
  """
1281
1688
  # TODO: save output files
1282
- dt = datetime.utcnow()
1283
- ss_js = self.app.RunDirAppFiles.take_snapshot()
1689
+ dt = current_timestamp()
1690
+ ss_js = self._app.RunDirAppFiles.take_snapshot() if snapshot else None
1284
1691
  self._pending.set_EAR_ends[EAR_ID] = (dt, ss_js, exit_code, success)
1285
1692
  if save:
1286
1693
  self.save()
1287
1694
  return dt
1288
1695
 
1289
- def set_EAR_skip(self, EAR_ID: int, save: bool = True) -> None:
1696
+ def set_multi_run_ends(
1697
+ self,
1698
+ run_ids: list[int],
1699
+ run_dirs: list[Path | None],
1700
+ exit_codes: list[int],
1701
+ successes: list[bool],
1702
+ save: bool = True,
1703
+ ) -> datetime:
1704
+ self.logger.info("PersistentStore.set_multi_run_ends.")
1705
+ dt = current_timestamp()
1706
+ run_end_data: dict[int, tuple] = {}
1707
+ for id_i, dir_i, ex_i, sc_i in zip(run_ids, run_dirs, exit_codes, successes):
1708
+ ss_js_i = self._app.RunDirAppFiles.take_snapshot(dir_i) if dir_i else None
1709
+ run_end_data[id_i] = (dt, ss_js_i, ex_i, sc_i)
1710
+
1711
+ self._pending.set_EAR_ends.update(run_end_data)
1712
+ if save:
1713
+ self.save()
1714
+ self.logger.info("PersistentStore.set_multi_run_ends finished.")
1715
+ return dt
1716
+
1717
+ def set_EAR_skip(self, skip_reasons: dict[int, int], save: bool = True) -> None:
1290
1718
  """
1291
- Mark an element action run as skipped.
1719
+ Mark element action runs as skipped for the specified reasons.
1292
1720
  """
1293
- self._pending.set_EAR_skips.append(EAR_ID)
1721
+ self._pending.set_EAR_skips.update(skip_reasons)
1294
1722
  if save:
1295
1723
  self.save()
1296
1724
 
@@ -1306,65 +1734,62 @@ class PersistentStore(ABC):
1306
1734
  self,
1307
1735
  sub_idx: int,
1308
1736
  js_idx: int,
1309
- version_info: Optional[Dict] = None,
1310
- submit_time: Optional[str] = None,
1311
- submit_hostname: Optional[str] = None,
1312
- submit_machine: Optional[str] = None,
1313
- submit_cmdline: Optional[List[str]] = None,
1314
- os_name: Optional[str] = None,
1315
- shell_name: Optional[str] = None,
1316
- scheduler_name: Optional[str] = None,
1317
- scheduler_job_ID: Optional[str] = None,
1318
- process_ID: Optional[int] = None,
1737
+ version_info: VersionInfo | None = None,
1738
+ submit_time: str | None = None,
1739
+ submit_hostname: str | None = None,
1740
+ submit_machine: str | None = None,
1741
+ shell_idx: int | None = None,
1742
+ submit_cmdline: list[str] | None = None,
1743
+ os_name: str | None = None,
1744
+ shell_name: str | None = None,
1745
+ scheduler_name: str | None = None,
1746
+ scheduler_job_ID: str | None = None,
1747
+ process_ID: int | None = None,
1319
1748
  save: bool = True,
1320
1749
  ):
1321
1750
  """
1322
1751
  Set the metadata for a job script.
1323
1752
  """
1753
+ entry = self._pending.set_js_metadata[sub_idx][js_idx]
1324
1754
  if version_info:
1325
- self._pending.set_js_metadata[sub_idx][js_idx]["version_info"] = version_info
1755
+ entry["version_info"] = version_info
1326
1756
  if submit_time:
1327
- self._pending.set_js_metadata[sub_idx][js_idx]["submit_time"] = submit_time
1757
+ entry["submit_time"] = submit_time
1328
1758
  if submit_hostname:
1329
- self._pending.set_js_metadata[sub_idx][js_idx][
1330
- "submit_hostname"
1331
- ] = submit_hostname
1759
+ entry["submit_hostname"] = submit_hostname
1332
1760
  if submit_machine:
1333
- self._pending.set_js_metadata[sub_idx][js_idx][
1334
- "submit_machine"
1335
- ] = submit_machine
1761
+ entry["submit_machine"] = submit_machine
1762
+ if shell_idx is not None:
1763
+ entry["shell_idx"] = shell_idx
1336
1764
  if submit_cmdline:
1337
- self._pending.set_js_metadata[sub_idx][js_idx][
1338
- "submit_cmdline"
1339
- ] = submit_cmdline
1765
+ entry["submit_cmdline"] = submit_cmdline
1340
1766
  if os_name:
1341
- self._pending.set_js_metadata[sub_idx][js_idx]["os_name"] = os_name
1767
+ entry["os_name"] = os_name
1342
1768
  if shell_name:
1343
- self._pending.set_js_metadata[sub_idx][js_idx]["shell_name"] = shell_name
1769
+ entry["shell_name"] = shell_name
1344
1770
  if scheduler_name:
1345
- self._pending.set_js_metadata[sub_idx][js_idx][
1346
- "scheduler_name"
1347
- ] = scheduler_name
1348
- if scheduler_job_ID:
1349
- self._pending.set_js_metadata[sub_idx][js_idx][
1350
- "scheduler_job_ID"
1351
- ] = scheduler_job_ID
1352
- if process_ID:
1353
- self._pending.set_js_metadata[sub_idx][js_idx]["process_ID"] = process_ID
1771
+ entry["scheduler_name"] = scheduler_name
1772
+ if scheduler_job_ID or process_ID:
1773
+ entry["scheduler_job_ID"] = scheduler_job_ID
1774
+ if process_ID or scheduler_job_ID:
1775
+ entry["process_ID"] = process_ID
1354
1776
  if save:
1355
1777
  self.save()
1356
1778
 
1779
+ @writes_parameter_data
1357
1780
  def _add_parameter(
1358
1781
  self,
1359
1782
  is_set: bool,
1360
- source: Dict,
1361
- data: Any = None,
1362
- file: Dict = None,
1783
+ source: ParamSource,
1784
+ data: (
1785
+ ParameterValue | list | tuple | set | dict | int | float | str | None | Any
1786
+ ) = None,
1787
+ file: File | None = None,
1363
1788
  save: bool = True,
1364
1789
  ) -> int:
1365
1790
  self.logger.debug(f"Adding store parameter{f' (unset)' if not is_set else ''}.")
1366
1791
  new_idx = self._get_num_total_parameters()
1367
- self._pending.add_parameters[new_idx] = self._store_param_cls(
1792
+ self._pending.add_parameters[new_idx] = self._store_param_cls()(
1368
1793
  id_=new_idx,
1369
1794
  is_pending=True,
1370
1795
  is_set=is_set,
@@ -1380,11 +1805,11 @@ class PersistentStore(ABC):
1380
1805
  self,
1381
1806
  store_contents: bool,
1382
1807
  is_input: bool,
1383
- path=None,
1384
- contents: str = None,
1385
- filename: str = None,
1808
+ path: Path | str,
1809
+ contents: str | None = None,
1810
+ filename: str | None = None,
1386
1811
  clean_up: bool = False,
1387
- ):
1812
+ ) -> File:
1388
1813
  if filename is None:
1389
1814
  filename = Path(path).name
1390
1815
 
@@ -1396,7 +1821,6 @@ class PersistentStore(ABC):
1396
1821
  else:
1397
1822
  # assume path is inside the EAR execution directory; transform that to the
1398
1823
  # equivalent artifacts directory:
1399
- assert path is not None
1400
1824
  exec_sub_path = Path(path).relative_to(self.path)
1401
1825
  dst_path = Path(
1402
1826
  self.workflow.task_artifacts_path, *exec_sub_path.parts[1:]
@@ -1404,9 +1828,9 @@ class PersistentStore(ABC):
1404
1828
  if dst_path.is_file():
1405
1829
  dst_path = dst_path.with_suffix(dst_path.suffix + "_2") # TODO: better!
1406
1830
  else:
1407
- dst_path = path
1831
+ dst_path = Path(path)
1408
1832
 
1409
- file_param_dat = {
1833
+ file_param_dat: File = {
1410
1834
  "store_contents": store_contents,
1411
1835
  "path": str(dst_path.relative_to(self.path)),
1412
1836
  }
@@ -1416,7 +1840,7 @@ class PersistentStore(ABC):
1416
1840
  "is_input": is_input,
1417
1841
  "dst_path": str(dst_path),
1418
1842
  "path": str(path),
1419
- "contents": contents,
1843
+ "contents": contents or "",
1420
1844
  "clean_up": clean_up,
1421
1845
  }
1422
1846
  )
@@ -1427,17 +1851,17 @@ class PersistentStore(ABC):
1427
1851
  self,
1428
1852
  store_contents: bool,
1429
1853
  is_input: bool,
1430
- param_id: int = None,
1431
- path=None,
1432
- contents: str = None,
1433
- filename: str = None,
1854
+ param_id: int | None,
1855
+ path: Path | str,
1856
+ contents: str | None = None,
1857
+ filename: str | None = None,
1434
1858
  clean_up: bool = False,
1435
1859
  save: bool = True,
1436
1860
  ):
1437
1861
  """
1438
1862
  Set details of a file, including whether it is associated with a parameter.
1439
1863
  """
1440
- self.logger.debug(f"Setting new file")
1864
+ self.logger.debug("Setting new file")
1441
1865
  file_param_dat = self._prepare_set_file(
1442
1866
  store_contents=store_contents,
1443
1867
  is_input=is_input,
@@ -1457,16 +1881,16 @@ class PersistentStore(ABC):
1457
1881
  self,
1458
1882
  store_contents: bool,
1459
1883
  is_input: bool,
1460
- source: Dict,
1461
- path=None,
1462
- contents: str = None,
1463
- filename: str = None,
1884
+ source: ParamSource,
1885
+ path: Path | str,
1886
+ contents: str | None = None,
1887
+ filename: str | None = None,
1464
1888
  save: bool = True,
1465
1889
  ):
1466
1890
  """
1467
1891
  Add a file that will be associated with a parameter.
1468
1892
  """
1469
- self.logger.debug(f"Adding new file")
1893
+ self.logger.debug("Adding new file")
1470
1894
  file_param_dat = self._prepare_set_file(
1471
1895
  store_contents=store_contents,
1472
1896
  is_input=is_input,
@@ -1484,7 +1908,7 @@ class PersistentStore(ABC):
1484
1908
  self.save()
1485
1909
  return p_id
1486
1910
 
1487
- def _append_files(self, files: Dict[int, Dict]):
1911
+ def _append_files(self, files: list[FileDescriptor]):
1488
1912
  """Add new files to the files or artifacts directories."""
1489
1913
  for dat in files:
1490
1914
  if dat["store_contents"]:
@@ -1501,18 +1925,30 @@ class PersistentStore(ABC):
1501
1925
  with dst_path.open("wt") as fp:
1502
1926
  fp.write(dat["contents"])
1503
1927
 
1504
- def add_set_parameter(self, data: Any, source: Dict, save: bool = True) -> int:
1928
+ @writes_parameter_data
1929
+ def add_set_parameter(
1930
+ self,
1931
+ data: ParameterValue | list | tuple | set | dict | int | float | str | Any,
1932
+ source: ParamSource,
1933
+ save: bool = True,
1934
+ ) -> int:
1505
1935
  """
1506
1936
  Add a parameter that is set to a value.
1507
1937
  """
1508
1938
  return self._add_parameter(data=data, is_set=True, source=source, save=save)
1509
1939
 
1510
- def add_unset_parameter(self, source: Dict, save: bool = True) -> int:
1940
+ @writes_parameter_data
1941
+ def add_unset_parameter(self, source: ParamSource, save: bool = True) -> int:
1511
1942
  """
1512
1943
  Add a parameter that is not set to any value.
1513
1944
  """
1514
1945
  return self._add_parameter(data=None, is_set=False, source=source, save=save)
1515
1946
 
1947
+ @abstractmethod
1948
+ def _set_parameter_values(self, set_parameters: dict[int, tuple[Any, bool]]):
1949
+ ...
1950
+
1951
+ @writes_parameter_data
1516
1952
  def set_parameter_value(
1517
1953
  self, param_id: int, value: Any, is_file: bool = False, save: bool = True
1518
1954
  ):
@@ -1526,9 +1962,19 @@ class PersistentStore(ABC):
1526
1962
  if save:
1527
1963
  self.save()
1528
1964
 
1965
+ @writes_parameter_data
1966
+ def set_parameter_values(self, values: dict[int, Any], save: bool = True):
1967
+ """Set multiple non-file parameter values by parameter IDs."""
1968
+ param_ids = values.keys()
1969
+ self.logger.debug(f"Setting multiple store parameter IDs {param_ids!r}.")
1970
+ self._pending.set_parameters.update({k: (v, False) for k, v in values.items()})
1971
+ if save:
1972
+ self.save()
1973
+
1529
1974
  @TimeIt.decorator
1975
+ @writes_parameter_data
1530
1976
  def update_param_source(
1531
- self, param_sources: Dict[int, Dict], save: bool = True
1977
+ self, param_sources: Mapping[int, ParamSource], save: bool = True
1532
1978
  ) -> None:
1533
1979
  """
1534
1980
  Set the source of a parameter.
@@ -1539,7 +1985,10 @@ class PersistentStore(ABC):
1539
1985
  self.save()
1540
1986
 
1541
1987
  def update_loop_num_iters(
1542
- self, index: int, num_added_iters: int, save: bool = True
1988
+ self,
1989
+ index: int,
1990
+ num_added_iters: Mapping[tuple[int, ...], int],
1991
+ save: bool = True,
1543
1992
  ) -> None:
1544
1993
  """
1545
1994
  Add iterations to a loop.
@@ -1547,16 +1996,17 @@ class PersistentStore(ABC):
1547
1996
  self.logger.debug(
1548
1997
  f"Updating loop {index!r} num added iterations to {num_added_iters!r}."
1549
1998
  )
1550
- num_added_iters = [[list(k), v] for k, v in num_added_iters.items()]
1551
- self._pending.update_loop_num_iters[index] = num_added_iters
1999
+ self._pending.update_loop_num_iters[index] = [
2000
+ [list(k), v] for k, v in num_added_iters.items()
2001
+ ]
1552
2002
  if save:
1553
2003
  self.save()
1554
2004
 
1555
2005
  def update_loop_parents(
1556
2006
  self,
1557
2007
  index: int,
1558
- num_added_iters: int,
1559
- parents: List[str],
2008
+ num_added_iters: Mapping[tuple[int, ...], int],
2009
+ parents: Sequence[str],
1560
2010
  save: bool = True,
1561
2011
  ) -> None:
1562
2012
  """
@@ -1566,31 +2016,50 @@ class PersistentStore(ABC):
1566
2016
  f"Updating loop {index!r} parents to {parents!r}, and num added iterations "
1567
2017
  f"to {num_added_iters}."
1568
2018
  )
1569
- num_added_iters = [[list(k), v] for k, v in num_added_iters.items()]
1570
- self._pending.update_loop_num_iters[index] = num_added_iters
1571
- self._pending.update_loop_parents[index] = parents
2019
+ self._pending.update_loop_num_iters[index] = [
2020
+ [list(k), v] for k, v in num_added_iters.items()
2021
+ ]
2022
+ self._pending.update_loop_parents[index] = list(parents)
1572
2023
  if save:
1573
2024
  self.save()
1574
2025
 
1575
- def get_template_components(self) -> Dict:
2026
+ def update_iter_data_indices(self, data_indices: dict[int, DataIndex]):
2027
+ """Update data indices of one or more iterations."""
2028
+ for k, v in data_indices.items():
2029
+ self._pending.update_iter_data_idx[k].update(v)
2030
+
2031
+ def update_run_data_indices(self, data_indices: dict[int, DataIndex]):
2032
+ """Update data indices of one or more runs."""
2033
+ for k, v in data_indices.items():
2034
+ self._pending.update_run_data_idx[k].update(v)
2035
+
2036
+ def get_template_components(self) -> dict[str, Any]:
1576
2037
  """Get all template components, including pending."""
1577
2038
  tc = copy.deepcopy(self._get_persistent_template_components())
1578
2039
  for typ in TEMPLATE_COMP_TYPES:
1579
- for hash_i, dat_i in self._pending.add_template_components[typ].items():
1580
- if typ not in tc:
1581
- tc[typ] = {}
1582
- tc[typ][hash_i] = dat_i
2040
+ for hash_i, dat_i in self._pending.add_template_components.get(
2041
+ typ, {}
2042
+ ).items():
2043
+ tc.setdefault(typ, {})[hash_i] = dat_i
1583
2044
 
1584
2045
  return tc
1585
2046
 
1586
- def get_template(self) -> Dict:
2047
+ @abstractmethod
2048
+ def _get_persistent_template_components(self) -> dict[str, Any]:
2049
+ ...
2050
+
2051
+ def get_template(self) -> dict[str, JSONed]:
1587
2052
  """
1588
2053
  Get the workflow template.
1589
2054
  """
1590
2055
  return self._get_persistent_template()
1591
2056
 
1592
- def _get_task_id_to_idx_map(self) -> Dict[int, int]:
1593
- return {i.id_: i.index for i in self.get_tasks()}
2057
+ @abstractmethod
2058
+ def _get_persistent_template(self) -> dict[str, JSONed]:
2059
+ ...
2060
+
2061
+ def _get_task_id_to_idx_map(self) -> dict[int, int]:
2062
+ return {task.id_: task.index for task in self.get_tasks()}
1594
2063
 
1595
2064
  @TimeIt.decorator
1596
2065
  def get_task(self, task_idx: int) -> AnySTask:
@@ -1599,182 +2068,292 @@ class PersistentStore(ABC):
1599
2068
  """
1600
2069
  return self.get_tasks()[task_idx]
1601
2070
 
1602
- def _process_retrieved_tasks(self, tasks: List[AnySTask]) -> List[AnySTask]:
2071
+ def __process_retrieved_tasks(self, tasks: Iterable[AnySTask]) -> list[AnySTask]:
1603
2072
  """Add pending data to retrieved tasks."""
1604
- tasks_new = []
1605
- for task_i in tasks:
2073
+ tasks_new: list[AnySTask] = []
2074
+ for task in tasks:
1606
2075
  # consider pending element IDs:
1607
- pend_elems = self._pending.add_elem_IDs.get(task_i.id_)
1608
- if pend_elems:
1609
- task_i = task_i.append_element_IDs(pend_elems)
1610
- tasks_new.append(task_i)
2076
+ if pend_elems := self._pending.add_elem_IDs.get(task.id_):
2077
+ task = task.append_element_IDs(pend_elems)
2078
+ tasks_new.append(task)
1611
2079
  return tasks_new
1612
2080
 
1613
- def _process_retrieved_loops(self, loops: Dict[int, Dict]) -> Dict[int, Dict]:
2081
+ def __process_retrieved_loops(
2082
+ self, loops: Iterable[tuple[int, LoopDescriptor]]
2083
+ ) -> dict[int, LoopDescriptor]:
1614
2084
  """Add pending data to retrieved loops."""
1615
- loops_new = {}
1616
- for id_, loop_i in loops.items():
2085
+ loops_new: dict[int, LoopDescriptor] = {}
2086
+ for id_, loop_i in loops:
1617
2087
  if "num_added_iterations" not in loop_i:
1618
2088
  loop_i["num_added_iterations"] = 1
1619
2089
  # consider pending changes to num added iterations:
1620
- pend_num_iters = self._pending.update_loop_num_iters.get(id_)
1621
- if pend_num_iters:
2090
+ if pend_num_iters := self._pending.update_loop_num_iters.get(id_):
1622
2091
  loop_i["num_added_iterations"] = pend_num_iters
1623
2092
  # consider pending change to parents:
1624
- pend_parents = self._pending.update_loop_parents.get(id_)
1625
- if pend_parents:
2093
+ if pend_parents := self._pending.update_loop_parents.get(id_):
1626
2094
  loop_i["parents"] = pend_parents
1627
2095
 
1628
2096
  loops_new[id_] = loop_i
1629
2097
  return loops_new
1630
2098
 
1631
- def get_tasks_by_IDs(self, id_lst: Iterable[int]) -> List[AnySTask]:
2099
+ @staticmethod
2100
+ def __split_pending(
2101
+ ids: Iterable[int], all_pending: Mapping[int, Any]
2102
+ ) -> tuple[tuple[int, ...], set[int], set[int]]:
2103
+ id_all = tuple(ids)
2104
+ id_set = set(id_all)
2105
+ id_pers = id_set.difference(all_pending)
2106
+ id_pend = id_set.intersection(all_pending)
2107
+ return id_all, id_pers, id_pend
2108
+
2109
+ @abstractmethod
2110
+ def _get_persistent_tasks(self, id_lst: Iterable[int]) -> dict[int, AnySTask]:
2111
+ ...
2112
+
2113
+ def get_tasks_by_IDs(self, ids: Iterable[int]) -> Sequence[AnySTask]:
1632
2114
  """
1633
2115
  Get tasks with the given IDs.
1634
2116
  """
1635
2117
  # separate pending and persistent IDs:
1636
- id_set = set(id_lst)
1637
- all_pending = set(self._pending.add_tasks)
1638
- id_pers = id_set.difference(all_pending)
1639
- id_pend = id_set.intersection(all_pending)
1640
2118
 
2119
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_tasks)
1641
2120
  tasks = self._get_persistent_tasks(id_pers) if id_pers else {}
1642
- tasks.update({i: self._pending.add_tasks[i] for i in id_pend})
2121
+ tasks.update((id_, self._pending.add_tasks[id_]) for id_ in id_pend)
1643
2122
 
1644
2123
  # order as requested:
1645
- tasks = [tasks[id_] for id_ in id_lst]
1646
-
1647
- return self._process_retrieved_tasks(tasks)
2124
+ return self.__process_retrieved_tasks(tasks[id_] for id_ in ids)
1648
2125
 
1649
2126
  @TimeIt.decorator
1650
- def get_tasks(self) -> List[AnySTask]:
2127
+ def get_tasks(self) -> list[AnySTask]:
1651
2128
  """Retrieve all tasks, including pending."""
1652
2129
  tasks = self._get_persistent_tasks(range(self._get_num_persistent_tasks()))
1653
- tasks.update({k: v for k, v in self._pending.add_tasks.items()})
2130
+ tasks.update(self._pending.add_tasks)
1654
2131
 
1655
2132
  # order by index:
1656
- tasks = sorted((i for i in tasks.values()), key=lambda x: x.index)
2133
+ return self.__process_retrieved_tasks(
2134
+ sorted(tasks.values(), key=lambda x: x.index)
2135
+ )
1657
2136
 
1658
- return self._process_retrieved_tasks(tasks)
2137
+ @abstractmethod
2138
+ def _get_persistent_loops(
2139
+ self, id_lst: Iterable[int] | None = None
2140
+ ) -> dict[int, LoopDescriptor]:
2141
+ ...
1659
2142
 
1660
- def get_loops_by_IDs(self, id_lst: Iterable[int]) -> Dict[int, Dict]:
2143
+ def get_loops_by_IDs(self, ids: Iterable[int]) -> dict[int, LoopDescriptor]:
1661
2144
  """Retrieve loops by index (ID), including pending."""
1662
2145
 
1663
2146
  # separate pending and persistent IDs:
1664
- id_set = set(id_lst)
1665
- all_pending = set(self._pending.add_loops)
1666
- id_pers = id_set.difference(all_pending)
1667
- id_pend = id_set.intersection(all_pending)
2147
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_loops)
1668
2148
 
1669
2149
  loops = self._get_persistent_loops(id_pers) if id_pers else {}
1670
- loops.update({i: self._pending.add_loops[i] for i in id_pend})
2150
+ loops.update((id_, self._pending.add_loops[id_]) for id_ in id_pend)
1671
2151
 
1672
2152
  # order as requested:
1673
- loops = {id_: loops[id_] for id_ in id_lst}
2153
+ return self.__process_retrieved_loops((id_, loops[id_]) for id_ in ids)
1674
2154
 
1675
- return self._process_retrieved_loops(loops)
1676
-
1677
- def get_loops(self) -> Dict[int, Dict]:
2155
+ def get_loops(self) -> dict[int, LoopDescriptor]:
1678
2156
  """Retrieve all loops, including pending."""
1679
2157
 
1680
2158
  loops = self._get_persistent_loops()
1681
- loops.update({k: v for k, v in self._pending.add_loops.items()})
2159
+ loops.update(self._pending.add_loops)
1682
2160
 
1683
2161
  # order by index/ID:
1684
- loops = dict(sorted(loops.items()))
2162
+ return self.__process_retrieved_loops(sorted(loops.items()))
1685
2163
 
1686
- return self._process_retrieved_loops(loops)
2164
+ @abstractmethod
2165
+ def _get_persistent_submissions(
2166
+ self, id_lst: Iterable[int] | None = None
2167
+ ) -> dict[int, Mapping[str, JSONed]]:
2168
+ ...
1687
2169
 
1688
2170
  @TimeIt.decorator
1689
- def get_submissions(self) -> Dict[int, Dict]:
2171
+ def get_submissions(self) -> dict[int, Mapping[str, JSONed]]:
1690
2172
  """Retrieve all submissions, including pending."""
1691
2173
 
1692
2174
  subs = self._get_persistent_submissions()
1693
- subs.update({k: v for k, v in self._pending.add_submissions.items()})
2175
+ subs.update(self._pending.add_submissions)
1694
2176
 
1695
2177
  # order by index/ID
1696
- subs = dict(sorted(subs.items()))
2178
+ return dict(sorted(subs.items()))
2179
+
2180
+ @TimeIt.decorator
2181
+ def get_submission_at_submit_metadata(
2182
+ self, sub_idx: int, metadata_attr: dict[str, Any] | None
2183
+ ) -> dict[str, Any]:
2184
+ """Retrieve the values of submission attributes that are stored at submit-time.
2185
+
2186
+ Notes
2187
+ -----
2188
+ This method may need to be overridden if these attributes are stored separately
2189
+ from the remainder of the submission attributes.
2190
+
2191
+ """
2192
+ return metadata_attr or {i: None for i in SUBMISSION_SUBMIT_TIME_KEYS}
2193
+
2194
+ @TimeIt.decorator
2195
+ def get_jobscript_at_submit_metadata(
2196
+ self,
2197
+ sub_idx: int,
2198
+ js_idx: int,
2199
+ metadata_attr: dict[str, Any] | None,
2200
+ ) -> dict[str, Any]:
2201
+ """For the specified jobscript, retrieve the values of jobscript-submit-time
2202
+ attributes.
2203
+
2204
+ Notes
2205
+ -----
2206
+ This method may need to be overridden if these jobscript-submit-time attributes
2207
+ are stored separately from the remainder of the jobscript attributes.
2208
+
2209
+ """
2210
+ return metadata_attr or {i: None for i in JOBSCRIPT_SUBMIT_TIME_KEYS}
2211
+
2212
+ @TimeIt.decorator
2213
+ def get_jobscript_block_run_ID_array(
2214
+ self, sub_idx: int, js_idx: int, blk_idx: int, run_ID_arr: NDArray | None
2215
+ ) -> NDArray:
2216
+ """For the specified jobscript-block, retrieve the run ID array.
2217
+
2218
+ Notes
2219
+ -----
2220
+ This method may need to be overridden if these attributes are stored separately
2221
+ from the remainder of the submission attributes.
2222
+
2223
+ """
2224
+ assert run_ID_arr is not None
2225
+ return np.asarray(run_ID_arr)
2226
+
2227
+ @TimeIt.decorator
2228
+ def get_jobscript_block_task_elements_map(
2229
+ self,
2230
+ sub_idx: int,
2231
+ js_idx: int,
2232
+ blk_idx: int,
2233
+ task_elems_map: dict[int, list[int]] | None,
2234
+ ) -> dict[int, list[int]]:
2235
+ """For the specified jobscript-block, retrieve the task-elements mapping.
1697
2236
 
1698
- return subs
2237
+ Notes
2238
+ -----
2239
+ This method may need to be overridden if these attributes are stored separately
2240
+ from the remainder of the submission attributes.
2241
+
2242
+ """
2243
+ assert task_elems_map is not None
2244
+ return task_elems_map
2245
+
2246
+ @TimeIt.decorator
2247
+ def get_jobscript_block_task_actions_array(
2248
+ self,
2249
+ sub_idx: int,
2250
+ js_idx: int,
2251
+ blk_idx: int,
2252
+ task_actions_arr: NDArray | list[tuple[int, int, int]] | None,
2253
+ ) -> NDArray:
2254
+ """For the specified jobscript-block, retrieve the task-actions array.
2255
+
2256
+ Notes
2257
+ -----
2258
+ This method may need to be overridden if these attributes are stored separately
2259
+ from the remainder of the submission attributes.
2260
+
2261
+ """
2262
+ assert task_actions_arr is not None
2263
+ return np.asarray(task_actions_arr)
2264
+
2265
+ @TimeIt.decorator
2266
+ def get_jobscript_block_dependencies(
2267
+ self,
2268
+ sub_idx: int,
2269
+ js_idx: int,
2270
+ blk_idx: int,
2271
+ js_dependencies: dict[tuple[int, int], ResolvedJobscriptBlockDependencies] | None,
2272
+ ) -> dict[tuple[int, int], ResolvedJobscriptBlockDependencies]:
2273
+ """For the specified jobscript-block, retrieve the dependencies.
2274
+
2275
+ Notes
2276
+ -----
2277
+ This method may need to be overridden if these attributes are stored separately
2278
+ from the remainder of the submission attributes.
2279
+
2280
+ """
2281
+ assert js_dependencies is not None
2282
+ return js_dependencies
1699
2283
 
1700
2284
  @TimeIt.decorator
1701
- def get_submissions_by_ID(self, id_lst: Iterable[int]) -> Dict[int, Dict]:
2285
+ def get_submissions_by_ID(
2286
+ self, ids: Iterable[int]
2287
+ ) -> dict[int, Mapping[str, JSONed]]:
1702
2288
  """
1703
2289
  Get submissions with the given IDs.
1704
2290
  """
1705
2291
  # separate pending and persistent IDs:
1706
- id_set = set(id_lst)
1707
- all_pending = set(self._pending.add_submissions)
1708
- id_pers = id_set.difference(all_pending)
1709
- id_pend = id_set.intersection(all_pending)
1710
-
2292
+ _, id_pers, id_pend = self.__split_pending(ids, self._pending.add_submissions)
1711
2293
  subs = self._get_persistent_submissions(id_pers) if id_pers else {}
1712
- subs.update({i: self._pending.add_submissions[i] for i in id_pend})
2294
+ subs.update((id_, self._pending.add_submissions[id_]) for id_ in id_pend)
1713
2295
 
1714
2296
  # order by index/ID
1715
- subs = dict(sorted(subs.items()))
2297
+ return dict(sorted(subs.items()))
1716
2298
 
1717
- return subs
2299
+ @abstractmethod
2300
+ def _get_persistent_elements(self, id_lst: Iterable[int]) -> dict[int, AnySElement]:
2301
+ ...
1718
2302
 
1719
2303
  @TimeIt.decorator
1720
- def get_elements(self, id_lst: Iterable[int]) -> List[AnySElement]:
2304
+ def get_elements(self, ids: Iterable[int]) -> Sequence[AnySElement]:
1721
2305
  """
1722
2306
  Get elements with the given IDs.
1723
2307
  """
1724
- self.logger.debug(f"PersistentStore.get_elements: id_lst={id_lst!r}")
1725
-
1726
2308
  # separate pending and persistent IDs:
1727
- id_set = set(id_lst)
1728
- all_pending = set(self._pending.add_elements)
1729
- id_pers = id_set.difference(all_pending)
1730
- id_pend = id_set.intersection(all_pending)
1731
-
2309
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_elements)
2310
+ self.logger.debug(
2311
+ f"PersistentStore.get_elements: {len(ids)} elements: "
2312
+ f"{shorten_list_str(ids)}."
2313
+ )
1732
2314
  elems = self._get_persistent_elements(id_pers) if id_pers else {}
1733
- elems.update({i: self._pending.add_elements[i] for i in id_pend})
2315
+ elems.update((id_, self._pending.add_elements[id_]) for id_ in id_pend)
1734
2316
 
2317
+ elems_new: list[AnySElement] = []
1735
2318
  # order as requested:
1736
- elems = [elems[id_] for id_ in id_lst]
1737
-
1738
- elems_new = []
1739
- for elem_i in elems:
2319
+ for elem_i in (elems[id_] for id_ in ids):
1740
2320
  # consider pending iteration IDs:
1741
2321
  # TODO: does this consider pending iterations from new loop iterations?
1742
- pend_iters = self._pending.add_elem_iter_IDs.get(elem_i.id_)
1743
- if pend_iters:
2322
+ if pend_iters := self._pending.add_elem_iter_IDs.get(elem_i.id_):
1744
2323
  elem_i = elem_i.append_iteration_IDs(pend_iters)
1745
2324
  elems_new.append(elem_i)
1746
2325
 
1747
2326
  return elems_new
1748
2327
 
2328
+ @abstractmethod
2329
+ def _get_persistent_element_iters(
2330
+ self, id_lst: Iterable[int]
2331
+ ) -> dict[int, AnySElementIter]:
2332
+ ...
2333
+
1749
2334
  @TimeIt.decorator
1750
- def get_element_iterations(self, id_lst: Iterable[int]) -> List[AnySElementIter]:
2335
+ def get_element_iterations(self, ids: Iterable[int]) -> Sequence[AnySElementIter]:
1751
2336
  """
1752
2337
  Get element iterations with the given IDs.
1753
2338
  """
1754
- self.logger.debug(f"PersistentStore.get_element_iterations: id_lst={id_lst!r}")
1755
-
1756
2339
  # separate pending and persistent IDs:
1757
- id_set = set(id_lst)
1758
- all_pending = set(self._pending.add_elem_iters)
1759
- id_pers = id_set.difference(all_pending)
1760
- id_pend = id_set.intersection(all_pending)
1761
-
2340
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_elem_iters)
2341
+ self.logger.debug(
2342
+ f"PersistentStore.get_element_iterations: {len(ids)} iterations: "
2343
+ f"{shorten_list_str(ids)}."
2344
+ )
1762
2345
  iters = self._get_persistent_element_iters(id_pers) if id_pers else {}
1763
- iters.update({i: self._pending.add_elem_iters[i] for i in id_pend})
2346
+ iters.update((id_, self._pending.add_elem_iters[id_]) for id_ in id_pend)
1764
2347
 
2348
+ iters_new: list[AnySElementIter] = []
1765
2349
  # order as requested:
1766
- iters = [iters[id_] for id_ in id_lst]
1767
-
1768
- iters_new = []
1769
- for iter_i in iters:
2350
+ for iter_i in (iters[id_] for id_ in ids):
1770
2351
  # consider pending EAR IDs:
1771
- pend_EARs = self._pending.add_elem_iter_EAR_IDs.get(iter_i.id_)
1772
- if pend_EARs:
2352
+ if pend_EARs := self._pending.add_elem_iter_EAR_IDs.get(iter_i.id_):
1773
2353
  iter_i = iter_i.append_EAR_IDs(pend_EARs)
1774
2354
 
1775
2355
  # consider pending loop idx
1776
- pend_loop_idx = self._pending.update_loop_indices.get(iter_i.id_)
1777
- if pend_loop_idx:
2356
+ if pend_loop_idx := self._pending.update_loop_indices.get(iter_i.id_):
1778
2357
  iter_i = iter_i.update_loop_idx(pend_loop_idx)
1779
2358
 
1780
2359
  # consider pending `EARs_initialised`:
@@ -1785,47 +2364,46 @@ class PersistentStore(ABC):
1785
2364
 
1786
2365
  return iters_new
1787
2366
 
2367
+ @abstractmethod
2368
+ def _get_persistent_EARs(self, id_lst: Iterable[int]) -> dict[int, AnySEAR]:
2369
+ ...
2370
+
1788
2371
  @TimeIt.decorator
1789
- def get_EARs(self, id_lst: Iterable[int]) -> List[AnySEAR]:
2372
+ def get_EARs(self, ids: Iterable[int]) -> Sequence[AnySEAR]:
1790
2373
  """
1791
2374
  Get element action runs with the given IDs.
1792
2375
  """
1793
- self.logger.debug(f"PersistentStore.get_EARs: id_lst={id_lst!r}")
1794
-
1795
2376
  # separate pending and persistent IDs:
1796
- id_set = set(id_lst)
1797
- all_pending = set(self._pending.add_EARs)
1798
- id_pers = id_set.difference(all_pending)
1799
- id_pend = id_set.intersection(all_pending)
1800
-
2377
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_EARs)
2378
+ self.logger.debug(
2379
+ f"PersistentStore.get_EARs: {len(ids)} EARs: {shorten_list_str(ids)}."
2380
+ )
1801
2381
  EARs = self._get_persistent_EARs(id_pers) if id_pers else {}
1802
- EARs.update({i: self._pending.add_EARs[i] for i in id_pend})
2382
+ EARs.update((id_, self._pending.add_EARs[id_]) for id_ in id_pend)
1803
2383
 
2384
+ EARs_new: list[AnySEAR] = []
1804
2385
  # order as requested:
1805
- EARs = [EARs[id_] for id_ in id_lst]
1806
-
1807
- EARs_new = []
1808
- for EAR_i in EARs:
2386
+ for EAR_i in (EARs[id_] for id_ in ids):
1809
2387
  # consider updates:
1810
- pend_sub = self._pending.set_EAR_submission_indices.get(EAR_i.id_)
1811
- pend_start = self._pending.set_EAR_starts.get(EAR_i.id_)
1812
- pend_end = self._pending.set_EAR_ends.get(EAR_i.id_)
1813
- pend_skip = True if EAR_i.id_ in self._pending.set_EAR_skips else None
1814
-
1815
- p_st, p_ss, p_hn = pend_start if pend_start else (None, None, None)
1816
- p_et, p_se, p_ex, p_sx = pend_end if pend_end else (None, None, None, None)
1817
-
1818
- updates = {
1819
- "submission_idx": pend_sub,
1820
- "skip": pend_skip,
1821
- "success": p_sx,
1822
- "start_time": p_st,
1823
- "end_time": p_et,
1824
- "snapshot_start": p_ss,
1825
- "snapshot_end": p_se,
1826
- "exit_code": p_ex,
1827
- "run_hostname": p_hn,
1828
- }
2388
+ updates: dict[str, Any] = {}
2389
+ if EAR_i.id_ in self._pending.set_EAR_skips:
2390
+ updates["skip"] = True
2391
+ (
2392
+ updates["submission_idx"],
2393
+ updates["commands_file_ID"],
2394
+ ) = self._pending.set_EAR_submission_data.get(EAR_i.id_, (None, None))
2395
+ (
2396
+ updates["start_time"],
2397
+ updates["snapshot_start"],
2398
+ updates["run_hostname"],
2399
+ updates["port_number"],
2400
+ ) = self._pending.set_EAR_starts.get(EAR_i.id_, (None, None, None, None))
2401
+ (
2402
+ updates["end_time"],
2403
+ updates["snapshot_end"],
2404
+ updates["exit_code"],
2405
+ updates["success"],
2406
+ ) = self._pending.set_EAR_ends.get(EAR_i.id_, (None, None, None, None))
1829
2407
  if any(i is not None for i in updates.values()):
1830
2408
  EAR_i = EAR_i.update(**updates)
1831
2409
 
@@ -1834,64 +2412,65 @@ class PersistentStore(ABC):
1834
2412
  return EARs_new
1835
2413
 
1836
2414
  @TimeIt.decorator
1837
- def _get_cached_persistent_items(
1838
- self, id_lst: Iterable[int], cache: Dict
1839
- ) -> Tuple[Dict[int, Any], List[int]]:
1840
- id_lst = list(id_lst)
2415
+ def __get_cached_persistent_items(
2416
+ self, id_lst: Iterable[int], cache: dict[int, T]
2417
+ ) -> tuple[dict[int, T], list[int]]:
2418
+ """How to get things out of the cache. Caller says which cache."""
1841
2419
  if self.use_cache:
1842
- id_set = set(id_lst)
1843
- all_cached = set(cache.keys())
1844
- id_cached = id_set.intersection(all_cached)
1845
- id_non_cached = list(id_set.difference(all_cached))
1846
- items = {k: cache[k] for k in id_cached}
2420
+ id_cached = set(id_lst)
2421
+ id_non_cached = sorted(id_cached.difference(cache))
2422
+ id_cached.intersection_update(cache)
2423
+ items = {id_: cache[id_] for id_ in sorted(id_cached)}
1847
2424
  else:
1848
2425
  items = {}
1849
- id_non_cached = id_lst
2426
+ id_non_cached = list(id_lst)
1850
2427
  return items, id_non_cached
1851
2428
 
1852
2429
  def _get_cached_persistent_EARs(
1853
2430
  self, id_lst: Iterable[int]
1854
- ) -> Tuple[Dict[int, AnySEAR], List[int]]:
1855
- return self._get_cached_persistent_items(id_lst, self.EAR_cache)
2431
+ ) -> tuple[dict[int, AnySEAR], list[int]]:
2432
+ return self.__get_cached_persistent_items(id_lst, self.EAR_cache)
1856
2433
 
1857
2434
  def _get_cached_persistent_element_iters(
1858
2435
  self, id_lst: Iterable[int]
1859
- ) -> Tuple[Dict[int, AnySEAR], List[int]]:
1860
- return self._get_cached_persistent_items(id_lst, self.element_iter_cache)
2436
+ ) -> tuple[dict[int, AnySElementIter], list[int]]:
2437
+ return self.__get_cached_persistent_items(id_lst, self.element_iter_cache)
1861
2438
 
1862
2439
  def _get_cached_persistent_elements(
1863
2440
  self, id_lst: Iterable[int]
1864
- ) -> Tuple[Dict[int, AnySEAR], List[int]]:
1865
- return self._get_cached_persistent_items(id_lst, self.element_cache)
2441
+ ) -> tuple[dict[int, AnySElement], list[int]]:
2442
+ return self.__get_cached_persistent_items(id_lst, self.element_cache)
1866
2443
 
1867
- def _get_cached_persistent_tasks(self, id_lst: Iterable[int]):
1868
- return self._get_cached_persistent_items(id_lst, self.task_cache)
2444
+ def _get_cached_persistent_tasks(
2445
+ self, id_lst: Iterable[int]
2446
+ ) -> tuple[dict[int, AnySTask], list[int]]:
2447
+ return self.__get_cached_persistent_items(id_lst, self.task_cache)
1869
2448
 
1870
- def _get_cached_persistent_param_sources(self, id_lst: Iterable[int]):
1871
- return self._get_cached_persistent_items(id_lst, self.param_sources_cache)
2449
+ def _get_cached_persistent_param_sources(
2450
+ self, id_lst: Iterable[int]
2451
+ ) -> tuple[dict[int, ParamSource], list[int]]:
2452
+ return self.__get_cached_persistent_items(id_lst, self.param_sources_cache)
1872
2453
 
1873
- def _get_cached_persistent_parameters(self, id_lst: Iterable[int]):
1874
- return self._get_cached_persistent_items(id_lst, self.parameter_cache)
2454
+ def _get_cached_persistent_parameters(
2455
+ self, id_lst: Iterable[int]
2456
+ ) -> tuple[dict[int, AnySParameter], list[int]]:
2457
+ return self.__get_cached_persistent_items(id_lst, self.parameter_cache)
1875
2458
 
1876
- def get_EAR_skipped(self, EAR_ID: int) -> bool:
2459
+ def get_EAR_skipped(self, EAR_ID: int) -> int:
1877
2460
  """
1878
2461
  Whether the element action run with the given ID was skipped.
1879
2462
  """
1880
2463
  self.logger.debug(f"PersistentStore.get_EAR_skipped: EAR_ID={EAR_ID!r}")
1881
- return self.get_EARs([EAR_ID])[0].skip
2464
+ return self.get_EARs((EAR_ID,))[0].skip
1882
2465
 
1883
2466
  @TimeIt.decorator
1884
- def get_parameters(
1885
- self,
1886
- id_lst: Iterable[int],
1887
- **kwargs: Dict,
1888
- ) -> List[AnySParameter]:
2467
+ def get_parameters(self, ids: Iterable[int], **kwargs) -> list[AnySParameter]:
1889
2468
  """
1890
2469
  Get parameters with the given IDs.
1891
2470
 
1892
2471
  Parameters
1893
2472
  ----------
1894
- id_lst:
2473
+ ids:
1895
2474
  The IDs of the parameters to get.
1896
2475
 
1897
2476
  Keyword Arguments
@@ -1900,124 +2479,265 @@ class PersistentStore(ABC):
1900
2479
  For Zarr stores only. If True, copy arrays as NumPy arrays.
1901
2480
  """
1902
2481
  # separate pending and persistent IDs:
1903
- id_set = set(id_lst)
1904
- all_pending = set(self._pending.add_parameters)
1905
- id_pers = id_set.difference(all_pending)
1906
- id_pend = id_set.intersection(all_pending)
1907
-
1908
- params = self._get_persistent_parameters(id_pers, **kwargs) if id_pers else {}
1909
- params.update({i: self._pending.add_parameters[i] for i in id_pend})
2482
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_parameters)
2483
+ params = (
2484
+ dict(self._get_persistent_parameters(id_pers, **kwargs)) if id_pers else {}
2485
+ )
2486
+ params.update((id_, self._pending.add_parameters[id_]) for id_ in id_pend)
1910
2487
 
1911
2488
  # order as requested:
1912
- params = [params[id_] for id_ in id_lst]
2489
+ return [params[id_] for id_ in ids]
1913
2490
 
1914
- return params
2491
+ @abstractmethod
2492
+ def _get_persistent_parameters(
2493
+ self, id_lst: Iterable[int], **kwargs
2494
+ ) -> Mapping[int, AnySParameter]:
2495
+ ...
1915
2496
 
1916
2497
  @TimeIt.decorator
1917
- def get_parameter_set_statuses(self, id_lst: Iterable[int]) -> List[bool]:
2498
+ def get_parameter_set_statuses(self, ids: Iterable[int]) -> list[bool]:
1918
2499
  """
1919
2500
  Get whether the parameters with the given IDs are set.
1920
2501
  """
1921
2502
  # separate pending and persistent IDs:
1922
- id_set = set(id_lst)
1923
- all_pending = set(self._pending.add_parameters)
1924
- id_pers = id_set.difference(all_pending)
1925
- id_pend = id_set.intersection(all_pending)
1926
-
2503
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_parameters)
1927
2504
  set_status = self._get_persistent_parameter_set_status(id_pers) if id_pers else {}
1928
- set_status.update({i: self._pending.add_parameters[i].is_set for i in id_pend})
2505
+ set_status.update(
2506
+ (id_, self._pending.add_parameters[id_].is_set) for id_ in id_pend
2507
+ )
1929
2508
 
1930
2509
  # order as requested:
1931
- return [set_status[id_] for id_ in id_lst]
2510
+ return [set_status[id_] for id_ in ids]
2511
+
2512
+ @abstractmethod
2513
+ def _get_persistent_parameter_set_status(
2514
+ self, id_lst: Iterable[int]
2515
+ ) -> dict[int, bool]:
2516
+ ...
1932
2517
 
1933
2518
  @TimeIt.decorator
1934
- def get_parameter_sources(self, id_lst: Iterable[int]) -> List[Dict]:
2519
+ def get_parameter_sources(self, ids: Iterable[int]) -> list[ParamSource]:
1935
2520
  """
1936
2521
  Get the sources of the parameters with the given IDs.
1937
2522
  """
1938
2523
  # separate pending and persistent IDs:
1939
- id_set = set(id_lst)
1940
- all_pending = set(self._pending.add_parameters)
1941
- id_pers = id_set.difference(all_pending)
1942
- id_pend = id_set.intersection(all_pending)
1943
-
2524
+ ids, id_pers, id_pend = self.__split_pending(ids, self._pending.add_parameters)
1944
2525
  src = self._get_persistent_param_sources(id_pers) if id_pers else {}
1945
- src.update({i: self._pending.add_parameters[i].source for i in id_pend})
2526
+ src.update((id_, self._pending.add_parameters[id_].source) for id_ in id_pend)
1946
2527
 
1947
- # order as requested:
1948
- src = {id_: src[id_] for id_ in id_lst}
2528
+ # order as requested, and consider pending source updates:
2529
+ return [
2530
+ self.__merge_param_source(
2531
+ src[id_i], self._pending.update_param_sources.get(id_i)
2532
+ )
2533
+ for id_i in ids
2534
+ ]
1949
2535
 
1950
- src_new = []
1951
- for id_i, src_i in src.items():
1952
- # consider pending source updates:
1953
- pend_src = self._pending.update_param_sources.get(id_i)
1954
- if pend_src:
1955
- src_i = {**src_i, **pend_src}
1956
- src_new.append(src_i)
2536
+ @staticmethod
2537
+ def __merge_param_source(
2538
+ src_i: ParamSource, pend_src: ParamSource | None
2539
+ ) -> ParamSource:
2540
+ """
2541
+ Helper to merge a second dict in if it is provided.
2542
+ """
2543
+ return {**src_i, **pend_src} if pend_src else src_i
1957
2544
 
1958
- return src_new
2545
+ @abstractmethod
2546
+ def _get_persistent_param_sources(
2547
+ self, id_lst: Iterable[int]
2548
+ ) -> dict[int, ParamSource]:
2549
+ ...
1959
2550
 
1960
2551
  @TimeIt.decorator
1961
2552
  def get_task_elements(
1962
2553
  self,
1963
- task_id,
1964
- idx_lst: Optional[Iterable[int]] = None,
1965
- ) -> List[Dict]:
2554
+ task_id: int,
2555
+ idx_lst: Iterable[int] | None = None,
2556
+ ) -> Iterator[Mapping[str, Any]]:
1966
2557
  """
1967
2558
  Get element data by an indices within a given task.
1968
2559
 
1969
2560
  Element iterations and EARs belonging to the elements are included.
1970
-
1971
2561
  """
1972
2562
 
1973
2563
  all_elem_IDs = self.get_task(task_id).element_IDs
1974
- if idx_lst is None:
1975
- req_IDs = all_elem_IDs
1976
- else:
1977
- req_IDs = [all_elem_IDs[i] for i in idx_lst]
1978
- store_elements = self.get_elements(req_IDs)
1979
- iter_IDs = [i.iteration_IDs for i in store_elements]
1980
- iter_IDs_flat, iter_IDs_lens = flatten(iter_IDs)
2564
+ store_elements = self.get_elements(
2565
+ all_elem_IDs if idx_lst is None else (all_elem_IDs[idx] for idx in idx_lst)
2566
+ )
2567
+ iter_IDs_flat, iter_IDs_lens = flatten(
2568
+ [el.iteration_IDs for el in store_elements]
2569
+ )
1981
2570
  store_iters = self.get_element_iterations(iter_IDs_flat)
1982
2571
 
1983
2572
  # retrieve EARs:
1984
- EAR_IDs = [list((i.EAR_IDs or {}).values()) for i in store_iters]
1985
- EAR_IDs_flat, EAR_IDs_lens = flatten(EAR_IDs)
1986
- EARs_dct = [i.to_dict() for i in self.get_EARs(EAR_IDs_flat)]
1987
- EARs_dct_rs = reshape(EARs_dct, EAR_IDs_lens)
2573
+ EARs_dcts = remap(
2574
+ [list((elit.EAR_IDs or {}).values()) for elit in store_iters],
2575
+ lambda ears: [ear.to_dict() for ear in self.get_EARs(ears)],
2576
+ )
1988
2577
 
1989
2578
  # add EARs to iterations:
1990
- iters = []
2579
+ iters: list[dict[str, Any]] = []
1991
2580
  for idx, i in enumerate(store_iters):
1992
- EARs = None
2581
+ EARs: dict[int, dict[str, Any]] | None = None
1993
2582
  if i.EAR_IDs is not None:
1994
- EARs = dict(zip(i.EAR_IDs.keys(), EARs_dct_rs[idx]))
2583
+ EARs = dict(zip(i.EAR_IDs, cast("Any", EARs_dcts[idx])))
1995
2584
  iters.append(i.to_dict(EARs))
1996
2585
 
1997
2586
  # reshape iterations:
1998
2587
  iters_rs = reshape(iters, iter_IDs_lens)
1999
2588
 
2000
2589
  # add iterations to elements:
2001
- elements = []
2002
- for idx, i in enumerate(store_elements):
2003
- elements.append(i.to_dict(iters_rs[idx]))
2004
- return elements
2590
+ for idx, element in enumerate(store_elements):
2591
+ yield element.to_dict(iters_rs[idx])
2005
2592
 
2006
- def check_parameters_exist(self, id_lst: Iterable[int]) -> List[bool]:
2007
- """For each parameter ID, return True if it exists, else False"""
2593
+ @abstractmethod
2594
+ def _get_persistent_parameter_IDs(self) -> Iterable[int]:
2595
+ ...
2008
2596
 
2009
- id_set = set(id_lst)
2010
- all_pending = set(self._pending.add_parameters)
2011
- id_not_pend = id_set.difference(all_pending)
2597
+ def check_parameters_exist(self, ids: Sequence[int]) -> Iterator[bool]:
2598
+ """
2599
+ For each parameter ID, return True if it exists, else False.
2600
+ """
2012
2601
  id_miss = set()
2013
- if id_not_pend:
2014
- all_id_pers = self._get_persistent_parameter_IDs()
2015
- id_miss = id_not_pend.difference(all_id_pers)
2602
+ if id_not_pend := set(ids).difference(self._pending.add_parameters):
2603
+ id_miss = id_not_pend.difference(self._get_persistent_parameter_IDs())
2604
+ return (id_ not in id_miss for id_ in ids)
2605
+
2606
+ @abstractmethod
2607
+ def _append_tasks(self, tasks: Iterable[AnySTask]) -> None:
2608
+ ...
2609
+
2610
+ @abstractmethod
2611
+ def _append_loops(self, loops: dict[int, LoopDescriptor]) -> None:
2612
+ ...
2613
+
2614
+ @abstractmethod
2615
+ def _append_submissions(self, subs: dict[int, Mapping[str, JSONed]]) -> None:
2616
+ ...
2617
+
2618
+ @abstractmethod
2619
+ def _update_at_submit_metadata(
2620
+ self, at_submit_metadata: dict[int, dict[str, Any]]
2621
+ ) -> None:
2622
+ ...
2623
+
2624
+ @abstractmethod
2625
+ def _append_elements(self, elems: Sequence[AnySElement]) -> None:
2626
+ ...
2627
+
2628
+ @abstractmethod
2629
+ def _append_element_sets(self, task_id: int, es_js: Sequence[Mapping]) -> None:
2630
+ ...
2631
+
2632
+ @abstractmethod
2633
+ def _append_elem_iter_IDs(self, elem_ID: int, iter_IDs: Iterable[int]) -> None:
2634
+ ...
2635
+
2636
+ @abstractmethod
2637
+ def _append_elem_iters(self, iters: Sequence[AnySElementIter]) -> None:
2638
+ ...
2639
+
2640
+ @abstractmethod
2641
+ def _append_elem_iter_EAR_IDs(
2642
+ self, iter_ID: int, act_idx: int, EAR_IDs: Sequence[int]
2643
+ ) -> None:
2644
+ ...
2645
+
2646
+ @abstractmethod
2647
+ def _append_EARs(self, EARs: Sequence[AnySEAR]) -> None:
2648
+ ...
2016
2649
 
2017
- return [False if i in id_miss else True for i in id_lst]
2650
+ @abstractmethod
2651
+ def _update_elem_iter_EARs_initialised(self, iter_ID: int) -> None:
2652
+ ...
2653
+
2654
+ @abstractmethod
2655
+ def _update_EAR_submission_data(self, sub_data: Mapping[int, tuple[int, int | None]]):
2656
+ ...
2657
+
2658
+ @abstractmethod
2659
+ def _update_EAR_start(
2660
+ self,
2661
+ run_starts: dict[int, tuple[datetime, dict[str, Any] | None, str, int | None]],
2662
+ ) -> None:
2663
+ ...
2664
+
2665
+ @abstractmethod
2666
+ def _update_EAR_end(
2667
+ self, run_ends: dict[int, tuple[datetime, dict[str, Any] | None, int, bool]]
2668
+ ) -> None:
2669
+ ...
2670
+
2671
+ @abstractmethod
2672
+ def _update_EAR_skip(self, skips: dict[int, int]) -> None:
2673
+ ...
2674
+
2675
+ @abstractmethod
2676
+ def _update_js_metadata(self, js_meta: dict[int, dict[int, dict[str, Any]]]) -> None:
2677
+ ...
2678
+
2679
+ @abstractmethod
2680
+ def _append_parameters(self, params: Sequence[AnySParameter]) -> None:
2681
+ ...
2682
+
2683
+ @abstractmethod
2684
+ def _update_template_components(self, tc: dict[str, Any]) -> None:
2685
+ ...
2686
+
2687
+ @abstractmethod
2688
+ def _update_parameter_sources(self, sources: Mapping[int, ParamSource]) -> None:
2689
+ ...
2690
+
2691
+ @abstractmethod
2692
+ def _update_loop_index(self, loop_indices: dict[int, dict[str, int]]) -> None:
2693
+ ...
2694
+
2695
+ @abstractmethod
2696
+ def _update_loop_num_iters(
2697
+ self, index: int, num_iters: list[list[list[int] | int]]
2698
+ ) -> None:
2699
+ ...
2700
+
2701
+ @abstractmethod
2702
+ def _update_loop_parents(self, index: int, parents: list[str]) -> None:
2703
+ ...
2704
+
2705
+ @overload
2706
+ def using_resource(
2707
+ self, res_label: Literal["metadata"], action: str
2708
+ ) -> AbstractContextManager[Metadata]:
2709
+ ...
2710
+
2711
+ @overload
2712
+ def using_resource(
2713
+ self, res_label: Literal["submissions"], action: str
2714
+ ) -> AbstractContextManager[list[dict[str, JSONed]]]:
2715
+ ...
2716
+
2717
+ @overload
2718
+ def using_resource(
2719
+ self, res_label: Literal["parameters"], action: str
2720
+ ) -> AbstractContextManager[dict[str, dict[str, Any]]]:
2721
+ ...
2722
+
2723
+ @overload
2724
+ def using_resource(
2725
+ self, res_label: Literal["runs"], action: str
2726
+ ) -> AbstractContextManager[dict[str, Any]]:
2727
+ ...
2728
+
2729
+ @overload
2730
+ def using_resource(
2731
+ self, res_label: Literal["attrs"], action: str
2732
+ ) -> AbstractContextManager[ZarrAttrsDict]:
2733
+ ...
2018
2734
 
2019
2735
  @contextlib.contextmanager
2020
- def using_resource(self, res_label, action):
2736
+ def using_resource(
2737
+ self,
2738
+ res_label: Literal["metadata", "submissions", "parameters", "attrs", "runs"],
2739
+ action: str,
2740
+ ) -> Iterator[Any]:
2021
2741
  """Context manager for managing `StoreResource` objects associated with the store."""
2022
2742
 
2023
2743
  try:
@@ -2048,12 +2768,13 @@ class PersistentStore(ABC):
2048
2768
  res.close(action)
2049
2769
  self._resources_in_use.remove(key)
2050
2770
 
2051
- def copy(self, path=None) -> str:
2771
+ def copy(self, path: PathLike = None) -> Path:
2052
2772
  """Copy the workflow store.
2053
2773
 
2054
2774
  This does not work on remote filesystems.
2055
2775
 
2056
2776
  """
2777
+ assert self.fs is not None
2057
2778
  if path is None:
2058
2779
  _path = Path(self.path)
2059
2780
  path = _path.parent / Path(_path.stem + "_copy" + _path.suffix)
@@ -2065,9 +2786,7 @@ class PersistentStore(ABC):
2065
2786
 
2066
2787
  self.fs.copy(self.path, path)
2067
2788
 
2068
- new_fs_path = self.workflow.fs_path.replace(self.path, path)
2069
-
2070
- return new_fs_path
2789
+ return Path(self.workflow._store.path).replace(path)
2071
2790
 
2072
2791
  def delete(self) -> None:
2073
2792
  """Delete the persistent workflow."""
@@ -2080,9 +2799,44 @@ class PersistentStore(ABC):
2080
2799
  def delete_no_confirm(self) -> None:
2081
2800
  """Permanently delete the workflow data with no confirmation."""
2082
2801
 
2083
- @self.app.perm_error_retry()
2802
+ fs = self.fs
2803
+ assert fs is not None
2804
+
2805
+ @self._app.perm_error_retry()
2084
2806
  def _delete_no_confirm() -> None:
2085
2807
  self.logger.debug(f"_delete_no_confirm: {self.path!r}.")
2086
- self.fs.rm(self.path, recursive=True)
2808
+ fs.rm(self.path, recursive=True)
2087
2809
 
2088
2810
  return _delete_no_confirm()
2811
+
2812
+ def get_text_file(self, path: str | Path) -> str:
2813
+ """Retrieve the contents of a text file stored within the workflow.
2814
+
2815
+ Parameters
2816
+ ----------
2817
+ path
2818
+ The path to a text file stored within the workflow. This can either be an
2819
+ absolute path or a path that is relative to the workflow root.
2820
+ """
2821
+ path = Path(path)
2822
+ if not path.is_absolute():
2823
+ path = Path(self.path).joinpath(path)
2824
+ if not path.is_file():
2825
+ raise FileNotFoundError(f"File at location {path!r} does not exist.")
2826
+ return path.read_text()
2827
+
2828
+ @abstractmethod
2829
+ def _append_task_element_IDs(self, task_ID: int, elem_IDs: list[int]):
2830
+ raise NotImplementedError
2831
+
2832
+ @abstractmethod
2833
+ def _set_run_dirs(self, run_dir_arr: np.ndarray, run_idx: np.ndarray) -> None:
2834
+ ...
2835
+
2836
+ @abstractmethod
2837
+ def _update_iter_data_indices(self, iter_data_indices: dict[int, DataIndex]) -> None:
2838
+ ...
2839
+
2840
+ @abstractmethod
2841
+ def _update_run_data_indices(self, run_data_indices: dict[int, DataIndex]) -> None:
2842
+ ...