hpcflow-new2 0.2.0a50__py3-none-any.whl → 0.2.0a52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. hpcflow/_version.py +1 -1
  2. hpcflow/sdk/__init__.py +1 -1
  3. hpcflow/sdk/api.py +1 -1
  4. hpcflow/sdk/app.py +20 -11
  5. hpcflow/sdk/cli.py +34 -59
  6. hpcflow/sdk/core/__init__.py +13 -1
  7. hpcflow/sdk/core/actions.py +235 -126
  8. hpcflow/sdk/core/command_files.py +32 -24
  9. hpcflow/sdk/core/element.py +110 -114
  10. hpcflow/sdk/core/errors.py +57 -0
  11. hpcflow/sdk/core/loop.py +18 -34
  12. hpcflow/sdk/core/parameters.py +5 -3
  13. hpcflow/sdk/core/task.py +135 -131
  14. hpcflow/sdk/core/task_schema.py +11 -4
  15. hpcflow/sdk/core/utils.py +110 -2
  16. hpcflow/sdk/core/workflow.py +964 -676
  17. hpcflow/sdk/data/template_components/environments.yaml +0 -44
  18. hpcflow/sdk/data/template_components/task_schemas.yaml +52 -10
  19. hpcflow/sdk/persistence/__init__.py +21 -33
  20. hpcflow/sdk/persistence/base.py +1340 -458
  21. hpcflow/sdk/persistence/json.py +424 -546
  22. hpcflow/sdk/persistence/pending.py +563 -0
  23. hpcflow/sdk/persistence/store_resource.py +131 -0
  24. hpcflow/sdk/persistence/utils.py +57 -0
  25. hpcflow/sdk/persistence/zarr.py +852 -841
  26. hpcflow/sdk/submission/jobscript.py +133 -112
  27. hpcflow/sdk/submission/shells/bash.py +62 -16
  28. hpcflow/sdk/submission/shells/powershell.py +87 -16
  29. hpcflow/sdk/submission/submission.py +59 -35
  30. hpcflow/tests/unit/test_element.py +4 -9
  31. hpcflow/tests/unit/test_persistence.py +218 -0
  32. hpcflow/tests/unit/test_task.py +11 -12
  33. hpcflow/tests/unit/test_utils.py +82 -0
  34. hpcflow/tests/unit/test_workflow.py +3 -1
  35. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/METADATA +3 -1
  36. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/RECORD +38 -34
  37. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/WHEEL +0 -0
  38. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/entry_points.txt +0 -0
@@ -1,21 +1,31 @@
1
+ # Store* classes represent the element-metadata in the store, in a store-agnostic way
1
2
  from __future__ import annotations
2
- from abc import ABC, abstractmethod
3
- from contextlib import contextmanager
3
+ from abc import ABC
4
+
5
+ import contextlib
4
6
  import copy
5
- from dataclasses import dataclass
6
- from datetime import datetime
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
7
10
  import shutil
8
11
  import time
9
- from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
10
- from pathlib import Path
11
-
12
- from reretry import retry
13
- from hpcflow.sdk import app
14
- from hpcflow.sdk.core.errors import WorkflowNotFoundError
15
- from hpcflow.sdk.core.parameters import ParameterValue
16
- from hpcflow.sdk.core.utils import get_in_container, get_relative_path, set_in_container
17
- from hpcflow.sdk.typing import PathLike
12
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, TypeVar, Union
13
+
14
+ from hpcflow.sdk.core.utils import (
15
+ flatten,
16
+ get_in_container,
17
+ get_relative_path,
18
+ reshape,
19
+ set_in_container,
20
+ JSONLikeDirSnapShot,
21
+ )
22
+ from hpcflow.sdk.persistence.pending import PendingChanges
18
23
 
24
+ AnySTask = TypeVar("AnySTask", bound="StoreTask")
25
+ AnySElement = TypeVar("AnySElement", bound="StoreElement")
26
+ AnySElementIter = TypeVar("AnySElementIter", bound="StoreElementIter")
27
+ AnySEAR = TypeVar("AnySEAR", bound="StoreEAR")
28
+ AnySParameter = TypeVar("AnySParameter", bound="StoreParameter")
19
29
 
20
30
  PRIMITIVES = (
21
31
  int,
@@ -24,48 +34,24 @@ PRIMITIVES = (
24
34
  type(None),
25
35
  )
26
36
 
27
-
28
- def dropbox_retry_fail(err: Exception) -> None:
29
- # TODO: this should log instead of printing!
30
- print("retrying...")
31
-
32
-
33
- # TODO: maybe this is only an issue on Windows?
34
- dropbox_permission_err_retry = retry(
35
- (PermissionError, OSError),
36
- tries=10,
37
- delay=1,
38
- backoff=2,
39
- fail_callback=dropbox_retry_fail,
37
+ TEMPLATE_COMP_TYPES = (
38
+ "parameters",
39
+ "command_files",
40
+ "environments",
41
+ "task_schemas",
40
42
  )
41
43
 
42
44
 
43
- @dropbox_permission_err_retry
44
- def remove_dir(dir_path: Path) -> None:
45
- """Try very hard to delete a directory.
46
-
47
- Dropbox (on Windows, at least) seems to try to re-sync files if the parent directory
48
- is deleted soon after creation, which is the case on a failed workflow creation (e.g.
49
- missing inputs), so in addition to catching PermissionErrors generated when
50
- Dropbox has a lock on files, we repeatedly try deleting the directory tree.
51
-
52
- """
53
- while dir_path.is_dir():
54
- shutil.rmtree(dir_path)
55
- time.sleep(0.5)
56
-
57
-
58
- @dropbox_permission_err_retry
59
- def rename_dir(replaced_dir, original_dir) -> None:
60
- replaced_dir.rename(original_dir)
61
-
62
-
63
45
  @dataclass
64
46
  class PersistentStoreFeatures:
65
47
  """Class to represent the features provided by a persistent store.
66
48
 
67
49
  Parameters
68
50
  ----------
51
+ create
52
+ If True, a new workflow can be created using this store.
53
+ edit
54
+ If True, the workflow can be modified.
69
55
  jobscript_parallelism
70
56
  If True, the store supports workflows running multiple independent jobscripts
71
57
  simultaneously.
@@ -79,323 +65,374 @@ class PersistentStoreFeatures:
79
65
  submission.
80
66
  """
81
67
 
68
+ create: bool = False
69
+ edit: bool = False
82
70
  jobscript_parallelism: bool = False
83
71
  EAR_parallelism: bool = False
84
72
  schedulers: bool = False
85
73
  submission: bool = False
86
74
 
87
75
 
88
- class PersistentStore(ABC):
89
- _parameter_encoders = {}
90
- _parameter_decoders = {}
91
- _features = None
92
-
93
- def __init__(self, workflow: app.Workflow) -> None:
94
- self._workflow = workflow
95
- self._pending = self._get_pending_dct()
96
- if not self.exists():
97
- raise WorkflowNotFoundError(
98
- f"No workflow found at path: {self.workflow_path}"
99
- )
100
-
101
- @property
102
- def features(self) -> PersistentStoreFeatures:
103
- return self._features
104
-
105
- @property
106
- def store_name(self) -> str:
107
- return self._name
108
-
109
- @property
110
- def workflow(self) -> app.Workflow:
111
- return self._workflow
112
-
113
- @property
114
- def workflow_path(self) -> Path:
115
- return self.workflow.path
116
-
117
- @property
118
- def has_pending(self) -> bool:
119
- """Returns True if there are pending changes that are not yet committed."""
120
- return any(bool(v) for v in self._pending.values())
121
-
122
- def _get_pending_dct(self) -> Dict:
123
- return {
124
- "tasks": {}, # keys are new task indices
125
- "loops": [],
126
- "submissions": [],
127
- "submission_attempts": {}, # keys are submission indices, values are list of jobscript indices
128
- "jobscript_version_info": {}, # keys are submission indices, values are dicts with jobscript index keys
129
- "jobscript_submit_times": {}, # keys are submission indices, values are dicts with jobscript index keys
130
- "jobscript_job_IDs": {}, # keys are submission indices, values are dicts with jobscript index keys
131
- "loops_added_iters": {}, # keys are loop indices, values are num added iterations
132
- "template_tasks": {}, # keys are new task indices
133
- "template_loops": [],
134
- "template_components": {},
135
- "element_sets": {}, # keys are task indices
136
- "element_iterations": {}, # keys are (task index, task insert ID)
137
- "element_iterations_idx": {}, # keys are (task index, task insert ID), then element_idx
138
- "elements": {}, # keys are (task index, task insert ID)
139
- "EARs": {}, # keys are (task index, task insert ID, element_iter idx)
140
- "loop_idx": {}, # keys are (task index, task insert ID, element iteration index)
141
- "parameter_data": {}, # keys are parameter indices
142
- "parameter_sources": {}, # keys are parameter indices
143
- "parameter_source_updates": {}, # keys are parameter indices
144
- "remove_replaced_dir_record": False,
145
- "EAR_submission_idx": {}, # keys are (task insert ID, element_iter idx, action idx, run idx)
146
- "EAR_start_times": {}, # keys are (task insert ID, element_iter idx, action idx, run idx)
147
- "EAR_end_times": {}, # keys are (task insert ID, element_iter idx, action idx, run idx)
148
- }
149
-
150
- def reject_pending(self) -> None:
151
- self.clear_pending()
152
-
153
- def clear_pending(self) -> None:
154
- self._pending = self._get_pending_dct()
155
-
156
- def save(self) -> None:
157
- if not self.workflow._in_batch_mode:
158
- self.commit_pending()
159
-
160
- @contextmanager
161
- def cached_load(self) -> Iterator[None]:
162
- """Override this if a more performant implementation, is possible.
163
-
164
- For example, in a JSON persistent store, we need to load the whole document from
165
- disk to read anything from it, so we can temporarily cache the document if we know
166
- we will be making multiple reads."""
76
+ @dataclass
77
+ class StoreTask:
78
+ id_: int
79
+ index: int
80
+ is_pending: bool
81
+ element_IDs: List[int]
82
+ task_template: Optional[Dict] = None
83
+
84
+ def encode(self) -> Tuple[int, Dict, Dict]:
85
+ """Prepare store task data for the persistent store."""
86
+ wk_task = {"id_": self.id_, "element_IDs": self.element_IDs}
87
+ task = {"id_": self.id_, **self.task_template}
88
+ return self.index, wk_task, task
167
89
 
168
- yield
90
+ @classmethod
91
+ def decode(cls, task_dat: Dict) -> StoreTask:
92
+ """Initialise a `StoreTask` from store task data
169
93
 
170
- def get_task_elements_islice(
171
- self,
172
- task_idx: int,
173
- task_insert_ID: int,
174
- selection: Union[int, slice],
175
- ) -> Iterator[Dict]:
176
- """Override this for a more performant implementation."""
177
- for idx in range(selection.start, selection.stop, selection.step):
178
- yield self.get_task_elements(
179
- task_idx, task_insert_ID, slice(idx, idx + 1, 1)
180
- )[0]
94
+ Note: the `task_template` is only needed for encoding because it is retrieved as
95
+ part of the `WorkflowTemplate` so we don't need to load it when decoding.
181
96
 
182
- def delete(self) -> None:
183
- """Delete the persistent workflow."""
184
- confirm = input(
185
- f"Permanently delete the workflow at path {self.workflow.path}; "
186
- f"[y]es or [n]o?"
97
+ """
98
+ return cls(is_pending=False, **task_dat)
99
+
100
+ def append_element_IDs(self: AnySTask, pend_IDs: List[int]) -> AnySTask:
101
+ """Return a copy, with additional element IDs."""
102
+ elem_IDs = self.element_IDs[:] + pend_IDs
103
+ return self.__class__(
104
+ id_=self.id_,
105
+ index=self.index,
106
+ is_pending=self.is_pending,
107
+ element_IDs=elem_IDs,
108
+ task_template=self.task_template,
187
109
  )
188
- if confirm.strip().lower() == "y":
189
- self.delete_no_confirm()
190
110
 
191
- def delete_no_confirm(self) -> None:
192
- """Permanently delete the workflow data with no confirmation."""
193
- remove_dir(self.workflow.path)
194
-
195
- def _merge_pending_template_components(self, template_components: Dict) -> bool:
196
- # assumes we have already checked for duplicates when adding to pending:
197
- is_modified = False
198
- for name, dat in self._pending["template_components"].items():
199
- if name not in template_components:
200
- template_components[name] = {}
201
- for k, v in dat.items():
202
- template_components[name][k] = v
203
- is_modified = True
204
- return is_modified
205
111
 
206
- def get_template_components(self) -> Dict:
207
- """Get all template components, including pending."""
208
- tc = self._get_persistent_template_components()
209
- if self._pending["template_components"]:
210
- tc = copy.deepcopy(tc)
211
- self._merge_pending_template_components(tc)
212
- return tc
213
-
214
- def add_template_components(self, template_components: Dict) -> None:
215
- ptc = self._get_persistent_template_components()
216
- pending = self._pending["template_components"]
217
-
218
- for name, dat in template_components.items():
219
- if name in ptc and name in pending:
220
- for hash, dat_i in dat.items():
221
- if hash not in ptc[name] and hash not in pending[name]:
222
- pending[name][hash] = dat_i
112
+ @dataclass
113
+ class StoreElement:
114
+ """
115
+ Parameters
116
+ ----------
117
+ index
118
+ Index of the element within its parent task.
119
+ iteration_IDs
120
+ IDs of element-iterations that belong to this element.
121
+ """
223
122
 
224
- elif name in pending:
225
- for hash, dat_i in dat.items():
226
- if hash not in pending[name]:
227
- pending[name][hash] = dat_i
123
+ id_: int
124
+ is_pending: bool
125
+ index: int
126
+ es_idx: int
127
+ seq_idx: Dict[str, int]
128
+ src_idx: Dict[str, int]
129
+ task_ID: int
130
+ iteration_IDs: List[int]
131
+
132
+ def encode(self) -> Dict:
133
+ """Prepare store element data for the persistent store."""
134
+ dct = self.__dict__
135
+ del dct["is_pending"]
136
+ return dct
228
137
 
229
- else:
230
- pending[name] = dat
138
+ @classmethod
139
+ def decode(cls, elem_dat: Dict) -> StoreElement:
140
+ """Initialise a `StoreElement` from store element data"""
141
+ return cls(is_pending=False, **elem_dat)
231
142
 
232
- self.save()
143
+ def to_dict(self, iters):
144
+ """Prepare data for the user-facing `Element` object."""
145
+ return {
146
+ "id_": self.id_,
147
+ "is_pending": self.is_pending,
148
+ "index": self.index,
149
+ "es_idx": self.es_idx,
150
+ "seq_idx": self.seq_idx,
151
+ "src_idx": self.src_idx,
152
+ "iteration_IDs": self.iteration_IDs,
153
+ "task_ID": self.task_ID,
154
+ "iterations": iters,
155
+ }
233
156
 
234
- def add_empty_task(self, task_idx: int, task_js: Dict) -> None:
235
- self._pending["template_tasks"][task_idx] = task_js
236
- self._pending["tasks"][task_idx] = {"elements": [], "element_iterations": []}
237
- self.save()
157
+ def append_iteration_IDs(self: AnySElement, pend_IDs: List[int]) -> AnySElement:
158
+ """Return a copy, with additional iteration IDs."""
159
+ iter_IDs = self.iteration_IDs[:] + pend_IDs
160
+ return self.__class__(
161
+ id_=self.id_,
162
+ is_pending=self.is_pending,
163
+ index=self.index,
164
+ es_idx=self.es_idx,
165
+ seq_idx=self.seq_idx,
166
+ src_idx=self.src_idx,
167
+ task_ID=self.task_ID,
168
+ iteration_IDs=iter_IDs,
169
+ )
238
170
 
239
- def add_element_set(self, task_idx: int, element_set_js: Dict) -> None:
240
- if task_idx not in self._pending["element_sets"]:
241
- self._pending["element_sets"][task_idx] = []
242
- self._pending["element_sets"][task_idx].append(element_set_js)
243
- self.save()
244
171
 
245
- def add_elements(
246
- self,
247
- task_idx: int,
248
- task_insert_ID: int,
249
- elements: List[Dict],
250
- element_iterations: List[Dict],
251
- ) -> None:
252
- key = (task_idx, task_insert_ID)
253
- if key not in self._pending["elements"]:
254
- self._pending["elements"][key] = []
255
- if key not in self._pending["element_iterations"]:
256
- self._pending["element_iterations"][key] = []
257
- self._pending["elements"][key].extend(elements)
258
- self._pending["element_iterations"][key].extend(element_iterations)
259
- self.save()
260
-
261
- def add_element_iterations(
262
- self,
263
- task_idx: int,
264
- task_insert_ID: int,
265
- element_iterations: List[Dict],
266
- element_iters_idx: Dict[int, List[int]],
267
- ) -> None:
268
- key = (task_idx, task_insert_ID)
269
- if key not in self._pending["element_iterations"]:
270
- self._pending["element_iterations"][key] = []
271
- if key not in self._pending["element_iterations_idx"]:
272
- self._pending["element_iterations_idx"][key] = {}
172
+ @dataclass
173
+ class StoreElementIter:
174
+ """
175
+ Parameters
176
+ ----------
177
+ data_idx
178
+ Overall data index for the element-iteration, which maps parameter names to
179
+ parameter data indices.
180
+ EAR_IDs
181
+ Maps task schema action indices to EARs by ID.
182
+ schema_parameters
183
+ List of parameters defined by the associated task schema.
184
+ """
273
185
 
274
- self._pending["element_iterations"][key].extend(element_iterations)
186
+ id_: int
187
+ is_pending: bool
188
+ element_ID: int
189
+ EAR_IDs: Dict[int, List[int]]
190
+ data_idx: Dict[str, int]
191
+ schema_parameters: List[str]
192
+ loop_idx: Dict[str, int] = field(default_factory=dict)
275
193
 
276
- for elem_idx, iters_idx in element_iters_idx.items():
277
- if elem_idx not in self._pending["element_iterations_idx"][key]:
278
- self._pending["element_iterations_idx"][key][elem_idx] = []
279
- self._pending["element_iterations_idx"][key][elem_idx].extend(iters_idx)
194
+ def encode(self) -> Dict:
195
+ """Prepare store element iteration data for the persistent store."""
196
+ dct = self.__dict__
197
+ del dct["is_pending"]
198
+ return dct
280
199
 
281
- self.save()
200
+ @classmethod
201
+ def decode(cls, iter_dat: Dict) -> StoreElementIter:
202
+ """Initialise a `StoreElementIter` from persistent store element iteration data"""
282
203
 
283
- def update_loop_num_added_iters(self, loop_idx: int, num_added_iters: int):
284
- self._pending["loops_added_iters"][loop_idx] = num_added_iters
285
- self.save()
204
+ iter_dat = copy.deepcopy(iter_dat) # to avoid mutating; can we avoid this?
286
205
 
287
- def add_EARs(
288
- self,
289
- task_idx: int,
290
- task_insert_ID: int,
291
- element_iter_idx: int,
292
- EARs: Dict,
293
- param_src_updates: Dict,
294
- ) -> None:
295
- key = (task_idx, task_insert_ID, element_iter_idx)
296
- if key not in self._pending["EARs"]:
297
- self._pending["EARs"][key] = {}
298
- self._pending["EARs"][key].update(EARs)
299
- self._pending["parameter_source_updates"].update(param_src_updates)
300
- self.save()
206
+ # cast JSON string keys to integers:
207
+ for act_idx in list((iter_dat["EAR_IDs"] or {}).keys()):
208
+ iter_dat["EAR_IDs"][int(act_idx)] = iter_dat["EAR_IDs"].pop(act_idx)
301
209
 
302
- def add_loop(
303
- self,
304
- task_indices: List[int],
305
- loop_js: Dict,
306
- iterable_parameters: Dict[str:Dict],
307
- ) -> None:
308
- """Initialise the zeroth iterations of a named loop across the specified task
309
- subset.
210
+ return cls(is_pending=False, **iter_dat)
310
211
 
311
- Parameters
312
- ----------
313
- task_indices
314
- List of task indices that identifies the task subset over which the new loop
315
- should iterate.
212
+ def to_dict(self, EARs):
213
+ """Prepare data for the user-facing `ElementIteration` object."""
214
+ return {
215
+ "id_": self.id_,
216
+ "is_pending": self.is_pending,
217
+ "element_ID": self.element_ID,
218
+ "EAR_IDs": self.EAR_IDs,
219
+ "data_idx": self.data_idx,
220
+ "schema_parameters": self.schema_parameters,
221
+ "EARs": EARs,
222
+ "loop_idx": self.loop_idx,
223
+ }
316
224
 
317
- """
318
- self._pending["template_loops"].append(loop_js)
319
- self._pending["loops"].append(
320
- {
321
- "num_added_iterations": 1,
322
- "iterable_parameters": iterable_parameters,
323
- }
225
+ def append_EAR_IDs(
226
+ self: AnySElementIter, pend_IDs: Dict[int, List[int]]
227
+ ) -> AnySElementIter:
228
+ """Return a copy, with additional EAR IDs."""
229
+
230
+ EAR_IDs = copy.deepcopy(self.EAR_IDs) or {}
231
+ for act_idx, IDs_i in pend_IDs.items():
232
+ if act_idx not in EAR_IDs:
233
+ EAR_IDs[act_idx] = []
234
+ EAR_IDs[act_idx].extend(IDs_i)
235
+
236
+ return self.__class__(
237
+ id_=self.id_,
238
+ is_pending=self.is_pending,
239
+ element_ID=self.element_ID,
240
+ EAR_IDs=EAR_IDs,
241
+ data_idx=self.data_idx,
242
+ schema_parameters=self.schema_parameters,
243
+ loop_idx=self.loop_idx,
324
244
  )
325
245
 
326
- for task_idx, task_insert_ID in zip(task_indices, loop_js["task_insert_IDs"]):
327
- all_elements = slice(0, self.workflow.tasks[task_idx].num_elements, 1)
328
- self._init_task_loop(
329
- task_idx=task_idx,
330
- task_insert_ID=task_insert_ID,
331
- name=loop_js["name"],
332
- element_sel=all_elements,
333
- )
246
+ def update_loop_idx(
247
+ self: AnySElementIter, loop_idx: Dict[str, int]
248
+ ) -> AnySElementIter:
249
+ """Return a copy, with the loop index updated."""
250
+ loop_idx_new = copy.deepcopy(self.loop_idx)
251
+ loop_idx_new.update(loop_idx)
252
+ return self.__class__(
253
+ id_=self.id_,
254
+ is_pending=self.is_pending,
255
+ element_ID=self.element_ID,
256
+ EAR_IDs=self.EAR_IDs,
257
+ data_idx=self.data_idx,
258
+ schema_parameters=self.schema_parameters,
259
+ loop_idx=loop_idx_new,
260
+ )
334
261
 
335
- self.save()
336
262
 
337
- def add_submission(self, submission_js: Dict):
338
- """Add a new submission to the workflow."""
339
- self._pending["submissions"].append(submission_js)
340
- self.save()
263
+ @dataclass
264
+ class StoreEAR:
265
+ """
266
+ Parameters
267
+ ----------
268
+ data_idx
269
+ Maps parameter names within this EAR to parameter data indices.
270
+ metadata
271
+ Metadata concerning e.g. the state of the EAR.
272
+ action_idx
273
+ The task schema action associated with this EAR.
274
+ """
341
275
 
342
- def append_submission_attempt(self, sub_idx, submitted_js_idx: int) -> None:
343
- if sub_idx not in self._pending["submission_attempts"]:
344
- self._pending["submission_attempts"][sub_idx] = []
345
- self._pending["submission_attempts"][sub_idx].append(submitted_js_idx)
346
- self.save()
276
+ id_: int
277
+ is_pending: bool
278
+ elem_iter_ID: int
279
+ action_idx: int
280
+ data_idx: Dict[str, int]
281
+ submission_idx: Optional[int] = None
282
+ skip: Optional[bool] = False
283
+ success: Optional[bool] = None
284
+ start_time: Optional[datetime] = None
285
+ end_time: Optional[datetime] = None
286
+ snapshot_start: Optional[Dict] = None
287
+ snapshot_end: Optional[Dict] = None
288
+ exit_code: Optional[int] = None
289
+ metadata: Dict[str, Any] = None
290
+
291
+ @staticmethod
292
+ def _encode_datetime(dt: Union[datetime, None], ts_fmt: str) -> str:
293
+ return dt.strftime(ts_fmt) if dt else None
294
+
295
+ @staticmethod
296
+ def _decode_datetime(dt_str: Union[str, None], ts_fmt: str) -> datetime:
297
+ return datetime.strptime(dt_str, ts_fmt) if dt_str else None
298
+
299
+ def encode(self, ts_fmt: str) -> Dict:
300
+ """Prepare store EAR data for the persistent store."""
301
+ return {
302
+ "id_": self.id_,
303
+ "elem_iter_ID": self.elem_iter_ID,
304
+ "action_idx": self.action_idx,
305
+ "data_idx": self.data_idx,
306
+ "submission_idx": self.submission_idx,
307
+ "success": self.success,
308
+ "skip": self.skip,
309
+ "start_time": self._encode_datetime(self.start_time, ts_fmt),
310
+ "end_time": self._encode_datetime(self.end_time, ts_fmt),
311
+ "snapshot_start": self.snapshot_start,
312
+ "snapshot_end": self.snapshot_end,
313
+ "exit_code": self.exit_code,
314
+ "metadata": self.metadata,
315
+ }
347
316
 
348
- def set_jobscript_submit_time(
349
- self, sub_idx: int, js_idx: int, submit_time: datetime
350
- ) -> None:
351
- if sub_idx not in self._pending["jobscript_submit_times"]:
352
- self._pending["jobscript_submit_times"][sub_idx] = {}
353
- self._pending["jobscript_submit_times"][sub_idx][js_idx] = submit_time
354
- self.save()
317
+ @classmethod
318
+ def decode(cls, EAR_dat: Dict, ts_fmt: str) -> StoreEAR:
319
+ """Initialise a `StoreEAR` from persistent store EAR data"""
320
+ # don't want to mutate EAR_dat:
321
+ EAR_dat = copy.deepcopy(EAR_dat)
322
+ EAR_dat["start_time"] = cls._decode_datetime(EAR_dat["start_time"], ts_fmt)
323
+ EAR_dat["end_time"] = cls._decode_datetime(EAR_dat["end_time"], ts_fmt)
324
+ return cls(is_pending=False, **EAR_dat)
325
+
326
+ def to_dict(self) -> Dict:
327
+ """Prepare data for the user-facing `ElementActionRun` object."""
328
+
329
+ def _process_datetime(dt: datetime) -> datetime:
330
+ """We store datetime objects implicitly in UTC, so we need to first make
331
+ that explicit, and then convert to the local time zone."""
332
+ return dt.replace(tzinfo=timezone.utc).astimezone() if dt else None
355
333
 
356
- def set_jobscript_job_ID(self, sub_idx: int, js_idx: int, job_ID: int) -> None:
357
- if sub_idx not in self._pending["jobscript_job_IDs"]:
358
- self._pending["jobscript_job_IDs"][sub_idx] = {}
359
- self._pending["jobscript_job_IDs"][sub_idx][js_idx] = job_ID
360
- self.save()
334
+ return {
335
+ "id_": self.id_,
336
+ "is_pending": self.is_pending,
337
+ "elem_iter_ID": self.elem_iter_ID,
338
+ "action_idx": self.action_idx,
339
+ "data_idx": self.data_idx,
340
+ "submission_idx": self.submission_idx,
341
+ "success": self.success,
342
+ "skip": self.skip,
343
+ "start_time": _process_datetime(self.start_time),
344
+ "end_time": _process_datetime(self.end_time),
345
+ "snapshot_start": self.snapshot_start,
346
+ "snapshot_end": self.snapshot_end,
347
+ "exit_code": self.exit_code,
348
+ "metadata": self.metadata,
349
+ }
361
350
 
362
- def set_jobscript_version_info(
351
+ def update(
363
352
  self,
364
- sub_idx: int,
365
- js_idx: int,
366
- vers_info: Tuple,
367
- ) -> None:
368
- if sub_idx not in self._pending["jobscript_version_info"]:
369
- self._pending["jobscript_version_info"][sub_idx] = {}
370
- self._pending["jobscript_version_info"][sub_idx][js_idx] = vers_info
371
- self.save()
353
+ submission_idx: Optional[int] = None,
354
+ skip: Optional[bool] = None,
355
+ success: Optional[bool] = None,
356
+ start_time: Optional[datetime] = None,
357
+ end_time: Optional[datetime] = None,
358
+ snapshot_start: Optional[Dict] = None,
359
+ snapshot_end: Optional[Dict] = None,
360
+ exit_code: Optional[int] = None,
361
+ ) -> AnySEAR:
362
+ """Return a shallow copy, with specified data updated."""
363
+
364
+ sub_idx = submission_idx if submission_idx is not None else self.submission_idx
365
+ skip = skip if skip is not None else self.skip
366
+ success = success if success is not None else self.success
367
+ start_time = start_time if start_time is not None else self.start_time
368
+ end_time = end_time if end_time is not None else self.end_time
369
+ snap_s = snapshot_start if snapshot_start is not None else self.snapshot_start
370
+ snap_e = snapshot_end if snapshot_end is not None else self.snapshot_end
371
+ exit_code = exit_code if exit_code is not None else self.exit_code
372
+
373
+ return self.__class__(
374
+ id_=self.id_,
375
+ is_pending=self.is_pending,
376
+ elem_iter_ID=self.elem_iter_ID,
377
+ action_idx=self.action_idx,
378
+ data_idx=self.data_idx,
379
+ metadata=self.metadata,
380
+ submission_idx=sub_idx,
381
+ skip=skip,
382
+ success=success,
383
+ start_time=start_time,
384
+ end_time=end_time,
385
+ snapshot_start=snap_s,
386
+ snapshot_end=snap_e,
387
+ exit_code=exit_code,
388
+ )
372
389
 
373
- def add_parameter_data(self, data: Any, source: Dict) -> int:
374
- return self._add_parameter_data({"data": data}, source)
375
390
 
376
- def add_unset_parameter_data(self, source: Dict) -> int:
377
- return self._add_parameter_data(None, source)
391
+ @dataclass
392
+ class StoreParameter:
393
+ id_: int
394
+ is_pending: bool
395
+ is_set: bool
396
+ data: Any
397
+ file: Dict
398
+ source: Dict
399
+
400
+ _encoders = {}
401
+ _decoders = {}
402
+
403
+ def encode(self, **kwargs) -> Dict:
404
+ """Prepare store parameter data for the persistent store."""
405
+ if self.is_set:
406
+ if self.file:
407
+ return {"file": self.file}
408
+ else:
409
+ return self._encode(obj=self.data, **kwargs)
410
+ else:
411
+ return None
378
412
 
379
- def _encode_parameter_data(
413
+ def _encode(
380
414
  self,
381
415
  obj: Any,
382
- path: List = None,
416
+ path: Optional[List] = None,
383
417
  type_lookup: Optional[Dict] = None,
384
418
  **kwargs,
385
- ) -> Any:
419
+ ) -> Dict:
420
+ """Recursive encoder."""
421
+
386
422
  path = path or []
387
423
  if type_lookup is None:
388
424
  type_lookup = {
389
425
  "tuples": [],
390
426
  "sets": [],
391
- **{k: [] for k in self._parameter_decoders.keys()},
427
+ **{k: [] for k in self._decoders.keys()},
392
428
  }
393
429
 
394
430
  if len(path) > 50:
395
431
  raise RuntimeError("I'm in too deep!")
396
432
 
397
- if isinstance(obj, ParameterValue):
398
- encoded = self._encode_parameter_data(
433
+ if any("ParameterValue" in i.__name__ for i in obj.__class__.__mro__):
434
+ # TODO: not nice; did this to avoid circular import of `ParameterValue`
435
+ encoded = self._encode(
399
436
  obj=obj.to_dict(),
400
437
  path=path,
401
438
  type_lookup=type_lookup,
@@ -406,7 +443,7 @@ class PersistentStore(ABC):
406
443
  elif isinstance(obj, (list, tuple, set)):
407
444
  data = []
408
445
  for idx, item in enumerate(obj):
409
- encoded = self._encode_parameter_data(
446
+ encoded = self._encode(
410
447
  obj=item,
411
448
  path=path + [idx],
412
449
  type_lookup=type_lookup,
@@ -424,7 +461,7 @@ class PersistentStore(ABC):
424
461
  elif isinstance(obj, dict):
425
462
  data = {}
426
463
  for dct_key, dct_val in obj.items():
427
- encoded = self._encode_parameter_data(
464
+ encoded = self._encode(
428
465
  obj=dct_val,
429
466
  path=path + [dct_key],
430
467
  type_lookup=type_lookup,
@@ -436,8 +473,8 @@ class PersistentStore(ABC):
436
473
  elif isinstance(obj, PRIMITIVES):
437
474
  data = obj
438
475
 
439
- elif type(obj) in self._parameter_encoders:
440
- data = self._parameter_encoders[type(obj)](
476
+ elif type(obj) in self._encoders:
477
+ data = self._encoders[type(obj)](
441
478
  obj=obj,
442
479
  path=path,
443
480
  type_lookup=type_lookup,
@@ -452,14 +489,35 @@ class PersistentStore(ABC):
452
489
 
453
490
  return {"data": data, "type_lookup": type_lookup}
454
491
 
455
- def _decode_parameter_data(
456
- self,
492
+ @classmethod
493
+ def decode(
494
+ cls,
495
+ id_: int,
457
496
  data: Union[None, Dict],
497
+ source: Dict,
458
498
  path: Optional[List[str]] = None,
459
499
  **kwargs,
460
500
  ) -> Any:
461
- if data is None:
462
- return None
501
+ """Initialise from persistent store parameter data."""
502
+ if data and "file" in data:
503
+ return cls(
504
+ id_=id_,
505
+ data=None,
506
+ file=data["file"],
507
+ is_set=True,
508
+ source=source,
509
+ is_pending=False,
510
+ )
511
+ elif data is None:
512
+ # parameter is not set
513
+ return cls(
514
+ id_=id_,
515
+ data=None,
516
+ file=None,
517
+ is_set=False,
518
+ source=source,
519
+ is_pending=False,
520
+ )
463
521
 
464
522
  path = path or []
465
523
 
@@ -485,177 +543,1001 @@ class PersistentStore(ABC):
485
543
  else:
486
544
  obj = set(obj)
487
545
 
488
- for data_type in self._parameter_decoders:
489
- obj = self._parameter_decoders[data_type](
546
+ for data_type in cls._decoders:
547
+ obj = cls._decoders[data_type](
490
548
  obj=obj,
491
549
  type_lookup=data["type_lookup"],
492
550
  path=path,
493
551
  **kwargs,
494
552
  )
495
553
 
496
- return obj
554
+ return cls(
555
+ id_=id_,
556
+ data=obj,
557
+ file=None,
558
+ is_set=True,
559
+ source=source,
560
+ is_pending=False,
561
+ )
497
562
 
498
- def get_creation_info(self):
499
- """Get information about the app that created the workflow."""
500
- return self.load_metadata()["creation_info"]
563
+ def set_data(self, value: Any) -> None:
564
+ """Return a copy, with data set."""
565
+ if self.is_set:
566
+ raise RuntimeError(f"Parameter ID {self.id_!r} is already set!")
567
+ return self.__class__(
568
+ id_=self.id_,
569
+ is_set=True,
570
+ is_pending=self.is_pending,
571
+ data=value,
572
+ file=None,
573
+ source=self.source,
574
+ )
501
575
 
502
- @classmethod
503
- @abstractmethod
504
- def path_has_store(cls, path):
505
- """Is a given workflow path of this store type?"""
506
- pass
576
+ def set_file(self, value: Any) -> None:
577
+ """Return a copy, with file set."""
578
+ if self.is_set:
579
+ raise RuntimeError(f"Parameter ID {self.id_!r} is already set!")
580
+ return self.__class__(
581
+ id_=self.id_,
582
+ is_set=True,
583
+ is_pending=self.is_pending,
584
+ data=None,
585
+ file=value,
586
+ source=self.source,
587
+ )
588
+
589
+ def update_source(self, src: Dict) -> None:
590
+ """Return a copy, with updated source."""
591
+ new_src = dict(sorted({**self.source, **src}.items()))
592
+ return self.__class__(
593
+ id_=self.id_,
594
+ is_set=self.is_set,
595
+ is_pending=self.is_pending,
596
+ data=self.data,
597
+ file=self.file,
598
+ source=new_src,
599
+ )
600
+
601
+
602
+ class PersistentStore(ABC):
603
+ _store_task_cls = StoreTask
604
+ _store_elem_cls = StoreElement
605
+ _store_iter_cls = StoreElementIter
606
+ _store_EAR_cls = StoreEAR
607
+ _store_param_cls = StoreParameter
608
+
609
+ _resources = {}
610
+
611
+ def __init__(self, app, workflow, path, fs=None) -> None:
612
+ self.app = app
613
+ self.workflow = workflow
614
+ self.path = path
615
+ self.fs = fs
616
+
617
+ self._pending = PendingChanges(app=app, store=self, resource_map=self._res_map)
618
+
619
+ self._resources_in_use = set()
620
+ self._in_batch_mode = False
507
621
 
508
622
  @property
509
- @abstractmethod
510
- def store_path(self):
511
- """Get the store path, which may be the same as the workflow path."""
512
- pass
623
+ def logger(self):
624
+ return self.app.persistence_logger
513
625
 
514
- @classmethod
515
- @abstractmethod
516
- def write_empty_workflow(
517
- cls,
518
- template_js: Dict,
519
- template_components_js: Dict,
520
- workflow_path: Path,
521
- replaced_dir: Path,
522
- creation_info: Dict,
523
- ) -> None:
524
- pass
626
+ @property
627
+ def ts_fmt(self) -> str:
628
+ return r"%Y-%m-%d %H:%M:%S.%f" # TODO: self.workflow.ts_fmt
525
629
 
526
- @abstractmethod
527
- def exists(self) -> bool:
528
- pass
630
+ @property
631
+ def has_pending(self):
632
+ return bool(self._pending)
529
633
 
530
- @abstractmethod
531
- def commit_pending(self) -> None:
532
- pass
634
+ @staticmethod
635
+ def prepare_test_store_from_spec(task_spec):
636
+ """Generate a valid store from a specification in terms of nested
637
+ elements/iterations/EARs.
533
638
 
534
- @abstractmethod
535
- def _get_persistent_template_components(self) -> Dict:
536
- """Get currently persistent template components, excluding pending."""
639
+ """
640
+ tasks = []
641
+ elements = []
642
+ elem_iters = []
643
+ EARs = []
644
+
645
+ for task_idx, task_i in enumerate(task_spec):
646
+ elems_i = task_i.get("elements", [])
647
+ elem_IDs = list(range(len(elements), len(elements) + len(elems_i)))
648
+
649
+ for elem_idx, elem_j in enumerate(elems_i):
650
+ iters_j = elem_j.get("iterations", [])
651
+ iter_IDs = list(range(len(elem_iters), len(elem_iters) + len(iters_j)))
652
+
653
+ for iter_k in iters_j:
654
+ EARs_k = iter_k.get("EARs", [])
655
+ EAR_IDs = list(range(len(EARs), len(EARs) + len(EARs_k)))
656
+ EAR_IDs_dct = {0: EAR_IDs} if EAR_IDs else {}
657
+
658
+ for _ in EARs_k:
659
+ EARs.append(
660
+ dict(
661
+ id_=len(EARs),
662
+ is_pending=False,
663
+ elem_iter_ID=len(elem_iters),
664
+ action_idx=0,
665
+ data_idx={},
666
+ metadata={},
667
+ )
668
+ )
669
+
670
+ elem_iters.append(
671
+ dict(
672
+ id_=len(elem_iters),
673
+ is_pending=False,
674
+ element_ID=len(elements),
675
+ EAR_IDs=EAR_IDs_dct,
676
+ data_idx={},
677
+ schema_parameters=[],
678
+ )
679
+ )
680
+ elements.append(
681
+ dict(
682
+ id_=len(elements),
683
+ is_pending=False,
684
+ element_idx=elem_idx,
685
+ seq_idx={},
686
+ src_idx={},
687
+ task_ID=task_idx,
688
+ iteration_IDs=iter_IDs,
689
+ )
690
+ )
691
+ tasks.append(
692
+ dict(
693
+ id_=len(tasks),
694
+ is_pending=False,
695
+ element_IDs=elem_IDs,
696
+ )
697
+ )
698
+ return (tasks, elements, elem_iters, EARs)
537
699
 
538
- @abstractmethod
539
- def get_template(self) -> Dict:
540
- pass
700
+ def remove_path(self, path: str, fs) -> None:
701
+ """Try very hard to delete a directory or file.
541
702
 
542
- @abstractmethod
543
- def get_all_tasks_metadata(self) -> List[Dict]:
544
- pass
703
+ Dropbox (on Windows, at least) seems to try to re-sync files if the parent directory
704
+ is deleted soon after creation, which is the case on a failed workflow creation (e.g.
705
+ missing inputs), so in addition to catching PermissionErrors generated when
706
+ Dropbox has a lock on files, we repeatedly try deleting the directory tree.
545
707
 
546
- @abstractmethod
547
- def get_loops(self) -> List[Dict]:
548
- pass
708
+ """
549
709
 
550
- @abstractmethod
551
- def get_submissions(self) -> List[Dict]:
552
- pass
710
+ @self.app.perm_error_retry()
711
+ def _remove_path(path: str, fs) -> None:
712
+ self.logger.debug(f"_remove_path: path={path}")
713
+ while fs.exists(path):
714
+ fs.rm(path, recursive=True)
715
+ time.sleep(0.5)
553
716
 
554
- @abstractmethod
555
- def get_task_elements(
556
- self,
557
- task_idx: int,
558
- task_insert_ID: int,
559
- selection: slice,
560
- ) -> List:
561
- pass
562
-
563
- @abstractmethod
564
- def _add_parameter_data(self, data: Any, source: Dict) -> int:
565
- pass
566
-
567
- @abstractmethod
568
- def get_parameter_data(self, index: int) -> Tuple[bool, Any]:
569
- pass
570
-
571
- @abstractmethod
572
- def get_parameter_source(self, index: int) -> Dict:
573
- pass
574
-
575
- @abstractmethod
576
- def get_all_parameter_data(self) -> Dict[int, Any]:
577
- pass
578
-
579
- @abstractmethod
580
- def is_parameter_set(self, index: int) -> bool:
581
- pass
582
-
583
- @abstractmethod
584
- def set_parameter(self, index: int, data: Any) -> None:
585
- """Set the value of a pre-allocated parameter."""
586
- pass
587
-
588
- @abstractmethod
589
- def check_parameters_exist(
590
- self, indices: Union[int, List[int]]
591
- ) -> Union[bool, List[bool]]:
592
- pass
593
-
594
- @abstractmethod
595
- def _init_task_loop(
596
- self,
597
- task_idx: int,
598
- task_insert_ID: int,
599
- element_sel: slice,
600
- name: str,
601
- ) -> None:
602
- """Initialise the zeroth iteration of a named loop for a specified task."""
717
+ return _remove_path(path, fs)
603
718
 
604
- @abstractmethod
605
- def remove_replaced_dir(self) -> None:
606
- pass
719
+ def rename_path(self, replaced: str, original: str, fs) -> None:
720
+ """Revert the replaced workflow path to its original name.
607
721
 
608
- @abstractmethod
609
- def reinstate_replaced_dir(self) -> None:
610
- pass
722
+ This happens when new workflow creation fails and there is an existing workflow
723
+ with the same name; the original workflow which was renamed, must be reverted."""
611
724
 
612
- @abstractmethod
613
- def copy(self, path: PathLike = None) -> None:
614
- """Make a copy of the store."""
615
- pass
725
+ @self.app.perm_error_retry()
726
+ def _rename_path(replaced: str, original: str, fs) -> None:
727
+ self.logger.debug(f"_rename_path: {replaced!r} --> {original!r}.")
728
+ try:
729
+ fs.rename(replaced, original, recursive=True) # TODO: why need recursive?
730
+ except TypeError:
731
+ # `SFTPFileSystem.rename` has no `recursive` argument:
732
+ fs.rename(replaced, original)
616
733
 
617
- @abstractmethod
618
- def is_modified_on_disk(self) -> bool:
619
- """Check if the workflow (metadata) has been modified on disk since initial
620
- load (this is bad)."""
621
- pass
734
+ return _rename_path(replaced, original, fs)
622
735
 
623
- @abstractmethod
624
- def get_num_added_tasks(self) -> int:
625
- """Get the total number of tasks ever added to the workflow, regardless of whether
626
- any of those tasks were subsequently removed from the workflow."""
736
+ def _get_num_total_tasks(self):
737
+ """Get the total number of persistent and pending tasks."""
738
+ return self._get_num_persistent_tasks() + len(self._pending.add_tasks)
627
739
 
628
- @property
629
- def ts_fmt(self) -> str:
630
- return self.workflow.ts_fmt
740
+ def _get_num_total_loops(self):
741
+ """Get the total number of persistent and pending loops."""
742
+ return self._get_num_persistent_loops() + len(self._pending.add_loops)
743
+
744
+ def _get_num_total_submissions(self):
745
+ """Get the total number of persistent and pending submissions."""
746
+ return self._get_num_persistent_submissions() + len(self._pending.add_submissions)
747
+
748
+ def _get_num_total_elements(self):
749
+ """Get the total number of persistent and pending elements."""
750
+ return self._get_num_persistent_elements() + len(self._pending.add_elements)
751
+
752
+ def _get_num_total_elem_iters(self):
753
+ """Get the total number of persistent and pending element iterations."""
754
+ return self._get_num_persistent_elem_iters() + len(self._pending.add_elem_iters)
755
+
756
+ def _get_num_total_EARs(self):
757
+ """Get the total number of persistent and pending EARs."""
758
+ return self._get_num_persistent_EARs() + len(self._pending.add_EARs)
759
+
760
+ def _get_task_total_num_elements(self, task_ID: int):
761
+ """Get the total number of persistent and pending elements of a given task."""
762
+ return len(self.get_task(task_ID).element_IDs)
763
+
764
+ def _get_num_total_parameters(self):
765
+ """Get the total number of persistent and pending parameters."""
766
+ return self._get_num_persistent_parameters() + len(self._pending.add_parameters)
767
+
768
+ def _get_num_total_input_files(self):
769
+ """Get the total number of persistent and pending user-supplied input files."""
770
+ num_pend_inp_files = len([i for i in self._pending.add_files if i["is_input"]])
771
+ return self._get_num_persistent_input_files() + num_pend_inp_files
631
772
 
632
- def set_EAR_submission_indices(
773
+ def _get_num_total_added_tasks(self):
774
+ """Get the total number of tasks ever added to the workflow."""
775
+ return self._get_num_persistent_added_tasks() + len(self._pending.add_tasks)
776
+
777
+ def _get_num_persistent_input_files(self):
778
+ return len(list(self.workflow.input_files_path.glob("*")))
779
+
780
+ def save(self):
781
+ """Commit pending changes to disk, if not in batch-update mode."""
782
+ if not self.workflow._in_batch_mode:
783
+ self._pending.commit_all()
784
+
785
+ def add_template_components(self, temp_comps: Dict, save: bool = True) -> None:
786
+ all_tc = self.get_template_components()
787
+ for name, dat in temp_comps.items():
788
+ if name in all_tc:
789
+ for hash_i, dat_i in dat.items():
790
+ if hash_i not in all_tc[name]:
791
+ self._pending.add_template_components[name][hash_i] = dat_i
792
+ else:
793
+ self._pending.add_template_components[name] = dat
794
+
795
+ if save:
796
+ self.save()
797
+
798
+ def add_task(self, idx: int, task_template: Dict, save: bool = True):
799
+ """Add a new task to the workflow."""
800
+ self.logger.debug(f"Adding store task.")
801
+ new_ID = self._get_num_total_added_tasks()
802
+ self._pending.add_tasks[new_ID] = self._store_task_cls(
803
+ id_=new_ID,
804
+ index=idx,
805
+ task_template=task_template,
806
+ is_pending=True,
807
+ element_IDs=[],
808
+ )
809
+ if save:
810
+ self.save()
811
+ return new_ID
812
+
813
+ def add_loop(
633
814
  self,
634
- sub_idx: int,
635
- EAR_indices: Tuple[int, int, int, int],
636
- ) -> None:
637
- for key in EAR_indices:
638
- self._pending["EAR_submission_idx"][key] = sub_idx
639
- self.save()
815
+ loop_template: Dict,
816
+ iterable_parameters,
817
+ iter_IDs: List[int],
818
+ save: bool = True,
819
+ ):
820
+ """Add a new loop to the workflow."""
821
+ self.logger.debug(f"Adding store loop.")
822
+ new_idx = self._get_num_total_loops()
823
+ self._pending.add_loops[new_idx] = {
824
+ "loop_template": loop_template,
825
+ "iterable_parameters": iterable_parameters,
826
+ }
640
827
 
641
- def set_EAR_start(
828
+ for i in iter_IDs:
829
+ self._pending.update_loop_indices[i] = {loop_template["name"]: 0}
830
+
831
+ if save:
832
+ self.save()
833
+
834
+ def add_submission(self, sub_idx: int, sub_js: Dict, save: bool = True):
835
+ """Add a new submission."""
836
+ self.logger.debug(f"Adding store submission.")
837
+ self._pending.add_submissions[sub_idx] = sub_js
838
+ if save:
839
+ self.save()
840
+
841
+ def add_element_set(self, task_id: int, es_js: Dict, save: bool = True):
842
+ self._pending.add_element_sets[task_id].append(es_js)
843
+ if save:
844
+ self.save()
845
+
846
+ def add_element(
847
+ self, task_ID: int, es_idx: int, seq_idx: Dict, src_idx: Dict, save: bool = True
848
+ ):
849
+ """Add a new element to a task."""
850
+ self.logger.debug(f"Adding store element.")
851
+ new_ID = self._get_num_total_elements()
852
+ new_elem_idx = self._get_task_total_num_elements(task_ID)
853
+ self._pending.add_elements[new_ID] = self._store_elem_cls(
854
+ id_=new_ID,
855
+ is_pending=True,
856
+ index=new_elem_idx,
857
+ es_idx=es_idx,
858
+ seq_idx=seq_idx,
859
+ src_idx=src_idx,
860
+ task_ID=task_ID,
861
+ iteration_IDs=[],
862
+ )
863
+ self._pending.add_elem_IDs[task_ID].append(new_ID)
864
+ if save:
865
+ self.save()
866
+ return new_ID
867
+
868
+ def add_element_iteration(
869
+ self,
870
+ element_ID: int,
871
+ data_idx: Dict,
872
+ schema_parameters: List[str],
873
+ loop_idx: Optional[Dict] = None,
874
+ save: bool = True,
875
+ ) -> int:
876
+ """Add a new iteration to an element."""
877
+ self.logger.debug(f"Adding store element-iteration.")
878
+ new_ID = self._get_num_total_elem_iters()
879
+ self._pending.add_elem_iters[new_ID] = self._store_iter_cls(
880
+ id_=new_ID,
881
+ element_ID=element_ID,
882
+ is_pending=True,
883
+ EAR_IDs=None,
884
+ data_idx=data_idx,
885
+ schema_parameters=schema_parameters,
886
+ loop_idx=loop_idx or {},
887
+ )
888
+ self._pending.add_elem_iter_IDs[element_ID].append(new_ID)
889
+ if save:
890
+ self.save()
891
+ return new_ID
892
+
893
+ def add_EAR(
642
894
  self,
643
- task_insert_ID: int,
644
- element_iteration_idx: int,
895
+ elem_iter_ID: int,
645
896
  action_idx: int,
646
- run_idx: int,
897
+ data_idx: Dict,
898
+ metadata: Dict,
899
+ save: bool = True,
900
+ ) -> int:
901
+ """Add a new EAR to an element iteration."""
902
+ self.logger.debug(f"Adding store EAR.")
903
+ new_ID = self._get_num_total_EARs()
904
+ self._pending.add_EARs[new_ID] = self._store_EAR_cls(
905
+ id_=new_ID,
906
+ is_pending=True,
907
+ elem_iter_ID=elem_iter_ID,
908
+ action_idx=action_idx,
909
+ data_idx=data_idx,
910
+ metadata=metadata,
911
+ )
912
+ self._pending.add_elem_iter_EAR_IDs[elem_iter_ID][action_idx].append(new_ID)
913
+ if save:
914
+ self.save()
915
+ return new_ID
916
+
917
+ def add_submission_attempt(
918
+ self, sub_idx: int, submitted_js_idx: List[int], save: bool = True
919
+ ):
920
+ self._pending.add_submission_attempts[sub_idx] = submitted_js_idx
921
+ if save:
922
+ self.save()
923
+
924
+ def set_EAR_submission_index(
925
+ self, EAR_ID: int, sub_idx: int, save: bool = True
647
926
  ) -> None:
648
- key = (task_insert_ID, element_iteration_idx, action_idx, run_idx)
649
- self._pending["EAR_start_times"][key] = datetime.utcnow()
650
- self.save()
927
+ self._pending.set_EAR_submission_indices[EAR_ID] = sub_idx
928
+ if save:
929
+ self.save()
930
+
931
+ def set_EAR_start(self, EAR_ID: int, save: bool = True) -> datetime:
932
+ dt = datetime.utcnow()
933
+ snapshot = JSONLikeDirSnapShot()
934
+ snapshot.take(".")
935
+ ss_js = snapshot.to_json_like()
936
+ self._pending.set_EAR_starts[EAR_ID] = (dt, ss_js)
937
+ if save:
938
+ self.save()
939
+ return dt
651
940
 
652
941
  def set_EAR_end(
942
+ self, EAR_ID: int, exit_code: int, success: bool, save: bool = True
943
+ ) -> datetime:
944
+ # TODO: save output files
945
+ dt = datetime.utcnow()
946
+ snapshot = JSONLikeDirSnapShot()
947
+ snapshot.take(".")
948
+ ss_js = snapshot.to_json_like()
949
+ self._pending.set_EAR_ends[EAR_ID] = (dt, ss_js, exit_code, success)
950
+ if save:
951
+ self.save()
952
+ return dt
953
+
954
+ def set_EAR_skip(self, EAR_ID: int, save: bool = True) -> None:
955
+ self._pending.set_EAR_skips.append(EAR_ID)
956
+ if save:
957
+ self.save()
958
+
959
+ def set_jobscript_version_info(
960
+ self, sub_idx: int, js_idx: int, vers_info: Dict, save: bool = True
961
+ ):
962
+ self._pending.set_jobscript_version_info[sub_idx][js_idx] = vers_info
963
+ if save:
964
+ self.save()
965
+
966
+ def set_jobscript_submit_time(
967
+ self, sub_idx: int, js_idx: int, submit_time: datetime, save: bool = True
968
+ ):
969
+ self._pending.set_jobscript_submit_time[sub_idx][js_idx] = submit_time
970
+ if save:
971
+ self.save()
972
+
973
+ def set_jobscript_job_ID(
974
+ self, sub_idx: int, js_idx: int, job_ID: str, save: bool = True
975
+ ):
976
+ self._pending.set_jobscript_job_ID[sub_idx][js_idx] = job_ID
977
+ if save:
978
+ self.save()
979
+
980
+ def _add_parameter(
653
981
  self,
654
- task_insert_ID: int,
655
- element_iteration_idx: int,
656
- action_idx: int,
657
- run_idx: int,
982
+ is_set: bool,
983
+ source: Dict,
984
+ data: Any = None,
985
+ file: Dict = None,
986
+ save: bool = True,
987
+ ) -> int:
988
+ self.logger.debug(f"Adding store parameter{f' (unset)' if data is None else ''}.")
989
+ new_idx = self._get_num_total_parameters()
990
+ self._pending.add_parameters[new_idx] = self._store_param_cls(
991
+ id_=new_idx,
992
+ is_pending=True,
993
+ is_set=is_set,
994
+ data=data,
995
+ file=file,
996
+ source=source,
997
+ )
998
+ if save:
999
+ self.save()
1000
+ return new_idx
1001
+
1002
+ def _prepare_set_file(
1003
+ self,
1004
+ store_contents: bool,
1005
+ is_input: bool,
1006
+ path=None,
1007
+ contents: str = None,
1008
+ filename: str = None,
1009
+ ):
1010
+ if filename is None:
1011
+ filename = Path(path).name
1012
+
1013
+ if store_contents:
1014
+ if is_input:
1015
+ new_idx = self._get_num_total_input_files()
1016
+ dst_dir = Path(self.workflow.input_files_path, str(new_idx))
1017
+ dst_path = dst_dir / filename
1018
+ else:
1019
+ # assume path is inside the EAR execution directory; transform that to the
1020
+ # equivalent artifacts directory:
1021
+ assert path is not None
1022
+ exec_sub_path = Path(path).relative_to(self.path)
1023
+ dst_path = Path(
1024
+ self.workflow.task_artifacts_path, *exec_sub_path.parts[1:]
1025
+ )
1026
+ if dst_path.is_file():
1027
+ dst_path = dst_path.with_suffix(dst_path.suffix + "_2") # TODO: better!
1028
+ else:
1029
+ dst_path = path
1030
+
1031
+ file_param_dat = {
1032
+ "store_contents": store_contents,
1033
+ "path": str(dst_path.relative_to(self.path)),
1034
+ }
1035
+ self._pending.add_files.append(
1036
+ {
1037
+ "store_contents": store_contents,
1038
+ "is_input": is_input,
1039
+ "dst_path": str(dst_path),
1040
+ "path": str(path),
1041
+ "contents": contents,
1042
+ }
1043
+ )
1044
+
1045
+ return file_param_dat
1046
+
1047
+ def set_file(
1048
+ self,
1049
+ param_id: int,
1050
+ store_contents: bool,
1051
+ is_input: bool,
1052
+ path=None,
1053
+ contents: str = None,
1054
+ filename: str = None,
1055
+ save: bool = True,
1056
+ ):
1057
+ self.logger.debug(f"Setting new file")
1058
+ file_param_dat = self._prepare_set_file(
1059
+ store_contents=store_contents,
1060
+ is_input=is_input,
1061
+ path=path,
1062
+ contents=contents,
1063
+ filename=filename,
1064
+ )
1065
+ self.set_parameter_value(param_id, value=file_param_dat, is_file=True, save=save)
1066
+ if save:
1067
+ self.save()
1068
+
1069
+ def add_file(
1070
+ self,
1071
+ store_contents: bool,
1072
+ is_input: bool,
1073
+ source: Dict,
1074
+ path=None,
1075
+ contents: str = None,
1076
+ filename: str = None,
1077
+ save: bool = True,
1078
+ ):
1079
+ self.logger.debug(f"Adding new file")
1080
+ file_param_dat = self._prepare_set_file(
1081
+ store_contents=store_contents,
1082
+ is_input=is_input,
1083
+ path=path,
1084
+ contents=contents,
1085
+ filename=filename,
1086
+ )
1087
+ p_id = self._add_parameter(
1088
+ file=file_param_dat,
1089
+ is_set=True,
1090
+ source=source,
1091
+ save=save,
1092
+ )
1093
+ if save:
1094
+ self.save()
1095
+ return p_id
1096
+
1097
+ def _append_files(self, files: Dict[int, Dict]):
1098
+ """Add new files to the files or artifacts directories."""
1099
+ for dat in files:
1100
+ if dat["store_contents"]:
1101
+ dst_path = Path(dat["dst_path"])
1102
+ dst_path.parent.mkdir(parents=True, exist_ok=True)
1103
+ if dat["path"] is not None:
1104
+ # copy from source path to destination:
1105
+ shutil.copy(dat["path"], dst_path)
1106
+ else:
1107
+ # write out text file:
1108
+ with dst_path.open("wt") as fp:
1109
+ fp.write(dat["contents"])
1110
+
1111
+ def add_set_parameter(self, data: Any, source: Dict, save: bool = True) -> int:
1112
+ return self._add_parameter(data=data, is_set=True, source=source, save=save)
1113
+
1114
+ def add_unset_parameter(self, source: Dict, save: bool = True) -> int:
1115
+ return self._add_parameter(data=None, is_set=False, source=source, save=save)
1116
+
1117
+ def set_parameter_value(
1118
+ self, param_id: int, value: Any, is_file: bool = False, save: bool = True
1119
+ ):
1120
+ self.logger.debug(f"Setting store parameter ID {param_id} value to {value!r}.")
1121
+ self._pending.set_parameters[param_id] = (value, is_file)
1122
+ if save:
1123
+ self.save()
1124
+
1125
+ def update_param_source(self, param_id: int, source: Dict, save: bool = True) -> None:
1126
+ self.logger.debug(f"Updating parameter ID {param_id!r} source to {source!r}.")
1127
+ self._pending.update_param_sources[param_id] = source
1128
+ if save:
1129
+ self.save()
1130
+
1131
+ def update_loop_num_iters(
1132
+ self, index: int, num_iters: int, save: bool = True
658
1133
  ) -> None:
659
- key = (task_insert_ID, element_iteration_idx, action_idx, run_idx)
660
- self._pending["EAR_end_times"][key] = datetime.utcnow()
661
- self.save()
1134
+ self.logger.debug(
1135
+ f"Updating loop {index!r} num added iterations to {num_iters!r}."
1136
+ )
1137
+ self._pending.update_loop_num_iters[index] = num_iters
1138
+ if save:
1139
+ self.save()
1140
+
1141
+ def get_template_components(self) -> Dict:
1142
+ """Get all template components, including pending."""
1143
+ tc = copy.deepcopy(self._get_persistent_template_components())
1144
+ for typ in TEMPLATE_COMP_TYPES:
1145
+ for hash_i, dat_i in self._pending.add_template_components[typ].items():
1146
+ if typ not in tc:
1147
+ tc[typ] = {}
1148
+ tc[typ][hash_i] = dat_i
1149
+
1150
+ return tc
1151
+
1152
+ def get_template(self) -> Dict:
1153
+ return self._get_persistent_template()
1154
+
1155
+ def _get_task_id_to_idx_map(self) -> Dict[int, int]:
1156
+ return {i.id_: i.index for i in self.get_tasks()}
1157
+
1158
+ def get_task(self, task_idx: int) -> AnySTask:
1159
+ return self.get_tasks()[task_idx]
1160
+
1161
+ def _process_retrieved_tasks(self, tasks: List[AnySTask]) -> List[AnySTask]:
1162
+ """Add pending data to retrieved tasks."""
1163
+ tasks_new = []
1164
+ for task_i in tasks:
1165
+ # consider pending element IDs:
1166
+ pend_elems = self._pending.add_elem_IDs.get(task_i.id_)
1167
+ if pend_elems:
1168
+ task_i = task_i.append_element_IDs(pend_elems)
1169
+ tasks_new.append(task_i)
1170
+ return tasks_new
1171
+
1172
+ def _process_retrieved_loops(self, loops: Dict[int, Dict]) -> Dict[int, Dict]:
1173
+ """Add pending data to retrieved loops."""
1174
+ loops_new = {}
1175
+ for id_, loop_i in loops.items():
1176
+ if "num_added_iterations" not in loop_i:
1177
+ loop_i["num_added_iterations"] = 1
1178
+ # consider pending changes to num added iterations:
1179
+ pend_num_iters = self._pending.update_loop_num_iters.get(id_)
1180
+ if pend_num_iters:
1181
+ loop_i["num_added_iterations"] = pend_num_iters
1182
+ loops_new[id_] = loop_i
1183
+ return loops_new
1184
+
1185
+ def get_tasks_by_IDs(self, id_lst: Iterable[int]) -> List[AnySTask]:
1186
+ # separate pending and persistent IDs:
1187
+ id_set = set(id_lst)
1188
+ all_pending = set(self._pending.add_tasks)
1189
+ id_pers = id_set.difference(all_pending)
1190
+ id_pend = id_set.intersection(all_pending)
1191
+
1192
+ tasks = self._get_persistent_tasks(id_pers) if id_pers else {}
1193
+ tasks.update({i: self._pending.add_tasks[i] for i in id_pend})
1194
+
1195
+ # order as requested:
1196
+ tasks = [tasks[id_] for id_ in id_lst]
1197
+
1198
+ return self._process_retrieved_tasks(tasks)
1199
+
1200
+ def get_tasks(self) -> List[AnySTask]:
1201
+ """Retrieve all tasks, including pending."""
1202
+
1203
+ tasks = self._get_persistent_tasks()
1204
+ tasks.update({k: v for k, v in self._pending.add_tasks.items()})
1205
+
1206
+ # order by index:
1207
+ tasks = sorted((i for i in tasks.values()), key=lambda x: x.index)
1208
+
1209
+ return self._process_retrieved_tasks(tasks)
1210
+
1211
+ def get_loops_by_IDs(self, id_lst: Iterable[int]) -> Dict[int, Dict]:
1212
+ """Retrieve loops by index (ID), including pending."""
1213
+
1214
+ # separate pending and persistent IDs:
1215
+ id_set = set(id_lst)
1216
+ all_pending = set(self._pending.add_loops)
1217
+ id_pers = id_set.difference(all_pending)
1218
+ id_pend = id_set.intersection(all_pending)
1219
+
1220
+ loops = self._get_persistent_loops(id_pers) if id_pers else {}
1221
+ loops.update({i: self._pending.add_loops[i] for i in id_pend})
1222
+
1223
+ # order as requested:
1224
+ loops = {id_: loops[id_] for id_ in id_lst}
1225
+
1226
+ return self._process_retrieved_loops(loops)
1227
+
1228
+ def get_loops(self) -> Dict[int, Dict]:
1229
+ """Retrieve all loops, including pending."""
1230
+
1231
+ loops = self._get_persistent_loops()
1232
+ loops.update({k: v for k, v in self._pending.add_loops.items()})
1233
+
1234
+ # order by index/ID:
1235
+ loops = dict(sorted(loops.items()))
1236
+
1237
+ return self._process_retrieved_loops(loops)
1238
+
1239
+ def get_submissions(self) -> Dict[int, Dict]:
1240
+ """Retrieve all submissions, including pending."""
1241
+
1242
+ subs = self._get_persistent_submissions()
1243
+ subs.update({k: v for k, v in self._pending.add_submissions.items()})
1244
+
1245
+ # order by index/ID
1246
+ subs = dict(sorted(subs.items()))
1247
+
1248
+ return subs
1249
+
1250
+ def get_submissions_by_ID(self, id_lst: Iterable[int]) -> Dict[int, Dict]:
1251
+ # separate pending and persistent IDs:
1252
+ id_set = set(id_lst)
1253
+ all_pending = set(self._pending.add_submissions)
1254
+ id_pers = id_set.difference(all_pending)
1255
+ id_pend = id_set.intersection(all_pending)
1256
+
1257
+ subs = self._get_persistent_submissions(id_pers) if id_pers else {}
1258
+ subs.update({i: self._pending.add_submissions[i] for i in id_pend})
1259
+
1260
+ # order by index/ID
1261
+ subs = dict(sorted(subs.items()))
1262
+
1263
+ return subs
1264
+
1265
+ def get_elements(self, id_lst: Iterable[int]) -> List[AnySElement]:
1266
+ # separate pending and persistent IDs:
1267
+ id_set = set(id_lst)
1268
+ all_pending = set(self._pending.add_elements)
1269
+ id_pers = id_set.difference(all_pending)
1270
+ id_pend = id_set.intersection(all_pending)
1271
+
1272
+ elems = self._get_persistent_elements(id_pers) if id_pers else {}
1273
+ elems.update({i: self._pending.add_elements[i] for i in id_pend})
1274
+
1275
+ # order as requested:
1276
+ elems = [elems[id_] for id_ in id_lst]
1277
+
1278
+ elems_new = []
1279
+ for elem_i in elems:
1280
+ # consider pending iteration IDs:
1281
+ # TODO: does this consider pending iterations from new loop iterations?
1282
+ pend_iters = self._pending.add_elem_iter_IDs.get(elem_i.id_)
1283
+ if pend_iters:
1284
+ elem_i = elem_i.append_iteration_IDs(pend_iters)
1285
+ elems_new.append(elem_i)
1286
+
1287
+ return elems_new
1288
+
1289
+ def get_element_iterations(self, id_lst: Iterable[int]) -> List[AnySElementIter]:
1290
+ # separate pending and persistent IDs:
1291
+ id_set = set(id_lst)
1292
+ all_pending = set(self._pending.add_elem_iters)
1293
+ id_pers = id_set.difference(all_pending)
1294
+ id_pend = id_set.intersection(all_pending)
1295
+
1296
+ iters = self._get_persistent_element_iters(id_pers) if id_pers else {}
1297
+ iters.update({i: self._pending.add_elem_iters[i] for i in id_pend})
1298
+
1299
+ # order as requested:
1300
+ iters = [iters[id_] for id_ in id_lst]
1301
+
1302
+ iters_new = []
1303
+ for iter_i in iters:
1304
+ # consider pending EAR IDs:
1305
+ pend_EARs = self._pending.add_elem_iter_EAR_IDs.get(iter_i.id_)
1306
+ if pend_EARs:
1307
+ iter_i = iter_i.append_EAR_IDs(pend_EARs)
1308
+
1309
+ # consider pending loop idx
1310
+ pend_loop_idx = self._pending.update_loop_indices.get(iter_i.id_)
1311
+ if pend_loop_idx:
1312
+ iter_i = iter_i.update_loop_idx(pend_loop_idx)
1313
+
1314
+ iters_new.append(iter_i)
1315
+
1316
+ return iters_new
1317
+
1318
+ def get_EARs(self, id_lst: Iterable[int]) -> List[AnySEAR]:
1319
+ # separate pending and persistent IDs:
1320
+ id_set = set(id_lst)
1321
+ all_pending = set(self._pending.add_EARs)
1322
+ id_pers = id_set.difference(all_pending)
1323
+ id_pend = id_set.intersection(all_pending)
1324
+
1325
+ EARs = self._get_persistent_EARs(id_pers) if id_pers else {}
1326
+ EARs.update({i: self._pending.add_EARs[i] for i in id_pend})
1327
+
1328
+ # order as requested:
1329
+ EARs = [EARs[id_] for id_ in id_lst]
1330
+
1331
+ EARs_new = []
1332
+ for EAR_i in EARs:
1333
+ # consider updates:
1334
+ pend_sub = self._pending.set_EAR_submission_indices.get(EAR_i.id_)
1335
+ pend_start = self._pending.set_EAR_starts.get(EAR_i.id_)
1336
+ pend_end = self._pending.set_EAR_ends.get(EAR_i.id_)
1337
+ pend_skip = True if EAR_i.id_ in self._pending.set_EAR_skips else None
1338
+
1339
+ p_st, p_ss = pend_start if pend_start else (None, None)
1340
+ p_et, p_se, p_ex, p_sx = pend_end if pend_end else (None, None, None, None)
1341
+
1342
+ updates = {
1343
+ "submission_idx": pend_sub,
1344
+ "skip": pend_skip,
1345
+ "success": p_sx,
1346
+ "start_time": p_st,
1347
+ "end_time": p_et,
1348
+ "snapshot_start": p_ss,
1349
+ "snapshot_end": p_se,
1350
+ "exit_code": p_ex,
1351
+ }
1352
+ if any(i is not None for i in updates.values()):
1353
+ EAR_i = EAR_i.update(**updates)
1354
+
1355
+ EARs_new.append(EAR_i)
1356
+
1357
+ return EARs_new
1358
+
1359
+ def get_EAR_skipped(self, EAR_ID: int) -> bool:
1360
+ return self.get_EARs([EAR_ID])[0].skip
1361
+
1362
+ def get_parameters(
1363
+ self,
1364
+ id_lst: Iterable[int],
1365
+ **kwargs: Dict,
1366
+ ) -> List[AnySParameter]:
1367
+ """
1368
+ Parameters
1369
+ ----------
1370
+ kwargs :
1371
+ dataset_copy : bool
1372
+ For Zarr stores only. If True, copy arrays as NumPy arrays.
1373
+ """
1374
+ # separate pending and persistent IDs:
1375
+ id_set = set(id_lst)
1376
+ all_pending = set(self._pending.add_parameters)
1377
+ id_pers = id_set.difference(all_pending)
1378
+ id_pend = id_set.intersection(all_pending)
1379
+
1380
+ params = self._get_persistent_parameters(id_pers, **kwargs) if id_pers else {}
1381
+ params.update({i: self._pending.add_parameters[i] for i in id_pend})
1382
+
1383
+ # order as requested:
1384
+ params = [params[id_] for id_ in id_lst]
1385
+
1386
+ return params
1387
+
1388
+ def get_parameter_set_statuses(self, id_lst: Iterable[int]) -> List[bool]:
1389
+ # separate pending and persistent IDs:
1390
+ id_set = set(id_lst)
1391
+ all_pending = set(self._pending.add_parameters)
1392
+ id_pers = id_set.difference(all_pending)
1393
+ id_pend = id_set.intersection(all_pending)
1394
+
1395
+ set_status = self._get_persistent_parameter_set_status(id_pers) if id_pers else {}
1396
+ set_status.update({i: self._pending.add_parameters[i].is_set for i in id_pend})
1397
+
1398
+ # order as requested:
1399
+ return [set_status[id_] for id_ in id_lst]
1400
+
1401
+ def get_parameter_sources(self, id_lst: Iterable[int]) -> List[Dict]:
1402
+ # separate pending and persistent IDs:
1403
+ id_set = set(id_lst)
1404
+ all_pending = set(self._pending.add_parameters)
1405
+ id_pers = id_set.difference(all_pending)
1406
+ id_pend = id_set.intersection(all_pending)
1407
+
1408
+ src = self._get_persistent_param_sources(id_pers) if id_pers else {}
1409
+ src.update({i: self._pending.add_parameters[i].source for i in id_pend})
1410
+
1411
+ # order as requested:
1412
+ src = {id_: src[id_] for id_ in id_lst}
1413
+
1414
+ src_new = []
1415
+ for id_i, src_i in src.items():
1416
+ # consider pending source updates:
1417
+ pend_src = self._pending.update_param_sources.get(id_i)
1418
+ if pend_src:
1419
+ src_i = {**src_i, **pend_src}
1420
+ src_new.append(src_i)
1421
+
1422
+ return src_new
1423
+
1424
+ def get_task_elements(self, task_id, idx_sel: slice) -> List[Dict]:
1425
+ """Get element data by an index slice within a given task.
1426
+
1427
+ Element iterations and EARs belonging to the elements are included.
1428
+
1429
+ """
1430
+
1431
+ all_elem_IDs = self.get_task(task_id).element_IDs
1432
+ req_IDs = all_elem_IDs[idx_sel]
1433
+ store_elements = self.get_elements(req_IDs)
1434
+ iter_IDs = [i.iteration_IDs for i in store_elements]
1435
+ iter_IDs_flat, iter_IDs_lens = flatten(iter_IDs)
1436
+ store_iters = self.get_element_iterations(iter_IDs_flat)
1437
+
1438
+ # retrieve EARs:
1439
+ EAR_IDs = [list((i.EAR_IDs or {}).values()) for i in store_iters]
1440
+ EAR_IDs_flat, EAR_IDs_lens = flatten(EAR_IDs)
1441
+ EARs_dct = [i.to_dict() for i in self.get_EARs(EAR_IDs_flat)]
1442
+ EARs_dct_rs = reshape(EARs_dct, EAR_IDs_lens)
1443
+
1444
+ # add EARs to iterations:
1445
+ iters = []
1446
+ for idx, i in enumerate(store_iters):
1447
+ EARs = None
1448
+ if i.EAR_IDs is not None:
1449
+ EARs = dict(zip(i.EAR_IDs.keys(), EARs_dct_rs[idx]))
1450
+ iters.append(i.to_dict(EARs))
1451
+
1452
+ # reshape iterations:
1453
+ iters_rs = reshape(iters, iter_IDs_lens)
1454
+
1455
+ # add iterations to elements:
1456
+ elements = []
1457
+ for idx, i in enumerate(store_elements):
1458
+ elements.append(i.to_dict(iters_rs[idx]))
1459
+ return elements
1460
+
1461
+ def check_parameters_exist(self, id_lst: Iterable[int]) -> List[bool]:
1462
+ """For each parameter ID, return True if it exists, else False"""
1463
+
1464
+ id_set = set(id_lst)
1465
+ all_pending = set(self._pending.add_parameters)
1466
+ id_not_pend = id_set.difference(all_pending)
1467
+ id_miss = set()
1468
+ if id_not_pend:
1469
+ all_id_pers = self._get_persistent_parameter_IDs()
1470
+ id_miss = id_not_pend.difference(all_id_pers)
1471
+
1472
+ return [False if i in id_miss else True for i in id_lst]
1473
+
1474
    @contextlib.contextmanager
    def using_resource(self, res_label, action):
        """Context manager for managing `StoreResource` objects associated with the store.

        Yields the resource's cached data for `action`. Nested use with the same
        `(res_label, action)` key yields the already-open data; only the outermost
        (opening) use closes the resource on normal exit.
        """
        try:
            res = self._resources[res_label]
        except KeyError:
            # re-raise as a clearer error, suppressing the KeyError context:
            raise RuntimeError(
                f"{self.__class__.__name__!r} has no resource named {res_label!r}."
            ) from None

        key = (res_label, action)
        if key in self._resources_in_use:
            # retrieve existing data for this action:
            yield res.data[action]

        else:
            try:
                # "open" the resource, which assigns data for this action, which we yield:
                res.open(action)
                self._resources_in_use.add(key)
                yield res.data[action]

            except Exception as exc:
                # NOTE(review): if the `with` body raises, the key is released but
                # `res.close(action)` is not called, and if `res.open` itself raises
                # the key was never added so `remove` would raise KeyError — confirm
                # both paths are intended.
                self._resources_in_use.remove(key)
                raise exc

            else:
                # "close" the resource, clearing cached data for this action:
                res.close(action)
                self._resources_in_use.remove(key)
1505
+
1506
+ def copy(self, path=None) -> str:
1507
+ """Copy the workflow store.
1508
+
1509
+ This does not work on remote filesystems.
1510
+
1511
+ """
1512
+ if path is None:
1513
+ _path = Path(self.path)
1514
+ path = _path.parent / Path(_path.stem + "_copy" + _path.suffix)
1515
+
1516
+ if self.fs.exists(str(path)):
1517
+ raise ValueError(f"Path already exists: {path}.")
1518
+ else:
1519
+ path = str(path)
1520
+
1521
+ self.fs.copy(self.path, path)
1522
+
1523
+ new_fs_path = self.workflow.fs_path.replace(self.path, path)
1524
+
1525
+ return new_fs_path
1526
+
1527
+ def delete(self) -> None:
1528
+ """Delete the persistent workflow."""
1529
+ confirm = input(
1530
+ f"Permanently delete the workflow at path {self.path!r}; [y]es or [n]o?"
1531
+ )
1532
+ if confirm.strip().lower() == "y":
1533
+ self.delete_no_confirm()
1534
+
1535
+ def delete_no_confirm(self) -> None:
1536
+ """Permanently delete the workflow data with no confirmation."""
1537
+
1538
+ @self.app.perm_error_retry()
1539
+ def _delete_no_confirm() -> None:
1540
+ self.logger.debug(f"_delete_no_confirm: {self.path!r}.")
1541
+ self.fs.rm(self.path, recursive=True)
1542
+
1543
+ return _delete_no_confirm()