hpcflow-new2 0.2.0a50__py3-none-any.whl → 0.2.0a52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/_version.py +1 -1
- hpcflow/sdk/__init__.py +1 -1
- hpcflow/sdk/api.py +1 -1
- hpcflow/sdk/app.py +20 -11
- hpcflow/sdk/cli.py +34 -59
- hpcflow/sdk/core/__init__.py +13 -1
- hpcflow/sdk/core/actions.py +235 -126
- hpcflow/sdk/core/command_files.py +32 -24
- hpcflow/sdk/core/element.py +110 -114
- hpcflow/sdk/core/errors.py +57 -0
- hpcflow/sdk/core/loop.py +18 -34
- hpcflow/sdk/core/parameters.py +5 -3
- hpcflow/sdk/core/task.py +135 -131
- hpcflow/sdk/core/task_schema.py +11 -4
- hpcflow/sdk/core/utils.py +110 -2
- hpcflow/sdk/core/workflow.py +964 -676
- hpcflow/sdk/data/template_components/environments.yaml +0 -44
- hpcflow/sdk/data/template_components/task_schemas.yaml +52 -10
- hpcflow/sdk/persistence/__init__.py +21 -33
- hpcflow/sdk/persistence/base.py +1340 -458
- hpcflow/sdk/persistence/json.py +424 -546
- hpcflow/sdk/persistence/pending.py +563 -0
- hpcflow/sdk/persistence/store_resource.py +131 -0
- hpcflow/sdk/persistence/utils.py +57 -0
- hpcflow/sdk/persistence/zarr.py +852 -841
- hpcflow/sdk/submission/jobscript.py +133 -112
- hpcflow/sdk/submission/shells/bash.py +62 -16
- hpcflow/sdk/submission/shells/powershell.py +87 -16
- hpcflow/sdk/submission/submission.py +59 -35
- hpcflow/tests/unit/test_element.py +4 -9
- hpcflow/tests/unit/test_persistence.py +218 -0
- hpcflow/tests/unit/test_task.py +11 -12
- hpcflow/tests/unit/test_utils.py +82 -0
- hpcflow/tests/unit/test_workflow.py +3 -1
- {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/METADATA +3 -1
- {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/RECORD +38 -34
- {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/persistence/base.py
CHANGED
@@ -1,21 +1,31 @@
|
|
1
|
+
# Store* classes represent the element-metadata in the store, in a store-agnostic way
|
1
2
|
from __future__ import annotations
|
2
|
-
from abc import ABC
|
3
|
-
|
3
|
+
from abc import ABC
|
4
|
+
|
5
|
+
import contextlib
|
4
6
|
import copy
|
5
|
-
from dataclasses import dataclass
|
6
|
-
from datetime import datetime
|
7
|
+
from dataclasses import dataclass, field
|
8
|
+
from datetime import datetime, timezone
|
9
|
+
from pathlib import Path
|
7
10
|
import shutil
|
8
11
|
import time
|
9
|
-
from typing import Any, Dict,
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple, TypeVar, Union
|
13
|
+
|
14
|
+
from hpcflow.sdk.core.utils import (
|
15
|
+
flatten,
|
16
|
+
get_in_container,
|
17
|
+
get_relative_path,
|
18
|
+
reshape,
|
19
|
+
set_in_container,
|
20
|
+
JSONLikeDirSnapShot,
|
21
|
+
)
|
22
|
+
from hpcflow.sdk.persistence.pending import PendingChanges
|
18
23
|
|
24
|
+
AnySTask = TypeVar("AnySTask", bound="StoreTask")
|
25
|
+
AnySElement = TypeVar("AnySElement", bound="StoreElement")
|
26
|
+
AnySElementIter = TypeVar("AnySElementIter", bound="StoreElementIter")
|
27
|
+
AnySEAR = TypeVar("AnySEAR", bound="StoreEAR")
|
28
|
+
AnySParameter = TypeVar("AnySParameter", bound="StoreParameter")
|
19
29
|
|
20
30
|
PRIMITIVES = (
|
21
31
|
int,
|
@@ -24,48 +34,24 @@ PRIMITIVES = (
|
|
24
34
|
type(None),
|
25
35
|
)
|
26
36
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
# TODO: maybe this is only an issue on Windows?
|
34
|
-
dropbox_permission_err_retry = retry(
|
35
|
-
(PermissionError, OSError),
|
36
|
-
tries=10,
|
37
|
-
delay=1,
|
38
|
-
backoff=2,
|
39
|
-
fail_callback=dropbox_retry_fail,
|
37
|
+
TEMPLATE_COMP_TYPES = (
|
38
|
+
"parameters",
|
39
|
+
"command_files",
|
40
|
+
"environments",
|
41
|
+
"task_schemas",
|
40
42
|
)
|
41
43
|
|
42
44
|
|
43
|
-
@dropbox_permission_err_retry
|
44
|
-
def remove_dir(dir_path: Path) -> None:
|
45
|
-
"""Try very hard to delete a directory.
|
46
|
-
|
47
|
-
Dropbox (on Windows, at least) seems to try to re-sync files if the parent directory
|
48
|
-
is deleted soon after creation, which is the case on a failed workflow creation (e.g.
|
49
|
-
missing inputs), so in addition to catching PermissionErrors generated when
|
50
|
-
Dropbox has a lock on files, we repeatedly try deleting the directory tree.
|
51
|
-
|
52
|
-
"""
|
53
|
-
while dir_path.is_dir():
|
54
|
-
shutil.rmtree(dir_path)
|
55
|
-
time.sleep(0.5)
|
56
|
-
|
57
|
-
|
58
|
-
@dropbox_permission_err_retry
|
59
|
-
def rename_dir(replaced_dir, original_dir) -> None:
|
60
|
-
replaced_dir.rename(original_dir)
|
61
|
-
|
62
|
-
|
63
45
|
@dataclass
|
64
46
|
class PersistentStoreFeatures:
|
65
47
|
"""Class to represent the features provided by a persistent store.
|
66
48
|
|
67
49
|
Parameters
|
68
50
|
----------
|
51
|
+
create
|
52
|
+
If True, a new workflow can be created using this store.
|
53
|
+
edit
|
54
|
+
If True, the workflow can be modified.
|
69
55
|
jobscript_parallelism
|
70
56
|
If True, the store supports workflows running multiple independent jobscripts
|
71
57
|
simultaneously.
|
@@ -79,323 +65,374 @@ class PersistentStoreFeatures:
|
|
79
65
|
submission.
|
80
66
|
"""
|
81
67
|
|
68
|
+
create: bool = False
|
69
|
+
edit: bool = False
|
82
70
|
jobscript_parallelism: bool = False
|
83
71
|
EAR_parallelism: bool = False
|
84
72
|
schedulers: bool = False
|
85
73
|
submission: bool = False
|
86
74
|
|
87
75
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
@property
|
102
|
-
def features(self) -> PersistentStoreFeatures:
|
103
|
-
return self._features
|
104
|
-
|
105
|
-
@property
|
106
|
-
def store_name(self) -> str:
|
107
|
-
return self._name
|
108
|
-
|
109
|
-
@property
|
110
|
-
def workflow(self) -> app.Workflow:
|
111
|
-
return self._workflow
|
112
|
-
|
113
|
-
@property
|
114
|
-
def workflow_path(self) -> Path:
|
115
|
-
return self.workflow.path
|
116
|
-
|
117
|
-
@property
|
118
|
-
def has_pending(self) -> bool:
|
119
|
-
"""Returns True if there are pending changes that are not yet committed."""
|
120
|
-
return any(bool(v) for v in self._pending.values())
|
121
|
-
|
122
|
-
def _get_pending_dct(self) -> Dict:
|
123
|
-
return {
|
124
|
-
"tasks": {}, # keys are new task indices
|
125
|
-
"loops": [],
|
126
|
-
"submissions": [],
|
127
|
-
"submission_attempts": {}, # keys are submission indices, values are list of jobscript indices
|
128
|
-
"jobscript_version_info": {}, # keys are submission indices, values are dicts with jobscript index keys
|
129
|
-
"jobscript_submit_times": {}, # keys are submission indices, values are dicts with jobscript index keys
|
130
|
-
"jobscript_job_IDs": {}, # keys are submission indices, values are dicts with jobscript index keys
|
131
|
-
"loops_added_iters": {}, # keys are loop indices, values are num added iterations
|
132
|
-
"template_tasks": {}, # keys are new task indices
|
133
|
-
"template_loops": [],
|
134
|
-
"template_components": {},
|
135
|
-
"element_sets": {}, # keys are task indices
|
136
|
-
"element_iterations": {}, # keys are (task index, task insert ID)
|
137
|
-
"element_iterations_idx": {}, # keys are (task index, task insert ID), then element_idx
|
138
|
-
"elements": {}, # keys are (task index, task insert ID)
|
139
|
-
"EARs": {}, # keys are (task index, task insert ID, element_iter idx)
|
140
|
-
"loop_idx": {}, # keys are (task index, task insert ID, element iteration index)
|
141
|
-
"parameter_data": {}, # keys are parameter indices
|
142
|
-
"parameter_sources": {}, # keys are parameter indices
|
143
|
-
"parameter_source_updates": {}, # keys are parameter indices
|
144
|
-
"remove_replaced_dir_record": False,
|
145
|
-
"EAR_submission_idx": {}, # keys are (task insert ID, element_iter idx, action idx, run idx)
|
146
|
-
"EAR_start_times": {}, # keys are (task insert ID, element_iter idx, action idx, run idx)
|
147
|
-
"EAR_end_times": {}, # keys are (task insert ID, element_iter idx, action idx, run idx)
|
148
|
-
}
|
149
|
-
|
150
|
-
def reject_pending(self) -> None:
|
151
|
-
self.clear_pending()
|
152
|
-
|
153
|
-
def clear_pending(self) -> None:
|
154
|
-
self._pending = self._get_pending_dct()
|
155
|
-
|
156
|
-
def save(self) -> None:
|
157
|
-
if not self.workflow._in_batch_mode:
|
158
|
-
self.commit_pending()
|
159
|
-
|
160
|
-
@contextmanager
|
161
|
-
def cached_load(self) -> Iterator[None]:
|
162
|
-
"""Override this if a more performant implementation, is possible.
|
163
|
-
|
164
|
-
For example, in a JSON persistent store, we need to load the whole document from
|
165
|
-
disk to read anything from it, so we can temporarily cache the document if we know
|
166
|
-
we will be making multiple reads."""
|
76
|
+
@dataclass
|
77
|
+
class StoreTask:
|
78
|
+
id_: int
|
79
|
+
index: int
|
80
|
+
is_pending: bool
|
81
|
+
element_IDs: List[int]
|
82
|
+
task_template: Optional[Dict] = None
|
83
|
+
|
84
|
+
def encode(self) -> Tuple[int, Dict, Dict]:
|
85
|
+
"""Prepare store task data for the persistent store."""
|
86
|
+
wk_task = {"id_": self.id_, "element_IDs": self.element_IDs}
|
87
|
+
task = {"id_": self.id_, **self.task_template}
|
88
|
+
return self.index, wk_task, task
|
167
89
|
|
168
|
-
|
90
|
+
@classmethod
|
91
|
+
def decode(cls, task_dat: Dict) -> StoreTask:
|
92
|
+
"""Initialise a `StoreTask` from store task data
|
169
93
|
|
170
|
-
|
171
|
-
|
172
|
-
task_idx: int,
|
173
|
-
task_insert_ID: int,
|
174
|
-
selection: Union[int, slice],
|
175
|
-
) -> Iterator[Dict]:
|
176
|
-
"""Override this for a more performant implementation."""
|
177
|
-
for idx in range(selection.start, selection.stop, selection.step):
|
178
|
-
yield self.get_task_elements(
|
179
|
-
task_idx, task_insert_ID, slice(idx, idx + 1, 1)
|
180
|
-
)[0]
|
94
|
+
Note: the `task_template` is only needed for encoding because it is retrieved as
|
95
|
+
part of the `WorkflowTemplate` so we don't need to load it when decoding.
|
181
96
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
97
|
+
"""
|
98
|
+
return cls(is_pending=False, **task_dat)
|
99
|
+
|
100
|
+
def append_element_IDs(self: AnySTask, pend_IDs: List[int]) -> AnySTask:
|
101
|
+
"""Return a copy, with additional element IDs."""
|
102
|
+
elem_IDs = self.element_IDs[:] + pend_IDs
|
103
|
+
return self.__class__(
|
104
|
+
id_=self.id_,
|
105
|
+
index=self.index,
|
106
|
+
is_pending=self.is_pending,
|
107
|
+
element_IDs=elem_IDs,
|
108
|
+
task_template=self.task_template,
|
187
109
|
)
|
188
|
-
if confirm.strip().lower() == "y":
|
189
|
-
self.delete_no_confirm()
|
190
110
|
|
191
|
-
def delete_no_confirm(self) -> None:
|
192
|
-
"""Permanently delete the workflow data with no confirmation."""
|
193
|
-
remove_dir(self.workflow.path)
|
194
|
-
|
195
|
-
def _merge_pending_template_components(self, template_components: Dict) -> bool:
|
196
|
-
# assumes we have already checked for duplicates when adding to pending:
|
197
|
-
is_modified = False
|
198
|
-
for name, dat in self._pending["template_components"].items():
|
199
|
-
if name not in template_components:
|
200
|
-
template_components[name] = {}
|
201
|
-
for k, v in dat.items():
|
202
|
-
template_components[name][k] = v
|
203
|
-
is_modified = True
|
204
|
-
return is_modified
|
205
111
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
pending = self._pending["template_components"]
|
217
|
-
|
218
|
-
for name, dat in template_components.items():
|
219
|
-
if name in ptc and name in pending:
|
220
|
-
for hash, dat_i in dat.items():
|
221
|
-
if hash not in ptc[name] and hash not in pending[name]:
|
222
|
-
pending[name][hash] = dat_i
|
112
|
+
@dataclass
|
113
|
+
class StoreElement:
|
114
|
+
"""
|
115
|
+
Parameters
|
116
|
+
----------
|
117
|
+
index
|
118
|
+
Index of the element within its parent task.
|
119
|
+
iteration_IDs
|
120
|
+
IDs of element-iterations that belong to this element.
|
121
|
+
"""
|
223
122
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
123
|
+
id_: int
|
124
|
+
is_pending: bool
|
125
|
+
index: int
|
126
|
+
es_idx: int
|
127
|
+
seq_idx: Dict[str, int]
|
128
|
+
src_idx: Dict[str, int]
|
129
|
+
task_ID: int
|
130
|
+
iteration_IDs: List[int]
|
131
|
+
|
132
|
+
def encode(self) -> Dict:
|
133
|
+
"""Prepare store element data for the persistent store."""
|
134
|
+
dct = self.__dict__
|
135
|
+
del dct["is_pending"]
|
136
|
+
return dct
|
228
137
|
|
229
|
-
|
230
|
-
|
138
|
+
@classmethod
|
139
|
+
def decode(cls, elem_dat: Dict) -> StoreElement:
|
140
|
+
"""Initialise a `StoreElement` from store element data"""
|
141
|
+
return cls(is_pending=False, **elem_dat)
|
231
142
|
|
232
|
-
|
143
|
+
def to_dict(self, iters):
|
144
|
+
"""Prepare data for the user-facing `Element` object."""
|
145
|
+
return {
|
146
|
+
"id_": self.id_,
|
147
|
+
"is_pending": self.is_pending,
|
148
|
+
"index": self.index,
|
149
|
+
"es_idx": self.es_idx,
|
150
|
+
"seq_idx": self.seq_idx,
|
151
|
+
"src_idx": self.src_idx,
|
152
|
+
"iteration_IDs": self.iteration_IDs,
|
153
|
+
"task_ID": self.task_ID,
|
154
|
+
"iterations": iters,
|
155
|
+
}
|
233
156
|
|
234
|
-
def
|
235
|
-
|
236
|
-
self.
|
237
|
-
self.
|
157
|
+
def append_iteration_IDs(self: AnySElement, pend_IDs: List[int]) -> AnySElement:
|
158
|
+
"""Return a copy, with additional iteration IDs."""
|
159
|
+
iter_IDs = self.iteration_IDs[:] + pend_IDs
|
160
|
+
return self.__class__(
|
161
|
+
id_=self.id_,
|
162
|
+
is_pending=self.is_pending,
|
163
|
+
index=self.index,
|
164
|
+
es_idx=self.es_idx,
|
165
|
+
seq_idx=self.seq_idx,
|
166
|
+
src_idx=self.src_idx,
|
167
|
+
task_ID=self.task_ID,
|
168
|
+
iteration_IDs=iter_IDs,
|
169
|
+
)
|
238
170
|
|
239
|
-
def add_element_set(self, task_idx: int, element_set_js: Dict) -> None:
|
240
|
-
if task_idx not in self._pending["element_sets"]:
|
241
|
-
self._pending["element_sets"][task_idx] = []
|
242
|
-
self._pending["element_sets"][task_idx].append(element_set_js)
|
243
|
-
self.save()
|
244
171
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
self._pending["element_iterations"][key].extend(element_iterations)
|
259
|
-
self.save()
|
260
|
-
|
261
|
-
def add_element_iterations(
|
262
|
-
self,
|
263
|
-
task_idx: int,
|
264
|
-
task_insert_ID: int,
|
265
|
-
element_iterations: List[Dict],
|
266
|
-
element_iters_idx: Dict[int, List[int]],
|
267
|
-
) -> None:
|
268
|
-
key = (task_idx, task_insert_ID)
|
269
|
-
if key not in self._pending["element_iterations"]:
|
270
|
-
self._pending["element_iterations"][key] = []
|
271
|
-
if key not in self._pending["element_iterations_idx"]:
|
272
|
-
self._pending["element_iterations_idx"][key] = {}
|
172
|
+
@dataclass
|
173
|
+
class StoreElementIter:
|
174
|
+
"""
|
175
|
+
Parameters
|
176
|
+
----------
|
177
|
+
data_idx
|
178
|
+
Overall data index for the element-iteration, which maps parameter names to
|
179
|
+
parameter data indices.
|
180
|
+
EAR_IDs
|
181
|
+
Maps task schema action indices to EARs by ID.
|
182
|
+
schema_parameters
|
183
|
+
List of parameters defined by the associated task schema.
|
184
|
+
"""
|
273
185
|
|
274
|
-
|
186
|
+
id_: int
|
187
|
+
is_pending: bool
|
188
|
+
element_ID: int
|
189
|
+
EAR_IDs: Dict[int, List[int]]
|
190
|
+
data_idx: Dict[str, int]
|
191
|
+
schema_parameters: List[str]
|
192
|
+
loop_idx: Dict[str, int] = field(default_factory=dict)
|
275
193
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
194
|
+
def encode(self) -> Dict:
|
195
|
+
"""Prepare store element iteration data for the persistent store."""
|
196
|
+
dct = self.__dict__
|
197
|
+
del dct["is_pending"]
|
198
|
+
return dct
|
280
199
|
|
281
|
-
|
200
|
+
@classmethod
|
201
|
+
def decode(cls, iter_dat: Dict) -> StoreElementIter:
|
202
|
+
"""Initialise a `StoreElementIter` from persistent store element iteration data"""
|
282
203
|
|
283
|
-
|
284
|
-
self._pending["loops_added_iters"][loop_idx] = num_added_iters
|
285
|
-
self.save()
|
204
|
+
iter_dat = copy.deepcopy(iter_dat) # to avoid mutating; can we avoid this?
|
286
205
|
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
task_insert_ID: int,
|
291
|
-
element_iter_idx: int,
|
292
|
-
EARs: Dict,
|
293
|
-
param_src_updates: Dict,
|
294
|
-
) -> None:
|
295
|
-
key = (task_idx, task_insert_ID, element_iter_idx)
|
296
|
-
if key not in self._pending["EARs"]:
|
297
|
-
self._pending["EARs"][key] = {}
|
298
|
-
self._pending["EARs"][key].update(EARs)
|
299
|
-
self._pending["parameter_source_updates"].update(param_src_updates)
|
300
|
-
self.save()
|
206
|
+
# cast JSON string keys to integers:
|
207
|
+
for act_idx in list((iter_dat["EAR_IDs"] or {}).keys()):
|
208
|
+
iter_dat["EAR_IDs"][int(act_idx)] = iter_dat["EAR_IDs"].pop(act_idx)
|
301
209
|
|
302
|
-
|
303
|
-
self,
|
304
|
-
task_indices: List[int],
|
305
|
-
loop_js: Dict,
|
306
|
-
iterable_parameters: Dict[str:Dict],
|
307
|
-
) -> None:
|
308
|
-
"""Initialise the zeroth iterations of a named loop across the specified task
|
309
|
-
subset.
|
210
|
+
return cls(is_pending=False, **iter_dat)
|
310
211
|
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
212
|
+
def to_dict(self, EARs):
|
213
|
+
"""Prepare data for the user-facing `ElementIteration` object."""
|
214
|
+
return {
|
215
|
+
"id_": self.id_,
|
216
|
+
"is_pending": self.is_pending,
|
217
|
+
"element_ID": self.element_ID,
|
218
|
+
"EAR_IDs": self.EAR_IDs,
|
219
|
+
"data_idx": self.data_idx,
|
220
|
+
"schema_parameters": self.schema_parameters,
|
221
|
+
"EARs": EARs,
|
222
|
+
"loop_idx": self.loop_idx,
|
223
|
+
}
|
316
224
|
|
317
|
-
|
318
|
-
self
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
225
|
+
def append_EAR_IDs(
|
226
|
+
self: AnySElementIter, pend_IDs: Dict[int, List[int]]
|
227
|
+
) -> AnySElementIter:
|
228
|
+
"""Return a copy, with additional EAR IDs."""
|
229
|
+
|
230
|
+
EAR_IDs = copy.deepcopy(self.EAR_IDs) or {}
|
231
|
+
for act_idx, IDs_i in pend_IDs.items():
|
232
|
+
if act_idx not in EAR_IDs:
|
233
|
+
EAR_IDs[act_idx] = []
|
234
|
+
EAR_IDs[act_idx].extend(IDs_i)
|
235
|
+
|
236
|
+
return self.__class__(
|
237
|
+
id_=self.id_,
|
238
|
+
is_pending=self.is_pending,
|
239
|
+
element_ID=self.element_ID,
|
240
|
+
EAR_IDs=EAR_IDs,
|
241
|
+
data_idx=self.data_idx,
|
242
|
+
schema_parameters=self.schema_parameters,
|
243
|
+
loop_idx=self.loop_idx,
|
324
244
|
)
|
325
245
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
246
|
+
def update_loop_idx(
|
247
|
+
self: AnySElementIter, loop_idx: Dict[str, int]
|
248
|
+
) -> AnySElementIter:
|
249
|
+
"""Return a copy, with the loop index updated."""
|
250
|
+
loop_idx_new = copy.deepcopy(self.loop_idx)
|
251
|
+
loop_idx_new.update(loop_idx)
|
252
|
+
return self.__class__(
|
253
|
+
id_=self.id_,
|
254
|
+
is_pending=self.is_pending,
|
255
|
+
element_ID=self.element_ID,
|
256
|
+
EAR_IDs=self.EAR_IDs,
|
257
|
+
data_idx=self.data_idx,
|
258
|
+
schema_parameters=self.schema_parameters,
|
259
|
+
loop_idx=loop_idx_new,
|
260
|
+
)
|
334
261
|
|
335
|
-
self.save()
|
336
262
|
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
263
|
+
@dataclass
|
264
|
+
class StoreEAR:
|
265
|
+
"""
|
266
|
+
Parameters
|
267
|
+
----------
|
268
|
+
data_idx
|
269
|
+
Maps parameter names within this EAR to parameter data indices.
|
270
|
+
metadata
|
271
|
+
Metadata concerning e.g. the state of the EAR.
|
272
|
+
action_idx
|
273
|
+
The task schema action associated with this EAR.
|
274
|
+
"""
|
341
275
|
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
276
|
+
id_: int
|
277
|
+
is_pending: bool
|
278
|
+
elem_iter_ID: int
|
279
|
+
action_idx: int
|
280
|
+
data_idx: Dict[str, int]
|
281
|
+
submission_idx: Optional[int] = None
|
282
|
+
skip: Optional[bool] = False
|
283
|
+
success: Optional[bool] = None
|
284
|
+
start_time: Optional[datetime] = None
|
285
|
+
end_time: Optional[datetime] = None
|
286
|
+
snapshot_start: Optional[Dict] = None
|
287
|
+
snapshot_end: Optional[Dict] = None
|
288
|
+
exit_code: Optional[int] = None
|
289
|
+
metadata: Dict[str, Any] = None
|
290
|
+
|
291
|
+
@staticmethod
|
292
|
+
def _encode_datetime(dt: Union[datetime, None], ts_fmt: str) -> str:
|
293
|
+
return dt.strftime(ts_fmt) if dt else None
|
294
|
+
|
295
|
+
@staticmethod
|
296
|
+
def _decode_datetime(dt_str: Union[str, None], ts_fmt: str) -> datetime:
|
297
|
+
return datetime.strptime(dt_str, ts_fmt) if dt_str else None
|
298
|
+
|
299
|
+
def encode(self, ts_fmt: str) -> Dict:
|
300
|
+
"""Prepare store EAR data for the persistent store."""
|
301
|
+
return {
|
302
|
+
"id_": self.id_,
|
303
|
+
"elem_iter_ID": self.elem_iter_ID,
|
304
|
+
"action_idx": self.action_idx,
|
305
|
+
"data_idx": self.data_idx,
|
306
|
+
"submission_idx": self.submission_idx,
|
307
|
+
"success": self.success,
|
308
|
+
"skip": self.skip,
|
309
|
+
"start_time": self._encode_datetime(self.start_time, ts_fmt),
|
310
|
+
"end_time": self._encode_datetime(self.end_time, ts_fmt),
|
311
|
+
"snapshot_start": self.snapshot_start,
|
312
|
+
"snapshot_end": self.snapshot_end,
|
313
|
+
"exit_code": self.exit_code,
|
314
|
+
"metadata": self.metadata,
|
315
|
+
}
|
347
316
|
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
317
|
+
@classmethod
|
318
|
+
def decode(cls, EAR_dat: Dict, ts_fmt: str) -> StoreEAR:
|
319
|
+
"""Initialise a `StoreEAR` from persistent store EAR data"""
|
320
|
+
# don't want to mutate EAR_dat:
|
321
|
+
EAR_dat = copy.deepcopy(EAR_dat)
|
322
|
+
EAR_dat["start_time"] = cls._decode_datetime(EAR_dat["start_time"], ts_fmt)
|
323
|
+
EAR_dat["end_time"] = cls._decode_datetime(EAR_dat["end_time"], ts_fmt)
|
324
|
+
return cls(is_pending=False, **EAR_dat)
|
325
|
+
|
326
|
+
def to_dict(self) -> Dict:
|
327
|
+
"""Prepare data for the user-facing `ElementActionRun` object."""
|
328
|
+
|
329
|
+
def _process_datetime(dt: datetime) -> datetime:
|
330
|
+
"""We store datetime objects implicitly in UTC, so we need to first make
|
331
|
+
that explicit, and then convert to the local time zone."""
|
332
|
+
return dt.replace(tzinfo=timezone.utc).astimezone() if dt else None
|
355
333
|
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
334
|
+
return {
|
335
|
+
"id_": self.id_,
|
336
|
+
"is_pending": self.is_pending,
|
337
|
+
"elem_iter_ID": self.elem_iter_ID,
|
338
|
+
"action_idx": self.action_idx,
|
339
|
+
"data_idx": self.data_idx,
|
340
|
+
"submission_idx": self.submission_idx,
|
341
|
+
"success": self.success,
|
342
|
+
"skip": self.skip,
|
343
|
+
"start_time": _process_datetime(self.start_time),
|
344
|
+
"end_time": _process_datetime(self.end_time),
|
345
|
+
"snapshot_start": self.snapshot_start,
|
346
|
+
"snapshot_end": self.snapshot_end,
|
347
|
+
"exit_code": self.exit_code,
|
348
|
+
"metadata": self.metadata,
|
349
|
+
}
|
361
350
|
|
362
|
-
def
|
351
|
+
def update(
|
363
352
|
self,
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
353
|
+
submission_idx: Optional[int] = None,
|
354
|
+
skip: Optional[bool] = None,
|
355
|
+
success: Optional[bool] = None,
|
356
|
+
start_time: Optional[datetime] = None,
|
357
|
+
end_time: Optional[datetime] = None,
|
358
|
+
snapshot_start: Optional[Dict] = None,
|
359
|
+
snapshot_end: Optional[Dict] = None,
|
360
|
+
exit_code: Optional[int] = None,
|
361
|
+
) -> AnySEAR:
|
362
|
+
"""Return a shallow copy, with specified data updated."""
|
363
|
+
|
364
|
+
sub_idx = submission_idx if submission_idx is not None else self.submission_idx
|
365
|
+
skip = skip if skip is not None else self.skip
|
366
|
+
success = success if success is not None else self.success
|
367
|
+
start_time = start_time if start_time is not None else self.start_time
|
368
|
+
end_time = end_time if end_time is not None else self.end_time
|
369
|
+
snap_s = snapshot_start if snapshot_start is not None else self.snapshot_start
|
370
|
+
snap_e = snapshot_end if snapshot_end is not None else self.snapshot_end
|
371
|
+
exit_code = exit_code if exit_code is not None else self.exit_code
|
372
|
+
|
373
|
+
return self.__class__(
|
374
|
+
id_=self.id_,
|
375
|
+
is_pending=self.is_pending,
|
376
|
+
elem_iter_ID=self.elem_iter_ID,
|
377
|
+
action_idx=self.action_idx,
|
378
|
+
data_idx=self.data_idx,
|
379
|
+
metadata=self.metadata,
|
380
|
+
submission_idx=sub_idx,
|
381
|
+
skip=skip,
|
382
|
+
success=success,
|
383
|
+
start_time=start_time,
|
384
|
+
end_time=end_time,
|
385
|
+
snapshot_start=snap_s,
|
386
|
+
snapshot_end=snap_e,
|
387
|
+
exit_code=exit_code,
|
388
|
+
)
|
372
389
|
|
373
|
-
def add_parameter_data(self, data: Any, source: Dict) -> int:
|
374
|
-
return self._add_parameter_data({"data": data}, source)
|
375
390
|
|
376
|
-
|
377
|
-
|
391
|
+
@dataclass
|
392
|
+
class StoreParameter:
|
393
|
+
id_: int
|
394
|
+
is_pending: bool
|
395
|
+
is_set: bool
|
396
|
+
data: Any
|
397
|
+
file: Dict
|
398
|
+
source: Dict
|
399
|
+
|
400
|
+
_encoders = {}
|
401
|
+
_decoders = {}
|
402
|
+
|
403
|
+
def encode(self, **kwargs) -> Dict:
|
404
|
+
"""Prepare store parameter data for the persistent store."""
|
405
|
+
if self.is_set:
|
406
|
+
if self.file:
|
407
|
+
return {"file": self.file}
|
408
|
+
else:
|
409
|
+
return self._encode(obj=self.data, **kwargs)
|
410
|
+
else:
|
411
|
+
return None
|
378
412
|
|
379
|
-
def
|
413
|
+
def _encode(
|
380
414
|
self,
|
381
415
|
obj: Any,
|
382
|
-
path: List = None,
|
416
|
+
path: Optional[List] = None,
|
383
417
|
type_lookup: Optional[Dict] = None,
|
384
418
|
**kwargs,
|
385
|
-
) ->
|
419
|
+
) -> Dict:
|
420
|
+
"""Recursive encoder."""
|
421
|
+
|
386
422
|
path = path or []
|
387
423
|
if type_lookup is None:
|
388
424
|
type_lookup = {
|
389
425
|
"tuples": [],
|
390
426
|
"sets": [],
|
391
|
-
**{k: [] for k in self.
|
427
|
+
**{k: [] for k in self._decoders.keys()},
|
392
428
|
}
|
393
429
|
|
394
430
|
if len(path) > 50:
|
395
431
|
raise RuntimeError("I'm in too deep!")
|
396
432
|
|
397
|
-
if
|
398
|
-
|
433
|
+
if any("ParameterValue" in i.__name__ for i in obj.__class__.__mro__):
|
434
|
+
# TODO: not nice; did this to avoid circular import of `ParameterValue`
|
435
|
+
encoded = self._encode(
|
399
436
|
obj=obj.to_dict(),
|
400
437
|
path=path,
|
401
438
|
type_lookup=type_lookup,
|
@@ -406,7 +443,7 @@ class PersistentStore(ABC):
|
|
406
443
|
elif isinstance(obj, (list, tuple, set)):
|
407
444
|
data = []
|
408
445
|
for idx, item in enumerate(obj):
|
409
|
-
encoded = self.
|
446
|
+
encoded = self._encode(
|
410
447
|
obj=item,
|
411
448
|
path=path + [idx],
|
412
449
|
type_lookup=type_lookup,
|
@@ -424,7 +461,7 @@ class PersistentStore(ABC):
|
|
424
461
|
elif isinstance(obj, dict):
|
425
462
|
data = {}
|
426
463
|
for dct_key, dct_val in obj.items():
|
427
|
-
encoded = self.
|
464
|
+
encoded = self._encode(
|
428
465
|
obj=dct_val,
|
429
466
|
path=path + [dct_key],
|
430
467
|
type_lookup=type_lookup,
|
@@ -436,8 +473,8 @@ class PersistentStore(ABC):
|
|
436
473
|
elif isinstance(obj, PRIMITIVES):
|
437
474
|
data = obj
|
438
475
|
|
439
|
-
elif type(obj) in self.
|
440
|
-
data = self.
|
476
|
+
elif type(obj) in self._encoders:
|
477
|
+
data = self._encoders[type(obj)](
|
441
478
|
obj=obj,
|
442
479
|
path=path,
|
443
480
|
type_lookup=type_lookup,
|
@@ -452,14 +489,35 @@ class PersistentStore(ABC):
|
|
452
489
|
|
453
490
|
return {"data": data, "type_lookup": type_lookup}
|
454
491
|
|
455
|
-
|
456
|
-
|
492
|
+
@classmethod
|
493
|
+
def decode(
|
494
|
+
cls,
|
495
|
+
id_: int,
|
457
496
|
data: Union[None, Dict],
|
497
|
+
source: Dict,
|
458
498
|
path: Optional[List[str]] = None,
|
459
499
|
**kwargs,
|
460
500
|
) -> Any:
|
461
|
-
|
462
|
-
|
501
|
+
"""Initialise from persistent store parameter data."""
|
502
|
+
if data and "file" in data:
|
503
|
+
return cls(
|
504
|
+
id_=id_,
|
505
|
+
data=None,
|
506
|
+
file=data["file"],
|
507
|
+
is_set=True,
|
508
|
+
source=source,
|
509
|
+
is_pending=False,
|
510
|
+
)
|
511
|
+
elif data is None:
|
512
|
+
# parameter is not set
|
513
|
+
return cls(
|
514
|
+
id_=id_,
|
515
|
+
data=None,
|
516
|
+
file=None,
|
517
|
+
is_set=False,
|
518
|
+
source=source,
|
519
|
+
is_pending=False,
|
520
|
+
)
|
463
521
|
|
464
522
|
path = path or []
|
465
523
|
|
@@ -485,177 +543,1001 @@ class PersistentStore(ABC):
|
|
485
543
|
else:
|
486
544
|
obj = set(obj)
|
487
545
|
|
488
|
-
for data_type in
|
489
|
-
obj =
|
546
|
+
for data_type in cls._decoders:
|
547
|
+
obj = cls._decoders[data_type](
|
490
548
|
obj=obj,
|
491
549
|
type_lookup=data["type_lookup"],
|
492
550
|
path=path,
|
493
551
|
**kwargs,
|
494
552
|
)
|
495
553
|
|
496
|
-
return
|
554
|
+
return cls(
|
555
|
+
id_=id_,
|
556
|
+
data=obj,
|
557
|
+
file=None,
|
558
|
+
is_set=True,
|
559
|
+
source=source,
|
560
|
+
is_pending=False,
|
561
|
+
)
|
497
562
|
|
498
|
-
def
|
499
|
-
"""
|
500
|
-
|
563
|
+
def set_data(self, value: Any) -> None:
|
564
|
+
"""Return a copy, with data set."""
|
565
|
+
if self.is_set:
|
566
|
+
raise RuntimeError(f"Parameter ID {self.id_!r} is already set!")
|
567
|
+
return self.__class__(
|
568
|
+
id_=self.id_,
|
569
|
+
is_set=True,
|
570
|
+
is_pending=self.is_pending,
|
571
|
+
data=value,
|
572
|
+
file=None,
|
573
|
+
source=self.source,
|
574
|
+
)
|
501
575
|
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
576
|
+
def set_file(self, value: Any) -> None:
|
577
|
+
"""Return a copy, with file set."""
|
578
|
+
if self.is_set:
|
579
|
+
raise RuntimeError(f"Parameter ID {self.id_!r} is already set!")
|
580
|
+
return self.__class__(
|
581
|
+
id_=self.id_,
|
582
|
+
is_set=True,
|
583
|
+
is_pending=self.is_pending,
|
584
|
+
data=None,
|
585
|
+
file=value,
|
586
|
+
source=self.source,
|
587
|
+
)
|
588
|
+
|
589
|
+
def update_source(self, src: Dict) -> None:
|
590
|
+
"""Return a copy, with updated source."""
|
591
|
+
new_src = dict(sorted({**self.source, **src}.items()))
|
592
|
+
return self.__class__(
|
593
|
+
id_=self.id_,
|
594
|
+
is_set=self.is_set,
|
595
|
+
is_pending=self.is_pending,
|
596
|
+
data=self.data,
|
597
|
+
file=self.file,
|
598
|
+
source=new_src,
|
599
|
+
)
|
600
|
+
|
601
|
+
|
602
|
+
class PersistentStore(ABC):
|
603
|
+
_store_task_cls = StoreTask
|
604
|
+
_store_elem_cls = StoreElement
|
605
|
+
_store_iter_cls = StoreElementIter
|
606
|
+
_store_EAR_cls = StoreEAR
|
607
|
+
_store_param_cls = StoreParameter
|
608
|
+
|
609
|
+
_resources = {}
|
610
|
+
|
611
|
+
def __init__(self, app, workflow, path, fs=None) -> None:
|
612
|
+
self.app = app
|
613
|
+
self.workflow = workflow
|
614
|
+
self.path = path
|
615
|
+
self.fs = fs
|
616
|
+
|
617
|
+
self._pending = PendingChanges(app=app, store=self, resource_map=self._res_map)
|
618
|
+
|
619
|
+
self._resources_in_use = set()
|
620
|
+
self._in_batch_mode = False
|
507
621
|
|
508
622
|
@property
|
509
|
-
|
510
|
-
|
511
|
-
"""Get the store path, which may be the same as the workflow path."""
|
512
|
-
pass
|
623
|
+
def logger(self):
|
624
|
+
return self.app.persistence_logger
|
513
625
|
|
514
|
-
@
|
515
|
-
|
516
|
-
|
517
|
-
cls,
|
518
|
-
template_js: Dict,
|
519
|
-
template_components_js: Dict,
|
520
|
-
workflow_path: Path,
|
521
|
-
replaced_dir: Path,
|
522
|
-
creation_info: Dict,
|
523
|
-
) -> None:
|
524
|
-
pass
|
626
|
+
@property
|
627
|
+
def ts_fmt(self) -> str:
|
628
|
+
return r"%Y-%m-%d %H:%M:%S.%f" # TODO: self.workflow.ts_fmt
|
525
629
|
|
526
|
-
@
|
527
|
-
def
|
528
|
-
|
630
|
+
@property
|
631
|
+
def has_pending(self):
|
632
|
+
return bool(self._pending)
|
529
633
|
|
530
|
-
@
|
531
|
-
def
|
532
|
-
|
634
|
+
@staticmethod
|
635
|
+
def prepare_test_store_from_spec(task_spec):
|
636
|
+
"""Generate a valid store from a specification in terms of nested
|
637
|
+
elements/iterations/EARs.
|
533
638
|
|
534
|
-
|
535
|
-
|
536
|
-
|
639
|
+
"""
|
640
|
+
tasks = []
|
641
|
+
elements = []
|
642
|
+
elem_iters = []
|
643
|
+
EARs = []
|
644
|
+
|
645
|
+
for task_idx, task_i in enumerate(task_spec):
|
646
|
+
elems_i = task_i.get("elements", [])
|
647
|
+
elem_IDs = list(range(len(elements), len(elements) + len(elems_i)))
|
648
|
+
|
649
|
+
for elem_idx, elem_j in enumerate(elems_i):
|
650
|
+
iters_j = elem_j.get("iterations", [])
|
651
|
+
iter_IDs = list(range(len(elem_iters), len(elem_iters) + len(iters_j)))
|
652
|
+
|
653
|
+
for iter_k in iters_j:
|
654
|
+
EARs_k = iter_k.get("EARs", [])
|
655
|
+
EAR_IDs = list(range(len(EARs), len(EARs) + len(EARs_k)))
|
656
|
+
EAR_IDs_dct = {0: EAR_IDs} if EAR_IDs else {}
|
657
|
+
|
658
|
+
for _ in EARs_k:
|
659
|
+
EARs.append(
|
660
|
+
dict(
|
661
|
+
id_=len(EARs),
|
662
|
+
is_pending=False,
|
663
|
+
elem_iter_ID=len(elem_iters),
|
664
|
+
action_idx=0,
|
665
|
+
data_idx={},
|
666
|
+
metadata={},
|
667
|
+
)
|
668
|
+
)
|
669
|
+
|
670
|
+
elem_iters.append(
|
671
|
+
dict(
|
672
|
+
id_=len(elem_iters),
|
673
|
+
is_pending=False,
|
674
|
+
element_ID=len(elements),
|
675
|
+
EAR_IDs=EAR_IDs_dct,
|
676
|
+
data_idx={},
|
677
|
+
schema_parameters=[],
|
678
|
+
)
|
679
|
+
)
|
680
|
+
elements.append(
|
681
|
+
dict(
|
682
|
+
id_=len(elements),
|
683
|
+
is_pending=False,
|
684
|
+
element_idx=elem_idx,
|
685
|
+
seq_idx={},
|
686
|
+
src_idx={},
|
687
|
+
task_ID=task_idx,
|
688
|
+
iteration_IDs=iter_IDs,
|
689
|
+
)
|
690
|
+
)
|
691
|
+
tasks.append(
|
692
|
+
dict(
|
693
|
+
id_=len(tasks),
|
694
|
+
is_pending=False,
|
695
|
+
element_IDs=elem_IDs,
|
696
|
+
)
|
697
|
+
)
|
698
|
+
return (tasks, elements, elem_iters, EARs)
|
537
699
|
|
538
|
-
|
539
|
-
|
540
|
-
pass
|
700
|
+
def remove_path(self, path: str, fs) -> None:
|
701
|
+
"""Try very hard to delete a directory or file.
|
541
702
|
|
542
|
-
|
543
|
-
|
544
|
-
|
703
|
+
Dropbox (on Windows, at least) seems to try to re-sync files if the parent directory
|
704
|
+
is deleted soon after creation, which is the case on a failed workflow creation (e.g.
|
705
|
+
missing inputs), so in addition to catching PermissionErrors generated when
|
706
|
+
Dropbox has a lock on files, we repeatedly try deleting the directory tree.
|
545
707
|
|
546
|
-
|
547
|
-
def get_loops(self) -> List[Dict]:
|
548
|
-
pass
|
708
|
+
"""
|
549
709
|
|
550
|
-
|
551
|
-
|
552
|
-
|
710
|
+
@self.app.perm_error_retry()
|
711
|
+
def _remove_path(path: str, fs) -> None:
|
712
|
+
self.logger.debug(f"_remove_path: path={path}")
|
713
|
+
while fs.exists(path):
|
714
|
+
fs.rm(path, recursive=True)
|
715
|
+
time.sleep(0.5)
|
553
716
|
|
554
|
-
|
555
|
-
def get_task_elements(
|
556
|
-
self,
|
557
|
-
task_idx: int,
|
558
|
-
task_insert_ID: int,
|
559
|
-
selection: slice,
|
560
|
-
) -> List:
|
561
|
-
pass
|
562
|
-
|
563
|
-
@abstractmethod
|
564
|
-
def _add_parameter_data(self, data: Any, source: Dict) -> int:
|
565
|
-
pass
|
566
|
-
|
567
|
-
@abstractmethod
|
568
|
-
def get_parameter_data(self, index: int) -> Tuple[bool, Any]:
|
569
|
-
pass
|
570
|
-
|
571
|
-
@abstractmethod
|
572
|
-
def get_parameter_source(self, index: int) -> Dict:
|
573
|
-
pass
|
574
|
-
|
575
|
-
@abstractmethod
|
576
|
-
def get_all_parameter_data(self) -> Dict[int, Any]:
|
577
|
-
pass
|
578
|
-
|
579
|
-
@abstractmethod
|
580
|
-
def is_parameter_set(self, index: int) -> bool:
|
581
|
-
pass
|
582
|
-
|
583
|
-
@abstractmethod
|
584
|
-
def set_parameter(self, index: int, data: Any) -> None:
|
585
|
-
"""Set the value of a pre-allocated parameter."""
|
586
|
-
pass
|
587
|
-
|
588
|
-
@abstractmethod
|
589
|
-
def check_parameters_exist(
|
590
|
-
self, indices: Union[int, List[int]]
|
591
|
-
) -> Union[bool, List[bool]]:
|
592
|
-
pass
|
593
|
-
|
594
|
-
@abstractmethod
|
595
|
-
def _init_task_loop(
|
596
|
-
self,
|
597
|
-
task_idx: int,
|
598
|
-
task_insert_ID: int,
|
599
|
-
element_sel: slice,
|
600
|
-
name: str,
|
601
|
-
) -> None:
|
602
|
-
"""Initialise the zeroth iteration of a named loop for a specified task."""
|
717
|
+
return _remove_path(path, fs)
|
603
718
|
|
604
|
-
|
605
|
-
|
606
|
-
pass
|
719
|
+
def rename_path(self, replaced: str, original: str, fs) -> None:
|
720
|
+
"""Revert the replaced workflow path to its original name.
|
607
721
|
|
608
|
-
|
609
|
-
|
610
|
-
pass
|
722
|
+
This happens when new workflow creation fails and there is an existing workflow
|
723
|
+
with the same name; the original workflow which was renamed, must be reverted."""
|
611
724
|
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
725
|
+
@self.app.perm_error_retry()
|
726
|
+
def _rename_path(replaced: str, original: str, fs) -> None:
|
727
|
+
self.logger.debug(f"_rename_path: {replaced!r} --> {original!r}.")
|
728
|
+
try:
|
729
|
+
fs.rename(replaced, original, recursive=True) # TODO: why need recursive?
|
730
|
+
except TypeError:
|
731
|
+
# `SFTPFileSystem.rename` has no `recursive` argument:
|
732
|
+
fs.rename(replaced, original)
|
616
733
|
|
617
|
-
|
618
|
-
def is_modified_on_disk(self) -> bool:
|
619
|
-
"""Check if the workflow (metadata) has been modified on disk since initial
|
620
|
-
load (this is bad)."""
|
621
|
-
pass
|
734
|
+
return _rename_path(replaced, original, fs)
|
622
735
|
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
any of those tasks were subsequently removed from the workflow."""
|
736
|
+
def _get_num_total_tasks(self):
|
737
|
+
"""Get the total number of persistent and pending tasks."""
|
738
|
+
return self._get_num_persistent_tasks() + len(self._pending.add_tasks)
|
627
739
|
|
628
|
-
|
629
|
-
|
630
|
-
return self.
|
740
|
+
def _get_num_total_loops(self):
|
741
|
+
"""Get the total number of persistent and pending loops."""
|
742
|
+
return self._get_num_persistent_loops() + len(self._pending.add_loops)
|
743
|
+
|
744
|
+
def _get_num_total_submissions(self):
|
745
|
+
"""Get the total number of persistent and pending submissions."""
|
746
|
+
return self._get_num_persistent_submissions() + len(self._pending.add_submissions)
|
747
|
+
|
748
|
+
def _get_num_total_elements(self):
|
749
|
+
"""Get the total number of persistent and pending elements."""
|
750
|
+
return self._get_num_persistent_elements() + len(self._pending.add_elements)
|
751
|
+
|
752
|
+
def _get_num_total_elem_iters(self):
|
753
|
+
"""Get the total number of persistent and pending element iterations."""
|
754
|
+
return self._get_num_persistent_elem_iters() + len(self._pending.add_elem_iters)
|
755
|
+
|
756
|
+
def _get_num_total_EARs(self):
|
757
|
+
"""Get the total number of persistent and pending EARs."""
|
758
|
+
return self._get_num_persistent_EARs() + len(self._pending.add_EARs)
|
759
|
+
|
760
|
+
def _get_task_total_num_elements(self, task_ID: int):
|
761
|
+
"""Get the total number of persistent and pending elements of a given task."""
|
762
|
+
return len(self.get_task(task_ID).element_IDs)
|
763
|
+
|
764
|
+
def _get_num_total_parameters(self):
|
765
|
+
"""Get the total number of persistent and pending parameters."""
|
766
|
+
return self._get_num_persistent_parameters() + len(self._pending.add_parameters)
|
767
|
+
|
768
|
+
def _get_num_total_input_files(self):
|
769
|
+
"""Get the total number of persistent and pending user-supplied input files."""
|
770
|
+
num_pend_inp_files = len([i for i in self._pending.add_files if i["is_input"]])
|
771
|
+
return self._get_num_persistent_input_files() + num_pend_inp_files
|
631
772
|
|
632
|
-
def
|
773
|
+
def _get_num_total_added_tasks(self):
|
774
|
+
"""Get the total number of tasks ever added to the workflow."""
|
775
|
+
return self._get_num_persistent_added_tasks() + len(self._pending.add_tasks)
|
776
|
+
|
777
|
+
def _get_num_persistent_input_files(self):
|
778
|
+
return len(list(self.workflow.input_files_path.glob("*")))
|
779
|
+
|
780
|
+
def save(self):
|
781
|
+
"""Commit pending changes to disk, if not in batch-update mode."""
|
782
|
+
if not self.workflow._in_batch_mode:
|
783
|
+
self._pending.commit_all()
|
784
|
+
|
785
|
+
def add_template_components(self, temp_comps: Dict, save: bool = True) -> None:
|
786
|
+
all_tc = self.get_template_components()
|
787
|
+
for name, dat in temp_comps.items():
|
788
|
+
if name in all_tc:
|
789
|
+
for hash_i, dat_i in dat.items():
|
790
|
+
if hash_i not in all_tc[name]:
|
791
|
+
self._pending.add_template_components[name][hash_i] = dat_i
|
792
|
+
else:
|
793
|
+
self._pending.add_template_components[name] = dat
|
794
|
+
|
795
|
+
if save:
|
796
|
+
self.save()
|
797
|
+
|
798
|
+
def add_task(self, idx: int, task_template: Dict, save: bool = True):
|
799
|
+
"""Add a new task to the workflow."""
|
800
|
+
self.logger.debug(f"Adding store task.")
|
801
|
+
new_ID = self._get_num_total_added_tasks()
|
802
|
+
self._pending.add_tasks[new_ID] = self._store_task_cls(
|
803
|
+
id_=new_ID,
|
804
|
+
index=idx,
|
805
|
+
task_template=task_template,
|
806
|
+
is_pending=True,
|
807
|
+
element_IDs=[],
|
808
|
+
)
|
809
|
+
if save:
|
810
|
+
self.save()
|
811
|
+
return new_ID
|
812
|
+
|
813
|
+
def add_loop(
|
633
814
|
self,
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
815
|
+
loop_template: Dict,
|
816
|
+
iterable_parameters,
|
817
|
+
iter_IDs: List[int],
|
818
|
+
save: bool = True,
|
819
|
+
):
|
820
|
+
"""Add a new loop to the workflow."""
|
821
|
+
self.logger.debug(f"Adding store loop.")
|
822
|
+
new_idx = self._get_num_total_loops()
|
823
|
+
self._pending.add_loops[new_idx] = {
|
824
|
+
"loop_template": loop_template,
|
825
|
+
"iterable_parameters": iterable_parameters,
|
826
|
+
}
|
640
827
|
|
641
|
-
|
828
|
+
for i in iter_IDs:
|
829
|
+
self._pending.update_loop_indices[i] = {loop_template["name"]: 0}
|
830
|
+
|
831
|
+
if save:
|
832
|
+
self.save()
|
833
|
+
|
834
|
+
def add_submission(self, sub_idx: int, sub_js: Dict, save: bool = True):
|
835
|
+
"""Add a new submission."""
|
836
|
+
self.logger.debug(f"Adding store submission.")
|
837
|
+
self._pending.add_submissions[sub_idx] = sub_js
|
838
|
+
if save:
|
839
|
+
self.save()
|
840
|
+
|
841
|
+
def add_element_set(self, task_id: int, es_js: Dict, save: bool = True):
|
842
|
+
self._pending.add_element_sets[task_id].append(es_js)
|
843
|
+
if save:
|
844
|
+
self.save()
|
845
|
+
|
846
|
+
def add_element(
|
847
|
+
self, task_ID: int, es_idx: int, seq_idx: Dict, src_idx: Dict, save: bool = True
|
848
|
+
):
|
849
|
+
"""Add a new element to a task."""
|
850
|
+
self.logger.debug(f"Adding store element.")
|
851
|
+
new_ID = self._get_num_total_elements()
|
852
|
+
new_elem_idx = self._get_task_total_num_elements(task_ID)
|
853
|
+
self._pending.add_elements[new_ID] = self._store_elem_cls(
|
854
|
+
id_=new_ID,
|
855
|
+
is_pending=True,
|
856
|
+
index=new_elem_idx,
|
857
|
+
es_idx=es_idx,
|
858
|
+
seq_idx=seq_idx,
|
859
|
+
src_idx=src_idx,
|
860
|
+
task_ID=task_ID,
|
861
|
+
iteration_IDs=[],
|
862
|
+
)
|
863
|
+
self._pending.add_elem_IDs[task_ID].append(new_ID)
|
864
|
+
if save:
|
865
|
+
self.save()
|
866
|
+
return new_ID
|
867
|
+
|
868
|
+
def add_element_iteration(
|
869
|
+
self,
|
870
|
+
element_ID: int,
|
871
|
+
data_idx: Dict,
|
872
|
+
schema_parameters: List[str],
|
873
|
+
loop_idx: Optional[Dict] = None,
|
874
|
+
save: bool = True,
|
875
|
+
) -> int:
|
876
|
+
"""Add a new iteration to an element."""
|
877
|
+
self.logger.debug(f"Adding store element-iteration.")
|
878
|
+
new_ID = self._get_num_total_elem_iters()
|
879
|
+
self._pending.add_elem_iters[new_ID] = self._store_iter_cls(
|
880
|
+
id_=new_ID,
|
881
|
+
element_ID=element_ID,
|
882
|
+
is_pending=True,
|
883
|
+
EAR_IDs=None,
|
884
|
+
data_idx=data_idx,
|
885
|
+
schema_parameters=schema_parameters,
|
886
|
+
loop_idx=loop_idx or {},
|
887
|
+
)
|
888
|
+
self._pending.add_elem_iter_IDs[element_ID].append(new_ID)
|
889
|
+
if save:
|
890
|
+
self.save()
|
891
|
+
return new_ID
|
892
|
+
|
893
|
+
def add_EAR(
|
642
894
|
self,
|
643
|
-
|
644
|
-
element_iteration_idx: int,
|
895
|
+
elem_iter_ID: int,
|
645
896
|
action_idx: int,
|
646
|
-
|
897
|
+
data_idx: Dict,
|
898
|
+
metadata: Dict,
|
899
|
+
save: bool = True,
|
900
|
+
) -> int:
|
901
|
+
"""Add a new EAR to an element iteration."""
|
902
|
+
self.logger.debug(f"Adding store EAR.")
|
903
|
+
new_ID = self._get_num_total_EARs()
|
904
|
+
self._pending.add_EARs[new_ID] = self._store_EAR_cls(
|
905
|
+
id_=new_ID,
|
906
|
+
is_pending=True,
|
907
|
+
elem_iter_ID=elem_iter_ID,
|
908
|
+
action_idx=action_idx,
|
909
|
+
data_idx=data_idx,
|
910
|
+
metadata=metadata,
|
911
|
+
)
|
912
|
+
self._pending.add_elem_iter_EAR_IDs[elem_iter_ID][action_idx].append(new_ID)
|
913
|
+
if save:
|
914
|
+
self.save()
|
915
|
+
return new_ID
|
916
|
+
|
917
|
+
def add_submission_attempt(
|
918
|
+
self, sub_idx: int, submitted_js_idx: List[int], save: bool = True
|
919
|
+
):
|
920
|
+
self._pending.add_submission_attempts[sub_idx] = submitted_js_idx
|
921
|
+
if save:
|
922
|
+
self.save()
|
923
|
+
|
924
|
+
def set_EAR_submission_index(
|
925
|
+
self, EAR_ID: int, sub_idx: int, save: bool = True
|
647
926
|
) -> None:
|
648
|
-
|
649
|
-
|
650
|
-
|
927
|
+
self._pending.set_EAR_submission_indices[EAR_ID] = sub_idx
|
928
|
+
if save:
|
929
|
+
self.save()
|
930
|
+
|
931
|
+
def set_EAR_start(self, EAR_ID: int, save: bool = True) -> datetime:
|
932
|
+
dt = datetime.utcnow()
|
933
|
+
snapshot = JSONLikeDirSnapShot()
|
934
|
+
snapshot.take(".")
|
935
|
+
ss_js = snapshot.to_json_like()
|
936
|
+
self._pending.set_EAR_starts[EAR_ID] = (dt, ss_js)
|
937
|
+
if save:
|
938
|
+
self.save()
|
939
|
+
return dt
|
651
940
|
|
652
941
|
def set_EAR_end(
|
942
|
+
self, EAR_ID: int, exit_code: int, success: bool, save: bool = True
|
943
|
+
) -> datetime:
|
944
|
+
# TODO: save output files
|
945
|
+
dt = datetime.utcnow()
|
946
|
+
snapshot = JSONLikeDirSnapShot()
|
947
|
+
snapshot.take(".")
|
948
|
+
ss_js = snapshot.to_json_like()
|
949
|
+
self._pending.set_EAR_ends[EAR_ID] = (dt, ss_js, exit_code, success)
|
950
|
+
if save:
|
951
|
+
self.save()
|
952
|
+
return dt
|
953
|
+
|
954
|
+
def set_EAR_skip(self, EAR_ID: int, save: bool = True) -> None:
|
955
|
+
self._pending.set_EAR_skips.append(EAR_ID)
|
956
|
+
if save:
|
957
|
+
self.save()
|
958
|
+
|
959
|
+
def set_jobscript_version_info(
|
960
|
+
self, sub_idx: int, js_idx: int, vers_info: Dict, save: bool = True
|
961
|
+
):
|
962
|
+
self._pending.set_jobscript_version_info[sub_idx][js_idx] = vers_info
|
963
|
+
if save:
|
964
|
+
self.save()
|
965
|
+
|
966
|
+
def set_jobscript_submit_time(
|
967
|
+
self, sub_idx: int, js_idx: int, submit_time: datetime, save: bool = True
|
968
|
+
):
|
969
|
+
self._pending.set_jobscript_submit_time[sub_idx][js_idx] = submit_time
|
970
|
+
if save:
|
971
|
+
self.save()
|
972
|
+
|
973
|
+
def set_jobscript_job_ID(
|
974
|
+
self, sub_idx: int, js_idx: int, job_ID: str, save: bool = True
|
975
|
+
):
|
976
|
+
self._pending.set_jobscript_job_ID[sub_idx][js_idx] = job_ID
|
977
|
+
if save:
|
978
|
+
self.save()
|
979
|
+
|
980
|
+
def _add_parameter(
|
653
981
|
self,
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
982
|
+
is_set: bool,
|
983
|
+
source: Dict,
|
984
|
+
data: Any = None,
|
985
|
+
file: Dict = None,
|
986
|
+
save: bool = True,
|
987
|
+
) -> int:
|
988
|
+
self.logger.debug(f"Adding store parameter{f' (unset)' if data is None else ''}.")
|
989
|
+
new_idx = self._get_num_total_parameters()
|
990
|
+
self._pending.add_parameters[new_idx] = self._store_param_cls(
|
991
|
+
id_=new_idx,
|
992
|
+
is_pending=True,
|
993
|
+
is_set=is_set,
|
994
|
+
data=data,
|
995
|
+
file=file,
|
996
|
+
source=source,
|
997
|
+
)
|
998
|
+
if save:
|
999
|
+
self.save()
|
1000
|
+
return new_idx
|
1001
|
+
|
1002
|
+
def _prepare_set_file(
|
1003
|
+
self,
|
1004
|
+
store_contents: bool,
|
1005
|
+
is_input: bool,
|
1006
|
+
path=None,
|
1007
|
+
contents: str = None,
|
1008
|
+
filename: str = None,
|
1009
|
+
):
|
1010
|
+
if filename is None:
|
1011
|
+
filename = Path(path).name
|
1012
|
+
|
1013
|
+
if store_contents:
|
1014
|
+
if is_input:
|
1015
|
+
new_idx = self._get_num_total_input_files()
|
1016
|
+
dst_dir = Path(self.workflow.input_files_path, str(new_idx))
|
1017
|
+
dst_path = dst_dir / filename
|
1018
|
+
else:
|
1019
|
+
# assume path is inside the EAR execution directory; transform that to the
|
1020
|
+
# equivalent artifacts directory:
|
1021
|
+
assert path is not None
|
1022
|
+
exec_sub_path = Path(path).relative_to(self.path)
|
1023
|
+
dst_path = Path(
|
1024
|
+
self.workflow.task_artifacts_path, *exec_sub_path.parts[1:]
|
1025
|
+
)
|
1026
|
+
if dst_path.is_file():
|
1027
|
+
dst_path = dst_path.with_suffix(dst_path.suffix + "_2") # TODO: better!
|
1028
|
+
else:
|
1029
|
+
dst_path = path
|
1030
|
+
|
1031
|
+
file_param_dat = {
|
1032
|
+
"store_contents": store_contents,
|
1033
|
+
"path": str(dst_path.relative_to(self.path)),
|
1034
|
+
}
|
1035
|
+
self._pending.add_files.append(
|
1036
|
+
{
|
1037
|
+
"store_contents": store_contents,
|
1038
|
+
"is_input": is_input,
|
1039
|
+
"dst_path": str(dst_path),
|
1040
|
+
"path": str(path),
|
1041
|
+
"contents": contents,
|
1042
|
+
}
|
1043
|
+
)
|
1044
|
+
|
1045
|
+
return file_param_dat
|
1046
|
+
|
1047
|
+
def set_file(
|
1048
|
+
self,
|
1049
|
+
param_id: int,
|
1050
|
+
store_contents: bool,
|
1051
|
+
is_input: bool,
|
1052
|
+
path=None,
|
1053
|
+
contents: str = None,
|
1054
|
+
filename: str = None,
|
1055
|
+
save: bool = True,
|
1056
|
+
):
|
1057
|
+
self.logger.debug(f"Setting new file")
|
1058
|
+
file_param_dat = self._prepare_set_file(
|
1059
|
+
store_contents=store_contents,
|
1060
|
+
is_input=is_input,
|
1061
|
+
path=path,
|
1062
|
+
contents=contents,
|
1063
|
+
filename=filename,
|
1064
|
+
)
|
1065
|
+
self.set_parameter_value(param_id, value=file_param_dat, is_file=True, save=save)
|
1066
|
+
if save:
|
1067
|
+
self.save()
|
1068
|
+
|
1069
|
+
def add_file(
|
1070
|
+
self,
|
1071
|
+
store_contents: bool,
|
1072
|
+
is_input: bool,
|
1073
|
+
source: Dict,
|
1074
|
+
path=None,
|
1075
|
+
contents: str = None,
|
1076
|
+
filename: str = None,
|
1077
|
+
save: bool = True,
|
1078
|
+
):
|
1079
|
+
self.logger.debug(f"Adding new file")
|
1080
|
+
file_param_dat = self._prepare_set_file(
|
1081
|
+
store_contents=store_contents,
|
1082
|
+
is_input=is_input,
|
1083
|
+
path=path,
|
1084
|
+
contents=contents,
|
1085
|
+
filename=filename,
|
1086
|
+
)
|
1087
|
+
p_id = self._add_parameter(
|
1088
|
+
file=file_param_dat,
|
1089
|
+
is_set=True,
|
1090
|
+
source=source,
|
1091
|
+
save=save,
|
1092
|
+
)
|
1093
|
+
if save:
|
1094
|
+
self.save()
|
1095
|
+
return p_id
|
1096
|
+
|
1097
|
+
def _append_files(self, files: Dict[int, Dict]):
|
1098
|
+
"""Add new files to the files or artifacts directories."""
|
1099
|
+
for dat in files:
|
1100
|
+
if dat["store_contents"]:
|
1101
|
+
dst_path = Path(dat["dst_path"])
|
1102
|
+
dst_path.parent.mkdir(parents=True, exist_ok=True)
|
1103
|
+
if dat["path"] is not None:
|
1104
|
+
# copy from source path to destination:
|
1105
|
+
shutil.copy(dat["path"], dst_path)
|
1106
|
+
else:
|
1107
|
+
# write out text file:
|
1108
|
+
with dst_path.open("wt") as fp:
|
1109
|
+
fp.write(dat["contents"])
|
1110
|
+
|
1111
|
+
def add_set_parameter(self, data: Any, source: Dict, save: bool = True) -> int:
|
1112
|
+
return self._add_parameter(data=data, is_set=True, source=source, save=save)
|
1113
|
+
|
1114
|
+
def add_unset_parameter(self, source: Dict, save: bool = True) -> int:
|
1115
|
+
return self._add_parameter(data=None, is_set=False, source=source, save=save)
|
1116
|
+
|
1117
|
+
def set_parameter_value(
|
1118
|
+
self, param_id: int, value: Any, is_file: bool = False, save: bool = True
|
1119
|
+
):
|
1120
|
+
self.logger.debug(f"Setting store parameter ID {param_id} value to {value!r}.")
|
1121
|
+
self._pending.set_parameters[param_id] = (value, is_file)
|
1122
|
+
if save:
|
1123
|
+
self.save()
|
1124
|
+
|
1125
|
+
def update_param_source(self, param_id: int, source: Dict, save: bool = True) -> None:
|
1126
|
+
self.logger.debug(f"Updating parameter ID {param_id!r} source to {source!r}.")
|
1127
|
+
self._pending.update_param_sources[param_id] = source
|
1128
|
+
if save:
|
1129
|
+
self.save()
|
1130
|
+
|
1131
|
+
def update_loop_num_iters(
|
1132
|
+
self, index: int, num_iters: int, save: bool = True
|
658
1133
|
) -> None:
|
659
|
-
|
660
|
-
|
661
|
-
|
1134
|
+
self.logger.debug(
|
1135
|
+
f"Updating loop {index!r} num added iterations to {num_iters!r}."
|
1136
|
+
)
|
1137
|
+
self._pending.update_loop_num_iters[index] = num_iters
|
1138
|
+
if save:
|
1139
|
+
self.save()
|
1140
|
+
|
1141
|
+
def get_template_components(self) -> Dict:
|
1142
|
+
"""Get all template components, including pending."""
|
1143
|
+
tc = copy.deepcopy(self._get_persistent_template_components())
|
1144
|
+
for typ in TEMPLATE_COMP_TYPES:
|
1145
|
+
for hash_i, dat_i in self._pending.add_template_components[typ].items():
|
1146
|
+
if typ not in tc:
|
1147
|
+
tc[typ] = {}
|
1148
|
+
tc[typ][hash_i] = dat_i
|
1149
|
+
|
1150
|
+
return tc
|
1151
|
+
|
1152
|
+
def get_template(self) -> Dict:
|
1153
|
+
return self._get_persistent_template()
|
1154
|
+
|
1155
|
+
def _get_task_id_to_idx_map(self) -> Dict[int, int]:
|
1156
|
+
return {i.id_: i.index for i in self.get_tasks()}
|
1157
|
+
|
1158
|
+
def get_task(self, task_idx: int) -> AnySTask:
|
1159
|
+
return self.get_tasks()[task_idx]
|
1160
|
+
|
1161
|
+
def _process_retrieved_tasks(self, tasks: List[AnySTask]) -> List[AnySTask]:
|
1162
|
+
"""Add pending data to retrieved tasks."""
|
1163
|
+
tasks_new = []
|
1164
|
+
for task_i in tasks:
|
1165
|
+
# consider pending element IDs:
|
1166
|
+
pend_elems = self._pending.add_elem_IDs.get(task_i.id_)
|
1167
|
+
if pend_elems:
|
1168
|
+
task_i = task_i.append_element_IDs(pend_elems)
|
1169
|
+
tasks_new.append(task_i)
|
1170
|
+
return tasks_new
|
1171
|
+
|
1172
|
+
def _process_retrieved_loops(self, loops: Dict[int, Dict]) -> Dict[int, Dict]:
|
1173
|
+
"""Add pending data to retrieved loops."""
|
1174
|
+
loops_new = {}
|
1175
|
+
for id_, loop_i in loops.items():
|
1176
|
+
if "num_added_iterations" not in loop_i:
|
1177
|
+
loop_i["num_added_iterations"] = 1
|
1178
|
+
# consider pending changes to num added iterations:
|
1179
|
+
pend_num_iters = self._pending.update_loop_num_iters.get(id_)
|
1180
|
+
if pend_num_iters:
|
1181
|
+
loop_i["num_added_iterations"] = pend_num_iters
|
1182
|
+
loops_new[id_] = loop_i
|
1183
|
+
return loops_new
|
1184
|
+
|
1185
|
+
def get_tasks_by_IDs(self, id_lst: Iterable[int]) -> List[AnySTask]:
|
1186
|
+
# separate pending and persistent IDs:
|
1187
|
+
id_set = set(id_lst)
|
1188
|
+
all_pending = set(self._pending.add_tasks)
|
1189
|
+
id_pers = id_set.difference(all_pending)
|
1190
|
+
id_pend = id_set.intersection(all_pending)
|
1191
|
+
|
1192
|
+
tasks = self._get_persistent_tasks(id_pers) if id_pers else {}
|
1193
|
+
tasks.update({i: self._pending.add_tasks[i] for i in id_pend})
|
1194
|
+
|
1195
|
+
# order as requested:
|
1196
|
+
tasks = [tasks[id_] for id_ in id_lst]
|
1197
|
+
|
1198
|
+
return self._process_retrieved_tasks(tasks)
|
1199
|
+
|
1200
|
+
def get_tasks(self) -> List[AnySTask]:
|
1201
|
+
"""Retrieve all tasks, including pending."""
|
1202
|
+
|
1203
|
+
tasks = self._get_persistent_tasks()
|
1204
|
+
tasks.update({k: v for k, v in self._pending.add_tasks.items()})
|
1205
|
+
|
1206
|
+
# order by index:
|
1207
|
+
tasks = sorted((i for i in tasks.values()), key=lambda x: x.index)
|
1208
|
+
|
1209
|
+
return self._process_retrieved_tasks(tasks)
|
1210
|
+
+    def get_loops_by_IDs(self, id_lst: Iterable[int]) -> Dict[int, Dict]:
+        """Retrieve loops by index (ID), including pending."""
+
+        # separate pending and persistent IDs:
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_loops)
+        id_pers = id_set.difference(all_pending)
+        id_pend = id_set.intersection(all_pending)
+
+        loops = self._get_persistent_loops(id_pers) if id_pers else {}
+        loops.update({i: self._pending.add_loops[i] for i in id_pend})
+
+        # order as requested:
+        loops = {id_: loops[id_] for id_ in id_lst}
+
+        return self._process_retrieved_loops(loops)
+
+    def get_loops(self) -> Dict[int, Dict]:
+        """Retrieve all loops, including pending."""
+
+        loops = self._get_persistent_loops()
+        loops.update({k: v for k, v in self._pending.add_loops.items()})
+
+        # order by index/ID:
+        loops = dict(sorted(loops.items()))
+
+        return self._process_retrieved_loops(loops)
+
+    def get_submissions(self) -> Dict[int, Dict]:
+        """Retrieve all submissions, including pending."""
+
+        subs = self._get_persistent_submissions()
+        subs.update({k: v for k, v in self._pending.add_submissions.items()})
+
+        # order by index/ID
+        subs = dict(sorted(subs.items()))
+
+        return subs
+
+    def get_submissions_by_ID(self, id_lst: Iterable[int]) -> Dict[int, Dict]:
+        # separate pending and persistent IDs:
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_submissions)
+        id_pers = id_set.difference(all_pending)
+        id_pend = id_set.intersection(all_pending)
+
+        subs = self._get_persistent_submissions(id_pers) if id_pers else {}
+        subs.update({i: self._pending.add_submissions[i] for i in id_pend})
+
+        # order by index/ID
+        subs = dict(sorted(subs.items()))
+
+        return subs
+
+    def get_elements(self, id_lst: Iterable[int]) -> List[AnySElement]:
+        # separate pending and persistent IDs:
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_elements)
+        id_pers = id_set.difference(all_pending)
+        id_pend = id_set.intersection(all_pending)
+
+        elems = self._get_persistent_elements(id_pers) if id_pers else {}
+        elems.update({i: self._pending.add_elements[i] for i in id_pend})
+
+        # order as requested:
+        elems = [elems[id_] for id_ in id_lst]
+
+        elems_new = []
+        for elem_i in elems:
+            # consider pending iteration IDs:
+            # TODO: does this consider pending iterations from new loop iterations?
+            pend_iters = self._pending.add_elem_iter_IDs.get(elem_i.id_)
+            if pend_iters:
+                elem_i = elem_i.append_iteration_IDs(pend_iters)
+            elems_new.append(elem_i)
+
+        return elems_new
+
+    def get_element_iterations(self, id_lst: Iterable[int]) -> List[AnySElementIter]:
+        # separate pending and persistent IDs:
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_elem_iters)
+        id_pers = id_set.difference(all_pending)
+        id_pend = id_set.intersection(all_pending)
+
+        iters = self._get_persistent_element_iters(id_pers) if id_pers else {}
+        iters.update({i: self._pending.add_elem_iters[i] for i in id_pend})
+
+        # order as requested:
+        iters = [iters[id_] for id_ in id_lst]
+
+        iters_new = []
+        for iter_i in iters:
+            # consider pending EAR IDs:
+            pend_EARs = self._pending.add_elem_iter_EAR_IDs.get(iter_i.id_)
+            if pend_EARs:
+                iter_i = iter_i.append_EAR_IDs(pend_EARs)
+
+            # consider pending loop idx
+            pend_loop_idx = self._pending.update_loop_indices.get(iter_i.id_)
+            if pend_loop_idx:
+                iter_i = iter_i.update_loop_idx(pend_loop_idx)
+
+            iters_new.append(iter_i)
+
+        return iters_new
+
+    def get_EARs(self, id_lst: Iterable[int]) -> List[AnySEAR]:
+        # separate pending and persistent IDs:
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_EARs)
+        id_pers = id_set.difference(all_pending)
+        id_pend = id_set.intersection(all_pending)
+
+        EARs = self._get_persistent_EARs(id_pers) if id_pers else {}
+        EARs.update({i: self._pending.add_EARs[i] for i in id_pend})
+
+        # order as requested:
+        EARs = [EARs[id_] for id_ in id_lst]
+
+        EARs_new = []
+        for EAR_i in EARs:
+            # consider updates:
+            pend_sub = self._pending.set_EAR_submission_indices.get(EAR_i.id_)
+            pend_start = self._pending.set_EAR_starts.get(EAR_i.id_)
+            pend_end = self._pending.set_EAR_ends.get(EAR_i.id_)
+            pend_skip = True if EAR_i.id_ in self._pending.set_EAR_skips else None
+
+            p_st, p_ss = pend_start if pend_start else (None, None)
+            p_et, p_se, p_ex, p_sx = pend_end if pend_end else (None, None, None, None)
+
+            updates = {
+                "submission_idx": pend_sub,
+                "skip": pend_skip,
+                "success": p_sx,
+                "start_time": p_st,
+                "end_time": p_et,
+                "snapshot_start": p_ss,
+                "snapshot_end": p_se,
+                "exit_code": p_ex,
+            }
+            if any(i is not None for i in updates.values()):
+                EAR_i = EAR_i.update(**updates)
+
+            EARs_new.append(EAR_i)
+
+        return EARs_new
+
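As the tuple unpacking in get_EARs shows, pending start data are stored as (start_time, snapshot_start) pairs and pending end data as (end_time, snapshot_end, exit_code, success) tuples, and only non-None fields make it into the update. A toy illustration of that None-filtered merge, with a hypothetical frozen dataclass standing in for the real StoreEAR:

from dataclasses import dataclass, replace
from typing import Optional

@dataclass(frozen=True)
class Run:  # hypothetical stand-in for StoreEAR
    id_: int
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    exit_code: Optional[int] = None
    success: Optional[bool] = None

def apply_updates(run: Run, **updates) -> Run:
    # drop fields with no pending value so existing values are not overwritten:
    updates = {k: v for k, v in updates.items() if v is not None}
    return replace(run, **updates) if updates else run

run = apply_updates(Run(id_=0), end_time="2023-01-01T00:00:00", exit_code=0)
assert run.exit_code == 0 and run.start_time is None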
+    def get_EAR_skipped(self, EAR_ID: int) -> bool:
+        return self.get_EARs([EAR_ID])[0].skip
+
+    def get_parameters(
+        self,
+        id_lst: Iterable[int],
+        **kwargs: Dict,
+    ) -> List[AnySParameter]:
+        """
+        Parameters
+        ----------
+        kwargs :
+            dataset_copy : bool
+                For Zarr stores only. If True, copy arrays as NumPy arrays.
+        """
+        # separate pending and persistent IDs:
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_parameters)
+        id_pers = id_set.difference(all_pending)
+        id_pend = id_set.intersection(all_pending)
+
+        params = self._get_persistent_parameters(id_pers, **kwargs) if id_pers else {}
+        params.update({i: self._pending.add_parameters[i] for i in id_pend})
+
+        # order as requested:
+        params = [params[id_] for id_ in id_lst]
+
+        return params
+
+    def get_parameter_set_statuses(self, id_lst: Iterable[int]) -> List[bool]:
+        # separate pending and persistent IDs:
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_parameters)
+        id_pers = id_set.difference(all_pending)
+        id_pend = id_set.intersection(all_pending)
+
+        set_status = self._get_persistent_parameter_set_status(id_pers) if id_pers else {}
+        set_status.update({i: self._pending.add_parameters[i].is_set for i in id_pend})
+
+        # order as requested:
+        return [set_status[id_] for id_ in id_lst]
+
+    def get_parameter_sources(self, id_lst: Iterable[int]) -> List[Dict]:
+        # separate pending and persistent IDs:
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_parameters)
+        id_pers = id_set.difference(all_pending)
+        id_pend = id_set.intersection(all_pending)
+
+        src = self._get_persistent_param_sources(id_pers) if id_pers else {}
+        src.update({i: self._pending.add_parameters[i].source for i in id_pend})
+
+        # order as requested:
+        src = {id_: src[id_] for id_ in id_lst}
+
+        src_new = []
+        for id_i, src_i in src.items():
+            # consider pending source updates:
+            pend_src = self._pending.update_param_sources.get(id_i)
+            if pend_src:
+                src_i = {**src_i, **pend_src}
+            src_new.append(src_i)
+
+        return src_new
+
+    def get_task_elements(self, task_id, idx_sel: slice) -> List[Dict]:
+        """Get element data by an index slice within a given task.
+
+        Element iterations and EARs belonging to the elements are included.
+
+        """
+
+        all_elem_IDs = self.get_task(task_id).element_IDs
+        req_IDs = all_elem_IDs[idx_sel]
+        store_elements = self.get_elements(req_IDs)
+        iter_IDs = [i.iteration_IDs for i in store_elements]
+        iter_IDs_flat, iter_IDs_lens = flatten(iter_IDs)
+        store_iters = self.get_element_iterations(iter_IDs_flat)
+
+        # retrieve EARs:
+        EAR_IDs = [list((i.EAR_IDs or {}).values()) for i in store_iters]
+        EAR_IDs_flat, EAR_IDs_lens = flatten(EAR_IDs)
+        EARs_dct = [i.to_dict() for i in self.get_EARs(EAR_IDs_flat)]
+        EARs_dct_rs = reshape(EARs_dct, EAR_IDs_lens)
+
+        # add EARs to iterations:
+        iters = []
+        for idx, i in enumerate(store_iters):
+            EARs = None
+            if i.EAR_IDs is not None:
+                EARs = dict(zip(i.EAR_IDs.keys(), EARs_dct_rs[idx]))
+            iters.append(i.to_dict(EARs))
+
+        # reshape iterations:
+        iters_rs = reshape(iters, iter_IDs_lens)
+
+        # add iterations to elements:
+        elements = []
+        for idx, i in enumerate(store_elements):
+            elements.append(i.to_dict(iters_rs[idx]))
+        return elements
+
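get_task_elements batches its nested lookups through the flatten/reshape pair imported from hpcflow.sdk.core.utils: ragged lists of IDs are flattened into one bulk query, and the flat results are reshaped back to the original nesting. A simplified one-level version of that round trip (the real helpers handle deeper nesting):

def flatten_1level(nested):
    # simplified stand-in for hpcflow.sdk.core.utils.flatten:
    lens = [len(sub) for sub in nested]
    return [x for sub in nested for x in sub], lens

def reshape_1level(flat, lens):
    # simplified stand-in for hpcflow.sdk.core.utils.reshape:
    out, pos = [], 0
    for n in lens:
        out.append(flat[pos : pos + n])
        pos += n
    return out

iter_IDs = [[3, 5], [], [8]]
flat, lens = flatten_1level(iter_IDs)
fetched = [f"iter-{i}" for i in flat]  # stands in for get_element_iterations
assert reshape_1level(fetched, lens) == [["iter-3", "iter-5"], [], ["iter-8"]]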
+    def check_parameters_exist(self, id_lst: Iterable[int]) -> List[bool]:
+        """For each parameter ID, return True if it exists, else False"""
+
+        id_set = set(id_lst)
+        all_pending = set(self._pending.add_parameters)
+        id_not_pend = id_set.difference(all_pending)
+        id_miss = set()
+        if id_not_pend:
+            all_id_pers = self._get_persistent_parameter_IDs()
+            id_miss = id_not_pend.difference(all_id_pers)
+
+        return [False if i in id_miss else True for i in id_lst]
+
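check_parameters_exist avoids per-ID store round trips: only IDs that are not pending are checked against the full persistent ID set, and the result preserves input order. A worked example with illustrative values (pending ID 7, persistent IDs 1 and 2):

pending = {7}
persistent = {1, 2}
id_lst = [1, 7, 9]
id_miss = (set(id_lst) - pending) - persistent
assert [i not in id_miss for i in id_lst] == [True, True, False]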
+    @contextlib.contextmanager
+    def using_resource(self, res_label, action):
+        """Context manager for managing `StoreResource` objects associated with the store."""
+
+        try:
+            res = self._resources[res_label]
+        except KeyError:
+            raise RuntimeError(
+                f"{self.__class__.__name__!r} has no resource named {res_label!r}."
+            ) from None
+
+        key = (res_label, action)
+        if key in self._resources_in_use:
+            # retrieve existing data for this action:
+            yield res.data[action]
+
+        else:
+            try:
+                # "open" the resource, which assigns data for this action, which we yield:
+                res.open(action)
+                self._resources_in_use.add(key)
+                yield res.data[action]
+
+            except Exception as exc:
+                self._resources_in_use.remove(key)
+                raise exc
+
+            else:
+                # "close" the resource, clearing cached data for this action:
+                res.close(action)
+                self._resources_in_use.remove(key)
+
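using_resource is re-entrant per (resource label, action) key: the first entry opens the resource and registers the key, nested entries simply yield the already-open data, and the resource is closed (which for update actions presumably writes the data back) only if the outermost block exits cleanly. A condensed sketch of that behaviour, with a hypothetical TinyStore in place of the real store and its StoreResource objects:

import contextlib

class TinyStore:  # hypothetical; elides the per-action data of real StoreResources
    def __init__(self):
        self._data = None
        self._in_use = set()

    @contextlib.contextmanager
    def using_resource(self, res_label, action):
        key = (res_label, action)
        if key in self._in_use:
            yield self._data  # nested entry: reuse the open data
            return
        self._data = {}  # "open" the resource
        self._in_use.add(key)
        try:
            yield self._data
        except Exception:
            self._in_use.remove(key)  # no close on failure
            raise
        else:
            self._in_use.remove(key)
            self._data = None  # "close" on success

store = TinyStore()
with store.using_resource("metadata", "update") as md:
    with store.using_resource("metadata", "update") as md2:
        assert md is md2  # the inner block shares the outer open handle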
+    def copy(self, path=None) -> str:
+        """Copy the workflow store.
+
+        This does not work on remote filesystems.
+
+        """
+        if path is None:
+            _path = Path(self.path)
+            path = _path.parent / Path(_path.stem + "_copy" + _path.suffix)
+
+        if self.fs.exists(str(path)):
+            raise ValueError(f"Path already exists: {path}.")
+        else:
+            path = str(path)
+
+        self.fs.copy(self.path, path)
+
+        new_fs_path = self.workflow.fs_path.replace(self.path, path)
+
+        return new_fs_path
+
+    def delete(self) -> None:
+        """Delete the persistent workflow."""
+        confirm = input(
+            f"Permanently delete the workflow at path {self.path!r}; [y]es or [n]o?"
+        )
+        if confirm.strip().lower() == "y":
+            self.delete_no_confirm()
+
+    def delete_no_confirm(self) -> None:
+        """Permanently delete the workflow data with no confirmation."""
+
+        @self.app.perm_error_retry()
+        def _delete_no_confirm() -> None:
+            self.logger.debug(f"_delete_no_confirm: {self.path!r}.")
+            self.fs.rm(self.path, recursive=True)
+
+        return _delete_no_confirm()
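When no destination is supplied, copy derives one by appending "_copy" to the path stem, so a store at /data/workflows/wf.zarr is copied to /data/workflows/wf_copy.zarr. The same pathlib derivation in isolation (POSIX-style paths assumed for the assert):

from pathlib import PurePosixPath as Path

_path = Path("/data/workflows/wf.zarr")
path = _path.parent / Path(_path.stem + "_copy" + _path.suffix)
assert str(path) == "/data/workflows/wf_copy.zarr"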