hpcflow-new2 0.2.0a50__py3-none-any.whl → 0.2.0a52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. hpcflow/_version.py +1 -1
  2. hpcflow/sdk/__init__.py +1 -1
  3. hpcflow/sdk/api.py +1 -1
  4. hpcflow/sdk/app.py +20 -11
  5. hpcflow/sdk/cli.py +34 -59
  6. hpcflow/sdk/core/__init__.py +13 -1
  7. hpcflow/sdk/core/actions.py +235 -126
  8. hpcflow/sdk/core/command_files.py +32 -24
  9. hpcflow/sdk/core/element.py +110 -114
  10. hpcflow/sdk/core/errors.py +57 -0
  11. hpcflow/sdk/core/loop.py +18 -34
  12. hpcflow/sdk/core/parameters.py +5 -3
  13. hpcflow/sdk/core/task.py +135 -131
  14. hpcflow/sdk/core/task_schema.py +11 -4
  15. hpcflow/sdk/core/utils.py +110 -2
  16. hpcflow/sdk/core/workflow.py +964 -676
  17. hpcflow/sdk/data/template_components/environments.yaml +0 -44
  18. hpcflow/sdk/data/template_components/task_schemas.yaml +52 -10
  19. hpcflow/sdk/persistence/__init__.py +21 -33
  20. hpcflow/sdk/persistence/base.py +1340 -458
  21. hpcflow/sdk/persistence/json.py +424 -546
  22. hpcflow/sdk/persistence/pending.py +563 -0
  23. hpcflow/sdk/persistence/store_resource.py +131 -0
  24. hpcflow/sdk/persistence/utils.py +57 -0
  25. hpcflow/sdk/persistence/zarr.py +852 -841
  26. hpcflow/sdk/submission/jobscript.py +133 -112
  27. hpcflow/sdk/submission/shells/bash.py +62 -16
  28. hpcflow/sdk/submission/shells/powershell.py +87 -16
  29. hpcflow/sdk/submission/submission.py +59 -35
  30. hpcflow/tests/unit/test_element.py +4 -9
  31. hpcflow/tests/unit/test_persistence.py +218 -0
  32. hpcflow/tests/unit/test_task.py +11 -12
  33. hpcflow/tests/unit/test_utils.py +82 -0
  34. hpcflow/tests/unit/test_workflow.py +3 -1
  35. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/METADATA +3 -1
  36. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/RECORD +38 -34
  37. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/WHEEL +0 -0
  38. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/entry_points.txt +0 -0
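Most of the churn in this release is a rewrite of the persistence layer: base.py grows by nearly 900 net lines, pending.py and store_resource.py are new modules, and the JSON and Zarr backends are reimplemented on top of them. The json.py diff reproduced below shows the new shape: every accessor opens a named store resource, and a CommitResourceMap declares which resource each commit_* operation touches, so a batch of pending changes only rewrites the files it actually dirties. Since pending.py itself is not reproduced here, the following is only a minimal sketch of what such a map might do; everything beyond the CommitResourceMap name and its keyword-argument form is an assumption:

    from itertools import groupby
    from typing import Dict, List, Tuple


    class CommitResourceMap:
        """Sketch: map each commit_* method name to the store resources
        (i.e. backing files) it must open in "update" mode."""

        def __init__(self, **commit_resources: Tuple[str, ...]) -> None:
            self.commit_resources: Dict[str, Tuple[str, ...]] = commit_resources

        def group_by_resource(self, commit_names: List[str]):
            """Group consecutive commits that share a resource set, so e.g. ten
            consecutive metadata commits cost one metadata.json write, not ten."""

            def res_key(name: str) -> Tuple[str, ...]:
                return tuple(sorted(self.commit_resources.get(name, ())))

            return [(res, list(names)) for res, names in groupby(commit_names, res_key)]

With commit_tasks and commit_loops mapped to ("metadata",) and commit_submissions to ("submissions",), as in the class body below, a queue of ["commit_tasks", "commit_loops", "commit_submissions"] would collapse to two resource groups.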
hpcflow/sdk/persistence/json.py
@@ -1,614 +1,492 @@
 from __future__ import annotations
-
+from contextlib import contextmanager
 import copy
 from datetime import datetime
 import json
-from contextlib import contextmanager
-from os import PathLike
 from pathlib import Path
-from pprint import pprint
-import shutil
-from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Union
-from hpcflow.sdk import app
 
-from hpcflow.sdk.core.errors import WorkflowNotFoundError
-from hpcflow.sdk.core.utils import bisect_slice, get_md5_hash
+from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple
 
+from fsspec import filesystem
+from hpcflow.sdk.core.errors import (
+    MissingParameterData,
+    MissingStoreEARError,
+    MissingStoreElementError,
+    MissingStoreElementIterationError,
+    MissingStoreTaskError,
+)
 from hpcflow.sdk.persistence.base import (
-    PersistentStore,
     PersistentStoreFeatures,
-    dropbox_permission_err_retry,
-    remove_dir,
-    rename_dir,
+    PersistentStore,
+    StoreEAR,
+    StoreElement,
+    StoreElementIter,
+    StoreParameter,
+    StoreTask,
 )
+from hpcflow.sdk.persistence.pending import CommitResourceMap
+from hpcflow.sdk.persistence.store_resource import JSONFileStoreResource
 
 
 class JSONPersistentStore(PersistentStore):
-    """A verbose but inefficient storage backend, to help with understanding and
-    debugging.
-
-    Notes
-    -----
-    We split the data across three JSON files to support submission to schedulers. During
-    scheduler submission, if a task is quick, parameter data might be written at the
-    same time as both submission metadata (jobscript submission time), and EAR metadata
-    (EAR start/end time).
-
-    """
-
     _name = "json"
-
-    _metadata_file_name = "metadata.json"
-    _submissions_file_name = "submissions.json"
-    _parameters_file_name = "parameters.json"
-
     _features = PersistentStoreFeatures(
+        create=True,
+        edit=True,
         jobscript_parallelism=False,
         EAR_parallelism=False,
         schedulers=True,
         submission=True,
     )
 
-    def __init__(self, workflow: app.Workflow) -> None:
-        self._loaded = None  # cache used in `cached_load` context manager
-        super().__init__(workflow)
-
-    @classmethod
-    def path_has_store(cls, path):
-        return (
-            path.joinpath(cls._metadata_file_name).is_file()
-            and path.joinpath(cls._submissions_file_name).is_file()
-            and path.joinpath(cls._parameters_file_name).is_file()
-        )
-
-    @property
-    def store_path(self):
-        return self.workflow_path
-
-    @property
-    def _metadata_file_path(self):
-        return self.store_path.joinpath(self._metadata_file_name)
-
-    @property
-    def _submissions_file_path(self):
-        return self.store_path.joinpath(self._submissions_file_name)
+    _meta_res = "metadata"
+    _params_res = "parameters"
+    _subs_res = "submissions"
+
+    _res_file_names = {
+        _meta_res: "metadata.json",
+        _params_res: "parameters.json",
+        _subs_res: "submissions.json",
+    }
+
+    _res_map = CommitResourceMap(
+        commit_tasks=(_meta_res,),
+        commit_loops=(_meta_res,),
+        commit_loop_num_iters=(_meta_res,),
+        commit_submissions=(_subs_res,),
+        commit_submission_attempts=(_subs_res,),
+        commit_jobscript_version_info=(_subs_res,),
+        commit_jobscript_submit_time=(_subs_res,),
+        commit_jobscript_job_ID=(_subs_res,),
+        commit_elem_IDs=(_meta_res,),
+        commit_elements=(_meta_res,),
+        commit_elem_iter_IDs=(_meta_res,),
+        commit_elem_iters=(_meta_res,),
+        commit_loop_indices=(_meta_res,),
+        commit_elem_iter_EAR_IDs=(_meta_res,),
+        commit_EARs=(_meta_res,),
+        commit_EAR_submission_indices=(_meta_res,),
+        commit_EAR_skips=(_meta_res,),
+        commit_EAR_starts=(_meta_res,),
+        commit_EAR_ends=(_meta_res,),
+        commit_template_components=(_meta_res,),
+        commit_parameters=(_params_res,),
+        commit_param_sources=(_params_res,),
+    )
 
-    @property
-    def _parameters_file_path(self):
-        return self.store_path.joinpath(self._parameters_file_name)
+    def __init__(self, app, workflow, path, fs):
+        self._resources = {
+            self._meta_res: self._get_store_resource(app, "metadata", path, fs),
+            self._params_res: self._get_store_resource(app, "parameters", path, fs),
+            self._subs_res: self._get_store_resource(app, "submissions", path, fs),
+        }
+        super().__init__(app, workflow, path, fs)
 
-    def exists(self) -> bool:
-        return self.path_has_store(self.store_path)
+    @contextmanager
+    def cached_load(self) -> Iterator[Dict]:
+        """Context manager to cache the metadata."""
+        with self.using_resource("metadata", "read") as md:
+            yield md
 
-    def _load_metadata_file(self) -> Dict:
-        with open(self._metadata_file_path, "rt") as fp:
-            return json.load(fp)
+    def remove_replaced_dir(self) -> None:
+        with self.using_resource("metadata", "update") as md:
+            if "replaced_workflow" in md:
+                self.remove_path(md["replaced_workflow"], self.fs)
+                self.logger.debug("removing temporarily renamed pre-existing workflow.")
+                md["replaced_workflow"] = None
 
-    def _load_submissions_file(self) -> Dict:
-        with open(self._submissions_file_path, "rt") as fp:
-            return json.load(fp)
+    def reinstate_replaced_dir(self) -> None:
+        with self.using_resource("metadata", "read") as md:
+            if "replaced_workflow" in md:
+                self.logger.debug(
+                    "reinstating temporarily renamed pre-existing workflow."
+                )
+                self.rename_path(md["replaced_workflow"], self.path, self.fs)
 
-    def _load_parameters_file(self) -> Dict:
-        with open(self._parameters_file_path, "rt") as fp:
-            return json.load(fp)
+    @classmethod
+    def _get_store_resource(cls, app, name, path, fs):
+        return JSONFileStoreResource(
+            app=app,
+            name=name,
+            path=path,
+            fs=fs,
+            filename=cls._res_file_names[name],
+        )
 
     @classmethod
     def write_empty_workflow(
         cls,
+        app,
         template_js: Dict,
         template_components_js: Dict,
-        workflow_path: Path,
-        replaced_dir: Path,
+        wk_path: str,
+        fs,
+        fs_path: str,
+        replaced_wk: str,
         creation_info: Dict,
     ) -> None:
-        workflow_path.mkdir()
-        store_path = workflow_path
-
+        fs.mkdir(wk_path)
         submissions = []
-        parameters = {}
+        parameters = {
+            "data": {},
+            "sources": {},
+        }
         metadata = {
+            "fs_path": fs_path,
             "creation_info": creation_info,
-            "parameter_sources": {},
             "template_components": template_components_js,
             "template": template_js,
             "tasks": [],
+            "elements": [],
+            "iters": [],
+            "runs": [],
             "num_added_tasks": 0,
             "loops": [],
         }
-        if replaced_dir:
-            metadata["replaced_dir"] = str(replaced_dir.name)
-
-        cls._dump_to_path(store_path.joinpath(cls._metadata_file_name), metadata)
-        cls._dump_to_path(store_path.joinpath(cls._submissions_file_name), submissions)
-        cls._dump_to_path(store_path.joinpath(cls._parameters_file_name), parameters)
-
-    @contextmanager
-    def cached_load(self) -> Iterator[Dict]:
-        """Context manager to cache the whole JSON document, allowing for multiple read
-        operations with one disk read."""
-        if self._loaded:
-            yield
+        if replaced_wk:
+            metadata["replaced_workflow"] = replaced_wk
+
+        cls._get_store_resource(app, "metadata", wk_path, fs)._dump(metadata)
+        cls._get_store_resource(app, "parameters", wk_path, fs)._dump(parameters)
+        cls._get_store_resource(app, "submissions", wk_path, fs)._dump(submissions)
+
+    def _append_tasks(self, tasks: List[StoreTask]):
+        with self.using_resource("metadata", action="update") as md:
+            for i in tasks:
+                idx, wk_task_i, task_i = i.encode()
+                md["tasks"].insert(idx, wk_task_i)
+                md["template"]["tasks"].insert(idx, task_i)
+                md["num_added_tasks"] += 1
+
+    def _append_loops(self, loops: Dict[int, Dict]):
+        with self.using_resource("metadata", action="update") as md:
+            for loop_idx, loop in loops.items():
+                md["loops"].append(
+                    {
+                        "num_added_iterations": loop["num_added_iterations"],
+                        "iterable_parameters": loop["iterable_parameters"],
+                    }
+                )
+                md["template"]["loops"].append(loop["loop_template"])
+
+    def _append_submissions(self, subs: Dict[int, Dict]):
+        with self.using_resource("submissions", action="update") as subs_res:
+            for sub_idx, sub_i in subs.items():
+                subs_res.append(sub_i)
+
+    def _append_task_element_IDs(self, task_ID: int, elem_IDs: List[int]):
+        with self.using_resource("metadata", action="update") as md:
+            md["tasks"][task_ID]["element_IDs"].extend(elem_IDs)
+
+    def _append_elements(self, elems: List[StoreElement]):
+        with self.using_resource("metadata", action="update") as md:
+            md["elements"].extend(i.encode() for i in elems)
+
+    def _append_element_sets(self, task_id: int, es_js: List[Dict]):
+        task_idx = self._get_task_id_to_idx_map()[task_id]
+        with self.using_resource("metadata", "update") as md:
+            md["template"]["tasks"][task_idx]["element_sets"].extend(es_js)
+
+    def _append_elem_iter_IDs(self, elem_ID: int, iter_IDs: List[int]):
+        with self.using_resource("metadata", action="update") as md:
+            md["elements"][elem_ID]["iteration_IDs"].extend(iter_IDs)
+
+    def _append_elem_iters(self, iters: List[StoreElementIter]):
+        with self.using_resource("metadata", action="update") as md:
+            md["iters"].extend(i.encode() for i in iters)
+
+    def _append_elem_iter_EAR_IDs(self, iter_ID: int, act_idx: int, EAR_IDs: List[int]):
+        with self.using_resource("metadata", action="update") as md:
+            if md["iters"][iter_ID]["EAR_IDs"] is None:
+                md["iters"][iter_ID]["EAR_IDs"] = {}
+            if act_idx not in md["iters"][iter_ID]["EAR_IDs"]:
+                md["iters"][iter_ID]["EAR_IDs"][act_idx] = []
+            md["iters"][iter_ID]["EAR_IDs"][act_idx].extend(EAR_IDs)
+
+    def _append_submission_attempts(self, sub_attempts: Dict[int, List[int]]):
+        with self.using_resource("submissions", action="update") as subs_res:
+            for sub_idx, attempts_i in sub_attempts.items():
+                subs_res[sub_idx]["submission_attempts"].extend(attempts_i)
+
+    def _update_loop_index(self, iter_ID: int, loop_idx: Dict):
+        with self.using_resource("metadata", action="update") as md:
+            md["iters"][iter_ID]["loop_idx"].update(loop_idx)
+
+    def _update_loop_num_iters(self, index: int, num_iters: int):
+        with self.using_resource("metadata", action="update") as md:
+            md["loops"][index]["num_added_iterations"] = num_iters
+
+    def _append_EARs(self, EARs: List[StoreEAR]):
+        with self.using_resource("metadata", action="update") as md:
+            md["runs"].extend(i.encode(self.ts_fmt) for i in EARs)
+
+    def _update_EAR_submission_index(self, EAR_id: int, sub_idx: int):
+        with self.using_resource("metadata", action="update") as md:
+            md["runs"][EAR_id]["submission_idx"] = sub_idx
+
+    def _update_EAR_start(self, EAR_id: int, s_time: datetime, s_snap: Dict):
+        with self.using_resource("metadata", action="update") as md:
+            md["runs"][EAR_id]["start_time"] = s_time.strftime(self.ts_fmt)
+            md["runs"][EAR_id]["snapshot_start"] = s_snap
+
+    def _update_EAR_end(
+        self, EAR_id: int, e_time: datetime, e_snap: Dict, ext_code: int, success: bool
+    ):
+        with self.using_resource("metadata", action="update") as md:
+            md["runs"][EAR_id]["end_time"] = e_time.strftime(self.ts_fmt)
+            md["runs"][EAR_id]["snapshot_end"] = e_snap
+            md["runs"][EAR_id]["exit_code"] = ext_code
+            md["runs"][EAR_id]["success"] = success
+
+    def _update_EAR_skip(self, EAR_id: int):
+        with self.using_resource("metadata", action="update") as md:
+            md["runs"][EAR_id]["skip"] = True
+
+    def _update_jobscript_version_info(self, vers_info: Dict):
+        with self.using_resource("submissions", action="update") as sub_res:
+            for sub_idx, js_vers_info in vers_info.items():
+                for js_idx, vers_info_i in js_vers_info.items():
+                    sub_res[sub_idx]["jobscripts"][js_idx]["version_info"] = vers_info_i
+
+    def _update_jobscript_submit_time(self, sub_times: Dict):
+        with self.using_resource("submissions", action="update") as sub_res:
+            for sub_idx, js_sub_times in sub_times.items():
+                for js_idx, sub_time_i in js_sub_times.items():
+                    sub_time_fmt = sub_time_i.strftime(self.ts_fmt)
+                    sub_res[sub_idx]["jobscripts"][js_idx]["submit_time"] = sub_time_fmt
+
+    def _update_jobscript_job_ID(self, job_IDs: Dict):
+        with self.using_resource("submissions", action="update") as sub_res:
+            for sub_idx, js_job_IDs in job_IDs.items():
+                for js_idx, job_ID_i in js_job_IDs.items():
+                    sub_res[sub_idx]["jobscripts"][js_idx]["scheduler_job_ID"] = job_ID_i
+
+    def _append_parameters(self, new_params: List[StoreParameter]):
+        with self.using_resource("parameters", "update") as params:
+            for param_i in new_params:
+                params["data"][str(param_i.id_)] = param_i.encode()
+                params["sources"][str(param_i.id_)] = param_i.source
+
+    def _set_parameter_value(self, param_id: int, value: Any, is_file: bool):
+        """Set an unset persistent parameter."""
+
+        # the `decode` call in `_get_persistent_parameters` should be quick:
+        param = self._get_persistent_parameters([param_id])[param_id]
+        if is_file:
+            param = param.set_file(value)
         else:
-            try:
-                self._loaded = self._load()
-                yield
-            finally:
-                self._loaded = None
+            param = param.set_data(value)
 
-    def _load(self) -> Dict:
-        return {
-            "metadata": self._load_metadata_file(),
-            "submissions": self._load_submissions_file(),
-            "parameter_data": self._load_parameters_file(),
-        }
+        with self.using_resource("parameters", "update") as params:
+            # no need to update sources array:
+            params["data"][str(param_id)] = param.encode()
 
-    def load(self) -> Dict:
-        # TODO: can we prevent loaded data being modified? this has caused some bugs...
-        return self._loaded or self._load()
+    def _update_parameter_source(self, param_id: int, src: Dict):
+        """Update the source of a persistent parameter."""
 
-    def load_metadata(self) -> Dict:
-        return self.load()["metadata"]
+        param = self._get_persistent_parameters([param_id])[param_id]
+        param = param.update_source(src)
 
-    def load_submissions(self) -> Dict:
-        return self.load()["submissions"]
+        with self.using_resource("parameters", "update") as params:
+            # no need to update data array:
+            params["sources"][str(param_id)] = param.source
 
-    def load_parameter_data(self) -> Dict:
-        return self.load()["parameter_data"]
+    def _update_template_components(self, tc: Dict):
+        with self.using_resource("metadata", "update") as md:
+            md["template_components"] = tc
 
-    @staticmethod
-    @dropbox_permission_err_retry
-    def _dump_to_path(path: Path, data: Dict) -> None:
-        with open(path, "wt", newline="") as fp:
-            json.dump(data, fp, indent=4)
+    def _get_num_persistent_tasks(self) -> int:
+        """Get the number of persistent tasks."""
+        with self.using_resource("metadata", action="read") as md:
+            return len(md["tasks"])
 
-    def _dump_metadata(self, metadata: Dict) -> None:
-        self._dump_to_path(self._metadata_file_path, metadata)
+    def _get_num_persistent_loops(self) -> int:
+        """Get the number of persistent loops."""
+        with self.using_resource("metadata", action="read") as md:
+            return len(md["loops"])
 
-    def _dump_submissions(self, submissions: List) -> None:
-        self._dump_to_path(self._submissions_file_path, submissions)
+    def _get_num_persistent_submissions(self) -> int:
+        """Get the number of persistent submissions."""
+        with self.using_resource("submissions", "read") as subs_res:
+            return len(subs_res)
 
-    def _dump_parameters(self, parameters: Dict) -> None:
-        self._dump_to_path(self._parameters_file_path, parameters)
+    def _get_num_persistent_elements(self) -> int:
+        """Get the number of persistent elements."""
+        with self.using_resource("metadata", action="read") as md:
+            return len(md["elements"])
 
-    def _add_parameter_data(self, data: Any, source: Dict) -> int:
-        idx = len(self.load_parameter_data()) + len(self._pending["parameter_data"])
+    def _get_num_persistent_elem_iters(self) -> int:
+        """Get the number of persistent element iterations."""
+        with self.using_resource("metadata", action="read") as md:
+            return len(md["iters"])
 
-        if data is not None:
-            data = self._encode_parameter_data(data["data"])
+    def _get_num_persistent_EARs(self) -> int:
+        """Get the number of persistent EARs."""
+        with self.using_resource("metadata", action="read") as md:
+            return len(md["runs"])
 
-        self._pending["parameter_data"][idx] = data
-        self._pending["parameter_sources"][idx] = dict(sorted(source.items()))
-        self.save()
+    def _get_num_persistent_parameters(self):
+        with self.using_resource("parameters", "read") as params:
+            return len(params["data"])
 
-        return idx
+    def _get_num_persistent_added_tasks(self):
+        with self.using_resource("metadata", "read") as md:
+            return md["num_added_tasks"]
 
-    def set_parameter(self, index: int, data: Any) -> None:
-        """Set the value of a pre-allocated parameter."""
-        if self.is_parameter_set(index):
-            raise RuntimeError(f"Parameter at index {index} is already set!")
-        self._pending["parameter_data"][index] = self._encode_parameter_data(data)
-        self.save()
+    @classmethod
+    def make_test_store_from_spec(
+        cls,
+        app,
+        spec,
+        dir=None,
+        path="test_store.json",
+        overwrite=False,
+    ):
+        """Generate an store for testing purposes."""
+
+        tasks, elems, elem_iters, EARs = super().prepare_test_store_from_spec(spec)
+
+        path = Path(path).resolve()
+        tasks = [StoreTask(**i).encode() for i in tasks]
+        elements = [StoreElement(**i).encode() for i in elems]
+        elem_iters = [StoreElementIter(**i).encode() for i in elem_iters]
+        EARs = [StoreEAR(**i).encode() for i in EARs]
+
+        persistent_data = {
+            "tasks": tasks,
+            "elements": elements,
+            "iters": elem_iters,
+            "runs": EARs,
+        }
 
-    def get_parameter_data(self, index: int) -> Tuple[bool, Any]:
-        if index in self._pending["parameter_data"]:
-            data = self._pending["parameter_data"][index]
-        else:
-            data = self.load_parameter_data()[str(index)]
-        is_set = False if data is None else True
-        data = self._decode_parameter_data(data=data)
-        return (is_set, data)
-
-    def get_parameter_source(self, index: int) -> Dict:
-        if index in self._pending["parameter_sources"]:
-            src = self._pending["parameter_sources"][index]
-        else:
-            src = self.load_metadata()["parameter_sources"][str(index)]
+        path = Path(dir or "", path)
+        with path.open("wt") as fp:
+            json.dump(persistent_data, fp, indent=2)
 
-        if index in self._pending["parameter_source_updates"]:
-            src.update(self._pending["parameter_source_updates"][index])
-            src = dict(sorted(src.items()))
+        return cls(app=app, workflow=None, path=path, fs=filesystem("file"))
 
-        return src
+    def _get_persistent_template_components(self):
+        with self.using_resource("metadata", "read") as md:
+            return md["template_components"]
 
-    def get_all_parameter_data(self) -> Dict[int, Any]:
-        if self._pending["parameter_data"]:
-            max_key = max(self._pending["parameter_data"].keys())
-        else:
-            max_key = int(max(self.load_parameter_data().keys(), key=lambda x: int(x)))
-
-        out = {}
-        for idx in range(max_key + 1):
-            out[idx] = self.get_parameter_data(idx)
-
-        return out
-
-    def is_parameter_set(self, index: int) -> bool:
-        return self.load_parameter_data()[str(index)] is not None
-
-    def check_parameters_exist(
-        self, indices: Union[int, List[int]]
-    ) -> Union[bool, List[bool]]:
-        is_multi = True
-        if not isinstance(indices, (list, tuple)):
-            is_multi = False
-            indices = [indices]
-        exists = [
-            i in self._pending["parameter_data"] or str(i) in self.load_parameter_data()
-            for i in indices
-        ]
-        if not is_multi:
-            exists = exists[0]
-        return exists
-
-    def commit_pending(self) -> None:
-        dump_metadata = False
-        dump_submissions = False
-        dump_parameters = False
-
-        metadata = self.load_metadata()
-        submissions = self.load_submissions()
-        parameters = self.load_parameter_data()
-
-        # commit new tasks:
-        for new_index, task_js in self._pending["template_tasks"].items():
-            dump_metadata = True
-            metadata["template"]["tasks"].insert(new_index, task_js)
-
-        # commit new workflow tasks:
-        for new_index, wk_task in self._pending["tasks"].items():
-            dump_metadata = True
-            metadata["tasks"].insert(new_index, wk_task)
-            metadata["num_added_tasks"] += 1
-
-        # commit new template components:
-        if self._merge_pending_template_components(metadata["template_components"]):
-            dump_metadata = True
-
-        # commit new element sets:
-        for task_idx, es_js in self._pending["element_sets"].items():
-            dump_metadata = True
-            metadata["template"]["tasks"][task_idx]["element_sets"].extend(es_js)
-
-        # commit new elements:
-        for (task_idx, _), elements in self._pending["elements"].items():
-            dump_metadata = True
-            metadata["tasks"][task_idx]["elements"].extend(elements)
-
-        for (task_idx, _), iters_idx in self._pending["element_iterations_idx"].items():
-            for elem_idx, iters_idx_i in iters_idx.items():
-                dump_metadata = True
-                metadata["tasks"][task_idx]["elements"][elem_idx][
-                    "iterations_idx"
-                ] += iters_idx_i
-
-        # commit new element iterations:
-        for (task_idx, _), element_iters in self._pending["element_iterations"].items():
-            dump_metadata = True
-            metadata["tasks"][task_idx]["element_iterations"].extend(element_iters)
-
-        # commit new element iteration loop indices:
-        for (t_idx, _, iters_idx_i), loop_idx_i in self._pending["loop_idx"].items():
-            dump_metadata = True
-            metadata["tasks"][t_idx]["element_iterations"][iters_idx_i][
-                "loop_idx"
-            ].update(loop_idx_i)
-
-        # commit new element iteration EARs:
-        for (t_idx, _, iters_idx_i), actions_i in self._pending["EARs"].items():
-            dump_metadata = True
-            iter_i = metadata["tasks"][t_idx]["element_iterations"][iters_idx_i]
-            iter_i["actions"].update(actions_i)
-            iter_i["EARs_initialised"] = True
-
-        # commit new EAR submission indices:
-        for (ins_ID, it_idx, act_idx, rn_idx), sub_idx in self._pending[
-            "EAR_submission_idx"
-        ].items():
-            dump_metadata = True
-            t_idx = self.get_task_idx_from_insert_ID(ins_ID)
-            iter_i = metadata["tasks"][t_idx]["element_iterations"][it_idx]
-            EAR = iter_i["actions"][str(act_idx)][rn_idx]
-            EAR["metadata"]["submission_idx"] = sub_idx
-
-        # commit new EAR start times:
-        for (ins_ID, it_idx, act_idx, rn_idx), start in self._pending[
-            "EAR_start_times"
-        ].items():
-            dump_metadata = True
-            t_idx = self.get_task_idx_from_insert_ID(ins_ID)
-            iter_i = metadata["tasks"][t_idx]["element_iterations"][it_idx]
-            EAR = iter_i["actions"][str(act_idx)][rn_idx]
-            EAR["metadata"]["start_time"] = start.strftime(self.ts_fmt)
-
-        # commit new EAR end times:
-        for (ins_ID, it_idx, act_idx, rn_idx), end in self._pending[
-            "EAR_end_times"
-        ].items():
-            dump_metadata = True
-            t_idx = self.get_task_idx_from_insert_ID(ins_ID)
-            iter_i = metadata["tasks"][t_idx]["element_iterations"][it_idx]
-            EAR = iter_i["actions"][str(act_idx)][rn_idx]
-            EAR["metadata"]["end_time"] = end.strftime(self.ts_fmt)
-
-        # commit new loops:
-        if self._pending["template_loops"]:
-            dump_metadata = True
-            metadata["template"]["loops"].extend(self._pending["template_loops"])
-
-        # commit new workflow loops:
-        if self._pending["loops"]:
-            dump_metadata = True
-            metadata["loops"].extend(self._pending["loops"])
-
-        for loop_idx, num_added_iters in self._pending["loops_added_iters"].items():
-            dump_metadata = True
-            metadata["loops"][loop_idx]["num_added_iterations"] = num_added_iters
-
-        # commit new submissions:
-        if self._pending["submissions"]:
-            dump_submissions = True
-            submissions.extend(self._pending["submissions"])
-
-        # commit new submission attempts:
-        for sub_idx, attempts_i in self._pending["submission_attempts"].items():
-            dump_submissions = True
-            submissions[sub_idx]["submission_attempts"].extend(attempts_i)
-
-        # commit new jobscript scheduler version info:
-        for sub_idx, js_vers_info in self._pending["jobscript_version_info"].items():
-            for js_idx, vers_info in js_vers_info.items():
-                dump_submissions = True
-                submissions[sub_idx]["jobscripts"][js_idx]["version_info"] = vers_info
-
-        # commit new jobscript job IDs:
-        for sub_idx, job_IDs in self._pending["jobscript_job_IDs"].items():
-            for js_idx, job_ID in job_IDs.items():
-                dump_submissions = True
-                submissions[sub_idx]["jobscripts"][js_idx]["scheduler_job_ID"] = job_ID
-
-        # commit new jobscript submit times:
-        for sub_idx, js_submit_times in self._pending["jobscript_submit_times"].items():
-            for js_idx, submit_time in js_submit_times.items():
-                dump_submissions = True
-                submissions[sub_idx]["jobscripts"][js_idx][
-                    "submit_time"
-                ] = submit_time.strftime(self.ts_fmt)
-
-        # commit new parameters:
-        for param_idx, param_dat in self._pending["parameter_data"].items():
-            dump_parameters = True
-            parameters[str(param_idx)] = param_dat
-
-        for param_idx, param_src in self._pending["parameter_sources"].items():
-            dump_metadata = True
-            metadata["parameter_sources"][str(param_idx)] = param_src
-
-        for param_idx, src_update in self._pending["parameter_source_updates"].items():
-            dump_metadata = True
-            src = metadata["parameter_sources"][str(param_idx)]
-            src.update(src_update)
-            src = dict(sorted(src.items()))
-            metadata["parameter_sources"][str(param_idx)] = src
-
-        if self._pending["remove_replaced_dir_record"]:
-            dump_metadata = True
-            del metadata["replaced_dir"]
-
-        if dump_metadata:
-            self._dump_metadata(metadata)
-        if dump_submissions:
-            self._dump_submissions(submissions)
-        if dump_parameters:
-            self._dump_parameters(parameters)
-
-        # TODO: return files changed? useful for testing expected changes
-
-        self.clear_pending()
-
-    def _get_persistent_template_components(self) -> Dict:
-        return self.load_metadata()["template_components"]
-
-    def get_template(self) -> Dict:
-        # No need to consider pending; this is called once per Workflow object
-        return self.load_metadata()["template"]
-
-    def get_loops(self) -> List[Dict]:
-        # No need to consider pending; this is called once per Workflow object
-        return self.load_metadata()["loops"]
-
-    def get_submissions(self) -> List[Dict]:
-        # No need to consider pending; this is called once per Workflow object
-        subs = copy.deepcopy(self.load_submissions())
-
-        # cast jobscript submit-times and jobscript `task_elements` keys:
-        for sub_idx, sub in enumerate(subs):
-            for js_idx, js in enumerate(sub["jobscripts"]):
-                if js["submit_time"]:
-                    subs[sub_idx]["jobscripts"][js_idx][
-                        "submit_time"
-                    ] = datetime.strptime(js["submit_time"], self.ts_fmt)
-
-                for key in list(js["task_elements"].keys()):
-                    subs[sub_idx]["jobscripts"][js_idx]["task_elements"][int(key)] = subs[
-                        sub_idx
-                    ]["jobscripts"][js_idx]["task_elements"].pop(key)
-
-        return subs
-
-    def get_num_added_tasks(self) -> int:
-        return self.load_metadata()["num_added_tasks"] + len(self._pending["tasks"])
-
-    def get_all_tasks_metadata(self) -> List[Dict]:
-        # No need to consider pending; this is called once per Workflow object
-        return [
-            {
-                "num_elements": len(task["elements"]),
-                "num_element_iterations": len(task["element_iterations"]),
-                "num_EARs": sum(
-                    len(runs)
-                    for iter_i in task["element_iterations"]
-                    for runs in iter_i["actions"].values()
-                ),
+    def _get_persistent_template(self) -> Dict:
+        with self.using_resource("metadata", "read") as md:
+            return md["template"]
+
+    def _get_persistent_tasks(
+        self, id_lst: Optional[Iterable[int]] = None
+    ) -> Dict[int, StoreTask]:
+        with self.using_resource("metadata", action="read") as md:
+            task_dat = {
+                i["id_"]: StoreTask.decode({**i, "index": idx})
+                for idx, i in enumerate(md["tasks"])
+                if id_lst is None or i["id_"] in id_lst
+            }
+        return task_dat
+
+    def _get_persistent_loops(self, id_lst: Optional[Iterable[int]] = None):
+        with self.using_resource("metadata", "read") as md:
+            loop_dat = {
+                idx: i
+                for idx, i in enumerate(md["loops"])
+                if id_lst is None or idx in id_lst
             }
-            for task in self.load_metadata()["tasks"]
-        ]
+        return loop_dat
+
+    def _get_persistent_submissions(self, id_lst: Optional[Iterable[int]] = None):
+        with self.using_resource("submissions", "read") as sub_res:
+            subs_dat = copy.deepcopy(
+                {
+                    idx: i
+                    for idx, i in enumerate(sub_res)
+                    if id_lst is None or idx in id_lst
+                }
+            )
+            # cast jobscript submit-times and jobscript `task_elements` keys:
+            for sub_idx, sub in subs_dat.items():
+                for js_idx, js in enumerate(sub["jobscripts"]):
+                    if js["submit_time"]:
+                        subs_dat[sub_idx]["jobscripts"][js_idx][
+                            "submit_time"
+                        ] = datetime.strptime(js["submit_time"], self.ts_fmt)
+
+                    for key in list(js["task_elements"].keys()):
+                        subs_dat[sub_idx]["jobscripts"][js_idx]["task_elements"][
+                            int(key)
+                        ] = subs_dat[sub_idx]["jobscripts"][js_idx]["task_elements"].pop(
+                            key
+                        )
+
+        return subs_dat
+
+    def _get_persistent_elements(self, id_lst: Iterable[int]) -> Dict[int, StoreElement]:
+        # could convert `id_lst` to e.g. slices if more efficient for a given store
+        with self.using_resource("metadata", action="read") as md:
+            try:
+                elem_dat = {i: md["elements"][i] for i in id_lst}
+            except KeyError:
+                raise MissingStoreElementError(id_lst) from None
+        return {k: StoreElement.decode(v) for k, v in elem_dat.items()}
+
+    def _get_persistent_element_iters(
+        self, id_lst: Iterable[int]
+    ) -> Dict[int, StoreElementIter]:
+        with self.using_resource("metadata", action="read") as md:
+            try:
+                iter_dat = {i: md["iters"][i] for i in id_lst}
+            except KeyError:
+                raise MissingStoreElementIterationError(id_lst) from None
+        return {k: StoreElementIter.decode(v) for k, v in iter_dat.items()}
 
-    def get_task_elements(
-        self,
-        task_idx: int,
-        task_insert_ID: int,
-        selection: slice,
-        keep_iterations_idx: bool = False,
-    ) -> List[Dict]:
-        # TODO: add tests to check correct return in various states of pending
-
-        num_pers = self.workflow.tasks[task_idx]._num_elements
-        pers_slice, pend_slice = bisect_slice(selection, num_pers)
-        pers_range = range(pers_slice.start, pers_slice.stop, pers_slice.step)
-
-        if task_idx in self._pending["tasks"]:
-            task_data = self._pending["tasks"][task_idx]
-        else:
-            task_data = copy.deepcopy(self.load_metadata()["tasks"][task_idx])
+    def _get_persistent_EARs(self, id_lst: Iterable[int]) -> Dict[int, StoreEAR]:
+        with self.using_resource("metadata", action="read") as md:
+            try:
+                EAR_dat = {i: md["runs"][i] for i in id_lst}
+            except KeyError:
+                raise MissingStoreEARError(id_lst) from None
+        return {k: StoreEAR.decode(v, self.ts_fmt) for k, v in EAR_dat.items()}
 
-        if len(pers_range):
-            elements = task_data["elements"][pers_slice]
-        else:
-            elements = []
-
-        key = (task_idx, task_insert_ID)
-        if key in self._pending["elements"]:
-            elements += copy.deepcopy(self._pending["elements"][key][pend_slice])
-
-        # add iterations:
-        sel_range = range(selection.start, selection.stop, selection.step)
-        for element_idx, element in zip(sel_range, elements):
-            # find which iterations to add:
-            iters_idx = element["iterations_idx"]
-            if not keep_iterations_idx:
-                del element["iterations_idx"]
-
-            # include pending iterations:
-            if key in self._pending["element_iterations_idx"]:
-                iters_idx += self._pending["element_iterations_idx"][key][element_idx]
-
-            # populate new iterations list:
-            element["iterations"] = []
-            for iters_idx_i in iters_idx:
-                if iters_idx_i + 1 > len(task_data["element_iterations"]):
-                    i_pending = iters_idx_i - len(task_data["element_iterations"])
-                    iter_i = copy.deepcopy(
-                        self._pending["element_iterations"][key][i_pending]
-                    )
-                else:
-                    iter_i = task_data["element_iterations"][iters_idx_i]
-
-                for act_idx_str in list(iter_i["actions"].keys()):
-                    runs = iter_i["actions"].pop(act_idx_str)
-                    iter_i["actions"][int(act_idx_str)] = runs
-
-                # include pending EARs:
-                EARs_key = (task_idx, task_insert_ID, iters_idx_i)
-                if EARs_key in self._pending["EARs"]:
-                    iter_i["actions"].update(self._pending["EARs"][EARs_key])
-                    # if there are pending EARs then EARs must be initialised:
-                    iter_i["EARs_initialised"] = True
-
-                # include pending loops:
-                loop_idx_key = (task_idx, task_insert_ID, iters_idx_i)
-                if loop_idx_key in self._pending["loop_idx"]:
-                    iter_i["loop_idx"].update(self._pending["loop_idx"][loop_idx_key])
-
-                iter_i["index"] = iters_idx_i
-                element["iterations"].append(iter_i)
-
-            element["index"] = element_idx
-
-        # cast EAR start/end times to datetime types:
-        for element in elements:
-            element_idx = element["index"]
-            for iter_i in element["iterations"]:
-                iter_idx = iter_i["index"]
-                for act_idx, runs in iter_i["actions"].items():
-                    for run_idx in range(len(runs)):
-                        run = iter_i["actions"][act_idx][run_idx]
-                        start_time = run["metadata"]["start_time"]
-                        end_time = run["metadata"]["end_time"]
-                        if start_time is not None:
-                            run["metadata"]["start_time"] = datetime.strptime(
-                                start_time, self.ts_fmt
-                            )
-                        if end_time is not None:
-                            run["metadata"]["end_time"] = datetime.strptime(
-                                end_time, self.ts_fmt
-                            )
-
-                        # update pending submission indices:
-                        key = (task_insert_ID, iter_idx, act_idx, run_idx)
-                        if key in self._pending["EAR_submission_idx"]:
-                            sub_idx = self._pending["EAR_submission_idx"][key]
-                            run["metadata"]["submission_idx"] = sub_idx
-
-        return elements
-
-    def _init_task_loop(
+    def _get_persistent_parameters(
         self,
-        task_idx: int,
-        task_insert_ID: int,
-        element_sel: slice,
-        name: str,
-    ) -> None:
-        """Initialise the zeroth iteration of a named loop for a specified task."""
-
-        elements = self.get_task_elements(
-            task_idx=task_idx,
-            task_insert_ID=task_insert_ID,
-            selection=element_sel,
-            keep_iterations_idx=True,
-        )
-
-        for element in elements:
-            for iter_idx, iter_i in zip(element["iterations_idx"], element["iterations"]):
-                if name in iter_i["loop_idx"]:
-                    raise ValueError(f"Loop {name!r} already initialised!")
-                key = (task_idx, task_insert_ID, iter_idx)
-                if key not in self._pending["loop_idx"]:
-                    self._pending["loop_idx"][key] = {}
-                self._pending["loop_idx"][key].update({name: 0})
-
-    def remove_replaced_dir(self) -> None:
-        md = self.load_metadata()
-        if "replaced_dir" in md:
-            remove_dir(Path(md["replaced_dir"]))
-            self._pending["remove_replaced_dir_record"] = True
-            self.save()
+        id_lst: Iterable[int],
+    ) -> Dict[int, StoreParameter]:
+        with self.using_resource("parameters", "read") as params:
+            try:
+                param_dat = {i: params["data"][str(i)] for i in id_lst}
+                src_dat = {i: params["sources"][str(i)] for i in id_lst}
+            except KeyError:
+                raise MissingParameterData(id_lst) from None
 
-    def reinstate_replaced_dir(self) -> None:
-        print(f"reinstate replaced directory!")
-        md = self.load_metadata()
-        if "replaced_dir" in md:
-            rename_dir(Path(md["replaced_dir"]), self.workflow_path)
-
-    def copy(self, path: PathLike = None) -> None:
-        shutil.copy(self.workflow_path, path)
-
-    def is_modified_on_disk(self) -> Union[bool, Dict]:
-        if self._loaded:
-            # TODO: define "structural_metadata" as everything that defines the structure
-            # of the workflow. this will be everything in the metadata file except the EAR
-            # metadata, which includes start/end times etc.
-            on_disk = {
-                k: v for k, v in self._load_metadata_file().items() if k not in ("tasks",)
-            }
-            in_mem = {
-                k: v for k, v in self._loaded["metadata"].items() if k not in ("tasks",)
-            }
-            return get_md5_hash(on_disk) != get_md5_hash(in_mem)
-        else:
-            # nothing to compare to
-            return False
+        return {
+            k: StoreParameter.decode(id_=k, data=v, source=src_dat[k])
+            for k, v in param_dat.items()
+        }
 
-    def get_task_idx_from_insert_ID(self, insert_ID):
-        for task in self.workflow.template.tasks:
-            if task.insert_ID == insert_ID:
-                return task.index
+    def _get_persistent_param_sources(self, id_lst: Iterable[int]) -> Dict[int, Dict]:
+        with self.using_resource("parameters", "read") as params:
+            try:
+                return {i: params["sources"][str(i)] for i in id_lst}
+            except KeyError:
+                raise MissingParameterData(id_lst) from None
+
+    def _get_persistent_parameter_set_status(
+        self, id_lst: Iterable[int]
+    ) -> Dict[int, bool]:
+        with self.using_resource("parameters", "read") as params:
+            try:
+                param_dat = {i: params["data"][str(i)] for i in id_lst}
+            except KeyError:
+                raise MissingParameterData(id_lst) from None
+        return {k: v is not None for k, v in param_dat.items()}
+
+    def _get_persistent_parameter_IDs(self) -> List[int]:
+        with self.using_resource("parameters", "read") as params:
+            return list(int(i) for i in params["data"].keys())
+
+    def get_creation_info(self):
+        with self.using_resource("metadata", action="read") as md:
+            return md["creation_info"]
+
+    def get_fs_path(self):
+        with self.using_resource("metadata", action="read") as md:
+            return md["fs_path"]
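Every method in the rewritten class funnels its I/O through using_resource(name, action), which json.py inherits from PersistentStore in base.py (not reproduced in this diff). Below is a minimal sketch of that pattern under assumed semantics: a resource knows how to _load and _dump one backing file, as the JSONFileStoreResource constructed in _get_store_resource evidently does, and the context manager writes back only for mutating actions. The FileResource stand-in and the exact write-back rule are hypothetical:

    import json
    from contextlib import contextmanager
    from pathlib import Path


    class FileResource:
        """Hypothetical stand-in for JSONFileStoreResource."""

        def __init__(self, path: Path) -> None:
            self.path = path

        def _load(self):
            with self.path.open("rt") as fp:
                return json.load(fp)

        def _dump(self, data) -> None:
            with self.path.open("wt") as fp:
                json.dump(data, fp, indent=4)


    @contextmanager
    def using_resource(resources: dict, name: str, action: str):
        """Yield the named resource's data; write it back only when the action
        mutates, so a "read" costs one load and no dump (assumed semantics)."""
        res = resources[name]
        data = res._load()
        yield data
        if action == "update":
            res._dump(data)

Routing everything through one context manager is what makes the three-file split workable: cached_load simply holds the "metadata" resource open for the duration of a block, and the _res_map near the top of the class tells the commit machinery which of metadata.json, parameters.json and submissions.json each pending operation will dirty, so unrelated files are never rewritten.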