hpcflow-new2 0.2.0a50__py3-none-any.whl → 0.2.0a52__py3-none-any.whl

Files changed (38)
  1. hpcflow/_version.py +1 -1
  2. hpcflow/sdk/__init__.py +1 -1
  3. hpcflow/sdk/api.py +1 -1
  4. hpcflow/sdk/app.py +20 -11
  5. hpcflow/sdk/cli.py +34 -59
  6. hpcflow/sdk/core/__init__.py +13 -1
  7. hpcflow/sdk/core/actions.py +235 -126
  8. hpcflow/sdk/core/command_files.py +32 -24
  9. hpcflow/sdk/core/element.py +110 -114
  10. hpcflow/sdk/core/errors.py +57 -0
  11. hpcflow/sdk/core/loop.py +18 -34
  12. hpcflow/sdk/core/parameters.py +5 -3
  13. hpcflow/sdk/core/task.py +135 -131
  14. hpcflow/sdk/core/task_schema.py +11 -4
  15. hpcflow/sdk/core/utils.py +110 -2
  16. hpcflow/sdk/core/workflow.py +964 -676
  17. hpcflow/sdk/data/template_components/environments.yaml +0 -44
  18. hpcflow/sdk/data/template_components/task_schemas.yaml +52 -10
  19. hpcflow/sdk/persistence/__init__.py +21 -33
  20. hpcflow/sdk/persistence/base.py +1340 -458
  21. hpcflow/sdk/persistence/json.py +424 -546
  22. hpcflow/sdk/persistence/pending.py +563 -0
  23. hpcflow/sdk/persistence/store_resource.py +131 -0
  24. hpcflow/sdk/persistence/utils.py +57 -0
  25. hpcflow/sdk/persistence/zarr.py +852 -841
  26. hpcflow/sdk/submission/jobscript.py +133 -112
  27. hpcflow/sdk/submission/shells/bash.py +62 -16
  28. hpcflow/sdk/submission/shells/powershell.py +87 -16
  29. hpcflow/sdk/submission/submission.py +59 -35
  30. hpcflow/tests/unit/test_element.py +4 -9
  31. hpcflow/tests/unit/test_persistence.py +218 -0
  32. hpcflow/tests/unit/test_task.py +11 -12
  33. hpcflow/tests/unit/test_utils.py +82 -0
  34. hpcflow/tests/unit/test_workflow.py +3 -1
  35. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/METADATA +3 -1
  36. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/RECORD +38 -34
  37. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/WHEEL +0 -0
  38. {hpcflow_new2-0.2.0a50.dist-info → hpcflow_new2-0.2.0a52.dist-info}/entry_points.txt +0 -0
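
The headline change in this release is a rework of the persistence layer: mutations to a workflow are now staged in memory on a PendingChanges object (new module hpcflow/sdk/persistence/pending.py, diffed below) and flushed to disk in batches grouped by the store resources they touch (new module hpcflow/sdk/persistence/store_resource.py). A hedged sketch of the call pattern, using stand-in app and store objects (a real store is a PersistentStore with many more methods; only what this fragment exercises is stubbed):

    import logging
    from types import SimpleNamespace

    from hpcflow.sdk.persistence.pending import CommitResourceMap, PendingChanges

    # stand-ins for illustration only: `app` needs just a `persistence_logger`,
    # and the only store method exercised here is `_append_task_element_IDs`:
    app = SimpleNamespace(persistence_logger=logging.getLogger("demo"))
    store = SimpleNamespace(_append_task_element_IDs=lambda t_id, e_ids: None)

    pending = PendingChanges(app, store, CommitResourceMap())
    pending.add_elem_IDs[7].extend([101, 102])  # stage element IDs for task 7
    assert bool(pending) and pending.where_pending() == ["add_elem_IDs"]
    pending.commit_all()  # flush to "disk", batched per resource group
    assert not pending    # everything committed
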
hpcflow/sdk/persistence/pending.py (new file)
@@ -0,0 +1,563 @@
+ from __future__ import annotations
+
+ from collections import defaultdict
+ import contextlib
+ from dataclasses import dataclass, fields
+ from datetime import datetime
+
+ from typing import Any, Dict, List, Tuple
+
+
+ class PendingChanges:
+     """Class to store pending changes and merge them into a persistent store.
+
+     Parameters
+     ----------
+     add_tasks
+         Keys are new task IDs, and values are the task objects to add.
+     add_elem_iter_EAR_IDs
+         Keys are element iteration IDs, then EAR action index, and values are lists
+         of EAR IDs to add to the given element iteration action.
+     add_elem_iter_IDs
+         Keys are element IDs, and values are iteration IDs to add to that element.
+     add_elem_IDs
+         Keys are task IDs, and values are element IDs to add to that task.
+     add_parameters
+         Keys are parameter indices and values are tuples whose first element is data
+         to add and whose second element is the source dict for the new data.
+     update_param_sources
+         Keys are parameter indices and values are dict parameter sources to merge
+         with the existing source of that parameter.
+     set_EAR_starts
+         Keys are EAR IDs and values are tuples of (start time, start directory
+         snapshot).
+     set_EAR_ends
+         Keys are EAR IDs and values are tuples of (end time, end directory snapshot,
+         exit code, success boolean).
+     """
+
+     def __init__(self, app, store, resource_map):
+         self.app = app
+         self.store = store
+         self.resource_map = resource_map
+
+         # note: the `AnyS*` aliases below are store-specific types provided by the
+         # concrete store implementations; with `from __future__ import annotations`
+         # these annotations are never evaluated at runtime:
+         self.add_tasks: Dict[int, AnySTask] = None
+         self.add_loops: Dict[int, Dict] = None
+         self.add_submissions: Dict[int, Dict] = None
+         self.add_elements: Dict[int, AnySElement] = None
+         self.add_elem_iters: Dict[int, AnySElementIter] = None
+         self.add_EARs: Dict[int, AnySEAR] = None
+         self.add_parameters: Dict[int, AnySParameter] = None
+         self.add_files: List[Dict] = None
+         self.add_template_components: Dict[str, Dict[str, Dict]] = None
+         self.add_element_sets: Dict[int, Dict] = None
+
+         self.add_elem_IDs: Dict[int, List] = None
+         self.add_elem_iter_IDs: Dict[int, List] = None
+         self.add_elem_iter_EAR_IDs: Dict[int, Dict[int, List]] = None
+         self.add_submission_attempts: Dict[int, List[int]] = None
+
+         self.set_EAR_submission_indices: Dict[int, int] = None
+         self.set_EAR_skips: List[int] = None
+         self.set_EAR_starts: Dict[int, Tuple[datetime, Dict]] = None
+         self.set_EAR_ends: Dict[int, Tuple[datetime, Dict, int, bool]] = None
+
+         self.set_jobscript_version_info: Dict[int, Dict[int, Dict]] = None
+         self.set_jobscript_submit_time: Dict[int, Dict[int, datetime]] = None
+         self.set_jobscript_job_ID: Dict[int, Dict[int, str]] = None
+
+         # values are (value, is_file) tuples, as consumed by `commit_parameters`:
+         self.set_parameters: Dict[int, Tuple[Any, bool]] = None
+
+         self.update_param_sources: Dict[int, Dict] = None
+         self.update_loop_indices: Dict[int, Dict] = None
+         self.update_loop_num_iters: Dict[int, int] = None
+
+         self.reset()
+
+     def __bool__(self):
+         """Returns True if there are any outstanding pending items."""
+         return (
+             bool(self.add_tasks)
+             or bool(self.add_loops)
+             or bool(self.add_submissions)
+             or bool(self.add_elements)
+             or bool(self.add_elem_iters)
+             or bool(self.add_EARs)
+             or bool(self.add_elem_IDs)
+             or bool(self.add_elem_iter_IDs)
+             or bool(self.add_elem_iter_EAR_IDs)
+             or bool(self.add_submission_attempts)
+             or bool(self.add_parameters)
+             or bool(self.add_files)
+             or bool(self.add_template_components)
+             or bool(self.add_element_sets)
+             or bool(self.set_EAR_submission_indices)
+             or bool(self.set_EAR_starts)
+             or bool(self.set_EAR_ends)
+             or bool(self.set_EAR_skips)
+             or bool(self.set_jobscript_version_info)
+             or bool(self.set_jobscript_submit_time)
+             or bool(self.set_jobscript_job_ID)
+             or bool(self.set_parameters)
+             or bool(self.update_param_sources)
+             or bool(self.update_loop_indices)
+             or bool(self.update_loop_num_iters)
+         )
+
+     def where_pending(self) -> List[str]:
+         """Return the names of all attributes with outstanding pending items."""
+         return [
+             k
+             for k, v in self.__dict__.items()
+             if k not in ("app", "store", "resource_map") and bool(v)
+         ]
+
+     @property
+     def logger(self):
+         return self.app.persistence_logger
+
+     def commit_all(self):
+         """Commit all pending changes to disk."""
+         self.logger.info(f"committing all pending changes: {self.where_pending()}")
+
+         if not self:
+             self.logger.debug("commit: no pending changes to commit.")
+             return
+
+         for resources, methods in self.resource_map.groups.items():
+             # for each resource group, enter the `using_resource` context manager in
+             # "update" mode:
+             with contextlib.ExitStack() as stack:
+                 for res in resources:
+                     # TODO: only enter required resources!
+                     stack.enter_context(self.store.using_resource(res, "update"))
+                 for meth in methods:
+                     getattr(self, meth)()
+
+         assert not self
+
+     def commit_tasks(self) -> None:
+         """Commit pending tasks to disk."""
+         if self.add_tasks:
+             tasks = self.store.get_tasks_by_IDs(self.add_tasks)
+             task_ids = list(self.add_tasks.keys())
+             self.logger.debug(f"commit: adding pending tasks with IDs: {task_ids!r}")
+             self.store._append_tasks(tasks)
+             # pending element IDs that belong to pending tasks are now committed:
+             self.add_elem_IDs = {
+                 k: v for k, v in self.add_elem_IDs.items() if k not in task_ids
+             }
+         self.clear_add_tasks()
+
+     def commit_loops(self) -> None:
+         """Commit pending loops to disk."""
+         if self.add_loops:
+             # retrieve pending loops, including pending changes to num_added_iterations:
+             loops = self.store.get_loops_by_IDs(self.add_loops)
+             loop_ids = list(self.add_loops.keys())
+             self.logger.debug(f"commit: adding pending loops with indices {loop_ids!r}")
+             self.store._append_loops(loops)
+         self.clear_add_loops()
+
+     def commit_submissions(self) -> None:
+         """Commit pending submissions to disk."""
+         if self.add_submissions:
+             # retrieve pending submissions:
+             subs = self.store.get_submissions_by_ID(self.add_submissions)
+             sub_ids = list(self.add_submissions.keys())
+             self.logger.debug(
+                 f"commit: adding pending submissions with indices {sub_ids!r}"
+             )
+             self.store._append_submissions(subs)
+         self.clear_add_submissions()
+
+     def commit_submission_attempts(self) -> None:
+         if self.add_submission_attempts:
+             self.logger.debug("commit: adding pending submission attempts")
+             self.store._append_submission_attempts(self.add_submission_attempts)
+         self.clear_add_submission_attempts()
+
+     def commit_elem_IDs(self) -> None:
+         # TODO: could be batched up?
+         for task_ID, elem_IDs in self.add_elem_IDs.items():
+             self.logger.debug(
+                 f"commit: adding pending element IDs to task {task_ID!r}: {elem_IDs!r}."
+             )
+             self.store._append_task_element_IDs(task_ID, elem_IDs)
+         self.clear_add_elem_IDs()
+
+     def commit_elements(self) -> None:
+         if self.add_elements:
+             elems = self.store.get_elements(self.add_elements)
+             elem_ids = list(self.add_elements.keys())
+             self.logger.debug(f"commit: adding pending elements with IDs: {elem_ids!r}")
+             self.store._append_elements(elems)
+             # pending iteration IDs that belong to pending elements are now committed:
+             self.add_elem_iter_IDs = {
+                 k: v for k, v in self.add_elem_iter_IDs.items() if k not in elem_ids
+             }
+         self.clear_add_elements()
+
+     def commit_element_sets(self) -> None:
+         # TODO: could be batched up?
+         for task_id, es_js in self.add_element_sets.items():
+             self.logger.debug(f"commit: adding pending element sets to task {task_id!r}.")
+             self.store._append_element_sets(task_id, es_js)
+         self.clear_add_element_sets()
+
+     def commit_elem_iter_IDs(self) -> None:
+         # TODO: could be batched up?
+         for elem_ID, iter_IDs in self.add_elem_iter_IDs.items():
+             self.logger.debug(
+                 f"commit: adding pending element iteration IDs to element {elem_ID!r}: "
+                 f"{iter_IDs!r}."
+             )
+             self.store._append_elem_iter_IDs(elem_ID, iter_IDs)
+         self.clear_add_elem_iter_IDs()
+
+     def commit_elem_iters(self) -> None:
+         if self.add_elem_iters:
+             iters = self.store.get_element_iterations(self.add_elem_iters.keys())
+             iter_ids = list(self.add_elem_iters.keys())
+             self.logger.debug(
+                 f"commit: adding pending element iterations with IDs: {iter_ids!r}"
+             )
+             self.store._append_elem_iters(iters)
+             # pending EAR IDs that belong to pending iterations are now committed:
+             self.add_elem_iter_EAR_IDs = {
+                 k: v for k, v in self.add_elem_iter_EAR_IDs.items() if k not in iter_ids
+             }
+         self.clear_add_elem_iters()
+
+     def commit_elem_iter_EAR_IDs(self) -> None:
+         # TODO: could be batched up?
+         for iter_ID, act_EAR_IDs in self.add_elem_iter_EAR_IDs.items():
+             self.logger.debug(
+                 f"commit: adding pending EAR IDs to element iteration {iter_ID!r}: "
+                 f"{dict(act_EAR_IDs)!r}."
+             )
+             for act_idx, EAR_IDs in act_EAR_IDs.items():
+                 self.store._append_elem_iter_EAR_IDs(iter_ID, act_idx, EAR_IDs)
+         self.clear_add_elem_iter_EAR_IDs()
+
+     def commit_EARs(self) -> None:
+         if self.add_EARs:
+             EARs = self.store.get_EARs(self.add_EARs)
+             EAR_ids = list(self.add_EARs.keys())
+             self.logger.debug(f"commit: adding pending EARs with IDs: {EAR_ids!r}")
+             self.store._append_EARs(EARs)
+             # pending start/end times/snapshots, submission indices, and skips that
+             # belong to pending EARs are now committed (accounted for in `get_EARs`
+             # above):
+             self.set_EAR_submission_indices = {
+                 k: v
+                 for k, v in self.set_EAR_submission_indices.items()
+                 if k not in EAR_ids
+             }
+             self.set_EAR_skips = [i for i in self.set_EAR_skips if i not in EAR_ids]
+             self.set_EAR_starts = {
+                 k: v for k, v in self.set_EAR_starts.items() if k not in EAR_ids
+             }
+             self.set_EAR_ends = {
+                 k: v for k, v in self.set_EAR_ends.items() if k not in EAR_ids
+             }
+
+         self.clear_add_EARs()
+
+     def commit_EAR_submission_indices(self) -> None:
+         # TODO: could be batched up?
+         for EAR_id, sub_idx in self.set_EAR_submission_indices.items():
+             self.logger.debug(
+                 f"commit: adding pending submission index ({sub_idx!r}) to EAR ID "
+                 f"{EAR_id!r}."
+             )
+             self.store._update_EAR_submission_index(EAR_id, sub_idx)
+         self.clear_set_EAR_submission_indices()
+
+     def commit_EAR_starts(self) -> None:
+         # TODO: could be batched up?
+         for EAR_id, (time, snap) in self.set_EAR_starts.items():
+             self.logger.debug(
+                 f"commit: adding pending start time ({time!r}) and "
+                 f"directory snapshot to EAR ID {EAR_id!r}."
+             )
+             self.store._update_EAR_start(EAR_id, time, snap)
+         self.clear_set_EAR_starts()
+
+     def commit_EAR_ends(self) -> None:
+         # TODO: could be batched up?
+         for EAR_id, (time, snap, ext, suc) in self.set_EAR_ends.items():
+             self.logger.debug(
+                 f"commit: adding pending end time ({time!r}), directory snapshot, "
+                 f"exit code ({ext!r}), and success status {suc!r} to EAR ID {EAR_id!r}."
+             )
+             self.store._update_EAR_end(EAR_id, time, snap, ext, suc)
+         self.clear_set_EAR_ends()
+
+     def commit_EAR_skips(self) -> None:
+         # TODO: could be batched up?
+         for EAR_id in self.set_EAR_skips:
+             self.logger.debug(f"commit: setting EAR ID {EAR_id!r} as skipped.")
+             self.store._update_EAR_skip(EAR_id)
+         self.clear_set_EAR_skips()
+
+     def commit_jobscript_version_info(self) -> None:
+         if self.set_jobscript_version_info:
+             self.logger.debug("commit: setting jobscript version info.")
+             self.store._update_jobscript_version_info(self.set_jobscript_version_info)
+         self.clear_set_jobscript_version_info()
+
+     def commit_jobscript_submit_time(self) -> None:
+         if self.set_jobscript_submit_time:
+             self.logger.debug("commit: setting jobscript submit times.")
+             self.store._update_jobscript_submit_time(self.set_jobscript_submit_time)
+         self.clear_set_jobscript_submit_time()
+
+     def commit_jobscript_job_ID(self) -> None:
+         if self.set_jobscript_job_ID:
+             self.logger.debug("commit: setting jobscript job IDs.")
+             self.store._update_jobscript_job_ID(self.set_jobscript_job_ID)
+         self.clear_set_jobscript_job_ID()
+
+     def commit_parameters(self) -> None:
+         """Make pending parameters persistent."""
+         if self.add_parameters:
+             params = self.store.get_parameters(self.add_parameters)
+             param_ids = list(self.add_parameters.keys())
+             self.logger.debug(f"commit: adding pending parameter IDs: {param_ids!r}")
+             self.store._append_parameters(params)
+         self.clear_add_parameters()
+
+         for param_id, (value, is_file) in self.set_parameters.items():
+             # TODO: could be batched up?
+             self.logger.debug(f"commit: setting value of parameter ID {param_id!r}.")
+             self.store._set_parameter_value(param_id, value, is_file)
+         self.clear_set_parameters()
+
+     def commit_files(self) -> None:
+         """Add pending files to the files directory."""
+         if self.add_files:
+             self.logger.debug("commit: adding pending files to the files directory.")
+             self.store._append_files(self.add_files)
+         self.clear_add_files()
+
+     def commit_template_components(self) -> None:
+         if self.add_template_components:
+             self.logger.debug("commit: adding template components.")
+             self.store._update_template_components(self.store.get_template_components())
+         self.clear_add_template_components()
+
+     def commit_param_sources(self) -> None:
+         """Make pending changes to parameter sources persistent."""
+         for param_id, src in self.update_param_sources.items():
+             # TODO: could be batched up?
+             self.logger.debug(f"commit: updating source of parameter ID {param_id!r}.")
+             self.store._update_parameter_source(param_id, src)
+         self.clear_update_param_sources()
+
+     def commit_loop_indices(self) -> None:
+         """Make pending updates to element iteration loop indices persistent."""
+         for iter_ID, loop_idx in self.update_loop_indices.items():
+             self.logger.debug(
+                 f"commit: updating loop indices of iteration ID {iter_ID!r} with "
+                 f"{loop_idx!r}."
+             )
+             self.store._update_loop_index(iter_ID, loop_idx)
+         self.clear_update_loop_indices()
+
+     def commit_loop_num_iters(self) -> None:
+         """Make pending updates to the number of loop iterations persistent."""
+         for index, num_iters in self.update_loop_num_iters.items():
+             self.logger.debug(
+                 f"commit: updating loop {index!r} number of iterations to {num_iters!r}."
+             )
+             self.store._update_loop_num_iters(index, num_iters)
+         self.clear_update_loop_num_iters()
+
+     def clear_add_tasks(self):
+         self.add_tasks = {}
+
+     def clear_add_loops(self):
+         self.add_loops = {}
+
+     def clear_add_submissions(self):
+         self.add_submissions = {}
+
+     def clear_add_submission_attempts(self):
+         self.add_submission_attempts = {}
+
+     def clear_add_elements(self):
+         self.add_elements = {}
+
+     def clear_add_element_sets(self):
+         self.add_element_sets = defaultdict(list)
+
+     def clear_add_elem_iters(self):
+         self.add_elem_iters = {}
+
+     def clear_add_EARs(self):
+         self.add_EARs = {}
+
+     def clear_add_elem_IDs(self):
+         self.add_elem_IDs = defaultdict(list)
+
+     def clear_add_elem_iter_IDs(self):
+         self.add_elem_iter_IDs = defaultdict(list)
+
+     def clear_add_elem_iter_EAR_IDs(self):
+         self.add_elem_iter_EAR_IDs = defaultdict(lambda: defaultdict(list))
+
+     def clear_set_EAR_submission_indices(self):
+         self.set_EAR_submission_indices = {}
+
+     def clear_set_EAR_starts(self):
+         self.set_EAR_starts = {}
+
+     def clear_set_EAR_ends(self):
+         self.set_EAR_ends = {}
+
+     def clear_set_EAR_skips(self):
+         self.set_EAR_skips = []
+
+     def clear_set_jobscript_version_info(self):
+         self.set_jobscript_version_info = defaultdict(dict)
+
+     def clear_set_jobscript_submit_time(self):
+         self.set_jobscript_submit_time = defaultdict(dict)
+
+     def clear_set_jobscript_job_ID(self):
+         self.set_jobscript_job_ID = defaultdict(dict)
+
+     def clear_add_parameters(self):
+         self.add_parameters = {}
+
+     def clear_add_files(self):
+         self.add_files = []
+
+     def clear_add_template_components(self):
+         self.add_template_components = defaultdict(dict)
+
+     def clear_set_parameters(self):
+         self.set_parameters = {}
+
+     def clear_update_param_sources(self):
+         self.update_param_sources = {}
+
+     def clear_update_loop_indices(self):
+         self.update_loop_indices = {}
+
+     def clear_update_loop_num_iters(self):
+         self.update_loop_num_iters = {}
+
+     def reset(self) -> None:
+         """Clear all pending data and prepare to accept new pending data."""
+
+         self.logger.info("resetting pending changes.")
+
+         self.clear_add_tasks()
+         self.clear_add_loops()
+         self.clear_add_submissions()
+         self.clear_add_submission_attempts()
+         self.clear_add_elements()
+         self.clear_add_element_sets()
+         self.clear_add_elem_iters()
+         self.clear_add_EARs()
+
+         self.clear_add_elem_IDs()
+         self.clear_add_elem_iter_IDs()
+         self.clear_add_elem_iter_EAR_IDs()
+
+         self.clear_add_parameters()
+         self.clear_add_files()
+         self.clear_add_template_components()
+
+         self.clear_set_EAR_submission_indices()
+         self.clear_set_EAR_starts()
+         self.clear_set_EAR_ends()
+         self.clear_set_EAR_skips()
+
+         self.clear_set_jobscript_version_info()
+         self.clear_set_jobscript_submit_time()
+         self.clear_set_jobscript_job_ID()
+         self.clear_set_parameters()
+
+         self.clear_update_param_sources()
+         self.clear_update_loop_indices()
+         self.clear_update_loop_num_iters()
+
+
+ @dataclass
+ class CommitResourceMap:
+     """Map of `PendingChanges` commit method names to store resource labels,
+     representing the store resources required by each commit method, for a given
+     `PersistentStore`.
+
+     When `PendingChanges.commit_all` is called, the resources specified for each
+     `commit_` method will be opened in "update" mode while that method runs.
+
+     """
+
+     commit_tasks: Tuple[str, ...] = tuple()
+     commit_loops: Tuple[str, ...] = tuple()
+     commit_submissions: Tuple[str, ...] = tuple()
+     commit_submission_attempts: Tuple[str, ...] = tuple()
+     commit_elem_IDs: Tuple[str, ...] = tuple()
+     commit_elements: Tuple[str, ...] = tuple()
+     commit_element_sets: Tuple[str, ...] = tuple()
+     commit_elem_iter_IDs: Tuple[str, ...] = tuple()
+     commit_elem_iters: Tuple[str, ...] = tuple()
+     commit_elem_iter_EAR_IDs: Tuple[str, ...] = tuple()
+     commit_EARs: Tuple[str, ...] = tuple()
+     commit_EAR_submission_indices: Tuple[str, ...] = tuple()
+     commit_EAR_skips: Tuple[str, ...] = tuple()
+     commit_EAR_starts: Tuple[str, ...] = tuple()
+     commit_EAR_ends: Tuple[str, ...] = tuple()
+     commit_jobscript_version_info: Tuple[str, ...] = tuple()
+     commit_jobscript_submit_time: Tuple[str, ...] = tuple()
+     commit_jobscript_job_ID: Tuple[str, ...] = tuple()
+     commit_parameters: Tuple[str, ...] = tuple()
+     commit_files: Tuple[str, ...] = tuple()
+     commit_template_components: Tuple[str, ...] = tuple()
+     commit_param_sources: Tuple[str, ...] = tuple()
+     commit_loop_indices: Tuple[str, ...] = tuple()
+     commit_loop_num_iters: Tuple[str, ...] = tuple()
+
+     def __post_init__(self):
+         self.groups = self.group_by_resource()
+
+     def group_by_resource(self) -> Dict[Tuple[str, ...], List[str]]:
+         """Return a dict whose keys are tuples of resource labels and whose values
+         are lists of `PendingChanges` commit method names that require those
+         resources.
+
+         This grouping allows us to batch up commit methods by resource requirements,
+         which in turn means we can potentially minimise e.g. the number of network
+         requests.
+
+         """
+         groups = {}
+         cur_res_group = None
+         for fld in fields(self):
+             res_labels = getattr(self, fld.name)
+
+             if not cur_res_group:
+                 # start a new resource group: a mapping between resource labels and
+                 # the commit methods that require those resources:
+                 cur_res_group = [list(res_labels), [fld.name]]
+
+             elif not res_labels or set(res_labels).intersection(cur_res_group[0]):
+                 # this commit method requires no resources, or some of its resource
+                 # labels overlap with the current group, so merge the resource labels
+                 # and add the new commit method:
+                 cur_res_group[0] = list(set(cur_res_group[0] + list(res_labels)))
+                 cur_res_group[1].append(fld.name)
+
+             else:
+                 # no overlap between resource labels required in the current group
+                 # and those required by this commit method, so append the current
+                 # group, and start a new group for this commit method:
+                 groups.setdefault(tuple(cur_res_group[0]), []).extend(cur_res_group[1])
+                 cur_res_group = [list(res_labels), [fld.name]]
+
+         if cur_res_group:
+             groups.setdefault(tuple(cur_res_group[0]), []).extend(cur_res_group[1])
+
+         return groups
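
The `CommitResourceMap` grouping is easiest to see with a concrete instance. A hedged sketch follows; the "metadata" and "parameters" resource labels here are illustrative, not necessarily hpcflow's actual labels:

    from hpcflow.sdk.persistence.pending import CommitResourceMap

    # consecutive commit methods whose resource labels overlap (or that need no
    # resources at all) are merged into one group, so each store resource is
    # opened at most once per `commit_all` call:
    crm = CommitResourceMap(
        commit_tasks=("metadata",),
        commit_elements=("metadata",),
        commit_parameters=("parameters",),
    )
    for resources, methods in crm.groups.items():
        print(resources, len(methods), "methods")
    # ('metadata',) 18 methods   -- commit_tasks .. commit_jobscript_job_ID
    # ('parameters',) 6 methods  -- commit_parameters .. commit_loop_num_iters
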
hpcflow/sdk/persistence/store_resource.py (new file)
@@ -0,0 +1,131 @@
+ from abc import ABC, abstractmethod
+ import copy
+ import json
+ from pathlib import Path
+ from typing import Callable, Union
+
+ from hpcflow.sdk.core.utils import get_md5_hash
+
+
+ class StoreResource(ABC):
+     """Class to represent a persistent resource within which store data lives.
+
+     A `PersistentStore` maps workflow data across zero or more store resources.
+     Updates to persistent workflow data that live in the same store resource are
+     performed together.
+
+     """
+
+     def __init__(self, app, name: str) -> None:
+         self.app = app
+         self.name = name
+         self.data = {"read": None, "update": None}
+         self.hash = None
+
+     def __repr__(self) -> str:
+         return f"{self.__class__.__name__}(name={self.name!r})"
+
+     @property
+     def logger(self):
+         return self.app.persistence_logger
+
+     @abstractmethod
+     def _load(self):
+         pass
+
+     @abstractmethod
+     def _dump(self, data):
+         pass
+
+     def open(self, action):
+         if action == "read":
+             # reuse "update" data if set, rather than re-loading from disk -- but
+             # copy, so changes made in the "read" scope do not update!
+             update_data = self.data["update"]
+             rd_msg = " (using `update` data)" if update_data else ""
+             self.logger.debug(f"{self!r}: opening to read{rd_msg}.")
+             data = copy.deepcopy(update_data) if update_data else self._load()
+
+         elif action == "update":
+             # reuse "read" data if set, rather than re-loading from disk; this also
+             # means updates will be reflected in the "read" data as soon as they
+             # are made:
+             read_data = self.data["read"]
+             upd_msg = " (using `read` data)" if read_data else ""
+             self.logger.debug(f"{self!r}: opening to update{upd_msg}.")
+             data = read_data or self._load()
+
+         else:
+             self._check_action(action)  # raises; only "read"/"update" are known
+
+         self.data[action] = data
+
+         try:
+             self.hash = get_md5_hash(data)
+         except Exception:
+             # data could not be hashed; clear any stale hash so that `close` falls
+             # back to always dumping:
+             self.hash = None
+     def close(self, action):
+         if action == "read":
+             self.logger.debug(f"{self!r}: closing read.")
+         elif action == "update":
+             # dump only if the data changed since opening; if it could not be
+             # hashed on open, we cannot tell, so dump unconditionally:
+             if not self.hash or self.hash != get_md5_hash(self.data[action]):
+                 self.logger.debug(f"{self!r}: data (hash) changed.")
+                 self._dump(self.data[action])
+             self.logger.debug(f"{self!r}: closing update.")
+         else:
+             self._check_action(action)
+
+         # unset data for this action:
+         self.data[action] = None
+
+     def _check_action(self, action: str):
+         if action not in self.data:
+             raise ValueError(
+                 f"Action {action!r} not known for {self.__class__.__name__!r}"
+             )
+
+
+ class JSONFileStoreResource(StoreResource):
+     """For caching reads and writes to a JSON file."""
+
+     def __init__(self, app, name: str, filename: str, path: Union[str, Path], fs):
+         self.filename = filename
+         self.path = path
+         self.fs = fs
+         super().__init__(app, name)
+
+     @property
+     def _full_path(self):
+         return f"{self.path}/{self.filename}"
+
+     def _load(self):
+         self.logger.debug(f"{self!r}: loading JSON from file.")
+         with self.fs.open(self._full_path, mode="rt") as fp:
+             return json.load(fp)
+
+     def _dump(self, data):
+         self.logger.debug(f"{self!r}: dumping JSON to file.")
+         if "runs" in data:
+             self.logger.debug(f"...runs: {data['runs']}")
+         with self.fs.open(self._full_path, mode="wt") as fp:
+             json.dump(data, fp, indent=2)
+
+
+ class ZarrAttrsStoreResource(StoreResource):
+     """For caching reads and writes to Zarr attributes on groups and arrays."""
+
+     def __init__(self, app, name: str, open_call: Callable):
+         self.open_call = open_call
+         super().__init__(app, name)
+
+     def _load(self):
+         self.logger.debug(f"{self!r}: loading Zarr attributes.")
+         item = self.open_call(mode="r")
+         return copy.deepcopy(item.attrs.asdict())
+
+     def _dump(self, data):
+         self.logger.debug(f"{self!r}: dumping Zarr attributes.")
+         item = self.open_call(mode="r+")
+         item.attrs.put(data)
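
To make the `StoreResource` read/update lifecycle concrete, here is a minimal in-memory subclass. This is an illustrative sketch only: `DictStoreResource` is hypothetical (hpcflow's real subclasses are the JSON-file and Zarr-attributes resources above), and the stub `app` assumes only a `persistence_logger` attribute is needed, per the `logger` property:

    import copy
    import logging
    from types import SimpleNamespace

    from hpcflow.sdk.persistence.store_resource import StoreResource


    class DictStoreResource(StoreResource):
        """Pretend-persistent resource backed by an in-memory dict."""

        def __init__(self, app, name):
            super().__init__(app, name)
            self._backing = {"tasks": []}

        def _load(self):
            return copy.deepcopy(self._backing)

        def _dump(self, data):
            self._backing = copy.deepcopy(data)


    app = SimpleNamespace(persistence_logger=logging.getLogger("demo"))
    res = DictStoreResource(app, "demo")

    res.open("update")                     # load and hash the data
    res.data["update"]["tasks"].append(0)  # mutate in the "update" scope
    res.close("update")                    # hash differs, so _dump is called
    assert res._backing == {"tasks": [0]}

Note the design choice this models: writes are deferred until `close`, and the MD5 hash taken at `open` lets a resource skip the dump entirely when nothing changed, which matters for remote filesystems.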