hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
  2. hpcflow/_version.py +1 -1
  3. hpcflow/data/scripts/bad_script.py +2 -0
  4. hpcflow/data/scripts/do_nothing.py +2 -0
  5. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  6. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  7. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  8. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  11. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  12. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  13. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  15. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  16. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  23. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  24. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  25. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  26. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  27. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  28. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  29. hpcflow/data/scripts/script_exit_test.py +5 -0
  30. hpcflow/data/template_components/environments.yaml +1 -1
  31. hpcflow/sdk/__init__.py +5 -0
  32. hpcflow/sdk/app.py +150 -89
  33. hpcflow/sdk/cli.py +263 -84
  34. hpcflow/sdk/cli_common.py +99 -5
  35. hpcflow/sdk/config/callbacks.py +38 -1
  36. hpcflow/sdk/config/config.py +102 -13
  37. hpcflow/sdk/config/errors.py +19 -5
  38. hpcflow/sdk/config/types.py +3 -0
  39. hpcflow/sdk/core/__init__.py +25 -1
  40. hpcflow/sdk/core/actions.py +914 -262
  41. hpcflow/sdk/core/cache.py +76 -34
  42. hpcflow/sdk/core/command_files.py +14 -128
  43. hpcflow/sdk/core/commands.py +35 -6
  44. hpcflow/sdk/core/element.py +122 -50
  45. hpcflow/sdk/core/errors.py +58 -2
  46. hpcflow/sdk/core/execute.py +207 -0
  47. hpcflow/sdk/core/loop.py +408 -50
  48. hpcflow/sdk/core/loop_cache.py +4 -4
  49. hpcflow/sdk/core/parameters.py +382 -37
  50. hpcflow/sdk/core/run_dir_files.py +13 -40
  51. hpcflow/sdk/core/skip_reason.py +7 -0
  52. hpcflow/sdk/core/task.py +119 -30
  53. hpcflow/sdk/core/task_schema.py +68 -0
  54. hpcflow/sdk/core/test_utils.py +66 -27
  55. hpcflow/sdk/core/types.py +54 -1
  56. hpcflow/sdk/core/utils.py +78 -7
  57. hpcflow/sdk/core/workflow.py +1538 -336
  58. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  59. hpcflow/sdk/demo/cli.py +7 -0
  60. hpcflow/sdk/helper/cli.py +1 -0
  61. hpcflow/sdk/log.py +42 -15
  62. hpcflow/sdk/persistence/base.py +405 -53
  63. hpcflow/sdk/persistence/json.py +177 -52
  64. hpcflow/sdk/persistence/pending.py +237 -69
  65. hpcflow/sdk/persistence/store_resource.py +3 -2
  66. hpcflow/sdk/persistence/types.py +15 -4
  67. hpcflow/sdk/persistence/zarr.py +928 -81
  68. hpcflow/sdk/submission/jobscript.py +1408 -489
  69. hpcflow/sdk/submission/schedulers/__init__.py +40 -5
  70. hpcflow/sdk/submission/schedulers/direct.py +33 -19
  71. hpcflow/sdk/submission/schedulers/sge.py +51 -16
  72. hpcflow/sdk/submission/schedulers/slurm.py +44 -16
  73. hpcflow/sdk/submission/schedulers/utils.py +7 -2
  74. hpcflow/sdk/submission/shells/base.py +68 -20
  75. hpcflow/sdk/submission/shells/bash.py +222 -129
  76. hpcflow/sdk/submission/shells/powershell.py +200 -150
  77. hpcflow/sdk/submission/submission.py +852 -119
  78. hpcflow/sdk/submission/types.py +18 -21
  79. hpcflow/sdk/typing.py +24 -5
  80. hpcflow/sdk/utils/arrays.py +71 -0
  81. hpcflow/sdk/utils/deferred_file.py +55 -0
  82. hpcflow/sdk/utils/hashing.py +16 -0
  83. hpcflow/sdk/utils/patches.py +12 -0
  84. hpcflow/sdk/utils/strings.py +33 -0
  85. hpcflow/tests/api/test_api.py +32 -0
  86. hpcflow/tests/conftest.py +19 -0
  87. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  88. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  89. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  90. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  91. hpcflow/tests/scripts/test_main_scripts.py +821 -70
  92. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  93. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  94. hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
  95. hpcflow/tests/unit/test_action.py +176 -0
  96. hpcflow/tests/unit/test_app.py +20 -0
  97. hpcflow/tests/unit/test_cache.py +46 -0
  98. hpcflow/tests/unit/test_cli.py +133 -0
  99. hpcflow/tests/unit/test_config.py +122 -1
  100. hpcflow/tests/unit/test_element_iteration.py +47 -0
  101. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  102. hpcflow/tests/unit/test_loop.py +1332 -27
  103. hpcflow/tests/unit/test_meta_task.py +325 -0
  104. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  105. hpcflow/tests/unit/test_parameter.py +13 -0
  106. hpcflow/tests/unit/test_persistence.py +190 -8
  107. hpcflow/tests/unit/test_run.py +109 -3
  108. hpcflow/tests/unit/test_run_directories.py +29 -0
  109. hpcflow/tests/unit/test_shell.py +20 -0
  110. hpcflow/tests/unit/test_submission.py +5 -76
  111. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  112. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  113. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  114. hpcflow/tests/unit/utils/test_patches.py +5 -0
  115. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  116. hpcflow/tests/workflows/__init__.py +0 -0
  117. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  118. hpcflow/tests/workflows/test_jobscript.py +332 -0
  119. hpcflow/tests/workflows/test_run_status.py +198 -0
  120. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  121. hpcflow/tests/workflows/test_submission.py +140 -0
  122. hpcflow/tests/workflows/test_workflows.py +142 -2
  123. hpcflow/tests/workflows/test_zip.py +18 -0
  124. hpcflow/viz_demo.ipynb +6587 -3
  125. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +7 -4
  126. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  127. hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
  128. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  129. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  130. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -5,12 +5,23 @@ Class to hold the state that is waiting to be committed to disk.
5
5
  from __future__ import annotations
6
6
 
7
7
  from collections import defaultdict
8
+ from collections.abc import Callable
8
9
  import contextlib
9
10
  from dataclasses import dataclass, field, fields
11
+ from functools import wraps
12
+ import copy
10
13
 
11
- from typing import Any, Generic, TYPE_CHECKING
14
+ from typing import Any, Generic, TYPE_CHECKING, TypeVar
15
+ from typing_extensions import ParamSpec
16
+
17
+ import numpy as np
12
18
 
13
19
  from hpcflow.sdk.log import TimeIt
20
+ from hpcflow.sdk.submission.submission import SUBMISSION_SUBMIT_TIME_KEYS
21
+ from hpcflow.sdk.typing import DataIndex
22
+ from hpcflow.sdk.utils.strings import shorten_list_str
23
+
24
+
14
25
  from hpcflow.sdk.persistence.types import (
15
26
  AnySTask,
16
27
  AnySElement,
@@ -26,7 +37,14 @@ if TYPE_CHECKING:
26
37
  from .base import PersistentStore, FileDescriptor, LoopDescriptor
27
38
  from ..app import BaseApp
28
39
  from ..typing import ParamSource
29
- from ..core.json_like import JSONDocument
40
+ from ..core.json_like import JSONed
41
+
42
+ P = ParamSpec("P")
43
+ T = TypeVar("T")
44
+
45
+ _commit_method_data_map: dict[str, list[str]] = defaultdict(
46
+ list
47
+ ) # note: this is updated at module-import time.
30
48
 
31
49
 
32
50
  class PendingChanges(
@@ -76,7 +94,7 @@ class PendingChanges(
76
94
  #: Keys are loop IDs, values are loop descriptors.
77
95
  self.add_loops: dict[int, LoopDescriptor] = {}
78
96
  #: Keys are submission IDs, values are submission descriptors.
79
- self.add_submissions: dict[int, JSONDocument] = {}
97
+ self.add_submissions: dict[int, Mapping[str, JSONed]] = {}
80
98
  #: Keys are element IDs.
81
99
  self.add_elements: dict[int, AnySElement] = {}
82
100
  #: Keys are element iteration IDs.
@@ -100,25 +118,35 @@ class PendingChanges(
100
118
  #: Keys are element iteration IDs, then EAR action index, and values are EAR IDs.
101
119
  #: This is a list of EAR IDs to add to a given element iteration action.
102
120
  self.add_elem_iter_EAR_IDs: dict[int, dict[int, list[int]]] = {}
103
- #: Submission parts to add.
104
- self.add_submission_parts: dict[int, dict[str, list[int]]] = {}
121
+ #: Submission metadata added at submit-time, including submission parts.
122
+ self.update_at_submit_metadata: dict[int, dict[str, Any]] = {}
105
123
 
106
124
  #: IDs of EARs to mark as initialised.
107
125
  self.set_EARs_initialised: list[int] = []
108
- #: Submission IDs to attach to EARs.
109
- self.set_EAR_submission_indices: dict[int, int] = {}
126
+ #: Submission IDs and commands file IDs to attach to EARs.
127
+ self.set_EAR_submission_data: dict[int, tuple[int, int | None]] = {}
110
128
  #: IDs of EARs to mark as skipped.
111
- self.set_EAR_skips: list[int] = []
112
- #: Keys are EAR IDs and values are tuples of start time, and start dir snapshot.
113
- self.set_EAR_starts: dict[int, tuple[datetime, dict[str, Any], str]] = {}
129
+ self.set_EAR_skips: dict[int, int] = {}
130
+ #: Keys are EAR IDs and values are tuples of start time, start dir snapshot, run
131
+ #: hostname, and port number.
132
+ self.set_EAR_starts: dict[
133
+ int, tuple[datetime, dict[str, Any] | None, str, int | None]
134
+ ] = {}
114
135
  #: Keys are EAR IDs and values are tuples of end time, end dir snapshot, exit
115
136
  #: code, and success boolean.
116
- self.set_EAR_ends: dict[int, tuple[datetime, dict[str, Any], int, bool]] = {}
137
+ self.set_EAR_ends: dict[
138
+ int, tuple[datetime, dict[str, Any] | None, int, bool]
139
+ ] = {}
140
+ #: Each list item is a tuple of two arrays, the first of which is a run directory
141
+ #: indices array, and the second of which is an integer array indicating with
142
+ #: which run ID each run directory is associated.
143
+ self.set_run_dirs: list[tuple[np.ndarray, np.ndarray]] = []
117
144
 
118
145
  #: Keys are IDs of jobscripts.
119
146
  self.set_js_metadata: dict[int, dict[int, dict[str, Any]]] = {}
120
147
 
121
- #: Keys are IDs of parameters to add or modify.
148
+ #: Keys are IDs of parameters to add or modify, and values are tuples of the
149
+ #: parameter value, and whether the parameter is a file.
122
150
  self.set_parameters: dict[int, tuple[Any, bool]] = {}
123
151
 
124
152
  #: Keys are parameter indices and values are dict parameter sources to merge
@@ -131,6 +159,9 @@ class PendingChanges(
131
159
  #: Keys are indices of loops, values are list of parent names.
132
160
  self.update_loop_parents: dict[int, list[str]] = {}
133
161
 
162
+ self.update_iter_data_idx: dict[int, DataIndex] = {}
163
+ self.update_run_data_idx: dict[int, DataIndex] = {}
164
+
134
165
  self.reset(is_init=True) # set up initial data structures
135
166
 
136
167
  def __bool__(self):
@@ -145,22 +176,25 @@ class PendingChanges(
145
176
  or bool(self.add_elem_IDs)
146
177
  or bool(self.add_elem_iter_IDs)
147
178
  or bool(self.add_elem_iter_EAR_IDs)
148
- or bool(self.add_submission_parts)
179
+ or bool(self.update_at_submit_metadata)
149
180
  or bool(self.add_parameters)
150
181
  or bool(self.add_files)
151
182
  or bool(self.add_template_components)
152
183
  or bool(self.add_element_sets)
153
184
  or bool(self.set_EARs_initialised)
154
- or bool(self.set_EAR_submission_indices)
185
+ or bool(self.set_EAR_submission_data)
155
186
  or bool(self.set_EAR_starts)
156
187
  or bool(self.set_EAR_ends)
157
188
  or bool(self.set_EAR_skips)
189
+ or bool(self.set_run_dirs)
158
190
  or bool(self.set_js_metadata)
159
191
  or bool(self.set_parameters)
160
192
  or bool(self.update_param_sources)
161
193
  or bool(self.update_loop_indices)
162
194
  or bool(self.update_loop_num_iters)
163
195
  or bool(self.update_loop_parents)
196
+ or bool(self.update_iter_data_idx)
197
+ or bool(self.update_run_data_idx)
164
198
  )
165
199
 
166
200
  def where_pending(self) -> list[str]:
@@ -177,6 +211,62 @@ class PendingChanges(
177
211
  """
178
212
  return self._app.persistence_logger
179
213
 
214
+ def commits_data(*data_list: str):
215
+ """Decorator that wraps `PendingChanges.commit_*` methods with arguments listing
216
+ which `PendingChanges` attributes must have non-trivial data in them for the method's
217
+ invocation to be required.
218
+
219
+ Notes
220
+ -----
221
+ This essentially provides a mapping between `PendingChanges` attributes and
222
+ `commit_*` methods. This allows us to only open the resources that need updating
223
+ in `PendingChanges.commit_all`.
224
+
225
+ We use a decorator rather than explicitly declaring the map in
226
+ `_commit_method_data_map` to make the mapping obvious near the commit methods, and
227
+ hopefully avoid us forgetting to update `_commit_method_data_map` when we modify
228
+ or add commit methods in future!
229
+
230
+ """
231
+
232
+ def decorator(func: Callable[P, T]) -> Callable[P, T]:
233
+
234
+ _commit_method_data_map[func.__name__].extend(data_list)
235
+
236
+ @wraps(func)
237
+ def inner(*args, **kwargs) -> T:
238
+ return func(*args, **kwargs)
239
+
240
+ return inner
241
+
242
+ return decorator
243
+
244
+ def get_pending_resource_map_groups(self) -> dict[tuple[str, ...], list[str]]:
245
+ """Retrieve resource map groups, where values are filtered to include only those
246
+ commit methods that must be invoked, due to pending data associated with those
247
+ methods.
248
+
249
+ Notes
250
+ -----
251
+ This method allows us to open only those resources that need to be updated, given
252
+ the state of pending data.
253
+ """
254
+
255
+ where_pending = self.where_pending()
256
+ pending_groups = {}
257
+ for res_names, methods in self.resource_map.groups.items():
258
+ req_methods = [
259
+ meth_i
260
+ for meth_i in methods
261
+ if any(
262
+ dat_j in where_pending for dat_j in _commit_method_data_map[meth_i]
263
+ )
264
+ ]
265
+ if req_methods:
266
+ pending_groups[res_names] = req_methods
267
+
268
+ return pending_groups
269
+
180
270
  @TimeIt.decorator
181
271
  def commit_all(self) -> None:
182
272
  """Commit all pending changes to disk."""
@@ -186,11 +276,10 @@ class PendingChanges(
186
276
  self.logger.debug("commit: no pending changes to commit.")
187
277
  return
188
278
 
189
- for resources, methods in self.resource_map.groups.items():
279
+ for resources, methods in self.get_pending_resource_map_groups().items():
190
280
  # for each resource, enter `using_resource` context manager in "update" mode:
191
281
  with contextlib.ExitStack() as stack:
192
282
  for res in resources:
193
- # TODO: only enter required resources!
194
283
  stack.enter_context(
195
284
  self.store.using_resource(res, "update") # type: ignore[call-overload]
196
285
  )
@@ -200,6 +289,7 @@ class PendingChanges(
200
289
  assert not (self)
201
290
 
202
291
  @TimeIt.decorator
292
+ @commits_data("add_tasks")
203
293
  def commit_tasks(self) -> None:
204
294
  """Commit pending tasks to disk."""
205
295
  if self.add_tasks:
@@ -215,6 +305,7 @@ class PendingChanges(
215
305
  self._clear_add_tasks()
216
306
 
217
307
  @TimeIt.decorator
308
+ @commits_data("add_loops")
218
309
  def commit_loops(self) -> None:
219
310
  """Commit pending loops to disk."""
220
311
  if self.add_loops:
@@ -236,11 +327,14 @@ class PendingChanges(
236
327
  self._clear_add_loops()
237
328
 
238
329
  @TimeIt.decorator
330
+ @commits_data("add_submissions")
239
331
  def commit_submissions(self) -> None:
240
332
  """Commit pending submissions to disk."""
241
333
  if self.add_submissions:
242
334
  # retrieve pending submissions:
243
- subs = self.store.get_submissions_by_ID(self.add_submissions)
335
+ subs = self.store.get_submissions_by_ID(
336
+ self.add_submissions
337
+ ) # TODO: I think this just returns add_submissions?
244
338
  sub_ids = set(self.add_submissions)
245
339
  self.logger.debug(
246
340
  f"commit: adding pending submissions with indices {sub_ids!r}"
@@ -249,16 +343,18 @@ class PendingChanges(
249
343
  self._clear_add_submissions()
250
344
 
251
345
  @TimeIt.decorator
252
- def commit_submission_parts(self) -> None:
346
+ @commits_data("update_at_submit_metadata")
347
+ def commit_at_submit_metadata(self) -> None:
253
348
  """
254
- Commit pending submission parts to disk.
349
+ Commit to disk pending at-submit-time metadata, including submission parts.
255
350
  """
256
- if self.add_submission_parts:
257
- self.logger.debug("commit: adding pending submission parts")
258
- self.store._append_submission_parts(self.add_submission_parts)
259
- self._clear_add_submission_parts()
351
+ if self.update_at_submit_metadata:
352
+ self.logger.debug("commit: adding pending at-submit metadata")
353
+ self.store._update_at_submit_metadata(self.update_at_submit_metadata)
354
+ self._clear_at_submit_metadata()
260
355
 
261
356
  @TimeIt.decorator
357
+ @commits_data("add_elem_IDs")
262
358
  def commit_elem_IDs(self) -> None:
263
359
  """
264
360
  Commit pending element ID updates to disk.
@@ -273,6 +369,7 @@ class PendingChanges(
273
369
  self._clear_add_elem_IDs()
274
370
 
275
371
  @TimeIt.decorator
372
+ @commits_data("add_elements")
276
373
  def commit_elements(self) -> None:
277
374
  """
278
375
  Commit pending elements to disk.
@@ -289,6 +386,7 @@ class PendingChanges(
289
386
  self._clear_add_elements()
290
387
 
291
388
  @TimeIt.decorator
389
+ @commits_data("add_element_sets")
292
390
  def commit_element_sets(self) -> None:
293
391
  """
294
392
  Commit pending element sets to disk.
@@ -300,6 +398,7 @@ class PendingChanges(
300
398
  self._clear_add_element_sets()
301
399
 
302
400
  @TimeIt.decorator
401
+ @commits_data("add_elem_iter_IDs")
303
402
  def commit_elem_iter_IDs(self) -> None:
304
403
  """
305
404
  Commit pending element iteration ID updates to disk.
@@ -315,6 +414,7 @@ class PendingChanges(
315
414
  self._clear_add_elem_iter_IDs()
316
415
 
317
416
  @TimeIt.decorator
417
+ @commits_data("add_elem_iters")
318
418
  def commit_elem_iters(self) -> None:
319
419
  """
320
420
  Commit pending element iterations to disk.
@@ -327,9 +427,14 @@ class PendingChanges(
327
427
  )
328
428
  self.store._append_elem_iters(iters)
329
429
  # pending EAR IDs that belong to pending iters are now committed:
330
- self.add_elem_iter_EAR_IDs = {
331
- k: v for k, v in self.add_elem_iter_EAR_IDs.items() if k not in iter_ids
332
- }
430
+ add_elem_iter_EAR_IDs_cur = copy.deepcopy(self.add_elem_iter_EAR_IDs)
431
+ self._clear_add_elem_iter_EAR_IDs() # reset to empty nested defaultdict
432
+ for iter_id, all_run_IDs in add_elem_iter_EAR_IDs_cur.items():
433
+ # only re-assign iter_IDs that have not been committed above:
434
+ if iter_id not in iter_ids:
435
+ for act_idx, run_IDs in all_run_IDs.items():
436
+ self.add_elem_iter_EAR_IDs[iter_id][act_idx].extend(run_IDs)
437
+
333
438
  # pending EARs_initialised that belong to pending iters are now committed:
334
439
  self.set_EARs_initialised = [
335
440
  i for i in self.set_EARs_initialised if i not in iter_ids
@@ -337,6 +442,7 @@ class PendingChanges(
337
442
  self._clear_add_elem_iters()
338
443
 
339
444
  @TimeIt.decorator
445
+ @commits_data("add_elem_iter_EAR_IDs")
340
446
  def commit_elem_iter_EAR_IDs(self) -> None:
341
447
  """
342
448
  Commit pending element action run ID updates to disk.
@@ -353,6 +459,7 @@ class PendingChanges(
353
459
  self._clear_add_elem_iter_EAR_IDs()
354
460
 
355
461
  @TimeIt.decorator
462
+ @commits_data("add_EARs")
356
463
  def commit_EARs(self) -> None:
357
464
  """
358
465
  Commit pending element action runs to disk.
@@ -365,12 +472,12 @@ class PendingChanges(
365
472
  self.store.num_EARs_cache = None # invalidate cache
366
473
  # pending start/end times/snapshots, submission indices, and skips that belong
367
474
  # to pending EARs are now committed (accounted for in `get_EARs` above):
368
- self.set_EAR_submission_indices = {
369
- k: v
370
- for k, v in self.set_EAR_submission_indices.items()
371
- if k not in EAR_ids
475
+ self.set_EAR_submission_data = {
476
+ k: v for k, v in self.set_EAR_submission_data.items() if k not in EAR_ids
477
+ }
478
+ self.set_EAR_skips = {
479
+ k: v for k, v in self.set_EAR_skips.items() if k not in EAR_ids
372
480
  }
373
- self.set_EAR_skips = [i for i in self.set_EAR_skips if i not in EAR_ids]
374
481
  self.set_EAR_starts = {
375
482
  k: v for k, v in self.set_EAR_starts.items() if k not in EAR_ids
376
483
  }
@@ -381,6 +488,17 @@ class PendingChanges(
381
488
  self._clear_add_EARs()
382
489
 
383
490
  @TimeIt.decorator
491
+ @commits_data("set_run_dirs")
492
+ def commit_set_run_dirs(self) -> None:
493
+ """
494
+ Commit pending run directory indices.
495
+ """
496
+ for run_dir_arr, run_idx in self.set_run_dirs:
497
+ self.store._set_run_dirs(run_dir_arr, run_idx)
498
+ self._clear_set_run_dirs()
499
+
500
+ @TimeIt.decorator
501
+ @commits_data("set_EARs_initialised")
384
502
  def commit_EARs_initialised(self) -> None:
385
503
  """
386
504
  Commit pending element action run init state updates to disk.
@@ -398,63 +516,71 @@ class PendingChanges(
398
516
  self._clear_set_EARs_initialised()
399
517
 
400
518
  @TimeIt.decorator
519
+ @commits_data("set_EAR_submission_data")
401
520
  def commit_EAR_submission_indices(self) -> None:
402
521
  """
403
522
  Commit pending element action run submission index updates to disk.
404
523
  """
405
- if self.set_EAR_submission_indices:
524
+ if self.set_EAR_submission_data:
406
525
  self.logger.debug(
407
- f"commit: updating submission indices: "
408
- f"{self.set_EAR_submission_indices!r}."
526
+ f"commit: updating submission data: {self.set_EAR_submission_data!r}."
409
527
  )
410
- self.store._update_EAR_submission_indices(self.set_EAR_submission_indices)
411
- for EAR_ID_i in self.set_EAR_submission_indices:
528
+ self.store._update_EAR_submission_data(self.set_EAR_submission_data)
529
+ for EAR_ID_i in self.set_EAR_submission_data:
412
530
  self.store.EAR_cache.pop(EAR_ID_i, None) # invalidate cache
413
- self._clear_set_EAR_submission_indices()
531
+ self._clear_EAR_submission_data()
414
532
 
415
533
  @TimeIt.decorator
534
+ @commits_data("set_EAR_starts")
416
535
  def commit_EAR_starts(self) -> None:
417
536
  """
418
537
  Commit pending element action run start information to disk.
419
538
  """
420
- # TODO: could be batched up?
421
- for EAR_id, (time, snap, hostname) in self.set_EAR_starts.items():
539
+ updates = self.set_EAR_starts
540
+ if updates:
422
541
  self.logger.debug(
423
- f"commit: adding pending start time ({time!r}), run hostname "
424
- f"({hostname!r}), and directory snapshot to EAR ID {EAR_id!r}."
542
+ f"commit: registering {len(updates)} run(s) as started: "
543
+ f"{shorten_list_str(updates)}."
425
544
  )
426
- self.store._update_EAR_start(EAR_id, time, snap, hostname)
427
- self.store.EAR_cache.pop(EAR_id, None) # invalidate cache
545
+ self.store._update_EAR_start(updates)
546
+ for run_id in updates:
547
+ self.store.EAR_cache.pop(run_id, None) # invalidate cache
428
548
  self._clear_set_EAR_starts()
429
549
 
430
550
  @TimeIt.decorator
551
+ @commits_data("set_EAR_ends")
431
552
  def commit_EAR_ends(self) -> None:
432
553
  """
433
554
  Commit pending element action run finish information to disk.
434
555
  """
435
- # TODO: could be batched up?
436
- for EAR_id, (time, snap, ext, suc) in self.set_EAR_ends.items():
556
+ updates = self.set_EAR_ends
557
+ if updates:
437
558
  self.logger.debug(
438
- f"commit: adding pending end time ({time!r}), directory snapshot, "
439
- f"exit code ({ext!r}), and success status {suc!r} to EAR ID {EAR_id!r}."
559
+ f"commit: registering {len(updates)} run(s) as ended: "
560
+ f"{shorten_list_str(updates)}, with exit codes: "
561
+ f"{shorten_list_str([i[2] for i in updates.values()])}."
440
562
  )
441
- self.store._update_EAR_end(EAR_id, time, snap, ext, suc)
442
- self.store.EAR_cache.pop(EAR_id, None) # invalidate cache
563
+ self.store._update_EAR_end(updates)
564
+ for run_id in updates:
565
+ self.store.EAR_cache.pop(run_id, None) # invalidate cache
443
566
  self._clear_set_EAR_ends()
444
567
 
445
568
  @TimeIt.decorator
569
+ @commits_data("set_EAR_skips")
446
570
  def commit_EAR_skips(self) -> None:
447
571
  """
448
572
  Commit pending element action skip flags to disk.
449
573
  """
450
- # TODO: could be batched up?
451
- for EAR_id in self.set_EAR_skips:
452
- self.logger.debug(f"commit: setting EAR ID {EAR_id!r} as skipped.")
453
- self.store._update_EAR_skip(EAR_id)
454
- self.store.EAR_cache.pop(EAR_id, None) # invalidate cache
574
+ updates = self.set_EAR_skips
575
+ if updates:
576
+ self.logger.debug(f"commit: setting {len(updates)} run IDs as skipped.")
577
+ self.store._update_EAR_skip(updates)
578
+ for run_ID in updates:
579
+ self.store.EAR_cache.pop(run_ID, None) # invalidate cache
455
580
  self._clear_set_EAR_skips()
456
581
 
457
582
  @TimeIt.decorator
583
+ @commits_data("set_js_metadata")
458
584
  def commit_js_metadata(self) -> None:
459
585
  """
460
586
  Commit pending jobscript metadata changes to disk.
@@ -467,6 +593,7 @@ class PendingChanges(
467
593
  self._clear_set_js_metadata()
468
594
 
469
595
  @TimeIt.decorator
596
+ @commits_data("add_parameters", "set_parameters")
470
597
  def commit_parameters(self) -> None:
471
598
  """Make pending parameters persistent."""
472
599
  if self.add_parameters:
@@ -485,6 +612,7 @@ class PendingChanges(
485
612
  self._clear_set_parameters()
486
613
 
487
614
  @TimeIt.decorator
615
+ @commits_data("add_files")
488
616
  def commit_files(self) -> None:
489
617
  """Add pending files to the files directory."""
490
618
  if self.add_files:
@@ -493,6 +621,7 @@ class PendingChanges(
493
621
  self._clear_add_files()
494
622
 
495
623
  @TimeIt.decorator
624
+ @commits_data("add_template_components")
496
625
  def commit_template_components(self) -> None:
497
626
  """
498
627
  Commit pending template components to disk.
@@ -503,6 +632,7 @@ class PendingChanges(
503
632
  self._clear_add_template_components()
504
633
 
505
634
  @TimeIt.decorator
635
+ @commits_data("update_param_sources")
506
636
  def commit_param_sources(self) -> None:
507
637
  """Make pending changes to parameter sources persistent."""
508
638
  if self.update_param_sources:
@@ -514,19 +644,21 @@ class PendingChanges(
514
644
  self._clear_update_param_sources()
515
645
 
516
646
  @TimeIt.decorator
647
+ @commits_data("update_loop_indices")
517
648
  def commit_loop_indices(self) -> None:
518
649
  """Make pending update to element iteration loop indices persistent."""
519
- # TODO: batch up
520
- for iter_ID, loop_idx in self.update_loop_indices.items():
650
+ updates = self.update_loop_indices
651
+ if updates:
521
652
  self.logger.debug(
522
- f"commit: updating loop indices of iteration ID {iter_ID!r} with "
523
- f"{loop_idx!r}."
653
+ f"commit: updating loop indices of {len(updates)} iteration(s)."
524
654
  )
525
- self.store._update_loop_index(iter_ID, loop_idx)
526
- self.store.element_iter_cache.pop(iter_ID, None) # invalidate cache
655
+ self.store._update_loop_index(updates)
656
+ for iter_ID in updates:
657
+ self.store.element_iter_cache.pop(iter_ID, None) # invalidate cache
527
658
  self._clear_update_loop_indices()
528
659
 
529
660
  @TimeIt.decorator
661
+ @commits_data("update_loop_num_iters")
530
662
  def commit_loop_num_iters(self) -> None:
531
663
  """Make pending update to the number of loop iterations."""
532
664
  for index, num_iters in self.update_loop_num_iters.items():
@@ -537,6 +669,7 @@ class PendingChanges(
537
669
  self._clear_update_loop_num_iters()
538
670
 
539
671
  @TimeIt.decorator
672
+ @commits_data("update_loop_parents")
540
673
  def commit_loop_parents(self) -> None:
541
674
  """Make pending update to additional loop parents."""
542
675
  for index, parents in self.update_loop_parents.items():
@@ -544,6 +677,20 @@ class PendingChanges(
544
677
  self.store._update_loop_parents(index, parents)
545
678
  self._clear_update_loop_parents()
546
679
 
680
+ @TimeIt.decorator
681
+ @commits_data("update_iter_data_idx")
682
+ def commit_iter_data_idx(self) -> None:
683
+ if self.update_iter_data_idx:
684
+ self.store._update_iter_data_indices(self.update_iter_data_idx)
685
+ self._clear_update_iter_data_idx()
686
+
687
+ @TimeIt.decorator
688
+ @commits_data("update_run_data_idx")
689
+ def commit_run_data_idx(self) -> None:
690
+ if self.update_run_data_idx:
691
+ self.store._update_run_data_indices(self.update_run_data_idx)
692
+ self._clear_update_run_data_idx()
693
+
547
694
  def _clear_add_tasks(self) -> None:
548
695
  self.add_tasks = {}
549
696
 
@@ -553,8 +700,10 @@ class PendingChanges(
553
700
  def _clear_add_submissions(self) -> None:
554
701
  self.add_submissions = {}
555
702
 
556
- def _clear_add_submission_parts(self) -> None:
557
- self.add_submission_parts = defaultdict(dict)
703
+ def _clear_at_submit_metadata(self) -> None:
704
+ self.update_at_submit_metadata = defaultdict(
705
+ lambda: {i: None for i in SUBMISSION_SUBMIT_TIME_KEYS}
706
+ )
558
707
 
559
708
  def _clear_add_elements(self) -> None:
560
709
  self.add_elements = {}
@@ -568,6 +717,9 @@ class PendingChanges(
568
717
  def _clear_add_EARs(self) -> None:
569
718
  self.add_EARs = {}
570
719
 
720
+ def _clear_set_run_dirs(self):
721
+ self.set_run_dirs = []
722
+
571
723
  def _clear_add_elem_IDs(self) -> None:
572
724
  self.add_elem_IDs = defaultdict(list)
573
725
 
@@ -580,8 +732,8 @@ class PendingChanges(
580
732
  def _clear_set_EARs_initialised(self) -> None:
581
733
  self.set_EARs_initialised = []
582
734
 
583
- def _clear_set_EAR_submission_indices(self) -> None:
584
- self.set_EAR_submission_indices = {}
735
+ def _clear_EAR_submission_data(self) -> None:
736
+ self.set_EAR_submission_data = {}
585
737
 
586
738
  def _clear_set_EAR_starts(self) -> None:
587
739
  self.set_EAR_starts = {}
@@ -590,7 +742,7 @@ class PendingChanges(
590
742
  self.set_EAR_ends = {}
591
743
 
592
744
  def _clear_set_EAR_skips(self) -> None:
593
- self.set_EAR_skips = []
745
+ self.set_EAR_skips = {}
594
746
 
595
747
  def _clear_set_js_metadata(self) -> None:
596
748
  self.set_js_metadata = defaultdict(lambda: defaultdict(dict))
@@ -619,6 +771,12 @@ class PendingChanges(
619
771
  def _clear_update_loop_parents(self) -> None:
620
772
  self.update_loop_parents = {}
621
773
 
774
+ def _clear_update_iter_data_idx(self):
775
+ self.update_iter_data_idx = defaultdict(dict)
776
+
777
+ def _clear_update_run_data_idx(self):
778
+ self.update_run_data_idx = defaultdict(dict)
779
+
622
780
  def reset(self, is_init: bool = False) -> None:
623
781
  """Clear all pending data and prepare to accept new pending data."""
624
782
 
@@ -632,11 +790,12 @@ class PendingChanges(
632
790
  self._clear_add_tasks()
633
791
  self._clear_add_loops()
634
792
  self._clear_add_submissions()
635
- self._clear_add_submission_parts()
793
+ self._clear_at_submit_metadata()
636
794
  self._clear_add_elements()
637
795
  self._clear_add_element_sets()
638
796
  self._clear_add_elem_iters()
639
797
  self._clear_add_EARs()
798
+ self._clear_set_run_dirs()
640
799
 
641
800
  self._clear_set_EARs_initialised()
642
801
  self._clear_add_elem_IDs()
@@ -647,7 +806,7 @@ class PendingChanges(
647
806
  self._clear_add_files()
648
807
  self._clear_add_template_components()
649
808
 
650
- self._clear_set_EAR_submission_indices()
809
+ self._clear_EAR_submission_data()
651
810
  self._clear_set_EAR_starts()
652
811
  self._clear_set_EAR_ends()
653
812
  self._clear_set_EAR_skips()
@@ -659,6 +818,8 @@ class PendingChanges(
659
818
  self._clear_update_loop_indices()
660
819
  self._clear_update_loop_num_iters()
661
820
  self._clear_update_loop_parents()
821
+ self._clear_update_iter_data_idx()
822
+ self._clear_update_run_data_idx()
662
823
 
663
824
 
664
825
  @dataclass
@@ -682,8 +843,8 @@ class CommitResourceMap:
682
843
  commit_loops: tuple[str, ...] | None = tuple()
683
844
  #: Resources for :py:meth:`~.PendingChanges.commit_submissions`.
684
845
  commit_submissions: tuple[str, ...] | None = tuple()
685
- #: Resources for :py:meth:`~.PendingChanges.commit_submission_parts`.
686
- commit_submission_parts: tuple[str, ...] | None = tuple()
846
+ #: Resources for :py:meth:`~.PendingChanges.commit_at_submit_metadata`.
847
+ commit_at_submit_metadata: tuple[str, ...] | None = tuple()
687
848
  #: Resources for :py:meth:`~.PendingChanges.commit_elem_IDs`.
688
849
  commit_elem_IDs: tuple[str, ...] | None = tuple()
689
850
  #: Resources for :py:meth:`~.PendingChanges.commit_elements`.
@@ -724,6 +885,13 @@ class CommitResourceMap:
724
885
  commit_loop_num_iters: tuple[str, ...] | None = tuple()
725
886
  #: Resources for :py:meth:`~.PendingChanges.commit_loop_parents`.
726
887
  commit_loop_parents: tuple[str, ...] | None = tuple()
888
+ #: Resources for :py:meth:`~.PendingChanges.commit_set_run_dirs`.
889
+ commit_set_run_dirs: tuple[str, ...] | None = tuple()
890
+ #: Resources for :py:meth:`~.PendingChanges.commit_iter_data_idx`.
891
+ commit_iter_data_idx: tuple[str, ...] | None = tuple()
892
+ #: Resources for :py:meth:`~.PendingChanges.commit_run_data_idx`.
893
+ commit_run_data_idx: tuple[str, ...] | None = tuple()
894
+
727
895
  #: A dict whose keys are tuples of resource labels and whose values are lists
728
896
  #: of :py:class:`PendingChanges` commit method names that require those resources.
729
897
  #:
@@ -66,6 +66,9 @@ class StoreResource(ABC):
66
66
  action:
67
67
  What we are opening the store for; typically either ``read`` or ``update``.
68
68
  """
69
+
70
+ # TODO: some tests?
71
+
69
72
  if action == "read":
70
73
  # reuse "update" data if set, rather than re-loading from disk -- but copy,
71
74
  # so changes made in the "read" scope do not update!
@@ -167,8 +170,6 @@ class JSONFileStoreResource(StoreResource):
167
170
 
168
171
  def _dump(self, data: Mapping | list):
169
172
  self.logger.debug(f"{self!r}: dumping JSON to file")
170
- if isinstance(data, dict) and "runs" in data:
171
- self.logger.debug(f"...runs: {data['runs']}")
172
173
  with self.fs.open(self._full_path, mode="wt") as fp:
173
174
  json.dump(data, fp, indent=2)
174
175