hpcflow-new2 0.2.0a176__py3-none-any.whl → 0.2.0a177__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/_version.py +1 -1
- hpcflow/sdk/core/cache.py +142 -0
- hpcflow/sdk/core/element.py +7 -0
- hpcflow/sdk/core/loop.py +105 -84
- hpcflow/sdk/core/loop_cache.py +140 -0
- hpcflow/sdk/core/task.py +29 -24
- hpcflow/sdk/core/utils.py +11 -1
- hpcflow/sdk/core/workflow.py +65 -22
- hpcflow/sdk/persistence/base.py +16 -3
- hpcflow/sdk/persistence/json.py +11 -4
- hpcflow/sdk/persistence/pending.py +2 -0
- hpcflow/sdk/persistence/zarr.py +8 -1
- hpcflow/tests/unit/test_utils.py +21 -0
- {hpcflow_new2-0.2.0a176.dist-info → hpcflow_new2-0.2.0a177.dist-info}/METADATA +1 -1
- {hpcflow_new2-0.2.0a176.dist-info → hpcflow_new2-0.2.0a177.dist-info}/RECORD +17 -15
- {hpcflow_new2-0.2.0a176.dist-info → hpcflow_new2-0.2.0a177.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a176.dist-info → hpcflow_new2-0.2.0a177.dist-info}/entry_points.txt +0 -0
hpcflow/_version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.2.0a176"
+__version__ = "0.2.0a177"
hpcflow/sdk/core/cache.py
ADDED
@@ -0,0 +1,142 @@
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Set, Dict
+
+from hpcflow.sdk.log import TimeIt
+
+
+@dataclass
+class DependencyCache:
+    """Class to bulk-retrieve dependencies between elements, iterations, and runs."""
+
+    run_dependencies: Dict[int, Set]
+    run_dependents: Dict[int, Set]
+    iter_run_dependencies: Dict[int, Set]
+    iter_iter_dependencies: Dict[int, Set]
+    elem_iter_dependencies: Dict[int, Set]
+    elem_elem_dependencies: Dict[int, Set]
+    elem_elem_dependents: Dict[int, Set]
+    elem_elem_dependents_rec: Dict[int, Set]
+
+    elements: Dict
+    iterations: Dict
+
+    @classmethod
+    @TimeIt.decorator
+    def build(cls, workflow):
+        num_iters = workflow.num_element_iterations
+        num_elems = workflow.num_elements
+        num_runs = workflow.num_EARs
+
+        all_store_runs = workflow._store.get_EARs(list(range(num_runs)))
+        all_store_iters = workflow._store.get_element_iterations(list(range(num_iters)))
+        all_store_elements = workflow._store.get_elements(list(range(num_elems)))
+        all_param_sources = workflow.get_all_parameter_sources()
+        all_data_idx = [
+            {
+                k: v if isinstance(v, list) else [v]
+                for k, v in i.data_idx.items()
+                if k not in ("repeats.",)
+            }
+            for i in all_store_runs
+        ]
+
+        # run dependencies and dependents
+        run_dependencies = {}
+        run_dependents = defaultdict(set)
+        for idx, i in enumerate(all_data_idx):
+            run_i_sources = set()
+            for j in i.values():
+                for k in j:
+                    run_k = all_param_sources[k].get("EAR_ID")
+                    if run_k is not None and run_k != idx:
+                        run_i_sources.add(run_k)
+            run_dependencies[idx] = run_i_sources
+            for m in run_i_sources:
+                run_dependents[m].add(idx)
+
+        # add missing:
+        for k in range(num_runs):
+            run_dependents[k]
+
+        run_dependents = dict(run_dependents)
+
+        # iteration dependencies
+        all_iter_run_IDs = {
+            i.id_: [k for j in i.EAR_IDs.values() for k in j] for i in all_store_iters
+        }
+        # for each iteration, which runs does it depend on?
+        iter_run_dependencies = {
+            k: set(j for i in v for j in run_dependencies[i])
+            for k, v in all_iter_run_IDs.items()
+        }
+
+        # for each run, which iteration does it belong to?
+        all_run_iter_IDs = {}
+        for iter_ID, run_IDs in all_iter_run_IDs.items():
+            for run_ID in run_IDs:
+                all_run_iter_IDs[run_ID] = iter_ID
+
+        # for each iteration, which iterations does it depend on?
+        iter_iter_dependencies = {
+            k: set(all_run_iter_IDs[i] for i in v)
+            for k, v in iter_run_dependencies.items()
+        }
+
+        all_elem_iter_IDs = {i.id_: i.iteration_IDs for i in all_store_elements}
+
+        elem_iter_dependencies = {
+            k: set(j for i in v for j in iter_iter_dependencies[i])
+            for k, v in all_elem_iter_IDs.items()
+        }
+
+        # for each iteration, which element does it belong to?
+        all_iter_elem_IDs = {}
+        for elem_ID, iter_IDs in all_elem_iter_IDs.items():
+            for iter_ID in iter_IDs:
+                all_iter_elem_IDs[iter_ID] = elem_ID
+
+        # element dependencies
+        elem_elem_dependencies = {
+            k: set(all_iter_elem_IDs[i] for i in v)
+            for k, v in elem_iter_dependencies.items()
+        }
+
+        # for each element, which elements depend on it (directly)?
+        elem_elem_dependents = defaultdict(set)
+        for k, v in elem_elem_dependencies.items():
+            for i in v:
+                elem_elem_dependents[i].add(k)
+
+        # for each element, which elements depend on it (recursively)?
+        elem_elem_dependents_rec = defaultdict(set)
+        for k in list(elem_elem_dependents):
+            for i in elem_elem_dependents[k]:
+                elem_elem_dependents_rec[k].add(i)
+                elem_elem_dependents_rec[k].update(
+                    {m for m in elem_elem_dependents[i] if m != k}
+                )
+
+        # add missing keys:
+        for k in range(num_elems):
+            elem_elem_dependents[k]
+            elem_elem_dependents_rec[k]
+
+        elem_elem_dependents = dict(elem_elem_dependents)
+        elem_elem_dependents_rec = dict(elem_elem_dependents_rec)
+
+        elements = workflow.get_all_elements()
+        iterations = workflow.get_all_element_iterations()
+
+        return cls(
+            run_dependencies=run_dependencies,
+            run_dependents=run_dependents,
+            iter_run_dependencies=iter_run_dependencies,
+            iter_iter_dependencies=iter_iter_dependencies,
+            elem_iter_dependencies=elem_iter_dependencies,
+            elem_elem_dependencies=elem_elem_dependencies,
+            elem_elem_dependents=elem_elem_dependents,
+            elem_elem_dependents_rec=elem_elem_dependents_rec,
+            elements=elements,
+            iterations=iterations,
+        )
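
Note: the core pattern in `DependencyCache.build` above is dependency inversion: each "X depends on Y" mapping is flipped into a "Y is depended on by X" mapping via a `defaultdict(set)`, with missing keys back-filled so every ID is present even when nothing depends on it. A minimal, self-contained sketch of that inversion (IDs are made up for illustration):

from collections import defaultdict

def invert(dependencies):
    """Flip {node: nodes-it-depends-on} into {node: nodes-that-depend-on-it}."""
    dependents = defaultdict(set)
    for node, sources in dependencies.items():
        for src in sources:
            dependents[src].add(node)
        dependents[node]  # touch the key so every node appears, as `build` does
    return dict(dependents)

deps = {0: set(), 1: {0}, 2: {0, 1}}
assert invert(deps) == {0: {1, 2}, 1: {2}, 2: set()}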
hpcflow/sdk/core/element.py
CHANGED
@@ -675,6 +675,7 @@ class ElementIteration:
             default=default,
         )

+    @TimeIt.decorator
     def get_EAR_dependencies(
         self,
         as_objects: Optional[bool] = False,
@@ -708,6 +709,7 @@ class ElementIteration:
             out = self.workflow.get_EARs_from_IDs(out)
         return out

+    @TimeIt.decorator
     def get_element_iteration_dependencies(
         self, as_objects: bool = False
     ) -> List[Union[int, app.ElementIteration]]:
@@ -719,6 +721,7 @@ class ElementIteration:
             out = self.workflow.get_element_iterations_from_IDs(out)
         return out

+    @TimeIt.decorator
     def get_element_dependencies(
         self,
         as_objects: Optional[bool] = False,
@@ -769,6 +772,7 @@ class ElementIteration:

         return out

+    @TimeIt.decorator
     def get_dependent_EARs(
         self, as_objects: bool = False
     ) -> List[Union[int, app.ElementActionRun]]:
@@ -793,6 +797,7 @@ class ElementIteration:

         return deps

+    @TimeIt.decorator
     def get_dependent_element_iterations(
         self, as_objects: bool = False
     ) -> List[Union[int, app.ElementIteration]]:
@@ -816,6 +821,7 @@ class ElementIteration:

         return deps

+    @TimeIt.decorator
     def get_dependent_elements(
         self,
         as_objects: bool = False,
@@ -1246,6 +1252,7 @@ class Element:
         """Get tasks that depend on the most recent iteration of this element."""
         return self.latest_iteration.get_dependent_tasks(as_objects=as_objects)

+    @TimeIt.decorator
     def get_dependent_elements_recursively(self, task_insert_ID=None):
         """Get downstream elements that depend on this element, including recursive
        dependencies.
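
Note: every change to `element.py` is instrumentation; the dependency getters gain `@TimeIt.decorator` so the loop-cache work elsewhere in this release can be profiled. The real `TimeIt` lives in `hpcflow.sdk.log`; purely as an illustrative stand-in (not hpcflow's implementation), such a decorator can accumulate wall-clock time per decorated function:

import time
from functools import wraps

def timeit(fn):
    """Hypothetical stand-in: record cumulative wall-clock time per function."""
    @wraps(fn)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return fn(*args, **kwargs)
        finally:
            timeit.totals[fn.__qualname__] = (
                timeit.totals.get(fn.__qualname__, 0.0)
                + time.perf_counter()
                - start
            )
    return wrapper

timeit.totals = {}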
hpcflow/sdk/core/loop.py
CHANGED
@@ -6,9 +6,11 @@ from typing import Dict, List, Optional, Tuple, Union
 from hpcflow.sdk import app
 from hpcflow.sdk.core.errors import LoopTaskSubsetError
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
+from hpcflow.sdk.core.loop_cache import LoopCache
 from hpcflow.sdk.core.parameters import InputSourceType
 from hpcflow.sdk.core.task import WorkflowTask
-from hpcflow.sdk.core.utils import check_valid_py_identifier
+from hpcflow.sdk.core.utils import check_valid_py_identifier, nth_key, nth_value
+from hpcflow.sdk.log import TimeIt

 # from .parameters import Parameter

@@ -198,6 +200,7 @@ class WorkflowLoop:

         self._validate()

+    @TimeIt.decorator
     def _validate(self):
         # task subset must be a contiguous range of task indices:
         task_indices = self.task_indices
@@ -328,6 +331,7 @@ class WorkflowLoop:
         return self.workflow.tasks[: self.task_objects[0].index]

     @staticmethod
+    @TimeIt.decorator
     def _find_iterable_parameters(loop_template: app.Loop):
         all_inputs_first_idx = {}
         all_outputs_idx = {}
@@ -355,18 +359,19 @@ class WorkflowLoop:
         return iterable_params

     @classmethod
+    @TimeIt.decorator
     def new_empty_loop(
         cls,
         index: int,
         workflow: app.Workflow,
         template: app.Loop,
-
+        iter_loop_idx: List[Dict],
     ) -> Tuple[app.WorkflowLoop, List[Dict[str, int]]]:
         parent_loops = cls._get_parent_loops(index, workflow, template)
         parent_names = [i.name for i in parent_loops]
         num_added_iters = {}
-        for
-        num_added_iters[tuple([
+        for i in iter_loop_idx:
+            num_added_iters[tuple([i[j] for j in parent_names])] = 1

         obj = cls(
             index=index,
@@ -379,6 +384,7 @@ class WorkflowLoop:
         return obj

     @classmethod
+    @TimeIt.decorator
     def _get_parent_loops(
         cls,
         index: int,
@@ -399,12 +405,14 @@ class WorkflowLoop:
             parents.append(loop_i)
         return parents

+    @TimeIt.decorator
     def get_parent_loops(self) -> List[app.WorkflowLoop]:
         """Get loops whose task subset is a superset of this loop's task subset. If two
         loops have identical task subsets, the first loop in the workflow loop list is
         considered the child."""
         return self._get_parent_loops(self.index, self.workflow, self.template)

+    @TimeIt.decorator
     def get_child_loops(self) -> List[app.WorkflowLoop]:
         """Get loops whose task subset is a subset of this loop's task subset. If two
         loops have identical task subsets, the first loop in the workflow loop list is
@@ -426,10 +434,12 @@ class WorkflowLoop:
         children = sorted(children, key=lambda x: len(next(iter(x.num_added_iterations))))
         return children

-
+    @TimeIt.decorator
+    def add_iteration(self, parent_loop_indices=None, cache: Optional[LoopCache] = None):
+        if not cache:
+            cache = LoopCache.build(self.workflow)
         parent_loops = self.get_parent_loops()
         child_loops = self.get_child_loops()
-        child_loop_names = [i.name for i in child_loops]
         parent_loop_indices = parent_loop_indices or {}
         if parent_loops and not parent_loop_indices:
             parent_loop_indices = {i.name: 0 for i in parent_loops}
@@ -458,24 +468,19 @@ class WorkflowLoop:
                     if task.insert_ID in child.task_insert_IDs
                 },
            }
+            added_iter_IDs = []
             for elem_idx in range(task.num_elements):
-
-
-
+
+                elem_ID = task.element_IDs[elem_idx]
+
                 new_data_idx = {}
-                existing_inners = []
-                for iter_i in element.iterations:
-                    if iter_i.loop_idx[self.name] == cur_loop_idx:
-                        existing_inner_i = {
-                            k: v
-                            for k, v in iter_i.loop_idx.items()
-                            if k in child_loop_names
-                        }
-                        if existing_inner_i:
-                            existing_inners.append(existing_inner_i)

                 # copy resources from zeroth iteration:
-
+                zeroth_iter_ID, zi_iter_data_idx = cache.zeroth_iters[elem_ID]
+                zi_elem_ID, zi_idx = cache.iterations[zeroth_iter_ID]
+                zi_data_idx = nth_value(cache.data_idx[zi_elem_ID], zi_idx)
+
+                for key, val in zi_data_idx.items():
                     if key.startswith("resources."):
                         new_data_idx[key] = val

@@ -493,41 +498,47 @@ class WorkflowLoop:
                     # identify element(s) from which this iterable input should be
                     # parametrised:
                     if task.insert_ID == iter_dat["output_tasks"][-1]:
-
+                        src_elem_ID = elem_ID
                         grouped_elems = None
                     else:
-
-
-
+                        src_elem_IDs_all = cache.element_dependents[elem_ID]
+                        src_elem_IDs = {
+                            k: v
+                            for k, v in src_elem_IDs_all.items()
+                            if cache.elements[k]["task_insert_ID"]
+                            == iter_dat["output_tasks"][-1]
+                        }
                         # consider groups
                         inp_group_name = inp.single_labelled_data.get("group")
                         grouped_elems = []
-                        for
+                        for src_elem_j_ID, src_elem_j_dat in src_elem_IDs.items():
                             i_in_group = any(
-
+                                k == inp_group_name
+                                for k in src_elem_j_dat["group_names"]
                             )
                             if i_in_group:
-                                grouped_elems.append(
+                                grouped_elems.append(src_elem_j_ID)

-                        if not grouped_elems and len(
+                        if not grouped_elems and len(src_elem_IDs) > 1:
                             raise NotImplementedError(
-                                f"Multiple elements found in the iterable parameter
-                                f" latest output task (insert ID: "
-                                f"{iter_dat['output_tasks'][-1]}) that can be used
-                                f"parametrise the next iteration:
+                                f"Multiple elements found in the iterable parameter "
+                                f"{inp!r}'s latest output task (insert ID: "
+                                f"{iter_dat['output_tasks'][-1]}) that can be used "
+                                f"to parametrise the next iteration: "
+                                f"{list(src_elem_IDs.keys())!r}."
                             )
-                        elif not
+                        elif not src_elem_IDs:
                             # TODO: maybe OK?
                             raise NotImplementedError(
-                                f"No elements found in the iterable parameter
-                                f" latest output task (insert ID: "
-                                f"{iter_dat['output_tasks'][-1]}) that can be used
-                                f"parametrise the next iteration."
+                                f"No elements found in the iterable parameter "
+                                f"{inp!r}'s latest output task (insert ID: "
+                                f"{iter_dat['output_tasks'][-1]}) that can be used "
+                                f"to parametrise the next iteration."
                             )
                         else:
-
+                            src_elem_ID = nth_key(src_elem_IDs, 0)

                     child_loop_max_iters = {}
                     parent_loop_same_iters = {
@@ -553,76 +564,69 @@ class WorkflowLoop:

                     # identify the ElementIteration from which this input should be
                     # parametrised:
-
+                    loop_idx_key = tuple(sorted(source_iter_loop_idx.items()))
                     if grouped_elems:
-
-                        for
-
-
-
-                            break
+                        src_data_idx = []
+                        for src_elem_ID in grouped_elems:
+                            src_data_idx.append(
+                                cache.data_idx[src_elem_ID][loop_idx_key]
+                            )
                     else:
-
-                        if iter_i.loop_idx == source_iter_loop_idx:
-                            source_iter = iter_i
-                            break
+                        src_data_idx = cache.data_idx[src_elem_ID][loop_idx_key]

-                    if not
+                    if not src_data_idx:
                         raise RuntimeError(
                             f"Could not find a source iteration with loop_idx: "
                             f"{source_iter_loop_idx!r}."
                         )

                     if grouped_elems:
-                        inp_dat_idx = [
-                            i.get_data_idx()[f"outputs.{inp.typ}"]
-                            for i in source_iter
-                        ]
+                        inp_dat_idx = [i[f"outputs.{inp.typ}"] for i in src_data_idx]
                     else:
-                        inp_dat_idx =
+                        inp_dat_idx = src_data_idx[f"outputs.{inp.typ}"]
                     new_data_idx[f"inputs.{inp.typ}"] = inp_dat_idx

                 else:
                     inp_key = f"inputs.{inp.typ}"

-                    orig_inp_src =
+                    orig_inp_src = cache.elements[elem_ID]["input_sources"][inp_key]
                     inp_dat_idx = None

                     if orig_inp_src.source_type is InputSourceType.LOCAL:
                         # keep locally defined inputs from original element
-                        inp_dat_idx =
+                        inp_dat_idx = zi_data_idx[inp_key]

                     elif orig_inp_src.source_type is InputSourceType.DEFAULT:
                         # keep default value from original element
-                        inp_dat_idx_iter_0 = element.iterations[0].get_data_idx()
                         try:
-                            inp_dat_idx =
+                            inp_dat_idx = zi_data_idx[inp_key]
                         except KeyError:
                             # if this input is required by a conditional action, and
                             # that condition is not met, then this input will not
                             # exist in the action-run data index, so use the initial
                             # iteration data index:
-                            inp_dat_idx =
+                            inp_dat_idx = zi_iter_data_idx[inp_key]

                     elif orig_inp_src.source_type is InputSourceType.TASK:
                         if orig_inp_src.task_ref not in self.task_insert_IDs:
-                            # TODO: what about groups?
                             # source the data_idx from the iteration with same parent
                             # loop indices as the new iteration to add:
-                            src_iters = []
-
+                            # src_iters = []
+                            src_data_idx = []
+                            for li_k, di_k in cache.data_idx[elem_ID].items():
                                 skip_iter = False
+                                li_k_dct = dict(li_k)
                                 for p_k, p_v in parent_loop_indices.items():
-                                    if
+                                    if li_k_dct.get(p_k) != p_v:
                                         skip_iter = True
                                         break
                                 if not skip_iter:
-
+                                    src_data_idx.append(di_k)

                             # could be multiple, but they should all have the same
                             # data index for this parameter:
-
-                            inp_dat_idx =
+                            src_data_idx = src_data_idx[0]
+                            inp_dat_idx = src_data_idx[inp_key]
                         else:
                             is_group = False
                             if (
@@ -645,19 +649,24 @@ class WorkflowLoop:
                                     # find which element in that task `element`
                                     # depends on:
                                     task_i = self.workflow.tasks.get(insert_ID=tiID)
-
-
-
-
-
-
-
-
-
+                                    elem_i_ID = task_i.element_IDs[e_idx]
+                                    src_elem_IDs_all = cache.element_dependents[
+                                        elem_i_ID
+                                    ]
+                                    src_elem_IDs_i = {
+                                        k: v
+                                        for k, v in src_elem_IDs_all.items()
+                                        if cache.elements[k]["task_insert_ID"]
+                                        == task.insert_ID
+                                    }
+
+                                    # filter src_elem_IDs_i for matching element IDs:
+                                    src_elem_IDs_i = [
+                                        i for i in src_elem_IDs_i if i == elem_ID
                                     ]
                                     if (
-                                        len(
-                                        and
+                                        len(src_elem_IDs_i) == 1
+                                        and src_elem_IDs_i[0] == elem_ID
                                     ):
                                         new_sources.append((tiID, e_idx))

@@ -680,10 +689,11 @@ class WorkflowLoop:
                     new_data_idx[inp_key] = inp_dat_idx

                 # add any locally defined sub-parameters:
+                inp_statuses = cache.elements[elem_ID]["input_statuses"]
                 inp_status_inps = set([f"inputs.{i}" for i in inp_statuses])
                 sub_params = inp_status_inps - set(new_data_idx.keys())
                 for sub_param_i in sub_params:
-                    sub_param_data_idx_iter_0 =
+                    sub_param_data_idx_iter_0 = zi_data_idx
                     try:
                         sub_param_data_idx = sub_param_data_idx_iter_0[sub_param_i]
                     except KeyError:
@@ -691,7 +701,7 @@ class WorkflowLoop:
                         # and that condition is not met, then this input will not exist in
                         # the action-run data index, so use the initial iteration data
                        # index:
-                        sub_param_data_idx =
+                        sub_param_data_idx = zi_data_idx[sub_param_i]

                     new_data_idx[sub_param_i] = sub_param_data_idx

@@ -703,16 +713,26 @@ class WorkflowLoop:
                 schema_params = set(
                     i for i in new_data_idx.keys() if len(i.split(".")) == 2
                 )
-                all_new_data_idx[(task.insert_ID,
+                all_new_data_idx[(task.insert_ID, elem_idx)] = new_data_idx

                 iter_ID_i = self.workflow._store.add_element_iteration(
-                    element_ID=
+                    element_ID=elem_ID,
                     data_idx=new_data_idx,
                     schema_parameters=list(schema_params),
                     loop_idx=new_loop_idx,
                 )
+                if cache:
+                    cache.add_iteration(
+                        iter_ID=iter_ID_i,
+                        task_insert_ID=task.insert_ID,
+                        element_ID=elem_ID,
+                        loop_idx=new_loop_idx,
+                        data_idx=new_data_idx,
+                    )

-
+                added_iter_IDs.append(iter_ID_i)
+
+            task.initialise_EARs(iter_IDs=added_iter_IDs)

         added_iters_key = tuple(parent_loop_indices[k] for k in self.parents)
         self._increment_pending_added_iters(added_iters_key)
@@ -731,7 +751,8 @@ class WorkflowLoop:
                     **par_idx,
                     **parent_loop_indices,
                     self.name: cur_loop_idx + 1,
-                }
+                },
+                cache=cache,
             )

     def test_termination(self, element_iter):
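
Note: the recurring data structure in the refactored `add_iteration` is the canonical loop-index key. A loop index such as `{"outer": 1, "inner": 0}` is a dict and therefore unhashable, so the cache keys each iteration by `tuple(sorted(loop_idx.items()))`, which is hashable and independent of the dict's insertion order (values here are made up for illustration):

loop_idx = {"outer": 1, "inner": 0}

loop_idx_key = tuple(sorted(loop_idx.items()))
assert loop_idx_key == (("inner", 0), ("outer", 1))

# the same key results whatever the dict's insertion order:
assert tuple(sorted({"inner": 0, "outer": 1}.items())) == loop_idx_key

# and the dict form is recovered losslessly:
assert dict(loop_idx_key) == loop_idx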
hpcflow/sdk/core/loop_cache.py
ADDED
@@ -0,0 +1,140 @@
+from dataclasses import dataclass
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple
+
+from hpcflow.sdk import app
+from hpcflow.sdk.core.utils import nth_key
+from hpcflow.sdk.log import TimeIt
+from hpcflow.sdk.core.cache import DependencyCache
+
+
+@dataclass
+class LoopCache:
+    """Class to store a cache for use in `Workflow.add_empty_loop` and
+    `WorkflowLoop.add_iterations`.
+
+    Attributes
+    ----------
+    element_dependents
+        Keys are element IDs, values are dicts whose keys are element IDs that depend on
+        the key element ID (via `Element.get_dependent_elements_recursively`), and whose
+        values are dicts with keys: `group_names`, which is a tuple of the string group
+        names associated with the dependent element's element set.
+    elements
+        Keys are element IDs, values are dicts with keys: `input_statuses`,
+        `input_sources`, and `task_insert_ID`.
+    zeroth_iters
+        Keys are element IDs, values are data associated with the zeroth iteration of that
+        element, namely a tuple of iteration ID and `ElementIteration.data_idx`.
+    data_idx
+        Keys are element IDs, values are data associated with all iterations of that
+        element, namely a dict whose keys are the iteration loop index as a tuple, and
+        whose values are data indices via `ElementIteration.get_data_idx()`.
+    iterations
+        Keys are iteration IDs, values are tuples of element ID and iteration index within
+        that element.
+    task_iterations
+        Keys are task insert IDs, values are lists of all iteration IDs associated with
+        that task.
+
+    """
+
+    element_dependents: Dict[int, Dict]
+    elements: Dict[int, Dict]
+    zeroth_iters: Dict[int, Tuple]
+    data_idx: Dict[int, Dict]
+    iterations: Dict[int, Tuple]
+    task_iterations: Dict[int, List[int]]
+
+    @TimeIt.decorator
+    def get_iter_IDs(self, loop: "app.Loop") -> List[int]:
+        """Retrieve a list of iteration IDs belonging to a given loop."""
+        return [j for i in loop.task_insert_IDs for j in self.task_iterations[i]]
+
+    @TimeIt.decorator
+    def get_iter_loop_indices(self, iter_IDs: List[int]) -> List[Dict[str, int]]:
+        iter_loop_idx = []
+        for i in iter_IDs:
+            elem_id, idx = self.iterations[i]
+            iter_loop_idx.append(dict(nth_key(self.data_idx[elem_id], idx)))
+        return iter_loop_idx
+
+    @TimeIt.decorator
+    def update_loop_indices(self, new_loop_name: str, iter_IDs: List[int]):
+        elem_ids = {v[0] for k, v in self.iterations.items() if k in iter_IDs}
+        for i in elem_ids:
+            new_item = {}
+            for k, v in self.data_idx[i].items():
+                new_k = dict(k)
+                new_k.update({new_loop_name: 0})
+                new_item[tuple(sorted(new_k.items()))] = v
+            self.data_idx[i] = new_item
+
+    @TimeIt.decorator
+    def add_iteration(self, iter_ID, task_insert_ID, element_ID, loop_idx, data_idx):
+        """Update the cache to include a newly added iteration."""
+        self.task_iterations[task_insert_ID].append(iter_ID)
+        new_iter_idx = len(self.data_idx[element_ID])
+        self.data_idx[element_ID][tuple(sorted(loop_idx.items()))] = data_idx
+        self.iterations[iter_ID] = (element_ID, new_iter_idx)
+
+    @classmethod
+    @TimeIt.decorator
+    def build(cls, workflow: "app.Workflow", loops: Optional[List["app.Loop"]] = None):
+        """Build a cache of data for use in adding loops and iterations."""
+
+        deps_cache = DependencyCache.build(workflow)
+
+        loops = list(workflow.template.loops) + (loops or [])
+        task_iIDs = set(j for i in loops for j in i.task_insert_IDs)
+        tasks = [workflow.tasks.get(insert_ID=i) for i in sorted(task_iIDs)]
+        elem_deps = {}
+
+        # keys: element IDs, values: dict with keys: tuple(loop_idx), values: data index
+        data_idx_cache = {}
+
+        # keys: iteration IDs, values: tuple of (element ID, integer index into values
+        # dict in `data_idx_cache` [accessed via `.keys()[index]`])
+        iters = {}
+
+        # keys: element IDs, values: dict with keys: "input_statuses", "input_sources",
+        # "task_insert_ID":
+        elements = {}
+
+        zeroth_iters = {}
+        task_iterations = defaultdict(list)
+        for task in tasks:
+            for elem_idx in task.element_IDs:
+                element = deps_cache.elements[elem_idx]
+                inp_statuses = task.template.get_input_statuses(element.element_set)
+                elements[element.id_] = {
+                    "input_statuses": inp_statuses,
+                    "input_sources": element.input_sources,
+                    "task_insert_ID": task.insert_ID,
+                }
+                elem_deps[element.id_] = {
+                    i: {
+                        "group_names": tuple(
+                            j.name for j in deps_cache.elements[i].element_set.groups
+                        ),
+                    }
+                    for i in deps_cache.elem_elem_dependents_rec[element.id_]
+                }
+                elem_iters = {}
+                for idx, iter_i in enumerate(element.iterations):
+                    if idx == 0:
+                        zeroth_iters[element.id_] = (iter_i.id_, iter_i.data_idx)
+                    loop_idx_key = tuple(sorted(iter_i.loop_idx.items()))
+                    elem_iters[loop_idx_key] = iter_i.get_data_idx()
+                    task_iterations[task.insert_ID].append(iter_i.id_)
+                    iters[iter_i.id_] = (element.id_, idx)
+                data_idx_cache[element.id_] = elem_iters
+
+        return cls(
+            element_dependents=elem_deps,
+            elements=elements,
+            zeroth_iters=zeroth_iters,
+            data_idx=data_idx_cache,
+            iterations=iters,
+            task_iterations=dict(task_iterations),
+        )
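
Note: `LoopCache.iterations` maps an iteration ID to `(element_ID, index)`, where the index is a position in the insertion-ordered `data_idx[element_ID]` dict; `get_iter_loop_indices` recovers the loop-index key from that position with `nth_key` (added to `utils.py` below). A toy walk-through with invented IDs:

from itertools import islice

def nth_key(dct, n):  # as defined in hpcflow/sdk/core/utils.py below
    it = iter(dct)
    next(islice(it, n, n), None)
    return next(it)

data_idx = {0: {(("loop1", 0),): {"inputs.p1": 3}}}  # element 0, one iteration
iterations = {10: (0, 0)}                            # iter 10 -> (elem 0, pos 0)

# mimic `add_iteration` for a new iteration (ID 11) of the same element:
new_iter_idx = len(data_idx[0])                      # next insertion position
data_idx[0][(("loop1", 1),)] = {"inputs.p1": 7}      # dicts keep insertion order
iterations[11] = (0, new_iter_idx)

# mimic `get_iter_loop_indices`: position -> loop-index key -> dict form:
elem_id, idx = iterations[11]
assert dict(nth_key(data_idx[elem_id], idx)) == {"loop1": 1}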
hpcflow/sdk/core/task.py
CHANGED
@@ -2062,29 +2062,36 @@ class WorkflowTask:
         return element_dat_idx

     @TimeIt.decorator
-    def initialise_EARs(self) -> List[int]:
+    def initialise_EARs(self, iter_IDs: Optional[List[int]] = None) -> List[int]:
         """Try to initialise any uninitialised EARs of this task."""
+        if iter_IDs:
+            iters = self.workflow.get_element_iterations_from_IDs(iter_IDs)
+        else:
+            iters = []
+            for element in self.elements:
+                # We don't yet cache Element objects, so `element`, and also its
+                # `ElementIteration`s, are transient. So there is no reason to update
+                # these objects in memory to account for the new EARs. Subsequent calls
+                # to `WorkflowTask.elements` will retrieve correct element data from the
+                # store. This might need changing once/if we start caching Element
+                # objects.
+                iters.extend(element.iterations)
+
         initialised = []
-        for
-
-
-
-
-
-
-
-
-
-
-
-
-
-            f"UnsetParameterDataError raised: cannot yet initialise runs."
-            )
-            pass
-        else:
-            iter_i._EARs_initialised = True
-            self.workflow.set_EARs_initialised(iter_i.id_)
+        for iter_i in iters:
+            if not iter_i.EARs_initialised:
+                try:
+                    self._initialise_element_iter_EARs(iter_i)
+                    initialised.append(iter_i.id_)
+                except UnsetParameterDataError:
+                    # raised by `Action.test_rules`; cannot yet initialise EARs
+                    self.app.logger.debug(
+                        f"UnsetParameterDataError raised: cannot yet initialise runs."
+                    )
+                    pass
+                else:
+                    iter_i._EARs_initialised = True
+                    self.workflow.set_EARs_initialised(iter_i.id_)
         return initialised

     @TimeIt.decorator
@@ -2097,7 +2104,6 @@ class WorkflowTask:
         param_src_updates = {}

         count = 0
-        # TODO: generator is an IO op here, can be pre-calculated/cached?
         for act_idx, action in self.template.all_schema_actions():
             log_common = (
                 f"for action {act_idx} of element iteration {element_iter.index} of "
@@ -2151,8 +2157,7 @@ class WorkflowTask:
             metadata={},
         )

-
-        self.workflow._store.update_param_source(pid, src)
+        self.workflow._store.update_param_source(param_src_updates)

     @TimeIt.decorator
     def _add_element_set(self, element_set):
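
Note: `_initialise_element_iter_EARs` previously issued one `update_param_source` store call per parameter; it now accumulates everything in `param_src_updates` and hands the whole mapping over in a single call, which the store merges into its pending updates (see the new `PersistentStore.update_param_source` signature below). A toy sketch of the batched shape (`ToyStore` is a stand-in, not hpcflow's `PersistentStore`):

class ToyStore:
    def __init__(self):
        self.pending = {}  # stands in for `self._pending.update_param_sources`

    def update_param_source(self, param_sources, save=True):
        # one dict-merge instead of N separate store calls:
        self.pending.update(param_sources)

store = ToyStore()
store.update_param_source({5: {"EAR_ID": 2}, 9: {"EAR_ID": 3}})
assert store.pending == {5: {"EAR_ID": 2}, 9: {"EAR_ID": 3}}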
hpcflow/sdk/core/utils.py
CHANGED
@@ -3,7 +3,7 @@ import enum
 from functools import wraps
 import contextlib
 import hashlib
-from itertools import accumulate
+from itertools import accumulate, islice
 import json
 import keyword
 import os
@@ -871,3 +871,13 @@ def dict_values_process_flat(d, callable):
         out[k] = proc_idx_k

     return out
+
+
+def nth_key(dct, n):
+    it = iter(dct)
+    next(islice(it, n, n), None)
+    return next(it)
+
+
+def nth_value(dct, n):
+    return dct[nth_key(dct, n)]
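
Note: `nth_key` avoids materialising `list(dct)` just to index one key. `islice(it, n, n)` yields nothing but consumes the first `n` keys from the iterator (the itertools "consume" recipe), so the following `next(it)` returns key number `n`. Out-of-range `n` raises `StopIteration` from `next`, and negative `n` raises `ValueError` from `islice`, which is what the new `test_nth_key_raises` test below relies on:

from itertools import islice

dct = {"a": 1, "b": 2, "c": 3}

it = iter(dct)
next(islice(it, 2, 2), None)  # consumes "a" and "b", yields nothing itself
assert next(it) == "c"        # the third key (n == 2)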
hpcflow/sdk/core/workflow.py
CHANGED
@@ -25,6 +25,7 @@ from hpcflow.sdk.core import (
     ABORT_EXIT_CODE,
 )
 from hpcflow.sdk.core.actions import EARStatus
+from hpcflow.sdk.core.loop_cache import LoopCache
 from hpcflow.sdk.log import TimeIt
 from hpcflow.sdk.persistence import store_cls_from_str, DEFAULT_STORE_FORMAT
 from hpcflow.sdk.persistence.base import TEMPLATE_COMP_TYPES, AnySEAR
@@ -41,6 +42,7 @@ from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
 from hpcflow.sdk.typing import PathLike
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
 from .utils import (
+    nth_key,
     read_JSON_file,
     read_JSON_string,
     read_YAML_str,
@@ -625,19 +627,28 @@ class Workflow:
             )
             with wk._store.cached_load():
                 with wk.batch_update(is_workflow_creation=True):
-
+                    with wk._store.cache_ctx():
+                        for idx, task in enumerate(template.tasks):
+                            if status:
+                                status.update(
+                                    f"Adding task {idx + 1}/{len(template.tasks)} "
+                                    f"({task.name!r})..."
+                                )
+                            wk._add_task(task)
                         if status:
                             status.update(
-                                f"
-                                f"({task.name!r})..."
+                                f"Preparing to add {len(template.loops)} loops..."
                             )
-
-
-
-
-
-
-
+                        if template.loops:
+                            # TODO: if loop with non-initialisable actions, will fail
+                            cache = LoopCache.build(workflow=wk, loops=template.loops)
+                            for idx, loop in enumerate(template.loops):
+                                if status:
+                                    status.update(
+                                        f"Adding loop {idx + 1}/"
+                                        f"{len(template.loops)} ({loop.name!r})"
+                                    )
+                                wk._add_loop(loop, cache=cache, status=status)
         except Exception:
             if status:
                 status.stop()
@@ -1101,7 +1112,7 @@ class Workflow:

     @TimeIt.decorator
     def _add_empty_loop(
-        self, loop: app.Loop
+        self, loop: app.Loop, cache: LoopCache
     ) -> Tuple[app.WorkflowLoop, List[app.ElementIteration]]:
         """Add a new loop (zeroth iterations only) to the workflow."""

@@ -1114,15 +1125,15 @@ class Workflow:
         self.template._add_empty_loop(loop_c)

         # all these element iterations will be initialised for the new loop:
-
-
+        iter_IDs = cache.get_iter_IDs(loop_c)
+        iter_loop_idx = cache.get_iter_loop_indices(iter_IDs)

         # create and insert a new WorkflowLoop:
         new_loop = self.app.WorkflowLoop.new_empty_loop(
             index=new_index,
             workflow=self,
             template=loop_c,
-
+            iter_loop_idx=iter_loop_idx,
         )
         self.loops.add_object(new_loop)
         wk_loop = self.loops[new_index]
@@ -1144,15 +1155,28 @@ class Workflow:

         self._pending["loops"].append(new_index)

+        # update cache loop indices:
+        cache.update_loop_indices(new_loop_name=loop_c.name, iter_IDs=iter_IDs)
+
         return wk_loop

     @TimeIt.decorator
-    def _add_loop(
-
+    def _add_loop(
+        self, loop: app.Loop, cache: Optional[Dict] = None, status: Optional[Any] = None
+    ) -> None:
+        if not cache:
+            cache = LoopCache.build(workflow=self, loops=[loop])
         new_wk_loop = self._add_empty_loop(loop, cache)
         if loop.num_iterations is not None:
             # fixed number of iterations, so add remaining N > 0 iterations:
-
-
+            if status:
+                status_prev = status.status
+            for iter_idx in range(loop.num_iterations - 1):
+                if status:
+                    status.update(
+                        f"{status_prev}: iteration {iter_idx + 2}/{loop.num_iterations}."
+                    )
+                new_wk_loop.add_iteration(cache=cache)

     def add_loop(self, loop: app.Loop) -> None:
         """Add a loop to a subset of workflow tasks."""
@@ -1326,6 +1350,7 @@ class Workflow:
             iters.append(iter_i)
         return iters

+    @TimeIt.decorator
     def get_elements_from_IDs(self, id_lst: Iterable[int]) -> List[app.Element]:
         """Return element objects from a list of IDs."""

@@ -1334,6 +1359,7 @@ class Workflow:
         task_IDs = [i.task_ID for i in store_elems]
         store_tasks = self._store.get_tasks_by_IDs(task_IDs)

+        element_idx_by_task = defaultdict(set)
         index_paths = []
         for el, tk in zip(store_elems, store_tasks):
             elem_idx = tk.element_IDs.index(el.id_)
@@ -1343,15 +1369,23 @@ class Workflow:
                     "task_idx": tk.index,
                 }
             )
+            element_idx_by_task[tk.index].add(elem_idx)
+
+        elements_by_task = {}
+        for task_idx, elem_idx in element_idx_by_task.items():
+            task = self.tasks[task_idx]
+            elements_by_task[task_idx] = dict(
+                zip(elem_idx, task.elements[list(elem_idx)])
+            )

         objs = []
         for idx_dat in index_paths:
-
-            elem = task.elements[idx_dat["elem_idx"]]
+            elem = elements_by_task[idx_dat["task_idx"]][idx_dat["elem_idx"]]
             objs.append(elem)

         return objs

+    @TimeIt.decorator
     def get_element_iterations_from_IDs(
         self, id_lst: Iterable[int]
     ) -> List[app.ElementIteration]:
@@ -1365,6 +1399,8 @@ class Workflow:
         task_IDs = [i.task_ID for i in store_elems]
         store_tasks = self._store.get_tasks_by_IDs(task_IDs)

+        element_idx_by_task = defaultdict(set)
+
         index_paths = []
         for it, el, tk in zip(store_iters, store_elems, store_tasks):
             iter_idx = el.iteration_IDs.index(it.id_)
@@ -1376,11 +1412,18 @@ class Workflow:
                     "task_idx": tk.index,
                 }
             )
+            element_idx_by_task[tk.index].add(elem_idx)
+
+        elements_by_task = {}
+        for task_idx, elem_idx in element_idx_by_task.items():
+            task = self.tasks[task_idx]
+            elements_by_task[task_idx] = dict(
+                zip(elem_idx, task.elements[list(elem_idx)])
+            )

         objs = []
         for idx_dat in index_paths:
-
-            elem = task.elements[idx_dat["elem_idx"]]
+            elem = elements_by_task[idx_dat["task_idx"]][idx_dat["elem_idx"]]
             iter_ = elem.iterations[idx_dat["iter_idx"]]
             objs.append(iter_)
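
Note: `get_elements_from_IDs` and `get_element_iterations_from_IDs` now group the requested element indices by task and fetch each task's elements in one sliced access, answering the individual lookups from the prefetched mapping rather than retrieving elements one at a time. A runnable sketch of the grouping pattern (`StubTask` mimics the list-indexable `WorkflowTask.elements`):

from collections import defaultdict

class StubTask:
    def __init__(self, n):
        self._elems = [f"elem-{i}" for i in range(n)]

    @property
    def elements(self):
        return self  # so `task.elements[[3, 5]]` works, as in hpcflow

    def __getitem__(self, idx_list):
        return [self._elems[i] for i in idx_list]

tasks = [StubTask(6), StubTask(4)]
requests = [(0, 3), (0, 5), (1, 2)]  # (task_idx, elem_idx) pairs

element_idx_by_task = defaultdict(set)
for task_idx, elem_idx in requests:
    element_idx_by_task[task_idx].add(elem_idx)

elements_by_task = {}
for task_idx, elem_idx in element_idx_by_task.items():
    idx = list(elem_idx)
    # one bulk retrieval per task instead of one per requested element:
    elements_by_task[task_idx] = dict(zip(idx, tasks[task_idx].elements[idx]))

assert elements_by_task[0][5] == "elem-5"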
hpcflow/sdk/persistence/base.py
CHANGED
@@ -716,6 +716,11 @@ class PersistentStore(ABC):
         """Cache for number of persistent tasks."""
         return self._cache["num_tasks"]

+    @property
+    def num_EARs_cache(self):
+        """Cache for total number of persistent EARs."""
+        return self._cache["num_EARs"]
+
     @property
     def param_sources_cache(self):
         """Cache for persistent parameter sources."""
@@ -730,6 +735,10 @@ class PersistentStore(ABC):
     def num_tasks_cache(self, value):
         self._cache["num_tasks"] = value

+    @num_EARs_cache.setter
+    def num_EARs_cache(self, value):
+        self._cache["num_EARs"] = value
+
     def _reset_cache(self):
         self._cache = {
             "tasks": {},
@@ -739,6 +748,7 @@ class PersistentStore(ABC):
             "param_sources": {},
             "num_tasks": None,
             "parameters": {},
+            "num_EARs": None,
         }

     @contextlib.contextmanager
@@ -873,6 +883,7 @@ class PersistentStore(ABC):
         """Get the total number of persistent and pending element iterations."""
         return self._get_num_persistent_elem_iters() + len(self._pending.add_elem_iters)

+    @TimeIt.decorator
     def _get_num_total_EARs(self):
         """Get the total number of persistent and pending EARs."""
         return self._get_num_persistent_EARs() + len(self._pending.add_EARs)
@@ -1296,9 +1307,11 @@ class PersistentStore(ABC):
         self.save()

     @TimeIt.decorator
-    def update_param_source(
-        self
-
+    def update_param_source(
+        self, param_sources: Dict[int, Dict], save: bool = True
+    ) -> None:
+        self.logger.debug(f"Updating parameter sources with {param_sources!r}.")
+        self._pending.update_param_sources.update(param_sources)
         if save:
             self.save()
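
Note: `num_EARs_cache` follows the same property/setter pair as the existing `num_tasks_cache`: the getter reads a slot in `self._cache`, and the setter both seeds the slot and, when set to `None`, invalidates it (as `PendingChanges.commit_EARs` does below once new EARs are committed). A minimal sketch of the pattern:

class StoreSketch:
    def __init__(self):
        self._cache = {"num_EARs": None}

    @property
    def num_EARs_cache(self):
        return self._cache["num_EARs"]

    @num_EARs_cache.setter
    def num_EARs_cache(self, value):
        self._cache["num_EARs"] = value

s = StoreSketch()
s.num_EARs_cache = 10        # seed after an expensive count
assert s.num_EARs_cache == 10
s.num_EARs_cache = None      # invalidate once new EARs are committed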
hpcflow/sdk/persistence/json.py
CHANGED
@@ -303,12 +303,13 @@ class JSONPersistentStore(PersistentStore):

     def _get_num_persistent_tasks(self) -> int:
         """Get the number of persistent tasks."""
-        if self.num_tasks_cache is not None:
+        if self.use_cache and self.num_tasks_cache is not None:
             num = self.num_tasks_cache
         else:
             with self.using_resource("metadata", action="read") as md:
                 num = len(md["tasks"])
-
+        if self.use_cache and self.num_tasks_cache is None:
+            self.num_tasks_cache = num
         return num

     def _get_num_persistent_loops(self) -> int:
@@ -333,8 +334,14 @@ class JSONPersistentStore(PersistentStore):

     def _get_num_persistent_EARs(self) -> int:
         """Get the number of persistent EARs."""
-
-
+        if self.use_cache and self.num_EARs_cache is not None:
+            num = self.num_EARs_cache
+        else:
+            with self.using_resource("metadata", action="read") as md:
+                num = len(md["runs"])
+        if self.use_cache and self.num_EARs_cache is None:
+            self.num_EARs_cache = num
+        return num

     def _get_num_persistent_parameters(self):
         with self.using_resource("parameters", "read") as params:
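
Note: both count getters now use a read-through cache gated on `use_cache`: return the memoised value when present, otherwise do the expensive metadata read and memoise the result for next time. Condensed to its essentials (the class is illustrative, not hpcflow's store):

class CountCache:
    def __init__(self, use_cache):
        self.use_cache = use_cache
        self._num = None
        self.reads = 0  # counts expensive reads, for demonstration

    def _expensive_count(self):
        self.reads += 1  # stands in for reading the metadata resource
        return 42

    def get_num(self):
        if self.use_cache and self._num is not None:
            return self._num
        num = self._expensive_count()
        if self.use_cache and self._num is None:
            self._num = num
        return num

c = CountCache(use_cache=True)
assert c.get_num() == 42 and c.get_num() == 42 and c.reads == 1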
hpcflow/sdk/persistence/pending.py
CHANGED
@@ -275,6 +275,7 @@ class PendingChanges:
         EAR_ids = list(self.add_EARs.keys())
         self.logger.debug(f"commit: adding pending EARs with IDs: {EAR_ids!r}")
         self.store._append_EARs(EARs)
+        self.store.num_EARs_cache = None  # invalidate cache
         # pending start/end times/snapshots, submission indices, and skips that belong
         # to pending EARs are now committed (accounted for in `get_EARs` above):
         self.set_EAR_submission_indices = {
@@ -408,6 +409,7 @@ class PendingChanges:
     @TimeIt.decorator
     def commit_loop_indices(self) -> None:
         """Make pending update to element iteration loop indices persistent."""
+        # TODO: batch up
         for iter_ID, loop_idx in self.update_loop_indices.items():
             self.logger.debug(
                 f"commit: updating loop indices of iteration ID {iter_ID!r} with "
hpcflow/sdk/persistence/zarr.py
CHANGED
@@ -774,9 +774,16 @@ class ZarrPersistentStore(PersistentStore):
         """Get the number of persistent element iterations."""
         return len(self._get_iters_arr())

+    @TimeIt.decorator
     def _get_num_persistent_EARs(self) -> int:
         """Get the number of persistent EARs."""
-
+        if self.use_cache and self.num_EARs_cache is not None:
+            num = self.num_EARs_cache
+        else:
+            num = len(self._get_EARs_arr())
+        if self.use_cache and self.num_EARs_cache is None:
+            self.num_EARs_cache = num
+        return num

     def _get_num_persistent_parameters(self):
         return len(self._get_parameter_base_array())
hpcflow/tests/unit/test_utils.py
CHANGED
@@ -13,6 +13,8 @@ from hpcflow.sdk.core.utils import (
     get_nested_indices,
     is_fsspec_url,
     linspace_rect,
+    nth_key,
+    nth_value,
     process_string_nodes,
     replace_items,
     check_valid_py_identifier,
@@ -556,3 +558,22 @@ def test_dict_values_process_flat_single_item_lists():
         "b": [4],
         "c": [5],
     }
+
+
+def test_nth_key():
+    dct = {"a": 1, "b": 2}
+    assert [nth_key(dct, i) for i in range(len(dct))] == ["a", "b"]
+
+
+def test_nth_value():
+    dct = {"a": 1, "b": 2}
+    assert [nth_value(dct, i) for i in range(len(dct))] == [1, 2]
+
+
+def test_nth_key_raises():
+    dct = {"a": 1, "b": 2}
+    with pytest.raises(Exception):
+        nth_key(dct, 2)
+
+    with pytest.raises(Exception):
+        nth_key(dct, -1)
{hpcflow_new2-0.2.0a176.dist-info → hpcflow_new2-0.2.0a177.dist-info}/RECORD
RENAMED
@@ -1,7 +1,7 @@
 hpcflow/__init__.py,sha256=WIETuRHeOp2SqUqHUzpjQ-lk9acbYv-6aWOhZPRdlhs,64
 hpcflow/__pyinstaller/__init__.py,sha256=YOzBlPSck6slucv6lJM9K80JtsJWxXRL00cv6tRj3oc,98
 hpcflow/__pyinstaller/hook-hpcflow.py,sha256=SeMopsPkhCyd9gqIrzwFNRj3ZlkUlUYl-74QYz61mo4,1089
-hpcflow/_version.py,sha256=
+hpcflow/_version.py,sha256=c6sVLAvL-ZtlEF_CBbI6lCJoA-qh3KPMN5Smq6FFaPk,26
 hpcflow/app.py,sha256=d-kgfnZNlqlCi2H8bK26714brD_u3ibN3FaEZgjF9aA,1332
 hpcflow/cli.py,sha256=G2J3D9v6MnMWOWMMWK6UEKLn_6wnV9lT_qygEBBxg-I,66
 hpcflow/data/demo_data_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -46,24 +46,26 @@ hpcflow/sdk/config/config_file.py,sha256=JlMcprj0aujFVk8552ahP2f8EXB0tglMaHwzbcG
 hpcflow/sdk/config/errors.py,sha256=2D7HJ1dbyeoD3xk4MuaGSsbJsUyQzyw8kaThEBZfP2I,6876
 hpcflow/sdk/core/__init__.py,sha256=GcIklEsXy3M5PWpmxyhd2KoI0u6HjXRIjD_aR1bgRjo,215
 hpcflow/sdk/core/actions.py,sha256=66CHgwYAB0oCR6oB5bNbBdUGRGTU3juS1XcMNjj3vP0,77068
+hpcflow/sdk/core/cache.py,sha256=MDzqsCg8uMjxEdQ-8ta-uG042yiPrzQoVKMeE6jYW8k,5127
 hpcflow/sdk/core/command_files.py,sha256=GEFlgZv7g9lkFoNgwyDtmlI_90e2TWliCJuJimnJZts,18685
 hpcflow/sdk/core/commands.py,sha256=5SKxSBuYz8sSvfpp9p5utBwMoQV6Pd2KlGBCpXAHDxE,12741
-hpcflow/sdk/core/element.py,sha256=
+hpcflow/sdk/core/element.py,sha256=kWEbGWzrXCwhQ1Ie1RFm1v5_q3MQkCDPEIp01nHIf1Q,47202
 hpcflow/sdk/core/environment.py,sha256=DGUz1NvliKh6opP0IueGHD69rn_8wFLhDsq6kAmEgM4,4849
 hpcflow/sdk/core/errors.py,sha256=ku4wwsrmxBpJBFflUeZD6vrmAqgC7H02VdlRG4aAGqQ,9292
 hpcflow/sdk/core/json_like.py,sha256=LRZsUd1tn8zXC8fESeiXs7Eko-VdnB8zcXiqixKVcZM,18874
-hpcflow/sdk/core/loop.py,sha256=
+hpcflow/sdk/core/loop.py,sha256=vj3b0jRCJxkKdhURYTgULoDJ6U3LzAYZMXBzqcCMHr8,31506
+hpcflow/sdk/core/loop_cache.py,sha256=BBmJn_pS11gaiHS8qvujBpzWLzPsfs8N6iYIBkZtIwI,5881
 hpcflow/sdk/core/object_list.py,sha256=HASx7AMniX82bTlROIgIvrjE_DupmwDgxfkfROmI3GA,20168
 hpcflow/sdk/core/parallel.py,sha256=LI-g-qOuOR1oaEUWVT0qW0hmiP9hsJyUP8_IfSTKYYo,95
 hpcflow/sdk/core/parameters.py,sha256=0h1M-fXqOVgruyM0Au7Fo38cUbHgDNEPd1Alb1FULxE,65588
 hpcflow/sdk/core/rule.py,sha256=3jVsSZCBv4Odxy8QbSbKo9ZcRuU-5DRJoNK8adXCEpI,4567
 hpcflow/sdk/core/run_dir_files.py,sha256=_k-hA7dlry9GZw5ZXcntFcPGxg07p03hnHSM5S-2G2Y,2197
-hpcflow/sdk/core/task.py,sha256
+hpcflow/sdk/core/task.py,sha256=TTAn9aeJOLyso7t11wt87wxPDVi037vwpFgF9rCfZwQ,122319
 hpcflow/sdk/core/task_schema.py,sha256=TipXzC2guu9zilv0En-rHt6lUCTSIj5faI4lVWQdUbA,32346
 hpcflow/sdk/core/test_utils.py,sha256=IhCLvRzDuG4hVNGeGulGKfZEgg7Ow-vgiEqewzMiaZ4,9762
-hpcflow/sdk/core/utils.py,sha256=
+hpcflow/sdk/core/utils.py,sha256=cpwfoHgbHanZQXmVZRN3VRW8X-zZxb1I6T0v2tWgBK0,25811
 hpcflow/sdk/core/validation.py,sha256=KBKiy5DdfGiGmMaB0HdKTY0V972u5dJzvkYkX0_KtCo,518
-hpcflow/sdk/core/workflow.py,sha256=
+hpcflow/sdk/core/workflow.py,sha256=ziKn1cA4s_eHKPMzyKfHF4bVNF7bfho4dko5qtyZKjU,113111
 hpcflow/sdk/core/zarr_io.py,sha256=V_Zm6uSiuaCbXyHFJUO74K1pAr4Zqrj3aLCBjohCwvs,5724
 hpcflow/sdk/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hpcflow/sdk/data/config_file_schema.yaml,sha256=7i3z_m3GBRtLyB4c7qPngnlQWqcIq1CyCcOysDyq4es,791
@@ -81,12 +83,12 @@ hpcflow/sdk/helper/helper.py,sha256=MkjYKHox1F4XOpy-20sCCDUTWUbQY84QpWZkcpSq9n8,
 hpcflow/sdk/helper/watcher.py,sha256=hLqgwXtZw-6ihNUUcWYnZw8TCyD_AdhYE7abOrO2r_0,4003
 hpcflow/sdk/log.py,sha256=_DA5nNS8BoSIFB3d9nrIjbxNDxFflEaL3Ubkq8UYQK8,5735
 hpcflow/sdk/persistence/__init__.py,sha256=IzWycfiO6rDn_7Kocw4Df5ETe9BSoaqqxG7Yp4FW_ls,900
-hpcflow/sdk/persistence/base.py,sha256=
-hpcflow/sdk/persistence/json.py,sha256=
-hpcflow/sdk/persistence/pending.py,sha256=
+hpcflow/sdk/persistence/base.py,sha256=cvk2Uqd671ZFe6JEP_UrZ7W0q2mZjimyA8DFvss8hdo,62030
+hpcflow/sdk/persistence/json.py,sha256=55F4Txa50I9HzfETPqwo6gAOBRaoewMHGR3V4-2Fifc,22013
+hpcflow/sdk/persistence/pending.py,sha256=XktGkRpJmlyjceOiDY0GEL2xRl6k-gbjt057LmEj5oc,25656
 hpcflow/sdk/persistence/store_resource.py,sha256=oEyocRqa8Uym-57UFosrwate-Xw9O7i2FM82TxHc4m0,4307
 hpcflow/sdk/persistence/utils.py,sha256=yQT6gS-Ipj2N6grtlV5d0czxxKE0CaeqAkXA1247XGo,1522
-hpcflow/sdk/persistence/zarr.py,sha256=
+hpcflow/sdk/persistence/zarr.py,sha256=otZbR17O3ZNIot7uVAP1UdWs_L0PHS682LuWk3z1_1g,45708
 hpcflow/sdk/runtime.py,sha256=_in5ojiy9R8fD1ZNbdE6PDmZx6kSaiG9WPB6kVBFE7k,9217
 hpcflow/sdk/submission/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hpcflow/sdk/submission/jobscript.py,sha256=Z9NUzkIcmoFw-XAtG8FdLpO2LtMt3czk1v1BnbM1eZw,44678
@@ -142,14 +144,14 @@ hpcflow/tests/unit/test_slurm.py,sha256=ewfNuXXUEEelAxcd7MBbAQ-RCvU8xBenHTAyfXYF
 hpcflow/tests/unit/test_submission.py,sha256=kQ3ksjGlfp47AYuwTA27RDX2XxRU3YxKlKC1ACTbXw8,16682
 hpcflow/tests/unit/test_task.py,sha256=QJuEpJ0y0nBesprgoau5R2kFZBCW-ygNmYatLT_M5-o,80227
 hpcflow/tests/unit/test_task_schema.py,sha256=j5HHxoqq4Mb223jKcusgX-C6-TsmKG0PLjYQ4M01ZHo,4531
-hpcflow/tests/unit/test_utils.py,sha256=
+hpcflow/tests/unit/test_utils.py,sha256=RH3UZ99g1pKKJme1rNgzT3j_txWLT9_OWE1lWx67W5M,14610
 hpcflow/tests/unit/test_value_sequence.py,sha256=yJh5YRxN-VYMbCWiUaLH4T_Ue5F2IfVS3e11zx6HlS0,15740
 hpcflow/tests/unit/test_workflow.py,sha256=Eyr9BhnsFisAPotEAeYrAvxXT1d2i6oshEh1_OxgnSc,22732
 hpcflow/tests/unit/test_workflow_template.py,sha256=fF7LNveMwCledgncNCRfD9Nd9dL9tSPtlAAOKV3ovAU,5396
 hpcflow/tests/workflows/test_jobscript.py,sha256=9sp1o0g72JZbv2QlOl5v7wCZEFjotxiIKGNUxVaFgaA,724
 hpcflow/tests/workflows/test_workflows.py,sha256=xai6FRtGqG4lStJk6KmsqPUSuvqs9FrsBOxMVALshIs,13400
 hpcflow/viz_demo.ipynb,sha256=1QdnVsk72vihv2L6hOGyk318uEa22ZSgGxQCa7hW2oo,6238
-hpcflow_new2-0.2.
-hpcflow_new2-0.2.
-hpcflow_new2-0.2.
-hpcflow_new2-0.2.
+hpcflow_new2-0.2.0a177.dist-info/METADATA,sha256=I1tkrIUSwRwNsKxjcTy7lrpcOhZp9Z3HUGV_Wdoo9qw,2466
+hpcflow_new2-0.2.0a177.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
+hpcflow_new2-0.2.0a177.dist-info/entry_points.txt,sha256=aoGtCnFdfPcXfBdu2zZyMOJoz6fPgdR0elqsgrE-USU,106
+hpcflow_new2-0.2.0a177.dist-info/RECORD,,
{hpcflow_new2-0.2.0a176.dist-info → hpcflow_new2-0.2.0a177.dist-info}/WHEEL
File without changes

{hpcflow_new2-0.2.0a176.dist-info → hpcflow_new2-0.2.0a177.dist-info}/entry_points.txt
File without changes