hpcflow-new2 0.2.0a176__py3-none-any.whl → 0.2.0a177__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hpcflow/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.0a176"
1
+ __version__ = "0.2.0a177"
@@ -0,0 +1,142 @@
1
+ from collections import defaultdict
2
+ from dataclasses import dataclass
3
+ from typing import Set, Dict
4
+
5
+ from hpcflow.sdk.log import TimeIt
6
+
7
+
8
@dataclass
class DependencyCache:
    """Dependency look-up tables between runs, iterations and elements.

    Every mapping is keyed by an integer ID and holds a set of integer IDs. All tables
    are computed in one pass by `DependencyCache.build`, so that callers do not have
    to query dependencies object-by-object.
    """

    # run ID -> IDs of the other runs that are the source of parameters in its data index
    run_dependencies: Dict[int, Set]
    # run ID -> IDs of the runs that list it as a source (inverse of `run_dependencies`)
    run_dependents: Dict[int, Set]
    # iteration ID -> IDs of the runs that any run of the iteration depends on
    iter_run_dependencies: Dict[int, Set]
    # iteration ID -> IDs of the iterations that own those runs
    iter_iter_dependencies: Dict[int, Set]
    # element ID -> IDs of the iterations that any iteration of the element depends on
    elem_iter_dependencies: Dict[int, Set]
    # element ID -> IDs of the elements that own those iterations
    elem_elem_dependencies: Dict[int, Set]
    # element ID -> IDs of elements directly depending on it
    elem_elem_dependents: Dict[int, Set]
    # element ID -> direct dependents plus the direct dependents of those
    elem_elem_dependents_rec: Dict[int, Set]

    # return value of `workflow.get_all_elements()`
    elements: Dict
    # return value of `workflow.get_all_element_iterations()`
    iterations: Dict

    @classmethod
    @TimeIt.decorator
    def build(cls, workflow):
        """Compute all dependency tables for `workflow` and return a new cache.

        Runs, iterations and elements are fetched from the workflow store in bulk
        using the ID ranges `0..num_EARs-1`, `0..num_element_iterations-1` and
        `0..num_elements-1` respectively.
        """
        num_iters = workflow.num_element_iterations
        num_elems = workflow.num_elements
        num_runs = workflow.num_EARs

        store = workflow._store
        store_runs = store.get_EARs(list(range(num_runs)))
        store_iters = store.get_element_iterations(list(range(num_iters)))
        store_elems = store.get_elements(list(range(num_elems)))
        param_sources = workflow.get_all_parameter_sources()

        # one data index per run, with the "repeats." entry dropped and every value
        # normalised to a list of parameter IDs:
        runs_data_idx = []
        for store_run in store_runs:
            normalised = {}
            for key, val in store_run.data_idx.items():
                if key == "repeats.":
                    continue
                normalised[key] = val if isinstance(val, list) else [val]
            runs_data_idx.append(normalised)

        # run dependencies and dependents; the position of a run in `runs_data_idx` is
        # used as its run ID:
        run_dependencies = {}
        run_dependents = defaultdict(set)
        for run_ID, run_data in enumerate(runs_data_idx):
            source_runs = set()
            for param_IDs in run_data.values():
                for param_ID in param_IDs:
                    src_run = param_sources[param_ID].get("EAR_ID")
                    if src_run is None or src_run == run_ID:
                        # not produced by a run, or produced by this very run
                        continue
                    source_runs.add(src_run)
            run_dependencies[run_ID] = source_runs
            for src_run in source_runs:
                run_dependents[src_run].add(run_ID)

        # make sure every run has an entry, even if nothing depends on it:
        for run_ID in range(num_runs):
            run_dependents.setdefault(run_ID, set())
        run_dependents = dict(run_dependents)

        # for each iteration, the flat list of runs that belong to it:
        iter_run_IDs = {}
        for store_iter in store_iters:
            iter_run_IDs[store_iter.id_] = [
                run_ID
                for action_runs in store_iter.EAR_IDs.values()
                for run_ID in action_runs
            ]

        # for each iteration, which runs does it depend on?
        iter_run_dependencies = {
            iter_ID: {dep for run_ID in run_IDs for dep in run_dependencies[run_ID]}
            for iter_ID, run_IDs in iter_run_IDs.items()
        }

        # for each run, which iteration does it belong to?
        run_iter_IDs = {
            run_ID: iter_ID
            for iter_ID, run_IDs in iter_run_IDs.items()
            for run_ID in run_IDs
        }

        # for each iteration, which iterations does it depend on?
        iter_iter_dependencies = {
            iter_ID: {run_iter_IDs[run_ID] for run_ID in dep_runs}
            for iter_ID, dep_runs in iter_run_dependencies.items()
        }

        # for each element, the iterations that belong to it:
        elem_iter_IDs = {
            store_elem.id_: store_elem.iteration_IDs for store_elem in store_elems
        }

        # for each element, which iterations does it depend on?
        elem_iter_dependencies = {
            elem_ID: {
                dep for iter_ID in iter_IDs for dep in iter_iter_dependencies[iter_ID]
            }
            for elem_ID, iter_IDs in elem_iter_IDs.items()
        }

        # for each iteration, which element does it belong to?
        iter_elem_IDs = {
            iter_ID: elem_ID
            for elem_ID, iter_IDs in elem_iter_IDs.items()
            for iter_ID in iter_IDs
        }

        # for each element, which elements does it depend on?
        elem_elem_dependencies = {
            elem_ID: {iter_elem_IDs[iter_ID] for iter_ID in dep_iters}
            for elem_ID, dep_iters in elem_iter_dependencies.items()
        }

        # for each element, which elements depend on it (directly)?
        elem_elem_dependents = defaultdict(set)
        for elem_ID, dep_elems in elem_elem_dependencies.items():
            for dep_elem in dep_elems:
                elem_elem_dependents[dep_elem].add(elem_ID)

        # for each element, which elements depend on it (recursively)?
        # NOTE(review): only two levels are followed here (direct dependents and their
        # direct dependents), not a full transitive closure -- confirm this is intended.
        elem_elem_dependents_rec = defaultdict(set)
        # iterate over a snapshot of the keys: the look-ups below may add new (empty)
        # entries to the defaultdict
        for elem_ID in list(elem_elem_dependents):
            for direct in elem_elem_dependents[elem_ID]:
                reachable = elem_elem_dependents_rec[elem_ID]
                reachable.add(direct)
                reachable.update(
                    {
                        second
                        for second in elem_elem_dependents[direct]
                        if second != elem_ID
                    }
                )

        # make sure every element has an entry in both dependents tables:
        for elem_ID in range(num_elems):
            elem_elem_dependents.setdefault(elem_ID, set())
            elem_elem_dependents_rec.setdefault(elem_ID, set())

        elem_elem_dependents = dict(elem_elem_dependents)
        elem_elem_dependents_rec = dict(elem_elem_dependents_rec)

        elements = workflow.get_all_elements()
        iterations = workflow.get_all_element_iterations()

        return cls(
            run_dependencies=run_dependencies,
            run_dependents=run_dependents,
            iter_run_dependencies=iter_run_dependencies,
            iter_iter_dependencies=iter_iter_dependencies,
            elem_iter_dependencies=elem_iter_dependencies,
            elem_elem_dependencies=elem_elem_dependencies,
            elem_elem_dependents=elem_elem_dependents,
            elem_elem_dependents_rec=elem_elem_dependents_rec,
            elements=elements,
            iterations=iterations,
        )
@@ -675,6 +675,7 @@ class ElementIteration:
675
675
  default=default,
676
676
  )
677
677
 
678
+ @TimeIt.decorator
678
679
  def get_EAR_dependencies(
679
680
  self,
680
681
  as_objects: Optional[bool] = False,
@@ -708,6 +709,7 @@ class ElementIteration:
708
709
  out = self.workflow.get_EARs_from_IDs(out)
709
710
  return out
710
711
 
712
+ @TimeIt.decorator
711
713
  def get_element_iteration_dependencies(
712
714
  self, as_objects: bool = False
713
715
  ) -> List[Union[int, app.ElementIteration]]:
@@ -719,6 +721,7 @@ class ElementIteration:
719
721
  out = self.workflow.get_element_iterations_from_IDs(out)
720
722
  return out
721
723
 
724
+ @TimeIt.decorator
722
725
  def get_element_dependencies(
723
726
  self,
724
727
  as_objects: Optional[bool] = False,
@@ -769,6 +772,7 @@ class ElementIteration:
769
772
 
770
773
  return out
771
774
 
775
+ @TimeIt.decorator
772
776
  def get_dependent_EARs(
773
777
  self, as_objects: bool = False
774
778
  ) -> List[Union[int, app.ElementActionRun]]:
@@ -793,6 +797,7 @@ class ElementIteration:
793
797
 
794
798
  return deps
795
799
 
800
+ @TimeIt.decorator
796
801
  def get_dependent_element_iterations(
797
802
  self, as_objects: bool = False
798
803
  ) -> List[Union[int, app.ElementIteration]]:
@@ -816,6 +821,7 @@ class ElementIteration:
816
821
 
817
822
  return deps
818
823
 
824
+ @TimeIt.decorator
819
825
  def get_dependent_elements(
820
826
  self,
821
827
  as_objects: bool = False,
@@ -1246,6 +1252,7 @@ class Element:
1246
1252
  """Get tasks that depend on the most recent iteration of this element."""
1247
1253
  return self.latest_iteration.get_dependent_tasks(as_objects=as_objects)
1248
1254
 
1255
+ @TimeIt.decorator
1249
1256
  def get_dependent_elements_recursively(self, task_insert_ID=None):
1250
1257
  """Get downstream elements that depend on this element, including recursive
1251
1258
  dependencies.
hpcflow/sdk/core/loop.py CHANGED
@@ -6,9 +6,11 @@ from typing import Dict, List, Optional, Tuple, Union
6
6
  from hpcflow.sdk import app
7
7
  from hpcflow.sdk.core.errors import LoopTaskSubsetError
8
8
  from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
9
+ from hpcflow.sdk.core.loop_cache import LoopCache
9
10
  from hpcflow.sdk.core.parameters import InputSourceType
10
11
  from hpcflow.sdk.core.task import WorkflowTask
11
- from hpcflow.sdk.core.utils import check_valid_py_identifier
12
+ from hpcflow.sdk.core.utils import check_valid_py_identifier, nth_key, nth_value
13
+ from hpcflow.sdk.log import TimeIt
12
14
 
13
15
  # from .parameters import Parameter
14
16
 
@@ -198,6 +200,7 @@ class WorkflowLoop:
198
200
 
199
201
  self._validate()
200
202
 
203
+ @TimeIt.decorator
201
204
  def _validate(self):
202
205
  # task subset must be a contiguous range of task indices:
203
206
  task_indices = self.task_indices
@@ -328,6 +331,7 @@ class WorkflowLoop:
328
331
  return self.workflow.tasks[: self.task_objects[0].index]
329
332
 
330
333
  @staticmethod
334
+ @TimeIt.decorator
331
335
  def _find_iterable_parameters(loop_template: app.Loop):
332
336
  all_inputs_first_idx = {}
333
337
  all_outputs_idx = {}
@@ -355,18 +359,19 @@ class WorkflowLoop:
355
359
  return iterable_params
356
360
 
357
361
  @classmethod
362
+ @TimeIt.decorator
358
363
  def new_empty_loop(
359
364
  cls,
360
365
  index: int,
361
366
  workflow: app.Workflow,
362
367
  template: app.Loop,
363
- iterations: List[app.ElementIteration],
368
+ iter_loop_idx: List[Dict],
364
369
  ) -> Tuple[app.WorkflowLoop, List[Dict[str, int]]]:
365
370
  parent_loops = cls._get_parent_loops(index, workflow, template)
366
371
  parent_names = [i.name for i in parent_loops]
367
372
  num_added_iters = {}
368
- for iter_i in iterations:
369
- num_added_iters[tuple([iter_i.loop_idx[j] for j in parent_names])] = 1
373
+ for i in iter_loop_idx:
374
+ num_added_iters[tuple([i[j] for j in parent_names])] = 1
370
375
 
371
376
  obj = cls(
372
377
  index=index,
@@ -379,6 +384,7 @@ class WorkflowLoop:
379
384
  return obj
380
385
 
381
386
  @classmethod
387
+ @TimeIt.decorator
382
388
  def _get_parent_loops(
383
389
  cls,
384
390
  index: int,
@@ -399,12 +405,14 @@ class WorkflowLoop:
399
405
  parents.append(loop_i)
400
406
  return parents
401
407
 
408
+ @TimeIt.decorator
402
409
  def get_parent_loops(self) -> List[app.WorkflowLoop]:
403
410
  """Get loops whose task subset is a superset of this loop's task subset. If two
404
411
  loops have identical task subsets, the first loop in the workflow loop list is
405
412
  considered the child."""
406
413
  return self._get_parent_loops(self.index, self.workflow, self.template)
407
414
 
415
+ @TimeIt.decorator
408
416
  def get_child_loops(self) -> List[app.WorkflowLoop]:
409
417
  """Get loops whose task subset is a subset of this loop's task subset. If two
410
418
  loops have identical task subsets, the first loop in the workflow loop list is
@@ -426,10 +434,12 @@ class WorkflowLoop:
426
434
  children = sorted(children, key=lambda x: len(next(iter(x.num_added_iterations))))
427
435
  return children
428
436
 
429
- def add_iteration(self, parent_loop_indices=None):
437
+ @TimeIt.decorator
438
+ def add_iteration(self, parent_loop_indices=None, cache: Optional[LoopCache] = None):
439
+ if not cache:
440
+ cache = LoopCache.build(self.workflow)
430
441
  parent_loops = self.get_parent_loops()
431
442
  child_loops = self.get_child_loops()
432
- child_loop_names = [i.name for i in child_loops]
433
443
  parent_loop_indices = parent_loop_indices or {}
434
444
  if parent_loops and not parent_loop_indices:
435
445
  parent_loop_indices = {i.name: 0 for i in parent_loops}
@@ -458,24 +468,19 @@ class WorkflowLoop:
458
468
  if task.insert_ID in child.task_insert_IDs
459
469
  },
460
470
  }
471
+ added_iter_IDs = []
461
472
  for elem_idx in range(task.num_elements):
462
- # element needs to take into account changes made in this code
463
- element = task.elements[elem_idx]
464
- inp_statuses = task.template.get_input_statuses(element.element_set)
473
+
474
+ elem_ID = task.element_IDs[elem_idx]
475
+
465
476
  new_data_idx = {}
466
- existing_inners = []
467
- for iter_i in element.iterations:
468
- if iter_i.loop_idx[self.name] == cur_loop_idx:
469
- existing_inner_i = {
470
- k: v
471
- for k, v in iter_i.loop_idx.items()
472
- if k in child_loop_names
473
- }
474
- if existing_inner_i:
475
- existing_inners.append(existing_inner_i)
476
477
 
477
478
  # copy resources from zeroth iteration:
478
- for key, val in element.iterations[0].get_data_idx().items():
479
+ zeroth_iter_ID, zi_iter_data_idx = cache.zeroth_iters[elem_ID]
480
+ zi_elem_ID, zi_idx = cache.iterations[zeroth_iter_ID]
481
+ zi_data_idx = nth_value(cache.data_idx[zi_elem_ID], zi_idx)
482
+
483
+ for key, val in zi_data_idx.items():
479
484
  if key.startswith("resources."):
480
485
  new_data_idx[key] = val
481
486
 
@@ -493,41 +498,47 @@ class WorkflowLoop:
493
498
  # identify element(s) from which this iterable input should be
494
499
  # parametrised:
495
500
  if task.insert_ID == iter_dat["output_tasks"][-1]:
496
- src_elem = element
501
+ src_elem_ID = elem_ID
497
502
  grouped_elems = None
498
503
  else:
499
- src_elems = element.get_dependent_elements_recursively(
500
- task_insert_ID=iter_dat["output_tasks"][-1]
501
- )
504
+ src_elem_IDs_all = cache.element_dependents[elem_ID]
505
+ src_elem_IDs = {
506
+ k: v
507
+ for k, v in src_elem_IDs_all.items()
508
+ if cache.elements[k]["task_insert_ID"]
509
+ == iter_dat["output_tasks"][-1]
510
+ }
502
511
  # consider groups
503
512
  inp_group_name = inp.single_labelled_data.get("group")
504
513
  grouped_elems = []
505
- for i in src_elems:
514
+ for src_elem_j_ID, src_elem_j_dat in src_elem_IDs.items():
506
515
  i_in_group = any(
507
- j.name == inp_group_name for j in i.element_set.groups
516
+ k == inp_group_name
517
+ for k in src_elem_j_dat["group_names"]
508
518
  )
509
519
  if i_in_group:
510
- grouped_elems.append(i)
520
+ grouped_elems.append(src_elem_j_ID)
511
521
 
512
- if not grouped_elems and len(src_elems) > 1:
522
+ if not grouped_elems and len(src_elem_IDs) > 1:
513
523
  raise NotImplementedError(
514
- f"Multiple elements found in the iterable parameter {inp!r}'s"
515
- f" latest output task (insert ID: "
516
- f"{iter_dat['output_tasks'][-1]}) that can be used to "
517
- f"parametrise the next iteration: {src_elems!r}."
524
+ f"Multiple elements found in the iterable parameter "
525
+ f"{inp!r}'s latest output task (insert ID: "
526
+ f"{iter_dat['output_tasks'][-1]}) that can be used "
527
+ f"to parametrise the next iteration: "
528
+ f"{list(src_elem_IDs.keys())!r}."
518
529
  )
519
530
 
520
- elif not src_elems:
531
+ elif not src_elem_IDs:
521
532
  # TODO: maybe OK?
522
533
  raise NotImplementedError(
523
- f"No elements found in the iterable parameter {inp!r}'s"
524
- f" latest output task (insert ID: "
525
- f"{iter_dat['output_tasks'][-1]}) that can be used to "
526
- f"parametrise the next iteration."
534
+ f"No elements found in the iterable parameter "
535
+ f"{inp!r}'s latest output task (insert ID: "
536
+ f"{iter_dat['output_tasks'][-1]}) that can be used "
537
+ f"to parametrise the next iteration."
527
538
  )
528
539
 
529
540
  else:
530
- src_elem = src_elems[0]
541
+ src_elem_ID = nth_key(src_elem_IDs, 0)
531
542
 
532
543
  child_loop_max_iters = {}
533
544
  parent_loop_same_iters = {
@@ -553,76 +564,69 @@ class WorkflowLoop:
553
564
 
554
565
  # identify the ElementIteration from which this input should be
555
566
  # parametrised:
556
- source_iter = None
567
+ loop_idx_key = tuple(sorted(source_iter_loop_idx.items()))
557
568
  if grouped_elems:
558
- source_iter = []
559
- for src_elem in grouped_elems:
560
- for iter_i in src_elem.iterations:
561
- if iter_i.loop_idx == source_iter_loop_idx:
562
- source_iter.append(iter_i)
563
- break
569
+ src_data_idx = []
570
+ for src_elem_ID in grouped_elems:
571
+ src_data_idx.append(
572
+ cache.data_idx[src_elem_ID][loop_idx_key]
573
+ )
564
574
  else:
565
- for iter_i in src_elem.iterations:
566
- if iter_i.loop_idx == source_iter_loop_idx:
567
- source_iter = iter_i
568
- break
575
+ src_data_idx = cache.data_idx[src_elem_ID][loop_idx_key]
569
576
 
570
- if not source_iter:
577
+ if not src_data_idx:
571
578
  raise RuntimeError(
572
579
  f"Could not find a source iteration with loop_idx: "
573
580
  f"{source_iter_loop_idx!r}."
574
581
  )
575
582
 
576
583
  if grouped_elems:
577
- inp_dat_idx = [
578
- i.get_data_idx()[f"outputs.{inp.typ}"]
579
- for i in source_iter
580
- ]
584
+ inp_dat_idx = [i[f"outputs.{inp.typ}"] for i in src_data_idx]
581
585
  else:
582
- inp_dat_idx = source_iter.get_data_idx()[f"outputs.{inp.typ}"]
586
+ inp_dat_idx = src_data_idx[f"outputs.{inp.typ}"]
583
587
  new_data_idx[f"inputs.{inp.typ}"] = inp_dat_idx
584
588
 
585
589
  else:
586
590
  inp_key = f"inputs.{inp.typ}"
587
591
 
588
- orig_inp_src = element.input_sources[inp_key]
592
+ orig_inp_src = cache.elements[elem_ID]["input_sources"][inp_key]
589
593
  inp_dat_idx = None
590
594
 
591
595
  if orig_inp_src.source_type is InputSourceType.LOCAL:
592
596
  # keep locally defined inputs from original element
593
- inp_dat_idx = element.iterations[0].get_data_idx()[inp_key]
597
+ inp_dat_idx = zi_data_idx[inp_key]
594
598
 
595
599
  elif orig_inp_src.source_type is InputSourceType.DEFAULT:
596
600
  # keep default value from original element
597
- inp_dat_idx_iter_0 = element.iterations[0].get_data_idx()
598
601
  try:
599
- inp_dat_idx = inp_dat_idx_iter_0[inp_key]
602
+ inp_dat_idx = zi_data_idx[inp_key]
600
603
  except KeyError:
601
604
  # if this input is required by a conditional action, and
602
605
  # that condition is not met, then this input will not
603
606
  # exist in the action-run data index, so use the initial
604
607
  # iteration data index:
605
- inp_dat_idx = element.iterations[0].data_idx[inp_key]
608
+ inp_dat_idx = zi_iter_data_idx[inp_key]
606
609
 
607
610
  elif orig_inp_src.source_type is InputSourceType.TASK:
608
611
  if orig_inp_src.task_ref not in self.task_insert_IDs:
609
- # TODO: what about groups?
610
612
  # source the data_idx from the iteration with same parent
611
613
  # loop indices as the new iteration to add:
612
- src_iters = []
613
- for iter_i in element.iterations:
614
+ # src_iters = []
615
+ src_data_idx = []
616
+ for li_k, di_k in cache.data_idx[elem_ID].items():
614
617
  skip_iter = False
618
+ li_k_dct = dict(li_k)
615
619
  for p_k, p_v in parent_loop_indices.items():
616
- if iter_i.loop_idx.get(p_k) != p_v:
620
+ if li_k_dct.get(p_k) != p_v:
617
621
  skip_iter = True
618
622
  break
619
623
  if not skip_iter:
620
- src_iters.append(iter_i)
624
+ src_data_idx.append(di_k)
621
625
 
622
626
  # could be multiple, but they should all have the same
623
627
  # data index for this parameter:
624
- src_iter = src_iters[0]
625
- inp_dat_idx = src_iter.get_data_idx()[inp_key]
628
+ src_data_idx = src_data_idx[0]
629
+ inp_dat_idx = src_data_idx[inp_key]
626
630
  else:
627
631
  is_group = False
628
632
  if (
@@ -645,19 +649,24 @@ class WorkflowLoop:
645
649
  # find which element in that task `element`
646
650
  # depends on:
647
651
  task_i = self.workflow.tasks.get(insert_ID=tiID)
648
- elem_i = task_i.elements[e_idx]
649
- src_elems_i = (
650
- elem_i.get_dependent_elements_recursively(
651
- task_insert_ID=task.insert_ID
652
- )
653
- )
654
- # filter src_elems_i for matching element IDs:
655
- src_elems_i = [
656
- i for i in src_elems_i if i.id_ == element.id_
652
+ elem_i_ID = task_i.element_IDs[e_idx]
653
+ src_elem_IDs_all = cache.element_dependents[
654
+ elem_i_ID
655
+ ]
656
+ src_elem_IDs_i = {
657
+ k: v
658
+ for k, v in src_elem_IDs_all.items()
659
+ if cache.elements[k]["task_insert_ID"]
660
+ == task.insert_ID
661
+ }
662
+
663
+ # filter src_elem_IDs_i for matching element IDs:
664
+ src_elem_IDs_i = [
665
+ i for i in src_elem_IDs_i if i == elem_ID
657
666
  ]
658
667
  if (
659
- len(src_elems_i) == 1
660
- and src_elems_i[0].id_ == element.id_
668
+ len(src_elem_IDs_i) == 1
669
+ and src_elem_IDs_i[0] == elem_ID
661
670
  ):
662
671
  new_sources.append((tiID, e_idx))
663
672
 
@@ -680,10 +689,11 @@ class WorkflowLoop:
680
689
  new_data_idx[inp_key] = inp_dat_idx
681
690
 
682
691
  # add any locally defined sub-parameters:
692
+ inp_statuses = cache.elements[elem_ID]["input_statuses"]
683
693
  inp_status_inps = set([f"inputs.{i}" for i in inp_statuses])
684
694
  sub_params = inp_status_inps - set(new_data_idx.keys())
685
695
  for sub_param_i in sub_params:
686
- sub_param_data_idx_iter_0 = element.iterations[0].get_data_idx()
696
+ sub_param_data_idx_iter_0 = zi_data_idx
687
697
  try:
688
698
  sub_param_data_idx = sub_param_data_idx_iter_0[sub_param_i]
689
699
  except KeyError:
@@ -691,7 +701,7 @@ class WorkflowLoop:
691
701
  # and that condition is not met, then this input will not exist in
692
702
  # the action-run data index, so use the initial iteration data
693
703
  # index:
694
- sub_param_data_idx = element.iterations[0].data_idx[sub_param_i]
704
+ sub_param_data_idx = zi_data_idx[sub_param_i]
695
705
 
696
706
  new_data_idx[sub_param_i] = sub_param_data_idx
697
707
 
@@ -703,16 +713,26 @@ class WorkflowLoop:
703
713
  schema_params = set(
704
714
  i for i in new_data_idx.keys() if len(i.split(".")) == 2
705
715
  )
706
- all_new_data_idx[(task.insert_ID, element.index)] = new_data_idx
716
+ all_new_data_idx[(task.insert_ID, elem_idx)] = new_data_idx
707
717
 
708
718
  iter_ID_i = self.workflow._store.add_element_iteration(
709
- element_ID=element.id_,
719
+ element_ID=elem_ID,
710
720
  data_idx=new_data_idx,
711
721
  schema_parameters=list(schema_params),
712
722
  loop_idx=new_loop_idx,
713
723
  )
724
+ if cache:
725
+ cache.add_iteration(
726
+ iter_ID=iter_ID_i,
727
+ task_insert_ID=task.insert_ID,
728
+ element_ID=elem_ID,
729
+ loop_idx=new_loop_idx,
730
+ data_idx=new_data_idx,
731
+ )
714
732
 
715
- task.initialise_EARs()
733
+ added_iter_IDs.append(iter_ID_i)
734
+
735
+ task.initialise_EARs(iter_IDs=added_iter_IDs)
716
736
 
717
737
  added_iters_key = tuple(parent_loop_indices[k] for k in self.parents)
718
738
  self._increment_pending_added_iters(added_iters_key)
@@ -731,7 +751,8 @@ class WorkflowLoop:
731
751
  **par_idx,
732
752
  **parent_loop_indices,
733
753
  self.name: cur_loop_idx + 1,
734
- }
754
+ },
755
+ cache=cache,
735
756
  )
736
757
 
737
758
  def test_termination(self, element_iter):
@@ -0,0 +1,140 @@
1
+ from dataclasses import dataclass
2
+ from collections import defaultdict
3
+ from typing import Dict, List, Optional, Tuple
4
+
5
+ from hpcflow.sdk import app
6
+ from hpcflow.sdk.core.utils import nth_key
7
+ from hpcflow.sdk.log import TimeIt
8
+ from hpcflow.sdk.core.cache import DependencyCache
9
+
10
+
11
@dataclass
class LoopCache:
    """Class to store a cache for use in `Workflow._add_empty_loop` and
    `WorkflowLoop.add_iteration`.

    Attributes
    ----------
    element_dependents
        Keys are element IDs, values are dicts whose keys are element IDs that depend on
        the key element ID (taken from `DependencyCache.elem_elem_dependents_rec`, in
        ascending ID order), and whose values are dicts with keys: `group_names`, which
        is a tuple of the string group names associated with the dependent element's
        element set.
    elements
        Keys are element IDs, values are dicts with keys: `input_statuses`,
        `input_sources`, and `task_insert_ID`.
    zeroth_iters
        Keys are element IDs, values are data associated with the zeroth iteration of that
        element, namely a tuple of iteration ID and `ElementIteration.data_idx`.
    data_idx
        Keys are element IDs, values are data associated with all iterations of that
        element, namely a dict whose keys are the iteration loop index as a sorted tuple
        of `(loop name, index)` items, and whose values are data indices via
        `ElementIteration.get_data_idx()`.
    iterations
        Keys are iteration IDs, values are tuples of element ID and iteration index within
        that element.
    task_iterations
        Keys are task insert IDs, values are list of all iteration IDs associated with
        that task.

    """

    element_dependents: Dict[int, Dict]
    elements: Dict[int, Dict]
    zeroth_iters: Dict[int, Tuple]
    data_idx: Dict[int, Dict]
    iterations: Dict[int, Tuple]
    task_iterations: Dict[int, List[int]]

    @TimeIt.decorator
    def get_iter_IDs(self, loop: "app.Loop") -> List[int]:
        """Retrieve a list of iteration IDs belonging to a given loop."""
        return [j for i in loop.task_insert_IDs for j in self.task_iterations[i]]

    @TimeIt.decorator
    def get_iter_loop_indices(self, iter_IDs: List[int]) -> List[Dict[str, int]]:
        """Retrieve the loop index (as a dict) of each of the given iteration IDs.

        The loop index is recovered from the key of the iteration's entry in
        `data_idx`, located by the position stored in `iterations`.
        """
        iter_loop_idx = []
        for i in iter_IDs:
            elem_id, idx = self.iterations[i]
            iter_loop_idx.append(dict(nth_key(self.data_idx[elem_id], idx)))
        return iter_loop_idx

    @TimeIt.decorator
    def update_loop_indices(self, new_loop_name: str, iter_IDs: List[int]):
        """Add `new_loop_name` (with index zero) to the cached loop indices.

        Every cached iteration of each element that owns at least one of `iter_IDs`
        is re-keyed in `data_idx` so that its loop index includes the new loop.
        """
        # a set gives constant-time membership tests; testing against the list would be
        # quadratic in the number of iterations
        iter_ID_set = set(iter_IDs)
        elem_ids = {v[0] for k, v in self.iterations.items() if k in iter_ID_set}
        for i in elem_ids:
            new_item = {}
            for k, v in self.data_idx[i].items():
                new_k = dict(k)
                new_k.update({new_loop_name: 0})
                new_item[tuple(sorted(new_k.items()))] = v
            self.data_idx[i] = new_item

    @TimeIt.decorator
    def add_iteration(self, iter_ID, task_insert_ID, element_ID, loop_idx, data_idx):
        """Update the cache to include a newly added iteration.

        Parameters
        ----------
        iter_ID
            ID of the new iteration.
        task_insert_ID
            Insert ID of the task that the iteration's element belongs to.
        element_ID
            ID of the element that the iteration belongs to.
        loop_idx
            Dict mapping loop names to the loop indices of the new iteration.
        data_idx
            Data index to cache for the new iteration.
        """
        self.task_iterations[task_insert_ID].append(iter_ID)
        # the new entry is appended to the element's dict, so its position is the
        # number of entries present before insertion:
        new_iter_idx = len(self.data_idx[element_ID])
        self.data_idx[element_ID][tuple(sorted(loop_idx.items()))] = data_idx
        self.iterations[iter_ID] = (element_ID, new_iter_idx)

    @classmethod
    @TimeIt.decorator
    def build(cls, workflow: "app.Workflow", loops: Optional[List["app.Loop"]] = None):
        """Build a cache of data for use in adding loops and iterations.

        Parameters
        ----------
        workflow
            Workflow from which to collect the data.
        loops
            Loops to consider in addition to those in `workflow.template.loops`. Only
            elements of tasks that belong to at least one considered loop are cached.
        """

        deps_cache = DependencyCache.build(workflow)

        loops = list(workflow.template.loops) + (loops or [])
        task_iIDs = set(j for i in loops for j in i.task_insert_IDs)
        tasks = [workflow.tasks.get(insert_ID=i) for i in sorted(task_iIDs)]
        elem_deps = {}

        # keys: element IDs, values: dict with keys: tuple(loop_idx), values: data index
        data_idx_cache = {}

        # keys: iteration IDs, values: tuple of (element ID, integer index into values
        # dict in `data_idx_cache` [accessed via `.keys()[index]`])
        iters = {}

        # keys: element IDs, values: dict with keys: "input_statuses", "input_sources",
        # "task_insert_ID":
        elements = {}

        zeroth_iters = {}
        task_iterations = defaultdict(list)
        for task in tasks:
            for elem_ID in task.element_IDs:
                element = deps_cache.elements[elem_ID]
                inp_statuses = task.template.get_input_statuses(element.element_set)
                elements[element.id_] = {
                    "input_statuses": inp_statuses,
                    "input_sources": element.input_sources,
                    "task_insert_ID": task.insert_ID,
                }
                # Dependents are held in a set, whose iteration order is an
                # implementation detail; sort so that anything built from this dict
                # (e.g. lists of grouped source elements) has a stable, ascending
                # element-ID order.
                elem_deps[element.id_] = {
                    i: {
                        "group_names": tuple(
                            j.name for j in deps_cache.elements[i].element_set.groups
                        ),
                    }
                    for i in sorted(deps_cache.elem_elem_dependents_rec[element.id_])
                }
                elem_iters = {}
                for idx, iter_i in enumerate(element.iterations):
                    if idx == 0:
                        zeroth_iters[element.id_] = (iter_i.id_, iter_i.data_idx)
                    loop_idx_key = tuple(sorted(iter_i.loop_idx.items()))
                    elem_iters[loop_idx_key] = iter_i.get_data_idx()
                    task_iterations[task.insert_ID].append(iter_i.id_)
                    iters[iter_i.id_] = (element.id_, idx)
                data_idx_cache[element.id_] = elem_iters

        return cls(
            element_dependents=elem_deps,
            elements=elements,
            zeroth_iters=zeroth_iters,
            data_idx=data_idx_cache,
            iterations=iters,
            task_iterations=dict(task_iterations),
        )
hpcflow/sdk/core/task.py CHANGED
@@ -2062,29 +2062,36 @@ class WorkflowTask:
2062
2062
  return element_dat_idx
2063
2063
 
2064
2064
  @TimeIt.decorator
2065
- def initialise_EARs(self) -> List[int]:
2065
def initialise_EARs(self, iter_IDs: Optional[List[int]] = None) -> List[int]:
    """Try to initialise any uninitialised EARs of this task.

    Parameters
    ----------
    iter_IDs
        If given and non-empty, only the element iterations with these IDs are
        considered. Otherwise, all iterations of all elements of this task are
        considered.

    Returns
    -------
    List of IDs of the element iterations whose EARs were initialised by this call.
    """
    if iter_IDs:
        iters = self.workflow.get_element_iterations_from_IDs(iter_IDs)
    else:
        iters = []
        for element in self.elements:
            # We don't yet cache Element objects, so `element`, and also its
            # `ElementIteration`s, are transient. So there is no reason to update these
            # objects in memory to account for the new EARs. Subsequent calls to
            # `WorkflowTask.elements` will retrieve correct element data from the
            # store. This might need changing once/if we start caching Element
            # objects.
            iters.extend(element.iterations)

    initialised = []
    for iter_i in iters:
        if iter_i.EARs_initialised:
            continue
        try:
            self._initialise_element_iter_EARs(iter_i)
        except UnsetParameterDataError:
            # raised by `Action.test_rules`; cannot yet initialise EARs
            self.app.logger.debug(
                "UnsetParameterDataError raised: cannot yet initialise runs."
            )
        else:
            initialised.append(iter_i.id_)
            iter_i._EARs_initialised = True
            self.workflow.set_EARs_initialised(iter_i.id_)
    return initialised
2089
2096
 
2090
2097
  @TimeIt.decorator
@@ -2097,7 +2104,6 @@ class WorkflowTask:
2097
2104
  param_src_updates = {}
2098
2105
 
2099
2106
  count = 0
2100
- # TODO: generator is an IO op here, can be pre-calculated/cached?
2101
2107
  for act_idx, action in self.template.all_schema_actions():
2102
2108
  log_common = (
2103
2109
  f"for action {act_idx} of element iteration {element_iter.index} of "
@@ -2151,8 +2157,7 @@ class WorkflowTask:
2151
2157
  metadata={},
2152
2158
  )
2153
2159
 
2154
- for pid, src in param_src_updates.items():
2155
- self.workflow._store.update_param_source(pid, src)
2160
+ self.workflow._store.update_param_source(param_src_updates)
2156
2161
 
2157
2162
  @TimeIt.decorator
2158
2163
  def _add_element_set(self, element_set):
hpcflow/sdk/core/utils.py CHANGED
@@ -3,7 +3,7 @@ import enum
3
3
  from functools import wraps
4
4
  import contextlib
5
5
  import hashlib
6
- from itertools import accumulate
6
+ from itertools import accumulate, islice
7
7
  import json
8
8
  import keyword
9
9
  import os
@@ -871,3 +871,13 @@ def dict_values_process_flat(d, callable):
871
871
  out[k] = proc_idx_k
872
872
 
873
873
  return out
874
+
875
+
876
def nth_key(dct, n):
    """Return the key at zero-based position `n` of `dct`, in iteration order.

    Parameters
    ----------
    dct
        Mapping (or any iterable) whose `n`-th item is required.
    n
        Zero-based position of the key to return.

    Raises
    ------
    IndexError
        If `dct` has fewer than `n + 1` keys.
    ValueError
        If `n` is negative (raised by `itertools.islice`).
    """
    try:
        return next(islice(dct, n, None))
    except StopIteration:
        # do not leak `StopIteration`: it would silently terminate an enclosing
        # iteration, or be turned into a `RuntimeError` inside a generator
        raise IndexError(
            f"Cannot get the key at position {n}: fewer than {n + 1} keys."
        ) from None
880
+
881
+
882
def nth_value(dct, n):
    """Return the value at zero-based position `n` of `dct`, in iteration order.

    Parameters
    ----------
    dct
        Mapping whose `n`-th value is required.
    n
        Zero-based position of the value to return.

    Raises
    ------
    IndexError
        If `dct` has fewer than `n + 1` items.
    ValueError
        If `n` is negative (raised by `itertools.islice`).
    """
    try:
        key = next(islice(dct, n, None))
    except StopIteration:
        # do not leak `StopIteration`: it would silently terminate an enclosing
        # iteration, or be turned into a `RuntimeError` inside a generator
        raise IndexError(
            f"Cannot get the value at position {n}: fewer than {n + 1} items."
        ) from None
    return dct[key]
@@ -25,6 +25,7 @@ from hpcflow.sdk.core import (
25
25
  ABORT_EXIT_CODE,
26
26
  )
27
27
  from hpcflow.sdk.core.actions import EARStatus
28
+ from hpcflow.sdk.core.loop_cache import LoopCache
28
29
  from hpcflow.sdk.log import TimeIt
29
30
  from hpcflow.sdk.persistence import store_cls_from_str, DEFAULT_STORE_FORMAT
30
31
  from hpcflow.sdk.persistence.base import TEMPLATE_COMP_TYPES, AnySEAR
@@ -41,6 +42,7 @@ from hpcflow.sdk.submission.schedulers.direct import DirectScheduler
41
42
  from hpcflow.sdk.typing import PathLike
42
43
  from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
43
44
  from .utils import (
45
+ nth_key,
44
46
  read_JSON_file,
45
47
  read_JSON_string,
46
48
  read_YAML_str,
@@ -625,19 +627,28 @@ class Workflow:
625
627
  )
626
628
  with wk._store.cached_load():
627
629
  with wk.batch_update(is_workflow_creation=True):
628
- for idx, task in enumerate(template.tasks):
630
+ with wk._store.cache_ctx():
631
+ for idx, task in enumerate(template.tasks):
632
+ if status:
633
+ status.update(
634
+ f"Adding task {idx + 1}/{len(template.tasks)} "
635
+ f"({task.name!r})..."
636
+ )
637
+ wk._add_task(task)
629
638
  if status:
630
639
  status.update(
631
- f"Adding task {idx + 1}/{len(template.tasks)} "
632
- f"({task.name!r})..."
640
+ f"Preparing to add {len(template.loops)} loops..."
633
641
  )
634
- wk._add_task(task)
635
- for idx, loop in enumerate(template.loops):
636
- if status:
637
- status.update(
638
- f"Adding loop {idx + 1}/" f"{len(template.loops)}..."
639
- )
640
- wk._add_loop(loop)
642
+ if template.loops:
643
+ # TODO: if loop with non-initialisable actions, will fail
644
+ cache = LoopCache.build(workflow=wk, loops=template.loops)
645
+ for idx, loop in enumerate(template.loops):
646
+ if status:
647
+ status.update(
648
+ f"Adding loop {idx + 1}/"
649
+ f"{len(template.loops)} ({loop.name!r})"
650
+ )
651
+ wk._add_loop(loop, cache=cache, status=status)
641
652
  except Exception:
642
653
  if status:
643
654
  status.stop()
@@ -1101,7 +1112,7 @@ class Workflow:
1101
1112
 
1102
1113
  @TimeIt.decorator
1103
1114
  def _add_empty_loop(
1104
- self, loop: app.Loop
1115
+ self, loop: app.Loop, cache: LoopCache
1105
1116
  ) -> Tuple[app.WorkflowLoop, List[app.ElementIteration]]:
1106
1117
  """Add a new loop (zeroth iterations only) to the workflow."""
1107
1118
 
@@ -1114,15 +1125,15 @@ class Workflow:
1114
1125
  self.template._add_empty_loop(loop_c)
1115
1126
 
1116
1127
  # all these element iterations will be initialised for the new loop:
1117
- iters = self.get_element_iterations_of_tasks(loop_c.task_insert_IDs)
1118
- iter_IDs = [i.id_ for i in iters]
1128
+ iter_IDs = cache.get_iter_IDs(loop_c)
1129
+ iter_loop_idx = cache.get_iter_loop_indices(iter_IDs)
1119
1130
 
1120
1131
  # create and insert a new WorkflowLoop:
1121
1132
  new_loop = self.app.WorkflowLoop.new_empty_loop(
1122
1133
  index=new_index,
1123
1134
  workflow=self,
1124
1135
  template=loop_c,
1125
- iterations=iters,
1136
+ iter_loop_idx=iter_loop_idx,
1126
1137
  )
1127
1138
  self.loops.add_object(new_loop)
1128
1139
  wk_loop = self.loops[new_index]
@@ -1144,15 +1155,28 @@ class Workflow:
1144
1155
 
1145
1156
  self._pending["loops"].append(new_index)
1146
1157
 
1158
+ # update cache loop indices:
1159
+ cache.update_loop_indices(new_loop_name=loop_c.name, iter_IDs=iter_IDs)
1160
+
1147
1161
  return wk_loop
1148
1162
 
1149
1163
  @TimeIt.decorator
1150
- def _add_loop(self, loop: app.Loop) -> None:
1151
- new_wk_loop = self._add_empty_loop(loop)
1164
+ def _add_loop(
1165
+ self, loop: app.Loop, cache: Optional[Dict] = None, status: Optional[Any] = None
1166
+ ) -> None:
1167
+ if not cache:
1168
+ cache = LoopCache.build(workflow=self, loops=[loop])
1169
+ new_wk_loop = self._add_empty_loop(loop, cache)
1152
1170
  if loop.num_iterations is not None:
1153
1171
  # fixed number of iterations, so add remaining N > 0 iterations:
1154
- for _ in range(loop.num_iterations - 1):
1155
- new_wk_loop.add_iteration()
1172
+ if status:
1173
+ status_prev = status.status
1174
+ for iter_idx in range(loop.num_iterations - 1):
1175
+ if status:
1176
+ status.update(
1177
+ f"{status_prev}: iteration {iter_idx + 2}/{loop.num_iterations}."
1178
+ )
1179
+ new_wk_loop.add_iteration(cache=cache)
1156
1180
 
1157
1181
  def add_loop(self, loop: app.Loop) -> None:
1158
1182
  """Add a loop to a subset of workflow tasks."""
@@ -1326,6 +1350,7 @@ class Workflow:
1326
1350
  iters.append(iter_i)
1327
1351
  return iters
1328
1352
 
1353
+ @TimeIt.decorator
1329
1354
  def get_elements_from_IDs(self, id_lst: Iterable[int]) -> List[app.Element]:
1330
1355
  """Return element objects from a list of IDs."""
1331
1356
 
@@ -1334,6 +1359,7 @@ class Workflow:
1334
1359
  task_IDs = [i.task_ID for i in store_elems]
1335
1360
  store_tasks = self._store.get_tasks_by_IDs(task_IDs)
1336
1361
 
1362
+ element_idx_by_task = defaultdict(set)
1337
1363
  index_paths = []
1338
1364
  for el, tk in zip(store_elems, store_tasks):
1339
1365
  elem_idx = tk.element_IDs.index(el.id_)
@@ -1343,15 +1369,23 @@ class Workflow:
1343
1369
  "task_idx": tk.index,
1344
1370
  }
1345
1371
  )
1372
+ element_idx_by_task[tk.index].add(elem_idx)
1373
+
1374
+ elements_by_task = {}
1375
+ for task_idx, elem_idx in element_idx_by_task.items():
1376
+ task = self.tasks[task_idx]
1377
+ elements_by_task[task_idx] = dict(
1378
+ zip(elem_idx, task.elements[list(elem_idx)])
1379
+ )
1346
1380
 
1347
1381
  objs = []
1348
1382
  for idx_dat in index_paths:
1349
- task = self.tasks[idx_dat["task_idx"]]
1350
- elem = task.elements[idx_dat["elem_idx"]]
1383
+ elem = elements_by_task[idx_dat["task_idx"]][idx_dat["elem_idx"]]
1351
1384
  objs.append(elem)
1352
1385
 
1353
1386
  return objs
1354
1387
 
1388
+ @TimeIt.decorator
1355
1389
  def get_element_iterations_from_IDs(
1356
1390
  self, id_lst: Iterable[int]
1357
1391
  ) -> List[app.ElementIteration]:
@@ -1365,6 +1399,8 @@ class Workflow:
1365
1399
  task_IDs = [i.task_ID for i in store_elems]
1366
1400
  store_tasks = self._store.get_tasks_by_IDs(task_IDs)
1367
1401
 
1402
+ element_idx_by_task = defaultdict(set)
1403
+
1368
1404
  index_paths = []
1369
1405
  for it, el, tk in zip(store_iters, store_elems, store_tasks):
1370
1406
  iter_idx = el.iteration_IDs.index(it.id_)
@@ -1376,11 +1412,18 @@ class Workflow:
1376
1412
  "task_idx": tk.index,
1377
1413
  }
1378
1414
  )
1415
+ element_idx_by_task[tk.index].add(elem_idx)
1416
+
1417
+ elements_by_task = {}
1418
+ for task_idx, elem_idx in element_idx_by_task.items():
1419
+ task = self.tasks[task_idx]
1420
+ elements_by_task[task_idx] = dict(
1421
+ zip(elem_idx, task.elements[list(elem_idx)])
1422
+ )
1379
1423
 
1380
1424
  objs = []
1381
1425
  for idx_dat in index_paths:
1382
- task = self.tasks[idx_dat["task_idx"]]
1383
- elem = task.elements[idx_dat["elem_idx"]]
1426
+ elem = elements_by_task[idx_dat["task_idx"]][idx_dat["elem_idx"]]
1384
1427
  iter_ = elem.iterations[idx_dat["iter_idx"]]
1385
1428
  objs.append(iter_)
1386
1429
 
@@ -716,6 +716,11 @@ class PersistentStore(ABC):
716
716
  """Cache for number of persistent tasks."""
717
717
  return self._cache["num_tasks"]
718
718
 
719
+ @property
720
+ def num_EARs_cache(self):
721
+ """Cache for total number of persistent EARs."""
722
+ return self._cache["num_EARs"]
723
+
719
724
  @property
720
725
  def param_sources_cache(self):
721
726
  """Cache for persistent parameter sources."""
@@ -730,6 +735,10 @@ class PersistentStore(ABC):
730
735
  def num_tasks_cache(self, value):
731
736
  self._cache["num_tasks"] = value
732
737
 
738
+ @num_EARs_cache.setter
739
+ def num_EARs_cache(self, value):
740
+ self._cache["num_EARs"] = value
741
+
733
742
  def _reset_cache(self):
734
743
  self._cache = {
735
744
  "tasks": {},
@@ -739,6 +748,7 @@ class PersistentStore(ABC):
739
748
  "param_sources": {},
740
749
  "num_tasks": None,
741
750
  "parameters": {},
751
+ "num_EARs": None,
742
752
  }
743
753
 
744
754
  @contextlib.contextmanager
@@ -873,6 +883,7 @@ class PersistentStore(ABC):
873
883
  """Get the total number of persistent and pending element iterations."""
874
884
  return self._get_num_persistent_elem_iters() + len(self._pending.add_elem_iters)
875
885
 
886
+ @TimeIt.decorator
876
887
  def _get_num_total_EARs(self):
877
888
  """Get the total number of persistent and pending EARs."""
878
889
  return self._get_num_persistent_EARs() + len(self._pending.add_EARs)
@@ -1296,9 +1307,11 @@ class PersistentStore(ABC):
1296
1307
  self.save()
1297
1308
 
1298
1309
  @TimeIt.decorator
1299
- def update_param_source(self, param_id: int, source: Dict, save: bool = True) -> None:
1300
- self.logger.debug(f"Updating parameter ID {param_id!r} source to {source!r}.")
1301
- self._pending.update_param_sources[param_id] = source
1310
+ def update_param_source(
1311
+ self, param_sources: Dict[int, Dict], save: bool = True
1312
+ ) -> None:
1313
+ self.logger.debug(f"Updating parameter sources with {param_sources!r}.")
1314
+ self._pending.update_param_sources.update(param_sources)
1302
1315
  if save:
1303
1316
  self.save()
1304
1317
 
@@ -303,12 +303,13 @@ class JSONPersistentStore(PersistentStore):
303
303
 
304
304
  def _get_num_persistent_tasks(self) -> int:
305
305
  """Get the number of persistent tasks."""
306
- if self.num_tasks_cache is not None:
306
+ if self.use_cache and self.num_tasks_cache is not None:
307
307
  num = self.num_tasks_cache
308
308
  else:
309
309
  with self.using_resource("metadata", action="read") as md:
310
310
  num = len(md["tasks"])
311
- self.num_tasks_cache = num
311
+ if self.use_cache and self.num_tasks_cache is None:
312
+ self.num_tasks_cache = num
312
313
  return num
313
314
 
314
315
  def _get_num_persistent_loops(self) -> int:
@@ -333,8 +334,14 @@ class JSONPersistentStore(PersistentStore):
333
334
 
334
335
  def _get_num_persistent_EARs(self) -> int:
335
336
  """Get the number of persistent EARs."""
336
- with self.using_resource("metadata", action="read") as md:
337
- return len(md["runs"])
337
+ if self.use_cache and self.num_EARs_cache is not None:
338
+ num = self.num_EARs_cache
339
+ else:
340
+ with self.using_resource("metadata", action="read") as md:
341
+ num = len(md["runs"])
342
+ if self.use_cache and self.num_EARs_cache is None:
343
+ self.num_EARs_cache = num
344
+ return num
338
345
 
339
346
  def _get_num_persistent_parameters(self):
340
347
  with self.using_resource("parameters", "read") as params:
@@ -275,6 +275,7 @@ class PendingChanges:
275
275
  EAR_ids = list(self.add_EARs.keys())
276
276
  self.logger.debug(f"commit: adding pending EARs with IDs: {EAR_ids!r}")
277
277
  self.store._append_EARs(EARs)
278
+ self.store.num_EARs_cache = None # invalidate cache
278
279
  # pending start/end times/snapshots, submission indices, and skips that belong
279
280
  # to pending EARs are now committed (accounted for in `get_EARs` above):
280
281
  self.set_EAR_submission_indices = {
@@ -408,6 +409,7 @@ class PendingChanges:
408
409
  @TimeIt.decorator
409
410
  def commit_loop_indices(self) -> None:
410
411
  """Make pending update to element iteration loop indices persistent."""
412
+ # TODO: batch up
411
413
  for iter_ID, loop_idx in self.update_loop_indices.items():
412
414
  self.logger.debug(
413
415
  f"commit: updating loop indices of iteration ID {iter_ID!r} with "
@@ -774,9 +774,16 @@ class ZarrPersistentStore(PersistentStore):
774
774
  """Get the number of persistent element iterations."""
775
775
  return len(self._get_iters_arr())
776
776
 
777
+ @TimeIt.decorator
777
778
  def _get_num_persistent_EARs(self) -> int:
778
779
  """Get the number of persistent EARs."""
779
- return len(self._get_EARs_arr())
780
+ if self.use_cache and self.num_EARs_cache is not None:
781
+ num = self.num_EARs_cache
782
+ else:
783
+ num = len(self._get_EARs_arr())
784
+ if self.use_cache and self.num_EARs_cache is None:
785
+ self.num_EARs_cache = num
786
+ return num
780
787
 
781
788
  def _get_num_persistent_parameters(self):
782
789
  return len(self._get_parameter_base_array())
@@ -13,6 +13,8 @@ from hpcflow.sdk.core.utils import (
13
13
  get_nested_indices,
14
14
  is_fsspec_url,
15
15
  linspace_rect,
16
+ nth_key,
17
+ nth_value,
16
18
  process_string_nodes,
17
19
  replace_items,
18
20
  check_valid_py_identifier,
@@ -556,3 +558,22 @@ def test_dict_values_process_flat_single_item_lists():
556
558
  "b": [4],
557
559
  "c": [5],
558
560
  }
561
+
562
+
563
+ def test_nth_key():
564
+ dct = {"a": 1, "b": 2}
565
+ assert [nth_key(dct, i) for i in range(len(dct))] == ["a", "b"]
566
+
567
+
568
+ def test_nth_value():
569
+ dct = {"a": 1, "b": 2}
570
+ assert [nth_value(dct, i) for i in range(len(dct))] == [1, 2]
571
+
572
+
573
+ def test_nth_key_raises():
574
+ dct = {"a": 1, "b": 2}
575
+ with pytest.raises(Exception):
576
+ nth_key(dct, 2)
577
+
578
+ with pytest.raises(Exception):
579
+ nth_key(dct, -1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hpcflow-new2
3
- Version: 0.2.0a176
3
+ Version: 0.2.0a177
4
4
  Summary: Computational workflow management
5
5
  License: MIT
6
6
  Author: aplowman
@@ -1,7 +1,7 @@
1
1
  hpcflow/__init__.py,sha256=WIETuRHeOp2SqUqHUzpjQ-lk9acbYv-6aWOhZPRdlhs,64
2
2
  hpcflow/__pyinstaller/__init__.py,sha256=YOzBlPSck6slucv6lJM9K80JtsJWxXRL00cv6tRj3oc,98
3
3
  hpcflow/__pyinstaller/hook-hpcflow.py,sha256=SeMopsPkhCyd9gqIrzwFNRj3ZlkUlUYl-74QYz61mo4,1089
4
- hpcflow/_version.py,sha256=SYxreQlS-GrGdoMoqVR3gpikj6-V4k7yVrXGBKl-Xxg,26
4
+ hpcflow/_version.py,sha256=c6sVLAvL-ZtlEF_CBbI6lCJoA-qh3KPMN5Smq6FFaPk,26
5
5
  hpcflow/app.py,sha256=d-kgfnZNlqlCi2H8bK26714brD_u3ibN3FaEZgjF9aA,1332
6
6
  hpcflow/cli.py,sha256=G2J3D9v6MnMWOWMMWK6UEKLn_6wnV9lT_qygEBBxg-I,66
7
7
  hpcflow/data/demo_data_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -46,24 +46,26 @@ hpcflow/sdk/config/config_file.py,sha256=JlMcprj0aujFVk8552ahP2f8EXB0tglMaHwzbcG
46
46
  hpcflow/sdk/config/errors.py,sha256=2D7HJ1dbyeoD3xk4MuaGSsbJsUyQzyw8kaThEBZfP2I,6876
47
47
  hpcflow/sdk/core/__init__.py,sha256=GcIklEsXy3M5PWpmxyhd2KoI0u6HjXRIjD_aR1bgRjo,215
48
48
  hpcflow/sdk/core/actions.py,sha256=66CHgwYAB0oCR6oB5bNbBdUGRGTU3juS1XcMNjj3vP0,77068
49
+ hpcflow/sdk/core/cache.py,sha256=MDzqsCg8uMjxEdQ-8ta-uG042yiPrzQoVKMeE6jYW8k,5127
49
50
  hpcflow/sdk/core/command_files.py,sha256=GEFlgZv7g9lkFoNgwyDtmlI_90e2TWliCJuJimnJZts,18685
50
51
  hpcflow/sdk/core/commands.py,sha256=5SKxSBuYz8sSvfpp9p5utBwMoQV6Pd2KlGBCpXAHDxE,12741
51
- hpcflow/sdk/core/element.py,sha256=hTAR2kxfGSRf4vFgWwrnyuP5z5RnKYOd2X6c6Xd70zo,47048
52
+ hpcflow/sdk/core/element.py,sha256=kWEbGWzrXCwhQ1Ie1RFm1v5_q3MQkCDPEIp01nHIf1Q,47202
52
53
  hpcflow/sdk/core/environment.py,sha256=DGUz1NvliKh6opP0IueGHD69rn_8wFLhDsq6kAmEgM4,4849
53
54
  hpcflow/sdk/core/errors.py,sha256=ku4wwsrmxBpJBFflUeZD6vrmAqgC7H02VdlRG4aAGqQ,9292
54
55
  hpcflow/sdk/core/json_like.py,sha256=LRZsUd1tn8zXC8fESeiXs7Eko-VdnB8zcXiqixKVcZM,18874
55
- hpcflow/sdk/core/loop.py,sha256=SYlkmjvxifBfykhjgXCbxnCvn37BfXGMRIbyOholrEE,30936
56
+ hpcflow/sdk/core/loop.py,sha256=vj3b0jRCJxkKdhURYTgULoDJ6U3LzAYZMXBzqcCMHr8,31506
57
+ hpcflow/sdk/core/loop_cache.py,sha256=BBmJn_pS11gaiHS8qvujBpzWLzPsfs8N6iYIBkZtIwI,5881
56
58
  hpcflow/sdk/core/object_list.py,sha256=HASx7AMniX82bTlROIgIvrjE_DupmwDgxfkfROmI3GA,20168
57
59
  hpcflow/sdk/core/parallel.py,sha256=LI-g-qOuOR1oaEUWVT0qW0hmiP9hsJyUP8_IfSTKYYo,95
58
60
  hpcflow/sdk/core/parameters.py,sha256=0h1M-fXqOVgruyM0Au7Fo38cUbHgDNEPd1Alb1FULxE,65588
59
61
  hpcflow/sdk/core/rule.py,sha256=3jVsSZCBv4Odxy8QbSbKo9ZcRuU-5DRJoNK8adXCEpI,4567
60
62
  hpcflow/sdk/core/run_dir_files.py,sha256=_k-hA7dlry9GZw5ZXcntFcPGxg07p03hnHSM5S-2G2Y,2197
61
- hpcflow/sdk/core/task.py,sha256=-Ugs13wl76FxBv9q0Ik2dWGSDPZQ4KR4qvwmrfsAEgE,122247
63
+ hpcflow/sdk/core/task.py,sha256=TTAn9aeJOLyso7t11wt87wxPDVi037vwpFgF9rCfZwQ,122319
62
64
  hpcflow/sdk/core/task_schema.py,sha256=TipXzC2guu9zilv0En-rHt6lUCTSIj5faI4lVWQdUbA,32346
63
65
  hpcflow/sdk/core/test_utils.py,sha256=IhCLvRzDuG4hVNGeGulGKfZEgg7Ow-vgiEqewzMiaZ4,9762
64
- hpcflow/sdk/core/utils.py,sha256=pReOwnmuxJqexPUdaA8UMjJ4o8ucllBVVssWjb_LNQc,25651
66
+ hpcflow/sdk/core/utils.py,sha256=cpwfoHgbHanZQXmVZRN3VRW8X-zZxb1I6T0v2tWgBK0,25811
65
67
  hpcflow/sdk/core/validation.py,sha256=KBKiy5DdfGiGmMaB0HdKTY0V972u5dJzvkYkX0_KtCo,518
66
- hpcflow/sdk/core/workflow.py,sha256=oi9QdBgAMSr62c6HQx2OXw3ljRmxhEb_3YNXGCwIbBE,111164
68
+ hpcflow/sdk/core/workflow.py,sha256=ziKn1cA4s_eHKPMzyKfHF4bVNF7bfho4dko5qtyZKjU,113111
67
69
  hpcflow/sdk/core/zarr_io.py,sha256=V_Zm6uSiuaCbXyHFJUO74K1pAr4Zqrj3aLCBjohCwvs,5724
68
70
  hpcflow/sdk/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
71
  hpcflow/sdk/data/config_file_schema.yaml,sha256=7i3z_m3GBRtLyB4c7qPngnlQWqcIq1CyCcOysDyq4es,791
@@ -81,12 +83,12 @@ hpcflow/sdk/helper/helper.py,sha256=MkjYKHox1F4XOpy-20sCCDUTWUbQY84QpWZkcpSq9n8,
81
83
  hpcflow/sdk/helper/watcher.py,sha256=hLqgwXtZw-6ihNUUcWYnZw8TCyD_AdhYE7abOrO2r_0,4003
82
84
  hpcflow/sdk/log.py,sha256=_DA5nNS8BoSIFB3d9nrIjbxNDxFflEaL3Ubkq8UYQK8,5735
83
85
  hpcflow/sdk/persistence/__init__.py,sha256=IzWycfiO6rDn_7Kocw4Df5ETe9BSoaqqxG7Yp4FW_ls,900
84
- hpcflow/sdk/persistence/base.py,sha256=cmfvgUQAp6BVcOL04IemzmtPvzSJfVd2nQFvgp2VLyU,61718
85
- hpcflow/sdk/persistence/json.py,sha256=XD4km426WZ6F6YCrnw87ntS_Qfg3qa9CAyK2GQmJNFU,21700
86
- hpcflow/sdk/persistence/pending.py,sha256=ZaiY_I6c0CpXGPOkHfs595GtDvx8daP4nqVZGSJ6tS4,25566
86
+ hpcflow/sdk/persistence/base.py,sha256=cvk2Uqd671ZFe6JEP_UrZ7W0q2mZjimyA8DFvss8hdo,62030
87
+ hpcflow/sdk/persistence/json.py,sha256=55F4Txa50I9HzfETPqwo6gAOBRaoewMHGR3V4-2Fifc,22013
88
+ hpcflow/sdk/persistence/pending.py,sha256=XktGkRpJmlyjceOiDY0GEL2xRl6k-gbjt057LmEj5oc,25656
87
89
  hpcflow/sdk/persistence/store_resource.py,sha256=oEyocRqa8Uym-57UFosrwate-Xw9O7i2FM82TxHc4m0,4307
88
90
  hpcflow/sdk/persistence/utils.py,sha256=yQT6gS-Ipj2N6grtlV5d0czxxKE0CaeqAkXA1247XGo,1522
89
- hpcflow/sdk/persistence/zarr.py,sha256=cyhLVaHUVLuFg-sokfD8L2YOMvfCCk1KCawRD9_qg30,45452
91
+ hpcflow/sdk/persistence/zarr.py,sha256=otZbR17O3ZNIot7uVAP1UdWs_L0PHS682LuWk3z1_1g,45708
90
92
  hpcflow/sdk/runtime.py,sha256=_in5ojiy9R8fD1ZNbdE6PDmZx6kSaiG9WPB6kVBFE7k,9217
91
93
  hpcflow/sdk/submission/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
94
  hpcflow/sdk/submission/jobscript.py,sha256=Z9NUzkIcmoFw-XAtG8FdLpO2LtMt3czk1v1BnbM1eZw,44678
@@ -142,14 +144,14 @@ hpcflow/tests/unit/test_slurm.py,sha256=ewfNuXXUEEelAxcd7MBbAQ-RCvU8xBenHTAyfXYF
142
144
  hpcflow/tests/unit/test_submission.py,sha256=kQ3ksjGlfp47AYuwTA27RDX2XxRU3YxKlKC1ACTbXw8,16682
143
145
  hpcflow/tests/unit/test_task.py,sha256=QJuEpJ0y0nBesprgoau5R2kFZBCW-ygNmYatLT_M5-o,80227
144
146
  hpcflow/tests/unit/test_task_schema.py,sha256=j5HHxoqq4Mb223jKcusgX-C6-TsmKG0PLjYQ4M01ZHo,4531
145
- hpcflow/tests/unit/test_utils.py,sha256=JMhSRZFqmI9ZhREJet9en_y3aRVlQlWE7OKpkdt8SVI,14172
147
+ hpcflow/tests/unit/test_utils.py,sha256=RH3UZ99g1pKKJme1rNgzT3j_txWLT9_OWE1lWx67W5M,14610
146
148
  hpcflow/tests/unit/test_value_sequence.py,sha256=yJh5YRxN-VYMbCWiUaLH4T_Ue5F2IfVS3e11zx6HlS0,15740
147
149
  hpcflow/tests/unit/test_workflow.py,sha256=Eyr9BhnsFisAPotEAeYrAvxXT1d2i6oshEh1_OxgnSc,22732
148
150
  hpcflow/tests/unit/test_workflow_template.py,sha256=fF7LNveMwCledgncNCRfD9Nd9dL9tSPtlAAOKV3ovAU,5396
149
151
  hpcflow/tests/workflows/test_jobscript.py,sha256=9sp1o0g72JZbv2QlOl5v7wCZEFjotxiIKGNUxVaFgaA,724
150
152
  hpcflow/tests/workflows/test_workflows.py,sha256=xai6FRtGqG4lStJk6KmsqPUSuvqs9FrsBOxMVALshIs,13400
151
153
  hpcflow/viz_demo.ipynb,sha256=1QdnVsk72vihv2L6hOGyk318uEa22ZSgGxQCa7hW2oo,6238
152
- hpcflow_new2-0.2.0a176.dist-info/METADATA,sha256=Xg97HSWIATWbQGGHQ-ORIDzX1dRhvQ2O0mmtxDluQto,2466
153
- hpcflow_new2-0.2.0a176.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
154
- hpcflow_new2-0.2.0a176.dist-info/entry_points.txt,sha256=aoGtCnFdfPcXfBdu2zZyMOJoz6fPgdR0elqsgrE-USU,106
155
- hpcflow_new2-0.2.0a176.dist-info/RECORD,,
154
+ hpcflow_new2-0.2.0a177.dist-info/METADATA,sha256=I1tkrIUSwRwNsKxjcTy7lrpcOhZp9Z3HUGV_Wdoo9qw,2466
155
+ hpcflow_new2-0.2.0a177.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
156
+ hpcflow_new2-0.2.0a177.dist-info/entry_points.txt,sha256=aoGtCnFdfPcXfBdu2zZyMOJoz6fPgdR0elqsgrE-USU,106
157
+ hpcflow_new2-0.2.0a177.dist-info/RECORD,,