hpcflow-new2 0.2.0a169__py3-none-any.whl → 0.2.0a174__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1100,7 +1100,9 @@ class Workflow:
1100
1100
  # TODO: add new downstream elements?
1101
1101
 
1102
1102
  @TimeIt.decorator
1103
- def _add_empty_loop(self, loop: app.Loop) -> app.WorkflowLoop:
1103
+ def _add_empty_loop(
1104
+ self, loop: app.Loop
1105
+ ) -> Tuple[app.WorkflowLoop, List[app.ElementIteration]]:
1104
1106
  """Add a new loop (zeroth iterations only) to the workflow."""
1105
1107
 
1106
1108
  new_index = self.num_loops
@@ -1111,27 +1113,32 @@ class Workflow:
1111
1113
  # add to the WorkflowTemplate:
1112
1114
  self.template._add_empty_loop(loop_c)
1113
1115
 
1116
+ # all these element iterations will be initialised for the new loop:
1117
+ iters = self.get_element_iterations_of_tasks(loop_c.task_insert_IDs)
1118
+ iter_IDs = [i.id_ for i in iters]
1119
+
1114
1120
  # create and insert a new WorkflowLoop:
1115
- self.loops.add_object(
1116
- self.app.WorkflowLoop.new_empty_loop(
1117
- index=new_index,
1118
- workflow=self,
1119
- template=loop_c,
1120
- )
1121
+ new_loop = self.app.WorkflowLoop.new_empty_loop(
1122
+ index=new_index,
1123
+ workflow=self,
1124
+ template=loop_c,
1125
+ iterations=iters,
1121
1126
  )
1127
+ self.loops.add_object(new_loop)
1122
1128
  wk_loop = self.loops[new_index]
1123
1129
 
1124
- loop_js, _ = loop_c.to_json_like()
1130
+ # update any child loops of the new loop to include their new parent:
1131
+ for chd_loop in wk_loop.get_child_loops():
1132
+ chd_loop._update_parents(wk_loop)
1125
1133
 
1126
- # all these element iterations will be initialised for the new loop:
1127
- iter_IDs = [
1128
- i.id_ for i in self.get_element_iterations_of_tasks(loop_c.task_insert_IDs)
1129
- ]
1134
+ loop_js, _ = loop_c.to_json_like()
1130
1135
 
1131
1136
  # update persistent store:
1132
1137
  self._store.add_loop(
1133
1138
  loop_template=loop_js,
1134
1139
  iterable_parameters=wk_loop.iterable_parameters,
1140
+ parents=wk_loop.parents,
1141
+ num_added_iterations=wk_loop.num_added_iterations,
1135
1142
  iter_IDs=iter_IDs,
1136
1143
  )
1137
1144
 
@@ -1140,18 +1147,18 @@ class Workflow:
1140
1147
  return wk_loop
1141
1148
 
1142
1149
  @TimeIt.decorator
1143
- def _add_loop(self, loop: app.Loop, parent_loop_indices: Dict = None) -> None:
1150
+ def _add_loop(self, loop: app.Loop) -> None:
1144
1151
  new_wk_loop = self._add_empty_loop(loop)
1145
1152
  if loop.num_iterations is not None:
1146
1153
  # fixed number of iterations, so add remaining N > 0 iterations:
1147
1154
  for _ in range(loop.num_iterations - 1):
1148
- new_wk_loop.add_iteration(parent_loop_indices=parent_loop_indices)
1155
+ new_wk_loop.add_iteration()
1149
1156
 
1150
- def add_loop(self, loop: app.Loop, parent_loop_indices: Dict = None) -> None:
1157
+ def add_loop(self, loop: app.Loop) -> None:
1151
1158
  """Add a loop to a subset of workflow tasks."""
1152
1159
  with self._store.cached_load():
1153
1160
  with self.batch_update():
1154
- self._add_loop(loop, parent_loop_indices)
1161
+ self._add_loop(loop)
1155
1162
 
1156
1163
  @property
1157
1164
  def creation_info(self):
@@ -1231,11 +1238,15 @@ class Workflow:
1231
1238
  with self._store.cached_load():
1232
1239
  wk_loops = []
1233
1240
  for idx, loop_dat in self._store.get_loops().items():
1241
+ num_add_iters = {
1242
+ tuple(i[0]): i[1] for i in loop_dat["num_added_iterations"]
1243
+ }
1234
1244
  wk_loop = self.app.WorkflowLoop(
1235
1245
  index=idx,
1236
1246
  workflow=self,
1237
1247
  template=self.template.loops[idx],
1238
- num_added_iterations=loop_dat["num_added_iterations"],
1248
+ parents=loop_dat["parents"],
1249
+ num_added_iterations=num_add_iters,
1239
1250
  iterable_parameters=loop_dat["iterable_parameters"],
1240
1251
  )
1241
1252
  wk_loops.append(wk_loop)
@@ -1472,6 +1483,7 @@ class Workflow:
1472
1483
 
1473
1484
  for loop in self.loops:
1474
1485
  loop._reset_pending_num_added_iters()
1486
+ loop._reset_pending_parents()
1475
1487
 
1476
1488
  self._reject_pending()
1477
1489
 
@@ -1496,6 +1508,7 @@ class Workflow:
1496
1508
 
1497
1509
  for loop in self.loops:
1498
1510
  loop._accept_pending_num_added_iters()
1511
+ loop._accept_pending_parents()
1499
1512
 
1500
1513
  # TODO: handle errors in commit pending?
1501
1514
  self._store._pending.commit_all()
@@ -2073,27 +2086,79 @@ class Workflow:
2073
2086
  yield element
2074
2087
 
2075
2088
  @TimeIt.decorator
2076
- def get_iteration_task_pathway(self):
2089
+ def get_iteration_task_pathway(self, ret_iter_IDs=False, ret_data_idx=False):
2077
2090
  pathway = []
2078
2091
  for task in self.tasks:
2079
- loop_idx = {}
2080
- pathway.append((task.insert_ID, loop_idx))
2081
-
2082
- for loop in self.loops: # TODO: order by depth (inner loops first?)
2083
- task_subset = loop.task_insert_IDs
2084
- subset_idx = [idx for idx, i in enumerate(pathway) if i[0] in task_subset]
2085
- looped_pathway = []
2086
- for iter_i in range(loop.num_added_iterations):
2087
- for j in subset_idx:
2088
- item_j = copy.deepcopy(pathway[j])
2089
- item_j[1][loop.name] = iter_i
2090
- looped_pathway.append(item_j)
2091
-
2092
- # replaced pathway `sub_idx` items with `looped_pathway` items:
2093
- pathway = replace_items(
2094
- pathway, subset_idx[0], subset_idx[-1] + 1, looped_pathway
2092
+ pathway.append((task.insert_ID, {}))
2093
+
2094
+ added_loop_names = set()
2095
+ for _ in range(self.num_loops):
2096
+ to_add = None
2097
+ for loop in self.loops:
2098
+ if loop.name in added_loop_names:
2099
+ continue
2100
+ elif set(loop.parents).issubset(added_loop_names):
2101
+ # add a loop only once its parents have been added:
2102
+ to_add = loop
2103
+ break
2104
+
2105
+ if to_add is None:
2106
+ raise RuntimeError(
2107
+ "Failed to find a loop whose parents have already been added to the "
2108
+ "iteration task pathway."
2109
+ )
2110
+
2111
+ iIDs = to_add.task_insert_IDs
2112
+ relevant_idx = [idx for idx, i in enumerate(pathway) if i[0] in iIDs]
2113
+
2114
+ for num_add_k, num_add in to_add.num_added_iterations.items():
2115
+ parent_loop_idx = {
2116
+ to_add.parents[idx]: i for idx, i in enumerate(num_add_k)
2117
+ }
2118
+
2119
+ repl = []
2120
+ repl_idx = []
2121
+ for i in range(num_add):
2122
+ for p_idx, p in enumerate(pathway):
2123
+ skip = False
2124
+ if p[0] not in iIDs:
2125
+ continue
2126
+ for k, v in parent_loop_idx.items():
2127
+ if p[1][k] != v:
2128
+ skip = True
2129
+ break
2130
+ if skip:
2131
+ continue
2132
+ p = copy.deepcopy(p)
2133
+ p[1].update({to_add.name: i})
2134
+ repl_idx.append(p_idx)
2135
+ repl.append(p)
2136
+
2137
+ if repl:
2138
+ repl_start, repl_stop = min(repl_idx), max(repl_idx)
2139
+ pathway = replace_items(pathway, repl_start, repl_stop + 1, repl)
2140
+
2141
+ added_loop_names.add(to_add.name)
2142
+
2143
+ if added_loop_names != set(i.name for i in self.loops):
2144
+ raise RuntimeError(
2145
+ "Not all loops have been considered in the iteration task pathway."
2095
2146
  )
2096
2147
 
2148
+ if ret_iter_IDs or ret_data_idx:
2149
+ all_iters = self.get_all_element_iterations()
2150
+ for idx, i in enumerate(pathway):
2151
+ i_iters = []
2152
+ for iter_j in all_iters:
2153
+ if iter_j.task.insert_ID == i[0] and iter_j.loop_idx == i[1]:
2154
+ i_iters.append(iter_j)
2155
+ new = list(i)
2156
+ if ret_iter_IDs:
2157
+ new += [tuple([j.id_ for j in i_iters])]
2158
+ if ret_data_idx:
2159
+ new += [tuple(j.get_data_idx() for j in i_iters)]
2160
+ pathway[idx] = tuple(new)
2161
+
2097
2162
  return pathway
2098
2163
 
2099
2164
  @TimeIt.decorator
@@ -934,19 +934,24 @@ class PersistentStore(ABC):
934
934
  self,
935
935
  loop_template: Dict,
936
936
  iterable_parameters,
937
+ parents: List[str],
938
+ num_added_iterations: Dict[Tuple[int], int],
937
939
  iter_IDs: List[int],
938
940
  save: bool = True,
939
941
  ):
940
942
  """Add a new loop to the workflow."""
941
943
  self.logger.debug(f"Adding store loop.")
942
944
  new_idx = self._get_num_total_loops()
945
+ added_iters = [[list(k), v] for k, v in num_added_iterations.items()]
943
946
  self._pending.add_loops[new_idx] = {
944
947
  "loop_template": loop_template,
945
948
  "iterable_parameters": iterable_parameters,
949
+ "parents": parents,
950
+ "num_added_iterations": added_iters,
946
951
  }
947
952
 
948
953
  for i in iter_IDs:
949
- self._pending.update_loop_indices[i] = {loop_template["name"]: 0}
954
+ self._pending.update_loop_indices[i].update({loop_template["name"]: 0})
950
955
 
951
956
  if save:
952
957
  self.save()
@@ -1298,12 +1303,30 @@ class PersistentStore(ABC):
1298
1303
  self.save()
1299
1304
 
1300
1305
  def update_loop_num_iters(
1301
- self, index: int, num_iters: int, save: bool = True
1306
+ self, index: int, num_added_iters: int, save: bool = True
1302
1307
  ) -> None:
1303
1308
  self.logger.debug(
1304
- f"Updating loop {index!r} num added iterations to {num_iters!r}."
1309
+ f"Updating loop {index!r} num added iterations to {num_added_iters!r}."
1305
1310
  )
1306
- self._pending.update_loop_num_iters[index] = num_iters
1311
+ num_added_iters = [[list(k), v] for k, v in num_added_iters.items()]
1312
+ self._pending.update_loop_num_iters[index] = num_added_iters
1313
+ if save:
1314
+ self.save()
1315
+
1316
+ def update_loop_parents(
1317
+ self,
1318
+ index: int,
1319
+ num_added_iters: int,
1320
+ parents: List[str],
1321
+ save: bool = True,
1322
+ ) -> None:
1323
+ self.logger.debug(
1324
+ f"Updating loop {index!r} parents to {parents!r}, and num added iterations "
1325
+ f"to {num_added_iters}."
1326
+ )
1327
+ num_added_iters = [[list(k), v] for k, v in num_added_iters.items()]
1328
+ self._pending.update_loop_num_iters[index] = num_added_iters
1329
+ self._pending.update_loop_parents[index] = parents
1307
1330
  if save:
1308
1331
  self.save()
1309
1332
 
@@ -1349,6 +1372,11 @@ class PersistentStore(ABC):
1349
1372
  pend_num_iters = self._pending.update_loop_num_iters.get(id_)
1350
1373
  if pend_num_iters:
1351
1374
  loop_i["num_added_iterations"] = pend_num_iters
1375
+ # consider pending change to parents:
1376
+ pend_parents = self._pending.update_loop_parents.get(id_)
1377
+ if pend_parents:
1378
+ loop_i["parents"] = pend_parents
1379
+
1352
1380
  loops_new[id_] = loop_i
1353
1381
  return loops_new
1354
1382
 
@@ -54,6 +54,7 @@ class JSONPersistentStore(PersistentStore):
54
54
  commit_tasks=(_meta_res,),
55
55
  commit_loops=(_meta_res,),
56
56
  commit_loop_num_iters=(_meta_res,),
57
+ commit_loop_parents=(_meta_res,),
57
58
  commit_submissions=(_subs_res,),
58
59
  commit_submission_parts=(_subs_res,),
59
60
  commit_js_metadata=(_subs_res,),
@@ -169,6 +170,7 @@ class JSONPersistentStore(PersistentStore):
169
170
  {
170
171
  "num_added_iterations": loop["num_added_iterations"],
171
172
  "iterable_parameters": loop["iterable_parameters"],
173
+ "parents": loop["parents"],
172
174
  }
173
175
  )
174
176
  md["template"]["loops"].append(loop["loop_template"])
@@ -225,6 +227,10 @@ class JSONPersistentStore(PersistentStore):
225
227
  with self.using_resource("metadata", action="update") as md:
226
228
  md["loops"][index]["num_added_iterations"] = num_iters
227
229
 
230
+ def _update_loop_parents(self, index: int, parents: List[str]):
231
+ with self.using_resource("metadata", action="update") as md:
232
+ md["loops"][index]["parents"] = parents
233
+
228
234
  def _append_EARs(self, EARs: List[StoreEAR]):
229
235
  with self.using_resource("metadata", action="update") as md:
230
236
  md["runs"].extend(i.encode(self.ts_fmt) for i in EARs)
@@ -69,8 +69,9 @@ class PendingChanges:
69
69
  self.set_parameters: Dict[int, AnySParameter] = None
70
70
 
71
71
  self.update_param_sources: Dict[int, Dict] = None
72
- self.update_loop_indices: Dict[int, Dict] = None
72
+ self.update_loop_indices: Dict[int, Dict[str, int]] = None
73
73
  self.update_loop_num_iters: Dict[int, int] = None
74
+ self.update_loop_parents: Dict[int, List[str]] = None
74
75
 
75
76
  self.reset(is_init=True) # set up initial data structures
76
77
 
@@ -101,6 +102,7 @@ class PendingChanges:
101
102
  or bool(self.update_param_sources)
102
103
  or bool(self.update_loop_indices)
103
104
  or bool(self.update_loop_num_iters)
105
+ or bool(self.update_loop_parents)
104
106
  )
105
107
 
106
108
  def where_pending(self) -> List[str]:
@@ -158,6 +160,16 @@ class PendingChanges:
158
160
  loop_ids = list(self.add_loops.keys())
159
161
  self.logger.debug(f"commit: adding pending loops with indices {loop_ids!r}")
160
162
  self.store._append_loops(loops)
163
+
164
+ # pending num_added_iters and parents that belong to pending loops are now
165
+ # committed:
166
+ self.update_loop_num_iters = {
167
+ k: v for k, v in self.update_loop_num_iters.items() if k not in loop_ids
168
+ }
169
+ self.update_loop_parents = {
170
+ k: v for k, v in self.update_loop_parents.items() if k not in loop_ids
171
+ }
172
+
161
173
  self.clear_add_loops()
162
174
 
163
175
  @TimeIt.decorator
@@ -415,6 +427,14 @@ class PendingChanges:
415
427
  self.store._update_loop_num_iters(index, num_iters)
416
428
  self.clear_update_loop_num_iters()
417
429
 
430
+ @TimeIt.decorator
431
+ def commit_loop_parents(self) -> None:
432
+ """Make pending update to additional loop parents."""
433
+ for index, parents in self.update_loop_parents.items():
434
+ self.logger.debug(f"commit: updating loop {index!r} parents to {parents!r}.")
435
+ self.store._update_loop_parents(index, parents)
436
+ self.clear_update_loop_parents()
437
+
418
438
  def clear_add_tasks(self):
419
439
  self.add_tasks = {}
420
440
 
@@ -482,11 +502,14 @@ class PendingChanges:
482
502
  self.update_param_sources = {}
483
503
 
484
504
  def clear_update_loop_indices(self):
485
- self.update_loop_indices = {}
505
+ self.update_loop_indices = defaultdict(dict)
486
506
 
487
507
  def clear_update_loop_num_iters(self):
488
508
  self.update_loop_num_iters = {}
489
509
 
510
+ def clear_update_loop_parents(self):
511
+ self.update_loop_parents = {}
512
+
490
513
  def reset(self, is_init=False) -> None:
491
514
  """Clear all pending data and prepare to accept new pending data."""
492
515
 
@@ -526,6 +549,7 @@ class PendingChanges:
526
549
  self.clear_update_param_sources()
527
550
  self.clear_update_loop_indices()
528
551
  self.clear_update_loop_num_iters()
552
+ self.clear_update_loop_parents()
529
553
 
530
554
 
531
555
  @dataclass
@@ -561,6 +585,7 @@ class CommitResourceMap:
561
585
  commit_param_sources: Optional[Tuple[str]] = tuple()
562
586
  commit_loop_indices: Optional[Tuple[str]] = tuple()
563
587
  commit_loop_num_iters: Optional[Tuple[str]] = tuple()
588
+ commit_loop_parents: Optional[Tuple[str]] = tuple()
564
589
 
565
590
  def __post_init__(self):
566
591
  self.groups = self.group_by_resource()
@@ -497,6 +497,7 @@ class ZarrPersistentStore(PersistentStore):
497
497
  {
498
498
  "num_added_iterations": loop["num_added_iterations"],
499
499
  "iterable_parameters": loop["iterable_parameters"],
500
+ "parents": loop["parents"],
500
501
  }
501
502
  )
502
503
  attrs["template"]["loops"].append(loop["loop_template"])
@@ -587,6 +588,10 @@ class ZarrPersistentStore(PersistentStore):
587
588
  with self.using_resource("attrs", action="update") as attrs:
588
589
  attrs["loops"][index]["num_added_iterations"] = num_iters
589
590
 
591
+ def _update_loop_parents(self, index: int, parents: List[str]):
592
+ with self.using_resource("attrs", action="update") as attrs:
593
+ attrs["loops"][index]["parents"] = parents
594
+
590
595
  def _append_EARs(self, EARs: List[ZarrStoreEAR]):
591
596
  arr = self._get_EARs_arr(mode="r+")
592
597
  attrs_orig = arr.attrs.asdict()
@@ -1,4 +1,6 @@
1
+ import sys
1
2
  import pytest
3
+ import requests
2
4
 
3
5
  from hpcflow.app import app as hf
4
6
 
@@ -87,6 +89,14 @@ def test_get_demo_data_manifest(null_config):
87
89
  hf.get_demo_data_files_manifest()
88
90
 
89
91
 
92
+ @pytest.mark.xfail(
93
+ condition=sys.platform == "darwin",
94
+ raises=requests.exceptions.HTTPError,
95
+ reason=(
96
+ "GHA MacOS runners use the same IP address, so we get rate limited when "
97
+ "retrieving demo data from GitHub."
98
+ ),
99
+ )
90
100
  def test_get_demo_data_cache(null_config):
91
101
  hf.clear_demo_data_cache_dir()
92
102
  hf.cache_demo_data_file("text_file.txt")
@@ -0,0 +1,91 @@
1
+ import pytest
2
+ from hpcflow.app import app as hf
3
+ from hpcflow.sdk.core.errors import MissingElementGroup
4
+
5
+
6
+ def test_group_simple(null_config, tmp_path):
7
+ s1 = hf.TaskSchema(
8
+ objective="t1",
9
+ inputs=[hf.SchemaInput("p1")],
10
+ outputs=[hf.SchemaOutput("p2")],
11
+ actions=[
12
+ hf.Action(
13
+ commands=[
14
+ hf.Command(
15
+ "echo $(( <<parameter:p1>> + 1 ))", stdout="<<parameter:p2>>"
16
+ )
17
+ ]
18
+ )
19
+ ],
20
+ )
21
+ s2 = hf.TaskSchema(
22
+ objective="t2",
23
+ inputs=[hf.SchemaInput("p2", group="my_group")],
24
+ outputs=[hf.SchemaOutput("p3")],
25
+ actions=[
26
+ hf.Action(
27
+ commands=[
28
+ hf.Command(
29
+ "echo $(( <<parameter:p2>> + 2 ))", stdout="<<parameter:p3>>"
30
+ )
31
+ ]
32
+ )
33
+ ],
34
+ )
35
+
36
+ t1 = hf.Task(
37
+ schema=s1,
38
+ sequences=[hf.ValueSequence("inputs.p1", values=[1, 2, 3])],
39
+ groups=[hf.ElementGroup(name="my_group")],
40
+ )
41
+ t2 = hf.Task(schema=s2)
42
+ wk = hf.Workflow.from_template_data(
43
+ template_name="test_groups",
44
+ path=tmp_path,
45
+ tasks=[t1, t2],
46
+ )
47
+ assert [task.num_elements for task in wk.tasks] == [3, 1]
48
+ assert len(wk.tasks.t2.elements[0].get_data_idx("inputs.p2")["inputs.p2"]) == 3
49
+
50
+
51
+ def test_group_raise_no_elements(null_config, tmp_path):
52
+ s1 = hf.TaskSchema(
53
+ objective="t1",
54
+ inputs=[hf.SchemaInput("p1")],
55
+ outputs=[hf.SchemaOutput("p2")],
56
+ actions=[
57
+ hf.Action(
58
+ commands=[
59
+ hf.Command(
60
+ "echo $(( <<parameter:p1>> + 1 ))", stdout="<<parameter:p2>>"
61
+ )
62
+ ]
63
+ )
64
+ ],
65
+ )
66
+ s2 = hf.TaskSchema(
67
+ objective="t2",
68
+ inputs=[hf.SchemaInput("p2", group="my_group")],
69
+ outputs=[hf.SchemaOutput("p3")],
70
+ actions=[
71
+ hf.Action(
72
+ commands=[
73
+ hf.Command(
74
+ "echo $(( <<parameter:p2>> + 2 ))", stdout="<<parameter:p3>>"
75
+ )
76
+ ]
77
+ )
78
+ ],
79
+ )
80
+
81
+ t1 = hf.Task(
82
+ schema=s1,
83
+ sequences=[hf.ValueSequence("inputs.p1", values=[1, 2, 3])],
84
+ )
85
+ t2 = hf.Task(schema=s2)
86
+ with pytest.raises(MissingElementGroup):
87
+ hf.Workflow.from_template_data(
88
+ template_name="test_groups",
89
+ path=tmp_path,
90
+ tasks=[t1, t2],
91
+ )
@@ -1,4 +1,6 @@
1
+ import sys
1
2
  import pytest
3
+ import requests
2
4
 
3
5
  from hpcflow.app import app as hf
4
6
  from hpcflow.sdk.core.errors import InputValueDuplicateSequenceAddress
@@ -155,6 +157,14 @@ def test_value_is_dict_check_no_raise_if_sub_parameter(null_config):
155
157
  hf.InputValue("p1c", path="a", value=101)
156
158
 
157
159
 
160
+ @pytest.mark.xfail(
161
+ condition=sys.platform == "darwin",
162
+ raises=requests.exceptions.HTTPError,
163
+ reason=(
164
+ "GHA MacOS runners use the same IP address, so we get rate limited when "
165
+ "retrieving demo data from GitHub."
166
+ ),
167
+ )
158
168
  def test_demo_data_value(null_config):
159
169
  name = "text_file.txt"
160
170
  assert hf.InputValue("p1", value=f"<<demo_data_file:{name}>>").value == str(