hpcflow-new2 0.2.0a169__py3-none-any.whl → 0.2.0a174__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hpcflow/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.0a169"
1
+ __version__ = "0.2.0a174"
hpcflow/sdk/app.py CHANGED
@@ -19,6 +19,7 @@ from typing import Any, Callable, Dict, List, Optional, Type, Union, Tuple
19
19
  import warnings
20
20
  import zipfile
21
21
  from platformdirs import user_cache_path, user_data_dir
22
+ import requests
22
23
  from reretry import retry
23
24
  import rich
24
25
  from rich.console import Console, Group
@@ -63,6 +64,46 @@ SDK_logger = get_SDK_logger(__name__)
63
64
  DEMO_WK_FORMATS = {".yaml": "yaml", ".yml": "yaml", ".json": "json", ".jsonc": "json"}
64
65
 
65
66
 
67
+ def rate_limit_safe_url_to_fs(app, *args, logger=None, **kwargs):
68
+ """Call fsspec's `url_to_fs` but retry on `requests.exceptions.HTTPError`s
69
+
70
+ References
71
+ ----------
72
+ [1]: https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?
73
+ apiVersion=2022-11-28#about-secondary-rate-limits
74
+ """
75
+
76
+ auth = {}
77
+ if app.run_time_info.in_pytest:
78
+ gh_token = os.environ.get("GH_TOKEN")
79
+ if gh_token:
80
+ # using the GitHub actions built in token increases the number of API
81
+ # requests allowed per hour to 1000 [1]. fsspec requires "username" to be
82
+ # set if using "token":
83
+ auth = {"username": "", "token": gh_token}
84
+ logger.info(
85
+ "calling fsspec's `url_to_fs` with a token from the env variable "
86
+ "`GH_TOKEN`."
87
+ )
88
+
89
+ # GitHub actions testing is potentially highly concurrent, with multiple
90
+ # Python versions and OSes being tested at the same time; so we might hit
91
+ # GitHub's secondary rate limit:
92
+ @retry(
93
+ requests.exceptions.HTTPError,
94
+ tries=3,
95
+ delay=5,
96
+ backoff=1.5,
97
+ jitter=(0, 20),
98
+ logger=logger,
99
+ )
100
+ def _inner(*args, **kwargs):
101
+ kwargs.update(auth)
102
+ return url_to_fs(*args, **kwargs)
103
+
104
+ return _inner(*args, **kwargs)
105
+
106
+
66
107
  def __getattr__(name):
67
108
  """Allow access to core classes and API functions (useful for type annotations)."""
68
109
  try:
@@ -2278,7 +2319,11 @@ class BaseApp(metaclass=Singleton):
2278
2319
  f"`demo_data_manifest_file`: "
2279
2320
  f"{self.config.demo_data_manifest_file!r}."
2280
2321
  )
2281
- fs, url_path = url_to_fs(str(self.config.demo_data_manifest_file))
2322
+ fs, url_path = rate_limit_safe_url_to_fs(
2323
+ self,
2324
+ str(self.config.demo_data_manifest_file),
2325
+ logger=self.logger,
2326
+ )
2282
2327
  with fs.open(url_path) as fh:
2283
2328
  manifest = json.load(fh)
2284
2329
  else:
@@ -2324,7 +2369,11 @@ class BaseApp(metaclass=Singleton):
2324
2369
  """
2325
2370
 
2326
2371
  def _retrieve_source_path_from_config(src_fn):
2327
- fs, url_path = url_to_fs(self.config.demo_data_dir)
2372
+ fs, url_path = rate_limit_safe_url_to_fs(
2373
+ self,
2374
+ self.config.demo_data_dir,
2375
+ logger=self.logger,
2376
+ )
2328
2377
  if isinstance(fs, LocalFileSystem):
2329
2378
  out = url_path
2330
2379
  delete = False
@@ -171,6 +171,10 @@ class LoopAlreadyExistsError(Exception):
171
171
  pass
172
172
 
173
173
 
174
+ class LoopTaskSubsetError(ValueError):
175
+ pass
176
+
177
+
174
178
  class SchedulerVersionsFailure(RuntimeError):
175
179
  """We couldn't get the scheduler and or shell versions."""
176
180
 
@@ -423,3 +427,7 @@ class UnknownEnvironmentPresetError(ValueError):
423
427
 
424
428
  class MultipleEnvironmentsError(ValueError):
425
429
  pass
430
+
431
+
432
+ class MissingElementGroup(ValueError):
433
+ pass
hpcflow/sdk/core/loop.py CHANGED
@@ -4,6 +4,7 @@ import copy
4
4
  from typing import Dict, List, Optional, Tuple, Union
5
5
 
6
6
  from hpcflow.sdk import app
7
+ from hpcflow.sdk.core.errors import LoopTaskSubsetError
7
8
  from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
8
9
  from hpcflow.sdk.core.parameters import InputSourceType
9
10
  from hpcflow.sdk.core.task import WorkflowTask
@@ -175,17 +176,25 @@ class WorkflowLoop:
175
176
  index: int,
176
177
  workflow: app.Workflow,
177
178
  template: app.Loop,
178
- num_added_iterations: int,
179
+ num_added_iterations: Dict[Tuple[int], int],
179
180
  iterable_parameters: Dict[int : List[int, List[int]]],
181
+ parents: List[str],
180
182
  ):
181
183
  self._index = index
182
184
  self._workflow = workflow
183
185
  self._template = template
184
186
  self._num_added_iterations = num_added_iterations
185
187
  self._iterable_parameters = iterable_parameters
188
+ self._parents = parents
186
189
 
187
- # incremented when a new loop iteration is added, reset on dump to disk:
188
- self._pending_num_added_iterations = 0
190
+ # appended to on adding an empty loop to the workflow that's a parent of this loop,
191
+ # reset and added to `self._parents` on dump to disk:
192
+ self._pending_parents = []
193
+
194
+ # used for `num_added_iterations` when a new loop iteration is added, or when
195
+ # parents are appended to; reset to None on dump to disk. Each key is a tuple of
196
+ # parent loop indices and each value is the number of pending new iterations:
197
+ self._pending_num_added_iterations = None
189
198
 
190
199
  self._validate()
191
200
 
@@ -194,7 +203,20 @@ class WorkflowLoop:
194
203
  task_indices = self.task_indices
195
204
  task_min, task_max = task_indices[0], task_indices[-1]
196
205
  if task_indices != tuple(range(task_min, task_max + 1)):
197
- raise ValueError(f"Loop task subset must be a contiguous range")
206
+ raise LoopTaskSubsetError(
207
+ f"Loop {self.name!r}: task subset must be an ascending contiguous range, "
208
+ f"but specified task indices were: {self.task_indices!r}."
209
+ )
210
+
211
+ for task in self.downstream_tasks:
212
+ for param in self.iterable_parameters:
213
+ if param in task.template.all_schema_input_types:
214
+ raise NotImplementedError(
215
+ f"Downstream task {task.unique_name!r} of loop {self.name!r} "
216
+ f"has as one of its input parameters this loop's iterable "
217
+ f"parameter {param!r}. This parameter cannot be sourced "
218
+ f"correctly."
219
+ )
198
220
 
199
221
  def __repr__(self) -> str:
200
222
  return (
@@ -202,12 +224,57 @@ class WorkflowLoop:
202
224
  f"num_added_iterations={self.num_added_iterations!r})"
203
225
  )
204
226
 
227
+ @property
228
+ def num_added_iterations(self):
229
+
230
+ if self._pending_num_added_iterations:
231
+ return self._pending_num_added_iterations
232
+ else:
233
+ return self._num_added_iterations
234
+
235
+ def _initialise_pending_added_iters(self, added_iters_key):
236
+ if not self._pending_num_added_iterations:
237
+ self._pending_num_added_iterations = copy.deepcopy(self._num_added_iterations)
238
+
239
+ if added_iters_key not in self._pending_num_added_iterations:
240
+ self._pending_num_added_iterations[added_iters_key] = 1
241
+
242
+ def _increment_pending_added_iters(self, added_iters_key):
243
+ if not self._pending_num_added_iterations:
244
+ self._pending_num_added_iterations = copy.deepcopy(self._num_added_iterations)
245
+
246
+ self._pending_num_added_iterations[added_iters_key] += 1
247
+
248
+ def _update_parents(self, parent: app.WorkflowLoop):
249
+ self._pending_parents.append(parent.name)
250
+
251
+ if not self._pending_num_added_iterations:
252
+ self._pending_num_added_iterations = copy.deepcopy(self._num_added_iterations)
253
+
254
+ self._pending_num_added_iterations = {
255
+ tuple(list(k) + [0]): v for k, v in self._pending_num_added_iterations.items()
256
+ }
257
+
258
+ self.workflow._store.update_loop_parents(
259
+ index=self.index,
260
+ num_added_iters=self.num_added_iterations,
261
+ parents=self.parents,
262
+ )
263
+
205
264
  def _reset_pending_num_added_iters(self):
206
- self._pending_num_added_iterations = 0
265
+ self._pending_num_added_iterations = None
207
266
 
208
267
  def _accept_pending_num_added_iters(self):
209
- self._num_added_iterations = self.num_added_iterations
210
- self._reset_pending_num_added_iters()
268
+ if self._pending_num_added_iterations:
269
+ self._num_added_iterations = copy.deepcopy(self._pending_num_added_iterations)
270
+ self._reset_pending_num_added_iters()
271
+
272
+ def _reset_pending_parents(self):
273
+ self._pending_parents = []
274
+
275
+ def _accept_pending_parents(self):
276
+ self._parents += self._pending_parents
277
+ self._reset_pending_parents()
211
278
 
212
279
  @property
213
280
  def index(self):
@@ -234,6 +301,10 @@ class WorkflowLoop:
234
301
  def template(self):
235
302
  return self._template
236
303
 
304
+ @property
305
+ def parents(self) -> List[str]:
306
+ return self._parents + self._pending_parents
307
+
237
308
  @property
238
309
  def name(self):
239
310
  return self.template.name
@@ -247,8 +318,14 @@ class WorkflowLoop:
247
318
  return self.template.num_iterations
248
319
 
249
320
  @property
250
- def num_added_iterations(self):
251
- return self._num_added_iterations + self._pending_num_added_iterations
321
+ def downstream_tasks(self) -> List[app.WorkflowLoop]:
322
+ """Return tasks that are not part of the loop, and downstream from this loop."""
323
+ return self.workflow.tasks[self.task_objects[-1].index + 1 :]
324
+
325
+ @property
326
+ def upstream_tasks(self) -> List[app.WorkflowLoop]:
327
+ """Return tasks that are not part of the loop, and upstream from this loop."""
328
+ return self.workflow.tasks[: self.task_objects[0].index]
252
329
 
253
330
  @staticmethod
254
331
  def _find_iterable_parameters(loop_template: app.Loop):
@@ -263,8 +340,6 @@ class WorkflowLoop:
263
340
  all_outputs_idx[typ] = []
264
341
  all_outputs_idx[typ].append(task.insert_ID)
265
342
 
266
- all_inputs_first_idx, all_outputs_idx
267
-
268
343
  iterable_params = {}
269
344
  for typ, first_idx in all_inputs_first_idx.items():
270
345
  if typ in all_outputs_idx and first_idx <= all_outputs_idx[typ][0]:
@@ -280,38 +355,60 @@ class WorkflowLoop:
280
355
  return iterable_params
281
356
 
282
357
  @classmethod
283
- def new_empty_loop(cls, index: int, workflow: app.Workflow, template: app.Loop):
358
+ def new_empty_loop(
359
+ cls,
360
+ index: int,
361
+ workflow: app.Workflow,
362
+ template: app.Loop,
363
+ iterations: List[app.ElementIteration],
364
+ ) -> Tuple[app.WorkflowLoop, List[Dict[str, int]]]:
365
+ parent_loops = cls._get_parent_loops(index, workflow, template)
366
+ parent_names = [i.name for i in parent_loops]
367
+ num_added_iters = {}
368
+ for iter_i in iterations:
369
+ num_added_iters[tuple([iter_i.loop_idx[j] for j in parent_names])] = 1
370
+
284
371
  obj = cls(
285
372
  index=index,
286
373
  workflow=workflow,
287
374
  template=template,
288
- num_added_iterations=1,
375
+ num_added_iterations=num_added_iters,
289
376
  iterable_parameters=cls._find_iterable_parameters(template),
377
+ parents=parent_names,
290
378
  )
291
379
  return obj
292
380
 
293
- def get_parent_loops(self) -> List[app.WorkflowLoop]:
294
- """Get loops whose task subset is a superset of this loop's task subset. If two
295
- loops have identical task subsets, the first loop in the workflow loop index is
296
- considered the parent."""
381
+ @classmethod
382
+ def _get_parent_loops(
383
+ cls,
384
+ index: int,
385
+ workflow: app.Workflow,
386
+ template: app.Loop,
387
+ ) -> List[app.WorkflowLoop]:
297
388
  parents = []
298
389
  passed_self = False
299
- self_tasks = set(self.task_insert_IDs)
300
- for loop_i in self.workflow.loops:
301
- if loop_i.index == self.index:
390
+ self_tasks = set(template.task_insert_IDs)
391
+ for loop_i in workflow.loops:
392
+ if loop_i.index == index:
302
393
  passed_self = True
303
394
  continue
304
395
  other_tasks = set(loop_i.task_insert_IDs)
305
396
  if self_tasks.issubset(other_tasks):
306
- if (self_tasks == other_tasks) and passed_self:
397
+ if (self_tasks == other_tasks) and not passed_self:
307
398
  continue
308
399
  parents.append(loop_i)
309
400
  return parents
310
401
 
402
+ def get_parent_loops(self) -> List[app.WorkflowLoop]:
403
+ """Get loops whose task subset is a superset of this loop's task subset. If two
404
+ loops have identical task subsets, the first loop in the workflow loop list is
405
+ considered the child."""
406
+ return self._get_parent_loops(self.index, self.workflow, self.template)
407
+
311
408
  def get_child_loops(self) -> List[app.WorkflowLoop]:
312
409
  """Get loops whose task subset is a subset of this loop's task subset. If two
313
- loops have identical task subsets, the first loop in the workflow loop index is
314
- considered the parent."""
410
+ loops have identical task subsets, the first loop in the workflow loop list is
411
+ considered the child."""
315
412
  children = []
316
413
  passed_self = False
317
414
  self_tasks = set(self.task_insert_IDs)
@@ -321,23 +418,24 @@ class WorkflowLoop:
321
418
  continue
322
419
  other_tasks = set(loop_i.task_insert_IDs)
323
420
  if self_tasks.issuperset(other_tasks):
324
- if (self_tasks == other_tasks) and not passed_self:
421
+ if (self_tasks == other_tasks) and passed_self:
325
422
  continue
326
423
  children.append(loop_i)
424
+
425
+ # order by depth, so direct child is first:
426
+ children = sorted(children, key=lambda x: len(next(iter(x.num_added_iterations))))
327
427
  return children
328
428
 
329
429
  def add_iteration(self, parent_loop_indices=None):
330
- parent_loop_indices = parent_loop_indices or {}
331
- cur_loop_idx = self.num_added_iterations - 1
332
430
  parent_loops = self.get_parent_loops()
333
431
  child_loops = self.get_child_loops()
432
+ child_loop_names = [i.name for i in child_loops]
433
+ parent_loop_indices = parent_loop_indices or {}
434
+ if parent_loops and not parent_loop_indices:
435
+ parent_loop_indices = {i.name: 0 for i in parent_loops}
334
436
 
335
- for parent_loop in parent_loops:
336
- if parent_loop.name not in parent_loop_indices:
337
- raise ValueError(
338
- f"Parent loop {parent_loop.name!r} must be specified in "
339
- f"`parent_loop_indices`."
340
- )
437
+ iters_key = tuple([parent_loop_indices[k] for k in self.parents])
438
+ cur_loop_idx = self.num_added_iterations[iters_key] - 1
341
439
  all_new_data_idx = {} # keys are (task.insert_ID and element.index)
342
440
 
343
441
  for task in self.task_objects:
@@ -346,6 +444,16 @@ class WorkflowLoop:
346
444
  element = task.elements[elem_idx]
347
445
  inp_statuses = task.template.get_input_statuses(element.element_set)
348
446
  new_data_idx = {}
447
+ existing_inners = []
448
+ for iter_i in element.iterations:
449
+ if iter_i.loop_idx[self.name] == cur_loop_idx:
450
+ existing_inner_i = {
451
+ k: v
452
+ for k, v in iter_i.loop_idx.items()
453
+ if k in child_loop_names
454
+ }
455
+ if existing_inner_i:
456
+ existing_inners.append(existing_inner_i)
349
457
 
350
458
  # copy resources from zeroth iteration:
351
459
  for key, val in element.iterations[0].get_data_idx().items():
@@ -367,17 +475,29 @@ class WorkflowLoop:
367
475
  # parametrised:
368
476
  if task.insert_ID == iter_dat["output_tasks"][-1]:
369
477
  src_elem = element
478
+ grouped_elems = None
370
479
  else:
371
480
  src_elems = element.get_dependent_elements_recursively(
372
481
  task_insert_ID=iter_dat["output_tasks"][-1]
373
482
  )
374
- if len(src_elems) > 1:
483
+ # consider groups
484
+ inp_group_name = inp.single_labelled_data.get("group")
485
+ grouped_elems = []
486
+ for i in src_elems:
487
+ i_in_group = any(
488
+ j.name == inp_group_name for j in i.element_set.groups
489
+ )
490
+ if i_in_group:
491
+ grouped_elems.append(i)
492
+
493
+ if not grouped_elems and len(src_elems) > 1:
375
494
  raise NotImplementedError(
376
495
  f"Multiple elements found in the iterable parameter {inp!r}'s"
377
496
  f" latest output task (insert ID: "
378
497
  f"{iter_dat['output_tasks'][-1]}) that can be used to "
379
- f"parametrise the next iteration."
498
+ f"parametrise the next iteration: {src_elems!r}."
380
499
  )
500
+
381
501
  elif not src_elems:
382
502
  # TODO: maybe OK?
383
503
  raise NotImplementedError(
@@ -386,14 +506,26 @@ class WorkflowLoop:
386
506
  f"{iter_dat['output_tasks'][-1]}) that can be used to "
387
507
  f"parametrise the next iteration."
388
508
  )
389
- src_elem = src_elems[0]
390
509
 
391
- child_loop_max_iters = {
392
- i.name: i.num_added_iterations - 1 for i in child_loops
393
- }
510
+ else:
511
+ src_elem = src_elems[0]
512
+
513
+ child_loop_max_iters = {}
394
514
  parent_loop_same_iters = {
395
515
  i.name: parent_loop_indices[i.name] for i in parent_loops
396
516
  }
517
+ child_iter_parents = {
518
+ **parent_loop_same_iters,
519
+ self.name: cur_loop_idx,
520
+ }
521
+ for i in child_loops:
522
+ i_num_iters = i.num_added_iterations[
523
+ tuple(child_iter_parents[j] for j in i.parents)
524
+ ]
525
+ i_max = i_num_iters - 1
526
+ child_iter_parents[i.name] = i_max
527
+ child_loop_max_iters[i.name] = i_max
528
+
397
529
  source_iter_loop_idx = {
398
530
  **child_loop_max_iters,
399
531
  **parent_loop_same_iters,
@@ -403,12 +535,32 @@ class WorkflowLoop:
403
535
  # identify the ElementIteration from which this input should be
404
536
  # parametrised:
405
537
  source_iter = None
406
- for iter_i in src_elem.iterations:
407
- if iter_i.loop_idx == source_iter_loop_idx:
408
- source_iter = iter_i
409
- break
538
+ if grouped_elems:
539
+ source_iter = []
540
+ for src_elem in grouped_elems:
541
+ for iter_i in src_elem.iterations:
542
+ if iter_i.loop_idx == source_iter_loop_idx:
543
+ source_iter.append(iter_i)
544
+ break
545
+ else:
546
+ for iter_i in src_elem.iterations:
547
+ if iter_i.loop_idx == source_iter_loop_idx:
548
+ source_iter = iter_i
549
+ break
550
+
551
+ if not source_iter:
552
+ raise RuntimeError(
553
+ f"Could not find a source iteration with loop_idx: "
554
+ f"{source_iter_loop_idx!r}."
555
+ )
410
556
 
411
- inp_dat_idx = source_iter.get_data_idx()[f"outputs.{inp.typ}"]
557
+ if grouped_elems:
558
+ inp_dat_idx = [
559
+ i.get_data_idx()[f"outputs.{inp.typ}"]
560
+ for i in source_iter
561
+ ]
562
+ else:
563
+ inp_dat_idx = source_iter.get_data_idx()[f"outputs.{inp.typ}"]
412
564
  new_data_idx[f"inputs.{inp.typ}"] = inp_dat_idx
413
565
 
414
566
  else:
@@ -467,11 +619,16 @@ class WorkflowLoop:
467
619
  task_insert_ID=task.insert_ID
468
620
  )
469
621
  )
622
+ # filter src_elems_i for matching element IDs:
623
+ src_elems_i = [
624
+ i for i in src_elems_i if i.id_ == element.id_
625
+ ]
470
626
  if (
471
627
  len(src_elems_i) == 1
472
628
  and src_elems_i[0].id_ == element.id_
473
629
  ):
474
630
  new_sources.append((tiID, e_idx))
631
+
475
632
  if is_group:
476
633
  inp_dat_idx = [
477
634
  all_new_data_idx[i][prev_dat_idx_key]
@@ -515,21 +672,50 @@ class WorkflowLoop:
515
672
  i for i in new_data_idx.keys() if len(i.split(".")) == 2
516
673
  )
517
674
  all_new_data_idx[(task.insert_ID, element.index)] = new_data_idx
675
+
676
+ new_loop_idx = {
677
+ **parent_loop_indices,
678
+ self.name: cur_loop_idx + 1,
679
+ **{
680
+ child.name: 0
681
+ for child in child_loops
682
+ if task.insert_ID in child.task_insert_IDs
683
+ },
684
+ }
685
+ # increment num_added_iterations on child loop for this parent loop index:
686
+ for i in child_loops:
687
+ added_iters_key_chd = tuple([new_loop_idx[j] for j in i.parents])
688
+ i._initialise_pending_added_iters(added_iters_key_chd)
689
+
518
690
  iter_ID_i = self.workflow._store.add_element_iteration(
519
691
  element_ID=element.id_,
520
692
  data_idx=new_data_idx,
521
693
  schema_parameters=list(schema_params),
522
- loop_idx={**parent_loop_indices, self.name: cur_loop_idx + 1},
694
+ loop_idx=new_loop_idx,
523
695
  )
524
696
 
525
697
  task.initialise_EARs()
526
698
 
527
- self._pending_num_added_iterations += 1
699
+ added_iters_key = tuple(parent_loop_indices[k] for k in self.parents)
700
+ self._increment_pending_added_iters(added_iters_key)
528
701
  self.workflow._store.update_loop_num_iters(
529
702
  index=self.index,
530
- num_iters=self.num_added_iterations,
703
+ num_added_iters=self.num_added_iterations,
531
704
  )
532
705
 
706
+ # add iterations to fixed-number-iteration children only:
707
+ for child in child_loops[::-1]:
708
+ if child.num_iterations is not None:
709
+ for _ in range(child.num_iterations - 1):
710
+ par_idx = {k: 0 for k in child.parents}
711
+ child.add_iteration(
712
+ parent_loop_indices={
713
+ **par_idx,
714
+ **parent_loop_indices,
715
+ self.name: cur_loop_idx + 1,
716
+ }
717
+ )
718
+
533
719
  def test_termination(self, element_iter):
534
720
  """Check if a loop should terminate, given the specified completed element
535
721
  iteration."""
hpcflow/sdk/core/task.py CHANGED
@@ -19,6 +19,7 @@ from .errors import (
19
19
  InapplicableInputSourceElementIters,
20
20
  MalformedNestingOrderPath,
21
21
  MayNeedObjectError,
22
+ MissingElementGroup,
22
23
  MissingInputs,
23
24
  NoAvailableElementSetsError,
24
25
  NoCoincidentInputSources,
@@ -941,6 +942,14 @@ class Task(JSONLike):
941
942
  # directly, so consider only source task element sets that
942
943
  # provide the input locally:
943
944
  es_idx = src_task.get_param_provided_element_sets(labelled_path)
945
+ for es_i in src_task.element_sets:
946
+ # add any element set that has task sources for this parameter
947
+ for inp_src_i in es_i.input_sources.get(labelled_path, []):
948
+ if inp_src_i.source_type is InputSourceType.TASK:
949
+ if es_i.index not in es_idx:
950
+ es_idx.append(es_i.index)
951
+ break
952
+
944
953
  else:
945
954
  # outputs are always available, so consider all source task
946
955
  # element sets:
@@ -1604,6 +1613,13 @@ class WorkflowTask:
1604
1613
 
1605
1614
  # TODO: this only goes to one level of dependency
1606
1615
 
1616
+ if not group_dat_idx:
1617
+ raise MissingElementGroup(
1618
+ f"Adding elements to task {self.unique_name!r}: no "
1619
+ f"element group named {inp_group_name!r} found for input "
1620
+ f"{labelled_path_i!r}."
1621
+ )
1622
+
1607
1623
  grp_idx = [group_dat_idx] # TODO: generalise to multiple groups
1608
1624
 
1609
1625
  if self.app.InputSource.local() in sources_i:
@@ -2332,6 +2348,7 @@ class WorkflowTask:
2332
2348
  resources=elem_prop.element_set.resources[:],
2333
2349
  repeats=elem_prop.element_set.repeats,
2334
2350
  nesting_order=elem_prop.nesting_order,
2351
+ input_sources=elem_prop.input_sources,
2335
2352
  sourceable_elem_iters=src_elem_iters,
2336
2353
  )
2337
2354
 
@@ -3027,6 +3044,7 @@ class ElementPropagation:
3027
3044
 
3028
3045
  task: app.Task
3029
3046
  nesting_order: Optional[Dict] = None
3047
+ input_sources: Optional[Dict] = None
3030
3048
 
3031
3049
  @property
3032
3050
  def element_set(self):
@@ -3037,6 +3055,7 @@ class ElementPropagation:
3037
3055
  return self.__class__(
3038
3056
  task=self.task,
3039
3057
  nesting_order=copy.deepcopy(self.nesting_order, memo),
3058
+ input_sources=copy.deepcopy(self.input_sources, memo),
3040
3059
  )
3041
3060
 
3042
3061
  @classmethod
@@ -43,7 +43,6 @@ def make_schemas(ins_outs, ret_list=False):
43
43
  output_file_parsers=out_file_parsers,
44
44
  environments=[hf.ActionEnvironment("env_1")],
45
45
  )
46
- print(f"{ins_i=}")
47
46
  out.append(
48
47
  hf.TaskSchema(
49
48
  objective=obj,
@@ -96,11 +95,15 @@ def make_tasks(
96
95
  local_sequences=None,
97
96
  local_resources=None,
98
97
  nesting_orders=None,
98
+ input_sources=None,
99
+ groups=None,
99
100
  ):
100
101
  local_inputs = local_inputs or {}
101
102
  local_sequences = local_sequences or {}
102
103
  local_resources = local_resources or {}
103
104
  nesting_orders = nesting_orders or {}
105
+ input_sources = input_sources or {}
106
+ groups = groups or {}
104
107
  schemas = make_schemas(schemas_spec, ret_list=True)
105
108
  tasks = []
106
109
  for s_idx, s in enumerate(schemas):
@@ -117,13 +120,14 @@ def make_tasks(
117
120
  for i in local_sequences.get(s_idx, [])
118
121
  ]
119
122
  res = {k: v for k, v in local_resources.get(s_idx, {}).items()}
120
-
121
123
  task = hf.Task(
122
124
  schema=s,
123
125
  inputs=inputs,
124
126
  sequences=seqs,
125
127
  resources=res,
126
128
  nesting_order=nesting_orders.get(s_idx, {}),
129
+ input_sources=input_sources.get(s_idx, None),
130
+ groups=groups.get(s_idx),
127
131
  )
128
132
  tasks.append(task)
129
133
  return tasks
@@ -136,7 +140,10 @@ def make_workflow(
136
140
  local_sequences=None,
137
141
  local_resources=None,
138
142
  nesting_orders=None,
143
+ input_sources=None,
139
144
  resources=None,
145
+ loops=None,
146
+ groups=None,
140
147
  name="w1",
141
148
  overwrite=False,
142
149
  store="zarr",
@@ -147,9 +154,18 @@ def make_workflow(
147
154
  local_sequences=local_sequences,
148
155
  local_resources=local_resources,
149
156
  nesting_orders=nesting_orders,
157
+ input_sources=input_sources,
158
+ groups=groups,
150
159
  )
160
+ template = {
161
+ "name": name,
162
+ "tasks": tasks,
163
+ "resources": resources,
164
+ }
165
+ if loops:
166
+ template["loops"] = loops
151
167
  wk = hf.Workflow.from_template(
152
- hf.WorkflowTemplate(name=name, tasks=tasks, resources=resources),
168
+ hf.WorkflowTemplate(**template),
153
169
  path=path,
154
170
  name=name,
155
171
  overwrite=overwrite,