easylink 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
easylink/step.py CHANGED
@@ -54,21 +54,18 @@ class Step:
54
54
  Parameters
55
55
  ----------
56
56
  step_name
57
- The name of the pipeline step in the ``PipelineSchema``.
57
+ The name of the pipeline step in the ``PipelineSchema``. It must also match
58
+ the key in the implementation metadata file to be used to run this ``Step``.
58
59
  name
59
- The name of this step *node*. This can be different from the ``step_name``
60
- due to the need for disambiguation during the process of unrolling loops,
61
- etc. For example, if step 1 is looped multiple times, each node would
62
- have a ``step_name`` of, perhaps, "step_1" but unique ``names``
63
- ("step_1_loop_1", etc).
60
+ The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
61
+ This can be different from the ``step_name`` due to the need for disambiguation
62
+ during the process of flattening the ``StepGraph``, e.g. unrolling loops, etc.
63
+ For example, if step 1 is looped multiple times, each node would have a
64
+ ``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc).
64
65
  input_slots
65
66
  All required :class:`InputSlots<easylink.graph_components.InputSlot>`.
66
67
  output_slots
67
68
  All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
68
- nodes
69
- All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance.
70
- edges
71
- The :class:`~easylink.graph_components.EdgeParams` of this ``Step``.
72
69
  input_slot_mappings
73
70
  The :class:`InputSlotMapping<easylink.graph_components.InputSlotMapping>` of this ``Step``.
74
71
  output_slot_mappings
@@ -89,31 +86,22 @@ class Step:
89
86
  name: str | None = None,
90
87
  input_slots: Iterable[InputSlot] = (),
91
88
  output_slots: Iterable[OutputSlot] = (),
92
- nodes: Iterable[Step] = (),
93
- edges: Iterable[EdgeParams] = (),
94
89
  input_slot_mappings: Iterable[InputSlotMapping] = (),
95
90
  output_slot_mappings: Iterable[OutputSlotMapping] = (),
96
91
  ) -> None:
97
92
  self.step_name = step_name
98
- """The name of the high-level pipeline step."""
93
+ """The name of the pipeline step in the ``PipelineSchema``. It must also match
94
+ the key in the implementation metadata file to be used to run this ``Step``."""
99
95
  self.name = name if name else step_name
100
- """The name of ``Step's`` node in its :class:`~easylink.graph_components.StepGraph`.
101
- This is a more descriptive name than the ``step_name``, e.g. if "step 1"
102
- is looped multiple times. If not provided, defaults to the :attr:`step_name`."""
96
+ """The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
97
+ This can be different from the ``step_name`` due to the need for disambiguation
98
+ during the process of flattening the ``StepGraph``, e.g. unrolling loops, etc.
99
+ For example, if step 1 is looped multiple times, each node would have a
100
+ ``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc)."""
103
101
  self.input_slots = {slot.name: slot for slot in input_slots}
104
102
  """A mapping of ``InputSlot`` names to their instances."""
105
103
  self.output_slots = {slot.name: slot for slot in output_slots}
106
104
  """A mapping of ``OutputSlot`` names to their instances."""
107
- self.nodes = nodes
108
- """All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance."""
109
- for node in self.nodes:
110
- node.set_parent_step(self)
111
- self.edges = edges
112
- """The :class:`~easylink.graph_components.EdgeParams` of this ``Step``."""
113
- self.step_graph = self._get_step_graph(nodes, edges)
114
- """The :class:`~easylink.graph_components.StepGraph` of this ``Step``, i.e.
115
- the directed acyclic graph (DAG) of sub-nodes and their edges that make
116
- up this ``Step`` instance."""
117
105
  self.slot_mappings = {
118
106
  "input": list(input_slot_mappings),
119
107
  "output": list(output_slot_mappings),
@@ -164,7 +152,7 @@ class Step:
164
152
  """
165
153
  step = self
166
154
  implementation_name = (
167
- self.configuration_state.pipeline_config[COMBINED_IMPLEMENTATION_KEY]
155
+ self.configuration_state.step_config[COMBINED_IMPLEMENTATION_KEY]
168
156
  if self.configuration_state.is_combined
169
157
  else self.configuration_state.implementation_config.name
170
158
  )
@@ -203,7 +191,8 @@ class Step:
203
191
  Parameters
204
192
  ----------
205
193
  step_config
206
- The configuration of this ``Step``.
194
+ The internal configuration of this ``Step``, i.e. it should not include
195
+ the ``Step's`` name.
207
196
  combined_implementations
208
197
  The configuration for any implementations to be combined.
209
198
  input_data_config
@@ -216,9 +205,6 @@ class Step:
216
205
 
217
206
  Notes
218
207
  -----
219
- A ``Step`` can be in either a "leaf" or a "non-leaf" configuration state
220
- and the validation process is different for each.
221
-
222
208
  If the ``Step`` does not validate (i.e. errors are found and the returned
223
209
  dictionary is non-empty), the tool will exit and the pipeline will not run.
224
210
 
@@ -227,14 +213,42 @@ class Step:
227
213
  all issues in one pass. In these cases, new errors may be found after the
228
214
  initial ones are handled.
229
215
  """
230
- if len(self.step_graph.nodes) == 0:
231
- return self._validate_leaf(step_config, combined_implementations)
232
- elif self.config_key in step_config:
233
- return self._validate_nonleaf(
234
- step_config[self.config_key], combined_implementations, input_data_config
235
- )
216
+ errors = {}
217
+ metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
218
+ error_key = f"step {self.name}"
219
+ if (
220
+ "implementation" not in step_config
221
+ and COMBINED_IMPLEMENTATION_KEY not in step_config
222
+ ):
223
+ errors[error_key] = [
224
+ "The step configuration does not contain an 'implementation' key "
225
+ "or a reference to a combined implementation."
226
+ ]
227
+ elif (
228
+ COMBINED_IMPLEMENTATION_KEY in step_config
229
+ and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
230
+ ):
231
+ errors[error_key] = [
232
+ "The step refers to a combined implementation but "
233
+ f"{step_config[COMBINED_IMPLEMENTATION_KEY]} is not a valid combined "
234
+ "implementation."
235
+ ]
236
236
  else:
237
- return self._validate_leaf(step_config, combined_implementations)
237
+ implementation_config = (
238
+ step_config["implementation"]
239
+ if "implementation" in step_config
240
+ else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
241
+ )
242
+ if not "name" in implementation_config:
243
+ errors[error_key] = [
244
+ "The implementation configuration does not contain a 'name' key."
245
+ ]
246
+ elif not implementation_config["name"] in metadata:
247
+ errors[error_key] = [
248
+ f"Implementation '{implementation_config['name']}' is not supported. "
249
+ f"Supported implementations are: {list(metadata.keys())}."
250
+ ]
251
+ return errors
238
252
 
239
253
  def get_implementation_graph(self) -> ImplementationGraph:
240
254
  """Gets this ``Step's`` :class:`~easylink.graph_components.ImplementationGraph`.
@@ -276,42 +290,25 @@ class Step:
276
290
 
277
291
  def set_configuration_state(
278
292
  self,
279
- parent_config: LayeredConfigTree,
293
+ step_config: LayeredConfigTree,
280
294
  combined_implementations: LayeredConfigTree,
281
295
  input_data_config: LayeredConfigTree,
282
296
  ) -> None:
283
- """Sets the configuration state for this ``Step``.
284
-
285
- The so-called 'configuration state' for a given ``Step`` is backed up by
286
- a :class:`ConfigurationState` class and is assigned to its :attr:`_configuration_state`
287
- attribute. There are two possible ``ConfigurationStates``:
288
- :class:`LeafConfigurationState` and :class:`NonLeafConfigurationState`.
289
-
290
- This method sets the configuration state of this ``Step`` based on whether
291
- or not a :attr:`config_key` is set *and exists is the ``Step's`` configuration*
292
- (i.e. its portion of the user-suppled pipeline specification
293
- file); any required deviation from this behavior requires special
294
- handling.
297
+ """Sets the configuration state to 'leaf'.
295
298
 
296
299
  Parameters
297
300
  ----------
298
- parent_config
299
- The configuration of the parent ``Step``.
301
+ step_config
302
+ The internal configuration of this ``Step``, i.e. it should not include
303
+ the ``Step's`` name.
300
304
  combined_implementations
301
305
  The configuration for any implementations to be combined.
302
306
  input_data_config
303
307
  The input data configuration for the entire pipeline.
304
308
  """
305
- step_config = parent_config[self.name]
306
- sub_config = self._get_config(step_config)
307
- if self.config_key is not None and self.config_key in step_config:
308
- self._configuration_state = NonLeafConfigurationState(
309
- self, sub_config, combined_implementations, input_data_config
310
- )
311
- else:
312
- self._configuration_state = LeafConfigurationState(
313
- self, sub_config, combined_implementations, input_data_config
314
- )
309
+ self._configuration_state = LeafConfigurationState(
310
+ self, step_config, combined_implementations, input_data_config
311
+ )
315
312
 
316
313
  def get_implementation_slot_mappings(self) -> dict[str, list[SlotMapping]]:
317
314
  """Gets the input and output :class:`SlotMappings<easylink.graph_components.SlotMapping>`."""
@@ -326,113 +323,6 @@ class Step:
326
323
  ],
327
324
  }
328
325
 
329
- ##################
330
- # Helper methods #
331
- ##################
332
-
333
- def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
334
- """Create a StepGraph from the nodes and edges the step was initialized with."""
335
- step_graph = StepGraph()
336
- for step in nodes:
337
- step_graph.add_node_from_step(step)
338
- for edge in edges:
339
- step_graph.add_edge_from_params(edge)
340
- return step_graph
341
-
342
- def _validate_leaf(
343
- self,
344
- step_config: LayeredConfigTree,
345
- combined_implementations: LayeredConfigTree,
346
- ) -> dict[str, list[str]]:
347
- """Validates a leaf ``Step``."""
348
- errors = {}
349
- metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
350
- error_key = f"step {self.name}"
351
- if (
352
- "implementation" not in step_config
353
- and COMBINED_IMPLEMENTATION_KEY not in step_config
354
- ):
355
- errors[error_key] = [
356
- "The step configuration does not contain an 'implementation' key or a "
357
- "reference to a combined implementation."
358
- ]
359
- elif (
360
- COMBINED_IMPLEMENTATION_KEY in step_config
361
- and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
362
- ):
363
- errors[error_key] = [
364
- f"The step refers to a combined implementation but {step_config[COMBINED_IMPLEMENTATION_KEY]} is not a "
365
- f"valid combined implementation."
366
- ]
367
- else:
368
- implementation_config = (
369
- step_config["implementation"]
370
- if "implementation" in step_config
371
- else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
372
- )
373
- if not "name" in implementation_config:
374
- errors[error_key] = [
375
- "The implementation configuration does not contain a 'name' key."
376
- ]
377
- elif not implementation_config["name"] in metadata:
378
- errors[error_key] = [
379
- f"Implementation '{implementation_config['name']}' is not supported. "
380
- f"Supported implementations are: {list(metadata.keys())}."
381
- ]
382
- return errors
383
-
384
- def _validate_nonleaf(
385
- self,
386
- step_config: LayeredConfigTree,
387
- combined_implementations: LayeredConfigTree,
388
- input_data_config: LayeredConfigTree,
389
- ) -> dict[str, list[str]]:
390
- """Validates a non-leaf ``Step``."""
391
- errors = {}
392
- nodes = self.step_graph.nodes
393
- for node in nodes:
394
- step = nodes[node]["step"]
395
- if isinstance(step, IOStep):
396
- continue
397
- if step.name not in step_config:
398
- step_errors = {f"step {step.name}": [f"The step is not configured."]}
399
- else:
400
- step_errors = step.validate_step(
401
- step_config[step.name], combined_implementations, input_data_config
402
- )
403
- if step_errors:
404
- errors.update(step_errors)
405
- extra_steps = set(step_config.keys()) - set(nodes)
406
- for extra_step in extra_steps:
407
- errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
408
- return errors
409
-
410
- def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
411
- """Convenience method to get a ``Step's`` configuration.
412
-
413
- Some types of ``Steps`` have a unique :attr:`config_key` (defined by the
414
- user via the pipeline specification file) that is used to specify the behavior
415
- of the ``Step`` (e.g. looping, parallel, etc). This method simply returns
416
- the ``Step's`` sub-configuration keyed to that ``config_key`` (if it exists,
417
- i.e. is not a basic ``Step``).
418
-
419
- Parameters
420
- ----------
421
- step_config
422
- The high-level configuration of this ``Step``.
423
-
424
- Returns
425
- -------
426
- The sub-configuration of this ``Step`` keyed on the ``config_key``
427
- (if it exists).
428
-
429
- """
430
- return (
431
- step_config
432
- if not self.config_key in step_config
433
- else step_config[self.config_key]
434
- )
435
-
436
326
 
437
327
  class IOStep(Step):
438
328
  """A special case type of :class:`Step` used to represent incoming and outgoing data.
@@ -485,27 +375,24 @@ class IOStep(Step):
485
375
 
486
376
  def set_configuration_state(
487
377
  self,
488
- parent_config: LayeredConfigTree,
378
+ step_config: LayeredConfigTree,
489
379
  combined_implementations: LayeredConfigTree,
490
380
  input_data_config: LayeredConfigTree,
491
381
  ) -> None:
492
- """Sets the configuration state to leaf.
493
-
494
- An ``IOStep`` is by definition a leaf ``Step`` and so we assign that here
495
- instead of relying on the default behavior of the parent class.
382
+ """Sets the configuration state to 'leaf'.
496
383
 
497
384
  Parameters
498
385
  ----------
499
- parent_config
500
- The configuration of the parent ``Step``. For ``IOSteps``, this will
501
- always be the entire pipeline configuration.
386
+ step_config
387
+ The internal configuration of this ``Step``, i.e. it should not include
388
+ the ``Step's`` name.
502
389
  combined_implementations
503
390
  The configuration for any implementations to be combined.
504
391
  input_data_config
505
392
  The input data configuration for the entire pipeline.
506
393
  """
507
394
  self._configuration_state = LeafConfigurationState(
508
- self, parent_config, combined_implementations, input_data_config
395
+ self, step_config, combined_implementations, input_data_config
509
396
  )
510
397
 
511
398
  def get_implementation_graph(self) -> ImplementationGraph:
@@ -548,29 +435,29 @@ class InputStep(IOStep):
548
435
 
549
436
  def set_configuration_state(
550
437
  self,
551
- parent_config: LayeredConfigTree,
438
+ step_config: LayeredConfigTree,
552
439
  combined_implementations: LayeredConfigTree,
553
440
  input_data_config: LayeredConfigTree,
554
441
  ) -> None:
555
442
  """Sets the configuration state and updates the ``OutputSlots``.
556
443
 
557
- In addition to setting ``InputStep`` to a leaf configuration state, this
444
+ In addition to setting ``InputStep`` to a 'leaf' configuration state, this
558
445
  method also updates the ``OutputSlots`` to include all of the dataset keys
559
446
  in the input data specification file. This allows for future use of
560
- specific datasets instead of only "all" of them.
447
+ *specific* datasets instead of only *all* of them.
561
448
 
562
449
  Parameters
563
450
  ----------
564
- parent_config
565
- The configuration of the parent ``Step``. For ``IOSteps``, this will
566
- always be the entire pipeline configuration.
451
+ step_config
452
+ The internal configuration of this ``Step``, i.e. it should not include
453
+ the ``Step's`` name.
567
454
  combined_implementations
568
455
  The configuration for any implementations to be combined.
569
456
  input_data_config
570
457
  The input data configuration for the entire pipeline.
571
458
  """
572
459
  super().set_configuration_state(
573
- parent_config, combined_implementations, input_data_config
460
+ step_config, combined_implementations, input_data_config
574
461
  )
575
462
  for input_data_key in input_data_config:
576
463
  self.output_slots[input_data_key] = OutputSlot(name=input_data_key)
@@ -601,19 +488,185 @@ class HierarchicalStep(Step):
601
488
 
602
489
  See :class:`Step` for inherited attributes.
603
490
 
604
- Notes
605
- -----
606
- To use this feature, the sub-``Steps`` must be defined in the pipeline specification
607
- file under a "substeps" key. If no "substeps" key is present, it will be treated
608
- as a single ``Step``.
491
+ Parameters
492
+ ----------
493
+ nodes
494
+ All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``.
495
+ edges
496
+ The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes.
497
+ step_graph
498
+ The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
499
+ graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``.
500
+ user_configurable
501
+ Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
502
+ attribute to allow for the creation of back-end ``HierarchicalSteps`` that are not
503
+ user-facing (i.e. they do not need to provide a 'substeps' configuration key).
609
504
 
610
505
  """
611
506
 
507
+ def __init__(
508
+ self,
509
+ step_name,
510
+ name=None,
511
+ input_slots=(),
512
+ output_slots=(),
513
+ nodes=(),
514
+ edges=(),
515
+ input_slot_mappings=(),
516
+ output_slot_mappings=(),
517
+ user_configurable=True,
518
+ ):
519
+ super().__init__(
520
+ step_name,
521
+ name,
522
+ input_slots,
523
+ output_slots,
524
+ input_slot_mappings,
525
+ output_slot_mappings,
526
+ )
527
+ self.nodes = nodes
528
+ """All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``."""
529
+ for node in self.nodes:
530
+ node.set_parent_step(self)
531
+ self.edges = edges
532
+ """The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes."""
533
+ self.step_graph = self._get_step_graph(nodes, edges)
534
+ """The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
535
+ graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``."""
536
+ self.user_configurable = user_configurable
537
+ """Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
538
+ attribute to allow for the creation of back-end ``HierarchicalSteps`` that are not
539
+ user-facing (i.e. they do not need to provide a 'substeps' configuration key)."""
540
+
612
541
  @property
613
542
  def config_key(self):
614
543
  """The pipeline specification key required for a ``HierarchicalStep``."""
615
544
  return "substeps"
616
545
 
546
+ def validate_step(
547
+ self,
548
+ step_config: LayeredConfigTree,
549
+ combined_implementations: LayeredConfigTree,
550
+ input_data_config: LayeredConfigTree,
551
+ ) -> dict[str, list[str]]:
552
+ """Validates the ``HierarchicalStep``.
553
+
554
+ Parameters
555
+ ----------
556
+ step_config
557
+ The internal configuration of this ``Step``, i.e. it should not include
558
+ the ``Step's`` name.
559
+ combined_implementations
560
+ The configuration for any implementations to be combined.
561
+ input_data_config
562
+ The input data configuration for the entire pipeline.
563
+
564
+ Returns
565
+ -------
566
+ A dictionary of errors, where the keys are the ``HierarchicalStep``
567
+ name and the values are lists of error messages associated with the
568
+ given ``HierarchicalStep``.
569
+
570
+ Notes
571
+ -----
572
+ A ``HierarchicalStep`` can be in either a "leaf" or a "non-leaf" configuration
573
+ state and the validation process is different for each.
574
+
575
+ If the ``HierarchicalStep`` does not validate (i.e. errors are found and
576
+ the returned dictionary is non-empty), the tool will exit and the pipeline
577
+ will not run.
578
+
579
+ We attempt to batch error messages as much as possible, but there may be
580
+ times where the configuration is so ill-formed that we are unable to handle
581
+ all issues in one pass. In these cases, new errors may be found after the
582
+ initial ones are handled.
583
+ """
584
+ if self.user_configurable:
585
+ if self.config_key in step_config:
586
+ step_config = step_config[self.config_key]
587
+ else:
588
+ # This is a leaf step
589
+ return super().validate_step(
590
+ step_config, combined_implementations, input_data_config
591
+ )
592
+ return self._validate_step_graph(
593
+ step_config, combined_implementations, input_data_config
594
+ )
595
+
596
+ def set_configuration_state(
597
+ self,
598
+ step_config: LayeredConfigTree,
599
+ combined_implementations: LayeredConfigTree,
600
+ input_data_config: LayeredConfigTree,
601
+ ) -> None:
602
+ """Sets the configuration state.
603
+
604
+ The configuration state of a ``HierarchicalStep`` depends on (1) whether
605
+ or not it is :attr:`user_configurable` and (2) whether or not the
606
+ :attr:`config_key` exists in the pipeline specification file.
607
+
608
+ Parameters
609
+ ----------
610
+ step_config
611
+ The internal configuration of this ``Step``, i.e. it should not include
612
+ the ``Step's`` name.
613
+ combined_implementations
614
+ The configuration for any implementations to be combined.
615
+ input_data_config
616
+ The input data configuration for the entire pipeline.
617
+ """
618
+ if self.user_configurable:
619
+ if self.config_key in step_config:
620
+ step_config = step_config[self.config_key]
621
+ configuration_state_type = NonLeafConfigurationState
622
+ else:
623
+ configuration_state_type = LeafConfigurationState
624
+ else:
625
+ # Substeps must be used, so we require non-leaf here
626
+ configuration_state_type = NonLeafConfigurationState
627
+ self._configuration_state = configuration_state_type(
628
+ self, step_config, combined_implementations, input_data_config
629
+ )
630
+
631
+ ##################
632
+ # Helper methods #
633
+ ##################
634
+
635
+ def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
636
+ """Creates a :class:`~easylink.graph_components.StepGraph` from the nodes and edges the step was initialized with."""
637
+ step_graph = StepGraph()
638
+ for step in nodes:
639
+ step_graph.add_node_from_step(step)
640
+ for edge in edges:
641
+ step_graph.add_edge_from_params(edge)
642
+ return step_graph
643
+
644
+ def _validate_step_graph(
645
+ self,
646
+ step_config: LayeredConfigTree,
647
+ combined_implementations: LayeredConfigTree,
648
+ input_data_config: LayeredConfigTree,
649
+ ) -> dict[str, list[str]]:
650
+ """Validates the nodes of a :class:`~easylink.graph_components.StepGraph`."""
651
+ errors = {}
652
+ for node in self.step_graph.nodes:
653
+ step = self.step_graph.nodes[node]["step"]
654
+ if isinstance(step, IOStep):
655
+ continue
656
+ else:
657
+ if step.name not in step_config:
658
+ step_errors = {f"step {step.name}": ["The step is not configured."]}
659
+ else:
660
+ step_errors = step.validate_step(
661
+ step_config[step.name], combined_implementations, input_data_config
662
+ )
663
+ if step_errors:
664
+ errors.update(step_errors)
665
+ extra_steps = set(step_config.keys()) - set(self.step_graph.nodes)
666
+ for extra_step in extra_steps:
667
+ errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
668
+ return errors
669
+
617
670
 
618
671
  class TemplatedStep(Step, ABC):
619
672
  """A type of :class:`Step` that may contain multiplicity.
@@ -641,8 +694,12 @@ class TemplatedStep(Step, ABC):
641
694
  template_step.input_slots.values(),
642
695
  template_step.output_slots.values(),
643
696
  )
697
+ self.step_graph = None
698
+ """The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
699
+ graph (DAG) of sub-nodes and their edges that make up this ``TemplatedStep``."""
644
700
  self.template_step = template_step
645
701
  """The ``Step`` to be templated."""
702
+
646
703
  self.template_step.set_parent_step(self)
647
704
 
648
705
  @property
@@ -716,7 +773,8 @@ class TemplatedStep(Step, ABC):
716
773
  Parameters
717
774
  ----------
718
775
  step_config
719
- The configuration of this ``TemplatedStep``.
776
+ The internal configuration of this ``Step``, i.e. it should not include
777
+ the ``Step's`` name.
720
778
  combined_implementations
721
779
  The configuration for any implementations to be combined.
722
780
  input_data_config
@@ -730,7 +788,7 @@ class TemplatedStep(Step, ABC):
730
788
 
731
789
  Notes
732
790
  -----
733
- If the ``Step`` does not validate (i.e. errors are found and the returned
791
+ If the ``TemplatedStep`` does not validate (i.e. errors are found and the returned
734
792
  dictionary is non-empty), the tool will exit and the pipeline will not run.
735
793
 
736
794
  We attempt to batch error messages as much as possible, but there may be
@@ -739,6 +797,7 @@ class TemplatedStep(Step, ABC):
739
797
  initial ones are handled.
740
798
  """
741
799
  if not self.config_key in step_config:
800
+ # This is a leaf step
742
801
  return self.template_step.validate_step(
743
802
  step_config, combined_implementations, input_data_config
744
803
  )
@@ -770,51 +829,32 @@ class TemplatedStep(Step, ABC):
770
829
  ]
771
830
  parallel_errors.update(
772
831
  self.template_step.validate_step(
773
- parallel_config, combined_implementations, input_data_config
832
+ LayeredConfigTree(parallel_config),
833
+ combined_implementations,
834
+ input_data_config,
774
835
  )
775
836
  )
776
837
  if parallel_errors:
777
838
  errors[f"step {self.name}"][f"{self.node_prefix}_{i+1}"] = parallel_errors
778
839
  return errors
779
840
 
780
- def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
781
- """Convenience method to get the ``TemplatedStep's`` configuration.
782
-
783
- ``TemplatedSteps`` may include multiplicity. In such cases, their configurations
784
- must be modified to include the expanded ``Steps``.
785
-
786
- Parameters
787
- ----------
788
- step_config
789
- The high-level configuration of this ``TemplatedStep``.
790
-
791
- Returns
792
- -------
793
- The expanded sub-configuration of this ``TemplatedStep`` based on the
794
- :attr:`Step.config_key` and expanded to include all looped or parallelized
795
- sub-``Steps``).
796
- """
797
- if self.config_key in step_config:
798
- expanded_step_config = LayeredConfigTree()
799
- for i, sub_config in enumerate(step_config[self.config_key]):
800
- expanded_step_config.update(
801
- {f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
802
- )
803
- return expanded_step_config
804
- return step_config
805
-
806
841
  def set_configuration_state(
807
842
  self,
808
- parent_config: LayeredConfigTree,
843
+ step_config: LayeredConfigTree,
809
844
  combined_implementations: LayeredConfigTree,
810
845
  input_data_config: LayeredConfigTree,
811
846
  ):
812
- """Sets the configuration state and updates the :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
847
+ """Sets the configuration state to 'non-leaf'.
848
+
849
+ In addition to setting the configuration state, this also updates the
850
+ :class:`~easylink.graph_components.StepGraph` and
851
+ :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
813
852
 
814
853
  Parameters
815
854
  ----------
816
- parent_config
817
- The configuration of the parent ``Step``.
855
+ step_config
856
+ The internal configuration of this ``Step``, i.e. it should not include
857
+ the ``Step's`` name.
818
858
  combined_implementations
819
859
  The configuration for any implementations to be combined.
820
860
  input_data_config
@@ -828,7 +868,6 @@ class TemplatedStep(Step, ABC):
828
868
  :class:`~easylink.implementation.Implementation`, i.e. the one with a
829
869
  :class:`LeafConfigurationState`.
830
870
  """
831
- step_config = parent_config[self.name]
832
871
  if self.config_key not in step_config:
833
872
  # Special handle the step_graph update
834
873
  self.step_graph = StepGraph()
@@ -857,6 +896,36 @@ class TemplatedStep(Step, ABC):
857
896
  self, expanded_config, combined_implementations, input_data_config
858
897
  )
859
898
 
899
+ ##################
900
+ # Helper Methods #
901
+ ##################
902
+
903
+ def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
904
+ """Convenience method to get the ``TemplatedStep's`` configuration.
905
+
906
+ ``TemplatedSteps`` may include multiplicity. In such cases, their configurations
907
+ must be modified to include the expanded ``Steps``.
908
+
909
+ Parameters
910
+ ----------
911
+ step_config
912
+ The high-level configuration of this ``TemplatedStep``.
913
+
914
+ Returns
915
+ -------
916
+ The expanded sub-configuration of this ``TemplatedStep`` based on the
917
+ :attr:`Step.config_key` and expanded to include all looped or parallelized
918
+ sub-``Steps``.
919
+ """
920
+ if self.config_key in step_config:
921
+ expanded_step_config = LayeredConfigTree()
922
+ for i, sub_config in enumerate(step_config[self.config_key]):
923
+ expanded_step_config.update(
924
+ {f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
925
+ )
926
+ return expanded_step_config
927
+ return step_config
928
+
860
929
  def _duplicate_template_step(self) -> Step:
861
930
  """Makes a duplicate of the template ``Step``.
862
931
 
@@ -1064,11 +1133,69 @@ class ParallelStep(TemplatedStep):
1064
1133
  return {"input": input_mappings, "output": output_mappings}
1065
1134
 
1066
1135
 
1067
- class ChoiceStep(Step):
1068
- """A type of :class:`Step` that allows for choosing between multiple paths.
1136
+ class EmbarrassinglyParallelStep(Step):
1137
+ """A step that is run in parallel on the backend.
1069
1138
 
1070
- A ``ChoiceStep`` allows a user to select a single path from a set of possible
1071
- paths.
1139
+ An ``EmbarrassinglyParallelStep`` is different than a :class:`ParallelStep`
1140
+ in that it is not configured by the user to be run in parallel - it completely
1141
+ happens on the back end for performance reasons. As such, note that it inherits
1142
+ from :class:`Step` instead of :class:`TemplatedStep`.
1143
+
1144
+ See :class:`Step` for inherited attributes.
1145
+ """
1146
+
1147
+ def __init__(
1148
+ self,
1149
+ step_name: str,
1150
+ input_slots: Iterable[InputSlot],
1151
+ output_slots: Iterable[OutputSlot],
1152
+ ) -> None:
1153
+ super().__init__(step_name, input_slots=input_slots, output_slots=output_slots)
1154
+ self._validate()
1155
+
1156
+ def _validate(self) -> None:
1157
+ """Validates the ``EmbarrassinglyParallelStep``.
1158
+
1159
+ ``EmbarrassinglyParallelSteps`` are not configured by the user to be run
1160
+ in parallel. Since it happens on the back end, we need to do somewhat unique
1161
+ validations during construction. Specifically,
1162
+ - one and only one :class:`~easylink.graph_components.InputSlot` *must* include
1163
+ a :attr:`~easylink.graph_components.InputSlot.splitter` method.
1164
+ - all :class:`OutputSlots<easylink.graph_components.OutputSlot>` *must* include
1165
+ an :attr:`~easylink.graph_components.OutputSlot.aggregator` method.
1166
+ """
1167
+ errors = []
1168
+ # assert that only one input slot has a splitter assigned
1169
+ splitters = {
1170
+ slot.name: slot.splitter.__name__
1171
+ for slot in self.input_slots.values()
1172
+ if slot.splitter
1173
+ }
1174
+ if len(splitters) == 0:
1175
+ errors.append(
1176
+ f"EmbarrassinglyParallelStep '{self.step_name}' does not have any input slots with a "
1177
+ "splitter method assigned; one and only one input slot must have a splitter."
1178
+ )
1179
+ if len(splitters) > 1:
1180
+ errors.append(
1181
+ f"EmbarrassinglyParallelStep '{self.step_name}' has multiple input slots with "
1182
+ "splitter methods assigned; one and only one input slot must have a splitter.\n"
1183
+ f"Input slots with splitters: {splitters}"
1184
+ )
1185
+ missing_aggregators = [
1186
+ slot.name for slot in self.output_slots.values() if not slot.aggregator
1187
+ ]
1188
+ if len(missing_aggregators) != 0:
1189
+ errors.append(
1190
+ f"EmbarrassinglyParallelStep '{self.step_name}' has output slots without "
1191
+ f"aggregator methods assigned: {missing_aggregators}"
1192
+ )
1193
+ if errors:
1194
+ raise ValueError("\n".join(errors))
1195
+
1196
+
1197
+ class ChoiceStep(Step):
1198
+ """A type of :class:`Step` that allows for choosing from a set of options.
1072
1199
 
1073
1200
  See :class:`Step` for inherited attributes.
1074
1201
 
@@ -1082,7 +1209,7 @@ class ChoiceStep(Step):
1082
1209
  All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
1083
1210
  choices
1084
1211
  A dictionary of choices, where the keys are the names/types of choices and
1085
- the values are dictionaries containing that type's nodes, edges, and
1212
+ the values are dictionaries containing that type's ``Step`` and related
1086
1213
  :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
1087
1214
 
1088
1215
  Notes
@@ -1091,6 +1218,13 @@ class ChoiceStep(Step):
1091
1218
  :attr:`Step.config_key` in the pipeline specification file. Instead, the pipeline
1092
1219
  configuration must contain a 'type' key that specifies which option to choose.
1093
1220
 
1221
+ The :attr:`choices` dictionary must contain the choice type names as the outer
1222
+ keys. The value of each of these types is then another dictionary containing
1223
+ 'step', 'input_slot_mappings', and 'output_slot_mappings' keys with their
1224
+ corresponding values.
1225
+
1226
+ Each choice type must specify a *single* ``Step`` and its associated ``SlotMappings``.
1227
+ Any choice paths that require multiple sub-steps should specify a :class:`HierarchicalStep`.
1094
1228
  """
1095
1229
 
1096
1230
  def __init__(
@@ -1098,9 +1232,7 @@ class ChoiceStep(Step):
1098
1232
  step_name: str,
1099
1233
  input_slots: Iterable[InputSlot],
1100
1234
  output_slots: Iterable[OutputSlot],
1101
- choices: dict[
1102
- str, dict[str, list[Step | EdgeParams | InputSlotMapping | OutputSlotMapping]]
1103
- ],
1235
+ choices: dict[str, dict[str, Step | SlotMapping]],
1104
1236
  ) -> None:
1105
1237
  super().__init__(
1106
1238
  step_name,
@@ -1123,7 +1255,8 @@ class ChoiceStep(Step):
1123
1255
  Parameters
1124
1256
  ----------
1125
1257
  step_config
1126
- The configuration of this ``ChoiceStep``.
1258
+ The internal configuration of this ``Step``, i.e. it should not include
1259
+ the ``Step's`` name.
1127
1260
  combined_implementations
1128
1261
  The configuration for any implementations to be combined.
1129
1262
  input_data_config
@@ -1136,8 +1269,6 @@ class ChoiceStep(Step):
1136
1269
 
1137
1270
  Notes
1138
1271
  -----
1139
- A ``ChoiceStep`` by definition must be set with a :class:`NonLeafConfigurationState`.
1140
-
1141
1272
  If the ``Step`` does not validate (i.e. errors are found and the returned
1142
1273
  dictionary is non-empty), the tool will exit and the pipeline will not run.
1143
1274
 
@@ -1146,16 +1277,9 @@ class ChoiceStep(Step):
1146
1277
  all issues in one pass. In these cases, new errors may be found after the
1147
1278
  initial ones are handled.
1148
1279
 
1149
- We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
1150
- in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
1151
- as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
1152
- prior to :meth:`set_configuration_state`, but the validations itself actually
1153
- requires the updated ``StepGraph`` and ``SlotMappings``.
1154
-
1155
1280
  We do not attempt to validate the subgraph here if the 'type' key is unable
1156
1281
  to be validated.
1157
1282
  """
1158
-
1159
1283
  chosen_type = step_config.get("type")
1160
1284
  # Handle problems with the 'type' key
1161
1285
  if not chosen_type:
@@ -1163,104 +1287,64 @@ class ChoiceStep(Step):
1163
1287
  if chosen_type not in self.choices:
1164
1288
  return {
1165
1289
  f"step {self.name}": [
1166
- f"'{step_config['type']}' is not a supported 'type'. Valid choices are: {list(self.choices)}."
1290
+ f"'{step_config.type}' is not a supported 'type'. Valid choices are: {list(self.choices)}."
1167
1291
  ]
1168
1292
  }
1169
- # Handle type-subgraph inconsistencies
1170
- subgraph = self.choices[chosen_type]
1293
+
1294
+ chosen_step = self.choices[chosen_type]["step"]
1171
1295
  chosen_step_config = LayeredConfigTree(
1172
1296
  {key: value for key, value in step_config.items() if key != "type"}
1173
1297
  )
1174
- allowable_steps = [node.name for node in subgraph["nodes"]]
1175
- if set(allowable_steps) != set(chosen_step_config):
1298
+ if chosen_step.name not in chosen_step_config:
1176
1299
  return {
1177
1300
  f"step {self.name}": [
1178
- f"Invalid configuration for '{chosen_type}' type. Valid steps are {allowable_steps}."
1301
+ f"'{chosen_step.name}' is not configured. Confirm you have specified "
1302
+ f"the correct steps for the '{chosen_type}' type."
1179
1303
  ]
1180
1304
  }
1181
-
1182
- # HACK: Update the step graph and mappings here because we need them for validation
1183
- self.step_graph = self._update_step_graph(subgraph)
1184
- self.slot_mappings = self._update_slot_mappings(subgraph)
1185
1305
  # NOTE: A ChoiceStep is by definition a non-leaf step
1186
- return self._validate_nonleaf(
1187
- chosen_step_config, combined_implementations, input_data_config
1306
+ return chosen_step.validate_step(
1307
+ chosen_step_config[chosen_step.name], combined_implementations, input_data_config
1188
1308
  )
1189
1309
 
1190
1310
  def set_configuration_state(
1191
1311
  self,
1192
- parent_config: LayeredConfigTree,
1312
+ step_config: LayeredConfigTree,
1193
1313
  combined_implementations: LayeredConfigTree,
1194
1314
  input_data_config: LayeredConfigTree,
1195
1315
  ):
1196
- """Sets the configuration state for a ``ChoiceStep``.
1316
+ """Sets the configuration state to 'non-leaf'.
1317
+
1318
+ In addition to setting the configuration state, this also updates the
1319
+ :class:`~easylink.graph_components.StepGraph` and
1320
+ :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
1197
1321
 
1198
1322
  Parameters
1199
1323
  ----------
1200
- parent_config
1201
- The configuration of the parent ``Step``.
1324
+ step_config
1325
+ The internal configuration of this ``Step``, i.e. it should not include
1326
+ the ``Step's`` name.
1202
1327
  combined_implementations
1203
1328
  The configuration for any implementations to be combined.
1204
1329
  input_data_config
1205
1330
  The input data configuration for the entire pipeline.
1206
-
1207
- Notes
1208
- -----
1209
- We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
1210
- in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
1211
- as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
1212
- prior to :meth:`set_configuration_state`, but the validations itself actually
1213
- requires the updated ``StepGraph`` and ``SlotMappings``.
1214
1331
  """
1332
+ choice = self.choices[step_config["type"]]
1333
+ self.step_graph = StepGraph()
1334
+ self.step_graph.add_node_from_step(choice["step"])
1335
+ self.slot_mappings = {
1336
+ "input": choice["input_slot_mappings"],
1337
+ "output": choice["output_slot_mappings"],
1338
+ }
1215
1339
 
1216
- chosen_parent_config = LayeredConfigTree(
1217
- {key: value for key, value in parent_config[self.name].items() if key != "type"}
1340
+ chosen_step_config = LayeredConfigTree(
1341
+ {key: value for key, value in step_config.items() if key != "type"}
1218
1342
  )
1219
- # ChoiceSteps by definition cannot be in a LeafConfigurationState.
1343
+ # ChoiceSteps by definition are in a NonLeafConfigurationState
1220
1344
  self._configuration_state = NonLeafConfigurationState(
1221
- self, chosen_parent_config, combined_implementations, input_data_config
1345
+ self, chosen_step_config, combined_implementations, input_data_config
1222
1346
  )
1223
1347
 
1224
- @staticmethod
1225
- def _update_step_graph(subgraph: dict[str, Any]) -> StepGraph:
1226
- """Updates the :class:`~easylink.graph_components.StepGraph` with the choice.
1227
-
1228
- Parameters
1229
- ----------
1230
- subgraph
1231
- Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
1232
-
1233
- Returns
1234
- -------
1235
- The updated ``StepGraph`` for the chosen type.
1236
- """
1237
- nodes = subgraph["nodes"]
1238
- edges = subgraph["edges"]
1239
-
1240
- graph = StepGraph()
1241
- for node in nodes:
1242
- graph.add_node_from_step(node)
1243
- for edge in edges:
1244
- graph.add_edge_from_params(edge)
1245
- return graph
1246
-
1247
- @staticmethod
1248
- def _update_slot_mappings(subgraph: dict[str, Any]) -> dict[str, list[SlotMapping]]:
1249
- """Updates the :class:`SlotMappings<easylink.graph_components.SlotMapping>` to the choice type.
1250
-
1251
- Parameters
1252
- ----------
1253
- sub_graph
1254
- Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
1255
-
1256
- Returns
1257
- -------
1258
- Updated ``SlotMappings`` that match the choice type.
1259
- """
1260
- input_mappings = subgraph["input_slot_mappings"]
1261
- output_mappings = subgraph["output_slot_mappings"]
1262
- return {"input": input_mappings, "output": output_mappings}
1263
-
1264
1348
 
1265
1349
  class ConfigurationState(ABC):
1266
1350
  """A given :class:`Step's<Step>` configuration state.
@@ -1275,8 +1359,9 @@ class ConfigurationState(ABC):
1275
1359
  ----------
1276
1360
  step
1277
1361
  The ``Step`` this ``ConfigurationState`` is tied to.
1278
- pipeline_config
1279
- The relevant configuration for the ``Step`` we are setting the state for.
1362
+ step_config
1363
+ The internal configuration of this ``Step`` we are setting the state
1364
+ for; it should not include the ``Step's`` name.
1280
1365
  combined_implementations
1281
1366
  The configuration for any implementations to be combined.
1282
1367
  input_data_config
@@ -1287,14 +1372,15 @@ class ConfigurationState(ABC):
1287
1372
  def __init__(
1288
1373
  self,
1289
1374
  step: Step,
1290
- pipeline_config: LayeredConfigTree,
1375
+ step_config: LayeredConfigTree,
1291
1376
  combined_implementations: LayeredConfigTree,
1292
1377
  input_data_config: LayeredConfigTree,
1293
1378
  ):
1294
1379
  self._step = step
1295
1380
  """The ``Step`` this ``ConfigurationState`` is tied to."""
1296
- self.pipeline_config = pipeline_config
1297
- """The relevant configuration for the ``Step`` we are setting the state for."""
1381
+ self.step_config = step_config
1382
+ """The internal configuration of this ``Step`` we are setting the state
1383
+ for; it should not include the ``Step's`` name."""
1298
1384
  self.combined_implementations = combined_implementations
1299
1385
  """The relevant configuration if the ``Step's`` ``Implementation``
1300
1386
  has been requested to be combined with that of a different ``Step``."""
@@ -1335,15 +1421,15 @@ class LeafConfigurationState(ConfigurationState):
1335
1421
  @property
1336
1422
  def is_combined(self) -> bool:
1337
1423
  """Whether or not this ``Step`` is combined with another ``Step``."""
1338
- return True if COMBINED_IMPLEMENTATION_KEY in self.pipeline_config else False
1424
+ return COMBINED_IMPLEMENTATION_KEY in self.step_config
1339
1425
 
1340
1426
  @property
1341
1427
  def implementation_config(self) -> LayeredConfigTree:
1342
1428
  """The ``Step's`` specific ``Implementation`` configuration."""
1343
1429
  return (
1344
- self.combined_implementations[self.pipeline_config[COMBINED_IMPLEMENTATION_KEY]]
1430
+ self.combined_implementations[self.step_config[COMBINED_IMPLEMENTATION_KEY]]
1345
1431
  if self.is_combined
1346
- else self.pipeline_config["implementation"]
1432
+ else self.step_config.implementation
1347
1433
  )
1348
1434
 
1349
1435
  def get_implementation_graph(self) -> ImplementationGraph:
@@ -1357,25 +1443,30 @@ class LeafConfigurationState(ConfigurationState):
1357
1443
  -------
1358
1444
  The ``ImplementationGraph`` related to this ``Step``.
1359
1445
  """
1360
-
1446
+ step = self._step
1361
1447
  implementation_graph = ImplementationGraph()
1362
- implementation_node_name = self._step.implementation_node_name
1363
1448
  if self.is_combined:
1449
+ if isinstance(step, EmbarrassinglyParallelStep):
1450
+ raise NotImplementedError(
1451
+ "Combining implementations with embarrassingly parallel steps "
1452
+ "is not yet supported."
1453
+ )
1364
1454
  implementation = PartialImplementation(
1365
- combined_name=self.pipeline_config[COMBINED_IMPLEMENTATION_KEY],
1366
- schema_step=self._step.step_name,
1367
- input_slots=self._step.input_slots.values(),
1368
- output_slots=self._step.output_slots.values(),
1455
+ combined_name=self.step_config[COMBINED_IMPLEMENTATION_KEY],
1456
+ schema_step=step.step_name,
1457
+ input_slots=step.input_slots.values(),
1458
+ output_slots=step.output_slots.values(),
1369
1459
  )
1370
1460
  else:
1371
1461
  implementation = Implementation(
1372
- schema_steps=[self._step.step_name],
1462
+ schema_steps=[step.step_name],
1373
1463
  implementation_config=self.implementation_config,
1374
- input_slots=self._step.input_slots.values(),
1375
- output_slots=self._step.output_slots.values(),
1464
+ input_slots=step.input_slots.values(),
1465
+ output_slots=step.output_slots.values(),
1466
+ is_embarrassingly_parallel=isinstance(step, EmbarrassinglyParallelStep),
1376
1467
  )
1377
1468
  implementation_graph.add_node_from_implementation(
1378
- implementation_node_name,
1469
+ step.implementation_node_name,
1379
1470
  implementation=implementation,
1380
1471
  )
1381
1472
  return implementation_graph
@@ -1416,10 +1507,10 @@ class LeafConfigurationState(ConfigurationState):
1416
1507
  for mapping in mappings:
1417
1508
  # FIXME [MIC-5771]: Fix ParallelSteps
1418
1509
  if (
1419
- "input_data_file" in self.pipeline_config
1510
+ "input_data_file" in self.step_config
1420
1511
  and edge.source_node == "pipeline_graph_input_data"
1421
1512
  ):
1422
- edge.output_slot = self.pipeline_config["input_data_file"]
1513
+ edge.output_slot = self.step_config["input_data_file"]
1423
1514
  imp_edge = mapping.remap_edge(edge)
1424
1515
  implementation_edges.append(imp_edge)
1425
1516
  else:
@@ -1441,8 +1532,10 @@ class NonLeafConfigurationState(ConfigurationState):
1441
1532
  ----------
1442
1533
  step
1443
1534
  The ``Step`` this ``ConfigurationState`` is tied to.
1444
- pipeline_config
1445
- The relevant configuration for the ``Step`` we are setting the state for.
1535
+ step_config
1536
+ The internal configuration of this ``Step`` we are setting the state
1537
+ for; it should not include the ``Step's`` name (though it must include
1538
+ the sub-step names).
1446
1539
  combined_implementations
1447
1540
  The configuration for any implementations to be combined.
1448
1541
  input_data_config
@@ -1473,16 +1566,17 @@ class NonLeafConfigurationState(ConfigurationState):
1473
1566
  def __init__(
1474
1567
  self,
1475
1568
  step: Step,
1476
- pipeline_config: LayeredConfigTree,
1569
+ step_config: LayeredConfigTree,
1477
1570
  combined_implementations: LayeredConfigTree,
1478
1571
  input_data_config: LayeredConfigTree,
1479
1572
  ):
1480
- super().__init__(step, pipeline_config, combined_implementations, input_data_config)
1573
+ super().__init__(step, step_config, combined_implementations, input_data_config)
1481
1574
  if not step.step_graph:
1482
1575
  raise ValueError(
1483
1576
  "NonLeafConfigurationState requires a subgraph upon which to operate, "
1484
1577
  f"but Step {step.name} has no step graph."
1485
1578
  )
1579
+ self._nodes = step.step_graph.nodes
1486
1580
  self._configure_subgraph_steps()
1487
1581
 
1488
1582
  def get_implementation_graph(self) -> ImplementationGraph:
@@ -1513,8 +1607,8 @@ class NonLeafConfigurationState(ConfigurationState):
1513
1607
 
1514
1608
  def add_nodes(self, implementation_graph: ImplementationGraph) -> None:
1515
1609
  """Adds nodes for each ``Step`` to the ``ImplementationGraph``."""
1516
- for node in self._step.step_graph.nodes:
1517
- step = self._step.step_graph.nodes[node]["step"]
1610
+ for node in self._nodes:
1611
+ step = self._nodes[node]["step"]
1518
1612
  implementation_graph.update(step.get_implementation_graph())
1519
1613
 
1520
1614
  def add_edges(self, implementation_graph: ImplementationGraph) -> None:
@@ -1522,8 +1616,8 @@ class NonLeafConfigurationState(ConfigurationState):
1522
1616
  for source, target, edge_attrs in self._step.step_graph.edges(data=True):
1523
1617
  all_edges = []
1524
1618
  edge = EdgeParams.from_graph_edge(source, target, edge_attrs)
1525
- parent_source_step = self._step.step_graph.nodes[source]["step"]
1526
- parent_target_step = self._step.step_graph.nodes[target]["step"]
1619
+ parent_source_step = self._nodes[source]["step"]
1620
+ parent_target_step = self._nodes[target]["step"]
1527
1621
 
1528
1622
  source_edges = parent_source_step.get_implementation_edges(edge)
1529
1623
  for source_edge in source_edges:
@@ -1559,7 +1653,7 @@ class NonLeafConfigurationState(ConfigurationState):
1559
1653
  ]
1560
1654
  for mapping in mappings:
1561
1655
  new_edge = mapping.remap_edge(edge)
1562
- new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
1656
+ new_step = self._nodes[mapping.child_node]["step"]
1563
1657
  imp_edges = new_step.get_implementation_edges(new_edge)
1564
1658
  implementation_edges.extend(imp_edges)
1565
1659
  elif edge.target_node == self._step.name:
@@ -1570,7 +1664,7 @@ class NonLeafConfigurationState(ConfigurationState):
1570
1664
  ]
1571
1665
  for mapping in mappings:
1572
1666
  new_edge = mapping.remap_edge(edge)
1573
- new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
1667
+ new_step = self._nodes[mapping.child_node]["step"]
1574
1668
  imp_edges = new_step.get_implementation_edges(new_edge)
1575
1669
  implementation_edges.extend(imp_edges)
1576
1670
  else:
@@ -1585,9 +1679,12 @@ class NonLeafConfigurationState(ConfigurationState):
1585
1679
  This method recursively traverses the ``StepGraph`` and sets the configuration
1586
1680
  state for each ``Step`` until reaching all leaf nodes.
1587
1681
  """
1588
- nodes = self._step.step_graph.nodes
1589
- for node in nodes:
1590
- step = nodes[node]["step"]
1682
+ for node in self._nodes:
1683
+ step = self._nodes[node]["step"]
1684
+ # IOStep names never appear in configuration
1685
+ step_config = (
1686
+ self.step_config if isinstance(step, IOStep) else self.step_config[step.name]
1687
+ )
1591
1688
  step.set_configuration_state(
1592
- self.pipeline_config, self.combined_implementations, self.input_data_config
1689
+ step_config, self.combined_implementations, self.input_data_config
1593
1690
  )