easylink 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
easylink/step.py CHANGED
@@ -54,21 +54,18 @@ class Step:
54
54
  Parameters
55
55
  ----------
56
56
  step_name
57
- The name of the pipeline step in the ``PipelineSchema``.
57
+ The name of the pipeline step in the ``PipelineSchema``. It must also match
58
+ the key in the implementation metadata file to be used to run this ``Step``.
58
59
  name
59
- The name of this step *node*. This can be different from the ``step_name``
60
- due to the need for disambiguation during the process of unrolling loops,
61
- etc. For example, if step 1 is looped multiple times, each node would
62
- have a ``step_name`` of, perhaps, "step_1" but unique ``names``
63
- ("step_1_loop_1", etc).
60
+ The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
61
+ This can be different from the ``step_name`` due to the need for disambiguation
62
+ during the process of flattening the ``StepGraph``, e.g. unrolling loops, etc.
63
+ For example, if step 1 is looped multiple times, each node would have a
64
+ ``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc).
64
65
  input_slots
65
66
  All required :class:`InputSlots<easylink.graph_components.InputSlot>`.
66
67
  output_slots
67
68
  All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
68
- nodes
69
- All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance.
70
- edges
71
- The :class:`~easylink.graph_components.EdgeParams` of this ``Step``.
72
69
  input_slot_mappings
73
70
  The :class:`InputSlotMapping<easylink.graph_components.InputSlotMapping>` of this ``Step``.
74
71
  output_slot_mappings
@@ -89,31 +86,22 @@ class Step:
89
86
  name: str | None = None,
90
87
  input_slots: Iterable[InputSlot] = (),
91
88
  output_slots: Iterable[OutputSlot] = (),
92
- nodes: Iterable[Step] = (),
93
- edges: Iterable[EdgeParams] = (),
94
89
  input_slot_mappings: Iterable[InputSlotMapping] = (),
95
90
  output_slot_mappings: Iterable[OutputSlotMapping] = (),
96
91
  ) -> None:
97
92
  self.step_name = step_name
98
- """The name of the high-level pipeline step."""
93
+ """The name of the pipeline step in the ``PipelineSchema``. It must also match
94
+ the key in the implementation metadata file to be used to run this ``Step``."""
99
95
  self.name = name if name else step_name
100
- """The name of ``Step's`` node in its :class:`~easylink.graph_components.StepGraph`.
101
- This is a more descriptive name than the ``step_name``, e.g. if "step 1"
102
- is looped multiple times. If not provided, defaults to the :attr:`step_name`."""
96
+ """The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
97
+ This can be different from the ``step_name`` due to the need for disambiguation
98
+ during the process of flattening the ``StepGraph``, e.g. unrolling loops, etc.
99
+ For example, if step 1 is looped multiple times, each node would have a
100
+ ``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc)."""
103
101
  self.input_slots = {slot.name: slot for slot in input_slots}
104
102
  """A mapping of ``InputSlot`` names to their instances."""
105
103
  self.output_slots = {slot.name: slot for slot in output_slots}
106
104
  """A mapping of ``OutputSlot`` names to their instances."""
107
- self.nodes = nodes
108
- """All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance."""
109
- for node in self.nodes:
110
- node.set_parent_step(self)
111
- self.edges = edges
112
- """The :class:`~easylink.graph_components.EdgeParams` of this ``Step``."""
113
- self.step_graph = self._get_step_graph(nodes, edges)
114
- """The :class:`~easylink.graph_components.StepGraph` of this ``Step``, i.e.
115
- the directed acyclic graph (DAG) of sub-nodes and their edges that make
116
- up this ``Step`` instance."""
117
105
  self.slot_mappings = {
118
106
  "input": list(input_slot_mappings),
119
107
  "output": list(output_slot_mappings),
@@ -164,7 +152,7 @@ class Step:
164
152
  """
165
153
  step = self
166
154
  implementation_name = (
167
- self.configuration_state.pipeline_config[COMBINED_IMPLEMENTATION_KEY]
155
+ self.configuration_state.step_config[COMBINED_IMPLEMENTATION_KEY]
168
156
  if self.configuration_state.is_combined
169
157
  else self.configuration_state.implementation_config.name
170
158
  )
@@ -203,7 +191,8 @@ class Step:
203
191
  Parameters
204
192
  ----------
205
193
  step_config
206
- The configuration of this ``Step``.
194
+ The internal configuration of this ``Step``, i.e. it should not include
195
+ the ``Step's`` name.
207
196
  combined_implementations
208
197
  The configuration for any implementations to be combined.
209
198
  input_data_config
@@ -216,9 +205,6 @@ class Step:
216
205
 
217
206
  Notes
218
207
  -----
219
- A ``Step`` can be in either a "leaf" or a "non-leaf" configuration state
220
- and the validation process is different for each.
221
-
222
208
  If the ``Step`` does not validate (i.e. errors are found and the returned
223
209
  dictionary is non-empty), the tool will exit and the pipeline will not run.
224
210
 
@@ -227,14 +213,42 @@ class Step:
227
213
  all issues in one pass. In these cases, new errors may be found after the
228
214
  initial ones are handled.
229
215
  """
230
- if len(self.step_graph.nodes) == 0:
231
- return self._validate_leaf(step_config, combined_implementations)
232
- elif self.config_key in step_config:
233
- return self._validate_nonleaf(
234
- step_config[self.config_key], combined_implementations, input_data_config
235
- )
216
+ errors = {}
217
+ metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
218
+ error_key = f"step {self.name}"
219
+ if (
220
+ "implementation" not in step_config
221
+ and COMBINED_IMPLEMENTATION_KEY not in step_config
222
+ ):
223
+ errors[error_key] = [
224
+ "The step configuration does not contain an 'implementation' key "
225
+ "or a reference to a combined implementation."
226
+ ]
227
+ elif (
228
+ COMBINED_IMPLEMENTATION_KEY in step_config
229
+ and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
230
+ ):
231
+ errors[error_key] = [
232
+ "The step refers to a combined implementation but "
233
+ f"{step_config[COMBINED_IMPLEMENTATION_KEY]} is not a valid combined "
234
+ "implementation."
235
+ ]
236
236
  else:
237
- return self._validate_leaf(step_config, combined_implementations)
237
+ implementation_config = (
238
+ step_config["implementation"]
239
+ if "implementation" in step_config
240
+ else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
241
+ )
242
+ if not "name" in implementation_config:
243
+ errors[error_key] = [
244
+ "The implementation configuration does not contain a 'name' key."
245
+ ]
246
+ elif not implementation_config["name"] in metadata:
247
+ errors[error_key] = [
248
+ f"Implementation '{implementation_config['name']}' is not supported. "
249
+ f"Supported implementations are: {list(metadata.keys())}."
250
+ ]
251
+ return errors
238
252
 
239
253
  def get_implementation_graph(self) -> ImplementationGraph:
240
254
  """Gets this ``Step's`` :class:`~easylink.graph_components.ImplementationGraph`.
@@ -276,42 +290,25 @@ class Step:
276
290
 
277
291
  def set_configuration_state(
278
292
  self,
279
- parent_config: LayeredConfigTree,
293
+ step_config: LayeredConfigTree,
280
294
  combined_implementations: LayeredConfigTree,
281
295
  input_data_config: LayeredConfigTree,
282
296
  ) -> None:
283
- """Sets the configuration state for this ``Step``.
284
-
285
- The so-called 'configuration state' for a given ``Step`` is backed up by
286
- a :class:`ConfigurationState` class and is assigned to its :attr:`_configuration_state`
287
- attribute. There are two possible ``ConfigurationStates``:
288
- :class:`LeafConfigurationState` and :class:`NonLeafConfigurationState`.
289
-
290
- This method sets the configuration state of this ``Step`` based on whether
291
- or not a :attr:`config_key` is set *and exists is the ``Step's`` configuration*
292
- (i.e. its portion of the user-suppled pipeline specification
293
- file); any required deviation from this behavior requires special
294
- handling.
297
+ """Sets the configuration state to 'leaf'.
295
298
 
296
299
  Parameters
297
300
  ----------
298
- parent_config
299
- The configuration of the parent ``Step``.
301
+ step_config
302
+ The internal configuration of this ``Step``, i.e. it should not include
303
+ the ``Step's`` name.
300
304
  combined_implementations
301
305
  The configuration for any implementations to be combined.
302
306
  input_data_config
303
307
  The input data configuration for the entire pipeline.
304
308
  """
305
- step_config = parent_config[self.name]
306
- sub_config = self._get_config(step_config)
307
- if self.config_key is not None and self.config_key in step_config:
308
- self._configuration_state = NonLeafConfigurationState(
309
- self, sub_config, combined_implementations, input_data_config
310
- )
311
- else:
312
- self._configuration_state = LeafConfigurationState(
313
- self, sub_config, combined_implementations, input_data_config
314
- )
309
+ self._configuration_state = LeafConfigurationState(
310
+ self, step_config, combined_implementations, input_data_config
311
+ )
315
312
 
316
313
  def get_implementation_slot_mappings(self) -> dict[str, list[SlotMapping]]:
317
314
  """Gets the input and output :class:`SlotMappings<easylink.graph_components.SlotMapping>`."""
@@ -326,113 +323,6 @@ class Step:
326
323
  ],
327
324
  }
328
325
 
329
- ##################
330
- # Helper methods #
331
- ##################
332
-
333
- def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
334
- """Create a StepGraph from the nodes and edges the step was initialized with."""
335
- step_graph = StepGraph()
336
- for step in nodes:
337
- step_graph.add_node_from_step(step)
338
- for edge in edges:
339
- step_graph.add_edge_from_params(edge)
340
- return step_graph
341
-
342
- def _validate_leaf(
343
- self,
344
- step_config: LayeredConfigTree,
345
- combined_implementations: LayeredConfigTree,
346
- ) -> dict[str, list[str]]:
347
- """Validates a leaf ``Step``."""
348
- errors = {}
349
- metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
350
- error_key = f"step {self.name}"
351
- if (
352
- "implementation" not in step_config
353
- and COMBINED_IMPLEMENTATION_KEY not in step_config
354
- ):
355
- errors[error_key] = [
356
- "The step configuration does not contain an 'implementation' key or a "
357
- "reference to a combined implementation."
358
- ]
359
- elif (
360
- COMBINED_IMPLEMENTATION_KEY in step_config
361
- and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
362
- ):
363
- errors[error_key] = [
364
- f"The step refers to a combined implementation but {step_config[COMBINED_IMPLEMENTATION_KEY]} is not a "
365
- f"valid combined implementation."
366
- ]
367
- else:
368
- implementation_config = (
369
- step_config["implementation"]
370
- if "implementation" in step_config
371
- else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
372
- )
373
- if not "name" in implementation_config:
374
- errors[error_key] = [
375
- "The implementation configuration does not contain a 'name' key."
376
- ]
377
- elif not implementation_config["name"] in metadata:
378
- errors[error_key] = [
379
- f"Implementation '{implementation_config['name']}' is not supported. "
380
- f"Supported implementations are: {list(metadata.keys())}."
381
- ]
382
- return errors
383
-
384
- def _validate_nonleaf(
385
- self,
386
- step_config: LayeredConfigTree,
387
- combined_implementations: LayeredConfigTree,
388
- input_data_config: LayeredConfigTree,
389
- ) -> dict[str, list[str]]:
390
- """Validates a non-leaf ``Step``."""
391
- errors = {}
392
- nodes = self.step_graph.nodes
393
- for node in nodes:
394
- step = nodes[node]["step"]
395
- if isinstance(step, IOStep):
396
- continue
397
- if step.name not in step_config:
398
- step_errors = {f"step {step.name}": [f"The step is not configured."]}
399
- else:
400
- step_errors = step.validate_step(
401
- step_config[step.name], combined_implementations, input_data_config
402
- )
403
- if step_errors:
404
- errors.update(step_errors)
405
- extra_steps = set(step_config.keys()) - set(nodes)
406
- for extra_step in extra_steps:
407
- errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
408
- return errors
409
-
410
- def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
411
- """Convenience method to get a ``Step's`` configuration.
412
-
413
- Some types of ``Steps`` have a unique :attr:`config_key` (defined by the
414
- user via the pipeline specification file) that is used to specify the behavior
415
- of the ``Step`` (e.g. looping, parallel, etc). This method simply returns
416
- the ``Step's`` sub-configuration keyed to that ``config_key`` (if it exists,
417
- i.e. is not a basic ``Step``).
418
-
419
- Parameters
420
- ----------
421
- step_config
422
- The high-level configuration of this ``Step``.
423
-
424
- Returns
425
- -------
426
- The sub-configuration of this ``Step`` keyed on the ``config_key``
427
- (if it exists).
428
-
429
- """
430
- return (
431
- step_config
432
- if not self.config_key in step_config
433
- else step_config[self.config_key]
434
- )
435
-
436
326
 
437
327
  class IOStep(Step):
438
328
  """A special case type of :class:`Step` used to represent incoming and outgoing data.
@@ -485,27 +375,24 @@ class IOStep(Step):
485
375
 
486
376
  def set_configuration_state(
487
377
  self,
488
- parent_config: LayeredConfigTree,
378
+ step_config: LayeredConfigTree,
489
379
  combined_implementations: LayeredConfigTree,
490
380
  input_data_config: LayeredConfigTree,
491
381
  ) -> None:
492
- """Sets the configuration state to leaf.
493
-
494
- An ``IOStep`` is by definition a leaf ``Step`` and so we assign that here
495
- instead of relying on the default behavior of the parent class.
382
+ """Sets the configuration state to 'leaf'.
496
383
 
497
384
  Parameters
498
385
  ----------
499
- parent_config
500
- The configuration of the parent ``Step``. For ``IOSteps``, this will
501
- always be the entire pipeline configuration.
386
+ step_config
387
+ The internal configuration of this ``Step``, i.e. it should not include
388
+ the ``Step's`` name.
502
389
  combined_implementations
503
390
  The configuration for any implementations to be combined.
504
391
  input_data_config
505
392
  The input data configuration for the entire pipeline.
506
393
  """
507
394
  self._configuration_state = LeafConfigurationState(
508
- self, parent_config, combined_implementations, input_data_config
395
+ self, step_config, combined_implementations, input_data_config
509
396
  )
510
397
 
511
398
  def get_implementation_graph(self) -> ImplementationGraph:
@@ -548,29 +435,29 @@ class InputStep(IOStep):
548
435
 
549
436
  def set_configuration_state(
550
437
  self,
551
- parent_config: LayeredConfigTree,
438
+ step_config: LayeredConfigTree,
552
439
  combined_implementations: LayeredConfigTree,
553
440
  input_data_config: LayeredConfigTree,
554
441
  ) -> None:
555
442
  """Sets the configuration state and updates the ``OutputSlots``.
556
443
 
557
- In addition to setting ``InputStep`` to a leaf configuration state, this
444
+ In addition to setting ``InputStep`` to a 'leaf' configuration state, this
558
445
  method also updates the ``OutputSlots`` to include all of the dataset keys
559
446
  in the input data specification file. This allows for future use of
560
- specific datasets instead of only "all" of them.
447
+ *specific* datasets instead of only *all* of them.
561
448
 
562
449
  Parameters
563
450
  ----------
564
- parent_config
565
- The configuration of the parent ``Step``. For ``IOSteps``, this will
566
- always be the entire pipeline configuration.
451
+ step_config
452
+ The internal configuration of this ``Step``, i.e. it should not include
453
+ the ``Step's`` name.
567
454
  combined_implementations
568
455
  The configuration for any implementations to be combined.
569
456
  input_data_config
570
457
  The input data configuration for the entire pipeline.
571
458
  """
572
459
  super().set_configuration_state(
573
- parent_config, combined_implementations, input_data_config
460
+ step_config, combined_implementations, input_data_config
574
461
  )
575
462
  for input_data_key in input_data_config:
576
463
  self.output_slots[input_data_key] = OutputSlot(name=input_data_key)
@@ -601,19 +488,185 @@ class HierarchicalStep(Step):
601
488
 
602
489
  See :class:`Step` for inherited attributes.
603
490
 
604
- Notes
605
- -----
606
- To use this feature, the sub-``Steps`` must be defined in the pipeline specification
607
- file under a "substeps" key. If no "substeps" key is present, it will be treated
608
- as a single ``Step``.
491
+ Parameters
492
+ ----------
493
+ nodes
494
+ All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``.
495
+ edges
496
+ The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes.
497
+ step_graph
498
+ The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
499
+ graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``.
500
+ user_configurable
501
+ Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
502
+ attribute to allow for back-end creation of ``HierarchicalSteps`` that are not
503
+ user-facing (i.e. they do not need to provide a 'substeps' configuration key).
609
504
 
610
505
  """
611
506
 
507
+ def __init__(
508
+ self,
509
+ step_name,
510
+ name=None,
511
+ input_slots=(),
512
+ output_slots=(),
513
+ nodes=(),
514
+ edges=(),
515
+ input_slot_mappings=(),
516
+ output_slot_mappings=(),
517
+ user_configurable=True,
518
+ ):
519
+ super().__init__(
520
+ step_name,
521
+ name,
522
+ input_slots,
523
+ output_slots,
524
+ input_slot_mappings,
525
+ output_slot_mappings,
526
+ )
527
+ self.nodes = nodes
528
+ """All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``."""
529
+ for node in self.nodes:
530
+ node.set_parent_step(self)
531
+ self.edges = edges
532
+ """The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes."""
533
+ self.step_graph = self._get_step_graph(nodes, edges)
534
+ """The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
535
+ graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``."""
536
+ self.user_configurable = user_configurable
537
+ """Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
538
+ attribute to allow for back-end creation of ``HierarchicalSteps`` that are not
539
+ user-facing (i.e. they do not need to provide a 'substeps' configuration key)."""
540
+
612
541
  @property
613
542
  def config_key(self):
614
543
  """The pipeline specification key required for a ``HierarchicalStep``."""
615
544
  return "substeps"
616
545
 
546
+ def validate_step(
547
+ self,
548
+ step_config: LayeredConfigTree,
549
+ combined_implementations: LayeredConfigTree,
550
+ input_data_config: LayeredConfigTree,
551
+ ) -> dict[str, list[str]]:
552
+ """Validates the ``HierarchicalStep``.
553
+
554
+ Parameters
555
+ ----------
556
+ step_config
557
+ The internal configuration of this ``Step``, i.e. it should not include
558
+ the ``Step's`` name.
559
+ combined_implementations
560
+ The configuration for any implementations to be combined.
561
+ input_data_config
562
+ The input data configuration for the entire pipeline.
563
+
564
+ Returns
565
+ -------
566
+ A dictionary of errors, where the keys are the ``HierarchicalStep``
567
+ name and the values are lists of error messages associated with the
568
+ given ``HierarchicalStep``.
569
+
570
+ Notes
571
+ -----
572
+ A ``HierarchicalStep`` can be in either a "leaf" or a "non-leaf" configuration
573
+ state and the validation process is different for each.
574
+
575
+ If the ``HierarchicalStep`` does not validate (i.e. errors are found and
576
+ the returned dictionary is non-empty), the tool will exit and the pipeline
577
+ will not run.
578
+
579
+ We attempt to batch error messages as much as possible, but there may be
580
+ times where the configuration is so ill-formed that we are unable to handle
581
+ all issues in one pass. In these cases, new errors may be found after the
582
+ initial ones are handled.
583
+ """
584
+ if self.user_configurable:
585
+ if self.config_key in step_config:
586
+ step_config = step_config[self.config_key]
587
+ else:
588
+ # This is a leaf step
589
+ return super().validate_step(
590
+ step_config, combined_implementations, input_data_config
591
+ )
592
+ return self._validate_step_graph(
593
+ step_config, combined_implementations, input_data_config
594
+ )
595
+
596
+ def set_configuration_state(
597
+ self,
598
+ step_config: LayeredConfigTree,
599
+ combined_implementations: LayeredConfigTree,
600
+ input_data_config: LayeredConfigTree,
601
+ ) -> None:
602
+ """Sets the configuration state.
603
+
604
+ The configuration state of a ``HierarchicalStep`` depends on (1) whether
605
+ or not it is :attr:`user_configurable` and (2) whether or not the
606
+ :attr:`config_key` exists in the pipeline specification file.
607
+
608
+ Parameters
609
+ ----------
610
+ step_config
611
+ The internal configuration of this ``Step``, i.e. it should not include
612
+ the ``Step's`` name.
613
+ combined_implementations
614
+ The configuration for any implementations to be combined.
615
+ input_data_config
616
+ The input data configuration for the entire pipeline.
617
+ """
618
+ if self.user_configurable:
619
+ if self.config_key in step_config:
620
+ step_config = step_config[self.config_key]
621
+ configuration_state_type = NonLeafConfigurationState
622
+ else:
623
+ configuration_state_type = LeafConfigurationState
624
+ else:
625
+ # Substeps must be used, so we require non-leaf here
626
+ configuration_state_type = NonLeafConfigurationState
627
+ self._configuration_state = configuration_state_type(
628
+ self, step_config, combined_implementations, input_data_config
629
+ )
630
+
631
+ ##################
632
+ # Helper methods #
633
+ ##################
634
+
635
+ def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
636
+ """Creates a :class:`~easylink.graph_components.StepGraph` from the nodes and edges the step was initialized with."""
637
+ step_graph = StepGraph()
638
+ for step in nodes:
639
+ step_graph.add_node_from_step(step)
640
+ for edge in edges:
641
+ step_graph.add_edge_from_params(edge)
642
+ return step_graph
643
+
644
+ def _validate_step_graph(
645
+ self,
646
+ step_config: LayeredConfigTree,
647
+ combined_implementations: LayeredConfigTree,
648
+ input_data_config: LayeredConfigTree,
649
+ ) -> dict[str, list[str]]:
650
+ """Validates the nodes of a :class:`~easylink.graph_components.StepGraph`."""
651
+ errors = {}
652
+ for node in self.step_graph.nodes:
653
+ step = self.step_graph.nodes[node]["step"]
654
+ if isinstance(step, IOStep):
655
+ continue
656
+ else:
657
+ if step.name not in step_config:
658
+ step_errors = {f"step {step.name}": ["The step is not configured."]}
659
+ else:
660
+ step_errors = step.validate_step(
661
+ step_config[step.name], combined_implementations, input_data_config
662
+ )
663
+ if step_errors:
664
+ errors.update(step_errors)
665
+ extra_steps = set(step_config.keys()) - set(self.step_graph.nodes)
666
+ for extra_step in extra_steps:
667
+ errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
668
+ return errors
669
+
617
670
 
618
671
  class TemplatedStep(Step, ABC):
619
672
  """A type of :class:`Step` that may contain multiplicity.
@@ -641,8 +694,12 @@ class TemplatedStep(Step, ABC):
641
694
  template_step.input_slots.values(),
642
695
  template_step.output_slots.values(),
643
696
  )
697
+ self.step_graph = None
698
+ """The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
699
+ graph (DAG) of sub-nodes and their edges that make up this ``TemplatedStep``."""
644
700
  self.template_step = template_step
645
701
  """The ``Step`` to be templated."""
702
+
646
703
  self.template_step.set_parent_step(self)
647
704
 
648
705
  @property
@@ -716,7 +773,8 @@ class TemplatedStep(Step, ABC):
716
773
  Parameters
717
774
  ----------
718
775
  step_config
719
- The configuration of this ``TemplatedStep``.
776
+ The internal configuration of this ``Step``, i.e. it should not include
777
+ the ``Step's`` name.
720
778
  combined_implementations
721
779
  The configuration for any implementations to be combined.
722
780
  input_data_config
@@ -730,7 +788,7 @@ class TemplatedStep(Step, ABC):
730
788
 
731
789
  Notes
732
790
  -----
733
- If the ``Step`` does not validate (i.e. errors are found and the returned
791
+ If the ``TemplatedStep`` does not validate (i.e. errors are found and the returned
734
792
  dictionary is non-empty), the tool will exit and the pipeline will not run.
735
793
 
736
794
  We attempt to batch error messages as much as possible, but there may be
@@ -739,6 +797,7 @@ class TemplatedStep(Step, ABC):
739
797
  initial ones are handled.
740
798
  """
741
799
  if not self.config_key in step_config:
800
+ # This is a leaf step
742
801
  return self.template_step.validate_step(
743
802
  step_config, combined_implementations, input_data_config
744
803
  )
@@ -770,51 +829,32 @@ class TemplatedStep(Step, ABC):
770
829
  ]
771
830
  parallel_errors.update(
772
831
  self.template_step.validate_step(
773
- parallel_config, combined_implementations, input_data_config
832
+ LayeredConfigTree(parallel_config),
833
+ combined_implementations,
834
+ input_data_config,
774
835
  )
775
836
  )
776
837
  if parallel_errors:
777
838
  errors[f"step {self.name}"][f"{self.node_prefix}_{i+1}"] = parallel_errors
778
839
  return errors
779
840
 
780
- def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
781
- """Convenience method to get the ``TemplatedStep's`` configuration.
782
-
783
- ``TemplatedSteps`` may include multiplicity. In such cases, their configurations
784
- must be modified to include the expanded ``Steps``.
785
-
786
- Parameters
787
- ----------
788
- step_config
789
- The high-level configuration of this ``TemplatedStep``.
790
-
791
- Returns
792
- -------
793
- The expanded sub-configuration of this ``TemplatedStep`` based on the
794
- :attr:`Step.config_key` and expanded to include all looped or parallelized
795
- sub-``Steps``).
796
- """
797
- if self.config_key in step_config:
798
- expanded_step_config = LayeredConfigTree()
799
- for i, sub_config in enumerate(step_config[self.config_key]):
800
- expanded_step_config.update(
801
- {f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
802
- )
803
- return expanded_step_config
804
- return step_config
805
-
806
841
  def set_configuration_state(
807
842
  self,
808
- parent_config: LayeredConfigTree,
843
+ step_config: LayeredConfigTree,
809
844
  combined_implementations: LayeredConfigTree,
810
845
  input_data_config: LayeredConfigTree,
811
846
  ):
812
- """Sets the configuration state and updates the :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
847
+ """Sets the configuration state to 'non-leaf'.
848
+
849
+ In addition to setting the configuration state, this also updates the
850
+ :class:`~easylink.graph_components.StepGraph` and
851
+ :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
813
852
 
814
853
  Parameters
815
854
  ----------
816
- parent_config
817
- The configuration of the parent ``Step``.
855
+ step_config
856
+ The internal configuration of this ``Step``, i.e. it should not include
857
+ the ``Step's`` name.
818
858
  combined_implementations
819
859
  The configuration for any implementations to be combined.
820
860
  input_data_config
@@ -828,7 +868,6 @@ class TemplatedStep(Step, ABC):
828
868
  :class:`~easylink.implementation.Implementation`, i.e. the one with a
829
869
  :class:`LeafConfigurationState`.
830
870
  """
831
- step_config = parent_config[self.name]
832
871
  if self.config_key not in step_config:
833
872
  # Special handle the step_graph update
834
873
  self.step_graph = StepGraph()
@@ -857,6 +896,36 @@ class TemplatedStep(Step, ABC):
857
896
  self, expanded_config, combined_implementations, input_data_config
858
897
  )
859
898
 
899
+ ##################
900
+ # Helper Methods #
901
+ ##################
902
+
903
+ def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
904
+ """Convenience method to get the ``TemplatedStep's`` configuration.
905
+
906
+ ``TemplatedSteps`` may include multiplicity. In such cases, their configurations
907
+ must be modified to include the expanded ``Steps``.
908
+
909
+ Parameters
910
+ ----------
911
+ step_config
912
+ The high-level configuration of this ``TemplatedStep``.
913
+
914
+ Returns
915
+ -------
916
+ The expanded sub-configuration of this ``TemplatedStep`` based on the
917
+ :attr:`Step.config_key` and expanded to include all looped or parallelized
918
+ sub-``Steps``.
919
+ """
920
+ if self.config_key in step_config:
921
+ expanded_step_config = LayeredConfigTree()
922
+ for i, sub_config in enumerate(step_config[self.config_key]):
923
+ expanded_step_config.update(
924
+ {f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
925
+ )
926
+ return expanded_step_config
927
+ return step_config
928
+
860
929
  def _duplicate_template_step(self) -> Step:
861
930
  """Makes a duplicate of the template ``Step``.
862
931
 
@@ -1069,17 +1138,25 @@ class EmbarrassinglyParallelStep(Step):
1069
1138
 
1070
1139
  An ``EmbarrassinglyParallelStep`` is different than a :class:`ParallelStep`
1071
1140
  in that it is not configured by the user to be run in parallel - it completely
1072
- happens on the back end for performance reasons. As such, note that it inherits
1073
- from :class:`Step` instead of :class:`TemplatedStep`.
1141
+ happens on the back end for performance reasons.
1142
+
1143
+ See :class:`Step` for inherited attributes.
1144
+
1145
+ Parameters
1146
+ ----------
1147
+ step
1148
+ The ``Step`` to be run in an embarrassingly parallel manner. To run multiple
1149
+ steps in parallel, use a :class:`HierarchicalStep`.
1150
+
1074
1151
  """
1075
1152
 
1076
1153
  def __init__(
1077
1154
  self,
1078
- step_name: str,
1079
- input_slots: Iterable[InputSlot],
1080
- output_slots: Iterable[OutputSlot],
1155
+ step: Step,
1081
1156
  ) -> None:
1082
- super().__init__(step_name, input_slots=input_slots, output_slots=output_slots)
1157
+ super().__init__(
1158
+ step.step_name, step.name, step.input_slots.values(), step.output_slots.values()
1159
+ )
1083
1160
  self._validate()
1084
1161
 
1085
1162
  def _validate(self) -> None:
@@ -1124,10 +1201,7 @@ class EmbarrassinglyParallelStep(Step):
1124
1201
 
1125
1202
 
1126
1203
  class ChoiceStep(Step):
1127
- """A type of :class:`Step` that allows for choosing between multiple paths.
1128
-
1129
- A ``ChoiceStep`` allows a user to select a single path from a set of possible
1130
- paths.
1204
+ """A type of :class:`Step` that allows for choosing from a set of options.
1131
1205
 
1132
1206
  See :class:`Step` for inherited attributes.
1133
1207
 
@@ -1141,7 +1215,7 @@ class ChoiceStep(Step):
1141
1215
  All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
1142
1216
  choices
1143
1217
  A dictionary of choices, where the keys are the names/types of choices and
1144
- the values are dictionaries containing that type's nodes, edges, and
1218
+ the values are dictionaries containing that type's ``Step`` and related
1145
1219
  :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
1146
1220
 
1147
1221
  Notes
@@ -1150,6 +1224,13 @@ class ChoiceStep(Step):
1150
1224
  :attr:`Step.config_key` in the pipeline specification file. Instead, the pipeline
1151
1225
  configuration must contain a 'type' key that specifies which option to choose.
1152
1226
 
1227
+ The :attr:`choices` dictionary must contain the choice type names as the outer
1228
+ keys. The values of each of these types is then another dictionary containing
1229
+ 'step', 'input_slot_mappings', and 'output_slot_mappings' keys with their
1230
+ corresponding values.
1231
+
1232
+ Each choice type must specify a *single* ``Step`` and its associated ``SlotMappings``.
1233
+ Any choice paths that require multiple sub-steps should specify a :class:`HierarchicalStep`.
1153
1234
  """
1154
1235
 
1155
1236
  def __init__(
@@ -1157,9 +1238,7 @@ class ChoiceStep(Step):
1157
1238
  step_name: str,
1158
1239
  input_slots: Iterable[InputSlot],
1159
1240
  output_slots: Iterable[OutputSlot],
1160
- choices: dict[
1161
- str, dict[str, list[Step | EdgeParams | InputSlotMapping | OutputSlotMapping]]
1162
- ],
1241
+ choices: dict[str, dict[str, Step | SlotMapping]],
1163
1242
  ) -> None:
1164
1243
  super().__init__(
1165
1244
  step_name,
@@ -1182,7 +1261,8 @@ class ChoiceStep(Step):
1182
1261
  Parameters
1183
1262
  ----------
1184
1263
  step_config
1185
- The configuration of this ``ChoiceStep``.
1264
+ The internal configuration of this ``Step``, i.e. it should not include
1265
+ the ``Step's`` name.
1186
1266
  combined_implementations
1187
1267
  The configuration for any implementations to be combined.
1188
1268
  input_data_config
@@ -1195,8 +1275,6 @@ class ChoiceStep(Step):
1195
1275
 
1196
1276
  Notes
1197
1277
  -----
1198
- A ``ChoiceStep`` by definition must be set with a :class:`NonLeafConfigurationState`.
1199
-
1200
1278
  If the ``Step`` does not validate (i.e. errors are found and the returned
1201
1279
  dictionary is non-empty), the tool will exit and the pipeline will not run.
1202
1280
 
@@ -1205,16 +1283,9 @@ class ChoiceStep(Step):
1205
1283
  all issues in one pass. In these cases, new errors may be found after the
1206
1284
  initial ones are handled.
1207
1285
 
1208
- We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
1209
- in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
1210
- as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
1211
- prior to :meth:`set_configuration_state`, but the validations itself actually
1212
- requires the updated ``StepGraph`` and ``SlotMappings``.
1213
-
1214
1286
  We do not attempt to validate the subgraph here if the 'type' key is unable
1215
1287
  to be validated.
1216
1288
  """
1217
-
1218
1289
  chosen_type = step_config.get("type")
1219
1290
  # Handle problems with the 'type' key
1220
1291
  if not chosen_type:
@@ -1222,104 +1293,64 @@ class ChoiceStep(Step):
1222
1293
  if chosen_type not in self.choices:
1223
1294
  return {
1224
1295
  f"step {self.name}": [
1225
- f"'{step_config['type']}' is not a supported 'type'. Valid choices are: {list(self.choices)}."
1296
+ f"'{step_config.type}' is not a supported 'type'. Valid choices are: {list(self.choices)}."
1226
1297
  ]
1227
1298
  }
1228
- # Handle type-subgraph inconsistencies
1229
- subgraph = self.choices[chosen_type]
1299
+
1300
+ chosen_step = self.choices[chosen_type]["step"]
1230
1301
  chosen_step_config = LayeredConfigTree(
1231
1302
  {key: value for key, value in step_config.items() if key != "type"}
1232
1303
  )
1233
- allowable_steps = [node.name for node in subgraph["nodes"]]
1234
- if set(allowable_steps) != set(chosen_step_config):
1304
+ if chosen_step.name not in chosen_step_config:
1235
1305
  return {
1236
1306
  f"step {self.name}": [
1237
- f"Invalid configuration for '{chosen_type}' type. Valid steps are {allowable_steps}."
1307
+ f"'{chosen_step.name}' is not configured. Confirm you have specified "
1308
+ f"the correct steps for the '{chosen_type}' type."
1238
1309
  ]
1239
1310
  }
1240
-
1241
- # HACK: Update the step graph and mappings here because we need them for validation
1242
- self.step_graph = self._update_step_graph(subgraph)
1243
- self.slot_mappings = self._update_slot_mappings(subgraph)
1244
1311
  # NOTE: A ChoiceStep is by definition non-leaf step
1245
- return self._validate_nonleaf(
1246
- chosen_step_config, combined_implementations, input_data_config
1312
+ return chosen_step.validate_step(
1313
+ chosen_step_config[chosen_step.name], combined_implementations, input_data_config
1247
1314
  )
1248
1315
 
1249
1316
  def set_configuration_state(
1250
1317
  self,
1251
- parent_config: LayeredConfigTree,
1318
+ step_config: LayeredConfigTree,
1252
1319
  combined_implementations: LayeredConfigTree,
1253
1320
  input_data_config: LayeredConfigTree,
1254
1321
  ):
1255
- """Sets the configuration state for a ``ChoiceStep``.
1322
+ """Sets the configuration state to 'non-leaf'.
1323
+
1324
+ In addition to setting the configuration state, this also updates the
1325
+ :class:`~easylink.graph_components.StepGraph` and
1326
+ :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
1256
1327
 
1257
1328
  Parameters
1258
1329
  ----------
1259
- parent_config
1260
- The configuration of the parent ``Step``.
1330
+ step_config
1331
+ The internal configuration of this ``Step``, i.e. it should not include
1332
+ the ``Step's`` name.
1261
1333
  combined_implementations
1262
1334
  The configuration for any implementations to be combined.
1263
1335
  input_data_config
1264
1336
  The input data configuration for the entire pipeline.
1265
-
1266
- Notes
1267
- -----
1268
- We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
1269
- in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
1270
- as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
1271
- prior to :meth:`set_configuration_state`, but the validations itself actually
1272
- requires the updated ``StepGraph`` and ``SlotMappings``.
1273
1337
  """
1338
+ choice = self.choices[step_config["type"]]
1339
+ self.step_graph = StepGraph()
1340
+ self.step_graph.add_node_from_step(choice["step"])
1341
+ self.slot_mappings = {
1342
+ "input": choice["input_slot_mappings"],
1343
+ "output": choice["output_slot_mappings"],
1344
+ }
1274
1345
 
1275
- chosen_parent_config = LayeredConfigTree(
1276
- {key: value for key, value in parent_config[self.name].items() if key != "type"}
1346
+ chosen_step_config = LayeredConfigTree(
1347
+ {key: value for key, value in step_config.items() if key != "type"}
1277
1348
  )
1278
- # ChoiceSteps by definition cannot be in a LeafConfigurationState.
1349
+ # ChoiceSteps by definition are in a NonLeafConfigurationState
1279
1350
  self._configuration_state = NonLeafConfigurationState(
1280
- self, chosen_parent_config, combined_implementations, input_data_config
1351
+ self, chosen_step_config, combined_implementations, input_data_config
1281
1352
  )
1282
1353
 
1283
- @staticmethod
1284
- def _update_step_graph(subgraph: dict[str, Any]) -> StepGraph:
1285
- """Updates the :class:`~easylink.graph_components.StepGraph` with the choice.
1286
-
1287
- Parameters
1288
- ----------
1289
- subgraph
1290
- Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
1291
-
1292
- Returns
1293
- -------
1294
- The updated ``StepGraph`` for the chosen type.
1295
- """
1296
- nodes = subgraph["nodes"]
1297
- edges = subgraph["edges"]
1298
-
1299
- graph = StepGraph()
1300
- for node in nodes:
1301
- graph.add_node_from_step(node)
1302
- for edge in edges:
1303
- graph.add_edge_from_params(edge)
1304
- return graph
1305
-
1306
- @staticmethod
1307
- def _update_slot_mappings(subgraph: dict[str, Any]) -> dict[str, list[SlotMapping]]:
1308
- """Updates the :class:`SlotMappings<easylink.graph_components.SlotMapping>` to the choice type.
1309
-
1310
- Parameters
1311
- ----------
1312
- sub_graph
1313
- Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
1314
-
1315
- Returns
1316
- -------
1317
- Updated ``SlotMappings`` that match the choice type.
1318
- """
1319
- input_mappings = subgraph["input_slot_mappings"]
1320
- output_mappings = subgraph["output_slot_mappings"]
1321
- return {"input": input_mappings, "output": output_mappings}
1322
-
1323
1354
 
1324
1355
  class ConfigurationState(ABC):
1325
1356
  """A given :class:`Step's<Step>` configuration state.
@@ -1334,8 +1365,9 @@ class ConfigurationState(ABC):
1334
1365
  ----------
1335
1366
  step
1336
1367
  The ``Step`` this ``ConfigurationState`` is tied to.
1337
- pipeline_config
1338
- The relevant configuration for the ``Step`` we are setting the state for.
1368
+ step_config
1369
+ The internal configuration of this ``Step`` we are setting the state
1370
+ for; it should not include the ``Step's`` name.
1339
1371
  combined_implementations
1340
1372
  The configuration for any implementations to be combined.
1341
1373
  input_data_config
@@ -1346,14 +1378,15 @@ class ConfigurationState(ABC):
1346
1378
  def __init__(
1347
1379
  self,
1348
1380
  step: Step,
1349
- pipeline_config: LayeredConfigTree,
1381
+ step_config: LayeredConfigTree,
1350
1382
  combined_implementations: LayeredConfigTree,
1351
1383
  input_data_config: LayeredConfigTree,
1352
1384
  ):
1353
1385
  self._step = step
1354
1386
  """The ``Step`` this ``ConfigurationState`` is tied to."""
1355
- self.pipeline_config = pipeline_config
1356
- """The relevant configuration for the ``Step`` we are setting the state for."""
1387
+ self.step_config = step_config
1388
+ """The internal configuration of this ``Step`` we are setting the state
1389
+ for; it should not include the ``Step's`` name."""
1357
1390
  self.combined_implementations = combined_implementations
1358
1391
  """The relevant configuration if the ``Step's`` ``Implementation``
1359
1392
  has been requested to be combined with that of a different ``Step``."""
@@ -1394,15 +1427,15 @@ class LeafConfigurationState(ConfigurationState):
1394
1427
  @property
1395
1428
  def is_combined(self) -> bool:
1396
1429
  """Whether or not this ``Step`` is combined with another ``Step``."""
1397
- return True if COMBINED_IMPLEMENTATION_KEY in self.pipeline_config else False
1430
+ return COMBINED_IMPLEMENTATION_KEY in self.step_config
1398
1431
 
1399
1432
  @property
1400
1433
  def implementation_config(self) -> LayeredConfigTree:
1401
1434
  """The ``Step's`` specific ``Implementation`` configuration."""
1402
1435
  return (
1403
- self.combined_implementations[self.pipeline_config[COMBINED_IMPLEMENTATION_KEY]]
1436
+ self.combined_implementations[self.step_config[COMBINED_IMPLEMENTATION_KEY]]
1404
1437
  if self.is_combined
1405
- else self.pipeline_config["implementation"]
1438
+ else self.step_config.implementation
1406
1439
  )
1407
1440
 
1408
1441
  def get_implementation_graph(self) -> ImplementationGraph:
@@ -1416,31 +1449,30 @@ class LeafConfigurationState(ConfigurationState):
1416
1449
  -------
1417
1450
  The ``ImplementationGraph`` related to this ``Step``.
1418
1451
  """
1419
-
1452
+ step = self._step
1420
1453
  implementation_graph = ImplementationGraph()
1421
- implementation_node_name = self._step.implementation_node_name
1422
1454
  if self.is_combined:
1423
- if isinstance(self._step, EmbarrassinglyParallelStep):
1455
+ if isinstance(step, EmbarrassinglyParallelStep):
1424
1456
  raise NotImplementedError(
1425
1457
  "Combining implementations with embarrassingly parallel steps "
1426
1458
  "is not yet supported."
1427
1459
  )
1428
1460
  implementation = PartialImplementation(
1429
- combined_name=self.pipeline_config[COMBINED_IMPLEMENTATION_KEY],
1430
- schema_step=self._step.step_name,
1431
- input_slots=self._step.input_slots.values(),
1432
- output_slots=self._step.output_slots.values(),
1461
+ combined_name=self.step_config[COMBINED_IMPLEMENTATION_KEY],
1462
+ schema_step=step.step_name,
1463
+ input_slots=step.input_slots.values(),
1464
+ output_slots=step.output_slots.values(),
1433
1465
  )
1434
1466
  else:
1435
1467
  implementation = Implementation(
1436
- schema_steps=[self._step.step_name],
1468
+ schema_steps=[step.step_name],
1437
1469
  implementation_config=self.implementation_config,
1438
- input_slots=self._step.input_slots.values(),
1439
- output_slots=self._step.output_slots.values(),
1440
- is_embarrassingly_parallel=isinstance(self._step, EmbarrassinglyParallelStep),
1470
+ input_slots=step.input_slots.values(),
1471
+ output_slots=step.output_slots.values(),
1472
+ is_embarrassingly_parallel=isinstance(step, EmbarrassinglyParallelStep),
1441
1473
  )
1442
1474
  implementation_graph.add_node_from_implementation(
1443
- implementation_node_name,
1475
+ step.implementation_node_name,
1444
1476
  implementation=implementation,
1445
1477
  )
1446
1478
  return implementation_graph
@@ -1481,10 +1513,10 @@ class LeafConfigurationState(ConfigurationState):
1481
1513
  for mapping in mappings:
1482
1514
  # FIXME [MIC-5771]: Fix ParallelSteps
1483
1515
  if (
1484
- "input_data_file" in self.pipeline_config
1516
+ "input_data_file" in self.step_config
1485
1517
  and edge.source_node == "pipeline_graph_input_data"
1486
1518
  ):
1487
- edge.output_slot = self.pipeline_config["input_data_file"]
1519
+ edge.output_slot = self.step_config["input_data_file"]
1488
1520
  imp_edge = mapping.remap_edge(edge)
1489
1521
  implementation_edges.append(imp_edge)
1490
1522
  else:
@@ -1506,8 +1538,10 @@ class NonLeafConfigurationState(ConfigurationState):
1506
1538
  ----------
1507
1539
  step
1508
1540
  The ``Step`` this ``ConfigurationState`` is tied to.
1509
- pipeline_config
1510
- The relevant configuration for the ``Step`` we are setting the state for.
1541
+ step_config
1542
+ The internal configuration of this ``Step`` we are setting the state
1543
+ for; it should not include the ``Step's`` name (though it must include
1544
+ the sub-step names).
1511
1545
  combined_implementations
1512
1546
  The configuration for any implementations to be combined.
1513
1547
  input_data_config
@@ -1538,16 +1572,17 @@ class NonLeafConfigurationState(ConfigurationState):
1538
1572
  def __init__(
1539
1573
  self,
1540
1574
  step: Step,
1541
- pipeline_config: LayeredConfigTree,
1575
+ step_config: LayeredConfigTree,
1542
1576
  combined_implementations: LayeredConfigTree,
1543
1577
  input_data_config: LayeredConfigTree,
1544
1578
  ):
1545
- super().__init__(step, pipeline_config, combined_implementations, input_data_config)
1579
+ super().__init__(step, step_config, combined_implementations, input_data_config)
1546
1580
  if not step.step_graph:
1547
1581
  raise ValueError(
1548
1582
  "NonLeafConfigurationState requires a subgraph upon which to operate, "
1549
1583
  f"but Step {step.name} has no step graph."
1550
1584
  )
1585
+ self._nodes = step.step_graph.nodes
1551
1586
  self._configure_subgraph_steps()
1552
1587
 
1553
1588
  def get_implementation_graph(self) -> ImplementationGraph:
@@ -1578,8 +1613,8 @@ class NonLeafConfigurationState(ConfigurationState):
1578
1613
 
1579
1614
  def add_nodes(self, implementation_graph: ImplementationGraph) -> None:
1580
1615
  """Adds nodes for each ``Step`` to the ``ImplementationGraph``."""
1581
- for node in self._step.step_graph.nodes:
1582
- step = self._step.step_graph.nodes[node]["step"]
1616
+ for node in self._nodes:
1617
+ step = self._nodes[node]["step"]
1583
1618
  implementation_graph.update(step.get_implementation_graph())
1584
1619
 
1585
1620
  def add_edges(self, implementation_graph: ImplementationGraph) -> None:
@@ -1587,8 +1622,8 @@ class NonLeafConfigurationState(ConfigurationState):
1587
1622
  for source, target, edge_attrs in self._step.step_graph.edges(data=True):
1588
1623
  all_edges = []
1589
1624
  edge = EdgeParams.from_graph_edge(source, target, edge_attrs)
1590
- parent_source_step = self._step.step_graph.nodes[source]["step"]
1591
- parent_target_step = self._step.step_graph.nodes[target]["step"]
1625
+ parent_source_step = self._nodes[source]["step"]
1626
+ parent_target_step = self._nodes[target]["step"]
1592
1627
 
1593
1628
  source_edges = parent_source_step.get_implementation_edges(edge)
1594
1629
  for source_edge in source_edges:
@@ -1624,7 +1659,7 @@ class NonLeafConfigurationState(ConfigurationState):
1624
1659
  ]
1625
1660
  for mapping in mappings:
1626
1661
  new_edge = mapping.remap_edge(edge)
1627
- new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
1662
+ new_step = self._nodes[mapping.child_node]["step"]
1628
1663
  imp_edges = new_step.get_implementation_edges(new_edge)
1629
1664
  implementation_edges.extend(imp_edges)
1630
1665
  elif edge.target_node == self._step.name:
@@ -1635,7 +1670,7 @@ class NonLeafConfigurationState(ConfigurationState):
1635
1670
  ]
1636
1671
  for mapping in mappings:
1637
1672
  new_edge = mapping.remap_edge(edge)
1638
- new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
1673
+ new_step = self._nodes[mapping.child_node]["step"]
1639
1674
  imp_edges = new_step.get_implementation_edges(new_edge)
1640
1675
  implementation_edges.extend(imp_edges)
1641
1676
  else:
@@ -1650,9 +1685,12 @@ class NonLeafConfigurationState(ConfigurationState):
1650
1685
  This method recursively traverses the ``StepGraph`` and sets the configuration
1651
1686
  state for each ``Step`` until reaching all leaf nodes.
1652
1687
  """
1653
- nodes = self._step.step_graph.nodes
1654
- for node in nodes:
1655
- step = nodes[node]["step"]
1688
+ for node in self._nodes:
1689
+ step = self._nodes[node]["step"]
1690
+ # IOStep names never appear in configuration
1691
+ step_config = (
1692
+ self.step_config if isinstance(step, IOStep) else self.step_config[step.name]
1693
+ )
1656
1694
  step.set_configuration_state(
1657
- self.pipeline_config, self.combined_implementations, self.input_data_config
1695
+ step_config, self.combined_implementations, self.input_data_config
1658
1696
  )