easylink 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/graph_components.py +7 -3
- easylink/pipeline_schema.py +7 -7
- easylink/pipeline_schema_constants/__init__.py +11 -0
- easylink/pipeline_schema_constants/development.py +143 -135
- easylink/pipeline_schema_constants/testing.py +7 -3
- easylink/step.py +391 -353
- easylink/utilities/__init__.py +3 -2
- easylink/utilities/aggregator_utils.py +1 -0
- easylink/utilities/data_utils.py +98 -5
- easylink/utilities/general_utils.py +48 -10
- easylink/utilities/paths.py +9 -3
- easylink/utilities/splitter_utils.py +1 -0
- easylink/utilities/validation_utils.py +29 -0
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/METADATA +1 -1
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/RECORD +19 -19
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/WHEEL +1 -1
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/top_level.txt +0 -0
easylink/step.py
CHANGED
@@ -54,21 +54,18 @@ class Step:
|
|
54
54
|
Parameters
|
55
55
|
----------
|
56
56
|
step_name
|
57
|
-
The name of the pipeline step in the ``PipelineSchema``.
|
57
|
+
The name of the pipeline step in the ``PipelineSchema``. It must also match
|
58
|
+
the key in the implementation metadata file to be used to run this ``Step``.
|
58
59
|
name
|
59
|
-
The name of this
|
60
|
-
due to the need for disambiguation
|
61
|
-
|
62
|
-
|
63
|
-
("step_1_loop_1", etc).
|
60
|
+
The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
|
61
|
+
This can be different from the ``step_name`` due to the need for disambiguation
|
62
|
+
during the process of flattening the ``StepGraph``, e.g. unrolling loops, etc.
|
63
|
+
For example, if step 1 is looped multiple times, each node would have a
|
64
|
+
``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc).
|
64
65
|
input_slots
|
65
66
|
All required :class:`InputSlots<easylink.graph_components.InputSlot>`.
|
66
67
|
output_slots
|
67
68
|
All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
|
68
|
-
nodes
|
69
|
-
All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance.
|
70
|
-
edges
|
71
|
-
The :class:`~easylink.graph_components.EdgeParams` of this ``Step``.
|
72
69
|
input_slot_mappings
|
73
70
|
The :class:`InputSlotMapping<easylink.graph_components.InputSlotMapping>` of this ``Step``.
|
74
71
|
output_slot_mappings
|
@@ -89,31 +86,22 @@ class Step:
|
|
89
86
|
name: str | None = None,
|
90
87
|
input_slots: Iterable[InputSlot] = (),
|
91
88
|
output_slots: Iterable[OutputSlot] = (),
|
92
|
-
nodes: Iterable[Step] = (),
|
93
|
-
edges: Iterable[EdgeParams] = (),
|
94
89
|
input_slot_mappings: Iterable[InputSlotMapping] = (),
|
95
90
|
output_slot_mappings: Iterable[OutputSlotMapping] = (),
|
96
91
|
) -> None:
|
97
92
|
self.step_name = step_name
|
98
|
-
"""The name of the
|
93
|
+
"""The name of the pipeline step in the ``PipelineSchema``. It must also match
|
94
|
+
the key in the implementation metadata file to be used to run this ``Step``."""
|
99
95
|
self.name = name if name else step_name
|
100
|
-
"""The name of ``Step's`` node in its :class
|
101
|
-
This
|
102
|
-
|
96
|
+
"""The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
|
97
|
+
This can be different from the ``step_name`` due to the need for disambiguation
|
98
|
+
during the process of flattening the ``StepGraph``, e.g. unrolling loops, etc.
|
99
|
+
For example, if step 1 is looped multiple times, each node would have a
|
100
|
+
``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc)."""
|
103
101
|
self.input_slots = {slot.name: slot for slot in input_slots}
|
104
102
|
"""A mapping of ``InputSlot`` names to their instances."""
|
105
103
|
self.output_slots = {slot.name: slot for slot in output_slots}
|
106
104
|
"""A mapping of ``OutputSlot`` names to their instances."""
|
107
|
-
self.nodes = nodes
|
108
|
-
"""All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance."""
|
109
|
-
for node in self.nodes:
|
110
|
-
node.set_parent_step(self)
|
111
|
-
self.edges = edges
|
112
|
-
"""The :class:`~easylink.graph_components.EdgeParams` of this ``Step``."""
|
113
|
-
self.step_graph = self._get_step_graph(nodes, edges)
|
114
|
-
"""The :class:`~easylink.graph_components.StepGraph` of this ``Step``, i.e.
|
115
|
-
the directed acyclic graph (DAG) of sub-nodes and their edges that make
|
116
|
-
up this ``Step`` instance."""
|
117
105
|
self.slot_mappings = {
|
118
106
|
"input": list(input_slot_mappings),
|
119
107
|
"output": list(output_slot_mappings),
|
@@ -164,7 +152,7 @@ class Step:
|
|
164
152
|
"""
|
165
153
|
step = self
|
166
154
|
implementation_name = (
|
167
|
-
self.configuration_state.
|
155
|
+
self.configuration_state.step_config[COMBINED_IMPLEMENTATION_KEY]
|
168
156
|
if self.configuration_state.is_combined
|
169
157
|
else self.configuration_state.implementation_config.name
|
170
158
|
)
|
@@ -203,7 +191,8 @@ class Step:
|
|
203
191
|
Parameters
|
204
192
|
----------
|
205
193
|
step_config
|
206
|
-
The configuration of this ``Step
|
194
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
195
|
+
the ``Step's`` name.
|
207
196
|
combined_implementations
|
208
197
|
The configuration for any implementations to be combined.
|
209
198
|
input_data_config
|
@@ -216,9 +205,6 @@ class Step:
|
|
216
205
|
|
217
206
|
Notes
|
218
207
|
-----
|
219
|
-
A ``Step`` can be in either a "leaf" or a "non-leaf" configuration state
|
220
|
-
and the validation process is different for each.
|
221
|
-
|
222
208
|
If the ``Step`` does not validate (i.e. errors are found and the returned
|
223
209
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
224
210
|
|
@@ -227,14 +213,42 @@ class Step:
|
|
227
213
|
all issues in one pass. In these cases, new errors may be found after the
|
228
214
|
initial ones are handled.
|
229
215
|
"""
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
216
|
+
errors = {}
|
217
|
+
metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
|
218
|
+
error_key = f"step {self.name}"
|
219
|
+
if (
|
220
|
+
"implementation" not in step_config
|
221
|
+
and COMBINED_IMPLEMENTATION_KEY not in step_config
|
222
|
+
):
|
223
|
+
errors[error_key] = [
|
224
|
+
"The step configuration does not contain an 'implementation' key "
|
225
|
+
"or a reference to a combined implementation."
|
226
|
+
]
|
227
|
+
elif (
|
228
|
+
COMBINED_IMPLEMENTATION_KEY in step_config
|
229
|
+
and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
|
230
|
+
):
|
231
|
+
errors[error_key] = [
|
232
|
+
"The step refers to a combined implementation but "
|
233
|
+
f"{step_config[COMBINED_IMPLEMENTATION_KEY]} is not a valid combined "
|
234
|
+
"implementation."
|
235
|
+
]
|
236
236
|
else:
|
237
|
-
|
237
|
+
implementation_config = (
|
238
|
+
step_config["implementation"]
|
239
|
+
if "implementation" in step_config
|
240
|
+
else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
|
241
|
+
)
|
242
|
+
if not "name" in implementation_config:
|
243
|
+
errors[error_key] = [
|
244
|
+
"The implementation configuration does not contain a 'name' key."
|
245
|
+
]
|
246
|
+
elif not implementation_config["name"] in metadata:
|
247
|
+
errors[error_key] = [
|
248
|
+
f"Implementation '{implementation_config['name']}' is not supported. "
|
249
|
+
f"Supported implementations are: {list(metadata.keys())}."
|
250
|
+
]
|
251
|
+
return errors
|
238
252
|
|
239
253
|
def get_implementation_graph(self) -> ImplementationGraph:
|
240
254
|
"""Gets this ``Step's`` :class:`~easylink.graph_components.ImplementationGraph`.
|
@@ -276,42 +290,25 @@ class Step:
|
|
276
290
|
|
277
291
|
def set_configuration_state(
|
278
292
|
self,
|
279
|
-
|
293
|
+
step_config: LayeredConfigTree,
|
280
294
|
combined_implementations: LayeredConfigTree,
|
281
295
|
input_data_config: LayeredConfigTree,
|
282
296
|
) -> None:
|
283
|
-
"""Sets the configuration state
|
284
|
-
|
285
|
-
The so-called 'configuration state' for a given ``Step`` is backed up by
|
286
|
-
a :class:`ConfigurationState` class and is assigned to its :attr:`_configuration_state`
|
287
|
-
attribute. There are two possible ``ConfigurationStates``:
|
288
|
-
:class:`LeafConfigurationState` and :class:`NonLeafConfigurationState`.
|
289
|
-
|
290
|
-
This method sets the configuration state of this ``Step`` based on whether
|
291
|
-
or not a :attr:`config_key` is set *and exists is the ``Step's`` configuration*
|
292
|
-
(i.e. its portion of the user-suppled pipeline specification
|
293
|
-
file); any required deviation from this behavior requires special
|
294
|
-
handling.
|
297
|
+
"""Sets the configuration state to 'leaf'.
|
295
298
|
|
296
299
|
Parameters
|
297
300
|
----------
|
298
|
-
|
299
|
-
The configuration of
|
301
|
+
step_config
|
302
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
303
|
+
the ``Step's`` name.
|
300
304
|
combined_implementations
|
301
305
|
The configuration for any implementations to be combined.
|
302
306
|
input_data_config
|
303
307
|
The input data configuration for the entire pipeline.
|
304
308
|
"""
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
self._configuration_state = NonLeafConfigurationState(
|
309
|
-
self, sub_config, combined_implementations, input_data_config
|
310
|
-
)
|
311
|
-
else:
|
312
|
-
self._configuration_state = LeafConfigurationState(
|
313
|
-
self, sub_config, combined_implementations, input_data_config
|
314
|
-
)
|
309
|
+
self._configuration_state = LeafConfigurationState(
|
310
|
+
self, step_config, combined_implementations, input_data_config
|
311
|
+
)
|
315
312
|
|
316
313
|
def get_implementation_slot_mappings(self) -> dict[str, list[SlotMapping]]:
|
317
314
|
"""Gets the input and output :class:`SlotMappings<easylink.graph_components.SlotMapping>`."""
|
@@ -326,113 +323,6 @@ class Step:
|
|
326
323
|
],
|
327
324
|
}
|
328
325
|
|
329
|
-
##################
|
330
|
-
# Helper methods #
|
331
|
-
##################
|
332
|
-
|
333
|
-
def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
|
334
|
-
"""Create a StepGraph from the nodes and edges the step was initialized with."""
|
335
|
-
step_graph = StepGraph()
|
336
|
-
for step in nodes:
|
337
|
-
step_graph.add_node_from_step(step)
|
338
|
-
for edge in edges:
|
339
|
-
step_graph.add_edge_from_params(edge)
|
340
|
-
return step_graph
|
341
|
-
|
342
|
-
def _validate_leaf(
|
343
|
-
self,
|
344
|
-
step_config: LayeredConfigTree,
|
345
|
-
combined_implementations: LayeredConfigTree,
|
346
|
-
) -> dict[str, list[str]]:
|
347
|
-
"""Validates a leaf ``Step``."""
|
348
|
-
errors = {}
|
349
|
-
metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
|
350
|
-
error_key = f"step {self.name}"
|
351
|
-
if (
|
352
|
-
"implementation" not in step_config
|
353
|
-
and COMBINED_IMPLEMENTATION_KEY not in step_config
|
354
|
-
):
|
355
|
-
errors[error_key] = [
|
356
|
-
"The step configuration does not contain an 'implementation' key or a "
|
357
|
-
"reference to a combined implementation."
|
358
|
-
]
|
359
|
-
elif (
|
360
|
-
COMBINED_IMPLEMENTATION_KEY in step_config
|
361
|
-
and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
|
362
|
-
):
|
363
|
-
errors[error_key] = [
|
364
|
-
f"The step refers to a combined implementation but {step_config[COMBINED_IMPLEMENTATION_KEY]} is not a "
|
365
|
-
f"valid combined implementation."
|
366
|
-
]
|
367
|
-
else:
|
368
|
-
implementation_config = (
|
369
|
-
step_config["implementation"]
|
370
|
-
if "implementation" in step_config
|
371
|
-
else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
|
372
|
-
)
|
373
|
-
if not "name" in implementation_config:
|
374
|
-
errors[error_key] = [
|
375
|
-
"The implementation configuration does not contain a 'name' key."
|
376
|
-
]
|
377
|
-
elif not implementation_config["name"] in metadata:
|
378
|
-
errors[error_key] = [
|
379
|
-
f"Implementation '{implementation_config['name']}' is not supported. "
|
380
|
-
f"Supported implementations are: {list(metadata.keys())}."
|
381
|
-
]
|
382
|
-
return errors
|
383
|
-
|
384
|
-
def _validate_nonleaf(
|
385
|
-
self,
|
386
|
-
step_config: LayeredConfigTree,
|
387
|
-
combined_implementations: LayeredConfigTree,
|
388
|
-
input_data_config: LayeredConfigTree,
|
389
|
-
) -> dict[str, list[str]]:
|
390
|
-
"""Validates a non-leaf ``Step``."""
|
391
|
-
errors = {}
|
392
|
-
nodes = self.step_graph.nodes
|
393
|
-
for node in nodes:
|
394
|
-
step = nodes[node]["step"]
|
395
|
-
if isinstance(step, IOStep):
|
396
|
-
continue
|
397
|
-
if step.name not in step_config:
|
398
|
-
step_errors = {f"step {step.name}": [f"The step is not configured."]}
|
399
|
-
else:
|
400
|
-
step_errors = step.validate_step(
|
401
|
-
step_config[step.name], combined_implementations, input_data_config
|
402
|
-
)
|
403
|
-
if step_errors:
|
404
|
-
errors.update(step_errors)
|
405
|
-
extra_steps = set(step_config.keys()) - set(nodes)
|
406
|
-
for extra_step in extra_steps:
|
407
|
-
errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
|
408
|
-
return errors
|
409
|
-
|
410
|
-
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
411
|
-
"""Convenience method to get a ``Step's`` configuration.
|
412
|
-
|
413
|
-
Some types of ``Steps`` have a unique :attr:`config_key` (defined by the
|
414
|
-
user via the pipeline specification file) that is used to specify the behavior
|
415
|
-
of the ``Step`` (e.g. looping, parallel, etc). This method simply returns
|
416
|
-
the ``Step's`` sub-configuration keyed to that ``config_key`` (if it exists,
|
417
|
-
i.e. is not a basic ``Step``).
|
418
|
-
|
419
|
-
Parameters
|
420
|
-
----------
|
421
|
-
step_config
|
422
|
-
The high-level configuration of this ``Step``.
|
423
|
-
|
424
|
-
Returns
|
425
|
-
-------
|
426
|
-
The sub-configuration of this ``Step`` keyed on the ``config_key``
|
427
|
-
(if it exists).
|
428
|
-
|
429
|
-
"""
|
430
|
-
return (
|
431
|
-
step_config
|
432
|
-
if not self.config_key in step_config
|
433
|
-
else step_config[self.config_key]
|
434
|
-
)
|
435
|
-
|
436
326
|
|
437
327
|
class IOStep(Step):
|
438
328
|
"""A special case type of :class:`Step` used to represent incoming and outgoing data.
|
@@ -485,27 +375,24 @@ class IOStep(Step):
|
|
485
375
|
|
486
376
|
def set_configuration_state(
|
487
377
|
self,
|
488
|
-
|
378
|
+
step_config: LayeredConfigTree,
|
489
379
|
combined_implementations: LayeredConfigTree,
|
490
380
|
input_data_config: LayeredConfigTree,
|
491
381
|
) -> None:
|
492
|
-
"""Sets the configuration state to leaf.
|
493
|
-
|
494
|
-
An ``IOStep`` is by definition a leaf ``Step`` and so we assign that here
|
495
|
-
instead of relying on the default behavior of the parent class.
|
382
|
+
"""Sets the configuration state to 'leaf'.
|
496
383
|
|
497
384
|
Parameters
|
498
385
|
----------
|
499
|
-
|
500
|
-
The configuration of
|
501
|
-
|
386
|
+
step_config
|
387
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
388
|
+
the ``Step's`` name.
|
502
389
|
combined_implementations
|
503
390
|
The configuration for any implementations to be combined.
|
504
391
|
input_data_config
|
505
392
|
The input data configuration for the entire pipeline.
|
506
393
|
"""
|
507
394
|
self._configuration_state = LeafConfigurationState(
|
508
|
-
self,
|
395
|
+
self, step_config, combined_implementations, input_data_config
|
509
396
|
)
|
510
397
|
|
511
398
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -548,29 +435,29 @@ class InputStep(IOStep):
|
|
548
435
|
|
549
436
|
def set_configuration_state(
|
550
437
|
self,
|
551
|
-
|
438
|
+
step_config: LayeredConfigTree,
|
552
439
|
combined_implementations: LayeredConfigTree,
|
553
440
|
input_data_config: LayeredConfigTree,
|
554
441
|
) -> None:
|
555
442
|
"""Sets the configuration state and updates the ``OutputSlots``.
|
556
443
|
|
557
|
-
In addition to setting ``InputStep`` to a leaf configuration state, this
|
444
|
+
In addition to setting ``InputStep`` to a 'leaf' configuration state, this
|
558
445
|
method also updates the ``OutputSlots`` to include all of the dataset keys
|
559
446
|
in the input data specification file. This allows for future use of
|
560
|
-
specific datasets instead of only
|
447
|
+
*specific* datasets instead of only *all* of them.
|
561
448
|
|
562
449
|
Parameters
|
563
450
|
----------
|
564
|
-
|
565
|
-
The configuration of
|
566
|
-
|
451
|
+
step_config
|
452
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
453
|
+
the ``Step's`` name.
|
567
454
|
combined_implementations
|
568
455
|
The configuration for any implementations to be combined.
|
569
456
|
input_data_config
|
570
457
|
The input data configuration for the entire pipeline.
|
571
458
|
"""
|
572
459
|
super().set_configuration_state(
|
573
|
-
|
460
|
+
step_config, combined_implementations, input_data_config
|
574
461
|
)
|
575
462
|
for input_data_key in input_data_config:
|
576
463
|
self.output_slots[input_data_key] = OutputSlot(name=input_data_key)
|
@@ -601,19 +488,185 @@ class HierarchicalStep(Step):
|
|
601
488
|
|
602
489
|
See :class:`Step` for inherited attributes.
|
603
490
|
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
491
|
+
Parameters
|
492
|
+
----------
|
493
|
+
nodes
|
494
|
+
All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``.
|
495
|
+
edges
|
496
|
+
The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes.
|
497
|
+
step_graph
|
498
|
+
The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
499
|
+
graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``.
|
500
|
+
user_configurable
|
501
|
+
Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
|
502
|
+
attribute to allow for back-end creation of ``HierarchicalSteps`` that are not
|
503
|
+
user-facing (i.e. they do not need to provide a 'substeps' configuration key).
|
609
504
|
|
610
505
|
"""
|
611
506
|
|
507
|
+
def __init__(
|
508
|
+
self,
|
509
|
+
step_name,
|
510
|
+
name=None,
|
511
|
+
input_slots=(),
|
512
|
+
output_slots=(),
|
513
|
+
nodes=(),
|
514
|
+
edges=(),
|
515
|
+
input_slot_mappings=(),
|
516
|
+
output_slot_mappings=(),
|
517
|
+
user_configurable=True,
|
518
|
+
):
|
519
|
+
super().__init__(
|
520
|
+
step_name,
|
521
|
+
name,
|
522
|
+
input_slots,
|
523
|
+
output_slots,
|
524
|
+
input_slot_mappings,
|
525
|
+
output_slot_mappings,
|
526
|
+
)
|
527
|
+
self.nodes = nodes
|
528
|
+
"""All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``."""
|
529
|
+
for node in self.nodes:
|
530
|
+
node.set_parent_step(self)
|
531
|
+
self.edges = edges
|
532
|
+
"""The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes."""
|
533
|
+
self.step_graph = self._get_step_graph(nodes, edges)
|
534
|
+
"""The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
535
|
+
graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``."""
|
536
|
+
self.user_configurable = user_configurable
|
537
|
+
"""Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
|
538
|
+
attribute to allow for back-end creation of ``HierarchicalSteps`` that are not
|
539
|
+
user-facing (i.e. they do not need to provide a 'substeps' configuration key)."""
|
540
|
+
|
612
541
|
@property
|
613
542
|
def config_key(self):
|
614
543
|
"""The pipeline specification key required for a ``HierarchicalStep``."""
|
615
544
|
return "substeps"
|
616
545
|
|
546
|
+
def validate_step(
|
547
|
+
self,
|
548
|
+
step_config: LayeredConfigTree,
|
549
|
+
combined_implementations: LayeredConfigTree,
|
550
|
+
input_data_config: LayeredConfigTree,
|
551
|
+
) -> dict[str, list[str]]:
|
552
|
+
"""Validates the ``HierarchicalStep``.
|
553
|
+
|
554
|
+
Parameters
|
555
|
+
----------
|
556
|
+
step_config
|
557
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
558
|
+
the ``Step's`` name.
|
559
|
+
combined_implementations
|
560
|
+
The configuration for any implementations to be combined.
|
561
|
+
input_data_config
|
562
|
+
The input data configuration for the entire pipeline.
|
563
|
+
|
564
|
+
Returns
|
565
|
+
-------
|
566
|
+
A dictionary of errors, where the keys are the ``HierarchicalStep``
|
567
|
+
name and the values are lists of error messages associated with the
|
568
|
+
given ``HierarchicalStep``.
|
569
|
+
|
570
|
+
Notes
|
571
|
+
-----
|
572
|
+
A ``HierarchicalStep`` can be in either a "leaf" or a "non-leaf" configuration
|
573
|
+
state and the validation process is different for each.
|
574
|
+
|
575
|
+
If the ``HierarchicalStep`` does not validate (i.e. errors are found and
|
576
|
+
the returned dictionary is non-empty), the tool will exit and the pipeline
|
577
|
+
will not run.
|
578
|
+
|
579
|
+
We attempt to batch error messages as much as possible, but there may be
|
580
|
+
times where the configuration is so ill-formed that we are unable to handle
|
581
|
+
all issues in one pass. In these cases, new errors may be found after the
|
582
|
+
initial ones are handled.
|
583
|
+
"""
|
584
|
+
if self.user_configurable:
|
585
|
+
if self.config_key in step_config:
|
586
|
+
step_config = step_config[self.config_key]
|
587
|
+
else:
|
588
|
+
# This is a leaf step
|
589
|
+
return super().validate_step(
|
590
|
+
step_config, combined_implementations, input_data_config
|
591
|
+
)
|
592
|
+
return self._validate_step_graph(
|
593
|
+
step_config, combined_implementations, input_data_config
|
594
|
+
)
|
595
|
+
|
596
|
+
def set_configuration_state(
|
597
|
+
self,
|
598
|
+
step_config: LayeredConfigTree,
|
599
|
+
combined_implementations: LayeredConfigTree,
|
600
|
+
input_data_config: LayeredConfigTree,
|
601
|
+
) -> None:
|
602
|
+
"""Sets the configuration state.
|
603
|
+
|
604
|
+
The configuration state of a ``HierarchicalStep`` depends on (1) whether
|
605
|
+
or not it is :attr:`user_configurable` and (2) whether or not the
|
606
|
+
:attr:`config_key` exists in the pipeline specification file.
|
607
|
+
|
608
|
+
Parameters
|
609
|
+
----------
|
610
|
+
step_config
|
611
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
612
|
+
the ``Step's`` name.
|
613
|
+
combined_implementations
|
614
|
+
The configuration for any implementations to be combined.
|
615
|
+
input_data_config
|
616
|
+
The input data configuration for the entire pipeline.
|
617
|
+
"""
|
618
|
+
if self.user_configurable:
|
619
|
+
if self.config_key in step_config:
|
620
|
+
step_config = step_config[self.config_key]
|
621
|
+
configuration_state_type = NonLeafConfigurationState
|
622
|
+
else:
|
623
|
+
configuration_state_type = LeafConfigurationState
|
624
|
+
else:
|
625
|
+
# Substeps must be used, so we require non-leaf here
|
626
|
+
configuration_state_type = NonLeafConfigurationState
|
627
|
+
self._configuration_state = configuration_state_type(
|
628
|
+
self, step_config, combined_implementations, input_data_config
|
629
|
+
)
|
630
|
+
|
631
|
+
##################
|
632
|
+
# Helper methods #
|
633
|
+
##################
|
634
|
+
|
635
|
+
def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
|
636
|
+
"""Creates a :class:`~easylink.graph_components.StepGraph` from the nodes and edges the step was initialized with."""
|
637
|
+
step_graph = StepGraph()
|
638
|
+
for step in nodes:
|
639
|
+
step_graph.add_node_from_step(step)
|
640
|
+
for edge in edges:
|
641
|
+
step_graph.add_edge_from_params(edge)
|
642
|
+
return step_graph
|
643
|
+
|
644
|
+
def _validate_step_graph(
|
645
|
+
self,
|
646
|
+
step_config: LayeredConfigTree,
|
647
|
+
combined_implementations: LayeredConfigTree,
|
648
|
+
input_data_config: LayeredConfigTree,
|
649
|
+
) -> dict[str, list[str]]:
|
650
|
+
"""Validates the nodes of a :class:`~easylink.graph_components.StepGraph`."""
|
651
|
+
errors = {}
|
652
|
+
for node in self.step_graph.nodes:
|
653
|
+
step = self.step_graph.nodes[node]["step"]
|
654
|
+
if isinstance(step, IOStep):
|
655
|
+
continue
|
656
|
+
else:
|
657
|
+
if step.name not in step_config:
|
658
|
+
step_errors = {f"step {step.name}": ["The step is not configured."]}
|
659
|
+
else:
|
660
|
+
step_errors = step.validate_step(
|
661
|
+
step_config[step.name], combined_implementations, input_data_config
|
662
|
+
)
|
663
|
+
if step_errors:
|
664
|
+
errors.update(step_errors)
|
665
|
+
extra_steps = set(step_config.keys()) - set(self.step_graph.nodes)
|
666
|
+
for extra_step in extra_steps:
|
667
|
+
errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
|
668
|
+
return errors
|
669
|
+
|
617
670
|
|
618
671
|
class TemplatedStep(Step, ABC):
|
619
672
|
"""A type of :class:`Step` that may contain multiplicity.
|
@@ -641,8 +694,12 @@ class TemplatedStep(Step, ABC):
|
|
641
694
|
template_step.input_slots.values(),
|
642
695
|
template_step.output_slots.values(),
|
643
696
|
)
|
697
|
+
self.step_graph = None
|
698
|
+
"""The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
699
|
+
graph (DAG) of sub-nodes and their edges that make up this ``TemplatedStep``."""
|
644
700
|
self.template_step = template_step
|
645
701
|
"""The ``Step`` to be templated."""
|
702
|
+
|
646
703
|
self.template_step.set_parent_step(self)
|
647
704
|
|
648
705
|
@property
|
@@ -716,7 +773,8 @@ class TemplatedStep(Step, ABC):
|
|
716
773
|
Parameters
|
717
774
|
----------
|
718
775
|
step_config
|
719
|
-
The configuration of this ``
|
776
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
777
|
+
the ``Step's`` name.
|
720
778
|
combined_implementations
|
721
779
|
The configuration for any implementations to be combined.
|
722
780
|
input_data_config
|
@@ -730,7 +788,7 @@ class TemplatedStep(Step, ABC):
|
|
730
788
|
|
731
789
|
Notes
|
732
790
|
-----
|
733
|
-
If the ``
|
791
|
+
If the ``TemplatedStep`` does not validate (i.e. errors are found and the returned
|
734
792
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
735
793
|
|
736
794
|
We attempt to batch error messages as much as possible, but there may be
|
@@ -739,6 +797,7 @@ class TemplatedStep(Step, ABC):
|
|
739
797
|
initial ones are handled.
|
740
798
|
"""
|
741
799
|
if not self.config_key in step_config:
|
800
|
+
# This is a leaf step
|
742
801
|
return self.template_step.validate_step(
|
743
802
|
step_config, combined_implementations, input_data_config
|
744
803
|
)
|
@@ -770,51 +829,32 @@ class TemplatedStep(Step, ABC):
|
|
770
829
|
]
|
771
830
|
parallel_errors.update(
|
772
831
|
self.template_step.validate_step(
|
773
|
-
parallel_config,
|
832
|
+
LayeredConfigTree(parallel_config),
|
833
|
+
combined_implementations,
|
834
|
+
input_data_config,
|
774
835
|
)
|
775
836
|
)
|
776
837
|
if parallel_errors:
|
777
838
|
errors[f"step {self.name}"][f"{self.node_prefix}_{i+1}"] = parallel_errors
|
778
839
|
return errors
|
779
840
|
|
780
|
-
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
781
|
-
"""Convenience method to get the ``TemplatedStep's`` configuration.
|
782
|
-
|
783
|
-
``TemplatedSteps`` may include multiplicity. In such cases, their configurations
|
784
|
-
must be modified to include the expanded ``Steps``.
|
785
|
-
|
786
|
-
Parameters
|
787
|
-
----------
|
788
|
-
step_config
|
789
|
-
The high-level configuration of this ``TemplatedStep``.
|
790
|
-
|
791
|
-
Returns
|
792
|
-
-------
|
793
|
-
The expanded sub-configuration of this ``TemplatedStep`` based on the
|
794
|
-
:attr:`Step.config_key` and expanded to include all looped or parallelized
|
795
|
-
sub-``Steps``).
|
796
|
-
"""
|
797
|
-
if self.config_key in step_config:
|
798
|
-
expanded_step_config = LayeredConfigTree()
|
799
|
-
for i, sub_config in enumerate(step_config[self.config_key]):
|
800
|
-
expanded_step_config.update(
|
801
|
-
{f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
|
802
|
-
)
|
803
|
-
return expanded_step_config
|
804
|
-
return step_config
|
805
|
-
|
806
841
|
def set_configuration_state(
|
807
842
|
self,
|
808
|
-
|
843
|
+
step_config: LayeredConfigTree,
|
809
844
|
combined_implementations: LayeredConfigTree,
|
810
845
|
input_data_config: LayeredConfigTree,
|
811
846
|
):
|
812
|
-
"""Sets the configuration state
|
847
|
+
"""Sets the configuration state to 'non-leaf'.
|
848
|
+
|
849
|
+
In addition to setting the configuration state, this also updates the
|
850
|
+
:class:`~easylink.graph_components.StepGraph` and
|
851
|
+
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
813
852
|
|
814
853
|
Parameters
|
815
854
|
----------
|
816
|
-
|
817
|
-
The configuration of
|
855
|
+
step_config
|
856
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
857
|
+
the ``Step's`` name.
|
818
858
|
combined_implementations
|
819
859
|
The configuration for any implementations to be combined.
|
820
860
|
input_data_config
|
@@ -828,7 +868,6 @@ class TemplatedStep(Step, ABC):
|
|
828
868
|
:class:`~easylink.implementation.Implementation`, i.e. the one with a
|
829
869
|
:class:`LeafConfigurationState`.
|
830
870
|
"""
|
831
|
-
step_config = parent_config[self.name]
|
832
871
|
if self.config_key not in step_config:
|
833
872
|
# Specially handle the step_graph update
|
834
873
|
self.step_graph = StepGraph()
|
@@ -857,6 +896,36 @@ class TemplatedStep(Step, ABC):
|
|
857
896
|
self, expanded_config, combined_implementations, input_data_config
|
858
897
|
)
|
859
898
|
|
899
|
+
##################
|
900
|
+
# Helper Methods #
|
901
|
+
##################
|
902
|
+
|
903
|
+
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
904
|
+
"""Convenience method to get the ``TemplatedStep's`` configuration.
|
905
|
+
|
906
|
+
``TemplatedSteps`` may include multiplicity. In such cases, their configurations
|
907
|
+
must be modified to include the expanded ``Steps``.
|
908
|
+
|
909
|
+
Parameters
|
910
|
+
----------
|
911
|
+
step_config
|
912
|
+
The high-level configuration of this ``TemplatedStep``.
|
913
|
+
|
914
|
+
Returns
|
915
|
+
-------
|
916
|
+
The expanded sub-configuration of this ``TemplatedStep`` based on the
|
917
|
+
:attr:`Step.config_key` and expanded to include all looped or parallelized
|
918
|
+
sub-``Steps``.
|
919
|
+
"""
|
920
|
+
if self.config_key in step_config:
|
921
|
+
expanded_step_config = LayeredConfigTree()
|
922
|
+
for i, sub_config in enumerate(step_config[self.config_key]):
|
923
|
+
expanded_step_config.update(
|
924
|
+
{f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
|
925
|
+
)
|
926
|
+
return expanded_step_config
|
927
|
+
return step_config
|
928
|
+
|
860
929
|
def _duplicate_template_step(self) -> Step:
|
861
930
|
"""Makes a duplicate of the template ``Step``.
|
862
931
|
|
@@ -1069,17 +1138,25 @@ class EmbarrassinglyParallelStep(Step):
|
|
1069
1138
|
|
1070
1139
|
An ``EmbarrassinglyParallelStep`` is different than a :class:`ParallelStep`
|
1071
1140
|
in that it is not configured by the user to be run in parallel - it completely
|
1072
|
-
happens on the back end for performance reasons.
|
1073
|
-
|
1141
|
+
happens on the back end for performance reasons.
|
1142
|
+
|
1143
|
+
See :class:`Step` for inherited attributes.
|
1144
|
+
|
1145
|
+
Parameters
|
1146
|
+
----------
|
1147
|
+
step
|
1148
|
+
The ``Step`` to be run in an embarrassingly parallel manner. To run multiple
|
1149
|
+
steps in parallel, use a :class:`HierarchicalStep`.
|
1150
|
+
|
1074
1151
|
"""
|
1075
1152
|
|
1076
1153
|
def __init__(
|
1077
1154
|
self,
|
1078
|
-
|
1079
|
-
input_slots: Iterable[InputSlot],
|
1080
|
-
output_slots: Iterable[OutputSlot],
|
1155
|
+
step: Step,
|
1081
1156
|
) -> None:
|
1082
|
-
super().__init__(
|
1157
|
+
super().__init__(
|
1158
|
+
step.step_name, step.name, step.input_slots.values(), step.output_slots.values()
|
1159
|
+
)
|
1083
1160
|
self._validate()
|
1084
1161
|
|
1085
1162
|
def _validate(self) -> None:
|
@@ -1124,10 +1201,7 @@ class EmbarrassinglyParallelStep(Step):
|
|
1124
1201
|
|
1125
1202
|
|
1126
1203
|
class ChoiceStep(Step):
|
1127
|
-
"""A type of :class:`Step` that allows for choosing
|
1128
|
-
|
1129
|
-
A ``ChoiceStep`` allows a user to select a single path from a set of possible
|
1130
|
-
paths.
|
1204
|
+
"""A type of :class:`Step` that allows for choosing from a set of options.
|
1131
1205
|
|
1132
1206
|
See :class:`Step` for inherited attributes.
|
1133
1207
|
|
@@ -1141,7 +1215,7 @@ class ChoiceStep(Step):
|
|
1141
1215
|
All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
|
1142
1216
|
choices
|
1143
1217
|
A dictionary of choices, where the keys are the names/types of choices and
|
1144
|
-
the values are dictionaries containing that type's
|
1218
|
+
the values are dictionaries containing that type's ``Step`` and related
|
1145
1219
|
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
1146
1220
|
|
1147
1221
|
Notes
|
@@ -1150,6 +1224,13 @@ class ChoiceStep(Step):
|
|
1150
1224
|
:attr:`Step.config_key` in the pipeline specification file. Instead, the pipeline
|
1151
1225
|
configuration must contain a 'type' key that specifies which option to choose.
|
1152
1226
|
|
1227
|
+
The :attr:`choices` dictionary must contain the choice type names as the outer
|
1228
|
+
keys. The value for each of these types is then another dictionary containing
|
1229
|
+
'step', 'input_slot_mappings', and 'output_slot_mappings' keys with their
|
1230
|
+
corresponding values.
|
1231
|
+
|
1232
|
+
Each choice type must specify a *single* ``Step`` and its associated ``SlotMappings``.
|
1233
|
+
Any choice paths that require multiple sub-steps should specify a :class:`HierarchicalStep`.
|
1153
1234
|
"""
|
1154
1235
|
|
1155
1236
|
def __init__(
|
@@ -1157,9 +1238,7 @@ class ChoiceStep(Step):
|
|
1157
1238
|
step_name: str,
|
1158
1239
|
input_slots: Iterable[InputSlot],
|
1159
1240
|
output_slots: Iterable[OutputSlot],
|
1160
|
-
choices: dict[
|
1161
|
-
str, dict[str, list[Step | EdgeParams | InputSlotMapping | OutputSlotMapping]]
|
1162
|
-
],
|
1241
|
+
choices: dict[str, dict[str, Step | SlotMapping]],
|
1163
1242
|
) -> None:
|
1164
1243
|
super().__init__(
|
1165
1244
|
step_name,
|
@@ -1182,7 +1261,8 @@ class ChoiceStep(Step):
|
|
1182
1261
|
Parameters
|
1183
1262
|
----------
|
1184
1263
|
step_config
|
1185
|
-
The configuration of this ``
|
1264
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
1265
|
+
the ``Step's`` name.
|
1186
1266
|
combined_implementations
|
1187
1267
|
The configuration for any implementations to be combined.
|
1188
1268
|
input_data_config
|
@@ -1195,8 +1275,6 @@ class ChoiceStep(Step):
|
|
1195
1275
|
|
1196
1276
|
Notes
|
1197
1277
|
-----
|
1198
|
-
A ``ChoiceStep`` by definition must be set with a :class:`NonLeafConfigurationState`.
|
1199
|
-
|
1200
1278
|
If the ``Step`` does not validate (i.e. errors are found and the returned
|
1201
1279
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
1202
1280
|
|
@@ -1205,16 +1283,9 @@ class ChoiceStep(Step):
|
|
1205
1283
|
all issues in one pass. In these cases, new errors may be found after the
|
1206
1284
|
initial ones are handled.
|
1207
1285
|
|
1208
|
-
We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
|
1209
|
-
in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
|
1210
|
-
as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
|
1211
|
-
prior to :meth:`set_configuration_state`, but the validations itself actually
|
1212
|
-
requires the updated ``StepGraph`` and ``SlotMappings``.
|
1213
|
-
|
1214
1286
|
We do not attempt to validate the subgraph here if the 'type' key is unable
|
1215
1287
|
to be validated.
|
1216
1288
|
"""
|
1217
|
-
|
1218
1289
|
chosen_type = step_config.get("type")
|
1219
1290
|
# Handle problems with the 'type' key
|
1220
1291
|
if not chosen_type:
|
@@ -1222,104 +1293,64 @@ class ChoiceStep(Step):
|
|
1222
1293
|
if chosen_type not in self.choices:
|
1223
1294
|
return {
|
1224
1295
|
f"step {self.name}": [
|
1225
|
-
f"'{step_config
|
1296
|
+
f"'{step_config.type}' is not a supported 'type'. Valid choices are: {list(self.choices)}."
|
1226
1297
|
]
|
1227
1298
|
}
|
1228
|
-
|
1229
|
-
|
1299
|
+
|
1300
|
+
chosen_step = self.choices[chosen_type]["step"]
|
1230
1301
|
chosen_step_config = LayeredConfigTree(
|
1231
1302
|
{key: value for key, value in step_config.items() if key != "type"}
|
1232
1303
|
)
|
1233
|
-
|
1234
|
-
if set(allowable_steps) != set(chosen_step_config):
|
1304
|
+
if chosen_step.name not in chosen_step_config:
|
1235
1305
|
return {
|
1236
1306
|
f"step {self.name}": [
|
1237
|
-
f"
|
1307
|
+
f"'{chosen_step.name}' is not configured. Confirm you have specified "
|
1308
|
+
f"the correct steps for the '{chosen_type}' type."
|
1238
1309
|
]
|
1239
1310
|
}
|
1240
|
-
|
1241
|
-
# HACK: Update the step graph and mappings here because we need them for validation
|
1242
|
-
self.step_graph = self._update_step_graph(subgraph)
|
1243
|
-
self.slot_mappings = self._update_slot_mappings(subgraph)
|
1244
1311
|
# NOTE: A ChoiceStep is by definition a non-leaf step
|
1245
|
-
return
|
1246
|
-
chosen_step_config, combined_implementations, input_data_config
|
1312
|
+
return chosen_step.validate_step(
|
1313
|
+
chosen_step_config[chosen_step.name], combined_implementations, input_data_config
|
1247
1314
|
)
|
1248
1315
|
|
1249
1316
|
def set_configuration_state(
|
1250
1317
|
self,
|
1251
|
-
|
1318
|
+
step_config: LayeredConfigTree,
|
1252
1319
|
combined_implementations: LayeredConfigTree,
|
1253
1320
|
input_data_config: LayeredConfigTree,
|
1254
1321
|
):
|
1255
|
-
"""Sets the configuration state
|
1322
|
+
"""Sets the configuration state to 'non-leaf'.
|
1323
|
+
|
1324
|
+
In addition to setting the configuration state, this also updates the
|
1325
|
+
:class:`~easylink.graph_components.StepGraph` and
|
1326
|
+
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
1256
1327
|
|
1257
1328
|
Parameters
|
1258
1329
|
----------
|
1259
|
-
|
1260
|
-
The configuration of
|
1330
|
+
step_config
|
1331
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
1332
|
+
the ``Step's`` name.
|
1261
1333
|
combined_implementations
|
1262
1334
|
The configuration for any implementations to be combined.
|
1263
1335
|
input_data_config
|
1264
1336
|
The input data configuration for the entire pipeline.
|
1265
|
-
|
1266
|
-
Notes
|
1267
|
-
-----
|
1268
|
-
We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
|
1269
|
-
in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
|
1270
|
-
as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
|
1271
|
-
prior to :meth:`set_configuration_state`, but the validations itself actually
|
1272
|
-
requires the updated ``StepGraph`` and ``SlotMappings``.
|
1273
1337
|
"""
|
1338
|
+
choice = self.choices[step_config["type"]]
|
1339
|
+
self.step_graph = StepGraph()
|
1340
|
+
self.step_graph.add_node_from_step(choice["step"])
|
1341
|
+
self.slot_mappings = {
|
1342
|
+
"input": choice["input_slot_mappings"],
|
1343
|
+
"output": choice["output_slot_mappings"],
|
1344
|
+
}
|
1274
1345
|
|
1275
|
-
|
1276
|
-
{key: value for key, value in
|
1346
|
+
chosen_step_config = LayeredConfigTree(
|
1347
|
+
{key: value for key, value in step_config.items() if key != "type"}
|
1277
1348
|
)
|
1278
|
-
# ChoiceSteps by definition
|
1349
|
+
# ChoiceSteps by definition are in a NonLeafConfigurationState
|
1279
1350
|
self._configuration_state = NonLeafConfigurationState(
|
1280
|
-
self,
|
1351
|
+
self, chosen_step_config, combined_implementations, input_data_config
|
1281
1352
|
)
|
1282
1353
|
|
1283
|
-
@staticmethod
|
1284
|
-
def _update_step_graph(subgraph: dict[str, Any]) -> StepGraph:
|
1285
|
-
"""Updates the :class:`~easylink.graph_components.StepGraph` with the choice.
|
1286
|
-
|
1287
|
-
Parameters
|
1288
|
-
----------
|
1289
|
-
subgraph
|
1290
|
-
Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
|
1291
|
-
|
1292
|
-
Returns
|
1293
|
-
-------
|
1294
|
-
The updated ``StepGraph`` for the chosen type.
|
1295
|
-
"""
|
1296
|
-
nodes = subgraph["nodes"]
|
1297
|
-
edges = subgraph["edges"]
|
1298
|
-
|
1299
|
-
graph = StepGraph()
|
1300
|
-
for node in nodes:
|
1301
|
-
graph.add_node_from_step(node)
|
1302
|
-
for edge in edges:
|
1303
|
-
graph.add_edge_from_params(edge)
|
1304
|
-
return graph
|
1305
|
-
|
1306
|
-
@staticmethod
|
1307
|
-
def _update_slot_mappings(subgraph: dict[str, Any]) -> dict[str, list[SlotMapping]]:
|
1308
|
-
"""Updates the :class:`SlotMappings<easylink.graph_components.SlotMapping>` to the choice type.
|
1309
|
-
|
1310
|
-
Parameters
|
1311
|
-
----------
|
1312
|
-
sub_graph
|
1313
|
-
Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
|
1314
|
-
|
1315
|
-
Returns
|
1316
|
-
-------
|
1317
|
-
Updated ``SlotMappings`` that match the choice type.
|
1318
|
-
"""
|
1319
|
-
input_mappings = subgraph["input_slot_mappings"]
|
1320
|
-
output_mappings = subgraph["output_slot_mappings"]
|
1321
|
-
return {"input": input_mappings, "output": output_mappings}
|
1322
|
-
|
1323
1354
|
|
1324
1355
|
class ConfigurationState(ABC):
|
1325
1356
|
"""A given :class:`Step's<Step>` configuration state.
|
@@ -1334,8 +1365,9 @@ class ConfigurationState(ABC):
|
|
1334
1365
|
----------
|
1335
1366
|
step
|
1336
1367
|
The ``Step`` this ``ConfigurationState`` is tied to.
|
1337
|
-
|
1338
|
-
The
|
1368
|
+
step_config
|
1369
|
+
The internal configuration of this ``Step`` we are setting the state
|
1370
|
+
for; it should not include the ``Step's`` name.
|
1339
1371
|
combined_implementations
|
1340
1372
|
The configuration for any implementations to be combined.
|
1341
1373
|
input_data_config
|
@@ -1346,14 +1378,15 @@ class ConfigurationState(ABC):
|
|
1346
1378
|
def __init__(
|
1347
1379
|
self,
|
1348
1380
|
step: Step,
|
1349
|
-
|
1381
|
+
step_config: LayeredConfigTree,
|
1350
1382
|
combined_implementations: LayeredConfigTree,
|
1351
1383
|
input_data_config: LayeredConfigTree,
|
1352
1384
|
):
|
1353
1385
|
self._step = step
|
1354
1386
|
"""The ``Step`` this ``ConfigurationState`` is tied to."""
|
1355
|
-
self.
|
1356
|
-
"""The
|
1387
|
+
self.step_config = step_config
|
1388
|
+
"""The internal configuration of this ``Step`` we are setting the state
|
1389
|
+
for; it should not include the ``Step's`` name."""
|
1357
1390
|
self.combined_implementations = combined_implementations
|
1358
1391
|
"""The relevant configuration if the ``Step's`` ``Implementation``
|
1359
1392
|
has been requested to be combined with that of a different ``Step``."""
|
@@ -1394,15 +1427,15 @@ class LeafConfigurationState(ConfigurationState):
|
|
1394
1427
|
@property
|
1395
1428
|
def is_combined(self) -> bool:
|
1396
1429
|
"""Whether or not this ``Step`` is combined with another ``Step``."""
|
1397
|
-
return
|
1430
|
+
return COMBINED_IMPLEMENTATION_KEY in self.step_config
|
1398
1431
|
|
1399
1432
|
@property
|
1400
1433
|
def implementation_config(self) -> LayeredConfigTree:
|
1401
1434
|
"""The ``Step's`` specific ``Implementation`` configuration."""
|
1402
1435
|
return (
|
1403
|
-
self.combined_implementations[self.
|
1436
|
+
self.combined_implementations[self.step_config[COMBINED_IMPLEMENTATION_KEY]]
|
1404
1437
|
if self.is_combined
|
1405
|
-
else self.
|
1438
|
+
else self.step_config.implementation
|
1406
1439
|
)
|
1407
1440
|
|
1408
1441
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -1416,31 +1449,30 @@ class LeafConfigurationState(ConfigurationState):
|
|
1416
1449
|
-------
|
1417
1450
|
The ``ImplementationGraph`` related to this ``Step``.
|
1418
1451
|
"""
|
1419
|
-
|
1452
|
+
step = self._step
|
1420
1453
|
implementation_graph = ImplementationGraph()
|
1421
|
-
implementation_node_name = self._step.implementation_node_name
|
1422
1454
|
if self.is_combined:
|
1423
|
-
if isinstance(
|
1455
|
+
if isinstance(step, EmbarrassinglyParallelStep):
|
1424
1456
|
raise NotImplementedError(
|
1425
1457
|
"Combining implementations with embarrassingly parallel steps "
|
1426
1458
|
"is not yet supported."
|
1427
1459
|
)
|
1428
1460
|
implementation = PartialImplementation(
|
1429
|
-
combined_name=self.
|
1430
|
-
schema_step=
|
1431
|
-
input_slots=
|
1432
|
-
output_slots=
|
1461
|
+
combined_name=self.step_config[COMBINED_IMPLEMENTATION_KEY],
|
1462
|
+
schema_step=step.step_name,
|
1463
|
+
input_slots=step.input_slots.values(),
|
1464
|
+
output_slots=step.output_slots.values(),
|
1433
1465
|
)
|
1434
1466
|
else:
|
1435
1467
|
implementation = Implementation(
|
1436
|
-
schema_steps=[
|
1468
|
+
schema_steps=[step.step_name],
|
1437
1469
|
implementation_config=self.implementation_config,
|
1438
|
-
input_slots=
|
1439
|
-
output_slots=
|
1440
|
-
is_embarrassingly_parallel=isinstance(
|
1470
|
+
input_slots=step.input_slots.values(),
|
1471
|
+
output_slots=step.output_slots.values(),
|
1472
|
+
is_embarrassingly_parallel=isinstance(step, EmbarrassinglyParallelStep),
|
1441
1473
|
)
|
1442
1474
|
implementation_graph.add_node_from_implementation(
|
1443
|
-
implementation_node_name,
|
1475
|
+
step.implementation_node_name,
|
1444
1476
|
implementation=implementation,
|
1445
1477
|
)
|
1446
1478
|
return implementation_graph
|
@@ -1481,10 +1513,10 @@ class LeafConfigurationState(ConfigurationState):
|
|
1481
1513
|
for mapping in mappings:
|
1482
1514
|
# FIXME [MIC-5771]: Fix ParallelSteps
|
1483
1515
|
if (
|
1484
|
-
"input_data_file" in self.
|
1516
|
+
"input_data_file" in self.step_config
|
1485
1517
|
and edge.source_node == "pipeline_graph_input_data"
|
1486
1518
|
):
|
1487
|
-
edge.output_slot = self.
|
1519
|
+
edge.output_slot = self.step_config["input_data_file"]
|
1488
1520
|
imp_edge = mapping.remap_edge(edge)
|
1489
1521
|
implementation_edges.append(imp_edge)
|
1490
1522
|
else:
|
@@ -1506,8 +1538,10 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1506
1538
|
----------
|
1507
1539
|
step
|
1508
1540
|
The ``Step`` this ``ConfigurationState`` is tied to.
|
1509
|
-
|
1510
|
-
The
|
1541
|
+
step_config
|
1542
|
+
The internal configuration of this ``Step`` we are setting the state
|
1543
|
+
for; it should not include the ``Step's`` name (though it must include
|
1544
|
+
the sub-step names).
|
1511
1545
|
combined_implementations
|
1512
1546
|
The configuration for any implementations to be combined.
|
1513
1547
|
input_data_config
|
@@ -1538,16 +1572,17 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1538
1572
|
def __init__(
|
1539
1573
|
self,
|
1540
1574
|
step: Step,
|
1541
|
-
|
1575
|
+
step_config: LayeredConfigTree,
|
1542
1576
|
combined_implementations: LayeredConfigTree,
|
1543
1577
|
input_data_config: LayeredConfigTree,
|
1544
1578
|
):
|
1545
|
-
super().__init__(step,
|
1579
|
+
super().__init__(step, step_config, combined_implementations, input_data_config)
|
1546
1580
|
if not step.step_graph:
|
1547
1581
|
raise ValueError(
|
1548
1582
|
"NonLeafConfigurationState requires a subgraph upon which to operate, "
|
1549
1583
|
f"but Step {step.name} has no step graph."
|
1550
1584
|
)
|
1585
|
+
self._nodes = step.step_graph.nodes
|
1551
1586
|
self._configure_subgraph_steps()
|
1552
1587
|
|
1553
1588
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -1578,8 +1613,8 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1578
1613
|
|
1579
1614
|
def add_nodes(self, implementation_graph: ImplementationGraph) -> None:
|
1580
1615
|
"""Adds nodes for each ``Step`` to the ``ImplementationGraph``."""
|
1581
|
-
for node in self.
|
1582
|
-
step = self.
|
1616
|
+
for node in self._nodes:
|
1617
|
+
step = self._nodes[node]["step"]
|
1583
1618
|
implementation_graph.update(step.get_implementation_graph())
|
1584
1619
|
|
1585
1620
|
def add_edges(self, implementation_graph: ImplementationGraph) -> None:
|
@@ -1587,8 +1622,8 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1587
1622
|
for source, target, edge_attrs in self._step.step_graph.edges(data=True):
|
1588
1623
|
all_edges = []
|
1589
1624
|
edge = EdgeParams.from_graph_edge(source, target, edge_attrs)
|
1590
|
-
parent_source_step = self.
|
1591
|
-
parent_target_step = self.
|
1625
|
+
parent_source_step = self._nodes[source]["step"]
|
1626
|
+
parent_target_step = self._nodes[target]["step"]
|
1592
1627
|
|
1593
1628
|
source_edges = parent_source_step.get_implementation_edges(edge)
|
1594
1629
|
for source_edge in source_edges:
|
@@ -1624,7 +1659,7 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1624
1659
|
]
|
1625
1660
|
for mapping in mappings:
|
1626
1661
|
new_edge = mapping.remap_edge(edge)
|
1627
|
-
new_step = self.
|
1662
|
+
new_step = self._nodes[mapping.child_node]["step"]
|
1628
1663
|
imp_edges = new_step.get_implementation_edges(new_edge)
|
1629
1664
|
implementation_edges.extend(imp_edges)
|
1630
1665
|
elif edge.target_node == self._step.name:
|
@@ -1635,7 +1670,7 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1635
1670
|
]
|
1636
1671
|
for mapping in mappings:
|
1637
1672
|
new_edge = mapping.remap_edge(edge)
|
1638
|
-
new_step = self.
|
1673
|
+
new_step = self._nodes[mapping.child_node]["step"]
|
1639
1674
|
imp_edges = new_step.get_implementation_edges(new_edge)
|
1640
1675
|
implementation_edges.extend(imp_edges)
|
1641
1676
|
else:
|
@@ -1650,9 +1685,12 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1650
1685
|
This method recursively traverses the ``StepGraph`` and sets the configuration
|
1651
1686
|
state for each ``Step`` until reaching all leaf nodes.
|
1652
1687
|
"""
|
1653
|
-
|
1654
|
-
|
1655
|
-
|
1688
|
+
for node in self._nodes:
|
1689
|
+
step = self._nodes[node]["step"]
|
1690
|
+
# IOStep names never appear in configuration
|
1691
|
+
step_config = (
|
1692
|
+
self.step_config if isinstance(step, IOStep) else self.step_config[step.name]
|
1693
|
+
)
|
1656
1694
|
step.set_configuration_state(
|
1657
|
-
|
1695
|
+
step_config, self.combined_implementations, self.input_data_config
|
1658
1696
|
)
|