easylink 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/cli.py +18 -9
- easylink/graph_components.py +19 -5
- easylink/implementation.py +2 -0
- easylink/pipeline.py +92 -34
- easylink/pipeline_graph.py +112 -27
- easylink/pipeline_schema.py +7 -7
- easylink/pipeline_schema_constants/__init__.py +14 -0
- easylink/pipeline_schema_constants/development.py +137 -122
- easylink/pipeline_schema_constants/testing.py +142 -3
- easylink/rule.py +282 -22
- easylink/runner.py +1 -0
- easylink/step.py +442 -345
- easylink/utilities/__init__.py +3 -2
- easylink/utilities/aggregator_utils.py +32 -0
- easylink/utilities/data_utils.py +99 -5
- easylink/utilities/general_utils.py +49 -10
- easylink/utilities/paths.py +9 -3
- easylink/utilities/splitter_utils.py +72 -0
- easylink/utilities/validation_utils.py +29 -0
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/METADATA +1 -1
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/RECORD +25 -23
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/WHEEL +1 -1
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/top_level.txt +0 -0
easylink/step.py
CHANGED
@@ -54,21 +54,18 @@ class Step:
|
|
54
54
|
Parameters
|
55
55
|
----------
|
56
56
|
step_name
|
57
|
-
The name of the pipeline step in the ``PipelineSchema``.
|
57
|
+
The name of the pipeline step in the ``PipelineSchema``. It must also match
|
58
|
+
the key in the implementation metadata file to be used to run this ``Step``.
|
58
59
|
name
|
59
|
-
The name of this
|
60
|
-
due to the need for disambiguation
|
61
|
-
|
62
|
-
|
63
|
-
("step_1_loop_1", etc).
|
60
|
+
The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
|
61
|
+
This can be different from the ``step_name`` due to the need for disambiguation
|
62
|
+
during the process of flattening the ``StepGraph``, e.g. unrolling loops, etc.
|
63
|
+
For example, if step 1 is looped multiple times, each node would have a
|
64
|
+
``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc).
|
64
65
|
input_slots
|
65
66
|
All required :class:`InputSlots<easylink.graph_components.InputSlot>`.
|
66
67
|
output_slots
|
67
68
|
All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
|
68
|
-
nodes
|
69
|
-
All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance.
|
70
|
-
edges
|
71
|
-
The :class:`~easylink.graph_components.EdgeParams` of this ``Step``.
|
72
69
|
input_slot_mappings
|
73
70
|
The :class:`InputSlotMapping<easylink.graph_components.InputSlotMapping>` of this ``Step``.
|
74
71
|
output_slot_mappings
|
@@ -89,31 +86,22 @@ class Step:
|
|
89
86
|
name: str | None = None,
|
90
87
|
input_slots: Iterable[InputSlot] = (),
|
91
88
|
output_slots: Iterable[OutputSlot] = (),
|
92
|
-
nodes: Iterable[Step] = (),
|
93
|
-
edges: Iterable[EdgeParams] = (),
|
94
89
|
input_slot_mappings: Iterable[InputSlotMapping] = (),
|
95
90
|
output_slot_mappings: Iterable[OutputSlotMapping] = (),
|
96
91
|
) -> None:
|
97
92
|
self.step_name = step_name
|
98
|
-
"""The name of the
|
93
|
+
"""The name of the pipeline step in the ``PipelineSchema``. It must also match
|
94
|
+
the key in the implementation metadata file to be used to run this ``Step``."""
|
99
95
|
self.name = name if name else step_name
|
100
|
-
"""The name of ``Step's`` node in its :class
|
101
|
-
This
|
102
|
-
|
96
|
+
"""The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
|
97
|
+
This can be different from the ``step_name`` due to the need for disambiguation
|
98
|
+
during the process of flattening the ``StepGraph``, e.g. unrolling loops, etc.
|
99
|
+
For example, if step 1 is looped multiple times, each node would have a
|
100
|
+
``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc)."""
|
103
101
|
self.input_slots = {slot.name: slot for slot in input_slots}
|
104
102
|
"""A mapping of ``InputSlot`` names to their instances."""
|
105
103
|
self.output_slots = {slot.name: slot for slot in output_slots}
|
106
104
|
"""A mapping of ``OutputSlot`` names to their instances."""
|
107
|
-
self.nodes = nodes
|
108
|
-
"""All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance."""
|
109
|
-
for node in self.nodes:
|
110
|
-
node.set_parent_step(self)
|
111
|
-
self.edges = edges
|
112
|
-
"""The :class:`~easylink.graph_components.EdgeParams` of this ``Step``."""
|
113
|
-
self.step_graph = self._get_step_graph(nodes, edges)
|
114
|
-
"""The :class:`~easylink.graph_components.StepGraph` of this ``Step``, i.e.
|
115
|
-
the directed acyclic graph (DAG) of sub-nodes and their edges that make
|
116
|
-
up this ``Step`` instance."""
|
117
105
|
self.slot_mappings = {
|
118
106
|
"input": list(input_slot_mappings),
|
119
107
|
"output": list(output_slot_mappings),
|
@@ -164,7 +152,7 @@ class Step:
|
|
164
152
|
"""
|
165
153
|
step = self
|
166
154
|
implementation_name = (
|
167
|
-
self.configuration_state.
|
155
|
+
self.configuration_state.step_config[COMBINED_IMPLEMENTATION_KEY]
|
168
156
|
if self.configuration_state.is_combined
|
169
157
|
else self.configuration_state.implementation_config.name
|
170
158
|
)
|
@@ -203,7 +191,8 @@ class Step:
|
|
203
191
|
Parameters
|
204
192
|
----------
|
205
193
|
step_config
|
206
|
-
The configuration of this ``Step
|
194
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
195
|
+
the ``Step's`` name.
|
207
196
|
combined_implementations
|
208
197
|
The configuration for any implementations to be combined.
|
209
198
|
input_data_config
|
@@ -216,9 +205,6 @@ class Step:
|
|
216
205
|
|
217
206
|
Notes
|
218
207
|
-----
|
219
|
-
A ``Step`` can be in either a "leaf" or a "non-leaf" configuration state
|
220
|
-
and the validation process is different for each.
|
221
|
-
|
222
208
|
If the ``Step`` does not validate (i.e. errors are found and the returned
|
223
209
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
224
210
|
|
@@ -227,14 +213,42 @@ class Step:
|
|
227
213
|
all issues in one pass. In these cases, new errors may be found after the
|
228
214
|
initial ones are handled.
|
229
215
|
"""
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
216
|
+
errors = {}
|
217
|
+
metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
|
218
|
+
error_key = f"step {self.name}"
|
219
|
+
if (
|
220
|
+
"implementation" not in step_config
|
221
|
+
and COMBINED_IMPLEMENTATION_KEY not in step_config
|
222
|
+
):
|
223
|
+
errors[error_key] = [
|
224
|
+
"The step configuration does not contain an 'implementation' key "
|
225
|
+
"or a reference to a combined implementation."
|
226
|
+
]
|
227
|
+
elif (
|
228
|
+
COMBINED_IMPLEMENTATION_KEY in step_config
|
229
|
+
and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
|
230
|
+
):
|
231
|
+
errors[error_key] = [
|
232
|
+
"The step refers to a combined implementation but "
|
233
|
+
f"{step_config[COMBINED_IMPLEMENTATION_KEY]} is not a valid combined "
|
234
|
+
"implementation."
|
235
|
+
]
|
236
236
|
else:
|
237
|
-
|
237
|
+
implementation_config = (
|
238
|
+
step_config["implementation"]
|
239
|
+
if "implementation" in step_config
|
240
|
+
else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
|
241
|
+
)
|
242
|
+
if not "name" in implementation_config:
|
243
|
+
errors[error_key] = [
|
244
|
+
"The implementation configuration does not contain a 'name' key."
|
245
|
+
]
|
246
|
+
elif not implementation_config["name"] in metadata:
|
247
|
+
errors[error_key] = [
|
248
|
+
f"Implementation '{implementation_config['name']}' is not supported. "
|
249
|
+
f"Supported implementations are: {list(metadata.keys())}."
|
250
|
+
]
|
251
|
+
return errors
|
238
252
|
|
239
253
|
def get_implementation_graph(self) -> ImplementationGraph:
|
240
254
|
"""Gets this ``Step's`` :class:`~easylink.graph_components.ImplementationGraph`.
|
@@ -276,42 +290,25 @@ class Step:
|
|
276
290
|
|
277
291
|
def set_configuration_state(
|
278
292
|
self,
|
279
|
-
|
293
|
+
step_config: LayeredConfigTree,
|
280
294
|
combined_implementations: LayeredConfigTree,
|
281
295
|
input_data_config: LayeredConfigTree,
|
282
296
|
) -> None:
|
283
|
-
"""Sets the configuration state
|
284
|
-
|
285
|
-
The so-called 'configuration state' for a given ``Step`` is backed up by
|
286
|
-
a :class:`ConfigurationState` class and is assigned to its :attr:`_configuration_state`
|
287
|
-
attribute. There are two possible ``ConfigurationStates``:
|
288
|
-
:class:`LeafConfigurationState` and :class:`NonLeafConfigurationState`.
|
289
|
-
|
290
|
-
This method sets the configuration state of this ``Step`` based on whether
|
291
|
-
or not a :attr:`config_key` is set *and exists is the ``Step's`` configuration*
|
292
|
-
(i.e. its portion of the user-suppled pipeline specification
|
293
|
-
file); any required deviation from this behavior requires special
|
294
|
-
handling.
|
297
|
+
"""Sets the configuration state to 'leaf'.
|
295
298
|
|
296
299
|
Parameters
|
297
300
|
----------
|
298
|
-
|
299
|
-
The configuration of
|
301
|
+
step_config
|
302
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
303
|
+
the ``Step's`` name.
|
300
304
|
combined_implementations
|
301
305
|
The configuration for any implementations to be combined.
|
302
306
|
input_data_config
|
303
307
|
The input data configuration for the entire pipeline.
|
304
308
|
"""
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
self._configuration_state = NonLeafConfigurationState(
|
309
|
-
self, sub_config, combined_implementations, input_data_config
|
310
|
-
)
|
311
|
-
else:
|
312
|
-
self._configuration_state = LeafConfigurationState(
|
313
|
-
self, sub_config, combined_implementations, input_data_config
|
314
|
-
)
|
309
|
+
self._configuration_state = LeafConfigurationState(
|
310
|
+
self, step_config, combined_implementations, input_data_config
|
311
|
+
)
|
315
312
|
|
316
313
|
def get_implementation_slot_mappings(self) -> dict[str, list[SlotMapping]]:
|
317
314
|
"""Gets the input and output :class:`SlotMappings<easylink.graph_components.SlotMapping>`."""
|
@@ -326,113 +323,6 @@ class Step:
|
|
326
323
|
],
|
327
324
|
}
|
328
325
|
|
329
|
-
##################
|
330
|
-
# Helper methods #
|
331
|
-
##################
|
332
|
-
|
333
|
-
def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
|
334
|
-
"""Create a StepGraph from the nodes and edges the step was initialized with."""
|
335
|
-
step_graph = StepGraph()
|
336
|
-
for step in nodes:
|
337
|
-
step_graph.add_node_from_step(step)
|
338
|
-
for edge in edges:
|
339
|
-
step_graph.add_edge_from_params(edge)
|
340
|
-
return step_graph
|
341
|
-
|
342
|
-
def _validate_leaf(
|
343
|
-
self,
|
344
|
-
step_config: LayeredConfigTree,
|
345
|
-
combined_implementations: LayeredConfigTree,
|
346
|
-
) -> dict[str, list[str]]:
|
347
|
-
"""Validates a leaf ``Step``."""
|
348
|
-
errors = {}
|
349
|
-
metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
|
350
|
-
error_key = f"step {self.name}"
|
351
|
-
if (
|
352
|
-
"implementation" not in step_config
|
353
|
-
and COMBINED_IMPLEMENTATION_KEY not in step_config
|
354
|
-
):
|
355
|
-
errors[error_key] = [
|
356
|
-
"The step configuration does not contain an 'implementation' key or a "
|
357
|
-
"reference to a combined implementation."
|
358
|
-
]
|
359
|
-
elif (
|
360
|
-
COMBINED_IMPLEMENTATION_KEY in step_config
|
361
|
-
and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
|
362
|
-
):
|
363
|
-
errors[error_key] = [
|
364
|
-
f"The step refers to a combined implementation but {step_config[COMBINED_IMPLEMENTATION_KEY]} is not a "
|
365
|
-
f"valid combined implementation."
|
366
|
-
]
|
367
|
-
else:
|
368
|
-
implementation_config = (
|
369
|
-
step_config["implementation"]
|
370
|
-
if "implementation" in step_config
|
371
|
-
else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
|
372
|
-
)
|
373
|
-
if not "name" in implementation_config:
|
374
|
-
errors[error_key] = [
|
375
|
-
"The implementation configuration does not contain a 'name' key."
|
376
|
-
]
|
377
|
-
elif not implementation_config["name"] in metadata:
|
378
|
-
errors[error_key] = [
|
379
|
-
f"Implementation '{implementation_config['name']}' is not supported. "
|
380
|
-
f"Supported implementations are: {list(metadata.keys())}."
|
381
|
-
]
|
382
|
-
return errors
|
383
|
-
|
384
|
-
def _validate_nonleaf(
|
385
|
-
self,
|
386
|
-
step_config: LayeredConfigTree,
|
387
|
-
combined_implementations: LayeredConfigTree,
|
388
|
-
input_data_config: LayeredConfigTree,
|
389
|
-
) -> dict[str, list[str]]:
|
390
|
-
"""Validates a non-leaf ``Step``."""
|
391
|
-
errors = {}
|
392
|
-
nodes = self.step_graph.nodes
|
393
|
-
for node in nodes:
|
394
|
-
step = nodes[node]["step"]
|
395
|
-
if isinstance(step, IOStep):
|
396
|
-
continue
|
397
|
-
if step.name not in step_config:
|
398
|
-
step_errors = {f"step {step.name}": [f"The step is not configured."]}
|
399
|
-
else:
|
400
|
-
step_errors = step.validate_step(
|
401
|
-
step_config[step.name], combined_implementations, input_data_config
|
402
|
-
)
|
403
|
-
if step_errors:
|
404
|
-
errors.update(step_errors)
|
405
|
-
extra_steps = set(step_config.keys()) - set(nodes)
|
406
|
-
for extra_step in extra_steps:
|
407
|
-
errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
|
408
|
-
return errors
|
409
|
-
|
410
|
-
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
411
|
-
"""Convenience method to get a ``Step's`` configuration.
|
412
|
-
|
413
|
-
Some types of ``Steps`` have a unique :attr:`config_key` (defined by the
|
414
|
-
user via the pipeline specification file) that is used to specify the behavior
|
415
|
-
of the ``Step`` (e.g. looping, parallel, etc). This method simply returns
|
416
|
-
the ``Step's`` sub-configuration keyed to that ``config_key`` (if it exists,
|
417
|
-
i.e. is not a basic ``Step``).
|
418
|
-
|
419
|
-
Parameters
|
420
|
-
----------
|
421
|
-
step_config
|
422
|
-
The high-level configuration of this ``Step``.
|
423
|
-
|
424
|
-
Returns
|
425
|
-
-------
|
426
|
-
The sub-configuration of this ``Step`` keyed on the ``config_key``
|
427
|
-
(if it exists).
|
428
|
-
|
429
|
-
"""
|
430
|
-
return (
|
431
|
-
step_config
|
432
|
-
if not self.config_key in step_config
|
433
|
-
else step_config[self.config_key]
|
434
|
-
)
|
435
|
-
|
436
326
|
|
437
327
|
class IOStep(Step):
|
438
328
|
"""A special case type of :class:`Step` used to represent incoming and outgoing data.
|
@@ -485,27 +375,24 @@ class IOStep(Step):
|
|
485
375
|
|
486
376
|
def set_configuration_state(
|
487
377
|
self,
|
488
|
-
|
378
|
+
step_config: LayeredConfigTree,
|
489
379
|
combined_implementations: LayeredConfigTree,
|
490
380
|
input_data_config: LayeredConfigTree,
|
491
381
|
) -> None:
|
492
|
-
"""Sets the configuration state to leaf.
|
493
|
-
|
494
|
-
An ``IOStep`` is by definition a leaf ``Step`` and so we assign that here
|
495
|
-
instead of relying on the default behavior of the parent class.
|
382
|
+
"""Sets the configuration state to 'leaf'.
|
496
383
|
|
497
384
|
Parameters
|
498
385
|
----------
|
499
|
-
|
500
|
-
The configuration of
|
501
|
-
|
386
|
+
step_config
|
387
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
388
|
+
the ``Step's`` name.
|
502
389
|
combined_implementations
|
503
390
|
The configuration for any implementations to be combined.
|
504
391
|
input_data_config
|
505
392
|
The input data configuration for the entire pipeline.
|
506
393
|
"""
|
507
394
|
self._configuration_state = LeafConfigurationState(
|
508
|
-
self,
|
395
|
+
self, step_config, combined_implementations, input_data_config
|
509
396
|
)
|
510
397
|
|
511
398
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -548,29 +435,29 @@ class InputStep(IOStep):
|
|
548
435
|
|
549
436
|
def set_configuration_state(
|
550
437
|
self,
|
551
|
-
|
438
|
+
step_config: LayeredConfigTree,
|
552
439
|
combined_implementations: LayeredConfigTree,
|
553
440
|
input_data_config: LayeredConfigTree,
|
554
441
|
) -> None:
|
555
442
|
"""Sets the configuration state and updates the ``OutputSlots``.
|
556
443
|
|
557
|
-
In addition to setting ``InputStep`` to a leaf configuration state, this
|
444
|
+
In addition to setting ``InputStep`` to a 'leaf' configuration state, this
|
558
445
|
method also updates the ``OutputSlots`` to include all of the dataset keys
|
559
446
|
in the input data specification file. This allows for future use of
|
560
|
-
specific datasets instead of only
|
447
|
+
*specific* datasets instead of only *all* of them.
|
561
448
|
|
562
449
|
Parameters
|
563
450
|
----------
|
564
|
-
|
565
|
-
The configuration of
|
566
|
-
|
451
|
+
step_config
|
452
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
453
|
+
the ``Step's`` name.
|
567
454
|
combined_implementations
|
568
455
|
The configuration for any implementations to be combined.
|
569
456
|
input_data_config
|
570
457
|
The input data configuration for the entire pipeline.
|
571
458
|
"""
|
572
459
|
super().set_configuration_state(
|
573
|
-
|
460
|
+
step_config, combined_implementations, input_data_config
|
574
461
|
)
|
575
462
|
for input_data_key in input_data_config:
|
576
463
|
self.output_slots[input_data_key] = OutputSlot(name=input_data_key)
|
@@ -601,19 +488,185 @@ class HierarchicalStep(Step):
|
|
601
488
|
|
602
489
|
See :class:`Step` for inherited attributes.
|
603
490
|
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
491
|
+
Parameters
|
492
|
+
----------
|
493
|
+
nodes
|
494
|
+
All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``.
|
495
|
+
edges
|
496
|
+
The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes.
|
497
|
+
step_graph
|
498
|
+
The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
499
|
+
graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``.
|
500
|
+
user_configurable
|
501
|
+
Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
|
502
|
+
attribute to allow for the creation of back-end ``HierarchicalSteps`` that are not
|
503
|
+
user-facing (i.e. they do not need to provide a 'substeps' configuration key).
|
609
504
|
|
610
505
|
"""
|
611
506
|
|
507
|
+
def __init__(
|
508
|
+
self,
|
509
|
+
step_name,
|
510
|
+
name=None,
|
511
|
+
input_slots=(),
|
512
|
+
output_slots=(),
|
513
|
+
nodes=(),
|
514
|
+
edges=(),
|
515
|
+
input_slot_mappings=(),
|
516
|
+
output_slot_mappings=(),
|
517
|
+
user_configurable=True,
|
518
|
+
):
|
519
|
+
super().__init__(
|
520
|
+
step_name,
|
521
|
+
name,
|
522
|
+
input_slots,
|
523
|
+
output_slots,
|
524
|
+
input_slot_mappings,
|
525
|
+
output_slot_mappings,
|
526
|
+
)
|
527
|
+
self.nodes = nodes
|
528
|
+
"""All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``."""
|
529
|
+
for node in self.nodes:
|
530
|
+
node.set_parent_step(self)
|
531
|
+
self.edges = edges
|
532
|
+
"""The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes."""
|
533
|
+
self.step_graph = self._get_step_graph(nodes, edges)
|
534
|
+
"""The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
535
|
+
graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``."""
|
536
|
+
self.user_configurable = user_configurable
|
537
|
+
"""Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
|
538
|
+
attribute to allow for the creation of back-end ``HierarchicalSteps`` that are not
|
539
|
+
user-facing (i.e. they do not need to provide a 'substeps' configuration key)."""
|
540
|
+
|
612
541
|
@property
|
613
542
|
def config_key(self):
|
614
543
|
"""The pipeline specification key required for a ``HierarchicalStep``."""
|
615
544
|
return "substeps"
|
616
545
|
|
546
|
+
def validate_step(
|
547
|
+
self,
|
548
|
+
step_config: LayeredConfigTree,
|
549
|
+
combined_implementations: LayeredConfigTree,
|
550
|
+
input_data_config: LayeredConfigTree,
|
551
|
+
) -> dict[str, list[str]]:
|
552
|
+
"""Validates the ``HierarchicalStep``.
|
553
|
+
|
554
|
+
Parameters
|
555
|
+
----------
|
556
|
+
step_config
|
557
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
558
|
+
the ``Step's`` name.
|
559
|
+
combined_implementations
|
560
|
+
The configuration for any implementations to be combined.
|
561
|
+
input_data_config
|
562
|
+
The input data configuration for the entire pipeline.
|
563
|
+
|
564
|
+
Returns
|
565
|
+
-------
|
566
|
+
A dictionary of errors, where the keys are the ``HierarchicalStep``
|
567
|
+
name and the values are lists of error messages associated with the
|
568
|
+
given ``HierarchicalStep``.
|
569
|
+
|
570
|
+
Notes
|
571
|
+
-----
|
572
|
+
A ``HierarchicalStep`` can be in either a "leaf" or a "non-leaf" configuration
|
573
|
+
state and the validation process is different for each.
|
574
|
+
|
575
|
+
If the ``HierarchicalStep`` does not validate (i.e. errors are found and
|
576
|
+
the returned dictionary is non-empty), the tool will exit and the pipeline
|
577
|
+
will not run.
|
578
|
+
|
579
|
+
We attempt to batch error messages as much as possible, but there may be
|
580
|
+
times where the configuration is so ill-formed that we are unable to handle
|
581
|
+
all issues in one pass. In these cases, new errors may be found after the
|
582
|
+
initial ones are handled.
|
583
|
+
"""
|
584
|
+
if self.user_configurable:
|
585
|
+
if self.config_key in step_config:
|
586
|
+
step_config = step_config[self.config_key]
|
587
|
+
else:
|
588
|
+
# This is a leaf step
|
589
|
+
return super().validate_step(
|
590
|
+
step_config, combined_implementations, input_data_config
|
591
|
+
)
|
592
|
+
return self._validate_step_graph(
|
593
|
+
step_config, combined_implementations, input_data_config
|
594
|
+
)
|
595
|
+
|
596
|
+
def set_configuration_state(
|
597
|
+
self,
|
598
|
+
step_config: LayeredConfigTree,
|
599
|
+
combined_implementations: LayeredConfigTree,
|
600
|
+
input_data_config: LayeredConfigTree,
|
601
|
+
) -> None:
|
602
|
+
"""Sets the configuration state.
|
603
|
+
|
604
|
+
The configuration state of a ``HierarchicalStep`` depends on (1) whether
|
605
|
+
or not it is :attr:`user_configurable` and (2) whether or not the
|
606
|
+
:attr:`config_key` exists in the pipeline specification file.
|
607
|
+
|
608
|
+
Parameters
|
609
|
+
----------
|
610
|
+
step_config
|
611
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
612
|
+
the ``Step's`` name.
|
613
|
+
combined_implementations
|
614
|
+
The configuration for any implementations to be combined.
|
615
|
+
input_data_config
|
616
|
+
The input data configuration for the entire pipeline.
|
617
|
+
"""
|
618
|
+
if self.user_configurable:
|
619
|
+
if self.config_key in step_config:
|
620
|
+
step_config = step_config[self.config_key]
|
621
|
+
configuration_state_type = NonLeafConfigurationState
|
622
|
+
else:
|
623
|
+
configuration_state_type = LeafConfigurationState
|
624
|
+
else:
|
625
|
+
# Substeps must be used, so we require non-leaf here
|
626
|
+
configuration_state_type = NonLeafConfigurationState
|
627
|
+
self._configuration_state = configuration_state_type(
|
628
|
+
self, step_config, combined_implementations, input_data_config
|
629
|
+
)
|
630
|
+
|
631
|
+
##################
|
632
|
+
# Helper methods #
|
633
|
+
##################
|
634
|
+
|
635
|
+
def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
|
636
|
+
"""Creates a :class:`~easylink.graph_components.StepGraph` from the nodes and edges the step was initialized with."""
|
637
|
+
step_graph = StepGraph()
|
638
|
+
for step in nodes:
|
639
|
+
step_graph.add_node_from_step(step)
|
640
|
+
for edge in edges:
|
641
|
+
step_graph.add_edge_from_params(edge)
|
642
|
+
return step_graph
|
643
|
+
|
644
|
+
def _validate_step_graph(
|
645
|
+
self,
|
646
|
+
step_config: LayeredConfigTree,
|
647
|
+
combined_implementations: LayeredConfigTree,
|
648
|
+
input_data_config: LayeredConfigTree,
|
649
|
+
) -> dict[str, list[str]]:
|
650
|
+
"""Validates the nodes of a :class:`~easylink.graph_components.StepGraph`."""
|
651
|
+
errors = {}
|
652
|
+
for node in self.step_graph.nodes:
|
653
|
+
step = self.step_graph.nodes[node]["step"]
|
654
|
+
if isinstance(step, IOStep):
|
655
|
+
continue
|
656
|
+
else:
|
657
|
+
if step.name not in step_config:
|
658
|
+
step_errors = {f"step {step.name}": ["The step is not configured."]}
|
659
|
+
else:
|
660
|
+
step_errors = step.validate_step(
|
661
|
+
step_config[step.name], combined_implementations, input_data_config
|
662
|
+
)
|
663
|
+
if step_errors:
|
664
|
+
errors.update(step_errors)
|
665
|
+
extra_steps = set(step_config.keys()) - set(self.step_graph.nodes)
|
666
|
+
for extra_step in extra_steps:
|
667
|
+
errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
|
668
|
+
return errors
|
669
|
+
|
617
670
|
|
618
671
|
class TemplatedStep(Step, ABC):
|
619
672
|
"""A type of :class:`Step` that may contain multiplicity.
|
@@ -641,8 +694,12 @@ class TemplatedStep(Step, ABC):
|
|
641
694
|
template_step.input_slots.values(),
|
642
695
|
template_step.output_slots.values(),
|
643
696
|
)
|
697
|
+
self.step_graph = None
|
698
|
+
"""The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
699
|
+
graph (DAG) of sub-nodes and their edges that make up this ``TemplatedStep``."""
|
644
700
|
self.template_step = template_step
|
645
701
|
"""The ``Step`` to be templated."""
|
702
|
+
|
646
703
|
self.template_step.set_parent_step(self)
|
647
704
|
|
648
705
|
@property
|
@@ -716,7 +773,8 @@ class TemplatedStep(Step, ABC):
|
|
716
773
|
Parameters
|
717
774
|
----------
|
718
775
|
step_config
|
719
|
-
The configuration of this ``
|
776
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
777
|
+
the ``Step's`` name.
|
720
778
|
combined_implementations
|
721
779
|
The configuration for any implementations to be combined.
|
722
780
|
input_data_config
|
@@ -730,7 +788,7 @@ class TemplatedStep(Step, ABC):
|
|
730
788
|
|
731
789
|
Notes
|
732
790
|
-----
|
733
|
-
If the ``
|
791
|
+
If the ``TemplatedStep`` does not validate (i.e. errors are found and the returned
|
734
792
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
735
793
|
|
736
794
|
We attempt to batch error messages as much as possible, but there may be
|
@@ -739,6 +797,7 @@ class TemplatedStep(Step, ABC):
|
|
739
797
|
initial ones are handled.
|
740
798
|
"""
|
741
799
|
if not self.config_key in step_config:
|
800
|
+
# This is a leaf step
|
742
801
|
return self.template_step.validate_step(
|
743
802
|
step_config, combined_implementations, input_data_config
|
744
803
|
)
|
@@ -770,51 +829,32 @@ class TemplatedStep(Step, ABC):
|
|
770
829
|
]
|
771
830
|
parallel_errors.update(
|
772
831
|
self.template_step.validate_step(
|
773
|
-
parallel_config,
|
832
|
+
LayeredConfigTree(parallel_config),
|
833
|
+
combined_implementations,
|
834
|
+
input_data_config,
|
774
835
|
)
|
775
836
|
)
|
776
837
|
if parallel_errors:
|
777
838
|
errors[f"step {self.name}"][f"{self.node_prefix}_{i+1}"] = parallel_errors
|
778
839
|
return errors
|
779
840
|
|
780
|
-
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
781
|
-
"""Convenience method to get the ``TemplatedStep's`` configuration.
|
782
|
-
|
783
|
-
``TemplatedSteps`` may include multiplicity. In such cases, their configurations
|
784
|
-
must be modified to include the expanded ``Steps``.
|
785
|
-
|
786
|
-
Parameters
|
787
|
-
----------
|
788
|
-
step_config
|
789
|
-
The high-level configuration of this ``TemplatedStep``.
|
790
|
-
|
791
|
-
Returns
|
792
|
-
-------
|
793
|
-
The expanded sub-configuration of this ``TemplatedStep`` based on the
|
794
|
-
:attr:`Step.config_key` and expanded to include all looped or parallelized
|
795
|
-
sub-``Steps``).
|
796
|
-
"""
|
797
|
-
if self.config_key in step_config:
|
798
|
-
expanded_step_config = LayeredConfigTree()
|
799
|
-
for i, sub_config in enumerate(step_config[self.config_key]):
|
800
|
-
expanded_step_config.update(
|
801
|
-
{f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
|
802
|
-
)
|
803
|
-
return expanded_step_config
|
804
|
-
return step_config
|
805
|
-
|
806
841
|
def set_configuration_state(
|
807
842
|
self,
|
808
|
-
|
843
|
+
step_config: LayeredConfigTree,
|
809
844
|
combined_implementations: LayeredConfigTree,
|
810
845
|
input_data_config: LayeredConfigTree,
|
811
846
|
):
|
812
|
-
"""Sets the configuration state
|
847
|
+
"""Sets the configuration state to 'non-leaf'.
|
848
|
+
|
849
|
+
In addition to setting the configuration state, this also updates the
|
850
|
+
:class:`~easylink.graph_components.StepGraph` and
|
851
|
+
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
813
852
|
|
814
853
|
Parameters
|
815
854
|
----------
|
816
|
-
|
817
|
-
The configuration of
|
855
|
+
step_config
|
856
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
857
|
+
the ``Step's`` name.
|
818
858
|
combined_implementations
|
819
859
|
The configuration for any implementations to be combined.
|
820
860
|
input_data_config
|
@@ -828,7 +868,6 @@ class TemplatedStep(Step, ABC):
|
|
828
868
|
:class:`~easylink.implementation.Implementation`, i.e. the one with a
|
829
869
|
:class:`LeafConfigurationState`.
|
830
870
|
"""
|
831
|
-
step_config = parent_config[self.name]
|
832
871
|
if self.config_key not in step_config:
|
833
872
|
# Special handle the step_graph update
|
834
873
|
self.step_graph = StepGraph()
|
@@ -857,6 +896,36 @@ class TemplatedStep(Step, ABC):
|
|
857
896
|
self, expanded_config, combined_implementations, input_data_config
|
858
897
|
)
|
859
898
|
|
899
|
+
##################
|
900
|
+
# Helper Methods #
|
901
|
+
##################
|
902
|
+
|
903
|
+
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
904
|
+
"""Convenience method to get the ``TemplatedStep's`` configuration.
|
905
|
+
|
906
|
+
``TemplatedSteps`` may include multiplicity. In such cases, their configurations
|
907
|
+
must be modified to include the expanded ``Steps``.
|
908
|
+
|
909
|
+
Parameters
|
910
|
+
----------
|
911
|
+
step_config
|
912
|
+
The high-level configuration of this ``TemplatedStep``.
|
913
|
+
|
914
|
+
Returns
|
915
|
+
-------
|
916
|
+
The expanded sub-configuration of this ``TemplatedStep`` based on the
|
917
|
+
:attr:`Step.config_key` and expanded to include all looped or parallelized
|
918
|
+
sub-``Steps``.
|
919
|
+
"""
|
920
|
+
if self.config_key in step_config:
|
921
|
+
expanded_step_config = LayeredConfigTree()
|
922
|
+
for i, sub_config in enumerate(step_config[self.config_key]):
|
923
|
+
expanded_step_config.update(
|
924
|
+
{f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
|
925
|
+
)
|
926
|
+
return expanded_step_config
|
927
|
+
return step_config
|
928
|
+
|
860
929
|
def _duplicate_template_step(self) -> Step:
|
861
930
|
"""Makes a duplicate of the template ``Step``.
|
862
931
|
|
@@ -1064,11 +1133,69 @@ class ParallelStep(TemplatedStep):
|
|
1064
1133
|
return {"input": input_mappings, "output": output_mappings}
|
1065
1134
|
|
1066
1135
|
|
1067
|
-
class
|
1068
|
-
"""A
|
1136
|
+
class EmbarrassinglyParallelStep(Step):
|
1137
|
+
"""A step that is run in parallel on the backend.
|
1069
1138
|
|
1070
|
-
|
1071
|
-
|
1139
|
+
An ``EmbarrassinglyParallelStep`` is different than a :class:`ParallelStep`
|
1140
|
+
in that it is not configured by the user to be run in parallel - it completely
|
1141
|
+
happens on the back end for performance reasons. As such, note that it inherits
|
1142
|
+
from :class:`Step` instead of :class:`TemplatedStep`.
|
1143
|
+
|
1144
|
+
See :class:`Step` for inherited attributes.
|
1145
|
+
"""
|
1146
|
+
|
1147
|
+
def __init__(
|
1148
|
+
self,
|
1149
|
+
step_name: str,
|
1150
|
+
input_slots: Iterable[InputSlot],
|
1151
|
+
output_slots: Iterable[OutputSlot],
|
1152
|
+
) -> None:
|
1153
|
+
super().__init__(step_name, input_slots=input_slots, output_slots=output_slots)
|
1154
|
+
self._validate()
|
1155
|
+
|
1156
|
+
def _validate(self) -> None:
|
1157
|
+
"""Validates the ``EmbarrassinglyParallelStep``.
|
1158
|
+
|
1159
|
+
``EmbarrassinglyParallelSteps`` are not configured by the user to be run
|
1160
|
+
in parallel. Since it happens on the back end, we need to do somewhat unique
|
1161
|
+
validations during construction. Specifically,
|
1162
|
+
- one and only one :class:`~easylink.graph_components.InputSlot` *must* include
|
1163
|
+
a :attr:`~easylink.graph_components.InputSlot.splitter` method.
|
1164
|
+
- all :class:`OutputSlots<easylink.graph_components.OutputSlot>` *must* include
|
1165
|
+
an :attr:`~easylink.graph_components.OutputSlot.aggregator` method.
|
1166
|
+
"""
|
1167
|
+
errors = []
|
1168
|
+
# assert that only one input slot has a splitter assigned
|
1169
|
+
splitters = {
|
1170
|
+
slot.name: slot.splitter.__name__
|
1171
|
+
for slot in self.input_slots.values()
|
1172
|
+
if slot.splitter
|
1173
|
+
}
|
1174
|
+
if len(splitters) == 0:
|
1175
|
+
errors.append(
|
1176
|
+
f"EmbarrassinglyParallelStep '{self.step_name}' does not have any input slots with a "
|
1177
|
+
"splitter method assigned; one and only one input slot must have a splitter."
|
1178
|
+
)
|
1179
|
+
if len(splitters) > 1:
|
1180
|
+
errors.append(
|
1181
|
+
f"EmbarrassinglyParallelStep '{self.step_name}' has multiple input slots with "
|
1182
|
+
"splitter methods assigned; one and only one input slot must have a splitter.\n"
|
1183
|
+
f"Input slots with splitters: {splitters}"
|
1184
|
+
)
|
1185
|
+
missing_aggregators = [
|
1186
|
+
slot.name for slot in self.output_slots.values() if not slot.aggregator
|
1187
|
+
]
|
1188
|
+
if len(missing_aggregators) != 0:
|
1189
|
+
errors.append(
|
1190
|
+
f"EmbarrassinglyParallelStep '{self.step_name}' has output slots without "
|
1191
|
+
f"aggregator methods assigned: {missing_aggregators}"
|
1192
|
+
)
|
1193
|
+
if errors:
|
1194
|
+
raise ValueError("\n".join(errors))
|
1195
|
+
|
1196
|
+
|
1197
|
+
class ChoiceStep(Step):
|
1198
|
+
"""A type of :class:`Step` that allows for choosing from a set of options.
|
1072
1199
|
|
1073
1200
|
See :class:`Step` for inherited attributes.
|
1074
1201
|
|
@@ -1082,7 +1209,7 @@ class ChoiceStep(Step):
|
|
1082
1209
|
All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
|
1083
1210
|
choices
|
1084
1211
|
A dictionary of choices, where the keys are the names/types of choices and
|
1085
|
-
the values are dictionaries containing that type's
|
1212
|
+
the values are dictionaries containing that type's ``Step`` and related
|
1086
1213
|
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
1087
1214
|
|
1088
1215
|
Notes
|
@@ -1091,6 +1218,13 @@ class ChoiceStep(Step):
|
|
1091
1218
|
:attr:`Step.config_key` in the pipeline specification file. Instead, the pipeline
|
1092
1219
|
configuration must contain a 'type' key that specifies which option to choose.
|
1093
1220
|
|
1221
|
+
The :attr:`choices` dictionary must contain the choice type names as the outer
|
1222
|
+
keys. The value for each of these types is then another dictionary containing
|
1223
|
+
'step', 'input_slot_mappings', and 'output_slot_mappings' keys with their
|
1224
|
+
corresponding values.
|
1225
|
+
|
1226
|
+
Each choice type must specify a *single* ``Step`` and its associated ``SlotMappings``.
|
1227
|
+
Any choice paths that require multiple sub-steps should specify a :class:`HierarchicalStep`.
|
1094
1228
|
"""
|
1095
1229
|
|
1096
1230
|
def __init__(
|
@@ -1098,9 +1232,7 @@ class ChoiceStep(Step):
|
|
1098
1232
|
step_name: str,
|
1099
1233
|
input_slots: Iterable[InputSlot],
|
1100
1234
|
output_slots: Iterable[OutputSlot],
|
1101
|
-
choices: dict[
|
1102
|
-
str, dict[str, list[Step | EdgeParams | InputSlotMapping | OutputSlotMapping]]
|
1103
|
-
],
|
1235
|
+
choices: dict[str, dict[str, Step | SlotMapping]],
|
1104
1236
|
) -> None:
|
1105
1237
|
super().__init__(
|
1106
1238
|
step_name,
|
@@ -1123,7 +1255,8 @@ class ChoiceStep(Step):
|
|
1123
1255
|
Parameters
|
1124
1256
|
----------
|
1125
1257
|
step_config
|
1126
|
-
The configuration of this ``
|
1258
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
1259
|
+
the ``Step's`` name.
|
1127
1260
|
combined_implementations
|
1128
1261
|
The configuration for any implementations to be combined.
|
1129
1262
|
input_data_config
|
@@ -1136,8 +1269,6 @@ class ChoiceStep(Step):
|
|
1136
1269
|
|
1137
1270
|
Notes
|
1138
1271
|
-----
|
1139
|
-
A ``ChoiceStep`` by definition must be set with a :class:`NonLeafConfigurationState`.
|
1140
|
-
|
1141
1272
|
If the ``Step`` does not validate (i.e. errors are found and the returned
|
1142
1273
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
1143
1274
|
|
@@ -1146,16 +1277,9 @@ class ChoiceStep(Step):
|
|
1146
1277
|
all issues in one pass. In these cases, new errors may be found after the
|
1147
1278
|
initial ones are handled.
|
1148
1279
|
|
1149
|
-
We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
|
1150
|
-
in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
|
1151
|
-
as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
|
1152
|
-
prior to :meth:`set_configuration_state`, but the validations itself actually
|
1153
|
-
requires the updated ``StepGraph`` and ``SlotMappings``.
|
1154
|
-
|
1155
1280
|
We do not attempt to validate the subgraph here if the 'type' key is unable
|
1156
1281
|
to be validated.
|
1157
1282
|
"""
|
1158
|
-
|
1159
1283
|
chosen_type = step_config.get("type")
|
1160
1284
|
# Handle problems with the 'type' key
|
1161
1285
|
if not chosen_type:
|
@@ -1163,104 +1287,64 @@ class ChoiceStep(Step):
|
|
1163
1287
|
if chosen_type not in self.choices:
|
1164
1288
|
return {
|
1165
1289
|
f"step {self.name}": [
|
1166
|
-
f"'{step_config
|
1290
|
+
f"'{step_config.type}' is not a supported 'type'. Valid choices are: {list(self.choices)}."
|
1167
1291
|
]
|
1168
1292
|
}
|
1169
|
-
|
1170
|
-
|
1293
|
+
|
1294
|
+
chosen_step = self.choices[chosen_type]["step"]
|
1171
1295
|
chosen_step_config = LayeredConfigTree(
|
1172
1296
|
{key: value for key, value in step_config.items() if key != "type"}
|
1173
1297
|
)
|
1174
|
-
|
1175
|
-
if set(allowable_steps) != set(chosen_step_config):
|
1298
|
+
if chosen_step.name not in chosen_step_config:
|
1176
1299
|
return {
|
1177
1300
|
f"step {self.name}": [
|
1178
|
-
f"
|
1301
|
+
f"'{chosen_step.name}' is not configured. Confirm you have specified "
|
1302
|
+
f"the correct steps for the '{chosen_type}' type."
|
1179
1303
|
]
|
1180
1304
|
}
|
1181
|
-
|
1182
|
-
# HACK: Update the step graph and mappings here because we need them for validation
|
1183
|
-
self.step_graph = self._update_step_graph(subgraph)
|
1184
|
-
self.slot_mappings = self._update_slot_mappings(subgraph)
|
1185
1305
|
# NOTE: A ChoiceStep is by definition a non-leaf step
|
1186
|
-
return
|
1187
|
-
chosen_step_config, combined_implementations, input_data_config
|
1306
|
+
return chosen_step.validate_step(
|
1307
|
+
chosen_step_config[chosen_step.name], combined_implementations, input_data_config
|
1188
1308
|
)
|
1189
1309
|
|
1190
1310
|
def set_configuration_state(
|
1191
1311
|
self,
|
1192
|
-
|
1312
|
+
step_config: LayeredConfigTree,
|
1193
1313
|
combined_implementations: LayeredConfigTree,
|
1194
1314
|
input_data_config: LayeredConfigTree,
|
1195
1315
|
):
|
1196
|
-
"""Sets the configuration state
|
1316
|
+
"""Sets the configuration state to 'non-leaf'.
|
1317
|
+
|
1318
|
+
In addition to setting the configuration state, this also updates the
|
1319
|
+
:class:`~easylink.graph_components.StepGraph` and
|
1320
|
+
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
1197
1321
|
|
1198
1322
|
Parameters
|
1199
1323
|
----------
|
1200
|
-
|
1201
|
-
The configuration of
|
1324
|
+
step_config
|
1325
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
1326
|
+
the ``Step's`` name.
|
1202
1327
|
combined_implementations
|
1203
1328
|
The configuration for any implementations to be combined.
|
1204
1329
|
input_data_config
|
1205
1330
|
The input data configuration for the entire pipeline.
|
1206
|
-
|
1207
|
-
Notes
|
1208
|
-
-----
|
1209
|
-
We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
|
1210
|
-
in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
|
1211
|
-
as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
|
1212
|
-
prior to :meth:`set_configuration_state`, but the validations itself actually
|
1213
|
-
requires the updated ``StepGraph`` and ``SlotMappings``.
|
1214
1331
|
"""
|
1332
|
+
choice = self.choices[step_config["type"]]
|
1333
|
+
self.step_graph = StepGraph()
|
1334
|
+
self.step_graph.add_node_from_step(choice["step"])
|
1335
|
+
self.slot_mappings = {
|
1336
|
+
"input": choice["input_slot_mappings"],
|
1337
|
+
"output": choice["output_slot_mappings"],
|
1338
|
+
}
|
1215
1339
|
|
1216
|
-
|
1217
|
-
{key: value for key, value in
|
1340
|
+
chosen_step_config = LayeredConfigTree(
|
1341
|
+
{key: value for key, value in step_config.items() if key != "type"}
|
1218
1342
|
)
|
1219
|
-
# ChoiceSteps by definition
|
1343
|
+
# ChoiceSteps by definition are in a NonLeafConfigurationState
|
1220
1344
|
self._configuration_state = NonLeafConfigurationState(
|
1221
|
-
self,
|
1345
|
+
self, chosen_step_config, combined_implementations, input_data_config
|
1222
1346
|
)
|
1223
1347
|
|
1224
|
-
@staticmethod
|
1225
|
-
def _update_step_graph(subgraph: dict[str, Any]) -> StepGraph:
|
1226
|
-
"""Updates the :class:`~easylink.graph_components.StepGraph` with the choice.
|
1227
|
-
|
1228
|
-
Parameters
|
1229
|
-
----------
|
1230
|
-
subgraph
|
1231
|
-
Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
|
1232
|
-
|
1233
|
-
Returns
|
1234
|
-
-------
|
1235
|
-
The updated ``StepGraph`` for the chosen type.
|
1236
|
-
"""
|
1237
|
-
nodes = subgraph["nodes"]
|
1238
|
-
edges = subgraph["edges"]
|
1239
|
-
|
1240
|
-
graph = StepGraph()
|
1241
|
-
for node in nodes:
|
1242
|
-
graph.add_node_from_step(node)
|
1243
|
-
for edge in edges:
|
1244
|
-
graph.add_edge_from_params(edge)
|
1245
|
-
return graph
|
1246
|
-
|
1247
|
-
@staticmethod
|
1248
|
-
def _update_slot_mappings(subgraph: dict[str, Any]) -> dict[str, list[SlotMapping]]:
|
1249
|
-
"""Updates the :class:`SlotMappings<easylink.graph_components.SlotMapping>` to the choice type.
|
1250
|
-
|
1251
|
-
Parameters
|
1252
|
-
----------
|
1253
|
-
sub_graph
|
1254
|
-
Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
|
1255
|
-
|
1256
|
-
Returns
|
1257
|
-
-------
|
1258
|
-
Updated ``SlotMappings`` that match the choice type.
|
1259
|
-
"""
|
1260
|
-
input_mappings = subgraph["input_slot_mappings"]
|
1261
|
-
output_mappings = subgraph["output_slot_mappings"]
|
1262
|
-
return {"input": input_mappings, "output": output_mappings}
|
1263
|
-
|
1264
1348
|
|
1265
1349
|
class ConfigurationState(ABC):
|
1266
1350
|
"""A given :class:`Step's<Step>` configuration state.
|
@@ -1275,8 +1359,9 @@ class ConfigurationState(ABC):
|
|
1275
1359
|
----------
|
1276
1360
|
step
|
1277
1361
|
The ``Step`` this ``ConfigurationState`` is tied to.
|
1278
|
-
|
1279
|
-
The
|
1362
|
+
step_config
|
1363
|
+
The internal configuration of this ``Step`` we are setting the state
|
1364
|
+
for; it should not include the ``Step's`` name.
|
1280
1365
|
combined_implementations
|
1281
1366
|
The configuration for any implementations to be combined.
|
1282
1367
|
input_data_config
|
@@ -1287,14 +1372,15 @@ class ConfigurationState(ABC):
|
|
1287
1372
|
def __init__(
|
1288
1373
|
self,
|
1289
1374
|
step: Step,
|
1290
|
-
|
1375
|
+
step_config: LayeredConfigTree,
|
1291
1376
|
combined_implementations: LayeredConfigTree,
|
1292
1377
|
input_data_config: LayeredConfigTree,
|
1293
1378
|
):
|
1294
1379
|
self._step = step
|
1295
1380
|
"""The ``Step`` this ``ConfigurationState`` is tied to."""
|
1296
|
-
self.
|
1297
|
-
"""The
|
1381
|
+
self.step_config = step_config
|
1382
|
+
"""The internal configuration of this ``Step`` we are setting the state
|
1383
|
+
for; it should not include the ``Step's`` name."""
|
1298
1384
|
self.combined_implementations = combined_implementations
|
1299
1385
|
"""The relevant configuration if the ``Step's`` ``Implementation``
|
1300
1386
|
has been requested to be combined with that of a different ``Step``."""
|
@@ -1335,15 +1421,15 @@ class LeafConfigurationState(ConfigurationState):
|
|
1335
1421
|
@property
|
1336
1422
|
def is_combined(self) -> bool:
|
1337
1423
|
"""Whether or not this ``Step`` is combined with another ``Step``."""
|
1338
|
-
return
|
1424
|
+
return COMBINED_IMPLEMENTATION_KEY in self.step_config
|
1339
1425
|
|
1340
1426
|
@property
|
1341
1427
|
def implementation_config(self) -> LayeredConfigTree:
|
1342
1428
|
"""The ``Step's`` specific ``Implementation`` configuration."""
|
1343
1429
|
return (
|
1344
|
-
self.combined_implementations[self.
|
1430
|
+
self.combined_implementations[self.step_config[COMBINED_IMPLEMENTATION_KEY]]
|
1345
1431
|
if self.is_combined
|
1346
|
-
else self.
|
1432
|
+
else self.step_config.implementation
|
1347
1433
|
)
|
1348
1434
|
|
1349
1435
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -1357,25 +1443,30 @@ class LeafConfigurationState(ConfigurationState):
|
|
1357
1443
|
-------
|
1358
1444
|
The ``ImplementationGraph`` related to this ``Step``.
|
1359
1445
|
"""
|
1360
|
-
|
1446
|
+
step = self._step
|
1361
1447
|
implementation_graph = ImplementationGraph()
|
1362
|
-
implementation_node_name = self._step.implementation_node_name
|
1363
1448
|
if self.is_combined:
|
1449
|
+
if isinstance(step, EmbarrassinglyParallelStep):
|
1450
|
+
raise NotImplementedError(
|
1451
|
+
"Combining implementations with embarrassingly parallel steps "
|
1452
|
+
"is not yet supported."
|
1453
|
+
)
|
1364
1454
|
implementation = PartialImplementation(
|
1365
|
-
combined_name=self.
|
1366
|
-
schema_step=
|
1367
|
-
input_slots=
|
1368
|
-
output_slots=
|
1455
|
+
combined_name=self.step_config[COMBINED_IMPLEMENTATION_KEY],
|
1456
|
+
schema_step=step.step_name,
|
1457
|
+
input_slots=step.input_slots.values(),
|
1458
|
+
output_slots=step.output_slots.values(),
|
1369
1459
|
)
|
1370
1460
|
else:
|
1371
1461
|
implementation = Implementation(
|
1372
|
-
schema_steps=[
|
1462
|
+
schema_steps=[step.step_name],
|
1373
1463
|
implementation_config=self.implementation_config,
|
1374
|
-
input_slots=
|
1375
|
-
output_slots=
|
1464
|
+
input_slots=step.input_slots.values(),
|
1465
|
+
output_slots=step.output_slots.values(),
|
1466
|
+
is_embarrassingly_parallel=isinstance(step, EmbarrassinglyParallelStep),
|
1376
1467
|
)
|
1377
1468
|
implementation_graph.add_node_from_implementation(
|
1378
|
-
implementation_node_name,
|
1469
|
+
step.implementation_node_name,
|
1379
1470
|
implementation=implementation,
|
1380
1471
|
)
|
1381
1472
|
return implementation_graph
|
@@ -1416,10 +1507,10 @@ class LeafConfigurationState(ConfigurationState):
|
|
1416
1507
|
for mapping in mappings:
|
1417
1508
|
# FIXME [MIC-5771]: Fix ParallelSteps
|
1418
1509
|
if (
|
1419
|
-
"input_data_file" in self.
|
1510
|
+
"input_data_file" in self.step_config
|
1420
1511
|
and edge.source_node == "pipeline_graph_input_data"
|
1421
1512
|
):
|
1422
|
-
edge.output_slot = self.
|
1513
|
+
edge.output_slot = self.step_config["input_data_file"]
|
1423
1514
|
imp_edge = mapping.remap_edge(edge)
|
1424
1515
|
implementation_edges.append(imp_edge)
|
1425
1516
|
else:
|
@@ -1441,8 +1532,10 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1441
1532
|
----------
|
1442
1533
|
step
|
1443
1534
|
The ``Step`` this ``ConfigurationState`` is tied to.
|
1444
|
-
|
1445
|
-
The
|
1535
|
+
step_config
|
1536
|
+
The internal configuration of this ``Step`` we are setting the state
|
1537
|
+
for; it should not include the ``Step's`` name (though it must include
|
1538
|
+
the sub-step names).
|
1446
1539
|
combined_implementations
|
1447
1540
|
The configuration for any implementations to be combined.
|
1448
1541
|
input_data_config
|
@@ -1473,16 +1566,17 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1473
1566
|
def __init__(
|
1474
1567
|
self,
|
1475
1568
|
step: Step,
|
1476
|
-
|
1569
|
+
step_config: LayeredConfigTree,
|
1477
1570
|
combined_implementations: LayeredConfigTree,
|
1478
1571
|
input_data_config: LayeredConfigTree,
|
1479
1572
|
):
|
1480
|
-
super().__init__(step,
|
1573
|
+
super().__init__(step, step_config, combined_implementations, input_data_config)
|
1481
1574
|
if not step.step_graph:
|
1482
1575
|
raise ValueError(
|
1483
1576
|
"NonLeafConfigurationState requires a subgraph upon which to operate, "
|
1484
1577
|
f"but Step {step.name} has no step graph."
|
1485
1578
|
)
|
1579
|
+
self._nodes = step.step_graph.nodes
|
1486
1580
|
self._configure_subgraph_steps()
|
1487
1581
|
|
1488
1582
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -1513,8 +1607,8 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1513
1607
|
|
1514
1608
|
def add_nodes(self, implementation_graph: ImplementationGraph) -> None:
|
1515
1609
|
"""Adds nodes for each ``Step`` to the ``ImplementationGraph``."""
|
1516
|
-
for node in self.
|
1517
|
-
step = self.
|
1610
|
+
for node in self._nodes:
|
1611
|
+
step = self._nodes[node]["step"]
|
1518
1612
|
implementation_graph.update(step.get_implementation_graph())
|
1519
1613
|
|
1520
1614
|
def add_edges(self, implementation_graph: ImplementationGraph) -> None:
|
@@ -1522,8 +1616,8 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1522
1616
|
for source, target, edge_attrs in self._step.step_graph.edges(data=True):
|
1523
1617
|
all_edges = []
|
1524
1618
|
edge = EdgeParams.from_graph_edge(source, target, edge_attrs)
|
1525
|
-
parent_source_step = self.
|
1526
|
-
parent_target_step = self.
|
1619
|
+
parent_source_step = self._nodes[source]["step"]
|
1620
|
+
parent_target_step = self._nodes[target]["step"]
|
1527
1621
|
|
1528
1622
|
source_edges = parent_source_step.get_implementation_edges(edge)
|
1529
1623
|
for source_edge in source_edges:
|
@@ -1559,7 +1653,7 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1559
1653
|
]
|
1560
1654
|
for mapping in mappings:
|
1561
1655
|
new_edge = mapping.remap_edge(edge)
|
1562
|
-
new_step = self.
|
1656
|
+
new_step = self._nodes[mapping.child_node]["step"]
|
1563
1657
|
imp_edges = new_step.get_implementation_edges(new_edge)
|
1564
1658
|
implementation_edges.extend(imp_edges)
|
1565
1659
|
elif edge.target_node == self._step.name:
|
@@ -1570,7 +1664,7 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1570
1664
|
]
|
1571
1665
|
for mapping in mappings:
|
1572
1666
|
new_edge = mapping.remap_edge(edge)
|
1573
|
-
new_step = self.
|
1667
|
+
new_step = self._nodes[mapping.child_node]["step"]
|
1574
1668
|
imp_edges = new_step.get_implementation_edges(new_edge)
|
1575
1669
|
implementation_edges.extend(imp_edges)
|
1576
1670
|
else:
|
@@ -1585,9 +1679,12 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1585
1679
|
This method recursively traverses the ``StepGraph`` and sets the configuration
|
1586
1680
|
state for each ``Step`` until reaching all leaf nodes.
|
1587
1681
|
"""
|
1588
|
-
|
1589
|
-
|
1590
|
-
|
1682
|
+
for node in self._nodes:
|
1683
|
+
step = self._nodes[node]["step"]
|
1684
|
+
# IOStep names never appear in configuration
|
1685
|
+
step_config = (
|
1686
|
+
self.step_config if isinstance(step, IOStep) else self.step_config[step.name]
|
1687
|
+
)
|
1591
1688
|
step.set_configuration_state(
|
1592
|
-
|
1689
|
+
step_config, self.combined_implementations, self.input_data_config
|
1593
1690
|
)
|