easylink 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/graph_components.py +7 -3
- easylink/pipeline_schema.py +7 -7
- easylink/pipeline_schema_constants/__init__.py +11 -0
- easylink/pipeline_schema_constants/development.py +126 -120
- easylink/pipeline_schema_constants/testing.py +7 -3
- easylink/step.py +379 -347
- easylink/utilities/__init__.py +3 -2
- easylink/utilities/aggregator_utils.py +1 -0
- easylink/utilities/data_utils.py +98 -5
- easylink/utilities/general_utils.py +48 -10
- easylink/utilities/paths.py +9 -3
- easylink/utilities/splitter_utils.py +1 -0
- easylink/utilities/validation_utils.py +29 -0
- {easylink-0.1.7.dist-info → easylink-0.1.8.dist-info}/METADATA +1 -1
- {easylink-0.1.7.dist-info → easylink-0.1.8.dist-info}/RECORD +19 -19
- {easylink-0.1.7.dist-info → easylink-0.1.8.dist-info}/WHEEL +1 -1
- {easylink-0.1.7.dist-info → easylink-0.1.8.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.7.dist-info → easylink-0.1.8.dist-info}/top_level.txt +0 -0
easylink/step.py
CHANGED
@@ -54,21 +54,18 @@ class Step:
|
|
54
54
|
Parameters
|
55
55
|
----------
|
56
56
|
step_name
|
57
|
-
The name of the pipeline step in the ``PipelineSchema``.
|
57
|
+
The name of the pipeline step in the ``PipelineSchema``. It must also match
|
58
|
+
the key in the implementation metadata file to be used to run this ``Step``.
|
58
59
|
name
|
59
|
-
The name of this
|
60
|
-
due to the need for disambiguation
|
61
|
-
|
62
|
-
|
63
|
-
("step_1_loop_1", etc).
|
60
|
+
The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
|
61
|
+
This can be different from the ``step_name`` due to the need for disambiguation
|
62
|
+
during the process of flattening the ``Stepgraph``, e.g. unrolling loops, etc.
|
63
|
+
For example, if step 1 is looped multiple times, each node would have a
|
64
|
+
``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc).
|
64
65
|
input_slots
|
65
66
|
All required :class:`InputSlots<easylink.graph_components.InputSlot>`.
|
66
67
|
output_slots
|
67
68
|
All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
|
68
|
-
nodes
|
69
|
-
All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance.
|
70
|
-
edges
|
71
|
-
The :class:`~easylink.graph_components.EdgeParams` of this ``Step``.
|
72
69
|
input_slot_mappings
|
73
70
|
The :class:`InputSlotMapping<easylink.graph_components.InputSlotMapping>` of this ``Step``.
|
74
71
|
output_slot_mappings
|
@@ -89,31 +86,22 @@ class Step:
|
|
89
86
|
name: str | None = None,
|
90
87
|
input_slots: Iterable[InputSlot] = (),
|
91
88
|
output_slots: Iterable[OutputSlot] = (),
|
92
|
-
nodes: Iterable[Step] = (),
|
93
|
-
edges: Iterable[EdgeParams] = (),
|
94
89
|
input_slot_mappings: Iterable[InputSlotMapping] = (),
|
95
90
|
output_slot_mappings: Iterable[OutputSlotMapping] = (),
|
96
91
|
) -> None:
|
97
92
|
self.step_name = step_name
|
98
|
-
"""The name of the
|
93
|
+
"""The name of the pipeline step in the ``PipelineSchema``. It must also match
|
94
|
+
the key in the implementation metadata file to be used to run this ``Step``."""
|
99
95
|
self.name = name if name else step_name
|
100
|
-
"""The name of ``Step's`` node in its :class
|
101
|
-
This
|
102
|
-
|
96
|
+
"""The name of this ``Step's`` node in its :class:`easylink.graph_components.StepGraph`.
|
97
|
+
This can be different from the ``step_name`` due to the need for disambiguation
|
98
|
+
during the process of flattening the ``Stepgraph``, e.g. unrolling loops, etc.
|
99
|
+
For example, if step 1 is looped multiple times, each node would have a
|
100
|
+
``step_name`` of, perhaps, "step_1" but unique ``names`` ("step_1_loop_1", etc)."""
|
103
101
|
self.input_slots = {slot.name: slot for slot in input_slots}
|
104
102
|
"""A mapping of ``InputSlot`` names to their instances."""
|
105
103
|
self.output_slots = {slot.name: slot for slot in output_slots}
|
106
104
|
"""A mapping of ``OutputSlot`` names to their instances."""
|
107
|
-
self.nodes = nodes
|
108
|
-
"""All sub-nodes (i.e. sub-``Steps``) of this particular ``Step`` instance."""
|
109
|
-
for node in self.nodes:
|
110
|
-
node.set_parent_step(self)
|
111
|
-
self.edges = edges
|
112
|
-
"""The :class:`~easylink.graph_components.EdgeParams` of this ``Step``."""
|
113
|
-
self.step_graph = self._get_step_graph(nodes, edges)
|
114
|
-
"""The :class:`~easylink.graph_components.StepGraph` of this ``Step``, i.e.
|
115
|
-
the directed acyclic graph (DAG) of sub-nodes and their edges that make
|
116
|
-
up this ``Step`` instance."""
|
117
105
|
self.slot_mappings = {
|
118
106
|
"input": list(input_slot_mappings),
|
119
107
|
"output": list(output_slot_mappings),
|
@@ -164,7 +152,7 @@ class Step:
|
|
164
152
|
"""
|
165
153
|
step = self
|
166
154
|
implementation_name = (
|
167
|
-
self.configuration_state.
|
155
|
+
self.configuration_state.step_config[COMBINED_IMPLEMENTATION_KEY]
|
168
156
|
if self.configuration_state.is_combined
|
169
157
|
else self.configuration_state.implementation_config.name
|
170
158
|
)
|
@@ -203,7 +191,8 @@ class Step:
|
|
203
191
|
Parameters
|
204
192
|
----------
|
205
193
|
step_config
|
206
|
-
The configuration of this ``Step
|
194
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
195
|
+
the ``Step's`` name.
|
207
196
|
combined_implementations
|
208
197
|
The configuration for any implementations to be combined.
|
209
198
|
input_data_config
|
@@ -216,9 +205,6 @@ class Step:
|
|
216
205
|
|
217
206
|
Notes
|
218
207
|
-----
|
219
|
-
A ``Step`` can be in either a "leaf" or a "non-leaf" configuration state
|
220
|
-
and the validation process is different for each.
|
221
|
-
|
222
208
|
If the ``Step`` does not validate (i.e. errors are found and the returned
|
223
209
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
224
210
|
|
@@ -227,14 +213,42 @@ class Step:
|
|
227
213
|
all issues in one pass. In these cases, new errors may be found after the
|
228
214
|
initial ones are handled.
|
229
215
|
"""
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
216
|
+
errors = {}
|
217
|
+
metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
|
218
|
+
error_key = f"step {self.name}"
|
219
|
+
if (
|
220
|
+
"implementation" not in step_config
|
221
|
+
and COMBINED_IMPLEMENTATION_KEY not in step_config
|
222
|
+
):
|
223
|
+
errors[error_key] = [
|
224
|
+
"The step configuration does not contain an 'implementation' key "
|
225
|
+
"or a reference to a combined implementation."
|
226
|
+
]
|
227
|
+
elif (
|
228
|
+
COMBINED_IMPLEMENTATION_KEY in step_config
|
229
|
+
and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
|
230
|
+
):
|
231
|
+
errors[error_key] = [
|
232
|
+
"The step refers to a combined implementation but "
|
233
|
+
f"{step_config[COMBINED_IMPLEMENTATION_KEY]} is not a valid combined "
|
234
|
+
"implementation."
|
235
|
+
]
|
236
236
|
else:
|
237
|
-
|
237
|
+
implementation_config = (
|
238
|
+
step_config["implementation"]
|
239
|
+
if "implementation" in step_config
|
240
|
+
else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
|
241
|
+
)
|
242
|
+
if not "name" in implementation_config:
|
243
|
+
errors[error_key] = [
|
244
|
+
"The implementation configuration does not contain a 'name' key."
|
245
|
+
]
|
246
|
+
elif not implementation_config["name"] in metadata:
|
247
|
+
errors[error_key] = [
|
248
|
+
f"Implementation '{implementation_config['name']}' is not supported. "
|
249
|
+
f"Supported implementations are: {list(metadata.keys())}."
|
250
|
+
]
|
251
|
+
return errors
|
238
252
|
|
239
253
|
def get_implementation_graph(self) -> ImplementationGraph:
|
240
254
|
"""Gets this ``Step's`` :class:`~easylink.graph_components.ImplementationGraph`.
|
@@ -276,42 +290,25 @@ class Step:
|
|
276
290
|
|
277
291
|
def set_configuration_state(
|
278
292
|
self,
|
279
|
-
|
293
|
+
step_config: LayeredConfigTree,
|
280
294
|
combined_implementations: LayeredConfigTree,
|
281
295
|
input_data_config: LayeredConfigTree,
|
282
296
|
) -> None:
|
283
|
-
"""Sets the configuration state
|
284
|
-
|
285
|
-
The so-called 'configuration state' for a given ``Step`` is backed up by
|
286
|
-
a :class:`ConfigurationState` class and is assigned to its :attr:`_configuration_state`
|
287
|
-
attribute. There are two possible ``ConfigurationStates``:
|
288
|
-
:class:`LeafConfigurationState` and :class:`NonLeafConfigurationState`.
|
289
|
-
|
290
|
-
This method sets the configuration state of this ``Step`` based on whether
|
291
|
-
or not a :attr:`config_key` is set *and exists is the ``Step's`` configuration*
|
292
|
-
(i.e. its portion of the user-suppled pipeline specification
|
293
|
-
file); any required deviation from this behavior requires special
|
294
|
-
handling.
|
297
|
+
"""Sets the configuration state to 'leaf'.
|
295
298
|
|
296
299
|
Parameters
|
297
300
|
----------
|
298
|
-
|
299
|
-
The configuration of
|
301
|
+
step_config
|
302
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
303
|
+
the ``Step's`` name.
|
300
304
|
combined_implementations
|
301
305
|
The configuration for any implementations to be combined.
|
302
306
|
input_data_config
|
303
307
|
The input data configuration for the entire pipeline.
|
304
308
|
"""
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
self._configuration_state = NonLeafConfigurationState(
|
309
|
-
self, sub_config, combined_implementations, input_data_config
|
310
|
-
)
|
311
|
-
else:
|
312
|
-
self._configuration_state = LeafConfigurationState(
|
313
|
-
self, sub_config, combined_implementations, input_data_config
|
314
|
-
)
|
309
|
+
self._configuration_state = LeafConfigurationState(
|
310
|
+
self, step_config, combined_implementations, input_data_config
|
311
|
+
)
|
315
312
|
|
316
313
|
def get_implementation_slot_mappings(self) -> dict[str, list[SlotMapping]]:
|
317
314
|
"""Gets the input and output :class:`SlotMappings<easylink.graph_components.SlotMapping>`."""
|
@@ -326,113 +323,6 @@ class Step:
|
|
326
323
|
],
|
327
324
|
}
|
328
325
|
|
329
|
-
##################
|
330
|
-
# Helper methods #
|
331
|
-
##################
|
332
|
-
|
333
|
-
def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
|
334
|
-
"""Create a StepGraph from the nodes and edges the step was initialized with."""
|
335
|
-
step_graph = StepGraph()
|
336
|
-
for step in nodes:
|
337
|
-
step_graph.add_node_from_step(step)
|
338
|
-
for edge in edges:
|
339
|
-
step_graph.add_edge_from_params(edge)
|
340
|
-
return step_graph
|
341
|
-
|
342
|
-
def _validate_leaf(
|
343
|
-
self,
|
344
|
-
step_config: LayeredConfigTree,
|
345
|
-
combined_implementations: LayeredConfigTree,
|
346
|
-
) -> dict[str, list[str]]:
|
347
|
-
"""Validates a leaf ``Step``."""
|
348
|
-
errors = {}
|
349
|
-
metadata = load_yaml(paths.IMPLEMENTATION_METADATA)
|
350
|
-
error_key = f"step {self.name}"
|
351
|
-
if (
|
352
|
-
"implementation" not in step_config
|
353
|
-
and COMBINED_IMPLEMENTATION_KEY not in step_config
|
354
|
-
):
|
355
|
-
errors[error_key] = [
|
356
|
-
"The step configuration does not contain an 'implementation' key or a "
|
357
|
-
"reference to a combined implementation."
|
358
|
-
]
|
359
|
-
elif (
|
360
|
-
COMBINED_IMPLEMENTATION_KEY in step_config
|
361
|
-
and not step_config[COMBINED_IMPLEMENTATION_KEY] in combined_implementations
|
362
|
-
):
|
363
|
-
errors[error_key] = [
|
364
|
-
f"The step refers to a combined implementation but {step_config[COMBINED_IMPLEMENTATION_KEY]} is not a "
|
365
|
-
f"valid combined implementation."
|
366
|
-
]
|
367
|
-
else:
|
368
|
-
implementation_config = (
|
369
|
-
step_config["implementation"]
|
370
|
-
if "implementation" in step_config
|
371
|
-
else combined_implementations[step_config[COMBINED_IMPLEMENTATION_KEY]]
|
372
|
-
)
|
373
|
-
if not "name" in implementation_config:
|
374
|
-
errors[error_key] = [
|
375
|
-
"The implementation configuration does not contain a 'name' key."
|
376
|
-
]
|
377
|
-
elif not implementation_config["name"] in metadata:
|
378
|
-
errors[error_key] = [
|
379
|
-
f"Implementation '{implementation_config['name']}' is not supported. "
|
380
|
-
f"Supported implementations are: {list(metadata.keys())}."
|
381
|
-
]
|
382
|
-
return errors
|
383
|
-
|
384
|
-
def _validate_nonleaf(
|
385
|
-
self,
|
386
|
-
step_config: LayeredConfigTree,
|
387
|
-
combined_implementations: LayeredConfigTree,
|
388
|
-
input_data_config: LayeredConfigTree,
|
389
|
-
) -> dict[str, list[str]]:
|
390
|
-
"""Validates a non-leaf ``Step``."""
|
391
|
-
errors = {}
|
392
|
-
nodes = self.step_graph.nodes
|
393
|
-
for node in nodes:
|
394
|
-
step = nodes[node]["step"]
|
395
|
-
if isinstance(step, IOStep):
|
396
|
-
continue
|
397
|
-
if step.name not in step_config:
|
398
|
-
step_errors = {f"step {step.name}": [f"The step is not configured."]}
|
399
|
-
else:
|
400
|
-
step_errors = step.validate_step(
|
401
|
-
step_config[step.name], combined_implementations, input_data_config
|
402
|
-
)
|
403
|
-
if step_errors:
|
404
|
-
errors.update(step_errors)
|
405
|
-
extra_steps = set(step_config.keys()) - set(nodes)
|
406
|
-
for extra_step in extra_steps:
|
407
|
-
errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
|
408
|
-
return errors
|
409
|
-
|
410
|
-
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
411
|
-
"""Convenience method to get a ``Step's`` configuration.
|
412
|
-
|
413
|
-
Some types of ``Steps`` have a unique :attr:`config_key` (defined by the
|
414
|
-
user via the pipeline specification file) that is used to specify the behavior
|
415
|
-
of the ``Step`` (e.g. looping, parallel, etc). This method simply returns
|
416
|
-
the ``Step's`` sub-configuration keyed to that ``config_key`` (if it exists,
|
417
|
-
i.e. is not a basic ``Step``).
|
418
|
-
|
419
|
-
Parameters
|
420
|
-
----------
|
421
|
-
step_config
|
422
|
-
The high-level configuration of this ``Step``.
|
423
|
-
|
424
|
-
Returns
|
425
|
-
-------
|
426
|
-
The sub-configuration of this ``Step`` keyed on the ``config_key``
|
427
|
-
(if it exists).
|
428
|
-
|
429
|
-
"""
|
430
|
-
return (
|
431
|
-
step_config
|
432
|
-
if not self.config_key in step_config
|
433
|
-
else step_config[self.config_key]
|
434
|
-
)
|
435
|
-
|
436
326
|
|
437
327
|
class IOStep(Step):
|
438
328
|
"""A special case type of :class:`Step` used to represent incoming and outgoing data.
|
@@ -485,27 +375,24 @@ class IOStep(Step):
|
|
485
375
|
|
486
376
|
def set_configuration_state(
|
487
377
|
self,
|
488
|
-
|
378
|
+
step_config: LayeredConfigTree,
|
489
379
|
combined_implementations: LayeredConfigTree,
|
490
380
|
input_data_config: LayeredConfigTree,
|
491
381
|
) -> None:
|
492
|
-
"""Sets the configuration state to leaf.
|
493
|
-
|
494
|
-
An ``IOStep`` is by definition a leaf ``Step`` and so we assign that here
|
495
|
-
instead of relying on the default behavior of the parent class.
|
382
|
+
"""Sets the configuration state to 'leaf'.
|
496
383
|
|
497
384
|
Parameters
|
498
385
|
----------
|
499
|
-
|
500
|
-
The configuration of
|
501
|
-
|
386
|
+
step_config
|
387
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
388
|
+
the ``Step's`` name.
|
502
389
|
combined_implementations
|
503
390
|
The configuration for any implementations to be combined.
|
504
391
|
input_data_config
|
505
392
|
The input data configuration for the entire pipeline.
|
506
393
|
"""
|
507
394
|
self._configuration_state = LeafConfigurationState(
|
508
|
-
self,
|
395
|
+
self, step_config, combined_implementations, input_data_config
|
509
396
|
)
|
510
397
|
|
511
398
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -548,29 +435,29 @@ class InputStep(IOStep):
|
|
548
435
|
|
549
436
|
def set_configuration_state(
|
550
437
|
self,
|
551
|
-
|
438
|
+
step_config: LayeredConfigTree,
|
552
439
|
combined_implementations: LayeredConfigTree,
|
553
440
|
input_data_config: LayeredConfigTree,
|
554
441
|
) -> None:
|
555
442
|
"""Sets the configuration state and updates the ``OutputSlots``.
|
556
443
|
|
557
|
-
In addition to setting ``InputStep`` to a leaf configuration state, this
|
444
|
+
In addition to setting ``InputStep`` to a 'leaf' configuration state, this
|
558
445
|
method also updates the ``OutputSlots`` to include all of the dataset keys
|
559
446
|
in the input data specification file. This allows for future use of
|
560
|
-
specific datasets instead of only
|
447
|
+
*specific* datasets instead of only *all* of them.
|
561
448
|
|
562
449
|
Parameters
|
563
450
|
----------
|
564
|
-
|
565
|
-
The configuration of
|
566
|
-
|
451
|
+
step_config
|
452
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
453
|
+
the ``Step's`` name.
|
567
454
|
combined_implementations
|
568
455
|
The configuration for any implementations to be combined.
|
569
456
|
input_data_config
|
570
457
|
The input data configuration for the entire pipeline.
|
571
458
|
"""
|
572
459
|
super().set_configuration_state(
|
573
|
-
|
460
|
+
step_config, combined_implementations, input_data_config
|
574
461
|
)
|
575
462
|
for input_data_key in input_data_config:
|
576
463
|
self.output_slots[input_data_key] = OutputSlot(name=input_data_key)
|
@@ -601,19 +488,185 @@ class HierarchicalStep(Step):
|
|
601
488
|
|
602
489
|
See :class:`Step` for inherited attributes.
|
603
490
|
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
491
|
+
Parameters
|
492
|
+
----------
|
493
|
+
nodes
|
494
|
+
All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``.
|
495
|
+
edges
|
496
|
+
The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes.
|
497
|
+
step_graph
|
498
|
+
The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
499
|
+
graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``.
|
500
|
+
user_configurable
|
501
|
+
Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
|
502
|
+
attribute to allow for back-end ``HierarchicalStep`` creation that are not
|
503
|
+
user-facing (i.e. they do not need to provide a 'substeps' configuration key).
|
609
504
|
|
610
505
|
"""
|
611
506
|
|
507
|
+
def __init__(
|
508
|
+
self,
|
509
|
+
step_name,
|
510
|
+
name=None,
|
511
|
+
input_slots=(),
|
512
|
+
output_slots=(),
|
513
|
+
nodes=(),
|
514
|
+
edges=(),
|
515
|
+
input_slot_mappings=(),
|
516
|
+
output_slot_mappings=(),
|
517
|
+
user_configurable=True,
|
518
|
+
):
|
519
|
+
super().__init__(
|
520
|
+
step_name,
|
521
|
+
name,
|
522
|
+
input_slots,
|
523
|
+
output_slots,
|
524
|
+
input_slot_mappings,
|
525
|
+
output_slot_mappings,
|
526
|
+
)
|
527
|
+
self.nodes = nodes
|
528
|
+
"""All sub-nodes (i.e. sub-``Steps``) that make up this ``HierarchicalStep``."""
|
529
|
+
for node in self.nodes:
|
530
|
+
node.set_parent_step(self)
|
531
|
+
self.edges = edges
|
532
|
+
"""The :class:`~easylink.graph_components.EdgeParams` of the sub-nodes."""
|
533
|
+
self.step_graph = self._get_step_graph(nodes, edges)
|
534
|
+
"""The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
535
|
+
graph (DAG) of sub-nodes and their edges that make up this ``HierarchicalStep``."""
|
536
|
+
self.user_configurable = user_configurable
|
537
|
+
"""Whether or not the ``HierarchicalStep`` is user-configurable. It is a convenience
|
538
|
+
attribute to allow for back-end ``HierarchicalStep`` creation that are not
|
539
|
+
user-facing (i.e. they do not need to provide a 'substeps' configuration key)."""
|
540
|
+
|
612
541
|
@property
|
613
542
|
def config_key(self):
|
614
543
|
"""The pipeline specification key required for a ``HierarchicalStep``."""
|
615
544
|
return "substeps"
|
616
545
|
|
546
|
+
def validate_step(
|
547
|
+
self,
|
548
|
+
step_config: LayeredConfigTree,
|
549
|
+
combined_implementations: LayeredConfigTree,
|
550
|
+
input_data_config: LayeredConfigTree,
|
551
|
+
) -> dict[str, list[str]]:
|
552
|
+
"""Validates the ``HierarchicalStep``.
|
553
|
+
|
554
|
+
Parameters
|
555
|
+
----------
|
556
|
+
step_config
|
557
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
558
|
+
the ``Step's`` name.
|
559
|
+
combined_implementations
|
560
|
+
The configuration for any implementations to be combined.
|
561
|
+
input_data_config
|
562
|
+
The input data configuration for the entire pipeline.
|
563
|
+
|
564
|
+
Returns
|
565
|
+
-------
|
566
|
+
A dictionary of errors, where the keys are the ``HierarchicalStep``
|
567
|
+
name and the values are lists of error messages associated with the
|
568
|
+
given ``HierarchicalStep``.
|
569
|
+
|
570
|
+
Notes
|
571
|
+
-----
|
572
|
+
A ``HierarchicalStep`` can be in either a "leaf" or a "non-leaf" configuration
|
573
|
+
state and the validation process is different for each.
|
574
|
+
|
575
|
+
If the ``HierarchicalStep`` does not validate (i.e. errors are found and
|
576
|
+
the returned dictionary is non-empty), the tool will exit and the pipeline
|
577
|
+
will not run.
|
578
|
+
|
579
|
+
We attempt to batch error messages as much as possible, but there may be
|
580
|
+
times where the configuration is so ill-formed that we are unable to handle
|
581
|
+
all issues in one pass. In these cases, new errors may be found after the
|
582
|
+
initial ones are handled.
|
583
|
+
"""
|
584
|
+
if self.user_configurable:
|
585
|
+
if self.config_key in step_config:
|
586
|
+
step_config = step_config[self.config_key]
|
587
|
+
else:
|
588
|
+
# This is a leaf step
|
589
|
+
return super().validate_step(
|
590
|
+
step_config, combined_implementations, input_data_config
|
591
|
+
)
|
592
|
+
return self._validate_step_graph(
|
593
|
+
step_config, combined_implementations, input_data_config
|
594
|
+
)
|
595
|
+
|
596
|
+
def set_configuration_state(
|
597
|
+
self,
|
598
|
+
step_config: LayeredConfigTree,
|
599
|
+
combined_implementations: LayeredConfigTree,
|
600
|
+
input_data_config: LayeredConfigTree,
|
601
|
+
) -> None:
|
602
|
+
"""Sets the configuration state.
|
603
|
+
|
604
|
+
The configuration state of a ``HierarchicalStep`` depends on (1) whether
|
605
|
+
or not it is :attr:`user_configurable` and (2) whether or not the
|
606
|
+
:attr:`config_key` exists in the pipeline specification file.
|
607
|
+
|
608
|
+
Parameters
|
609
|
+
----------
|
610
|
+
step_config
|
611
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
612
|
+
the ``Step's`` name.
|
613
|
+
combined_implementations
|
614
|
+
The configuration for any implementations to be combined.
|
615
|
+
input_data_config
|
616
|
+
The input data configuration for the entire pipeline.
|
617
|
+
"""
|
618
|
+
if self.user_configurable:
|
619
|
+
if self.config_key in step_config:
|
620
|
+
step_config = step_config[self.config_key]
|
621
|
+
configuration_state_type = NonLeafConfigurationState
|
622
|
+
else:
|
623
|
+
configuration_state_type = LeafConfigurationState
|
624
|
+
else:
|
625
|
+
# Substeps must be used, so we require non-leaf here
|
626
|
+
configuration_state_type = NonLeafConfigurationState
|
627
|
+
self._configuration_state = configuration_state_type(
|
628
|
+
self, step_config, combined_implementations, input_data_config
|
629
|
+
)
|
630
|
+
|
631
|
+
##################
|
632
|
+
# Helper methods #
|
633
|
+
##################
|
634
|
+
|
635
|
+
def _get_step_graph(self, nodes: list[Step], edges: list[EdgeParams]) -> StepGraph:
|
636
|
+
"""Creates a :class:`~easylink.graph_components.StepGraph` from the nodes and edges the step was initialized with."""
|
637
|
+
step_graph = StepGraph()
|
638
|
+
for step in nodes:
|
639
|
+
step_graph.add_node_from_step(step)
|
640
|
+
for edge in edges:
|
641
|
+
step_graph.add_edge_from_params(edge)
|
642
|
+
return step_graph
|
643
|
+
|
644
|
+
def _validate_step_graph(
|
645
|
+
self,
|
646
|
+
step_config: LayeredConfigTree,
|
647
|
+
combined_implementations: LayeredConfigTree,
|
648
|
+
input_data_config: LayeredConfigTree,
|
649
|
+
) -> dict[str, list[str]]:
|
650
|
+
"""Validates the nodes of a :class:`~easylink.graph_components.StepGraph`."""
|
651
|
+
errors = {}
|
652
|
+
for node in self.step_graph.nodes:
|
653
|
+
step = self.step_graph.nodes[node]["step"]
|
654
|
+
if isinstance(step, IOStep):
|
655
|
+
continue
|
656
|
+
else:
|
657
|
+
if step.name not in step_config:
|
658
|
+
step_errors = {f"step {step.name}": ["The step is not configured."]}
|
659
|
+
else:
|
660
|
+
step_errors = step.validate_step(
|
661
|
+
step_config[step.name], combined_implementations, input_data_config
|
662
|
+
)
|
663
|
+
if step_errors:
|
664
|
+
errors.update(step_errors)
|
665
|
+
extra_steps = set(step_config.keys()) - set(self.step_graph.nodes)
|
666
|
+
for extra_step in extra_steps:
|
667
|
+
errors[f"step {extra_step}"] = [f"{extra_step} is not a valid step."]
|
668
|
+
return errors
|
669
|
+
|
617
670
|
|
618
671
|
class TemplatedStep(Step, ABC):
|
619
672
|
"""A type of :class:`Step` that may contain multiplicity.
|
@@ -641,8 +694,12 @@ class TemplatedStep(Step, ABC):
|
|
641
694
|
template_step.input_slots.values(),
|
642
695
|
template_step.output_slots.values(),
|
643
696
|
)
|
697
|
+
self.step_graph = None
|
698
|
+
"""The :class:`~easylink.graph_components.StepGraph` i.e. the directed acyclic
|
699
|
+
graph (DAG) of sub-nodes and their edges that make up this ``TemplatedStep``."""
|
644
700
|
self.template_step = template_step
|
645
701
|
"""The ``Step`` to be templated."""
|
702
|
+
|
646
703
|
self.template_step.set_parent_step(self)
|
647
704
|
|
648
705
|
@property
|
@@ -716,7 +773,8 @@ class TemplatedStep(Step, ABC):
|
|
716
773
|
Parameters
|
717
774
|
----------
|
718
775
|
step_config
|
719
|
-
The configuration of this ``
|
776
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
777
|
+
the ``Step's`` name.
|
720
778
|
combined_implementations
|
721
779
|
The configuration for any implementations to be combined.
|
722
780
|
input_data_config
|
@@ -730,7 +788,7 @@ class TemplatedStep(Step, ABC):
|
|
730
788
|
|
731
789
|
Notes
|
732
790
|
-----
|
733
|
-
If the ``
|
791
|
+
If the ``TemplatedStep`` does not validate (i.e. errors are found and the returned
|
734
792
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
735
793
|
|
736
794
|
We attempt to batch error messages as much as possible, but there may be
|
@@ -739,6 +797,7 @@ class TemplatedStep(Step, ABC):
|
|
739
797
|
initial ones are handled.
|
740
798
|
"""
|
741
799
|
if not self.config_key in step_config:
|
800
|
+
# This is a leaf step
|
742
801
|
return self.template_step.validate_step(
|
743
802
|
step_config, combined_implementations, input_data_config
|
744
803
|
)
|
@@ -770,51 +829,32 @@ class TemplatedStep(Step, ABC):
|
|
770
829
|
]
|
771
830
|
parallel_errors.update(
|
772
831
|
self.template_step.validate_step(
|
773
|
-
parallel_config,
|
832
|
+
LayeredConfigTree(parallel_config),
|
833
|
+
combined_implementations,
|
834
|
+
input_data_config,
|
774
835
|
)
|
775
836
|
)
|
776
837
|
if parallel_errors:
|
777
838
|
errors[f"step {self.name}"][f"{self.node_prefix}_{i+1}"] = parallel_errors
|
778
839
|
return errors
|
779
840
|
|
780
|
-
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
781
|
-
"""Convenience method to get the ``TemplatedStep's`` configuration.
|
782
|
-
|
783
|
-
``TemplatedSteps`` may include multiplicity. In such cases, their configurations
|
784
|
-
must be modified to include the expanded ``Steps``.
|
785
|
-
|
786
|
-
Parameters
|
787
|
-
----------
|
788
|
-
step_config
|
789
|
-
The high-level configuration of this ``TemplatedStep``.
|
790
|
-
|
791
|
-
Returns
|
792
|
-
-------
|
793
|
-
The expanded sub-configuration of this ``TemplatedStep`` based on the
|
794
|
-
:attr:`Step.config_key` and expanded to include all looped or parallelized
|
795
|
-
sub-``Steps``).
|
796
|
-
"""
|
797
|
-
if self.config_key in step_config:
|
798
|
-
expanded_step_config = LayeredConfigTree()
|
799
|
-
for i, sub_config in enumerate(step_config[self.config_key]):
|
800
|
-
expanded_step_config.update(
|
801
|
-
{f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
|
802
|
-
)
|
803
|
-
return expanded_step_config
|
804
|
-
return step_config
|
805
|
-
|
806
841
|
def set_configuration_state(
|
807
842
|
self,
|
808
|
-
|
843
|
+
step_config: LayeredConfigTree,
|
809
844
|
combined_implementations: LayeredConfigTree,
|
810
845
|
input_data_config: LayeredConfigTree,
|
811
846
|
):
|
812
|
-
"""Sets the configuration state
|
847
|
+
"""Sets the configuration state to 'non-leaf'.
|
848
|
+
|
849
|
+
In addition to setting the configuration state, this also updates the
|
850
|
+
:class:`~easylink.graph_components.StepGraph` and
|
851
|
+
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
813
852
|
|
814
853
|
Parameters
|
815
854
|
----------
|
816
|
-
|
817
|
-
The configuration of
|
855
|
+
step_config
|
856
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
857
|
+
the ``Step's`` name.
|
818
858
|
combined_implementations
|
819
859
|
The configuration for any implementations to be combined.
|
820
860
|
input_data_config
|
@@ -828,7 +868,6 @@ class TemplatedStep(Step, ABC):
|
|
828
868
|
:class:`~easylink.implementation.Implementation`, i.e. the one with a
|
829
869
|
:class:`LeafConfigurationState`.
|
830
870
|
"""
|
831
|
-
step_config = parent_config[self.name]
|
832
871
|
if self.config_key not in step_config:
|
833
872
|
# Special handle the step_graph update
|
834
873
|
self.step_graph = StepGraph()
|
@@ -857,6 +896,36 @@ class TemplatedStep(Step, ABC):
|
|
857
896
|
self, expanded_config, combined_implementations, input_data_config
|
858
897
|
)
|
859
898
|
|
899
|
+
##################
|
900
|
+
# Helper Methods #
|
901
|
+
##################
|
902
|
+
|
903
|
+
def _get_config(self, step_config: LayeredConfigTree) -> LayeredConfigTree:
|
904
|
+
"""Convenience method to get the ``TemplatedStep's`` configuration.
|
905
|
+
|
906
|
+
``TemplatedSteps`` may include multiplicity. In such cases, their configurations
|
907
|
+
must be modified to include the expanded ``Steps``.
|
908
|
+
|
909
|
+
Parameters
|
910
|
+
----------
|
911
|
+
step_config
|
912
|
+
The high-level configuration of this ``TemplatedStep``.
|
913
|
+
|
914
|
+
Returns
|
915
|
+
-------
|
916
|
+
The expanded sub-configuration of this ``TemplatedStep`` based on the
|
917
|
+
:attr:`Step.config_key` and expanded to include all looped or parallelized
|
918
|
+
sub-``Steps``).
|
919
|
+
"""
|
920
|
+
if self.config_key in step_config:
|
921
|
+
expanded_step_config = LayeredConfigTree()
|
922
|
+
for i, sub_config in enumerate(step_config[self.config_key]):
|
923
|
+
expanded_step_config.update(
|
924
|
+
{f"{self.name}_{self.node_prefix}_{i+1}": sub_config}
|
925
|
+
)
|
926
|
+
return expanded_step_config
|
927
|
+
return step_config
|
928
|
+
|
860
929
|
def _duplicate_template_step(self) -> Step:
|
861
930
|
"""Makes a duplicate of the template ``Step``.
|
862
931
|
|
@@ -1071,6 +1140,8 @@ class EmbarrassinglyParallelStep(Step):
|
|
1071
1140
|
in that it is not configured by the user to be run in parallel - it completely
|
1072
1141
|
happens on the back end for performance reasons. As such, note that it inherits
|
1073
1142
|
from :class:`Step` instead of :class:`TemplatedStep`.
|
1143
|
+
|
1144
|
+
See :class:`Step` for inherited attributes.
|
1074
1145
|
"""
|
1075
1146
|
|
1076
1147
|
def __init__(
|
@@ -1124,10 +1195,7 @@ class EmbarrassinglyParallelStep(Step):
|
|
1124
1195
|
|
1125
1196
|
|
1126
1197
|
class ChoiceStep(Step):
|
1127
|
-
"""A type of :class:`Step` that allows for choosing
|
1128
|
-
|
1129
|
-
A ``ChoiceStep`` allows a user to select a single path from a set of possible
|
1130
|
-
paths.
|
1198
|
+
"""A type of :class:`Step` that allows for choosing from a set of options.
|
1131
1199
|
|
1132
1200
|
See :class:`Step` for inherited attributes.
|
1133
1201
|
|
@@ -1141,7 +1209,7 @@ class ChoiceStep(Step):
|
|
1141
1209
|
All required :class:`OutputSlots<easylink.graph_components.OutputSlot>`.
|
1142
1210
|
choices
|
1143
1211
|
A dictionary of choices, where the keys are the names/types of choices and
|
1144
|
-
the values are dictionaries containing that type's
|
1212
|
+
the values are dictionaries containing that type's ``Step`` and related
|
1145
1213
|
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
1146
1214
|
|
1147
1215
|
Notes
|
@@ -1150,6 +1218,13 @@ class ChoiceStep(Step):
|
|
1150
1218
|
:attr:`Step.config_key` in the pipeline specification file. Instead, the pipeline
|
1151
1219
|
configuration must contain a 'type' key that specifies which option to choose.
|
1152
1220
|
|
1221
|
+
The :attr:`choices` dictionary must contain the choice type names as the outer
|
1222
|
+
keys. The value of each of these types is then another dictionary containing
|
1223
|
+
'step', 'input_slot_mappings', and 'output_slot_mappings' keys with their
|
1224
|
+
corresponding values.
|
1225
|
+
|
1226
|
+
Each choice type must specify a *single* ``Step`` and its associated ``SlotMappings``.
|
1227
|
+
Any choice paths that require multiple sub-steps should specify a :class:`HierarchicalStep`.
|
1153
1228
|
"""
|
1154
1229
|
|
1155
1230
|
def __init__(
|
@@ -1157,9 +1232,7 @@ class ChoiceStep(Step):
|
|
1157
1232
|
step_name: str,
|
1158
1233
|
input_slots: Iterable[InputSlot],
|
1159
1234
|
output_slots: Iterable[OutputSlot],
|
1160
|
-
choices: dict[
|
1161
|
-
str, dict[str, list[Step | EdgeParams | InputSlotMapping | OutputSlotMapping]]
|
1162
|
-
],
|
1235
|
+
choices: dict[str, dict[str, Step | SlotMapping]],
|
1163
1236
|
) -> None:
|
1164
1237
|
super().__init__(
|
1165
1238
|
step_name,
|
@@ -1182,7 +1255,8 @@ class ChoiceStep(Step):
|
|
1182
1255
|
Parameters
|
1183
1256
|
----------
|
1184
1257
|
step_config
|
1185
|
-
The configuration of this ``
|
1258
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
1259
|
+
the ``Step's`` name.
|
1186
1260
|
combined_implementations
|
1187
1261
|
The configuration for any implementations to be combined.
|
1188
1262
|
input_data_config
|
@@ -1195,8 +1269,6 @@ class ChoiceStep(Step):
|
|
1195
1269
|
|
1196
1270
|
Notes
|
1197
1271
|
-----
|
1198
|
-
A ``ChoiceStep`` by definition must be set with a :class:`NonLeafConfigurationState`.
|
1199
|
-
|
1200
1272
|
If the ``Step`` does not validate (i.e. errors are found and the returned
|
1201
1273
|
dictionary is non-empty), the tool will exit and the pipeline will not run.
|
1202
1274
|
|
@@ -1205,16 +1277,9 @@ class ChoiceStep(Step):
|
|
1205
1277
|
all issues in one pass. In these cases, new errors may be found after the
|
1206
1278
|
initial ones are handled.
|
1207
1279
|
|
1208
|
-
We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
|
1209
|
-
in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
|
1210
|
-
as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
|
1211
|
-
prior to :meth:`set_configuration_state`, but the validations itself actually
|
1212
|
-
requires the updated ``StepGraph`` and ``SlotMappings``.
|
1213
|
-
|
1214
1280
|
We do not attempt to validate the subgraph here if the 'type' key is unable
|
1215
1281
|
to be validated.
|
1216
1282
|
"""
|
1217
|
-
|
1218
1283
|
chosen_type = step_config.get("type")
|
1219
1284
|
# Handle problems with the 'type' key
|
1220
1285
|
if not chosen_type:
|
@@ -1222,104 +1287,64 @@ class ChoiceStep(Step):
|
|
1222
1287
|
if chosen_type not in self.choices:
|
1223
1288
|
return {
|
1224
1289
|
f"step {self.name}": [
|
1225
|
-
f"'{step_config
|
1290
|
+
f"'{step_config.type}' is not a supported 'type'. Valid choices are: {list(self.choices)}."
|
1226
1291
|
]
|
1227
1292
|
}
|
1228
|
-
|
1229
|
-
|
1293
|
+
|
1294
|
+
chosen_step = self.choices[chosen_type]["step"]
|
1230
1295
|
chosen_step_config = LayeredConfigTree(
|
1231
1296
|
{key: value for key, value in step_config.items() if key != "type"}
|
1232
1297
|
)
|
1233
|
-
|
1234
|
-
if set(allowable_steps) != set(chosen_step_config):
|
1298
|
+
if chosen_step.name not in chosen_step_config:
|
1235
1299
|
return {
|
1236
1300
|
f"step {self.name}": [
|
1237
|
-
f"
|
1301
|
+
f"'{chosen_step.name}' is not configured. Confirm you have specified "
|
1302
|
+
f"the correct steps for the '{chosen_type}' type."
|
1238
1303
|
]
|
1239
1304
|
}
|
1240
|
-
|
1241
|
-
# HACK: Update the step graph and mappings here because we need them for validation
|
1242
|
-
self.step_graph = self._update_step_graph(subgraph)
|
1243
|
-
self.slot_mappings = self._update_slot_mappings(subgraph)
|
1244
1305
|
# NOTE: A ChoiceStep is by definition a non-leaf step
|
1245
|
-
return
|
1246
|
-
chosen_step_config, combined_implementations, input_data_config
|
1306
|
+
return chosen_step.validate_step(
|
1307
|
+
chosen_step_config[chosen_step.name], combined_implementations, input_data_config
|
1247
1308
|
)
|
1248
1309
|
|
1249
1310
|
def set_configuration_state(
|
1250
1311
|
self,
|
1251
|
-
|
1312
|
+
step_config: LayeredConfigTree,
|
1252
1313
|
combined_implementations: LayeredConfigTree,
|
1253
1314
|
input_data_config: LayeredConfigTree,
|
1254
1315
|
):
|
1255
|
-
"""Sets the configuration state
|
1316
|
+
"""Sets the configuration state to 'non-leaf'.
|
1317
|
+
|
1318
|
+
In addition to setting the configuration state, this also updates the
|
1319
|
+
:class:`~easylink.graph_components.StepGraph` and
|
1320
|
+
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
1256
1321
|
|
1257
1322
|
Parameters
|
1258
1323
|
----------
|
1259
|
-
|
1260
|
-
The configuration of
|
1324
|
+
step_config
|
1325
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
1326
|
+
the ``Step's`` name.
|
1261
1327
|
combined_implementations
|
1262
1328
|
The configuration for any implementations to be combined.
|
1263
1329
|
input_data_config
|
1264
1330
|
The input data configuration for the entire pipeline.
|
1265
|
-
|
1266
|
-
Notes
|
1267
|
-
-----
|
1268
|
-
We update the :class:`easylink.graph_components.StepGraph` and ``SlotMappings``
|
1269
|
-
in :meth:`validate_step` (as opposed to in :meth:`set_configuration_state`
|
1270
|
-
as is done in :class:`TemplatedStep`) because :meth:`validate_step` is called
|
1271
|
-
prior to :meth:`set_configuration_state`, but the validations itself actually
|
1272
|
-
requires the updated ``StepGraph`` and ``SlotMappings``.
|
1273
1331
|
"""
|
1332
|
+
choice = self.choices[step_config["type"]]
|
1333
|
+
self.step_graph = StepGraph()
|
1334
|
+
self.step_graph.add_node_from_step(choice["step"])
|
1335
|
+
self.slot_mappings = {
|
1336
|
+
"input": choice["input_slot_mappings"],
|
1337
|
+
"output": choice["output_slot_mappings"],
|
1338
|
+
}
|
1274
1339
|
|
1275
|
-
|
1276
|
-
{key: value for key, value in
|
1340
|
+
chosen_step_config = LayeredConfigTree(
|
1341
|
+
{key: value for key, value in step_config.items() if key != "type"}
|
1277
1342
|
)
|
1278
|
-
# ChoiceSteps by definition
|
1343
|
+
# ChoiceSteps by definition are in a NonLeafConfigurationState
|
1279
1344
|
self._configuration_state = NonLeafConfigurationState(
|
1280
|
-
self,
|
1345
|
+
self, chosen_step_config, combined_implementations, input_data_config
|
1281
1346
|
)
|
1282
1347
|
|
1283
|
-
@staticmethod
|
1284
|
-
def _update_step_graph(subgraph: dict[str, Any]) -> StepGraph:
|
1285
|
-
"""Updates the :class:`~easylink.graph_components.StepGraph` with the choice.
|
1286
|
-
|
1287
|
-
Parameters
|
1288
|
-
----------
|
1289
|
-
subgraph
|
1290
|
-
Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
|
1291
|
-
|
1292
|
-
Returns
|
1293
|
-
-------
|
1294
|
-
The updated ``StepGraph`` for the chosen type.
|
1295
|
-
"""
|
1296
|
-
nodes = subgraph["nodes"]
|
1297
|
-
edges = subgraph["edges"]
|
1298
|
-
|
1299
|
-
graph = StepGraph()
|
1300
|
-
for node in nodes:
|
1301
|
-
graph.add_node_from_step(node)
|
1302
|
-
for edge in edges:
|
1303
|
-
graph.add_edge_from_params(edge)
|
1304
|
-
return graph
|
1305
|
-
|
1306
|
-
@staticmethod
|
1307
|
-
def _update_slot_mappings(subgraph: dict[str, Any]) -> dict[str, list[SlotMapping]]:
|
1308
|
-
"""Updates the :class:`SlotMappings<easylink.graph_components.SlotMapping>` to the choice type.
|
1309
|
-
|
1310
|
-
Parameters
|
1311
|
-
----------
|
1312
|
-
sub_graph
|
1313
|
-
Subgraph parameters (nodes, edges, and slot mappings) for the chosen type.
|
1314
|
-
|
1315
|
-
Returns
|
1316
|
-
-------
|
1317
|
-
Updated ``SlotMappings`` that match the choice type.
|
1318
|
-
"""
|
1319
|
-
input_mappings = subgraph["input_slot_mappings"]
|
1320
|
-
output_mappings = subgraph["output_slot_mappings"]
|
1321
|
-
return {"input": input_mappings, "output": output_mappings}
|
1322
|
-
|
1323
1348
|
|
1324
1349
|
class ConfigurationState(ABC):
|
1325
1350
|
"""A given :class:`Step's<Step>` configuration state.
|
@@ -1334,8 +1359,9 @@ class ConfigurationState(ABC):
|
|
1334
1359
|
----------
|
1335
1360
|
step
|
1336
1361
|
The ``Step`` this ``ConfigurationState`` is tied to.
|
1337
|
-
|
1338
|
-
The
|
1362
|
+
step_config
|
1363
|
+
The internal configuration of this ``Step`` we are setting the state
|
1364
|
+
for; it should not include the ``Step's`` name.
|
1339
1365
|
combined_implementations
|
1340
1366
|
The configuration for any implementations to be combined.
|
1341
1367
|
input_data_config
|
@@ -1346,14 +1372,15 @@ class ConfigurationState(ABC):
|
|
1346
1372
|
def __init__(
|
1347
1373
|
self,
|
1348
1374
|
step: Step,
|
1349
|
-
|
1375
|
+
step_config: LayeredConfigTree,
|
1350
1376
|
combined_implementations: LayeredConfigTree,
|
1351
1377
|
input_data_config: LayeredConfigTree,
|
1352
1378
|
):
|
1353
1379
|
self._step = step
|
1354
1380
|
"""The ``Step`` this ``ConfigurationState`` is tied to."""
|
1355
|
-
self.
|
1356
|
-
"""The
|
1381
|
+
self.step_config = step_config
|
1382
|
+
"""The internal configuration of this ``Step`` we are setting the state
|
1383
|
+
for; it should not include the ``Step's`` name."""
|
1357
1384
|
self.combined_implementations = combined_implementations
|
1358
1385
|
"""The relevant configuration if the ``Step's`` ``Implementation``
|
1359
1386
|
has been requested to be combined with that of a different ``Step``."""
|
@@ -1394,15 +1421,15 @@ class LeafConfigurationState(ConfigurationState):
|
|
1394
1421
|
@property
|
1395
1422
|
def is_combined(self) -> bool:
|
1396
1423
|
"""Whether or not this ``Step`` is combined with another ``Step``."""
|
1397
|
-
return
|
1424
|
+
return COMBINED_IMPLEMENTATION_KEY in self.step_config
|
1398
1425
|
|
1399
1426
|
@property
|
1400
1427
|
def implementation_config(self) -> LayeredConfigTree:
|
1401
1428
|
"""The ``Step's`` specific ``Implementation`` configuration."""
|
1402
1429
|
return (
|
1403
|
-
self.combined_implementations[self.
|
1430
|
+
self.combined_implementations[self.step_config[COMBINED_IMPLEMENTATION_KEY]]
|
1404
1431
|
if self.is_combined
|
1405
|
-
else self.
|
1432
|
+
else self.step_config.implementation
|
1406
1433
|
)
|
1407
1434
|
|
1408
1435
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -1416,31 +1443,30 @@ class LeafConfigurationState(ConfigurationState):
|
|
1416
1443
|
-------
|
1417
1444
|
The ``ImplementationGraph`` related to this ``Step``.
|
1418
1445
|
"""
|
1419
|
-
|
1446
|
+
step = self._step
|
1420
1447
|
implementation_graph = ImplementationGraph()
|
1421
|
-
implementation_node_name = self._step.implementation_node_name
|
1422
1448
|
if self.is_combined:
|
1423
|
-
if isinstance(
|
1449
|
+
if isinstance(step, EmbarrassinglyParallelStep):
|
1424
1450
|
raise NotImplementedError(
|
1425
1451
|
"Combining implementations with embarrassingly parallel steps "
|
1426
1452
|
"is not yet supported."
|
1427
1453
|
)
|
1428
1454
|
implementation = PartialImplementation(
|
1429
|
-
combined_name=self.
|
1430
|
-
schema_step=
|
1431
|
-
input_slots=
|
1432
|
-
output_slots=
|
1455
|
+
combined_name=self.step_config[COMBINED_IMPLEMENTATION_KEY],
|
1456
|
+
schema_step=step.step_name,
|
1457
|
+
input_slots=step.input_slots.values(),
|
1458
|
+
output_slots=step.output_slots.values(),
|
1433
1459
|
)
|
1434
1460
|
else:
|
1435
1461
|
implementation = Implementation(
|
1436
|
-
schema_steps=[
|
1462
|
+
schema_steps=[step.step_name],
|
1437
1463
|
implementation_config=self.implementation_config,
|
1438
|
-
input_slots=
|
1439
|
-
output_slots=
|
1440
|
-
is_embarrassingly_parallel=isinstance(
|
1464
|
+
input_slots=step.input_slots.values(),
|
1465
|
+
output_slots=step.output_slots.values(),
|
1466
|
+
is_embarrassingly_parallel=isinstance(step, EmbarrassinglyParallelStep),
|
1441
1467
|
)
|
1442
1468
|
implementation_graph.add_node_from_implementation(
|
1443
|
-
implementation_node_name,
|
1469
|
+
step.implementation_node_name,
|
1444
1470
|
implementation=implementation,
|
1445
1471
|
)
|
1446
1472
|
return implementation_graph
|
@@ -1481,10 +1507,10 @@ class LeafConfigurationState(ConfigurationState):
|
|
1481
1507
|
for mapping in mappings:
|
1482
1508
|
# FIXME [MIC-5771]: Fix ParallelSteps
|
1483
1509
|
if (
|
1484
|
-
"input_data_file" in self.
|
1510
|
+
"input_data_file" in self.step_config
|
1485
1511
|
and edge.source_node == "pipeline_graph_input_data"
|
1486
1512
|
):
|
1487
|
-
edge.output_slot = self.
|
1513
|
+
edge.output_slot = self.step_config["input_data_file"]
|
1488
1514
|
imp_edge = mapping.remap_edge(edge)
|
1489
1515
|
implementation_edges.append(imp_edge)
|
1490
1516
|
else:
|
@@ -1506,8 +1532,10 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1506
1532
|
----------
|
1507
1533
|
step
|
1508
1534
|
The ``Step`` this ``ConfigurationState`` is tied to.
|
1509
|
-
|
1510
|
-
The
|
1535
|
+
step_config
|
1536
|
+
The internal configuration of this ``Step`` we are setting the state
|
1537
|
+
for; it should not include the ``Step's`` name (though it must include
|
1538
|
+
the sub-step names).
|
1511
1539
|
combined_implementations
|
1512
1540
|
The configuration for any implementations to be combined.
|
1513
1541
|
input_data_config
|
@@ -1538,16 +1566,17 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1538
1566
|
def __init__(
|
1539
1567
|
self,
|
1540
1568
|
step: Step,
|
1541
|
-
|
1569
|
+
step_config: LayeredConfigTree,
|
1542
1570
|
combined_implementations: LayeredConfigTree,
|
1543
1571
|
input_data_config: LayeredConfigTree,
|
1544
1572
|
):
|
1545
|
-
super().__init__(step,
|
1573
|
+
super().__init__(step, step_config, combined_implementations, input_data_config)
|
1546
1574
|
if not step.step_graph:
|
1547
1575
|
raise ValueError(
|
1548
1576
|
"NonLeafConfigurationState requires a subgraph upon which to operate, "
|
1549
1577
|
f"but Step {step.name} has no step graph."
|
1550
1578
|
)
|
1579
|
+
self._nodes = step.step_graph.nodes
|
1551
1580
|
self._configure_subgraph_steps()
|
1552
1581
|
|
1553
1582
|
def get_implementation_graph(self) -> ImplementationGraph:
|
@@ -1578,8 +1607,8 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1578
1607
|
|
1579
1608
|
def add_nodes(self, implementation_graph: ImplementationGraph) -> None:
|
1580
1609
|
"""Adds nodes for each ``Step`` to the ``ImplementationGraph``."""
|
1581
|
-
for node in self.
|
1582
|
-
step = self.
|
1610
|
+
for node in self._nodes:
|
1611
|
+
step = self._nodes[node]["step"]
|
1583
1612
|
implementation_graph.update(step.get_implementation_graph())
|
1584
1613
|
|
1585
1614
|
def add_edges(self, implementation_graph: ImplementationGraph) -> None:
|
@@ -1587,8 +1616,8 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1587
1616
|
for source, target, edge_attrs in self._step.step_graph.edges(data=True):
|
1588
1617
|
all_edges = []
|
1589
1618
|
edge = EdgeParams.from_graph_edge(source, target, edge_attrs)
|
1590
|
-
parent_source_step = self.
|
1591
|
-
parent_target_step = self.
|
1619
|
+
parent_source_step = self._nodes[source]["step"]
|
1620
|
+
parent_target_step = self._nodes[target]["step"]
|
1592
1621
|
|
1593
1622
|
source_edges = parent_source_step.get_implementation_edges(edge)
|
1594
1623
|
for source_edge in source_edges:
|
@@ -1624,7 +1653,7 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1624
1653
|
]
|
1625
1654
|
for mapping in mappings:
|
1626
1655
|
new_edge = mapping.remap_edge(edge)
|
1627
|
-
new_step = self.
|
1656
|
+
new_step = self._nodes[mapping.child_node]["step"]
|
1628
1657
|
imp_edges = new_step.get_implementation_edges(new_edge)
|
1629
1658
|
implementation_edges.extend(imp_edges)
|
1630
1659
|
elif edge.target_node == self._step.name:
|
@@ -1635,7 +1664,7 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1635
1664
|
]
|
1636
1665
|
for mapping in mappings:
|
1637
1666
|
new_edge = mapping.remap_edge(edge)
|
1638
|
-
new_step = self.
|
1667
|
+
new_step = self._nodes[mapping.child_node]["step"]
|
1639
1668
|
imp_edges = new_step.get_implementation_edges(new_edge)
|
1640
1669
|
implementation_edges.extend(imp_edges)
|
1641
1670
|
else:
|
@@ -1650,9 +1679,12 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1650
1679
|
This method recursively traverses the ``StepGraph`` and sets the configuration
|
1651
1680
|
state for each ``Step`` until reaching all leaf nodes.
|
1652
1681
|
"""
|
1653
|
-
|
1654
|
-
|
1655
|
-
|
1682
|
+
for node in self._nodes:
|
1683
|
+
step = self._nodes[node]["step"]
|
1684
|
+
# IOStep names never appear in configuration
|
1685
|
+
step_config = (
|
1686
|
+
self.step_config if isinstance(step, IOStep) else self.step_config[step.name]
|
1687
|
+
)
|
1656
1688
|
step.set_configuration_state(
|
1657
|
-
|
1689
|
+
step_config, self.combined_implementations, self.input_data_config
|
1658
1690
|
)
|