easylink 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/pipeline_schema_constants/development.py +23 -2
- easylink/step.py +127 -18
- {easylink-0.1.11.dist-info → easylink-0.1.13.dist-info}/METADATA +1 -1
- {easylink-0.1.11.dist-info → easylink-0.1.13.dist-info}/RECORD +8 -8
- {easylink-0.1.11.dist-info → easylink-0.1.13.dist-info}/WHEEL +1 -1
- {easylink-0.1.11.dist-info → easylink-0.1.13.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.11.dist-info → easylink-0.1.13.dist-info}/top_level.txt +0 -0
easylink/_version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.1.11"
|
1
|
+
__version__ = "0.1.13"
|
easylink/pipeline_schema_constants/development.py
CHANGED
@@ -66,16 +66,37 @@ NODES = [
|
|
66
66
|
name="step_3_main_input",
|
67
67
|
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
68
68
|
validator=validate_input_file_dummy,
|
69
|
-
splitter=split_data_by_size,
|
70
69
|
),
|
71
70
|
],
|
72
71
|
output_slots=[
|
73
72
|
OutputSlot(
|
74
73
|
name="step_3_main_output",
|
75
|
-
aggregator=concatenate_datasets,
|
76
74
|
),
|
77
75
|
],
|
78
76
|
),
|
77
|
+
input_slots=[
|
78
|
+
InputSlot(
|
79
|
+
name="step_3_main_input",
|
80
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
81
|
+
validator=validate_input_file_dummy,
|
82
|
+
splitter=split_data_by_size,
|
83
|
+
),
|
84
|
+
],
|
85
|
+
output_slots=[OutputSlot("step_3_main_output", aggregator=concatenate_datasets)],
|
86
|
+
input_slot_mappings=[
|
87
|
+
InputSlotMapping(
|
88
|
+
parent_slot="step_3_main_input",
|
89
|
+
child_node="step_3",
|
90
|
+
child_slot="step_3_main_input",
|
91
|
+
),
|
92
|
+
],
|
93
|
+
output_slot_mappings=[
|
94
|
+
OutputSlotMapping(
|
95
|
+
parent_slot="step_3_main_output",
|
96
|
+
child_node="step_3",
|
97
|
+
child_slot="step_3_main_output",
|
98
|
+
),
|
99
|
+
],
|
79
100
|
),
|
80
101
|
self_edges=[
|
81
102
|
EdgeParams(
|
easylink/step.py
CHANGED
@@ -87,6 +87,7 @@ class Step:
|
|
87
87
|
output_slots: Iterable[OutputSlot] = (),
|
88
88
|
input_slot_mappings: Iterable[InputSlotMapping] = (),
|
89
89
|
output_slot_mappings: Iterable[OutputSlotMapping] = (),
|
90
|
+
is_embarrassingly_parallel: bool = False,
|
90
91
|
) -> None:
|
91
92
|
self.step_name = step_name
|
92
93
|
"""The name of the pipeline step in the ``PipelineSchema``. It must also match
|
@@ -107,6 +108,8 @@ class Step:
|
|
107
108
|
}
|
108
109
|
"""A combined dictionary containing both the ``InputSlotMappings`` and
|
109
110
|
``OutputSlotMappings`` of this ``Step``."""
|
111
|
+
self.is_embarrassingly_parallel = is_embarrassingly_parallel
|
112
|
+
"""Whether or not this ``Step`` is to be run in an embarrassingly parallel manner."""
|
110
113
|
self.parent_step = None
|
111
114
|
"""This ``Step's`` parent ``Step``, if applicable."""
|
112
115
|
self._configuration_state = None
|
@@ -884,7 +887,7 @@ class TemplatedStep(Step, ABC):
|
|
884
887
|
self.step_graph = StepGraph()
|
885
888
|
self.template_step.name = self.name
|
886
889
|
self.step_graph.add_node_from_step(self.template_step)
|
887
|
-
#
|
890
|
+
# Update the slot mappings with renamed children
|
888
891
|
input_mappings = [
|
889
892
|
InputSlotMapping(slot, self.name, slot) for slot in self.input_slots
|
890
893
|
]
|
@@ -899,10 +902,8 @@ class TemplatedStep(Step, ABC):
|
|
899
902
|
num_repeats = len(expanded_config)
|
900
903
|
self.step_graph = self._update_step_graph(num_repeats)
|
901
904
|
self.slot_mappings = self._update_slot_mappings(num_repeats)
|
902
|
-
|
903
|
-
#
|
904
|
-
# set to leaf state in the event the user didn't include the config_key
|
905
|
-
# in the pipeline specification.
|
905
|
+
|
906
|
+
# TemplatedSteps are by definition non-leaf steps.
|
906
907
|
self._configuration_state = NonLeafConfigurationState(
|
907
908
|
self, expanded_config, combined_implementations, input_data_config
|
908
909
|
)
|
@@ -1164,10 +1165,22 @@ class EmbarrassinglyParallelStep(Step):
|
|
1164
1165
|
def __init__(
|
1165
1166
|
self,
|
1166
1167
|
step: Step,
|
1168
|
+
input_slots: Iterable[InputSlot],
|
1169
|
+
output_slots: Iterable[OutputSlot],
|
1170
|
+
input_slot_mappings: Iterable[InputSlotMapping],
|
1171
|
+
output_slot_mappings: Iterable[OutputSlotMapping],
|
1167
1172
|
) -> None:
|
1168
1173
|
super().__init__(
|
1169
|
-
step.step_name,
|
1174
|
+
step.step_name,
|
1175
|
+
step.name,
|
1176
|
+
input_slots,
|
1177
|
+
output_slots,
|
1178
|
+
input_slot_mappings,
|
1179
|
+
output_slot_mappings,
|
1180
|
+
is_embarrassingly_parallel=True,
|
1170
1181
|
)
|
1182
|
+
self.step_graph = None
|
1183
|
+
self.step = step
|
1171
1184
|
self._validate()
|
1172
1185
|
|
1173
1186
|
def _validate(self) -> None:
|
@@ -1210,6 +1223,50 @@ class EmbarrassinglyParallelStep(Step):
|
|
1210
1223
|
if errors:
|
1211
1224
|
raise ValueError("\n".join(errors))
|
1212
1225
|
|
1226
|
+
def set_configuration_state(
|
1227
|
+
self,
|
1228
|
+
step_config: LayeredConfigTree,
|
1229
|
+
combined_implementations: LayeredConfigTree,
|
1230
|
+
input_data_config: LayeredConfigTree,
|
1231
|
+
):
|
1232
|
+
"""Sets the configuration state to 'non-leaf'.
|
1233
|
+
|
1234
|
+
In addition to setting the configuration state, this also updates the
|
1235
|
+
:class:`~easylink.graph_components.StepGraph` and
|
1236
|
+
:class:`SlotMappings<easylink.graph_components.SlotMapping>`.
|
1237
|
+
|
1238
|
+
Parameters
|
1239
|
+
----------
|
1240
|
+
step_config
|
1241
|
+
The internal configuration of this ``Step``, i.e. it should not include
|
1242
|
+
the ``Step's`` name.
|
1243
|
+
combined_implementations
|
1244
|
+
The configuration for any implementations to be combined.
|
1245
|
+
input_data_config
|
1246
|
+
The input data configuration for the entire pipeline.
|
1247
|
+
"""
|
1248
|
+
if self.step.name != self.name:
|
1249
|
+
# Update the step name if the parent got renamed, e.g. a parent LoopStep
|
1250
|
+
# 'step_1' that got expanded to 'step_1_loop_1', etc.
|
1251
|
+
self.step.name = self.name
|
1252
|
+
input_mappings = [
|
1253
|
+
InputSlotMapping(slot, self.name, slot) for slot in self.input_slots
|
1254
|
+
]
|
1255
|
+
output_mappings = [
|
1256
|
+
OutputSlotMapping(slot, self.name, slot) for slot in self.output_slots
|
1257
|
+
]
|
1258
|
+
self.slot_mappings = {"input": input_mappings, "output": output_mappings}
|
1259
|
+
# Generate step graph from the single ``step`` attr
|
1260
|
+
self.step_graph = StepGraph()
|
1261
|
+
self.step_graph.add_node_from_step(self.step)
|
1262
|
+
# Add the key back to the expanded config
|
1263
|
+
expanded_config = LayeredConfigTree({self.name: step_config})
|
1264
|
+
|
1265
|
+
# EmbarrassinglyParallelSteps are by definition non-leaf steps
|
1266
|
+
self._configuration_state = NonLeafConfigurationState(
|
1267
|
+
self, expanded_config, combined_implementations, input_data_config
|
1268
|
+
)
|
1269
|
+
|
1213
1270
|
|
1214
1271
|
class ChoiceStep(Step):
|
1215
1272
|
"""A type of :class:`Step` that allows for choosing from a set of options.
|
@@ -1469,10 +1526,10 @@ class LeafConfigurationState(ConfigurationState):
|
|
1469
1526
|
"""
|
1470
1527
|
step = self._step
|
1471
1528
|
if self.is_combined:
|
1472
|
-
if
|
1529
|
+
if step.is_embarrassingly_parallel:
|
1473
1530
|
raise NotImplementedError(
|
1474
1531
|
"Combining implementations with embarrassingly parallel steps "
|
1475
|
-
"is not
|
1532
|
+
"is not supported."
|
1476
1533
|
)
|
1477
1534
|
implementation = PartialImplementation(
|
1478
1535
|
combined_name=self.step_config[COMBINED_IMPLEMENTATION_KEY],
|
@@ -1486,7 +1543,7 @@ class LeafConfigurationState(ConfigurationState):
|
|
1486
1543
|
implementation_config=self.implementation_config,
|
1487
1544
|
input_slots=step.input_slots.values(),
|
1488
1545
|
output_slots=step.output_slots.values(),
|
1489
|
-
is_embarrassingly_parallel=
|
1546
|
+
is_embarrassingly_parallel=step.is_embarrassingly_parallel,
|
1490
1547
|
)
|
1491
1548
|
implementation_graph.add_node_from_implementation(
|
1492
1549
|
step.implementation_node_name,
|
@@ -1608,7 +1665,6 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1608
1665
|
"NonLeafConfigurationState requires a subgraph upon which to operate, "
|
1609
1666
|
f"but Step {step.name} has no step graph."
|
1610
1667
|
)
|
1611
|
-
self._nodes = step.step_graph.nodes
|
1612
1668
|
self._configure_subgraph_steps()
|
1613
1669
|
|
1614
1670
|
def add_nodes_to_implementation_graph(
|
@@ -1622,8 +1678,59 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1622
1678
|
"""
|
1623
1679
|
for node in self._step.step_graph.nodes:
|
1624
1680
|
substep = self._step.step_graph.nodes[node]["step"]
|
1681
|
+
if self._step.is_embarrassingly_parallel:
|
1682
|
+
substep.is_embarrassingly_parallel = True
|
1683
|
+
self._propagate_splitter_aggregators(self._step, substep)
|
1625
1684
|
substep.add_nodes_to_implementation_graph(implementation_graph)
|
1626
1685
|
|
1686
|
+
@staticmethod
|
1687
|
+
def _propagate_splitter_aggregators(parent: Step, child: Step):
|
1688
|
+
"""Propagates splitters and aggregators to child ``Steps``.
|
1689
|
+
|
1690
|
+
This method adds the :meth:`~easylink.graph_components.InputSlot.splitter`
|
1691
|
+
and :meth:`~easylink.graph_components.OutputSlot.aggregator` methods from a
|
1692
|
+
parent ``Step's`` :class:`~easylink.graph_components.InputSlot` and
|
1693
|
+
:class:`OutputSlots<easylink.graph_components.OutputSlot>` to the corresponding
|
1694
|
+
child steps' slots.
|
1695
|
+
|
1696
|
+
Parameters
|
1697
|
+
----------
|
1698
|
+
parent
|
1699
|
+
The parent ``Step`` whose ``splitter`` and ``aggregator`` methods are
|
1700
|
+
to be propagated to the appropriate child ``Step``.
|
1701
|
+
child
|
1702
|
+
A child ``Step`` to potentially have its parent's ``splitter`` and
|
1703
|
+
``aggregators`` assigned to its ``InputSlot`` and ``OutputSlots``,
|
1704
|
+
respectively.
|
1705
|
+
"""
|
1706
|
+
for parent_input_slot_name, parent_input_slot in parent.input_slots.items():
|
1707
|
+
if parent_input_slot.splitter:
|
1708
|
+
# Extract the appropriate child slot name from the mapping
|
1709
|
+
mappings_with_splitter = [
|
1710
|
+
mapping
|
1711
|
+
for mapping in parent.slot_mappings["input"]
|
1712
|
+
if mapping.parent_slot == parent_input_slot_name
|
1713
|
+
]
|
1714
|
+
for mapping in mappings_with_splitter:
|
1715
|
+
child_node = mapping.child_node
|
1716
|
+
child_slot = mapping.child_slot
|
1717
|
+
# Assign the splitter to the appropriate child slot
|
1718
|
+
if child_slot in child.input_slots and child_node == child.name:
|
1719
|
+
child.input_slots[child_slot].splitter = parent_input_slot.splitter
|
1720
|
+
for parent_output_slot_name, parent_output_slot in parent.output_slots.items():
|
1721
|
+
# Extract the appropriate child slot name from the mapping
|
1722
|
+
mappings_from_parent = [
|
1723
|
+
mapping
|
1724
|
+
for mapping in parent.slot_mappings["output"]
|
1725
|
+
if mapping.parent_slot == parent_output_slot_name
|
1726
|
+
]
|
1727
|
+
for mapping in mappings_from_parent:
|
1728
|
+
child_node = mapping.child_node
|
1729
|
+
child_slot = mapping.child_slot
|
1730
|
+
# Assign the aggregator to the appropriate child slot
|
1731
|
+
if child_slot in child.output_slots and child_node == child.name:
|
1732
|
+
child.output_slots[child_slot].aggregator = parent_output_slot.aggregator
|
1733
|
+
|
1627
1734
|
def add_edges_to_implementation_graph(
|
1628
1735
|
self, implementation_graph: ImplementationGraph
|
1629
1736
|
) -> None:
|
@@ -1643,8 +1750,8 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1643
1750
|
# Add the edges at this level (i.e. the edges at this `self._step`)
|
1644
1751
|
for source, target, edge_attrs in self._step.step_graph.edges(data=True):
|
1645
1752
|
edge = EdgeParams.from_graph_edge(source, target, edge_attrs)
|
1646
|
-
source_step = self.
|
1647
|
-
target_step = self.
|
1753
|
+
source_step = self._step.step_graph.nodes[source]["step"]
|
1754
|
+
target_step = self._step.step_graph.nodes[target]["step"]
|
1648
1755
|
|
1649
1756
|
source_edges = source_step.get_implementation_edges(edge)
|
1650
1757
|
for source_edge in source_edges:
|
@@ -1707,7 +1814,7 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1707
1814
|
]
|
1708
1815
|
for mapping in mappings:
|
1709
1816
|
new_edge = mapping.remap_edge(edge)
|
1710
|
-
new_step = self.
|
1817
|
+
new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
|
1711
1818
|
imp_edges = new_step.get_implementation_edges(new_edge)
|
1712
1819
|
implementation_edges.extend(imp_edges)
|
1713
1820
|
elif edge.target_node == self._step.name:
|
@@ -1718,7 +1825,7 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1718
1825
|
]
|
1719
1826
|
for mapping in mappings:
|
1720
1827
|
new_edge = mapping.remap_edge(edge)
|
1721
|
-
new_step = self.
|
1828
|
+
new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
|
1722
1829
|
imp_edges = new_step.get_implementation_edges(new_edge)
|
1723
1830
|
implementation_edges.extend(imp_edges)
|
1724
1831
|
else:
|
@@ -1733,12 +1840,14 @@ class NonLeafConfigurationState(ConfigurationState):
|
|
1733
1840
|
This method recursively traverses the ``StepGraph`` and sets the configuration
|
1734
1841
|
state for each ``Step`` until reaching all leaf nodes.
|
1735
1842
|
"""
|
1736
|
-
for
|
1737
|
-
|
1843
|
+
for sub_node in self._step.step_graph.nodes:
|
1844
|
+
sub_step = self._step.step_graph.nodes[sub_node]["step"]
|
1738
1845
|
# IOStep names never appear in configuration
|
1739
1846
|
step_config = (
|
1740
|
-
self.step_config
|
1847
|
+
self.step_config
|
1848
|
+
if isinstance(sub_step, IOStep)
|
1849
|
+
else self.step_config[sub_step.name]
|
1741
1850
|
)
|
1742
|
-
|
1851
|
+
sub_step.set_configuration_state(
|
1743
1852
|
step_config, self.combined_implementations, self.input_data_config
|
1744
1853
|
)
|
{easylink-0.1.11.dist-info → easylink-0.1.13.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
|
2
2
|
easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
|
3
|
-
easylink/_version.py,sha256=
|
3
|
+
easylink/_version.py,sha256=khDKUuWafURKVs5EAZkpOMiUHI2-V7axlqrWLPUpuZo,23
|
4
4
|
easylink/cli.py,sha256=ARSKAljepNOEYd1VCS_QqBJQIBLzE3IgKiOb5-OROdY,6380
|
5
5
|
easylink/configuration.py,sha256=Ire2pMZNZ6wtSwhcWnQpYa-snX4KrhXgovlQwQ2Wxf4,12530
|
6
6
|
easylink/graph_components.py,sha256=PhMKxpgZjorhubS7vcta1pgXgXSGplmPulQpV0YZhqo,14811
|
@@ -11,11 +11,11 @@ easylink/pipeline_graph.py,sha256=vsY6nW_iEwZCNf_N_3CsixsKBUy_5JxGEi61-1Q-KAw,22
|
|
11
11
|
easylink/pipeline_schema.py,sha256=Q2sCpsC-F2W0yxVP7ufunowDepOBrRVENXOdap9J5iY,6921
|
12
12
|
easylink/rule.py,sha256=W97LMI-vkEPipJbnSZLn2BxfYfFtvzGTKzq6YgDVri0,19913
|
13
13
|
easylink/runner.py,sha256=k9ICTToHj2xr6MGIuvlWf6YMeZ47UGgseaMByMgUGac,6271
|
14
|
-
easylink/step.py,sha256=
|
14
|
+
easylink/step.py,sha256=Hweg1OAGcmrNAt95C-M4ksOAtc_db0oeibbF3cnqhq0,74951
|
15
15
|
easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
|
16
16
|
easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
|
17
17
|
easylink/pipeline_schema_constants/__init__.py,sha256=uRVjQw7_Ff5IBQw0_Jc93Fzfa-MnbPVPKsy18CCaW7E,1021
|
18
|
-
easylink/pipeline_schema_constants/development.py,sha256=
|
18
|
+
easylink/pipeline_schema_constants/development.py,sha256=0fc6xWRBr5e_xDaldR9sY2vMQJU1wnlhDQS_-yUOT6g,12339
|
19
19
|
easylink/pipeline_schema_constants/testing.py,sha256=ohcTlT_viZYxS1GkO46mjkb8IzXo6yIOqvBbb4YrOhA,10897
|
20
20
|
easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
|
21
21
|
easylink/steps/dev/build-containers-local.sh,sha256=Wy3pfcyt7I-BNvHcr7ZXDe0g5Ihd00BIPqt9YuRbLeA,259
|
@@ -43,8 +43,8 @@ easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,9
|
|
43
43
|
easylink/utilities/spark.smk,sha256=tQ7RArNQzhjbaBQQcRORB4IxxkuDx4gPHUBcWHDYJ_U,5795
|
44
44
|
easylink/utilities/splitter_utils.py,sha256=y4CbbTBgRaoXFxy-9Eu5eWx4lA4ZEcbrYpxgLIzG_kc,2602
|
45
45
|
easylink/utilities/validation_utils.py,sha256=W9r_RXcivJjfpioLhONirfwdByYttxNsVY489_sbrYQ,1683
|
46
|
-
easylink-0.1.
|
47
|
-
easylink-0.1.
|
48
|
-
easylink-0.1.
|
49
|
-
easylink-0.1.
|
50
|
-
easylink-0.1.
|
46
|
+
easylink-0.1.13.dist-info/METADATA,sha256=ooL68LseA1cN5X2wLOB_uIFgXIjW0PCINie7aMHw6t0,2805
|
47
|
+
easylink-0.1.13.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
48
|
+
easylink-0.1.13.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
|
49
|
+
easylink-0.1.13.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
|
50
|
+
easylink-0.1.13.dist-info/RECORD,,
|
File without changes
|
File without changes
|