easylink 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
easylink/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.11"
1
+ __version__ = "0.1.13"
@@ -66,16 +66,37 @@ NODES = [
66
66
  name="step_3_main_input",
67
67
  env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
68
68
  validator=validate_input_file_dummy,
69
- splitter=split_data_by_size,
70
69
  ),
71
70
  ],
72
71
  output_slots=[
73
72
  OutputSlot(
74
73
  name="step_3_main_output",
75
- aggregator=concatenate_datasets,
76
74
  ),
77
75
  ],
78
76
  ),
77
+ input_slots=[
78
+ InputSlot(
79
+ name="step_3_main_input",
80
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
81
+ validator=validate_input_file_dummy,
82
+ splitter=split_data_by_size,
83
+ ),
84
+ ],
85
+ output_slots=[OutputSlot("step_3_main_output", aggregator=concatenate_datasets)],
86
+ input_slot_mappings=[
87
+ InputSlotMapping(
88
+ parent_slot="step_3_main_input",
89
+ child_node="step_3",
90
+ child_slot="step_3_main_input",
91
+ ),
92
+ ],
93
+ output_slot_mappings=[
94
+ OutputSlotMapping(
95
+ parent_slot="step_3_main_output",
96
+ child_node="step_3",
97
+ child_slot="step_3_main_output",
98
+ ),
99
+ ],
79
100
  ),
80
101
  self_edges=[
81
102
  EdgeParams(
easylink/step.py CHANGED
@@ -87,6 +87,7 @@ class Step:
87
87
  output_slots: Iterable[OutputSlot] = (),
88
88
  input_slot_mappings: Iterable[InputSlotMapping] = (),
89
89
  output_slot_mappings: Iterable[OutputSlotMapping] = (),
90
+ is_embarrassingly_parallel: bool = False,
90
91
  ) -> None:
91
92
  self.step_name = step_name
92
93
  """The name of the pipeline step in the ``PipelineSchema``. It must also match
@@ -107,6 +108,8 @@ class Step:
107
108
  }
108
109
  """A combined dictionary containing both the ``InputSlotMappings`` and
109
110
  ``OutputSlotMappings`` of this ``Step``."""
111
+ self.is_embarrassingly_parallel = is_embarrassingly_parallel
112
+ """Whether or not this ``Step`` is to be run in an embarrassingly parallel manner."""
110
113
  self.parent_step = None
111
114
  """This ``Step's`` parent ``Step``, if applicable."""
112
115
  self._configuration_state = None
@@ -884,7 +887,7 @@ class TemplatedStep(Step, ABC):
884
887
  self.step_graph = StepGraph()
885
888
  self.template_step.name = self.name
886
889
  self.step_graph.add_node_from_step(self.template_step)
887
- # Special handle the slot_mappings update
890
+ # Update the slot mappings with renamed children
888
891
  input_mappings = [
889
892
  InputSlotMapping(slot, self.name, slot) for slot in self.input_slots
890
893
  ]
@@ -899,10 +902,8 @@ class TemplatedStep(Step, ABC):
899
902
  num_repeats = len(expanded_config)
900
903
  self.step_graph = self._update_step_graph(num_repeats)
901
904
  self.slot_mappings = self._update_slot_mappings(num_repeats)
902
- # Manually set the configuration state to non-leaf instead of relying
903
- # on super().get_configuration_state() because that method will erroneously
904
- # set to leaf state in the event the user didn't include the config_key
905
- # in the pipeline specification.
905
+
906
+ # TemplatedSteps are by definition non-leaf steps.
906
907
  self._configuration_state = NonLeafConfigurationState(
907
908
  self, expanded_config, combined_implementations, input_data_config
908
909
  )
@@ -1164,10 +1165,22 @@ class EmbarrassinglyParallelStep(Step):
1164
1165
  def __init__(
1165
1166
  self,
1166
1167
  step: Step,
1168
+ input_slots: Iterable[InputSlot],
1169
+ output_slots: Iterable[OutputSlot],
1170
+ input_slot_mappings: Iterable[InputSlotMapping],
1171
+ output_slot_mappings: Iterable[OutputSlotMapping],
1167
1172
  ) -> None:
1168
1173
  super().__init__(
1169
- step.step_name, step.name, step.input_slots.values(), step.output_slots.values()
1174
+ step.step_name,
1175
+ step.name,
1176
+ input_slots,
1177
+ output_slots,
1178
+ input_slot_mappings,
1179
+ output_slot_mappings,
1180
+ is_embarrassingly_parallel=True,
1170
1181
  )
1182
+ self.step_graph = None
1183
+ self.step = step
1171
1184
  self._validate()
1172
1185
 
1173
1186
  def _validate(self) -> None:
@@ -1210,6 +1223,50 @@ class EmbarrassinglyParallelStep(Step):
1210
1223
  if errors:
1211
1224
  raise ValueError("\n".join(errors))
1212
1225
 
1226
+ def set_configuration_state(
1227
+ self,
1228
+ step_config: LayeredConfigTree,
1229
+ combined_implementations: LayeredConfigTree,
1230
+ input_data_config: LayeredConfigTree,
1231
+ ):
1232
+ """Sets the configuration state to 'non-leaf'.
1233
+
1234
+ In addition to setting the configuration state, this also updates the
1235
+ :class:`~easylink.graph_components.StepGraph` and
1236
+ :class:`SlotMappings<easylink.graph_components.SlotMapping>`.
1237
+
1238
+ Parameters
1239
+ ----------
1240
+ step_config
1241
+ The internal configuration of this ``Step``, i.e. it should not include
1242
+ the ``Step's`` name.
1243
+ combined_implementations
1244
+ The configuration for any implementations to be combined.
1245
+ input_data_config
1246
+ The input data configuration for the entire pipeline.
1247
+ """
1248
+ if self.step.name != self.name:
1249
+ # Update the step name if the parent got renamed, e.g. a parent LoopStep
1250
+ # 'step_1' that got expanded to 'step_1_loop_1', etc.
1251
+ self.step.name = self.name
1252
+ input_mappings = [
1253
+ InputSlotMapping(slot, self.name, slot) for slot in self.input_slots
1254
+ ]
1255
+ output_mappings = [
1256
+ OutputSlotMapping(slot, self.name, slot) for slot in self.output_slots
1257
+ ]
1258
+ self.slot_mappings = {"input": input_mappings, "output": output_mappings}
1259
+ # Generate step graph from the single ``step`` attr
1260
+ self.step_graph = StepGraph()
1261
+ self.step_graph.add_node_from_step(self.step)
1262
+ # Add the key back to the expanded config
1263
+ expanded_config = LayeredConfigTree({self.name: step_config})
1264
+
1265
+ # EmbarrassinglyParallelSteps are by definition non-leaf steps
1266
+ self._configuration_state = NonLeafConfigurationState(
1267
+ self, expanded_config, combined_implementations, input_data_config
1268
+ )
1269
+
1213
1270
 
1214
1271
  class ChoiceStep(Step):
1215
1272
  """A type of :class:`Step` that allows for choosing from a set of options.
@@ -1469,10 +1526,10 @@ class LeafConfigurationState(ConfigurationState):
1469
1526
  """
1470
1527
  step = self._step
1471
1528
  if self.is_combined:
1472
- if isinstance(step, EmbarrassinglyParallelStep):
1529
+ if step.is_embarrassingly_parallel:
1473
1530
  raise NotImplementedError(
1474
1531
  "Combining implementations with embarrassingly parallel steps "
1475
- "is not yet supported."
1532
+ "is not supported."
1476
1533
  )
1477
1534
  implementation = PartialImplementation(
1478
1535
  combined_name=self.step_config[COMBINED_IMPLEMENTATION_KEY],
@@ -1486,7 +1543,7 @@ class LeafConfigurationState(ConfigurationState):
1486
1543
  implementation_config=self.implementation_config,
1487
1544
  input_slots=step.input_slots.values(),
1488
1545
  output_slots=step.output_slots.values(),
1489
- is_embarrassingly_parallel=isinstance(step, EmbarrassinglyParallelStep),
1546
+ is_embarrassingly_parallel=step.is_embarrassingly_parallel,
1490
1547
  )
1491
1548
  implementation_graph.add_node_from_implementation(
1492
1549
  step.implementation_node_name,
@@ -1608,7 +1665,6 @@ class NonLeafConfigurationState(ConfigurationState):
1608
1665
  "NonLeafConfigurationState requires a subgraph upon which to operate, "
1609
1666
  f"but Step {step.name} has no step graph."
1610
1667
  )
1611
- self._nodes = step.step_graph.nodes
1612
1668
  self._configure_subgraph_steps()
1613
1669
 
1614
1670
  def add_nodes_to_implementation_graph(
@@ -1622,8 +1678,59 @@ class NonLeafConfigurationState(ConfigurationState):
1622
1678
  """
1623
1679
  for node in self._step.step_graph.nodes:
1624
1680
  substep = self._step.step_graph.nodes[node]["step"]
1681
+ if self._step.is_embarrassingly_parallel:
1682
+ substep.is_embarrassingly_parallel = True
1683
+ self._propagate_splitter_aggregators(self._step, substep)
1625
1684
  substep.add_nodes_to_implementation_graph(implementation_graph)
1626
1685
 
1686
+ @staticmethod
1687
+ def _propagate_splitter_aggregators(parent: Step, child: Step):
1688
+ """Propagates splitters and aggregators to child ``Steps``.
1689
+
1690
+ This method adds the :meth:`~easylink.graph_components.InputSlot.splitter`
1691
+ and :meth:`~easylink.graph_components.OutputSlot.aggregator` methods from a
1692
+ parent ``Step's`` :class:`~easylink.graph_components.InputSlot` and
1693
+ :class:`OutputSlots<easylink.graph_components.OutputSlot>` to the corresponding
1694
+ child steps' slots.
1695
+
1696
+ Parameters
1697
+ ----------
1698
+ parent
1699
+ The parent ``Step`` whose ``splitter`` and ``aggregator`` methods are
1700
+ to be propagated to the appropriate child ``Step``.
1701
+ child
1702
+ A child ``Step`` to potentially have its parent's ``splitter`` and
1703
+ ``aggregators`` assigned to its ``InputSlot`` and ``OutputSlots``,
1704
+ respectively.
1705
+ """
1706
+ for parent_input_slot_name, parent_input_slot in parent.input_slots.items():
1707
+ if parent_input_slot.splitter:
1708
+ # Extract the appropriate child slot name from the mapping
1709
+ mappings_with_splitter = [
1710
+ mapping
1711
+ for mapping in parent.slot_mappings["input"]
1712
+ if mapping.parent_slot == parent_input_slot_name
1713
+ ]
1714
+ for mapping in mappings_with_splitter:
1715
+ child_node = mapping.child_node
1716
+ child_slot = mapping.child_slot
1717
+ # Assign the splitter to the appropriate child slot
1718
+ if child_slot in child.input_slots and child_node == child.name:
1719
+ child.input_slots[child_slot].splitter = parent_input_slot.splitter
1720
+ for parent_output_slot_name, parent_output_slot in parent.output_slots.items():
1721
+ # Extract the appropriate child slot name from the mapping
1722
+ mappings_from_parent = [
1723
+ mapping
1724
+ for mapping in parent.slot_mappings["output"]
1725
+ if mapping.parent_slot == parent_output_slot_name
1726
+ ]
1727
+ for mapping in mappings_from_parent:
1728
+ child_node = mapping.child_node
1729
+ child_slot = mapping.child_slot
1730
+ # Assign the aggregator to the appropriate child slot
1731
+ if child_slot in child.output_slots and child_node == child.name:
1732
+ child.output_slots[child_slot].aggregator = parent_output_slot.aggregator
1733
+
1627
1734
  def add_edges_to_implementation_graph(
1628
1735
  self, implementation_graph: ImplementationGraph
1629
1736
  ) -> None:
@@ -1643,8 +1750,8 @@ class NonLeafConfigurationState(ConfigurationState):
1643
1750
  # Add the edges at this level (i.e. the edges at this `self._step`)
1644
1751
  for source, target, edge_attrs in self._step.step_graph.edges(data=True):
1645
1752
  edge = EdgeParams.from_graph_edge(source, target, edge_attrs)
1646
- source_step = self._nodes[source]["step"]
1647
- target_step = self._nodes[target]["step"]
1753
+ source_step = self._step.step_graph.nodes[source]["step"]
1754
+ target_step = self._step.step_graph.nodes[target]["step"]
1648
1755
 
1649
1756
  source_edges = source_step.get_implementation_edges(edge)
1650
1757
  for source_edge in source_edges:
@@ -1707,7 +1814,7 @@ class NonLeafConfigurationState(ConfigurationState):
1707
1814
  ]
1708
1815
  for mapping in mappings:
1709
1816
  new_edge = mapping.remap_edge(edge)
1710
- new_step = self._nodes[mapping.child_node]["step"]
1817
+ new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
1711
1818
  imp_edges = new_step.get_implementation_edges(new_edge)
1712
1819
  implementation_edges.extend(imp_edges)
1713
1820
  elif edge.target_node == self._step.name:
@@ -1718,7 +1825,7 @@ class NonLeafConfigurationState(ConfigurationState):
1718
1825
  ]
1719
1826
  for mapping in mappings:
1720
1827
  new_edge = mapping.remap_edge(edge)
1721
- new_step = self._nodes[mapping.child_node]["step"]
1828
+ new_step = self._step.step_graph.nodes[mapping.child_node]["step"]
1722
1829
  imp_edges = new_step.get_implementation_edges(new_edge)
1723
1830
  implementation_edges.extend(imp_edges)
1724
1831
  else:
@@ -1733,12 +1840,14 @@ class NonLeafConfigurationState(ConfigurationState):
1733
1840
  This method recursively traverses the ``StepGraph`` and sets the configuration
1734
1841
  state for each ``Step`` until reaching all leaf nodes.
1735
1842
  """
1736
- for node in self._nodes:
1737
- step = self._nodes[node]["step"]
1843
+ for sub_node in self._step.step_graph.nodes:
1844
+ sub_step = self._step.step_graph.nodes[sub_node]["step"]
1738
1845
  # IOStep names never appear in configuration
1739
1846
  step_config = (
1740
- self.step_config if isinstance(step, IOStep) else self.step_config[step.name]
1847
+ self.step_config
1848
+ if isinstance(sub_step, IOStep)
1849
+ else self.step_config[sub_step.name]
1741
1850
  )
1742
- step.set_configuration_state(
1851
+ sub_step.set_configuration_state(
1743
1852
  step_config, self.combined_implementations, self.input_data_config
1744
1853
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -1,6 +1,6 @@
1
1
  easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
2
2
  easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
3
- easylink/_version.py,sha256=nllDrH0jyChMuuYrK0CC55iTBKUNTUjejtcwxyUF2EQ,23
3
+ easylink/_version.py,sha256=khDKUuWafURKVs5EAZkpOMiUHI2-V7axlqrWLPUpuZo,23
4
4
  easylink/cli.py,sha256=ARSKAljepNOEYd1VCS_QqBJQIBLzE3IgKiOb5-OROdY,6380
5
5
  easylink/configuration.py,sha256=Ire2pMZNZ6wtSwhcWnQpYa-snX4KrhXgovlQwQ2Wxf4,12530
6
6
  easylink/graph_components.py,sha256=PhMKxpgZjorhubS7vcta1pgXgXSGplmPulQpV0YZhqo,14811
@@ -11,11 +11,11 @@ easylink/pipeline_graph.py,sha256=vsY6nW_iEwZCNf_N_3CsixsKBUy_5JxGEi61-1Q-KAw,22
11
11
  easylink/pipeline_schema.py,sha256=Q2sCpsC-F2W0yxVP7ufunowDepOBrRVENXOdap9J5iY,6921
12
12
  easylink/rule.py,sha256=W97LMI-vkEPipJbnSZLn2BxfYfFtvzGTKzq6YgDVri0,19913
13
13
  easylink/runner.py,sha256=k9ICTToHj2xr6MGIuvlWf6YMeZ47UGgseaMByMgUGac,6271
14
- easylink/step.py,sha256=ttteoyIiwnlDjc6QxWKO85GI9ubVGu9Oy5XDG2u4YrY,69885
14
+ easylink/step.py,sha256=Hweg1OAGcmrNAt95C-M4ksOAtc_db0oeibbF3cnqhq0,74951
15
15
  easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
16
16
  easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
17
17
  easylink/pipeline_schema_constants/__init__.py,sha256=uRVjQw7_Ff5IBQw0_Jc93Fzfa-MnbPVPKsy18CCaW7E,1021
18
- easylink/pipeline_schema_constants/development.py,sha256=kOTEqfZD5pWqP9gu7E6r9Cubf3ILtWEUxCfJfrN8znc,11547
18
+ easylink/pipeline_schema_constants/development.py,sha256=0fc6xWRBr5e_xDaldR9sY2vMQJU1wnlhDQS_-yUOT6g,12339
19
19
  easylink/pipeline_schema_constants/testing.py,sha256=ohcTlT_viZYxS1GkO46mjkb8IzXo6yIOqvBbb4YrOhA,10897
20
20
  easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
21
21
  easylink/steps/dev/build-containers-local.sh,sha256=Wy3pfcyt7I-BNvHcr7ZXDe0g5Ihd00BIPqt9YuRbLeA,259
@@ -43,8 +43,8 @@ easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,9
43
43
  easylink/utilities/spark.smk,sha256=tQ7RArNQzhjbaBQQcRORB4IxxkuDx4gPHUBcWHDYJ_U,5795
44
44
  easylink/utilities/splitter_utils.py,sha256=y4CbbTBgRaoXFxy-9Eu5eWx4lA4ZEcbrYpxgLIzG_kc,2602
45
45
  easylink/utilities/validation_utils.py,sha256=W9r_RXcivJjfpioLhONirfwdByYttxNsVY489_sbrYQ,1683
46
- easylink-0.1.11.dist-info/METADATA,sha256=LCqBkp3ndZAQF8Dwo-4ugO2yLyyduY7oRwg8EcBX0lY,2805
47
- easylink-0.1.11.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
48
- easylink-0.1.11.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
49
- easylink-0.1.11.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
50
- easylink-0.1.11.dist-info/RECORD,,
46
+ easylink-0.1.13.dist-info/METADATA,sha256=ooL68LseA1cN5X2wLOB_uIFgXIjW0PCINie7aMHw6t0,2805
47
+ easylink-0.1.13.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
48
+ easylink-0.1.13.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
49
+ easylink-0.1.13.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
50
+ easylink-0.1.13.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (79.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5