easylink 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
easylink/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.8"
1
+ __version__ = "0.1.10"
@@ -12,7 +12,8 @@ from __future__ import annotations
12
12
 
13
13
  from abc import ABC, abstractmethod
14
14
  from collections.abc import Callable
15
- from dataclasses import dataclass
15
+ from dataclasses import dataclass, field
16
+ from types import NotImplementedType
16
17
  from typing import TYPE_CHECKING, Any
17
18
 
18
19
  import networkx as nx
@@ -22,7 +23,7 @@ if TYPE_CHECKING:
22
23
  from easylink.step import Step
23
24
 
24
25
 
25
- @dataclass(frozen=True)
26
+ @dataclass()
26
27
  class InputSlot:
27
28
  """A single input slot to a specific node.
28
29
 
@@ -41,20 +42,48 @@ class InputSlot:
41
42
  env_var: str | None
42
43
  """The environment variable that is used to pass a list of data filepaths to
43
44
  an ``Implementation``."""
44
- validator: Callable[[str], None]
45
+ validator: Callable[[str], None] = field(compare=False)
45
46
  """A function that validates the input data being passed into the pipeline via
46
47
  this ``InputSlot``. If the data is invalid, the function should raise an exception
47
48
  with a descriptive error message which will then be reported to the user.
48
49
  **Note that the function *must* be defined in the** :mod:`easylink.utilities.validation_utils`
49
50
  **module!**"""
50
- splitter: Callable[[list[str], str, Any], None] | None = None
51
+ splitter: Callable[[list[str], str, Any], None] | None = field(
52
+ default=None, compare=False
53
+ )
51
54
  """A function that splits the incoming data to this ``InputSlot`` into smaller
52
55
  pieces. The primary purpose of this functionality is to run sections of the
53
56
  pipeline in an embarrassingly parallel manner. **Note that the function *must*
54
57
  be defined in the **:mod:`easylink.utilities.splitter_utils`** module!**"""
55
58
 
59
+ def __eq__(self, other: Any) -> bool | NotImplementedType:
60
+ """Checks if two ``InputSlots`` are equal.
56
61
 
57
- @dataclass(frozen=True)
62
+ Two ``InputSlots`` are considered equal if their names, ``env_vars``, and
63
+ names of their ``validators`` and ``splitters`` are all the same.
64
+ """
65
+ if not isinstance(other, InputSlot):
66
+ return NotImplemented
67
+ splitter_name = self.splitter.__name__ if self.splitter else None
68
+ other_splitter_name = other.splitter.__name__ if other.splitter else None
69
+ return (
70
+ self.name == other.name
71
+ and self.env_var == other.env_var
72
+ and self.validator.__name__ == other.validator.__name__
73
+ and splitter_name == other_splitter_name
74
+ )
75
+
76
+ def __hash__(self) -> int:
77
+ """Hashes an ``InputSlot``.
78
+
79
+ The hash is based on the name of the ``InputSlot``, its ``env_var``, and
80
+ the names of its ``validator`` and ``splitter``.
81
+ """
82
+ splitter_name = self.splitter.__name__ if self.splitter else None
83
+ return hash((self.name, self.env_var, self.validator.__name__, splitter_name))
84
+
85
+
86
+ @dataclass()
58
87
  class OutputSlot:
59
88
  """A single output slot from a specific node.
60
89
 
@@ -75,12 +104,32 @@ class OutputSlot:
75
104
 
76
105
  name: str
77
106
  """The name of the ``OutputSlot``."""
78
- aggregator: Callable[[list[str], str], None] = None
107
+ aggregator: Callable[[list[str], str], None] = field(default=None, compare=False)
79
108
  """A function that aggregates all of the generated data to be passed out via this
80
109
  ``OutputSlot``. The primary purpose of this functionality is to run sections
81
110
  of the pipeline in an embarrassingly parallel manner. **Note that the function
82
111
  *must* be defined in the **:py:mod:`easylink.utilities.aggregator_utils`** module!**"""
83
112
 
113
+ def __eq__(self, other: Any) -> bool | NotImplementedType:
114
+ """Checks if two ``OutputSlots`` are equal.
115
+
116
+ Two ``OutputSlots`` are considered equal if their names and the names of their
117
+ ``aggregators`` are the same.
118
+ """
119
+ if not isinstance(other, OutputSlot):
120
+ return NotImplemented
121
+ aggregator_name = self.aggregator.__name__ if self.aggregator else None
122
+ other_aggregator_name = other.aggregator.__name__ if other.aggregator else None
123
+ return self.name == other.name and aggregator_name == other_aggregator_name
124
+
125
+ def __hash__(self) -> int:
126
+ """Hashes an ``OutputSlot``.
127
+
128
+ The hash is based on the name of the ``OutputSlot`` and the name of its ``aggregator``.
129
+ """
130
+ aggregator_name = self.aggregator.__name__ if self.aggregator else None
131
+ return hash((self.name, aggregator_name))
132
+
84
133
 
85
134
  @dataclass(frozen=True)
86
135
  class EdgeParams:
@@ -59,21 +59,23 @@ NODES = [
59
59
  ),
60
60
  LoopStep(
61
61
  template_step=EmbarrassinglyParallelStep(
62
- step_name="step_3",
63
- input_slots=[
64
- InputSlot(
65
- name="step_3_main_input",
66
- env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
67
- validator=validate_input_file_dummy,
68
- splitter=split_data_by_size,
69
- ),
70
- ],
71
- output_slots=[
72
- OutputSlot(
73
- name="step_3_main_output",
74
- aggregator=concatenate_datasets,
75
- ),
76
- ],
62
+ step=Step(
63
+ step_name="step_3",
64
+ input_slots=[
65
+ InputSlot(
66
+ name="step_3_main_input",
67
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
68
+ validator=validate_input_file_dummy,
69
+ splitter=split_data_by_size,
70
+ ),
71
+ ],
72
+ output_slots=[
73
+ OutputSlot(
74
+ name="step_3_main_output",
75
+ aggregator=concatenate_datasets,
76
+ ),
77
+ ],
78
+ ),
77
79
  ),
78
80
  self_edges=[
79
81
  EdgeParams(
easylink/step.py CHANGED
@@ -1138,19 +1138,25 @@ class EmbarrassinglyParallelStep(Step):
1138
1138
 
1139
1139
  An ``EmbarrassinglyParallelStep`` is different than a :class:`ParallelStep`
1140
1140
  in that it is not configured by the user to be run in parallel - it completely
1141
- happens on the back end for performance reasons. As such, note that it inherits
1142
- from :class:`Step` instead of :class:`TemplatedStep`.
1141
+ happens on the back end for performance reasons.
1143
1142
 
1144
1143
  See :class:`Step` for inherited attributes.
1144
+
1145
+ Parameters
1146
+ ----------
1147
+ step
1148
+ The ``Step`` to be run in an embarrassingly parallel manner. To run multiple
1149
+ steps in parallel, use a :class:`HierarchicalStep`.
1150
+
1145
1151
  """
1146
1152
 
1147
1153
  def __init__(
1148
1154
  self,
1149
- step_name: str,
1150
- input_slots: Iterable[InputSlot],
1151
- output_slots: Iterable[OutputSlot],
1155
+ step: Step,
1152
1156
  ) -> None:
1153
- super().__init__(step_name, input_slots=input_slots, output_slots=output_slots)
1157
+ super().__init__(
1158
+ step.step_name, step.name, step.input_slots.values(), step.output_slots.values()
1159
+ )
1154
1160
  self._validate()
1155
1161
 
1156
1162
  def _validate(self) -> None:
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -1,9 +1,9 @@
1
1
  easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
2
2
  easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
3
- easylink/_version.py,sha256=C69ADlbQREQlR15trneyA2sk8x0-oH4rDAX5fsv19_U,22
3
+ easylink/_version.py,sha256=z0zCHFTcKSR0tJ6h5qrpNmRVP21QIPP8N0p7quCnnm0,23
4
4
  easylink/cli.py,sha256=ARSKAljepNOEYd1VCS_QqBJQIBLzE3IgKiOb5-OROdY,6380
5
5
  easylink/configuration.py,sha256=Ire2pMZNZ6wtSwhcWnQpYa-snX4KrhXgovlQwQ2Wxf4,12530
6
- easylink/graph_components.py,sha256=6OipaUkCW2ESBW6bxwZVgpRAX8RuL15m4x_mGE7i4R8,12669
6
+ easylink/graph_components.py,sha256=PhMKxpgZjorhubS7vcta1pgXgXSGplmPulQpV0YZhqo,14811
7
7
  easylink/implementation.py,sha256=AwGl5YCKCSQo91owWj-gg9_5lBz7H_4q2z7jF0BhXs4,8992
8
8
  easylink/implementation_metadata.yaml,sha256=VvlEu3Dvlmeh1MpzeYx91j22GiV-9mu3hZP5yVuW04o,6763
9
9
  easylink/pipeline.py,sha256=EyCXv5p9WzTqcndXK6ukBJE6jY_fWIP_DGZQUl1wRcY,12284
@@ -11,11 +11,11 @@ easylink/pipeline_graph.py,sha256=vsY6nW_iEwZCNf_N_3CsixsKBUy_5JxGEi61-1Q-KAw,22
11
11
  easylink/pipeline_schema.py,sha256=kINpvy2Fl2S3FBqgdgZCCFHEk237_36X4ltLOtk5-dE,5862
12
12
  easylink/rule.py,sha256=W97LMI-vkEPipJbnSZLn2BxfYfFtvzGTKzq6YgDVri0,19913
13
13
  easylink/runner.py,sha256=k9ICTToHj2xr6MGIuvlWf6YMeZ47UGgseaMByMgUGac,6271
14
- easylink/step.py,sha256=gIDrfgo6SIDCqJfY_2h6ShiYMn0f5qwdTVgVaWuOa1k,67052
14
+ easylink/step.py,sha256=8EhoFOXBLWgDfb3OhmQu5g03fqElIJCWg8-Y_5azKEA,67100
15
15
  easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
16
16
  easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
17
17
  easylink/pipeline_schema_constants/__init__.py,sha256=uRVjQw7_Ff5IBQw0_Jc93Fzfa-MnbPVPKsy18CCaW7E,1021
18
- easylink/pipeline_schema_constants/development.py,sha256=k7GSUvbxyk9gynSfS-IFzCusXfQLBbIaxDfxiwQpQxM,11449
18
+ easylink/pipeline_schema_constants/development.py,sha256=kOTEqfZD5pWqP9gu7E6r9Cubf3ILtWEUxCfJfrN8znc,11547
19
19
  easylink/pipeline_schema_constants/testing.py,sha256=ohcTlT_viZYxS1GkO46mjkb8IzXo6yIOqvBbb4YrOhA,10897
20
20
  easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
21
21
  easylink/steps/dev/build-containers-local.sh,sha256=Wy3pfcyt7I-BNvHcr7ZXDe0g5Ihd00BIPqt9YuRbLeA,259
@@ -43,8 +43,8 @@ easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,9
43
43
  easylink/utilities/spark.smk,sha256=tQ7RArNQzhjbaBQQcRORB4IxxkuDx4gPHUBcWHDYJ_U,5795
44
44
  easylink/utilities/splitter_utils.py,sha256=y4CbbTBgRaoXFxy-9Eu5eWx4lA4ZEcbrYpxgLIzG_kc,2602
45
45
  easylink/utilities/validation_utils.py,sha256=W9r_RXcivJjfpioLhONirfwdByYttxNsVY489_sbrYQ,1683
46
- easylink-0.1.8.dist-info/METADATA,sha256=enI0WFt--1KZ6BDFxZ0diVH2YiHZABnSwxirpf64PSA,2804
47
- easylink-0.1.8.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
48
- easylink-0.1.8.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
49
- easylink-0.1.8.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
50
- easylink-0.1.8.dist-info/RECORD,,
46
+ easylink-0.1.10.dist-info/METADATA,sha256=wPKznMaCPytiFoECbqY0jcFra-Hl28FMQJLo4XQcwdo,2805
47
+ easylink-0.1.10.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
48
+ easylink-0.1.10.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
49
+ easylink-0.1.10.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
50
+ easylink-0.1.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (78.0.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5