easylink 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/graph_components.py +55 -6
- easylink/pipeline_schema_constants/development.py +17 -15
- easylink/step.py +12 -6
- {easylink-0.1.8.dist-info → easylink-0.1.10.dist-info}/METADATA +2 -2
- {easylink-0.1.8.dist-info → easylink-0.1.10.dist-info}/RECORD +9 -9
- {easylink-0.1.8.dist-info → easylink-0.1.10.dist-info}/WHEEL +1 -1
- {easylink-0.1.8.dist-info → easylink-0.1.10.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.8.dist-info → easylink-0.1.10.dist-info}/top_level.txt +0 -0
easylink/_version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.1.8"
|
1
|
+
__version__ = "0.1.10"
|
easylink/graph_components.py
CHANGED
@@ -12,7 +12,8 @@ from __future__ import annotations
|
|
12
12
|
|
13
13
|
from abc import ABC, abstractmethod
|
14
14
|
from collections.abc import Callable
|
15
|
-
from dataclasses import dataclass
|
15
|
+
from dataclasses import dataclass, field
|
16
|
+
from types import NotImplementedType
|
16
17
|
from typing import TYPE_CHECKING, Any
|
17
18
|
|
18
19
|
import networkx as nx
|
@@ -22,7 +23,7 @@ if TYPE_CHECKING:
|
|
22
23
|
from easylink.step import Step
|
23
24
|
|
24
25
|
|
25
|
-
@dataclass(frozen=True)
|
26
|
+
@dataclass()
|
26
27
|
class InputSlot:
|
27
28
|
"""A single input slot to a specific node.
|
28
29
|
|
@@ -41,20 +42,48 @@ class InputSlot:
|
|
41
42
|
env_var: str | None
|
42
43
|
"""The environment variable that is used to pass a list of data filepaths to
|
43
44
|
an ``Implementation``."""
|
44
|
-
validator: Callable[[str], None]
|
45
|
+
validator: Callable[[str], None] = field(compare=False)
|
45
46
|
"""A function that validates the input data being passed into the pipeline via
|
46
47
|
this ``InputSlot``. If the data is invalid, the function should raise an exception
|
47
48
|
with a descriptive error message which will then be reported to the user.
|
48
49
|
**Note that the function *must* be defined in the** :mod:`easylink.utilities.validation_utils`
|
49
50
|
**module!**"""
|
50
|
-
splitter: Callable[[list[str], str, Any], None] | None = None
|
51
|
+
splitter: Callable[[list[str], str, Any], None] | None = field(
|
52
|
+
default=None, compare=False
|
53
|
+
)
|
51
54
|
"""A function that splits the incoming data to this ``InputSlot`` into smaller
|
52
55
|
pieces. The primary purpose of this functionality is to run sections of the
|
53
56
|
pipeline in an embarrassingly parallel manner. **Note that the function *must*
|
54
57
|
be defined in the** :mod:`easylink.utilities.splitter_utils` **module!**"""
|
55
58
|
|
59
|
+
def __eq__(self, other: Any) -> bool | NotImplementedType:
|
60
|
+
"""Checks if two ``InputSlots`` are equal.
|
56
61
|
|
57
|
-
|
62
|
+
Two ``InputSlots`` are considered equal if their names, ``env_vars``, and
|
63
|
+
names of their ``validators`` and ``splitters`` are all the same.
|
64
|
+
"""
|
65
|
+
if not isinstance(other, InputSlot):
|
66
|
+
return NotImplemented
|
67
|
+
splitter_name = self.splitter.__name__ if self.splitter else None
|
68
|
+
other_splitter_name = other.splitter.__name__ if other.splitter else None
|
69
|
+
return (
|
70
|
+
self.name == other.name
|
71
|
+
and self.env_var == other.env_var
|
72
|
+
and self.validator.__name__ == other.validator.__name__
|
73
|
+
and splitter_name == other_splitter_name
|
74
|
+
)
|
75
|
+
|
76
|
+
def __hash__(self) -> int:
|
77
|
+
"""Hashes an ``InputSlot``.
|
78
|
+
|
79
|
+
The hash is based on the name of the ``InputSlot``, its ``env_var``, and
|
80
|
+
the names of its ``validator`` and ``splitter``.
|
81
|
+
"""
|
82
|
+
splitter_name = self.splitter.__name__ if self.splitter else None
|
83
|
+
return hash((self.name, self.env_var, self.validator.__name__, splitter_name))
|
84
|
+
|
85
|
+
|
86
|
+
@dataclass()
|
58
87
|
class OutputSlot:
|
59
88
|
"""A single output slot from a specific node.
|
60
89
|
|
@@ -75,12 +104,32 @@ class OutputSlot:
|
|
75
104
|
|
76
105
|
name: str
|
77
106
|
"""The name of the ``OutputSlot``."""
|
78
|
-
aggregator: Callable[[list[str], str], None] = None
|
107
|
+
aggregator: Callable[[list[str], str], None] = field(default=None, compare=False)
|
79
108
|
"""A function that aggregates all of the generated data to be passed out via this
|
80
109
|
``OutputSlot``. The primary purpose of this functionality is to run sections
|
81
110
|
of the pipeline in an embarrassingly parallel manner. **Note that the function
|
82
111
|
*must* be defined in the** :py:mod:`easylink.utilities.aggregator_utils` **module!**"""
|
83
112
|
|
113
|
+
def __eq__(self, other: Any) -> bool | NotImplementedType:
|
114
|
+
"""Checks if two ``OutputSlots`` are equal.
|
115
|
+
|
116
|
+
Two ``OutputSlots`` are considered equal if their names and the names of their
|
117
|
+
``aggregators`` are the same.
|
118
|
+
"""
|
119
|
+
if not isinstance(other, OutputSlot):
|
120
|
+
return NotImplemented
|
121
|
+
aggregator_name = self.aggregator.__name__ if self.aggregator else None
|
122
|
+
other_aggregator_name = other.aggregator.__name__ if other.aggregator else None
|
123
|
+
return self.name == other.name and aggregator_name == other_aggregator_name
|
124
|
+
|
125
|
+
def __hash__(self) -> int:
|
126
|
+
"""Hashes an ``OutputSlot``.
|
127
|
+
|
128
|
+
The hash is based on the name of the ``OutputSlot`` and the name of its ``aggregator``.
|
129
|
+
"""
|
130
|
+
aggregator_name = self.aggregator.__name__ if self.aggregator else None
|
131
|
+
return hash((self.name, aggregator_name))
|
132
|
+
|
84
133
|
|
85
134
|
@dataclass(frozen=True)
|
86
135
|
class EdgeParams:
|
easylink/pipeline_schema_constants/development.py
CHANGED
@@ -59,21 +59,23 @@ NODES = [
|
|
59
59
|
),
|
60
60
|
LoopStep(
|
61
61
|
template_step=EmbarrassinglyParallelStep(
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
62
|
+
step=Step(
|
63
|
+
step_name="step_3",
|
64
|
+
input_slots=[
|
65
|
+
InputSlot(
|
66
|
+
name="step_3_main_input",
|
67
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
68
|
+
validator=validate_input_file_dummy,
|
69
|
+
splitter=split_data_by_size,
|
70
|
+
),
|
71
|
+
],
|
72
|
+
output_slots=[
|
73
|
+
OutputSlot(
|
74
|
+
name="step_3_main_output",
|
75
|
+
aggregator=concatenate_datasets,
|
76
|
+
),
|
77
|
+
],
|
78
|
+
),
|
77
79
|
),
|
78
80
|
self_edges=[
|
79
81
|
EdgeParams(
|
easylink/step.py
CHANGED
@@ -1138,19 +1138,25 @@ class EmbarrassinglyParallelStep(Step):
|
|
1138
1138
|
|
1139
1139
|
An ``EmbarrassinglyParallelStep`` is different than a :class:`ParallelStep`
|
1140
1140
|
in that it is not configured by the user to be run in parallel - it completely
|
1141
|
-
happens on the back end for performance reasons.
|
1142
|
-
from :class:`Step` instead of :class:`TemplatedStep`.
|
1141
|
+
happens on the back end for performance reasons.
|
1143
1142
|
|
1144
1143
|
See :class:`Step` for inherited attributes.
|
1144
|
+
|
1145
|
+
Parameters
|
1146
|
+
----------
|
1147
|
+
step
|
1148
|
+
The ``Step`` to be run in an embarrassingly parallel manner. To run multiple
|
1149
|
+
steps in parallel, use a :class:`HierarchicalStep`.
|
1150
|
+
|
1145
1151
|
"""
|
1146
1152
|
|
1147
1153
|
def __init__(
|
1148
1154
|
self,
|
1149
|
-
|
1150
|
-
input_slots: Iterable[InputSlot],
|
1151
|
-
output_slots: Iterable[OutputSlot],
|
1155
|
+
step: Step,
|
1152
1156
|
) -> None:
|
1153
|
-
super().__init__(
|
1157
|
+
super().__init__(
|
1158
|
+
step.step_name, step.name, step.input_slots.values(), step.output_slots.values()
|
1159
|
+
)
|
1154
1160
|
self._validate()
|
1155
1161
|
|
1156
1162
|
def _validate(self) -> None:
|
{easylink-0.1.8.dist-info → easylink-0.1.10.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
|
2
2
|
easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
|
3
|
-
easylink/_version.py,sha256=
|
3
|
+
easylink/_version.py,sha256=z0zCHFTcKSR0tJ6h5qrpNmRVP21QIPP8N0p7quCnnm0,23
|
4
4
|
easylink/cli.py,sha256=ARSKAljepNOEYd1VCS_QqBJQIBLzE3IgKiOb5-OROdY,6380
|
5
5
|
easylink/configuration.py,sha256=Ire2pMZNZ6wtSwhcWnQpYa-snX4KrhXgovlQwQ2Wxf4,12530
|
6
|
-
easylink/graph_components.py,sha256=
|
6
|
+
easylink/graph_components.py,sha256=PhMKxpgZjorhubS7vcta1pgXgXSGplmPulQpV0YZhqo,14811
|
7
7
|
easylink/implementation.py,sha256=AwGl5YCKCSQo91owWj-gg9_5lBz7H_4q2z7jF0BhXs4,8992
|
8
8
|
easylink/implementation_metadata.yaml,sha256=VvlEu3Dvlmeh1MpzeYx91j22GiV-9mu3hZP5yVuW04o,6763
|
9
9
|
easylink/pipeline.py,sha256=EyCXv5p9WzTqcndXK6ukBJE6jY_fWIP_DGZQUl1wRcY,12284
|
@@ -11,11 +11,11 @@ easylink/pipeline_graph.py,sha256=vsY6nW_iEwZCNf_N_3CsixsKBUy_5JxGEi61-1Q-KAw,22
|
|
11
11
|
easylink/pipeline_schema.py,sha256=kINpvy2Fl2S3FBqgdgZCCFHEk237_36X4ltLOtk5-dE,5862
|
12
12
|
easylink/rule.py,sha256=W97LMI-vkEPipJbnSZLn2BxfYfFtvzGTKzq6YgDVri0,19913
|
13
13
|
easylink/runner.py,sha256=k9ICTToHj2xr6MGIuvlWf6YMeZ47UGgseaMByMgUGac,6271
|
14
|
-
easylink/step.py,sha256=
|
14
|
+
easylink/step.py,sha256=8EhoFOXBLWgDfb3OhmQu5g03fqElIJCWg8-Y_5azKEA,67100
|
15
15
|
easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
|
16
16
|
easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
|
17
17
|
easylink/pipeline_schema_constants/__init__.py,sha256=uRVjQw7_Ff5IBQw0_Jc93Fzfa-MnbPVPKsy18CCaW7E,1021
|
18
|
-
easylink/pipeline_schema_constants/development.py,sha256=
|
18
|
+
easylink/pipeline_schema_constants/development.py,sha256=kOTEqfZD5pWqP9gu7E6r9Cubf3ILtWEUxCfJfrN8znc,11547
|
19
19
|
easylink/pipeline_schema_constants/testing.py,sha256=ohcTlT_viZYxS1GkO46mjkb8IzXo6yIOqvBbb4YrOhA,10897
|
20
20
|
easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
|
21
21
|
easylink/steps/dev/build-containers-local.sh,sha256=Wy3pfcyt7I-BNvHcr7ZXDe0g5Ihd00BIPqt9YuRbLeA,259
|
@@ -43,8 +43,8 @@ easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,9
|
|
43
43
|
easylink/utilities/spark.smk,sha256=tQ7RArNQzhjbaBQQcRORB4IxxkuDx4gPHUBcWHDYJ_U,5795
|
44
44
|
easylink/utilities/splitter_utils.py,sha256=y4CbbTBgRaoXFxy-9Eu5eWx4lA4ZEcbrYpxgLIzG_kc,2602
|
45
45
|
easylink/utilities/validation_utils.py,sha256=W9r_RXcivJjfpioLhONirfwdByYttxNsVY489_sbrYQ,1683
|
46
|
-
easylink-0.1.
|
47
|
-
easylink-0.1.
|
48
|
-
easylink-0.1.
|
49
|
-
easylink-0.1.
|
50
|
-
easylink-0.1.
|
46
|
+
easylink-0.1.10.dist-info/METADATA,sha256=wPKznMaCPytiFoECbqY0jcFra-Hl28FMQJLo4XQcwdo,2805
|
47
|
+
easylink-0.1.10.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
|
48
|
+
easylink-0.1.10.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
|
49
|
+
easylink-0.1.10.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
|
50
|
+
easylink-0.1.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|