easylink 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/graph_components.py +7 -3
- easylink/pipeline_schema.py +7 -7
- easylink/pipeline_schema_constants/__init__.py +11 -0
- easylink/pipeline_schema_constants/development.py +143 -135
- easylink/pipeline_schema_constants/testing.py +7 -3
- easylink/step.py +391 -353
- easylink/utilities/__init__.py +3 -2
- easylink/utilities/aggregator_utils.py +1 -0
- easylink/utilities/data_utils.py +98 -5
- easylink/utilities/general_utils.py +48 -10
- easylink/utilities/paths.py +9 -3
- easylink/utilities/splitter_utils.py +1 -0
- easylink/utilities/validation_utils.py +29 -0
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/METADATA +1 -1
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/RECORD +19 -19
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/WHEEL +1 -1
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.7.dist-info → easylink-0.1.9.dist-info}/top_level.txt +0 -0
easylink/_version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.1.7"
|
1
|
+
__version__ = "0.1.9"
|
easylink/graph_components.py
CHANGED
@@ -259,9 +259,13 @@ class SlotMapping(ABC):
|
|
259
259
|
"""A mapping between a slot on a parent node and a slot on one of its child nodes.
|
260
260
|
|
261
261
|
``SlotMapping`` is an interface intended to be used by concrete :class:`InputSlotMapping`
|
262
|
-
and :class:`OutputSlotMapping` classes
|
263
|
-
|
264
|
-
|
262
|
+
and :class:`OutputSlotMapping` classes to represent a mapping between parent
|
263
|
+
and child nodes at different levels of a potentially-nested graph. Specifically,
|
264
|
+
they are used to (1) remap edges between parent and child nodes in a
|
265
|
+
:class:`~easylink.pipeline_schema.PipelineSchema` and (2) map a leaf
|
266
|
+
:class:`Step's<easylink.step.Step>` slots to the corresponding
|
267
|
+
:class:`~easylink.implementation.Implementation` slots when building the
|
268
|
+
:class:`~easylink.graph_components.ImplementationGraph`.
|
265
269
|
|
266
270
|
Notes
|
267
271
|
-----
|
easylink/pipeline_schema.py
CHANGED
@@ -22,8 +22,8 @@ class PipelineSchema(HierarchicalStep):
|
|
22
22
|
"""All possible pipelines that are fully supported.
|
23
23
|
|
24
24
|
A ``PipelineSchema`` is a :class:`~easylink.step.HierarchicalStep` whose
|
25
|
-
:class:`~easylink.graph_components.StepGraph` determines all possible allowable
|
26
|
-
The fundamental purpose of this class is to validate that the user-requested
|
25
|
+
:class:`~easylink.graph_components.StepGraph` determines all possible allowable
|
26
|
+
pipelines. The fundamental purpose of this class is to validate that the user-requested
|
27
27
|
pipeline to run conforms to a fully supported pipeline.
|
28
28
|
|
29
29
|
See :class:`~easylink.step.HierarchicalStep` for inherited attributes.
|
@@ -80,9 +80,9 @@ class PipelineSchema(HierarchicalStep):
|
|
80
80
|
the user-provided pipeline specification file.
|
81
81
|
"""
|
82
82
|
return super().validate_step(
|
83
|
-
|
84
|
-
|
85
|
-
input_data_config
|
83
|
+
LayeredConfigTree({"substeps": pipeline_config.steps.to_dict()}),
|
84
|
+
pipeline_config.combined_implementations,
|
85
|
+
input_data_config,
|
86
86
|
)
|
87
87
|
|
88
88
|
def validate_inputs(self, input_data: dict[str, Path]) -> dict[str, list[str]]:
|
@@ -133,8 +133,8 @@ class PipelineSchema(HierarchicalStep):
|
|
133
133
|
"""
|
134
134
|
self._configuration_state = NonLeafConfigurationState(
|
135
135
|
self,
|
136
|
-
pipeline_config
|
137
|
-
combined_implementations=pipeline_config
|
136
|
+
pipeline_config.steps,
|
137
|
+
combined_implementations=pipeline_config.combined_implementations,
|
138
138
|
input_data_config=input_data_config,
|
139
139
|
)
|
140
140
|
|
easylink/pipeline_schema_constants/__init__.py
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
"""
|
2
|
+
=========================
|
3
|
+
Pipeline Schema Constants
|
4
|
+
=========================
|
5
|
+
|
6
|
+
An EasyLink :class:`~easylink.pipeline_schema.PipelineSchema` is a collection of
|
7
|
+
:class:`Steps<easylink.step.Step>` that define a fully supported pipeline. This
|
8
|
+
package defines the nodes and edges required to instantiate such ``PipelineSchemas``.
|
9
|
+
|
10
|
+
"""
|
11
|
+
|
1
12
|
from easylink.pipeline_schema_constants import development, testing
|
2
13
|
|
3
14
|
ALLOWED_SCHEMA_PARAMS = {
|
easylink/pipeline_schema_constants/development.py
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
"""
|
2
|
-
|
3
|
-
Development Pipeline
|
4
|
-
|
2
|
+
=====================================
|
3
|
+
Development Pipeline Schema Constants
|
4
|
+
=====================================
|
5
|
+
|
6
|
+
This module contains the parameters required to instantiate the
|
7
|
+
:class:`~easylink.pipeline_schema.PipelineSchema` for the so-called "development"
|
8
|
+
pipeline, i.e. the pipeline used strictly for development purposes as opposed to
|
9
|
+
real entity resolution since it relies on dummy steps, data, and containers.
|
10
|
+
|
5
11
|
"""
|
6
12
|
|
7
13
|
from easylink.graph_components import (
|
@@ -53,21 +59,23 @@ NODES = [
|
|
53
59
|
),
|
54
60
|
LoopStep(
|
55
61
|
template_step=EmbarrassinglyParallelStep(
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
62
|
+
step=Step(
|
63
|
+
step_name="step_3",
|
64
|
+
input_slots=[
|
65
|
+
InputSlot(
|
66
|
+
name="step_3_main_input",
|
67
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
68
|
+
validator=validate_input_file_dummy,
|
69
|
+
splitter=split_data_by_size,
|
70
|
+
),
|
71
|
+
],
|
72
|
+
output_slots=[
|
73
|
+
OutputSlot(
|
74
|
+
name="step_3_main_output",
|
75
|
+
aggregator=concatenate_datasets,
|
76
|
+
),
|
77
|
+
],
|
78
|
+
),
|
71
79
|
),
|
72
80
|
self_edges=[
|
73
81
|
EdgeParams(
|
@@ -95,91 +103,88 @@ NODES = [
|
|
95
103
|
output_slots=[OutputSlot("choice_section_main_output")],
|
96
104
|
choices={
|
97
105
|
"simple": {
|
98
|
-
"
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
),
|
181
|
-
],
|
182
|
-
"edges": [],
|
106
|
+
"step": HierarchicalStep(
|
107
|
+
step_name="step_4",
|
108
|
+
input_slots=[
|
109
|
+
InputSlot(
|
110
|
+
name="step_4_main_input",
|
111
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
112
|
+
validator=validate_input_file_dummy,
|
113
|
+
),
|
114
|
+
InputSlot(
|
115
|
+
name="step_4_secondary_input",
|
116
|
+
env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
|
117
|
+
validator=validate_input_file_dummy,
|
118
|
+
),
|
119
|
+
],
|
120
|
+
output_slots=[OutputSlot("step_4_main_output")],
|
121
|
+
nodes=[
|
122
|
+
Step(
|
123
|
+
step_name="step_4a",
|
124
|
+
input_slots=[
|
125
|
+
InputSlot(
|
126
|
+
name="step_4a_main_input",
|
127
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
128
|
+
validator=validate_input_file_dummy,
|
129
|
+
),
|
130
|
+
InputSlot(
|
131
|
+
name="step_4a_secondary_input",
|
132
|
+
env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
|
133
|
+
validator=validate_input_file_dummy,
|
134
|
+
),
|
135
|
+
],
|
136
|
+
output_slots=[OutputSlot("step_4a_main_output")],
|
137
|
+
),
|
138
|
+
Step(
|
139
|
+
step_name="step_4b",
|
140
|
+
input_slots=[
|
141
|
+
InputSlot(
|
142
|
+
name="step_4b_main_input",
|
143
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
144
|
+
validator=validate_input_file_dummy,
|
145
|
+
),
|
146
|
+
InputSlot(
|
147
|
+
name="step_4b_secondary_input",
|
148
|
+
env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
|
149
|
+
validator=validate_input_file_dummy,
|
150
|
+
),
|
151
|
+
],
|
152
|
+
output_slots=[OutputSlot("step_4b_main_output")],
|
153
|
+
),
|
154
|
+
],
|
155
|
+
edges=[
|
156
|
+
EdgeParams(
|
157
|
+
source_node="step_4a",
|
158
|
+
target_node="step_4b",
|
159
|
+
output_slot="step_4a_main_output",
|
160
|
+
input_slot="step_4b_main_input",
|
161
|
+
),
|
162
|
+
],
|
163
|
+
input_slot_mappings=[
|
164
|
+
InputSlotMapping(
|
165
|
+
parent_slot="step_4_main_input",
|
166
|
+
child_node="step_4a",
|
167
|
+
child_slot="step_4a_main_input",
|
168
|
+
),
|
169
|
+
InputSlotMapping(
|
170
|
+
parent_slot="step_4_secondary_input",
|
171
|
+
child_node="step_4a",
|
172
|
+
child_slot="step_4a_secondary_input",
|
173
|
+
),
|
174
|
+
InputSlotMapping(
|
175
|
+
parent_slot="step_4_secondary_input",
|
176
|
+
child_node="step_4b",
|
177
|
+
child_slot="step_4b_secondary_input",
|
178
|
+
),
|
179
|
+
],
|
180
|
+
output_slot_mappings=[
|
181
|
+
OutputSlotMapping(
|
182
|
+
parent_slot="step_4_main_output",
|
183
|
+
child_node="step_4b",
|
184
|
+
child_slot="step_4b_main_output",
|
185
|
+
),
|
186
|
+
],
|
187
|
+
),
|
183
188
|
"input_slot_mappings": [
|
184
189
|
InputSlotMapping(
|
185
190
|
parent_slot="choice_section_main_input",
|
@@ -201,38 +206,41 @@ NODES = [
|
|
201
206
|
],
|
202
207
|
},
|
203
208
|
"complex": {
|
204
|
-
"
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
209
|
+
"step": HierarchicalStep(
|
210
|
+
step_name="step_5_and_6",
|
211
|
+
nodes=[
|
212
|
+
Step(
|
213
|
+
step_name="step_5",
|
214
|
+
input_slots=[
|
215
|
+
InputSlot(
|
216
|
+
name="step_5_main_input",
|
217
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
218
|
+
validator=validate_input_file_dummy,
|
219
|
+
),
|
220
|
+
],
|
221
|
+
output_slots=[OutputSlot("step_5_main_output")],
|
222
|
+
),
|
223
|
+
Step(
|
224
|
+
step_name="step_6",
|
225
|
+
input_slots=[
|
226
|
+
InputSlot(
|
227
|
+
name="step_6_main_input",
|
228
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
229
|
+
validator=validate_input_file_dummy,
|
230
|
+
),
|
231
|
+
],
|
232
|
+
output_slots=[OutputSlot("step_6_main_output")],
|
233
|
+
),
|
234
|
+
],
|
235
|
+
edges=[
|
236
|
+
EdgeParams(
|
237
|
+
source_node="step_5",
|
238
|
+
target_node="step_6",
|
239
|
+
output_slot="step_5_main_output",
|
240
|
+
input_slot="step_6_main_input",
|
241
|
+
),
|
242
|
+
],
|
243
|
+
),
|
236
244
|
"input_slot_mappings": [
|
237
245
|
InputSlotMapping(
|
238
246
|
parent_slot="choice_section_main_input",
|
easylink/pipeline_schema_constants/testing.py
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
"""
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
=================================
|
3
|
+
Testing Pipeline Schema Constants
|
4
|
+
=================================
|
5
|
+
|
6
|
+
This module contains the parameters required to instantiate various
|
7
|
+
:class:`~easylink.pipeline_schema.PipelineSchema` used strictly for testing purposes.
|
8
|
+
|
5
9
|
"""
|
6
10
|
|
7
11
|
from easylink.graph_components import (
|