easylink 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/cli.py +18 -9
- easylink/graph_components.py +19 -5
- easylink/implementation.py +2 -0
- easylink/pipeline.py +92 -34
- easylink/pipeline_graph.py +112 -27
- easylink/pipeline_schema.py +7 -7
- easylink/pipeline_schema_constants/__init__.py +14 -0
- easylink/pipeline_schema_constants/development.py +137 -122
- easylink/pipeline_schema_constants/testing.py +142 -3
- easylink/rule.py +282 -22
- easylink/runner.py +1 -0
- easylink/step.py +442 -345
- easylink/utilities/__init__.py +3 -2
- easylink/utilities/aggregator_utils.py +32 -0
- easylink/utilities/data_utils.py +99 -5
- easylink/utilities/general_utils.py +49 -10
- easylink/utilities/paths.py +9 -3
- easylink/utilities/splitter_utils.py +72 -0
- easylink/utilities/validation_utils.py +29 -0
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/METADATA +1 -1
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/RECORD +25 -23
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/WHEEL +1 -1
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.6.dist-info → easylink-0.1.8.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,13 @@
|
|
1
1
|
"""
|
2
|
-
|
3
|
-
Development Pipeline
|
4
|
-
|
2
|
+
=====================================
|
3
|
+
Development Pipeline Schema Constants
|
4
|
+
=====================================
|
5
|
+
|
6
|
+
This module contains the parameters required to instantiate the
|
7
|
+
:class:`~easylink.pipeline_schema.PipelineSchema` for the so-called "development"
|
8
|
+
pipeline, i.e. the pipeline used strictly for development purposes as opposed to
|
9
|
+
real entity resolution since it relies on dummy steps, data, and containers.
|
10
|
+
|
5
11
|
"""
|
6
12
|
|
7
13
|
from easylink.graph_components import (
|
@@ -13,6 +19,7 @@ from easylink.graph_components import (
|
|
13
19
|
)
|
14
20
|
from easylink.step import (
|
15
21
|
ChoiceStep,
|
22
|
+
EmbarrassinglyParallelStep,
|
16
23
|
HierarchicalStep,
|
17
24
|
InputStep,
|
18
25
|
LoopStep,
|
@@ -20,6 +27,8 @@ from easylink.step import (
|
|
20
27
|
ParallelStep,
|
21
28
|
Step,
|
22
29
|
)
|
30
|
+
from easylink.utilities.aggregator_utils import concatenate_datasets
|
31
|
+
from easylink.utilities.splitter_utils import split_data_by_size
|
23
32
|
from easylink.utilities.validation_utils import validate_input_file_dummy
|
24
33
|
|
25
34
|
NODES = [
|
@@ -49,16 +58,22 @@ NODES = [
|
|
49
58
|
output_slots=[OutputSlot("step_2_main_output")],
|
50
59
|
),
|
51
60
|
LoopStep(
|
52
|
-
template_step=
|
61
|
+
template_step=EmbarrassinglyParallelStep(
|
53
62
|
step_name="step_3",
|
54
63
|
input_slots=[
|
55
64
|
InputSlot(
|
56
65
|
name="step_3_main_input",
|
57
66
|
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
58
67
|
validator=validate_input_file_dummy,
|
68
|
+
splitter=split_data_by_size,
|
69
|
+
),
|
70
|
+
],
|
71
|
+
output_slots=[
|
72
|
+
OutputSlot(
|
73
|
+
name="step_3_main_output",
|
74
|
+
aggregator=concatenate_datasets,
|
59
75
|
),
|
60
76
|
],
|
61
|
-
output_slots=[OutputSlot("step_3_main_output")],
|
62
77
|
),
|
63
78
|
self_edges=[
|
64
79
|
EdgeParams(
|
@@ -86,91 +101,88 @@ NODES = [
|
|
86
101
|
output_slots=[OutputSlot("choice_section_main_output")],
|
87
102
|
choices={
|
88
103
|
"simple": {
|
89
|
-
"
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
),
|
172
|
-
],
|
173
|
-
"edges": [],
|
104
|
+
"step": HierarchicalStep(
|
105
|
+
step_name="step_4",
|
106
|
+
input_slots=[
|
107
|
+
InputSlot(
|
108
|
+
name="step_4_main_input",
|
109
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
110
|
+
validator=validate_input_file_dummy,
|
111
|
+
),
|
112
|
+
InputSlot(
|
113
|
+
name="step_4_secondary_input",
|
114
|
+
env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
|
115
|
+
validator=validate_input_file_dummy,
|
116
|
+
),
|
117
|
+
],
|
118
|
+
output_slots=[OutputSlot("step_4_main_output")],
|
119
|
+
nodes=[
|
120
|
+
Step(
|
121
|
+
step_name="step_4a",
|
122
|
+
input_slots=[
|
123
|
+
InputSlot(
|
124
|
+
name="step_4a_main_input",
|
125
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
126
|
+
validator=validate_input_file_dummy,
|
127
|
+
),
|
128
|
+
InputSlot(
|
129
|
+
name="step_4a_secondary_input",
|
130
|
+
env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
|
131
|
+
validator=validate_input_file_dummy,
|
132
|
+
),
|
133
|
+
],
|
134
|
+
output_slots=[OutputSlot("step_4a_main_output")],
|
135
|
+
),
|
136
|
+
Step(
|
137
|
+
step_name="step_4b",
|
138
|
+
input_slots=[
|
139
|
+
InputSlot(
|
140
|
+
name="step_4b_main_input",
|
141
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
142
|
+
validator=validate_input_file_dummy,
|
143
|
+
),
|
144
|
+
InputSlot(
|
145
|
+
name="step_4b_secondary_input",
|
146
|
+
env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
|
147
|
+
validator=validate_input_file_dummy,
|
148
|
+
),
|
149
|
+
],
|
150
|
+
output_slots=[OutputSlot("step_4b_main_output")],
|
151
|
+
),
|
152
|
+
],
|
153
|
+
edges=[
|
154
|
+
EdgeParams(
|
155
|
+
source_node="step_4a",
|
156
|
+
target_node="step_4b",
|
157
|
+
output_slot="step_4a_main_output",
|
158
|
+
input_slot="step_4b_main_input",
|
159
|
+
),
|
160
|
+
],
|
161
|
+
input_slot_mappings=[
|
162
|
+
InputSlotMapping(
|
163
|
+
parent_slot="step_4_main_input",
|
164
|
+
child_node="step_4a",
|
165
|
+
child_slot="step_4a_main_input",
|
166
|
+
),
|
167
|
+
InputSlotMapping(
|
168
|
+
parent_slot="step_4_secondary_input",
|
169
|
+
child_node="step_4a",
|
170
|
+
child_slot="step_4a_secondary_input",
|
171
|
+
),
|
172
|
+
InputSlotMapping(
|
173
|
+
parent_slot="step_4_secondary_input",
|
174
|
+
child_node="step_4b",
|
175
|
+
child_slot="step_4b_secondary_input",
|
176
|
+
),
|
177
|
+
],
|
178
|
+
output_slot_mappings=[
|
179
|
+
OutputSlotMapping(
|
180
|
+
parent_slot="step_4_main_output",
|
181
|
+
child_node="step_4b",
|
182
|
+
child_slot="step_4b_main_output",
|
183
|
+
),
|
184
|
+
],
|
185
|
+
),
|
174
186
|
"input_slot_mappings": [
|
175
187
|
InputSlotMapping(
|
176
188
|
parent_slot="choice_section_main_input",
|
@@ -192,38 +204,41 @@ NODES = [
|
|
192
204
|
],
|
193
205
|
},
|
194
206
|
"complex": {
|
195
|
-
"
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
207
|
+
"step": HierarchicalStep(
|
208
|
+
step_name="step_5_and_6",
|
209
|
+
nodes=[
|
210
|
+
Step(
|
211
|
+
step_name="step_5",
|
212
|
+
input_slots=[
|
213
|
+
InputSlot(
|
214
|
+
name="step_5_main_input",
|
215
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
216
|
+
validator=validate_input_file_dummy,
|
217
|
+
),
|
218
|
+
],
|
219
|
+
output_slots=[OutputSlot("step_5_main_output")],
|
220
|
+
),
|
221
|
+
Step(
|
222
|
+
step_name="step_6",
|
223
|
+
input_slots=[
|
224
|
+
InputSlot(
|
225
|
+
name="step_6_main_input",
|
226
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
227
|
+
validator=validate_input_file_dummy,
|
228
|
+
),
|
229
|
+
],
|
230
|
+
output_slots=[OutputSlot("step_6_main_output")],
|
231
|
+
),
|
232
|
+
],
|
233
|
+
edges=[
|
234
|
+
EdgeParams(
|
235
|
+
source_node="step_5",
|
236
|
+
target_node="step_6",
|
237
|
+
output_slot="step_5_main_output",
|
238
|
+
input_slot="step_6_main_input",
|
239
|
+
),
|
240
|
+
],
|
241
|
+
),
|
227
242
|
"input_slot_mappings": [
|
228
243
|
InputSlotMapping(
|
229
244
|
parent_slot="choice_section_main_input",
|
@@ -1,7 +1,11 @@
|
|
1
1
|
"""
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
=================================
|
3
|
+
Testing Pipeline Schema Constants
|
4
|
+
=================================
|
5
|
+
|
6
|
+
This module contains the parameters required to instantiate various
|
7
|
+
:class:`~easylink.pipeline_schema.PipelineSchema` used strictly for testing purposes.
|
8
|
+
|
5
9
|
"""
|
6
10
|
|
7
11
|
from easylink.graph_components import (
|
@@ -57,6 +61,76 @@ SINGLE_STEP_EDGES = [
|
|
57
61
|
|
58
62
|
SINGLE_STEP_SCHEMA_PARAMS = (SINGLE_STEP_NODES, SINGLE_STEP_EDGES)
|
59
63
|
|
64
|
+
TRIPLE_STEP_NODES = [
|
65
|
+
InputStep(),
|
66
|
+
Step(
|
67
|
+
step_name="step_1",
|
68
|
+
input_slots=[
|
69
|
+
InputSlot(
|
70
|
+
name="step_1_main_input",
|
71
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
72
|
+
validator=validate_input_file_dummy,
|
73
|
+
)
|
74
|
+
],
|
75
|
+
output_slots=[OutputSlot("step_1_main_output")],
|
76
|
+
),
|
77
|
+
Step(
|
78
|
+
step_name="step_2",
|
79
|
+
input_slots=[
|
80
|
+
InputSlot(
|
81
|
+
name="step_2_main_input",
|
82
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
83
|
+
validator=validate_input_file_dummy,
|
84
|
+
)
|
85
|
+
],
|
86
|
+
output_slots=[OutputSlot("step_2_main_output")],
|
87
|
+
),
|
88
|
+
Step(
|
89
|
+
step_name="step_3",
|
90
|
+
input_slots=[
|
91
|
+
InputSlot(
|
92
|
+
name="step_3_main_input",
|
93
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
94
|
+
validator=validate_input_file_dummy,
|
95
|
+
)
|
96
|
+
],
|
97
|
+
output_slots=[OutputSlot("step_3_main_output")],
|
98
|
+
),
|
99
|
+
OutputStep(
|
100
|
+
input_slots=[
|
101
|
+
InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
|
102
|
+
],
|
103
|
+
),
|
104
|
+
]
|
105
|
+
TRIPLE_STEP_EDGES = [
|
106
|
+
EdgeParams(
|
107
|
+
source_node="input_data",
|
108
|
+
target_node="step_1",
|
109
|
+
output_slot="all",
|
110
|
+
input_slot="step_1_main_input",
|
111
|
+
),
|
112
|
+
EdgeParams(
|
113
|
+
source_node="step_1",
|
114
|
+
target_node="step_2",
|
115
|
+
output_slot="step_1_main_output",
|
116
|
+
input_slot="step_2_main_input",
|
117
|
+
),
|
118
|
+
EdgeParams(
|
119
|
+
source_node="step_2",
|
120
|
+
target_node="step_3",
|
121
|
+
output_slot="step_2_main_output",
|
122
|
+
input_slot="step_3_main_input",
|
123
|
+
),
|
124
|
+
EdgeParams(
|
125
|
+
source_node="step_3",
|
126
|
+
target_node="results",
|
127
|
+
output_slot="step_3_main_output",
|
128
|
+
input_slot="result",
|
129
|
+
),
|
130
|
+
]
|
131
|
+
|
132
|
+
TRIPLE_STEP_SCHEMA_PARAMS = (TRIPLE_STEP_NODES, TRIPLE_STEP_EDGES)
|
133
|
+
|
60
134
|
|
61
135
|
BAD_COMBINED_TOPOLOGY_NODES = [
|
62
136
|
InputStep(),
|
@@ -217,3 +291,68 @@ NESTED_TEMPLATED_STEPS_NODES = [
|
|
217
291
|
|
218
292
|
|
219
293
|
NESTED_TEMPLATED_STEPS_SCHEMA_PARAMS = (NESTED_TEMPLATED_STEPS_NODES, SINGLE_STEP_EDGES)
|
294
|
+
|
295
|
+
|
296
|
+
COMBINE_WITH_ITERATION_NODES = [
|
297
|
+
InputStep(),
|
298
|
+
LoopStep(
|
299
|
+
template_step=Step(
|
300
|
+
step_name="step_1",
|
301
|
+
input_slots=[
|
302
|
+
InputSlot(
|
303
|
+
name="step_1_main_input",
|
304
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
305
|
+
validator=validate_input_file_dummy,
|
306
|
+
)
|
307
|
+
],
|
308
|
+
output_slots=[OutputSlot("step_1_main_output")],
|
309
|
+
),
|
310
|
+
self_edges=[
|
311
|
+
EdgeParams(
|
312
|
+
source_node="step_1",
|
313
|
+
target_node="step_1",
|
314
|
+
output_slot="step_1_main_output",
|
315
|
+
input_slot="step_1_main_input",
|
316
|
+
),
|
317
|
+
],
|
318
|
+
),
|
319
|
+
Step(
|
320
|
+
step_name="step_2",
|
321
|
+
input_slots=[
|
322
|
+
InputSlot(
|
323
|
+
name="step_2_main_input",
|
324
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
|
325
|
+
validator=validate_input_file_dummy,
|
326
|
+
)
|
327
|
+
],
|
328
|
+
output_slots=[OutputSlot("step_2_main_output")],
|
329
|
+
),
|
330
|
+
OutputStep(
|
331
|
+
input_slots=[
|
332
|
+
InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
|
333
|
+
],
|
334
|
+
),
|
335
|
+
]
|
336
|
+
DOUBLE_STEP_EDGES = [
|
337
|
+
EdgeParams(
|
338
|
+
source_node="input_data",
|
339
|
+
target_node="step_1",
|
340
|
+
output_slot="all",
|
341
|
+
input_slot="step_1_main_input",
|
342
|
+
),
|
343
|
+
EdgeParams(
|
344
|
+
source_node="step_1",
|
345
|
+
target_node="step_2",
|
346
|
+
output_slot="step_1_main_output",
|
347
|
+
input_slot="step_2_main_input",
|
348
|
+
),
|
349
|
+
EdgeParams(
|
350
|
+
source_node="step_2",
|
351
|
+
target_node="results",
|
352
|
+
output_slot="step_2_main_output",
|
353
|
+
input_slot="result",
|
354
|
+
),
|
355
|
+
]
|
356
|
+
|
357
|
+
|
358
|
+
COMBINE_WITH_ITERATION_SCHEMA_PARAMS = (COMBINE_WITH_ITERATION_NODES, DOUBLE_STEP_EDGES)
|