easylink 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,13 @@
1
1
  """
2
- ============================
3
- Development Pipeline Schemas
4
- ============================
2
+ =====================================
3
+ Development Pipeline Schema Constants
4
+ =====================================
5
+
6
+ This module contains the parameters required to instantiate the
7
+ :class:`~easylink.pipeline_schema.PipelineSchema` for the so-called "development"
8
+ pipeline, i.e. the pipeline used strictly for development purposes as opposed to
9
+ real entity resolution since it relies on dummy steps, data, and containers.
10
+
5
11
  """
6
12
 
7
13
  from easylink.graph_components import (
@@ -13,6 +19,7 @@ from easylink.graph_components import (
13
19
  )
14
20
  from easylink.step import (
15
21
  ChoiceStep,
22
+ EmbarrassinglyParallelStep,
16
23
  HierarchicalStep,
17
24
  InputStep,
18
25
  LoopStep,
@@ -20,6 +27,8 @@ from easylink.step import (
20
27
  ParallelStep,
21
28
  Step,
22
29
  )
30
+ from easylink.utilities.aggregator_utils import concatenate_datasets
31
+ from easylink.utilities.splitter_utils import split_data_by_size
23
32
  from easylink.utilities.validation_utils import validate_input_file_dummy
24
33
 
25
34
  NODES = [
@@ -49,16 +58,22 @@ NODES = [
49
58
  output_slots=[OutputSlot("step_2_main_output")],
50
59
  ),
51
60
  LoopStep(
52
- template_step=Step(
61
+ template_step=EmbarrassinglyParallelStep(
53
62
  step_name="step_3",
54
63
  input_slots=[
55
64
  InputSlot(
56
65
  name="step_3_main_input",
57
66
  env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
58
67
  validator=validate_input_file_dummy,
68
+ splitter=split_data_by_size,
69
+ ),
70
+ ],
71
+ output_slots=[
72
+ OutputSlot(
73
+ name="step_3_main_output",
74
+ aggregator=concatenate_datasets,
59
75
  ),
60
76
  ],
61
- output_slots=[OutputSlot("step_3_main_output")],
62
77
  ),
63
78
  self_edges=[
64
79
  EdgeParams(
@@ -86,91 +101,88 @@ NODES = [
86
101
  output_slots=[OutputSlot("choice_section_main_output")],
87
102
  choices={
88
103
  "simple": {
89
- "nodes": [
90
- HierarchicalStep(
91
- step_name="step_4",
92
- input_slots=[
93
- InputSlot(
94
- name="step_4_main_input",
95
- env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
96
- validator=validate_input_file_dummy,
97
- ),
98
- InputSlot(
99
- name="step_4_secondary_input",
100
- env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
101
- validator=validate_input_file_dummy,
102
- ),
103
- ],
104
- output_slots=[OutputSlot("step_4_main_output")],
105
- nodes=[
106
- Step(
107
- step_name="step_4a",
108
- input_slots=[
109
- InputSlot(
110
- name="step_4a_main_input",
111
- env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
112
- validator=validate_input_file_dummy,
113
- ),
114
- InputSlot(
115
- name="step_4a_secondary_input",
116
- env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
117
- validator=validate_input_file_dummy,
118
- ),
119
- ],
120
- output_slots=[OutputSlot("step_4a_main_output")],
121
- ),
122
- Step(
123
- step_name="step_4b",
124
- input_slots=[
125
- InputSlot(
126
- name="step_4b_main_input",
127
- env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
128
- validator=validate_input_file_dummy,
129
- ),
130
- InputSlot(
131
- name="step_4b_secondary_input",
132
- env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
133
- validator=validate_input_file_dummy,
134
- ),
135
- ],
136
- output_slots=[OutputSlot("step_4b_main_output")],
137
- ),
138
- ],
139
- edges=[
140
- EdgeParams(
141
- source_node="step_4a",
142
- target_node="step_4b",
143
- output_slot="step_4a_main_output",
144
- input_slot="step_4b_main_input",
145
- ),
146
- ],
147
- input_slot_mappings=[
148
- InputSlotMapping(
149
- parent_slot="step_4_main_input",
150
- child_node="step_4a",
151
- child_slot="step_4a_main_input",
152
- ),
153
- InputSlotMapping(
154
- parent_slot="step_4_secondary_input",
155
- child_node="step_4a",
156
- child_slot="step_4a_secondary_input",
157
- ),
158
- InputSlotMapping(
159
- parent_slot="step_4_secondary_input",
160
- child_node="step_4b",
161
- child_slot="step_4b_secondary_input",
162
- ),
163
- ],
164
- output_slot_mappings=[
165
- OutputSlotMapping(
166
- parent_slot="step_4_main_output",
167
- child_node="step_4b",
168
- child_slot="step_4b_main_output",
169
- ),
170
- ],
171
- ),
172
- ],
173
- "edges": [],
104
+ "step": HierarchicalStep(
105
+ step_name="step_4",
106
+ input_slots=[
107
+ InputSlot(
108
+ name="step_4_main_input",
109
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
110
+ validator=validate_input_file_dummy,
111
+ ),
112
+ InputSlot(
113
+ name="step_4_secondary_input",
114
+ env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
115
+ validator=validate_input_file_dummy,
116
+ ),
117
+ ],
118
+ output_slots=[OutputSlot("step_4_main_output")],
119
+ nodes=[
120
+ Step(
121
+ step_name="step_4a",
122
+ input_slots=[
123
+ InputSlot(
124
+ name="step_4a_main_input",
125
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
126
+ validator=validate_input_file_dummy,
127
+ ),
128
+ InputSlot(
129
+ name="step_4a_secondary_input",
130
+ env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
131
+ validator=validate_input_file_dummy,
132
+ ),
133
+ ],
134
+ output_slots=[OutputSlot("step_4a_main_output")],
135
+ ),
136
+ Step(
137
+ step_name="step_4b",
138
+ input_slots=[
139
+ InputSlot(
140
+ name="step_4b_main_input",
141
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
142
+ validator=validate_input_file_dummy,
143
+ ),
144
+ InputSlot(
145
+ name="step_4b_secondary_input",
146
+ env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
147
+ validator=validate_input_file_dummy,
148
+ ),
149
+ ],
150
+ output_slots=[OutputSlot("step_4b_main_output")],
151
+ ),
152
+ ],
153
+ edges=[
154
+ EdgeParams(
155
+ source_node="step_4a",
156
+ target_node="step_4b",
157
+ output_slot="step_4a_main_output",
158
+ input_slot="step_4b_main_input",
159
+ ),
160
+ ],
161
+ input_slot_mappings=[
162
+ InputSlotMapping(
163
+ parent_slot="step_4_main_input",
164
+ child_node="step_4a",
165
+ child_slot="step_4a_main_input",
166
+ ),
167
+ InputSlotMapping(
168
+ parent_slot="step_4_secondary_input",
169
+ child_node="step_4a",
170
+ child_slot="step_4a_secondary_input",
171
+ ),
172
+ InputSlotMapping(
173
+ parent_slot="step_4_secondary_input",
174
+ child_node="step_4b",
175
+ child_slot="step_4b_secondary_input",
176
+ ),
177
+ ],
178
+ output_slot_mappings=[
179
+ OutputSlotMapping(
180
+ parent_slot="step_4_main_output",
181
+ child_node="step_4b",
182
+ child_slot="step_4b_main_output",
183
+ ),
184
+ ],
185
+ ),
174
186
  "input_slot_mappings": [
175
187
  InputSlotMapping(
176
188
  parent_slot="choice_section_main_input",
@@ -192,38 +204,41 @@ NODES = [
192
204
  ],
193
205
  },
194
206
  "complex": {
195
- "nodes": [
196
- Step(
197
- step_name="step_5",
198
- input_slots=[
199
- InputSlot(
200
- name="step_5_main_input",
201
- env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
202
- validator=validate_input_file_dummy,
203
- ),
204
- ],
205
- output_slots=[OutputSlot("step_5_main_output")],
206
- ),
207
- Step(
208
- step_name="step_6",
209
- input_slots=[
210
- InputSlot(
211
- name="step_6_main_input",
212
- env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
213
- validator=validate_input_file_dummy,
214
- ),
215
- ],
216
- output_slots=[OutputSlot("step_6_main_output")],
217
- ),
218
- ],
219
- "edges": [
220
- EdgeParams(
221
- source_node="step_5",
222
- target_node="step_6",
223
- output_slot="step_5_main_output",
224
- input_slot="step_6_main_input",
225
- ),
226
- ],
207
+ "step": HierarchicalStep(
208
+ step_name="step_5_and_6",
209
+ nodes=[
210
+ Step(
211
+ step_name="step_5",
212
+ input_slots=[
213
+ InputSlot(
214
+ name="step_5_main_input",
215
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
216
+ validator=validate_input_file_dummy,
217
+ ),
218
+ ],
219
+ output_slots=[OutputSlot("step_5_main_output")],
220
+ ),
221
+ Step(
222
+ step_name="step_6",
223
+ input_slots=[
224
+ InputSlot(
225
+ name="step_6_main_input",
226
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
227
+ validator=validate_input_file_dummy,
228
+ ),
229
+ ],
230
+ output_slots=[OutputSlot("step_6_main_output")],
231
+ ),
232
+ ],
233
+ edges=[
234
+ EdgeParams(
235
+ source_node="step_5",
236
+ target_node="step_6",
237
+ output_slot="step_5_main_output",
238
+ input_slot="step_6_main_input",
239
+ ),
240
+ ],
241
+ ),
227
242
  "input_slot_mappings": [
228
243
  InputSlotMapping(
229
244
  parent_slot="choice_section_main_input",
@@ -1,7 +1,11 @@
1
1
  """
2
- =====================
3
- Test Pipeline Schemas
4
- =====================
2
+ =================================
3
+ Testing Pipeline Schema Constants
4
+ =================================
5
+
6
+ This module contains the parameters required to instantiate various
7
+ :class:`~easylink.pipeline_schema.PipelineSchema` instances used strictly for testing purposes.
8
+
5
9
  """
6
10
 
7
11
  from easylink.graph_components import (
@@ -57,6 +61,76 @@ SINGLE_STEP_EDGES = [
57
61
 
58
62
  SINGLE_STEP_SCHEMA_PARAMS = (SINGLE_STEP_NODES, SINGLE_STEP_EDGES)
59
63
 
64
# Nodes of a three-step test schema: an InputStep, three single-slot Steps
# (step_1 through step_3), and an OutputStep. The wiring between these nodes
# is declared separately in TRIPLE_STEP_EDGES.
TRIPLE_STEP_NODES = [
    InputStep(),
    # The three steps differ only in their names, so they are generated from a
    # table of (step name, input slot name, output slot name).
    *(
        Step(
            step_name=step_name,
            input_slots=[
                InputSlot(
                    name=input_slot_name,
                    env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
                    validator=validate_input_file_dummy,
                )
            ],
            output_slots=[OutputSlot(output_slot_name)],
        )
        for step_name, input_slot_name, output_slot_name in (
            ("step_1", "step_1_main_input", "step_1_main_output"),
            ("step_2", "step_2_main_input", "step_2_main_output"),
            ("step_3", "step_3_main_input", "step_3_main_output"),
        )
    ),
    # The output step's single "result" slot is declared with env_var=None.
    OutputStep(
        input_slots=[
            InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
        ],
    ),
]
105
# Edges chaining the triple-step nodes linearly:
# input_data -> step_1 -> step_2 -> step_3 -> results.
# Each table row is (source node, output slot, target node, input slot).
TRIPLE_STEP_EDGES = [
    EdgeParams(
        source_node=source_node,
        target_node=target_node,
        output_slot=output_slot,
        input_slot=input_slot,
    )
    for source_node, output_slot, target_node, input_slot in (
        ("input_data", "all", "step_1", "step_1_main_input"),
        ("step_1", "step_1_main_output", "step_2", "step_2_main_input"),
        ("step_2", "step_2_main_output", "step_3", "step_3_main_input"),
        ("step_3", "step_3_main_output", "results", "result"),
    )
]

# (nodes, edges) pair for the triple-step schema.
TRIPLE_STEP_SCHEMA_PARAMS = (TRIPLE_STEP_NODES, TRIPLE_STEP_EDGES)
133
+
60
134
 
61
135
  BAD_COMBINED_TOPOLOGY_NODES = [
62
136
  InputStep(),
@@ -217,3 +291,68 @@ NESTED_TEMPLATED_STEPS_NODES = [
217
291
 
218
292
 
219
293
  NESTED_TEMPLATED_STEPS_SCHEMA_PARAMS = (NESTED_TEMPLATED_STEPS_NODES, SINGLE_STEP_EDGES)
294
+
295
+
296
# Nodes of a two-step test schema in which step_1 is the template of a
# LoopStep and step_2 is a plain Step.
COMBINE_WITH_ITERATION_NODES = [
    InputStep(),
    LoopStep(
        template_step=Step(
            step_name="step_1",
            input_slots=[
                InputSlot(
                    name="step_1_main_input",
                    env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
                    validator=validate_input_file_dummy,
                )
            ],
            output_slots=[OutputSlot("step_1_main_output")],
        ),
        # Self-edge: step_1's main output is wired back to step_1's main input.
        self_edges=[
            EdgeParams(
                output_slot="step_1_main_output",
                input_slot="step_1_main_input",
                source_node="step_1",
                target_node="step_1",
            ),
        ],
    ),
    Step(
        step_name="step_2",
        input_slots=[
            InputSlot(
                name="step_2_main_input",
                env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
                validator=validate_input_file_dummy,
            )
        ],
        output_slots=[OutputSlot("step_2_main_output")],
    ),
    # The output step's single "result" slot is declared with env_var=None.
    OutputStep(
        input_slots=[
            InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
        ],
    ),
]
336
# Edges chaining two steps linearly: input_data -> step_1 -> step_2 -> results.
# Each table row is (source node, output slot, target node, input slot).
DOUBLE_STEP_EDGES = [
    EdgeParams(
        source_node=source_node,
        target_node=target_node,
        output_slot=output_slot,
        input_slot=input_slot,
    )
    for source_node, output_slot, target_node, input_slot in (
        ("input_data", "all", "step_1", "step_1_main_input"),
        ("step_1", "step_1_main_output", "step_2", "step_2_main_input"),
        ("step_2", "step_2_main_output", "results", "result"),
    )
]


# (nodes, edges) pair for the combine-with-iteration schema.
COMBINE_WITH_ITERATION_SCHEMA_PARAMS = (COMBINE_WITH_ITERATION_NODES, DOUBLE_STEP_EDGES)