easylink 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/cli.py +24 -3
- easylink/configuration.py +43 -36
- easylink/devtools/implementation_creator.py +71 -22
- easylink/implementation.py +88 -11
- easylink/implementation_metadata.yaml +177 -29
- easylink/pipeline.py +15 -6
- easylink/pipeline_schema.py +12 -13
- easylink/pipeline_schema_constants/__init__.py +4 -5
- easylink/pipeline_schema_constants/main.py +489 -0
- easylink/runner.py +11 -7
- easylink/step.py +89 -0
- easylink/steps/cascading/exclude_clustered.def +22 -0
- easylink/steps/cascading/exclude_clustered.py +76 -0
- easylink/steps/cascading/exclude_none.def +22 -0
- easylink/steps/cascading/exclude_none.py +76 -0
- easylink/steps/cascading/update_clusters_by_connected_components.def +22 -0
- easylink/steps/cascading/update_clusters_by_connected_components.py +101 -0
- easylink/steps/default/default_clusters_to_links.def +22 -0
- easylink/steps/default/default_clusters_to_links.py +91 -0
- easylink/steps/default/default_determining_exclusions.def +22 -0
- easylink/steps/default/default_determining_exclusions.py +81 -0
- easylink/steps/default/default_removing_records.def +22 -0
- easylink/steps/default/default_removing_records.py +59 -0
- easylink/steps/default/default_schema_alignment.def +22 -0
- easylink/steps/default/default_schema_alignment.py +53 -0
- easylink/steps/default/default_updating_clusters.def +22 -0
- easylink/steps/default/default_updating_clusters.py +67 -0
- easylink/steps/fastLink/fastLink_evaluating_pairs.R +136 -0
- easylink/steps/fastLink/fastLink_evaluating_pairs.def +21 -0
- easylink/steps/fastLink/fastLink_links_to_clusters.R +128 -0
- easylink/steps/fastLink/fastLink_links_to_clusters.def +21 -0
- easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.def +22 -0
- easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.py +42 -0
- easylink/steps/rl-dummy/input_data/create_input_files.ipynb +1433 -0
- easylink/steps/rl-dummy/input_data/input_file_1.parquet +0 -0
- easylink/steps/rl-dummy/input_data/input_file_2.parquet +0 -0
- easylink/steps/rl-dummy/input_data/known_clusters.parquet +0 -0
- easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.def +22 -0
- easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.py +59 -0
- easylink/steps/splink/splink_blocking_and_filtering.def +22 -0
- easylink/steps/splink/splink_blocking_and_filtering.py +130 -0
- easylink/steps/splink/splink_evaluating_pairs.def +22 -0
- easylink/steps/splink/splink_evaluating_pairs.py +164 -0
- easylink/steps/splink/splink_links_to_clusters.def +22 -0
- easylink/steps/splink/splink_links_to_clusters.py +63 -0
- easylink/utilities/data_utils.py +72 -0
- easylink/utilities/paths.py +4 -3
- easylink/utilities/validation_utils.py +509 -11
- {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/METADATA +5 -1
- easylink-0.1.19.dist-info/RECORD +91 -0
- {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/WHEEL +1 -1
- easylink-0.1.19.dist-info/licenses/LICENSE +28 -0
- easylink-0.1.17.dist-info/RECORD +0 -55
- {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/top_level.txt +0 -0
easylink/implementation_metadata.yaml
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
step_1_python_pandas:
|
2
2
|
steps:
|
3
3
|
- step_1
|
4
|
-
|
4
|
+
image_name: python_pandas.sif
|
5
|
+
zenodo_record_id: 15611084
|
6
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
5
7
|
script_cmd: python /dummy_step.py
|
6
8
|
outputs:
|
7
9
|
step_1_main_output: result.parquet
|
8
10
|
step_1a_python_pandas:
|
9
11
|
steps:
|
10
12
|
- step_1a
|
11
|
-
|
13
|
+
image_name: python_pandas.sif
|
14
|
+
zenodo_record_id: 15611084
|
15
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
12
16
|
script_cmd: python /dummy_step.py
|
13
17
|
env:
|
14
18
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -17,7 +21,9 @@ step_1a_python_pandas:
|
|
17
21
|
step_1b_python_pandas:
|
18
22
|
steps:
|
19
23
|
- step_1b
|
20
|
-
|
24
|
+
image_name: python_pandas.sif
|
25
|
+
zenodo_record_id: 15611084
|
26
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
21
27
|
script_cmd: python /dummy_step.py
|
22
28
|
env:
|
23
29
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -26,21 +32,27 @@ step_1b_python_pandas:
|
|
26
32
|
step_2_python_pandas:
|
27
33
|
steps:
|
28
34
|
- step_2
|
29
|
-
|
35
|
+
image_name: python_pandas.sif
|
36
|
+
zenodo_record_id: 15611084
|
37
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
30
38
|
script_cmd: python /dummy_step.py
|
31
39
|
outputs:
|
32
40
|
step_2_main_output: result.parquet
|
33
41
|
step_3_python_pandas:
|
34
42
|
steps:
|
35
43
|
- step_3
|
36
|
-
|
44
|
+
image_name: python_pandas.sif
|
45
|
+
zenodo_record_id: 15611084
|
46
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
37
47
|
script_cmd: python /dummy_step.py
|
38
48
|
outputs:
|
39
49
|
step_3_main_output: result.parquet
|
40
50
|
step_4_python_pandas:
|
41
51
|
steps:
|
42
52
|
- step_4
|
43
|
-
|
53
|
+
image_name: python_pandas.sif
|
54
|
+
zenodo_record_id: 15611084
|
55
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
44
56
|
script_cmd: python /dummy_step.py
|
45
57
|
env:
|
46
58
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -49,7 +61,9 @@ step_4_python_pandas:
|
|
49
61
|
step_5_python_pandas:
|
50
62
|
steps:
|
51
63
|
- step_5
|
52
|
-
|
64
|
+
image_name: python_pandas.sif
|
65
|
+
zenodo_record_id: 15611084
|
66
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
53
67
|
script_cmd: python /dummy_step.py
|
54
68
|
env:
|
55
69
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -58,7 +72,9 @@ step_5_python_pandas:
|
|
58
72
|
step_6_python_pandas:
|
59
73
|
steps:
|
60
74
|
- step_6
|
61
|
-
|
75
|
+
image_name: python_pandas.sif
|
76
|
+
zenodo_record_id: 15611084
|
77
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
62
78
|
script_cmd: python /dummy_step.py
|
63
79
|
env:
|
64
80
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -67,7 +83,9 @@ step_6_python_pandas:
|
|
67
83
|
step_4a_python_pandas:
|
68
84
|
steps:
|
69
85
|
- step_4a
|
70
|
-
|
86
|
+
image_name: python_pandas.sif
|
87
|
+
zenodo_record_id: 15611084
|
88
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
71
89
|
script_cmd: python /dummy_step.py
|
72
90
|
env:
|
73
91
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -76,7 +94,9 @@ step_4a_python_pandas:
|
|
76
94
|
step_4b_python_pandas:
|
77
95
|
steps:
|
78
96
|
- step_4b
|
79
|
-
|
97
|
+
image_name: python_pandas.sif
|
98
|
+
zenodo_record_id: 15611084
|
99
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
80
100
|
script_cmd: python /dummy_step.py
|
81
101
|
env:
|
82
102
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -85,7 +105,9 @@ step_4b_python_pandas:
|
|
85
105
|
step_4b_r:
|
86
106
|
steps:
|
87
107
|
- step_4b
|
88
|
-
|
108
|
+
image_name: r-image.sif
|
109
|
+
zenodo_record_id: 15611084
|
110
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
89
111
|
script_cmd: Rscript /dummy_step.R
|
90
112
|
env:
|
91
113
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -94,7 +116,9 @@ step_4b_r:
|
|
94
116
|
step_1_python_pyspark:
|
95
117
|
steps:
|
96
118
|
- step_1
|
97
|
-
|
119
|
+
image_name: python_pyspark.sif
|
120
|
+
zenodo_record_id: 15611084
|
121
|
+
md5_checksum: 6fb2a2119630138f4db82356b8d78b87
|
98
122
|
script_cmd: python3 /code/dummy_step.py
|
99
123
|
outputs:
|
100
124
|
step_1_main_output: result.parquet
|
@@ -102,7 +126,9 @@ step_1_python_pyspark:
|
|
102
126
|
step_2_python_pyspark:
|
103
127
|
steps:
|
104
128
|
- step_2
|
105
|
-
|
129
|
+
image_name: python_pyspark.sif
|
130
|
+
zenodo_record_id: 15611084
|
131
|
+
md5_checksum: 6fb2a2119630138f4db82356b8d78b87
|
106
132
|
script_cmd: python3 /code/dummy_step.py
|
107
133
|
outputs:
|
108
134
|
step_2_main_output: result.parquet
|
@@ -110,7 +136,9 @@ step_2_python_pyspark:
|
|
110
136
|
step_3_python_pyspark:
|
111
137
|
steps:
|
112
138
|
- step_3
|
113
|
-
|
139
|
+
image_name: python_pyspark.sif
|
140
|
+
zenodo_record_id: 15611084
|
141
|
+
md5_checksum: 6fb2a2119630138f4db82356b8d78b87
|
114
142
|
script_cmd: python3 /code/dummy_step.py
|
115
143
|
outputs:
|
116
144
|
step_3_main_output: result.parquet
|
@@ -118,7 +146,9 @@ step_3_python_pyspark:
|
|
118
146
|
step_4_python_pyspark:
|
119
147
|
steps:
|
120
148
|
- step_4
|
121
|
-
|
149
|
+
image_name: python_pyspark.sif
|
150
|
+
zenodo_record_id: 15611084
|
151
|
+
md5_checksum: 6fb2a2119630138f4db82356b8d78b87
|
122
152
|
script_cmd: python3 /code/dummy_step.py
|
123
153
|
env:
|
124
154
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -127,7 +157,9 @@ step_4_python_pyspark:
|
|
127
157
|
step_1_r:
|
128
158
|
steps:
|
129
159
|
- step_1
|
130
|
-
|
160
|
+
image_name: r-image.sif
|
161
|
+
zenodo_record_id: 15611084
|
162
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
131
163
|
script_cmd: Rscript /dummy_step.R
|
132
164
|
outputs:
|
133
165
|
step_1_main_output: result.parquet
|
@@ -135,7 +167,9 @@ step_1_r:
|
|
135
167
|
step_2_r:
|
136
168
|
steps:
|
137
169
|
- step_2
|
138
|
-
|
170
|
+
image_name: r-image.sif
|
171
|
+
zenodo_record_id: 15611084
|
172
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
139
173
|
script_cmd: Rscript /dummy_step.R
|
140
174
|
outputs:
|
141
175
|
step_2_main_output: result.parquet
|
@@ -143,7 +177,9 @@ step_2_r:
|
|
143
177
|
step_3_r:
|
144
178
|
steps:
|
145
179
|
- step_3
|
146
|
-
|
180
|
+
image_name: r-image.sif
|
181
|
+
zenodo_record_id: 15611084
|
182
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
147
183
|
script_cmd: Rscript /dummy_step.R
|
148
184
|
outputs:
|
149
185
|
step_3_main_output: result.parquet
|
@@ -151,7 +187,9 @@ step_3_r:
|
|
151
187
|
step_4_r:
|
152
188
|
steps:
|
153
189
|
- step_4
|
154
|
-
|
190
|
+
image_name: r-image.sif
|
191
|
+
zenodo_record_id: 15611084
|
192
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
155
193
|
script_cmd: Rscript /dummy_step.R
|
156
194
|
env:
|
157
195
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -162,7 +200,9 @@ step_1_and_step_2_combined_python_pandas:
|
|
162
200
|
steps:
|
163
201
|
- step_1
|
164
202
|
- step_2
|
165
|
-
|
203
|
+
image_name: python_pandas.sif
|
204
|
+
zenodo_record_id: 15611084
|
205
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
166
206
|
script_cmd: python /dummy_step.py
|
167
207
|
outputs:
|
168
208
|
step_2_main_output: result.parquet
|
@@ -170,7 +210,9 @@ step_1_and_step_2_parallel_python_pandas:
|
|
170
210
|
steps:
|
171
211
|
- step_1
|
172
212
|
- step_2
|
173
|
-
|
213
|
+
image_name: python_pandas.sif
|
214
|
+
zenodo_record_id: 15611084
|
215
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
174
216
|
script_cmd: python /dummy_step.py
|
175
217
|
env:
|
176
218
|
INPUT_ENV_VARS: STEP_1_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,STEP_2_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS
|
@@ -180,7 +222,9 @@ step_3_and_step_4_combined_python_pandas:
|
|
180
222
|
steps:
|
181
223
|
- step_3
|
182
224
|
- step_4
|
183
|
-
|
225
|
+
image_name: python_pandas.sif
|
226
|
+
zenodo_record_id: 15611084
|
227
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
184
228
|
script_cmd: python /dummy_step.py
|
185
229
|
outputs:
|
186
230
|
step_4_main_output: result.parquet
|
@@ -188,29 +232,133 @@ step_1a_and_step_1b_combined_python_pandas:
|
|
188
232
|
steps:
|
189
233
|
- step_1a
|
190
234
|
- step_1b
|
191
|
-
|
235
|
+
image_name: python_pandas.sif
|
236
|
+
zenodo_record_id: 15611084
|
237
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
192
238
|
script_cmd: python /dummy_step.py
|
193
239
|
outputs:
|
194
240
|
step_1_main_output: result.parquet
|
195
241
|
dummy_step_1_for_output_dir_example:
|
196
242
|
steps:
|
197
243
|
- step_1_for_output_dir_example
|
198
|
-
|
244
|
+
image_name: main/dummy_step_1_for_output_dir_example.sif
|
199
245
|
script_cmd: python /dummy_step_1_for_output_dir_example.py
|
200
246
|
outputs:
|
201
247
|
step_1_main_output_directory: output_dir/
|
202
248
|
dummy_step_1_for_output_dir_example_default:
|
203
249
|
steps:
|
204
250
|
- step_1_for_output_dir_example
|
205
|
-
|
251
|
+
image_name: main/dummy_step_1_for_output_dir_example.sif
|
206
252
|
script_cmd: python /dummy_step_1_for_output_dir_example.py
|
207
|
-
# leave outputs out for testing purposes
|
208
|
-
# outputs:
|
209
|
-
# step_1_main_output_directory: output_dir/
|
210
253
|
dummy_step_2_for_output_dir_example:
|
211
254
|
steps:
|
212
255
|
- step_2_for_output_dir_example
|
213
|
-
|
256
|
+
image_name: main/dummy_step_2_for_output_dir_example.sif
|
214
257
|
script_cmd: python /dummy_step_2_for_output_dir_example.py
|
215
258
|
outputs:
|
216
259
|
step_2_main_output: result.parquet
|
260
|
+
default_removing_records:
|
261
|
+
steps:
|
262
|
+
- removing_records
|
263
|
+
image_name: main/default_removing_records.sif
|
264
|
+
script_cmd: python /default_removing_records.py
|
265
|
+
outputs:
|
266
|
+
dataset: dataset
|
267
|
+
default_clusters_to_links:
|
268
|
+
steps:
|
269
|
+
- clusters_to_links
|
270
|
+
image_name: main/default_clusters_to_links.sif
|
271
|
+
script_cmd: python /default_clusters_to_links.py
|
272
|
+
outputs:
|
273
|
+
known_links: result.parquet
|
274
|
+
default_determining_exclusions:
|
275
|
+
steps:
|
276
|
+
- determining_exclusions
|
277
|
+
image_name: main/default_determining_exclusions.sif
|
278
|
+
script_cmd: python /default_determining_exclusions.py
|
279
|
+
outputs:
|
280
|
+
ids_to_remove: result.parquet
|
281
|
+
default_updating_clusters:
|
282
|
+
steps:
|
283
|
+
- updating_clusters
|
284
|
+
image_name: main/default_updating_clusters.sif
|
285
|
+
script_cmd: python /default_updating_clusters.py
|
286
|
+
outputs:
|
287
|
+
clusters: clusters.parquet
|
288
|
+
dummy_canonicalizing_and_downstream_analysis:
|
289
|
+
steps:
|
290
|
+
- canonicalizing_and_downstream_analysis
|
291
|
+
image_name: main/dummy_canonicalizing_and_downstream_analysis.sif
|
292
|
+
script_cmd: python /dummy_canonicalizing_and_downstream_analysis.py
|
293
|
+
outputs:
|
294
|
+
analysis_output: result.parquet
|
295
|
+
dummy_pre-processing:
|
296
|
+
steps:
|
297
|
+
- pre-processing
|
298
|
+
image_name: main/dummy_pre-processing.sif
|
299
|
+
script_cmd: python /dummy_pre-processing.py
|
300
|
+
outputs:
|
301
|
+
dataset: dataset
|
302
|
+
default_schema_alignment:
|
303
|
+
steps:
|
304
|
+
- schema_alignment
|
305
|
+
image_name: main/default_schema_alignment.sif
|
306
|
+
script_cmd: python /default_schema_alignment.py
|
307
|
+
outputs:
|
308
|
+
records: result.parquet
|
309
|
+
splink_blocking_and_filtering:
|
310
|
+
steps:
|
311
|
+
- blocking_and_filtering
|
312
|
+
image_name: main/splink_blocking_and_filtering.sif
|
313
|
+
script_cmd: python /splink_blocking_and_filtering.py
|
314
|
+
outputs:
|
315
|
+
blocks: blocks
|
316
|
+
splink_evaluating_pairs:
|
317
|
+
steps:
|
318
|
+
- evaluating_pairs
|
319
|
+
image_name: main/splink_evaluating_pairs.sif
|
320
|
+
script_cmd: python /splink_evaluating_pairs.py
|
321
|
+
outputs:
|
322
|
+
links: result.parquet
|
323
|
+
splink_links_to_clusters:
|
324
|
+
steps:
|
325
|
+
- links_to_clusters
|
326
|
+
image_name: main/splink_links_to_clusters.sif
|
327
|
+
script_cmd: python /splink_links_to_clusters.py
|
328
|
+
outputs:
|
329
|
+
clusters: result.parquet
|
330
|
+
fastLink_evaluating_pairs:
|
331
|
+
steps:
|
332
|
+
- evaluating_pairs
|
333
|
+
image_name: main/fastLink_evaluating_pairs.sif
|
334
|
+
script_cmd: Rscript /fastLink_evaluating_pairs.R
|
335
|
+
outputs:
|
336
|
+
links: result.parquet
|
337
|
+
fastLink_links_to_clusters:
|
338
|
+
steps:
|
339
|
+
- links_to_clusters
|
340
|
+
image_name: main/fastLink_links_to_clusters.sif
|
341
|
+
script_cmd: Rscript /fastLink_links_to_clusters.R
|
342
|
+
outputs:
|
343
|
+
clusters: result.parquet
|
344
|
+
exclude_clustered:
|
345
|
+
steps:
|
346
|
+
- determining_exclusions
|
347
|
+
image_name: main/exclude_clustered.sif
|
348
|
+
script_cmd: python /exclude_clustered.py
|
349
|
+
outputs:
|
350
|
+
ids_to_remove: result.parquet
|
351
|
+
exclude_none:
|
352
|
+
steps:
|
353
|
+
- determining_exclusions
|
354
|
+
image_name: main/exclude_none.sif
|
355
|
+
script_cmd: python /exclude_none.py
|
356
|
+
outputs:
|
357
|
+
ids_to_remove: result.parquet
|
358
|
+
update_clusters_by_connected_components:
|
359
|
+
steps:
|
360
|
+
- updating_clusters
|
361
|
+
image_name: main/update_clusters_by_connected_components.sif
|
362
|
+
script_cmd: python /update_clusters_by_connected_components.py
|
363
|
+
outputs:
|
364
|
+
clusters: result.parquet
|
easylink/pipeline.py
CHANGED
@@ -25,7 +25,6 @@ from easylink.rule import (
|
|
25
25
|
)
|
26
26
|
from easylink.utilities.general_utils import exit_with_validation_error
|
27
27
|
from easylink.utilities.paths import SPARK_SNAKEFILE
|
28
|
-
from easylink.utilities.validation_utils import validate_input_file_dummy
|
29
28
|
|
30
29
|
IMPLEMENTATION_ERRORS_KEY = "IMPLEMENTATION ERRORS"
|
31
30
|
|
@@ -135,7 +134,10 @@ class Pipeline:
|
|
135
134
|
"""
|
136
135
|
errors = defaultdict(dict)
|
137
136
|
for implementation in self.pipeline_graph.implementations:
|
138
|
-
implementation_errors = implementation.validate(
|
137
|
+
implementation_errors = implementation.validate(
|
138
|
+
skip_image_validation=(self.config.command == "generate_dag"),
|
139
|
+
images_dir=self.config.images_dir,
|
140
|
+
)
|
139
141
|
if implementation_errors:
|
140
142
|
errors[IMPLEMENTATION_ERRORS_KEY][implementation.name] = implementation_errors
|
141
143
|
return errors
|
@@ -206,7 +208,14 @@ wildcard_constraints:
|
|
206
208
|
The input files to the target rule (i.e. the result node) are the final
|
207
209
|
output themselves.
|
208
210
|
"""
|
209
|
-
|
211
|
+
input_slots, _ = self.pipeline_graph.get_io_slot_attributes("results")
|
212
|
+
|
213
|
+
if len(input_slots) != 1:
|
214
|
+
raise ValueError("Results node must have only one input slot")
|
215
|
+
|
216
|
+
input_slot_name = list(input_slots.keys())[0]
|
217
|
+
input_slot_attrs = input_slots[input_slot_name]
|
218
|
+
final_output = input_slot_attrs["filepaths"]
|
210
219
|
validator_file = str("input_validations/final_validator")
|
211
220
|
# Snakemake resolves the DAG based on the first rule, so we put the target
|
212
221
|
# before the validation
|
@@ -217,10 +226,10 @@ wildcard_constraints:
|
|
217
226
|
)
|
218
227
|
final_validation = InputValidationRule(
|
219
228
|
name="results",
|
220
|
-
input_slot_name=
|
229
|
+
input_slot_name=input_slot_name,
|
221
230
|
input=final_output,
|
222
231
|
output=validator_file,
|
223
|
-
validator=
|
232
|
+
validator=input_slot_attrs["validator"],
|
224
233
|
)
|
225
234
|
target_rule.write_to_snakefile(self.snakefile_path)
|
226
235
|
final_validation.write_to_snakefile(self.snakefile_path)
|
@@ -322,7 +331,7 @@ use rule start_spark_worker from spark_cluster with:
|
|
322
331
|
resources=resources,
|
323
332
|
envvars=implementation.environment_variables,
|
324
333
|
diagnostics_dir=str(diagnostics_dir),
|
325
|
-
image_path=implementation.
|
334
|
+
image_path=self.config.images_dir / implementation.singularity_image_name,
|
326
335
|
script_cmd=implementation.script_cmd,
|
327
336
|
requires_spark=implementation.requires_spark,
|
328
337
|
is_embarrassingly_parallel=is_embarrassingly_parallel,
|
easylink/pipeline_schema.py
CHANGED
@@ -14,7 +14,7 @@ from pathlib import Path
|
|
14
14
|
from layered_config_tree import LayeredConfigTree
|
15
15
|
|
16
16
|
from easylink.graph_components import EdgeParams, ImplementationGraph
|
17
|
-
from easylink.pipeline_schema_constants import
|
17
|
+
from easylink.pipeline_schema_constants import SCHEMA_PARAMS
|
18
18
|
from easylink.step import HierarchicalStep, NonLeafConfigurationState, Step
|
19
19
|
|
20
20
|
|
@@ -39,7 +39,7 @@ class PipelineSchema(HierarchicalStep):
|
|
39
39
|
|
40
40
|
Notes
|
41
41
|
-----
|
42
|
-
|
42
|
+
A ``PipelineSchema`` is intended to be constructed by the :meth:`get_schema`
|
43
43
|
class method.
|
44
44
|
|
45
45
|
The ``PipelineSchema`` is a high-level abstraction; it represents the desired
|
@@ -159,22 +159,21 @@ class PipelineSchema(HierarchicalStep):
|
|
159
159
|
)
|
160
160
|
|
161
161
|
@classmethod
|
162
|
-
def
|
162
|
+
def get_schema(cls, name: str = "main") -> list["PipelineSchema"]:
|
163
163
|
"""Gets all allowable ``PipelineSchemas``.
|
164
164
|
|
165
165
|
These ``PipelineSchemas`` represent the fully supported pipelines and are
|
166
166
|
used to validate the user-requested pipeline.
|
167
167
|
|
168
|
+
Parameters
|
169
|
+
----------
|
170
|
+
name
|
171
|
+
The name of the ``PipelineSchema`` to get.
|
172
|
+
|
168
173
|
Returns
|
169
174
|
-------
|
170
|
-
|
175
|
+
The requested ``PipelineSchema``.
|
171
176
|
"""
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
]
|
176
|
-
|
177
|
-
|
178
|
-
PIPELINE_SCHEMAS = PipelineSchema._get_schemas()
|
179
|
-
"""All allowable :class:`PipelineSchemas<PipelineSchema>` to validate the requested
|
180
|
-
pipeline against."""
|
177
|
+
if name not in SCHEMA_PARAMS:
|
178
|
+
raise ValueError(f"Pipeline schema '{name}' is not supported.")
|
179
|
+
return cls(name, *SCHEMA_PARAMS[name])
|
easylink/pipeline_schema_constants/__init__.py
CHANGED
@@ -9,13 +9,12 @@ package defines the nodes and edges required to instantiate such ``PipelineSchem
|
|
9
9
|
|
10
10
|
"""
|
11
11
|
|
12
|
-
from easylink.pipeline_schema_constants import development, testing
|
12
|
+
from easylink.pipeline_schema_constants import development, main, testing
|
13
13
|
|
14
|
-
|
14
|
+
SCHEMA_PARAMS = {
|
15
|
+
"main": main.SCHEMA_PARAMS,
|
16
|
+
# development and testing
|
15
17
|
"development": development.SCHEMA_PARAMS,
|
16
|
-
}
|
17
|
-
|
18
|
-
TESTING_SCHEMA_PARAMS = {
|
19
18
|
"integration": testing.SCHEMA_PARAMS_ONE_STEP,
|
20
19
|
"output_dir": testing.SCHEMA_PARAMS_OUTPUT_DIR,
|
21
20
|
"combine_bad_topology": testing.SCHEMA_PARAMS_BAD_COMBINED_TOPOLOGY,
|