easylink 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/cli.py +15 -3
- easylink/configuration.py +25 -2
- easylink/devtools/implementation_creator.py +58 -11
- easylink/implementation.py +88 -11
- easylink/implementation_metadata.yaml +177 -26
- easylink/pipeline.py +15 -6
- easylink/pipeline_schema_constants/__init__.py +2 -2
- easylink/pipeline_schema_constants/main.py +489 -0
- easylink/runner.py +7 -1
- easylink/step.py +89 -0
- easylink/steps/cascading/exclude_clustered.def +22 -0
- easylink/steps/cascading/exclude_clustered.py +76 -0
- easylink/steps/cascading/exclude_none.def +22 -0
- easylink/steps/cascading/exclude_none.py +76 -0
- easylink/steps/cascading/update_clusters_by_connected_components.def +22 -0
- easylink/steps/cascading/update_clusters_by_connected_components.py +101 -0
- easylink/steps/default/default_clusters_to_links.def +22 -0
- easylink/steps/default/default_clusters_to_links.py +91 -0
- easylink/steps/default/default_determining_exclusions.def +22 -0
- easylink/steps/default/default_determining_exclusions.py +81 -0
- easylink/steps/default/default_removing_records.def +22 -0
- easylink/steps/default/default_removing_records.py +59 -0
- easylink/steps/default/default_schema_alignment.def +22 -0
- easylink/steps/default/default_schema_alignment.py +53 -0
- easylink/steps/default/default_updating_clusters.def +22 -0
- easylink/steps/default/default_updating_clusters.py +67 -0
- easylink/steps/fastLink/fastLink_evaluating_pairs.R +136 -0
- easylink/steps/fastLink/fastLink_evaluating_pairs.def +21 -0
- easylink/steps/fastLink/fastLink_links_to_clusters.R +128 -0
- easylink/steps/fastLink/fastLink_links_to_clusters.def +21 -0
- easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.def +22 -0
- easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.py +42 -0
- easylink/steps/rl-dummy/input_data/create_input_files.ipynb +1433 -0
- easylink/steps/rl-dummy/input_data/input_file_1.parquet +0 -0
- easylink/steps/rl-dummy/input_data/input_file_2.parquet +0 -0
- easylink/steps/rl-dummy/input_data/known_clusters.parquet +0 -0
- easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.def +22 -0
- easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.py +59 -0
- easylink/steps/splink/splink_blocking_and_filtering.def +22 -0
- easylink/steps/splink/splink_blocking_and_filtering.py +130 -0
- easylink/steps/splink/splink_evaluating_pairs.def +22 -0
- easylink/steps/splink/splink_evaluating_pairs.py +164 -0
- easylink/steps/splink/splink_links_to_clusters.def +22 -0
- easylink/steps/splink/splink_links_to_clusters.py +63 -0
- easylink/utilities/data_utils.py +72 -0
- easylink/utilities/paths.py +4 -3
- easylink/utilities/validation_utils.py +509 -11
- {easylink-0.1.18.dist-info → easylink-0.1.19.dist-info}/METADATA +5 -1
- easylink-0.1.19.dist-info/RECORD +91 -0
- {easylink-0.1.18.dist-info → easylink-0.1.19.dist-info}/WHEEL +1 -1
- easylink-0.1.19.dist-info/licenses/LICENSE +28 -0
- easylink-0.1.18.dist-info/RECORD +0 -55
- {easylink-0.1.18.dist-info → easylink-0.1.19.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.18.dist-info → easylink-0.1.19.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,18 @@
|
|
1
1
|
step_1_python_pandas:
|
2
2
|
steps:
|
3
3
|
- step_1
|
4
|
-
|
4
|
+
image_name: python_pandas.sif
|
5
|
+
zenodo_record_id: 15611084
|
6
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
5
7
|
script_cmd: python /dummy_step.py
|
6
8
|
outputs:
|
7
9
|
step_1_main_output: result.parquet
|
8
10
|
step_1a_python_pandas:
|
9
11
|
steps:
|
10
12
|
- step_1a
|
11
|
-
|
13
|
+
image_name: python_pandas.sif
|
14
|
+
zenodo_record_id: 15611084
|
15
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
12
16
|
script_cmd: python /dummy_step.py
|
13
17
|
env:
|
14
18
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -17,7 +21,9 @@ step_1a_python_pandas:
|
|
17
21
|
step_1b_python_pandas:
|
18
22
|
steps:
|
19
23
|
- step_1b
|
20
|
-
|
24
|
+
image_name: python_pandas.sif
|
25
|
+
zenodo_record_id: 15611084
|
26
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
21
27
|
script_cmd: python /dummy_step.py
|
22
28
|
env:
|
23
29
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -26,21 +32,27 @@ step_1b_python_pandas:
|
|
26
32
|
step_2_python_pandas:
|
27
33
|
steps:
|
28
34
|
- step_2
|
29
|
-
|
35
|
+
image_name: python_pandas.sif
|
36
|
+
zenodo_record_id: 15611084
|
37
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
30
38
|
script_cmd: python /dummy_step.py
|
31
39
|
outputs:
|
32
40
|
step_2_main_output: result.parquet
|
33
41
|
step_3_python_pandas:
|
34
42
|
steps:
|
35
43
|
- step_3
|
36
|
-
|
44
|
+
image_name: python_pandas.sif
|
45
|
+
zenodo_record_id: 15611084
|
46
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
37
47
|
script_cmd: python /dummy_step.py
|
38
48
|
outputs:
|
39
49
|
step_3_main_output: result.parquet
|
40
50
|
step_4_python_pandas:
|
41
51
|
steps:
|
42
52
|
- step_4
|
43
|
-
|
53
|
+
image_name: python_pandas.sif
|
54
|
+
zenodo_record_id: 15611084
|
55
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
44
56
|
script_cmd: python /dummy_step.py
|
45
57
|
env:
|
46
58
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -49,7 +61,9 @@ step_4_python_pandas:
|
|
49
61
|
step_5_python_pandas:
|
50
62
|
steps:
|
51
63
|
- step_5
|
52
|
-
|
64
|
+
image_name: python_pandas.sif
|
65
|
+
zenodo_record_id: 15611084
|
66
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
53
67
|
script_cmd: python /dummy_step.py
|
54
68
|
env:
|
55
69
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -58,7 +72,9 @@ step_5_python_pandas:
|
|
58
72
|
step_6_python_pandas:
|
59
73
|
steps:
|
60
74
|
- step_6
|
61
|
-
|
75
|
+
image_name: python_pandas.sif
|
76
|
+
zenodo_record_id: 15611084
|
77
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
62
78
|
script_cmd: python /dummy_step.py
|
63
79
|
env:
|
64
80
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -67,7 +83,9 @@ step_6_python_pandas:
|
|
67
83
|
step_4a_python_pandas:
|
68
84
|
steps:
|
69
85
|
- step_4a
|
70
|
-
|
86
|
+
image_name: python_pandas.sif
|
87
|
+
zenodo_record_id: 15611084
|
88
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
71
89
|
script_cmd: python /dummy_step.py
|
72
90
|
env:
|
73
91
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -76,7 +94,9 @@ step_4a_python_pandas:
|
|
76
94
|
step_4b_python_pandas:
|
77
95
|
steps:
|
78
96
|
- step_4b
|
79
|
-
|
97
|
+
image_name: python_pandas.sif
|
98
|
+
zenodo_record_id: 15611084
|
99
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
80
100
|
script_cmd: python /dummy_step.py
|
81
101
|
env:
|
82
102
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -85,7 +105,9 @@ step_4b_python_pandas:
|
|
85
105
|
step_4b_r:
|
86
106
|
steps:
|
87
107
|
- step_4b
|
88
|
-
|
108
|
+
image_name: r-image.sif
|
109
|
+
zenodo_record_id: 15611084
|
110
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
89
111
|
script_cmd: Rscript /dummy_step.R
|
90
112
|
env:
|
91
113
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -94,7 +116,9 @@ step_4b_r:
|
|
94
116
|
step_1_python_pyspark:
|
95
117
|
steps:
|
96
118
|
- step_1
|
97
|
-
|
119
|
+
image_name: python_pyspark.sif
|
120
|
+
zenodo_record_id: 15611084
|
121
|
+
md5_checksum: 6fb2a2119630138f4db82356b8d78b87
|
98
122
|
script_cmd: python3 /code/dummy_step.py
|
99
123
|
outputs:
|
100
124
|
step_1_main_output: result.parquet
|
@@ -102,7 +126,9 @@ step_1_python_pyspark:
|
|
102
126
|
step_2_python_pyspark:
|
103
127
|
steps:
|
104
128
|
- step_2
|
105
|
-
|
129
|
+
image_name: python_pyspark.sif
|
130
|
+
zenodo_record_id: 15611084
|
131
|
+
md5_checksum: 6fb2a2119630138f4db82356b8d78b87
|
106
132
|
script_cmd: python3 /code/dummy_step.py
|
107
133
|
outputs:
|
108
134
|
step_2_main_output: result.parquet
|
@@ -110,7 +136,9 @@ step_2_python_pyspark:
|
|
110
136
|
step_3_python_pyspark:
|
111
137
|
steps:
|
112
138
|
- step_3
|
113
|
-
|
139
|
+
image_name: python_pyspark.sif
|
140
|
+
zenodo_record_id: 15611084
|
141
|
+
md5_checksum: 6fb2a2119630138f4db82356b8d78b87
|
114
142
|
script_cmd: python3 /code/dummy_step.py
|
115
143
|
outputs:
|
116
144
|
step_3_main_output: result.parquet
|
@@ -118,7 +146,9 @@ step_3_python_pyspark:
|
|
118
146
|
step_4_python_pyspark:
|
119
147
|
steps:
|
120
148
|
- step_4
|
121
|
-
|
149
|
+
image_name: python_pyspark.sif
|
150
|
+
zenodo_record_id: 15611084
|
151
|
+
md5_checksum: 6fb2a2119630138f4db82356b8d78b87
|
122
152
|
script_cmd: python3 /code/dummy_step.py
|
123
153
|
env:
|
124
154
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -127,7 +157,9 @@ step_4_python_pyspark:
|
|
127
157
|
step_1_r:
|
128
158
|
steps:
|
129
159
|
- step_1
|
130
|
-
|
160
|
+
image_name: r-image.sif
|
161
|
+
zenodo_record_id: 15611084
|
162
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
131
163
|
script_cmd: Rscript /dummy_step.R
|
132
164
|
outputs:
|
133
165
|
step_1_main_output: result.parquet
|
@@ -135,7 +167,9 @@ step_1_r:
|
|
135
167
|
step_2_r:
|
136
168
|
steps:
|
137
169
|
- step_2
|
138
|
-
|
170
|
+
image_name: r-image.sif
|
171
|
+
zenodo_record_id: 15611084
|
172
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
139
173
|
script_cmd: Rscript /dummy_step.R
|
140
174
|
outputs:
|
141
175
|
step_2_main_output: result.parquet
|
@@ -143,7 +177,9 @@ step_2_r:
|
|
143
177
|
step_3_r:
|
144
178
|
steps:
|
145
179
|
- step_3
|
146
|
-
|
180
|
+
image_name: r-image.sif
|
181
|
+
zenodo_record_id: 15611084
|
182
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
147
183
|
script_cmd: Rscript /dummy_step.R
|
148
184
|
outputs:
|
149
185
|
step_3_main_output: result.parquet
|
@@ -151,7 +187,9 @@ step_3_r:
|
|
151
187
|
step_4_r:
|
152
188
|
steps:
|
153
189
|
- step_4
|
154
|
-
|
190
|
+
image_name: r-image.sif
|
191
|
+
zenodo_record_id: 15611084
|
192
|
+
md5_checksum: 9410af1317aabc332604cbec33b59d42
|
155
193
|
script_cmd: Rscript /dummy_step.R
|
156
194
|
env:
|
157
195
|
INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
|
@@ -162,7 +200,9 @@ step_1_and_step_2_combined_python_pandas:
|
|
162
200
|
steps:
|
163
201
|
- step_1
|
164
202
|
- step_2
|
165
|
-
|
203
|
+
image_name: python_pandas.sif
|
204
|
+
zenodo_record_id: 15611084
|
205
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
166
206
|
script_cmd: python /dummy_step.py
|
167
207
|
outputs:
|
168
208
|
step_2_main_output: result.parquet
|
@@ -170,7 +210,9 @@ step_1_and_step_2_parallel_python_pandas:
|
|
170
210
|
steps:
|
171
211
|
- step_1
|
172
212
|
- step_2
|
173
|
-
|
213
|
+
image_name: python_pandas.sif
|
214
|
+
zenodo_record_id: 15611084
|
215
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
174
216
|
script_cmd: python /dummy_step.py
|
175
217
|
env:
|
176
218
|
INPUT_ENV_VARS: STEP_1_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,STEP_2_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS
|
@@ -180,7 +222,9 @@ step_3_and_step_4_combined_python_pandas:
|
|
180
222
|
steps:
|
181
223
|
- step_3
|
182
224
|
- step_4
|
183
|
-
|
225
|
+
image_name: python_pandas.sif
|
226
|
+
zenodo_record_id: 15611084
|
227
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
184
228
|
script_cmd: python /dummy_step.py
|
185
229
|
outputs:
|
186
230
|
step_4_main_output: result.parquet
|
@@ -188,26 +232,133 @@ step_1a_and_step_1b_combined_python_pandas:
|
|
188
232
|
steps:
|
189
233
|
- step_1a
|
190
234
|
- step_1b
|
191
|
-
|
235
|
+
image_name: python_pandas.sif
|
236
|
+
zenodo_record_id: 15611084
|
237
|
+
md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
|
192
238
|
script_cmd: python /dummy_step.py
|
193
239
|
outputs:
|
194
240
|
step_1_main_output: result.parquet
|
195
241
|
dummy_step_1_for_output_dir_example:
|
196
242
|
steps:
|
197
243
|
- step_1_for_output_dir_example
|
198
|
-
|
244
|
+
image_name: main/dummy_step_1_for_output_dir_example.sif
|
199
245
|
script_cmd: python /dummy_step_1_for_output_dir_example.py
|
200
246
|
outputs:
|
201
247
|
step_1_main_output_directory: output_dir/
|
202
248
|
dummy_step_1_for_output_dir_example_default:
|
203
249
|
steps:
|
204
250
|
- step_1_for_output_dir_example
|
205
|
-
|
251
|
+
image_name: main/dummy_step_1_for_output_dir_example.sif
|
206
252
|
script_cmd: python /dummy_step_1_for_output_dir_example.py
|
207
253
|
dummy_step_2_for_output_dir_example:
|
208
254
|
steps:
|
209
255
|
- step_2_for_output_dir_example
|
210
|
-
|
256
|
+
image_name: main/dummy_step_2_for_output_dir_example.sif
|
211
257
|
script_cmd: python /dummy_step_2_for_output_dir_example.py
|
212
258
|
outputs:
|
213
259
|
step_2_main_output: result.parquet
|
260
|
+
default_removing_records:
|
261
|
+
steps:
|
262
|
+
- removing_records
|
263
|
+
image_name: main/default_removing_records.sif
|
264
|
+
script_cmd: python /default_removing_records.py
|
265
|
+
outputs:
|
266
|
+
dataset: dataset
|
267
|
+
default_clusters_to_links:
|
268
|
+
steps:
|
269
|
+
- clusters_to_links
|
270
|
+
image_name: main/default_clusters_to_links.sif
|
271
|
+
script_cmd: python /default_clusters_to_links.py
|
272
|
+
outputs:
|
273
|
+
known_links: result.parquet
|
274
|
+
default_determining_exclusions:
|
275
|
+
steps:
|
276
|
+
- determining_exclusions
|
277
|
+
image_name: main/default_determining_exclusions.sif
|
278
|
+
script_cmd: python /default_determining_exclusions.py
|
279
|
+
outputs:
|
280
|
+
ids_to_remove: result.parquet
|
281
|
+
default_updating_clusters:
|
282
|
+
steps:
|
283
|
+
- updating_clusters
|
284
|
+
image_name: main/default_updating_clusters.sif
|
285
|
+
script_cmd: python /default_updating_clusters.py
|
286
|
+
outputs:
|
287
|
+
clusters: clusters.parquet
|
288
|
+
dummy_canonicalizing_and_downstream_analysis:
|
289
|
+
steps:
|
290
|
+
- canonicalizing_and_downstream_analysis
|
291
|
+
image_name: main/dummy_canonicalizing_and_downstream_analysis.sif
|
292
|
+
script_cmd: python /dummy_canonicalizing_and_downstream_analysis.py
|
293
|
+
outputs:
|
294
|
+
analysis_output: result.parquet
|
295
|
+
dummy_pre-processing:
|
296
|
+
steps:
|
297
|
+
- pre-processing
|
298
|
+
image_name: main/dummy_pre-processing.sif
|
299
|
+
script_cmd: python /dummy_pre-processing.py
|
300
|
+
outputs:
|
301
|
+
dataset: dataset
|
302
|
+
default_schema_alignment:
|
303
|
+
steps:
|
304
|
+
- schema_alignment
|
305
|
+
image_name: main/default_schema_alignment.sif
|
306
|
+
script_cmd: python /default_schema_alignment.py
|
307
|
+
outputs:
|
308
|
+
records: result.parquet
|
309
|
+
splink_blocking_and_filtering:
|
310
|
+
steps:
|
311
|
+
- blocking_and_filtering
|
312
|
+
image_name: main/splink_blocking_and_filtering.sif
|
313
|
+
script_cmd: python /splink_blocking_and_filtering.py
|
314
|
+
outputs:
|
315
|
+
blocks: blocks
|
316
|
+
splink_evaluating_pairs:
|
317
|
+
steps:
|
318
|
+
- evaluating_pairs
|
319
|
+
image_name: main/splink_evaluating_pairs.sif
|
320
|
+
script_cmd: python /splink_evaluating_pairs.py
|
321
|
+
outputs:
|
322
|
+
links: result.parquet
|
323
|
+
splink_links_to_clusters:
|
324
|
+
steps:
|
325
|
+
- links_to_clusters
|
326
|
+
image_name: main/splink_links_to_clusters.sif
|
327
|
+
script_cmd: python /splink_links_to_clusters.py
|
328
|
+
outputs:
|
329
|
+
clusters: result.parquet
|
330
|
+
fastLink_evaluating_pairs:
|
331
|
+
steps:
|
332
|
+
- evaluating_pairs
|
333
|
+
image_name: main/fastLink_evaluating_pairs.sif
|
334
|
+
script_cmd: Rscript /fastLink_evaluating_pairs.R
|
335
|
+
outputs:
|
336
|
+
links: result.parquet
|
337
|
+
fastLink_links_to_clusters:
|
338
|
+
steps:
|
339
|
+
- links_to_clusters
|
340
|
+
image_name: main/fastLink_links_to_clusters.sif
|
341
|
+
script_cmd: Rscript /fastLink_links_to_clusters.R
|
342
|
+
outputs:
|
343
|
+
clusters: result.parquet
|
344
|
+
exclude_clustered:
|
345
|
+
steps:
|
346
|
+
- determining_exclusions
|
347
|
+
image_name: main/exclude_clustered.sif
|
348
|
+
script_cmd: python /exclude_clustered.py
|
349
|
+
outputs:
|
350
|
+
ids_to_remove: result.parquet
|
351
|
+
exclude_none:
|
352
|
+
steps:
|
353
|
+
- determining_exclusions
|
354
|
+
image_name: main/exclude_none.sif
|
355
|
+
script_cmd: python /exclude_none.py
|
356
|
+
outputs:
|
357
|
+
ids_to_remove: result.parquet
|
358
|
+
update_clusters_by_connected_components:
|
359
|
+
steps:
|
360
|
+
- updating_clusters
|
361
|
+
image_name: main/update_clusters_by_connected_components.sif
|
362
|
+
script_cmd: python /update_clusters_by_connected_components.py
|
363
|
+
outputs:
|
364
|
+
clusters: result.parquet
|
easylink/pipeline.py
CHANGED
@@ -25,7 +25,6 @@ from easylink.rule import (
|
|
25
25
|
)
|
26
26
|
from easylink.utilities.general_utils import exit_with_validation_error
|
27
27
|
from easylink.utilities.paths import SPARK_SNAKEFILE
|
28
|
-
from easylink.utilities.validation_utils import validate_input_file_dummy
|
29
28
|
|
30
29
|
IMPLEMENTATION_ERRORS_KEY = "IMPLEMENTATION ERRORS"
|
31
30
|
|
@@ -135,7 +134,10 @@ class Pipeline:
|
|
135
134
|
"""
|
136
135
|
errors = defaultdict(dict)
|
137
136
|
for implementation in self.pipeline_graph.implementations:
|
138
|
-
implementation_errors = implementation.validate(
|
137
|
+
implementation_errors = implementation.validate(
|
138
|
+
skip_image_validation=(self.config.command == "generate_dag"),
|
139
|
+
images_dir=self.config.images_dir,
|
140
|
+
)
|
139
141
|
if implementation_errors:
|
140
142
|
errors[IMPLEMENTATION_ERRORS_KEY][implementation.name] = implementation_errors
|
141
143
|
return errors
|
@@ -206,7 +208,14 @@ wildcard_constraints:
|
|
206
208
|
The input files to the target rule (i.e. the result node) are the final
|
207
209
|
output themselves.
|
208
210
|
"""
|
209
|
-
|
211
|
+
input_slots, _ = self.pipeline_graph.get_io_slot_attributes("results")
|
212
|
+
|
213
|
+
if len(input_slots) != 1:
|
214
|
+
raise ValueError("Results node must have only one input slot")
|
215
|
+
|
216
|
+
input_slot_name = list(input_slots.keys())[0]
|
217
|
+
input_slot_attrs = input_slots[input_slot_name]
|
218
|
+
final_output = input_slot_attrs["filepaths"]
|
210
219
|
validator_file = str("input_validations/final_validator")
|
211
220
|
# Snakemake resolves the DAG based on the first rule, so we put the target
|
212
221
|
# before the validation
|
@@ -217,10 +226,10 @@ wildcard_constraints:
|
|
217
226
|
)
|
218
227
|
final_validation = InputValidationRule(
|
219
228
|
name="results",
|
220
|
-
input_slot_name=
|
229
|
+
input_slot_name=input_slot_name,
|
221
230
|
input=final_output,
|
222
231
|
output=validator_file,
|
223
|
-
validator=
|
232
|
+
validator=input_slot_attrs["validator"],
|
224
233
|
)
|
225
234
|
target_rule.write_to_snakefile(self.snakefile_path)
|
226
235
|
final_validation.write_to_snakefile(self.snakefile_path)
|
@@ -322,7 +331,7 @@ use rule start_spark_worker from spark_cluster with:
|
|
322
331
|
resources=resources,
|
323
332
|
envvars=implementation.environment_variables,
|
324
333
|
diagnostics_dir=str(diagnostics_dir),
|
325
|
-
image_path=implementation.
|
334
|
+
image_path=self.config.images_dir / implementation.singularity_image_name,
|
326
335
|
script_cmd=implementation.script_cmd,
|
327
336
|
requires_spark=implementation.requires_spark,
|
328
337
|
is_embarrassingly_parallel=is_embarrassingly_parallel,
|
@@ -9,10 +9,10 @@ package defines the nodes and edges required to instantiate such ``PipelineSchem
|
|
9
9
|
|
10
10
|
"""
|
11
11
|
|
12
|
-
from easylink.pipeline_schema_constants import development, testing
|
12
|
+
from easylink.pipeline_schema_constants import development, main, testing
|
13
13
|
|
14
14
|
SCHEMA_PARAMS = {
|
15
|
-
"main":
|
15
|
+
"main": main.SCHEMA_PARAMS,
|
16
16
|
# development and testing
|
17
17
|
"development": development.SCHEMA_PARAMS,
|
18
18
|
"integration": testing.SCHEMA_PARAMS_ONE_STEP,
|