easylink 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. easylink/_version.py +1 -1
  2. easylink/cli.py +24 -3
  3. easylink/configuration.py +43 -36
  4. easylink/devtools/implementation_creator.py +71 -22
  5. easylink/implementation.py +88 -11
  6. easylink/implementation_metadata.yaml +177 -29
  7. easylink/pipeline.py +15 -6
  8. easylink/pipeline_schema.py +12 -13
  9. easylink/pipeline_schema_constants/__init__.py +4 -5
  10. easylink/pipeline_schema_constants/main.py +489 -0
  11. easylink/runner.py +11 -7
  12. easylink/step.py +89 -0
  13. easylink/steps/cascading/exclude_clustered.def +22 -0
  14. easylink/steps/cascading/exclude_clustered.py +76 -0
  15. easylink/steps/cascading/exclude_none.def +22 -0
  16. easylink/steps/cascading/exclude_none.py +76 -0
  17. easylink/steps/cascading/update_clusters_by_connected_components.def +22 -0
  18. easylink/steps/cascading/update_clusters_by_connected_components.py +101 -0
  19. easylink/steps/default/default_clusters_to_links.def +22 -0
  20. easylink/steps/default/default_clusters_to_links.py +91 -0
  21. easylink/steps/default/default_determining_exclusions.def +22 -0
  22. easylink/steps/default/default_determining_exclusions.py +81 -0
  23. easylink/steps/default/default_removing_records.def +22 -0
  24. easylink/steps/default/default_removing_records.py +59 -0
  25. easylink/steps/default/default_schema_alignment.def +22 -0
  26. easylink/steps/default/default_schema_alignment.py +53 -0
  27. easylink/steps/default/default_updating_clusters.def +22 -0
  28. easylink/steps/default/default_updating_clusters.py +67 -0
  29. easylink/steps/fastLink/fastLink_evaluating_pairs.R +136 -0
  30. easylink/steps/fastLink/fastLink_evaluating_pairs.def +21 -0
  31. easylink/steps/fastLink/fastLink_links_to_clusters.R +128 -0
  32. easylink/steps/fastLink/fastLink_links_to_clusters.def +21 -0
  33. easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.def +22 -0
  34. easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.py +42 -0
  35. easylink/steps/rl-dummy/input_data/create_input_files.ipynb +1433 -0
  36. easylink/steps/rl-dummy/input_data/input_file_1.parquet +0 -0
  37. easylink/steps/rl-dummy/input_data/input_file_2.parquet +0 -0
  38. easylink/steps/rl-dummy/input_data/known_clusters.parquet +0 -0
  39. easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.def +22 -0
  40. easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.py +59 -0
  41. easylink/steps/splink/splink_blocking_and_filtering.def +22 -0
  42. easylink/steps/splink/splink_blocking_and_filtering.py +130 -0
  43. easylink/steps/splink/splink_evaluating_pairs.def +22 -0
  44. easylink/steps/splink/splink_evaluating_pairs.py +164 -0
  45. easylink/steps/splink/splink_links_to_clusters.def +22 -0
  46. easylink/steps/splink/splink_links_to_clusters.py +63 -0
  47. easylink/utilities/data_utils.py +72 -0
  48. easylink/utilities/paths.py +4 -3
  49. easylink/utilities/validation_utils.py +509 -11
  50. {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/METADATA +5 -1
  51. easylink-0.1.19.dist-info/RECORD +91 -0
  52. {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/WHEEL +1 -1
  53. easylink-0.1.19.dist-info/licenses/LICENSE +28 -0
  54. easylink-0.1.17.dist-info/RECORD +0 -55
  55. {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/entry_points.txt +0 -0
  56. {easylink-0.1.17.dist-info → easylink-0.1.19.dist-info}/top_level.txt +0 -0
easylink/implementation_metadata.yaml CHANGED
@@ -1,14 +1,18 @@
1
1
  step_1_python_pandas:
2
2
  steps:
3
3
  - step_1
4
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
4
+ image_name: python_pandas.sif
5
+ zenodo_record_id: 15611084
6
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
5
7
  script_cmd: python /dummy_step.py
6
8
  outputs:
7
9
  step_1_main_output: result.parquet
8
10
  step_1a_python_pandas:
9
11
  steps:
10
12
  - step_1a
11
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
13
+ image_name: python_pandas.sif
14
+ zenodo_record_id: 15611084
15
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
12
16
  script_cmd: python /dummy_step.py
13
17
  env:
14
18
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -17,7 +21,9 @@ step_1a_python_pandas:
17
21
  step_1b_python_pandas:
18
22
  steps:
19
23
  - step_1b
20
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
24
+ image_name: python_pandas.sif
25
+ zenodo_record_id: 15611084
26
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
21
27
  script_cmd: python /dummy_step.py
22
28
  env:
23
29
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -26,21 +32,27 @@ step_1b_python_pandas:
26
32
  step_2_python_pandas:
27
33
  steps:
28
34
  - step_2
29
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
35
+ image_name: python_pandas.sif
36
+ zenodo_record_id: 15611084
37
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
30
38
  script_cmd: python /dummy_step.py
31
39
  outputs:
32
40
  step_2_main_output: result.parquet
33
41
  step_3_python_pandas:
34
42
  steps:
35
43
  - step_3
36
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
44
+ image_name: python_pandas.sif
45
+ zenodo_record_id: 15611084
46
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
37
47
  script_cmd: python /dummy_step.py
38
48
  outputs:
39
49
  step_3_main_output: result.parquet
40
50
  step_4_python_pandas:
41
51
  steps:
42
52
  - step_4
43
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
53
+ image_name: python_pandas.sif
54
+ zenodo_record_id: 15611084
55
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
44
56
  script_cmd: python /dummy_step.py
45
57
  env:
46
58
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -49,7 +61,9 @@ step_4_python_pandas:
49
61
  step_5_python_pandas:
50
62
  steps:
51
63
  - step_5
52
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
64
+ image_name: python_pandas.sif
65
+ zenodo_record_id: 15611084
66
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
53
67
  script_cmd: python /dummy_step.py
54
68
  env:
55
69
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -58,7 +72,9 @@ step_5_python_pandas:
58
72
  step_6_python_pandas:
59
73
  steps:
60
74
  - step_6
61
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
75
+ image_name: python_pandas.sif
76
+ zenodo_record_id: 15611084
77
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
62
78
  script_cmd: python /dummy_step.py
63
79
  env:
64
80
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -67,7 +83,9 @@ step_6_python_pandas:
67
83
  step_4a_python_pandas:
68
84
  steps:
69
85
  - step_4a
70
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
86
+ image_name: python_pandas.sif
87
+ zenodo_record_id: 15611084
88
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
71
89
  script_cmd: python /dummy_step.py
72
90
  env:
73
91
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -76,7 +94,9 @@ step_4a_python_pandas:
76
94
  step_4b_python_pandas:
77
95
  steps:
78
96
  - step_4b
79
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
97
+ image_name: python_pandas.sif
98
+ zenodo_record_id: 15611084
99
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
80
100
  script_cmd: python /dummy_step.py
81
101
  env:
82
102
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -85,7 +105,9 @@ step_4b_python_pandas:
85
105
  step_4b_r:
86
106
  steps:
87
107
  - step_4b
88
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
108
+ image_name: r-image.sif
109
+ zenodo_record_id: 15611084
110
+ md5_checksum: 9410af1317aabc332604cbec33b59d42
89
111
  script_cmd: Rscript /dummy_step.R
90
112
  env:
91
113
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -94,7 +116,9 @@ step_4b_r:
94
116
  step_1_python_pyspark:
95
117
  steps:
96
118
  - step_1
97
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pyspark.sif
119
+ image_name: python_pyspark.sif
120
+ zenodo_record_id: 15611084
121
+ md5_checksum: 6fb2a2119630138f4db82356b8d78b87
98
122
  script_cmd: python3 /code/dummy_step.py
99
123
  outputs:
100
124
  step_1_main_output: result.parquet
@@ -102,7 +126,9 @@ step_1_python_pyspark:
102
126
  step_2_python_pyspark:
103
127
  steps:
104
128
  - step_2
105
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pyspark.sif
129
+ image_name: python_pyspark.sif
130
+ zenodo_record_id: 15611084
131
+ md5_checksum: 6fb2a2119630138f4db82356b8d78b87
106
132
  script_cmd: python3 /code/dummy_step.py
107
133
  outputs:
108
134
  step_2_main_output: result.parquet
@@ -110,7 +136,9 @@ step_2_python_pyspark:
110
136
  step_3_python_pyspark:
111
137
  steps:
112
138
  - step_3
113
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pyspark.sif
139
+ image_name: python_pyspark.sif
140
+ zenodo_record_id: 15611084
141
+ md5_checksum: 6fb2a2119630138f4db82356b8d78b87
114
142
  script_cmd: python3 /code/dummy_step.py
115
143
  outputs:
116
144
  step_3_main_output: result.parquet
@@ -118,7 +146,9 @@ step_3_python_pyspark:
118
146
  step_4_python_pyspark:
119
147
  steps:
120
148
  - step_4
121
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pyspark.sif
149
+ image_name: python_pyspark.sif
150
+ zenodo_record_id: 15611084
151
+ md5_checksum: 6fb2a2119630138f4db82356b8d78b87
122
152
  script_cmd: python3 /code/dummy_step.py
123
153
  env:
124
154
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -127,7 +157,9 @@ step_4_python_pyspark:
127
157
  step_1_r:
128
158
  steps:
129
159
  - step_1
130
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
160
+ image_name: r-image.sif
161
+ zenodo_record_id: 15611084
162
+ md5_checksum: 9410af1317aabc332604cbec33b59d42
131
163
  script_cmd: Rscript /dummy_step.R
132
164
  outputs:
133
165
  step_1_main_output: result.parquet
@@ -135,7 +167,9 @@ step_1_r:
135
167
  step_2_r:
136
168
  steps:
137
169
  - step_2
138
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
170
+ image_name: r-image.sif
171
+ zenodo_record_id: 15611084
172
+ md5_checksum: 9410af1317aabc332604cbec33b59d42
139
173
  script_cmd: Rscript /dummy_step.R
140
174
  outputs:
141
175
  step_2_main_output: result.parquet
@@ -143,7 +177,9 @@ step_2_r:
143
177
  step_3_r:
144
178
  steps:
145
179
  - step_3
146
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
180
+ image_name: r-image.sif
181
+ zenodo_record_id: 15611084
182
+ md5_checksum: 9410af1317aabc332604cbec33b59d42
147
183
  script_cmd: Rscript /dummy_step.R
148
184
  outputs:
149
185
  step_3_main_output: result.parquet
@@ -151,7 +187,9 @@ step_3_r:
151
187
  step_4_r:
152
188
  steps:
153
189
  - step_4
154
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
190
+ image_name: r-image.sif
191
+ zenodo_record_id: 15611084
192
+ md5_checksum: 9410af1317aabc332604cbec33b59d42
155
193
  script_cmd: Rscript /dummy_step.R
156
194
  env:
157
195
  INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
@@ -162,7 +200,9 @@ step_1_and_step_2_combined_python_pandas:
162
200
  steps:
163
201
  - step_1
164
202
  - step_2
165
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
203
+ image_name: python_pandas.sif
204
+ zenodo_record_id: 15611084
205
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
166
206
  script_cmd: python /dummy_step.py
167
207
  outputs:
168
208
  step_2_main_output: result.parquet
@@ -170,7 +210,9 @@ step_1_and_step_2_parallel_python_pandas:
170
210
  steps:
171
211
  - step_1
172
212
  - step_2
173
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
213
+ image_name: python_pandas.sif
214
+ zenodo_record_id: 15611084
215
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
174
216
  script_cmd: python /dummy_step.py
175
217
  env:
176
218
  INPUT_ENV_VARS: STEP_1_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,STEP_2_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS
@@ -180,7 +222,9 @@ step_3_and_step_4_combined_python_pandas:
180
222
  steps:
181
223
  - step_3
182
224
  - step_4
183
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
225
+ image_name: python_pandas.sif
226
+ zenodo_record_id: 15611084
227
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
184
228
  script_cmd: python /dummy_step.py
185
229
  outputs:
186
230
  step_4_main_output: result.parquet
@@ -188,29 +232,133 @@ step_1a_and_step_1b_combined_python_pandas:
188
232
  steps:
189
233
  - step_1a
190
234
  - step_1b
191
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
235
+ image_name: python_pandas.sif
236
+ zenodo_record_id: 15611084
237
+ md5_checksum: 7cc7cb37195c635684903b6777cf1cdf
192
238
  script_cmd: python /dummy_step.py
193
239
  outputs:
194
240
  step_1_main_output: result.parquet
195
241
  dummy_step_1_for_output_dir_example:
196
242
  steps:
197
243
  - step_1_for_output_dir_example
198
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_1_for_output_dir_example.sif
244
+ image_name: main/dummy_step_1_for_output_dir_example.sif
199
245
  script_cmd: python /dummy_step_1_for_output_dir_example.py
200
246
  outputs:
201
247
  step_1_main_output_directory: output_dir/
202
248
  dummy_step_1_for_output_dir_example_default:
203
249
  steps:
204
250
  - step_1_for_output_dir_example
205
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_1_for_output_dir_example.sif
251
+ image_name: main/dummy_step_1_for_output_dir_example.sif
206
252
  script_cmd: python /dummy_step_1_for_output_dir_example.py
207
- # leave outputs out for testing purposes
208
- # outputs:
209
- # step_1_main_output_directory: output_dir/
210
253
  dummy_step_2_for_output_dir_example:
211
254
  steps:
212
255
  - step_2_for_output_dir_example
213
- image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_2_for_output_dir_example.sif
256
+ image_name: main/dummy_step_2_for_output_dir_example.sif
214
257
  script_cmd: python /dummy_step_2_for_output_dir_example.py
215
258
  outputs:
216
259
  step_2_main_output: result.parquet
260
+ default_removing_records:
261
+ steps:
262
+ - removing_records
263
+ image_name: main/default_removing_records.sif
264
+ script_cmd: python /default_removing_records.py
265
+ outputs:
266
+ dataset: dataset
267
+ default_clusters_to_links:
268
+ steps:
269
+ - clusters_to_links
270
+ image_name: main/default_clusters_to_links.sif
271
+ script_cmd: python /default_clusters_to_links.py
272
+ outputs:
273
+ known_links: result.parquet
274
+ default_determining_exclusions:
275
+ steps:
276
+ - determining_exclusions
277
+ image_name: main/default_determining_exclusions.sif
278
+ script_cmd: python /default_determining_exclusions.py
279
+ outputs:
280
+ ids_to_remove: result.parquet
281
+ default_updating_clusters:
282
+ steps:
283
+ - updating_clusters
284
+ image_name: main/default_updating_clusters.sif
285
+ script_cmd: python /default_updating_clusters.py
286
+ outputs:
287
+ clusters: clusters.parquet
288
+ dummy_canonicalizing_and_downstream_analysis:
289
+ steps:
290
+ - canonicalizing_and_downstream_analysis
291
+ image_name: main/dummy_canonicalizing_and_downstream_analysis.sif
292
+ script_cmd: python /dummy_canonicalizing_and_downstream_analysis.py
293
+ outputs:
294
+ analysis_output: result.parquet
295
+ dummy_pre-processing:
296
+ steps:
297
+ - pre-processing
298
+ image_name: main/dummy_pre-processing.sif
299
+ script_cmd: python /dummy_pre-processing.py
300
+ outputs:
301
+ dataset: dataset
302
+ default_schema_alignment:
303
+ steps:
304
+ - schema_alignment
305
+ image_name: main/default_schema_alignment.sif
306
+ script_cmd: python /default_schema_alignment.py
307
+ outputs:
308
+ records: result.parquet
309
+ splink_blocking_and_filtering:
310
+ steps:
311
+ - blocking_and_filtering
312
+ image_name: main/splink_blocking_and_filtering.sif
313
+ script_cmd: python /splink_blocking_and_filtering.py
314
+ outputs:
315
+ blocks: blocks
316
+ splink_evaluating_pairs:
317
+ steps:
318
+ - evaluating_pairs
319
+ image_name: main/splink_evaluating_pairs.sif
320
+ script_cmd: python /splink_evaluating_pairs.py
321
+ outputs:
322
+ links: result.parquet
323
+ splink_links_to_clusters:
324
+ steps:
325
+ - links_to_clusters
326
+ image_name: main/splink_links_to_clusters.sif
327
+ script_cmd: python /splink_links_to_clusters.py
328
+ outputs:
329
+ clusters: result.parquet
330
+ fastLink_evaluating_pairs:
331
+ steps:
332
+ - evaluating_pairs
333
+ image_name: main/fastLink_evaluating_pairs.sif
334
+ script_cmd: Rscript /fastLink_evaluating_pairs.R
335
+ outputs:
336
+ links: result.parquet
337
+ fastLink_links_to_clusters:
338
+ steps:
339
+ - links_to_clusters
340
+ image_name: main/fastLink_links_to_clusters.sif
341
+ script_cmd: Rscript /fastLink_links_to_clusters.R
342
+ outputs:
343
+ clusters: result.parquet
344
+ exclude_clustered:
345
+ steps:
346
+ - determining_exclusions
347
+ image_name: main/exclude_clustered.sif
348
+ script_cmd: python /exclude_clustered.py
349
+ outputs:
350
+ ids_to_remove: result.parquet
351
+ exclude_none:
352
+ steps:
353
+ - determining_exclusions
354
+ image_name: main/exclude_none.sif
355
+ script_cmd: python /exclude_none.py
356
+ outputs:
357
+ ids_to_remove: result.parquet
358
+ update_clusters_by_connected_components:
359
+ steps:
360
+ - updating_clusters
361
+ image_name: main/update_clusters_by_connected_components.sif
362
+ script_cmd: python /update_clusters_by_connected_components.py
363
+ outputs:
364
+ clusters: result.parquet
easylink/pipeline.py CHANGED
@@ -25,7 +25,6 @@ from easylink.rule import (
25
25
  )
26
26
  from easylink.utilities.general_utils import exit_with_validation_error
27
27
  from easylink.utilities.paths import SPARK_SNAKEFILE
28
- from easylink.utilities.validation_utils import validate_input_file_dummy
29
28
 
30
29
  IMPLEMENTATION_ERRORS_KEY = "IMPLEMENTATION ERRORS"
31
30
 
@@ -135,7 +134,10 @@ class Pipeline:
135
134
  """
136
135
  errors = defaultdict(dict)
137
136
  for implementation in self.pipeline_graph.implementations:
138
- implementation_errors = implementation.validate()
137
+ implementation_errors = implementation.validate(
138
+ skip_image_validation=(self.config.command == "generate_dag"),
139
+ images_dir=self.config.images_dir,
140
+ )
139
141
  if implementation_errors:
140
142
  errors[IMPLEMENTATION_ERRORS_KEY][implementation.name] = implementation_errors
141
143
  return errors
@@ -206,7 +208,14 @@ wildcard_constraints:
206
208
  The input files to the target rule (i.e. the result node) are the final
207
209
  output themselves.
208
210
  """
209
- final_output, _ = self.pipeline_graph.get_io_filepaths("results")
211
+ input_slots, _ = self.pipeline_graph.get_io_slot_attributes("results")
212
+
213
+ if len(input_slots) != 1:
214
+ raise ValueError("Results node must have only one input slot")
215
+
216
+ input_slot_name = list(input_slots.keys())[0]
217
+ input_slot_attrs = input_slots[input_slot_name]
218
+ final_output = input_slot_attrs["filepaths"]
210
219
  validator_file = str("input_validations/final_validator")
211
220
  # Snakemake resolves the DAG based on the first rule, so we put the target
212
221
  # before the validation
@@ -217,10 +226,10 @@ wildcard_constraints:
217
226
  )
218
227
  final_validation = InputValidationRule(
219
228
  name="results",
220
- input_slot_name="main_input",
229
+ input_slot_name=input_slot_name,
221
230
  input=final_output,
222
231
  output=validator_file,
223
- validator=validate_input_file_dummy,
232
+ validator=input_slot_attrs["validator"],
224
233
  )
225
234
  target_rule.write_to_snakefile(self.snakefile_path)
226
235
  final_validation.write_to_snakefile(self.snakefile_path)
@@ -322,7 +331,7 @@ use rule start_spark_worker from spark_cluster with:
322
331
  resources=resources,
323
332
  envvars=implementation.environment_variables,
324
333
  diagnostics_dir=str(diagnostics_dir),
325
- image_path=implementation.singularity_image_path,
334
+ image_path=self.config.images_dir / implementation.singularity_image_name,
326
335
  script_cmd=implementation.script_cmd,
327
336
  requires_spark=implementation.requires_spark,
328
337
  is_embarrassingly_parallel=is_embarrassingly_parallel,
easylink/pipeline_schema.py CHANGED
@@ -14,7 +14,7 @@ from pathlib import Path
14
14
  from layered_config_tree import LayeredConfigTree
15
15
 
16
16
  from easylink.graph_components import EdgeParams, ImplementationGraph
17
- from easylink.pipeline_schema_constants import ALLOWED_SCHEMA_PARAMS
17
+ from easylink.pipeline_schema_constants import SCHEMA_PARAMS
18
18
  from easylink.step import HierarchicalStep, NonLeafConfigurationState, Step
19
19
 
20
20
 
@@ -39,7 +39,7 @@ class PipelineSchema(HierarchicalStep):
39
39
 
40
40
  Notes
41
41
  -----
42
- All ``PipelineSchema`` instances are intended to be created by the :meth:`_get_schemas`
42
+ A ``PipelineSchema`` is intended to be constructed by the :meth:`get_schema`
43
43
  class method.
44
44
 
45
45
  The ``PipelineSchema`` is a high-level abstraction; it represents the desired
@@ -159,22 +159,21 @@ class PipelineSchema(HierarchicalStep):
159
159
  )
160
160
 
161
161
  @classmethod
162
- def _get_schemas(cls) -> list["PipelineSchema"]:
162
+ def get_schema(cls, name: str = "main") -> list["PipelineSchema"]:
163
163
  """Gets all allowable ``PipelineSchemas``.
164
164
 
165
165
  These ``PipelineSchemas`` represent the fully supported pipelines and are
166
166
  used to validate the user-requested pipeline.
167
167
 
168
+ Parameters
169
+ ----------
170
+ name
171
+ The name of the ``PipelineSchema`` to get.
172
+
168
173
  Returns
169
174
  -------
170
- All allowable ``PipelineSchemas``.
175
+ The requested ``PipelineSchema``.
171
176
  """
172
- return [
173
- cls(name, nodes=nodes, edges=edges)
174
- for name, (nodes, edges) in ALLOWED_SCHEMA_PARAMS.items()
175
- ]
176
-
177
-
178
- PIPELINE_SCHEMAS = PipelineSchema._get_schemas()
179
- """All allowable :class:`PipelineSchemas<PipelineSchema>` to validate the requested
180
- pipeline against."""
177
+ if name not in SCHEMA_PARAMS:
178
+ raise ValueError(f"Pipeline schema '{name}' is not supported.")
179
+ return cls(name, *SCHEMA_PARAMS[name])
easylink/pipeline_schema_constants/__init__.py CHANGED
@@ -9,13 +9,12 @@ package defines the nodes and edges required to instantiate such ``PipelineSchem
9
9
 
10
10
  """
11
11
 
12
- from easylink.pipeline_schema_constants import development, testing
12
+ from easylink.pipeline_schema_constants import development, main, testing
13
13
 
14
- ALLOWED_SCHEMA_PARAMS = {
14
+ SCHEMA_PARAMS = {
15
+ "main": main.SCHEMA_PARAMS,
16
+ # development and testing
15
17
  "development": development.SCHEMA_PARAMS,
16
- }
17
-
18
- TESTING_SCHEMA_PARAMS = {
19
18
  "integration": testing.SCHEMA_PARAMS_ONE_STEP,
20
19
  "output_dir": testing.SCHEMA_PARAMS_OUTPUT_DIR,
21
20
  "combine_bad_topology": testing.SCHEMA_PARAMS_BAD_COMBINED_TOPOLOGY,