easylink 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
easylink/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.14"
1
+ __version__ = "0.1.16"
easylink/cli.py CHANGED
@@ -41,17 +41,21 @@ As before, refer to ``easylink generate-dag --help`` for information on other op
41
41
  For usage documentation, see :ref:`cli`.
42
42
  """
43
43
 
44
+ import os
44
45
  from collections.abc import Callable
46
+ from pathlib import Path
45
47
 
46
48
  import click
47
49
  from loguru import logger
48
50
 
49
51
  from easylink import runner
52
+ from easylink.devtools import implementation_creator
50
53
  from easylink.utilities.data_utils import get_results_directory
51
54
  from easylink.utilities.general_utils import (
52
55
  configure_logging_to_terminal,
53
56
  handle_exceptions,
54
57
  )
58
+ from easylink.utilities.paths import CONTAINER_DIR
55
59
 
56
60
  SHARED_OPTIONS = [
57
61
  click.option(
@@ -87,6 +91,9 @@ SHARED_OPTIONS = [
87
91
  default=False,
88
92
  help="Do not save the results in a timestamped sub-directory of ``--output-dir``.",
89
93
  ),
94
+ ]
95
+
96
+ VERBOSE_WITH_DEBUGGER_OPTIONS = [
90
97
  click.option(
91
98
  "-v", "--verbose", count=True, help="Increase logging verbosity.", hidden=True
92
99
  ),
@@ -100,12 +107,26 @@ SHARED_OPTIONS = [
100
107
  ]
101
108
 
102
109
 
110
def _pass_verbose_with_debugger_options(func: Callable) -> Callable:
    """Passes verbosity and debugger options to a click command.

    Parameters
    ----------
    func
        The click command function to add shared options to.

    Returns
    -------
    The click command function with the shared options added.
    """
    decorated = func
    for option in VERBOSE_WITH_DEBUGGER_OPTIONS:
        decorated = option(decorated)
    return decorated
103
127
def _pass_shared_options(func: Callable) -> Callable:
    """Passes shared options to a click command.

    Parameters
    ----------
    func
        The click command function to add shared options to.

    Returns
    -------
    The click command function with the shared options added.
    """
    decorated = func
    # Shared options plus verbosity/debugger options are attached in one pass.
    for option in SHARED_OPTIONS + VERBOSE_WITH_DEBUGGER_OPTIONS:
        decorated = option(decorated)
    return decorated
121
142
 
@@ -204,3 +225,90 @@ def generate_dag(
204
225
  results_dir=results_dir,
205
226
  )
206
227
  logger.info("*** DAG saved to result directory ***")
228
+
229
+
230
+ #####################
231
+ # Development tools #
232
+ #####################
233
+
234
+
235
@click.group(hidden=True)
def devtools():
    """Development tools for EasyLink."""


easylink.add_command(devtools)
242
+
243
+
244
@devtools.command()
@_pass_verbose_with_debugger_options
@click.argument(
    "scripts",
    type=click.Path(exists=True, dir_okay=False, file_okay=True, resolve_path=True),
    nargs=-1,
)
@click.option(
    "-o",
    "--output-dir",
    type=click.Path(exists=False, dir_okay=True, file_okay=False, resolve_path=True),
    help=(
        "The directory to move the container to. If no value is passed, it will "
        f"be moved to {CONTAINER_DIR} in a sub-directory named with the username."
    ),
)
def create_implementation(
    scripts: tuple[str, ...],
    output_dir: str | None,
    verbose: int,
    with_debugger: bool,
):
    """Creates EasyLink implementations from implementation details.

    This is a helper tool for developers to more easily create implementations
    and register them with the EasyLink framework.

    SCRIPTS are the filepaths to the implementation Python scripts to be run from within
    a newly created container. Each script must specify (1) the name of the pipeline
    step that it is implementing as well as, optionally, (2) any required pypi dependencies,
    and (3) the pipeline schema that the step the script implements is part of
    (will default to "main" if not specified).

    These values are to be specified in the script using comments with the exact
    format shown in the example below.

    # STEP_NAME: blocking

    # REQUIREMENTS: pandas==2.1.2 pyarrow pyyaml

    # PIPELINE_SCHEMA: development

    Note that the requirements should be formatted as a single line.

    If an implementation of the same name already exists, it will be overwritten
    automatically and the new one registered with EasyLink.
    """
    if not scripts:
        logger.error("No scripts provided.")
        return
    # NOTE: ``getpass.getuser()`` is used instead of ``os.getlogin()`` because
    # the latter raises OSError when the process has no controlling terminal
    # (e.g. cron jobs or cluster batch jobs).
    import getpass

    output_dir = (
        Path(output_dir) if output_dir else Path(f"{CONTAINER_DIR}/{getpass.getuser()}")
    )
    if not output_dir.exists():
        # make the directory with rwxrwxr-x permissions (the mode is subject to
        # the process umask, so effective permissions may be more restrictive)
        output_dir.mkdir(parents=True, mode=0o775)
    if not output_dir.exists():
        raise FileNotFoundError(
            f"Output directory {output_dir} does not exist and could not be created."
        )
    configure_logging_to_terminal(verbose)
    main = handle_exceptions(
        func=implementation_creator.main,
        exceptions_logger=logger,
        with_debugger=with_debugger,
    )
    list_str = ""
    for script in scripts:
        script = Path(script)
        logger.info(f"Creating implementation for {script.name}")
        main(script_path=script, host=output_dir)
        list_str += f" - {script.stem}\n"
    logger.info("*** Implementations created ***\n" f"{list_str}")
@@ -0,0 +1,435 @@
1
+ """
2
+ An EasyLink "implementation" (related to, but not to be confused with the
3
+ :class:`~easylink.implementation.Implementation` class object) is, at the most
4
+ basic level, a container that implements some step of the pipeline as well as
5
+ other supporting information to connect said container to the EasyLink framework.
6
+
7
+ In order to create an implementation, three things are needed:
8
+ 1. The container that runs the script that implements a step of the pipeline must be created.
9
+ 2. The container must be moved to the proper hosting location so EasyLink can find it.
10
+ 3. The container must be registered with EasyLink so it can be used.
11
+
12
+ """
13
+
14
+ import shutil
15
+ import subprocess
16
+ from pathlib import Path
17
+ from typing import cast
18
+
19
+ import yaml
20
+ from loguru import logger
21
+
22
+ from easylink.pipeline_schema_constants import ALLOWED_SCHEMA_PARAMS
23
+ from easylink.step import (
24
+ ChoiceStep,
25
+ EmbarrassinglyParallelStep,
26
+ HierarchicalStep,
27
+ IOStep,
28
+ Step,
29
+ TemplatedStep,
30
+ )
31
+ from easylink.utilities.data_utils import load_yaml
32
+ from easylink.utilities.paths import IMPLEMENTATION_METADATA
33
+
34
+
35
def main(script_path: Path, host: Path) -> None:
    """Creates a container to run a specific script and registers it with EasyLink.

    Parameters
    ----------
    script_path
        The filepath to a single script that implements a step of the pipeline.
    host
        The host directory to move the container to.
    """
    creator = ImplementationCreator(script_path, host)
    # Run the full pipeline: write the recipe, build and host the container,
    # then register it with EasyLink.
    for stage in (
        creator.create_recipe,
        creator.build_container,
        creator.move_container,
        creator.register,
    ):
        stage()
51
+
52
class ImplementationCreator:
    """A class used to create a container for a specific implementation.

    Parameters
    ----------
    script_path
        The filepath to a single script that implements a step of the pipeline.
    host
        The host directory to move the container to.
    recipe_path
        The filepath to the recipe file that will be created. It will be created
        in the same directory as the script.
    local_container_path
        The filepath to the local container that will be created. It will be created
        in the same directory as the script.
    hosted_container_path
        The filepath to move the container to. This is where EasyLink will look
        for the container.
    implementation_name
        The name of the implementation. It is by definition the name of the script.
    requirements
        The install requirements for the implementation (if any).
    step
        The name of the step that this implementation implements.
    output_slot
        The name of the output slot that this implementation sends results to.
    """

    def __init__(self, script_path: Path, host: Path) -> None:
        self.script_path = script_path
        """The filepath to a single script that implements a step of the pipeline."""
        self.host = host
        """The host directory to move the container to."""
        self.recipe_path = script_path.with_suffix(".def")
        """The filepath to the recipe file that will be created. It will be created
        in the same directory as the script."""
        self.local_container_path = script_path.with_suffix(".sif")
        """The filepath to the local container that will be created. It will be created
        in the same directory as the script."""
        self.hosted_container_path = self.host / self.local_container_path.name
        """The filepath to move the container to. This is where EasyLink will look
        for the container."""
        self.implementation_name = script_path.stem
        """The name of the implementation. It is by definition the name of the script."""
        self.requirements = self._extract_requirements(script_path)
        """The install requirements for the implementation (if any)."""
        self.step = self._extract_implemented_step(script_path)
        """The name of the step that this implementation implements."""
        self.output_slot = self._extract_output_slot(script_path, self.step)
        """The name of the output slot that this implementation sends results to."""

    def create_recipe(self) -> None:
        """Builds the singularity recipe and writes it to disk."""
        recipe = PythonRecipe(self.script_path, self.recipe_path, self.requirements)
        recipe.build()
        recipe.write()

    def build_container(self) -> None:
        """Builds the container from the recipe.

        Raises
        ------
        subprocess.CalledProcessError
            If the subprocess fails.
        Exception
            If the container fails to build for any reason.
        """
        logger.info(f"Building container for '{self.implementation_name}'")
        if self.local_container_path.exists():
            logger.warning(
                f"Container {self.local_container_path} already exists. Overwriting it."
            )

        try:
            cmd = [
                "singularity",
                "build",
                "--remote",
                "--force",
                str(self.local_container_path),
                str(self.recipe_path),
            ]
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                cwd=self.local_container_path.parent,
            )

            # stream output to console
            for line in process.stdout:  # type: ignore[union-attr]
                print(line, end="")
            process.wait()

            if process.returncode == 0:
                logger.info(
                    f"Successfully built container '{self.local_container_path.name}'"
                )
            else:
                logger.error(
                    f"Failed to build container '{self.local_container_path.name}'. "
                    f"Error: {process.returncode}"
                )
                raise subprocess.CalledProcessError(
                    process.returncode, cmd, output=process.stderr.read()  # type: ignore[union-attr]
                )
        except Exception as e:
            logger.error(
                f"Failed to build container '{self.local_container_path.name}'. "
                f"Error: {e}"
            )
            raise

    def move_container(self) -> None:
        """Moves the container to the proper location for EasyLink to find it."""
        logger.info(f"Moving container '{self.implementation_name}' to {self.host}")
        if self.hosted_container_path.exists():
            logger.warning(
                f"Container {self.hosted_container_path} already exists. Overwriting it."
            )
        shutil.move(str(self.local_container_path), str(self.hosted_container_path))

    def register(self) -> None:
        """Registers the container with EasyLink.

        Specifically, this function adds the implementation details to the
        implementation_metadata.yaml registry file.
        """
        logger.info(f"Registering container '{self.implementation_name}'")
        info = load_yaml(IMPLEMENTATION_METADATA)
        if self.implementation_name in info:
            logger.warning(
                f"Implementation '{self.implementation_name}' already exists in the registry. "
                "Overwriting it with the latest data."
            )
        info[self.implementation_name] = {
            "steps": [self.step],
            "image_path": str(self.hosted_container_path),
            "script_cmd": f"python /{self.script_path.name}",
            "outputs": {
                self.output_slot: "result.parquet",
            },
        }
        self._write_metadata(info)

    @staticmethod
    def _extract_requirements(script_path: Path) -> str:
        """Extracts the script's dependency requirements (if any).

        The expectation is that any requirements are specified within the script
        as a comment of the format:

        .. code-block:: python
            # REQUIREMENTS: pandas==2.1.2 pyarrow pyyaml

        This is an optional field and only required if the script actually has dependencies.

        The requirements must be specified as a single space-separated line.
        """
        requirements = _extract_metadata("REQUIREMENTS", script_path)
        if len(requirements) == 0:
            logger.info(f"No requirements found in {script_path}.")
            requirements.append("")
        return requirements[0]

    @staticmethod
    def _extract_implemented_step(script_path: Path) -> str:
        """Extracts the name of the step that this script is implementing.

        The expectation is that the step's name is specified within the script
        as a comment of the format:

        .. code-block:: python
            # STEP_NAME: blocking
        """
        step_info = _extract_metadata("STEP_NAME", script_path)
        if len(step_info) == 0:
            raise ValueError(
                f"Could not find a step name in {script_path}. "
                "Please ensure the script contains a comment of the form '# STEP_NAME: <name>'"
            )
        steps = [step.strip() for step in step_info[0].split(",")]
        if len(steps) > 1:
            raise NotImplementedError(
                f"Multiple steps are not yet supported. {script_path} is requesting "
                f"to implement {steps}."
            )
        return steps[0]

    @staticmethod
    def _extract_output_slot(script_path: Path, step_name: str) -> str:
        """Extracts the name of the output slot that this script is implementing."""
        schema = ImplementationCreator._extract_pipeline_schema(script_path)
        implementable_steps = ImplementationCreator._extract_implementable_steps(schema)
        step_names = [step.name for step in implementable_steps]
        if step_name not in step_names:
            raise ValueError(
                f"'{step_name}' does not exist as an implementable step in the '{schema}' pipeline schema. "
            )
        duplicates = list(set([step for step in step_names if step_names.count(step) > 1]))
        if duplicates:
            raise ValueError(
                f"Multiple implementable steps with the same name found in the '{schema}' "
                f"pipeline schema: {duplicates}."
            )
        implemented_step = [step for step in implementable_steps if step.name == step_name][0]
        if len(implemented_step.output_slots) != 1:
            raise NotImplementedError(
                f"Multiple output slots are not yet supported. {script_path} is requesting "
                f"to implement {step_name} with {len(implemented_step.output_slots)} output slots."
            )
        return list(implemented_step.output_slots)[0]

    @staticmethod
    def _extract_implementable_steps(schema: str) -> list[Step]:
        """Extracts all implementable steps from the pipeline schema.

        This method recursively traverses the pipeline schema specified in the script
        to dynamically build a list of all implementable steps.
        """

        def _process_step(node: Step) -> None:
            """Adds `step` to the `implementable_steps` list if it is implementable."""
            if isinstance(node, IOStep):
                return
            elif isinstance(node, TemplatedStep):
                _process_step(node.template_step)
                return
            elif isinstance(node, EmbarrassinglyParallelStep):
                _process_step(node.step)
                return
            elif isinstance(node, ChoiceStep):
                for choice_step in node.choices.values():
                    _process_step(cast(Step, choice_step["step"]))
                return
            elif isinstance(node, HierarchicalStep):
                # A HierarchicalStep is itself implementable *and* contains
                # implementable sub-steps, so recurse into its nodes too.
                implementable_steps.append(node)
                for sub_step in node.nodes:
                    _process_step(sub_step)
                return
            else:  # base Step
                implementable_steps.append(node)
                return

        schema_steps = ALLOWED_SCHEMA_PARAMS[schema][0]

        implementable_steps: list[Step] = []
        for schema_step in schema_steps:
            _process_step(schema_step)

        return implementable_steps

    @staticmethod
    def _extract_pipeline_schema(script_path: Path) -> str:
        """Extracts the relevant pipeline schema name.

        The expectation is that the output slot's name is specified within the script
        as a comment of the format:

        .. code-block:: python
            # PIPELINE_SCHEMA: development

        If no pipeline schema is specified, "main" will be used by default.
        """
        schema = _extract_metadata("PIPELINE_SCHEMA", script_path)
        return "main" if len(schema) == 0 else schema[0]

    @staticmethod
    def _write_metadata(info: dict[str, dict[str, str]]) -> None:
        """Writes the implementation metadata to disk.

        Parameters
        ----------
        info
            The implementation metadata to write to disk.
        """
        with open(IMPLEMENTATION_METADATA, "w") as f:
            yaml.dump(info, f, sort_keys=False)
335
class PythonRecipe:
    """A singularity recipe generator specific to implementations written in Python."""

    BASE_IMAGE = (
        "python@sha256:1c26c25390307b64e8ff73e7edf34b4fbeac59d41da41c08da28dc316a721899"
    )

    def __init__(self, script_path: Path, recipe_path: Path, requirements: str) -> None:
        self.script_path = script_path
        self.recipe_path = recipe_path
        self.requirements = requirements
        self.text: str | None = None

    def build(self) -> None:
        """Builds the recipe for the container."""
        logger.info(f"Building recipe for '{self.script_path.stem}'")

        script_name = self.script_path.name
        # NOTE: the %runscript uses "$@" (double-quoted) so that any arguments
        # passed to the container are forwarded to the script; single-quoting
        # '$@' would pass the literal string "$@" instead.
        self.text = f"""
Bootstrap: docker
From: {self.BASE_IMAGE}

%files
    ./{script_name} /{script_name}

%post
    # Create directories
    mkdir -p /input_data
    mkdir -p /extra_implementation_specific_input_data
    mkdir -p /results
    mkdir -p /diagnostics

    # Install Python packages with specific versions
    pip install {self.requirements}

%environment
    export LC_ALL=C

%runscript
    python /{script_name} "$@"
"""

    def write(self) -> None:
        """Writes the recipe to disk.

        Raises
        ------
        ValueError
            If there is no recipe text to write a recipe from.
        FileNotFoundError
            If the recipe file was not written to disk.
        """
        logger.info(f"Writing recipe for '{self.script_path.stem}' to disk.")
        if not self.text:
            raise ValueError("No recipe text to build.")
        if self.recipe_path.exists():
            logger.warning(f"Recipe file {self.recipe_path} already exists. Overwriting it.")
        with open(self.recipe_path, "w") as f:
            f.write(self.text)
            f.flush()
        if not self.recipe_path.exists():
            raise FileNotFoundError(f"Failed to write recipe to {self.recipe_path}.")
397
+
398
+ ####################
399
+ # Helper functions #
400
+ ####################
401
+
402
+
403
def _extract_metadata(key: str, script_path: Path) -> list[str]:
    """Extracts the container metadata from the script comments.

    Parameters
    ----------
    key
        The key to search for in the script comments, e.g. "REQUIREMENTS" or "STEP_NAME".
    script_path
        The path to the script file.

    Returns
    -------
    A list of metadata values found in the script comments.

    Raises
    ------
    ValueError
        If a key is found multiple times in the script.
    """
    metadata = []
    for line in script_path.read_text().splitlines():
        if key in line:
            # Strip all whitespace before matching so that e.g. "#  STEP_NAME :"
            # variants still match the expected "# KEY:" comment form.
            packed_line = line.replace(" ", "")
            if packed_line.startswith(f"#{key}:"):
                info = line.split(":")[1].strip()
                metadata.append(info)

    if len(metadata) > 1:
        # BUGFIX: the two f-strings were previously concatenated with no
        # separator, producing "...{metadata}Please ensure...".
        raise ValueError(
            f"Found multiple {key.lower()} requests in {script_path}: {metadata}. "
            f"Please ensure the script contains only one comment of the form '# {key}: <request>'"
        )
    return metadata
@@ -1,195 +1,194 @@
1
- # development dummies
2
1
  step_1_python_pandas:
3
- steps:
4
- - step_1
2
+ steps:
3
+ - step_1
5
4
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
6
5
  script_cmd: python /dummy_step.py
7
6
  outputs:
8
7
  step_1_main_output: result.parquet
9
8
  step_1a_python_pandas:
10
- steps:
11
- - step_1a
9
+ steps:
10
+ - step_1a
12
11
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
13
12
  script_cmd: python /dummy_step.py
14
13
  env:
15
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
14
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
16
15
  outputs:
17
16
  step_1a_main_output: result.parquet
18
17
  step_1b_python_pandas:
19
- steps:
20
- - step_1b
18
+ steps:
19
+ - step_1b
21
20
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
22
21
  script_cmd: python /dummy_step.py
23
22
  env:
24
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
23
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
25
24
  outputs:
26
25
  step_1b_main_output: result.parquet
27
26
  step_2_python_pandas:
28
- steps:
29
- - step_2
27
+ steps:
28
+ - step_2
30
29
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
31
30
  script_cmd: python /dummy_step.py
32
31
  outputs:
33
32
  step_2_main_output: result.parquet
34
33
  step_3_python_pandas:
35
- steps:
36
- - step_3
34
+ steps:
35
+ - step_3
37
36
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
38
37
  script_cmd: python /dummy_step.py
39
38
  outputs:
40
39
  step_3_main_output: result.parquet
41
40
  step_4_python_pandas:
42
- steps:
43
- - step_4
41
+ steps:
42
+ - step_4
44
43
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
45
44
  script_cmd: python /dummy_step.py
46
45
  env:
47
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
46
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
48
47
  outputs:
49
48
  step_4_main_output: result.parquet
50
49
  step_5_python_pandas:
51
- steps:
52
- - step_5
50
+ steps:
51
+ - step_5
53
52
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
54
53
  script_cmd: python /dummy_step.py
55
54
  env:
56
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
55
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
57
56
  outputs:
58
57
  step_5_main_output: result.parquet
59
58
  step_6_python_pandas:
60
- steps:
61
- - step_6
59
+ steps:
60
+ - step_6
62
61
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
63
62
  script_cmd: python /dummy_step.py
64
63
  env:
65
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
64
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
66
65
  outputs:
67
66
  step_6_main_output: result.parquet
68
67
  step_4a_python_pandas:
69
- steps:
70
- - step_4a
68
+ steps:
69
+ - step_4a
71
70
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
72
71
  script_cmd: python /dummy_step.py
73
72
  env:
74
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
73
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
75
74
  outputs:
76
75
  step_4a_main_output: result.parquet
77
76
  step_4b_python_pandas:
78
- steps:
79
- - step_4b
77
+ steps:
78
+ - step_4b
80
79
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
81
80
  script_cmd: python /dummy_step.py
82
81
  env:
83
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
82
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
84
83
  outputs:
85
84
  step_4b_main_output: result.parquet
86
85
  step_4b_r:
87
- steps:
88
- - step_4b
86
+ steps:
87
+ - step_4b
89
88
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
90
89
  script_cmd: Rscript /dummy_step.R
91
90
  env:
92
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
91
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
93
92
  outputs:
94
93
  step_4b_main_output: result.parquet
95
94
  step_1_python_pyspark:
96
- steps:
97
- - step_1
95
+ steps:
96
+ - step_1
98
97
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pyspark.sif
99
98
  script_cmd: python3 /code/dummy_step.py
100
99
  outputs:
101
100
  step_1_main_output: result.parquet
102
101
  requires_spark: true
103
102
  step_2_python_pyspark:
104
- steps:
105
- - step_2
103
+ steps:
104
+ - step_2
106
105
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pyspark.sif
107
106
  script_cmd: python3 /code/dummy_step.py
108
107
  outputs:
109
- step_2_main_output: result.parquet
108
+ step_2_main_output: result.parquet
110
109
  requires_spark: true
111
110
  step_3_python_pyspark:
112
- steps:
113
- - step_3
111
+ steps:
112
+ - step_3
114
113
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pyspark.sif
115
114
  script_cmd: python3 /code/dummy_step.py
116
115
  outputs:
117
116
  step_3_main_output: result.parquet
118
117
  requires_spark: true
119
118
  step_4_python_pyspark:
120
- steps:
121
- - step_4
119
+ steps:
120
+ - step_4
122
121
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pyspark.sif
123
- script_cmd: python3 /code/dummy_step.py
122
+ script_cmd: python3 /code/dummy_step.py
124
123
  env:
125
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
124
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
126
125
  outputs:
127
126
  step_4_main_output: result.parquet
128
127
  step_1_r:
129
- steps:
130
- - step_1
128
+ steps:
129
+ - step_1
131
130
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
132
131
  script_cmd: Rscript /dummy_step.R
133
132
  outputs:
134
133
  step_1_main_output: result.parquet
135
134
  requires_spark: false
136
135
  step_2_r:
137
- steps:
138
- - step_2
136
+ steps:
137
+ - step_2
139
138
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
140
139
  script_cmd: Rscript /dummy_step.R
141
140
  outputs:
142
141
  step_2_main_output: result.parquet
143
142
  requires_spark: false
144
143
  step_3_r:
145
- steps:
146
- - step_3
144
+ steps:
145
+ - step_3
147
146
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
148
147
  script_cmd: Rscript /dummy_step.R
149
148
  outputs:
150
149
  step_3_main_output: result.parquet
151
150
  requires_spark: false
152
151
  step_4_r:
153
- steps:
154
- - step_4
152
+ steps:
153
+ - step_4
155
154
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/r-image.sif
156
155
  script_cmd: Rscript /dummy_step.R
157
156
  env:
158
- INPUT_ENV_VARS: "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS"
157
+ INPUT_ENV_VARS: DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS
159
158
  outputs:
160
159
  step_4_main_output: result.parquet
161
160
  requires_spark: false
162
161
  step_1_and_step_2_combined_python_pandas:
163
162
  steps:
164
- - step_1
165
- - step_2
163
+ - step_1
164
+ - step_2
166
165
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
167
166
  script_cmd: python /dummy_step.py
168
167
  outputs:
169
168
  step_2_main_output: result.parquet
170
169
  step_1_and_step_2_parallel_python_pandas:
171
170
  steps:
172
- - step_1
173
- - step_2
171
+ - step_1
172
+ - step_2
174
173
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
175
174
  script_cmd: python /dummy_step.py
176
175
  env:
177
- INPUT_ENV_VARS: "STEP_1_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,STEP_2_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS"
176
+ INPUT_ENV_VARS: STEP_1_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS,STEP_2_DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS
178
177
  outputs:
179
178
  step_2_main_output: result.parquet
180
179
  step_3_and_step_4_combined_python_pandas:
181
180
  steps:
182
- - step_3
183
- - step_4
181
+ - step_3
182
+ - step_4
184
183
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
185
184
  script_cmd: python /dummy_step.py
186
185
  outputs:
187
186
  step_4_main_output: result.parquet
188
187
  step_1a_and_step_1b_combined_python_pandas:
189
188
  steps:
190
- - step_1a
191
- - step_1b
189
+ - step_1a
190
+ - step_1b
192
191
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/python_pandas.sif
193
192
  script_cmd: python /dummy_step.py
194
193
  outputs:
195
- step_1_main_output: result.parquet
194
+ step_1_main_output: result.parquet
@@ -208,6 +208,14 @@ NODES = [
208
208
  "complex": {
209
209
  "step": HierarchicalStep(
210
210
  step_name="step_5_and_6",
211
+ input_slots=[
212
+ InputSlot(
213
+ name="step_5_and_6_main_input",
214
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
215
+ validator=validate_input_file_dummy,
216
+ ),
217
+ ],
218
+ output_slots=[OutputSlot("step_5_and_6_main_output")],
211
219
  nodes=[
212
220
  Step(
213
221
  step_name="step_5",
@@ -240,19 +248,33 @@ NODES = [
240
248
  input_slot="step_6_main_input",
241
249
  ),
242
250
  ],
251
+ input_slot_mappings=[
252
+ InputSlotMapping(
253
+ parent_slot="step_5_and_6_main_input",
254
+ child_node="step_5",
255
+ child_slot="step_5_main_input",
256
+ ),
257
+ ],
258
+ output_slot_mappings=[
259
+ OutputSlotMapping(
260
+ parent_slot="step_5_and_6_main_output",
261
+ child_node="step_6",
262
+ child_slot="step_6_main_output",
263
+ ),
264
+ ],
243
265
  ),
244
266
  "input_slot_mappings": [
245
267
  InputSlotMapping(
246
268
  parent_slot="choice_section_main_input",
247
- child_node="step_5",
248
- child_slot="step_5_main_input",
269
+ child_node="step_5_and_6",
270
+ child_slot="step_5_and_6_main_input",
249
271
  ),
250
272
  ],
251
273
  "output_slot_mappings": [
252
274
  OutputSlotMapping(
253
275
  parent_slot="choice_section_main_output",
254
- child_node="step_6",
255
- child_slot="step_6_main_output",
276
+ child_node="step_5_and_6",
277
+ child_slot="step_5_and_6_main_output",
256
278
  ),
257
279
  ],
258
280
  },
@@ -70,7 +70,7 @@ rule wait_for_spark_master:
70
70
  while true; do
71
71
 
72
72
  if [[ -e {params.spark_master_log_file} ]]; then
73
- found=`grep -o "\(spark://.*$\)" {params.spark_master_log_file} || true`
73
+ found=`grep -o "\\(spark://.*$\\)" {params.spark_master_log_file} || true`
74
74
 
75
75
  if [[ ! -z $found ]]; then
76
76
  echo "Spark master URL found: $found"
@@ -178,7 +178,7 @@ rule wait_for_spark_worker:
178
178
  while true; do
179
179
 
180
180
  if [[ -e {params.spark_worker_log_file} ]]; then
181
- found=`grep -o "\(Worker: Successfully registered with master $MASTER_URL\)" {params.spark_worker_log_file} || true`
181
+ found=`grep -o "\\(Worker: Successfully registered with master $MASTER_URL\\)" {params.spark_worker_log_file} || true`
182
182
 
183
183
  if [[ ! -z $found ]]; then
184
184
  echo "Spark Worker {wildcards.scatteritem} registered successfully"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.14
3
+ Version: 0.1.16
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -21,6 +21,7 @@ Requires-Dist: snakemake-interface-executor-plugins<9.0.0
21
21
  Requires-Dist: snakemake-executor-plugin-slurm
22
22
  Requires-Dist: pandas-stubs
23
23
  Requires-Dist: pyarrow-stubs
24
+ Requires-Dist: types-PyYAML
24
25
  Provides-Extra: docs
25
26
  Requires-Dist: sphinx<8.2.0; extra == "docs"
26
27
  Requires-Dist: sphinx-rtd-theme; extra == "docs"
@@ -78,15 +79,16 @@ There are a few things to install in order to use this package:
78
79
  - Install singularity.
79
80
 
80
81
  You may need to request it from your system admin.
81
- Refer to https://docs.sylabs.io/guides/4.1/admin-guide/installation.html.
82
- You can check if you already have singularity installed by running the command ``singularity --version``. For an
83
- existing installation, your singularity version number is printed.
82
+ Refer to https://docs.sylabs.io/guides/4.1/admin-guide/installation.html.
83
+ You can check if you already have singularity installed by running the command
84
+ ``singularity --version``. For an existing installation, your singularity version
85
+ number is printed.
84
86
 
85
87
  - Install conda.
86
88
 
87
- We recommend `miniforge <https://github.com/conda-forge/miniforge>`_. You can check if you already
88
- have conda installed by running the command ``conda --version``. For an existing installation, a version
89
- will be displayed.
89
+ We recommend `miniforge <https://github.com/conda-forge/miniforge>`_. You can
90
+ check if you already have conda installed by running the command ``conda --version``.
91
+ For an existing installation, a version will be displayed.
90
92
 
91
93
  - Install easylink, python and graphviz in a conda environment.
92
94
 
@@ -1,21 +1,22 @@
1
1
  easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
2
2
  easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
3
- easylink/_version.py,sha256=PIBqEOI-nqKFL9oJAWQQwlHuujG9Cd7EmdxDrThNQto,23
4
- easylink/cli.py,sha256=ARSKAljepNOEYd1VCS_QqBJQIBLzE3IgKiOb5-OROdY,6380
3
+ easylink/_version.py,sha256=yF88-8vL8keLe6gCTumymw0UoMkWkSrJnzLru4zBCLQ,23
4
+ easylink/cli.py,sha256=mv9l9XHojfhDK4hpDeV1E4iensgt6zx2ovkGBQ8x9xk,9745
5
5
  easylink/configuration.py,sha256=lfm8ViUpr1-O-EovTjKZbAlIht2EBv3RndN1mzYbmDE,12565
6
6
  easylink/graph_components.py,sha256=zZDZXg5smReHO3ryQC4pao24wyKXzWDe6jS3C6fM2ak,13892
7
7
  easylink/implementation.py,sha256=4u3QgLOrNttfU9Kd_9u_lg3in4ePoYUfO9u_udwiuh0,10878
8
- easylink/implementation_metadata.yaml,sha256=VvlEu3Dvlmeh1MpzeYx91j22GiV-9mu3hZP5yVuW04o,6763
8
+ easylink/implementation_metadata.yaml,sha256=_maN5UWFZxDykYcUrDXoEKMej4jeF_rZLt3QZj72kQM,6645
9
9
  easylink/pipeline.py,sha256=5KOYH5HyJjVlFoBRKGLs2hn5mpC3tPYG_ux3T1qSV9k,17504
10
10
  easylink/pipeline_graph.py,sha256=9ysX4wAkA-WkUoo15jSLAErncybE4tJwznVx7N_kwIA,23922
11
11
  easylink/pipeline_schema.py,sha256=Q2sCpsC-F2W0yxVP7ufunowDepOBrRVENXOdap9J5iY,6921
12
12
  easylink/rule.py,sha256=uoPj7yFFqiwvxlnhoejrZuPR3YX--y1k02uDDz3viTc,16196
13
13
  easylink/runner.py,sha256=cbCo5_NvvulmjjAaBCG6qCmbtJiHK-7NuDvbngdU_PY,6675
14
14
  easylink/step.py,sha256=u1AMPrYGNVb3ZH6uB_U0dUeJvOeQ2MoVHdlC8k63AA8,85226
15
+ easylink/devtools/implementation_creator.py,sha256=mkiQ9nhtQC3mhxcG8IyvejzSK0WSkwplCztPLXbpXXQ,16199
15
16
  easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
16
17
  easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
17
18
  easylink/pipeline_schema_constants/__init__.py,sha256=HbN-NytoGuk8aTfe0Wal232UnLopFBQGe2uRjmg_igQ,1272
18
- easylink/pipeline_schema_constants/development.py,sha256=yRzkCiBqF_Jv3Y0GNvswVAWeZfKJRXk8Y8Q9ZhwCg_A,11596
19
+ easylink/pipeline_schema_constants/development.py,sha256=XxcYYZDZM4IADp3eFPQCchD6-OtMp99GiyZBfSswzFo,12640
19
20
  easylink/pipeline_schema_constants/testing.py,sha256=8vVGj7opZ9Uzj7EHGMbgXyZj3_SboIeUPB0XlZkmvrM,18901
20
21
  easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
21
22
  easylink/steps/dev/build-containers-local.sh,sha256=Wy3pfcyt7I-BNvHcr7ZXDe0g5Ihd00BIPqt9YuRbLeA,259
@@ -40,11 +41,11 @@ easylink/utilities/aggregator_utils.py,sha256=pqBog6kEX4MXBBMjQtHFlE5gEMqRWb5VFl
40
41
  easylink/utilities/data_utils.py,sha256=CcnM3u0_MQDQo3jMs3E4IK_rz8wAsFdJ674fZxYEFZg,4620
41
42
  easylink/utilities/general_utils.py,sha256=El1W0nn4P27sRBGotNQb-9du-Gbhk9ggSuu4vmGDfwo,4591
42
43
  easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,924
43
- easylink/utilities/spark.smk,sha256=tQ7RArNQzhjbaBQQcRORB4IxxkuDx4gPHUBcWHDYJ_U,5795
44
+ easylink/utilities/spark.smk,sha256=kGtpem7LfQc71tMh5WAYaqKnHQKFvcdhPQSdumOP70k,5799
44
45
  easylink/utilities/splitter_utils.py,sha256=UOz4hjkEPqaAz0RrDkDYYej79lLSaq0VVVSH_tF1z0o,3838
45
46
  easylink/utilities/validation_utils.py,sha256=W9r_RXcivJjfpioLhONirfwdByYttxNsVY489_sbrYQ,1683
46
- easylink-0.1.14.dist-info/METADATA,sha256=zJQ4KHmrAf6NoEW_MMbqj9RmpuBav6IggUTxnXFfezg,3449
47
- easylink-0.1.14.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
48
- easylink-0.1.14.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
49
- easylink-0.1.14.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
50
- easylink-0.1.14.dist-info/RECORD,,
47
+ easylink-0.1.16.dist-info/METADATA,sha256=xkRlfeXuPHvvZXwEHaObnpu6MsOWSF6Lu-1wi7wRlJQ,3477
48
+ easylink-0.1.16.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
49
+ easylink-0.1.16.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
50
+ easylink-0.1.16.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
51
+ easylink-0.1.16.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.1.0)
2
+ Generator: setuptools (80.4.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5