easylink 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
easylink/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.16"
1
+ __version__ = "0.1.17"
@@ -135,8 +135,17 @@ class Implementation:
135
135
 
136
136
  @property
137
137
  def outputs(self) -> dict[str, list[str]]:
138
- """The expected output metadata."""
139
- return self._metadata["outputs"]
138
+ """The expected output paths. If output metadata is provided, use it. Otherwise,
139
+ assume that the output is a sub-directory with the name of the output slot.
140
+ If there is only one output slot, use '.'."""
141
+ if len(self.output_slots) == 1:
142
+ return self._metadata.get("outputs", {list(self.output_slots.keys())[0]: "."})
143
+ return {
144
+ output_slot_name: self._metadata.get("outputs", {}).get(
145
+ output_slot_name, output_slot_name
146
+ )
147
+ for output_slot_name in self.output_slots
148
+ }
140
149
 
141
150
 
142
151
  class NullImplementation:
@@ -192,3 +192,25 @@ step_1a_and_step_1b_combined_python_pandas:
192
192
  script_cmd: python /dummy_step.py
193
193
  outputs:
194
194
  step_1_main_output: result.parquet
195
+ dummy_step_1_for_output_dir_example:
196
+ steps:
197
+ - step_1_for_output_dir_example
198
+ image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_1_for_output_dir_example.sif
199
+ script_cmd: python /dummy_step_1_for_output_dir_example.py
200
+ outputs:
201
+ step_1_main_output_directory: output_dir/
202
+ dummy_step_1_for_output_dir_example_default:
203
+ steps:
204
+ - step_1_for_output_dir_example
205
+ image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_1_for_output_dir_example.sif
206
+ script_cmd: python /dummy_step_1_for_output_dir_example.py
207
+ # leave outputs out for testing purposes
208
+ # outputs:
209
+ # step_1_main_output_directory: output_dir/
210
+ dummy_step_2_for_output_dir_example:
211
+ steps:
212
+ - step_2_for_output_dir_example
213
+ image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_2_for_output_dir_example.sif
214
+ script_cmd: python /dummy_step_2_for_output_dir_example.py
215
+ outputs:
216
+ step_2_main_output: result.parquet
@@ -17,6 +17,7 @@ ALLOWED_SCHEMA_PARAMS = {
17
17
 
18
18
  TESTING_SCHEMA_PARAMS = {
19
19
  "integration": testing.SCHEMA_PARAMS_ONE_STEP,
20
+ "output_dir": testing.SCHEMA_PARAMS_OUTPUT_DIR,
20
21
  "combine_bad_topology": testing.SCHEMA_PARAMS_BAD_COMBINED_TOPOLOGY,
21
22
  "combine_bad_implementation_names": testing.SCHEMA_PARAMS_BAD_COMBINED_TOPOLOGY,
22
23
  "nested_templated_steps": testing.SCHEMA_PARAMS_NESTED_TEMPLATED_STEPS,
@@ -26,7 +26,7 @@ from easylink.step import (
26
26
  )
27
27
  from easylink.utilities.aggregator_utils import concatenate_datasets
28
28
  from easylink.utilities.splitter_utils import split_data_in_two
29
- from easylink.utilities.validation_utils import validate_input_file_dummy
29
+ from easylink.utilities.validation_utils import validate_dir, validate_input_file_dummy
30
30
 
31
31
  NODES_ONE_STEP = [
32
32
  InputStep(),
@@ -582,3 +582,55 @@ EDGES_ONE_STEP_TWO_ISLOTS = [
582
582
  ),
583
583
  ]
584
584
  SCHEMA_PARAMS_EP_HIERARCHICAL_STEP = (NODES_EP_HIERARCHICAL_STEP, EDGES_ONE_STEP_TWO_ISLOTS)
585
+
586
+ NODES_OUTPUT_DIR = [
587
+ InputStep(),
588
+ Step(
589
+ step_name="step_1_for_output_dir_example",
590
+ input_slots=[
591
+ InputSlot(
592
+ name="step_1_main_input",
593
+ env_var="STEP_1_MAIN_INPUT_FILE_PATHS",
594
+ validator=validate_input_file_dummy,
595
+ )
596
+ ],
597
+ output_slots=[OutputSlot("step_1_main_output_directory")],
598
+ ),
599
+ Step(
600
+ step_name="step_2_for_output_dir_example",
601
+ input_slots=[
602
+ InputSlot(
603
+ name="step_2_main_input",
604
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_DIR_PATH",
605
+ validator=validate_dir,
606
+ )
607
+ ],
608
+ output_slots=[OutputSlot("step_2_main_output")],
609
+ ),
610
+ OutputStep(
611
+ input_slots=[
612
+ InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
613
+ ],
614
+ ),
615
+ ]
616
+ EDGES_OUTPUT_DIR = [
617
+ EdgeParams(
618
+ source_node="input_data",
619
+ target_node="step_1_for_output_dir_example",
620
+ output_slot="all",
621
+ input_slot="step_1_main_input",
622
+ ),
623
+ EdgeParams(
624
+ source_node="step_1_for_output_dir_example",
625
+ target_node="step_2_for_output_dir_example",
626
+ output_slot="step_1_main_output_directory",
627
+ input_slot="step_2_main_input",
628
+ ),
629
+ EdgeParams(
630
+ source_node="step_2_for_output_dir_example",
631
+ target_node="results",
632
+ output_slot="step_2_main_output",
633
+ input_slot="result",
634
+ ),
635
+ ]
636
+ SCHEMA_PARAMS_OUTPUT_DIR = (NODES_OUTPUT_DIR, EDGES_OUTPUT_DIR)
easylink/rule.py CHANGED
@@ -17,6 +17,7 @@ import os
17
17
  from abc import ABC, abstractmethod
18
18
  from collections.abc import Callable
19
19
  from dataclasses import dataclass
20
+ from pathlib import Path
20
21
 
21
22
 
22
23
  class Rule(ABC):
@@ -125,6 +126,18 @@ class ImplementedRule(Rule):
125
126
  def _build_io(self) -> str:
126
127
  """Builds the input/output portion of the rule."""
127
128
  log_path_chunk_adder = "-{chunk}" if self.is_embarrassingly_parallel else ""
129
+ # Handle output files vs directories
130
+ files = [path for path in self.output if Path(path).suffix != ""]
131
+ if len(files) == len(self.output):
132
+ output = self.output
133
+ elif len(files) == 0:
134
+ if len(self.output) != 1:
135
+ raise NotImplementedError("Multiple output directories is not supported.")
136
+ output = f"directory('{self.output[0]}')"
137
+ else:
138
+ raise NotImplementedError(
139
+ "Mixed output types (files and directories) is not supported."
140
+ )
128
141
  io_str = (
129
142
  f"""
130
143
  rule:
@@ -132,7 +145,7 @@ rule:
132
145
  message: "Running {self.step_name} implementation: {self.implementation_name}" """
133
146
  + self._build_input()
134
147
  + f"""
135
- output: {self.output}
148
+ output: {output}
136
149
  log: "{self.diagnostics_dir}/{self.name}-output{log_path_chunk_adder}.log"
137
150
  container: "{self.image_path}" """
138
151
  )
@@ -0,0 +1,22 @@
1
+
2
+ Bootstrap: docker
3
+ From: python@sha256:1c26c25390307b64e8ff73e7edf34b4fbeac59d41da41c08da28dc316a721899
4
+
5
+ %files
6
+ ./dummy_step_1_for_output_dir_example.py /dummy_step_1_for_output_dir_example.py
7
+
8
+ %post
9
+ # Create directories
10
+ mkdir -p /input_data
11
+ mkdir -p /extra_implementation_specific_input_data
12
+ mkdir -p /results
13
+ mkdir -p /diagnostics
14
+
15
+ # Install Python packages with specific versions
16
+ pip install pandas==2.1.2 pyarrow
17
+
18
+ %environment
19
+ export LC_ALL=C
20
+
21
+ %runscript
22
+ python /dummy_step_1_for_output_dir_example.py '$@'
@@ -0,0 +1,18 @@
1
+ # PIPELINE_SCHEMA: output_dir
2
+ # STEP_NAME: step_1_for_output_dir_example
3
+ # REQUIREMENTS: pandas==2.1.2 pyarrow
4
+
5
+ import os
6
+ from pathlib import Path
7
+
8
+ import pandas as pd
9
+
10
+ data = pd.read_parquet(os.environ["STEP_1_MAIN_INPUT_FILE_PATHS"])
11
+
12
+ print(data)
13
+
14
+ dir_path = Path(os.environ["DUMMY_CONTAINER_OUTPUT_PATHS"])
15
+ dir_path.mkdir(parents=True, exist_ok=True)
16
+
17
+ for i in range(3):
18
+ data.to_parquet(dir_path / f"result_{i}.parquet")
@@ -0,0 +1,22 @@
1
+
2
+ Bootstrap: docker
3
+ From: python@sha256:1c26c25390307b64e8ff73e7edf34b4fbeac59d41da41c08da28dc316a721899
4
+
5
+ %files
6
+ ./dummy_step_2_for_output_dir_example.py /dummy_step_2_for_output_dir_example.py
7
+
8
+ %post
9
+ # Create directories
10
+ mkdir -p /input_data
11
+ mkdir -p /extra_implementation_specific_input_data
12
+ mkdir -p /results
13
+ mkdir -p /diagnostics
14
+
15
+ # Install Python packages with specific versions
16
+ pip install pandas==2.1.2 pyarrow
17
+
18
+ %environment
19
+ export LC_ALL=C
20
+
21
+ %runscript
22
+ python /dummy_step_2_for_output_dir_example.py '$@'
@@ -0,0 +1,22 @@
1
+ # PIPELINE_SCHEMA: output_dir
2
+ # STEP_NAME: step_2_for_output_dir_example
3
+ # REQUIREMENTS: pandas==2.1.2 pyarrow
4
+
5
+ import os
6
+ import shutil
7
+ from pathlib import Path
8
+
9
+ import pandas as pd
10
+
11
+ dir_path = Path(os.environ["DUMMY_CONTAINER_MAIN_INPUT_DIR_PATH"])
12
+ saved = False
13
+
14
+ for i, f in enumerate([f for f in dir_path.iterdir() if f.is_file()]):
15
+ if "snakemake" in str(f):
16
+ continue
17
+
18
+ if not saved:
19
+ shutil.copy(f, os.environ["DUMMY_CONTAINER_OUTPUT_PATHS"])
20
+ saved = True
21
+
22
+ print(pd.read_parquet(f))
@@ -50,3 +50,9 @@ def validate_input_file_dummy(filepath: str) -> None:
50
50
  raise LookupError(
51
51
  f"Data file {filepath} is missing required column(s) {missing_columns}"
52
52
  )
53
+
54
+
55
+ def validate_dir(filepath: str) -> None:
56
+ input_path = Path(filepath)
57
+ if not input_path.is_dir():
58
+ raise NotADirectoryError(f"The path {filepath} is not a directory.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.16
3
+ Version: 0.1.17
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -1,23 +1,23 @@
1
1
  easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
2
2
  easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
3
- easylink/_version.py,sha256=yF88-8vL8keLe6gCTumymw0UoMkWkSrJnzLru4zBCLQ,23
3
+ easylink/_version.py,sha256=BzIjnki8Bz3evNWo6bjGxxpLhy_tN9MRYhtM0MnDiWs,23
4
4
  easylink/cli.py,sha256=mv9l9XHojfhDK4hpDeV1E4iensgt6zx2ovkGBQ8x9xk,9745
5
5
  easylink/configuration.py,sha256=lfm8ViUpr1-O-EovTjKZbAlIht2EBv3RndN1mzYbmDE,12565
6
6
  easylink/graph_components.py,sha256=zZDZXg5smReHO3ryQC4pao24wyKXzWDe6jS3C6fM2ak,13892
7
- easylink/implementation.py,sha256=4u3QgLOrNttfU9Kd_9u_lg3in4ePoYUfO9u_udwiuh0,10878
8
- easylink/implementation_metadata.yaml,sha256=_maN5UWFZxDykYcUrDXoEKMej4jeF_rZLt3QZj72kQM,6645
7
+ easylink/implementation.py,sha256=XLSat6_IXFn-nH6X8AazmfWhDtTK4GtA7yiht9QLlQQ,11366
8
+ easylink/implementation_metadata.yaml,sha256=0BQ_NIV29EtJ6G_wGD_-2OYPjYw2sNLAgCg2GbNwcuI,7662
9
9
  easylink/pipeline.py,sha256=5KOYH5HyJjVlFoBRKGLs2hn5mpC3tPYG_ux3T1qSV9k,17504
10
10
  easylink/pipeline_graph.py,sha256=9ysX4wAkA-WkUoo15jSLAErncybE4tJwznVx7N_kwIA,23922
11
11
  easylink/pipeline_schema.py,sha256=Q2sCpsC-F2W0yxVP7ufunowDepOBrRVENXOdap9J5iY,6921
12
- easylink/rule.py,sha256=uoPj7yFFqiwvxlnhoejrZuPR3YX--y1k02uDDz3viTc,16196
12
+ easylink/rule.py,sha256=NusEUtBxx18L7UCcgDi3KKooFxSUgyS4eisVM5aPqFE,16770
13
13
  easylink/runner.py,sha256=cbCo5_NvvulmjjAaBCG6qCmbtJiHK-7NuDvbngdU_PY,6675
14
14
  easylink/step.py,sha256=u1AMPrYGNVb3ZH6uB_U0dUeJvOeQ2MoVHdlC8k63AA8,85226
15
15
  easylink/devtools/implementation_creator.py,sha256=mkiQ9nhtQC3mhxcG8IyvejzSK0WSkwplCztPLXbpXXQ,16199
16
16
  easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
17
17
  easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
18
- easylink/pipeline_schema_constants/__init__.py,sha256=HbN-NytoGuk8aTfe0Wal232UnLopFBQGe2uRjmg_igQ,1272
18
+ easylink/pipeline_schema_constants/__init__.py,sha256=FUngnh80yfpr76j18iBVKSoR8-5zxQj_mW_muUMrafw,1324
19
19
  easylink/pipeline_schema_constants/development.py,sha256=XxcYYZDZM4IADp3eFPQCchD6-OtMp99GiyZBfSswzFo,12640
20
- easylink/pipeline_schema_constants/testing.py,sha256=8vVGj7opZ9Uzj7EHGMbgXyZj3_SboIeUPB0XlZkmvrM,18901
20
+ easylink/pipeline_schema_constants/testing.py,sha256=UDmVVjI1SiDktMbJ2CrSb7amHSYNwhgqNkXhl4lYxQw,20459
21
21
  easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
22
22
  easylink/steps/dev/build-containers-local.sh,sha256=Wy3pfcyt7I-BNvHcr7ZXDe0g5Ihd00BIPqt9YuRbLeA,259
23
23
  easylink/steps/dev/build-containers-remote.sh,sha256=Hy-kaaXf-ta6n8SzOz_ahByjMY5T7J71MvzXRXDvQw8,271
@@ -36,6 +36,10 @@ easylink/steps/dev/python_pyspark/python_pyspark.def,sha256=j_RmVjspmXGOhJTr10ED
36
36
  easylink/steps/dev/r/README.md,sha256=dPjZdDTqcJsZCiwhddzlOj1ob0P7YocZUNFrLIGM1-0,1201
37
37
  easylink/steps/dev/r/dummy_step.R,sha256=1TWZY8CEkT6gavrulBxFsKbDSKJJjk0NtJrGH7TIikE,4975
38
38
  easylink/steps/dev/r/r-image.def,sha256=LrhXlt0C3k7d_VJWopRPEVARnFWSuq_oILlwo7g03bE,627
39
+ easylink/steps/output_dir/dummy_step_1_for_output_dir_example.def,sha256=CkQVG-uDRQ9spAavdkZbhx2GD_fRsKZGELPrr8yltsc,550
40
+ easylink/steps/output_dir/dummy_step_1_for_output_dir_example.py,sha256=dI0OWugE35ABLcSwsI-T3C4dvuPTKXwjE52dtSsCo8Y,428
41
+ easylink/steps/output_dir/dummy_step_2_for_output_dir_example.def,sha256=9gShg1EDJEHZcz7Z5VfZ1A4Gpm9XQes8ezn6rAZDgDM,550
42
+ easylink/steps/output_dir/dummy_step_2_for_output_dir_example.py,sha256=DMJW5TXjhELxhY4U9q2RpLjqxlS1YSosTGL2AfRnaZM,521
39
43
  easylink/utilities/__init__.py,sha256=0U33kbv4hoMfFQ_lh5hLwifxRPzOgkLkjKLYxmaK10g,196
40
44
  easylink/utilities/aggregator_utils.py,sha256=pqBog6kEX4MXBBMjQtHFlE5gEMqRWb5VFl64u0Lr__g,972
41
45
  easylink/utilities/data_utils.py,sha256=CcnM3u0_MQDQo3jMs3E4IK_rz8wAsFdJ674fZxYEFZg,4620
@@ -43,9 +47,9 @@ easylink/utilities/general_utils.py,sha256=El1W0nn4P27sRBGotNQb-9du-Gbhk9ggSuu4v
43
47
  easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,924
44
48
  easylink/utilities/spark.smk,sha256=kGtpem7LfQc71tMh5WAYaqKnHQKFvcdhPQSdumOP70k,5799
45
49
  easylink/utilities/splitter_utils.py,sha256=UOz4hjkEPqaAz0RrDkDYYej79lLSaq0VVVSH_tF1z0o,3838
46
- easylink/utilities/validation_utils.py,sha256=W9r_RXcivJjfpioLhONirfwdByYttxNsVY489_sbrYQ,1683
47
- easylink-0.1.16.dist-info/METADATA,sha256=xkRlfeXuPHvvZXwEHaObnpu6MsOWSF6Lu-1wi7wRlJQ,3477
48
- easylink-0.1.16.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
49
- easylink-0.1.16.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
50
- easylink-0.1.16.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
51
- easylink-0.1.16.dist-info/RECORD,,
50
+ easylink/utilities/validation_utils.py,sha256=rOIeQbbXXdsuL2hI0i2gApAWfiNJXMwYH4pmw8uLrGM,1867
51
+ easylink-0.1.17.dist-info/METADATA,sha256=fl6OzaU74KHClV-dADXheUoKBMuIs8pUqLANelfaqBY,3477
52
+ easylink-0.1.17.dist-info/WHEEL,sha256=A8Eltl-h0W-qZDVezsLjjslosEH_pdYC2lQ0JcbgCzs,91
53
+ easylink-0.1.17.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
54
+ easylink-0.1.17.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
55
+ easylink-0.1.17.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.7.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5