easylink 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easylink/_version.py +1 -1
- easylink/implementation.py +11 -2
- easylink/implementation_metadata.yaml +22 -0
- easylink/pipeline_schema_constants/__init__.py +1 -0
- easylink/pipeline_schema_constants/testing.py +53 -1
- easylink/rule.py +14 -1
- easylink/steps/output_dir/dummy_step_1_for_output_dir_example.def +22 -0
- easylink/steps/output_dir/dummy_step_1_for_output_dir_example.py +18 -0
- easylink/steps/output_dir/dummy_step_2_for_output_dir_example.def +22 -0
- easylink/steps/output_dir/dummy_step_2_for_output_dir_example.py +22 -0
- easylink/utilities/validation_utils.py +6 -0
- {easylink-0.1.16.dist-info → easylink-0.1.17.dist-info}/METADATA +1 -1
- {easylink-0.1.16.dist-info → easylink-0.1.17.dist-info}/RECORD +16 -12
- {easylink-0.1.16.dist-info → easylink-0.1.17.dist-info}/WHEEL +1 -1
- {easylink-0.1.16.dist-info → easylink-0.1.17.dist-info}/entry_points.txt +0 -0
- {easylink-0.1.16.dist-info → easylink-0.1.17.dist-info}/top_level.txt +0 -0
easylink/_version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.1.16"
|
1
|
+
__version__ = "0.1.17"
|
easylink/implementation.py
CHANGED
@@ -135,8 +135,17 @@ class Implementation:
|
|
135
135
|
|
136
136
|
@property
def outputs(self) -> dict[str, list[str]]:
    """The expected output paths, keyed by output slot name.

    Paths listed under the ``outputs`` key of the implementation metadata take
    precedence. When none are listed, each output is assumed to be a
    sub-directory named after its output slot, except that an implementation
    with exactly one output slot defaults to ``"."``.

    Returns
    -------
        A mapping of output slot name to output path.
    """
    # NOTE(review): the fallback values below are plain strings rather than the
    # lists the return annotation advertises -- confirm which one callers expect.
    if len(self.output_slots) == 1:
        # With a single slot the metadata mapping, if present, is returned as-is.
        only_slot_name = next(iter(self.output_slots))
        return self._metadata.get("outputs", {only_slot_name: "."})

    declared_outputs = self._metadata.get("outputs", {})
    return {
        slot_name: declared_outputs.get(slot_name, slot_name)
        for slot_name in self.output_slots
    }
|
140
149
|
|
141
150
|
|
142
151
|
class NullImplementation:
|
@@ -192,3 +192,25 @@ step_1a_and_step_1b_combined_python_pandas:
|
|
192
192
|
script_cmd: python /dummy_step.py
|
193
193
|
outputs:
|
194
194
|
step_1_main_output: result.parquet
|
195
|
+
dummy_step_1_for_output_dir_example:
|
196
|
+
steps:
|
197
|
+
- step_1_for_output_dir_example
|
198
|
+
image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_1_for_output_dir_example.sif
|
199
|
+
script_cmd: python /dummy_step_1_for_output_dir_example.py
|
200
|
+
outputs:
|
201
|
+
step_1_main_output_directory: output_dir/
|
202
|
+
dummy_step_1_for_output_dir_example_default:
|
203
|
+
steps:
|
204
|
+
- step_1_for_output_dir_example
|
205
|
+
image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_1_for_output_dir_example.sif
|
206
|
+
script_cmd: python /dummy_step_1_for_output_dir_example.py
|
207
|
+
# leave outputs out for testing purposes
|
208
|
+
# outputs:
|
209
|
+
# step_1_main_output_directory: output_dir/
|
210
|
+
dummy_step_2_for_output_dir_example:
|
211
|
+
steps:
|
212
|
+
- step_2_for_output_dir_example
|
213
|
+
image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_2_for_output_dir_example.sif
|
214
|
+
script_cmd: python /dummy_step_2_for_output_dir_example.py
|
215
|
+
outputs:
|
216
|
+
step_2_main_output: result.parquet
|
@@ -17,6 +17,7 @@ ALLOWED_SCHEMA_PARAMS = {
|
|
17
17
|
|
18
18
|
TESTING_SCHEMA_PARAMS = {
|
19
19
|
"integration": testing.SCHEMA_PARAMS_ONE_STEP,
|
20
|
+
"output_dir": testing.SCHEMA_PARAMS_OUTPUT_DIR,
|
20
21
|
"combine_bad_topology": testing.SCHEMA_PARAMS_BAD_COMBINED_TOPOLOGY,
|
21
22
|
"combine_bad_implementation_names": testing.SCHEMA_PARAMS_BAD_COMBINED_TOPOLOGY,
|
22
23
|
"nested_templated_steps": testing.SCHEMA_PARAMS_NESTED_TEMPLATED_STEPS,
|
@@ -26,7 +26,7 @@ from easylink.step import (
|
|
26
26
|
)
|
27
27
|
from easylink.utilities.aggregator_utils import concatenate_datasets
|
28
28
|
from easylink.utilities.splitter_utils import split_data_in_two
|
29
|
-
from easylink.utilities.validation_utils import validate_input_file_dummy
|
29
|
+
from easylink.utilities.validation_utils import validate_dir, validate_input_file_dummy
|
30
30
|
|
31
31
|
NODES_ONE_STEP = [
|
32
32
|
InputStep(),
|
@@ -582,3 +582,55 @@ EDGES_ONE_STEP_TWO_ISLOTS = [
|
|
582
582
|
),
|
583
583
|
]
|
584
584
|
SCHEMA_PARAMS_EP_HIERARCHICAL_STEP = (NODES_EP_HIERARCHICAL_STEP, EDGES_ONE_STEP_TWO_ISLOTS)
|
585
|
+
|
586
|
+
NODES_OUTPUT_DIR = [
|
587
|
+
InputStep(),
|
588
|
+
Step(
|
589
|
+
step_name="step_1_for_output_dir_example",
|
590
|
+
input_slots=[
|
591
|
+
InputSlot(
|
592
|
+
name="step_1_main_input",
|
593
|
+
env_var="STEP_1_MAIN_INPUT_FILE_PATHS",
|
594
|
+
validator=validate_input_file_dummy,
|
595
|
+
)
|
596
|
+
],
|
597
|
+
output_slots=[OutputSlot("step_1_main_output_directory")],
|
598
|
+
),
|
599
|
+
Step(
|
600
|
+
step_name="step_2_for_output_dir_example",
|
601
|
+
input_slots=[
|
602
|
+
InputSlot(
|
603
|
+
name="step_2_main_input",
|
604
|
+
env_var="DUMMY_CONTAINER_MAIN_INPUT_DIR_PATH",
|
605
|
+
validator=validate_dir,
|
606
|
+
)
|
607
|
+
],
|
608
|
+
output_slots=[OutputSlot("step_2_main_output")],
|
609
|
+
),
|
610
|
+
OutputStep(
|
611
|
+
input_slots=[
|
612
|
+
InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
|
613
|
+
],
|
614
|
+
),
|
615
|
+
]
|
616
|
+
EDGES_OUTPUT_DIR = [
|
617
|
+
EdgeParams(
|
618
|
+
source_node="input_data",
|
619
|
+
target_node="step_1_for_output_dir_example",
|
620
|
+
output_slot="all",
|
621
|
+
input_slot="step_1_main_input",
|
622
|
+
),
|
623
|
+
EdgeParams(
|
624
|
+
source_node="step_1_for_output_dir_example",
|
625
|
+
target_node="step_2_for_output_dir_example",
|
626
|
+
output_slot="step_1_main_output_directory",
|
627
|
+
input_slot="step_2_main_input",
|
628
|
+
),
|
629
|
+
EdgeParams(
|
630
|
+
source_node="step_2_for_output_dir_example",
|
631
|
+
target_node="results",
|
632
|
+
output_slot="step_2_main_output",
|
633
|
+
input_slot="result",
|
634
|
+
),
|
635
|
+
]
|
636
|
+
SCHEMA_PARAMS_OUTPUT_DIR = (NODES_OUTPUT_DIR, EDGES_OUTPUT_DIR)
|
easylink/rule.py
CHANGED
@@ -17,6 +17,7 @@ import os
|
|
17
17
|
from abc import ABC, abstractmethod
|
18
18
|
from collections.abc import Callable
|
19
19
|
from dataclasses import dataclass
|
20
|
+
from pathlib import Path
|
20
21
|
|
21
22
|
|
22
23
|
class Rule(ABC):
|
@@ -125,6 +126,18 @@ class ImplementedRule(Rule):
|
|
125
126
|
def _build_io(self) -> str:
|
126
127
|
"""Builds the input/output portion of the rule."""
|
127
128
|
log_path_chunk_adder = "-{chunk}" if self.is_embarrassingly_parallel else ""
|
129
|
+
# Handle output files vs directories
|
130
|
+
files = [path for path in self.output if Path(path).suffix != ""]
|
131
|
+
if len(files) == len(self.output):
|
132
|
+
output = self.output
|
133
|
+
elif len(files) == 0:
|
134
|
+
if len(self.output) != 1:
|
135
|
+
raise NotImplementedError("Multiple output directories is not supported.")
|
136
|
+
output = f"directory('{self.output[0]}')"
|
137
|
+
else:
|
138
|
+
raise NotImplementedError(
|
139
|
+
"Mixed output types (files and directories) is not supported."
|
140
|
+
)
|
128
141
|
io_str = (
|
129
142
|
f"""
|
130
143
|
rule:
|
@@ -132,7 +145,7 @@ rule:
|
|
132
145
|
message: "Running {self.step_name} implementation: {self.implementation_name}" """
|
133
146
|
+ self._build_input()
|
134
147
|
+ f"""
|
135
|
-
output: {self.output}
|
148
|
+
output: {output}
|
136
149
|
log: "{self.diagnostics_dir}/{self.name}-output{log_path_chunk_adder}.log"
|
137
150
|
container: "{self.image_path}" """
|
138
151
|
)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
Bootstrap: docker
From: python@sha256:1c26c25390307b64e8ff73e7edf34b4fbeac59d41da41c08da28dc316a721899

%files
    ./dummy_step_1_for_output_dir_example.py /dummy_step_1_for_output_dir_example.py

%post
    # Create directories
    mkdir -p /input_data
    mkdir -p /extra_implementation_specific_input_data
    mkdir -p /results
    mkdir -p /diagnostics

    # Install Python packages (pandas is pinned; pyarrow is not)
    pip install pandas==2.1.2 pyarrow

%environment
    export LC_ALL=C

%runscript
    # Double quotes so that "$@" expands to the caller's arguments; single
    # quotes would hand the script the literal string $@ instead.
    python /dummy_step_1_for_output_dir_example.py "$@"
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# PIPELINE_SCHEMA: output_dir
|
2
|
+
# STEP_NAME: step_1_for_output_dir_example
|
3
|
+
# REQUIREMENTS: pandas==2.1.2 pyarrow
|
4
|
+
|
5
|
+
import os
|
6
|
+
from pathlib import Path
|
7
|
+
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
# Load the dataset named by the step's input environment variable and echo it.
input_frame = pd.read_parquet(os.environ["STEP_1_MAIN_INPUT_FILE_PATHS"])
print(input_frame)

# The output location is treated as a directory; create it (and any parents)
# before writing into it.
output_dir = Path(os.environ["DUMMY_CONTAINER_OUTPUT_PATHS"])
output_dir.mkdir(parents=True, exist_ok=True)

# Write the same data three times, as result_0.parquet .. result_2.parquet.
for copy_number in range(3):
    target_file = output_dir / f"result_{copy_number}.parquet"
    input_frame.to_parquet(target_file)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
Bootstrap: docker
From: python@sha256:1c26c25390307b64e8ff73e7edf34b4fbeac59d41da41c08da28dc316a721899

%files
    ./dummy_step_2_for_output_dir_example.py /dummy_step_2_for_output_dir_example.py

%post
    # Create directories
    mkdir -p /input_data
    mkdir -p /extra_implementation_specific_input_data
    mkdir -p /results
    mkdir -p /diagnostics

    # Install Python packages (pandas is pinned; pyarrow is not)
    pip install pandas==2.1.2 pyarrow

%environment
    export LC_ALL=C

%runscript
    # Double quotes so that "$@" expands to the caller's arguments; single
    # quotes would hand the script the literal string $@ instead.
    python /dummy_step_2_for_output_dir_example.py "$@"
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# PIPELINE_SCHEMA: output_dir
|
2
|
+
# STEP_NAME: step_2_for_output_dir_example
|
3
|
+
# REQUIREMENTS: pandas==2.1.2 pyarrow
|
4
|
+
|
5
|
+
import os
|
6
|
+
import shutil
|
7
|
+
from pathlib import Path
|
8
|
+
|
9
|
+
import pandas as pd
|
10
|
+
|
11
|
+
# Directory handed to this step as its main input.
dir_path = Path(os.environ["DUMMY_CONTAINER_MAIN_INPUT_DIR_PATH"])

# Collect the regular files to process:
# - match "snakemake" against the file NAME only, so that a parent directory
#   whose path happens to contain "snakemake" does not exclude every file;
# - sort, because iterdir() yields entries in arbitrary order and the choice of
#   which file is copied to the output should be reproducible.
data_files = sorted(
    entry
    for entry in dir_path.iterdir()
    if entry.is_file() and "snakemake" not in entry.name
)

# Only the first data file becomes this step's output.
if data_files:
    shutil.copy(data_files[0], os.environ["DUMMY_CONTAINER_OUTPUT_PATHS"])

# Every data file is read and echoed.
for data_file in data_files:
    print(pd.read_parquet(data_file))
|
@@ -50,3 +50,9 @@ def validate_input_file_dummy(filepath: str) -> None:
|
|
50
50
|
raise LookupError(
|
51
51
|
f"Data file {filepath} is missing required column(s) {missing_columns}"
|
52
52
|
)
|
53
|
+
|
54
|
+
|
55
|
+
def validate_dir(filepath: "str | os.PathLike[str]") -> None:
    """Validate that ``filepath`` refers to an existing directory.

    Parameters
    ----------
    filepath
        The path to check.

    Raises
    ------
    NotADirectoryError
        If ``filepath`` does not exist or exists but is not a directory.
    """
    input_path = Path(filepath)
    # Path.is_dir() is False for missing paths as well as for non-directories,
    # so both cases are reported through the same error.
    if not input_path.is_dir():
        raise NotADirectoryError(f"The path {filepath} is not a directory.")
|
@@ -1,23 +1,23 @@
|
|
1
1
|
easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
|
2
2
|
easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
|
3
|
-
easylink/_version.py,sha256=
|
3
|
+
easylink/_version.py,sha256=BzIjnki8Bz3evNWo6bjGxxpLhy_tN9MRYhtM0MnDiWs,23
|
4
4
|
easylink/cli.py,sha256=mv9l9XHojfhDK4hpDeV1E4iensgt6zx2ovkGBQ8x9xk,9745
|
5
5
|
easylink/configuration.py,sha256=lfm8ViUpr1-O-EovTjKZbAlIht2EBv3RndN1mzYbmDE,12565
|
6
6
|
easylink/graph_components.py,sha256=zZDZXg5smReHO3ryQC4pao24wyKXzWDe6jS3C6fM2ak,13892
|
7
|
-
easylink/implementation.py,sha256=
|
8
|
-
easylink/implementation_metadata.yaml,sha256=
|
7
|
+
easylink/implementation.py,sha256=XLSat6_IXFn-nH6X8AazmfWhDtTK4GtA7yiht9QLlQQ,11366
|
8
|
+
easylink/implementation_metadata.yaml,sha256=0BQ_NIV29EtJ6G_wGD_-2OYPjYw2sNLAgCg2GbNwcuI,7662
|
9
9
|
easylink/pipeline.py,sha256=5KOYH5HyJjVlFoBRKGLs2hn5mpC3tPYG_ux3T1qSV9k,17504
|
10
10
|
easylink/pipeline_graph.py,sha256=9ysX4wAkA-WkUoo15jSLAErncybE4tJwznVx7N_kwIA,23922
|
11
11
|
easylink/pipeline_schema.py,sha256=Q2sCpsC-F2W0yxVP7ufunowDepOBrRVENXOdap9J5iY,6921
|
12
|
-
easylink/rule.py,sha256=
|
12
|
+
easylink/rule.py,sha256=NusEUtBxx18L7UCcgDi3KKooFxSUgyS4eisVM5aPqFE,16770
|
13
13
|
easylink/runner.py,sha256=cbCo5_NvvulmjjAaBCG6qCmbtJiHK-7NuDvbngdU_PY,6675
|
14
14
|
easylink/step.py,sha256=u1AMPrYGNVb3ZH6uB_U0dUeJvOeQ2MoVHdlC8k63AA8,85226
|
15
15
|
easylink/devtools/implementation_creator.py,sha256=mkiQ9nhtQC3mhxcG8IyvejzSK0WSkwplCztPLXbpXXQ,16199
|
16
16
|
easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
|
17
17
|
easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
|
18
|
-
easylink/pipeline_schema_constants/__init__.py,sha256=
|
18
|
+
easylink/pipeline_schema_constants/__init__.py,sha256=FUngnh80yfpr76j18iBVKSoR8-5zxQj_mW_muUMrafw,1324
|
19
19
|
easylink/pipeline_schema_constants/development.py,sha256=XxcYYZDZM4IADp3eFPQCchD6-OtMp99GiyZBfSswzFo,12640
|
20
|
-
easylink/pipeline_schema_constants/testing.py,sha256=
|
20
|
+
easylink/pipeline_schema_constants/testing.py,sha256=UDmVVjI1SiDktMbJ2CrSb7amHSYNwhgqNkXhl4lYxQw,20459
|
21
21
|
easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
|
22
22
|
easylink/steps/dev/build-containers-local.sh,sha256=Wy3pfcyt7I-BNvHcr7ZXDe0g5Ihd00BIPqt9YuRbLeA,259
|
23
23
|
easylink/steps/dev/build-containers-remote.sh,sha256=Hy-kaaXf-ta6n8SzOz_ahByjMY5T7J71MvzXRXDvQw8,271
|
@@ -36,6 +36,10 @@ easylink/steps/dev/python_pyspark/python_pyspark.def,sha256=j_RmVjspmXGOhJTr10ED
|
|
36
36
|
easylink/steps/dev/r/README.md,sha256=dPjZdDTqcJsZCiwhddzlOj1ob0P7YocZUNFrLIGM1-0,1201
|
37
37
|
easylink/steps/dev/r/dummy_step.R,sha256=1TWZY8CEkT6gavrulBxFsKbDSKJJjk0NtJrGH7TIikE,4975
|
38
38
|
easylink/steps/dev/r/r-image.def,sha256=LrhXlt0C3k7d_VJWopRPEVARnFWSuq_oILlwo7g03bE,627
|
39
|
+
easylink/steps/output_dir/dummy_step_1_for_output_dir_example.def,sha256=CkQVG-uDRQ9spAavdkZbhx2GD_fRsKZGELPrr8yltsc,550
|
40
|
+
easylink/steps/output_dir/dummy_step_1_for_output_dir_example.py,sha256=dI0OWugE35ABLcSwsI-T3C4dvuPTKXwjE52dtSsCo8Y,428
|
41
|
+
easylink/steps/output_dir/dummy_step_2_for_output_dir_example.def,sha256=9gShg1EDJEHZcz7Z5VfZ1A4Gpm9XQes8ezn6rAZDgDM,550
|
42
|
+
easylink/steps/output_dir/dummy_step_2_for_output_dir_example.py,sha256=DMJW5TXjhELxhY4U9q2RpLjqxlS1YSosTGL2AfRnaZM,521
|
39
43
|
easylink/utilities/__init__.py,sha256=0U33kbv4hoMfFQ_lh5hLwifxRPzOgkLkjKLYxmaK10g,196
|
40
44
|
easylink/utilities/aggregator_utils.py,sha256=pqBog6kEX4MXBBMjQtHFlE5gEMqRWb5VFl64u0Lr__g,972
|
41
45
|
easylink/utilities/data_utils.py,sha256=CcnM3u0_MQDQo3jMs3E4IK_rz8wAsFdJ674fZxYEFZg,4620
|
@@ -43,9 +47,9 @@ easylink/utilities/general_utils.py,sha256=El1W0nn4P27sRBGotNQb-9du-Gbhk9ggSuu4v
|
|
43
47
|
easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,924
|
44
48
|
easylink/utilities/spark.smk,sha256=kGtpem7LfQc71tMh5WAYaqKnHQKFvcdhPQSdumOP70k,5799
|
45
49
|
easylink/utilities/splitter_utils.py,sha256=UOz4hjkEPqaAz0RrDkDYYej79lLSaq0VVVSH_tF1z0o,3838
|
46
|
-
easylink/utilities/validation_utils.py,sha256=
|
47
|
-
easylink-0.1.
|
48
|
-
easylink-0.1.
|
49
|
-
easylink-0.1.
|
50
|
-
easylink-0.1.
|
51
|
-
easylink-0.1.
|
50
|
+
easylink/utilities/validation_utils.py,sha256=rOIeQbbXXdsuL2hI0i2gApAWfiNJXMwYH4pmw8uLrGM,1867
|
51
|
+
easylink-0.1.17.dist-info/METADATA,sha256=fl6OzaU74KHClV-dADXheUoKBMuIs8pUqLANelfaqBY,3477
|
52
|
+
easylink-0.1.17.dist-info/WHEEL,sha256=A8Eltl-h0W-qZDVezsLjjslosEH_pdYC2lQ0JcbgCzs,91
|
53
|
+
easylink-0.1.17.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
|
54
|
+
easylink-0.1.17.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
|
55
|
+
easylink-0.1.17.dist-info/RECORD,,
|
File without changes
|
File without changes
|