PyPI - easylink - Versions diffs - 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl - Mend

easylink 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

easylink/_version.py +1 -1
easylink/configuration.py +5 -5
easylink/graph_components.py +48 -51
easylink/implementation.py +70 -10
easylink/pipeline.py +127 -24
easylink/pipeline_graph.py +46 -26
easylink/pipeline_schema_constants/__init__.py +11 -7
easylink/pipeline_schema_constants/development.py +2 -23
easylink/pipeline_schema_constants/testing.py +243 -17
easylink/rule.py +60 -140
easylink/runner.py +14 -9
easylink/step.py +397 -143
easylink/utilities/splitter_utils.py +35 -0
{easylink-0.1.12.dist-info → easylink-0.1.14.dist-info}/METADATA +22 -14
{easylink-0.1.12.dist-info → easylink-0.1.14.dist-info}/RECORD +18 -18
{easylink-0.1.12.dist-info → easylink-0.1.14.dist-info}/WHEEL +1 -1
{easylink-0.1.12.dist-info → easylink-0.1.14.dist-info}/entry_points.txt +0 -0
{easylink-0.1.12.dist-info → easylink-0.1.14.dist-info}/top_level.txt +0 -0

easylink/pipeline_schema_constants/development.py CHANGED Viewed

@@ -74,29 +74,8 @@ NODES = [
                     ),
                 ],
             ),
-            input_slots=[
-                InputSlot(
-                    name="step_3_main_input",
-                    env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
-                    validator=validate_input_file_dummy,
-                    splitter=split_data_by_size,
-                ),
-            ],
-            output_slots=[OutputSlot("step_3_main_output", aggregator=concatenate_datasets)],
-            input_slot_mappings=[
-                InputSlotMapping(
-                    parent_slot="step_3_main_input",
-                    child_node="step_3",
-                    child_slot="step_3_main_input",
-                ),
-            ],
-            output_slot_mappings=[
-                OutputSlotMapping(
-                    parent_slot="step_3_main_output",
-                    child_node="step_3",
-                    child_slot="step_3_main_output",
-                ),
-            ],
+            slot_splitter_mapping={"step_3_main_input": split_data_by_size},
+            slot_aggregator_mapping={"step_3_main_output": concatenate_datasets},
         ),
         self_edges=[
             EdgeParams(

easylink/pipeline_schema_constants/testing.py CHANGED Viewed

@@ -16,6 +16,7 @@ from easylink.graph_components import (
     OutputSlotMapping,
 )
 from easylink.step import (
+    EmbarrassinglyParallelStep,
     HierarchicalStep,
     InputStep,
     LoopStep,
@@ -23,9 +24,11 @@ from easylink.step import (
     ParallelStep,
     Step,
 )
+from easylink.utilities.aggregator_utils import concatenate_datasets
+from easylink.utilities.splitter_utils import split_data_in_two
 from easylink.utilities.validation_utils import validate_input_file_dummy
-SINGLE_STEP_NODES = [
+NODES_ONE_STEP = [
     InputStep(),
     Step(
         step_name="step_1",
@@ -44,7 +47,7 @@ SINGLE_STEP_NODES = [
         ],
     ),
 ]
-SINGLE_STEP_EDGES = [
+EDGES_ONE_STEP = [
     EdgeParams(
         source_node="input_data",
         target_node="step_1",
@@ -58,10 +61,10 @@ SINGLE_STEP_EDGES = [
         input_slot="result",
     ),
 ]
+SCHEMA_PARAMS_ONE_STEP = (NODES_ONE_STEP, EDGES_ONE_STEP)
-SINGLE_STEP_SCHEMA_PARAMS = (SINGLE_STEP_NODES, SINGLE_STEP_EDGES)
-TRIPLE_STEP_NODES = [
+NODES_THREE_STEPS = [
     InputStep(),
     Step(
         step_name="step_1",
@@ -102,7 +105,7 @@ TRIPLE_STEP_NODES = [
         ],
     ),
 ]
-TRIPLE_STEP_EDGES = [
+EDGES_THREE_STEPS = [
     EdgeParams(
         source_node="input_data",
         target_node="step_1",
@@ -128,11 +131,10 @@ TRIPLE_STEP_EDGES = [
         input_slot="result",
     ),
 ]
+SCHEMA_PARAMS_THREE_STEPS = (NODES_THREE_STEPS, EDGES_THREE_STEPS)
-TRIPLE_STEP_SCHEMA_PARAMS = (TRIPLE_STEP_NODES, TRIPLE_STEP_EDGES)
-BAD_COMBINED_TOPOLOGY_NODES = [
+NODES_BAD_COMBINED_TOPOLOGY = [
     InputStep(),
     LoopStep(
         template_step=HierarchicalStep(
@@ -207,11 +209,10 @@ BAD_COMBINED_TOPOLOGY_NODES = [
         ],
     ),
 ]
-BAD_COMBINED_TOPOLOGY_SCHEMA_PARAMS = (BAD_COMBINED_TOPOLOGY_NODES, SINGLE_STEP_EDGES)
+SCHEMA_PARAMS_BAD_COMBINED_TOPOLOGY = (NODES_BAD_COMBINED_TOPOLOGY, EDGES_ONE_STEP)
-NESTED_TEMPLATED_STEPS_NODES = [
+NODES_NESTED_TEMPLATED_STEPS = [
     InputStep(),
     LoopStep(
         template_step=ParallelStep(
@@ -288,12 +289,10 @@ NESTED_TEMPLATED_STEPS_NODES = [
         ],
     ),
 ]
+SCHEMA_PARAMS_NESTED_TEMPLATED_STEPS = (NODES_NESTED_TEMPLATED_STEPS, EDGES_ONE_STEP)
-NESTED_TEMPLATED_STEPS_SCHEMA_PARAMS = (NESTED_TEMPLATED_STEPS_NODES, SINGLE_STEP_EDGES)
-COMBINE_WITH_ITERATION_NODES = [
+NODES_COMBINE_WITH_ITERATION = [
     InputStep(),
     LoopStep(
         template_step=Step(
@@ -333,7 +332,7 @@ COMBINE_WITH_ITERATION_NODES = [
         ],
     ),
 ]
-DOUBLE_STEP_EDGES = [
+EDGES_TWO_STEPS = [
     EdgeParams(
         source_node="input_data",
         target_node="step_1",
@@ -353,6 +352,233 @@ DOUBLE_STEP_EDGES = [
         input_slot="result",
     ),
 ]
+SCHEMA_PARAMS_COMBINE_WITH_ITERATION = (NODES_COMBINE_WITH_ITERATION, EDGES_TWO_STEPS)
+NODES_LOOPING_EP_STEP = [
+    InputStep(),
+    LoopStep(
+        template_step=EmbarrassinglyParallelStep(
+            step=Step(
+                step_name="step_1",
+                input_slots=[
+                    InputSlot(
+                        name="step_1_main_input",
+                        env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
+                        validator=validate_input_file_dummy,
+                    ),
+                ],
+                output_slots=[
+                    OutputSlot(
+                        name="step_1_main_output",
+                    ),
+                ],
+            ),
+            slot_splitter_mapping={"step_1_main_input": split_data_in_two},
+            slot_aggregator_mapping={"step_1_main_output": concatenate_datasets},
+        ),
+        self_edges=[
+            EdgeParams(
+                source_node="step_1",
+                target_node="step_1",
+                output_slot="step_1_main_output",
+                input_slot="step_1_main_input",
+            )
+        ],
+    ),
+    OutputStep(
+        input_slots=[
+            InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
+        ]
+    ),
+]
+SCHEMA_PARAMS_LOOPING_EP_STEP = (NODES_LOOPING_EP_STEP, EDGES_ONE_STEP)
+NODES_EP_PARALLEL_STEP = [
+    InputStep(),
+    EmbarrassinglyParallelStep(
+        step=ParallelStep(
+            template_step=Step(
+                step_name="step_1",
+                input_slots=[
+                    InputSlot(
+                        name="step_1_main_input",
+                        env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
+                        validator=validate_input_file_dummy,
+                    ),
+                ],
+                output_slots=[
+                    OutputSlot(
+                        name="step_1_main_output",
+                    ),
+                ],
+            ),
+        ),
+        slot_splitter_mapping={"step_1_main_input": split_data_in_two},
+        slot_aggregator_mapping={"step_1_main_output": concatenate_datasets},
+    ),
+    OutputStep(
+        input_slots=[
+            InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
+        ]
+    ),
+]
+SCHEMA_PARAMS_EP_PARALLEL_STEP = (NODES_EP_PARALLEL_STEP, EDGES_ONE_STEP)
+NODES_EP_LOOP_STEP = [
+    InputStep(),
+    EmbarrassinglyParallelStep(
+        step=LoopStep(
+            template_step=Step(
+                step_name="step_1",
+                input_slots=[
+                    InputSlot(
+                        name="step_1_main_input",
+                        env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
+                        validator=validate_input_file_dummy,
+                    ),
+                ],
+                output_slots=[
+                    OutputSlot(
+                        name="step_1_main_output",
+                    ),
+                ],
+            ),
+            self_edges=[
+                EdgeParams(
+                    source_node="step_1",
+                    target_node="step_1",
+                    output_slot="step_1_main_output",
+                    input_slot="step_1_main_input",
+                ),
+            ],
+        ),
+        slot_splitter_mapping={"step_1_main_input": split_data_in_two},
+        slot_aggregator_mapping={"step_1_main_output": concatenate_datasets},
+    ),
+    OutputStep(
+        input_slots=[
+            InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
+        ]
+    ),
+]
+SCHEMA_PARAMS_EP_LOOP_STEP = (NODES_EP_LOOP_STEP, EDGES_ONE_STEP)
-COMBINE_WITH_ITERATION_SCHEMA_PARAMS = (COMBINE_WITH_ITERATION_NODES, DOUBLE_STEP_EDGES)
+NODES_EP_HIERARCHICAL_STEP = [
+    InputStep(),
+    EmbarrassinglyParallelStep(
+        step=HierarchicalStep(
+            step_name="step_1",
+            input_slots=[
+                InputSlot(
+                    name="step_1_main_input",
+                    env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
+                    validator=validate_input_file_dummy,
+                ),
+                InputSlot(
+                    name="step_1_secondary_input",
+                    env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
+                    validator=validate_input_file_dummy,
+                ),
+            ],
+            output_slots=[OutputSlot("step_1_main_output")],
+            nodes=[
+                Step(
+                    step_name="step_1a",
+                    input_slots=[
+                        InputSlot(
+                            name="step_1a_main_input",
+                            env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
+                            validator=validate_input_file_dummy,
+                        ),
+                        InputSlot(
+                            name="step_1a_secondary_input",
+                            env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
+                            validator=validate_input_file_dummy,
+                        ),
+                    ],
+                    output_slots=[OutputSlot("step_1a_main_output")],
+                ),
+                Step(
+                    step_name="step_1b",
+                    input_slots=[
+                        InputSlot(
+                            name="step_1b_main_input",
+                            env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
+                            validator=validate_input_file_dummy,
+                        ),
+                        InputSlot(
+                            name="step_1b_secondary_input",
+                            env_var="DUMMY_CONTAINER_SECONDARY_INPUT_FILE_PATHS",
+                            validator=validate_input_file_dummy,
+                        ),
+                    ],
+                    output_slots=[OutputSlot("step_1b_main_output")],
+                ),
+            ],
+            edges=[
+                EdgeParams(
+                    source_node="step_1a",
+                    target_node="step_1b",
+                    output_slot="step_1a_main_output",
+                    input_slot="step_1b_main_input",
+                ),
+            ],
+            input_slot_mappings=[
+                InputSlotMapping(
+                    parent_slot="step_1_main_input",
+                    child_node="step_1a",
+                    child_slot="step_1a_main_input",
+                ),
+                InputSlotMapping(
+                    parent_slot="step_1_secondary_input",
+                    child_node="step_1a",
+                    child_slot="step_1a_secondary_input",
+                ),
+                InputSlotMapping(
+                    parent_slot="step_1_secondary_input",
+                    child_node="step_1b",
+                    child_slot="step_1b_secondary_input",
+                ),
+            ],
+            output_slot_mappings=[
+                OutputSlotMapping(
+                    parent_slot="step_1_main_output",
+                    child_node="step_1b",
+                    child_slot="step_1b_main_output",
+                ),
+            ],
+        ),
+        slot_splitter_mapping={"step_1_main_input": split_data_in_two},
+        slot_aggregator_mapping={"step_1_main_output": concatenate_datasets},
+    ),
+    OutputStep(
+        input_slots=[
+            InputSlot(name="result", env_var=None, validator=validate_input_file_dummy)
+        ]
+    ),
+]
+EDGES_ONE_STEP_TWO_ISLOTS = [
+    EdgeParams(
+        source_node="input_data",
+        target_node="step_1",
+        output_slot="all",
+        input_slot="step_1_main_input",
+    ),
+    EdgeParams(
+        source_node="input_data",
+        target_node="step_1",
+        output_slot="all",
+        input_slot="step_1_secondary_input",
+    ),
+    EdgeParams(
+        source_node="step_1",
+        target_node="results",
+        output_slot="step_1_main_output",
+        input_slot="result",
+    ),
+]
+SCHEMA_PARAMS_EP_HIERARCHICAL_STEP = (NODES_EP_HIERARCHICAL_STEP, EDGES_ONE_STEP_TWO_ISLOTS)

easylink/rule.py CHANGED Viewed

@@ -41,15 +41,6 @@ class Rule(ABC):
         """
         pass
-    @staticmethod
-    def get_input_slots_to_split(input_slots) -> list[str]:
-        input_slots_to_split = [
-            slot_name
-            for slot_name, slot_attrs in input_slots.items()
-            if slot_attrs.get("splitter", None)
-        ]
-        return input_slots_to_split
 @dataclass
 class TargetRule(Rule):
@@ -125,23 +116,15 @@ class ImplementedRule(Rule):
     def build_rule(self) -> str:
         """Builds the Snakemake rule for this ``Implementation``."""
+        if self.is_embarrassingly_parallel and len(self.output) > 1:
+            raise NotImplementedError(
+                "Multiple output slots/files of EmbarrassinglyParallelSteps not yet supported"
+            )
         return self._build_io() + self._build_resources() + self._build_shell_cmd()
     def _build_io(self) -> str:
         """Builds the input/output portion of the rule."""
-        if self.is_embarrassingly_parallel:
-            # Processed chunks are sent to a 'processed' subdir
-            output_files = [
-                os.path.dirname(file_path)
-                + "/processed/{chunk}/"
-                + os.path.basename(file_path)
-                for file_path in self.output
-            ]
-            log_path_chunk_adder = "-{chunk}"
-        else:
-            output_files = self.output
-            log_path_chunk_adder = ""
+        log_path_chunk_adder = "-{chunk}" if self.is_embarrassingly_parallel else ""
         io_str = (
             f"""
 rule:
@@ -149,7 +132,7 @@ rule:
     message: "Running {self.step_name} implementation: {self.implementation_name}" """
             + self._build_input()
             + f"""
-    output: {output_files}
+    output: {self.output}
     log: "{self.diagnostics_dir}/{self.name}-output{log_path_chunk_adder}.log"
     container: "{self.image_path}" """
         )
@@ -158,33 +141,11 @@ rule:
     def _build_input(self) -> str:
         input_str = f"""
     input:"""
-        input_slots_to_split = self.get_input_slots_to_split(self.input_slots)
         for slot, attrs in self.input_slots.items():
             env_var = attrs["env_var"].lower()
-            if len(input_slots_to_split) > 1:
-                raise NotImplementedError(
-                    "FIXME [MIC-5883] Multiple input slots to split not yet supported"
-                )
-            if self.is_embarrassingly_parallel and slot == input_slots_to_split[0]:
-                # The input to this is the input_chunks subdir from the checkpoint
-                # rule (which is built by modifying the output of the overall implementation)
-                if len(self.output) > 1:
-                    raise NotImplementedError(
-                        "FIXME [MIC-5883] Multiple output slots/files of EmbarrassinglyParallelSteps not yet supported"
-                    )
-                input_files = [
-                    os.path.dirname(self.output[0])
-                    + "/input_chunks/{chunk}/"
-                    + os.path.basename(self.output[0])
-                ]
-            else:
-                input_files = attrs["filepaths"]
-            input_str += f"""
-        {env_var}={input_files},"""
-        if not self.is_embarrassingly_parallel:
-            # validations were already handled in the checkpoint rule - no need
-            # to validate the individual chunks
             input_str += f"""
+        {env_var}={attrs["filepaths"]},"""
+        input_str += f"""
         validations={self.validations},"""
         if self.requires_spark:
             input_str += f"""
@@ -210,38 +171,19 @@ rule:
         #   output_paths = ",".join(self.output)
         #   wildcards_subdir = "/".join([f"{{wildcards.{wc}}}" for wc in self.wildcards])
         #   and then in shell cmd: export DUMMY_CONTAINER_OUTPUT_PATHS={output_paths}/{wildcards_subdir}
-        if self.is_embarrassingly_parallel:
-            if len(self.output) > 1:
-                raise NotImplementedError(
-                    "FIXME [MIC-5883] Multiple output slots/files of EmbarrassinglyParallelSteps not yet supported"
-                )
-            output_files = (
-                os.path.dirname(self.output[0])
-                + "/processed/{wildcards.chunk}/"
-                + os.path.basename(self.output[0])
-            )
-        else:
-            output_files = ",".join(self.output)
+        # snakemake shell commands require wildcards to be prefaced with 'wildcards.'
+        output_files = ",".join(self.output).replace("{chunk}", "{wildcards.chunk}")
         shell_cmd = f"""
     shell:
         '''
         export DUMMY_CONTAINER_OUTPUT_PATHS={output_files}
         export DUMMY_CONTAINER_DIAGNOSTICS_DIRECTORY={self.diagnostics_dir}"""
-        for input_slot_name, input_slot_attrs in self.input_slots.items():
-            input_slots_to_split = self.get_input_slots_to_split(self.input_slots)
-            if len(input_slots_to_split) > 1:
-                raise NotImplementedError(
-                    "FIXME [MIC-5883] Multiple input slots to split not yet supported"
-                )
-            if input_slot_name in input_slots_to_split:
-                # The inputs to this come from the input_chunks subdir
-                input_files = (
-                    os.path.dirname(self.output[0])
-                    + "/input_chunks/{wildcards.chunk}/"
-                    + os.path.basename(self.output[0])
-                )
-            else:
-                input_files = ",".join(input_slot_attrs["filepaths"])
+        for input_slot_attrs in self.input_slots.values():
+            # snakemake shell commands require wildcards to be prefaced with 'wildcards.'
+            input_files = ",".join(input_slot_attrs["filepaths"]).replace(
+                "{chunk}", "{wildcards.chunk}"
+            )
             shell_cmd += f"""
         export {input_slot_attrs["env_var"]}={input_files}"""
         if self.requires_spark:
@@ -278,7 +220,7 @@ class InputValidationRule(Rule):
     """List of filepaths to validate."""
     output: str
     """Filepath of validation output. It must be used as an input for next rule."""
-    validator: Callable
+    validator: Callable | None
     """Callable that takes a filepath as input. Raises an error if invalid."""
     def build_rule(self) -> str:
@@ -329,12 +271,14 @@ class CheckpointRule(Rule):
     name: str
     """Name of the rule."""
-    input_slots: dict[str, dict[str, str | list[str]]]
-    """This ``Implementation's`` input slot attributes."""
-    validations: list[str]
-    """Validation files from previous rule."""
-    output: list[str]
+    input_files: list[str]
+    """The input filepaths."""
+    splitter_func_name: str
+    """The splitter function's name."""
+    output_dir: str
     """Output directory path. It must be used as an input for next rule."""
+    checkpoint_filepath: str
+    """Path to the checkpoint file. This is only needed for the bugfix workaround."""
     def build_rule(self) -> str:
         """Builds the Snakemake rule for this checkpoint.
@@ -344,29 +288,20 @@ class CheckpointRule(Rule):
         files into chunks. Note that the output of this rule is a Snakemake ``directory``
         object as opposed to a specific file like typical rules have.
         """
-        # Replace the output filepath with an input_chunks subdir
-        output_dir = os.path.dirname(self.output[0]) + "/input_chunks"
-        input_slots_to_split = self.get_input_slots_to_split(self.input_slots)
-        if len(input_slots_to_split) > 1:
-            raise NotImplementedError(
-                "FIXME [MIC-5883] Multiple input slots to split not yet supported"
-            )
-        input_slot_to_split = input_slots_to_split[0]
         checkpoint = f"""
 checkpoint:
-    name: "split_{self.name}_{input_slot_to_split}"
+    name: "{self.name}"
     input:
-        files={self.input_slots[input_slot_to_split]['filepaths']},
-        validations={self.validations},
+        files={self.input_files},
     output:
-        output_dir=directory("{output_dir}"),
-        checkpoint_file=touch("{output_dir}/checkpoint.txt"),
+        output_dir=directory("{self.output_dir}"),
+        checkpoint_file=touch("{self.checkpoint_filepath}"),
     params:
         input_files=lambda wildcards, input: ",".join(input.files),
     localrule: True
-    message: "Splitting {self.name} {input_slot_to_split} into chunks"
+    message: "Splitting {self.name} into chunks"
     run:
-        splitter_utils.{self.input_slots[input_slot_to_split]["splitter"].__name__}(
+        splitter_utils.{self.splitter_func_name}(
             input_files=list(input.files),
             output_dir=output.output_dir,
             desired_chunk_size_mb=0.1,
@@ -385,12 +320,16 @@ class AggregationRule(Rule):
     name: str
     """Name of the rule."""
-    input_slots: dict[str, dict[str, str | list[str]]]
-    """This ``Implementation's`` input slot attributes."""
-    output_slot_name: str
-    """Name of the :class:`~easylink.graph_components.OutputSlot`."""
-    output_slot: dict[str, str | list[str]]
-    """The output slot attributes to create this rule for."""
+    input_files: list[str]
+    """The input processed chunk files to aggregate."""
+    aggregated_output_file: str
+    """The final aggregated results file."""
+    aggregator_func_name: str
+    """The name of the aggregation function to run."""
+    checkpoint_filepath: str
+    """Path to the checkpoint file. This is only needed for the bugfix workaround."""
+    checkpoint_rule_name: str
+    """Name of the checkpoint rule."""
     def build_rule(self) -> str:
         """Builds the Snakemake rule for this aggregator.
@@ -421,56 +360,37 @@ class AggregationRule(Rule):
     def _define_input_function(self):
         """Builds the `input function <https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#input-functions>`_."""
-        if len(self.output_slot["filepaths"]) > 1:
-            raise NotImplementedError(
-                "FIXME [MIC-5883] Multiple output slots/files of EmbarrassinglyParallelSteps not yet supported"
-            )
-        if len(self.output_slot["filepaths"]) > 1:
-            raise NotImplementedError(
-                "FIXME [MIC-5883] Multiple slots/files of EmbarrassinglyParallelSteps not yet supported"
-            )
-        output_filepath = self.output_slot["filepaths"][0]
-        checkpoint_file_path = (
-            os.path.dirname(output_filepath) + "/input_chunks/checkpoint.txt"
-        )
-        input_slots_to_split = self.get_input_slots_to_split(self.input_slots)
-        if len(input_slots_to_split) > 1:
-            raise NotImplementedError(
-                "FIXME [MIC-5883] Multiple input slots to split not yet supported"
-            )
-        input_slot_to_split = input_slots_to_split[0]
-        checkpoint_name = f"checkpoints.split_{self.name}_{input_slot_to_split}"
-        output_files = (
-            os.path.dirname(output_filepath)
-            + "/processed/{chunk}/"
-            + os.path.basename(output_filepath)
-        )
+        # NOTE: In the f-string below, we serialize the list `self.input_files`
+        # into a string which must later be executed as python code (by snakemake).
+        # Let's expand the list into a string representation of a python list so that
+        # we explicitly rely on `eval(repr(self.input_files)) == self.input_files`.
+        input_files_list_str = repr(self.input_files)
         func = f"""
-def get_aggregation_inputs_{self.name}_{self.output_slot_name}(wildcards):
-    checkpoint_file = "{checkpoint_file_path}"
+def get_aggregation_inputs_{self.name}(wildcards):
+    checkpoint_file = "{self.checkpoint_filepath}"
     if not os.path.exists(checkpoint_file):
-        output, _ = {checkpoint_name}.rule.expand_output(wildcards)
-        raise IncompleteCheckpointException({checkpoint_name}.rule, checkpoint_target(output[0]))
-    checkpoint_output = glob.glob(f"{{{checkpoint_name}.get(**wildcards).output.output_dir}}/*/")
+        output, _ = {self.checkpoint_rule_name}.rule.expand_output(wildcards)
+        raise IncompleteCheckpointException({self.checkpoint_rule_name}.rule, checkpoint_target(output[0]))
+    checkpoint_output = glob.glob(f"{{{self.checkpoint_rule_name}.get(**wildcards).output.output_dir}}/*/")
     chunks = [Path(filepath).parts[-1] for filepath in checkpoint_output]
-    return expand(
-        "{output_files}",
-        chunk=chunks
-    )"""
+    input_files = []
+    for filepath in {input_files_list_str}:
+        input_files.extend(expand(filepath, chunk=chunks))
+    return input_files"""
         return func
     def _define_aggregator_rule(self):
         """Builds the rule that runs the aggregation."""
         rule = f"""
 rule:
-    name: "aggregate_{self.name}_{self.output_slot_name}"
-    input: get_aggregation_inputs_{self.name}_{self.output_slot_name}
-    output: {self.output_slot["filepaths"]}
+    name: "{self.name}"
+    input: get_aggregation_inputs_{self.name}
+    output: {[self.aggregated_output_file]}
     localrule: True
-    message: "Aggregating {self.name} {self.output_slot_name}"
+    message: "Aggregating {self.name}"
     run:
-        aggregator_utils.{self.output_slot["aggregator"].__name__}(
+        aggregator_utils.{self.aggregator_func_name}(
             input_files=list(input),
-            output_filepath="{self.output_slot["filepaths"][0]}",
+            output_filepath="{self.aggregated_output_file}",
         )"""
         return rule

easylink 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

easylink 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl