looper 1.7.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
looper/project.py CHANGED
@@ -3,29 +3,28 @@
3
3
  import itertools
4
4
  import os
5
5
 
6
+ from yaml import safe_load
7
+
6
8
  try:
7
9
  from functools import cached_property
8
10
  except ImportError:
9
11
  # cached_property was introduced in python 3.8
10
12
  cached_property = property
11
- from logging import getLogger
12
13
 
13
14
  from .divvy import ComputingConfiguration
14
15
  from eido import PathAttrNotFoundError, read_schema
15
16
  from jsonschema import ValidationError
16
17
  from pandas.core.common import flatten
17
- from peppy import CONFIG_KEY, OUTDIR_KEY
18
- from peppy import Project as peppyProject
19
18
  from peppy.utils import make_abs_via_cfg
20
- from pipestat import PipestatError, PipestatManager
21
- from ubiquerg import expandpath, is_command_callable
22
- from yacman import YAMLConfigManager
19
+ from pipestat import PipestatManager
20
+
23
21
  from .conductor import write_pipestat_config
24
22
 
25
23
  from .exceptions import *
26
24
  from .pipeline_interface import PipelineInterface
27
25
  from .processed_project import populate_project_paths, populate_sample_paths
28
26
  from .utils import *
27
+ from .const import PipelineLevel
29
28
 
30
29
  __all__ = ["Project"]
31
30
 
@@ -126,6 +125,12 @@ class Project(peppyProject):
126
125
 
127
126
  self[EXTRA_KEY] = {}
128
127
 
128
+ try:
129
+ # For loading PEPs via CSV, Peppy cannot infer project name.
130
+ name = self.name
131
+ except NotImplementedError:
132
+ self.name = None
133
+
129
134
  # add sample pipeline interface to the project
130
135
  if kwargs.get(SAMPLE_PL_ARG):
131
136
  self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG))
@@ -144,7 +149,7 @@ class Project(peppyProject):
144
149
  self.dcc = (
145
150
  None
146
151
  if divcfg_path is None
147
- else ComputingConfiguration(filepath=divcfg_path)
152
+ else ComputingConfiguration.from_yaml_file(filepath=divcfg_path)
148
153
  )
149
154
  if DRY_RUN_KEY in self and not self[DRY_RUN_KEY]:
150
155
  _LOGGER.debug("Ensuring project directories exist")
@@ -300,7 +305,7 @@ class Project(peppyProject):
300
305
  :return list[looper.PipelineInterface]: list of pipeline interfaces
301
306
  """
302
307
  return [
303
- PipelineInterface(pi, pipeline_type="project")
308
+ PipelineInterface(pi, pipeline_type=PipelineLevel.PROJECT.value)
304
309
  for pi in self.project_pipeline_interface_sources
305
310
  ]
306
311
 
@@ -343,7 +348,9 @@ class Project(peppyProject):
343
348
 
344
349
  :return bool: whether pipestat configuration is complete
345
350
  """
346
- return self._check_if_pipestat_configured(project_level=True)
351
+ return self._check_if_pipestat_configured(
352
+ pipeline_type=PipelineLevel.PROJECT.value
353
+ )
347
354
 
348
355
  def get_sample_piface(self, sample_name):
349
356
  """
@@ -363,65 +370,6 @@ class Project(peppyProject):
363
370
  except KeyError:
364
371
  return None
365
372
 
366
- def build_submission_bundles(self, protocol, priority=True):
367
- """
368
- Create pipelines to submit for each sample of a particular protocol.
369
-
370
- With the argument (flag) to the priority parameter, there's control
371
- over whether to submit pipeline(s) from only one of the project's
372
- known pipeline locations with a match for the protocol, or whether to
373
- submit pipelines created from all locations with a match for the
374
- protocol.
375
-
376
- :param str protocol: name of the protocol/library for which to
377
- create pipeline(s)
378
- :param bool priority: to only submit pipeline(s) from the first of the
379
- pipelines location(s) (indicated in the project config file) that
380
- has a match for the given protocol; optional, default True
381
- :return Iterable[(PipelineInterface, type, str, str)]:
382
- :raises AssertionError: if there's a failure in the attempt to
383
- partition an interface's pipeline scripts into disjoint subsets of
384
- those already mapped and those not yet mapped
385
- """
386
-
387
- if not priority:
388
- raise NotImplementedError(
389
- "Currently, only prioritized protocol mapping is supported "
390
- "(i.e., pipeline interfaces collection is a prioritized list, "
391
- "so only the first interface with a protocol match is used.)"
392
- )
393
-
394
- # Pull out the collection of interfaces (potentially one from each of
395
- # the locations indicated in the project configuration file) as a
396
- # sort of pool of information about possible ways in which to submit
397
- # pipeline(s) for sample(s) of the indicated protocol.
398
- pifaces = self.interfaces.get_pipeline_interface(protocol)
399
- if not pifaces:
400
- raise PipelineInterfaceConfigError(
401
- "No interfaces for protocol: {}".format(protocol)
402
- )
403
-
404
- # coonvert to a list, in the future we might allow to match multiple
405
- pifaces = pifaces if isinstance(pifaces, str) else [pifaces]
406
-
407
- job_submission_bundles = []
408
- new_jobs = []
409
-
410
- _LOGGER.debug("Building pipelines matched by protocol: {}".format(protocol))
411
-
412
- for pipe_iface in pifaces:
413
- # Determine how to reference the pipeline and where it is.
414
- path = pipe_iface["path"]
415
- if not (os.path.exists(path) or is_command_callable(path)):
416
- _LOGGER.warning("Missing pipeline script: {}".format(path))
417
- continue
418
-
419
- # Add this bundle to the collection of ones relevant for the
420
- # current PipelineInterface.
421
- new_jobs.append(pipe_iface)
422
- job_submission_bundles.append(new_jobs)
423
- return list(itertools.chain(*job_submission_bundles))
424
-
425
373
  @staticmethod
426
374
  def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY):
427
375
  """
@@ -441,73 +389,95 @@ class Project(peppyProject):
441
389
  schema_set.update([schema_file])
442
390
  return list(schema_set)
443
391
 
444
- def get_pipestat_managers(self, sample_name=None, project_level=False):
445
- """
446
- Get a collection of pipestat managers for the selected sample or project.
392
+ def _check_if_pipestat_configured(self, pipeline_type=PipelineLevel.SAMPLE.value):
447
393
 
448
- The number of pipestat managers corresponds to the number of unique
449
- output schemas in the pipeline interfaces specified by the sample or project.
394
+ # First check if pipestat key is in looper_config, if not return false
450
395
 
451
- :param str sample_name: sample name to get pipestat managers for
452
- :param bool project_level: whether the project PipestatManagers
453
- should be returned
454
- :return dict[str, pipestat.PipestatManager]: a mapping of pipestat
455
- managers by pipeline interface name
456
- """
457
- pipestat_configs = self._get_pipestat_configuration(
458
- sample_name=sample_name, project_level=project_level
459
- )
460
- return {
461
- pipeline_name: PipestatManager(**pipestat_vars)
462
- for pipeline_name, pipestat_vars in pipestat_configs.items()
463
- }
396
+ if PIPESTAT_KEY not in self[EXTRA_KEY]:
397
+ return False
398
+ elif PIPESTAT_KEY in self[EXTRA_KEY]:
399
+ if self[EXTRA_KEY][PIPESTAT_KEY] is None:
400
+ return False
401
+ else:
402
+ # If pipestat key is available assume user desires pipestat usage
403
+ # This should return True OR raise an exception at this point.
404
+ return self._get_pipestat_configuration(pipeline_type)
464
405
 
465
- def _check_if_pipestat_configured(self, project_level=False):
466
- """
467
- A helper method determining whether pipestat configuration is complete
406
+ def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value):
468
407
 
469
- :param bool project_level: whether the project pipestat config should be checked
470
- :return bool: whether pipestat configuration is complete
471
- """
472
- try:
473
- if project_level:
474
- pipestat_configured = self._get_pipestat_configuration(
475
- sample_name=None, project_level=project_level
408
+ # First check if it already exists
409
+
410
+ if pipeline_type == PipelineLevel.SAMPLE.value:
411
+ for piface in self.pipeline_interfaces:
412
+
413
+ pipestat_config_path = self._check_for_existing_pipestat_config(piface)
414
+
415
+ if not pipestat_config_path:
416
+ self._create_pipestat_config(piface, pipeline_type)
417
+ else:
418
+ piface.psm = PipestatManager(
419
+ config_file=pipestat_config_path,
420
+ multi_pipelines=True,
421
+ pipeline_type="sample",
422
+ )
423
+
424
+ elif pipeline_type == PipelineLevel.PROJECT.value:
425
+ for prj_piface in self.project_pipeline_interfaces:
426
+ pipestat_config_path = self._check_for_existing_pipestat_config(
427
+ prj_piface
476
428
  )
477
- else:
478
- for s in self.samples:
479
- pipestat_configured = self._get_pipestat_configuration(
480
- sample_name=s.sample_name
429
+
430
+ if not pipestat_config_path:
431
+ self._create_pipestat_config(prj_piface, pipeline_type)
432
+ else:
433
+ prj_piface.psm = PipestatManager(
434
+ config_file=pipestat_config_path,
435
+ multi_pipelines=True,
436
+ pipeline_type="project",
481
437
  )
482
- except Exception as e:
483
- context = (
484
- f"Project '{self.name}'"
485
- if project_level
486
- else f"Sample '{s.sample_name}'"
487
- )
488
- _LOGGER.debug(
489
- f"Pipestat configuration incomplete for {context}; "
490
- f"caught exception: {getattr(e, 'message', repr(e))}"
491
- )
492
- return False
493
438
  else:
494
- if pipestat_configured is not None and pipestat_configured != {}:
495
- return True
496
- else:
497
- return False
439
+ _LOGGER.error(
440
+ msg="No pipeline type specified during pipestat configuration"
441
+ )
442
+
443
+ return True
498
444
 
499
- def _get_pipestat_configuration(self, sample_name=None, project_level=False):
445
+ def _check_for_existing_pipestat_config(self, piface):
500
446
  """
501
- Get all required pipestat configuration variables from looper_config file
447
+
448
+ Config files should be located in the looper output directory and named as:
449
+
450
+ pipestat_config_pipelinename.yaml
451
+
502
452
  """
503
453
 
504
- ret = {}
505
- if not project_level and sample_name is None:
506
- raise ValueError(
507
- "Must provide the sample_name to determine the "
508
- "sample to get the PipestatManagers for"
454
+ # Cannot do much if we cannot retrieve the pipeline_name
455
+ try:
456
+ pipeline_name = piface.data["pipeline_name"]
457
+ except KeyError:
458
+ raise Exception(
459
+ "To use pipestat, a pipeline_name must be set in the pipeline interface."
509
460
  )
510
461
 
462
+ config_file_name = f"pipestat_config_{pipeline_name}.yaml"
463
+ output_dir = expandpath(self.output_dir)
464
+
465
+ config_file_path = os.path.join(
466
+ # os.path.dirname(output_dir), config_file_name
467
+ output_dir,
468
+ config_file_name,
469
+ )
470
+
471
+ if os.path.exists(config_file_path):
472
+ return config_file_path
473
+ else:
474
+ return None
475
+
476
+ def _create_pipestat_config(self, piface, pipeline_type):
477
+ """
478
+ Each piface needs its own config file and associated psm
479
+ """
480
+
511
481
  if PIPESTAT_KEY in self[EXTRA_KEY]:
512
482
  pipestat_config_dict = self[EXTRA_KEY][PIPESTAT_KEY]
513
483
  else:
@@ -521,13 +491,56 @@ class Project(peppyProject):
521
491
  # Expand paths in the event ENV variables were used in config files
522
492
  output_dir = expandpath(self.output_dir)
523
493
 
524
- # Get looper user configured items first and update the pipestat_config_dict
494
+ pipestat_config_dict.update({"output_dir": output_dir})
495
+
496
+ if "output_schema" in piface.data:
497
+ schema_path = expandpath(piface.data["output_schema"])
498
+ if not os.path.isabs(schema_path):
499
+ # Get path relative to the pipeline_interface
500
+ schema_path = os.path.join(
501
+ os.path.dirname(piface.pipe_iface_file), schema_path
502
+ )
503
+ pipestat_config_dict.update({"schema_path": schema_path})
504
+ try:
505
+ with open(schema_path, "r") as f:
506
+ output_schema_data = safe_load(f)
507
+ output_schema_pipeline_name = output_schema_data[
508
+ PIPELINE_INTERFACE_PIPELINE_NAME_KEY
509
+ ]
510
+ except Exception:
511
+ output_schema_pipeline_name = None
512
+ else:
513
+ output_schema_pipeline_name = None
514
+ if "pipeline_name" in piface.data:
515
+ pipeline_name = piface.data["pipeline_name"]
516
+ pipestat_config_dict.update({"pipeline_name": piface.data["pipeline_name"]})
517
+ else:
518
+ pipeline_name = None
519
+
520
+ # Warn user if there is a mismatch in pipeline_names from sources!!!
521
+ if pipeline_name != output_schema_pipeline_name:
522
+ _LOGGER.warning(
523
+ msg=f"Pipeline name mismatch detected. Pipeline interface: {pipeline_name} Output schema: {output_schema_pipeline_name} Defaulting to pipeline_interface value."
524
+ )
525
+
525
526
  try:
526
527
  results_file_path = expandpath(pipestat_config_dict["results_file_path"])
527
- if not os.path.exists(os.path.dirname(results_file_path)):
528
- results_file_path = os.path.join(
529
- os.path.dirname(output_dir), results_file_path
530
- )
528
+
529
+ if not os.path.isabs(results_file_path):
530
+ # e.g. user configures "results.yaml" as results_file_path
531
+ if "{record_identifier}" in results_file_path:
532
+ # this is specifically to check if the user wishes tro generate a file for EACH record
533
+ if not os.path.exists(os.path.dirname(results_file_path)):
534
+ results_file_path = os.path.join(output_dir, results_file_path)
535
+ else:
536
+ if not os.path.exists(os.path.dirname(results_file_path)):
537
+ results_file_path = os.path.join(
538
+ output_dir, f"{pipeline_name}/", results_file_path
539
+ )
540
+ else:
541
+ # Do nothing because the user has given an absolute file path
542
+ pass
543
+
531
544
  pipestat_config_dict.update({"results_file_path": results_file_path})
532
545
  except KeyError:
533
546
  results_file_path = None
@@ -540,57 +553,20 @@ class Project(peppyProject):
540
553
  except KeyError:
541
554
  flag_file_dir = None
542
555
 
543
- if sample_name:
544
- pipestat_config_dict.update({"record_identifier": sample_name})
545
-
546
- if project_level and "project_name" in pipestat_config_dict:
547
- pipestat_config_dict.update(
548
- {"project_name": pipestat_config_dict["project_name"]}
549
- )
550
-
551
- if project_level and "{record_identifier}" in results_file_path:
552
- # if project level and using {record_identifier}, pipestat needs some sort of record_identifier during creation
553
- pipestat_config_dict.update(
554
- {"record_identifier": "default_project_record_identifier"}
555
- )
556
-
557
- pipestat_config_dict.update({"output_dir": output_dir})
558
-
559
- pifaces = (
560
- self.project_pipeline_interfaces
561
- if project_level
562
- else self._interfaces_by_sample[sample_name]
556
+ # Pipestat_dict_ is now updated from all sources and can be written to a yaml.
557
+ pipestat_config_path = os.path.join(
558
+ output_dir,
559
+ f"pipestat_config_{pipeline_name}.yaml",
563
560
  )
564
561
 
565
- for piface in pifaces:
566
- # We must also obtain additional pipestat items from the pipeline author's piface
567
- if "output_schema" in piface.data:
568
- schema_path = expandpath(piface.data["output_schema"])
569
- if not os.path.isabs(schema_path):
570
- # Get path relative to the pipeline_interface
571
- schema_path = os.path.join(
572
- os.path.dirname(piface.pipe_iface_file), schema_path
573
- )
574
- pipestat_config_dict.update({"schema_path": schema_path})
575
- if "pipeline_name" in piface.data:
576
- pipestat_config_dict.update(
577
- {"pipeline_name": piface.data["pipeline_name"]}
578
- )
579
- if "pipeline_type" in piface.data:
580
- pipestat_config_dict.update(
581
- {"pipeline_type": piface.data["pipeline_type"]}
582
- )
562
+ # Two end goals, create a config file
563
+ write_pipestat_config(pipestat_config_path, pipestat_config_dict)
583
564
 
584
- # Pipestat_dict_ is now updated from all sources and can be written to a yaml.
585
- looper_pipestat_config_path = os.path.join(
586
- os.path.dirname(output_dir), "looper_pipestat_config.yaml"
587
- )
588
- write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict)
565
+ piface.psm = PipestatManager(
566
+ config_file=pipestat_config_path, multi_pipelines=True
567
+ )
589
568
 
590
- ret[piface.pipeline_name] = {
591
- "config_file": looper_pipestat_config_path,
592
- }
593
- return ret
569
+ return None
594
570
 
595
571
  def populate_pipeline_outputs(self):
596
572
  """
@@ -657,7 +633,7 @@ class Project(peppyProject):
657
633
  pifaces_by_sample = {}
658
634
  for source, sample_names in self._samples_by_interface.items():
659
635
  try:
660
- pi = PipelineInterface(source, pipeline_type="sample")
636
+ pi = PipelineInterface(source, pipeline_type=PipelineLevel.SAMPLE.value)
661
637
  except PipelineInterfaceConfigError as e:
662
638
  _LOGGER.debug(f"Skipping pipeline interface creation: {e}")
663
639
  else:
@@ -708,7 +684,9 @@ class Project(peppyProject):
708
684
  for source in piface_srcs:
709
685
  source = self._resolve_path_with_cfg(source)
710
686
  try:
711
- PipelineInterface(source, pipeline_type="sample")
687
+ PipelineInterface(
688
+ source, pipeline_type=PipelineLevel.SAMPLE.value
689
+ )
712
690
  except (
713
691
  ValidationError,
714
692
  IOError,
@@ -9,12 +9,20 @@ properties:
9
9
  type: string
10
10
  enum: ["project", "sample"]
11
11
  description: "type of the pipeline, either 'project' or 'sample'"
12
- command_template:
13
- type: string
14
- description: "Jinja2-like template to construct the command to run"
15
- path:
16
- type: string
17
- description: "path to the pipeline program. Relative to pipeline interface file or absolute."
12
+ sample_interface:
13
+ type: object
14
+ description: "Section that defines the sample-level pipeline command"
15
+ properties:
16
+ command_template:
17
+ type: string
18
+ description: "Jinja2-like template to construct the command to run"
19
+ project_interface:
20
+ type: object
21
+ description: "Section that defines the project-level pipeline command"
22
+ properties:
23
+ command_template:
24
+ type: string
25
+ description: "Jinja2-like template to construct the command to run"
18
26
  compute:
19
27
  type: object
20
28
  description: "Section that defines compute environment settings"