PyPI - runnable - Versions diffs - 0.14.0__py3-none-any.whl → 0.17.0__py3-none-any.whl - Mend

runnable 0.14.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

runnable/__init__.py +1 -1
runnable/catalog.py +2 -0
runnable/cli.py +264 -307
runnable/context.py +12 -3
runnable/datastore.py +159 -25
runnable/defaults.py +13 -54
runnable/entrypoints.py +197 -185
runnable/exceptions.py +22 -0
runnable/executor.py +114 -88
runnable/graph.py +0 -1
runnable/nodes.py +36 -6
runnable/sdk.py +132 -36
runnable/tasks.py +6 -15
runnable/utils.py +22 -30
{runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/METADATA +6 -3
runnable-0.17.0.dist-info/RECORD +23 -0
{runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/entry_points.txt +12 -7
runnable/integration.py +0 -197
runnable-0.14.0.dist-info/RECORD +0 -24
{runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/WHEEL +0 -0
{runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/licenses/LICENSE +0 -0

runnable/context.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Dict, Optional
+from typing import Any, Dict, List, Optional
 from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
 from rich.progress import Progress
@@ -9,6 +9,7 @@ from runnable.executor import BaseExecutor
 from runnable.graph import Graph
 from runnable.pickler import BasePickler
 from runnable.secrets import BaseSecrets
+from runnable.tasks import BaseTaskType
 class Context(BaseModel):
@@ -22,15 +23,23 @@ class Context(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     pipeline_file: Optional[str] = ""
+    job_definition_file: Optional[str] = ""
     parameters_file: Optional[str] = ""
     configuration_file: Optional[str] = ""
+    from_sdk: bool = False
-    tag: str = ""
     run_id: str = ""
+    object_serialisation: bool = True
+    return_objects: Dict[str, Any] = {}
+    tag: str = ""
     variables: Dict[str, str] = {}
     dag: Optional[Graph] = None
     dag_hash: str = ""
-    execution_plan: str = ""
+    job: Optional[BaseTaskType] = None
+    job_catalog_settings: Optional[List[str]] = []
 run_context = None  # type: Context # type: ignore

runnable/datastore.py CHANGED Viewed

@@ -24,7 +24,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
 JSONType = Union[
-    str, int, float, bool, List[Any], Dict[str, Any]
+    Union[None, bool, str, float, int, List[Any], Dict[str, Any]]
 ]  # This is actually JSONType, but pydantic doesn't support TypeAlias yet
@@ -98,22 +98,33 @@ class ObjectParameter(BaseModel):
     @computed_field  # type: ignore
     @property
     def description(self) -> str:
-        return f"Pickled object stored in catalog as: {self.value}"
+        if context.run_context.object_serialisation:
+            return f"Pickled object stored in catalog as: {self.value}"
+        return f"Object stored in memory as: {self.value}"
     @property
     def file_name(self) -> str:
         return f"{self.value}{context.run_context.pickler.extension}"
     def get_value(self) -> Any:
-        # Get the pickled object
-        catalog_handler = context.run_context.catalog_handler
+        # If there was no serialisation, return the object from the return objects
+        if not context.run_context.object_serialisation:
+            return context.run_context.return_objects[self.value]
+        # If the object was serialised, get it from the catalog
+        catalog_handler = context.run_context.catalog_handler
         catalog_handler.get(name=self.file_name, run_id=context.run_context.run_id)
         obj = context.run_context.pickler.load(path=self.file_name)
         os.remove(self.file_name)  # Remove after loading
         return obj
     def put_object(self, data: Any) -> None:
+        if not context.run_context.object_serialisation:
+            context.run_context.return_objects[self.value] = data
+            return
+        # If the object was serialised, put it in the catalog
         context.run_context.pickler.dump(data=data, path=self.file_name)
         catalog_handler = context.run_context.catalog_handler
@@ -300,6 +311,73 @@ class BranchLog(BaseModel):
 StepLog.model_rebuild()
+class JobLog(BaseModel):
+    """
+    The data class capturing the data of a job
+    This should be treated as a step log
+    """
+    status: str = defaults.FAIL
+    message: str = ""
+    mock: bool = False
+    code_identities: List[CodeIdentity] = Field(default_factory=list)
+    attempts: List[StepAttempt] = Field(default_factory=list)
+    data_catalog: List[DataCatalog] = Field(default_factory=list)
+    def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
+        """
+        Add the data catalogs as asked by the user
+        Args:
+            dict_catalogs ([DataCatalog]): A list of data catalog items
+        """
+        if not self.data_catalog:
+            self.data_catalog = []
+        for data_catalog in data_catalogs:
+            self.data_catalog.append(data_catalog)
+    def get_summary(self) -> Dict[str, Any]:
+        """
+        Summarize the step log to log
+        """
+        summary: Dict[str, Any] = {}
+        summary["Available parameters"] = [
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.input_parameters.items()
+        ]
+        summary["Output catalog content"] = [
+            dc.name for dc in self.data_catalog if dc.stage == "put"
+        ]
+        summary["Output parameters"] = [
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.output_parameters.items()
+        ]
+        summary["Metrics"] = [
+            (p, v.description)
+            for attempt in self.attempts
+            for p, v in attempt.user_defined_metrics.items()
+        ]
+        cis = []
+        for ci in self.code_identities:
+            message = f"{ci.code_identifier_type}:{ci.code_identifier}"
+            if not ci.code_identifier_dependable:
+                message += " but is not dependable"
+            cis.append(message)
+        summary["Code identities"] = cis
+        summary["status"] = self.status
+        return summary
 class RunLog(BaseModel):
     """
     The data captured as part of Run Log
@@ -310,6 +388,7 @@ class RunLog(BaseModel):
     tag: Optional[str] = ""
     status: str = defaults.FAIL
     steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
+    job: Optional[JobLog] = None
     parameters: Dict[str, Parameter] = Field(default_factory=dict)
     run_config: Dict[str, Any] = Field(default_factory=dict)
@@ -469,6 +548,23 @@ class BaseRunLogStore(ABC, BaseModel):
     def _context(self):
         return context.run_context
+        """
+        Retrieves a Job log from the database using the config and the job_id
+        Args:
+            job_id (str): The job_id of the job
+        Returns:
+            JobLog: The JobLog object identified by the job_id
+        Logically the method should:
+            * Returns the job_log defined by id from the data store defined by the config
+        Raises:
+            NotImplementedError: This is a base class and therefore has no default implementation
+            JobLogNotFoundError: If the job log for job_id is not found in the datastore
+        """
     @abstractmethod
     def create_run_log(
         self,
@@ -478,7 +574,6 @@ class BaseRunLogStore(ABC, BaseModel):
         tag: str = "",
         original_run_id: str = "",
         status: str = defaults.CREATED,
-        **kwargs,
     ):
         """
         Creates a Run Log object by using the config
@@ -494,7 +589,7 @@ class BaseRunLogStore(ABC, BaseModel):
         raise NotImplementedError
     @abstractmethod
-    def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog:
+    def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
         """
         Retrieves a Run log from the database using the config and the run_id
@@ -516,7 +611,7 @@ class BaseRunLogStore(ABC, BaseModel):
         raise NotImplementedError
     @abstractmethod
-    def put_run_log(self, run_log: RunLog, **kwargs):
+    def put_run_log(self, run_log: RunLog):
         """
         Puts the Run Log in the database as defined by the config
@@ -544,7 +639,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log.status = status
         self.put_run_log(run_log)
-    def get_parameters(self, run_id: str, **kwargs) -> Dict[str, Parameter]:
+    def get_parameters(self, run_id: str) -> Dict[str, Parameter]:
         """
         Get the parameters from the Run log defined by the run_id
@@ -563,7 +658,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log = self.get_run_log_by_id(run_id=run_id)
         return run_log.parameters
-    def set_parameters(self, run_id: str, parameters: Dict[str, Parameter], **kwargs):
+    def set_parameters(self, run_id: str, parameters: Dict[str, Parameter]):
         """
         Update the parameters of the Run log with the new parameters
@@ -584,7 +679,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log.parameters.update(parameters)
         self.put_run_log(run_log=run_log)
-    def get_run_config(self, run_id: str, **kwargs) -> dict:
+    def get_run_config(self, run_id: str) -> dict:
         """
         Given a run_id, return the run_config used to perform the run.
@@ -598,7 +693,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log = self.get_run_log_by_id(run_id=run_id)
         return run_log.run_config
-    def set_run_config(self, run_id: str, run_config: dict, **kwargs):
+    def set_run_config(self, run_id: str, run_config: dict):
         """Set the run config used to run the run_id
         Args:
@@ -610,7 +705,7 @@ class BaseRunLogStore(ABC, BaseModel):
         run_log.run_config.update(run_config)
         self.put_run_log(run_log=run_log)
-    def create_step_log(self, name: str, internal_name: str, **kwargs):
+    def create_step_log(self, name: str, internal_name: str):
         """
         Create a step log by the name and internal name
@@ -628,7 +723,7 @@ class BaseRunLogStore(ABC, BaseModel):
         logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
         return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)
-    def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog:
+    def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
         """
         Get a step log from the datastore for run_id and the internal naming of the step log
@@ -657,7 +752,7 @@ class BaseRunLogStore(ABC, BaseModel):
         step_log, _ = run_log.search_step_by_internal_name(internal_name)
         return step_log
-    def add_step_log(self, step_log: StepLog, run_id: str, **kwargs):
+    def add_step_log(self, step_log: StepLog, run_id: str):
         """
         Add the step log in the run log as identified by the run_id in the datastore
@@ -687,7 +782,7 @@ class BaseRunLogStore(ABC, BaseModel):
         branch.steps[step_log.internal_name] = step_log
         self.put_run_log(run_log=run_log)
-    def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog:
+    def create_branch_log(self, internal_branch_name: str) -> BranchLog:
         """
         Creates a uncommitted branch log object by the internal name given
@@ -704,7 +799,7 @@ class BaseRunLogStore(ABC, BaseModel):
         return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)
     def get_branch_log(
-        self, internal_branch_name: str, run_id: str, **kwargs
+        self, internal_branch_name: str, run_id: str
     ) -> Union[BranchLog, RunLog]:
         """
         Returns the branch log by the internal branch name for the run id
@@ -724,9 +819,7 @@ class BaseRunLogStore(ABC, BaseModel):
         branch, _ = run_log.search_branch_by_internal_name(internal_branch_name)
         return branch
-    def add_branch_log(
-        self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs
-    ):
+    def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str):
         """
         The method should:
         # Get the run log
@@ -758,8 +851,7 @@ class BaseRunLogStore(ABC, BaseModel):
         step.branches[internal_branch_name] = branch_log  # type: ignore
         self.put_run_log(run_log)
-    #
-    def create_code_identity(self, **kwargs) -> CodeIdentity:
+    def create_code_identity(self) -> CodeIdentity:
         """
         Creates an uncommitted Code identity class
@@ -769,7 +861,7 @@ class BaseRunLogStore(ABC, BaseModel):
         logger.info(f"{self.service_name} Creating Code identity")
         return CodeIdentity()
-    def create_data_catalog(self, name: str, **kwargs) -> DataCatalog:
+    def create_data_catalog(self, name: str) -> DataCatalog:
         """
         Create a uncommitted data catalog object
@@ -782,6 +874,45 @@ class BaseRunLogStore(ABC, BaseModel):
         logger.info(f"{self.service_name} Creating Data Catalog for {name}")
         return DataCatalog(name=name)
+    def create_job_log(self) -> JobLog:
+        """
+        Creates a Job log and adds it to the db
+        Refer to BaseRunLogStore.create_job_log
+        """
+        logger.info(f"{self.service_name} Creating a Job Log and adding it to DB")
+        return JobLog(status=defaults.CREATED)
+    def get_job_log(self, run_id: str) -> JobLog:
+        """
+        Returns the run_log defined by id
+        Raises Exception if not found
+        """
+        logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
+        run_log = self.get_run_log_by_id(run_id)
+        try:
+            assert run_log.job
+        except AssertionError as exc:
+            raise exceptions.JobLogNotFoundError(run_id) from exc
+        return run_log.job
+    def add_job_log(self, run_id: str, job_log: JobLog):
+        """
+        Adds the job log to the run log
+        Args:
+            run_id (str): The run_id of the run
+            job_log (JobLog): The job log to add to the run log
+        """
+        logger.info(f"{self.service_name} Adding the job log to DB for: {run_id}")
+        run_log = self.get_run_log_by_id(run_id=run_id)
+        run_log.job = job_log
+        run_log.status = job_log.status
+        self.put_run_log(run_log=run_log)
 class BufferRunLogstore(BaseRunLogStore):
     """
@@ -804,9 +935,13 @@ class BufferRunLogstore(BaseRunLogStore):
     """
     service_name: str = "buffered"
     run_log: Optional[RunLog] = Field(
         default=None, exclude=True
     )  # For a buffered Run Log, this is the database
+    job_log: Optional[JobLog] = Field(
+        default=None, exclude=True
+    )  # For a buffered Run Log, this is the database
     def get_summary(self) -> Dict[str, Any]:
         summary = {"Type": self.service_name, "Location": "Not persisted"}
@@ -821,7 +956,6 @@ class BufferRunLogstore(BaseRunLogStore):
         tag: str = "",
         original_run_id: str = "",
         status: str = defaults.CREATED,
-        **kwargs,
     ) -> RunLog:
         """
         # Creates a Run log
@@ -840,7 +974,7 @@ class BufferRunLogstore(BaseRunLogStore):
         )
         return self.run_log
-    def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs):
+    def get_run_log_by_id(self, run_id: str, full: bool = False):
         """
         # Returns the run_log defined by id
         # Raises Exception if not found
@@ -852,7 +986,7 @@ class BufferRunLogstore(BaseRunLogStore):
         raise exceptions.RunLogNotFoundError(run_id)
-    def put_run_log(self, run_log: RunLog, **kwargs):
+    def put_run_log(self, run_log: RunLog):
         """
         # Puts the run log in the db
         # Raises Exception if not found

runnable/defaults.py CHANGED Viewed

@@ -1,6 +1,11 @@
-from enum import Enum
-from typing import TypedDict  # type: ignore[unused-ignore]
-from typing import Any, Dict, Mapping, Optional, Union
+from typing import (
+    Any,
+    Dict,
+    Mapping,
+    Optional,
+    TypedDict,  # type: ignore[unused-ignore]
+    Union,
+)
 from rich.style import Style
 from typing_extensions import TypeAlias
@@ -12,16 +17,6 @@ LOGGER_NAME = "runnable"
 LOG_LEVEL = "WARNING"
-class EXECUTION_PLAN(Enum):
-    """
-    The possible execution plans for a runnable job.
-    """
-    CHAINED = "chained"  #  121 relationship between run log and the dag.
-    UNCHAINED = "unchained"  # Only captures execution of steps, no relation.
-    INTERACTIVE = "interactive"  # used for interactive sessions
 # Type definitions
 class ServiceConfig(TypedDict):
     type: str
@@ -32,7 +27,7 @@ class RunnableConfig(TypedDict, total=False):
     run_log_store: Optional[ServiceConfig]
     secrets: Optional[ServiceConfig]
     catalog: Optional[ServiceConfig]
-    executor: Optional[ServiceConfig]
+    pipeline_executor: Optional[ServiceConfig]
     pickler: Optional[ServiceConfig]
@@ -45,7 +40,6 @@ RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
 # Interaction settings
 TRACK_PREFIX = "RUNNABLE_TRACK_"
-STEP_INDICATOR = "_STEP_"
 PARAMETER_PREFIX = "RUNNABLE_PRM_"
 MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
 VARIABLE_PREFIX = "RUNNABLE_VAR_"
@@ -66,18 +60,14 @@ TRIGGERED = "TRIGGERED"
 # Node and Command settings
 COMMAND_TYPE = "python"
-NODE_SPEC_FILE = "node_spec.yaml"
 COMMAND_FRIENDLY_CHARACTER = "%"
-DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/runnable/"
-DEFAULT_CONTAINER_DATA_PATH = "data/"
-DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
 # Default services
-DEFAULT_EXECUTOR = ServiceConfig(type="local", config={})
+DEFAULT_PIPELINE_EXECUTOR = ServiceConfig(type="local", config={})
+DEFAULT_JOB_EXECUTOR = ServiceConfig(type="local", config={})
 DEFAULT_RUN_LOG_STORE = ServiceConfig(type="file-system", config={})
 DEFAULT_CATALOG = ServiceConfig(type="file-system", config={})
 DEFAULT_SECRETS = ServiceConfig(type="env-secrets", config={})
-DEFAULT_EXPERIMENT_TRACKER = ServiceConfig(type="do-nothing", config={})
 DEFAULT_PICKLER = ServiceConfig(type="pickle", config={})
 # Map state
@@ -109,41 +99,10 @@ COMPUTE_DATA_FOLDER = "."
 # Secrets settings
 DOTENV_FILE_LOCATION = ".env"
-# Docker settings
-DOCKERFILE_NAME = "Dockerfile"
-DOCKERFILE_CONTENT = r"""# Python 3.8 Image without Dependecies
-FROM python:3.8
-LABEL maintainer="mesanthu@gmail.com"
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    git \
-    && rm -rf /var/lib/apt/lists/*
-${INSTALL_STYLE}
-ENV VIRTUAL_ENV=/opt/venv
-RUN python -m virtualenv --python=/usr/local/bin/python $VIRTUAL_ENV
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-${COPY_CONTENT}
-WORKDIR /app
-${INSTALL_REQUIREMENTS}
-"""
-GIT_ARCHIVE_NAME = "git_tracked"
 LEN_SHA_FOR_TAG = 8
-class ENTRYPOINT(Enum):
-    """
-    The possible container entrypoint types.
-    """
-    USER = "user"
-    SYSTEM = "system"
+# JOB CONFIG
+DEFAULT_JOB_NAME = "job"
 ## Logging settings

runnable 0.14.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

runnable 0.14.0__py3-none-any.whl → 0.17.0__py3-none-any.whl