PyPI - runnable - Versions diffs - 0.9.1__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

runnable 0.9.1__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

runnable/__init__.py +13 -9
runnable/catalog.py +8 -1
runnable/cli.py +1 -0
runnable/context.py +5 -3
runnable/datastore.py +96 -12
runnable/defaults.py +9 -9
runnable/entrypoints.py +38 -24
runnable/exceptions.py +4 -0
runnable/extensions/catalog/file_system/implementation.py +8 -1
runnable/extensions/executor/__init__.py +85 -29
runnable/extensions/executor/argo/implementation.py +8 -4
runnable/extensions/executor/local/implementation.py +1 -0
runnable/extensions/nodes.py +90 -13
runnable/extensions/run_log_store/chunked_file_system/implementation.py +6 -1
runnable/extensions/run_log_store/file_system/implementation.py +6 -0
runnable/graph.py +11 -0
runnable/integration.py +4 -17
runnable/nodes.py +9 -0
runnable/parameters.py +3 -1
runnable/sdk.py +123 -18
runnable/tasks.py +45 -15
runnable/utils.py +2 -1
{runnable-0.9.1.dist-info → runnable-0.11.0.dist-info}/METADATA +1 -1
{runnable-0.9.1.dist-info → runnable-0.11.0.dist-info}/RECORD +27 -31
{runnable-0.9.1.dist-info → runnable-0.11.0.dist-info}/entry_points.txt +0 -4
runnable/experiment_tracker.py +0 -139
runnable/extensions/experiment_tracker/__init__.py +0 -0
runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
runnable/extensions/experiment_tracker/mlflow/implementation.py +0 -94
{runnable-0.9.1.dist-info → runnable-0.11.0.dist-info}/LICENSE +0 -0
{runnable-0.9.1.dist-info → runnable-0.11.0.dist-info}/WHEEL +0 -0

runnable/__init__.py CHANGED Viewed

@@ -4,26 +4,30 @@
 import logging
 from logging.config import dictConfig
+from rich.console import Console
 from runnable import defaults
 dictConfig(defaults.LOGGING_CONFIG)
 logger = logging.getLogger(defaults.LOGGER_NAME)
+console = Console()
+console.print(":runner: Lets go!!")
-from runnable.sdk import (
-    Stub,
-    Pipeline,
-    Parallel,
-    Map,
+from runnable.sdk import (  # noqa
     Catalog,
-    Success,
     Fail,
-    PythonTask,
+    Map,
     NotebookTask,
+    Parallel,
+    Pipeline,
+    PythonTask,
     ShellTask,
+    Stub,
+    Success,
+    metric,
     pickled,
-)  # noqa
+)
 # TODO: Think of model registry as a central place to store models.
 # TODO: Implement Sagemaker pipelines as a executor.

runnable/catalog.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 from pydantic import BaseModel, ConfigDict
@@ -25,6 +25,10 @@ class BaseCatalog(ABC, BaseModel):
     service_type: str = "catalog"
     model_config = ConfigDict(extra="forbid")
+    @abstractmethod
+    def get_summary(self) -> Dict[str, Any]:
+        ...
     @property
     def _context(self):
         return context.run_context
@@ -112,6 +116,9 @@ class DoNothingCatalog(BaseCatalog):
     service_name: str = "do-nothing"
+    def get_summary(self) -> Dict[str, Any]:
+        return {}
     def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
         """
         Does nothing

runnable/cli.py CHANGED Viewed

@@ -60,6 +60,7 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id):  # prag
                                     provided
     """
     logger.setLevel(log_level)
     entrypoints.execute(
         configuration_file=config_file,
         pipeline_file=file,

runnable/context.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from typing import Dict, Optional
-from pydantic import BaseModel, SerializeAsAny
+from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
+from rich.progress import Progress
 from runnable.catalog import BaseCatalog
 from runnable.datastore import BaseRunLogStore
 from runnable.executor import BaseExecutor
-from runnable.experiment_tracker import BaseExperimentTracker
 from runnable.graph import Graph
 from runnable.pickler import BasePickler
 from runnable.secrets import BaseSecrets
@@ -16,8 +16,10 @@ class Context(BaseModel):
     run_log_store: SerializeAsAny[BaseRunLogStore]
     secrets_handler: SerializeAsAny[BaseSecrets]
     catalog_handler: SerializeAsAny[BaseCatalog]
-    experiment_tracker: SerializeAsAny[BaseExperimentTracker]
     pickler: SerializeAsAny[BasePickler]
+    progress: SerializeAsAny[Optional[Progress]] = Field(default=None, exclude=True)
+    model_config = ConfigDict(arbitrary_types_allowed=True)
     pipeline_file: Optional[str] = ""
     parameters_file: Optional[str] = ""

runnable/datastore.py CHANGED Viewed

@@ -4,23 +4,29 @@ import logging
 import os
 from abc import ABC, abstractmethod
 from datetime import datetime
-from typing import Annotated, Any, Dict, List, Literal, Optional, OrderedDict, Tuple, Union
+from typing import (
+    Annotated,
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    OrderedDict,
+    Tuple,
+    Union,
+)
 from pydantic import BaseModel, Field, computed_field
-from typing_extensions import TypeAliasType
 import runnable.context as context
 from runnable import defaults, exceptions
 logger = logging.getLogger(defaults.LOGGER_NAME)
-# Once defined these classes are sealed to any additions unless a default is provided
-# Breaking this rule might make runnable backwardly incompatible
-JSONType = TypeAliasType(
-    "JSONType",
-    Union[bool, int, float, str, None, List["JSONType"], Dict[str, "JSONType"]],  # type: ignore
-)
+JSONType = Union[
+    str, int, float, bool, List[Any], Dict[str, Any]
+]  # This is actually JSONType, but pydantic doesn't support TypeAlias yet
 class DataCatalog(BaseModel, extra="allow"):
@@ -62,10 +68,29 @@ The theory behind reduced:
 class JsonParameter(BaseModel):
     kind: Literal["json"]
-    value: JSONType  # type: ignore
+    value: JSONType
+    reduced: bool = True
+    @computed_field  # type: ignore
+    @property
+    def description(self) -> JSONType:
+        return self.value
+    def get_value(self) -> JSONType:
+        return self.value
+class MetricParameter(BaseModel):
+    kind: Literal["metric"]
+    value: JSONType
     reduced: bool = True
-    def get_value(self) -> JSONType:  # type: ignore
+    @computed_field  # type: ignore
+    @property
+    def description(self) -> JSONType:
+        return self.value
+    def get_value(self) -> JSONType:
         return self.value
@@ -100,7 +125,7 @@ class ObjectParameter(BaseModel):
         os.remove(self.file_name)  # Remove after loading
-Parameter = Annotated[Union[JsonParameter, ObjectParameter], Field(discriminator="kind")]
+Parameter = Annotated[Union[JsonParameter, ObjectParameter, MetricParameter], Field(discriminator="kind")]
 class StepAttempt(BaseModel):
@@ -115,6 +140,7 @@ class StepAttempt(BaseModel):
     message: str = ""
     input_parameters: Dict[str, Parameter] = Field(default_factory=dict)
     output_parameters: Dict[str, Parameter] = Field(default_factory=dict)
+    user_defined_metrics: Dict[str, Parameter] = Field(default_factory=dict)
     @property
     def duration(self):
@@ -149,10 +175,43 @@ class StepLog(BaseModel):
     mock: bool = False
     code_identities: List[CodeIdentity] = Field(default_factory=list)
     attempts: List[StepAttempt] = Field(default_factory=list)
-    user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)
     branches: Dict[str, BranchLog] = Field(default_factory=dict)
     data_catalog: List[DataCatalog] = Field(default_factory=list)
+    def get_summary(self) -> Dict[str, Any]:
+        """
+        Summarize the step log to log
+        """
+        summary: Dict[str, Any] = {}
+        summary["Name"] = self.internal_name
+        summary["Input catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "get"]
+        summary["Available parameters"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.input_parameters.items()
+        ]
+        summary["Output catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "put"]
+        summary["Output parameters"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.output_parameters.items()
+        ]
+        summary["Metrics"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.user_defined_metrics.items()
+        ]
+        cis = []
+        for ci in self.code_identities:
+            message = f"{ci.code_identifier_type}:{ci.code_identifier}"
+            if not ci.code_identifier_dependable:
+                message += " but is not dependable"
+            cis.append(message)
+        summary["Code identities"] = cis
+        summary["status"] = self.status
+        return summary
     def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
         """
         Given a stage, return the data catalogs according to the stage
@@ -242,6 +301,22 @@ class RunLog(BaseModel):
     parameters: Dict[str, Parameter] = Field(default_factory=dict)
     run_config: Dict[str, Any] = Field(default_factory=dict)
+    def get_summary(self) -> Dict[str, Any]:
+        summary: Dict[str, Any] = {}
+        _context = context.run_context
+        summary["Unique execution id"] = self.run_id
+        summary["status"] = self.status
+        summary["Catalog Location"] = _context.catalog_handler.get_summary()
+        summary["Full Run log present at: "] = _context.run_log_store.get_summary()
+        summary["Final Parameters"] = {p: v.description for p, v in self.parameters.items()}
+        summary["Collected metrics"] = {p: v.description for p, v in self.parameters.items() if v.kind == "metric"}
+        return summary
     def get_data_catalogs_by_stage(self, stage: str = "put") -> List[DataCatalog]:
         """
         Return all the cataloged data by the stage at which they were cataloged.
@@ -360,6 +435,10 @@ class BaseRunLogStore(ABC, BaseModel):
     service_name: str = ""
     service_type: str = "run_log_store"
+    @abstractmethod
+    def get_summary(self) -> Dict[str, Any]:
+        ...
     @property
     def _context(self):
         return context.run_context
@@ -693,6 +772,11 @@ class BufferRunLogstore(BaseRunLogStore):
     service_name: str = "buffered"
     run_log: Optional[RunLog] = Field(default=None, exclude=True)  # For a buffered Run Log, this is the database
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": "Not persisted"}
+        return summary
     def create_run_log(
         self,
         run_id: str,

runnable/defaults.py CHANGED Viewed

@@ -1,17 +1,10 @@
-# mypy: ignore-errors
-# The above should be done until https://github.com/python/mypy/issues/8823
 from enum import Enum
+from typing import TypedDict  # type: ignore[unused-ignore]
 from typing import Any, Dict, Mapping, Optional, Union
+from rich.style import Style
 from typing_extensions import TypeAlias
-# TODO: This is not the correct way to do this.
-try:  # pragma: no cover
-    from typing import TypedDict  # type: ignore[unused-ignore]
-except ImportError:  # pragma: no cover
-    from typing_extensions import TypedDict  # type: ignore[unused-ignore]
 NAME = "runnable"
 LOGGER_NAME = "runnable"
@@ -182,3 +175,10 @@ LOGGING_CONFIG = {
         LOGGER_NAME: {"handlers": ["runnable_handler"], "propagate": False},
     },
 }
+# styles
+error_style = Style(color="red", bold=True)
+warning_style = Style(color="yellow", bold=True)
+success_style = Style(color="green", bold=True)
+info_style = Style(color="blue", bold=True)

runnable/entrypoints.py CHANGED Viewed

@@ -5,10 +5,11 @@ import os
 import sys
 from typing import Optional, cast
-from rich import print
+from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
+from rich.table import Column
 import runnable.context as context
-from runnable import defaults, graph, utils
+from runnable import console, defaults, graph, utils
 from runnable.defaults import RunnableConfig, ServiceConfig
 logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -64,6 +65,8 @@ def prepare_configurations(
     configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)
+    logger.info(f"Resolved configurations: {configuration}")
     # Run log settings, configuration over-rides everything
     run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
     if not run_log_config:
@@ -86,14 +89,6 @@ def prepare_configurations(
     pickler_config = cast(ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER))
     pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)
-    # experiment tracker settings, configuration over-rides everything
-    tracker_config: Optional[ServiceConfig] = configuration.get("experiment_tracker", None)
-    if not tracker_config:
-        tracker_config = cast(
-            ServiceConfig, runnable_defaults.get("experiment_tracker", defaults.DEFAULT_EXPERIMENT_TRACKER)
-        )
-    tracker_handler = utils.get_provider_by_name_and_type("experiment_tracker", tracker_config)
     # executor configurations, configuration over rides everything
     executor_config: Optional[ServiceConfig] = configuration.get("executor", None)
     if force_local_executor:
@@ -110,7 +105,6 @@ def prepare_configurations(
         catalog_handler=catalog_handler,
         secrets_handler=secrets_handler,
         pickler=pickler_handler,
-        experiment_tracker=tracker_handler,
         variables=variables,
         tag=tag,
         run_id=run_id,
@@ -176,8 +170,8 @@ def execute(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     executor = run_context.executor
@@ -188,8 +182,28 @@ def execute(
     # Prepare for graph execution
     executor.prepare_for_graph_execution()
-    logger.info("Executing the graph")
-    executor.execute_graph(dag=run_context.dag)  # type: ignore
+    logger.info(f"Executing the graph: {run_context.dag}")
+    with Progress(
+        TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)),
+        BarColumn(table_column=Column(ratio=1), style="dark_orange"),
+        TimeElapsedColumn(table_column=Column(ratio=1)),
+        console=console,
+        expand=True,
+    ) as progress:
+        pipeline_execution_task = progress.add_task("[dark_orange] Starting execution .. ", total=1)
+        try:
+            run_context.progress = progress
+            executor.execute_graph(dag=run_context.dag)  # type: ignore
+            run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False)
+            if run_log.status == defaults.SUCCESS:
+                progress.update(pipeline_execution_task, description="[green] Success", completed=True)
+            else:
+                progress.update(pipeline_execution_task, description="[red] Failed", completed=True)
+        except Exception as e:  # noqa: E722
+            console.print(e, style=defaults.error_style)
+            progress.update(pipeline_execution_task, description="[red] Errored execution", completed=True)
     executor.send_return_code()
@@ -227,8 +241,8 @@ def execute_single_node(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -280,8 +294,8 @@ def execute_notebook(
     run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
     utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     step_config = {
         "command": notebook_file,
@@ -342,8 +356,8 @@ def execute_function(
     run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
     utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     # Prepare the graph with a single node
     step_config = {
@@ -411,8 +425,8 @@ def fan(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -437,4 +451,4 @@ def fan(
 if __name__ == "__main__":
     # This is only for perf testing purposes.
-    prepare_configurations(run_id="abc", pipeline_file="example/mocking.yaml")
+    prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")

runnable/exceptions.py CHANGED Viewed

@@ -92,3 +92,7 @@ class ExecutionFailedError(Exception):  # pragma: no cover
     def __init__(self, run_id: str):
         super().__init__()
         self.message = f"Execution failed for run id: {run_id}"
+class CommandCallError(Exception):  # pragma: no cover
+    "An exception during the call of the command"

runnable/extensions/catalog/file_system/implementation.py CHANGED Viewed

@@ -2,7 +2,7 @@ import logging
 import os
 import shutil
 from pathlib import Path
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 from runnable import defaults, utils
 from runnable.catalog import BaseCatalog
@@ -34,6 +34,13 @@ class FileSystemCatalog(BaseCatalog):
     def get_catalog_location(self):
         return self.catalog_location
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Catalog Location": self.get_catalog_location(),
+        }
+        return summary
     def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
         """
         Get the file by matching glob pattern to the name

runnable 0.9.1__py3-none-any.whl → 0.11.0__py3-none-any.whl

runnable 0.9.1__py3-none-any.whl → 0.11.0__py3-none-any.whl