PyPI - runnable - Versions diffs - 0.9.1__tar.gz → 0.11.0__tar.gz - Mend

runnable 0.9.1__tar.gz → 0.11.0__tar.gz

Files changed (69) hide show

{runnable-0.9.1 → runnable-0.11.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: runnable
-Version: 0.9.1
+Version: 0.11.0
 Summary: A Compute agnostic pipelining software
 Home-page: https://github.com/vijayvammi/runnable
 License: Apache-2.0

{runnable-0.9.1 → runnable-0.11.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "runnable"
-version = "0.9.1"
+version = "0.11.0"
 description = "A Compute agnostic pipelining software"
 authors = ["Vijay Vammi <mesanthu@gmail.com>"]
 license = "Apache-2.0"
@@ -32,6 +32,7 @@ mkdocs-section-index = "^0.3.5"
 mkdocstrings = { extras = ["python"], version = "^0.24.0" }
 nbconvert = "^7.13.1"
 mkdocs-click = "^0.8.1"
+tensorflow = "^2.16.1"
 [tool.poetry.group.binary.dependencies]
 pyinstaller = "^5.13.2"
@@ -46,6 +47,7 @@ pandas = "^2.2.1"
 numpy = "^1.26.4"
 scikit-learn = "^1.4.1.post1"
 en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz" }
+matplotlib = "^3.8.3"
 [tool.poetry.extras]
 docker = ['docker']
@@ -95,10 +97,6 @@ runnable = 'runnable.cli:cli'
 "file-system" = "runnable.extensions.run_log_store.file_system.implementation:FileSystemRunLogstore"
 "chunked-fs" = "runnable.extensions.run_log_store.chunked_file_system.implementation:ChunkedFileSystemRunLogStore"
-# Plugins for Experiment tracker
-[tool.poetry.plugins."experiment_tracker"]
-"do-nothing" = "runnable.experiment_tracker:DoNothingTracker"
-"mlflow" = "runnable.extensions.experiment_tracker.mlflow.implementation:MLFlowExperimentTracker"
 # Plugins for Pickler
 [tool.poetry.plugins."pickler"]

{runnable-0.9.1 → runnable-0.11.0}/runnable/__init__.py RENAMED Viewed

@@ -4,26 +4,30 @@
 import logging
 from logging.config import dictConfig
+from rich.console import Console
 from runnable import defaults
 dictConfig(defaults.LOGGING_CONFIG)
 logger = logging.getLogger(defaults.LOGGER_NAME)
+console = Console()
+console.print(":runner: Lets go!!")
-from runnable.sdk import (
-    Stub,
-    Pipeline,
-    Parallel,
-    Map,
+from runnable.sdk import (  # noqa
     Catalog,
-    Success,
     Fail,
-    PythonTask,
+    Map,
     NotebookTask,
+    Parallel,
+    Pipeline,
+    PythonTask,
     ShellTask,
+    Stub,
+    Success,
+    metric,
     pickled,
-)  # noqa
+)
 # TODO: Think of model registry as a central place to store models.
 # TODO: Implement Sagemaker pipelines as a executor.

{runnable-0.9.1 → runnable-0.11.0}/runnable/catalog.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 from pydantic import BaseModel, ConfigDict
@@ -25,6 +25,10 @@ class BaseCatalog(ABC, BaseModel):
     service_type: str = "catalog"
     model_config = ConfigDict(extra="forbid")
+    @abstractmethod
+    def get_summary(self) -> Dict[str, Any]:
+        ...
     @property
     def _context(self):
         return context.run_context
@@ -112,6 +116,9 @@ class DoNothingCatalog(BaseCatalog):
     service_name: str = "do-nothing"
+    def get_summary(self) -> Dict[str, Any]:
+        return {}
     def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
         """
         Does nothing

{runnable-0.9.1 → runnable-0.11.0}/runnable/cli.py RENAMED Viewed

@@ -60,6 +60,7 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id):  # prag
                                     provided
     """
     logger.setLevel(log_level)
     entrypoints.execute(
         configuration_file=config_file,
         pipeline_file=file,

{runnable-0.9.1 → runnable-0.11.0}/runnable/context.py RENAMED Viewed

@@ -1,11 +1,11 @@
 from typing import Dict, Optional
-from pydantic import BaseModel, SerializeAsAny
+from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
+from rich.progress import Progress
 from runnable.catalog import BaseCatalog
 from runnable.datastore import BaseRunLogStore
 from runnable.executor import BaseExecutor
-from runnable.experiment_tracker import BaseExperimentTracker
 from runnable.graph import Graph
 from runnable.pickler import BasePickler
 from runnable.secrets import BaseSecrets
@@ -16,8 +16,10 @@ class Context(BaseModel):
     run_log_store: SerializeAsAny[BaseRunLogStore]
     secrets_handler: SerializeAsAny[BaseSecrets]
     catalog_handler: SerializeAsAny[BaseCatalog]
-    experiment_tracker: SerializeAsAny[BaseExperimentTracker]
     pickler: SerializeAsAny[BasePickler]
+    progress: SerializeAsAny[Optional[Progress]] = Field(default=None, exclude=True)
+    model_config = ConfigDict(arbitrary_types_allowed=True)
     pipeline_file: Optional[str] = ""
     parameters_file: Optional[str] = ""

{runnable-0.9.1 → runnable-0.11.0}/runnable/datastore.py RENAMED Viewed

@@ -4,23 +4,29 @@ import logging
 import os
 from abc import ABC, abstractmethod
 from datetime import datetime
-from typing import Annotated, Any, Dict, List, Literal, Optional, OrderedDict, Tuple, Union
+from typing import (
+    Annotated,
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    OrderedDict,
+    Tuple,
+    Union,
+)
 from pydantic import BaseModel, Field, computed_field
-from typing_extensions import TypeAliasType
 import runnable.context as context
 from runnable import defaults, exceptions
 logger = logging.getLogger(defaults.LOGGER_NAME)
-# Once defined these classes are sealed to any additions unless a default is provided
-# Breaking this rule might make runnable backwardly incompatible
-JSONType = TypeAliasType(
-    "JSONType",
-    Union[bool, int, float, str, None, List["JSONType"], Dict[str, "JSONType"]],  # type: ignore
-)
+JSONType = Union[
+    str, int, float, bool, List[Any], Dict[str, Any]
+]  # This is actually JSONType, but pydantic doesn't support TypeAlias yet
 class DataCatalog(BaseModel, extra="allow"):
@@ -62,10 +68,29 @@ The theory behind reduced:
 class JsonParameter(BaseModel):
     kind: Literal["json"]
-    value: JSONType  # type: ignore
+    value: JSONType
+    reduced: bool = True
+    @computed_field  # type: ignore
+    @property
+    def description(self) -> JSONType:
+        return self.value
+    def get_value(self) -> JSONType:
+        return self.value
+class MetricParameter(BaseModel):
+    kind: Literal["metric"]
+    value: JSONType
     reduced: bool = True
-    def get_value(self) -> JSONType:  # type: ignore
+    @computed_field  # type: ignore
+    @property
+    def description(self) -> JSONType:
+        return self.value
+    def get_value(self) -> JSONType:
         return self.value
@@ -100,7 +125,7 @@ class ObjectParameter(BaseModel):
         os.remove(self.file_name)  # Remove after loading
-Parameter = Annotated[Union[JsonParameter, ObjectParameter], Field(discriminator="kind")]
+Parameter = Annotated[Union[JsonParameter, ObjectParameter, MetricParameter], Field(discriminator="kind")]
 class StepAttempt(BaseModel):
@@ -115,6 +140,7 @@ class StepAttempt(BaseModel):
     message: str = ""
     input_parameters: Dict[str, Parameter] = Field(default_factory=dict)
     output_parameters: Dict[str, Parameter] = Field(default_factory=dict)
+    user_defined_metrics: Dict[str, Parameter] = Field(default_factory=dict)
     @property
     def duration(self):
@@ -149,10 +175,43 @@ class StepLog(BaseModel):
     mock: bool = False
     code_identities: List[CodeIdentity] = Field(default_factory=list)
     attempts: List[StepAttempt] = Field(default_factory=list)
-    user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)
     branches: Dict[str, BranchLog] = Field(default_factory=dict)
     data_catalog: List[DataCatalog] = Field(default_factory=list)
+    def get_summary(self) -> Dict[str, Any]:
+        """
+        Summarize the step log to log
+        """
+        summary: Dict[str, Any] = {}
+        summary["Name"] = self.internal_name
+        summary["Input catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "get"]
+        summary["Available parameters"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.input_parameters.items()
+        ]
+        summary["Output catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "put"]
+        summary["Output parameters"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.output_parameters.items()
+        ]
+        summary["Metrics"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.user_defined_metrics.items()
+        ]
+        cis = []
+        for ci in self.code_identities:
+            message = f"{ci.code_identifier_type}:{ci.code_identifier}"
+            if not ci.code_identifier_dependable:
+                message += " but is not dependable"
+            cis.append(message)
+        summary["Code identities"] = cis
+        summary["status"] = self.status
+        return summary
     def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
         """
         Given a stage, return the data catalogs according to the stage
@@ -242,6 +301,22 @@ class RunLog(BaseModel):
     parameters: Dict[str, Parameter] = Field(default_factory=dict)
     run_config: Dict[str, Any] = Field(default_factory=dict)
+    def get_summary(self) -> Dict[str, Any]:
+        summary: Dict[str, Any] = {}
+        _context = context.run_context
+        summary["Unique execution id"] = self.run_id
+        summary["status"] = self.status
+        summary["Catalog Location"] = _context.catalog_handler.get_summary()
+        summary["Full Run log present at: "] = _context.run_log_store.get_summary()
+        summary["Final Parameters"] = {p: v.description for p, v in self.parameters.items()}
+        summary["Collected metrics"] = {p: v.description for p, v in self.parameters.items() if v.kind == "metric"}
+        return summary
     def get_data_catalogs_by_stage(self, stage: str = "put") -> List[DataCatalog]:
         """
         Return all the cataloged data by the stage at which they were cataloged.
@@ -360,6 +435,10 @@ class BaseRunLogStore(ABC, BaseModel):
     service_name: str = ""
     service_type: str = "run_log_store"
+    @abstractmethod
+    def get_summary(self) -> Dict[str, Any]:
+        ...
     @property
     def _context(self):
         return context.run_context
@@ -693,6 +772,11 @@ class BufferRunLogstore(BaseRunLogStore):
     service_name: str = "buffered"
     run_log: Optional[RunLog] = Field(default=None, exclude=True)  # For a buffered Run Log, this is the database
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": "Not persisted"}
+        return summary
     def create_run_log(
         self,
         run_id: str,

{runnable-0.9.1 → runnable-0.11.0}/runnable/defaults.py RENAMED Viewed

@@ -1,17 +1,10 @@
-# mypy: ignore-errors
-# The above should be done until https://github.com/python/mypy/issues/8823
 from enum import Enum
+from typing import TypedDict  # type: ignore[unused-ignore]
 from typing import Any, Dict, Mapping, Optional, Union
+from rich.style import Style
 from typing_extensions import TypeAlias
-# TODO: This is not the correct way to do this.
-try:  # pragma: no cover
-    from typing import TypedDict  # type: ignore[unused-ignore]
-except ImportError:  # pragma: no cover
-    from typing_extensions import TypedDict  # type: ignore[unused-ignore]
 NAME = "runnable"
 LOGGER_NAME = "runnable"
@@ -182,3 +175,10 @@ LOGGING_CONFIG = {
         LOGGER_NAME: {"handlers": ["runnable_handler"], "propagate": False},
     },
 }
+# styles
+error_style = Style(color="red", bold=True)
+warning_style = Style(color="yellow", bold=True)
+success_style = Style(color="green", bold=True)
+info_style = Style(color="blue", bold=True)

{runnable-0.9.1 → runnable-0.11.0}/runnable/entrypoints.py RENAMED Viewed

@@ -5,10 +5,11 @@ import os
 import sys
 from typing import Optional, cast
-from rich import print
+from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
+from rich.table import Column
 import runnable.context as context
-from runnable import defaults, graph, utils
+from runnable import console, defaults, graph, utils
 from runnable.defaults import RunnableConfig, ServiceConfig
 logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -64,6 +65,8 @@ def prepare_configurations(
     configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)
+    logger.info(f"Resolved configurations: {configuration}")
     # Run log settings, configuration over-rides everything
     run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
     if not run_log_config:
@@ -86,14 +89,6 @@ def prepare_configurations(
     pickler_config = cast(ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER))
     pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)
-    # experiment tracker settings, configuration over-rides everything
-    tracker_config: Optional[ServiceConfig] = configuration.get("experiment_tracker", None)
-    if not tracker_config:
-        tracker_config = cast(
-            ServiceConfig, runnable_defaults.get("experiment_tracker", defaults.DEFAULT_EXPERIMENT_TRACKER)
-        )
-    tracker_handler = utils.get_provider_by_name_and_type("experiment_tracker", tracker_config)
     # executor configurations, configuration over rides everything
     executor_config: Optional[ServiceConfig] = configuration.get("executor", None)
     if force_local_executor:
@@ -110,7 +105,6 @@ def prepare_configurations(
         catalog_handler=catalog_handler,
         secrets_handler=secrets_handler,
         pickler=pickler_handler,
-        experiment_tracker=tracker_handler,
         variables=variables,
         tag=tag,
         run_id=run_id,
@@ -176,8 +170,8 @@ def execute(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     executor = run_context.executor
@@ -188,8 +182,28 @@ def execute(
     # Prepare for graph execution
     executor.prepare_for_graph_execution()
-    logger.info("Executing the graph")
-    executor.execute_graph(dag=run_context.dag)  # type: ignore
+    logger.info(f"Executing the graph: {run_context.dag}")
+    with Progress(
+        TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)),
+        BarColumn(table_column=Column(ratio=1), style="dark_orange"),
+        TimeElapsedColumn(table_column=Column(ratio=1)),
+        console=console,
+        expand=True,
+    ) as progress:
+        pipeline_execution_task = progress.add_task("[dark_orange] Starting execution .. ", total=1)
+        try:
+            run_context.progress = progress
+            executor.execute_graph(dag=run_context.dag)  # type: ignore
+            run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False)
+            if run_log.status == defaults.SUCCESS:
+                progress.update(pipeline_execution_task, description="[green] Success", completed=True)
+            else:
+                progress.update(pipeline_execution_task, description="[red] Failed", completed=True)
+        except Exception as e:  # noqa: E722
+            console.print(e, style=defaults.error_style)
+            progress.update(pipeline_execution_task, description="[red] Errored execution", completed=True)
     executor.send_return_code()
@@ -227,8 +241,8 @@ def execute_single_node(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -280,8 +294,8 @@ def execute_notebook(
     run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
     utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     step_config = {
         "command": notebook_file,
@@ -342,8 +356,8 @@ def execute_function(
     run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
     utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     # Prepare the graph with a single node
     step_config = {
@@ -411,8 +425,8 @@ def fan(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -437,4 +451,4 @@ def fan(
 if __name__ == "__main__":
     # This is only for perf testing purposes.
-    prepare_configurations(run_id="abc", pipeline_file="example/mocking.yaml")
+    prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")

{runnable-0.9.1 → runnable-0.11.0}/runnable/exceptions.py RENAMED Viewed

@@ -92,3 +92,7 @@ class ExecutionFailedError(Exception):  # pragma: no cover
     def __init__(self, run_id: str):
         super().__init__()
         self.message = f"Execution failed for run id: {run_id}"
+class CommandCallError(Exception):  # pragma: no cover
+    "An exception during the call of the command"

{runnable-0.9.1 → runnable-0.11.0}/runnable/extensions/catalog/file_system/implementation.py RENAMED Viewed

@@ -2,7 +2,7 @@ import logging
 import os
 import shutil
 from pathlib import Path
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 from runnable import defaults, utils
 from runnable.catalog import BaseCatalog
@@ -34,6 +34,13 @@ class FileSystemCatalog(BaseCatalog):
     def get_catalog_location(self):
         return self.catalog_location
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Catalog Location": self.get_catalog_location(),
+        }
+        return summary
     def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
         """
         Get the file by matching glob pattern to the name

runnable 0.9.1__tar.gz → 0.11.0__tar.gz

runnable 0.9.1__tar.gz → 0.11.0__tar.gz