runnable 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +34 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +272 -0
- runnable/context.py +34 -0
- runnable/datastore.py +686 -0
- runnable/defaults.py +179 -0
- runnable/entrypoints.py +484 -0
- runnable/exceptions.py +94 -0
- runnable/executor.py +431 -0
- runnable/experiment_tracker.py +139 -0
- runnable/extensions/catalog/__init__.py +21 -0
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +226 -0
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
- runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
- runnable/extensions/executor/__init__.py +714 -0
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +1182 -0
- runnable/extensions/executor/argo/specification.yaml +51 -0
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
- runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
- runnable/extensions/executor/local/__init__.py +0 -0
- runnable/extensions/executor/local/implementation.py +69 -0
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +367 -0
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +220 -0
- runnable/extensions/experiment_tracker/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
- runnable/extensions/nodes.py +675 -0
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
- runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +136 -0
- runnable/extensions/run_log_store/generic_chunked.py +541 -0
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +100 -0
- runnable/extensions/secrets/env_secrets/__init__.py +0 -0
- runnable/extensions/secrets/env_secrets/implementation.py +42 -0
- runnable/graph.py +464 -0
- runnable/integration.py +205 -0
- runnable/interaction.py +399 -0
- runnable/names.py +546 -0
- runnable/nodes.py +489 -0
- runnable/parameters.py +183 -0
- runnable/pickler.py +102 -0
- runnable/sdk.py +470 -0
- runnable/secrets.py +95 -0
- runnable/tasks.py +392 -0
- runnable/utils.py +630 -0
- runnable-0.2.0.dist-info/METADATA +437 -0
- runnable-0.2.0.dist-info/RECORD +69 -0
- runnable-0.2.0.dist-info/entry_points.txt +44 -0
- runnable-0.1.0.dist-info/METADATA +0 -16
- runnable-0.1.0.dist-info/RECORD +0 -6
- /runnable/{.gitkeep → extensions/__init__.py} +0 -0
- {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/LICENSE +0 -0
- {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/WHEEL +0 -0
runnable/integration.py
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
from stevedore import extension
|
4
|
+
|
5
|
+
from runnable import defaults
|
6
|
+
from runnable.executor import BaseExecutor
|
7
|
+
|
8
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
9
|
+
logging.getLogger("stevedore").setLevel(logging.CRITICAL)
|
10
|
+
|
11
|
+
# --8<-- [start:docs]
|
12
|
+
|
13
|
+
|
14
|
+
class BaseIntegration:
    """
    Base class for handling integration between Executor and one of Catalog, Secrets, RunLogStore.

    Subclasses declare which executor/service pairing they apply to via the three
    class attributes below and override the hooks as needed. Every hook is a no-op
    here, so the base class doubles as the "nothing to change" fallback handler.
    """

    # Executor this handler applies to; "" means it applies to any executor.
    executor_type = ""
    # One of secret, catalog, datastore, experiment tracker.
    service_type = ""
    # The actual implementation of the service (e.g. "buffered", "do-nothing").
    service_provider = ""

    def __init__(self, executor: "BaseExecutor", integration_service: object):
        self.service = integration_service
        self.executor = executor

    def validate(self, **kwargs):
        """
        Raise an exception if the executor_type is not compatible with service provider.

        By default, it is considered as compatible.
        """

    def configure_for_traversal(self, **kwargs):
        """
        Do any changes needed to both executor and service provider during traversal of the graph.

        By default, no change is required.
        """

    def configure_for_execution(self, **kwargs):
        """
        Do any changes needed to both executor and service provider during execution of a node.

        By default, no change is required.
        """
|
47
|
+
|
48
|
+
|
49
|
+
# --8<-- [end:docs]
|
50
|
+
|
51
|
+
|
52
|
+
def get_integration_handler(executor: "BaseExecutor", service: object) -> BaseIntegration:
    """
    Return the integration handler between executor and the service.

    If none found to be implemented, return the BaseIntegration which does nothing.

    Args:
        executor (BaseExecutor): The executor
        service (object): The service provider

    Returns:
        [BaseIntegration]: The implemented integration handler or BaseIntegration if none found

    Raises:
        Exception: If multiple integrations are found for the executor and service
    """
    service_type = service.service_type  # type: ignore
    service_name = getattr(service, "service_name")
    integrations = []

    # Get all the integrations defined by the 3rd party in their pyproject.toml.
    # stevedore discovers classes registered under the "integration" entry-point
    # namespace and, with invoke_on_load, instantiates each with this executor/service pair.
    mgr = extension.ExtensionManager(
        namespace="integration",
        invoke_on_load=True,
        invoke_kwds={"executor": executor, "integration_service": service},
    )
    # Third-party handlers must match executor name, service type and provider exactly.
    for _, kls in mgr.items():
        if (
            kls.obj.executor_type == executor.service_name
            and kls.obj.service_type == service_type
            and kls.obj.service_provider == service_name
        ):
            logger.info(f"Identified an integration pattern {kls.obj}")
            integrations.append(kls.obj)

    # Get all the implementations defined natively by this package (magnus/runnable).
    for kls in BaseIntegration.__subclasses__():
        # Match the exact service type
        if kls.service_type == service_type and kls.service_provider == service_name:
            # Match either all executors (executor_type == "") or the specific one provided
            if kls.executor_type == "" or kls.executor_type == executor.service_name:
                integrations.append(kls(executor=executor, integration_service=service))

    # More than one match is ambiguous; refuse rather than pick one arbitrarily.
    if len(integrations) > 1:
        msg = (
            f"Multiple integrations between {executor.service_name} and {service_name} of type {service_type} found. "
            "If you defined an integration pattern, please ensure it is specific and does not conflict with magnus "
            " implementations."
        )
        logger.exception(msg)
        raise Exception(msg)

    # No match at all: fall back to the no-op BaseIntegration.
    if not integrations:
        logger.warning(
            f"Could not find an integration pattern for {executor.service_name} and {service_name} for {service_type}."
            " This implies that there is no need to change the configurations."
        )
        return BaseIntegration(executor, service)

    return integrations[0]
|
112
|
+
|
113
|
+
|
114
|
+
def validate(executor: "BaseExecutor", service: object, **kwargs):
    """
    Resolve the Integration handler for the pair and run its compatibility check.

    Args:
        executor (BaseExecutor) : The executor
        service (object): The service provider
    """
    get_integration_handler(executor, service).validate(**kwargs)
|
124
|
+
|
125
|
+
|
126
|
+
def configure_for_traversal(executor: "BaseExecutor", service: object, **kwargs):
    """
    Resolve the Integration handler for the pair and apply its graph-traversal configuration.

    Args:
        executor (BaseExecutor) : The executor
        service (object): The service provider
    """
    get_integration_handler(executor, service).configure_for_traversal(**kwargs)
|
136
|
+
|
137
|
+
|
138
|
+
def configure_for_execution(executor: "BaseExecutor", service: object, **kwargs):
    """
    Resolve the Integration handler for the pair and apply its node-execution configuration.

    Args:
        executor (BaseExecutor) : The executor
        service (object): The service provider
    """
    get_integration_handler(executor, service).configure_for_execution(**kwargs)
|
148
|
+
|
149
|
+
|
150
|
+
class BufferedRunLogStore(BaseIntegration):
    """
    Integration between any executor and buffered run log store
    """

    service_type = "run_log_store"  # the datastore flavour of service
    service_provider = "buffered"  # in-memory, non-persistent run log store

    def validate(self, **kwargs):
        # A buffered store lives in process memory, so only the local executor can share it.
        if self.executor.service_name != "local":
            raise Exception("Buffered run log store is only supported for local executor")

        logger.warning(
            "Run log generated by buffered run log store are not persisted. "
            "Re-running this run, in case of a failure, is not possible"
        )
|
167
|
+
|
168
|
+
|
169
|
+
class DoNothingCatalog(BaseIntegration):
    """
    Integration between any executor and do nothing catalog
    """

    service_type = "catalog"  # the catalog flavour of service
    service_provider = "do-nothing"  # the no-op catalog implementation

    def validate(self, **kwargs):
        # Informational only: make the user aware data cannot flow between nodes.
        logger.warning("A do-nothing catalog does not hold any data and therefore cannot pass data between nodes.")
|
180
|
+
|
181
|
+
|
182
|
+
class DoNothingSecrets(BaseIntegration):
    """
    Integration between any executor and do nothing secrets
    """

    service_type = "secrets"  # the secrets flavour of service
    service_provider = "do-nothing"  # the no-op secrets implementation

    def validate(self, **kwargs):
        # Informational only: make the user aware no secrets can be retrieved.
        logger.warning("A do-nothing secrets does not hold any secrets and therefore cannot return you any secrets.")
|
193
|
+
|
194
|
+
|
195
|
+
class DoNothingExperimentTracker(BaseIntegration):
    """
    Integration between any executor and do nothing experiment tracker
    """

    service_type = "experiment_tracker"  # the experiment tracker flavour of service
    service_provider = "do-nothing"  # the no-op tracker implementation

    def validate(self, **kwargs):
        # Informational only: make the user aware nothing will be tracked.
        logger.warning("A do-nothing experiment tracker does nothing and therefore cannot track anything.")
|
runnable/interaction.py
ADDED
@@ -0,0 +1,399 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import json
|
4
|
+
import logging
|
5
|
+
import os
|
6
|
+
from functools import wraps
|
7
|
+
from typing import Any, ContextManager, Dict, Optional, TypeVar, Union, cast, overload
|
8
|
+
|
9
|
+
from pydantic import BaseModel
|
10
|
+
|
11
|
+
import runnable.context as context
|
12
|
+
from runnable import defaults, exceptions, parameters, pickler, utils
|
13
|
+
from runnable.datastore import RunLog, StepLog
|
14
|
+
|
15
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
16
|
+
|
17
|
+
CastT = TypeVar("CastT")
|
18
|
+
|
19
|
+
|
20
|
+
def check_context(func):
    """
    Decorator guarding the interactive helpers: the wrapped function only runs
    when an executor (and its services) is active on the global run context.
    """

    @wraps(func)
    def wrapped(*args, **kwargs):
        if context.run_context.executor:
            return func(*args, **kwargs)
        raise Exception(
            "There are no active executor and services. This should not have happened and is a bug."
            " Please raise a bug report."
        )

    return wrapped
|
33
|
+
|
34
|
+
|
35
|
+
@check_context
def track_this(step: int = 0, **kwargs):
    """
    Tracks key-value pairs to the experiment tracker.

    The value is dumped as a dict, by alias, if it is a pydantic model.

    Args:
        step (int, optional): The step to track the data at. Defaults to 0.
        **kwargs (dict): The key-value pairs to track.

    Examples:
        >>> track_this(step=0, my_int_param=123, my_float_param=123.45, my_str_param='hello world')
        >>> track_this(step=1, my_int_param=456, my_float_param=456.78, my_str_param='goodbye world')
    """
    for name, payload in kwargs.items():
        logger.info(f"Tracking {name} with value: {payload}")

        # Pydantic models are flattened to plain dicts (by alias) so they survive json.dumps.
        if isinstance(payload, BaseModel):
            payload = payload.model_dump(by_alias=True)

        # Stage in the environment; the executor harvests these after the task completes.
        env_key = f"{defaults.TRACK_PREFIX}{name}{defaults.STEP_INDICATOR}{step}"
        os.environ[env_key] = json.dumps(payload)
|
59
|
+
|
60
|
+
|
61
|
+
@check_context
def set_parameter(**kwargs) -> None:
    """
    Store a set of parameters.

    !!! note
        The parameters are not stored in run log at this point in time.
        They are collected now and stored in the run log after completion of the task.

    Parameters:
        **kwargs (dict): A dictionary of key-value pairs to store as parameters.

    Returns:
        None

    Examples:
        >>> set_parameter(my_int_param=123, my_float_param=123.45, my_str_param='hello world')
        >>> get_parameter('my_int_param', int)
        123

        >>> # Pydantic models work too
        >>> set_parameter(my_model_param=MyModel(field1='value1', field2=2))
        >>> get_parameter('my_model_param', MyModel)
        MyModel(field1='value1', field2=2)
    """
    # Parameters are staged as environment variables until the task finishes.
    parameters.set_user_defined_params_as_environment_variables(kwargs)
|
97
|
+
|
98
|
+
|
99
|
+
@overload
def get_parameter(key: str, cast_as: Optional[CastT]) -> CastT:
    ...


@overload
def get_parameter(cast_as: Optional[CastT]) -> CastT:
    ...


@check_context
def get_parameter(key: Optional[str] = None, cast_as: Optional[CastT] = None) -> Union[Dict[str, Any], CastT]:
    """
    Get a parameter by its key; with no key, all parameters are returned.

    cast_as is not required for JSON supported types (int, float, bool, str).
    For complex nested parameters, cast_as can package them into a pydantic model;
    without it, nested structures remain dicts. Note that cast_as is the pydantic
    model class itself, not an instance.

    Args:
        key (str, optional): The key of the parameter to retrieve. If not provided, all parameters will be returned.
        cast_as (Type, optional): The type to cast the parameter to. If not provided, the type will remain as it is
            for simple data types (int, float, bool, str). For nested parameters, it would be a dict.

    Raises:
        Exception: If the parameter does not exist and key is provided.
        ValidationError: If the parameter cannot be cast as pydantic model, when cast_as is provided.

    Examples:
        >>> get_parameter('my_int_param', int)
        123
        >>> get_parameter('my_model_param', MyModel)
        MyModel(field1='value1', field2=2)
        >>> get_parameter(cast_as=MyModel)
        MyModel(field1='value1', field2=2)
    """
    known = parameters.get_user_set_parameters(remove=False)

    if key:
        if key not in known:
            raise Exception(f"Parameter {key} is not set before")
        value: Any = known[key]
    else:
        # No key requested: hand back the whole parameter set.
        value = known

    return cast(CastT, parameters.cast_parameters_as_type(value, cast_as))  # type: ignore
|
156
|
+
|
157
|
+
|
158
|
+
@check_context
def get_secret(secret_name: str) -> str:
    """
    Retrieve a secret from the secret store.

    Args:
        secret_name (str): The name of the secret to retrieve.

    Raises:
        SecretNotFoundError: If the secret does not exist in the store.

    Returns:
        str: The secret value.
    """
    try:
        return context.run_context.secrets_handler.get(name=secret_name)
    except exceptions.SecretNotFoundError:
        # Log with traceback, then let the caller decide how to handle the miss.
        logger.exception(f"No secret by the name {secret_name} found in the store")
        raise
|
178
|
+
|
179
|
+
|
180
|
+
@check_context
def get_from_catalog(name: str, destination_folder: str = ""):
    """
    Get data from the catalog.

    The name can be a wildcard pattern following globing rules.

    Args:
        name (str): The name of the data catalog entry.
        destination_folder (str, optional): The destination folder to download the data to.
            If not provided, the default destination folder set in the catalog will be used.
    """
    if not destination_folder:
        destination_folder = context.run_context.catalog_handler.compute_data_folder

    # NOTE(review): destination_folder is resolved above but never forwarded to
    # catalog_handler.get — presumably the handler falls back to its own
    # compute_data_folder; confirm against the catalog implementation.
    data_catalog = context.run_context.catalog_handler.get(
        name,
        run_id=context.run_context.run_id,
    )

    step_log = context.run_context.executor._context_step_log
    if step_log:
        step_log.add_data_catalogs(data_catalog)
    else:
        logger.warning("Step log context was not found during interaction! The step log will miss the record")
|
204
|
+
|
205
|
+
|
206
|
+
@check_context
def put_in_catalog(filepath: str):
    """
    Add a file or folder to the data catalog.
    You can use wild cards following globing rules.

    Args:
        filepath (str): The path to the file or folder added to the catalog
    """
    data_catalog = context.run_context.catalog_handler.put(
        filepath,
        run_id=context.run_context.run_id,
    )
    if not data_catalog:
        # NOTE(review): we warn here but still record the (empty) result below,
        # matching the original behaviour — verify add_data_catalogs tolerates it.
        logger.warning(f"No catalog was done by the {filepath}")

    step_log = context.run_context.executor._context_step_log
    if step_log:
        step_log.add_data_catalogs(data_catalog)
    else:
        logger.warning("Step log context was not found during interaction! The step log will miss the record")
|
227
|
+
|
228
|
+
|
229
|
+
@check_context
def put_object(data: Any, name: str):
    """
    Serialize and store a python object in the data catalog.

    This function behaves the same as `put_in_catalog`
    but with python objects.

    Args:
        data (Any): The python data object to store.
        name (str): The name to store it against.
    """
    serializer = pickler.NativePickler()
    serializer.dump(data=data, path=name)

    local_file = f"{name}{serializer.extension}"
    put_in_catalog(local_file)

    # The cataloged copy is authoritative; drop the local scratch file.
    os.remove(local_file)
|
248
|
+
|
249
|
+
|
250
|
+
@check_context
def get_object(name: str) -> Any:
    """
    Retrieve and deserialize a python object from the data catalog.

    This function behaves the same as `get_from_catalog` but with
    python objects.

    Returns:
        Any : The object
    """
    deserializer = pickler.NativePickler()
    local_file = f"{name}{deserializer.extension}"

    # Pull the pickled file into the current working directory first.
    get_from_catalog(name=local_file, destination_folder=".")

    try:
        data = deserializer.load(name)
        os.remove(local_file)  # scratch copy no longer needed
        return data
    except FileNotFoundError as e:
        msg = f"No object by the name {name} has been put in the catalog before."
        logger.exception(msg)
        raise e
|
275
|
+
|
276
|
+
|
277
|
+
@check_context
def get_run_id() -> str:
    """
    Returns the run_id of the current run.

    You can also access this from the environment variable `MAGNUS_RUN_ID`.
    """
    return context.run_context.run_id
|
285
|
+
|
286
|
+
|
287
|
+
@check_context
def get_run_log() -> RunLog:
    """
    Returns the run_log of the current run.

    The return is a deep copy of the run log to prevent the caller from
    mutating the store's copy.
    """
    run_log = context.run_context.run_log_store.get_run_log_by_id(
        context.run_context.run_id,
        full=True,
    )
    # model_copy is the pydantic v2 spelling; BaseModel.copy is deprecated in v2
    # (this module already uses the v2 API, e.g. model_dump in track_this).
    return run_log.model_copy(deep=True)
|
298
|
+
|
299
|
+
|
300
|
+
@check_context
def get_tag() -> str:
    """
    Returns the tag from the environment.

    Returns:
        str: The tag if provided for the run, otherwise None
    """
    return context.run_context.tag
|
309
|
+
|
310
|
+
|
311
|
+
@check_context
def get_experiment_tracker_context() -> ContextManager:
    """
    Return a context session of the experiment tracker.

    You can start to use the context with the python ```with``` statement.
    """
    return context.run_context.experiment_tracker.client_context
|
320
|
+
|
321
|
+
|
322
|
+
def start_interactive_session(run_id: str = "", config_file: str = "", tag: str = "", parameters_file: str = ""):
    """
    During interactive python coding, either via notebooks or ipython, you can start a magnus session by calling
    this function. The executor would always be local executor as its interactive.

    If this was called during a pipeline/function/notebook execution, it will be ignored.

    Args:
        run_id (str, optional): The run id to use. Defaults to "" and would be created if not provided.
        config_file (str, optional): The configuration file to use. Defaults to "" and magnus defaults.
        tag (str, optional): The tag to attach to the run. Defaults to "".
        parameters_file (str, optional): The parameters file to use. Defaults to "".
    """

    from runnable import entrypoints, graph  # pylint: disable=import-outside-toplevel

    if context.run_context.executor:
        # Fixed: logger.warn is a deprecated alias of logger.warning.
        logger.warning("This is not an interactive session or a session has already been activated.")
        return

    run_id = utils.generate_run_id(run_id=run_id)
    # Interactive sessions are always executed locally (force_local_executor=True).
    context.run_context = entrypoints.prepare_configurations(
        configuration_file=config_file,
        run_id=run_id,
        tag=tag,
        parameters_file=parameters_file,
        force_local_executor=True,
    )

    executor = context.run_context.executor

    utils.set_magnus_environment_variables(run_id=run_id, configuration_file=config_file, tag=tag)

    context.run_context.execution_plan = defaults.EXECUTION_PLAN.INTERACTIVE.value
    executor.prepare_for_graph_execution()

    # The whole interactive session is modelled as a single python task node.
    step_config = {
        "command": "interactive",
        "command_type": "python",
        "type": "task",
        "next": "success",
    }

    node = graph.create_node(name="interactive", step_config=step_config)
    step_log = context.run_context.run_log_store.create_step_log("interactive", node._get_step_log_name())
    executor.add_code_identities(node=node, step_log=step_log)

    step_log.step_type = node.node_type
    step_log.status = defaults.PROCESSING
    # Subsequent interactive calls (track_this, put_in_catalog, ...) record against this step log.
    executor._context_step_log = step_log
|
371
|
+
|
372
|
+
|
373
|
+
def end_interactive_session():
    """
    Ends an interactive session.

    Flushes the tracked data and user-set parameters into the run log, then
    tears down the session state on the global run context.

    Does nothing if the executor is not interactive.
    """

    if not context.run_context.executor:
        # Fixed: logger.warn is a deprecated alias of logger.warning.
        logger.warning("There is no active session in play, doing nothing!")
        return

    if context.run_context.execution_plan != defaults.EXECUTION_PLAN.INTERACTIVE.value:
        logger.warning("There is not an interactive session, doing nothing!")
        return

    tracked_data = utils.get_tracked_data()
    # remove=True: parameters are consumed from the environment as the session closes.
    set_parameters = parameters.get_user_set_parameters(remove=True)

    step_log = cast(StepLog, context.run_context.executor._context_step_log)
    step_log.user_defined_metrics = tracked_data
    context.run_context.run_log_store.add_step_log(step_log, context.run_context.run_id)

    context.run_context.run_log_store.set_parameters(context.run_context.run_id, set_parameters)

    # Reset the session markers so a new interactive session can be started.
    context.run_context.executor._context_step_log = None
    context.run_context.execution_plan = ""
    context.run_context.executor = None  # type: ignore
|