runnable-0.50.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/any_path.py +214 -0
- extensions/catalog/file_system.py +52 -0
- extensions/catalog/minio.py +72 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/catalog/s3.py +11 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +236 -0
- extensions/job_executor/emulate.py +70 -0
- extensions/job_executor/k8s.py +553 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +35 -0
- extensions/job_executor/local_container.py +161 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/__init__.py +0 -0
- extensions/nodes/conditional.py +301 -0
- extensions/nodes/fail.py +78 -0
- extensions/nodes/loop.py +394 -0
- extensions/nodes/map.py +477 -0
- extensions/nodes/parallel.py +281 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/nodes/stub.py +93 -0
- extensions/nodes/success.py +78 -0
- extensions/nodes/task.py +156 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +871 -0
- extensions/pipeline_executor/argo.py +1266 -0
- extensions/pipeline_executor/emulate.py +119 -0
- extensions/pipeline_executor/local.py +226 -0
- extensions/pipeline_executor/local_container.py +369 -0
- extensions/pipeline_executor/mocked.py +159 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/any_path.py +100 -0
- extensions/run_log_store/chunked_fs.py +122 -0
- extensions/run_log_store/chunked_minio.py +141 -0
- extensions/run_log_store/file_system.py +91 -0
- extensions/run_log_store/generic_chunked.py +549 -0
- extensions/run_log_store/minio.py +114 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- runnable/__init__.py +108 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +484 -0
- runnable/context.py +730 -0
- runnable/datastore.py +1058 -0
- runnable/defaults.py +159 -0
- runnable/entrypoints.py +390 -0
- runnable/exceptions.py +137 -0
- runnable/executor.py +561 -0
- runnable/gantt.py +1646 -0
- runnable/graph.py +501 -0
- runnable/names.py +546 -0
- runnable/nodes.py +593 -0
- runnable/parameters.py +217 -0
- runnable/pickler.py +96 -0
- runnable/sdk.py +1277 -0
- runnable/secrets.py +92 -0
- runnable/tasks.py +1268 -0
- runnable/telemetry.py +142 -0
- runnable/utils.py +423 -0
- runnable-0.50.0.dist-info/METADATA +189 -0
- runnable-0.50.0.dist-info/RECORD +72 -0
- runnable-0.50.0.dist-info/WHEEL +4 -0
- runnable-0.50.0.dist-info/entry_points.txt +53 -0
- runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
runnable/defaults.py
ADDED
@@ -0,0 +1,159 @@
+from typing import Any, Dict, Optional, OrderedDict, Union
+
+from pydantic import BaseModel, Field, field_validator
+from rich.style import Style
+from typing_extensions import TypeAlias
+
+NAME = "runnable"
+LOGGER_NAME = "runnable"
+
+# CLI settings
+LOG_LEVEL = "WARNING"
+
+
+MapVariableType: TypeAlias = Optional[Dict[str, Union[str, int, float]]]
+
+# Config file environment variables
+RUNNABLE_CONFIGURATION_FILE = "RUNNABLE_CONFIGURATION_FILE"
+RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
+RUNNABLE_PARAMETERS_FILE = "RUNNABLE_PARAMETERS_FILE"
+
+# Interaction settings
+PARAMETER_PREFIX = "RUNNABLE_PRM_"
+MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
+VARIABLE_PREFIX = "RUNNABLE_VAR_"
+ENV_RUN_ID = "RUNNABLE_RUN_ID"
+RETRY_RUN_ID = "RUNNABLE_RETRY_RUN_ID"
+RETRY_INDICATOR = "RUNNABLE_RETRY_INDICATOR"
+ATTEMPT_NUMBER = "RUNNABLE_STEP_ATTEMPT"
+
+
+class MapVariableModel(BaseModel):
+    value: Any
+
+    @field_validator("value")
+    @classmethod
+    def validate_json_serializable(cls, v):
+        """Ensure the value is JSON serializable"""
+        import json
+
+        try:
+            json.dumps(v)
+            return v
+        except (TypeError, ValueError) as e:
+            raise ValueError(f"Value must be JSON serializable: {e}") from e
+
+
+class LoopIndexModel(BaseModel):
+    value: int
+
+
+class IterableParameterModel(BaseModel):
+    # {i1: {value: v1}, i2: {value: v2}} where i1 is the outer map and i2 the nested map
+    map_variable: OrderedDict[str, MapVariableModel] | None = Field(
+        default_factory=OrderedDict
+    )
+    # [ {value: v1}, {value: v2} ] for index based iteration, where the first
+    # entry is the outer loop and the second the nested loop
+    loop_variable: list[LoopIndexModel] | None = Field(default_factory=lambda: [])
+
+
+## Generated pipeline file
+GENERATED_PIPELINE_FILE = "generated_pipeline.yaml"
+
+# STATUS progression
+# For a branch: CREATED -> PROCESSING -> SUCCESS or FAIL
+# For a step: CREATED -> TRIGGERED -> PROCESSING -> SUCCESS or FAIL
+CREATED = "CREATED"
+PROCESSING = "PROCESSING"
+SUCCESS = "SUCCESS"
+FAIL = "FAIL"
+
+# Node and Command settings
+COMMAND_TYPE = "python"
+COMMAND_FRIENDLY_CHARACTER = "%"
+
+# Default services
+DEFAULT_SERVICES: dict[str, Any] = {
+    "pipeline_executor": {"type": "local", "config": {}},
+    "job_executor": {"type": "local", "config": {}},
+    "run_log_store": {"type": "file-system", "config": {}},
+    "catalog": {"type": "file-system", "config": {}},
+    "pickler": {"type": "pickle", "config": {}},
+    "secrets": {"type": "env-secrets", "config": {}},
+}
+
+# Map state
+MAP_PLACEHOLDER = "map_variable_placeholder"
+LOOP_PLACEHOLDER = "loop_variable_placeholder"
+
+# Dag node
+DAG_BRANCH_NAME = "dag"
+
+# RUN settings
+RANDOM_RUN_ID_LEN = 6
+MAX_TIME = 86400  # 1 day in seconds
+
+# User extensions
+USER_CONFIG_FILE = "runnable-config.yaml"
+
+
+# RUN log store settings
+LOG_LOCATION_FOLDER = ".run_log_store"
+
+# Data catalog settings
+CATALOG_LOCATION_FOLDER = ".catalog"
+COMPUTE_DATA_FOLDER = "."
+
+# Secrets settings
+DOTENV_FILE_LOCATION = ".env"
+
+LEN_SHA_FOR_TAG = 8
+
+# JOB CONFIG
+DEFAULT_JOB_NAME = "job"
+
+## Logging settings
+
+LOGGING_CONFIG = {
+    "version": 1,
+    "disable_existing_loggers": True,
+    "formatters": {
+        "standard": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"},
+        "runnable_formatter": {"format": "%(message)s", "datefmt": "[%X]"},
+    },
+    "handlers": {
+        "default": {
+            "formatter": "standard",
+            "class": "logging.StreamHandler",
+            "stream": "ext://sys.stdout",  # Default is stderr
+        },
+        "runnable_handler": {
+            "formatter": "runnable_formatter",
+            "class": "rich.logging.RichHandler",
+            "rich_tracebacks": True,
+        },
+    },
+    "loggers": {
+        "": {
+            "handlers": ["default"],
+            "propagate": True,
+        },  # Root logger
+        LOGGER_NAME: {"handlers": ["runnable_handler"], "propagate": False},
+    },
+}
+
+
+# styles
+error_style = Style(color="red", bold=True)
+warning_style = Style(color="yellow", bold=True)
+success_style = Style(color="green", bold=True)
+info_style = Style(color="blue", bold=True)
+
+# Hash computation settings
+HASH_ALGORITHM = "sha256"  # More secure and faster than MD5
+LARGE_FILE_THRESHOLD_BYTES = 1024 * 1024 * 1024  # 1GB
+HASH_CHUNK_SIZE = 1024 * 1024  # 1MB chunks for fingerprint hashing
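
The `iter_variable` strings consumed by the entrypoints in the next file are JSON dumps of `IterableParameterModel`. A minimal round-trip sketch, assuming pydantic v2 semantics (the module imports `field_validator`, so v2 is a safe assumption); the "region" key and its value are illustrative, not values runnable itself defines:

from collections import OrderedDict

from runnable.defaults import (
    IterableParameterModel,
    LoopIndexModel,
    MapVariableModel,
)

# One outer map iteration and one loop index, matching the shapes the
# model comments describe.
params = IterableParameterModel(
    map_variable=OrderedDict(region=MapVariableModel(value="eu-west-1")),
    loop_variable=[LoopIndexModel(value=2)],
)

serialized = params.model_dump_json()  # the form an iter_variable string takes

# Mirrors IterableParameterModel.model_validate_json(...) as called in
# runnable/entrypoints.py below.
restored = IterableParameterModel.model_validate_json(serialized)
assert restored == params

LOGGING_CONFIG follows the standard `logging.config.dictConfig` schema, so applying it is a single `logging.config.dictConfig(LOGGING_CONFIG)` call.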
runnable/entrypoints.py
ADDED
@@ -0,0 +1,390 @@
+import json
+import logging
+from typing import Optional
+
+import runnable.context as context
+from runnable import console, defaults, graph, nodes
+from runnable.defaults import IterableParameterModel
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+def execute_pipeline_yaml_spec(
+    pipeline_file: str,
+    configuration_file: str = "",
+    tag: str = "",
+    run_id: str = "",
+    parameters_file: str = "",
+):
+    # pylint: disable=R0914,R0913
+    """
+    The entry point to runnable execution for any YAML based spec.
+    The result could be:
+    - Execution of the pipeline if it is a local executor
+    - Rendering of the spec in the case of a non-local executor
+    """
+
+    service_configurations = context.ServiceConfigurations(
+        configuration_file=configuration_file,
+        execution_context=context.ExecutionContext.PIPELINE,
+    )
+    configurations = {
+        "pipeline_definition_file": pipeline_file,
+        "parameters_file": parameters_file,
+        "tag": tag,
+        "run_id": run_id,
+        "execution_mode": context.ExecutionMode.YAML,
+        "configuration_file": configuration_file,
+        **service_configurations.services,
+    }
+
+    logger.info("Resolved configurations:")
+    logger.info(json.dumps(configurations, indent=4))
+
+    run_context = context.PipelineContext.model_validate(configurations)
+
+    run_context.execute()
+
+    run_context.pipeline_executor.send_return_code()
+
+
+def execute_single_node(
+    configuration_file: str,
+    pipeline_file: str,
+    step_name: str,
+    iter_variable: str,
+    mode: str,
+    run_id: str,
+    tag: str = "",
+    parameters_file: str = "",
+):
+    """
+    This entry point is triggered during the execution of the pipeline
+    in non-local execution environments.
+
+    The mode defines how the pipeline spec is provided to runnable:
+    - yaml
+    - python
+    """
+
+    service_configurations = context.ServiceConfigurations(
+        configuration_file=configuration_file,
+        execution_context=context.ExecutionContext.PIPELINE,
+    )
+    configurations = {
+        "pipeline_definition_file": pipeline_file,
+        "parameters_file": parameters_file,
+        "tag": tag,
+        "run_id": run_id,
+        "execution_mode": mode,
+        "configuration_file": configuration_file,
+        **service_configurations.services,
+    }
+
+    logger.info("Resolved configurations:")
+    logger.info(json.dumps(configurations, indent=4))
+
+    run_context = context.PipelineContext.model_validate(configurations)
+    context.set_run_context(run_context)
+    assert run_context.dag
+
+    iteration_variable: Optional[IterableParameterModel] = None
+    if iter_variable:
+        iteration_variable = IterableParameterModel.model_validate_json(iter_variable)
+
+    step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
+    node_to_execute, _ = graph.search_node_by_internal_name(
+        run_context.dag, step_internal_name
+    )
+
+    logger.info("Executing the single node: %s", node_to_execute)
+
+    run_context.pipeline_executor.execute_node(
+        node=node_to_execute, iter_variable=iteration_variable
+    )
+
+    # run_context.pipeline_executor.send_return_code()
+
+
+def execute_single_branch(
+    branch_name: str,
+    branch: graph.Graph,
+    run_context: context.PipelineContext,
+    iter_variable: str | None = None,
+):
+    """
+    Execute a single branch in a separate process for parallel execution.
+
+    This function is designed to be called by multiprocessing to execute
+    individual branches of parallel and map nodes.
+
+    Args:
+        branch_name (str): The name/identifier of the branch
+        branch (Graph): The graph object representing the branch to execute
+        run_context (PipelineContext): The pipeline execution context
+        iter_variable (str, optional): JSON serialized iteration variables for the execution
+    """
+    # Set up branch-specific logging
+    _setup_branch_logging(branch_name)
+
+    logger.info(f"Executing single branch: {branch_name}")
+
+    try:
+        context.set_run_context(run_context)
+
+        # Convert to IterableParameterModel
+        iteration_variable: Optional[IterableParameterModel] = None
+        if iter_variable:
+            iteration_variable = IterableParameterModel.model_validate_json(
+                iter_variable
+            )
+        # Execute the branch using the pipeline executor
+        run_context.pipeline_executor.execute_graph(
+            branch, iter_variable=iteration_variable
+        )
+        logger.info(f"Branch {branch_name} completed successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Branch {branch_name} failed with error: {e}")
+        return False
+
+
+def _setup_branch_logging(branch_name: str):
+    """
+    Set up branch-specific logging with prefixes to organize parallel execution logs.
+
+    Args:
+        branch_name (str): The name of the branch to use as a prefix
+    """
+    import logging
+    import sys
+
+    # Create a custom formatter that includes the branch name
+    class BranchFormatter(logging.Formatter):
+        def __init__(self, branch_name: str):
+            self.branch_name = branch_name
+            # Extract just the meaningful part of the branch name for cleaner display
+            self.display_name = self._get_display_name(branch_name)
+            super().__init__()
+
+        def _get_display_name(self, branch_name: str) -> str:
+            """Extract a clean display name from the full branch name."""
+            # For parallel branches like 'parallel_step.branch1', use 'branch1'
+            # For map branches like 'map_state.1', use 'iter:1'
+            if "." in branch_name:
+                parts = branch_name.split(".")
+                if len(parts) >= 2:
+                    last_part = parts[-1]
+                    # Check if it looks like a map iteration (numeric)
+                    if last_part.isdigit():
+                        return f"iter:{last_part}"
+                    else:
+                        return last_part
+            return branch_name
+
+        def format(self, record):
+            # Add the branch prefix to the message
+            original_msg = record.getMessage()
+            record.msg = f"[{self.display_name}] {original_msg}"
+            record.args = ()
+
+            # Use a simple format for clarity
+            return f"{record.levelname}:{record.msg}"
+
+    # Get the root logger and add our custom formatter
+    root_logger = logging.getLogger()
+
+    # Remove existing handlers to avoid duplicate logs
+    for handler in root_logger.handlers[:]:
+        if hasattr(handler, "_branch_handler"):
+            root_logger.removeHandler(handler)
+
+    # Create a new handler with branch-specific formatting
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(BranchFormatter(branch_name))
+    handler._branch_handler = True  # type: ignore # Mark it as our custom handler
+    handler.setLevel(logging.INFO)
+
+    # Add the handler to the root logger
+    root_logger.addHandler(handler)
+    root_logger.setLevel(logging.INFO)
+
+
+def execute_job_non_local(
+    job_definition_file: str,
+    configuration_file: str = "",
+    tag: str = "",
+    run_id: str = "",
+    parameters_file: str = "",
+):
+    service_configurations = context.ServiceConfigurations(
+        configuration_file=configuration_file,
+        execution_context=context.ExecutionContext.JOB,
+    )
+    configurations = {
+        "job_definition_file": job_definition_file,
+        "parameters_file": parameters_file,
+        "tag": tag,
+        "run_id": run_id,
+        "configuration_file": configuration_file,
+        **service_configurations.services,
+    }
+
+    logger.info("Resolved configurations:")
+    logger.info(json.dumps(configurations, indent=4))
+
+    run_context = context.JobContext.model_validate(configurations)
+    context.set_run_context(run_context)
+    assert run_context.job
+
+    logger.info("Executing the job in non-local mode")
+    logger.info("Job to execute: %s", run_context.job)
+
+    try:
+        run_context.job_executor.execute_job(
+            run_context.job,
+            catalog_settings=run_context.catalog_settings,
+        )
+    finally:
+        console.print("Job execution completed. Sending return code...")
+
+        run_context.job_executor.send_return_code()
+
+
+def fan(
+    configuration_file: str,
+    pipeline_file: str,
+    step_name: str,
+    mode: str,
+    in_or_out: str,
+    iter_variable: str,
+    run_id: str,
+    tag: str = "",
+    parameters_file: str = "",
+):
+    """
+    The entry point to either fan in or out for a composite node. Only 3rd party orchestrators should use this.
+
+    It needs the same configuration set up as a full execution, because orchestrator modes can initiate the execution.
+
+    Args:
+        configuration_file (str): The configuration file.
+        pipeline_file (str): The config/dag file
+        step_name (str): The name of the step to execute, in dot path convention
+        mode (str): How the pipeline spec is provided - yaml or python
+        in_or_out (str): Whether to fan "in" or "out"
+        iter_variable (str): JSON serialized iteration variables, if any
+        run_id (str): The run id of the run.
+        tag (str): If a tag is provided at the run time
+        parameters_file (str): The parameters being sent in to the application
+    """
+    service_configurations = context.ServiceConfigurations(
+        configuration_file=configuration_file,
+        execution_context=context.ExecutionContext.PIPELINE,
+    )
+    configurations = {
+        "pipeline_definition_file": pipeline_file,
+        "parameters_file": parameters_file,
+        "tag": tag,
+        "run_id": run_id,
+        "execution_mode": mode,
+        "configuration_file": configuration_file,
+        **service_configurations.services,
+    }
+
+    logger.info("Resolved configurations:")
+    logger.info(json.dumps(configurations, indent=4))
+
+    run_context = context.PipelineContext.model_validate(configurations)
+    context.set_run_context(run_context)
+    assert run_context.dag
+
+    step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
+    node_to_execute, _ = graph.search_node_by_internal_name(
+        run_context.dag, step_internal_name
+    )
+
+    iteration_variable: Optional[IterableParameterModel] = None
+    if iter_variable:
+        iteration_variable = IterableParameterModel.model_validate_json(iter_variable)
+
+    if in_or_out == "in":
+        logger.info("Fanning in for: %s", node_to_execute)
+        run_context.pipeline_executor.fan_in(
+            node=node_to_execute, iter_variable=iteration_variable
+        )
+    elif in_or_out == "out":
+        logger.info("Fanning out for: %s", node_to_execute)
+        run_context.pipeline_executor.fan_out(
+            node=node_to_execute, iter_variable=iteration_variable
+        )
+    else:
+        raise ValueError(f"Invalid in_or_out value: {in_or_out}")
+
+
+def retry_pipeline(
+    run_id: str,
+    configuration_file: str = "",
+    tag: str = "",
+):
+    """
+    Retry a failed pipeline run from the point of failure.
+
+    This entrypoint:
+    1. Loads the run log for the given run_id
+    2. Extracts pipeline_definition_file from run_config
+    3. Sets the RUNNABLE_RETRY_RUN_ID env var
+    4. Re-executes the pipeline via the context
+
+    Args:
+        run_id: The run_id of the failed run to retry
+        configuration_file: Optional config file (defaults to local execution)
+        tag: Optional tag for the retry run
+    """
+    import os
+
+    # Set up service configurations
+    service_configurations = context.ServiceConfigurations(
+        configuration_file=configuration_file,
+        execution_context=context.ExecutionContext.PIPELINE,
+    )
+
+    # Instantiate the run log store to query the original run
+    run_log_store_config = service_configurations.services["run_log_store"]
+    store_instance = context.get_service_by_name(
+        "run_log_store", run_log_store_config, None
+    )
+    run_log = store_instance.get_run_log_by_id(run_id=run_id, full=False)
+
+    run_config = run_log.run_config
+    pipeline_definition_file = run_config.get("pipeline_definition_file", "")
+
+    if not pipeline_definition_file:
+        raise ValueError(f"No pipeline_definition_file found in run log for {run_id}")
+
+    logger.info(f"Retrying run {run_id}")
+    logger.info(f"Pipeline definition: {pipeline_definition_file}")
+
+    # Set the retry environment variable
+    os.environ[defaults.RETRY_RUN_ID] = run_id
+
+    # Create the full pipeline context and execute
+    configurations = {
+        "pipeline_definition_file": pipeline_definition_file,
+        "parameters_file": "",
+        "tag": tag,
+        "run_id": run_id,
+        "execution_mode": context.ExecutionMode.PYTHON,
+        "configuration_file": configuration_file,
+        **service_configurations.services,
+    }
+
+    run_context = context.PipelineContext.model_validate(configurations)
+    context.set_run_context(run_context)
+    run_context.execute()
+    # run_context.pipeline_executor.send_return_code()
+
+
+if __name__ == "__main__":
+    # This is only for perf testing purposes.
+    # execute_single_branch()  # Missing required arguments
+    pass
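
`BranchFormatter` is local to `_setup_branch_logging`, so its display-name rule is not importable. A self-contained sketch of the same rule, reimplemented here purely for illustration (not part of runnable's API):

def display_name(branch_name: str) -> str:
    # Mirrors BranchFormatter._get_display_name above: keep the last dotted
    # segment; a purely numeric segment is a map iteration, tagged "iter:".
    if "." in branch_name:
        last_part = branch_name.split(".")[-1]
        return f"iter:{last_part}" if last_part.isdigit() else last_part
    return branch_name


assert display_name("parallel_step.branch1") == "branch1"  # parallel branch
assert display_name("map_state.1") == "iter:1"  # map iteration
assert display_name("simple_step") == "simple_step"  # no dot path

With the handler installed, a record logged inside branch `map_state.1` therefore renders on stdout as `INFO:[iter:1] <message>`.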
runnable/exceptions.py
ADDED
@@ -0,0 +1,137 @@
+class RunLogExistsError(Exception):  # pragma: no cover
+    """Raised when a run log already exists in the datastore for the run id."""
+
+    def __init__(self, run_id):
+        self.run_id = run_id
+        message = f"Run id for {run_id} is already found in the datastore"
+        super().__init__(message)
+
+
+class JobLogNotFoundError(Exception):
+    """Raised when the job log for a run id is not found in the datastore."""
+
+    def __init__(self, run_id):
+        self.run_id = run_id
+        message = f"Job for {run_id} is not found in the datastore"
+        super().__init__(message)
+
+
+class RunLogNotFoundError(Exception):  # pragma: no cover
+    """Raised when the run log for a run id is not found in the datastore."""
+
+    def __init__(self, run_id):
+        self.run_id = run_id
+        message = f"Run id for {run_id} is not found in the datastore"
+        super().__init__(message)
+
+
+class StepLogNotFoundError(Exception):  # pragma: no cover
+    """Raised when a step log is not found in the datastore for a run id."""
+
+    def __init__(self, run_id, step_name):
+        self.run_id = run_id
+        self.step_name = step_name
+        message = f"Step log for {step_name} is not found in the datastore for Run id: {run_id}"
+        super().__init__(message)
+
+
+class BranchLogNotFoundError(Exception):  # pragma: no cover
+    """Raised when a branch log is not found in the datastore for a run id."""
+
+    def __init__(self, run_id, branch_name):
+        self.run_id = run_id
+        self.branch_name = branch_name
+        message = f"Branch log for {branch_name} is not found in the datastore for Run id: {run_id}"
+        super().__init__(message)
+
+
+class NodeNotFoundError(Exception):  # pragma: no cover
+    """Raised when a node of the given name is not found in the graph."""
+
+    def __init__(self, name):
+        self.name = name
+        message = f"Node of name {name} is not found in the graph"
+        super().__init__(message)
+
+
+class BranchNotFoundError(Exception):  # pragma: no cover
+    """Raised when a branch of the given name is not found in the graph."""
+
+    def __init__(self, name):
+        self.name = name
+        message = f"Branch of name {name} is not found in the graph"
+        super().__init__(message)
+
+
+class NodeMethodCallError(Exception):
+    """Raised when a method call on a node fails."""
+
+    def __init__(self, message):
+        super().__init__(message)
+
+
+class TerminalNodeError(Exception):  # pragma: no cover
+    def __init__(self):
+        message = "Terminal nodes do not have a next node"
+        super().__init__(message)
+
+
+class SecretNotFoundError(Exception):  # pragma: no cover
+    def __init__(self, secret_name, secret_setting):
+        self.secret_name = secret_name
+        self.secret_setting = secret_setting
+        message = f"No secret found by name: {secret_name} in {secret_setting}"
+        super().__init__(message)
+
+
+class ExecutionFailedError(Exception):  # pragma: no cover
+    def __init__(self, run_id: str):
+        self.run_id = run_id
+        message = f"Execution failed for run id: {run_id}"
+        super().__init__(message)
+
+
+class CommandCallError(Exception):  # pragma: no cover
+    """An exception during the call of the command."""
+
+
+class RetryValidationError(Exception):
+    """Raised when retry validation fails (missing run log, DAG mismatch, etc.)"""
+
+    def __init__(self, message: str, run_id: str = ""):
+        self.run_id = run_id
+        super().__init__(message)
+
+
+class EntityNotFoundError(Exception):
+    pass
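
Most of these exceptions carry their context (`run_id`, `step_name`, `branch_name`, ...) as attributes as well as in the message, so callers can branch on data instead of parsing strings. A minimal sketch, assuming only this module plus any store object exposing `get_run_log_by_id` (the method `retry_pipeline` calls above); `load_run_log` itself is a hypothetical helper:

from runnable.exceptions import RunLogNotFoundError


def load_run_log(store, run_id: str):
    """Fetch a run log, treating a missing one as a soft failure."""
    try:
        return store.get_run_log_by_id(run_id=run_id)
    except RunLogNotFoundError as err:
        # The attribute, not the message text, is the stable contract.
        print(f"No run log for {err.run_id!r}; nothing to retry.")
        return None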