PyPI - runnable - Versions diffs - 0.31.0__py3-none-any.whl → 0.32.0__py3-none-any.whl - Mend

runnable 0.31.0__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

extensions/nodes/torch.py +71 -9
extensions/nodes/torch_config.py +15 -35
extensions/tasks/torch.py +235 -0
extensions/tasks/torch_config.py +76 -0
runnable/__init__.py +2 -1
runnable/entrypoints.py +1 -0
runnable/sdk.py +50 -50
runnable/tasks.py +3 -3
{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/METADATA +2 -1
{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/RECORD +13 -11
{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/entry_points.txt +1 -0
{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/WHEEL +0 -0
{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/licenses/LICENSE +0 -0

extensions/nodes/torch.py CHANGED Viewed

@@ -4,11 +4,12 @@ import os
 import random
 import string
 from datetime import datetime
-from typing import Any, Callable
+from pathlib import Path
+from typing import Any, Callable, Optional
-from pydantic import ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, field_serializer
-from extensions.nodes.torch_config import EasyTorchConfig, InternalLogSpecs, TorchConfig
+from extensions.nodes.torch_config import EasyTorchConfig, TorchConfig
 from runnable import PythonJob, datastore, defaults
 from runnable.datastore import StepLog
 from runnable.nodes import DistributedNode
@@ -18,7 +19,7 @@ from runnable.utils import TypeMapVariable
 logger = logging.getLogger(defaults.LOGGER_NAME)
 try:
-    from torch.distributed.elastic.multiprocessing.api import DefaultLogsSpecs
+    from torch.distributed.elastic.multiprocessing.api import DefaultLogsSpecs, Std
     from torch.distributed.launcher.api import LaunchConfig, elastic_launch
 except ImportError:
@@ -28,9 +29,30 @@ print("torch is installed")
 def training_subprocess():
+    """
+    This function is called by the torch.distributed.launcher.api.elastic_launch
+    It happens in a subprocess and is responsible for executing the user's function
+    It is unrelated to the actual node execution, so any cataloging and run_log_store handling
+    should be done so that it matches the main process.
+    We have these variables to use:
+    os.environ["RUNNABLE_TORCH_COMMAND"] = self.executable.command
+    os.environ["RUNNABLE_TORCH_PARAMETERS_FILES"] = (
+        self._context.parameters_file or ""
+    )
+    os.environ["RUNNABLE_TORCH_RUN_ID"] = self._context.run_id
+    os.environ["RUNNABLE_TORCH_COPY_CONTENTS_TO"] = (
+        self._context.catalog_handler.compute_data_folder
+    )
+    os.environ["RUNNABLE_TORCH_TORCH_LOGS"] = self.log_dir or ""
+    """
     command = os.environ.get("RUNNABLE_TORCH_COMMAND")
     run_id = os.environ.get("RUNNABLE_TORCH_RUN_ID", "")
     parameters_files = os.environ.get("RUNNABLE_TORCH_PARAMETERS_FILES", "")
     process_run_id = (
         run_id
         + "-"
@@ -38,10 +60,14 @@ def training_subprocess():
         + "-"
         + "".join(random.choices(string.ascii_lowercase, k=3))
     )
+    os.environ["TORCH_DISTRIBUTED_DEBUG"] = "DETAIL"
     delete_env_vars_with_prefix("RUNNABLE_")
     func = get_callable_from_dotted_path(command)
+    # The job runs with the default configuration
+    # All the execution logs are stored in .catalog
     job = PythonJob(function=func)
     job.execute(
@@ -57,6 +83,7 @@ def training_subprocess():
         raise Exception(f"Job {process_run_id} failed")
+# TODO: Can this be replaced by utils.get_module_and_attr_names?
 def get_callable_from_dotted_path(dotted_path) -> Callable:
     try:
         # Split the path into module path and callable object
@@ -91,6 +118,7 @@ def delete_env_vars_with_prefix(prefix):
         del os.environ[var]
+# TODO: The design of this class is not final
 class TorchNode(DistributedNode, TorchConfig):
     node_type: str = Field(default="torch", serialization_alias="type")
     executable: PythonTaskType = Field(exclude=True)
@@ -131,15 +159,15 @@ class TorchNode(DistributedNode, TorchConfig):
             )
         )
-        laugch_config = LaunchConfig(
+        launch_config = LaunchConfig(
             **easy_torch_config.model_dump(
                 exclude_none=True,
             ),
             logs_specs=log_spec,
             run_id=self._context.run_id,
         )
-        print(laugch_config)
-        return laugch_config
+        logger.info(f"launch_config: {launch_config}")
+        return launch_config
     def execute(
         self,
@@ -159,13 +187,13 @@ class TorchNode(DistributedNode, TorchConfig):
         launch_config = self.get_launch_config()
         logger.info(f"launch_config: {launch_config}")
+        # ENV variables are shared with the subprocess, use that as communication
         os.environ["RUNNABLE_TORCH_COMMAND"] = self.executable.command
         os.environ["RUNNABLE_TORCH_PARAMETERS_FILES"] = (
             self._context.parameters_file or ""
         )
         os.environ["RUNNABLE_TORCH_RUN_ID"] = self._context.run_id
-        # retrieve the master address and port from the parameters
-        # default to localhost and 29500
         launcher = elastic_launch(
             launch_config,
             training_subprocess,
@@ -186,6 +214,20 @@ class TorchNode(DistributedNode, TorchConfig):
                 attempt_number=attempt_number,
             )
             logger.error(f"Error executing TorchNode: {e}")
+        finally:
+            # This can only come from the subprocess
+            if Path(".catalog").exists():
+                os.rename(".catalog", "proc_logs")
+                # Move .catalog and torch_logs to the parent node's catalog location
+                self._context.catalog_handler.put(
+                    "proc_logs/**/*", allow_file_not_found_exc=True
+                )
+            # TODO: This is not working!!
+            if self.log_dir:
+                self._context.catalog_handler.put(
+                    self.log_dir + "/**/*", allow_file_not_found_exc=True
+                )
         delete_env_vars_with_prefix("RUNNABLE_TORCH")
@@ -211,3 +253,23 @@ class TorchNode(DistributedNode, TorchConfig):
         assert (
             map_variable is None or not map_variable
         ), "TorchNode does not support map_variable"
+# This internal model makes it easier to extract the required fields
+# of log specs from user specification.
+# https://github.com/pytorch/pytorch/blob/main/torch/distributed/elastic/multiprocessing/api.py#L243
+class InternalLogSpecs(BaseModel):
+    log_dir: Optional[str] = Field(default="torch_logs")
+    redirects: str = Field(default="0")  # Std.NONE
+    tee: str = Field(default="0")  # Std.NONE
+    local_ranks_filter: Optional[set[int]] = Field(default=None)
+    model_config = ConfigDict(extra="ignore")
+    @field_serializer("redirects")
+    def convert_redirects(self, redirects: str) -> Std | dict[int, Std]:
+        return Std.from_str(redirects)
+    @field_serializer("tee")
+    def convert_tee(self, tee: str) -> Std | dict[int, Std]:
+        return Std.from_str(tee)

extensions/nodes/torch_config.py CHANGED Viewed

@@ -10,59 +10,39 @@ class StartMethod(str, Enum):
     forkserver = "forkserver"
-# min_nodes: int
-# max_nodes: int
-# nproc_per_node: int
-# logs_specs: Optional[LogsSpecs] = None
-# run_id: str = ""
-# role: str = "default_role"
-# rdzv_endpoint: str = ""
-# rdzv_backend: str = "etcd"
-# rdzv_configs: dict[str, Any] = field(default_factory=dict)
-# rdzv_timeout: int = -1
-# max_restarts: int = 3
-# monitor_interval: float = 0.1
-# start_method: str = "spawn"
-# log_line_prefix_template: Optional[str] = None
-# metrics_cfg: dict[str, str] = field(default_factory=dict)
-# local_addr: Optional[str] = None
 ## The idea is the following:
 # Users can configure any of the options present in TorchConfig class.
-# The LaunchConfig class will be created from torch config.
+# The LaunchConfig class will be created from TorchConfig.
 # The LogSpecs is sent as a parameter to the launch config.
-# None as much as possible to get
 ## NO idea of standalone and how to send it
-class InternalLogSpecs(BaseModel):
-    log_dir: Optional[str] = Field(default="torch_logs")
-    redirects: int | None = Field(default=None)
-    tee: int | None = Field(default=None)
-    local_ranks_filter: Optional[set[int]] = Field(default=None)
-    model_config = ConfigDict(extra="ignore")
+# The user sees this as part of the config of the node.
+# It is kept as similar as possible to torchrun
 class TorchConfig(BaseModel):
     model_config = ConfigDict(extra="forbid")
-    nnodes: str = Field(default="1:1", exclude=True)
-    nproc_per_node: int = Field(default=1)
+    # excluded as LaunchConfig requires min and max nodes
+    nnodes: str = Field(default="1:1", exclude=True, description="min:max")
+    nproc_per_node: int = Field(default=1, description="Number of processes per node")
     # will be used to create the log specs
+    # But they are excluded from dump as logs specs is a class for LaunchConfig
+    # from_str("0") -> Std.NONE
+    # from_str("1") -> Std.OUT
+    # from_str("0:3,1:0,2:1,3:2") -> {0: Std.ALL, 1: Std.NONE, 2: Std.OUT, 3: Std.ERR}
     log_dir: Optional[str] = Field(default="torch_logs", exclude=True)
-    redirects: int | None = Field(default=None, exclude=True)
-    tee: int | None = Field(default=None, exclude=True)
+    redirects: str = Field(default="0", exclude=True)  # Std.NONE
+    tee: str = Field(default="0", exclude=True)  # Std.NONE
     local_ranks_filter: Optional[set[int]] = Field(default=None, exclude=True)
     role: str | None = Field(default=None)
     # run_id would be the run_id of the context
     # and sent at the creation of the LaunchConfig
+    # This section is about the communication between nodes/processes
     rdzv_backend: str | None = Field(default="static")
     rdzv_endpoint: str | None = Field(default="")
     rdzv_configs: dict[str, Any] = Field(default_factory=dict)

extensions/tasks/torch.py ADDED Viewed

@@ -0,0 +1,235 @@
+import importlib
+import logging
+import os
+import random
+import string
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Optional
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, model_validator
+from ruamel.yaml import YAML
+import runnable.context as context
+from extensions.tasks.torch_config import EasyTorchConfig, TorchConfig
+from runnable import Catalog, defaults
+from runnable.datastore import StepAttempt
+from runnable.tasks import BaseTaskType
+from runnable.utils import get_module_and_attr_names
+try:
+    from torch.distributed.elastic.multiprocessing.api import DefaultLogsSpecs, Std
+    from torch.distributed.launcher.api import LaunchConfig, elastic_launch
+except ImportError:
+    raise ImportError("torch is not installed. Please install torch first.")
+logger = logging.getLogger(defaults.LOGGER_NAME)
+class TorchTaskType(BaseTaskType, TorchConfig):
+    task_type: str = Field(default="torch", serialization_alias="command_type")
+    catalog: Optional[Catalog] = Field(default=None, alias="catalog")
+    command: str
+    @model_validator(mode="before")
+    @classmethod
+    def check_secrets_and_returns(cls, data: Any) -> Any:
+        if isinstance(data, dict):
+            if "secrets" in data and data["secrets"]:
+                raise ValueError("'secrets' is not supported for torch")
+            if "returns" in data and data["returns"]:
+                raise ValueError("'returns' is not supported for torch")
+        return data
+    def get_summary(self) -> dict[str, Any]:
+        return self.model_dump(by_alias=True, exclude_none=True)
+    @property
+    def _context(self):
+        return context.run_context
+    def _get_launch_config(self) -> LaunchConfig:
+        internal_log_spec = InternalLogSpecs(**self.model_dump(exclude_none=True))
+        log_spec: DefaultLogsSpecs = DefaultLogsSpecs(
+            **internal_log_spec.model_dump(exclude_none=True)
+        )
+        easy_torch_config = EasyTorchConfig(
+            **self.model_dump(
+                exclude_none=True,
+            )
+        )
+        launch_config = LaunchConfig(
+            **easy_torch_config.model_dump(
+                exclude_none=True,
+            ),
+            logs_specs=log_spec,
+            run_id=self._context.run_id,
+        )
+        logger.info(f"launch_config: {launch_config}")
+        return launch_config
+    def execute_command(
+        self,
+        map_variable: defaults.TypeMapVariable = None,
+    ):
+        assert map_variable is None, "map_variable is not supported for torch"
+        launch_config = self._get_launch_config()
+        logger.info(f"launch_config: {launch_config}")
+        # ENV variables are shared with the subprocess, use that as communication
+        os.environ["RUNNABLE_TORCH_COMMAND"] = self.command
+        os.environ["RUNNABLE_TORCH_PARAMETERS_FILES"] = (
+            self._context.parameters_file or ""
+        )
+        os.environ["RUNNABLE_TORCH_RUN_ID"] = self._context.run_id
+        launcher = elastic_launch(
+            launch_config,
+            training_subprocess,
+        )
+        try:
+            launcher()
+            attempt_log = StepAttempt(
+                status=defaults.SUCCESS,
+                start_time=str(datetime.now()),
+                end_time=str(datetime.now()),
+                attempt_number=1,
+            )
+        except Exception as e:
+            attempt_log = StepAttempt(
+                status=defaults.FAIL,
+                start_time=str(datetime.now()),
+                end_time=str(datetime.now()),
+                attempt_number=1,
+            )
+            logger.error(f"Error executing TorchNode: {e}")
+        finally:
+            # This can only come from the subprocess
+            if Path("proc_logs").exists():
+                # Move .catalog and torch_logs to the parent node's catalog location
+                self._context.catalog_handler.put(
+                    "proc_logs/**/*", allow_file_not_found_exc=True
+                )
+            # TODO: This is not working!!
+            if self.log_dir:
+                self._context.catalog_handler.put(
+                    self.log_dir + "/**/*", allow_file_not_found_exc=True
+                )
+        delete_env_vars_with_prefix("RUNNABLE_TORCH")
+        logger.info(f"attempt_log: {attempt_log}")
+        return attempt_log
+# This internal model makes it easier to extract the required fields
+# of log specs from user specification.
+# https://github.com/pytorch/pytorch/blob/main/torch/distributed/elastic/multiprocessing/api.py#L243
+class InternalLogSpecs(BaseModel):
+    log_dir: Optional[str] = Field(default="torch_logs")
+    redirects: str = Field(default="0")  # Std.NONE
+    tee: str = Field(default="0")  # Std.NONE
+    local_ranks_filter: Optional[set[int]] = Field(default=None)
+    model_config = ConfigDict(extra="ignore")
+    @field_serializer("redirects")
+    def convert_redirects(self, redirects: str) -> Std | dict[int, Std]:
+        return Std.from_str(redirects)
+    @field_serializer("tee")
+    def convert_tee(self, tee: str) -> Std | dict[int, Std]:
+        return Std.from_str(tee)
+def delete_env_vars_with_prefix(prefix):
+    to_delete = []  # List to keep track of variables to delete
+    # Iterate over a list of all environment variable keys
+    for var in os.environ:
+        if var.startswith(prefix):
+            to_delete.append(var)
+    # Delete each of the variables collected
+    for var in to_delete:
+        del os.environ[var]
+def training_subprocess():
+    """
+    This function is called by the torch.distributed.launcher.api.elastic_launch
+    It happens in a subprocess and is responsible for executing the user's function
+    It is unrelated to the actual node execution, so any cataloging and run_log_store handling
+    should be done so that it matches the main process.
+    We have these variables to use:
+    os.environ["RUNNABLE_TORCH_COMMAND"] = self.executable.command
+    os.environ["RUNNABLE_TORCH_PARAMETERS_FILES"] = (
+        self._context.parameters_file or ""
+    )
+    os.environ["RUNNABLE_TORCH_RUN_ID"] = self._context.run_id
+    os.environ["RUNNABLE_TORCH_COPY_CONTENTS_TO"] = (
+        self._context.catalog_handler.compute_data_folder
+    )
+    os.environ["RUNNABLE_TORCH_TORCH_LOGS"] = self.log_dir or ""
+    """
+    from runnable import PythonJob  # noqa: F401
+    command = os.environ.get("RUNNABLE_TORCH_COMMAND")
+    assert command, "Command is not provided"
+    run_id = os.environ.get("RUNNABLE_TORCH_RUN_ID", "")
+    parameters_files = os.environ.get("RUNNABLE_TORCH_PARAMETERS_FILES", "")
+    process_run_id = (
+        run_id
+        + "-"
+        + os.environ.get("RANK", "")
+        + "-"
+        + "".join(random.choices(string.ascii_lowercase, k=3))
+    )
+    os.environ["TORCH_DISTRIBUTED_DEBUG"] = "DETAIL"
+    # In this subprocess there should not be any RUNNABLE environment variables
+    delete_env_vars_with_prefix("RUNNABLE_")
+    module_name, func_name = get_module_and_attr_names(command)
+    module = importlib.import_module(module_name)
+    callable_obj = getattr(module, func_name)
+    # The job runs with the default configuration
+    # All the execution logs are stored in .catalog
+    job = PythonJob(function=callable_obj)
+    config_content = {
+        "catalog": {"type": "file-system", "config": {"catalog_location": "proc_logs"}}
+    }
+    temp_config_file = Path("runnable-config.yaml")
+    with open(str(temp_config_file), "w", encoding="utf-8") as config_file:
+        yaml = YAML(typ="safe", pure=True)
+        yaml.dump(config_content, config_file)
+    job.execute(
+        parameters_file=parameters_files,
+        job_id=process_run_id,
+    )
+    # delete the temp config file
+    temp_config_file.unlink()
+    from runnable.context import run_context
+    job_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id)
+    if job_log.status == defaults.FAIL:
+        raise Exception(f"Job {process_run_id} failed")

extensions/tasks/torch_config.py ADDED Viewed

@@ -0,0 +1,76 @@
+from enum import Enum
+from typing import Any, Optional
+from pydantic import BaseModel, ConfigDict, Field, computed_field
+class StartMethod(str, Enum):
+    spawn = "spawn"
+    fork = "fork"
+    forkserver = "forkserver"
+## The idea is the following:
+# Users can configure any of the options present in TorchConfig class.
+# The LaunchConfig class will be created from TorchConfig.
+# The LogSpecs is sent as a parameter to the launch config.
+## NO idea of standalone and how to send it
+# The user sees this as part of the config of the node.
+# It is kept as similar as possible to torchrun
+class TorchConfig(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    # excluded as LaunchConfig requires min and max nodes
+    nnodes: str = Field(default="1:1", exclude=True, description="min:max")
+    nproc_per_node: int = Field(default=1, description="Number of processes per node")
+    # will be used to create the log specs
+    # But they are excluded from dump as logs specs is a class for LaunchConfig
+    # from_str("0") -> Std.NONE
+    # from_str("1") -> Std.OUT
+    # from_str("0:3,1:0,2:1,3:2") -> {0: Std.ALL, 1: Std.NONE, 2: Std.OUT, 3: Std.ERR}
+    log_dir: Optional[str] = Field(default="torch_logs", exclude=True)
+    redirects: str = Field(default="0", exclude=True)  # Std.NONE
+    tee: str = Field(default="0", exclude=True)  # Std.NONE
+    local_ranks_filter: Optional[set[int]] = Field(default=None, exclude=True)
+    role: str | None = Field(default=None)
+    # run_id would be the run_id of the context
+    # and sent at the creation of the LaunchConfig
+    # This section is about the communication between nodes/processes
+    rdzv_backend: str | None = Field(default="static")
+    rdzv_endpoint: str | None = Field(default="")
+    rdzv_configs: dict[str, Any] = Field(default_factory=dict)
+    rdzv_timeout: int | None = Field(default=None)
+    max_restarts: int | None = Field(default=None)
+    monitor_interval: float | None = Field(default=None)
+    start_method: str | None = Field(default=StartMethod.spawn)
+    log_line_prefix_template: str | None = Field(default=None)
+    local_addr: Optional[str] = None
+    # https://github.com/pytorch/pytorch/blob/main/torch/distributed/run.py#L753
+    # master_addr: str | None = Field(default="localhost")
+    # master_port: str | None = Field(default="29500")
+    # training_script: str = Field(default="dummy_training_script")
+    # training_script_args: str = Field(default="")
+class EasyTorchConfig(TorchConfig):
+    model_config = ConfigDict(extra="ignore")
+    # TODO: Validate min < max
+    @computed_field  # type: ignore
+    @property
+    def min_nodes(self) -> int:
+        return int(self.nnodes.split(":")[0])
+    @computed_field  # type: ignore
+    @property
+    def max_nodes(self) -> int:
+        return int(self.nnodes.split(":")[1])

runnable/__init__.py CHANGED Viewed

@@ -31,7 +31,8 @@ from runnable.sdk import (  # noqa
     ShellTask,
     Stub,
     Success,
-    Torch,
+    TorchJob,
+    TorchTask,
     metric,
     pickled,
 )

runnable/entrypoints.py CHANGED Viewed

@@ -129,6 +129,7 @@ def prepare_configurations(
                 ServiceConfig,
                 runnable_defaults.get("job-executor", defaults.DEFAULT_JOB_EXECUTOR),
             )
         assert job_executor_config, "Job executor is not provided"
         configured_executor = utils.get_provider_by_name_and_type(
             "job_executor", job_executor_config

runnable/sdk.py CHANGED Viewed

@@ -44,10 +44,10 @@ from runnable.tasks import TaskReturns
 logger = logging.getLogger(defaults.LOGGER_NAME)
 StepType = Union[
-    "Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map", "Torch"
+    "Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map", "TorchTask"
 ]
 if TYPE_CHECKING:
-    from extensions.nodes.torch import TorchNode
+    pass
 def pickled(name: str) -> TaskReturns:
@@ -191,6 +191,34 @@ class BaseTask(BaseTraversal):
         )
+class TorchTask(BaseTask, TorchConfig):
+    function: Callable = Field(exclude=True)
+    @field_validator("returns", mode="before")
+    @classmethod
+    def serialize_returns(
+        cls, returns: List[Union[str, TaskReturns]]
+    ) -> List[TaskReturns]:
+        assert len(returns) == 0, "Torch tasks cannot return any variables"
+        return []
+    @computed_field
+    def command_type(self) -> str:
+        return "torch"
+    @computed_field
+    def command(self) -> str:
+        module = self.function.__module__
+        name = self.function.__name__
+        return f"{module}.{name}"
+    def create_job(self) -> RunnableTask:
+        self.terminate_with_success = True
+        node = self.create_node()
+        return node.executable
 class PythonTask(BaseTask):
     """
     An execution node of the pipeline of python functions.
@@ -459,43 +487,6 @@ class Stub(BaseTraversal):
         return StubNode.parse_from_config(self.model_dump(exclude_none=True))
-class Torch(BaseTraversal, TorchConfig):
-    function: Callable = Field(exclude=True)
-    catalog: Optional[Catalog] = Field(default=None, alias="catalog")
-    overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")
-    returns: List[Union[str, TaskReturns]] = Field(
-        default_factory=list, alias="returns"
-    )
-    secrets: List[str] = Field(default_factory=list)
-    @computed_field
-    def command_type(self) -> str:
-        return "python"
-    @computed_field
-    def command(self) -> str:
-        module = self.function.__module__
-        name = self.function.__name__
-        return f"{module}.{name}"
-    def create_node(self) -> TorchNode:
-        if not self.next_node:
-            if not (self.terminate_with_failure or self.terminate_with_success):
-                raise AssertionError(
-                    "A node not being terminated must have a user defined next node"
-                )
-        if self.on_failure:
-            self.on_failure = self.on_failure.steps[0].name  # type: ignore
-        from extensions.nodes.torch import TorchNode
-        return TorchNode.parse_from_config(
-            self.model_dump(exclude_none=True, by_alias=True)
-        )
 class Parallel(BaseTraversal):
     """
     A node that executes multiple branches in parallel.
@@ -685,6 +676,7 @@ class Pipeline(BaseModel):
         terminal_step: StepType = self.steps[-1]
         if not terminal_step.terminate_with_failure:
             terminal_step.terminate_with_success = True
+            terminal_step.next_node = "success"
         # assert that there is only one termination node with success or failure
         # Assert that there are no duplicate step names
@@ -965,7 +957,7 @@ class BaseJob(BaseModel):
 class PythonJob(BaseJob):
-    function: Callable = Field(exclude=True)
+    function: Callable = Field()
     @property
     @computed_field
@@ -975,14 +967,27 @@ class PythonJob(BaseJob):
         return f"{module}.{name}"
+    # TODO: can this be simplified to just self.model_dump(exclude_none=True)?
     def get_task(self) -> RunnableTask:
         # Piggyback on existing tasks as a hack
         task = PythonTask(
             name="dummy",
             terminate_with_success=True,
-            returns=self.returns,
-            secrets=self.secrets,
-            function=self.function,
+            **self.model_dump(exclude_defaults=True, exclude_none=True),
+        )
+        return task.create_node().executable
+class TorchJob(BaseJob, TorchConfig):
+    function: Callable = Field()
+    # min and max should always be 1
+    def get_task(self) -> RunnableTask:
+        # Piggyback on existing tasks as a hack
+        task = TorchTask(
+            name="dummy",
+            terminate_with_success=True,
+            **self.model_dump(exclude_defaults=True, exclude_none=True),
         )
         return task.create_node().executable
@@ -998,10 +1003,7 @@ class NotebookJob(BaseJob):
         task = NotebookTask(
             name="dummy",
             terminate_with_success=True,
-            returns=self.returns,
-            secrets=self.secrets,
-            notebook=self.notebook,
-            optional_ploomber_args=self.optional_ploomber_args,
+            **self.model_dump(exclude_defaults=True, exclude_none=True),
         )
         return task.create_node().executable
@@ -1014,8 +1016,6 @@ class ShellJob(BaseJob):
         task = ShellTask(
             name="dummy",
             terminate_with_success=True,
-            returns=self.returns,
-            secrets=self.secrets,
-            command=self.command,
+            **self.model_dump(exclude_defaults=True, exclude_none=True),
         )
         return task.create_node().executable

runnable/tasks.py CHANGED Viewed

@@ -28,7 +28,6 @@ from runnable.datastore import (
 from runnable.defaults import TypeMapVariable
 logger = logging.getLogger(defaults.LOGGER_NAME)
-logging.getLogger("stevedore").setLevel(logging.CRITICAL)
 class TeeIO(io.StringIO):
@@ -49,8 +48,7 @@ class TeeIO(io.StringIO):
         self.output_stream.flush()
-buffer = TeeIO()
-sys.stdout = buffer
+sys.stdout = TeeIO()
 class TaskReturns(BaseModel):
@@ -761,6 +759,8 @@ def create_task(kwargs_for_init) -> BaseTaskType:
         tasks.BaseTaskType: The command object
     """
     # The dictionary cannot be modified
+    print(kwargs_for_init)
     kwargs = kwargs_for_init.copy()
     command_type = kwargs.pop("command_type", defaults.COMMAND_TYPE)

{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.31.0
+Version: 0.32.0
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE
@@ -28,6 +28,7 @@ Provides-Extra: s3
 Requires-Dist: cloudpathlib[s3]; extra == 's3'
 Provides-Extra: torch
 Requires-Dist: torch>=2.6.0; extra == 'torch'
+Requires-Dist: torchvision>=0.21.0; extra == 'torch'
 Description-Content-Type: text/markdown

{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/RECORD RENAMED Viewed

@@ -16,8 +16,8 @@ extensions/job_executor/pyproject.toml,sha256=UIEgiCYHTXcRWSByNMFuKJFKgxTBpQqTqy
 extensions/nodes/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/nodes/nodes.py,sha256=s9ub1dqy4qHjRQG6YElCdL7rCOTYNs9RUIrStZ6tEB4,28256
 extensions/nodes/pyproject.toml,sha256=YTu-ETN3JNFSkMzzWeOwn4m-O2nbRH-PmiPBALDCUw4,278
-extensions/nodes/torch.py,sha256=RUelXV7Pa4U5F7Ww3cfRG0Oaz9SkYF3b_CmpFHlpbyI,6885
-extensions/nodes/torch_config.py,sha256=jfUtkwCYolyKVcFxiMjjwm63yv-HjTKvSQR8JLA7sZg,3151
+extensions/nodes/torch.py,sha256=h3x5931ePBNckeSXM3JFjSoUnxmIWvDyEpn1AI9TKaU,9347
+extensions/nodes/torch_config.py,sha256=tO3sG2_fj8a6FmPZZllwKVx3WaRr4QmQYcACseg8YXM,2839
 extensions/pipeline_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/pipeline_executor/__init__.py,sha256=wfigTL2T9OHrmE8b2Ydmb8h6hr-oF--Yc2FectC7WaY,24623
 extensions/pipeline_executor/argo.py,sha256=AEGSWVZulBL6EsvbVCaeBeTl2m_t5ymc6RFpMKhivis,37946
@@ -40,13 +40,15 @@ extensions/run_log_store/db/integration_FF.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
 extensions/secrets/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/secrets/dotenv.py,sha256=nADHXI6KJ_LUYOIe5EbtYH-21OBebSNVr0Pjb1GlZ7w,1573
 extensions/secrets/pyproject.toml,sha256=mLJNImNcBlbLKHh-0ugVWT9V83R4RibyyYDtBCSqVF4,282
-runnable/__init__.py,sha256=swvqdCjeddn40o4zjsluyahdVcU0r1arSRrxmRsvFEQ,673
+extensions/tasks/torch.py,sha256=R0J_Q6SRAW2Ii0XQbXaaBWTah8TYs4P_48j2M1bIXeA,7983
+extensions/tasks/torch_config.py,sha256=tO3sG2_fj8a6FmPZZllwKVx3WaRr4QmQYcACseg8YXM,2839
+runnable/__init__.py,sha256=3ZKuvGEkY_zHVQlJtarXd4jkjICxjgnw-bbKN_5SiJI,691
 runnable/catalog.py,sha256=4msQxLhLKlsDDrHFnGauPYe-Or-q9g8_RYCn_4dpxaU,4466
 runnable/cli.py,sha256=3BiKSj95h2Drn__YlchMPZ5rBMafuRb2OGIsVpbsO5Y,8788
 runnable/context.py,sha256=by5uepmuCP0dmM9BmsliXihSes5QEFejwAsmekcqylE,1388
 runnable/datastore.py,sha256=ZobM1aVkgeUJ2fZYt63IFDsoNzObwc93hdByegS5YKQ,32396
 runnable/defaults.py,sha256=3o9IVGryyCE6PoQTOoaIaHHTbJGEzmdXMcwzOhwAYoI,3518
-runnable/entrypoints.py,sha256=cDbhtmLUWdBh9K6hNusfQpSd5NadcX8V1K2JEDf_YAg,18984
+runnable/entrypoints.py,sha256=1xCbWVUQLGmg5gkWnAVWFLAUf6j4avP9azX_vuGQUMY,18985
 runnable/exceptions.py,sha256=LFbp0-Qxg2PAMLEVt7w2whhBxSG-5pzUEv5qN-Rc4_c,3003
 runnable/executor.py,sha256=UOsYJ3NkTGw4FTR0iePX7AOJzY7vODhZ62aqrwVMO1c,15143
 runnable/graph.py,sha256=poQz5zcvq89ju_u5sYlunQLPbHnXTaUmjcvstPwvT4U,16536
@@ -54,12 +56,12 @@ runnable/names.py,sha256=vn92Kv9ANROYSZX6Z4z1v_WA3WiEdIYmG6KEStBFZug,8134
 runnable/nodes.py,sha256=d1eLttMAcV7CTwTEqOuNwZqItANoLUkXJ73Xp-srlyI,17811
 runnable/parameters.py,sha256=sT3DNGczivP9z7r4Cp_brbudg1z4J-zjmvrq3ppIrVs,5089
 runnable/pickler.py,sha256=ydJ_eti_U1F4l-YacFp7BWm6g5vTn04UXye25S1HVok,2684
-runnable/sdk.py,sha256=NZVQGaL4Zm2hwloRmqEgp8UPbBg9hY1abQGYnOgniPI,35128
+runnable/sdk.py,sha256=J1PyiHQD2v_0JaqHjY7xSaXwCUMi_mCNr70TsC-SFZU,35012
 runnable/secrets.py,sha256=4L_dBFxTgr8r_hHUD6RlZEtqaOHDRsFG5PXO5wlvMI0,2324
-runnable/tasks.py,sha256=Qb1IhVxHv68E7vf3M3YCf7MGRHyjmsEEYBpEpiZ4mRI,29062
+runnable/tasks.py,sha256=_A0pcTyOGQL-72AicOxracsrwfs2Vg0r4mQyxz3k6Iw,29016
 runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
-runnable-0.31.0.dist-info/METADATA,sha256=9c3Ixkq-Kl0_hiQfDX-KwtSAdSWzMRLJMfEze2oVQhE,10115
-runnable-0.31.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-runnable-0.31.0.dist-info/entry_points.txt,sha256=PrjKrlfXPZaV_7hz8orGu4FDnatLqnhPOXljyllszdw,1880
-runnable-0.31.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-runnable-0.31.0.dist-info/RECORD,,
+runnable-0.32.0.dist-info/METADATA,sha256=t44gRxxaRugnqaRY9gGwweGT0OLvo_inlC3jxrhP3sg,10168
+runnable-0.32.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+runnable-0.32.0.dist-info/entry_points.txt,sha256=uWHbbOSj0jlG54tFHw377xKkfVbjWvb_1Y9L_LgjJ0Q,1925
+runnable-0.32.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+runnable-0.32.0.dist-info/RECORD,,

{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/entry_points.txt RENAMED Viewed

@@ -49,3 +49,4 @@ env-secrets = runnable.secrets:EnvSecretsManager
 notebook = runnable.tasks:NotebookTaskType
 python = runnable.tasks:PythonTaskType
 shell = runnable.tasks:ShellTaskType
+torch = extensions.tasks.torch:TorchTaskType

{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{runnable-0.31.0.dist-info → runnable-0.32.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

runnable 0.31.0__py3-none-any.whl → 0.32.0__py3-none-any.whl

runnable 0.31.0__py3-none-any.whl → 0.32.0__py3-none-any.whl