runnable 0.32.0__py3-none-any.whl → 0.32.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/job_executor/k8s.py +1 -1
- extensions/nodes/torch.py +5 -4
- extensions/tasks/torch.py +35 -7
- extensions/tasks/torch_config.py +1 -1
- runnable/executor.py +8 -0
- runnable/parameters.py +5 -2
- runnable/sdk.py +4 -4
- runnable/tasks.py +0 -1
- {runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/METADATA +1 -1
- {runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/RECORD +13 -13
- {runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/WHEEL +0 -0
- {runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/entry_points.txt +0 -0
- {runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/licenses/LICENSE +0 -0
extensions/job_executor/k8s.py
CHANGED
extensions/nodes/torch.py
CHANGED
@@ -5,7 +5,7 @@ import random
 import string
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Callable, Optional
+from typing import TYPE_CHECKING, Any, Callable, Optional
 
 from pydantic import BaseModel, ConfigDict, Field, field_serializer
 
@@ -21,11 +21,12 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
 try:
     from torch.distributed.elastic.multiprocessing.api import DefaultLogsSpecs, Std
     from torch.distributed.launcher.api import LaunchConfig, elastic_launch
-
 except ImportError:
-
+    logger.exception("Torch is not installed. Please install torch first.")
 
-
+if TYPE_CHECKING:
+    from torch.distributed.elastic.multiprocessing.api import DefaultLogsSpecs, Std
+    from torch.distributed.launcher.api import LaunchConfig, elastic_launch
 
 
 def training_subprocess():
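Note: the hunk above replaces a hard import failure with a logged warning, while the new TYPE_CHECKING block keeps the torch names visible to static type checkers without importing torch at runtime. A minimal sketch of the same pattern (module name aside, nothing here is specific to runnable):

import logging
from typing import TYPE_CHECKING

logger = logging.getLogger(__name__)

try:
    # Optional heavy dependency: the module should import even without it.
    from torch.distributed.launcher.api import LaunchConfig, elastic_launch
except ImportError:
    # Log and carry on instead of raising, so non-torch code paths still work.
    logger.exception("Torch is not installed. Please install torch first.")

if TYPE_CHECKING:
    # Evaluated only by type checkers, never at runtime.
    from torch.distributed.launcher.api import LaunchConfig, elastic_launch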
extensions/tasks/torch.py
CHANGED
@@ -5,7 +5,7 @@ import random
 import string
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
 from pydantic import BaseModel, ConfigDict, Field, field_serializer, model_validator
 from ruamel.yaml import YAML
@@ -17,15 +17,23 @@ from runnable.datastore import StepAttempt
 from runnable.tasks import BaseTaskType
 from runnable.utils import get_module_and_attr_names
 
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
 try:
     from torch.distributed.elastic.multiprocessing.api import DefaultLogsSpecs, Std
     from torch.distributed.launcher.api import LaunchConfig, elastic_launch
 
 except ImportError:
-
+    logger.exception("torch is not installed")
 
+if TYPE_CHECKING:
+    from torch.distributed.elastic.multiprocessing.api import DefaultLogsSpecs, Std
+    from torch.distributed.launcher.api import LaunchConfig, elastic_launch
 
-
+
+def get_min_max_nodes(nnodes: str) -> tuple[int, int]:
+    min_nodes, max_nodes = (int(x) for x in nnodes.split(":"))
+    return min_nodes, max_nodes
 
 
 class TorchTaskType(BaseTaskType, TorchConfig):
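Note: the new get_min_max_nodes helper parses a torchrun-style nnodes spec of the form "min:max". A quick sketch of its behaviour; as written it assumes the colon is always present:

def get_min_max_nodes(nnodes: str) -> tuple[int, int]:
    min_nodes, max_nodes = (int(x) for x in nnodes.split(":"))
    return min_nodes, max_nodes

assert get_min_max_nodes("1:2") == (1, 2)
assert get_min_max_nodes("4:4") == (4, 4)
# A bare "4" (no colon) raises ValueError: the generator yields one value
# for the two-name unpacking.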
@@ -60,7 +68,7 @@ class TorchTaskType(BaseTaskType, TorchConfig):
                 exclude_none=True,
             )
         )
-
+        print("###", easy_torch_config)
         launch_config = LaunchConfig(
             **easy_torch_config.model_dump(
                 exclude_none=True,
@@ -77,7 +85,30 @@ class TorchTaskType(BaseTaskType, TorchConfig):
     ):
         assert map_variable is None, "map_variable is not supported for torch"
 
+        # The below should happen only if we are in the node that we want to execute
+        # For a single node, multi worker setup, this should be the entry point
+        # For a multi-node, we need to:
+        # - create a service config
+        # - Create a stateful set with number of nodes
+        # - Create a job to run the torch.distributed.launcher.api.elastic_launch on every node
+        # - the entry point to runnnable could be a way to trigger execution instead of scaling
+        is_execute = os.environ.get("RUNNABLE_TORCH_EXECUTE", "true") == "true"
+
+        _, max_nodes = get_min_max_nodes(self.nnodes)
+
+        if max_nodes > 1 and not is_execute:
+            executor = self._context.executor
+            executor.scale_up(self)
+            return StepAttempt(
+                status=defaults.SUCCESS,
+                start_time=str(datetime.now()),
+                end_time=str(datetime.now()),
+                attempt_number=1,
+                message="Triggered a scale up",
+            )
+
         launch_config = self._get_launch_config()
+        print("###****", launch_config)
         logger.info(f"launch_config: {launch_config}")
 
         # ENV variables are shared with the subprocess, use that as communication
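Note: with this hunk, a multi-node task that has not been told to execute (RUNNABLE_TORCH_EXECUTE != "true") asks its executor to scale up and returns a successful StepAttempt instead of launching workers. A hedged sketch of that control flow (the environment variable name comes from the diff; the executor object is a stand-in):

import os

def run_or_scale(nnodes: str, executor) -> str:
    # Defaults to "true", so a plain invocation executes the workers.
    is_execute = os.environ.get("RUNNABLE_TORCH_EXECUTE", "true") == "true"
    _, max_nodes = (int(x) for x in nnodes.split(":"))

    if max_nodes > 1 and not is_execute:
        executor.scale_up()  # provision the remaining nodes first
        return "Triggered a scale up"
    return "Launched workers"  # single node, or a node told to execute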
@@ -175,9 +206,6 @@ def training_subprocess():
        self._context.parameters_file or ""
    )
    os.environ["RUNNABLE_TORCH_RUN_ID"] = self._context.run_id
-    os.environ["RUNNABLE_TORCH_COPY_CONTENTS_TO"] = (
-        self._context.catalog_handler.compute_data_folder
-    )
    os.environ["RUNNABLE_TORCH_TORCH_LOGS"] = self.log_dir or ""
 
    """
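Note: state crosses into the training subprocess only through these RUNNABLE_TORCH_* environment variables; this hunk drops the RUNNABLE_TORCH_COPY_CONTENTS_TO handoff. On the reading side the pattern is simply (variable names from the diff, defaults illustrative):

import os

run_id = os.environ.get("RUNNABLE_TORCH_RUN_ID", "")
torch_logs = os.environ.get("RUNNABLE_TORCH_TORCH_LOGS", "")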
extensions/tasks/torch_config.py
CHANGED
@@ -43,7 +43,7 @@ class TorchConfig(BaseModel):
     # and sent at the creation of the LaunchConfig
 
     # This section is about the communication between nodes/processes
-    rdzv_backend: str | None = Field(default="
+    rdzv_backend: str | None = Field(default="")
     rdzv_endpoint: str | None = Field(default="")
     rdzv_configs: dict[str, Any] = Field(default_factory=dict)
     rdzv_timeout: int | None = Field(default=None)
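Note: the rdzv_* fields mirror the rendezvous arguments of torch's elastic launcher, which the task later splats into LaunchConfig. A hedged sketch of that mapping (requires torch; all values illustrative — "c10d" is the backend commonly used for multi-node runs):

from torch.distributed.launcher.api import LaunchConfig

config = LaunchConfig(
    min_nodes=1,
    max_nodes=2,
    nproc_per_node=4,
    rdzv_backend="c10d",                # rendezvous backend
    rdzv_endpoint="master-host:29400",  # illustrative address
    rdzv_configs={},
)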
runnable/executor.py
CHANGED
@@ -153,6 +153,14 @@ class BaseJobExecutor(BaseExecutor):
         """
         ...
 
+    # @abstractmethod
+    # def scale_up(self, job: BaseTaskType):
+    #     """
+    #     Scale up the job to run on max_nodes
+    #     This has to also call the entry point
+    #     """
+    #     ...
+
 
 # TODO: Consolidate execute_node, trigger_node_execution, _execute_node
 class BasePipelineExecutor(BaseExecutor):
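Note: the scale_up hook is only sketched in comments here, even though extensions/tasks/torch.py above already calls executor.scale_up(self). If it were promoted to a real abstract method it would look roughly like this (hypothetical; the commented-out signature is the only guide):

from abc import ABC, abstractmethod

class BaseJobExecutor(ABC):  # simplified stand-in for runnable's class
    @abstractmethod
    def scale_up(self, job) -> None:
        """Scale the job up to max_nodes and call the entry point on each node."""
        ...

class StatefulSetExecutor(BaseJobExecutor):  # hypothetical concrete executor
    def scale_up(self, job) -> None:
        # e.g. patch a StatefulSet's replica count, then trigger execution
        print(f"scaling up {job!r}")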
runnable/parameters.py
CHANGED
@@ -9,7 +9,7 @@ from pydantic import BaseModel, ConfigDict
 from typing_extensions import Callable
 
 from runnable import defaults
-from runnable.datastore import JsonParameter
+from runnable.datastore import JsonParameter, ObjectParameter
 from runnable.defaults import TypeMapVariable
 from runnable.utils import remove_prefix
 
@@ -101,10 +101,13 @@ def filter_arguments_for_func(
             # default value is given in the function signature, nothing further to do.
             continue
 
+        param_value = params[name]
+
         if type(value.annotation) in [
             BaseModel,
             pydantic._internal._model_construction.ModelMetaclass,
-        ]:
+        ] and not isinstance(param_value, ObjectParameter):
+            # Even if the annotation is a pydantic model, it can be passed as an object parameter
             # We try to cast it as a pydantic model if asked
             named_param = params[name].get_value()
 
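Note: the guard handles a real corner case — a function parameter annotated with a pydantic model may still arrive as an ObjectParameter (an already-materialised object) and must not be re-cast from JSON. A self-contained sketch of the same decision with stand-in parameter classes (hypothetical, not runnable's actual datastore types):

from pydantic import BaseModel

class JsonParameter:  # stand-in: value stored as JSON
    def __init__(self, value):
        self.value = value

class ObjectParameter:  # stand-in: value stored as a pickled object
    def __init__(self, value):
        self.value = value

class Pay(BaseModel):
    amount: float = 0.0

def resolve(annotation, param):
    # Cast to the pydantic model only when the stored value is plain JSON;
    # an ObjectParameter already holds the object and passes through untouched.
    if (
        isinstance(annotation, type)
        and issubclass(annotation, BaseModel)
        and not isinstance(param, ObjectParameter)
    ):
        return annotation(**param.value)
    return param.value

assert isinstance(resolve(Pay, JsonParameter({"amount": 3.0})), Pay)
pay = Pay(amount=5.0)
assert resolve(Pay, ObjectParameter(pay)) is pay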
runnable/sdk.py
CHANGED
@@ -5,7 +5,7 @@ import os
 import re
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import
+from typing import Any, Callable, Dict, List, Optional, Union
 
 from pydantic import (
     BaseModel,
@@ -34,7 +34,7 @@ from extensions.nodes.nodes import (
     SuccessNode,
     TaskNode,
 )
-from extensions.
+from extensions.tasks.torch_config import TorchConfig
 from runnable import console, defaults, entrypoints, exceptions, graph, utils
 from runnable.executor import BaseJobExecutor, BasePipelineExecutor
 from runnable.nodes import TraversalNode
@@ -46,8 +46,6 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
 StepType = Union[
     "Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map", "TorchTask"
 ]
-if TYPE_CHECKING:
-    pass
 
 
 def pickled(name: str) -> TaskReturns:
@@ -192,6 +190,8 @@ class BaseTask(BaseTraversal):
 
 
 class TorchTask(BaseTask, TorchConfig):
+    # The user will not know the rnnz variables for multi node
+    # They should be overridden in the environment
     function: Callable = Field(exclude=True)
 
     @field_validator("returns", mode="before")
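Note: "rnnz" in the added comment presumably means the rdzv (rendezvous) settings, which for multi-node runs are injected per node through the environment rather than hard-coded in the SDK. Purely as illustration, torch's conventional rendezvous variables can be set like so (values illustrative):

import os

os.environ.setdefault("MASTER_ADDR", "node-0.trainers.svc")  # example address
os.environ.setdefault("MASTER_PORT", "29400")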
runnable/tasks.py
CHANGED
{runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/RECORD
CHANGED
@@ -8,7 +8,7 @@ extensions/catalog/pyproject.toml,sha256=lLNxY6v04c8I5QK_zKw_E6sJTArSJRA_V-79kta
 extensions/catalog/s3.py,sha256=Sw5t8_kVRprn3uGGJCiHn7M9zw1CLaCOFj6YErtfG0o,287
 extensions/job_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/job_executor/__init__.py,sha256=VeLuYCcShCIYT0TNtAXfUF9tOk4ZHoLzdTEvbsz0spM,5870
-extensions/job_executor/k8s.py,sha256=
+extensions/job_executor/k8s.py,sha256=Jl0s3YryISx-SJIhDhyNskzlUlhy4ynBHEc9DfAXjAY,16394
 extensions/job_executor/k8s_job_spec.yaml,sha256=7aFpxHdO_p6Hkc3YxusUOuAQTD1Myu0yTPX9DrhxbOg,1158
 extensions/job_executor/local.py,sha256=3ZbCFXBvbLlMp10JTmQJJrjBKG2keHI6SH8hEvmHDkA,2230
 extensions/job_executor/local_container.py,sha256=1JcLJ0zrNSNHdubrSO9miN54iwvPLHqKMZ08aOC8WWo,6886
@@ -16,7 +16,7 @@ extensions/job_executor/pyproject.toml,sha256=UIEgiCYHTXcRWSByNMFuKJFKgxTBpQqTqy
 extensions/nodes/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/nodes/nodes.py,sha256=s9ub1dqy4qHjRQG6YElCdL7rCOTYNs9RUIrStZ6tEB4,28256
 extensions/nodes/pyproject.toml,sha256=YTu-ETN3JNFSkMzzWeOwn4m-O2nbRH-PmiPBALDCUw4,278
-extensions/nodes/torch.py,sha256=
+extensions/nodes/torch.py,sha256=gydcRX5C7jEdPnxLsAQkpRD_by_0Lp4dFg96xDkRVW0,9510
 extensions/nodes/torch_config.py,sha256=tO3sG2_fj8a6FmPZZllwKVx3WaRr4QmQYcACseg8YXM,2839
 extensions/pipeline_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/pipeline_executor/__init__.py,sha256=wfigTL2T9OHrmE8b2Ydmb8h6hr-oF--Yc2FectC7WaY,24623
@@ -40,8 +40,8 @@ extensions/run_log_store/db/integration_FF.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
 extensions/secrets/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/secrets/dotenv.py,sha256=nADHXI6KJ_LUYOIe5EbtYH-21OBebSNVr0Pjb1GlZ7w,1573
 extensions/secrets/pyproject.toml,sha256=mLJNImNcBlbLKHh-0ugVWT9V83R4RibyyYDtBCSqVF4,282
-extensions/tasks/torch.py,sha256=
-extensions/tasks/torch_config.py,sha256=
+extensions/tasks/torch.py,sha256=At2eMpJas4sUUjzJfPrEBGamG-k3MsxXU6Bou0h9BEs,9274
+extensions/tasks/torch_config.py,sha256=UjfMitT-TXASRDGR30I2vDRnyk7JQnR-5CsOVidjpSY,2833
 runnable/__init__.py,sha256=3ZKuvGEkY_zHVQlJtarXd4jkjICxjgnw-bbKN_5SiJI,691
 runnable/catalog.py,sha256=4msQxLhLKlsDDrHFnGauPYe-Or-q9g8_RYCn_4dpxaU,4466
 runnable/cli.py,sha256=3BiKSj95h2Drn__YlchMPZ5rBMafuRb2OGIsVpbsO5Y,8788
@@ -50,18 +50,18 @@ runnable/datastore.py,sha256=ZobM1aVkgeUJ2fZYt63IFDsoNzObwc93hdByegS5YKQ,32396
 runnable/defaults.py,sha256=3o9IVGryyCE6PoQTOoaIaHHTbJGEzmdXMcwzOhwAYoI,3518
 runnable/entrypoints.py,sha256=1xCbWVUQLGmg5gkWnAVWFLAUf6j4avP9azX_vuGQUMY,18985
 runnable/exceptions.py,sha256=LFbp0-Qxg2PAMLEVt7w2whhBxSG-5pzUEv5qN-Rc4_c,3003
-runnable/executor.py,sha256=
+runnable/executor.py,sha256=Jr9yJtSH7CzjXJLWx3VWIUAQblstuGqzpFtajv7d39M,15348
 runnable/graph.py,sha256=poQz5zcvq89ju_u5sYlunQLPbHnXTaUmjcvstPwvT4U,16536
 runnable/names.py,sha256=vn92Kv9ANROYSZX6Z4z1v_WA3WiEdIYmG6KEStBFZug,8134
 runnable/nodes.py,sha256=d1eLttMAcV7CTwTEqOuNwZqItANoLUkXJ73Xp-srlyI,17811
-runnable/parameters.py,sha256=
+runnable/parameters.py,sha256=u77CdqqDAbVdzNeBFPNUfGnWPy9-SpBVmwEJ56xmDm8,5289
 runnable/pickler.py,sha256=ydJ_eti_U1F4l-YacFp7BWm6g5vTn04UXye25S1HVok,2684
-runnable/sdk.py,sha256=
+runnable/sdk.py,sha256=hwsEGCCFSijm0DZwDJGHmV8jdMuSU_3Pf-vYoomWYHw,35084
 runnable/secrets.py,sha256=4L_dBFxTgr8r_hHUD6RlZEtqaOHDRsFG5PXO5wlvMI0,2324
-runnable/tasks.py,sha256=
+runnable/tasks.py,sha256=ABRhgiTY8F62pNlqJmVTDjwJwuzp8DqciUEOq1fpt1U,28989
 runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
-runnable-0.32.
-runnable-0.32.
-runnable-0.32.
-runnable-0.32.
-runnable-0.32.
+runnable-0.32.2.dist-info/METADATA,sha256=fcKKBj2v2AhRQFZ7ALqSdJrKF5r0Wg-QV6HVKqkBpRY,10168
+runnable-0.32.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+runnable-0.32.2.dist-info/entry_points.txt,sha256=uWHbbOSj0jlG54tFHw377xKkfVbjWvb_1Y9L_LgjJ0Q,1925
+runnable-0.32.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+runnable-0.32.2.dist-info/RECORD,,
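Note: each RECORD row has the form path,sha256=<urlsafe-base64 digest, padding stripped>,<size in bytes> per the wheel spec; the hashes cut off on the removed lines above are a diff-viewer artifact, not empty fields. A small sketch of how an entry can be recomputed and checked:

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # RECORD stores the urlsafe base64 of the sha256 digest, '=' padding stripped.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# record_entry("runnable/executor.py"), run against the unpacked 0.32.2 wheel,
# should reproduce the corresponding line above.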
{runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/WHEEL
File without changes
{runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/entry_points.txt
File without changes
{runnable-0.32.0.dist-info → runnable-0.32.2.dist-info}/licenses/LICENSE
File without changes