PyPI - runnable - Versions diffs - 0.30.4__py3-none-any.whl → 0.31.0__py3-none-any.whl - Mend

runnable 0.30.4__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

extensions/nodes/torch.py CHANGED Viewed

@@ -8,7 +8,7 @@ from typing import Any, Callable
 from pydantic import ConfigDict, Field
-from extensions.nodes.torch_config import TorchConfig
+from extensions.nodes.torch_config import EasyTorchConfig, InternalLogSpecs, TorchConfig
 from runnable import PythonJob, datastore, defaults
 from runnable.datastore import StepLog
 from runnable.nodes import DistributedNode
@@ -18,8 +18,9 @@ from runnable.utils import TypeMapVariable
 logger = logging.getLogger(defaults.LOGGER_NAME)
 try:
+    from torch.distributed.elastic.multiprocessing.api import DefaultLogsSpecs
     from torch.distributed.launcher.api import LaunchConfig, elastic_launch
-    from torch.distributed.run import config_from_args
 except ImportError:
     raise ImportError("torch is not installed. Please install torch first.")
@@ -120,9 +121,25 @@ class TorchNode(DistributedNode, TorchConfig):
         return cls(executable=executable, **node_config, **task_config)
     def get_launch_config(self) -> LaunchConfig:
-        config, _, _ = config_from_args(self)
-        config.run_id = self._context.run_id
-        return config
+        internal_log_spec = InternalLogSpecs(**self.model_dump(exclude_none=True))
+        log_spec: DefaultLogsSpecs = DefaultLogsSpecs(
+            **internal_log_spec.model_dump(exclude_none=True)
+        )
+        easy_torch_config = EasyTorchConfig(
+            **self.model_dump(
+                exclude_none=True,
+            )
+        )
+        laugch_config = LaunchConfig(
+            **easy_torch_config.model_dump(
+                exclude_none=True,
+            ),
+            logs_specs=log_spec,
+            run_id=self._context.run_id,
+        )
+        print(laugch_config)
+        return laugch_config
     def execute(
         self,

extensions/nodes/torch_config.py CHANGED Viewed

@@ -1,33 +1,96 @@
-from pydantic import BaseModel, Field
+from enum import Enum
+from typing import Any, Optional
+from pydantic import BaseModel, ConfigDict, Field, computed_field
+class StartMethod(str, Enum):
+    spawn = "spawn"
+    fork = "fork"
+    forkserver = "forkserver"
+# min_nodes: int
+# max_nodes: int
+# nproc_per_node: int
+# logs_specs: Optional[LogsSpecs] = None
+# run_id: str = ""
+# role: str = "default_role"
+# rdzv_endpoint: str = ""
+# rdzv_backend: str = "etcd"
+# rdzv_configs: dict[str, Any] = field(default_factory=dict)
+# rdzv_timeout: int = -1
+# max_restarts: int = 3
+# monitor_interval: float = 0.1
+# start_method: str = "spawn"
+# log_line_prefix_template: Optional[str] = None
+# metrics_cfg: dict[str, str] = field(default_factory=dict)
+# local_addr: Optional[str] = None
+## The idea is the following:
+# Users can configure any of the options present in TorchConfig class.
+# The LaunchConfig class will be created from torch config.
+# The LogSpecs is sent as a parameter to the launch config.
+# None as much as possible to get
+## NO idea of standalone and how to send it
+class InternalLogSpecs(BaseModel):
+    log_dir: Optional[str] = Field(default="torch_logs")
+    redirects: int | None = Field(default=None)
+    tee: int | None = Field(default=None)
+    local_ranks_filter: Optional[set[int]] = Field(default=None)
+    model_config = ConfigDict(extra="ignore")
 class TorchConfig(BaseModel):
-    nnodes: str = Field(default="1:1")
-    nproc_per_node: int = Field(default=4)
-    rdzv_backend: str = Field(default="static")
-    rdzv_endpoint: str = Field(default="")
-    rdzv_id: str | None = Field(default=None)
-    rdzv_conf: str = Field(default="")
-    max_restarts: int = Field(default=3)
-    monitor_interval: float = Field(default=0.1)
-    start_method: str = Field(default="spawn")
-    role: str = Field(default="default_role")
-    log_dir: str = Field(default="torch_logs")
-    redirects: str = Field(default="1")
-    tee: str = Field(default="1")
-    master_addr: str = Field(default="localhost")
-    master_port: str = Field(default="29500")
-    training_script: str = Field(default="dummy_training_script")
-    training_script_args: str = Field(default="")
-    # Optional fields
-    local_ranks_filter: str = Field(default="")
-    node_rank: int = Field(default=0)
-    local_addr: str | None = Field(default=None)
-    logs_specs: str | None = Field(default=None)
-    standalone: bool = Field(default=False)
-    module: bool = Field(default=False)
-    no_python: bool = Field(default=False)
-    run_path: bool = Field(default=False)
+    model_config = ConfigDict(extra="forbid")
+    nnodes: str = Field(default="1:1", exclude=True)
+    nproc_per_node: int = Field(default=1)
+    # will be used to create the log specs
+    log_dir: Optional[str] = Field(default="torch_logs", exclude=True)
+    redirects: int | None = Field(default=None, exclude=True)
+    tee: int | None = Field(default=None, exclude=True)
+    local_ranks_filter: Optional[set[int]] = Field(default=None, exclude=True)
+    role: str | None = Field(default=None)
+    # run_id would be the run_id of the context
+    # and sent at the creation of the LaunchConfig
+    rdzv_backend: str | None = Field(default="static")
+    rdzv_endpoint: str | None = Field(default="")
+    rdzv_configs: dict[str, Any] = Field(default_factory=dict)
+    rdzv_timeout: int | None = Field(default=None)
+    max_restarts: int | None = Field(default=None)
+    monitor_interval: float | None = Field(default=None)
+    start_method: str | None = Field(default=StartMethod.spawn)
+    log_line_prefix_template: str | None = Field(default=None)
+    local_addr: Optional[str] = None
+    # https://github.com/pytorch/pytorch/blob/main/torch/distributed/run.py#L753
+    # master_addr: str | None = Field(default="localhost")
+    # master_port: str | None = Field(default="29500")
+    # training_script: str = Field(default="dummy_training_script")
+    # training_script_args: str = Field(default="")
+class EasyTorchConfig(TorchConfig):
+    model_config = ConfigDict(extra="ignore")
+    # TODO: Validate min < max
+    @computed_field  # type: ignore
+    @property
+    def min_nodes(self) -> int:
+        return int(self.nnodes.split(":")[0])
+    @computed_field  # type: ignore
+    @property
+    def max_nodes(self) -> int:
+        return int(self.nnodes.split(":")[1])

{runnable-0.30.4.dist-info → runnable-0.31.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.30.4
+Version: 0.31.0
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE

{runnable-0.30.4.dist-info → runnable-0.31.0.dist-info}/RECORD RENAMED Viewed

@@ -16,8 +16,8 @@ extensions/job_executor/pyproject.toml,sha256=UIEgiCYHTXcRWSByNMFuKJFKgxTBpQqTqy
 extensions/nodes/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/nodes/nodes.py,sha256=s9ub1dqy4qHjRQG6YElCdL7rCOTYNs9RUIrStZ6tEB4,28256
 extensions/nodes/pyproject.toml,sha256=YTu-ETN3JNFSkMzzWeOwn4m-O2nbRH-PmiPBALDCUw4,278
-extensions/nodes/torch.py,sha256=id0_HVkRcqL9_DPOI-b53vaDwRgVfGB-zZS3yrRej9g,6318
-extensions/nodes/torch_config.py,sha256=yDvDADpnLhQsNtfH8qIztLHQ2LhYiOJEWljxpH9GZzs,1222
+extensions/nodes/torch.py,sha256=RUelXV7Pa4U5F7Ww3cfRG0Oaz9SkYF3b_CmpFHlpbyI,6885
+extensions/nodes/torch_config.py,sha256=jfUtkwCYolyKVcFxiMjjwm63yv-HjTKvSQR8JLA7sZg,3151
 extensions/pipeline_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/pipeline_executor/__init__.py,sha256=wfigTL2T9OHrmE8b2Ydmb8h6hr-oF--Yc2FectC7WaY,24623
 extensions/pipeline_executor/argo.py,sha256=AEGSWVZulBL6EsvbVCaeBeTl2m_t5ymc6RFpMKhivis,37946
@@ -58,8 +58,8 @@ runnable/sdk.py,sha256=NZVQGaL4Zm2hwloRmqEgp8UPbBg9hY1abQGYnOgniPI,35128
 runnable/secrets.py,sha256=4L_dBFxTgr8r_hHUD6RlZEtqaOHDRsFG5PXO5wlvMI0,2324
 runnable/tasks.py,sha256=Qb1IhVxHv68E7vf3M3YCf7MGRHyjmsEEYBpEpiZ4mRI,29062
 runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
-runnable-0.30.4.dist-info/METADATA,sha256=S-5zecrqE4tU5MW4Fe1-2F-Q_hLU7fAXZ2oo9xVRRUw,10115
-runnable-0.30.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-runnable-0.30.4.dist-info/entry_points.txt,sha256=PrjKrlfXPZaV_7hz8orGu4FDnatLqnhPOXljyllszdw,1880
-runnable-0.30.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-runnable-0.30.4.dist-info/RECORD,,
+runnable-0.31.0.dist-info/METADATA,sha256=9c3Ixkq-Kl0_hiQfDX-KwtSAdSWzMRLJMfEze2oVQhE,10115
+runnable-0.31.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+runnable-0.31.0.dist-info/entry_points.txt,sha256=PrjKrlfXPZaV_7hz8orGu4FDnatLqnhPOXljyllszdw,1880
+runnable-0.31.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+runnable-0.31.0.dist-info/RECORD,,

{runnable-0.30.4.dist-info → runnable-0.31.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{runnable-0.30.4.dist-info → runnable-0.31.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{runnable-0.30.4.dist-info → runnable-0.31.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

runnable 0.30.4__py3-none-any.whl → 0.31.0__py3-none-any.whl

runnable 0.30.4__py3-none-any.whl → 0.31.0__py3-none-any.whl