PyPI - lightning-sdk - Versions diffs - 2025.7.17__py3-none-any.whl → 2025.7.30rc0__py3-none-any.whl - Mend

lightning-sdk 2025.7.17__py3-none-any.whl → 2025.7.30rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

lightning_sdk/llm/llm.py CHANGED Viewed

@@ -2,6 +2,7 @@ import json
 import os
 from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Tuple, Union
+from lightning_sdk.api import TeamspaceApi, UserApi
 from lightning_sdk.api.llm_api import LLMApi
 from lightning_sdk.lightning_cloud.openapi.models.v1_conversation_response_chunk import V1ConversationResponseChunk
@@ -13,7 +14,7 @@ PUBLIC_MODEL_PROVIDERS: Dict[str, str] = {
 }
-def _load_public_assistants() -> Dict[str, str]:
+def _load_public_assistants() -> Dict[str, Dict[str, Any]]:
     """Load public assistants from a JSON file."""
     try:
         json_path = os.path.join(os.path.dirname(__file__), "public_assistants.json")
@@ -28,7 +29,7 @@ class LLM:
     _auth_info_cached: ClassVar[bool] = False
     _cached_auth_info: ClassVar[Dict[str, Optional[str]]] = {}
     _llm_api_cache: ClassVar[Dict[Optional[str], LLMApi]] = {}
-    _public_assistants: ClassVar[Optional[Dict[str, str]]] = None
+    _public_assistants: ClassVar[Optional[Dict[str, Dict[str, Any]]]] = None
     def __new__(cls, name: str, teamspace: Optional[str] = None, enable_async: Optional[bool] = False) -> "LLM":
         return super().__new__(cls)
@@ -55,8 +56,18 @@ class LLM:
         Raises:
             ValueError: If teamspace information cannot be resolved.
         """
-        # TODO support user input teamspace
-        self._get_auth_info()
+        teamspace_name = None
+        if teamspace:
+            try:
+                owner, teamspace_name = teamspace.split("/", maxsplit=1)
+            except ValueError as e:
+                raise ValueError(
+                    f"Invalid teamspace format: '{teamspace}'. "
+                    "Teamspace should be specified as '{teamspace_owner}/{teamspace_name}' "
+                    "(e.g., 'my-org/my-teamspace')."
+                ) from e
+        self._get_auth_info(teamspace_name)
         self._model_provider, self._model_name = self._parse_model_name(name)
         self._enable_async = enable_async
@@ -66,6 +77,7 @@ class LLM:
             LLM._llm_api_cache[teamspace] = LLMApi()
         self._llm_api = LLM._llm_api_cache[teamspace]
+        self._context_length = None
         self._model_id = self._get_model_id()
         self._conversations = {}
@@ -77,14 +89,45 @@ class LLM:
     def provider(self) -> str:
         return self._model_provider
-    def _get_auth_info(self) -> None:
+    def context_length(self, model: Optional[str] = None) -> Optional[int]:
+        if model is None:
+            return self._context_length
+        context_info = self._public_assistants.get(model)
+        if context_info is None or "context_length" not in context_info:
+            raise ValueError(f"Cannot access context length of model '{model}'.")
+        return int(context_info["context_length"])
+    def _get_auth_info(self, teamspace_name: Optional[str] = None) -> None:
+        # TODO: Validate user input teamspace name
         if not LLM._auth_info_cached:
-            teamspace_name = os.environ.get("LIGHTNING_TEAMSPACE", None)
             if teamspace_name is None:
-                raise ValueError(
-                    "Teamspace name must be provided either through "
-                    "the environment variable LIGHTNING_TEAMSPACE or as an argument."
-                )
+                # studio users
+                teamspace_name = os.environ.get("LIGHTNING_TEAMSPACE", None)
+            if teamspace_name is None:
+                # local users with no given teamspace
+                try:
+                    teamspace_api = TeamspaceApi()
+                    user_api = UserApi()
+                    authed_user = user_api._client.auth_service_get_user()
+                    default_teamspace = teamspace_api.list_teamspaces(owner_id=authed_user.id)[0]
+                    teamspace_name = default_teamspace.name
+                    teamspace_id = default_teamspace.id
+                    os.environ["LIGHTNING_CLOUD_PROJECT_ID"] = teamspace_id
+                    os.environ["LIGHTNING_TEAMSPACE"] = teamspace_name
+                except Exception as err:
+                    # throw an appropriate error that guides users to login through the platform
+                    raise ValueError(
+                        "Teamspace information is missing. "
+                        "If this is your first time using LitAI, please log in at https://lightning.ai/sign-up "
+                        "and re-run your script, or set the environment variable LIGHTNING_TEAMSPACE=<your-teamspace>."
+                    ) from err
+            # TODO: when teamspace_name is given, we don't know the teamspace_id yet
+            # TODO: if LIGHTNING_CLOUD_PROJECT_ID does not exist, we have to get the id from the teamspace name
             LLM._cached_auth_info = {
                 "teamspace_name": teamspace_name,
                 "teamspace_id": os.environ.get("LIGHTNING_CLOUD_PROJECT_ID", None),
@@ -125,7 +168,10 @@ class LLM:
                 and LLM._public_assistants
                 and f"{self._model_provider}/{self._model_name}" in LLM._public_assistants
             ):
-                return LLM._public_assistants[f"{self._model_provider}/{self._model_name}"]
+                self._context_length = int(
+                    LLM._public_assistants[f"{self._model_provider}/{self._model_name}"]["context_length"]
+                )
+                return LLM._public_assistants[f"{self._model_provider}/{self._model_name}"]["id"]
             try:
                 return self._llm_api.get_assistant(
                     model_provider=PUBLIC_MODEL_PROVIDERS[self._model_provider],
@@ -139,6 +185,18 @@ class LLM:
                     "Please check the model name or provider."
                 ) from e
+        if self._model_provider == "lightning-ai":
+            # Try model provider model
+            try:
+                return self._llm_api.get_assistant(
+                    model_provider=self._model_provider,
+                    model_name=self._model_name,
+                    user_name="",
+                    org_name="",
+                )
+            except Exception:
+                pass
         # Try organization model
         try:
             return self._llm_api.get_assistant(

lightning_sdk/llm/public_assistants.json CHANGED Viewed

@@ -1,10 +1,34 @@
 {
-    "openai/gpt-4o": "ast_01jdjds71fs8gt47jexzed4czs",
-    "openai/gpt-4": "ast_01jd38ze6tjbrcd4942nhz41zn",
-    "openai/o3-mini": "ast_01jz3t13fhnjhh11t1k8b5gyp1",
-    "anthropic/claude-3-5-sonnet-20240620": "ast_01jd3923a6p98rqwh3dpj686pq",
-    "google/gemini-2.5-pro": "ast_01jz3tdb1fhey798k95pv61v57",
-    "google/gemini-2.5-flash": "ast_01jz3thxskg4fcdk4xhkjkym5a",
-    "google/gemini-2.5-flash-lite-preview-06-17": "ast_01jz3thxskg4fcdk4xhkjkym5b",
-    "lightning-ai/llama4-maverick": "ast_01k073vsqs66tenpns02cf5jnq"
+    "openai/gpt-4o": {
+        "id": "ast_01jdjds71fs8gt47jexzed4czs",
+        "context_length": 128000
+    },
+    "openai/gpt-4": {
+        "id": "ast_01jd38ze6tjbrcd4942nhz41zn",
+        "context_length": 8192
+    },
+    "openai/o3-mini": {
+        "id": "ast_01jz3t13fhnjhh11t1k8b5gyp1",
+        "context_length": 128000
+    },
+    "anthropic/claude-3-5-sonnet-20240620": {
+        "id": "ast_01jd3923a6p98rqwh3dpj686pq",
+        "context_length": 200000
+    },
+    "google/gemini-2.5-pro": {
+        "id": "ast_01jz3tdb1fhey798k95pv61v57",
+        "context_length": 1048576
+    },
+    "google/gemini-2.5-flash": {
+        "id": "ast_01jz3thxskg4fcdk4xhkjkym5a",
+        "context_length": 8000
+    },
+    "google/gemini-2.5-flash-lite-preview-06-17": {
+        "id": "ast_01jz3thxskg4fcdk4xhkjkym5b",
+        "context_length": 8000
+    },
+    "lightning-ai/llama4-maverick": {
+        "id": "ast_01k0wgg56tm8mv9n12aq2mnxas",
+        "context_length": 100000
+    }
 }

lightning_sdk/machine.py CHANGED Viewed

@@ -1,60 +1,113 @@
 from dataclasses import dataclass
+from enum import Enum
 from typing import Any, ClassVar, Optional, Tuple
+class CloudProvider(Enum):
+    AWS = "AWS"
+    GCP = "GCP"
+    VULTR = "VULTR"
+    LAMBDA_LABS = "LAMBDA_LABS"
+    DGX = "DGX"
+    VOLTAGE_PARK = "VOLTAGE_PARK"
+    NEBIUS = "NEBIUS"
+    LIGHTNING = "LIGHTNING"
+    def __str__(self) -> str:
+        """Converts the CloudProvider to a str."""
+        return self.value
 @dataclass(frozen=True)
 class Machine:
-    # Default Machines
-    CPU: ClassVar["Machine"]
+    # supported CPU variations
+    CPU_X_2: ClassVar["Machine"]
+    CPU_X_4: ClassVar["Machine"]
+    CPU_X_8: ClassVar["Machine"]
+    CPU_X_16: ClassVar["Machine"]
+    # default CPU machines
     CPU_SMALL: ClassVar["Machine"]
+    CPU: ClassVar["Machine"]
+    # supported data-prep variations (big disk)
     DATA_PREP: ClassVar["Machine"]
     DATA_PREP_MAX: ClassVar["Machine"]
     DATA_PREP_ULTRA: ClassVar["Machine"]
+    # supported GPU types
+    # supported T4 variations
     T4: ClassVar["Machine"]
+    T4_X_2: ClassVar["Machine"]
     T4_X_4: ClassVar["Machine"]
+    T4_X_8: ClassVar["Machine"]
+    # supported L4 variations
     L4: ClassVar["Machine"]
     L4_X_2: ClassVar["Machine"]
     L4_X_4: ClassVar["Machine"]
     L4_X_8: ClassVar["Machine"]
-    A10G: ClassVar["Machine"]
-    A10G_X_4: ClassVar["Machine"]
-    A10G_X_8: ClassVar["Machine"]
+    # supported L40S variations
     L40S: ClassVar["Machine"]
+    L40S_X_2: ClassVar["Machine"]
     L40S_X_4: ClassVar["Machine"]
     L40S_X_8: ClassVar["Machine"]
+    # supported A100 variations
+    # defaults, can be either A100 type depending on cloud provider availability
+    A100: ClassVar["Machine"]
     A100_X_2: ClassVar["Machine"]
     A100_X_4: ClassVar["Machine"]
     A100_X_8: ClassVar["Machine"]
-    B200_X_8: ClassVar["Machine"]
+    # A100 40GB versions
+    A100_40GB: ClassVar["Machine"]
+    A100_40GB_X_2: ClassVar["Machine"]
+    A100_40GB_X_4: ClassVar["Machine"]
+    A100_40GB_X_8: ClassVar["Machine"]
+    # A100 80GB versions
+    A100_80GB: ClassVar["Machine"]
+    A100_80GB_X_2: ClassVar["Machine"]
+    A100_80GB_X_4: ClassVar["Machine"]
+    A100_80GB_X_8: ClassVar["Machine"]
+    H100: ClassVar["Machine"]
+    H100_X_2: ClassVar["Machine"]
+    H100_X_4: ClassVar["Machine"]
     H100_X_8: ClassVar["Machine"]
+    H200: ClassVar["Machine"]
     H200_X_8: ClassVar["Machine"]
+    B200_X_8: ClassVar["Machine"]
+    # Specialized Machines
     name: str
-    instance_type: str
+    slug: str
+    instance_type: Optional[str] = None
+    family: Optional[str] = None
+    accelerator_count: Optional[int] = None
     cost: Optional[float] = None
     interruptible_cost: Optional[float] = None
     wait_time: Optional[float] = None
     interruptible_wait_time: Optional[float] = None
+    _include_in_cli: bool = True
     def __str__(self) -> str:
         """String representation of the Machine."""
-        return str(self.name) if self.name else str(self.instance_type)
+        return str(self.name) if self.name else (self.slug if self.slug else str(self.instance_type))
     def __eq__(self, other: object) -> bool:
         """Machines are equal if the instance type is equal."""
         if isinstance(other, Machine):
-            return self.instance_type == other.instance_type
+            return (
+                # equality based on raw instance type (provider specific)
+                (self.instance_type and self.instance_type == other.instance_type)
+                # equality based on slug (provider agnostic)
+                or self.slug == other.slug
+                # equality based on machine specs (e.g. A100_80GB_X_8 == A100_X_8)
+                or (self.family == other.family and self.accelerator_count == other.accelerator_count)
+            )
         return False
     def is_cpu(self) -> bool:
         """Whether the machine is a CPU."""
-        return (
-            self == Machine.CPU
-            or self == Machine.CPU_SMALL
-            or self == Machine.DATA_PREP
-            or self == Machine.DATA_PREP_MAX
-            or self == Machine.DATA_PREP_ULTRA
-        )
+        return self.family in ("CPU", "DATA_PREP")
     @classmethod
     def from_str(cls, machine: str, *additional_machine_ids: Any) -> "Machine":
@@ -63,34 +116,89 @@ class Machine:
         )
         for m in possible_values:
             for machine_id in [machine, *additional_machine_ids]:
-                if machine_id in (getattr(m, "name", None), getattr(m, "instance_type", None)):
+                if machine_id in (
+                    getattr(m, "name", None),
+                    getattr(m, "instance_type", None),
+                    getattr(m, "slug", None),
+                ):
                     return m
         if additional_machine_ids:
             return cls(machine, *additional_machine_ids)
-        return cls(machine, machine)
-Machine.CPU = Machine(name="CPU", instance_type="cpu-4")
-Machine.CPU_SMALL = Machine(name="CPU_SMALL", instance_type="n2d-standard-2")  # GCP
-Machine.DATA_PREP = Machine(name="DATA_PREP", instance_type="data-large")
-Machine.DATA_PREP_MAX = Machine(name="DATA_PREP_MAX", instance_type="data-max")
-Machine.DATA_PREP_ULTRA = Machine(name="DATA_PREP_ULTRA", instance_type="data-ultra")
-Machine.T4 = Machine(name="T4", instance_type="g4dn.2xlarge")
-Machine.T4_X_4 = Machine(name="T4_X_4", instance_type="g4dn.12xlarge")
-Machine.L4 = Machine(name="L4", instance_type="g6.4xlarge")
-Machine.L4_X_2 = Machine(name="L4_X_2", instance_type="g2-standard-24")  # GCP
-Machine.L4_X_4 = Machine(name="L4_X_4", instance_type="g6.12xlarge")
-Machine.L4_X_8 = Machine(name="L4_X_8", instance_type="g6.48xlarge")
-Machine.A10G = Machine(name="A10G", instance_type="g5.8xlarge")
-Machine.A10G_X_4 = Machine(name="A10G_X_4", instance_type="g5.12xlarge")
-Machine.A10G_X_8 = Machine(name="A10G_X_8", instance_type="g5.48xlarge")
-Machine.L40S = Machine(name="L40S", instance_type="g6e.4xlarge")
-Machine.L40S_X_4 = Machine(name="L40S_X_4", instance_type="g6e.12xlarge")
-Machine.L40S_X_8 = Machine(name="L40S_X_8", instance_type="g6e.48xlarge")
-Machine.A100_X_2 = Machine(name="A100_X_2", instance_type="a2-ultragpu-2g")  # GCP
-Machine.A100_X_4 = Machine(name="A100_X_4", instance_type="a2-ultragpu-4g")  # GCP
-Machine.A100_X_8 = Machine(name="A100_X_8", instance_type="p4d.24xlarge")
-Machine.B200_X_8 = Machine(name="B200_X_8", instance_type="a4-highgpu-8g")  # GCP
-Machine.H100_X_8 = Machine(name="H100_X_8", instance_type="p5.48xlarge")
-Machine.H200_X_8 = Machine(name="H200_X_8", instance_type="p5en.48xlarge")
+        return cls(machine, machine, machine)
+# CPU machines
+# default CPU machines
+Machine.CPU_SMALL = Machine(name="CPU_SMALL", slug="cpu-2", family="CPU", accelerator_count=2)
+Machine.CPU = Machine(name="CPU", slug="cpu-4", family="CPU", accelerator_count=4)
+# available CPU variations
+Machine.CPU_X_2 = Machine(name="CPU_X_2", slug="cpu-2", family="CPU", accelerator_count=2)
+Machine.CPU_X_4 = Machine(name="CPU_X_4", slug="cpu-4", family="CPU", accelerator_count=4)
+Machine.CPU_X_8 = Machine(name="CPU_X_8", slug="cpu-8", family="CPU", accelerator_count=8)
+Machine.CPU_X_16 = Machine(name="CPU_X_16", slug="cpu-16", family="CPU", accelerator_count=16)
+# available data-prep (big disk) machines
+Machine.DATA_PREP = Machine(name="DATA_PREP", slug="data-prep-mid", family="DATA_PREP", accelerator_count=32)
+Machine.DATA_PREP_MAX = Machine(
+    name="DATA_PREP_MAX", slug="data-prep-max-large", family="DATA_PREP", accelerator_count=64
+)
+Machine.DATA_PREP_ULTRA = Machine(
+    name="DATA_PREP_ULTRA", slug="data-prep-ultra-extra-large", family="DATA_PREP", accelerator_count=96
+)
+# GPU machines
+# available T4 machines
+Machine.T4 = Machine(name="T4", slug="lit-t4-1", family="T4", accelerator_count=1)
+Machine.T4_X_2 = Machine(name="T4_X_2", slug="lit-t4-2", family="T4", accelerator_count=2)
+Machine.T4_X_4 = Machine(name="T4_X_4", slug="lit-t4-4", family="T4", accelerator_count=4)
+Machine.T4_X_8 = Machine(name="T4_X_8", slug="lit-t4-8", family="T4", accelerator_count=8)
+# available L4 machines
+Machine.L4 = Machine(name="L4", slug="lit-l4-1", family="L4", accelerator_count=1)
+Machine.L4_X_2 = Machine(name="L4_X_2", slug="lit-l4-2", family="L4", accelerator_count=2)
+Machine.L4_X_4 = Machine(name="L4_X_4", slug="lit-l4-4", family="L4", accelerator_count=4)
+Machine.L4_X_8 = Machine(name="L4_X_8", slug="lit-l4-8", family="L4", accelerator_count=8)
+# available L40S machines
+Machine.L40S = Machine(name="L40S", slug="lit-l40s-1", family="L40S", accelerator_count=1)
+Machine.L40S_X_2 = Machine(name="L40S_X_2", slug="lit-l40s-2", family="L40S", accelerator_count=2)
+Machine.L40S_X_4 = Machine(name="L40S_X_4", slug="lit-l40s-4", family="L40S", accelerator_count=4)
+Machine.L40S_X_8 = Machine(name="L40S_X_8", slug="lit-l40s-8", family="L40S", accelerator_count=8)
+# available A100 Machines
+Machine.A100 = Machine(name="A100", slug="lit-a100-1", family="A100", accelerator_count=1)
+Machine.A100_X_2 = Machine(name="A100_X_2", slug="lit-a100-2", family="A100", accelerator_count=2)
+Machine.A100_X_4 = Machine(name="A100_X_4", slug="lit-a100-4", family="A100", accelerator_count=4)
+Machine.A100_X_8 = Machine(name="A100_X_8", slug="lit-a100-8", family="A100", accelerator_count=8)
+# don't include variants in cli, only default types that can match for all variants
+Machine.A100_40GB = Machine(
+    name="A100_40GB", slug="lit-a100-40gb-1", family="A100", accelerator_count=1, _include_in_cli=False
+)
+Machine.A100_40GB_X_2 = Machine(
+    name="A100_40GB_X_2", slug="lit-a100-40gb-2", family="A100", accelerator_count=2, _include_in_cli=False
+)
+Machine.A100_40GB_X_4 = Machine(
+    name="A100_40GB_X_4", slug="lit-a100-40gb-4", family="A100", accelerator_count=4, _include_in_cli=False
+)
+Machine.A100_40GB_X_8 = Machine(
+    name="A100_40GB_X_8", slug="lit-a100-40gb-8", family="A100", accelerator_count=8, _include_in_cli=False
+)
+Machine.A100_80GB = Machine(
+    name="A100_80GB", slug="lit-a100-80gb-1", family="A100", accelerator_count=1, _include_in_cli=False
+)
+Machine.A100_80GB_X_2 = Machine(
+    name="A100_80GB_X_2", slug="lit-a100-80gb-2", family="A100", accelerator_count=2, _include_in_cli=False
+)
+Machine.A100_80GB_X_4 = Machine(
+    name="A100_80GB_X_4", slug="lit-a100-80gb-4", family="A100", accelerator_count=4, _include_in_cli=False
+)
+Machine.A100_80GB_X_8 = Machine(
+    name="A100_80GB_X_8", slug="lit-a100-80gb-8", family="A100", accelerator_count=8, _include_in_cli=False
+)
+# available H100 machines
+Machine.H100 = Machine(name="H100", slug="lit-h100-1", family="H100", accelerator_count=1)
+Machine.H100_X_2 = Machine(name="H100_X_2", slug="lit-h100-2", family="H100", accelerator_count=2)
+Machine.H100_X_4 = Machine(name="H100_X_4", slug="lit-h100-4", family="H100", accelerator_count=4)
+Machine.H100_X_8 = Machine(name="H100_X_8", slug="lit-h100-8", family="H100", accelerator_count=8)
+# available H200 machines
+Machine.H200 = Machine(name="H200", slug="lit-h200x-1", family="H200", accelerator_count=1)
+Machine.H200_X_8 = Machine(name="H200_X_8", slug="lit-h200x-8", family="H200", accelerator_count=8)
+# available B200 machines
+Machine.B200_X_8 = Machine(name="B200_X_8", slug="lit-b200x-8", family="B200", accelerator_count=8)

lightning_sdk/mmt/base.py CHANGED Viewed

@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Protocol, Tuple, Union
 if TYPE_CHECKING:
     from lightning_sdk.job.base import MachineDict
-    from lightning_sdk.machine import Machine
+    from lightning_sdk.machine import CloudProvider, Machine
     from lightning_sdk.organization import Organization
     from lightning_sdk.status import Status
     from lightning_sdk.studio import Studio
@@ -64,12 +64,14 @@ class _BaseMMT(_BaseJob):
         org: Union[str, "Organization", None] = None,
         user: Union[str, "User", None] = None,
         cloud_account: Optional[str] = None,
+        cloud_provider: Optional[Union["CloudProvider", str]] = None,
         env: Optional[Dict[str, str]] = None,
         interruptible: bool = False,
         image_credentials: Optional[str] = None,
         cloud_account_auth: bool = False,
         entrypoint: str = "sh -c",
         path_mappings: Optional[Dict[str, str]] = None,
+        max_runtime: Optional[int] = None,
         artifacts_local: Optional[str] = None,  # deprecated in favor of path_mappings
         artifacts_remote: Optional[str] = None,  # deprecated in favor of path_mappings
         cluster: Optional[str] = None,  # deprecated in favor of cloud_account
@@ -89,7 +91,11 @@ class _BaseMMT(_BaseJob):
             user: The user owning the teamspace (if any). Defaults to the current user.
             cloud_account: The cloud account to run the job on.
                 Defaults to the studio cloud account if running with studio compute env.
-                If not provided will fall back to the teamspaces default cloud account.
+                If not provided and `cloud_provider` is set, will resolve cluster from this, else
+                will fall back to the teamspaces default cloud account.
+            cloud_provider: The provider to select the cloud-account from.
+                If set, must be in agreement with the provider from the cloud_account (if specified).
+                If not specified, falls back to the teamspace default cloud account.
             env: Environment variables to set inside the job.
             interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
             image_credentials: The credentials used to pull the image. Required if the image is private.
@@ -109,6 +115,10 @@ class _BaseMMT(_BaseJob):
                     }
                 If the path inside the connection is omitted it's assumed to be the root path of that connection.
                 Only applicable when submitting docker jobs.
+            max_runtime: the duration (in seconds) for which to allocate the machine.
+                Irrelevant for most machines, required for some of the top-end machines on GCP.
+                If in doubt, set it. Won't have an effect on machines not requiring it.
+                Defaults to 3h
         """
         from lightning_sdk.lightning_cloud.openapi.rest import ApiException
         from lightning_sdk.studio import Studio
@@ -191,6 +201,7 @@ class _BaseMMT(_BaseJob):
             num_machines=num_machines,
             machine=machine,
             cloud_account=cloud_account,
+            cloud_provider=cloud_provider,
             command=command,
             studio=studio,
             image=image,
@@ -202,6 +213,7 @@ class _BaseMMT(_BaseJob):
             path_mappings=path_mappings,
             artifacts_local=artifacts_local,
             artifacts_remote=artifacts_remote,
+            max_runtime=max_runtime,
         )
         return inst
@@ -216,12 +228,14 @@ class _BaseMMT(_BaseJob):
         env: Optional[Dict[str, str]] = None,
         interruptible: bool = False,
         cloud_account: Optional[str] = None,
+        cloud_provider: Optional[Union["CloudProvider", str]] = None,
         image_credentials: Optional[str] = None,
         cloud_account_auth: bool = False,
         entrypoint: str = "sh -c",
         path_mappings: Optional[Dict[str, str]] = None,
         artifacts_local: Optional[str] = None,  # deprecated in favor of path_mappings
         artifacts_remote: Optional[str] = None,  # deprecated in favor of path_mappings
+        max_runtime: Optional[int] = None,
     ) -> None:
         """Submit a new multi-machine job to the Lightning AI platform.
@@ -253,6 +267,10 @@ class _BaseMMT(_BaseJob):
                     }
                 If the path inside the connection is omitted it's assumed to be the root path of that connection.
                 Only applicable when submitting docker jobs.
+            max_runtime: the duration (in seconds) for which to allocate the machine.
+                Irrelevant for most machines, required for some of the top-end machines on GCP.
+                If in doubt, set it. Won't have an effect on machines not requiring it.
+                Defaults to 3h
         """
     @property

lightning_sdk/mmt/mmt.py CHANGED Viewed

@@ -1,12 +1,13 @@
 from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+from lightning_sdk.api.cloud_account_api import CloudAccountApi
 from lightning_sdk.mmt.base import MMTMachine, _BaseMMT
 from lightning_sdk.mmt.v1 import _MMTV1
 from lightning_sdk.mmt.v2 import _MMTV2
 from lightning_sdk.utils.resolve import _setup_logger
 if TYPE_CHECKING:
-    from lightning_sdk.machine import Machine
+    from lightning_sdk.machine import CloudProvider, Machine
     from lightning_sdk.organization import Organization
     from lightning_sdk.status import Status
     from lightning_sdk.studio import Studio
@@ -75,6 +76,7 @@ class MMT(_BaseMMT):
             )
         self._internal_mmt = mmt
+        self._cloud_account_api = CloudAccountApi()
     @classmethod
     def run(
@@ -89,12 +91,14 @@ class MMT(_BaseMMT):
         org: Union[str, "Organization", None] = None,
         user: Union[str, "User", None] = None,
         cloud_account: Optional[str] = None,
+        cloud_provider: Optional[Union["CloudProvider", str]] = None,
         env: Optional[Dict[str, str]] = None,
         interruptible: bool = False,
         image_credentials: Optional[str] = None,
         cloud_account_auth: bool = False,
         entrypoint: str = "sh -c",
         path_mappings: Optional[Dict[str, str]] = None,
+        max_runtime: Optional[int] = None,
         artifacts_local: Optional[str] = None,
         artifacts_remote: Optional[str] = None,
         cluster: Optional[str] = None,  # deprecated in favor of cloud_account
@@ -114,7 +118,11 @@ class MMT(_BaseMMT):
             user: The user owning the teamspace (if any). Defaults to the current user.
             cloud_account: The cloud account to run the job on.
                 Defaults to the studio cloud account if running with studio compute env.
-                If not provided will fall back to the teamspaces default cloud account.
+                If not provided and `cloud_provider` is set, will resolve cluster from this, else
+                will fall back to the teamspaces default cloud account.
+            cloud_provider: The provider to select the cloud-account from.
+                If set, must be in agreement with the provider from the cloud_account (if specified).
+                If not specified, falls back to the teamspace default cloud account.
             env: Environment variables to set inside the job.
             interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
             image_credentials: The credentials used to pull the image. Required if the image is private.
@@ -145,6 +153,7 @@ class MMT(_BaseMMT):
             org=org,
             user=user,
             cloud_account=cloud_account,
+            cloud_provider=cloud_provider,
             env=env,
             interruptible=interruptible,
             image_credentials=image_credentials,
@@ -154,6 +163,7 @@ class MMT(_BaseMMT):
             artifacts_local=artifacts_local,
             artifacts_remote=artifacts_remote,
             cluster=cluster,  # deprecated in favor of cloud_account
+            max_runtime=max_runtime,
         )
         # required for typing with "MMT"
         assert isinstance(ret_val, cls)
@@ -173,10 +183,12 @@ class MMT(_BaseMMT):
         env: Optional[Dict[str, str]] = None,
         interruptible: bool = False,
         cloud_account: Optional[str] = None,
+        cloud_provider: Optional[Union["CloudProvider", str]] = None,
         image_credentials: Optional[str] = None,
         cloud_account_auth: bool = False,
         entrypoint: str = "sh -c",
         path_mappings: Optional[Dict[str, str]] = None,
+        max_runtime: Optional[int] = None,
         artifacts_local: Optional[str] = None,  # deprecated in favor of path_mappings
         artifacts_remote: Optional[str] = None,  # deprecated in favor of path_mappings
     ) -> "MMT":
@@ -193,7 +205,11 @@ class MMT(_BaseMMT):
             interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
             cloud_account: The cloud account to run the job on.
                 Defaults to the studio cloud account if running with studio compute env.
-                If not provided will fall back to the teamspaces default cloud account.
+                If not provided and `cloud_provider` is set, will resolve cluster from this, else
+                will fall back to the teamspaces default cloud account.
+            cloud_provider: The provider to select the cloud-account from.
+                If set, must be in agreement with the provider from the cloud_account (if specified).
+                If not specified, falls back to the teamspace default cloud account.
             image_credentials: The credentials used to pull the image. Required if the image is private.
                 This should be the name of the respective credentials secret created on the Lightning AI platform.
             cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
@@ -211,11 +227,16 @@ class MMT(_BaseMMT):
                     }
                 If the path inside the connection is omitted it's assumed to be the root path of that connection.
                 Only applicable when submitting docker jobs.
+            max_runtime: the duration (in seconds) for which to allocate the machine.
+                Irrelevant for most machines, required for some of the top-end machines on GCP.
+                If in doubt, set it. Won't have an effect on machines not requiring it.
+                Defaults to 3h
         """
         self._job = self._internal_mmt._submit(
             num_machines=num_machines,
             machine=machine,
             cloud_account=cloud_account,
+            cloud_provider=cloud_provider,
             command=command,
             studio=studio,
             image=image,
@@ -227,6 +248,7 @@ class MMT(_BaseMMT):
             path_mappings=path_mappings,
             artifacts_local=artifacts_local,
             artifacts_remote=artifacts_remote,
+            max_runtime=max_runtime,
         )
         return self

lightning-sdk 2025.7.17__py3-none-any.whl → 2025.7.30rc0__py3-none-any.whl

lightning-sdk 2025.7.17__py3-none-any.whl → 2025.7.30rc0__py3-none-any.whl