lightning-sdk 0.1.53__py3-none-any.whl → 0.1.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. lightning_sdk/__init__.py +1 -1
  2. lightning_sdk/api/deployment_api.py +9 -1
  3. lightning_sdk/api/job_api.py +1 -1
  4. lightning_sdk/api/lit_container_api.py +29 -8
  5. lightning_sdk/cli/delete.py +27 -0
  6. lightning_sdk/cli/entrypoint.py +6 -0
  7. lightning_sdk/cli/generate.py +58 -0
  8. lightning_sdk/cli/list.py +48 -0
  9. lightning_sdk/cli/start.py +43 -0
  10. lightning_sdk/cli/stop.py +26 -0
  11. lightning_sdk/cli/switch.py +43 -0
  12. lightning_sdk/deployment/deployment.py +12 -3
  13. lightning_sdk/job/base.py +7 -2
  14. lightning_sdk/job/job.py +1 -12
  15. lightning_sdk/job/v1.py +1 -32
  16. lightning_sdk/job/v2.py +6 -1
  17. lightning_sdk/lightning_cloud/openapi/__init__.py +2 -0
  18. lightning_sdk/lightning_cloud/openapi/api/cluster_service_api.py +10 -2
  19. lightning_sdk/lightning_cloud/openapi/api/lit_registry_service_api.py +210 -0
  20. lightning_sdk/lightning_cloud/openapi/models/__init__.py +2 -0
  21. lightning_sdk/lightning_cloud/openapi/models/cluster_id_usagerestrictions_body.py +27 -1
  22. lightning_sdk/lightning_cloud/openapi/models/usagerestrictions_id_body.py +27 -1
  23. lightning_sdk/lightning_cloud/openapi/models/v1_cloud_provider.py +3 -0
  24. lightning_sdk/lightning_cloud/openapi/models/v1_cluster_accelerator.py +27 -1
  25. lightning_sdk/lightning_cloud/openapi/models/v1_job.py +27 -1
  26. lightning_sdk/lightning_cloud/openapi/models/v1_job_spec.py +27 -1
  27. lightning_sdk/lightning_cloud/openapi/models/v1_lambda_labs_direct_v1.py +55 -3
  28. lightning_sdk/lightning_cloud/openapi/models/v1_list_lit_registry_repository_image_artifact_versions_response.py +231 -0
  29. lightning_sdk/lightning_cloud/openapi/models/v1_lit_registry_artifact.py +253 -0
  30. lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +27 -53
  31. lightning_sdk/mmt/mmt.py +7 -6
  32. lightning_sdk/plugin.py +5 -3
  33. {lightning_sdk-0.1.53.dist-info → lightning_sdk-0.1.55.dist-info}/METADATA +1 -1
  34. {lightning_sdk-0.1.53.dist-info → lightning_sdk-0.1.55.dist-info}/RECORD +38 -33
  35. {lightning_sdk-0.1.53.dist-info → lightning_sdk-0.1.55.dist-info}/LICENSE +0 -0
  36. {lightning_sdk-0.1.53.dist-info → lightning_sdk-0.1.55.dist-info}/WHEEL +0 -0
  37. {lightning_sdk-0.1.53.dist-info → lightning_sdk-0.1.55.dist-info}/entry_points.txt +0 -0
  38. {lightning_sdk-0.1.53.dist-info → lightning_sdk-0.1.55.dist-info}/top_level.txt +0 -0
lightning_sdk/__init__.py CHANGED
@@ -29,5 +29,5 @@ __all__ = [
29
29
  "AIHub",
30
30
  ]
31
31
 
32
- __version__ = "0.1.53"
32
+ __version__ = "0.1.55"
33
33
  _check_version_and_prompt_upgrade(__version__)
@@ -213,6 +213,14 @@ class DeploymentApi:
213
213
  return None
214
214
  raise ex
215
215
 
216
+ def get_deployment_by_id(self, deployment_id: str, teamspace_id: str) -> Optional[V1Deployment]:
217
+ try:
218
+ return self._client.jobs_service_get_deployment(project_id=teamspace_id, id=deployment_id)
219
+ except ApiException as ex:
220
+ if "Reason: Not Found" in str(ex):
221
+ return None
222
+ raise ex
223
+
216
224
  def create_deployment(
217
225
  self,
218
226
  deployment: V1Deployment,
@@ -269,7 +277,7 @@ class DeploymentApi:
269
277
  requires_release |= apply_change(deployment.spec, "entrypoint", entrypoint)
270
278
  requires_release |= apply_change(deployment.spec, "command", command)
271
279
  requires_release |= apply_change(deployment.spec, "env", to_env(env))
272
- requires_release |= apply_change(deployment.spec, "env", to_health_check(health_check))
280
+ requires_release |= apply_change(deployment.spec, "readiness_probe", to_health_check(health_check))
273
281
  requires_release |= apply_change(deployment.spec, "cluster_id", cloud_account)
274
282
  requires_release |= apply_change(deployment.spec, "spot", spot)
275
283
 
@@ -275,7 +275,7 @@ class JobApiV2:
275
275
  return
276
276
 
277
277
  if current_state != Status.Stopping:
278
- update_body = JobsIdBody1(cloudspace_id=current_job.spec.cloudspace_id, state=self.v2_job_state_stop)
278
+ update_body = JobsIdBody1(state=self.v2_job_state_stop)
279
279
  self._client.jobs_service_update_job(body=update_body, project_id=teamspace_id, id=job_id)
280
280
 
281
281
  while True:
@@ -1,6 +1,7 @@
1
- from typing import Generator, List
1
+ from typing import Any, Callable, Generator, List
2
2
 
3
3
  import docker
4
+ import requests
4
5
 
5
6
  from lightning_sdk.api.utils import _get_registry_url
6
7
  from lightning_sdk.lightning_cloud.env import LIGHTNING_CLOUD_URL
@@ -11,7 +12,22 @@ from lightning_sdk.teamspace import Teamspace
11
12
 
12
13
  class LCRAuthFailedError(Exception):
13
14
  def __init__(self) -> None:
14
- super().__init__("Failed to authenticate with Lightning Container Registry")
15
+ super().__init__(
16
+ "Failed to authenticate with Lightning Container Registry. Please login manually "
17
+ "using the following command:\n "
18
+ "echo $LIGHTNING_API_KEY | docker login litcr.io --username=LIGHTNING_USERNAME --password-stdin"
19
+ )
20
+
21
+
22
+ def retry_on_lcr_auth_failure(func: Callable) -> Callable:
23
+ def wrapper(self: "LitContainerApi", *args: Any, **kwargs: Any) -> Callable:
24
+ try:
25
+ return func(self, *args, **kwargs)
26
+ except LCRAuthFailedError:
27
+ self.authenticate()
28
+ return func(self, *args, **kwargs)
29
+
30
+ return wrapper
15
31
 
16
32
 
17
33
  class LitContainerApi:
@@ -38,35 +54,40 @@ class LitContainerApi:
38
54
  def delete_container(self, project_id: str, container: str) -> V1DeleteLitRepositoryResponse:
39
55
  try:
40
56
  return self._client.lit_registry_service_delete_lit_repository(project_id, container)
41
- except Exception as ex:
42
- raise ValueError(f"Could not delete container {container} from project {project_id}") from ex
57
+ except Exception as e:
58
+ raise ValueError(f"Could not delete container {container} from project {project_id}: {e!s}") from e
43
59
 
44
- def upload_container(self, container: str, teamspace: Teamspace, tag: str) -> Generator[str, None, None]:
60
+ def upload_container(self, container: str, teamspace: Teamspace, tag: str) -> Generator[dict, None, None]:
45
61
  try:
46
62
  self._docker_client.images.get(container)
47
63
  except docker.errors.ImageNotFound:
48
64
  raise ValueError(f"Container {container} does not exist") from None
49
65
 
50
66
  registry_url = _get_registry_url()
51
- repository = f"{registry_url}/lit-container/{teamspace.owner.name}/{teamspace.name}/{container}"
67
+ container_basename = container.split("/")[-1]
68
+ repository = f"{registry_url}/lit-container/{teamspace.owner.name}/{teamspace.name}/{container_basename}"
52
69
  tagged = self._docker_client.api.tag(container, repository, tag)
53
70
  if not tagged:
54
71
  raise ValueError(f"Could not tag container {container} with {repository}:{tag}")
55
72
  lines = self._docker_client.api.push(repository, stream=True, decode=True)
56
73
  for line in lines:
57
- if "errorDetail" in line and "authorization failed" in line["error"]:
74
+ if "errorDetail" in line and ("authorization failed" in line["error"] or "unauth" in line["error"]):
58
75
  raise LCRAuthFailedError()
59
76
  yield line
60
77
  yield {
61
78
  "finish": True,
62
- "url": f"{LIGHTNING_CLOUD_URL}/{teamspace.owner.name}/{teamspace.name}/containers/{container}",
79
+ "url": f"{LIGHTNING_CLOUD_URL}/{teamspace.owner.name}/{teamspace.name}/containers/{container_basename}",
63
80
  }
64
81
 
82
+ @retry_on_lcr_auth_failure
65
83
  def download_container(self, container: str, teamspace: Teamspace, tag: str) -> Generator[str, None, None]:
66
84
  registry_url = _get_registry_url()
67
85
  repository = f"{registry_url}/lit-container/{teamspace.owner.name}/{teamspace.name}/{container}"
68
86
  try:
69
87
  self._docker_client.images.pull(repository, tag=tag)
88
+ except requests.exceptions.HTTPError as e:
89
+ if "unauthorized" in e.response.text:
90
+ raise LCRAuthFailedError() from e
70
91
  except docker.errors.APIError as e:
71
92
  raise ValueError(f"Could not pull container {container} from {repository}:{tag}") from e
72
93
  return self._docker_client.api.tag(repository, container, tag)
@@ -4,6 +4,7 @@ from lightning_sdk.cli.exceptions import StudioCliError
4
4
  from lightning_sdk.cli.job_and_mmt_action import _JobAndMMTAction
5
5
  from lightning_sdk.cli.teamspace_menu import _TeamspacesMenu
6
6
  from lightning_sdk.lit_container import LitContainer
7
+ from lightning_sdk.studio import Studio
7
8
 
8
9
 
9
10
  class _Delete(_JobAndMMTAction, _TeamspacesMenu):
@@ -56,3 +57,29 @@ class _Delete(_JobAndMMTAction, _TeamspacesMenu):
56
57
 
57
58
  mmt.delete()
58
59
  print(f"Successfully deleted {mmt.name}!")
60
+
61
+ def studio(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
62
+ """Delete an existing studio.
63
+
64
+ Args:
65
+ name: The name of the studio to delete.
66
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
67
+ Note: This could delete your current studio if run without arguments.
68
+ teamspace: The teamspace the studio is part of. Should be of format <OWNER>/<TEAMSPACE_NAME>.
69
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
70
+ """
71
+ if teamspace is not None:
72
+ ts_splits = teamspace.split("/")
73
+ if len(ts_splits) != 2:
74
+ raise ValueError(f"Teamspace should be of format <OWNER>/<TEAMSPACE_NAME> but got {teamspace}")
75
+ owner, teamspace = ts_splits
76
+ else:
77
+ owner, teamspace = None, None
78
+
79
+ try:
80
+ studio = Studio(name=name, teamspace=teamspace, org=owner, user=None, create_ok=False)
81
+ except (RuntimeError, ValueError):
82
+ studio = Studio(name=name, teamspace=teamspace, org=None, user=owner, create_ok=False)
83
+
84
+ studio.delete()
85
+ print("Studio successfully deleted")
@@ -9,12 +9,15 @@ from lightning_sdk.api.studio_api import _cloud_url
9
9
  from lightning_sdk.cli.ai_hub import _AIHub
10
10
  from lightning_sdk.cli.delete import _Delete
11
11
  from lightning_sdk.cli.download import _Downloads
12
+ from lightning_sdk.cli.generate import _Generate
12
13
  from lightning_sdk.cli.inspect import _Inspect
13
14
  from lightning_sdk.cli.legacy import _LegacyLightningCLI
14
15
  from lightning_sdk.cli.list import _List
15
16
  from lightning_sdk.cli.run import _Run
16
17
  from lightning_sdk.cli.serve import _Docker, _LitServe
18
+ from lightning_sdk.cli.start import _Start
17
19
  from lightning_sdk.cli.stop import _Stop
20
+ from lightning_sdk.cli.switch import _Switch
18
21
  from lightning_sdk.cli.upload import _Uploads
19
22
  from lightning_sdk.lightning_cloud.login import Auth
20
23
 
@@ -35,6 +38,9 @@ class StudioCLI:
35
38
  self.delete = _Delete()
36
39
  self.inspect = _Inspect()
37
40
  self.stop = _Stop()
41
+ self.start = _Start()
42
+ self.switch = _Switch()
43
+ self.generate = _Generate()
38
44
 
39
45
  sys.excepthook = _notify_exception
40
46
 
@@ -0,0 +1,58 @@
1
+ from typing import Optional
2
+
3
+ from rich.console import Console
4
+
5
+ from lightning_sdk import Studio
6
+
7
+
8
+ class _Generate:
9
+ """Generate configs (such as ssh for studio) and print them to commandline."""
10
+
11
+ console = Console()
12
+
13
+ def _generate_ssh_config(self, name: str, studio_id: str) -> str:
14
+ """Generate SSH config entry for the studio.
15
+
16
+ Args:
17
+ name: Studio name
18
+ studio_id: Studio space ID
19
+
20
+ Returns:
21
+ str: SSH config entry
22
+ """
23
+ return f"""# ssh s_{studio_id}@ssh.lightning.ai
24
+
25
+ Host {name}
26
+ User s_{studio_id}
27
+ Hostname ssh.lightning.ai
28
+ IdentityFile ~/.ssh/lightning_rsa
29
+ IdentitiesOnly yes
30
+ ServerAliveInterval 15
31
+ ServerAliveCountMax 4
32
+ StrictHostKeyChecking no
33
+ UserKnownHostsFile=/dev/null"""
34
+
35
+ def ssh(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
36
+ """Get SSH config entry for a studio. Will start the studio if needed.
37
+
38
+ Args:
39
+ name: The name of the studio to stop.
40
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
41
+ teamspace: The teamspace the studio is part of. Should be of format <OWNER>/<TEAMSPACE_NAME>.
42
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
43
+ """
44
+ if teamspace:
45
+ ts_splits = teamspace.split("/")
46
+ if len(ts_splits) != 2:
47
+ raise ValueError(f"Teamspace should be of format <OWNER>/<TEAMSPACE_NAME> but got {teamspace}")
48
+ owner, teamspace = ts_splits
49
+ else:
50
+ owner, teamspace = None, None
51
+
52
+ try:
53
+ studio = Studio(name=name, teamspace=teamspace, org=owner, user=None, create_ok=False)
54
+ except (RuntimeError, ValueError):
55
+ studio = Studio(name=name, teamspace=teamspace, org=None, user=owner, create_ok=False)
56
+
57
+ # Print the SSH config
58
+ self.console.print(self._generate_ssh_config(name, studio._studio.id))
lightning_sdk/cli/list.py CHANGED
@@ -3,6 +3,7 @@ from typing import Optional
3
3
  from rich.console import Console
4
4
  from rich.table import Table
5
5
 
6
+ from lightning_sdk import Machine
6
7
  from lightning_sdk.cli.teamspace_menu import _TeamspacesMenu
7
8
  from lightning_sdk.lit_container import LitContainer
8
9
 
@@ -10,6 +11,38 @@ from lightning_sdk.lit_container import LitContainer
10
11
  class _List(_TeamspacesMenu):
11
12
  """List resources on the Lightning AI platform."""
12
13
 
14
+ def studios(self, teamspace: Optional[str] = None) -> None:
15
+ """List studios for a given teamspace.
16
+
17
+ Args:
18
+ teamspace: the teamspace to list studios from. Should be specified as {owner}/{name}
19
+ If not provided, can be selected in an interactive menu.
20
+
21
+ """
22
+ resolved_teamspace = self._resolve_teamspace(teamspace=teamspace)
23
+
24
+ studios = resolved_teamspace.studios
25
+
26
+ table = Table(
27
+ pad_edge=True,
28
+ )
29
+ table.add_column("Name")
30
+ table.add_column("Teamspace")
31
+ table.add_column("Status")
32
+ table.add_column("Machine")
33
+ table.add_column("Cloud account")
34
+ for studio in studios:
35
+ table.add_row(
36
+ studio.name,
37
+ f"{studio.teamspace.owner.name}/{studio.teamspace.name}",
38
+ str(studio.status),
39
+ str(studio.machine) if studio.machine is not None else None,
40
+ str(studio.cloud_account),
41
+ )
42
+
43
+ console = Console()
44
+ console.print(table)
45
+
13
46
  def jobs(self, teamspace: Optional[str] = None) -> None:
14
47
  """List jobs for a given teamspace.
15
48
 
@@ -110,3 +143,18 @@ class _List(_TeamspacesMenu):
110
143
  table.add_row(repo["REPOSITORY"], repo["IMAGE ID"], repo["CREATED"])
111
144
  console = Console()
112
145
  console.print(table)
146
+
147
+ def machines(self) -> None:
148
+ """Display the list of available machines."""
149
+ table = Table(pad_edge=True)
150
+ table.add_column("Name")
151
+
152
+ # Get all machine types from the enum
153
+ machine_types = [name for name in dir(Machine) if not name.startswith("_")]
154
+
155
+ # Add rows to table
156
+ for name in sorted(machine_types):
157
+ table.add_row(name)
158
+
159
+ console = Console()
160
+ console.print(table)
@@ -0,0 +1,43 @@
1
+ from typing import Optional
2
+
3
+ from lightning_sdk import Machine, Studio
4
+
5
+
6
+ class _Start:
7
+ """Start resources on the Lightning AI platform."""
8
+
9
+ def __init__(self) -> None:
10
+ _machine_values = tuple([machine.value for machine in Machine])
11
+
12
+ docstr_studio = f"""Start a studio on a given machine.
13
+
14
+ Args:
15
+ name: The name of the studio to start.
16
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
17
+ teamspace: The teamspace the studio is part of. Should be of format <OWNER>/<TEAMSPACE_NAME>.
18
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
19
+ machine: The machine type to start the studio on. One of {", ".join(_machine_values)}.
20
+ Defaults to the CPU Machine.
21
+ """
22
+ self.studio.__func__.__doc__ = docstr_studio
23
+
24
+ def studio(self, name: Optional[str] = None, teamspace: Optional[str] = None, machine: str = "CPU") -> None:
25
+ if teamspace is not None:
26
+ ts_splits = teamspace.split("/")
27
+ if len(ts_splits) != 2:
28
+ raise ValueError(f"Teamspace should be of format <OWNER>/<TEAMSPACE_NAME> but got {teamspace}")
29
+ owner, teamspace = ts_splits
30
+ else:
31
+ owner, teamspace = None, None
32
+
33
+ try:
34
+ studio = Studio(name=name, teamspace=teamspace, org=owner, user=None, create_ok=False)
35
+ except (RuntimeError, ValueError):
36
+ studio = Studio(name=name, teamspace=teamspace, org=None, user=owner, create_ok=False)
37
+
38
+ try:
39
+ resolved_machine = Machine[machine.upper()]
40
+ except KeyError:
41
+ resolved_machine = machine
42
+
43
+ studio.start(resolved_machine)
lightning_sdk/cli/stop.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from typing import Optional
2
2
 
3
3
  from lightning_sdk.cli.job_and_mmt_action import _JobAndMMTAction
4
+ from lightning_sdk.studio import Studio
4
5
 
5
6
 
6
7
  class _Stop(_JobAndMMTAction):
@@ -35,3 +36,28 @@ class _Stop(_JobAndMMTAction):
35
36
 
36
37
  mmt.stop()
37
38
  print(f"Successfully stopped {mmt.name}!")
39
+
40
+ def studio(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
41
+ """Stop a running studio.
42
+
43
+ Args:
44
+ name: The name of the studio to stop.
45
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
46
+ teamspace: The teamspace the studio is part of. Should be of format <OWNER>/<TEAMSPACE_NAME>.
47
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
48
+ """
49
+ if teamspace is not None:
50
+ ts_splits = teamspace.split("/")
51
+ if len(ts_splits) != 2:
52
+ raise ValueError(f"Teamspace should be of format <OWNER>/<TEAMSPACE_NAME> but got {teamspace}")
53
+ owner, teamspace = ts_splits
54
+ else:
55
+ owner, teamspace = None, None
56
+
57
+ try:
58
+ studio = Studio(name=name, teamspace=teamspace, org=owner, user=None, create_ok=False)
59
+ except (RuntimeError, ValueError):
60
+ studio = Studio(name=name, teamspace=teamspace, org=None, user=owner, create_ok=False)
61
+
62
+ studio.stop()
63
+ print("Studio successfully stopped")
@@ -0,0 +1,43 @@
1
+ from typing import Optional
2
+
3
+ from lightning_sdk import Machine, Studio
4
+
5
+
6
+ class _Switch:
7
+ """Switch machines for resources on the Lightning AI platform."""
8
+
9
+ def __init__(self) -> None:
10
+ _machine_values = tuple([machine.value for machine in Machine])
11
+
12
+ docstr_studio = f"""Switch a studio to a given machine.
13
+
14
+ Args:
15
+ name: The name of the studio to start.
16
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
17
+ teamspace: The teamspace the studio is part of. Should be of format <OWNER>/<TEAMSPACE_NAME>.
18
+ If not specified, tries to infer from the environment (e.g. when run from within a Studio.)
19
+ machine: The machine type to switch to. One of {", ".join(_machine_values)}.
20
+ Defaults to the CPU Machine.
21
+ """
22
+ self.studio.__func__.__doc__ = docstr_studio
23
+
24
+ def studio(self, name: Optional[str] = None, teamspace: Optional[str] = None, machine: str = "CPU") -> None:
25
+ if teamspace is not None:
26
+ ts_splits = teamspace.split("/")
27
+ if len(ts_splits) != 2:
28
+ raise ValueError(f"Teamspace should be of format <OWNER>/<TEAMSPACE_NAME> but got {teamspace}")
29
+ owner, teamspace = ts_splits
30
+ else:
31
+ owner, teamspace = None, None
32
+
33
+ try:
34
+ studio = Studio(name=name, teamspace=teamspace, org=owner, user=None, create_ok=False)
35
+ except (RuntimeError, ValueError):
36
+ studio = Studio(name=name, teamspace=teamspace, org=None, user=owner, create_ok=False)
37
+
38
+ try:
39
+ resolved_machine = Machine[machine.upper()]
40
+ except KeyError:
41
+ resolved_machine = machine
42
+
43
+ studio.switch_machine(resolved_machine)
@@ -42,7 +42,7 @@ class Deployment:
42
42
  and switching machine types, etc..
43
43
 
44
44
  Args:
45
- name: The name of the deployment.
45
+ name: The name or the id of the deployment.
46
46
  teamspace: The teamspace in which you want to deploy.
47
47
  org: The name of the organization owning the :param`teamspace` in case it is owned by an org
48
48
  user: The name of the user owning the :param`teamspace` in case it is owned directly by a user instead of an org
@@ -55,7 +55,7 @@ class Deployment:
55
55
 
56
56
  def __init__(
57
57
  self,
58
- name: str, # Only the name is required in case a deployment already exist.
58
+ name: str,
59
59
  teamspace: Optional[Union[str, Teamspace]] = None,
60
60
  org: Optional[Union[str, Organization]] = None,
61
61
  user: Optional[Union[str, User]] = None,
@@ -83,8 +83,14 @@ class Deployment:
83
83
  self._deployment_api = DeploymentApi()
84
84
  self._cloud_account = _get_cluster(client=self._deployment_api._client, project_id=self._teamspace.id)
85
85
  self._is_created = False
86
- deployment = self._deployment_api.get_deployment_by_name(name, self._teamspace.id)
86
+
87
+ if name.startswith("dep_"):
88
+ deployment = self._deployment_api.get_deployment_by_id(name, self._teamspace.id)
89
+ else:
90
+ deployment = self._deployment_api.get_deployment_by_name(name, self._teamspace.id)
91
+
87
92
  if deployment:
93
+ self._name = deployment.name
88
94
  self._is_created = True
89
95
  self._deployment = deployment
90
96
 
@@ -163,6 +169,9 @@ class Deployment:
163
169
  strategy=to_strategy(release_strategy),
164
170
  )
165
171
  )
172
+
173
+ # Overrides the name
174
+ self._name = self._deployment._name
166
175
  self._is_created = True
167
176
 
168
177
  def update(
lightning_sdk/job/base.py CHANGED
@@ -275,20 +275,25 @@ class _BaseJob(ABC):
275
275
  Caution: This also deletes all artifacts and snapshots associated with the job.
276
276
  """
277
277
 
278
- def wait(self, interval: float = 5.0) -> None:
278
+ def wait(self, interval: float = 5.0, timeout: Optional[float] = None) -> None:
279
279
  """Waits for the job to be either completed, manually stopped or failed.
280
280
 
281
281
  Args:
282
- interval: the number of seconds to spend in-between status checks.
282
+ interval: The number of seconds to spend in-between status checks.
283
+ timeout: The maximum number of seconds to wait before raising an error. If None, waits forever.
283
284
  """
284
285
  import time
285
286
 
286
287
  from lightning_sdk.status import Status
287
288
 
289
+ start = time.time()
288
290
  while True:
289
291
  if self.status in (Status.Completed, Status.Stopped, Status.Failed):
290
292
  break
291
293
 
294
+ if timeout is not None and time.time() - start > timeout:
295
+ raise TimeoutError("Job didn't finish within the provided timeout.")
296
+
292
297
  time.sleep(interval)
293
298
 
294
299
  @property
lightning_sdk/job/job.py CHANGED
@@ -1,7 +1,5 @@
1
- from functools import lru_cache
2
1
  from typing import TYPE_CHECKING, Any, Dict, Optional, Union
3
2
 
4
- from lightning_sdk.api.user_api import UserApi
5
3
  from lightning_sdk.job.base import _BaseJob
6
4
  from lightning_sdk.job.v1 import _JobV1
7
5
  from lightning_sdk.job.v2 import _JobV2
@@ -18,15 +16,6 @@ if TYPE_CHECKING:
18
16
  from lightning_sdk.user import User
19
17
 
20
18
 
21
- @lru_cache(maxsize=None)
22
- def _has_jobs_v2() -> bool:
23
- api = UserApi()
24
- try:
25
- return api._get_feature_flags().jobs_v2
26
- except Exception:
27
- return False
28
-
29
-
30
19
  class Job(_BaseJob):
31
20
  """Class to submit and manage single-machine jobs on the Lightning AI Platform."""
32
21
 
@@ -52,7 +41,7 @@ class Job(_BaseJob):
52
41
  """
53
42
  from lightning_sdk.lightning_cloud.openapi.rest import ApiException
54
43
 
55
- if _has_jobs_v2() and not self._force_v1:
44
+ if not self._force_v1:
56
45
  # try with v2 and fall back to v1
57
46
  try:
58
47
  job = _JobV2(
lightning_sdk/job/v1.py CHANGED
@@ -126,38 +126,7 @@ class _JobV1(_BaseJob):
126
126
  The submitted job.
127
127
 
128
128
  """
129
- if studio is None:
130
- raise ValueError("Studio is required for submitting jobs")
131
- if image is not None or image_credentials is not None or cloud_account_auth:
132
- raise ValueError("Image is not supported for submitting jobs")
133
-
134
- if artifacts_local is not None or artifacts_remote is not None:
135
- raise ValueError("Specifying how to persist artifacts is not yet supported with jobs")
136
-
137
- if env is not None:
138
- raise ValueError("Environment variables are not supported for submitting jobs")
139
- if command is None:
140
- raise ValueError("Command is required for submitting jobs")
141
-
142
- if entrypoint != "sh -c":
143
- raise ValueError("Specifying the entrypoint is not yet supported with jobs")
144
-
145
- if path_mappings is not None:
146
- raise ValueError("Specifying path mappings is not yet supported with jobs")
147
-
148
- # TODO: add support for empty names (will give an empty string)
149
- _submitted = self._job_api.submit_job(
150
- name=self._name,
151
- command=command,
152
- studio_id=studio._studio.id,
153
- teamspace_id=self._teamspace.id,
154
- cloud_account=cloud_account or "",
155
- machine=machine,
156
- interruptible=interruptible,
157
- )
158
- self._name = _submitted.name
159
- self._job = _submitted
160
- return self
129
+ raise NotImplementedError("Cannot submit new jobs with JobsV1!")
161
130
 
162
131
  def _update_internal_job(self) -> None:
163
132
  try:
lightning_sdk/job/v2.py CHANGED
@@ -140,7 +140,12 @@ class _JobV2(_BaseJob):
140
140
  @property
141
141
  def status(self) -> "Status":
142
142
  """The current status of the job."""
143
- return self._job_api._job_state_to_external(self._latest_job.state)
143
+ try:
144
+ return self._job_api._job_state_to_external(self._latest_job.state)
145
+ except Exception:
146
+ raise RuntimeError(
147
+ f"Job {self._name} does not exist in Teamspace {self.teamspace.name}. Did you delete it?"
148
+ ) from None
144
149
 
145
150
  @property
146
151
  def machine(self) -> Union["Machine", str]:
@@ -571,6 +571,7 @@ from lightning_sdk.lightning_cloud.openapi.models.v1_list_lightningapp_instances
571
571
  from lightning_sdk.lightning_cloud.openapi.models.v1_list_lightningwork_events_response import V1ListLightningworkEventsResponse
572
572
  from lightning_sdk.lightning_cloud.openapi.models.v1_list_lightningwork_response import V1ListLightningworkResponse
573
573
  from lightning_sdk.lightning_cloud.openapi.models.v1_list_lit_pages_response import V1ListLitPagesResponse
574
+ from lightning_sdk.lightning_cloud.openapi.models.v1_list_lit_registry_repository_image_artifact_versions_response import V1ListLitRegistryRepositoryImageArtifactVersionsResponse
574
575
  from lightning_sdk.lightning_cloud.openapi.models.v1_list_logger_artifact_response import V1ListLoggerArtifactResponse
575
576
  from lightning_sdk.lightning_cloud.openapi.models.v1_list_managed_endpoints_response import V1ListManagedEndpointsResponse
576
577
  from lightning_sdk.lightning_cloud.openapi.models.v1_list_memberships_response import V1ListMembershipsResponse
@@ -606,6 +607,7 @@ from lightning_sdk.lightning_cloud.openapi.models.v1_list_studio_jobs_response i
606
607
  from lightning_sdk.lightning_cloud.openapi.models.v1_list_user_slurm_jobs_response import V1ListUserSLURMJobsResponse
607
608
  from lightning_sdk.lightning_cloud.openapi.models.v1_lit_page import V1LitPage
608
609
  from lightning_sdk.lightning_cloud.openapi.models.v1_lit_page_type import V1LitPageType
610
+ from lightning_sdk.lightning_cloud.openapi.models.v1_lit_registry_artifact import V1LitRegistryArtifact
609
611
  from lightning_sdk.lightning_cloud.openapi.models.v1_lit_registry_project import V1LitRegistryProject
610
612
  from lightning_sdk.lightning_cloud.openapi.models.v1_lit_repository import V1LitRepository
611
613
  from lightning_sdk.lightning_cloud.openapi.models.v1_locked_resource import V1LockedResource