lightning-sdk 0.1.41__py3-none-any.whl → 0.1.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. lightning_sdk/__init__.py +1 -1
  2. lightning_sdk/ai_hub.py +8 -3
  3. lightning_sdk/api/ai_hub_api.py +3 -3
  4. lightning_sdk/api/deployment_api.py +6 -6
  5. lightning_sdk/api/job_api.py +32 -6
  6. lightning_sdk/api/mmt_api.py +59 -19
  7. lightning_sdk/api/studio_api.py +37 -19
  8. lightning_sdk/api/teamspace_api.py +34 -29
  9. lightning_sdk/api/utils.py +46 -34
  10. lightning_sdk/cli/ai_hub.py +3 -3
  11. lightning_sdk/cli/entrypoint.py +3 -1
  12. lightning_sdk/cli/mmt.py +11 -10
  13. lightning_sdk/cli/run.py +9 -8
  14. lightning_sdk/cli/serve.py +130 -0
  15. lightning_sdk/deployment/deployment.py +18 -12
  16. lightning_sdk/job/base.py +118 -24
  17. lightning_sdk/job/job.py +87 -9
  18. lightning_sdk/job/v1.py +75 -18
  19. lightning_sdk/job/v2.py +51 -15
  20. lightning_sdk/job/work.py +36 -7
  21. lightning_sdk/lightning_cloud/openapi/__init__.py +12 -0
  22. lightning_sdk/lightning_cloud/openapi/api/jobs_service_api.py +215 -5
  23. lightning_sdk/lightning_cloud/openapi/api/lit_logger_service_api.py +218 -0
  24. lightning_sdk/lightning_cloud/openapi/api/models_store_api.py +226 -0
  25. lightning_sdk/lightning_cloud/openapi/api/snowflake_service_api.py +21 -1
  26. lightning_sdk/lightning_cloud/openapi/models/__init__.py +12 -0
  27. lightning_sdk/lightning_cloud/openapi/models/deploymenttemplates_id_body.py +27 -1
  28. lightning_sdk/lightning_cloud/openapi/models/id_visibility_body.py +123 -0
  29. lightning_sdk/lightning_cloud/openapi/models/model_id_versions_body.py +29 -3
  30. lightning_sdk/lightning_cloud/openapi/models/project_id_multimachinejobs_body.py +27 -1
  31. lightning_sdk/lightning_cloud/openapi/models/project_id_snowflake_body.py +15 -67
  32. lightning_sdk/lightning_cloud/openapi/models/query_query_id_body.py +17 -69
  33. lightning_sdk/lightning_cloud/openapi/models/snowflake_export_body.py +29 -81
  34. lightning_sdk/lightning_cloud/openapi/models/snowflake_query_body.py +17 -69
  35. lightning_sdk/lightning_cloud/openapi/models/v1_get_model_file_url_response.py +27 -1
  36. lightning_sdk/lightning_cloud/openapi/models/v1_get_model_files_response.py +17 -17
  37. lightning_sdk/lightning_cloud/openapi/models/v1_get_model_files_url_response.py +149 -0
  38. lightning_sdk/lightning_cloud/openapi/models/v1_get_project_balance_response.py +27 -1
  39. lightning_sdk/lightning_cloud/openapi/models/v1_list_multi_machine_job_events_response.py +123 -0
  40. lightning_sdk/lightning_cloud/openapi/models/v1_metrics_stream.py +27 -1
  41. lightning_sdk/lightning_cloud/openapi/models/v1_model_file.py +175 -0
  42. lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job.py +27 -1
  43. lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_event.py +331 -0
  44. lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_event_type.py +104 -0
  45. lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_fault_tolerance.py +149 -0
  46. lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_fault_tolerance_strategy.py +105 -0
  47. lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_status.py +27 -1
  48. lightning_sdk/lightning_cloud/openapi/models/v1_rule_resource.py +1 -0
  49. lightning_sdk/lightning_cloud/openapi/models/v1_snowflake_data_connection.py +29 -81
  50. lightning_sdk/lightning_cloud/openapi/models/v1_system_metrics.py +29 -3
  51. lightning_sdk/lightning_cloud/openapi/models/v1_trainium_system_metrics.py +175 -0
  52. lightning_sdk/lightning_cloud/openapi/models/v1_update_metrics_stream_visibility_response.py +97 -0
  53. lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +27 -53
  54. lightning_sdk/lightning_cloud/openapi/models/v1_validate_deployment_image_request.py +149 -0
  55. lightning_sdk/lightning_cloud/openapi/models/v1_validate_deployment_image_response.py +97 -0
  56. lightning_sdk/lightning_cloud/rest_client.py +2 -0
  57. lightning_sdk/mmt/__init__.py +3 -0
  58. lightning_sdk/{_mmt → mmt}/base.py +20 -14
  59. lightning_sdk/{_mmt → mmt}/mmt.py +46 -17
  60. lightning_sdk/mmt/v1.py +129 -0
  61. lightning_sdk/{_mmt → mmt}/v2.py +16 -21
  62. lightning_sdk/plugin.py +43 -16
  63. lightning_sdk/services/file_endpoint.py +11 -5
  64. lightning_sdk/studio.py +16 -9
  65. lightning_sdk/teamspace.py +21 -8
  66. lightning_sdk/utils/resolve.py +18 -0
  67. {lightning_sdk-0.1.41.dist-info → lightning_sdk-0.1.42.dist-info}/METADATA +3 -1
  68. {lightning_sdk-0.1.41.dist-info → lightning_sdk-0.1.42.dist-info}/RECORD +72 -59
  69. lightning_sdk/_mmt/__init__.py +0 -3
  70. lightning_sdk/_mmt/v1.py +0 -69
  71. {lightning_sdk-0.1.41.dist-info → lightning_sdk-0.1.42.dist-info}/LICENSE +0 -0
  72. {lightning_sdk-0.1.41.dist-info → lightning_sdk-0.1.42.dist-info}/WHEEL +0 -0
  73. {lightning_sdk-0.1.41.dist-info → lightning_sdk-0.1.42.dist-info}/entry_points.txt +0 -0
  74. {lightning_sdk-0.1.41.dist-info → lightning_sdk-0.1.42.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import concurrent.futures
1
2
  import errno
2
3
  import math
3
4
  import os
@@ -8,7 +9,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
8
9
 
9
10
  import backoff
10
11
  import requests
11
- from tqdm import tqdm
12
+ from tqdm.auto import tqdm
12
13
 
13
14
  from lightning_sdk.constants import __GLOBAL_LIGHTNING_UNIQUE_IDS_STORE__, _LIGHTNING_DEBUG
14
15
  from lightning_sdk.lightning_cloud.openapi import (
@@ -66,14 +67,14 @@ class _FileUploader:
66
67
  self,
67
68
  client: LightningClient,
68
69
  teamspace_id: str,
69
- cluster_id: str,
70
+ cloud_account: str,
70
71
  file_path: str,
71
72
  remote_path: str,
72
73
  progress_bar: bool,
73
74
  ) -> None:
74
75
  self.client = client
75
76
  self.teamspace_id = teamspace_id
76
- self.cluster_id = cluster_id
77
+ self.cloud_account = cloud_account
77
78
 
78
79
  self.local_path = file_path
79
80
 
@@ -107,7 +108,7 @@ class _FileUploader:
107
108
 
108
109
  def _multipart_upload(self, count: int) -> None:
109
110
  """Does a parallel multipart upload."""
110
- body = ProjectIdStorageBody(cluster_id=self.cluster_id, filename=self.remote_path)
111
+ body = ProjectIdStorageBody(cluster_id=self.cloud_account, filename=self.remote_path)
111
112
  resp: V1UploadProjectArtifactResponse = self.client.storage_service_upload_project_artifact(
112
113
  body=body, project_id=self.teamspace_id
113
114
  )
@@ -123,7 +124,7 @@ class _FileUploader:
123
124
  completed.extend(self._process_upload_batch(executor=p, batch=batch, upload_id=resp.upload_id))
124
125
 
125
126
  completed_body = StorageCompleteBody(
126
- cluster_id=self.cluster_id, filename=self.remote_path, parts=completed, upload_id=resp.upload_id
127
+ cluster_id=self.cloud_account, filename=self.remote_path, parts=completed, upload_id=resp.upload_id
127
128
  )
128
129
  self.client.storage_service_complete_upload_project_artifact(body=completed_body, project_id=self.teamspace_id)
129
130
 
@@ -135,7 +136,7 @@ class _FileUploader:
135
136
 
136
137
  def _request_urls(self, parts: List[int], upload_id: str) -> List[V1PresignedUrl]:
137
138
  """Requests urls for a batch of parts."""
138
- body = UploadsUploadIdBody(cluster_id=self.cluster_id, filename=self.remote_path, parts=parts)
139
+ body = UploadsUploadIdBody(cluster_id=self.cloud_account, filename=self.remote_path, parts=parts)
139
140
  resp: V1UploadProjectArtifactPartsResponse = self.client.storage_service_upload_project_artifact_parts(
140
141
  body, self.teamspace_id, upload_id
141
142
  )
@@ -192,7 +193,7 @@ class _ModelFileUploader:
192
193
  model_id: str,
193
194
  version: str,
194
195
  teamspace_id: str,
195
- cluster_id: str,
196
+ cloud_account: str,
196
197
  file_path: str,
197
198
  remote_path: str,
198
199
  progress_bar: bool,
@@ -201,7 +202,6 @@ class _ModelFileUploader:
201
202
  self.model_id = model_id
202
203
  self.version = version
203
204
  self.teamspace_id = teamspace_id
204
- self.cluster_id = cluster_id
205
205
  self.local_path = file_path
206
206
  self.remote_path = remote_path
207
207
 
@@ -215,6 +215,8 @@ class _ModelFileUploader:
215
215
  unit="B",
216
216
  unit_scale=True,
217
217
  unit_divisor=1000,
218
+ position=1,
219
+ leave=False,
218
220
  )
219
221
  else:
220
222
  self.progress_bar = None
@@ -376,6 +378,7 @@ class _FileDownloader:
376
378
  teamspace_id: str,
377
379
  remote_path: str,
378
380
  file_path: str,
381
+ executor: ThreadPoolExecutor,
379
382
  num_workers: int = 20,
380
383
  progress_bar: Optional[tqdm] = None,
381
384
  ) -> None:
@@ -389,7 +392,7 @@ class _FileDownloader:
389
392
  self.num_workers = num_workers
390
393
  self._url = ""
391
394
  self._size = 0
392
- self.refresh()
395
+ self.executor = executor
393
396
 
394
397
  @backoff.on_exception(backoff.expo, ApiException, max_tries=10)
395
398
  def refresh(self) -> None:
@@ -445,26 +448,26 @@ class _FileDownloader:
445
448
  if remaining_size > 0:
446
449
  f.write(b"\x00" * remaining_size)
447
450
 
448
- def _multipart_download(self, filename: str, max_workers: int) -> None:
449
- num_chunks = max_workers
451
+ def _multipart_download(self, filename: str, num_workers: int) -> None:
452
+ num_chunks = num_workers
450
453
  chunk_size = math.ceil(self.size / num_chunks)
451
454
 
452
455
  if chunk_size < _DOWNLOAD_MIN_CHUNK_SIZE:
453
456
  num_chunks = math.ceil(self.size / _DOWNLOAD_MIN_CHUNK_SIZE)
454
457
  chunk_size = _DOWNLOAD_MIN_CHUNK_SIZE
455
458
 
456
- num_workers = min(max_workers, num_chunks)
457
-
458
459
  ranges = []
459
460
  for part_number in range(num_chunks):
460
461
  start = part_number * chunk_size
461
462
  end = min(start + chunk_size - 1, self.size - 1)
462
463
  ranges.append((start, end))
463
464
 
464
- with ThreadPoolExecutor(max_workers=num_workers) as executor:
465
- executor.map(partial(self._download_chunk, filename), ranges)
465
+ futures = [self.executor.submit(self._download_chunk, filename, r) for r in ranges]
466
+ concurrent.futures.wait(futures)
466
467
 
467
468
  def download(self) -> None:
469
+ self.refresh()
470
+
468
471
  tmp_filename = f"{self.local_path}.download"
469
472
 
470
473
  try:
@@ -536,31 +539,40 @@ def _download_model_files(
536
539
  unit_divisor=1000,
537
540
  )
538
541
 
539
- for filepath in response.filepaths:
540
- local_file = download_dir / filepath
541
- local_file.parent.mkdir(parents=True, exist_ok=True)
542
-
543
- file_downloader = _FileDownloader(
544
- client=client,
545
- model_id=response.model_id,
546
- version=response.version,
547
- teamspace_id=response.project_id,
548
- remote_path=filepath,
549
- file_path=str(local_file),
550
- num_workers=num_workers,
551
- progress_bar=pbar,
552
- )
542
+ with ThreadPoolExecutor(max_workers=min(num_workers, len(response.filepaths))) as file_executor, ThreadPoolExecutor(
543
+ max_workers=num_workers
544
+ ) as part_executor:
545
+ futures = []
546
+
547
+ for filepath in response.filepaths:
548
+ local_file = download_dir / filepath
549
+ local_file.parent.mkdir(parents=True, exist_ok=True)
550
+
551
+ file_downloader = _FileDownloader(
552
+ client=client,
553
+ model_id=response.model_id,
554
+ version=response.version,
555
+ teamspace_id=response.project_id,
556
+ remote_path=filepath,
557
+ file_path=str(local_file),
558
+ num_workers=num_workers,
559
+ progress_bar=pbar,
560
+ executor=part_executor,
561
+ )
562
+
563
+ futures.append(file_executor.submit(file_downloader.download))
553
564
 
554
- file_downloader.download()
565
+ # wait for all threads
566
+ concurrent.futures.wait(futures)
555
567
 
556
- return response.filepaths
568
+ return response.filepaths
557
569
 
558
570
 
559
571
  def _create_app(
560
572
  client: CloudSpaceServiceApi,
561
573
  studio_id: str,
562
574
  teamspace_id: str,
563
- cluster_id: str,
575
+ cloud_account: str,
564
576
  plugin_type: str,
565
577
  **other_arguments: Any,
566
578
  ) -> Externalv1LightningappInstance:
@@ -573,7 +585,7 @@ def _create_app(
573
585
  del other_arguments["interruptible"]
574
586
 
575
587
  body = AppsIdBody(
576
- cluster_id=cluster_id,
588
+ cluster_id=cloud_account,
577
589
  plugin_arguments=other_arguments,
578
590
  service_id=os.getenv(_LIGHTNING_SERVICE_EXECUTION_ID_KEY),
579
591
  unique_id=__GLOBAL_LIGHTNING_UNIQUE_IDS_STORE__[studio_id],
@@ -584,6 +596,6 @@ def _create_app(
584
596
  ).lightningappinstance
585
597
 
586
598
  if _LIGHTNING_DEBUG:
587
- print(f"Create App: {resp.id=} {teamspace_id=} {studio_id=} {cluster_id=}")
599
+ print(f"Create App: {resp.id=} {teamspace_id=} {studio_id=} {cloud_account=}")
588
600
 
589
601
  return resp
@@ -32,7 +32,7 @@ class _AIHub(_StudiosMenu):
32
32
  def deploy(
33
33
  self,
34
34
  api_id: str,
35
- cluster: Optional[str] = None,
35
+ cloud_account: Optional[str] = None,
36
36
  name: Optional[str] = None,
37
37
  teamspace: Optional[str] = None,
38
38
  org: Optional[str] = None,
@@ -41,9 +41,9 @@ class _AIHub(_StudiosMenu):
41
41
 
42
42
  Args:
43
43
  api_id: API template ID.
44
- cluster: Cluster to deploy the API to. Defaults to user's default cluster.
44
+ cloud_account: Cloud Account to deploy the API to. Defaults to user's default cloud account.
45
45
  name: Name of the deployed API. Defaults to the name of the API template.
46
46
  teamspace: Teamspace to deploy the API to. Defaults to user's default teamspace.
47
47
  org: Organization to deploy the API to. Defaults to user's default organization.
48
48
  """
49
- return self._hub.run(api_id, cluster=cluster, name=name, teamspace=teamspace, org=org)
49
+ return self._hub.run(api_id, cloud_account=cloud_account, name=name, teamspace=teamspace, org=org)
@@ -6,6 +6,7 @@ from lightning_sdk.cli.ai_hub import _AIHub
6
6
  from lightning_sdk.cli.download import _Downloads
7
7
  from lightning_sdk.cli.legacy import _LegacyLightningCLI
8
8
  from lightning_sdk.cli.run import _Run
9
+ from lightning_sdk.cli.serve import _Docker, _LitServe
9
10
  from lightning_sdk.cli.upload import _Uploads
10
11
  from lightning_sdk.lightning_cloud.login import Auth
11
12
 
@@ -19,8 +20,9 @@ class StudioCLI:
19
20
  self.download = _Downloads()
20
21
  self.upload = _Uploads()
21
22
  self.aihub = _AIHub()
22
-
23
23
  self.run = _Run(legacy_run=_LegacyLightningCLI() if _LIGHTNING_AVAILABLE else None)
24
+ self.serve = _LitServe()
25
+ self.dockerize = _Docker()
24
26
 
25
27
  def login(self) -> None:
26
28
  """Login to Lightning AI Studios."""
lightning_sdk/cli/mmt.py CHANGED
@@ -31,14 +31,15 @@ class MMTCLI:
31
31
  teamspace: The teamspace the job should be associated with. Defaults to the current teamspace.
32
32
  org: The organization owning the teamspace (if any). Defaults to the current organization.
33
33
  user: The user owning the teamspace (if any). Defaults to the current user.
34
- cluster: The cluster to run the job on. Defaults to the studio cluster if running with studio compute env.
35
- If not provided will fall back to the teamspaces default cluster.
34
+ cloud_account: The cloud account to run the job on.
35
+ Defaults to the studio cloud account if running with studio compute env.
36
+ If not provided will fall back to the teamspaces default cloud account.
36
37
  env: Environment variables to set inside the job.
37
38
  interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
38
39
  image_credentials: The credentials used to pull the image. Required if the image is private.
39
40
  This should be the name of the respective credentials secret created on the Lightning AI platform.
40
- cluster_auth: Whether to authenticate with the cluster to pull the image.
41
- Required if the registry is part of a cluster provider (e.g. ECR).
41
+ cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
42
+ Required if the registry is part of a cloud provider (e.g. ECR).
42
43
  artifacts_local: The path inside the docker container whose contents you want to persist.
43
44
  CAUTION: When setting this to "/", it will effectively erase your container.
44
45
  Only supported for jobs with a docker image compute environment.
@@ -83,11 +84,11 @@ class MMTCLI:
83
84
  teamspace: Optional[str] = None,
84
85
  org: Optional[str] = None,
85
86
  user: Optional[str] = None,
86
- cluster: Optional[str] = None,
87
+ cloud_account: Optional[str] = None,
87
88
  env: Optional[Dict[str, str]] = None,
88
89
  interruptible: bool = False,
89
90
  image_credentials: Optional[str] = None,
90
- cluster_auth: bool = False,
91
+ cloud_account_auth: bool = False,
91
92
  artifacts_local: Optional[str] = None,
92
93
  artifacts_remote: Optional[str] = None,
93
94
  ) -> None:
@@ -103,8 +104,8 @@ class MMTCLI:
103
104
  machine_enum = Machine(machine.upper())
104
105
 
105
106
  teamspace = Teamspace(name=teamspace, org=org, user=user)
106
- if cluster is None:
107
- cluster = teamspace.default_cluster
107
+ if cloud_account is None:
108
+ cloud_account = teamspace.default_cloud_account
108
109
 
109
110
  if image is None:
110
111
  raise RuntimeError("Currently only docker images are specified")
@@ -118,11 +119,11 @@ class MMTCLI:
118
119
  teamspace=teamspace,
119
120
  org=org,
120
121
  user=user,
121
- cluster=cluster,
122
+ cloud_account=cloud_account,
122
123
  env=env,
123
124
  interruptible=interruptible,
124
125
  image_credentials=image_credentials,
125
- cluster_auth=cluster_auth,
126
+ cloud_account_auth=cloud_account_auth,
126
127
  artifacts_local=artifacts_local,
127
128
  artifacts_remote=artifacts_remote,
128
129
  )
lightning_sdk/cli/run.py CHANGED
@@ -32,14 +32,15 @@ class _Run:
32
32
  teamspace: The teamspace the job should be associated with. Defaults to the current teamspace.
33
33
  org: The organization owning the teamspace (if any). Defaults to the current organization.
34
34
  user: The user owning the teamspace (if any). Defaults to the current user.
35
- cluster: The cluster to run the job on. Defaults to the studio cluster if running with studio compute env.
36
- If not provided will fall back to the teamspaces default cluster.
35
+ cloud_account: The cloud account to run the job on.
36
+ Defaults to the studio cloud account if running with studio compute env.
37
+ If not provided will fall back to the teamspaces default cloud account.
37
38
  env: Environment variables to set inside the job.
38
39
  interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
39
40
  image_credentials: The credentials used to pull the image. Required if the image is private.
40
41
  This should be the name of the respective credentials secret created on the Lightning AI platform.
41
- cluster_auth: Whether to authenticate with the cluster to pull the image.
42
- Required if the registry is part of a cluster provider (e.g. ECR).
42
+ cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
43
+ Required if the registry is part of a cloud provider (e.g. ECR).
43
44
  artifacts_local: The path inside the docker container whose contents you want to persist.
44
45
  CAUTION: When setting this to "/", it will effectively erase your container.
45
46
  Only supported for jobs with a docker image compute environment.
@@ -68,11 +69,11 @@ class _Run:
68
69
  teamspace: Optional[str] = None,
69
70
  org: Optional[str] = None,
70
71
  user: Optional[str] = None,
71
- cluster: Optional[str] = None,
72
+ cloud_account: Optional[str] = None,
72
73
  env: Optional[Dict[str, str]] = None,
73
74
  interruptible: bool = False,
74
75
  image_credentials: Optional[str] = None,
75
- cluster_auth: bool = False,
76
+ cloud_account_auth: bool = False,
76
77
  artifacts_local: Optional[str] = None,
77
78
  artifacts_remote: Optional[str] = None,
78
79
  ) -> None:
@@ -86,11 +87,11 @@ class _Run:
86
87
  teamspace=teamspace,
87
88
  org=org,
88
89
  user=user,
89
- cluster=cluster,
90
+ cloud_account=cloud_account,
90
91
  env=env,
91
92
  interruptible=interruptible,
92
93
  image_credentials=image_credentials,
93
- cluster_auth=cluster_auth,
94
+ cloud_account_auth=cloud_account_auth,
94
95
  artifacts_local=artifacts_local,
95
96
  artifacts_remote=artifacts_remote,
96
97
  )
@@ -0,0 +1,130 @@
1
+ import os
2
+ import subprocess
3
+ import warnings
4
+ from pathlib import Path
5
+ from typing import Union
6
+
7
+ from rich.console import Console
8
+
9
+
10
+ class _LitServe:
11
+ """Serve a LitServe model.
12
+
13
+ Example:
14
+ lightning serve api server.py
15
+ """
16
+
17
+ def api(
18
+ self,
19
+ script_path: Union[str, Path],
20
+ easy: bool = False,
21
+ ) -> None:
22
+ """Deploy a LitServe model script.
23
+
24
+ Args:
25
+ script_path: Path to the script to serve
26
+ easy: If True, generates a client for the model
27
+
28
+ Raises:
29
+ FileNotFoundError: If script_path doesn't exist
30
+ ImportError: If litserve is not installed
31
+ subprocess.CalledProcessError: If the script fails to run
32
+ IOError: If client.py generation fails
33
+ """
34
+ console = Console()
35
+ script_path = Path(script_path)
36
+ if not script_path.exists():
37
+ raise FileNotFoundError(f"Script not found: {script_path}")
38
+ if not script_path.is_file():
39
+ raise ValueError(f"Path is not a file: {script_path}")
40
+
41
+ try:
42
+ from litserve.python_client import client_template
43
+ except ImportError:
44
+ raise ImportError(
45
+ "litserve is not installed. Please install it with `pip install lightning_sdk[serve]`"
46
+ ) from None
47
+
48
+ if easy:
49
+ client_path = Path("client.py")
50
+ if client_path.exists():
51
+ console.print("Skipping client generation: client.py already exists", style="blue")
52
+ else:
53
+ try:
54
+ client_path.write_text(client_template)
55
+ console.print("✅ Client generated at client.py", style="bold green")
56
+ except OSError as e:
57
+ raise OSError(f"Failed to generate client.py: {e!s}") from None
58
+
59
+ try:
60
+ subprocess.run(
61
+ ["python", str(script_path)],
62
+ check=True,
63
+ text=True,
64
+ )
65
+ except subprocess.CalledProcessError as e:
66
+ error_msg = f"Script execution failed with exit code {e.returncode}\nstdout: {e.stdout}\nstderr: {e.stderr}"
67
+ raise RuntimeError(error_msg) from None
68
+
69
+
70
class _Docker:
    """Generate a Dockerfile for a LitServe model."""

    def api(self, server_filename: str, port: int = 8000, gpu: bool = False) -> None:
        """Generate a Dockerfile for the given server code.

        Example:
            lightning dockerize api server.py --port 8000 --gpu

        Args:
            server_filename (str): The path to the server file. Example: server.py or app.py.
            port (int, optional): The port to expose in the Docker container.
            gpu (bool, optional): Whether to use a GPU-enabled Docker image.

        Raises:
            FileNotFoundError: If ``server_filename`` is not a file in the current directory.
        """
        # litserve is an optional dependency; import lazily so the CLI loads
        # without it and only this command requires it.
        import litserve as ls
        from litserve import docker_builder

        console = Console()

        # The generated Dockerfile installs from requirements.txt when it is
        # present in the build context; otherwise warn, since dependencies
        # must then be added to the Dockerfile by hand.
        requirements = ""
        if os.path.exists("requirements.txt"):
            requirements = "-r requirements.txt"
        else:
            warnings.warn(
                f"requirements.txt not found at {os.getcwd()}. "
                f"Make sure to install the required packages in the Dockerfile.",
                UserWarning,
            )

        # The Dockerfile template COPYs the server file by name, so it must
        # live in the current directory (the docker build context).
        current_dir = Path.cwd()
        if not (current_dir / server_filename).is_file():
            raise FileNotFoundError(f"Server file `{server_filename}` must be in the current directory: {os.getcwd()}")

        version = ls.__version__
        if gpu:
            run_cmd = f"docker run --gpus all -p {port}:{port} litserve-model:latest"
            docker_template = docker_builder.CUDA_DOCKER_TEMPLATE
        else:
            run_cmd = f"docker run -p {port}:{port} litserve-model:latest"
            docker_template = docker_builder.DOCKERFILE_TEMPLATE

        dockerfile_content = docker_template.format(
            server_filename=server_filename,
            port=port,
            version=version,
            requirements=requirements,
        )
        with open("Dockerfile", "w") as f:
            f.write(dockerfile_content)

        success_msg = f"""[bold]Dockerfile created successfully[/bold]
Update [underline]{os.path.abspath("Dockerfile")}[/underline] to add any additional dependencies or commands.

[bold]Build the container with:[/bold]
> [underline]docker build -t litserve-model .[/underline]

[bold]To run the Docker container on the machine:[/bold]
> [underline]{run_cmd}[/underline]

[bold]To push the container to a registry:[/bold]
> [underline]docker push litserve-model[/underline]
"""
        console.print(success_msg)
@@ -32,7 +32,7 @@ from lightning_sdk.organization import Organization
32
32
  from lightning_sdk.services.utilities import _get_cluster
33
33
  from lightning_sdk.teamspace import Teamspace
34
34
  from lightning_sdk.user import User
35
- from lightning_sdk.utils.resolve import _resolve_org, _resolve_teamspace, _resolve_user
35
+ from lightning_sdk.utils.resolve import _resolve_deprecated_cluster, _resolve_org, _resolve_teamspace, _resolve_user
36
36
 
37
37
 
38
38
  class Deployment:
@@ -81,7 +81,7 @@ class Deployment:
81
81
  raise ValueError("You need to pass a teamspace or an org for your deployment.")
82
82
 
83
83
  self._deployment_api = DeploymentApi()
84
- self._cluster = _get_cluster(client=self._deployment_api._client, project_id=self._teamspace.id)
84
+ self._cloud_account = _get_cluster(client=self._deployment_api._client, project_id=self._teamspace.id)
85
85
  self._is_created = False
86
86
  deployment = self._deployment_api.get_deployment_by_name(name, self._teamspace.id)
87
87
  if deployment:
@@ -102,8 +102,9 @@ class Deployment:
102
102
  replicas: Optional[int] = None,
103
103
  health_check: Optional[Union[HttpHealthCheck, ExecHealthCheck]] = None,
104
104
  auth: Optional[Union[BasicAuth, TokenAuth]] = None,
105
- cluster: Optional[str] = None,
105
+ cloud_account: Optional[str] = None,
106
106
  custom_domain: Optional[str] = None,
107
+ cluster: Optional[str] = None, # deprecated in favor of cloud_account
107
108
  ) -> None:
108
109
  """The Lightning AI Deployment.
109
110
 
@@ -124,7 +125,7 @@ class Deployment:
124
125
  replicas: The number of replicas to deploy with.
125
126
  health_check: The health check config to know whether your service is ready to receive traffic.
126
127
  auth: The auth config to protect your services. Only Basic and Token supported.
127
- cluster: The name of the cluster, the studio should be created on.
128
+ cloud_account: The name of the cloud account that the studio should be created on.
128
129
  Doesn't matter when the studio already exists.
129
130
  custom_domain: Whether your service would be referenced under a custom domain.
130
131
 
@@ -136,9 +137,11 @@ class Deployment:
136
137
  if self._is_created:
137
138
  raise RuntimeError("This deployment has already been started.")
138
139
 
139
- if cluster is None and self._cluster is not None:
140
- print(f"No cluster was provided, defaulting to {self._cluster.cluster_id}")
141
- cluster = os.getenv("LIGHTNING_CLUSTER_ID") or self._cluster.cluster_id
140
+ cloud_account = _resolve_deprecated_cluster(cloud_account, cluster)
141
+
142
+ if cloud_account is None and self._cloud_account is not None:
143
+ print(f"No cloud account was provided, defaulting to {self._cloud_account.cluster_id}")
144
+ cloud_account = os.getenv("LIGHTNING_CLUSTER_ID") or self._cloud_account.cluster_id
142
145
 
143
146
  self._deployment = self._deployment_api.create_deployment(
144
147
  V1Deployment(
@@ -148,7 +151,7 @@ class Deployment:
148
151
  project_id=self._teamspace.id,
149
152
  replicas=replicas,
150
153
  spec=to_spec(
151
- cluster_id=cluster,
154
+ cloud_account=cloud_account,
152
155
  command=command,
153
156
  entrypoint=entrypoint,
154
157
  env=env,
@@ -171,7 +174,7 @@ class Deployment:
171
174
  command: Optional[str] = None,
172
175
  env: Optional[List[Union[Env, Secret]]] = None,
173
176
  spot: Optional[bool] = None,
174
- cluster: Optional[str] = None,
177
+ cloud_account: Optional[str] = None,
175
178
  health_check: Optional[Union[HttpHealthCheck, ExecHealthCheck]] = None,
176
179
  # Changing those arguments don't create a new release
177
180
  min_replicas: Optional[int] = None,
@@ -182,7 +185,10 @@ class Deployment:
182
185
  replicas: Optional[int] = None,
183
186
  auth: Optional[Union[BasicAuth, TokenAuth]] = None,
184
187
  custom_domain: Optional[str] = None,
188
+ cluster: Optional[str] = None, # deprecated in favor of cloud_account
185
189
  ) -> None:
190
+ cloud_account = _resolve_deprecated_cluster(cloud_account, cluster)
191
+
186
192
  self._deployment = self._deployment_api.update_deployment(
187
193
  self._deployment,
188
194
  name=name or self._name,
@@ -190,7 +196,7 @@ class Deployment:
190
196
  replicas=replicas,
191
197
  min_replicas=min_replicas,
192
198
  max_replicas=max_replicas,
193
- cluster_id=cluster,
199
+ cloud_account=cloud_account,
194
200
  machine=machine,
195
201
  environment=environment,
196
202
  entrypoint=entrypoint,
@@ -312,8 +318,8 @@ class Deployment:
312
318
  return None
313
319
 
314
320
  @property
315
- def cluster(self) -> Optional[str]:
316
- """The cluster of the replicas."""
321
+ def cloud_account(self) -> Optional[str]:
322
+ """The cloud_account of the replicas."""
317
323
  if self._deployment:
318
324
  self._deployment = self._deployment_api.get_deployment_by_name(self._name, self._teamspace.id)
319
325
  return self._deployment.spec.cluster_id