lightning-sdk 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +1 -1
- lightning_sdk/api/cluster_api.py +22 -0
- lightning_sdk/api/deployment_api.py +1 -0
- lightning_sdk/api/lit_container_api.py +24 -5
- lightning_sdk/api/teamspace_api.py +22 -17
- lightning_sdk/api/utils.py +1 -1
- lightning_sdk/cli/clusters_menu.py +46 -0
- lightning_sdk/cli/entrypoint.py +2 -2
- lightning_sdk/cli/list.py +25 -5
- lightning_sdk/cli/serve.py +232 -24
- lightning_sdk/cli/upload.py +4 -1
- lightning_sdk/deployment/deployment.py +5 -2
- lightning_sdk/lightning_cloud/openapi/__init__.py +10 -0
- lightning_sdk/lightning_cloud/openapi/api/cloud_space_service_api.py +303 -0
- lightning_sdk/lightning_cloud/openapi/models/__init__.py +10 -0
- lightning_sdk/lightning_cloud/openapi/models/cloudspace_id_systemmetrics_body.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/cluster_id_capacityreservations_body.py +55 -3
- lightning_sdk/lightning_cloud/openapi/models/create.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/orgs_id_body.py +55 -3
- lightning_sdk/lightning_cloud/openapi/models/update.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_billing_tier.py +1 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_cold_start_metrics.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_cold_start_metrics_stats.py +357 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_environment_template_config.py +29 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_cloudflare_v1.py +227 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_accelerator.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_capacity_reservation.py +55 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_security_options.py +79 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_spec.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_create_cloud_space_environment_template_request.py +29 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_create_cluster_capacity_reservation_response.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_data_connection.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_gcp_direct_vpc.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_get_cloud_space_cold_start_metrics_stats_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_get_cloud_space_instance_system_metrics_aggregate_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_get_user_response.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_google_cloud_direct_v1.py +43 -17
- lightning_sdk/lightning_cloud/openapi/models/v1_organization.py +55 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_project_cluster_binding.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_r2_data_connection.py +253 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_report_cloud_space_instance_system_metrics_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_server_alert_phase.py +1 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_system_metrics_aggregated.py +227 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_update_user_request.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +104 -208
- lightning_sdk/lightning_cloud/openapi/models/v1_validate_data_connection_response.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_weka_data_connection.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/validate.py +27 -1
- lightning_sdk/lit_container.py +25 -7
- lightning_sdk/models.py +26 -8
- lightning_sdk/serve.py +3 -20
- lightning_sdk/teamspace.py +21 -4
- lightning_sdk/utils/resolve.py +11 -4
- {lightning_sdk-0.2.8.dist-info → lightning_sdk-0.2.10.dist-info}/METADATA +1 -1
- {lightning_sdk-0.2.8.dist-info → lightning_sdk-0.2.10.dist-info}/RECORD +60 -48
- {lightning_sdk-0.2.8.dist-info → lightning_sdk-0.2.10.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.2.8.dist-info → lightning_sdk-0.2.10.dist-info}/WHEEL +0 -0
- {lightning_sdk-0.2.8.dist-info → lightning_sdk-0.2.10.dist-info}/entry_points.txt +0 -0
- {lightning_sdk-0.2.8.dist-info → lightning_sdk-0.2.10.dist-info}/top_level.txt +0 -0
lightning_sdk/__init__.py
CHANGED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from lightning_sdk.lightning_cloud.openapi import Externalv1Cluster
|
|
2
|
+
from lightning_sdk.lightning_cloud.rest_client import LightningClient
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ClusterApi:
    """Internal API client for requests to the cluster endpoints."""

    def __init__(self) -> None:
        # The client is created with max_tries=7.
        self._client = LightningClient(max_tries=7)

    def get_cluster(self, cluster_id: str, project_id: str, org_id: str) -> Externalv1Cluster:
        """Fetch a single cluster by its ID.

        :param cluster_id: ID of the cluster to look up
        :param project_id: the project the cluster is supposed to be associated with
        :param org_id: the owning org of this cluster
        :return: the cluster object returned by the cluster service
        :raises ValueError: if the service returns a falsy response for ``cluster_id``
        """
        cluster = self._client.cluster_service_get_cluster(id=cluster_id, org_id=org_id, project_id=project_id)
        if cluster:
            return cluster
        raise ValueError(f"Cluster {cluster_id} does not exist")
|
|
@@ -228,6 +228,7 @@ class DeploymentApi:
|
|
|
228
228
|
return self._client.jobs_service_create_deployment(
|
|
229
229
|
project_id=deployment.project_id,
|
|
230
230
|
body=CreateDeploymentRequestDefinesASpecForTheJobThatAllowsForAutoscalingJobs(
|
|
231
|
+
cloudspace_id=deployment.cloudspace_id,
|
|
231
232
|
autoscaling=deployment.autoscaling,
|
|
232
233
|
cluster_id=deployment.spec.cluster_id,
|
|
233
234
|
endpoint=deployment.endpoint,
|
|
@@ -7,6 +7,7 @@ import requests
|
|
|
7
7
|
from rich.console import Console
|
|
8
8
|
|
|
9
9
|
from lightning_sdk.api.utils import _get_registry_url
|
|
10
|
+
from lightning_sdk.lightning_cloud.env import LIGHTNING_CLOUD_URL
|
|
10
11
|
from lightning_sdk.lightning_cloud.openapi.models import V1DeleteLitRepositoryResponse
|
|
11
12
|
from lightning_sdk.lightning_cloud.rest_client import LightningClient
|
|
12
13
|
from lightning_sdk.teamspace import Teamspace
|
|
@@ -105,9 +106,11 @@ class LitContainerApi:
|
|
|
105
106
|
:param cloud_account: The cluster ID of the cloud account. If None, will use the default cluster.
|
|
106
107
|
:return:
|
|
107
108
|
"""
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
109
|
+
if cloud_account is None:
|
|
110
|
+
project = self._client.lit_registry_service_get_lit_project_registry(project_id)
|
|
111
|
+
else:
|
|
112
|
+
project = self._client.lit_registry_service_get_lit_project_registry(project_id, cluster_id=cloud_account)
|
|
113
|
+
|
|
111
114
|
return project.repositories
|
|
112
115
|
|
|
113
116
|
def delete_container(self, project_id: str, container: str) -> V1DeleteLitRepositoryResponse:
|
|
@@ -124,8 +127,14 @@ class LitContainerApi:
|
|
|
124
127
|
|
|
125
128
|
@retry_on_lcr_auth_failure
|
|
126
129
|
def upload_container(
|
|
127
|
-
self,
|
|
128
|
-
|
|
130
|
+
self,
|
|
131
|
+
container: str,
|
|
132
|
+
teamspace: Teamspace,
|
|
133
|
+
tag: str,
|
|
134
|
+
cloud_account: str,
|
|
135
|
+
platform: str,
|
|
136
|
+
return_final_dict: bool = False,
|
|
137
|
+
) -> Generator[dict, None, Dict]:
|
|
129
138
|
"""Upload container will push the container to LitCR.
|
|
130
139
|
|
|
131
140
|
It uses docker push API to interact with docker daemon which will then push the container to a storage
|
|
@@ -138,6 +147,7 @@ class LitContainerApi:
|
|
|
138
147
|
Named cloud-account in the CLI options.
|
|
139
148
|
:param platform: If empty will be linux/amd64. This is important because our entire deployment infra runs on
|
|
140
149
|
linux/amd64. Will show user a warning otherwise.
|
|
150
|
+
:return_final_dict: Controls whether we respond with the dictionary containing metadata about container upload
|
|
141
151
|
:return: Generator[dict, None, dict]
|
|
142
152
|
"""
|
|
143
153
|
try:
|
|
@@ -164,6 +174,15 @@ class LitContainerApi:
|
|
|
164
174
|
raise ValueError(f"Could not tag container {container}:{tag} with {repository}:{tag}")
|
|
165
175
|
yield from self._push_with_retry(repository, tag=tag)
|
|
166
176
|
|
|
177
|
+
if return_final_dict:
|
|
178
|
+
yield {
|
|
179
|
+
"finish": True,
|
|
180
|
+
"url": f"{LIGHTNING_CLOUD_URL}/{teamspace.owner.name}/{teamspace.name}/containers/"
|
|
181
|
+
f"{container_basename}?section=tags"
|
|
182
|
+
f"{f'?clusterId={cloud_account}' if cloud_account is not None else ''}",
|
|
183
|
+
"repository": repository,
|
|
184
|
+
}
|
|
185
|
+
|
|
167
186
|
def _push_with_retry(self, repository: str, tag: str, max_retries: int = 3) -> Iterator[Dict[str, Any]]:
|
|
168
187
|
def is_auth_error(error_msg: str) -> bool:
|
|
169
188
|
auth_errors = ["unauthorized", "authentication required", "unauth"]
|
|
@@ -34,7 +34,7 @@ class TeamspaceApi:
|
|
|
34
34
|
|
|
35
35
|
def __init__(self) -> None:
|
|
36
36
|
self._client = LightningClient(max_tries=7)
|
|
37
|
-
self.
|
|
37
|
+
self._models_api: Optional[ModelsStoreApi] = None
|
|
38
38
|
|
|
39
39
|
def get_teamspace(self, name: str, owner_id: str) -> V1Project:
|
|
40
40
|
"""Get the current teamspace from the owner."""
|
|
@@ -166,48 +166,51 @@ class TeamspaceApi:
|
|
|
166
166
|
|
|
167
167
|
# lazy property which is only created when needed
|
|
168
168
|
@property
|
|
169
|
-
def
|
|
170
|
-
if not self.
|
|
171
|
-
self.
|
|
172
|
-
return self.
|
|
169
|
+
def models_api(self) -> ModelsStoreApi:
|
|
170
|
+
if not self._models_api:
|
|
171
|
+
self._models_api = ModelsStoreApi(self._client.api_client)
|
|
172
|
+
return self._models_api
|
|
173
173
|
|
|
174
|
-
def get_model_version(self, name: str, version: str, teamspace_id: str) -> V1ModelVersionArchive:
|
|
174
|
+
def get_model_version(self, name: str, version: Optional[str], teamspace_id: str) -> V1ModelVersionArchive:
|
|
175
175
|
return _get_model_version(client=self._client, name=name, version=version, teamspace_id=teamspace_id)
|
|
176
176
|
|
|
177
177
|
def create_model(
|
|
178
178
|
self,
|
|
179
179
|
name: str,
|
|
180
|
+
version: Optional[str],
|
|
180
181
|
metadata: Dict[str, str],
|
|
181
182
|
private: bool,
|
|
182
183
|
teamspace_id: str,
|
|
183
184
|
cloud_account: str,
|
|
184
185
|
) -> V1ModelVersionArchive:
|
|
185
186
|
# ask if such model already exists by listing models with specific name
|
|
186
|
-
models = self.
|
|
187
|
+
models = self.models_api.models_store_list_models(project_id=teamspace_id, name=name).models
|
|
187
188
|
if len(models) == 0:
|
|
188
|
-
return self.
|
|
189
|
+
return self.models_api.models_store_create_model(
|
|
189
190
|
body=ProjectIdModelsBody(cluster_id=cloud_account, metadata=metadata, name=name, private=private),
|
|
190
191
|
project_id=teamspace_id,
|
|
191
192
|
)
|
|
192
193
|
assert len(models) == 1, "Multiple models with the same name found"
|
|
193
|
-
return self.
|
|
194
|
-
body=ModelIdVersionsBody(cluster_id=cloud_account),
|
|
194
|
+
return self.models_api.models_store_create_model_version(
|
|
195
|
+
body=ModelIdVersionsBody(cluster_id=cloud_account, version=version),
|
|
195
196
|
project_id=teamspace_id,
|
|
196
197
|
model_id=models[0].id,
|
|
197
198
|
)
|
|
198
199
|
|
|
199
200
|
def delete_model(self, name: str, version: Optional[str], teamspace_id: str) -> None:
|
|
200
201
|
"""Delete a model or a version from the model store."""
|
|
201
|
-
models = self.
|
|
202
|
+
models = self.models_api.models_store_list_models(project_id=teamspace_id, name=name).models
|
|
202
203
|
assert len(models) == 1, "Multiple models with the same name found"
|
|
203
|
-
|
|
204
|
+
model = models[0]
|
|
204
205
|
# decide if delete only version of whole model
|
|
205
206
|
if version:
|
|
206
207
|
if version == "default":
|
|
207
|
-
version =
|
|
208
|
-
self.
|
|
208
|
+
version = model.default_version
|
|
209
|
+
self.models_api.models_store_delete_model_version(
|
|
210
|
+
project_id=teamspace_id, model_id=model.id, version=version
|
|
211
|
+
)
|
|
209
212
|
else:
|
|
210
|
-
self.
|
|
213
|
+
self.models_api.models_store_delete_model(project_id=teamspace_id, model_id=model.id)
|
|
211
214
|
|
|
212
215
|
def upload_model_file(
|
|
213
216
|
self,
|
|
@@ -255,7 +258,7 @@ class TeamspaceApi:
|
|
|
255
258
|
main_pbar.update(1)
|
|
256
259
|
|
|
257
260
|
def complete_model_upload(self, model_id: str, version: str, teamspace_id: str) -> None:
|
|
258
|
-
self.
|
|
261
|
+
self.models_api.models_store_complete_model_upload(
|
|
259
262
|
body=_DummyBody(),
|
|
260
263
|
project_id=teamspace_id,
|
|
261
264
|
model_id=model_id,
|
|
@@ -265,12 +268,14 @@ class TeamspaceApi:
|
|
|
265
268
|
def download_model_files(
|
|
266
269
|
self,
|
|
267
270
|
name: str,
|
|
268
|
-
version: str,
|
|
271
|
+
version: Optional[str],
|
|
269
272
|
download_dir: Path,
|
|
270
273
|
teamspace_name: str,
|
|
271
274
|
teamspace_owner_name: str,
|
|
272
275
|
progress_bar: bool = True,
|
|
273
276
|
) -> List[str]:
|
|
277
|
+
if version is None:
|
|
278
|
+
version = "default"
|
|
274
279
|
return _download_model_files(
|
|
275
280
|
client=self._client,
|
|
276
281
|
teamspace_name=teamspace_name,
|
lightning_sdk/api/utils.py
CHANGED
|
@@ -493,7 +493,7 @@ def _get_model_version(client: LightningClient, teamspace_id: str, name: str, ve
|
|
|
493
493
|
raise ValueError(f"Model `{name}` does not exist")
|
|
494
494
|
elif len(models) > 1:
|
|
495
495
|
raise ValueError("Multiple models with the same name found")
|
|
496
|
-
if version ==
|
|
496
|
+
if version is None or version == "default":
|
|
497
497
|
return models[0].default_version
|
|
498
498
|
versions = api.models_store_list_model_versions(project_id=teamspace_id, model_id=models[0].id).versions
|
|
499
499
|
if not versions:
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from simple_term_menu import TerminalMenu
|
|
6
|
+
|
|
7
|
+
from lightning_sdk import Teamspace
|
|
8
|
+
from lightning_sdk.api.cluster_api import ClusterApi
|
|
9
|
+
from lightning_sdk.lightning_cloud.openapi import Externalv1Cluster, V1ProjectClusterBinding
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class _ClustersMenu:
    """Interactive terminal helper for choosing one of a teamspace's clusters."""

    def _get_cluster_from_interactive_menu(self, possible_clusters: List[V1ProjectClusterBinding]) -> str:
        """Show a menu listing the cluster IDs and return the ID of the chosen entry."""
        cluster_ids = [binding.cluster_id for binding in possible_clusters]
        menu = self._prepare_terminal_menu_teamspaces(cluster_ids)
        menu.show()

        chosen_binding = possible_clusters[menu.chosen_menu_index]
        return chosen_binding.cluster_id

    @staticmethod
    def _prepare_terminal_menu_teamspaces(cluster_ids: List[str]) -> TerminalMenu:
        """Build (but do not show) the terminal menu offering the given cluster IDs."""
        return TerminalMenu(
            cluster_ids,
            title="Please select a cluster from the following:",
            clear_menu_on_exit=True,
        )

    def _resolve_cluster(self, teamspace: Teamspace) -> Externalv1Cluster:
        """Let the user pick a cluster of ``teamspace`` and return the full cluster object.

        Exits the process instead of raising: with status 0 when the user interrupts the
        selection (Ctrl+C), and with status 1 when anything else goes wrong.
        """
        console = Console()
        # Kept outside the ``try`` so the error message below can reference it even when
        # the failure happens before a selection was made (it is then printed as ``None``).
        selected_cluster_id = None
        try:
            selected_cluster_id = self._get_cluster_from_interactive_menu(
                possible_clusters=teamspace.cloud_account_objs
            )
            return ClusterApi().get_cluster(
                cluster_id=selected_cluster_id,
                org_id=teamspace.owner.id,
                project_id=teamspace.id,
            )
        except KeyboardInterrupt:
            console.print("Operation cancelled by user")
            sys.exit(0)
        except Exception:
            message = (
                f"[red]Could not find the given Cluster:[/red] {selected_cluster_id}. "
                "Please contact Lightning AI directly to resolve this issue."
            )
            console.print(message)
            sys.exit(1)
|
lightning_sdk/cli/entrypoint.py
CHANGED
|
@@ -21,7 +21,7 @@ from lightning_sdk.cli.inspect import inspect
|
|
|
21
21
|
from lightning_sdk.cli.list import list_cli
|
|
22
22
|
from lightning_sdk.cli.open import open
|
|
23
23
|
from lightning_sdk.cli.run import run
|
|
24
|
-
from lightning_sdk.cli.serve import
|
|
24
|
+
from lightning_sdk.cli.serve import deploy
|
|
25
25
|
from lightning_sdk.cli.start import start
|
|
26
26
|
from lightning_sdk.cli.stop import stop
|
|
27
27
|
from lightning_sdk.cli.switch import switch
|
|
@@ -76,7 +76,7 @@ main_cli.add_command(generate)
|
|
|
76
76
|
main_cli.add_command(inspect)
|
|
77
77
|
main_cli.add_command(list_cli)
|
|
78
78
|
main_cli.add_command(run)
|
|
79
|
-
main_cli.add_command(
|
|
79
|
+
main_cli.add_command(deploy)
|
|
80
80
|
main_cli.add_command(start)
|
|
81
81
|
main_cli.add_command(stop)
|
|
82
82
|
main_cli.add_command(switch)
|
lightning_sdk/cli/list.py
CHANGED
|
@@ -7,8 +7,9 @@ from rich.table import Table
|
|
|
7
7
|
from typing_extensions import Literal
|
|
8
8
|
|
|
9
9
|
from lightning_sdk import Job, Machine, Studio, Teamspace
|
|
10
|
+
from lightning_sdk.cli.clusters_menu import _ClustersMenu
|
|
10
11
|
from lightning_sdk.cli.teamspace_menu import _TeamspacesMenu
|
|
11
|
-
from lightning_sdk.lightning_cloud.openapi import V1MultiMachineJob
|
|
12
|
+
from lightning_sdk.lightning_cloud.openapi import V1ClusterType, V1MultiMachineJob
|
|
12
13
|
from lightning_sdk.lit_container import LitContainer
|
|
13
14
|
from lightning_sdk.utils.resolve import _get_authed_user
|
|
14
15
|
|
|
@@ -232,18 +233,37 @@ def mmts(
|
|
|
232
233
|
"If not provided, can be selected in an interactive menu."
|
|
233
234
|
),
|
|
234
235
|
)
|
|
235
|
-
|
|
236
|
+
@click.option(
|
|
237
|
+
"--cloud-account",
|
|
238
|
+
"--cloud_account", # The UI will present the above variant, using this as a secondary to be consistent w/ models
|
|
239
|
+
default=None,
|
|
240
|
+
help="The name of the cloud account where containers are stored in.",
|
|
241
|
+
)
|
|
242
|
+
def containers(teamspace: Optional[str] = None, cloud_account: Optional[str] = None) -> None:
|
|
236
243
|
"""Display the list of available containers."""
|
|
237
244
|
api = LitContainer()
|
|
238
245
|
menu = _TeamspacesMenu()
|
|
246
|
+
clusters_menu = _ClustersMenu()
|
|
239
247
|
resolved_teamspace = menu._resolve_teamspace(teamspace=teamspace)
|
|
240
|
-
|
|
248
|
+
|
|
249
|
+
if not cloud_account:
|
|
250
|
+
cloud_account_obj = clusters_menu._resolve_cluster(resolved_teamspace)
|
|
251
|
+
cloud_account = "" if cloud_account_obj.spec.cluster_type == V1ClusterType.GLOBAL else cloud_account_obj.id
|
|
252
|
+
|
|
253
|
+
result = api.list_containers(
|
|
254
|
+
teamspace=resolved_teamspace.name, org=resolved_teamspace.owner.name, cloud_account=cloud_account
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
if not result:
|
|
258
|
+
return
|
|
259
|
+
|
|
241
260
|
table = Table(pad_edge=True, box=None)
|
|
242
261
|
table.add_column("REPOSITORY")
|
|
243
|
-
table.add_column("
|
|
262
|
+
table.add_column("CLOUD ACCOUNT")
|
|
263
|
+
table.add_column("LATEST TAG")
|
|
244
264
|
table.add_column("CREATED")
|
|
245
265
|
for repo in result:
|
|
246
|
-
table.add_row(repo["REPOSITORY"], repo["
|
|
266
|
+
table.add_row(repo["REPOSITORY"], repo["CLOUD ACCOUNT"], repo["LATEST TAG"], repo["CREATED"])
|
|
247
267
|
Console().print(table)
|
|
248
268
|
|
|
249
269
|
|
lightning_sdk/cli/serve.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import socket
|
|
2
3
|
import subprocess
|
|
4
|
+
import time
|
|
5
|
+
import webbrowser
|
|
3
6
|
from datetime import datetime
|
|
7
|
+
from enum import Enum
|
|
4
8
|
from pathlib import Path
|
|
5
|
-
from typing import Optional, Union
|
|
9
|
+
from typing import List, Optional, TypedDict, Union
|
|
10
|
+
from urllib.parse import urlencode
|
|
6
11
|
|
|
7
12
|
import click
|
|
8
13
|
from rich.console import Console
|
|
@@ -10,11 +15,18 @@ from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
|
|
|
10
15
|
from rich.prompt import Confirm
|
|
11
16
|
|
|
12
17
|
from lightning_sdk import Machine, Teamspace
|
|
18
|
+
from lightning_sdk.api import UserApi
|
|
13
19
|
from lightning_sdk.api.lit_container_api import LitContainerApi
|
|
14
20
|
from lightning_sdk.cli.teamspace_menu import _TeamspacesMenu
|
|
15
|
-
from lightning_sdk.
|
|
21
|
+
from lightning_sdk.lightning_cloud import env
|
|
22
|
+
from lightning_sdk.lightning_cloud.login import Auth, AuthServer
|
|
23
|
+
from lightning_sdk.lightning_cloud.openapi import V1CloudSpace
|
|
24
|
+
from lightning_sdk.lightning_cloud.rest_client import LightningClient
|
|
25
|
+
from lightning_sdk.serve import _LitServeDeployer
|
|
26
|
+
from lightning_sdk.utils.resolve import _get_authed_user, _resolve_teamspace
|
|
16
27
|
|
|
17
28
|
_MACHINE_VALUES = tuple([machine.name for machine in Machine.__dict__.values() if isinstance(machine, Machine)])
|
|
29
|
+
_POLL_TIMEOUT = 600
|
|
18
30
|
|
|
19
31
|
|
|
20
32
|
class _ServeGroup(click.Group):
|
|
@@ -26,22 +38,22 @@ class _ServeGroup(click.Group):
|
|
|
26
38
|
return super().parse_args(ctx, args)
|
|
27
39
|
|
|
28
40
|
|
|
29
|
-
@click.group("
|
|
30
|
-
def
|
|
31
|
-
"""
|
|
41
|
+
@click.group("deploy", cls=_ServeGroup)
|
|
42
|
+
def deploy() -> None:
|
|
43
|
+
"""Deploy a LitServe model.
|
|
32
44
|
|
|
33
45
|
Example:
|
|
34
|
-
lightning
|
|
46
|
+
lightning deploy server.py # deploy to the cloud
|
|
35
47
|
|
|
36
48
|
Example:
|
|
37
|
-
lightning
|
|
49
|
+
lightning deploy server.py --local # run locally
|
|
38
50
|
|
|
39
|
-
You can deploy the API to the cloud by running `lightning
|
|
51
|
+
You can deploy the API to the cloud by running `lightning deploy server.py`.
|
|
40
52
|
This will build a docker container for the server.py script and deploy it to the Lightning AI platform.
|
|
41
53
|
"""
|
|
42
54
|
|
|
43
55
|
|
|
44
|
-
@
|
|
56
|
+
@deploy.command("api")
|
|
45
57
|
@click.argument("script-path", type=click.Path(exists=True))
|
|
46
58
|
@click.option(
|
|
47
59
|
"--easy",
|
|
@@ -218,6 +230,176 @@ def api_impl(
|
|
|
218
230
|
raise RuntimeError(error_msg) from None
|
|
219
231
|
|
|
220
232
|
|
|
233
|
+
class _AuthServer(AuthServer):
    """Auth server variant whose sign-in URL carries the ``litserve`` invite code."""

    def get_auth_url(self, port: int) -> str:
        """Return the sign-in URL that redirects back to the local server on ``port``."""
        query = urlencode(
            {
                "redirectTo": f"http://localhost:{port}/login-complete",
                "inviteCode": "litserve",
            }
        )
        return f"{env.LIGHTNING_CLOUD_URL}/sign-in?{query}"
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class _Auth(Auth):
    """Auth flow that can ask for confirmation before opening the browser."""

    def __init__(self, shall_confirm: bool = False) -> None:
        """Store whether the user must confirm before the browser login starts."""
        super().__init__()
        self._shall_confirm = shall_confirm

    def _run_server(self) -> None:
        """Optionally ask for confirmation, then start the browser-based login.

        :raises RuntimeError: if confirmation was requested and the user declined
        """
        # ``Confirm.ask`` is only reached when confirmation was requested (short-circuit).
        if self._shall_confirm and not Confirm.ask(
            "Authenticating with Lightning AI. This will open a browser window. Continue?", default=True
        ):
            raise RuntimeError(
                "Login cancelled. Please login to Lightning AI to deploy the API."
                " Run `lightning login` to login."
            ) from None

        print("Opening browser for authentication...")
        print("Please come back to the terminal after logging in.")
        # Short pause before the browser login starts — presumably so the messages can be read.
        time.sleep(3)
        _AuthServer().login_with_browser(self)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def authenticate(shall_confirm: bool = True) -> None:
    """Run the authentication flow, asking for confirmation first unless ``shall_confirm`` is False."""
    _Auth(shall_confirm).authenticate()
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def select_teamspace(teamspace: Optional[str], org: Optional[str], user: Optional[str]) -> Teamspace:
    """Resolve the teamspace to use.

    When ``teamspace`` is given it is resolved together with ``org`` and ``user``.
    Otherwise the teamspaces available to the authenticated user are inspected: a single
    candidate is used directly, several candidates are offered through the teamspace menu.
    """
    if teamspace is not None:
        return _resolve_teamspace(teamspace=teamspace, org=org, user=user)

    # No explicit teamspace: the ``user`` argument is ignored in favour of the authed user.
    authed_user = _get_authed_user()
    menu = _TeamspacesMenu()
    candidates = menu._get_possible_teamspaces(authed_user)
    if len(candidates) != 1:
        return menu._resolve_teamspace(teamspace)

    only_candidate = next(iter(candidates.values()))
    return Teamspace(name=only_candidate["name"], org=org, user=authed_user)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class _UserStatus(TypedDict):
|
|
281
|
+
verified: bool
|
|
282
|
+
onboarded: bool
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def poll_verified_status(timeout: int = _POLL_TIMEOUT) -> _UserStatus:
    """Poll the user's status until the user is verified or ``timeout`` seconds have passed.

    The user is fetched at least once, then again every 5 seconds.

    :param timeout: maximum number of seconds to keep polling
    :return: the ``verified`` and ``onboarded`` flags from the most recent fetch; on
        timeout ``verified`` is still falsy
    """
    user_api = UserApi()
    user = _get_authed_user()
    # A monotonic clock keeps the timeout correct even if the system clock is adjusted
    # (NTP sync, DST, manual change) while we are waiting.
    deadline = time.monotonic() + timeout
    while True:
        status = user_api.get_user(name=user.name).status
        result: _UserStatus = {
            "onboarded": status.completed_project_onboarding,
            "verified": status.verified,
        }
        if status.verified or time.monotonic() > deadline:
            return result
        time.sleep(5)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
class _OnboardingStatus(Enum):
|
|
304
|
+
NOT_VERIFIED = "not_verified"
|
|
305
|
+
ONBOARDING = "onboarding"
|
|
306
|
+
ONBOARDED = "onboarded"
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class _Onboarding:
    """Helper for deploying while the authenticated user is still being onboarded."""

    def __init__(self, console: Console) -> None:
        """Capture the console and set up the API clients for the authenticated user."""
        self.console = console
        self.user = _get_authed_user()
        self.user_api = UserApi()
        self.client = LightningClient(max_tries=7)

    @property
    def verified(self) -> bool:
        """The user's ``verified`` status flag (fetched on every access)."""
        return self.user_api.get_user(name=self.user.name).status.verified

    @property
    def is_onboarded(self) -> bool:
        """The user's ``completed_project_onboarding`` status flag (fetched on every access)."""
        return self.user_api.get_user(name=self.user.name).status.completed_project_onboarding

    @property
    def can_join_org(self) -> bool:
        """Whether the joinable-organizations listing returns at least one organization."""
        return len(self.client.organizations_service_list_joinable_organizations().joinable_organizations) > 0

    @property
    def status(self) -> _OnboardingStatus:
        """Current onboarding stage; verification is checked before onboarding."""
        if not self.verified:
            return _OnboardingStatus.NOT_VERIFIED
        if self.is_onboarded:
            return _OnboardingStatus.ONBOARDED
        return _OnboardingStatus.ONBOARDING

    def _wait(self, timeout: int = _POLL_TIMEOUT) -> None:
        """Block until the user has completed onboarding.

        Returns immediately when the user is already onboarded, otherwise re-checks
        every 5 seconds.

        :param timeout: maximum number of seconds to wait
        :raises RuntimeError: if onboarding has not completed within ``timeout`` seconds
        """
        # Monotonic clock: immune to system clock adjustments while waiting.
        deadline = time.monotonic() + timeout
        while self.status != _OnboardingStatus.ONBOARDED:
            time.sleep(5)
            if self.is_onboarded:
                return
            # Raise only on an actual timeout. If the loop condition itself observes
            # ONBOARDED we fall out of the loop and return normally instead of
            # reporting a bogus timeout.
            if time.monotonic() > deadline:
                raise RuntimeError("Timed out waiting for onboarding status")

    def get_cloudspace_id(self, teamspace: Teamspace) -> Optional[str]:
        """Return the ID of the first cloudspace named like ``scratch-studio``, else ``None``.

        A cloudspace matches when ``scratch-studio`` occurs in its name or display name.
        """
        cloudspaces: List[V1CloudSpace] = self.client.cloud_space_service_list_cloud_spaces(teamspace.id).cloudspaces
        for cloudspace in cloudspaces:
            if "scratch-studio" in cloudspace.name or "scratch-studio" in cloudspace.display_name:
                return cloudspace.id
        return None

    def select_teamspace(self, teamspace: Optional[str], org: Optional[str], user: Optional[str]) -> Teamspace:
        """Select a teamspace while onboarding.

        If the user has already completed onboarding, this defers to the module-level
        ``select_teamspace``.
        If the user is being onboarded and can't join any org, the teamspace is resolved
        to the only teamspace available to them.
        If the user is being onboarded and can join an org, this waits for onboarding to
        complete and then picks the first available teamspace that belongs to an org.

        :raises RuntimeError: if no teamspace could be selected, or waiting for
            onboarding timed out
        """
        if self.is_onboarded:
            return select_teamspace(teamspace, org, user)

        # Run only when user hasn't completed onboarding yet.
        menu = _TeamspacesMenu()
        possible_teamspaces = menu._get_possible_teamspaces(self.user)
        can_join_org = self.can_join_org

        if len(possible_teamspaces) == 1 and can_join_org:
            # Wait for onboarding to complete so that the user can join an org;
            # the deployment is then created in the org's default teamspace.
            self.console.print("Waiting for account setup. Visit lightning.ai")
            self._wait()

        # Re-fetch: the set of teamspaces may have changed while waiting.
        possible_teamspaces = menu._get_possible_teamspaces(self.user)
        if len(possible_teamspaces) == 1:
            # User didn't select any org
            value = next(iter(possible_teamspaces.values()))
            return Teamspace(name=value["name"], org=value["org"], user=value["user"])

        for value in possible_teamspaces.values():
            # User selected an org: the onboarding teamspace is the first one owned by an org.
            if value["org"]:
                return Teamspace(name=value["name"], org=value["org"], user=value["user"])
        raise RuntimeError("Unable to select teamspace. Visit lightning.ai")
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def is_connected(host: str = "8.8.8.8", port: int = 53, timeout: int = 10) -> bool:
    """Check connectivity by opening a TCP connection to ``host:port``.

    :param host: address to connect to
    :param port: TCP port to connect to
    :param timeout: seconds to wait for the connection before giving up
    :return: ``True`` if the connection could be established, ``False`` on any ``OSError``
    """
    try:
        # Pass the timeout per connection rather than via ``socket.setdefaulttimeout``,
        # which would change the default for every socket created afterwards in this
        # process. The ``with`` block closes the probe socket right away.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False
|
|
401
|
+
|
|
402
|
+
|
|
221
403
|
def _handle_cloud(
|
|
222
404
|
script_path: Union[str, Path],
|
|
223
405
|
console: Console,
|
|
@@ -236,22 +418,19 @@ def _handle_cloud(
|
|
|
236
418
|
replicas: Optional[int] = 1,
|
|
237
419
|
include_credentials: Optional[bool] = True,
|
|
238
420
|
) -> None:
|
|
421
|
+
if not is_connected():
|
|
422
|
+
console.print("❌ Internet connection required to deploy to the cloud.", style="red")
|
|
423
|
+
console.print("To run locally instead, use: `lightning serve [SCRIPT | server.py] --local`")
|
|
424
|
+
return
|
|
425
|
+
|
|
239
426
|
deployment_name = os.path.basename(repository)
|
|
240
427
|
tag = tag if tag else "latest"
|
|
241
428
|
|
|
242
429
|
if non_interactive:
|
|
243
430
|
console.print("[italic]non-interactive[/italic] mode enabled, skipping confirmation prompts", style="blue")
|
|
244
431
|
|
|
245
|
-
# Authenticate with LitServe affiliate
|
|
246
|
-
authenticate()
|
|
247
|
-
if teamspace is None:
|
|
248
|
-
menu = _TeamspacesMenu()
|
|
249
|
-
resolved_teamspace = menu._resolve_teamspace(teamspace)
|
|
250
|
-
else:
|
|
251
|
-
resolved_teamspace = Teamspace(name=teamspace, org=org, user=user)
|
|
252
|
-
|
|
253
432
|
port = port or 8000
|
|
254
|
-
ls_deployer = _LitServeDeployer(name=deployment_name, teamspace=
|
|
433
|
+
ls_deployer = _LitServeDeployer(name=deployment_name, teamspace=None)
|
|
255
434
|
path = ls_deployer.dockerize_api(script_path, port=port, gpu=not machine.is_cpu(), tag=tag, print_success=False)
|
|
256
435
|
|
|
257
436
|
console.print(f"\nPlease review the Dockerfile at [u]{path}[/u] and make sure it is correct.", style="bold")
|
|
@@ -260,10 +439,6 @@ def _handle_cloud(
|
|
|
260
439
|
console.print("Please fix the Dockerfile and try again.", style="red")
|
|
261
440
|
return
|
|
262
441
|
|
|
263
|
-
# list containers to create the project if it doesn't exist
|
|
264
|
-
lit_cr = LitContainerApi()
|
|
265
|
-
lit_cr.list_containers(resolved_teamspace.id, cloud_account=cloud_account)
|
|
266
|
-
|
|
267
442
|
with Progress(
|
|
268
443
|
SpinnerColumn(),
|
|
269
444
|
TextColumn("[progress.description]{task.description}"),
|
|
@@ -280,8 +455,38 @@ def _handle_cloud(
|
|
|
280
455
|
progress.update(build_task, description="[green]Build completed![/green]", completed=1.0)
|
|
281
456
|
progress.remove_task(build_task)
|
|
282
457
|
|
|
283
|
-
|
|
284
|
-
console.print("
|
|
458
|
+
except Exception as e:
|
|
459
|
+
console.print(f"❌ Deployment failed: {e}", style="red")
|
|
460
|
+
return
|
|
461
|
+
|
|
462
|
+
# Push the container to the registry
|
|
463
|
+
console.print("\nPushing container to registry. It may take a while...", style="bold")
|
|
464
|
+
# Authenticate with LitServe affiliate
|
|
465
|
+
authenticate(shall_confirm=not non_interactive)
|
|
466
|
+
user_status = poll_verified_status()
|
|
467
|
+
cloudspace_id: Optional[str] = None
|
|
468
|
+
if not user_status["verified"]:
|
|
469
|
+
console.print("❌ Verify phone number to continue. Visit lightning.ai.", style="red")
|
|
470
|
+
return
|
|
471
|
+
if not user_status["onboarded"]:
|
|
472
|
+
onboarding = _Onboarding(console)
|
|
473
|
+
resolved_teamspace = onboarding.select_teamspace(teamspace, org, user)
|
|
474
|
+
cloudspace_id = onboarding.get_cloudspace_id(resolved_teamspace)
|
|
475
|
+
else:
|
|
476
|
+
resolved_teamspace = select_teamspace(teamspace, org, user)
|
|
477
|
+
|
|
478
|
+
# list containers to create the project if it doesn't exist
|
|
479
|
+
lit_cr = LitContainerApi()
|
|
480
|
+
lit_cr.list_containers(resolved_teamspace.id, cloud_account=cloud_account)
|
|
481
|
+
|
|
482
|
+
with Progress(
|
|
483
|
+
SpinnerColumn(),
|
|
484
|
+
TextColumn("[progress.description]{task.description}"),
|
|
485
|
+
TimeElapsedColumn(),
|
|
486
|
+
console=console,
|
|
487
|
+
transient=True,
|
|
488
|
+
) as progress:
|
|
489
|
+
try:
|
|
285
490
|
push_task = progress.add_task("Pushing to registry", total=None)
|
|
286
491
|
push_status = {}
|
|
287
492
|
for line in ls_deployer.push_container(
|
|
@@ -311,5 +516,8 @@ def _handle_cloud(
|
|
|
311
516
|
max_replica=max_replica,
|
|
312
517
|
replicas=replicas,
|
|
313
518
|
include_credentials=include_credentials,
|
|
519
|
+
cloudspace_id=cloudspace_id,
|
|
314
520
|
)
|
|
315
521
|
console.print(f"🚀 Deployment started, access at [i]{deployment_status.get('url')}[/i]")
|
|
522
|
+
if user_status["onboarded"]:
|
|
523
|
+
webbrowser.open(deployment_status.get("url"))
|