lightning-sdk 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +1 -1
- lightning_sdk/ai_hub.py +10 -17
- lightning_sdk/api/ai_hub_api.py +20 -3
- lightning_sdk/api/studio_api.py +0 -8
- lightning_sdk/cli/serve.py +139 -22
- lightning_sdk/deployment/deployment.py +32 -4
- lightning_sdk/lightning_cloud/openapi/__init__.py +49 -1
- lightning_sdk/lightning_cloud/openapi/api/__init__.py +4 -0
- lightning_sdk/lightning_cloud/openapi/api/cloud_space_environment_template_service_api.py +537 -0
- lightning_sdk/lightning_cloud/openapi/api/cluster_service_api.py +10 -6
- lightning_sdk/lightning_cloud/openapi/api/lit_dataset_service_api.py +1973 -0
- lightning_sdk/lightning_cloud/openapi/api/lit_registry_service_api.py +5 -1
- lightning_sdk/lightning_cloud/openapi/api/models_store_api.py +20 -20
- lightning_sdk/lightning_cloud/openapi/api/pipeline_templates_service_api.py +339 -0
- lightning_sdk/lightning_cloud/openapi/api/pipelines_service_api.py +5 -1
- lightning_sdk/lightning_cloud/openapi/api/schedules_service_api.py +573 -0
- lightning_sdk/lightning_cloud/openapi/api/slurm_jobs_user_service_api.py +202 -0
- lightning_sdk/lightning_cloud/openapi/models/__init__.py +45 -1
- lightning_sdk/lightning_cloud/openapi/models/cluster_id_capacityblock_body.py +15 -15
- lightning_sdk/lightning_cloud/openapi/models/cluster_id_slurmusers_body.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/dataset_id_versions_body.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/dataset_id_visibility_body.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/environmenttemplates_id_body.py +227 -0
- lightning_sdk/lightning_cloud/openapi/models/externalv1_cloud_space_instance_status.py +69 -69
- lightning_sdk/lightning_cloud/openapi/models/litdatasets_dataset_id_body.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/orgs_id_body.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/pipelines_id_body.py +69 -17
- lightning_sdk/lightning_cloud/openapi/models/pipelinetemplates_id_body.py +331 -0
- lightning_sdk/lightning_cloud/openapi/models/project_id_litdatasets_body.py +227 -0
- lightning_sdk/lightning_cloud/openapi/models/project_id_pipelines_body.py +17 -17
- lightning_sdk/lightning_cloud/openapi/models/project_id_schedules_body.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/schedules_id_body.py +383 -0
- lightning_sdk/lightning_cloud/openapi/models/slurm_jobs_body.py +15 -15
- lightning_sdk/lightning_cloud/openapi/models/upload_id_complete_body1.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/upload_id_parts_body1.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_agent_job.py +124 -20
- lightning_sdk/lightning_cloud/openapi/models/v1_assistant_model_status.py +2 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_provider.py +1 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_environment_template.py +253 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_spec.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_complete_lit_dataset_multi_part_upload_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_complete_lit_dataset_upload_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_cloud_space_environment_template_request.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_lit_dataset_multi_part_upload_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_organization_request.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_create_pipeline_template_request.py +383 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_project_request.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/{v1_pipeline_schedule.py → v1_delete_cloud_space_environment_template_response.py} +32 -32
- lightning_sdk/lightning_cloud/openapi/models/v1_delete_lit_dataset_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_delete_lit_dataset_version_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_delete_schedule_response.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_get_cloud_space_size_response.py +79 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_get_lit_dataset_file_upload_urls_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_get_lit_dataset_files_url_response.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_get_user_response.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_instance_overprovisioning_spec.py +79 -27
- lightning_sdk/lightning_cloud/openapi/models/v1_job_spec.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_list_cloud_space_environment_templates_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_list_lit_dataset_versions_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_list_lit_datasets_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_list_schedules_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_list_slurm_cluster_users_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_dataset.py +539 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_dataset_file.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_dataset_version_archive.py +435 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_registry_project.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_repository.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_magic_link_login_request.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_managed_model.py +107 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_organization.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline.py +69 -17
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_parameter.py +435 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_parameter_placement.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_parameter_placement_type.py +106 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_parameter_type.py +106 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_template.py +513 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_template_visibility_type.py +105 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_project_settings.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_schedule.py +435 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_schedule_resource_type.py +103 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_slurm_cluster_user.py +227 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_slurm_job.py +58 -6
- lightning_sdk/lightning_cloud/openapi/models/v1_slurm_node.py +31 -291
- lightning_sdk/lightning_cloud/openapi/models/v1_update_lit_dataset_visibility_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_update_user_request.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +157 -105
- lightning_sdk/lightning_cloud/openapi/models/v1_validate_managed_endpoint_response.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_voltage_park_direct_v1.py +203 -0
- lightning_sdk/lightning_cloud/openapi/models/version_default_body.py +29 -29
- lightning_sdk/lightning_cloud/openapi/models/version_default_body1.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/version_uploads_body1.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/versions_version_body1.py +123 -0
- lightning_sdk/lightning_cloud/rest_client.py +2 -0
- lightning_sdk/lit_container.py +8 -1
- lightning_sdk/mmt/mmt.py +1 -18
- lightning_sdk/mmt/v1.py +1 -28
- lightning_sdk/models.py +15 -6
- lightning_sdk/pipeline/pipeline.py +2 -2
- lightning_sdk/pipeline/types.py +28 -2
- lightning_sdk/pipeline/utils.py +1 -1
- lightning_sdk/plugin.py +0 -6
- lightning_sdk/serve.py +55 -22
- lightning_sdk/utils/resolve.py +1 -0
- {lightning_sdk-0.2.3.dist-info → lightning_sdk-0.2.5.dist-info}/METADATA +1 -1
- {lightning_sdk-0.2.3.dist-info → lightning_sdk-0.2.5.dist-info}/RECORD +110 -62
- {lightning_sdk-0.2.3.dist-info → lightning_sdk-0.2.5.dist-info}/WHEEL +1 -1
- {lightning_sdk-0.2.3.dist-info → lightning_sdk-0.2.5.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.2.3.dist-info → lightning_sdk-0.2.5.dist-info}/entry_points.txt +0 -0
- {lightning_sdk-0.2.3.dist-info → lightning_sdk-0.2.5.dist-info}/top_level.txt +0 -0
lightning_sdk/lit_container.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from datetime import datetime
|
|
1
2
|
from typing import Dict, List, Optional
|
|
2
3
|
|
|
3
4
|
from lightning_sdk.api.lit_container_api import LitContainerApi
|
|
@@ -29,11 +30,17 @@ class LitContainer:
|
|
|
29
30
|
repositories = self._api.list_containers(project_id)
|
|
30
31
|
table = []
|
|
31
32
|
for repo in repositories:
|
|
33
|
+
created_date = repo.creation_time
|
|
34
|
+
if isinstance(repo.creation_time, str):
|
|
35
|
+
created_date = datetime.fromisoformat(created_date)
|
|
36
|
+
|
|
37
|
+
created = created_date.strftime("%Y-%m-%d %H:%M:%S")
|
|
38
|
+
|
|
32
39
|
table.append(
|
|
33
40
|
{
|
|
34
41
|
"REPOSITORY": repo.name,
|
|
35
42
|
"IMAGE ID": repo.id,
|
|
36
|
-
"CREATED":
|
|
43
|
+
"CREATED": created,
|
|
37
44
|
}
|
|
38
45
|
)
|
|
39
46
|
return table
|
lightning_sdk/mmt/mmt.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
|
-
from functools import lru_cache
|
|
2
1
|
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
|
|
3
2
|
|
|
4
|
-
from lightning_sdk.api.user_api import UserApi
|
|
5
3
|
from lightning_sdk.mmt.base import MMTMachine, _BaseMMT
|
|
6
4
|
from lightning_sdk.mmt.v1 import _MMTV1
|
|
7
5
|
from lightning_sdk.mmt.v2 import _MMTV2
|
|
@@ -18,21 +16,6 @@ if TYPE_CHECKING:
|
|
|
18
16
|
_logger = _setup_logger(__name__)
|
|
19
17
|
|
|
20
18
|
|
|
21
|
-
@lru_cache(maxsize=None)
|
|
22
|
-
def _has_mmt_v2() -> bool:
|
|
23
|
-
api = UserApi()
|
|
24
|
-
try:
|
|
25
|
-
feature_flags = api._get_feature_flags()
|
|
26
|
-
except Exception:
|
|
27
|
-
return False
|
|
28
|
-
|
|
29
|
-
try:
|
|
30
|
-
return feature_flags.mmt_v2
|
|
31
|
-
except AttributeError:
|
|
32
|
-
# Feature flag doesn't exist anymore, so return True
|
|
33
|
-
return True
|
|
34
|
-
|
|
35
|
-
|
|
36
19
|
class MMT(_BaseMMT):
|
|
37
20
|
"""Class to submit and manage multi-machine jobs on the Lightning AI Platform."""
|
|
38
21
|
|
|
@@ -60,7 +43,7 @@ class MMT(_BaseMMT):
|
|
|
60
43
|
"""
|
|
61
44
|
from lightning_sdk.lightning_cloud.openapi.rest import ApiException
|
|
62
45
|
|
|
63
|
-
if
|
|
46
|
+
if not self._force_v1:
|
|
64
47
|
# try with v2 and fall back to v1
|
|
65
48
|
try:
|
|
66
49
|
mmt = _MMTV2(
|
lightning_sdk/mmt/v1.py
CHANGED
|
@@ -92,34 +92,7 @@ class _MMTV1(_BaseMMT):
|
|
|
92
92
|
path_mappings: The mappings from data connection inside your container (not supported)
|
|
93
93
|
|
|
94
94
|
"""
|
|
95
|
-
|
|
96
|
-
raise ValueError("Studio is required for submitting jobs")
|
|
97
|
-
if image is not None or image_credentials is not None or cloud_account_auth or entrypoint != "sh -c":
|
|
98
|
-
raise ValueError("Image is not supported for submitting jobs")
|
|
99
|
-
|
|
100
|
-
if artifacts_local is not None or artifacts_remote is not None:
|
|
101
|
-
raise ValueError("Specifying how to persist artifacts is not yet supported with jobs")
|
|
102
|
-
|
|
103
|
-
if env is not None:
|
|
104
|
-
raise ValueError("Environment variables are not supported for submitting jobs")
|
|
105
|
-
if command is None:
|
|
106
|
-
raise ValueError("Command is required for submitting multi-machine jobs")
|
|
107
|
-
|
|
108
|
-
_submitted = self._job_api.submit_job(
|
|
109
|
-
name=self._name,
|
|
110
|
-
num_machines=num_machines,
|
|
111
|
-
command=command,
|
|
112
|
-
studio_id=studio._studio.id,
|
|
113
|
-
teamspace_id=self._teamspace.id,
|
|
114
|
-
cloud_account=cloud_account or "",
|
|
115
|
-
machine=machine,
|
|
116
|
-
interruptible=interruptible,
|
|
117
|
-
strategy="parallel",
|
|
118
|
-
)
|
|
119
|
-
|
|
120
|
-
self._name = _submitted.name
|
|
121
|
-
self._job = _submitted
|
|
122
|
-
return self
|
|
95
|
+
raise NotImplementedError("Cannot submit new mmts with MMTV1!")
|
|
123
96
|
|
|
124
97
|
def _update_internal_job(self) -> None:
|
|
125
98
|
try:
|
lightning_sdk/models.py
CHANGED
|
@@ -72,6 +72,19 @@ def _get_teamspace(name: str, organization: str) -> "Teamspace":
|
|
|
72
72
|
raise RuntimeError(f"Teamspace `{requested_teamspace}` not found. Available teamspaces: {os.linesep}\t{options}")
|
|
73
73
|
|
|
74
74
|
|
|
75
|
+
def _extend_model_name_with_teamspace(name: str) -> str:
|
|
76
|
+
"""Extend the model name with the teamspace if it can be determined from env. variables."""
|
|
77
|
+
if "/" in name:
|
|
78
|
+
return name
|
|
79
|
+
# do some magic if you run studio
|
|
80
|
+
teamspace = _resolve_teamspace(None, None, None)
|
|
81
|
+
if not teamspace:
|
|
82
|
+
raise ValueError(
|
|
83
|
+
f"Model name must be in the format `organization/teamspace/model_name` but you provided '{name}'."
|
|
84
|
+
)
|
|
85
|
+
return f"{teamspace.owner.name}/{teamspace.name}/{name}"
|
|
86
|
+
|
|
87
|
+
|
|
75
88
|
def _parse_model_name_and_version(name: str) -> Tuple[str, str, str, str]:
|
|
76
89
|
"""Parse the name argument into its components."""
|
|
77
90
|
try:
|
|
@@ -105,9 +118,7 @@ def download_model(
|
|
|
105
118
|
download_dir: The directory where the Model should be downloaded.
|
|
106
119
|
progress_bar: Whether to show a progress bar when downloading.
|
|
107
120
|
"""
|
|
108
|
-
|
|
109
|
-
teamspace = _resolve_teamspace(None, None, None)
|
|
110
|
-
name = f"{teamspace.owner.name}/{teamspace.name}/{name}"
|
|
121
|
+
name = _extend_model_name_with_teamspace(name)
|
|
111
122
|
teamspace_owner_name, teamspace_name, model_name, version = _parse_model_name_and_version(name)
|
|
112
123
|
|
|
113
124
|
download_dir = Path(download_dir)
|
|
@@ -147,9 +158,7 @@ def upload_model(
|
|
|
147
158
|
If not provided, the default cloud account for the Teamspace will be used.
|
|
148
159
|
progress_bar: Whether to show a progress bar for the upload.
|
|
149
160
|
"""
|
|
150
|
-
|
|
151
|
-
teamspace = _resolve_teamspace(None, None, None)
|
|
152
|
-
name = f"{teamspace.owner.name}/{teamspace.name}/{name}"
|
|
161
|
+
name = _extend_model_name_with_teamspace(name)
|
|
153
162
|
org_name, teamspace_name, model_name, _ = _parse_model_name_and_version(name)
|
|
154
163
|
teamspace = _get_teamspace(name=teamspace_name, organization=org_name)
|
|
155
164
|
return teamspace.upload_model(
|
lightning_sdk/pipeline/types.py
CHANGED
|
@@ -152,12 +152,20 @@ class Job:
|
|
|
152
152
|
self.wait_for = wait_for
|
|
153
153
|
|
|
154
154
|
def to_proto(self, teamspace: "Teamspace", cloud_account: str, shared_filesystem: bool) -> V1PipelineStep:
|
|
155
|
+
studio = _get_studio(self.studio)
|
|
156
|
+
if isinstance(studio, Studio):
|
|
157
|
+
if self.cloud_account is None:
|
|
158
|
+
self.cloud_account = studio.cloud_account
|
|
159
|
+
elif studio.cloud_account != self.cloud_account:
|
|
160
|
+
raise ValueError("The provided cloud account doesn't match the studio")
|
|
161
|
+
|
|
155
162
|
_validate_cloud_account(cloud_account, self.cloud_account, shared_filesystem)
|
|
163
|
+
|
|
156
164
|
body = JobApiV2._create_job_body(
|
|
157
165
|
name=self.name,
|
|
158
166
|
command=self.command,
|
|
159
167
|
cloud_account=self.cloud_account or cloud_account,
|
|
160
|
-
studio_id=None,
|
|
168
|
+
studio_id=studio._studio.id if isinstance(studio, Studio) else None,
|
|
161
169
|
image=self.image,
|
|
162
170
|
machine=self.machine,
|
|
163
171
|
interruptible=self.interruptible,
|
|
@@ -220,13 +228,21 @@ class MMT:
|
|
|
220
228
|
self.wait_for = wait_for
|
|
221
229
|
|
|
222
230
|
def to_proto(self, teamspace: "Teamspace", cloud_account: str, shared_filesystem: bool) -> V1PipelineStep:
|
|
231
|
+
studio = _get_studio(self.studio)
|
|
232
|
+
if isinstance(studio, Studio):
|
|
233
|
+
if self.cloud_account is None:
|
|
234
|
+
self.cloud_account = studio.cloud_account
|
|
235
|
+
elif studio.cloud_account != self.cloud_account:
|
|
236
|
+
raise ValueError("The provided cloud account doesn't match the studio")
|
|
237
|
+
|
|
223
238
|
_validate_cloud_account(cloud_account, self.cloud_account, shared_filesystem)
|
|
239
|
+
|
|
224
240
|
body = MMTApiV2._create_mmt_body(
|
|
225
241
|
name=self.name,
|
|
226
242
|
num_machines=self.num_machines,
|
|
227
243
|
command=self.command,
|
|
228
244
|
cloud_account=self.cloud_account or cloud_account,
|
|
229
|
-
studio_id=
|
|
245
|
+
studio_id=studio._studio.id if isinstance(studio, Studio) else None,
|
|
230
246
|
image=self.image,
|
|
231
247
|
machine=self.machine,
|
|
232
248
|
interruptible=self.interruptible,
|
|
@@ -266,3 +282,13 @@ def _validate_cloud_account(pipeline_cloud_account: str, step_cloud_account: str
|
|
|
266
282
|
"With shared filesystem enabled, all the pipeline steps wait_for to be on the same cluster."
|
|
267
283
|
f" Found {pipeline_cloud_account} and {step_cloud_account}"
|
|
268
284
|
)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _get_studio(studio: Union["Studio", str, None]) -> Union[Studio, None]:
|
|
288
|
+
if studio is None:
|
|
289
|
+
return None
|
|
290
|
+
|
|
291
|
+
if isinstance(studio, Studio):
|
|
292
|
+
return studio
|
|
293
|
+
|
|
294
|
+
return Studio(studio)
|
lightning_sdk/pipeline/utils.py
CHANGED
|
@@ -43,7 +43,7 @@ def prepare_steps(steps: List["V1PipelineStep"]) -> List["V1PipelineStep"]:
|
|
|
43
43
|
else:
|
|
44
44
|
for name in current_step.wait_for:
|
|
45
45
|
if current_step.name == name:
|
|
46
|
-
raise ValueError("You can only reference prior steps")
|
|
46
|
+
raise ValueError(f"You can only reference prior steps. Found {current_step.name}")
|
|
47
47
|
|
|
48
48
|
if name not in name_to_step:
|
|
49
49
|
raise ValueError(f"The step {current_step_idx} doesn't have a valid wait_for. Found {name}")
|
lightning_sdk/plugin.py
CHANGED
|
@@ -303,7 +303,6 @@ class SlurmJobsPlugin(_Plugin):
|
|
|
303
303
|
name: Optional[str] = None,
|
|
304
304
|
cluster_id: Optional[str] = None,
|
|
305
305
|
work_dir: str = "/home/lightning_manager",
|
|
306
|
-
num_gpus: int = 1,
|
|
307
306
|
sync_env: bool = True,
|
|
308
307
|
cache_id: Optional[str] = None,
|
|
309
308
|
) -> "Externalv1LightningappInstance":
|
|
@@ -315,7 +314,6 @@ class SlurmJobsPlugin(_Plugin):
|
|
|
315
314
|
cluster_id: The name of the SLURM Cluster to submit the job on.
|
|
316
315
|
If the cluster_id isn't provided, the oldest running SLURM cluster will be selected.
|
|
317
316
|
work_dir: The position where the the files will be created on the SLURM cluster.
|
|
318
|
-
num_gpus: The number of GPUs requested.
|
|
319
317
|
sync_env: Whether to force an environement sync.
|
|
320
318
|
cache_id: A string to avoid re-downloading the Studio files to the SLURM cluster.
|
|
321
319
|
If you update your files and don't change the cache_id, they won't be used.
|
|
@@ -326,9 +324,6 @@ class SlurmJobsPlugin(_Plugin):
|
|
|
326
324
|
if work_dir == "":
|
|
327
325
|
raise ValueError("The argument `work_dir` needs to be a proper path on the SLURM Cluster.")
|
|
328
326
|
|
|
329
|
-
if num_gpus <= 0:
|
|
330
|
-
raise ValueError("The argument `num_gpus` needs to be strictly positive.")
|
|
331
|
-
|
|
332
327
|
if name is None:
|
|
333
328
|
name = _run_name("slurm")
|
|
334
329
|
|
|
@@ -392,7 +387,6 @@ class SlurmJobsPlugin(_Plugin):
|
|
|
392
387
|
sync_env=sync_env,
|
|
393
388
|
work_dir=work_dir,
|
|
394
389
|
service_id=service_id,
|
|
395
|
-
num_gpus=num_gpus,
|
|
396
390
|
cache_id=cache_id,
|
|
397
391
|
),
|
|
398
392
|
)
|
lightning_sdk/serve.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import shlex
|
|
3
3
|
import subprocess
|
|
4
|
-
import warnings
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from typing import Generator,
|
|
5
|
+
from typing import Generator, Optional
|
|
6
|
+
from urllib.parse import urlencode
|
|
7
7
|
|
|
8
8
|
import docker
|
|
9
9
|
from rich.console import Console
|
|
@@ -13,6 +13,26 @@ from lightning_sdk import Deployment, Machine, Teamspace
|
|
|
13
13
|
from lightning_sdk.api.deployment_api import AutoScaleConfig
|
|
14
14
|
from lightning_sdk.api.lit_container_api import LitContainerApi
|
|
15
15
|
from lightning_sdk.api.utils import _get_cloud_url
|
|
16
|
+
from lightning_sdk.lightning_cloud import env
|
|
17
|
+
from lightning_sdk.lightning_cloud.login import Auth, AuthServer
|
|
18
|
+
|
|
19
|
+
_DOCKER_NOT_RUNNING_MSG = (
|
|
20
|
+
"Deploying LitServe requires Docker to be running on the machine. "
|
|
21
|
+
"If Docker is not installed, please install it from https://docs.docker.com/get-docker/ "
|
|
22
|
+
"and start the Docker daemon before running this command."
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class _AuthServer(AuthServer):
|
|
27
|
+
def get_auth_url(self, port: int) -> str:
|
|
28
|
+
redirect_uri = f"http://localhost:{port}/login-complete"
|
|
29
|
+
params = urlencode({"redirectTo": redirect_uri, "inviteCode": "litserve"})
|
|
30
|
+
return f"{env.LIGHTNING_CLOUD_URL}/sign-in?{params}"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class _Auth(Auth):
|
|
34
|
+
def _run_server(self) -> None:
|
|
35
|
+
_AuthServer().login_with_browser(self)
|
|
16
36
|
|
|
17
37
|
|
|
18
38
|
class _LitServeDeployer:
|
|
@@ -20,38 +40,49 @@ class _LitServeDeployer:
|
|
|
20
40
|
self._console = Console()
|
|
21
41
|
self._client = None
|
|
22
42
|
|
|
43
|
+
@staticmethod
|
|
44
|
+
def authenticate() -> None:
|
|
45
|
+
auth = _Auth()
|
|
46
|
+
auth.authenticate()
|
|
47
|
+
|
|
23
48
|
@property
|
|
24
49
|
def client(self) -> docker.DockerClient:
|
|
25
|
-
os.environ["DOCKER_BUILDKIT"] = "1"
|
|
26
|
-
|
|
27
50
|
if self._client is None:
|
|
28
51
|
try:
|
|
52
|
+
os.environ["DOCKER_BUILDKIT"] = "1"
|
|
29
53
|
self._client = docker.from_env()
|
|
30
54
|
self._client.ping()
|
|
31
|
-
except docker.errors.DockerException
|
|
32
|
-
raise RuntimeError(
|
|
55
|
+
except docker.errors.DockerException:
|
|
56
|
+
raise RuntimeError(_DOCKER_NOT_RUNNING_MSG) from None
|
|
33
57
|
return self._client
|
|
34
58
|
|
|
35
59
|
def dockerize_api(
|
|
36
|
-
self,
|
|
60
|
+
self,
|
|
61
|
+
server_filename: str,
|
|
62
|
+
port: int = 8000,
|
|
63
|
+
gpu: bool = False,
|
|
64
|
+
tag: str = "litserve-model",
|
|
65
|
+
print_success: bool = True,
|
|
37
66
|
) -> str:
|
|
38
67
|
import litserve as ls
|
|
39
68
|
from litserve import docker_builder
|
|
40
69
|
|
|
41
70
|
console = self._console
|
|
42
|
-
if os.path.exists("Dockerfile"):
|
|
43
|
-
console.print("Dockerfile already exists. Skipping generation.")
|
|
44
|
-
return os.path.abspath("Dockerfile")
|
|
45
|
-
|
|
46
71
|
requirements = ""
|
|
47
72
|
if os.path.exists("requirements.txt"):
|
|
48
73
|
requirements = "-r requirements.txt"
|
|
49
74
|
else:
|
|
50
|
-
|
|
75
|
+
console.print(
|
|
51
76
|
f"requirements.txt not found at {os.getcwd()}. "
|
|
52
77
|
f"Make sure to install the required packages in the Dockerfile.",
|
|
53
|
-
|
|
78
|
+
style="yellow",
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
if os.path.exists("Dockerfile"):
|
|
82
|
+
console.print(
|
|
83
|
+
"Dockerfile already exists in the current directory, we will use it for building the container."
|
|
54
84
|
)
|
|
85
|
+
return os.path.abspath("Dockerfile")
|
|
55
86
|
current_dir = Path.cwd()
|
|
56
87
|
if not (current_dir / server_filename).is_file():
|
|
57
88
|
raise FileNotFoundError(f"Server file `{server_filename}` must be in the current directory: {os.getcwd()}")
|
|
@@ -72,7 +103,8 @@ class _LitServeDeployer:
|
|
|
72
103
|
with open("Dockerfile", "w") as f:
|
|
73
104
|
f.write(dockerfile_content)
|
|
74
105
|
|
|
75
|
-
|
|
106
|
+
if print_success:
|
|
107
|
+
success_msg = f"""[bold]Dockerfile created successfully[/bold]
|
|
76
108
|
Update [underline]{os.path.abspath("Dockerfile")}[/underline] to add any additional dependencies or commands.
|
|
77
109
|
|
|
78
110
|
[bold]Build the container with:[/bold]
|
|
@@ -84,7 +116,7 @@ Update [underline]{os.path.abspath("Dockerfile")}[/underline] to add any additio
|
|
|
84
116
|
[bold]To push the container to a registry:[/bold]
|
|
85
117
|
> [underline]docker push {tag}[/underline]
|
|
86
118
|
"""
|
|
87
|
-
|
|
119
|
+
console.print(success_msg)
|
|
88
120
|
return os.path.abspath("Dockerfile")
|
|
89
121
|
|
|
90
122
|
@staticmethod
|
|
@@ -187,19 +219,19 @@ Update [underline]{os.path.abspath("Dockerfile")}[/underline] to add any additio
|
|
|
187
219
|
deployment_name: str,
|
|
188
220
|
teamspace: Teamspace,
|
|
189
221
|
image: str,
|
|
190
|
-
ports: List[int],
|
|
191
|
-
gpu: bool = False,
|
|
192
222
|
metric: Optional[str] = None,
|
|
193
223
|
machine: Optional[Machine] = None,
|
|
194
|
-
min_replica: Optional[int] =
|
|
224
|
+
min_replica: Optional[int] = 0,
|
|
195
225
|
max_replica: Optional[int] = 1,
|
|
196
226
|
spot: Optional[bool] = None,
|
|
197
|
-
replicas: Optional[int] =
|
|
227
|
+
replicas: Optional[int] = 1,
|
|
198
228
|
cloud_account: Optional[str] = None,
|
|
229
|
+
port: Optional[int] = 8000,
|
|
230
|
+
include_credentials: Optional[bool] = True,
|
|
199
231
|
) -> dict:
|
|
200
|
-
machine = machine or Machine.CPU
|
|
201
|
-
metric = metric or "GPU" if gpu else "CPU"
|
|
202
232
|
url = f"{_get_cloud_url()}/{teamspace.owner.name}/{teamspace.name}/jobs/{deployment_name}"
|
|
233
|
+
machine = machine or Machine.CPU
|
|
234
|
+
metric = metric or ("CPU" if machine.is_cpu() else "GPU")
|
|
203
235
|
deployment = Deployment(deployment_name, teamspace)
|
|
204
236
|
if deployment.is_started:
|
|
205
237
|
raise RuntimeError(
|
|
@@ -211,11 +243,12 @@ Update [underline]{os.path.abspath("Dockerfile")}[/underline] to add any additio
|
|
|
211
243
|
deployment.start(
|
|
212
244
|
machine=machine,
|
|
213
245
|
image=image,
|
|
214
|
-
ports=ports,
|
|
215
246
|
autoscale=autoscale,
|
|
216
247
|
spot=spot,
|
|
217
248
|
replicas=replicas,
|
|
218
249
|
cloud_account=cloud_account,
|
|
250
|
+
ports=[port],
|
|
251
|
+
include_credentials=include_credentials,
|
|
219
252
|
)
|
|
220
253
|
|
|
221
254
|
return {"deployment": deployment, "url": url}
|
lightning_sdk/utils/resolve.py
CHANGED