lightning-sdk 0.1.58__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +5 -3
- lightning_sdk/api/deployment_api.py +23 -11
- lightning_sdk/api/job_api.py +42 -7
- lightning_sdk/api/lit_container_api.py +23 -3
- lightning_sdk/api/mmt_api.py +46 -8
- lightning_sdk/api/pipeline_api.py +50 -0
- lightning_sdk/api/teamspace_api.py +2 -2
- lightning_sdk/api/utils.py +15 -5
- lightning_sdk/cli/ai_hub.py +30 -65
- lightning_sdk/cli/coloring.py +60 -0
- lightning_sdk/cli/configure.py +25 -40
- lightning_sdk/cli/connect.py +7 -20
- lightning_sdk/cli/create.py +83 -0
- lightning_sdk/cli/delete.py +72 -75
- lightning_sdk/cli/docker.py +22 -0
- lightning_sdk/cli/download.py +78 -113
- lightning_sdk/cli/entrypoint.py +44 -65
- lightning_sdk/cli/generate.py +28 -43
- lightning_sdk/cli/inspect.py +22 -50
- lightning_sdk/cli/list.py +281 -222
- lightning_sdk/cli/mmts_menu.py +1 -1
- lightning_sdk/cli/open.py +62 -0
- lightning_sdk/cli/run.py +430 -263
- lightning_sdk/cli/serve.py +128 -191
- lightning_sdk/cli/start.py +55 -36
- lightning_sdk/cli/stop.py +97 -55
- lightning_sdk/cli/switch.py +53 -36
- lightning_sdk/cli/upload.py +318 -255
- lightning_sdk/deployment/__init__.py +2 -0
- lightning_sdk/deployment/deployment.py +33 -8
- lightning_sdk/lightning_cloud/openapi/__init__.py +23 -0
- lightning_sdk/lightning_cloud/openapi/api/__init__.py +1 -0
- lightning_sdk/lightning_cloud/openapi/api/assistants_service_api.py +10 -6
- lightning_sdk/lightning_cloud/openapi/api/jobs_service_api.py +355 -4
- lightning_sdk/lightning_cloud/openapi/api/lit_logger_service_api.py +4 -4
- lightning_sdk/lightning_cloud/openapi/api/lit_registry_service_api.py +14 -2
- lightning_sdk/lightning_cloud/openapi/api/pipelines_service_api.py +674 -0
- lightning_sdk/lightning_cloud/openapi/api/storage_service_api.py +303 -4
- lightning_sdk/lightning_cloud/openapi/models/__init__.py +22 -0
- lightning_sdk/lightning_cloud/openapi/models/agents_id_body.py +17 -69
- lightning_sdk/lightning_cloud/openapi/models/cluster_id_capacityreservations_body.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/create.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/create_deployment_request_defines_a_spec_for_the_job_that_allows_for_autoscaling_jobs.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/deployments_id_body.py +105 -1
- lightning_sdk/lightning_cloud/openapi/models/id_visibility_body1.py +1 -27
- lightning_sdk/lightning_cloud/openapi/models/id_visibility_body2.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/org_id_memberships_body.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/orgs_id_body.py +157 -1
- lightning_sdk/lightning_cloud/openapi/models/pipelines_id_body.py +461 -0
- lightning_sdk/lightning_cloud/openapi/models/project_id_pipelines_body.py +227 -0
- lightning_sdk/lightning_cloud/openapi/models/projects_id_body.py +157 -1
- lightning_sdk/lightning_cloud/openapi/models/slurm_jobs_body.py +79 -1
- lightning_sdk/lightning_cloud/openapi/models/uploads_upload_id_body.py +1 -27
- lightning_sdk/lightning_cloud/openapi/models/uploads_upload_id_body1.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_agent_job.py +79 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_assistant.py +17 -69
- lightning_sdk/lightning_cloud/openapi/models/v1_capacity_block_offering.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_artifact_event_type.py +1 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_accelerator.py +131 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_capacity_reservation.py +79 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_security_options.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_complete_upload_temporary_artifact_request.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_deployment_request.py +461 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_deployment_template_request.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_create_job_request.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_managed_endpoint_response.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_multi_machine_job_request.py +253 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_data_connection.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_delete_pipeline_response.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment.py +105 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment_details.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment_template.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_filestore_data_connection.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_filesystem_job.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_filesystem_mmt.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_find_capacity_block_offering_response.py +29 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_job.py +133 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_job_artifacts_type.py +103 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_job_spec.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_job_timing.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_list_pipelines_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_registry_artifact.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_repository.py +29 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_managed_model.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_state.py +2 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_organization.py +209 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline.py +513 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_schedule.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_step.py +253 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_step_status.py +331 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_step_type.py +104 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_project_settings.py +157 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_restart_timing.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_rule_resource.py +1 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_shared_filesystem.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_slurm_job.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_update_job_visibility_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_upload_temporary_artifact_request.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +95 -355
- lightning_sdk/lightning_cloud/openapi/models/validate.py +27 -1
- lightning_sdk/lightning_cloud/rest_client.py +4 -2
- lightning_sdk/machine.py +25 -1
- lightning_sdk/models.py +18 -12
- lightning_sdk/pipeline/__init__.py +4 -0
- lightning_sdk/pipeline/pipeline.py +109 -0
- lightning_sdk/pipeline/types.py +268 -0
- lightning_sdk/pipeline/utils.py +69 -0
- lightning_sdk/plugin.py +9 -10
- lightning_sdk/serve.py +134 -0
- lightning_sdk/services/utilities.py +2 -2
- lightning_sdk/studio.py +5 -1
- lightning_sdk/teamspace.py +1 -1
- lightning_sdk/utils/resolve.py +12 -1
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.1.dist-info}/METADATA +6 -8
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.1.dist-info}/RECORD +120 -88
- lightning_sdk/cli/legacy.py +0 -135
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.1.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.1.dist-info}/WHEEL +0 -0
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.1.dist-info}/entry_points.txt +0 -0
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.1.dist-info}/top_level.txt +0 -0
lightning_sdk/__init__.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from lightning_sdk.agents import Agent
|
|
2
2
|
from lightning_sdk.ai_hub import AIHub
|
|
3
3
|
from lightning_sdk.constants import __GLOBAL_LIGHTNING_UNIQUE_IDS_STORE__ # noqa: F401
|
|
4
|
+
from lightning_sdk.deployment import Deployment
|
|
4
5
|
from lightning_sdk.helpers import _check_version_and_prompt_upgrade, _set_tqdm_envvars_noninteractive
|
|
5
6
|
from lightning_sdk.job import Job
|
|
6
7
|
from lightning_sdk.machine import Machine
|
|
@@ -13,6 +14,9 @@ from lightning_sdk.teamspace import Teamspace
|
|
|
13
14
|
from lightning_sdk.user import User
|
|
14
15
|
|
|
15
16
|
__all__ = [
|
|
17
|
+
"AIHub",
|
|
18
|
+
"Agent",
|
|
19
|
+
"Deployment",
|
|
16
20
|
"Job",
|
|
17
21
|
"JobsPlugin",
|
|
18
22
|
"Machine",
|
|
@@ -25,10 +29,8 @@ __all__ = [
|
|
|
25
29
|
"Studio",
|
|
26
30
|
"Teamspace",
|
|
27
31
|
"User",
|
|
28
|
-
"Agent",
|
|
29
|
-
"AIHub",
|
|
30
32
|
]
|
|
31
33
|
|
|
32
|
-
__version__ = "0.1
|
|
34
|
+
__version__ = "0.2.1"
|
|
33
35
|
_check_version_and_prompt_upgrade(__version__)
|
|
34
36
|
_set_tqdm_envvars_noninteractive()
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from time import sleep
|
|
2
|
-
from typing import Any, List, Literal, Optional, Union
|
|
2
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
|
3
3
|
|
|
4
4
|
from lightning_sdk.api.utils import _machine_to_compute_name
|
|
5
5
|
from lightning_sdk.lightning_cloud.openapi import (
|
|
@@ -242,7 +242,7 @@ class DeploymentApi:
|
|
|
242
242
|
self,
|
|
243
243
|
deployment: V1Deployment,
|
|
244
244
|
machine: Optional[Machine] = None,
|
|
245
|
-
|
|
245
|
+
image: Optional[str] = None,
|
|
246
246
|
entrypoint: Optional[str] = None,
|
|
247
247
|
command: Optional[str] = None,
|
|
248
248
|
env: Optional[List[Union[Env, Secret]]] = None,
|
|
@@ -257,6 +257,7 @@ class DeploymentApi:
|
|
|
257
257
|
health_check: Optional[Union[HttpHealthCheck, ExecHealthCheck]] = None,
|
|
258
258
|
auth: Optional[Union[BasicAuth, TokenAuth]] = None,
|
|
259
259
|
custom_domain: Optional[str] = None,
|
|
260
|
+
quantity: Optional[int] = None,
|
|
260
261
|
) -> V1Deployment:
|
|
261
262
|
# Update the deployment in place
|
|
262
263
|
|
|
@@ -274,13 +275,14 @@ class DeploymentApi:
|
|
|
274
275
|
apply_change(deployment.spec, "instance_type", _machine_to_compute_name(machine))
|
|
275
276
|
|
|
276
277
|
requires_release = False
|
|
277
|
-
requires_release |= apply_change(deployment.spec, "image",
|
|
278
|
+
requires_release |= apply_change(deployment.spec, "image", image)
|
|
278
279
|
requires_release |= apply_change(deployment.spec, "entrypoint", entrypoint)
|
|
279
280
|
requires_release |= apply_change(deployment.spec, "command", command)
|
|
280
281
|
requires_release |= apply_change(deployment.spec, "env", to_env(env))
|
|
281
282
|
requires_release |= apply_change(deployment.spec, "readiness_probe", to_health_check(health_check))
|
|
282
283
|
requires_release |= apply_change(deployment.spec, "cluster_id", cloud_account)
|
|
283
284
|
requires_release |= apply_change(deployment.spec, "spot", spot)
|
|
285
|
+
requires_release |= apply_change(deployment.spec, "quantity", quantity)
|
|
284
286
|
|
|
285
287
|
if requires_release:
|
|
286
288
|
if deployment.strategy is None:
|
|
@@ -382,13 +384,21 @@ def restore_env(env: List[V1EnvVar]) -> List[Union[Secret, Env]]:
|
|
|
382
384
|
return [Secret(name=e.from_secret) if e.from_secret else Env(name=e.name, value=e.value) for e in env]
|
|
383
385
|
|
|
384
386
|
|
|
385
|
-
def to_env(env:
|
|
387
|
+
def to_env(env: Union[List[Union[Secret, Env]], Dict[str, str], None] = None) -> Optional[List[V1EnvVar]]:
|
|
386
388
|
if not env:
|
|
387
389
|
return None
|
|
388
390
|
|
|
391
|
+
env_list = []
|
|
392
|
+
|
|
393
|
+
if isinstance(env, dict):
|
|
394
|
+
for k, v in env.items():
|
|
395
|
+
env_list.append(Env(name=k, value=v))
|
|
396
|
+
else:
|
|
397
|
+
env_list = env
|
|
398
|
+
|
|
389
399
|
return [
|
|
390
400
|
V1EnvVar(name=env.name, value=env.value) if isinstance(env, Env) else V1EnvVar(from_secret=env.name)
|
|
391
|
-
for env in
|
|
401
|
+
for env in env_list
|
|
392
402
|
]
|
|
393
403
|
|
|
394
404
|
|
|
@@ -459,7 +469,7 @@ def to_autoscaling(
|
|
|
459
469
|
target_metric.target = str(target_metric.target)
|
|
460
470
|
|
|
461
471
|
metrics = (
|
|
462
|
-
target_metrics
|
|
472
|
+
[V1AutoscalingTargetMetric(name=t.name, target=t.target) for t in target_metrics]
|
|
463
473
|
if target_metrics is not None
|
|
464
474
|
else [V1AutoscalingTargetMetric(name=metric, target=str(threshold))]
|
|
465
475
|
)
|
|
@@ -532,12 +542,13 @@ def to_health_check(
|
|
|
532
542
|
def to_spec(
|
|
533
543
|
cloud_account: Optional[str],
|
|
534
544
|
machine: Optional[Machine],
|
|
535
|
-
|
|
545
|
+
image: Optional[str],
|
|
536
546
|
entrypoint: Optional[str],
|
|
537
547
|
command: Optional[str],
|
|
538
548
|
spot: Optional[bool] = False,
|
|
539
|
-
env:
|
|
549
|
+
env: Union[List[Union[Secret, Env]], Dict[str, str], None] = None,
|
|
540
550
|
health_check: Optional[Union[HttpHealthCheck, ExecHealthCheck]] = None,
|
|
551
|
+
quantity: Optional[int] = None,
|
|
541
552
|
) -> V1JobSpec:
|
|
542
553
|
if cloud_account is None:
|
|
543
554
|
raise ValueError("The cloud account should be defined.")
|
|
@@ -545,18 +556,19 @@ def to_spec(
|
|
|
545
556
|
if machine is None:
|
|
546
557
|
raise ValueError("The machine should be defined.")
|
|
547
558
|
|
|
548
|
-
if
|
|
549
|
-
raise ValueError("The
|
|
559
|
+
if image is None:
|
|
560
|
+
raise ValueError("The image should be defined.")
|
|
550
561
|
|
|
551
562
|
return V1JobSpec(
|
|
552
563
|
cluster_id=cloud_account,
|
|
553
564
|
command=command,
|
|
554
565
|
entrypoint=entrypoint,
|
|
555
566
|
env=to_env(env),
|
|
556
|
-
image=
|
|
567
|
+
image=image,
|
|
557
568
|
spot=spot,
|
|
558
569
|
instance_name=_machine_to_compute_name(machine),
|
|
559
570
|
readiness_probe=to_health_check(health_check),
|
|
571
|
+
quantity=quantity,
|
|
560
572
|
)
|
|
561
573
|
|
|
562
574
|
|
lightning_sdk/api/job_api.py
CHANGED
|
@@ -99,9 +99,10 @@ class JobApiV1:
|
|
|
99
99
|
# prefer user-requested config if specified
|
|
100
100
|
user_requested_compute_config: V1UserRequestedComputeConfig = spec.user_requested_compute_config
|
|
101
101
|
if user_requested_compute_config.name:
|
|
102
|
-
return Machine(user_requested_compute_config.name
|
|
102
|
+
return Machine.from_str(user_requested_compute_config.name)
|
|
103
103
|
compute_config: V1ComputeConfig = spec.compute_config
|
|
104
|
-
|
|
104
|
+
|
|
105
|
+
return Machine.from_str(compute_config.instance_type)
|
|
105
106
|
|
|
106
107
|
def get_studio_name(self, job: Externalv1LightningappInstance) -> str:
|
|
107
108
|
cs: V1CloudSpace = self._client.cloud_space_service_get_cloud_space(
|
|
@@ -215,6 +216,43 @@ class JobApiV2:
|
|
|
215
216
|
artifacts_local: Optional[str], # deprecated in favor of path_mappings
|
|
216
217
|
artifacts_remote: Optional[str], # deprecated in favor of path_mappings
|
|
217
218
|
) -> V1Job:
|
|
219
|
+
body = self._create_job_body(
|
|
220
|
+
name=name,
|
|
221
|
+
command=command,
|
|
222
|
+
cloud_account=cloud_account,
|
|
223
|
+
studio_id=studio_id,
|
|
224
|
+
image=image,
|
|
225
|
+
machine=machine,
|
|
226
|
+
interruptible=interruptible,
|
|
227
|
+
env=env,
|
|
228
|
+
image_credentials=image_credentials,
|
|
229
|
+
cloud_account_auth=cloud_account_auth,
|
|
230
|
+
entrypoint=entrypoint,
|
|
231
|
+
path_mappings=path_mappings,
|
|
232
|
+
artifacts_local=artifacts_local,
|
|
233
|
+
artifacts_remote=artifacts_remote,
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
job: V1Job = self._client.jobs_service_create_job(project_id=teamspace_id, body=body)
|
|
237
|
+
return job
|
|
238
|
+
|
|
239
|
+
@staticmethod
|
|
240
|
+
def _create_job_body(
|
|
241
|
+
name: str,
|
|
242
|
+
command: Optional[str],
|
|
243
|
+
cloud_account: Optional[str],
|
|
244
|
+
studio_id: Optional[str],
|
|
245
|
+
image: Optional[str],
|
|
246
|
+
machine: Union[Machine, str],
|
|
247
|
+
interruptible: bool,
|
|
248
|
+
env: Optional[Dict[str, str]],
|
|
249
|
+
image_credentials: Optional[str],
|
|
250
|
+
cloud_account_auth: bool,
|
|
251
|
+
entrypoint: str,
|
|
252
|
+
path_mappings: Optional[Dict[str, str]],
|
|
253
|
+
artifacts_local: Optional[str], # deprecated in favor of path_mappings
|
|
254
|
+
artifacts_remote: Optional[str], # deprecated in favor of path_mappings)
|
|
255
|
+
) -> ProjectIdJobsBody:
|
|
218
256
|
env_vars = []
|
|
219
257
|
if env is not None:
|
|
220
258
|
for k, v in env.items():
|
|
@@ -244,10 +282,7 @@ class JobApiV2:
|
|
|
244
282
|
image_secret_ref=image_credentials or "",
|
|
245
283
|
path_mappings=path_mappings_list,
|
|
246
284
|
)
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
job: V1Job = self._client.jobs_service_create_job(project_id=teamspace_id, body=body)
|
|
250
|
-
return job
|
|
285
|
+
return ProjectIdJobsBody(name=name, spec=spec)
|
|
251
286
|
|
|
252
287
|
def get_job_by_name(self, name: str, teamspace_id: str) -> V1Job:
|
|
253
288
|
job: V1Job = self._client.jobs_service_find_job(project_id=teamspace_id, name=name)
|
|
@@ -340,7 +375,7 @@ class JobApiV2:
|
|
|
340
375
|
instance_name = spec.instance_name
|
|
341
376
|
instance_type = spec.instance_type
|
|
342
377
|
|
|
343
|
-
return Machine(instance_name, instance_type or instance_name)
|
|
378
|
+
return Machine.from_str(instance_name, instance_type or instance_name)
|
|
344
379
|
|
|
345
380
|
def get_total_cost(self, job: V1Job) -> float:
|
|
346
381
|
return job.total_cost
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import inspect
|
|
1
2
|
import time
|
|
2
3
|
from typing import Any, Callable, Dict, Generator, Iterator, List
|
|
3
4
|
|
|
@@ -21,6 +22,16 @@ class DockerPushError(Exception):
|
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
def retry_on_lcr_auth_failure(func: Callable) -> Callable:
|
|
25
|
+
def generator_wrapper(self: "LitContainerApi", *args: Any, **kwargs: Any) -> Callable:
|
|
26
|
+
try:
|
|
27
|
+
gen = func(self, *args, **kwargs)
|
|
28
|
+
yield from gen
|
|
29
|
+
except LCRAuthFailedError:
|
|
30
|
+
self.authenticate(reauth=True)
|
|
31
|
+
gen = func(self, *args, **kwargs)
|
|
32
|
+
yield from gen
|
|
33
|
+
return
|
|
34
|
+
|
|
24
35
|
def wrapper(self: "LitContainerApi", *args: Any, **kwargs: Any) -> Callable:
|
|
25
36
|
try:
|
|
26
37
|
return func(self, *args, **kwargs)
|
|
@@ -28,6 +39,9 @@ def retry_on_lcr_auth_failure(func: Callable) -> Callable:
|
|
|
28
39
|
self.authenticate(reauth=True)
|
|
29
40
|
return func(self, *args, **kwargs)
|
|
30
41
|
|
|
42
|
+
if inspect.isgeneratorfunction(func):
|
|
43
|
+
return generator_wrapper
|
|
44
|
+
|
|
31
45
|
return wrapper
|
|
32
46
|
|
|
33
47
|
|
|
@@ -39,10 +53,13 @@ class LitContainerApi:
|
|
|
39
53
|
self._docker_client = docker.from_env()
|
|
40
54
|
self._docker_client.ping()
|
|
41
55
|
self._docker_auth_config = {}
|
|
42
|
-
except docker.errors.DockerException
|
|
43
|
-
raise RuntimeError(
|
|
56
|
+
except docker.errors.DockerException:
|
|
57
|
+
raise RuntimeError(
|
|
58
|
+
"Failed to connect to Docker, follow these steps to start it: https://docs.docker.com/engine/daemon/start/"
|
|
59
|
+
) from None
|
|
44
60
|
|
|
45
61
|
def authenticate(self, reauth: bool = False) -> bool:
|
|
62
|
+
resp = None
|
|
46
63
|
try:
|
|
47
64
|
authed_user = self._client.auth_service_get_user()
|
|
48
65
|
username = authed_user.username
|
|
@@ -78,6 +95,7 @@ class LitContainerApi:
|
|
|
78
95
|
except Exception as e:
|
|
79
96
|
raise ValueError(f"Could not delete container {container} from project {project_id}: {e!s}") from e
|
|
80
97
|
|
|
98
|
+
@retry_on_lcr_auth_failure
|
|
81
99
|
def upload_container(self, container: str, teamspace: Teamspace, tag: str) -> Generator[dict, None, None]:
|
|
82
100
|
try:
|
|
83
101
|
self._docker_client.images.get(container)
|
|
@@ -121,7 +139,9 @@ class LitContainerApi:
|
|
|
121
139
|
self.authenticate(reauth=True)
|
|
122
140
|
time.sleep(2)
|
|
123
141
|
|
|
124
|
-
lines = self._docker_client.api.push(
|
|
142
|
+
lines = self._docker_client.api.push(
|
|
143
|
+
repository, stream=True, decode=True, auth_config=self._docker_auth_config
|
|
144
|
+
)
|
|
125
145
|
for line in lines:
|
|
126
146
|
if isinstance(line, dict) and "error" in line:
|
|
127
147
|
error = line["error"]
|
lightning_sdk/api/mmt_api.py
CHANGED
|
@@ -88,6 +88,45 @@ class MMTApiV2:
|
|
|
88
88
|
artifacts_local: Optional[str], # deprecated in favor of path_mappings
|
|
89
89
|
artifacts_remote: Optional[str], # deprecated in favor of path_mappings
|
|
90
90
|
) -> V1MultiMachineJob:
|
|
91
|
+
body = self._create_mmt_body(
|
|
92
|
+
name=name,
|
|
93
|
+
num_machines=num_machines,
|
|
94
|
+
command=command,
|
|
95
|
+
cloud_account=cloud_account,
|
|
96
|
+
studio_id=studio_id,
|
|
97
|
+
image=image,
|
|
98
|
+
machine=machine,
|
|
99
|
+
interruptible=interruptible,
|
|
100
|
+
env=env,
|
|
101
|
+
image_credentials=image_credentials,
|
|
102
|
+
cloud_account_auth=cloud_account_auth,
|
|
103
|
+
entrypoint=entrypoint,
|
|
104
|
+
path_mappings=path_mappings,
|
|
105
|
+
artifacts_local=artifacts_local, # deprecated in favor of path_mappings
|
|
106
|
+
artifacts_remote=artifacts_remote, # deprecated in favor of path_mappings
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
job: V1MultiMachineJob = self._client.jobs_service_create_multi_machine_job(project_id=teamspace_id, body=body)
|
|
110
|
+
return job
|
|
111
|
+
|
|
112
|
+
@staticmethod
|
|
113
|
+
def _create_mmt_body(
|
|
114
|
+
name: str,
|
|
115
|
+
num_machines: int,
|
|
116
|
+
command: Optional[str],
|
|
117
|
+
cloud_account: Optional[str],
|
|
118
|
+
studio_id: Optional[str],
|
|
119
|
+
image: Optional[str],
|
|
120
|
+
machine: Union[Machine, str],
|
|
121
|
+
interruptible: bool,
|
|
122
|
+
env: Optional[Dict[str, str]],
|
|
123
|
+
image_credentials: Optional[str],
|
|
124
|
+
cloud_account_auth: bool,
|
|
125
|
+
entrypoint: str,
|
|
126
|
+
path_mappings: Optional[Dict[str, str]],
|
|
127
|
+
artifacts_local: Optional[str], # deprecated in favor of path_mappings
|
|
128
|
+
artifacts_remote: Optional[str], # deprecated in favor of path_mappings
|
|
129
|
+
) -> ProjectIdMultimachinejobsBody:
|
|
91
130
|
env_vars = []
|
|
92
131
|
if env is not None:
|
|
93
132
|
for k, v in env.items():
|
|
@@ -117,13 +156,10 @@ class MMTApiV2:
|
|
|
117
156
|
image_secret_ref=image_credentials or "",
|
|
118
157
|
path_mappings=path_mappings_list,
|
|
119
158
|
)
|
|
120
|
-
|
|
159
|
+
return ProjectIdMultimachinejobsBody(
|
|
121
160
|
name=name, spec=spec, cluster_id=cloud_account or "", machines=num_machines
|
|
122
161
|
)
|
|
123
162
|
|
|
124
|
-
job: V1MultiMachineJob = self._client.jobs_service_create_multi_machine_job(project_id=teamspace_id, body=body)
|
|
125
|
-
return job
|
|
126
|
-
|
|
127
163
|
def get_job_by_name(self, name: str, teamspace_id: str) -> V1MultiMachineJob:
|
|
128
164
|
job: V1MultiMachineJob = self._client.jobs_service_get_multi_machine_job_by_name(
|
|
129
165
|
project_id=teamspace_id, name=name
|
|
@@ -149,13 +185,13 @@ class MMTApiV2:
|
|
|
149
185
|
return
|
|
150
186
|
|
|
151
187
|
if current_state != Status.Stopped:
|
|
152
|
-
update_body = MultimachinejobsIdBody(desired_state=V1MultiMachineJobState.
|
|
188
|
+
update_body = MultimachinejobsIdBody(desired_state=V1MultiMachineJobState.STOP)
|
|
153
189
|
self._client.jobs_service_update_multi_machine_job(body=update_body, project_id=teamspace_id, id=job_id)
|
|
154
190
|
|
|
155
191
|
while True:
|
|
156
192
|
current_job = self.get_job(job_id=job_id, teamspace_id=teamspace_id)
|
|
157
|
-
if self._job_state_to_external(current_job.
|
|
158
|
-
Status.
|
|
193
|
+
if self._job_state_to_external(current_job.state) in (
|
|
194
|
+
Status.Stopping,
|
|
159
195
|
Status.Completed,
|
|
160
196
|
Status.Stopped,
|
|
161
197
|
Status.Failed,
|
|
@@ -183,6 +219,8 @@ class MMTApiV2:
|
|
|
183
219
|
return Status.Completed
|
|
184
220
|
if str(state) == V1MultiMachineJobState.FAILED:
|
|
185
221
|
return Status.Failed
|
|
222
|
+
if str(state) == V1MultiMachineJobState.STOP:
|
|
223
|
+
return Status.Stopping
|
|
186
224
|
return Status.Pending
|
|
187
225
|
|
|
188
226
|
def get_studio_name(self, job: V1MultiMachineJob) -> Optional[str]:
|
|
@@ -204,7 +242,7 @@ class MMTApiV2:
|
|
|
204
242
|
instance_name = spec.instance_name
|
|
205
243
|
instance_type = spec.instance_type
|
|
206
244
|
|
|
207
|
-
return Machine(instance_name, instance_type or instance_name)
|
|
245
|
+
return Machine.from_str(instance_name, instance_type or instance_name)
|
|
208
246
|
|
|
209
247
|
def get_total_cost(self, job: V1MultiMachineJob) -> float:
|
|
210
248
|
return job.total_cost
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from lightning_sdk.lightning_cloud.openapi.models import (
|
|
4
|
+
ProjectIdPipelinesBody,
|
|
5
|
+
V1DeletePipelineResponse,
|
|
6
|
+
V1Pipeline,
|
|
7
|
+
V1PipelineStep,
|
|
8
|
+
V1SharedFilesystem,
|
|
9
|
+
)
|
|
10
|
+
from lightning_sdk.lightning_cloud.openapi.rest import ApiException
|
|
11
|
+
from lightning_sdk.lightning_cloud.rest_client import LightningClient
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PipelineApi:
|
|
15
|
+
"""Internal API client for Pipeline requests (mainly http requests)."""
|
|
16
|
+
|
|
17
|
+
def __init__(self) -> None:
|
|
18
|
+
self._client = LightningClient(retry=False, max_tries=0)
|
|
19
|
+
|
|
20
|
+
def get_pipeline_by_id(self, project_id: str, pipeline_id: str) -> V1Pipeline:
|
|
21
|
+
try:
|
|
22
|
+
return self._client.jobs_service_get_deployment(project_id=project_id, id=pipeline_id)
|
|
23
|
+
except ApiException as ex:
|
|
24
|
+
if "Reason: Not Found" in str(ex):
|
|
25
|
+
return None
|
|
26
|
+
raise ex
|
|
27
|
+
|
|
28
|
+
def create_pipeline(
|
|
29
|
+
self,
|
|
30
|
+
name: str,
|
|
31
|
+
project_id: str,
|
|
32
|
+
steps: List["V1PipelineStep"],
|
|
33
|
+
shared_filesystem: bool,
|
|
34
|
+
) -> V1Pipeline:
|
|
35
|
+
body = ProjectIdPipelinesBody(
|
|
36
|
+
name=name,
|
|
37
|
+
steps=steps,
|
|
38
|
+
shared_filesystem=V1SharedFilesystem(
|
|
39
|
+
enabled=shared_filesystem,
|
|
40
|
+
),
|
|
41
|
+
)
|
|
42
|
+
return self._client.pipelines_service_create_pipeline(body, project_id)
|
|
43
|
+
|
|
44
|
+
def stop(self, pipeline: V1Pipeline) -> V1Pipeline:
|
|
45
|
+
body = pipeline
|
|
46
|
+
body.state = "stop"
|
|
47
|
+
return self._client.pipelines_service_update_pipeline(body)
|
|
48
|
+
|
|
49
|
+
def delete(self, project_id: str, pipeline_id: str) -> V1DeletePipelineResponse:
|
|
50
|
+
return self._client.pipelines_service_delete_pipeline(project_id, pipeline_id)
|
|
@@ -203,8 +203,8 @@ class TeamspaceApi:
|
|
|
203
203
|
model_id = models[0].id
|
|
204
204
|
# decide if delete only version of whole model
|
|
205
205
|
if version:
|
|
206
|
-
if version == "
|
|
207
|
-
version = models[0].
|
|
206
|
+
if version == "default":
|
|
207
|
+
version = models[0].default_version
|
|
208
208
|
self.models.models_store_delete_model_version(project_id=teamspace_id, model_id=model_id, version=version)
|
|
209
209
|
else:
|
|
210
210
|
self.models.models_store_delete_model(project_id=teamspace_id, model_id=model_id)
|
lightning_sdk/api/utils.py
CHANGED
|
@@ -21,7 +21,6 @@ from lightning_sdk.lightning_cloud.openapi import (
|
|
|
21
21
|
StorageCompleteBody,
|
|
22
22
|
UploadIdCompleteBody,
|
|
23
23
|
UploadIdPartsBody,
|
|
24
|
-
UploadsUploadIdBody,
|
|
25
24
|
V1CompletedPart,
|
|
26
25
|
V1CompleteUpload,
|
|
27
26
|
V1PathMapping,
|
|
@@ -37,6 +36,12 @@ try:
|
|
|
37
36
|
from lightning_sdk.lightning_cloud.openapi import AppsIdBody1 as AppsIdBody
|
|
38
37
|
except ImportError:
|
|
39
38
|
from lightning_sdk.lightning_cloud.openapi import AppsIdBody
|
|
39
|
+
|
|
40
|
+
try:
|
|
41
|
+
from lightning_sdk.lightning_cloud.openapi import UploadsUploadIdBody1 as UploadsUploadIdBody
|
|
42
|
+
except ImportError:
|
|
43
|
+
from lightning_sdk.lightning_cloud.openapi import UploadsUploadIdBody
|
|
44
|
+
|
|
40
45
|
from lightning_sdk.lightning_cloud.openapi.rest import ApiException
|
|
41
46
|
from lightning_sdk.lightning_cloud.rest_client import LightningClient
|
|
42
47
|
from lightning_sdk.machine import Machine
|
|
@@ -90,6 +95,8 @@ class _FileUploader:
|
|
|
90
95
|
unit="B",
|
|
91
96
|
unit_scale=True,
|
|
92
97
|
unit_divisor=1000,
|
|
98
|
+
position=-1,
|
|
99
|
+
mininterval=1,
|
|
93
100
|
)
|
|
94
101
|
else:
|
|
95
102
|
self.progress_bar = None
|
|
@@ -138,7 +145,7 @@ class _FileUploader:
|
|
|
138
145
|
|
|
139
146
|
def _request_urls(self, parts: List[int], upload_id: str) -> List[V1PresignedUrl]:
|
|
140
147
|
"""Requests urls for a batch of parts."""
|
|
141
|
-
body = UploadsUploadIdBody(
|
|
148
|
+
body = UploadsUploadIdBody(filename=self.remote_path, parts=parts, cluster_id=self.cloud_account)
|
|
142
149
|
resp: V1UploadProjectArtifactPartsResponse = self.client.storage_service_upload_project_artifact_parts(
|
|
143
150
|
body, self.teamspace_id, upload_id
|
|
144
151
|
)
|
|
@@ -217,8 +224,9 @@ class _ModelFileUploader:
|
|
|
217
224
|
unit="B",
|
|
218
225
|
unit_scale=True,
|
|
219
226
|
unit_divisor=1000,
|
|
220
|
-
position=1,
|
|
221
227
|
leave=False,
|
|
228
|
+
position=-1,
|
|
229
|
+
mininterval=1,
|
|
222
230
|
)
|
|
223
231
|
else:
|
|
224
232
|
self.progress_bar = None
|
|
@@ -486,8 +494,8 @@ def _get_model_version(client: LightningClient, teamspace_id: str, name: str, ve
|
|
|
486
494
|
raise ValueError(f"Model `{name}` does not exist")
|
|
487
495
|
elif len(models) > 1:
|
|
488
496
|
raise ValueError("Multiple models with the same name found")
|
|
489
|
-
if version == "
|
|
490
|
-
return models[0].
|
|
497
|
+
if version == ("default"):
|
|
498
|
+
return models[0].default_version
|
|
491
499
|
versions = api.models_store_list_model_versions(project_id=teamspace_id, model_id=models[0].id).versions
|
|
492
500
|
if not versions:
|
|
493
501
|
raise ValueError(f"Model `{name}` does not have any versions")
|
|
@@ -520,6 +528,8 @@ def _download_model_files(
|
|
|
520
528
|
total=float(response.size_bytes),
|
|
521
529
|
unit_scale=True,
|
|
522
530
|
unit_divisor=1000,
|
|
531
|
+
position=-1,
|
|
532
|
+
mininterval=1,
|
|
523
533
|
)
|
|
524
534
|
|
|
525
535
|
with ThreadPoolExecutor(max_workers=min(num_workers, len(response.filepaths))) as file_executor, ThreadPoolExecutor(
|
lightning_sdk/cli/ai_hub.py
CHANGED
|
@@ -3,49 +3,6 @@ from typing import Optional
|
|
|
3
3
|
import click
|
|
4
4
|
|
|
5
5
|
from lightning_sdk.ai_hub import AIHub
|
|
6
|
-
from lightning_sdk.cli.studios_menu import _StudiosMenu
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class _AIHub(_StudiosMenu):
|
|
10
|
-
"""Interact with Lightning Studio - AI Hub."""
|
|
11
|
-
|
|
12
|
-
def api_info(self, api_id: str) -> None:
|
|
13
|
-
"""Get full API template info such as input details.
|
|
14
|
-
|
|
15
|
-
Example:
|
|
16
|
-
lightning aihub api_info [API_ID]
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
api_id: The ID of the API for which information is requested.
|
|
20
|
-
"""
|
|
21
|
-
return api_info(api_id=api_id)
|
|
22
|
-
|
|
23
|
-
def list_apis(self, search: Optional[str] = None) -> None:
|
|
24
|
-
"""List API templates available in the AI Hub.
|
|
25
|
-
|
|
26
|
-
Args:
|
|
27
|
-
search: Search for API templates by name.
|
|
28
|
-
"""
|
|
29
|
-
return list_apis(search=search)
|
|
30
|
-
|
|
31
|
-
def deploy(
|
|
32
|
-
self,
|
|
33
|
-
api_id: str,
|
|
34
|
-
cloud_account: Optional[str] = None,
|
|
35
|
-
name: Optional[str] = None,
|
|
36
|
-
teamspace: Optional[str] = None,
|
|
37
|
-
org: Optional[str] = None,
|
|
38
|
-
) -> None:
|
|
39
|
-
"""Deploy an API template from the AI Hub.
|
|
40
|
-
|
|
41
|
-
Args:
|
|
42
|
-
api_id: API template ID.
|
|
43
|
-
cloud_account: Cloud Account to deploy the API to. Defaults to user's default cloud account.
|
|
44
|
-
name: Name of the deployed API. Defaults to the name of the API template.
|
|
45
|
-
teamspace: Teamspace to deploy the API to. Defaults to user's default teamspace.
|
|
46
|
-
org: Organization to deploy the API to. Defaults to user's default organization.
|
|
47
|
-
"""
|
|
48
|
-
return deploy(api_id=api_id, cloud_account=cloud_account, name=name, teamspace=teamspace, org=org)
|
|
49
6
|
|
|
50
7
|
|
|
51
8
|
@click.group(name="aihub")
|
|
@@ -53,8 +10,8 @@ def aihub() -> None:
|
|
|
53
10
|
"""Interact with Lightning Studio - AI Hub."""
|
|
54
11
|
|
|
55
12
|
|
|
56
|
-
|
|
57
|
-
|
|
13
|
+
@aihub.command(name="api-info")
|
|
14
|
+
@click.argument("api-id")
|
|
58
15
|
def api_info(api_id: str) -> None:
|
|
59
16
|
"""Get full API template info such as input details.
|
|
60
17
|
|
|
@@ -67,34 +24,42 @@ def api_info(api_id: str) -> None:
|
|
|
67
24
|
ai_hub.api_info(api_id)
|
|
68
25
|
|
|
69
26
|
|
|
70
|
-
|
|
71
|
-
|
|
27
|
+
@aihub.command(name="list-apis")
|
|
28
|
+
@click.option("--search", default=None, help="Search for API templates by name.")
|
|
72
29
|
def list_apis(search: Optional[str]) -> None:
|
|
73
30
|
"""List API templates available in the AI Hub."""
|
|
74
31
|
ai_hub = AIHub()
|
|
75
32
|
ai_hub.list_apis(search=search)
|
|
76
33
|
|
|
77
34
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
35
|
+
@aihub.command(name="deploy")
|
|
36
|
+
@click.argument("api-id")
|
|
37
|
+
@click.option(
|
|
38
|
+
"--cloud-account",
|
|
39
|
+
"--cloud_account",
|
|
40
|
+
default=None,
|
|
41
|
+
help="Cloud Account to deploy the API to. Defaults to user's default cloud account.",
|
|
42
|
+
)
|
|
43
|
+
@click.option("--name", default=None, help="Name of the deployed API. Defaults to the name of the API template.")
|
|
44
|
+
@click.option(
|
|
45
|
+
"--teamspace",
|
|
46
|
+
default=None,
|
|
47
|
+
help="Teamspace to deploy the API to. Defaults to user's default teamspace.",
|
|
48
|
+
)
|
|
49
|
+
@click.option(
|
|
50
|
+
"--org",
|
|
51
|
+
default=None,
|
|
52
|
+
help="Organization to deploy the API to. Defaults to user's default organization.",
|
|
53
|
+
)
|
|
96
54
|
def deploy(
|
|
97
55
|
api_id: str, cloud_account: Optional[str], name: Optional[str], teamspace: Optional[str], org: Optional[str]
|
|
98
56
|
) -> None:
|
|
57
|
+
"""Deploy an API template from the AI Hub.
|
|
58
|
+
|
|
59
|
+
Example:
|
|
60
|
+
lightning aihub deploy API-ID
|
|
61
|
+
|
|
62
|
+
API-ID: The ID of the API which should be deployed.
|
|
63
|
+
"""
|
|
99
64
|
ai_hub = AIHub()
|
|
100
65
|
ai_hub.run(api_id, cloud_account=cloud_account, name=name, teamspace=teamspace, org=org)
|