lightning-sdk 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +3 -1
- lightning_sdk/api/job_api.py +19 -1
- lightning_sdk/api/lit_container_api.py +42 -0
- lightning_sdk/api/mmt_api.py +6 -0
- lightning_sdk/api/utils.py +8 -1
- lightning_sdk/cli/delete.py +58 -0
- lightning_sdk/cli/entrypoint.py +17 -0
- lightning_sdk/cli/inspect.py +31 -0
- lightning_sdk/cli/job_and_mmt_action.py +37 -0
- lightning_sdk/cli/jobs_menu.py +57 -0
- lightning_sdk/cli/list.py +62 -4
- lightning_sdk/cli/mmts_menu.py +57 -0
- lightning_sdk/cli/run.py +22 -3
- lightning_sdk/cli/stop.py +37 -0
- lightning_sdk/cli/upload.py +32 -1
- lightning_sdk/job/base.py +24 -1
- lightning_sdk/job/job.py +34 -9
- lightning_sdk/job/v1.py +1 -1
- lightning_sdk/job/v2.py +16 -11
- lightning_sdk/lightning_cloud/openapi/__init__.py +3 -1
- lightning_sdk/lightning_cloud/openapi/api/lit_registry_service_api.py +124 -23
- lightning_sdk/lightning_cloud/openapi/api/models_store_api.py +250 -8
- lightning_sdk/lightning_cloud/openapi/models/__init__.py +3 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_aws_direct_v1.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_spec.py +1 -29
- lightning_sdk/lightning_cloud/openapi/models/{v1_delete_container_response.py → v1_delete_lit_repository_response.py} +6 -6
- lightning_sdk/lightning_cloud/openapi/models/v1_google_cloud_direct_v1.py +29 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_lambda_labs_direct_v1.py +31 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_model.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +27 -53
- lightning_sdk/lightning_cloud/openapi/models/v1_vultr_direct_v1.py +81 -3
- lightning_sdk/lightning_cloud/openapi/models/version_default_body.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/versions_version_body.py +123 -0
- lightning_sdk/lightning_cloud/utils/data_connection.py +1 -0
- lightning_sdk/lit_container.py +78 -0
- lightning_sdk/mmt/base.py +18 -1
- lightning_sdk/mmt/mmt.py +35 -14
- lightning_sdk/mmt/v1.py +8 -2
- lightning_sdk/mmt/v2.py +12 -10
- {lightning_sdk-0.1.47.dist-info → lightning_sdk-0.1.49.dist-info}/METADATA +1 -1
- {lightning_sdk-0.1.47.dist-info → lightning_sdk-0.1.49.dist-info}/RECORD +45 -37
- lightning_sdk/api/lit_registry_api.py +0 -12
- lightning_sdk/lit_registry.py +0 -39
- {lightning_sdk-0.1.47.dist-info → lightning_sdk-0.1.49.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.1.47.dist-info → lightning_sdk-0.1.49.dist-info}/WHEEL +0 -0
- {lightning_sdk-0.1.47.dist-info → lightning_sdk-0.1.49.dist-info}/entry_points.txt +0 -0
- {lightning_sdk-0.1.47.dist-info → lightning_sdk-0.1.49.dist-info}/top_level.txt +0 -0
lightning_sdk/__init__.py
CHANGED
|
@@ -4,6 +4,7 @@ from lightning_sdk.constants import __GLOBAL_LIGHTNING_UNIQUE_IDS_STORE__ # noq
|
|
|
4
4
|
from lightning_sdk.helpers import _check_version_and_prompt_upgrade
|
|
5
5
|
from lightning_sdk.job import Job
|
|
6
6
|
from lightning_sdk.machine import Machine
|
|
7
|
+
from lightning_sdk.mmt import MMT
|
|
7
8
|
from lightning_sdk.organization import Organization
|
|
8
9
|
from lightning_sdk.plugin import JobsPlugin, MultiMachineTrainingPlugin, Plugin, SlurmJobsPlugin
|
|
9
10
|
from lightning_sdk.status import Status
|
|
@@ -15,6 +16,7 @@ __all__ = [
|
|
|
15
16
|
"Job",
|
|
16
17
|
"JobsPlugin",
|
|
17
18
|
"Machine",
|
|
19
|
+
"MMT",
|
|
18
20
|
"MultiMachineTrainingPlugin",
|
|
19
21
|
"Organization",
|
|
20
22
|
"Plugin",
|
|
@@ -27,5 +29,5 @@ __all__ = [
|
|
|
27
29
|
"AIHub",
|
|
28
30
|
]
|
|
29
31
|
|
|
30
|
-
__version__ = "0.1.47"
|
|
32
|
+
__version__ = "0.1.49"
|
|
31
33
|
_check_version_and_prompt_upgrade(__version__)
|
lightning_sdk/api/job_api.py
CHANGED
|
@@ -180,8 +180,13 @@ class JobApiV1:
|
|
|
180
180
|
|
|
181
181
|
raise RuntimeError("Could not extract command from app")
|
|
182
182
|
|
|
183
|
+
def get_total_cost(self, job: Externalv1LightningappInstance) -> float:
    """Return the ``total_cost`` recorded on the status of the given app instance."""
    return job.status.total_cost
|
|
186
|
+
|
|
183
187
|
|
|
184
188
|
class JobApiV2:
|
|
189
|
+
# these are stages the job can be in.
|
|
185
190
|
v2_job_state_pending = "pending"
|
|
186
191
|
v2_job_state_running = "running"
|
|
187
192
|
v2_job_state_stopped = "stopped"
|
|
@@ -189,6 +194,9 @@ class JobApiV2:
|
|
|
189
194
|
v2_job_state_failed = "failed"
|
|
190
195
|
v2_job_state_stopping = "stopping"
|
|
191
196
|
|
|
197
|
+
# this is the user action to stop the job.
|
|
198
|
+
v2_job_state_stop = "stop"
|
|
199
|
+
|
|
192
200
|
def __init__(self) -> None:
|
|
193
201
|
self._cloud_url = _cloud_url()
|
|
194
202
|
self._client = LightningClient(max_tries=7)
|
|
@@ -262,7 +270,7 @@ class JobApiV2:
|
|
|
262
270
|
return
|
|
263
271
|
|
|
264
272
|
if current_state != Status.Stopping:
|
|
265
|
-
update_body = JobsIdBody1(cloudspace_id=current_job.spec.cloudspace_id, state=self.
|
|
273
|
+
update_body = JobsIdBody1(cloudspace_id=current_job.spec.cloudspace_id, state=self.v2_job_state_stop)
|
|
266
274
|
self._client.jobs_service_update_job(body=update_body, project_id=teamspace_id, id=job_id)
|
|
267
275
|
|
|
268
276
|
while True:
|
|
@@ -302,6 +310,13 @@ class JobApiV2:
|
|
|
302
310
|
def get_command(self, job: V1Job) -> str:
|
|
303
311
|
return job.spec.command
|
|
304
312
|
|
|
313
|
+
def get_mmt_name(self, job: V1Job) -> str:
    """Return the job name without its last ``-<suffix>`` part for jobs that carry a multi-machine job id.

    An empty string is returned when the job has no ``multi_machine_job_id`` or
    when its name contains no ``-`` to split on.
    """
    if not job.multi_machine_job_id:
        return ""

    # rsplit yields either [name] (no dash) or [prefix, suffix]
    prefix, *suffix = job.name.rsplit("-", 1)
    return prefix if suffix else ""
|
|
319
|
+
|
|
305
320
|
def _job_state_to_external(self, state: str) -> "Status":
|
|
306
321
|
from lightning_sdk.status import Status
|
|
307
322
|
|
|
@@ -326,3 +341,6 @@ class JobApiV2:
|
|
|
326
341
|
return _COMPUTE_NAME_TO_MACHINE.get(
|
|
327
342
|
instance_type, _COMPUTE_NAME_TO_MACHINE.get(instance_name, instance_type or instance_name)
|
|
328
343
|
)
|
|
344
|
+
|
|
345
|
+
def get_total_cost(self, job: V1Job) -> float:
    """Return the ``total_cost`` field of the given job."""
    total = job.total_cost
    return total
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from typing import Generator, List
|
|
2
|
+
|
|
3
|
+
from lightning_sdk.api.utils import _get_registry_url
|
|
4
|
+
from lightning_sdk.lightning_cloud.openapi.models import V1DeleteLitRepositoryResponse
|
|
5
|
+
from lightning_sdk.lightning_cloud.rest_client import LightningClient
|
|
6
|
+
from lightning_sdk.teamspace import Teamspace
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LitContainerApi:
    """API wrapper around the lit registry service and the local docker daemon for container images."""

    def __init__(self) -> None:
        self._client = LightningClient(max_tries=3)

    def list_containers(self, project_id: str) -> List:
        """Return the ``repositories`` of the lit registry fetched for ``project_id``."""
        registry = self._client.lit_registry_service_get_lit_project_registry(project_id)
        return registry.repositories

    def delete_container(self, project_id: str, container: str) -> V1DeleteLitRepositoryResponse:
        """Delete the repository ``container`` of ``project_id`` and return the service response.

        Raises:
            ValueError: if the delete call raises any exception (chained to the original error).
        """
        try:
            response = self._client.lit_registry_service_delete_lit_repository(project_id, container)
        except Exception as ex:
            raise ValueError(f"Could not delete container {container} from project {project_id}") from ex
        return response

    def upload_container(self, container: str, teamspace: Teamspace, tag: str) -> Generator[str, None, None]:
        """Tag a local docker image for the registry and push it, returning the streamed push output.

        Raises:
            RuntimeError: if the docker daemon cannot be reached.
            ValueError: if the image is not found locally or tagging it fails.
        """
        # imported lazily so that docker is only required when uploading
        import docker

        try:
            docker_client = docker.from_env()
            docker_client.ping()
        except docker.errors.DockerException as e:
            raise RuntimeError(f"Failed to connect to Docker daemon: {e!s}. Is Docker running?") from None

        try:
            docker_client.images.get(container)
        except docker.errors.ImageNotFound:
            raise ValueError(f"Container {container} does not exist") from None

        repository = f"{_get_registry_url()}/lit-container/{teamspace.owner.name}/{teamspace.name}/{container}"
        if not docker_client.api.tag(container, repository, tag):
            raise ValueError(f"Could not tag container {container} with {repository}:{tag}")

        return docker_client.api.push(repository, stream=True, decode=True)
|
lightning_sdk/api/mmt_api.py
CHANGED
|
@@ -203,3 +203,9 @@ class MMTApiV2:
|
|
|
203
203
|
return _COMPUTE_NAME_TO_MACHINE.get(
|
|
204
204
|
instance_type, _COMPUTE_NAME_TO_MACHINE.get(instance_name, instance_type or instance_name)
|
|
205
205
|
)
|
|
206
|
+
|
|
207
|
+
def get_total_cost(self, job: V1MultiMachineJob) -> float:
    """Return the ``total_cost`` field of the given multi-machine job."""
    total = job.total_cost
    return total
|
|
209
|
+
|
|
210
|
+
def get_num_machines(self, job: V1MultiMachineJob) -> int:
    """Return the ``machines`` field of the given multi-machine job."""
    machine_count = job.machines
    return machine_count
|
lightning_sdk/api/utils.py
CHANGED
|
@@ -353,7 +353,8 @@ def _machine_to_compute_name(machine: Union[Machine, str]) -> str:
|
|
|
353
353
|
|
|
354
354
|
_COMPUTE_NAME_TO_MACHINE: Dict[str, Machine] = {v: k for k, v in _MACHINE_TO_COMPUTE_NAME.items()}
|
|
355
355
|
|
|
356
|
-
_DEFAULT_CLOUD_URL = "https://lightning.ai
|
|
356
|
+
_DEFAULT_CLOUD_URL = "https://lightning.ai"
|
|
357
|
+
_DEFAULT_REGISTRY_URL = "litcr.io"
|
|
357
358
|
|
|
358
359
|
|
|
359
360
|
def _get_cloud_url() -> str:
|
|
@@ -362,6 +363,12 @@ def _get_cloud_url() -> str:
|
|
|
362
363
|
return cloud_url
|
|
363
364
|
|
|
364
365
|
|
|
366
|
+
def _get_registry_url() -> str:
    """Return the registry URL from ``LIGHTNING_REGISTRY_URL``, storing the default in the environment if unset."""
    # setdefault reads the existing value or writes the default, and returns the value either way
    return os.environ.setdefault("LIGHTNING_REGISTRY_URL", _DEFAULT_REGISTRY_URL)
|
|
370
|
+
|
|
371
|
+
|
|
365
372
|
def _sanitize_studio_remote_path(path: str, studio_id: str) -> str:
|
|
366
373
|
return f"/cloudspaces/{studio_id}/code/content/{path.replace('/teamspace/studios/this_studio/', '')}"
|
|
367
374
|
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from lightning_sdk.cli.exceptions import StudioCliError
|
|
4
|
+
from lightning_sdk.cli.job_and_mmt_action import _JobAndMMTAction
|
|
5
|
+
from lightning_sdk.cli.teamspace_menu import _TeamspacesMenu
|
|
6
|
+
from lightning_sdk.lit_container import LitContainer
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class _Delete(_JobAndMMTAction, _TeamspacesMenu):
    """Delete resources on the Lightning AI platform."""

    def container(self, container: str, teamspace: Optional[str] = None) -> None:
        """Delete a docker container.

        Args:
            container: The name of the container to delete.
            teamspace: The teamspace to delete the container from. Should be specified as {owner}/{name}
                If not provided, can be selected in an interactive menu.
        """
        api = LitContainer()
        resolved_teamspace = self._resolve_teamspace(teamspace=teamspace)
        try:
            api.delete_container(container, resolved_teamspace.name, resolved_teamspace.owner.name)
            success_message = f"Container {container} deleted successfully."
            print(success_message)
        except Exception as err:
            # the original cause is suppressed on purpose; its message is embedded in the CLI error instead
            failure_message = f"Could not delete container {container} from project {resolved_teamspace.name}: {err}"
            raise StudioCliError(failure_message) from None

    def job(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
        """Delete a job.

        Args:
            name: the name of the job. If not specified can be selected interactively.
            teamspace: the name of the teamspace the job lives in.
                Should be specified as {teamspace_owner}/{teamspace_name} (e.g my-org/my-teamspace).
                If not specified can be selected interactively.

        """
        # the parent class resolves the job (by name or interactively); deletion happens here
        target = super().job(name=name, teamspace=teamspace)
        target.delete()
        print(f"Successfully deleted {target.name}!")

    def mmt(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
        """Delete a multi-machine job.

        Args:
            name: the name of the job. If not specified can be selected interactively.
            teamspace: the name of the teamspace the job lives in.
                Should be specified as {teamspace_owner}/{teamspace_name} (e.g my-org/my-teamspace).
                If not specified can be selected interactively.

        """
        # the parent class resolves the multi-machine job; deletion happens here
        target = super().mmt(name=name, teamspace=teamspace)
        target.delete()
        print(f"Successfully deleted {target.name}!")
|
lightning_sdk/cli/entrypoint.py
CHANGED
|
@@ -1,13 +1,20 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from types import TracebackType
|
|
3
|
+
from typing import Type
|
|
4
|
+
|
|
1
5
|
from fire import Fire
|
|
2
6
|
from lightning_utilities.core.imports import RequirementCache
|
|
3
7
|
|
|
4
8
|
from lightning_sdk.api.studio_api import _cloud_url
|
|
5
9
|
from lightning_sdk.cli.ai_hub import _AIHub
|
|
10
|
+
from lightning_sdk.cli.delete import _Delete
|
|
6
11
|
from lightning_sdk.cli.download import _Downloads
|
|
12
|
+
from lightning_sdk.cli.inspect import _Inspect
|
|
7
13
|
from lightning_sdk.cli.legacy import _LegacyLightningCLI
|
|
8
14
|
from lightning_sdk.cli.list import _List
|
|
9
15
|
from lightning_sdk.cli.run import _Run
|
|
10
16
|
from lightning_sdk.cli.serve import _Docker, _LitServe
|
|
17
|
+
from lightning_sdk.cli.stop import _Stop
|
|
11
18
|
from lightning_sdk.cli.upload import _Uploads
|
|
12
19
|
from lightning_sdk.lightning_cloud.login import Auth
|
|
13
20
|
|
|
@@ -25,6 +32,11 @@ class StudioCLI:
|
|
|
25
32
|
self.serve = _LitServe()
|
|
26
33
|
self.dockerize = _Docker()
|
|
27
34
|
self.list = _List()
|
|
35
|
+
self.delete = _Delete()
|
|
36
|
+
self.inspect = _Inspect()
|
|
37
|
+
self.stop = _Stop()
|
|
38
|
+
|
|
39
|
+
sys.excepthook = _notify_exception
|
|
28
40
|
|
|
29
41
|
def login(self) -> None:
|
|
30
42
|
"""Login to Lightning AI Studios."""
|
|
@@ -42,6 +54,11 @@ class StudioCLI:
|
|
|
42
54
|
auth.clear()
|
|
43
55
|
|
|
44
56
|
|
|
57
|
+
def _notify_exception(exception_type: Type[BaseException], value: BaseException, tb: TracebackType) -> None: # No
|
|
58
|
+
"""CLI won't show tracebacks, just print the exception message."""
|
|
59
|
+
print(value)
|
|
60
|
+
|
|
61
|
+
|
|
45
62
|
def main_cli() -> None:
|
|
46
63
|
"""CLI entrypoint."""
|
|
47
64
|
Fire(StudioCLI(), name="lightning")
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from lightning_sdk.cli.job_and_mmt_action import _JobAndMMTAction
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class _Inspect(_JobAndMMTAction):
    """Inspect resources of the Lightning AI platform to get additional details as JSON."""

    def job(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
        """Inspect a job for further details as JSON.

        Args:
            name: the name of the job. If not specified can be selected interactively.
            teamspace: the name of the teamspace the job lives in.
                Should be specified as {teamspace_owner}/{teamspace_name} (e.g my-org/my-teamspace).
                If not specified can be selected interactively.

        """
        # the parent class resolves the job; this command only prints its JSON representation
        resolved_job = super().job(name=name, teamspace=teamspace)
        print(resolved_job.json())

    def mmt(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
        """Inspect a multi-machine job for further details as JSON.

        Args:
            name: the name of the job. If not specified can be selected interactively.
            teamspace: the name of the teamspace the job lives in.
                Should be specified as {teamspace_owner}/{teamspace_name} (e.g my-org/my-teamspace).
                If not specified can be selected interactively.

        """
        # the parent class resolves the multi-machine job; this command only prints its JSON representation
        resolved_mmt = super().mmt(name=name, teamspace=teamspace)
        print(resolved_mmt.json())
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from lightning_sdk.cli.jobs_menu import _JobsMenu
|
|
4
|
+
from lightning_sdk.cli.mmts_menu import _MMTsMenu
|
|
5
|
+
from lightning_sdk.cli.teamspace_menu import _TeamspacesMenu
|
|
6
|
+
from lightning_sdk.job import Job
|
|
7
|
+
from lightning_sdk.mmt import MMT
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class _JobAndMMTAction(_TeamspacesMenu, _JobsMenu, _MMTsMenu):
    """Shared base for CLI actions that operate on a single job or multi-machine job.

    It only resolves the target (teamspace first, then the job/MMT by name or
    interactive menu) and returns it; what is done with the result is left to the caller.
    """

    def job(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> Job:
        """Fetch a job for further processing.

        Args:
            name: the name of the job. If not specified can be selected interactively.
            teamspace: the name of the teamspace the job lives in.
                Should be specified as {teamspace_owner}/{teamspace_name} (e.g my-org/my-teamspace).
                If not specified can be selected interactively.

        """
        resolved_teamspace = self._resolve_teamspace(teamspace)
        return self._resolve_job(name, teamspace=resolved_teamspace)

    def mmt(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> MMT:
        """Fetch a multi-machine job for further processing.

        Args:
            name: the name of the job. If not specified can be selected interactively.
            teamspace: the name of the teamspace the job lives in.
                Should be specified as {teamspace_owner}/{teamspace_name} (e.g my-org/my-teamspace).
                If not specified can be selected interactively.

        """
        resolved_teamspace = self._resolve_teamspace(teamspace)
        return self._resolve_mmt(name, teamspace=resolved_teamspace)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from typing import Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
from simple_term_menu import TerminalMenu
|
|
4
|
+
|
|
5
|
+
from lightning_sdk.cli.exceptions import StudioCliError
|
|
6
|
+
from lightning_sdk.job import Job
|
|
7
|
+
from lightning_sdk.teamspace import Teamspace
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class _JobsMenu:
    """Mixin that resolves a Job of a teamspace, either by name or through an interactive terminal menu."""

    def _get_job_from_interactive_menu(self, possible_jobs: Dict[str, Job]) -> Job:
        """Show a terminal menu with the given jobs (ordered by key) and return the selected one."""
        job_ids = sorted(possible_jobs)
        terminal_menu = self._prepare_terminal_menu_jobs([possible_jobs[k] for k in job_ids])
        terminal_menu.show()

        # menu entries are the job names, which are also the keys produced by _get_possible_jobs
        # NOTE(review): presumably chosen_menu_entry is None if the menu is dismissed without a
        # selection, which would raise KeyError here - confirm against simple_term_menu
        return possible_jobs[terminal_menu.chosen_menu_entry]

    def _get_job_from_name(self, job: str, possible_jobs: Dict[str, Job]) -> Job:
        """Return the job called ``job``; fall back to the interactive menu if there is no such job."""
        for candidate in possible_jobs.values():
            if candidate.name == job:
                return candidate

        # f-string so the requested name is actually interpolated into the hint
        print(f"Could not find Job {job}, please select it from the list:")
        return self._get_job_from_interactive_menu(possible_jobs)

    @staticmethod
    def _prepare_terminal_menu_jobs(possible_jobs: List[Job], title: Optional[str] = None) -> TerminalMenu:
        """Build (without showing) a terminal menu whose entries are the names of the given jobs."""
        if title is None:
            title = "Please select a Job of the following:"

        return TerminalMenu([j.name for j in possible_jobs], title=title, clear_menu_on_exit=True)

    @staticmethod
    def _get_possible_jobs(teamspace: Teamspace) -> Dict[str, Job]:
        """Return the jobs of ``teamspace`` keyed by job name."""
        return {j.name: j for j in teamspace.jobs}

    def _resolve_job(self, job: Optional[str], teamspace: Teamspace) -> Job:
        """Resolve a job of ``teamspace`` by name, or interactively when ``job`` is None.

        Raises:
            KeyboardInterrupt: re-raised without context when the user interrupts.
            StudioCliError: if any other exception occurs while resolving the job.
        """
        try:
            possible_jobs = self._get_possible_jobs(teamspace)
            if job is None:
                resolved_job = self._get_job_from_interactive_menu(possible_jobs)
            else:
                resolved_job = self._get_job_from_name(job=job, possible_jobs=possible_jobs)

            return resolved_job
        except KeyboardInterrupt:
            raise KeyboardInterrupt from None

        except Exception as e:
            raise StudioCliError(
                f"Could not find the given Job {job} in Teamspace {teamspace.name}. "
                "Please contact Lightning AI directly to resolve this issue."
            ) from e
|
lightning_sdk/cli/list.py
CHANGED
|
@@ -4,7 +4,7 @@ from rich.console import Console
|
|
|
4
4
|
from rich.table import Table
|
|
5
5
|
|
|
6
6
|
from lightning_sdk.cli.teamspace_menu import _TeamspacesMenu
|
|
7
|
-
from lightning_sdk.
|
|
7
|
+
from lightning_sdk.lit_container import LitContainer
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class _List(_TeamspacesMenu):
|
|
@@ -20,7 +20,36 @@ class _List(_TeamspacesMenu):
|
|
|
20
20
|
"""
|
|
21
21
|
resolved_teamspace = self._resolve_teamspace(teamspace=teamspace)
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
jobs = resolved_teamspace.jobs
|
|
24
|
+
|
|
25
|
+
table = Table(
|
|
26
|
+
pad_edge=True,
|
|
27
|
+
)
|
|
28
|
+
table.add_column("Name")
|
|
29
|
+
table.add_column("Teamspace")
|
|
30
|
+
table.add_column("Studio")
|
|
31
|
+
table.add_column("Image")
|
|
32
|
+
table.add_column("Status")
|
|
33
|
+
table.add_column("Machine")
|
|
34
|
+
table.add_column("Total Cost")
|
|
35
|
+
for j in jobs:
|
|
36
|
+
# we know we just fetched these, so no need to refetch
|
|
37
|
+
j._prevent_refetch_latest = True
|
|
38
|
+
j._internal_job._prevent_refetch_latest = True
|
|
39
|
+
|
|
40
|
+
studio = j.studio
|
|
41
|
+
table.add_row(
|
|
42
|
+
j.name,
|
|
43
|
+
f"{j.teamspace.owner.name}/{j.teamspace.name}",
|
|
44
|
+
studio.name if studio else None,
|
|
45
|
+
j.image,
|
|
46
|
+
str(j.status),
|
|
47
|
+
str(j.machine),
|
|
48
|
+
f"{j.total_cost:.3f}",
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
console = Console()
|
|
52
|
+
console.print(table)
|
|
24
53
|
|
|
25
54
|
def mmts(self, teamspace: Optional[str] = None) -> None:
|
|
26
55
|
"""List multi-machine jobs for a given teamspace.
|
|
@@ -32,7 +61,36 @@ class _List(_TeamspacesMenu):
|
|
|
32
61
|
"""
|
|
33
62
|
resolved_teamspace = self._resolve_teamspace(teamspace=teamspace)
|
|
34
63
|
|
|
35
|
-
|
|
64
|
+
jobs = resolved_teamspace.multi_machine_jobs
|
|
65
|
+
|
|
66
|
+
table = Table(pad_edge=True)
|
|
67
|
+
table.add_column("Name")
|
|
68
|
+
table.add_column("Teamspace")
|
|
69
|
+
table.add_column("Studio")
|
|
70
|
+
table.add_column("Image")
|
|
71
|
+
table.add_column("Status")
|
|
72
|
+
table.add_column("Machine")
|
|
73
|
+
table.add_column("Num Machines")
|
|
74
|
+
table.add_column("Total Cost")
|
|
75
|
+
for j in jobs:
|
|
76
|
+
# we know we just fetched these, so no need to refetch
|
|
77
|
+
j._prevent_refetch_latest = True
|
|
78
|
+
j._internal_job._prevent_refetch_latest = True
|
|
79
|
+
|
|
80
|
+
studio = j.studio
|
|
81
|
+
table.add_row(
|
|
82
|
+
j.name,
|
|
83
|
+
f"{j.teamspace.owner.name}/{j.teamspace.name}",
|
|
84
|
+
studio.name if studio else None,
|
|
85
|
+
j.image,
|
|
86
|
+
str(j.status),
|
|
87
|
+
str(j.machine),
|
|
88
|
+
str(j.num_machines),
|
|
89
|
+
str(j.total_cost),
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
console = Console()
|
|
93
|
+
console.print(table)
|
|
36
94
|
|
|
37
95
|
def containers(self, teamspace: Optional[str] = None) -> None:
|
|
38
96
|
"""Display the list of available containers.
|
|
@@ -41,7 +99,7 @@ class _List(_TeamspacesMenu):
|
|
|
41
99
|
teamspace: The teamspace to list containers from. Should be specified as {owner}/{name}
|
|
42
100
|
If not provided, can be selected in an interactive menu.
|
|
43
101
|
"""
|
|
44
|
-
api =
|
|
102
|
+
api = LitContainer()
|
|
45
103
|
resolved_teamspace = self._resolve_teamspace(teamspace=teamspace)
|
|
46
104
|
result = api.list_containers(teamspace=resolved_teamspace.name, org=resolved_teamspace.owner.name)
|
|
47
105
|
table = Table(pad_edge=True, box=None)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from typing import Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
from simple_term_menu import TerminalMenu
|
|
4
|
+
|
|
5
|
+
from lightning_sdk.cli.exceptions import StudioCliError
|
|
6
|
+
from lightning_sdk.mmt import MMT
|
|
7
|
+
from lightning_sdk.teamspace import Teamspace
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class _MMTsMenu:
    """Mixin that resolves a multi-machine job of a teamspace, by name or through an interactive terminal menu."""

    def _get_mmt_from_interactive_menu(self, possible_mmts: Dict[str, MMT]) -> MMT:
        """Show a terminal menu with the given multi-machine jobs (ordered by key) and return the selected one."""
        job_ids = sorted(possible_mmts)
        terminal_menu = self._prepare_terminal_menu_mmts([possible_mmts[k] for k in job_ids])
        terminal_menu.show()

        # menu entries are the MMT names, which are also the keys produced by _get_possible_mmts
        # NOTE(review): presumably chosen_menu_entry is None if the menu is dismissed without a
        # selection, which would raise KeyError here - confirm against simple_term_menu
        return possible_mmts[terminal_menu.chosen_menu_entry]

    def _get_mmt_from_name(self, mmt: str, possible_mmts: Dict[str, MMT]) -> MMT:
        """Return the multi-machine job called ``mmt``; fall back to the interactive menu if there is none."""
        for candidate in possible_mmts.values():
            if candidate.name == mmt:
                return candidate

        # f-string so the requested name is actually interpolated into the hint
        print(f"Could not find Multi-Machine Job {mmt}, please select it from the list:")
        return self._get_mmt_from_interactive_menu(possible_mmts)

    @staticmethod
    def _prepare_terminal_menu_mmts(possible_mmts: List[MMT], title: Optional[str] = None) -> TerminalMenu:
        """Build (without showing) a terminal menu whose entries are the names of the given multi-machine jobs."""
        if title is None:
            title = "Please select a Multi-Machine Job of the following:"

        return TerminalMenu([m.name for m in possible_mmts], title=title, clear_menu_on_exit=True)

    @staticmethod
    def _get_possible_mmts(teamspace: Teamspace) -> Dict[str, MMT]:
        """Return the multi-machine jobs of ``teamspace`` keyed by name."""
        return {j.name: j for j in teamspace.multi_machine_jobs}

    def _resolve_mmt(self, mmt: Optional[str], teamspace: Teamspace) -> MMT:
        """Resolve a multi-machine job of ``teamspace`` by name, or interactively when ``mmt`` is None.

        Raises:
            KeyboardInterrupt: re-raised without context when the user interrupts.
            StudioCliError: if any other exception occurs while resolving the job.
        """
        try:
            possible_mmts = self._get_possible_mmts(teamspace)
            if mmt is None:
                resolved_mmt = self._get_mmt_from_interactive_menu(possible_mmts)
            else:
                resolved_mmt = self._get_mmt_from_name(mmt=mmt, possible_mmts=possible_mmts)

            return resolved_mmt
        except KeyboardInterrupt:
            raise KeyboardInterrupt from None

        except Exception as e:
            raise StudioCliError(
                f"Could not find the given Multi-Machine-Job {mmt} in Teamspace {teamspace.name}. "
                "Please contact Lightning AI directly to resolve this issue."
            ) from e
|
lightning_sdk/cli/run.py
CHANGED
|
@@ -53,6 +53,10 @@ class _Run:
|
|
|
53
53
|
within it.
|
|
54
54
|
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
55
55
|
Only supported for jobs with a docker image compute environment.
|
|
56
|
+
entrypoint: The entrypoint of your docker container. Defaults to `sh -c` which
|
|
57
|
+
just runs the provided command in a standard shell.
|
|
58
|
+
To use the pre-defined entrypoint of the provided image, set this to an empty string.
|
|
59
|
+
Only applicable when submitting docker jobs.
|
|
56
60
|
"""
|
|
57
61
|
# TODO: the docstrings from artifacts_local and artifacts_remote don't show up completely,
|
|
58
62
|
# might need to switch to explicit cli definition
|
|
@@ -93,6 +97,10 @@ class _Run:
|
|
|
93
97
|
within it.
|
|
94
98
|
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
95
99
|
Only supported for jobs with a docker image compute environment.
|
|
100
|
+
entrypoint: The entrypoint of your docker container. Defaults to `sh -c` which
|
|
101
|
+
just runs the provided command in a standard shell.
|
|
102
|
+
To use the pre-defined entrypoint of the provided image, set this to an empty string.
|
|
103
|
+
Only applicable when submitting docker jobs.
|
|
96
104
|
"""
|
|
97
105
|
# TODO: the docstrings from artifacts_local and artifacts_remote don't show up completely,
|
|
98
106
|
# might need to switch to explicit cli definition
|
|
@@ -103,7 +111,7 @@ class _Run:
|
|
|
103
111
|
# might need to move to different cli library
|
|
104
112
|
def job(
|
|
105
113
|
self,
|
|
106
|
-
name: str,
|
|
114
|
+
name: Optional[str] = None,
|
|
107
115
|
machine: Optional[str] = None,
|
|
108
116
|
command: Optional[str] = None,
|
|
109
117
|
studio: Optional[str] = None,
|
|
@@ -118,17 +126,25 @@ class _Run:
|
|
|
118
126
|
cloud_account_auth: bool = False,
|
|
119
127
|
artifacts_local: Optional[str] = None,
|
|
120
128
|
artifacts_remote: Optional[str] = None,
|
|
129
|
+
entrypoint: str = "sh -c",
|
|
121
130
|
) -> None:
|
|
131
|
+
if not name:
|
|
132
|
+
from datetime import datetime
|
|
133
|
+
|
|
134
|
+
timestr = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
|
|
135
|
+
name = f"job-{timestr}"
|
|
136
|
+
|
|
122
137
|
if machine is None:
|
|
123
138
|
# TODO: infer from studio
|
|
124
139
|
machine = "CPU"
|
|
125
|
-
machine_enum = Machine(machine.upper())
|
|
140
|
+
machine_enum = Machine[machine.upper()]
|
|
126
141
|
|
|
127
142
|
resolved_teamspace = Teamspace(name=teamspace, org=org, user=user)
|
|
128
143
|
|
|
129
144
|
if cloud_account is None:
|
|
130
145
|
cloud_account = resolved_teamspace.default_cloud_account
|
|
131
146
|
machine_enum = Machine(machine.upper())
|
|
147
|
+
|
|
132
148
|
Job.run(
|
|
133
149
|
name=name,
|
|
134
150
|
machine=machine_enum,
|
|
@@ -145,6 +161,7 @@ class _Run:
|
|
|
145
161
|
cloud_account_auth=cloud_account_auth,
|
|
146
162
|
artifacts_local=artifacts_local,
|
|
147
163
|
artifacts_remote=artifacts_remote,
|
|
164
|
+
entrypoint=entrypoint,
|
|
148
165
|
)
|
|
149
166
|
|
|
150
167
|
# TODO: sadly, fire displays both Optional[type] and Union[type, None] as Optional[Optional]
|
|
@@ -167,6 +184,7 @@ class _Run:
|
|
|
167
184
|
cloud_account_auth: bool = False,
|
|
168
185
|
artifacts_local: Optional[str] = None,
|
|
169
186
|
artifacts_remote: Optional[str] = None,
|
|
187
|
+
entrypoint: str = "sh -c",
|
|
170
188
|
) -> None:
|
|
171
189
|
if name is None:
|
|
172
190
|
from datetime import datetime
|
|
@@ -177,7 +195,7 @@ class _Run:
|
|
|
177
195
|
if machine is None:
|
|
178
196
|
# TODO: infer from studio
|
|
179
197
|
machine = "CPU"
|
|
180
|
-
machine_enum = Machine(machine.upper())
|
|
198
|
+
machine_enum = Machine[machine.upper()]
|
|
181
199
|
|
|
182
200
|
resolved_teamspace = Teamspace(name=teamspace, org=org, user=user)
|
|
183
201
|
if cloud_account is None:
|
|
@@ -203,4 +221,5 @@ class _Run:
|
|
|
203
221
|
cloud_account_auth=cloud_account_auth,
|
|
204
222
|
artifacts_local=artifacts_local,
|
|
205
223
|
artifacts_remote=artifacts_remote,
|
|
224
|
+
entrypoint=entrypoint,
|
|
206
225
|
)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from lightning_sdk.cli.job_and_mmt_action import _JobAndMMTAction
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class _Stop(_JobAndMMTAction):
    """Stop resources on the Lightning AI platform."""

    def job(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
        """Stop a job.

        Args:
            name: the name of the job. If not specified can be selected interactively.
            teamspace: the name of the teamspace the job lives in.
                Should be specified as {teamspace_owner}/{teamspace_name} (e.g my-org/my-teamspace).
                If not specified can be selected interactively.

        """
        # the parent class resolves the job (by name or interactively); stopping happens here
        target = super().job(name=name, teamspace=teamspace)
        target.stop()
        print(f"Successfully stopped {target.name}!")

    def mmt(self, name: Optional[str] = None, teamspace: Optional[str] = None) -> None:
        """Stop a multi-machine job.

        Args:
            name: the name of the job. If not specified can be selected interactively.
            teamspace: the name of the teamspace the job lives in.
                Should be specified as {teamspace_owner}/{teamspace_name} (e.g my-org/my-teamspace).
                If not specified can be selected interactively.

        """
        # the parent class resolves the multi-machine job; stopping happens here
        target = super().mmt(name=name, teamspace=teamspace)
        target.stop()
        print(f"Successfully stopped {target.name}!")
|