lightning-sdk 0.1.42__py3-none-any.whl → 0.1.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +1 -1
- lightning_sdk/api/job_api.py +35 -0
- lightning_sdk/api/utils.py +8 -0
- lightning_sdk/cli/run.py +113 -4
- lightning_sdk/cli/serve.py +102 -14
- lightning_sdk/job/base.py +10 -0
- lightning_sdk/job/job.py +28 -4
- lightning_sdk/job/v1.py +5 -0
- lightning_sdk/job/v2.py +18 -0
- lightning_sdk/job/work.py +10 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_membership.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_state.py +1 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_project_membership.py +27 -1
- lightning_sdk/mmt/__init__.py +2 -1
- lightning_sdk/mmt/base.py +117 -15
- lightning_sdk/mmt/mmt.py +114 -22
- lightning_sdk/mmt/v1.py +56 -0
- lightning_sdk/mmt/v2.py +57 -0
- lightning_sdk/plugin.py +28 -23
- {lightning_sdk-0.1.42.dist-info → lightning_sdk-0.1.44.dist-info}/METADATA +2 -1
- {lightning_sdk-0.1.42.dist-info → lightning_sdk-0.1.44.dist-info}/RECORD +25 -26
- {lightning_sdk-0.1.42.dist-info → lightning_sdk-0.1.44.dist-info}/entry_points.txt +0 -1
- lightning_sdk/cli/mmt.py +0 -138
- {lightning_sdk-0.1.42.dist-info → lightning_sdk-0.1.44.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.1.42.dist-info → lightning_sdk-0.1.44.dist-info}/WHEEL +0 -0
- {lightning_sdk-0.1.42.dist-info → lightning_sdk-0.1.44.dist-info}/top_level.txt +0 -0
lightning_sdk/mmt/v2.py
CHANGED
|
@@ -15,6 +15,8 @@ from lightning_sdk.mmt.base import _BaseMMT
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class _MMTV2(_BaseMMT):
|
|
18
|
+
"""New implementation of Multi-Machine Training."""
|
|
19
|
+
|
|
18
20
|
def __init__(
|
|
19
21
|
self,
|
|
20
22
|
name: str,
|
|
@@ -24,6 +26,15 @@ class _MMTV2(_BaseMMT):
|
|
|
24
26
|
*,
|
|
25
27
|
_fetch_job: bool = True,
|
|
26
28
|
) -> None:
|
|
29
|
+
"""Fetch already existing jobs.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
name: the name of the job
|
|
33
|
+
teamspace: the teamspace the job is part of
|
|
34
|
+
org: the name of the organization owning the :param`teamspace` in case it is owned by an org
|
|
35
|
+
user: the name of the user owning the :param`teamspace`
|
|
36
|
+
in case it is owned directly by a user instead of an org.
|
|
37
|
+
"""
|
|
27
38
|
self._job_api = MMTApiV2()
|
|
28
39
|
super().__init__(name=name, teamspace=teamspace, org=org, user=user, _fetch_job=_fetch_job)
|
|
29
40
|
|
|
@@ -42,6 +53,35 @@ class _MMTV2(_BaseMMT):
|
|
|
42
53
|
artifacts_local: Optional[str] = None,
|
|
43
54
|
artifacts_remote: Optional[str] = None,
|
|
44
55
|
) -> "_MMTV2":
|
|
56
|
+
"""Submit a new multi-machine job to the Lightning AI platform.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
num_machines: The number of machines to run on.
|
|
60
|
+
machine: The machine type to run the job on. One of {", ".join(_MACHINE_VALUES)}.
|
|
61
|
+
command: The command to run inside your job. Required if using a studio. Optional if using an image.
|
|
62
|
+
If not provided for images, will run the container entrypoint and default command.
|
|
63
|
+
studio: The studio env to run the job with. Mutually exclusive with image.
|
|
64
|
+
image: The docker image to run the job with. Mutually exclusive with studio.
|
|
65
|
+
env: Environment variables to set inside the job.
|
|
66
|
+
interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
|
|
67
|
+
cloud_account: The cloud account to run the job on.
|
|
68
|
+
Defaults to the studio cloud account if running with studio compute env.
|
|
69
|
+
If not provided will fall back to the teamspaces default cloud account.
|
|
70
|
+
image_credentials: The credentials used to pull the image. Required if the image is private.
|
|
71
|
+
This should be the name of the respective credentials secret created on the Lightning AI platform.
|
|
72
|
+
cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
|
|
73
|
+
Required if the registry is part of a cloud provider (e.g. ECR).
|
|
74
|
+
artifacts_local: The path of inside the docker container, you want to persist images from.
|
|
75
|
+
CAUTION: When setting this to "/", it will effectively erase your container.
|
|
76
|
+
Only supported for jobs with a docker image compute environment.
|
|
77
|
+
artifacts_remote: The remote storage to persist your artifacts to.
|
|
78
|
+
Should be of format <CONNECTION_TYPE>:<CONNECTION_NAME>:<PATH_WITHIN_CONNECTION>.
|
|
79
|
+
PATH_WITHIN_CONNECTION hereby is a path relative to the connection's root.
|
|
80
|
+
E.g. efs:data:some-path would result in an EFS connection named `data` and to the path `some-path`
|
|
81
|
+
within it.
|
|
82
|
+
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
83
|
+
Only supported for jobs with a docker image compute environment.
|
|
84
|
+
"""
|
|
45
85
|
# Command is required if Studio is provided to know what to run
|
|
46
86
|
# Image is mutually exclusive with Studio
|
|
47
87
|
# Command is optional for Image
|
|
@@ -80,6 +120,7 @@ class _MMTV2(_BaseMMT):
|
|
|
80
120
|
|
|
81
121
|
@property
|
|
82
122
|
def machines(self) -> Tuple["Job", ...]:
|
|
123
|
+
"""Returns the sub-jobs for each individual instance."""
|
|
83
124
|
from lightning_sdk.job import Job
|
|
84
125
|
|
|
85
126
|
return tuple(
|
|
@@ -88,9 +129,14 @@ class _MMTV2(_BaseMMT):
|
|
|
88
129
|
)
|
|
89
130
|
|
|
90
131
|
def stop(self) -> None:
|
|
132
|
+
"""Stops the job."""
|
|
91
133
|
self._job_api.stop_job(job_id=self._guaranteed_job.id, teamspace_id=self._teamspace.id)
|
|
92
134
|
|
|
93
135
|
def delete(self) -> None:
|
|
136
|
+
"""Deletes the job.
|
|
137
|
+
|
|
138
|
+
Caution: This also deletes all artifacts and snapshots associated with the job.
|
|
139
|
+
"""
|
|
94
140
|
self._job_api.delete_job(
|
|
95
141
|
job_id=self._guaranteed_job.id,
|
|
96
142
|
teamspace_id=self._teamspace.id,
|
|
@@ -104,20 +150,24 @@ class _MMTV2(_BaseMMT):
|
|
|
104
150
|
|
|
105
151
|
@property
|
|
106
152
|
def status(self) -> "Status":
|
|
153
|
+
"""The current status of the job."""
|
|
107
154
|
return self._job_api._job_state_to_external(self._latest_job.state)
|
|
108
155
|
|
|
109
156
|
@property
|
|
110
157
|
def artifact_path(self) -> Optional[str]:
|
|
158
|
+
"""Path to the artifacts created by the job within the distributed teamspace filesystem."""
|
|
111
159
|
# TODO: Since grouping for those is not done yet on the BE, we cannot yet have a unified link here
|
|
112
160
|
raise NotImplementedError
|
|
113
161
|
|
|
114
162
|
@property
|
|
115
163
|
def snapshot_path(self) -> Optional[str]:
|
|
164
|
+
"""Path to the studio snapshot used to create the job within the distributed teamspace filesystem."""
|
|
116
165
|
# TODO: Since grouping for those is not done yet on the BE, we cannot yet have a unified link here
|
|
117
166
|
raise NotImplementedError
|
|
118
167
|
|
|
119
168
|
@property
|
|
120
169
|
def machine(self) -> "Machine":
|
|
170
|
+
"""Returns the machine type this job is running on."""
|
|
121
171
|
return self._job_api._get_job_machine_from_spec(self._guaranteed_job.spec)
|
|
122
172
|
|
|
123
173
|
def _update_internal_job(self) -> None:
|
|
@@ -129,8 +179,15 @@ class _MMTV2(_BaseMMT):
|
|
|
129
179
|
|
|
130
180
|
@property
|
|
131
181
|
def name(self) -> str:
|
|
182
|
+
"""The job's name."""
|
|
132
183
|
return self._name
|
|
133
184
|
|
|
134
185
|
@property
|
|
135
186
|
def teamspace(self) -> "Teamspace":
|
|
187
|
+
"""The teamspace the job is part of."""
|
|
136
188
|
return self._teamspace
|
|
189
|
+
|
|
190
|
+
@property
|
|
191
|
+
def link(self) -> str:
|
|
192
|
+
# TODO: Since we don't have a UI for this yet, we can't have a link
|
|
193
|
+
raise NotImplementedError
|
lightning_sdk/plugin.py
CHANGED
|
@@ -3,7 +3,8 @@ import logging
|
|
|
3
3
|
import os
|
|
4
4
|
import warnings
|
|
5
5
|
from abc import ABC, abstractmethod
|
|
6
|
-
from
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Generator, Optional, Protocol, Union, runtime_checkable
|
|
7
8
|
|
|
8
9
|
from lightning_sdk.job import Job
|
|
9
10
|
from lightning_sdk.machine import Machine
|
|
@@ -16,6 +17,7 @@ from lightning_sdk.utils.resolve import (
|
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from lightning_sdk.lightning_cloud.openapi import Externalv1LightningappInstance
|
|
20
|
+
from lightning_sdk.mmt import MMT
|
|
19
21
|
|
|
20
22
|
_logger = _setup_logger(__name__)
|
|
21
23
|
|
|
@@ -141,7 +143,7 @@ class JobsPlugin(_Plugin):
|
|
|
141
143
|
|
|
142
144
|
machine = _resolve_deprecated_cloud_compute(machine, cloud_compute)
|
|
143
145
|
|
|
144
|
-
|
|
146
|
+
return Job.run(
|
|
145
147
|
name=name,
|
|
146
148
|
machine=machine,
|
|
147
149
|
command=command,
|
|
@@ -151,10 +153,6 @@ class JobsPlugin(_Plugin):
|
|
|
151
153
|
interruptible=interruptible,
|
|
152
154
|
)
|
|
153
155
|
|
|
154
|
-
_logger.info(_success_message(job, self))
|
|
155
|
-
|
|
156
|
-
return job
|
|
157
|
-
|
|
158
156
|
|
|
159
157
|
class MultiMachineTrainingPlugin(_Plugin):
|
|
160
158
|
"""Plugin handling multi-machine-training jobs."""
|
|
@@ -170,7 +168,7 @@ class MultiMachineTrainingPlugin(_Plugin):
|
|
|
170
168
|
cloud_compute: Optional[Machine] = None,
|
|
171
169
|
num_instances: int = 2,
|
|
172
170
|
interruptible: bool = False,
|
|
173
|
-
) ->
|
|
171
|
+
) -> "MMT":
|
|
174
172
|
"""Launches an asynchronous multi-machine-training.
|
|
175
173
|
|
|
176
174
|
Args:
|
|
@@ -188,20 +186,16 @@ class MultiMachineTrainingPlugin(_Plugin):
|
|
|
188
186
|
|
|
189
187
|
machine = _resolve_deprecated_cloud_compute(machine, cloud_compute)
|
|
190
188
|
|
|
191
|
-
MMT
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
)
|
|
202
|
-
|
|
203
|
-
MMT._force_v1 = False
|
|
204
|
-
return mmt
|
|
189
|
+
with forced_v1(MMT) as v1mmt:
|
|
190
|
+
return v1mmt.run(
|
|
191
|
+
name=name,
|
|
192
|
+
num_machines=num_instances,
|
|
193
|
+
machine=machine,
|
|
194
|
+
command=command,
|
|
195
|
+
studio=self._studio,
|
|
196
|
+
teamspace=self._studio.teamspace,
|
|
197
|
+
interruptible=interruptible,
|
|
198
|
+
)
|
|
205
199
|
|
|
206
200
|
|
|
207
201
|
class MultiMachineDataPrepPlugin(_Plugin):
|
|
@@ -245,7 +239,8 @@ class MultiMachineDataPrepPlugin(_Plugin):
|
|
|
245
239
|
interruptible=interruptible,
|
|
246
240
|
)
|
|
247
241
|
|
|
248
|
-
|
|
242
|
+
with forced_v1(Job) as v1_job:
|
|
243
|
+
return v1_job(resp.name, self._studio.teamspace)
|
|
249
244
|
|
|
250
245
|
|
|
251
246
|
class InferenceServerPlugin(_Plugin):
|
|
@@ -293,7 +288,8 @@ class InferenceServerPlugin(_Plugin):
|
|
|
293
288
|
)
|
|
294
289
|
|
|
295
290
|
_logger.info(_success_message(resp, self))
|
|
296
|
-
|
|
291
|
+
with forced_v1(Job) as v1_job:
|
|
292
|
+
return v1_job(resp.name, self._studio.teamspace)
|
|
297
293
|
|
|
298
294
|
|
|
299
295
|
class SlurmJobsPlugin(_Plugin):
|
|
@@ -430,3 +426,12 @@ def _run_name(plugin_type: str) -> str:
|
|
|
430
426
|
def _success_message(resp: Union["Externalv1LightningappInstance", Job], plugin_instance: _RunnablePlugin) -> str:
|
|
431
427
|
"""Compiles the success message for a given runnable plugin."""
|
|
432
428
|
return f"{plugin_instance._plugin_run_name} {resp.name} was successfully launched. View it at https://lightning.ai/{plugin_instance._studio.owner.name}/{plugin_instance._studio.teamspace.name}/studios/{plugin_instance.studio}/app?app_id={plugin_instance._slug_name}&job_name={resp.name}"
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
@contextmanager
def forced_v1(cls: Any) -> Generator[Any, None, None]:
    """Temporarily force the v1 version of a class that has multiple backends.

    Sets ``cls._force_v1`` to ``True`` for the duration of the ``with`` block and
    restores the previous value on exit, including when the block raises.

    Args:
        cls: the class whose ``_force_v1`` flag is toggled.

    Yields:
        The same ``cls`` object, with ``_force_v1`` set to ``True``.
    """
    # A class that never defined the flag is treated as "not forced".
    orig_val = getattr(cls, "_force_v1", False)
    cls._force_v1 = True
    try:
        yield cls
    finally:
        # An exception raised inside the ``with`` body is re-raised at the ``yield``;
        # without ``finally`` the class would stay pinned to v1 afterwards.
        cls._force_v1 = orig_val
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: lightning_sdk
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.44
|
|
4
4
|
Summary: SDK to develop using Lightning AI Studios
|
|
5
5
|
Author-email: Lightning-AI <justus@lightning.ai>
|
|
6
6
|
License: MIT License
|
|
@@ -47,6 +47,7 @@ Requires-Dist: simple-term-menu
|
|
|
47
47
|
Requires-Dist: lightning-utilities
|
|
48
48
|
Provides-Extra: serve
|
|
49
49
|
Requires-Dist: litserve>=0.2.5; extra == "serve"
|
|
50
|
+
Requires-Dist: docker; extra == "serve"
|
|
50
51
|
|
|
51
52
|
# Lightning SDK
|
|
52
53
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
docs/source/conf.py,sha256=r8yX20eC-4mHhMTd0SbQb5TlSWHhO6wnJ0VJ_FBFpag,13249
|
|
2
|
-
lightning_sdk/__init__.py,sha256=
|
|
2
|
+
lightning_sdk/__init__.py,sha256=XJlg9TRR7l1VeCSLkPRsB0dOcFAU6kcMciUwmYdtojQ,925
|
|
3
3
|
lightning_sdk/agents.py,sha256=ly6Ma1j0ZgGPFyvPvMN28JWiB9dATIstFa5XM8pMi6I,1577
|
|
4
4
|
lightning_sdk/ai_hub.py,sha256=kBjtmrzVHPCgqtV_TrSNkuf4oT2DLm8SYRTz4iTQmmY,6624
|
|
5
5
|
lightning_sdk/constants.py,sha256=ztl1PTUBULnqTf3DyKUSJaV_O20hNtUYT6XvAYIrmIk,749
|
|
@@ -8,7 +8,7 @@ lightning_sdk/machine.py,sha256=VdFXStR6ilYBEYuxgGWzcAw2TtW-nEQVsh6hz-2aaEw,750
|
|
|
8
8
|
lightning_sdk/models.py,sha256=d27VAYUcbWKd4kuL_CqwCi3IguyjmKUR9EVWfXWTwmc,5606
|
|
9
9
|
lightning_sdk/organization.py,sha256=WCfzdgjtvY1_A07DnxOpp74V2JR2gQwtXbIEcFDnoVU,1232
|
|
10
10
|
lightning_sdk/owner.py,sha256=t5svD2it4C9pbSpVuG9WJL46CYi37JXNziwnXxhiU5U,1361
|
|
11
|
-
lightning_sdk/plugin.py,sha256=
|
|
11
|
+
lightning_sdk/plugin.py,sha256=nWFL1l6Q9DE8Lr2krD5qRhZljwmYm00R2eR1tYRb20s,14902
|
|
12
12
|
lightning_sdk/status.py,sha256=kLDhN4-zdsGuZM577JMl1BbUIoF61bUOadW89ZAATFA,219
|
|
13
13
|
lightning_sdk/studio.py,sha256=lezGs111RUFejLWgp4Urov5l6uiUmYJjtRK8D8EYFU8,17198
|
|
14
14
|
lightning_sdk/teamspace.py,sha256=dKT-WrYF2xGP1C1bjOY2aYlEpkrvYkz2fXvtYVgogwo,11508
|
|
@@ -17,32 +17,31 @@ lightning_sdk/api/__init__.py,sha256=Qn2VVRvir_gO7w4yxGLkZY-R3T7kdiTPKgQ57BhIA9k
|
|
|
17
17
|
lightning_sdk/api/agents_api.py,sha256=G47TbFo9kYqnBMqdw2RW-lfS1VAUBSXDmzs6fpIEMUs,4059
|
|
18
18
|
lightning_sdk/api/ai_hub_api.py,sha256=CYQLFLA89m3xQ-6Ss3UX4TDK6ZWRwmPGA5DjyJqW3RM,5578
|
|
19
19
|
lightning_sdk/api/deployment_api.py,sha256=T480Nej7LqmtkAx8SBkPGQ5JxeyQ-GVIDqUCc7Z1yfk,21448
|
|
20
|
-
lightning_sdk/api/job_api.py,sha256=
|
|
20
|
+
lightning_sdk/api/job_api.py,sha256=KgXl0uO32Ja0AvxOASh-LihUPGERq2Fwy1rovXnq5Sg,11074
|
|
21
21
|
lightning_sdk/api/mmt_api.py,sha256=texQJqSjbQNpfLLrumpvZ0MauPjmBlJAc8oSk3i46wk,6569
|
|
22
22
|
lightning_sdk/api/org_api.py,sha256=Ze3z_ATVrukobujV5YdC42DKj45Vuwl7X52q_Vr-o3U,803
|
|
23
23
|
lightning_sdk/api/studio_api.py,sha256=Cfsq8HFc4uUsj8hncnhnD_TLhw0cg-ryclGowj8S6Y0,26374
|
|
24
24
|
lightning_sdk/api/teamspace_api.py,sha256=NYhD-Br2NIIn-1Noc8Q94TzWxEwFM1qCkf9RZhOqQu0,10321
|
|
25
25
|
lightning_sdk/api/user_api.py,sha256=sL7RIjjtmZmvCZWx7BBZslhj1BeNh4Idn-RVcdmf7M0,2598
|
|
26
|
-
lightning_sdk/api/utils.py,sha256=
|
|
26
|
+
lightning_sdk/api/utils.py,sha256=GbfTdqW1yV-fjyEwVebsbAdKQQjRmwT_fXUamrcnwAY,22534
|
|
27
27
|
lightning_sdk/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
28
|
lightning_sdk/cli/ai_hub.py,sha256=8oy6TogDiWnHuLT3cv33XEW7vPqXPA0dDMds8kX3Z4g,1649
|
|
29
29
|
lightning_sdk/cli/download.py,sha256=nyQN3q1vZ0fg4_cfit8cKaokQ9VUd46l_TNcAQWkLwU,5996
|
|
30
30
|
lightning_sdk/cli/entrypoint.py,sha256=Hl2Lm7-OS0kx_pyJyGe7Nii0Soc6HYe4r4xXKeJuC_o,1507
|
|
31
31
|
lightning_sdk/cli/exceptions.py,sha256=QUF3OMAMZwBikvlusimSHSBjb6ywvHpfAumJBEaodSw,169
|
|
32
32
|
lightning_sdk/cli/legacy.py,sha256=ocTVNwlsLRS5aMjbMkwFPjT3uEYvS8C40CJ0PeRRv8g,4707
|
|
33
|
-
lightning_sdk/cli/
|
|
34
|
-
lightning_sdk/cli/
|
|
35
|
-
lightning_sdk/cli/serve.py,sha256=dfhbxNscaDJijJSXxpqRKZoI-eGvaIVWKoqTsg_xZWk,4619
|
|
33
|
+
lightning_sdk/cli/run.py,sha256=B6ttd9SKg373ngug-lj74CEcuEoxwz-P6nUBVnQeijI,10836
|
|
34
|
+
lightning_sdk/cli/serve.py,sha256=UaXhGHU6nbAzrnVigSKOTrMjLwSs-sjyhuJCdVUBwzc,8722
|
|
36
35
|
lightning_sdk/cli/studios_menu.py,sha256=0kQGqGel8gAbpdJtjOM1a6NEat_TnIqRNprNn8QiK58,3236
|
|
37
36
|
lightning_sdk/cli/upload.py,sha256=H9OyipYTYAQ9Mzy2e8jtoaa-B34-uXHbTQTzY2Vmhv4,9078
|
|
38
37
|
lightning_sdk/deployment/__init__.py,sha256=BLu7_cVLp97TYxe6qe-J1zKUSZXAVcvCjgcA7plV2k4,497
|
|
39
38
|
lightning_sdk/deployment/deployment.py,sha256=Dp15pn8rFAfMfaDhKn0v3bphFuvLgkPFs3KSNxW6eyc,15472
|
|
40
39
|
lightning_sdk/job/__init__.py,sha256=1MxjQ6rHkyUHCypSW9RuXuVMVH11WiqhIXcU2LCFMwE,64
|
|
41
|
-
lightning_sdk/job/base.py,sha256=
|
|
42
|
-
lightning_sdk/job/job.py,sha256=
|
|
43
|
-
lightning_sdk/job/v1.py,sha256=
|
|
44
|
-
lightning_sdk/job/v2.py,sha256=
|
|
45
|
-
lightning_sdk/job/work.py,sha256=
|
|
40
|
+
lightning_sdk/job/base.py,sha256=WHd2jz1qEeQWg2ljphlJMwoEVU6qtSBjnO9hMScLF7E,13383
|
|
41
|
+
lightning_sdk/job/job.py,sha256=8Bch1TCKjf4jxf46tVN64_k_ZIq677kblHLJHQJO8v4,11536
|
|
42
|
+
lightning_sdk/job/v1.py,sha256=Ff1iTvZqa1J90eIex6_xDjwAxJztHtUSTmjihFwq5Vg,9060
|
|
43
|
+
lightning_sdk/job/v2.py,sha256=mLEgSbog3QiLKsk1-9pWNWA_yYuQzvNfkWt8Mt2Y598,8720
|
|
44
|
+
lightning_sdk/job/work.py,sha256=_L1eF9L0dW_BI17Wo0HwgoOkwwd48vFkqBD0uOt_rJk,2043
|
|
46
45
|
lightning_sdk/lightning_cloud/__init__.py,sha256=o91SMAlwr4Ke5ESe8fHjqXcj31_h7rT-MlFoXA-n2EI,173
|
|
47
46
|
lightning_sdk/lightning_cloud/__version__.py,sha256=lOfmWHtjmiuSG28TbKQqd2B3nwmSGOlKVFwhaj_cRJk,23
|
|
48
47
|
lightning_sdk/lightning_cloud/env.py,sha256=XZXpF4sD9jlB8DY0herTy_8XiUJuDVjxy5APjRD2_aU,1379
|
|
@@ -633,7 +632,7 @@ lightning_sdk/lightning_cloud/openapi/models/v1_magic_link_login_response.py,sha
|
|
|
633
632
|
lightning_sdk/lightning_cloud/openapi/models/v1_managed_endpoint.py,sha256=T0IXmXRI5paAN6NxpQQ8873lgTT7bIHYZZmeJEmW4yA,10175
|
|
634
633
|
lightning_sdk/lightning_cloud/openapi/models/v1_managed_model.py,sha256=97KxjHxTsaLWYlTNQeE2yGhF7KbTbmqLb8c39BEy02s,9117
|
|
635
634
|
lightning_sdk/lightning_cloud/openapi/models/v1_managed_model_abilities.py,sha256=1of-AEIimGnUd_0fpS5U8apGFYdsIXogRpFGxed67s0,5892
|
|
636
|
-
lightning_sdk/lightning_cloud/openapi/models/v1_membership.py,sha256=
|
|
635
|
+
lightning_sdk/lightning_cloud/openapi/models/v1_membership.py,sha256=6wqHKmxv3SOj9mTTRAw2g3ljxQyg_HKU18rexmA1KII,17207
|
|
637
636
|
lightning_sdk/lightning_cloud/openapi/models/v1_message.py,sha256=6E1FDyfNM8fMkJi6FZ4U9UBk4k5VhfmCYOHGkc5CyzA,10189
|
|
638
637
|
lightning_sdk/lightning_cloud/openapi/models/v1_message_author.py,sha256=QRC1HofufQOPROrxwtLeaEfE8f1h_ehZ0tdqlYfkgyM,3591
|
|
639
638
|
lightning_sdk/lightning_cloud/openapi/models/v1_message_content.py,sha256=rlYtO8Llal6jISWpQMyOC8zMOvdVWaljXTwlHp_gzdc,4535
|
|
@@ -653,7 +652,7 @@ lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_event.py,sha25
|
|
|
653
652
|
lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_event_type.py,sha256=HMBQuVSxwwbhbDQAAAMF8k99drvllwifjwjqEQrrDaQ,3248
|
|
654
653
|
lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_fault_tolerance.py,sha256=5Q0mZ3FM6yRi81mVCnYcyYh84loK4LVZTIv-2GgAy1I,4863
|
|
655
654
|
lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_fault_tolerance_strategy.py,sha256=FQ8qqDLfpguNtMqEETybXre4IArM32_vFuLrrUOcHb4,3539
|
|
656
|
-
lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_state.py,sha256=
|
|
655
|
+
lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_state.py,sha256=xNpq6pM1UF9r0hPHoEQ124SlCW2sCb_zW628kEa7IVU,3395
|
|
657
656
|
lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_status.py,sha256=yoOMJoszmdfEoUP7mzjl3PTK9dq6JUi8yXrxJljrY-0,9540
|
|
658
657
|
lightning_sdk/lightning_cloud/openapi/models/v1_named_get_logger_metrics.py,sha256=VygKKjSPGIbWT9RxbmJfOSlNR-Fq12yKj8wT_SHd2ZQ,3966
|
|
659
658
|
lightning_sdk/lightning_cloud/openapi/models/v1_network_config.py,sha256=bHDZVIsqgiSi7eBf72RunvrLR-d-4b9S7oDMAZFBvCM,5476
|
|
@@ -683,7 +682,7 @@ lightning_sdk/lightning_cloud/openapi/models/v1_project_artifact.py,sha256=jMg7B
|
|
|
683
682
|
lightning_sdk/lightning_cloud/openapi/models/v1_project_cluster_binding.py,sha256=KLZYf_bkkB1OJenDpZsG02BvomPQqJSP3gMf3ofJ83k,8719
|
|
684
683
|
lightning_sdk/lightning_cloud/openapi/models/v1_project_compute_daily_usage.py,sha256=92qqkHCx9gfZCg7FGVn7TDMQjuHw5E0POtLC7HoQ9pE,6565
|
|
685
684
|
lightning_sdk/lightning_cloud/openapi/models/v1_project_compute_usage.py,sha256=-IJmDR0j5M2S7wfkwz5qO0riW2ZolAzrLnopnBAfckU,12121
|
|
686
|
-
lightning_sdk/lightning_cloud/openapi/models/v1_project_membership.py,sha256=
|
|
685
|
+
lightning_sdk/lightning_cloud/openapi/models/v1_project_membership.py,sha256=pzF0Jm8gNDV1-TerXtPWf1xNM2pRgJnCtDtKNRWcb8o,23595
|
|
687
686
|
lightning_sdk/lightning_cloud/openapi/models/v1_project_membership_invite.py,sha256=V9Djs6edeaKv-X2-GpfAU-EVnx2rlKppGSTLfKzfrP0,9310
|
|
688
687
|
lightning_sdk/lightning_cloud/openapi/models/v1_project_membership_role_binding.py,sha256=pS5454riAOUNeB7B3Y8ChEj9RJQYaroelZuJQs8w2yM,7727
|
|
689
688
|
lightning_sdk/lightning_cloud/openapi/models/v1_project_settings.py,sha256=OugDsaWQPBMBGx4MykBJZO-xacRzzVMV3uGbeyLtQi0,5977
|
|
@@ -834,11 +833,11 @@ lightning_sdk/lightning_cloud/utils/data_connection.py,sha256=VN-Gs0a4g3tA9TQCwP
|
|
|
834
833
|
lightning_sdk/lightning_cloud/utils/dataset.py,sha256=4nUspe8iAaRPgSYpXA2uAQCgydm78kJzhOIx3C9qKls,2011
|
|
835
834
|
lightning_sdk/lightning_cloud/utils/name_generator.py,sha256=MkciuA10332V0mcE2PxLIiwWomWE0Fm_gNGK01vwRr4,58046
|
|
836
835
|
lightning_sdk/lightning_cloud/utils/network.py,sha256=axPgl8rhyPcPjxiztDxyksfxax3VNg2OXL5F5Uc81b4,406
|
|
837
|
-
lightning_sdk/mmt/__init__.py,sha256
|
|
838
|
-
lightning_sdk/mmt/base.py,sha256=
|
|
839
|
-
lightning_sdk/mmt/mmt.py,sha256=
|
|
840
|
-
lightning_sdk/mmt/v1.py,sha256=
|
|
841
|
-
lightning_sdk/mmt/v2.py,sha256=
|
|
836
|
+
lightning_sdk/mmt/__init__.py,sha256=ExMu90-96bGBnyp5h0CErQszUGB1-PcjC4-R8_NYbeY,117
|
|
837
|
+
lightning_sdk/mmt/base.py,sha256=V_ysuSjddXksCjkpGy-YVqD9j8b0jA6hkfFz0oW-Jn0,12768
|
|
838
|
+
lightning_sdk/mmt/mmt.py,sha256=Br73TO7TrEdx7VH80CA_gCGDgz3JX4p16R9DdrwQshk,12307
|
|
839
|
+
lightning_sdk/mmt/v1.py,sha256=Ovzwc-mm3PCsyVb8XfvbWUSFGZEI8E5NKayQB1pXvwg,8159
|
|
840
|
+
lightning_sdk/mmt/v2.py,sha256=mUHnBN33UOfD7LX5akGShG7W1q5vGASkVaOTPhHhpDI,8353
|
|
842
841
|
lightning_sdk/services/__init__.py,sha256=gSWUjccEhMI9CIWL_nbrFHUK2S6TM2725mEzrLMfK1Y,225
|
|
843
842
|
lightning_sdk/services/file_endpoint.py,sha256=we5HC_o74J4Y6fSP_31jIizi_I_1FO_Rb2qblspD9eE,7855
|
|
844
843
|
lightning_sdk/services/utilities.py,sha256=IeOx8hc3F8ZevHeKBysh08BXhJliTNzvKp1gwpEfdik,4087
|
|
@@ -847,9 +846,9 @@ lightning_sdk/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
|
847
846
|
lightning_sdk/utils/dynamic.py,sha256=glUTO1JC9APtQ6Gr9SO02a3zr56-sPAXM5C3NrTpgyQ,1959
|
|
848
847
|
lightning_sdk/utils/enum.py,sha256=h2JRzqoBcSlUdanFHmkj_j5DleBHAu1esQYUsdNI-hU,4106
|
|
849
848
|
lightning_sdk/utils/resolve.py,sha256=RWvlOWLHjaHhR0W0zT3mN719cbzhFfYCKBss38zfv3k,5783
|
|
850
|
-
lightning_sdk-0.1.
|
|
851
|
-
lightning_sdk-0.1.
|
|
852
|
-
lightning_sdk-0.1.
|
|
853
|
-
lightning_sdk-0.1.
|
|
854
|
-
lightning_sdk-0.1.
|
|
855
|
-
lightning_sdk-0.1.
|
|
849
|
+
lightning_sdk-0.1.44.dist-info/LICENSE,sha256=uFIuZwj5z-4TeF2UuacPZ1o17HkvKObT8fY50qN84sg,1064
|
|
850
|
+
lightning_sdk-0.1.44.dist-info/METADATA,sha256=FpuwHfzu6vOmo_c85UKIO8Ky54Hm8l0gyUOgV84XC2Y,4031
|
|
851
|
+
lightning_sdk-0.1.44.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
852
|
+
lightning_sdk-0.1.44.dist-info/entry_points.txt,sha256=msB9PJWIJ784dX-OP8by51d4IbKYH3Fj1vCuA9oXjHY,68
|
|
853
|
+
lightning_sdk-0.1.44.dist-info/top_level.txt,sha256=ps8doKILFXmN7F1mHncShmnQoTxKBRPIcchC8TpoBw4,19
|
|
854
|
+
lightning_sdk-0.1.44.dist-info/RECORD,,
|
lightning_sdk/cli/mmt.py
DELETED
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
from typing import Dict, Optional
|
|
2
|
-
|
|
3
|
-
from fire import Fire
|
|
4
|
-
|
|
5
|
-
from lightning_sdk._mmt import MMT
|
|
6
|
-
from lightning_sdk.api.studio_api import _cloud_url
|
|
7
|
-
from lightning_sdk.lightning_cloud.login import Auth
|
|
8
|
-
from lightning_sdk.machine import Machine
|
|
9
|
-
from lightning_sdk.teamspace import Teamspace
|
|
10
|
-
|
|
11
|
-
_MACHINE_VALUES = tuple([machine.value for machine in Machine])
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class MMTCLI:
|
|
15
|
-
"""Command line interface (CLI) to interact with/manage Lightning AI MMT."""
|
|
16
|
-
|
|
17
|
-
def __init__(self) -> None:
|
|
18
|
-
# Need to set the docstring here for f-strings to work.
|
|
19
|
-
# Sadly this is the only way to really show options as f-strings are not allowed as docstrings directly
|
|
20
|
-
# and fire does not show values for literals, just that it is a literal.
|
|
21
|
-
docstr = f"""Run async workloads on multiple machines using a docker image.
|
|
22
|
-
|
|
23
|
-
Args:
|
|
24
|
-
name: The name of the job. Needs to be unique within the teamspace.
|
|
25
|
-
num_machines: The number of Machines to run on. Defaults to 2 Machines
|
|
26
|
-
machine: The machine type to run the job on. One of {", ".join(_MACHINE_VALUES)}. Defaults to CPU
|
|
27
|
-
command: The command to run inside your job. Required if using a studio. Optional if using an image.
|
|
28
|
-
If not provided for images, will run the container entrypoint and default command.
|
|
29
|
-
studio: The studio env to run the job with. Mutually exclusive with image.
|
|
30
|
-
image: The docker image to run the job with. Mutually exclusive with studio.
|
|
31
|
-
teamspace: The teamspace the job should be associated with. Defaults to the current teamspace.
|
|
32
|
-
org: The organization owning the teamspace (if any). Defaults to the current organization.
|
|
33
|
-
user: The user owning the teamspace (if any). Defaults to the current user.
|
|
34
|
-
cloud_account: The cloud account to run the job on.
|
|
35
|
-
Defaults to the studio cloud account if running with studio compute env.
|
|
36
|
-
If not provided will fall back to the teamspaces default cloud account.
|
|
37
|
-
env: Environment variables to set inside the job.
|
|
38
|
-
interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
|
|
39
|
-
image_credentials: The credentials used to pull the image. Required if the image is private.
|
|
40
|
-
This should be the name of the respective credentials secret created on the Lightning AI platform.
|
|
41
|
-
cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
|
|
42
|
-
Required if the registry is part of a cloud provider (e.g. ECR).
|
|
43
|
-
artifacts_local: The path of inside the docker container, you want to persist images from.
|
|
44
|
-
CAUTION: When setting this to "/", it will effectively erase your container.
|
|
45
|
-
Only supported for jobs with a docker image compute environment.
|
|
46
|
-
artifacts_remote: The remote storage to persist your artifacts to.
|
|
47
|
-
Should be of format <CONNECTION_TYPE>:<CONNECTION_NAME>:<PATH_WITHIN_CONNECTION>.
|
|
48
|
-
PATH_WITHIN_CONNECTION hereby is a path relative to the connection's root.
|
|
49
|
-
E.g. efs:data:some-path would result in an EFS connection named `data` and to the path `some-path`
|
|
50
|
-
within it.
|
|
51
|
-
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
52
|
-
Only supported for jobs with a docker image compute environment.
|
|
53
|
-
"""
|
|
54
|
-
# TODO: the docstrings from artifacts_local and artifacts_remote don't show up completely,
|
|
55
|
-
# might need to switch to explicit cli definition
|
|
56
|
-
self.run.__func__.__doc__ = docstr
|
|
57
|
-
|
|
58
|
-
def login(self) -> None:
|
|
59
|
-
"""Login to Lightning AI Studios."""
|
|
60
|
-
auth = Auth()
|
|
61
|
-
auth.clear()
|
|
62
|
-
|
|
63
|
-
try:
|
|
64
|
-
auth.authenticate()
|
|
65
|
-
except ConnectionError:
|
|
66
|
-
raise RuntimeError(f"Unable to connect to {_cloud_url()}. Please check your internet connection.") from None
|
|
67
|
-
|
|
68
|
-
def logout(self) -> None:
|
|
69
|
-
"""Logout from Lightning AI Studios."""
|
|
70
|
-
auth = Auth()
|
|
71
|
-
auth.clear()
|
|
72
|
-
|
|
73
|
-
# TODO: sadly, fire displays both Optional[type] and Union[type, None] as Optional[Optional]
|
|
74
|
-
# see https://github.com/google/python-fire/pull/513
|
|
75
|
-
# might need to move to different cli library
|
|
76
|
-
def run(
|
|
77
|
-
self,
|
|
78
|
-
name: Optional[str] = None,
|
|
79
|
-
num_machines: int = 2,
|
|
80
|
-
machine: Optional[str] = None,
|
|
81
|
-
command: Optional[str] = None,
|
|
82
|
-
studio: Optional[str] = None,
|
|
83
|
-
image: Optional[str] = None,
|
|
84
|
-
teamspace: Optional[str] = None,
|
|
85
|
-
org: Optional[str] = None,
|
|
86
|
-
user: Optional[str] = None,
|
|
87
|
-
cloud_account: Optional[str] = None,
|
|
88
|
-
env: Optional[Dict[str, str]] = None,
|
|
89
|
-
interruptible: bool = False,
|
|
90
|
-
image_credentials: Optional[str] = None,
|
|
91
|
-
cloud_account_auth: bool = False,
|
|
92
|
-
artifacts_local: Optional[str] = None,
|
|
93
|
-
artifacts_remote: Optional[str] = None,
|
|
94
|
-
) -> None:
|
|
95
|
-
if name is None:
|
|
96
|
-
from datetime import datetime
|
|
97
|
-
|
|
98
|
-
timestr = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
|
|
99
|
-
name = f"mmt-{timestr}"
|
|
100
|
-
|
|
101
|
-
if machine is None:
|
|
102
|
-
# TODO: infer from studio
|
|
103
|
-
machine = "CPU"
|
|
104
|
-
machine_enum = Machine(machine.upper())
|
|
105
|
-
|
|
106
|
-
teamspace = Teamspace(name=teamspace, org=org, user=user)
|
|
107
|
-
if cloud_account is None:
|
|
108
|
-
cloud_account = teamspace.default_cloud_account
|
|
109
|
-
|
|
110
|
-
if image is None:
|
|
111
|
-
raise RuntimeError("Currently only docker images are specified")
|
|
112
|
-
MMT.run(
|
|
113
|
-
name=name,
|
|
114
|
-
num_machines=num_machines,
|
|
115
|
-
machine=machine_enum,
|
|
116
|
-
command=command,
|
|
117
|
-
studio=studio,
|
|
118
|
-
image=image,
|
|
119
|
-
teamspace=teamspace,
|
|
120
|
-
org=org,
|
|
121
|
-
user=user,
|
|
122
|
-
cloud_account=cloud_account,
|
|
123
|
-
env=env,
|
|
124
|
-
interruptible=interruptible,
|
|
125
|
-
image_credentials=image_credentials,
|
|
126
|
-
cloud_account_auth=cloud_account_auth,
|
|
127
|
-
artifacts_local=artifacts_local,
|
|
128
|
-
artifacts_remote=artifacts_remote,
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
def main_cli() -> None:
|
|
133
|
-
"""CLI entrypoint."""
|
|
134
|
-
Fire(MMTCLI(), name="_mmt")
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
if __name__ == "__main__":
|
|
138
|
-
main_cli()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|