lightning-sdk 0.1.50__py3-none-any.whl → 0.1.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +1 -1
- lightning_sdk/ai_hub.py +16 -27
- lightning_sdk/api/ai_hub_api.py +7 -1
- lightning_sdk/api/job_api.py +12 -7
- lightning_sdk/api/lit_container_api.py +24 -7
- lightning_sdk/api/mmt_api.py +12 -7
- lightning_sdk/api/utils.py +52 -0
- lightning_sdk/cli/run.py +65 -18
- lightning_sdk/cli/serve.py +1 -5
- lightning_sdk/cli/upload.py +33 -15
- lightning_sdk/helpers.py +1 -1
- lightning_sdk/job/base.py +28 -1
- lightning_sdk/job/job.py +27 -25
- lightning_sdk/job/v1.py +6 -2
- lightning_sdk/job/v2.py +12 -12
- lightning_sdk/lightning_cloud/login.py +4 -1
- lightning_sdk/lightning_cloud/openapi/__init__.py +17 -0
- lightning_sdk/lightning_cloud/openapi/api/assistants_service_api.py +105 -0
- lightning_sdk/lightning_cloud/openapi/api/cluster_service_api.py +417 -1
- lightning_sdk/lightning_cloud/openapi/api/file_system_service_api.py +105 -0
- lightning_sdk/lightning_cloud/openapi/api/jobs_service_api.py +5 -1
- lightning_sdk/lightning_cloud/openapi/api/lit_registry_service_api.py +113 -0
- lightning_sdk/lightning_cloud/openapi/api/storage_service_api.py +101 -0
- lightning_sdk/lightning_cloud/openapi/api/user_service_api.py +5 -1
- lightning_sdk/lightning_cloud/openapi/models/__init__.py +17 -0
- lightning_sdk/lightning_cloud/openapi/models/cluster_id_usagerestrictions_body.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/deployments_id_body.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/id_contactowner_body.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/litregistry_lit_repo_name_body.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/metricsstream_create_body.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/usagerestrictions_id_body.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_assistant_model_status.py +4 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_provider.py +104 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_artifact_event.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_artifact_event_type.py +103 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_accelerator.py +81 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_spec.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_tagging_options.py +29 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_usage_restriction.py +227 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_contact_assistant_owner_reason.py +102 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_contact_assistant_owner_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_delete_cluster_usage_restriction_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment_api.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_filesystem_mmt.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_job_spec.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_list_cluster_usage_restrictions_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_list_filesystem_mm_ts_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_metrics_stream.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_model.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_path_mapping.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_post_cloud_space_artifact_events_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_resource_visibility.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_update_lit_repository_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +128 -76
- lightning_sdk/lightning_cloud/utils/data_connection.py +75 -7
- lightning_sdk/mmt/base.py +36 -26
- lightning_sdk/mmt/mmt.py +28 -26
- lightning_sdk/mmt/v1.py +4 -1
- lightning_sdk/mmt/v2.py +14 -13
- lightning_sdk/models.py +5 -4
- lightning_sdk/studio.py +68 -1
- lightning_sdk/utils/resolve.py +7 -0
- {lightning_sdk-0.1.50.dist-info → lightning_sdk-0.1.53.dist-info}/METADATA +2 -2
- {lightning_sdk-0.1.50.dist-info → lightning_sdk-0.1.53.dist-info}/RECORD +69 -52
- {lightning_sdk-0.1.50.dist-info → lightning_sdk-0.1.53.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.1.50.dist-info → lightning_sdk-0.1.53.dist-info}/WHEEL +0 -0
- {lightning_sdk-0.1.50.dist-info → lightning_sdk-0.1.53.dist-info}/entry_points.txt +0 -0
- {lightning_sdk-0.1.50.dist-info → lightning_sdk-0.1.53.dist-info}/top_level.txt +0 -0
lightning_sdk/mmt/base.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import warnings
|
|
1
2
|
from abc import abstractmethod
|
|
2
3
|
from typing import TYPE_CHECKING, Dict, List, Optional, Protocol, Tuple, Union
|
|
3
4
|
|
|
@@ -11,7 +12,7 @@ if TYPE_CHECKING:
|
|
|
11
12
|
from lightning_sdk.user import User
|
|
12
13
|
|
|
13
14
|
from lightning_sdk.job.base import _BaseJob
|
|
14
|
-
from lightning_sdk.utils.resolve import _resolve_deprecated_cluster
|
|
15
|
+
from lightning_sdk.utils.resolve import _resolve_deprecated_cluster, in_studio
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class MMTMachine(Protocol):
|
|
@@ -67,9 +68,10 @@ class _BaseMMT(_BaseJob):
|
|
|
67
68
|
interruptible: bool = False,
|
|
68
69
|
image_credentials: Optional[str] = None,
|
|
69
70
|
cloud_account_auth: bool = False,
|
|
70
|
-
artifacts_local: Optional[str] = None,
|
|
71
|
-
artifacts_remote: Optional[str] = None,
|
|
72
71
|
entrypoint: str = "sh -c",
|
|
72
|
+
path_mappings: Optional[Dict[str, str]] = None,
|
|
73
|
+
artifacts_local: Optional[str] = None, # deprecated in favor of path_mappings
|
|
74
|
+
artifacts_remote: Optional[str] = None, # deprecated in favor of path_mappings
|
|
73
75
|
cluster: Optional[str] = None, # deprecated in favor of cloud_account
|
|
74
76
|
) -> "_BaseMMT":
|
|
75
77
|
"""Run async workloads using a docker image across multiple machines.
|
|
@@ -94,21 +96,21 @@ class _BaseMMT(_BaseJob):
|
|
|
94
96
|
This should be the name of the respective credentials secret created on the Lightning AI platform.
|
|
95
97
|
cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
|
|
96
98
|
Required if the registry is part of a cloud provider (e.g. ECR).
|
|
97
|
-
artifacts_local: The path of inside the docker container, you want to persist images from.
|
|
98
|
-
CAUTION: When setting this to "/", it will effectively erase your container.
|
|
99
|
-
Only supported for jobs with a docker image compute environment.
|
|
100
|
-
artifacts_remote: The remote storage to persist your artifacts to.
|
|
101
|
-
Should be of format <CONNECTION_TYPE>:<CONNECTION_NAME>:<PATH_WITHIN_CONNECTION>.
|
|
102
|
-
PATH_WITHIN_CONNECTION hereby is a path relative to the connection's root.
|
|
103
|
-
E.g. efs:data:some-path would result in an EFS connection named `data` and to the path `some-path`
|
|
104
|
-
within it.
|
|
105
|
-
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
106
|
-
Only supported for jobs with a docker image compute environment.
|
|
107
99
|
entrypoint: The entrypoint of your docker container. Defaults to `sh -c` which
|
|
108
100
|
just runs the provided command in a standard shell.
|
|
109
101
|
To use the pre-defined entrypoint of the provided image, set this to an empty string.
|
|
110
102
|
Only applicable when submitting docker jobs.
|
|
103
|
+
path_mappings: Dictionary of path mappings. The keys are the path inside the container whereas the value
|
|
104
|
+
represents the data-connection name and the path inside that connection.
|
|
105
|
+
Should be of form
|
|
106
|
+
{
|
|
107
|
+
"<CONTAINER_PATH_1>": "<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1>",
|
|
108
|
+
"<CONTAINER_PATH_2>": "<CONNECTION_NAME_2>"
|
|
109
|
+
}
|
|
110
|
+
If the path inside the connection is omitted it's assumed to be the root path of that connection.
|
|
111
|
+
Only applicable when submitting docker jobs.
|
|
111
112
|
"""
|
|
113
|
+
from lightning_sdk.lightning_cloud.openapi.rest import ApiException
|
|
112
114
|
from lightning_sdk.studio import Studio
|
|
113
115
|
|
|
114
116
|
cloud_account = _resolve_deprecated_cluster(cloud_account, cluster)
|
|
@@ -167,6 +169,13 @@ class _BaseMMT(_BaseJob):
|
|
|
167
169
|
"image and studio are mutually exclusive as both define the environment to run the job in"
|
|
168
170
|
)
|
|
169
171
|
|
|
172
|
+
if cloud_account is None and in_studio():
|
|
173
|
+
try:
|
|
174
|
+
resolve_studio = Studio(teamspace=teamspace, user=user, org=org)
|
|
175
|
+
cloud_account = resolve_studio.cloud_account
|
|
176
|
+
except (ValueError, ApiException):
|
|
177
|
+
warnings.warn("Could not infer cloud account from studio. Using teamspace default.")
|
|
178
|
+
|
|
170
179
|
# they either need to specified both or none of them
|
|
171
180
|
if bool(artifacts_local) != bool(artifacts_remote):
|
|
172
181
|
raise ValueError("Artifact persistence requires both artifacts_local and artifacts_remote to be set")
|
|
@@ -189,9 +198,10 @@ class _BaseMMT(_BaseJob):
|
|
|
189
198
|
interruptible=interruptible,
|
|
190
199
|
image_credentials=image_credentials,
|
|
191
200
|
cloud_account_auth=cloud_account_auth,
|
|
201
|
+
entrypoint=entrypoint,
|
|
202
|
+
path_mappings=path_mappings,
|
|
192
203
|
artifacts_local=artifacts_local,
|
|
193
204
|
artifacts_remote=artifacts_remote,
|
|
194
|
-
entrypoint=entrypoint,
|
|
195
205
|
)
|
|
196
206
|
return inst
|
|
197
207
|
|
|
@@ -208,9 +218,10 @@ class _BaseMMT(_BaseJob):
|
|
|
208
218
|
cloud_account: Optional[str] = None,
|
|
209
219
|
image_credentials: Optional[str] = None,
|
|
210
220
|
cloud_account_auth: bool = False,
|
|
211
|
-
artifacts_local: Optional[str] = None,
|
|
212
|
-
artifacts_remote: Optional[str] = None,
|
|
213
221
|
entrypoint: str = "sh -c",
|
|
222
|
+
path_mappings: Optional[Dict[str, str]] = None,
|
|
223
|
+
artifacts_local: Optional[str] = None, # deprecated in favor of path_mappings
|
|
224
|
+
artifacts_remote: Optional[str] = None, # deprecated in favor of path_mappings
|
|
214
225
|
) -> None:
|
|
215
226
|
"""Submit a new multi-machine job to the Lightning AI platform.
|
|
216
227
|
|
|
@@ -230,19 +241,18 @@ class _BaseMMT(_BaseJob):
|
|
|
230
241
|
This should be the name of the respective credentials secret created on the Lightning AI platform.
|
|
231
242
|
cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
|
|
232
243
|
Required if the registry is part of a cloud provider (e.g. ECR).
|
|
233
|
-
artifacts_local: The path of inside the docker container, you want to persist images from.
|
|
234
|
-
CAUTION: When setting this to "/", it will effectively erase your container.
|
|
235
|
-
Only supported for jobs with a docker image compute environment.
|
|
236
|
-
artifacts_remote: The remote storage to persist your artifacts to.
|
|
237
|
-
Should be of format <CONNECTION_TYPE>:<CONNECTION_NAME>:<PATH_WITHIN_CONNECTION>.
|
|
238
|
-
PATH_WITHIN_CONNECTION hereby is a path relative to the connection's root.
|
|
239
|
-
E.g. efs:data:some-path would result in an EFS connection named `data` and to the path `some-path`
|
|
240
|
-
within it.
|
|
241
|
-
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
242
|
-
Only supported for jobs with a docker image compute environment.
|
|
243
244
|
entrypoint: The entrypoint of your docker container. Defaults to sh -c.
|
|
244
245
|
To use the pre-defined entrypoint of the provided image, set this to an empty string.
|
|
245
246
|
Only applicable when submitting docker jobs.
|
|
247
|
+
path_mappings: Dictionary of path mappings. The keys are the path inside the container whereas the value
|
|
248
|
+
represents the data-connection name and the path inside that connection.
|
|
249
|
+
Should be of form
|
|
250
|
+
{
|
|
251
|
+
"<CONTAINER_PATH_1>": "<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1>",
|
|
252
|
+
"<CONTAINER_PATH_2>": "<CONNECTION_NAME_2>"
|
|
253
|
+
}
|
|
254
|
+
If the path inside the connection is omitted it's assumed to be the root path of that connection.
|
|
255
|
+
Only applicable when submitting docker jobs.
|
|
246
256
|
"""
|
|
247
257
|
|
|
248
258
|
@property
|
lightning_sdk/mmt/mmt.py
CHANGED
|
@@ -109,9 +109,10 @@ class MMT(_BaseMMT):
|
|
|
109
109
|
interruptible: bool = False,
|
|
110
110
|
image_credentials: Optional[str] = None,
|
|
111
111
|
cloud_account_auth: bool = False,
|
|
112
|
+
entrypoint: str = "sh -c",
|
|
113
|
+
path_mappings: Optional[Dict[str, str]] = None,
|
|
112
114
|
artifacts_local: Optional[str] = None,
|
|
113
115
|
artifacts_remote: Optional[str] = None,
|
|
114
|
-
entrypoint: str = "sh -c",
|
|
115
116
|
cluster: Optional[str] = None, # deprecated in favor of cloud_account
|
|
116
117
|
) -> "MMT":
|
|
117
118
|
"""Run async workloads using a docker image across multiple machines.
|
|
@@ -119,7 +120,7 @@ class MMT(_BaseMMT):
|
|
|
119
120
|
Args:
|
|
120
121
|
name: The name of the job. Needs to be unique within the teamspace.
|
|
121
122
|
machine: The machine type to run the job on. One of {", ".join(_MACHINE_VALUES)}.
|
|
122
|
-
|
|
123
|
+
num_machines: The number of machines to run on.
|
|
123
124
|
command: The command to run inside your job. Required if using a studio. Optional if using an image.
|
|
124
125
|
If not provided for images, will run the container entrypoint and default command.
|
|
125
126
|
studio: The studio env to run the job with. Mutually exclusive with image.
|
|
@@ -136,19 +137,18 @@ class MMT(_BaseMMT):
|
|
|
136
137
|
This should be the name of the respective credentials secret created on the Lightning AI platform.
|
|
137
138
|
cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
|
|
138
139
|
Required if the registry is part of a cloud provider (e.g. ECR).
|
|
139
|
-
artifacts_local: The path of inside the docker container, you want to persist images from.
|
|
140
|
-
CAUTION: When setting this to "/", it will effectively erase your container.
|
|
141
|
-
Only supported for jobs with a docker image compute environment.
|
|
142
|
-
artifacts_remote: The remote storage to persist your artifacts to.
|
|
143
|
-
Should be of format <CONNECTION_TYPE>:<CONNECTION_NAME>:<PATH_WITHIN_CONNECTION>.
|
|
144
|
-
PATH_WITHIN_CONNECTION hereby is a path relative to the connection's root.
|
|
145
|
-
E.g. efs:data:some-path would result in an EFS connection named `data` and to the path `some-path`
|
|
146
|
-
within it.
|
|
147
|
-
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
148
|
-
Only supported for jobs with a docker image compute environment.
|
|
149
140
|
entrypoint: The entrypoint of your docker container. Defaults to sh -c.
|
|
150
141
|
To use the pre-defined entrypoint of the provided image, set this to an empty string.
|
|
151
142
|
Only applicable when submitting docker jobs.
|
|
143
|
+
path_mappings: Dictionary of path mappings. The keys are the path inside the container whereas the value
|
|
144
|
+
represents the data-connection name and the path inside that connection.
|
|
145
|
+
Should be of form
|
|
146
|
+
{
|
|
147
|
+
"<CONTAINER_PATH_1>": "<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1>",
|
|
148
|
+
"<CONTAINER_PATH_2>": "<CONNECTION_NAME_2>"
|
|
149
|
+
}
|
|
150
|
+
If the path inside the connection is omitted it's assumed to be the root path of that connection.
|
|
151
|
+
Only applicable when submitting docker jobs.
|
|
152
152
|
"""
|
|
153
153
|
ret_val = super().run(
|
|
154
154
|
name=name,
|
|
@@ -165,9 +165,10 @@ class MMT(_BaseMMT):
|
|
|
165
165
|
interruptible=interruptible,
|
|
166
166
|
image_credentials=image_credentials,
|
|
167
167
|
cloud_account_auth=cloud_account_auth,
|
|
168
|
+
entrypoint=entrypoint,
|
|
169
|
+
path_mappings=path_mappings,
|
|
168
170
|
artifacts_local=artifacts_local,
|
|
169
171
|
artifacts_remote=artifacts_remote,
|
|
170
|
-
entrypoint=entrypoint,
|
|
171
172
|
cluster=cluster, # deprecated in favor of cloud_account
|
|
172
173
|
)
|
|
173
174
|
# required for typing with "MMT"
|
|
@@ -190,9 +191,10 @@ class MMT(_BaseMMT):
|
|
|
190
191
|
cloud_account: Optional[str] = None,
|
|
191
192
|
image_credentials: Optional[str] = None,
|
|
192
193
|
cloud_account_auth: bool = False,
|
|
193
|
-
artifacts_local: Optional[str] = None,
|
|
194
|
-
artifacts_remote: Optional[str] = None,
|
|
195
194
|
entrypoint: str = "sh -c",
|
|
195
|
+
path_mappings: Optional[Dict[str, str]] = None,
|
|
196
|
+
artifacts_local: Optional[str] = None, # deprecated in favor of path_mappings
|
|
197
|
+
artifacts_remote: Optional[str] = None, # deprecated in favor of path_mappings
|
|
196
198
|
) -> "MMT":
|
|
197
199
|
"""Submit a new multi-machine job to the Lightning AI platform.
|
|
198
200
|
|
|
@@ -212,20 +214,19 @@ class MMT(_BaseMMT):
|
|
|
212
214
|
This should be the name of the respective credentials secret created on the Lightning AI platform.
|
|
213
215
|
cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
|
|
214
216
|
Required if the registry is part of a cloud provider (e.g. ECR).
|
|
215
|
-
artifacts_local: The path of inside the docker container, you want to persist images from.
|
|
216
|
-
CAUTION: When setting this to "/", it will effectively erase your container.
|
|
217
|
-
Only supported for jobs with a docker image compute environment.
|
|
218
|
-
artifacts_remote: The remote storage to persist your artifacts to.
|
|
219
|
-
Should be of format <CONNECTION_TYPE>:<CONNECTION_NAME>:<PATH_WITHIN_CONNECTION>.
|
|
220
|
-
PATH_WITHIN_CONNECTION hereby is a path relative to the connection's root.
|
|
221
|
-
E.g. efs:data:some-path would result in an EFS connection named `data` and to the path `some-path`
|
|
222
|
-
within it.
|
|
223
|
-
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
224
|
-
Only supported for jobs with a docker image compute environment.
|
|
225
217
|
entrypoint: The entrypoint of your docker container. Defaults to `sh -c` which
|
|
226
218
|
just runs the provided command in a standard shell.
|
|
227
219
|
To use the pre-defined entrypoint of the provided image, set this to an empty string.
|
|
228
220
|
Only applicable when submitting docker jobs.
|
|
221
|
+
path_mappings: Dictionary of path mappings. The keys are the path inside the container whereas the value
|
|
222
|
+
represents the data-connection name and the path inside that connection.
|
|
223
|
+
Should be of form
|
|
224
|
+
{
|
|
225
|
+
"<CONTAINER_PATH_1>": "<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1>",
|
|
226
|
+
"<CONTAINER_PATH_2>": "<CONNECTION_NAME_2>"
|
|
227
|
+
}
|
|
228
|
+
If the path inside the connection is omitted it's assumed to be the root path of that connection.
|
|
229
|
+
Only applicable when submitting docker jobs.
|
|
229
230
|
"""
|
|
230
231
|
self._job = self._internal_mmt._submit(
|
|
231
232
|
num_machines=num_machines,
|
|
@@ -238,9 +239,10 @@ class MMT(_BaseMMT):
|
|
|
238
239
|
interruptible=interruptible,
|
|
239
240
|
image_credentials=image_credentials,
|
|
240
241
|
cloud_account_auth=cloud_account_auth,
|
|
242
|
+
entrypoint=entrypoint,
|
|
243
|
+
path_mappings=path_mappings,
|
|
241
244
|
artifacts_local=artifacts_local,
|
|
242
245
|
artifacts_remote=artifacts_remote,
|
|
243
|
-
entrypoint=entrypoint,
|
|
244
246
|
)
|
|
245
247
|
return self
|
|
246
248
|
|
lightning_sdk/mmt/v1.py
CHANGED
|
@@ -52,9 +52,10 @@ class _MMTV1(_BaseMMT):
|
|
|
52
52
|
cloud_account: Optional[str] = None,
|
|
53
53
|
image_credentials: Optional[str] = None,
|
|
54
54
|
cloud_account_auth: bool = False,
|
|
55
|
+
entrypoint: str = "sh -c",
|
|
56
|
+
path_mappings: Optional[Dict[str, str]] = None,
|
|
55
57
|
artifacts_local: Optional[str] = None,
|
|
56
58
|
artifacts_remote: Optional[str] = None,
|
|
57
|
-
entrypoint: str = "sh -c",
|
|
58
59
|
) -> "_MMTV1":
|
|
59
60
|
"""Submit a new multi-machine job to the Lightning AI platform.
|
|
60
61
|
|
|
@@ -88,6 +89,8 @@ class _MMTV1(_BaseMMT):
|
|
|
88
89
|
just runs the provided command in a standard shell.
|
|
89
90
|
To use the pre-defined entrypoint of the provided image, set this to an empty string.
|
|
90
91
|
Only applicable when submitting docker jobs.
|
|
92
|
+
path_mappings: The mappings from data connection inside your container (not supported)
|
|
93
|
+
|
|
91
94
|
"""
|
|
92
95
|
if studio is None:
|
|
93
96
|
raise ValueError("Studio is required for submitting jobs")
|
lightning_sdk/mmt/v2.py
CHANGED
|
@@ -51,9 +51,10 @@ class _MMTV2(_BaseMMT):
|
|
|
51
51
|
cloud_account: Optional[str] = None,
|
|
52
52
|
image_credentials: Optional[str] = None,
|
|
53
53
|
cloud_account_auth: bool = False,
|
|
54
|
-
artifacts_local: Optional[str] = None,
|
|
55
|
-
artifacts_remote: Optional[str] = None,
|
|
56
54
|
entrypoint: str = "sh -c",
|
|
55
|
+
path_mappings: Optional[Dict[str, str]] = None,
|
|
56
|
+
artifacts_local: Optional[str] = None, # deprecated in favor of path_mappings
|
|
57
|
+
artifacts_remote: Optional[str] = None, # deprecated in favor of path_mappings
|
|
57
58
|
) -> "_MMTV2":
|
|
58
59
|
"""Submit a new multi-machine job to the Lightning AI platform.
|
|
59
60
|
|
|
@@ -73,20 +74,19 @@ class _MMTV2(_BaseMMT):
|
|
|
73
74
|
This should be the name of the respective credentials secret created on the Lightning AI platform.
|
|
74
75
|
cloud_account_auth: Whether to authenticate with the cloud account to pull the image.
|
|
75
76
|
Required if the registry is part of a cloud provider (e.g. ECR).
|
|
76
|
-
artifacts_local: The path of inside the docker container, you want to persist images from.
|
|
77
|
-
CAUTION: When setting this to "/", it will effectively erase your container.
|
|
78
|
-
Only supported for jobs with a docker image compute environment.
|
|
79
|
-
artifacts_remote: The remote storage to persist your artifacts to.
|
|
80
|
-
Should be of format <CONNECTION_TYPE>:<CONNECTION_NAME>:<PATH_WITHIN_CONNECTION>.
|
|
81
|
-
PATH_WITHIN_CONNECTION hereby is a path relative to the connection's root.
|
|
82
|
-
E.g. efs:data:some-path would result in an EFS connection named `data` and to the path `some-path`
|
|
83
|
-
within it.
|
|
84
|
-
Note that the connection needs to be added to the teamspace already in order for it to be found.
|
|
85
|
-
Only supported for jobs with a docker image compute environment.
|
|
86
77
|
entrypoint: The entrypoint of your docker container. Defaults to `sh -c` which
|
|
87
78
|
just runs the provided command in a standard shell.
|
|
88
79
|
To use the pre-defined entrypoint of the provided image, set this to an empty string.
|
|
89
80
|
Only applicable when submitting docker jobs.
|
|
81
|
+
path_mappings: Dictionary of path mappings. The keys are the path inside the container whereas the value
|
|
82
|
+
represents the data-connection name and the path inside that connection.
|
|
83
|
+
Should be of form
|
|
84
|
+
{
|
|
85
|
+
"<CONTAINER_PATH_1>": "<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1>",
|
|
86
|
+
"<CONTAINER_PATH_2>": "<CONNECTION_NAME_2>"
|
|
87
|
+
}
|
|
88
|
+
If the path inside the connection is omitted it's assumed to be the root path of that connection.
|
|
89
|
+
Only applicable when submitting docker jobs.
|
|
90
90
|
"""
|
|
91
91
|
# Command is required if Studio is provided to know what to run
|
|
92
92
|
# Image is mutually exclusive with Studio
|
|
@@ -117,9 +117,10 @@ class _MMTV2(_BaseMMT):
|
|
|
117
117
|
env=env,
|
|
118
118
|
image_credentials=image_credentials,
|
|
119
119
|
cloud_account_auth=cloud_account_auth,
|
|
120
|
+
entrypoint=entrypoint,
|
|
121
|
+
path_mappings=path_mappings,
|
|
120
122
|
artifacts_local=artifacts_local,
|
|
121
123
|
artifacts_remote=artifacts_remote,
|
|
122
|
-
entrypoint=entrypoint,
|
|
123
124
|
)
|
|
124
125
|
self._job = submitted
|
|
125
126
|
self._name = submitted.name
|
lightning_sdk/models.py
CHANGED
|
@@ -121,10 +121,11 @@ def download_model(
|
|
|
121
121
|
progress_bar=progress_bar,
|
|
122
122
|
)
|
|
123
123
|
except ApiException as e:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
124
|
+
if e.status == 404:
|
|
125
|
+
raise RuntimeError(
|
|
126
|
+
f"Model '{name}' not found. Either the model doesn't exist or you don't have access to it."
|
|
127
|
+
) from None
|
|
128
|
+
raise RuntimeError(f"Error downloading model. Status code: {e.status}.") from None
|
|
128
129
|
|
|
129
130
|
|
|
130
131
|
def upload_model(
|
lightning_sdk/studio.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import warnings
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Mapping, Optional, Tuple, Union
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Tuple, Union
|
|
4
4
|
|
|
5
5
|
from lightning_sdk.api.studio_api import StudioApi
|
|
6
6
|
from lightning_sdk.api.utils import _machine_to_compute_name
|
|
@@ -14,6 +14,8 @@ from lightning_sdk.user import User
|
|
|
14
14
|
from lightning_sdk.utils.resolve import _resolve_deprecated_cluster, _resolve_teamspace, _setup_logger
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
|
+
from lightning_sdk.job import Job
|
|
18
|
+
from lightning_sdk.mmt import MMT
|
|
17
19
|
from lightning_sdk.plugin import Plugin
|
|
18
20
|
|
|
19
21
|
_logger = _setup_logger(__name__)
|
|
@@ -281,6 +283,71 @@ class Studio:
|
|
|
281
283
|
cloud_account=self._studio.cluster_id,
|
|
282
284
|
)
|
|
283
285
|
|
|
286
|
+
def run_job(
|
|
287
|
+
self,
|
|
288
|
+
name: str,
|
|
289
|
+
machine: Union["Machine", str],
|
|
290
|
+
command: str,
|
|
291
|
+
env: Optional[Dict[str, str]] = None,
|
|
292
|
+
interruptible: bool = False,
|
|
293
|
+
) -> "Job":
|
|
294
|
+
"""Run async workloads using the compute environment from your studio.
|
|
295
|
+
|
|
296
|
+
Args:
|
|
297
|
+
name: The name of the job. Needs to be unique within the teamspace.
|
|
298
|
+
machine: The machine type to run the job on. One of {", ".join(_MACHINE_VALUES)}.
|
|
299
|
+
command: The command to run inside your job.
|
|
300
|
+
env: Environment variables to set inside the job.
|
|
301
|
+
interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
|
|
302
|
+
"""
|
|
303
|
+
from lightning_sdk.job import Job
|
|
304
|
+
|
|
305
|
+
return Job.run(
|
|
306
|
+
name=name,
|
|
307
|
+
machine=machine,
|
|
308
|
+
command=command,
|
|
309
|
+
studio=self,
|
|
310
|
+
image=None,
|
|
311
|
+
teamspace=self.teamspace,
|
|
312
|
+
cloud_account=self.cloud_account,
|
|
313
|
+
env=env,
|
|
314
|
+
interruptible=interruptible,
|
|
315
|
+
)
|
|
316
|
+
|
|
317
|
+
def run_mmt(
|
|
318
|
+
self,
|
|
319
|
+
name: str,
|
|
320
|
+
num_machines: int,
|
|
321
|
+
machine: Union["Machine", str],
|
|
322
|
+
command: str,
|
|
323
|
+
env: Optional[Dict[str, str]] = None,
|
|
324
|
+
interruptible: bool = False,
|
|
325
|
+
) -> "MMT":
|
|
326
|
+
"""Run async workloads using the compute environment from your studio.
|
|
327
|
+
|
|
328
|
+
Args:
|
|
329
|
+
name: The name of the job. Needs to be unique within the teamspace.
|
|
330
|
+
num_machines: The number of machines to run on.
|
|
331
|
+
machine: The machine type to run the job on. One of {", ".join(_MACHINE_VALUES)}.
|
|
332
|
+
command: The command to run inside your job.
|
|
333
|
+
env: Environment variables to set inside the job.
|
|
334
|
+
interruptible: Whether the job should run on interruptible instances. They are cheaper but can be preempted.
|
|
335
|
+
"""
|
|
336
|
+
from lightning_sdk.mmt import MMT
|
|
337
|
+
|
|
338
|
+
return MMT.run(
|
|
339
|
+
name=name,
|
|
340
|
+
num_machines=num_machines,
|
|
341
|
+
machine=machine,
|
|
342
|
+
command=command,
|
|
343
|
+
studio=self,
|
|
344
|
+
image=None,
|
|
345
|
+
teamspace=self.teamspace,
|
|
346
|
+
cloud_account=self.cloud_account,
|
|
347
|
+
env=env,
|
|
348
|
+
interruptible=interruptible,
|
|
349
|
+
)
|
|
350
|
+
|
|
284
351
|
@property
|
|
285
352
|
def auto_sleep(self) -> bool:
|
|
286
353
|
"""Returns if a Studio has auto-sleep enabled."""
|
lightning_sdk/utils/resolve.py
CHANGED
|
@@ -187,3 +187,10 @@ def _parse_model_and_version(name: str) -> Tuple[str, str]:
|
|
|
187
187
|
"Model version is expected to be in the format `entity/modelname:version` separated by a"
|
|
188
188
|
f" single colon, but got: {name}"
|
|
189
189
|
)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def in_studio() -> bool:
|
|
193
|
+
"""Returns true if inside a studio, else false."""
|
|
194
|
+
has_cloudspace_id = bool(os.getenv("LIGHTNING_CLOUD_SPACE_ID", None))
|
|
195
|
+
is_interactive = os.getenv("LIGHTNING_INTERACTIVE", "false") == "true"
|
|
196
|
+
return has_cloudspace_id and is_interactive
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: lightning_sdk
|
|
3
|
-
Version: 0.1.50
|
|
3
|
+
Version: 0.1.53
|
|
4
4
|
Summary: SDK to develop using Lightning AI Studios
|
|
5
5
|
Author-email: Lightning-AI <justus@lightning.ai>
|
|
6
6
|
License: MIT License
|
|
@@ -45,9 +45,9 @@ Requires-Dist: tqdm
|
|
|
45
45
|
Requires-Dist: fire
|
|
46
46
|
Requires-Dist: simple-term-menu
|
|
47
47
|
Requires-Dist: lightning-utilities
|
|
48
|
+
Requires-Dist: docker
|
|
48
49
|
Provides-Extra: serve
|
|
49
50
|
Requires-Dist: litserve>=0.2.5; extra == "serve"
|
|
50
|
-
Requires-Dist: docker; extra == "serve"
|
|
51
51
|
|
|
52
52
|
# Lightning SDK
|
|
53
53
|
|