lightning-sdk 0.1.58__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +5 -3
- lightning_sdk/api/deployment_api.py +23 -11
- lightning_sdk/api/job_api.py +42 -7
- lightning_sdk/api/lit_container_api.py +7 -3
- lightning_sdk/api/mmt_api.py +46 -8
- lightning_sdk/api/pipeline_api.py +50 -0
- lightning_sdk/api/teamspace_api.py +2 -2
- lightning_sdk/api/utils.py +15 -5
- lightning_sdk/cli/ai_hub.py +30 -65
- lightning_sdk/cli/coloring.py +60 -0
- lightning_sdk/cli/configure.py +25 -40
- lightning_sdk/cli/connect.py +7 -20
- lightning_sdk/cli/create.py +83 -0
- lightning_sdk/cli/delete.py +72 -75
- lightning_sdk/cli/docker.py +77 -0
- lightning_sdk/cli/download.py +71 -111
- lightning_sdk/cli/entrypoint.py +44 -65
- lightning_sdk/cli/generate.py +28 -43
- lightning_sdk/cli/inspect.py +22 -50
- lightning_sdk/cli/list.py +281 -222
- lightning_sdk/cli/mmts_menu.py +1 -1
- lightning_sdk/cli/open.py +62 -0
- lightning_sdk/cli/run.py +430 -263
- lightning_sdk/cli/serve.py +162 -189
- lightning_sdk/cli/start.py +55 -36
- lightning_sdk/cli/stop.py +97 -55
- lightning_sdk/cli/switch.py +53 -36
- lightning_sdk/cli/upload.py +318 -255
- lightning_sdk/deployment/__init__.py +2 -0
- lightning_sdk/deployment/deployment.py +33 -8
- lightning_sdk/lightning_cloud/openapi/__init__.py +21 -0
- lightning_sdk/lightning_cloud/openapi/api/__init__.py +1 -0
- lightning_sdk/lightning_cloud/openapi/api/assistants_service_api.py +10 -6
- lightning_sdk/lightning_cloud/openapi/api/jobs_service_api.py +355 -4
- lightning_sdk/lightning_cloud/openapi/api/lit_logger_service_api.py +4 -4
- lightning_sdk/lightning_cloud/openapi/api/lit_registry_service_api.py +14 -2
- lightning_sdk/lightning_cloud/openapi/api/pipelines_service_api.py +670 -0
- lightning_sdk/lightning_cloud/openapi/api/storage_service_api.py +303 -4
- lightning_sdk/lightning_cloud/openapi/models/__init__.py +20 -0
- lightning_sdk/lightning_cloud/openapi/models/agents_id_body.py +17 -69
- lightning_sdk/lightning_cloud/openapi/models/cluster_id_capacityreservations_body.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/create.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/create_deployment_request_defines_a_spec_for_the_job_that_allows_for_autoscaling_jobs.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/deployments_id_body.py +105 -1
- lightning_sdk/lightning_cloud/openapi/models/id_visibility_body1.py +1 -27
- lightning_sdk/lightning_cloud/openapi/models/id_visibility_body2.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/org_id_memberships_body.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/orgs_id_body.py +157 -1
- lightning_sdk/lightning_cloud/openapi/models/pipelines_id_body.py +435 -0
- lightning_sdk/lightning_cloud/openapi/models/project_id_pipelines_body.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/projects_id_body.py +157 -1
- lightning_sdk/lightning_cloud/openapi/models/slurm_jobs_body.py +79 -1
- lightning_sdk/lightning_cloud/openapi/models/uploads_upload_id_body.py +1 -27
- lightning_sdk/lightning_cloud/openapi/models/uploads_upload_id_body1.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_agent_job.py +79 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_assistant.py +17 -69
- lightning_sdk/lightning_cloud/openapi/models/v1_capacity_block_offering.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_artifact_event_type.py +1 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_accelerator.py +131 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_capacity_reservation.py +79 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_security_options.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_complete_upload_temporary_artifact_request.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_deployment_request.py +461 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_deployment_template_request.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_create_job_request.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_managed_endpoint_response.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_create_multi_machine_job_request.py +253 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_data_connection.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_delete_pipeline_response.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment.py +105 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment_details.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment_template.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_filestore_data_connection.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_filesystem_job.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_filesystem_mmt.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_find_capacity_block_offering_response.py +29 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_job.py +133 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_job_spec.py +53 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_job_timing.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_list_pipelines_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_registry_artifact.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_lit_repository.py +29 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_managed_model.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_multi_machine_job_state.py +2 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_organization.py +157 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline.py +487 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_step.py +253 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_step_status.py +331 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_pipeline_step_type.py +104 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_project_settings.py +157 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_restart_timing.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_rule_resource.py +1 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_shared_filesystem.py +201 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_slurm_job.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_update_job_visibility_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_upload_temporary_artifact_request.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +95 -355
- lightning_sdk/lightning_cloud/openapi/models/validate.py +27 -1
- lightning_sdk/lightning_cloud/rest_client.py +4 -2
- lightning_sdk/machine.py +25 -1
- lightning_sdk/models.py +18 -12
- lightning_sdk/pipeline/__init__.py +4 -0
- lightning_sdk/pipeline/pipeline.py +109 -0
- lightning_sdk/pipeline/types.py +268 -0
- lightning_sdk/pipeline/utils.py +69 -0
- lightning_sdk/plugin.py +9 -10
- lightning_sdk/services/utilities.py +2 -2
- lightning_sdk/studio.py +5 -1
- lightning_sdk/teamspace.py +1 -1
- lightning_sdk/utils/resolve.py +12 -1
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.0.dist-info}/METADATA +6 -8
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.0.dist-info}/RECORD +117 -88
- lightning_sdk/cli/legacy.py +0 -135
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.0.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.0.dist-info}/WHEEL +0 -0
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.0.dist-info}/entry_points.txt +0 -0
- {lightning_sdk-0.1.58.dist-info → lightning_sdk-0.2.0.dist-info}/top_level.txt +0 -0
lightning_sdk/cli/run.py
CHANGED
|
@@ -1,274 +1,441 @@
|
|
|
1
|
-
|
|
1
|
+
import json
|
|
2
|
+
from typing import Dict, Mapping, Optional, Sequence, Union
|
|
3
|
+
|
|
4
|
+
import click
|
|
2
5
|
|
|
3
6
|
from lightning_sdk.job import Job
|
|
4
7
|
from lightning_sdk.machine import Machine
|
|
5
8
|
from lightning_sdk.mmt import MMT
|
|
6
9
|
from lightning_sdk.teamspace import Teamspace
|
|
7
10
|
|
|
8
|
-
if TYPE_CHECKING:
|
|
9
|
-
from lightning_sdk.cli.legacy import _LegacyLightningCLI
|
|
10
|
-
|
|
11
11
|
# Names of every predefined machine type declared as a class attribute on `Machine`;
# used below as the allowed values of the `--machine` CLI option.
_MACHINE_VALUES = tuple(candidate.name for candidate in vars(Machine).values() if isinstance(candidate, Machine))
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
# Top-level `run` command group; the `job` and `mmt` subcommands in this module attach to it
# via `@run.command(...)`. The docstring is intentionally short: click shows it as help text.
@click.group(name="run")
def run() -> None:
    """Run async workloads on the Lightning AI platform."""
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
)
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
18
|
+
|
|
19
|
+
@run.command("job")
@click.option("--name", default=None, help="The name of the job. Needs to be unique within the teamspace.")
@click.option(
    "--machine",
    default="CPU",
    show_default=True,
    type=click.Choice(_MACHINE_VALUES),
    help="The machine type to run the job on.",
)
@click.option(
    "--command",
    default=None,
    help=(
        "The command to run inside your job. "
        "Required if using a studio. "
        "Optional if using an image. "
        "If not provided for images, will run the container entrypoint and default command."
    ),
)
@click.option("--studio", default=None, help="The studio env to run the job with. Mutually exclusive with image.")
@click.option("--image", default=None, help="The docker image to run the job with. Mutually exclusive with studio.")
@click.option(
    "--teamspace",
    default=None,
    help="The teamspace the job should be associated with. Defaults to the current teamspace.",
)
@click.option(
    "--org",
    default=None,
    help="The organization owning the teamspace (if any). Defaults to the current organization.",
)
@click.option("--user", default=None, help="The user owning the teamspace (if any). Defaults to the current user.")
@click.option(
    "--cloud-account",
    "--cloud_account",
    default=None,
    help=(
        "The cloud account to run the job on. "
        "Defaults to the studio cloud account if running with studio compute env. "
        "If not provided will fall back to the teamspaces default cloud account."
    ),
)
@click.option(
    "--env",
    "-e",
    default=[""],
    help=("Environment variable to set inside the job. Should be of format KEY=VALUE"),
    multiple=True,
)
@click.option(
    "--interruptible",
    is_flag=True,
    flag_value=True,
    default=False,
    help="Whether the job should run on interruptible instances. They are cheaper but can be preempted.",
)
@click.option(
    "--image-credentials",
    "--image_credentials",
    default=None,
    help=(
        "The credentials used to pull the image. "
        "Required if the image is private. "
        "This should be the name of the respective credentials secret created on the Lightning AI platform."
    ),
)
@click.option(
    "--cloud-account-auth",
    "--cloud_account_auth",
    is_flag=True,
    default=False,
    help=(
        "Whether to authenticate with the cloud account to pull the image. "
        "Required if the registry is part of a cloud provider (e.g. ECR)."
    ),
)
@click.option(
    "--entrypoint",
    default="sh -c",
    show_default=True,
    help=(
        "The entrypoint of your docker container. "
        "Default runs the provided command in a standard shell. "
        "To use the pre-defined entrypoint of the provided image, set this to an empty string. "
        "Only applicable when submitting docker jobs."
    ),
)
@click.option(
    "--path-mapping",
    "--path_mapping",
    default=[""],
    help=(
        "Maps path inside of containers to paths inside data-connections. "
        "Should be of form <CONTAINER_PATH_1>:<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1> and "
        "omitting the path inside the connection defaults to the connections root. "
        "Can be specified multiple times for multiple mappings"
    ),
    multiple=True,
)
# this is for backwards compatibility only
@click.option(
    "--path-mappings",
    "--path_mappings",
    default="",
    help=(
        "Maps path inside of containers to paths inside data-connections. "
        "Should be a comma separated list of form: "
        "<MAPPING_1>,<MAPPING_2>,... "
        "where each mapping is of the form "
        "<CONTAINER_PATH_1>:<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1> and "
        "omitting the path inside the connection defaults to the connections root. "
        "Instead of a comma-separated list, consider passing --path-mapping multiple times."
    ),
)
def job(
    name: Optional[str] = None,
    machine: str = "CPU",
    command: Optional[str] = None,
    studio: Optional[str] = None,
    image: Optional[str] = None,
    teamspace: Optional[str] = None,
    org: Optional[str] = None,
    user: Optional[str] = None,
    cloud_account: Optional[str] = None,
    env: Sequence[str] = (),
    interruptible: bool = False,
    image_credentials: Optional[str] = None,
    cloud_account_auth: bool = False,
    entrypoint: str = "sh -c",
    path_mapping: Sequence[str] = (),
    path_mappings: str = "",
    artifacts_local: Optional[str] = None,
    artifacts_remote: Optional[str] = None,
) -> None:
    """Run async workloads using a docker image or studio."""
    # NOTE: the docstring above doubles as the CLI help text, so it stays short and user-facing.
    # `artifacts_local` / `artifacts_remote` have no matching click option in this module and are
    # simply forwarded to `Job.run` with whatever value the caller supplies (default None).
    if not name:
        from datetime import datetime

        # No (or empty) name given: derive one from the local time, e.g. "job-Jan-31-13_05".
        name = "job-" + datetime.now().strftime("%b-%d-%H_%M")

    # Prefer the predefined `Machine` attribute matching the upper-cased name; otherwise fall back
    # to an ad-hoc `Machine` built from the raw value.
    machine_enum: Union[str, Machine]
    try:
        attribute_name = machine.upper()
        fallback_machine = Machine(machine, machine)
        machine_enum = getattr(Machine, attribute_name, fallback_machine)
    except KeyError:
        # NOTE(review): not visible from this file what raises KeyError here; kept as-is.
        machine_enum = machine

    resolved_teamspace = Teamspace(name=teamspace, org=org, user=user)

    # Legacy comma-separated mappings first, then each repeated --path-mapping value on top,
    # so later values win for the same container path.
    merged_mappings = _resolve_path_mapping(path_mappings=path_mappings)
    for single_mapping in path_mapping:
        merged_mappings.update(_resolve_path_mapping(path_mappings=single_mapping))

    # Merge all --env values in order; a later value overrides an earlier one with the same key.
    env_vars = {key: value for entry in env for key, value in _resolve_envs(entry).items()}

    Job.run(
        name=name,
        machine=machine_enum,
        command=command,
        studio=studio,
        image=image,
        teamspace=resolved_teamspace,
        org=org,
        user=user,
        cloud_account=cloud_account,
        env=env_vars,
        interruptible=interruptible,
        image_credentials=image_credentials,
        cloud_account_auth=cloud_account_auth,
        entrypoint=entrypoint,
        path_mappings=merged_mappings,
        artifacts_local=artifacts_local,
        artifacts_remote=artifacts_remote,
    )
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@run.command("mmt")
@click.option("--name", default=None, help="The name of the job. Needs to be unique within the teamspace.")
@click.option(
    "--num-machines",
    "--num_machines",
    default=2,
    show_default=True,
    help="The number of Machines to run on.",
)
@click.option(
    "--machine",
    default="CPU",
    show_default=True,
    type=click.Choice(_MACHINE_VALUES),
    help="The machine type to run the job on.",
)
@click.option(
    "--command",
    default=None,
    help=(
        "The command to run inside your job. "
        "Required if using a studio. "
        "Optional if using an image. "
        "If not provided for images, will run the container entrypoint and default command."
    ),
)
@click.option(
    "--studio",
    default=None,
    help="The studio env to run the multi-machine job with. Mutually exclusive with image.",
)
@click.option(
    "--image",
    default=None,
    help="The docker image to run the multi-machine job with. Mutually exclusive with studio.",
)
@click.option(
    "--teamspace",
    default=None,
    help="The teamspace the job should be associated with. Defaults to the current teamspace.",
)
@click.option(
    "--org",
    default=None,
    help="The organization owning the teamspace (if any). Defaults to the current organization.",
)
@click.option("--user", default=None, help="The user owning the teamspace (if any). Defaults to the current user.")
@click.option(
    "--cloud-account",
    "--cloud_account",
    default=None,
    help=(
        "The cloud account to run the job on. "
        "Defaults to the studio cloud account if running with studio compute env. "
        "If not provided will fall back to the teamspaces default cloud account."
    ),
)
@click.option(
    "--env",
    "-e",
    default=[""],
    help=("Environment variable to set inside the job. Should be of format KEY=VALUE"),
    multiple=True,
)
@click.option(
    "--interruptible",
    is_flag=True,
    flag_value=True,
    default=False,
    help="Whether the job should run on interruptible instances. They are cheaper but can be preempted.",
)
@click.option(
    "--image-credentials",
    "--image_credentials",
    default=None,
    help=(
        "The credentials used to pull the image. "
        "Required if the image is private. "
        "This should be the name of the respective credentials secret created on the Lightning AI platform."
    ),
)
@click.option(
    "--cloud-account-auth",
    "--cloud_account_auth",
    is_flag=True,
    default=False,
    help=(
        "Whether to authenticate with the cloud account to pull the image. "
        "Required if the registry is part of a cloud provider (e.g. ECR)."
    ),
)
@click.option(
    "--entrypoint",
    default="sh -c",
    show_default=True,
    help=(
        "The entrypoint of your docker container. "
        "Default runs the provided command in a standard shell. "
        "To use the pre-defined entrypoint of the provided image, set this to an empty string. "
        "Only applicable when submitting docker jobs."
    ),
)
@click.option(
    "--path-mapping",
    "--path_mapping",
    default=[""],
    help=(
        "Maps path inside of containers to paths inside data-connections. "
        "Should be of form <CONTAINER_PATH_1>:<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1> and "
        "omitting the path inside the connection defaults to the connections root. "
        "Can be specified multiple times for multiple mappings"
    ),
    multiple=True,
)
# this is for backwards compatibility only
@click.option(
    "--path-mappings",
    "--path_mappings",
    default="",
    help=(
        "Maps path inside of containers to paths inside data-connections. "
        "Should be a comma separated list of form: "
        "<MAPPING_1>,<MAPPING_2>,... "
        "where each mapping is of the form "
        "<CONTAINER_PATH_1>:<CONNECTION_NAME_1>:<PATH_WITHIN_CONNECTION_1> and "
        "omitting the path inside the connection defaults to the connections root. "
        "Instead of a comma-separated list, consider passing --path-mapping multiple times."
    ),
)
def mmt(
    name: Optional[str] = None,
    num_machines: int = 2,
    machine: str = "CPU",
    command: Optional[str] = None,
    studio: Optional[str] = None,
    image: Optional[str] = None,
    teamspace: Optional[str] = None,
    org: Optional[str] = None,
    user: Optional[str] = None,
    cloud_account: Optional[str] = None,
    env: Sequence[str] = (),
    interruptible: bool = False,
    image_credentials: Optional[str] = None,
    cloud_account_auth: bool = False,
    entrypoint: str = "sh -c",
    path_mapping: Sequence[str] = (),
    path_mappings: str = "",
    artifacts_local: Optional[str] = None,
    artifacts_remote: Optional[str] = None,
) -> None:
    """Run async workloads on multiple machines using a docker image or studio."""
    # NOTE: the docstring above doubles as the CLI help text, so it stays short and user-facing.
    # `artifacts_local` / `artifacts_remote` have no matching click option in this module and are
    # simply forwarded to `MMT.run` with whatever value the caller supplies (default None).

    # Treat an empty name like a missing one (same rule as the `job` command) so that
    # `--name ""` gets a generated name instead of being submitted verbatim.
    if not name:
        from datetime import datetime

        # e.g. "mmt-Jan-31-13_05" (local time)
        timestr = datetime.now().strftime("%b-%d-%H_%M")
        name = f"mmt-{timestr}"

    if machine is None:
        # TODO: infer from studio
        machine = "CPU"

    # Prefer the predefined `Machine` attribute matching the upper-cased name; otherwise fall back
    # to an ad-hoc `Machine` built from the raw value.
    machine_enum: Union[str, Machine]
    try:
        machine_enum = getattr(Machine, machine.upper(), Machine(machine, machine))
    except KeyError:
        # NOTE(review): not visible from this file what raises KeyError here; kept as-is.
        machine_enum = machine

    resolved_teamspace = Teamspace(name=teamspace, org=org, user=user)

    # Legacy comma-separated mappings first, then each repeated --path-mapping value on top,
    # so later values win for the same container path.
    path_mappings_dict = _resolve_path_mapping(path_mappings=path_mappings)
    for mapping in path_mapping:
        path_mappings_dict.update(_resolve_path_mapping(path_mappings=mapping))

    # Merge all --env values in order; a later value overrides an earlier one with the same key.
    env_dict = {}
    for e in env:
        env_dict.update(_resolve_envs(e))

    MMT.run(
        name=name,
        num_machines=num_machines,
        machine=machine_enum,
        command=command,
        studio=studio,
        image=image,
        teamspace=resolved_teamspace,
        org=org,
        user=user,
        cloud_account=cloud_account,
        env=env_dict,
        interruptible=interruptible,
        image_credentials=image_credentials,
        cloud_account_auth=cloud_account_auth,
        entrypoint=entrypoint,
        path_mappings=path_mappings_dict,
        artifacts_local=artifacts_local,
        artifacts_remote=artifacts_remote,
    )
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _resolve_path_mapping(path_mappings: str) -> Dict[str, str]:
|
|
396
|
+
path_mappings = path_mappings.strip()
|
|
397
|
+
|
|
398
|
+
if not path_mappings:
|
|
399
|
+
return {}
|
|
400
|
+
|
|
401
|
+
path_mappings_dict = {}
|
|
402
|
+
for mapping in path_mappings.split(","):
|
|
403
|
+
if not mapping.strip():
|
|
404
|
+
continue
|
|
405
|
+
|
|
406
|
+
splits = str(mapping).split(":", 1)
|
|
407
|
+
if len(splits) != 2:
|
|
408
|
+
raise RuntimeError(
|
|
409
|
+
"Mapping needs to be of form <CONTAINER_PATH>:<CONNECTION_NAME>[:<PATH_WITHIN_CONNECTION>], "
|
|
410
|
+
f"but got {mapping}"
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
path_mappings_dict[splits[0].strip()] = splits[1].strip()
|
|
414
|
+
|
|
415
|
+
return path_mappings_dict
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def _resolve_envs(envs: str) -> Dict[str, str]:
|
|
419
|
+
if not envs:
|
|
420
|
+
return {}
|
|
421
|
+
|
|
422
|
+
# backwards compatibility for supporting env as json dict
|
|
423
|
+
try:
|
|
424
|
+
env_dict = json.loads(envs)
|
|
425
|
+
if isinstance(env_dict, Mapping):
|
|
426
|
+
return dict(env_dict)
|
|
427
|
+
|
|
428
|
+
raise ValueError(f"Env {envs} cannot be parsed as environment variable")
|
|
429
|
+
except json.decoder.JSONDecodeError as e:
|
|
430
|
+
# resolve individual env vars
|
|
431
|
+
env_dict = {}
|
|
432
|
+
splits = envs.split("=", 1)
|
|
433
|
+
if len(splits) == 2:
|
|
434
|
+
key, value = splits
|
|
435
|
+
env_dict.update({key: value})
|
|
436
|
+
|
|
437
|
+
return env_dict
|
|
438
|
+
|
|
439
|
+
raise ValueError(f"Env {envs} cannot be parsed as environment variable: {e!s}") from e
|
|
440
|
+
|
|
441
|
+
return {}
|