lightning-sdk 0.1.51__py3-none-any.whl → 0.1.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +1 -1
- lightning_sdk/ai_hub.py +16 -27
- lightning_sdk/api/ai_hub_api.py +7 -1
- lightning_sdk/api/deployment_api.py +9 -1
- lightning_sdk/api/job_api.py +1 -1
- lightning_sdk/api/lit_container_api.py +2 -1
- lightning_sdk/cli/run.py +25 -20
- lightning_sdk/deployment/deployment.py +12 -3
- lightning_sdk/job/base.py +21 -0
- lightning_sdk/job/job.py +1 -12
- lightning_sdk/job/v1.py +1 -32
- lightning_sdk/job/v2.py +6 -1
- lightning_sdk/lightning_cloud/openapi/__init__.py +14 -0
- lightning_sdk/lightning_cloud/openapi/api/assistants_service_api.py +105 -0
- lightning_sdk/lightning_cloud/openapi/api/cluster_service_api.py +417 -1
- lightning_sdk/lightning_cloud/openapi/api/file_system_service_api.py +105 -0
- lightning_sdk/lightning_cloud/openapi/api/storage_service_api.py +101 -0
- lightning_sdk/lightning_cloud/openapi/api/user_service_api.py +5 -1
- lightning_sdk/lightning_cloud/openapi/models/__init__.py +14 -0
- lightning_sdk/lightning_cloud/openapi/models/cluster_id_usagerestrictions_body.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/id_contactowner_body.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/metricsstream_create_body.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/usagerestrictions_id_body.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_assistant_model_status.py +4 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_provider.py +104 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_artifact_event.py +149 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cloud_space_artifact_event_type.py +103 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_accelerator.py +81 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_tagging_options.py +29 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_usage_restriction.py +227 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_contact_assistant_owner_reason.py +102 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_contact_assistant_owner_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_delete_cluster_usage_restriction_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_deployment_api.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_filesystem_mmt.py +175 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_list_cluster_usage_restrictions_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_list_filesystem_mm_ts_response.py +123 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_metrics_stream.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_model.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_post_cloud_space_artifact_events_response.py +97 -0
- lightning_sdk/lightning_cloud/openapi/models/v1_resource_visibility.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +53 -1
- lightning_sdk/lightning_cloud/utils/data_connection.py +75 -7
- lightning_sdk/mmt/mmt.py +8 -7
- lightning_sdk/plugin.py +5 -3
- lightning_sdk/studio.py +68 -1
- {lightning_sdk-0.1.51.dist-info → lightning_sdk-0.1.54.dist-info}/METADATA +1 -1
- {lightning_sdk-0.1.51.dist-info → lightning_sdk-0.1.54.dist-info}/RECORD +52 -38
- {lightning_sdk-0.1.51.dist-info → lightning_sdk-0.1.54.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.1.51.dist-info → lightning_sdk-0.1.54.dist-info}/WHEEL +0 -0
- {lightning_sdk-0.1.51.dist-info → lightning_sdk-0.1.54.dist-info}/entry_points.txt +0 -0
- {lightning_sdk-0.1.51.dist-info → lightning_sdk-0.1.54.dist-info}/top_level.txt +0 -0
|
@@ -41,25 +41,51 @@ class V1ResourceVisibility(object):
|
|
|
41
41
|
and the value is json key in definition.
|
|
42
42
|
"""
|
|
43
43
|
swagger_types = {
|
|
44
|
+
'all_org_users': 'bool',
|
|
44
45
|
'all_users': 'bool',
|
|
45
46
|
'user_ids': 'list[str]'
|
|
46
47
|
}
|
|
47
48
|
|
|
48
49
|
attribute_map = {
|
|
50
|
+
'all_org_users': 'allOrgUsers',
|
|
49
51
|
'all_users': 'allUsers',
|
|
50
52
|
'user_ids': 'userIds'
|
|
51
53
|
}
|
|
52
54
|
|
|
53
|
-
def __init__(self, all_users: 'bool' =None, user_ids: 'list[str]' =None): # noqa: E501
|
|
55
|
+
def __init__(self, all_org_users: 'bool' =None, all_users: 'bool' =None, user_ids: 'list[str]' =None):  # noqa: E501
    """Initialise a V1ResourceVisibility model (Swagger-generated).

    Every backing field starts out as ``None``. An argument that is not
    ``None`` is then assigned through the public attribute of the same
    name, so any setter defined on the class runs for it.

    :param all_org_users: optional value for ``all_org_users`` (bool)
    :param all_users: optional value for ``all_users`` (bool)
    :param user_ids: optional value for ``user_ids`` (list of str)
    """
    supplied = (
        ("all_org_users", all_org_users),
        ("all_users", all_users),
        ("user_ids", user_ids),
    )

    # Reset the private backing fields first, in declaration order.
    for field_name, _ in supplied:
        setattr(self, "_" + field_name, None)
    self.discriminator = None

    # Only explicitly provided values go through the public attributes.
    for field_name, value in supplied:
        if value is not None:
            setattr(self, field_name, value)
|
|
62
67
|
|
|
68
|
+
@property
def all_org_users(self) -> 'bool':
    """Return the ``all_org_users`` value held by this V1ResourceVisibility.

    The value is read from the private ``_all_org_users`` backing field
    and is ``None`` until it has been set.

    :return: the stored ``all_org_users`` value
    :rtype: bool
    """
    return self._all_org_users

@all_org_users.setter
def all_org_users(self, all_org_users: 'bool'):
    """Store a new ``all_org_users`` value on this V1ResourceVisibility.

    The value is written to the backing field as given; no validation
    or conversion is performed.

    :param all_org_users: the value to store
    :type: bool
    """
    self._all_org_users = all_org_users
|
|
88
|
+
|
|
63
89
|
@property
|
|
64
90
|
def all_users(self) -> 'bool':
|
|
65
91
|
"""Gets the all_users of this V1ResourceVisibility. # noqa: E501
|
|
@@ -62,12 +62,14 @@ class V1UserFeatures(object):
|
|
|
62
62
|
'deployment_customize_api': 'bool',
|
|
63
63
|
'deployment_data_path': 'bool',
|
|
64
64
|
'deployment_persistent_disk': 'bool',
|
|
65
|
+
'deployment_version_upgrade': 'bool',
|
|
65
66
|
'deployment_version_visibility': 'bool',
|
|
66
67
|
'docs_agent': 'bool',
|
|
67
68
|
'drive_v2': 'bool',
|
|
68
69
|
'enable_crypto_crackdown': 'bool',
|
|
69
70
|
'enable_efs': 'bool',
|
|
70
71
|
'enable_storage_limits': 'bool',
|
|
72
|
+
'fair_share': 'bool',
|
|
71
73
|
'featured_studios_admin': 'bool',
|
|
72
74
|
'filesystem_optimisation': 'bool',
|
|
73
75
|
'inference_job_deployment_plugin': 'bool',
|
|
@@ -146,12 +148,14 @@ class V1UserFeatures(object):
|
|
|
146
148
|
'deployment_customize_api': 'deploymentCustomizeApi',
|
|
147
149
|
'deployment_data_path': 'deploymentDataPath',
|
|
148
150
|
'deployment_persistent_disk': 'deploymentPersistentDisk',
|
|
151
|
+
'deployment_version_upgrade': 'deploymentVersionUpgrade',
|
|
149
152
|
'deployment_version_visibility': 'deploymentVersionVisibility',
|
|
150
153
|
'docs_agent': 'docsAgent',
|
|
151
154
|
'drive_v2': 'driveV2',
|
|
152
155
|
'enable_crypto_crackdown': 'enableCryptoCrackdown',
|
|
153
156
|
'enable_efs': 'enableEfs',
|
|
154
157
|
'enable_storage_limits': 'enableStorageLimits',
|
|
158
|
+
'fair_share': 'fairShare',
|
|
155
159
|
'featured_studios_admin': 'featuredStudiosAdmin',
|
|
156
160
|
'filesystem_optimisation': 'filesystemOptimisation',
|
|
157
161
|
'inference_job_deployment_plugin': 'inferenceJobDeploymentPlugin',
|
|
@@ -208,7 +212,7 @@ class V1UserFeatures(object):
|
|
|
208
212
|
'writable_data_connections': 'writableDataConnections'
|
|
209
213
|
}
|
|
210
214
|
|
|
211
|
-
def __init__(self, advanced_deployment_autoscaling: 'bool' =None, affiliate_links: 'bool' =None, agents_v2: 'bool' =None, ai_hub_monetization: 'bool' =None, auto_fast_load: 'bool' =None, auto_join_orgs: 'bool' =None, b2c_experience: 'bool' =None, cap_add: 'list[str]' =None, cap_drop: 'list[str]' =None, capacity_reservation_byoc: 'bool' =None, capacity_reservation_dry_run: 'bool' =None, code_tab: 'bool' =None, collab_screen_sharing: 'bool' =None, cost_attribution_settings: 'bool' =None, custom_app_domain: 'bool' =None, custom_instance_types: 'bool' =None, default_one_cluster: 'bool' =None, deployment_alerts: 'bool' =None, deployment_customize_api: 'bool' =None, deployment_data_path: 'bool' =None, deployment_persistent_disk: 'bool' =None, deployment_version_visibility: 'bool' =None, docs_agent: 'bool' =None, drive_v2: 'bool' =None, enable_crypto_crackdown: 'bool' =None, enable_efs: 'bool' =None, enable_storage_limits: 'bool' =None, featured_studios_admin: 'bool' =None, filesystem_optimisation: 'bool' =None, inference_job_deployment_plugin: 'bool' =None, instant_capacity_reservation: 'bool' =None, job_artifacts_v2: 'bool' =None, jobs_init: 'bool' =None, jobs_v2: 'bool' =None, landing_studios: 'bool' =None, lightning_registry: 'bool' =None, lit_logger: 'bool' =None, lit_logger_storage_v2: 'bool' =None, mmt_fault_tolerance: 'bool' =None, mmt_strategy_selector: 'bool' =None, mmt_v2: 'bool' =None, multiple_deployment_versions: 'bool' =None, multiple_studio_versions: 'bool' =None, org_level_member_permissions: 'bool' =None, pipelines: 'bool' =None, plugin_biz_chat: 'bool' =None, plugin_distributed: 'bool' =None, plugin_fiftyone: 'bool' =None, plugin_inference: 'bool' =None, plugin_label_studio: 'bool' =None, plugin_langflow: 'bool' =None, plugin_lightning_apps: 'bool' =None, plugin_lightning_apps_distributed: 'bool' =None, plugin_mage_ai: 'bool' =None, plugin_milvus: 'bool' =None, plugin_python_profiler: 'bool' =None, plugin_react: 'bool' =None, plugin_service: 'bool' 
=None, plugin_sweeps: 'bool' =None, plugin_weviate: 'bool' =None, pricing_updates: 'bool' =None, product_generator: 'bool' =None, project_selector: 'bool' =None, restart_ide_on_hang: 'bool' =None, restartable_jobs: 'bool' =None, runnable_public_studio_page: 'bool' =None, security_docs: 'bool' =None, show_dev_admin: 'bool' =None, slurm: 'bool' =None, slurm_machine_selector: 'bool' =None, snapshotter_service: 'bool' =None, snowflake_connection: 'bool' =None, spot_v2: 'bool' =None, studio_config: 'bool' =None, studio_on_stop: 'bool' =None, studio_version_visibility: 'bool' =None, teamspace_storage_tab: 'bool' =None, trainium2: 'bool' =None, use_rclone_mounts_only: 'bool' =None, vultr: 'bool' =None, writable_data_connections: 'bool' =None): # noqa: E501
|
|
215
|
+
def __init__(self, advanced_deployment_autoscaling: 'bool' =None, affiliate_links: 'bool' =None, agents_v2: 'bool' =None, ai_hub_monetization: 'bool' =None, auto_fast_load: 'bool' =None, auto_join_orgs: 'bool' =None, b2c_experience: 'bool' =None, cap_add: 'list[str]' =None, cap_drop: 'list[str]' =None, capacity_reservation_byoc: 'bool' =None, capacity_reservation_dry_run: 'bool' =None, code_tab: 'bool' =None, collab_screen_sharing: 'bool' =None, cost_attribution_settings: 'bool' =None, custom_app_domain: 'bool' =None, custom_instance_types: 'bool' =None, default_one_cluster: 'bool' =None, deployment_alerts: 'bool' =None, deployment_customize_api: 'bool' =None, deployment_data_path: 'bool' =None, deployment_persistent_disk: 'bool' =None, deployment_version_upgrade: 'bool' =None, deployment_version_visibility: 'bool' =None, docs_agent: 'bool' =None, drive_v2: 'bool' =None, enable_crypto_crackdown: 'bool' =None, enable_efs: 'bool' =None, enable_storage_limits: 'bool' =None, fair_share: 'bool' =None, featured_studios_admin: 'bool' =None, filesystem_optimisation: 'bool' =None, inference_job_deployment_plugin: 'bool' =None, instant_capacity_reservation: 'bool' =None, job_artifacts_v2: 'bool' =None, jobs_init: 'bool' =None, jobs_v2: 'bool' =None, landing_studios: 'bool' =None, lightning_registry: 'bool' =None, lit_logger: 'bool' =None, lit_logger_storage_v2: 'bool' =None, mmt_fault_tolerance: 'bool' =None, mmt_strategy_selector: 'bool' =None, mmt_v2: 'bool' =None, multiple_deployment_versions: 'bool' =None, multiple_studio_versions: 'bool' =None, org_level_member_permissions: 'bool' =None, pipelines: 'bool' =None, plugin_biz_chat: 'bool' =None, plugin_distributed: 'bool' =None, plugin_fiftyone: 'bool' =None, plugin_inference: 'bool' =None, plugin_label_studio: 'bool' =None, plugin_langflow: 'bool' =None, plugin_lightning_apps: 'bool' =None, plugin_lightning_apps_distributed: 'bool' =None, plugin_mage_ai: 'bool' =None, plugin_milvus: 'bool' =None, plugin_python_profiler: 
'bool' =None, plugin_react: 'bool' =None, plugin_service: 'bool' =None, plugin_sweeps: 'bool' =None, plugin_weviate: 'bool' =None, pricing_updates: 'bool' =None, product_generator: 'bool' =None, project_selector: 'bool' =None, restart_ide_on_hang: 'bool' =None, restartable_jobs: 'bool' =None, runnable_public_studio_page: 'bool' =None, security_docs: 'bool' =None, show_dev_admin: 'bool' =None, slurm: 'bool' =None, slurm_machine_selector: 'bool' =None, snapshotter_service: 'bool' =None, snowflake_connection: 'bool' =None, spot_v2: 'bool' =None, studio_config: 'bool' =None, studio_on_stop: 'bool' =None, studio_version_visibility: 'bool' =None, teamspace_storage_tab: 'bool' =None, trainium2: 'bool' =None, use_rclone_mounts_only: 'bool' =None, vultr: 'bool' =None, writable_data_connections: 'bool' =None): # noqa: E501
|
|
212
216
|
"""V1UserFeatures - a model defined in Swagger""" # noqa: E501
|
|
213
217
|
self._advanced_deployment_autoscaling = None
|
|
214
218
|
self._affiliate_links = None
|
|
@@ -231,12 +235,14 @@ class V1UserFeatures(object):
|
|
|
231
235
|
self._deployment_customize_api = None
|
|
232
236
|
self._deployment_data_path = None
|
|
233
237
|
self._deployment_persistent_disk = None
|
|
238
|
+
self._deployment_version_upgrade = None
|
|
234
239
|
self._deployment_version_visibility = None
|
|
235
240
|
self._docs_agent = None
|
|
236
241
|
self._drive_v2 = None
|
|
237
242
|
self._enable_crypto_crackdown = None
|
|
238
243
|
self._enable_efs = None
|
|
239
244
|
self._enable_storage_limits = None
|
|
245
|
+
self._fair_share = None
|
|
240
246
|
self._featured_studios_admin = None
|
|
241
247
|
self._filesystem_optimisation = None
|
|
242
248
|
self._inference_job_deployment_plugin = None
|
|
@@ -334,6 +340,8 @@ class V1UserFeatures(object):
|
|
|
334
340
|
self.deployment_data_path = deployment_data_path
|
|
335
341
|
if deployment_persistent_disk is not None:
|
|
336
342
|
self.deployment_persistent_disk = deployment_persistent_disk
|
|
343
|
+
if deployment_version_upgrade is not None:
|
|
344
|
+
self.deployment_version_upgrade = deployment_version_upgrade
|
|
337
345
|
if deployment_version_visibility is not None:
|
|
338
346
|
self.deployment_version_visibility = deployment_version_visibility
|
|
339
347
|
if docs_agent is not None:
|
|
@@ -346,6 +354,8 @@ class V1UserFeatures(object):
|
|
|
346
354
|
self.enable_efs = enable_efs
|
|
347
355
|
if enable_storage_limits is not None:
|
|
348
356
|
self.enable_storage_limits = enable_storage_limits
|
|
357
|
+
if fair_share is not None:
|
|
358
|
+
self.fair_share = fair_share
|
|
349
359
|
if featured_studios_admin is not None:
|
|
350
360
|
self.featured_studios_admin = featured_studios_admin
|
|
351
361
|
if filesystem_optimisation is not None:
|
|
@@ -896,6 +906,27 @@ class V1UserFeatures(object):
|
|
|
896
906
|
|
|
897
907
|
self._deployment_persistent_disk = deployment_persistent_disk
|
|
898
908
|
|
|
909
|
+
@property
def deployment_version_upgrade(self) -> 'bool':
    """Return the ``deployment_version_upgrade`` value held by this V1UserFeatures.

    The value is read from the private ``_deployment_version_upgrade``
    backing field and is ``None`` until it has been set.

    :return: the stored ``deployment_version_upgrade`` value
    :rtype: bool
    """
    return self._deployment_version_upgrade

@deployment_version_upgrade.setter
def deployment_version_upgrade(self, deployment_version_upgrade: 'bool'):
    """Store a new ``deployment_version_upgrade`` value on this V1UserFeatures.

    The value is written to the backing field as given; no validation
    or conversion is performed.

    :param deployment_version_upgrade: the value to store
    :type: bool
    """
    self._deployment_version_upgrade = deployment_version_upgrade
|
|
929
|
+
|
|
899
930
|
@property
|
|
900
931
|
def deployment_version_visibility(self) -> 'bool':
|
|
901
932
|
"""Gets the deployment_version_visibility of this V1UserFeatures. # noqa: E501
|
|
@@ -1022,6 +1053,27 @@ class V1UserFeatures(object):
|
|
|
1022
1053
|
|
|
1023
1054
|
self._enable_storage_limits = enable_storage_limits
|
|
1024
1055
|
|
|
1056
|
+
@property
def fair_share(self) -> 'bool':
    """Return the ``fair_share`` value held by this V1UserFeatures.

    The value is read from the private ``_fair_share`` backing field and
    is ``None`` until it has been set.

    :return: the stored ``fair_share`` value
    :rtype: bool
    """
    return self._fair_share

@fair_share.setter
def fair_share(self, fair_share: 'bool'):
    """Store a new ``fair_share`` value on this V1UserFeatures.

    The value is written to the backing field as given; no validation
    or conversion is performed.

    :param fair_share: the value to store
    :type: bool
    """
    self._fair_share = fair_share
|
|
1076
|
+
|
|
1025
1077
|
@property
|
|
1026
1078
|
def featured_studios_admin(self) -> 'bool':
|
|
1027
1079
|
"""Gets the featured_studios_admin of this V1UserFeatures. # noqa: E501
|
|
@@ -4,7 +4,7 @@ from lightning_sdk.lightning_cloud import rest_client
|
|
|
4
4
|
from lightning_sdk.lightning_cloud.openapi import Create, V1AwsDataConnection, V1S3FolderDataConnection, V1EfsConfig
|
|
5
5
|
from lightning_sdk.lightning_cloud.openapi.rest import ApiException
|
|
6
6
|
import urllib3
|
|
7
|
-
|
|
7
|
+
import os
|
|
8
8
|
|
|
9
9
|
def add_s3_connection(bucket_name: str, region: str = "us-east-1", create_timeout: int = 15) -> None:
|
|
10
10
|
"""Utility to add a data connection."""
|
|
@@ -26,7 +26,7 @@ def add_s3_connection(bucket_name: str, region: str = "us-east-1", create_timeou
|
|
|
26
26
|
aws=V1AwsDataConnection(
|
|
27
27
|
source=f"s3://{bucket_name}",
|
|
28
28
|
region=region
|
|
29
|
-
|
|
29
|
+
))
|
|
30
30
|
try:
|
|
31
31
|
client.data_connection_service_create_data_connection(body, project_id)
|
|
32
32
|
except (ApiException, urllib3.exceptions.HTTPError) as ex:
|
|
@@ -44,7 +44,7 @@ def add_s3_connection(bucket_name: str, region: str = "us-east-1", create_timeou
|
|
|
44
44
|
|
|
45
45
|
while not os.path.isdir(f"/teamspace/s3_connections/{bucket_name}") and (time() - start) < create_timeout:
|
|
46
46
|
sleep(1)
|
|
47
|
-
|
|
47
|
+
|
|
48
48
|
return
|
|
49
49
|
|
|
50
50
|
def create_s3_folder(folder_name: str, create_timeout: int = 15) -> None:
|
|
@@ -64,7 +64,7 @@ def create_s3_folder(folder_name: str, create_timeout: int = 15) -> None:
|
|
|
64
64
|
|
|
65
65
|
# Get existing data connections
|
|
66
66
|
data_connections = client.data_connection_service_list_data_connections(project_id).data_connections
|
|
67
|
-
|
|
67
|
+
|
|
68
68
|
for connection in data_connections:
|
|
69
69
|
existing_folder_name = getattr(connection, 'name', None)
|
|
70
70
|
isS3Folder = getattr(connection, 's3_folder', None) is not None
|
|
@@ -102,13 +102,14 @@ def create_s3_folder(folder_name: str, create_timeout: int = 15) -> None:
|
|
|
102
102
|
|
|
103
103
|
return
|
|
104
104
|
|
|
105
|
-
def create_efs_folder(folder_name: str, region: str) -> None:
|
|
105
|
+
def create_efs_folder(folder_name: str, region: str, create_timeout: int = 60) -> None:
|
|
106
106
|
"""
|
|
107
107
|
Utility function to create a EFS folder.
|
|
108
108
|
|
|
109
109
|
Args:
|
|
110
110
|
folder_name: The name of the folder to create.
|
|
111
111
|
region: the region to create the efs in. Could be something like "us-east-1"
|
|
112
|
+
create_timeout: Timeout for creating the efs folder. Defaults to 60 seconds
|
|
112
113
|
"""
|
|
113
114
|
client = rest_client.LightningClient(retry=False)
|
|
114
115
|
|
|
@@ -137,7 +138,7 @@ def create_efs_folder(folder_name: str, region: str) -> None:
|
|
|
137
138
|
efs=V1EfsConfig(region=region),
|
|
138
139
|
)
|
|
139
140
|
try:
|
|
140
|
-
client.data_connection_service_create_data_connection(body, project_id)
|
|
141
|
+
connection = client.data_connection_service_create_data_connection(body, project_id)
|
|
141
142
|
except ApiException as e:
|
|
142
143
|
# Note: This function can be called in a distributed way.
|
|
143
144
|
# There is a race condition where one machine might create the entry before another machine
|
|
@@ -150,6 +151,72 @@ def create_efs_folder(folder_name: str, region: str) -> None:
|
|
|
150
151
|
except urllib3.exceptions.HTTPError as e:
|
|
151
152
|
raise e from None
|
|
152
153
|
|
|
154
|
+
start = time()
|
|
155
|
+
while True:
|
|
156
|
+
if time() - start > create_timeout:
|
|
157
|
+
print(f"Dataconnection {connection.name} didn't become accessible withing {create_timeout} seconds!")
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
data_connection = client.data_connection_service_get_data_connection(project_id=project_id, id=connection.id)
|
|
161
|
+
if data_connection.accessible:
|
|
162
|
+
break
|
|
163
|
+
|
|
164
|
+
sleep(1)
|
|
165
|
+
|
|
166
|
+
def add_efs_connection(name: str, filesystem_id: str, region: str = "us-east-1", create_timeout: int = 60) -> None:
    """Utility to add an existing EFS filesystem as a data connection.

    If a data connection called ``name`` is already listed for the project,
    nothing is created and the function returns immediately. Otherwise the
    connection is created for the cluster named by ``LIGHTNING_CLUSTER_ID``
    and then polled once per second until the API reports it as accessible
    or ``create_timeout`` seconds have elapsed. A timeout is only reported
    with ``print``; no exception is raised for it.

    Args:
        name: The name to give to the data connection
        filesystem_id: The ID of the existing EFS filesystem (e.g., 'fs-1234567')
        region: AWS region where the EFS filesystem exists (default: 'us-east-1')
        create_timeout: Timeout in seconds to wait for the connection to be accessible (default: 60)

    Raises:
        ApiException: If creating the connection fails for any reason other
            than a duplicate-key conflict.
        urllib3.exceptions.HTTPError: If the create request fails at the HTTP layer.
    """
    client = rest_client.LightningClient(retry=False)

    project_id = os.getenv("LIGHTNING_CLOUD_PROJECT_ID")
    cluster_id = os.getenv("LIGHTNING_CLUSTER_ID")

    def _find_existing():
        # Return the listed data connection named `name`, or None.
        listed = client.data_connection_service_list_data_connections(project_id).data_connections
        return next((d for d in listed if d.name == name), None)

    if _find_existing() is not None:
        return

    body = Create(
        name=name,
        create_resources=False,  # Don't create new EFS since we're connecting to existing one
        cluster_id=cluster_id,
        access_cluster_ids=[cluster_id],
        force=True,
        writable=True,
        efs=V1EfsConfig(
            region=region,
            file_system_id=filesystem_id,
        ),
    )

    try:
        connection = client.data_connection_service_create_data_connection(body, project_id)
    except ApiException as ex:
        if 'duplicate key value violates unique constraint' not in str(ex.body):
            raise
        # The entry already exists (presumably created concurrently by another
        # caller after the listing above). The create call returned nothing, so
        # look the connection up instead of leaving `connection` unbound for
        # the polling loop below.
        connection = _find_existing()
        if connection is None:
            return

    start = time()
    while True:
        if time() - start > create_timeout:
            print(f"Dataconnection {connection.name} didn't become accessible withing {create_timeout} seconds!")
            break

        data_connection = client.data_connection_service_get_data_connection(project_id=project_id, id=connection.id)
        if data_connection.accessible:
            break

        sleep(1)
|
|
219
|
+
|
|
153
220
|
def delete_data_connection(name: str):
|
|
154
221
|
"""Utility to delete a data connection
|
|
155
222
|
|
|
@@ -181,4 +248,5 @@ def delete_data_connection(name: str):
|
|
|
181
248
|
# It may exhaust the attempts before the connection is actually unmounted from the studio.
|
|
182
249
|
# for now it's best to actually stop the studio and all other things where the connection
|
|
183
250
|
# is mounted before trying to delete it
|
|
184
|
-
raise e from None
|
|
251
|
+
raise e from None
|
|
252
|
+
|
lightning_sdk/mmt/mmt.py
CHANGED
|
@@ -2,7 +2,6 @@ from functools import lru_cache
|
|
|
2
2
|
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
|
|
3
3
|
|
|
4
4
|
from lightning_sdk.api.user_api import UserApi
|
|
5
|
-
from lightning_sdk.job.job import _has_jobs_v2
|
|
6
5
|
from lightning_sdk.mmt.base import MMTMachine, _BaseMMT
|
|
7
6
|
from lightning_sdk.mmt.v1 import _MMTV1
|
|
8
7
|
from lightning_sdk.mmt.v2 import _MMTV2
|
|
@@ -21,16 +20,18 @@ _logger = _setup_logger(__name__)
|
|
|
21
20
|
|
|
22
21
|
@lru_cache(maxsize=None)
def _has_mmt_v2() -> bool:
    """Report whether the current user has the ``mmt_v2`` feature flag.

    The result is cached for the lifetime of the process.

    Returns:
        ``False`` when the feature flags cannot be fetched, the value of the
        ``mmt_v2`` flag when it is present, and ``True`` when the flag is
        missing from the response.
    """
    user_api = UserApi()
    try:
        flags = user_api._get_feature_flags()
    except Exception:
        return False

    # A missing attribute means the feature flag doesn't exist anymore, so default to True.
    return getattr(flags, "mmt_v2", True)
|
|
34
|
+
|
|
34
35
|
|
|
35
36
|
class MMT(_BaseMMT):
|
|
36
37
|
"""Class to submit and manage multi-machine jobs on the Lightning AI Platform."""
|
|
@@ -120,7 +121,7 @@ class MMT(_BaseMMT):
|
|
|
120
121
|
Args:
|
|
121
122
|
name: The name of the job. Needs to be unique within the teamspace.
|
|
122
123
|
machine: The machine type to run the job on. One of {", ".join(_MACHINE_VALUES)}.
|
|
123
|
-
|
|
124
|
+
num_machines: The number of machines to run on.
|
|
124
125
|
command: The command to run inside your job. Required if using a studio. Optional if using an image.
|
|
125
126
|
If not provided for images, will run the container entrypoint and default command.
|
|
126
127
|
studio: The studio env to run the job with. Mutually exclusive with image.
|
lightning_sdk/plugin.py
CHANGED
|
@@ -432,6 +432,8 @@ def _success_message(resp: Union["Externalv1LightningappInstance", Job], plugin_
|
|
|
432
432
|
def forced_v1(cls: Any) -> Generator[Any, None, None]:
    """Temporarily force the v1 backend on a class that has several backends.

    Sets ``cls._force_v1`` to ``True``, yields ``cls``, and afterwards puts
    back the previous value of ``_force_v1`` (``False`` if it was not set),
    even if the caller's code raised.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` on the
    line above this definition; the decorator is outside the visible hunk.
    """
    previous = getattr(cls, "_force_v1", False)
    try:
        cls._force_v1 = True
        yield cls
    finally:
        # Always restore, so a failure inside the block cannot leak the override.
        cls._force_v1 = previous
|
lightning_sdk/studio.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import warnings
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Mapping, Optional, Tuple, Union
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Tuple, Union
|
|
4
4
|
|
|
5
5
|
from lightning_sdk.api.studio_api import StudioApi
|
|
6
6
|
from lightning_sdk.api.utils import _machine_to_compute_name
|
|
@@ -14,6 +14,8 @@ from lightning_sdk.user import User
|
|
|
14
14
|
from lightning_sdk.utils.resolve import _resolve_deprecated_cluster, _resolve_teamspace, _setup_logger
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
|
+
from lightning_sdk.job import Job
|
|
18
|
+
from lightning_sdk.mmt import MMT
|
|
17
19
|
from lightning_sdk.plugin import Plugin
|
|
18
20
|
|
|
19
21
|
_logger = _setup_logger(__name__)
|
|
@@ -281,6 +283,71 @@ class Studio:
|
|
|
281
283
|
cloud_account=self._studio.cluster_id,
|
|
282
284
|
)
|
|
283
285
|
|
|
286
|
+
def run_job(
    self,
    name: str,
    machine: Union["Machine", str],
    command: str,
    env: Optional[Dict[str, str]] = None,
    interruptible: bool = False,
) -> "Job":
    """Submit a job that runs with this studio's compute environment.

    Delegates to ``Job.run`` with ``studio=self`` and ``image=None``, using
    this studio's teamspace and cloud account.

    Args:
        name: The name of the job. Needs to be unique within the teamspace.
        machine: The machine type to run the job on.
        command: The command to run inside your job.
        env: Environment variables to set inside the job.
        interruptible: Whether the job should run on interruptible instances.
            They are cheaper but can be preempted.

    Returns:
        Whatever ``Job.run`` returns for the submitted job.
    """
    # Imported here rather than at module level, as in the original.
    from lightning_sdk.job import Job

    job_options = {
        "name": name,
        "machine": machine,
        "command": command,
        "studio": self,
        "image": None,
        "teamspace": self.teamspace,
        "cloud_account": self.cloud_account,
        "env": env,
        "interruptible": interruptible,
    }
    return Job.run(**job_options)
|
|
316
|
+
|
|
317
|
+
def run_mmt(
    self,
    name: str,
    num_machines: int,
    machine: Union["Machine", str],
    command: str,
    env: Optional[Dict[str, str]] = None,
    interruptible: bool = False,
) -> "MMT":
    """Submit a multi-machine job that runs with this studio's compute environment.

    Delegates to ``MMT.run`` with ``studio=self`` and ``image=None``, using
    this studio's teamspace and cloud account.

    Args:
        name: The name of the job. Needs to be unique within the teamspace.
        num_machines: The number of machines to run on.
        machine: The machine type to run the job on.
        command: The command to run inside your job.
        env: Environment variables to set inside the job.
        interruptible: Whether the job should run on interruptible instances.
            They are cheaper but can be preempted.

    Returns:
        Whatever ``MMT.run`` returns for the submitted job.
    """
    # Imported here rather than at module level, as in the original.
    from lightning_sdk.mmt import MMT

    mmt_options = {
        "name": name,
        "num_machines": num_machines,
        "machine": machine,
        "command": command,
        "studio": self,
        "image": None,
        "teamspace": self.teamspace,
        "cloud_account": self.cloud_account,
        "env": env,
        "interruptible": interruptible,
    }
    return MMT.run(**mmt_options)
|
|
350
|
+
|
|
284
351
|
@property
|
|
285
352
|
def auto_sleep(self) -> bool:
|
|
286
353
|
"""Returns if a Studio has auto-sleep enabled."""
|