lightning-sdk 0.1.48__py3-none-any.whl → 0.1.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_sdk/__init__.py +3 -1
- lightning_sdk/api/job_api.py +13 -6
- lightning_sdk/api/lit_container_api.py +37 -1
- lightning_sdk/api/mmt_api.py +12 -6
- lightning_sdk/api/utils.py +7 -0
- lightning_sdk/cli/download.py +20 -1
- lightning_sdk/cli/entrypoint.py +11 -0
- lightning_sdk/cli/list.py +60 -2
- lightning_sdk/cli/run.py +19 -4
- lightning_sdk/cli/upload.py +32 -1
- lightning_sdk/job/base.py +23 -4
- lightning_sdk/job/job.py +4 -3
- lightning_sdk/job/v1.py +4 -4
- lightning_sdk/job/v2.py +7 -10
- lightning_sdk/job/work.py +2 -2
- lightning_sdk/lightning_cloud/openapi/models/v1_cluster_spec.py +1 -29
- lightning_sdk/lightning_cloud/openapi/models/v1_lambda_labs_direct_v1.py +31 -3
- lightning_sdk/lightning_cloud/openapi/models/v1_user_features.py +27 -1
- lightning_sdk/lightning_cloud/openapi/models/v1_vultr_direct_v1.py +27 -1
- lightning_sdk/lit_container.py +40 -0
- lightning_sdk/mmt/base.py +22 -5
- lightning_sdk/mmt/mmt.py +5 -3
- lightning_sdk/mmt/v1.py +5 -3
- lightning_sdk/mmt/v2.py +11 -10
- {lightning_sdk-0.1.48.dist-info → lightning_sdk-0.1.50.dist-info}/METADATA +1 -1
- {lightning_sdk-0.1.48.dist-info → lightning_sdk-0.1.50.dist-info}/RECORD +30 -30
- {lightning_sdk-0.1.48.dist-info → lightning_sdk-0.1.50.dist-info}/LICENSE +0 -0
- {lightning_sdk-0.1.48.dist-info → lightning_sdk-0.1.50.dist-info}/WHEEL +0 -0
- {lightning_sdk-0.1.48.dist-info → lightning_sdk-0.1.50.dist-info}/entry_points.txt +0 -0
- {lightning_sdk-0.1.48.dist-info → lightning_sdk-0.1.50.dist-info}/top_level.txt +0 -0
lightning_sdk/job/work.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import TYPE_CHECKING, Any, Optional, Protocol
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Optional, Protocol, Union
|
|
2
2
|
|
|
3
3
|
from lightning_sdk.api.job_api import JobApiV1
|
|
4
4
|
|
|
@@ -51,7 +51,7 @@ class Work:
|
|
|
51
51
|
return self._job._name_filter(self._guaranteed_work.name)
|
|
52
52
|
|
|
53
53
|
@property
|
|
54
|
-
def machine(self) -> "Machine":
|
|
54
|
+
def machine(self) -> Union["Machine", str]:
|
|
55
55
|
return self._job_api.get_machine_from_work(self._guaranteed_work)
|
|
56
56
|
|
|
57
57
|
@property
|
|
@@ -42,7 +42,6 @@ class V1ClusterSpec(object):
|
|
|
42
42
|
"""
|
|
43
43
|
swagger_types = {
|
|
44
44
|
'auth_token': 'str',
|
|
45
|
-
'available_instance_types': 'list[str]',
|
|
46
45
|
'aws_v1': 'V1AWSDirectV1',
|
|
47
46
|
'cluster_type': 'V1ClusterType',
|
|
48
47
|
'deletion_options': 'V1ClusterDeletionOptions',
|
|
@@ -63,7 +62,6 @@ class V1ClusterSpec(object):
|
|
|
63
62
|
|
|
64
63
|
attribute_map = {
|
|
65
64
|
'auth_token': 'authToken',
|
|
66
|
-
'available_instance_types': 'availableInstanceTypes',
|
|
67
65
|
'aws_v1': 'awsV1',
|
|
68
66
|
'cluster_type': 'clusterType',
|
|
69
67
|
'deletion_options': 'deletionOptions',
|
|
@@ -82,10 +80,9 @@ class V1ClusterSpec(object):
|
|
|
82
80
|
'vultr_v1': 'vultrV1'
|
|
83
81
|
}
|
|
84
82
|
|
|
85
|
-
def __init__(self, auth_token: 'str' =None,
|
|
83
|
+
def __init__(self, auth_token: 'str' =None, aws_v1: 'V1AWSDirectV1' =None, cluster_type: 'V1ClusterType' =None, deletion_options: 'V1ClusterDeletionOptions' =None, desired_state: 'V1ClusterState' =None, domain: 'str' =None, freeze_accelerators: 'bool' =None, google_cloud_v1: 'V1GoogleCloudDirectV1' =None, insurer_disabled: 'bool' =None, lambda_labs_v1: 'V1LambdaLabsDirectV1' =None, overprovisioning: 'list[V1InstanceOverprovisioningSpec]' =None, pause_automation: 'bool' =None, security_options: 'V1ClusterSecurityOptions' =None, slurm_v1: 'V1SlurmV1' =None, tagging_options: 'V1ClusterTaggingOptions' =None, user_id: 'str' =None, vultr_v1: 'V1VultrDirectV1' =None): # noqa: E501
|
|
86
84
|
"""V1ClusterSpec - a model defined in Swagger""" # noqa: E501
|
|
87
85
|
self._auth_token = None
|
|
88
|
-
self._available_instance_types = None
|
|
89
86
|
self._aws_v1 = None
|
|
90
87
|
self._cluster_type = None
|
|
91
88
|
self._deletion_options = None
|
|
@@ -105,8 +102,6 @@ class V1ClusterSpec(object):
|
|
|
105
102
|
self.discriminator = None
|
|
106
103
|
if auth_token is not None:
|
|
107
104
|
self.auth_token = auth_token
|
|
108
|
-
if available_instance_types is not None:
|
|
109
|
-
self.available_instance_types = available_instance_types
|
|
110
105
|
if aws_v1 is not None:
|
|
111
106
|
self.aws_v1 = aws_v1
|
|
112
107
|
if cluster_type is not None:
|
|
@@ -161,29 +156,6 @@ class V1ClusterSpec(object):
|
|
|
161
156
|
|
|
162
157
|
self._auth_token = auth_token
|
|
163
158
|
|
|
164
|
-
@property
|
|
165
|
-
def available_instance_types(self) -> 'list[str]':
|
|
166
|
-
"""Gets the available_instance_types of this V1ClusterSpec. # noqa: E501
|
|
167
|
-
|
|
168
|
-
available_instance_types is a list of instance types that are available for the cluster. This is just a soft filter to prevent users from using instances that we haven't prepared for. If the list is empty, no filtering is done. # noqa: E501
|
|
169
|
-
|
|
170
|
-
:return: The available_instance_types of this V1ClusterSpec. # noqa: E501
|
|
171
|
-
:rtype: list[str]
|
|
172
|
-
"""
|
|
173
|
-
return self._available_instance_types
|
|
174
|
-
|
|
175
|
-
@available_instance_types.setter
|
|
176
|
-
def available_instance_types(self, available_instance_types: 'list[str]'):
|
|
177
|
-
"""Sets the available_instance_types of this V1ClusterSpec.
|
|
178
|
-
|
|
179
|
-
available_instance_types is a list of instance types that are available for the cluster. This is just a soft filter to prevent users from using instances that we haven't prepared for. If the list is empty, no filtering is done. # noqa: E501
|
|
180
|
-
|
|
181
|
-
:param available_instance_types: The available_instance_types of this V1ClusterSpec. # noqa: E501
|
|
182
|
-
:type: list[str]
|
|
183
|
-
"""
|
|
184
|
-
|
|
185
|
-
self._available_instance_types = available_instance_types
|
|
186
|
-
|
|
187
159
|
@property
|
|
188
160
|
def aws_v1(self) -> 'V1AWSDirectV1':
|
|
189
161
|
"""Gets the aws_v1 of this V1ClusterSpec. # noqa: E501
|
|
@@ -41,19 +41,24 @@ class V1LambdaLabsDirectV1(object):
|
|
|
41
41
|
and the value is json key in definition.
|
|
42
42
|
"""
|
|
43
43
|
swagger_types = {
|
|
44
|
-
'credentials_secret_id': 'str'
|
|
44
|
+
'credentials_secret_id': 'str',
|
|
45
|
+
'parent_cluster_id': 'str'
|
|
45
46
|
}
|
|
46
47
|
|
|
47
48
|
attribute_map = {
|
|
48
|
-
'credentials_secret_id': 'credentialsSecretId'
|
|
49
|
+
'credentials_secret_id': 'credentialsSecretId',
|
|
50
|
+
'parent_cluster_id': 'parentClusterId'
|
|
49
51
|
}
|
|
50
52
|
|
|
51
|
-
def __init__(self, credentials_secret_id: 'str' =None): # noqa: E501
|
|
53
|
+
def __init__(self, credentials_secret_id: 'str' =None, parent_cluster_id: 'str' =None): # noqa: E501
|
|
52
54
|
"""V1LambdaLabsDirectV1 - a model defined in Swagger""" # noqa: E501
|
|
53
55
|
self._credentials_secret_id = None
|
|
56
|
+
self._parent_cluster_id = None
|
|
54
57
|
self.discriminator = None
|
|
55
58
|
if credentials_secret_id is not None:
|
|
56
59
|
self.credentials_secret_id = credentials_secret_id
|
|
60
|
+
if parent_cluster_id is not None:
|
|
61
|
+
self.parent_cluster_id = parent_cluster_id
|
|
57
62
|
|
|
58
63
|
@property
|
|
59
64
|
def credentials_secret_id(self) -> 'str':
|
|
@@ -78,6 +83,29 @@ class V1LambdaLabsDirectV1(object):
|
|
|
78
83
|
|
|
79
84
|
self._credentials_secret_id = credentials_secret_id
|
|
80
85
|
|
|
86
|
+
@property
|
|
87
|
+
def parent_cluster_id(self) -> 'str':
|
|
88
|
+
"""Gets the parent_cluster_id of this V1LambdaLabsDirectV1. # noqa: E501
|
|
89
|
+
|
|
90
|
+
Note: LambdaLabs is missing object store in their offering, we will need to use either GCP or AWS S3 for that. # noqa: E501
|
|
91
|
+
|
|
92
|
+
:return: The parent_cluster_id of this V1LambdaLabsDirectV1. # noqa: E501
|
|
93
|
+
:rtype: str
|
|
94
|
+
"""
|
|
95
|
+
return self._parent_cluster_id
|
|
96
|
+
|
|
97
|
+
@parent_cluster_id.setter
|
|
98
|
+
def parent_cluster_id(self, parent_cluster_id: 'str'):
|
|
99
|
+
"""Sets the parent_cluster_id of this V1LambdaLabsDirectV1.
|
|
100
|
+
|
|
101
|
+
Note: LambdaLabs is missing object store in their offering, we will need to use either GCP or AWS S3 for that. # noqa: E501
|
|
102
|
+
|
|
103
|
+
:param parent_cluster_id: The parent_cluster_id of this V1LambdaLabsDirectV1. # noqa: E501
|
|
104
|
+
:type: str
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
self._parent_cluster_id = parent_cluster_id
|
|
108
|
+
|
|
81
109
|
def to_dict(self) -> dict:
|
|
82
110
|
"""Returns the model properties as a dict"""
|
|
83
111
|
result = {}
|
|
@@ -120,6 +120,7 @@ class V1UserFeatures(object):
|
|
|
120
120
|
'teamspace_storage_tab': 'bool',
|
|
121
121
|
'trainium2': 'bool',
|
|
122
122
|
'use_rclone_mounts_only': 'bool',
|
|
123
|
+
'vultr': 'bool',
|
|
123
124
|
'writable_data_connections': 'bool'
|
|
124
125
|
}
|
|
125
126
|
|
|
@@ -203,10 +204,11 @@ class V1UserFeatures(object):
|
|
|
203
204
|
'teamspace_storage_tab': 'teamspaceStorageTab',
|
|
204
205
|
'trainium2': 'trainium2',
|
|
205
206
|
'use_rclone_mounts_only': 'useRcloneMountsOnly',
|
|
207
|
+
'vultr': 'vultr',
|
|
206
208
|
'writable_data_connections': 'writableDataConnections'
|
|
207
209
|
}
|
|
208
210
|
|
|
209
|
-
def __init__(self, advanced_deployment_autoscaling: 'bool' =None, affiliate_links: 'bool' =None, agents_v2: 'bool' =None, ai_hub_monetization: 'bool' =None, auto_fast_load: 'bool' =None, auto_join_orgs: 'bool' =None, b2c_experience: 'bool' =None, cap_add: 'list[str]' =None, cap_drop: 'list[str]' =None, capacity_reservation_byoc: 'bool' =None, capacity_reservation_dry_run: 'bool' =None, code_tab: 'bool' =None, collab_screen_sharing: 'bool' =None, cost_attribution_settings: 'bool' =None, custom_app_domain: 'bool' =None, custom_instance_types: 'bool' =None, default_one_cluster: 'bool' =None, deployment_customize_api: 'bool' =None, deployment_data_path: 'bool' =None, deployment_gallery: 'bool' =None, deployment_persistent_disk: 'bool' =None, deployment_version_visibility: 'bool' =None, docs_agent: 'bool' =None, drive_v2: 'bool' =None, enable_crypto_crackdown: 'bool' =None, enable_efs: 'bool' =None, enable_storage_limits: 'bool' =None, featured_studios_admin: 'bool' =None, filesystem_optimisation: 'bool' =None, gcp: 'bool' =None, inference_job_deployment_plugin: 'bool' =None, instant_capacity_reservation: 'bool' =None, jobs_init: 'bool' =None, jobs_v2: 'bool' =None, landing_studios: 'bool' =None, lightning_registry: 'bool' =None, lit_logger: 'bool' =None, lit_logger_storage_v2: 'bool' =None, mmt_fault_tolerance: 'bool' =None, mmt_strategy_selector: 'bool' =None, mmt_v2: 'bool' =None, model_store: 'bool' =None, multiple_deployment_versions: 'bool' =None, multiple_studio_versions: 'bool' =None, org_level_member_permissions: 'bool' =None, pipelines: 'bool' =None, plugin_biz_chat: 'bool' =None, plugin_distributed: 'bool' =None, plugin_fiftyone: 'bool' =None, plugin_inference: 'bool' =None, plugin_label_studio: 'bool' =None, plugin_langflow: 'bool' =None, plugin_lightning_apps: 'bool' =None, plugin_lightning_apps_distributed: 'bool' =None, plugin_mage_ai: 'bool' =None, plugin_milvus: 'bool' =None, plugin_python_profiler: 'bool' =None, plugin_react: 'bool' =None, plugin_service: 'bool' =None, plugin_sweeps: 'bool' =None, plugin_weviate: 'bool' =None, pricing_updates: 'bool' =None, product_generator: 'bool' =None, project_selector: 'bool' =None, restart_ide_on_hang: 'bool' =None, restartable_jobs: 'bool' =None, runnable_public_studio_page: 'bool' =None, show_dev_admin: 'bool' =None, slurm: 'bool' =None, slurm_machine_selector: 'bool' =None, snapshotter_service: 'bool' =None, snowflake_connection: 'bool' =None, spot_v2: 'bool' =None, studio_config: 'bool' =None, studio_on_stop: 'bool' =None, studio_version_visibility: 'bool' =None, teamspace_storage_tab: 'bool' =None, trainium2: 'bool' =None, use_rclone_mounts_only: 'bool' =None, writable_data_connections: 'bool' =None): # noqa: E501
|
|
211
|
+
def __init__(self, advanced_deployment_autoscaling: 'bool' =None, affiliate_links: 'bool' =None, agents_v2: 'bool' =None, ai_hub_monetization: 'bool' =None, auto_fast_load: 'bool' =None, auto_join_orgs: 'bool' =None, b2c_experience: 'bool' =None, cap_add: 'list[str]' =None, cap_drop: 'list[str]' =None, capacity_reservation_byoc: 'bool' =None, capacity_reservation_dry_run: 'bool' =None, code_tab: 'bool' =None, collab_screen_sharing: 'bool' =None, cost_attribution_settings: 'bool' =None, custom_app_domain: 'bool' =None, custom_instance_types: 'bool' =None, default_one_cluster: 'bool' =None, deployment_customize_api: 'bool' =None, deployment_data_path: 'bool' =None, deployment_gallery: 'bool' =None, deployment_persistent_disk: 'bool' =None, deployment_version_visibility: 'bool' =None, docs_agent: 'bool' =None, drive_v2: 'bool' =None, enable_crypto_crackdown: 'bool' =None, enable_efs: 'bool' =None, enable_storage_limits: 'bool' =None, featured_studios_admin: 'bool' =None, filesystem_optimisation: 'bool' =None, gcp: 'bool' =None, inference_job_deployment_plugin: 'bool' =None, instant_capacity_reservation: 'bool' =None, jobs_init: 'bool' =None, jobs_v2: 'bool' =None, landing_studios: 'bool' =None, lightning_registry: 'bool' =None, lit_logger: 'bool' =None, lit_logger_storage_v2: 'bool' =None, mmt_fault_tolerance: 'bool' =None, mmt_strategy_selector: 'bool' =None, mmt_v2: 'bool' =None, model_store: 'bool' =None, multiple_deployment_versions: 'bool' =None, multiple_studio_versions: 'bool' =None, org_level_member_permissions: 'bool' =None, pipelines: 'bool' =None, plugin_biz_chat: 'bool' =None, plugin_distributed: 'bool' =None, plugin_fiftyone: 'bool' =None, plugin_inference: 'bool' =None, plugin_label_studio: 'bool' =None, plugin_langflow: 'bool' =None, plugin_lightning_apps: 'bool' =None, plugin_lightning_apps_distributed: 'bool' =None, plugin_mage_ai: 'bool' =None, plugin_milvus: 'bool' =None, plugin_python_profiler: 'bool' =None, plugin_react: 'bool' =None, plugin_service: 'bool' =None, plugin_sweeps: 'bool' =None, plugin_weviate: 'bool' =None, pricing_updates: 'bool' =None, product_generator: 'bool' =None, project_selector: 'bool' =None, restart_ide_on_hang: 'bool' =None, restartable_jobs: 'bool' =None, runnable_public_studio_page: 'bool' =None, show_dev_admin: 'bool' =None, slurm: 'bool' =None, slurm_machine_selector: 'bool' =None, snapshotter_service: 'bool' =None, snowflake_connection: 'bool' =None, spot_v2: 'bool' =None, studio_config: 'bool' =None, studio_on_stop: 'bool' =None, studio_version_visibility: 'bool' =None, teamspace_storage_tab: 'bool' =None, trainium2: 'bool' =None, use_rclone_mounts_only: 'bool' =None, vultr: 'bool' =None, writable_data_connections: 'bool' =None): # noqa: E501
|
|
210
212
|
"""V1UserFeatures - a model defined in Swagger""" # noqa: E501
|
|
211
213
|
self._advanced_deployment_autoscaling = None
|
|
212
214
|
self._affiliate_links = None
|
|
@@ -287,6 +289,7 @@ class V1UserFeatures(object):
|
|
|
287
289
|
self._teamspace_storage_tab = None
|
|
288
290
|
self._trainium2 = None
|
|
289
291
|
self._use_rclone_mounts_only = None
|
|
292
|
+
self._vultr = None
|
|
290
293
|
self._writable_data_connections = None
|
|
291
294
|
self.discriminator = None
|
|
292
295
|
if advanced_deployment_autoscaling is not None:
|
|
@@ -447,6 +450,8 @@ class V1UserFeatures(object):
|
|
|
447
450
|
self.trainium2 = trainium2
|
|
448
451
|
if use_rclone_mounts_only is not None:
|
|
449
452
|
self.use_rclone_mounts_only = use_rclone_mounts_only
|
|
453
|
+
if vultr is not None:
|
|
454
|
+
self.vultr = vultr
|
|
450
455
|
if writable_data_connections is not None:
|
|
451
456
|
self.writable_data_connections = writable_data_connections
|
|
452
457
|
|
|
@@ -2109,6 +2114,27 @@ class V1UserFeatures(object):
|
|
|
2109
2114
|
|
|
2110
2115
|
self._use_rclone_mounts_only = use_rclone_mounts_only
|
|
2111
2116
|
|
|
2117
|
+
@property
|
|
2118
|
+
def vultr(self) -> 'bool':
|
|
2119
|
+
"""Gets the vultr of this V1UserFeatures. # noqa: E501
|
|
2120
|
+
|
|
2121
|
+
|
|
2122
|
+
:return: The vultr of this V1UserFeatures. # noqa: E501
|
|
2123
|
+
:rtype: bool
|
|
2124
|
+
"""
|
|
2125
|
+
return self._vultr
|
|
2126
|
+
|
|
2127
|
+
@vultr.setter
|
|
2128
|
+
def vultr(self, vultr: 'bool'):
|
|
2129
|
+
"""Sets the vultr of this V1UserFeatures.
|
|
2130
|
+
|
|
2131
|
+
|
|
2132
|
+
:param vultr: The vultr of this V1UserFeatures. # noqa: E501
|
|
2133
|
+
:type: bool
|
|
2134
|
+
"""
|
|
2135
|
+
|
|
2136
|
+
self._vultr = vultr
|
|
2137
|
+
|
|
2112
2138
|
@property
|
|
2113
2139
|
def writable_data_connections(self) -> 'bool':
|
|
2114
2140
|
"""Gets the writable_data_connections of this V1UserFeatures. # noqa: E501
|
|
@@ -42,24 +42,29 @@ class V1VultrDirectV1(object):
|
|
|
42
42
|
"""
|
|
43
43
|
swagger_types = {
|
|
44
44
|
'credentials_secret_id': 'str',
|
|
45
|
+
'parent_cluster_id': 'str',
|
|
45
46
|
'primary_region': 'str',
|
|
46
47
|
'regions': 'list[str]'
|
|
47
48
|
}
|
|
48
49
|
|
|
49
50
|
attribute_map = {
|
|
50
51
|
'credentials_secret_id': 'credentialsSecretId',
|
|
52
|
+
'parent_cluster_id': 'parentClusterId',
|
|
51
53
|
'primary_region': 'primaryRegion',
|
|
52
54
|
'regions': 'regions'
|
|
53
55
|
}
|
|
54
56
|
|
|
55
|
-
def __init__(self, credentials_secret_id: 'str' =None, primary_region: 'str' =None, regions: 'list[str]' =None): # noqa: E501
|
|
57
|
+
def __init__(self, credentials_secret_id: 'str' =None, parent_cluster_id: 'str' =None, primary_region: 'str' =None, regions: 'list[str]' =None): # noqa: E501
|
|
56
58
|
"""V1VultrDirectV1 - a model defined in Swagger""" # noqa: E501
|
|
57
59
|
self._credentials_secret_id = None
|
|
60
|
+
self._parent_cluster_id = None
|
|
58
61
|
self._primary_region = None
|
|
59
62
|
self._regions = None
|
|
60
63
|
self.discriminator = None
|
|
61
64
|
if credentials_secret_id is not None:
|
|
62
65
|
self.credentials_secret_id = credentials_secret_id
|
|
66
|
+
if parent_cluster_id is not None:
|
|
67
|
+
self.parent_cluster_id = parent_cluster_id
|
|
63
68
|
if primary_region is not None:
|
|
64
69
|
self.primary_region = primary_region
|
|
65
70
|
if regions is not None:
|
|
@@ -88,6 +93,27 @@ class V1VultrDirectV1(object):
|
|
|
88
93
|
|
|
89
94
|
self._credentials_secret_id = credentials_secret_id
|
|
90
95
|
|
|
96
|
+
@property
|
|
97
|
+
def parent_cluster_id(self) -> 'str':
|
|
98
|
+
"""Gets the parent_cluster_id of this V1VultrDirectV1. # noqa: E501
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
:return: The parent_cluster_id of this V1VultrDirectV1. # noqa: E501
|
|
102
|
+
:rtype: str
|
|
103
|
+
"""
|
|
104
|
+
return self._parent_cluster_id
|
|
105
|
+
|
|
106
|
+
@parent_cluster_id.setter
|
|
107
|
+
def parent_cluster_id(self, parent_cluster_id: 'str'):
|
|
108
|
+
"""Sets the parent_cluster_id of this V1VultrDirectV1.
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
:param parent_cluster_id: The parent_cluster_id of this V1VultrDirectV1. # noqa: E501
|
|
112
|
+
:type: str
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
self._parent_cluster_id = parent_cluster_id
|
|
116
|
+
|
|
91
117
|
@property
|
|
92
118
|
def primary_region(self) -> 'str':
|
|
93
119
|
"""Gets the primary_region of this V1VultrDirectV1. # noqa: E501
|
lightning_sdk/lit_container.py
CHANGED
|
@@ -55,3 +55,43 @@ class LitContainer:
|
|
|
55
55
|
raise ValueError("Could not resolve teamspace") from e
|
|
56
56
|
project_id = teamspace.id
|
|
57
57
|
return self._api.delete_container(project_id, container)
|
|
58
|
+
|
|
59
|
+
def upload_container(
|
|
60
|
+
self, container: str, teamspace: str, org: Optional[str] = None, user: Optional[str] = None, tag: str = "latest"
|
|
61
|
+
) -> None:
|
|
62
|
+
"""Upload a container to the docker registry.
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
container: The name of the container to upload.
|
|
66
|
+
teamspace: The teamspace which contains the container.
|
|
67
|
+
org: The organization which contains the container.
|
|
68
|
+
user: The user which contains the container.
|
|
69
|
+
tag: The tag to use for the container.
|
|
70
|
+
"""
|
|
71
|
+
try:
|
|
72
|
+
teamspace = _resolve_teamspace(teamspace=teamspace, org=org, user=user)
|
|
73
|
+
except Exception as e:
|
|
74
|
+
raise ValueError(f"Could not resolve teamspace: {e}") from e
|
|
75
|
+
|
|
76
|
+
resp = self._api.upload_container(container, teamspace, tag)
|
|
77
|
+
for line in resp:
|
|
78
|
+
print(line)
|
|
79
|
+
|
|
80
|
+
def download_container(
|
|
81
|
+
self, container: str, teamspace: str, org: Optional[str] = None, user: Optional[str] = None, tag: str = "latest"
|
|
82
|
+
) -> None:
|
|
83
|
+
"""Download a container from the docker registry.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
container: The name of the container to download.
|
|
87
|
+
teamspace: The teamspace which contains the container.
|
|
88
|
+
org: The organization which contains the container.
|
|
89
|
+
user: The user which contains the container.
|
|
90
|
+
tag: The tag to use for the container.
|
|
91
|
+
"""
|
|
92
|
+
try:
|
|
93
|
+
teamspace = _resolve_teamspace(teamspace=teamspace, org=org, user=user)
|
|
94
|
+
except Exception as e:
|
|
95
|
+
raise ValueError(f"Could not resolve teamspace: {e}") from e
|
|
96
|
+
|
|
97
|
+
return self._api.download_container(container, teamspace, tag)
|
lightning_sdk/mmt/base.py
CHANGED
|
@@ -23,7 +23,7 @@ class MMTMachine(Protocol):
|
|
|
23
23
|
...
|
|
24
24
|
|
|
25
25
|
@property
|
|
26
|
-
def machine(self) -> "Machine":
|
|
26
|
+
def machine(self) -> Union["Machine", str]:
|
|
27
27
|
"""The actual machine type this node is running on."""
|
|
28
28
|
...
|
|
29
29
|
|
|
@@ -54,7 +54,7 @@ class _BaseMMT(_BaseJob):
|
|
|
54
54
|
def run(
|
|
55
55
|
cls,
|
|
56
56
|
name: str,
|
|
57
|
-
machine: "Machine",
|
|
57
|
+
machine: Union["Machine", str],
|
|
58
58
|
num_machines: int,
|
|
59
59
|
command: Optional[str] = None,
|
|
60
60
|
studio: Union["Studio", str, None] = None,
|
|
@@ -199,7 +199,7 @@ class _BaseMMT(_BaseJob):
|
|
|
199
199
|
def _submit(
|
|
200
200
|
self,
|
|
201
201
|
num_machines: int,
|
|
202
|
-
machine: "Machine",
|
|
202
|
+
machine: Union["Machine", str],
|
|
203
203
|
command: Optional[str] = None,
|
|
204
204
|
studio: Optional["Studio"] = None,
|
|
205
205
|
image: Optional[str] = None,
|
|
@@ -250,9 +250,14 @@ class _BaseMMT(_BaseJob):
|
|
|
250
250
|
def machines(self) -> Tuple[MMTMachine, ...]:
|
|
251
251
|
"""Returns the sub-jobs for each individual instance."""
|
|
252
252
|
|
|
253
|
+
@property
|
|
254
|
+
def num_machines(self) -> int:
|
|
255
|
+
"""Returns the number of machines assigned to this multi-machine job."""
|
|
256
|
+
return len(self.machines)
|
|
257
|
+
|
|
253
258
|
@property
|
|
254
259
|
@abstractmethod
|
|
255
|
-
def machine(self) -> "Machine":
|
|
260
|
+
def machine(self) -> Union["Machine", str]:
|
|
256
261
|
"""Returns the machine type this job is running on."""
|
|
257
262
|
|
|
258
263
|
@abstractmethod
|
|
@@ -303,7 +308,18 @@ class _BaseMMT(_BaseJob):
|
|
|
303
308
|
|
|
304
309
|
def dict(
|
|
305
310
|
self
|
|
306
|
-
) -> Dict[
|
|
311
|
+
) -> Dict[
|
|
312
|
+
str,
|
|
313
|
+
Union[
|
|
314
|
+
str,
|
|
315
|
+
float,
|
|
316
|
+
"Studio",
|
|
317
|
+
"Status",
|
|
318
|
+
"Machine",
|
|
319
|
+
None,
|
|
320
|
+
List[Dict[str, Union[str, "Status", "Machine"]]],
|
|
321
|
+
],
|
|
322
|
+
]:
|
|
307
323
|
"""Dict representation of this job."""
|
|
308
324
|
studio = self.studio
|
|
309
325
|
|
|
@@ -319,6 +335,7 @@ class _BaseMMT(_BaseJob):
|
|
|
319
335
|
{"name": d["name"], "status": d["status"], "machine": d["machine"]}
|
|
320
336
|
for d in (x.dict() for x in self.machines)
|
|
321
337
|
],
|
|
338
|
+
"total_cost": self.total_cost,
|
|
322
339
|
}
|
|
323
340
|
|
|
324
341
|
@abstractmethod
|
lightning_sdk/mmt/mmt.py
CHANGED
|
@@ -97,7 +97,7 @@ class MMT(_BaseMMT):
|
|
|
97
97
|
cls,
|
|
98
98
|
name: str,
|
|
99
99
|
num_machines: int,
|
|
100
|
-
machine: "Machine",
|
|
100
|
+
machine: Union["Machine", str],
|
|
101
101
|
command: Optional[str] = None,
|
|
102
102
|
studio: Union["Studio", str, None] = None,
|
|
103
103
|
image: Union[str, None] = None,
|
|
@@ -167,6 +167,7 @@ class MMT(_BaseMMT):
|
|
|
167
167
|
cloud_account_auth=cloud_account_auth,
|
|
168
168
|
artifacts_local=artifacts_local,
|
|
169
169
|
artifacts_remote=artifacts_remote,
|
|
170
|
+
entrypoint=entrypoint,
|
|
170
171
|
cluster=cluster, # deprecated in favor of cloud_account
|
|
171
172
|
)
|
|
172
173
|
# required for typing with "MMT"
|
|
@@ -180,7 +181,7 @@ class MMT(_BaseMMT):
|
|
|
180
181
|
def _submit(
|
|
181
182
|
self,
|
|
182
183
|
num_machines: int,
|
|
183
|
-
machine: "Machine",
|
|
184
|
+
machine: Union["Machine", str],
|
|
184
185
|
command: Optional[str] = None,
|
|
185
186
|
studio: Optional["Studio"] = None,
|
|
186
187
|
image: Optional[str] = None,
|
|
@@ -239,6 +240,7 @@ class MMT(_BaseMMT):
|
|
|
239
240
|
cloud_account_auth=cloud_account_auth,
|
|
240
241
|
artifacts_local=artifacts_local,
|
|
241
242
|
artifacts_remote=artifacts_remote,
|
|
243
|
+
entrypoint=entrypoint,
|
|
242
244
|
)
|
|
243
245
|
return self
|
|
244
246
|
|
|
@@ -264,7 +266,7 @@ class MMT(_BaseMMT):
|
|
|
264
266
|
return self._internal_mmt.machines
|
|
265
267
|
|
|
266
268
|
@property
|
|
267
|
-
def machine(self) -> "Machine":
|
|
269
|
+
def machine(self) -> Union["Machine", str]:
|
|
268
270
|
"""Returns the machine type this job is running on."""
|
|
269
271
|
return self._internal_mmt.machine
|
|
270
272
|
|
lightning_sdk/mmt/v1.py
CHANGED
|
@@ -4,11 +4,11 @@ from lightning_sdk.api.mmt_api import MMTApiV1
|
|
|
4
4
|
from lightning_sdk.api.utils import _get_cloud_url
|
|
5
5
|
from lightning_sdk.job.v1 import _internal_status_to_external_status
|
|
6
6
|
from lightning_sdk.job.work import Work
|
|
7
|
+
from lightning_sdk.status import Status
|
|
7
8
|
|
|
8
9
|
if TYPE_CHECKING:
|
|
9
10
|
from lightning_sdk.machine import Machine
|
|
10
11
|
from lightning_sdk.organization import Organization
|
|
11
|
-
from lightning_sdk.status import Status
|
|
12
12
|
from lightning_sdk.studio import Studio
|
|
13
13
|
from lightning_sdk.teamspace import Teamspace
|
|
14
14
|
from lightning_sdk.user import User
|
|
@@ -43,7 +43,7 @@ class _MMTV1(_BaseMMT):
|
|
|
43
43
|
def _submit(
|
|
44
44
|
self,
|
|
45
45
|
num_machines: int,
|
|
46
|
-
machine: "Machine",
|
|
46
|
+
machine: Union["Machine", str],
|
|
47
47
|
command: Optional[str] = None,
|
|
48
48
|
studio: Optional["Studio"] = None,
|
|
49
49
|
image: Optional[str] = None,
|
|
@@ -133,6 +133,8 @@ class _MMTV1(_BaseMMT):
|
|
|
133
133
|
|
|
134
134
|
def stop(self) -> None:
|
|
135
135
|
"""Stops the job."""
|
|
136
|
+
if self.status in (Status.Stopped, Status.Completed, Status.Failed):
|
|
137
|
+
return
|
|
136
138
|
self._job_api.stop_job(self._guaranteed_job.id, self.teamspace.id)
|
|
137
139
|
|
|
138
140
|
def delete(self) -> None:
|
|
@@ -164,7 +166,7 @@ class _MMTV1(_BaseMMT):
|
|
|
164
166
|
return f"/teamspace/jobs/{self.name}/snapshot"
|
|
165
167
|
|
|
166
168
|
@property
|
|
167
|
-
def machine(self) -> "Machine":
|
|
169
|
+
def machine(self) -> Union["Machine", str]:
|
|
168
170
|
"""Returns the machine type this job is running on."""
|
|
169
171
|
return self.machines[0].machine
|
|
170
172
|
|
lightning_sdk/mmt/v2.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
from typing import TYPE_CHECKING,
|
|
1
|
+
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
|
|
2
2
|
|
|
3
3
|
from lightning_sdk.api.mmt_api import MMTApiV2
|
|
4
4
|
from lightning_sdk.api.utils import _get_cloud_url
|
|
5
|
+
from lightning_sdk.status import Status
|
|
5
6
|
|
|
6
7
|
if TYPE_CHECKING:
|
|
7
8
|
from lightning_sdk.job.job import Job
|
|
8
9
|
from lightning_sdk.machine import Machine
|
|
9
10
|
from lightning_sdk.organization import Organization
|
|
10
|
-
from lightning_sdk.status import Status
|
|
11
11
|
from lightning_sdk.studio import Studio
|
|
12
12
|
from lightning_sdk.teamspace import Teamspace
|
|
13
13
|
from lightning_sdk.user import User
|
|
@@ -42,7 +42,7 @@ class _MMTV2(_BaseMMT):
|
|
|
42
42
|
def _submit(
|
|
43
43
|
self,
|
|
44
44
|
num_machines: int,
|
|
45
|
-
machine: "Machine",
|
|
45
|
+
machine: Union["Machine", str],
|
|
46
46
|
command: Optional[str] = None,
|
|
47
47
|
studio: Optional["Studio"] = None,
|
|
48
48
|
image: Optional[str] = None,
|
|
@@ -137,6 +137,8 @@ class _MMTV2(_BaseMMT):
|
|
|
137
137
|
|
|
138
138
|
def stop(self) -> None:
|
|
139
139
|
"""Stops the job."""
|
|
140
|
+
if self.status in (Status.Stopped, Status.Completed, Status.Failed):
|
|
141
|
+
return
|
|
140
142
|
self._job_api.stop_job(job_id=self._guaranteed_job.id, teamspace_id=self._teamspace.id)
|
|
141
143
|
|
|
142
144
|
def delete(self) -> None:
|
|
@@ -149,12 +151,6 @@ class _MMTV2(_BaseMMT):
|
|
|
149
151
|
teamspace_id=self._teamspace.id,
|
|
150
152
|
)
|
|
151
153
|
|
|
152
|
-
@property
|
|
153
|
-
def _latest_job(self) -> Any:
|
|
154
|
-
"""Guarantees to fetch the latest version of a job before returning it."""
|
|
155
|
-
self._update_internal_job()
|
|
156
|
-
return self._job
|
|
157
|
-
|
|
158
154
|
@property
|
|
159
155
|
def status(self) -> "Status":
|
|
160
156
|
"""The current status of the job."""
|
|
@@ -173,7 +169,7 @@ class _MMTV2(_BaseMMT):
|
|
|
173
169
|
raise NotImplementedError
|
|
174
170
|
|
|
175
171
|
@property
|
|
176
|
-
def machine(self) -> "Machine":
|
|
172
|
+
def machine(self) -> Union["Machine", str]:
|
|
177
173
|
"""Returns the machine type this job is running on."""
|
|
178
174
|
return self._job_api._get_job_machine_from_spec(self._guaranteed_job.spec)
|
|
179
175
|
|
|
@@ -220,3 +216,8 @@ class _MMTV2(_BaseMMT):
|
|
|
220
216
|
def command(self) -> str:
|
|
221
217
|
"""The command the job is running."""
|
|
222
218
|
return self._job_api.get_command(self._guaranteed_job)
|
|
219
|
+
|
|
220
|
+
@property
|
|
221
|
+
def num_machines(self) -> int:
|
|
222
|
+
"""Returns the number of machines assigned to this multi-machine job."""
|
|
223
|
+
return self._job_api.get_num_machines(self._guaranteed_job)
|