skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/utils/schemas.py
CHANGED
@@ -4,6 +4,9 @@ Schemas conform to the JSON Schema specification as defined at
|
|
4
4
|
https://json-schema.org/
|
5
5
|
"""
|
6
6
|
import enum
|
7
|
+
from typing import Any, Dict, List, Tuple
|
8
|
+
|
9
|
+
from sky.skylet import constants
|
7
10
|
|
8
11
|
|
9
12
|
def _check_not_both_fields_present(field1: str, field2: str):
|
@@ -83,13 +86,28 @@ def _get_single_resources_schema():
|
|
83
86
|
'use_spot': {
|
84
87
|
'type': 'boolean',
|
85
88
|
},
|
86
|
-
# Deprecated: use 'job_recovery' instead. This is for backward
|
87
|
-
# compatibility, and can be removed in 0.8.0.
|
88
|
-
'spot_recovery': {
|
89
|
-
'type': 'string',
|
90
|
-
},
|
91
89
|
'job_recovery': {
|
92
|
-
|
90
|
+
# Either a string or a dict.
|
91
|
+
'anyOf': [{
|
92
|
+
'type': 'string',
|
93
|
+
}, {
|
94
|
+
'type': 'object',
|
95
|
+
'required': [],
|
96
|
+
'additionalProperties': False,
|
97
|
+
'properties': {
|
98
|
+
'strategy': {
|
99
|
+
'anyOf': [{
|
100
|
+
'type': 'string',
|
101
|
+
}, {
|
102
|
+
'type': 'null',
|
103
|
+
}],
|
104
|
+
},
|
105
|
+
'max_restarts_on_errors': {
|
106
|
+
'type': 'integer',
|
107
|
+
'minimum': 0,
|
108
|
+
},
|
109
|
+
}
|
110
|
+
}],
|
93
111
|
},
|
94
112
|
'disk_size': {
|
95
113
|
'type': 'integer',
|
@@ -111,6 +129,8 @@ def _get_single_resources_schema():
|
|
111
129
|
'type': 'integer',
|
112
130
|
}]
|
113
131
|
}
|
132
|
+
}, {
|
133
|
+
'type': 'null',
|
114
134
|
}],
|
115
135
|
},
|
116
136
|
'labels': {
|
@@ -145,7 +165,8 @@ def _get_single_resources_schema():
|
|
145
165
|
'type': 'null',
|
146
166
|
}]
|
147
167
|
},
|
148
|
-
# The following fields are for internal use only.
|
168
|
+
# The following fields are for internal use only. Should not be
|
169
|
+
# specified in the task config.
|
149
170
|
'_docker_login_config': {
|
150
171
|
'type': 'object',
|
151
172
|
'required': ['username', 'password', 'server'],
|
@@ -168,6 +189,9 @@ def _get_single_resources_schema():
|
|
168
189
|
'_requires_fuse': {
|
169
190
|
'type': 'boolean',
|
170
191
|
},
|
192
|
+
'_cluster_config_overrides': {
|
193
|
+
'type': 'object',
|
194
|
+
},
|
171
195
|
}
|
172
196
|
}
|
173
197
|
|
@@ -227,8 +251,6 @@ def get_resources_schema():
|
|
227
251
|
'items': multi_resources_schema,
|
228
252
|
}
|
229
253
|
},
|
230
|
-
# Avoid job_recovery and spot_recovery being present at the same time.
|
231
|
-
**_check_not_both_fields_present('job_recovery', 'spot_recovery')
|
232
254
|
}
|
233
255
|
|
234
256
|
|
@@ -270,6 +292,12 @@ def get_storage_schema():
|
|
270
292
|
mode.value for mode in storage.StorageMode
|
271
293
|
]
|
272
294
|
},
|
295
|
+
'_is_sky_managed': {
|
296
|
+
'type': 'boolean',
|
297
|
+
},
|
298
|
+
'_bucket_sub_path': {
|
299
|
+
'type': 'string',
|
300
|
+
},
|
273
301
|
'_force_delete': {
|
274
302
|
'type': 'boolean',
|
275
303
|
}
|
@@ -279,6 +307,9 @@ def get_storage_schema():
|
|
279
307
|
|
280
308
|
def get_service_schema():
|
281
309
|
"""Schema for top-level `service:` field (for SkyServe)."""
|
310
|
+
# To avoid circular imports, only import when needed.
|
311
|
+
# pylint: disable=import-outside-toplevel
|
312
|
+
from sky.serve import load_balancing_policies
|
282
313
|
return {
|
283
314
|
'$schema': 'https://json-schema.org/draft/2020-12/schema',
|
284
315
|
'type': 'object',
|
@@ -299,6 +330,9 @@ def get_service_schema():
|
|
299
330
|
'initial_delay_seconds': {
|
300
331
|
'type': 'number',
|
301
332
|
},
|
333
|
+
'timeout_seconds': {
|
334
|
+
'type': 'number',
|
335
|
+
},
|
302
336
|
'post_data': {
|
303
337
|
'anyOf': [{
|
304
338
|
'type': 'string',
|
@@ -345,24 +379,100 @@ def get_service_schema():
|
|
345
379
|
'downscale_delay_seconds': {
|
346
380
|
'type': 'number',
|
347
381
|
},
|
348
|
-
# TODO(MaoZiming): Fields `qps_upper_threshold`,
|
349
|
-
# `qps_lower_threshold` and `auto_restart` are deprecated.
|
350
|
-
# Temporarily keep these fields for backward compatibility.
|
351
|
-
# Remove after 2 minor release, i.e., 0.6.0.
|
352
|
-
'auto_restart': {
|
353
|
-
'type': 'boolean',
|
354
|
-
},
|
355
|
-
'qps_upper_threshold': {
|
356
|
-
'type': 'number',
|
357
|
-
},
|
358
|
-
'qps_lower_threshold': {
|
359
|
-
'type': 'number',
|
360
|
-
},
|
361
382
|
}
|
362
383
|
},
|
384
|
+
'ports': {
|
385
|
+
'type': 'integer',
|
386
|
+
},
|
363
387
|
'replicas': {
|
364
388
|
'type': 'integer',
|
365
389
|
},
|
390
|
+
'load_balancing_policy': {
|
391
|
+
'type': 'string',
|
392
|
+
'case_insensitive_enum': list(
|
393
|
+
load_balancing_policies.LB_POLICIES.keys())
|
394
|
+
},
|
395
|
+
'tls': {
|
396
|
+
'type': 'object',
|
397
|
+
'required': ['keyfile', 'certfile'],
|
398
|
+
'additionalProperties': False,
|
399
|
+
'properties': {
|
400
|
+
'keyfile': {
|
401
|
+
'type': 'string',
|
402
|
+
},
|
403
|
+
'certfile': {
|
404
|
+
'type': 'string',
|
405
|
+
},
|
406
|
+
},
|
407
|
+
},
|
408
|
+
}
|
409
|
+
}
|
410
|
+
|
411
|
+
|
412
|
+
def _filter_schema(schema: dict, keys_to_keep: List[Tuple[str, ...]]) -> dict:
|
413
|
+
"""Recursively filter a schema to include only certain keys.
|
414
|
+
|
415
|
+
Args:
|
416
|
+
schema: The original schema dictionary.
|
417
|
+
keys_to_keep: List of tuples with the path of keys to retain.
|
418
|
+
|
419
|
+
Returns:
|
420
|
+
The filtered schema.
|
421
|
+
"""
|
422
|
+
# Convert list of tuples to a dictionary for easier access
|
423
|
+
paths_dict: Dict[str, Any] = {}
|
424
|
+
for path in keys_to_keep:
|
425
|
+
current = paths_dict
|
426
|
+
for step in path:
|
427
|
+
if step not in current:
|
428
|
+
current[step] = {}
|
429
|
+
current = current[step]
|
430
|
+
|
431
|
+
def keep_keys(current_schema: dict, current_path_dict: dict,
|
432
|
+
new_schema: dict) -> dict:
|
433
|
+
# Base case: if we reach a leaf in the path_dict, we stop.
|
434
|
+
if (not current_path_dict or not isinstance(current_schema, dict) or
|
435
|
+
not current_schema.get('properties')):
|
436
|
+
return current_schema
|
437
|
+
|
438
|
+
if 'properties' not in new_schema:
|
439
|
+
new_schema = {
|
440
|
+
key: current_schema[key]
|
441
|
+
for key in current_schema
|
442
|
+
# We do not support the handling of `oneOf`, `anyOf`, `allOf`,
|
443
|
+
# `required` for now.
|
444
|
+
if key not in
|
445
|
+
{'properties', 'oneOf', 'anyOf', 'allOf', 'required'}
|
446
|
+
}
|
447
|
+
new_schema['properties'] = {}
|
448
|
+
for key, sub_schema in current_schema['properties'].items():
|
449
|
+
if key in current_path_dict:
|
450
|
+
# Recursively keep keys if further path dict exists
|
451
|
+
new_schema['properties'][key] = {}
|
452
|
+
current_path_value = current_path_dict.pop(key)
|
453
|
+
new_schema['properties'][key] = keep_keys(
|
454
|
+
sub_schema, current_path_value,
|
455
|
+
new_schema['properties'][key])
|
456
|
+
|
457
|
+
return new_schema
|
458
|
+
|
459
|
+
# Start the recursive filtering
|
460
|
+
new_schema = keep_keys(schema, paths_dict, {})
|
461
|
+
assert not paths_dict, f'Unprocessed keys: {paths_dict}'
|
462
|
+
return new_schema
|
463
|
+
|
464
|
+
|
465
|
+
def _experimental_task_schema() -> dict:
|
466
|
+
config_override_schema = _filter_schema(
|
467
|
+
get_config_schema(), constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK)
|
468
|
+
return {
|
469
|
+
'experimental': {
|
470
|
+
'type': 'object',
|
471
|
+
'required': [],
|
472
|
+
'additionalProperties': False,
|
473
|
+
'properties': {
|
474
|
+
'config_overrides': config_override_schema,
|
475
|
+
}
|
366
476
|
}
|
367
477
|
}
|
368
478
|
|
@@ -432,6 +542,10 @@ def get_task_schema():
|
|
432
542
|
'type': 'number'
|
433
543
|
}
|
434
544
|
},
|
545
|
+
'file_mounts_mapping': {
|
546
|
+
'type': 'object',
|
547
|
+
},
|
548
|
+
**_experimental_task_schema(),
|
435
549
|
}
|
436
550
|
}
|
437
551
|
|
@@ -513,15 +627,6 @@ _NETWORK_CONFIG_SCHEMA = {
|
|
513
627
|
}
|
514
628
|
|
515
629
|
_LABELS_SCHEMA = {
|
516
|
-
# Deprecated: 'instance_tags' is replaced by 'labels'. Keeping for backward
|
517
|
-
# compatibility. Will be removed after 0.7.0.
|
518
|
-
'instance_tags': {
|
519
|
-
'type': 'object',
|
520
|
-
'required': [],
|
521
|
-
'additionalProperties': {
|
522
|
-
'type': 'string',
|
523
|
-
},
|
524
|
-
},
|
525
630
|
'labels': {
|
526
631
|
'type': 'object',
|
527
632
|
'required': [],
|
@@ -531,6 +636,32 @@ _LABELS_SCHEMA = {
|
|
531
636
|
}
|
532
637
|
}
|
533
638
|
|
639
|
+
_PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY = {
|
640
|
+
'oneOf': [
|
641
|
+
{
|
642
|
+
'type': 'string'
|
643
|
+
},
|
644
|
+
{
|
645
|
+
# A list of single-element dicts to retain the
|
646
|
+
# order.
|
647
|
+
# Example:
|
648
|
+
# property_name:
|
649
|
+
# - my-cluster1-*: my-property-1
|
650
|
+
# - my-cluster2-*: my-property-2
|
651
|
+
# - "*": my-property-3
|
652
|
+
'type': 'array',
|
653
|
+
'items': {
|
654
|
+
'type': 'object',
|
655
|
+
'additionalProperties': {
|
656
|
+
'type': 'string'
|
657
|
+
},
|
658
|
+
'maxProperties': 1,
|
659
|
+
'minProperties': 1,
|
660
|
+
},
|
661
|
+
}
|
662
|
+
]
|
663
|
+
}
|
664
|
+
|
534
665
|
|
535
666
|
class RemoteIdentityOptions(enum.Enum):
|
536
667
|
"""Enum for remote identity types.
|
@@ -541,6 +672,7 @@ class RemoteIdentityOptions(enum.Enum):
|
|
541
672
|
"""
|
542
673
|
LOCAL_CREDENTIALS = 'LOCAL_CREDENTIALS'
|
543
674
|
SERVICE_ACCOUNT = 'SERVICE_ACCOUNT'
|
675
|
+
NO_UPLOAD = 'NO_UPLOAD'
|
544
676
|
|
545
677
|
|
546
678
|
def get_default_remote_identity(cloud: str) -> str:
|
@@ -559,36 +691,16 @@ _REMOTE_IDENTITY_SCHEMA = {
|
|
559
691
|
}
|
560
692
|
}
|
561
693
|
|
562
|
-
|
694
|
+
_REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
|
563
695
|
'remote_identity': {
|
564
|
-
'
|
565
|
-
|
696
|
+
'anyOf': [{
|
697
|
+
'type': 'string'
|
698
|
+
}, {
|
699
|
+
'type': 'object',
|
700
|
+
'additionalProperties': {
|
566
701
|
'type': 'string'
|
567
|
-
},
|
568
|
-
{
|
569
|
-
# A list of single-element dict to pretain the order.
|
570
|
-
# Example:
|
571
|
-
# remote_identity:
|
572
|
-
# - my-cluster1-*: my-iam-role-1
|
573
|
-
# - my-cluster2-*: my-iam-role-2
|
574
|
-
# - "*"": my-iam-role-3
|
575
|
-
'type': 'array',
|
576
|
-
'items': {
|
577
|
-
'type': 'object',
|
578
|
-
'additionalProperties': {
|
579
|
-
'type': 'string'
|
580
|
-
},
|
581
|
-
'maxProperties': 1,
|
582
|
-
'minProperties': 1,
|
583
|
-
},
|
584
702
|
}
|
585
|
-
]
|
586
|
-
}
|
587
|
-
}
|
588
|
-
|
589
|
-
_REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
|
590
|
-
'remote_identity': {
|
591
|
-
'type': 'string'
|
703
|
+
}]
|
592
704
|
},
|
593
705
|
}
|
594
706
|
|
@@ -618,6 +730,11 @@ def get_config_schema():
|
|
618
730
|
'resources': resources_schema,
|
619
731
|
}
|
620
732
|
},
|
733
|
+
'bucket': {
|
734
|
+
'type': 'string',
|
735
|
+
'pattern': '^(https|s3|gs|r2|cos)://.+',
|
736
|
+
'required': [],
|
737
|
+
}
|
621
738
|
}
|
622
739
|
}
|
623
740
|
cloud_configs = {
|
@@ -626,9 +743,20 @@ def get_config_schema():
|
|
626
743
|
'required': [],
|
627
744
|
'additionalProperties': False,
|
628
745
|
'properties': {
|
629
|
-
'
|
630
|
-
'type': '
|
746
|
+
'prioritize_reservations': {
|
747
|
+
'type': 'boolean',
|
631
748
|
},
|
749
|
+
'specific_reservations': {
|
750
|
+
'type': 'array',
|
751
|
+
'items': {
|
752
|
+
'type': 'string',
|
753
|
+
},
|
754
|
+
},
|
755
|
+
'disk_encrypted': {
|
756
|
+
'type': 'boolean',
|
757
|
+
},
|
758
|
+
'security_group_name':
|
759
|
+
(_PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY),
|
632
760
|
**_LABELS_SCHEMA,
|
633
761
|
**_NETWORK_CONFIG_SCHEMA,
|
634
762
|
},
|
@@ -648,16 +776,54 @@ def get_config_schema():
|
|
648
776
|
'type': 'string',
|
649
777
|
},
|
650
778
|
},
|
779
|
+
'managed_instance_group': {
|
780
|
+
'type': 'object',
|
781
|
+
'required': ['run_duration'],
|
782
|
+
'additionalProperties': False,
|
783
|
+
'properties': {
|
784
|
+
'run_duration': {
|
785
|
+
'type': 'integer',
|
786
|
+
},
|
787
|
+
'provision_timeout': {
|
788
|
+
'type': 'integer',
|
789
|
+
}
|
790
|
+
}
|
791
|
+
},
|
792
|
+
'force_enable_external_ips': {
|
793
|
+
'type': 'boolean'
|
794
|
+
},
|
795
|
+
'enable_gvnic': {
|
796
|
+
'type': 'boolean'
|
797
|
+
},
|
651
798
|
**_LABELS_SCHEMA,
|
652
799
|
**_NETWORK_CONFIG_SCHEMA,
|
653
800
|
},
|
654
801
|
**_check_not_both_fields_present('instance_tags', 'labels')
|
655
802
|
},
|
803
|
+
'azure': {
|
804
|
+
'type': 'object',
|
805
|
+
'required': [],
|
806
|
+
'additionalProperties': False,
|
807
|
+
'properties': {
|
808
|
+
'storage_account': {
|
809
|
+
'type': 'string',
|
810
|
+
},
|
811
|
+
'resource_group_vm': {
|
812
|
+
'type': 'string',
|
813
|
+
},
|
814
|
+
}
|
815
|
+
},
|
656
816
|
'kubernetes': {
|
657
817
|
'type': 'object',
|
658
818
|
'required': [],
|
659
819
|
'additionalProperties': False,
|
660
820
|
'properties': {
|
821
|
+
'allowed_contexts': {
|
822
|
+
'type': 'array',
|
823
|
+
'items': {
|
824
|
+
'type': 'string',
|
825
|
+
},
|
826
|
+
},
|
661
827
|
'networking': {
|
662
828
|
'type': 'string',
|
663
829
|
'case_insensitive_enum': [
|
@@ -723,6 +889,9 @@ def get_config_schema():
|
|
723
889
|
'image_tag_gpu': {
|
724
890
|
'type': 'string',
|
725
891
|
},
|
892
|
+
'vcn_ocid': {
|
893
|
+
'type': 'string',
|
894
|
+
},
|
726
895
|
'vcn_subnet': {
|
727
896
|
'type': 'string',
|
728
897
|
},
|
@@ -731,6 +900,13 @@ def get_config_schema():
|
|
731
900
|
},
|
732
901
|
}
|
733
902
|
|
903
|
+
admin_policy_schema = {
|
904
|
+
'type': 'string',
|
905
|
+
# Check regex to be a valid python module path
|
906
|
+
'pattern': (r'^[a-zA-Z_][a-zA-Z0-9_]*'
|
907
|
+
r'(\.[a-zA-Z_][a-zA-Z0-9_]*)+$'),
|
908
|
+
}
|
909
|
+
|
734
910
|
allowed_clouds = {
|
735
911
|
# A list of cloud names that are allowed to be used
|
736
912
|
'type': 'array',
|
@@ -741,9 +917,52 @@ def get_config_schema():
|
|
741
917
|
}
|
742
918
|
}
|
743
919
|
|
920
|
+
docker_configs = {
|
921
|
+
'type': 'object',
|
922
|
+
'required': [],
|
923
|
+
'additionalProperties': False,
|
924
|
+
'properties': {
|
925
|
+
'run_options': {
|
926
|
+
'anyOf': [{
|
927
|
+
'type': 'string',
|
928
|
+
}, {
|
929
|
+
'type': 'array',
|
930
|
+
'items': {
|
931
|
+
'type': 'string',
|
932
|
+
}
|
933
|
+
}]
|
934
|
+
}
|
935
|
+
}
|
936
|
+
}
|
937
|
+
gpu_configs = {
|
938
|
+
'type': 'object',
|
939
|
+
'required': [],
|
940
|
+
'additionalProperties': False,
|
941
|
+
'properties': {
|
942
|
+
'disable_ecc': {
|
943
|
+
'type': 'boolean',
|
944
|
+
},
|
945
|
+
}
|
946
|
+
}
|
947
|
+
|
948
|
+
api_server = {
|
949
|
+
'type': 'object',
|
950
|
+
'required': [],
|
951
|
+
'additionalProperties': False,
|
952
|
+
'properties': {
|
953
|
+
'endpoint': {
|
954
|
+
'type': 'string',
|
955
|
+
# Apply validation for URL
|
956
|
+
'pattern': r'^https?://.*$',
|
957
|
+
},
|
958
|
+
}
|
959
|
+
}
|
960
|
+
|
744
961
|
for cloud, config in cloud_configs.items():
|
745
962
|
if cloud == 'aws':
|
746
|
-
config['properties'].update(
|
963
|
+
config['properties'].update({
|
964
|
+
'remote_identity': _PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY
|
965
|
+
})
|
747
966
|
elif cloud == 'kubernetes':
|
748
967
|
config['properties'].update(_REMOTE_IDENTITY_SCHEMA_KUBERNETES)
|
749
968
|
else:
|
@@ -755,11 +974,12 @@ def get_config_schema():
|
|
755
974
|
'additionalProperties': False,
|
756
975
|
'properties': {
|
757
976
|
'jobs': controller_resources_schema,
|
758
|
-
'spot': controller_resources_schema,
|
759
977
|
'serve': controller_resources_schema,
|
760
978
|
'allowed_clouds': allowed_clouds,
|
979
|
+
'admin_policy': admin_policy_schema,
|
980
|
+
'docker': docker_configs,
|
981
|
+
'nvidia_gpus': gpu_configs,
|
982
|
+
'api_server': api_server,
|
761
983
|
**cloud_configs,
|
762
984
|
},
|
763
|
-
# Avoid spot and jobs being present at the same time.
|
764
|
-
**_check_not_both_fields_present('spot', 'jobs')
|
765
985
|
}
|
@@ -6,22 +6,27 @@ import colorama
|
|
6
6
|
|
7
7
|
|
8
8
|
class ClusterStatus(enum.Enum):
|
9
|
-
"""Cluster status as recorded in
|
9
|
+
"""Cluster status as recorded in local cache.
|
10
|
+
|
11
|
+
This can be different from the actual cluster status, and can be refreshed
|
12
|
+
by running ``sky status --refresh``.
|
13
|
+
"""
|
10
14
|
# NOTE: these statuses are as recorded in our local cache, the table
|
11
15
|
# 'clusters'. The actual cluster state may be different (e.g., an UP
|
12
16
|
# cluster getting killed manually by the user or the cloud provider).
|
13
17
|
|
14
|
-
# Initializing. This means a backend.provision() call has started but has
|
15
|
-
# not successfully finished. The cluster may be undergoing setup, may have
|
16
|
-
# failed setup, may be live or down.
|
17
18
|
INIT = 'INIT'
|
19
|
+
"""Initializing.
|
20
|
+
|
21
|
+
This means a provisioning has started but has not successfully finished. The
|
22
|
+
cluster may be undergoing setup, may have failed setup, may be live or down.
|
23
|
+
"""
|
18
24
|
|
19
|
-
# The cluster is recorded as up. This means a backend.provision() has
|
20
|
-
# previously succeeded.
|
21
25
|
UP = 'UP'
|
26
|
+
"""The cluster is up. This means a provisioning has previously succeeded."""
|
22
27
|
|
23
|
-
# Stopped. This means a `sky stop` call has previously succeeded.
|
24
28
|
STOPPED = 'STOPPED'
|
29
|
+
"""The cluster is stopped."""
|
25
30
|
|
26
31
|
def colored_str(self):
|
27
32
|
color = _STATUS_TO_COLOR[self]
|