skypilot-nightly 1.0.0.dev20251210__py3-none-any.whl → 1.0.0.dev20260112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +4 -2
- sky/adaptors/slurm.py +159 -72
- sky/backends/backend_utils.py +52 -10
- sky/backends/cloud_vm_ray_backend.py +192 -32
- sky/backends/task_codegen.py +40 -2
- sky/catalog/data_fetchers/fetch_gcp.py +9 -1
- sky/catalog/data_fetchers/fetch_nebius.py +1 -1
- sky/catalog/data_fetchers/fetch_vast.py +4 -2
- sky/catalog/seeweb_catalog.py +30 -15
- sky/catalog/shadeform_catalog.py +5 -2
- sky/catalog/slurm_catalog.py +0 -7
- sky/catalog/vast_catalog.py +30 -6
- sky/check.py +11 -8
- sky/client/cli/command.py +106 -54
- sky/client/interactive_utils.py +190 -0
- sky/client/sdk.py +8 -0
- sky/client/sdk_async.py +9 -0
- sky/clouds/aws.py +60 -2
- sky/clouds/azure.py +2 -0
- sky/clouds/kubernetes.py +2 -0
- sky/clouds/runpod.py +38 -7
- sky/clouds/slurm.py +44 -12
- sky/clouds/ssh.py +1 -1
- sky/clouds/vast.py +30 -17
- sky/core.py +69 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/3nu-b8raeKRNABZ2d4GAG/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-0565f8975a7dcd10.js +6 -0
- sky/dashboard/out/_next/static/chunks/2109-55a1546d793574a7.js +11 -0
- sky/dashboard/out/_next/static/chunks/2521-099b07cd9e4745bf.js +26 -0
- sky/dashboard/out/_next/static/chunks/2755.a636e04a928a700e.js +31 -0
- sky/dashboard/out/_next/static/chunks/3495.05eab4862217c1a5.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.cfc5dcc9434fd98c.js +1 -0
- sky/dashboard/out/_next/static/chunks/3981.645d01bf9c8cad0c.js +21 -0
- sky/dashboard/out/_next/static/chunks/4083-0115d67c1fb57d6c.js +21 -0
- sky/dashboard/out/_next/static/chunks/{8640.5b9475a2d18c5416.js → 429.a58e9ba9742309ed.js} +2 -2
- sky/dashboard/out/_next/static/chunks/4555.8e221537181b5dc1.js +6 -0
- sky/dashboard/out/_next/static/chunks/4725.937865b81fdaaebb.js +6 -0
- sky/dashboard/out/_next/static/chunks/6082-edabd8f6092300ce.js +25 -0
- sky/dashboard/out/_next/static/chunks/6989-49cb7dca83a7a62d.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-630bd2a2257275f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/7248-a99800d4db8edabd.js +1 -0
- sky/dashboard/out/_next/static/chunks/754-cfc5d4ad1b843d29.js +18 -0
- sky/dashboard/out/_next/static/chunks/8050-dd8aa107b17dce00.js +16 -0
- sky/dashboard/out/_next/static/chunks/8056-d4ae1e0cb81e7368.js +1 -0
- sky/dashboard/out/_next/static/chunks/8555.011023e296c127b3.js +6 -0
- sky/dashboard/out/_next/static/chunks/8821-93c25df904a8362b.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-0662594b69432ade.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.f15c91c97d124a5f.js +6 -0
- sky/dashboard/out/_next/static/chunks/{9353-8369df1cf105221c.js → 9353-7ad6bd01858556f1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-5a86569acad99764.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8297476714acb4ac.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-337c3ba1085f1210.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-9e5d47818b9bdadd.js → clusters-57632ff3684a8b5c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-5fd3a453c079c2ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-9f85c02c9c6cae9e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90f16972cbecf354.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-2dd42fc37aad427a.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ed806aeace26b972.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-bec34706b36f3524.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{volumes-ef19d49c6d0e8500.js → volumes-a83ba9b38dff7ea9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-96e0f298308da7e2.js → [name]-c781e9c3e52ef9fc.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-91e0942f47310aae.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-cfe59cf684ee13b9.js +1 -0
- sky/dashboard/out/_next/static/css/b0dbca28f027cc19.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +26 -12
- sky/data/mounting_utils.py +29 -4
- sky/global_user_state.py +108 -16
- sky/jobs/client/sdk.py +8 -3
- sky/jobs/controller.py +191 -31
- sky/jobs/recovery_strategy.py +109 -11
- sky/jobs/server/core.py +81 -4
- sky/jobs/server/server.py +14 -0
- sky/jobs/state.py +417 -19
- sky/jobs/utils.py +73 -80
- sky/models.py +9 -0
- sky/optimizer.py +2 -1
- sky/provision/__init__.py +11 -9
- sky/provision/kubernetes/utils.py +122 -15
- sky/provision/kubernetes/volume.py +52 -17
- sky/provision/provisioner.py +2 -1
- sky/provision/runpod/instance.py +3 -1
- sky/provision/runpod/utils.py +13 -1
- sky/provision/runpod/volume.py +25 -9
- sky/provision/slurm/instance.py +75 -29
- sky/provision/slurm/utils.py +213 -107
- sky/provision/vast/utils.py +1 -0
- sky/resources.py +135 -13
- sky/schemas/api/responses.py +4 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +1 -1
- sky/schemas/db/spot_jobs/008_add_full_resources.py +34 -0
- sky/schemas/db/spot_jobs/009_job_events.py +32 -0
- sky/schemas/db/spot_jobs/010_job_events_timestamp_with_timezone.py +43 -0
- sky/schemas/db/spot_jobs/011_add_links.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +9 -5
- sky/schemas/generated/jobsv1_pb2.pyi +12 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +44 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +32 -28
- sky/schemas/generated/managed_jobsv1_pb2.pyi +11 -2
- sky/serve/serve_utils.py +232 -40
- sky/server/common.py +17 -0
- sky/server/constants.py +1 -1
- sky/server/metrics.py +6 -3
- sky/server/plugins.py +16 -0
- sky/server/requests/payloads.py +18 -0
- sky/server/requests/request_names.py +2 -0
- sky/server/requests/requests.py +28 -10
- sky/server/requests/serializers/encoders.py +5 -0
- sky/server/requests/serializers/return_value_serializers.py +14 -4
- sky/server/server.py +434 -107
- sky/server/uvicorn.py +5 -0
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +21 -10
- sky/sky_logging.py +2 -1
- sky/skylet/constants.py +22 -5
- sky/skylet/executor/slurm.py +4 -6
- sky/skylet/job_lib.py +89 -4
- sky/skylet/services.py +18 -3
- sky/ssh_node_pools/deploy/tunnel/cleanup-tunnel.sh +62 -0
- sky/ssh_node_pools/deploy/tunnel/ssh-tunnel.sh +379 -0
- sky/templates/kubernetes-ray.yml.j2 +4 -6
- sky/templates/slurm-ray.yml.j2 +32 -2
- sky/templates/websocket_proxy.py +18 -41
- sky/users/permission.py +61 -51
- sky/utils/auth_utils.py +42 -0
- sky/utils/cli_utils/status_utils.py +19 -5
- sky/utils/cluster_utils.py +10 -3
- sky/utils/command_runner.py +256 -94
- sky/utils/command_runner.pyi +16 -0
- sky/utils/common_utils.py +30 -29
- sky/utils/context.py +32 -0
- sky/utils/db/db_utils.py +36 -6
- sky/utils/db/migration_utils.py +41 -21
- sky/utils/infra_utils.py +5 -1
- sky/utils/instance_links.py +139 -0
- sky/utils/interactive_utils.py +49 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +42 -33
- sky/utils/kubernetes/rsync_helper.sh +5 -1
- sky/utils/plugin_extensions/__init__.py +14 -0
- sky/utils/plugin_extensions/external_failure_source.py +176 -0
- sky/utils/resources_utils.py +10 -8
- sky/utils/rich_utils.py +9 -11
- sky/utils/schemas.py +63 -20
- sky/utils/status_lib.py +7 -0
- sky/utils/subprocess_utils.py +17 -0
- sky/volumes/client/sdk.py +6 -3
- sky/volumes/server/core.py +65 -27
- sky_templates/ray/start_cluster +8 -4
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/METADATA +53 -57
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/RECORD +172 -162
- sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +0 -11
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +0 -6
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +0 -26
- sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +0 -1
- sky/dashboard/out/_next/static/chunks/3800-b589397dc09c5b4e.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +0 -1
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
- sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +0 -1
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +0 -30
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +0 -41
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +0 -6
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +0 -31
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +0 -30
- sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-12c559ec4d81fdbd.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-d187cd0413d72475.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +0 -21
- sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-cb4da3abe08ebf19.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +0 -1
- sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +0 -3
- /sky/dashboard/out/_next/static/{KYAhEFa3FTfq4JyKVgo-s → 3nu-b8raeKRNABZ2d4GAG}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/plugins/{[...slug]-4f46050ca065d8f8.js → [...slug]-449a9f5a3bb20fb3.js} +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Utilities for server-side interactive SSH functionality."""
|
|
2
|
+
import array
|
|
3
|
+
import socket
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_pty_socket_path(session_id: str) -> str:
    """Build the Unix socket path used for PTY file descriptor passing.

    Args:
        session_id: Identifier of the interactive session. It is embedded
            verbatim in the socket file name.

    Returns:
        A path of the form ``/tmp/sky_pty_<session_id>.sock``.
    """
    path_template = '/tmp/sky_pty_{}.sock'
    return path_template.format(session_id)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def send_fd(sock: socket.socket, fd: int) -> None:
    """Hand an open file descriptor to the peer of a Unix socket.

    The descriptor travels as an SCM_RIGHTS control (ancillary) message,
    which lets one process pass a set of open file descriptors to another.

    See:
        https://man7.org/linux/man-pages/man7/unix.7.html
        https://man7.org/linux/man-pages/man3/cmsg.3.html

    Args:
        sock: Connected Unix socket.
        fd: File descriptor to send.
    """
    # Ancillary payload: a native-int array holding the single descriptor.
    fd_payload = array.array('i', [fd])
    control_message = (socket.SOL_SOCKET, socket.SCM_RIGHTS, fd_payload)
    # A one-byte dummy payload is sent alongside the control message.
    dummy_data = [b'x']
    sock.sendmsg(dummy_data, [control_message])
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def recv_fd(sock: socket.socket) -> int:
    """Receive file descriptor via Unix socket using SCM_RIGHTS.

    Only control messages of level SOL_SOCKET and type SCM_RIGHTS are
    treated as carrying a descriptor; any other ancillary message is
    ignored instead of being reinterpreted as a file descriptor.

    Args:
        sock: Connected Unix socket.

    Returns:
        Received file descriptor.

    Raises:
        RuntimeError: If no file descriptor was received.
    """
    fd_size = array.array('i').itemsize
    # NOTE: recvmsg() has no async equivalent
    _, ancdata, _, _ = sock.recvmsg(1, socket.CMSG_SPACE(fd_size))
    for cmsg_level, cmsg_type, cmsg_data in ancdata:
        if (cmsg_level != socket.SOL_SOCKET or
                cmsg_type != socket.SCM_RIGHTS):
            continue
        # Drop trailing bytes that do not form a whole int so a truncated
        # or padded control message cannot make array parsing raise
        # ValueError.
        usable_len = len(cmsg_data) - (len(cmsg_data) % fd_size)
        fds = array.array('i')
        fds.frombytes(cmsg_data[:usable_len])
        if fds:
            return fds[0]
    raise RuntimeError('No file descriptor received - '
                       'sender may have closed connection')
|
|
@@ -12,20 +12,20 @@
|
|
|
12
12
|
# * Specify SKYPILOT_NAMESPACE env var to override the default namespace where the service account is created.
|
|
13
13
|
# * Specify SKYPILOT_SA_NAME env var to override the default service account name.
|
|
14
14
|
# * Specify SKIP_SA_CREATION=1 to skip creating the service account and use an existing one
|
|
15
|
-
# * Specify SUPER_USER=
|
|
15
|
+
# * Specify SUPER_USER=0 to create a service account with minimal permissions
|
|
16
16
|
#
|
|
17
17
|
# Usage:
|
|
18
|
-
# # Create "sky-sa" service account
|
|
18
|
+
# # Create "sky-sa" service account in "default" namespace and generate kubeconfig
|
|
19
19
|
# $ ./generate_kubeconfig.sh
|
|
20
20
|
#
|
|
21
|
-
# # Create "my-sa" service account
|
|
21
|
+
# # Create "my-sa" service account in "my-namespace" namespace and generate kubeconfig
|
|
22
22
|
# $ SKYPILOT_SA_NAME=my-sa SKYPILOT_NAMESPACE=my-namespace ./generate_kubeconfig.sh
|
|
23
23
|
#
|
|
24
24
|
# # Use an existing service account "my-sa" in "my-namespace" namespace and generate kubeconfig
|
|
25
25
|
# $ SKIP_SA_CREATION=1 SKYPILOT_SA_NAME=my-sa SKYPILOT_NAMESPACE=my-namespace ./generate_kubeconfig.sh
|
|
26
26
|
#
|
|
27
|
-
# # Create "sky-sa" service account with
|
|
28
|
-
# $ SUPER_USER=
|
|
27
|
+
# # Create "sky-sa" service account with minimal permissions in "default" namespace (manual setup may be required)
|
|
28
|
+
# $ SUPER_USER=0 ./generate_kubeconfig.sh
|
|
29
29
|
|
|
30
30
|
set -eu -o pipefail
|
|
31
31
|
|
|
@@ -33,11 +33,18 @@ set -eu -o pipefail
|
|
|
33
33
|
# use default.
|
|
34
34
|
SKYPILOT_SA=${SKYPILOT_SA_NAME:-sky-sa}
|
|
35
35
|
NAMESPACE=${SKYPILOT_NAMESPACE:-default}
|
|
36
|
-
SUPER_USER=${SUPER_USER:-
|
|
36
|
+
SUPER_USER=${SUPER_USER:-1}
|
|
37
37
|
|
|
38
|
-
echo "
|
|
39
|
-
echo "
|
|
40
|
-
echo "
|
|
38
|
+
echo "=========================================="
|
|
39
|
+
echo "SkyPilot Kubeconfig Generation"
|
|
40
|
+
echo "=========================================="
|
|
41
|
+
echo "Service Account: ${SKYPILOT_SA}"
|
|
42
|
+
echo "Namespace: ${NAMESPACE}"
|
|
43
|
+
if [ "${SUPER_USER}" != "1" ]; then
|
|
44
|
+
echo "Permissions: Minimal (manual setup may be required)"
|
|
45
|
+
SUPER_USER=0
|
|
46
|
+
fi
|
|
47
|
+
echo ""
|
|
41
48
|
|
|
42
49
|
# Set OS specific values.
|
|
43
50
|
if [[ "$OSTYPE" == "linux-gnu" ]]; then
|
|
@@ -53,7 +60,7 @@ fi
|
|
|
53
60
|
|
|
54
61
|
# If the user has set SKIP_SA_CREATION=1, skip creating the service account.
|
|
55
62
|
if [ -z ${SKIP_SA_CREATION+x} ]; then
|
|
56
|
-
echo "Creating
|
|
63
|
+
echo "[1/3] Creating Kubernetes Service Account and RBAC permissions..."
|
|
57
64
|
if [ "${SUPER_USER}" = "1" ]; then
|
|
58
65
|
# Create service account with cluster-admin permissions
|
|
59
66
|
kubectl apply -f - <<EOF
|
|
@@ -219,7 +226,8 @@ roleRef:
|
|
|
219
226
|
EOF
|
|
220
227
|
fi
|
|
221
228
|
# Apply optional ingress-related roles, but don't make the script fail if it fails
|
|
222
|
-
|
|
229
|
+
echo " → Applying optional ingress permissions (skipped if ingress-nginx not installed)..."
|
|
230
|
+
kubectl apply -f - 2>/dev/null <<EOF || true
|
|
223
231
|
# Optional: Role for accessing ingress resources
|
|
224
232
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
225
233
|
kind: Role
|
|
@@ -253,8 +261,13 @@ roleRef:
|
|
|
253
261
|
name: ${SKYPILOT_SA}-role-ingress-nginx # Use the same name as the role at line 119
|
|
254
262
|
apiGroup: rbac.authorization.k8s.io
|
|
255
263
|
EOF
|
|
264
|
+
else
|
|
265
|
+
echo "[1/3] Skipping service account creation (using existing account)..."
|
|
256
266
|
fi
|
|
257
267
|
|
|
268
|
+
echo ""
|
|
269
|
+
echo "[2/3] Creating service account token..."
|
|
270
|
+
|
|
258
271
|
# Checks if secret entry was defined for Service account. If defined it means that Kubernetes server has a
|
|
259
272
|
# version below 1.24, otherwise one must manually create the secret and bind it to the Service account to have a non expiring token.
|
|
260
273
|
# After Kubernetes v1.24 Service accounts no longer generate automatic tokens/secrets.
|
|
@@ -293,7 +306,9 @@ CURRENT_CONTEXT=$(kubectl config current-context)
|
|
|
293
306
|
CURRENT_CLUSTER=$(kubectl config view -o jsonpath="{.contexts[?(@.name == \"${CURRENT_CONTEXT}\"})].context.cluster}")
|
|
294
307
|
CURRENT_CLUSTER_ADDR=$(kubectl config view -o jsonpath="{.clusters[?(@.name == \"${CURRENT_CLUSTER}\"})].cluster.server}")
|
|
295
308
|
|
|
296
|
-
echo "
|
|
309
|
+
echo ""
|
|
310
|
+
echo "[3/3] Generating kubeconfig file..."
|
|
311
|
+
|
|
297
312
|
cat > kubeconfig <<EOF
|
|
298
313
|
apiVersion: v1
|
|
299
314
|
clusters:
|
|
@@ -316,24 +331,18 @@ users:
|
|
|
316
331
|
token: ${SA_TOKEN}
|
|
317
332
|
EOF
|
|
318
333
|
|
|
319
|
-
echo "
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
Also add this to your ~/.sky/config.yaml to use the new service account:
|
|
335
|
-
|
|
336
|
-
# ~/.sky/config.yaml
|
|
337
|
-
kubernetes:
|
|
338
|
-
remote_identity: ${SKYPILOT_SA}
|
|
339
|
-
"
|
|
334
|
+
echo ""
|
|
335
|
+
echo "=========================================="
|
|
336
|
+
echo "✓ SUCCESS!"
|
|
337
|
+
echo "=========================================="
|
|
338
|
+
echo ""
|
|
339
|
+
echo "Kubeconfig file created successfully!"
|
|
340
|
+
echo ""
|
|
341
|
+
echo " Service Account: ${SKYPILOT_SA}"
|
|
342
|
+
echo " Namespace: ${NAMESPACE}"
|
|
343
|
+
echo " Location: $(pwd)/kubeconfig"
|
|
344
|
+
echo ""
|
|
345
|
+
echo "Next steps:"
|
|
346
|
+
echo " Refer to this page for setting up the credential for remote API server:"
|
|
347
|
+
echo " https://docs.skypilot.co/en/latest/reference/api-server/api-server-admin-deploy.html#optional-configure-cloud-accounts"
|
|
348
|
+
echo ""
|
|
@@ -60,4 +60,8 @@ fi
|
|
|
60
60
|
# We wrap the command in a bash script that waits for rsync, then execs the original command.
|
|
61
61
|
# Timeout after MAX_WAIT_TIME_SECONDS seconds.
|
|
62
62
|
MAX_WAIT_TIME_SECONDS=300
|
|
63
|
-
|
|
63
|
+
MAX_WAIT_COUNT=$((MAX_WAIT_TIME_SECONDS * 2))
|
|
64
|
+
# Use --norc --noprofile to prevent bash from sourcing startup files that might
|
|
65
|
+
# output to stdout and corrupt the rsync protocol. All debug output must go to
|
|
66
|
+
# stderr (>&2) to keep stdout clean for rsync communication.
|
|
67
|
+
eval "${kubectl_cmd_base% --} -i -- bash --norc --noprofile -c 'count=0; until which rsync >/dev/null 2>&1; do if [ \$count -ge $MAX_WAIT_COUNT ]; then echo \"Error when trying to rsync files to kubernetes cluster. Package installation may have failed.\" >&2; exit 1; fi; sleep 0.5; count=\$((count+1)); done; exec \"\$@\"' -- \"\$@\""
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Plugin extensions module.
|
|
2
|
+
|
|
3
|
+
This module provides extension points that plugins can hook into to provide
|
|
4
|
+
custom functionality.
|
|
5
|
+
"""
|
|
6
|
+
from sky.utils.plugin_extensions.external_failure_source import (
|
|
7
|
+
ExternalClusterFailure)
|
|
8
|
+
from sky.utils.plugin_extensions.external_failure_source import (
|
|
9
|
+
ExternalFailureSource)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
'ExternalClusterFailure',
|
|
13
|
+
'ExternalFailureSource',
|
|
14
|
+
]
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""External failure source interface for plugins.
|
|
2
|
+
|
|
3
|
+
This module provides an extension point that allows plugins to provide
|
|
4
|
+
cluster failure tracking functionality. By default, no-op implementations
|
|
5
|
+
are used. Plugins can register their own implementations to provide actual
|
|
6
|
+
failure tracking.
|
|
7
|
+
|
|
8
|
+
Example usage in a plugin:
|
|
9
|
+
from sky.utils.plugin_extensions import ExternalFailureSource
|
|
10
|
+
|
|
11
|
+
# Register custom failure source
|
|
12
|
+
ExternalFailureSource.register(
|
|
13
|
+
get_failures=my_get_cluster_failures,
|
|
14
|
+
clear_failures=my_clear_cluster_failures,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
Example usage in core SkyPilot:
|
|
18
|
+
from sky.utils.plugin_extensions import ExternalFailureSource
|
|
19
|
+
|
|
20
|
+
# Get failures for a cluster
|
|
21
|
+
failures = ExternalFailureSource.get(cluster_hash='abc123')
|
|
22
|
+
|
|
23
|
+
# Clear failures for a cluster
|
|
24
|
+
cleared = ExternalFailureSource.clear(cluster_name='my-cluster')
|
|
25
|
+
"""
|
|
26
|
+
import dataclasses
|
|
27
|
+
from typing import Any, Dict, List, Optional, Protocol
|
|
28
|
+
|
|
29
|
+
from sky import sky_logging
|
|
30
|
+
|
|
31
|
+
logger = sky_logging.init_logger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclasses.dataclass
class ExternalClusterFailure:
    """A single cluster failure reported by an external source.

    Attributes:
        code: Machine-readable failure code (e.g. 'GPU_HARDWARE_FAILURE_XID_79')
        reason: Human-readable description of the failure.
    """
    code: str
    reason: str

    @classmethod
    def from_failure_list(
            cls,
            failures: List[Dict[str, Any]]) -> List['ExternalClusterFailure']:
        """Convert raw failure dicts into ExternalClusterFailure objects.

        Args:
            failures: List of dicts with 'failure_mode' and 'failure_reason'
                keys (as returned by ExternalFailureSource.get()).

        Returns:
            List of ExternalClusterFailure objects, one per failure, in the
            same order as the input.
        """
        converted: List['ExternalClusterFailure'] = []
        for record in failures:
            # 'failure_mode' maps to `code`, 'failure_reason' to `reason`.
            failure = cls(code=record['failure_mode'],
                          reason=record['failure_reason'])
            converted.append(failure)
        return converted
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Protocol definitions for the failure source functions
|
|
65
|
+
class GetClusterFailuresFunc(Protocol):
    """Structural type for a get_cluster_failures callable."""

    def __call__(
        self,
        cluster_hash: Optional[str] = None,
        cluster_name: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Return failure records as a list of dicts.

        Args:
            cluster_hash: Optional hash of the cluster.
            cluster_name: Optional name of the cluster.
        """
        ...
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ClearClusterFailuresFunc(Protocol):
    """Structural type for a clear_cluster_failures callable."""

    def __call__(
        self,
        cluster_hash: Optional[str] = None,
        cluster_name: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Return a list of dicts for the clear operation.

        Args:
            cluster_hash: Optional hash of the cluster.
            cluster_name: Optional name of the cluster.
        """
        ...
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ExternalFailureSource:
    """Class-level registry for an external cluster failure source.

    Plugins hook in here to supply their own cluster failure tracking.
    Until something is registered, get() and clear() are no-ops that
    return empty lists.

    Plugins can register their implementations during their install() phase.
    Core SkyPilot code then calls get() and clear() without needing to know
    which plugin (if any) is behind them.
    """

    # Registered callables, stored on the class itself; None means that no
    # failure source has been registered.
    _get_func: Optional[GetClusterFailuresFunc] = None
    _clear_func: Optional[ClearClusterFailuresFunc] = None

    @classmethod
    def register(cls, get_failures: GetClusterFailuresFunc,
                 clear_failures: ClearClusterFailuresFunc) -> None:
        """Install the callables that back get() and clear().

        Only one external failure source can be registered at a time; a
        later call replaces the previously registered callables.

        Args:
            get_failures: Function to get active cluster failures.
                Signature: (cluster_hash: Optional[str],
                            cluster_name: Optional[str])
                           -> List[Dict[str, Any]]
                Returns list of dicts with keys: cluster_hash, failure_mode,
                failure_reason, cleared_at.
            clear_failures: Function to clear cluster failures.
                Signature: (cluster_hash: Optional[str],
                            cluster_name: Optional[str])
                           -> List[Dict[str, Any]]
                Returns list of dicts of the failures that were cleared.
        """
        cls._get_func, cls._clear_func = get_failures, clear_failures
        logger.info('Registered external failure source')

    @classmethod
    def is_registered(cls) -> bool:
        """Tell whether both the get and clear callables are registered."""
        registered_funcs = (cls._get_func, cls._clear_func)
        return all(func is not None for func in registered_funcs)

    @classmethod
    def get(cls,
            cluster_hash: Optional[str] = None,
            cluster_name: Optional[str] = None) -> List[Dict[str, Any]]:
        """Fetch active cluster failures from the registered source.

        Args:
            cluster_hash: Hash of the cluster to query failures for.
            cluster_name: Name of the cluster to query failures for.

        Returns:
            List of dictionaries containing failure records.
            Each dict contains: cluster_hash, failure_mode, failure_reason,
            cleared_at. Returns empty list if no failure source is
            registered, or if the registered function raises.
        """
        getter = cls._get_func
        if getter is None:
            return []
        try:
            failures = getter(cluster_name=cluster_name,
                              cluster_hash=cluster_hash)
        except Exception as exc:  # pylint: disable=broad-except
            # Best effort: a failing plugin is logged, not propagated.
            logger.warning(f'Failed to get cluster failures: {exc}')
            return []
        return failures

    @classmethod
    def clear(cls,
              cluster_hash: Optional[str] = None,
              cluster_name: Optional[str] = None) -> List[Dict[str, Any]]:
        """Clear cluster failures through the registered source.

        Args:
            cluster_hash: Hash of the cluster to clear failures for.
            cluster_name: Name of the cluster to clear failures for.

        Returns:
            List of dictionaries containing the failure records that were
            cleared. Returns empty list if no failure source is registered,
            or if the registered function raises.
        """
        clearer = cls._clear_func
        if clearer is None:
            return []
        try:
            cleared = clearer(cluster_name=cluster_name,
                              cluster_hash=cluster_hash)
        except Exception as exc:  # pylint: disable=broad-except
            # Best effort: a failing plugin is logged, not propagated.
            logger.warning(f'Failed to clear cluster failures: {exc}')
            return []
        return cleared
|
sky/utils/resources_utils.py
CHANGED
|
@@ -183,7 +183,8 @@ def simplify_ports(ports: List[str]) -> List[str]:
|
|
|
183
183
|
def format_resource(resource: 'resources_lib.Resources',
|
|
184
184
|
simplified_only: bool = False) -> Tuple[str, Optional[str]]:
|
|
185
185
|
resource = resource.assert_launchable()
|
|
186
|
-
is_k8s =
|
|
186
|
+
is_k8s = resource.cloud.canonical_name() == 'kubernetes'
|
|
187
|
+
vcpu, mem = None, None
|
|
187
188
|
if resource.accelerators is None or is_k8s or not simplified_only:
|
|
188
189
|
vcpu, mem = resource.cloud.get_vcpus_mem_from_instance_type(
|
|
189
190
|
resource.instance_type)
|
|
@@ -198,18 +199,19 @@ def format_resource(resource: 'resources_lib.Resources',
|
|
|
198
199
|
|
|
199
200
|
if (resource.accelerators is None or is_k8s):
|
|
200
201
|
if vcpu is not None:
|
|
201
|
-
elements_simple.append(f'cpus={
|
|
202
|
-
elements_full.append(f'cpus={
|
|
202
|
+
elements_simple.append(f'cpus={common_utils.format_float(vcpu)}')
|
|
203
|
+
elements_full.append(f'cpus={common_utils.format_float(vcpu)}')
|
|
203
204
|
if mem is not None:
|
|
204
|
-
elements_simple.append(f'mem={
|
|
205
|
-
elements_full.append(f'mem={
|
|
205
|
+
elements_simple.append(f'mem={common_utils.format_float(mem)}')
|
|
206
|
+
elements_full.append(f'mem={common_utils.format_float(mem)}')
|
|
206
207
|
elif not simplified_only:
|
|
207
208
|
if vcpu is not None:
|
|
208
|
-
elements_full.append(f'cpus={
|
|
209
|
+
elements_full.append(f'cpus={common_utils.format_float(vcpu)}')
|
|
209
210
|
if mem is not None:
|
|
210
|
-
elements_full.append(f'mem={
|
|
211
|
+
elements_full.append(f'mem={common_utils.format_float(mem)}')
|
|
211
212
|
|
|
212
|
-
|
|
213
|
+
is_slurm = resource.cloud.canonical_name() == 'slurm'
|
|
214
|
+
if not is_k8s and not is_slurm:
|
|
213
215
|
instance_type_full = resource.instance_type
|
|
214
216
|
instance_type_simple = common_utils.truncate_long_string(
|
|
215
217
|
instance_type_full, 15)
|
sky/utils/rich_utils.py
CHANGED
|
@@ -362,14 +362,14 @@ def decode_rich_status(
|
|
|
362
362
|
# Replace `\r\n` with `\n`, as printing a line that ends with
|
|
363
363
|
# `\r\n` in linux will cause the line to be empty.
|
|
364
364
|
line = line[:-2] + '\n'
|
|
365
|
-
is_payload,
|
|
365
|
+
is_payload, decoded_line = message_utils.decode_payload(
|
|
366
366
|
line, raise_for_mismatch=False)
|
|
367
|
-
|
|
368
|
-
if is_payload:
|
|
369
|
-
control, encoded_status = Control.decode(line)
|
|
370
|
-
if control is None:
|
|
367
|
+
if not is_payload:
|
|
371
368
|
yield line
|
|
372
369
|
continue
|
|
370
|
+
control, encoded_status = Control.decode(decoded_line)
|
|
371
|
+
if control is None:
|
|
372
|
+
continue
|
|
373
373
|
|
|
374
374
|
if control == Control.RETRY:
|
|
375
375
|
raise exceptions.RequestInterruptedError(
|
|
@@ -481,15 +481,13 @@ async def decode_rich_status_async(
|
|
|
481
481
|
# Replace `\r\n` with `\n`, as printing a line that ends with
|
|
482
482
|
# `\r\n` in linux will cause the line to be empty.
|
|
483
483
|
line = line[:-2] + '\n'
|
|
484
|
-
is_payload,
|
|
484
|
+
is_payload, decoded_line = message_utils.decode_payload(
|
|
485
485
|
line, raise_for_mismatch=False)
|
|
486
|
-
if
|
|
486
|
+
if not is_payload:
|
|
487
|
+
yield line
|
|
487
488
|
continue
|
|
488
|
-
control =
|
|
489
|
-
if is_payload:
|
|
490
|
-
control, encoded_status = Control.decode(line)
|
|
489
|
+
control, encoded_status = Control.decode(decoded_line)
|
|
491
490
|
if control is None:
|
|
492
|
-
yield line
|
|
493
491
|
continue
|
|
494
492
|
|
|
495
493
|
if control == Control.RETRY:
|
sky/utils/schemas.py
CHANGED
|
@@ -208,26 +208,49 @@ def _get_single_resources_schema():
|
|
|
208
208
|
},
|
|
209
209
|
'job_recovery': {
|
|
210
210
|
# Either a string or a dict.
|
|
211
|
-
'anyOf': [
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
'
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
'
|
|
228
|
-
|
|
211
|
+
'anyOf': [
|
|
212
|
+
{
|
|
213
|
+
'type': 'string',
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
'type': 'object',
|
|
217
|
+
'required': [],
|
|
218
|
+
'additionalProperties': False,
|
|
219
|
+
'properties': {
|
|
220
|
+
'strategy': {
|
|
221
|
+
'anyOf': [{
|
|
222
|
+
'type': 'string',
|
|
223
|
+
}, {
|
|
224
|
+
'type': 'null',
|
|
225
|
+
}],
|
|
226
|
+
},
|
|
227
|
+
'max_restarts_on_errors': {
|
|
228
|
+
'type': 'integer',
|
|
229
|
+
'minimum': 0,
|
|
230
|
+
},
|
|
231
|
+
'recover_on_exit_codes': {
|
|
232
|
+
'anyOf': [
|
|
233
|
+
{
|
|
234
|
+
# Single exit code
|
|
235
|
+
'type': 'integer',
|
|
236
|
+
'minimum': 0,
|
|
237
|
+
'maximum': 255,
|
|
238
|
+
},
|
|
239
|
+
{
|
|
240
|
+
# List of exit codes
|
|
241
|
+
'type': 'array',
|
|
242
|
+
'items': {
|
|
243
|
+
'type': 'integer',
|
|
244
|
+
'minimum': 0,
|
|
245
|
+
'maximum': 255,
|
|
246
|
+
},
|
|
247
|
+
'uniqueItems': True,
|
|
248
|
+
},
|
|
249
|
+
],
|
|
250
|
+
},
|
|
251
|
+
}
|
|
229
252
|
}
|
|
230
|
-
|
|
253
|
+
],
|
|
231
254
|
},
|
|
232
255
|
'volumes': {
|
|
233
256
|
'type': 'array',
|
|
@@ -1461,7 +1484,7 @@ def get_config_schema():
|
|
|
1461
1484
|
'required': [],
|
|
1462
1485
|
'additionalProperties': False,
|
|
1463
1486
|
'properties': {
|
|
1464
|
-
'
|
|
1487
|
+
'datacenter_only': {
|
|
1465
1488
|
'type': 'boolean',
|
|
1466
1489
|
},
|
|
1467
1490
|
}
|
|
@@ -1845,6 +1868,25 @@ def get_config_schema():
|
|
|
1845
1868
|
config['properties'].update(_REMOTE_IDENTITY_SCHEMA_KUBERNETES)
|
|
1846
1869
|
else:
|
|
1847
1870
|
config['properties'].update(_REMOTE_IDENTITY_SCHEMA)
|
|
1871
|
+
|
|
1872
|
+
data_schema = {
|
|
1873
|
+
'type': 'object',
|
|
1874
|
+
'required': [],
|
|
1875
|
+
'additionalProperties': False,
|
|
1876
|
+
'properties': {
|
|
1877
|
+
'mount_cached': {
|
|
1878
|
+
'type': 'object',
|
|
1879
|
+
'required': [],
|
|
1880
|
+
'additionalProperties': False,
|
|
1881
|
+
'properties': {
|
|
1882
|
+
'sequential_upload': {
|
|
1883
|
+
'type': 'boolean',
|
|
1884
|
+
},
|
|
1885
|
+
},
|
|
1886
|
+
},
|
|
1887
|
+
},
|
|
1888
|
+
}
|
|
1889
|
+
|
|
1848
1890
|
return {
|
|
1849
1891
|
'$schema': 'https://json-schema.org/draft/2020-12/schema',
|
|
1850
1892
|
'type': 'object',
|
|
@@ -1871,6 +1913,7 @@ def get_config_schema():
|
|
|
1871
1913
|
'rbac': rbac_schema,
|
|
1872
1914
|
'logs': logs_schema,
|
|
1873
1915
|
'daemons': daemon_schema,
|
|
1916
|
+
'data': data_schema,
|
|
1874
1917
|
**cloud_configs,
|
|
1875
1918
|
},
|
|
1876
1919
|
}
|
sky/utils/status_lib.py
CHANGED
|
@@ -27,6 +27,12 @@ class ClusterStatus(enum.Enum):
|
|
|
27
27
|
|
|
28
28
|
STOPPED = 'STOPPED'
|
|
29
29
|
"""The cluster is stopped."""
|
|
30
|
+
PENDING = 'PENDING'
|
|
31
|
+
"""The cluster is pending scheduling.
|
|
32
|
+
|
|
33
|
+
NOTE: This state is for display only and should not be used in state
|
|
34
|
+
machine logic without necessary considerations.
|
|
35
|
+
"""
|
|
30
36
|
|
|
31
37
|
def colored_str(self):
|
|
32
38
|
color = _STATUS_TO_COLOR[self]
|
|
@@ -37,6 +43,7 @@ _STATUS_TO_COLOR = {
|
|
|
37
43
|
ClusterStatus.INIT: colorama.Fore.BLUE,
|
|
38
44
|
ClusterStatus.UP: colorama.Fore.GREEN,
|
|
39
45
|
ClusterStatus.STOPPED: colorama.Fore.YELLOW,
|
|
46
|
+
ClusterStatus.PENDING: colorama.Fore.CYAN,
|
|
40
47
|
}
|
|
41
48
|
|
|
42
49
|
|
sky/utils/subprocess_utils.py
CHANGED
|
@@ -7,6 +7,7 @@ import resource
|
|
|
7
7
|
import shlex
|
|
8
8
|
import subprocess
|
|
9
9
|
import sys
|
|
10
|
+
import termios
|
|
10
11
|
import threading
|
|
11
12
|
import time
|
|
12
13
|
import typing
|
|
@@ -450,3 +451,19 @@ def slow_start_processes(processes: List[Startable],
|
|
|
450
451
|
break
|
|
451
452
|
batch_size = min(batch_size * 2, max_batch_size)
|
|
452
453
|
time.sleep(delay)
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def is_echo_disabled(fd: int) -> bool:
|
|
457
|
+
"""Check if terminal ECHO is disabled on the given fd.
|
|
458
|
+
|
|
459
|
+
When a subprocess wants password/sensitive input, it disables ECHO.
|
|
460
|
+
This is how pexpect's waitnoecho() works. See:
|
|
461
|
+
https://pexpect.readthedocs.io/en/stable/api/pexpect.html#pexpect.spawn.waitnoecho
|
|
462
|
+
"""
|
|
463
|
+
assert os.isatty(fd), 'fd is not connected to a terminal'
|
|
464
|
+
try:
|
|
465
|
+
attr = termios.tcgetattr(fd)
|
|
466
|
+
echo_on = bool(attr[3] & termios.ECHO)
|
|
467
|
+
return not echo_on
|
|
468
|
+
except (termios.error, OSError):
|
|
469
|
+
return False
|
sky/volumes/client/sdk.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""SDK functions for
|
|
1
|
+
"""SDK functions for volumes."""
|
|
2
2
|
import json
|
|
3
3
|
import typing
|
|
4
4
|
from typing import List
|
|
@@ -135,16 +135,19 @@ def ls() -> server_common.RequestId[List[responses.VolumeRecord]]:
|
|
|
135
135
|
@usage_lib.entrypoint
|
|
136
136
|
@server_common.check_server_healthy_or_start
|
|
137
137
|
@annotations.client_api
|
|
138
|
-
def delete(names: List[str]
|
|
138
|
+
def delete(names: List[str],
|
|
139
|
+
purge: bool = False) -> server_common.RequestId[None]:
|
|
139
140
|
"""Deletes volumes.
|
|
140
141
|
|
|
141
142
|
Args:
|
|
142
143
|
names: List of volume names to delete.
|
|
144
|
+
purge: If True, delete the volume from the database even if the
|
|
145
|
+
deletion API fails.
|
|
143
146
|
|
|
144
147
|
Returns:
|
|
145
148
|
The request ID of the delete request.
|
|
146
149
|
"""
|
|
147
|
-
body = payloads.VolumeDeleteBody(names=names)
|
|
150
|
+
body = payloads.VolumeDeleteBody(names=names, purge=purge)
|
|
148
151
|
response = server_common.make_authenticated_request(
|
|
149
152
|
'POST', '/volumes/delete', json=json.loads(body.model_dump_json()))
|
|
150
153
|
return server_common.get_request_id(response)
|