skypilot-nightly 1.0.0.dev20250902__py3-none-any.whl → 1.0.0.dev20250904__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/adaptors/runpod.py +68 -0
- sky/backends/backend_utils.py +5 -3
- sky/backends/cloud_vm_ray_backend.py +7 -2
- sky/client/cli/command.py +38 -6
- sky/client/sdk.py +22 -1
- sky/clouds/kubernetes.py +1 -1
- sky/clouds/nebius.py +4 -2
- sky/clouds/runpod.py +17 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +1 -0
- sky/dashboard/out/_next/static/chunks/{7205-88191679e7988c57.js → 1836-37fede578e2da5f8.js} +4 -9
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +1 -0
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +6 -0
- sky/dashboard/out/_next/static/chunks/{3785.d5b86f6ebc88e6e6.js → 3785.4872a2f3aa489880.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{4783.c485f48348349f47.js → 5339.3fda4a4010ff4e06.js} +4 -9
- sky/dashboard/out/_next/static/chunks/{9946.3b7b43c217ff70ec.js → 649.b9d7f7d10c1b8c53.js} +4 -9
- sky/dashboard/out/_next/static/chunks/6856-66e696640347e77b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +6 -0
- sky/dashboard/out/_next/static/chunks/9037-1c0101b86582136f.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-39c9bd4cdb7e5a57.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-a0527109c2fab467.js → [cluster]-0b4b35dc1dfe046c.js} +2 -7
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-81351f95f3bec08e.js → [context]-6563820e094f68ca.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-c320641c2bcbbea6.js → infra-aabba60d57826e0f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-de06e613e20bc977.js → [name]-af76bb06dbb3954f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-be35b22e2046564c.js → workspaces-7598c33a746cdc91.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-24c4fc6d30ce0193.js +1 -0
- sky/dashboard/out/_next/static/{tio0QibqY2C0F2-rPy00p → mriHUOVL_Ht-CeW-e7saa}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/mounting_utils.py +29 -38
- sky/global_user_state.py +16 -1
- sky/jobs/state.py +1 -1
- sky/models.py +1 -0
- sky/provision/kubernetes/instance.py +10 -3
- sky/provision/runpod/__init__.py +3 -0
- sky/provision/runpod/instance.py +17 -0
- sky/provision/runpod/utils.py +23 -5
- sky/provision/runpod/volume.py +158 -0
- sky/serve/serve_state.py +1 -1
- sky/server/config.py +31 -3
- sky/server/requests/executor.py +9 -3
- sky/server/requests/payloads.py +7 -1
- sky/server/requests/preconditions.py +8 -7
- sky/server/requests/requests.py +132 -57
- sky/server/server.py +48 -38
- sky/server/stream_utils.py +14 -6
- sky/server/uvicorn.py +11 -4
- sky/skylet/constants.py +1 -1
- sky/skypilot_config.py +21 -9
- sky/ssh_node_pools/server.py +5 -5
- sky/templates/kubernetes-ray.yml.j2 +5 -5
- sky/templates/runpod-ray.yml.j2 +8 -0
- sky/users/server.py +18 -15
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/command_runner.py +4 -0
- sky/utils/db/db_utils.py +58 -1
- sky/utils/db/migration_utils.py +0 -16
- sky/utils/resource_checker.py +6 -5
- sky/utils/schemas.py +1 -1
- sky/utils/volume.py +3 -0
- sky/volumes/client/sdk.py +28 -0
- sky/volumes/server/server.py +11 -1
- sky/volumes/utils.py +117 -68
- sky/volumes/volume.py +98 -39
- {skypilot_nightly-1.0.0.dev20250902.dist-info → skypilot_nightly-1.0.0.dev20250904.dist-info}/METADATA +34 -34
- {skypilot_nightly-1.0.0.dev20250902.dist-info → skypilot_nightly-1.0.0.dev20250904.dist-info}/RECORD +86 -84
- sky/dashboard/out/_next/static/chunks/1121-8afcf719ea87debc.js +0 -1
- sky/dashboard/out/_next/static/chunks/3015-8089ed1e0b7e37fd.js +0 -1
- sky/dashboard/out/_next/static/chunks/6856-049014c6d43d127b.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.a1bef12d672bb66d.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-89a84fd7fa31362d.js +0 -6
- sky/dashboard/out/_next/static/chunks/9984.7eb6cc51fb460cae.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-06afb50d25f7c61f.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-7421e63ac35f8fce.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-0eaa6f7e63f51311.js +0 -1
- /sky/dashboard/out/_next/static/{tio0QibqY2C0F2-rPy00p → mriHUOVL_Ht-CeW-e7saa}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250902.dist-info → skypilot_nightly-1.0.0.dev20250904.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250902.dist-info → skypilot_nightly-1.0.0.dev20250904.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250902.dist-info → skypilot_nightly-1.0.0.dev20250904.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250902.dist-info → skypilot_nightly-1.0.0.dev20250904.dist-info}/top_level.txt +0 -0
|
@@ -302,7 +302,7 @@ available_node_types:
|
|
|
302
302
|
provreq.kueue.x-k8s.io/maxRunDurationSeconds: "{{k8s_max_run_duration_seconds|string}}"
|
|
303
303
|
{% endif %}
|
|
304
304
|
{% endif %}
|
|
305
|
-
# https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx
|
|
305
|
+
# https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx
|
|
306
306
|
# Values from google cloud guide
|
|
307
307
|
{% if k8s_enable_gpudirect_tcpx %}
|
|
308
308
|
devices.gke.io/container.tcpx-daemon: |+
|
|
@@ -784,8 +784,8 @@ available_node_types:
|
|
|
784
784
|
echo "Waiting for patch package to be installed..."
|
|
785
785
|
done
|
|
786
786
|
# Apply Ray patches for progress bar fix
|
|
787
|
-
~/.local/bin/uv pip list | grep "ray " | grep 2.9.3 2>&1 > /dev/null && {
|
|
788
|
-
VIRTUAL_ENV=~/skypilot-runtime python -c "from sky.skylet.ray_patches import patch; patch()" || exit 1;
|
|
787
|
+
~/.local/bin/uv pip list | grep "ray " | grep 2.9.3 2>&1 > /dev/null && {
|
|
788
|
+
VIRTUAL_ENV=~/skypilot-runtime python -c "from sky.skylet.ray_patches import patch; patch()" || exit 1;
|
|
789
789
|
}
|
|
790
790
|
touch /tmp/ray_skypilot_installation_complete
|
|
791
791
|
echo "=== Ray and skypilot installation completed ==="
|
|
@@ -1202,7 +1202,7 @@ setup_commands:
|
|
|
1202
1202
|
{%- endfor %}
|
|
1203
1203
|
STEPS=("apt-ssh-setup" "runtime-setup" "env-setup")
|
|
1204
1204
|
start_epoch=$(date +%s);
|
|
1205
|
-
|
|
1205
|
+
|
|
1206
1206
|
# Wait for SSH setup to complete before proceeding
|
|
1207
1207
|
if [ -f /tmp/apt_ssh_setup_started ]; then
|
|
1208
1208
|
echo "=== Logs for asynchronous SSH setup ===";
|
|
@@ -1210,7 +1210,7 @@ setup_commands:
|
|
|
1210
1210
|
{ tail -f -n +1 /tmp/${STEPS[0]}.log & TAIL_PID=$!; echo "Tail PID: $TAIL_PID"; until [ -f /tmp/apt_ssh_setup_complete ]; do sleep 0.5; done; kill $TAIL_PID || true; };
|
|
1211
1211
|
[ -f /tmp/${STEPS[0]}.failed ] && { echo "Error: ${STEPS[0]} failed. Exiting."; exit 1; } || true;
|
|
1212
1212
|
fi
|
|
1213
|
-
|
|
1213
|
+
|
|
1214
1214
|
echo "=== Logs for asynchronous ray and skypilot installation ===";
|
|
1215
1215
|
if [ -f /tmp/skypilot_is_nimbus ]; then
|
|
1216
1216
|
echo "=== Logs for asynchronous ray and skypilot installation ===";
|
sky/templates/runpod-ray.yml.j2
CHANGED
|
@@ -40,6 +40,14 @@ available_node_types:
|
|
|
40
40
|
skypilot:ssh_public_key_content
|
|
41
41
|
Preemptible: {{use_spot}}
|
|
42
42
|
BidPerGPU: {{bid_per_gpu}}
|
|
43
|
+
{%- if volume_mounts and volume_mounts|length > 0 %}
|
|
44
|
+
VolumeMounts:
|
|
45
|
+
{%- for vm in volume_mounts %}
|
|
46
|
+
- VolumeNameOnCloud: {{ vm.volume_name_on_cloud }}
|
|
47
|
+
VolumeIdOnCloud: {{ vm.volume_id_on_cloud }}
|
|
48
|
+
MountPath: {{ vm.path }}
|
|
49
|
+
{%- endfor %}
|
|
50
|
+
{%- endif %}
|
|
43
51
|
|
|
44
52
|
head_node_type: ray_head_default
|
|
45
53
|
|
sky/users/server.py
CHANGED
|
@@ -33,8 +33,12 @@ USER_LOCK_TIMEOUT_SECONDS = 20
|
|
|
33
33
|
router = fastapi.APIRouter()
|
|
34
34
|
|
|
35
35
|
|
|
36
|
+
# All handlers in this module are sync so that FastAPI runs them in a
|
|
37
|
+
# ThreadPoolExecutor to avoid blocking the async event loop.
|
|
38
|
+
# TODO(aylei): make these async once we have the global_user_state async
|
|
39
|
+
# support.
|
|
36
40
|
@router.get('')
|
|
37
|
-
|
|
41
|
+
def users() -> List[Dict[str, Any]]:
|
|
38
42
|
"""Gets all users."""
|
|
39
43
|
all_users = []
|
|
40
44
|
user_list = global_user_state.get_all_users()
|
|
@@ -54,7 +58,7 @@ async def users() -> List[Dict[str, Any]]:
|
|
|
54
58
|
|
|
55
59
|
|
|
56
60
|
@router.get('/role')
|
|
57
|
-
|
|
61
|
+
def get_current_user_role(request: fastapi.Request):
|
|
58
62
|
"""Get current user's role."""
|
|
59
63
|
# TODO(hailong): is there a reliable way to get the user
|
|
60
64
|
# hash for the request without 'X-Auth-Request-Email' header?
|
|
@@ -70,7 +74,7 @@ async def get_current_user_role(request: fastapi.Request):
|
|
|
70
74
|
|
|
71
75
|
|
|
72
76
|
@router.post('/create')
|
|
73
|
-
|
|
77
|
+
def user_create(user_create_body: payloads.UserCreateBody) -> None:
|
|
74
78
|
username = user_create_body.username
|
|
75
79
|
password = user_create_body.password
|
|
76
80
|
role = user_create_body.role
|
|
@@ -100,8 +104,8 @@ async def user_create(user_create_body: payloads.UserCreateBody) -> None:
|
|
|
100
104
|
|
|
101
105
|
|
|
102
106
|
@router.post('/update')
|
|
103
|
-
|
|
104
|
-
|
|
107
|
+
def user_update(request: fastapi.Request,
|
|
108
|
+
user_update_body: payloads.UserUpdateBody) -> None:
|
|
105
109
|
"""Updates the user role."""
|
|
106
110
|
user_id = user_update_body.user_id
|
|
107
111
|
role = user_update_body.role
|
|
@@ -181,14 +185,13 @@ def _delete_user(user_id: str) -> None:
|
|
|
181
185
|
|
|
182
186
|
|
|
183
187
|
@router.post('/delete')
|
|
184
|
-
|
|
188
|
+
def user_delete(user_delete_body: payloads.UserDeleteBody) -> None:
|
|
185
189
|
user_id = user_delete_body.user_id
|
|
186
190
|
_delete_user(user_id)
|
|
187
191
|
|
|
188
192
|
|
|
189
193
|
@router.post('/import')
|
|
190
|
-
|
|
191
|
-
user_import_body: payloads.UserImportBody) -> Dict[str, Any]:
|
|
194
|
+
def user_import(user_import_body: payloads.UserImportBody) -> Dict[str, Any]:
|
|
192
195
|
"""Import users from CSV content."""
|
|
193
196
|
csv_content = user_import_body.csv_content
|
|
194
197
|
|
|
@@ -305,7 +308,7 @@ async def user_import(
|
|
|
305
308
|
|
|
306
309
|
|
|
307
310
|
@router.get('/export')
|
|
308
|
-
|
|
311
|
+
def user_export() -> Dict[str, Any]:
|
|
309
312
|
"""Export all users as CSV content."""
|
|
310
313
|
try:
|
|
311
314
|
# Get all users
|
|
@@ -369,7 +372,7 @@ def _user_lock(user_id: str) -> Generator[None, None, None]:
|
|
|
369
372
|
|
|
370
373
|
|
|
371
374
|
@router.get('/service-account-tokens')
|
|
372
|
-
|
|
375
|
+
def get_service_account_tokens(
|
|
373
376
|
request: fastapi.Request) -> List[Dict[str, Any]]:
|
|
374
377
|
"""Get service account tokens. All users can see all tokens."""
|
|
375
378
|
auth_user = request.state.auth_user
|
|
@@ -420,7 +423,7 @@ def _generate_service_account_user_id() -> str:
|
|
|
420
423
|
|
|
421
424
|
|
|
422
425
|
@router.post('/service-account-tokens')
|
|
423
|
-
|
|
426
|
+
def create_service_account_token(
|
|
424
427
|
request: fastapi.Request,
|
|
425
428
|
token_body: payloads.ServiceAccountTokenCreateBody) -> Dict[str, Any]:
|
|
426
429
|
"""Create a new service account token."""
|
|
@@ -508,7 +511,7 @@ async def create_service_account_token(
|
|
|
508
511
|
|
|
509
512
|
|
|
510
513
|
@router.post('/service-account-tokens/delete')
|
|
511
|
-
|
|
514
|
+
def delete_service_account_token(
|
|
512
515
|
request: fastapi.Request,
|
|
513
516
|
token_body: payloads.ServiceAccountTokenDeleteBody) -> Dict[str, str]:
|
|
514
517
|
"""Delete a service account token.
|
|
@@ -549,7 +552,7 @@ async def delete_service_account_token(
|
|
|
549
552
|
|
|
550
553
|
|
|
551
554
|
@router.post('/service-account-tokens/get-role')
|
|
552
|
-
|
|
555
|
+
def get_service_account_role(
|
|
553
556
|
request: fastapi.Request,
|
|
554
557
|
role_body: payloads.ServiceAccountTokenRoleBody) -> Dict[str, Any]:
|
|
555
558
|
"""Get the role of a service account."""
|
|
@@ -585,7 +588,7 @@ async def get_service_account_role(
|
|
|
585
588
|
|
|
586
589
|
|
|
587
590
|
@router.post('/service-account-tokens/update-role')
|
|
588
|
-
|
|
591
|
+
def update_service_account_role(
|
|
589
592
|
request: fastapi.Request,
|
|
590
593
|
role_body: payloads.ServiceAccountTokenUpdateRoleBody
|
|
591
594
|
) -> Dict[str, str]:
|
|
@@ -628,7 +631,7 @@ async def update_service_account_role(
|
|
|
628
631
|
|
|
629
632
|
|
|
630
633
|
@router.post('/service-account-tokens/rotate')
|
|
631
|
-
|
|
634
|
+
def rotate_service_account_token(
|
|
632
635
|
request: fastapi.Request,
|
|
633
636
|
token_body: payloads.ServiceAccountTokenRotateBody) -> Dict[str, Any]:
|
|
634
637
|
"""Rotate a service account token.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Utility functions for benchmarking."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
from typing import Callable, Optional
|
|
7
|
+
|
|
8
|
+
from sky import sky_logging
|
|
9
|
+
|
|
10
|
+
logger = sky_logging.init_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def log_execution_time(func: Optional[Callable] = None,
                       *,
                       name: Optional[str] = None,
                       level: int = logging.DEBUG,
                       precision: int = 4) -> Callable:
    """Mark a function and log its execution time.

    Can be used as a bare decorator or as a decorator factory with keyword
    options. The elapsed time is measured with ``time.perf_counter()`` and
    is logged even when the wrapped function raises.

    Args:
        func: Function to decorate. Left as None when the decorator is
            called with keyword options only.
        name: Label used in the log line. Defaults to the decorated
            function's ``__name__``.
        level: Logging level.
        precision: Number of decimal places (default: 4).

    Returns:
        The wrapped function when ``func`` is given, otherwise a decorator
        that wraps the function it is applied to.

    Usage:
        from sky.utils import benchmark_utils

        @benchmark_utils.log_execution_time
        def my_function():
            pass

        @benchmark_utils.log_execution_time(name='my_module.my_function2')
        def my_function2():
            pass
    """

    def decorator(f: Callable) -> Callable:
        # Resolve the label once per decorated function instead of rebinding
        # the shared ``name`` closure variable at call time. Otherwise a
        # decorator instance applied to several functions would log all of
        # them under the name of whichever one ran first.
        # ``getattr`` keeps callables without ``__name__`` (for example
        # ``functools.partial`` objects) working.
        label = name or getattr(f, '__name__', repr(f))

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            start_time = time.perf_counter()
            try:
                return f(*args, **kwargs)
            finally:
                # Runs on both normal return and exception.
                execution_time = time.perf_counter() - start_time
                log = (f'Method {label} executed in '
                       f'{execution_time:.{precision}f}')
                logger.log(level, log)

        return wrapper

    if func is None:
        return decorator
    return decorator(func)
|
sky/utils/command_runner.py
CHANGED
|
@@ -41,6 +41,8 @@ RSYNC_FILTER_GITIGNORE = f'--filter=\'dir-merge,- {constants.GIT_IGNORE_FILE}\''
|
|
|
41
41
|
# The git exclude file to support.
|
|
42
42
|
GIT_EXCLUDE = '.git/info/exclude'
|
|
43
43
|
RSYNC_EXCLUDE_OPTION = '--exclude-from={}'
|
|
44
|
+
# Owner and group metadata is not needed for downloads.
|
|
45
|
+
RSYNC_NO_OWNER_NO_GROUP_OPTION = '--no-owner --no-group'
|
|
44
46
|
|
|
45
47
|
_HASH_MAX_LENGTH = 10
|
|
46
48
|
_DEFAULT_CONNECT_TIMEOUT = 30
|
|
@@ -286,6 +288,8 @@ class CommandRunner:
|
|
|
286
288
|
if prefix_command is not None:
|
|
287
289
|
rsync_command.append(prefix_command)
|
|
288
290
|
rsync_command += ['rsync', RSYNC_DISPLAY_OPTION]
|
|
291
|
+
if not up:
|
|
292
|
+
rsync_command.append(RSYNC_NO_OWNER_NO_GROUP_OPTION)
|
|
289
293
|
|
|
290
294
|
# --filter
|
|
291
295
|
# The source is a local path, so we need to resolve it.
|
sky/utils/db/db_utils.py
CHANGED
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
import asyncio
|
|
3
3
|
import contextlib
|
|
4
4
|
import enum
|
|
5
|
+
import os
|
|
6
|
+
import pathlib
|
|
5
7
|
import sqlite3
|
|
6
8
|
import threading
|
|
7
9
|
import typing
|
|
8
|
-
from typing import Any, Callable, Iterable, Optional
|
|
10
|
+
from typing import Any, Callable, Dict, Iterable, Optional
|
|
9
11
|
|
|
10
12
|
import aiosqlite
|
|
11
13
|
import aiosqlite.context
|
|
@@ -13,6 +15,7 @@ import sqlalchemy
|
|
|
13
15
|
from sqlalchemy import exc as sqlalchemy_exc
|
|
14
16
|
|
|
15
17
|
from sky import sky_logging
|
|
18
|
+
from sky.skylet import constants
|
|
16
19
|
|
|
17
20
|
logger = sky_logging.init_logger(__name__)
|
|
18
21
|
if typing.TYPE_CHECKING:
|
|
@@ -346,3 +349,57 @@ class SQLiteConn(threading.local):
|
|
|
346
349
|
) -> Iterable[sqlite3.Row]:
|
|
347
350
|
conn = await self._get_async_conn()
|
|
348
351
|
return await conn.execute_fetchall(sql, parameters)
|
|
352
|
+
|
|
353
|
+
async def close(self):
    """Close the async connection, if one exists, and the sync connection.

    The sync connection is closed in a ``finally`` block so it is released
    even when closing the async connection raises. That exception still
    propagates to the caller.
    """
    try:
        if self._async_conn is not None:
            await self._async_conn.close()
    finally:
        self.conn.close()
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
_max_connections = 0
|
|
360
|
+
_postgres_engine_cache: Dict[str, sqlalchemy.engine.Engine] = {}
|
|
361
|
+
_sqlite_engine_cache: Dict[str, sqlalchemy.engine.Engine] = {}
|
|
362
|
+
|
|
363
|
+
_db_creation_lock = threading.Lock()
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def set_max_connections(max_connections: int) -> None:
    """Store the module-level connection limit.

    Args:
        max_connections: New value for the module-level
            ``_max_connections`` setting.
    """
    global _max_connections
    _max_connections = max_connections
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def get_max_connections() -> int:
    """Return the current module-level ``_max_connections`` value."""
    return _max_connections
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def get_engine(db_name: str):
    """Return a cached SQLAlchemy engine for ``db_name``.

    If ``constants.ENV_VAR_IS_SKYPILOT_SERVER`` is set in the environment
    and ``constants.ENV_VAR_DB_CONNECTION_URI`` is non-empty, the engine is
    created from that connection URI and cached per connection string.
    Otherwise a SQLite engine backed by ``~/.sky/{db_name}.db`` is used,
    cached per database path. The parent directory is created if missing.

    For the connection-URI case the pool is chosen from the module-level
    ``_max_connections`` value:
      * 0: ``NullPool``.
      * 1: ``StaticPool``.
      * otherwise: ``QueuePool`` with ``pool_size=_max_connections`` and
        ``max_overflow=0``.

    Args:
        db_name: Base name of the SQLite database file. It is only used to
            build the file path in the SQLite case.

    Returns:
        The ``sqlalchemy.engine.Engine`` for the selected backend.
    """
    conn_string = None
    if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
        conn_string = os.environ.get(constants.ENV_VAR_DB_CONNECTION_URI)
    if conn_string:
        # Check-and-create is done under the lock so that concurrent callers
        # share one engine per connection string.
        with _db_creation_lock:
            if conn_string not in _postgres_engine_cache:
                if _max_connections == 0:
                    _postgres_engine_cache[conn_string] = (
                        sqlalchemy.create_engine(
                            conn_string, poolclass=sqlalchemy.pool.NullPool))
                elif _max_connections == 1:
                    _postgres_engine_cache[conn_string] = (
                        sqlalchemy.create_engine(
                            conn_string, poolclass=sqlalchemy.pool.StaticPool))
                else:
                    # ``create_engine`` forwards ``pool_size`` to QueuePool.
                    # The previous ``size=`` keyword is not a recognized
                    # argument and makes ``create_engine`` raise TypeError.
                    _postgres_engine_cache[conn_string] = (
                        sqlalchemy.create_engine(
                            conn_string,
                            poolclass=sqlalchemy.pool.QueuePool,
                            pool_size=_max_connections,
                            max_overflow=0))
            engine = _postgres_engine_cache[conn_string]
    else:
        db_path = os.path.expanduser(f'~/.sky/{db_name}.db')
        pathlib.Path(db_path).parents[0].mkdir(parents=True, exist_ok=True)
        if db_path not in _sqlite_engine_cache:
            _sqlite_engine_cache[db_path] = sqlalchemy.create_engine(
                'sqlite:///' + db_path)
        engine = _sqlite_engine_cache[db_path]
    return engine
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
import contextlib
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
-
import pathlib
|
|
7
6
|
|
|
8
7
|
from alembic import command as alembic_command
|
|
9
8
|
from alembic.config import Config
|
|
@@ -12,7 +11,6 @@ import filelock
|
|
|
12
11
|
import sqlalchemy
|
|
13
12
|
|
|
14
13
|
from sky import sky_logging
|
|
15
|
-
from sky.skylet import constants
|
|
16
14
|
|
|
17
15
|
logger = sky_logging.init_logger(__name__)
|
|
18
16
|
|
|
@@ -31,20 +29,6 @@ SERVE_VERSION = '001'
|
|
|
31
29
|
SERVE_LOCK_PATH = '~/.sky/locks/.serve_db.lock'
|
|
32
30
|
|
|
33
31
|
|
|
34
|
-
def get_engine(db_name: str):
|
|
35
|
-
conn_string = None
|
|
36
|
-
if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
|
|
37
|
-
conn_string = os.environ.get(constants.ENV_VAR_DB_CONNECTION_URI)
|
|
38
|
-
if conn_string:
|
|
39
|
-
engine = sqlalchemy.create_engine(conn_string,
|
|
40
|
-
poolclass=sqlalchemy.NullPool)
|
|
41
|
-
else:
|
|
42
|
-
db_path = os.path.expanduser(f'~/.sky/{db_name}.db')
|
|
43
|
-
pathlib.Path(db_path).parents[0].mkdir(parents=True, exist_ok=True)
|
|
44
|
-
engine = sqlalchemy.create_engine('sqlite:///' + db_path)
|
|
45
|
-
return engine
|
|
46
|
-
|
|
47
|
-
|
|
48
32
|
@contextlib.contextmanager
|
|
49
33
|
def db_lock(db_name: str):
|
|
50
34
|
lock_path = os.path.expanduser(f'~/.sky/locks/.{db_name}.lock')
|
sky/utils/resource_checker.py
CHANGED
|
@@ -269,16 +269,17 @@ def _get_active_resources(
|
|
|
269
269
|
all_managed_jobs: List[Dict[str, Any]]
|
|
270
270
|
"""
|
|
271
271
|
|
|
272
|
-
def get_all_clusters():
|
|
272
|
+
def get_all_clusters() -> List[Dict[str, Any]]:
|
|
273
273
|
return global_user_state.get_clusters()
|
|
274
274
|
|
|
275
|
-
def get_all_managed_jobs():
|
|
275
|
+
def get_all_managed_jobs() -> List[Dict[str, Any]]:
|
|
276
276
|
# pylint: disable=import-outside-toplevel
|
|
277
277
|
from sky.jobs.server import core as managed_jobs_core
|
|
278
278
|
try:
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
279
|
+
filtered_jobs, _, _, _ = managed_jobs_core.queue(refresh=False,
|
|
280
|
+
skip_finished=True,
|
|
281
|
+
all_users=True)
|
|
282
|
+
return filtered_jobs
|
|
282
283
|
except exceptions.ClusterNotUpError:
|
|
283
284
|
logger.warning('All jobs should be finished.')
|
|
284
285
|
return []
|
sky/utils/schemas.py
CHANGED
|
@@ -432,7 +432,7 @@ def get_volume_schema():
|
|
|
432
432
|
return {
|
|
433
433
|
'$schema': 'https://json-schema.org/draft/2020-12/schema',
|
|
434
434
|
'type': 'object',
|
|
435
|
-
'required': ['name', 'type'
|
|
435
|
+
'required': ['name', 'type'],
|
|
436
436
|
'additionalProperties': False,
|
|
437
437
|
'properties': {
|
|
438
438
|
'name': {
|
sky/utils/volume.py
CHANGED
|
@@ -10,6 +10,8 @@ from sky.utils import common_utils
|
|
|
10
10
|
from sky.utils import schemas
|
|
11
11
|
from sky.utils import status_lib
|
|
12
12
|
|
|
13
|
+
MIN_RUNPOD_NETWORK_VOLUME_SIZE_GB = 10
|
|
14
|
+
|
|
13
15
|
|
|
14
16
|
class VolumeAccessMode(enum.Enum):
|
|
15
17
|
"""Volume access mode."""
|
|
@@ -22,6 +24,7 @@ class VolumeAccessMode(enum.Enum):
|
|
|
22
24
|
class VolumeType(enum.Enum):
|
|
23
25
|
"""Volume type."""
|
|
24
26
|
PVC = 'k8s-pvc'
|
|
27
|
+
RUNPOD_NETWORK_VOLUME = 'runpod-network-volume'
|
|
25
28
|
|
|
26
29
|
|
|
27
30
|
class VolumeMount:
|
sky/volumes/client/sdk.py
CHANGED
|
@@ -27,6 +27,34 @@ logger = sky_logging.init_logger(__name__)
|
|
|
27
27
|
def apply(volume: volume_lib.Volume) -> server_common.RequestId[None]:
|
|
28
28
|
"""Creates or registers a volume.
|
|
29
29
|
|
|
30
|
+
Example:
|
|
31
|
+
.. code-block:: python
|
|
32
|
+
|
|
33
|
+
import sky.volumes
|
|
34
|
+
cfg = {
|
|
35
|
+
'name': 'pvc',
|
|
36
|
+
'type': 'k8s-pvc',
|
|
37
|
+
'size': '100GB',
|
|
38
|
+
'labels': {
|
|
39
|
+
'key': 'value',
|
|
40
|
+
},
|
|
41
|
+
}
|
|
42
|
+
vol = sky.volumes.Volume.from_yaml_config(cfg)
|
|
43
|
+
request_id = sky.volumes.apply(vol)
|
|
44
|
+
sky.get(request_id)
|
|
45
|
+
|
|
46
|
+
or
|
|
47
|
+
|
|
48
|
+
import sky.volumes
|
|
49
|
+
vol = sky.volumes.Volume(
|
|
50
|
+
name='vol',
|
|
51
|
+
type='runpod-network-volume',
|
|
52
|
+
infra='runpod/ca/CA-MTL-1',
|
|
53
|
+
size='100GB',
|
|
54
|
+
)
|
|
55
|
+
request_id = sky.volumes.apply(vol)
|
|
56
|
+
sky.get(request_id)
|
|
57
|
+
|
|
30
58
|
Args:
|
|
31
59
|
volume: The volume to apply.
|
|
32
60
|
|
sky/volumes/server/server.py
CHANGED
|
@@ -19,10 +19,15 @@ router = fastapi.APIRouter()
|
|
|
19
19
|
@router.get('')
|
|
20
20
|
async def volume_list(request: fastapi.Request) -> None:
|
|
21
21
|
"""Gets the volumes."""
|
|
22
|
+
auth_user = request.state.auth_user
|
|
23
|
+
auth_user_env_vars_kwargs = {
|
|
24
|
+
'env_vars': auth_user.to_env_vars()
|
|
25
|
+
} if auth_user else {}
|
|
26
|
+
volume_list_body = payloads.VolumeListBody(**auth_user_env_vars_kwargs)
|
|
22
27
|
executor.schedule_request(
|
|
23
28
|
request_id=request.state.request_id,
|
|
24
29
|
request_name='volume_list',
|
|
25
|
-
request_body=
|
|
30
|
+
request_body=volume_list_body,
|
|
26
31
|
func=core.volume_list,
|
|
27
32
|
schedule_type=requests_lib.ScheduleType.SHORT,
|
|
28
33
|
)
|
|
@@ -76,6 +81,11 @@ async def volume_apply(request: fastapi.Request,
|
|
|
76
81
|
elif access_mode not in supported_access_modes:
|
|
77
82
|
raise fastapi.HTTPException(
|
|
78
83
|
status_code=400, detail=f'Invalid access mode: {access_mode}')
|
|
84
|
+
elif volume_type == volume.VolumeType.RUNPOD_NETWORK_VOLUME.value:
|
|
85
|
+
if not cloud.is_same_cloud(clouds.RunPod()):
|
|
86
|
+
raise fastapi.HTTPException(
|
|
87
|
+
status_code=400,
|
|
88
|
+
detail='Runpod network volume is only supported on Runpod')
|
|
79
89
|
executor.schedule_request(
|
|
80
90
|
request_id=request.state.request_id,
|
|
81
91
|
request_name='volume_apply',
|