skypilot-nightly 1.0.0.dev20250523__py3-none-any.whl → 1.0.0.dev20250526__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +62 -45
- sky/backends/cloud_vm_ray_backend.py +3 -1
- sky/check.py +335 -170
- sky/cli.py +56 -13
- sky/client/cli.py +56 -13
- sky/client/sdk.py +54 -10
- sky/clouds/gcp.py +19 -3
- sky/core.py +5 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/7GEgRyZKRaSnYZCV1Jwol/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/25-062253ea41fb8eec.js +6 -0
- sky/dashboard/out/_next/static/chunks/480-5a0de8b6570ea105.js +1 -0
- sky/dashboard/out/_next/static/chunks/488-50d843fdb5396d32.js +15 -0
- sky/dashboard/out/_next/static/chunks/498-d7722313e5e5b4e6.js +21 -0
- sky/dashboard/out/_next/static/chunks/573-f17bd89d9f9118b3.js +66 -0
- sky/dashboard/out/_next/static/chunks/578-d351125af46c293f.js +6 -0
- sky/dashboard/out/_next/static/chunks/734-a6e01d7f98904741.js +1 -0
- sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +1 -0
- sky/dashboard/out/_next/static/chunks/938-59956af3950b02ed.js +1 -0
- sky/dashboard/out/_next/static/chunks/9f96d65d-5a3e4af68c26849e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-96a715a6fb01e228.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-3b5aad09a25f64b7.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9529d9e882a0e75c.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-9e6d1ec6e1ac5b29.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-abb7d744ecf15109.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-48dc8d67d4b60be1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-73d5e0c369d00346.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/users-b8acf6e6735323a2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-bbf436f41381e169.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7733c960685b4385.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-5ed48b3201b998c8.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-deda68c926e8d0bc.js +1 -0
- sky/dashboard/out/_next/static/css/28558d57108b05ae.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/storage.py +1 -1
- sky/global_user_state.py +606 -543
- sky/jobs/constants.py +1 -1
- sky/jobs/server/core.py +72 -56
- sky/jobs/state.py +26 -5
- sky/jobs/utils.py +65 -13
- sky/optimizer.py +6 -3
- sky/provision/fluidstack/instance.py +1 -0
- sky/serve/server/core.py +9 -6
- sky/server/html/token_page.html +6 -1
- sky/server/requests/executor.py +1 -0
- sky/server/requests/payloads.py +28 -0
- sky/server/server.py +59 -5
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/constants.py +4 -1
- sky/skypilot_config.py +107 -11
- sky/utils/cli_utils/status_utils.py +18 -8
- sky/utils/db_utils.py +53 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +166 -147
- sky/utils/kubernetes/kubernetes_deploy_utils.py +49 -5
- sky/utils/kubernetes/ssh-tunnel.sh +20 -28
- sky/utils/log_utils.py +4 -0
- sky/utils/schemas.py +54 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +295 -0
- sky/workspaces/server.py +62 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/RECORD +79 -63
- sky/dashboard/out/_next/static/ECKwDNS9v9y3_IKFZ2lpp/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +0 -6
- sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +0 -1
- sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +0 -1
- sky/dashboard/out/_next/static/chunks/582-683f4f27b81996dc.js +0 -59
- sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +0 -3
- /sky/dashboard/out/_next/static/{ECKwDNS9v9y3_IKFZ2lpp → 7GEgRyZKRaSnYZCV1Jwol}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250523.dist-info → skypilot_nightly-1.0.0.dev20250526.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'bbcd11ba81d6312ae5e292e891da531024c321aa'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20250523'
|
38
|
+
__version__ = '1.0.0.dev20250526'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/backends/backend_utils.py
CHANGED
@@ -1556,6 +1556,16 @@ def check_owner_identity(cluster_name: str) -> None:
|
|
1556
1556
|
handle = record['handle']
|
1557
1557
|
if not isinstance(handle, backends.CloudVmRayResourceHandle):
|
1558
1558
|
return
|
1559
|
+
active_workspace = skypilot_config.get_active_workspace()
|
1560
|
+
cluster_workspace = record.get('workspace',
|
1561
|
+
constants.SKYPILOT_DEFAULT_WORKSPACE)
|
1562
|
+
if active_workspace != cluster_workspace:
|
1563
|
+
with ux_utils.print_exception_no_traceback():
|
1564
|
+
raise exceptions.ClusterOwnerIdentityMismatchError(
|
1565
|
+
f'{colorama.Fore.YELLOW}'
|
1566
|
+
f'The cluster {cluster_name!r} is in workspace '
|
1567
|
+
f'{cluster_workspace!r}, but the active workspace is '
|
1568
|
+
f'{active_workspace!r}.{colorama.Fore.RESET}')
|
1559
1569
|
|
1560
1570
|
launched_resources = handle.launched_resources.assert_launchable()
|
1561
1571
|
cloud = launched_resources.cloud
|
@@ -2152,57 +2162,64 @@ def refresh_cluster_record(
|
|
2152
2162
|
record = global_user_state.get_cluster_from_name(cluster_name)
|
2153
2163
|
if record is None:
|
2154
2164
|
return None
|
2155
|
-
|
2156
|
-
|
2157
|
-
|
2158
|
-
|
2159
|
-
|
2160
|
-
# The loop logic allows us to notice if the status was updated in the
|
2161
|
-
# global_user_state by another process and stop trying to get the lock.
|
2162
|
-
# The core loop logic is adapted from FileLock's implementation.
|
2163
|
-
lock = filelock.FileLock(CLUSTER_STATUS_LOCK_PATH.format(cluster_name))
|
2164
|
-
start_time = time.perf_counter()
|
2165
|
+
# TODO(zhwu, 05/20): switch to the specific workspace to make sure we are
|
2166
|
+
# using the correct cloud credentials.
|
2167
|
+
workspace = record.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE)
|
2168
|
+
with skypilot_config.local_active_workspace_ctx(workspace):
|
2169
|
+
check_owner_identity(cluster_name)
|
2165
2170
|
|
2166
|
-
|
2167
|
-
while True:
|
2168
|
-
# Check to see if we can return the cached status.
|
2169
|
-
if not _must_refresh_cluster_status(record, force_refresh_statuses):
|
2171
|
+
if not isinstance(record['handle'], backends.CloudVmRayResourceHandle):
|
2170
2172
|
return record
|
2171
2173
|
|
2172
|
-
if
|
2173
|
-
|
2174
|
+
# The loop logic allows us to notice if the status was updated in the
|
2175
|
+
# global_user_state by another process and stop trying to get the lock.
|
2176
|
+
# The core loop logic is adapted from FileLock's implementation.
|
2177
|
+
lock = filelock.FileLock(CLUSTER_STATUS_LOCK_PATH.format(cluster_name))
|
2178
|
+
start_time = time.perf_counter()
|
2174
2179
|
|
2175
|
-
#
|
2176
|
-
|
2177
|
-
|
2178
|
-
|
2179
|
-
|
2180
|
-
|
2181
|
-
|
2182
|
-
record, force_refresh_statuses):
|
2183
|
-
return record
|
2184
|
-
# Update and return the cluster status.
|
2180
|
+
# Loop until we have an up-to-date status or until we acquire the lock.
|
2181
|
+
while True:
|
2182
|
+
# Check to see if we can return the cached status.
|
2183
|
+
if not _must_refresh_cluster_status(record, force_refresh_statuses):
|
2184
|
+
return record
|
2185
|
+
|
2186
|
+
if not acquire_per_cluster_status_lock:
|
2185
2187
|
return _update_cluster_status(cluster_name)
|
2186
|
-
except filelock.Timeout:
|
2187
|
-
# lock.acquire() will throw a Timeout exception if the lock is not
|
2188
|
-
# available and we have blocking=False.
|
2189
|
-
pass
|
2190
|
-
|
2191
|
-
# Logic adapted from FileLock.acquire().
|
2192
|
-
# If cluster_status_lock_time is <0, we will never hit this. No timeout.
|
2193
|
-
# Otherwise, if we have timed out, return the cached status. This has
|
2194
|
-
# the potential to cause correctness issues, but if so it is the
|
2195
|
-
# caller's responsibility to set the timeout to -1.
|
2196
|
-
if 0 <= cluster_status_lock_timeout < time.perf_counter() - start_time:
|
2197
|
-
logger.debug('Refreshing status: Failed get the lock for cluster '
|
2198
|
-
f'{cluster_name!r}. Using the cached status.')
|
2199
|
-
return record
|
2200
|
-
time.sleep(0.05)
|
2201
2188
|
|
2202
|
-
|
2203
|
-
|
2204
|
-
|
2205
|
-
|
2189
|
+
# Try to acquire the lock so we can fetch the status.
|
2190
|
+
try:
|
2191
|
+
with lock.acquire(blocking=False):
|
2192
|
+
# Check the cluster status again, since it could have been
|
2193
|
+
# updated between our last check and acquiring the lock.
|
2194
|
+
record = global_user_state.get_cluster_from_name(
|
2195
|
+
cluster_name)
|
2196
|
+
if record is None or not _must_refresh_cluster_status(
|
2197
|
+
record, force_refresh_statuses):
|
2198
|
+
return record
|
2199
|
+
# Update and return the cluster status.
|
2200
|
+
return _update_cluster_status(cluster_name)
|
2201
|
+
except filelock.Timeout:
|
2202
|
+
# lock.acquire() will throw a Timeout exception if the lock is not
|
2203
|
+
# available and we have blocking=False.
|
2204
|
+
pass
|
2205
|
+
|
2206
|
+
# Logic adapted from FileLock.acquire().
|
2207
|
+
# If cluster_status_lock_time is <0, we will never hit this. No timeout.
|
2208
|
+
# Otherwise, if we have timed out, return the cached status. This has
|
2209
|
+
# the potential to cause correctness issues, but if so it is the
|
2210
|
+
# caller's responsibility to set the timeout to -1.
|
2211
|
+
if 0 <= cluster_status_lock_timeout < time.perf_counter(
|
2212
|
+
) - start_time:
|
2213
|
+
logger.debug(
|
2214
|
+
'Refreshing status: Failed get the lock for cluster '
|
2215
|
+
f'{cluster_name!r}. Using the cached status.')
|
2216
|
+
return record
|
2217
|
+
time.sleep(0.05)
|
2218
|
+
|
2219
|
+
# Refresh for next loop iteration.
|
2220
|
+
record = global_user_state.get_cluster_from_name(cluster_name)
|
2221
|
+
if record is None:
|
2222
|
+
return None
|
2206
2223
|
|
2207
2224
|
|
2208
2225
|
@timeline.event
|
@@ -3507,7 +3507,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3507
3507
|
# Add the managed job to job queue database.
|
3508
3508
|
managed_job_codegen = managed_jobs.ManagedJobCodeGen()
|
3509
3509
|
managed_job_code = managed_job_codegen.set_pending(
|
3510
|
-
job_id, managed_job_dag
|
3510
|
+
job_id, managed_job_dag,
|
3511
|
+
skypilot_config.get_active_workspace(
|
3512
|
+
force_user_workspace=True))
|
3511
3513
|
# Set the managed job to PENDING state to make sure that this
|
3512
3514
|
# managed job appears in the `sky jobs queue`, even if it needs
|
3513
3515
|
# to wait to be submitted.
|