skypilot-nightly 1.0.0.dev20250413__py3-none-any.whl → 1.0.0.dev20250421__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +7 -0
- sky/authentication.py +2 -2
- sky/backends/backend_utils.py +31 -3
- sky/backends/cloud_vm_ray_backend.py +22 -29
- sky/backends/wheel_utils.py +9 -0
- sky/check.py +1 -1
- sky/cli.py +253 -74
- sky/client/cli.py +253 -74
- sky/client/common.py +10 -3
- sky/client/sdk.py +11 -8
- sky/clouds/aws.py +2 -2
- sky/clouds/kubernetes.py +0 -8
- sky/clouds/oci.py +1 -1
- sky/core.py +17 -11
- sky/dashboard/out/404.html +1 -0
- sky/dashboard/out/_next/static/chunks/236-d437cf66e68a6f64.js +6 -0
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +15 -0
- sky/dashboard/out/_next/static/chunks/37-72fdc8f71d6e4784.js +6 -0
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +59 -0
- sky/dashboard/out/_next/static/chunks/845-2ea1cc63ba1f4067.js +1 -0
- sky/dashboard/out/_next/static/chunks/979-7cd0778078b9cfad.js +1 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-3001e84c61acddfb.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-b09f7fbf6d5d74f6.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b57ec043f09c5813.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-ef2e0e91a9222cac.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +1 -0
- sky/dashboard/out/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +1 -0
- sky/dashboard/out/_next/static/css/f3538cd90cfca88c.css +3 -0
- sky/dashboard/out/_next/static/mS9YfLA5hhsJMeBj9W8J7/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/mS9YfLA5hhsJMeBj9W8J7/_ssgManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -0
- sky/dashboard/out/clusters/[cluster].html +1 -0
- sky/dashboard/out/clusters.html +1 -0
- sky/dashboard/out/favicon.ico +0 -0
- sky/dashboard/out/index.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -0
- sky/dashboard/out/jobs.html +1 -0
- sky/dashboard/out/skypilot.svg +15 -0
- sky/dashboard/out/videos/cursor-small.mp4 +0 -0
- sky/data/data_transfer.py +2 -1
- sky/data/storage.py +24 -14
- sky/exceptions.py +5 -0
- sky/jobs/constants.py +8 -1
- sky/jobs/server/core.py +12 -8
- sky/models.py +28 -0
- sky/optimizer.py +7 -9
- sky/provision/kubernetes/config.py +1 -1
- sky/provision/kubernetes/instance.py +16 -14
- sky/provision/kubernetes/network_utils.py +1 -1
- sky/provision/kubernetes/utils.py +50 -22
- sky/provision/provisioner.py +2 -1
- sky/resources.py +56 -2
- sky/serve/__init__.py +2 -0
- sky/serve/autoscalers.py +6 -2
- sky/serve/client/sdk.py +61 -0
- sky/serve/constants.py +6 -0
- sky/serve/load_balancing_policies.py +0 -4
- sky/serve/replica_managers.py +6 -8
- sky/serve/serve_state.py +0 -6
- sky/serve/serve_utils.py +33 -1
- sky/serve/server/core.py +192 -7
- sky/serve/server/server.py +28 -0
- sky/server/common.py +152 -47
- sky/server/constants.py +7 -1
- sky/server/requests/executor.py +4 -0
- sky/server/requests/payloads.py +12 -15
- sky/server/requests/serializers/decoders.py +2 -5
- sky/server/requests/serializers/encoders.py +2 -5
- sky/server/server.py +44 -1
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +1 -0
- sky/sky_logging.py +12 -2
- sky/skylet/constants.py +5 -7
- sky/skylet/job_lib.py +3 -3
- sky/skypilot_config.py +225 -84
- sky/templates/kubernetes-ray.yml.j2 +7 -3
- sky/utils/cli_utils/status_utils.py +12 -5
- sky/utils/config_utils.py +39 -15
- sky/utils/controller_utils.py +44 -7
- sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
- sky/utils/kubernetes/gpu_labeler.py +99 -16
- sky/utils/schemas.py +24 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/RECORD +97 -64
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/top_level.txt +0 -0
sky/server/requests/payloads.py
CHANGED
@@ -6,7 +6,6 @@ with the backend functions. The benefit of having the default values in the
|
|
6
6
|
payloads is that a user can find the default values in the Restful API docs.
|
7
7
|
"""
|
8
8
|
import getpass
|
9
|
-
import json
|
10
9
|
import os
|
11
10
|
import typing
|
12
11
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
@@ -47,7 +46,7 @@ def request_body_env_vars() -> dict:
|
|
47
46
|
# Remove the path to config file, as the config content is included in the
|
48
47
|
# request body and will be merged with the config on the server side.
|
49
48
|
env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
|
50
|
-
env_vars.pop(skypilot_config.
|
49
|
+
env_vars.pop(skypilot_config.ENV_VAR_GLOBAL_CONFIG, None)
|
51
50
|
env_vars.pop(skypilot_config.ENV_VAR_PROJECT_CONFIG, None)
|
52
51
|
return env_vars
|
53
52
|
|
@@ -56,20 +55,9 @@ def get_override_skypilot_config_from_client() -> Dict[str, Any]:
|
|
56
55
|
"""Returns the override configs from the client."""
|
57
56
|
config = skypilot_config.to_dict()
|
58
57
|
# Remove the API server config, as we should not specify the SkyPilot
|
59
|
-
# server endpoint on the server side. This avoids the warning
|
58
|
+
# server endpoint on the server side. This avoids the warning at
|
59
|
+
# server-side.
|
60
60
|
config.pop_nested(('api_server',), default_value=None)
|
61
|
-
ignored_key_values = {}
|
62
|
-
for nested_key in constants.SKIPPED_CLIENT_OVERRIDE_KEYS:
|
63
|
-
value = config.pop_nested(nested_key, default_value=None)
|
64
|
-
if value is not None:
|
65
|
-
ignored_key_values['.'.join(nested_key)] = value
|
66
|
-
if ignored_key_values:
|
67
|
-
logger.debug(f'The following keys ({json.dumps(ignored_key_values)}) '
|
68
|
-
'are specified in the client SkyPilot config at '
|
69
|
-
f'{skypilot_config.loaded_config_path()!r}. '
|
70
|
-
'This will be ignored. If you want to specify it, '
|
71
|
-
'please modify it on server side or contact your '
|
72
|
-
'administrator.')
|
73
61
|
return config
|
74
62
|
|
75
63
|
|
@@ -420,6 +408,15 @@ class ServeLogsBody(RequestBody):
|
|
420
408
|
follow: bool = True
|
421
409
|
|
422
410
|
|
411
|
+
class ServeDownloadLogsBody(RequestBody):
|
412
|
+
"""The request body for the serve download logs endpoint."""
|
413
|
+
service_name: str
|
414
|
+
local_dir: str
|
415
|
+
targets: Optional[Union[str, serve.ServiceComponent,
|
416
|
+
List[Union[str, serve.ServiceComponent]]]]
|
417
|
+
replica_ids: Optional[List[int]] = None
|
418
|
+
|
419
|
+
|
423
420
|
class ServeStatusBody(RequestBody):
|
424
421
|
"""The request body for the serve status endpoint."""
|
425
422
|
service_names: Optional[Union[str, List[str]]]
|
@@ -188,8 +188,5 @@ def decode_job_status(
|
|
188
188
|
|
189
189
|
@register_decoders('kubernetes_node_info')
|
190
190
|
def decode_kubernetes_node_info(
|
191
|
-
return_value: Dict[str, Any]) ->
|
192
|
-
return
|
193
|
-
node_name: models.KubernetesNodeInfo(**node_info)
|
194
|
-
for node_name, node_info in return_value.items()
|
195
|
-
}
|
191
|
+
return_value: Dict[str, Any]) -> models.KubernetesNodesInfo:
|
192
|
+
return models.KubernetesNodesInfo.from_dict(return_value)
|
@@ -159,8 +159,5 @@ def encode_job_status(return_value: Dict[int, Any]) -> Dict[int, str]:
|
|
159
159
|
|
160
160
|
@register_encoder('kubernetes_node_info')
|
161
161
|
def encode_kubernetes_node_info(
|
162
|
-
return_value:
|
163
|
-
return
|
164
|
-
node_name: dataclasses.asdict(node_info)
|
165
|
-
for node_name, node_info in return_value.items()
|
166
|
-
}
|
162
|
+
return_value: 'models.KubernetesNodesInfo') -> Dict[str, Any]:
|
163
|
+
return return_value.to_dict()
|
sky/server/server.py
CHANGED
@@ -150,7 +150,21 @@ async def lifespan(app: fastapi.FastAPI): # pylint: disable=redefined-outer-nam
|
|
150
150
|
# Shutdown: Add any cleanup code here if needed
|
151
151
|
|
152
152
|
|
153
|
+
# Middleware that strips the /internal/dashboard prefix from request paths
|
154
|
+
class InternalDashboardPrefixMiddleware(
|
155
|
+
starlette.middleware.base.BaseHTTPMiddleware):
|
156
|
+
"""Middleware to handle /internal/dashboard prefix in requests."""
|
157
|
+
|
158
|
+
async def dispatch(self, request: fastapi.Request, call_next):
|
159
|
+
path = request.url.path
|
160
|
+
if path.startswith('/internal/dashboard/'):
|
161
|
+
# Remove /internal/dashboard prefix and update request scope
|
162
|
+
request.scope['path'] = path.replace('/internal/dashboard/', '/', 1)
|
163
|
+
return await call_next(request)
|
164
|
+
|
165
|
+
|
153
166
|
app = fastapi.FastAPI(prefix='/api/v1', debug=True, lifespan=lifespan)
|
167
|
+
app.add_middleware(InternalDashboardPrefixMiddleware)
|
154
168
|
app.add_middleware(
|
155
169
|
cors.CORSMiddleware,
|
156
170
|
# TODO(zhwu): in production deployment, we should restrict the allowed
|
@@ -210,7 +224,7 @@ async def kubernetes_node_info(
|
|
210
224
|
request: fastapi.Request,
|
211
225
|
kubernetes_node_info_body: payloads.KubernetesNodeInfoRequestBody
|
212
226
|
) -> None:
|
213
|
-
"""Gets Kubernetes
|
227
|
+
"""Gets Kubernetes nodes information and hints."""
|
214
228
|
executor.schedule_request(
|
215
229
|
request_id=request.state.request_id,
|
216
230
|
request_name='kubernetes_node_info',
|
@@ -1101,6 +1115,35 @@ async def complete_storage_name(incomplete: str,) -> List[str]:
|
|
1101
1115
|
return global_user_state.get_storage_names_start_with(incomplete)
|
1102
1116
|
|
1103
1117
|
|
1118
|
+
# Add a route to serve static files
|
1119
|
+
@app.get('/{full_path:path}')
|
1120
|
+
async def serve_static_or_dashboard(full_path: str):
|
1121
|
+
"""Serves static files for any unmatched routes.
|
1122
|
+
|
1123
|
+
Handles the /dashboard prefix from Next.js configuration.
|
1124
|
+
"""
|
1125
|
+
# Check if the path starts with 'dashboard/' and remove it if it does
|
1126
|
+
if full_path.startswith('dashboard/'):
|
1127
|
+
full_path = full_path[len('dashboard/'):]
|
1128
|
+
|
1129
|
+
# Try to serve the file directly from the out directory first
|
1130
|
+
file_path = os.path.join(server_constants.DASHBOARD_DIR, full_path)
|
1131
|
+
if os.path.isfile(file_path):
|
1132
|
+
return fastapi.responses.FileResponse(file_path)
|
1133
|
+
|
1134
|
+
# If file not found, serve the index.html for client-side routing.
|
1135
|
+
# For example, the non-matched arbitrary route (/ or /test) from
|
1136
|
+
# the client falls back to index.html (served directly, not an HTTP redirect).
|
1137
|
+
index_path = os.path.join(server_constants.DASHBOARD_DIR, 'index.html')
|
1138
|
+
try:
|
1139
|
+
with open(index_path, 'r', encoding='utf-8') as f:
|
1140
|
+
content = f.read()
|
1141
|
+
return fastapi.responses.HTMLResponse(content=content)
|
1142
|
+
except Exception as e:
|
1143
|
+
logger.error(f'Error serving dashboard: {e}')
|
1144
|
+
raise fastapi.HTTPException(status_code=500, detail=str(e))
|
1145
|
+
|
1146
|
+
|
1104
1147
|
if __name__ == '__main__':
|
1105
1148
|
import uvicorn
|
1106
1149
|
|
sky/setup_files/MANIFEST.in
CHANGED
sky/setup_files/dependencies.py
CHANGED
sky/sky_logging.py
CHANGED
@@ -18,6 +18,12 @@ _FORMAT = '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
|
|
18
18
|
_DATE_FORMAT = '%m-%d %H:%M:%S'
|
19
19
|
_SENSITIVE_LOGGER = ['sky.provisioner', 'sky.optimizer']
|
20
20
|
|
21
|
+
DEBUG = logging.DEBUG
|
22
|
+
INFO = logging.INFO
|
23
|
+
WARNING = logging.WARNING
|
24
|
+
ERROR = logging.ERROR
|
25
|
+
CRITICAL = logging.CRITICAL
|
26
|
+
|
21
27
|
|
22
28
|
def _show_logging_prefix():
|
23
29
|
return env_options.Options.SHOW_DEBUG_INFO.get(
|
@@ -97,8 +103,8 @@ def _setup_logger():
|
|
97
103
|
def reload_logger():
|
98
104
|
"""Reload the logger.
|
99
105
|
|
100
|
-
This
|
101
|
-
|
106
|
+
This ensures that the logger takes the new environment variables,
|
107
|
+
such as SKYPILOT_DEBUG.
|
102
108
|
"""
|
103
109
|
global _default_handler
|
104
110
|
_root_logger.removeHandler(_default_handler)
|
@@ -127,6 +133,10 @@ def set_logging_level(logger: str, level: int):
|
|
127
133
|
logger.setLevel(original_level)
|
128
134
|
|
129
135
|
|
136
|
+
def logging_enabled(logger: logging.Logger, level: int) -> bool:
|
137
|
+
return logger.level <= level
|
138
|
+
|
139
|
+
|
130
140
|
@contextlib.contextmanager
|
131
141
|
def silent():
|
132
142
|
"""Make all sky_logging.print() and logger.{info, warning...} silent.
|
sky/skylet/constants.py
CHANGED
@@ -117,7 +117,7 @@ RUNPOD_DOCKER_USERNAME_ENV_VAR = 'SKYPILOT_RUNPOD_DOCKER_USERNAME'
|
|
117
117
|
|
118
118
|
# Commands for disabling GPU ECC, which can improve the performance of the GPU
|
119
119
|
# for some workloads by 30%. This will only be applied when a user specifies
|
120
|
-
# `nvidia_gpus.disable_ecc: true` in ~/.sky/
|
120
|
+
# `nvidia_gpus.disable_ecc: true` in ~/.sky/config.yaml.
|
121
121
|
# Running this command will reboot the machine, introducing overhead for
|
122
122
|
# provisioning the machine.
|
123
123
|
# https://portal.nutanix.com/page/documents/kbs/details?targetId=kA00e000000LKjOCAW
|
@@ -299,11 +299,6 @@ FILE_MOUNTS_WORKDIR_SUBPATH = 'job-{run_id}/workdir'
|
|
299
299
|
FILE_MOUNTS_SUBPATH = 'job-{run_id}/local-file-mounts/{i}'
|
300
300
|
FILE_MOUNTS_TMP_SUBPATH = 'job-{run_id}/tmp-files'
|
301
301
|
|
302
|
-
# The default idle timeout for SkyPilot controllers. This include jobs
|
303
|
-
# controller and sky serve controller.
|
304
|
-
# TODO(tian): Refactor to controller_utils. Current blocker: circular import.
|
305
|
-
CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP = 10
|
306
|
-
|
307
302
|
# Due to the CPU/memory usage of the controller process launched with sky jobs (
|
308
303
|
# use ray job under the hood), we need to reserve some CPU/memory for each jobs/
|
309
304
|
# serve controller process.
|
@@ -337,7 +332,7 @@ RCLONE_LOG_DIR = '~/.sky/rclone_log'
|
|
337
332
|
RCLONE_CACHE_DIR = '~/.cache/rclone'
|
338
333
|
RCLONE_CACHE_REFRESH_INTERVAL = 10
|
339
334
|
|
340
|
-
# The keys that can be overridden in the `~/.sky/
|
335
|
+
# The keys that can be overridden in the `~/.sky/config.yaml` file. The
|
341
336
|
# overrides are specified in task YAMLs.
|
342
337
|
OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
|
343
338
|
('docker', 'run_options'),
|
@@ -367,3 +362,6 @@ LOCAL_SKYPILOT_CONFIG_PATH_PLACEHOLDER = 'skypilot:local_skypilot_config_path'
|
|
367
362
|
|
368
363
|
# Path to the generated cluster config yamls and ssh configs.
|
369
364
|
SKY_USER_FILE_PATH = '~/.sky/generated'
|
365
|
+
|
366
|
+
# Environment variable that is set to 'true' if this is a skypilot server.
|
367
|
+
ENV_VAR_IS_SKYPILOT_SERVER = 'IS_SKYPILOT_SERVER'
|
sky/skylet/job_lib.py
CHANGED
@@ -630,7 +630,7 @@ def update_job_status(job_ids: List[int],
|
|
630
630
|
'it to FAILED_DRIVER')
|
631
631
|
status = JobStatus.FAILED_DRIVER
|
632
632
|
elif job_pid < 0:
|
633
|
-
# TODO(zhwu): Backward compatibility, remove after 0.
|
633
|
+
# TODO(zhwu): Backward compatibility, remove after 0.10.0.
|
634
634
|
# We set the job status to PENDING instead of actually
|
635
635
|
# checking ray job status and let the status in job table
|
636
636
|
# take effect in the later max.
|
@@ -882,7 +882,7 @@ def cancel_jobs_encoded_results(jobs: Optional[List[int]],
|
|
882
882
|
# child processes.
|
883
883
|
elif job['pid'] < 0:
|
884
884
|
try:
|
885
|
-
# TODO(zhwu): Backward compatibility, remove after 0.
|
885
|
+
# TODO(zhwu): Backward compatibility, remove after 0.10.0.
|
886
886
|
# The job was submitted with ray job submit before #4318.
|
887
887
|
job_client = _create_ray_job_submission_client()
|
888
888
|
job_client.stop_job(_make_ray_job_id(job['job_id']))
|
@@ -1008,7 +1008,7 @@ class JobLibCodeGen:
|
|
1008
1008
|
# Print cancelled IDs. Caller should parse by decoding.
|
1009
1009
|
'print(cancelled, flush=True)',
|
1010
1010
|
]
|
1011
|
-
# TODO(zhwu): Backward compatibility, remove after 0.
|
1011
|
+
# TODO(zhwu): Backward compatibility, remove after 0.12.0.
|
1012
1012
|
if user_hash is None:
|
1013
1013
|
code = [
|
1014
1014
|
(f'cancelled = job_lib.cancel_jobs_encoded_results('
|