skypilot-nightly 1.0.0.dev20250703__py3-none-any.whl → 1.0.0.dev20250704__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +24 -24
- sky/catalog/data_fetchers/fetch_cudo.py +37 -37
- sky/client/sdk.py +4 -6
- sky/clouds/aws.py +1 -1
- sky/clouds/cudo.py +1 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/exceptions.py +5 -0
- sky/jobs/client/sdk.py +3 -1
- sky/jobs/server/core.py +3 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +71 -70
- sky/server/common.py +46 -92
- sky/server/constants.py +25 -4
- sky/server/requests/payloads.py +55 -10
- sky/server/requests/requests.py +6 -28
- sky/server/rest.py +15 -4
- sky/server/server.py +40 -7
- sky/server/versions.py +270 -0
- {skypilot_nightly-1.0.0.dev20250703.dist-info → skypilot_nightly-1.0.0.dev20250704.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250703.dist-info → skypilot_nightly-1.0.0.dev20250704.dist-info}/RECORD +41 -40
- /sky/dashboard/out/_next/static/{A-fbCEgJE_q2cV8biIOIr → 6TieQqyqsJiaJC33q0FfI}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{A-fbCEgJE_q2cV8biIOIr → 6TieQqyqsJiaJC33q0FfI}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250703.dist-info → skypilot_nightly-1.0.0.dev20250704.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250703.dist-info → skypilot_nightly-1.0.0.dev20250704.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250703.dist-info → skypilot_nightly-1.0.0.dev20250704.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250703.dist-info → skypilot_nightly-1.0.0.dev20250704.dist-info}/top_level.txt +0 -0
@@ -28,12 +28,10 @@ def launch(name: str, data_center_id: str, ssh_key: str, machine_type: str,
|
|
28
28
|
size_gib=disk_size),
|
29
29
|
metadata=tags)
|
30
30
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
except cudo.cudo.rest.ApiException as e:
|
36
|
-
raise e
|
31
|
+
api = cudo.cudo.cudo_api.virtual_machines()
|
32
|
+
vm = api.create_vm(cudo.cudo.cudo_api.project_id_throwable(), request)
|
33
|
+
|
34
|
+
return vm.to_dict()['id']
|
37
35
|
|
38
36
|
|
39
37
|
def remove(instance_id: str):
|
@@ -54,11 +52,8 @@ def remove(instance_id: str):
|
|
54
52
|
state = 'unknown'
|
55
53
|
project_id = cudo.cudo.cudo_api.project_id_throwable()
|
56
54
|
while retry_count < max_retries:
|
57
|
-
|
58
|
-
|
59
|
-
state = vm.to_dict()['vm']['short_state']
|
60
|
-
except cudo.cudo.rest.ApiException as e:
|
61
|
-
raise e
|
55
|
+
vm = api.get_vm(project_id, instance_id)
|
56
|
+
state = vm.to_dict()['vm']['short_state']
|
62
57
|
|
63
58
|
if state in terminate_ok:
|
64
59
|
break
|
@@ -69,76 +64,82 @@ def remove(instance_id: str):
|
|
69
64
|
'Timeout error, could not terminate due to VM state: {}'.format(
|
70
65
|
state))
|
71
66
|
|
72
|
-
|
73
|
-
api.terminate_vm(project_id, instance_id)
|
74
|
-
except cudo.cudo.rest.ApiException as e:
|
75
|
-
raise e
|
67
|
+
api.terminate_vm(project_id, instance_id)
|
76
68
|
|
77
69
|
|
78
70
|
def set_tags(instance_id: str, tags: Dict):
|
79
71
|
"""Sets the tags for the given instance."""
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
merge=True)) # TODO (skypilot team) merge or overwrite?
|
87
|
-
except cudo.cudo.rest.ApiException as e:
|
88
|
-
raise e
|
72
|
+
api = cudo.cudo.cudo_api.virtual_machines()
|
73
|
+
api.update_vm_metadata(
|
74
|
+
cudo.cudo.cudo_api.project_id(), instance_id,
|
75
|
+
cudo.cudo.UpdateVMMetadataBody(
|
76
|
+
metadata=tags,
|
77
|
+
merge=True)) # TODO (skypilot team) merge or overwrite?
|
89
78
|
|
90
79
|
|
91
80
|
def get_instance(vm_id):
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
return vm_dict
|
97
|
-
except cudo.cudo.rest.ApiException as e:
|
98
|
-
raise e
|
81
|
+
api = cudo.cudo.cudo_api.virtual_machines()
|
82
|
+
vm = api.get_vm(cudo.cudo.cudo_api.project_id_throwable(), vm_id)
|
83
|
+
vm_dict = vm.to_dict()
|
84
|
+
return vm_dict
|
99
85
|
|
100
86
|
|
101
87
|
def list_instances():
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
return instances
|
122
|
-
except cudo.cudo.rest.ApiException as e:
|
123
|
-
raise e
|
88
|
+
api = cudo.cudo.cudo_api.virtual_machines()
|
89
|
+
vms = api.list_vms(cudo.cudo.cudo_api.project_id_throwable())
|
90
|
+
instances = {}
|
91
|
+
for vm in vms.to_dict()['vms']:
|
92
|
+
ex_ip = vm['external_ip_address']
|
93
|
+
in_ip = vm['internal_ip_address']
|
94
|
+
if not in_ip:
|
95
|
+
in_ip = ex_ip
|
96
|
+
instance = {
|
97
|
+
# active_state, init_state, lcm_state, short_state
|
98
|
+
'status': vm['short_state'],
|
99
|
+
'tags': vm['metadata'],
|
100
|
+
'name': vm['id'],
|
101
|
+
'ip': ex_ip,
|
102
|
+
'external_ip': ex_ip,
|
103
|
+
'internal_ip': in_ip
|
104
|
+
}
|
105
|
+
instances[vm['id']] = instance
|
106
|
+
return instances
|
124
107
|
|
125
108
|
|
126
109
|
def vm_available(to_start_count, gpu_count, gpu_model, data_center_id, mem,
|
127
110
|
cpus):
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
111
|
+
gpu_model = utils.skypilot_gpu_to_cudo_gpu(gpu_model)
|
112
|
+
api = cudo.cudo.cudo_api.virtual_machines()
|
113
|
+
types = api.list_vm_machine_types2()
|
114
|
+
types_dict = types.to_dict()
|
115
|
+
machine_types = types_dict['machine_types']
|
116
|
+
|
117
|
+
# Filter machine types based on requirements
|
118
|
+
matching_types = []
|
119
|
+
for machine_type in machine_types:
|
120
|
+
# Check if this machine type matches our requirements
|
121
|
+
if (machine_type['data_center_id'] == data_center_id and
|
122
|
+
machine_type['gpu_model'] == gpu_model and
|
123
|
+
machine_type['min_vcpu'] <= cpus <= machine_type.get(
|
124
|
+
'max_vcpu_free', float('inf')) and
|
125
|
+
machine_type['min_memory_gib'] <= mem <= machine_type.get(
|
126
|
+
'max_memory_gib_free', float('inf'))):
|
127
|
+
|
128
|
+
# Calculate available VMs based on resource constraints
|
129
|
+
max_vms_by_vcpu = machine_type[
|
130
|
+
'total_vcpu_free'] // cpus if cpus > 0 else float('inf')
|
131
|
+
max_vms_by_memory = machine_type[
|
132
|
+
'total_memory_gib_free'] // mem if mem > 0 else float('inf')
|
133
|
+
max_vms_by_gpu = machine_type[
|
134
|
+
'total_gpu_free'] // gpu_count if gpu_count > 0 else float(
|
135
|
+
'inf')
|
136
|
+
|
137
|
+
available_vms = min(max_vms_by_vcpu, max_vms_by_memory,
|
138
|
+
max_vms_by_gpu)
|
139
|
+
matching_types.append(available_vms)
|
140
|
+
|
141
|
+
total_count = sum(matching_types)
|
142
|
+
if total_count < to_start_count:
|
143
|
+
raise Exception(
|
144
|
+
'Too many VMs requested, try another gpu type or region')
|
145
|
+
return total_count
|
sky/server/common.py
CHANGED
@@ -22,7 +22,6 @@ import uuid
|
|
22
22
|
import colorama
|
23
23
|
import filelock
|
24
24
|
|
25
|
-
import sky
|
26
25
|
from sky import exceptions
|
27
26
|
from sky import sky_logging
|
28
27
|
from sky import skypilot_config
|
@@ -31,6 +30,7 @@ from sky.client import service_account_auth
|
|
31
30
|
from sky.data import data_utils
|
32
31
|
from sky.server import constants as server_constants
|
33
32
|
from sky.server import rest
|
33
|
+
from sky.server import versions
|
34
34
|
from sky.skylet import constants
|
35
35
|
from sky.usage import usage_lib
|
36
36
|
from sky.utils import annotations
|
@@ -66,34 +66,11 @@ RETRY_COUNT_ON_TIMEOUT = 3
|
|
66
66
|
# (e.g. in high contention env) and we will exit eagerly if server exit.
|
67
67
|
WAIT_APISERVER_START_TIMEOUT_SEC = 60
|
68
68
|
|
69
|
-
_VERSION_INFO = (
|
70
|
-
f'{colorama.Style.RESET_ALL}'
|
71
|
-
f'{colorama.Style.DIM}'
|
72
|
-
'client version: v{client_version} (API version: v{client_api_version})\n'
|
73
|
-
'server version: v{server_version} (API version: v{server_api_version})'
|
74
|
-
f'{colorama.Style.RESET_ALL}')
|
75
69
|
_LOCAL_API_SERVER_RESTART_HINT = (
|
76
|
-
f'{colorama.Fore.YELLOW}
|
70
|
+
f'{colorama.Fore.YELLOW}The local SkyPilot API server is not compatible '
|
71
|
+
'with the client. Please restart the API server with:\n'
|
77
72
|
f'{colorama.Style.BRIGHT}sky api stop; sky api start'
|
78
73
|
f'{colorama.Style.RESET_ALL}')
|
79
|
-
_LOCAL_SERVER_VERSION_MISMATCH_WARNING = (
|
80
|
-
f'{colorama.Fore.YELLOW}Client and local API server version mismatch:\n'
|
81
|
-
'{version_info}\n'
|
82
|
-
f'{_LOCAL_API_SERVER_RESTART_HINT}'
|
83
|
-
f'{colorama.Style.RESET_ALL}')
|
84
|
-
_CLIENT_TOO_OLD_WARNING = (
|
85
|
-
f'{colorama.Fore.YELLOW}Your SkyPilot client is too old:\n'
|
86
|
-
'{version_info}\n'
|
87
|
-
f'{colorama.Fore.YELLOW}Upgrade your client with:\n'
|
88
|
-
'{command}'
|
89
|
-
f'{colorama.Style.RESET_ALL}')
|
90
|
-
_REMOTE_SERVER_TOO_OLD_WARNING = (
|
91
|
-
f'{colorama.Fore.YELLOW}SkyPilot API server is too old:\n'
|
92
|
-
'{version_info}\n'
|
93
|
-
f'{colorama.Fore.YELLOW}Contact your administrator to upgrade the '
|
94
|
-
'remote API server or downgrade your local client with:\n'
|
95
|
-
'{command}\n'
|
96
|
-
f'{colorama.Style.RESET_ALL}')
|
97
74
|
_SERVER_INSTALL_VERSION_MISMATCH_WARNING = (
|
98
75
|
f'{colorama.Fore.YELLOW}SkyPilot API server version does not match the '
|
99
76
|
'installation on disk:\n'
|
@@ -105,10 +82,6 @@ _SERVER_INSTALL_VERSION_MISMATCH_WARNING = (
|
|
105
82
|
f'{colorama.Fore.YELLOW}This can happen if you upgraded SkyPilot without '
|
106
83
|
'restarting the API server.'
|
107
84
|
f'{colorama.Style.RESET_ALL}')
|
108
|
-
# Parse local API version eargly to catch version format errors.
|
109
|
-
_LOCAL_API_VERSION: int = int(server_constants.API_VERSION)
|
110
|
-
# SkyPilot dev version.
|
111
|
-
_DEV_VERSION = '1.0.0-dev0'
|
112
85
|
|
113
86
|
RequestId = str
|
114
87
|
ApiVersion = Optional[str]
|
@@ -134,6 +107,7 @@ class ApiServerInfo:
|
|
134
107
|
commit: Optional[str] = None
|
135
108
|
user: Optional[Dict[str, Any]] = None
|
136
109
|
basic_auth_enabled: bool = False
|
110
|
+
error: Optional[str] = None
|
137
111
|
|
138
112
|
|
139
113
|
def get_api_cookie_jar_path() -> pathlib.Path:
|
@@ -282,6 +256,23 @@ def is_api_server_local():
|
|
282
256
|
return get_server_url() in AVAILABLE_LOCAL_API_SERVER_URLS
|
283
257
|
|
284
258
|
|
259
|
+
def _handle_non_200_server_status(
|
260
|
+
response: 'requests.Response') -> ApiServerInfo:
|
261
|
+
if response.status_code == 401:
|
262
|
+
return ApiServerInfo(status=ApiServerStatus.NEEDS_AUTH)
|
263
|
+
if response.status_code == 400:
|
264
|
+
# Check if a version mismatch error is returned.
|
265
|
+
try:
|
266
|
+
body = response.json()
|
267
|
+
if (body.get('error',
|
268
|
+
'') == ApiServerStatus.VERSION_MISMATCH.value):
|
269
|
+
return ApiServerInfo(status=ApiServerStatus.VERSION_MISMATCH,
|
270
|
+
error=body.get('message', ''))
|
271
|
+
except json.JSONDecodeError:
|
272
|
+
pass
|
273
|
+
return ApiServerInfo(status=ApiServerStatus.UNHEALTHY)
|
274
|
+
|
275
|
+
|
285
276
|
def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
|
286
277
|
"""Retrieve the status of the API server.
|
287
278
|
|
@@ -315,10 +306,10 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
|
|
315
306
|
return ApiServerInfo(status=ApiServerStatus.UNHEALTHY)
|
316
307
|
|
317
308
|
logger.debug(f'Health check status: {response.status_code}')
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
309
|
+
|
310
|
+
if response.status_code != 200:
|
311
|
+
return _handle_non_200_server_status(response)
|
312
|
+
|
322
313
|
# The response is 200, so we can parse the response.
|
323
314
|
try:
|
324
315
|
result = response.json()
|
@@ -340,8 +331,24 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
|
|
340
331
|
f'version info. {server_url} may '
|
341
332
|
f'not be running SkyPilot API server.')
|
342
333
|
server_info.status = ApiServerStatus.UNHEALTHY
|
343
|
-
|
344
|
-
|
334
|
+
version_info = versions.check_compatibility_at_client(
|
335
|
+
response.headers)
|
336
|
+
if version_info is None:
|
337
|
+
# Backward compatibility for server prior to v0.11.0 which
|
338
|
+
# does not check compatibility at server side.
|
339
|
+
# TODO(aylei): remove this after v0.13.0 is released.
|
340
|
+
return ApiServerInfo(
|
341
|
+
status=ApiServerStatus.VERSION_MISMATCH,
|
342
|
+
error=versions.SERVER_TOO_OLD_ERROR.format(
|
343
|
+
remote_version=version,
|
344
|
+
local_version=versions.get_local_readable_version(),
|
345
|
+
min_version=server_constants.MIN_COMPATIBLE_VERSION,
|
346
|
+
command=versions.install_version_command(
|
347
|
+
version, commit)))
|
348
|
+
if version_info.error is not None:
|
349
|
+
return ApiServerInfo(status=ApiServerStatus.VERSION_MISMATCH,
|
350
|
+
error=version_info.error)
|
351
|
+
|
345
352
|
cookies = get_cookies_from_response(response)
|
346
353
|
set_api_cookie_jar(cookies, create_if_not_exists=False)
|
347
354
|
return server_info
|
@@ -490,7 +497,7 @@ def _start_api_server(deploy: bool = False,
|
|
490
497
|
server_url = get_server_url(host)
|
491
498
|
dashboard_msg = ''
|
492
499
|
api_server_info = get_api_server_status(server_url)
|
493
|
-
if api_server_info.version ==
|
500
|
+
if api_server_info.version == versions.DEV_VERSION:
|
494
501
|
dashboard_msg += (
|
495
502
|
f'\n{colorama.Style.RESET_ALL}{ux_utils.INDENT_SYMBOL}'
|
496
503
|
f'{colorama.Fore.YELLOW}')
|
@@ -554,33 +561,11 @@ def check_server_healthy(
|
|
554
561
|
api_server_info = get_api_server_status(endpoint)
|
555
562
|
api_server_status = api_server_info.status
|
556
563
|
if api_server_status == ApiServerStatus.VERSION_MISMATCH:
|
557
|
-
|
558
|
-
assert sv is not None, 'Server API version is None'
|
559
|
-
try:
|
560
|
-
server_is_older = int(sv) < _LOCAL_API_VERSION
|
561
|
-
except ValueError:
|
562
|
-
# Raised when the server version using an unknown scheme.
|
563
|
-
# Version compatibility checking is expected to handle all legacy
|
564
|
-
# cases so we safely assume the server is newer when the version
|
565
|
-
# scheme is unknown.
|
566
|
-
logger.debug('API server version using unknown scheme: %s', sv)
|
567
|
-
server_is_older = False
|
568
|
-
version_info = _get_version_info_hint(api_server_info)
|
564
|
+
msg = api_server_info.error
|
569
565
|
if is_api_server_local():
|
570
566
|
# For local server, just hint user to restart the server to get
|
571
567
|
# a consistent version.
|
572
|
-
msg =
|
573
|
-
version_info=version_info)
|
574
|
-
else:
|
575
|
-
assert api_server_info.version is not None, 'Server version is None'
|
576
|
-
if server_is_older:
|
577
|
-
msg = _REMOTE_SERVER_TOO_OLD_WARNING.format(
|
578
|
-
version_info=version_info,
|
579
|
-
command=_install_server_version_command(api_server_info))
|
580
|
-
else:
|
581
|
-
msg = _CLIENT_TOO_OLD_WARNING.format(
|
582
|
-
version_info=version_info,
|
583
|
-
command=_install_server_version_command(api_server_info))
|
568
|
+
msg = _LOCAL_API_SERVER_RESTART_HINT
|
584
569
|
with ux_utils.print_exception_no_traceback():
|
585
570
|
raise exceptions.APIVersionMismatchError(msg)
|
586
571
|
elif api_server_status == ApiServerStatus.UNHEALTHY:
|
@@ -614,37 +599,6 @@ def check_server_healthy(
|
|
614
599
|
return api_server_status, api_server_info
|
615
600
|
|
616
601
|
|
617
|
-
def _get_version_info_hint(server_info: ApiServerInfo) -> str:
|
618
|
-
assert server_info.version is not None, 'Server version is None'
|
619
|
-
# version_on_disk may be None if the server is older
|
620
|
-
assert server_info.commit is not None, 'Server commit is None'
|
621
|
-
sv = server_info.version
|
622
|
-
cv = sky.__version__
|
623
|
-
if server_info.version == _DEV_VERSION:
|
624
|
-
sv = f'{sv} with commit {server_info.commit}'
|
625
|
-
if cv == _DEV_VERSION:
|
626
|
-
cv = f'{cv} with commit {sky.__commit__}'
|
627
|
-
return _VERSION_INFO.format(client_version=cv,
|
628
|
-
server_version=sv,
|
629
|
-
client_api_version=server_constants.API_VERSION,
|
630
|
-
server_api_version=server_info.api_version)
|
631
|
-
|
632
|
-
|
633
|
-
def _install_server_version_command(server_info: ApiServerInfo) -> str:
|
634
|
-
assert server_info.version is not None, 'Server version is None'
|
635
|
-
assert server_info.commit is not None, 'Server commit is None'
|
636
|
-
if server_info.version == _DEV_VERSION:
|
637
|
-
# Dev build without valid version.
|
638
|
-
return ('pip install git+https://github.com/skypilot-org/skypilot@'
|
639
|
-
f'{server_info.commit}')
|
640
|
-
elif 'dev' in server_info.version:
|
641
|
-
# Nightly version.
|
642
|
-
return f'pip install -U "skypilot-nightly=={server_info.version}"'
|
643
|
-
else:
|
644
|
-
# Stable version.
|
645
|
-
return f'pip install -U "skypilot=={server_info.version}"'
|
646
|
-
|
647
|
-
|
648
602
|
# Keep in sync with sky/setup_files/setup.py find_version()
|
649
603
|
def get_skypilot_version_on_disk() -> str:
|
650
604
|
"""Get the version of the SkyPilot code on disk."""
|
sky/server/constants.py
CHANGED
@@ -4,10 +4,31 @@ import os
|
|
4
4
|
|
5
5
|
from sky.skylet import constants
|
6
6
|
|
7
|
-
#
|
8
|
-
#
|
9
|
-
# the
|
10
|
-
|
7
|
+
# pylint: disable=line-too-long
|
8
|
+
# The SkyPilot API version that the code currently use.
|
9
|
+
# Bump this version when the API is changed and special compatibility handling
|
10
|
+
# based on version info is needed.
|
11
|
+
# For more details and code guidelines, refer to:
|
12
|
+
# https://docs.skypilot.co/en/latest/developers/CONTRIBUTING.html#backward-compatibility-guidelines
|
13
|
+
API_VERSION = 11
|
14
|
+
|
15
|
+
# The minimum peer API version that the code should still work with.
|
16
|
+
# Notes (dev):
|
17
|
+
# - This value is maintained by the CI pipeline, DO NOT EDIT this manually.
|
18
|
+
# - Compatibility code for versions lower than this can be safely removed.
|
19
|
+
# Refer to API_VERSION for more details.
|
20
|
+
MIN_COMPATIBLE_API_VERSION = 11
|
21
|
+
|
22
|
+
# The semantic version of the minimum compatible API version.
|
23
|
+
# Refer to MIN_COMPATIBLE_API_VERSION for more details.
|
24
|
+
# Note (dev): DO NOT EDIT this constant manually.
|
25
|
+
MIN_COMPATIBLE_VERSION = '0.10.0'
|
26
|
+
|
27
|
+
# The HTTP header name for the API version of the sender.
|
28
|
+
API_VERSION_HEADER = 'X-SkyPilot-API-Version'
|
29
|
+
|
30
|
+
# The HTTP header name for the SkyPilot version of the sender.
|
31
|
+
VERSION_HEADER = 'X-SkyPilot-Version'
|
11
32
|
|
12
33
|
# Prefix for API request names.
|
13
34
|
REQUEST_NAME_PREFIX = 'sky.'
|
sky/server/requests/payloads.py
CHANGED
@@ -1,9 +1,27 @@
|
|
1
1
|
"""Payloads for the Sky API requests.
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
All the payloads that will be used between the client and server communication
|
4
|
+
must be defined here to make sure it get covered by our API compatbility tests.
|
5
|
+
|
6
|
+
Compatibility note:
|
7
|
+
- Adding a new body for new API is compatible as long as the SDK method using
|
8
|
+
the new API is properly decorated with `versions.minimal_api_version`.
|
9
|
+
- Adding a new field with default value to an existing body is compatible at
|
10
|
+
API level, but the business logic must handle the case where the field is
|
11
|
+
not proccessed by an old version of remote client/server. This can usually
|
12
|
+
be done by checking `versions.get_remote_api_version()`.
|
13
|
+
- Other changes are not compatible at API level, so must be handled specially.
|
14
|
+
A common pattern is to keep both the old and new version of the body and
|
15
|
+
checking `versions.get_remote_api_version()` to decide which body to use. For
|
16
|
+
example, say we refactor the `LaunchBody`, the original `LaunchBody` must be
|
17
|
+
kept in the codebase and the new body should be added via `LaunchBodyV2`.
|
18
|
+
Then if the remote runs in an old version, the local code should still send
|
19
|
+
`LaunchBody` to keep the backward compatibility. `LaunchBody` can be removed
|
20
|
+
later when constants.MIN_COMPATIBLE_API_VERSION is updated to a version that
|
21
|
+
supports `LaunchBodyV2`
|
22
|
+
|
23
|
+
Also refer to sky.server.constants.MIN_COMPATIBLE_API_VERSION and the
|
24
|
+
sky.server.versions module for more details.
|
7
25
|
"""
|
8
26
|
import os
|
9
27
|
import typing
|
@@ -94,7 +112,18 @@ def get_override_skypilot_config_path_from_client() -> Optional[str]:
|
|
94
112
|
return skypilot_config.loaded_config_path_serialized()
|
95
113
|
|
96
114
|
|
97
|
-
class
|
115
|
+
class BasePayload(pydantic.BaseModel):
|
116
|
+
"""The base payload for the SkyPilot API."""
|
117
|
+
# Ignore extra fields in the request body, which is useful for backward
|
118
|
+
# compatibility. The difference with `allow` is that `ignore` will not
|
119
|
+
# include the unknown fields when dump the model, i.e., we can add new
|
120
|
+
# fields to the request body without breaking the existing old API server
|
121
|
+
# where the handler function does not accept the new field in function
|
122
|
+
# signature.
|
123
|
+
model_config = pydantic.ConfigDict(extra='ignore')
|
124
|
+
|
125
|
+
|
126
|
+
class RequestBody(BasePayload):
|
98
127
|
"""The request body for the SkyPilot API."""
|
99
128
|
env_vars: Dict[str, str] = {}
|
100
129
|
entrypoint: str = ''
|
@@ -103,11 +132,6 @@ class RequestBody(pydantic.BaseModel):
|
|
103
132
|
override_skypilot_config: Optional[Dict[str, Any]] = {}
|
104
133
|
override_skypilot_config_path: Optional[str] = None
|
105
134
|
|
106
|
-
# Allow extra fields in the request body, which is useful for backward
|
107
|
-
# compatibility, i.e., we can add new fields to the request body without
|
108
|
-
# breaking the existing old API server.
|
109
|
-
model_config = pydantic.ConfigDict(extra='allow')
|
110
|
-
|
111
135
|
def __init__(self, **data):
|
112
136
|
data['env_vars'] = data.get('env_vars', request_body_env_vars())
|
113
137
|
usage_lib_entrypoint = usage_lib.messages.usage.entrypoint
|
@@ -665,3 +689,24 @@ class GetConfigBody(RequestBody):
|
|
665
689
|
class CostReportBody(RequestBody):
|
666
690
|
"""The request body for the cost report endpoint."""
|
667
691
|
days: Optional[int] = 30
|
692
|
+
|
693
|
+
|
694
|
+
class RequestPayload(BasePayload):
|
695
|
+
"""The payload for the requests."""
|
696
|
+
|
697
|
+
request_id: str
|
698
|
+
name: str
|
699
|
+
entrypoint: str
|
700
|
+
request_body: str
|
701
|
+
status: str
|
702
|
+
created_at: float
|
703
|
+
user_id: str
|
704
|
+
return_value: str
|
705
|
+
error: str
|
706
|
+
pid: Optional[int]
|
707
|
+
schedule_type: str
|
708
|
+
user_name: Optional[str] = None
|
709
|
+
# Resources the request operates on.
|
710
|
+
cluster_name: Optional[str] = None
|
711
|
+
status_msg: Optional[str] = None
|
712
|
+
should_retry: bool = False
|
sky/server/requests/requests.py
CHANGED
@@ -98,28 +98,6 @@ class ScheduleType(enum.Enum):
|
|
98
98
|
SHORT = 'short'
|
99
99
|
|
100
100
|
|
101
|
-
@dataclasses.dataclass
|
102
|
-
class RequestPayload:
|
103
|
-
"""The payload for the requests."""
|
104
|
-
|
105
|
-
request_id: str
|
106
|
-
name: str
|
107
|
-
entrypoint: str
|
108
|
-
request_body: str
|
109
|
-
status: str
|
110
|
-
created_at: float
|
111
|
-
user_id: str
|
112
|
-
return_value: str
|
113
|
-
error: str
|
114
|
-
pid: Optional[int]
|
115
|
-
schedule_type: str
|
116
|
-
user_name: Optional[str] = None
|
117
|
-
# Resources the request operates on.
|
118
|
-
cluster_name: Optional[str] = None
|
119
|
-
status_msg: Optional[str] = None
|
120
|
-
should_retry: bool = False
|
121
|
-
|
122
|
-
|
123
101
|
@dataclasses.dataclass
|
124
102
|
class Request:
|
125
103
|
"""A SkyPilot API request."""
|
@@ -185,7 +163,7 @@ class Request:
|
|
185
163
|
@classmethod
|
186
164
|
def from_row(cls, row: Tuple[Any, ...]) -> 'Request':
|
187
165
|
content = dict(zip(REQUEST_COLUMNS, row))
|
188
|
-
return cls.decode(RequestPayload(**content))
|
166
|
+
return cls.decode(payloads.RequestPayload(**content))
|
189
167
|
|
190
168
|
def to_row(self) -> Tuple[Any, ...]:
|
191
169
|
payload = self.encode()
|
@@ -194,7 +172,7 @@ class Request:
|
|
194
172
|
row.append(getattr(payload, k))
|
195
173
|
return tuple(row)
|
196
174
|
|
197
|
-
def readable_encode(self) -> RequestPayload:
|
175
|
+
def readable_encode(self) -> payloads.RequestPayload:
|
198
176
|
"""Serialize the SkyPilot API request for display purposes.
|
199
177
|
|
200
178
|
This function should be called on the server side to serialize the
|
@@ -212,7 +190,7 @@ class Request:
|
|
212
190
|
payloads.RequestBody), (self.name, self.request_body)
|
213
191
|
user = global_user_state.get_user(self.user_id)
|
214
192
|
user_name = user.name if user is not None else None
|
215
|
-
return RequestPayload(
|
193
|
+
return payloads.RequestPayload(
|
216
194
|
request_id=self.request_id,
|
217
195
|
name=self.name,
|
218
196
|
entrypoint=self.entrypoint.__name__,
|
@@ -230,12 +208,12 @@ class Request:
|
|
230
208
|
should_retry=self.should_retry,
|
231
209
|
)
|
232
210
|
|
233
|
-
def encode(self) -> RequestPayload:
|
211
|
+
def encode(self) -> payloads.RequestPayload:
|
234
212
|
"""Serialize the SkyPilot API request."""
|
235
213
|
assert isinstance(self.request_body,
|
236
214
|
payloads.RequestBody), (self.name, self.request_body)
|
237
215
|
try:
|
238
|
-
return RequestPayload(
|
216
|
+
return payloads.RequestPayload(
|
239
217
|
request_id=self.request_id,
|
240
218
|
name=self.name,
|
241
219
|
entrypoint=encoders.pickle_and_encode(self.entrypoint),
|
@@ -264,7 +242,7 @@ class Request:
|
|
264
242
|
raise
|
265
243
|
|
266
244
|
@classmethod
|
267
|
-
def decode(cls, payload: RequestPayload) -> 'Request':
|
245
|
+
def decode(cls, payload: payloads.RequestPayload) -> 'Request':
|
268
246
|
"""Deserialize the SkyPilot API request."""
|
269
247
|
try:
|
270
248
|
return cls(
|
sky/server/rest.py
CHANGED
@@ -12,6 +12,8 @@ import colorama
|
|
12
12
|
from sky import exceptions
|
13
13
|
from sky import sky_logging
|
14
14
|
from sky.adaptors import common as adaptors_common
|
15
|
+
from sky.server import constants
|
16
|
+
from sky.server import versions
|
15
17
|
from sky.utils import common_utils
|
16
18
|
from sky.utils import rich_utils
|
17
19
|
from sky.utils import ux_utils
|
@@ -28,6 +30,11 @@ F = TypeVar('F', bound=Callable[..., Any])
|
|
28
30
|
|
29
31
|
_RETRY_CONTEXT = contextvars.ContextVar('retry_context', default=None)
|
30
32
|
|
33
|
+
_session = requests.Session()
|
34
|
+
_session.headers[constants.API_VERSION_HEADER] = str(constants.API_VERSION)
|
35
|
+
_session.headers[constants.VERSION_HEADER] = (
|
36
|
+
versions.get_local_readable_version())
|
37
|
+
|
31
38
|
|
32
39
|
class RetryContext:
|
33
40
|
|
@@ -132,13 +139,17 @@ def handle_server_unavailable(response: 'requests.Response') -> None:
|
|
132
139
|
def request(method, url, **kwargs) -> 'requests.Response':
|
133
140
|
"""Send a request to the API server, retry on server temporarily
|
134
141
|
unavailable."""
|
135
|
-
|
136
|
-
handle_server_unavailable(response)
|
137
|
-
return response
|
142
|
+
return request_without_retry(method, url, **kwargs)
|
138
143
|
|
139
144
|
|
140
145
|
def request_without_retry(method, url, **kwargs) -> 'requests.Response':
|
141
146
|
"""Send a request to the API server without retry."""
|
142
|
-
response =
|
147
|
+
response = _session.request(method, url, **kwargs)
|
143
148
|
handle_server_unavailable(response)
|
149
|
+
remote_api_version = response.headers.get(constants.API_VERSION_HEADER)
|
150
|
+
remote_version = response.headers.get(constants.VERSION_HEADER)
|
151
|
+
if remote_api_version is not None:
|
152
|
+
versions.set_remote_api_version(int(remote_api_version))
|
153
|
+
if remote_version is not None:
|
154
|
+
versions.set_remote_version(remote_version)
|
144
155
|
return response
|