skypilot-nightly 1.0.0.dev20250530__py3-none-any.whl → 1.0.0.dev20250531__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +3 -2
- sky/cli.py +36 -10
- sky/client/cli.py +36 -10
- sky/clouds/cloud.py +20 -0
- sky/clouds/cudo.py +2 -0
- sky/clouds/do.py +3 -0
- sky/clouds/fluidstack.py +3 -0
- sky/clouds/gcp.py +10 -3
- sky/clouds/kubernetes.py +70 -4
- sky/clouds/lambda_cloud.py +3 -0
- sky/clouds/nebius.py +2 -0
- sky/clouds/paperspace.py +3 -0
- sky/clouds/runpod.py +2 -0
- sky/clouds/scp.py +3 -0
- sky/clouds/vast.py +3 -0
- sky/clouds/vsphere.py +3 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/bdeJWb62qu7L7FOq1dbXX/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/236-7458fda7b295f305.js +6 -0
- sky/dashboard/out/_next/static/chunks/37-b638675d511d58b4.js +6 -0
- sky/dashboard/out/_next/static/chunks/{470-4d003c441839094d.js → 470-9e7a479cc8303baa.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{682-f3f1443ed2fba42f.js → 682-5c12535476a21ce3.js} +1 -1
- sky/dashboard/out/_next/static/chunks/856-ab9627e7e8ac35e8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8f270e2c9c59fa1a.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-25edb867a41b6b20.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-c0c1dff3cd463d9e.js +11 -0
- sky/dashboard/out/_next/static/css/2b3ee34e586949a3.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/jobs/client/sdk.py +3 -0
- sky/jobs/constants.py +1 -1
- sky/jobs/server/core.py +8 -3
- sky/jobs/utils.py +31 -10
- sky/provision/gcp/config.py +3 -1
- sky/provision/gcp/constants.py +10 -0
- sky/resources.py +44 -3
- sky/server/constants.py +1 -1
- sky/server/requests/payloads.py +1 -0
- sky/templates/kubernetes-ray.yml.j2 +7 -0
- sky/utils/resources_utils.py +26 -0
- sky/utils/schemas.py +3 -0
- {skypilot_nightly-1.0.0.dev20250530.dist-info → skypilot_nightly-1.0.0.dev20250531.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250530.dist-info → skypilot_nightly-1.0.0.dev20250531.dist-info}/RECORD +62 -62
- sky/dashboard/out/_next/static/Q32Bxr2Pby5tFDW-y5TNg/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-ca00738e2f58ea65.js +0 -6
- sky/dashboard/out/_next/static/chunks/37-64efcd0e9c54bff6.js +0 -6
- sky/dashboard/out/_next/static/chunks/856-02e34c9fc5945066.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-42d3656aba9d2e78.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-20835df7b0c4599c.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-258decb65e95f520.js +0 -11
- sky/dashboard/out/_next/static/css/5411b9fb0a783c1c.css +0 -3
- /sky/dashboard/out/_next/static/{Q32Bxr2Pby5tFDW-y5TNg → bdeJWb62qu7L7FOq1dbXX}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-f19ea34b91c33950.js → _app-ad1edd7fe17ea796.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250530.dist-info → skypilot_nightly-1.0.0.dev20250531.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250530.dist-info → skypilot_nightly-1.0.0.dev20250531.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250530.dist-info → skypilot_nightly-1.0.0.dev20250531.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250530.dist-info → skypilot_nightly-1.0.0.dev20250531.dist-info}/top_level.txt +0 -0
sky/jobs/server/core.py
CHANGED
@@ -521,8 +521,12 @@ def cancel(name: Optional[str] = None,
|
|
521
521
|
|
522
522
|
|
523
523
|
@usage_lib.entrypoint
|
524
|
-
def tail_logs(name: Optional[str], job_id: Optional[int], follow: bool,
|
525
|
-
|
524
|
+
def tail_logs(name: Optional[str],
|
525
|
+
job_id: Optional[int],
|
526
|
+
follow: bool,
|
527
|
+
controller: bool,
|
528
|
+
refresh: bool,
|
529
|
+
tail: Optional[int] = None) -> int:
|
526
530
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
527
531
|
"""Tail logs of managed jobs.
|
528
532
|
|
@@ -565,7 +569,8 @@ def tail_logs(name: Optional[str], job_id: Optional[int], follow: bool,
|
|
565
569
|
job_id=job_id,
|
566
570
|
job_name=name,
|
567
571
|
follow=follow,
|
568
|
-
controller=controller)
|
572
|
+
controller=controller,
|
573
|
+
tail=tail)
|
569
574
|
|
570
575
|
|
571
576
|
def start_dashboard_forwarding(refresh: bool = False) -> Tuple[int, int]:
|
sky/jobs/utils.py
CHANGED
@@ -13,7 +13,7 @@ import textwrap
|
|
13
13
|
import time
|
14
14
|
import traceback
|
15
15
|
import typing
|
16
|
-
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
16
|
+
from typing import Any, Deque, Dict, List, Optional, Set, TextIO, Tuple, Union
|
17
17
|
|
18
18
|
import colorama
|
19
19
|
import filelock
|
@@ -546,7 +546,9 @@ def cancel_job_by_name(job_name: str,
|
|
546
546
|
return f'{job_name!r} {msg}'
|
547
547
|
|
548
548
|
|
549
|
-
def stream_logs_by_id(job_id: int, follow: bool = True) -> Tuple[str, int]:
|
549
|
+
def stream_logs_by_id(job_id: int,
|
550
|
+
follow: bool = True,
|
551
|
+
tail: Optional[int] = None) -> Tuple[str, int]:
|
550
552
|
"""Stream logs by job id.
|
551
553
|
|
552
554
|
Returns:
|
@@ -583,7 +585,12 @@ def stream_logs_by_id(job_id: int, follow: bool = True) -> Tuple[str, int]:
|
|
583
585
|
# Stream the logs to the console without reading the whole
|
584
586
|
# file into memory.
|
585
587
|
start_streaming = False
|
586
|
-
|
588
|
+
read_from: Union[TextIO, Deque[str]] = f
|
589
|
+
if tail is not None:
|
590
|
+
assert tail > 0
|
591
|
+
# Read only the last 'tail' lines using deque
|
592
|
+
read_from = collections.deque(f, maxlen=tail)
|
593
|
+
for line in read_from:
|
587
594
|
if log_lib.LOG_FILE_START_STREAMING_AT in line:
|
588
595
|
start_streaming = True
|
589
596
|
if start_streaming:
|
@@ -644,10 +651,12 @@ def stream_logs_by_id(job_id: int, follow: bool = True) -> Tuple[str, int]:
|
|
644
651
|
managed_job_state.ManagedJobStatus.RUNNING)
|
645
652
|
assert isinstance(handle, backends.CloudVmRayResourceHandle), handle
|
646
653
|
status_display.stop()
|
654
|
+
tail_param = tail if tail is not None else 0
|
647
655
|
returncode = backend.tail_logs(handle,
|
648
656
|
job_id=None,
|
649
657
|
managed_job_id=job_id,
|
650
|
-
follow=follow)
|
658
|
+
follow=follow,
|
659
|
+
tail=tail_param)
|
651
660
|
if returncode in [rc.value for rc in exceptions.JobExitCode]:
|
652
661
|
# If the log tailing exits with a known exit code we can safely
|
653
662
|
# break the loop because it indicates the tailing process
|
@@ -784,7 +793,8 @@ def stream_logs_by_id(job_id: int, follow: bool = True) -> Tuple[str, int]:
|
|
784
793
|
def stream_logs(job_id: Optional[int],
|
785
794
|
job_name: Optional[str],
|
786
795
|
controller: bool = False,
|
787
|
-
follow: bool = True) -> Tuple[str, int]:
|
796
|
+
follow: bool = True,
|
797
|
+
tail: Optional[int] = None) -> Tuple[str, int]:
|
788
798
|
"""Stream logs by job id or job name.
|
789
799
|
|
790
800
|
Returns:
|
@@ -855,7 +865,12 @@ def stream_logs(job_id: Optional[int],
|
|
855
865
|
with open(controller_log_path, 'r', newline='', encoding='utf-8') as f:
|
856
866
|
# Note: we do not need to care about start_stream_at here, since
|
857
867
|
# that should be in the job log printed above.
|
858
|
-
|
868
|
+
read_from: Union[TextIO, Deque[str]] = f
|
869
|
+
if tail is not None:
|
870
|
+
assert tail > 0
|
871
|
+
# Read only the last 'tail' lines efficiently using deque
|
872
|
+
read_from = collections.deque(f, maxlen=tail)
|
873
|
+
for line in read_from:
|
859
874
|
print(line, end='')
|
860
875
|
# Flush.
|
861
876
|
print(end='', flush=True)
|
@@ -907,7 +922,7 @@ def stream_logs(job_id: Optional[int],
|
|
907
922
|
f'Multiple running jobs found with name {job_name!r}.')
|
908
923
|
job_id = job_ids[0]
|
909
924
|
|
910
|
-
return stream_logs_by_id(job_id, follow)
|
925
|
+
return stream_logs_by_id(job_id, follow, tail)
|
911
926
|
|
912
927
|
|
913
928
|
def dump_managed_job_queue() -> str:
|
@@ -1370,10 +1385,16 @@ class ManagedJobCodeGen:
|
|
1370
1385
|
job_name: Optional[str],
|
1371
1386
|
job_id: Optional[int],
|
1372
1387
|
follow: bool = True,
|
1373
|
-
controller: bool = False) -> str:
|
1388
|
+
controller: bool = False,
|
1389
|
+
tail: Optional[int] = None) -> str:
|
1374
1390
|
code = textwrap.dedent(f"""\
|
1375
|
-
|
1376
|
-
|
1391
|
+
if managed_job_version < 6:
|
1392
|
+
# Versions before 6 did not support tail parameter
|
1393
|
+
result = utils.stream_logs(job_id={job_id!r}, job_name={job_name!r},
|
1394
|
+
follow={follow}, controller={controller})
|
1395
|
+
else:
|
1396
|
+
result = utils.stream_logs(job_id={job_id!r}, job_name={job_name!r},
|
1397
|
+
follow={follow}, controller={controller}, tail={tail!r})
|
1377
1398
|
if managed_job_version < 3:
|
1378
1399
|
# Versions 2 and older did not return a retcode, so we just print
|
1379
1400
|
# the result.
|
sky/provision/gcp/config.py
CHANGED
@@ -10,6 +10,7 @@ from sky.clouds.utils import gcp_utils
|
|
10
10
|
from sky.provision import common
|
11
11
|
from sky.provision.gcp import constants
|
12
12
|
from sky.provision.gcp import instance_utils
|
13
|
+
from sky.utils import resources_utils
|
13
14
|
|
14
15
|
logger = logging.getLogger(__name__)
|
15
16
|
|
@@ -788,7 +789,8 @@ def _configure_subnet(region: str, cluster_name: str,
|
|
788
789
|
default_interfaces = []
|
789
790
|
enable_gpu_direct = config.provider_config.get('enable_gpu_direct', False)
|
790
791
|
enable_gvnic = config.provider_config.get('enable_gvnic', False)
|
791
|
-
|
792
|
+
network_tier = config.provider_config.get('network_tier', 'standard')
|
793
|
+
if enable_gpu_direct or network_tier == resources_utils.NetworkTier.BEST:
|
792
794
|
if not enable_gvnic:
|
793
795
|
logger.warning(
|
794
796
|
'Enable GPU Direct requires gvnic to be enabled, enabling gvnic'
|
sky/provision/gcp/constants.py
CHANGED
@@ -84,6 +84,8 @@ GPU_DIRECT_TCPX_USER_DATA = textwrap.dedent("""
|
|
84
84
|
echo "GPU Direct TCPX installed"
|
85
85
|
""")
|
86
86
|
|
87
|
+
# Some NCCL options are from the following link.
|
88
|
+
# https://docs.nvidia.com/dgx-cloud/run-ai/latest/appendix-gcp.html
|
87
89
|
GPU_DIRECT_TCPX_SPECIFIC_OPTIONS = [
|
88
90
|
'--cap-add=IPC_LOCK',
|
89
91
|
'--userns=host',
|
@@ -103,6 +105,14 @@ GPU_DIRECT_TCPX_SPECIFIC_OPTIONS = [
|
|
103
105
|
'--device /dev/nvidia-uvm:/dev/nvidia-uvm',
|
104
106
|
'--device /dev/nvidiactl:/dev/nvidiactl',
|
105
107
|
'--env LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/tcpx/lib64',
|
108
|
+
'--env NCCL_GPUDIRECTTCPX_SOCKET_IFNAME=eth1,eth2,eth3,eth4',
|
109
|
+
'--env NCCL_GPUDIRECTTCPX_CTRL_DEV=eth0',
|
110
|
+
'--env NCCL_GPUDIRECTTCPX_TX_BINDINGS="eth1:8-21,112-125;eth2:8-21,112-125;eth3:60-73,164-177;eth4:60-73,164-177"',
|
111
|
+
'--env NCCL_GPUDIRECTTCPX_RX_BINDINGS="eth1:22-35,126-139;eth2:22-35,126-139;eth3:74-87,178-191;eth4:74-87,178-191"',
|
112
|
+
'--env NCCL_GPUDIRECTTCPX_PROGRAM_FLOW_STEERING_WAIT_MICROS=50000',
|
113
|
+
'--env NCCL_GPUDIRECTTCPX_UNIX_CLIENT_PREFIX="/run/tcpx"',
|
114
|
+
'--env NCCL_GPUDIRECTTCPX_FORCE_ACK=0',
|
115
|
+
'--env NCCL_SOCKET_IFNAME=eth0',
|
106
116
|
]
|
107
117
|
|
108
118
|
PD_EXTREME_IOPS = 20000
|
sky/resources.py
CHANGED
@@ -98,7 +98,7 @@ class Resources:
|
|
98
98
|
"""
|
99
99
|
# If any fields changed, increment the version. For backward compatibility,
|
100
100
|
# modify the __setstate__ method to handle the old version.
|
101
|
-
_VERSION = 25
|
101
|
+
_VERSION = 26
|
102
102
|
|
103
103
|
def __init__(
|
104
104
|
self,
|
@@ -117,6 +117,7 @@ class Resources:
|
|
117
117
|
image_id: Union[Dict[Optional[str], str], str, None] = None,
|
118
118
|
disk_size: Optional[int] = None,
|
119
119
|
disk_tier: Optional[Union[str, resources_utils.DiskTier]] = None,
|
120
|
+
network_tier: Optional[Union[str, resources_utils.NetworkTier]] = None,
|
120
121
|
ports: Optional[Union[int, str, List[str], Tuple[str]]] = None,
|
121
122
|
labels: Optional[Dict[str, str]] = None,
|
122
123
|
autostop: Union[bool, int, Dict[str, Any], None] = None,
|
@@ -202,6 +203,8 @@ class Resources:
|
|
202
203
|
disk_size: the size of the OS disk in GiB.
|
203
204
|
disk_tier: the disk performance tier to use. If None, defaults to
|
204
205
|
``'medium'``.
|
206
|
+
network_tier: the network performance tier to use. If None, defaults to
|
207
|
+
``'standard'``.
|
205
208
|
ports: the ports to open on the instance.
|
206
209
|
labels: the labels to apply to the instance. These are useful for
|
207
210
|
assigning metadata that may be used by external tools.
|
@@ -309,6 +312,20 @@ class Resources:
|
|
309
312
|
disk_tier = resources_utils.DiskTier(disk_tier_str)
|
310
313
|
self._disk_tier = disk_tier
|
311
314
|
|
315
|
+
if isinstance(network_tier, str):
|
316
|
+
network_tier_str = str(network_tier).lower()
|
317
|
+
supported_tiers = [
|
318
|
+
tier.value for tier in resources_utils.NetworkTier
|
319
|
+
]
|
320
|
+
if network_tier_str not in supported_tiers:
|
321
|
+
with ux_utils.print_exception_no_traceback():
|
322
|
+
raise ValueError(
|
323
|
+
f'Invalid network_tier {network_tier_str!r}. '
|
324
|
+
f'Network tier must be one of '
|
325
|
+
f'{", ".join(supported_tiers)}.')
|
326
|
+
network_tier = resources_utils.NetworkTier(network_tier_str)
|
327
|
+
self._network_tier = network_tier
|
328
|
+
|
312
329
|
if ports is not None:
|
313
330
|
if isinstance(ports, tuple):
|
314
331
|
ports = list(ports)
|
@@ -418,6 +435,10 @@ class Resources:
|
|
418
435
|
if self.disk_tier is not None:
|
419
436
|
disk_tier = f', disk_tier={self.disk_tier.value}'
|
420
437
|
|
438
|
+
network_tier = ''
|
439
|
+
if self.network_tier is not None:
|
440
|
+
network_tier = f', network_tier={self.network_tier.value}'
|
441
|
+
|
421
442
|
disk_size = ''
|
422
443
|
if self.disk_size != _DEFAULT_DISK_SIZE_GB:
|
423
444
|
disk_size = f', disk_size={self.disk_size}'
|
@@ -437,7 +458,7 @@ class Resources:
|
|
437
458
|
hardware_str = (
|
438
459
|
f'{instance_type}{use_spot}'
|
439
460
|
f'{cpus}{memory}{accelerators}{accelerator_args}{image_id}'
|
440
|
-
f'{disk_tier}{disk_size}{ports}')
|
461
|
+
f'{disk_tier}{network_tier}{disk_size}{ports}')
|
441
462
|
# It may have leading ',' (for example, instance_type not set) or empty
|
442
463
|
# spaces. Remove them.
|
443
464
|
while hardware_str and hardware_str[0] in (',', ' '):
|
@@ -567,6 +588,10 @@ class Resources:
|
|
567
588
|
def disk_tier(self) -> Optional[resources_utils.DiskTier]:
|
568
589
|
return self._disk_tier
|
569
590
|
|
591
|
+
@property
|
592
|
+
def network_tier(self) -> Optional[resources_utils.NetworkTier]:
|
593
|
+
return self._network_tier
|
594
|
+
|
570
595
|
@property
|
571
596
|
def ports(self) -> Optional[List[str]]:
|
572
597
|
return self._ports
|
@@ -1223,7 +1248,6 @@ class Resources:
|
|
1223
1248
|
|
1224
1249
|
def _try_validate_volumes(self) -> None:
|
1225
1250
|
"""Try to validate the volumes attribute.
|
1226
|
-
|
1227
1251
|
Raises:
|
1228
1252
|
ValueError: if the attribute is invalid.
|
1229
1253
|
"""
|
@@ -1532,6 +1556,12 @@ class Resources:
|
|
1532
1556
|
if not (self.disk_tier <= other.disk_tier): # pylint: disable=superfluous-parens
|
1533
1557
|
return False
|
1534
1558
|
|
1559
|
+
if self.network_tier is not None:
|
1560
|
+
if other.network_tier is None:
|
1561
|
+
return False
|
1562
|
+
if not self.network_tier <= other.network_tier:
|
1563
|
+
return False
|
1564
|
+
|
1535
1565
|
if check_ports:
|
1536
1566
|
if self.ports is not None:
|
1537
1567
|
if other.ports is None:
|
@@ -1586,6 +1616,7 @@ class Resources:
|
|
1586
1616
|
not self._use_spot_specified,
|
1587
1617
|
self._disk_size == _DEFAULT_DISK_SIZE_GB,
|
1588
1618
|
self._disk_tier is None,
|
1619
|
+
self._network_tier is None,
|
1589
1620
|
self._image_id is None,
|
1590
1621
|
self._ports is None,
|
1591
1622
|
self._docker_login_config is None,
|
@@ -1629,6 +1660,7 @@ class Resources:
|
|
1629
1660
|
zone=override.pop('zone', self.zone),
|
1630
1661
|
image_id=override.pop('image_id', self.image_id),
|
1631
1662
|
disk_tier=override.pop('disk_tier', self.disk_tier),
|
1663
|
+
network_tier=override.pop('network_tier', self.network_tier),
|
1632
1664
|
ports=override.pop('ports', self.ports),
|
1633
1665
|
labels=override.pop('labels', self.labels),
|
1634
1666
|
autostop=override.pop('autostop', current_autostop_config),
|
@@ -1667,6 +1699,9 @@ class Resources:
|
|
1667
1699
|
if (self.disk_tier is not None and
|
1668
1700
|
self.disk_tier != resources_utils.DiskTier.BEST):
|
1669
1701
|
features.add(clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER)
|
1702
|
+
if (self.network_tier is not None and
|
1703
|
+
self.network_tier == resources_utils.NetworkTier.BEST):
|
1704
|
+
features.add(clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER)
|
1670
1705
|
if self.extract_docker_image() is not None:
|
1671
1706
|
features.add(clouds.CloudImplementationFeatures.DOCKER_IMAGE)
|
1672
1707
|
elif self.image_id is not None:
|
@@ -1845,6 +1880,7 @@ class Resources:
|
|
1845
1880
|
resources_fields['disk_size'] = config.pop('disk_size', None)
|
1846
1881
|
resources_fields['image_id'] = config.pop('image_id', None)
|
1847
1882
|
resources_fields['disk_tier'] = config.pop('disk_tier', None)
|
1883
|
+
resources_fields['network_tier'] = config.pop('network_tier', None)
|
1848
1884
|
resources_fields['ports'] = config.pop('ports', None)
|
1849
1885
|
resources_fields['labels'] = config.pop('labels', None)
|
1850
1886
|
resources_fields['autostop'] = config.pop('autostop', None)
|
@@ -1897,6 +1933,8 @@ class Resources:
|
|
1897
1933
|
add_if_not_none('image_id', self.image_id)
|
1898
1934
|
if self.disk_tier is not None:
|
1899
1935
|
config['disk_tier'] = self.disk_tier.value
|
1936
|
+
if self.network_tier is not None:
|
1937
|
+
config['network_tier'] = self.network_tier.value
|
1900
1938
|
add_if_not_none('ports', self.ports)
|
1901
1939
|
add_if_not_none('labels', self.labels)
|
1902
1940
|
if self.volumes is not None:
|
@@ -2081,6 +2119,9 @@ class Resources:
|
|
2081
2119
|
if isinstance(state.get('_cloud', None), clouds.Kubernetes):
|
2082
2120
|
_maybe_add_docker_prefix_to_image_id(state['_image_id'])
|
2083
2121
|
|
2122
|
+
if version < 26:
|
2123
|
+
self._network_tier = state.get('_network_tier', None)
|
2124
|
+
|
2084
2125
|
self.__dict__.update(state)
|
2085
2126
|
|
2086
2127
|
|
sky/server/constants.py
CHANGED
@@ -7,7 +7,7 @@ from sky.skylet import constants
|
|
7
7
|
# API server version, whenever there is a change in API server that requires a
|
8
8
|
# restart of the local API server or error out when the client does not match
|
9
9
|
# the server version.
|
10
|
-
API_VERSION = '6'
|
10
|
+
API_VERSION = '7'
|
11
11
|
|
12
12
|
# Prefix for API request names.
|
13
13
|
REQUEST_NAME_PREFIX = 'sky.'
|
sky/server/requests/payloads.py
CHANGED
sky/templates/kubernetes-ray.yml.j2
CHANGED
@@ -697,6 +697,13 @@ available_node_types:
|
|
697
697
|
{{k8s_resource_key}}: {{accelerator_count}}
|
698
698
|
{% endif %}
|
699
699
|
{% endif %}
|
700
|
+
{% if k8s_ipc_lock_capability %}
|
701
|
+
securityContext:
|
702
|
+
capabilities:
|
703
|
+
add:
|
704
|
+
- IPC_LOCK
|
705
|
+
{% endif %}
|
706
|
+
|
700
707
|
|
701
708
|
{% if high_availability %}
|
702
709
|
pvc_spec:
|
sky/utils/resources_utils.py
CHANGED
@@ -50,6 +50,32 @@ class DiskTier(enum.Enum):
|
|
50
50
|
return types.index(self) <= types.index(other)
|
51
51
|
|
52
52
|
|
53
|
+
class NetworkTier(enum.Enum):
|
54
|
+
"""All network tiers supported by SkyPilot."""
|
55
|
+
STANDARD = 'standard'
|
56
|
+
BEST = 'best'
|
57
|
+
|
58
|
+
@classmethod
|
59
|
+
def supported_tiers(cls) -> List[str]:
|
60
|
+
return [tier.value for tier in cls]
|
61
|
+
|
62
|
+
@classmethod
|
63
|
+
def cli_help_message(cls) -> str:
|
64
|
+
return (
|
65
|
+
f'Network tier. Could be one of {", ".join(cls.supported_tiers())}'
|
66
|
+
f'. Default: {cls.STANDARD.value}')
|
67
|
+
|
68
|
+
@classmethod
|
69
|
+
def from_str(cls, tier: str) -> 'NetworkTier':
|
70
|
+
if tier not in cls.supported_tiers():
|
71
|
+
raise ValueError(f'Invalid network tier: {tier}')
|
72
|
+
return cls(tier)
|
73
|
+
|
74
|
+
def __le__(self, other: 'NetworkTier') -> bool:
|
75
|
+
types = list(NetworkTier)
|
76
|
+
return types.index(self) <= types.index(other)
|
77
|
+
|
78
|
+
|
53
79
|
class StorageType(enum.Enum):
|
54
80
|
"""Storage type."""
|
55
81
|
# Durable network storage, e.g. GCP persistent disks
|
sky/utils/schemas.py
CHANGED