skypilot-nightly 1.0.0.dev20250718__py3-none-any.whl → 1.0.0.dev20250720__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/backends/backend_utils.py +23 -13
- sky/backends/cloud_vm_ray_backend.py +19 -11
- sky/catalog/__init__.py +3 -1
- sky/catalog/aws_catalog.py +8 -5
- sky/catalog/azure_catalog.py +8 -5
- sky/catalog/common.py +8 -2
- sky/catalog/cudo_catalog.py +5 -2
- sky/catalog/do_catalog.py +4 -1
- sky/catalog/fluidstack_catalog.py +5 -2
- sky/catalog/gcp_catalog.py +8 -5
- sky/catalog/hyperbolic_catalog.py +5 -2
- sky/catalog/ibm_catalog.py +8 -5
- sky/catalog/lambda_catalog.py +8 -5
- sky/catalog/nebius_catalog.py +8 -5
- sky/catalog/oci_catalog.py +8 -5
- sky/catalog/paperspace_catalog.py +4 -1
- sky/catalog/runpod_catalog.py +5 -2
- sky/catalog/scp_catalog.py +8 -5
- sky/catalog/vast_catalog.py +5 -2
- sky/catalog/vsphere_catalog.py +4 -1
- sky/client/cli/command.py +25 -2
- sky/client/sdk.py +9 -4
- sky/clouds/aws.py +12 -7
- sky/clouds/azure.py +12 -7
- sky/clouds/cloud.py +9 -8
- sky/clouds/cudo.py +13 -7
- sky/clouds/do.py +12 -7
- sky/clouds/fluidstack.py +11 -6
- sky/clouds/gcp.py +12 -7
- sky/clouds/hyperbolic.py +11 -6
- sky/clouds/ibm.py +11 -6
- sky/clouds/kubernetes.py +7 -3
- sky/clouds/lambda_cloud.py +11 -6
- sky/clouds/nebius.py +12 -7
- sky/clouds/oci.py +12 -7
- sky/clouds/paperspace.py +12 -7
- sky/clouds/runpod.py +12 -7
- sky/clouds/scp.py +11 -6
- sky/clouds/vast.py +12 -7
- sky/clouds/vsphere.py +11 -6
- sky/core.py +6 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/{1043-734e57d2b27dfe5d.js → 1043-869d9c78bf5dd3df.js} +1 -1
- sky/dashboard/out/_next/static/chunks/1871-a821dcaaae2a3823.js +6 -0
- sky/dashboard/out/_next/static/chunks/{2641.35edc9ccaeaad9e3.js → 2641.5233e938f14e31a7.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{4725.4c849b1e05c8e9ad.js → 4725.66125dcd9832aa5d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4869.c7c055a5c2814f33.js +16 -0
- sky/dashboard/out/_next/static/chunks/938-63fc419cb82ad9b3.js +1 -0
- sky/dashboard/out/_next/static/chunks/9470-8178183f3bae198f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-507712f30cd3cec3.js +20 -0
- sky/dashboard/out/_next/static/chunks/webpack-26cdc782eed15a7d.js +1 -0
- sky/dashboard/out/_next/static/css/5122cb0a08486fd3.css +3 -0
- sky/dashboard/out/_next/static/{FUjweqdImyeYhMYFON-Se → pTQKG61ng32Zc7gsAROFJ}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +13 -143
- sky/jobs/state.py +9 -88
- sky/jobs/utils.py +28 -13
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/serve/client/sdk.py +6 -2
- sky/serve/controller.py +7 -3
- sky/serve/serve_state.py +1 -1
- sky/serve/serve_utils.py +171 -75
- sky/serve/server/core.py +17 -6
- sky/server/requests/payloads.py +2 -0
- sky/server/requests/requests.py +1 -1
- sky/setup_files/MANIFEST.in +2 -0
- sky/setup_files/alembic.ini +152 -0
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/configs.py +1 -1
- sky/skylet/job_lib.py +1 -1
- sky/skypilot_config.py +32 -6
- sky/users/permission.py +1 -1
- sky/utils/common_utils.py +77 -0
- sky/utils/db/__init__.py +0 -0
- sky/utils/{db_utils.py → db/db_utils.py} +59 -0
- sky/utils/db/migration_utils.py +53 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250720.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250720.dist-info}/RECORD +102 -93
- sky/dashboard/out/_next/static/chunks/1871-76491ac174a95278.js +0 -6
- sky/dashboard/out/_next/static/chunks/4869.bdd42f14b51d1d6f.js +0 -16
- sky/dashboard/out/_next/static/chunks/938-6a9ffdaa21eee969.js +0 -1
- sky/dashboard/out/_next/static/chunks/9470-b6f6a35283863a6f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-771a40cde532309b.js +0 -20
- sky/dashboard/out/_next/static/chunks/webpack-6b0575ea521af4f3.js +0 -1
- sky/dashboard/out/_next/static/css/219887b94512388c.css +0 -3
- sky/dashboard/out/_next/static/{FUjweqdImyeYhMYFON-Se → pTQKG61ng32Zc7gsAROFJ}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250720.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250720.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250720.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250720.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import Optional
|
|
|
5
5
|
import urllib.request
|
|
6
6
|
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
|
8
|
+
_SKYPILOT_COMMIT_SHA = '80235386457c47b3c178c1e1842e9b07ed75dc06'
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def _get_git_commit():
|
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
__commit__ = _get_git_commit()
|
|
38
|
-
__version__ = '1.0.0.dev20250718'
|
|
38
|
+
__version__ = '1.0.0.dev20250720'
|
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
|
40
40
|
|
|
41
41
|
|
|
@@ -104,6 +104,7 @@ from sky.client.sdk import job_status
|
|
|
104
104
|
from sky.client.sdk import launch
|
|
105
105
|
from sky.client.sdk import optimize
|
|
106
106
|
from sky.client.sdk import queue
|
|
107
|
+
from sky.client.sdk import reload_config
|
|
107
108
|
from sky.client.sdk import start
|
|
108
109
|
from sky.client.sdk import status
|
|
109
110
|
from sky.client.sdk import stop
|
|
@@ -185,6 +186,7 @@ __all__ = [
|
|
|
185
186
|
'optimize',
|
|
186
187
|
'launch',
|
|
187
188
|
'exec',
|
|
189
|
+
'reload_config',
|
|
188
190
|
# core APIs
|
|
189
191
|
'status',
|
|
190
192
|
'start',
|
sky/backends/backend_utils.py
CHANGED
|
@@ -104,7 +104,7 @@ WAIT_HEAD_NODE_IP_MAX_ATTEMPTS = 3
|
|
|
104
104
|
# Fixed IP addresses are used to avoid DNS lookup blocking the check, for
|
|
105
105
|
# machine with no internet connection.
|
|
106
106
|
# Refer to: https://stackoverflow.com/questions/3764291/how-can-i-see-if-theres-an-available-and-active-network-connection-in-python # pylint: disable=line-too-long
|
|
107
|
-
_TEST_IP_LIST = ['https://
|
|
107
|
+
_TEST_IP_LIST = ['https://8.8.8.8', 'https://1.1.1.1']
|
|
108
108
|
|
|
109
109
|
# Allow each CPU thread take 2 tasks.
|
|
110
110
|
# Note: This value cannot be too small, otherwise OOM issue may occur.
|
|
@@ -1635,18 +1635,28 @@ def get_node_ips(cluster_yaml: str,
|
|
|
1635
1635
|
|
|
1636
1636
|
def check_network_connection():
|
|
1637
1637
|
# Tolerate 3 retries as it is observed that connections can fail.
|
|
1638
|
-
adapter = adapters.HTTPAdapter(max_retries=retry_lib.Retry(total=3))
|
|
1639
1638
|
http = requests.Session()
|
|
1640
|
-
http.mount('https://',
|
|
1641
|
-
http.mount('http://',
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1639
|
+
http.mount('https://', adapters.HTTPAdapter())
|
|
1640
|
+
http.mount('http://', adapters.HTTPAdapter())
|
|
1641
|
+
|
|
1642
|
+
# Alternate between IPs on each retry
|
|
1643
|
+
max_retries = 3
|
|
1644
|
+
timeout = 0.5
|
|
1645
|
+
|
|
1646
|
+
for _ in range(max_retries):
|
|
1647
|
+
for ip in _TEST_IP_LIST:
|
|
1648
|
+
try:
|
|
1649
|
+
http.head(ip, timeout=timeout)
|
|
1650
|
+
return
|
|
1651
|
+
except (requests.Timeout, requests.exceptions.ConnectionError):
|
|
1652
|
+
continue
|
|
1653
|
+
|
|
1654
|
+
timeout *= 2 # Double the timeout for next retry
|
|
1655
|
+
|
|
1656
|
+
# If we get here, all IPs failed
|
|
1657
|
+
# Assume network connection is down
|
|
1658
|
+
raise exceptions.NetworkError('Could not refresh the cluster. '
|
|
1659
|
+
'Network seems down.')
|
|
1650
1660
|
|
|
1651
1661
|
|
|
1652
1662
|
@timeline.event
|
|
@@ -2610,7 +2620,7 @@ def is_controller_accessible(
|
|
|
2610
2620
|
need_connection_check):
|
|
2611
2621
|
# Check ssh connection if (1) controller is in INIT state, or (2) we failed to fetch the
|
|
2612
2622
|
# status, both of which can happen when controller's status lock is held by another `sky jobs launch` or
|
|
2613
|
-
# `sky serve up`. If we have
|
|
2623
|
+
# `sky serve up`. If we have controller's head_ip available and it is ssh-reachable,
|
|
2614
2624
|
# we can allow access to the controller.
|
|
2615
2625
|
ssh_credentials = ssh_credential_from_yaml(handle.cluster_yaml,
|
|
2616
2626
|
handle.docker_user,
|
sky/backends/cloud_vm_ray_backend.py
CHANGED
|
@@ -3988,12 +3988,16 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
3988
3988
|
return dict(zip(job_ids, local_log_dirs))
|
|
3989
3989
|
|
|
3990
3990
|
@context_utils.cancellation_guard
|
|
3991
|
-
def tail_logs(
|
|
3992
|
-
|
|
3993
|
-
|
|
3994
|
-
|
|
3995
|
-
|
|
3996
|
-
|
|
3991
|
+
def tail_logs(
|
|
3992
|
+
self,
|
|
3993
|
+
handle: CloudVmRayResourceHandle,
|
|
3994
|
+
job_id: Optional[int],
|
|
3995
|
+
managed_job_id: Optional[int] = None,
|
|
3996
|
+
follow: bool = True,
|
|
3997
|
+
tail: int = 0,
|
|
3998
|
+
require_outputs: bool = False,
|
|
3999
|
+
stream_logs: bool = True,
|
|
4000
|
+
process_stream: bool = False) -> Union[int, Tuple[int, str, str]]:
|
|
3997
4001
|
"""Tail the logs of a job.
|
|
3998
4002
|
|
|
3999
4003
|
Args:
|
|
@@ -4003,6 +4007,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
4003
4007
|
follow: Whether to follow the logs.
|
|
4004
4008
|
tail: The number of lines to display from the end of the
|
|
4005
4009
|
log file. If 0, print all lines.
|
|
4010
|
+
require_outputs: Whether to return the stdout/stderr of the command.
|
|
4011
|
+
stream_logs: Whether to stream the logs to stdout/stderr.
|
|
4012
|
+
process_stream: Whether to process the stream.
|
|
4006
4013
|
|
|
4007
4014
|
Returns:
|
|
4008
4015
|
The exit code of the tail command. Returns code 100 if the job has
|
|
@@ -4022,18 +4029,19 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
|
4022
4029
|
signal.signal(signal.SIGINT, backend_utils.interrupt_handler)
|
|
4023
4030
|
signal.signal(signal.SIGTSTP, backend_utils.stop_handler)
|
|
4024
4031
|
try:
|
|
4025
|
-
|
|
4032
|
+
final = self.run_on_head(
|
|
4026
4033
|
handle,
|
|
4027
4034
|
code,
|
|
4028
|
-
stream_logs=
|
|
4029
|
-
process_stream=
|
|
4035
|
+
stream_logs=stream_logs,
|
|
4036
|
+
process_stream=process_stream,
|
|
4037
|
+
require_outputs=require_outputs,
|
|
4030
4038
|
# Allocate a pseudo-terminal to disable output buffering.
|
|
4031
4039
|
# Otherwise, there may be 5 minutes delay in logging.
|
|
4032
4040
|
ssh_mode=command_runner.SshMode.INTERACTIVE,
|
|
4033
4041
|
)
|
|
4034
4042
|
except SystemExit as e:
|
|
4035
|
-
|
|
4036
|
-
return
|
|
4043
|
+
final = e.code
|
|
4044
|
+
return final
|
|
4037
4045
|
|
|
4038
4046
|
def tail_managed_job_logs(self,
|
|
4039
4047
|
handle: CloudVmRayResourceHandle,
|
sky/catalog/__init__.py
CHANGED
|
@@ -221,6 +221,8 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
|
221
221
|
memory: Optional[str] = None,
|
|
222
222
|
disk_tier: Optional[
|
|
223
223
|
resources_utils.DiskTier] = None,
|
|
224
|
+
region: Optional[str] = None,
|
|
225
|
+
zone: Optional[str] = None,
|
|
224
226
|
clouds: CloudFilter = None) -> Optional[str]:
|
|
225
227
|
"""Returns the cloud's default instance type for given #vCPUs and memory.
|
|
226
228
|
|
|
@@ -234,7 +236,7 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
|
234
236
|
the given CPU and memory requirement.
|
|
235
237
|
"""
|
|
236
238
|
return _map_clouds_catalog(clouds, 'get_default_instance_type', cpus,
|
|
237
|
-
memory, disk_tier)
|
|
239
|
+
memory, disk_tier, region, zone)
|
|
238
240
|
|
|
239
241
|
|
|
240
242
|
def get_accelerators_from_instance_type(
|
sky/catalog/aws_catalog.py
CHANGED
|
@@ -230,10 +230,12 @@ def get_vcpus_mem_from_instance_type(
|
|
|
230
230
|
instance_type)
|
|
231
231
|
|
|
232
232
|
|
|
233
|
-
def get_default_instance_type(
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
233
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
234
|
+
memory: Optional[str] = None,
|
|
235
|
+
disk_tier: Optional[
|
|
236
|
+
resources_utils.DiskTier] = None,
|
|
237
|
+
region: Optional[str] = None,
|
|
238
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
237
239
|
del disk_tier # unused
|
|
238
240
|
if cpus is None and memory is None:
|
|
239
241
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
@@ -247,7 +249,8 @@ def get_default_instance_type(
|
|
|
247
249
|
df = _get_df()
|
|
248
250
|
df = df[df['InstanceType'].str.startswith(instance_type_prefix)]
|
|
249
251
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
250
|
-
memory_gb_or_ratio
|
|
252
|
+
memory_gb_or_ratio,
|
|
253
|
+
region, zone)
|
|
251
254
|
|
|
252
255
|
|
|
253
256
|
def get_accelerators_from_instance_type(
|
sky/catalog/azure_catalog.py
CHANGED
|
@@ -114,10 +114,12 @@ def _get_instance_family(instance_type: str) -> str:
|
|
|
114
114
|
return instance_family
|
|
115
115
|
|
|
116
116
|
|
|
117
|
-
def get_default_instance_type(
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
117
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
118
|
+
memory: Optional[str] = None,
|
|
119
|
+
disk_tier: Optional[
|
|
120
|
+
resources_utils.DiskTier] = None,
|
|
121
|
+
region: Optional[str] = None,
|
|
122
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
121
123
|
if cpus is None and memory is None:
|
|
122
124
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
123
125
|
if memory is None:
|
|
@@ -133,7 +135,8 @@ def get_default_instance_type(
|
|
|
133
135
|
|
|
134
136
|
df = df.loc[df['InstanceType'].apply(_filter_disk_type)]
|
|
135
137
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
136
|
-
memory_gb_or_ratio
|
|
138
|
+
memory_gb_or_ratio,
|
|
139
|
+
region, zone)
|
|
137
140
|
|
|
138
141
|
|
|
139
142
|
def get_accelerators_from_instance_type(
|
sky/catalog/common.py
CHANGED
|
@@ -476,8 +476,11 @@ def _filter_region_zone(df: 'pd.DataFrame', region: Optional[str],
|
|
|
476
476
|
|
|
477
477
|
|
|
478
478
|
def get_instance_type_for_cpus_mem_impl(
|
|
479
|
-
df: 'pd.DataFrame',
|
|
480
|
-
|
|
479
|
+
df: 'pd.DataFrame',
|
|
480
|
+
cpus: Optional[str],
|
|
481
|
+
memory_gb_or_ratio: Optional[str],
|
|
482
|
+
region: Optional[str] = None,
|
|
483
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
481
484
|
"""Returns the cheapest instance type that satisfies the requirements.
|
|
482
485
|
|
|
483
486
|
Args:
|
|
@@ -490,7 +493,10 @@ def get_instance_type_for_cpus_mem_impl(
|
|
|
490
493
|
returned instance type should have at least the given memory size.
|
|
491
494
|
If the string ends with "x", then the returned instance type should
|
|
492
495
|
have at least the given number of vCPUs times the given ratio.
|
|
496
|
+
region: The region to filter by.
|
|
497
|
+
zone: The zone to filter by.
|
|
493
498
|
"""
|
|
499
|
+
df = _filter_region_zone(df, region, zone)
|
|
494
500
|
df = _filter_with_cpus(df, cpus)
|
|
495
501
|
df = _filter_with_mem(df, memory_gb_or_ratio)
|
|
496
502
|
if df.empty:
|
sky/catalog/cudo_catalog.py
CHANGED
|
@@ -51,7 +51,9 @@ def get_vcpus_mem_from_instance_type(
|
|
|
51
51
|
|
|
52
52
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
53
53
|
memory: Optional[str] = None,
|
|
54
|
-
disk_tier: Optional[str] = None
|
|
54
|
+
disk_tier: Optional[str] = None,
|
|
55
|
+
region: Optional[str] = None,
|
|
56
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
55
57
|
del disk_tier
|
|
56
58
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
57
59
|
# want to specify a default instance type or family.
|
|
@@ -62,7 +64,8 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
|
62
64
|
if memory is None:
|
|
63
65
|
memory_gb_or_ratio = f'{_DEFAULT_MEMORY_CPU_RATIO}x'
|
|
64
66
|
return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
|
|
65
|
-
memory_gb_or_ratio
|
|
67
|
+
memory_gb_or_ratio,
|
|
68
|
+
region, zone)
|
|
66
69
|
|
|
67
70
|
|
|
68
71
|
def get_accelerators_from_instance_type(
|
sky/catalog/do_catalog.py
CHANGED
|
@@ -52,11 +52,14 @@ def get_default_instance_type(
|
|
|
52
52
|
cpus: Optional[str] = None,
|
|
53
53
|
memory: Optional[str] = None,
|
|
54
54
|
disk_tier: Optional[str] = None,
|
|
55
|
+
region: Optional[str] = None,
|
|
56
|
+
zone: Optional[str] = None,
|
|
55
57
|
) -> Optional[str]:
|
|
56
58
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
57
59
|
# want to specify a default instance type or family.
|
|
58
60
|
del disk_tier # unused
|
|
59
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
61
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
62
|
+
zone)
|
|
60
63
|
|
|
61
64
|
|
|
62
65
|
def get_accelerators_from_instance_type(
|
sky/catalog/fluidstack_catalog.py
CHANGED
|
@@ -52,7 +52,9 @@ def get_vcpus_mem_from_instance_type(
|
|
|
52
52
|
|
|
53
53
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
54
54
|
memory: Optional[str] = None,
|
|
55
|
-
disk_tier: Optional[str] = None
|
|
55
|
+
disk_tier: Optional[str] = None,
|
|
56
|
+
region: Optional[str] = None,
|
|
57
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
56
58
|
del disk_tier # unused
|
|
57
59
|
if cpus is None and memory is None:
|
|
58
60
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
@@ -61,7 +63,8 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
|
61
63
|
else:
|
|
62
64
|
memory_gb_or_ratio = memory
|
|
63
65
|
return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
|
|
64
|
-
memory_gb_or_ratio
|
|
66
|
+
memory_gb_or_ratio,
|
|
67
|
+
region, zone)
|
|
65
68
|
|
|
66
69
|
|
|
67
70
|
def get_accelerators_from_instance_type(
|
sky/catalog/gcp_catalog.py
CHANGED
|
@@ -279,10 +279,12 @@ def get_vcpus_mem_from_instance_type(
|
|
|
279
279
|
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
280
280
|
|
|
281
281
|
|
|
282
|
-
def get_default_instance_type(
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
282
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
283
|
+
memory: Optional[str] = None,
|
|
284
|
+
disk_tier: Optional[
|
|
285
|
+
resources_utils.DiskTier] = None,
|
|
286
|
+
region: Optional[str] = None,
|
|
287
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
286
288
|
if cpus is None and memory is None:
|
|
287
289
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
288
290
|
if memory is None:
|
|
@@ -300,7 +302,8 @@ def get_default_instance_type(
|
|
|
300
302
|
|
|
301
303
|
df = df.loc[df['InstanceType'].apply(_filter_disk_type)]
|
|
302
304
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
303
|
-
memory_gb_or_ratio
|
|
305
|
+
memory_gb_or_ratio,
|
|
306
|
+
region, zone)
|
|
304
307
|
|
|
305
308
|
|
|
306
309
|
def get_accelerators_from_instance_type(
|
sky/catalog/hyperbolic_catalog.py
CHANGED
|
@@ -67,9 +67,12 @@ def get_zone_shell_cmd() -> Optional[str]:
|
|
|
67
67
|
|
|
68
68
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
69
69
|
memory: Optional[str] = None,
|
|
70
|
-
disk_tier: Optional[str] = None
|
|
70
|
+
disk_tier: Optional[str] = None,
|
|
71
|
+
region: Optional[str] = None,
|
|
72
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
71
73
|
del disk_tier # Unused
|
|
72
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
74
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
75
|
+
zone)
|
|
73
76
|
|
|
74
77
|
|
|
75
78
|
def get_instance_type_for_accelerator(
|
sky/catalog/ibm_catalog.py
CHANGED
|
@@ -92,10 +92,12 @@ def list_accelerators(
|
|
|
92
92
|
case_sensitive, all_regions)
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
def get_default_instance_type(
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
96
|
+
memory: Optional[str] = None,
|
|
97
|
+
disk_tier: Optional[
|
|
98
|
+
resources_utils.DiskTier] = None,
|
|
99
|
+
region: Optional[str] = None,
|
|
100
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
99
101
|
del disk_tier # unused
|
|
100
102
|
if cpus is None and memory is None:
|
|
101
103
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
@@ -107,7 +109,8 @@ def get_default_instance_type(
|
|
|
107
109
|
instance_type_prefix = f'{_DEFAULT_INSTANCE_FAMILY}-'
|
|
108
110
|
df = _df[_df['InstanceType'].str.startswith(instance_type_prefix)]
|
|
109
111
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
110
|
-
memory_gb_or_ratio
|
|
112
|
+
memory_gb_or_ratio,
|
|
113
|
+
region, zone)
|
|
111
114
|
|
|
112
115
|
|
|
113
116
|
def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
|
sky/catalog/lambda_catalog.py
CHANGED
|
@@ -56,10 +56,12 @@ def get_vcpus_mem_from_instance_type(
|
|
|
56
56
|
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
def get_default_instance_type(
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
60
|
+
memory: Optional[str] = None,
|
|
61
|
+
disk_tier: Optional[
|
|
62
|
+
resources_utils.DiskTier] = None,
|
|
63
|
+
region: Optional[str] = None,
|
|
64
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
63
65
|
del disk_tier # unused
|
|
64
66
|
if cpus is None and memory is None:
|
|
65
67
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
@@ -68,7 +70,8 @@ def get_default_instance_type(
|
|
|
68
70
|
else:
|
|
69
71
|
memory_gb_or_ratio = memory
|
|
70
72
|
return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
|
|
71
|
-
memory_gb_or_ratio
|
|
73
|
+
memory_gb_or_ratio,
|
|
74
|
+
region, zone)
|
|
72
75
|
|
|
73
76
|
|
|
74
77
|
def get_accelerators_from_instance_type(
|
sky/catalog/nebius_catalog.py
CHANGED
|
@@ -51,12 +51,15 @@ def get_vcpus_mem_from_instance_type(
|
|
|
51
51
|
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
def get_default_instance_type(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
55
|
+
memory: Optional[str] = None,
|
|
56
|
+
disk_tier: Optional[
|
|
57
|
+
resources_utils.DiskTier] = None,
|
|
58
|
+
region: Optional[str] = None,
|
|
59
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
58
60
|
del disk_tier # unused
|
|
59
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
61
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
62
|
+
zone)
|
|
60
63
|
|
|
61
64
|
|
|
62
65
|
def get_accelerators_from_instance_type(
|
sky/catalog/oci_catalog.py
CHANGED
|
@@ -101,10 +101,12 @@ def get_hourly_cost(instance_type: str,
|
|
|
101
101
|
region, zone)
|
|
102
102
|
|
|
103
103
|
|
|
104
|
-
def get_default_instance_type(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
104
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
105
|
+
memory: Optional[str] = None,
|
|
106
|
+
disk_tier: Optional[
|
|
107
|
+
resources_utils.DiskTier] = None,
|
|
108
|
+
region: Optional[str] = None,
|
|
109
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
108
110
|
if cpus is None:
|
|
109
111
|
cpus = f'{oci_utils.oci_config.DEFAULT_NUM_VCPUS}+'
|
|
110
112
|
|
|
@@ -127,7 +129,8 @@ def get_default_instance_type(
|
|
|
127
129
|
|
|
128
130
|
logger.debug(f'# get_default_instance_type: {df}')
|
|
129
131
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
130
|
-
memory_gb_or_ratio
|
|
132
|
+
memory_gb_or_ratio,
|
|
133
|
+
region, zone)
|
|
131
134
|
|
|
132
135
|
|
|
133
136
|
def get_accelerators_from_instance_type(
|
sky/catalog/paperspace_catalog.py
CHANGED
|
@@ -52,11 +52,14 @@ def get_default_instance_type(
|
|
|
52
52
|
cpus: Optional[str] = None,
|
|
53
53
|
memory: Optional[str] = None,
|
|
54
54
|
disk_tier: Optional[str] = None,
|
|
55
|
+
region: Optional[str] = None,
|
|
56
|
+
zone: Optional[str] = None,
|
|
55
57
|
) -> Optional[str]:
|
|
56
58
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
57
59
|
# want to specify a default instance type or family.
|
|
58
60
|
del disk_tier # unused
|
|
59
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
61
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
62
|
+
zone)
|
|
60
63
|
|
|
61
64
|
|
|
62
65
|
def get_accelerators_from_instance_type(
|
sky/catalog/runpod_catalog.py
CHANGED
|
@@ -41,11 +41,14 @@ def get_vcpus_mem_from_instance_type(
|
|
|
41
41
|
|
|
42
42
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
43
43
|
memory: Optional[str] = None,
|
|
44
|
-
disk_tier: Optional[str] = None
|
|
44
|
+
disk_tier: Optional[str] = None,
|
|
45
|
+
region: Optional[str] = None,
|
|
46
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
45
47
|
del disk_tier # RunPod does not support disk tiers.
|
|
46
48
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
47
49
|
# want to specify a default instance type or family.
|
|
48
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
50
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
51
|
+
zone)
|
|
49
52
|
|
|
50
53
|
|
|
51
54
|
def get_accelerators_from_instance_type(
|
sky/catalog/scp_catalog.py
CHANGED
|
@@ -51,10 +51,12 @@ def get_vcpus_mem_from_instance_type(
|
|
|
51
51
|
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
def get_default_instance_type(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
55
|
+
memory: Optional[str] = None,
|
|
56
|
+
disk_tier: Optional[
|
|
57
|
+
resources_utils.DiskTier] = None,
|
|
58
|
+
region: Optional[str] = None,
|
|
59
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
58
60
|
del disk_tier # unused
|
|
59
61
|
if cpus is None and memory is None:
|
|
60
62
|
cpus = str(_DEFAULT_NUM_VCPUS)
|
|
@@ -63,7 +65,8 @@ def get_default_instance_type(
|
|
|
63
65
|
else:
|
|
64
66
|
memory_gb_or_ratio = memory
|
|
65
67
|
return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
|
|
66
|
-
memory_gb_or_ratio
|
|
68
|
+
memory_gb_or_ratio,
|
|
69
|
+
region, zone)
|
|
67
70
|
|
|
68
71
|
|
|
69
72
|
def get_accelerators_from_instance_type(
|
sky/catalog/vast_catalog.py
CHANGED
|
@@ -48,11 +48,14 @@ def get_vcpus_mem_from_instance_type(
|
|
|
48
48
|
|
|
49
49
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
50
50
|
memory: Optional[str] = None,
|
|
51
|
-
disk_tier: Optional[str] = None
|
|
51
|
+
disk_tier: Optional[str] = None,
|
|
52
|
+
region: Optional[str] = None,
|
|
53
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
52
54
|
del disk_tier
|
|
53
55
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
54
56
|
# want to specify a default instance type or family.
|
|
55
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
57
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
58
|
+
zone)
|
|
56
59
|
|
|
57
60
|
|
|
58
61
|
def get_accelerators_from_instance_type(
|
sky/catalog/vsphere_catalog.py
CHANGED
|
@@ -72,6 +72,8 @@ def get_default_instance_type(
|
|
|
72
72
|
cpus: Optional[str] = None,
|
|
73
73
|
memory: Optional[str] = None,
|
|
74
74
|
disk_tier: Optional[str] = None,
|
|
75
|
+
region: Optional[str] = None,
|
|
76
|
+
zone: Optional[str] = None,
|
|
75
77
|
) -> Optional[str]:
|
|
76
78
|
del disk_tier # unused
|
|
77
79
|
if cpus is None and memory is None:
|
|
@@ -81,7 +83,8 @@ def get_default_instance_type(
|
|
|
81
83
|
else:
|
|
82
84
|
memory_gb_or_ratio = memory
|
|
83
85
|
return common.get_instance_type_for_cpus_mem_impl(_get_df(), cpus,
|
|
84
|
-
memory_gb_or_ratio
|
|
86
|
+
memory_gb_or_ratio,
|
|
87
|
+
region, zone)
|
|
85
88
|
|
|
86
89
|
|
|
87
90
|
def get_accelerators_from_instance_type(
|
sky/client/cli/command.py
CHANGED
|
@@ -5114,6 +5114,12 @@ def serve_down(
|
|
|
5114
5114
|
default=False,
|
|
5115
5115
|
help='Sync down logs to the local machine. Can be combined with '
|
|
5116
5116
|
'--controller, --load-balancer, or a replica ID to narrow scope.')
|
|
5117
|
+
@click.option(
|
|
5118
|
+
'--tail',
|
|
5119
|
+
default=None,
|
|
5120
|
+
type=int,
|
|
5121
|
+
help='The number of lines to display from the end of the log file. '
|
|
5122
|
+
'Default is None, which means print all lines.')
|
|
5117
5123
|
@click.argument('service_name', required=True, type=str)
|
|
5118
5124
|
@click.argument('replica_ids', required=False, type=int, nargs=-1)
|
|
5119
5125
|
@usage_lib.entrypoint
|
|
@@ -5126,6 +5132,7 @@ def serve_logs(
|
|
|
5126
5132
|
load_balancer: bool,
|
|
5127
5133
|
replica_ids: Tuple[int, ...],
|
|
5128
5134
|
sync_down: bool,
|
|
5135
|
+
tail: Optional[int],
|
|
5129
5136
|
):
|
|
5130
5137
|
"""Tail or sync down logs of a service.
|
|
5131
5138
|
|
|
@@ -5145,12 +5152,26 @@ def serve_logs(
|
|
|
5145
5152
|
# Tail the logs of replica 1
|
|
5146
5153
|
sky serve logs [SERVICE_NAME] 1
|
|
5147
5154
|
\b
|
|
5155
|
+
# Show the last 100 lines of the controller logs
|
|
5156
|
+
sky serve logs --controller --tail 100 [SERVICE_NAME]
|
|
5157
|
+
\b
|
|
5148
5158
|
# Sync down all logs of the service (controller, LB, all replicas)
|
|
5149
5159
|
sky serve logs [SERVICE_NAME] --sync-down
|
|
5150
5160
|
\b
|
|
5151
5161
|
# Sync down controller logs and logs for replicas 1 and 3
|
|
5152
5162
|
sky serve logs [SERVICE_NAME] 1 3 --controller --sync-down
|
|
5153
5163
|
"""
|
|
5164
|
+
if tail is not None:
|
|
5165
|
+
if tail < 0:
|
|
5166
|
+
raise click.UsageError('--tail must be a non-negative integer.')
|
|
5167
|
+
# TODO(arda): We could add ability to tail and follow logs together.
|
|
5168
|
+
if follow:
|
|
5169
|
+
follow = False
|
|
5170
|
+
logger.warning(
|
|
5171
|
+
f'{colorama.Fore.YELLOW}'
|
|
5172
|
+
'--tail and --follow cannot be used together. '
|
|
5173
|
+
f'Changed the mode to --no-follow.{colorama.Style.RESET_ALL}')
|
|
5174
|
+
|
|
5154
5175
|
chosen_components: Set[serve_lib.ServiceComponent] = set()
|
|
5155
5176
|
if controller:
|
|
5156
5177
|
chosen_components.add(serve_lib.ServiceComponent.CONTROLLER)
|
|
@@ -5185,7 +5206,8 @@ def serve_logs(
|
|
|
5185
5206
|
serve_lib.sync_down_logs(service_name,
|
|
5186
5207
|
local_dir=str(log_dir),
|
|
5187
5208
|
targets=targets_to_sync,
|
|
5188
|
-
replica_ids=list(replica_ids)
|
|
5209
|
+
replica_ids=list(replica_ids),
|
|
5210
|
+
tail=tail)
|
|
5189
5211
|
style = colorama.Style
|
|
5190
5212
|
fore = colorama.Fore
|
|
5191
5213
|
logger.info(f'{fore.CYAN}Service {service_name} logs: '
|
|
@@ -5227,7 +5249,8 @@ def serve_logs(
|
|
|
5227
5249
|
serve_lib.tail_logs(service_name,
|
|
5228
5250
|
target=target_component,
|
|
5229
5251
|
replica_id=target_replica_id,
|
|
5230
|
-
follow=follow
|
|
5252
|
+
follow=follow,
|
|
5253
|
+
tail=tail)
|
|
5231
5254
|
except exceptions.ClusterNotUpError:
|
|
5232
5255
|
with ux_utils.print_exception_no_traceback():
|
|
5233
5256
|
raise
|