skypilot-nightly 1.0.0.dev20250413__py3-none-any.whl → 1.0.0.dev20250421__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +7 -0
- sky/authentication.py +2 -2
- sky/backends/backend_utils.py +31 -3
- sky/backends/cloud_vm_ray_backend.py +22 -29
- sky/backends/wheel_utils.py +9 -0
- sky/check.py +1 -1
- sky/cli.py +253 -74
- sky/client/cli.py +253 -74
- sky/client/common.py +10 -3
- sky/client/sdk.py +11 -8
- sky/clouds/aws.py +2 -2
- sky/clouds/kubernetes.py +0 -8
- sky/clouds/oci.py +1 -1
- sky/core.py +17 -11
- sky/dashboard/out/404.html +1 -0
- sky/dashboard/out/_next/static/chunks/236-d437cf66e68a6f64.js +6 -0
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +15 -0
- sky/dashboard/out/_next/static/chunks/37-72fdc8f71d6e4784.js +6 -0
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +59 -0
- sky/dashboard/out/_next/static/chunks/845-2ea1cc63ba1f4067.js +1 -0
- sky/dashboard/out/_next/static/chunks/979-7cd0778078b9cfad.js +1 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-3001e84c61acddfb.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-b09f7fbf6d5d74f6.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b57ec043f09c5813.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-ef2e0e91a9222cac.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +1 -0
- sky/dashboard/out/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +1 -0
- sky/dashboard/out/_next/static/css/f3538cd90cfca88c.css +3 -0
- sky/dashboard/out/_next/static/mS9YfLA5hhsJMeBj9W8J7/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/mS9YfLA5hhsJMeBj9W8J7/_ssgManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -0
- sky/dashboard/out/clusters/[cluster].html +1 -0
- sky/dashboard/out/clusters.html +1 -0
- sky/dashboard/out/favicon.ico +0 -0
- sky/dashboard/out/index.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -0
- sky/dashboard/out/jobs.html +1 -0
- sky/dashboard/out/skypilot.svg +15 -0
- sky/dashboard/out/videos/cursor-small.mp4 +0 -0
- sky/data/data_transfer.py +2 -1
- sky/data/storage.py +24 -14
- sky/exceptions.py +5 -0
- sky/jobs/constants.py +8 -1
- sky/jobs/server/core.py +12 -8
- sky/models.py +28 -0
- sky/optimizer.py +7 -9
- sky/provision/kubernetes/config.py +1 -1
- sky/provision/kubernetes/instance.py +16 -14
- sky/provision/kubernetes/network_utils.py +1 -1
- sky/provision/kubernetes/utils.py +50 -22
- sky/provision/provisioner.py +2 -1
- sky/resources.py +56 -2
- sky/serve/__init__.py +2 -0
- sky/serve/autoscalers.py +6 -2
- sky/serve/client/sdk.py +61 -0
- sky/serve/constants.py +6 -0
- sky/serve/load_balancing_policies.py +0 -4
- sky/serve/replica_managers.py +6 -8
- sky/serve/serve_state.py +0 -6
- sky/serve/serve_utils.py +33 -1
- sky/serve/server/core.py +192 -7
- sky/serve/server/server.py +28 -0
- sky/server/common.py +152 -47
- sky/server/constants.py +7 -1
- sky/server/requests/executor.py +4 -0
- sky/server/requests/payloads.py +12 -15
- sky/server/requests/serializers/decoders.py +2 -5
- sky/server/requests/serializers/encoders.py +2 -5
- sky/server/server.py +44 -1
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +1 -0
- sky/sky_logging.py +12 -2
- sky/skylet/constants.py +5 -7
- sky/skylet/job_lib.py +3 -3
- sky/skypilot_config.py +225 -84
- sky/templates/kubernetes-ray.yml.j2 +7 -3
- sky/utils/cli_utils/status_utils.py +12 -5
- sky/utils/config_utils.py +39 -15
- sky/utils/controller_utils.py +44 -7
- sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
- sky/utils/kubernetes/gpu_labeler.py +99 -16
- sky/utils/schemas.py +24 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/RECORD +97 -64
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250421.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'd37ba6f11593127305c73cb1b1a03dc6da8452b7'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20250413'
|
38
|
+
__version__ = '1.0.0.dev20250421'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/adaptors/kubernetes.py
CHANGED
@@ -160,6 +160,13 @@ def api_client(context: Optional[str] = None):
|
|
160
160
|
return kubernetes.client.ApiClient()
|
161
161
|
|
162
162
|
|
163
|
+
@_api_logging_decorator('urllib3', logging.ERROR)
|
164
|
+
@annotations.lru_cache(scope='request')
|
165
|
+
def watch(context: Optional[str] = None):
|
166
|
+
_load_config(context)
|
167
|
+
return kubernetes.watch.Watch()
|
168
|
+
|
169
|
+
|
163
170
|
def api_exception():
|
164
171
|
return kubernetes.client.rest.ApiException
|
165
172
|
|
sky/authentication.py
CHANGED
@@ -382,10 +382,10 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
382
382
|
network_mode = kubernetes_enums.KubernetesNetworkingMode.from_str(
|
383
383
|
network_mode_str)
|
384
384
|
except ValueError as e:
|
385
|
-
# Add message saying "Please check: ~/.sky/
|
385
|
+
# Add message saying "Please check: ~/.sky/config.yaml" to the error
|
386
386
|
# message.
|
387
387
|
with ux_utils.print_exception_no_traceback():
|
388
|
-
raise ValueError(str(e) + ' Please check: ~/.sky/
|
388
|
+
raise ValueError(str(e) + ' Please check: ~/.sky/config.yaml.') \
|
389
389
|
from None
|
390
390
|
_, public_key_path = get_or_generate_keys()
|
391
391
|
|
sky/backends/backend_utils.py
CHANGED
@@ -278,11 +278,11 @@ def _optimize_file_mounts(yaml_path: str) -> None:
|
|
278
278
|
# the dst.
|
279
279
|
mkdir_parent = f'mkdir -p {dst}'
|
280
280
|
src_basename = f'{src_basename}/*'
|
281
|
-
mv = (f'cp -
|
281
|
+
mv = (f'cp -rf {_REMOTE_RUNTIME_FILES_DIR}/{src_basename} '
|
282
282
|
f'{dst_parent_dir}/{dst_basename}')
|
283
283
|
fragment = f'({mkdir_parent} && {mv})'
|
284
284
|
commands.append(fragment)
|
285
|
-
postprocess_runtime_files_command = '
|
285
|
+
postprocess_runtime_files_command = '; '.join(commands)
|
286
286
|
|
287
287
|
setup_commands = yaml_config.get('setup_commands', [])
|
288
288
|
if setup_commands:
|
@@ -682,7 +682,7 @@ def write_cluster_config(
|
|
682
682
|
ssh_proxy_command = ssh_proxy_command_config[region_name]
|
683
683
|
logger.debug(f'Using ssh_proxy_command: {ssh_proxy_command!r}')
|
684
684
|
|
685
|
-
# User-supplied global instance tags from ~/.sky/
|
685
|
+
# User-supplied global instance tags from ~/.sky/config.yaml.
|
686
686
|
labels = skypilot_config.get_nested((str(cloud).lower(), 'labels'), {})
|
687
687
|
# labels is a dict, which is guaranteed by the type check in
|
688
688
|
# schemas.py
|
@@ -2582,11 +2582,36 @@ def get_clusters(
|
|
2582
2582
|
logger.info(f'Cluster(s) not found: {bright}{clusters_str}{reset}.')
|
2583
2583
|
records = new_records
|
2584
2584
|
|
2585
|
+
def _update_record_with_resources(record: Optional[Dict[str, Any]]) -> None:
|
2586
|
+
"""Add the resources to the record."""
|
2587
|
+
if record is None:
|
2588
|
+
return
|
2589
|
+
handle = record['handle']
|
2590
|
+
if handle is None:
|
2591
|
+
return
|
2592
|
+
record['nodes'] = handle.launched_nodes
|
2593
|
+
if handle.launched_resources is None:
|
2594
|
+
return
|
2595
|
+
record['cloud'] = (f'{handle.launched_resources.cloud}'
|
2596
|
+
if handle.launched_resources.cloud else None)
|
2597
|
+
record['region'] = (f'{handle.launched_resources.region}'
|
2598
|
+
if handle.launched_resources.region else None)
|
2599
|
+
record['cpus'] = (f'{handle.launched_resources.cpus}'
|
2600
|
+
if handle.launched_resources.cpus else None)
|
2601
|
+
record['memory'] = (f'{handle.launched_resources.memory}'
|
2602
|
+
if handle.launched_resources.memory else None)
|
2603
|
+
record['accelerators'] = (f'{handle.launched_resources.accelerators}'
|
2604
|
+
if handle.launched_resources.accelerators else
|
2605
|
+
None)
|
2606
|
+
|
2585
2607
|
# Add auth_config to the records
|
2586
2608
|
for record in records:
|
2587
2609
|
_update_record_with_credentials_and_resources_str(record)
|
2588
2610
|
|
2589
2611
|
if refresh == common.StatusRefreshMode.NONE:
|
2612
|
+
# Add resources to the records
|
2613
|
+
for record in records:
|
2614
|
+
_update_record_with_resources(record)
|
2590
2615
|
return records
|
2591
2616
|
|
2592
2617
|
plural = 's' if len(records) > 1 else ''
|
@@ -2662,6 +2687,9 @@ def get_clusters(
|
|
2662
2687
|
for cluster_name, e in failed_clusters:
|
2663
2688
|
logger.warning(f' {bright}{cluster_name}{reset}: {e}')
|
2664
2689
|
|
2690
|
+
# Add resources to the records
|
2691
|
+
for record in kept_records:
|
2692
|
+
_update_record_with_resources(record)
|
2665
2693
|
return kept_records
|
2666
2694
|
|
2667
2695
|
|
@@ -35,6 +35,7 @@ from sky import optimizer
|
|
35
35
|
from sky import provision as provision_lib
|
36
36
|
from sky import resources as resources_lib
|
37
37
|
from sky import sky_logging
|
38
|
+
from sky import skypilot_config
|
38
39
|
from sky import task as task_lib
|
39
40
|
from sky.backends import backend_utils
|
40
41
|
from sky.backends import wheel_utils
|
@@ -439,18 +440,7 @@ class RayCodeGen:
|
|
439
440
|
pg = ray_util.placement_group({json.dumps(bundles)}, 'STRICT_SPREAD')
|
440
441
|
plural = 's' if {num_nodes} > 1 else ''
|
441
442
|
node_str = f'{num_nodes} node{{plural}}'
|
442
|
-
|
443
|
-
# We have this `INFO: Tip:` message only for backward
|
444
|
-
# compatibility, because if a cluster has the old SkyPilot version,
|
445
|
-
# it relies on this message to start log streaming.
|
446
|
-
# This message will be skipped for new clusters, because we use
|
447
|
-
# start_streaming_at for the `Waiting for task resources on`
|
448
|
-
# message.
|
449
|
-
# TODO: Remove this message in v0.9.0.
|
450
|
-
message = ('{ux_utils.INDENT_SYMBOL}{colorama.Style.DIM}INFO: '
|
451
|
-
'Tip: use Ctrl-C to exit log streaming, not kill '
|
452
|
-
'the job.{colorama.Style.RESET_ALL}\\n')
|
453
|
-
message += ('{ux_utils.INDENT_SYMBOL}{colorama.Style.DIM}'
|
443
|
+
message = ('{ux_utils.INDENT_SYMBOL}{colorama.Style.DIM}'
|
454
444
|
'Waiting for task resources on '
|
455
445
|
f'{{node_str}}.{colorama.Style.RESET_ALL}')
|
456
446
|
print(message, flush=True)
|
@@ -608,9 +598,6 @@ class RayCodeGen:
|
|
608
598
|
textwrap.dedent(f"""\
|
609
599
|
sky_env_vars_dict = {{}}
|
610
600
|
sky_env_vars_dict['{constants.SKYPILOT_NODE_IPS}'] = job_ip_list_str
|
611
|
-
# Backward compatibility: Environment starting with `SKY_` is
|
612
|
-
# deprecated. Remove it in v0.9.0.
|
613
|
-
sky_env_vars_dict['SKY_NODE_IPS'] = job_ip_list_str
|
614
601
|
sky_env_vars_dict['{constants.SKYPILOT_NUM_NODES}'] = len(job_ip_rank_list)
|
615
602
|
""")
|
616
603
|
]
|
@@ -659,9 +646,6 @@ class RayCodeGen:
|
|
659
646
|
if script is not None:
|
660
647
|
script += rclone_flush_script
|
661
648
|
sky_env_vars_dict['{constants.SKYPILOT_NUM_GPUS_PER_NODE}'] = {int(math.ceil(num_gpus))!r}
|
662
|
-
# Backward compatibility: Environment starting with `SKY_` is
|
663
|
-
# deprecated. Remove it in v0.9.0.
|
664
|
-
sky_env_vars_dict['SKY_NUM_GPUS_PER_NODE'] = {int(math.ceil(num_gpus))!r}
|
665
649
|
|
666
650
|
ip = gang_scheduling_id_to_ip[{gang_scheduling_id!r}]
|
667
651
|
rank = job_ip_rank_map[ip]
|
@@ -678,14 +662,8 @@ class RayCodeGen:
|
|
678
662
|
name_str = f'{{node_name}}, rank={{rank}},'
|
679
663
|
log_path = os.path.expanduser(os.path.join({log_dir!r}, f'{{rank}}-{{node_name}}.log'))
|
680
664
|
sky_env_vars_dict['{constants.SKYPILOT_NODE_RANK}'] = rank
|
681
|
-
# Backward compatibility: Environment starting with `SKY_` is
|
682
|
-
# deprecated. Remove it in v0.9.0.
|
683
|
-
sky_env_vars_dict['SKY_NODE_RANK'] = rank
|
684
665
|
|
685
666
|
sky_env_vars_dict['SKYPILOT_INTERNAL_JOB_ID'] = {self.job_id}
|
686
|
-
# Backward compatibility: Environment starting with `SKY_` is
|
687
|
-
# deprecated. Remove it in v0.9.0.
|
688
|
-
sky_env_vars_dict['SKY_INTERNAL_JOB_ID'] = {self.job_id}
|
689
667
|
|
690
668
|
futures.append(run_bash_command_with_log \\
|
691
669
|
.options(name=name_str, {options_str}) \\
|
@@ -1473,7 +1451,7 @@ class RetryingVmProvisioner(object):
|
|
1473
1451
|
f'invalid cloud credentials: '
|
1474
1452
|
f'{common_utils.format_exception(e)}')
|
1475
1453
|
except exceptions.InvalidCloudConfigs as e:
|
1476
|
-
# Failed due to invalid user configs in ~/.sky/
|
1454
|
+
# Failed due to invalid user configs in ~/.sky/config.yaml.
|
1477
1455
|
logger.warning(f'{common_utils.format_exception(e)}')
|
1478
1456
|
# We should block the entire cloud if the user config is
|
1479
1457
|
# invalid.
|
@@ -2065,9 +2043,9 @@ class RetryingVmProvisioner(object):
|
|
2065
2043
|
(clouds.Kubernetes, clouds.RunPod)) and
|
2066
2044
|
controller_utils.Controllers.from_name(cluster_name)
|
2067
2045
|
is not None):
|
2068
|
-
|
2069
|
-
|
2070
|
-
requested_features.remove(
|
2046
|
+
# If autostop is disabled in config, the feature may not be
|
2047
|
+
# requested, so use discard() instead of remove().
|
2048
|
+
requested_features.discard(
|
2071
2049
|
clouds.CloudImplementationFeatures.AUTOSTOP)
|
2072
2050
|
|
2073
2051
|
# Skip if to_provision.cloud does not support requested features
|
@@ -4470,7 +4448,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
4470
4448
|
(clouds.Kubernetes, clouds.RunPod)) and not down and
|
4471
4449
|
idle_minutes_to_autostop >= 0):
|
4472
4450
|
# We should hit this code path only for the controllers on
|
4473
|
-
# Kubernetes and RunPod clusters
|
4451
|
+
# Kubernetes and RunPod clusters, because autostop() will
|
4452
|
+
# skip the supported feature check. Non-controller k8s/runpod
|
4453
|
+
# clusters will have already errored out.
|
4474
4454
|
controller = controller_utils.Controllers.from_name(
|
4475
4455
|
handle.cluster_name)
|
4476
4456
|
assert (controller is not None), handle.cluster_name
|
@@ -4481,6 +4461,19 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
4481
4461
|
# For SkyServe controllers on Kubernetes: override autostop
|
4482
4462
|
# behavior to force autodown (instead of no-op)
|
4483
4463
|
# to avoid dangling controllers.
|
4464
|
+
|
4465
|
+
# down = False is the default, but warn the user in case
|
4466
|
+
# they have explicitly specified it.
|
4467
|
+
config_override_down = skypilot_config.get_nested(
|
4468
|
+
(controller.value.controller_type, 'controller',
|
4469
|
+
'autostop', 'down'), None)
|
4470
|
+
if config_override_down is False: # will not match None
|
4471
|
+
logger.warning(
|
4472
|
+
'SkyServe controller autodown is disabled in the '
|
4473
|
+
'~/.sky/config.yaml configuration file '
|
4474
|
+
'(serve.controller.autostop.down_when_idle), but '
|
4475
|
+
'it is force enabled for Kubernetes clusters.')
|
4476
|
+
|
4484
4477
|
down = True
|
4485
4478
|
else:
|
4486
4479
|
logger.info('Auto-stop is not supported for Kubernetes '
|
sky/backends/wheel_utils.py
CHANGED
@@ -85,6 +85,15 @@ def _build_sky_wheel() -> pathlib.Path:
|
|
85
85
|
for f in setup_files_dir.iterdir():
|
86
86
|
if f.is_file() and f.name != 'setup.py':
|
87
87
|
shutil.copy(str(f), str(tmp_dir))
|
88
|
+
if f.name == 'MANIFEST.in':
|
89
|
+
# Remove the line `sky/dashboard/out`, so we do not
|
90
|
+
# include the dashboard files in the internal wheel
|
91
|
+
import fileinput # pylint: disable=import-outside-toplevel
|
92
|
+
with fileinput.input(tmp_dir / f.name,
|
93
|
+
inplace=True) as file:
|
94
|
+
for line in file:
|
95
|
+
if 'sky/dashboard/out' not in line:
|
96
|
+
print(line, end='')
|
88
97
|
|
89
98
|
init_file_path = SKY_PACKAGE_PATH / '__init__.py'
|
90
99
|
init_file_content = init_file_path.read_text()
|
sky/check.py
CHANGED
@@ -142,7 +142,7 @@ def check_capabilities(
|
|
142
142
|
if disallowed_cloud_names:
|
143
143
|
disallowed_clouds_hint = (
|
144
144
|
'\nNote: The following clouds were disabled because they were not '
|
145
|
-
'included in allowed_clouds in ~/.sky/
|
145
|
+
'included in allowed_clouds in ~/.sky/config.yaml: '
|
146
146
|
f'{", ".join([c for c in disallowed_cloud_names])}')
|
147
147
|
if not all_enabled_clouds:
|
148
148
|
echo(
|