skypilot-nightly 1.0.0.dev20250910__py3-none-any.whl → 1.0.0.dev20250913__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/adaptors/seeweb.py +103 -0
- sky/authentication.py +38 -0
- sky/backends/backend_utils.py +148 -30
- sky/backends/cloud_vm_ray_backend.py +606 -223
- sky/catalog/__init__.py +7 -0
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +18 -0
- sky/catalog/data_fetchers/fetch_aws.py +13 -37
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/seeweb_catalog.py +184 -0
- sky/client/cli/command.py +2 -71
- sky/client/sdk_async.py +5 -2
- sky/clouds/__init__.py +2 -0
- sky/clouds/aws.py +23 -5
- sky/clouds/cloud.py +8 -0
- sky/clouds/kubernetes.py +2 -0
- sky/clouds/seeweb.py +463 -0
- sky/core.py +46 -12
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{3SYxqNGnvvPS8h3gdD2T7 → Y0Q7LyrxiFoWWbTdwb5nh}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/1141-159df2d4c441a9d1.js +1 -0
- sky/dashboard/out/_next/static/chunks/3015-2ea98b57e318bd6e.js +1 -0
- sky/dashboard/out/_next/static/chunks/3294.03e02ae73455f48e.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.0fa442e16dd3f00e.js +1 -0
- sky/dashboard/out/_next/static/chunks/5339.c033b29835da0f35.js +51 -0
- sky/dashboard/out/_next/static/chunks/6856-e0754534b3015377.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-11c8e9b982e8ffec.js +1 -0
- sky/dashboard/out/_next/static/chunks/9037-f9800e64eb05dd1c.js +6 -0
- sky/dashboard/out/_next/static/chunks/{webpack-1d7e11230da3ca89.js → webpack-d1e29b3aa66bf4cf.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/exceptions.py +5 -0
- sky/global_user_state.py +75 -26
- sky/jobs/client/sdk_async.py +4 -2
- sky/jobs/controller.py +4 -2
- sky/jobs/recovery_strategy.py +1 -1
- sky/jobs/state.py +26 -16
- sky/jobs/utils.py +67 -24
- sky/logs/agent.py +10 -2
- sky/provision/__init__.py +1 -0
- sky/provision/kubernetes/config.py +7 -2
- sky/provision/kubernetes/instance.py +84 -41
- sky/provision/kubernetes/utils.py +14 -3
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +806 -0
- sky/provision/vast/instance.py +1 -1
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +252 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/server/config.py +14 -5
- sky/server/metrics.py +41 -8
- sky/server/requests/executor.py +41 -4
- sky/server/server.py +1 -0
- sky/server/uvicorn.py +11 -5
- sky/setup_files/dependencies.py +8 -1
- sky/skylet/constants.py +14 -8
- sky/skylet/job_lib.py +128 -10
- sky/skylet/log_lib.py +14 -3
- sky/skylet/log_lib.pyi +9 -0
- sky/skylet/services.py +203 -0
- sky/skylet/skylet.py +4 -0
- sky/task.py +62 -0
- sky/templates/kubernetes-ray.yml.j2 +120 -3
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/utils/accelerator_registry.py +3 -1
- sky/utils/command_runner.py +35 -11
- sky/utils/command_runner.pyi +22 -0
- sky/utils/context_utils.py +15 -2
- sky/utils/controller_utils.py +11 -5
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/git.py +559 -1
- sky/utils/resource_checker.py +8 -7
- sky/workspaces/core.py +57 -21
- {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/METADATA +40 -35
- {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/RECORD +96 -85
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6856-6e2bc8a6fd0867af.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- /sky/dashboard/out/_next/static/{3SYxqNGnvvPS8h3gdD2T7 → Y0Q7LyrxiFoWWbTdwb5nh}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/top_level.txt +0 -0
sky/utils/command_runner.py
CHANGED
|
@@ -469,15 +469,19 @@ class CommandRunner:
|
|
|
469
469
|
"""Close the cached connection to the remote machine."""
|
|
470
470
|
pass
|
|
471
471
|
|
|
472
|
-
def port_forward_command(
|
|
473
|
-
|
|
474
|
-
|
|
472
|
+
def port_forward_command(
|
|
473
|
+
self,
|
|
474
|
+
port_forward: List[Tuple[int, int]],
|
|
475
|
+
connect_timeout: int = 1,
|
|
476
|
+
ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
|
|
475
477
|
"""Command for forwarding ports from localhost to the remote machine.
|
|
476
478
|
|
|
477
479
|
Args:
|
|
478
480
|
port_forward: A list of ports to forward from the localhost to the
|
|
479
481
|
remote host.
|
|
480
482
|
connect_timeout: The timeout for the connection.
|
|
483
|
+
ssh_mode: The mode to use for ssh.
|
|
484
|
+
See SshMode for more details.
|
|
481
485
|
"""
|
|
482
486
|
raise NotImplementedError
|
|
483
487
|
|
|
@@ -592,6 +596,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
592
596
|
ssh_proxy_command: Optional[str] = None,
|
|
593
597
|
docker_user: Optional[str] = None,
|
|
594
598
|
disable_control_master: Optional[bool] = False,
|
|
599
|
+
port_forward_execute_remote_command: Optional[bool] = False,
|
|
595
600
|
):
|
|
596
601
|
"""Initialize SSHCommandRunner.
|
|
597
602
|
|
|
@@ -618,6 +623,10 @@ class SSHCommandRunner(CommandRunner):
|
|
|
618
623
|
disable_control_master: bool; specifies whether or not the ssh
|
|
619
624
|
command will utilize ControlMaster. We currently disable
|
|
620
625
|
it for k8s instance.
|
|
626
|
+
port_forward_execute_remote_command: bool; specifies whether to
|
|
627
|
+
omit -N from the port forwarding command. This is useful if you
|
|
628
|
+
want to run a command on the remote machine to make sure the
|
|
629
|
+
SSH tunnel is established.
|
|
621
630
|
"""
|
|
622
631
|
super().__init__(node)
|
|
623
632
|
ip, port = node
|
|
@@ -646,22 +655,28 @@ class SSHCommandRunner(CommandRunner):
|
|
|
646
655
|
self.ssh_user = ssh_user
|
|
647
656
|
self.port = port
|
|
648
657
|
self._docker_ssh_proxy_command = None
|
|
658
|
+
self.port_forward_execute_remote_command = (
|
|
659
|
+
port_forward_execute_remote_command)
|
|
649
660
|
|
|
650
|
-
def port_forward_command(
|
|
651
|
-
|
|
652
|
-
|
|
661
|
+
def port_forward_command(
|
|
662
|
+
self,
|
|
663
|
+
port_forward: List[Tuple[int, int]],
|
|
664
|
+
connect_timeout: int = 1,
|
|
665
|
+
ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
|
|
653
666
|
"""Command for forwarding ports from localhost to the remote machine.
|
|
654
667
|
|
|
655
668
|
Args:
|
|
656
669
|
port_forward: A list of ports to forward from the local port to the
|
|
657
670
|
remote port.
|
|
658
671
|
connect_timeout: The timeout for the ssh connection.
|
|
672
|
+
ssh_mode: The mode to use for ssh.
|
|
673
|
+
See SshMode for more details.
|
|
659
674
|
|
|
660
675
|
Returns:
|
|
661
676
|
The command for forwarding ports from localhost to the remote
|
|
662
677
|
machine.
|
|
663
678
|
"""
|
|
664
|
-
return self.ssh_base_command(ssh_mode=
|
|
679
|
+
return self.ssh_base_command(ssh_mode=ssh_mode,
|
|
665
680
|
port_forward=port_forward,
|
|
666
681
|
connect_timeout=connect_timeout)
|
|
667
682
|
|
|
@@ -680,7 +695,11 @@ class SSHCommandRunner(CommandRunner):
|
|
|
680
695
|
for local, remote in port_forward:
|
|
681
696
|
logger.debug(
|
|
682
697
|
f'Forwarding local port {local} to remote port {remote}.')
|
|
683
|
-
|
|
698
|
+
if self.port_forward_execute_remote_command:
|
|
699
|
+
ssh += ['-L']
|
|
700
|
+
else:
|
|
701
|
+
ssh += ['-NL']
|
|
702
|
+
ssh += [f'{local}:localhost:{remote}']
|
|
684
703
|
if self._docker_ssh_proxy_command is not None:
|
|
685
704
|
docker_ssh_proxy_command = self._docker_ssh_proxy_command(ssh)
|
|
686
705
|
else:
|
|
@@ -894,9 +913,11 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
894
913
|
else:
|
|
895
914
|
return f'pod/{self.pod_name}'
|
|
896
915
|
|
|
897
|
-
def port_forward_command(
|
|
898
|
-
|
|
899
|
-
|
|
916
|
+
def port_forward_command(
|
|
917
|
+
self,
|
|
918
|
+
port_forward: List[Tuple[int, int]],
|
|
919
|
+
connect_timeout: int = 1,
|
|
920
|
+
ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
|
|
900
921
|
"""Command for forwarding ports from localhost to the remote machine.
|
|
901
922
|
|
|
902
923
|
Args:
|
|
@@ -904,7 +925,10 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
904
925
|
remote port. Currently, only one port is supported, i.e. the
|
|
905
926
|
list should have only one element.
|
|
906
927
|
connect_timeout: The timeout for the ssh connection.
|
|
928
|
+
ssh_mode: The mode to use for ssh.
|
|
929
|
+
See SshMode for more details.
|
|
907
930
|
"""
|
|
931
|
+
del ssh_mode # unused
|
|
908
932
|
assert port_forward and len(port_forward) == 1, (
|
|
909
933
|
'Only one port is supported for Kubernetes port-forward.')
|
|
910
934
|
kubectl_args = [
|
sky/utils/command_runner.pyi
CHANGED
|
@@ -106,6 +106,13 @@ class CommandRunner:
|
|
|
106
106
|
max_retry: int = ...) -> None:
|
|
107
107
|
...
|
|
108
108
|
|
|
109
|
+
def port_forward_command(
|
|
110
|
+
self,
|
|
111
|
+
port_forward: List[Tuple[int, int]],
|
|
112
|
+
connect_timeout: int = 1,
|
|
113
|
+
ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
|
|
114
|
+
...
|
|
115
|
+
|
|
109
116
|
@classmethod
|
|
110
117
|
def make_runner_list(cls: typing.Type[CommandRunner],
|
|
111
118
|
node_list: Iterable[Tuple[Any, ...]],
|
|
@@ -127,6 +134,7 @@ class SSHCommandRunner(CommandRunner):
|
|
|
127
134
|
ssh_control_name: Optional[str]
|
|
128
135
|
docker_user: str
|
|
129
136
|
disable_control_master: Optional[bool]
|
|
137
|
+
port_forward_execute_remote_command: Optional[bool]
|
|
130
138
|
|
|
131
139
|
def __init__(
|
|
132
140
|
self,
|
|
@@ -200,6 +208,13 @@ class SSHCommandRunner(CommandRunner):
|
|
|
200
208
|
max_retry: int = ...) -> None:
|
|
201
209
|
...
|
|
202
210
|
|
|
211
|
+
def port_forward_command(
|
|
212
|
+
self,
|
|
213
|
+
port_forward: List[Tuple[int, int]],
|
|
214
|
+
connect_timeout: int = 1,
|
|
215
|
+
ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
|
|
216
|
+
...
|
|
217
|
+
|
|
203
218
|
|
|
204
219
|
class KubernetesCommandRunner(CommandRunner):
|
|
205
220
|
|
|
@@ -272,6 +287,13 @@ class KubernetesCommandRunner(CommandRunner):
|
|
|
272
287
|
max_retry: int = ...) -> None:
|
|
273
288
|
...
|
|
274
289
|
|
|
290
|
+
def port_forward_command(
|
|
291
|
+
self,
|
|
292
|
+
port_forward: List[Tuple[int, int]],
|
|
293
|
+
connect_timeout: int = 1,
|
|
294
|
+
ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
|
|
295
|
+
...
|
|
296
|
+
|
|
275
297
|
|
|
276
298
|
class LocalProcessCommandRunner(CommandRunner):
|
|
277
299
|
|
sky/utils/context_utils.py
CHANGED
|
@@ -10,6 +10,8 @@ import sys
|
|
|
10
10
|
import typing
|
|
11
11
|
from typing import Any, Callable, IO, Optional, Tuple, TypeVar
|
|
12
12
|
|
|
13
|
+
from typing_extensions import ParamSpec
|
|
14
|
+
|
|
13
15
|
from sky import sky_logging
|
|
14
16
|
from sky.utils import context
|
|
15
17
|
from sky.utils import subprocess_utils
|
|
@@ -173,9 +175,14 @@ def cancellation_guard(func: F) -> F:
|
|
|
173
175
|
return typing.cast(F, wrapper)
|
|
174
176
|
|
|
175
177
|
|
|
178
|
+
P = ParamSpec('P')
|
|
179
|
+
T = TypeVar('T')
|
|
180
|
+
|
|
181
|
+
|
|
176
182
|
# TODO(aylei): replace this with asyncio.to_thread once we drop support for
|
|
177
183
|
# python 3.8
|
|
178
|
-
def to_thread(func, /, *args,
|
|
184
|
+
def to_thread(func: Callable[P, T], /, *args: P.args,
|
|
185
|
+
**kwargs: P.kwargs) -> 'asyncio.Future[T]':
|
|
179
186
|
"""Asynchronously run function *func* in a separate thread.
|
|
180
187
|
|
|
181
188
|
This is the same as asyncio.to_thread, added in Python 3.9
|
|
@@ -183,5 +190,11 @@ def to_thread(func, /, *args, **kwargs):
|
|
|
183
190
|
loop = asyncio.get_running_loop()
|
|
184
191
|
# This is critical to pass the current coroutine context to the new thread
|
|
185
192
|
pyctx = contextvars.copy_context()
|
|
186
|
-
func_call = functools.partial(
|
|
193
|
+
func_call: Callable[..., T] = functools.partial(
|
|
194
|
+
# partial deletes arguments type and thus can't figure out the return
|
|
195
|
+
# type of pyctx.run
|
|
196
|
+
pyctx.run, # type: ignore
|
|
197
|
+
func,
|
|
198
|
+
*args,
|
|
199
|
+
**kwargs)
|
|
187
200
|
return loop.run_in_executor(None, func_call)
|
sky/utils/controller_utils.py
CHANGED
|
@@ -228,15 +228,21 @@ def get_controller_for_pool(pool: bool) -> Controllers:
|
|
|
228
228
|
def high_availability_specified(cluster_name: Optional[str]) -> bool:
|
|
229
229
|
"""Check if the controller high availability is specified in user config.
|
|
230
230
|
"""
|
|
231
|
-
# pylint: disable=import-outside-toplevel
|
|
232
|
-
from sky.jobs import utils as managed_job_utils
|
|
233
|
-
if managed_job_utils.is_consolidation_mode():
|
|
234
|
-
return True
|
|
235
|
-
|
|
236
231
|
controller = Controllers.from_name(cluster_name)
|
|
237
232
|
if controller is None:
|
|
238
233
|
return False
|
|
239
234
|
|
|
235
|
+
if controller.value.controller_type == 'jobs':
|
|
236
|
+
# pylint: disable-next=import-outside-toplevel
|
|
237
|
+
from sky.jobs import utils as managed_job_utils
|
|
238
|
+
if managed_job_utils.is_consolidation_mode():
|
|
239
|
+
return True
|
|
240
|
+
elif controller.value.controller_type == 'serve':
|
|
241
|
+
# pylint: disable-next=import-outside-toplevel
|
|
242
|
+
from sky.serve import serve_utils
|
|
243
|
+
if serve_utils.is_consolidation_mode():
|
|
244
|
+
return True
|
|
245
|
+
|
|
240
246
|
if skypilot_config.loaded():
|
|
241
247
|
return skypilot_config.get_nested((controller.value.controller_type,
|
|
242
248
|
'controller', 'high_availability'),
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -17,7 +17,7 @@ logger = sky_logging.init_logger(__name__)
|
|
|
17
17
|
DB_INIT_LOCK_TIMEOUT_SECONDS = 10
|
|
18
18
|
|
|
19
19
|
GLOBAL_USER_STATE_DB_NAME = 'state_db'
|
|
20
|
-
GLOBAL_USER_STATE_VERSION = '
|
|
20
|
+
GLOBAL_USER_STATE_VERSION = '008'
|
|
21
21
|
GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.state_db.lock'
|
|
22
22
|
|
|
23
23
|
SPOT_JOBS_DB_NAME = 'spot_jobs_db'
|