skypilot-nightly 1.0.0.dev20250910__py3-none-any.whl → 1.0.0.dev20250913__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (105) hide show
  1. sky/__init__.py +4 -2
  2. sky/adaptors/seeweb.py +103 -0
  3. sky/authentication.py +38 -0
  4. sky/backends/backend_utils.py +148 -30
  5. sky/backends/cloud_vm_ray_backend.py +606 -223
  6. sky/catalog/__init__.py +7 -0
  7. sky/catalog/aws_catalog.py +4 -0
  8. sky/catalog/common.py +18 -0
  9. sky/catalog/data_fetchers/fetch_aws.py +13 -37
  10. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  11. sky/catalog/seeweb_catalog.py +184 -0
  12. sky/client/cli/command.py +2 -71
  13. sky/client/sdk_async.py +5 -2
  14. sky/clouds/__init__.py +2 -0
  15. sky/clouds/aws.py +23 -5
  16. sky/clouds/cloud.py +8 -0
  17. sky/clouds/kubernetes.py +2 -0
  18. sky/clouds/seeweb.py +463 -0
  19. sky/core.py +46 -12
  20. sky/dashboard/out/404.html +1 -1
  21. sky/dashboard/out/_next/static/{3SYxqNGnvvPS8h3gdD2T7 → Y0Q7LyrxiFoWWbTdwb5nh}/_buildManifest.js +1 -1
  22. sky/dashboard/out/_next/static/chunks/1141-159df2d4c441a9d1.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/3015-2ea98b57e318bd6e.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/3294.03e02ae73455f48e.js +6 -0
  25. sky/dashboard/out/_next/static/chunks/3785.0fa442e16dd3f00e.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/5339.c033b29835da0f35.js +51 -0
  27. sky/dashboard/out/_next/static/chunks/6856-e0754534b3015377.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/6990-11c8e9b982e8ffec.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/9037-f9800e64eb05dd1c.js +6 -0
  30. sky/dashboard/out/_next/static/chunks/{webpack-1d7e11230da3ca89.js → webpack-d1e29b3aa66bf4cf.js} +1 -1
  31. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  32. sky/dashboard/out/clusters/[cluster].html +1 -1
  33. sky/dashboard/out/clusters.html +1 -1
  34. sky/dashboard/out/config.html +1 -1
  35. sky/dashboard/out/index.html +1 -1
  36. sky/dashboard/out/infra/[context].html +1 -1
  37. sky/dashboard/out/infra.html +1 -1
  38. sky/dashboard/out/jobs/[job].html +1 -1
  39. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  40. sky/dashboard/out/jobs.html +1 -1
  41. sky/dashboard/out/users.html +1 -1
  42. sky/dashboard/out/volumes.html +1 -1
  43. sky/dashboard/out/workspace/new.html +1 -1
  44. sky/dashboard/out/workspaces/[name].html +1 -1
  45. sky/dashboard/out/workspaces.html +1 -1
  46. sky/exceptions.py +5 -0
  47. sky/global_user_state.py +75 -26
  48. sky/jobs/client/sdk_async.py +4 -2
  49. sky/jobs/controller.py +4 -2
  50. sky/jobs/recovery_strategy.py +1 -1
  51. sky/jobs/state.py +26 -16
  52. sky/jobs/utils.py +67 -24
  53. sky/logs/agent.py +10 -2
  54. sky/provision/__init__.py +1 -0
  55. sky/provision/kubernetes/config.py +7 -2
  56. sky/provision/kubernetes/instance.py +84 -41
  57. sky/provision/kubernetes/utils.py +14 -3
  58. sky/provision/seeweb/__init__.py +11 -0
  59. sky/provision/seeweb/config.py +13 -0
  60. sky/provision/seeweb/instance.py +806 -0
  61. sky/provision/vast/instance.py +1 -1
  62. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  63. sky/schemas/generated/jobsv1_pb2.py +86 -0
  64. sky/schemas/generated/jobsv1_pb2.pyi +252 -0
  65. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  66. sky/server/config.py +14 -5
  67. sky/server/metrics.py +41 -8
  68. sky/server/requests/executor.py +41 -4
  69. sky/server/server.py +1 -0
  70. sky/server/uvicorn.py +11 -5
  71. sky/setup_files/dependencies.py +8 -1
  72. sky/skylet/constants.py +14 -8
  73. sky/skylet/job_lib.py +128 -10
  74. sky/skylet/log_lib.py +14 -3
  75. sky/skylet/log_lib.pyi +9 -0
  76. sky/skylet/services.py +203 -0
  77. sky/skylet/skylet.py +4 -0
  78. sky/task.py +62 -0
  79. sky/templates/kubernetes-ray.yml.j2 +120 -3
  80. sky/templates/seeweb-ray.yml.j2 +108 -0
  81. sky/utils/accelerator_registry.py +3 -1
  82. sky/utils/command_runner.py +35 -11
  83. sky/utils/command_runner.pyi +22 -0
  84. sky/utils/context_utils.py +15 -2
  85. sky/utils/controller_utils.py +11 -5
  86. sky/utils/db/migration_utils.py +1 -1
  87. sky/utils/git.py +559 -1
  88. sky/utils/resource_checker.py +8 -7
  89. sky/workspaces/core.py +57 -21
  90. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/METADATA +40 -35
  91. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/RECORD +96 -85
  92. sky/client/cli/git.py +0 -549
  93. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  96. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  97. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  98. sky/dashboard/out/_next/static/chunks/6856-6e2bc8a6fd0867af.js +0 -1
  99. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  100. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  101. /sky/dashboard/out/_next/static/{3SYxqNGnvvPS8h3gdD2T7 → Y0Q7LyrxiFoWWbTdwb5nh}/_ssgManifest.js +0 -0
  102. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/WHEEL +0 -0
  103. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/entry_points.txt +0 -0
  104. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/licenses/LICENSE +0 -0
  105. {skypilot_nightly-1.0.0.dev20250910.dist-info → skypilot_nightly-1.0.0.dev20250913.dist-info}/top_level.txt +0 -0
@@ -469,15 +469,19 @@ class CommandRunner:
469
469
  """Close the cached connection to the remote machine."""
470
470
  pass
471
471
 
472
- def port_forward_command(self,
473
- port_forward: List[Tuple[int, int]],
474
- connect_timeout: int = 1) -> List[str]:
472
+ def port_forward_command(
473
+ self,
474
+ port_forward: List[Tuple[int, int]],
475
+ connect_timeout: int = 1,
476
+ ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
475
477
  """Command for forwarding ports from localhost to the remote machine.
476
478
 
477
479
  Args:
478
480
  port_forward: A list of ports to forward from the localhost to the
479
481
  remote host.
480
482
  connect_timeout: The timeout for the connection.
483
+ ssh_mode: The mode to use for ssh.
484
+ See SshMode for more details.
481
485
  """
482
486
  raise NotImplementedError
483
487
 
@@ -592,6 +596,7 @@ class SSHCommandRunner(CommandRunner):
592
596
  ssh_proxy_command: Optional[str] = None,
593
597
  docker_user: Optional[str] = None,
594
598
  disable_control_master: Optional[bool] = False,
599
+ port_forward_execute_remote_command: Optional[bool] = False,
595
600
  ):
596
601
  """Initialize SSHCommandRunner.
597
602
 
@@ -618,6 +623,10 @@ class SSHCommandRunner(CommandRunner):
618
623
  disable_control_master: bool; specifies whether or not the ssh
619
624
  command will utilize ControlMaster. We currently disable
620
625
  it for k8s instance.
626
+ port_forward_execute_remote_command: bool; if True, -N is omitted
627
+ from the port forwarding command. This is useful if you
628
+ want to run a command on the remote machine to make sure the
629
+ SSH tunnel is established.
621
630
  """
622
631
  super().__init__(node)
623
632
  ip, port = node
@@ -646,22 +655,28 @@ class SSHCommandRunner(CommandRunner):
646
655
  self.ssh_user = ssh_user
647
656
  self.port = port
648
657
  self._docker_ssh_proxy_command = None
658
+ self.port_forward_execute_remote_command = (
659
+ port_forward_execute_remote_command)
649
660
 
650
- def port_forward_command(self,
651
- port_forward: List[Tuple[int, int]],
652
- connect_timeout: int = 1) -> List[str]:
661
+ def port_forward_command(
662
+ self,
663
+ port_forward: List[Tuple[int, int]],
664
+ connect_timeout: int = 1,
665
+ ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
653
666
  """Command for forwarding ports from localhost to the remote machine.
654
667
 
655
668
  Args:
656
669
  port_forward: A list of ports to forward from the local port to the
657
670
  remote port.
658
671
  connect_timeout: The timeout for the ssh connection.
672
+ ssh_mode: The mode to use for ssh.
673
+ See SshMode for more details.
659
674
 
660
675
  Returns:
661
676
  The command for forwarding ports from localhost to the remote
662
677
  machine.
663
678
  """
664
- return self.ssh_base_command(ssh_mode=SshMode.INTERACTIVE,
679
+ return self.ssh_base_command(ssh_mode=ssh_mode,
665
680
  port_forward=port_forward,
666
681
  connect_timeout=connect_timeout)
667
682
 
@@ -680,7 +695,11 @@ class SSHCommandRunner(CommandRunner):
680
695
  for local, remote in port_forward:
681
696
  logger.debug(
682
697
  f'Forwarding local port {local} to remote port {remote}.')
683
- ssh += ['-NL', f'{local}:localhost:{remote}']
698
+ if self.port_forward_execute_remote_command:
699
+ ssh += ['-L']
700
+ else:
701
+ ssh += ['-NL']
702
+ ssh += [f'{local}:localhost:{remote}']
684
703
  if self._docker_ssh_proxy_command is not None:
685
704
  docker_ssh_proxy_command = self._docker_ssh_proxy_command(ssh)
686
705
  else:
@@ -894,9 +913,11 @@ class KubernetesCommandRunner(CommandRunner):
894
913
  else:
895
914
  return f'pod/{self.pod_name}'
896
915
 
897
- def port_forward_command(self,
898
- port_forward: List[Tuple[int, int]],
899
- connect_timeout: int = 1) -> List[str]:
916
+ def port_forward_command(
917
+ self,
918
+ port_forward: List[Tuple[int, int]],
919
+ connect_timeout: int = 1,
920
+ ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
900
921
  """Command for forwarding ports from localhost to the remote machine.
901
922
 
902
923
  Args:
@@ -904,7 +925,10 @@ class KubernetesCommandRunner(CommandRunner):
904
925
  remote port. Currently, only one port is supported, i.e. the
905
926
  list should have only one element.
906
927
  connect_timeout: The timeout for the ssh connection.
928
+ ssh_mode: The mode to use for ssh.
929
+ See SshMode for more details.
907
930
  """
931
+ del ssh_mode # unused
908
932
  assert port_forward and len(port_forward) == 1, (
909
933
  'Only one port is supported for Kubernetes port-forward.')
910
934
  kubectl_args = [
@@ -106,6 +106,13 @@ class CommandRunner:
106
106
  max_retry: int = ...) -> None:
107
107
  ...
108
108
 
109
+ def port_forward_command(
110
+ self,
111
+ port_forward: List[Tuple[int, int]],
112
+ connect_timeout: int = 1,
113
+ ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
114
+ ...
115
+
109
116
  @classmethod
110
117
  def make_runner_list(cls: typing.Type[CommandRunner],
111
118
  node_list: Iterable[Tuple[Any, ...]],
@@ -127,6 +134,7 @@ class SSHCommandRunner(CommandRunner):
127
134
  ssh_control_name: Optional[str]
128
135
  docker_user: str
129
136
  disable_control_master: Optional[bool]
137
+ port_forward_execute_remote_command: Optional[bool]
130
138
 
131
139
  def __init__(
132
140
  self,
@@ -200,6 +208,13 @@ class SSHCommandRunner(CommandRunner):
200
208
  max_retry: int = ...) -> None:
201
209
  ...
202
210
 
211
+ def port_forward_command(
212
+ self,
213
+ port_forward: List[Tuple[int, int]],
214
+ connect_timeout: int = 1,
215
+ ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
216
+ ...
217
+
203
218
 
204
219
  class KubernetesCommandRunner(CommandRunner):
205
220
 
@@ -272,6 +287,13 @@ class KubernetesCommandRunner(CommandRunner):
272
287
  max_retry: int = ...) -> None:
273
288
  ...
274
289
 
290
+ def port_forward_command(
291
+ self,
292
+ port_forward: List[Tuple[int, int]],
293
+ connect_timeout: int = 1,
294
+ ssh_mode: SshMode = SshMode.INTERACTIVE) -> List[str]:
295
+ ...
296
+
275
297
 
276
298
  class LocalProcessCommandRunner(CommandRunner):
277
299
 
@@ -10,6 +10,8 @@ import sys
10
10
  import typing
11
11
  from typing import Any, Callable, IO, Optional, Tuple, TypeVar
12
12
 
13
+ from typing_extensions import ParamSpec
14
+
13
15
  from sky import sky_logging
14
16
  from sky.utils import context
15
17
  from sky.utils import subprocess_utils
@@ -173,9 +175,14 @@ def cancellation_guard(func: F) -> F:
173
175
  return typing.cast(F, wrapper)
174
176
 
175
177
 
178
+ P = ParamSpec('P')
179
+ T = TypeVar('T')
180
+
181
+
176
182
  # TODO(aylei): replace this with asyncio.to_thread once we drop support for
177
183
  # python 3.8
178
- def to_thread(func, /, *args, **kwargs):
184
+ def to_thread(func: Callable[P, T], /, *args: P.args,
185
+ **kwargs: P.kwargs) -> 'asyncio.Future[T]':
179
186
  """Asynchronously run function *func* in a separate thread.
180
187
 
181
188
  This is the same as asyncio.to_thread, added in Python 3.9
@@ -183,5 +190,11 @@ def to_thread(func, /, *args, **kwargs):
183
190
  loop = asyncio.get_running_loop()
184
191
  # This is critical to pass the current coroutine context to the new thread
185
192
  pyctx = contextvars.copy_context()
186
- func_call = functools.partial(pyctx.run, func, *args, **kwargs)
193
+ func_call: Callable[..., T] = functools.partial(
194
+ # functools.partial erases the argument types, so the type checker
195
+ # cannot infer the return type of pyctx.run
196
+ pyctx.run, # type: ignore
197
+ func,
198
+ *args,
199
+ **kwargs)
187
200
  return loop.run_in_executor(None, func_call)
@@ -228,15 +228,21 @@ def get_controller_for_pool(pool: bool) -> Controllers:
228
228
  def high_availability_specified(cluster_name: Optional[str]) -> bool:
229
229
  """Check if the controller high availability is specified in user config.
230
230
  """
231
- # pylint: disable=import-outside-toplevel
232
- from sky.jobs import utils as managed_job_utils
233
- if managed_job_utils.is_consolidation_mode():
234
- return True
235
-
236
231
  controller = Controllers.from_name(cluster_name)
237
232
  if controller is None:
238
233
  return False
239
234
 
235
+ if controller.value.controller_type == 'jobs':
236
+ # pylint: disable-next=import-outside-toplevel
237
+ from sky.jobs import utils as managed_job_utils
238
+ if managed_job_utils.is_consolidation_mode():
239
+ return True
240
+ elif controller.value.controller_type == 'serve':
241
+ # pylint: disable-next=import-outside-toplevel
242
+ from sky.serve import serve_utils
243
+ if serve_utils.is_consolidation_mode():
244
+ return True
245
+
240
246
  if skypilot_config.loaded():
241
247
  return skypilot_config.get_nested((controller.value.controller_type,
242
248
  'controller', 'high_availability'),
@@ -17,7 +17,7 @@ logger = sky_logging.init_logger(__name__)
17
17
  DB_INIT_LOCK_TIMEOUT_SECONDS = 10
18
18
 
19
19
  GLOBAL_USER_STATE_DB_NAME = 'state_db'
20
- GLOBAL_USER_STATE_VERSION = '007'
20
+ GLOBAL_USER_STATE_VERSION = '008'
21
21
  GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.state_db.lock'
22
22
 
23
23
  SPOT_JOBS_DB_NAME = 'spot_jobs_db'