skypilot-nightly 1.0.0.dev20250802__py3-none-any.whl → 1.0.0.dev20250806__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (104)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +4 -1
  3. sky/catalog/kubernetes_catalog.py +8 -0
  4. sky/catalog/nebius_catalog.py +0 -1
  5. sky/client/cli/command.py +32 -13
  6. sky/client/sdk.py +16 -8
  7. sky/client/sdk.pyi +6 -5
  8. sky/client/sdk_async.py +811 -0
  9. sky/clouds/kubernetes.py +6 -1
  10. sky/clouds/nebius.py +1 -4
  11. sky/dashboard/out/404.html +1 -1
  12. sky/dashboard/out/_next/static/Gelsd19kVxXcX7aQQGsGu/_buildManifest.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/1043-75af48ca5d5aaf57.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/1141-8678a9102cc5f67e.js +11 -0
  15. sky/dashboard/out/_next/static/chunks/2622-951867535095b0eb.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/3785.0a173cd4393f0fef.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/9025.99f29acb7617963e.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/{9984.78ee6d2c6fa4b0e8.js → 9984.c5564679e467d245.js} +1 -1
  19. sky/dashboard/out/_next/static/chunks/pages/{_app-a67ae198457b9886.js → _app-2a43ea3241bbdacd.js} +1 -1
  20. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-fa63e8b1d203f298.js → [job]-7cb24da04ca00956.js} +1 -1
  21. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-9e7df5fc761c95a7.js → [cluster]-1e95993124dbfc57.js} +1 -1
  22. sky/dashboard/out/_next/static/chunks/pages/clusters-47f1ddae13a2f8e4.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/config-d56e64f30db7b42e.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-2a44e70b500b6b70.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/pages/infra-22faac9325016d83.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90693cb88b5599a7.js +11 -0
  27. sky/dashboard/out/_next/static/chunks/pages/jobs-ab318e52eb4424a7.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/pages/users-b90c865a690bfe84.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/pages/volumes-7af733f5d7b6ed1c.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-4d41c9023287f59a.js → [name]-35e0de5bca55e594.js} +1 -1
  31. sky/dashboard/out/_next/static/chunks/pages/workspaces-062525fb5462acb6.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/webpack-387626669badf82e.js +1 -0
  33. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  34. sky/dashboard/out/clusters/[cluster].html +1 -1
  35. sky/dashboard/out/clusters.html +1 -1
  36. sky/dashboard/out/config.html +1 -1
  37. sky/dashboard/out/index.html +1 -1
  38. sky/dashboard/out/infra/[context].html +1 -1
  39. sky/dashboard/out/infra.html +1 -1
  40. sky/dashboard/out/jobs/[job].html +1 -1
  41. sky/dashboard/out/jobs.html +1 -1
  42. sky/dashboard/out/users.html +1 -1
  43. sky/dashboard/out/volumes.html +1 -1
  44. sky/dashboard/out/workspace/new.html +1 -1
  45. sky/dashboard/out/workspaces/[name].html +1 -1
  46. sky/dashboard/out/workspaces.html +1 -1
  47. sky/jobs/client/sdk_async.py +135 -0
  48. sky/jobs/utils.py +3 -1
  49. sky/provision/kubernetes/utils.py +62 -6
  50. sky/provision/nebius/instance.py +1 -0
  51. sky/provision/nebius/utils.py +9 -1
  52. sky/serve/client/sdk_async.py +130 -0
  53. sky/serve/constants.py +2 -1
  54. sky/serve/controller.py +2 -1
  55. sky/serve/load_balancer.py +3 -1
  56. sky/serve/serve_state.py +70 -5
  57. sky/serve/serve_utils.py +124 -22
  58. sky/serve/server/impl.py +22 -21
  59. sky/serve/service.py +8 -1
  60. sky/server/auth/__init__.py +0 -0
  61. sky/server/auth/authn.py +46 -0
  62. sky/server/auth/oauth2_proxy.py +185 -0
  63. sky/server/common.py +108 -17
  64. sky/server/constants.py +1 -1
  65. sky/server/daemons.py +60 -11
  66. sky/server/rest.py +114 -0
  67. sky/server/server.py +44 -40
  68. sky/setup_files/dependencies.py +2 -0
  69. sky/skylet/constants.py +2 -1
  70. sky/skylet/events.py +5 -1
  71. sky/skylet/skylet.py +3 -1
  72. sky/task.py +43 -10
  73. sky/templates/kubernetes-ray.yml.j2 +4 -0
  74. sky/templates/nebius-ray.yml.j2 +1 -0
  75. sky/utils/controller_utils.py +7 -0
  76. sky/utils/rich_utils.py +120 -0
  77. {skypilot_nightly-1.0.0.dev20250802.dist-info → skypilot_nightly-1.0.0.dev20250806.dist-info}/METADATA +5 -1
  78. {skypilot_nightly-1.0.0.dev20250802.dist-info → skypilot_nightly-1.0.0.dev20250806.dist-info}/RECORD +87 -82
  79. sky/dashboard/out/_next/static/2JNCZ4daQBotwWRNGi6aE/_buildManifest.js +0 -1
  80. sky/dashboard/out/_next/static/chunks/1043-928582d4860fef92.js +0 -1
  81. sky/dashboard/out/_next/static/chunks/1141-3f10a5a9f697c630.js +0 -11
  82. sky/dashboard/out/_next/static/chunks/3698-7874720877646365.js +0 -1
  83. sky/dashboard/out/_next/static/chunks/3785.95524bc443db8260.js +0 -1
  84. sky/dashboard/out/_next/static/chunks/6989-983d3ae7a874de98.js +0 -1
  85. sky/dashboard/out/_next/static/chunks/9025.7937c16bc8623516.js +0 -6
  86. sky/dashboard/out/_next/static/chunks/pages/clusters-956ad430075efee8.js +0 -1
  87. sky/dashboard/out/_next/static/chunks/pages/config-8620d099cbef8608.js +0 -1
  88. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-9cfd875eecb6eaf5.js +0 -1
  89. sky/dashboard/out/_next/static/chunks/pages/infra-0fbdc9072f19fbe2.js +0 -1
  90. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-6c5af4c86e6ab3d3.js +0 -11
  91. sky/dashboard/out/_next/static/chunks/pages/jobs-6393a9edc7322b54.js +0 -1
  92. sky/dashboard/out/_next/static/chunks/pages/users-34d6bb10c3b3ee3d.js +0 -1
  93. sky/dashboard/out/_next/static/chunks/pages/volumes-225c8dae0634eb7f.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/pages/workspaces-e4cb7e97d37e93ad.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/webpack-13145516b19858fb.js +0 -1
  96. /sky/dashboard/out/_next/static/{2JNCZ4daQBotwWRNGi6aE → Gelsd19kVxXcX7aQQGsGu}/_ssgManifest.js +0 -0
  97. /sky/dashboard/out/_next/static/chunks/{1871-7e17c195296e2ea9.js → 1871-ced1c14230cad6e1.js} +0 -0
  98. /sky/dashboard/out/_next/static/chunks/{6135-d0e285ac5f3f2485.js → 6135-2d7ed3350659d073.js} +0 -0
  99. /sky/dashboard/out/_next/static/chunks/{6601-234b1cf963c7280b.js → 6601-2109d22e7861861c.js} +0 -0
  100. /sky/dashboard/out/_next/static/chunks/{938-40d15b6261ec8dc1.js → 938-bda2685db5eae6cf.js} +0 -0
  101. {skypilot_nightly-1.0.0.dev20250802.dist-info → skypilot_nightly-1.0.0.dev20250806.dist-info}/WHEEL +0 -0
  102. {skypilot_nightly-1.0.0.dev20250802.dist-info → skypilot_nightly-1.0.0.dev20250806.dist-info}/entry_points.txt +0 -0
  103. {skypilot_nightly-1.0.0.dev20250802.dist-info → skypilot_nightly-1.0.0.dev20250806.dist-info}/licenses/LICENSE +0 -0
  104. {skypilot_nightly-1.0.0.dev20250802.dist-info → skypilot_nightly-1.0.0.dev20250806.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '6ddfb3cd7d476b51f9309c547338407d6eca092c'
8
+ _SKYPILOT_COMMIT_SHA = '57784d97d8d63f3c87f4d5d22f3e820b10154241'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250802'
38
+ __version__ = '1.0.0.dev20250806'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/backends/backend_utils.py CHANGED
@@ -923,7 +923,10 @@ def write_cluster_config(
923
923
  cluster_config_overrides=cluster_config_overrides,
924
924
  cloud=cloud,
925
925
  context=region.name)
926
- kubernetes_utils.combine_metadata_fields(tmp_yaml_path, region.name)
926
+ kubernetes_utils.combine_metadata_fields(
927
+ tmp_yaml_path,
928
+ cluster_config_overrides=cluster_config_overrides,
929
+ context=region.name)
927
930
  yaml_obj = common_utils.read_yaml(tmp_yaml_path)
928
931
  pod_config: Dict[str, Any] = yaml_obj['available_node_types'][
929
932
  'ray_head_default']['node_config']
sky/catalog/kubernetes_catalog.py CHANGED
@@ -255,6 +255,14 @@ def _list_accelerators(
255
255
  # Get all the pods running on the node
256
256
  if (pod.spec.node_name == node.metadata.name and
257
257
  pod.status.phase in ['Running', 'Pending']):
258
+ # Skip pods that should not count against GPU count
259
+ if (kubernetes_utils.
260
+ should_exclude_pod_from_gpu_allocation(pod)):
261
+ logger.debug(
262
+ f'Excluding pod '
263
+ f'{pod.metadata.name} from GPU count '
264
+ f'calculations on node {node.metadata.name}')
265
+ continue
258
266
  # Iterate over all the containers in the pod and sum
259
267
  # the GPU requests
260
268
  for container in pod.spec.containers:
sky/catalog/nebius_catalog.py CHANGED
@@ -38,7 +38,6 @@ def get_hourly_cost(instance_type: str,
38
38
  region: Optional[str] = None,
39
39
  zone: Optional[str] = None) -> float:
40
40
  """Returns the cost, or the cheapest cost among all zones for spot."""
41
- assert not use_spot, 'Nebius does not support spot.'
42
41
  if zone is not None:
43
42
  with ux_utils.print_exception_no_traceback():
44
43
  raise ValueError('Nebius does not support zones.')
sky/client/cli/command.py CHANGED
@@ -62,7 +62,6 @@ from sky.provision.kubernetes import constants as kubernetes_constants
62
62
  from sky.provision.kubernetes import utils as kubernetes_utils
63
63
  from sky.server import common as server_common
64
64
  from sky.server import constants as server_constants
65
- from sky.server import versions
66
65
  from sky.server.requests import requests
67
66
  from sky.skylet import autostop_lib
68
67
  from sky.skylet import constants
@@ -557,17 +556,17 @@ def _parse_override_params(
557
556
  """Parses the override parameters into a dictionary."""
558
557
  override_params: Dict[str, Any] = {}
559
558
  if cloud is not None:
560
- if cloud.lower() == 'none':
559
+ if cloud.lower() == 'none' or cloud == '*':
561
560
  override_params['cloud'] = None
562
561
  else:
563
562
  override_params['cloud'] = registry.CLOUD_REGISTRY.from_str(cloud)
564
563
  if region is not None:
565
- if region.lower() == 'none':
564
+ if region.lower() == 'none' or region == '*':
566
565
  override_params['region'] = None
567
566
  else:
568
567
  override_params['region'] = region
569
568
  if zone is not None:
570
- if zone.lower() == 'none':
569
+ if zone.lower() == 'none' or zone == '*':
571
570
  override_params['zone'] = None
572
571
  else:
573
572
  override_params['zone'] = zone
@@ -965,9 +964,10 @@ def _handle_infra_cloud_region_zone_options(infra: Optional[str],
965
964
 
966
965
  if infra is not None:
967
966
  infra_info = infra_utils.InfraInfo.from_str(infra)
968
- cloud = infra_info.cloud
969
- region = infra_info.region
970
- zone = infra_info.zone
967
+ # Convert None to '*' to ensure proper override behavior
968
+ cloud = infra_info.cloud if infra_info.cloud is not None else '*'
969
+ region = infra_info.region if infra_info.region is not None else '*'
970
+ zone = infra_info.zone if infra_info.zone is not None else '*'
971
971
  return cloud, region, zone
972
972
 
973
973
 
@@ -1832,9 +1832,6 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
1832
1832
  show_endpoints = endpoints or endpoint is not None
1833
1833
  show_single_endpoint = endpoint is not None
1834
1834
  show_services = show_services and not any([clusters, ip, endpoints])
1835
- remote_api_version = versions.get_remote_api_version()
1836
- if remote_api_version is None or remote_api_version < 12:
1837
- show_pools = False
1838
1835
 
1839
1836
  query_clusters: Optional[List[str]] = None if not clusters else clusters
1840
1837
  refresh_mode = common.StatusRefreshMode.NONE
@@ -1886,7 +1883,11 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
1886
1883
  return serve_lib.status(service_names=None)
1887
1884
 
1888
1885
  def submit_pools() -> Optional[str]:
1889
- return managed_jobs.pool_status(pool_names=None)
1886
+ try:
1887
+ return managed_jobs.pool_status(pool_names=None)
1888
+ except exceptions.APINotSupportedError as e:
1889
+ logger.debug(f'Pools are not supported in the remote server: {e}')
1890
+ return None
1890
1891
 
1891
1892
  def submit_workspace() -> Optional[str]:
1892
1893
  try:
@@ -2009,7 +2010,7 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
2009
2010
  controller_utils.Controllers.JOBS_CONTROLLER.value.
2010
2011
  in_progress_hint(False).format(job_info=job_info))
2011
2012
 
2012
- if show_pools:
2013
+ if show_pools and pool_status_request_id:
2013
2014
  num_pools = None
2014
2015
  if managed_jobs_query_interrupted:
2015
2016
  msg = 'KeyboardInterrupt'
@@ -3462,6 +3463,17 @@ def show_gpus(
3462
3463
  region,
3463
3464
  zone=None)
3464
3465
 
3466
+ # cloud and region could be '*' from _handle_infra_cloud_region_zone_options
3467
+ # which normally indicates to
3468
+ # _make_task_or_dag_from_entrypoint_with_overrides -> _parse_override_params
3469
+ # to disregard the cloud and region from the YAML.
3470
+ # In show_gpus, there is no YAML, so we need to handle the '*' value
3471
+ # directly here. We should use None instead to indicate "any".
3472
+ if cloud == '*':
3473
+ cloud = None
3474
+ if region == '*':
3475
+ region = None
3476
+
3465
3477
  # validation for the --region flag
3466
3478
  if region is not None and cloud is None:
3467
3479
  raise click.UsageError(
@@ -3502,7 +3514,14 @@ def show_gpus(
3502
3514
  (cloud_name is None or cloud_is_ssh))
3503
3515
 
3504
3516
  def _list_to_str(lst):
3505
- return ', '.join([str(e) for e in lst])
3517
+
3518
+ def format_number(n):
3519
+ # If it's a float that's a whole number, display as int
3520
+ if isinstance(n, float) and n.is_integer():
3521
+ return str(int(n))
3522
+ return str(n)
3523
+
3524
+ return ', '.join([format_number(n) for n in lst])
3506
3525
 
3507
3526
  # TODO(zhwu,romilb): We should move most of these kubernetes related
3508
3527
  # queries into the backend, especially behind the server.
sky/client/sdk.py CHANGED
@@ -88,14 +88,17 @@ def reload_config() -> None:
88
88
  skypilot_config.safe_reload_config()
89
89
 
90
90
 
91
- def stream_response(request_id: Optional[str],
91
+ def stream_response(request_id: Optional[server_common.RequestId],
92
92
  response: 'requests.Response',
93
93
  output_stream: Optional['io.TextIOBase'] = None,
94
94
  resumable: bool = False) -> Any:
95
95
  """Streams the response to the console.
96
96
 
97
97
  Args:
98
- request_id: The request ID.
98
+ request_id: The request ID of the request to stream. May be a full
99
+ request ID or a prefix.
100
+ If None, the latest request submitted to the API server is streamed.
101
+ Using None request_id is not recommended in multi-user environments.
99
102
  response: The HTTP response.
100
103
  output_stream: The output stream to write to. If None, print to the
101
104
  console.
@@ -1756,7 +1759,7 @@ def status_kubernetes() -> server_common.RequestId:
1756
1759
  # === API request APIs ===
1757
1760
  @usage_lib.entrypoint
1758
1761
  @annotations.client_api
1759
- def get(request_id: str) -> Any:
1762
+ def get(request_id: server_common.RequestId) -> Any:
1760
1763
  """Waits for and gets the result of a request.
1761
1764
 
1762
1765
  This function will not check the server health since /api/get is typically
@@ -1764,7 +1767,8 @@ def get(request_id: str) -> Any:
1764
1767
  may cause GET /api/get being sent to a restarted API server.
1765
1768
 
1766
1769
  Args:
1767
- request_id: The request ID of the request to get.
1770
+ request_id: The request ID of the request to get. May be a full request
1771
+ ID or a prefix.
1768
1772
 
1769
1773
  Returns:
1770
1774
  The ``Request Returns`` of the specified request. See the documentation
@@ -1818,7 +1822,7 @@ def get(request_id: str) -> Any:
1818
1822
  @server_common.check_server_healthy_or_start
1819
1823
  @annotations.client_api
1820
1824
  def stream_and_get(
1821
- request_id: Optional[str] = None,
1825
+ request_id: Optional[server_common.RequestId] = None,
1822
1826
  log_path: Optional[str] = None,
1823
1827
  tail: Optional[int] = None,
1824
1828
  follow: bool = True,
@@ -1830,7 +1834,10 @@ def stream_and_get(
1830
1834
  prefix of the full request id.
1831
1835
 
1832
1836
  Args:
1833
- request_id: The prefix of the request ID of the request to stream.
1837
+ request_id: The request ID of the request to stream. May be a full
1838
+ request ID or a prefix.
1839
+ If None, the latest request submitted to the API server is streamed.
1840
+ Using None request_id is not recommended in multi-user environments.
1834
1841
  log_path: The path to the log file to stream.
1835
1842
  tail: The number of lines to show from the end of the logs.
1836
1843
  If None, show all logs.
@@ -1873,7 +1880,8 @@ def stream_and_get(
1873
1880
 
1874
1881
  @usage_lib.entrypoint
1875
1882
  @annotations.client_api
1876
- def api_cancel(request_ids: Optional[Union[str, List[str]]] = None,
1883
+ def api_cancel(request_ids: Optional[Union[
1884
+ server_common.RequestId, List[server_common.RequestId]]] = None,
1877
1885
  all_users: bool = False,
1878
1886
  silent: bool = False) -> server_common.RequestId:
1879
1887
  """Aborts a request or all requests.
@@ -1938,7 +1946,7 @@ def _local_api_server_running(kill: bool = False) -> bool:
1938
1946
  @usage_lib.entrypoint
1939
1947
  @annotations.client_api
1940
1948
  def api_status(
1941
- request_ids: Optional[List[str]] = None,
1949
+ request_ids: Optional[List[server_common.RequestId]] = None,
1942
1950
  # pylint: disable=redefined-builtin
1943
1951
  all_status: bool = False
1944
1952
  ) -> List[payloads.RequestPayload]:
sky/client/sdk.pyi CHANGED
@@ -38,7 +38,7 @@ def reload_config() -> None:
38
38
  ...
39
39
 
40
40
 
41
- def stream_response(request_id: Optional[str],
41
+ def stream_response(request_id: Optional[server_common.RequestId],
42
42
  response: requests.Response,
43
43
  output_stream: Optional['io.TextIOBase'] = ...,
44
44
  resumable: bool = ...) -> Any:
@@ -249,11 +249,11 @@ def status_kubernetes() -> server_common.RequestId:
249
249
  ...
250
250
 
251
251
 
252
- def get(request_id: str) -> Any:
252
+ def get(request_id: server_common.RequestId) -> Any:
253
253
  ...
254
254
 
255
255
 
256
- def stream_and_get(request_id: Optional[str] = ...,
256
+ def stream_and_get(request_id: Optional[server_common.RequestId] = ...,
257
257
  log_path: Optional[str] = ...,
258
258
  tail: Optional[int] = ...,
259
259
  follow: bool = ...,
@@ -261,13 +261,14 @@ def stream_and_get(request_id: Optional[str] = ...,
261
261
  ...
262
262
 
263
263
 
264
- def api_cancel(request_ids: Optional[Union[str, List[str]]] = ...,
264
+ def api_cancel(request_ids: Optional[Union[
265
+ server_common.RequestId, List[server_common.RequestId]]] = ...,
265
266
  all_users: bool = ...,
266
267
  silent: bool = ...) -> server_common.RequestId:
267
268
  ...
268
269
 
269
270
 
270
- def api_status(request_ids: Optional[List[str]] = ...,
271
+ def api_status(request_ids: Optional[List[server_common.RequestId]] = ...,
271
272
  all_status: bool = ...) -> List[payloads.RequestPayload]:
272
273
  ...
273
274