skypilot-nightly 1.0.0.dev20250607__py3-none-any.whl → 1.0.0.dev20250610__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. sky/__init__.py +2 -2
  2. sky/admin_policy.py +3 -0
  3. sky/authentication.py +1 -7
  4. sky/backends/backend_utils.py +18 -2
  5. sky/backends/cloud_vm_ray_backend.py +9 -20
  6. sky/check.py +4 -3
  7. sky/cli.py +6 -9
  8. sky/client/cli.py +6 -9
  9. sky/client/sdk.py +49 -4
  10. sky/clouds/kubernetes.py +15 -24
  11. sky/core.py +3 -2
  12. sky/dashboard/out/404.html +1 -1
  13. sky/dashboard/out/_next/static/4lwUJxN6KwBqUxqO1VccB/_buildManifest.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/37-d8aebf1683522a0b.js +6 -0
  17. sky/dashboard/out/_next/static/chunks/42.d39e24467181b06b.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/470-4d1a5dbe58a8a2b9.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/{121-865d2bf8a3b84c6a.js → 491.b3d264269613fe09.js} +3 -3
  21. sky/dashboard/out/_next/static/chunks/513.211357a2914a34b2.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/600.9cc76ec442b22e10.js +16 -0
  23. sky/dashboard/out/_next/static/chunks/616-d6128fa9e7cae6e6.js +39 -0
  24. sky/dashboard/out/_next/static/chunks/664-047bc03493fda379.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +6 -0
  26. sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/799-3625946b2ec2eb30.js +8 -0
  28. sky/dashboard/out/_next/static/chunks/804-4c9fc53aa74bc191.js +21 -0
  29. sky/dashboard/out/_next/static/chunks/843-6fcc4bf91ac45b39.js +11 -0
  30. sky/dashboard/out/_next/static/chunks/856-0776dc6ed6000c39.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/938-a75b7712639298b7.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/947-6620842ef80ae879.js +35 -0
  34. sky/dashboard/out/_next/static/chunks/969-20d54a9d998dc102.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/973-c807fc34f09c7df3.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/pages/_app-4768de0aede04dc9.js +20 -0
  37. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-89216c616dbaa9c5.js +6 -0
  38. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-451a14e7e755ebbc.js +6 -0
  39. sky/dashboard/out/_next/static/chunks/pages/clusters-e56b17fd85d0ba58.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/pages/config-497a35a7ed49734a.js +1 -0
  41. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-d2910be98e9227cb.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/pages/infra-780860bcc1103945.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b3dbf38b51cb29be.js +16 -0
  44. sky/dashboard/out/_next/static/chunks/pages/jobs-fe233baf3d073491.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/pages/users-c69ffcab9d6e5269.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c8c2191328532b7d.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/pages/workspaces-82e6601baa5dd280.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/webpack-0574a5a4ba3cf0ac.js +1 -0
  50. sky/dashboard/out/_next/static/css/8b1c8321d4c02372.css +3 -0
  51. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  52. sky/dashboard/out/clusters/[cluster].html +1 -1
  53. sky/dashboard/out/clusters.html +1 -1
  54. sky/dashboard/out/config.html +1 -1
  55. sky/dashboard/out/index.html +1 -1
  56. sky/dashboard/out/infra/[context].html +1 -1
  57. sky/dashboard/out/infra.html +1 -1
  58. sky/dashboard/out/jobs/[job].html +1 -1
  59. sky/dashboard/out/jobs.html +1 -1
  60. sky/dashboard/out/users.html +1 -1
  61. sky/dashboard/out/workspace/new.html +1 -1
  62. sky/dashboard/out/workspaces/[name].html +1 -1
  63. sky/dashboard/out/workspaces.html +1 -1
  64. sky/exceptions.py +23 -0
  65. sky/global_user_state.py +192 -80
  66. sky/jobs/client/sdk.py +29 -21
  67. sky/jobs/server/core.py +9 -1
  68. sky/jobs/server/server.py +0 -95
  69. sky/jobs/utils.py +2 -1
  70. sky/models.py +18 -0
  71. sky/provision/kubernetes/constants.py +9 -0
  72. sky/provision/kubernetes/utils.py +106 -7
  73. sky/serve/client/sdk.py +56 -45
  74. sky/serve/server/core.py +1 -1
  75. sky/server/common.py +5 -7
  76. sky/server/constants.py +0 -2
  77. sky/server/requests/executor.py +60 -22
  78. sky/server/requests/payloads.py +3 -0
  79. sky/server/requests/process.py +69 -29
  80. sky/server/requests/requests.py +4 -3
  81. sky/server/server.py +23 -5
  82. sky/server/stream_utils.py +111 -55
  83. sky/skylet/constants.py +4 -2
  84. sky/skylet/job_lib.py +2 -1
  85. sky/skypilot_config.py +108 -25
  86. sky/users/model.conf +1 -1
  87. sky/users/permission.py +149 -32
  88. sky/users/rbac.py +26 -0
  89. sky/users/server.py +14 -13
  90. sky/utils/admin_policy_utils.py +9 -3
  91. sky/utils/common.py +6 -1
  92. sky/utils/common_utils.py +21 -3
  93. sky/utils/context.py +21 -1
  94. sky/utils/controller_utils.py +16 -1
  95. sky/utils/kubernetes/exec_kubeconfig_converter.py +19 -47
  96. sky/utils/schemas.py +9 -0
  97. sky/workspaces/core.py +100 -8
  98. sky/workspaces/server.py +15 -2
  99. sky/workspaces/utils.py +56 -0
  100. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/METADATA +1 -1
  101. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/RECORD +106 -94
  102. sky/dashboard/out/_next/static/1qG0HTmVilJPxQdBk0fX5/_buildManifest.js +0 -1
  103. sky/dashboard/out/_next/static/chunks/236-619ed0248fb6fdd9.js +0 -6
  104. sky/dashboard/out/_next/static/chunks/293-351268365226d251.js +0 -1
  105. sky/dashboard/out/_next/static/chunks/37-600191c5804dcae2.js +0 -6
  106. sky/dashboard/out/_next/static/chunks/470-ad1e0db3afcbd9c9.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/614-635a84e87800f99e.js +0 -66
  108. sky/dashboard/out/_next/static/chunks/682-b60cfdacc15202e8.js +0 -6
  109. sky/dashboard/out/_next/static/chunks/843-c296541442d4af88.js +0 -11
  110. sky/dashboard/out/_next/static/chunks/856-3a32da4b84176f6d.js +0 -1
  111. sky/dashboard/out/_next/static/chunks/969-2c584e28e6b4b106.js +0 -1
  112. sky/dashboard/out/_next/static/chunks/973-6d78a0814682d771.js +0 -1
  113. sky/dashboard/out/_next/static/chunks/pages/_app-cb81dc4d27f4d009.js +0 -1
  114. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-18aed9b56247d074.js +0 -6
  115. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b919a73aecdfa78f.js +0 -6
  116. sky/dashboard/out/_next/static/chunks/pages/clusters-4f6b9dd9abcb33ad.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/pages/config-fe375a56342cf609.js +0 -6
  118. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-3a18d0eeb5119fe4.js +0 -1
  119. sky/dashboard/out/_next/static/chunks/pages/infra-a1a6abeeb58c1051.js +0 -1
  120. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1354e28c81eeb686.js +0 -16
  121. sky/dashboard/out/_next/static/chunks/pages/jobs-23bfc8bf373423db.js +0 -1
  122. sky/dashboard/out/_next/static/chunks/pages/users-5800045bd04e69c2.js +0 -16
  123. sky/dashboard/out/_next/static/chunks/pages/workspace/new-e1f9c0c3ff7ac4bd.js +0 -1
  124. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-686590e0ee4b2412.js +0 -1
  125. sky/dashboard/out/_next/static/chunks/pages/workspaces-76b07aa5da91b0df.js +0 -1
  126. sky/dashboard/out/_next/static/chunks/webpack-65d465f948974c0d.js +0 -1
  127. sky/dashboard/out/_next/static/css/667d941a2888ce6e.css +0 -3
  128. /sky/dashboard/out/_next/static/{1qG0HTmVilJPxQdBk0fX5 → 4lwUJxN6KwBqUxqO1VccB}/_ssgManifest.js +0 -0
  129. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/WHEEL +0 -0
  130. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/entry_points.txt +0 -0
  131. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/licenses/LICENSE +0 -0
  132. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/top_level.txt +0 -0
sky/jobs/client/sdk.py CHANGED
@@ -14,7 +14,9 @@ from sky.server import common as server_common
14
14
  from sky.server.requests import payloads
15
15
  from sky.skylet import constants
16
16
  from sky.usage import usage_lib
17
+ from sky.utils import admin_policy_utils
17
18
  from sky.utils import common_utils
19
+ from sky.utils import context
18
20
  from sky.utils import dag_utils
19
21
 
20
22
  if typing.TYPE_CHECKING:
@@ -29,6 +31,7 @@ else:
29
31
  logger = sky_logging.init_logger(__name__)
30
32
 
31
33
 
34
+ @context.contextual
32
35
  @usage_lib.entrypoint
33
36
  @server_common.check_server_healthy_or_start
34
37
  def launch(
@@ -65,27 +68,32 @@ def launch(
65
68
  """
66
69
 
67
70
  dag = dag_utils.convert_entrypoint_to_dag(task)
68
- sdk.validate(dag)
69
- if _need_confirmation:
70
- request_id = sdk.optimize(dag)
71
- sdk.stream_and_get(request_id)
72
- prompt = f'Launching a managed job {dag.name!r}. Proceed?'
73
- if prompt is not None:
74
- click.confirm(prompt, default=True, abort=True, show_default=True)
75
-
76
- dag = client_common.upload_mounts_to_api_server(dag)
77
- dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
78
- body = payloads.JobsLaunchBody(
79
- task=dag_str,
80
- name=name,
81
- )
82
- response = requests.post(
83
- f'{server_common.get_server_url()}/jobs/launch',
84
- json=json.loads(body.model_dump_json()),
85
- timeout=(5, None),
86
- cookies=server_common.get_api_cookie_jar(),
87
- )
88
- return server_common.get_request_id(response)
71
+ with admin_policy_utils.apply_and_use_config_in_current_request(
72
+ dag, at_client_side=True) as dag:
73
+ sdk.validate(dag)
74
+ if _need_confirmation:
75
+ request_id = sdk.optimize(dag)
76
+ sdk.stream_and_get(request_id)
77
+ prompt = f'Launching a managed job {dag.name!r}. Proceed?'
78
+ if prompt is not None:
79
+ click.confirm(prompt,
80
+ default=True,
81
+ abort=True,
82
+ show_default=True)
83
+
84
+ dag = client_common.upload_mounts_to_api_server(dag)
85
+ dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
86
+ body = payloads.JobsLaunchBody(
87
+ task=dag_str,
88
+ name=name,
89
+ )
90
+ response = requests.post(
91
+ f'{server_common.get_server_url()}/jobs/launch',
92
+ json=json.loads(body.model_dump_json()),
93
+ timeout=(5, None),
94
+ cookies=server_common.get_api_cookie_jar(),
95
+ )
96
+ return server_common.get_request_id(response)
89
97
 
90
98
 
91
99
  @usage_lib.entrypoint
sky/jobs/server/core.py CHANGED
@@ -37,6 +37,7 @@ from sky.utils import status_lib
37
37
  from sky.utils import subprocess_utils
38
38
  from sky.utils import timeline
39
39
  from sky.utils import ux_utils
40
+ from sky.workspaces import core as workspaces_core
40
41
 
41
42
  if typing.TYPE_CHECKING:
42
43
  import sky
@@ -244,7 +245,7 @@ def launch(
244
245
 
245
246
  # Launch with the api server's user hash, so that sky status does not
246
247
  # show the owner of the controller as whatever user launched it first.
247
- with common.with_server_user_hash():
248
+ with common.with_server_user():
248
249
  # Always launch the controller in the default workspace.
249
250
  with skypilot_config.local_active_workspace_ctx(
250
251
  skylet_constants.SKYPILOT_DEFAULT_WORKSPACE):
@@ -455,6 +456,13 @@ def queue(refresh: bool,
455
456
 
456
457
  jobs = list(filter(user_hash_matches_or_missing, jobs))
457
458
 
459
+ accessible_workspaces = workspaces_core.get_workspaces()
460
+ jobs = list(
461
+ filter(
462
+ lambda job: job.get('workspace', skylet_constants.
463
+ SKYPILOT_DEFAULT_WORKSPACE) in
464
+ accessible_workspaces, jobs))
465
+
458
466
  if skip_finished:
459
467
  # Filter out the finished jobs. If a multi-task job is partially
460
468
  # finished, we will include all its tasks.
sky/jobs/server/server.py CHANGED
@@ -1,12 +1,9 @@
1
1
  """REST API for managed jobs."""
2
- import os
3
2
 
4
3
  import fastapi
5
- import httpx
6
4
 
7
5
  from sky import sky_logging
8
6
  from sky.jobs.server import core
9
- from sky.jobs.server import dashboard_utils
10
7
  from sky.server import common as server_common
11
8
  from sky.server import stream_utils
12
9
  from sky.server.requests import executor
@@ -14,7 +11,6 @@ from sky.server.requests import payloads
14
11
  from sky.server.requests import requests as api_requests
15
12
  from sky.skylet import constants
16
13
  from sky.utils import common
17
- from sky.utils import common_utils
18
14
 
19
15
  logger = sky_logging.init_logger(__name__)
20
16
 
@@ -110,94 +106,3 @@ async def download_logs(
110
106
  if jobs_download_logs_body.refresh else api_requests.ScheduleType.SHORT,
111
107
  request_cluster_name=common.JOB_CONTROLLER_NAME,
112
108
  )
113
-
114
-
115
- @router.get('/dashboard')
116
- async def dashboard(request: fastapi.Request,
117
- user_hash: str) -> fastapi.Response:
118
- # TODO(cooperc): Support showing only jobs for a specific user.
119
-
120
- # FIX(zhwu/cooperc/eric): Fix log downloading (assumes global
121
- # /download_log/xx route)
122
-
123
- # Note: before #4717, each user had their own controller, and thus their own
124
- # dashboard. Now, all users share the same controller, so this isn't really
125
- # necessary. TODO(cooperc): clean up.
126
-
127
- # TODO: Put this in an executor to avoid blocking the main server thread.
128
- # It can take a long time if it needs to check the controller status.
129
-
130
- # Find the port for the dashboard of the user
131
- os.environ[constants.USER_ID_ENV_VAR] = user_hash
132
- server_common.reload_for_new_request(client_entrypoint=None,
133
- client_command=None,
134
- using_remote_api_server=False)
135
- logger.info(f'Starting dashboard for user hash: {user_hash}')
136
-
137
- with dashboard_utils.get_dashboard_lock_for_user(user_hash):
138
- max_retries = 3
139
- for attempt in range(max_retries):
140
- port, pid = dashboard_utils.get_dashboard_session(user_hash)
141
- if port == 0 or attempt > 0:
142
- # Let the client know that we are waiting for starting the
143
- # dashboard.
144
- try:
145
- port, pid = core.start_dashboard_forwarding()
146
- except Exception as e: # pylint: disable=broad-except
147
- # We catch all exceptions to gracefully handle unknown
148
- # errors and raise an HTTPException to the client.
149
- msg = (
150
- 'Dashboard failed to start: '
151
- f'{common_utils.format_exception(e, use_bracket=True)}')
152
- logger.error(msg)
153
- raise fastapi.HTTPException(status_code=503, detail=msg)
154
- dashboard_utils.add_dashboard_session(user_hash, port, pid)
155
-
156
- # Assuming the dashboard is forwarded to localhost on the API server
157
- dashboard_url = f'http://localhost:{port}'
158
- try:
159
- # Ping the dashboard to check if it's still running
160
- async with httpx.AsyncClient() as client:
161
- response = await client.request('GET',
162
- dashboard_url,
163
- timeout=5)
164
- if response.is_success:
165
- break # Connection successful, proceed with the request
166
- # Raise an HTTPException here which will be caught by the
167
- # following except block to retry with new connection
168
- response.raise_for_status()
169
- except Exception as e: # pylint: disable=broad-except
170
- # We catch all exceptions to gracefully handle unknown
171
- # errors and retry or raise an HTTPException to the client.
172
- # Assume an exception indicates that the dashboard connection
173
- # is stale - remove it so that a new one is created.
174
- dashboard_utils.remove_dashboard_session(user_hash)
175
- msg = (
176
- f'Dashboard connection attempt {attempt + 1} failed with '
177
- f'{common_utils.format_exception(e, use_bracket=True)}')
178
- logger.info(msg)
179
- if attempt == max_retries - 1:
180
- raise fastapi.HTTPException(status_code=503, detail=msg)
181
-
182
- # Create a client session to forward the request
183
- try:
184
- async with httpx.AsyncClient() as client:
185
- # Make the request and get the response
186
- response = await client.request(
187
- method='GET',
188
- url=f'{dashboard_url}',
189
- headers=request.headers.raw,
190
- )
191
-
192
- # Create a new response with the content already read
193
- content = await response.aread()
194
- return fastapi.Response(
195
- content=content,
196
- status_code=response.status_code,
197
- headers=dict(response.headers),
198
- media_type=response.headers.get('content-type'))
199
- except Exception as e:
200
- msg = (f'Failed to forward request to dashboard: '
201
- f'{common_utils.format_exception(e, use_bracket=True)}')
202
- logger.error(msg)
203
- raise fastapi.HTTPException(status_code=502, detail=msg)
sky/jobs/utils.py CHANGED
@@ -1025,7 +1025,8 @@ def load_managed_job_queue(payload: str) -> List[Dict[str, Any]]:
1025
1025
  if 'user_hash' in job and job['user_hash'] is not None:
1026
1026
  # Skip jobs that do not have user_hash info.
1027
1027
  # TODO(cooperc): Remove check before 0.12.0.
1028
- job['user_name'] = global_user_state.get_user(job['user_hash']).name
1028
+ user = global_user_state.get_user(job['user_hash'])
1029
+ job['user_name'] = user.name if user is not None else None
1029
1030
  return jobs
1030
1031
 
1031
1032
 
sky/models.py CHANGED
@@ -2,8 +2,13 @@
2
2
 
3
3
  import collections
4
4
  import dataclasses
5
+ import getpass
6
+ import os
5
7
  from typing import Any, Dict, Optional
6
8
 
9
+ from sky.skylet import constants
10
+ from sky.utils import common_utils
11
+
7
12
 
8
13
  @dataclasses.dataclass
9
14
  class User:
@@ -16,6 +21,19 @@ class User:
16
21
  def to_dict(self) -> Dict[str, Any]:
17
22
  return {'id': self.id, 'name': self.name}
18
23
 
24
+ def to_env_vars(self) -> Dict[str, Any]:
25
+ return {
26
+ constants.USER_ID_ENV_VAR: self.id,
27
+ constants.USER_ENV_VAR: self.name,
28
+ }
29
+
30
+ @classmethod
31
+ def get_current_user(cls) -> 'User':
32
+ """Returns the current user."""
33
+ user_name = os.getenv(constants.USER_ENV_VAR, getpass.getuser())
34
+ user_hash = common_utils.get_user_hash()
35
+ return User(id=user_hash, name=user_name)
36
+
19
37
 
20
38
  RealtimeGpuAvailability = collections.namedtuple(
21
39
  'RealtimeGpuAvailability', ['gpu', 'counts', 'capacity', 'available'])
sky/provision/kubernetes/constants.py CHANGED
@@ -6,3 +6,12 @@ NO_GPU_HELP_MESSAGE = ('If your cluster contains GPUs, make sure '
6
6
  '(e.g., skypilot.co/accelerator) are setup correctly. ')
7
7
 
8
8
  KUBERNETES_IN_CLUSTER_NAMESPACE_ENV_VAR = 'SKYPILOT_IN_CLUSTER_NAMESPACE'
9
+
10
+ # Name of kubernetes exec auth wrapper script
11
+ SKY_K8S_EXEC_AUTH_WRAPPER = 'sky-kube-exec-wrapper'
12
+
13
+ # PATH envvar for kubectl exec auth execve
14
+ SKY_K8S_EXEC_AUTH_PATH = '$HOME/skypilot-runtime/bin:$HOME/google-cloud-sdk/bin:$PATH' # pylint: disable=line-too-long
15
+
16
+ # cache directory for kubeconfig with modified exec auth
17
+ SKY_K8S_EXEC_AUTH_KUBECONFIG_CACHE = '~/.sky/generated/kubeconfigs'
sky/provision/kubernetes/utils.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Kubernetes utilities for SkyPilot."""
2
2
  import dataclasses
3
3
  import functools
4
+ import hashlib
4
5
  import json
5
6
  import math
6
7
  import os
@@ -1555,11 +1556,11 @@ def is_kubeconfig_exec_auth(
1555
1556
  == schemas.RemoteIdentityOptions.LOCAL_CREDENTIALS.value):
1556
1557
  ctx_name = context_obj['name']
1557
1558
  exec_msg = ('exec-based authentication is used for '
1558
- f'Kubernetes context {ctx_name!r}.'
1559
- ' This may cause issues with autodown or when running '
1560
- 'Managed Jobs or SkyServe controller on Kubernetes. '
1561
- 'To fix, configure SkyPilot to create a service account '
1562
- 'for running pods by setting the following in '
1559
+ f'Kubernetes context {ctx_name!r}. '
1560
+ 'Make sure that the corresponding cloud provider is '
1561
+ 'also enabled through `sky check` (e.g.: GCP for GKE). '
1562
+ 'Alternatively, configure SkyPilot to create a service '
1563
+ 'account for running pods by setting the following in '
1563
1564
  '~/.sky/config.yaml:\n'
1564
1565
  ' kubernetes:\n'
1565
1566
  ' remote_identity: SERVICE_ACCOUNT\n'
@@ -2877,8 +2878,8 @@ def get_context_from_config(provider_config: Dict[str, Any]) -> Optional[str]:
2877
2878
  context = provider_config.get('context',
2878
2879
  get_current_kube_config_context_name())
2879
2880
  if context == kubernetes.in_cluster_context_name():
2880
- # If the context (also used as the region) is in-cluster, we need to
2881
- # we need to use in-cluster auth by setting the context to None.
2881
+ # If the context (also used as the region) is in-cluster, we need
2882
+ # to use in-cluster auth by setting the context to None.
2882
2883
  context = None
2883
2884
  return context
2884
2885
 
@@ -3135,3 +3136,101 @@ def get_kubeconfig_paths() -> List[str]:
3135
3136
  for path in paths.split(kubernetes.ENV_KUBECONFIG_PATH_SEPARATOR):
3136
3137
  expanded.append(os.path.expanduser(path))
3137
3138
  return expanded
3139
+
3140
+
3141
+ def format_kubeconfig_exec_auth(config: Any,
3142
+ output_path: str,
3143
+ inject_wrapper: bool = True) -> bool:
3144
+ """Reformat the kubeconfig so that exec-based authentication can be used
3145
+ with SkyPilot. Will create a new kubeconfig file under <output_path>
3146
+ regardless of whether a change has been made.
3147
+
3148
+ kubectl internally strips all environment variables except for system
3149
+ defaults. If `inject_wrapper` is true, a wrapper executable is applied
3150
+ to inject the relevant PATH information before exec-auth is executed.
3151
+
3152
+ Contents of sky-kube-exec-wrapper:
3153
+
3154
+ #!/bin/bash
3155
+ export PATH="$HOME/skypilot-runtime/bin:$HOME/google-cloud-sdk:$PATH"
3156
+ exec "$@"
3157
+
3158
+ refer to `skylet/constants.py` for more information.
3159
+
3160
+ Args:
3161
+ config (dict): kubeconfig parsed by yaml.safe_load
3162
+ output_path (str): Path where the potentially modified kubeconfig file
3163
+ will be saved
3164
+ inject_wrapper (bool): Whether to inject the wrapper script
3165
+ Returns: whether config was updated, for logging purposes
3166
+ """
3167
+ updated = False
3168
+ for user in config.get('users', []):
3169
+ exec_info = user.get('user', {}).get('exec', {})
3170
+ current_command = exec_info.get('command', '')
3171
+
3172
+ if current_command:
3173
+ # Strip the path and keep only the executable name
3174
+ executable = os.path.basename(current_command)
3175
+ if executable == kubernetes_constants.SKY_K8S_EXEC_AUTH_WRAPPER:
3176
+ # we don't want this happening recursively.
3177
+ continue
3178
+
3179
+ if inject_wrapper:
3180
+ exec_info[
3181
+ 'command'] = kubernetes_constants.SKY_K8S_EXEC_AUTH_WRAPPER
3182
+ if exec_info.get('args') is None:
3183
+ exec_info['args'] = []
3184
+ exec_info['args'].insert(0, executable)
3185
+ updated = True
3186
+ elif executable != current_command:
3187
+ exec_info['command'] = executable
3188
+ updated = True
3189
+
3190
+ # Handle Nebius kubeconfigs: change --profile to 'sky'
3191
+ if executable == 'nebius':
3192
+ args = exec_info.get('args', [])
3193
+ if args and '--profile' in args:
3194
+ try:
3195
+ profile_index = args.index('--profile')
3196
+ if profile_index + 1 < len(args):
3197
+ old_profile = args[profile_index + 1]
3198
+ if old_profile != 'sky':
3199
+ args[profile_index + 1] = 'sky'
3200
+ updated = True
3201
+ except ValueError:
3202
+ pass
3203
+
3204
+ os.makedirs(os.path.dirname(os.path.expanduser(output_path)), exist_ok=True)
3205
+ with open(output_path, 'w', encoding='utf-8') as file:
3206
+ yaml.safe_dump(config, file)
3207
+
3208
+ return updated
3209
+
3210
+
3211
+ def format_kubeconfig_exec_auth_with_cache(kubeconfig_path: str) -> str:
3212
+ """Reformat the kubeconfig file or retrieve it from cache if it has already
3213
+ been formatted before. Store it in the cache directory if necessary.
3214
+
3215
+ Having a cache for this is good if users spawn an extreme number of jobs
3216
+ concurrently.
3217
+
3218
+ Args:
3219
+ kubeconfig_path (str): kubeconfig path
3220
+ Returns: updated kubeconfig path
3221
+ """
3222
+ # TODO(kyuds): GC cache files
3223
+ with open(kubeconfig_path, 'r', encoding='utf-8') as file:
3224
+ config = yaml.safe_load(file)
3225
+ normalized = yaml.dump(config, sort_keys=True)
3226
+ hashed = hashlib.sha1(normalized.encode('utf-8')).hexdigest()
3227
+ path = os.path.expanduser(
3228
+ f'{kubernetes_constants.SKY_K8S_EXEC_AUTH_KUBECONFIG_CACHE}/{hashed}.yaml'
3229
+ )
3230
+
3231
+ # If we have already converted the same kubeconfig before, just return.
3232
+ if os.path.isfile(path):
3233
+ return path
3234
+
3235
+ format_kubeconfig_exec_auth(config, path)
3236
+ return path
sky/serve/client/sdk.py CHANGED
@@ -10,6 +10,8 @@ from sky.client import common as client_common
10
10
  from sky.server import common as server_common
11
11
  from sky.server.requests import payloads
12
12
  from sky.usage import usage_lib
13
+ from sky.utils import admin_policy_utils
14
+ from sky.utils import context
13
15
  from sky.utils import dag_utils
14
16
 
15
17
  if typing.TYPE_CHECKING:
@@ -23,6 +25,7 @@ else:
23
25
  requests = adaptors_common.LazyImport('requests')
24
26
 
25
27
 
28
+ @context.contextual
26
29
  @usage_lib.entrypoint
27
30
  @server_common.check_server_healthy_or_start
28
31
  def up(
@@ -55,30 +58,36 @@ def up(
55
58
  from sky.client import sdk # pylint: disable=import-outside-toplevel
56
59
 
57
60
  dag = dag_utils.convert_entrypoint_to_dag(task)
58
- sdk.validate(dag)
59
- request_id = sdk.optimize(dag)
60
- sdk.stream_and_get(request_id)
61
- if _need_confirmation:
62
- prompt = f'Launching a new service {service_name!r}. Proceed?'
63
- if prompt is not None:
64
- click.confirm(prompt, default=True, abort=True, show_default=True)
65
-
66
- dag = client_common.upload_mounts_to_api_server(dag)
67
- dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
68
-
69
- body = payloads.ServeUpBody(
70
- task=dag_str,
71
- service_name=service_name,
72
- )
73
- response = requests.post(
74
- f'{server_common.get_server_url()}/serve/up',
75
- json=json.loads(body.model_dump_json()),
76
- timeout=(5, None),
77
- cookies=server_common.get_api_cookie_jar(),
78
- )
79
- return server_common.get_request_id(response)
61
+ with admin_policy_utils.apply_and_use_config_in_current_request(
62
+ dag, at_client_side=True) as dag:
63
+ sdk.validate(dag)
64
+ request_id = sdk.optimize(dag)
65
+ sdk.stream_and_get(request_id)
66
+ if _need_confirmation:
67
+ prompt = f'Launching a new service {service_name!r}. Proceed?'
68
+ if prompt is not None:
69
+ click.confirm(prompt,
70
+ default=True,
71
+ abort=True,
72
+ show_default=True)
73
+
74
+ dag = client_common.upload_mounts_to_api_server(dag)
75
+ dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
76
+
77
+ body = payloads.ServeUpBody(
78
+ task=dag_str,
79
+ service_name=service_name,
80
+ )
81
+ response = requests.post(
82
+ f'{server_common.get_server_url()}/serve/up',
83
+ json=json.loads(body.model_dump_json()),
84
+ timeout=(5, None),
85
+ cookies=server_common.get_api_cookie_jar(),
86
+ )
87
+ return server_common.get_request_id(response)
80
88
 
81
89
 
90
+ @context.contextual
82
91
  @usage_lib.entrypoint
83
92
  @server_common.check_server_healthy_or_start
84
93
  def update(
@@ -112,30 +121,32 @@ def update(
112
121
  from sky.client import sdk # pylint: disable=import-outside-toplevel
113
122
 
114
123
  dag = dag_utils.convert_entrypoint_to_dag(task)
115
- sdk.validate(dag)
116
- request_id = sdk.optimize(dag)
117
- sdk.stream_and_get(request_id)
118
- if _need_confirmation:
119
- click.confirm(f'Updating service {service_name!r}. Proceed?',
120
- default=True,
121
- abort=True,
122
- show_default=True)
123
-
124
- dag = client_common.upload_mounts_to_api_server(dag)
125
- dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
126
- body = payloads.ServeUpdateBody(
127
- task=dag_str,
128
- service_name=service_name,
129
- mode=mode,
130
- )
124
+ with admin_policy_utils.apply_and_use_config_in_current_request(
125
+ dag, at_client_side=True) as dag:
126
+ sdk.validate(dag)
127
+ request_id = sdk.optimize(dag)
128
+ sdk.stream_and_get(request_id)
129
+ if _need_confirmation:
130
+ click.confirm(f'Updating service {service_name!r}. Proceed?',
131
+ default=True,
132
+ abort=True,
133
+ show_default=True)
134
+
135
+ dag = client_common.upload_mounts_to_api_server(dag)
136
+ dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
137
+ body = payloads.ServeUpdateBody(
138
+ task=dag_str,
139
+ service_name=service_name,
140
+ mode=mode,
141
+ )
131
142
 
132
- response = requests.post(
133
- f'{server_common.get_server_url()}/serve/update',
134
- json=json.loads(body.model_dump_json()),
135
- timeout=(5, None),
136
- cookies=server_common.get_api_cookie_jar(),
137
- )
138
- return server_common.get_request_id(response)
143
+ response = requests.post(
144
+ f'{server_common.get_server_url()}/serve/update',
145
+ json=json.loads(body.model_dump_json()),
146
+ timeout=(5, None),
147
+ cookies=server_common.get_api_cookie_jar(),
148
+ )
149
+ return server_common.get_request_id(response)
139
150
 
140
151
 
141
152
  @usage_lib.entrypoint
sky/serve/server/core.py CHANGED
@@ -221,7 +221,7 @@ def up(
221
221
  # for the first time; otherwise it is a name conflict.
222
222
  # Since the controller may be shared among multiple users, launch the
223
223
  # controller with the API server's user hash.
224
- with common.with_server_user_hash():
224
+ with common.with_server_user():
225
225
  with skypilot_config.local_active_workspace_ctx(
226
226
  constants.SKYPILOT_DEFAULT_WORKSPACE):
227
227
  controller_job_id, controller_handle = execution.launch(
sky/server/common.py CHANGED
@@ -39,6 +39,7 @@ if typing.TYPE_CHECKING:
39
39
  import requests
40
40
 
41
41
  from sky import dag as dag_lib
42
+ from sky import models
42
43
  else:
43
44
  pydantic = adaptors_common.LazyImport('pydantic')
44
45
  requests = adaptors_common.LazyImport('requests')
@@ -419,11 +420,7 @@ def _start_api_server(deploy: bool = False,
419
420
  dashboard_msg += (
420
421
  'Dashboard may be stale when installed from source, '
421
422
  'to rebuild: npm --prefix sky/dashboard install '
422
- '&& npm --prefix sky/dashboard run build\n')
423
- dashboard_msg += (
424
- f'{ux_utils.INDENT_LAST_SYMBOL}{colorama.Fore.GREEN}'
425
- f'Dashboard: {get_dashboard_url(server_url)}')
426
- dashboard_msg += f'{colorama.Style.RESET_ALL}'
423
+ '&& npm --prefix sky/dashboard run build')
427
424
  logger.info(
428
425
  ux_utils.finishing_message(
429
426
  f'SkyPilot API server started. {dashboard_msg}'))
@@ -710,7 +707,7 @@ def request_body_to_params(body: 'pydantic.BaseModel') -> Dict[str, Any]:
710
707
 
711
708
  def reload_for_new_request(client_entrypoint: Optional[str],
712
709
  client_command: Optional[str],
713
- using_remote_api_server: bool):
710
+ using_remote_api_server: bool, user: 'models.User'):
714
711
  """Reload modules, global variables, and usage message for a new request."""
715
712
  # This should be called first to make sure the logger is up-to-date.
716
713
  sky_logging.reload_logger()
@@ -719,10 +716,11 @@ def reload_for_new_request(client_entrypoint: Optional[str],
719
716
  skypilot_config.safe_reload_config()
720
717
 
721
718
  # Reset the client entrypoint and command for the usage message.
722
- common_utils.set_client_status(
719
+ common_utils.set_request_context(
723
720
  client_entrypoint=client_entrypoint,
724
721
  client_command=client_command,
725
722
  using_remote_api_server=using_remote_api_server,
723
+ user=user,
726
724
  )
727
725
 
728
726
  # Clear cache should be called before reload_logger and usage reset,
sky/server/constants.py CHANGED
@@ -11,8 +11,6 @@ API_VERSION = '9'
11
11
 
12
12
  # Prefix for API request names.
13
13
  REQUEST_NAME_PREFIX = 'sky.'
14
- # The user ID of the SkyPilot system.
15
- SKYPILOT_SYSTEM_USER_ID = 'skypilot-system'
16
14
  # The memory (GB) that SkyPilot tries to not use to prevent OOM.
17
15
  MIN_AVAIL_MEM_GB = 2
18
16
  # Default encoder/decoder handler name.