skypilot-nightly 1.0.0.dev20250609__py3-none-any.whl → 1.0.0.dev20250611__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. sky/__init__.py +2 -2
  2. sky/admin_policy.py +134 -5
  3. sky/authentication.py +1 -7
  4. sky/backends/cloud_vm_ray_backend.py +9 -20
  5. sky/benchmark/benchmark_state.py +39 -1
  6. sky/cli.py +3 -5
  7. sky/client/cli.py +3 -5
  8. sky/client/sdk.py +49 -4
  9. sky/clouds/kubernetes.py +15 -24
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +1 -0
  12. sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/37-d8aebf1683522a0b.js +6 -0
  14. sky/dashboard/out/_next/static/chunks/42.d39e24467181b06b.js +6 -0
  15. sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/470-4d1a5dbe58a8a2b9.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/{121-865d2bf8a3b84c6a.js → 491.b3d264269613fe09.js} +3 -3
  18. sky/dashboard/out/_next/static/chunks/513.211357a2914a34b2.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/600.15a0009177e86b86.js +16 -0
  20. sky/dashboard/out/_next/static/chunks/616-d6128fa9e7cae6e6.js +39 -0
  21. sky/dashboard/out/_next/static/chunks/664-047bc03493fda379.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +6 -0
  23. sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/799-3625946b2ec2eb30.js +8 -0
  25. sky/dashboard/out/_next/static/chunks/804-4c9fc53aa74bc191.js +21 -0
  26. sky/dashboard/out/_next/static/chunks/843-6fcc4bf91ac45b39.js +11 -0
  27. sky/dashboard/out/_next/static/chunks/856-0776dc6ed6000c39.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/938-ab185187a63f9cdb.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/947-6620842ef80ae879.js +35 -0
  31. sky/dashboard/out/_next/static/chunks/969-20d54a9d998dc102.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/973-c807fc34f09c7df3.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/pages/_app-7bbd9d39d6f9a98a.js +20 -0
  34. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-89216c616dbaa9c5.js +6 -0
  35. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-451a14e7e755ebbc.js +6 -0
  36. sky/dashboard/out/_next/static/chunks/pages/clusters-e56b17fd85d0ba58.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/pages/config-497a35a7ed49734a.js +1 -0
  38. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-d2910be98e9227cb.js +1 -0
  39. sky/dashboard/out/_next/static/chunks/pages/infra-780860bcc1103945.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b3dbf38b51cb29be.js +16 -0
  41. sky/dashboard/out/_next/static/chunks/pages/jobs-fe233baf3d073491.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/pages/users-c69ffcab9d6e5269.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c8c2191328532b7d.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/pages/workspaces-82e6601baa5dd280.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/webpack-208a9812ab4f61c9.js +1 -0
  47. sky/dashboard/out/_next/static/css/{8b1c8321d4c02372.css → 5d71bfc09f184bab.css} +1 -1
  48. sky/dashboard/out/_next/static/zJqasksBQ3HcqMpA2wTUZ/_buildManifest.js +1 -0
  49. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  50. sky/dashboard/out/clusters/[cluster].html +1 -1
  51. sky/dashboard/out/clusters.html +1 -1
  52. sky/dashboard/out/config.html +1 -1
  53. sky/dashboard/out/index.html +1 -1
  54. sky/dashboard/out/infra/[context].html +1 -1
  55. sky/dashboard/out/infra.html +1 -1
  56. sky/dashboard/out/jobs/[job].html +1 -1
  57. sky/dashboard/out/jobs.html +1 -1
  58. sky/dashboard/out/users.html +1 -1
  59. sky/dashboard/out/workspace/new.html +1 -1
  60. sky/dashboard/out/workspaces/[name].html +1 -1
  61. sky/dashboard/out/workspaces.html +1 -1
  62. sky/exceptions.py +18 -0
  63. sky/global_user_state.py +181 -74
  64. sky/jobs/client/sdk.py +29 -21
  65. sky/jobs/scheduler.py +4 -5
  66. sky/jobs/state.py +104 -11
  67. sky/jobs/utils.py +5 -5
  68. sky/provision/kubernetes/constants.py +9 -0
  69. sky/provision/kubernetes/utils.py +106 -7
  70. sky/serve/client/sdk.py +56 -45
  71. sky/server/common.py +1 -5
  72. sky/server/requests/executor.py +50 -20
  73. sky/server/requests/payloads.py +3 -0
  74. sky/server/requests/process.py +69 -29
  75. sky/server/server.py +1 -0
  76. sky/server/stream_utils.py +111 -55
  77. sky/skylet/constants.py +1 -2
  78. sky/skylet/job_lib.py +95 -40
  79. sky/skypilot_config.py +99 -25
  80. sky/users/permission.py +34 -17
  81. sky/utils/admin_policy_utils.py +41 -16
  82. sky/utils/context.py +21 -1
  83. sky/utils/controller_utils.py +16 -1
  84. sky/utils/kubernetes/exec_kubeconfig_converter.py +19 -47
  85. sky/utils/schemas.py +11 -3
  86. {skypilot_nightly-1.0.0.dev20250609.dist-info → skypilot_nightly-1.0.0.dev20250611.dist-info}/METADATA +1 -1
  87. {skypilot_nightly-1.0.0.dev20250609.dist-info → skypilot_nightly-1.0.0.dev20250611.dist-info}/RECORD +92 -81
  88. sky/dashboard/out/_next/static/chunks/236-619ed0248fb6fdd9.js +0 -6
  89. sky/dashboard/out/_next/static/chunks/293-351268365226d251.js +0 -1
  90. sky/dashboard/out/_next/static/chunks/37-600191c5804dcae2.js +0 -6
  91. sky/dashboard/out/_next/static/chunks/470-680c19413b8f808b.js +0 -1
  92. sky/dashboard/out/_next/static/chunks/63-e2d7b1e75e67c713.js +0 -66
  93. sky/dashboard/out/_next/static/chunks/682-b60cfdacc15202e8.js +0 -6
  94. sky/dashboard/out/_next/static/chunks/843-16c7194621b2b512.js +0 -11
  95. sky/dashboard/out/_next/static/chunks/856-affc52adf5403a3a.js +0 -1
  96. sky/dashboard/out/_next/static/chunks/969-2c584e28e6b4b106.js +0 -1
  97. sky/dashboard/out/_next/static/chunks/973-aed916d5b02d2d63.js +0 -1
  98. sky/dashboard/out/_next/static/chunks/pages/_app-5f16aba5794ee8e7.js +0 -1
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-d31688d3e52736dd.js +0 -6
  100. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e7d8710a9b0491e5.js +0 -6
  101. sky/dashboard/out/_next/static/chunks/pages/clusters-3c674e5d970e05cb.js +0 -1
  102. sky/dashboard/out/_next/static/chunks/pages/config-3aac7a015c6eede1.js +0 -6
  103. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-46d2e4ad6c487260.js +0 -1
  104. sky/dashboard/out/_next/static/chunks/pages/infra-7013d816a2a0e76c.js +0 -1
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-f7f0c9e156d328bc.js +0 -16
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-87e60396c376292f.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/pages/users-9355a0f13d1db61d.js +0 -16
  108. sky/dashboard/out/_next/static/chunks/pages/workspace/new-9a749cca1813bd27.js +0 -1
  109. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-8eeb628e03902f1b.js +0 -1
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces-8fbcc5ab4af316d0.js +0 -1
  111. sky/dashboard/out/_next/static/chunks/webpack-65d465f948974c0d.js +0 -1
  112. sky/dashboard/out/_next/static/xos0euNCptbGAM7_Q3Acl/_buildManifest.js +0 -1
  113. /sky/dashboard/out/_next/static/{xos0euNCptbGAM7_Q3Acl → zJqasksBQ3HcqMpA2wTUZ}/_ssgManifest.js +0 -0
  114. {skypilot_nightly-1.0.0.dev20250609.dist-info → skypilot_nightly-1.0.0.dev20250611.dist-info}/WHEEL +0 -0
  115. {skypilot_nightly-1.0.0.dev20250609.dist-info → skypilot_nightly-1.0.0.dev20250611.dist-info}/entry_points.txt +0 -0
  116. {skypilot_nightly-1.0.0.dev20250609.dist-info → skypilot_nightly-1.0.0.dev20250611.dist-info}/licenses/LICENSE +0 -0
  117. {skypilot_nightly-1.0.0.dev20250609.dist-info → skypilot_nightly-1.0.0.dev20250611.dist-info}/top_level.txt +0 -0
sky/skypilot_config.py CHANGED
@@ -58,6 +58,11 @@ import typing
58
58
  from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
59
59
 
60
60
  import filelock
61
+ import sqlalchemy
62
+ from sqlalchemy import orm
63
+ from sqlalchemy.dialects import postgresql
64
+ from sqlalchemy.dialects import sqlite
65
+ from sqlalchemy.ext import declarative
61
66
 
62
67
  from sky import exceptions
63
68
  from sky import sky_logging
@@ -66,6 +71,7 @@ from sky.skylet import constants
66
71
  from sky.utils import common_utils
67
72
  from sky.utils import config_utils
68
73
  from sky.utils import context
74
+ from sky.utils import db_utils
69
75
  from sky.utils import schemas
70
76
  from sky.utils import ux_utils
71
77
  from sky.utils.kubernetes import config_map_utils
@@ -110,6 +116,56 @@ ENV_VAR_PROJECT_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}PROJECT_CONFIG'
110
116
  _GLOBAL_CONFIG_PATH = '~/.sky/config.yaml'
111
117
  _PROJECT_CONFIG_PATH = '.sky.yaml'
112
118
 
119
+ _SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
120
+ API_SERVER_CONFIG_KEY = 'api_server_config'
121
+
122
+ Base = declarative.declarative_base()
123
+
124
+ config_yaml_table = sqlalchemy.Table(
125
+ 'config_yaml',
126
+ Base.metadata,
127
+ sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
128
+ sqlalchemy.Column('value', sqlalchemy.Text),
129
+ )
130
+
131
+
132
+ def create_table():
133
+ # Create tables if they don't exist
134
+ Base.metadata.create_all(bind=_SQLALCHEMY_ENGINE)
135
+
136
+
137
+ def _get_config_yaml_from_db(key: str) -> Optional[config_utils.Config]:
138
+ assert _SQLALCHEMY_ENGINE is not None
139
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
140
+ row = session.query(config_yaml_table).filter_by(key=key).first()
141
+ if row:
142
+ db_config = config_utils.Config(yaml.safe_load(row.value))
143
+ db_config.pop_nested(('db',), None)
144
+ return db_config
145
+ return None
146
+
147
+
148
+ def _set_config_yaml_to_db(key: str, config: config_utils.Config):
149
+ assert _SQLALCHEMY_ENGINE is not None
150
+ config.pop_nested(('db',), None)
151
+ config_str = common_utils.dump_yaml_str(dict(config))
152
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
153
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
154
+ db_utils.SQLAlchemyDialect.SQLITE.value):
155
+ insert_func = sqlite.insert
156
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
157
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
158
+ insert_func = postgresql.insert
159
+ else:
160
+ raise ValueError('Unsupported database dialect')
161
+ insert_stmnt = insert_func(config_yaml_table).values(key=key,
162
+ value=config_str)
163
+ do_update_stmt = insert_stmnt.on_conflict_do_update(
164
+ index_elements=[config_yaml_table.c.key],
165
+ set_={config_yaml_table.c.value: config_str})
166
+ session.execute(do_update_stmt)
167
+ session.commit()
168
+
113
169
 
114
170
  class ConfigContext:
115
171
 
@@ -257,11 +313,6 @@ def _resolve_project_config_path() -> Optional[str]:
257
313
  return None
258
314
 
259
315
 
260
- def _get_project_config() -> config_utils.Config:
261
- """Returns the project config."""
262
- return _get_config_from_path(_resolve_project_config_path())
263
-
264
-
265
316
  def _resolve_server_config_path() -> Optional[str]:
266
317
  # find the server config file
267
318
  server_config_path = _get_config_file_path(ENV_VAR_GLOBAL_CONFIG)
@@ -507,26 +558,35 @@ def _reload_config_from_internal_file(internal_config_path: str) -> None:
507
558
 
508
559
 
509
560
  def _reload_config_as_server() -> None:
561
+ global _SQLALCHEMY_ENGINE
510
562
  # Reset the global variables, to avoid using stale values.
511
563
  _set_loaded_config(config_utils.Config())
512
564
  _set_loaded_config_path(None)
513
565
 
514
- overrides: List[config_utils.Config] = []
515
566
  server_config_path = _resolve_server_config_path()
516
567
  server_config = _get_config_from_path(server_config_path)
517
- if server_config:
518
- overrides.append(server_config)
519
568
 
520
- # layer the configs on top of each other based on priority
521
- overlaid_server_config: config_utils.Config = config_utils.Config()
522
- for override in overrides:
523
- overlaid_server_config = overlay_skypilot_config(
524
- original_config=overlaid_server_config, override_configs=override)
525
569
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
526
- logger.debug(
527
- f'server config: \n'
528
- f'{common_utils.dump_yaml_str(dict(overlaid_server_config))}')
529
- _set_loaded_config(overlaid_server_config)
570
+ logger.debug(f'server config: \n'
571
+ f'{common_utils.dump_yaml_str(dict(server_config))}')
572
+
573
+ db_url = server_config.get_nested(('db',), None)
574
+ if db_url and len(server_config.keys()) > 1:
575
+ raise ValueError(
576
+ 'if db config is specified, no other config is allowed')
577
+
578
+ if db_url:
579
+ if _SQLALCHEMY_ENGINE is None:
580
+ _SQLALCHEMY_ENGINE = sqlalchemy.create_engine(db_url)
581
+ create_table()
582
+ db_config = _get_config_yaml_from_db(API_SERVER_CONFIG_KEY)
583
+ if db_config:
584
+ if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
585
+ logger.debug(f'Config loaded from db:\n'
586
+ f'{common_utils.dump_yaml_str(dict(db_config))}')
587
+ server_config = overlay_skypilot_config(server_config, db_config)
588
+
589
+ _set_loaded_config(server_config)
530
590
  _set_loaded_config_path(server_config_path)
531
591
 
532
592
 
@@ -778,13 +838,27 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
778
838
  if global_config_path is None:
779
839
  global_config_path = get_user_config_path()
780
840
 
781
- # Always save to the local file (PVC in Kubernetes, local file otherwise)
782
- common_utils.dump_yaml(global_config_path, dict(config))
783
-
784
- if config_map_utils.is_running_in_kubernetes():
785
- # In Kubernetes, sync the PVC config to ConfigMap for user convenience
786
- # PVC file is the source of truth, ConfigMap is just a mirror for easy
787
- # access
788
- config_map_utils.patch_configmap_with_config(config, global_config_path)
841
+ db_updated = False
842
+ if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
843
+ existing_db_url = get_nested(('db',), None)
844
+ if existing_db_url:
845
+ new_db_url = config.get_nested(('db',), None)
846
+ if new_db_url and new_db_url != existing_db_url:
847
+ raise ValueError('Cannot change db url while server is running')
848
+ logger.debug('saving api_server config to db')
849
+ _set_config_yaml_to_db(API_SERVER_CONFIG_KEY, config)
850
+ db_updated = True
851
+
852
+ if not db_updated:
853
+ # save to the local file (PVC in Kubernetes, local file otherwise)
854
+ common_utils.dump_yaml(global_config_path, dict(config))
855
+
856
+ if config_map_utils.is_running_in_kubernetes():
857
+ # In Kubernetes, sync the PVC config to ConfigMap for user
858
+ # convenience.
859
+ # PVC file is the source of truth, ConfigMap is just a mirror for
860
+ # easy access.
861
+ config_map_utils.patch_configmap_with_config(
862
+ config, global_config_path)
789
863
 
790
864
  _reload_config()
sky/users/permission.py CHANGED
@@ -30,26 +30,34 @@ class PermissionService:
30
30
  """Permission service for SkyPilot API Server."""
31
31
 
32
32
  def __init__(self):
33
- global _enforcer_instance
34
- if _enforcer_instance is None:
35
- # For different threads, we share the same enforcer instance.
36
- with _lock:
37
- if _enforcer_instance is None:
38
- _enforcer_instance = self
39
- engine = global_user_state.SQLALCHEMY_ENGINE
40
- adapter = sqlalchemy_adapter.Adapter(engine)
41
- model_path = os.path.join(os.path.dirname(__file__),
42
- 'model.conf')
43
- enforcer = casbin.Enforcer(model_path, adapter)
44
- self.enforcer = enforcer
45
- else:
46
- self.enforcer = _enforcer_instance.enforcer
47
- with _policy_lock():
48
- self._maybe_initialize_policies()
33
+ self.enforcer = None
34
+ self.init_lock = threading.Lock()
35
+
36
+ def _lazy_initialize(self):
37
+ if self.enforcer is not None:
38
+ return
39
+ with self.init_lock:
40
+ if self.enforcer is not None:
41
+ return
42
+ global _enforcer_instance
43
+ if _enforcer_instance is None:
44
+ # For different threads, we share the same enforcer instance.
45
+ with _lock:
46
+ if _enforcer_instance is None:
47
+ _enforcer_instance = self
48
+ engine = global_user_state.initialize_and_get_db()
49
+ adapter = sqlalchemy_adapter.Adapter(engine)
50
+ model_path = os.path.join(os.path.dirname(__file__),
51
+ 'model.conf')
52
+ enforcer = casbin.Enforcer(model_path, adapter)
53
+ self.enforcer = enforcer
54
+ else:
55
+ self.enforcer = _enforcer_instance.enforcer
56
+ with _policy_lock():
57
+ self._maybe_initialize_policies()
49
58
 
50
59
  def _maybe_initialize_policies(self) -> None:
51
60
  """Initialize policies if they don't already exist."""
52
- # TODO(zhwu): we should avoid running this on client side.
53
61
  logger.debug(f'Initializing policies in process: {os.getpid()}')
54
62
  self._load_policy_no_lock()
55
63
 
@@ -128,6 +136,7 @@ class PermissionService:
128
136
 
129
137
  def add_user_if_not_exists(self, user_id: str) -> None:
130
138
  """Add user role relationship."""
139
+ self._lazy_initialize()
131
140
  with _policy_lock():
132
141
  self._add_user_if_not_exists_no_lock(user_id)
133
142
 
@@ -147,6 +156,7 @@ class PermissionService:
147
156
 
148
157
  def update_role(self, user_id: str, new_role: str) -> None:
149
158
  """Update user role relationship."""
159
+ self._lazy_initialize()
150
160
  with _policy_lock():
151
161
  # Get current roles
152
162
  self._load_policy_no_lock()
@@ -179,6 +189,7 @@ class PermissionService:
179
189
  Returns:
180
190
  A list of role names that the user has.
181
191
  """
192
+ self._lazy_initialize()
182
193
  self._load_policy_no_lock()
183
194
  return self.enforcer.get_roles_for_user(user_id)
184
195
 
@@ -191,6 +202,7 @@ class PermissionService:
191
202
  # it is a hot path in every request. It is ok to have a stale policy,
192
203
  # as long as it is eventually consistent.
193
204
  # self._load_policy_no_lock()
205
+ self._lazy_initialize()
194
206
  return self.enforcer.enforce(user_id, path, method)
195
207
 
196
208
  def _load_policy_no_lock(self):
@@ -199,6 +211,7 @@ class PermissionService:
199
211
 
200
212
  def load_policy(self):
201
213
  """Load policy from storage with lock."""
214
+ self._lazy_initialize()
202
215
  with _policy_lock():
203
216
  self._load_policy_no_lock()
204
217
 
@@ -214,6 +227,7 @@ class PermissionService:
214
227
  For public workspaces, the permission is granted via a wildcard policy
215
228
  ('*').
216
229
  """
230
+ self._lazy_initialize()
217
231
  if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
218
232
  # When it is not on API server, we allow all users to access all
219
233
  # workspaces, as the workspace check has been done on API server.
@@ -241,6 +255,7 @@ class PermissionService:
241
255
  For public workspaces, this should be ['*'].
242
256
  For private workspaces, this should be specific user IDs.
243
257
  """
258
+ self._lazy_initialize()
244
259
  with _policy_lock():
245
260
  for user in users:
246
261
  logger.debug(f'Adding workspace policy: user={user}, '
@@ -258,6 +273,7 @@ class PermissionService:
258
273
  For public workspaces, this should be ['*'].
259
274
  For private workspaces, this should be specific user IDs.
260
275
  """
276
+ self._lazy_initialize()
261
277
  with _policy_lock():
262
278
  self._load_policy_no_lock()
263
279
  # Remove all existing policies for this workspace
@@ -271,6 +287,7 @@ class PermissionService:
271
287
 
272
288
  def remove_workspace_policy(self, workspace_name: str) -> None:
273
289
  """Remove workspace policy."""
290
+ self._lazy_initialize()
274
291
  with _policy_lock():
275
292
  self.enforcer.remove_filtered_policy(1, workspace_name)
276
293
  self.enforcer.save_policy()
@@ -3,6 +3,7 @@ import contextlib
3
3
  import copy
4
4
  import importlib
5
5
  from typing import Iterator, Optional, Tuple, Union
6
+ import urllib.parse
6
7
 
7
8
  import colorama
8
9
 
@@ -19,18 +20,34 @@ from sky.utils import ux_utils
19
20
  logger = sky_logging.init_logger(__name__)
20
21
 
21
22
 
22
- def _get_policy_cls(
23
- policy: Optional[str]) -> Optional[admin_policy.AdminPolicy]:
23
+ def _is_url(policy_string: str) -> bool:
24
+ """Check if the policy string is a URL."""
25
+ try:
26
+ parsed = urllib.parse.urlparse(policy_string)
27
+ return parsed.scheme in ('http', 'https')
28
+ except Exception: # pylint: disable=broad-except
29
+ return False
30
+
31
+
32
+ def _get_policy_impl(
33
+ policy_location: Optional[str]
34
+ ) -> Optional[admin_policy.PolicyInterface]:
24
35
  """Gets admin-defined policy."""
25
- if policy is None:
36
+ if policy_location is None:
26
37
  return None
38
+
39
+ if _is_url(policy_location):
40
+ # Use the built-in URL policy class when a URL is specified.
41
+ return admin_policy.RestfulAdminPolicy(policy_location)
42
+
43
+ # Handle module path format
27
44
  try:
28
- module_path, class_name = policy.rsplit('.', 1)
45
+ module_path, class_name = policy_location.rsplit('.', 1)
29
46
  module = importlib.import_module(module_path)
30
47
  except ImportError as e:
31
48
  with ux_utils.print_exception_no_traceback():
32
49
  raise ImportError(
33
- f'Failed to import policy module: {policy}. '
50
+ f'Failed to import policy module: {policy_location}. '
34
51
  'Please check if the module is installed in your Python '
35
52
  'environment.') from e
36
53
 
@@ -42,19 +59,22 @@ def _get_policy_cls(
42
59
  f'Could not find {class_name} class in module {module_path}. '
43
60
  'Please check with your policy admin for details.') from e
44
61
 
45
- # Check if the module implements the AdminPolicy interface.
62
+ # Currently we only allow users to define subclasses of AdminPolicy
63
+ # instead of inheriting from PolicyInterface or PolicyTemplate.
46
64
  if not issubclass(policy_cls, admin_policy.AdminPolicy):
47
65
  with ux_utils.print_exception_no_traceback():
48
66
  raise ValueError(
49
- f'Policy class {policy!r} does not implement the AdminPolicy '
50
- 'interface. Please check with your policy admin for details.')
51
- return policy_cls
67
+ f'Policy class {policy_cls!r} does not implement the '
68
+ 'AdminPolicy interface. Please check with your policy admin '
69
+ 'for details.')
70
+ return policy_cls()
52
71
 
53
72
 
54
73
  @contextlib.contextmanager
55
74
  def apply_and_use_config_in_current_request(
56
75
  entrypoint: Union['dag_lib.Dag', 'task_lib.Task'],
57
76
  request_options: Optional[admin_policy.RequestOptions] = None,
77
+ at_client_side: bool = False,
58
78
  ) -> Iterator['dag_lib.Dag']:
59
79
  """Applies an admin policy and override SkyPilot config for current request
60
80
 
@@ -66,7 +86,7 @@ def apply_and_use_config_in_current_request(
66
86
  Refer to `apply()` for more details.
67
87
  """
68
88
  original_config = skypilot_config.to_dict()
69
- dag, mutated_config = apply(entrypoint, request_options)
89
+ dag, mutated_config = apply(entrypoint, request_options, at_client_side)
70
90
  if mutated_config != original_config:
71
91
  with skypilot_config.replace_skypilot_config(mutated_config):
72
92
  yield dag
@@ -77,6 +97,7 @@ def apply_and_use_config_in_current_request(
77
97
  def apply(
78
98
  entrypoint: Union['dag_lib.Dag', 'task_lib.Task'],
79
99
  request_options: Optional[admin_policy.RequestOptions] = None,
100
+ at_client_side: bool = False,
80
101
  ) -> Tuple['dag_lib.Dag', config_utils.Config]:
81
102
  """Applies an admin policy (if registered) to a DAG or a task.
82
103
 
@@ -100,21 +121,25 @@ def apply(
100
121
  else:
101
122
  dag = entrypoint
102
123
 
103
- policy = skypilot_config.get_nested(('admin_policy',), None)
104
- policy_cls = _get_policy_cls(policy)
105
- if policy_cls is None:
124
+ policy_location = skypilot_config.get_nested(('admin_policy',), None)
125
+ policy = _get_policy_impl(policy_location)
126
+ if policy is None:
106
127
  return dag, skypilot_config.to_dict()
107
128
 
108
- logger.info(f'Applying policy: {policy}')
129
+ if at_client_side:
130
+ logger.info(f'Applying client admin policy: {policy}')
131
+ else:
132
+ logger.info(f'Applying server admin policy: {policy}')
109
133
  config = copy.deepcopy(skypilot_config.to_dict())
110
134
  mutated_dag = dag_lib.Dag()
111
135
  mutated_dag.name = dag.name
112
136
 
113
137
  mutated_config = None
114
138
  for task in dag.tasks:
115
- user_request = admin_policy.UserRequest(task, config, request_options)
139
+ user_request = admin_policy.UserRequest(task, config, request_options,
140
+ at_client_side)
116
141
  try:
117
- mutated_user_request = policy_cls.validate_and_mutate(user_request)
142
+ mutated_user_request = policy.apply(user_request)
118
143
  except Exception as e: # pylint: disable=broad-except
119
144
  with ux_utils.print_exception_no_traceback():
120
145
  raise exceptions.UserRequestRejectedByPolicy(
sky/utils/context.py CHANGED
@@ -4,11 +4,13 @@ import asyncio
4
4
  from collections.abc import Mapping
5
5
  from collections.abc import MutableMapping
6
6
  import contextvars
7
+ import functools
7
8
  import os
8
9
  import pathlib
9
10
  import subprocess
10
11
  import sys
11
- from typing import Dict, Optional, TextIO
12
+ import typing
13
+ from typing import Any, Callable, Dict, Optional, TextIO, TypeVar
12
14
 
13
15
 
14
16
  class Context(object):
@@ -256,6 +258,24 @@ class Popen(subprocess.Popen):
256
258
  super().__init__(*args, env=env, **kwargs)
257
259
 
258
260
 
261
+ F = TypeVar('F', bound=Callable[..., Any])
262
+
263
+
264
+ def contextual(func: F) -> F:
265
+ """Decorator to initialize a context before executing the function.
266
+
267
+ If a context is already initialized, this decorator will reset the context,
268
+ i.e. all contextual variables set previously will be cleared.
269
+ """
270
+
271
+ @functools.wraps(func)
272
+ def wrapper(*args, **kwargs):
273
+ initialize()
274
+ return func(*args, **kwargs)
275
+
276
+ return typing.cast(F, wrapper)
277
+
278
+
259
279
  def initialize():
260
280
  """Initialize the current SkyPilot context."""
261
281
  _CONTEXT.set(Context())
@@ -24,6 +24,7 @@ from sky.clouds import gcp
24
24
  from sky.data import data_utils
25
25
  from sky.data import storage as storage_lib
26
26
  from sky.jobs import constants as managed_job_constants
27
+ from sky.provision.kubernetes import constants as kubernetes_constants
27
28
  from sky.serve import constants as serve_constants
28
29
  from sky.setup_files import dependencies
29
30
  from sky.skylet import constants
@@ -272,6 +273,18 @@ def _get_cloud_dependencies_installation_commands(
272
273
  step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
273
274
  commands.append(f'echo -en "\\r{step_prefix}GCP SDK{empty_str}" &&'
274
275
  f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}')
276
+ if clouds.cloud_in_iterable(clouds.Kubernetes(), enabled_clouds):
277
+ # Install gke-gcloud-auth-plugin used for exec-auth with GKE.
278
+ # We install the plugin here instead of the next elif branch
279
+ # because gcloud is required to install the plugin, so the order
280
+ # of command execution is critical.
281
+
282
+ # We install the plugin here regardless of whether exec-auth is
283
+ # actually used as exec-auth may be used in the future.
284
+ # TODO (kyuds): how to implement conservative installation?
285
+ commands.append(
286
+ '(command -v gke-gcloud-auth-plugin &>/dev/null || '
287
+ '(gcloud components install gke-gcloud-auth-plugin --quiet &>/dev/null))') # pylint: disable=line-too-long
275
288
  elif isinstance(cloud, clouds.Kubernetes):
276
289
  step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
277
290
  commands.append(
@@ -295,7 +308,9 @@ def _get_cloud_dependencies_installation_commands(
295
308
  '(curl -s -LO "https://dl.k8s.io/release/v1.31.6'
296
309
  '/bin/linux/$ARCH/kubectl" && '
297
310
  'sudo install -o root -g root -m 0755 '
298
- 'kubectl /usr/local/bin/kubectl))')
311
+ 'kubectl /usr/local/bin/kubectl)) && '
312
+ f'echo -e \'#!/bin/bash\\nexport PATH="{kubernetes_constants.SKY_K8S_EXEC_AUTH_PATH}"\\nexec "$@"\' | sudo tee /usr/local/bin/{kubernetes_constants.SKY_K8S_EXEC_AUTH_WRAPPER} > /dev/null && ' # pylint: disable=line-too-long
313
+ f'sudo chmod +x /usr/local/bin/{kubernetes_constants.SKY_K8S_EXEC_AUTH_WRAPPER}') # pylint: disable=line-too-long
299
314
  elif isinstance(cloud, clouds.Cudo):
300
315
  step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
301
316
  commands.append(
@@ -12,6 +12,12 @@ It assumes the target environment has the auth executable available in PATH.
12
12
  If not, you'll need to update your environment container to include the auth
13
13
  executable in PATH.
14
14
 
15
+ When using LOCAL_CREDENTIALS (aka exec auth) with Kubernetes, though, SkyPilot
16
+ will automatically inject a wrapper script for common exec auth providers like
17
+ GKE and EKS. This wrapper script helps to resolve path issues that may arise
18
+ from executables installed on non system-default paths. Thus, the kubeconfig
19
+ file may look different on the sky jobs controller.
20
+
15
21
  Usage:
16
22
  python -m sky.utils.kubernetes.exec_kubeconfig_converter
17
23
  """
@@ -20,52 +26,7 @@ import os
20
26
 
21
27
  import yaml
22
28
 
23
-
24
- def strip_auth_plugin_paths(kubeconfig_path: str, output_path: str):
25
- """Strip path information from exec plugin commands in a kubeconfig file.
26
-
27
- For Nebius kubeconfigs, also changes the --profile argument to 'sky'.
28
-
29
- Args:
30
- kubeconfig_path (str): Path to the input kubeconfig file
31
- output_path (str): Path where the modified kubeconfig will be saved
32
- """
33
- with open(kubeconfig_path, 'r', encoding='utf-8') as file:
34
- config = yaml.safe_load(file)
35
-
36
- updated = False
37
- for user in config.get('users', []):
38
- exec_info = user.get('user', {}).get('exec', {})
39
- current_command = exec_info.get('command', '')
40
-
41
- if current_command:
42
- # Strip the path and keep only the executable name
43
- executable = os.path.basename(current_command)
44
- if executable != current_command:
45
- exec_info['command'] = executable
46
- updated = True
47
-
48
- # Handle Nebius kubeconfigs: change --profile to 'sky'
49
- if executable == 'nebius' or current_command == 'nebius':
50
- args = exec_info.get('args', [])
51
- if args and '--profile' in args:
52
- try:
53
- profile_index = args.index('--profile')
54
- if profile_index + 1 < len(args):
55
- old_profile = args[profile_index + 1]
56
- if old_profile != 'sky':
57
- args[profile_index + 1] = 'sky'
58
- updated = True
59
- except ValueError:
60
- pass # --profile not found in args
61
-
62
- if updated:
63
- with open(output_path, 'w', encoding='utf-8') as file:
64
- yaml.safe_dump(config, file)
65
- print('Kubeconfig updated with path-less exec auth. '
66
- f'Saved to {output_path}')
67
- else:
68
- print('No updates made. No exec-based auth commands paths found.')
29
+ from sky.provision.kubernetes import utils as kubernetes_utils
69
30
 
70
31
 
71
32
  def main():
@@ -85,7 +46,18 @@ def main():
85
46
  help='Output kubeconfig file path (default: %(default)s)')
86
47
 
87
48
  args = parser.parse_args()
88
- strip_auth_plugin_paths(args.input, args.output)
49
+
50
+ with open(args.input, 'r', encoding='utf-8') as file:
51
+ config = yaml.safe_load(file)
52
+
53
+ updated = kubernetes_utils.format_kubeconfig_exec_auth(
54
+ config, args.output, False)
55
+
56
+ if updated:
57
+ print('Kubeconfig updated with path-less exec auth. '
58
+ f'Saved to {args.output}')
59
+ else:
60
+ print('No updates made.')
89
61
 
90
62
 
91
63
  if __name__ == '__main__':
sky/utils/schemas.py CHANGED
@@ -1149,9 +1149,17 @@ def get_config_schema():
1149
1149
 
1150
1150
  admin_policy_schema = {
1151
1151
  'type': 'string',
1152
- # Check regex to be a valid python module path
1153
- 'pattern': (r'^[a-zA-Z_][a-zA-Z0-9_]*'
1154
- r'(\.[a-zA-Z_][a-zA-Z0-9_]*)+$'),
1152
+ 'anyOf': [
1153
+ {
1154
+ # Check regex to be a valid python module path
1155
+ 'pattern': (r'^[a-zA-Z_][a-zA-Z0-9_]*'
1156
+ r'(\.[a-zA-Z_][a-zA-Z0-9_]*)+$'),
1157
+ },
1158
+ {
1159
+ # Check for valid HTTP/HTTPS URL
1160
+ 'pattern': r'^https?://.*$',
1161
+ }
1162
+ ]
1155
1163
  }
1156
1164
 
1157
1165
  allowed_clouds = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250609
3
+ Version: 1.0.0.dev20250611
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0