skypilot-nightly 1.0.0.dev20250607__py3-none-any.whl → 1.0.0.dev20250610__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. sky/__init__.py +2 -2
  2. sky/admin_policy.py +3 -0
  3. sky/authentication.py +1 -7
  4. sky/backends/backend_utils.py +18 -2
  5. sky/backends/cloud_vm_ray_backend.py +9 -20
  6. sky/check.py +4 -3
  7. sky/cli.py +6 -9
  8. sky/client/cli.py +6 -9
  9. sky/client/sdk.py +49 -4
  10. sky/clouds/kubernetes.py +15 -24
  11. sky/core.py +3 -2
  12. sky/dashboard/out/404.html +1 -1
  13. sky/dashboard/out/_next/static/4lwUJxN6KwBqUxqO1VccB/_buildManifest.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/37-d8aebf1683522a0b.js +6 -0
  17. sky/dashboard/out/_next/static/chunks/42.d39e24467181b06b.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/470-4d1a5dbe58a8a2b9.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/{121-865d2bf8a3b84c6a.js → 491.b3d264269613fe09.js} +3 -3
  21. sky/dashboard/out/_next/static/chunks/513.211357a2914a34b2.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/600.9cc76ec442b22e10.js +16 -0
  23. sky/dashboard/out/_next/static/chunks/616-d6128fa9e7cae6e6.js +39 -0
  24. sky/dashboard/out/_next/static/chunks/664-047bc03493fda379.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +6 -0
  26. sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/799-3625946b2ec2eb30.js +8 -0
  28. sky/dashboard/out/_next/static/chunks/804-4c9fc53aa74bc191.js +21 -0
  29. sky/dashboard/out/_next/static/chunks/843-6fcc4bf91ac45b39.js +11 -0
  30. sky/dashboard/out/_next/static/chunks/856-0776dc6ed6000c39.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/938-a75b7712639298b7.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/947-6620842ef80ae879.js +35 -0
  34. sky/dashboard/out/_next/static/chunks/969-20d54a9d998dc102.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/973-c807fc34f09c7df3.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/pages/_app-4768de0aede04dc9.js +20 -0
  37. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-89216c616dbaa9c5.js +6 -0
  38. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-451a14e7e755ebbc.js +6 -0
  39. sky/dashboard/out/_next/static/chunks/pages/clusters-e56b17fd85d0ba58.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/pages/config-497a35a7ed49734a.js +1 -0
  41. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-d2910be98e9227cb.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/pages/infra-780860bcc1103945.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b3dbf38b51cb29be.js +16 -0
  44. sky/dashboard/out/_next/static/chunks/pages/jobs-fe233baf3d073491.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/pages/users-c69ffcab9d6e5269.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c8c2191328532b7d.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/pages/workspaces-82e6601baa5dd280.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/webpack-0574a5a4ba3cf0ac.js +1 -0
  50. sky/dashboard/out/_next/static/css/8b1c8321d4c02372.css +3 -0
  51. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  52. sky/dashboard/out/clusters/[cluster].html +1 -1
  53. sky/dashboard/out/clusters.html +1 -1
  54. sky/dashboard/out/config.html +1 -1
  55. sky/dashboard/out/index.html +1 -1
  56. sky/dashboard/out/infra/[context].html +1 -1
  57. sky/dashboard/out/infra.html +1 -1
  58. sky/dashboard/out/jobs/[job].html +1 -1
  59. sky/dashboard/out/jobs.html +1 -1
  60. sky/dashboard/out/users.html +1 -1
  61. sky/dashboard/out/workspace/new.html +1 -1
  62. sky/dashboard/out/workspaces/[name].html +1 -1
  63. sky/dashboard/out/workspaces.html +1 -1
  64. sky/exceptions.py +23 -0
  65. sky/global_user_state.py +192 -80
  66. sky/jobs/client/sdk.py +29 -21
  67. sky/jobs/server/core.py +9 -1
  68. sky/jobs/server/server.py +0 -95
  69. sky/jobs/utils.py +2 -1
  70. sky/models.py +18 -0
  71. sky/provision/kubernetes/constants.py +9 -0
  72. sky/provision/kubernetes/utils.py +106 -7
  73. sky/serve/client/sdk.py +56 -45
  74. sky/serve/server/core.py +1 -1
  75. sky/server/common.py +5 -7
  76. sky/server/constants.py +0 -2
  77. sky/server/requests/executor.py +60 -22
  78. sky/server/requests/payloads.py +3 -0
  79. sky/server/requests/process.py +69 -29
  80. sky/server/requests/requests.py +4 -3
  81. sky/server/server.py +23 -5
  82. sky/server/stream_utils.py +111 -55
  83. sky/skylet/constants.py +4 -2
  84. sky/skylet/job_lib.py +2 -1
  85. sky/skypilot_config.py +108 -25
  86. sky/users/model.conf +1 -1
  87. sky/users/permission.py +149 -32
  88. sky/users/rbac.py +26 -0
  89. sky/users/server.py +14 -13
  90. sky/utils/admin_policy_utils.py +9 -3
  91. sky/utils/common.py +6 -1
  92. sky/utils/common_utils.py +21 -3
  93. sky/utils/context.py +21 -1
  94. sky/utils/controller_utils.py +16 -1
  95. sky/utils/kubernetes/exec_kubeconfig_converter.py +19 -47
  96. sky/utils/schemas.py +9 -0
  97. sky/workspaces/core.py +100 -8
  98. sky/workspaces/server.py +15 -2
  99. sky/workspaces/utils.py +56 -0
  100. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/METADATA +1 -1
  101. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/RECORD +106 -94
  102. sky/dashboard/out/_next/static/1qG0HTmVilJPxQdBk0fX5/_buildManifest.js +0 -1
  103. sky/dashboard/out/_next/static/chunks/236-619ed0248fb6fdd9.js +0 -6
  104. sky/dashboard/out/_next/static/chunks/293-351268365226d251.js +0 -1
  105. sky/dashboard/out/_next/static/chunks/37-600191c5804dcae2.js +0 -6
  106. sky/dashboard/out/_next/static/chunks/470-ad1e0db3afcbd9c9.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/614-635a84e87800f99e.js +0 -66
  108. sky/dashboard/out/_next/static/chunks/682-b60cfdacc15202e8.js +0 -6
  109. sky/dashboard/out/_next/static/chunks/843-c296541442d4af88.js +0 -11
  110. sky/dashboard/out/_next/static/chunks/856-3a32da4b84176f6d.js +0 -1
  111. sky/dashboard/out/_next/static/chunks/969-2c584e28e6b4b106.js +0 -1
  112. sky/dashboard/out/_next/static/chunks/973-6d78a0814682d771.js +0 -1
  113. sky/dashboard/out/_next/static/chunks/pages/_app-cb81dc4d27f4d009.js +0 -1
  114. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-18aed9b56247d074.js +0 -6
  115. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b919a73aecdfa78f.js +0 -6
  116. sky/dashboard/out/_next/static/chunks/pages/clusters-4f6b9dd9abcb33ad.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/pages/config-fe375a56342cf609.js +0 -6
  118. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-3a18d0eeb5119fe4.js +0 -1
  119. sky/dashboard/out/_next/static/chunks/pages/infra-a1a6abeeb58c1051.js +0 -1
  120. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1354e28c81eeb686.js +0 -16
  121. sky/dashboard/out/_next/static/chunks/pages/jobs-23bfc8bf373423db.js +0 -1
  122. sky/dashboard/out/_next/static/chunks/pages/users-5800045bd04e69c2.js +0 -16
  123. sky/dashboard/out/_next/static/chunks/pages/workspace/new-e1f9c0c3ff7ac4bd.js +0 -1
  124. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-686590e0ee4b2412.js +0 -1
  125. sky/dashboard/out/_next/static/chunks/pages/workspaces-76b07aa5da91b0df.js +0 -1
  126. sky/dashboard/out/_next/static/chunks/webpack-65d465f948974c0d.js +0 -1
  127. sky/dashboard/out/_next/static/css/667d941a2888ce6e.css +0 -3
  128. /sky/dashboard/out/_next/static/{1qG0HTmVilJPxQdBk0fX5 → 4lwUJxN6KwBqUxqO1VccB}/_ssgManifest.js +0 -0
  129. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/WHEEL +0 -0
  130. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/entry_points.txt +0 -0
  131. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/licenses/LICENSE +0 -0
  132. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/top_level.txt +0 -0
sky/skylet/constants.py CHANGED
@@ -377,8 +377,7 @@ OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
377
377
  ]
378
378
  # When overriding the SkyPilot configs on the API server with the client one,
379
379
  # we skip the following keys because they are meant to be client-side configs.
380
- SKIPPED_CLIENT_OVERRIDE_KEYS: List[Tuple[str, ...]] = [('admin_policy',),
381
- ('api_server',),
380
+ SKIPPED_CLIENT_OVERRIDE_KEYS: List[Tuple[str, ...]] = [('api_server',),
382
381
  ('allowed_clouds',),
383
382
  ('workspaces',), ('db',)]
384
383
 
@@ -419,3 +418,6 @@ ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
419
418
  'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
420
419
  'paperspace', 'do', 'nebius', 'ssh')
421
420
  # END constants used for service catalog.
421
+
422
+ # The user ID of the SkyPilot system.
423
+ SKYPILOT_SYSTEM_USER_ID = 'skypilot-system'
sky/skylet/job_lib.py CHANGED
@@ -794,7 +794,8 @@ def load_job_queue(payload: str) -> List[Dict[str, Any]]:
794
794
  for job in jobs:
795
795
  job['status'] = JobStatus(job['status'])
796
796
  job['user_hash'] = job['username']
797
- job['username'] = global_user_state.get_user(job['user_hash']).name
797
+ user = global_user_state.get_user(job['user_hash'])
798
+ job['username'] = user.name if user is not None else None
798
799
  return jobs
799
800
 
800
801
 
sky/skypilot_config.py CHANGED
@@ -58,6 +58,11 @@ import typing
58
58
  from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
59
59
 
60
60
  import filelock
61
+ import sqlalchemy
62
+ from sqlalchemy import orm
63
+ from sqlalchemy.dialects import postgresql
64
+ from sqlalchemy.dialects import sqlite
65
+ from sqlalchemy.ext import declarative
61
66
 
62
67
  from sky import exceptions
63
68
  from sky import sky_logging
@@ -66,6 +71,7 @@ from sky.skylet import constants
66
71
  from sky.utils import common_utils
67
72
  from sky.utils import config_utils
68
73
  from sky.utils import context
74
+ from sky.utils import db_utils
69
75
  from sky.utils import schemas
70
76
  from sky.utils import ux_utils
71
77
  from sky.utils.kubernetes import config_map_utils
@@ -110,6 +116,56 @@ ENV_VAR_PROJECT_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}PROJECT_CONFIG'
110
116
  _GLOBAL_CONFIG_PATH = '~/.sky/config.yaml'
111
117
  _PROJECT_CONFIG_PATH = '.sky.yaml'
112
118
 
119
+ _SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
120
+ API_SERVER_CONFIG_KEY = 'api_server_config'
121
+
122
+ Base = declarative.declarative_base()
123
+
124
+ config_yaml_table = sqlalchemy.Table(
125
+ 'config_yaml',
126
+ Base.metadata,
127
+ sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
128
+ sqlalchemy.Column('value', sqlalchemy.Text),
129
+ )
130
+
131
+
132
+ def create_table():
133
+ # Create tables if they don't exist
134
+ Base.metadata.create_all(bind=_SQLALCHEMY_ENGINE)
135
+
136
+
137
+ def _get_config_yaml_from_db(key: str) -> Optional[config_utils.Config]:
138
+ assert _SQLALCHEMY_ENGINE is not None
139
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
140
+ row = session.query(config_yaml_table).filter_by(key=key).first()
141
+ if row:
142
+ db_config = config_utils.Config(yaml.safe_load(row.value))
143
+ db_config.pop_nested(('db',), None)
144
+ return db_config
145
+ return None
146
+
147
+
148
+ def _set_config_yaml_to_db(key: str, config: config_utils.Config):
149
+ assert _SQLALCHEMY_ENGINE is not None
150
+ config.pop_nested(('db',), None)
151
+ config_str = common_utils.dump_yaml_str(dict(config))
152
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
153
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
154
+ db_utils.SQLAlchemyDialect.SQLITE.value):
155
+ insert_func = sqlite.insert
156
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
157
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
158
+ insert_func = postgresql.insert
159
+ else:
160
+ raise ValueError('Unsupported database dialect')
161
+ insert_stmnt = insert_func(config_yaml_table).values(key=key,
162
+ value=config_str)
163
+ do_update_stmt = insert_stmnt.on_conflict_do_update(
164
+ index_elements=[config_yaml_table.c.key],
165
+ set_={config_yaml_table.c.value: config_str})
166
+ session.execute(do_update_stmt)
167
+ session.commit()
168
+
113
169
 
114
170
  class ConfigContext:
115
171
 
@@ -257,11 +313,6 @@ def _resolve_project_config_path() -> Optional[str]:
257
313
  return None
258
314
 
259
315
 
260
- def _get_project_config() -> config_utils.Config:
261
- """Returns the project config."""
262
- return _get_config_from_path(_resolve_project_config_path())
263
-
264
-
265
316
  def _resolve_server_config_path() -> Optional[str]:
266
317
  # find the server config file
267
318
  server_config_path = _get_config_file_path(ENV_VAR_GLOBAL_CONFIG)
@@ -507,26 +558,35 @@ def _reload_config_from_internal_file(internal_config_path: str) -> None:
507
558
 
508
559
 
509
560
  def _reload_config_as_server() -> None:
561
+ global _SQLALCHEMY_ENGINE
510
562
  # Reset the global variables, to avoid using stale values.
511
563
  _set_loaded_config(config_utils.Config())
512
564
  _set_loaded_config_path(None)
513
565
 
514
- overrides: List[config_utils.Config] = []
515
566
  server_config_path = _resolve_server_config_path()
516
567
  server_config = _get_config_from_path(server_config_path)
517
- if server_config:
518
- overrides.append(server_config)
519
568
 
520
- # layer the configs on top of each other based on priority
521
- overlaid_server_config: config_utils.Config = config_utils.Config()
522
- for override in overrides:
523
- overlaid_server_config = overlay_skypilot_config(
524
- original_config=overlaid_server_config, override_configs=override)
525
569
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
526
- logger.debug(
527
- f'server config: \n'
528
- f'{common_utils.dump_yaml_str(dict(overlaid_server_config))}')
529
- _set_loaded_config(overlaid_server_config)
570
+ logger.debug(f'server config: \n'
571
+ f'{common_utils.dump_yaml_str(dict(server_config))}')
572
+
573
+ db_url = server_config.get_nested(('db',), None)
574
+ if db_url and len(server_config.keys()) > 1:
575
+ raise ValueError(
576
+ 'if db config is specified, no other config is allowed')
577
+
578
+ if db_url:
579
+ if _SQLALCHEMY_ENGINE is None:
580
+ _SQLALCHEMY_ENGINE = sqlalchemy.create_engine(db_url)
581
+ create_table()
582
+ db_config = _get_config_yaml_from_db(API_SERVER_CONFIG_KEY)
583
+ if db_config:
584
+ if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
585
+ logger.debug(f'Config loaded from db:\n'
586
+ f'{common_utils.dump_yaml_str(dict(db_config))}')
587
+ server_config = overlay_skypilot_config(server_config, db_config)
588
+
589
+ _set_loaded_config(server_config)
530
590
  _set_loaded_config_path(server_config_path)
531
591
 
532
592
 
@@ -765,17 +825,40 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
765
825
  Args:
766
826
  config: The config to save and sync.
767
827
  """
828
+
829
+ def is_running_pytest() -> bool:
830
+ return 'PYTEST_CURRENT_TEST' in os.environ
831
+
832
+ # Only allow this function to be called by the API Server in production.
833
+ if not is_running_pytest() and os.environ.get(
834
+ constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
835
+ raise ValueError('This function can only be called by the API Server.')
836
+
768
837
  global_config_path = _resolve_server_config_path()
769
838
  if global_config_path is None:
770
839
  global_config_path = get_user_config_path()
771
840
 
772
- # Always save to the local file (PVC in Kubernetes, local file otherwise)
773
- common_utils.dump_yaml(global_config_path, dict(config))
774
-
775
- if config_map_utils.is_running_in_kubernetes():
776
- # In Kubernetes, sync the PVC config to ConfigMap for user convenience
777
- # PVC file is the source of truth, ConfigMap is just a mirror for easy
778
- # access
779
- config_map_utils.patch_configmap_with_config(config, global_config_path)
841
+ db_updated = False
842
+ if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
843
+ existing_db_url = get_nested(('db',), None)
844
+ if existing_db_url:
845
+ new_db_url = config.get_nested(('db',), None)
846
+ if new_db_url and new_db_url != existing_db_url:
847
+ raise ValueError('Cannot change db url while server is running')
848
+ logger.debug('saving api_server config to db')
849
+ _set_config_yaml_to_db(API_SERVER_CONFIG_KEY, config)
850
+ db_updated = True
851
+
852
+ if not db_updated:
853
+ # save to the local file (PVC in Kubernetes, local file otherwise)
854
+ common_utils.dump_yaml(global_config_path, dict(config))
855
+
856
+ if config_map_utils.is_running_in_kubernetes():
857
+ # In Kubernetes, sync the PVC config to ConfigMap for user
858
+ # convenience.
859
+ # PVC file is the source of truth, ConfigMap is just a mirror for
860
+ # easy access.
861
+ config_map_utils.patch_configmap_with_config(
862
+ config, global_config_path)
780
863
 
781
864
  _reload_config()
sky/users/model.conf CHANGED
@@ -12,4 +12,4 @@ g = _, _
12
12
  e = some(where (p.eft == allow))
13
13
 
14
14
  [matchers]
15
- m = g(r.sub, p.sub) && r.obj == p.obj && r.act == p.act
15
+ m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj && r.act == p.act
sky/users/permission.py CHANGED
@@ -3,7 +3,7 @@ import contextlib
3
3
  import logging
4
4
  import os
5
5
  import threading
6
- from typing import List
6
+ from typing import Generator, List
7
7
 
8
8
  import casbin
9
9
  import filelock
@@ -11,8 +11,11 @@ import sqlalchemy_adapter
11
11
 
12
12
  from sky import global_user_state
13
13
  from sky import sky_logging
14
+ from sky.skylet import constants
14
15
  from sky.users import rbac
15
16
 
17
+ logging.getLogger('casbin.policy').setLevel(sky_logging.ERROR)
18
+ logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
16
19
  logger = sky_logging.init_logger(__name__)
17
20
 
18
21
  # Filelocks for the policy update.
@@ -33,22 +36,24 @@ class PermissionService:
33
36
  with _lock:
34
37
  if _enforcer_instance is None:
35
38
  _enforcer_instance = self
36
- engine = global_user_state.SQLALCHEMY_ENGINE
39
+ engine = global_user_state.initialize_and_get_db()
37
40
  adapter = sqlalchemy_adapter.Adapter(engine)
38
41
  model_path = os.path.join(os.path.dirname(__file__),
39
42
  'model.conf')
40
43
  enforcer = casbin.Enforcer(model_path, adapter)
41
- logging.getLogger('casbin.policy').setLevel(
42
- sky_logging.ERROR)
43
- logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
44
44
  self.enforcer = enforcer
45
45
  else:
46
46
  self.enforcer = _enforcer_instance.enforcer
47
- self._maybe_initialize_policies()
47
+ with _policy_lock():
48
+ self._maybe_initialize_policies()
48
49
 
49
- def _maybe_initialize_policies(self):
50
+ def _maybe_initialize_policies(self) -> None:
50
51
  """Initialize policies if they don't already exist."""
52
+ # TODO(zhwu): we should avoid running this on client side.
51
53
  logger.debug(f'Initializing policies in process: {os.getpid()}')
54
+ self._load_policy_no_lock()
55
+
56
+ policy_updated = False
52
57
 
53
58
  # Check if policies are already initialized by looking for existing
54
59
  # permission policies in the enforcer
@@ -66,6 +71,17 @@ class PermissionService:
66
71
  expected_policies.append(
67
72
  [role, item['path'], item['method']])
68
73
 
74
+ # Add workspace policy
75
+ workspace_policy_permissions = rbac.get_workspace_policy_permissions()
76
+ logger.debug(f'Workspace policy permissions from config: '
77
+ f'{workspace_policy_permissions}')
78
+
79
+ for workspace_name, users in workspace_policy_permissions.items():
80
+ for user in users:
81
+ expected_policies.append([user, workspace_name, '*'])
82
+ logger.debug(f'Expected workspace policy: user={user}, '
83
+ f'workspace={workspace_name}')
84
+
69
85
  # Check if all expected policies already exist
70
86
  policies_exist = all(
71
87
  any(policy == expected
@@ -86,48 +102,71 @@ class PermissionService:
86
102
  for item in blocklist:
87
103
  path = item['path']
88
104
  method = item['method']
105
+ logger.debug(f'Adding role policy: role={role}, '
106
+ f'path={path}, method={method}')
89
107
  self.enforcer.add_policy(role, path, method)
90
- self.enforcer.save_policy()
108
+ policy_updated = True
109
+
110
+ for workspace_name, users in workspace_policy_permissions.items():
111
+ for user in users:
112
+ logger.debug(f'Initializing workspace policy: user={user}, '
113
+ f'workspace={workspace_name}')
114
+ self.enforcer.add_policy(user, workspace_name, '*')
115
+ policy_updated = True
116
+ logger.debug('Policies initialized successfully')
91
117
  else:
92
118
  logger.debug('Policies already exist, skipping initialization')
93
119
 
94
120
  # Always ensure users have default roles (this is idempotent)
95
121
  all_users = global_user_state.get_all_users()
96
- for user in all_users:
97
- self.add_user_if_not_exists(user.id)
122
+ for existing_user in all_users:
123
+ user_added = self._add_user_if_not_exists_no_lock(existing_user.id)
124
+ policy_updated = policy_updated or user_added
125
+
126
+ if policy_updated:
127
+ self.enforcer.save_policy()
98
128
 
99
- def add_user_if_not_exists(self, user: str) -> None:
129
+ def add_user_if_not_exists(self, user_id: str) -> None:
100
130
  """Add user role relationship."""
101
131
  with _policy_lock():
102
- user_roles = self.enforcer.get_roles_for_user(user)
103
- if not user_roles:
104
- logger.info(f'User {user} has no roles, adding'
105
- f' default role {rbac.get_default_role()}')
106
- self.enforcer.add_grouping_policy(user, rbac.get_default_role())
107
- self.enforcer.save_policy()
108
-
109
- def update_role(self, user: str, new_role: str):
132
+ self._add_user_if_not_exists_no_lock(user_id)
133
+
134
+ def _add_user_if_not_exists_no_lock(self, user_id: str) -> bool:
135
+ """Add user role relationship without lock.
136
+
137
+ Returns:
138
+ True if the user was added, False otherwise.
139
+ """
140
+ user_roles = self.enforcer.get_roles_for_user(user_id)
141
+ if not user_roles:
142
+ logger.info(f'User {user_id} has no roles, adding'
143
+ f' default role {rbac.get_default_role()}')
144
+ self.enforcer.add_grouping_policy(user_id, rbac.get_default_role())
145
+ return True
146
+ return False
147
+
148
+ def update_role(self, user_id: str, new_role: str) -> None:
110
149
  """Update user role relationship."""
111
150
  with _policy_lock():
112
151
  # Get current roles
113
152
  self._load_policy_no_lock()
114
153
  # Avoid calling get_user_roles, as it will require the lock.
115
- current_roles = self.enforcer.get_roles_for_user(user)
154
+ current_roles = self.enforcer.get_roles_for_user(user_id)
116
155
  if not current_roles:
117
- logger.warning(f'User {user} has no roles')
156
+ logger.warning(f'User {user_id} has no roles')
118
157
  else:
119
158
  # TODO(hailong): how to handle multiple roles?
120
159
  current_role = current_roles[0]
121
160
  if current_role == new_role:
122
- logger.info(f'User {user} already has role {new_role}')
161
+ logger.info(f'User {user_id} already has role {new_role}')
123
162
  return
124
- self.enforcer.remove_grouping_policy(user, current_role)
163
+ self.enforcer.remove_grouping_policy(user_id, current_role)
125
164
 
126
165
  # Update user role
127
- self.enforcer.add_grouping_policy(user, new_role)
166
+ self.enforcer.add_grouping_policy(user_id, new_role)
128
167
  self.enforcer.save_policy()
129
168
 
130
- def get_user_roles(self, user: str) -> List[str]:
169
+ def get_user_roles(self, user_id: str) -> List[str]:
131
170
  """Get all roles for a user.
132
171
 
133
172
  This method returns all roles that the user has, including inherited
@@ -140,10 +179,11 @@ class PermissionService:
140
179
  Returns:
141
180
  A list of role names that the user has.
142
181
  """
143
- self._load_policy()
144
- return self.enforcer.get_roles_for_user(user)
182
+ self._load_policy_no_lock()
183
+ return self.enforcer.get_roles_for_user(user_id)
145
184
 
146
- def check_permission(self, user: str, path: str, method: str) -> bool:
185
+ def check_endpoint_permission(self, user_id: str, path: str,
186
+ method: str) -> bool:
147
187
  """Check permission."""
148
188
  # We intentionally don't load the policy here, as it is a hot path, and
149
189
  # we don't support updating the policy.
@@ -151,28 +191,105 @@ class PermissionService:
151
191
  # it is a hot path in every request. It is ok to have a stale policy,
152
192
  # as long as it is eventually consistent.
153
193
  # self._load_policy_no_lock()
154
- return self.enforcer.enforce(user, path, method)
194
+ return self.enforcer.enforce(user_id, path, method)
155
195
 
156
196
  def _load_policy_no_lock(self):
157
197
  """Load policy from storage."""
158
198
  self.enforcer.load_policy()
159
199
 
160
- def _load_policy(self):
200
+ def load_policy(self):
161
201
  """Load policy from storage with lock."""
162
202
  with _policy_lock():
163
203
  self._load_policy_no_lock()
164
204
 
205
+ def check_workspace_permission(self, user_id: str,
206
+ workspace_name: str) -> bool:
207
+ """Check workspace permission.
208
+
209
+ This method checks if a user has permission to access a specific
210
+ workspace.
211
+
212
+ For private workspaces, the user must have explicit permission.
213
+
214
+ For public workspaces, the permission is granted via a wildcard policy
215
+ ('*').
216
+ """
217
+ if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
218
+ # When it is not on API server, we allow all users to access all
219
+ # workspaces, as the workspace check has been done on API server.
220
+ return True
221
+ role = self.get_user_roles(user_id)
222
+ if rbac.RoleName.ADMIN.value in role:
223
+ return True
224
+ # The Casbin model matcher already handles the wildcard '*' case:
225
+ # m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj &&
226
+ # r.act == p.act
227
+ # This means if there's a policy ('*', workspace_name, '*'), it will
228
+ # match any user
229
+ result = self.enforcer.enforce(user_id, workspace_name, '*')
230
+ logger.debug(f'Workspace permission check: user={user_id}, '
231
+ f'workspace={workspace_name}, result={result}')
232
+ return result
233
+
234
+ def add_workspace_policy(self, workspace_name: str,
235
+ users: List[str]) -> None:
236
+ """Add workspace policy.
237
+
238
+ Args:
239
+ workspace_name: Name of the workspace
240
+ users: List of user IDs that should have access.
241
+ For public workspaces, this should be ['*'].
242
+ For private workspaces, this should be specific user IDs.
243
+ """
244
+ with _policy_lock():
245
+ for user in users:
246
+ logger.debug(f'Adding workspace policy: user={user}, '
247
+ f'workspace={workspace_name}')
248
+ self.enforcer.add_policy(user, workspace_name, '*')
249
+ self.enforcer.save_policy()
250
+
251
+ def update_workspace_policy(self, workspace_name: str,
252
+ users: List[str]) -> None:
253
+ """Update workspace policy.
254
+
255
+ Args:
256
+ workspace_name: Name of the workspace
257
+ users: List of user IDs that should have access.
258
+ For public workspaces, this should be ['*'].
259
+ For private workspaces, this should be specific user IDs.
260
+ """
261
+ with _policy_lock():
262
+ self._load_policy_no_lock()
263
+ # Remove all existing policies for this workspace
264
+ self.enforcer.remove_filtered_policy(1, workspace_name)
265
+ # Add new policies
266
+ for user in users:
267
+ logger.debug(f'Updating workspace policy: user={user}, '
268
+ f'workspace={workspace_name}')
269
+ self.enforcer.add_policy(user, workspace_name, '*')
270
+ self.enforcer.save_policy()
271
+
272
+ def remove_workspace_policy(self, workspace_name: str) -> None:
273
+ """Remove workspace policy."""
274
+ with _policy_lock():
275
+ self.enforcer.remove_filtered_policy(1, workspace_name)
276
+ self.enforcer.save_policy()
277
+
165
278
 
166
279
  @contextlib.contextmanager
167
- def _policy_lock():
280
+ def _policy_lock() -> Generator[None, None, None]:
168
281
  """Context manager for policy update lock."""
169
282
  try:
170
283
  with filelock.FileLock(POLICY_UPDATE_LOCK_PATH,
171
284
  POLICY_UPDATE_LOCK_TIMEOUT_SECONDS):
172
285
  yield
173
286
  except filelock.Timeout as e:
174
- raise RuntimeError(f'Failed to load policy due to a timeout '
287
+ raise RuntimeError(f'Failed to reload policy due to a timeout '
175
288
  f'when trying to acquire the lock at '
176
289
  f'{POLICY_UPDATE_LOCK_PATH}. '
177
290
  'Please try again or manually remove the lock '
178
291
  f'file if you believe it is stale.') from e
292
+
293
+
294
+ # Singleton instance of PermissionService for other modules to use.
295
+ permission_service = PermissionService()
sky/users/rbac.py CHANGED
@@ -5,6 +5,8 @@ from typing import Dict, List
5
5
 
6
6
  from sky import sky_logging
7
7
  from sky import skypilot_config
8
+ from sky.skylet import constants
9
+ from sky.workspaces import utils as workspaces_utils
8
10
 
9
11
  logger = sky_logging.init_logger(__name__)
10
12
 
@@ -84,3 +86,27 @@ def get_role_permissions(
84
86
  }
85
87
  }
86
88
  return config_permissions
89
+
90
+
91
+ def get_workspace_policy_permissions() -> Dict[str, List[str]]:
92
+ """Get workspace policy permissions from config.
93
+
94
+ Returns:
95
+ A dictionary of workspace policy permissions.
96
+ Example:
97
+ {
98
+ 'workspace1': ['user1-id', 'user2-id'],
99
+ 'workspace2': ['user3-id', 'user4-id']
100
+ 'default': ['*']
101
+ }
102
+ """
103
+ current_workspaces = skypilot_config.get_nested(('workspaces',),
104
+ default_value={})
105
+ if constants.SKYPILOT_DEFAULT_WORKSPACE not in current_workspaces:
106
+ current_workspaces[constants.SKYPILOT_DEFAULT_WORKSPACE] = {}
107
+ workspaces_to_policy = {}
108
+ for workspace_name, workspace_config in current_workspaces.items():
109
+ users = workspaces_utils.get_workspace_users(workspace_config)
110
+ workspaces_to_policy[workspace_name] = users
111
+ logger.debug(f'Workspace policy permissions: {workspaces_to_policy}')
112
+ return workspaces_to_policy
sky/users/server.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """REST API for workspace management."""
2
2
 
3
- import hashlib
4
3
  from typing import Any, Dict, List
5
4
 
6
5
  import fastapi
@@ -8,16 +7,15 @@ import fastapi
8
7
  from sky import global_user_state
9
8
  from sky import sky_logging
10
9
  from sky.server.requests import payloads
10
+ from sky.skylet import constants
11
11
  from sky.users import permission
12
12
  from sky.users import rbac
13
- from sky.utils import common_utils
13
+ from sky.utils import common
14
14
 
15
15
  logger = sky_logging.init_logger(__name__)
16
16
 
17
17
  router = fastapi.APIRouter()
18
18
 
19
- permission_service = permission.PermissionService()
20
-
21
19
 
22
20
  @router.get('')
23
21
  async def users() -> List[Dict[str, Any]]:
@@ -25,7 +23,7 @@ async def users() -> List[Dict[str, Any]]:
25
23
  all_users = []
26
24
  user_list = global_user_state.get_all_users()
27
25
  for user in user_list:
28
- user_roles = permission_service.get_user_roles(user.id)
26
+ user_roles = permission.permission_service.get_user_roles(user.id)
29
27
  all_users.append({
30
28
  'id': user.id,
31
29
  'name': user.name,
@@ -39,13 +37,11 @@ async def get_current_user_role(request: fastapi.Request):
39
37
  """Get current user's role."""
40
38
  # TODO(hailong): is there a reliable way to get the user
41
39
  # hash for the request without 'X-Auth-Request-Email' header?
42
- if 'X-Auth-Request-Email' not in request.headers:
40
+ auth_user = request.state.auth_user
41
+ if auth_user is None:
43
42
  return {'name': '', 'role': rbac.RoleName.ADMIN.value}
44
- user_name = request.headers['X-Auth-Request-Email']
45
- user_hash = hashlib.md5(
46
- user_name.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
47
- user_roles = permission_service.get_user_roles(user_hash)
48
- return {'name': user_name, 'role': user_roles[0] if user_roles else ''}
43
+ user_roles = permission.permission_service.get_user_roles(auth_user.id)
44
+ return {'name': auth_user.name, 'role': user_roles[0] if user_roles else ''}
49
45
 
50
46
 
51
47
  @router.post('/update')
@@ -58,9 +54,14 @@ async def user_update(user_update_body: payloads.UserUpdateBody) -> None:
58
54
  raise fastapi.HTTPException(status_code=400,
59
55
  detail=f'Invalid role: {role}')
60
56
  user_info = global_user_state.get_user(user_id)
61
- if not user_info.name:
57
+ if user_info is None:
62
58
  raise fastapi.HTTPException(status_code=400,
63
59
  detail=f'User {user_id} does not exist')
60
+ # Disallow updating roles for the internal users.
61
+ if user_info.id in [common.SERVER_ID, constants.SKYPILOT_SYSTEM_USER_ID]:
62
+ raise fastapi.HTTPException(status_code=400,
63
+ detail=f'Cannot update role for internal '
64
+ f'API server user {user_info.name}')
64
65
 
65
66
  # Update user role in casbin policy
66
- permission_service.update_role(user_id, role)
67
+ permission.permission_service.update_role(user_info.id, role)
sky/utils/admin_policy_utils.py CHANGED
@@ -55,6 +55,7 @@ def _get_policy_cls(
55
55
  def apply_and_use_config_in_current_request(
56
56
  entrypoint: Union['dag_lib.Dag', 'task_lib.Task'],
57
57
  request_options: Optional[admin_policy.RequestOptions] = None,
58
+ at_client_side: bool = False,
58
59
  ) -> Iterator['dag_lib.Dag']:
59
60
  """Applies an admin policy and override SkyPilot config for current request
60
61
 
@@ -66,7 +67,7 @@ def apply_and_use_config_in_current_request(
66
67
  Refer to `apply()` for more details.
67
68
  """
68
69
  original_config = skypilot_config.to_dict()
69
- dag, mutated_config = apply(entrypoint, request_options)
70
+ dag, mutated_config = apply(entrypoint, request_options, at_client_side)
70
71
  if mutated_config != original_config:
71
72
  with skypilot_config.replace_skypilot_config(mutated_config):
72
73
  yield dag
@@ -77,6 +78,7 @@ def apply_and_use_config_in_current_request(
77
78
  def apply(
78
79
  entrypoint: Union['dag_lib.Dag', 'task_lib.Task'],
79
80
  request_options: Optional[admin_policy.RequestOptions] = None,
81
+ at_client_side: bool = False,
80
82
  ) -> Tuple['dag_lib.Dag', config_utils.Config]:
81
83
  """Applies an admin policy (if registered) to a DAG or a task.
82
84
 
@@ -105,14 +107,18 @@ def apply(
105
107
  if policy_cls is None:
106
108
  return dag, skypilot_config.to_dict()
107
109
 
108
- logger.info(f'Applying policy: {policy}')
110
+ if at_client_side:
111
+ logger.info(f'Applying client admin policy: {policy}')
112
+ else:
113
+ logger.info(f'Applying server admin policy: {policy}')
109
114
  config = copy.deepcopy(skypilot_config.to_dict())
110
115
  mutated_dag = dag_lib.Dag()
111
116
  mutated_dag.name = dag.name
112
117
 
113
118
  mutated_config = None
114
119
  for task in dag.tasks:
115
- user_request = admin_policy.UserRequest(task, config, request_options)
120
+ user_request = admin_policy.UserRequest(task, config, request_options,
121
+ at_client_side)
116
122
  try:
117
123
  mutated_user_request = policy_cls.validate_and_mutate(user_request)
118
124
  except Exception as e: # pylint: disable=broad-except