skypilot-nightly 1.0.0.dev20250606__py3-none-any.whl → 1.0.0.dev20250609__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +21 -3
  3. sky/check.py +18 -22
  4. sky/cli.py +5 -8
  5. sky/client/cli.py +5 -8
  6. sky/client/sdk.py +2 -1
  7. sky/clouds/cloud.py +4 -0
  8. sky/clouds/nebius.py +44 -4
  9. sky/core.py +3 -2
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/chunks/236-619ed0248fb6fdd9.js +6 -0
  12. sky/dashboard/out/_next/static/chunks/470-680c19413b8f808b.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/{614-635a84e87800f99e.js → 63-e2d7b1e75e67c713.js} +8 -8
  14. sky/dashboard/out/_next/static/chunks/843-16c7194621b2b512.js +11 -0
  15. sky/dashboard/out/_next/static/chunks/969-2c584e28e6b4b106.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/973-aed916d5b02d2d63.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-65d04d5d77cbb6b6.js → [job]-d31688d3e52736dd.js} +1 -1
  18. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-35cbeb5214fd4036.js → [cluster]-e7d8710a9b0491e5.js} +1 -1
  19. sky/dashboard/out/_next/static/chunks/pages/{clusters-5549a350f97d7ef3.js → clusters-3c674e5d970e05cb.js} +1 -1
  20. sky/dashboard/out/_next/static/chunks/pages/config-3aac7a015c6eede1.js +6 -0
  21. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-b68ddeed712d45b5.js → [context]-46d2e4ad6c487260.js} +1 -1
  22. sky/dashboard/out/_next/static/chunks/pages/{infra-13b117a831702196.js → infra-7013d816a2a0e76c.js} +1 -1
  23. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-f7f0c9e156d328bc.js +16 -0
  24. sky/dashboard/out/_next/static/chunks/pages/{jobs-a76b2700eca236f7.js → jobs-87e60396c376292f.js} +1 -1
  25. sky/dashboard/out/_next/static/chunks/pages/users-9355a0f13d1db61d.js +16 -0
  26. sky/dashboard/out/_next/static/chunks/pages/workspace/{new-c7516f2b4c3727c0.js → new-9a749cca1813bd27.js} +1 -1
  27. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-7799de9e691e35d8.js → [name]-8eeb628e03902f1b.js} +1 -1
  28. sky/dashboard/out/_next/static/chunks/pages/workspaces-8fbcc5ab4af316d0.js +1 -0
  29. sky/dashboard/out/_next/static/css/8b1c8321d4c02372.css +3 -0
  30. sky/dashboard/out/_next/static/xos0euNCptbGAM7_Q3Acl/_buildManifest.js +1 -0
  31. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  32. sky/dashboard/out/clusters/[cluster].html +1 -1
  33. sky/dashboard/out/clusters.html +1 -1
  34. sky/dashboard/out/config.html +1 -1
  35. sky/dashboard/out/index.html +1 -1
  36. sky/dashboard/out/infra/[context].html +1 -1
  37. sky/dashboard/out/infra.html +1 -1
  38. sky/dashboard/out/jobs/[job].html +1 -1
  39. sky/dashboard/out/jobs.html +1 -1
  40. sky/dashboard/out/users.html +1 -1
  41. sky/dashboard/out/workspace/new.html +1 -1
  42. sky/dashboard/out/workspaces/[name].html +1 -1
  43. sky/dashboard/out/workspaces.html +1 -1
  44. sky/exceptions.py +5 -0
  45. sky/global_user_state.py +11 -6
  46. sky/jobs/scheduler.py +9 -4
  47. sky/jobs/server/core.py +23 -2
  48. sky/jobs/server/server.py +0 -95
  49. sky/jobs/state.py +18 -15
  50. sky/jobs/utils.py +2 -1
  51. sky/models.py +18 -0
  52. sky/provision/kubernetes/utils.py +12 -5
  53. sky/provision/nebius/constants.py +47 -0
  54. sky/provision/nebius/instance.py +2 -1
  55. sky/provision/nebius/utils.py +28 -7
  56. sky/serve/server/core.py +1 -1
  57. sky/server/common.py +4 -2
  58. sky/server/constants.py +0 -2
  59. sky/server/requests/executor.py +10 -2
  60. sky/server/requests/requests.py +4 -3
  61. sky/server/server.py +22 -5
  62. sky/skylet/constants.py +4 -0
  63. sky/skylet/job_lib.py +2 -1
  64. sky/skypilot_config.py +13 -1
  65. sky/templates/jobs-controller.yaml.j2 +3 -1
  66. sky/templates/nebius-ray.yml.j2 +6 -0
  67. sky/users/model.conf +1 -1
  68. sky/users/permission.py +148 -31
  69. sky/users/rbac.py +26 -0
  70. sky/users/server.py +14 -13
  71. sky/utils/common.py +6 -1
  72. sky/utils/common_utils.py +21 -3
  73. sky/utils/kubernetes/deploy_remote_cluster.py +5 -3
  74. sky/utils/resources_utils.py +3 -1
  75. sky/utils/schemas.py +9 -0
  76. sky/workspaces/core.py +100 -8
  77. sky/workspaces/server.py +15 -2
  78. sky/workspaces/utils.py +56 -0
  79. {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/METADATA +1 -1
  80. {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/RECORD +89 -87
  81. sky/dashboard/out/_next/static/99m-BAySO8Q7J-ul1jZVL/_buildManifest.js +0 -1
  82. sky/dashboard/out/_next/static/chunks/236-a90f0a9753a10420.js +0 -6
  83. sky/dashboard/out/_next/static/chunks/470-9e7a479cc8303baa.js +0 -1
  84. sky/dashboard/out/_next/static/chunks/843-c296541442d4af88.js +0 -11
  85. sky/dashboard/out/_next/static/chunks/969-c7abda31c10440ac.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/973-1a09cac61cfcc1e1.js +0 -1
  87. sky/dashboard/out/_next/static/chunks/pages/config-1a1eeb949dab8897.js +0 -6
  88. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-2d23a9c7571e6320.js +0 -16
  89. sky/dashboard/out/_next/static/chunks/pages/users-262aab38b9baaf3a.js +0 -16
  90. sky/dashboard/out/_next/static/chunks/pages/workspaces-384ea5fa0cea8f28.js +0 -1
  91. sky/dashboard/out/_next/static/css/667d941a2888ce6e.css +0 -3
  92. /sky/dashboard/out/_next/static/chunks/{37-beedd583fea84cc8.js → 37-600191c5804dcae2.js} +0 -0
  93. /sky/dashboard/out/_next/static/chunks/{682-6647f0417d5662f0.js → 682-b60cfdacc15202e8.js} +0 -0
  94. /sky/dashboard/out/_next/static/chunks/{856-3a32da4b84176f6d.js → 856-affc52adf5403a3a.js} +0 -0
  95. /sky/dashboard/out/_next/static/chunks/pages/{_app-cb81dc4d27f4d009.js → _app-5f16aba5794ee8e7.js} +0 -0
  96. /sky/dashboard/out/_next/static/{99m-BAySO8Q7J-ul1jZVL → xos0euNCptbGAM7_Q3Acl}/_ssgManifest.js +0 -0
  97. {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/WHEEL +0 -0
  98. {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/entry_points.txt +0 -0
  99. {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/licenses/LICENSE +0 -0
  100. {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/top_level.txt +0 -0
sky/skylet/constants.py CHANGED
@@ -367,6 +367,7 @@ RCLONE_CACHE_REFRESH_INTERVAL = 10
367
367
  OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
368
368
  ('docker', 'run_options'),
369
369
  ('nvidia_gpus', 'disable_ecc'),
370
+ ('ssh', 'pod_config'),
370
371
  ('kubernetes', 'pod_config'),
371
372
  ('kubernetes', 'provision_timeout'),
372
373
  ('gcp', 'managed_instance_group'),
@@ -418,3 +419,6 @@ ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
418
419
  'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
419
420
  'paperspace', 'do', 'nebius', 'ssh')
420
421
  # END constants used for service catalog.
422
+
423
+ # The user ID of the SkyPilot system.
424
+ SKYPILOT_SYSTEM_USER_ID = 'skypilot-system'
sky/skylet/job_lib.py CHANGED
@@ -794,7 +794,8 @@ def load_job_queue(payload: str) -> List[Dict[str, Any]]:
794
794
  for job in jobs:
795
795
  job['status'] = JobStatus(job['status'])
796
796
  job['user_hash'] = job['username']
797
- job['username'] = global_user_state.get_user(job['user_hash']).name
797
+ user = global_user_state.get_user(job['user_hash'])
798
+ job['username'] = user.name if user is not None else None
798
799
  return jobs
799
800
 
800
801
 
sky/skypilot_config.py CHANGED
@@ -167,7 +167,10 @@ def _get_loaded_config_path() -> List[Optional[str]]:
167
167
  serialized = _get_config_context().config_path
168
168
  if not serialized:
169
169
  return []
170
- return json.loads(serialized)
170
+ config_paths = json.loads(serialized)
171
+ if config_paths is None:
172
+ return []
173
+ return config_paths
171
174
 
172
175
 
173
176
  def _set_loaded_config_path(
@@ -762,6 +765,15 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
762
765
  Args:
763
766
  config: The config to save and sync.
764
767
  """
768
+
769
+ def is_running_pytest() -> bool:
770
+ return 'PYTEST_CURRENT_TEST' in os.environ
771
+
772
+ # Only allow this function to be called by the API Server in production.
773
+ if not is_running_pytest() and os.environ.get(
774
+ constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
775
+ raise ValueError('This function can only be called by the API Server.')
776
+
765
777
  global_config_path = _resolve_server_config_path()
766
778
  if global_config_path is None:
767
779
  global_config_path = get_user_config_path()
sky/templates/jobs-controller.yaml.j2 CHANGED
@@ -3,6 +3,7 @@
3
3
  name: {{dag_name}}
4
4
 
5
5
  file_mounts:
6
+ {{remote_original_user_yaml_path}}: {{original_user_dag_path}}
6
7
  {{remote_user_yaml_path}}: {{user_yaml_path}}
7
8
  {%- if local_user_config_path is not none %}
8
9
  {{remote_user_config_path}}: {{local_user_config_path}}
@@ -28,7 +29,7 @@ setup: |
28
29
  grep -q 'export SKYPILOT_DEV=' ~/.bashrc || echo 'export SKYPILOT_DEV=1' >> ~/.bashrc
29
30
  grep -q 'alias sky-env=' ~/.bashrc || echo 'alias sky-env="{{ sky_activate_python_env }}"' >> ~/.bashrc
30
31
  {% endif %}
31
-
32
+
32
33
  # Create systemd service file
33
34
  mkdir -p ~/.config/systemd/user/
34
35
 
@@ -65,6 +66,7 @@ run: |
65
66
  # CloudVmRayBackend._exec_code_on_head() calls
66
67
  # managed_job_codegen.set_pending() before we get here.
67
68
  python -u -m sky.jobs.scheduler {{remote_user_yaml_path}} \
69
+ --user-yaml-path {{remote_original_user_yaml_path}} \
68
70
  --job-id $SKYPILOT_INTERNAL_JOB_ID \
69
71
  --env-file {{remote_env_file_path}} \
70
72
  --priority {{priority}}
sky/templates/nebius-ray.yml.j2 CHANGED
@@ -46,6 +46,7 @@ available_node_types:
46
46
  InstanceType: {{instance_type}}
47
47
  ImageId: {{image_id}}
48
48
  DiskSize: {{disk_size}}
49
+ network_tier: {{network_tier}}
49
50
  filesystems:
50
51
  {%- for fs in filesystems %}
51
52
  - filesystem_id: {{ fs.filesystem_id }}
@@ -152,6 +153,11 @@ setup_commands:
152
153
  mkdir -p ~/.ssh; touch ~/.ssh/config;
153
154
  {{ conda_installation_commands }}
154
155
  {{ ray_skypilot_installation_commands }}
156
+ {%- if env_vars is defined %}
157
+ {%- for env_var, env_value in env_vars.items() %}
158
+ echo '{{env_var}}={{env_value}}' | sudo tee -a /etc/environment;
159
+ {%- endfor %}
160
+ {%- endif %}
155
161
  sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
156
162
  sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
157
163
  mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
sky/users/model.conf CHANGED
@@ -12,4 +12,4 @@ g = _, _
12
12
  e = some(where (p.eft == allow))
13
13
 
14
14
  [matchers]
15
- m = g(r.sub, p.sub) && r.obj == p.obj && r.act == p.act
15
+ m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj && r.act == p.act
sky/users/permission.py CHANGED
@@ -3,7 +3,7 @@ import contextlib
3
3
  import logging
4
4
  import os
5
5
  import threading
6
- from typing import List
6
+ from typing import Generator, List
7
7
 
8
8
  import casbin
9
9
  import filelock
@@ -11,8 +11,11 @@ import sqlalchemy_adapter
11
11
 
12
12
  from sky import global_user_state
13
13
  from sky import sky_logging
14
+ from sky.skylet import constants
14
15
  from sky.users import rbac
15
16
 
17
+ logging.getLogger('casbin.policy').setLevel(sky_logging.ERROR)
18
+ logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
16
19
  logger = sky_logging.init_logger(__name__)
17
20
 
18
21
  # Filelocks for the policy update.
@@ -38,17 +41,19 @@ class PermissionService:
38
41
  model_path = os.path.join(os.path.dirname(__file__),
39
42
  'model.conf')
40
43
  enforcer = casbin.Enforcer(model_path, adapter)
41
- logging.getLogger('casbin.policy').setLevel(
42
- sky_logging.ERROR)
43
- logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
44
44
  self.enforcer = enforcer
45
45
  else:
46
46
  self.enforcer = _enforcer_instance.enforcer
47
- self._maybe_initialize_policies()
47
+ with _policy_lock():
48
+ self._maybe_initialize_policies()
48
49
 
49
- def _maybe_initialize_policies(self):
50
+ def _maybe_initialize_policies(self) -> None:
50
51
  """Initialize policies if they don't already exist."""
52
+ # TODO(zhwu): we should avoid running this on client side.
51
53
  logger.debug(f'Initializing policies in process: {os.getpid()}')
54
+ self._load_policy_no_lock()
55
+
56
+ policy_updated = False
52
57
 
53
58
  # Check if policies are already initialized by looking for existing
54
59
  # permission policies in the enforcer
@@ -66,6 +71,17 @@ class PermissionService:
66
71
  expected_policies.append(
67
72
  [role, item['path'], item['method']])
68
73
 
74
+ # Add workspace policy
75
+ workspace_policy_permissions = rbac.get_workspace_policy_permissions()
76
+ logger.debug(f'Workspace policy permissions from config: '
77
+ f'{workspace_policy_permissions}')
78
+
79
+ for workspace_name, users in workspace_policy_permissions.items():
80
+ for user in users:
81
+ expected_policies.append([user, workspace_name, '*'])
82
+ logger.debug(f'Expected workspace policy: user={user}, '
83
+ f'workspace={workspace_name}')
84
+
69
85
  # Check if all expected policies already exist
70
86
  policies_exist = all(
71
87
  any(policy == expected
@@ -86,48 +102,71 @@ class PermissionService:
86
102
  for item in blocklist:
87
103
  path = item['path']
88
104
  method = item['method']
105
+ logger.debug(f'Adding role policy: role={role}, '
106
+ f'path={path}, method={method}')
89
107
  self.enforcer.add_policy(role, path, method)
90
- self.enforcer.save_policy()
108
+ policy_updated = True
109
+
110
+ for workspace_name, users in workspace_policy_permissions.items():
111
+ for user in users:
112
+ logger.debug(f'Initializing workspace policy: user={user}, '
113
+ f'workspace={workspace_name}')
114
+ self.enforcer.add_policy(user, workspace_name, '*')
115
+ policy_updated = True
116
+ logger.debug('Policies initialized successfully')
91
117
  else:
92
118
  logger.debug('Policies already exist, skipping initialization')
93
119
 
94
120
  # Always ensure users have default roles (this is idempotent)
95
121
  all_users = global_user_state.get_all_users()
96
- for user in all_users:
97
- self.add_user_if_not_exists(user.id)
122
+ for existing_user in all_users:
123
+ user_added = self._add_user_if_not_exists_no_lock(existing_user.id)
124
+ policy_updated = policy_updated or user_added
125
+
126
+ if policy_updated:
127
+ self.enforcer.save_policy()
98
128
 
99
- def add_user_if_not_exists(self, user: str) -> None:
129
+ def add_user_if_not_exists(self, user_id: str) -> None:
100
130
  """Add user role relationship."""
101
131
  with _policy_lock():
102
- user_roles = self.enforcer.get_roles_for_user(user)
103
- if not user_roles:
104
- logger.info(f'User {user} has no roles, adding'
105
- f' default role {rbac.get_default_role()}')
106
- self.enforcer.add_grouping_policy(user, rbac.get_default_role())
107
- self.enforcer.save_policy()
108
-
109
- def update_role(self, user: str, new_role: str):
132
+ self._add_user_if_not_exists_no_lock(user_id)
133
+
134
+ def _add_user_if_not_exists_no_lock(self, user_id: str) -> bool:
135
+ """Add user role relationship without lock.
136
+
137
+ Returns:
138
+ True if the user was added, False otherwise.
139
+ """
140
+ user_roles = self.enforcer.get_roles_for_user(user_id)
141
+ if not user_roles:
142
+ logger.info(f'User {user_id} has no roles, adding'
143
+ f' default role {rbac.get_default_role()}')
144
+ self.enforcer.add_grouping_policy(user_id, rbac.get_default_role())
145
+ return True
146
+ return False
147
+
148
+ def update_role(self, user_id: str, new_role: str) -> None:
110
149
  """Update user role relationship."""
111
150
  with _policy_lock():
112
151
  # Get current roles
113
152
  self._load_policy_no_lock()
114
153
  # Avoid calling get_user_roles, as it will require the lock.
115
- current_roles = self.enforcer.get_roles_for_user(user)
154
+ current_roles = self.enforcer.get_roles_for_user(user_id)
116
155
  if not current_roles:
117
- logger.warning(f'User {user} has no roles')
156
+ logger.warning(f'User {user_id} has no roles')
118
157
  else:
119
158
  # TODO(hailong): how to handle multiple roles?
120
159
  current_role = current_roles[0]
121
160
  if current_role == new_role:
122
- logger.info(f'User {user} already has role {new_role}')
161
+ logger.info(f'User {user_id} already has role {new_role}')
123
162
  return
124
- self.enforcer.remove_grouping_policy(user, current_role)
163
+ self.enforcer.remove_grouping_policy(user_id, current_role)
125
164
 
126
165
  # Update user role
127
- self.enforcer.add_grouping_policy(user, new_role)
166
+ self.enforcer.add_grouping_policy(user_id, new_role)
128
167
  self.enforcer.save_policy()
129
168
 
130
- def get_user_roles(self, user: str) -> List[str]:
169
+ def get_user_roles(self, user_id: str) -> List[str]:
131
170
  """Get all roles for a user.
132
171
 
133
172
  This method returns all roles that the user has, including inherited
@@ -140,10 +179,11 @@ class PermissionService:
140
179
  Returns:
141
180
  A list of role names that the user has.
142
181
  """
143
- self._load_policy()
144
- return self.enforcer.get_roles_for_user(user)
182
+ self._load_policy_no_lock()
183
+ return self.enforcer.get_roles_for_user(user_id)
145
184
 
146
- def check_permission(self, user: str, path: str, method: str) -> bool:
185
+ def check_endpoint_permission(self, user_id: str, path: str,
186
+ method: str) -> bool:
147
187
  """Check permission."""
148
188
  # We intentionally don't load the policy here, as it is a hot path, and
149
189
  # we don't support updating the policy.
@@ -151,28 +191,105 @@ class PermissionService:
151
191
  # it is a hot path in every request. It is ok to have a stale policy,
152
192
  # as long as it is eventually consistent.
153
193
  # self._load_policy_no_lock()
154
- return self.enforcer.enforce(user, path, method)
194
+ return self.enforcer.enforce(user_id, path, method)
155
195
 
156
196
  def _load_policy_no_lock(self):
157
197
  """Load policy from storage."""
158
198
  self.enforcer.load_policy()
159
199
 
160
- def _load_policy(self):
200
+ def load_policy(self):
161
201
  """Load policy from storage with lock."""
162
202
  with _policy_lock():
163
203
  self._load_policy_no_lock()
164
204
 
205
+ def check_workspace_permission(self, user_id: str,
206
+ workspace_name: str) -> bool:
207
+ """Check workspace permission.
208
+
209
+ This method checks if a user has permission to access a specific
210
+ workspace.
211
+
212
+ For private workspaces, the user must have explicit permission.
213
+
214
+ For public workspaces, the permission is granted via a wildcard policy
215
+ ('*').
216
+ """
217
+ if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
218
+ # When it is not on API server, we allow all users to access all
219
+ # workspaces, as the workspace check has been done on API server.
220
+ return True
221
+ role = self.get_user_roles(user_id)
222
+ if rbac.RoleName.ADMIN.value in role:
223
+ return True
224
+ # The Casbin model matcher already handles the wildcard '*' case:
225
+ # m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj &&
226
+ # r.act == p.act
227
+ # This means if there's a policy ('*', workspace_name, '*'), it will
228
+ # match any user
229
+ result = self.enforcer.enforce(user_id, workspace_name, '*')
230
+ logger.debug(f'Workspace permission check: user={user_id}, '
231
+ f'workspace={workspace_name}, result={result}')
232
+ return result
233
+
234
+ def add_workspace_policy(self, workspace_name: str,
235
+ users: List[str]) -> None:
236
+ """Add workspace policy.
237
+
238
+ Args:
239
+ workspace_name: Name of the workspace
240
+ users: List of user IDs that should have access.
241
+ For public workspaces, this should be ['*'].
242
+ For private workspaces, this should be specific user IDs.
243
+ """
244
+ with _policy_lock():
245
+ for user in users:
246
+ logger.debug(f'Adding workspace policy: user={user}, '
247
+ f'workspace={workspace_name}')
248
+ self.enforcer.add_policy(user, workspace_name, '*')
249
+ self.enforcer.save_policy()
250
+
251
+ def update_workspace_policy(self, workspace_name: str,
252
+ users: List[str]) -> None:
253
+ """Update workspace policy.
254
+
255
+ Args:
256
+ workspace_name: Name of the workspace
257
+ users: List of user IDs that should have access.
258
+ For public workspaces, this should be ['*'].
259
+ For private workspaces, this should be specific user IDs.
260
+ """
261
+ with _policy_lock():
262
+ self._load_policy_no_lock()
263
+ # Remove all existing policies for this workspace
264
+ self.enforcer.remove_filtered_policy(1, workspace_name)
265
+ # Add new policies
266
+ for user in users:
267
+ logger.debug(f'Updating workspace policy: user={user}, '
268
+ f'workspace={workspace_name}')
269
+ self.enforcer.add_policy(user, workspace_name, '*')
270
+ self.enforcer.save_policy()
271
+
272
+ def remove_workspace_policy(self, workspace_name: str) -> None:
273
+ """Remove workspace policy."""
274
+ with _policy_lock():
275
+ self.enforcer.remove_filtered_policy(1, workspace_name)
276
+ self.enforcer.save_policy()
277
+
165
278
 
166
279
  @contextlib.contextmanager
167
- def _policy_lock():
280
+ def _policy_lock() -> Generator[None, None, None]:
168
281
  """Context manager for policy update lock."""
169
282
  try:
170
283
  with filelock.FileLock(POLICY_UPDATE_LOCK_PATH,
171
284
  POLICY_UPDATE_LOCK_TIMEOUT_SECONDS):
172
285
  yield
173
286
  except filelock.Timeout as e:
174
- raise RuntimeError(f'Failed to load policy due to a timeout '
287
+ raise RuntimeError(f'Failed to reload policy due to a timeout '
175
288
  f'when trying to acquire the lock at '
176
289
  f'{POLICY_UPDATE_LOCK_PATH}. '
177
290
  'Please try again or manually remove the lock '
178
291
  f'file if you believe it is stale.') from e
292
+
293
+
294
+ # Singleton instance of PermissionService for other modules to use.
295
+ permission_service = PermissionService()
sky/users/rbac.py CHANGED
@@ -5,6 +5,8 @@ from typing import Dict, List
5
5
 
6
6
  from sky import sky_logging
7
7
  from sky import skypilot_config
8
+ from sky.skylet import constants
9
+ from sky.workspaces import utils as workspaces_utils
8
10
 
9
11
  logger = sky_logging.init_logger(__name__)
10
12
 
@@ -84,3 +86,27 @@ def get_role_permissions(
84
86
  }
85
87
  }
86
88
  return config_permissions
89
+
90
+
91
+ def get_workspace_policy_permissions() -> Dict[str, List[str]]:
92
+ """Get workspace policy permissions from config.
93
+
94
+ Returns:
95
+ A dictionary of workspace policy permissions.
96
+ Example:
97
+ {
98
+ 'workspace1': ['user1-id', 'user2-id'],
99
+ 'workspace2': ['user3-id', 'user4-id']
100
+ 'default': ['*']
101
+ }
102
+ """
103
+ current_workspaces = skypilot_config.get_nested(('workspaces',),
104
+ default_value={})
105
+ if constants.SKYPILOT_DEFAULT_WORKSPACE not in current_workspaces:
106
+ current_workspaces[constants.SKYPILOT_DEFAULT_WORKSPACE] = {}
107
+ workspaces_to_policy = {}
108
+ for workspace_name, workspace_config in current_workspaces.items():
109
+ users = workspaces_utils.get_workspace_users(workspace_config)
110
+ workspaces_to_policy[workspace_name] = users
111
+ logger.debug(f'Workspace policy permissions: {workspaces_to_policy}')
112
+ return workspaces_to_policy
sky/users/server.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """REST API for workspace management."""
2
2
 
3
- import hashlib
4
3
  from typing import Any, Dict, List
5
4
 
6
5
  import fastapi
@@ -8,16 +7,15 @@ import fastapi
8
7
  from sky import global_user_state
9
8
  from sky import sky_logging
10
9
  from sky.server.requests import payloads
10
+ from sky.skylet import constants
11
11
  from sky.users import permission
12
12
  from sky.users import rbac
13
- from sky.utils import common_utils
13
+ from sky.utils import common
14
14
 
15
15
  logger = sky_logging.init_logger(__name__)
16
16
 
17
17
  router = fastapi.APIRouter()
18
18
 
19
- permission_service = permission.PermissionService()
20
-
21
19
 
22
20
  @router.get('')
23
21
  async def users() -> List[Dict[str, Any]]:
@@ -25,7 +23,7 @@ async def users() -> List[Dict[str, Any]]:
25
23
  all_users = []
26
24
  user_list = global_user_state.get_all_users()
27
25
  for user in user_list:
28
- user_roles = permission_service.get_user_roles(user.id)
26
+ user_roles = permission.permission_service.get_user_roles(user.id)
29
27
  all_users.append({
30
28
  'id': user.id,
31
29
  'name': user.name,
@@ -39,13 +37,11 @@ async def get_current_user_role(request: fastapi.Request):
39
37
  """Get current user's role."""
40
38
  # TODO(hailong): is there a reliable way to get the user
41
39
  # hash for the request without 'X-Auth-Request-Email' header?
42
- if 'X-Auth-Request-Email' not in request.headers:
40
+ auth_user = request.state.auth_user
41
+ if auth_user is None:
43
42
  return {'name': '', 'role': rbac.RoleName.ADMIN.value}
44
- user_name = request.headers['X-Auth-Request-Email']
45
- user_hash = hashlib.md5(
46
- user_name.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
47
- user_roles = permission_service.get_user_roles(user_hash)
48
- return {'name': user_name, 'role': user_roles[0] if user_roles else ''}
43
+ user_roles = permission.permission_service.get_user_roles(auth_user.id)
44
+ return {'name': auth_user.name, 'role': user_roles[0] if user_roles else ''}
49
45
 
50
46
 
51
47
  @router.post('/update')
@@ -58,9 +54,14 @@ async def user_update(user_update_body: payloads.UserUpdateBody) -> None:
58
54
  raise fastapi.HTTPException(status_code=400,
59
55
  detail=f'Invalid role: {role}')
60
56
  user_info = global_user_state.get_user(user_id)
61
- if not user_info.name:
57
+ if user_info is None:
62
58
  raise fastapi.HTTPException(status_code=400,
63
59
  detail=f'User {user_id} does not exist')
60
+ # Disallow updating roles for the internal users.
61
+ if user_info.id in [common.SERVER_ID, constants.SKYPILOT_SYSTEM_USER_ID]:
62
+ raise fastapi.HTTPException(status_code=400,
63
+ detail=f'Cannot update role for internal '
64
+ f'API server user {user_info.name}')
64
65
 
65
66
  # Update user role in casbin policy
66
- permission_service.update_role(user_id, role)
67
+ permission.permission_service.update_role(user_info.id, role)
sky/utils/common.py CHANGED
@@ -5,6 +5,7 @@ import enum
5
5
  import os
6
6
  from typing import Generator
7
7
 
8
+ from sky import models
8
9
  from sky.skylet import constants
9
10
  from sky.utils import common_utils
10
11
 
@@ -25,10 +26,13 @@ JOB_CONTROLLER_NAME: str = f'{JOB_CONTROLLER_PREFIX}{SERVER_ID}'
25
26
 
26
27
 
27
28
  @contextlib.contextmanager
28
- def with_server_user_hash() -> Generator[None, None, None]:
29
+ def with_server_user() -> Generator[None, None, None]:
29
30
  """Temporarily set the user hash to common.SERVER_ID."""
30
31
  old_env_user_hash = os.getenv(constants.USER_ID_ENV_VAR)
32
+ # TODO(zhwu): once we have fully moved our code to use `get_current_user()`
33
+ # instead of `common_utils.get_user_hash()`, we can remove the env override.
31
34
  os.environ[constants.USER_ID_ENV_VAR] = SERVER_ID
35
+ common_utils.set_current_user(models.User.get_current_user())
32
36
  try:
33
37
  yield
34
38
  finally:
@@ -36,6 +40,7 @@ def with_server_user_hash() -> Generator[None, None, None]:
36
40
  os.environ[constants.USER_ID_ENV_VAR] = old_env_user_hash
37
41
  else:
38
42
  os.environ.pop(constants.USER_ID_ENV_VAR)
43
+ common_utils.set_current_user(models.User.get_current_user())
39
44
 
40
45
 
41
46
  class StatusRefreshMode(enum.Enum):
sky/utils/common_utils.py CHANGED
@@ -20,6 +20,7 @@ import uuid
20
20
  import jsonschema
21
21
 
22
22
  from sky import exceptions
23
+ from sky import models
23
24
  from sky import sky_logging
24
25
  from sky.adaptors import common as adaptors_common
25
26
  from sky.skylet import constants
@@ -256,11 +257,13 @@ class Backoff:
256
257
  _current_command: Optional[str] = None
257
258
  _current_client_entrypoint: Optional[str] = None
258
259
  _using_remote_api_server: Optional[bool] = None
260
+ _current_user: Optional['models.User'] = None
259
261
 
260
262
 
261
- def set_client_status(client_entrypoint: Optional[str],
262
- client_command: Optional[str],
263
- using_remote_api_server: bool):
263
+ def set_request_context(client_entrypoint: Optional[str],
264
+ client_command: Optional[str],
265
+ using_remote_api_server: bool,
266
+ user: Optional['models.User']):
264
267
  """Override the current client entrypoint and command.
265
268
 
266
269
  This is useful when we are on the SkyPilot API server side and we have a
@@ -269,9 +272,11 @@ def set_client_status(client_entrypoint: Optional[str],
269
272
  global _current_command
270
273
  global _current_client_entrypoint
271
274
  global _using_remote_api_server
275
+ global _current_user
272
276
  _current_command = client_command
273
277
  _current_client_entrypoint = client_entrypoint
274
278
  _using_remote_api_server = using_remote_api_server
279
+ _current_user = user
275
280
 
276
281
 
277
282
  def get_current_command() -> str:
@@ -286,6 +291,19 @@ def get_current_command() -> str:
286
291
  return get_pretty_entrypoint_cmd()
287
292
 
288
293
 
294
+ def get_current_user() -> 'models.User':
295
+ """Returns the current user."""
296
+ if _current_user is not None:
297
+ return _current_user
298
+ return models.User.get_current_user()
299
+
300
+
301
+ def set_current_user(user: 'models.User'):
302
+ """Sets the current user."""
303
+ global _current_user
304
+ _current_user = user
305
+
306
+
289
307
  def get_current_client_entrypoint(server_entrypoint: str) -> str:
290
308
  """Returns the current client entrypoint.
291
309
 
sky/utils/kubernetes/deploy_remote_cluster.py CHANGED
@@ -723,14 +723,16 @@ def main():
723
723
  # Do not support changing anything besides hosts for now
724
724
  if history is not None:
725
725
  for key in ['user', 'identity_file', 'password']:
726
- if history.get(key) != cluster_config.get(key):
726
+ if not args.cleanup and history.get(
727
+ key) != cluster_config.get(key):
727
728
  raise ValueError(
728
729
  f'Cluster configuration has changed for field {key!r}. '
729
730
  f'Previous value: {history.get(key)}, '
730
731
  f'Current value: {cluster_config.get(key)}')
731
732
  history_hosts_info = prepare_hosts_info(
732
733
  cluster_name, history)
733
- if history_hosts_info[0] != hosts_info[0]:
734
+ if not args.cleanup and history_hosts_info[0] != hosts_info[
735
+ 0]:
734
736
  raise ValueError(
735
737
  f'Cluster configuration has changed for master node. '
736
738
  f'Previous value: {history_hosts_info[0]}, '
@@ -860,7 +862,7 @@ def deploy_cluster(head_node,
860
862
  use_ssh_config=head_use_ssh_config,
861
863
  # For SkySSHUpLineProcessor
862
864
  print_output=True)
863
- if result is None:
865
+ if not cleanup and result is None:
864
866
  with ux_utils.print_exception_no_traceback():
865
867
  raise RuntimeError(
866
868
  f'Failed to SSH to head node ({head_node}). '
sky/utils/resources_utils.py CHANGED
@@ -63,7 +63,9 @@ class NetworkTier(enum.Enum):
63
63
  def cli_help_message(cls) -> str:
64
64
  return (
65
65
  f'Network tier. Could be one of {", ".join(cls.supported_tiers())}'
66
- f'. Default: {cls.STANDARD.value}')
66
+ f'. If {cls.BEST.value} is specified, use the best network tier '
67
+ 'available on the specified instance. '
68
+ f'Default: {cls.STANDARD.value}')
67
69
 
68
70
  @classmethod
69
71
  def from_str(cls, tier: str) -> 'NetworkTier':
sky/utils/schemas.py CHANGED
@@ -1249,6 +1249,15 @@ def get_config_schema():
1249
1249
  'properties': {
1250
1250
  # Explicit definition for GCP allows both project_id and
1251
1251
  # disabled
1252
+ 'private': {
1253
+ 'type': 'boolean',
1254
+ },
1255
+ 'allowed_users': {
1256
+ 'type': 'array',
1257
+ 'items': {
1258
+ 'type': 'string',
1259
+ },
1260
+ },
1252
1261
  'gcp': {
1253
1262
  'type': 'object',
1254
1263
  'properties': {