skypilot-nightly 1.0.0.dev20250606__py3-none-any.whl → 1.0.0.dev20250609__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +21 -3
- sky/check.py +18 -22
- sky/cli.py +5 -8
- sky/client/cli.py +5 -8
- sky/client/sdk.py +2 -1
- sky/clouds/cloud.py +4 -0
- sky/clouds/nebius.py +44 -4
- sky/core.py +3 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/236-619ed0248fb6fdd9.js +6 -0
- sky/dashboard/out/_next/static/chunks/470-680c19413b8f808b.js +1 -0
- sky/dashboard/out/_next/static/chunks/{614-635a84e87800f99e.js → 63-e2d7b1e75e67c713.js} +8 -8
- sky/dashboard/out/_next/static/chunks/843-16c7194621b2b512.js +11 -0
- sky/dashboard/out/_next/static/chunks/969-2c584e28e6b4b106.js +1 -0
- sky/dashboard/out/_next/static/chunks/973-aed916d5b02d2d63.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-65d04d5d77cbb6b6.js → [job]-d31688d3e52736dd.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-35cbeb5214fd4036.js → [cluster]-e7d8710a9b0491e5.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{clusters-5549a350f97d7ef3.js → clusters-3c674e5d970e05cb.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/config-3aac7a015c6eede1.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-b68ddeed712d45b5.js → [context]-46d2e4ad6c487260.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-13b117a831702196.js → infra-7013d816a2a0e76c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-f7f0c9e156d328bc.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/{jobs-a76b2700eca236f7.js → jobs-87e60396c376292f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/users-9355a0f13d1db61d.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/{new-c7516f2b4c3727c0.js → new-9a749cca1813bd27.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-7799de9e691e35d8.js → [name]-8eeb628e03902f1b.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-8fbcc5ab4af316d0.js +1 -0
- sky/dashboard/out/_next/static/css/8b1c8321d4c02372.css +3 -0
- sky/dashboard/out/_next/static/xos0euNCptbGAM7_Q3Acl/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/exceptions.py +5 -0
- sky/global_user_state.py +11 -6
- sky/jobs/scheduler.py +9 -4
- sky/jobs/server/core.py +23 -2
- sky/jobs/server/server.py +0 -95
- sky/jobs/state.py +18 -15
- sky/jobs/utils.py +2 -1
- sky/models.py +18 -0
- sky/provision/kubernetes/utils.py +12 -5
- sky/provision/nebius/constants.py +47 -0
- sky/provision/nebius/instance.py +2 -1
- sky/provision/nebius/utils.py +28 -7
- sky/serve/server/core.py +1 -1
- sky/server/common.py +4 -2
- sky/server/constants.py +0 -2
- sky/server/requests/executor.py +10 -2
- sky/server/requests/requests.py +4 -3
- sky/server/server.py +22 -5
- sky/skylet/constants.py +4 -0
- sky/skylet/job_lib.py +2 -1
- sky/skypilot_config.py +13 -1
- sky/templates/jobs-controller.yaml.j2 +3 -1
- sky/templates/nebius-ray.yml.j2 +6 -0
- sky/users/model.conf +1 -1
- sky/users/permission.py +148 -31
- sky/users/rbac.py +26 -0
- sky/users/server.py +14 -13
- sky/utils/common.py +6 -1
- sky/utils/common_utils.py +21 -3
- sky/utils/kubernetes/deploy_remote_cluster.py +5 -3
- sky/utils/resources_utils.py +3 -1
- sky/utils/schemas.py +9 -0
- sky/workspaces/core.py +100 -8
- sky/workspaces/server.py +15 -2
- sky/workspaces/utils.py +56 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/RECORD +89 -87
- sky/dashboard/out/_next/static/99m-BAySO8Q7J-ul1jZVL/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-a90f0a9753a10420.js +0 -6
- sky/dashboard/out/_next/static/chunks/470-9e7a479cc8303baa.js +0 -1
- sky/dashboard/out/_next/static/chunks/843-c296541442d4af88.js +0 -11
- sky/dashboard/out/_next/static/chunks/969-c7abda31c10440ac.js +0 -1
- sky/dashboard/out/_next/static/chunks/973-1a09cac61cfcc1e1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/config-1a1eeb949dab8897.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-2d23a9c7571e6320.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/users-262aab38b9baaf3a.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/workspaces-384ea5fa0cea8f28.js +0 -1
- sky/dashboard/out/_next/static/css/667d941a2888ce6e.css +0 -3
- /sky/dashboard/out/_next/static/chunks/{37-beedd583fea84cc8.js → 37-600191c5804dcae2.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{682-6647f0417d5662f0.js → 682-b60cfdacc15202e8.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{856-3a32da4b84176f6d.js → 856-affc52adf5403a3a.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-cb81dc4d27f4d009.js → _app-5f16aba5794ee8e7.js} +0 -0
- /sky/dashboard/out/_next/static/{99m-BAySO8Q7J-ul1jZVL → xos0euNCptbGAM7_Q3Acl}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250609.dist-info}/top_level.txt +0 -0
sky/skylet/constants.py
CHANGED
@@ -367,6 +367,7 @@ RCLONE_CACHE_REFRESH_INTERVAL = 10
|
|
367
367
|
OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
|
368
368
|
('docker', 'run_options'),
|
369
369
|
('nvidia_gpus', 'disable_ecc'),
|
370
|
+
('ssh', 'pod_config'),
|
370
371
|
('kubernetes', 'pod_config'),
|
371
372
|
('kubernetes', 'provision_timeout'),
|
372
373
|
('gcp', 'managed_instance_group'),
|
@@ -418,3 +419,6 @@ ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
|
|
418
419
|
'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
|
419
420
|
'paperspace', 'do', 'nebius', 'ssh')
|
420
421
|
# END constants used for service catalog.
|
422
|
+
|
423
|
+
# The user ID of the SkyPilot system.
|
424
|
+
SKYPILOT_SYSTEM_USER_ID = 'skypilot-system'
|
sky/skylet/job_lib.py
CHANGED
@@ -794,7 +794,8 @@ def load_job_queue(payload: str) -> List[Dict[str, Any]]:
|
|
794
794
|
for job in jobs:
|
795
795
|
job['status'] = JobStatus(job['status'])
|
796
796
|
job['user_hash'] = job['username']
|
797
|
-
|
797
|
+
user = global_user_state.get_user(job['user_hash'])
|
798
|
+
job['username'] = user.name if user is not None else None
|
798
799
|
return jobs
|
799
800
|
|
800
801
|
|
sky/skypilot_config.py
CHANGED
@@ -167,7 +167,10 @@ def _get_loaded_config_path() -> List[Optional[str]]:
|
|
167
167
|
serialized = _get_config_context().config_path
|
168
168
|
if not serialized:
|
169
169
|
return []
|
170
|
-
|
170
|
+
config_paths = json.loads(serialized)
|
171
|
+
if config_paths is None:
|
172
|
+
return []
|
173
|
+
return config_paths
|
171
174
|
|
172
175
|
|
173
176
|
def _set_loaded_config_path(
|
@@ -762,6 +765,15 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
|
|
762
765
|
Args:
|
763
766
|
config: The config to save and sync.
|
764
767
|
"""
|
768
|
+
|
769
|
+
def is_running_pytest() -> bool:
|
770
|
+
return 'PYTEST_CURRENT_TEST' in os.environ
|
771
|
+
|
772
|
+
# Only allow this function to be called by the API Server in production.
|
773
|
+
if not is_running_pytest() and os.environ.get(
|
774
|
+
constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
|
775
|
+
raise ValueError('This function can only be called by the API Server.')
|
776
|
+
|
765
777
|
global_config_path = _resolve_server_config_path()
|
766
778
|
if global_config_path is None:
|
767
779
|
global_config_path = get_user_config_path()
|
@@ -3,6 +3,7 @@
|
|
3
3
|
name: {{dag_name}}
|
4
4
|
|
5
5
|
file_mounts:
|
6
|
+
{{remote_original_user_yaml_path}}: {{original_user_dag_path}}
|
6
7
|
{{remote_user_yaml_path}}: {{user_yaml_path}}
|
7
8
|
{%- if local_user_config_path is not none %}
|
8
9
|
{{remote_user_config_path}}: {{local_user_config_path}}
|
@@ -28,7 +29,7 @@ setup: |
|
|
28
29
|
grep -q 'export SKYPILOT_DEV=' ~/.bashrc || echo 'export SKYPILOT_DEV=1' >> ~/.bashrc
|
29
30
|
grep -q 'alias sky-env=' ~/.bashrc || echo 'alias sky-env="{{ sky_activate_python_env }}"' >> ~/.bashrc
|
30
31
|
{% endif %}
|
31
|
-
|
32
|
+
|
32
33
|
# Create systemd service file
|
33
34
|
mkdir -p ~/.config/systemd/user/
|
34
35
|
|
@@ -65,6 +66,7 @@ run: |
|
|
65
66
|
# CloudVmRayBackend._exec_code_on_head() calls
|
66
67
|
# managed_job_codegen.set_pending() before we get here.
|
67
68
|
python -u -m sky.jobs.scheduler {{remote_user_yaml_path}} \
|
69
|
+
--user-yaml-path {{remote_original_user_yaml_path}} \
|
68
70
|
--job-id $SKYPILOT_INTERNAL_JOB_ID \
|
69
71
|
--env-file {{remote_env_file_path}} \
|
70
72
|
--priority {{priority}}
|
sky/templates/nebius-ray.yml.j2
CHANGED
@@ -46,6 +46,7 @@ available_node_types:
|
|
46
46
|
InstanceType: {{instance_type}}
|
47
47
|
ImageId: {{image_id}}
|
48
48
|
DiskSize: {{disk_size}}
|
49
|
+
network_tier: {{network_tier}}
|
49
50
|
filesystems:
|
50
51
|
{%- for fs in filesystems %}
|
51
52
|
- filesystem_id: {{ fs.filesystem_id }}
|
@@ -152,6 +153,11 @@ setup_commands:
|
|
152
153
|
mkdir -p ~/.ssh; touch ~/.ssh/config;
|
153
154
|
{{ conda_installation_commands }}
|
154
155
|
{{ ray_skypilot_installation_commands }}
|
156
|
+
{%- if env_vars is defined %}
|
157
|
+
{%- for env_var, env_value in env_vars.items() %}
|
158
|
+
echo '{{env_var}}={{env_value}}' | sudo tee -a /etc/environment;
|
159
|
+
{%- endfor %}
|
160
|
+
{%- endif %}
|
155
161
|
sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
|
156
162
|
sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
|
157
163
|
mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
|
sky/users/model.conf
CHANGED
sky/users/permission.py
CHANGED
@@ -3,7 +3,7 @@ import contextlib
|
|
3
3
|
import logging
|
4
4
|
import os
|
5
5
|
import threading
|
6
|
-
from typing import List
|
6
|
+
from typing import Generator, List
|
7
7
|
|
8
8
|
import casbin
|
9
9
|
import filelock
|
@@ -11,8 +11,11 @@ import sqlalchemy_adapter
|
|
11
11
|
|
12
12
|
from sky import global_user_state
|
13
13
|
from sky import sky_logging
|
14
|
+
from sky.skylet import constants
|
14
15
|
from sky.users import rbac
|
15
16
|
|
17
|
+
logging.getLogger('casbin.policy').setLevel(sky_logging.ERROR)
|
18
|
+
logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
|
16
19
|
logger = sky_logging.init_logger(__name__)
|
17
20
|
|
18
21
|
# Filelocks for the policy update.
|
@@ -38,17 +41,19 @@ class PermissionService:
|
|
38
41
|
model_path = os.path.join(os.path.dirname(__file__),
|
39
42
|
'model.conf')
|
40
43
|
enforcer = casbin.Enforcer(model_path, adapter)
|
41
|
-
logging.getLogger('casbin.policy').setLevel(
|
42
|
-
sky_logging.ERROR)
|
43
|
-
logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
|
44
44
|
self.enforcer = enforcer
|
45
45
|
else:
|
46
46
|
self.enforcer = _enforcer_instance.enforcer
|
47
|
-
|
47
|
+
with _policy_lock():
|
48
|
+
self._maybe_initialize_policies()
|
48
49
|
|
49
|
-
def _maybe_initialize_policies(self):
|
50
|
+
def _maybe_initialize_policies(self) -> None:
|
50
51
|
"""Initialize policies if they don't already exist."""
|
52
|
+
# TODO(zhwu): we should avoid running this on client side.
|
51
53
|
logger.debug(f'Initializing policies in process: {os.getpid()}')
|
54
|
+
self._load_policy_no_lock()
|
55
|
+
|
56
|
+
policy_updated = False
|
52
57
|
|
53
58
|
# Check if policies are already initialized by looking for existing
|
54
59
|
# permission policies in the enforcer
|
@@ -66,6 +71,17 @@ class PermissionService:
|
|
66
71
|
expected_policies.append(
|
67
72
|
[role, item['path'], item['method']])
|
68
73
|
|
74
|
+
# Add workspace policy
|
75
|
+
workspace_policy_permissions = rbac.get_workspace_policy_permissions()
|
76
|
+
logger.debug(f'Workspace policy permissions from config: '
|
77
|
+
f'{workspace_policy_permissions}')
|
78
|
+
|
79
|
+
for workspace_name, users in workspace_policy_permissions.items():
|
80
|
+
for user in users:
|
81
|
+
expected_policies.append([user, workspace_name, '*'])
|
82
|
+
logger.debug(f'Expected workspace policy: user={user}, '
|
83
|
+
f'workspace={workspace_name}')
|
84
|
+
|
69
85
|
# Check if all expected policies already exist
|
70
86
|
policies_exist = all(
|
71
87
|
any(policy == expected
|
@@ -86,48 +102,71 @@ class PermissionService:
|
|
86
102
|
for item in blocklist:
|
87
103
|
path = item['path']
|
88
104
|
method = item['method']
|
105
|
+
logger.debug(f'Adding role policy: role={role}, '
|
106
|
+
f'path={path}, method={method}')
|
89
107
|
self.enforcer.add_policy(role, path, method)
|
90
|
-
|
108
|
+
policy_updated = True
|
109
|
+
|
110
|
+
for workspace_name, users in workspace_policy_permissions.items():
|
111
|
+
for user in users:
|
112
|
+
logger.debug(f'Initializing workspace policy: user={user}, '
|
113
|
+
f'workspace={workspace_name}')
|
114
|
+
self.enforcer.add_policy(user, workspace_name, '*')
|
115
|
+
policy_updated = True
|
116
|
+
logger.debug('Policies initialized successfully')
|
91
117
|
else:
|
92
118
|
logger.debug('Policies already exist, skipping initialization')
|
93
119
|
|
94
120
|
# Always ensure users have default roles (this is idempotent)
|
95
121
|
all_users = global_user_state.get_all_users()
|
96
|
-
for
|
97
|
-
self.
|
122
|
+
for existing_user in all_users:
|
123
|
+
user_added = self._add_user_if_not_exists_no_lock(existing_user.id)
|
124
|
+
policy_updated = policy_updated or user_added
|
125
|
+
|
126
|
+
if policy_updated:
|
127
|
+
self.enforcer.save_policy()
|
98
128
|
|
99
|
-
def add_user_if_not_exists(self,
|
129
|
+
def add_user_if_not_exists(self, user_id: str) -> None:
|
100
130
|
"""Add user role relationship."""
|
101
131
|
with _policy_lock():
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
132
|
+
self._add_user_if_not_exists_no_lock(user_id)
|
133
|
+
|
134
|
+
def _add_user_if_not_exists_no_lock(self, user_id: str) -> bool:
|
135
|
+
"""Add user role relationship without lock.
|
136
|
+
|
137
|
+
Returns:
|
138
|
+
True if the user was added, False otherwise.
|
139
|
+
"""
|
140
|
+
user_roles = self.enforcer.get_roles_for_user(user_id)
|
141
|
+
if not user_roles:
|
142
|
+
logger.info(f'User {user_id} has no roles, adding'
|
143
|
+
f' default role {rbac.get_default_role()}')
|
144
|
+
self.enforcer.add_grouping_policy(user_id, rbac.get_default_role())
|
145
|
+
return True
|
146
|
+
return False
|
147
|
+
|
148
|
+
def update_role(self, user_id: str, new_role: str) -> None:
|
110
149
|
"""Update user role relationship."""
|
111
150
|
with _policy_lock():
|
112
151
|
# Get current roles
|
113
152
|
self._load_policy_no_lock()
|
114
153
|
# Avoid calling get_user_roles, as it will require the lock.
|
115
|
-
current_roles = self.enforcer.get_roles_for_user(
|
154
|
+
current_roles = self.enforcer.get_roles_for_user(user_id)
|
116
155
|
if not current_roles:
|
117
|
-
logger.warning(f'User {
|
156
|
+
logger.warning(f'User {user_id} has no roles')
|
118
157
|
else:
|
119
158
|
# TODO(hailong): how to handle multiple roles?
|
120
159
|
current_role = current_roles[0]
|
121
160
|
if current_role == new_role:
|
122
|
-
logger.info(f'User {
|
161
|
+
logger.info(f'User {user_id} already has role {new_role}')
|
123
162
|
return
|
124
|
-
self.enforcer.remove_grouping_policy(
|
163
|
+
self.enforcer.remove_grouping_policy(user_id, current_role)
|
125
164
|
|
126
165
|
# Update user role
|
127
|
-
self.enforcer.add_grouping_policy(
|
166
|
+
self.enforcer.add_grouping_policy(user_id, new_role)
|
128
167
|
self.enforcer.save_policy()
|
129
168
|
|
130
|
-
def get_user_roles(self,
|
169
|
+
def get_user_roles(self, user_id: str) -> List[str]:
|
131
170
|
"""Get all roles for a user.
|
132
171
|
|
133
172
|
This method returns all roles that the user has, including inherited
|
@@ -140,10 +179,11 @@ class PermissionService:
|
|
140
179
|
Returns:
|
141
180
|
A list of role names that the user has.
|
142
181
|
"""
|
143
|
-
self.
|
144
|
-
return self.enforcer.get_roles_for_user(
|
182
|
+
self._load_policy_no_lock()
|
183
|
+
return self.enforcer.get_roles_for_user(user_id)
|
145
184
|
|
146
|
-
def
|
185
|
+
def check_endpoint_permission(self, user_id: str, path: str,
|
186
|
+
method: str) -> bool:
|
147
187
|
"""Check permission."""
|
148
188
|
# We intentionally don't load the policy here, as it is a hot path, and
|
149
189
|
# we don't support updating the policy.
|
@@ -151,28 +191,105 @@ class PermissionService:
|
|
151
191
|
# it is a hot path in every request. It is ok to have a stale policy,
|
152
192
|
# as long as it is eventually consistent.
|
153
193
|
# self._load_policy_no_lock()
|
154
|
-
return self.enforcer.enforce(
|
194
|
+
return self.enforcer.enforce(user_id, path, method)
|
155
195
|
|
156
196
|
def _load_policy_no_lock(self):
|
157
197
|
"""Load policy from storage."""
|
158
198
|
self.enforcer.load_policy()
|
159
199
|
|
160
|
-
def
|
200
|
+
def load_policy(self):
|
161
201
|
"""Load policy from storage with lock."""
|
162
202
|
with _policy_lock():
|
163
203
|
self._load_policy_no_lock()
|
164
204
|
|
205
|
+
def check_workspace_permission(self, user_id: str,
|
206
|
+
workspace_name: str) -> bool:
|
207
|
+
"""Check workspace permission.
|
208
|
+
|
209
|
+
This method checks if a user has permission to access a specific
|
210
|
+
workspace.
|
211
|
+
|
212
|
+
For private workspaces, the user must have explicit permission.
|
213
|
+
|
214
|
+
For public workspaces, the permission is granted via a wildcard policy
|
215
|
+
('*').
|
216
|
+
"""
|
217
|
+
if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
|
218
|
+
# When it is not on API server, we allow all users to access all
|
219
|
+
# workspaces, as the workspace check has been done on API server.
|
220
|
+
return True
|
221
|
+
role = self.get_user_roles(user_id)
|
222
|
+
if rbac.RoleName.ADMIN.value in role:
|
223
|
+
return True
|
224
|
+
# The Casbin model matcher already handles the wildcard '*' case:
|
225
|
+
# m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj &&
|
226
|
+
# r.act == p.act
|
227
|
+
# This means if there's a policy ('*', workspace_name, '*'), it will
|
228
|
+
# match any user
|
229
|
+
result = self.enforcer.enforce(user_id, workspace_name, '*')
|
230
|
+
logger.debug(f'Workspace permission check: user={user_id}, '
|
231
|
+
f'workspace={workspace_name}, result={result}')
|
232
|
+
return result
|
233
|
+
|
234
|
+
def add_workspace_policy(self, workspace_name: str,
|
235
|
+
users: List[str]) -> None:
|
236
|
+
"""Add workspace policy.
|
237
|
+
|
238
|
+
Args:
|
239
|
+
workspace_name: Name of the workspace
|
240
|
+
users: List of user IDs that should have access.
|
241
|
+
For public workspaces, this should be ['*'].
|
242
|
+
For private workspaces, this should be specific user IDs.
|
243
|
+
"""
|
244
|
+
with _policy_lock():
|
245
|
+
for user in users:
|
246
|
+
logger.debug(f'Adding workspace policy: user={user}, '
|
247
|
+
f'workspace={workspace_name}')
|
248
|
+
self.enforcer.add_policy(user, workspace_name, '*')
|
249
|
+
self.enforcer.save_policy()
|
250
|
+
|
251
|
+
def update_workspace_policy(self, workspace_name: str,
|
252
|
+
users: List[str]) -> None:
|
253
|
+
"""Update workspace policy.
|
254
|
+
|
255
|
+
Args:
|
256
|
+
workspace_name: Name of the workspace
|
257
|
+
users: List of user IDs that should have access.
|
258
|
+
For public workspaces, this should be ['*'].
|
259
|
+
For private workspaces, this should be specific user IDs.
|
260
|
+
"""
|
261
|
+
with _policy_lock():
|
262
|
+
self._load_policy_no_lock()
|
263
|
+
# Remove all existing policies for this workspace
|
264
|
+
self.enforcer.remove_filtered_policy(1, workspace_name)
|
265
|
+
# Add new policies
|
266
|
+
for user in users:
|
267
|
+
logger.debug(f'Updating workspace policy: user={user}, '
|
268
|
+
f'workspace={workspace_name}')
|
269
|
+
self.enforcer.add_policy(user, workspace_name, '*')
|
270
|
+
self.enforcer.save_policy()
|
271
|
+
|
272
|
+
def remove_workspace_policy(self, workspace_name: str) -> None:
|
273
|
+
"""Remove workspace policy."""
|
274
|
+
with _policy_lock():
|
275
|
+
self.enforcer.remove_filtered_policy(1, workspace_name)
|
276
|
+
self.enforcer.save_policy()
|
277
|
+
|
165
278
|
|
166
279
|
@contextlib.contextmanager
|
167
|
-
def _policy_lock():
|
280
|
+
def _policy_lock() -> Generator[None, None, None]:
|
168
281
|
"""Context manager for policy update lock."""
|
169
282
|
try:
|
170
283
|
with filelock.FileLock(POLICY_UPDATE_LOCK_PATH,
|
171
284
|
POLICY_UPDATE_LOCK_TIMEOUT_SECONDS):
|
172
285
|
yield
|
173
286
|
except filelock.Timeout as e:
|
174
|
-
raise RuntimeError(f'Failed to
|
287
|
+
raise RuntimeError(f'Failed to reload policy due to a timeout '
|
175
288
|
f'when trying to acquire the lock at '
|
176
289
|
f'{POLICY_UPDATE_LOCK_PATH}. '
|
177
290
|
'Please try again or manually remove the lock '
|
178
291
|
f'file if you believe it is stale.') from e
|
292
|
+
|
293
|
+
|
294
|
+
# Singleton instance of PermissionService for other modules to use.
|
295
|
+
permission_service = PermissionService()
|
sky/users/rbac.py
CHANGED
@@ -5,6 +5,8 @@ from typing import Dict, List
|
|
5
5
|
|
6
6
|
from sky import sky_logging
|
7
7
|
from sky import skypilot_config
|
8
|
+
from sky.skylet import constants
|
9
|
+
from sky.workspaces import utils as workspaces_utils
|
8
10
|
|
9
11
|
logger = sky_logging.init_logger(__name__)
|
10
12
|
|
@@ -84,3 +86,27 @@ def get_role_permissions(
|
|
84
86
|
}
|
85
87
|
}
|
86
88
|
return config_permissions
|
89
|
+
|
90
|
+
|
91
|
+
def get_workspace_policy_permissions() -> Dict[str, List[str]]:
|
92
|
+
"""Get workspace policy permissions from config.
|
93
|
+
|
94
|
+
Returns:
|
95
|
+
A dictionary of workspace policy permissions.
|
96
|
+
Example:
|
97
|
+
{
|
98
|
+
'workspace1': ['user1-id', 'user2-id'],
|
99
|
+
'workspace2': ['user3-id', 'user4-id']
|
100
|
+
'default': ['*']
|
101
|
+
}
|
102
|
+
"""
|
103
|
+
current_workspaces = skypilot_config.get_nested(('workspaces',),
|
104
|
+
default_value={})
|
105
|
+
if constants.SKYPILOT_DEFAULT_WORKSPACE not in current_workspaces:
|
106
|
+
current_workspaces[constants.SKYPILOT_DEFAULT_WORKSPACE] = {}
|
107
|
+
workspaces_to_policy = {}
|
108
|
+
for workspace_name, workspace_config in current_workspaces.items():
|
109
|
+
users = workspaces_utils.get_workspace_users(workspace_config)
|
110
|
+
workspaces_to_policy[workspace_name] = users
|
111
|
+
logger.debug(f'Workspace policy permissions: {workspaces_to_policy}')
|
112
|
+
return workspaces_to_policy
|
sky/users/server.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
"""REST API for workspace management."""
|
2
2
|
|
3
|
-
import hashlib
|
4
3
|
from typing import Any, Dict, List
|
5
4
|
|
6
5
|
import fastapi
|
@@ -8,16 +7,15 @@ import fastapi
|
|
8
7
|
from sky import global_user_state
|
9
8
|
from sky import sky_logging
|
10
9
|
from sky.server.requests import payloads
|
10
|
+
from sky.skylet import constants
|
11
11
|
from sky.users import permission
|
12
12
|
from sky.users import rbac
|
13
|
-
from sky.utils import
|
13
|
+
from sky.utils import common
|
14
14
|
|
15
15
|
logger = sky_logging.init_logger(__name__)
|
16
16
|
|
17
17
|
router = fastapi.APIRouter()
|
18
18
|
|
19
|
-
permission_service = permission.PermissionService()
|
20
|
-
|
21
19
|
|
22
20
|
@router.get('')
|
23
21
|
async def users() -> List[Dict[str, Any]]:
|
@@ -25,7 +23,7 @@ async def users() -> List[Dict[str, Any]]:
|
|
25
23
|
all_users = []
|
26
24
|
user_list = global_user_state.get_all_users()
|
27
25
|
for user in user_list:
|
28
|
-
user_roles = permission_service.get_user_roles(user.id)
|
26
|
+
user_roles = permission.permission_service.get_user_roles(user.id)
|
29
27
|
all_users.append({
|
30
28
|
'id': user.id,
|
31
29
|
'name': user.name,
|
@@ -39,13 +37,11 @@ async def get_current_user_role(request: fastapi.Request):
|
|
39
37
|
"""Get current user's role."""
|
40
38
|
# TODO(hailong): is there a reliable way to get the user
|
41
39
|
# hash for the request without 'X-Auth-Request-Email' header?
|
42
|
-
|
40
|
+
auth_user = request.state.auth_user
|
41
|
+
if auth_user is None:
|
43
42
|
return {'name': '', 'role': rbac.RoleName.ADMIN.value}
|
44
|
-
|
45
|
-
|
46
|
-
user_name.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
|
47
|
-
user_roles = permission_service.get_user_roles(user_hash)
|
48
|
-
return {'name': user_name, 'role': user_roles[0] if user_roles else ''}
|
43
|
+
user_roles = permission.permission_service.get_user_roles(auth_user.id)
|
44
|
+
return {'name': auth_user.name, 'role': user_roles[0] if user_roles else ''}
|
49
45
|
|
50
46
|
|
51
47
|
@router.post('/update')
|
@@ -58,9 +54,14 @@ async def user_update(user_update_body: payloads.UserUpdateBody) -> None:
|
|
58
54
|
raise fastapi.HTTPException(status_code=400,
|
59
55
|
detail=f'Invalid role: {role}')
|
60
56
|
user_info = global_user_state.get_user(user_id)
|
61
|
-
if
|
57
|
+
if user_info is None:
|
62
58
|
raise fastapi.HTTPException(status_code=400,
|
63
59
|
detail=f'User {user_id} does not exist')
|
60
|
+
# Disallow updating roles for the internal users.
|
61
|
+
if user_info.id in [common.SERVER_ID, constants.SKYPILOT_SYSTEM_USER_ID]:
|
62
|
+
raise fastapi.HTTPException(status_code=400,
|
63
|
+
detail=f'Cannot update role for internal '
|
64
|
+
f'API server user {user_info.name}')
|
64
65
|
|
65
66
|
# Update user role in casbin policy
|
66
|
-
permission_service.update_role(
|
67
|
+
permission.permission_service.update_role(user_info.id, role)
|
sky/utils/common.py
CHANGED
@@ -5,6 +5,7 @@ import enum
|
|
5
5
|
import os
|
6
6
|
from typing import Generator
|
7
7
|
|
8
|
+
from sky import models
|
8
9
|
from sky.skylet import constants
|
9
10
|
from sky.utils import common_utils
|
10
11
|
|
@@ -25,10 +26,13 @@ JOB_CONTROLLER_NAME: str = f'{JOB_CONTROLLER_PREFIX}{SERVER_ID}'
|
|
25
26
|
|
26
27
|
|
27
28
|
@contextlib.contextmanager
|
28
|
-
def
|
29
|
+
def with_server_user() -> Generator[None, None, None]:
|
29
30
|
"""Temporarily set the user hash to common.SERVER_ID."""
|
30
31
|
old_env_user_hash = os.getenv(constants.USER_ID_ENV_VAR)
|
32
|
+
# TODO(zhwu): once we have fully moved our code to use `get_current_user()`
|
33
|
+
# instead of `common_utils.get_user_hash()`, we can remove the env override.
|
31
34
|
os.environ[constants.USER_ID_ENV_VAR] = SERVER_ID
|
35
|
+
common_utils.set_current_user(models.User.get_current_user())
|
32
36
|
try:
|
33
37
|
yield
|
34
38
|
finally:
|
@@ -36,6 +40,7 @@ def with_server_user_hash() -> Generator[None, None, None]:
|
|
36
40
|
os.environ[constants.USER_ID_ENV_VAR] = old_env_user_hash
|
37
41
|
else:
|
38
42
|
os.environ.pop(constants.USER_ID_ENV_VAR)
|
43
|
+
common_utils.set_current_user(models.User.get_current_user())
|
39
44
|
|
40
45
|
|
41
46
|
class StatusRefreshMode(enum.Enum):
|
sky/utils/common_utils.py
CHANGED
@@ -20,6 +20,7 @@ import uuid
|
|
20
20
|
import jsonschema
|
21
21
|
|
22
22
|
from sky import exceptions
|
23
|
+
from sky import models
|
23
24
|
from sky import sky_logging
|
24
25
|
from sky.adaptors import common as adaptors_common
|
25
26
|
from sky.skylet import constants
|
@@ -256,11 +257,13 @@ class Backoff:
|
|
256
257
|
_current_command: Optional[str] = None
|
257
258
|
_current_client_entrypoint: Optional[str] = None
|
258
259
|
_using_remote_api_server: Optional[bool] = None
|
260
|
+
_current_user: Optional['models.User'] = None
|
259
261
|
|
260
262
|
|
261
|
-
def
|
262
|
-
|
263
|
-
|
263
|
+
def set_request_context(client_entrypoint: Optional[str],
|
264
|
+
client_command: Optional[str],
|
265
|
+
using_remote_api_server: bool,
|
266
|
+
user: Optional['models.User']):
|
264
267
|
"""Override the current client entrypoint and command.
|
265
268
|
|
266
269
|
This is useful when we are on the SkyPilot API server side and we have a
|
@@ -269,9 +272,11 @@ def set_client_status(client_entrypoint: Optional[str],
|
|
269
272
|
global _current_command
|
270
273
|
global _current_client_entrypoint
|
271
274
|
global _using_remote_api_server
|
275
|
+
global _current_user
|
272
276
|
_current_command = client_command
|
273
277
|
_current_client_entrypoint = client_entrypoint
|
274
278
|
_using_remote_api_server = using_remote_api_server
|
279
|
+
_current_user = user
|
275
280
|
|
276
281
|
|
277
282
|
def get_current_command() -> str:
|
@@ -286,6 +291,19 @@ def get_current_command() -> str:
|
|
286
291
|
return get_pretty_entrypoint_cmd()
|
287
292
|
|
288
293
|
|
294
|
+
def get_current_user() -> 'models.User':
|
295
|
+
"""Returns the current user."""
|
296
|
+
if _current_user is not None:
|
297
|
+
return _current_user
|
298
|
+
return models.User.get_current_user()
|
299
|
+
|
300
|
+
|
301
|
+
def set_current_user(user: 'models.User'):
|
302
|
+
"""Sets the current user."""
|
303
|
+
global _current_user
|
304
|
+
_current_user = user
|
305
|
+
|
306
|
+
|
289
307
|
def get_current_client_entrypoint(server_entrypoint: str) -> str:
|
290
308
|
"""Returns the current client entrypoint.
|
291
309
|
|
@@ -723,14 +723,16 @@ def main():
|
|
723
723
|
# Do not support changing anything besides hosts for now
|
724
724
|
if history is not None:
|
725
725
|
for key in ['user', 'identity_file', 'password']:
|
726
|
-
if
|
726
|
+
if not args.cleanup and history.get(
|
727
|
+
key) != cluster_config.get(key):
|
727
728
|
raise ValueError(
|
728
729
|
f'Cluster configuration has changed for field {key!r}. '
|
729
730
|
f'Previous value: {history.get(key)}, '
|
730
731
|
f'Current value: {cluster_config.get(key)}')
|
731
732
|
history_hosts_info = prepare_hosts_info(
|
732
733
|
cluster_name, history)
|
733
|
-
if history_hosts_info[0] != hosts_info[
|
734
|
+
if not args.cleanup and history_hosts_info[0] != hosts_info[
|
735
|
+
0]:
|
734
736
|
raise ValueError(
|
735
737
|
f'Cluster configuration has changed for master node. '
|
736
738
|
f'Previous value: {history_hosts_info[0]}, '
|
@@ -860,7 +862,7 @@ def deploy_cluster(head_node,
|
|
860
862
|
use_ssh_config=head_use_ssh_config,
|
861
863
|
# For SkySSHUpLineProcessor
|
862
864
|
print_output=True)
|
863
|
-
if result is None:
|
865
|
+
if not cleanup and result is None:
|
864
866
|
with ux_utils.print_exception_no_traceback():
|
865
867
|
raise RuntimeError(
|
866
868
|
f'Failed to SSH to head node ({head_node}). '
|
sky/utils/resources_utils.py
CHANGED
@@ -63,7 +63,9 @@ class NetworkTier(enum.Enum):
|
|
63
63
|
def cli_help_message(cls) -> str:
|
64
64
|
return (
|
65
65
|
f'Network tier. Could be one of {", ".join(cls.supported_tiers())}'
|
66
|
-
f'.
|
66
|
+
f'. If {cls.BEST.value} is specified, use the best network tier '
|
67
|
+
'available on the specified instance. '
|
68
|
+
f'Default: {cls.STANDARD.value}')
|
67
69
|
|
68
70
|
@classmethod
|
69
71
|
def from_str(cls, tier: str) -> 'NetworkTier':
|
sky/utils/schemas.py
CHANGED
@@ -1249,6 +1249,15 @@ def get_config_schema():
|
|
1249
1249
|
'properties': {
|
1250
1250
|
# Explicit definition for GCP allows both project_id and
|
1251
1251
|
# disabled
|
1252
|
+
'private': {
|
1253
|
+
'type': 'boolean',
|
1254
|
+
},
|
1255
|
+
'allowed_users': {
|
1256
|
+
'type': 'array',
|
1257
|
+
'items': {
|
1258
|
+
'type': 'string',
|
1259
|
+
},
|
1260
|
+
},
|
1252
1261
|
'gcp': {
|
1253
1262
|
'type': 'object',
|
1254
1263
|
'properties': {
|