skypilot-nightly 1.0.0.dev20250616__py3-none-any.whl → 1.0.0.dev20250618__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -4
- sky/backends/backend_utils.py +7 -0
- sky/backends/cloud_vm_ray_backend.py +91 -96
- sky/cli.py +5 -6311
- sky/client/cli.py +66 -639
- sky/client/sdk.py +22 -2
- sky/clouds/kubernetes.py +8 -0
- sky/clouds/scp.py +7 -26
- sky/clouds/utils/scp_utils.py +177 -124
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{OZxMW3bxAJmqgn5f4MdhO → LRpGymRCqq-feuFyoWz4m}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +1 -0
- sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +50 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-36bc0962129f72df.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-cf490d1fa38f3740.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/users-928edf039219e47b.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-ebc2404fd6ce581c.js +1 -0
- sky/dashboard/out/_next/static/css/6c12ecc3bd2239b6.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +50 -11
- sky/jobs/controller.py +98 -31
- sky/jobs/scheduler.py +37 -29
- sky/jobs/server/core.py +36 -3
- sky/jobs/state.py +69 -9
- sky/jobs/utils.py +11 -0
- sky/logs/__init__.py +17 -0
- sky/logs/agent.py +73 -0
- sky/logs/gcp.py +91 -0
- sky/models.py +1 -0
- sky/provision/__init__.py +1 -0
- sky/provision/instance_setup.py +35 -0
- sky/provision/provisioner.py +11 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +528 -0
- sky/resources.py +164 -29
- sky/server/common.py +21 -9
- sky/server/requests/payloads.py +19 -1
- sky/server/server.py +121 -29
- sky/setup_files/dependencies.py +11 -1
- sky/skylet/constants.py +48 -1
- sky/skylet/job_lib.py +83 -19
- sky/task.py +171 -21
- sky/templates/kubernetes-ray.yml.j2 +60 -4
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/users/permission.py +47 -34
- sky/users/rbac.py +10 -1
- sky/users/server.py +274 -9
- sky/utils/command_runner.py +1 -1
- sky/utils/common_utils.py +16 -14
- sky/utils/context.py +1 -1
- sky/utils/controller_utils.py +12 -3
- sky/utils/dag_utils.py +17 -4
- sky/utils/kubernetes/deploy_remote_cluster.py +17 -8
- sky/utils/schemas.py +83 -5
- {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/METADATA +9 -1
- {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/RECORD +80 -79
- sky/benchmark/__init__.py +0 -0
- sky/benchmark/benchmark_state.py +0 -295
- sky/benchmark/benchmark_utils.py +0 -641
- sky/dashboard/out/_next/static/chunks/600.bd2ed8c076b720ec.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-59950b2f83b66e48.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b3dbf38b51cb29be.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/users-c69ffcab9d6e5269.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-1b69b196a4dbffef.js +0 -1
- sky/dashboard/out/_next/static/css/8e97adcaacc15293.css +0 -3
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- /sky/dashboard/out/_next/static/{OZxMW3bxAJmqgn5f4MdhO → LRpGymRCqq-feuFyoWz4m}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{37-824c707421f6f003.js → 37-3a4d77ad62932eaf.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{843-ab9c4f609239155f.js → 843-b3040e493f6e7947.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{938-385d190b95815e11.js → 938-1493ac755eadeb35.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{973-c807fc34f09c7df3.js → 973-db3c97c2bfbceb65.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-32b2caae3445bf3b.js → _app-c416e87d5c2715cf.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-c8c2191328532b7d.js → [name]-c4ff1ec05e2f3daf.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/top_level.txt +0 -0
sky/templates/kubernetes-ray.yml.j2
CHANGED
@@ -273,6 +273,15 @@ available_node_types:
 {% if (k8s_acc_label_key is not none and k8s_acc_label_values is not none) %}
 skypilot-binpack: "gpu"
 {% endif %}
+{% if k8s_kueue_local_queue_name %}
+kueue.x-k8s.io/queue-name: {{k8s_kueue_local_queue_name}}
+kueue.x-k8s.io/pod-group-name: {{cluster_name_on_cloud}}
+{% endif %}
+{% if k8s_kueue_local_queue_name %}
+annotations:
+kueue.x-k8s.io/retriable-in-group: "false"
+kueue.x-k8s.io/pod-group-total-count: "{{ num_nodes|string }}"
+{% endif %}
 spec:
 # serviceAccountName: skypilot-service-account
 serviceAccountName: {{k8s_service_account_name}}
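Note on the hunk above: the Kueue labels and annotations are only emitted when k8s_kueue_local_queue_name is set, and the pod-group metadata uses the launched cluster's name and node count. Below is a minimal sketch of how such a Jinja2 fragment expands. The variable names come from the diff; the standalone fragment (which collapses the label/annotation split for brevity) and the example values are assumptions for illustration, not SkyPilot's real rendering path.

import jinja2

# Hypothetical fragment mirroring the added template lines.
FRAGMENT = '''\
{% if k8s_kueue_local_queue_name %}
kueue.x-k8s.io/queue-name: {{k8s_kueue_local_queue_name}}
kueue.x-k8s.io/pod-group-name: {{cluster_name_on_cloud}}
kueue.x-k8s.io/pod-group-total-count: "{{ num_nodes|string }}"
{% endif %}
'''

print(jinja2.Template(FRAGMENT).render(
    k8s_kueue_local_queue_name='user-queue',      # hypothetical local queue name
    cluster_name_on_cloud='sky-cluster-example',  # hypothetical cluster name
    num_nodes=2))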
@@ -632,19 +641,66 @@ available_node_types:
 {% if high_availability %}
 mkdir -p {{k8s_high_availability_deployment_run_script_dir}}
 if [ -f {{k8s_high_availability_deployment_volume_mount_path}}/k8s_container_ready ]; then
+SKYPILOT_HA_RECOVERY_LOG="/tmp/ha_recovery.log"
+echo "Starting HA recovery at $(date)" >> $SKYPILOT_HA_RECOVERY_LOG
+start_time=$SECONDS
+retry_count=0
+
+# Wait for Ray to be ready, as the following commands is depending on Ray.
+GET_RAY_STATUS_CMD=$({{sky_python_cmd}} -c 'from sky.provision import instance_setup; print(instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND)')
+while true; do
+retry_count=$((retry_count + 1))
+current_duration=$(( SECONDS - start_time ))
+echo "Attempt $retry_count to get Ray status after $current_duration seconds..." >> $SKYPILOT_HA_RECOVERY_LOG
+
+bash --login -c "$GET_RAY_STATUS_CMD"
+if [ $? -eq 0 ]; then
+wait_duration=$(( SECONDS - start_time ))
+echo "Ray ready after waiting $wait_duration seconds (took $retry_count attempts)" >> $SKYPILOT_HA_RECOVERY_LOG
+break
+fi
+echo "Waiting for Ray to be ready..." >> $SKYPILOT_HA_RECOVERY_LOG
+sleep 2
+done
+
 # ! Keep this aligned with `CloudVmRayBackend._setup()`
-# Suppose all `task.setup` are the same for
+# Suppose all `task.setup` are the same for sky serve / managed jobs controller task.
 # So be careful for compatibility issue once you change it.
 chmod +x {{k8s_high_availability_deployment_setup_script_path}}
 /bin/bash --login -c "true && export OMP_NUM_THREADS=1 PYTHONWARNINGS='ignore' && {{k8s_high_availability_deployment_setup_script_path}} > /tmp/controller_recovery_setup_commands.log 2>&1"
-echo "=== Controller setup commands completed for recovery ==="
-
+echo "=== Controller setup commands completed for recovery at $(date) ===" >> $SKYPILOT_HA_RECOVERY_LOG
+
+touch {{k8s_high_availability_restarting_signal_file}}
+# Get all in-progress jobs from managed jobs controller. We skip any jobs that are already done.
+# Also, skip the jobs that are waiting to be scheduled as those does not have a controller process running.
+# For SkyServe, this will be None and every service will be recovered. This is because SkyServe
+# will delete the service from the database after it is terminated so everything in the database is running.
+ALL_IN_PROGRESS_JOBS=$({{sky_python_cmd}} -c "from sky.jobs import state; jobs = state.get_managed_jobs(); print(' '.join({str(job['job_id']) for job in jobs if job['schedule_state'] not in [state.ManagedJobScheduleState.DONE, state.ManagedJobScheduleState.WAITING]}) if jobs else None)")
+if [ "$ALL_IN_PROGRESS_JOBS" != "None" ]; then
+read -ra ALL_IN_PROGRESS_JOBS_SEQ <<< "$ALL_IN_PROGRESS_JOBS"
+fi
 for file in {{k8s_high_availability_deployment_run_script_dir}}/*; do
+# This is the cluster job id on managed jobs controller, but it is guaranteed to be the same as the managed job id,
+# so we directly use it here. See `CloudVmRayBackend._exec_code_on_head::_dump_code_to_file` for more details.
+JOB_ID=$(basename $file | sed 's/sky_job_//')
+# If the list of in-progress jobs is not None (meaning this is a managed job HA controller) and job is not in-progress, skip.
+if [ "$ALL_IN_PROGRESS_JOBS" != "None" ]; then
+if [[ ! " ${ALL_IN_PROGRESS_JOBS_SEQ[@]} " =~ " ${JOB_ID} " ]]; then
+continue
+fi
+fi
 # ! Keep this aligned with `CloudVmRayBackend._execute()`
 chmod +x $file
+# TODO(tian): This logic may run a lot of things if the jobs controller previously had many jobs.
+# We should do more tests and make sure it will scale well.
 /bin/bash --login -c "true && export OMP_NUM_THREADS=1 PYTHONWARNINGS='ignore' && $file > /tmp/task_run_$(basename $file).log 2>&1"
-echo "=== Controller task run for service (file: $file) completed for recovery ==="
+echo "=== Controller task run for service / job (file: $file) completed for recovery at $(date) ===" >> $SKYPILOT_HA_RECOVERY_LOG
 done
+rm {{k8s_high_availability_restarting_signal_file}}
+
+duration=$(( SECONDS - start_time ))
+echo "HA recovery completed at $(date)" >> $SKYPILOT_HA_RECOVERY_LOG
+echo "Total recovery time: $duration seconds" >> $SKYPILOT_HA_RECOVERY_LOG
 fi
 
 touch {{k8s_high_availability_deployment_volume_mount_path}}/k8s_container_ready
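The recovery block above replays the per-job run scripts only for managed jobs that are still in progress; the {{sky_python_cmd}} -c one-liner computes that set from sky.jobs.state, and a result of None (the SkyServe case) means every script is replayed. Below is a standalone sketch of the same filter, with the real state module replaced by plain dicts and made-up sample data so it runs anywhere.

from typing import List, Optional, Set

# Schedule-state names mirror the diff; values here are plain strings for the sketch.
DONE = 'DONE'
WAITING = 'WAITING'

def in_progress_job_ids(jobs: List[dict]) -> Optional[Set[str]]:
    """Return job ids whose run scripts should be replayed, or None when
    there are no jobs at all (every script is replayed in that case)."""
    if not jobs:
        return None
    return {
        str(job['job_id'])
        for job in jobs
        if job['schedule_state'] not in (DONE, WAITING)
    }

# Made-up sample data: only the live job survives the filter.
jobs = [
    {'job_id': 1, 'schedule_state': 'ALIVE'},
    {'job_id': 2, 'schedule_state': DONE},
    {'job_id': 3, 'schedule_state': WAITING},
]
print(in_progress_job_ids(jobs))  # {'1'}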
sky/templates/scp-ray.yml.j2
CHANGED
@@ -7,7 +7,7 @@ idle_timeout_minutes: 60
 
 provider:
 type: external
-module: sky.
+module: sky.provision.scp
 region: {{region}}
 cache_stopped_nodes: True
 
@@ -24,19 +24,6 @@ available_node_types:
 InstanceType: {{instance_type}}
 imageId: {{image_id}}
 diskSize: {{disk_size}}
-{% if num_nodes > 1 %}
-ray_worker_default:
-min_workers: {{num_nodes - 1}}
-max_workers: {{num_nodes - 1}}
-resources: {}
-node_config:
-AuthorizedKey: |
-skypilot:ssh_public_key_content
-InstanceType: {{instance_type}}
-imageId: {{image_id}}
-diskSize: {{disk_size}}
-
-{%- endif %}
 
 head_node_type: ray_head_default
 
@@ -50,10 +37,6 @@ file_mounts: {
 {%- endfor %}
 }
 
-rsync_exclude: []
-
-initialization_commands: []
-
 # List of shell commands to run to set up nodes.
 # NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
 # connection, which is expensive. Try your best to co-locate commands into fewer
@@ -77,36 +60,6 @@ setup_commands:
 sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
 mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
 [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf'); # This is needed for `-o allow_other` option for `goofys`;
-{{ ssh_max_sessions_config }}
-
-# Command to start ray on the head node. You don't need to change this.
-# NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
-# connection, which is expensive. Try your best to co-locate commands into fewer
-# items! The same comment applies for worker_start_ray_commands.
-#
-# Increment the following for catching performance bugs easier:
-# current num items (num SSH connections): 1
-head_start_ray_commands:
-# NOTE: --disable-usage-stats in `ray start` saves 10 seconds of idle wait.
-# Line "which prlimit ..": increase the limit of the number of open files for the raylet process, as the `ulimit` may not take effect at this point, because it requires
-# all the sessions to be reloaded. This is a workaround.
-- {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --head --port={{ray_port}} --dashboard-port={{ray_dashboard_port}} --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1;
-which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done;
-{{dump_port_command}}; {{ray_head_wait_initialized_command}}
-
-{%- if num_nodes > 1 %}
-worker_start_ray_commands:
-- {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1;
-which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done;
-{%- else %}
-worker_start_ray_commands: []
-{%- endif %}
-
-head_node: {}
-worker_nodes: {}
 
-#
-
-worker_setup_commands: []
-cluster_synced_files: []
-file_mounts_sync_continuously: False
+# Command to start ray clusters are now placed in `sky.provision.instance_setup`.
+# We do not need to list it here anymore.
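The scp-ray.yml.j2 changes above point provider.module at the new sky.provision.scp package (added in this release per the file list) and drop the Ray-autoscaler-era worker and *_start_ray_commands sections, whose logic now lives in sky.provision.instance_setup. Below is a sketch of how a dotted provider.module string could be resolved at runtime; this is illustrative only and is not SkyPilot's actual provisioner loading code.

import importlib
from types import ModuleType

def load_provider_module(dotted_path: str) -> ModuleType:
    """Import and return the provisioner module named in the cluster YAML."""
    return importlib.import_module(dotted_path)

# Example (requires the skypilot package to be installed for the real module):
# scp_provision = load_provider_module('sky.provision.scp')
# print(scp_provision.__name__)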
sky/users/permission.py
CHANGED
@@ -1,8 +1,8 @@
 """Permission service for SkyPilot API Server."""
 import contextlib
+import hashlib
 import logging
 import os
-import threading
 from typing import Generator, List
 
 import casbin
@@ -10,9 +10,11 @@ import filelock
 import sqlalchemy_adapter
 
 from sky import global_user_state
+from sky import models
 from sky import sky_logging
 from sky.skylet import constants
 from sky.users import rbac
+from sky.utils import common_utils
 
 logging.getLogger('casbin.policy').setLevel(sky_logging.ERROR)
 logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
@@ -23,43 +25,50 @@ POLICY_UPDATE_LOCK_PATH = os.path.expanduser('~/.sky/.policy_update.lock')
 POLICY_UPDATE_LOCK_TIMEOUT_SECONDS = 20
 
 _enforcer_instance = None
-_lock = threading.Lock()
 
 
 class PermissionService:
 """Permission service for SkyPilot API Server."""
 
 def __init__(self):
-
-self.init_lock = threading.Lock()
-
-def _lazy_initialize(self):
-if self.enforcer is not None:
-return
-with self.init_lock:
-if self.enforcer is not None:
-return
+with _policy_lock():
 global _enforcer_instance
 if _enforcer_instance is None:
-
-
-
-
-
-
-
-
-
-self.enforcer = enforcer
-else:
-self.enforcer = _enforcer_instance.enforcer
+_enforcer_instance = self
+engine = global_user_state.initialize_and_get_db()
+adapter = sqlalchemy_adapter.Adapter(engine)
+model_path = os.path.join(os.path.dirname(__file__),
+'model.conf')
+enforcer = casbin.Enforcer(model_path, adapter)
+self.enforcer = enforcer
+self._maybe_initialize_policies()
+self._maybe_initialize_basic_auth_user()
 else:
 self.enforcer = _enforcer_instance.enforcer
-
-
+
+def _maybe_initialize_basic_auth_user(self) -> None:
+"""Initialize basic auth user if it is enabled."""
+basic_auth = os.environ.get(constants.SKYPILOT_INITIAL_BASIC_AUTH)
+if not basic_auth:
+return
+username, password = basic_auth.split(':', 1)
+if username and password:
+user_hash = hashlib.md5(
+username.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
+user_info = global_user_state.get_user(user_hash)
+if user_info:
+logger.info(f'Basic auth user {username} already exists')
+return
+global_user_state.add_or_update_user(
+models.User(id=user_hash, name=username, password=password))
+self.enforcer.add_grouping_policy(user_hash,
+rbac.RoleName.ADMIN.value)
+self.enforcer.save_policy()
+logger.info(f'Basic auth user {username} initialized')
 
 def _maybe_initialize_policies(self) -> None:
 """Initialize policies if they don't already exist."""
+# TODO(zhwu): we should avoid running this on client side.
 logger.debug(f'Initializing policies in process: {os.getpid()}')
 self._load_policy_no_lock()
 
@@ -138,7 +147,6 @@ class PermissionService:
 
 def add_user_if_not_exists(self, user_id: str) -> None:
 """Add user role relationship."""
-self._lazy_initialize()
 with _policy_lock():
 self._add_user_if_not_exists_no_lock(user_id)
 
@@ -156,9 +164,21 @@ class PermissionService:
 return True
 return False
 
+def delete_user(self, user_id: str) -> None:
+"""Delete user role relationship."""
+with _policy_lock():
+# Get current roles
+self._load_policy_no_lock()
+# Avoid calling get_user_roles, as it will require the lock.
+current_roles = self.enforcer.get_roles_for_user(user_id)
+if not current_roles:
+logger.warning(f'User {user_id} has no roles')
+return
+self.enforcer.remove_grouping_policy(user_id, current_roles[0])
+self.enforcer.save_policy()
+
 def update_role(self, user_id: str, new_role: str) -> None:
 """Update user role relationship."""
-self._lazy_initialize()
 with _policy_lock():
 # Get current roles
 self._load_policy_no_lock()
@@ -191,7 +211,6 @@ class PermissionService:
 Returns:
 A list of role names that the user has.
 """
-self._lazy_initialize()
 self._load_policy_no_lock()
 return self.enforcer.get_roles_for_user(user_id)
 
@@ -204,7 +223,6 @@ class PermissionService:
 # it is a hot path in every request. It is ok to have a stale policy,
 # as long as it is eventually consistent.
 # self._load_policy_no_lock()
-self._lazy_initialize()
 return self.enforcer.enforce(user_id, path, method)
 
 def _load_policy_no_lock(self):
@@ -213,7 +231,6 @@ class PermissionService:
 
 def load_policy(self):
 """Load policy from storage with lock."""
-self._lazy_initialize()
 with _policy_lock():
 self._load_policy_no_lock()
 
@@ -229,7 +246,6 @@ class PermissionService:
 For public workspaces, the permission is granted via a wildcard policy
 ('*').
 """
-self._lazy_initialize()
 if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
 # When it is not on API server, we allow all users to access all
 # workspaces, as the workspace check has been done on API server.
@@ -257,7 +273,6 @@ class PermissionService:
 For public workspaces, this should be ['*'].
 For private workspaces, this should be specific user IDs.
 """
-self._lazy_initialize()
 with _policy_lock():
 for user in users:
 logger.debug(f'Adding workspace policy: user={user}, '
@@ -275,7 +290,6 @@ class PermissionService:
 For public workspaces, this should be ['*'].
 For private workspaces, this should be specific user IDs.
 """
-self._lazy_initialize()
 with _policy_lock():
 self._load_policy_no_lock()
 # Remove all existing policies for this workspace
@@ -289,7 +303,6 @@ class PermissionService:
 
 def remove_workspace_policy(self, workspace_name: str) -> None:
 """Remove workspace policy."""
-self._lazy_initialize()
 with _policy_lock():
 self.enforcer.remove_filtered_policy(1, workspace_name)
 self.enforcer.save_policy()
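In the permission.py changes above, the basic-auth bootstrap derives the admin user's id by MD5-hashing the username and truncating to common_utils.USER_HASH_LENGTH. Below is a standalone sketch of that derivation; the length constant is assumed to be 8 here purely for illustration, and the real value lives in sky.utils.common_utils.

import hashlib

USER_HASH_LENGTH = 8  # assumption for this sketch

def basic_auth_user_hash(basic_auth: str) -> str:
    """Derive a stable user id from a `username:password` pair."""
    username, _password = basic_auth.split(':', 1)
    return hashlib.md5(username.encode()).hexdigest()[:USER_HASH_LENGTH]

print(basic_auth_user_hash('alice:secret'))  # prints an 8-char hex id derived from 'alice'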
sky/users/rbac.py
CHANGED
@@ -25,8 +25,17 @@ _DEFAULT_USER_BLOCKLIST = [{
 'path': '/workspaces/delete',
 'method': 'POST'
 }, {
-'path': '/users/
+'path': '/users/delete',
 'method': 'POST'
+}, {
+'path': '/users/create',
+'method': 'POST'
+}, {
+'path': '/users/import',
+'method': 'POST'
+}, {
+'path': '/users/export',
+'method': 'GET'
 }]
 
 
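The rbac.py hunk above extends _DEFAULT_USER_BLOCKLIST so that regular (non-admin) users are denied the new user-management endpoints. Below is a sketch of checking a request against a blocklist of this shape; the entries mirror the visible part of the diff, and the check itself is illustrative rather than SkyPilot's actual Casbin-based enforcement.

from typing import Dict, List

USER_BLOCKLIST: List[Dict[str, str]] = [
    {'path': '/workspaces/delete', 'method': 'POST'},
    {'path': '/users/delete', 'method': 'POST'},
    {'path': '/users/create', 'method': 'POST'},
    {'path': '/users/import', 'method': 'POST'},
    {'path': '/users/export', 'method': 'GET'},
]

def is_blocked_for_regular_user(path: str, method: str) -> bool:
    """Return True if a non-admin user should be denied this endpoint."""
    return any(entry['path'] == path and entry['method'] == method.upper()
               for entry in USER_BLOCKLIST)

print(is_blocked_for_regular_user('/users/export', 'GET'))  # True
print(is_blocked_for_regular_user('/users/role', 'GET'))    # False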