skypilot-nightly 1.0.0.dev20250616__py3-none-any.whl → 1.0.0.dev20250618__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. sky/__init__.py +2 -4
  2. sky/backends/backend_utils.py +7 -0
  3. sky/backends/cloud_vm_ray_backend.py +91 -96
  4. sky/cli.py +5 -6311
  5. sky/client/cli.py +66 -639
  6. sky/client/sdk.py +22 -2
  7. sky/clouds/kubernetes.py +8 -0
  8. sky/clouds/scp.py +7 -26
  9. sky/clouds/utils/scp_utils.py +177 -124
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/{OZxMW3bxAJmqgn5f4MdhO → LRpGymRCqq-feuFyoWz4m}/_buildManifest.js +1 -1
  12. sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +50 -0
  14. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-36bc0962129f72df.js +6 -0
  15. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-cf490d1fa38f3740.js +16 -0
  16. sky/dashboard/out/_next/static/chunks/pages/users-928edf039219e47b.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/webpack-ebc2404fd6ce581c.js +1 -0
  18. sky/dashboard/out/_next/static/css/6c12ecc3bd2239b6.css +3 -0
  19. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  20. sky/dashboard/out/clusters/[cluster].html +1 -1
  21. sky/dashboard/out/clusters.html +1 -1
  22. sky/dashboard/out/config.html +1 -1
  23. sky/dashboard/out/index.html +1 -1
  24. sky/dashboard/out/infra/[context].html +1 -1
  25. sky/dashboard/out/infra.html +1 -1
  26. sky/dashboard/out/jobs/[job].html +1 -1
  27. sky/dashboard/out/jobs.html +1 -1
  28. sky/dashboard/out/users.html +1 -1
  29. sky/dashboard/out/workspace/new.html +1 -1
  30. sky/dashboard/out/workspaces/[name].html +1 -1
  31. sky/dashboard/out/workspaces.html +1 -1
  32. sky/global_user_state.py +50 -11
  33. sky/jobs/controller.py +98 -31
  34. sky/jobs/scheduler.py +37 -29
  35. sky/jobs/server/core.py +36 -3
  36. sky/jobs/state.py +69 -9
  37. sky/jobs/utils.py +11 -0
  38. sky/logs/__init__.py +17 -0
  39. sky/logs/agent.py +73 -0
  40. sky/logs/gcp.py +91 -0
  41. sky/models.py +1 -0
  42. sky/provision/__init__.py +1 -0
  43. sky/provision/instance_setup.py +35 -0
  44. sky/provision/provisioner.py +11 -0
  45. sky/provision/scp/__init__.py +15 -0
  46. sky/provision/scp/config.py +93 -0
  47. sky/provision/scp/instance.py +528 -0
  48. sky/resources.py +164 -29
  49. sky/server/common.py +21 -9
  50. sky/server/requests/payloads.py +19 -1
  51. sky/server/server.py +121 -29
  52. sky/setup_files/dependencies.py +11 -1
  53. sky/skylet/constants.py +48 -1
  54. sky/skylet/job_lib.py +83 -19
  55. sky/task.py +171 -21
  56. sky/templates/kubernetes-ray.yml.j2 +60 -4
  57. sky/templates/scp-ray.yml.j2 +3 -50
  58. sky/users/permission.py +47 -34
  59. sky/users/rbac.py +10 -1
  60. sky/users/server.py +274 -9
  61. sky/utils/command_runner.py +1 -1
  62. sky/utils/common_utils.py +16 -14
  63. sky/utils/context.py +1 -1
  64. sky/utils/controller_utils.py +12 -3
  65. sky/utils/dag_utils.py +17 -4
  66. sky/utils/kubernetes/deploy_remote_cluster.py +17 -8
  67. sky/utils/schemas.py +83 -5
  68. {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/METADATA +9 -1
  69. {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/RECORD +80 -79
  70. sky/benchmark/__init__.py +0 -0
  71. sky/benchmark/benchmark_state.py +0 -295
  72. sky/benchmark/benchmark_utils.py +0 -641
  73. sky/dashboard/out/_next/static/chunks/600.bd2ed8c076b720ec.js +0 -16
  74. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-59950b2f83b66e48.js +0 -6
  75. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b3dbf38b51cb29be.js +0 -16
  76. sky/dashboard/out/_next/static/chunks/pages/users-c69ffcab9d6e5269.js +0 -1
  77. sky/dashboard/out/_next/static/chunks/webpack-1b69b196a4dbffef.js +0 -1
  78. sky/dashboard/out/_next/static/css/8e97adcaacc15293.css +0 -3
  79. sky/skylet/providers/scp/__init__.py +0 -2
  80. sky/skylet/providers/scp/config.py +0 -149
  81. sky/skylet/providers/scp/node_provider.py +0 -578
  82. /sky/dashboard/out/_next/static/{OZxMW3bxAJmqgn5f4MdhO → LRpGymRCqq-feuFyoWz4m}/_ssgManifest.js +0 -0
  83. /sky/dashboard/out/_next/static/chunks/{37-824c707421f6f003.js → 37-3a4d77ad62932eaf.js} +0 -0
  84. /sky/dashboard/out/_next/static/chunks/{843-ab9c4f609239155f.js → 843-b3040e493f6e7947.js} +0 -0
  85. /sky/dashboard/out/_next/static/chunks/{938-385d190b95815e11.js → 938-1493ac755eadeb35.js} +0 -0
  86. /sky/dashboard/out/_next/static/chunks/{973-c807fc34f09c7df3.js → 973-db3c97c2bfbceb65.js} +0 -0
  87. /sky/dashboard/out/_next/static/chunks/pages/{_app-32b2caae3445bf3b.js → _app-c416e87d5c2715cf.js} +0 -0
  88. /sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-c8c2191328532b7d.js → [name]-c4ff1ec05e2f3daf.js} +0 -0
  89. {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/WHEEL +0 -0
  90. {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/entry_points.txt +0 -0
  91. {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/licenses/LICENSE +0 -0
  92. {skypilot_nightly-1.0.0.dev20250616.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/top_level.txt +0 -0
@@ -273,6 +273,15 @@ available_node_types:
  {% if (k8s_acc_label_key is not none and k8s_acc_label_values is not none) %}
  skypilot-binpack: "gpu"
  {% endif %}
+ {% if k8s_kueue_local_queue_name %}
+ kueue.x-k8s.io/queue-name: {{k8s_kueue_local_queue_name}}
+ kueue.x-k8s.io/pod-group-name: {{cluster_name_on_cloud}}
+ {% endif %}
+ {% if k8s_kueue_local_queue_name %}
+ annotations:
+ kueue.x-k8s.io/retriable-in-group: "false"
+ kueue.x-k8s.io/pod-group-total-count: "{{ num_nodes|string }}"
+ {% endif %}
  spec:
  # serviceAccountName: skypilot-service-account
  serviceAccountName: {{k8s_service_account_name}}
@@ -632,19 +641,66 @@ available_node_types:
  {% if high_availability %}
  mkdir -p {{k8s_high_availability_deployment_run_script_dir}}
  if [ -f {{k8s_high_availability_deployment_volume_mount_path}}/k8s_container_ready ]; then
+ SKYPILOT_HA_RECOVERY_LOG="/tmp/ha_recovery.log"
+ echo "Starting HA recovery at $(date)" >> $SKYPILOT_HA_RECOVERY_LOG
+ start_time=$SECONDS
+ retry_count=0
+
+ # Wait for Ray to be ready, as the following commands is depending on Ray.
+ GET_RAY_STATUS_CMD=$({{sky_python_cmd}} -c 'from sky.provision import instance_setup; print(instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND)')
+ while true; do
+ retry_count=$((retry_count + 1))
+ current_duration=$(( SECONDS - start_time ))
+ echo "Attempt $retry_count to get Ray status after $current_duration seconds..." >> $SKYPILOT_HA_RECOVERY_LOG
+
+ bash --login -c "$GET_RAY_STATUS_CMD"
+ if [ $? -eq 0 ]; then
+ wait_duration=$(( SECONDS - start_time ))
+ echo "Ray ready after waiting $wait_duration seconds (took $retry_count attempts)" >> $SKYPILOT_HA_RECOVERY_LOG
+ break
+ fi
+ echo "Waiting for Ray to be ready..." >> $SKYPILOT_HA_RECOVERY_LOG
+ sleep 2
+ done
+
  # ! Keep this aligned with `CloudVmRayBackend._setup()`
- # Suppose all `task.setup` are the same for skyserve controller task.
+ # Suppose all `task.setup` are the same for sky serve / managed jobs controller task.
  # So be careful for compatibility issue once you change it.
  chmod +x {{k8s_high_availability_deployment_setup_script_path}}
  /bin/bash --login -c "true && export OMP_NUM_THREADS=1 PYTHONWARNINGS='ignore' && {{k8s_high_availability_deployment_setup_script_path}} > /tmp/controller_recovery_setup_commands.log 2>&1"
- echo "=== Controller setup commands completed for recovery ==="
-
+ echo "=== Controller setup commands completed for recovery at $(date) ===" >> $SKYPILOT_HA_RECOVERY_LOG
+
+ touch {{k8s_high_availability_restarting_signal_file}}
+ # Get all in-progress jobs from managed jobs controller. We skip any jobs that are already done.
+ # Also, skip the jobs that are waiting to be scheduled as those does not have a controller process running.
+ # For SkyServe, this will be None and every service will be recovered. This is because SkyServe
+ # will delete the service from the database after it is terminated so everything in the database is running.
+ ALL_IN_PROGRESS_JOBS=$({{sky_python_cmd}} -c "from sky.jobs import state; jobs = state.get_managed_jobs(); print(' '.join({str(job['job_id']) for job in jobs if job['schedule_state'] not in [state.ManagedJobScheduleState.DONE, state.ManagedJobScheduleState.WAITING]}) if jobs else None)")
+ if [ "$ALL_IN_PROGRESS_JOBS" != "None" ]; then
+ read -ra ALL_IN_PROGRESS_JOBS_SEQ <<< "$ALL_IN_PROGRESS_JOBS"
+ fi
  for file in {{k8s_high_availability_deployment_run_script_dir}}/*; do
+ # This is the cluster job id on managed jobs controller, but it is guaranteed to be the same as the managed job id,
+ # so we directly use it here. See `CloudVmRayBackend._exec_code_on_head::_dump_code_to_file` for more details.
+ JOB_ID=$(basename $file | sed 's/sky_job_//')
+ # If the list of in-progress jobs is not None (meaning this is a managed job HA controller) and job is not in-progress, skip.
+ if [ "$ALL_IN_PROGRESS_JOBS" != "None" ]; then
+ if [[ ! " ${ALL_IN_PROGRESS_JOBS_SEQ[@]} " =~ " ${JOB_ID} " ]]; then
+ continue
+ fi
+ fi
  # ! Keep this aligned with `CloudVmRayBackend._execute()`
  chmod +x $file
+ # TODO(tian): This logic may run a lot of things if the jobs controller previously had many jobs.
+ # We should do more tests and make sure it will scale well.
  /bin/bash --login -c "true && export OMP_NUM_THREADS=1 PYTHONWARNINGS='ignore' && $file > /tmp/task_run_$(basename $file).log 2>&1"
- echo "=== Controller task run for service (file: $file) completed for recovery ==="
+ echo "=== Controller task run for service / job (file: $file) completed for recovery at $(date) ===" >> $SKYPILOT_HA_RECOVERY_LOG
  done
+ rm {{k8s_high_availability_restarting_signal_file}}
+
+ duration=$(( SECONDS - start_time ))
+ echo "HA recovery completed at $(date)" >> $SKYPILOT_HA_RECOVERY_LOG
+ echo "Total recovery time: $duration seconds" >> $SKYPILOT_HA_RECOVERY_LOG
  fi

  touch {{k8s_high_availability_deployment_volume_mount_path}}/k8s_container_ready
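Note: for readability, the Python one-liner embedded in the HA recovery script above (the ALL_IN_PROGRESS_JOBS line) is expanded below. This is only an illustrative sketch of what that command evaluates; it is not part of the template.

# Expanded form of the one-liner passed to {{sky_python_cmd}}: collect the IDs
# of managed jobs whose schedule state is neither DONE nor WAITING, and print
# None when the controller has no jobs (the SkyServe case).
from sky.jobs import state

jobs = state.get_managed_jobs()
in_progress_ids = {
    str(job['job_id'])
    for job in jobs
    if job['schedule_state'] not in [
        state.ManagedJobScheduleState.DONE,
        state.ManagedJobScheduleState.WAITING,
    ]
}
print(' '.join(in_progress_ids) if jobs else None)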
sky/templates/scp-ray.yml.j2 CHANGED
@@ -7,7 +7,7 @@ idle_timeout_minutes: 60

  provider:
  type: external
- module: sky.skylet.providers.scp.SCPNodeProvider
+ module: sky.provision.scp
  region: {{region}}
  cache_stopped_nodes: True

@@ -24,19 +24,6 @@ available_node_types:
  InstanceType: {{instance_type}}
  imageId: {{image_id}}
  diskSize: {{disk_size}}
- {% if num_nodes > 1 %}
- ray_worker_default:
- min_workers: {{num_nodes - 1}}
- max_workers: {{num_nodes - 1}}
- resources: {}
- node_config:
- AuthorizedKey: |
- skypilot:ssh_public_key_content
- InstanceType: {{instance_type}}
- imageId: {{image_id}}
- diskSize: {{disk_size}}
-
- {%- endif %}

  head_node_type: ray_head_default

@@ -50,10 +37,6 @@ file_mounts: {
  {%- endfor %}
  }

- rsync_exclude: []
-
- initialization_commands: []
-
  # List of shell commands to run to set up nodes.
  # NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
  # connection, which is expensive. Try your best to co-locate commands into fewer
@@ -77,36 +60,6 @@ setup_commands:
  sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
  mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
  [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf'); # This is needed for `-o allow_other` option for `goofys`;
- {{ ssh_max_sessions_config }}
-
- # Command to start ray on the head node. You don't need to change this.
- # NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
- # connection, which is expensive. Try your best to co-locate commands into fewer
- # items! The same comment applies for worker_start_ray_commands.
- #
- # Increment the following for catching performance bugs easier:
- # current num items (num SSH connections): 1
- head_start_ray_commands:
- # NOTE: --disable-usage-stats in `ray start` saves 10 seconds of idle wait.
- # Line "which prlimit ..": increase the limit of the number of open files for the raylet process, as the `ulimit` may not take effect at this point, because it requires
- # all the sessions to be reloaded. This is a workaround.
- - {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --head --port={{ray_port}} --dashboard-port={{ray_dashboard_port}} --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1;
- which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done;
- {{dump_port_command}}; {{ray_head_wait_initialized_command}}
-
- {%- if num_nodes > 1 %}
- worker_start_ray_commands:
- - {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1;
- which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done;
- {%- else %}
- worker_start_ray_commands: []
- {%- endif %}
-
- head_node: {}
- worker_nodes: {}

- # These fields are required for external cloud providers.
- head_setup_commands: []
- worker_setup_commands: []
- cluster_synced_files: []
- file_mounts_sync_continuously: False
+ # Command to start ray clusters are now placed in `sky.provision.instance_setup`.
+ # We do not need to list it here anymore.
sky/users/permission.py CHANGED
@@ -1,8 +1,8 @@
  """Permission service for SkyPilot API Server."""
  import contextlib
+ import hashlib
  import logging
  import os
- import threading
  from typing import Generator, List

  import casbin
@@ -10,9 +10,11 @@ import filelock
  import sqlalchemy_adapter

  from sky import global_user_state
+ from sky import models
  from sky import sky_logging
  from sky.skylet import constants
  from sky.users import rbac
+ from sky.utils import common_utils

  logging.getLogger('casbin.policy').setLevel(sky_logging.ERROR)
  logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
@@ -23,43 +25,50 @@ POLICY_UPDATE_LOCK_PATH = os.path.expanduser('~/.sky/.policy_update.lock')
  POLICY_UPDATE_LOCK_TIMEOUT_SECONDS = 20

  _enforcer_instance = None
- _lock = threading.Lock()


  class PermissionService:
      """Permission service for SkyPilot API Server."""

      def __init__(self):
-         self.enforcer = None
-         self.init_lock = threading.Lock()
-
-     def _lazy_initialize(self):
-         if self.enforcer is not None:
-             return
-         with self.init_lock:
-             if self.enforcer is not None:
-                 return
+         with _policy_lock():
              global _enforcer_instance
              if _enforcer_instance is None:
-                 # For different threads, we share the same enforcer instance.
-                 with _lock:
-                     if _enforcer_instance is None:
-                         _enforcer_instance = self
-                         engine = global_user_state.initialize_and_get_db()
-                         adapter = sqlalchemy_adapter.Adapter(engine)
-                         model_path = os.path.join(os.path.dirname(__file__),
-                                                   'model.conf')
-                         enforcer = casbin.Enforcer(model_path, adapter)
-                         self.enforcer = enforcer
-                     else:
-                         self.enforcer = _enforcer_instance.enforcer
+                 _enforcer_instance = self
+                 engine = global_user_state.initialize_and_get_db()
+                 adapter = sqlalchemy_adapter.Adapter(engine)
+                 model_path = os.path.join(os.path.dirname(__file__),
+                                           'model.conf')
+                 enforcer = casbin.Enforcer(model_path, adapter)
+                 self.enforcer = enforcer
+                 self._maybe_initialize_policies()
+                 self._maybe_initialize_basic_auth_user()
              else:
                  self.enforcer = _enforcer_instance.enforcer
-         with _policy_lock():
-             self._maybe_initialize_policies()
+
+     def _maybe_initialize_basic_auth_user(self) -> None:
+         """Initialize basic auth user if it is enabled."""
+         basic_auth = os.environ.get(constants.SKYPILOT_INITIAL_BASIC_AUTH)
+         if not basic_auth:
+             return
+         username, password = basic_auth.split(':', 1)
+         if username and password:
+             user_hash = hashlib.md5(
+                 username.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
+             user_info = global_user_state.get_user(user_hash)
+             if user_info:
+                 logger.info(f'Basic auth user {username} already exists')
+                 return
+             global_user_state.add_or_update_user(
+                 models.User(id=user_hash, name=username, password=password))
+             self.enforcer.add_grouping_policy(user_hash,
+                                               rbac.RoleName.ADMIN.value)
+             self.enforcer.save_policy()
+             logger.info(f'Basic auth user {username} initialized')

      def _maybe_initialize_policies(self) -> None:
          """Initialize policies if they don't already exist."""
+         # TODO(zhwu): we should avoid running this on client side.
          logger.debug(f'Initializing policies in process: {os.getpid()}')
          self._load_policy_no_lock()

@@ -138,7 +147,6 @@

      def add_user_if_not_exists(self, user_id: str) -> None:
          """Add user role relationship."""
-         self._lazy_initialize()
          with _policy_lock():
              self._add_user_if_not_exists_no_lock(user_id)

@@ -156,9 +164,21 @@
                  return True
          return False

+     def delete_user(self, user_id: str) -> None:
+         """Delete user role relationship."""
+         with _policy_lock():
+             # Get current roles
+             self._load_policy_no_lock()
+             # Avoid calling get_user_roles, as it will require the lock.
+             current_roles = self.enforcer.get_roles_for_user(user_id)
+             if not current_roles:
+                 logger.warning(f'User {user_id} has no roles')
+                 return
+             self.enforcer.remove_grouping_policy(user_id, current_roles[0])
+             self.enforcer.save_policy()
+
      def update_role(self, user_id: str, new_role: str) -> None:
          """Update user role relationship."""
-         self._lazy_initialize()
          with _policy_lock():
              # Get current roles
              self._load_policy_no_lock()
@@ -191,7 +211,6 @@
          Returns:
              A list of role names that the user has.
          """
-         self._lazy_initialize()
          self._load_policy_no_lock()
          return self.enforcer.get_roles_for_user(user_id)

@@ -204,7 +223,6 @@
          # it is a hot path in every request. It is ok to have a stale policy,
          # as long as it is eventually consistent.
          # self._load_policy_no_lock()
-         self._lazy_initialize()
          return self.enforcer.enforce(user_id, path, method)

      def _load_policy_no_lock(self):
@@ -213,7 +231,6 @@

      def load_policy(self):
          """Load policy from storage with lock."""
-         self._lazy_initialize()
          with _policy_lock():
              self._load_policy_no_lock()

@@ -229,7 +246,6 @@
          For public workspaces, the permission is granted via a wildcard policy
          ('*').
          """
-         self._lazy_initialize()
          if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
              # When it is not on API server, we allow all users to access all
              # workspaces, as the workspace check has been done on API server.
@@ -257,7 +273,6 @@
          For public workspaces, this should be ['*'].
          For private workspaces, this should be specific user IDs.
          """
-         self._lazy_initialize()
          with _policy_lock():
              for user in users:
                  logger.debug(f'Adding workspace policy: user={user}, '
@@ -275,7 +290,6 @@
          For public workspaces, this should be ['*'].
          For private workspaces, this should be specific user IDs.
          """
-         self._lazy_initialize()
          with _policy_lock():
              self._load_policy_no_lock()
              # Remove all existing policies for this workspace
@@ -289,7 +303,6 @@

      def remove_workspace_policy(self, workspace_name: str) -> None:
          """Remove workspace policy."""
-         self._lazy_initialize()
          with _policy_lock():
              self.enforcer.remove_filtered_policy(1, workspace_name)
              self.enforcer.save_policy()
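Note: the new _maybe_initialize_basic_auth_user above expects SKYPILOT_INITIAL_BASIC_AUTH in username:password form and derives the admin user's ID from the username alone. A minimal sketch of that derivation follows; the USER_HASH_LENGTH value of 8 is an assumption standing in for common_utils.USER_HASH_LENGTH.

import hashlib

USER_HASH_LENGTH = 8  # assumption: stand-in for common_utils.USER_HASH_LENGTH


def basic_auth_user_hash(basic_auth: str) -> str:
    # Same hashing as in the diff above: md5 of the username, truncated.
    username, _password = basic_auth.split(':', 1)
    return hashlib.md5(username.encode()).hexdigest()[:USER_HASH_LENGTH]


print(basic_auth_user_hash('admin:changeme'))  # -> '21232f29'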
sky/users/rbac.py CHANGED
@@ -25,8 +25,17 @@ _DEFAULT_USER_BLOCKLIST = [{
      'path': '/workspaces/delete',
      'method': 'POST'
  }, {
-     'path': '/users/update',
+     'path': '/users/delete',
      'method': 'POST'
+ }, {
+     'path': '/users/create',
+     'method': 'POST'
+ }, {
+     'path': '/users/import',
+     'method': 'POST'
+ }, {
+     'path': '/users/export',
+     'method': 'GET'
  }]