skypilot-nightly 1.0.0.dev20251210__py3-none-any.whl → 1.0.0.dev20260112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +4 -2
- sky/adaptors/slurm.py +159 -72
- sky/backends/backend_utils.py +52 -10
- sky/backends/cloud_vm_ray_backend.py +192 -32
- sky/backends/task_codegen.py +40 -2
- sky/catalog/data_fetchers/fetch_gcp.py +9 -1
- sky/catalog/data_fetchers/fetch_nebius.py +1 -1
- sky/catalog/data_fetchers/fetch_vast.py +4 -2
- sky/catalog/seeweb_catalog.py +30 -15
- sky/catalog/shadeform_catalog.py +5 -2
- sky/catalog/slurm_catalog.py +0 -7
- sky/catalog/vast_catalog.py +30 -6
- sky/check.py +11 -8
- sky/client/cli/command.py +106 -54
- sky/client/interactive_utils.py +190 -0
- sky/client/sdk.py +8 -0
- sky/client/sdk_async.py +9 -0
- sky/clouds/aws.py +60 -2
- sky/clouds/azure.py +2 -0
- sky/clouds/kubernetes.py +2 -0
- sky/clouds/runpod.py +38 -7
- sky/clouds/slurm.py +44 -12
- sky/clouds/ssh.py +1 -1
- sky/clouds/vast.py +30 -17
- sky/core.py +69 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/3nu-b8raeKRNABZ2d4GAG/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-0565f8975a7dcd10.js +6 -0
- sky/dashboard/out/_next/static/chunks/2109-55a1546d793574a7.js +11 -0
- sky/dashboard/out/_next/static/chunks/2521-099b07cd9e4745bf.js +26 -0
- sky/dashboard/out/_next/static/chunks/2755.a636e04a928a700e.js +31 -0
- sky/dashboard/out/_next/static/chunks/3495.05eab4862217c1a5.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.cfc5dcc9434fd98c.js +1 -0
- sky/dashboard/out/_next/static/chunks/3981.645d01bf9c8cad0c.js +21 -0
- sky/dashboard/out/_next/static/chunks/4083-0115d67c1fb57d6c.js +21 -0
- sky/dashboard/out/_next/static/chunks/{8640.5b9475a2d18c5416.js → 429.a58e9ba9742309ed.js} +2 -2
- sky/dashboard/out/_next/static/chunks/4555.8e221537181b5dc1.js +6 -0
- sky/dashboard/out/_next/static/chunks/4725.937865b81fdaaebb.js +6 -0
- sky/dashboard/out/_next/static/chunks/6082-edabd8f6092300ce.js +25 -0
- sky/dashboard/out/_next/static/chunks/6989-49cb7dca83a7a62d.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-630bd2a2257275f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/7248-a99800d4db8edabd.js +1 -0
- sky/dashboard/out/_next/static/chunks/754-cfc5d4ad1b843d29.js +18 -0
- sky/dashboard/out/_next/static/chunks/8050-dd8aa107b17dce00.js +16 -0
- sky/dashboard/out/_next/static/chunks/8056-d4ae1e0cb81e7368.js +1 -0
- sky/dashboard/out/_next/static/chunks/8555.011023e296c127b3.js +6 -0
- sky/dashboard/out/_next/static/chunks/8821-93c25df904a8362b.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-0662594b69432ade.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.f15c91c97d124a5f.js +6 -0
- sky/dashboard/out/_next/static/chunks/{9353-8369df1cf105221c.js → 9353-7ad6bd01858556f1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-5a86569acad99764.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8297476714acb4ac.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-337c3ba1085f1210.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-9e5d47818b9bdadd.js → clusters-57632ff3684a8b5c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-5fd3a453c079c2ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-9f85c02c9c6cae9e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90f16972cbecf354.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-2dd42fc37aad427a.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ed806aeace26b972.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-bec34706b36f3524.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{volumes-ef19d49c6d0e8500.js → volumes-a83ba9b38dff7ea9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-96e0f298308da7e2.js → [name]-c781e9c3e52ef9fc.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-91e0942f47310aae.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-cfe59cf684ee13b9.js +1 -0
- sky/dashboard/out/_next/static/css/b0dbca28f027cc19.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +26 -12
- sky/data/mounting_utils.py +29 -4
- sky/global_user_state.py +108 -16
- sky/jobs/client/sdk.py +8 -3
- sky/jobs/controller.py +191 -31
- sky/jobs/recovery_strategy.py +109 -11
- sky/jobs/server/core.py +81 -4
- sky/jobs/server/server.py +14 -0
- sky/jobs/state.py +417 -19
- sky/jobs/utils.py +73 -80
- sky/models.py +9 -0
- sky/optimizer.py +2 -1
- sky/provision/__init__.py +11 -9
- sky/provision/kubernetes/utils.py +122 -15
- sky/provision/kubernetes/volume.py +52 -17
- sky/provision/provisioner.py +2 -1
- sky/provision/runpod/instance.py +3 -1
- sky/provision/runpod/utils.py +13 -1
- sky/provision/runpod/volume.py +25 -9
- sky/provision/slurm/instance.py +75 -29
- sky/provision/slurm/utils.py +213 -107
- sky/provision/vast/utils.py +1 -0
- sky/resources.py +135 -13
- sky/schemas/api/responses.py +4 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +1 -1
- sky/schemas/db/spot_jobs/008_add_full_resources.py +34 -0
- sky/schemas/db/spot_jobs/009_job_events.py +32 -0
- sky/schemas/db/spot_jobs/010_job_events_timestamp_with_timezone.py +43 -0
- sky/schemas/db/spot_jobs/011_add_links.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +9 -5
- sky/schemas/generated/jobsv1_pb2.pyi +12 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +44 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +32 -28
- sky/schemas/generated/managed_jobsv1_pb2.pyi +11 -2
- sky/serve/serve_utils.py +232 -40
- sky/server/common.py +17 -0
- sky/server/constants.py +1 -1
- sky/server/metrics.py +6 -3
- sky/server/plugins.py +16 -0
- sky/server/requests/payloads.py +18 -0
- sky/server/requests/request_names.py +2 -0
- sky/server/requests/requests.py +28 -10
- sky/server/requests/serializers/encoders.py +5 -0
- sky/server/requests/serializers/return_value_serializers.py +14 -4
- sky/server/server.py +434 -107
- sky/server/uvicorn.py +5 -0
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +21 -10
- sky/sky_logging.py +2 -1
- sky/skylet/constants.py +22 -5
- sky/skylet/executor/slurm.py +4 -6
- sky/skylet/job_lib.py +89 -4
- sky/skylet/services.py +18 -3
- sky/ssh_node_pools/deploy/tunnel/cleanup-tunnel.sh +62 -0
- sky/ssh_node_pools/deploy/tunnel/ssh-tunnel.sh +379 -0
- sky/templates/kubernetes-ray.yml.j2 +4 -6
- sky/templates/slurm-ray.yml.j2 +32 -2
- sky/templates/websocket_proxy.py +18 -41
- sky/users/permission.py +61 -51
- sky/utils/auth_utils.py +42 -0
- sky/utils/cli_utils/status_utils.py +19 -5
- sky/utils/cluster_utils.py +10 -3
- sky/utils/command_runner.py +256 -94
- sky/utils/command_runner.pyi +16 -0
- sky/utils/common_utils.py +30 -29
- sky/utils/context.py +32 -0
- sky/utils/db/db_utils.py +36 -6
- sky/utils/db/migration_utils.py +41 -21
- sky/utils/infra_utils.py +5 -1
- sky/utils/instance_links.py +139 -0
- sky/utils/interactive_utils.py +49 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +42 -33
- sky/utils/kubernetes/rsync_helper.sh +5 -1
- sky/utils/plugin_extensions/__init__.py +14 -0
- sky/utils/plugin_extensions/external_failure_source.py +176 -0
- sky/utils/resources_utils.py +10 -8
- sky/utils/rich_utils.py +9 -11
- sky/utils/schemas.py +63 -20
- sky/utils/status_lib.py +7 -0
- sky/utils/subprocess_utils.py +17 -0
- sky/volumes/client/sdk.py +6 -3
- sky/volumes/server/core.py +65 -27
- sky_templates/ray/start_cluster +8 -4
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/METADATA +53 -57
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/RECORD +172 -162
- sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +0 -11
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +0 -6
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +0 -26
- sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +0 -1
- sky/dashboard/out/_next/static/chunks/3800-b589397dc09c5b4e.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +0 -1
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
- sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +0 -1
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +0 -30
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +0 -41
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +0 -6
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +0 -31
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +0 -30
- sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-12c559ec4d81fdbd.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-d187cd0413d72475.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +0 -21
- sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-cb4da3abe08ebf19.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +0 -1
- sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +0 -3
- /sky/dashboard/out/_next/static/{KYAhEFa3FTfq4JyKVgo-s → 3nu-b8raeKRNABZ2d4GAG}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/plugins/{[...slug]-4f46050ca065d8f8.js → [...slug]-449a9f5a3bb20fb3.js} +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251210.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/top_level.txt +0 -0
sky/users/permission.py
CHANGED
|
@@ -3,6 +3,7 @@ import contextlib
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
+
import threading
|
|
6
7
|
from typing import Generator, List, Optional
|
|
7
8
|
|
|
8
9
|
import casbin
|
|
@@ -36,16 +37,23 @@ class PermissionService:
|
|
|
36
37
|
|
|
37
38
|
def __init__(self):
|
|
38
39
|
self.enforcer: Optional[casbin.Enforcer] = None
|
|
40
|
+
self._lock = threading.Lock()
|
|
39
41
|
|
|
40
|
-
def
|
|
42
|
+
def initialize(self):
|
|
43
|
+
self._lazy_initialize(full_initialize=True)
|
|
44
|
+
|
|
45
|
+
def _lazy_initialize(self, full_initialize: bool = False):
|
|
41
46
|
if self.enforcer is not None:
|
|
42
47
|
return
|
|
43
|
-
with
|
|
48
|
+
with self._lock:
|
|
49
|
+
if self.enforcer is not None:
|
|
50
|
+
return
|
|
44
51
|
global _enforcer_instance
|
|
45
52
|
if _enforcer_instance is None:
|
|
46
53
|
engine = global_user_state.initialize_and_get_db()
|
|
47
|
-
|
|
48
|
-
|
|
54
|
+
if full_initialize:
|
|
55
|
+
db_utils.add_all_tables_to_db_sqlalchemy(
|
|
56
|
+
sqlalchemy_adapter.Base.metadata, engine)
|
|
49
57
|
adapter = sqlalchemy_adapter.Adapter(
|
|
50
58
|
engine, db_class=sqlalchemy_adapter.CasbinRule)
|
|
51
59
|
model_path = os.path.join(os.path.dirname(__file__),
|
|
@@ -56,8 +64,10 @@ class PermissionService:
|
|
|
56
64
|
# is successfully initialized, if we change it and then fail
|
|
57
65
|
# we will set it to None and all subsequent calls will fail.
|
|
58
66
|
_enforcer_instance = self
|
|
59
|
-
|
|
60
|
-
|
|
67
|
+
if full_initialize:
|
|
68
|
+
with _policy_lock():
|
|
69
|
+
self._maybe_initialize_policies()
|
|
70
|
+
self._maybe_initialize_basic_auth_user()
|
|
61
71
|
else:
|
|
62
72
|
assert _enforcer_instance is not None
|
|
63
73
|
self.enforcer = _enforcer_instance.enforcer
|
|
@@ -112,14 +122,14 @@ class PermissionService:
|
|
|
112
122
|
def _maybe_initialize_policies(self) -> None:
|
|
113
123
|
"""Initialize policies if they don't already exist."""
|
|
114
124
|
logger.debug(f'Initializing policies in process: {os.getpid()}')
|
|
115
|
-
self._load_policy_no_lock()
|
|
116
125
|
|
|
117
126
|
policy_updated = False
|
|
118
127
|
|
|
119
128
|
# Check if policies are already initialized by looking for existing
|
|
120
129
|
# permission policies in the enforcer
|
|
121
130
|
enforcer = self._ensure_enforcer()
|
|
122
|
-
|
|
131
|
+
# Convert existing policies to set of tuples for O(1) lookups
|
|
132
|
+
existing_policies = {tuple(p) for p in enforcer.get_policy()}
|
|
123
133
|
|
|
124
134
|
# Get plugin RBAC rules dynamically
|
|
125
135
|
plugin_rules = self._get_plugin_rbac_rules()
|
|
@@ -129,12 +139,12 @@ class PermissionService:
|
|
|
129
139
|
role_permissions = rbac.get_role_permissions(plugin_rules=plugin_rules)
|
|
130
140
|
expected_policies = []
|
|
131
141
|
for role, permissions in role_permissions.items():
|
|
132
|
-
if permissions
|
|
133
|
-
|
|
142
|
+
if permissions.get('permissions'
|
|
143
|
+
) and 'blocklist' in permissions['permissions']:
|
|
134
144
|
blocklist = permissions['permissions']['blocklist']
|
|
135
145
|
for item in blocklist:
|
|
136
146
|
expected_policies.append(
|
|
137
|
-
|
|
147
|
+
(role, item['path'], item['method']))
|
|
138
148
|
|
|
139
149
|
# Add workspace policy
|
|
140
150
|
workspace_policy_permissions = rbac.get_workspace_policy_permissions()
|
|
@@ -143,50 +153,50 @@ class PermissionService:
|
|
|
143
153
|
|
|
144
154
|
for workspace_name, users in workspace_policy_permissions.items():
|
|
145
155
|
for user in users:
|
|
146
|
-
expected_policies.append(
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
for
|
|
155
|
-
|
|
156
|
-
if
|
|
157
|
-
#
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
f'workspace={workspace_name}')
|
|
179
|
-
enforcer.add_policy(user, workspace_name, '*')
|
|
180
|
-
policy_updated = True
|
|
181
|
-
logger.debug('Policies initialized successfully')
|
|
182
|
-
else:
|
|
183
|
-
logger.debug('Policies already exist, skipping initialization')
|
|
156
|
+
expected_policies.append((user, workspace_name, '*'))
|
|
157
|
+
# Check if all expected policies already exist and find missing ones
|
|
158
|
+
missing_policies = [
|
|
159
|
+
p for p in expected_policies if p not in existing_policies
|
|
160
|
+
]
|
|
161
|
+
# Find policies to remove
|
|
162
|
+
expected_policies_set = set(expected_policies)
|
|
163
|
+
redundant_policies = [
|
|
164
|
+
p for p in existing_policies if p not in expected_policies_set
|
|
165
|
+
]
|
|
166
|
+
if missing_policies:
|
|
167
|
+
# Add missing policies
|
|
168
|
+
logger.debug(f'Found {len(missing_policies)} missing policies, '
|
|
169
|
+
'initializing...')
|
|
170
|
+
for p in missing_policies:
|
|
171
|
+
logger.debug(f'Adding policy: {p}')
|
|
172
|
+
enforcer.add_policy(*p)
|
|
173
|
+
policy_updated = True
|
|
174
|
+
logger.debug('Missing policies added successfully')
|
|
175
|
+
|
|
176
|
+
if redundant_policies:
|
|
177
|
+
# Remove redundant policies
|
|
178
|
+
logger.debug(f'Found {len(redundant_policies)} redundant policies, '
|
|
179
|
+
'cleaning up...')
|
|
180
|
+
for p in redundant_policies:
|
|
181
|
+
logger.debug(f'Removing policy: {p}')
|
|
182
|
+
enforcer.remove_policy(*p)
|
|
183
|
+
policy_updated = True
|
|
184
|
+
logger.debug('Redundant policies removed successfully')
|
|
185
|
+
|
|
186
|
+
if not missing_policies and not redundant_policies:
|
|
187
|
+
logger.debug('Policies already in sync, skipping initialization')
|
|
184
188
|
|
|
185
189
|
# Always ensure users have default roles (this is idempotent)
|
|
190
|
+
# Get users who already have roles (g policies) to avoid redundant calls
|
|
191
|
+
users_with_roles = {tuple(g)[0] for g in enforcer.get_grouping_policy()}
|
|
186
192
|
all_users = global_user_state.get_all_users()
|
|
187
193
|
for existing_user in all_users:
|
|
188
|
-
|
|
189
|
-
|
|
194
|
+
if str(existing_user.id) not in users_with_roles:
|
|
195
|
+
logger.debug(f'Adding role for user: {existing_user.name}'
|
|
196
|
+
f'({existing_user.id})')
|
|
197
|
+
user_added = self._add_user_if_not_exists_no_lock(
|
|
198
|
+
existing_user.id)
|
|
199
|
+
policy_updated = policy_updated or user_added
|
|
190
200
|
|
|
191
201
|
if policy_updated:
|
|
192
202
|
enforcer.save_policy()
|
sky/utils/auth_utils.py
CHANGED
|
@@ -58,6 +58,34 @@ def _generate_rsa_key_pair() -> Tuple[str, str]:
|
|
|
58
58
|
return public_key, private_key
|
|
59
59
|
|
|
60
60
|
|
|
61
|
+
def _ensure_key_permissions(private_key_path: str,
|
|
62
|
+
public_key_path: str) -> None:
|
|
63
|
+
"""Ensure SSH key files and parent directory have correct permissions.
|
|
64
|
+
|
|
65
|
+
This is necessary because external factors (e.g., Kubernetes fsGroup,
|
|
66
|
+
volume mounts, umask) can modify file permissions after creation.
|
|
67
|
+
SSH requires private keys to have strict permissions (0600) and the
|
|
68
|
+
parent directory to not be group/world writable (0700).
|
|
69
|
+
|
|
70
|
+
This function is best-effort and will not raise exceptions if permission
|
|
71
|
+
changes fail (e.g., due to permission denied or read-only filesystem).
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
def _safe_chmod(path: str, mode: int) -> None:
|
|
75
|
+
"""Attempt to chmod, logging warning on failure."""
|
|
76
|
+
try:
|
|
77
|
+
if os.path.exists(path):
|
|
78
|
+
os.chmod(path, mode)
|
|
79
|
+
except OSError as e:
|
|
80
|
+
logger.warning(f'Failed to set permissions on {path}: {e}')
|
|
81
|
+
|
|
82
|
+
# Ensure parent directory has correct permissions (0700)
|
|
83
|
+
key_dir = os.path.dirname(private_key_path)
|
|
84
|
+
_safe_chmod(key_dir, 0o700)
|
|
85
|
+
_safe_chmod(private_key_path, 0o600)
|
|
86
|
+
_safe_chmod(public_key_path, 0o644)
|
|
87
|
+
|
|
88
|
+
|
|
61
89
|
def _save_key_pair(private_key_path: str, public_key_path: str,
|
|
62
90
|
private_key: str, public_key: str) -> None:
|
|
63
91
|
key_dir = os.path.dirname(private_key_path)
|
|
@@ -77,6 +105,11 @@ def _save_key_pair(private_key_path: str, public_key_path: str,
|
|
|
77
105
|
opener=functools.partial(os.open, mode=0o644)) as f:
|
|
78
106
|
f.write(public_key)
|
|
79
107
|
|
|
108
|
+
# Explicitly set permissions to ensure they are correct regardless of
|
|
109
|
+
# umask or pre-existing file permissions. The opener's mode parameter
|
|
110
|
+
# only applies when creating new files, and is still subject to umask.
|
|
111
|
+
_ensure_key_permissions(private_key_path, public_key_path)
|
|
112
|
+
|
|
80
113
|
|
|
81
114
|
def get_or_generate_keys() -> Tuple[str, str]:
|
|
82
115
|
"""Returns the absolute private and public key paths."""
|
|
@@ -105,6 +138,9 @@ def get_or_generate_keys() -> Tuple[str, str]:
|
|
|
105
138
|
assert os.path.exists(public_key_path), (
|
|
106
139
|
'Private key found, but associated public key '
|
|
107
140
|
f'{public_key_path} does not exist.')
|
|
141
|
+
# Ensure correct permissions every time, as external factors (e.g.,
|
|
142
|
+
# Kubernetes fsGroup) can modify them after creation.
|
|
143
|
+
_ensure_key_permissions(private_key_path, public_key_path)
|
|
108
144
|
return private_key_path, public_key_path
|
|
109
145
|
|
|
110
146
|
|
|
@@ -133,6 +169,9 @@ def create_ssh_key_files_from_db(private_key_path: str) -> bool:
|
|
|
133
169
|
lock_dir = os.path.dirname(lock_path)
|
|
134
170
|
|
|
135
171
|
if os.path.exists(private_key_path) and os.path.exists(public_key_path):
|
|
172
|
+
# Ensure correct permissions every time, as external factors (e.g.,
|
|
173
|
+
# Kubernetes fsGroup) can modify them after creation.
|
|
174
|
+
_ensure_key_permissions(private_key_path, public_key_path)
|
|
136
175
|
return True
|
|
137
176
|
# We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
|
|
138
177
|
# as the ssh configs will be written to this folder as well in
|
|
@@ -150,4 +189,7 @@ def create_ssh_key_files_from_db(private_key_path: str) -> bool:
|
|
|
150
189
|
assert os.path.exists(public_key_path), (
|
|
151
190
|
'Private key found, but associated public key '
|
|
152
191
|
f'{public_key_path} does not exist.')
|
|
192
|
+
# Ensure correct permissions every time, as external factors (e.g.,
|
|
193
|
+
# Kubernetes fsGroup) can modify them after creation.
|
|
194
|
+
_ensure_key_permissions(private_key_path, public_key_path)
|
|
153
195
|
return True
|
sky/utils/cli_utils/status_utils.py
CHANGED
|
@@ -13,9 +13,6 @@ from sky.utils import resources_utils
|
|
|
13
13
|
from sky.utils import status_lib
|
|
14
14
|
from sky.utils import ux_utils
|
|
15
15
|
|
|
16
|
-
if typing.TYPE_CHECKING:
|
|
17
|
-
from sky.provision.kubernetes import utils as kubernetes_utils
|
|
18
|
-
|
|
19
16
|
if typing.TYPE_CHECKING:
|
|
20
17
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
|
21
18
|
|
|
@@ -225,8 +222,25 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
|
225
222
|
# exist in those cases.
|
|
226
223
|
_get_name = (lambda cluster_record, _: cluster_record['name'])
|
|
227
224
|
_get_user_hash = (lambda cluster_record, _: cluster_record['user_hash'])
|
|
228
|
-
|
|
229
|
-
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def get_user_display_name(user_name: str, user_id: Optional[str] = None) -> str:
|
|
228
|
+
""" Appends SA to the user name if the user is a service account. """
|
|
229
|
+
if user_id and user_id.lower().startswith('sa-'):
|
|
230
|
+
return f'{user_name} (SA)'
|
|
231
|
+
return user_name
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _get_user_name(cluster_record: _ClusterRecord,
|
|
235
|
+
truncate: bool = True) -> str:
|
|
236
|
+
del truncate
|
|
237
|
+
user_name = cluster_record.get('user_name', '-')
|
|
238
|
+
if user_name == '-':
|
|
239
|
+
return user_name
|
|
240
|
+
user_hash = cluster_record.get('user_hash')
|
|
241
|
+
return get_user_display_name(user_name, user_hash)
|
|
242
|
+
|
|
243
|
+
|
|
230
244
|
_get_launched = (lambda cluster_record, _: log_utils.readable_time_duration(
|
|
231
245
|
cluster_record['launched_at']))
|
|
232
246
|
_get_duration = (lambda cluster_record, _: log_utils.readable_time_duration(
|
sky/utils/cluster_utils.py
CHANGED
|
@@ -46,7 +46,8 @@ class SSHConfigHelper(object):
|
|
|
46
46
|
ssh_cluster_key_path = constants.SKY_USER_FILE_PATH + '/ssh-keys/{}.key'
|
|
47
47
|
|
|
48
48
|
@classmethod
|
|
49
|
-
def _get_generated_config(cls, autogen_comment: str,
|
|
49
|
+
def _get_generated_config(cls, autogen_comment: str,
|
|
50
|
+
cluster_name_on_cloud: str, host_name: str,
|
|
50
51
|
ip: str, username: str, ssh_key_path: str,
|
|
51
52
|
proxy_command: Optional[str], port: int,
|
|
52
53
|
docker_proxy_command: Optional[str]):
|
|
@@ -79,6 +80,7 @@ class SSHConfigHelper(object):
|
|
|
79
80
|
UserKnownHostsFile=/dev/null
|
|
80
81
|
GlobalKnownHostsFile=/dev/null
|
|
81
82
|
Port {port}
|
|
83
|
+
SetEnv {constants.SKY_CLUSTER_NAME_ENV_VAR_KEY}={cluster_name_on_cloud}
|
|
82
84
|
{proxy}
|
|
83
85
|
""".rstrip())
|
|
84
86
|
codegen = codegen + '\n'
|
|
@@ -111,6 +113,7 @@ class SSHConfigHelper(object):
|
|
|
111
113
|
def add_cluster(
|
|
112
114
|
cls,
|
|
113
115
|
cluster_name: str,
|
|
116
|
+
cluster_name_on_cloud: str,
|
|
114
117
|
ips: List[str],
|
|
115
118
|
auth_config: Dict[str, str],
|
|
116
119
|
ports: List[int],
|
|
@@ -135,6 +138,7 @@ class SSHConfigHelper(object):
|
|
|
135
138
|
ports: List of port numbers for SSH corresponding to ips
|
|
136
139
|
docker_user: If not None, use this user to ssh into the docker
|
|
137
140
|
ssh_user: Override the ssh_user in auth_config
|
|
141
|
+
cluster_name_on_cloud: The cluster name as it appears in the cloud.
|
|
138
142
|
"""
|
|
139
143
|
if ssh_user is None:
|
|
140
144
|
username = auth_config['ssh_user']
|
|
@@ -227,10 +231,13 @@ class SSHConfigHelper(object):
|
|
|
227
231
|
ip = 'localhost'
|
|
228
232
|
port = constants.DEFAULT_DOCKER_PORT
|
|
229
233
|
node_name = cluster_name if i == 0 else cluster_name + f'-worker{i}'
|
|
234
|
+
node_proxy_command = proxy_command_for_nodes
|
|
235
|
+
if node_proxy_command is not None:
|
|
236
|
+
node_proxy_command = node_proxy_command.replace('%w', str(i))
|
|
230
237
|
# TODO(romilb): Update port number when k8s supports multinode
|
|
231
238
|
codegen += cls._get_generated_config(
|
|
232
|
-
sky_autogen_comment, node_name, ip,
|
|
233
|
-
key_path_for_config,
|
|
239
|
+
sky_autogen_comment, cluster_name_on_cloud, node_name, ip,
|
|
240
|
+
username, key_path_for_config, node_proxy_command, port,
|
|
234
241
|
docker_proxy_command) + '\n'
|
|
235
242
|
|
|
236
243
|
cluster_config_path = os.path.expanduser(
|