skypilot-nightly 1.0.0.dev20251203__py3-none-any.whl → 1.0.0.dev20260112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +6 -2
- sky/adaptors/aws.py +1 -61
- sky/adaptors/slurm.py +565 -0
- sky/backends/backend_utils.py +95 -12
- sky/backends/cloud_vm_ray_backend.py +224 -65
- sky/backends/task_codegen.py +380 -4
- sky/catalog/__init__.py +0 -3
- sky/catalog/data_fetchers/fetch_gcp.py +9 -1
- sky/catalog/data_fetchers/fetch_nebius.py +1 -1
- sky/catalog/data_fetchers/fetch_vast.py +4 -2
- sky/catalog/kubernetes_catalog.py +12 -4
- sky/catalog/seeweb_catalog.py +30 -15
- sky/catalog/shadeform_catalog.py +5 -2
- sky/catalog/slurm_catalog.py +236 -0
- sky/catalog/vast_catalog.py +30 -6
- sky/check.py +25 -11
- sky/client/cli/command.py +391 -32
- sky/client/interactive_utils.py +190 -0
- sky/client/sdk.py +64 -2
- sky/client/sdk_async.py +9 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/aws.py +60 -2
- sky/clouds/azure.py +2 -0
- sky/clouds/cloud.py +7 -0
- sky/clouds/kubernetes.py +2 -0
- sky/clouds/runpod.py +38 -7
- sky/clouds/slurm.py +610 -0
- sky/clouds/ssh.py +3 -2
- sky/clouds/vast.py +39 -16
- sky/core.py +197 -37
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/3nu-b8raeKRNABZ2d4GAG/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-0565f8975a7dcd10.js +6 -0
- sky/dashboard/out/_next/static/chunks/2109-55a1546d793574a7.js +11 -0
- sky/dashboard/out/_next/static/chunks/2521-099b07cd9e4745bf.js +26 -0
- sky/dashboard/out/_next/static/chunks/2755.a636e04a928a700e.js +31 -0
- sky/dashboard/out/_next/static/chunks/3495.05eab4862217c1a5.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.cfc5dcc9434fd98c.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
- sky/dashboard/out/_next/static/chunks/3981.645d01bf9c8cad0c.js +21 -0
- sky/dashboard/out/_next/static/chunks/4083-0115d67c1fb57d6c.js +21 -0
- sky/dashboard/out/_next/static/chunks/{8640.5b9475a2d18c5416.js → 429.a58e9ba9742309ed.js} +2 -2
- sky/dashboard/out/_next/static/chunks/4555.8e221537181b5dc1.js +6 -0
- sky/dashboard/out/_next/static/chunks/4725.937865b81fdaaebb.js +6 -0
- sky/dashboard/out/_next/static/chunks/6082-edabd8f6092300ce.js +25 -0
- sky/dashboard/out/_next/static/chunks/6989-49cb7dca83a7a62d.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-630bd2a2257275f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/7248-a99800d4db8edabd.js +1 -0
- sky/dashboard/out/_next/static/chunks/754-cfc5d4ad1b843d29.js +18 -0
- sky/dashboard/out/_next/static/chunks/8050-dd8aa107b17dce00.js +16 -0
- sky/dashboard/out/_next/static/chunks/8056-d4ae1e0cb81e7368.js +1 -0
- sky/dashboard/out/_next/static/chunks/8555.011023e296c127b3.js +6 -0
- sky/dashboard/out/_next/static/chunks/8821-93c25df904a8362b.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-0662594b69432ade.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.f15c91c97d124a5f.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-7ad6bd01858556f1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-5a86569acad99764.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8297476714acb4ac.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-337c3ba1085f1210.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-ee39056f9851a3ff.js → clusters-57632ff3684a8b5c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-5fd3a453c079c2ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-9f85c02c9c6cae9e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90f16972cbecf354.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-2dd42fc37aad427a.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ed806aeace26b972.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-449a9f5a3bb20fb3.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-bec34706b36f3524.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{volumes-b84b948ff357c43e.js → volumes-a83ba9b38dff7ea9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-84a40f8c7c627fe4.js → [name]-c781e9c3e52ef9fc.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-91e0942f47310aae.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-cfe59cf684ee13b9.js +1 -0
- sky/dashboard/out/_next/static/css/b0dbca28f027cc19.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -0
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +26 -12
- sky/data/mounting_utils.py +44 -5
- sky/global_user_state.py +111 -19
- sky/jobs/client/sdk.py +8 -3
- sky/jobs/controller.py +191 -31
- sky/jobs/recovery_strategy.py +109 -11
- sky/jobs/server/core.py +81 -4
- sky/jobs/server/server.py +14 -0
- sky/jobs/state.py +417 -19
- sky/jobs/utils.py +73 -80
- sky/models.py +11 -0
- sky/optimizer.py +8 -6
- sky/provision/__init__.py +12 -9
- sky/provision/common.py +20 -0
- sky/provision/docker_utils.py +15 -2
- sky/provision/kubernetes/utils.py +163 -20
- sky/provision/kubernetes/volume.py +52 -17
- sky/provision/provisioner.py +17 -7
- sky/provision/runpod/instance.py +3 -1
- sky/provision/runpod/utils.py +13 -1
- sky/provision/runpod/volume.py +25 -9
- sky/provision/slurm/__init__.py +12 -0
- sky/provision/slurm/config.py +13 -0
- sky/provision/slurm/instance.py +618 -0
- sky/provision/slurm/utils.py +689 -0
- sky/provision/vast/instance.py +4 -1
- sky/provision/vast/utils.py +11 -6
- sky/resources.py +135 -13
- sky/schemas/api/responses.py +4 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +1 -1
- sky/schemas/db/spot_jobs/008_add_full_resources.py +34 -0
- sky/schemas/db/spot_jobs/009_job_events.py +32 -0
- sky/schemas/db/spot_jobs/010_job_events_timestamp_with_timezone.py +43 -0
- sky/schemas/db/spot_jobs/011_add_links.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +9 -5
- sky/schemas/generated/jobsv1_pb2.pyi +12 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +44 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +32 -28
- sky/schemas/generated/managed_jobsv1_pb2.pyi +11 -2
- sky/serve/serve_utils.py +232 -40
- sky/serve/server/impl.py +1 -1
- sky/server/common.py +17 -0
- sky/server/constants.py +1 -1
- sky/server/metrics.py +6 -3
- sky/server/plugins.py +238 -0
- sky/server/requests/executor.py +5 -2
- sky/server/requests/payloads.py +30 -1
- sky/server/requests/request_names.py +4 -0
- sky/server/requests/requests.py +33 -11
- sky/server/requests/serializers/encoders.py +22 -0
- sky/server/requests/serializers/return_value_serializers.py +70 -0
- sky/server/server.py +506 -109
- sky/server/server_utils.py +30 -0
- sky/server/uvicorn.py +5 -0
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +22 -9
- sky/sky_logging.py +2 -1
- sky/skylet/attempt_skylet.py +13 -3
- sky/skylet/constants.py +55 -13
- sky/skylet/events.py +10 -4
- sky/skylet/executor/__init__.py +1 -0
- sky/skylet/executor/slurm.py +187 -0
- sky/skylet/job_lib.py +91 -5
- sky/skylet/log_lib.py +22 -6
- sky/skylet/log_lib.pyi +8 -6
- sky/skylet/services.py +18 -3
- sky/skylet/skylet.py +5 -1
- sky/skylet/subprocess_daemon.py +2 -1
- sky/ssh_node_pools/constants.py +12 -0
- sky/ssh_node_pools/core.py +40 -3
- sky/ssh_node_pools/deploy/__init__.py +4 -0
- sky/{utils/kubernetes/deploy_ssh_node_pools.py → ssh_node_pools/deploy/deploy.py} +279 -504
- sky/ssh_node_pools/deploy/tunnel/ssh-tunnel.sh +379 -0
- sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
- sky/ssh_node_pools/deploy/utils.py +173 -0
- sky/ssh_node_pools/server.py +11 -13
- sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
- sky/templates/kubernetes-ray.yml.j2 +12 -6
- sky/templates/slurm-ray.yml.j2 +115 -0
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +18 -41
- sky/users/model.conf +1 -1
- sky/users/permission.py +85 -52
- sky/users/rbac.py +31 -3
- sky/utils/annotations.py +108 -8
- sky/utils/auth_utils.py +42 -0
- sky/utils/cli_utils/status_utils.py +19 -5
- sky/utils/cluster_utils.py +10 -3
- sky/utils/command_runner.py +389 -35
- sky/utils/command_runner.pyi +43 -4
- sky/utils/common_utils.py +47 -31
- sky/utils/context.py +32 -0
- sky/utils/db/db_utils.py +36 -6
- sky/utils/db/migration_utils.py +41 -21
- sky/utils/infra_utils.py +5 -1
- sky/utils/instance_links.py +139 -0
- sky/utils/interactive_utils.py +49 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +42 -33
- sky/utils/kubernetes/kubernetes_deploy_utils.py +2 -94
- sky/utils/kubernetes/rsync_helper.sh +5 -1
- sky/utils/kubernetes/ssh-tunnel.sh +7 -376
- sky/utils/plugin_extensions/__init__.py +14 -0
- sky/utils/plugin_extensions/external_failure_source.py +176 -0
- sky/utils/resources_utils.py +10 -8
- sky/utils/rich_utils.py +9 -11
- sky/utils/schemas.py +93 -19
- sky/utils/status_lib.py +7 -0
- sky/utils/subprocess_utils.py +17 -0
- sky/volumes/client/sdk.py +6 -3
- sky/volumes/server/core.py +65 -27
- sky_templates/ray/start_cluster +8 -4
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/METADATA +67 -59
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/RECORD +208 -180
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +0 -11
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +0 -6
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +0 -26
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +0 -1
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +0 -1
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +0 -1
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +0 -1
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +0 -1
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +0 -30
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +0 -41
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +0 -6
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +0 -1
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +0 -31
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +0 -30
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +0 -21
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +0 -1
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +0 -3
- /sky/dashboard/out/_next/static/{96_E2yl3QAiIJGOYCkSpB → 3nu-b8raeKRNABZ2d4GAG}/_ssgManifest.js +0 -0
- /sky/{utils/kubernetes → ssh_node_pools/deploy/tunnel}/cleanup-tunnel.sh +0 -0
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/top_level.txt +0 -0
sky/users/permission.py
CHANGED
|
@@ -3,6 +3,7 @@ import contextlib
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
+
import threading
|
|
6
7
|
from typing import Generator, List, Optional
|
|
7
8
|
|
|
8
9
|
import casbin
|
|
@@ -36,16 +37,23 @@ class PermissionService:
|
|
|
36
37
|
|
|
37
38
|
def __init__(self):
|
|
38
39
|
self.enforcer: Optional[casbin.Enforcer] = None
|
|
40
|
+
self._lock = threading.Lock()
|
|
39
41
|
|
|
40
|
-
def
|
|
42
|
+
def initialize(self):
|
|
43
|
+
self._lazy_initialize(full_initialize=True)
|
|
44
|
+
|
|
45
|
+
def _lazy_initialize(self, full_initialize: bool = False):
|
|
41
46
|
if self.enforcer is not None:
|
|
42
47
|
return
|
|
43
|
-
with
|
|
48
|
+
with self._lock:
|
|
49
|
+
if self.enforcer is not None:
|
|
50
|
+
return
|
|
44
51
|
global _enforcer_instance
|
|
45
52
|
if _enforcer_instance is None:
|
|
46
53
|
engine = global_user_state.initialize_and_get_db()
|
|
47
|
-
|
|
48
|
-
|
|
54
|
+
if full_initialize:
|
|
55
|
+
db_utils.add_all_tables_to_db_sqlalchemy(
|
|
56
|
+
sqlalchemy_adapter.Base.metadata, engine)
|
|
49
57
|
adapter = sqlalchemy_adapter.Adapter(
|
|
50
58
|
engine, db_class=sqlalchemy_adapter.CasbinRule)
|
|
51
59
|
model_path = os.path.join(os.path.dirname(__file__),
|
|
@@ -56,8 +64,10 @@ class PermissionService:
|
|
|
56
64
|
# is successfully initialized, if we change it and then fail
|
|
57
65
|
# we will set it to None and all subsequent calls will fail.
|
|
58
66
|
_enforcer_instance = self
|
|
59
|
-
|
|
60
|
-
|
|
67
|
+
if full_initialize:
|
|
68
|
+
with _policy_lock():
|
|
69
|
+
self._maybe_initialize_policies()
|
|
70
|
+
self._maybe_initialize_basic_auth_user()
|
|
61
71
|
else:
|
|
62
72
|
assert _enforcer_instance is not None
|
|
63
73
|
self.enforcer = _enforcer_instance.enforcer
|
|
@@ -69,6 +79,26 @@ class PermissionService:
|
|
|
69
79
|
'Enforcer should be initialized after _lazy_initialize()')
|
|
70
80
|
return self.enforcer
|
|
71
81
|
|
|
82
|
+
def _get_plugin_rbac_rules(self):
|
|
83
|
+
"""Get RBAC rules from loaded plugins.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
Dictionary of plugin RBAC rules, or empty dict if plugins module
|
|
87
|
+
is not available or no rules are defined.
|
|
88
|
+
"""
|
|
89
|
+
try:
|
|
90
|
+
# pylint: disable=import-outside-toplevel
|
|
91
|
+
from sky.server import plugins as server_plugins
|
|
92
|
+
return server_plugins.get_plugin_rbac_rules()
|
|
93
|
+
except ImportError:
|
|
94
|
+
# Plugin module not available (e.g., not running as server)
|
|
95
|
+
logger.debug(
|
|
96
|
+
'Plugin module not available, skipping plugin RBAC rules')
|
|
97
|
+
return {}
|
|
98
|
+
except Exception as e: # pylint: disable=broad-except
|
|
99
|
+
logger.warning(f'Failed to get plugin RBAC rules: {e}')
|
|
100
|
+
return {}
|
|
101
|
+
|
|
72
102
|
def _maybe_initialize_basic_auth_user(self) -> None:
|
|
73
103
|
"""Initialize basic auth user if it is enabled."""
|
|
74
104
|
basic_auth = os.environ.get(constants.SKYPILOT_INITIAL_BASIC_AUTH)
|
|
@@ -92,26 +122,29 @@ class PermissionService:
|
|
|
92
122
|
def _maybe_initialize_policies(self) -> None:
|
|
93
123
|
"""Initialize policies if they don't already exist."""
|
|
94
124
|
logger.debug(f'Initializing policies in process: {os.getpid()}')
|
|
95
|
-
self._load_policy_no_lock()
|
|
96
125
|
|
|
97
126
|
policy_updated = False
|
|
98
127
|
|
|
99
128
|
# Check if policies are already initialized by looking for existing
|
|
100
129
|
# permission policies in the enforcer
|
|
101
130
|
enforcer = self._ensure_enforcer()
|
|
102
|
-
|
|
131
|
+
# Convert existing policies to set of tuples for O(1) lookups
|
|
132
|
+
existing_policies = {tuple(p) for p in enforcer.get_policy()}
|
|
133
|
+
|
|
134
|
+
# Get plugin RBAC rules dynamically
|
|
135
|
+
plugin_rules = self._get_plugin_rbac_rules()
|
|
103
136
|
|
|
104
137
|
# If we already have policies for the expected roles, skip
|
|
105
138
|
# initialization
|
|
106
|
-
role_permissions = rbac.get_role_permissions()
|
|
139
|
+
role_permissions = rbac.get_role_permissions(plugin_rules=plugin_rules)
|
|
107
140
|
expected_policies = []
|
|
108
141
|
for role, permissions in role_permissions.items():
|
|
109
|
-
if permissions
|
|
110
|
-
|
|
142
|
+
if permissions.get('permissions'
|
|
143
|
+
) and 'blocklist' in permissions['permissions']:
|
|
111
144
|
blocklist = permissions['permissions']['blocklist']
|
|
112
145
|
for item in blocklist:
|
|
113
146
|
expected_policies.append(
|
|
114
|
-
|
|
147
|
+
(role, item['path'], item['method']))
|
|
115
148
|
|
|
116
149
|
# Add workspace policy
|
|
117
150
|
workspace_policy_permissions = rbac.get_workspace_policy_permissions()
|
|
@@ -120,50 +153,50 @@ class PermissionService:
|
|
|
120
153
|
|
|
121
154
|
for workspace_name, users in workspace_policy_permissions.items():
|
|
122
155
|
for user in users:
|
|
123
|
-
expected_policies.append(
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
for
|
|
132
|
-
|
|
133
|
-
if
|
|
134
|
-
#
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
f'workspace={workspace_name}')
|
|
156
|
-
enforcer.add_policy(user, workspace_name, '*')
|
|
157
|
-
policy_updated = True
|
|
158
|
-
logger.debug('Policies initialized successfully')
|
|
159
|
-
else:
|
|
160
|
-
logger.debug('Policies already exist, skipping initialization')
|
|
156
|
+
expected_policies.append((user, workspace_name, '*'))
|
|
157
|
+
# Check if all expected policies already exist and find missing ones
|
|
158
|
+
missing_policies = [
|
|
159
|
+
p for p in expected_policies if p not in existing_policies
|
|
160
|
+
]
|
|
161
|
+
# Find policies to remove
|
|
162
|
+
expected_policies_set = set(expected_policies)
|
|
163
|
+
redundant_policies = [
|
|
164
|
+
p for p in existing_policies if p not in expected_policies_set
|
|
165
|
+
]
|
|
166
|
+
if missing_policies:
|
|
167
|
+
# Add missing policies
|
|
168
|
+
logger.debug(f'Found {len(missing_policies)} missing policies, '
|
|
169
|
+
'initializing...')
|
|
170
|
+
for p in missing_policies:
|
|
171
|
+
logger.debug(f'Adding policy: {p}')
|
|
172
|
+
enforcer.add_policy(*p)
|
|
173
|
+
policy_updated = True
|
|
174
|
+
logger.debug('Missing policies added successfully')
|
|
175
|
+
|
|
176
|
+
if redundant_policies:
|
|
177
|
+
# Remove redundant policies
|
|
178
|
+
logger.debug(f'Found {len(redundant_policies)} redundant policies, '
|
|
179
|
+
'cleaning up...')
|
|
180
|
+
for p in redundant_policies:
|
|
181
|
+
logger.debug(f'Removing policy: {p}')
|
|
182
|
+
enforcer.remove_policy(*p)
|
|
183
|
+
policy_updated = True
|
|
184
|
+
logger.debug('Redundant policies removed successfully')
|
|
185
|
+
|
|
186
|
+
if not missing_policies and not redundant_policies:
|
|
187
|
+
logger.debug('Policies already in sync, skipping initialization')
|
|
161
188
|
|
|
162
189
|
# Always ensure users have default roles (this is idempotent)
|
|
190
|
+
# Get users who already have roles (g policies) to avoid redundant calls
|
|
191
|
+
users_with_roles = {tuple(g)[0] for g in enforcer.get_grouping_policy()}
|
|
163
192
|
all_users = global_user_state.get_all_users()
|
|
164
193
|
for existing_user in all_users:
|
|
165
|
-
|
|
166
|
-
|
|
194
|
+
if str(existing_user.id) not in users_with_roles:
|
|
195
|
+
logger.debug(f'Adding role for user: {existing_user.name}'
|
|
196
|
+
f'({existing_user.id})')
|
|
197
|
+
user_added = self._add_user_if_not_exists_no_lock(
|
|
198
|
+
existing_user.id)
|
|
199
|
+
policy_updated = policy_updated or user_added
|
|
167
200
|
|
|
168
201
|
if policy_updated:
|
|
169
202
|
enforcer.save_policy()
|
sky/users/rbac.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""RBAC (Role-Based Access Control) functionality for SkyPilot API Server."""
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
-
from typing import Dict, List
|
|
4
|
+
from typing import Dict, List, Optional
|
|
5
5
|
|
|
6
6
|
from sky import sky_logging
|
|
7
7
|
from sky import skypilot_config
|
|
@@ -55,8 +55,13 @@ def get_default_role() -> str:
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
def get_role_permissions(
|
|
58
|
+
plugin_rules: Optional[Dict[str, List[Dict[str, str]]]] = None
|
|
58
59
|
) -> Dict[str, Dict[str, Dict[str, List[Dict[str, str]]]]]:
|
|
59
|
-
"""Get all role permissions from config.
|
|
60
|
+
"""Get all role permissions from config and plugins.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
plugin_rules: Optional dictionary of plugin RBAC rules to merge.
|
|
64
|
+
Format: {'user': [{'path': '...', 'method': '...'}]}
|
|
60
65
|
|
|
61
66
|
Returns:
|
|
62
67
|
Dictionary containing all roles and their permissions configuration.
|
|
@@ -91,9 +96,32 @@ def get_role_permissions(
|
|
|
91
96
|
if 'user' not in config_permissions:
|
|
92
97
|
config_permissions['user'] = {
|
|
93
98
|
'permissions': {
|
|
94
|
-
'blocklist': _DEFAULT_USER_BLOCKLIST
|
|
99
|
+
'blocklist': _DEFAULT_USER_BLOCKLIST.copy()
|
|
95
100
|
}
|
|
96
101
|
}
|
|
102
|
+
|
|
103
|
+
# Merge plugin rules into the appropriate roles
|
|
104
|
+
if plugin_rules:
|
|
105
|
+
for role, rules in plugin_rules.items():
|
|
106
|
+
if role not in supported_roles:
|
|
107
|
+
logger.warning(f'Plugin specified invalid role: {role}')
|
|
108
|
+
continue
|
|
109
|
+
if role not in config_permissions:
|
|
110
|
+
config_permissions[role] = {'permissions': {'blocklist': []}}
|
|
111
|
+
if 'permissions' not in config_permissions[role]:
|
|
112
|
+
config_permissions[role]['permissions'] = {'blocklist': []}
|
|
113
|
+
if 'blocklist' not in config_permissions[role]['permissions']:
|
|
114
|
+
config_permissions[role]['permissions']['blocklist'] = []
|
|
115
|
+
|
|
116
|
+
# Merge plugin rules, avoiding duplicates
|
|
117
|
+
existing_rules = config_permissions[role]['permissions'][
|
|
118
|
+
'blocklist']
|
|
119
|
+
for rule in rules:
|
|
120
|
+
if rule not in existing_rules:
|
|
121
|
+
existing_rules.append(rule)
|
|
122
|
+
logger.debug(f'Added plugin RBAC rule for {role}: '
|
|
123
|
+
f'{rule["method"]} {rule["path"]}')
|
|
124
|
+
|
|
97
125
|
return config_permissions
|
|
98
126
|
|
|
99
127
|
|
sky/utils/annotations.py
CHANGED
|
@@ -1,14 +1,20 @@
|
|
|
1
1
|
"""Annotations for public APIs."""
|
|
2
2
|
|
|
3
3
|
import functools
|
|
4
|
-
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
from typing import Callable, List, Literal, TypeVar
|
|
7
|
+
import weakref
|
|
5
8
|
|
|
6
9
|
import cachetools
|
|
7
10
|
from typing_extensions import ParamSpec
|
|
8
11
|
|
|
9
12
|
# Whether the current process is a SkyPilot API server process.
|
|
10
13
|
is_on_api_server = True
|
|
11
|
-
|
|
14
|
+
_FUNCTIONS_NEED_RELOAD_CACHE_LOCK = threading.Lock()
|
|
15
|
+
# Caches can be thread-local, use weakref to avoid blocking the GC when the
|
|
16
|
+
# thread is destroyed.
|
|
17
|
+
_FUNCTIONS_NEED_RELOAD_CACHE: List[weakref.ReferenceType] = []
|
|
12
18
|
|
|
13
19
|
T = TypeVar('T')
|
|
14
20
|
P = ParamSpec('P')
|
|
@@ -30,6 +36,94 @@ def client_api(func: Callable[P, T]) -> Callable[P, T]:
|
|
|
30
36
|
return wrapper
|
|
31
37
|
|
|
32
38
|
|
|
39
|
+
def _register_functions_need_reload_cache(func: Callable) -> Callable:
|
|
40
|
+
"""Register a cached function that needs to be reloaded for a new request.
|
|
41
|
+
|
|
42
|
+
The function will be registered as a weak reference to avoid blocking GC.
|
|
43
|
+
"""
|
|
44
|
+
assert hasattr(func, 'cache_clear'), f'{func.__name__} is not cacheable'
|
|
45
|
+
wrapped_fn = func
|
|
46
|
+
try:
|
|
47
|
+
func_ref = weakref.ref(func)
|
|
48
|
+
except TypeError:
|
|
49
|
+
# The function might not be weakrefable (e.g. functools.lru_cache),
|
|
50
|
+
# wrap it in this case.
|
|
51
|
+
@functools.wraps(func)
|
|
52
|
+
def wrapper(*args, **kwargs):
|
|
53
|
+
return func(*args, **kwargs)
|
|
54
|
+
|
|
55
|
+
wrapper.cache_clear = func.cache_clear # type: ignore[attr-defined]
|
|
56
|
+
func_ref = weakref.ref(wrapper)
|
|
57
|
+
wrapped_fn = wrapper
|
|
58
|
+
with _FUNCTIONS_NEED_RELOAD_CACHE_LOCK:
|
|
59
|
+
_FUNCTIONS_NEED_RELOAD_CACHE.append(func_ref)
|
|
60
|
+
return wrapped_fn
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class ThreadLocalTTLCache(threading.local):
|
|
64
|
+
"""Thread-local storage for the thread_local_ttl_cache decorator."""
|
|
65
|
+
|
|
66
|
+
def __init__(self, func, maxsize: int, ttl: int):
|
|
67
|
+
super().__init__()
|
|
68
|
+
self.func = func
|
|
69
|
+
self.maxsize = maxsize
|
|
70
|
+
self.ttl = ttl
|
|
71
|
+
|
|
72
|
+
def get_cache(self):
|
|
73
|
+
if not hasattr(self, 'cache'):
|
|
74
|
+
self.cache = ttl_cache(scope='request',
|
|
75
|
+
maxsize=self.maxsize,
|
|
76
|
+
ttl=self.ttl,
|
|
77
|
+
timer=time.time)(self.func)
|
|
78
|
+
return self.cache
|
|
79
|
+
|
|
80
|
+
def __del__(self):
|
|
81
|
+
if hasattr(self, 'cache'):
|
|
82
|
+
self.cache.cache_clear()
|
|
83
|
+
self.cache = None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def thread_local_ttl_cache(maxsize=32, ttl=60 * 55):
|
|
87
|
+
"""Thread-local TTL cache decorator.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
maxsize: Maximum size of the cache.
|
|
91
|
+
ttl: Time to live for the cache in seconds.
|
|
92
|
+
Default is 55 minutes, a bit less than 1 hour
|
|
93
|
+
default lifetime of an STS token.
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
def decorator(func):
|
|
97
|
+
# Create thread-local storage for the TTL cache
|
|
98
|
+
local_cache = ThreadLocalTTLCache(func, maxsize, ttl)
|
|
99
|
+
|
|
100
|
+
# We can't apply the ttl_cache here, because this runs at import time
|
|
101
|
+
# so we will always have the main thread's cache.
|
|
102
|
+
|
|
103
|
+
@functools.wraps(func)
|
|
104
|
+
def wrapper(*args, **kwargs):
|
|
105
|
+
# We are within the actual function call, which may be on a thread,
|
|
106
|
+
# so local_cache.get_cache() will return the correct thread-local cache,
|
|
107
|
+
# which we can now apply and immediately call.
|
|
108
|
+
return local_cache.get_cache()(*args, **kwargs)
|
|
109
|
+
|
|
110
|
+
def cache_info():
|
|
111
|
+
# Note that this will only give the cache info for the current
|
|
112
|
+
# thread's cache.
|
|
113
|
+
return local_cache.get_cache().cache_info()
|
|
114
|
+
|
|
115
|
+
def cache_clear():
|
|
116
|
+
# Note that this will only clear the cache for the current thread.
|
|
117
|
+
local_cache.get_cache().cache_clear()
|
|
118
|
+
|
|
119
|
+
wrapper.cache_info = cache_info # type: ignore[attr-defined]
|
|
120
|
+
wrapper.cache_clear = cache_clear # type: ignore[attr-defined]
|
|
121
|
+
|
|
122
|
+
return wrapper
|
|
123
|
+
|
|
124
|
+
return decorator
|
|
125
|
+
|
|
126
|
+
|
|
33
127
|
def lru_cache(scope: Literal['global', 'request'], *lru_cache_args,
|
|
34
128
|
**lru_cache_kwargs) -> Callable:
|
|
35
129
|
"""LRU cache decorator for functions.
|
|
@@ -51,8 +145,7 @@ def lru_cache(scope: Literal['global', 'request'], *lru_cache_args,
|
|
|
51
145
|
else:
|
|
52
146
|
cached_func = functools.lru_cache(*lru_cache_args,
|
|
53
147
|
**lru_cache_kwargs)(func)
|
|
54
|
-
|
|
55
|
-
return cached_func
|
|
148
|
+
return _register_functions_need_reload_cache(cached_func)
|
|
56
149
|
|
|
57
150
|
return decorator
|
|
58
151
|
|
|
@@ -72,13 +165,20 @@ def ttl_cache(scope: Literal['global', 'request'], *ttl_cache_args,
|
|
|
72
165
|
else:
|
|
73
166
|
cached_func = cachetools.cached(
|
|
74
167
|
cachetools.TTLCache(*ttl_cache_args, **ttl_cache_kwargs))(func)
|
|
75
|
-
|
|
76
|
-
return cached_func
|
|
168
|
+
return _register_functions_need_reload_cache(cached_func)
|
|
77
169
|
|
|
78
170
|
return decorator
|
|
79
171
|
|
|
80
172
|
|
|
81
173
|
def clear_request_level_cache():
|
|
82
174
|
"""Clear the request-level cache."""
|
|
83
|
-
|
|
84
|
-
|
|
175
|
+
alive_entries = []
|
|
176
|
+
with _FUNCTIONS_NEED_RELOAD_CACHE_LOCK:
|
|
177
|
+
for entry in _FUNCTIONS_NEED_RELOAD_CACHE:
|
|
178
|
+
func = entry()
|
|
179
|
+
if func is None:
|
|
180
|
+
# Has been GC'ed, drop the reference.
|
|
181
|
+
continue
|
|
182
|
+
func.cache_clear()
|
|
183
|
+
alive_entries.append(entry)
|
|
184
|
+
_FUNCTIONS_NEED_RELOAD_CACHE[:] = alive_entries
|
sky/utils/auth_utils.py
CHANGED
|
@@ -58,6 +58,34 @@ def _generate_rsa_key_pair() -> Tuple[str, str]:
|
|
|
58
58
|
return public_key, private_key
|
|
59
59
|
|
|
60
60
|
|
|
61
|
+
def _ensure_key_permissions(private_key_path: str,
|
|
62
|
+
public_key_path: str) -> None:
|
|
63
|
+
"""Ensure SSH key files and parent directory have correct permissions.
|
|
64
|
+
|
|
65
|
+
This is necessary because external factors (e.g., Kubernetes fsGroup,
|
|
66
|
+
volume mounts, umask) can modify file permissions after creation.
|
|
67
|
+
SSH requires private keys to have strict permissions (0600) and the
|
|
68
|
+
parent directory to not be group/world writable (0700).
|
|
69
|
+
|
|
70
|
+
This function is best-effort and will not raise exceptions if permission
|
|
71
|
+
changes fail (e.g., due to permission denied or read-only filesystem).
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
def _safe_chmod(path: str, mode: int) -> None:
|
|
75
|
+
"""Attempt to chmod, logging a warning on failure."""
|
|
76
|
+
try:
|
|
77
|
+
if os.path.exists(path):
|
|
78
|
+
os.chmod(path, mode)
|
|
79
|
+
except OSError as e:
|
|
80
|
+
logger.warning(f'Failed to set permissions on {path}: {e}')
|
|
81
|
+
|
|
82
|
+
# Ensure parent directory has correct permissions (0700)
|
|
83
|
+
key_dir = os.path.dirname(private_key_path)
|
|
84
|
+
_safe_chmod(key_dir, 0o700)
|
|
85
|
+
_safe_chmod(private_key_path, 0o600)
|
|
86
|
+
_safe_chmod(public_key_path, 0o644)
|
|
87
|
+
|
|
88
|
+
|
|
61
89
|
def _save_key_pair(private_key_path: str, public_key_path: str,
|
|
62
90
|
private_key: str, public_key: str) -> None:
|
|
63
91
|
key_dir = os.path.dirname(private_key_path)
|
|
@@ -77,6 +105,11 @@ def _save_key_pair(private_key_path: str, public_key_path: str,
|
|
|
77
105
|
opener=functools.partial(os.open, mode=0o644)) as f:
|
|
78
106
|
f.write(public_key)
|
|
79
107
|
|
|
108
|
+
# Explicitly set permissions to ensure they are correct regardless of
|
|
109
|
+
# umask or pre-existing file permissions. The opener's mode parameter
|
|
110
|
+
# only applies when creating new files, and is still subject to umask.
|
|
111
|
+
_ensure_key_permissions(private_key_path, public_key_path)
|
|
112
|
+
|
|
80
113
|
|
|
81
114
|
def get_or_generate_keys() -> Tuple[str, str]:
|
|
82
115
|
"""Returns the absolute private and public key paths."""
|
|
@@ -105,6 +138,9 @@ def get_or_generate_keys() -> Tuple[str, str]:
|
|
|
105
138
|
assert os.path.exists(public_key_path), (
|
|
106
139
|
'Private key found, but associated public key '
|
|
107
140
|
f'{public_key_path} does not exist.')
|
|
141
|
+
# Ensure correct permissions every time, as external factors (e.g.,
|
|
142
|
+
# Kubernetes fsGroup) can modify them after creation.
|
|
143
|
+
_ensure_key_permissions(private_key_path, public_key_path)
|
|
108
144
|
return private_key_path, public_key_path
|
|
109
145
|
|
|
110
146
|
|
|
@@ -133,6 +169,9 @@ def create_ssh_key_files_from_db(private_key_path: str) -> bool:
|
|
|
133
169
|
lock_dir = os.path.dirname(lock_path)
|
|
134
170
|
|
|
135
171
|
if os.path.exists(private_key_path) and os.path.exists(public_key_path):
|
|
172
|
+
# Ensure correct permissions every time, as external factors (e.g.,
|
|
173
|
+
# Kubernetes fsGroup) can modify them after creation.
|
|
174
|
+
_ensure_key_permissions(private_key_path, public_key_path)
|
|
136
175
|
return True
|
|
137
176
|
# We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
|
|
138
177
|
# as the ssh configs will be written to this folder as well in
|
|
@@ -150,4 +189,7 @@ def create_ssh_key_files_from_db(private_key_path: str) -> bool:
|
|
|
150
189
|
assert os.path.exists(public_key_path), (
|
|
151
190
|
'Private key found, but associated public key '
|
|
152
191
|
f'{public_key_path} does not exist.')
|
|
192
|
+
# Ensure correct permissions every time, as external factors (e.g.,
|
|
193
|
+
# Kubernetes fsGroup) can modify them after creation.
|
|
194
|
+
_ensure_key_permissions(private_key_path, public_key_path)
|
|
153
195
|
return True
|
|
@@ -13,9 +13,6 @@ from sky.utils import resources_utils
|
|
|
13
13
|
from sky.utils import status_lib
|
|
14
14
|
from sky.utils import ux_utils
|
|
15
15
|
|
|
16
|
-
if typing.TYPE_CHECKING:
|
|
17
|
-
from sky.provision.kubernetes import utils as kubernetes_utils
|
|
18
|
-
|
|
19
16
|
if typing.TYPE_CHECKING:
|
|
20
17
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
|
21
18
|
|
|
@@ -225,8 +222,25 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
|
225
222
|
# exist in those cases.
|
|
226
223
|
_get_name = (lambda cluster_record, _: cluster_record['name'])
|
|
227
224
|
_get_user_hash = (lambda cluster_record, _: cluster_record['user_hash'])
|
|
228
|
-
|
|
229
|
-
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def get_user_display_name(user_name: str,
                          user_id: Optional[str] = None) -> str:
    """Return the name to show for a user, tagging service accounts.

    A user is treated as a service account when ``user_id`` is non-empty and
    starts with ``'sa-'`` (compared case-insensitively). In that case
    ``' (SA)'`` is appended to ``user_name``; otherwise ``user_name`` is
    returned unchanged.

    Args:
        user_name: The user's name.
        user_id: The user's ID, if known.

    Returns:
        ``user_name``, with a ``' (SA)'`` suffix for service accounts.
    """
    is_service_account = bool(user_id) and user_id.lower().startswith('sa-')
    return f'{user_name} (SA)' if is_service_account else user_name
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _get_user_name(cluster_record: _ClusterRecord,
                   truncate: bool = True) -> str:
    """Return the display name of the user in a cluster record.

    Args:
        cluster_record: The cluster record; its 'user_name' and 'user_hash'
            entries are read.
        truncate: Ignored. NOTE(review): presumably kept so the signature
            matches the sibling two-argument column getters; confirm.

    Returns:
        '-' when the record has no usable user name; otherwise the name as
        formatted by get_user_display_name(), which marks service accounts.
    """
    del truncate
    # `.get(key, '-')` only covers a missing key. Normalise a present-but-
    # None (or empty) value as well, so None is never returned or formatted.
    user_name = cluster_record.get('user_name') or '-'
    if user_name == '-':
        return user_name
    user_hash = cluster_record.get('user_hash')
    return get_user_display_name(user_name, user_hash)
|
|
242
|
+
|
|
243
|
+
|
|
230
244
|
_get_launched = (lambda cluster_record, _: log_utils.readable_time_duration(
|
|
231
245
|
cluster_record['launched_at']))
|
|
232
246
|
_get_duration = (lambda cluster_record, _: log_utils.readable_time_duration(
|
sky/utils/cluster_utils.py
CHANGED
|
@@ -46,7 +46,8 @@ class SSHConfigHelper(object):
|
|
|
46
46
|
ssh_cluster_key_path = constants.SKY_USER_FILE_PATH + '/ssh-keys/{}.key'
|
|
47
47
|
|
|
48
48
|
@classmethod
|
|
49
|
-
def _get_generated_config(cls, autogen_comment: str,
|
|
49
|
+
def _get_generated_config(cls, autogen_comment: str,
|
|
50
|
+
cluster_name_on_cloud: str, host_name: str,
|
|
50
51
|
ip: str, username: str, ssh_key_path: str,
|
|
51
52
|
proxy_command: Optional[str], port: int,
|
|
52
53
|
docker_proxy_command: Optional[str]):
|
|
@@ -79,6 +80,7 @@ class SSHConfigHelper(object):
|
|
|
79
80
|
UserKnownHostsFile=/dev/null
|
|
80
81
|
GlobalKnownHostsFile=/dev/null
|
|
81
82
|
Port {port}
|
|
83
|
+
SetEnv {constants.SKY_CLUSTER_NAME_ENV_VAR_KEY}={cluster_name_on_cloud}
|
|
82
84
|
{proxy}
|
|
83
85
|
""".rstrip())
|
|
84
86
|
codegen = codegen + '\n'
|
|
@@ -111,6 +113,7 @@ class SSHConfigHelper(object):
|
|
|
111
113
|
def add_cluster(
|
|
112
114
|
cls,
|
|
113
115
|
cluster_name: str,
|
|
116
|
+
cluster_name_on_cloud: str,
|
|
114
117
|
ips: List[str],
|
|
115
118
|
auth_config: Dict[str, str],
|
|
116
119
|
ports: List[int],
|
|
@@ -135,6 +138,7 @@ class SSHConfigHelper(object):
|
|
|
135
138
|
ports: List of port numbers for SSH corresponding to ips
|
|
136
139
|
docker_user: If not None, use this user to ssh into the docker
|
|
137
140
|
ssh_user: Override the ssh_user in auth_config
|
|
141
|
+
cluster_name_on_cloud: The cluster name as it appears in the cloud.
|
|
138
142
|
"""
|
|
139
143
|
if ssh_user is None:
|
|
140
144
|
username = auth_config['ssh_user']
|
|
@@ -227,10 +231,13 @@ class SSHConfigHelper(object):
|
|
|
227
231
|
ip = 'localhost'
|
|
228
232
|
port = constants.DEFAULT_DOCKER_PORT
|
|
229
233
|
node_name = cluster_name if i == 0 else cluster_name + f'-worker{i}'
|
|
234
|
+
node_proxy_command = proxy_command_for_nodes
|
|
235
|
+
if node_proxy_command is not None:
|
|
236
|
+
node_proxy_command = node_proxy_command.replace('%w', str(i))
|
|
230
237
|
# TODO(romilb): Update port number when k8s supports multinode
|
|
231
238
|
codegen += cls._get_generated_config(
|
|
232
|
-
sky_autogen_comment, node_name, ip,
|
|
233
|
-
key_path_for_config,
|
|
239
|
+
sky_autogen_comment, cluster_name_on_cloud, node_name, ip,
|
|
240
|
+
username, key_path_for_config, node_proxy_command, port,
|
|
234
241
|
docker_proxy_command) + '\n'
|
|
235
242
|
|
|
236
243
|
cluster_config_path = os.path.expanduser(
|