skypilot-nightly 1.0.0.dev20250627__py3-none-any.whl → 1.0.0.dev20250628__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +7 -0
- sky/adaptors/nebius.py +2 -2
- sky/authentication.py +12 -5
- sky/backends/backend_utils.py +92 -26
- sky/check.py +5 -2
- sky/client/cli/command.py +38 -6
- sky/client/sdk.py +217 -167
- sky/client/service_account_auth.py +47 -0
- sky/clouds/aws.py +10 -4
- sky/clouds/azure.py +5 -2
- sky/clouds/cloud.py +5 -2
- sky/clouds/gcp.py +31 -18
- sky/clouds/kubernetes.py +54 -34
- sky/clouds/nebius.py +8 -2
- sky/clouds/ssh.py +5 -2
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +22 -7
- sky/clouds/utils/oci_utils.py +62 -14
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{HudU4f4Xsy-cP51JvXSZ- → ZYLkkWSYZjJhLVsObh20y}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/43-f38a531f6692f281.js +1 -0
- sky/dashboard/out/_next/static/chunks/601-111d06d9ded11d00.js +1 -0
- sky/dashboard/out/_next/static/chunks/{616-d6128fa9e7cae6e6.js → 616-50a620ac4a23deb4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/691.fd9292250ab089af.js +21 -0
- sky/dashboard/out/_next/static/chunks/{785.dc2686c3c1235554.js → 785.3446c12ffdf3d188.js} +1 -1
- sky/dashboard/out/_next/static/chunks/871-e547295e7e21399c.js +6 -0
- sky/dashboard/out/_next/static/chunks/937.72796f7afe54075b.js +1 -0
- sky/dashboard/out/_next/static/chunks/938-0a770415b5ce4649.js +1 -0
- sky/dashboard/out/_next/static/chunks/982.d7bd80ed18cad4cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-21080826c6095f21.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-77d4816945b04793.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-f119a5630a1efd61.js → clusters-65b2c90320b8afb8.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-64bdc0b2d3a44709.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/{jobs-0a5695ff3075d94a.js → jobs-df7407b5e37d3750.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-4978cbb093e141e7.js → users-d7684eaa04c4f58f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-cb7e720b739de53a.js → [name]-04e1b3ad4207b1e9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-50e230828730cfb3.js → workspaces-c470366a6179f16e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-08fdb9e6070127fc.js → webpack-75a3310ef922a299.js} +1 -1
- sky/dashboard/out/_next/static/css/605ac87514049058.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +8 -3
- sky/global_user_state.py +257 -9
- sky/jobs/client/sdk.py +20 -25
- sky/models.py +16 -0
- sky/provision/kubernetes/config.py +1 -1
- sky/provision/kubernetes/instance.py +7 -4
- sky/provision/kubernetes/network.py +15 -9
- sky/provision/kubernetes/network_utils.py +42 -23
- sky/provision/kubernetes/utils.py +73 -35
- sky/provision/nebius/utils.py +10 -4
- sky/resources.py +10 -4
- sky/serve/client/sdk.py +28 -34
- sky/server/common.py +51 -3
- sky/server/constants.py +3 -0
- sky/server/requests/executor.py +4 -0
- sky/server/requests/payloads.py +33 -0
- sky/server/requests/requests.py +19 -0
- sky/server/rest.py +6 -15
- sky/server/server.py +121 -6
- sky/skylet/constants.py +6 -0
- sky/skypilot_config.py +32 -4
- sky/users/permission.py +29 -0
- sky/users/server.py +384 -5
- sky/users/token_service.py +196 -0
- sky/utils/common_utils.py +4 -5
- sky/utils/config_utils.py +41 -0
- sky/utils/controller_utils.py +5 -1
- sky/utils/resource_checker.py +153 -0
- sky/utils/resources_utils.py +12 -4
- sky/utils/schemas.py +87 -60
- sky/utils/subprocess_utils.py +2 -6
- sky/workspaces/core.py +9 -117
- {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/RECORD +94 -91
- sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +0 -1
- sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +0 -16
- sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +0 -6
- sky/dashboard/out/_next/static/chunks/937.3759f538f11a0953.js +0 -1
- sky/dashboard/out/_next/static/chunks/938-068520cc11738deb.js +0 -1
- sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +0 -1
- sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +0 -16
- sky/dashboard/out/_next/static/css/52082cf558ec9705.css +0 -3
- /sky/dashboard/out/_next/static/{HudU4f4Xsy-cP51JvXSZ- → ZYLkkWSYZjJhLVsObh20y}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-9a3ce3170d2edcec.js → _app-050a9e637b057b24.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,8 @@ _LOADBALANCER_TEMPLATE_NAME = 'kubernetes-loadbalancer.yml.j2'
|
|
28
28
|
|
29
29
|
|
30
30
|
def get_port_mode(
|
31
|
-
mode_str: Optional[str]
|
31
|
+
mode_str: Optional[str],
|
32
|
+
context: Optional[str]) -> kubernetes_enums.KubernetesPortMode:
|
32
33
|
"""Get the port mode from the provider config."""
|
33
34
|
|
34
35
|
curr_kube_config = kubernetes_utils.get_current_kube_config_context_name()
|
@@ -38,9 +39,11 @@ def get_port_mode(
|
|
38
39
|
# If running in kind (`sky local up`), use ingress mode
|
39
40
|
return kubernetes_enums.KubernetesPortMode.INGRESS
|
40
41
|
|
41
|
-
mode_str = mode_str or skypilot_config.
|
42
|
-
|
43
|
-
|
42
|
+
mode_str = mode_str or skypilot_config.get_effective_region_config(
|
43
|
+
cloud='kubernetes',
|
44
|
+
region=context,
|
45
|
+
keys=('ports',),
|
46
|
+
default_value=kubernetes_enums.KubernetesPortMode.LOADBALANCER.value)
|
44
47
|
try:
|
45
48
|
port_mode = kubernetes_enums.KubernetesPortMode(mode_str)
|
46
49
|
except ValueError as e:
|
@@ -54,12 +57,16 @@ def get_port_mode(
|
|
54
57
|
|
55
58
|
|
56
59
|
def get_networking_mode(
|
57
|
-
mode_str: Optional[str]
|
60
|
+
mode_str: Optional[str],
|
61
|
+
context: Optional[str],
|
58
62
|
) -> kubernetes_enums.KubernetesNetworkingMode:
|
59
63
|
"""Get the networking mode from the provider config."""
|
60
|
-
mode_str = mode_str or skypilot_config.
|
61
|
-
|
62
|
-
|
64
|
+
mode_str = mode_str or skypilot_config.get_effective_region_config(
|
65
|
+
cloud='kubernetes',
|
66
|
+
region=context,
|
67
|
+
keys=('networking_mode',),
|
68
|
+
default_value=kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD.
|
69
|
+
value)
|
63
70
|
try:
|
64
71
|
networking_mode = kubernetes_enums.KubernetesNetworkingMode.from_str(
|
65
72
|
mode_str)
|
@@ -70,9 +77,9 @@ def get_networking_mode(
|
|
70
77
|
return networking_mode
|
71
78
|
|
72
79
|
|
73
|
-
def fill_loadbalancer_template(namespace: str,
|
74
|
-
ports: List[int],
|
75
|
-
selector_value: str) -> Dict:
|
80
|
+
def fill_loadbalancer_template(namespace: str, context: Optional[str],
|
81
|
+
service_name: str, ports: List[int],
|
82
|
+
selector_key: str, selector_value: str) -> Dict:
|
76
83
|
template_path = os.path.join(sky.__root_dir__, 'templates',
|
77
84
|
_LOADBALANCER_TEMPLATE_NAME)
|
78
85
|
if not os.path.exists(template_path):
|
@@ -81,10 +88,16 @@ def fill_loadbalancer_template(namespace: str, service_name: str,
|
|
81
88
|
|
82
89
|
with open(template_path, 'r', encoding='utf-8') as fin:
|
83
90
|
template = fin.read()
|
84
|
-
annotations = skypilot_config.
|
85
|
-
|
86
|
-
|
87
|
-
('
|
91
|
+
annotations = skypilot_config.get_effective_region_config(
|
92
|
+
cloud='kubernetes',
|
93
|
+
region=context,
|
94
|
+
keys=('custom_metadata', 'annotations'),
|
95
|
+
default_value={})
|
96
|
+
labels = skypilot_config.get_effective_region_config(
|
97
|
+
cloud='kubernetes',
|
98
|
+
region=context,
|
99
|
+
keys=('custom_metadata', 'labels'),
|
100
|
+
default_value={})
|
88
101
|
j2_template = jinja2.Template(template)
|
89
102
|
cont = j2_template.render(
|
90
103
|
namespace=namespace,
|
@@ -99,10 +112,10 @@ def fill_loadbalancer_template(namespace: str, service_name: str,
|
|
99
112
|
return content
|
100
113
|
|
101
114
|
|
102
|
-
def fill_ingress_template(namespace: str,
|
103
|
-
|
104
|
-
|
105
|
-
selector_value: str) -> Dict:
|
115
|
+
def fill_ingress_template(namespace: str, context: Optional[str],
|
116
|
+
service_details: List[Tuple[str, int,
|
117
|
+
str]], ingress_name: str,
|
118
|
+
selector_key: str, selector_value: str) -> Dict:
|
106
119
|
template_path = os.path.join(sky.__root_dir__, 'templates',
|
107
120
|
_INGRESS_TEMPLATE_NAME)
|
108
121
|
if not os.path.exists(template_path):
|
@@ -110,10 +123,16 @@ def fill_ingress_template(namespace: str, service_details: List[Tuple[str, int,
|
|
110
123
|
f'Template "{_INGRESS_TEMPLATE_NAME}" does not exist.')
|
111
124
|
with open(template_path, 'r', encoding='utf-8') as fin:
|
112
125
|
template = fin.read()
|
113
|
-
annotations = skypilot_config.
|
114
|
-
|
115
|
-
|
116
|
-
('
|
126
|
+
annotations = skypilot_config.get_effective_region_config(
|
127
|
+
cloud='kubernetes',
|
128
|
+
region=context,
|
129
|
+
keys=('custom_metadata', 'annotations'),
|
130
|
+
default_value={})
|
131
|
+
labels = skypilot_config.get_effective_region_config(
|
132
|
+
cloud='kubernetes',
|
133
|
+
region=context,
|
134
|
+
keys=('custom_metadata', 'labels'),
|
135
|
+
default_value={})
|
117
136
|
j2_template = jinja2.Template(template)
|
118
137
|
cont = j2_template.render(
|
119
138
|
namespace=namespace,
|
@@ -1190,7 +1190,11 @@ def get_accelerator_label_key_values(
|
|
1190
1190
|
context_display_name = common_utils.removeprefix(
|
1191
1191
|
context, 'ssh-') if (context and is_ssh_node_pool) else context
|
1192
1192
|
|
1193
|
-
autoscaler_type =
|
1193
|
+
autoscaler_type = skypilot_config.get_effective_region_config(
|
1194
|
+
cloud='kubernetes',
|
1195
|
+
region=context,
|
1196
|
+
keys=('autoscaler',),
|
1197
|
+
default_value=None)
|
1194
1198
|
if autoscaler_type is not None:
|
1195
1199
|
# If autoscaler is set in config.yaml, override the label key and value
|
1196
1200
|
# to the autoscaler's format and bypass the GPU checks.
|
@@ -1595,9 +1599,11 @@ def is_kubeconfig_exec_auth(
|
|
1595
1599
|
user_details = next(
|
1596
1600
|
user for user in user_details if user['name'] == target_username)
|
1597
1601
|
|
1598
|
-
remote_identity = skypilot_config.
|
1599
|
-
|
1600
|
-
|
1602
|
+
remote_identity = skypilot_config.get_effective_region_config(
|
1603
|
+
cloud='kubernetes',
|
1604
|
+
region=context,
|
1605
|
+
keys=('remote_identity',),
|
1606
|
+
default_value=schemas.get_default_remote_identity('kubernetes'))
|
1601
1607
|
if ('exec' in user_details.get('user', {}) and remote_identity
|
1602
1608
|
== schemas.RemoteIdentityOptions.LOCAL_CREDENTIALS.value):
|
1603
1609
|
ctx_name = context_obj['name']
|
@@ -2078,7 +2084,7 @@ def setup_ssh_jump_svc(ssh_jump_name: str, namespace: str,
|
|
2078
2084
|
content = fill_ssh_jump_template('', '', ssh_jump_name, service_type.value)
|
2079
2085
|
|
2080
2086
|
# Add custom metadata from config
|
2081
|
-
merge_custom_metadata(content['service_spec']['metadata'])
|
2087
|
+
merge_custom_metadata(content['service_spec']['metadata'], context)
|
2082
2088
|
|
2083
2089
|
# Create service
|
2084
2090
|
try:
|
@@ -2158,7 +2164,7 @@ def setup_ssh_jump_pod(ssh_jump_name: str, ssh_jump_image: str,
|
|
2158
2164
|
|
2159
2165
|
# Add custom metadata to all objects
|
2160
2166
|
for object_type in content.keys():
|
2161
|
-
merge_custom_metadata(content[object_type]['metadata'])
|
2167
|
+
merge_custom_metadata(content[object_type]['metadata'], context)
|
2162
2168
|
|
2163
2169
|
# ServiceAccount
|
2164
2170
|
try:
|
@@ -2370,7 +2376,7 @@ def check_port_forward_mode_dependencies(
|
|
2370
2376
|
return None
|
2371
2377
|
|
2372
2378
|
|
2373
|
-
def get_endpoint_debug_message() -> str:
|
2379
|
+
def get_endpoint_debug_message(context: Optional[str] = None) -> str:
|
2374
2380
|
""" Returns a string message for user to debug Kubernetes port opening
|
2375
2381
|
|
2376
2382
|
Polls the configured ports mode on Kubernetes to produce an
|
@@ -2378,7 +2384,7 @@ def get_endpoint_debug_message() -> str:
|
|
2378
2384
|
|
2379
2385
|
Also checks if the
|
2380
2386
|
"""
|
2381
|
-
port_mode = network_utils.get_port_mode()
|
2387
|
+
port_mode = network_utils.get_port_mode(None, context)
|
2382
2388
|
if port_mode == kubernetes_enums.KubernetesPortMode.INGRESS:
|
2383
2389
|
endpoint_type = 'Ingress'
|
2384
2390
|
debug_cmd = 'kubectl describe ingress && kubectl describe ingressclass'
|
@@ -2396,6 +2402,7 @@ def combine_pod_config_fields(
|
|
2396
2402
|
cluster_yaml_path: str,
|
2397
2403
|
cluster_config_overrides: Dict[str, Any],
|
2398
2404
|
cloud: Optional[clouds.Cloud] = None,
|
2405
|
+
context: Optional[str] = None,
|
2399
2406
|
) -> None:
|
2400
2407
|
"""Adds or updates fields in the YAML with fields from the
|
2401
2408
|
~/.sky/config.yaml's kubernetes.pod_spec dict.
|
@@ -2438,19 +2445,28 @@ def combine_pod_config_fields(
|
|
2438
2445
|
with open(cluster_yaml_path, 'r', encoding='utf-8') as f:
|
2439
2446
|
yaml_content = f.read()
|
2440
2447
|
yaml_obj = yaml.safe_load(yaml_content)
|
2441
|
-
# We don't use override_configs in `
|
2448
|
+
# We don't use override_configs in `get_effective_region_config`, as merging
|
2442
2449
|
# the pod config requires special handling.
|
2443
2450
|
if isinstance(cloud, clouds.SSH):
|
2444
|
-
kubernetes_config = skypilot_config.
|
2445
|
-
|
2446
|
-
|
2447
|
-
|
2448
|
-
'
|
2451
|
+
kubernetes_config = skypilot_config.get_effective_region_config(
|
2452
|
+
cloud='ssh', region=None, keys=('pod_config',), default_value={})
|
2453
|
+
override_pod_config = config_utils.get_cloud_config_value_from_dict(
|
2454
|
+
dict_config=cluster_config_overrides,
|
2455
|
+
cloud='ssh',
|
2456
|
+
keys=('pod_config',),
|
2457
|
+
default_value={})
|
2449
2458
|
else:
|
2450
|
-
kubernetes_config = skypilot_config.
|
2451
|
-
|
2452
|
-
|
2453
|
-
|
2459
|
+
kubernetes_config = skypilot_config.get_effective_region_config(
|
2460
|
+
cloud='kubernetes',
|
2461
|
+
region=context,
|
2462
|
+
keys=('pod_config',),
|
2463
|
+
default_value={})
|
2464
|
+
override_pod_config = config_utils.get_cloud_config_value_from_dict(
|
2465
|
+
dict_config=cluster_config_overrides,
|
2466
|
+
cloud='kubernetes',
|
2467
|
+
region=context,
|
2468
|
+
keys=('pod_config',),
|
2469
|
+
default_value={})
|
2454
2470
|
config_utils.merge_k8s_configs(kubernetes_config, override_pod_config)
|
2455
2471
|
|
2456
2472
|
# Merge the kubernetes config into the YAML for both head and worker nodes.
|
@@ -2462,7 +2478,8 @@ def combine_pod_config_fields(
|
|
2462
2478
|
common_utils.dump_yaml(cluster_yaml_path, yaml_obj)
|
2463
2479
|
|
2464
2480
|
|
2465
|
-
def combine_metadata_fields(cluster_yaml_path: str
|
2481
|
+
def combine_metadata_fields(cluster_yaml_path: str,
|
2482
|
+
context: Optional[str] = None) -> None:
|
2466
2483
|
"""Updates the metadata for all Kubernetes objects created by SkyPilot with
|
2467
2484
|
fields from the ~/.sky/config.yaml's kubernetes.custom_metadata dict.
|
2468
2485
|
|
@@ -2472,8 +2489,11 @@ def combine_metadata_fields(cluster_yaml_path: str) -> None:
|
|
2472
2489
|
with open(cluster_yaml_path, 'r', encoding='utf-8') as f:
|
2473
2490
|
yaml_content = f.read()
|
2474
2491
|
yaml_obj = yaml.safe_load(yaml_content)
|
2475
|
-
custom_metadata = skypilot_config.
|
2476
|
-
|
2492
|
+
custom_metadata = skypilot_config.get_effective_region_config(
|
2493
|
+
cloud='kubernetes',
|
2494
|
+
region=context,
|
2495
|
+
keys=('custom_metadata',),
|
2496
|
+
default_value={})
|
2477
2497
|
|
2478
2498
|
# List of objects in the cluster YAML to be updated
|
2479
2499
|
combination_destinations = [
|
@@ -2496,13 +2516,17 @@ def combine_metadata_fields(cluster_yaml_path: str) -> None:
|
|
2496
2516
|
common_utils.dump_yaml(cluster_yaml_path, yaml_obj)
|
2497
2517
|
|
2498
2518
|
|
2499
|
-
def merge_custom_metadata(original_metadata: Dict[str, Any]
|
2519
|
+
def merge_custom_metadata(original_metadata: Dict[str, Any],
|
2520
|
+
context: Optional[str] = None) -> None:
|
2500
2521
|
"""Merges original metadata with custom_metadata from config
|
2501
2522
|
|
2502
2523
|
Merge is done in-place, so return is not required
|
2503
2524
|
"""
|
2504
|
-
custom_metadata = skypilot_config.
|
2505
|
-
|
2525
|
+
custom_metadata = skypilot_config.get_effective_region_config(
|
2526
|
+
cloud='kubernetes',
|
2527
|
+
region=context,
|
2528
|
+
keys=('custom_metadata',),
|
2529
|
+
default_value={})
|
2506
2530
|
config_utils.merge_k8s_configs(original_metadata, custom_metadata)
|
2507
2531
|
|
2508
2532
|
|
@@ -2556,7 +2580,7 @@ def create_namespace(namespace: str, context: Optional[str]) -> None:
|
|
2556
2580
|
return
|
2557
2581
|
|
2558
2582
|
ns_metadata = dict(name=namespace, labels={'parent': 'skypilot'})
|
2559
|
-
merge_custom_metadata(ns_metadata)
|
2583
|
+
merge_custom_metadata(ns_metadata, context)
|
2560
2584
|
namespace_obj = kubernetes_client.V1Namespace(metadata=ns_metadata)
|
2561
2585
|
try:
|
2562
2586
|
kubernetes.core_api(context).create_namespace(namespace_obj)
|
@@ -2582,15 +2606,14 @@ def get_head_pod_name(cluster_name_on_cloud: str):
|
|
2582
2606
|
return f'{cluster_name_on_cloud}-head'
|
2583
2607
|
|
2584
2608
|
|
2585
|
-
def
|
2586
|
-
|
2587
|
-
|
2588
|
-
|
2589
|
-
|
2590
|
-
|
2591
|
-
|
2592
|
-
|
2593
|
-
return autoscaler_type
|
2609
|
+
def get_custom_config_k8s_contexts() -> List[str]:
|
2610
|
+
"""Returns the list of context names from the config"""
|
2611
|
+
contexts = skypilot_config.get_effective_region_config(
|
2612
|
+
cloud='kubernetes',
|
2613
|
+
region=None,
|
2614
|
+
keys=('context_configs',),
|
2615
|
+
default_value={})
|
2616
|
+
return [*contexts] or []
|
2594
2617
|
|
2595
2618
|
|
2596
2619
|
# Mapping of known spot label keys and values for different cluster types
|
@@ -2602,6 +2625,21 @@ SPOT_LABEL_MAP = {
|
|
2602
2625
|
}
|
2603
2626
|
|
2604
2627
|
|
2628
|
+
def get_autoscaler_type(
|
2629
|
+
context: Optional[str] = None
|
2630
|
+
) -> Optional[kubernetes_enums.KubernetesAutoscalerType]:
|
2631
|
+
"""Returns the autoscaler type by reading from config"""
|
2632
|
+
autoscaler_type = skypilot_config.get_effective_region_config(
|
2633
|
+
cloud='kubernetes',
|
2634
|
+
region=context,
|
2635
|
+
keys=('autoscaler',),
|
2636
|
+
default_value=None)
|
2637
|
+
if autoscaler_type is not None:
|
2638
|
+
autoscaler_type = kubernetes_enums.KubernetesAutoscalerType(
|
2639
|
+
autoscaler_type)
|
2640
|
+
return autoscaler_type
|
2641
|
+
|
2642
|
+
|
2605
2643
|
def get_spot_label(
|
2606
2644
|
context: Optional[str] = None) -> Tuple[Optional[str], Optional[str]]:
|
2607
2645
|
"""Get the spot label key and value for using spot instances, if supported.
|
@@ -2625,7 +2663,7 @@ def get_spot_label(
|
|
2625
2663
|
|
2626
2664
|
# Check if autoscaler is configured. Allow spot instances if autoscaler type
|
2627
2665
|
# is known to support spot instances.
|
2628
|
-
autoscaler_type = get_autoscaler_type()
|
2666
|
+
autoscaler_type = get_autoscaler_type(context=context)
|
2629
2667
|
if autoscaler_type == kubernetes_enums.KubernetesAutoscalerType.GKE:
|
2630
2668
|
return SPOT_LABEL_MAP[autoscaler_type.value]
|
2631
2669
|
|
sky/provision/nebius/utils.py
CHANGED
@@ -40,8 +40,11 @@ def get_project_by_region(region: str) -> str:
|
|
40
40
|
parent_id=nebius.get_tenant_id())).wait()
|
41
41
|
|
42
42
|
# Check is there project if in config
|
43
|
-
project_id = skypilot_config.
|
44
|
-
|
43
|
+
project_id = skypilot_config.get_effective_region_config(
|
44
|
+
cloud='nebius',
|
45
|
+
region=None,
|
46
|
+
keys=(region, 'project_id'),
|
47
|
+
default_value=None)
|
45
48
|
if project_id is not None:
|
46
49
|
return project_id
|
47
50
|
for project in projects.items:
|
@@ -184,8 +187,11 @@ def launch(cluster_name_on_cloud: str,
|
|
184
187
|
# https://docs.nebius.com/compute/clusters/gpu
|
185
188
|
if platform in nebius_constants.INFINIBAND_INSTANCE_PLATFORMS:
|
186
189
|
if preset == '8gpu-128vcpu-1600gb':
|
187
|
-
fabric = skypilot_config.
|
188
|
-
|
190
|
+
fabric = skypilot_config.get_effective_region_config(
|
191
|
+
cloud='nebius',
|
192
|
+
region=None,
|
193
|
+
keys=(region, 'fabric'),
|
194
|
+
default_value=None)
|
189
195
|
|
190
196
|
# Auto-select fabric if network_tier=best and no fabric configured
|
191
197
|
if (fabric is None and
|
sky/resources.py
CHANGED
@@ -1064,8 +1064,11 @@ class Resources:
|
|
1064
1064
|
regions = [r for r in regions if r.name in self._image_id]
|
1065
1065
|
|
1066
1066
|
# Filter the regions by the skypilot_config
|
1067
|
-
ssh_proxy_command_config = skypilot_config.
|
1068
|
-
|
1067
|
+
ssh_proxy_command_config = skypilot_config.get_effective_region_config(
|
1068
|
+
cloud=str(self._cloud).lower(),
|
1069
|
+
region=None,
|
1070
|
+
keys=('ssh_proxy_command',),
|
1071
|
+
default_value=None)
|
1069
1072
|
if (isinstance(ssh_proxy_command_config, str) or
|
1070
1073
|
ssh_proxy_command_config is None):
|
1071
1074
|
# All regions are valid as the regions are not specified for the
|
@@ -1550,8 +1553,11 @@ class Resources:
|
|
1550
1553
|
# to each cloud if any cloud supports reservations for spot.
|
1551
1554
|
return {}
|
1552
1555
|
specific_reservations = set(
|
1553
|
-
skypilot_config.
|
1554
|
-
|
1556
|
+
skypilot_config.get_effective_region_config(
|
1557
|
+
cloud=str(self.cloud).lower(),
|
1558
|
+
region=self.region,
|
1559
|
+
keys=('specific_reservations',),
|
1560
|
+
default_value=set()))
|
1555
1561
|
|
1556
1562
|
if isinstance(self.cloud, clouds.DummyCloud):
|
1557
1563
|
return self.cloud.get_reservations_available_resources(
|
sky/serve/client/sdk.py
CHANGED
@@ -74,12 +74,11 @@ def up(
|
|
74
74
|
task=dag_str,
|
75
75
|
service_name=service_name,
|
76
76
|
)
|
77
|
-
response =
|
78
|
-
|
77
|
+
response = server_common.make_authenticated_request(
|
78
|
+
'POST',
|
79
|
+
'/serve/up',
|
79
80
|
json=json.loads(body.model_dump_json()),
|
80
|
-
timeout=(5, None)
|
81
|
-
cookies=server_common.get_api_cookie_jar(),
|
82
|
-
)
|
81
|
+
timeout=(5, None))
|
83
82
|
return server_common.get_request_id(response)
|
84
83
|
|
85
84
|
|
@@ -136,12 +135,11 @@ def update(
|
|
136
135
|
mode=mode,
|
137
136
|
)
|
138
137
|
|
139
|
-
response =
|
140
|
-
|
138
|
+
response = server_common.make_authenticated_request(
|
139
|
+
'POST',
|
140
|
+
'/serve/update',
|
141
141
|
json=json.loads(body.model_dump_json()),
|
142
|
-
timeout=(5, None)
|
143
|
-
cookies=server_common.get_api_cookie_jar(),
|
144
|
-
)
|
142
|
+
timeout=(5, None))
|
145
143
|
return server_common.get_request_id(response)
|
146
144
|
|
147
145
|
|
@@ -178,12 +176,11 @@ def down(
|
|
178
176
|
all=all,
|
179
177
|
purge=purge,
|
180
178
|
)
|
181
|
-
response =
|
182
|
-
|
179
|
+
response = server_common.make_authenticated_request(
|
180
|
+
'POST',
|
181
|
+
'/serve/down',
|
183
182
|
json=json.loads(body.model_dump_json()),
|
184
|
-
timeout=(5, None)
|
185
|
-
cookies=server_common.get_api_cookie_jar(),
|
186
|
-
)
|
183
|
+
timeout=(5, None))
|
187
184
|
return server_common.get_request_id(response)
|
188
185
|
|
189
186
|
|
@@ -213,12 +210,11 @@ def terminate_replica(service_name: str, replica_id: int,
|
|
213
210
|
replica_id=replica_id,
|
214
211
|
purge=purge,
|
215
212
|
)
|
216
|
-
response =
|
217
|
-
|
213
|
+
response = server_common.make_authenticated_request(
|
214
|
+
'POST',
|
215
|
+
'/serve/terminate-replica',
|
218
216
|
json=json.loads(body.model_dump_json()),
|
219
|
-
timeout=(5, None)
|
220
|
-
cookies=server_common.get_api_cookie_jar(),
|
221
|
-
)
|
217
|
+
timeout=(5, None))
|
222
218
|
return server_common.get_request_id(response)
|
223
219
|
|
224
220
|
|
@@ -286,12 +282,11 @@ def status(
|
|
286
282
|
exceptions.ClusterNotUpError: if the sky serve controller is not up.
|
287
283
|
"""
|
288
284
|
body = payloads.ServeStatusBody(service_names=service_names,)
|
289
|
-
response =
|
290
|
-
|
285
|
+
response = server_common.make_authenticated_request(
|
286
|
+
'POST',
|
287
|
+
'/serve/status',
|
291
288
|
json=json.loads(body.model_dump_json()),
|
292
|
-
timeout=(5, None)
|
293
|
-
cookies=server_common.get_api_cookie_jar(),
|
294
|
-
)
|
289
|
+
timeout=(5, None))
|
295
290
|
return server_common.get_request_id(response)
|
296
291
|
|
297
292
|
|
@@ -373,13 +368,12 @@ def tail_logs(service_name: str,
|
|
373
368
|
replica_id=replica_id,
|
374
369
|
follow=follow,
|
375
370
|
)
|
376
|
-
response =
|
377
|
-
|
371
|
+
response = server_common.make_authenticated_request(
|
372
|
+
'POST',
|
373
|
+
'/serve/logs',
|
378
374
|
json=json.loads(body.model_dump_json()),
|
379
375
|
timeout=(5, None),
|
380
|
-
stream=True
|
381
|
-
cookies=server_common.get_api_cookie_jar(),
|
382
|
-
)
|
376
|
+
stream=True)
|
383
377
|
request_id = server_common.get_request_id(response)
|
384
378
|
return sdk.stream_response(request_id=request_id,
|
385
379
|
response=response,
|
@@ -436,11 +430,11 @@ def sync_down_logs(service_name: str,
|
|
436
430
|
targets=targets,
|
437
431
|
replica_ids=replica_ids,
|
438
432
|
)
|
439
|
-
response =
|
440
|
-
|
433
|
+
response = server_common.make_authenticated_request(
|
434
|
+
'POST',
|
435
|
+
'/serve/sync-down-logs',
|
441
436
|
json=json.loads(body.model_dump_json()),
|
442
|
-
timeout=(5, None)
|
443
|
-
)
|
437
|
+
timeout=(5, None))
|
444
438
|
remote_dir = sdk.stream_and_get(server_common.get_request_id(response))
|
445
439
|
|
446
440
|
# Download from API server paths to the client's local_dir
|
sky/server/common.py
CHANGED
@@ -27,6 +27,7 @@ from sky import exceptions
|
|
27
27
|
from sky import sky_logging
|
28
28
|
from sky import skypilot_config
|
29
29
|
from sky.adaptors import common as adaptors_common
|
30
|
+
from sky.client import service_account_auth
|
30
31
|
from sky.data import data_utils
|
31
32
|
from sky.server import constants as server_constants
|
32
33
|
from sky.server import rest
|
@@ -185,6 +186,53 @@ def get_cookies_from_response(
|
|
185
186
|
return cookies
|
186
187
|
|
187
188
|
|
189
|
+
def make_authenticated_request(method: str,
|
190
|
+
path: str,
|
191
|
+
server_url: Optional[str] = None,
|
192
|
+
retry: bool = True,
|
193
|
+
**kwargs) -> 'requests.Response':
|
194
|
+
"""Make an authenticated HTTP request to the API server.
|
195
|
+
|
196
|
+
Automatically handles service account token authentication or cookie-based
|
197
|
+
authentication based on what's available.
|
198
|
+
|
199
|
+
Args:
|
200
|
+
method: HTTP method (GET, POST, etc.)
|
201
|
+
path: API path (e.g., '/api/v1/status')
|
202
|
+
server_url: Server URL, defaults to configured server
|
203
|
+
**kwargs: Additional arguments to pass to requests
|
204
|
+
|
205
|
+
Returns:
|
206
|
+
requests.Response object
|
207
|
+
"""
|
208
|
+
if server_url is None:
|
209
|
+
server_url = get_server_url()
|
210
|
+
|
211
|
+
# Prepare headers and URL for service account authentication
|
212
|
+
headers = service_account_auth.get_service_account_headers()
|
213
|
+
|
214
|
+
# Merge with existing headers
|
215
|
+
if 'headers' in kwargs:
|
216
|
+
headers.update(kwargs['headers'])
|
217
|
+
kwargs['headers'] = headers
|
218
|
+
|
219
|
+
# Always use the same URL regardless of authentication type
|
220
|
+
# OAuth2 proxy will handle authentication based on headers
|
221
|
+
url = f'{server_url}/{path}' if not path.startswith(
|
222
|
+
'/') else f'{server_url}{path}'
|
223
|
+
|
224
|
+
# Use cookie authentication if no Bearer token present
|
225
|
+
if not headers.get('Authorization') and 'cookies' not in kwargs:
|
226
|
+
kwargs['cookies'] = get_api_cookie_jar()
|
227
|
+
|
228
|
+
# Make the request
|
229
|
+
if retry:
|
230
|
+
return rest.request(method, url, **kwargs)
|
231
|
+
else:
|
232
|
+
assert method == 'GET', 'Only GET requests can be done without retry'
|
233
|
+
return rest.request_without_retry(method, url, **kwargs)
|
234
|
+
|
235
|
+
|
188
236
|
@annotations.lru_cache(scope='global')
|
189
237
|
def get_server_url(host: Optional[str] = None) -> str:
|
190
238
|
endpoint = DEFAULT_SERVER_URL
|
@@ -243,9 +291,9 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
|
|
243
291
|
server_url = endpoint if endpoint is not None else get_server_url()
|
244
292
|
while time_out_try_count <= RETRY_COUNT_ON_TIMEOUT:
|
245
293
|
try:
|
246
|
-
response =
|
247
|
-
|
248
|
-
|
294
|
+
response = make_authenticated_request('GET',
|
295
|
+
'/api/health',
|
296
|
+
timeout=2.5)
|
249
297
|
except requests.exceptions.Timeout:
|
250
298
|
if time_out_try_count == RETRY_COUNT_ON_TIMEOUT:
|
251
299
|
return ApiServerInfo(status=ApiServerStatus.UNHEALTHY)
|
sky/server/constants.py
CHANGED
@@ -36,3 +36,6 @@ API_COOKIE_FILE_DEFAULT_LOCATION = '~/.sky/cookies.txt'
|
|
36
36
|
# The path to the dashboard build output
|
37
37
|
DASHBOARD_DIR = os.path.join(os.path.dirname(__file__), '..', 'dashboard',
|
38
38
|
'out')
|
39
|
+
|
40
|
+
# The interval (seconds) for the event to be restarted in the background.
|
41
|
+
DAEMON_RESTART_INTERVAL_SECONDS = 20
|
sky/server/requests/executor.py
CHANGED
@@ -268,6 +268,10 @@ def override_request_env_and_config(
|
|
268
268
|
user = models.User(id=request_body.env_vars[constants.USER_ID_ENV_VAR],
|
269
269
|
name=request_body.env_vars[constants.USER_ENV_VAR])
|
270
270
|
global_user_state.add_or_update_user(user)
|
271
|
+
# Refetch the user to get the latest user info, including the created_at
|
272
|
+
# field.
|
273
|
+
user = global_user_state.get_user(user.id)
|
274
|
+
|
271
275
|
# Force color to be enabled.
|
272
276
|
os.environ['CLICOLOR_FORCE'] = '1'
|
273
277
|
server_common.reload_for_new_request(
|
sky/server/requests/payloads.py
CHANGED
@@ -358,6 +358,39 @@ class UserImportBody(RequestBody):
|
|
358
358
|
csv_content: str
|
359
359
|
|
360
360
|
|
361
|
+
class ServiceAccountTokenCreateBody(RequestBody):
|
362
|
+
"""The request body for creating a service account token."""
|
363
|
+
token_name: str
|
364
|
+
expires_in_days: Optional[int] = None
|
365
|
+
|
366
|
+
|
367
|
+
class ServiceAccountTokenDeleteBody(RequestBody):
|
368
|
+
"""The request body for deleting a service account token."""
|
369
|
+
token_id: str
|
370
|
+
|
371
|
+
|
372
|
+
class UpdateRoleBody(RequestBody):
|
373
|
+
"""The request body for updating a user role."""
|
374
|
+
role: str
|
375
|
+
|
376
|
+
|
377
|
+
class ServiceAccountTokenRoleBody(RequestBody):
|
378
|
+
"""The request body for getting a service account token role."""
|
379
|
+
token_id: str
|
380
|
+
|
381
|
+
|
382
|
+
class ServiceAccountTokenUpdateRoleBody(RequestBody):
|
383
|
+
"""The request body for updating a service account token role."""
|
384
|
+
token_id: str
|
385
|
+
role: str
|
386
|
+
|
387
|
+
|
388
|
+
class ServiceAccountTokenRotateBody(RequestBody):
|
389
|
+
"""The request body for rotating a service account token."""
|
390
|
+
token_id: str
|
391
|
+
expires_in_days: Optional[int] = None
|
392
|
+
|
393
|
+
|
361
394
|
class DownloadBody(RequestBody):
|
362
395
|
"""The request body for the download endpoint."""
|
363
396
|
folder_paths: List[str]
|