skypilot-nightly 1.0.0.dev20250626__py3-none-any.whl → 1.0.0.dev20250628__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +7 -0
  3. sky/adaptors/nebius.py +2 -2
  4. sky/admin_policy.py +27 -17
  5. sky/authentication.py +12 -5
  6. sky/backends/backend_utils.py +92 -26
  7. sky/check.py +5 -2
  8. sky/client/cli/command.py +38 -6
  9. sky/client/sdk.py +217 -167
  10. sky/client/service_account_auth.py +47 -0
  11. sky/clouds/aws.py +10 -4
  12. sky/clouds/azure.py +5 -2
  13. sky/clouds/cloud.py +5 -2
  14. sky/clouds/gcp.py +31 -18
  15. sky/clouds/kubernetes.py +54 -34
  16. sky/clouds/nebius.py +8 -2
  17. sky/clouds/ssh.py +5 -2
  18. sky/clouds/utils/aws_utils.py +10 -4
  19. sky/clouds/utils/gcp_utils.py +22 -7
  20. sky/clouds/utils/oci_utils.py +62 -14
  21. sky/dashboard/out/404.html +1 -1
  22. sky/dashboard/out/_next/static/{bs6UB9V4Jq10TIZ5x-kBK → ZYLkkWSYZjJhLVsObh20y}/_buildManifest.js +1 -1
  23. sky/dashboard/out/_next/static/chunks/43-f38a531f6692f281.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/601-111d06d9ded11d00.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/{616-d6128fa9e7cae6e6.js → 616-50a620ac4a23deb4.js} +1 -1
  26. sky/dashboard/out/_next/static/chunks/691.fd9292250ab089af.js +21 -0
  27. sky/dashboard/out/_next/static/chunks/{785.dc2686c3c1235554.js → 785.3446c12ffdf3d188.js} +1 -1
  28. sky/dashboard/out/_next/static/chunks/871-e547295e7e21399c.js +6 -0
  29. sky/dashboard/out/_next/static/chunks/937.72796f7afe54075b.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/938-0a770415b5ce4649.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/982.d7bd80ed18cad4cc.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-21080826c6095f21.js +6 -0
  33. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-77d4816945b04793.js +6 -0
  34. sky/dashboard/out/_next/static/chunks/pages/{clusters-f119a5630a1efd61.js → clusters-65b2c90320b8afb8.js} +1 -1
  35. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-64bdc0b2d3a44709.js +16 -0
  36. sky/dashboard/out/_next/static/chunks/pages/{jobs-0a5695ff3075d94a.js → jobs-df7407b5e37d3750.js} +1 -1
  37. sky/dashboard/out/_next/static/chunks/pages/{users-4978cbb093e141e7.js → users-d7684eaa04c4f58f.js} +1 -1
  38. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-cb7e720b739de53a.js → [name]-04e1b3ad4207b1e9.js} +1 -1
  39. sky/dashboard/out/_next/static/chunks/pages/{workspaces-50e230828730cfb3.js → workspaces-c470366a6179f16e.js} +1 -1
  40. sky/dashboard/out/_next/static/chunks/{webpack-08fdb9e6070127fc.js → webpack-75a3310ef922a299.js} +1 -1
  41. sky/dashboard/out/_next/static/css/605ac87514049058.css +3 -0
  42. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  43. sky/dashboard/out/clusters/[cluster].html +1 -1
  44. sky/dashboard/out/clusters.html +1 -1
  45. sky/dashboard/out/config.html +1 -1
  46. sky/dashboard/out/index.html +1 -1
  47. sky/dashboard/out/infra/[context].html +1 -1
  48. sky/dashboard/out/infra.html +1 -1
  49. sky/dashboard/out/jobs/[job].html +1 -1
  50. sky/dashboard/out/jobs.html +1 -1
  51. sky/dashboard/out/users.html +1 -1
  52. sky/dashboard/out/volumes.html +1 -1
  53. sky/dashboard/out/workspace/new.html +1 -1
  54. sky/dashboard/out/workspaces/[name].html +1 -1
  55. sky/dashboard/out/workspaces.html +1 -1
  56. sky/data/storage.py +8 -3
  57. sky/global_user_state.py +257 -9
  58. sky/jobs/client/sdk.py +20 -25
  59. sky/models.py +16 -0
  60. sky/provision/kubernetes/config.py +1 -1
  61. sky/provision/kubernetes/instance.py +7 -4
  62. sky/provision/kubernetes/network.py +15 -9
  63. sky/provision/kubernetes/network_utils.py +42 -23
  64. sky/provision/kubernetes/utils.py +73 -35
  65. sky/provision/nebius/utils.py +10 -4
  66. sky/resources.py +10 -4
  67. sky/serve/client/sdk.py +28 -34
  68. sky/server/common.py +51 -3
  69. sky/server/constants.py +3 -0
  70. sky/server/requests/executor.py +4 -0
  71. sky/server/requests/payloads.py +33 -0
  72. sky/server/requests/requests.py +19 -0
  73. sky/server/rest.py +6 -15
  74. sky/server/server.py +121 -6
  75. sky/skylet/constants.py +6 -0
  76. sky/skypilot_config.py +32 -4
  77. sky/users/permission.py +29 -0
  78. sky/users/server.py +384 -5
  79. sky/users/token_service.py +196 -0
  80. sky/utils/common_utils.py +4 -5
  81. sky/utils/config_utils.py +41 -0
  82. sky/utils/controller_utils.py +5 -1
  83. sky/utils/resource_checker.py +153 -0
  84. sky/utils/resources_utils.py +12 -4
  85. sky/utils/schemas.py +87 -60
  86. sky/utils/subprocess_utils.py +2 -6
  87. sky/workspaces/core.py +9 -117
  88. {skypilot_nightly-1.0.0.dev20250626.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/METADATA +1 -1
  89. {skypilot_nightly-1.0.0.dev20250626.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/RECORD +95 -92
  90. sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +0 -1
  91. sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +0 -16
  92. sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +0 -6
  93. sky/dashboard/out/_next/static/chunks/937.3759f538f11a0953.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/938-068520cc11738deb.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +0 -1
  96. sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +0 -1
  97. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +0 -6
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +0 -6
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +0 -16
  100. sky/dashboard/out/_next/static/css/52082cf558ec9705.css +0 -3
  101. /sky/dashboard/out/_next/static/{bs6UB9V4Jq10TIZ5x-kBK → ZYLkkWSYZjJhLVsObh20y}/_ssgManifest.js +0 -0
  102. /sky/dashboard/out/_next/static/chunks/pages/{_app-9a3ce3170d2edcec.js → _app-050a9e637b057b24.js} +0 -0
  103. {skypilot_nightly-1.0.0.dev20250626.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/WHEEL +0 -0
  104. {skypilot_nightly-1.0.0.dev20250626.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/entry_points.txt +0 -0
  105. {skypilot_nightly-1.0.0.dev20250626.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/licenses/LICENSE +0 -0
  106. {skypilot_nightly-1.0.0.dev20250626.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,8 @@ _LOADBALANCER_TEMPLATE_NAME = 'kubernetes-loadbalancer.yml.j2'
28
28
 
29
29
 
30
30
  def get_port_mode(
31
- mode_str: Optional[str] = None) -> kubernetes_enums.KubernetesPortMode:
31
+ mode_str: Optional[str],
32
+ context: Optional[str]) -> kubernetes_enums.KubernetesPortMode:
32
33
  """Get the port mode from the provider config."""
33
34
 
34
35
  curr_kube_config = kubernetes_utils.get_current_kube_config_context_name()
@@ -38,9 +39,11 @@ def get_port_mode(
38
39
  # If running in kind (`sky local up`), use ingress mode
39
40
  return kubernetes_enums.KubernetesPortMode.INGRESS
40
41
 
41
- mode_str = mode_str or skypilot_config.get_nested(
42
- ('kubernetes', 'ports'),
43
- kubernetes_enums.KubernetesPortMode.LOADBALANCER.value)
42
+ mode_str = mode_str or skypilot_config.get_effective_region_config(
43
+ cloud='kubernetes',
44
+ region=context,
45
+ keys=('ports',),
46
+ default_value=kubernetes_enums.KubernetesPortMode.LOADBALANCER.value)
44
47
  try:
45
48
  port_mode = kubernetes_enums.KubernetesPortMode(mode_str)
46
49
  except ValueError as e:
@@ -54,12 +57,16 @@ def get_port_mode(
54
57
 
55
58
 
56
59
  def get_networking_mode(
57
- mode_str: Optional[str] = None
60
+ mode_str: Optional[str],
61
+ context: Optional[str],
58
62
  ) -> kubernetes_enums.KubernetesNetworkingMode:
59
63
  """Get the networking mode from the provider config."""
60
- mode_str = mode_str or skypilot_config.get_nested(
61
- ('kubernetes', 'networking_mode'),
62
- kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD.value)
64
+ mode_str = mode_str or skypilot_config.get_effective_region_config(
65
+ cloud='kubernetes',
66
+ region=context,
67
+ keys=('networking_mode',),
68
+ default_value=kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD.
69
+ value)
63
70
  try:
64
71
  networking_mode = kubernetes_enums.KubernetesNetworkingMode.from_str(
65
72
  mode_str)
@@ -70,9 +77,9 @@ def get_networking_mode(
70
77
  return networking_mode
71
78
 
72
79
 
73
- def fill_loadbalancer_template(namespace: str, service_name: str,
74
- ports: List[int], selector_key: str,
75
- selector_value: str) -> Dict:
80
+ def fill_loadbalancer_template(namespace: str, context: Optional[str],
81
+ service_name: str, ports: List[int],
82
+ selector_key: str, selector_value: str) -> Dict:
76
83
  template_path = os.path.join(sky.__root_dir__, 'templates',
77
84
  _LOADBALANCER_TEMPLATE_NAME)
78
85
  if not os.path.exists(template_path):
@@ -81,10 +88,16 @@ def fill_loadbalancer_template(namespace: str, service_name: str,
81
88
 
82
89
  with open(template_path, 'r', encoding='utf-8') as fin:
83
90
  template = fin.read()
84
- annotations = skypilot_config.get_nested(
85
- ('kubernetes', 'custom_metadata', 'annotations'), {})
86
- labels = skypilot_config.get_nested(
87
- ('kubernetes', 'custom_metadata', 'labels'), {})
91
+ annotations = skypilot_config.get_effective_region_config(
92
+ cloud='kubernetes',
93
+ region=context,
94
+ keys=('custom_metadata', 'annotations'),
95
+ default_value={})
96
+ labels = skypilot_config.get_effective_region_config(
97
+ cloud='kubernetes',
98
+ region=context,
99
+ keys=('custom_metadata', 'labels'),
100
+ default_value={})
88
101
  j2_template = jinja2.Template(template)
89
102
  cont = j2_template.render(
90
103
  namespace=namespace,
@@ -99,10 +112,10 @@ def fill_loadbalancer_template(namespace: str, service_name: str,
99
112
  return content
100
113
 
101
114
 
102
- def fill_ingress_template(namespace: str, service_details: List[Tuple[str, int,
103
- str]],
104
- ingress_name: str, selector_key: str,
105
- selector_value: str) -> Dict:
115
+ def fill_ingress_template(namespace: str, context: Optional[str],
116
+ service_details: List[Tuple[str, int,
117
+ str]], ingress_name: str,
118
+ selector_key: str, selector_value: str) -> Dict:
106
119
  template_path = os.path.join(sky.__root_dir__, 'templates',
107
120
  _INGRESS_TEMPLATE_NAME)
108
121
  if not os.path.exists(template_path):
@@ -110,10 +123,16 @@ def fill_ingress_template(namespace: str, service_details: List[Tuple[str, int,
110
123
  f'Template "{_INGRESS_TEMPLATE_NAME}" does not exist.')
111
124
  with open(template_path, 'r', encoding='utf-8') as fin:
112
125
  template = fin.read()
113
- annotations = skypilot_config.get_nested(
114
- ('kubernetes', 'custom_metadata', 'annotations'), {})
115
- labels = skypilot_config.get_nested(
116
- ('kubernetes', 'custom_metadata', 'labels'), {})
126
+ annotations = skypilot_config.get_effective_region_config(
127
+ cloud='kubernetes',
128
+ region=context,
129
+ keys=('custom_metadata', 'annotations'),
130
+ default_value={})
131
+ labels = skypilot_config.get_effective_region_config(
132
+ cloud='kubernetes',
133
+ region=context,
134
+ keys=('custom_metadata', 'labels'),
135
+ default_value={})
117
136
  j2_template = jinja2.Template(template)
118
137
  cont = j2_template.render(
119
138
  namespace=namespace,
@@ -1190,7 +1190,11 @@ def get_accelerator_label_key_values(
1190
1190
  context_display_name = common_utils.removeprefix(
1191
1191
  context, 'ssh-') if (context and is_ssh_node_pool) else context
1192
1192
 
1193
- autoscaler_type = get_autoscaler_type()
1193
+ autoscaler_type = skypilot_config.get_effective_region_config(
1194
+ cloud='kubernetes',
1195
+ region=context,
1196
+ keys=('autoscaler',),
1197
+ default_value=None)
1194
1198
  if autoscaler_type is not None:
1195
1199
  # If autoscaler is set in config.yaml, override the label key and value
1196
1200
  # to the autoscaler's format and bypass the GPU checks.
@@ -1595,9 +1599,11 @@ def is_kubeconfig_exec_auth(
1595
1599
  user_details = next(
1596
1600
  user for user in user_details if user['name'] == target_username)
1597
1601
 
1598
- remote_identity = skypilot_config.get_nested(
1599
- ('kubernetes', 'remote_identity'),
1600
- schemas.get_default_remote_identity('kubernetes'))
1602
+ remote_identity = skypilot_config.get_effective_region_config(
1603
+ cloud='kubernetes',
1604
+ region=context,
1605
+ keys=('remote_identity',),
1606
+ default_value=schemas.get_default_remote_identity('kubernetes'))
1601
1607
  if ('exec' in user_details.get('user', {}) and remote_identity
1602
1608
  == schemas.RemoteIdentityOptions.LOCAL_CREDENTIALS.value):
1603
1609
  ctx_name = context_obj['name']
@@ -2078,7 +2084,7 @@ def setup_ssh_jump_svc(ssh_jump_name: str, namespace: str,
2078
2084
  content = fill_ssh_jump_template('', '', ssh_jump_name, service_type.value)
2079
2085
 
2080
2086
  # Add custom metadata from config
2081
- merge_custom_metadata(content['service_spec']['metadata'])
2087
+ merge_custom_metadata(content['service_spec']['metadata'], context)
2082
2088
 
2083
2089
  # Create service
2084
2090
  try:
@@ -2158,7 +2164,7 @@ def setup_ssh_jump_pod(ssh_jump_name: str, ssh_jump_image: str,
2158
2164
 
2159
2165
  # Add custom metadata to all objects
2160
2166
  for object_type in content.keys():
2161
- merge_custom_metadata(content[object_type]['metadata'])
2167
+ merge_custom_metadata(content[object_type]['metadata'], context)
2162
2168
 
2163
2169
  # ServiceAccount
2164
2170
  try:
@@ -2370,7 +2376,7 @@ def check_port_forward_mode_dependencies(
2370
2376
  return None
2371
2377
 
2372
2378
 
2373
- def get_endpoint_debug_message() -> str:
2379
+ def get_endpoint_debug_message(context: Optional[str] = None) -> str:
2374
2380
  """ Returns a string message for user to debug Kubernetes port opening
2375
2381
 
2376
2382
  Polls the configured ports mode on Kubernetes to produce an
@@ -2378,7 +2384,7 @@ def get_endpoint_debug_message() -> str:
2378
2384
 
2379
2385
  Also checks if the
2380
2386
  """
2381
- port_mode = network_utils.get_port_mode()
2387
+ port_mode = network_utils.get_port_mode(None, context)
2382
2388
  if port_mode == kubernetes_enums.KubernetesPortMode.INGRESS:
2383
2389
  endpoint_type = 'Ingress'
2384
2390
  debug_cmd = 'kubectl describe ingress && kubectl describe ingressclass'
@@ -2396,6 +2402,7 @@ def combine_pod_config_fields(
2396
2402
  cluster_yaml_path: str,
2397
2403
  cluster_config_overrides: Dict[str, Any],
2398
2404
  cloud: Optional[clouds.Cloud] = None,
2405
+ context: Optional[str] = None,
2399
2406
  ) -> None:
2400
2407
  """Adds or updates fields in the YAML with fields from the
2401
2408
  ~/.sky/config.yaml's kubernetes.pod_spec dict.
@@ -2438,19 +2445,28 @@ def combine_pod_config_fields(
2438
2445
  with open(cluster_yaml_path, 'r', encoding='utf-8') as f:
2439
2446
  yaml_content = f.read()
2440
2447
  yaml_obj = yaml.safe_load(yaml_content)
2441
- # We don't use override_configs in `skypilot_config.get_nested`, as merging
2448
+ # We don't use override_configs in `get_effective_region_config`, as merging
2442
2449
  # the pod config requires special handling.
2443
2450
  if isinstance(cloud, clouds.SSH):
2444
- kubernetes_config = skypilot_config.get_nested(('ssh', 'pod_config'),
2445
- default_value={},
2446
- override_configs={})
2447
- override_pod_config = (cluster_config_overrides.get('ssh', {}).get(
2448
- 'pod_config', {}))
2451
+ kubernetes_config = skypilot_config.get_effective_region_config(
2452
+ cloud='ssh', region=None, keys=('pod_config',), default_value={})
2453
+ override_pod_config = config_utils.get_cloud_config_value_from_dict(
2454
+ dict_config=cluster_config_overrides,
2455
+ cloud='ssh',
2456
+ keys=('pod_config',),
2457
+ default_value={})
2449
2458
  else:
2450
- kubernetes_config = skypilot_config.get_nested(
2451
- ('kubernetes', 'pod_config'), default_value={}, override_configs={})
2452
- override_pod_config = (cluster_config_overrides.get(
2453
- 'kubernetes', {}).get('pod_config', {}))
2459
+ kubernetes_config = skypilot_config.get_effective_region_config(
2460
+ cloud='kubernetes',
2461
+ region=context,
2462
+ keys=('pod_config',),
2463
+ default_value={})
2464
+ override_pod_config = config_utils.get_cloud_config_value_from_dict(
2465
+ dict_config=cluster_config_overrides,
2466
+ cloud='kubernetes',
2467
+ region=context,
2468
+ keys=('pod_config',),
2469
+ default_value={})
2454
2470
  config_utils.merge_k8s_configs(kubernetes_config, override_pod_config)
2455
2471
 
2456
2472
  # Merge the kubernetes config into the YAML for both head and worker nodes.
@@ -2462,7 +2478,8 @@ def combine_pod_config_fields(
2462
2478
  common_utils.dump_yaml(cluster_yaml_path, yaml_obj)
2463
2479
 
2464
2480
 
2465
- def combine_metadata_fields(cluster_yaml_path: str) -> None:
2481
+ def combine_metadata_fields(cluster_yaml_path: str,
2482
+ context: Optional[str] = None) -> None:
2466
2483
  """Updates the metadata for all Kubernetes objects created by SkyPilot with
2467
2484
  fields from the ~/.sky/config.yaml's kubernetes.custom_metadata dict.
2468
2485
 
@@ -2472,8 +2489,11 @@ def combine_metadata_fields(cluster_yaml_path: str) -> None:
2472
2489
  with open(cluster_yaml_path, 'r', encoding='utf-8') as f:
2473
2490
  yaml_content = f.read()
2474
2491
  yaml_obj = yaml.safe_load(yaml_content)
2475
- custom_metadata = skypilot_config.get_nested(
2476
- ('kubernetes', 'custom_metadata'), {})
2492
+ custom_metadata = skypilot_config.get_effective_region_config(
2493
+ cloud='kubernetes',
2494
+ region=context,
2495
+ keys=('custom_metadata',),
2496
+ default_value={})
2477
2497
 
2478
2498
  # List of objects in the cluster YAML to be updated
2479
2499
  combination_destinations = [
@@ -2496,13 +2516,17 @@ def combine_metadata_fields(cluster_yaml_path: str) -> None:
2496
2516
  common_utils.dump_yaml(cluster_yaml_path, yaml_obj)
2497
2517
 
2498
2518
 
2499
- def merge_custom_metadata(original_metadata: Dict[str, Any]) -> None:
2519
+ def merge_custom_metadata(original_metadata: Dict[str, Any],
2520
+ context: Optional[str] = None) -> None:
2500
2521
  """Merges original metadata with custom_metadata from config
2501
2522
 
2502
2523
  Merge is done in-place, so return is not required
2503
2524
  """
2504
- custom_metadata = skypilot_config.get_nested(
2505
- ('kubernetes', 'custom_metadata'), {})
2525
+ custom_metadata = skypilot_config.get_effective_region_config(
2526
+ cloud='kubernetes',
2527
+ region=context,
2528
+ keys=('custom_metadata',),
2529
+ default_value={})
2506
2530
  config_utils.merge_k8s_configs(original_metadata, custom_metadata)
2507
2531
 
2508
2532
 
@@ -2556,7 +2580,7 @@ def create_namespace(namespace: str, context: Optional[str]) -> None:
2556
2580
  return
2557
2581
 
2558
2582
  ns_metadata = dict(name=namespace, labels={'parent': 'skypilot'})
2559
- merge_custom_metadata(ns_metadata)
2583
+ merge_custom_metadata(ns_metadata, context)
2560
2584
  namespace_obj = kubernetes_client.V1Namespace(metadata=ns_metadata)
2561
2585
  try:
2562
2586
  kubernetes.core_api(context).create_namespace(namespace_obj)
@@ -2582,15 +2606,14 @@ def get_head_pod_name(cluster_name_on_cloud: str):
2582
2606
  return f'{cluster_name_on_cloud}-head'
2583
2607
 
2584
2608
 
2585
- def get_autoscaler_type(
2586
- ) -> Optional[kubernetes_enums.KubernetesAutoscalerType]:
2587
- """Returns the autoscaler type by reading from config"""
2588
- autoscaler_type = skypilot_config.get_nested(('kubernetes', 'autoscaler'),
2589
- None)
2590
- if autoscaler_type is not None:
2591
- autoscaler_type = kubernetes_enums.KubernetesAutoscalerType(
2592
- autoscaler_type)
2593
- return autoscaler_type
2609
+ def get_custom_config_k8s_contexts() -> List[str]:
2610
+ """Returns the list of context names from the config"""
2611
+ contexts = skypilot_config.get_effective_region_config(
2612
+ cloud='kubernetes',
2613
+ region=None,
2614
+ keys=('context_configs',),
2615
+ default_value={})
2616
+ return [*contexts] or []
2594
2617
 
2595
2618
 
2596
2619
  # Mapping of known spot label keys and values for different cluster types
@@ -2602,6 +2625,21 @@ SPOT_LABEL_MAP = {
2602
2625
  }
2603
2626
 
2604
2627
 
2628
+ def get_autoscaler_type(
2629
+ context: Optional[str] = None
2630
+ ) -> Optional[kubernetes_enums.KubernetesAutoscalerType]:
2631
+ """Returns the autoscaler type by reading from config"""
2632
+ autoscaler_type = skypilot_config.get_effective_region_config(
2633
+ cloud='kubernetes',
2634
+ region=context,
2635
+ keys=('autoscaler',),
2636
+ default_value=None)
2637
+ if autoscaler_type is not None:
2638
+ autoscaler_type = kubernetes_enums.KubernetesAutoscalerType(
2639
+ autoscaler_type)
2640
+ return autoscaler_type
2641
+
2642
+
2605
2643
  def get_spot_label(
2606
2644
  context: Optional[str] = None) -> Tuple[Optional[str], Optional[str]]:
2607
2645
  """Get the spot label key and value for using spot instances, if supported.
@@ -2625,7 +2663,7 @@ def get_spot_label(
2625
2663
 
2626
2664
  # Check if autoscaler is configured. Allow spot instances if autoscaler type
2627
2665
  # is known to support spot instances.
2628
- autoscaler_type = get_autoscaler_type()
2666
+ autoscaler_type = get_autoscaler_type(context=context)
2629
2667
  if autoscaler_type == kubernetes_enums.KubernetesAutoscalerType.GKE:
2630
2668
  return SPOT_LABEL_MAP[autoscaler_type.value]
2631
2669
 
@@ -40,8 +40,11 @@ def get_project_by_region(region: str) -> str:
40
40
  parent_id=nebius.get_tenant_id())).wait()
41
41
 
42
42
  # Check is there project if in config
43
- project_id = skypilot_config.get_nested(('nebius', region, 'project_id'),
44
- None)
43
+ project_id = skypilot_config.get_effective_region_config(
44
+ cloud='nebius',
45
+ region=None,
46
+ keys=(region, 'project_id'),
47
+ default_value=None)
45
48
  if project_id is not None:
46
49
  return project_id
47
50
  for project in projects.items:
@@ -184,8 +187,11 @@ def launch(cluster_name_on_cloud: str,
184
187
  # https://docs.nebius.com/compute/clusters/gpu
185
188
  if platform in nebius_constants.INFINIBAND_INSTANCE_PLATFORMS:
186
189
  if preset == '8gpu-128vcpu-1600gb':
187
- fabric = skypilot_config.get_nested(('nebius', region, 'fabric'),
188
- None)
190
+ fabric = skypilot_config.get_effective_region_config(
191
+ cloud='nebius',
192
+ region=None,
193
+ keys=(region, 'fabric'),
194
+ default_value=None)
189
195
 
190
196
  # Auto-select fabric if network_tier=best and no fabric configured
191
197
  if (fabric is None and
sky/resources.py CHANGED
@@ -1064,8 +1064,11 @@ class Resources:
1064
1064
  regions = [r for r in regions if r.name in self._image_id]
1065
1065
 
1066
1066
  # Filter the regions by the skypilot_config
1067
- ssh_proxy_command_config = skypilot_config.get_nested(
1068
- (str(self._cloud).lower(), 'ssh_proxy_command'), None)
1067
+ ssh_proxy_command_config = skypilot_config.get_effective_region_config(
1068
+ cloud=str(self._cloud).lower(),
1069
+ region=None,
1070
+ keys=('ssh_proxy_command',),
1071
+ default_value=None)
1069
1072
  if (isinstance(ssh_proxy_command_config, str) or
1070
1073
  ssh_proxy_command_config is None):
1071
1074
  # All regions are valid as the regions are not specified for the
@@ -1550,8 +1553,11 @@ class Resources:
1550
1553
  # to each cloud if any cloud supports reservations for spot.
1551
1554
  return {}
1552
1555
  specific_reservations = set(
1553
- skypilot_config.get_nested(
1554
- (str(self.cloud).lower(), 'specific_reservations'), set()))
1556
+ skypilot_config.get_effective_region_config(
1557
+ cloud=str(self.cloud).lower(),
1558
+ region=self.region,
1559
+ keys=('specific_reservations',),
1560
+ default_value=set()))
1555
1561
 
1556
1562
  if isinstance(self.cloud, clouds.DummyCloud):
1557
1563
  return self.cloud.get_reservations_available_resources(
sky/serve/client/sdk.py CHANGED
@@ -74,12 +74,11 @@ def up(
74
74
  task=dag_str,
75
75
  service_name=service_name,
76
76
  )
77
- response = rest.post(
78
- f'{server_common.get_server_url()}/serve/up',
77
+ response = server_common.make_authenticated_request(
78
+ 'POST',
79
+ '/serve/up',
79
80
  json=json.loads(body.model_dump_json()),
80
- timeout=(5, None),
81
- cookies=server_common.get_api_cookie_jar(),
82
- )
81
+ timeout=(5, None))
83
82
  return server_common.get_request_id(response)
84
83
 
85
84
 
@@ -136,12 +135,11 @@ def update(
136
135
  mode=mode,
137
136
  )
138
137
 
139
- response = rest.post(
140
- f'{server_common.get_server_url()}/serve/update',
138
+ response = server_common.make_authenticated_request(
139
+ 'POST',
140
+ '/serve/update',
141
141
  json=json.loads(body.model_dump_json()),
142
- timeout=(5, None),
143
- cookies=server_common.get_api_cookie_jar(),
144
- )
142
+ timeout=(5, None))
145
143
  return server_common.get_request_id(response)
146
144
 
147
145
 
@@ -178,12 +176,11 @@ def down(
178
176
  all=all,
179
177
  purge=purge,
180
178
  )
181
- response = rest.post(
182
- f'{server_common.get_server_url()}/serve/down',
179
+ response = server_common.make_authenticated_request(
180
+ 'POST',
181
+ '/serve/down',
183
182
  json=json.loads(body.model_dump_json()),
184
- timeout=(5, None),
185
- cookies=server_common.get_api_cookie_jar(),
186
- )
183
+ timeout=(5, None))
187
184
  return server_common.get_request_id(response)
188
185
 
189
186
 
@@ -213,12 +210,11 @@ def terminate_replica(service_name: str, replica_id: int,
213
210
  replica_id=replica_id,
214
211
  purge=purge,
215
212
  )
216
- response = rest.post(
217
- f'{server_common.get_server_url()}/serve/terminate-replica',
213
+ response = server_common.make_authenticated_request(
214
+ 'POST',
215
+ '/serve/terminate-replica',
218
216
  json=json.loads(body.model_dump_json()),
219
- timeout=(5, None),
220
- cookies=server_common.get_api_cookie_jar(),
221
- )
217
+ timeout=(5, None))
222
218
  return server_common.get_request_id(response)
223
219
 
224
220
 
@@ -286,12 +282,11 @@ def status(
286
282
  exceptions.ClusterNotUpError: if the sky serve controller is not up.
287
283
  """
288
284
  body = payloads.ServeStatusBody(service_names=service_names,)
289
- response = rest.post(
290
- f'{server_common.get_server_url()}/serve/status',
285
+ response = server_common.make_authenticated_request(
286
+ 'POST',
287
+ '/serve/status',
291
288
  json=json.loads(body.model_dump_json()),
292
- timeout=(5, None),
293
- cookies=server_common.get_api_cookie_jar(),
294
- )
289
+ timeout=(5, None))
295
290
  return server_common.get_request_id(response)
296
291
 
297
292
 
@@ -373,13 +368,12 @@ def tail_logs(service_name: str,
373
368
  replica_id=replica_id,
374
369
  follow=follow,
375
370
  )
376
- response = rest.post(
377
- f'{server_common.get_server_url()}/serve/logs',
371
+ response = server_common.make_authenticated_request(
372
+ 'POST',
373
+ '/serve/logs',
378
374
  json=json.loads(body.model_dump_json()),
379
375
  timeout=(5, None),
380
- stream=True,
381
- cookies=server_common.get_api_cookie_jar(),
382
- )
376
+ stream=True)
383
377
  request_id = server_common.get_request_id(response)
384
378
  return sdk.stream_response(request_id=request_id,
385
379
  response=response,
@@ -436,11 +430,11 @@ def sync_down_logs(service_name: str,
436
430
  targets=targets,
437
431
  replica_ids=replica_ids,
438
432
  )
439
- response = rest.post(
440
- f'{server_common.get_server_url()}/serve/sync-down-logs',
433
+ response = server_common.make_authenticated_request(
434
+ 'POST',
435
+ '/serve/sync-down-logs',
441
436
  json=json.loads(body.model_dump_json()),
442
- timeout=(5, None),
443
- )
437
+ timeout=(5, None))
444
438
  remote_dir = sdk.stream_and_get(server_common.get_request_id(response))
445
439
 
446
440
  # Download from API server paths to the client's local_dir
sky/server/common.py CHANGED
@@ -27,6 +27,7 @@ from sky import exceptions
27
27
  from sky import sky_logging
28
28
  from sky import skypilot_config
29
29
  from sky.adaptors import common as adaptors_common
30
+ from sky.client import service_account_auth
30
31
  from sky.data import data_utils
31
32
  from sky.server import constants as server_constants
32
33
  from sky.server import rest
@@ -185,6 +186,53 @@ def get_cookies_from_response(
185
186
  return cookies
186
187
 
187
188
 
189
+ def make_authenticated_request(method: str,
190
+ path: str,
191
+ server_url: Optional[str] = None,
192
+ retry: bool = True,
193
+ **kwargs) -> 'requests.Response':
194
+ """Make an authenticated HTTP request to the API server.
195
+
196
+ Automatically handles service account token authentication or cookie-based
197
+ authentication based on what's available.
198
+
199
+ Args:
200
+ method: HTTP method (GET, POST, etc.)
201
+ path: API path (e.g., '/api/v1/status')
202
+ server_url: Server URL, defaults to configured server
203
+ **kwargs: Additional arguments to pass to requests
204
+
205
+ Returns:
206
+ requests.Response object
207
+ """
208
+ if server_url is None:
209
+ server_url = get_server_url()
210
+
211
+ # Prepare headers and URL for service account authentication
212
+ headers = service_account_auth.get_service_account_headers()
213
+
214
+ # Merge with existing headers
215
+ if 'headers' in kwargs:
216
+ headers.update(kwargs['headers'])
217
+ kwargs['headers'] = headers
218
+
219
+ # Always use the same URL regardless of authentication type
220
+ # OAuth2 proxy will handle authentication based on headers
221
+ url = f'{server_url}/{path}' if not path.startswith(
222
+ '/') else f'{server_url}{path}'
223
+
224
+ # Use cookie authentication if no Bearer token present
225
+ if not headers.get('Authorization') and 'cookies' not in kwargs:
226
+ kwargs['cookies'] = get_api_cookie_jar()
227
+
228
+ # Make the request
229
+ if retry:
230
+ return rest.request(method, url, **kwargs)
231
+ else:
232
+ assert method == 'GET', 'Only GET requests can be done without retry'
233
+ return rest.request_without_retry(method, url, **kwargs)
234
+
235
+
188
236
  @annotations.lru_cache(scope='global')
189
237
  def get_server_url(host: Optional[str] = None) -> str:
190
238
  endpoint = DEFAULT_SERVER_URL
@@ -243,9 +291,9 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
243
291
  server_url = endpoint if endpoint is not None else get_server_url()
244
292
  while time_out_try_count <= RETRY_COUNT_ON_TIMEOUT:
245
293
  try:
246
- response = rest.get(f'{server_url}/api/health',
247
- timeout=2.5,
248
- cookies=get_api_cookie_jar())
294
+ response = make_authenticated_request('GET',
295
+ '/api/health',
296
+ timeout=2.5)
249
297
  except requests.exceptions.Timeout:
250
298
  if time_out_try_count == RETRY_COUNT_ON_TIMEOUT:
251
299
  return ApiServerInfo(status=ApiServerStatus.UNHEALTHY)
sky/server/constants.py CHANGED
@@ -36,3 +36,6 @@ API_COOKIE_FILE_DEFAULT_LOCATION = '~/.sky/cookies.txt'
36
36
  # The path to the dashboard build output
37
37
  DASHBOARD_DIR = os.path.join(os.path.dirname(__file__), '..', 'dashboard',
38
38
  'out')
39
+
40
+ # The interval (seconds) for the event to be restarted in the background.
41
+ DAEMON_RESTART_INTERVAL_SECONDS = 20
@@ -268,6 +268,10 @@ def override_request_env_and_config(
268
268
  user = models.User(id=request_body.env_vars[constants.USER_ID_ENV_VAR],
269
269
  name=request_body.env_vars[constants.USER_ENV_VAR])
270
270
  global_user_state.add_or_update_user(user)
271
+ # Refetch the user to get the latest user info, including the created_at
272
+ # field.
273
+ user = global_user_state.get_user(user.id)
274
+
271
275
  # Force color to be enabled.
272
276
  os.environ['CLICOLOR_FORCE'] = '1'
273
277
  server_common.reload_for_new_request(
@@ -358,6 +358,39 @@ class UserImportBody(RequestBody):
358
358
  csv_content: str
359
359
 
360
360
 
361
+ class ServiceAccountTokenCreateBody(RequestBody):
362
+ """The request body for creating a service account token."""
363
+ token_name: str
364
+ expires_in_days: Optional[int] = None
365
+
366
+
367
+ class ServiceAccountTokenDeleteBody(RequestBody):
368
+ """The request body for deleting a service account token."""
369
+ token_id: str
370
+
371
+
372
+ class UpdateRoleBody(RequestBody):
373
+ """The request body for updating a user role."""
374
+ role: str
375
+
376
+
377
+ class ServiceAccountTokenRoleBody(RequestBody):
378
+ """The request body for getting a service account token role."""
379
+ token_id: str
380
+
381
+
382
+ class ServiceAccountTokenUpdateRoleBody(RequestBody):
383
+ """The request body for updating a service account token role."""
384
+ token_id: str
385
+ role: str
386
+
387
+
388
+ class ServiceAccountTokenRotateBody(RequestBody):
389
+ """The request body for rotating a service account token."""
390
+ token_id: str
391
+ expires_in_days: Optional[int] = None
392
+
393
+
361
394
  class DownloadBody(RequestBody):
362
395
  """The request body for the download endpoint."""
363
396
  folder_paths: List[str]