skypilot-nightly 1.0.0.dev20250627__py3-none-any.whl → 1.0.0.dev20250628__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +7 -0
  3. sky/adaptors/nebius.py +2 -2
  4. sky/authentication.py +12 -5
  5. sky/backends/backend_utils.py +92 -26
  6. sky/check.py +5 -2
  7. sky/client/cli/command.py +38 -6
  8. sky/client/sdk.py +217 -167
  9. sky/client/service_account_auth.py +47 -0
  10. sky/clouds/aws.py +10 -4
  11. sky/clouds/azure.py +5 -2
  12. sky/clouds/cloud.py +5 -2
  13. sky/clouds/gcp.py +31 -18
  14. sky/clouds/kubernetes.py +54 -34
  15. sky/clouds/nebius.py +8 -2
  16. sky/clouds/ssh.py +5 -2
  17. sky/clouds/utils/aws_utils.py +10 -4
  18. sky/clouds/utils/gcp_utils.py +22 -7
  19. sky/clouds/utils/oci_utils.py +62 -14
  20. sky/dashboard/out/404.html +1 -1
  21. sky/dashboard/out/_next/static/{HudU4f4Xsy-cP51JvXSZ- → ZYLkkWSYZjJhLVsObh20y}/_buildManifest.js +1 -1
  22. sky/dashboard/out/_next/static/chunks/43-f38a531f6692f281.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/601-111d06d9ded11d00.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/{616-d6128fa9e7cae6e6.js → 616-50a620ac4a23deb4.js} +1 -1
  25. sky/dashboard/out/_next/static/chunks/691.fd9292250ab089af.js +21 -0
  26. sky/dashboard/out/_next/static/chunks/{785.dc2686c3c1235554.js → 785.3446c12ffdf3d188.js} +1 -1
  27. sky/dashboard/out/_next/static/chunks/871-e547295e7e21399c.js +6 -0
  28. sky/dashboard/out/_next/static/chunks/937.72796f7afe54075b.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/938-0a770415b5ce4649.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/982.d7bd80ed18cad4cc.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-21080826c6095f21.js +6 -0
  32. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-77d4816945b04793.js +6 -0
  33. sky/dashboard/out/_next/static/chunks/pages/{clusters-f119a5630a1efd61.js → clusters-65b2c90320b8afb8.js} +1 -1
  34. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-64bdc0b2d3a44709.js +16 -0
  35. sky/dashboard/out/_next/static/chunks/pages/{jobs-0a5695ff3075d94a.js → jobs-df7407b5e37d3750.js} +1 -1
  36. sky/dashboard/out/_next/static/chunks/pages/{users-4978cbb093e141e7.js → users-d7684eaa04c4f58f.js} +1 -1
  37. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-cb7e720b739de53a.js → [name]-04e1b3ad4207b1e9.js} +1 -1
  38. sky/dashboard/out/_next/static/chunks/pages/{workspaces-50e230828730cfb3.js → workspaces-c470366a6179f16e.js} +1 -1
  39. sky/dashboard/out/_next/static/chunks/{webpack-08fdb9e6070127fc.js → webpack-75a3310ef922a299.js} +1 -1
  40. sky/dashboard/out/_next/static/css/605ac87514049058.css +3 -0
  41. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  42. sky/dashboard/out/clusters/[cluster].html +1 -1
  43. sky/dashboard/out/clusters.html +1 -1
  44. sky/dashboard/out/config.html +1 -1
  45. sky/dashboard/out/index.html +1 -1
  46. sky/dashboard/out/infra/[context].html +1 -1
  47. sky/dashboard/out/infra.html +1 -1
  48. sky/dashboard/out/jobs/[job].html +1 -1
  49. sky/dashboard/out/jobs.html +1 -1
  50. sky/dashboard/out/users.html +1 -1
  51. sky/dashboard/out/volumes.html +1 -1
  52. sky/dashboard/out/workspace/new.html +1 -1
  53. sky/dashboard/out/workspaces/[name].html +1 -1
  54. sky/dashboard/out/workspaces.html +1 -1
  55. sky/data/storage.py +8 -3
  56. sky/global_user_state.py +257 -9
  57. sky/jobs/client/sdk.py +20 -25
  58. sky/models.py +16 -0
  59. sky/provision/kubernetes/config.py +1 -1
  60. sky/provision/kubernetes/instance.py +7 -4
  61. sky/provision/kubernetes/network.py +15 -9
  62. sky/provision/kubernetes/network_utils.py +42 -23
  63. sky/provision/kubernetes/utils.py +73 -35
  64. sky/provision/nebius/utils.py +10 -4
  65. sky/resources.py +10 -4
  66. sky/serve/client/sdk.py +28 -34
  67. sky/server/common.py +51 -3
  68. sky/server/constants.py +3 -0
  69. sky/server/requests/executor.py +4 -0
  70. sky/server/requests/payloads.py +33 -0
  71. sky/server/requests/requests.py +19 -0
  72. sky/server/rest.py +6 -15
  73. sky/server/server.py +121 -6
  74. sky/skylet/constants.py +6 -0
  75. sky/skypilot_config.py +32 -4
  76. sky/users/permission.py +29 -0
  77. sky/users/server.py +384 -5
  78. sky/users/token_service.py +196 -0
  79. sky/utils/common_utils.py +4 -5
  80. sky/utils/config_utils.py +41 -0
  81. sky/utils/controller_utils.py +5 -1
  82. sky/utils/resource_checker.py +153 -0
  83. sky/utils/resources_utils.py +12 -4
  84. sky/utils/schemas.py +87 -60
  85. sky/utils/subprocess_utils.py +2 -6
  86. sky/workspaces/core.py +9 -117
  87. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/METADATA +1 -1
  88. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/RECORD +94 -91
  89. sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +0 -1
  90. sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +0 -16
  91. sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +0 -6
  92. sky/dashboard/out/_next/static/chunks/937.3759f538f11a0953.js +0 -1
  93. sky/dashboard/out/_next/static/chunks/938-068520cc11738deb.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +0 -1
  96. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +0 -6
  97. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +0 -6
  98. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +0 -16
  99. sky/dashboard/out/_next/static/css/52082cf558ec9705.css +0 -3
  100. /sky/dashboard/out/_next/static/{HudU4f4Xsy-cP51JvXSZ- → ZYLkkWSYZjJhLVsObh20y}/_ssgManifest.js +0 -0
  101. /sky/dashboard/out/_next/static/chunks/pages/{_app-9a3ce3170d2edcec.js → _app-050a9e637b057b24.js} +0 -0
  102. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/WHEEL +0 -0
  103. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/entry_points.txt +0 -0
  104. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/licenses/LICENSE +0 -0
  105. {skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/top_level.txt +0 -0
sky/utils/config_utils.py CHANGED
@@ -226,3 +226,44 @@ def merge_k8s_configs(
             base_config[key].extend(value)
         else:
             base_config[key] = value
+
+
+def get_cloud_config_value_from_dict(
+        dict_config: Dict[str, Any],
+        cloud: str,
+        keys: Tuple[str, ...],
+        region: Optional[str] = None,
+        default_value: Optional[Any] = None,
+        override_configs: Optional[Dict[str, Any]] = None) -> Any:
+    """Returns the nested key value by reading from config.
+
+    Order to get the property_name value:
+    1. if region is specified,
+       try to get the value from <cloud>/<region_key>/<region>/keys
+    2. if no region or no override,
+       try to get it at the cloud level <cloud>/keys
+    3. if not found at cloud level,
+       return either default_value if specified or None
+    """
+    input_config = Config(dict_config)
+    region_key = None
+    if cloud == 'kubernetes':
+        region_key = 'context_configs'
+
+    per_context_config = None
+    if region is not None and region_key is not None:
+        per_context_config = input_config.get_nested(
+            keys=(cloud, region_key, region) + keys,
+            default_value=None,
+            override_configs=override_configs)
+    # if no override found for specified region
+    general_config = input_config.get_nested(keys=(cloud,) + keys,
+                                             default_value=default_value,
+                                             override_configs=override_configs)
+
+    if (cloud == 'kubernetes' and isinstance(general_config, dict) and
+            isinstance(per_context_config, dict)):
+        merge_k8s_configs(general_config, per_context_config)
+        return general_config
+    else:
+        return (general_config
+                if per_context_config is None else per_context_config)
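
Aside: a minimal usage sketch of the new helper (not part of the package). The config dict and context names below are invented for illustration, mirroring the kubernetes/context_configs layout validated in sky/utils/schemas.py later in this diff.

from sky.utils import config_utils

# Invented example config: a cloud-level default plus a per-context
# override under kubernetes/context_configs/<context-name>/.
config = {
    'kubernetes': {
        'provision_timeout': 10,
        'context_configs': {
            'my-context': {
                'provision_timeout': 600,
            },
        },
    },
}

# Per-context value wins when the region (context) has an override: 600.
config_utils.get_cloud_config_value_from_dict(
    dict_config=config,
    cloud='kubernetes',
    keys=('provision_timeout',),
    region='my-context')

# No override for this context, so the cloud-level value is returned: 10.
config_utils.get_cloud_config_value_from_dict(
    dict_config=config,
    cloud='kubernetes',
    keys=('provision_timeout',),
    region='other-context')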
sky/utils/controller_utils.py CHANGED
@@ -733,7 +733,11 @@ def _setup_proxy_command_on_controller(
     config = config_utils.Config.from_dict(user_config)
     proxy_command_key = (str(controller_launched_cloud).lower(),
                          'ssh_proxy_command')
-    ssh_proxy_command = config.get_nested(proxy_command_key, None)
+    ssh_proxy_command = skypilot_config.get_effective_region_config(
+        cloud=str(controller_launched_cloud).lower(),
+        region=None,
+        keys=('ssh_proxy_command',),
+        default_value=None)
     if isinstance(ssh_proxy_command, str):
         config.set_nested(proxy_command_key, None)
     elif isinstance(ssh_proxy_command, dict):
sky/utils/resource_checker.py ADDED
@@ -0,0 +1,153 @@
+"""Resource checking utilities for finding active clusters and managed jobs."""
+
+import concurrent.futures
+from typing import Any, Callable, Dict, List, Tuple
+
+from sky import exceptions
+from sky import global_user_state
+from sky import sky_logging
+from sky.skylet import constants
+
+logger = sky_logging.init_logger(__name__)
+
+
+def check_no_active_resources_for_users(
+        user_operations: List[Tuple[str, str]]) -> None:
+    """Check if users have active clusters or managed jobs.
+
+    Args:
+        user_operations: List of tuples (user_id, operation) where
+            operation is 'update' or 'delete'.
+
+    Raises:
+        ValueError: If any user has active clusters or managed jobs.
+            The error message will include all users with issues.
+    """
+    if not user_operations:
+        return
+
+    def filter_by_user(user_id: str):
+        return lambda resource: resource.get('user_hash') == user_id
+
+    _check_active_resources(user_operations, filter_by_user, 'user')
+
+
+def check_no_active_resources_for_workspaces(
+        workspace_operations: List[Tuple[str, str]]) -> None:
+    """Check if workspaces have active clusters or managed jobs.
+
+    Args:
+        workspace_operations: List of tuples (workspace_name, operation) where
+            operation is 'update' or 'delete'.
+
+    Raises:
+        ValueError: If any workspace has active clusters or managed jobs.
+            The error message will include all workspaces with issues.
+    """
+    if not workspace_operations:
+        return
+
+    def filter_by_workspace(workspace_name: str):
+        return lambda resource: (resource.get(
+            'workspace', constants.SKYPILOT_DEFAULT_WORKSPACE) == workspace_name
+                                )
+
+    _check_active_resources(workspace_operations, filter_by_workspace,
+                            'workspace')
+
+
+def _check_active_resources(resource_operations: List[Tuple[str, str]],
+                            filter_factory: Callable[[str],
+                                                     Callable[[Dict[str, Any]],
+                                                              bool]],
+                            resource_type: str) -> None:
+    """Check if resource entities have active clusters or managed jobs.
+
+    Args:
+        resource_operations: List of tuples (resource_name, operation) where
+            operation is 'update' or 'delete'.
+        filter_factory: Function that takes a resource_name and returns a filter
+            function for clusters/jobs.
+        resource_type: Type of resource being checked ('user' or 'workspace').
+
+    Raises:
+        ValueError: If any resource has active clusters or managed jobs.
+    """
+
+    def get_all_clusters():
+        return global_user_state.get_clusters()
+
+    def get_all_managed_jobs():
+        # pylint: disable=import-outside-toplevel
+        from sky.jobs.server import core as managed_jobs_core
+        try:
+            return managed_jobs_core.queue(refresh=False,
+                                           skip_finished=True,
+                                           all_users=True)
+        except exceptions.ClusterNotUpError:
+            logger.warning('All jobs should be finished.')
+            return []
+
+    # Fetch both clusters and jobs in parallel
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        clusters_future = executor.submit(get_all_clusters)
+        jobs_future = executor.submit(get_all_managed_jobs)
+
+        all_clusters = clusters_future.result()
+        all_managed_jobs = jobs_future.result()
+
+    # Collect all error messages instead of raising immediately
+    error_messages = []
+
+    # Check each resource against the fetched data
+    for resource_name, operation in resource_operations:
+        resource_filter = filter_factory(resource_name)
+
+        # Filter clusters for this resource
+        resource_clusters = [
+            cluster for cluster in all_clusters if resource_filter(cluster)
+        ]
+
+        # Filter managed jobs for this resource
+        resource_active_jobs = [
+            job for job in all_managed_jobs if resource_filter(job)
+        ]
+
+        # Collect error messages for this resource
+        resource_errors = []
+
+        if resource_clusters:
+            active_cluster_names = [
+                cluster['name'] for cluster in resource_clusters
+            ]
+            cluster_list = ', '.join(active_cluster_names)
+            resource_errors.append(
+                f'{len(resource_clusters)} active cluster(s): {cluster_list}')
+
+        if resource_active_jobs:
+            job_names = [str(job['job_id']) for job in resource_active_jobs]
+            job_list = ', '.join(job_names)
+            resource_errors.append(
+                f'{len(resource_active_jobs)} active managed job(s): '
+                f'{job_list}')
+
+        # If this resource has issues, add to overall error messages
+        if resource_errors:
+            resource_error_summary = ' and '.join(resource_errors)
+            error_messages.append(
+                f'Cannot {operation} {resource_type} {resource_name!r} '
+                f'because it has {resource_error_summary}.')
+
+    # If we collected any errors, raise them all together
+    if error_messages:
+        if len(error_messages) == 1:
+            # Single resource error
+            full_message = error_messages[
+                0] + ' Please terminate these resources first.'
+        else:
+            # Multiple resource errors
+            full_message = (f'Cannot proceed due to active resources in '
+                            f'{len(error_messages)} {resource_type}(s):\n' +
+                            '\n'.join(f'• {msg}' for msg in error_messages) +
+                            '\nPlease terminate these resources first.')
+        raise ValueError(full_message)
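
Aside: a rough usage sketch of the extracted helpers (assumes it runs where SkyPilot's global state and managed-jobs queue are reachable; the user hash and workspace names are placeholders).

from sky.utils import resource_checker

# Raises ValueError if the user still owns clusters or managed jobs.
# 'abcd1234' is a placeholder user hash.
resource_checker.check_no_active_resources_for_users([('abcd1234', 'delete')])

# Batch form: clusters and jobs are fetched once, in parallel, and every
# (name, operation) pair is checked against that single snapshot.
resource_checker.check_no_active_resources_for_workspaces([
    ('team-a', 'update'),
    ('team-b', 'delete'),
])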
sky/utils/resources_utils.py CHANGED
@@ -273,10 +273,18 @@ def need_to_query_reservations() -> bool:
         clouds that do not use reservations.
     """
     for cloud_str in registry.CLOUD_REGISTRY.keys():
-        cloud_specific_reservations = skypilot_config.get_nested(
-            (cloud_str, 'specific_reservations'), None)
-        cloud_prioritize_reservations = skypilot_config.get_nested(
-            (cloud_str, 'prioritize_reservations'), False)
+        cloud_specific_reservations = (
+            skypilot_config.get_effective_region_config(
+                cloud=cloud_str,
+                region=None,
+                keys=('specific_reservations',),
+                default_value=None))
+        cloud_prioritize_reservations = (
+            skypilot_config.get_effective_region_config(
+                cloud=cloud_str,
+                region=None,
+                keys=('prioritize_reservations',),
+                default_value=False))
         if (cloud_specific_reservations is not None or
                 cloud_prioritize_reservations):
            return True
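
Aside: both reworked call sites go through the same new accessor. A hedged sketch of the call pattern, assuming skypilot_config.get_effective_region_config resolves kubernetes context_configs the same way as get_cloud_config_value_from_dict above (the context name is a placeholder):

from sky import skypilot_config

# Cloud-level lookup (region=None): reads aws/specific_reservations
# from the active SkyPilot config.
reservations = skypilot_config.get_effective_region_config(
    cloud='aws',
    region=None,
    keys=('specific_reservations',),
    default_value=None)

# Kubernetes lookup with a context: a value under
# kubernetes/context_configs/<context>/provision_timeout, if present,
# takes precedence over the cloud-level default.
timeout = skypilot_config.get_effective_region_config(
    cloud='kubernetes',
    region='my-context',
    keys=('provision_timeout',),
    default_value=10)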
sky/utils/schemas.py CHANGED
@@ -7,6 +7,7 @@ import enum
 from typing import Any, Dict, List, Tuple
 
 from sky.skylet import constants
+from sky.utils import kubernetes_enums
 
 
 def _check_not_both_fields_present(field1: str, field2: str):
@@ -1018,10 +1019,73 @@ _REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
     },
 }
 
+_CONTEXT_CONFIG_SCHEMA_KUBERNETES = {
+    'networking': {
+        'type': 'string',
+        'case_insensitive_enum': [
+            type.value for type in kubernetes_enums.KubernetesNetworkingMode
+        ],
+    },
+    'ports': {
+        'type': 'string',
+        'case_insensitive_enum': [
+            type.value for type in kubernetes_enums.KubernetesPortMode
+        ],
+    },
+    'pod_config': {
+        'type': 'object',
+        'required': [],
+        # Allow arbitrary keys since validating pod spec is hard
+        'additionalProperties': True,
+    },
+    'custom_metadata': {
+        'type': 'object',
+        'required': [],
+        # Allow arbitrary keys since validating metadata is hard
+        'additionalProperties': True,
+        # Disallow 'name' and 'namespace' keys in this dict
+        'not': {
+            'anyOf': [{
+                'required': ['name']
+            }, {
+                'required': ['namespace']
+            }]
+        },
+    },
+    'provision_timeout': {
+        'type': 'integer',
+    },
+    'autoscaler': {
+        'type': 'string',
+        'case_insensitive_enum': [
+            type.value for type in kubernetes_enums.KubernetesAutoscalerType
+        ],
+    },
+    'high_availability': {
+        'type': 'object',
+        'required': [],
+        'additionalProperties': False,
+        'properties': {
+            'storage_class_name': {
+                'type': 'string',
+            }
+        },
+    },
+    'kueue': {
+        'type': 'object',
+        'required': [],
+        'additionalProperties': False,
+        'properties': {
+            'local_queue_name': {
+                'type': 'string',
+            },
+        },
+    },
+}
+
 
 def get_config_schema():
     # pylint: disable=import-outside-toplevel
-    from sky.utils import kubernetes_enums
 
     resources_schema = {
         k: v
@@ -1178,70 +1242,21 @@ def get_config_schema():
                     'type': 'string',
                 },
             },
-            'networking': {
-                'type': 'string',
-                'case_insensitive_enum': [
-                    type.value
-                    for type in kubernetes_enums.KubernetesNetworkingMode
-                ]
-            },
-            'ports': {
-                'type': 'string',
-                'case_insensitive_enum': [
-                    type.value
-                    for type in kubernetes_enums.KubernetesPortMode
-                ]
-            },
-            'pod_config': {
+            'context_configs': {
                 'type': 'object',
                 'required': [],
-                # Allow arbitrary keys since validating pod spec is hard
-                'additionalProperties': True,
-            },
-            'custom_metadata': {
-                'type': 'object',
-                'required': [],
-                # Allow arbitrary keys since validating metadata is hard
-                'additionalProperties': True,
-                # Disallow 'name' and 'namespace' keys in this dict
-                'not': {
-                    'anyOf': [{
-                        'required': ['name']
-                    }, {
-                        'required': ['namespace']
-                    }]
-                }
-            },
-            'provision_timeout': {
-                'type': 'integer',
-            },
-            'autoscaler': {
-                'type': 'string',
-                'case_insensitive_enum': [
-                    type.value
-                    for type in kubernetes_enums.KubernetesAutoscalerType
-                ]
-            },
-            'high_availability': {
-                'type': 'object',
-                'required': [],
-                'additionalProperties': False,
-                'properties': {
-                    'storage_class_name': {
-                        'type': 'string',
-                    }
-                }
-            },
-            'kueue': {
-                'type': 'object',
-                'required': [],
-                'additionalProperties': False,
-                'properties': {
-                    'local_queue_name': {
-                        'type': 'string',
+                'properties': {},
+                # Properties are kubernetes context names.
+                'additionalProperties': {
+                    'type': 'object',
+                    'required': [],
+                    'additionalProperties': False,
+                    'properties': {
+                        **_CONTEXT_CONFIG_SCHEMA_KUBERNETES,
                     },
                 },
             },
+            **_CONTEXT_CONFIG_SCHEMA_KUBERNETES,
         }
     },
    'ssh': {
@@ -1400,6 +1415,18 @@ def get_config_schema():
            # Apply validation for URL
            'pattern': r'^https?://.*$',
        },
+       'service_account_token': {
+           'anyOf': [
+               {
+                   'type': 'string',
+                   # Validate that token starts with sky_ prefix
+                   'pattern': r'^sky_.+$',
+               },
+               {
+                   'type': 'null',
+               }
+           ]
+       },
    }
 }
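Aside: a sketch of a config the reshaped schema is meant to accept, validated with the stock jsonschema package (which ignores custom keywords such as case_insensitive_enum); every value below is invented for illustration.

import jsonschema

from sky.utils import schemas

config = {
    'kubernetes': {
        # Cloud-level default, still accepted at the top level.
        'provision_timeout': 10,
        # New: per-context overrides keyed by kubeconfig context name.
        'context_configs': {
            'my-context': {
                'provision_timeout': 600,
                'networking': 'portforward',
            },
        },
    },
    'api_server': {
        # New: must match r'^sky_.+$' (or be null).
        'service_account_token': 'sky_0123456789abcdef',
    },
}

jsonschema.validate(config, schemas.get_config_schema())
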
sky/utils/subprocess_utils.py CHANGED
@@ -6,6 +6,7 @@ import random
 import resource
 import shlex
 import subprocess
+import sys
 import threading
 import time
 import typing
@@ -16,7 +17,6 @@ import colorama
 from sky import exceptions
 from sky import sky_logging
 from sky.adaptors import common as adaptors_common
-from sky.skylet import constants
 from sky.skylet import log_lib
 from sky.utils import common_utils
 from sky.utils import timeline
@@ -322,12 +322,8 @@ def kill_process_daemon(process_pid: int) -> None:
     daemon_script = os.path.join(
         os.path.dirname(os.path.abspath(log_lib.__file__)),
         'subprocess_daemon.py')
-    python_path = subprocess.check_output(constants.SKY_GET_PYTHON_PATH_CMD,
-                                          shell=True,
-                                          stderr=subprocess.DEVNULL,
-                                          encoding='utf-8').strip()
     daemon_cmd = [
-        python_path,
+        sys.executable,
         daemon_script,
         '--parent-pid',
         str(parent_pid),
sky/workspaces/core.py CHANGED
@@ -1,13 +1,11 @@
 """Workspace management core."""
 
-import concurrent.futures
-from typing import Any, Callable, Dict, List
+from typing import Any, Callable, Dict, List, Tuple
 
 import filelock
 
 from sky import check as sky_check
 from sky import exceptions
-from sky import global_user_state
 from sky import models
 from sky import sky_logging
 from sky import skypilot_config
@@ -17,6 +15,7 @@ from sky.users import permission
 from sky.utils import annotations
 from sky.utils import common_utils
 from sky.utils import config_utils
+from sky.utils import resource_checker
 from sky.utils import schemas
 from sky.workspaces import utils as workspaces_utils
 
@@ -79,116 +78,6 @@ def _update_workspaces_config(
             f'file if you believe it is stale.') from e
 
 
-def _check_workspace_has_no_active_resources(workspace_name: str,
-                                             operation: str) -> None:
-    """Check if a workspace has active clusters or managed jobs.
-
-    Args:
-        workspace_name: The name of the workspace to check.
-        operation: The operation being performed ('update' or 'delete').
-
-    Raises:
-        ValueError: If the workspace has active clusters or managed jobs.
-    """
-    _check_workspaces_have_no_active_resources([(workspace_name, operation)])
-
-
-def _check_workspaces_have_no_active_resources(
-        workspace_operations: list) -> None:
-    """Check if workspaces have active clusters or managed jobs.
-
-    Args:
-        workspace_operations: List of tuples (workspace_name, operation) where
-            operation is 'update' or 'delete'.
-
-    Raises:
-        ValueError: If any workspace has active clusters or managed jobs.
-            The error message will include all workspaces with issues.
-    """
-    if not workspace_operations:
-        return
-
-    def get_all_clusters():
-        return global_user_state.get_clusters()
-
-    def get_all_managed_jobs():
-        # pylint: disable=import-outside-toplevel
-        from sky.jobs.server import core as managed_jobs_core
-        try:
-            return managed_jobs_core.queue(refresh=False,
-                                           skip_finished=True,
-                                           all_users=True)
-        except exceptions.ClusterNotUpError:
-            logger.warning('All jobs should be finished in workspace.')
-            return []
-
-    # Fetch both clusters and jobs in parallel
-    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
-        clusters_future = executor.submit(get_all_clusters)
-        jobs_future = executor.submit(get_all_managed_jobs)
-
-        all_clusters = clusters_future.result()
-        all_managed_jobs = jobs_future.result()
-
-    # Collect all error messages instead of raising immediately
-    error_messages = []
-
-    # Check each workspace against the fetched data
-    for workspace_name, operation in workspace_operations:
-        # Filter clusters for this workspace
-        workspace_clusters = [
-            cluster for cluster in all_clusters
-            if (cluster.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE)
-                == workspace_name)
-        ]
-
-        # Filter managed jobs for this workspace
-        workspace_active_jobs = [
-            job for job in all_managed_jobs
-            if job.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE) ==
-            workspace_name
-        ]
-
-        # Collect error messages for this workspace
-        workspace_errors = []
-
-        if workspace_clusters:
-            active_cluster_names = [
-                cluster['name'] for cluster in workspace_clusters
-            ]
-            cluster_list = ', '.join(active_cluster_names)
-            workspace_errors.append(
-                f'{len(workspace_clusters)} active cluster(s): {cluster_list}')
-
-        if workspace_active_jobs:
-            job_names = [str(job['job_id']) for job in workspace_active_jobs]
-            job_list = ', '.join(job_names)
-            workspace_errors.append(
-                f'{len(workspace_active_jobs)} active managed job(s): '
-                f'{job_list}')
-
-        # If this workspace has issues, add to overall error messages
-        if workspace_errors:
-            workspace_error_summary = ' and '.join(workspace_errors)
-            error_messages.append(
-                f'Cannot {operation} workspace {workspace_name!r} because it '
-                f'has {workspace_error_summary}.')
-
-    # If we collected any errors, raise them all together
-    if error_messages:
-        if len(error_messages) == 1:
-            # Single workspace error
-            full_message = error_messages[
-                0] + ' Please terminate these resources first.'
-        else:
-            # Multiple workspace errors
-            full_message = (f'Cannot proceed due to active resources in '
-                            f'{len(error_messages)} workspace(s):\n' +
-                            '\n'.join(f'• {msg}' for msg in error_messages) +
-                            '\nPlease terminate these resources first.')
-        raise ValueError(full_message)
-
-
 def _validate_workspace_config(workspace_name: str,
                                workspace_config: Dict[str, Any]) -> None:
     """Validate the workspace configuration.
@@ -229,7 +118,8 @@ def update_workspace(workspace_name: str, config: Dict[str,
     # Check for active clusters and managed jobs in the workspace
     # TODO(zhwu): we should allow the edits that only contain changes to
     # allowed_users or private.
-    _check_workspace_has_no_active_resources(workspace_name, 'update')
+    resource_checker.check_no_active_resources_for_workspaces([(workspace_name,
+                                                                'update')])
 
     def update_workspace_fn(workspaces: Dict[str, Any]) -> None:
         """Function to update workspace inside the lock."""
@@ -327,7 +217,8 @@ def delete_workspace(workspace_name: str) -> Dict[str, Any]:
         raise ValueError(f'Workspace {workspace_name!r} does not exist.')
 
     # Check for active clusters and managed jobs in the workspace
-    _check_workspace_has_no_active_resources(workspace_name, 'delete')
+    resource_checker.check_no_active_resources_for_workspaces([(workspace_name,
+                                                                'delete')])
 
     def delete_workspace_fn(workspaces: Dict[str, Any]) -> None:
         """Function to delete workspace inside the lock."""
@@ -396,7 +287,7 @@ def update_config(config: Dict[str, Any]) -> Dict[str, Any]:
     new_workspaces = config.get('workspaces', {})
 
     # Collect all workspaces that need to be checked for active resources
-    workspaces_to_check = []
+    workspaces_to_check: List[Tuple[str, str]] = []
     workspaces_to_check_policy: Dict[str, Dict[str, List[str]]] = {
         'add': {},
         'update': {},
@@ -430,7 +321,8 @@ def update_config(config: Dict[str, Any]) -> Dict[str, Any]:
         workspaces_to_check_policy['delete'][workspace_name] = ['*']
 
     # Check all workspaces for active resources in one efficient call
-    _check_workspaces_have_no_active_resources(workspaces_to_check)
+    resource_checker.check_no_active_resources_for_workspaces(
+        workspaces_to_check)
 
     # Use file locking to prevent race conditions
     lock_path = skypilot_config.get_skypilot_config_lock_path()
{skypilot_nightly-1.0.0.dev20250627.dist-info → skypilot_nightly-1.0.0.dev20250628.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: skypilot-nightly
-Version: 1.0.0.dev20250627
+Version: 1.0.0.dev20250628
 Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
 Author: SkyPilot Team
 License: Apache 2.0