skypilot-nightly 1.0.0.dev20251005__py3-none-any.whl → 1.0.0.dev20251009__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (102)
  1. sky/__init__.py +2 -2
  2. sky/authentication.py +17 -21
  3. sky/backends/backend.py +1 -3
  4. sky/backends/cloud_vm_ray_backend.py +76 -54
  5. sky/backends/local_docker_backend.py +0 -5
  6. sky/client/cli/command.py +6 -6
  7. sky/client/sdk.py +24 -23
  8. sky/dashboard/out/404.html +1 -1
  9. sky/dashboard/out/_next/static/chunks/1141-3b40c39626f99c89.js +11 -0
  10. sky/dashboard/out/_next/static/chunks/{9037-d0c00018a5ba198c.js → 1871-49141c317f3a9020.js} +2 -2
  11. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  12. sky/dashboard/out/_next/static/chunks/2755.97300e1362fe7c98.js +26 -0
  13. sky/dashboard/out/_next/static/chunks/3294.1fafbf42b3bcebff.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/3785.a19328ba41517b8b.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  16. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  17. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  18. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  19. sky/dashboard/out/_next/static/chunks/{5339.4a881570243431a5.js → 9360.71e83b2ddc844ec2.js} +4 -24
  20. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  21. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-72794fc3fcdd517a.js → [job]-8f058b0346db2aff.js} +1 -1
  22. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-477555ab7c0b13d8.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/clusters-2f61f65487f6d8ff.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-553b8b5cb65e100b.js} +1 -1
  25. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-910a22500c50596f.js} +1 -1
  26. sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-dd64309c3fe67ed2.js → [job]-4f7079dcab6ed653.js} +7 -2
  27. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-509b2977a6373bf6.js → [pool]-bc979970c247d8f3.js} +7 -2
  28. sky/dashboard/out/_next/static/chunks/pages/jobs-a35a9dc3c5ccd657.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/pages/users-98d2ed979084162a.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/pages/volumes-835d14ba94808f79.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-e8688c35c06f0ac5.js} +1 -1
  32. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7528cc0ef8c522c5.js → workspaces-69c80d677d3c2949.js} +1 -1
  33. sky/dashboard/out/_next/static/chunks/webpack-6a5ddd0184bfa22c.js +1 -0
  34. sky/dashboard/out/_next/static/hIViZcQBkn0HE8SpaSsUU/_buildManifest.js +1 -0
  35. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  36. sky/dashboard/out/clusters/[cluster].html +1 -1
  37. sky/dashboard/out/clusters.html +1 -1
  38. sky/dashboard/out/config.html +1 -1
  39. sky/dashboard/out/index.html +1 -1
  40. sky/dashboard/out/infra/[context].html +1 -1
  41. sky/dashboard/out/infra.html +1 -1
  42. sky/dashboard/out/jobs/[job].html +1 -1
  43. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  44. sky/dashboard/out/jobs.html +1 -1
  45. sky/dashboard/out/users.html +1 -1
  46. sky/dashboard/out/volumes.html +1 -1
  47. sky/dashboard/out/workspace/new.html +1 -1
  48. sky/dashboard/out/workspaces/[name].html +1 -1
  49. sky/dashboard/out/workspaces.html +1 -1
  50. sky/execution.py +1 -11
  51. sky/global_user_state.py +16 -5
  52. sky/jobs/constants.py +1 -7
  53. sky/jobs/controller.py +19 -3
  54. sky/jobs/recovery_strategy.py +3 -1
  55. sky/jobs/scheduler.py +30 -15
  56. sky/jobs/server/core.py +8 -3
  57. sky/jobs/utils.py +30 -2
  58. sky/metrics/utils.py +65 -37
  59. sky/provision/instance_setup.py +32 -10
  60. sky/provision/kubernetes/instance.py +18 -3
  61. sky/provision/kubernetes/utils.py +4 -1
  62. sky/provision/provisioner.py +10 -7
  63. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  64. sky/server/common.py +1 -0
  65. sky/server/config.py +2 -0
  66. sky/server/metrics.py +3 -1
  67. sky/server/requests/executor.py +103 -77
  68. sky/server/requests/requests.py +26 -11
  69. sky/server/server.py +16 -0
  70. sky/skylet/constants.py +9 -1
  71. sky/skylet/events.py +17 -0
  72. sky/skylet/skylet.py +3 -0
  73. sky/templates/kubernetes-ray.yml.j2 +6 -1
  74. sky/utils/context_utils.py +5 -1
  75. sky/utils/controller_utils.py +14 -0
  76. sky/utils/db/db_utils.py +2 -0
  77. sky/utils/db/migration_utils.py +11 -2
  78. sky/volumes/server/server.py +2 -2
  79. {skypilot_nightly-1.0.0.dev20251005.dist-info → skypilot_nightly-1.0.0.dev20251009.dist-info}/METADATA +36 -36
  80. {skypilot_nightly-1.0.0.dev20251005.dist-info → skypilot_nightly-1.0.0.dev20251009.dist-info}/RECORD +85 -84
  81. sky/dashboard/out/_next/static/Vg53Kzbf7u4o6fYPeOHMe/_buildManifest.js +0 -1
  82. sky/dashboard/out/_next/static/chunks/1141-159df2d4c441a9d1.js +0 -1
  83. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  84. sky/dashboard/out/_next/static/chunks/3294.93d9336bdc032b3a.js +0 -6
  85. sky/dashboard/out/_next/static/chunks/3785.0fa442e16dd3f00e.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  87. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  88. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  89. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  90. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  91. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  92. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e052384df65ef200.js +0 -16
  93. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  96. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  97. sky/dashboard/out/_next/static/chunks/webpack-3286453d56f3c0a0.js +0 -1
  98. sky/dashboard/out/_next/static/{Vg53Kzbf7u4o6fYPeOHMe → hIViZcQBkn0HE8SpaSsUU}/_ssgManifest.js +0 -0
  99. {skypilot_nightly-1.0.0.dev20251005.dist-info → skypilot_nightly-1.0.0.dev20251009.dist-info}/WHEEL +0 -0
  100. {skypilot_nightly-1.0.0.dev20251005.dist-info → skypilot_nightly-1.0.0.dev20251009.dist-info}/entry_points.txt +0 -0
  101. {skypilot_nightly-1.0.0.dev20251005.dist-info → skypilot_nightly-1.0.0.dev20251009.dist-info}/licenses/LICENSE +0 -0
  102. {skypilot_nightly-1.0.0.dev20251005.dist-info → skypilot_nightly-1.0.0.dev20251009.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -7,7 +7,7 @@ import urllib.request
7
7
  from sky.utils import directory_utils
8
8
 
9
9
  # Replaced with the current commit when building the wheels.
10
- _SKYPILOT_COMMIT_SHA = '4db6dccc63718d0342d70a736bfdb0ba84b6a4ad'
10
+ _SKYPILOT_COMMIT_SHA = '02a8e33361b9deca8588cc30221de41bbd5e5cf8'
11
11
 
12
12
 
13
13
  def _get_git_commit():
@@ -37,7 +37,7 @@ def _get_git_commit():
37
37
 
38
38
 
39
39
  __commit__ = _get_git_commit()
40
- __version__ = '1.0.0.dev20251005'
40
+ __version__ = '1.0.0.dev20251009'
41
41
  __root_dir__ = directory_utils.get_sky_dir()
42
42
 
43
43
 
sky/authentication.py CHANGED
@@ -25,7 +25,7 @@ import re
25
25
  import socket
26
26
  import subprocess
27
27
  import sys
28
- from typing import Any, Dict, Optional, Tuple
28
+ from typing import Any, Dict, Tuple
29
29
  import uuid
30
30
 
31
31
  import colorama
@@ -65,10 +65,7 @@ MAX_TRIALS = 64
65
65
  _SSH_KEY_PATH_PREFIX = '~/.sky/clients/{user_hash}/ssh'
66
66
 
67
67
 
68
- def get_ssh_key_and_lock_path(
69
- user_hash: Optional[str] = None) -> Tuple[str, str, str]:
70
- if user_hash is None:
71
- user_hash = common_utils.get_user_hash()
68
+ def get_ssh_key_and_lock_path(user_hash: str) -> Tuple[str, str, str]:
72
69
  user_ssh_key_prefix = _SSH_KEY_PATH_PREFIX.format(user_hash=user_hash)
73
70
 
74
71
  os.makedirs(os.path.expanduser(user_ssh_key_prefix),
@@ -124,13 +121,12 @@ def _save_key_pair(private_key_path: str, public_key_path: str,
124
121
  opener=functools.partial(os.open, mode=0o644)) as f:
125
122
  f.write(public_key)
126
123
 
127
- global_user_state.set_ssh_keys(common_utils.get_user_hash(), public_key,
128
- private_key)
129
-
130
124
 
131
125
  def get_or_generate_keys() -> Tuple[str, str]:
132
126
  """Returns the absolute private and public key paths."""
133
- private_key_path, public_key_path, lock_path = get_ssh_key_and_lock_path()
127
+ user_hash = common_utils.get_user_hash()
128
+ private_key_path, public_key_path, lock_path = get_ssh_key_and_lock_path(
129
+ user_hash)
134
130
  private_key_path = os.path.expanduser(private_key_path)
135
131
  public_key_path = os.path.expanduser(public_key_path)
136
132
  lock_path = os.path.expanduser(lock_path)
@@ -143,9 +139,11 @@ def get_or_generate_keys() -> Tuple[str, str]:
143
139
  with filelock.FileLock(lock_path, timeout=10):
144
140
  if not os.path.exists(private_key_path):
145
141
  ssh_public_key, ssh_private_key, exists = (
146
- global_user_state.get_ssh_keys(common_utils.get_user_hash()))
142
+ global_user_state.get_ssh_keys(user_hash))
147
143
  if not exists:
148
144
  ssh_public_key, ssh_private_key = _generate_rsa_key_pair()
145
+ global_user_state.set_ssh_keys(user_hash, ssh_public_key,
146
+ ssh_private_key)
149
147
  _save_key_pair(private_key_path, public_key_path, ssh_private_key,
150
148
  ssh_public_key)
151
149
  assert os.path.exists(public_key_path), (
@@ -154,22 +152,20 @@ def get_or_generate_keys() -> Tuple[str, str]:
154
152
  return private_key_path, public_key_path
155
153
 
156
154
 
157
- def create_ssh_key_files_from_db(private_key_path: Optional[str] = None):
158
- if private_key_path is None:
159
- user_hash = common_utils.get_user_hash()
160
- else:
161
- # Assume private key path is in the format of
162
- # ~/.sky/clients/<user_hash>/ssh/sky-key
163
- separated_path = os.path.normpath(private_key_path).split(os.path.sep)
164
- assert separated_path[-1] == 'sky-key'
165
- assert separated_path[-2] == 'ssh'
166
- user_hash = separated_path[-3]
155
+ def create_ssh_key_files_from_db(private_key_path: str):
156
+ # Assume private key path is in the format of
157
+ # ~/.sky/clients/<user_hash>/ssh/sky-key
158
+ separated_path = os.path.normpath(private_key_path).split(os.path.sep)
159
+ assert separated_path[-1] == 'sky-key'
160
+ assert separated_path[-2] == 'ssh'
161
+ user_hash = separated_path[-3]
167
162
 
168
163
  private_key_path_generated, public_key_path, lock_path = (
169
164
  get_ssh_key_and_lock_path(user_hash))
170
165
  assert private_key_path == os.path.expanduser(private_key_path_generated), (
171
166
  f'Private key path {private_key_path} does not '
172
- f'match the generated path {private_key_path_generated}')
167
+ 'match the generated path '
168
+ f'{os.path.expanduser(private_key_path_generated)}')
173
169
  private_key_path = os.path.expanduser(private_key_path)
174
170
  public_key_path = os.path.expanduser(public_key_path)
175
171
  lock_path = os.path.expanduser(lock_path)
sky/backends/backend.py CHANGED
@@ -126,7 +126,6 @@ class Backend(Generic[_ResourceHandleType]):
126
126
  def execute(self,
127
127
  handle: _ResourceHandleType,
128
128
  task: 'task_lib.Task',
129
- detach_run: bool,
130
129
  dryrun: bool = False) -> Optional[int]:
131
130
  """Execute the task on the cluster.
132
131
 
@@ -137,7 +136,7 @@ class Backend(Generic[_ResourceHandleType]):
137
136
  handle.get_cluster_name())
138
137
  usage_lib.messages.usage.update_actual_task(task)
139
138
  with rich_utils.safe_status(ux_utils.spinner_message('Submitting job')):
140
- return self._execute(handle, task, detach_run, dryrun)
139
+ return self._execute(handle, task, dryrun)
141
140
 
142
141
  @timeline.event
143
142
  def post_execute(self, handle: _ResourceHandleType, down: bool) -> None:
@@ -197,7 +196,6 @@ class Backend(Generic[_ResourceHandleType]):
197
196
  def _execute(self,
198
197
  handle: _ResourceHandleType,
199
198
  task: 'task_lib.Task',
200
- detach_run: bool,
201
199
  dryrun: bool = False) -> Optional[int]:
202
200
  raise NotImplementedError
203
201
 
sky/backends/cloud_vm_ray_backend.py CHANGED
@@ -207,6 +207,11 @@ _RAY_UP_WITH_MONKEY_PATCHED_HASH_LAUNCH_CONF_PATH = (
207
207
  # We use 100KB as a threshold to be safe for other arguments that
208
208
  # might be added during ssh.
209
209
  _MAX_INLINE_SCRIPT_LENGTH = 100 * 1024
210
+ _EXCEPTION_MSG_AND_RETURNCODE_FOR_DUMP_INLINE_SCRIPT = [
211
+ ('too long', 255),
212
+ ('request-uri too large', 1),
213
+ ('request header fields too large', 1),
214
+ ]
210
215
 
211
216
  _RESOURCES_UNAVAILABLE_LOG = (
212
217
  'Reasons for provision failures (for details, please check the log above):')
@@ -227,6 +232,61 @@ def _is_command_length_over_limit(command: str) -> bool:
227
232
  return quoted_length > _MAX_INLINE_SCRIPT_LENGTH
228
233
 
229
234
 
235
+ def _is_message_too_long(returncode: int,
236
+ output: Optional[str] = None,
237
+ file_path: Optional[str] = None) -> bool:
238
+ """Check if the message sent to the remote is too long.
239
+
240
+ We use inline script to run the setup or run command, i.e. the script will
241
+ be part of the message sent to the remote cluster. There is a chance that
242
+ the command is too long, when people has very long run or setup commands, or
243
+ there is a cloudflare proxy in front of the remote blocking the long
244
+ message. Several common causes are:
245
+ - SSH returning: `too long` in the error message.
246
+ - Cloudflare proxy returning: `414 Request-URI Too Large` or
247
+ `431 Request Header Fields Too Large` error.
248
+
249
+ We use a general length limit check before but it could be inaccurate on
250
+ some systems, e.g. cloudflare proxy, so this is necessary.
251
+
252
+ Args:
253
+ returncode: The return code of the setup command.
254
+ output: The output of the setup command.
255
+ file_path: The path to the setup log file.
256
+ """
257
+ assert (output is None) != (file_path is None), (
258
+ 'Either output or file_path must be provided.', output, file_path)
259
+ to_check = []
260
+ for (match_str,
261
+ desired_rc) in _EXCEPTION_MSG_AND_RETURNCODE_FOR_DUMP_INLINE_SCRIPT:
262
+ if desired_rc == returncode:
263
+ to_check.append(match_str)
264
+ if not to_check:
265
+ return False
266
+
267
+ def _check_output_for_match_str(output: str) -> bool:
268
+ for match_str in to_check:
269
+ if match_str.lower() in output.lower():
270
+ return True
271
+ return False
272
+
273
+ if file_path is not None:
274
+ try:
275
+ with open(os.path.expanduser(file_path), 'r',
276
+ encoding='utf-8') as f:
277
+ content = f.read()
278
+ return _check_output_for_match_str(content)
279
+ except Exception as e: # pylint: disable=broad-except
280
+ # We don't crash the setup if we cannot read the log file.
281
+ # Instead, we should retry the setup with dumping the script
282
+ # to a file to be safe.
283
+ logger.debug(f'Failed to read setup log file {file_path}: {e}')
284
+ return True
285
+ else:
286
+ assert output is not None, (output, file_path)
287
+ return _check_output_for_match_str(output)
288
+
289
+
230
290
  def _get_cluster_config_template(cloud):
231
291
  cloud_to_template = {
232
292
  clouds.AWS: 'aws-ray.yml.j2',
@@ -2264,8 +2324,6 @@ class RetryingVmProvisioner(object):
2264
2324
  # terminated by _retry_zones().
2265
2325
  assert (prev_cluster_status == status_lib.ClusterStatus.INIT
2266
2326
  ), prev_cluster_status
2267
- assert global_user_state.get_handle_from_cluster_name(
2268
- cluster_name) is None, cluster_name
2269
2327
  logger.info(
2270
2328
  ux_utils.retry_message(
2271
2329
  f'Retrying provisioning with requested resources: '
@@ -3634,7 +3692,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3634
3692
  global_user_state.ClusterEventType.STATUS_CHANGE)
3635
3693
 
3636
3694
  cluster_info = provisioner.post_provision_runtime_setup(
3637
- repr(handle.launched_resources.cloud),
3695
+ handle.launched_resources,
3638
3696
  resources_utils.ClusterName(handle.cluster_name,
3639
3697
  handle.cluster_name_on_cloud),
3640
3698
  handle.cluster_yaml,
@@ -4072,29 +4130,10 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4072
4130
 
4073
4131
  returncode = _run_setup(f'{create_script_code} && {setup_cmd}',)
4074
4132
 
4075
- def _load_setup_log_and_match(match_str: str) -> bool:
4076
- try:
4077
- with open(os.path.expanduser(setup_log_path),
4078
- 'r',
4079
- encoding='utf-8') as f:
4080
- return match_str.lower() in f.read().lower()
4081
- except Exception as e: # pylint: disable=broad-except
4082
- # We don't crash the setup if we cannot read the log file.
4083
- # Instead, we should retry the setup with dumping the script
4084
- # to a file to be safe.
4085
- logger.debug(
4086
- f'Failed to read setup log file {setup_log_path}: {e}')
4087
- return True
4088
-
4089
- if ((returncode == 255 and _load_setup_log_and_match('too long')) or
4090
- (returncode == 1 and
4091
- _load_setup_log_and_match('request-uri too large'))):
4092
- # If the setup script is too long, we retry it with dumping
4093
- # the script to a file and running it with SSH. We use a
4094
- # general length limit check before but it could be
4095
- # inaccurate on some systems.
4096
- # When there is a cloudflare proxy in front of the remote, it
4097
- # could cause `414 Request-URI Too Large` error.
4133
+ if _is_message_too_long(returncode, file_path=setup_log_path):
4134
+ # If the setup script is too long, we need to retry it
4135
+ # with dumping the script to a file and running it the script
4136
+ # on remote cluster instead.
4098
4137
  logger.debug('Failed to run setup command inline due to '
4099
4138
  'command length limit. Dumping setup script to '
4100
4139
  'file and running it with SSH.')
@@ -4175,7 +4214,6 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4175
4214
  handle: CloudVmRayResourceHandle,
4176
4215
  codegen: str,
4177
4216
  job_id: int,
4178
- detach_run: bool = False,
4179
4217
  managed_job_dag: Optional['dag.Dag'] = None,
4180
4218
  remote_log_dir: Optional[str] = None,
4181
4219
  ) -> None:
@@ -4308,15 +4346,10 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4308
4346
  backend_utils.check_stale_runtime_on_remote(returncode, stderr,
4309
4347
  handle.cluster_name)
4310
4348
  output = stdout + stderr
4311
- if ((returncode == 255 and 'too long' in output.lower()) or
4312
- (returncode == 1 and
4313
- 'request-uri too large' in output.lower())):
4314
- # If the generated script is too long, we retry it with dumping
4315
- # the script to a file and running it with SSH. We use a general
4316
- # length limit check before but it could be inaccurate on some
4317
- # systems.
4318
- # When there is a cloudflare proxy in front of the remote, it
4319
- # could cause `414 Request-URI Too Large` error.
4349
+ if _is_message_too_long(returncode, output=output):
4350
+ # If the job submit script is too long, we need to retry it
4351
+ # with dumping the script to a file and running it the script
4352
+ # on remote cluster instead.
4320
4353
  logger.debug(
4321
4354
  'Failed to submit job due to command length limit. '
4322
4355
  'Dumping job to file and running it with SSH. '
@@ -4343,14 +4376,6 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4343
4376
  logger.info(
4344
4377
  ux_utils.starting_message(f'Job submitted, ID: {job_id}'))
4345
4378
  rich_utils.stop_safe_status()
4346
- if not detach_run:
4347
- if (handle.cluster_name == controller_utils.Controllers.
4348
- JOBS_CONTROLLER.value.cluster_name):
4349
- self.tail_managed_job_logs(handle, job_id)
4350
- else:
4351
- # Sky logs. Not using subprocess.run since it will make the
4352
- # ssh keep connected after ctrl-c.
4353
- self.tail_logs(handle, job_id)
4354
4379
 
4355
4380
  def _add_job(self, handle: CloudVmRayResourceHandle,
4356
4381
  job_name: Optional[str], resources_str: str,
@@ -4419,7 +4444,6 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4419
4444
  self,
4420
4445
  handle: CloudVmRayResourceHandle,
4421
4446
  task: task_lib.Task,
4422
- detach_run: bool,
4423
4447
  dryrun: bool = False,
4424
4448
  ) -> Optional[int]:
4425
4449
  """Executes the task on the cluster.
@@ -4471,12 +4495,10 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4471
4495
  num_actual_nodes = task.num_nodes * handle.num_ips_per_node
4472
4496
  # Case: task_lib.Task(run, num_nodes=N) or TPU VM Pods
4473
4497
  if num_actual_nodes > 1:
4474
- self._execute_task_n_nodes(handle, task_copy, job_id, detach_run,
4475
- log_dir)
4498
+ self._execute_task_n_nodes(handle, task_copy, job_id, log_dir)
4476
4499
  else:
4477
4500
  # Case: task_lib.Task(run, num_nodes=1)
4478
- self._execute_task_one_node(handle, task_copy, job_id, detach_run,
4479
- log_dir)
4501
+ self._execute_task_one_node(handle, task_copy, job_id, log_dir)
4480
4502
 
4481
4503
  return job_id
4482
4504
 
@@ -5497,7 +5519,9 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
5497
5519
  cluster_yaml_path = handle.cluster_yaml
5498
5520
  handle.cluster_yaml = None
5499
5521
  global_user_state.update_cluster_handle(handle.cluster_name, handle)
5500
- global_user_state.remove_cluster_yaml(handle.cluster_name)
5522
+ # Removing the cluster YAML can cause some unexpected stability issues.
5523
+ # See #5011.
5524
+ # global_user_state.remove_cluster_yaml(handle.cluster_name)
5501
5525
  common_utils.remove_file_if_exists(cluster_yaml_path)
5502
5526
 
5503
5527
  def set_autostop(self,
@@ -6247,7 +6271,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
6247
6271
 
6248
6272
  def _execute_task_one_node(self, handle: CloudVmRayResourceHandle,
6249
6273
  task: task_lib.Task, job_id: int,
6250
- detach_run: bool, remote_log_dir: str) -> None:
6274
+ remote_log_dir: str) -> None:
6251
6275
  # Launch the command as a Ray task.
6252
6276
  log_dir = os.path.join(remote_log_dir, 'tasks')
6253
6277
 
@@ -6286,13 +6310,12 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
6286
6310
  self._exec_code_on_head(handle,
6287
6311
  codegen.build(),
6288
6312
  job_id,
6289
- detach_run=detach_run,
6290
6313
  managed_job_dag=task.managed_job_dag,
6291
6314
  remote_log_dir=remote_log_dir)
6292
6315
 
6293
6316
  def _execute_task_n_nodes(self, handle: CloudVmRayResourceHandle,
6294
6317
  task: task_lib.Task, job_id: int,
6295
- detach_run: bool, remote_log_dir: str) -> None:
6318
+ remote_log_dir: str) -> None:
6296
6319
  # Strategy:
6297
6320
  # ray.init(...)
6298
6321
  # for node:
@@ -6342,6 +6365,5 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
6342
6365
  self._exec_code_on_head(handle,
6343
6366
  codegen.build(),
6344
6367
  job_id,
6345
- detach_run=detach_run,
6346
6368
  managed_job_dag=task.managed_job_dag,
6347
6369
  remote_log_dir=remote_log_dir)
sky/backends/local_docker_backend.py CHANGED
@@ -283,13 +283,8 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
283
283
  def _execute(self,
284
284
  handle: LocalDockerResourceHandle,
285
285
  task: 'task_lib.Task',
286
- detach_run: bool,
287
286
  dryrun: bool = False) -> None:
288
287
  """ Launches the container."""
289
- if detach_run:
290
- raise NotImplementedError('detach_run=True is not supported in '
291
- 'LocalDockerBackend.')
292
-
293
288
  if task.num_nodes > 1:
294
289
  raise NotImplementedError(
295
290
  'Tasks with num_nodes > 1 is currently not supported in '
sky/client/cli/command.py CHANGED
@@ -5181,22 +5181,22 @@ def jobs_pool_logs(
5181
5181
  .. code-block:: bash
5182
5182
 
5183
5183
  # Tail the controller logs of a pool
5184
- sky pool logs --controller [POOL_NAME]
5184
+ sky jobs pool logs --controller [POOL_NAME]
5185
5185
  \b
5186
5186
  # Print the worker logs so far and exit
5187
- sky pool logs --no-follow [POOL_NAME]
5187
+ sky jobs pool logs --no-follow [POOL_NAME] 1
5188
5188
  \b
5189
5189
  # Tail the logs of worker 1
5190
- sky pool logs [POOL_NAME] 1
5190
+ sky jobs pool logs [POOL_NAME] 1
5191
5191
  \b
5192
5192
  # Show the last 100 lines of the controller logs
5193
- sky pool logs --controller --tail 100 [POOL_NAME]
5193
+ sky jobs pool logs --controller --tail 100 [POOL_NAME]
5194
5194
  \b
5195
5195
  # Sync down all logs of the pool (controller, all workers)
5196
- sky pool logs [POOL_NAME] --sync-down
5196
+ sky jobs pool logs [POOL_NAME] --sync-down
5197
5197
  \b
5198
5198
  # Sync down controller logs and logs for workers 1 and 3
5199
- sky pool logs [POOL_NAME] 1 3 --controller --sync-down
5199
+ sky jobs pool logs [POOL_NAME] 1 3 --controller --sync-down
5200
5200
  """
5201
5201
  _handle_serve_logs(pool_name,
5202
5202
  follow=follow,
sky/client/sdk.py CHANGED
@@ -2295,29 +2295,30 @@ def api_stop() -> None:
2295
2295
  f'Cannot kill the API server at {server_url} because it is not '
2296
2296
  f'the default SkyPilot API server started locally.')
2297
2297
 
2298
- try:
2299
- with open(os.path.expanduser(scheduler.JOB_CONTROLLER_PID_PATH),
2300
- 'r',
2301
- encoding='utf-8') as f:
2302
- pids = f.read().split('\n')[:-1]
2303
- for pid in pids:
2304
- if subprocess_utils.is_process_alive(int(pid.strip())):
2305
- subprocess_utils.kill_children_processes(
2306
- parent_pids=[int(pid.strip())], force=True)
2307
- os.remove(os.path.expanduser(scheduler.JOB_CONTROLLER_PID_PATH))
2308
- except FileNotFoundError:
2309
- # its fine we will create it
2310
- pass
2311
- except Exception as e: # pylint: disable=broad-except
2312
- # in case we get perm issues or something is messed up, just ignore it
2313
- # and assume the process is dead
2314
- logger.error(f'Error looking at job controller pid file: {e}')
2315
- pass
2316
-
2317
- found = _local_api_server_running(kill=True)
2318
-
2319
- # Remove the database for requests.
2320
- server_common.clear_local_api_server_database()
2298
+ # Acquire the api server creation lock to prevent multiple processes from
2299
+ # stopping and starting the API server at the same time.
2300
+ with filelock.FileLock(
2301
+ os.path.expanduser(constants.API_SERVER_CREATION_LOCK_PATH)):
2302
+ try:
2303
+ with open(os.path.expanduser(scheduler.JOB_CONTROLLER_PID_PATH),
2304
+ 'r',
2305
+ encoding='utf-8') as f:
2306
+ pids = f.read().split('\n')[:-1]
2307
+ for pid in pids:
2308
+ if subprocess_utils.is_process_alive(int(pid.strip())):
2309
+ subprocess_utils.kill_children_processes(
2310
+ parent_pids=[int(pid.strip())], force=True)
2311
+ os.remove(os.path.expanduser(scheduler.JOB_CONTROLLER_PID_PATH))
2312
+ except FileNotFoundError:
2313
+ # its fine we will create it
2314
+ pass
2315
+ except Exception as e: # pylint: disable=broad-except
2316
+ # in case we get perm issues or something is messed up, just ignore
2317
+ # it and assume the process is dead
2318
+ logger.error(f'Error looking at job controller pid file: {e}')
2319
+ pass
2320
+
2321
+ found = _local_api_server_running(kill=True)
2321
2322
 
2322
2323
  if found:
2323
2324
  logger.info(f'{colorama.Fore.GREEN}SkyPilot API server stopped.'
sky/dashboard/out/404.html CHANGED
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-3286453d56f3c0a0.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/Vg53Kzbf7u4o6fYPeOHMe/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Vg53Kzbf7u4o6fYPeOHMe/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"Vg53Kzbf7u4o6fYPeOHMe","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-6a5ddd0184bfa22c.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/hIViZcQBkn0HE8SpaSsUU/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/hIViZcQBkn0HE8SpaSsUU/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"hIViZcQBkn0HE8SpaSsUU","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
@@ -0,0 +1,11 @@
1
+ "use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[1141],{99333:function(e,s,r){r.d(s,{Z:function(){return t}});/**
2
+ * @license lucide-react v0.407.0 - ISC
3
+ *
4
+ * This source code is licensed under the ISC license.
5
+ * See the LICENSE file in the root directory of this source tree.
6
+ */let t=(0,r(60998).Z)("Save",[["path",{d:"M15.2 3a2 2 0 0 1 1.4.6l3.8 3.8a2 2 0 0 1 .6 1.4V19a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2z",key:"1c8476"}],["path",{d:"M17 21v-7a1 1 0 0 0-1-1H8a1 1 0 0 0-1 1v7",key:"1ydtos"}],["path",{d:"M7 3v4a1 1 0 0 0 1 1h7",key:"t51u73"}]])},98418:function(e,s,r){r.d(s,{Z:function(){return t}});/**
7
+ * @license lucide-react v0.407.0 - ISC
8
+ *
9
+ * This source code is licensed under the ISC license.
10
+ * See the LICENSE file in the root directory of this source tree.
11
+ */let t=(0,r(60998).Z)("Trash",[["path",{d:"M3 6h18",key:"d0wm0j"}],["path",{d:"M19 6v14c0 1-1 2-2 2H7c-1 0-2-1-2-2V6",key:"4alrt4"}],["path",{d:"M8 6V4c0-1 1-2 2-2h4c1 0 2 1 2 2v2",key:"v07s0e"}]])},1812:function(e,s,r){r.d(s,{X:function(){return n}});var t=r(85893),a=r(67294);let l=e=>{if(!(null==e?void 0:e.message))return"An unexpected error occurred.";let s=e.message;return s.includes("failed:")&&(s=s.split("failed:")[1].trim()),s},n=e=>{let{error:s,title:r="Error",onDismiss:n}=e,[c,i]=(0,a.useState)(!1);if((0,a.useEffect)(()=>{s&&i(!1)},[s]),!s||c)return null;let o="string"==typeof s?s:l(s);return(0,t.jsx)("div",{className:"bg-red-50 border border-red-200 rounded-md p-3 mb-4",children:(0,t.jsxs)("div",{className:"flex items-center justify-between",children:[(0,t.jsxs)("div",{className:"flex",children:[(0,t.jsx)("div",{className:"flex-shrink-0",children:(0,t.jsx)("svg",{className:"h-5 w-5 text-red-400",viewBox:"0 0 20 20",fill:"currentColor",children:(0,t.jsx)("path",{fillRule:"evenodd",d:"M10 18a8 8 0 100-16 8 8 0 000 16zM8.707 7.293a1 1 0 00-1.414 1.414L8.586 10l-1.293 1.293a1 1 0 101.414 1.414L10 11.414l1.293 1.293a1 1 0 001.414-1.414L11.414 10l1.293-1.293a1 1 0 00-1.414-1.414L10 8.586 8.707 7.293z",clipRule:"evenodd"})})}),(0,t.jsx)("div",{className:"ml-3",children:(0,t.jsxs)("div",{className:"text-sm text-red-800",children:[(0,t.jsxs)("strong",{children:[r,":"]})," ",o]})})]}),(0,t.jsx)("button",{onClick:()=>{i(!0),n&&n()},className:"flex-shrink-0 ml-4 text-red-400 hover:text-red-600 focus:outline-none focus:ring-2 focus:ring-red-500 focus:ring-offset-2 focus:ring-offset-red-50 rounded","aria-label":"Dismiss error",children:(0,t.jsx)("svg",{className:"h-4 w-4",viewBox:"0 0 20 20",fill:"currentColor",children:(0,t.jsx)("path",{fillRule:"evenodd",d:"M4.293 4.293a1 1 0 011.414 0L10 8.586l4.293-4.293a1 1 0 111.414 1.414L11.414 10l4.293 4.293a1 1 0 01-1.414 1.414L10 11.414l-4.293 4.293a1 1 0 01-1.414-1.414L8.586 10 4.293 5.707a1 1 0 
010-1.414z",clipRule:"evenodd"})})})]})})}},69123:function(e,s,r){r.d(s,{g:function(){return n}});var t=r(85893),a=r(67294),l=r(32350);let n=a.forwardRef((e,s)=>{let{className:r,...a}=e;return(0,t.jsx)("textarea",{className:(0,l.cn)("flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",r),ref:s,...a})});n.displayName="Textarea"},11141:function(e,s,r){r.r(s),r.d(s,{WorkspaceEditor:function(){return R}});var t=r(85893),a=r(67294),l=r(11163),n=r(17324),c=r(23266),i=r(68969);r(6135);var o=r(41664),d=r.n(o),u=r(9008),x=r.n(u),m=r(37673),h=r(30803),f=r(69123),g=r(55739),p=r(70282),b=r(6021),j=r(13626),y=r(98418),N=r(99333),v=r(50326);let k=e=>{let{className:s="",variant:r="default",children:a,...l}=e;return(0,t.jsx)("div",{role:"alert",className:"".concat("relative w-full rounded-lg border p-4 flex items-start space-x-2"," ").concat({default:"bg-blue-50 border-blue-200 text-blue-800",destructive:"bg-red-50 border-red-200 text-red-800"}[r]," ").concat(s),...l,children:a})},w=e=>{let{className:s="",children:r,...a}=e;return(0,t.jsx)("div",{className:"text-sm leading-relaxed ".concat(s),...a,children:r})};var C=r(53850),L=r(1812),E=r(23015),S=r(1272),W=r(93225),A=r(53081);let Z=e=>{let{message:s}=e;return s?(0,t.jsxs)(k,{className:"border-green-200 bg-green-50",children:[(0,t.jsx)(p.Z,{className:"h-4 w-4 text-green-600"}),(0,t.jsx)(w,{className:"text-green-800",children:s})]}):null},D=e=>{let{workspaceName:s,config:r,enabledClouds:a=[]}=e;if(!r)return null;let l="default"===s,n=0===Object.keys(r).length;if(l&&n)return(0,t.jsx)("div",{className:"text-sm text-gray-500 mb-3 italic p-3 bg-sky-50 rounded border border-sky-200",children:"Workspace 'default' can use all accessible infrastructure."});let c=[],i=[],o=[],d=new 
Set(a.map(e=>e.toLowerCase()));Object.entries(r).forEach(e=>{let[s,r]=e;if("private"===s||"allowed_users"===s)return;let a=W.Z2[s.toLowerCase()]||s.toUpperCase(),l=null==a?void 0:a.toLowerCase(),n=d.has(l)||Array.from(d).some(e=>e.startsWith(l+"/")),u=()=>"kubernetes"===s.toLowerCase()?Array.from(d).filter(e=>e.startsWith(l+"/")).map(e=>e.split("/")[1]):[];if((null==r?void 0:r.disabled)===!0)i.push(a);else if(r&&Object.keys(r).length>0){let e="";if("gcp"===s.toLowerCase()&&r.project_id)e=" (Project ID: ".concat(r.project_id,")");else if("aws"===s.toLowerCase()&&r.region)e=" (Region: ".concat(r.region,")");else if("kubernetes"===s.toLowerCase()){let s=u();s.length>0&&(e=" (Contexts: ".concat(s.join(", "),")"))}n?c.push((0,t.jsxs)("span",{className:"block",children:[a,e," is enabled."]},"".concat(s,"-enabled"))):o.push((0,t.jsxs)("span",{className:"block text-amber-700",children:[a,e," is configured but not currently available."]},"".concat(s,"-configured-not-enabled")))}else if(n){let e="";if("kubernetes"===s.toLowerCase()){let s=u();s.length>0&&(e=" (Contexts: ".concat(s.join(", "),")"))}c.push((0,t.jsxs)("span",{className:"block",children:[a,e," is enabled (using default settings)."]},"".concat(s,"-default-enabled")))}else o.push((0,t.jsxs)("span",{className:"block text-amber-700",children:[a," is configured but not currently available."]},"".concat(s,"-default-not-enabled")))});let u=[];if(i.length>0){let e=i.join(" and ");u.push((0,t.jsxs)("span",{className:"block",children:[e," ",1===i.length?"is":"are"," explicitly disabled."]},"disabled-clouds"))}return(u.push(...c),u.push(...o),u.length>0)?(0,t.jsx)("div",{className:"text-sm text-gray-700 mb-3 p-3 bg-sky-50 rounded border border-sky-200",children:u}):!l&&n?(0,t.jsx)("div",{className:"text-sm text-gray-500 mb-3 italic p-3 bg-sky-50 rounded border border-sky-200",children:"This workspace has no specific cloud resource configurations and can use all accessible 
infrastructure."}):null},M=e=>{let{isPrivate:s}=e;return s?(0,t.jsx)("span",{className:"inline-flex items-center px-2 py-1 rounded-full text-xs font-medium bg-gray-100 text-gray-700 border border-gray-300",children:"Private"}):(0,t.jsx)("span",{className:"inline-flex items-center px-2 py-1 rounded-full text-xs font-medium bg-green-100 text-green-700 border border-green-300",children:"Public"})},P=e=>{let{workspaceConfig:s,allUsers:r}=e;if(!s.private)return null;let a=s.allowed_users||[],l=(r||[]).filter(e=>"admin"===e.role).map(e=>e.username),n=[...new Set([...a,...l])];return 0===n.length?(0,t.jsxs)("div",{className:"mt-4",children:[(0,t.jsx)("h4",{className:"mb-2 text-xs text-gray-500 tracking-wider",children:"Allowed Users (0)"}),(0,t.jsx)("div",{className:"text-amber-600 text-xs italic p-2 bg-amber-50 rounded border border-amber-200",children:"No users configured (workspace may be inaccessible)"})]}):(0,t.jsxs)("div",{className:"mt-4",children:[(0,t.jsxs)("h4",{className:"mb-2 text-xs text-gray-500 tracking-wider",children:["Allowed Users (",n.length,")"]}),(0,t.jsx)("div",{className:"space-y-1 max-h-48 overflow-y-auto border border-gray-200 rounded",children:n.map(e=>{let s=l.includes(e);return(0,t.jsxs)("div",{className:"flex items-center justify-between text-xs p-2 bg-gray-50 hover:bg-gray-100 border-b border-gray-100 last:border-b-0",children:[(0,t.jsx)("span",{className:"font-medium text-gray-700",children:e}),s?(0,t.jsxs)("span",{className:"inline-flex items-center text-blue-600",children:[(0,t.jsx)(C.r7,{className:"w-3 h-3 mr-1"}),"Admin"]}):(0,t.jsxs)("span",{className:"inline-flex items-center text-gray-600",children:[(0,t.jsx)(b.Z,{className:"w-3 h-3 mr-1"}),"User"]})]},e)})})]})};function 
R(e){let{workspaceName:s,isNewWorkspace:r=!1}=e,o=(0,l.useRouter)(),[u,p]=(0,a.useState)({}),[b,k]=(0,a.useState)({}),[w,W]=(0,a.useState)(""),[R,_]=(0,a.useState)(!0),[z,O]=(0,a.useState)(!1),[U,T]=(0,a.useState)(!1),[Y,J]=(0,a.useState)(null),[I,V]=(0,a.useState)(null),[F,H]=(0,a.useState)(null),[B,X]=(0,a.useState)([]),[G,q]=(0,a.useState)({showDialog:!1,deleting:!1,error:null}),[K,Q]=(0,a.useState)({totalClusterCount:0,runningClusterCount:0,managedJobsCount:0,clouds:[]}),[$,ee]=(0,a.useState)(!1),es=(0,a.useCallback)(async()=>{_(!0),J(null);try{let e;let[r,t]=await Promise.all([(0,n.getWorkspaces)(),(0,A.R)()]),a=r[s]||{};p(a),k(a),X(t||[]),e=0===Object.keys(a).length?"".concat(s,":\n # Empty workspace configuration - uses all accessible infrastructure\n"):S.ZP.dump({[s]:a},{indent:2,lineWidth:-1,noRefs:!0,skipInvalid:!0,flowLevel:-1}),W(e)}catch(e){console.error("Error fetching workspace config:",e),J(e)}finally{_(!1)}},[s]),er=(0,a.useCallback)(async()=>{if(!r){ee(!0);try{let[e,r,t]=await Promise.all([(0,c.getClusters)(),(0,i.getManagedJobs)({allUsers:!0}),(0,n.getEnabledClouds)(s,!0)]),a=e.filter(e=>(e.workspace||"default")===s),l=a.filter(e=>"RUNNING"===e.status||"LAUNCHING"===e.status),o={};e.forEach(e=>{o[e.cluster]=e.workspace||"default"});let d=r.jobs||[],u=new Set(E.statusGroups.active),x=0;d.forEach(e=>{let r=e.cluster_name||e.resources&&e.resources.cluster_name;r&&o[r]===s&&u.has(e.status)&&x++}),Q({totalClusterCount:a.length,runningClusterCount:l.length,managedJobsCount:x,clouds:Array.isArray(t)?t:[]})}catch(e){console.error("Failed to fetch workspace stats:",e)}finally{ee(!1)}}},[s,r]);(0,a.useEffect)(()=>{r?(_(!1),W("".concat(s,":\n # New workspace configuration\n # Leave empty to use all accessible infrastructure\n"))):(es(),er())},[s,r,es,er]),(0,a.useEffect)(()=>{T(JSON.stringify(u)!==JSON.stringify(b))},[u,b]);let et=e=>{W(e),H(null);try{let r=S.ZP.load(e)||{},t=Object.keys(r);if(0===t.length)p({});else if(1===t.length){let 
e=t[0];if(e!==s){H('Workspace name cannot be changed. Expected "'.concat(s,'" but found "').concat(e,'".'));return}let a=r[s]||{};p(a)}else H("Configuration must contain only one workspace. Found: ".concat(t.join(", ")))}catch(e){H("Invalid YAML: ".concat(e.message))}},ea=async()=>{O(!0),J(null),V(null);try{if(F)throw Error("Please fix YAML errors before saving");let e=S.ZP.load(w)||{},t=Object.keys(e);if(t.length>0&&t[0]!==s)throw Error('Workspace name cannot be changed. Expected "'.concat(s,'".'));r?(await (0,n.MB)(s,u),V("Workspace created successfully!"),setTimeout(()=>{o.push("/workspaces/".concat(s))},1500)):(await (0,n.eA)(s,u),V("Workspace updated successfully!"),k(u),er())}catch(e){console.error("Error saving workspace:",e),J(e)}finally{O(!1)}},el=async()=>{q(e=>({...e,deleting:!0,error:null}));try{await (0,n.zl)(s),V("Workspace deleted successfully!"),setTimeout(()=>{o.push("/workspaces")},1500)}catch(e){console.error("Error deleting workspace:",e),q(s=>({...s,deleting:!1,error:e}))}},en=()=>{q({showDialog:!1,deleting:!1,error:null})},ec=async()=>{await Promise.all([es(),er()])};if(!o.isReady)return(0,t.jsx)("div",{children:"Loading..."});let ei=r?"Create New Workspace | SkyPilot Dashboard":"Workspace: ".concat(s," | SkyPilot Dashboard");return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(x(),{children:(0,t.jsx)("title",{children:ei})}),(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)("div",{className:"flex items-center justify-between mb-4 h-5",children:[(0,t.jsxs)("div",{className:"text-base flex items-center",children:[(0,t.jsx)(d(),{href:"/workspaces",className:"text-sky-blue hover:underline",children:"Workspaces"}),(0,t.jsx)("span",{className:"mx-2 text-gray-500",children:"›"}),(0,t.jsx)(d(),{href:r?"/workspace/new":"/workspaces/".concat(s),className:"text-sky-blue hover:underline",children:r?"New Workspace":s}),U&&(0,t.jsx)("span",{className:"ml-3 px-2 py-1 bg-yellow-100 text-yellow-800 text-xs rounded",children:"Unsaved 
changes"})]}),(0,t.jsxs)("div",{className:"text-sm flex items-center",children:[(R||z||$)&&(0,t.jsxs)("div",{className:"flex items-center mr-4",children:[(0,t.jsx)(g.Z,{size:15,className:"mt-0"}),(0,t.jsx)("span",{className:"ml-2 text-gray-500",children:z?"Saving...":"Loading..."})]}),(0,t.jsxs)("div",{className:"flex items-center space-x-4",children:[!r&&(0,t.jsxs)("button",{onClick:ec,disabled:R||z||$,className:"text-sky-blue hover:text-sky-blue-bright font-medium inline-flex items-center",children:[(0,t.jsx)(j.Z,{className:"w-4 h-4 mr-1.5"}),"Refresh"]}),!r&&"default"!==s&&(0,t.jsxs)("button",{onClick:()=>q({...G,showDialog:!0}),disabled:G.deleting||z,className:"text-red-600 hover:text-red-700 font-medium inline-flex items-center",children:[(0,t.jsx)(y.Z,{className:"w-4 h-4 mr-1.5"}),"Delete"]})]})]})]}),R?(0,t.jsxs)("div",{className:"flex justify-center items-center py-12",children:[(0,t.jsx)(g.Z,{size:24,className:"mr-2"}),(0,t.jsx)("span",{className:"text-gray-500",children:"Loading workspace configuration..."})]}):(0,t.jsxs)("div",{className:"space-y-6",children:[(0,t.jsx)(L.X,{error:Y,title:"Error",onDismiss:()=>J(null)}),(0,t.jsx)(Z,{message:I}),(0,t.jsxs)("div",{className:"grid grid-cols-1 lg:grid-cols-3 gap-6",children:[!r&&(0,t.jsx)("div",{className:"lg:col-span-1",children:(0,t.jsxs)(m.Zb,{className:"h-full",children:[(0,t.jsx)(m.Ol,{children:(0,t.jsx)(m.ll,{className:"text-base font-normal",children:(0,t.jsxs)("div",{className:"flex items-center justify-between",children:[(0,t.jsxs)("div",{children:[(0,t.jsx)("span",{className:"font-semibold",children:"Workspace:"})," ",s]}),(0,t.jsx)(M,{isPrivate:!0===b.private})]})})}),(0,t.jsxs)(m.aY,{className:"text-sm pb-2 flex-1",children:[(0,t.jsxs)("div",{className:"py-2 flex items-center justify-between",children:[(0,t.jsxs)("div",{className:"flex items-center text-gray-600",children:[(0,t.jsx)(C.QT,{className:"w-4 h-4 mr-2 text-gray-500"}),(0,t.jsx)("span",{children:"Clusters (Running / 
Total)"})]}),(0,t.jsx)("span",{className:"font-normal text-gray-800",children:$?"...":"".concat(K.runningClusterCount," / ").concat(K.totalClusterCount)})]}),(0,t.jsxs)("div",{className:"py-2 flex items-center justify-between border-t border-gray-100",children:[(0,t.jsxs)("div",{className:"flex items-center text-gray-600",children:[(0,t.jsx)(C.Vp,{className:"w-4 h-4 mr-2 text-gray-500"}),(0,t.jsx)("span",{children:"Managed Jobs"})]}),(0,t.jsx)("span",{className:"font-normal text-gray-800",children:$?"...":K.managedJobsCount})]})]}),(0,t.jsxs)("div",{className:"px-6 pb-6 text-sm pt-3",children:[(0,t.jsx)("h4",{className:"mb-2 text-xs text-gray-500 tracking-wider",children:"Enabled Infra"}),(0,t.jsx)("div",{className:"flex flex-wrap gap-x-4 gap-y-1",children:$?(0,t.jsx)("span",{className:"text-gray-500",children:"Loading..."}):K.clouds.length>0?K.clouds.map(e=>(0,t.jsxs)("div",{className:"flex items-center text-gray-700",children:[(0,t.jsx)(C.Ye,{className:"w-3.5 h-3.5 mr-1.5 text-green-500"}),(0,t.jsx)("span",{children:e})]},e)):(0,t.jsx)("span",{className:"text-gray-500 italic",children:"No enabled infrastructure"})}),(0,t.jsx)("div",{className:"mt-4",children:(0,t.jsx)(D,{workspaceName:s,config:b,enabledClouds:K.clouds})}),(0,t.jsx)(P,{workspaceConfig:b,allUsers:B})]})]})}),(0,t.jsx)("div",{className:r?"lg:col-span-3":"lg:col-span-2",children:(0,t.jsxs)(m.Zb,{className:"h-full flex flex-col",children:[(0,t.jsx)(m.Ol,{children:(0,t.jsx)(m.ll,{className:"text-base font-normal",children:r?"New Workspace YAML":"Edit Workspace YAML"})}),(0,t.jsx)(m.aY,{className:"flex-1 flex flex-col",children:(0,t.jsxs)("div",{className:"space-y-4 flex-1 flex flex-col",children:[F&&(0,t.jsx)(L.X,{error:F,onDismiss:()=>H(null)}),(0,t.jsxs)("div",{className:"flex-1 flex flex-col",children:[(0,t.jsxs)("p",{className:"text-sm text-gray-600 mb-3",children:["Configure infra-specific settings for this workspace. Leave empty to use all accessible infrastructure. 
Refer to"," ",(0,t.jsx)("a",{href:"https://docs.skypilot.co/en/latest/admin/workspaces.html#configuration",target:"_blank",rel:"noopener noreferrer",className:"text-blue-600",children:"SkyPilot Docs"})," ","for more details."]}),(0,t.jsxs)("div",{className:"mb-4",children:[(0,t.jsx)("h4",{className:"text-sm font-medium text-gray-700 mb-2",children:"Example configuration:"}),(0,t.jsx)("div",{className:"p-3 bg-gray-50 border rounded-lg",children:(0,t.jsx)("pre",{className:"text-xs font-mono text-gray-600 whitespace-pre-wrap",children:"".concat(s||"my-workspace",":\n private: true\n allowed_users:\n - user1@mydomain.com\n - user2@mydomain.com\n gcp:\n project_id: xxx\n disabled: false\n kubernetes:\n allowed_contexts:\n - context-1")})})]}),(0,t.jsx)(f.g,{value:w,onChange:e=>et(e.target.value),className:"font-mono text-sm flex-1 resize-none",style:{minHeight:"350px"},spellCheck:!1,placeholder:"# Enter workspace configuration in YAML format"}),(0,t.jsx)("div",{className:"flex justify-end space-x-3 pt-3 border-gray-200",children:(0,t.jsxs)(h.z,{onClick:ea,disabled:z||F||R,className:"inline-flex items-center bg-sky-600 hover:bg-sky-700 text-white",children:[(0,t.jsx)(N.Z,{className:"w-4 h-4 mr-1.5"}),z?"Applying...":"Apply"]})})]})]})})]})})]})]}),(0,t.jsx)(v.Vq,{open:G.showDialog,onOpenChange:en,children:(0,t.jsxs)(v.cZ,{className:"sm:max-w-md",children:[(0,t.jsxs)(v.fK,{className:"",children:[(0,t.jsx)(v.$N,{children:"Delete Workspace"}),(0,t.jsxs)(v.Be,{children:['Are you sure you want to delete workspace "',s,'"? This action cannot be undone.']})]}),G.error&&(0,t.jsx)(L.X,{error:G.error,title:"Deletion Failed",onDismiss:()=>q(e=>({...e,error:null}))}),(0,t.jsxs)(v.cN,{className:"",children:[(0,t.jsx)(h.z,{variant:"outline",onClick:en,disabled:G.deleting,children:"Cancel"}),(0,t.jsx)(h.z,{variant:"destructive",onClick:el,disabled:G.deleting,children:G.deleting?"Deleting...":"Delete"})]})]})})]})]})}}}]);