skypilot-nightly 1.0.0.dev20250922__py3-none-any.whl → 1.0.0.dev20250926__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of skypilot-nightly might be problematic.
Files changed (123)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend.py +10 -0
  3. sky/backends/backend_utils.py +207 -79
  4. sky/backends/cloud_vm_ray_backend.py +37 -13
  5. sky/backends/local_docker_backend.py +9 -0
  6. sky/client/cli/command.py +112 -53
  7. sky/client/common.py +4 -2
  8. sky/client/sdk.py +17 -7
  9. sky/client/sdk_async.py +4 -2
  10. sky/clouds/kubernetes.py +2 -1
  11. sky/clouds/runpod.py +20 -7
  12. sky/core.py +9 -54
  13. sky/dashboard/out/404.html +1 -1
  14. sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_buildManifest.js +1 -1
  15. sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/6856-2b3600ff2854d066.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/8969-d8bc3a2b9cf839a9.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/9037-d0c00018a5ba198c.js +6 -0
  19. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ad77b12fc736dca3.js +16 -0
  20. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-9525660179df3605.js → [cluster]-e052384df65ef200.js} +1 -1
  21. sky/dashboard/out/_next/static/chunks/{webpack-26167a9e6d91fa51.js → webpack-8e64d11e58eab5cb.js} +1 -1
  22. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  23. sky/dashboard/out/clusters/[cluster].html +1 -1
  24. sky/dashboard/out/clusters.html +1 -1
  25. sky/dashboard/out/config.html +1 -1
  26. sky/dashboard/out/index.html +1 -1
  27. sky/dashboard/out/infra/[context].html +1 -1
  28. sky/dashboard/out/infra.html +1 -1
  29. sky/dashboard/out/jobs/[job].html +1 -1
  30. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  31. sky/dashboard/out/jobs.html +1 -1
  32. sky/dashboard/out/users.html +1 -1
  33. sky/dashboard/out/volumes.html +1 -1
  34. sky/dashboard/out/workspace/new.html +1 -1
  35. sky/dashboard/out/workspaces/[name].html +1 -1
  36. sky/dashboard/out/workspaces.html +1 -1
  37. sky/data/mounting_utils.py +19 -10
  38. sky/execution.py +4 -2
  39. sky/global_user_state.py +271 -67
  40. sky/jobs/client/sdk.py +10 -1
  41. sky/jobs/constants.py +2 -0
  42. sky/jobs/controller.py +11 -7
  43. sky/jobs/server/core.py +5 -3
  44. sky/jobs/server/server.py +15 -11
  45. sky/jobs/utils.py +1 -1
  46. sky/logs/agent.py +30 -3
  47. sky/logs/aws.py +9 -19
  48. sky/provision/__init__.py +2 -1
  49. sky/provision/aws/instance.py +2 -1
  50. sky/provision/azure/instance.py +2 -1
  51. sky/provision/cudo/instance.py +2 -2
  52. sky/provision/do/instance.py +2 -2
  53. sky/provision/docker_utils.py +41 -19
  54. sky/provision/fluidstack/instance.py +2 -2
  55. sky/provision/gcp/instance.py +2 -1
  56. sky/provision/hyperbolic/instance.py +2 -1
  57. sky/provision/instance_setup.py +1 -1
  58. sky/provision/kubernetes/instance.py +134 -8
  59. sky/provision/lambda_cloud/instance.py +2 -1
  60. sky/provision/nebius/instance.py +2 -1
  61. sky/provision/oci/instance.py +2 -1
  62. sky/provision/paperspace/instance.py +2 -2
  63. sky/provision/primeintellect/instance.py +2 -2
  64. sky/provision/provisioner.py +1 -0
  65. sky/provision/runpod/__init__.py +2 -0
  66. sky/provision/runpod/instance.py +2 -2
  67. sky/provision/scp/instance.py +2 -2
  68. sky/provision/seeweb/instance.py +2 -1
  69. sky/provision/vast/instance.py +2 -1
  70. sky/provision/vsphere/instance.py +6 -5
  71. sky/schemas/api/responses.py +2 -1
  72. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  73. sky/serve/autoscalers.py +2 -0
  74. sky/serve/client/impl.py +45 -19
  75. sky/serve/replica_managers.py +12 -5
  76. sky/serve/serve_utils.py +5 -7
  77. sky/serve/server/core.py +9 -6
  78. sky/serve/server/impl.py +78 -25
  79. sky/serve/server/server.py +4 -5
  80. sky/serve/service_spec.py +33 -0
  81. sky/server/constants.py +1 -1
  82. sky/server/daemons.py +2 -3
  83. sky/server/requests/executor.py +56 -6
  84. sky/server/requests/payloads.py +32 -8
  85. sky/server/requests/preconditions.py +2 -3
  86. sky/server/rest.py +2 -0
  87. sky/server/server.py +28 -19
  88. sky/server/stream_utils.py +34 -12
  89. sky/setup_files/dependencies.py +5 -2
  90. sky/setup_files/setup.py +44 -44
  91. sky/skylet/constants.py +4 -1
  92. sky/skylet/events.py +42 -0
  93. sky/templates/jobs-controller.yaml.j2 +3 -0
  94. sky/templates/kubernetes-ray.yml.j2 +24 -18
  95. sky/usage/usage_lib.py +3 -0
  96. sky/utils/cli_utils/status_utils.py +4 -5
  97. sky/utils/context.py +104 -29
  98. sky/utils/controller_utils.py +7 -6
  99. sky/utils/db/db_utils.py +5 -1
  100. sky/utils/db/migration_utils.py +1 -1
  101. sky/utils/kubernetes/create_cluster.sh +13 -28
  102. sky/utils/kubernetes/delete_cluster.sh +10 -7
  103. sky/utils/kubernetes/generate_kind_config.py +6 -66
  104. sky/utils/kubernetes/kubernetes_deploy_utils.py +194 -38
  105. sky/utils/kubernetes_enums.py +5 -0
  106. sky/utils/ux_utils.py +35 -1
  107. sky/utils/yaml_utils.py +9 -0
  108. sky/volumes/client/sdk.py +44 -8
  109. sky/volumes/server/core.py +1 -0
  110. sky/volumes/server/server.py +33 -7
  111. sky/volumes/volume.py +35 -28
  112. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/METADATA +38 -33
  113. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/RECORD +118 -117
  114. sky/dashboard/out/_next/static/chunks/1121-4ff1ec0dbc5792ab.js +0 -1
  115. sky/dashboard/out/_next/static/chunks/6856-9a2538f38c004652.js +0 -1
  116. sky/dashboard/out/_next/static/chunks/8969-a39efbadcd9fde80.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/9037-472ee1222cb1e158.js +0 -6
  118. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1e9248ddbddcd122.js +0 -16
  119. /sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_ssgManifest.js +0 -0
  120. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/WHEEL +0 -0
  121. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/entry_points.txt +0 -0
  122. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/licenses/LICENSE +0 -0
  123. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/top_level.txt +0 -0
sky/backends/local_docker_backend.py CHANGED
@@ -189,6 +189,15 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
                 ' a NoOp. If you are running sky exec, your workdir has not'
                 ' been updated.')

+    def _download_file(self, handle: LocalDockerResourceHandle,
+                       local_file_path: str, remote_file_path: str) -> None:
+        """Syncs file from remote to local."""
+        # Copy from docker container to local
+        container = self.containers[handle]
+        copy_cmd = (
+            f'docker cp {container.name}:{remote_file_path} {local_file_path}')
+        subprocess.run(copy_cmd, shell=True, check=True)
+
     def _sync_file_mounts(
         self,
         handle: LocalDockerResourceHandle,
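For illustration, the added `_download_file` builds a `docker cp` command as a shell string. Below is a minimal standalone sketch of the same copy using hypothetical container and path names and an argument list instead of `shell=True`; it is not SkyPilot's code, only the equivalent invocation:

import subprocess

# Hypothetical names for the example; the real values come from the handle.
container_name = 'sky-local-docker-mycluster'
remote_file_path = '/tmp/job_output.log'
local_file_path = '/tmp/job_output.log'

# Equivalent to: docker cp <container>:<remote_path> <local_path>
subprocess.run(
    ['docker', 'cp', f'{container_name}:{remote_file_path}', local_file_path],
    check=True)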
sky/client/cli/command.py CHANGED
@@ -127,6 +127,7 @@ def _get_cluster_records_and_set_ssh_config(
     clusters: Optional[List[str]],
     refresh: common.StatusRefreshMode = common.StatusRefreshMode.NONE,
     all_users: bool = False,
+    verbose: bool = False,
 ) -> List[responses.StatusResponse]:
     """Returns a list of clusters that match the glob pattern.

@@ -144,7 +145,8 @@ def _get_cluster_records_and_set_ssh_config(
     request_id = sdk.status(clusters,
                             refresh=refresh,
                             all_users=all_users,
-                            _include_credentials=True)
+                            _include_credentials=True,
+                            _summary_response=not verbose)
     cluster_records = sdk.stream_and_get(request_id)
     # Update the SSH config for all clusters
     for record in cluster_records:
@@ -1858,7 +1860,7 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,

     # Phase 3: Get cluster records and handle special cases
     cluster_records = _get_cluster_records_and_set_ssh_config(
-        query_clusters, refresh_mode, all_users)
+        query_clusters, refresh_mode, all_users, verbose)

     # TOOD(zhwu): setup the ssh config for status
     if ip or show_endpoints:
@@ -4184,6 +4186,13 @@ def volumes_apply(

     logger.debug(f'Volume config: {volume.to_yaml_config()}')

+    # TODO(kevin): remove the try block in v0.13.0
+    try:
+        volumes_sdk.validate(volume)
+    except exceptions.APINotSupportedError:
+        # Do best-effort client-side validation.
+        volume.validate(skip_cloud_compatibility=True)
+
     if not yes:
         click.confirm(f'Proceed to create volume {volume.name!r}?',
                       default=True,
@@ -4780,7 +4789,7 @@ def pool():
 @pool.command('apply', cls=_DocumentedCodeCommand)
 @flags.config_option(expose_value=False)
 @click.argument('pool_yaml',
-                required=True,
+                required=False,
                 type=str,
                 nargs=-1,
                 **_get_shell_complete_args(_complete_file_name))
@@ -4799,13 +4808,18 @@ def pool():
              'with rolling update. If "blue_green", cluster pool will '
              'be updated with blue-green update. This option is only '
              'valid when the pool is already running.'))
+@click.option('--workers',
+              default=None,
+              type=int,
+              required=False,
+              help='Can be used to update the number of workers in the pool.')
 @_add_click_options(flags.TASK_OPTIONS + flags.EXTRA_RESOURCES_OPTIONS +
                     flags.COMMON_OPTIONS)
 @flags.yes_option()
 @timeline.event
 @usage_lib.entrypoint
 def jobs_pool_apply(
-    pool_yaml: Tuple[str, ...],
+    pool_yaml: Optional[Tuple[str, ...]],
     pool: Optional[str],  # pylint: disable=redefined-outer-name
     workdir: Optional[str],
     infra: Optional[str],
@@ -4827,60 +4841,80 @@ def jobs_pool_apply(
     disk_tier: Optional[str],
     network_tier: Optional[str],
     mode: str,
+    workers: Optional[int],
     yes: bool,
     async_call: bool,
 ):
-    """Apply a config to a cluster pool for managed jobs submission.
-
-    If the pool is already running, the config will be applied to the pool.
-    Otherwise, a new pool will be created.
-
-    POOL_YAML must point to a valid YAML file.
+    """Either apply a config to a cluster pool for managed jobs submission
+    or update the number of workers in the pool. One of POOL_YAML or --workers
+    must be provided.
+    Config:
+        If the pool is already running, the config will be applied to the pool.
+        Otherwise, a new pool will be created.
+    Workers:
+        The --workers option can be used to override the number of workers
+        specified in the YAML file, or to update workers without a YAML file.
+    Example:
+        sky jobs pool apply -p my-pool --workers 5
     """
     cloud, region, zone = _handle_infra_cloud_region_zone_options(
         infra, cloud, region, zone)
-    if pool is None:
-        pool = serve_lib.generate_service_name(pool=True)
+    if workers is not None and pool_yaml is not None and len(pool_yaml) > 0:
+        raise click.UsageError(
+            'Cannot specify both --workers and POOL_YAML. Please use one of '
+            'them.')

-    task = _generate_task_with_service(
-        service_name=pool,
-        service_yaml_args=pool_yaml,
-        workdir=workdir,
-        cloud=cloud,
-        region=region,
-        zone=zone,
-        gpus=gpus,
-        cpus=cpus,
-        memory=memory,
-        instance_type=instance_type,
-        num_nodes=num_nodes,
-        use_spot=use_spot,
-        image_id=image_id,
-        env_file=env_file,
-        env=env,
-        secret=secret,
-        disk_size=disk_size,
-        disk_tier=disk_tier,
-        network_tier=network_tier,
-        ports=ports,
-        not_supported_cmd='sky jobs pool up',
-        pool=True,
-    )
-    assert task.service is not None
-    if not task.service.pool:
-        raise click.UsageError('The YAML file needs a `pool` section.')
-    click.secho('Pool spec:', fg='cyan')
-    click.echo(task.service)
-    serve_lib.validate_service_task(task, pool=True)
+    if pool_yaml is None or len(pool_yaml) == 0:
+        if pool is None:
+            raise click.UsageError(
+                'A pool name must be provided to update the number of workers.')
+        task = None
+        click.secho(f'Attempting to update {pool} to have {workers} workers',
+                    fg='cyan')
+    else:
+        if pool is None:
+            pool = serve_lib.generate_service_name(pool=True)
+
+        task = _generate_task_with_service(
+            service_name=pool,
+            service_yaml_args=pool_yaml,
+            workdir=workdir,
+            cloud=cloud,
+            region=region,
+            zone=zone,
+            gpus=gpus,
+            cpus=cpus,
+            memory=memory,
+            instance_type=instance_type,
+            num_nodes=num_nodes,
+            use_spot=use_spot,
+            image_id=image_id,
+            env_file=env_file,
+            env=env,
+            secret=secret,
+            disk_size=disk_size,
+            disk_tier=disk_tier,
+            network_tier=network_tier,
+            ports=ports,
+            not_supported_cmd='sky jobs pool up',
+            pool=True,
+        )
+        assert task.service is not None
+        if not task.service.pool:
+            raise click.UsageError('The YAML file needs a `pool` section.')
+        click.secho('Pool spec:', fg='cyan')
+        click.echo(task.service)
+        serve_lib.validate_service_task(task, pool=True)

-    click.secho(
-        'Each pool worker will use the following resources (estimated):',
-        fg='cyan')
-    with dag_lib.Dag() as dag:
-        dag.add(task)
+        click.secho(
+            'Each pool worker will use the following resources (estimated):',
+            fg='cyan')
+        with dag_lib.Dag() as dag:
+            dag.add(task)

     request_id = managed_jobs.pool_apply(task,
                                          pool,
+                                         workers=workers,
                                          mode=serve_lib.UpdateMode(mode),
                                          _need_confirmation=not yes)
     _async_call_or_wait(request_id, async_call, 'sky.jobs.pool_apply')
@@ -5487,6 +5521,8 @@ def serve_update(
         sky serve update --mode blue_green sky-service-16aa new_service.yaml

     """
+    # TODO(lloyd-brown): Add a way to update number of replicas for serve
+    # the way we did for pools.
     cloud, region, zone = _handle_infra_cloud_region_zone_options(
         infra, cloud, region, zone)
     task = _generate_task_with_service(
@@ -5868,19 +5904,33 @@ def local():
     '--context-name',
     type=str,
     required=False,
-    help='Name to use for the kubeconfig context. Defaults to "default".')
+    help='Name to use for the kubeconfig context. Defaults to "default". '
+    'Used with the ip list.')
 @click.option('--password',
               type=str,
               required=False,
               help='Password for the ssh-user to execute sudo commands. '
               'Required only if passwordless sudo is not setup.')
+@click.option(
+    '--name',
+    type=str,
+    required=False,
+    help='Name of the cluster. Defaults to "skypilot". Used without ip list.')
+@click.option(
+    '--port-start',
+    type=int,
+    required=False,
+    help='Starting port range for the local kind cluster. Needs to be a '
+    'multiple of 100. If not given, a random range will be used. '
+    'Used without ip list.')
 @local.command('up', cls=_DocumentedCodeCommand)
 @flags.config_option(expose_value=False)
 @_add_click_options(flags.COMMON_OPTIONS)
 @usage_lib.entrypoint
 def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
              cleanup: bool, context_name: Optional[str],
-             password: Optional[str], async_call: bool):
+             password: Optional[str], name: Optional[str],
+             port_start: Optional[int], async_call: bool):
     """Creates a local or remote cluster."""

     def _validate_args(ips, ssh_user, ssh_key_path, cleanup):
@@ -5926,17 +5976,26 @@ def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
                 f'Failed to read SSH key file {ssh_key_path}: {str(e)}')

     request_id = sdk.local_up(gpus, ip_list, ssh_user, ssh_key, cleanup,
-                              context_name, password)
+                              context_name, password, name, port_start)
     _async_call_or_wait(request_id, async_call, request_name='local up')


+@click.option('--name',
+              type=str,
+              required=False,
+              help='Name of the cluster to down. Defaults to "skypilot".')
 @local.command('down', cls=_DocumentedCodeCommand)
 @flags.config_option(expose_value=False)
 @_add_click_options(flags.COMMON_OPTIONS)
 @usage_lib.entrypoint
-def local_down(async_call: bool):
-    """Deletes a local cluster."""
-    request_id = sdk.local_down()
+def local_down(name: Optional[str], async_call: bool):
+    """Deletes a local cluster.
+
+    This will only delete a local cluster started without the ip list.
+    To clean up the local cluster started with a ip list, use `sky local up`
+    with the cleanup flag.
+    """
+    request_id = sdk.local_down(name)
     _async_call_or_wait(request_id, async_call, request_name='sky.local.down')


sky/client/common.py CHANGED
@@ -44,8 +44,10 @@ logger = sky_logging.init_logger(__name__)
 _DOWNLOAD_CHUNK_BYTES = 8192
 # The chunk size for the zip file to be uploaded to the API server. We split
 # the zip file into chunks to avoid network issues for large request body that
-# can be caused by NGINX's client_max_body_size.
-_UPLOAD_CHUNK_BYTES = 512 * 1024 * 1024
+# can be caused by NGINX's client_max_body_size or Cloudflare's upload limit.
+# As of 09/25/2025, the upload limit for Cloudflare's free plan is 100MiB:
+# https://developers.cloudflare.com/support/troubleshooting/http-status-codes/4xx-client-error/error-413/
+_UPLOAD_CHUNK_BYTES = 100 * 1024 * 1024

 FILE_UPLOAD_LOGS_DIR = os.path.join(constants.SKY_LOGS_DIRECTORY,
                                     'file_uploads')
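The comment above describes the mechanism: the client splits the zipped upload into fixed-size pieces so each request body stays under a proxy's limit, now 100 MiB to fit Cloudflare's free-plan cap. A minimal, standalone sketch of that chunking idea follows; it is not SkyPilot's upload code, just the generic pattern:

from typing import Iterator

UPLOAD_CHUNK_BYTES = 100 * 1024 * 1024  # mirrors the new limit above


def iter_file_chunks(path: str,
                     chunk_bytes: int = UPLOAD_CHUNK_BYTES) -> Iterator[bytes]:
    """Yields successive pieces of at most chunk_bytes from the file at path."""
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(chunk_bytes)
            if not chunk:
                break
            yield chunk  # Each piece would be sent as one request body.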
sky/client/sdk.py CHANGED
@@ -1429,6 +1429,7 @@ def status(
     all_users: bool = False,
     *,
     _include_credentials: bool = False,
+    _summary_response: bool = False,
 ) -> server_common.RequestId[List[responses.StatusResponse]]:
     """Gets cluster statuses.

@@ -1514,6 +1515,7 @@ def status(
         refresh=refresh,
         all_users=all_users,
         include_credentials=_include_credentials,
+        summary_response=_summary_response,
     )
     response = server_common.make_authenticated_request(
         'POST', '/status', json=json.loads(body.model_dump_json()))
@@ -1675,7 +1677,9 @@ def local_up(gpus: bool,
             ssh_key: Optional[str],
             cleanup: bool,
             context_name: Optional[str] = None,
-             password: Optional[str] = None) -> server_common.RequestId[None]:
+             password: Optional[str] = None,
+             name: Optional[str] = None,
+             port_start: Optional[int] = None) -> server_common.RequestId[None]:
     """Launches a Kubernetes cluster on local machines.

     Returns:
@@ -1686,8 +1690,8 @@ def local_up(gpus: bool,
     # TODO: move this check to server.
     if not server_common.is_api_server_local():
         with ux_utils.print_exception_no_traceback():
-            raise ValueError(
-                'sky local up is only supported when running SkyPilot locally.')
+            raise ValueError('`sky local up` is only supported when '
+                             'running SkyPilot locally.')

     body = payloads.LocalUpBody(gpus=gpus,
                                 ips=ips,
@@ -1695,7 +1699,9 @@ def local_up(gpus: bool,
                                 ssh_key=ssh_key,
                                 cleanup=cleanup,
                                 context_name=context_name,
-                                password=password)
+                                password=password,
+                                name=name,
+                                port_start=port_start)
     response = server_common.make_authenticated_request(
         'POST', '/local_up', json=json.loads(body.model_dump_json()))
     return server_common.get_request_id(response)
@@ -1704,16 +1710,19 @@ def local_up(gpus: bool,
 @usage_lib.entrypoint
 @server_common.check_server_healthy_or_start
 @annotations.client_api
-def local_down() -> server_common.RequestId[None]:
+def local_down(name: Optional[str]) -> server_common.RequestId[None]:
     """Tears down the Kubernetes cluster started by local_up."""
     # We do not allow local up when the API server is running remotely since it
     # will modify the kubeconfig.
     # TODO: move this check to remote server.
     if not server_common.is_api_server_local():
         with ux_utils.print_exception_no_traceback():
-            raise ValueError('sky local down is only supported when running '
+            raise ValueError('`sky local down` is only supported when running '
                              'SkyPilot locally.')
-    response = server_common.make_authenticated_request('POST', '/local_down')
+
+    body = payloads.LocalDownBody(name=name)
+    response = server_common.make_authenticated_request(
+        'POST', '/local_down', json=json.loads(body.model_dump_json()))
     return server_common.get_request_id(response)


@@ -2083,6 +2092,7 @@ def stream_and_get(
     return stream_response(request_id,
                            response,
                            output_stream,
+                           resumable=True,
                            get_result=follow)


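For illustration, a minimal sketch of calling the updated client entry point from Python. The cluster name 'skypilot' is an assumption taken from the new CLI help text ('Defaults to "skypilot"'); this is not an excerpt from the package:

from sky.client import sdk

# Tear down a named local (kind) cluster; requires a locally running API server.
request_id = sdk.local_down(name='skypilot')
sdk.stream_and_get(request_id)  # Stream the request's logs and wait for it.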
sky/client/sdk_async.py CHANGED
@@ -661,13 +661,14 @@ async def local_up(
     ssh_key: Optional[str],
     cleanup: bool,
     context_name: Optional[str] = None,
+    name: Optional[str] = None,
     password: Optional[str] = None,
     stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
     """Async version of local_up() that launches a Kubernetes cluster on
     local machines."""
     request_id = await context_utils.to_thread(sdk.local_up, gpus, ips,
                                                ssh_user, ssh_key, cleanup,
-                                               context_name, password)
+                                               context_name, name, password)
     if stream_logs is not None:
         return await _stream_and_get(request_id, stream_logs)
     else:
@@ -677,10 +678,11 @@ async def local_up(
 @usage_lib.entrypoint
 @annotations.client_api
 async def local_down(
+    name: Optional[str] = None,
     stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
     """Async version of local_down() that tears down the Kubernetes cluster
     started by local_up."""
-    request_id = await context_utils.to_thread(sdk.local_down)
+    request_id = await context_utils.to_thread(sdk.local_down, name)
     if stream_logs is not None:
         return await _stream_and_get(request_id, stream_logs)
     else:
sky/clouds/kubernetes.py CHANGED
@@ -62,6 +62,7 @@ class Kubernetes(clouds.Cloud):
     _SUPPORTS_SERVICE_ACCOUNT_ON_REMOTE = True

     _DEFAULT_NUM_VCPUS = 2
+    _DEFAULT_NUM_VCPUS_WITH_GPU = 4
     _DEFAULT_MEMORY_CPU_RATIO = 1
     _DEFAULT_MEMORY_CPU_RATIO_WITH_GPU = 4  # Allocate more memory for GPU tasks
     _REPR = 'Kubernetes'
@@ -842,7 +843,7 @@ class Kubernetes(clouds.Cloud):

         gpu_task_cpus = k8s_instance_type.cpus
         if resources.cpus is None:
-            gpu_task_cpus = gpu_task_cpus * acc_count
+            gpu_task_cpus = self._DEFAULT_NUM_VCPUS_WITH_GPU * acc_count
         # Special handling to bump up memory multiplier for GPU instances
         gpu_task_memory = (float(resources.memory.strip('+')) if
                            resources.memory is not None else gpu_task_cpus *
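The change above alters the default CPU request for GPU tasks on Kubernetes: when no explicit cpus value is given, the request is now 4 vCPUs per accelerator instead of a multiple of the detected instance-type CPUs. Illustrative arithmetic only, with assumed values:

# Assumed example: a task requesting 2 GPUs and no explicit `cpus`.
DEFAULT_NUM_VCPUS_WITH_GPU = 4
acc_count = 2
gpu_task_cpus = DEFAULT_NUM_VCPUS_WITH_GPU * acc_count
assert gpu_task_cpus == 8  # 4 vCPUs per GPU, regardless of instance-type CPUs.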
sky/clouds/runpod.py CHANGED
@@ -286,14 +286,16 @@ class RunPod(clouds.Cloud):
     @classmethod
     def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
         """Verify that the user has valid credentials for RunPod. """
-        dependency_error_msg = ('Failed to import runpod. '
-                                'To install, run: pip install skypilot[runpod]')
+        dependency_error_msg = ('Failed to import runpod or TOML parser. '
+                                'Install: pip install "skypilot[runpod]".')
         try:
             runpod_spec = import_lib_util.find_spec('runpod')
             if runpod_spec is None:
                 return False, dependency_error_msg
-            toml_spec = import_lib_util.find_spec('toml')
-            if toml_spec is None:
+            # Prefer stdlib tomllib (Python 3.11+); fallback to tomli
+            tomllib_spec = import_lib_util.find_spec('tomllib')
+            tomli_spec = import_lib_util.find_spec('tomli')
+            if tomllib_spec is None and tomli_spec is None:
                 return False, dependency_error_msg
         except ValueError:
             # docstring of importlib_util.find_spec:
@@ -322,9 +324,20 @@ class RunPod(clouds.Cloud):
         if not os.path.exists(credential_file):
             return False, '~/.runpod/config.toml does not exist.'

-        # we don't need to import toml here if config.toml does not exist,
-        # wait until we know the cred file exists.
-        import tomli as toml  # pylint: disable=import-outside-toplevel
+        # We don't need to import TOML parser if config.toml does not exist.
+        # When needed, prefer stdlib tomllib (py>=3.11); otherwise use tomli.
+        # TODO(andy): remove this fallback after dropping Python 3.10 support.
+        try:
+            try:
+                import tomllib as toml  # pylint: disable=import-outside-toplevel
+            except ModuleNotFoundError:  # py<3.11
+                import tomli as toml  # pylint: disable=import-outside-toplevel
+        except ModuleNotFoundError:
+            # Should never happen. We already installed proper dependencies for
+            # different Python versions in setup_files/dependencies.py.
+            return False, (
+                '~/.runpod/config.toml exists but no TOML parser is available. '
+                'Install tomli for Python < 3.11: pip install tomli.')

         # Check for default api_key
         try:
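The diff replaces the hard dependency on tomli with the standard-library tomllib on Python 3.11+. Below is a self-contained sketch of the same fallback reading a TOML credentials file; the path mirrors ~/.runpod/config.toml from the diff, and the printed keys are only illustrative, not part of the package:

import os

try:
    import tomllib as toml  # Standard library on Python 3.11+.
except ModuleNotFoundError:
    import tomli as toml  # Older interpreters: pip install tomli

config_path = os.path.expanduser('~/.runpod/config.toml')
with open(config_path, 'rb') as f:  # tomllib/tomli require a binary file object.
    config = toml.load(f)
print(sorted(config.keys()))  # e.g. the top-level sections that are present.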
sky/core.py CHANGED
@@ -1,6 +1,4 @@
 """SDK functions for cluster/job management."""
-import os
-import shlex
 import typing
 from typing import Any, Dict, List, Optional, Tuple, Union

@@ -9,7 +7,6 @@ import colorama
 from sky import admin_policy
 from sky import backends
 from sky import catalog
-from sky import check as sky_check
 from sky import clouds
 from sky import dag as dag_lib
 from sky import data
@@ -31,7 +28,6 @@ from sky.schemas.api import responses
 from sky.skylet import autostop_lib
 from sky.skylet import constants
 from sky.skylet import job_lib
-from sky.skylet import log_lib
 from sky.usage import usage_lib
 from sky.utils import admin_policy_utils
 from sky.utils import common
@@ -102,6 +98,7 @@ def status(
     refresh: common.StatusRefreshMode = common.StatusRefreshMode.NONE,
     all_users: bool = False,
     include_credentials: bool = False,
+    summary_response: bool = False,
 ) -> List[responses.StatusResponse]:
     # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
     """Gets cluster statuses.
@@ -181,7 +178,8 @@ def status(
         refresh=refresh,
         cluster_names=cluster_names,
         all_users=all_users,
-        include_credentials=include_credentials)
+        include_credentials=include_credentials,
+        summary_response=summary_response)

     status_responses = []
     for cluster in clusters:
@@ -1301,7 +1299,9 @@ def local_up(gpus: bool,
             ssh_key: Optional[str],
             cleanup: bool,
             context_name: Optional[str] = None,
-             password: Optional[str] = None) -> None:
+             password: Optional[str] = None,
+             name: Optional[str] = None,
+             port_start: Optional[int] = None) -> None:
     """Creates a local or remote cluster."""

     def _validate_args(ips, ssh_user, ssh_key, cleanup):
@@ -1331,57 +1331,12 @@ def local_up(gpus: bool,
                               password)
     else:
         # Run local deployment (kind) if no remote args are specified
-        kubernetes_deploy_utils.deploy_local_cluster(gpus)
+        kubernetes_deploy_utils.deploy_local_cluster(name, port_start, gpus)


-def local_down() -> None:
+def local_down(name: Optional[str] = None) -> None:
     """Tears down the Kubernetes cluster started by local_up."""
-    cluster_removed = False
-
-    path_to_package = os.path.dirname(__file__)
-    down_script_path = os.path.join(path_to_package, 'utils/kubernetes',
-                                    'delete_cluster.sh')
-
-    cwd = os.path.dirname(os.path.abspath(down_script_path))
-    run_command = shlex.split(down_script_path)
-
-    # Setup logging paths
-    run_timestamp = sky_logging.get_run_timestamp()
-    log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
-                            'local_down.log')
-
-    with rich_utils.safe_status(
-            ux_utils.spinner_message('Removing local cluster',
-                                     log_path=log_path,
-                                     is_local=True)):
-
-        returncode, stdout, stderr = log_lib.run_with_log(cmd=run_command,
-                                                          log_path=log_path,
-                                                          require_outputs=True,
-                                                          stream_logs=False,
-                                                          cwd=cwd)
-        stderr = stderr.replace('No kind clusters found.\n', '')
-
-        if returncode == 0:
-            cluster_removed = True
-        elif returncode == 100:
-            logger.info(ux_utils.error_message('Local cluster does not exist.'))
-        else:
-            with ux_utils.print_exception_no_traceback():
-                raise RuntimeError('Failed to create local cluster. '
-                                   f'Stdout: {stdout}'
-                                   f'\nError: {stderr}')
-    if cluster_removed:
-        # Run sky check
-        with rich_utils.safe_status(
-                ux_utils.spinner_message('Running sky check...')):
-            sky_check.check_capability(sky_cloud.CloudCapability.COMPUTE,
-                                       clouds=['kubernetes'],
-                                       quiet=True)
-        logger.info(
-            ux_utils.finishing_message('Local cluster removed.',
-                                       log_path=log_path,
-                                       is_local=True))
+    kubernetes_deploy_utils.teardown_local_cluster(name)


 @usage_lib.entrypoint
sky/dashboard/out/404.html CHANGED
@@ -1 +1 @@
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-26167a9e6d91fa51.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/KP6HCNMqb_bnJB17oplgW/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/KP6HCNMqb_bnJB17oplgW/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"KP6HCNMqb_bnJB17oplgW","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-8e64d11e58eab5cb.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/VXU6_xE28M55BOdwmUUJS/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/VXU6_xE28M55BOdwmUUJS/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"VXU6_xE28M55BOdwmUUJS","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
- self.__BUILD_MANIFEST=function(s,c,a,e,t,f,u,n,b,o,j,i,r,d){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-444f1804401f04ea.js"],"/_error":["static/chunks/pages/_error-c66a4e8afc46f17b.js"],"/clusters":["static/chunks/pages/clusters-469814d711d63b1b.js"],"/clusters/[cluster]":[s,c,a,f,u,"static/chunks/4676-9da7fdbde90b5549.js",o,e,t,n,j,b,i,"static/chunks/6856-9a2538f38c004652.js",r,d,"static/chunks/9037-472ee1222cb1e158.js","static/chunks/pages/clusters/[cluster]-9525660179df3605.js"],"/clusters/[cluster]/[job]":[s,c,a,f,e,t,b,"static/chunks/pages/clusters/[cluster]/[job]-1e9248ddbddcd122.js"],"/config":["static/chunks/pages/config-dfb9bf07b13045f4.js"],"/infra":["static/chunks/pages/infra-aabba60d57826e0f.js"],"/infra/[context]":["static/chunks/pages/infra/[context]-6563820e094f68ca.js"],"/jobs":["static/chunks/pages/jobs-1f70d9faa564804f.js"],"/jobs/pools/[pool]":[s,c,a,u,o,e,t,n,"static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js"],"/jobs/[job]":[s,c,a,f,u,o,e,t,n,b,"static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js"],"/users":["static/chunks/pages/users-018bf31cda52e11b.js"],"/volumes":["static/chunks/pages/volumes-739726d6b823f532.js"],"/workspace/new":["static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js"],"/workspaces":["static/chunks/pages/workspaces-7528cc0ef8c522c5.js"],"/workspaces/[name]":[s,c,a,f,u,"static/chunks/1836-37fede578e2da5f8.js",e,t,n,j,b,i,r,d,"static/chunks/1141-159df2d4c441a9d1.js","static/chunks/pages/workspaces/[name]-af76bb06dbb3954f.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/config","/infra","/infra/[context]","/jobs","/jobs/pools/[pool]","/jobs/[job]","/users","/volumes","/workspace/new","/workspaces","/workspaces/[name]"]}}("static/chunks/616-3d59f75e2ccf9321.js","static/chunks/6130-2be46d70a38f1e82.js","static/chunks/5739-d67458fcb1386c92.js","static/chunks/6989-01359c57e018caa4.js","static/chunks/3850-ff4a9a69d978632b.js","static/chunks/7411-b15471acd2cba716.js","static/chunks/1272-1ef0bf0237faccdb.js","static/chunks/8969-a39efbadcd9fde80.js","static/chunks/6135-4b4d5e824b7f9d3c.js","static/chunks/754-d0da8ab45f9509e9.js","static/chunks/6990-f6818c84ed8f1c86.js","static/chunks/1121-4ff1ec0dbc5792ab.js","static/chunks/6601-06114c982db410b6.js","static/chunks/3015-88c7c8d69b0b6dba.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
+ self.__BUILD_MANIFEST=function(s,c,a,t,e,f,u,n,b,o,j,i,r,k){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-444f1804401f04ea.js"],"/_error":["static/chunks/pages/_error-c66a4e8afc46f17b.js"],"/clusters":["static/chunks/pages/clusters-469814d711d63b1b.js"],"/clusters/[cluster]":[s,c,a,f,u,"static/chunks/4676-9da7fdbde90b5549.js",o,t,e,n,j,b,i,"static/chunks/6856-2b3600ff2854d066.js",r,k,"static/chunks/9037-d0c00018a5ba198c.js","static/chunks/pages/clusters/[cluster]-e052384df65ef200.js"],"/clusters/[cluster]/[job]":[s,c,a,f,t,e,b,"static/chunks/pages/clusters/[cluster]/[job]-ad77b12fc736dca3.js"],"/config":["static/chunks/pages/config-dfb9bf07b13045f4.js"],"/infra":["static/chunks/pages/infra-aabba60d57826e0f.js"],"/infra/[context]":["static/chunks/pages/infra/[context]-6563820e094f68ca.js"],"/jobs":["static/chunks/pages/jobs-1f70d9faa564804f.js"],"/jobs/pools/[pool]":[s,c,a,u,o,t,e,n,"static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js"],"/jobs/[job]":[s,c,a,f,u,o,t,e,n,b,"static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js"],"/users":["static/chunks/pages/users-018bf31cda52e11b.js"],"/volumes":["static/chunks/pages/volumes-739726d6b823f532.js"],"/workspace/new":["static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js"],"/workspaces":["static/chunks/pages/workspaces-7528cc0ef8c522c5.js"],"/workspaces/[name]":[s,c,a,f,u,"static/chunks/1836-37fede578e2da5f8.js",t,e,n,j,b,i,r,k,"static/chunks/1141-159df2d4c441a9d1.js","static/chunks/pages/workspaces/[name]-af76bb06dbb3954f.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/config","/infra","/infra/[context]","/jobs","/jobs/pools/[pool]","/jobs/[job]","/users","/volumes","/workspace/new","/workspaces","/workspaces/[name]"]}}("static/chunks/616-3d59f75e2ccf9321.js","static/chunks/6130-2be46d70a38f1e82.js","static/chunks/5739-d67458fcb1386c92.js","static/chunks/6989-01359c57e018caa4.js","static/chunks/3850-ff4a9a69d978632b.js","static/chunks/7411-b15471acd2cba716.js","static/chunks/1272-1ef0bf0237faccdb.js","static/chunks/8969-d8bc3a2b9cf839a9.js","static/chunks/6135-4b4d5e824b7f9d3c.js","static/chunks/754-d0da8ab45f9509e9.js","static/chunks/6990-f6818c84ed8f1c86.js","static/chunks/1121-d0782b9251f0fcd3.js","static/chunks/6601-06114c982db410b6.js","static/chunks/3015-88c7c8d69b0b6dba.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
@@ -0,0 +1 @@
+ "use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[1121],{50326:function(e,t,a){a.d(t,{$N:function(){return m},Be:function(){return h},Vq:function(){return c},cN:function(){return _},cZ:function(){return d},fK:function(){return f}});var r=a(85893),s=a(67294),o=a(6327),n=a(32350),l=a(43767);let c=o.fC;o.xz;let u=o.h_;o.x8;let i=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.aV,{ref:t,className:(0,n.cn)("fixed inset-0 z-50 bg-black/50 backdrop-blur-sm data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",a),...s})});i.displayName=o.aV.displayName;let d=s.forwardRef((e,t)=>{let{className:a,children:s,...c}=e;return(0,r.jsxs)(u,{children:[(0,r.jsx)(i,{}),(0,r.jsxs)(o.VY,{ref:t,className:(0,n.cn)("fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border border-gray-200 bg-white p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg",a),...c,children:[s,(0,r.jsxs)(o.x8,{className:"absolute right-4 top-4 rounded-sm opacity-70 ring-offset-white transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-gray-400 focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-gray-100 data-[state=open]:text-gray-500",children:[(0,r.jsx)(l.Z,{className:"h-4 w-4"}),(0,r.jsx)("span",{className:"sr-only",children:"Close"})]})]})]})});d.displayName=o.VY.displayName;let f=e=>{let{className:t,...a}=e;return(0,r.jsx)("div",{className:(0,n.cn)("flex flex-col space-y-1.5 text-center sm:text-left",t),...a})};f.displayName="DialogHeader";let _=e=>{let{className:t,...a}=e;return(0,r.jsx)("div",{className:(0,n.cn)("flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2",t),...a})};_.displayName="DialogFooter";let m=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.Dx,{ref:t,className:(0,n.cn)("text-lg font-semibold leading-none tracking-tight",a),...s})});m.displayName=o.Dx.displayName;let h=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.dk,{ref:t,className:(0,n.cn)("text-sm text-gray-500",a),...s})});h.displayName=o.dk.displayName},23266:function(e,t,a){a.d(t,{GH:function(){return f},QL:function(){return m},Sl:function(){return d},getClusters:function(){return u},uR:function(){return i}});var r=a(67294),s=a(15821),o=a(47145),n=a(93225),l=a(6378);let c={UP:"RUNNING",STOPPED:"STOPPED",INIT:"LAUNCHING",null:"TERMINATED"};async function u(){let{clusterNames:e=null}=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};try{return(await o.x.fetch("/status",{cluster_names:e,all_users:!0,include_credentials:!1,summary_response:null==e})).map(e=>{let t="",a=t=e.zone?e.zone:e.region;return t&&t.length>25&&(t=function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:15;if(!e||e.length<=t)return e;if(t<=3)return"...";let a=Math.floor((t-3)/2),r=a+(t-3)%2;return 0===a?e.substring(0,r)+"...":e.substring(0,r)+"..."+e.substring(e.length-a)}(t,25)),{status:c[e.status],cluster:e.name,user:e.user_name,user_hash:e.user_hash,cluster_hash:e.cluster_hash,cloud:e.cloud,region:e.region,infra:t?e.cloud+" ("+t+")":e.cloud,full_infra:a?"".concat(e.cloud," 
(").concat(a,")"):e.cloud,cpus:e.cpus,mem:e.memory,gpus:e.accelerators,resources_str:e.resources_str,resources_str_full:e.resources_str_full,time:new Date(1e3*e.launched_at),num_nodes:e.nodes,workspace:e.workspace,autostop:e.autostop,last_event:e.last_event,to_down:e.to_down,cluster_name_on_cloud:e.cluster_name_on_cloud,jobs:[],command:e.last_creation_command||e.last_use,task_yaml:e.last_creation_yaml||"{}",events:[{time:new Date(1e3*e.launched_at),event:"Cluster created."}]}})}catch(e){return console.error("Error fetching clusters:",e),[]}}async function i(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:null,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:30;try{let a={days:t,dashboard_summary_response:!0};e&&(a.cluster_hashes=[e]);let r=await o.x.fetch("/cost_report",a);console.log("Raw cluster history data:",r);let s=r.map(e=>{let t="Unknown";e.cloud?t=e.cloud:e.resources&&e.resources.cloud&&(t=e.resources.cloud);let a=e.user_name||"-";return{status:e.status?c[e.status]:"TERMINATED",cluster:e.name,user:a,user_hash:e.user_hash,cluster_hash:e.cluster_hash,cloud:t,region:"",infra:t,full_infra:t,resources_str:e.resources_str,resources_str_full:e.resources_str_full,time:e.launched_at?new Date(1e3*e.launched_at):null,num_nodes:e.num_nodes||1,duration:e.duration,total_cost:e.total_cost,workspace:e.workspace||"default",autostop:-1,last_event:e.last_event,to_down:!1,cluster_name_on_cloud:null,usage_intervals:e.usage_intervals,command:e.last_creation_command||"",task_yaml:e.last_creation_yaml||"{}",events:[{time:e.launched_at?new Date(1e3*e.launched_at):new Date,event:"Cluster created."}]}});return console.log("Processed cluster history data:",s),s}catch(e){return console.error("Error fetching cluster history:",e),[]}}async function d(e){let{clusterName:t,jobId:a,onNewLog:r,workspace:n}=e;try{await o.x.stream("/logs",{follow:!1,cluster_name:t,job_id:a,tail:1e4,override_skypilot_config:{active_workspace:n||"default"}},r)}catch(e){console.error("Error in streamClusterJobLogs:",e),(0,s.C)("Error in streamClusterJobLogs: ".concat(e.message),"error")}}async function f(e){let{clusterName:t,jobIds:a=null,workspace:r}=e;try{let e=await o.x.fetch("/download_logs",{cluster_name:t,job_ids:a?a.map(String):null,override_skypilot_config:{active_workspace:r||"default"}}),l=Object.values(e||{});if(!l.length){(0,s.C)("No logs found to download.","warning");return}let c=window.location.origin,u="".concat(c).concat(n.f4,"/download"),i=await fetch("".concat(u,"?relative=items"),{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({folder_paths:l})});if(!i.ok){let e=await i.text();throw Error("Download failed: ".concat(i.status," ").concat(e))}let d=await i.blob(),f=window.URL.createObjectURL(d),_=document.createElement("a"),m=new Date().toISOString().replace(/[:.]/g,"-"),h=a&&1===a.length?"job-".concat(a[0]):"jobs";_.href=f,_.download="".concat(t,"-").concat(h,"-logs-").concat(m,".zip"),document.body.appendChild(_),_.click(),_.remove(),window.URL.revokeObjectURL(f)}catch(e){console.error("Error downloading logs:",e),(0,s.C)("Error downloading logs: ".concat(e.message),"error")}}async function _(e){let{clusterName:t,workspace:a}=e;try{return(await o.x.fetch("/queue",{cluster_name:t,all_users:!0,override_skypilot_config:{active_workspace:a}})).map(e=>{var r;let s=e.end_at?e.end_at:Date.now()/1e3,o=0,n=0;return 
e.submitted_at&&(o=s-e.submitted_at),e.start_at&&(n=s-e.start_at),{id:e.job_id,status:e.status,job:e.job_name,user:e.username,user_hash:e.user_hash,gpus:e.accelerators||{},submitted_at:e.submitted_at?new Date(1e3*e.submitted_at):null,resources:e.resources,cluster:t,total_duration:o,job_duration:n,infra:"",logs:"",workspace:a||"default",git_commit:(null===(r=e.metadata)||void 0===r?void 0:r.git_commit)||"-"}})}catch(e){return console.error("Error fetching cluster jobs:",e),[]}}function m(e){let{cluster:t,job:a=null}=e,[s,o]=(0,r.useState)(null),[n,c]=(0,r.useState)(null),[i,d]=(0,r.useState)(!0),[f,m]=(0,r.useState)(!0),h=(0,r.useCallback)(async()=>{if(t)try{d(!0);let e=await l.default.get(u,[{clusterNames:[t]}]);return o(e[0]),e[0]}catch(e){console.error("Error fetching cluster data:",e)}finally{d(!1)}return null},[t]),g=(0,r.useCallback)(async e=>{if(t)try{m(!0);let a=await l.default.get(_,[{clusterName:t,workspace:e||"default"}]);c(a)}catch(e){console.error("Error fetching cluster job data:",e)}finally{m(!1)}},[t]),p=(0,r.useCallback)(async()=>{l.default.invalidate(u,[{clusterNames:[t]}]);let e=await h();e&&(l.default.invalidate(_,[{clusterName:t,workspace:e.workspace||"default"}]),await g(e.workspace))},[h,g,t]),w=(0,r.useCallback)(async()=>{s&&(l.default.invalidate(_,[{clusterName:t,workspace:s.workspace||"default"}]),await g(s.workspace))},[g,s,t]);return(0,r.useEffect)(()=>{(async()=>{let e=await h();e&&g(e.workspace)})()},[t,a,h,g]),{clusterData:s,clusterJobData:n,loading:i,clusterDetailsLoading:i,clusterJobsLoading:f,refreshData:p,refreshClusterJobsOnly:w}}},53081:function(e,t,a){a.d(t,{R:function(){return s}}),a(23266),a(68969);var r=a(47145);async function s(){try{let e=await r.x.get("/users");if(!e.ok)throw Error("HTTP error! status: ".concat(e.status));return(await e.json()).map(e=>({userId:e.id,username:e.name,role:e.role,created_at:e.created_at}))||[]}catch(e){return console.error("Failed to fetch users:",e),[]}}}}]);