skypilot-nightly 1.0.0.dev20250922__py3-none-any.whl → 1.0.0.dev20250926__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend.py +10 -0
- sky/backends/backend_utils.py +207 -79
- sky/backends/cloud_vm_ray_backend.py +37 -13
- sky/backends/local_docker_backend.py +9 -0
- sky/client/cli/command.py +112 -53
- sky/client/common.py +4 -2
- sky/client/sdk.py +17 -7
- sky/client/sdk_async.py +4 -2
- sky/clouds/kubernetes.py +2 -1
- sky/clouds/runpod.py +20 -7
- sky/core.py +9 -54
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-2b3600ff2854d066.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-d8bc3a2b9cf839a9.js +1 -0
- sky/dashboard/out/_next/static/chunks/9037-d0c00018a5ba198c.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ad77b12fc736dca3.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-9525660179df3605.js → [cluster]-e052384df65ef200.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-26167a9e6d91fa51.js → webpack-8e64d11e58eab5cb.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/mounting_utils.py +19 -10
- sky/execution.py +4 -2
- sky/global_user_state.py +271 -67
- sky/jobs/client/sdk.py +10 -1
- sky/jobs/constants.py +2 -0
- sky/jobs/controller.py +11 -7
- sky/jobs/server/core.py +5 -3
- sky/jobs/server/server.py +15 -11
- sky/jobs/utils.py +1 -1
- sky/logs/agent.py +30 -3
- sky/logs/aws.py +9 -19
- sky/provision/__init__.py +2 -1
- sky/provision/aws/instance.py +2 -1
- sky/provision/azure/instance.py +2 -1
- sky/provision/cudo/instance.py +2 -2
- sky/provision/do/instance.py +2 -2
- sky/provision/docker_utils.py +41 -19
- sky/provision/fluidstack/instance.py +2 -2
- sky/provision/gcp/instance.py +2 -1
- sky/provision/hyperbolic/instance.py +2 -1
- sky/provision/instance_setup.py +1 -1
- sky/provision/kubernetes/instance.py +134 -8
- sky/provision/lambda_cloud/instance.py +2 -1
- sky/provision/nebius/instance.py +2 -1
- sky/provision/oci/instance.py +2 -1
- sky/provision/paperspace/instance.py +2 -2
- sky/provision/primeintellect/instance.py +2 -2
- sky/provision/provisioner.py +1 -0
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +2 -2
- sky/provision/scp/instance.py +2 -2
- sky/provision/seeweb/instance.py +2 -1
- sky/provision/vast/instance.py +2 -1
- sky/provision/vsphere/instance.py +6 -5
- sky/schemas/api/responses.py +2 -1
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +45 -19
- sky/serve/replica_managers.py +12 -5
- sky/serve/serve_utils.py +5 -7
- sky/serve/server/core.py +9 -6
- sky/serve/server/impl.py +78 -25
- sky/serve/server/server.py +4 -5
- sky/serve/service_spec.py +33 -0
- sky/server/constants.py +1 -1
- sky/server/daemons.py +2 -3
- sky/server/requests/executor.py +56 -6
- sky/server/requests/payloads.py +32 -8
- sky/server/requests/preconditions.py +2 -3
- sky/server/rest.py +2 -0
- sky/server/server.py +28 -19
- sky/server/stream_utils.py +34 -12
- sky/setup_files/dependencies.py +5 -2
- sky/setup_files/setup.py +44 -44
- sky/skylet/constants.py +4 -1
- sky/skylet/events.py +42 -0
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +24 -18
- sky/usage/usage_lib.py +3 -0
- sky/utils/cli_utils/status_utils.py +4 -5
- sky/utils/context.py +104 -29
- sky/utils/controller_utils.py +7 -6
- sky/utils/db/db_utils.py +5 -1
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/kubernetes/create_cluster.sh +13 -28
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/kubernetes_deploy_utils.py +194 -38
- sky/utils/kubernetes_enums.py +5 -0
- sky/utils/ux_utils.py +35 -1
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +44 -8
- sky/volumes/server/core.py +1 -0
- sky/volumes/server/server.py +33 -7
- sky/volumes/volume.py +35 -28
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/METADATA +38 -33
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/RECORD +118 -117
- sky/dashboard/out/_next/static/chunks/1121-4ff1ec0dbc5792ab.js +0 -1
- sky/dashboard/out/_next/static/chunks/6856-9a2538f38c004652.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-a39efbadcd9fde80.js +0 -1
- sky/dashboard/out/_next/static/chunks/9037-472ee1222cb1e158.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1e9248ddbddcd122.js +0 -16
- /sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/top_level.txt +0 -0
sky/backends/local_docker_backend.py
CHANGED

@@ -189,6 +189,15 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
             ' a NoOp. If you are running sky exec, your workdir has not'
             ' been updated.')

+    def _download_file(self, handle: LocalDockerResourceHandle,
+                       local_file_path: str, remote_file_path: str) -> None:
+        """Syncs file from remote to local."""
+        # Copy from docker container to local
+        container = self.containers[handle]
+        copy_cmd = (
+            f'docker cp {container.name}:{remote_file_path} {local_file_path}')
+        subprocess.run(copy_cmd, shell=True, check=True)
+
     def _sync_file_mounts(
         self,
         handle: LocalDockerResourceHandle,
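For reference, the new `_download_file` hook reduces to a single `docker cp` invocation. A standalone sketch of the same operation, with a hypothetical container name and file paths:

    import subprocess

    container_name = 'sky-local-docker'    # hypothetical
    remote_file_path = '/tmp/output.log'   # hypothetical path inside the container
    local_file_path = '/tmp/output.log'
    # `docker cp CONTAINER:SRC_PATH DST_PATH` copies a file out of a container.
    copy_cmd = f'docker cp {container_name}:{remote_file_path} {local_file_path}'
    subprocess.run(copy_cmd, shell=True, check=True)
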
sky/client/cli/command.py
CHANGED

@@ -127,6 +127,7 @@ def _get_cluster_records_and_set_ssh_config(
     clusters: Optional[List[str]],
     refresh: common.StatusRefreshMode = common.StatusRefreshMode.NONE,
     all_users: bool = False,
+    verbose: bool = False,
 ) -> List[responses.StatusResponse]:
     """Returns a list of clusters that match the glob pattern.

@@ -144,7 +145,8 @@ def _get_cluster_records_and_set_ssh_config(
     request_id = sdk.status(clusters,
                             refresh=refresh,
                             all_users=all_users,
-                            _include_credentials=True)
+                            _include_credentials=True,
+                            _summary_response=not verbose)
     cluster_records = sdk.stream_and_get(request_id)
     # Update the SSH config for all clusters
     for record in cluster_records:

@@ -1858,7 +1860,7 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,

     # Phase 3: Get cluster records and handle special cases
     cluster_records = _get_cluster_records_and_set_ssh_config(
-        query_clusters, refresh_mode, all_users)
+        query_clusters, refresh_mode, all_users, verbose)

     # TOOD(zhwu): setup the ssh config for status
     if ip or show_endpoints:

@@ -4184,6 +4186,13 @@ def volumes_apply(

     logger.debug(f'Volume config: {volume.to_yaml_config()}')

+    # TODO(kevin): remove the try block in v0.13.0
+    try:
+        volumes_sdk.validate(volume)
+    except exceptions.APINotSupportedError:
+        # Do best-effort client-side validation.
+        volume.validate(skip_cloud_compatibility=True)
+
     if not yes:
         click.confirm(f'Proceed to create volume {volume.name!r}?',
                       default=True,

@@ -4780,7 +4789,7 @@ def pool():
 @pool.command('apply', cls=_DocumentedCodeCommand)
 @flags.config_option(expose_value=False)
 @click.argument('pool_yaml',
-                required=True,
+                required=False,
                 type=str,
                 nargs=-1,
                 **_get_shell_complete_args(_complete_file_name))

@@ -4799,13 +4808,18 @@ def pool():
               'with rolling update. If "blue_green", cluster pool will '
               'be updated with blue-green update. This option is only '
               'valid when the pool is already running.'))
+@click.option('--workers',
+              default=None,
+              type=int,
+              required=False,
+              help='Can be used to update the number of workers in the pool.')
 @_add_click_options(flags.TASK_OPTIONS + flags.EXTRA_RESOURCES_OPTIONS +
                     flags.COMMON_OPTIONS)
 @flags.yes_option()
 @timeline.event
 @usage_lib.entrypoint
 def jobs_pool_apply(
-    pool_yaml: Tuple[str, ...],
+    pool_yaml: Optional[Tuple[str, ...]],
     pool: Optional[str],  # pylint: disable=redefined-outer-name
     workdir: Optional[str],
     infra: Optional[str],

@@ -4827,60 +4841,80 @@ def jobs_pool_apply(
     disk_tier: Optional[str],
     network_tier: Optional[str],
     mode: str,
+    workers: Optional[int],
     yes: bool,
     async_call: bool,
 ):
-    """
-    [... 5 lines of the old docstring not shown in this view ...]
+    """Either apply a config to a cluster pool for managed jobs submission
+    or update the number of workers in the pool. One of POOL_YAML or --workers
+    must be provided.
+    Config:
+        If the pool is already running, the config will be applied to the pool.
+        Otherwise, a new pool will be created.
+    Workers:
+        The --workers option can be used to override the number of workers
+        specified in the YAML file, or to update workers without a YAML file.
+    Example:
+        sky jobs pool apply -p my-pool --workers 5
     """
     cloud, region, zone = _handle_infra_cloud_region_zone_options(
         infra, cloud, region, zone)
-    if
-    [... line not shown in this view ...]
+    if workers is not None and pool_yaml is not None and len(pool_yaml) > 0:
+        raise click.UsageError(
+            'Cannot specify both --workers and POOL_YAML. Please use one of '
+            'them.')

-    [... 30 lines of the old implementation not shown in this view ...]
+    if pool_yaml is None or len(pool_yaml) == 0:
+        if pool is None:
+            raise click.UsageError(
+                'A pool name must be provided to update the number of workers.')
+        task = None
+        click.secho(f'Attempting to update {pool} to have {workers} workers',
+                    fg='cyan')
+    else:
+        if pool is None:
+            pool = serve_lib.generate_service_name(pool=True)
+
+        task = _generate_task_with_service(
+            service_name=pool,
+            service_yaml_args=pool_yaml,
+            workdir=workdir,
+            cloud=cloud,
+            region=region,
+            zone=zone,
+            gpus=gpus,
+            cpus=cpus,
+            memory=memory,
+            instance_type=instance_type,
+            num_nodes=num_nodes,
+            use_spot=use_spot,
+            image_id=image_id,
+            env_file=env_file,
+            env=env,
+            secret=secret,
+            disk_size=disk_size,
+            disk_tier=disk_tier,
+            network_tier=network_tier,
+            ports=ports,
+            not_supported_cmd='sky jobs pool up',
+            pool=True,
+        )
+        assert task.service is not None
+        if not task.service.pool:
+            raise click.UsageError('The YAML file needs a `pool` section.')
+        click.secho('Pool spec:', fg='cyan')
+        click.echo(task.service)
+        serve_lib.validate_service_task(task, pool=True)

-    [... 5 lines not shown in this view ...]
+    click.secho(
+        'Each pool worker will use the following resources (estimated):',
+        fg='cyan')
+    with dag_lib.Dag() as dag:
+        dag.add(task)

     request_id = managed_jobs.pool_apply(task,
                                          pool,
+                                         workers=workers,
                                          mode=serve_lib.UpdateMode(mode),
                                          _need_confirmation=not yes)
     _async_call_or_wait(request_id, async_call, 'sky.jobs.pool_apply')

@@ -5487,6 +5521,8 @@ def serve_update(
       sky serve update --mode blue_green sky-service-16aa new_service.yaml

     """
+    # TODO(lloyd-brown): Add a way to update number of replicas for serve
+    # the way we did for pools.
     cloud, region, zone = _handle_infra_cloud_region_zone_options(
         infra, cloud, region, zone)
     task = _generate_task_with_service(

@@ -5868,19 +5904,33 @@ def local():
     '--context-name',
     type=str,
     required=False,
-    help='Name to use for the kubeconfig context. Defaults to "default".')
+    help='Name to use for the kubeconfig context. Defaults to "default". '
+    'Used with the ip list.')
 @click.option('--password',
               type=str,
               required=False,
               help='Password for the ssh-user to execute sudo commands. '
               'Required only if passwordless sudo is not setup.')
+@click.option(
+    '--name',
+    type=str,
+    required=False,
+    help='Name of the cluster. Defaults to "skypilot". Used without ip list.')
+@click.option(
+    '--port-start',
+    type=int,
+    required=False,
+    help='Starting port range for the local kind cluster. Needs to be a '
+    'multiple of 100. If not given, a random range will be used. '
+    'Used without ip list.')
 @local.command('up', cls=_DocumentedCodeCommand)
 @flags.config_option(expose_value=False)
 @_add_click_options(flags.COMMON_OPTIONS)
 @usage_lib.entrypoint
 def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
              cleanup: bool, context_name: Optional[str],
-             password: Optional[str], async_call: bool):
+             password: Optional[str], name: Optional[str],
+             port_start: Optional[int], async_call: bool):
     """Creates a local or remote cluster."""

     def _validate_args(ips, ssh_user, ssh_key_path, cleanup):

@@ -5926,17 +5976,26 @@ def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
                 f'Failed to read SSH key file {ssh_key_path}: {str(e)}')

     request_id = sdk.local_up(gpus, ip_list, ssh_user, ssh_key, cleanup,
-                              context_name, password)
+                              context_name, password, name, port_start)
     _async_call_or_wait(request_id, async_call, request_name='local up')


+@click.option('--name',
+              type=str,
+              required=False,
+              help='Name of the cluster to down. Defaults to "skypilot".')
 @local.command('down', cls=_DocumentedCodeCommand)
 @flags.config_option(expose_value=False)
 @_add_click_options(flags.COMMON_OPTIONS)
 @usage_lib.entrypoint
-def local_down(async_call: bool):
-    """Deletes a local cluster.
-    [... line not shown in this view ...]
+def local_down(name: Optional[str], async_call: bool):
+    """Deletes a local cluster.
+
+    This will only delete a local cluster started without the ip list.
+    To clean up the local cluster started with a ip list, use `sky local up`
+    with the cleanup flag.
+    """
+    request_id = sdk.local_down(name)
     _async_call_or_wait(request_id, async_call, request_name='sky.local.down')
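The new workers-only path ends with `managed_jobs.pool_apply(task=None, ...)`. A minimal sketch of that call, assuming the SDK aliases match what the CLI imports above (`managed_jobs`, `serve_lib`); the pool name is hypothetical:

    from sky import jobs as managed_jobs  # assumed alias, as in the CLI
    from sky import serve as serve_lib    # assumed alias, as in the CLI

    # Resize an existing pool to 5 workers without re-submitting a YAML:
    request_id = managed_jobs.pool_apply(
        None,        # no task: workers-only update
        'my-pool',   # hypothetical existing pool name
        workers=5,
        mode=serve_lib.UpdateMode('rolling'))
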
sky/client/common.py
CHANGED

@@ -44,8 +44,10 @@ logger = sky_logging.init_logger(__name__)
 _DOWNLOAD_CHUNK_BYTES = 8192
 # The chunk size for the zip file to be uploaded to the API server. We split
 # the zip file into chunks to avoid network issues for large request body that
-# can be caused by NGINX's client_max_body_size.
-[... line not shown in this view ...]
+# can be caused by NGINX's client_max_body_size or Cloudflare's upload limit.
+# As of 09/25/2025, the upload limit for Cloudflare's free plan is 100MiB:
+# https://developers.cloudflare.com/support/troubleshooting/http-status-codes/4xx-client-error/error-413/
+_UPLOAD_CHUNK_BYTES = 100 * 1024 * 1024

 FILE_UPLOAD_LOGS_DIR = os.path.join(constants.SKY_LOGS_DIRECTORY,
                                     'file_uploads')
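The new 100 MiB constant caps each request body, so a large zip is sent as several smaller requests. A generic sketch of the chunking idea (not SkyPilot's actual uploader):

    _UPLOAD_CHUNK_BYTES = 100 * 1024 * 1024  # keep each body under proxy limits

    def iter_chunks(path: str, chunk_bytes: int = _UPLOAD_CHUNK_BYTES):
        """Yield successive chunks of a file; each becomes one upload request."""
        with open(path, 'rb') as f:
            while True:
                chunk = f.read(chunk_bytes)
                if not chunk:
                    return
                yield chunk
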
sky/client/sdk.py
CHANGED

@@ -1429,6 +1429,7 @@ def status(
     all_users: bool = False,
     *,
     _include_credentials: bool = False,
+    _summary_response: bool = False,
 ) -> server_common.RequestId[List[responses.StatusResponse]]:
     """Gets cluster statuses.

@@ -1514,6 +1515,7 @@ def status(
         refresh=refresh,
         all_users=all_users,
         include_credentials=_include_credentials,
+        summary_response=_summary_response,
     )
     response = server_common.make_authenticated_request(
         'POST', '/status', json=json.loads(body.model_dump_json()))

@@ -1675,7 +1677,9 @@ def local_up(gpus: bool,
              ssh_key: Optional[str],
              cleanup: bool,
              context_name: Optional[str] = None,
-             password: Optional[str] = None
+             password: Optional[str] = None,
+             name: Optional[str] = None,
+             port_start: Optional[int] = None) -> server_common.RequestId[None]:
     """Launches a Kubernetes cluster on local machines.

     Returns:

@@ -1686,8 +1690,8 @@ def local_up(gpus: bool,
     # TODO: move this check to server.
     if not server_common.is_api_server_local():
         with ux_utils.print_exception_no_traceback():
-            raise ValueError(
-                [... line not shown in this view ...]
+            raise ValueError('`sky local up` is only supported when '
+                             'running SkyPilot locally.')

     body = payloads.LocalUpBody(gpus=gpus,
                                 ips=ips,

@@ -1695,7 +1699,9 @@ def local_up(gpus: bool,
                                 ssh_key=ssh_key,
                                 cleanup=cleanup,
                                 context_name=context_name,
-                                password=password)
+                                password=password,
+                                name=name,
+                                port_start=port_start)
     response = server_common.make_authenticated_request(
         'POST', '/local_up', json=json.loads(body.model_dump_json()))
     return server_common.get_request_id(response)

@@ -1704,16 +1710,19 @@ def local_up(gpus: bool,
 @usage_lib.entrypoint
 @server_common.check_server_healthy_or_start
 @annotations.client_api
-def local_down() -> server_common.RequestId[None]:
+def local_down(name: Optional[str]) -> server_common.RequestId[None]:
     """Tears down the Kubernetes cluster started by local_up."""
     # We do not allow local up when the API server is running remotely since it
     # will modify the kubeconfig.
     # TODO: move this check to remote server.
     if not server_common.is_api_server_local():
         with ux_utils.print_exception_no_traceback():
-            raise ValueError('sky local down is only supported when running '
+            raise ValueError('`sky local down` is only supported when running '
                              'SkyPilot locally.')
-    [... line not shown in this view ...]
+
+    body = payloads.LocalDownBody(name=name)
+    response = server_common.make_authenticated_request(
+        'POST', '/local_down', json=json.loads(body.model_dump_json()))
     return server_common.get_request_id(response)

@@ -2083,6 +2092,7 @@ def stream_and_get(
     return stream_response(request_id,
                            response,
                            output_stream,
+                           resumable=True,
                            get_result=follow)
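Putting the new flag together with `stream_and_get`: a minimal sketch using the signatures above (`_summary_response` is an internal keyword-only parameter, so this mirrors what the CLI does rather than a public API):

    from sky.client import sdk

    # Ask the server for the lightweight summary payload (non-verbose status).
    request_id = sdk.status(all_users=True, _summary_response=True)
    records = sdk.stream_and_get(request_id)  # now passes resumable=True internally
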
sky/client/sdk_async.py
CHANGED

@@ -661,13 +661,14 @@ async def local_up(
     ssh_key: Optional[str],
     cleanup: bool,
     context_name: Optional[str] = None,
+    name: Optional[str] = None,
     password: Optional[str] = None,
     stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
     """Async version of local_up() that launches a Kubernetes cluster on
     local machines."""
     request_id = await context_utils.to_thread(sdk.local_up, gpus, ips,
                                                ssh_user, ssh_key, cleanup,
-                                               context_name, password)
+                                               context_name, name, password)
     if stream_logs is not None:
         return await _stream_and_get(request_id, stream_logs)
     else:

@@ -677,10 +678,11 @@ async def local_up(
 @usage_lib.entrypoint
 @annotations.client_api
 async def local_down(
+    name: Optional[str] = None,
     stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
     """Async version of local_down() that tears down the Kubernetes cluster
     started by local_up."""
-    request_id = await context_utils.to_thread(sdk.local_down)
+    request_id = await context_utils.to_thread(sdk.local_down, name)
     if stream_logs is not None:
         return await _stream_and_get(request_id, stream_logs)
     else:
CHANGED
|
@@ -62,6 +62,7 @@ class Kubernetes(clouds.Cloud):
|
|
|
62
62
|
_SUPPORTS_SERVICE_ACCOUNT_ON_REMOTE = True
|
|
63
63
|
|
|
64
64
|
_DEFAULT_NUM_VCPUS = 2
|
|
65
|
+
_DEFAULT_NUM_VCPUS_WITH_GPU = 4
|
|
65
66
|
_DEFAULT_MEMORY_CPU_RATIO = 1
|
|
66
67
|
_DEFAULT_MEMORY_CPU_RATIO_WITH_GPU = 4 # Allocate more memory for GPU tasks
|
|
67
68
|
_REPR = 'Kubernetes'
|
|
@@ -842,7 +843,7 @@ class Kubernetes(clouds.Cloud):
|
|
|
842
843
|
|
|
843
844
|
gpu_task_cpus = k8s_instance_type.cpus
|
|
844
845
|
if resources.cpus is None:
|
|
845
|
-
gpu_task_cpus =
|
|
846
|
+
gpu_task_cpus = self._DEFAULT_NUM_VCPUS_WITH_GPU * acc_count
|
|
846
847
|
# Special handling to bump up memory multiplier for GPU instances
|
|
847
848
|
gpu_task_memory = (float(resources.memory.strip('+')) if
|
|
848
849
|
resources.memory is not None else gpu_task_cpus *
|
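A worked example of the new default, using the constants above; the memory expression is cut off in the hunk, so the memory figure is inferred from the `*_WITH_GPU` ratio constant:

    # Requesting 2 GPUs with no explicit cpus/memory:
    acc_count = 2
    gpu_task_cpus = 4 * acc_count        # _DEFAULT_NUM_VCPUS_WITH_GPU = 4 -> 8 vCPUs
    gpu_task_memory = gpu_task_cpus * 4  # _DEFAULT_MEMORY_CPU_RATIO_WITH_GPU = 4 -> 32
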
sky/clouds/runpod.py
CHANGED

@@ -286,14 +286,16 @@ class RunPod(clouds.Cloud):
     @classmethod
     def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
         """Verify that the user has valid credentials for RunPod. """
-        dependency_error_msg = ('Failed to import runpod. '
-                                '
+        dependency_error_msg = ('Failed to import runpod or TOML parser. '
+                                'Install: pip install "skypilot[runpod]".')
         try:
             runpod_spec = import_lib_util.find_spec('runpod')
             if runpod_spec is None:
                 return False, dependency_error_msg
-            [... 2 lines not shown in this view ...]
+            # Prefer stdlib tomllib (Python 3.11+); fallback to tomli
+            tomllib_spec = import_lib_util.find_spec('tomllib')
+            tomli_spec = import_lib_util.find_spec('tomli')
+            if tomllib_spec is None and tomli_spec is None:
                 return False, dependency_error_msg
         except ValueError:
             # docstring of importlib_util.find_spec:

@@ -322,9 +324,20 @@ class RunPod(clouds.Cloud):
         if not os.path.exists(credential_file):
             return False, '~/.runpod/config.toml does not exist.'

-        #
-        #
-        [... line not shown in this view ...]
+        # We don't need to import TOML parser if config.toml does not exist.
+        # When needed, prefer stdlib tomllib (py>=3.11); otherwise use tomli.
+        # TODO(andy): remove this fallback after dropping Python 3.10 support.
+        try:
+            try:
+                import tomllib as toml  # pylint: disable=import-outside-toplevel
+            except ModuleNotFoundError:  # py<3.11
+                import tomli as toml  # pylint: disable=import-outside-toplevel
+        except ModuleNotFoundError:
+            # Should never happen. We already installed proper dependencies for
+            # different Python versions in setup_files/dependencies.py.
+            return False, (
+                '~/.runpod/config.toml exists but no TOML parser is available. '
+                'Install tomli for Python < 3.11: pip install tomli.')

         # Check for default api_key
         try:
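A standalone sketch of the same tomllib/tomli fallback for reading the credential file; the `api_key` key is assumed from the "Check for default api_key" comment above:

    import os

    try:
        import tomllib as toml  # stdlib on Python 3.11+
    except ModuleNotFoundError:
        import tomli as toml  # pip install tomli on Python < 3.11

    path = os.path.expanduser('~/.runpod/config.toml')
    with open(path, 'rb') as f:  # both parsers require binary mode
        config = toml.load(f)
    print('api_key present:', 'api_key' in config)
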
sky/core.py
CHANGED

@@ -1,6 +1,4 @@
 """SDK functions for cluster/job management."""
-import os
-import shlex
 import typing
 from typing import Any, Dict, List, Optional, Tuple, Union

@@ -9,7 +7,6 @@ import colorama
 from sky import admin_policy
 from sky import backends
 from sky import catalog
-from sky import check as sky_check
 from sky import clouds
 from sky import dag as dag_lib
 from sky import data

@@ -31,7 +28,6 @@ from sky.schemas.api import responses
 from sky.skylet import autostop_lib
 from sky.skylet import constants
 from sky.skylet import job_lib
-from sky.skylet import log_lib
 from sky.usage import usage_lib
 from sky.utils import admin_policy_utils
 from sky.utils import common

@@ -102,6 +98,7 @@ def status(
     refresh: common.StatusRefreshMode = common.StatusRefreshMode.NONE,
     all_users: bool = False,
     include_credentials: bool = False,
+    summary_response: bool = False,
 ) -> List[responses.StatusResponse]:
     # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
     """Gets cluster statuses.

@@ -181,7 +178,8 @@ def status(
         refresh=refresh,
         cluster_names=cluster_names,
         all_users=all_users,
-        include_credentials=include_credentials)
+        include_credentials=include_credentials,
+        summary_response=summary_response)

     status_responses = []
     for cluster in clusters:

@@ -1301,7 +1299,9 @@ def local_up(gpus: bool,
              ssh_key: Optional[str],
              cleanup: bool,
              context_name: Optional[str] = None,
-             password: Optional[str] = None
+             password: Optional[str] = None,
+             name: Optional[str] = None,
+             port_start: Optional[int] = None) -> None:
     """Creates a local or remote cluster."""

     def _validate_args(ips, ssh_user, ssh_key, cleanup):

@@ -1331,57 +1331,12 @@ def local_up(gpus: bool,
                                              password)
     else:
         # Run local deployment (kind) if no remote args are specified
-        kubernetes_deploy_utils.deploy_local_cluster(gpus)
+        kubernetes_deploy_utils.deploy_local_cluster(name, port_start, gpus)


-def local_down() -> None:
+def local_down(name: Optional[str] = None) -> None:
     """Tears down the Kubernetes cluster started by local_up."""
-
-
-    path_to_package = os.path.dirname(__file__)
-    down_script_path = os.path.join(path_to_package, 'utils/kubernetes',
-                                    'delete_cluster.sh')
-
-    cwd = os.path.dirname(os.path.abspath(down_script_path))
-    run_command = shlex.split(down_script_path)
-
-    # Setup logging paths
-    run_timestamp = sky_logging.get_run_timestamp()
-    log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
-                            'local_down.log')
-
-    with rich_utils.safe_status(
-            ux_utils.spinner_message('Removing local cluster',
-                                     log_path=log_path,
-                                     is_local=True)):
-
-        returncode, stdout, stderr = log_lib.run_with_log(cmd=run_command,
-                                                          log_path=log_path,
-                                                          require_outputs=True,
-                                                          stream_logs=False,
-                                                          cwd=cwd)
-        stderr = stderr.replace('No kind clusters found.\n', '')
-
-        if returncode == 0:
-            cluster_removed = True
-        elif returncode == 100:
-            logger.info(ux_utils.error_message('Local cluster does not exist.'))
-        else:
-            with ux_utils.print_exception_no_traceback():
-                raise RuntimeError('Failed to create local cluster. '
-                                   f'Stdout: {stdout}'
-                                   f'\nError: {stderr}')
-        if cluster_removed:
-            # Run sky check
-            with rich_utils.safe_status(
-                    ux_utils.spinner_message('Running sky check...')):
-                sky_check.check_capability(sky_cloud.CloudCapability.COMPUTE,
-                                           clouds=['kubernetes'],
-                                           quiet=True)
-            logger.info(
-                ux_utils.finishing_message('Local cluster removed.',
-                                           log_path=log_path,
-                                           is_local=True))
+    kubernetes_deploy_utils.teardown_local_cluster(name)


 @usage_lib.entrypoint
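End to end, the new named-cluster flow looks like this via the client SDK; a minimal sketch assuming the remote-path arguments (ips, ssh_user, ssh_key) accept None on the local kind path, as the CLI code suggests:

    from sky.client import sdk

    # port_start must be a multiple of 100 per the CLI help.
    req = sdk.local_up(gpus=False, ips=None, ssh_user=None, ssh_key=None,
                       cleanup=False, name='my-kind-cluster', port_start=4600)
    sdk.stream_and_get(req)

    # Later, tear down that specific cluster by name.
    sdk.stream_and_get(sdk.local_down('my-kind-cluster'))
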
sky/dashboard/out/404.html
CHANGED

@@ -1 +1 @@
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-8e64d11e58eab5cb.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/VXU6_xE28M55BOdwmUUJS/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/VXU6_xE28M55BOdwmUUJS/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"VXU6_xE28M55BOdwmUUJS","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_buildManifest.js
RENAMED

@@ -1 +1 @@
-self.__BUILD_MANIFEST=function(s,c,a,e,
+
self.__BUILD_MANIFEST=function(s,c,a,t,e,f,u,n,b,o,j,i,r,k){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-444f1804401f04ea.js"],"/_error":["static/chunks/pages/_error-c66a4e8afc46f17b.js"],"/clusters":["static/chunks/pages/clusters-469814d711d63b1b.js"],"/clusters/[cluster]":[s,c,a,f,u,"static/chunks/4676-9da7fdbde90b5549.js",o,t,e,n,j,b,i,"static/chunks/6856-2b3600ff2854d066.js",r,k,"static/chunks/9037-d0c00018a5ba198c.js","static/chunks/pages/clusters/[cluster]-e052384df65ef200.js"],"/clusters/[cluster]/[job]":[s,c,a,f,t,e,b,"static/chunks/pages/clusters/[cluster]/[job]-ad77b12fc736dca3.js"],"/config":["static/chunks/pages/config-dfb9bf07b13045f4.js"],"/infra":["static/chunks/pages/infra-aabba60d57826e0f.js"],"/infra/[context]":["static/chunks/pages/infra/[context]-6563820e094f68ca.js"],"/jobs":["static/chunks/pages/jobs-1f70d9faa564804f.js"],"/jobs/pools/[pool]":[s,c,a,u,o,t,e,n,"static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js"],"/jobs/[job]":[s,c,a,f,u,o,t,e,n,b,"static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js"],"/users":["static/chunks/pages/users-018bf31cda52e11b.js"],"/volumes":["static/chunks/pages/volumes-739726d6b823f532.js"],"/workspace/new":["static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js"],"/workspaces":["static/chunks/pages/workspaces-7528cc0ef8c522c5.js"],"/workspaces/[name]":[s,c,a,f,u,"static/chunks/1836-37fede578e2da5f8.js",t,e,n,j,b,i,r,k,"static/chunks/1141-159df2d4c441a9d1.js","static/chunks/pages/workspaces/[name]-af76bb06dbb3954f.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/config","/infra","/infra/[context]","/jobs","/jobs/pools/[pool]","/jobs/[job]","/users","/volumes","/workspace/new","/workspaces","/workspaces/[name]"]}}("static/chunks/616-3d59f75e2ccf9321.js","static/chunks/6130-2be46d70a38f1e82.js","static/chunks/5739-d67458fcb1386c92.js","static/chunks/6989-01359c57e018caa4.js","static/chunks/3850-ff4a9a69d978632b.js","static/chunks/7411-b15471acd2cba716.js","static/chunks/1272-1ef0bf0237faccdb.js","static/chunks/8969-d8bc3a2b9cf839a9.js","static/chunks/6135-4b4d5e824b7f9d3c.js","static/chunks/754-d0da8ab45f9509e9.js","static/chunks/6990-f6818c84ed8f1c86.js","static/chunks/1121-d0782b9251f0fcd3.js","static/chunks/6601-06114c982db410b6.js","static/chunks/3015-88c7c8d69b0b6dba.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js
ADDED

@@ -0,0 +1 @@
+
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[1121],{50326:function(e,t,a){a.d(t,{$N:function(){return m},Be:function(){return h},Vq:function(){return c},cN:function(){return _},cZ:function(){return d},fK:function(){return f}});var r=a(85893),s=a(67294),o=a(6327),n=a(32350),l=a(43767);let c=o.fC;o.xz;let u=o.h_;o.x8;let i=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.aV,{ref:t,className:(0,n.cn)("fixed inset-0 z-50 bg-black/50 backdrop-blur-sm data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",a),...s})});i.displayName=o.aV.displayName;let d=s.forwardRef((e,t)=>{let{className:a,children:s,...c}=e;return(0,r.jsxs)(u,{children:[(0,r.jsx)(i,{}),(0,r.jsxs)(o.VY,{ref:t,className:(0,n.cn)("fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border border-gray-200 bg-white p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg",a),...c,children:[s,(0,r.jsxs)(o.x8,{className:"absolute right-4 top-4 rounded-sm opacity-70 ring-offset-white transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-gray-400 focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-gray-100 data-[state=open]:text-gray-500",children:[(0,r.jsx)(l.Z,{className:"h-4 w-4"}),(0,r.jsx)("span",{className:"sr-only",children:"Close"})]})]})]})});d.displayName=o.VY.displayName;let f=e=>{let{className:t,...a}=e;return(0,r.jsx)("div",{className:(0,n.cn)("flex flex-col space-y-1.5 text-center sm:text-left",t),...a})};f.displayName="DialogHeader";let _=e=>{let{className:t,...a}=e;return(0,r.jsx)("div",{className:(0,n.cn)("flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2",t),...a})};_.displayName="DialogFooter";let m=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.Dx,{ref:t,className:(0,n.cn)("text-lg font-semibold leading-none tracking-tight",a),...s})});m.displayName=o.Dx.displayName;let h=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.dk,{ref:t,className:(0,n.cn)("text-sm text-gray-500",a),...s})});h.displayName=o.dk.displayName},23266:function(e,t,a){a.d(t,{GH:function(){return f},QL:function(){return m},Sl:function(){return d},getClusters:function(){return u},uR:function(){return i}});var r=a(67294),s=a(15821),o=a(47145),n=a(93225),l=a(6378);let c={UP:"RUNNING",STOPPED:"STOPPED",INIT:"LAUNCHING",null:"TERMINATED"};async function u(){let{clusterNames:e=null}=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};try{return(await o.x.fetch("/status",{cluster_names:e,all_users:!0,include_credentials:!1,summary_response:null==e})).map(e=>{let t="",a=t=e.zone?e.zone:e.region;return t&&t.length>25&&(t=function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:15;if(!e||e.length<=t)return e;if(t<=3)return"...";let a=Math.floor((t-3)/2),r=a+(t-3)%2;return 0===a?e.substring(0,r)+"...":e.substring(0,r)+"..."+e.substring(e.length-a)}(t,25)),{status:c[e.status],cluster:e.name,user:e.user_name,user_hash:e.user_hash,cluster_hash:e.cluster_hash,cloud:e.cloud,region:e.region,infra:t?e.cloud+" ("+t+")":e.cloud,full_infra:a?"".concat(e.cloud," 
(").concat(a,")"):e.cloud,cpus:e.cpus,mem:e.memory,gpus:e.accelerators,resources_str:e.resources_str,resources_str_full:e.resources_str_full,time:new Date(1e3*e.launched_at),num_nodes:e.nodes,workspace:e.workspace,autostop:e.autostop,last_event:e.last_event,to_down:e.to_down,cluster_name_on_cloud:e.cluster_name_on_cloud,jobs:[],command:e.last_creation_command||e.last_use,task_yaml:e.last_creation_yaml||"{}",events:[{time:new Date(1e3*e.launched_at),event:"Cluster created."}]}})}catch(e){return console.error("Error fetching clusters:",e),[]}}async function i(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:null,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:30;try{let a={days:t,dashboard_summary_response:!0};e&&(a.cluster_hashes=[e]);let r=await o.x.fetch("/cost_report",a);console.log("Raw cluster history data:",r);let s=r.map(e=>{let t="Unknown";e.cloud?t=e.cloud:e.resources&&e.resources.cloud&&(t=e.resources.cloud);let a=e.user_name||"-";return{status:e.status?c[e.status]:"TERMINATED",cluster:e.name,user:a,user_hash:e.user_hash,cluster_hash:e.cluster_hash,cloud:t,region:"",infra:t,full_infra:t,resources_str:e.resources_str,resources_str_full:e.resources_str_full,time:e.launched_at?new Date(1e3*e.launched_at):null,num_nodes:e.num_nodes||1,duration:e.duration,total_cost:e.total_cost,workspace:e.workspace||"default",autostop:-1,last_event:e.last_event,to_down:!1,cluster_name_on_cloud:null,usage_intervals:e.usage_intervals,command:e.last_creation_command||"",task_yaml:e.last_creation_yaml||"{}",events:[{time:e.launched_at?new Date(1e3*e.launched_at):new Date,event:"Cluster created."}]}});return console.log("Processed cluster history data:",s),s}catch(e){return console.error("Error fetching cluster history:",e),[]}}async function d(e){let{clusterName:t,jobId:a,onNewLog:r,workspace:n}=e;try{await o.x.stream("/logs",{follow:!1,cluster_name:t,job_id:a,tail:1e4,override_skypilot_config:{active_workspace:n||"default"}},r)}catch(e){console.error("Error in streamClusterJobLogs:",e),(0,s.C)("Error in streamClusterJobLogs: ".concat(e.message),"error")}}async function f(e){let{clusterName:t,jobIds:a=null,workspace:r}=e;try{let e=await o.x.fetch("/download_logs",{cluster_name:t,job_ids:a?a.map(String):null,override_skypilot_config:{active_workspace:r||"default"}}),l=Object.values(e||{});if(!l.length){(0,s.C)("No logs found to download.","warning");return}let c=window.location.origin,u="".concat(c).concat(n.f4,"/download"),i=await fetch("".concat(u,"?relative=items"),{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({folder_paths:l})});if(!i.ok){let e=await i.text();throw Error("Download failed: ".concat(i.status," ").concat(e))}let d=await i.blob(),f=window.URL.createObjectURL(d),_=document.createElement("a"),m=new Date().toISOString().replace(/[:.]/g,"-"),h=a&&1===a.length?"job-".concat(a[0]):"jobs";_.href=f,_.download="".concat(t,"-").concat(h,"-logs-").concat(m,".zip"),document.body.appendChild(_),_.click(),_.remove(),window.URL.revokeObjectURL(f)}catch(e){console.error("Error downloading logs:",e),(0,s.C)("Error downloading logs: ".concat(e.message),"error")}}async function _(e){let{clusterName:t,workspace:a}=e;try{return(await o.x.fetch("/queue",{cluster_name:t,all_users:!0,override_skypilot_config:{active_workspace:a}})).map(e=>{var r;let s=e.end_at?e.end_at:Date.now()/1e3,o=0,n=0;return 
e.submitted_at&&(o=s-e.submitted_at),e.start_at&&(n=s-e.start_at),{id:e.job_id,status:e.status,job:e.job_name,user:e.username,user_hash:e.user_hash,gpus:e.accelerators||{},submitted_at:e.submitted_at?new Date(1e3*e.submitted_at):null,resources:e.resources,cluster:t,total_duration:o,job_duration:n,infra:"",logs:"",workspace:a||"default",git_commit:(null===(r=e.metadata)||void 0===r?void 0:r.git_commit)||"-"}})}catch(e){return console.error("Error fetching cluster jobs:",e),[]}}function m(e){let{cluster:t,job:a=null}=e,[s,o]=(0,r.useState)(null),[n,c]=(0,r.useState)(null),[i,d]=(0,r.useState)(!0),[f,m]=(0,r.useState)(!0),h=(0,r.useCallback)(async()=>{if(t)try{d(!0);let e=await l.default.get(u,[{clusterNames:[t]}]);return o(e[0]),e[0]}catch(e){console.error("Error fetching cluster data:",e)}finally{d(!1)}return null},[t]),g=(0,r.useCallback)(async e=>{if(t)try{m(!0);let a=await l.default.get(_,[{clusterName:t,workspace:e||"default"}]);c(a)}catch(e){console.error("Error fetching cluster job data:",e)}finally{m(!1)}},[t]),p=(0,r.useCallback)(async()=>{l.default.invalidate(u,[{clusterNames:[t]}]);let e=await h();e&&(l.default.invalidate(_,[{clusterName:t,workspace:e.workspace||"default"}]),await g(e.workspace))},[h,g,t]),w=(0,r.useCallback)(async()=>{s&&(l.default.invalidate(_,[{clusterName:t,workspace:s.workspace||"default"}]),await g(s.workspace))},[g,s,t]);return(0,r.useEffect)(()=>{(async()=>{let e=await h();e&&g(e.workspace)})()},[t,a,h,g]),{clusterData:s,clusterJobData:n,loading:i,clusterDetailsLoading:i,clusterJobsLoading:f,refreshData:p,refreshClusterJobsOnly:w}}},53081:function(e,t,a){a.d(t,{R:function(){return s}}),a(23266),a(68969);var r=a(47145);async function s(){try{let e=await r.x.get("/users");if(!e.ok)throw Error("HTTP error! status: ".concat(e.status));return(await e.json()).map(e=>({userId:e.id,username:e.name,role:e.role,created_at:e.created_at}))||[]}catch(e){return console.error("Failed to fetch users:",e),[]}}}}]);