skypilot-nightly 1.0.0.dev20250510__py3-none-any.whl → 1.0.0.dev20250514__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +22 -3
- sky/cli.py +109 -109
- sky/client/cli.py +109 -109
- sky/clouds/gcp.py +59 -16
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +33 -11
- sky/clouds/service_catalog/gcp_catalog.py +7 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/845-0ca6f2c1ba667c3b.js +1 -0
- sky/dashboard/out/_next/static/{C0fkLhvxyqkymoV7IeInQ → tdxxQrPV6NW90a983oHXe}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/provision/docker_utils.py +4 -1
- sky/provision/gcp/config.py +197 -15
- sky/provision/gcp/constants.py +64 -0
- sky/provision/nebius/instance.py +3 -1
- sky/provision/nebius/utils.py +4 -2
- sky/resources.py +4 -0
- sky/skylet/constants.py +3 -0
- sky/templates/gcp-ray.yml.j2 +11 -0
- sky/templates/nebius-ray.yml.j2 +7 -1
- sky/templates/websocket_proxy.py +29 -9
- sky/utils/schemas.py +9 -1
- {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/RECORD +34 -34
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- /sky/dashboard/out/_next/static/{C0fkLhvxyqkymoV7IeInQ → tdxxQrPV6NW90a983oHXe}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/top_level.txt +0 -0
sky/client/cli.py
CHANGED
@@ -91,6 +91,8 @@ from sky.utils.cli_utils import status_utils
|
|
91
91
|
if typing.TYPE_CHECKING:
|
92
92
|
import types
|
93
93
|
|
94
|
+
import prettytable
|
95
|
+
|
94
96
|
pd = adaptors_common.LazyImport('pandas')
|
95
97
|
logger = sky_logging.init_logger(__name__)
|
96
98
|
|
@@ -3371,12 +3373,8 @@ def show_gpus(
|
|
3371
3373
|
* ``QTY_PER_NODE`` (Kubernetes only): GPU quantities that can be requested
|
3372
3374
|
on a single node.
|
3373
3375
|
|
3374
|
-
* ``
|
3375
|
-
Kubernetes cluster.
|
3376
|
-
|
3377
|
-
* ``TOTAL_FREE_GPUS`` (Kubernetes only): Number of currently free GPUs
|
3378
|
-
in the Kubernetes cluster. This is fetched in real-time and may change
|
3379
|
-
when other users are using the cluster.
|
3376
|
+
* ``UTILIZATION`` (Kubernetes only): Total number of GPUs free / available
|
3377
|
+
in the Kubernetes cluster.
|
3380
3378
|
"""
|
3381
3379
|
# validation for the --region flag
|
3382
3380
|
if region is not None and cloud is None:
|
@@ -3415,15 +3413,16 @@ def show_gpus(
|
|
3415
3413
|
# TODO(zhwu,romilb): We should move most of these kubernetes related
|
3416
3414
|
# queries into the backend, especially behind the server.
|
3417
3415
|
def _get_kubernetes_realtime_gpu_tables(
|
3418
|
-
|
3419
|
-
|
3420
|
-
|
3416
|
+
context: Optional[str] = None,
|
3417
|
+
name_filter: Optional[str] = None,
|
3418
|
+
quantity_filter: Optional[int] = None
|
3419
|
+
) -> Tuple[List[Tuple[str, 'prettytable.PrettyTable']],
|
3420
|
+
Optional['prettytable.PrettyTable'], List[Tuple[
|
3421
|
+
str, 'models.KubernetesNodesInfo']]]:
|
3421
3422
|
if quantity_filter:
|
3422
3423
|
qty_header = 'QTY_FILTER'
|
3423
|
-
free_header = 'FILTERED_FREE_GPUS'
|
3424
3424
|
else:
|
3425
3425
|
qty_header = 'REQUESTABLE_QTY_PER_NODE'
|
3426
|
-
free_header = 'TOTAL_FREE_GPUS'
|
3427
3426
|
|
3428
3427
|
realtime_gpu_availability_lists = sdk.stream_and_get(
|
3429
3428
|
sdk.realtime_kubernetes_gpu_availability(
|
@@ -3449,41 +3448,19 @@ def show_gpus(
|
|
3449
3448
|
realtime_gpu_infos = []
|
3450
3449
|
total_gpu_info: Dict[str, List[int]] = collections.defaultdict(
|
3451
3450
|
lambda: [0, 0])
|
3451
|
+
all_nodes_info = []
|
3452
3452
|
|
3453
|
-
# TODO(kyuds): remove backwards compatibility code (else branch)
|
3454
|
-
# when API version is bumped
|
3455
3453
|
if realtime_gpu_availability_lists:
|
3456
|
-
|
3457
|
-
|
3458
|
-
|
3459
|
-
|
3460
|
-
|
3461
|
-
|
3462
|
-
|
3463
|
-
|
3464
|
-
available_qty = (gpu_availability.available
|
3465
|
-
if gpu_availability.available != -1
|
3466
|
-
else no_permissions_str)
|
3467
|
-
realtime_gpu_table.add_row([
|
3468
|
-
gpu_availability.gpu,
|
3469
|
-
_list_to_str(gpu_availability.counts),
|
3470
|
-
gpu_availability.capacity,
|
3471
|
-
available_qty,
|
3472
|
-
])
|
3473
|
-
gpu = gpu_availability.gpu
|
3474
|
-
capacity = gpu_availability.capacity
|
3475
|
-
# we want total, so skip permission denied.
|
3476
|
-
available = max(gpu_availability.available, 0)
|
3477
|
-
if capacity > 0:
|
3478
|
-
total_gpu_info[gpu][0] += capacity
|
3479
|
-
total_gpu_info[gpu][1] += available
|
3480
|
-
realtime_gpu_infos.append((ctx, realtime_gpu_table))
|
3481
|
-
else:
|
3482
|
-
# can remove this with api server version bump.
|
3483
|
-
# 2025.05.03
|
3484
|
-
availability_list = realtime_gpu_availability_lists
|
3454
|
+
if len(realtime_gpu_availability_lists[0]) != 2:
|
3455
|
+
# TODO(kyuds): for backwards compatibility, as we add new
|
3456
|
+
# context to the API server response in #5362. Remove this after
|
3457
|
+
# 0.10.0.
|
3458
|
+
realtime_gpu_availability_lists = [
|
3459
|
+
(context, realtime_gpu_availability_lists)
|
3460
|
+
]
|
3461
|
+
for (ctx, availability_list) in realtime_gpu_availability_lists:
|
3485
3462
|
realtime_gpu_table = log_utils.create_table(
|
3486
|
-
['GPU', qty_header, '
|
3463
|
+
['GPU', qty_header, 'UTILIZATION'])
|
3487
3464
|
for realtime_gpu_availability in sorted(availability_list):
|
3488
3465
|
gpu_availability = models.RealtimeGpuAvailability(
|
3489
3466
|
*realtime_gpu_availability)
|
@@ -3493,49 +3470,100 @@ def show_gpus(
|
|
3493
3470
|
realtime_gpu_table.add_row([
|
3494
3471
|
gpu_availability.gpu,
|
3495
3472
|
_list_to_str(gpu_availability.counts),
|
3496
|
-
gpu_availability.capacity,
|
3497
|
-
available_qty,
|
3473
|
+
f'{available_qty} of {gpu_availability.capacity} free',
|
3498
3474
|
])
|
3499
|
-
|
3475
|
+
gpu = gpu_availability.gpu
|
3476
|
+
capacity = gpu_availability.capacity
|
3477
|
+
# we want total, so skip permission denied.
|
3478
|
+
available = max(gpu_availability.available, 0)
|
3479
|
+
if capacity > 0:
|
3480
|
+
total_gpu_info[gpu][0] += capacity
|
3481
|
+
total_gpu_info[gpu][1] += available
|
3482
|
+
realtime_gpu_infos.append((ctx, realtime_gpu_table))
|
3483
|
+
# Collect node info for this context
|
3484
|
+
nodes_info = sdk.stream_and_get(
|
3485
|
+
sdk.kubernetes_node_info(context=ctx))
|
3486
|
+
all_nodes_info.append((ctx, nodes_info))
|
3500
3487
|
|
3501
3488
|
# display an aggregated table for all contexts
|
3502
3489
|
# if there are more than one contexts with GPUs
|
3503
3490
|
if len(realtime_gpu_infos) > 1:
|
3504
3491
|
total_realtime_gpu_table = log_utils.create_table(
|
3505
|
-
['GPU', '
|
3492
|
+
['GPU', 'UTILIZATION'])
|
3506
3493
|
for gpu, stats in total_gpu_info.items():
|
3507
|
-
total_realtime_gpu_table.add_row(
|
3494
|
+
total_realtime_gpu_table.add_row(
|
3495
|
+
[gpu, f'{stats[1]} of {stats[0]} free'])
|
3508
3496
|
else:
|
3509
3497
|
total_realtime_gpu_table = None
|
3510
3498
|
|
3511
|
-
return realtime_gpu_infos, total_realtime_gpu_table
|
3499
|
+
return realtime_gpu_infos, total_realtime_gpu_table, all_nodes_info
|
3512
3500
|
|
3513
|
-
def
|
3501
|
+
def _format_kubernetes_node_info_combined(
|
3502
|
+
contexts_info: List[Tuple[str,
|
3503
|
+
'models.KubernetesNodesInfo']]) -> str:
|
3514
3504
|
node_table = log_utils.create_table(
|
3515
|
-
['
|
3505
|
+
['CONTEXT', 'NODE', 'GPU', 'UTILIZATION'])
|
3516
3506
|
|
3517
|
-
nodes_info = sdk.stream_and_get(
|
3518
|
-
sdk.kubernetes_node_info(context=context))
|
3519
3507
|
no_permissions_str = '<no permissions>'
|
3520
|
-
|
3521
|
-
|
3522
|
-
|
3523
|
-
|
3524
|
-
|
3525
|
-
|
3508
|
+
hints = []
|
3509
|
+
|
3510
|
+
for context, nodes_info in contexts_info:
|
3511
|
+
context_name = context if context else 'default'
|
3512
|
+
if nodes_info.hint:
|
3513
|
+
hints.append(f'{context_name}: {nodes_info.hint}')
|
3514
|
+
|
3515
|
+
for node_name, node_info in nodes_info.node_info_dict.items():
|
3516
|
+
available = node_info.free[
|
3517
|
+
'accelerators_available'] if node_info.free[
|
3518
|
+
'accelerators_available'] != -1 else no_permissions_str
|
3519
|
+
acc_type = node_info.accelerator_type
|
3520
|
+
if acc_type is None:
|
3521
|
+
acc_type = '-'
|
3526
3522
|
node_table.add_row([
|
3527
|
-
node_name,
|
3528
|
-
node_info.total[
|
3523
|
+
context_name, node_name, acc_type,
|
3524
|
+
f'{available} of {node_info.total["accelerator_count"]} '
|
3525
|
+
'free'
|
3529
3526
|
])
|
3530
|
-
|
3531
|
-
|
3532
|
-
if
|
3533
|
-
k8s_per_node_acc_message +=
|
3527
|
+
|
3528
|
+
k8s_per_node_acc_message = ('Kubernetes per-node GPU availability')
|
3529
|
+
if hints:
|
3530
|
+
k8s_per_node_acc_message += ' (' + '; '.join(hints) + ')'
|
3531
|
+
|
3534
3532
|
return (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
3535
3533
|
f'{k8s_per_node_acc_message}'
|
3536
3534
|
f'{colorama.Style.RESET_ALL}\n'
|
3537
3535
|
f'{node_table.get_string()}')
|
3538
3536
|
|
3537
|
+
def _format_kubernetes_realtime_gpu(
|
3538
|
+
total_table: 'prettytable.PrettyTable',
|
3539
|
+
k8s_realtime_infos: List[Tuple[str, 'prettytable.PrettyTable']],
|
3540
|
+
all_nodes_info: List[Tuple[str, 'models.KubernetesNodesInfo']],
|
3541
|
+
show_node_info: bool) -> Generator[str, None, None]:
|
3542
|
+
yield (f'{colorama.Fore.GREEN}{colorama.Style.BRIGHT}'
|
3543
|
+
'Kubernetes GPUs'
|
3544
|
+
f'{colorama.Style.RESET_ALL}')
|
3545
|
+
# print total table
|
3546
|
+
if total_table is not None:
|
3547
|
+
yield '\n'
|
3548
|
+
yield from total_table.get_string()
|
3549
|
+
|
3550
|
+
# print individual infos.
|
3551
|
+
for (ctx, k8s_realtime_table) in k8s_realtime_infos:
|
3552
|
+
yield '\n'
|
3553
|
+
# Print context header separately
|
3554
|
+
if ctx:
|
3555
|
+
context_str = f'Context: {ctx}'
|
3556
|
+
else:
|
3557
|
+
context_str = 'Default Context'
|
3558
|
+
yield (
|
3559
|
+
f'{colorama.Fore.CYAN}{context_str}{colorama.Style.RESET_ALL}\n'
|
3560
|
+
)
|
3561
|
+
yield from k8s_realtime_table.get_string()
|
3562
|
+
|
3563
|
+
if show_node_info:
|
3564
|
+
yield '\n'
|
3565
|
+
yield _format_kubernetes_node_info_combined(all_nodes_info)
|
3566
|
+
|
3539
3567
|
def _output() -> Generator[str, None, None]:
|
3540
3568
|
gpu_table = log_utils.create_table(
|
3541
3569
|
['COMMON_GPU', 'AVAILABLE_QUANTITIES'])
|
@@ -3568,7 +3596,7 @@ def show_gpus(
|
|
3568
3596
|
# If --cloud kubernetes is not specified, we want to catch
|
3569
3597
|
# the case where no GPUs are available on the cluster and
|
3570
3598
|
# print the warning at the end.
|
3571
|
-
k8s_realtime_infos, total_table = _get_kubernetes_realtime_gpu_tables(context) # pylint: disable=line-too-long
|
3599
|
+
k8s_realtime_infos, total_table, all_nodes_info = _get_kubernetes_realtime_gpu_tables(context) # pylint: disable=line-too-long
|
3572
3600
|
except ValueError as e:
|
3573
3601
|
if not cloud_is_kubernetes:
|
3574
3602
|
# Make it a note if cloud is not kubernetes
|
@@ -3577,27 +3605,12 @@ def show_gpus(
|
|
3577
3605
|
else:
|
3578
3606
|
print_section_titles = True
|
3579
3607
|
|
3580
|
-
|
3581
|
-
|
3582
|
-
|
3583
|
-
|
3584
|
-
|
3585
|
-
|
3586
|
-
yield '\n\n'
|
3587
|
-
|
3588
|
-
# print individual infos.
|
3589
|
-
for (idx,
|
3590
|
-
(ctx,
|
3591
|
-
k8s_realtime_table)) in enumerate(k8s_realtime_infos):
|
3592
|
-
context_str = f'(Context: {ctx})' if ctx else ''
|
3593
|
-
yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
3594
|
-
f'Kubernetes GPUs {context_str}'
|
3595
|
-
f'{colorama.Style.RESET_ALL}\n')
|
3596
|
-
yield from k8s_realtime_table.get_string()
|
3597
|
-
yield '\n\n'
|
3598
|
-
yield _format_kubernetes_node_info(ctx)
|
3599
|
-
if idx != len(k8s_realtime_infos) - 1:
|
3600
|
-
yield '\n\n'
|
3608
|
+
yield from _format_kubernetes_realtime_gpu(
|
3609
|
+
total_table,
|
3610
|
+
k8s_realtime_infos,
|
3611
|
+
all_nodes_info,
|
3612
|
+
show_node_info=True)
|
3613
|
+
|
3601
3614
|
if kubernetes_autoscaling:
|
3602
3615
|
k8s_messages += (
|
3603
3616
|
'\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE)
|
@@ -3688,31 +3701,18 @@ def show_gpus(
|
|
3688
3701
|
print_section_titles = True
|
3689
3702
|
# TODO(romilb): Show filtered per node GPU availability here as well
|
3690
3703
|
try:
|
3691
|
-
k8s_realtime_infos, total_table
|
3692
|
-
|
3693
|
-
|
3694
|
-
|
3695
|
-
|
3696
|
-
|
3697
|
-
|
3698
|
-
|
3699
|
-
'Total Kubernetes GPUs'
|
3700
|
-
f'{colorama.Style.RESET_ALL}\n')
|
3701
|
-
yield from total_table.get_string()
|
3702
|
-
yield '\n\n'
|
3703
|
-
|
3704
|
-
# print individual tables
|
3705
|
-
for (ctx, k8s_realtime_table) in k8s_realtime_infos:
|
3706
|
-
context_str = f'(Context: {ctx})' if ctx else ''
|
3707
|
-
yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
3708
|
-
f'Kubernetes GPUs {context_str}'
|
3709
|
-
f'{colorama.Style.RESET_ALL}\n')
|
3710
|
-
yield from k8s_realtime_table.get_string()
|
3711
|
-
yield '\n\n'
|
3704
|
+
(k8s_realtime_infos, total_table,
|
3705
|
+
all_nodes_info) = _get_kubernetes_realtime_gpu_tables(
|
3706
|
+
context=region, name_filter=name, quantity_filter=quantity)
|
3707
|
+
|
3708
|
+
yield from _format_kubernetes_realtime_gpu(total_table,
|
3709
|
+
k8s_realtime_infos,
|
3710
|
+
all_nodes_info,
|
3711
|
+
show_node_info=False)
|
3712
3712
|
except ValueError as e:
|
3713
3713
|
# In the case of a specific accelerator, show the error message
|
3714
3714
|
# immediately (e.g., "Resources H100 not found ...")
|
3715
|
-
yield
|
3715
|
+
yield common_utils.format_exception(e, use_bracket=True)
|
3716
3716
|
if kubernetes_autoscaling:
|
3717
3717
|
k8s_messages += ('\n' +
|
3718
3718
|
kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE)
|
sky/clouds/gcp.py
CHANGED
@@ -17,6 +17,7 @@ from sky import skypilot_config
|
|
17
17
|
from sky.adaptors import gcp
|
18
18
|
from sky.clouds import service_catalog
|
19
19
|
from sky.clouds.utils import gcp_utils
|
20
|
+
from sky.provision.gcp import constants
|
20
21
|
from sky.utils import annotations
|
21
22
|
from sky.utils import common_utils
|
22
23
|
from sky.utils import registry
|
@@ -112,6 +113,10 @@ _DEFAULT_CPU_IMAGE_ID = 'skypilot:custom-cpu-ubuntu-2204'
|
|
112
113
|
# For GPU-related package version, see sky/clouds/service_catalog/images/provisioners/cuda.sh
|
113
114
|
_DEFAULT_GPU_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-2204'
|
114
115
|
_DEFAULT_GPU_K80_IMAGE_ID = 'skypilot:k80-debian-10'
|
116
|
+
# Use COS image with GPU Direct support.
|
117
|
+
# Need to contact GCP support to build our own image for GPUDirect-TCPX support.
|
118
|
+
# Refer to https://github.com/GoogleCloudPlatform/cluster-toolkit/blob/main/examples/machine-learning/a3-highgpu-8g/README.md#before-starting
|
119
|
+
_DEFAULT_GPU_DIRECT_IMAGE_ID = 'skypilot:gpu-direct-cos'
|
115
120
|
|
116
121
|
|
117
122
|
def _run_output(cmd):
|
@@ -488,6 +493,11 @@ class GCP(clouds.Cloud):
|
|
488
493
|
'gcp_project_id': self.get_project_id(dryrun),
|
489
494
|
**GCP._get_disk_specs(r.instance_type, _failover_disk_tier()),
|
490
495
|
}
|
496
|
+
enable_gpu_direct = skypilot_config.get_nested(
|
497
|
+
('gcp', 'enable_gpu_direct'),
|
498
|
+
False,
|
499
|
+
override_configs=resources.cluster_config_overrides)
|
500
|
+
resources_vars['enable_gpu_direct'] = enable_gpu_direct
|
491
501
|
accelerators = r.accelerators
|
492
502
|
if accelerators is not None:
|
493
503
|
assert len(accelerators) == 1, r
|
@@ -511,23 +521,28 @@ class GCP(clouds.Cloud):
|
|
511
521
|
else:
|
512
522
|
# Convert to GCP names:
|
513
523
|
# https://cloud.google.com/compute/docs/gpus
|
514
|
-
if acc in ('A100-80GB', 'L4'):
|
524
|
+
if acc in ('A100-80GB', 'L4', 'B200'):
|
515
525
|
# A100-80GB and L4 have a different name pattern.
|
516
526
|
resources_vars['gpu'] = f'nvidia-{acc.lower()}'
|
517
527
|
elif acc in ('H100', 'H100-MEGA'):
|
518
528
|
resources_vars['gpu'] = f'nvidia-{acc.lower()}-80gb'
|
529
|
+
elif acc in ('H200',):
|
530
|
+
resources_vars['gpu'] = f'nvidia-{acc.lower()}-141gb'
|
519
531
|
else:
|
520
532
|
resources_vars['gpu'] = 'nvidia-tesla-{}'.format(
|
521
533
|
acc.lower())
|
522
534
|
resources_vars['gpu_count'] = acc_count
|
523
|
-
if
|
524
|
-
|
525
|
-
# versions of CUDA as noted below.
|
526
|
-
# CUDA driver version 470.57.02, CUDA Library 11.4
|
527
|
-
image_id = _DEFAULT_GPU_K80_IMAGE_ID
|
535
|
+
if enable_gpu_direct:
|
536
|
+
image_id = _DEFAULT_GPU_DIRECT_IMAGE_ID
|
528
537
|
else:
|
529
|
-
|
530
|
-
|
538
|
+
if acc == 'K80':
|
539
|
+
# Though the image is called cu113, it actually has later
|
540
|
+
# versions of CUDA as noted below.
|
541
|
+
# CUDA driver version 470.57.02, CUDA Library 11.4
|
542
|
+
image_id = _DEFAULT_GPU_K80_IMAGE_ID
|
543
|
+
else:
|
544
|
+
# CUDA driver version 535.86.10, CUDA Library 12.2
|
545
|
+
image_id = _DEFAULT_GPU_IMAGE_ID
|
531
546
|
|
532
547
|
if (resources.image_id is not None and
|
533
548
|
resources.extract_docker_image() is None):
|
@@ -580,7 +595,21 @@ class GCP(clouds.Cloud):
|
|
580
595
|
|
581
596
|
# Add gVNIC from config
|
582
597
|
resources_vars['enable_gvnic'] = skypilot_config.get_nested(
|
583
|
-
('gcp', 'enable_gvnic'),
|
598
|
+
('gcp', 'enable_gvnic'),
|
599
|
+
False,
|
600
|
+
override_configs=resources.cluster_config_overrides)
|
601
|
+
placement_policy = skypilot_config.get_nested(
|
602
|
+
('gcp', 'placement_policy'),
|
603
|
+
None,
|
604
|
+
override_configs=resources.cluster_config_overrides)
|
605
|
+
resources_vars['user_data'] = None
|
606
|
+
if enable_gpu_direct:
|
607
|
+
resources_vars['user_data'] = constants.GPU_DIRECT_TCPX_USER_DATA
|
608
|
+
resources_vars[
|
609
|
+
'docker_run_options'] = constants.GPU_DIRECT_TCPX_SPECIFIC_OPTIONS
|
610
|
+
if placement_policy is None:
|
611
|
+
placement_policy = constants.COMPACT_GROUP_PLACEMENT_POLICY
|
612
|
+
resources_vars['placement_policy'] = placement_policy
|
584
613
|
|
585
614
|
return resources_vars
|
586
615
|
|
@@ -1032,15 +1061,24 @@ class GCP(clouds.Cloud):
|
|
1032
1061
|
raise exceptions.NotSupportedError(msg)
|
1033
1062
|
|
1034
1063
|
@classmethod
|
1035
|
-
def _get_disk_type(
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1064
|
+
def _get_disk_type(
|
1065
|
+
cls,
|
1066
|
+
instance_type: Optional[str],
|
1067
|
+
disk_tier: Optional[resources_utils.DiskTier],
|
1068
|
+
) -> str:
|
1069
|
+
|
1070
|
+
def _propagate_disk_type(
|
1071
|
+
lowest: Optional[str] = None,
|
1072
|
+
highest: Optional[str] = None,
|
1073
|
+
# pylint: disable=redefined-builtin
|
1074
|
+
all: Optional[str] = None) -> None:
|
1040
1075
|
if lowest is not None:
|
1041
1076
|
tier2name[resources_utils.DiskTier.LOW] = lowest
|
1042
1077
|
if highest is not None:
|
1043
1078
|
tier2name[resources_utils.DiskTier.ULTRA] = highest
|
1079
|
+
if all is not None:
|
1080
|
+
for tier in tier2name:
|
1081
|
+
tier2name[tier] = all
|
1044
1082
|
|
1045
1083
|
tier = cls._translate_disk_tier(disk_tier)
|
1046
1084
|
|
@@ -1054,7 +1092,8 @@ class GCP(clouds.Cloud):
|
|
1054
1092
|
|
1055
1093
|
# Remap series-specific disk types.
|
1056
1094
|
# Reference: https://github.com/skypilot-org/skypilot/issues/4705
|
1057
|
-
|
1095
|
+
assert instance_type is not None, (instance_type, disk_tier)
|
1096
|
+
series = instance_type.split('-')[0]
|
1058
1097
|
|
1059
1098
|
# General handling of unsupported disk types
|
1060
1099
|
if series in ['n1', 'a2', 'g2']:
|
@@ -1065,6 +1104,9 @@ class GCP(clouds.Cloud):
|
|
1065
1104
|
# These series don't support pd-standard, use pd-balanced for LOW.
|
1066
1105
|
_propagate_disk_type(
|
1067
1106
|
lowest=tier2name[resources_utils.DiskTier.MEDIUM])
|
1107
|
+
if instance_type.startswith('a3-ultragpu'):
|
1108
|
+
# a3-ultragpu instances only support hyperdisk-balanced.
|
1109
|
+
_propagate_disk_type(all='hyperdisk-balanced')
|
1068
1110
|
|
1069
1111
|
# Series specific handling
|
1070
1112
|
if series == 'n2':
|
@@ -1087,7 +1129,8 @@ class GCP(clouds.Cloud):
|
|
1087
1129
|
specs: Dict[str, Any] = {
|
1088
1130
|
'disk_tier': cls._get_disk_type(instance_type, disk_tier)
|
1089
1131
|
}
|
1090
|
-
if disk_tier == resources_utils.DiskTier.ULTRA
|
1132
|
+
if (disk_tier == resources_utils.DiskTier.ULTRA and
|
1133
|
+
specs['disk_tier'] == 'pd-extreme'):
|
1091
1134
|
# Only pd-extreme supports custom iops.
|
1092
1135
|
# see https://cloud.google.com/compute/docs/disks#disk-types
|
1093
1136
|
specs['disk_iops'] = 20000
|
@@ -182,6 +182,9 @@ TPU_V4_HOST_DF = pd.read_csv(
|
|
182
182
|
SERIES_TO_DISCRIPTION = {
|
183
183
|
'a2': 'A2 Instance',
|
184
184
|
'a3': 'A3 Instance',
|
185
|
+
# TODO(zhwu): GCP does not have A4 instance in SKUs API yet. We keep it here
|
186
|
+
# for completeness.
|
187
|
+
'a4': 'A4 Instance',
|
185
188
|
'c2': 'Compute optimized',
|
186
189
|
'c2d': 'C2D AMD Instance',
|
187
190
|
'c3': 'C3 Instance',
|
@@ -198,6 +201,7 @@ SERIES_TO_DISCRIPTION = {
|
|
198
201
|
't2a': 'T2A Arm Instance',
|
199
202
|
't2d': 'T2D AMD Instance',
|
200
203
|
}
|
204
|
+
|
201
205
|
creds, project_id = google.auth.default()
|
202
206
|
gcp_client = discovery.build('compute', 'v1')
|
203
207
|
tpu_client = discovery.build('tpu', 'v1')
|
@@ -434,10 +438,18 @@ def _get_gpus_for_zone(zone: str) -> 'pd.DataFrame':
|
|
434
438
|
gpu_name = gpu_name.upper()
|
435
439
|
if 'H100-80GB' in gpu_name:
|
436
440
|
gpu_name = 'H100'
|
437
|
-
|
441
|
+
|
442
|
+
if 'H100-MEGA' in gpu_name:
|
438
443
|
gpu_name = 'H100-MEGA'
|
439
444
|
if count != 8:
|
440
|
-
|
445
|
+
continue
|
446
|
+
elif 'H200' in gpu_name:
|
447
|
+
gpu_name = 'H200'
|
448
|
+
if count != 8:
|
449
|
+
continue
|
450
|
+
elif 'B200' in gpu_name:
|
451
|
+
gpu_name = 'B200'
|
452
|
+
if count != 8:
|
441
453
|
continue
|
442
454
|
if 'VWS' in gpu_name:
|
443
455
|
continue
|
@@ -468,6 +480,8 @@ def _gpu_info_from_name(name: str) -> Optional[Dict[str, List[Dict[str, Any]]]]:
|
|
468
480
|
'A100': 40 * 1024,
|
469
481
|
'H100': 80 * 1024,
|
470
482
|
'H100-MEGA': 80 * 1024,
|
483
|
+
'H200': 141 * 1024,
|
484
|
+
'B200': 180 * 1024,
|
471
485
|
'P4': 8 * 1024,
|
472
486
|
'T4': 16 * 1024,
|
473
487
|
'V100': 16 * 1024,
|
@@ -507,22 +521,30 @@ def get_gpu_df(skus: List[Dict[str, Any]],
|
|
507
521
|
ondemand_or_spot = 'OnDemand' if not spot else 'Preemptible'
|
508
522
|
gpu_price = None
|
509
523
|
for sku in gpu_skus:
|
524
|
+
row_gpu_name = row['AcceleratorName']
|
510
525
|
if row['Region'] not in sku['serviceRegions']:
|
511
526
|
continue
|
512
527
|
if sku['category']['usageType'] != ondemand_or_spot:
|
513
528
|
continue
|
514
529
|
|
515
|
-
gpu_names = [
|
516
|
-
if
|
517
|
-
gpu_names = ['A100 80GB']
|
518
|
-
|
519
|
-
gpu_names = ['H100 80GB']
|
520
|
-
|
530
|
+
gpu_names = [f'{row_gpu_name} GPU']
|
531
|
+
if row_gpu_name == 'A100-80GB':
|
532
|
+
gpu_names = ['A100 80GB GPU']
|
533
|
+
elif row_gpu_name == 'H100':
|
534
|
+
gpu_names = ['H100 80GB GPU']
|
535
|
+
elif row_gpu_name == 'H100-MEGA':
|
521
536
|
# Seems that H100-MEGA has two different descriptions in SKUs in
|
522
537
|
# different regions: 'H100 80GB Mega' and 'H100 80GB Plus'.
|
523
|
-
gpu_names = [
|
524
|
-
|
525
|
-
|
538
|
+
gpu_names = [
|
539
|
+
'H100 80GB Mega GPU', 'H100 Mega 80GB GPU',
|
540
|
+
'H100 80GB Plus GPU'
|
541
|
+
]
|
542
|
+
elif row_gpu_name == 'H200':
|
543
|
+
gpu_names = ['H200 141GB GPU']
|
544
|
+
elif row_gpu_name == 'B200':
|
545
|
+
gpu_names = ['Nvidia B200 (1 gpu slice)']
|
546
|
+
if not any(
|
547
|
+
gpu_name in sku['description'] for gpu_name in gpu_names):
|
526
548
|
continue
|
527
549
|
|
528
550
|
unit_price = _get_unit_price(sku)
|
@@ -104,7 +104,13 @@ _ACC_INSTANCE_TYPE_DICTS = {
|
|
104
104
|
},
|
105
105
|
'H100-MEGA': {
|
106
106
|
8: ['a3-megagpu-8g'],
|
107
|
-
}
|
107
|
+
},
|
108
|
+
'H200': {
|
109
|
+
8: ['a3-ultragpu-8g'],
|
110
|
+
},
|
111
|
+
'B200': {
|
112
|
+
8: ['a4-highgpu-8g'],
|
113
|
+
},
|
108
114
|
}
|
109
115
|
# Enable GPU type inference from instance types
|
110
116
|
_INSTANCE_TYPE_TO_ACC = {
|
sky/dashboard/out/404.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>404: This page could not be found</title><meta name="next-head-count" content="3"/><link rel="preload" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-830f59b8404e96b8.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-1be831200e60c5c0.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>404: This page could not be found</title><meta name="next-head-count" content="3"/><link rel="preload" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-830f59b8404e96b8.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-1be831200e60c5c0.js" defer=""></script><script src="/dashboard/_next/static/tdxxQrPV6NW90a983oHXe/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/tdxxQrPV6NW90a983oHXe/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div style="font-family:system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji";height:100vh;text-align:center;display:flex;flex-direction:column;align-items:center;justify-content:center"><div style="line-height:48px"><style>body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}</style><h1 class="next-error-h1" style="display:inline-block;margin:0 20px 0 0;padding-right:23px;font-size:24px;font-weight:500;vertical-align:top">404</h1><div style="display:inline-block"><h2 style="font-size:14px;font-weight:400;line-height:28px">This page could not be found<!-- -->.</h2></div></div></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"tdxxQrPV6NW90a983oHXe","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
|
@@ -0,0 +1 @@
|
|
1
|
+
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[845],{1266:function(e,t,s){s.d(t,{cV:function(){return C},Oh:function(){return v},_R:function(){return w}});var a=s(5893),r=s(7294),n=s(5235),o=s(2350),l=s(3767);let c=n.fC;n.xz;let i=n.h_;n.x8;let d=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)(n.aV,{ref:t,className:(0,o.cn)("fixed inset-0 z-50 bg-black/50 backdrop-blur-sm data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",s),...r})});d.displayName=n.aV.displayName;let u=r.forwardRef((e,t)=>{let{className:s,children:r,...c}=e;return(0,a.jsxs)(i,{children:[(0,a.jsx)(d,{}),(0,a.jsxs)(n.VY,{ref:t,className:(0,o.cn)("fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border border-gray-200 bg-white p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg",s),...c,children:[r,(0,a.jsxs)(n.x8,{className:"absolute right-4 top-4 rounded-sm opacity-70 ring-offset-white transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-gray-400 focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-gray-100 data-[state=open]:text-gray-500",children:[(0,a.jsx)(l.Z,{className:"h-4 w-4"}),(0,a.jsx)("span",{className:"sr-only",children:"Close"})]})]})]})});u.displayName=n.VY.displayName;let m=e=>{let{className:t,...s}=e;return(0,a.jsx)("div",{className:(0,o.cn)("flex flex-col space-y-1.5 text-center sm:text-left",t),...s})};m.displayName="DialogHeader";let f=e=>{let{className:t,...s}=e;return(0,a.jsx)("div",{className:(0,o.cn)("flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2",t),...s})};f.displayName="DialogFooter";let h=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)(n.Dx,{ref:t,className:(0,o.cn)("text-lg font-semibold leading-none tracking-tight",s),...r})});h.displayName=n.Dx.displayName;let x=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)(n.dk,{ref:t,className:(0,o.cn)("text-sm text-gray-500",s),...r})});x.displayName=n.dk.displayName;var p=s(803),g=s(7673),b=s(8671),j=s(7469),y=s(3225),N=s(3001);function v(e){let{isOpen:t,onClose:s,cluster:n}=e,[o,l]=r.useState(!1),i=e=>{navigator.clipboard.writeText(e),l(!0),setTimeout(()=>l(!1),2e3)},d=["sky status ".concat(n),"ssh ".concat(n)],f=d.join("\n");return(0,a.jsx)(c,{open:t,onOpenChange:s,children:(0,a.jsxs)(u,{className:"sm:max-w-md",children:[(0,a.jsxs)(m,{children:[(0,a.jsxs)(h,{children:["Connect to: ",(0,a.jsx)("span",{className:"font-light",children:n})]}),(0,a.jsx)(x,{children:"Use these instructions to connect to your cluster via SSH."})]}),(0,a.jsxs)("div",{className:"flex flex-col space-y-4",children:[(0,a.jsxs)("div",{children:[(0,a.jsx)("h3",{className:"text-sm font-medium mb-2",children:"SSH Command"}),(0,a.jsx)(g.Zb,{className:"p-3 bg-gray-50",children:(0,a.jsxs)("div",{className:"flex items-center justify-between",children:[(0,a.jsx)("pre",{className:"text-sm w-full whitespace-pre-wrap",children:d.map((e,t)=>(0,a.jsx)("code",{className:"block",children:e},t))}),(0,a.jsx)(j.WH,{content:o?"Copied!":"Copy command",children:(0,a.jsx)(p.z,{variant:"ghost",size:"icon",onClick:()=>i(f),className:"h-8 w-8 rounded-full",children:(0,a.jsx)(b.Z,{className:"h-4 w-4"})})})]})})]}),(0,a.jsxs)("div",{children:[(0,a.jsx)("h3",{className:"text-sm font-medium mb-2",children:"Additional Information"}),(0,a.jsxs)("p",{className:"text-sm text-secondary-foreground",children:["Make sure to run"," ",(0,a.jsxs)("code",{className:"text-sm",children:["sky status ",n]})," first to have SkyPilot set up the SSH access."]})]})]})]})})}function w(e){let{isOpen:t,onClose:s,cluster:r}=e,n=(0,N.X)();return(0,a.jsx)(c,{open:t,onOpenChange:s,children:(0,a.jsx)(u,{className:"sm:max-w-3xl",children:(0,a.jsxs)(m,{children:[(0,a.jsxs)(h,{children:["Connect to: ",(0,a.jsx)("span",{className:"font-light",children:r})]}),(0,a.jsx)(x,{children:(0,a.jsxs)("div",{className:"flex flex-col space-y-4",children:[(0,a.jsxs)("div",{children:[(0,a.jsx)("h3",{className:"text-sm font-medium mb-2 my-2",children:"Setup SSH access"}),(0,a.jsx)(g.Zb,{className:"p-3 bg-gray-50",children:(0,a.jsxs)("div",{className:"flex items-center justify-between",children:[(0,a.jsx)("pre",{className:"text-sm",children:(0,a.jsxs)("code",{children:["sky status ",r]})}),(0,a.jsx)(j.WH,{content:"Copy command",children:(0,a.jsx)(p.z,{variant:"ghost",size:"icon",onClick:()=>navigator.clipboard.writeText("sky status ".concat(r)),className:"h-8 w-8 rounded-full",children:(0,a.jsx)(b.Z,{className:"h-4 w-4"})})})]})})]}),(0,a.jsxs)("div",{children:[(0,a.jsx)("h3",{className:"text-sm font-medium mb-2 my-2",children:"Connect with VSCode/Cursor"}),(0,a.jsx)(g.Zb,{className:"p-3 bg-gray-50",children:(0,a.jsxs)("div",{className:"flex items-center justify-between",children:[(0,a.jsx)("pre",{className:"text-sm",children:(0,a.jsxs)("code",{children:["code --remote ssh-remote+",r]})}),(0,a.jsx)(j.WH,{content:"Copy command",children:(0,a.jsx)(p.z,{variant:"ghost",size:"icon",onClick:()=>navigator.clipboard.writeText("code --remote ssh-remote+".concat(r)),className:"h-8 w-8 rounded-full",children:(0,a.jsx)(b.Z,{className:"h-4 w-4"})})})]})})]}),(0,a.jsxs)("div",{children:[(0,a.jsx)("h3",{className:"text-sm font-medium",children:"Or use the GUI to connect"}),(0,a.jsx)("div",{className:"relative ".concat(n?"-mt-5":"-mt-10"),style:{paddingBottom:"70%"},children:(0,a.jsxs)("video",{className:"absolute top-0 left-0 w-full h-full rounded-lg",controls:!0,autoPlay:!0,muted:!0,preload:"metadata",children:[(0,a.jsx)("source",{src:"".concat(y.GW,"/videos/cursor-small.mp4"),type:"video/mp4"}),"Your browser does not support the video tag."]})})]})]})})]})})})}function C(e){let{isOpen:t,onClose:s,onConfirm:r,title:n,message:o,confirmText:l="Confirm",confirmVariant:i="destructive"}=e;return(0,a.jsx)(c,{open:t,onOpenChange:s,children:(0,a.jsxs)(u,{className:"sm:max-w-md",children:[(0,a.jsxs)(m,{children:[(0,a.jsx)(h,{children:n}),(0,a.jsx)(x,{children:o})]}),(0,a.jsxs)(f,{className:"flex justify-end gap-2 pt-4",children:[(0,a.jsx)(p.z,{variant:"outline",onClick:s,children:"Cancel"}),(0,a.jsx)(p.z,{variant:i,onClick:()=>{r(),s()},children:l})]})]})})}},803:function(e,t,s){s.d(t,{z:function(){return i}});var a=s(5893),r=s(7294),n=s(8426),o=s(5139),l=s(2350);let c=(0,o.j)("inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50",{variants:{variant:{default:"bg-primary text-primary-foreground hover:bg-primary/90",destructive:"bg-destructive text-destructive-foreground hover:bg-destructive/90",outline:"border border-input bg-background hover:bg-accent hover:text-accent-foreground",secondary:"bg-secondary text-secondary-foreground hover:bg-secondary/80",ghost:"hover:bg-accent hover:text-accent-foreground",link:"text-primary underline-offset-4 hover:underline"},size:{default:"h-10 px-4 py-2",sm:"h-9 rounded-md px-3",lg:"h-11 rounded-md px-8",icon:"h-10 w-10"}},defaultVariants:{variant:"default",size:"default"}}),i=r.forwardRef((e,t)=>{let{className:s,variant:r,size:o,asChild:i=!1,...d}=e,u=i?n.g7:"button";return(0,a.jsx)(u,{className:(0,l.cn)(c({variant:r,size:o,className:s})),ref:t,...d})});i.displayName="Button"},8764:function(e,t,s){s.d(t,{RM:function(){return c},SC:function(){return i},iA:function(){return o},pj:function(){return u},ss:function(){return d},xD:function(){return l}});var a=s(5893),r=s(7294),n=s(2350);let o=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)("div",{className:"relative w-full overflow-auto",children:(0,a.jsx)("table",{ref:t,className:(0,n.cn)("w-full caption-bottom text-base",s),...r})})});o.displayName="Table";let l=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)("thead",{ref:t,className:(0,n.cn)("[&_tr]:border-b",s),...r})});l.displayName="TableHeader";let c=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)("tbody",{ref:t,className:(0,n.cn)("[&_tr:last-child]:border-0",s),...r})});c.displayName="TableBody",r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)("tfoot",{ref:t,className:(0,n.cn)("border-t bg-muted/50 font-medium [&>tr]:last:border-b-0",s),...r})}).displayName="TableFooter";let i=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)("tr",{ref:t,className:(0,n.cn)("border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted",s),...r})});i.displayName="TableRow";let d=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)("th",{ref:t,className:(0,n.cn)("h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0",s),...r})});d.displayName="TableHead";let u=r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)("td",{ref:t,className:(0,n.cn)("p-4 align-middle [&:has([role=checkbox])]:pr-0",s),...r})});u.displayName="TableCell",r.forwardRef((e,t)=>{let{className:s,...r}=e;return(0,a.jsx)("caption",{ref:t,className:(0,n.cn)("mt-4 text-base text-muted-foreground",s),...r})}).displayName="TableCaption"},3266:function(e,t,s){s.d(t,{QL:function(){return d},Sl:function(){return c},zd:function(){return l}});var a=s(7294),r=s(5821),n=s(3225);let o={UP:"RUNNING",STOPPED:"STOPPED",INIT:"LAUNCHING",null:"TERMINATED"};async function l(){let{clusterNames:e=null}=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};try{let t=await fetch("".concat(n.f4,"/status"),{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({cluster_names:e,all_users:!0})}),s=t.headers.get("X-Skypilot-Request-ID")||t.headers.get("X-Request-ID"),a=await fetch("".concat(n.f4,"/api/get?request_id=").concat(s)),r=await a.json();return(r.return_value?JSON.parse(r.return_value):[]).map(e=>({status:o[e.status],cluster:e.name,user:e.user_name,infra:e.cloud,region:e.region,cpus:e.cpus,mem:e.memory,gpus:e.accelerators,resources_str:e.resources_str,time:new Date(1e3*e.launched_at),num_nodes:e.nodes,jobs:[],events:[{time:new Date(1e3*e.launched_at),event:"Cluster created."}]}))}catch(e){return console.error("Error fetching clusters:",e),[]}}async function c(e){let{clusterName:t,jobId:s,onNewLog:a}=e;try{let e=(await fetch("".concat(n.f4,"/logs"),{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({follow:!1,cluster_name:t,job_id:s})})).body.getReader();for(;;){let{done:t,value:s}=await e.read();if(t)break;let r=new TextDecoder().decode(s);a(r)}}catch(e){console.error("Error in streamClusterJobLogs:",e),(0,r.C)("Error in streamClusterJobLogs: ".concat(e.message),"error")}}async function i(e){let{clusterName:t}=e;try{let e=await fetch("".concat(n.f4,"/queue"),{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({cluster_name:t,all_users:!0})}),s=e.headers.get("X-Skypilot-Request-ID")||e.headers.get("X-Request-ID"),a=await fetch("".concat(n.f4,"/api/get?request_id=").concat(s)),r=await a.json();return JSON.parse(r.return_value).map(e=>{let s=e.end_at?e.end_at:Date.now()/1e3,a=0,r=0;return e.submitted_at&&(a=s-e.submitted_at),e.start_at&&(r=s-e.start_at),{id:e.job_id,status:e.status,job:e.job_name,user:e.username,gpus:e.accelerators||{},submitted_at:e.submitted_at?new Date(1e3*e.submitted_at):null,resources:e.resources,cluster:t,total_duration:a,job_duration:r,infra:"",logs:""}})}catch(e){return console.error("Error fetching cluster jobs:",e),[]}}function d(e){let{cluster:t,job:s=null}=e,[r,n]=(0,a.useState)(null),[o,c]=(0,a.useState)(null),[d,u]=(0,a.useState)(!0),[m,f]=(0,a.useState)(!0),h=(0,a.useCallback)(async()=>{if(t)try{u(!0);let e=await l({clusterNames:[t]});n(e[0])}catch(e){console.error("Error fetching cluster data:",e)}finally{u(!1)}},[t]),x=(0,a.useCallback)(async()=>{if(t)try{f(!0);let e=await i({clusterName:t,job:s});c(e)}catch(e){console.error("Error fetching cluster job data:",e)}finally{f(!1)}},[t,s]),p=(0,a.useCallback)(async()=>{await Promise.all([h(),x()])},[h,x]);return(0,a.useEffect)(()=>{h(),x()},[t,s,h,x]),{clusterData:r,clusterJobData:o,loading:d||m,refreshData:p}}},4545:function(e,t,s){function a(e){return e.startsWith("sky-jobs-controller-")}function r(e,t,s){return null===t?e:[...e].sort((e,a)=>e[t]<a[t]?"ascending"===s?-1:1:e[t]>a[t]?"ascending"===s?1:-1:0)}s.d(t,{R0:function(){return r},Ym:function(){return a}})}}]);
|
sky/dashboard/out/_next/static/{C0fkLhvxyqkymoV7IeInQ → tdxxQrPV6NW90a983oHXe}/_buildManifest.js
RENAMED
@@ -1 +1 @@
|
|
1
|
-
self.__BUILD_MANIFEST=function(s,c,e,t,a,b){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-f9f039532ca8cbc4.js"],"/_error":["static/chunks/pages/_error-1be831200e60c5c0.js"],"/clusters":[s,e,c,t,a,"static/chunks/pages/clusters-a93b93e10b8b074e.js"],"/clusters/[cluster]":[s,e,c,t,b,a,"static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js"],"/clusters/[cluster]/[job]":[s,c,"static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js"],"/jobs":[s,e,c,t,b,"static/chunks/pages/jobs-a75029b67aab6a2e.js"],"/jobs/[job]":[s,c,"static/chunks/pages/jobs/[job]-03f279c6741fb48b.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/jobs","/jobs/[job]"]}}("static/chunks/678-206dddca808e6d16.js","static/chunks/979-7bf73a4c7cea0f5c.js","static/chunks/312-c3c8845990db8ffc.js","static/chunks/845-
|
1
|
+
self.__BUILD_MANIFEST=function(s,c,e,t,a,b){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-f9f039532ca8cbc4.js"],"/_error":["static/chunks/pages/_error-1be831200e60c5c0.js"],"/clusters":[s,e,c,t,a,"static/chunks/pages/clusters-a93b93e10b8b074e.js"],"/clusters/[cluster]":[s,e,c,t,b,a,"static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js"],"/clusters/[cluster]/[job]":[s,c,"static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js"],"/jobs":[s,e,c,t,b,"static/chunks/pages/jobs-a75029b67aab6a2e.js"],"/jobs/[job]":[s,c,"static/chunks/pages/jobs/[job]-03f279c6741fb48b.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/jobs","/jobs/[job]"]}}("static/chunks/678-206dddca808e6d16.js","static/chunks/979-7bf73a4c7cea0f5c.js","static/chunks/312-c3c8845990db8ffc.js","static/chunks/845-0ca6f2c1ba667c3b.js","static/chunks/37-0a572fe0dbb89c4d.js","static/chunks/236-f49500b82ad5392d.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
|
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-830f59b8404e96b8.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js" defer=""></script><script src="/dashboard/_next/static/chunks/678-206dddca808e6d16.js" defer=""></script><script src="/dashboard/_next/static/chunks/979-7bf73a4c7cea0f5c.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/clusters/%5Bcluster%5D/%5Bjob%5D-e15db85d0ea1fbe1.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-830f59b8404e96b8.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js" defer=""></script><script src="/dashboard/_next/static/chunks/678-206dddca808e6d16.js" defer=""></script><script src="/dashboard/_next/static/chunks/979-7bf73a4c7cea0f5c.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/clusters/%5Bcluster%5D/%5Bjob%5D-e15db85d0ea1fbe1.js" defer=""></script><script src="/dashboard/_next/static/tdxxQrPV6NW90a983oHXe/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/tdxxQrPV6NW90a983oHXe/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div>Loading...</div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/clusters/[cluster]/[job]","query":{},"buildId":"tdxxQrPV6NW90a983oHXe","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|