skypilot-nightly 1.0.0.dev20250413__py3-none-any.whl → 1.0.0.dev20250417__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +7 -0
- sky/authentication.py +2 -2
- sky/backends/backend_utils.py +3 -3
- sky/backends/cloud_vm_ray_backend.py +22 -29
- sky/check.py +1 -1
- sky/cli.py +161 -55
- sky/client/cli.py +161 -55
- sky/client/sdk.py +5 -5
- sky/clouds/aws.py +2 -2
- sky/clouds/kubernetes.py +0 -8
- sky/clouds/oci.py +1 -1
- sky/core.py +17 -11
- sky/exceptions.py +5 -0
- sky/jobs/constants.py +8 -1
- sky/jobs/server/core.py +12 -8
- sky/models.py +28 -0
- sky/provision/kubernetes/config.py +1 -1
- sky/provision/kubernetes/instance.py +16 -14
- sky/provision/kubernetes/network_utils.py +1 -1
- sky/provision/kubernetes/utils.py +50 -22
- sky/resources.py +47 -2
- sky/serve/constants.py +6 -0
- sky/serve/load_balancing_policies.py +0 -4
- sky/serve/serve_state.py +0 -6
- sky/serve/server/core.py +5 -2
- sky/server/common.py +133 -46
- sky/server/constants.py +1 -1
- sky/server/requests/serializers/decoders.py +2 -5
- sky/server/requests/serializers/encoders.py +2 -5
- sky/server/server.py +1 -1
- sky/setup_files/dependencies.py +1 -0
- sky/sky_logging.py +2 -2
- sky/skylet/constants.py +5 -7
- sky/skylet/job_lib.py +3 -3
- sky/skypilot_config.py +194 -73
- sky/templates/kubernetes-ray.yml.j2 +1 -1
- sky/utils/cli_utils/status_utils.py +12 -5
- sky/utils/config_utils.py +39 -14
- sky/utils/controller_utils.py +44 -6
- sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
- sky/utils/kubernetes/gpu_labeler.py +99 -16
- sky/utils/schemas.py +24 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/RECORD +49 -49
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/top_level.txt +0 -0
sky/client/cli.py
CHANGED
@@ -54,6 +54,7 @@ from sky import jobs as managed_jobs
|
|
54
54
|
from sky import models
|
55
55
|
from sky import serve as serve_lib
|
56
56
|
from sky import sky_logging
|
57
|
+
from sky import skypilot_config
|
57
58
|
from sky.adaptors import common as adaptors_common
|
58
59
|
from sky.benchmark import benchmark_state
|
59
60
|
from sky.benchmark import benchmark_utils
|
@@ -278,6 +279,54 @@ def _merge_env_vars(env_dict: Optional[Dict[str, str]],
|
|
278
279
|
return list(env_dict.items())
|
279
280
|
|
280
281
|
|
282
|
+
def config_option(expose_value: bool):
|
283
|
+
"""A decorator for the --config option.
|
284
|
+
|
285
|
+
This decorator is used to parse the --config option.
|
286
|
+
|
287
|
+
Any overrides specified in the command line will be applied to the skypilot
|
288
|
+
config before the decorated function is called.
|
289
|
+
|
290
|
+
If expose_value is True, the decorated function will receive the parsed
|
291
|
+
config overrides as 'config_override' parameter.
|
292
|
+
|
293
|
+
Args:
|
294
|
+
expose_value: Whether to expose the value of the option to the decorated
|
295
|
+
function.
|
296
|
+
"""
|
297
|
+
|
298
|
+
def preprocess_config_options(ctx, param, value):
|
299
|
+
del ctx # Unused.
|
300
|
+
param.name = 'config_override'
|
301
|
+
try:
|
302
|
+
if len(value) == 0:
|
303
|
+
return None
|
304
|
+
elif len(value) > 1:
|
305
|
+
raise ValueError('argument specified multiple times. '
|
306
|
+
'To specify multiple configs, use '
|
307
|
+
'--config nested.key1=val1,another.key2=val2')
|
308
|
+
else:
|
309
|
+
# Apply the config overrides to the skypilot config.
|
310
|
+
return skypilot_config.apply_cli_config(value[0])
|
311
|
+
except ValueError as e:
|
312
|
+
raise click.BadParameter(f'{str(e)}') from e
|
313
|
+
|
314
|
+
def return_option_decorator(func):
|
315
|
+
return click.option(
|
316
|
+
'--config',
|
317
|
+
required=False,
|
318
|
+
type=str,
|
319
|
+
multiple=True,
|
320
|
+
expose_value=expose_value,
|
321
|
+
callback=preprocess_config_options,
|
322
|
+
help=('Path to a config file or a comma-separated '
|
323
|
+
'list of key-value pairs '
|
324
|
+
'(e.g. "nested.key1=val1,another.key2=val2").'),
|
325
|
+
)(func)
|
326
|
+
|
327
|
+
return return_option_decorator
|
328
|
+
|
329
|
+
|
281
330
|
_COMMON_OPTIONS = [
|
282
331
|
click.option('--async/--no-async',
|
283
332
|
'async_call',
|
@@ -630,7 +679,8 @@ def _parse_override_params(
|
|
630
679
|
image_id: Optional[str] = None,
|
631
680
|
disk_size: Optional[int] = None,
|
632
681
|
disk_tier: Optional[str] = None,
|
633
|
-
ports: Optional[Tuple[str, ...]] = None
|
682
|
+
ports: Optional[Tuple[str, ...]] = None,
|
683
|
+
config_override: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
634
684
|
"""Parses the override parameters into a dictionary."""
|
635
685
|
override_params: Dict[str, Any] = {}
|
636
686
|
if cloud is not None:
|
@@ -691,6 +741,8 @@ def _parse_override_params(
|
|
691
741
|
override_params['ports'] = None
|
692
742
|
else:
|
693
743
|
override_params['ports'] = ports
|
744
|
+
if config_override:
|
745
|
+
override_params['_cluster_config_overrides'] = config_override
|
694
746
|
return override_params
|
695
747
|
|
696
748
|
|
@@ -793,6 +845,7 @@ def _make_task_or_dag_from_entrypoint_with_overrides(
|
|
793
845
|
field_to_ignore: Optional[List[str]] = None,
|
794
846
|
# job launch specific
|
795
847
|
job_recovery: Optional[str] = None,
|
848
|
+
config_override: Optional[Dict[str, Any]] = None,
|
796
849
|
) -> Union[sky.Task, sky.Dag]:
|
797
850
|
"""Creates a task or a dag from an entrypoint with overrides.
|
798
851
|
|
@@ -826,7 +879,8 @@ def _make_task_or_dag_from_entrypoint_with_overrides(
|
|
826
879
|
image_id=image_id,
|
827
880
|
disk_size=disk_size,
|
828
881
|
disk_tier=disk_tier,
|
829
|
-
ports=ports
|
882
|
+
ports=ports,
|
883
|
+
config_override=config_override)
|
830
884
|
if field_to_ignore is not None:
|
831
885
|
_pop_and_ignore_fields_in_override_params(override_params,
|
832
886
|
field_to_ignore)
|
@@ -1010,6 +1064,7 @@ def cli():
|
|
1010
1064
|
|
1011
1065
|
|
1012
1066
|
@cli.command(cls=_DocumentedCodeCommand)
|
1067
|
+
@config_option(expose_value=True)
|
1013
1068
|
@click.argument('entrypoint',
|
1014
1069
|
required=False,
|
1015
1070
|
type=str,
|
@@ -1139,7 +1194,8 @@ def launch(
|
|
1139
1194
|
no_setup: bool,
|
1140
1195
|
clone_disk_from: Optional[str],
|
1141
1196
|
fast: bool,
|
1142
|
-
async_call: bool
|
1197
|
+
async_call: bool,
|
1198
|
+
config_override: Optional[Dict[str, Any]] = None):
|
1143
1199
|
"""Launch a cluster or task.
|
1144
1200
|
|
1145
1201
|
If ENTRYPOINT points to a valid YAML file, it is read in as the task
|
@@ -1181,6 +1237,7 @@ def launch(
|
|
1181
1237
|
disk_size=disk_size,
|
1182
1238
|
disk_tier=disk_tier,
|
1183
1239
|
ports=ports,
|
1240
|
+
config_override=config_override,
|
1184
1241
|
)
|
1185
1242
|
if isinstance(task_or_dag, sky.Dag):
|
1186
1243
|
raise click.UsageError(
|
@@ -1245,6 +1302,7 @@ def launch(
|
|
1245
1302
|
|
1246
1303
|
|
1247
1304
|
@cli.command(cls=_DocumentedCodeCommand)
|
1305
|
+
@config_option(expose_value=True)
|
1248
1306
|
@click.argument('cluster',
|
1249
1307
|
required=False,
|
1250
1308
|
type=str,
|
@@ -1273,15 +1331,29 @@ def launch(
|
|
1273
1331
|
_COMMON_OPTIONS)
|
1274
1332
|
@usage_lib.entrypoint
|
1275
1333
|
# pylint: disable=redefined-builtin
|
1276
|
-
def exec(cluster: Optional[str],
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
|
1334
|
+
def exec(cluster: Optional[str],
|
1335
|
+
cluster_option: Optional[str],
|
1336
|
+
entrypoint: Tuple[str, ...],
|
1337
|
+
detach_run: bool,
|
1338
|
+
name: Optional[str],
|
1339
|
+
cloud: Optional[str],
|
1340
|
+
region: Optional[str],
|
1341
|
+
zone: Optional[str],
|
1342
|
+
workdir: Optional[str],
|
1343
|
+
gpus: Optional[str],
|
1344
|
+
ports: Tuple[str],
|
1345
|
+
instance_type: Optional[str],
|
1346
|
+
num_nodes: Optional[int],
|
1347
|
+
use_spot: Optional[bool],
|
1348
|
+
image_id: Optional[str],
|
1349
|
+
env_file: Optional[Dict[str, str]],
|
1350
|
+
env: List[Tuple[str, str]],
|
1351
|
+
cpus: Optional[str],
|
1352
|
+
memory: Optional[str],
|
1353
|
+
disk_size: Optional[int],
|
1354
|
+
disk_tier: Optional[str],
|
1355
|
+
async_call: bool,
|
1356
|
+
config_override: Optional[Dict[str, Any]] = None):
|
1285
1357
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
1286
1358
|
"""Execute a task or command on an existing cluster.
|
1287
1359
|
|
@@ -1374,6 +1446,7 @@ def exec(cluster: Optional[str], cluster_option: Optional[str],
|
|
1374
1446
|
disk_tier=disk_tier,
|
1375
1447
|
ports=ports,
|
1376
1448
|
field_to_ignore=['cpus', 'memory', 'disk_size', 'disk_tier', 'ports'],
|
1449
|
+
config_override=config_override,
|
1377
1450
|
)
|
1378
1451
|
|
1379
1452
|
if isinstance(task_or_dag, sky.Dag):
|
@@ -1657,6 +1730,7 @@ def _show_endpoint(query_clusters: Optional[List[str]],
|
|
1657
1730
|
|
1658
1731
|
|
1659
1732
|
@cli.command()
|
1733
|
+
@config_option(expose_value=False)
|
1660
1734
|
@click.option('--verbose',
|
1661
1735
|
'-v',
|
1662
1736
|
default=False,
|
@@ -1949,6 +2023,7 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1949
2023
|
|
1950
2024
|
|
1951
2025
|
@cli.command()
|
2026
|
+
@config_option(expose_value=False)
|
1952
2027
|
@click.option('--all',
|
1953
2028
|
'-a',
|
1954
2029
|
default=False,
|
@@ -2019,6 +2094,7 @@ def cost_report(all: bool): # pylint: disable=redefined-builtin
|
|
2019
2094
|
|
2020
2095
|
|
2021
2096
|
@cli.command()
|
2097
|
+
@config_option(expose_value=False)
|
2022
2098
|
@click.option('--all-users',
|
2023
2099
|
'-u',
|
2024
2100
|
default=False,
|
@@ -2080,6 +2156,7 @@ def queue(clusters: List[str], skip_finished: bool, all_users: bool):
|
|
2080
2156
|
|
2081
2157
|
|
2082
2158
|
@cli.command()
|
2159
|
+
@config_option(expose_value=False)
|
2083
2160
|
@click.option(
|
2084
2161
|
'--sync-down',
|
2085
2162
|
'-s',
|
@@ -2217,6 +2294,7 @@ def logs(
|
|
2217
2294
|
|
2218
2295
|
|
2219
2296
|
@cli.command()
|
2297
|
+
@config_option(expose_value=False)
|
2220
2298
|
@click.argument('cluster',
|
2221
2299
|
required=True,
|
2222
2300
|
type=str,
|
@@ -2320,6 +2398,7 @@ def cancel(
|
|
2320
2398
|
|
2321
2399
|
|
2322
2400
|
@cli.command(cls=_DocumentedCodeCommand)
|
2401
|
+
@config_option(expose_value=False)
|
2323
2402
|
@click.argument('clusters',
|
2324
2403
|
nargs=-1,
|
2325
2404
|
required=False,
|
@@ -2387,6 +2466,7 @@ def stop(
|
|
2387
2466
|
|
2388
2467
|
|
2389
2468
|
@cli.command(cls=_DocumentedCodeCommand)
|
2469
|
+
@config_option(expose_value=False)
|
2390
2470
|
@click.argument('clusters',
|
2391
2471
|
nargs=-1,
|
2392
2472
|
required=False,
|
@@ -2499,6 +2579,7 @@ def autostop(
|
|
2499
2579
|
|
2500
2580
|
|
2501
2581
|
@cli.command(cls=_DocumentedCodeCommand)
|
2582
|
+
@config_option(expose_value=False)
|
2502
2583
|
@click.argument('clusters',
|
2503
2584
|
nargs=-1,
|
2504
2585
|
required=False,
|
@@ -2744,6 +2825,7 @@ def start(
|
|
2744
2825
|
|
2745
2826
|
|
2746
2827
|
@cli.command(cls=_DocumentedCodeCommand)
|
2828
|
+
@config_option(expose_value=False)
|
2747
2829
|
@click.argument('clusters',
|
2748
2830
|
nargs=-1,
|
2749
2831
|
required=False,
|
@@ -3182,6 +3264,7 @@ def _down_or_stop_clusters(
|
|
3182
3264
|
|
3183
3265
|
|
3184
3266
|
@cli.command(cls=_DocumentedCodeCommand)
|
3267
|
+
@config_option(expose_value=False)
|
3185
3268
|
@click.argument('clouds', required=False, type=str, nargs=-1)
|
3186
3269
|
@click.option('--verbose',
|
3187
3270
|
'-v',
|
@@ -3222,6 +3305,7 @@ def check(clouds: Tuple[str], verbose: bool):
|
|
3222
3305
|
|
3223
3306
|
|
3224
3307
|
@cli.command()
|
3308
|
+
@config_option(expose_value=False)
|
3225
3309
|
@click.argument('accelerator_str', required=False)
|
3226
3310
|
@click.option('--all',
|
3227
3311
|
'-a',
|
@@ -3379,15 +3463,14 @@ def show_gpus(
|
|
3379
3463
|
])
|
3380
3464
|
return realtime_gpu_table
|
3381
3465
|
|
3382
|
-
|
3383
|
-
def _get_kubernetes_node_info_table(context: Optional[str]):
|
3466
|
+
def _format_kubernetes_node_info(context: Optional[str]):
|
3384
3467
|
node_table = log_utils.create_table(
|
3385
3468
|
['NODE_NAME', 'GPU_NAME', 'TOTAL_GPUS', 'FREE_GPUS'])
|
3386
3469
|
|
3387
|
-
|
3388
|
-
node_info_dict = sdk.stream_and_get(
|
3470
|
+
nodes_info = sdk.stream_and_get(
|
3389
3471
|
sdk.kubernetes_node_info(context=context))
|
3390
|
-
|
3472
|
+
no_permissions_str = '<no permissions>'
|
3473
|
+
for node_name, node_info in nodes_info.node_info_dict.items():
|
3391
3474
|
available = node_info.free[
|
3392
3475
|
'accelerators_available'] if node_info.free[
|
3393
3476
|
'accelerators_available'] != -1 else no_permissions_str
|
@@ -3395,7 +3478,14 @@ def show_gpus(
|
|
3395
3478
|
node_name, node_info.accelerator_type,
|
3396
3479
|
node_info.total['accelerator_count'], available
|
3397
3480
|
])
|
3398
|
-
|
3481
|
+
k8s_per_node_acc_message = (
|
3482
|
+
'Kubernetes per node accelerator availability ')
|
3483
|
+
if nodes_info.hint:
|
3484
|
+
k8s_per_node_acc_message += nodes_info.hint
|
3485
|
+
return (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
3486
|
+
f'{k8s_per_node_acc_message}'
|
3487
|
+
f'{colorama.Style.RESET_ALL}\n'
|
3488
|
+
f'{node_table.get_string()}')
|
3399
3489
|
|
3400
3490
|
def _output() -> Generator[str, None, None]:
|
3401
3491
|
gpu_table = log_utils.create_table(
|
@@ -3443,22 +3533,8 @@ def show_gpus(
|
|
3443
3533
|
f'Kubernetes GPUs {context_str}'
|
3444
3534
|
f'{colorama.Style.RESET_ALL}\n')
|
3445
3535
|
yield from k8s_realtime_table.get_string()
|
3446
|
-
k8s_node_table = _get_kubernetes_node_info_table(context)
|
3447
3536
|
yield '\n\n'
|
3448
|
-
|
3449
|
-
# support.
|
3450
|
-
k8s_per_node_acc_message = (
|
3451
|
-
'Kubernetes per node accelerator availability ')
|
3452
|
-
if kubernetes_utils.multi_host_tpu_exists_in_cluster(
|
3453
|
-
context):
|
3454
|
-
k8s_per_node_acc_message += (
|
3455
|
-
'(Note: Multi-host TPUs are detected and excluded '
|
3456
|
-
'from the display as multi-host TPUs are not '
|
3457
|
-
'supported.)')
|
3458
|
-
yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
3459
|
-
f'{k8s_per_node_acc_message}'
|
3460
|
-
f'{colorama.Style.RESET_ALL}\n')
|
3461
|
-
yield from k8s_node_table.get_string()
|
3537
|
+
yield _format_kubernetes_node_info(context)
|
3462
3538
|
if kubernetes_autoscaling:
|
3463
3539
|
k8s_messages += (
|
3464
3540
|
'\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE)
|
@@ -3693,6 +3769,7 @@ def storage():
|
|
3693
3769
|
|
3694
3770
|
|
3695
3771
|
@storage.command('ls', cls=_DocumentedCodeCommand)
|
3772
|
+
@config_option(expose_value=False)
|
3696
3773
|
@click.option('--verbose',
|
3697
3774
|
'-v',
|
3698
3775
|
default=False,
|
@@ -3711,6 +3788,7 @@ def storage_ls(verbose: bool):
|
|
3711
3788
|
|
3712
3789
|
|
3713
3790
|
@storage.command('delete', cls=_DocumentedCodeCommand)
|
3791
|
+
@config_option(expose_value=False)
|
3714
3792
|
@click.argument('names',
|
3715
3793
|
required=False,
|
3716
3794
|
type=str,
|
@@ -3795,6 +3873,7 @@ def jobs():
|
|
3795
3873
|
|
3796
3874
|
|
3797
3875
|
@jobs.command('launch', cls=_DocumentedCodeCommand)
|
3876
|
+
@config_option(expose_value=True)
|
3798
3877
|
@click.argument('entrypoint',
|
3799
3878
|
required=True,
|
3800
3879
|
type=str,
|
@@ -3852,6 +3931,7 @@ def jobs_launch(
|
|
3852
3931
|
detach_run: bool,
|
3853
3932
|
yes: bool,
|
3854
3933
|
async_call: bool,
|
3934
|
+
config_override: Optional[Dict[str, Any]] = None,
|
3855
3935
|
):
|
3856
3936
|
"""Launch a managed job from a YAML or a command.
|
3857
3937
|
|
@@ -3892,6 +3972,7 @@ def jobs_launch(
|
|
3892
3972
|
disk_tier=disk_tier,
|
3893
3973
|
ports=ports,
|
3894
3974
|
job_recovery=job_recovery,
|
3975
|
+
config_override=config_override,
|
3895
3976
|
)
|
3896
3977
|
|
3897
3978
|
if not isinstance(task_or_dag, sky.Dag):
|
@@ -3929,6 +4010,7 @@ def jobs_launch(
|
|
3929
4010
|
|
3930
4011
|
|
3931
4012
|
@jobs.command('queue', cls=_DocumentedCodeCommand)
|
4013
|
+
@config_option(expose_value=False)
|
3932
4014
|
@click.option('--verbose',
|
3933
4015
|
'-v',
|
3934
4016
|
default=False,
|
@@ -4045,6 +4127,7 @@ def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool,
|
|
4045
4127
|
|
4046
4128
|
|
4047
4129
|
@jobs.command('cancel', cls=_DocumentedCodeCommand)
|
4130
|
+
@config_option(expose_value=False)
|
4048
4131
|
@click.option('--name',
|
4049
4132
|
'-n',
|
4050
4133
|
required=False,
|
@@ -4119,6 +4202,7 @@ def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool,
|
|
4119
4202
|
|
4120
4203
|
|
4121
4204
|
@jobs.command('logs', cls=_DocumentedCodeCommand)
|
4205
|
+
@config_option(expose_value=False)
|
4122
4206
|
@click.option('--name',
|
4123
4207
|
'-n',
|
4124
4208
|
required=False,
|
@@ -4183,6 +4267,7 @@ def jobs_logs(name: Optional[str], job_id: Optional[int], follow: bool,
|
|
4183
4267
|
|
4184
4268
|
|
4185
4269
|
@jobs.command('dashboard', cls=_DocumentedCodeCommand)
|
4270
|
+
@config_option(expose_value=False)
|
4186
4271
|
@usage_lib.entrypoint
|
4187
4272
|
def jobs_dashboard():
|
4188
4273
|
"""Opens a dashboard for managed jobs."""
|
@@ -4312,6 +4397,7 @@ def _generate_task_with_service(
|
|
4312
4397
|
|
4313
4398
|
|
4314
4399
|
@serve.command('up', cls=_DocumentedCodeCommand)
|
4400
|
+
@config_option(expose_value=False)
|
4315
4401
|
@click.argument('service_yaml',
|
4316
4402
|
required=True,
|
4317
4403
|
type=str,
|
@@ -4423,6 +4509,7 @@ def serve_up(
|
|
4423
4509
|
# TODO(MaoZiming): Expose mix replica traffic option to user.
|
4424
4510
|
# Currently, we do not mix traffic from old and new replicas.
|
4425
4511
|
@serve.command('update', cls=_DocumentedCodeCommand)
|
4512
|
+
@config_option(expose_value=False)
|
4426
4513
|
@click.argument('service_name', required=True, type=str)
|
4427
4514
|
@click.argument('service_yaml',
|
4428
4515
|
required=True,
|
@@ -4523,6 +4610,7 @@ def serve_update(service_name: str, service_yaml: Tuple[str, ...],
|
|
4523
4610
|
|
4524
4611
|
|
4525
4612
|
@serve.command('status', cls=_DocumentedCodeCommand)
|
4613
|
+
@config_option(expose_value=False)
|
4526
4614
|
@click.option('--verbose',
|
4527
4615
|
'-v',
|
4528
4616
|
default=False,
|
@@ -4648,6 +4736,7 @@ def serve_status(verbose: bool, endpoint: bool, service_names: List[str]):
|
|
4648
4736
|
|
4649
4737
|
|
4650
4738
|
@serve.command('down', cls=_DocumentedCodeCommand)
|
4739
|
+
@config_option(expose_value=False)
|
4651
4740
|
@click.argument('service_names', required=False, type=str, nargs=-1)
|
4652
4741
|
@click.option('--all',
|
4653
4742
|
'-a',
|
@@ -4761,6 +4850,7 @@ def serve_down(
|
|
4761
4850
|
|
4762
4851
|
|
4763
4852
|
@serve.command('logs', cls=_DocumentedCodeCommand)
|
4853
|
+
@config_option(expose_value=False)
|
4764
4854
|
@click.option(
|
4765
4855
|
'--follow/--no-follow',
|
4766
4856
|
is_flag=True,
|
@@ -4874,6 +4964,7 @@ def _get_candidate_configs(yaml_path: str) -> Optional[List[Dict[str, str]]]:
|
|
4874
4964
|
|
4875
4965
|
|
4876
4966
|
@bench.command('launch', cls=_DocumentedCodeCommand)
|
4967
|
+
@config_option(expose_value=True)
|
4877
4968
|
@click.argument('entrypoint',
|
4878
4969
|
required=True,
|
4879
4970
|
type=str,
|
@@ -4919,27 +5010,28 @@ def _get_candidate_configs(yaml_path: str) -> Optional[List[Dict[str, str]]]:
|
|
4919
5010
|
help='Skip confirmation prompt.')
|
4920
5011
|
@usage_lib.entrypoint
|
4921
5012
|
def benchmark_launch(
|
4922
|
-
|
4923
|
-
|
4924
|
-
|
4925
|
-
|
4926
|
-
|
4927
|
-
|
4928
|
-
|
4929
|
-
|
4930
|
-
|
4931
|
-
|
4932
|
-
|
4933
|
-
|
4934
|
-
|
4935
|
-
|
4936
|
-
|
4937
|
-
|
4938
|
-
|
4939
|
-
|
4940
|
-
|
4941
|
-
|
4942
|
-
|
5013
|
+
entrypoint: str,
|
5014
|
+
benchmark: str,
|
5015
|
+
name: Optional[str],
|
5016
|
+
workdir: Optional[str],
|
5017
|
+
cloud: Optional[str],
|
5018
|
+
region: Optional[str],
|
5019
|
+
zone: Optional[str],
|
5020
|
+
gpus: Optional[str],
|
5021
|
+
num_nodes: Optional[int],
|
5022
|
+
use_spot: Optional[bool],
|
5023
|
+
image_id: Optional[str],
|
5024
|
+
env_file: Optional[Dict[str, str]],
|
5025
|
+
env: List[Tuple[str, str]],
|
5026
|
+
cpus: Optional[str],
|
5027
|
+
memory: Optional[str],
|
5028
|
+
disk_size: Optional[int],
|
5029
|
+
disk_tier: Optional[str],
|
5030
|
+
ports: Tuple[str],
|
5031
|
+
idle_minutes_to_autostop: Optional[int],
|
5032
|
+
yes: bool,
|
5033
|
+
async_call: bool, # pylint: disable=unused-argument
|
5034
|
+
config_override: Optional[Dict[str, Any]] = None,
|
4943
5035
|
) -> None:
|
4944
5036
|
"""Benchmark a task on different resources.
|
4945
5037
|
|
@@ -5048,7 +5140,8 @@ def benchmark_launch(
|
|
5048
5140
|
image_id=image_id,
|
5049
5141
|
disk_size=disk_size,
|
5050
5142
|
disk_tier=disk_tier,
|
5051
|
-
ports=ports
|
5143
|
+
ports=ports,
|
5144
|
+
config_override=config_override)
|
5052
5145
|
_pop_and_ignore_fields_in_override_params(
|
5053
5146
|
override_params, field_to_ignore=['cpus', 'memory'])
|
5054
5147
|
resources_config.update(override_params)
|
@@ -5113,6 +5206,7 @@ def benchmark_launch(
|
|
5113
5206
|
|
5114
5207
|
|
5115
5208
|
@bench.command('ls', cls=_DocumentedCodeCommand)
|
5209
|
+
@config_option(expose_value=False)
|
5116
5210
|
@usage_lib.entrypoint
|
5117
5211
|
def benchmark_ls() -> None:
|
5118
5212
|
"""List the benchmark history."""
|
@@ -5176,6 +5270,7 @@ def benchmark_ls() -> None:
|
|
5176
5270
|
|
5177
5271
|
|
5178
5272
|
@bench.command('show', cls=_DocumentedCodeCommand)
|
5273
|
+
@config_option(expose_value=False)
|
5179
5274
|
@click.argument('benchmark', required=True, type=str)
|
5180
5275
|
# TODO(woosuk): Add --all option to show all the collected information
|
5181
5276
|
# (e.g., setup time, warmup steps, total steps, etc.).
|
@@ -5301,6 +5396,7 @@ def benchmark_show(benchmark: str) -> None:
|
|
5301
5396
|
|
5302
5397
|
|
5303
5398
|
@bench.command('down', cls=_DocumentedCodeCommand)
|
5399
|
+
@config_option(expose_value=False)
|
5304
5400
|
@click.argument('benchmark', required=True, type=str)
|
5305
5401
|
@click.option(
|
5306
5402
|
'--exclude',
|
@@ -5343,6 +5439,7 @@ def benchmark_down(
|
|
5343
5439
|
|
5344
5440
|
|
5345
5441
|
@bench.command('delete', cls=_DocumentedCodeCommand)
|
5442
|
+
@config_option(expose_value=False)
|
5346
5443
|
@click.argument('benchmarks', required=False, type=str, nargs=-1)
|
5347
5444
|
@click.option('--all',
|
5348
5445
|
'-a',
|
@@ -5477,6 +5574,7 @@ def local():
|
|
5477
5574
|
help='Password for the ssh-user to execute sudo commands. '
|
5478
5575
|
'Required only if passwordless sudo is not setup.')
|
5479
5576
|
@local.command('up', cls=_DocumentedCodeCommand)
|
5577
|
+
@config_option(expose_value=False)
|
5480
5578
|
@_add_click_options(_COMMON_OPTIONS)
|
5481
5579
|
@usage_lib.entrypoint
|
5482
5580
|
def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
|
@@ -5532,6 +5630,7 @@ def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
|
|
5532
5630
|
|
5533
5631
|
|
5534
5632
|
@local.command('down', cls=_DocumentedCodeCommand)
|
5633
|
+
@config_option(expose_value=False)
|
5535
5634
|
@_add_click_options(_COMMON_OPTIONS)
|
5536
5635
|
@usage_lib.entrypoint
|
5537
5636
|
def local_down(async_call: bool):
|
@@ -5547,6 +5646,7 @@ def api():
|
|
5547
5646
|
|
5548
5647
|
|
5549
5648
|
@api.command('start', cls=_DocumentedCodeCommand)
|
5649
|
+
@config_option(expose_value=False)
|
5550
5650
|
@click.option('--deploy',
|
5551
5651
|
type=bool,
|
5552
5652
|
is_flag=True,
|
@@ -5579,6 +5679,7 @@ def api_start(deploy: bool, host: Optional[str], foreground: bool):
|
|
5579
5679
|
|
5580
5680
|
|
5581
5681
|
@api.command('stop', cls=_DocumentedCodeCommand)
|
5682
|
+
@config_option(expose_value=False)
|
5582
5683
|
@usage_lib.entrypoint
|
5583
5684
|
def api_stop():
|
5584
5685
|
"""Stops the SkyPilot API server locally."""
|
@@ -5586,6 +5687,7 @@ def api_stop():
|
|
5586
5687
|
|
5587
5688
|
|
5588
5689
|
@api.command('logs', cls=_DocumentedCodeCommand)
|
5690
|
+
@config_option(expose_value=False)
|
5589
5691
|
@click.argument('request_id', required=False, type=str)
|
5590
5692
|
@click.option('--server-logs',
|
5591
5693
|
is_flag=True,
|
@@ -5625,6 +5727,7 @@ def api_logs(request_id: Optional[str], server_logs: bool,
|
|
5625
5727
|
|
5626
5728
|
|
5627
5729
|
@api.command('cancel', cls=_DocumentedCodeCommand)
|
5730
|
+
@config_option(expose_value=False)
|
5628
5731
|
@click.argument('request_ids', required=False, type=str, nargs=-1)
|
5629
5732
|
@click.option('--all',
|
5630
5733
|
'-a',
|
@@ -5666,6 +5769,7 @@ def api_cancel(request_ids: Optional[List[str]], all: bool, all_users: bool):
|
|
5666
5769
|
|
5667
5770
|
|
5668
5771
|
@api.command('status', cls=_DocumentedCodeCommand)
|
5772
|
+
@config_option(expose_value=False)
|
5669
5773
|
@click.argument('request_ids', required=False, type=str, nargs=-1)
|
5670
5774
|
@click.option('--all-status',
|
5671
5775
|
'-a',
|
@@ -5709,6 +5813,7 @@ def api_status(request_ids: Optional[List[str]], all_status: bool,
|
|
5709
5813
|
|
5710
5814
|
|
5711
5815
|
@api.command('login', cls=_DocumentedCodeCommand)
|
5816
|
+
@config_option(expose_value=False)
|
5712
5817
|
@click.option('--endpoint',
|
5713
5818
|
'-e',
|
5714
5819
|
required=False,
|
@@ -5720,6 +5825,7 @@ def api_login(endpoint: Optional[str]):
|
|
5720
5825
|
|
5721
5826
|
|
5722
5827
|
@api.command('info', cls=_DocumentedCodeCommand)
|
5828
|
+
@config_option(expose_value=False)
|
5723
5829
|
@usage_lib.entrypoint
|
5724
5830
|
def api_info():
|
5725
5831
|
"""Shows the SkyPilot API server URL."""
|
sky/client/sdk.py
CHANGED
@@ -1408,8 +1408,8 @@ def kubernetes_node_info(
|
|
1408
1408
|
The request ID of the Kubernetes node info request.
|
1409
1409
|
|
1410
1410
|
Request Returns:
|
1411
|
-
|
1412
|
-
|
1411
|
+
KubernetesNodesInfo: A model that contains the node info map and other
|
1412
|
+
information.
|
1413
1413
|
"""
|
1414
1414
|
body = payloads.KubernetesNodeInfoRequestBody(context=context)
|
1415
1415
|
response = requests.post(
|
@@ -1815,12 +1815,12 @@ def api_login(endpoint: Optional[str] = None) -> None:
|
|
1815
1815
|
config_path = pathlib.Path(
|
1816
1816
|
skypilot_config.get_user_config_path()).expanduser()
|
1817
1817
|
with filelock.FileLock(config_path.with_suffix('.lock')):
|
1818
|
-
if not
|
1818
|
+
if not config_path.exists():
|
1819
1819
|
config_path.touch()
|
1820
1820
|
config = {'api_server': {'endpoint': endpoint}}
|
1821
1821
|
else:
|
1822
|
-
config = skypilot_config.
|
1823
|
-
|
1822
|
+
config = skypilot_config.get_user_config()
|
1823
|
+
config.set_nested(('api_server', 'endpoint'), endpoint)
|
1824
1824
|
common_utils.dump_yaml(str(config_path), config)
|
1825
1825
|
click.secho(f'Logged in to SkyPilot API server at {endpoint}',
|
1826
1826
|
fg='green')
|
sky/clouds/aws.py
CHANGED
@@ -472,10 +472,10 @@ class AWS(clouds.Cloud):
|
|
472
472
|
with ux_utils.print_exception_no_traceback():
|
473
473
|
logger.warning(
|
474
474
|
f'Skip opening ports {resources.ports} for cluster {cluster_name!r}, '
|
475
|
-
'as `aws.security_group_name` in `~/.sky/
|
475
|
+
'as `aws.security_group_name` in `~/.sky/config.yaml` is specified as '
|
476
476
|
f' {security_group!r}. Please make sure the specified security group '
|
477
477
|
'has requested ports setup; or, leave out `aws.security_group_name` '
|
478
|
-
'in `~/.sky/
|
478
|
+
'in `~/.sky/config.yaml`.')
|
479
479
|
|
480
480
|
return {
|
481
481
|
'instance_type': r.instance_type,
|
sky/clouds/kubernetes.py
CHANGED
@@ -47,8 +47,6 @@ class Kubernetes(clouds.Cloud):
|
|
47
47
|
SKY_SSH_KEY_SECRET_NAME = 'sky-ssh-keys'
|
48
48
|
SKY_SSH_JUMP_NAME = 'sky-ssh-jump-pod'
|
49
49
|
|
50
|
-
LEGACY_SINGLETON_REGION = 'kubernetes'
|
51
|
-
|
52
50
|
# Limit the length of the cluster name to avoid exceeding the limit of 63
|
53
51
|
# characters for Kubernetes resources. We limit to 42 characters (63-21) to
|
54
52
|
# allow additional characters for creating ingress services to expose ports.
|
@@ -753,12 +751,6 @@ class Kubernetes(clouds.Cloud):
|
|
753
751
|
instance_type)
|
754
752
|
|
755
753
|
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
|
756
|
-
if region == self.LEGACY_SINGLETON_REGION:
|
757
|
-
# For backward compatibility, we allow the region to be set to the
|
758
|
-
# legacy singleton region.
|
759
|
-
# TODO: Remove this after 0.9.0.
|
760
|
-
return region, zone
|
761
|
-
|
762
754
|
if region == kubernetes.in_cluster_context_name():
|
763
755
|
# If running incluster, we set region to IN_CLUSTER_REGION
|
764
756
|
# since there is no context name available.
|
sky/clouds/oci.py
CHANGED
@@ -9,7 +9,7 @@ History:
|
|
9
9
|
file path resolution (by os.path.expanduser) when construct the file
|
10
10
|
mounts. This bug will cause the created workder nodes located in different
|
11
11
|
compartment and VCN than the header node if user specifies compartment_id
|
12
|
-
in the sky config file, because the ~/.sky/
|
12
|
+
in the sky config file, because the ~/.sky/config.yaml is not
|
13
13
|
sync-ed to the remote machine.
|
14
14
|
The workaround is set the sky config file path using ENV before running
|
15
15
|
the sky launch: export SKYPILOT_CONFIG=/home/ubuntu/.sky/config.yaml
|