skypilot-nightly 1.0.0.dev20250413__py3-none-any.whl → 1.0.0.dev20250417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +7 -0
  3. sky/authentication.py +2 -2
  4. sky/backends/backend_utils.py +3 -3
  5. sky/backends/cloud_vm_ray_backend.py +22 -29
  6. sky/check.py +1 -1
  7. sky/cli.py +161 -55
  8. sky/client/cli.py +161 -55
  9. sky/client/sdk.py +5 -5
  10. sky/clouds/aws.py +2 -2
  11. sky/clouds/kubernetes.py +0 -8
  12. sky/clouds/oci.py +1 -1
  13. sky/core.py +17 -11
  14. sky/exceptions.py +5 -0
  15. sky/jobs/constants.py +8 -1
  16. sky/jobs/server/core.py +12 -8
  17. sky/models.py +28 -0
  18. sky/provision/kubernetes/config.py +1 -1
  19. sky/provision/kubernetes/instance.py +16 -14
  20. sky/provision/kubernetes/network_utils.py +1 -1
  21. sky/provision/kubernetes/utils.py +50 -22
  22. sky/resources.py +47 -2
  23. sky/serve/constants.py +6 -0
  24. sky/serve/load_balancing_policies.py +0 -4
  25. sky/serve/serve_state.py +0 -6
  26. sky/serve/server/core.py +5 -2
  27. sky/server/common.py +133 -46
  28. sky/server/constants.py +1 -1
  29. sky/server/requests/serializers/decoders.py +2 -5
  30. sky/server/requests/serializers/encoders.py +2 -5
  31. sky/server/server.py +1 -1
  32. sky/setup_files/dependencies.py +1 -0
  33. sky/sky_logging.py +2 -2
  34. sky/skylet/constants.py +5 -7
  35. sky/skylet/job_lib.py +3 -3
  36. sky/skypilot_config.py +194 -73
  37. sky/templates/kubernetes-ray.yml.j2 +1 -1
  38. sky/utils/cli_utils/status_utils.py +12 -5
  39. sky/utils/config_utils.py +39 -14
  40. sky/utils/controller_utils.py +44 -6
  41. sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
  42. sky/utils/kubernetes/gpu_labeler.py +99 -16
  43. sky/utils/schemas.py +24 -0
  44. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/METADATA +2 -1
  45. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/RECORD +49 -49
  46. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/WHEEL +0 -0
  47. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/entry_points.txt +0 -0
  48. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/licenses/LICENSE +0 -0
  49. {skypilot_nightly-1.0.0.dev20250413.dist-info → skypilot_nightly-1.0.0.dev20250417.dist-info}/top_level.txt +0 -0
sky/client/cli.py CHANGED
@@ -54,6 +54,7 @@ from sky import jobs as managed_jobs
54
54
  from sky import models
55
55
  from sky import serve as serve_lib
56
56
  from sky import sky_logging
57
+ from sky import skypilot_config
57
58
  from sky.adaptors import common as adaptors_common
58
59
  from sky.benchmark import benchmark_state
59
60
  from sky.benchmark import benchmark_utils
@@ -278,6 +279,54 @@ def _merge_env_vars(env_dict: Optional[Dict[str, str]],
278
279
  return list(env_dict.items())
279
280
 
280
281
 
282
+ def config_option(expose_value: bool):
283
+ """A decorator for the --config option.
284
+
285
+ This decorator is used to parse the --config option.
286
+
287
+ Any overrides specified in the command line will be applied to the skypilot
288
+ config before the decorated function is called.
289
+
290
+ If expose_value is True, the decorated function will receive the parsed
291
+ config overrides as 'config_override' parameter.
292
+
293
+ Args:
294
+ expose_value: Whether to expose the value of the option to the decorated
295
+ function.
296
+ """
297
+
298
+ def preprocess_config_options(ctx, param, value):
299
+ del ctx # Unused.
300
+ param.name = 'config_override'
301
+ try:
302
+ if len(value) == 0:
303
+ return None
304
+ elif len(value) > 1:
305
+ raise ValueError('argument specified multiple times. '
306
+ 'To specify multiple configs, use '
307
+ '--config nested.key1=val1,another.key2=val2')
308
+ else:
309
+ # Apply the config overrides to the skypilot config.
310
+ return skypilot_config.apply_cli_config(value[0])
311
+ except ValueError as e:
312
+ raise click.BadParameter(f'{str(e)}') from e
313
+
314
+ def return_option_decorator(func):
315
+ return click.option(
316
+ '--config',
317
+ required=False,
318
+ type=str,
319
+ multiple=True,
320
+ expose_value=expose_value,
321
+ callback=preprocess_config_options,
322
+ help=('Path to a config file or a comma-separated '
323
+ 'list of key-value pairs '
324
+ '(e.g. "nested.key1=val1,another.key2=val2").'),
325
+ )(func)
326
+
327
+ return return_option_decorator
328
+
329
+
281
330
  _COMMON_OPTIONS = [
282
331
  click.option('--async/--no-async',
283
332
  'async_call',
@@ -630,7 +679,8 @@ def _parse_override_params(
630
679
  image_id: Optional[str] = None,
631
680
  disk_size: Optional[int] = None,
632
681
  disk_tier: Optional[str] = None,
633
- ports: Optional[Tuple[str, ...]] = None) -> Dict[str, Any]:
682
+ ports: Optional[Tuple[str, ...]] = None,
683
+ config_override: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
634
684
  """Parses the override parameters into a dictionary."""
635
685
  override_params: Dict[str, Any] = {}
636
686
  if cloud is not None:
@@ -691,6 +741,8 @@ def _parse_override_params(
691
741
  override_params['ports'] = None
692
742
  else:
693
743
  override_params['ports'] = ports
744
+ if config_override:
745
+ override_params['_cluster_config_overrides'] = config_override
694
746
  return override_params
695
747
 
696
748
 
@@ -793,6 +845,7 @@ def _make_task_or_dag_from_entrypoint_with_overrides(
793
845
  field_to_ignore: Optional[List[str]] = None,
794
846
  # job launch specific
795
847
  job_recovery: Optional[str] = None,
848
+ config_override: Optional[Dict[str, Any]] = None,
796
849
  ) -> Union[sky.Task, sky.Dag]:
797
850
  """Creates a task or a dag from an entrypoint with overrides.
798
851
 
@@ -826,7 +879,8 @@ def _make_task_or_dag_from_entrypoint_with_overrides(
826
879
  image_id=image_id,
827
880
  disk_size=disk_size,
828
881
  disk_tier=disk_tier,
829
- ports=ports)
882
+ ports=ports,
883
+ config_override=config_override)
830
884
  if field_to_ignore is not None:
831
885
  _pop_and_ignore_fields_in_override_params(override_params,
832
886
  field_to_ignore)
@@ -1010,6 +1064,7 @@ def cli():
1010
1064
 
1011
1065
 
1012
1066
  @cli.command(cls=_DocumentedCodeCommand)
1067
+ @config_option(expose_value=True)
1013
1068
  @click.argument('entrypoint',
1014
1069
  required=False,
1015
1070
  type=str,
@@ -1139,7 +1194,8 @@ def launch(
1139
1194
  no_setup: bool,
1140
1195
  clone_disk_from: Optional[str],
1141
1196
  fast: bool,
1142
- async_call: bool):
1197
+ async_call: bool,
1198
+ config_override: Optional[Dict[str, Any]] = None):
1143
1199
  """Launch a cluster or task.
1144
1200
 
1145
1201
  If ENTRYPOINT points to a valid YAML file, it is read in as the task
@@ -1181,6 +1237,7 @@ def launch(
1181
1237
  disk_size=disk_size,
1182
1238
  disk_tier=disk_tier,
1183
1239
  ports=ports,
1240
+ config_override=config_override,
1184
1241
  )
1185
1242
  if isinstance(task_or_dag, sky.Dag):
1186
1243
  raise click.UsageError(
@@ -1245,6 +1302,7 @@ def launch(
1245
1302
 
1246
1303
 
1247
1304
  @cli.command(cls=_DocumentedCodeCommand)
1305
+ @config_option(expose_value=True)
1248
1306
  @click.argument('cluster',
1249
1307
  required=False,
1250
1308
  type=str,
@@ -1273,15 +1331,29 @@ def launch(
1273
1331
  _COMMON_OPTIONS)
1274
1332
  @usage_lib.entrypoint
1275
1333
  # pylint: disable=redefined-builtin
1276
- def exec(cluster: Optional[str], cluster_option: Optional[str],
1277
- entrypoint: Tuple[str, ...], detach_run: bool, name: Optional[str],
1278
- cloud: Optional[str], region: Optional[str], zone: Optional[str],
1279
- workdir: Optional[str], gpus: Optional[str], ports: Tuple[str],
1280
- instance_type: Optional[str], num_nodes: Optional[int],
1281
- use_spot: Optional[bool], image_id: Optional[str],
1282
- env_file: Optional[Dict[str, str]], env: List[Tuple[str, str]],
1283
- cpus: Optional[str], memory: Optional[str], disk_size: Optional[int],
1284
- disk_tier: Optional[str], async_call: bool):
1334
+ def exec(cluster: Optional[str],
1335
+ cluster_option: Optional[str],
1336
+ entrypoint: Tuple[str, ...],
1337
+ detach_run: bool,
1338
+ name: Optional[str],
1339
+ cloud: Optional[str],
1340
+ region: Optional[str],
1341
+ zone: Optional[str],
1342
+ workdir: Optional[str],
1343
+ gpus: Optional[str],
1344
+ ports: Tuple[str],
1345
+ instance_type: Optional[str],
1346
+ num_nodes: Optional[int],
1347
+ use_spot: Optional[bool],
1348
+ image_id: Optional[str],
1349
+ env_file: Optional[Dict[str, str]],
1350
+ env: List[Tuple[str, str]],
1351
+ cpus: Optional[str],
1352
+ memory: Optional[str],
1353
+ disk_size: Optional[int],
1354
+ disk_tier: Optional[str],
1355
+ async_call: bool,
1356
+ config_override: Optional[Dict[str, Any]] = None):
1285
1357
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
1286
1358
  """Execute a task or command on an existing cluster.
1287
1359
 
@@ -1374,6 +1446,7 @@ def exec(cluster: Optional[str], cluster_option: Optional[str],
1374
1446
  disk_tier=disk_tier,
1375
1447
  ports=ports,
1376
1448
  field_to_ignore=['cpus', 'memory', 'disk_size', 'disk_tier', 'ports'],
1449
+ config_override=config_override,
1377
1450
  )
1378
1451
 
1379
1452
  if isinstance(task_or_dag, sky.Dag):
@@ -1657,6 +1730,7 @@ def _show_endpoint(query_clusters: Optional[List[str]],
1657
1730
 
1658
1731
 
1659
1732
  @cli.command()
1733
+ @config_option(expose_value=False)
1660
1734
  @click.option('--verbose',
1661
1735
  '-v',
1662
1736
  default=False,
@@ -1949,6 +2023,7 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
1949
2023
 
1950
2024
 
1951
2025
  @cli.command()
2026
+ @config_option(expose_value=False)
1952
2027
  @click.option('--all',
1953
2028
  '-a',
1954
2029
  default=False,
@@ -2019,6 +2094,7 @@ def cost_report(all: bool): # pylint: disable=redefined-builtin
2019
2094
 
2020
2095
 
2021
2096
  @cli.command()
2097
+ @config_option(expose_value=False)
2022
2098
  @click.option('--all-users',
2023
2099
  '-u',
2024
2100
  default=False,
@@ -2080,6 +2156,7 @@ def queue(clusters: List[str], skip_finished: bool, all_users: bool):
2080
2156
 
2081
2157
 
2082
2158
  @cli.command()
2159
+ @config_option(expose_value=False)
2083
2160
  @click.option(
2084
2161
  '--sync-down',
2085
2162
  '-s',
@@ -2217,6 +2294,7 @@ def logs(
2217
2294
 
2218
2295
 
2219
2296
  @cli.command()
2297
+ @config_option(expose_value=False)
2220
2298
  @click.argument('cluster',
2221
2299
  required=True,
2222
2300
  type=str,
@@ -2320,6 +2398,7 @@ def cancel(
2320
2398
 
2321
2399
 
2322
2400
  @cli.command(cls=_DocumentedCodeCommand)
2401
+ @config_option(expose_value=False)
2323
2402
  @click.argument('clusters',
2324
2403
  nargs=-1,
2325
2404
  required=False,
@@ -2387,6 +2466,7 @@ def stop(
2387
2466
 
2388
2467
 
2389
2468
  @cli.command(cls=_DocumentedCodeCommand)
2469
+ @config_option(expose_value=False)
2390
2470
  @click.argument('clusters',
2391
2471
  nargs=-1,
2392
2472
  required=False,
@@ -2499,6 +2579,7 @@ def autostop(
2499
2579
 
2500
2580
 
2501
2581
  @cli.command(cls=_DocumentedCodeCommand)
2582
+ @config_option(expose_value=False)
2502
2583
  @click.argument('clusters',
2503
2584
  nargs=-1,
2504
2585
  required=False,
@@ -2744,6 +2825,7 @@ def start(
2744
2825
 
2745
2826
 
2746
2827
  @cli.command(cls=_DocumentedCodeCommand)
2828
+ @config_option(expose_value=False)
2747
2829
  @click.argument('clusters',
2748
2830
  nargs=-1,
2749
2831
  required=False,
@@ -3182,6 +3264,7 @@ def _down_or_stop_clusters(
3182
3264
 
3183
3265
 
3184
3266
  @cli.command(cls=_DocumentedCodeCommand)
3267
+ @config_option(expose_value=False)
3185
3268
  @click.argument('clouds', required=False, type=str, nargs=-1)
3186
3269
  @click.option('--verbose',
3187
3270
  '-v',
@@ -3222,6 +3305,7 @@ def check(clouds: Tuple[str], verbose: bool):
3222
3305
 
3223
3306
 
3224
3307
  @cli.command()
3308
+ @config_option(expose_value=False)
3225
3309
  @click.argument('accelerator_str', required=False)
3226
3310
  @click.option('--all',
3227
3311
  '-a',
@@ -3379,15 +3463,14 @@ def show_gpus(
3379
3463
  ])
3380
3464
  return realtime_gpu_table
3381
3465
 
3382
- # TODO(zhwu): this needs to run on remote server.
3383
- def _get_kubernetes_node_info_table(context: Optional[str]):
3466
+ def _format_kubernetes_node_info(context: Optional[str]):
3384
3467
  node_table = log_utils.create_table(
3385
3468
  ['NODE_NAME', 'GPU_NAME', 'TOTAL_GPUS', 'FREE_GPUS'])
3386
3469
 
3387
- no_permissions_str = '<no permissions>'
3388
- node_info_dict = sdk.stream_and_get(
3470
+ nodes_info = sdk.stream_and_get(
3389
3471
  sdk.kubernetes_node_info(context=context))
3390
- for node_name, node_info in node_info_dict.items():
3472
+ no_permissions_str = '<no permissions>'
3473
+ for node_name, node_info in nodes_info.node_info_dict.items():
3391
3474
  available = node_info.free[
3392
3475
  'accelerators_available'] if node_info.free[
3393
3476
  'accelerators_available'] != -1 else no_permissions_str
@@ -3395,7 +3478,14 @@ def show_gpus(
3395
3478
  node_name, node_info.accelerator_type,
3396
3479
  node_info.total['accelerator_count'], available
3397
3480
  ])
3398
- return node_table
3481
+ k8s_per_node_acc_message = (
3482
+ 'Kubernetes per node accelerator availability ')
3483
+ if nodes_info.hint:
3484
+ k8s_per_node_acc_message += nodes_info.hint
3485
+ return (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
3486
+ f'{k8s_per_node_acc_message}'
3487
+ f'{colorama.Style.RESET_ALL}\n'
3488
+ f'{node_table.get_string()}')
3399
3489
 
3400
3490
  def _output() -> Generator[str, None, None]:
3401
3491
  gpu_table = log_utils.create_table(
@@ -3443,22 +3533,8 @@ def show_gpus(
3443
3533
  f'Kubernetes GPUs {context_str}'
3444
3534
  f'{colorama.Style.RESET_ALL}\n')
3445
3535
  yield from k8s_realtime_table.get_string()
3446
- k8s_node_table = _get_kubernetes_node_info_table(context)
3447
3536
  yield '\n\n'
3448
- # TODO(Doyoung): Update the message with the multi-host TPU
3449
- # support.
3450
- k8s_per_node_acc_message = (
3451
- 'Kubernetes per node accelerator availability ')
3452
- if kubernetes_utils.multi_host_tpu_exists_in_cluster(
3453
- context):
3454
- k8s_per_node_acc_message += (
3455
- '(Note: Multi-host TPUs are detected and excluded '
3456
- 'from the display as multi-host TPUs are not '
3457
- 'supported.)')
3458
- yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
3459
- f'{k8s_per_node_acc_message}'
3460
- f'{colorama.Style.RESET_ALL}\n')
3461
- yield from k8s_node_table.get_string()
3537
+ yield _format_kubernetes_node_info(context)
3462
3538
  if kubernetes_autoscaling:
3463
3539
  k8s_messages += (
3464
3540
  '\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE)
@@ -3693,6 +3769,7 @@ def storage():
3693
3769
 
3694
3770
 
3695
3771
  @storage.command('ls', cls=_DocumentedCodeCommand)
3772
+ @config_option(expose_value=False)
3696
3773
  @click.option('--verbose',
3697
3774
  '-v',
3698
3775
  default=False,
@@ -3711,6 +3788,7 @@ def storage_ls(verbose: bool):
3711
3788
 
3712
3789
 
3713
3790
  @storage.command('delete', cls=_DocumentedCodeCommand)
3791
+ @config_option(expose_value=False)
3714
3792
  @click.argument('names',
3715
3793
  required=False,
3716
3794
  type=str,
@@ -3795,6 +3873,7 @@ def jobs():
3795
3873
 
3796
3874
 
3797
3875
  @jobs.command('launch', cls=_DocumentedCodeCommand)
3876
+ @config_option(expose_value=True)
3798
3877
  @click.argument('entrypoint',
3799
3878
  required=True,
3800
3879
  type=str,
@@ -3852,6 +3931,7 @@ def jobs_launch(
3852
3931
  detach_run: bool,
3853
3932
  yes: bool,
3854
3933
  async_call: bool,
3934
+ config_override: Optional[Dict[str, Any]] = None,
3855
3935
  ):
3856
3936
  """Launch a managed job from a YAML or a command.
3857
3937
 
@@ -3892,6 +3972,7 @@ def jobs_launch(
3892
3972
  disk_tier=disk_tier,
3893
3973
  ports=ports,
3894
3974
  job_recovery=job_recovery,
3975
+ config_override=config_override,
3895
3976
  )
3896
3977
 
3897
3978
  if not isinstance(task_or_dag, sky.Dag):
@@ -3929,6 +4010,7 @@ def jobs_launch(
3929
4010
 
3930
4011
 
3931
4012
  @jobs.command('queue', cls=_DocumentedCodeCommand)
4013
+ @config_option(expose_value=False)
3932
4014
  @click.option('--verbose',
3933
4015
  '-v',
3934
4016
  default=False,
@@ -4045,6 +4127,7 @@ def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool,
4045
4127
 
4046
4128
 
4047
4129
  @jobs.command('cancel', cls=_DocumentedCodeCommand)
4130
+ @config_option(expose_value=False)
4048
4131
  @click.option('--name',
4049
4132
  '-n',
4050
4133
  required=False,
@@ -4119,6 +4202,7 @@ def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool,
4119
4202
 
4120
4203
 
4121
4204
  @jobs.command('logs', cls=_DocumentedCodeCommand)
4205
+ @config_option(expose_value=False)
4122
4206
  @click.option('--name',
4123
4207
  '-n',
4124
4208
  required=False,
@@ -4183,6 +4267,7 @@ def jobs_logs(name: Optional[str], job_id: Optional[int], follow: bool,
4183
4267
 
4184
4268
 
4185
4269
  @jobs.command('dashboard', cls=_DocumentedCodeCommand)
4270
+ @config_option(expose_value=False)
4186
4271
  @usage_lib.entrypoint
4187
4272
  def jobs_dashboard():
4188
4273
  """Opens a dashboard for managed jobs."""
@@ -4312,6 +4397,7 @@ def _generate_task_with_service(
4312
4397
 
4313
4398
 
4314
4399
  @serve.command('up', cls=_DocumentedCodeCommand)
4400
+ @config_option(expose_value=False)
4315
4401
  @click.argument('service_yaml',
4316
4402
  required=True,
4317
4403
  type=str,
@@ -4423,6 +4509,7 @@ def serve_up(
4423
4509
  # TODO(MaoZiming): Expose mix replica traffic option to user.
4424
4510
  # Currently, we do not mix traffic from old and new replicas.
4425
4511
  @serve.command('update', cls=_DocumentedCodeCommand)
4512
+ @config_option(expose_value=False)
4426
4513
  @click.argument('service_name', required=True, type=str)
4427
4514
  @click.argument('service_yaml',
4428
4515
  required=True,
@@ -4523,6 +4610,7 @@ def serve_update(service_name: str, service_yaml: Tuple[str, ...],
4523
4610
 
4524
4611
 
4525
4612
  @serve.command('status', cls=_DocumentedCodeCommand)
4613
+ @config_option(expose_value=False)
4526
4614
  @click.option('--verbose',
4527
4615
  '-v',
4528
4616
  default=False,
@@ -4648,6 +4736,7 @@ def serve_status(verbose: bool, endpoint: bool, service_names: List[str]):
4648
4736
 
4649
4737
 
4650
4738
  @serve.command('down', cls=_DocumentedCodeCommand)
4739
+ @config_option(expose_value=False)
4651
4740
  @click.argument('service_names', required=False, type=str, nargs=-1)
4652
4741
  @click.option('--all',
4653
4742
  '-a',
@@ -4761,6 +4850,7 @@ def serve_down(
4761
4850
 
4762
4851
 
4763
4852
  @serve.command('logs', cls=_DocumentedCodeCommand)
4853
+ @config_option(expose_value=False)
4764
4854
  @click.option(
4765
4855
  '--follow/--no-follow',
4766
4856
  is_flag=True,
@@ -4874,6 +4964,7 @@ def _get_candidate_configs(yaml_path: str) -> Optional[List[Dict[str, str]]]:
4874
4964
 
4875
4965
 
4876
4966
  @bench.command('launch', cls=_DocumentedCodeCommand)
4967
+ @config_option(expose_value=True)
4877
4968
  @click.argument('entrypoint',
4878
4969
  required=True,
4879
4970
  type=str,
@@ -4919,27 +5010,28 @@ def _get_candidate_configs(yaml_path: str) -> Optional[List[Dict[str, str]]]:
4919
5010
  help='Skip confirmation prompt.')
4920
5011
  @usage_lib.entrypoint
4921
5012
  def benchmark_launch(
4922
- entrypoint: str,
4923
- benchmark: str,
4924
- name: Optional[str],
4925
- workdir: Optional[str],
4926
- cloud: Optional[str],
4927
- region: Optional[str],
4928
- zone: Optional[str],
4929
- gpus: Optional[str],
4930
- num_nodes: Optional[int],
4931
- use_spot: Optional[bool],
4932
- image_id: Optional[str],
4933
- env_file: Optional[Dict[str, str]],
4934
- env: List[Tuple[str, str]],
4935
- cpus: Optional[str],
4936
- memory: Optional[str],
4937
- disk_size: Optional[int],
4938
- disk_tier: Optional[str],
4939
- ports: Tuple[str],
4940
- idle_minutes_to_autostop: Optional[int],
4941
- yes: bool,
4942
- async_call: bool, # pylint: disable=unused-argument
5013
+ entrypoint: str,
5014
+ benchmark: str,
5015
+ name: Optional[str],
5016
+ workdir: Optional[str],
5017
+ cloud: Optional[str],
5018
+ region: Optional[str],
5019
+ zone: Optional[str],
5020
+ gpus: Optional[str],
5021
+ num_nodes: Optional[int],
5022
+ use_spot: Optional[bool],
5023
+ image_id: Optional[str],
5024
+ env_file: Optional[Dict[str, str]],
5025
+ env: List[Tuple[str, str]],
5026
+ cpus: Optional[str],
5027
+ memory: Optional[str],
5028
+ disk_size: Optional[int],
5029
+ disk_tier: Optional[str],
5030
+ ports: Tuple[str],
5031
+ idle_minutes_to_autostop: Optional[int],
5032
+ yes: bool,
5033
+ async_call: bool, # pylint: disable=unused-argument
5034
+ config_override: Optional[Dict[str, Any]] = None,
4943
5035
  ) -> None:
4944
5036
  """Benchmark a task on different resources.
4945
5037
 
@@ -5048,7 +5140,8 @@ def benchmark_launch(
5048
5140
  image_id=image_id,
5049
5141
  disk_size=disk_size,
5050
5142
  disk_tier=disk_tier,
5051
- ports=ports)
5143
+ ports=ports,
5144
+ config_override=config_override)
5052
5145
  _pop_and_ignore_fields_in_override_params(
5053
5146
  override_params, field_to_ignore=['cpus', 'memory'])
5054
5147
  resources_config.update(override_params)
@@ -5113,6 +5206,7 @@ def benchmark_launch(
5113
5206
 
5114
5207
 
5115
5208
  @bench.command('ls', cls=_DocumentedCodeCommand)
5209
+ @config_option(expose_value=False)
5116
5210
  @usage_lib.entrypoint
5117
5211
  def benchmark_ls() -> None:
5118
5212
  """List the benchmark history."""
@@ -5176,6 +5270,7 @@ def benchmark_ls() -> None:
5176
5270
 
5177
5271
 
5178
5272
  @bench.command('show', cls=_DocumentedCodeCommand)
5273
+ @config_option(expose_value=False)
5179
5274
  @click.argument('benchmark', required=True, type=str)
5180
5275
  # TODO(woosuk): Add --all option to show all the collected information
5181
5276
  # (e.g., setup time, warmup steps, total steps, etc.).
@@ -5301,6 +5396,7 @@ def benchmark_show(benchmark: str) -> None:
5301
5396
 
5302
5397
 
5303
5398
  @bench.command('down', cls=_DocumentedCodeCommand)
5399
+ @config_option(expose_value=False)
5304
5400
  @click.argument('benchmark', required=True, type=str)
5305
5401
  @click.option(
5306
5402
  '--exclude',
@@ -5343,6 +5439,7 @@ def benchmark_down(
5343
5439
 
5344
5440
 
5345
5441
  @bench.command('delete', cls=_DocumentedCodeCommand)
5442
+ @config_option(expose_value=False)
5346
5443
  @click.argument('benchmarks', required=False, type=str, nargs=-1)
5347
5444
  @click.option('--all',
5348
5445
  '-a',
@@ -5477,6 +5574,7 @@ def local():
5477
5574
  help='Password for the ssh-user to execute sudo commands. '
5478
5575
  'Required only if passwordless sudo is not setup.')
5479
5576
  @local.command('up', cls=_DocumentedCodeCommand)
5577
+ @config_option(expose_value=False)
5480
5578
  @_add_click_options(_COMMON_OPTIONS)
5481
5579
  @usage_lib.entrypoint
5482
5580
  def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
@@ -5532,6 +5630,7 @@ def local_up(gpus: bool, ips: str, ssh_user: str, ssh_key_path: str,
5532
5630
 
5533
5631
 
5534
5632
  @local.command('down', cls=_DocumentedCodeCommand)
5633
+ @config_option(expose_value=False)
5535
5634
  @_add_click_options(_COMMON_OPTIONS)
5536
5635
  @usage_lib.entrypoint
5537
5636
  def local_down(async_call: bool):
@@ -5547,6 +5646,7 @@ def api():
5547
5646
 
5548
5647
 
5549
5648
  @api.command('start', cls=_DocumentedCodeCommand)
5649
+ @config_option(expose_value=False)
5550
5650
  @click.option('--deploy',
5551
5651
  type=bool,
5552
5652
  is_flag=True,
@@ -5579,6 +5679,7 @@ def api_start(deploy: bool, host: Optional[str], foreground: bool):
5579
5679
 
5580
5680
 
5581
5681
  @api.command('stop', cls=_DocumentedCodeCommand)
5682
+ @config_option(expose_value=False)
5582
5683
  @usage_lib.entrypoint
5583
5684
  def api_stop():
5584
5685
  """Stops the SkyPilot API server locally."""
@@ -5586,6 +5687,7 @@ def api_stop():
5586
5687
 
5587
5688
 
5588
5689
  @api.command('logs', cls=_DocumentedCodeCommand)
5690
+ @config_option(expose_value=False)
5589
5691
  @click.argument('request_id', required=False, type=str)
5590
5692
  @click.option('--server-logs',
5591
5693
  is_flag=True,
@@ -5625,6 +5727,7 @@ def api_logs(request_id: Optional[str], server_logs: bool,
5625
5727
 
5626
5728
 
5627
5729
  @api.command('cancel', cls=_DocumentedCodeCommand)
5730
+ @config_option(expose_value=False)
5628
5731
  @click.argument('request_ids', required=False, type=str, nargs=-1)
5629
5732
  @click.option('--all',
5630
5733
  '-a',
@@ -5666,6 +5769,7 @@ def api_cancel(request_ids: Optional[List[str]], all: bool, all_users: bool):
5666
5769
 
5667
5770
 
5668
5771
  @api.command('status', cls=_DocumentedCodeCommand)
5772
+ @config_option(expose_value=False)
5669
5773
  @click.argument('request_ids', required=False, type=str, nargs=-1)
5670
5774
  @click.option('--all-status',
5671
5775
  '-a',
@@ -5709,6 +5813,7 @@ def api_status(request_ids: Optional[List[str]], all_status: bool,
5709
5813
 
5710
5814
 
5711
5815
  @api.command('login', cls=_DocumentedCodeCommand)
5816
+ @config_option(expose_value=False)
5712
5817
  @click.option('--endpoint',
5713
5818
  '-e',
5714
5819
  required=False,
@@ -5720,6 +5825,7 @@ def api_login(endpoint: Optional[str]):
5720
5825
 
5721
5826
 
5722
5827
  @api.command('info', cls=_DocumentedCodeCommand)
5828
+ @config_option(expose_value=False)
5723
5829
  @usage_lib.entrypoint
5724
5830
  def api_info():
5725
5831
  """Shows the SkyPilot API server URL."""
sky/client/sdk.py CHANGED
@@ -1408,8 +1408,8 @@ def kubernetes_node_info(
1408
1408
  The request ID of the Kubernetes node info request.
1409
1409
 
1410
1410
  Request Returns:
1411
- Dict[str, KubernetesNodeInfo]: Dictionary containing the node name as
1412
- key and the KubernetesNodeInfo object as value
1411
+ KubernetesNodesInfo: A model that contains the node info map and other
1412
+ information.
1413
1413
  """
1414
1414
  body = payloads.KubernetesNodeInfoRequestBody(context=context)
1415
1415
  response = requests.post(
@@ -1815,12 +1815,12 @@ def api_login(endpoint: Optional[str] = None) -> None:
1815
1815
  config_path = pathlib.Path(
1816
1816
  skypilot_config.get_user_config_path()).expanduser()
1817
1817
  with filelock.FileLock(config_path.with_suffix('.lock')):
1818
- if not skypilot_config.loaded():
1818
+ if not config_path.exists():
1819
1819
  config_path.touch()
1820
1820
  config = {'api_server': {'endpoint': endpoint}}
1821
1821
  else:
1822
- config = skypilot_config.set_nested(('api_server', 'endpoint'),
1823
- endpoint)
1822
+ config = skypilot_config.get_user_config()
1823
+ config.set_nested(('api_server', 'endpoint'), endpoint)
1824
1824
  common_utils.dump_yaml(str(config_path), config)
1825
1825
  click.secho(f'Logged in to SkyPilot API server at {endpoint}',
1826
1826
  fg='green')
sky/clouds/aws.py CHANGED
@@ -472,10 +472,10 @@ class AWS(clouds.Cloud):
472
472
  with ux_utils.print_exception_no_traceback():
473
473
  logger.warning(
474
474
  f'Skip opening ports {resources.ports} for cluster {cluster_name!r}, '
475
- 'as `aws.security_group_name` in `~/.sky/skyconfig.yaml` is specified as '
475
+ 'as `aws.security_group_name` in `~/.sky/config.yaml` is specified as '
476
476
  f' {security_group!r}. Please make sure the specified security group '
477
477
  'has requested ports setup; or, leave out `aws.security_group_name` '
478
- 'in `~/.sky/skyconfig.yaml`.')
478
+ 'in `~/.sky/config.yaml`.')
479
479
 
480
480
  return {
481
481
  'instance_type': r.instance_type,
sky/clouds/kubernetes.py CHANGED
@@ -47,8 +47,6 @@ class Kubernetes(clouds.Cloud):
47
47
  SKY_SSH_KEY_SECRET_NAME = 'sky-ssh-keys'
48
48
  SKY_SSH_JUMP_NAME = 'sky-ssh-jump-pod'
49
49
 
50
- LEGACY_SINGLETON_REGION = 'kubernetes'
51
-
52
50
  # Limit the length of the cluster name to avoid exceeding the limit of 63
53
51
  # characters for Kubernetes resources. We limit to 42 characters (63-21) to
54
52
  # allow additional characters for creating ingress services to expose ports.
@@ -753,12 +751,6 @@ class Kubernetes(clouds.Cloud):
753
751
  instance_type)
754
752
 
755
753
  def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
756
- if region == self.LEGACY_SINGLETON_REGION:
757
- # For backward compatibility, we allow the region to be set to the
758
- # legacy singleton region.
759
- # TODO: Remove this after 0.9.0.
760
- return region, zone
761
-
762
754
  if region == kubernetes.in_cluster_context_name():
763
755
  # If running incluster, we set region to IN_CLUSTER_REGION
764
756
  # since there is no context name available.
sky/clouds/oci.py CHANGED
@@ -9,7 +9,7 @@ History:
9
9
  file path resolution (by os.path.expanduser) when construct the file
10
10
  mounts. This bug will cause the created workder nodes located in different
11
11
  compartment and VCN than the header node if user specifies compartment_id
12
- in the sky config file, because the ~/.sky/skyconfig.yaml is not
12
+ in the sky config file, because the ~/.sky/config.yaml is not
13
13
  sync-ed to the remote machine.
14
14
  The workaround is set the sky config file path using ENV before running
15
15
  the sky launch: export SKYPILOT_CONFIG=/home/ubuntu/.sky/config.yaml