skypilot-nightly 1.0.0.dev20250520__py3-none-any.whl → 1.0.0.dev20250521__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +4 -1
- sky/backends/cloud_vm_ray_backend.py +56 -37
- sky/check.py +3 -3
- sky/cli.py +89 -16
- sky/client/cli.py +89 -16
- sky/client/sdk.py +20 -3
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +6 -0
- sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +6 -0
- sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +1 -0
- sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +1 -0
- sky/dashboard/out/_next/static/chunks/{678-206dddca808e6d16.js → 582-683f4f27b81996dc.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +1 -0
- sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +3 -0
- sky/dashboard/out/_next/static/hvWzC5E6Q4CcKzXcWbgig/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/execution.py +1 -1
- sky/jobs/server/core.py +1 -1
- sky/jobs/utils.py +38 -7
- sky/optimizer.py +36 -29
- sky/provision/provisioner.py +16 -7
- sky/resources.py +60 -15
- sky/serve/serve_utils.py +5 -13
- sky/server/common.py +14 -5
- sky/server/requests/payloads.py +3 -3
- sky/utils/cli_utils/status_utils.py +95 -56
- sky/utils/common_utils.py +35 -2
- sky/utils/infra_utils.py +175 -0
- sky/utils/resources_utils.py +41 -21
- sky/utils/schemas.py +65 -5
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250521.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250521.dist-info}/RECORD +50 -47
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250521.dist-info}/WHEEL +1 -1
- sky/dashboard/out/_next/static/8hlc2dkbIDDBOkxtEW7X6/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/845-0ca6f2c1ba667c3b.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- /sky/dashboard/out/_next/static/{8hlc2dkbIDDBOkxtEW7X6 → hvWzC5E6Q4CcKzXcWbgig}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250521.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250521.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250521.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'e1716e31bee6a918b246f8134f2e086e7011bdc8'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20250520'
|
38
|
+
__version__ = '1.0.0.dev20250521'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/backends/backend_utils.py
CHANGED
@@ -2570,7 +2570,10 @@ def get_clusters(
|
|
2570
2570
|
if handle is None:
|
2571
2571
|
return
|
2572
2572
|
record['resources_str'] = resources_utils.get_readable_resources_repr(
|
2573
|
-
handle)
|
2573
|
+
handle, simplify=True)
|
2574
|
+
record[
|
2575
|
+
'resources_str_full'] = resources_utils.get_readable_resources_repr(
|
2576
|
+
handle, simplify=False)
|
2574
2577
|
credentials = ssh_credential_from_yaml(handle.cluster_yaml,
|
2575
2578
|
handle.docker_user,
|
2576
2579
|
handle.ssh_user)
|
sky/backends/cloud_vm_ray_backend.py
CHANGED
@@ -8,7 +8,6 @@ import os
|
|
8
8
|
import pathlib
|
9
9
|
import re
|
10
10
|
import shlex
|
11
|
-
import shutil
|
12
11
|
import signal
|
13
12
|
import subprocess
|
14
13
|
import sys
|
@@ -2157,11 +2156,18 @@ class RetryingVmProvisioner(object):
|
|
2157
2156
|
# possible resources or the requested resources is too
|
2158
2157
|
# restrictive. If we reach here, our failover logic finally
|
2159
2158
|
# ends here.
|
2160
|
-
table = log_utils.create_table(['
|
2159
|
+
table = log_utils.create_table(['INFRA', 'RESOURCES', 'REASON'])
|
2161
2160
|
for (resource, exception) in resource_exceptions.items():
|
2162
|
-
table.add_row(
|
2163
|
-
|
2164
|
-
|
2161
|
+
table.add_row([
|
2162
|
+
resource.infra.formatted_str(),
|
2163
|
+
resources_utils.format_resource(resource,
|
2164
|
+
simplify=True),
|
2165
|
+
exception
|
2166
|
+
])
|
2167
|
+
# Set the max width of REASON column to 80 to avoid the table
|
2168
|
+
# being wrapped in a unreadable way.
|
2169
|
+
# pylint: disable=protected-access
|
2170
|
+
table._max_width = {'REASON': 80}
|
2165
2171
|
raise exceptions.ResourcesUnavailableError(
|
2166
2172
|
_RESOURCES_UNAVAILABLE_LOG + '\n' + table.get_string(),
|
2167
2173
|
failover_history=failover_history)
|
@@ -3340,33 +3346,35 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3340
3346
|
return returncode
|
3341
3347
|
|
3342
3348
|
returncode = _run_setup(f'{create_script_code} && {setup_cmd}',)
|
3343
|
-
|
3344
|
-
|
3349
|
+
|
3350
|
+
def _load_setup_log_and_match(match_str: str) -> bool:
|
3345
3351
|
try:
|
3346
3352
|
with open(os.path.expanduser(setup_log_path),
|
3347
3353
|
'r',
|
3348
3354
|
encoding='utf-8') as f:
|
3349
|
-
|
3350
|
-
is_message_too_long = True
|
3355
|
+
return match_str.lower() in f.read().lower()
|
3351
3356
|
except Exception as e: # pylint: disable=broad-except
|
3352
3357
|
# We don't crash the setup if we cannot read the log file.
|
3353
3358
|
# Instead, we should retry the setup with dumping the script
|
3354
3359
|
# to a file to be safe.
|
3355
|
-
logger.debug('Failed to read setup log file '
|
3356
|
-
f'{setup_log_path}: {e}')
|
3357
|
-
is_message_too_long = True
|
3358
|
-
|
3359
|
-
if is_message_too_long:
|
3360
|
-
# If the setup script is too long, we retry it with dumping
|
3361
|
-
# the script to a file and running it with SSH. We use a
|
3362
|
-
# general length limit check before but it could be
|
3363
|
-
# inaccurate on some systems.
|
3364
3360
|
logger.debug(
|
3365
|
-
'Failed to
|
3366
|
-
|
3367
|
-
|
3368
|
-
|
3369
|
-
|
3361
|
+
f'Failed to read setup log file {setup_log_path}: {e}')
|
3362
|
+
return True
|
3363
|
+
|
3364
|
+
if ((returncode == 255 and _load_setup_log_and_match('too long')) or
|
3365
|
+
(returncode == 1 and
|
3366
|
+
_load_setup_log_and_match('request-uri too large'))):
|
3367
|
+
# If the setup script is too long, we retry it with dumping
|
3368
|
+
# the script to a file and running it with SSH. We use a
|
3369
|
+
# general length limit check before but it could be
|
3370
|
+
# inaccurate on some systems.
|
3371
|
+
# When there is a cloudflare proxy in front of the remote, it
|
3372
|
+
# could cause `414 Request-URI Too Large` error.
|
3373
|
+
logger.debug('Failed to run setup command inline due to '
|
3374
|
+
'command length limit. Dumping setup script to '
|
3375
|
+
'file and running it with SSH.')
|
3376
|
+
_dump_final_script(setup_script)
|
3377
|
+
returncode = _run_setup(setup_cmd)
|
3370
3378
|
|
3371
3379
|
def error_message() -> str:
|
3372
3380
|
# Use the function to avoid tailing the file in success case
|
@@ -3482,18 +3490,23 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3482
3490
|
_dump_code_to_file(codegen)
|
3483
3491
|
job_submit_cmd = f'{mkdir_code} && {code}'
|
3484
3492
|
|
3485
|
-
|
3486
|
-
|
3487
|
-
|
3488
|
-
|
3489
|
-
|
3490
|
-
|
3491
|
-
|
3492
|
-
|
3493
|
-
|
3494
|
-
|
3495
|
-
|
3496
|
-
|
3493
|
+
def _maybe_add_managed_job_code(job_submit_cmd: str) -> str:
|
3494
|
+
if managed_job_dag is not None:
|
3495
|
+
# Add the managed job to job queue database.
|
3496
|
+
managed_job_codegen = managed_jobs.ManagedJobCodeGen()
|
3497
|
+
managed_job_code = managed_job_codegen.set_pending(
|
3498
|
+
job_id, managed_job_dag)
|
3499
|
+
# Set the managed job to PENDING state to make sure that this
|
3500
|
+
# managed job appears in the `sky jobs queue`, even if it needs
|
3501
|
+
# to wait to be submitted.
|
3502
|
+
# We cannot set the managed job to PENDING state in the job
|
3503
|
+
# template (jobs-controller.yaml.j2), as it may need to wait for
|
3504
|
+
# the run commands to be scheduled on the job controller in
|
3505
|
+
# high-load cases.
|
3506
|
+
job_submit_cmd += ' && ' + managed_job_code
|
3507
|
+
return job_submit_cmd
|
3508
|
+
|
3509
|
+
job_submit_cmd = _maybe_add_managed_job_code(job_submit_cmd)
|
3497
3510
|
|
3498
3511
|
returncode, stdout, stderr = self.run_on_head(handle,
|
3499
3512
|
job_submit_cmd,
|
@@ -3503,15 +3516,21 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3503
3516
|
# running a job. Necessitating calling `sky launch`.
|
3504
3517
|
backend_utils.check_stale_runtime_on_remote(returncode, stderr,
|
3505
3518
|
handle.cluster_name)
|
3506
|
-
|
3519
|
+
output = stdout + stderr
|
3520
|
+
if ((returncode == 255 and 'too long' in output.lower()) or
|
3521
|
+
(returncode == 1 and 'request-uri too large' in output.lower())):
|
3507
3522
|
# If the generated script is too long, we retry it with dumping
|
3508
3523
|
# the script to a file and running it with SSH. We use a general
|
3509
3524
|
# length limit check before but it could be inaccurate on some
|
3510
3525
|
# systems.
|
3526
|
+
# When there is a cloudflare proxy in front of the remote, it could
|
3527
|
+
# cause `414 Request-URI Too Large` error.
|
3511
3528
|
logger.debug('Failed to submit job due to command length limit. '
|
3512
|
-
'Dumping job to file and running it with SSH.'
|
3529
|
+
'Dumping job to file and running it with SSH. '
|
3530
|
+
f'Output: {output}')
|
3513
3531
|
_dump_code_to_file(codegen)
|
3514
3532
|
job_submit_cmd = f'{mkdir_code} && {code}'
|
3533
|
+
job_submit_cmd = _maybe_add_managed_job_code(job_submit_cmd)
|
3515
3534
|
returncode, stdout, stderr = self.run_on_head(handle,
|
3516
3535
|
job_submit_cmd,
|
3517
3536
|
stream_logs=False,
|
sky/check.py
CHANGED
@@ -34,7 +34,7 @@ def check_capabilities(
|
|
34
34
|
echo = (lambda *_args, **_kwargs: None
|
35
35
|
) if quiet else lambda *args, **kwargs: click.echo(
|
36
36
|
*args, **kwargs, color=True)
|
37
|
-
echo('Checking credentials to enable
|
37
|
+
echo('Checking credentials to enable infra for SkyPilot.')
|
38
38
|
if capabilities is None:
|
39
39
|
capabilities = sky_cloud.ALL_CAPABILITIES
|
40
40
|
assert capabilities is not None
|
@@ -93,7 +93,7 @@ def check_capabilities(
|
|
93
93
|
]
|
94
94
|
|
95
95
|
combinations = list(itertools.product(clouds_to_check, capabilities))
|
96
|
-
with rich_utils.safe_status('Checking
|
96
|
+
with rich_utils.safe_status('Checking infra choices...'):
|
97
97
|
check_results = subprocess_utils.run_in_parallel(
|
98
98
|
check_one_cloud_one_capability, combinations)
|
99
99
|
|
@@ -189,7 +189,7 @@ def check_capabilities(
|
|
189
189
|
key=lambda item: item[0])
|
190
190
|
])
|
191
191
|
echo(f'\n{colorama.Fore.GREEN}{PARTY_POPPER_EMOJI} '
|
192
|
-
f'Enabled
|
192
|
+
f'Enabled infra {PARTY_POPPER_EMOJI}'
|
193
193
|
f'{colorama.Style.RESET_ALL}{enabled_clouds_str}')
|
194
194
|
return enabled_clouds
|
195
195
|
|
sky/cli.py
CHANGED
@@ -78,6 +78,7 @@ from sky.utils import common_utils
|
|
78
78
|
from sky.utils import controller_utils
|
79
79
|
from sky.utils import dag_utils
|
80
80
|
from sky.utils import env_options
|
81
|
+
from sky.utils import infra_utils
|
81
82
|
from sky.utils import log_utils
|
82
83
|
from sky.utils import registry
|
83
84
|
from sky.utils import resources_utils
|
@@ -345,24 +346,39 @@ _TASK_OPTIONS = [
|
|
345
346
|
'where the task will be invoked. '
|
346
347
|
'Overrides the "workdir" config in the YAML if both are supplied.'
|
347
348
|
)),
|
349
|
+
click.option(
|
350
|
+
'--infra',
|
351
|
+
required=False,
|
352
|
+
type=str,
|
353
|
+
help='Infrastructure to use. '
|
354
|
+
'Format: cloud, cloud/region, cloud/region/zone, '
|
355
|
+
'or kubernetes/context-name. '
|
356
|
+
'Examples: aws, aws/us-east-1, aws/us-east-1/us-east-1a, '
|
357
|
+
# TODO(zhwu): we have to use `\*` to make sure the docs build
|
358
|
+
# not complaining about the `*`, but this will cause `--help`
|
359
|
+
# to show `\*` instead of `*`.
|
360
|
+
'aws/\\*/us-east-1a, kubernetes/my-cluster-context.'),
|
348
361
|
click.option(
|
349
362
|
'--cloud',
|
350
363
|
required=False,
|
351
364
|
type=str,
|
352
365
|
help=('The cloud to use. If specified, overrides the "resources.cloud" '
|
353
|
-
'config. Passing "none" resets the config.')
|
366
|
+
'config. Passing "none" resets the config.'),
|
367
|
+
hidden=True),
|
354
368
|
click.option(
|
355
369
|
'--region',
|
356
370
|
required=False,
|
357
371
|
type=str,
|
358
372
|
help=('The region to use. If specified, overrides the '
|
359
|
-
'"resources.region" config. Passing "none" resets the config.')
|
373
|
+
'"resources.region" config. Passing "none" resets the config.'),
|
374
|
+
hidden=True),
|
360
375
|
click.option(
|
361
376
|
'--zone',
|
362
377
|
required=False,
|
363
378
|
type=str,
|
364
379
|
help=('The zone to use. If specified, overrides the '
|
365
|
-
'"resources.zone" config. Passing "none" resets the config.')
|
380
|
+
'"resources.zone" config. Passing "none" resets the config.'),
|
381
|
+
hidden=True),
|
366
382
|
click.option(
|
367
383
|
'--num-nodes',
|
368
384
|
required=False,
|
@@ -1063,6 +1079,33 @@ def cli():
|
|
1063
1079
|
pass
|
1064
1080
|
|
1065
1081
|
|
1082
|
+
def _handle_infra_cloud_region_zone_options(infra: Optional[str],
|
1083
|
+
cloud: Optional[str],
|
1084
|
+
region: Optional[str],
|
1085
|
+
zone: Optional[str]):
|
1086
|
+
"""Handle the backward compatibility for --infra and --cloud/region/zone.
|
1087
|
+
|
1088
|
+
Returns:
|
1089
|
+
cloud, region, zone
|
1090
|
+
"""
|
1091
|
+
if cloud is not None or region is not None or zone is not None:
|
1092
|
+
click.secho(
|
1093
|
+
'The --cloud, --region, and --zone options are deprecated. '
|
1094
|
+
'Use --infra instead.',
|
1095
|
+
fg='yellow')
|
1096
|
+
if infra is not None:
|
1097
|
+
with ux_utils.print_exception_no_traceback():
|
1098
|
+
raise ValueError('Cannot specify both --infra and '
|
1099
|
+
'--cloud, --region, or --zone.')
|
1100
|
+
|
1101
|
+
if infra is not None:
|
1102
|
+
infra_info = infra_utils.InfraInfo.from_str(infra)
|
1103
|
+
cloud = infra_info.cloud
|
1104
|
+
region = infra_info.region
|
1105
|
+
zone = infra_info.zone
|
1106
|
+
return cloud, region, zone
|
1107
|
+
|
1108
|
+
|
1066
1109
|
@cli.command(cls=_DocumentedCodeCommand)
|
1067
1110
|
@config_option(expose_value=True)
|
1068
1111
|
@click.argument('entrypoint',
|
@@ -1172,6 +1215,7 @@ def launch(
|
|
1172
1215
|
backend_name: Optional[str],
|
1173
1216
|
name: Optional[str],
|
1174
1217
|
workdir: Optional[str],
|
1218
|
+
infra: Optional[str],
|
1175
1219
|
cloud: Optional[str],
|
1176
1220
|
region: Optional[str],
|
1177
1221
|
zone: Optional[str],
|
@@ -1219,6 +1263,9 @@ def launch(
|
|
1219
1263
|
if backend_name is None:
|
1220
1264
|
backend_name = backends.CloudVmRayBackend.NAME
|
1221
1265
|
|
1266
|
+
cloud, region, zone = _handle_infra_cloud_region_zone_options(
|
1267
|
+
infra, cloud, region, zone)
|
1268
|
+
|
1222
1269
|
task_or_dag = _make_task_or_dag_from_entrypoint_with_overrides(
|
1223
1270
|
entrypoint=entrypoint,
|
1224
1271
|
name=name,
|
@@ -1336,6 +1383,7 @@ def exec(cluster: Optional[str],
|
|
1336
1383
|
entrypoint: Tuple[str, ...],
|
1337
1384
|
detach_run: bool,
|
1338
1385
|
name: Optional[str],
|
1386
|
+
infra: Optional[str],
|
1339
1387
|
cloud: Optional[str],
|
1340
1388
|
region: Optional[str],
|
1341
1389
|
zone: Optional[str],
|
@@ -1427,6 +1475,9 @@ def exec(cluster: Optional[str],
|
|
1427
1475
|
controller_utils.check_cluster_name_not_controller(
|
1428
1476
|
cluster, operation_str='Executing task on it')
|
1429
1477
|
|
1478
|
+
cloud, region, zone = _handle_infra_cloud_region_zone_options(
|
1479
|
+
infra, cloud, region, zone)
|
1480
|
+
|
1430
1481
|
task_or_dag = _make_task_or_dag_from_entrypoint_with_overrides(
|
1431
1482
|
entrypoint=entrypoint,
|
1432
1483
|
name=name,
|
@@ -3265,7 +3316,7 @@ def _down_or_stop_clusters(
|
|
3265
3316
|
|
3266
3317
|
@cli.command(cls=_DocumentedCodeCommand)
|
3267
3318
|
@config_option(expose_value=False)
|
3268
|
-
@click.argument('clouds', required=False, type=str, nargs=-1)
|
3319
|
+
@click.argument('infra_list', required=False, type=str, nargs=-1)
|
3269
3320
|
@click.option('--verbose',
|
3270
3321
|
'-v',
|
3271
3322
|
is_flag=True,
|
@@ -3273,7 +3324,7 @@ def _down_or_stop_clusters(
|
|
3273
3324
|
help='Show the activated account for each cloud.')
|
3274
3325
|
@usage_lib.entrypoint
|
3275
3326
|
# pylint: disable=redefined-outer-name
|
3276
|
-
def check(clouds: Tuple[str], verbose: bool):
|
3327
|
+
def check(infra_list: Tuple[str], verbose: bool):
|
3277
3328
|
"""Check which clouds are available to use.
|
3278
3329
|
|
3279
3330
|
This checks access credentials for all clouds supported by SkyPilot. If a
|
@@ -3295,8 +3346,8 @@ def check(clouds: Tuple[str], verbose: bool):
|
|
3295
3346
|
# Check only specific clouds - AWS and GCP.
|
3296
3347
|
sky check aws gcp
|
3297
3348
|
"""
|
3298
|
-
|
3299
|
-
request_id = sdk.check(
|
3349
|
+
infra_arg = infra_list if len(infra_list) > 0 else None
|
3350
|
+
request_id = sdk.check(infra_list=infra_arg, verbose=verbose)
|
3300
3351
|
sdk.stream_and_get(request_id)
|
3301
3352
|
api_server_url = server_common.get_server_url()
|
3302
3353
|
click.echo()
|
@@ -3312,10 +3363,15 @@ def check(clouds: Tuple[str], verbose: bool):
|
|
3312
3363
|
is_flag=True,
|
3313
3364
|
default=False,
|
3314
3365
|
help='Show details of all GPU/TPU/accelerator offerings.')
|
3366
|
+
@click.option('--infra',
|
3367
|
+
default=None,
|
3368
|
+
type=str,
|
3369
|
+
help='Infrastructure to query. Examples: "aws", "aws/us-east-1"')
|
3315
3370
|
@click.option('--cloud',
|
3316
3371
|
default=None,
|
3317
3372
|
type=str,
|
3318
|
-
help='Cloud provider to query.'
|
3373
|
+
help='Cloud provider to query.',
|
3374
|
+
hidden=True)
|
3319
3375
|
@click.option(
|
3320
3376
|
'--region',
|
3321
3377
|
required=False,
|
@@ -3323,6 +3379,7 @@ def check(clouds: Tuple[str], verbose: bool):
|
|
3323
3379
|
help=
|
3324
3380
|
('The region to use. If not specified, shows accelerators from all regions.'
|
3325
3381
|
),
|
3382
|
+
hidden=True,
|
3326
3383
|
)
|
3327
3384
|
@click.option(
|
3328
3385
|
'--all-regions',
|
@@ -3335,6 +3392,7 @@ def check(clouds: Tuple[str], verbose: bool):
|
|
3335
3392
|
def show_gpus(
|
3336
3393
|
accelerator_str: Optional[str],
|
3337
3394
|
all: bool, # pylint: disable=redefined-builtin
|
3395
|
+
infra: Optional[str],
|
3338
3396
|
cloud: Optional[str],
|
3339
3397
|
region: Optional[str],
|
3340
3398
|
all_regions: Optional[bool]):
|
@@ -3376,6 +3434,11 @@ def show_gpus(
|
|
3376
3434
|
* ``UTILIZATION`` (Kubernetes only): Total number of GPUs free / available
|
3377
3435
|
in the Kubernetes cluster.
|
3378
3436
|
"""
|
3437
|
+
cloud, region, _ = _handle_infra_cloud_region_zone_options(infra,
|
3438
|
+
cloud,
|
3439
|
+
region,
|
3440
|
+
zone=None)
|
3441
|
+
|
3379
3442
|
# validation for the --region flag
|
3380
3443
|
if region is not None and cloud is None:
|
3381
3444
|
raise click.UsageError(
|
@@ -3991,6 +4054,7 @@ def jobs_launch(
|
|
3991
4054
|
name: Optional[str],
|
3992
4055
|
cluster: Optional[str],
|
3993
4056
|
workdir: Optional[str],
|
4057
|
+
infra: Optional[str],
|
3994
4058
|
cloud: Optional[str],
|
3995
4059
|
region: Optional[str],
|
3996
4060
|
zone: Optional[str],
|
@@ -4032,6 +4096,8 @@ def jobs_launch(
|
|
4032
4096
|
'Use one of the flags as they are alias.')
|
4033
4097
|
name = cluster
|
4034
4098
|
env = _merge_env_vars(env_file, env)
|
4099
|
+
cloud, region, zone = _handle_infra_cloud_region_zone_options(
|
4100
|
+
infra, cloud, region, zone)
|
4035
4101
|
task_or_dag = _make_task_or_dag_from_entrypoint_with_overrides(
|
4036
4102
|
entrypoint,
|
4037
4103
|
name=name,
|
@@ -4509,6 +4575,7 @@ def serve_up(
|
|
4509
4575
|
service_yaml: Tuple[str, ...],
|
4510
4576
|
service_name: Optional[str],
|
4511
4577
|
workdir: Optional[str],
|
4578
|
+
infra: Optional[str],
|
4512
4579
|
cloud: Optional[str],
|
4513
4580
|
region: Optional[str],
|
4514
4581
|
zone: Optional[str],
|
@@ -4555,6 +4622,8 @@ def serve_up(
|
|
4555
4622
|
|
4556
4623
|
sky serve up service.yaml
|
4557
4624
|
"""
|
4625
|
+
cloud, region, zone = _handle_infra_cloud_region_zone_options(
|
4626
|
+
infra, cloud, region, zone)
|
4558
4627
|
if service_name is None:
|
4559
4628
|
service_name = serve_lib.generate_service_name()
|
4560
4629
|
|
@@ -4621,13 +4690,13 @@ def serve_up(
|
|
4621
4690
|
@timeline.event
|
4622
4691
|
@usage_lib.entrypoint
|
4623
4692
|
def serve_update(service_name: str, service_yaml: Tuple[str, ...],
|
4624
|
-
workdir: Optional[str],
|
4625
|
-
|
4626
|
-
|
4627
|
-
|
4628
|
-
|
4629
|
-
|
4630
|
-
cpus: Optional[str], memory: Optional[str],
|
4693
|
+
workdir: Optional[str], infra: Optional[str],
|
4694
|
+
cloud: Optional[str], region: Optional[str],
|
4695
|
+
zone: Optional[str], num_nodes: Optional[int],
|
4696
|
+
use_spot: Optional[bool], image_id: Optional[str],
|
4697
|
+
env_file: Optional[Dict[str, str]], env: List[Tuple[str, str]],
|
4698
|
+
gpus: Optional[str], instance_type: Optional[str],
|
4699
|
+
ports: Tuple[str], cpus: Optional[str], memory: Optional[str],
|
4631
4700
|
disk_size: Optional[int], disk_tier: Optional[str], mode: str,
|
4632
4701
|
yes: bool, async_call: bool):
|
4633
4702
|
"""Update a SkyServe service.
|
@@ -4659,6 +4728,8 @@ def serve_update(service_name: str, service_yaml: Tuple[str, ...],
|
|
4659
4728
|
sky serve update --mode blue_green sky-service-16aa new_service.yaml
|
4660
4729
|
|
4661
4730
|
"""
|
4731
|
+
cloud, region, zone = _handle_infra_cloud_region_zone_options(
|
4732
|
+
infra, cloud, region, zone)
|
4662
4733
|
task = _generate_task_with_service(
|
4663
4734
|
service_name=service_name,
|
4664
4735
|
service_yaml_args=service_yaml,
|
@@ -5173,6 +5244,7 @@ def benchmark_launch(
|
|
5173
5244
|
benchmark: str,
|
5174
5245
|
name: Optional[str],
|
5175
5246
|
workdir: Optional[str],
|
5247
|
+
infra: Optional[str],
|
5176
5248
|
cloud: Optional[str],
|
5177
5249
|
region: Optional[str],
|
5178
5250
|
zone: Optional[str],
|
@@ -5206,7 +5278,6 @@ def benchmark_launch(
|
|
5206
5278
|
raise click.BadParameter(f'Benchmark {benchmark} already exists. '
|
5207
5279
|
'To delete the previous benchmark result, '
|
5208
5280
|
f'run `sky bench delete {benchmark}`.')
|
5209
|
-
|
5210
5281
|
entrypoint = ' '.join(entrypoint)
|
5211
5282
|
if not entrypoint:
|
5212
5283
|
raise click.BadParameter('Please specify a task yaml to benchmark.')
|
@@ -5217,6 +5288,8 @@ def benchmark_launch(
|
|
5217
5288
|
'Sky Benchmark does not support command line tasks. '
|
5218
5289
|
'Please provide a YAML file.')
|
5219
5290
|
assert config is not None, (is_yaml, config)
|
5291
|
+
cloud, region, zone = _handle_infra_cloud_region_zone_options(
|
5292
|
+
infra, cloud, region, zone)
|
5220
5293
|
|
5221
5294
|
click.secho('Benchmarking a task from YAML: ', fg='cyan', nl=False)
|
5222
5295
|
click.secho(entrypoint, bold=True)
|