skypilot-nightly 1.0.0.dev20250215__py3-none-any.whl → 1.0.0.dev20250217__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +48 -22
- sky/adaptors/aws.py +2 -1
- sky/adaptors/azure.py +4 -4
- sky/adaptors/cloudflare.py +4 -4
- sky/adaptors/kubernetes.py +8 -8
- sky/authentication.py +42 -45
- sky/backends/backend.py +2 -2
- sky/backends/backend_utils.py +108 -221
- sky/backends/cloud_vm_ray_backend.py +283 -282
- sky/benchmark/benchmark_utils.py +6 -2
- sky/check.py +40 -28
- sky/cli.py +1213 -1116
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5644 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1757 -0
- sky/cloud_stores.py +12 -6
- sky/clouds/__init__.py +0 -2
- sky/clouds/aws.py +20 -13
- sky/clouds/azure.py +5 -3
- sky/clouds/cloud.py +1 -1
- sky/clouds/cudo.py +2 -1
- sky/clouds/do.py +2 -1
- sky/clouds/fluidstack.py +3 -2
- sky/clouds/gcp.py +10 -8
- sky/clouds/ibm.py +8 -7
- sky/clouds/kubernetes.py +7 -6
- sky/clouds/lambda_cloud.py +8 -7
- sky/clouds/oci.py +4 -3
- sky/clouds/paperspace.py +2 -1
- sky/clouds/runpod.py +2 -1
- sky/clouds/scp.py +8 -7
- sky/clouds/service_catalog/__init__.py +3 -3
- sky/clouds/service_catalog/aws_catalog.py +7 -1
- sky/clouds/service_catalog/common.py +4 -2
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +2 -2
- sky/clouds/utils/oci_utils.py +1 -1
- sky/clouds/vast.py +2 -1
- sky/clouds/vsphere.py +2 -1
- sky/core.py +263 -99
- sky/dag.py +4 -0
- sky/data/mounting_utils.py +2 -1
- sky/data/storage.py +97 -35
- sky/data/storage_utils.py +69 -9
- sky/exceptions.py +138 -5
- sky/execution.py +47 -50
- sky/global_user_state.py +105 -22
- sky/jobs/__init__.py +12 -14
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +296 -0
- sky/jobs/constants.py +30 -1
- sky/jobs/controller.py +12 -6
- sky/jobs/dashboard/dashboard.py +2 -6
- sky/jobs/recovery_strategy.py +22 -29
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/{core.py → server/core.py} +101 -34
- sky/jobs/server/dashboard_utils.py +64 -0
- sky/jobs/server/server.py +182 -0
- sky/jobs/utils.py +32 -23
- sky/models.py +27 -0
- sky/optimizer.py +9 -11
- sky/provision/__init__.py +6 -3
- sky/provision/aws/config.py +2 -2
- sky/provision/aws/instance.py +1 -1
- sky/provision/azure/instance.py +1 -1
- sky/provision/cudo/instance.py +1 -1
- sky/provision/do/instance.py +1 -1
- sky/provision/do/utils.py +0 -5
- sky/provision/fluidstack/fluidstack_utils.py +4 -3
- sky/provision/fluidstack/instance.py +4 -2
- sky/provision/gcp/instance.py +1 -1
- sky/provision/instance_setup.py +2 -2
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +1 -1
- sky/provision/kubernetes/utils.py +67 -76
- sky/provision/lambda_cloud/instance.py +3 -15
- sky/provision/logging.py +1 -1
- sky/provision/oci/instance.py +7 -4
- sky/provision/paperspace/instance.py +1 -1
- sky/provision/provisioner.py +3 -2
- sky/provision/runpod/instance.py +1 -1
- sky/provision/vast/instance.py +1 -1
- sky/provision/vast/utils.py +2 -1
- sky/provision/vsphere/instance.py +2 -11
- sky/resources.py +55 -40
- sky/serve/__init__.py +6 -10
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +3 -0
- sky/serve/replica_managers.py +10 -10
- sky/serve/serve_utils.py +56 -36
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +37 -17
- sky/serve/server/server.py +117 -0
- sky/serve/service.py +8 -1
- sky/server/__init__.py +1 -0
- sky/server/common.py +441 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +462 -0
- sky/server/requests/payloads.py +481 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1095 -0
- sky/server/stream_utils.py +144 -0
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +12 -4
- sky/setup_files/setup.py +1 -1
- sky/sky_logging.py +9 -13
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +46 -12
- sky/skylet/events.py +5 -6
- sky/skylet/job_lib.py +78 -66
- sky/skylet/log_lib.py +17 -11
- sky/skypilot_config.py +79 -94
- sky/task.py +119 -73
- sky/templates/aws-ray.yml.j2 +4 -4
- sky/templates/azure-ray.yml.j2 +3 -2
- sky/templates/cudo-ray.yml.j2 +3 -2
- sky/templates/fluidstack-ray.yml.j2 +3 -2
- sky/templates/gcp-ray.yml.j2 +3 -2
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +1 -12
- sky/templates/kubernetes-ray.yml.j2 +3 -2
- sky/templates/lambda-ray.yml.j2 +3 -2
- sky/templates/oci-ray.yml.j2 +3 -2
- sky/templates/paperspace-ray.yml.j2 +3 -2
- sky/templates/runpod-ray.yml.j2 +3 -2
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vsphere-ray.yml.j2 +4 -2
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +8 -0
- sky/usage/usage_lib.py +45 -11
- sky/utils/accelerator_registry.py +33 -53
- sky/utils/admin_policy_utils.py +2 -1
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +33 -3
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +69 -14
- sky/utils/common.py +74 -0
- sky/utils/common_utils.py +133 -93
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +2 -3
- sky/utils/controller_utils.py +133 -147
- sky/utils/dag_utils.py +72 -24
- sky/utils/kubernetes/deploy_remote_cluster.sh +2 -2
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/log_utils.py +83 -23
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +2 -2
- sky/utils/rich_utils.py +213 -34
- sky/utils/schemas.py +19 -2
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +51 -35
- sky/utils/timeline.py +7 -2
- sky/utils/ux_utils.py +95 -25
- {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/METADATA +8 -3
- {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/RECORD +170 -132
- sky/clouds/cloud_registry.py +0 -76
- sky/utils/cluster_yaml_utils.py +0 -24
- {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/top_level.txt +0 -0
sky/cloud_stores.py
CHANGED
@@ -54,7 +54,8 @@ class S3CloudStorage(CloudStorage):
|
|
54
54
|
|
55
55
|
# List of commands to install AWS CLI
|
56
56
|
_GET_AWSCLI = [
|
57
|
-
'aws --version >/dev/null 2>&1 ||
|
57
|
+
'aws --version >/dev/null 2>&1 || '
|
58
|
+
f'{constants.SKY_UV_PIP_CMD} install awscli',
|
58
59
|
]
|
59
60
|
|
60
61
|
def is_directory(self, url: str) -> bool:
|
@@ -84,7 +85,8 @@ class S3CloudStorage(CloudStorage):
|
|
84
85
|
# AWS Sync by default uses 10 threads to upload files to the bucket.
|
85
86
|
# To increase parallelism, modify max_concurrent_requests in your
|
86
87
|
# aws config file (Default path: ~/.aws/config).
|
87
|
-
download_via_awscli = ('aws s3
|
88
|
+
download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
89
|
+
'sync --no-follow-symlinks '
|
88
90
|
f'{source} {destination}')
|
89
91
|
|
90
92
|
all_commands = list(self._GET_AWSCLI)
|
@@ -93,7 +95,8 @@ class S3CloudStorage(CloudStorage):
|
|
93
95
|
|
94
96
|
def make_sync_file_command(self, source: str, destination: str) -> str:
|
95
97
|
"""Downloads a file using AWS CLI."""
|
96
|
-
download_via_awscli = f'aws s3
|
98
|
+
download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
99
|
+
f'cp {source} {destination}')
|
97
100
|
|
98
101
|
all_commands = list(self._GET_AWSCLI)
|
99
102
|
all_commands.append(download_via_awscli)
|
@@ -354,7 +357,8 @@ class R2CloudStorage(CloudStorage):
|
|
354
357
|
|
355
358
|
# List of commands to install AWS CLI
|
356
359
|
_GET_AWSCLI = [
|
357
|
-
'aws --version >/dev/null 2>&1 ||
|
360
|
+
'aws --version >/dev/null 2>&1 || '
|
361
|
+
f'{constants.SKY_UV_PIP_CMD} install awscli',
|
358
362
|
]
|
359
363
|
|
360
364
|
def is_directory(self, url: str) -> bool:
|
@@ -389,7 +393,8 @@ class R2CloudStorage(CloudStorage):
|
|
389
393
|
source = source.replace('r2://', 's3://')
|
390
394
|
download_via_awscli = ('AWS_SHARED_CREDENTIALS_FILE='
|
391
395
|
f'{cloudflare.R2_CREDENTIALS_PATH} '
|
392
|
-
'aws s3
|
396
|
+
f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
397
|
+
'sync --no-follow-symlinks '
|
393
398
|
f'{source} {destination} '
|
394
399
|
f'--endpoint {endpoint_url} '
|
395
400
|
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
@@ -405,7 +410,8 @@ class R2CloudStorage(CloudStorage):
|
|
405
410
|
source = source.replace('r2://', 's3://')
|
406
411
|
download_via_awscli = ('AWS_SHARED_CREDENTIALS_FILE='
|
407
412
|
f'{cloudflare.R2_CREDENTIALS_PATH} '
|
408
|
-
f'aws s3
|
413
|
+
f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
414
|
+
f'cp {source} {destination} '
|
409
415
|
f'--endpoint {endpoint_url} '
|
410
416
|
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
411
417
|
|
sky/clouds/__init__.py
CHANGED
@@ -8,7 +8,6 @@ from sky.clouds.cloud import ProvisionerVersion
|
|
8
8
|
from sky.clouds.cloud import Region
|
9
9
|
from sky.clouds.cloud import StatusVersion
|
10
10
|
from sky.clouds.cloud import Zone
|
11
|
-
from sky.clouds.cloud_registry import CLOUD_REGISTRY
|
12
11
|
|
13
12
|
# NOTE: import the above first to avoid circular imports.
|
14
13
|
# isort: split
|
@@ -47,7 +46,6 @@ __all__ = [
|
|
47
46
|
'CloudImplementationFeatures',
|
48
47
|
'Region',
|
49
48
|
'Zone',
|
50
|
-
'CLOUD_REGISTRY',
|
51
49
|
'ProvisionerVersion',
|
52
50
|
'StatusVersion',
|
53
51
|
'Fluidstack',
|
sky/clouds/aws.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Amazon Web Services."""
|
2
2
|
import enum
|
3
3
|
import fnmatch
|
4
|
-
import functools
|
5
4
|
import hashlib
|
6
5
|
import json
|
7
6
|
import os
|
@@ -21,7 +20,9 @@ from sky.clouds import service_catalog
|
|
21
20
|
from sky.clouds.service_catalog import common as catalog_common
|
22
21
|
from sky.clouds.utils import aws_utils
|
23
22
|
from sky.skylet import constants
|
23
|
+
from sky.utils import annotations
|
24
24
|
from sky.utils import common_utils
|
25
|
+
from sky.utils import registry
|
25
26
|
from sky.utils import resources_utils
|
26
27
|
from sky.utils import rich_utils
|
27
28
|
from sky.utils import subprocess_utils
|
@@ -30,7 +31,7 @@ from sky.utils import ux_utils
|
|
30
31
|
if typing.TYPE_CHECKING:
|
31
32
|
# renaming to avoid shadowing variables
|
32
33
|
from sky import resources as resources_lib
|
33
|
-
from sky import status_lib
|
34
|
+
from sky.utils import status_lib
|
34
35
|
|
35
36
|
logger = sky_logging.init_logger(__name__)
|
36
37
|
|
@@ -126,7 +127,7 @@ class AWSIdentityType(enum.Enum):
|
|
126
127
|
return self in expirable_types
|
127
128
|
|
128
129
|
|
129
|
-
@
|
130
|
+
@registry.CLOUD_REGISTRY.register
|
130
131
|
class AWS(clouds.Cloud):
|
131
132
|
"""Amazon Web Services."""
|
132
133
|
|
@@ -558,7 +559,8 @@ class AWS(clouds.Cloud):
|
|
558
559
|
fuzzy_candidate_list, None)
|
559
560
|
|
560
561
|
@classmethod
|
561
|
-
@
|
562
|
+
@annotations.lru_cache(scope='global',
|
563
|
+
maxsize=1) # Cache since getting identity is slow.
|
562
564
|
def check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
563
565
|
"""Checks if the user has access credentials to this cloud."""
|
564
566
|
|
@@ -696,7 +698,7 @@ class AWS(clouds.Cloud):
|
|
696
698
|
return AWSIdentityType.SHARED_CREDENTIALS_FILE
|
697
699
|
|
698
700
|
@classmethod
|
699
|
-
@
|
701
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
700
702
|
def _aws_configure_list(cls) -> Optional[bytes]:
|
701
703
|
proc = subprocess.run('aws configure list',
|
702
704
|
shell=True,
|
@@ -708,7 +710,8 @@ class AWS(clouds.Cloud):
|
|
708
710
|
return proc.stdout
|
709
711
|
|
710
712
|
@classmethod
|
711
|
-
@
|
713
|
+
@annotations.lru_cache(scope='global',
|
714
|
+
maxsize=1) # Cache since getting identity is slow.
|
712
715
|
def _sts_get_caller_identity(cls) -> Optional[List[List[str]]]:
|
713
716
|
try:
|
714
717
|
sts = aws.client('sts', check_credentials=False)
|
@@ -789,7 +792,8 @@ class AWS(clouds.Cloud):
|
|
789
792
|
return [user_ids]
|
790
793
|
|
791
794
|
@classmethod
|
792
|
-
@
|
795
|
+
@annotations.lru_cache(scope='global',
|
796
|
+
maxsize=1) # Cache since getting identity is slow.
|
793
797
|
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
794
798
|
"""Returns a [UserId, Account] list that uniquely identifies the user.
|
795
799
|
|
@@ -893,11 +897,11 @@ class AWS(clouds.Cloud):
|
|
893
897
|
if os.path.exists(os.path.expanduser(f'~/.aws/{filename}'))
|
894
898
|
}
|
895
899
|
|
896
|
-
@
|
900
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
897
901
|
def can_credential_expire(self) -> bool:
|
898
902
|
identity_type = self._current_identity_type()
|
899
|
-
return identity_type is not None and
|
900
|
-
|
903
|
+
return (identity_type is not None and
|
904
|
+
identity_type.can_credential_expire())
|
901
905
|
|
902
906
|
def instance_type_exists(self, instance_type):
|
903
907
|
return service_catalog.instance_type_exists(instance_type, clouds='aws')
|
@@ -945,7 +949,8 @@ class AWS(clouds.Cloud):
|
|
945
949
|
Returns:
|
946
950
|
False if the quota is found to be zero, and True otherwise.
|
947
951
|
Raises:
|
948
|
-
ImportError: if the dependencies for AWS are not able to be
|
952
|
+
ImportError: if the dependencies for AWS are not able to be
|
953
|
+
installed.
|
949
954
|
botocore.exceptions.ClientError: error in Boto3 client request.
|
950
955
|
"""
|
951
956
|
|
@@ -959,7 +964,8 @@ class AWS(clouds.Cloud):
|
|
959
964
|
quota_code = aws_catalog.get_quota_code(instance_type, use_spot)
|
960
965
|
|
961
966
|
if quota_code is None:
|
962
|
-
# Quota code not found in the catalog for the chosen instance_type,
|
967
|
+
# Quota code not found in the catalog for the chosen instance_type,
|
968
|
+
# try provisioning anyway.
|
963
969
|
return True
|
964
970
|
|
965
971
|
if aws_utils.use_reservations():
|
@@ -973,7 +979,8 @@ class AWS(clouds.Cloud):
|
|
973
979
|
response = client.get_service_quota(ServiceCode='ec2',
|
974
980
|
QuotaCode=quota_code)
|
975
981
|
except aws.botocore_exceptions().ClientError:
|
976
|
-
# Botocore client connection not established, try provisioning
|
982
|
+
# Botocore client connection not established, try provisioning
|
983
|
+
# anyways
|
977
984
|
return True
|
978
985
|
|
979
986
|
if response['Quota']['Value'] == 0:
|
sky/clouds/azure.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
"""Azure."""
|
2
|
-
import functools
|
3
2
|
import os
|
4
3
|
import re
|
5
4
|
import subprocess
|
@@ -17,7 +16,9 @@ from sky import skypilot_config
|
|
17
16
|
from sky.adaptors import azure
|
18
17
|
from sky.clouds import service_catalog
|
19
18
|
from sky.clouds.utils import azure_utils
|
19
|
+
from sky.utils import annotations
|
20
20
|
from sky.utils import common_utils
|
21
|
+
from sky.utils import registry
|
21
22
|
from sky.utils import resources_utils
|
22
23
|
from sky.utils import ux_utils
|
23
24
|
|
@@ -60,7 +61,7 @@ def _run_output(cmd):
|
|
60
61
|
return proc.stdout.decode('ascii')
|
61
62
|
|
62
63
|
|
63
|
-
@
|
64
|
+
@registry.CLOUD_REGISTRY.register
|
64
65
|
class Azure(clouds.Cloud):
|
65
66
|
"""Azure."""
|
66
67
|
|
@@ -574,7 +575,8 @@ class Azure(clouds.Cloud):
|
|
574
575
|
clouds='azure')
|
575
576
|
|
576
577
|
@classmethod
|
577
|
-
@
|
578
|
+
@annotations.lru_cache(scope='global',
|
579
|
+
maxsize=1) # Cache since getting identity is slow.
|
578
580
|
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
579
581
|
"""Returns the cloud user identity."""
|
580
582
|
# This returns the user's email address + [subscription_id].
|
sky/clouds/cloud.py
CHANGED
sky/clouds/cudo.py
CHANGED
@@ -6,6 +6,7 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
6
6
|
from sky import clouds
|
7
7
|
from sky.clouds import service_catalog
|
8
8
|
from sky.utils import common_utils
|
9
|
+
from sky.utils import registry
|
9
10
|
from sky.utils import resources_utils
|
10
11
|
|
11
12
|
if typing.TYPE_CHECKING:
|
@@ -27,7 +28,7 @@ def _run_output(cmd):
|
|
27
28
|
return proc.stdout.decode('ascii')
|
28
29
|
|
29
30
|
|
30
|
-
@
|
31
|
+
@registry.CLOUD_REGISTRY.register
|
31
32
|
class Cudo(clouds.Cloud):
|
32
33
|
"""Cudo Compute"""
|
33
34
|
_REPR = 'Cudo'
|
sky/clouds/do.py
CHANGED
@@ -8,6 +8,7 @@ from sky import clouds
|
|
8
8
|
from sky.adaptors import do
|
9
9
|
from sky.clouds import service_catalog
|
10
10
|
from sky.provision.do import utils as do_utils
|
11
|
+
from sky.utils import registry
|
11
12
|
from sky.utils import resources_utils
|
12
13
|
|
13
14
|
if typing.TYPE_CHECKING:
|
@@ -16,7 +17,7 @@ if typing.TYPE_CHECKING:
|
|
16
17
|
_CREDENTIAL_FILE = 'config.yaml'
|
17
18
|
|
18
19
|
|
19
|
-
@
|
20
|
+
@registry.CLOUD_REGISTRY.register(aliases=['digitalocean'])
|
20
21
|
class DO(clouds.Cloud):
|
21
22
|
"""Digital Ocean Cloud"""
|
22
23
|
|
sky/clouds/fluidstack.py
CHANGED
@@ -6,10 +6,11 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
6
6
|
import requests
|
7
7
|
|
8
8
|
from sky import clouds
|
9
|
-
from sky import status_lib
|
10
9
|
from sky.clouds import service_catalog
|
11
10
|
from sky.provision.fluidstack import fluidstack_utils
|
11
|
+
from sky.utils import registry
|
12
12
|
from sky.utils import resources_utils
|
13
|
+
from sky.utils import status_lib
|
13
14
|
from sky.utils.resources_utils import DiskTier
|
14
15
|
|
15
16
|
_CREDENTIAL_FILES = [
|
@@ -21,7 +22,7 @@ if typing.TYPE_CHECKING:
|
|
21
22
|
from sky import resources as resources_lib
|
22
23
|
|
23
24
|
|
24
|
-
@
|
25
|
+
@registry.CLOUD_REGISTRY.register
|
25
26
|
class Fluidstack(clouds.Cloud):
|
26
27
|
"""FluidStack GPU Cloud."""
|
27
28
|
|
sky/clouds/gcp.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
"""Google Cloud Platform."""
|
2
2
|
import enum
|
3
|
-
import functools
|
4
3
|
import json
|
5
4
|
import os
|
6
5
|
import re
|
@@ -18,14 +17,16 @@ from sky import skypilot_config
|
|
18
17
|
from sky.adaptors import gcp
|
19
18
|
from sky.clouds import service_catalog
|
20
19
|
from sky.clouds.utils import gcp_utils
|
20
|
+
from sky.utils import annotations
|
21
21
|
from sky.utils import common_utils
|
22
|
+
from sky.utils import registry
|
22
23
|
from sky.utils import resources_utils
|
23
24
|
from sky.utils import subprocess_utils
|
24
25
|
from sky.utils import ux_utils
|
25
26
|
|
26
27
|
if typing.TYPE_CHECKING:
|
27
28
|
from sky import resources
|
28
|
-
from sky import status_lib
|
29
|
+
from sky.utils import status_lib
|
29
30
|
|
30
31
|
logger = sky_logging.init_logger(__name__)
|
31
32
|
|
@@ -136,7 +137,7 @@ class GCPIdentityType(enum.Enum):
|
|
136
137
|
return self == GCPIdentityType.SHARED_CREDENTIALS_FILE
|
137
138
|
|
138
139
|
|
139
|
-
@
|
140
|
+
@registry.CLOUD_REGISTRY.register
|
140
141
|
class GCP(clouds.Cloud):
|
141
142
|
"""Google Cloud Platform."""
|
142
143
|
|
@@ -348,7 +349,7 @@ class GCP(clouds.Cloud):
|
|
348
349
|
return find_machine is not None
|
349
350
|
|
350
351
|
@classmethod
|
351
|
-
@
|
352
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
352
353
|
def _get_image_size(cls, image_id: str) -> float:
|
353
354
|
if image_id.startswith('skypilot:'):
|
354
355
|
return DEFAULT_GCP_IMAGE_GB
|
@@ -866,11 +867,11 @@ class GCP(clouds.Cloud):
|
|
866
867
|
pass
|
867
868
|
return credentials
|
868
869
|
|
869
|
-
@
|
870
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
870
871
|
def can_credential_expire(self) -> bool:
|
871
872
|
identity_type = self._get_identity_type()
|
872
|
-
return identity_type is not None and
|
873
|
-
|
873
|
+
return (identity_type is not None and
|
874
|
+
identity_type.can_credential_expire())
|
874
875
|
|
875
876
|
@classmethod
|
876
877
|
def _get_identity_type(cls) -> Optional[GCPIdentityType]:
|
@@ -886,7 +887,8 @@ class GCP(clouds.Cloud):
|
|
886
887
|
return GCPIdentityType.SHARED_CREDENTIALS_FILE
|
887
888
|
|
888
889
|
@classmethod
|
889
|
-
@
|
890
|
+
@annotations.lru_cache(scope='request',
|
891
|
+
maxsize=1) # Cache since getting identity is slow.
|
890
892
|
def get_user_identities(cls) -> List[List[str]]:
|
891
893
|
"""Returns the email address + project id of the active user."""
|
892
894
|
try:
|
sky/clouds/ibm.py
CHANGED
@@ -7,11 +7,12 @@ import colorama
|
|
7
7
|
|
8
8
|
from sky import clouds
|
9
9
|
from sky import sky_logging
|
10
|
-
from sky import status_lib
|
11
10
|
from sky.adaptors import ibm
|
12
11
|
from sky.adaptors.ibm import CREDENTIAL_FILE
|
13
12
|
from sky.clouds import service_catalog
|
13
|
+
from sky.utils import registry
|
14
14
|
from sky.utils import resources_utils
|
15
|
+
from sky.utils import status_lib
|
15
16
|
from sky.utils import ux_utils
|
16
17
|
|
17
18
|
if typing.TYPE_CHECKING:
|
@@ -21,7 +22,7 @@ if typing.TYPE_CHECKING:
|
|
21
22
|
logger = sky_logging.init_logger(__name__)
|
22
23
|
|
23
24
|
|
24
|
-
@
|
25
|
+
@registry.CLOUD_REGISTRY.register
|
25
26
|
class IBM(clouds.Cloud):
|
26
27
|
"""IBM Web Services."""
|
27
28
|
|
@@ -167,7 +168,7 @@ class IBM(clouds.Cloud):
|
|
167
168
|
def make_deploy_resources_variables(
|
168
169
|
self,
|
169
170
|
resources: 'resources_lib.Resources',
|
170
|
-
cluster_name: resources_utils.ClusterName,
|
171
|
+
cluster_name: 'resources_utils.ClusterName',
|
171
172
|
region: 'clouds.Region',
|
172
173
|
zones: Optional[List['clouds.Zone']],
|
173
174
|
num_nodes: int,
|
@@ -252,10 +253,10 @@ class IBM(clouds.Cloud):
|
|
252
253
|
|
253
254
|
@classmethod
|
254
255
|
def get_default_instance_type(
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
256
|
+
cls,
|
257
|
+
cpus: Optional[str] = None,
|
258
|
+
memory: Optional[str] = None,
|
259
|
+
disk_tier: Optional['resources_utils.DiskTier'] = None
|
259
260
|
) -> Optional[str]:
|
260
261
|
return service_catalog.get_default_instance_type(cpus=cpus,
|
261
262
|
memory=memory,
|
sky/clouds/kubernetes.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
"""Kubernetes."""
|
2
|
-
import functools
|
3
2
|
import os
|
4
3
|
import re
|
5
4
|
import typing
|
@@ -14,7 +13,9 @@ from sky.provision import instance_setup
|
|
14
13
|
from sky.provision.kubernetes import network_utils
|
15
14
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
16
15
|
from sky.skylet import constants
|
16
|
+
from sky.utils import annotations
|
17
17
|
from sky.utils import common_utils
|
18
|
+
from sky.utils import registry
|
18
19
|
from sky.utils import resources_utils
|
19
20
|
from sky.utils import schemas
|
20
21
|
|
@@ -34,7 +35,7 @@ CREDENTIAL_PATH = os.environ.get('KUBECONFIG', DEFAULT_KUBECONFIG_PATH)
|
|
34
35
|
_SKYPILOT_SYSTEM_NAMESPACE = 'skypilot-system'
|
35
36
|
|
36
37
|
|
37
|
-
@
|
38
|
+
@registry.CLOUD_REGISTRY.register(aliases=['k8s'])
|
38
39
|
class Kubernetes(clouds.Cloud):
|
39
40
|
"""Kubernetes."""
|
40
41
|
|
@@ -82,7 +83,7 @@ class Kubernetes(clouds.Cloud):
|
|
82
83
|
# Use a fresh user hash to avoid conflicts in the secret object naming.
|
83
84
|
# This can happen when the controller is reusing the same user hash
|
84
85
|
# through USER_ID_ENV_VAR but has a different SSH key.
|
85
|
-
fresh_user_hash = common_utils.
|
86
|
+
fresh_user_hash = common_utils.generate_user_hash()
|
86
87
|
return f'ssh-publickey-{fresh_user_hash}'
|
87
88
|
|
88
89
|
@classmethod
|
@@ -116,7 +117,7 @@ class Kubernetes(clouds.Cloud):
|
|
116
117
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
117
118
|
|
118
119
|
@classmethod
|
119
|
-
@
|
120
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
120
121
|
def _log_skipped_contexts_once(cls, skipped_contexts: Tuple[str,
|
121
122
|
...]) -> None:
|
122
123
|
"""Log skipped contexts for only once.
|
@@ -240,7 +241,7 @@ class Kubernetes(clouds.Cloud):
|
|
240
241
|
cls,
|
241
242
|
cpus: Optional[str] = None,
|
242
243
|
memory: Optional[str] = None,
|
243
|
-
disk_tier: Optional[resources_utils.DiskTier] = None) -> str:
|
244
|
+
disk_tier: Optional['resources_utils.DiskTier'] = None) -> str:
|
244
245
|
# TODO(romilb): In the future, we may want to move the instance type
|
245
246
|
# selection + availability checking to a kubernetes_catalog module.
|
246
247
|
del disk_tier # Unused.
|
@@ -330,7 +331,7 @@ class Kubernetes(clouds.Cloud):
|
|
330
331
|
def make_deploy_resources_variables(
|
331
332
|
self,
|
332
333
|
resources: 'resources_lib.Resources',
|
333
|
-
cluster_name: resources_utils.ClusterName,
|
334
|
+
cluster_name: 'resources_utils.ClusterName',
|
334
335
|
region: Optional['clouds.Region'],
|
335
336
|
zones: Optional[List['clouds.Zone']],
|
336
337
|
num_nodes: int,
|
sky/clouds/lambda_cloud.py
CHANGED
@@ -5,10 +5,11 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
5
5
|
import requests
|
6
6
|
|
7
7
|
from sky import clouds
|
8
|
-
from sky import status_lib
|
9
8
|
from sky.clouds import service_catalog
|
10
9
|
from sky.provision.lambda_cloud import lambda_utils
|
10
|
+
from sky.utils import registry
|
11
11
|
from sky.utils import resources_utils
|
12
|
+
from sky.utils import status_lib
|
12
13
|
|
13
14
|
if typing.TYPE_CHECKING:
|
14
15
|
# Renaming to avoid shadowing variables.
|
@@ -20,7 +21,7 @@ _CREDENTIAL_FILES = [
|
|
20
21
|
]
|
21
22
|
|
22
23
|
|
23
|
-
@
|
24
|
+
@registry.CLOUD_REGISTRY.register
|
24
25
|
class Lambda(clouds.Cloud):
|
25
26
|
"""Lambda Labs GPU Cloud."""
|
26
27
|
|
@@ -121,10 +122,10 @@ class Lambda(clouds.Cloud):
|
|
121
122
|
|
122
123
|
@classmethod
|
123
124
|
def get_default_instance_type(
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
cls,
|
126
|
+
cpus: Optional[str] = None,
|
127
|
+
memory: Optional[str] = None,
|
128
|
+
disk_tier: Optional['resources_utils.DiskTier'] = None
|
128
129
|
) -> Optional[str]:
|
129
130
|
return service_catalog.get_default_instance_type(cpus=cpus,
|
130
131
|
memory=memory,
|
@@ -154,7 +155,7 @@ class Lambda(clouds.Cloud):
|
|
154
155
|
def make_deploy_resources_variables(
|
155
156
|
self,
|
156
157
|
resources: 'resources_lib.Resources',
|
157
|
-
cluster_name: resources_utils.ClusterName,
|
158
|
+
cluster_name: 'resources_utils.ClusterName',
|
158
159
|
region: 'clouds.Region',
|
159
160
|
zones: Optional[List['clouds.Zone']],
|
160
161
|
num_nodes: int,
|
sky/clouds/oci.py
CHANGED
@@ -9,7 +9,7 @@ History:
|
|
9
9
|
file path resolution (by os.path.expanduser) when construct the file
|
10
10
|
mounts. This bug will cause the created workder nodes located in different
|
11
11
|
compartment and VCN than the header node if user specifies compartment_id
|
12
|
-
in the sky config file, because the ~/.sky/config is not sync-ed to the
|
12
|
+
in the sky config file, because the ~/.sky/config.yaml is not sync-ed to the
|
13
13
|
remote machine.
|
14
14
|
The workaround is set the sky config file path using ENV before running
|
15
15
|
the sky launch: export SKYPILOT_CONFIG=/home/ubuntu/.sky/config.yaml
|
@@ -27,13 +27,14 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
27
27
|
|
28
28
|
from sky import clouds
|
29
29
|
from sky import exceptions
|
30
|
-
from sky import status_lib
|
31
30
|
from sky.adaptors import oci as oci_adaptor
|
32
31
|
from sky.clouds import service_catalog
|
33
32
|
from sky.clouds.utils import oci_utils
|
34
33
|
from sky.provision.oci.query_utils import query_helper
|
35
34
|
from sky.utils import common_utils
|
35
|
+
from sky.utils import registry
|
36
36
|
from sky.utils import resources_utils
|
37
|
+
from sky.utils import status_lib
|
37
38
|
from sky.utils import ux_utils
|
38
39
|
|
39
40
|
if typing.TYPE_CHECKING:
|
@@ -45,7 +46,7 @@ logger = logging.getLogger(__name__)
|
|
45
46
|
_tenancy_prefix: Optional[str] = None
|
46
47
|
|
47
48
|
|
48
|
-
@
|
49
|
+
@registry.CLOUD_REGISTRY.register
|
49
50
|
class OCI(clouds.Cloud):
|
50
51
|
"""OCI: Oracle Cloud Infrastructure """
|
51
52
|
|
sky/clouds/paperspace.py
CHANGED
@@ -8,6 +8,7 @@ import requests
|
|
8
8
|
from sky import clouds
|
9
9
|
from sky.clouds import service_catalog
|
10
10
|
from sky.provision.paperspace import utils
|
11
|
+
from sky.utils import registry
|
11
12
|
from sky.utils import resources_utils
|
12
13
|
|
13
14
|
if typing.TYPE_CHECKING:
|
@@ -19,7 +20,7 @@ _CREDENTIAL_FILES = [
|
|
19
20
|
]
|
20
21
|
|
21
22
|
|
22
|
-
@
|
23
|
+
@registry.CLOUD_REGISTRY.register
|
23
24
|
class Paperspace(clouds.Cloud):
|
24
25
|
"""Paperspace GPU Cloud"""
|
25
26
|
|
sky/clouds/runpod.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
5
5
|
|
6
6
|
from sky import clouds
|
7
7
|
from sky.clouds import service_catalog
|
8
|
+
from sky.utils import registry
|
8
9
|
from sky.utils import resources_utils
|
9
10
|
|
10
11
|
if typing.TYPE_CHECKING:
|
@@ -15,7 +16,7 @@ _CREDENTIAL_FILES = [
|
|
15
16
|
]
|
16
17
|
|
17
18
|
|
18
|
-
@
|
19
|
+
@registry.CLOUD_REGISTRY.register
|
19
20
|
class RunPod(clouds.Cloud):
|
20
21
|
""" RunPod GPU Cloud
|
21
22
|
|
sky/clouds/scp.py
CHANGED
@@ -10,10 +10,11 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
|
|
10
10
|
from sky import clouds
|
11
11
|
from sky import exceptions
|
12
12
|
from sky import sky_logging
|
13
|
-
from sky import status_lib
|
14
13
|
from sky.clouds import service_catalog
|
15
14
|
from sky.clouds.utils import scp_utils
|
15
|
+
from sky.utils import registry
|
16
16
|
from sky.utils import resources_utils
|
17
|
+
from sky.utils import status_lib
|
17
18
|
|
18
19
|
if typing.TYPE_CHECKING:
|
19
20
|
# Renaming to avoid shadowing variables.
|
@@ -29,7 +30,7 @@ _SCP_MIN_DISK_SIZE_GB = 100
|
|
29
30
|
_SCP_MAX_DISK_SIZE_GB = 300
|
30
31
|
|
31
32
|
|
32
|
-
@
|
33
|
+
@registry.CLOUD_REGISTRY.register
|
33
34
|
class SCP(clouds.Cloud):
|
34
35
|
"""SCP Cloud."""
|
35
36
|
|
@@ -145,10 +146,10 @@ class SCP(clouds.Cloud):
|
|
145
146
|
|
146
147
|
@classmethod
|
147
148
|
def get_default_instance_type(
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
149
|
+
cls,
|
150
|
+
cpus: Optional[str] = None,
|
151
|
+
memory: Optional[str] = None,
|
152
|
+
disk_tier: Optional['resources_utils.DiskTier'] = None
|
152
153
|
) -> Optional[str]:
|
153
154
|
return service_catalog.get_default_instance_type(cpus=cpus,
|
154
155
|
memory=memory,
|
@@ -178,7 +179,7 @@ class SCP(clouds.Cloud):
|
|
178
179
|
def make_deploy_resources_variables(
|
179
180
|
self,
|
180
181
|
resources: 'resources_lib.Resources',
|
181
|
-
cluster_name: resources_utils.ClusterName,
|
182
|
+
cluster_name: 'resources_utils.ClusterName',
|
182
183
|
region: 'clouds.Region',
|
183
184
|
zones: Optional[List['clouds.Zone']],
|
184
185
|
num_nodes: int,
|
@@ -67,7 +67,7 @@ def list_accelerators(
|
|
67
67
|
all_regions: bool = False,
|
68
68
|
require_price: bool = True,
|
69
69
|
) -> 'Dict[str, List[common.InstanceTypeInfo]]':
|
70
|
-
"""
|
70
|
+
"""Lists the names of all accelerators offered by Sky.
|
71
71
|
|
72
72
|
This will include all accelerators offered by Sky, including those
|
73
73
|
that may not be available in the user's account.
|
@@ -95,7 +95,7 @@ def list_accelerator_counts(
|
|
95
95
|
quantity_filter: Optional[int] = None,
|
96
96
|
clouds: CloudFilter = None,
|
97
97
|
) -> Dict[str, List[int]]:
|
98
|
-
"""
|
98
|
+
"""Lists all accelerators offered by Sky and available counts.
|
99
99
|
|
100
100
|
Returns: A dictionary of canonical accelerator names mapped to a list
|
101
101
|
of available counts. See usage in cli.py.
|
@@ -129,7 +129,7 @@ def list_accelerator_realtime(
|
|
129
129
|
clouds: CloudFilter = None,
|
130
130
|
case_sensitive: bool = True,
|
131
131
|
) -> Tuple[Dict[str, List[int]], Dict[str, int], Dict[str, int]]:
|
132
|
-
"""
|
132
|
+
"""Lists all accelerators offered by Sky with their realtime availability.
|
133
133
|
|
134
134
|
Realtime availability is the total number of accelerators in the cluster
|
135
135
|
and number of accelerators available at the time of the call.
|
@@ -128,7 +128,13 @@ def _fetch_and_apply_az_mapping(df: common.LazyDataFrame) -> 'pd.DataFrame':
|
|
128
128
|
assert user_identity_list, user_identity_list
|
129
129
|
user_identity = user_identity_list[0]
|
130
130
|
aws_user_hash = hashlib.md5(user_identity.encode()).hexdigest()[:8]
|
131
|
-
except exceptions.CloudUserIdentityError:
|
131
|
+
except (exceptions.CloudUserIdentityError, ImportError):
|
132
|
+
# If failed to get user identity, or import aws dependencies, we use the
|
133
|
+
# latest mapping file or the default mapping file.
|
134
|
+
# The import error can happen on the client side when the user does not
|
135
|
+
# have AWS dependencies installed.
|
136
|
+
# TODO(zhwu): we should avoid the dependency of the availability zone
|
137
|
+
# mapping so as to get rid of the import error.
|
132
138
|
glob_name = common.get_catalog_path('aws/az_mappings-*.csv')
|
133
139
|
# Find the most recent file that matches the glob.
|
134
140
|
# We check the existing files because the user could remove the
|