skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,116 @@
|
|
1
|
+
"""Nebius Catalog.
|
2
|
+
|
3
|
+
This module loads the service catalog file and can be used to query
|
4
|
+
instance types and pricing information for Nebius.
|
5
|
+
"""
|
6
|
+
import typing
|
7
|
+
from typing import Dict, List, Optional, Tuple, Union
|
8
|
+
|
9
|
+
from sky.clouds.service_catalog import common
|
10
|
+
from sky.utils import resources_utils
|
11
|
+
from sky.utils import ux_utils
|
12
|
+
|
13
|
+
if typing.TYPE_CHECKING:
|
14
|
+
from sky.clouds import cloud
|
15
|
+
|
16
|
+
# Keep it synced with the frequency in
|
17
|
+
# skypilot-catalog/.github/workflows/update-Nebius-catalog.yml
|
18
|
+
_PULL_FREQUENCY_HOURS = 7
|
19
|
+
|
20
|
+
_df = common.read_catalog('nebius/vms.csv')
|
21
|
+
|
22
|
+
|
23
|
+
def instance_type_exists(instance_type: str) -> bool:
|
24
|
+
return common.instance_type_exists_impl(_df, instance_type)
|
25
|
+
|
26
|
+
|
27
|
+
def validate_region_zone(
|
28
|
+
region: Optional[str],
|
29
|
+
zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
|
30
|
+
if zone is not None:
|
31
|
+
with ux_utils.print_exception_no_traceback():
|
32
|
+
raise ValueError('Nebius does not support zones.')
|
33
|
+
return common.validate_region_zone_impl('nebius', _df, region, zone)
|
34
|
+
|
35
|
+
|
36
|
+
def get_hourly_cost(instance_type: str,
|
37
|
+
use_spot: bool = False,
|
38
|
+
region: Optional[str] = None,
|
39
|
+
zone: Optional[str] = None) -> float:
|
40
|
+
"""Returns the cost, or the cheapest cost among all zones for spot."""
|
41
|
+
assert not use_spot, 'Nebius does not support spot.'
|
42
|
+
if zone is not None:
|
43
|
+
with ux_utils.print_exception_no_traceback():
|
44
|
+
raise ValueError('Nebius does not support zones.')
|
45
|
+
return common.get_hourly_cost_impl(_df, instance_type, use_spot, region,
|
46
|
+
zone)
|
47
|
+
|
48
|
+
|
49
|
+
def get_vcpus_mem_from_instance_type(
|
50
|
+
instance_type: str) -> Tuple[Optional[float], Optional[float]]:
|
51
|
+
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
52
|
+
|
53
|
+
|
54
|
+
def get_default_instance_type(
|
55
|
+
cpus: Optional[str] = None,
|
56
|
+
memory: Optional[str] = None,
|
57
|
+
disk_tier: Optional[resources_utils.DiskTier] = None) -> Optional[str]:
|
58
|
+
del disk_tier # unused
|
59
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
|
60
|
+
|
61
|
+
|
62
|
+
def get_accelerators_from_instance_type(
|
63
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
64
|
+
return common.get_accelerators_from_instance_type_impl(_df, instance_type)
|
65
|
+
|
66
|
+
|
67
|
+
def get_instance_type_for_accelerator(
|
68
|
+
acc_name: str,
|
69
|
+
acc_count: int,
|
70
|
+
cpus: Optional[str] = None,
|
71
|
+
memory: Optional[str] = None,
|
72
|
+
use_spot: bool = False,
|
73
|
+
region: Optional[str] = None,
|
74
|
+
zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
|
75
|
+
"""Filter the instance types based on resource requirements.
|
76
|
+
|
77
|
+
Returns a list of instance types satisfying the required count of
|
78
|
+
accelerators with sorted prices and a list of candidates with fuzzy search.
|
79
|
+
"""
|
80
|
+
if zone is not None:
|
81
|
+
with ux_utils.print_exception_no_traceback():
|
82
|
+
raise ValueError('Nebius does not support zones.')
|
83
|
+
return common.get_instance_type_for_accelerator_impl(df=_df,
|
84
|
+
acc_name=acc_name,
|
85
|
+
acc_count=acc_count,
|
86
|
+
cpus=cpus,
|
87
|
+
memory=memory,
|
88
|
+
use_spot=use_spot,
|
89
|
+
region=region,
|
90
|
+
zone=zone)
|
91
|
+
|
92
|
+
|
93
|
+
def regions() -> List['cloud.Region']:
|
94
|
+
return common.get_region_zones(_df, use_spot=False)
|
95
|
+
|
96
|
+
|
97
|
+
def get_region_zones_for_instance_type(instance_type: str,
|
98
|
+
use_spot: bool) -> List['cloud.Region']:
|
99
|
+
df = _df[_df['InstanceType'] == instance_type]
|
100
|
+
return common.get_region_zones(df, use_spot)
|
101
|
+
|
102
|
+
|
103
|
+
def list_accelerators(
|
104
|
+
gpus_only: bool,
|
105
|
+
name_filter: Optional[str],
|
106
|
+
region_filter: Optional[str],
|
107
|
+
quantity_filter: Optional[int],
|
108
|
+
case_sensitive: bool = True,
|
109
|
+
all_regions: bool = False,
|
110
|
+
require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
|
111
|
+
"""Returns all instance types in Nebius offering GPUs."""
|
112
|
+
|
113
|
+
del require_price # Unused.
|
114
|
+
return common.list_accelerators_impl('nebius', _df, gpus_only, name_filter,
|
115
|
+
region_filter, quantity_filter,
|
116
|
+
case_sensitive, all_regions)
|
@@ -7,14 +7,17 @@ History:
|
|
7
7
|
- Hysun He (hysun.he@oracle.com) @ Apr, 2023: Initial implementation
|
8
8
|
- Hysun He (hysun.he@oracle.com) @ Jun, 2023: Reduce retry times by
|
9
9
|
excluding those unsubscribed regions.
|
10
|
+
- Hysun He (hysun.he@oracle.com) @ Oct 14, 2024: Bug fix for validation
|
11
|
+
of the Marketplace images
|
10
12
|
"""
|
11
13
|
|
12
14
|
import logging
|
13
15
|
import threading
|
14
16
|
import typing
|
15
|
-
from typing import Dict, List, Optional, Tuple
|
17
|
+
from typing import Dict, List, Optional, Tuple, Union
|
16
18
|
|
17
19
|
from sky.adaptors import oci as oci_adaptor
|
20
|
+
from sky.clouds import OCI
|
18
21
|
from sky.clouds.service_catalog import common
|
19
22
|
from sky.clouds.utils import oci_utils
|
20
23
|
from sky.utils import resources_utils
|
@@ -63,7 +66,7 @@ def _get_df() -> 'pd.DataFrame':
|
|
63
66
|
logger.debug(f'It is OK goes here when testing: {str(e)}')
|
64
67
|
subscribed_regions = []
|
65
68
|
|
66
|
-
except oci_adaptor.
|
69
|
+
except oci_adaptor.oci.exceptions.ServiceError as e:
|
67
70
|
# Should never expect going here. However, we still catch
|
68
71
|
# it so that if any OCI call failed, the program can still
|
69
72
|
# proceed with try-and-error way.
|
@@ -102,7 +105,6 @@ def get_default_instance_type(
|
|
102
105
|
cpus: Optional[str] = None,
|
103
106
|
memory: Optional[str] = None,
|
104
107
|
disk_tier: Optional[resources_utils.DiskTier] = None) -> Optional[str]:
|
105
|
-
del disk_tier # unused
|
106
108
|
if cpus is None:
|
107
109
|
cpus = f'{oci_utils.oci_config.DEFAULT_NUM_VCPUS}+'
|
108
110
|
|
@@ -111,12 +113,17 @@ def get_default_instance_type(
|
|
111
113
|
else:
|
112
114
|
memory_gb_or_ratio = memory
|
113
115
|
|
116
|
+
def _filter_disk_type(instance_type: str) -> bool:
|
117
|
+
valid, _ = OCI.check_disk_tier(instance_type, disk_tier)
|
118
|
+
return valid
|
119
|
+
|
114
120
|
instance_type_prefix = tuple(
|
115
121
|
f'{family}' for family in oci_utils.oci_config.DEFAULT_INSTANCE_FAMILY)
|
116
122
|
|
117
123
|
df = _get_df()
|
118
124
|
df = df[df['InstanceType'].notna()]
|
119
125
|
df = df[df['InstanceType'].str.startswith(instance_type_prefix)]
|
126
|
+
df = df.loc[df['InstanceType'].apply(_filter_disk_type)]
|
120
127
|
|
121
128
|
logger.debug(f'# get_default_instance_type: {df}')
|
122
129
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
@@ -124,7 +131,7 @@ def get_default_instance_type(
|
|
124
131
|
|
125
132
|
|
126
133
|
def get_accelerators_from_instance_type(
|
127
|
-
instance_type: str) -> Optional[Dict[str, int]]:
|
134
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
128
135
|
return common.get_accelerators_from_instance_type_impl(
|
129
136
|
_get_df(), instance_type)
|
130
137
|
|
@@ -201,4 +208,24 @@ def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
|
|
201
208
|
|
202
209
|
def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
|
203
210
|
"""Returns whether the image tag is valid."""
|
211
|
+
# Oct.14, 2024 by Hysun He: Marketplace images are region neutral, so don't
|
212
|
+
# check with region for the Marketplace images.
|
213
|
+
df = _image_df[_image_df['Tag'].str.fullmatch(tag)]
|
214
|
+
if df.empty:
|
215
|
+
return False
|
216
|
+
app_catalog_listing_id = df['AppCatalogListingId'].iloc[0]
|
217
|
+
if app_catalog_listing_id:
|
218
|
+
return True
|
204
219
|
return common.is_image_tag_valid_impl(_image_df, tag, region)
|
220
|
+
|
221
|
+
|
222
|
+
def get_image_os_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
|
223
|
+
del region
|
224
|
+
df = _image_df[_image_df['Tag'].str.fullmatch(tag)]
|
225
|
+
if df.empty:
|
226
|
+
os_type = oci_utils.oci_config.get_default_image_os()
|
227
|
+
else:
|
228
|
+
os_type = df['OS'].iloc[0]
|
229
|
+
|
230
|
+
logger.debug(f'Operation system for the image {tag} is {os_type}')
|
231
|
+
return os_type
|
@@ -5,7 +5,7 @@ query instance types and pricing information for Paperspace.
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import typing
|
8
|
-
from typing import Dict, List, Optional, Tuple
|
8
|
+
from typing import Dict, List, Optional, Tuple, Union
|
9
9
|
|
10
10
|
from sky.clouds.service_catalog import common
|
11
11
|
from sky.utils import ux_utils
|
@@ -60,7 +60,7 @@ def get_default_instance_type(
|
|
60
60
|
|
61
61
|
|
62
62
|
def get_accelerators_from_instance_type(
|
63
|
-
instance_type: str) -> Optional[Dict[str, int]]:
|
63
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
64
64
|
return common.get_accelerators_from_instance_type_impl(_df, instance_type)
|
65
65
|
|
66
66
|
|
@@ -5,7 +5,7 @@ query instance types and pricing information for RunPod.
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import typing
|
8
|
-
from typing import Dict, List, Optional, Tuple
|
8
|
+
from typing import Dict, List, Optional, Tuple, Union
|
9
9
|
|
10
10
|
from sky.clouds.service_catalog import common
|
11
11
|
from sky.utils import ux_utils
|
@@ -56,7 +56,7 @@ def get_default_instance_type(cpus: Optional[str] = None,
|
|
56
56
|
|
57
57
|
|
58
58
|
def get_accelerators_from_instance_type(
|
59
|
-
instance_type: str) -> Optional[Dict[str, int]]:
|
59
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
60
60
|
return common.get_accelerators_from_instance_type_impl(_df, instance_type)
|
61
61
|
|
62
62
|
|
@@ -5,7 +5,7 @@ instance types and pricing information for SCP.
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import typing
|
8
|
-
from typing import Dict, List, Optional, Tuple
|
8
|
+
from typing import Dict, List, Optional, Tuple, Union
|
9
9
|
|
10
10
|
from sky.clouds.service_catalog import common
|
11
11
|
from sky.utils import resources_utils
|
@@ -67,7 +67,7 @@ def get_default_instance_type(
|
|
67
67
|
|
68
68
|
|
69
69
|
def get_accelerators_from_instance_type(
|
70
|
-
instance_type: str) -> Optional[Dict[str, int]]:
|
70
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
71
71
|
return common.get_accelerators_from_instance_type_impl(_df, instance_type)
|
72
72
|
|
73
73
|
|
@@ -0,0 +1,104 @@
|
|
1
|
+
""" Vast | Catalog
|
2
|
+
|
3
|
+
This module loads the service catalog file and can be used to
|
4
|
+
query instance types and pricing information for Vast.ai.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import typing
|
8
|
+
from typing import Dict, List, Optional, Tuple, Union
|
9
|
+
|
10
|
+
from sky.clouds.service_catalog import common
|
11
|
+
from sky.utils import ux_utils
|
12
|
+
|
13
|
+
if typing.TYPE_CHECKING:
|
14
|
+
from sky.clouds import cloud
|
15
|
+
|
16
|
+
_df = common.read_catalog('vast/vms.csv')
|
17
|
+
|
18
|
+
|
19
|
+
def instance_type_exists(instance_type: str) -> bool:
|
20
|
+
return common.instance_type_exists_impl(_df, instance_type)
|
21
|
+
|
22
|
+
|
23
|
+
def validate_region_zone(
|
24
|
+
region: Optional[str],
|
25
|
+
zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
|
26
|
+
if zone is not None:
|
27
|
+
with ux_utils.print_exception_no_traceback():
|
28
|
+
raise ValueError('Vast does not support zones.')
|
29
|
+
return common.validate_region_zone_impl('vast', _df, region, zone)
|
30
|
+
|
31
|
+
|
32
|
+
def get_hourly_cost(instance_type: str,
|
33
|
+
use_spot: bool = False,
|
34
|
+
region: Optional[str] = None,
|
35
|
+
zone: Optional[str] = None) -> float:
|
36
|
+
"""Returns the cost, or the cheapest cost among all zones for spot."""
|
37
|
+
if zone is not None:
|
38
|
+
with ux_utils.print_exception_no_traceback():
|
39
|
+
raise ValueError('Vast does not support zones.')
|
40
|
+
return common.get_hourly_cost_impl(_df, instance_type, use_spot, region,
|
41
|
+
zone)
|
42
|
+
|
43
|
+
|
44
|
+
def get_vcpus_mem_from_instance_type(
|
45
|
+
instance_type: str) -> Tuple[Optional[float], Optional[float]]:
|
46
|
+
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
47
|
+
|
48
|
+
|
49
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
50
|
+
memory: Optional[str] = None,
|
51
|
+
disk_tier: Optional[str] = None) -> Optional[str]:
|
52
|
+
del disk_tier
|
53
|
+
# NOTE: After expanding catalog to multiple entries, you may
|
54
|
+
# want to specify a default instance type or family.
|
55
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
|
56
|
+
|
57
|
+
|
58
|
+
def get_accelerators_from_instance_type(
|
59
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
60
|
+
return common.get_accelerators_from_instance_type_impl(_df, instance_type)
|
61
|
+
|
62
|
+
|
63
|
+
def get_instance_type_for_accelerator(
|
64
|
+
acc_name: str,
|
65
|
+
acc_count: int,
|
66
|
+
cpus: Optional[str] = None,
|
67
|
+
memory: Optional[str] = None,
|
68
|
+
use_spot: bool = False,
|
69
|
+
region: Optional[str] = None,
|
70
|
+
zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
|
71
|
+
"""Returns a list of instance types that have the given accelerator."""
|
72
|
+
if zone is not None:
|
73
|
+
with ux_utils.print_exception_no_traceback():
|
74
|
+
raise ValueError('Vast does not support zones.')
|
75
|
+
return common.get_instance_type_for_accelerator_impl(df=_df,
|
76
|
+
acc_name=acc_name,
|
77
|
+
acc_count=acc_count,
|
78
|
+
cpus=cpus,
|
79
|
+
memory=memory,
|
80
|
+
use_spot=use_spot,
|
81
|
+
region=region,
|
82
|
+
zone=zone)
|
83
|
+
|
84
|
+
|
85
|
+
def get_region_zones_for_instance_type(instance_type: str,
|
86
|
+
use_spot: bool) -> List['cloud.Region']:
|
87
|
+
df = _df[_df['InstanceType'] == instance_type]
|
88
|
+
return common.get_region_zones(df, use_spot)
|
89
|
+
|
90
|
+
|
91
|
+
# TODO: this differs from the fluffy catalog version
|
92
|
+
def list_accelerators(
|
93
|
+
gpus_only: bool,
|
94
|
+
name_filter: Optional[str],
|
95
|
+
region_filter: Optional[str],
|
96
|
+
quantity_filter: Optional[int],
|
97
|
+
case_sensitive: bool = True,
|
98
|
+
all_regions: bool = False,
|
99
|
+
require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
|
100
|
+
"""Returns all instance types in Vast offering GPUs."""
|
101
|
+
del require_price # Unused.
|
102
|
+
return common.list_accelerators_impl('Vast', _df, gpus_only, name_filter,
|
103
|
+
region_filter, quantity_filter,
|
104
|
+
case_sensitive, all_regions)
|
@@ -2,7 +2,7 @@
|
|
2
2
|
import io
|
3
3
|
import os
|
4
4
|
import typing
|
5
|
-
from typing import Dict, List, Optional, Tuple
|
5
|
+
from typing import Dict, List, Optional, Tuple, Union
|
6
6
|
|
7
7
|
from sky.adaptors import common as adaptors_common
|
8
8
|
from sky.clouds.service_catalog import common
|
@@ -85,7 +85,7 @@ def get_default_instance_type(
|
|
85
85
|
|
86
86
|
|
87
87
|
def get_accelerators_from_instance_type(
|
88
|
-
instance_type: str) -> Optional[Dict[str, int]]:
|
88
|
+
instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
|
89
89
|
return common.get_accelerators_from_instance_type_impl(
|
90
90
|
_get_df(), instance_type)
|
91
91
|
|
@@ -0,0 +1,65 @@
|
|
1
|
+
"""Utilities for AWS."""
|
2
|
+
import dataclasses
|
3
|
+
import enum
|
4
|
+
import time
|
5
|
+
from typing import List
|
6
|
+
|
7
|
+
import cachetools
|
8
|
+
|
9
|
+
from sky import skypilot_config
|
10
|
+
from sky.adaptors import aws
|
11
|
+
|
12
|
+
|
13
|
+
class ReservationType(str, enum.Enum):
|
14
|
+
DEFAULT = 'default'
|
15
|
+
BLOCK = 'capacity-block'
|
16
|
+
|
17
|
+
|
18
|
+
@dataclasses.dataclass
|
19
|
+
class AWSReservation:
|
20
|
+
name: str
|
21
|
+
instance_type: str
|
22
|
+
zone: str
|
23
|
+
available_resources: int
|
24
|
+
# Whether the reservation is targeted, i.e. can only be consumed when
|
25
|
+
# the reservation name is specified.
|
26
|
+
targeted: bool
|
27
|
+
type: ReservationType
|
28
|
+
|
29
|
+
|
30
|
+
def use_reservations() -> bool:
|
31
|
+
prioritize_reservations = skypilot_config.get_nested(
|
32
|
+
('aws', 'prioritize_reservations'), False)
|
33
|
+
specific_reservations = skypilot_config.get_nested(
|
34
|
+
('aws', 'specific_reservations'), set())
|
35
|
+
return prioritize_reservations or specific_reservations
|
36
|
+
|
37
|
+
|
38
|
+
@cachetools.cached(cache=cachetools.TTLCache(maxsize=100,
|
39
|
+
ttl=300,
|
40
|
+
timer=time.time))
|
41
|
+
def list_reservations_for_instance_type(
|
42
|
+
instance_type: str,
|
43
|
+
region: str,
|
44
|
+
) -> List[AWSReservation]:
|
45
|
+
if not use_reservations():
|
46
|
+
return []
|
47
|
+
ec2 = aws.client('ec2', region_name=region)
|
48
|
+
response = ec2.describe_capacity_reservations(Filters=[{
|
49
|
+
'Name': 'instance-type',
|
50
|
+
'Values': [instance_type]
|
51
|
+
}, {
|
52
|
+
'Name': 'state',
|
53
|
+
'Values': ['active']
|
54
|
+
}])
|
55
|
+
reservations = response['CapacityReservations']
|
56
|
+
return [
|
57
|
+
AWSReservation(name=r['CapacityReservationId'],
|
58
|
+
instance_type=r['InstanceType'],
|
59
|
+
zone=r['AvailabilityZone'],
|
60
|
+
available_resources=r['AvailableInstanceCount'],
|
61
|
+
targeted=r['InstanceMatchCriteria'] == 'targeted',
|
62
|
+
type=ReservationType(r.get('ReservationType',
|
63
|
+
'default')))
|
64
|
+
for r in reservations
|
65
|
+
]
|
@@ -0,0 +1,91 @@
|
|
1
|
+
"""Utilies for Azure"""
|
2
|
+
|
3
|
+
import typing
|
4
|
+
|
5
|
+
from sky import exceptions
|
6
|
+
from sky.adaptors import azure
|
7
|
+
from sky.utils import ux_utils
|
8
|
+
|
9
|
+
if typing.TYPE_CHECKING:
|
10
|
+
from azure.mgmt import compute as azure_compute
|
11
|
+
from azure.mgmt.compute import models as azure_compute_models
|
12
|
+
|
13
|
+
|
14
|
+
def validate_image_id(image_id: str):
|
15
|
+
"""Check if the image ID has a valid format.
|
16
|
+
|
17
|
+
Raises:
|
18
|
+
ValueError: If the image ID is invalid.
|
19
|
+
"""
|
20
|
+
image_id_colon_splitted = image_id.split(':')
|
21
|
+
image_id_slash_splitted = image_id.split('/')
|
22
|
+
if len(image_id_slash_splitted) != 5 and len(image_id_colon_splitted) != 4:
|
23
|
+
with ux_utils.print_exception_no_traceback():
|
24
|
+
raise ValueError(
|
25
|
+
f'Invalid image id for Azure: {image_id}. Expected format: \n'
|
26
|
+
'* Marketplace image ID: <publisher>:<offer>:<sku>:<version>\n'
|
27
|
+
'* Community image ID: '
|
28
|
+
'/CommunityGalleries/<gallery-name>/Images/<image-name>')
|
29
|
+
if len(image_id_slash_splitted) == 5:
|
30
|
+
_, gallery_type, _, image_type, _ = image_id.split('/')
|
31
|
+
if gallery_type != 'CommunityGalleries' or image_type != 'Images':
|
32
|
+
with ux_utils.print_exception_no_traceback():
|
33
|
+
raise ValueError(
|
34
|
+
f'Invalid community image id for Azure: {image_id}.\n'
|
35
|
+
'Expected format: '
|
36
|
+
'/CommunityGalleries/<gallery-name>/Images/<image-name>')
|
37
|
+
|
38
|
+
|
39
|
+
def get_community_image(
        compute_client: 'azure_compute.ComputeManagementClient', image_id: str,
        region: str) -> 'azure_compute_models.CommunityGalleryImage':
    """Look up a community gallery image in the given region.

    Args:
        compute_client: Client whose `community_gallery_images.get` is used
            for the lookup.
        image_id: Community image ID of the form
            /CommunityGalleries/<gallery-name>/Images/<image-name>
        region: Region passed as the `location` of the lookup.

    Returns:
        Whatever `community_gallery_images.get` returns for the image.

    Raises:
        ResourcesUnavailableError: If the lookup raises an Azure error.
        ValueError: If `image_id` does not split into exactly five
            '/'-separated segments.
    """
    try:
        # Segments: '', 'CommunityGalleries', <gallery>, 'Images', <image>.
        _, _, public_gallery, _, gallery_image = image_id.split('/')
        images_client = compute_client.community_gallery_images
        return images_client.get(location=region,
                                 public_gallery_name=public_gallery,
                                 gallery_image_name=gallery_image)
    except azure.exceptions().AzureError as e:
        raise exceptions.ResourcesUnavailableError(
            f'Community image {image_id} does not exist in region {region}.'
        ) from e
|
59
|
+
|
60
|
+
|
61
|
+
def get_community_image_size(
        compute_client: 'azure_compute.ComputeManagementClient',
        gallery_name: str, image_name: str, region: str) -> float:
    """Return the OS disk size (GB) of a community gallery image.

    The image versions are listed, the last entry of that listing is looked
    up, and its `storage_profile.os_disk_image.disk_size_gb` is returned.

    Args:
        compute_client: Client whose `community_gallery_image_versions`
            operations are used.
        gallery_name: Public gallery name of the community image.
        image_name: Name of the image inside the gallery.
        region: Region passed as the `location` of both requests.

    Raises:
        ResourcesUnavailableError: If the image has no versions, or if either
            request raises an Azure error.
    """
    # Keyword arguments shared by the version listing and the version lookup.
    image_locator = {
        'location': region,
        'public_gallery_name': gallery_name,
        'gallery_image_name': image_name,
    }
    try:
        versions_client = compute_client.community_gallery_image_versions
        available_versions = list(versions_client.list(**image_locator))
        if not available_versions:
            raise exceptions.ResourcesUnavailableError(
                f'No versions available for Azure community image {image_name}')

        # NOTE(review): assumes the listing is ordered so that the last entry
        # is the latest version - confirm against the Azure API.
        newest_name = available_versions[-1].name
        version_details = versions_client.get(
            gallery_image_version_name=newest_name, **image_locator)
        return version_details.storage_profile.os_disk_image.disk_size_gb
    except azure.exceptions().AzureError as e:
        raise exceptions.ResourcesUnavailableError(
            f'Failed to get community image size: {e}.') from e
|
sky/clouds/utils/gcp_utils.py
CHANGED
@@ -17,6 +17,7 @@ import cachetools
|
|
17
17
|
from sky import sky_logging
|
18
18
|
from sky import skypilot_config
|
19
19
|
from sky.provision.gcp import constants
|
20
|
+
from sky.provision.kubernetes import utils as kubernetes_utils
|
20
21
|
from sky.utils import subprocess_utils
|
21
22
|
|
22
23
|
if typing.TYPE_CHECKING:
|
@@ -35,7 +36,10 @@ def is_tpu(resources: Optional['resources_lib.Resources']) -> bool:
|
|
35
36
|
def is_tpu_vm(resources: Optional['resources_lib.Resources']) -> bool:
|
36
37
|
if not is_tpu(resources):
|
37
38
|
return False
|
38
|
-
assert resources is not None
|
39
|
+
assert (resources is not None and len(resources.accelerators) == 1)
|
40
|
+
acc, _ = list(resources.accelerators.items())[0]
|
41
|
+
if kubernetes_utils.is_tpu_on_gke(acc):
|
42
|
+
return False
|
39
43
|
if resources.accelerator_args is None:
|
40
44
|
return True
|
41
45
|
return resources.accelerator_args.get('tpu_vm', True)
|
@@ -49,14 +53,6 @@ def is_tpu_vm_pod(resources: Optional['resources_lib.Resources']) -> bool:
|
|
49
53
|
return not acc.endswith('-8')
|
50
54
|
|
51
55
|
|
52
|
-
def get_num_tpu_devices(resources: Optional['resources_lib.Resources']) -> int:
|
53
|
-
if resources is None or not is_tpu(resources):
|
54
|
-
raise ValueError('resources must be a valid TPU resource.')
|
55
|
-
acc, _ = list(resources.accelerators.items())[0]
|
56
|
-
num_tpu_devices = int(int(acc.split('-')[2]) / 8)
|
57
|
-
return num_tpu_devices
|
58
|
-
|
59
|
-
|
60
56
|
@dataclasses.dataclass
|
61
57
|
class SpecificReservation:
|
62
58
|
count: int
|
sky/clouds/utils/oci_utils.py
CHANGED
@@ -1,15 +1,26 @@
|
|
1
1
|
"""OCI Configuration.
|
2
2
|
History:
|
3
|
-
- Zhanghao Wu @ Oct 2023: Formatting and refactoring
|
4
3
|
- Hysun He (hysun.he@oracle.com) @ Apr, 2023: Initial implementation
|
4
|
+
- Zhanghao Wu @ Oct 2023: Formatting and refactoring
|
5
|
+
- Hysun He (hysun.he@oracle.com) @ Oct, 2024: Add default image OS
|
6
|
+
configuration.
|
7
|
+
- Hysun He (hysun.he@oracle.com) @ Nov.12, 2024: Add the constant
|
8
|
+
SERVICE_PORT_RULE_TAG
|
9
|
+
- Hysun He (hysun.he@oracle.com) @ Jan.01, 2025: Set the default image
|
10
|
+
from ubuntu 20.04 to ubuntu 22.04, including:
|
11
|
+
- GPU: skypilot:gpu-ubuntu-2004 -> skypilot:gpu-ubuntu-2204
|
12
|
+
- CPU: skypilot:cpu-ubuntu-2004 -> skypilot:cpu-ubuntu-2204
|
13
|
+
- Hysun He (hysun.he@oracle.com) @ Jan.01, 2025: Support reuse existing
|
14
|
+
VCN for SkyServe.
|
5
15
|
"""
|
6
|
-
import logging
|
7
16
|
import os
|
8
17
|
|
18
|
+
from sky import sky_logging
|
9
19
|
from sky import skypilot_config
|
10
20
|
from sky.utils import resources_utils
|
21
|
+
from sky.utils import status_lib
|
11
22
|
|
12
|
-
logger =
|
23
|
+
logger = sky_logging.init_logger(__name__)
|
13
24
|
|
14
25
|
|
15
26
|
class OCIConfig:
|
@@ -39,6 +50,9 @@ class OCIConfig:
|
|
39
50
|
VCN_CIDR_INTERNET = '0.0.0.0/0'
|
40
51
|
VCN_CIDR = '192.168.0.0/16'
|
41
52
|
VCN_SUBNET_CIDR = '192.168.0.0/18'
|
53
|
+
SERVICE_PORT_RULE_TAG = 'SkyServe-Service-Port'
|
54
|
+
# NSG name template
|
55
|
+
NSG_NAME_TEMPLATE = 'nsg_{cluster_name}'
|
42
56
|
|
43
57
|
MAX_RETRY_COUNT = 3
|
44
58
|
RETRY_INTERVAL_BASE_SECONDS = 5
|
@@ -75,6 +89,19 @@ class OCIConfig:
|
|
75
89
|
resources_utils.DiskTier.HIGH: DISK_TIER_HIGH,
|
76
90
|
}
|
77
91
|
|
92
|
+
# Oracle instance's lifecycle state to sky state mapping.
|
93
|
+
# For Oracle VM instance's lifecycle state, please refer to the link:
|
94
|
+
# https://docs.oracle.com/en-us/iaas/api/#/en/iaas/latest/Instance/
|
95
|
+
STATE_MAPPING_OCI_TO_SKY = {
|
96
|
+
'PROVISIONING': status_lib.ClusterStatus.INIT,
|
97
|
+
'STARTING': status_lib.ClusterStatus.INIT,
|
98
|
+
'RUNNING': status_lib.ClusterStatus.UP,
|
99
|
+
'STOPPING': status_lib.ClusterStatus.STOPPED,
|
100
|
+
'STOPPED': status_lib.ClusterStatus.STOPPED,
|
101
|
+
'TERMINATED': None,
|
102
|
+
'TERMINATING': None,
|
103
|
+
}
|
104
|
+
|
78
105
|
@classmethod
|
79
106
|
def get_compartment(cls, region):
|
80
107
|
# Allow task(cluster)-specific compartment/VCN parameters.
|
@@ -84,8 +111,15 @@ class OCIConfig:
|
|
84
111
|
('oci', region, 'compartment_ocid'), default_compartment_ocid)
|
85
112
|
return compartment
|
86
113
|
|
114
|
+
@classmethod
|
115
|
+
def get_vcn_ocid(cls, region):
|
116
|
+
# Will reuse the regional VCN if specified.
|
117
|
+
vcn = skypilot_config.get_nested(('oci', region, 'vcn_ocid'), None)
|
118
|
+
return vcn
|
119
|
+
|
87
120
|
@classmethod
|
88
121
|
def get_vcn_subnet(cls, region):
|
122
|
+
# Will reuse the subnet if specified.
|
89
123
|
vcn = skypilot_config.get_nested(('oci', region, 'vcn_subnet'), None)
|
90
124
|
return vcn
|
91
125
|
|
@@ -96,7 +130,7 @@ class OCIConfig:
|
|
96
130
|
# the sky's user-config file (if not specified, use the hardcode one at
|
97
131
|
# last)
|
98
132
|
return skypilot_config.get_nested(('oci', 'default', 'image_tag_gpu'),
|
99
|
-
'skypilot:gpu-ubuntu-
|
133
|
+
'skypilot:gpu-ubuntu-2204')
|
100
134
|
|
101
135
|
@classmethod
|
102
136
|
def get_default_image_tag(cls) -> str:
|
@@ -104,7 +138,7 @@ class OCIConfig:
|
|
104
138
|
# set the default image tag in the sky's user-config file. (if not
|
105
139
|
# specified, use the hardcode one at last)
|
106
140
|
return skypilot_config.get_nested(
|
107
|
-
('oci', 'default', 'image_tag_general'), 'skypilot:cpu-ubuntu-
|
141
|
+
('oci', 'default', 'image_tag_general'), 'skypilot:cpu-ubuntu-2204')
|
108
142
|
|
109
143
|
@classmethod
|
110
144
|
def get_sky_user_config_file(cls) -> str:
|
@@ -121,5 +155,13 @@ class OCIConfig:
|
|
121
155
|
return skypilot_config.get_nested(
|
122
156
|
('oci', 'default', 'oci_config_profile'), 'DEFAULT')
|
123
157
|
|
158
|
+
@classmethod
|
159
|
+
def get_default_image_os(cls) -> str:
|
160
|
+
# Get the default image OS. Instead of hardcoding, we give a choice to
|
161
|
+
# set the default image OS type in the sky's user-config file. (if not
|
162
|
+
# specified, use the hardcode one at last)
|
163
|
+
return skypilot_config.get_nested(('oci', 'default', 'image_os_type'),
|
164
|
+
'ubuntu')
|
165
|
+
|
124
166
|
|
125
167
|
oci_config = OCIConfig()
|