skypilot-nightly 1.0.0.dev20250204__py3-none-any.whl → 1.0.0.dev20250206__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. sky/__init__.py +4 -2
  2. sky/adaptors/vast.py +29 -0
  3. sky/authentication.py +18 -0
  4. sky/backends/backend_utils.py +4 -1
  5. sky/backends/cloud_vm_ray_backend.py +1 -0
  6. sky/clouds/__init__.py +2 -0
  7. sky/clouds/service_catalog/constants.py +1 -1
  8. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  9. sky/clouds/service_catalog/kubernetes_catalog.py +11 -6
  10. sky/clouds/service_catalog/vast_catalog.py +104 -0
  11. sky/clouds/vast.py +279 -0
  12. sky/jobs/dashboard/dashboard.py +10 -3
  13. sky/jobs/dashboard/templates/index.html +117 -52
  14. sky/jobs/scheduler.py +14 -5
  15. sky/jobs/utils.py +10 -19
  16. sky/provision/__init__.py +1 -0
  17. sky/provision/vast/__init__.py +10 -0
  18. sky/provision/vast/config.py +11 -0
  19. sky/provision/vast/instance.py +247 -0
  20. sky/provision/vast/utils.py +161 -0
  21. sky/setup_files/dependencies.py +1 -0
  22. sky/templates/vast-ray.yml.j2 +70 -0
  23. sky/utils/controller_utils.py +5 -0
  24. {skypilot_nightly-1.0.0.dev20250204.dist-info → skypilot_nightly-1.0.0.dev20250206.dist-info}/METADATA +4 -1
  25. {skypilot_nightly-1.0.0.dev20250204.dist-info → skypilot_nightly-1.0.0.dev20250206.dist-info}/RECORD +29 -20
  26. {skypilot_nightly-1.0.0.dev20250204.dist-info → skypilot_nightly-1.0.0.dev20250206.dist-info}/LICENSE +0 -0
  27. {skypilot_nightly-1.0.0.dev20250204.dist-info → skypilot_nightly-1.0.0.dev20250206.dist-info}/WHEEL +0 -0
  28. {skypilot_nightly-1.0.0.dev20250204.dist-info → skypilot_nightly-1.0.0.dev20250206.dist-info}/entry_points.txt +0 -0
  29. {skypilot_nightly-1.0.0.dev20250204.dist-info → skypilot_nightly-1.0.0.dev20250206.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'e4ad98c907a819d9a6b7f6872445ca497401b8c9'
8
+ _SKYPILOT_COMMIT_SHA = '1e284afda6c2808e521e629a025267a12744b0db'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250204'
38
+ __version__ = '1.0.0.dev20250206'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
@@ -127,6 +127,7 @@ K8s = Kubernetes
127
127
  OCI = clouds.OCI
128
128
  Paperspace = clouds.Paperspace
129
129
  RunPod = clouds.RunPod
130
+ Vast = clouds.Vast
130
131
  Vsphere = clouds.Vsphere
131
132
  Fluidstack = clouds.Fluidstack
132
133
  optimize = Optimizer.optimize
@@ -144,6 +145,7 @@ __all__ = [
144
145
  'OCI',
145
146
  'Paperspace',
146
147
  'RunPod',
148
+ 'Vast',
147
149
  'SCP',
148
150
  'Vsphere',
149
151
  'Fluidstack',
sky/adaptors/vast.py ADDED
@@ -0,0 +1,29 @@
1
+ """Vast cloud adaptor."""
2
+
3
+ import functools
4
+
5
+ _vast_sdk = None
6
+
7
+
8
+ def import_package(func):
9
+
10
+ @functools.wraps(func)
11
+ def wrapper(*args, **kwargs):
12
+ global _vast_sdk
13
+
14
+ if _vast_sdk is None:
15
+ try:
16
+ import vastai_sdk as _vast # pylint: disable=import-outside-toplevel
17
+ _vast_sdk = _vast.VastAI()
18
+ except ImportError as e:
19
+ raise ImportError(f'Fail to import dependencies for vast: {e}\n'
20
+ 'Try pip install "skypilot[vast]"') from None
21
+ return func(*args, **kwargs)
22
+
23
+ return wrapper
24
+
25
+
26
+ @import_package
27
+ def vast():
28
+ """Return the vast package."""
29
+ return _vast_sdk
sky/authentication.py CHANGED
@@ -43,6 +43,7 @@ from sky.adaptors import gcp
43
43
  from sky.adaptors import ibm
44
44
  from sky.adaptors import kubernetes
45
45
  from sky.adaptors import runpod
46
+ from sky.adaptors import vast
46
47
  from sky.provision.fluidstack import fluidstack_utils
47
48
  from sky.provision.kubernetes import utils as kubernetes_utils
48
49
  from sky.provision.lambda_cloud import lambda_utils
@@ -485,6 +486,23 @@ def setup_runpod_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
485
486
  return configure_ssh_info(config)
486
487
 
487
488
 
489
+ def setup_vast_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
490
+ """Sets up SSH authentication for Vast.
491
+ - Generates a new SSH key pair if one does not exist.
492
+ - Adds the public SSH key to the user's Vast account.
493
+ """
494
+ _, public_key_path = get_or_generate_keys()
495
+ with open(public_key_path, 'r', encoding='UTF-8') as pub_key_file:
496
+ public_key = pub_key_file.read().strip()
497
+ current_key_list = vast.vast().show_ssh_keys() # pylint: disable=assignment-from-no-return
498
+ # Only add an ssh key if it hasn't already been added
499
+ if not any(x['public_key'] == public_key for x in current_key_list):
500
+ vast.vast().create_ssh_key(ssh_key=public_key)
501
+
502
+ config['auth']['ssh_public_key'] = PUBLIC_SSH_KEY_PATH
503
+ return configure_ssh_info(config)
504
+
505
+
488
506
  def setup_fluidstack_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
489
507
 
490
508
  get_or_generate_keys()
@@ -1056,6 +1056,8 @@ def _add_auth_to_cluster_config(cloud: clouds.Cloud, cluster_config_file: str):
1056
1056
  config = auth.setup_ibm_authentication(config)
1057
1057
  elif isinstance(cloud, clouds.RunPod):
1058
1058
  config = auth.setup_runpod_authentication(config)
1059
+ elif isinstance(cloud, clouds.Vast):
1060
+ config = auth.setup_vast_authentication(config)
1059
1061
  elif isinstance(cloud, clouds.Fluidstack):
1060
1062
  config = auth.setup_fluidstack_authentication(config)
1061
1063
  else:
@@ -2135,7 +2137,8 @@ def _update_cluster_status_no_lock(
2135
2137
  except exceptions.CommandError as e:
2136
2138
  success = False
2137
2139
  if e.returncode == 255:
2138
- logger.debug(f'The cluster is likely {noun}ed.')
2140
+ word = 'autostopped' if noun == 'autostop' else 'autodowned'
2141
+ logger.debug(f'The cluster is likely {word}.')
2139
2142
  reset_local_autostop = False
2140
2143
  except (Exception, SystemExit) as e: # pylint: disable=broad-except
2141
2144
  success = False
@@ -187,6 +187,7 @@ def _get_cluster_config_template(cloud):
187
187
  clouds.RunPod: 'runpod-ray.yml.j2',
188
188
  clouds.Kubernetes: 'kubernetes-ray.yml.j2',
189
189
  clouds.Vsphere: 'vsphere-ray.yml.j2',
190
+ clouds.Vast: 'vast-ray.yml.j2',
190
191
  clouds.Fluidstack: 'fluidstack-ray.yml.j2'
191
192
  }
192
193
  return cloud_to_template[type(cloud)]
sky/clouds/__init__.py CHANGED
@@ -25,6 +25,7 @@ from sky.clouds.oci import OCI
25
25
  from sky.clouds.paperspace import Paperspace
26
26
  from sky.clouds.runpod import RunPod
27
27
  from sky.clouds.scp import SCP
28
+ from sky.clouds.vast import Vast
28
29
  from sky.clouds.vsphere import Vsphere
29
30
 
30
31
  __all__ = [
@@ -39,6 +40,7 @@ __all__ = [
39
40
  'Paperspace',
40
41
  'SCP',
41
42
  'RunPod',
43
+ 'Vast',
42
44
  'OCI',
43
45
  'Vsphere',
44
46
  'Kubernetes',
@@ -3,5 +3,5 @@ HOSTED_CATALOG_DIR_URL = 'https://raw.githubusercontent.com/skypilot-org/skypilo
3
3
  CATALOG_SCHEMA_VERSION = 'v6'
4
4
  CATALOG_DIR = '~/.sky/catalogs'
5
5
  ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
6
- 'kubernetes', 'runpod', 'vsphere', 'cudo', 'fluidstack',
6
+ 'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
7
7
  'paperspace', 'do')
@@ -0,0 +1,147 @@
1
+ """A script that generates the Vast Cloud catalog. """
2
+
3
+ #
4
+ # Due to the design of the sdk, pylint has a false
5
+ # positive for the functions.
6
+ #
7
+ # pylint: disable=assignment-from-no-return
8
+ import collections
9
+ import csv
10
+ import json
11
+ import math
12
+ import re
13
+ import sys
14
+ from typing import Any, Dict, List
15
+
16
+ from sky.adaptors import vast
17
+
18
+ _map = {
19
+ 'TeslaV100': 'V100',
20
+ 'TeslaT4': 'T4',
21
+ 'TeslaP100': 'P100',
22
+ 'QRTX6000': 'RTX6000',
23
+ 'QRTX8000': 'RTX8000'
24
+ }
25
+
26
+
27
+ def create_instance_type(obj: Dict[str, Any]) -> str:
28
+ stubify = lambda x: re.sub(r'\s', '_', x)
29
+ return '{}x-{}-{}-{}'.format(obj['num_gpus'], stubify(obj['gpu_name']),
30
+ obj['cpu_cores'], obj['cpu_ram'])
31
+
32
+
33
+ def dot_get(d: dict, key: str) -> Any:
34
+ for k in key.split('.'):
35
+ d = d[k]
36
+ return d
37
+
38
+
39
+ if __name__ == '__main__':
40
+ seen = set()
41
+ # InstanceType and gpuInfo are basically just stubs
42
+ # so that the dictwriter is happy without weird
43
+ # code.
44
+ mapped_keys = (('gpu_name', 'InstanceType'), ('gpu_name',
45
+ 'AcceleratorName'),
46
+ ('num_gpus', 'AcceleratorCount'), ('cpu_cores', 'vCPUs'),
47
+ ('cpu_ram', 'MemoryGiB'), ('gpu_name', 'GpuInfo'),
48
+ ('search.totalHour', 'Price'), ('min_bid', 'SpotPrice'),
49
+ ('geolocation', 'Region'))
50
+ writer = csv.DictWriter(sys.stdout, fieldnames=[x[1] for x in mapped_keys])
51
+ writer.writeheader()
52
+
53
+ # Vast has a wide variety of machines, some of
54
+ # which will have less disk space and network
55
+ # bandwidth than others.
56
+ #
57
+ # The machines normally have high specificity
58
+ # in the vast catalog - this is fairly unique
59
+ # to Vast and can make bucketing them into
60
+ # instance types difficult.
61
+ #
62
+ # The flags
63
+ #
64
+ # * georegion consolidates geographic areas
65
+ #
66
+ # * chunked rounds down specifications (such
67
+ # as 1025GB to 1024GB disk) in order to
68
+ # make machine specifications look more
69
+ # consistent
70
+ #
71
+ # * inet_down makes sure that only machines
72
+ # with "reasonable" downlink speed are
73
+ # considered
74
+ #
75
+ # * disk_space sets a lower limit of how
76
+ # much space is available to be allocated
77
+ # in order to ensure that machines with
78
+ # small disk pools aren't listed
79
+ #
80
+ offerList = vast.vast().search_offers(
81
+ query=('georegion = true chunked = true '
82
+ 'inet_down >= 100 disk_space >= 80'),
83
+ limit=10000)
84
+
85
+ priceMap: Dict[str, List] = collections.defaultdict(list)
86
+ for offer in offerList:
87
+ entry = {}
88
+ for ours, theirs in mapped_keys:
89
+ field = dot_get(offer, ours)
90
+ entry[theirs] = field
91
+
92
+ instance_type = create_instance_type(offer)
93
+ entry['InstanceType'] = instance_type
94
+
95
+ # the documentation says
96
+ # "{'gpus': [{
97
+ # 'name': 'v100',
98
+ # 'manufacturer': 'nvidia',
99
+ # 'count': 8.0,
100
+ # 'memoryinfo': {'sizeinmib': 16384}
101
+ # }],
102
+ # 'totalgpumemoryinmib': 16384}",
103
+ # we can do that.
104
+ entry['MemoryGiB'] /= 1024
105
+
106
+ gpu = re.sub('Ada', '-Ada', re.sub(r'\s', '', offer['gpu_name']))
107
+ gpu = re.sub(r'(Ti|PCIE|SXM4|SXM|NVL)$', '', gpu)
108
+ gpu = re.sub(r'(RTX\d0\d0)(S|D)$', r'\1', gpu)
109
+
110
+ if gpu in _map:
111
+ gpu = _map[gpu]
112
+
113
+ entry['AcceleratorName'] = gpu
114
+ entry['GpuInfo'] = json.dumps({
115
+ 'Gpus': [{
116
+ 'Name': gpu,
117
+ 'Count': offer['num_gpus'],
118
+ 'MemoryInfo': {
119
+ 'SizeInMiB': offer['gpu_total_ram']
120
+ }
121
+ }],
122
+ 'TotalGpuMemoryInMiB': offer['gpu_total_ram']
123
+ }).replace('"', '\'')
124
+
125
+ priceMap[instance_type].append(entry)
126
+
127
+ for instanceList in priceMap.values():
128
+ priceList = sorted([x['Price'] for x in instanceList])
129
+ index = math.ceil(0.5 * len(priceList)) - 1
130
+ priceTarget = priceList[index]
131
+ toList: List = []
132
+ for instance in instanceList:
133
+ if instance['Price'] <= priceTarget:
134
+ instance['Price'] = '{:.2f}'.format(priceTarget)
135
+ toList.append(instance)
136
+
137
+ maxBid = max([x.get('SpotPrice') for x in toList])
138
+ for instance in toList:
139
+ stub = f'{instance["InstanceType"]} {instance["Region"][-2:]}'
140
+ if stub in seen:
141
+ printstub = f'{stub}#print'
142
+ if printstub not in seen:
143
+ instance['SpotPrice'] = f'{maxBid:.2f}'
144
+ writer.writerow(instance)
145
+ seen.add(printstub)
146
+ else:
147
+ seen.add(stub)
@@ -8,10 +8,10 @@ import typing
8
8
  from typing import Dict, List, Optional, Set, Tuple
9
9
 
10
10
  from sky import check as sky_check
11
+ from sky import clouds as sky_clouds
11
12
  from sky import sky_logging
12
13
  from sky.adaptors import common as adaptors_common
13
14
  from sky.adaptors import kubernetes
14
- from sky.clouds import Kubernetes
15
15
  from sky.clouds.service_catalog import CloudFilter
16
16
  from sky.clouds.service_catalog import common
17
17
  from sky.provision.kubernetes import utils as kubernetes_utils
@@ -129,6 +129,14 @@ def _list_accelerators(
129
129
  # TODO(romilb): This should be refactored to use get_kubernetes_node_info()
130
130
  # function from kubernetes_utils.
131
131
  del all_regions, require_price # Unused.
132
+
133
+ # First check if Kubernetes is enabled. This ensures k8s python client is
134
+ # installed. Do not put any k8s-specific logic before this check.
135
+ enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh()
136
+ if not sky_clouds.cloud_in_iterable(sky_clouds.Kubernetes(),
137
+ enabled_clouds):
138
+ return {}, {}, {}
139
+
132
140
  # TODO(zhwu): this should return all accelerators in multiple kubernetes
133
141
  # clusters defined by allowed_contexts.
134
142
  if region_filter is None:
@@ -142,11 +150,8 @@ def _list_accelerators(
142
150
  if context is None:
143
151
  return {}, {}, {}
144
152
 
145
- k8s_cloud = Kubernetes()
146
- if not any(
147
- map(k8s_cloud.is_same_cloud,
148
- sky_check.get_cached_enabled_clouds_or_refresh())
149
- ) or not kubernetes_utils.check_credentials(context)[0]:
153
+ # Verify that the credentials are still valid.
154
+ if not kubernetes_utils.check_credentials(context)[0]:
150
155
  return {}, {}, {}
151
156
 
152
157
  has_gpu = kubernetes_utils.detect_accelerator_resource(context)
@@ -0,0 +1,104 @@
1
+ """ Vast | Catalog
2
+
3
+ This module loads the service catalog file and can be used to
4
+ query instance types and pricing information for Vast.ai.
5
+ """
6
+
7
+ import typing
8
+ from typing import Dict, List, Optional, Tuple, Union
9
+
10
+ from sky.clouds.service_catalog import common
11
+ from sky.utils import ux_utils
12
+
13
+ if typing.TYPE_CHECKING:
14
+ from sky.clouds import cloud
15
+
16
+ _df = common.read_catalog('vast/vms.csv')
17
+
18
+
19
+ def instance_type_exists(instance_type: str) -> bool:
20
+ return common.instance_type_exists_impl(_df, instance_type)
21
+
22
+
23
+ def validate_region_zone(
24
+ region: Optional[str],
25
+ zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
26
+ if zone is not None:
27
+ with ux_utils.print_exception_no_traceback():
28
+ raise ValueError('Vast does not support zones.')
29
+ return common.validate_region_zone_impl('vast', _df, region, zone)
30
+
31
+
32
+ def get_hourly_cost(instance_type: str,
33
+ use_spot: bool = False,
34
+ region: Optional[str] = None,
35
+ zone: Optional[str] = None) -> float:
36
+ """Returns the cost, or the cheapest cost among all zones for spot."""
37
+ if zone is not None:
38
+ with ux_utils.print_exception_no_traceback():
39
+ raise ValueError('Vast does not support zones.')
40
+ return common.get_hourly_cost_impl(_df, instance_type, use_spot, region,
41
+ zone)
42
+
43
+
44
+ def get_vcpus_mem_from_instance_type(
45
+ instance_type: str) -> Tuple[Optional[float], Optional[float]]:
46
+ return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
47
+
48
+
49
+ def get_default_instance_type(cpus: Optional[str] = None,
50
+ memory: Optional[str] = None,
51
+ disk_tier: Optional[str] = None) -> Optional[str]:
52
+ del disk_tier
53
+ # NOTE: After expanding catalog to multiple entries, you may
54
+ # want to specify a default instance type or family.
55
+ return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
56
+
57
+
58
+ def get_accelerators_from_instance_type(
59
+ instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
60
+ return common.get_accelerators_from_instance_type_impl(_df, instance_type)
61
+
62
+
63
+ def get_instance_type_for_accelerator(
64
+ acc_name: str,
65
+ acc_count: int,
66
+ cpus: Optional[str] = None,
67
+ memory: Optional[str] = None,
68
+ use_spot: bool = False,
69
+ region: Optional[str] = None,
70
+ zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
71
+ """Returns a list of instance types that have the given accelerator."""
72
+ if zone is not None:
73
+ with ux_utils.print_exception_no_traceback():
74
+ raise ValueError('Vast does not support zones.')
75
+ return common.get_instance_type_for_accelerator_impl(df=_df,
76
+ acc_name=acc_name,
77
+ acc_count=acc_count,
78
+ cpus=cpus,
79
+ memory=memory,
80
+ use_spot=use_spot,
81
+ region=region,
82
+ zone=zone)
83
+
84
+
85
+ def get_region_zones_for_instance_type(instance_type: str,
86
+ use_spot: bool) -> List['cloud.Region']:
87
+ df = _df[_df['InstanceType'] == instance_type]
88
+ return common.get_region_zones(df, use_spot)
89
+
90
+
91
+ # TODO: this differs from the fluffy catalog version
92
+ def list_accelerators(
93
+ gpus_only: bool,
94
+ name_filter: Optional[str],
95
+ region_filter: Optional[str],
96
+ quantity_filter: Optional[int],
97
+ case_sensitive: bool = True,
98
+ all_regions: bool = False,
99
+ require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
100
+ """Returns all instance types in Vast offering GPUs."""
101
+ del require_price # Unused.
102
+ return common.list_accelerators_impl('Vast', _df, gpus_only, name_filter,
103
+ region_filter, quantity_filter,
104
+ case_sensitive, all_regions)