skypilot-nightly 1.0.0.dev20251011__py3-none-any.whl → 1.0.0.dev20251013__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/adaptors/shadeform.py +89 -0
- sky/authentication.py +43 -0
- sky/backends/backend_utils.py +2 -0
- sky/backends/cloud_vm_ray_backend.py +4 -2
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/client/cli/command.py +44 -3
- sky/client/sdk.py +11 -3
- sky/clouds/__init__.py +2 -0
- sky/clouds/shadeform.py +393 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-66f23594d38c7f16.js → webpack-ac3a34c8f9fef041.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/provision/__init__.py +1 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/serve/constants.py +0 -3
- sky/serve/service_spec.py +1 -8
- sky/server/constants.py +4 -0
- sky/server/requests/executor.py +22 -2
- sky/server/requests/payloads.py +2 -0
- sky/server/requests/requests.py +119 -2
- sky/server/server.py +17 -6
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/constants.py +1 -1
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/utils/context_utils.py +13 -9
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/METADATA +43 -41
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/RECORD +52 -43
- /sky/dashboard/out/_next/static/{Xs6jdcfyNaUuBO8jmzU9_ → MtlDUf-nH1hhcy7xwbCj3}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{Xs6jdcfyNaUuBO8jmzU9_ → MtlDUf-nH1hhcy7xwbCj3}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
|
@@ -7,7 +7,7 @@ import urllib.request
|
|
|
7
7
|
from sky.utils import directory_utils
|
|
8
8
|
|
|
9
9
|
# Replaced with the current commit when building the wheels.
|
|
10
|
-
_SKYPILOT_COMMIT_SHA = '
|
|
10
|
+
_SKYPILOT_COMMIT_SHA = 'c9063aba1971bd726c07f76164ad1127ac90424c'
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def _get_git_commit():
|
|
@@ -37,7 +37,7 @@ def _get_git_commit():
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
__commit__ = _get_git_commit()
|
|
40
|
-
__version__ = '1.0.0.
|
|
40
|
+
__version__ = '1.0.0.dev20251013'
|
|
41
41
|
__root_dir__ = directory_utils.get_sky_dir()
|
|
42
42
|
|
|
43
43
|
|
|
@@ -150,6 +150,7 @@ Vsphere = clouds.Vsphere
|
|
|
150
150
|
Fluidstack = clouds.Fluidstack
|
|
151
151
|
Nebius = clouds.Nebius
|
|
152
152
|
Hyperbolic = clouds.Hyperbolic
|
|
153
|
+
Shadeform = clouds.Shadeform
|
|
153
154
|
Seeweb = clouds.Seeweb
|
|
154
155
|
|
|
155
156
|
__all__ = [
|
|
@@ -172,6 +173,7 @@ __all__ = [
|
|
|
172
173
|
'Fluidstack',
|
|
173
174
|
'Nebius',
|
|
174
175
|
'Hyperbolic',
|
|
176
|
+
'Shadeform',
|
|
175
177
|
'Seeweb',
|
|
176
178
|
'Optimizer',
|
|
177
179
|
'OptimizeTarget',
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Shadeform cloud adaptor."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import socket
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
from sky import sky_logging
|
|
10
|
+
from sky.provision.shadeform import shadeform_utils
|
|
11
|
+
from sky.utils import common_utils
|
|
12
|
+
|
|
13
|
+
logger = sky_logging.init_logger(__name__)
|
|
14
|
+
|
|
15
|
+
# Module-level cache for the lazily imported ``shadeform`` package; it stays
# ``None`` until a function wrapped by ``import_package`` is first called.
_shadeform_sdk = None


def import_package(func):
    """Decorator that imports the ``shadeform`` package on first use.

    The wrapped function is only invoked once the package has been imported
    and cached in the module-level ``_shadeform_sdk``.

    Raises (from the wrapper):
        ImportError: If the ``shadeform`` package cannot be imported. The
            original import traceback is suppressed (``from None``).
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        global _shadeform_sdk
        if _shadeform_sdk is not None:
            # Already imported by an earlier call.
            return func(*args, **kwargs)
        try:
            import shadeform as _shadeform  # pylint: disable=import-outside-toplevel
        except ImportError:
            raise ImportError(
                'Failed to import dependencies for Shadeform. '
                'Try pip install "skypilot[shadeform]"') from None
        _shadeform_sdk = _shadeform
        return func(*args, **kwargs)

    return wrapper
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@import_package
def shadeform():
    """Return the shadeform package.

    The ``import_package`` decorator performs the import on first call and
    stores the module in ``_shadeform_sdk``, so by the time this body runs the
    cached module is available. An ``ImportError`` propagates from the
    decorator when the package is not installed.
    """
    return _shadeform_sdk
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def list_ssh_keys() -> List[Dict[str, Any]]:
    """Return the SSH keys registered in the Shadeform account.

    Returns:
        The value of the ``'ssh_keys'`` entry of the response from
        ``shadeform_utils.get_ssh_keys()`` (``[]`` when the entry is absent).
        Request/parsing failures are logged as a warning and also produce
        ``[]``, so callers cannot tell "no keys" from "lookup failed".
    """
    try:
        payload = shadeform_utils.get_ssh_keys()
        ssh_keys = payload.get('ssh_keys', [])
    except (ValueError, KeyError, requests.exceptions.RequestException) as err:
        logger.warning(f'Failed to list SSH keys from Shadeform: {err}')
        return []
    return ssh_keys
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def add_ssh_key_to_shadeform(public_key: str) -> Optional[str]:
    """Add SSH key to Shadeform if it doesn't already exist.

    Args:
        public_key: The SSH public key string.

    Returns:
        The Shadeform id of the key (the already-registered one when the same
        public key is found, otherwise the newly created one), or None when
        the key could not be added.
    """
    try:
        # Reuse a key that is already registered. Note that list_ssh_keys()
        # returns [] when the lookup itself fails, in which case we fall
        # through and attempt to add the key.
        for key in list_ssh_keys():
            if key.get('public_key') == public_key:
                logger.info('SSH key already exists in Shadeform account')
                return key.get('id')

        # Derive the key name from the local hostname and the first 8
        # characters of the user hash. The name is deterministic per
        # host/user, not random.
        hostname = socket.gethostname()
        key_name = f'skypilot-{hostname}-{common_utils.get_user_hash()[:8]}'

        response = shadeform_utils.add_ssh_key(name=key_name,
                                               public_key=public_key)
        key_id = response['id']
        # Log name and id as separate fields; interpolating the pair as a
        # tuple would print a Python tuple repr.
        logger.info(
            f'Added SSH key to Shadeform: name={key_name}, id={key_id}')
        return key_id

    except (ValueError, KeyError, requests.exceptions.RequestException) as e:
        logger.warning(f'Failed to add SSH key to Shadeform: {e}')
        return None
|
sky/authentication.py
CHANGED
|
@@ -39,6 +39,7 @@ from sky.adaptors import gcp
|
|
|
39
39
|
from sky.adaptors import ibm
|
|
40
40
|
from sky.adaptors import runpod
|
|
41
41
|
from sky.adaptors import seeweb as seeweb_adaptor
|
|
42
|
+
from sky.adaptors import shadeform as shadeform_adaptor
|
|
42
43
|
from sky.adaptors import vast
|
|
43
44
|
from sky.provision.fluidstack import fluidstack_utils
|
|
44
45
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
|
@@ -511,6 +512,48 @@ def setup_hyperbolic_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
511
512
|
return configure_ssh_info(config)
|
|
512
513
|
|
|
513
514
|
|
|
515
|
+
def setup_shadeform_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
    """Sets up SSH authentication for Shadeform.

    Reads the public key at the path returned by ``get_or_generate_keys()``,
    registers it with the user's Shadeform account through the Shadeform
    adaptor, and records the key path and the Shadeform key id under
    ``config['auth']``.

    Args:
        config: Cluster config; must already contain an ``'auth'`` mapping.

    Returns:
        The result of ``configure_ssh_info(config)``.

    Raises:
        exceptions.CloudUserIdentityError: If registering the key fails, or
            if no key id could be obtained (including when the Shadeform
            dependencies are missing).
    """
    _, public_key_path = get_or_generate_keys()

    with open(public_key_path, 'r', encoding='utf-8') as f:
        public_key = f.read().strip()

    # Stays None when the ImportError branch below is taken.
    ssh_key_id = None
    try:
        ssh_key_id = shadeform_adaptor.add_ssh_key_to_shadeform(public_key)

    except ImportError as e:
        # If required dependencies are missing
        logger.warning(
            f'Failed to add Shadeform SSH key due to missing dependencies: '
            f'{e}. Manually configure SSH keys in your Shadeform account.')

    except Exception as e:
        logger.warning(f'Failed to set up Shadeform authentication: {e}')
        raise exceptions.CloudUserIdentityError(
            'Failed to set up SSH authentication for Shadeform. '
            f'Please ensure your Shadeform credentials are configured: {e}'
        ) from e

    if ssh_key_id is None:
        # Use the same domain exception as the branch above instead of a bare
        # Exception, so callers can handle both failure modes uniformly.
        raise exceptions.CloudUserIdentityError(
            'Failed to add SSH key to Shadeform')

    # Configure SSH info in the config
    config['auth']['ssh_public_key'] = public_key_path
    config['auth']['ssh_key_id'] = ssh_key_id

    return configure_ssh_info(config)
|
|
555
|
+
|
|
556
|
+
|
|
514
557
|
def setup_primeintellect_authentication(
|
|
515
558
|
config: Dict[str, Any]) -> Dict[str, Any]:
|
|
516
559
|
"""Sets up SSH authentication for Prime Intellect.
|
sky/backends/backend_utils.py
CHANGED
|
@@ -1124,6 +1124,8 @@ def _add_auth_to_cluster_config(cloud: clouds.Cloud, tmp_yaml_path: str):
|
|
|
1124
1124
|
config = auth.setup_fluidstack_authentication(config)
|
|
1125
1125
|
elif isinstance(cloud, clouds.Hyperbolic):
|
|
1126
1126
|
config = auth.setup_hyperbolic_authentication(config)
|
|
1127
|
+
elif isinstance(cloud, clouds.Shadeform):
|
|
1128
|
+
config = auth.setup_shadeform_authentication(config)
|
|
1127
1129
|
elif isinstance(cloud, clouds.PrimeIntellect):
|
|
1128
1130
|
config = auth.setup_primeintellect_authentication(config)
|
|
1129
1131
|
elif isinstance(cloud, clouds.Seeweb):
|
|
@@ -141,6 +141,7 @@ _NODES_LAUNCHING_PROGRESS_TIMEOUT = {
|
|
|
141
141
|
clouds.OCI: 300,
|
|
142
142
|
clouds.Paperspace: 600,
|
|
143
143
|
clouds.Kubernetes: 300,
|
|
144
|
+
clouds.Shadeform: 300,
|
|
144
145
|
clouds.Vsphere: 240,
|
|
145
146
|
}
|
|
146
147
|
|
|
@@ -304,6 +305,7 @@ def _get_cluster_config_template(cloud):
|
|
|
304
305
|
clouds.RunPod: 'runpod-ray.yml.j2',
|
|
305
306
|
clouds.Kubernetes: 'kubernetes-ray.yml.j2',
|
|
306
307
|
clouds.SSH: 'kubernetes-ray.yml.j2',
|
|
308
|
+
clouds.Shadeform: 'shadeform-ray.yml.j2',
|
|
307
309
|
clouds.Vsphere: 'vsphere-ray.yml.j2',
|
|
308
310
|
clouds.Vast: 'vast-ray.yml.j2',
|
|
309
311
|
clouds.Fluidstack: 'fluidstack-ray.yml.j2',
|
|
@@ -2826,7 +2828,7 @@ class CloudVmRayResourceHandle(backends.backend.ResourceHandle):
|
|
|
2826
2828
|
return grpc.insecure_channel(f'localhost:{tunnel.port}',
|
|
2827
2829
|
options=grpc_options)
|
|
2828
2830
|
except socket.error as e:
|
|
2829
|
-
logger.
|
|
2831
|
+
logger.debug(
|
|
2830
2832
|
'Failed to connect to SSH tunnel for cluster '
|
|
2831
2833
|
f'{self.cluster_name!r} on port {tunnel.port} ({e}), '
|
|
2832
2834
|
'acquiring lock')
|
|
@@ -2852,7 +2854,7 @@ class CloudVmRayResourceHandle(backends.backend.ResourceHandle):
|
|
|
2852
2854
|
return grpc.insecure_channel(f'localhost:{tunnel.port}',
|
|
2853
2855
|
options=grpc_options)
|
|
2854
2856
|
except socket.error as e:
|
|
2855
|
-
logger.
|
|
2857
|
+
logger.debug(
|
|
2856
2858
|
'Failed to connect to SSH tunnel for cluster '
|
|
2857
2859
|
f'{self.cluster_name!r} on port {tunnel.port} ({e}), '
|
|
2858
2860
|
'opening new tunnel')
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""A script that generates the Shadeform catalog.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python fetch_shadeform.py [-h] [--api-key API_KEY]
|
|
5
|
+
[--api-key-path API_KEY_PATH]
|
|
6
|
+
|
|
7
|
+
If neither --api-key nor --api-key-path are provided, this script will parse
|
|
8
|
+
`~/.shadeform/api_key` to look for Shadeform API key.
|
|
9
|
+
"""
|
|
10
|
+
import argparse
|
|
11
|
+
import csv
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
from typing import Dict
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
|
|
18
|
+
# Shadeform REST endpoint that create_catalog() queries for instance types.
ENDPOINT = 'https://api.shadeform.ai/v1/instances/types'
# API key file that get_api_key() reads when neither --api-key nor
# --api-key-path is given on the command line.
DEFAULT_SHADEFORM_API_KEY_PATH = os.path.expanduser('~/.shadeform/api_key')
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def parse_gpu_info(gpu_type: str, num_gpus: int, ram_per_gpu: int) -> Dict:
    """Build the ``GpuInfo`` dictionary for one catalog row.

    Args:
        gpu_type: Accelerator name; 'MI300X' maps to AMD, 'GAUDI2' to Intel,
            and everything else to NVIDIA.
        num_gpus: Number of GPUs on the instance.
        ram_per_gpu: Per-GPU memory. The value is stored verbatim under
            'SizeInMiB', so the caller decides the unit it passes in.

    Returns:
        A dict with a single-element 'Gpus' list describing the accelerator.
    """
    non_nvidia_vendors = {'MI300X': 'AMD', 'GAUDI2': 'Intel'}
    vendor = non_nvidia_vendors.get(gpu_type, 'NVIDIA')

    gpu_entry = {
        'Name': gpu_type,
        'Manufacturer': vendor,
        'Count': float(num_gpus),
        'MemoryInfo': {
            'SizeInMiB': ram_per_gpu
        },
        'TotalGpuMemoryInMiB': ram_per_gpu * num_gpus,
    }
    return {'Gpus': [gpu_entry]}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def create_catalog(api_key: str, output_path: str) -> None:
    """Create Shadeform catalog by fetching from API.

    Queries ``ENDPOINT`` for instance types flagged as available and writes
    one CSV row per (GPU instance type, available region) to ``output_path``.
    Instance types with zero GPUs produce no rows.

    Args:
        api_key: Shadeform API key, sent in the ``X-API-KEY`` header.
        output_path: Path of the CSV file to create or overwrite.

    Raises:
        requests.exceptions.HTTPError: If the API returns an error status
            (via ``response.raise_for_status()``).
        KeyError: If an instance entry lacks one of the fields read below.
    """
    headers = {'X-API-KEY': api_key}
    params = {'available': 'true'}

    response = requests.get(ENDPOINT,
                            headers=headers,
                            params=params,
                            timeout=30)
    response.raise_for_status()

    data = response.json()
    instance_types = data.get('instance_types', [])

    # newline='' leaves line endings to the csv module, as its documentation
    # requires; otherwise platforms that translate '\n' emit '\r\r\n'.
    with open(output_path, mode='w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, delimiter=',', quotechar='"')
        writer.writerow([
            'InstanceType', 'AcceleratorName', 'AcceleratorCount', 'vCPUs',
            'MemoryGiB', 'Price', 'Region', 'GpuInfo', 'SpotPrice'
        ])

        for instance in instance_types:
            config = instance['configuration']

            cloud = instance['cloud']
            shade_instance_type = instance['shade_instance_type']
            instance_type = f'{cloud}_{shade_instance_type.replace("_", "-")}'
            gpu_type = config['gpu_type'].replace('_', '-')
            gpu_count = float(config['num_gpus'])
            vcpus = float(config['vcpus'])
            memory_gb = int(config['memory_in_gb'])

            # Append "B" to instance_type and gpu_type if they end with "G"
            if instance_type.endswith('G'):
                instance_type += 'B'
            if gpu_type.endswith('G'):
                gpu_type += 'B'

            # Replace "Gx" with "GBx" (case sensitive)
            if 'Gx' in instance_type:
                instance_type = instance_type.replace('Gx', 'GBx')

            # Price is in cents per hour, convert to dollars
            price = float(instance['hourly_price']) / 100

            # Create GPU info
            gpuinfo = None
            if gpu_count > 0:
                # The API field is in GB, while the GpuInfo fields are named
                # in MiB, so convert before building the entry.
                vram_per_gpu_mib = int(config['vram_per_gpu_in_gb']) * 1024
                gpuinfo_dict = parse_gpu_info(gpu_type, int(gpu_count),
                                              vram_per_gpu_mib)
                # The catalog stores GpuInfo with single quotes.
                gpuinfo = json.dumps(gpuinfo_dict).replace('"', '\'')

            # Write entry for each available region
            for availability in instance.get('availability', []):
                if availability['available'] and gpu_count > 0:
                    region = availability['region']
                    writer.writerow([
                        instance_type,
                        gpu_type,
                        gpu_count,
                        vcpus,
                        memory_gb,
                        price,
                        region,
                        gpuinfo,
                        ''  # No spot pricing info available
                    ])
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_api_key(cmdline_args: argparse.Namespace) -> str:
    """Get Shadeform API key from cmdline or default path.

    Resolution order: ``cmdline_args.api_key``, then the file at
    ``cmdline_args.api_key_path``, then the file at
    ``DEFAULT_SHADEFORM_API_KEY_PATH``. File contents are stripped of
    surrounding whitespace.

    Args:
        cmdline_args: Parsed arguments with ``api_key`` and ``api_key_path``
            attributes (either may be None).

    Returns:
        The non-empty API key.

    Raises:
        FileNotFoundError: If the key file to read does not exist.
        ValueError: If the resolved API key is empty.
    """
    api_key = cmdline_args.api_key
    if api_key is None:
        key_path = cmdline_args.api_key_path
        if key_path is None:
            # Read from ~/.shadeform/api_key
            key_path = DEFAULT_SHADEFORM_API_KEY_PATH
        try:
            with open(key_path, mode='r', encoding='utf-8') as f:
                api_key = f.read().strip()
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f'Shadeform API key file not found: {key_path}. Please '
                'provide the key via --api-key or --api-key-path.') from e
    # An explicit check rather than ``assert``: asserts are stripped under
    # ``python -O``, and a file read never yields None anyway.
    if not api_key:
        raise ValueError(
            'Shadeform API key is empty. Please provide it via --api-key or '
            '--api-key-path.')
    return api_key
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# Script entry point: parse the CLI flags, make sure the output directory
# exists, then fetch the catalog and write it to shadeform/vms.csv.
if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--api-key', help='Shadeform API key.')
    arg_parser.add_argument(
        '--api-key-path', help='path of file containing Shadeform API key.')
    cli_args = arg_parser.parse_args()

    os.makedirs('shadeform', exist_ok=True)
    create_catalog(get_api_key(cli_args), 'shadeform/vms.csv')
    print('Shadeform catalog saved to shadeform/vms.csv')
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
""" Shadeform | Catalog
|
|
2
|
+
|
|
3
|
+
This module loads pricing and instance information from the Shadeform API
|
|
4
|
+
and can be used to query instance types and pricing information for Shadeform.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import typing
|
|
8
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from sky.catalog import common
|
|
13
|
+
|
|
14
|
+
if typing.TYPE_CHECKING:
|
|
15
|
+
from sky.clouds import cloud
|
|
16
|
+
|
|
17
|
+
# Lazily populated catalog dataframe; filled on the first _get_df() call.
_df = None


def _get_df():
    """Return the Shadeform catalog dataframe, loading it on first use.

    The catalog is read from 'shadeform/vms.csv' via common.read_catalog().
    Rows without an InstanceType (and, when the column exists, without an
    AcceleratorName) are dropped and accelerator names are stripped of
    surrounding whitespace. If the catalog file is missing, an empty
    dataframe with the catalog columns is cached instead.
    """
    global _df
    if _df is not None:
        return _df

    try:
        catalog = common.read_catalog('shadeform/vms.csv')
    except FileNotFoundError:
        # No catalog file available: cache an empty catalog with the
        # expected columns.
        _df = pd.DataFrame(columns=[
            'InstanceType', 'AcceleratorName', 'AcceleratorCount', 'vCPUs',
            'MemoryGiB', 'Price', 'Region', 'GpuInfo', 'SpotPrice'
        ])
        return _df

    catalog = catalog[catalog['InstanceType'].notna()]
    if 'AcceleratorName' in catalog.columns:
        catalog = catalog[catalog['AcceleratorName'].notna()]
        stripped_names = catalog['AcceleratorName'].astype(str).str.strip()
        catalog = catalog.assign(AcceleratorName=stripped_names)
    _df = catalog.reset_index(drop=True)
    return _df
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _is_not_found_error(err: ValueError) -> bool:
|
|
48
|
+
msg = str(err).lower()
|
|
49
|
+
return 'not found' in msg or 'not supported' in msg
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _call_or_default(func, default):
|
|
53
|
+
try:
|
|
54
|
+
return func()
|
|
55
|
+
except ValueError as err:
|
|
56
|
+
if _is_not_found_error(err):
|
|
57
|
+
return default
|
|
58
|
+
raise
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def instance_type_exists(instance_type: str) -> bool:
    """Check an instance type against the Shadeform catalog.

    Delegates to common.instance_type_exists_impl() with the cached catalog.
    """
    catalog = _get_df()
    return common.instance_type_exists_impl(catalog, instance_type)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def validate_region_zone(
        region: Optional[str],
        zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """Validate a region/zone pair against the Shadeform catalog.

    Delegates to common.validate_region_zone_impl() using the cloud name
    'shadeform' and the cached catalog.
    """
    catalog = _get_df()
    return common.validate_region_zone_impl('shadeform', catalog, region,
                                            zone)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_hourly_cost(instance_type: str,
                    use_spot: bool = False,
                    region: Optional[str] = None,
                    zone: Optional[str] = None) -> float:
    """Return the on-demand hourly cost of ``instance_type``.

    Raises:
        ValueError: If ``use_spot`` is set; spot instances are not supported
            on Shadeform.
    """
    if not use_spot:
        return common.get_hourly_cost_impl(_get_df(), instance_type, use_spot,
                                           region, zone)
    raise ValueError('Spot instances are not supported on Shadeform')
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def get_vcpus_mem_from_instance_type(
        instance_type: str) -> Tuple[Optional[float], Optional[float]]:
    """Look up vCPUs and memory for ``instance_type``.

    Returns ``(None, None)`` when the lookup raises a not-found style
    ValueError (see _call_or_default()).
    """

    def _lookup():
        return common.get_vcpus_mem_from_instance_type_impl(
            _get_df(), instance_type)

    return _call_or_default(_lookup, (None, None))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def get_default_instance_type(cpus: Optional[str] = None,
                              memory: Optional[str] = None,
                              disk_tier: Optional[str] = None,
                              region: Optional[str] = None,
                              zone: Optional[str] = None) -> Optional[str]:
    """Pick an instance type satisfying the CPU/memory requirements.

    ``disk_tier`` is accepted for interface compatibility but ignored.
    Returns None when the lookup raises a not-found style ValueError (see
    _call_or_default()).
    """
    del disk_tier  # Shadeform doesn't support custom disk tiers yet

    def _lookup():
        return common.get_instance_type_for_cpus_mem_impl(
            _get_df(), cpus, memory, region, zone)

    return _call_or_default(_lookup, None)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_accelerators_from_instance_type(
        instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
    """Look up the accelerators attached to ``instance_type``.

    Returns None when the lookup raises a not-found style ValueError (see
    _call_or_default()).
    """

    def _lookup():
        return common.get_accelerators_from_instance_type_impl(
            _get_df(), instance_type)

    return _call_or_default(_lookup, None)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def get_instance_type_for_accelerator(
        acc_name: str,
        acc_count: int,
        cpus: Optional[str] = None,
        memory: Optional[str] = None,
        use_spot: bool = False,
        region: Optional[str] = None,
        zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
    """Find instance types offering the requested accelerator.

    Returns:
        The pair produced by common.get_instance_type_for_accelerator_impl().
        For spot requests the result is ``(None, [<reason>])`` because spot
        is not supported; a not-found style ValueError yields ``(None, [])``.
    """
    if use_spot:
        # Spot is unsupported: no instance types, plus the reason.
        return None, ['Spot instances are not supported on Shadeform']

    def _lookup():
        return common.get_instance_type_for_accelerator_impl(
            df=_get_df(),
            acc_name=acc_name,
            acc_count=acc_count,
            cpus=cpus,
            memory=memory,
            use_spot=use_spot,
            region=region,
            zone=zone)

    return _call_or_default(_lookup, (None, []))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def get_region_zones_for_instance_type(instance_type: str,
                                       use_spot: bool) -> List['cloud.Region']:
    """List the regions (with zones) where ``instance_type`` is offered.

    Spot requests always return ``[]`` since spot is not supported; a
    not-found style ValueError from the lookup also yields ``[]``.
    """
    if use_spot:
        return []  # No spot support

    catalog = _get_df()
    matching_rows = catalog[catalog['InstanceType'] == instance_type]

    def _lookup():
        return common.get_region_zones(matching_rows, use_spot)

    return _call_or_default(_lookup, [])
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def list_accelerators(
        gpus_only: bool,
        name_filter: Optional[str],
        region_filter: Optional[str],
        quantity_filter: Optional[int],
        case_sensitive: bool = True,
        all_regions: bool = False,
        require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
    """List accelerator offerings from the Shadeform catalog.

    Delegates to common.list_accelerators_impl() under the cloud name
    'Shadeform'. ``require_price`` is accepted but not used.
    """
    del require_price  # Unused.
    catalog = _get_df()
    return common.list_accelerators_impl('Shadeform', catalog, gpus_only,
                                         name_filter, region_filter,
                                         quantity_filter, case_sensitive,
                                         all_regions)
|
sky/client/cli/command.py
CHANGED
|
@@ -111,6 +111,13 @@ an autogenerated name."""
|
|
|
111
111
|
# command.
|
|
112
112
|
_NUM_MANAGED_JOBS_TO_SHOW_IN_STATUS = 5
|
|
113
113
|
_NUM_MANAGED_JOBS_TO_SHOW = 50
|
|
114
|
+
_NUM_REQUESTS_TO_SHOW = 50
|
|
115
|
+
_DEFAULT_REQUEST_FIELDS_TO_SHOW = [
|
|
116
|
+
'request_id', 'name', 'user_id', 'status', 'created_at'
|
|
117
|
+
]
|
|
118
|
+
_VERBOSE_REQUEST_FIELDS_TO_SHOW = _DEFAULT_REQUEST_FIELDS_TO_SHOW + [
|
|
119
|
+
'cluster_name'
|
|
120
|
+
]
|
|
114
121
|
|
|
115
122
|
_STATUS_PROPERTY_CLUSTER_NUM_ERROR_MESSAGE = (
|
|
116
123
|
'{cluster_num} cluster{plural} {verb}. Please specify {cause} '
|
|
@@ -6155,6 +6162,25 @@ def api_cancel(request_ids: Optional[List[str]], all: bool, all_users: bool):
|
|
|
6155
6162
|
fg='green')
|
|
6156
6163
|
|
|
6157
6164
|
|
|
6165
|
+
class IntOrNone(click.ParamType):
    """Click parameter type accepting an integer, "none" or "all".

    The strings "none" and "all" (case-insensitive) convert to None; any
    other value is parsed with int().
    """
    name = 'int-or-none'

    def convert(self, value, param, ctx):
        """Convert a raw option value to ``int`` or ``None``.

        Values that are already ints (e.g. the option default) pass through
        unchanged. A value that int() rejects with ValueError is reported via
        ``self.fail``.
        """
        if isinstance(value, int):
            return value
        means_unlimited = (isinstance(value, str) and
                           value.lower() in ('none', 'all'))
        if means_unlimited:
            return None
        try:
            parsed = int(value)
        except ValueError:
            self.fail(f'{value!r} is not a valid integer or "none" or "all"',
                      param, ctx)
        else:
            return parsed


# Shared instance used as the ``type=`` of click options.
INT_OR_NONE = IntOrNone()
|
|
6182
|
+
|
|
6183
|
+
|
|
6158
6184
|
@api.command('status', cls=_DocumentedCodeCommand)
|
|
6159
6185
|
@flags.config_option(expose_value=False)
|
|
6160
6186
|
@click.argument('request_ids',
|
|
@@ -6168,15 +6194,26 @@ def api_cancel(request_ids: Optional[List[str]], all: bool, all_users: bool):
|
|
|
6168
6194
|
default=False,
|
|
6169
6195
|
required=False,
|
|
6170
6196
|
help='Show requests of all statuses.')
|
|
6197
|
+
@click.option(
|
|
6198
|
+
'--limit',
|
|
6199
|
+
'-l',
|
|
6200
|
+
default=_NUM_REQUESTS_TO_SHOW,
|
|
6201
|
+
type=INT_OR_NONE,
|
|
6202
|
+
required=False,
|
|
6203
|
+
help=(f'Number of requests to show, default is {_NUM_REQUESTS_TO_SHOW},'
|
|
6204
|
+
f' set to "none" or "all" to show all requests.'))
|
|
6171
6205
|
@flags.verbose_option('Show more details.')
|
|
6172
6206
|
@usage_lib.entrypoint
|
|
6173
6207
|
# pylint: disable=redefined-builtin
|
|
6174
6208
|
def api_status(request_ids: Optional[List[str]], all_status: bool,
|
|
6175
|
-
verbose: bool):
|
|
6209
|
+
verbose: bool, limit: Optional[int]):
|
|
6176
6210
|
"""List requests on SkyPilot API server."""
|
|
6177
6211
|
if not request_ids:
|
|
6178
6212
|
request_ids = None
|
|
6179
|
-
|
|
6213
|
+
fields = _DEFAULT_REQUEST_FIELDS_TO_SHOW
|
|
6214
|
+
if verbose:
|
|
6215
|
+
fields = _VERBOSE_REQUEST_FIELDS_TO_SHOW
|
|
6216
|
+
request_list = sdk.api_status(request_ids, all_status, limit, fields)
|
|
6180
6217
|
columns = ['ID', 'User', 'Name']
|
|
6181
6218
|
if verbose:
|
|
6182
6219
|
columns.append('Cluster')
|
|
@@ -6202,8 +6239,12 @@ def api_status(request_ids: Optional[List[str]], all_status: bool,
|
|
|
6202
6239
|
if verbose:
|
|
6203
6240
|
dummy_row.append('-')
|
|
6204
6241
|
table.add_row(dummy_row)
|
|
6205
|
-
click.echo()
|
|
6206
6242
|
click.echo(table)
|
|
6243
|
+
if limit and len(request_list) >= limit:
|
|
6244
|
+
click.echo()
|
|
6245
|
+
click.echo(
|
|
6246
|
+
f'Showing {limit} requests. Use "-l none" or "-l all" to show'
|
|
6247
|
+
f' all requests.')
|
|
6207
6248
|
|
|
6208
6249
|
|
|
6209
6250
|
@api.command('login', cls=_DocumentedCodeCommand)
|
sky/client/sdk.py
CHANGED
|
@@ -2158,7 +2158,9 @@ def _local_api_server_running(kill: bool = False) -> bool:
|
|
|
2158
2158
|
def api_status(
|
|
2159
2159
|
request_ids: Optional[List[Union[server_common.RequestId[T], str]]] = None,
|
|
2160
2160
|
# pylint: disable=redefined-builtin
|
|
2161
|
-
all_status: bool = False
|
|
2161
|
+
all_status: bool = False,
|
|
2162
|
+
limit: Optional[int] = None,
|
|
2163
|
+
fields: Optional[List[str]] = None,
|
|
2162
2164
|
) -> List[payloads.RequestPayload]:
|
|
2163
2165
|
"""Lists all requests.
|
|
2164
2166
|
|
|
@@ -2167,6 +2169,8 @@ def api_status(
|
|
|
2167
2169
|
If None, all requests are queried.
|
|
2168
2170
|
all_status: Whether to list all finished requests as well. This argument
|
|
2169
2171
|
is ignored if request_ids is not None.
|
|
2172
|
+
limit: The number of requests to show. If None, show all requests.
|
|
2173
|
+
fields: The fields to get. If None, get all fields.
|
|
2170
2174
|
|
|
2171
2175
|
Returns:
|
|
2172
2176
|
A list of request payloads.
|
|
@@ -2175,8 +2179,12 @@ def api_status(
|
|
|
2175
2179
|
logger.info('SkyPilot API server is not running.')
|
|
2176
2180
|
return []
|
|
2177
2181
|
|
|
2178
|
-
body = payloads.RequestStatusBody(
|
|
2179
|
-
|
|
2182
|
+
body = payloads.RequestStatusBody(
|
|
2183
|
+
request_ids=request_ids,
|
|
2184
|
+
all_status=all_status,
|
|
2185
|
+
limit=limit,
|
|
2186
|
+
fields=fields,
|
|
2187
|
+
)
|
|
2180
2188
|
response = server_common.make_authenticated_request(
|
|
2181
2189
|
'GET',
|
|
2182
2190
|
'/api/status',
|
sky/clouds/__init__.py
CHANGED
|
@@ -30,6 +30,7 @@ from sky.clouds.primeintellect import PrimeIntellect
|
|
|
30
30
|
from sky.clouds.runpod import RunPod
|
|
31
31
|
from sky.clouds.scp import SCP
|
|
32
32
|
from sky.clouds.seeweb import Seeweb
|
|
33
|
+
from sky.clouds.shadeform import Shadeform
|
|
33
34
|
from sky.clouds.ssh import SSH
|
|
34
35
|
from sky.clouds.vast import Vast
|
|
35
36
|
from sky.clouds.vsphere import Vsphere
|
|
@@ -48,6 +49,7 @@ __all__ = [
|
|
|
48
49
|
'PrimeIntellect',
|
|
49
50
|
'SCP',
|
|
50
51
|
'RunPod',
|
|
52
|
+
'Shadeform',
|
|
51
53
|
'Vast',
|
|
52
54
|
'OCI',
|
|
53
55
|
'Vsphere',
|