skypilot-nightly 1.0.0.dev20250912__py3-none-any.whl → 1.0.0.dev20250914__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/adaptors/seeweb.py +103 -0
- sky/authentication.py +38 -0
- sky/backends/backend_utils.py +24 -9
- sky/backends/cloud_vm_ray_backend.py +382 -151
- sky/catalog/data_fetchers/fetch_aws.py +0 -36
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/seeweb_catalog.py +184 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/kubernetes.py +2 -0
- sky/clouds/seeweb.py +463 -0
- sky/core.py +46 -12
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{DAiq7V2xJnO1LSfmunZl6 → 5iak5kYp9a9ezANCb74L8}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/1141-159df2d4c441a9d1.js +1 -0
- sky/dashboard/out/_next/static/chunks/3015-2ea98b57e318bd6e.js +1 -0
- sky/dashboard/out/_next/static/chunks/3294.03e02ae73455f48e.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.0fa442e16dd3f00e.js +1 -0
- sky/dashboard/out/_next/static/chunks/5339.c033b29835da0f35.js +51 -0
- sky/dashboard/out/_next/static/chunks/6856-e0754534b3015377.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-11c8e9b982e8ffec.js +1 -0
- sky/dashboard/out/_next/static/chunks/9037-f9800e64eb05dd1c.js +6 -0
- sky/dashboard/out/_next/static/chunks/{webpack-e8a0c4c3c6f408fb.js → webpack-e2e3d2d3de7d43e5.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/exceptions.py +5 -0
- sky/global_user_state.py +41 -26
- sky/jobs/utils.py +61 -13
- sky/provision/__init__.py +1 -0
- sky/provision/kubernetes/utils.py +14 -3
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +806 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +252 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/setup_files/dependencies.py +8 -1
- sky/skylet/constants.py +2 -1
- sky/skylet/job_lib.py +128 -10
- sky/skylet/log_lib.py +3 -3
- sky/skylet/services.py +203 -0
- sky/skylet/skylet.py +4 -0
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/utils/cluster_utils.py +6 -2
- sky/utils/controller_utils.py +11 -5
- {skypilot_nightly-1.0.0.dev20250912.dist-info → skypilot_nightly-1.0.0.dev20250914.dist-info}/METADATA +39 -34
- {skypilot_nightly-1.0.0.dev20250912.dist-info → skypilot_nightly-1.0.0.dev20250914.dist-info}/RECORD +65 -54
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.ba6586f9755b0edb.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6856-6e2bc8a6fd0867af.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- /sky/dashboard/out/_next/static/{DAiq7V2xJnO1LSfmunZl6 → 5iak5kYp9a9ezANCb74L8}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250912.dist-info → skypilot_nightly-1.0.0.dev20250914.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250912.dist-info → skypilot_nightly-1.0.0.dev20250914.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250912.dist-info → skypilot_nightly-1.0.0.dev20250914.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250912.dist-info → skypilot_nightly-1.0.0.dev20250914.dist-info}/top_level.txt +0 -0
|
@@ -74,10 +74,6 @@ USEFUL_COLUMNS = [
|
|
|
74
74
|
# only available in this region, but it serves pricing information for all
|
|
75
75
|
# regions.
|
|
76
76
|
PRICING_TABLE_URL_FMT = 'https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/{region}/index.csv' # pylint: disable=line-too-long
|
|
77
|
-
# Hardcode the regions that offer p4de.24xlarge as our credential does not have
|
|
78
|
-
# the permission to query the offerings of the instance.
|
|
79
|
-
# Ref: https://aws.amazon.com/ec2/instance-types/p4/
|
|
80
|
-
P4DE_REGIONS = ['us-east-1', 'us-west-2']
|
|
81
77
|
# g6f instances have fractional GPUs, but the API returns Count: 1 under
|
|
82
78
|
# GpuInfo. However, the GPU memory is properly scaled. Taking the instance GPU
|
|
83
79
|
# divided by the total memory of an L4 will give us the fraction of the GPU.
|
|
@@ -214,35 +210,6 @@ def _get_spot_pricing_table(region: str) -> 'pd.DataFrame':
|
|
|
214
210
|
return df
|
|
215
211
|
|
|
216
212
|
|
|
217
|
-
def _patch_p4de(region: str, df: 'pd.DataFrame',
|
|
218
|
-
pricing_df: 'pd.DataFrame') -> 'pd.DataFrame':
|
|
219
|
-
# Hardcoded patch for p4de.24xlarge, as our credentials doesn't have access
|
|
220
|
-
# to the instance type.
|
|
221
|
-
# Columns:
|
|
222
|
-
# InstanceType,AcceleratorName,AcceleratorCount,vCPUs,MemoryGiB,GpuInfo,
|
|
223
|
-
# Price,SpotPrice,Region,AvailabilityZone
|
|
224
|
-
records = []
|
|
225
|
-
for zone in df[df['Region'] == region]['AvailabilityZone'].unique():
|
|
226
|
-
records.append({
|
|
227
|
-
'InstanceType': 'p4de.24xlarge',
|
|
228
|
-
'AcceleratorName': 'A100-80GB',
|
|
229
|
-
'AcceleratorCount': 8,
|
|
230
|
-
'vCPUs': 96,
|
|
231
|
-
'MemoryGiB': 1152,
|
|
232
|
-
'GpuInfo':
|
|
233
|
-
('{\'Gpus\': [{\'Name\': \'A100-80GB\', \'Manufacturer\': '
|
|
234
|
-
'\'NVIDIA\', \'Count\': 8, \'MemoryInfo\': {\'SizeInMiB\': '
|
|
235
|
-
'81920}}], \'TotalGpuMemoryInMiB\': 655360}'),
|
|
236
|
-
'AvailabilityZone': zone,
|
|
237
|
-
'Region': region,
|
|
238
|
-
'Price': pricing_df[pricing_df['InstanceType'] == 'p4de.24xlarge']
|
|
239
|
-
['Price'].values[0],
|
|
240
|
-
'SpotPrice': np.nan,
|
|
241
|
-
})
|
|
242
|
-
df = pd.concat([df, pd.DataFrame.from_records(records)])
|
|
243
|
-
return df
|
|
244
|
-
|
|
245
|
-
|
|
246
213
|
def _get_instance_types_df(region: str) -> Union[str, 'pd.DataFrame']:
|
|
247
214
|
try:
|
|
248
215
|
# Fetch the zone info first to make sure the account has access to the
|
|
@@ -367,9 +334,6 @@ def _get_instance_types_df(region: str) -> Union[str, 'pd.DataFrame']:
|
|
|
367
334
|
df = pd.concat(
|
|
368
335
|
[df, df.apply(get_additional_columns, axis='columns')],
|
|
369
336
|
axis='columns')
|
|
370
|
-
# patch the df for p4de.24xlarge
|
|
371
|
-
if region in P4DE_REGIONS:
|
|
372
|
-
df = _patch_p4de(region, df, pricing_df)
|
|
373
337
|
if 'GpuInfo' not in df.columns:
|
|
374
338
|
df['GpuInfo'] = np.nan
|
|
375
339
|
df = df[USEFUL_COLUMNS]
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""A script that generates the Seeweb catalog.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python fetch_seeweb.py [-h] [--api-key API_KEY]
|
|
5
|
+
[--api-key-path API_KEY_PATH]
|
|
6
|
+
|
|
7
|
+
If neither --api-key nor --api-key-path are provided, this script will parse
|
|
8
|
+
`~/.seeweb_cloud/seeweb_keys` to look for Seeweb API key.
|
|
9
|
+
"""
|
|
10
|
+
import argparse
|
|
11
|
+
import configparser
|
|
12
|
+
import csv
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
from typing import Any, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from sky.adaptors.seeweb import ecsapi
|
|
18
|
+
|
|
19
|
+
# GPU name mapping from Seeweb to SkyPilot canonical names
|
|
20
|
+
SEEWEB_GPU_NAME_TO_SKYPILOT_GPU_NAME = {
|
|
21
|
+
'H200 141GB': 'H200',
|
|
22
|
+
'RTX A6000 48GB': 'RTXA6000',
|
|
23
|
+
'A100 80GB': 'A100',
|
|
24
|
+
'L4 24GB': 'L4',
|
|
25
|
+
'L40s 48GB': 'L40s',
|
|
26
|
+
'H100 80GB': 'H100',
|
|
27
|
+
'MI300X': 'MI300X',
|
|
28
|
+
'A30': 'A30',
|
|
29
|
+
'RTX 6000 24GB': 'RTX6000',
|
|
30
|
+
'Tenstorrent Grayskull e75': 'GRAYSKULL-E75',
|
|
31
|
+
'Tenstorrent Grayskull e150': 'GRAYSKULL-E150',
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
# GPU VRAM mapping in MB
|
|
35
|
+
VRAM = {
|
|
36
|
+
'RTXA6000': 48384, # 48GB
|
|
37
|
+
'H200': 144384, # 141GB
|
|
38
|
+
'A100': 81920, # 80GB
|
|
39
|
+
'L4': 24576, # 24GB
|
|
40
|
+
'L40s': 49152, # 48GB
|
|
41
|
+
'H100': 81920, # 80GB
|
|
42
|
+
'MI300X': 192000, # 192GB
|
|
43
|
+
'A30': 24576, # 24GB
|
|
44
|
+
'RTX6000': 24576, # 24GB
|
|
45
|
+
'GRAYSKULL-E75': 8192, # 8GB
|
|
46
|
+
'GRAYSKULL-E150': 8192, # 8GB
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def is_tenstorrent_gpu_name(gpu_name: Optional[str]) -> bool:
    """Tell whether a GPU name designates a Tenstorrent accelerator.

    The comparison is case-insensitive and succeeds when either the vendor
    name (TENSTORRENT) or the GRAYSKULL product family appears anywhere in
    the name. A missing or empty name is never a match.
    """
    normalized = str(gpu_name).upper() if gpu_name else ''
    markers = ('TENSTORRENT', 'GRAYSKULL')
    return any(marker in normalized for marker in markers)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def is_mi300x_gpu_name(gpu_name: Optional[str]) -> bool:
    """Tell whether a GPU name designates an AMD MI300X.

    The match is a case-insensitive substring test; a missing or empty
    name yields False.
    """
    return bool(gpu_name) and 'MI300X' in str(gpu_name).upper()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def get_api_key(path: Optional[str] = None) -> str:
    """Get the Seeweb API key from a config file or the environment.

    The key is looked up first as ``api_key`` in the ``[DEFAULT]`` section
    of the INI-style file at ``path``; if that yields nothing usable, the
    ``SEEWEB_API_KEY`` environment variable is consulted.

    Args:
        path: Location of the key file. ``~`` is expanded. Defaults to
            ``~/.seeweb_cloud/seeweb_keys`` when None.

    Returns:
        The API key with surrounding whitespace removed.

    Raises:
        ValueError: If neither the file nor the environment variable
            provides a non-empty key.
    """
    if path is None:
        path = '~/.seeweb_cloud/seeweb_keys'
    path = os.path.expanduser(path)

    cause: Optional[BaseException] = None
    try:
        parser = configparser.ConfigParser()
        # ConfigParser.read() silently skips files it cannot open, so a
        # missing file surfaces below as a KeyError on 'api_key'.
        parser.read(path)
        api_key = parser['DEFAULT']['api_key'].strip()
        if api_key:
            return api_key
    except (KeyError, FileNotFoundError, configparser.Error) as exc:
        # configparser.Error covers malformed files (e.g. a bare key with
        # no section header); those must not block the env fallback.
        cause = exc

    api_key = os.environ.get('SEEWEB_API_KEY', '').strip()
    if api_key:
        return api_key

    raise ValueError(
        f'API key not found in {path} or ENV variable SEEWEB_API_KEY'
    ) from cause
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def normalize_gpu_name(gpu_name: str) -> str:
    """Normalize a GPU name from the Seeweb API to SkyPilot's canonical name.

    Args:
        gpu_name: GPU label as reported by Seeweb (e.g. 'A100 80GB').

    Returns:
        The mapped name from SEEWEB_GPU_NAME_TO_SKYPILOT_GPU_NAME, the
        input unchanged (with a printed warning) when no mapping exists,
        or '' when the input is empty.
    """
    if not gpu_name:
        return ''

    canonical_name = SEEWEB_GPU_NAME_TO_SKYPILOT_GPU_NAME.get(gpu_name)
    if canonical_name:
        return canonical_name

    # Unknown label: keep it as-is so the plan is still listed. The
    # trailing space keeps the two message fragments from running together.
    print(f'Warning: GPU name "{gpu_name}" not found in mapping, '
          'using original name')
    return gpu_name
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def parse_plan_info(plan: Any) -> Dict[str, Any]:
    """Parse plan information from a Seeweb API plan object.

    Args:
        plan: Object exposing a ``name`` attribute and, optionally,
            ``cpu``, ``ram`` (read as MB), ``hourly_price``, ``gpu`` and
            ``gpu_label``. Objects without ``name`` (plain dicts included)
            are rejected.

    Returns:
        A dict with keys ``plan_name``, ``vcpus``, ``memory_gb``,
        ``gpu_name``, ``gpu_count``, ``gpu_vram_mb`` and ``price``.
        Numeric fields that are missing or not convertible become 0.

    Raises:
        ValueError: If ``plan`` has no ``name`` attribute.
    """
    if not hasattr(plan, 'name'):
        raise ValueError(f'Unsupported plan format: {type(plan)}')

    def _coerce(value: Any, cast: Any, default: Any) -> Any:
        """Apply ``cast`` to ``value``; return ``default`` if it fails."""
        try:
            return cast(value)
        except (ValueError, TypeError):
            return default

    plan_name = getattr(plan, 'name', 'unknown')
    # cpu gets the same defensive conversion as the other numeric fields.
    vcpus = _coerce(getattr(plan, 'cpu', 0), int, 0)

    # ram is divided by 1024 to obtain GB; falsy or unparsable values map
    # to 0.
    memory_mb = getattr(plan, 'ram', 0)
    memory_mb_int = _coerce(memory_mb, int, None) if memory_mb else None
    memory_gb = memory_mb_int / 1024 if memory_mb_int is not None else 0

    price = _coerce(getattr(plan, 'hourly_price', 0.0), float, 0.0)
    gpu_count = _coerce(getattr(plan, 'gpu', 0), int, 0)

    # Prefer the explicit GPU label; otherwise infer a name from the plan
    # name (e.g. ECS1GPU11 -> GPU11).
    gpu_label = getattr(plan, 'gpu_label', None)
    if gpu_label:
        gpu_name = normalize_gpu_name(gpu_label)
    elif isinstance(plan_name, str) and 'GPU' in plan_name:
        gpu_name = 'GPU' + plan_name.split('GPU')[1]
    else:
        gpu_name = None

    # VRAM is looked up by the normalized name; unknown GPUs report 0.
    gpu_vram_mb = VRAM.get(gpu_name, 0) if gpu_name else 0

    return {
        'plan_name': plan_name,
        'vcpus': vcpus,
        'memory_gb': memory_gb,
        'gpu_name': gpu_name,
        'gpu_count': gpu_count,
        'gpu_vram_mb': gpu_vram_mb,
        'price': price,
    }
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def get_gpu_info(gpu_count: int, gpu_name: str, gpu_vram_mb: int = 0) -> str:
    """Build the single-quoted GpuInfo string for a catalog row.

    Returns '' when there is no GPU (empty name or zero count). Otherwise
    the result is a JSON dump, with double quotes swapped for single
    quotes, describing one GPU entry (name, manufacturer, count, per-GPU
    memory) plus the total GPU memory.
    """
    if not gpu_name or gpu_count == 0:
        return ''

    # Manufacturer is inferred from the name; NVIDIA is the fallback.
    name_upper = str(gpu_name).upper()
    manufacturer = 'NVIDIA'
    for marker, vendor in (('MI300', 'AMD'), ('GRAYSKULL', 'TENSTORRENT')):
        if marker in name_upper:
            manufacturer = vendor
            break

    device = {
        'Name': gpu_name,
        'Manufacturer': manufacturer,
        'Count': float(gpu_count),
        'MemoryInfo': {
            'SizeInMiB': gpu_vram_mb
        },
    }
    total_vram_mb = gpu_vram_mb * gpu_count if gpu_vram_mb else 0
    payload = {'Gpus': [device], 'TotalGpuMemoryInMiB': total_vram_mb}

    return json.dumps(payload).replace('"', '\'')
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def fetch_seeweb_data(api_key: str) -> List[Dict]:
    """Fetch and parse all usable plans from the Seeweb API.

    Each plan is parsed with parse_plan_info; Tenstorrent and MI300X plans
    are skipped before any per-plan API call is made. For every remaining
    plan the available regions are fetched and stored under the
    'regions_available' key of the parsed dict.

    Args:
        api_key: Seeweb API token passed to the ecsapi client.

    Returns:
        The list of parsed plan dicts.

    Raises:
        ImportError: If the ecsapi adaptor is not available.
        Exception: Wrapping any failure of the fetch as a whole (including
            an empty plan list). Failures on individual plans are printed
            and that plan is skipped.
    """
    if ecsapi is None:
        raise ImportError('ecsapi not available')

    try:
        client = ecsapi.Api(token=api_key)

        print('Fetching plans from Seeweb API...')
        api_plans = client.fetch_plans()

        if not api_plans:
            raise ValueError('No plans returned from API')

        print(f'Successfully fetched {len(api_plans)} plans from API')
        plans = []

        for plan in api_plans:
            # parse_plan_info rejects plans that lack a name, so the label
            # used in messages must not assume the attribute exists;
            # otherwise the error handler itself would raise and abort the
            # whole fetch.
            plan_label = getattr(plan, 'name', repr(plan))
            try:
                # Parse first so Tenstorrent/MI300X plans can be filtered
                # out before the extra regions API call.
                parsed = parse_plan_info(plan)

                if is_tenstorrent_gpu_name(parsed.get('gpu_name')):
                    print(f'Skipping Tenstorrent plan {plan_label}')
                    continue

                if is_mi300x_gpu_name(parsed.get('gpu_name')):
                    print(f'Skipping MI300X plan {plan_label}')
                    continue

                print(f'Fetching regions available for {plan_label}')
                regions_available = client.fetch_regions_available(plan.name)

                parsed.update({'regions_available': regions_available})
                plans.append(parsed)
            except Exception as e:  # pylint: disable=broad-except
                # Best effort: one bad plan must not lose the others.
                print(f'Error parsing plan {plan_label}: {e}')
                continue

        print(f'Successfully parsed {len(plans)} plans')
        return plans

    except Exception as e:  # pylint: disable=broad-except
        raise Exception(f'Error fetching data from Seeweb API: {e}') from e
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def create_catalog(api_key: str, output_path: str) -> None:
    """Create the Seeweb catalog CSV from data fetched via the API.

    One row is written per (plan, region) pair. A plan whose region list
    is missing or empty still gets a single row with an empty Region.
    SpotPrice is always empty because Seeweb does not support spot.

    Args:
        api_key: Seeweb API token, forwarded to fetch_seeweb_data.
        output_path: Destination CSV path; the file is overwritten.
    """
    plans = fetch_seeweb_data(api_key)

    print(f'Writing catalog to {output_path}')
    # newline='' is what the csv module requires of files it writes to, so
    # that it alone controls the line terminator on every platform.
    with open(output_path, mode='w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, delimiter=',', quotechar='"')
        writer.writerow([
            'InstanceType', 'AcceleratorName', 'AcceleratorCount', 'vCPUs',
            'MemoryGiB', 'Price', 'Region', 'GpuInfo', 'SpotPrice'
        ])

        for plan in plans:
            try:
                gpu_count = plan['gpu_count']
                gpu_info_str = ''
                if plan['gpu_name'] and gpu_count > 0:
                    gpu_info_str = get_gpu_info(gpu_count, plan['gpu_name'],
                                                plan.get('gpu_vram_mb', 0))

                # No usable region list: emit one row with an empty region.
                regions_available = plan['regions_available']
                if not (isinstance(regions_available, list) and
                        regions_available):
                    regions_available = ['']

                for region in regions_available:
                    writer.writerow([
                        plan['plan_name'],  # InstanceType
                        plan['gpu_name'],  # AcceleratorName (cleaned)
                        gpu_count if gpu_count > 0 else
                        '',  # AcceleratorCount
                        plan['vcpus'],  # vCPUs
                        plan['memory_gb'],  # MemoryGiB
                        plan['price'],  # Price
                        region,  # Region (single region per row)
                        gpu_info_str,  # GpuInfo
                        ''  # SpotPrice (Seeweb doesn't support spot)
                    ])
            except Exception as e:  # pylint: disable=broad-except
                # Best effort: report the plan and carry on with the rest.
                print(f'Error processing plan {plan.get("plan_name")}: {e}')
                continue

    print(f'Seeweb catalog saved to {output_path}')
    print(f'Created {len(plans)} instance types')
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def main() -> None:
    """Parse CLI arguments and write the Seeweb catalog to seeweb/vms.csv.

    An explicit --api-key takes precedence; otherwise the key is resolved
    through get_api_key using --api-key-path (which may be unset).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--api-key', help='Seeweb API key')
    arg_parser.add_argument('--api-key-path',
                            help='Path to file containing Seeweb API key')
    cli = arg_parser.parse_args()

    api_key = cli.api_key or get_api_key(cli.api_key_path)

    os.makedirs('seeweb', exist_ok=True)
    create_catalog(api_key, 'seeweb/vms.csv')
    print('Seeweb Service Catalog saved to seeweb/vms.csv')
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
if __name__ == '__main__':
|
|
329
|
+
main()
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Seeweb service catalog.
|
|
2
|
+
|
|
3
|
+
This module loads the service catalog file and can be used to
|
|
4
|
+
query instance types and pricing information for Seeweb.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import typing
|
|
8
|
+
from typing import Dict, List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from sky.catalog import common
|
|
13
|
+
from sky.utils import resources_utils
|
|
14
|
+
from sky.utils import ux_utils
|
|
15
|
+
|
|
16
|
+
if typing.TYPE_CHECKING:
|
|
17
|
+
from sky.clouds import cloud
|
|
18
|
+
|
|
19
|
+
_PULL_FREQUENCY_HOURS = 8
|
|
20
|
+
_df = common.read_catalog('seeweb/vms.csv',
|
|
21
|
+
pull_frequency_hours=_PULL_FREQUENCY_HOURS)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def instance_type_exists(instance_type: str) -> bool:
    """Check the Seeweb catalog for the given instance type.

    Delegates to common.instance_type_exists_impl on the module catalog.
    """
    return common.instance_type_exists_impl(_df, instance_type)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def validate_region_zone(
        region: Optional[str],
        zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """Validate a region/zone pair against the Seeweb catalog.

    Any non-None zone is rejected with a ValueError, since Seeweb does not
    support zones; otherwise validation is delegated to
    common.validate_region_zone_impl.
    """
    zone_given = zone is not None
    if zone_given:
        with ux_utils.print_exception_no_traceback():
            raise ValueError('Seeweb does not support zones.')

    return common.validate_region_zone_impl('Seeweb', _df, region, zone)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_hourly_cost(instance_type: str,
                    use_spot: bool = False,
                    region: Optional[str] = None,
                    zone: Optional[str] = None) -> float:
    """Look up the hourly cost of an instance type in the Seeweb catalog.

    Any non-None zone is rejected with a ValueError; the lookup itself is
    delegated to common.get_hourly_cost_impl.
    """
    zone_given = zone is not None
    if zone_given:
        with ux_utils.print_exception_no_traceback():
            raise ValueError('Seeweb does not support zones.')

    return common.get_hourly_cost_impl(_df, instance_type, use_spot, region,
                                       zone)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_vcpus_mem_from_instance_type(
        instance_type: str) -> Tuple[Optional[float], Optional[float]]:
    """Return the (vCPUs, memory) pair recorded for an instance type.

    Delegates to common.get_vcpus_mem_from_instance_type_impl on the
    module catalog.
    """
    return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_default_instance_type(cpus: Optional[str] = None,
                              memory: Optional[str] = None,
                              disk_tier: Optional[
                                  resources_utils.DiskTier] = None,
                              region: Optional[str] = None,
                              zone: Optional[str] = None) -> Optional[str]:
    """Pick an instance type matching the CPU and memory requests.

    ``disk_tier`` is accepted for interface compatibility but ignored. The
    selection is delegated to common.get_instance_type_for_cpus_mem_impl.
    """
    del disk_tier  # unused
    return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory,
                                                      region, zone)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_accelerators_from_instance_type(
        instance_type: str) -> Optional[Dict[str, int]]:
    """Return ``{accelerator_name: count}`` for an instance type.

    Returns None when the instance type is not in the catalog, when it has
    no accelerator (name or count missing/empty), or when the count cannot
    be converted to a number.
    """
    matches = _df[_df['InstanceType'] == instance_type]
    if matches.empty:
        return None

    # Only the first row is consulted; every row of one instance type is
    # expected to carry the same accelerator info.
    first_row = matches.iloc[0]
    acc_name = first_row['AcceleratorName']
    acc_count = first_row['AcceleratorCount']

    no_accelerator = (pd.isna(acc_name) or pd.isna(acc_count) or
                      acc_name == '' or acc_count == '')
    if no_accelerator:
        return None

    # Whole-number counts are reported as int, anything else as float.
    try:
        as_int = int(acc_count)
        acc_count = as_int if as_int == acc_count else float(acc_count)
    except (ValueError, TypeError):
        return None

    return {acc_name: acc_count}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def get_instance_type_for_accelerator(
    acc_name: str,
    acc_count: int,
    cpus: Optional[str] = None,
    memory: Optional[str] = None,
    use_spot: bool = False,
    region: Optional[str] = None,
    zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
    """Find instance types offering the requested accelerator count.

    Any non-None zone is rejected with a ValueError; the search is
    delegated to common.get_instance_type_for_accelerator_impl.
    """
    zone_given = zone is not None
    if zone_given:
        with ux_utils.print_exception_no_traceback():
            raise ValueError('Seeweb does not support zones.')

    return common.get_instance_type_for_accelerator_impl(df=_df,
                                                         acc_name=acc_name,
                                                         acc_count=acc_count,
                                                         cpus=cpus,
                                                         memory=memory,
                                                         use_spot=use_spot,
                                                         region=region,
                                                         zone=zone)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def regions() -> List['cloud.Region']:
    """List the regions present in the Seeweb catalog (non-spot).

    Delegates to common.get_region_zones with ``use_spot=False``.
    """
    return common.get_region_zones(_df, use_spot=False)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def get_region_zones_for_instance_type(instance_type: str,
                                       use_spot: bool = False
                                      ) -> List['cloud.Region']:
    """List regions offering an instance type, preferred regions first.

    Returns an empty list when the instance type is not in the catalog.
    Otherwise the regions from common.get_region_zones are reordered so
    that the default region (it-fr2) leads, followed by the remaining
    regions in their original order.
    """
    matches = _df[_df['InstanceType'] == instance_type]
    if matches.empty:
        return []

    region_list = common.get_region_zones(matches, use_spot)

    # Default region: Frosinone (it-fr2)
    # Other regions: Milano (it-mi2), Lugano (ch-lug1), Bulgaria (bg-sof1)
    priority_regions = ['it-fr2']

    # At most one entry per priority name, in priority order.
    preferred = []
    for wanted in priority_regions:
        first_match = next(
            (region for region in region_list if region.name == wanted),
            None)
        if first_match is not None:
            preferred.append(first_match)

    remaining = [
        region for region in region_list
        if region.name not in priority_regions
    ]
    return preferred + remaining
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def list_accelerators(
        gpus_only: bool,
        name_filter: Optional[str],
        region_filter: Optional[str],
        quantity_filter: Optional[int],
        case_sensitive: bool = True,
        all_regions: bool = False,
        require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
    """List the accelerators offered in Seeweb.

    Catalog rows whose Region is null or blank are dropped before the
    listing is delegated to common.list_accelerators_impl.
    """
    # A null or blank region marks a plan that is currently unavailable.
    with_region = _df.dropna(subset=['Region'])
    has_region_text = with_region['Region'].str.strip() != ''
    with_region = with_region[has_region_text]

    return common.list_accelerators_impl('Seeweb', with_region, gpus_only,
                                         name_filter, region_filter,
                                         quantity_filter, case_sensitive,
                                         all_regions, require_price)
|
sky/clouds/__init__.py
CHANGED
|
@@ -28,6 +28,7 @@ from sky.clouds.oci import OCI
|
|
|
28
28
|
from sky.clouds.paperspace import Paperspace
|
|
29
29
|
from sky.clouds.runpod import RunPod
|
|
30
30
|
from sky.clouds.scp import SCP
|
|
31
|
+
from sky.clouds.seeweb import Seeweb
|
|
31
32
|
from sky.clouds.ssh import SSH
|
|
32
33
|
from sky.clouds.vast import Vast
|
|
33
34
|
from sky.clouds.vsphere import Vsphere
|
|
@@ -58,6 +59,7 @@ __all__ = [
|
|
|
58
59
|
'Fluidstack',
|
|
59
60
|
'Nebius',
|
|
60
61
|
'Hyperbolic',
|
|
62
|
+
'Seeweb',
|
|
61
63
|
# Utility functions
|
|
62
64
|
'cloud_in_iterable',
|
|
63
65
|
]
|
sky/clouds/kubernetes.py
CHANGED
|
@@ -841,6 +841,8 @@ class Kubernetes(clouds.Cloud):
|
|
|
841
841
|
from_instance_type(default_instance_type))
|
|
842
842
|
|
|
843
843
|
gpu_task_cpus = k8s_instance_type.cpus
|
|
844
|
+
if resources.cpus is None:
|
|
845
|
+
gpu_task_cpus = gpu_task_cpus * acc_count
|
|
844
846
|
# Special handling to bump up memory multiplier for GPU instances
|
|
845
847
|
gpu_task_memory = (float(resources.memory.strip('+')) if
|
|
846
848
|
resources.memory is not None else gpu_task_cpus *
|