skypilot-nightly 1.0.0.dev20250613__py3-none-any.whl → 1.0.0.dev20250614__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +4 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/authentication.py +20 -2
- sky/backends/backend_utils.py +3 -1
- sky/backends/cloud_vm_ray_backend.py +2 -1
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/catalog/hyperbolic_catalog.py +133 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/hyperbolic.py +276 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/37-7754056a4b503e1d.js +6 -0
- sky/dashboard/out/_next/static/chunks/600.bd2ed8c076b720ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/{856-0776dc6ed6000c39.js → 856-c2c39c0912285e54.js} +1 -1
- sky/dashboard/out/_next/static/chunks/938-245c9ac4c9e8bf15.js +1 -0
- sky/dashboard/out/_next/static/chunks/{webpack-5c3e6471d04780c6.js → webpack-27de3d9d450d81c6.js} +1 -1
- sky/dashboard/out/_next/static/css/{5d71bfc09f184bab.css → 6f84444b8f3c656c.css} +1 -1
- sky/dashboard/out/_next/static/{UdgJCk2sZFLJgFJW_qiWG → nm5jrKpUZh2W0SxzyDKhz}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/provision/__init__.py +1 -0
- sky/provision/hyperbolic/__init__.py +11 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +423 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/setup_files/dependencies.py +2 -1
- sky/skylet/constants.py +1 -1
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/users/permission.py +2 -0
- {skypilot_nightly-1.0.0.dev20250613.dist-info → skypilot_nightly-1.0.0.dev20250614.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250613.dist-info → skypilot_nightly-1.0.0.dev20250614.dist-info}/RECORD +50 -41
- sky/dashboard/out/_next/static/chunks/37-d8aebf1683522a0b.js +0 -6
- sky/dashboard/out/_next/static/chunks/600.15a0009177e86b86.js +0 -16
- sky/dashboard/out/_next/static/chunks/938-ab185187a63f9cdb.js +0 -1
- /sky/dashboard/out/_next/static/chunks/{843-6fcc4bf91ac45b39.js → 843-5011affc9540757f.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-7bbd9d39d6f9a98a.js → _app-664031f6ae737f80.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-451a14e7e755ebbc.js → [cluster]-20210f8cd809063d.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{jobs-fe233baf3d073491.js → jobs-ae7a5e9fa5a5b5f0.js} +0 -0
- /sky/dashboard/out/_next/static/{UdgJCk2sZFLJgFJW_qiWG → nm5jrKpUZh2W0SxzyDKhz}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250613.dist-info → skypilot_nightly-1.0.0.dev20250614.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250613.dist-info → skypilot_nightly-1.0.0.dev20250614.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250613.dist-info → skypilot_nightly-1.0.0.dev20250614.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250613.dist-info → skypilot_nightly-1.0.0.dev20250614.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,373 @@
|
|
1
|
+
"""Hyperbolic API utilities."""
|
2
|
+
import enum
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
import time
|
6
|
+
from typing import Any, Dict, Optional, Tuple
|
7
|
+
|
8
|
+
import requests
|
9
|
+
|
10
|
+
from sky import authentication
|
11
|
+
from sky import sky_logging
|
12
|
+
from sky.utils import status_lib
|
13
|
+
|
14
|
+
# TODO: update to prod endpoint
BASE_URL = 'https://api.hyperbolic.xyz'
# File holding the user's Hyperbolic API key; '~' is expanded by the client
# when it reads the file.
API_KEY_PATH = '~/.hyperbolic/api_key'

# NOTE(review): MAX_RETRIES and RETRY_DELAY are not referenced by the code
# visible in this module - confirm whether a retry loop is still planned.
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds
# Default number of seconds wait_for_instance() polls before giving up.
TIMEOUT = 120

logger = sky_logging.init_logger(__name__)
|
23
|
+
|
24
|
+
|
25
|
+
class HyperbolicError(Exception):
    """Base exception for Hyperbolic API errors.

    Raised by the helpers in this module for failed API requests, unknown
    instance statuses and failed instance operations.
    """
|
28
|
+
|
29
|
+
|
30
|
+
class HyperbolicInstanceStatus(enum.Enum):
    """Statuses enum for Hyperbolic instances.

    Each member's value is the lowercase status string used when comparing
    against statuses reported by the Hyperbolic API.
    """
    UNKNOWN = 'unknown'
    ONLINE = 'online'
    OFFLINE = 'offline'
    STARTING = 'starting'
    STOPPING = 'stopping'
    BUSY = 'busy'
    RESTARTING = 'restarting'
    CREATING = 'creating'
    FAILED = 'failed'
    ERROR = 'error'
    TERMINATED = 'terminated'

    @classmethod
    def cluster_status_map(
        cls
    ) -> 'Dict[HyperbolicInstanceStatus, Optional[status_lib.ClusterStatus]]':
        """Return the mapping from instance status to SkyPilot cluster status.

        Only ONLINE maps to UP and TERMINATED maps to None; every other
        status maps to INIT.
        """
        return {
            cls.CREATING: status_lib.ClusterStatus.INIT,
            cls.STARTING: status_lib.ClusterStatus.INIT,
            cls.ONLINE: status_lib.ClusterStatus.UP,
            cls.FAILED: status_lib.ClusterStatus.INIT,
            cls.ERROR: status_lib.ClusterStatus.INIT,
            cls.RESTARTING: status_lib.ClusterStatus.INIT,
            cls.STOPPING: status_lib.ClusterStatus.INIT,
            cls.UNKNOWN: status_lib.ClusterStatus.INIT,
            cls.BUSY: status_lib.ClusterStatus.INIT,
            cls.OFFLINE: status_lib.ClusterStatus.INIT,
            cls.TERMINATED: None,
        }

    @classmethod
    def from_raw_status(cls, status: str) -> 'HyperbolicInstanceStatus':
        """Convert raw status string to HyperbolicInstanceStatus enum.

        The lookup is case-insensitive.

        Raises:
            HyperbolicError: If ``status`` is not a string (for example None
                when the API response omits the field) or does not name a
                known status.
        """
        # A missing status must surface as HyperbolicError rather than as an
        # AttributeError from `.lower()`, because callers only catch the
        # former when skipping unparsable instances.
        if not isinstance(status, str):
            raise HyperbolicError(f'Unknown instance status: {status}')
        try:
            return cls(status.lower())
        except ValueError as exc:
            raise HyperbolicError(f'Unknown instance status: {status}') from exc

    def to_cluster_status(self) -> 'Optional[status_lib.ClusterStatus]':
        """Convert to SkyPilot cluster status (None for TERMINATED)."""
        return self.cluster_status_map().get(self)
|
73
|
+
|
74
|
+
|
75
|
+
class HyperbolicClient:
    """Client for interacting with the Hyperbolic API.

    The API key is read from API_KEY_PATH at construction time and sent as a
    bearer token with every request issued against BASE_URL.
    """

    # Seconds to sleep between two status polls in wait_for_instance().
    _POLL_INTERVAL_SECONDS = 5

    def __init__(self):
        """Initialize the Hyperbolic client with API credentials.

        Raises:
            RuntimeError: If no file exists at the expanded API_KEY_PATH.
        """
        cred_path = os.path.expanduser(API_KEY_PATH)
        if not os.path.exists(cred_path):
            raise RuntimeError(f'API key not found at {cred_path}')
        with open(cred_path, 'r', encoding='utf-8') as f:
            self.api_key = f.read().strip()
        self.headers = {'Authorization': f'Bearer {self.api_key}'}
        self.api_url = BASE_URL

    def _make_request(
            self,
            method: str,
            endpoint: str,
            payload: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Make an API request to Hyperbolic.

        Args:
            method: HTTP method; only 'GET' and 'POST' are supported.
            endpoint: Path appended to BASE_URL.
            payload: JSON body, sent for POST requests only.

        Returns:
            The decoded JSON response body, or an empty dict when a
            successful response does not contain JSON.

        Raises:
            HyperbolicError: For an unsupported method, a transport failure,
                a non-2xx response, or any other unexpected error.
        """
        url = f'{BASE_URL}{endpoint}'
        headers = {
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json'
        }

        # Debug logging for request
        logger.debug(f'Making {method} request to {url}')
        if payload:
            logger.debug(f'Request payload: {json.dumps(payload, indent=2)}')

        try:
            if method == 'GET':
                response = requests.get(url, headers=headers, timeout=TIMEOUT)
            elif method == 'POST':
                response = requests.post(url,
                                         headers=headers,
                                         json=payload,
                                         timeout=TIMEOUT)
            else:
                raise HyperbolicError(f'Unsupported HTTP method: {method}')

            # Debug logging for response
            logger.debug(f'Response status code: {response.status_code}')
            logger.debug(f'Response headers: {dict(response.headers)}')

            # Try to parse response as JSON. ValueError is the common base of
            # json.JSONDecodeError and of the simplejson-based error that
            # requests may raise instead.
            try:
                response_data = response.json()
                logger.debug(
                    f'Response body: {json.dumps(response_data, indent=2)}')
            except ValueError as exc:
                # If response is not JSON, use the raw text
                response_text = response.text
                logger.debug(f'Response body (raw): {response_text}')
                if not response.ok:
                    raise HyperbolicError(f'API request failed with status '
                                          f'{response.status_code}: '
                                          f'{response_text}') from exc
                # If response is OK but not JSON, return empty dict
                return {}

            if not response.ok:
                # Error bodies are not guaranteed to be JSON objects; fall
                # back to the raw text for lists, strings, etc.
                if isinstance(response_data, dict):
                    error_msg = response_data.get(
                        'error', response_data.get('message', response.text))
                else:
                    error_msg = response.text
                raise HyperbolicError(
                    f'API request failed with status {response.status_code}: '
                    f'{error_msg}')

            return response_data
        except HyperbolicError:
            # Already carries a precise message; do not re-wrap it as an
            # "unexpected" error below.
            raise
        except requests.exceptions.RequestException as e:
            raise HyperbolicError(f'Request failed: {str(e)}') from e
        except Exception as e:
            raise HyperbolicError(
                f'Unexpected error during API request: {str(e)}') from e

    def launch_instance(self, gpu_model: str, gpu_count: int,
                        name: str) -> Tuple[str, str]:
        """Launch a new instance with the specified configuration.

        Posts the request to the create-cheapest endpoint, waits until the
        instance is ONLINE with an SSH command, then looks it up again to
        read that command.

        Args:
            gpu_model: Sent to the API as 'gpuModel'.
            gpu_count: Sent to the API (as a string) as 'gpuCount'.
            name: Cluster name stored in the instance's skypilot metadata.

        Returns:
            A tuple ``(instance_id, ssh_command)``.

        Raises:
            HyperbolicError: If any step of the launch fails.
        """
        # Initialize config with basic instance info
        config: Dict[str, Any] = {
            'gpuModel': gpu_model,
            'gpuCount': str(gpu_count),
            'userMetadata': {
                'skypilot': {
                    'cluster_name': name,
                    'launch_time': str(int(time.time()))
                }
            }
        }

        # NOTE(review): presumably adds SSH key material to the launch
        # config - confirm against sky.authentication.
        config = authentication.setup_hyperbolic_authentication(config)

        endpoint = '/v2/marketplace/instances/create-cheapest'
        try:
            response = self._make_request('POST', endpoint, payload=config)
            logger.debug(f'Launch response: {json.dumps(response, indent=2)}')

            instance_id = response.get('instanceName')
            if not instance_id:
                logger.error(f'No instance ID in response: {response}')
                raise HyperbolicError('No instance ID returned from API')

            logger.info(f'Successfully launched instance {instance_id}, '
                        f'waiting for it to be ready...')

            # Wait for instance to be ready
            if not self.wait_for_instance(
                    instance_id, HyperbolicInstanceStatus.ONLINE.value):
                raise HyperbolicError(
                    f'Instance {instance_id} failed to reach ONLINE state')

            # Get instance details to get SSH command
            instances = self.list_instances(
                metadata={'skypilot': {
                    'cluster_name': name
                }})
            instance = instances.get(instance_id)
            if not instance:
                raise HyperbolicError(
                    f'Instance {instance_id} not found after launch')

            ssh_command = instance.get('sshCommand')
            if not ssh_command:
                logger.error(
                    f'No SSH command available for instance {instance_id}')
                raise HyperbolicError('No SSH command available for instance')

            logger.info(f'Instance {instance_id} is ready with SSH command')
            return instance_id, ssh_command

        except Exception as e:
            logger.error(f'Failed to launch instance: {str(e)}')
            raise HyperbolicError(f'Failed to launch instance: {str(e)}') from e

    def list_instances(
        self,
        status: Optional[str] = None,
        metadata: Optional[Dict[str, Dict[str, str]]] = None
    ) -> Dict[str, Dict[str, Any]]:
        """List all instances, optionally filtered by status and metadata.

        Args:
            status: If given, keep only instances with this status
                (case-insensitive).
            metadata: If given, keep only instances whose skypilot
                'cluster_name' starts with ``metadata['skypilot']
                ['cluster_name']``.

        Returns:
            A dict mapping instance id to a flat dict of instance details.
            Instances whose status cannot be parsed are skipped with a
            warning.

        Raises:
            HyperbolicError: If the request fails or the response cannot be
                processed.
        """
        endpoint = '/v1/marketplace/instances'
        try:
            response = self._make_request('GET', endpoint)
            logger.debug(f'Raw API response: {json.dumps(response, indent=2)}')

            # The filter values do not change per instance; compute them once.
            wanted_status = status.lower() if status else None
            skypilot_metadata: Dict[str, str] = {}
            cluster_name = ''
            if metadata:
                skypilot_metadata = metadata.get('skypilot', {})
                cluster_name = skypilot_metadata.get('cluster_name', '')

            instances = {}
            for instance in response.get('instances', []):
                instance_id = instance.get('id')
                # `or {}` also covers keys that are present but null.
                instance_info = instance.get('instance') or {}
                current_status = instance_info.get('status')
                logger.debug(
                    f'Instance {instance_id} status: {current_status}')

                # Convert raw status to enum. A missing status is skipped
                # here so that one malformed entry cannot fail the whole
                # listing.
                if not isinstance(current_status, str):
                    logger.warning(f'Failed to parse status for instance '
                                   f'{instance_id}: Unknown instance status: '
                                   f'{current_status}')
                    continue
                try:
                    instance_status = HyperbolicInstanceStatus.from_raw_status(
                        current_status)
                except HyperbolicError as e:
                    logger.warning(f'Failed to parse status for instance '
                                   f'{instance_id}: {e}')
                    continue

                if wanted_status and instance_status.value != wanted_status:
                    continue

                user_metadata = instance.get('userMetadata') or {}
                if metadata:
                    instance_skypilot = user_metadata.get('skypilot') or {}
                    instance_cluster = (instance_skypilot.get('cluster_name') or
                                        '')
                    if not instance_cluster.startswith(cluster_name):
                        logger.debug(
                            f'Skipping instance {instance_id} - '
                            f'skypilot metadata {instance_skypilot} '
                            f'does not match {skypilot_metadata}')
                        continue
                    logger.debug(f'Including instance {instance_id} '
                                 f'- skypilot metadata matches')

                hardware = instance_info.get('hardware') or {}
                instances[instance_id] = {
                    'id': instance_id,
                    'created': instance.get('created'),
                    'sshCommand': instance.get('sshCommand'),
                    'status': instance_status.value,
                    'gpu_count': instance_info.get('gpu_count'),
                    'gpus_total': instance_info.get('gpus_total'),
                    'owner': instance_info.get('owner'),
                    'cpus': hardware.get('cpus'),
                    'gpus': hardware.get('gpus'),
                    'ram': hardware.get('ram'),
                    'storage': hardware.get('storage'),
                    'pricing': instance_info.get('pricing'),
                    'metadata': user_metadata
                }
            return instances
        except Exception as e:
            raise HyperbolicError(f'Failed to list instances: {str(e)}') from e

    def terminate_instance(self, instance_id: str) -> None:
        """Terminate an instance by ID.

        Raises:
            HyperbolicError: If the terminate request fails.
        """
        endpoint = '/v1/marketplace/instances/terminate'
        data = {'id': instance_id}
        try:
            self._make_request('POST', endpoint, payload=data)
        except Exception as e:
            raise HyperbolicError(
                f'Failed to terminate instance {instance_id}: {str(e)}') from e

    def wait_for_instance(self,
                          instance_id: str,
                          target_status: str,
                          timeout: int = TIMEOUT) -> bool:
        """Wait for an instance to reach a specific status.

        Polls list_instances() until the instance reports ``target_status``
        and has an SSH command.

        Args:
            instance_id: ID of the instance to watch.
            target_status: Status name to wait for (case-insensitive).
            timeout: Maximum number of seconds to wait.

        Returns:
            True once the target status is reached with an SSH command;
            False on timeout or when the instance reaches a failed, error or
            terminated status.

        Raises:
            HyperbolicError: If ``target_status`` is not a known status.
        """
        start_time = time.time()
        target_status_enum = HyperbolicInstanceStatus.from_raw_status(
            target_status)
        terminal_statuses = (
            HyperbolicInstanceStatus.FAILED.value,
            HyperbolicInstanceStatus.ERROR.value,
            HyperbolicInstanceStatus.TERMINATED.value,
        )
        logger.info(
            f'Waiting for instance {instance_id} '
            f'to reach status {target_status_enum.value} and have SSH command')

        while True:
            elapsed = time.time() - start_time
            if elapsed >= timeout:
                logger.error(f'Timeout after {int(elapsed)}s '
                             f'waiting for instance {instance_id}')
                return False

            try:
                instances = self.list_instances()
                instance = instances.get(instance_id)

                if not instance:
                    # The instance may not be listed yet right after launch;
                    # keep polling until the timeout.
                    logger.warning(f'Instance {instance_id} not found')
                    time.sleep(self._POLL_INTERVAL_SECONDS)
                    continue

                current_status = instance.get('status', '').lower()
                ssh_command = instance.get('sshCommand')
                logger.debug(f'Current status: {current_status}, '
                             f'Target status: {target_status_enum.value}, '
                             f'SSH command: {ssh_command}')

                if current_status == target_status_enum.value and ssh_command:
                    logger.info(f'Instance {instance_id} reached '
                                f'target status {target_status_enum.value} '
                                f'and has SSH command after {int(elapsed)}s')
                    return True

                if current_status in terminal_statuses:
                    logger.error(f'Instance {instance_id} reached '
                                 f'terminal status: {current_status} '
                                 f'after {int(elapsed)}s')
                    return False

                time.sleep(self._POLL_INTERVAL_SECONDS)
            except Exception as e:  # pylint: disable=broad-except
                # Best effort: a failed poll is retried until the timeout.
                logger.warning(
                    f'Error while waiting for instance {instance_id}: {str(e)}')
                time.sleep(self._POLL_INTERVAL_SECONDS)
|
335
|
+
|
336
|
+
|
337
|
+
# Lazily created client shared by the module-level helper functions.
_client = None


def get_client() -> HyperbolicClient:
    """Return the shared HyperbolicClient, creating it on first use."""
    global _client
    if _client is not None:
        return _client
    _client = HyperbolicClient()
    return _client
|
347
|
+
|
348
|
+
|
349
|
+
# Backward-compatible wrapper functions
|
350
|
+
def launch_instance(gpu_model: str, gpu_count: int,
                    name: str) -> Tuple[str, str]:
    """Launch an instance through the shared client.

    Forwards all arguments to HyperbolicClient.launch_instance and returns
    its ``(instance_id, ssh_command)`` result.
    """
    client = get_client()
    return client.launch_instance(gpu_model, gpu_count, name)
|
354
|
+
|
355
|
+
|
356
|
+
def list_instances(
    status: Optional[str] = None,
    metadata: Optional[Dict[str, Dict[str, str]]] = None
) -> Dict[str, Dict[str, Any]]:
    """List instances through the shared client.

    Forwards the optional status and metadata filters to
    HyperbolicClient.list_instances and returns its result.
    """
    client = get_client()
    return client.list_instances(status=status, metadata=metadata)
|
362
|
+
|
363
|
+
|
364
|
+
def terminate_instance(instance_id: str) -> None:
    """Terminate the given instance through the shared client."""
    client = get_client()
    return client.terminate_instance(instance_id)
|
367
|
+
|
368
|
+
|
369
|
+
def wait_for_instance(instance_id: str,
                      target_status: str,
                      timeout: int = TIMEOUT) -> bool:
    """Wait, through the shared client, for an instance to reach a status.

    Forwards all arguments to HyperbolicClient.wait_for_instance and returns
    its boolean result.
    """
    client = get_client()
    return client.wait_for_instance(instance_id, target_status, timeout)
|
sky/setup_files/dependencies.py
CHANGED
@@ -161,7 +161,8 @@ extras_require: Dict[str, List[str]] = {
|
|
161
161
|
],
|
162
162
|
'nebius': [
|
163
163
|
'nebius>=0.2.0',
|
164
|
-
] + aws_dependencies
|
164
|
+
] + aws_dependencies,
|
165
|
+
'hyperbolic': [] # No dependencies needed for hyperbolic
|
165
166
|
}
|
166
167
|
|
167
168
|
# Nebius needs python3.10. If python 3.9 [all] will not install nebius
|
sky/skylet/constants.py
CHANGED
@@ -416,7 +416,7 @@ CATALOG_SCHEMA_VERSION = 'v7'
|
|
416
416
|
CATALOG_DIR = '~/.sky/catalogs'
|
417
417
|
ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
|
418
418
|
'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
|
419
|
-
'paperspace', 'do', 'nebius', 'ssh')
|
419
|
+
'paperspace', 'do', 'nebius', 'ssh', 'hyperbolic')
|
420
420
|
# END constants used for service catalog.
|
421
421
|
|
422
422
|
# The user ID of the SkyPilot system.
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# Ray cluster config template for Hyperbolic Cloud
|
2
|
+
|
3
|
+
cluster_name: {{cluster_name_on_cloud}}
|
4
|
+
|
5
|
+
# Hyperbolic only supports a single node (the head node).
|
6
|
+
max_workers: 0
|
7
|
+
upscaling_speed: 0
|
8
|
+
idle_timeout_minutes: 60
|
9
|
+
|
10
|
+
provider:
|
11
|
+
type: external
|
12
|
+
module: sky.provision.hyperbolic
|
13
|
+
region: "default"
|
14
|
+
|
15
|
+
auth:
|
16
|
+
ssh_user: ubuntu
|
17
|
+
ssh_private_key: {{ssh_private_key}}
|
18
|
+
|
19
|
+
available_node_types:
|
20
|
+
ray_head_default:
|
21
|
+
resources: {}
|
22
|
+
node_config:
|
23
|
+
InstanceType: {{instance_type}}
|
24
|
+
|
25
|
+
head_node_type: ray_head_default
|
26
|
+
|
27
|
+
# Format: `REMOTE_PATH : LOCAL_PATH`
|
28
|
+
file_mounts: {
|
29
|
+
"{{sky_ray_yaml_remote_path}}": "{{sky_ray_yaml_local_path}}",
|
30
|
+
"{{sky_remote_path}}/{{sky_wheel_hash}}": "{{sky_local_path}}",
|
31
|
+
{%- for remote_path, local_path in credentials.items() %}
|
32
|
+
"{{remote_path}}": "{{local_path}}",
|
33
|
+
{%- endfor %}
|
34
|
+
}
|
35
|
+
|
36
|
+
rsync_exclude: []
|
37
|
+
|
38
|
+
initialization_commands: []
|
39
|
+
|
40
|
+
# List of shell commands to run to set up nodes.
|
41
|
+
# NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
|
42
|
+
# connection, which is expensive. Try your best to co-locate commands into fewer
|
43
|
+
# items!
|
44
|
+
#
|
45
|
+
# Increment the following for catching performance bugs easier:
|
46
|
+
# current num items (num SSH connections): 1
|
47
|
+
setup_commands:
|
48
|
+
# Disable unattended-upgrades and handle apt-get locks
|
49
|
+
# Install patch utility for Ray
|
50
|
+
# Install conda and Ray
|
51
|
+
# Set system limits for Ray performance (nofile and TasksMax)
|
52
|
+
- {%- for initial_setup_command in initial_setup_commands %}
|
53
|
+
{{ initial_setup_command }}
|
54
|
+
{%- endfor %}
|
55
|
+
sudo systemctl stop unattended-upgrades || true;
|
56
|
+
sudo systemctl disable unattended-upgrades || true;
|
57
|
+
sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true;
|
58
|
+
sudo kill -9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1` || true;
|
59
|
+
sudo pkill -9 apt-get;
|
60
|
+
sudo pkill -9 dpkg;
|
61
|
+
sudo dpkg --configure -a;
|
62
|
+
which patch > /dev/null || sudo apt install -y patch;
|
63
|
+
{{ conda_installation_commands }}
|
64
|
+
{{ ray_skypilot_installation_commands }}
|
65
|
+
sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
|
66
|
+
sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
|
67
|
+
{{ ssh_max_sessions_config }}
|
sky/users/permission.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.dev20250613
|
3
|
+
Version: 1.0.0.dev20250614
|
4
4
|
Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -119,6 +119,7 @@ Requires-Dist: awscli>=1.27.10; extra == "nebius"
|
|
119
119
|
Requires-Dist: botocore>=1.29.10; extra == "nebius"
|
120
120
|
Requires-Dist: boto3>=1.26.1; extra == "nebius"
|
121
121
|
Requires-Dist: colorama<0.4.5; extra == "nebius"
|
122
|
+
Provides-Extra: hyperbolic
|
122
123
|
Provides-Extra: all
|
123
124
|
Requires-Dist: awscli>=1.27.10; extra == "all"
|
124
125
|
Requires-Dist: botocore>=1.29.10; extra == "all"
|