skypilot-nightly 1.0.0.dev20251011__py3-none-any.whl → 1.0.0.dev20251013__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/adaptors/shadeform.py +89 -0
- sky/authentication.py +43 -0
- sky/backends/backend_utils.py +2 -0
- sky/backends/cloud_vm_ray_backend.py +4 -2
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/client/cli/command.py +44 -3
- sky/client/sdk.py +11 -3
- sky/clouds/__init__.py +2 -0
- sky/clouds/shadeform.py +393 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-66f23594d38c7f16.js → webpack-ac3a34c8f9fef041.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/provision/__init__.py +1 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/serve/constants.py +0 -3
- sky/serve/service_spec.py +1 -8
- sky/server/constants.py +4 -0
- sky/server/requests/executor.py +22 -2
- sky/server/requests/payloads.py +2 -0
- sky/server/requests/requests.py +119 -2
- sky/server/server.py +17 -6
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/constants.py +1 -1
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/utils/context_utils.py +13 -9
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/METADATA +43 -41
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/RECORD +52 -43
- /sky/dashboard/out/_next/static/{Xs6jdcfyNaUuBO8jmzU9_ → MtlDUf-nH1hhcy7xwbCj3}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{Xs6jdcfyNaUuBO8jmzU9_ → MtlDUf-nH1hhcy7xwbCj3}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/top_level.txt +0 -0
sky/server/requests/requests.py
CHANGED
|
@@ -292,6 +292,100 @@ class Request:
|
|
|
292
292
|
raise
|
|
293
293
|
|
|
294
294
|
|
|
295
|
+
def encode_requests(requests: List[Request]) -> List[payloads.RequestPayload]:
|
|
296
|
+
"""Serialize the SkyPilot API request for display purposes.
|
|
297
|
+
|
|
298
|
+
This function should be called on the server side to serialize the
|
|
299
|
+
request body into human readable format, e.g., the entrypoint should
|
|
300
|
+
be a string, and the pid, error, or return value are not needed.
|
|
301
|
+
|
|
302
|
+
The returned value will then be displayed on the client side in request
|
|
303
|
+
table.
|
|
304
|
+
|
|
305
|
+
We do not use `encode` for display to avoid a large amount of data being
|
|
306
|
+
sent to the client side, especially for the request table could include
|
|
307
|
+
all the requests.
|
|
308
|
+
"""
|
|
309
|
+
encoded_requests = []
|
|
310
|
+
all_users = global_user_state.get_all_users()
|
|
311
|
+
all_users_map = {user.id: user.name for user in all_users}
|
|
312
|
+
for request in requests:
|
|
313
|
+
if request.request_body is not None:
|
|
314
|
+
assert isinstance(request.request_body,
|
|
315
|
+
payloads.RequestBody), (request.name,
|
|
316
|
+
request.request_body)
|
|
317
|
+
user_name = all_users_map.get(request.user_id)
|
|
318
|
+
payload = payloads.RequestPayload(
|
|
319
|
+
request_id=request.request_id,
|
|
320
|
+
name=request.name,
|
|
321
|
+
entrypoint=request.entrypoint.__name__
|
|
322
|
+
if request.entrypoint is not None else '',
|
|
323
|
+
request_body=request.request_body.model_dump_json()
|
|
324
|
+
if request.request_body is not None else json.dumps(None),
|
|
325
|
+
status=request.status.value,
|
|
326
|
+
return_value=json.dumps(None),
|
|
327
|
+
error=json.dumps(None),
|
|
328
|
+
pid=None,
|
|
329
|
+
created_at=request.created_at,
|
|
330
|
+
schedule_type=request.schedule_type.value,
|
|
331
|
+
user_id=request.user_id,
|
|
332
|
+
user_name=user_name,
|
|
333
|
+
cluster_name=request.cluster_name,
|
|
334
|
+
status_msg=request.status_msg,
|
|
335
|
+
should_retry=request.should_retry,
|
|
336
|
+
finished_at=request.finished_at,
|
|
337
|
+
)
|
|
338
|
+
encoded_requests.append(payload)
|
|
339
|
+
return encoded_requests
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _update_request_row_fields(
|
|
343
|
+
row: Tuple[Any, ...],
|
|
344
|
+
fields: Optional[List[str]] = None) -> Tuple[Any, ...]:
|
|
345
|
+
"""Update the request row fields."""
|
|
346
|
+
if not fields:
|
|
347
|
+
return row
|
|
348
|
+
|
|
349
|
+
# Convert tuple to dictionary for easier manipulation
|
|
350
|
+
content = dict(zip(fields, row))
|
|
351
|
+
|
|
352
|
+
# Required fields in RequestPayload
|
|
353
|
+
if 'request_id' not in fields:
|
|
354
|
+
content['request_id'] = ''
|
|
355
|
+
if 'name' not in fields:
|
|
356
|
+
content['name'] = ''
|
|
357
|
+
if 'entrypoint' not in fields:
|
|
358
|
+
content['entrypoint'] = server_constants.EMPTY_PICKLED_VALUE
|
|
359
|
+
if 'request_body' not in fields:
|
|
360
|
+
content['request_body'] = server_constants.EMPTY_PICKLED_VALUE
|
|
361
|
+
if 'status' not in fields:
|
|
362
|
+
content['status'] = RequestStatus.PENDING.value
|
|
363
|
+
if 'created_at' not in fields:
|
|
364
|
+
content['created_at'] = 0
|
|
365
|
+
if 'user_id' not in fields:
|
|
366
|
+
content['user_id'] = ''
|
|
367
|
+
if 'return_value' not in fields:
|
|
368
|
+
content['return_value'] = json.dumps(None)
|
|
369
|
+
if 'error' not in fields:
|
|
370
|
+
content['error'] = json.dumps(None)
|
|
371
|
+
if 'schedule_type' not in fields:
|
|
372
|
+
content['schedule_type'] = ScheduleType.SHORT.value
|
|
373
|
+
# Optional fields in RequestPayload
|
|
374
|
+
if 'pid' not in fields:
|
|
375
|
+
content['pid'] = None
|
|
376
|
+
if 'cluster_name' not in fields:
|
|
377
|
+
content['cluster_name'] = None
|
|
378
|
+
if 'status_msg' not in fields:
|
|
379
|
+
content['status_msg'] = None
|
|
380
|
+
if 'should_retry' not in fields:
|
|
381
|
+
content['should_retry'] = False
|
|
382
|
+
if 'finished_at' not in fields:
|
|
383
|
+
content['finished_at'] = None
|
|
384
|
+
|
|
385
|
+
# Convert back to tuple in the same order as REQUEST_COLUMNS
|
|
386
|
+
return tuple(content[col] for col in REQUEST_COLUMNS)
|
|
387
|
+
|
|
388
|
+
|
|
295
389
|
def kill_cluster_requests(cluster_name: str, exclude_request_name: str):
|
|
296
390
|
"""Kill all pending and running requests for a cluster.
|
|
297
391
|
|
|
@@ -634,6 +728,7 @@ class RequestTaskFilter:
|
|
|
634
728
|
Mutually exclusive with exclude_request_names.
|
|
635
729
|
finished_before: if provided, only include requests finished before this
|
|
636
730
|
timestamp.
|
|
731
|
+
limit: the number of requests to show. If None, show all requests.
|
|
637
732
|
|
|
638
733
|
Raises:
|
|
639
734
|
ValueError: If both exclude_request_names and include_request_names are
|
|
@@ -645,6 +740,8 @@ class RequestTaskFilter:
|
|
|
645
740
|
exclude_request_names: Optional[List[str]] = None
|
|
646
741
|
include_request_names: Optional[List[str]] = None
|
|
647
742
|
finished_before: Optional[float] = None
|
|
743
|
+
limit: Optional[int] = None
|
|
744
|
+
fields: Optional[List[str]] = None
|
|
648
745
|
|
|
649
746
|
def __post_init__(self):
|
|
650
747
|
if (self.exclude_request_names is not None and
|
|
@@ -687,8 +784,13 @@ class RequestTaskFilter:
|
|
|
687
784
|
if filter_str:
|
|
688
785
|
filter_str = f' WHERE {filter_str}'
|
|
689
786
|
columns_str = ', '.join(REQUEST_COLUMNS)
|
|
690
|
-
|
|
691
|
-
|
|
787
|
+
if self.fields:
|
|
788
|
+
columns_str = ', '.join(self.fields)
|
|
789
|
+
query_str = (f'SELECT {columns_str} FROM {REQUEST_TABLE}{filter_str} '
|
|
790
|
+
'ORDER BY created_at DESC')
|
|
791
|
+
if self.limit is not None:
|
|
792
|
+
query_str += f' LIMIT {self.limit}'
|
|
793
|
+
return query_str, filter_params
|
|
692
794
|
|
|
693
795
|
|
|
694
796
|
@init_db
|
|
@@ -722,6 +824,21 @@ async def get_request_tasks_async(
|
|
|
722
824
|
return [Request.from_row(row) for row in rows]
|
|
723
825
|
|
|
724
826
|
|
|
827
|
+
@init_db_async
|
|
828
|
+
@metrics_lib.time_me_async
|
|
829
|
+
async def get_request_tasks_with_fields_async(
|
|
830
|
+
req_filter: RequestTaskFilter,
|
|
831
|
+
fields: Optional[List[str]] = None,
|
|
832
|
+
) -> List[Request]:
|
|
833
|
+
"""Async version of get_request_tasks."""
|
|
834
|
+
assert _DB is not None
|
|
835
|
+
async with _DB.execute_fetchall_async(*req_filter.build_query()) as rows:
|
|
836
|
+
if not rows:
|
|
837
|
+
return []
|
|
838
|
+
rows = [_update_request_row_fields(row, fields) for row in rows]
|
|
839
|
+
return [Request.from_row(row) for row in rows]
|
|
840
|
+
|
|
841
|
+
|
|
725
842
|
@init_db_async
|
|
726
843
|
@metrics_lib.time_me_async
|
|
727
844
|
async def get_api_request_ids_start_with(incomplete: str) -> List[str]:
|
sky/server/server.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import argparse
|
|
4
4
|
import asyncio
|
|
5
5
|
import base64
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
6
7
|
import contextlib
|
|
7
8
|
import datetime
|
|
8
9
|
import hashlib
|
|
@@ -1629,6 +1630,10 @@ async def api_status(
|
|
|
1629
1630
|
None, description='Request IDs to get status for.'),
|
|
1630
1631
|
all_status: bool = fastapi.Query(
|
|
1631
1632
|
False, description='Get finished requests as well.'),
|
|
1633
|
+
limit: Optional[int] = fastapi.Query(
|
|
1634
|
+
None, description='Number of requests to show.'),
|
|
1635
|
+
fields: Optional[List[str]] = fastapi.Query(
|
|
1636
|
+
None, description='Fields to get. If None, get all fields.'),
|
|
1632
1637
|
) -> List[payloads.RequestPayload]:
|
|
1633
1638
|
"""Gets the list of requests."""
|
|
1634
1639
|
if request_ids is None:
|
|
@@ -1638,9 +1643,15 @@ async def api_status(
|
|
|
1638
1643
|
requests_lib.RequestStatus.PENDING,
|
|
1639
1644
|
requests_lib.RequestStatus.RUNNING,
|
|
1640
1645
|
]
|
|
1641
|
-
request_tasks = await requests_lib.get_request_tasks_async(
|
|
1642
|
-
req_filter=requests_lib.RequestTaskFilter(status=statuses))
|
|
1643
|
-
|
|
1646
|
+
request_tasks = await requests_lib.get_request_tasks_with_fields_async(
|
|
1647
|
+
req_filter=requests_lib.RequestTaskFilter(
|
|
1648
|
+
status=statuses,
|
|
1649
|
+
limit=limit,
|
|
1650
|
+
fields=fields,
|
|
1651
|
+
),
|
|
1652
|
+
fields=fields,
|
|
1653
|
+
)
|
|
1654
|
+
return requests_lib.encode_requests(request_tasks)
|
|
1644
1655
|
else:
|
|
1645
1656
|
encoded_request_tasks = []
|
|
1646
1657
|
for request_id in request_ids:
|
|
@@ -1721,9 +1732,9 @@ async def kubernetes_pod_ssh_proxy(websocket: fastapi.WebSocket,
|
|
|
1721
1732
|
logger.info(f'WebSocket connection accepted for cluster: {cluster_name}')
|
|
1722
1733
|
|
|
1723
1734
|
# Run core.status in another thread to avoid blocking the event loop.
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1735
|
+
with ThreadPoolExecutor(max_workers=1) as thread_pool_executor:
|
|
1736
|
+
cluster_records = await context_utils.to_thread_with_executor(
|
|
1737
|
+
thread_pool_executor, core.status, cluster_name, all_users=True)
|
|
1727
1738
|
cluster_record = cluster_records[0]
|
|
1728
1739
|
if cluster_record['status'] != status_lib.ClusterStatus.UP:
|
|
1729
1740
|
raise fastapi.HTTPException(
|
sky/setup_files/dependencies.py
CHANGED
|
@@ -222,6 +222,7 @@ extras_require: Dict[str, List[str]] = {
|
|
|
222
222
|
'hyperbolic': [], # No dependencies needed for hyperbolic
|
|
223
223
|
'seeweb': ['ecsapi>=0.2.0'],
|
|
224
224
|
'server': server_dependencies,
|
|
225
|
+
'shadeform': [], # No dependencies needed for shadeform
|
|
225
226
|
}
|
|
226
227
|
|
|
227
228
|
# Calculate which clouds should be included in the [all] installation.
|
sky/skylet/constants.py
CHANGED
|
@@ -471,7 +471,7 @@ CATALOG_DIR = '~/.sky/catalogs'
|
|
|
471
471
|
ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
|
|
472
472
|
'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
|
|
473
473
|
'paperspace', 'primeintellect', 'do', 'nebius', 'ssh',
|
|
474
|
-
'hyperbolic', 'seeweb')
|
|
474
|
+
'hyperbolic', 'seeweb', 'shadeform')
|
|
475
475
|
# END constants used for service catalog.
|
|
476
476
|
|
|
477
477
|
# The user ID of the SkyPilot system.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
cluster_name: {{cluster_name_on_cloud}}
|
|
2
|
+
|
|
3
|
+
# The maximum number of workers nodes to launch in addition to the head node.
|
|
4
|
+
max_workers: {{num_nodes - 1}}
|
|
5
|
+
upscaling_speed: {{num_nodes - 1}}
|
|
6
|
+
idle_timeout_minutes: 60
|
|
7
|
+
|
|
8
|
+
provider:
|
|
9
|
+
type: external
|
|
10
|
+
module: sky.provision.shadeform
|
|
11
|
+
region: "{{region}}"
|
|
12
|
+
disable_launch_config_check: true
|
|
13
|
+
|
|
14
|
+
auth:
|
|
15
|
+
ssh_user: shadeform
|
|
16
|
+
ssh_private_key: {{ssh_private_key}}
|
|
17
|
+
ssh_key_id: {{ssh_key_id}}
|
|
18
|
+
|
|
19
|
+
available_node_types:
|
|
20
|
+
ray_head_default:
|
|
21
|
+
{%- if custom_resources %}
|
|
22
|
+
resources: {{custom_resources}}
|
|
23
|
+
{%- else %}
|
|
24
|
+
resources: {}
|
|
25
|
+
{%- endif %}
|
|
26
|
+
node_config:
|
|
27
|
+
InstanceType: {{instance_type}}
|
|
28
|
+
PublicKey: |-
|
|
29
|
+
skypilot:ssh_public_key_content
|
|
30
|
+
|
|
31
|
+
head_node_type: ray_head_default
|
|
32
|
+
|
|
33
|
+
# Format: `REMOTE_PATH : LOCAL_PATH`
|
|
34
|
+
file_mounts: {
|
|
35
|
+
"{{sky_ray_yaml_remote_path}}": "{{sky_ray_yaml_local_path}}",
|
|
36
|
+
"{{sky_remote_path}}/{{sky_wheel_hash}}": "{{sky_local_path}}",
|
|
37
|
+
{%- for remote_path, local_path in credentials.items() %}
|
|
38
|
+
"{{remote_path}}": "{{local_path}}",
|
|
39
|
+
{%- endfor %}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
rsync_exclude: []
|
|
43
|
+
|
|
44
|
+
initialization_commands: []
|
|
45
|
+
|
|
46
|
+
# List of shell commands to run to set up nodes.
|
|
47
|
+
# NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
|
|
48
|
+
# connection, which is expensive. Try your best to co-locate commands into fewer
|
|
49
|
+
# items!
|
|
50
|
+
#
|
|
51
|
+
# Increment the following for catching performance bugs easier:
|
|
52
|
+
# current num items (num SSH connections): 1
|
|
53
|
+
setup_commands:
|
|
54
|
+
# Create ~/.ssh/config file in case the file does not exist in the image.
|
|
55
|
+
# Line 'rm ..': there is another installation of pip.
|
|
56
|
+
# Line 'sudo bash ..': set the ulimit as suggested by ray docs for performance. https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html#system-configuration
|
|
57
|
+
# Line 'sudo grep ..': set the number of threads per process to unlimited to avoid ray job submit stucking issue when the number of running ray jobs increase.
|
|
58
|
+
# Line 'mkdir -p ..': disable host key check
|
|
59
|
+
# Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys`
|
|
60
|
+
- {%- for initial_setup_command in initial_setup_commands %}
|
|
61
|
+
{{ initial_setup_command }}
|
|
62
|
+
{%- endfor %}
|
|
63
|
+
mkdir -p ~/.ssh; touch ~/.ssh/config; which patch > /dev/null || sudo apt install -y patch;
|
|
64
|
+
{{ conda_installation_commands }}
|
|
65
|
+
{{ ray_skypilot_installation_commands }}
|
|
66
|
+
sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
|
|
67
|
+
sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
|
|
68
|
+
(grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config;
|
|
69
|
+
{{ ssh_max_sessions_config }}
|
|
70
|
+
|
|
71
|
+
# Command to start ray clusters are now placed in `sky.provision.instance_setup`.
|
|
72
|
+
# We do not need to list it here anymore.
|
sky/utils/context_utils.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Utilities for SkyPilot context."""
|
|
2
2
|
import asyncio
|
|
3
|
+
import concurrent.futures
|
|
3
4
|
import contextvars
|
|
4
5
|
import functools
|
|
5
6
|
import io
|
|
@@ -191,14 +192,17 @@ def to_thread(func: Callable[P, T], /, *args: P.args,
|
|
|
191
192
|
|
|
192
193
|
This is same as asyncio.to_thread added in python 3.9
|
|
193
194
|
"""
|
|
195
|
+
return to_thread_with_executor(None, func, *args, **kwargs)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def to_thread_with_executor(executor: Optional[concurrent.futures.Executor],
|
|
199
|
+
func: Callable[P, T], /, *args: P.args,
|
|
200
|
+
**kwargs: P.kwargs) -> 'asyncio.Future[T]':
|
|
201
|
+
"""Asynchronously run function *func* in a separate thread with
|
|
202
|
+
a custom executor."""
|
|
203
|
+
|
|
194
204
|
loop = asyncio.get_running_loop()
|
|
195
|
-
# This is critical to pass the current coroutine context to the new thread
|
|
196
205
|
pyctx = contextvars.copy_context()
|
|
197
|
-
func_call: Callable[..., T] = functools.partial(
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
pyctx.run, # type: ignore
|
|
201
|
-
func,
|
|
202
|
-
*args,
|
|
203
|
-
**kwargs)
|
|
204
|
-
return loop.run_in_executor(None, func_call)
|
|
206
|
+
func_call: Callable[..., T] = functools.partial(pyctx.run, func, *args,
|
|
207
|
+
**kwargs)
|
|
208
|
+
return loop.run_in_executor(executor, func_call)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: skypilot-nightly
|
|
3
|
-
Version: 1.0.0.dev20251011
|
|
3
|
+
Version: 1.0.0.dev20251013
|
|
4
4
|
Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
|
|
5
5
|
Author: SkyPilot Team
|
|
6
6
|
License: Apache 2.0
|
|
@@ -154,52 +154,53 @@ Requires-Dist: grpcio>=1.63.0; extra == "server"
|
|
|
154
154
|
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
|
|
155
155
|
Requires-Dist: aiosqlite; extra == "server"
|
|
156
156
|
Requires-Dist: greenlet; extra == "server"
|
|
157
|
+
Provides-Extra: shadeform
|
|
157
158
|
Provides-Extra: all
|
|
158
|
-
Requires-Dist:
|
|
159
|
-
Requires-Dist: ray[default]>=2.6.1; extra == "all"
|
|
160
|
-
Requires-Dist: nebius>=0.2.47; extra == "all"
|
|
161
|
-
Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
162
|
-
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
|
|
163
|
-
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
164
|
-
Requires-Dist: azure-core>=1.24.0; extra == "all"
|
|
165
|
-
Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
|
|
166
|
-
Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
|
|
167
|
-
Requires-Dist: websockets; extra == "all"
|
|
168
|
-
Requires-Dist: passlib; extra == "all"
|
|
169
|
-
Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
|
|
170
|
-
Requires-Dist: ibm-cloud-sdk-core; extra == "all"
|
|
171
|
-
Requires-Dist: ibm-cos-sdk; extra == "all"
|
|
172
|
-
Requires-Dist: azure-identity>=1.19.0; extra == "all"
|
|
173
|
-
Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
|
|
174
|
-
Requires-Dist: pyjwt; extra == "all"
|
|
175
|
-
Requires-Dist: pydo>=0.3.0; extra == "all"
|
|
176
|
-
Requires-Dist: aiohttp; extra == "all"
|
|
159
|
+
Requires-Dist: python-dateutil; extra == "all"
|
|
177
160
|
Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
|
|
178
|
-
Requires-Dist:
|
|
179
|
-
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
|
|
161
|
+
Requires-Dist: tomli; python_version < "3.11" and extra == "all"
|
|
180
162
|
Requires-Dist: anyio; extra == "all"
|
|
181
|
-
Requires-Dist:
|
|
182
|
-
Requires-Dist:
|
|
183
|
-
Requires-Dist: ecsapi>=0.2.0; extra == "all"
|
|
184
|
-
Requires-Dist: msrestazure; extra == "all"
|
|
185
|
-
Requires-Dist: boto3>=1.26.1; extra == "all"
|
|
186
|
-
Requires-Dist: msgraph-sdk; extra == "all"
|
|
163
|
+
Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
|
|
164
|
+
Requires-Dist: aiosqlite; extra == "all"
|
|
187
165
|
Requires-Dist: casbin; extra == "all"
|
|
188
|
-
Requires-Dist:
|
|
189
|
-
Requires-Dist: python-dateutil; extra == "all"
|
|
190
|
-
Requires-Dist: botocore>=1.29.10; extra == "all"
|
|
191
|
-
Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
|
|
192
|
-
Requires-Dist: grpcio>=1.63.0; extra == "all"
|
|
166
|
+
Requires-Dist: pyjwt; extra == "all"
|
|
193
167
|
Requires-Dist: azure-cli>=2.65.0; extra == "all"
|
|
194
|
-
Requires-Dist:
|
|
195
|
-
Requires-Dist:
|
|
196
|
-
Requires-Dist:
|
|
168
|
+
Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
|
|
169
|
+
Requires-Dist: passlib; extra == "all"
|
|
170
|
+
Requires-Dist: azure-core>=1.31.0; extra == "all"
|
|
171
|
+
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
|
|
172
|
+
Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
|
|
173
|
+
Requires-Dist: ibm-cloud-sdk-core; extra == "all"
|
|
197
174
|
Requires-Dist: cudo-compute>=0.1.10; extra == "all"
|
|
175
|
+
Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
|
|
176
|
+
Requires-Dist: grpcio>=1.63.0; extra == "all"
|
|
177
|
+
Requires-Dist: boto3>=1.26.1; extra == "all"
|
|
178
|
+
Requires-Dist: nebius>=0.2.47; extra == "all"
|
|
179
|
+
Requires-Dist: greenlet; extra == "all"
|
|
198
180
|
Requires-Dist: azure-common; extra == "all"
|
|
199
|
-
Requires-Dist:
|
|
200
|
-
Requires-Dist:
|
|
181
|
+
Requires-Dist: botocore>=1.29.10; extra == "all"
|
|
182
|
+
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
|
|
183
|
+
Requires-Dist: ibm-vpc; extra == "all"
|
|
184
|
+
Requires-Dist: oci; extra == "all"
|
|
185
|
+
Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
|
|
186
|
+
Requires-Dist: azure-identity>=1.19.0; extra == "all"
|
|
187
|
+
Requires-Dist: ibm-cos-sdk; extra == "all"
|
|
201
188
|
Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
|
|
202
|
-
Requires-Dist:
|
|
189
|
+
Requires-Dist: colorama<0.4.5; extra == "all"
|
|
190
|
+
Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
191
|
+
Requires-Dist: docker; extra == "all"
|
|
192
|
+
Requires-Dist: aiohttp; extra == "all"
|
|
193
|
+
Requires-Dist: msrestazure; extra == "all"
|
|
194
|
+
Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
|
|
195
|
+
Requires-Dist: ecsapi>=0.2.0; extra == "all"
|
|
196
|
+
Requires-Dist: google-cloud-storage; extra == "all"
|
|
197
|
+
Requires-Dist: azure-core>=1.24.0; extra == "all"
|
|
198
|
+
Requires-Dist: sqlalchemy_adapter; extra == "all"
|
|
199
|
+
Requires-Dist: runpod>=1.6.1; extra == "all"
|
|
200
|
+
Requires-Dist: msgraph-sdk; extra == "all"
|
|
201
|
+
Requires-Dist: ray[default]>=2.6.1; extra == "all"
|
|
202
|
+
Requires-Dist: pydo>=0.3.0; extra == "all"
|
|
203
|
+
Requires-Dist: websockets; extra == "all"
|
|
203
204
|
Dynamic: author
|
|
204
205
|
Dynamic: classifier
|
|
205
206
|
Dynamic: description
|
|
@@ -249,10 +250,11 @@ Dynamic: summary
|
|
|
249
250
|
----
|
|
250
251
|
|
|
251
252
|
:fire: *News* :fire:
|
|
253
|
+
- [Oct 2025] Run large-scale **LLM training with TorchTitan** on any AI infra: [**example**](./llm/torchtitan/)
|
|
254
|
+
- [Sep 2025] Scaling AI infrastructure at Abridge - **10x faster development** with SkyPilot: [**blog**](https://blog.skypilot.co/abridge/)
|
|
255
|
+
- [Sep 2025] Network and Storage Benchmarks for LLM training on the cloud: [**blog**](https://maknee.github.io/blog/2025/Network-And-Storage-Training-Skypilot/)
|
|
252
256
|
- [Aug 2025] Serve and finetune **OpenAI GPT-OSS models** (gpt-oss-120b, gpt-oss-20b) with one command on any infra: [**serve**](./llm/gpt-oss/) + [**LoRA and full finetuning**](./llm/gpt-oss-finetuning/)
|
|
253
|
-
- [Jul 2025] Run large-scale **LLM training with TorchTitan** on any cloud: [**example**](./llm/torchtitan/)
|
|
254
257
|
- [Jul 2025] Run distributed **RL training for LLMs** with Verl (PPO, GRPO) on any cloud: [**example**](./llm/verl/)
|
|
255
|
-
- [Jul 2025] 🎉 SkyPilot v0.10.0 released! [**blog post**](https://blog.skypilot.co/announcing-skypilot-0.10.0/), [**release notes**](https://github.com/skypilot-org/skypilot/releases/tag/v0.10.0)
|
|
256
258
|
- [Jul 2025] Finetune **Llama4** on any distributed cluster/cloud: [**example**](./llm/llama-4-finetuning/)
|
|
257
259
|
- [Jul 2025] Two-part blog series, `The Evolution of AI Job Orchestration`: (1) [Running AI jobs on GPU Neoclouds](https://blog.skypilot.co/ai-job-orchestration-pt1-gpu-neoclouds/), (2) [The AI-Native Control Plane & Orchestration that Finally Works for ML](https://blog.skypilot.co/ai-job-orchestration-pt2-ai-control-plane/)
|
|
258
260
|
- [Apr 2025] Spin up **Qwen3** on your cluster/cloud: [**example**](./llm/qwen/)
|