skypilot-nightly 1.0.0.dev20251011__py3-none-any.whl → 1.0.0.dev20251013__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (52)
  1. sky/__init__.py +4 -2
  2. sky/adaptors/shadeform.py +89 -0
  3. sky/authentication.py +43 -0
  4. sky/backends/backend_utils.py +2 -0
  5. sky/backends/cloud_vm_ray_backend.py +4 -2
  6. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  7. sky/catalog/shadeform_catalog.py +165 -0
  8. sky/client/cli/command.py +44 -3
  9. sky/client/sdk.py +11 -3
  10. sky/clouds/__init__.py +2 -0
  11. sky/clouds/shadeform.py +393 -0
  12. sky/dashboard/out/404.html +1 -1
  13. sky/dashboard/out/_next/static/chunks/{webpack-66f23594d38c7f16.js → webpack-ac3a34c8f9fef041.js} +1 -1
  14. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  15. sky/dashboard/out/clusters/[cluster].html +1 -1
  16. sky/dashboard/out/clusters.html +1 -1
  17. sky/dashboard/out/config.html +1 -1
  18. sky/dashboard/out/index.html +1 -1
  19. sky/dashboard/out/infra/[context].html +1 -1
  20. sky/dashboard/out/infra.html +1 -1
  21. sky/dashboard/out/jobs/[job].html +1 -1
  22. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  23. sky/dashboard/out/jobs.html +1 -1
  24. sky/dashboard/out/users.html +1 -1
  25. sky/dashboard/out/volumes.html +1 -1
  26. sky/dashboard/out/workspace/new.html +1 -1
  27. sky/dashboard/out/workspaces/[name].html +1 -1
  28. sky/dashboard/out/workspaces.html +1 -1
  29. sky/provision/__init__.py +1 -0
  30. sky/provision/shadeform/__init__.py +11 -0
  31. sky/provision/shadeform/config.py +12 -0
  32. sky/provision/shadeform/instance.py +351 -0
  33. sky/provision/shadeform/shadeform_utils.py +83 -0
  34. sky/serve/constants.py +0 -3
  35. sky/serve/service_spec.py +1 -8
  36. sky/server/constants.py +4 -0
  37. sky/server/requests/executor.py +22 -2
  38. sky/server/requests/payloads.py +2 -0
  39. sky/server/requests/requests.py +119 -2
  40. sky/server/server.py +17 -6
  41. sky/setup_files/dependencies.py +1 -0
  42. sky/skylet/constants.py +1 -1
  43. sky/templates/shadeform-ray.yml.j2 +72 -0
  44. sky/utils/context_utils.py +13 -9
  45. {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/METADATA +43 -41
  46. {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/RECORD +52 -43
  47. /sky/dashboard/out/_next/static/{Xs6jdcfyNaUuBO8jmzU9_ → MtlDUf-nH1hhcy7xwbCj3}/_buildManifest.js +0 -0
  48. /sky/dashboard/out/_next/static/{Xs6jdcfyNaUuBO8jmzU9_ → MtlDUf-nH1hhcy7xwbCj3}/_ssgManifest.js +0 -0
  49. {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/WHEEL +0 -0
  50. {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/entry_points.txt +0 -0
  51. {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/licenses/LICENSE +0 -0
  52. {skypilot_nightly-1.0.0.dev20251011.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/top_level.txt +0 -0
@@ -292,6 +292,100 @@ class Request:
292
292
  raise
293
293
 
294
294
 
295
+ def encode_requests(requests: List[Request]) -> List[payloads.RequestPayload]:
296
+ """Serialize the SkyPilot API request for display purposes.
297
+
298
+ This function should be called on the server side to serialize the
299
+ request body into human readable format, e.g., the entrypoint should
300
+ be a string, and the pid, error, or return value are not needed.
301
+
302
+ The returned value will then be displayed on the client side in the request
303
+ table.
304
+
305
+ We do not use `encode` for display, to avoid sending a large amount of data
306
+ to the client side, especially since the request table could include
307
+ all the requests.
308
+ """
309
+ encoded_requests = []
310
+ all_users = global_user_state.get_all_users()
311
+ all_users_map = {user.id: user.name for user in all_users}
312
+ for request in requests:
313
+ if request.request_body is not None:
314
+ assert isinstance(request.request_body,
315
+ payloads.RequestBody), (request.name,
316
+ request.request_body)
317
+ user_name = all_users_map.get(request.user_id)
318
+ payload = payloads.RequestPayload(
319
+ request_id=request.request_id,
320
+ name=request.name,
321
+ entrypoint=request.entrypoint.__name__
322
+ if request.entrypoint is not None else '',
323
+ request_body=request.request_body.model_dump_json()
324
+ if request.request_body is not None else json.dumps(None),
325
+ status=request.status.value,
326
+ return_value=json.dumps(None),
327
+ error=json.dumps(None),
328
+ pid=None,
329
+ created_at=request.created_at,
330
+ schedule_type=request.schedule_type.value,
331
+ user_id=request.user_id,
332
+ user_name=user_name,
333
+ cluster_name=request.cluster_name,
334
+ status_msg=request.status_msg,
335
+ should_retry=request.should_retry,
336
+ finished_at=request.finished_at,
337
+ )
338
+ encoded_requests.append(payload)
339
+ return encoded_requests
340
+
341
+
342
+ def _update_request_row_fields(
343
+ row: Tuple[Any, ...],
344
+ fields: Optional[List[str]] = None) -> Tuple[Any, ...]:
345
+ """Pad a partially selected row to all REQUEST_COLUMNS with defaults."""
346
+ if not fields:
347
+ return row
348
+
349
+ # Convert tuple to dictionary for easier manipulation
350
+ content = dict(zip(fields, row))
351
+
352
+ # Required fields in RequestPayload
353
+ if 'request_id' not in fields:
354
+ content['request_id'] = ''
355
+ if 'name' not in fields:
356
+ content['name'] = ''
357
+ if 'entrypoint' not in fields:
358
+ content['entrypoint'] = server_constants.EMPTY_PICKLED_VALUE
359
+ if 'request_body' not in fields:
360
+ content['request_body'] = server_constants.EMPTY_PICKLED_VALUE
361
+ if 'status' not in fields:
362
+ content['status'] = RequestStatus.PENDING.value
363
+ if 'created_at' not in fields:
364
+ content['created_at'] = 0
365
+ if 'user_id' not in fields:
366
+ content['user_id'] = ''
367
+ if 'return_value' not in fields:
368
+ content['return_value'] = json.dumps(None)
369
+ if 'error' not in fields:
370
+ content['error'] = json.dumps(None)
371
+ if 'schedule_type' not in fields:
372
+ content['schedule_type'] = ScheduleType.SHORT.value
373
+ # Optional fields in RequestPayload
374
+ if 'pid' not in fields:
375
+ content['pid'] = None
376
+ if 'cluster_name' not in fields:
377
+ content['cluster_name'] = None
378
+ if 'status_msg' not in fields:
379
+ content['status_msg'] = None
380
+ if 'should_retry' not in fields:
381
+ content['should_retry'] = False
382
+ if 'finished_at' not in fields:
383
+ content['finished_at'] = None
384
+
385
+ # Convert back to tuple in the same order as REQUEST_COLUMNS
386
+ return tuple(content[col] for col in REQUEST_COLUMNS)
387
+
388
+
295
389
  def kill_cluster_requests(cluster_name: str, exclude_request_name: str):
296
390
  """Kill all pending and running requests for a cluster.
297
391
 
@@ -634,6 +728,7 @@ class RequestTaskFilter:
634
728
  Mutually exclusive with exclude_request_names.
635
729
  finished_before: if provided, only include requests finished before this
636
730
  timestamp.
731
+ limit: the number of requests to show. If None, show all requests.
637
732
 
638
733
  Raises:
639
734
  ValueError: If both exclude_request_names and include_request_names are
@@ -645,6 +740,8 @@ class RequestTaskFilter:
645
740
  exclude_request_names: Optional[List[str]] = None
646
741
  include_request_names: Optional[List[str]] = None
647
742
  finished_before: Optional[float] = None
743
+ limit: Optional[int] = None
744
+ fields: Optional[List[str]] = None
648
745
 
649
746
  def __post_init__(self):
650
747
  if (self.exclude_request_names is not None and
@@ -687,8 +784,13 @@ class RequestTaskFilter:
687
784
  if filter_str:
688
785
  filter_str = f' WHERE {filter_str}'
689
786
  columns_str = ', '.join(REQUEST_COLUMNS)
690
- return (f'SELECT {columns_str} FROM {REQUEST_TABLE}{filter_str} '
691
- 'ORDER BY created_at DESC'), filter_params
787
+ if self.fields:
788
+ columns_str = ', '.join(self.fields)
789
+ query_str = (f'SELECT {columns_str} FROM {REQUEST_TABLE}{filter_str} '
790
+ 'ORDER BY created_at DESC')
791
+ if self.limit is not None:
792
+ query_str += f' LIMIT {self.limit}'
793
+ return query_str, filter_params
692
794
 
693
795
 
694
796
  @init_db
@@ -722,6 +824,21 @@ async def get_request_tasks_async(
722
824
  return [Request.from_row(row) for row in rows]
723
825
 
724
826
 
827
+ @init_db_async
828
+ @metrics_lib.time_me_async
829
+ async def get_request_tasks_with_fields_async(
830
+ req_filter: RequestTaskFilter,
831
+ fields: Optional[List[str]] = None,
832
+ ) -> List[Request]:
833
+ """Async version of get_request_tasks that supports selecting fields."""
834
+ assert _DB is not None
835
+ async with _DB.execute_fetchall_async(*req_filter.build_query()) as rows:
836
+ if not rows:
837
+ return []
838
+ rows = [_update_request_row_fields(row, fields) for row in rows]
839
+ return [Request.from_row(row) for row in rows]
840
+
841
+
725
842
  @init_db_async
726
843
  @metrics_lib.time_me_async
727
844
  async def get_api_request_ids_start_with(incomplete: str) -> List[str]:
sky/server/server.py CHANGED
@@ -3,6 +3,7 @@
3
3
  import argparse
4
4
  import asyncio
5
5
  import base64
6
+ from concurrent.futures import ThreadPoolExecutor
6
7
  import contextlib
7
8
  import datetime
8
9
  import hashlib
@@ -1629,6 +1630,10 @@ async def api_status(
1629
1630
  None, description='Request IDs to get status for.'),
1630
1631
  all_status: bool = fastapi.Query(
1631
1632
  False, description='Get finished requests as well.'),
1633
+ limit: Optional[int] = fastapi.Query(
1634
+ None, description='Number of requests to show.'),
1635
+ fields: Optional[List[str]] = fastapi.Query(
1636
+ None, description='Fields to get. If None, get all fields.'),
1632
1637
  ) -> List[payloads.RequestPayload]:
1633
1638
  """Gets the list of requests."""
1634
1639
  if request_ids is None:
@@ -1638,9 +1643,15 @@ async def api_status(
1638
1643
  requests_lib.RequestStatus.PENDING,
1639
1644
  requests_lib.RequestStatus.RUNNING,
1640
1645
  ]
1641
- request_tasks = await requests_lib.get_request_tasks_async(
1642
- req_filter=requests_lib.RequestTaskFilter(status=statuses))
1643
- return [r.readable_encode() for r in request_tasks]
1646
+ request_tasks = await requests_lib.get_request_tasks_with_fields_async(
1647
+ req_filter=requests_lib.RequestTaskFilter(
1648
+ status=statuses,
1649
+ limit=limit,
1650
+ fields=fields,
1651
+ ),
1652
+ fields=fields,
1653
+ )
1654
+ return requests_lib.encode_requests(request_tasks)
1644
1655
  else:
1645
1656
  encoded_request_tasks = []
1646
1657
  for request_id in request_ids:
@@ -1721,9 +1732,9 @@ async def kubernetes_pod_ssh_proxy(websocket: fastapi.WebSocket,
1721
1732
  logger.info(f'WebSocket connection accepted for cluster: {cluster_name}')
1722
1733
 
1723
1734
  # Run core.status in another thread to avoid blocking the event loop.
1724
- cluster_records = await context_utils.to_thread(core.status,
1725
- cluster_name,
1726
- all_users=True)
1735
+ with ThreadPoolExecutor(max_workers=1) as thread_pool_executor:
1736
+ cluster_records = await context_utils.to_thread_with_executor(
1737
+ thread_pool_executor, core.status, cluster_name, all_users=True)
1727
1738
  cluster_record = cluster_records[0]
1728
1739
  if cluster_record['status'] != status_lib.ClusterStatus.UP:
1729
1740
  raise fastapi.HTTPException(
@@ -222,6 +222,7 @@ extras_require: Dict[str, List[str]] = {
222
222
  'hyperbolic': [], # No dependencies needed for hyperbolic
223
223
  'seeweb': ['ecsapi>=0.2.0'],
224
224
  'server': server_dependencies,
225
+ 'shadeform': [], # No dependencies needed for shadeform
225
226
  }
226
227
 
227
228
  # Calculate which clouds should be included in the [all] installation.
sky/skylet/constants.py CHANGED
@@ -471,7 +471,7 @@ CATALOG_DIR = '~/.sky/catalogs'
471
471
  ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
472
472
  'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
473
473
  'paperspace', 'primeintellect', 'do', 'nebius', 'ssh',
474
- 'hyperbolic', 'seeweb')
474
+ 'hyperbolic', 'seeweb', 'shadeform')
475
475
  # END constants used for service catalog.
476
476
 
477
477
  # The user ID of the SkyPilot system.
@@ -0,0 +1,72 @@
1
+ cluster_name: {{cluster_name_on_cloud}}
2
+
3
+ # The maximum number of worker nodes to launch in addition to the head node.
4
+ max_workers: {{num_nodes - 1}}
5
+ upscaling_speed: {{num_nodes - 1}}
6
+ idle_timeout_minutes: 60
7
+
8
+ provider:
9
+ type: external
10
+ module: sky.provision.shadeform
11
+ region: "{{region}}"
12
+ disable_launch_config_check: true
13
+
14
+ auth:
15
+ ssh_user: shadeform
16
+ ssh_private_key: {{ssh_private_key}}
17
+ ssh_key_id: {{ssh_key_id}}
18
+
19
+ available_node_types:
20
+ ray_head_default:
21
+ {%- if custom_resources %}
22
+ resources: {{custom_resources}}
23
+ {%- else %}
24
+ resources: {}
25
+ {%- endif %}
26
+ node_config:
27
+ InstanceType: {{instance_type}}
28
+ PublicKey: |-
29
+ skypilot:ssh_public_key_content
30
+
31
+ head_node_type: ray_head_default
32
+
33
+ # Format: `REMOTE_PATH : LOCAL_PATH`
34
+ file_mounts: {
35
+ "{{sky_ray_yaml_remote_path}}": "{{sky_ray_yaml_local_path}}",
36
+ "{{sky_remote_path}}/{{sky_wheel_hash}}": "{{sky_local_path}}",
37
+ {%- for remote_path, local_path in credentials.items() %}
38
+ "{{remote_path}}": "{{local_path}}",
39
+ {%- endfor %}
40
+ }
41
+
42
+ rsync_exclude: []
43
+
44
+ initialization_commands: []
45
+
46
+ # List of shell commands to run to set up nodes.
47
+ # NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
48
+ # connection, which is expensive. Try your best to co-locate commands into fewer
49
+ # items!
50
+ #
51
+ # Increment the following to make performance bugs easier to catch:
52
+ # current num items (num SSH connections): 1
53
+ setup_commands:
54
+ # Create ~/.ssh/config file in case the file does not exist in the image.
55
+ # Line 'rm ..': there is another installation of pip.
56
+ # Line 'sudo bash ..': set the ulimit as suggested by ray docs for performance. https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html#system-configuration
57
+ # Line 'sudo grep ..': set the number of threads per process to unlimited to avoid `ray job submit` getting stuck when the number of running ray jobs increases.
58
+ # Line 'mkdir -p ..': disable host key check
59
+ # Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys`
60
+ - {%- for initial_setup_command in initial_setup_commands %}
61
+ {{ initial_setup_command }}
62
+ {%- endfor %}
63
+ mkdir -p ~/.ssh; touch ~/.ssh/config; which patch > /dev/null || sudo apt install -y patch;
64
+ {{ conda_installation_commands }}
65
+ {{ ray_skypilot_installation_commands }}
66
+ sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
67
+ sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
68
+ (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config;
69
+ {{ ssh_max_sessions_config }}
70
+
71
+ # Commands to start ray clusters are now placed in `sky.provision.instance_setup`.
72
+ # We do not need to list it here anymore.
@@ -1,5 +1,6 @@
1
1
  """Utilities for SkyPilot context."""
2
2
  import asyncio
3
+ import concurrent.futures
3
4
  import contextvars
4
5
  import functools
5
6
  import io
@@ -191,14 +192,17 @@ def to_thread(func: Callable[P, T], /, *args: P.args,
191
192
 
192
193
  This is same as asyncio.to_thread added in python 3.9
193
194
  """
195
+ return to_thread_with_executor(None, func, *args, **kwargs)
196
+
197
+
198
+ def to_thread_with_executor(executor: Optional[concurrent.futures.Executor],
199
+ func: Callable[P, T], /, *args: P.args,
200
+ **kwargs: P.kwargs) -> 'asyncio.Future[T]':
201
+ """Asynchronously run function *func* in a separate thread with
202
+ a custom executor."""
203
+
194
204
  loop = asyncio.get_running_loop()
195
- # This is critical to pass the current coroutine context to the new thread
196
205
  pyctx = contextvars.copy_context()
197
- func_call: Callable[..., T] = functools.partial(
198
- # partial deletes arguments type and thus can't figure out the return
199
- # type of pyctx.run
200
- pyctx.run, # type: ignore
201
- func,
202
- *args,
203
- **kwargs)
204
- return loop.run_in_executor(None, func_call)
206
+ func_call: Callable[..., T] = functools.partial(pyctx.run, func, *args,
207
+ **kwargs)
208
+ return loop.run_in_executor(executor, func_call)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20251011
3
+ Version: 1.0.0.dev20251013
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -154,52 +154,53 @@ Requires-Dist: grpcio>=1.63.0; extra == "server"
154
154
  Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
155
155
  Requires-Dist: aiosqlite; extra == "server"
156
156
  Requires-Dist: greenlet; extra == "server"
157
+ Provides-Extra: shadeform
157
158
  Provides-Extra: all
158
- Requires-Dist: google-cloud-storage; extra == "all"
159
- Requires-Dist: ray[default]>=2.6.1; extra == "all"
160
- Requires-Dist: nebius>=0.2.47; extra == "all"
161
- Requires-Dist: awscli>=1.27.10; extra == "all"
162
- Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
163
- Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
164
- Requires-Dist: azure-core>=1.24.0; extra == "all"
165
- Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
166
- Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
167
- Requires-Dist: websockets; extra == "all"
168
- Requires-Dist: passlib; extra == "all"
169
- Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
170
- Requires-Dist: ibm-cloud-sdk-core; extra == "all"
171
- Requires-Dist: ibm-cos-sdk; extra == "all"
172
- Requires-Dist: azure-identity>=1.19.0; extra == "all"
173
- Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
174
- Requires-Dist: pyjwt; extra == "all"
175
- Requires-Dist: pydo>=0.3.0; extra == "all"
176
- Requires-Dist: aiohttp; extra == "all"
159
+ Requires-Dist: python-dateutil; extra == "all"
177
160
  Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
178
- Requires-Dist: colorama<0.4.5; extra == "all"
179
- Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
161
+ Requires-Dist: tomli; python_version < "3.11" and extra == "all"
180
162
  Requires-Dist: anyio; extra == "all"
181
- Requires-Dist: oci; extra == "all"
182
- Requires-Dist: greenlet; extra == "all"
183
- Requires-Dist: ecsapi>=0.2.0; extra == "all"
184
- Requires-Dist: msrestazure; extra == "all"
185
- Requires-Dist: boto3>=1.26.1; extra == "all"
186
- Requires-Dist: msgraph-sdk; extra == "all"
163
+ Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
164
+ Requires-Dist: aiosqlite; extra == "all"
187
165
  Requires-Dist: casbin; extra == "all"
188
- Requires-Dist: ibm-vpc; extra == "all"
189
- Requires-Dist: python-dateutil; extra == "all"
190
- Requires-Dist: botocore>=1.29.10; extra == "all"
191
- Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
192
- Requires-Dist: grpcio>=1.63.0; extra == "all"
166
+ Requires-Dist: pyjwt; extra == "all"
193
167
  Requires-Dist: azure-cli>=2.65.0; extra == "all"
194
- Requires-Dist: tomli; python_version < "3.11" and extra == "all"
195
- Requires-Dist: aiosqlite; extra == "all"
196
- Requires-Dist: runpod>=1.6.1; extra == "all"
168
+ Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
169
+ Requires-Dist: passlib; extra == "all"
170
+ Requires-Dist: azure-core>=1.31.0; extra == "all"
171
+ Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
172
+ Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
173
+ Requires-Dist: ibm-cloud-sdk-core; extra == "all"
197
174
  Requires-Dist: cudo-compute>=0.1.10; extra == "all"
175
+ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
176
+ Requires-Dist: grpcio>=1.63.0; extra == "all"
177
+ Requires-Dist: boto3>=1.26.1; extra == "all"
178
+ Requires-Dist: nebius>=0.2.47; extra == "all"
179
+ Requires-Dist: greenlet; extra == "all"
198
180
  Requires-Dist: azure-common; extra == "all"
199
- Requires-Dist: sqlalchemy_adapter; extra == "all"
200
- Requires-Dist: docker; extra == "all"
181
+ Requires-Dist: botocore>=1.29.10; extra == "all"
182
+ Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
183
+ Requires-Dist: ibm-vpc; extra == "all"
184
+ Requires-Dist: oci; extra == "all"
185
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
186
+ Requires-Dist: azure-identity>=1.19.0; extra == "all"
187
+ Requires-Dist: ibm-cos-sdk; extra == "all"
201
188
  Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
202
- Requires-Dist: azure-core>=1.31.0; extra == "all"
189
+ Requires-Dist: colorama<0.4.5; extra == "all"
190
+ Requires-Dist: awscli>=1.27.10; extra == "all"
191
+ Requires-Dist: docker; extra == "all"
192
+ Requires-Dist: aiohttp; extra == "all"
193
+ Requires-Dist: msrestazure; extra == "all"
194
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
195
+ Requires-Dist: ecsapi>=0.2.0; extra == "all"
196
+ Requires-Dist: google-cloud-storage; extra == "all"
197
+ Requires-Dist: azure-core>=1.24.0; extra == "all"
198
+ Requires-Dist: sqlalchemy_adapter; extra == "all"
199
+ Requires-Dist: runpod>=1.6.1; extra == "all"
200
+ Requires-Dist: msgraph-sdk; extra == "all"
201
+ Requires-Dist: ray[default]>=2.6.1; extra == "all"
202
+ Requires-Dist: pydo>=0.3.0; extra == "all"
203
+ Requires-Dist: websockets; extra == "all"
203
204
  Dynamic: author
204
205
  Dynamic: classifier
205
206
  Dynamic: description
@@ -249,10 +250,11 @@ Dynamic: summary
249
250
  ----
250
251
 
251
252
  :fire: *News* :fire:
253
+ - [Oct 2025] Run large-scale **LLM training with TorchTitan** on any AI infra: [**example**](./llm/torchtitan/)
254
+ - [Sep 2025] Scaling AI infrastructure at Abridge - **10x faster development** with SkyPilot: [**blog**](https://blog.skypilot.co/abridge/)
255
+ - [Sep 2025] Network and Storage Benchmarks for LLM training on the cloud: [**blog**](https://maknee.github.io/blog/2025/Network-And-Storage-Training-Skypilot/)
252
256
  - [Aug 2025] Serve and finetune **OpenAI GPT-OSS models** (gpt-oss-120b, gpt-oss-20b) with one command on any infra: [**serve**](./llm/gpt-oss/) + [**LoRA and full finetuning**](./llm/gpt-oss-finetuning/)
253
- - [Jul 2025] Run large-scale **LLM training with TorchTitan** on any cloud: [**example**](./llm/torchtitan/)
254
257
  - [Jul 2025] Run distributed **RL training for LLMs** with Verl (PPO, GRPO) on any cloud: [**example**](./llm/verl/)
255
- - [Jul 2025] 🎉 SkyPilot v0.10.0 released! [**blog post**](https://blog.skypilot.co/announcing-skypilot-0.10.0/), [**release notes**](https://github.com/skypilot-org/skypilot/releases/tag/v0.10.0)
256
258
  - [Jul 2025] Finetune **Llama4** on any distributed cluster/cloud: [**example**](./llm/llama-4-finetuning/)
257
259
  - [Jul 2025] Two-part blog series, `The Evolution of AI Job Orchestration`: (1) [Running AI jobs on GPU Neoclouds](https://blog.skypilot.co/ai-job-orchestration-pt1-gpu-neoclouds/), (2) [The AI-Native Control Plane & Orchestration that Finally Works for ML](https://blog.skypilot.co/ai-job-orchestration-pt2-ai-control-plane/)
258
260
  - [Apr 2025] Spin up **Qwen3** on your cluster/cloud: [**example**](./llm/qwen/)