skypilot-nightly 1.0.0.dev20250922__py3-none-any.whl → 1.0.0.dev20250926__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (123)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend.py +10 -0
  3. sky/backends/backend_utils.py +207 -79
  4. sky/backends/cloud_vm_ray_backend.py +37 -13
  5. sky/backends/local_docker_backend.py +9 -0
  6. sky/client/cli/command.py +112 -53
  7. sky/client/common.py +4 -2
  8. sky/client/sdk.py +17 -7
  9. sky/client/sdk_async.py +4 -2
  10. sky/clouds/kubernetes.py +2 -1
  11. sky/clouds/runpod.py +20 -7
  12. sky/core.py +9 -54
  13. sky/dashboard/out/404.html +1 -1
  14. sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_buildManifest.js +1 -1
  15. sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/6856-2b3600ff2854d066.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/8969-d8bc3a2b9cf839a9.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/9037-d0c00018a5ba198c.js +6 -0
  19. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ad77b12fc736dca3.js +16 -0
  20. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-9525660179df3605.js → [cluster]-e052384df65ef200.js} +1 -1
  21. sky/dashboard/out/_next/static/chunks/{webpack-26167a9e6d91fa51.js → webpack-8e64d11e58eab5cb.js} +1 -1
  22. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  23. sky/dashboard/out/clusters/[cluster].html +1 -1
  24. sky/dashboard/out/clusters.html +1 -1
  25. sky/dashboard/out/config.html +1 -1
  26. sky/dashboard/out/index.html +1 -1
  27. sky/dashboard/out/infra/[context].html +1 -1
  28. sky/dashboard/out/infra.html +1 -1
  29. sky/dashboard/out/jobs/[job].html +1 -1
  30. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  31. sky/dashboard/out/jobs.html +1 -1
  32. sky/dashboard/out/users.html +1 -1
  33. sky/dashboard/out/volumes.html +1 -1
  34. sky/dashboard/out/workspace/new.html +1 -1
  35. sky/dashboard/out/workspaces/[name].html +1 -1
  36. sky/dashboard/out/workspaces.html +1 -1
  37. sky/data/mounting_utils.py +19 -10
  38. sky/execution.py +4 -2
  39. sky/global_user_state.py +271 -67
  40. sky/jobs/client/sdk.py +10 -1
  41. sky/jobs/constants.py +2 -0
  42. sky/jobs/controller.py +11 -7
  43. sky/jobs/server/core.py +5 -3
  44. sky/jobs/server/server.py +15 -11
  45. sky/jobs/utils.py +1 -1
  46. sky/logs/agent.py +30 -3
  47. sky/logs/aws.py +9 -19
  48. sky/provision/__init__.py +2 -1
  49. sky/provision/aws/instance.py +2 -1
  50. sky/provision/azure/instance.py +2 -1
  51. sky/provision/cudo/instance.py +2 -2
  52. sky/provision/do/instance.py +2 -2
  53. sky/provision/docker_utils.py +41 -19
  54. sky/provision/fluidstack/instance.py +2 -2
  55. sky/provision/gcp/instance.py +2 -1
  56. sky/provision/hyperbolic/instance.py +2 -1
  57. sky/provision/instance_setup.py +1 -1
  58. sky/provision/kubernetes/instance.py +134 -8
  59. sky/provision/lambda_cloud/instance.py +2 -1
  60. sky/provision/nebius/instance.py +2 -1
  61. sky/provision/oci/instance.py +2 -1
  62. sky/provision/paperspace/instance.py +2 -2
  63. sky/provision/primeintellect/instance.py +2 -2
  64. sky/provision/provisioner.py +1 -0
  65. sky/provision/runpod/__init__.py +2 -0
  66. sky/provision/runpod/instance.py +2 -2
  67. sky/provision/scp/instance.py +2 -2
  68. sky/provision/seeweb/instance.py +2 -1
  69. sky/provision/vast/instance.py +2 -1
  70. sky/provision/vsphere/instance.py +6 -5
  71. sky/schemas/api/responses.py +2 -1
  72. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  73. sky/serve/autoscalers.py +2 -0
  74. sky/serve/client/impl.py +45 -19
  75. sky/serve/replica_managers.py +12 -5
  76. sky/serve/serve_utils.py +5 -7
  77. sky/serve/server/core.py +9 -6
  78. sky/serve/server/impl.py +78 -25
  79. sky/serve/server/server.py +4 -5
  80. sky/serve/service_spec.py +33 -0
  81. sky/server/constants.py +1 -1
  82. sky/server/daemons.py +2 -3
  83. sky/server/requests/executor.py +56 -6
  84. sky/server/requests/payloads.py +32 -8
  85. sky/server/requests/preconditions.py +2 -3
  86. sky/server/rest.py +2 -0
  87. sky/server/server.py +28 -19
  88. sky/server/stream_utils.py +34 -12
  89. sky/setup_files/dependencies.py +5 -2
  90. sky/setup_files/setup.py +44 -44
  91. sky/skylet/constants.py +4 -1
  92. sky/skylet/events.py +42 -0
  93. sky/templates/jobs-controller.yaml.j2 +3 -0
  94. sky/templates/kubernetes-ray.yml.j2 +24 -18
  95. sky/usage/usage_lib.py +3 -0
  96. sky/utils/cli_utils/status_utils.py +4 -5
  97. sky/utils/context.py +104 -29
  98. sky/utils/controller_utils.py +7 -6
  99. sky/utils/db/db_utils.py +5 -1
  100. sky/utils/db/migration_utils.py +1 -1
  101. sky/utils/kubernetes/create_cluster.sh +13 -28
  102. sky/utils/kubernetes/delete_cluster.sh +10 -7
  103. sky/utils/kubernetes/generate_kind_config.py +6 -66
  104. sky/utils/kubernetes/kubernetes_deploy_utils.py +194 -38
  105. sky/utils/kubernetes_enums.py +5 -0
  106. sky/utils/ux_utils.py +35 -1
  107. sky/utils/yaml_utils.py +9 -0
  108. sky/volumes/client/sdk.py +44 -8
  109. sky/volumes/server/core.py +1 -0
  110. sky/volumes/server/server.py +33 -7
  111. sky/volumes/volume.py +35 -28
  112. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/METADATA +38 -33
  113. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/RECORD +118 -117
  114. sky/dashboard/out/_next/static/chunks/1121-4ff1ec0dbc5792ab.js +0 -1
  115. sky/dashboard/out/_next/static/chunks/6856-9a2538f38c004652.js +0 -1
  116. sky/dashboard/out/_next/static/chunks/8969-a39efbadcd9fde80.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/9037-472ee1222cb1e158.js +0 -6
  118. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1e9248ddbddcd122.js +0 -16
  119. /sky/dashboard/out/_next/static/{KP6HCNMqb_bnJB17oplgW → VXU6_xE28M55BOdwmUUJS}/_ssgManifest.js +0 -0
  120. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/WHEEL +0 -0
  121. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/entry_points.txt +0 -0
  122. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/licenses/LICENSE +0 -0
  123. {skypilot_nightly-1.0.0.dev20250922.dist-info → skypilot_nightly-1.0.0.dev20250926.dist-info}/top_level.txt +0 -0
@@ -3,12 +3,13 @@
3
3
  import fastapi
4
4
 
5
5
  from sky import clouds
6
+ from sky import exceptions
6
7
  from sky import sky_logging
7
8
  from sky.server.requests import executor
8
9
  from sky.server.requests import payloads
9
10
  from sky.server.requests import requests as requests_lib
10
11
  from sky.utils import registry
11
- from sky.utils import volume
12
+ from sky.utils import volume as volume_utils
12
13
  from sky.volumes.server import core
13
14
 
14
15
  logger = sky_logging.init_logger(__name__)
@@ -46,6 +47,31 @@ async def volume_delete(request: fastapi.Request,
46
47
  )
47
48
 
48
49
 
50
+ @router.post('/validate')
51
+ async def volume_validate(
52
+ _: fastapi.Request,
53
+ volume_validate_body: payloads.VolumeValidateBody) -> None:
54
+ """Validates a volume."""
55
+ # pylint: disable=import-outside-toplevel
56
+ from sky.volumes import volume as volume_lib
57
+
58
+ try:
59
+ volume_config = {
60
+ 'name': volume_validate_body.name,
61
+ 'type': volume_validate_body.volume_type,
62
+ 'infra': volume_validate_body.infra,
63
+ 'size': volume_validate_body.size,
64
+ 'labels': volume_validate_body.labels,
65
+ 'config': volume_validate_body.config,
66
+ 'resource_name': volume_validate_body.resource_name,
67
+ }
68
+ volume = volume_lib.Volume.from_yaml_config(volume_config)
69
+ volume.validate()
70
+ except Exception as e:
71
+ raise fastapi.HTTPException(status_code=400,
72
+ detail=exceptions.serialize_exception(e))
73
+
74
+
49
75
  @router.post('/apply')
50
76
  async def volume_apply(request: fastapi.Request,
51
77
  volume_apply_body: payloads.VolumeApplyBody) -> None:
@@ -55,7 +81,7 @@ async def volume_apply(request: fastapi.Request,
55
81
  volume_config = volume_apply_body.config
56
82
 
57
83
  supported_volume_types = [
58
- volume_type.value for volume_type in volume.VolumeType
84
+ volume_type.value for volume_type in volume_utils.VolumeType
59
85
  ]
60
86
  if volume_type not in supported_volume_types:
61
87
  raise fastapi.HTTPException(
@@ -64,24 +90,24 @@ async def volume_apply(request: fastapi.Request,
64
90
  if cloud is None:
65
91
  raise fastapi.HTTPException(status_code=400,
66
92
  detail=f'Invalid cloud: {volume_cloud}')
67
- if volume_type == volume.VolumeType.PVC.value:
93
+ if volume_type == volume_utils.VolumeType.PVC.value:
68
94
  if not cloud.is_same_cloud(clouds.Kubernetes()):
69
95
  raise fastapi.HTTPException(
70
96
  status_code=400,
71
97
  detail='PVC storage is only supported on Kubernetes')
72
98
  supported_access_modes = [
73
- access_mode.value for access_mode in volume.VolumeAccessMode
99
+ access_mode.value for access_mode in volume_utils.VolumeAccessMode
74
100
  ]
75
101
  if volume_config is None:
76
102
  volume_config = {}
77
103
  access_mode = volume_config.get('access_mode')
78
104
  if access_mode is None:
79
- volume_config[
80
- 'access_mode'] = volume.VolumeAccessMode.READ_WRITE_ONCE.value
105
+ volume_config['access_mode'] = (
106
+ volume_utils.VolumeAccessMode.READ_WRITE_ONCE.value)
81
107
  elif access_mode not in supported_access_modes:
82
108
  raise fastapi.HTTPException(
83
109
  status_code=400, detail=f'Invalid access mode: {access_mode}')
84
- elif volume_type == volume.VolumeType.RUNPOD_NETWORK_VOLUME.value:
110
+ elif volume_type == volume_utils.VolumeType.RUNPOD_NETWORK_VOLUME.value:
85
111
  if not cloud.is_same_cloud(clouds.RunPod()):
86
112
  raise fastapi.HTTPException(
87
113
  status_code=400,
sky/volumes/volume.py CHANGED
@@ -100,7 +100,7 @@ class Volume:
100
100
  }
101
101
 
102
102
  def _normalize_config(self) -> None:
103
- """Adjust and validate the config."""
103
+ """Normalize and validate the config."""
104
104
  # Validate schema
105
105
  common_utils.validate_schema(self.to_yaml_config(),
106
106
  schemas.get_volume_schema(),
@@ -115,8 +115,17 @@ class Volume:
115
115
  self.region = infra_info.region
116
116
  self.zone = infra_info.zone
117
117
 
118
- # Validate the volume config
119
- self._validate_config()
118
+ # Set cloud from volume type if not specified
119
+ cloud_obj_from_type = VOLUME_TYPE_TO_CLOUD.get(
120
+ volume_lib.VolumeType(self.type))
121
+ if self.cloud:
122
+ cloud_obj = registry.CLOUD_REGISTRY.from_str(self.cloud)
123
+ assert cloud_obj is not None
124
+ if not cloud_obj.is_same_cloud(cloud_obj_from_type):
125
+ raise ValueError(
126
+ f'Invalid cloud {self.cloud} for volume type {self.type}')
127
+ else:
128
+ self.cloud = str(cloud_obj_from_type)
120
129
 
121
130
  def _adjust_config(self) -> None:
122
131
  """Adjust the volume config (e.g., parse size)."""
@@ -132,43 +141,41 @@ class Volume:
132
141
  except ValueError as e:
133
142
  raise ValueError(f'Invalid size {self.size}: {e}') from e
134
143
 
135
- def _validate_config(self) -> None:
136
- """Validate the volume config."""
137
- cloud_obj_from_type = VOLUME_TYPE_TO_CLOUD.get(
138
- volume_lib.VolumeType(self.type))
139
- if self.cloud:
140
- cloud_obj = registry.CLOUD_REGISTRY.from_str(self.cloud)
141
- assert cloud_obj is not None
142
- if not cloud_obj.is_same_cloud(cloud_obj_from_type):
143
- raise ValueError(
144
- f'Invalid cloud {self.cloud} for volume type {self.type}')
145
- else:
146
- self.cloud = str(cloud_obj_from_type)
147
- cloud_obj = cloud_obj_from_type
148
- assert cloud_obj is not None
149
-
150
- self.region, self.zone = cloud_obj.validate_region_zone(
151
- self.region, self.zone)
144
+ def validate(self, skip_cloud_compatibility: bool = False) -> None:
145
+ """Validates the volume."""
146
+ self.validate_name()
147
+ self.validate_size()
148
+ if not skip_cloud_compatibility:
149
+ self.validate_cloud_compatibility()
150
+ # Extra, type-specific validations
151
+ self._validate_config_extra()
152
152
 
153
- # Name must be set by factory before validation.
154
- assert self.name is not None
155
- valid, err_msg = cloud_obj.is_volume_name_valid(self.name)
156
- if not valid:
157
- raise ValueError(f'Invalid volume name: {err_msg}')
153
+ def validate_name(self) -> None:
154
+ """Validates if the volume name is set."""
155
+ assert self.name is not None, 'Volume name must be set'
158
156
 
157
+ def validate_size(self) -> None:
158
+ """Validates that size is specified for new volumes."""
159
159
  if not self.resource_name and not self.size:
160
160
  raise ValueError('Size is required for new volumes. '
161
161
  'Please specify the size in the YAML file or '
162
162
  'use the --size flag.')
163
+
164
+ def validate_cloud_compatibility(self) -> None:
165
+ """Validates region, zone, name, labels with the cloud."""
166
+ cloud_obj = registry.CLOUD_REGISTRY.from_str(self.cloud)
167
+ assert cloud_obj is not None
168
+
169
+ valid, err_msg = cloud_obj.is_volume_name_valid(self.name)
170
+ if not valid:
171
+ raise ValueError(f'Invalid volume name: {err_msg}')
172
+
163
173
  if self.labels:
164
174
  for key, value in self.labels.items():
165
175
  valid, err_msg = cloud_obj.is_label_valid(key, value)
166
176
  if not valid:
167
177
  raise ValueError(f'{err_msg}')
168
178
 
169
- # Extra, type-specific validations
170
- self._validate_config_extra()
171
-
172
179
  # Hook methods for subclasses
173
180
  def _validate_config_extra(self) -> None:
174
181
  """Additional type-specific validation.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250922
3
+ Version: 1.0.0.dev20250926
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -59,7 +59,7 @@ Requires-Dist: casbin
59
59
  Requires-Dist: sqlalchemy_adapter
60
60
  Requires-Dist: prometheus_client>=0.8.0
61
61
  Requires-Dist: passlib
62
- Requires-Dist: bcrypt
62
+ Requires-Dist: bcrypt==4.0.1
63
63
  Requires-Dist: pyjwt
64
64
  Requires-Dist: gitpython
65
65
  Requires-Dist: types-paramiko
@@ -117,6 +117,7 @@ Requires-Dist: grpcio>=1.63.0; extra == "remote"
117
117
  Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "remote"
118
118
  Provides-Extra: runpod
119
119
  Requires-Dist: runpod>=1.6.1; extra == "runpod"
120
+ Requires-Dist: tomli; python_version < "3.11" and extra == "runpod"
120
121
  Provides-Extra: fluidstack
121
122
  Provides-Extra: cudo
122
123
  Requires-Dist: cudo-compute>=0.1.10; extra == "cudo"
@@ -151,50 +152,53 @@ Requires-Dist: anyio; extra == "server"
151
152
  Requires-Dist: grpcio>=1.63.0; extra == "server"
152
153
  Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
153
154
  Requires-Dist: aiosqlite; extra == "server"
155
+ Requires-Dist: greenlet; extra == "server"
154
156
  Provides-Extra: all
157
+ Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
155
158
  Requires-Dist: azure-core>=1.31.0; extra == "all"
156
- Requires-Dist: cudo-compute>=0.1.10; extra == "all"
157
- Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
158
- Requires-Dist: casbin; extra == "all"
159
- Requires-Dist: sqlalchemy_adapter; extra == "all"
159
+ Requires-Dist: docker; extra == "all"
160
160
  Requires-Dist: ibm-cos-sdk; extra == "all"
161
+ Requires-Dist: azure-identity>=1.19.0; extra == "all"
162
+ Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
163
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
164
+ Requires-Dist: azure-cli>=2.65.0; extra == "all"
165
+ Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
166
+ Requires-Dist: boto3>=1.26.1; extra == "all"
167
+ Requires-Dist: msrestazure; extra == "all"
168
+ Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
169
+ Requires-Dist: aiohttp; extra == "all"
170
+ Requires-Dist: colorama<0.4.5; extra == "all"
171
+ Requires-Dist: websockets; extra == "all"
172
+ Requires-Dist: ecsapi>=0.2.0; extra == "all"
173
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
174
+ Requires-Dist: tomli; python_version < "3.11" and extra == "all"
161
175
  Requires-Dist: pydo>=0.3.0; extra == "all"
176
+ Requires-Dist: casbin; extra == "all"
162
177
  Requires-Dist: ray[default]>=2.6.1; extra == "all"
163
178
  Requires-Dist: azure-core>=1.24.0; extra == "all"
164
- Requires-Dist: websockets; extra == "all"
165
- Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
166
- Requires-Dist: anyio; extra == "all"
179
+ Requires-Dist: cudo-compute>=0.1.10; extra == "all"
180
+ Requires-Dist: sqlalchemy_adapter; extra == "all"
167
181
  Requires-Dist: ibm-vpc; extra == "all"
168
- Requires-Dist: ecsapi>=0.2.0; extra == "all"
182
+ Requires-Dist: runpod>=1.6.1; extra == "all"
169
183
  Requires-Dist: passlib; extra == "all"
170
- Requires-Dist: google-cloud-storage; extra == "all"
171
- Requires-Dist: oci; extra == "all"
172
- Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
173
- Requires-Dist: python-dateutil; extra == "all"
174
- Requires-Dist: docker; extra == "all"
175
- Requires-Dist: colorama<0.4.5; extra == "all"
176
- Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
184
+ Requires-Dist: anyio; extra == "all"
177
185
  Requires-Dist: grpcio>=1.63.0; extra == "all"
186
+ Requires-Dist: python-dateutil; extra == "all"
187
+ Requires-Dist: nebius>=0.2.47; extra == "all"
188
+ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
189
+ Requires-Dist: oci; extra == "all"
178
190
  Requires-Dist: pyjwt; extra == "all"
179
- Requires-Dist: aiosqlite; extra == "all"
180
- Requires-Dist: ibm-cloud-sdk-core; extra == "all"
181
- Requires-Dist: aiohttp; extra == "all"
191
+ Requires-Dist: msgraph-sdk; extra == "all"
182
192
  Requires-Dist: azure-common; extra == "all"
183
- Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
193
+ Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
194
+ Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
195
+ Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
196
+ Requires-Dist: ibm-cloud-sdk-core; extra == "all"
197
+ Requires-Dist: aiosqlite; extra == "all"
184
198
  Requires-Dist: awscli>=1.27.10; extra == "all"
185
- Requires-Dist: boto3>=1.26.1; extra == "all"
186
- Requires-Dist: msrestazure; extra == "all"
187
- Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
188
- Requires-Dist: runpod>=1.6.1; extra == "all"
189
- Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
190
- Requires-Dist: azure-cli>=2.65.0; extra == "all"
191
- Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
192
- Requires-Dist: msgraph-sdk; extra == "all"
193
- Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
194
- Requires-Dist: azure-identity>=1.19.0; extra == "all"
195
- Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
196
- Requires-Dist: nebius>=0.2.47; extra == "all"
197
199
  Requires-Dist: botocore>=1.29.10; extra == "all"
200
+ Requires-Dist: google-cloud-storage; extra == "all"
201
+ Requires-Dist: greenlet; extra == "all"
198
202
  Dynamic: author
199
203
  Dynamic: classifier
200
204
  Dynamic: description
@@ -245,6 +249,7 @@ Dynamic: summary
245
249
 
246
250
  :fire: *News* :fire:
247
251
  - [Aug 2025] Serve and finetune **OpenAI GPT-OSS models** (gpt-oss-120b, gpt-oss-20b) with one command on any infra: [**serve**](./llm/gpt-oss/) + [**LoRA and full finetuning**](./llm/gpt-oss-finetuning/)
252
+ - [Jul 2025] Run large-scale **LLM training with TorchTitan** on any cloud: [**example**](./llm/torchtitan/)
248
253
  - [Jul 2025] Run distributed **RL training for LLMs** with Verl (PPO, GRPO) on any cloud: [**example**](./llm/verl/)
249
254
  - [Jul 2025] 🎉 SkyPilot v0.10.0 released! [**blog post**](https://blog.skypilot.co/announcing-skypilot-0.10.0/), [**release notes**](https://github.com/skypilot-org/skypilot/releases/tag/v0.10.0)
250
255
  - [Jul 2025] Finetune **Llama4** on any distributed cluster/cloud: [**example**](./llm/llama-4-finetuning/)