skypilot-nightly 1.0.0.dev20241018__py3-none-any.whl → 1.0.0.dev20241020__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/cli.py +46 -12
- sky/clouds/service_catalog/aws_catalog.py +11 -1
- sky/serve/__init__.py +2 -0
- sky/serve/constants.py +8 -1
- sky/serve/controller.py +70 -0
- sky/serve/core.py +47 -0
- sky/serve/replica_managers.py +13 -4
- sky/serve/serve_utils.py +43 -1
- {skypilot_nightly-1.0.0.dev20241018.dist-info → skypilot_nightly-1.0.0.dev20241020.dist-info}/METADATA +23 -21
- {skypilot_nightly-1.0.0.dev20241018.dist-info → skypilot_nightly-1.0.0.dev20241020.dist-info}/RECORD +15 -15
- {skypilot_nightly-1.0.0.dev20241018.dist-info → skypilot_nightly-1.0.0.dev20241020.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241018.dist-info → skypilot_nightly-1.0.0.dev20241020.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241018.dist-info → skypilot_nightly-1.0.0.dev20241020.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241018.dist-info → skypilot_nightly-1.0.0.dev20241020.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'c6ae536d8dfedc3bbcf427a81480382b9d5f4c29'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20241018'
|
38
|
+
__version__ = '1.0.0.dev20241020'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/cli.py
CHANGED
@@ -4380,9 +4380,14 @@ def serve_status(all: bool, endpoint: bool, service_names: List[str]):
|
|
4380
4380
|
default=False,
|
4381
4381
|
required=False,
|
4382
4382
|
help='Skip confirmation prompt.')
|
4383
|
+
@click.option('--replica-id',
|
4384
|
+
default=None,
|
4385
|
+
type=int,
|
4386
|
+
help='Tear down a given replica')
|
4383
4387
|
# pylint: disable=redefined-builtin
|
4384
|
-
def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool):
|
4385
|
-
|
4388
|
+
def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool,
|
4389
|
+
replica_id: Optional[int]):
|
4390
|
+
"""Teardown service(s) or a replica.
|
4386
4391
|
|
4387
4392
|
SERVICE_NAMES is the name of the service (or glob pattern) to tear down. If
|
4388
4393
|
both SERVICE_NAMES and ``--all`` are supplied, the latter takes precedence.
|
@@ -4408,6 +4413,12 @@ def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool):
|
|
4408
4413
|
\b
|
4409
4414
|
# Forcefully tear down a service in failed status.
|
4410
4415
|
sky serve down failed-service --purge
|
4416
|
+
\b
|
4417
|
+
# Tear down a specific replica
|
4418
|
+
sky serve down my-service --replica-id 1
|
4419
|
+
\b
|
4420
|
+
# Forcefully tear down a specific replica, even in failed status.
|
4421
|
+
sky serve down my-service --replica-id 1 --purge
|
4411
4422
|
"""
|
4412
4423
|
if sum([len(service_names) > 0, all]) != 1:
|
4413
4424
|
argument_str = f'SERVICE_NAMES={",".join(service_names)}' if len(
|
@@ -4417,22 +4428,45 @@ def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool):
|
|
4417
4428
|
'Can only specify one of SERVICE_NAMES or --all. '
|
4418
4429
|
f'Provided {argument_str!r}.')
|
4419
4430
|
|
4431
|
+
replica_id_is_defined = replica_id is not None
|
4432
|
+
if replica_id_is_defined:
|
4433
|
+
if len(service_names) != 1:
|
4434
|
+
service_names_str = ', '.join(service_names)
|
4435
|
+
raise click.UsageError(f'The --replica-id option can only be used '
|
4436
|
+
f'with a single service name. Got: '
|
4437
|
+
f'{service_names_str}.')
|
4438
|
+
if all:
|
4439
|
+
raise click.UsageError('The --replica-id option cannot be used '
|
4440
|
+
'with the --all option.')
|
4441
|
+
|
4420
4442
|
backend_utils.is_controller_accessible(
|
4421
4443
|
controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
|
4422
4444
|
stopped_message='All services should have been terminated.',
|
4423
4445
|
exit_if_not_accessible=True)
|
4424
4446
|
|
4425
4447
|
if not yes:
|
4426
|
-
|
4427
|
-
|
4428
|
-
|
4429
|
-
|
4430
|
-
|
4431
|
-
|
4432
|
-
|
4433
|
-
|
4434
|
-
|
4435
|
-
|
4448
|
+
if replica_id_is_defined:
|
4449
|
+
click.confirm(
|
4450
|
+
f'Terminating replica ID {replica_id} in '
|
4451
|
+
f'{service_names[0]!r}. Proceed?',
|
4452
|
+
default=True,
|
4453
|
+
abort=True,
|
4454
|
+
show_default=True)
|
4455
|
+
else:
|
4456
|
+
quoted_service_names = [f'{name!r}' for name in service_names]
|
4457
|
+
service_identity_str = (f'service(s) '
|
4458
|
+
f'{", ".join(quoted_service_names)}')
|
4459
|
+
if all:
|
4460
|
+
service_identity_str = 'all services'
|
4461
|
+
click.confirm(f'Terminating {service_identity_str}. Proceed?',
|
4462
|
+
default=True,
|
4463
|
+
abort=True,
|
4464
|
+
show_default=True)
|
4465
|
+
|
4466
|
+
if replica_id_is_defined:
|
4467
|
+
serve_lib.terminate_replica(service_names[0], replica_id, purge)
|
4468
|
+
else:
|
4469
|
+
serve_lib.down(service_names=service_names, all=all, purge=purge)
|
4436
4470
|
|
4437
4471
|
|
4438
4472
|
@serve.command('logs', cls=_DocumentedCodeCommand)
|
@@ -308,7 +308,17 @@ def list_accelerators(
|
|
308
308
|
|
309
309
|
def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
|
310
310
|
"""Returns the image id from the tag."""
|
311
|
-
|
311
|
+
global _image_df
|
312
|
+
|
313
|
+
image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
|
314
|
+
if image_id is None:
|
315
|
+
# Refresh the image catalog and try again, if the image tag is not
|
316
|
+
# found.
|
317
|
+
logger.debug('Refreshing the image catalog and trying again.')
|
318
|
+
_image_df = common.read_catalog('aws/images.csv',
|
319
|
+
pull_frequency_hours=0)
|
320
|
+
image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
|
321
|
+
return image_id
|
312
322
|
|
313
323
|
|
314
324
|
def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
|
sky/serve/__init__.py
CHANGED
@@ -8,6 +8,7 @@ from sky.serve.constants import SKYSERVE_METADATA_DIR
|
|
8
8
|
from sky.serve.core import down
|
9
9
|
from sky.serve.core import status
|
10
10
|
from sky.serve.core import tail_logs
|
11
|
+
from sky.serve.core import terminate_replica
|
11
12
|
from sky.serve.core import up
|
12
13
|
from sky.serve.core import update
|
13
14
|
from sky.serve.serve_state import ReplicaStatus
|
@@ -42,6 +43,7 @@ __all__ = [
|
|
42
43
|
'SKY_SERVE_CONTROLLER_NAME',
|
43
44
|
'SKYSERVE_METADATA_DIR',
|
44
45
|
'status',
|
46
|
+
'terminate_replica',
|
45
47
|
'tail_logs',
|
46
48
|
'up',
|
47
49
|
'update',
|
sky/serve/constants.py
CHANGED
@@ -92,4 +92,11 @@ REPLICA_ID_ENV_VAR = 'SKYPILOT_SERVE_REPLICA_ID'
|
|
92
92
|
# change for the serve_utils.ServeCodeGen, we need to bump this version, so that
|
93
93
|
# the user can be notified to update their SkyPilot serve version on the remote
|
94
94
|
# cluster.
|
95
|
-
|
95
|
+
# Changelog:
|
96
|
+
# v1.0 - Introduce rolling update.
|
97
|
+
# v2.0 - Added terminate-replica feature.
|
98
|
+
SERVE_VERSION = 2
|
99
|
+
|
100
|
+
TERMINATE_REPLICA_VERSION_MISMATCH_ERROR = (
|
101
|
+
'The version of service is outdated and does not support manually '
|
102
|
+
'terminating replicas. Please terminate the service and spin up again.')
|
sky/serve/controller.py
CHANGED
@@ -9,6 +9,7 @@ import time
|
|
9
9
|
import traceback
|
10
10
|
from typing import Any, Dict, List
|
11
11
|
|
12
|
+
import colorama
|
12
13
|
import fastapi
|
13
14
|
from fastapi import responses
|
14
15
|
import uvicorn
|
@@ -157,6 +158,75 @@ class SkyServeController:
|
|
157
158
|
return responses.JSONResponse(content={'message': 'Error'},
|
158
159
|
status_code=500)
|
159
160
|
|
161
|
+
@self._app.post('/controller/terminate_replica')
|
162
|
+
async def terminate_replica(
|
163
|
+
request: fastapi.Request) -> fastapi.Response:
|
164
|
+
request_data = await request.json()
|
165
|
+
replica_id = request_data['replica_id']
|
166
|
+
assert isinstance(replica_id,
|
167
|
+
int), 'Error: replica ID must be an integer.'
|
168
|
+
purge = request_data['purge']
|
169
|
+
assert isinstance(purge, bool), 'Error: purge must be a boolean.'
|
170
|
+
replica_info = serve_state.get_replica_info_from_id(
|
171
|
+
self._service_name, replica_id)
|
172
|
+
assert replica_info is not None, (f'Error: replica '
|
173
|
+
f'{replica_id} does not exist.')
|
174
|
+
replica_status = replica_info.status
|
175
|
+
|
176
|
+
if replica_status == serve_state.ReplicaStatus.SHUTTING_DOWN:
|
177
|
+
return responses.JSONResponse(
|
178
|
+
status_code=409,
|
179
|
+
content={
|
180
|
+
'message':
|
181
|
+
f'Replica {replica_id} of service '
|
182
|
+
f'{self._service_name!r} is already in the process '
|
183
|
+
f'of terminating. Skip terminating now.'
|
184
|
+
})
|
185
|
+
|
186
|
+
if (replica_status in serve_state.ReplicaStatus.failed_statuses()
|
187
|
+
and not purge):
|
188
|
+
return responses.JSONResponse(
|
189
|
+
status_code=409,
|
190
|
+
content={
|
191
|
+
'message': f'{colorama.Fore.YELLOW}Replica '
|
192
|
+
f'{replica_id} of service '
|
193
|
+
f'{self._service_name!r} is in failed '
|
194
|
+
f'status ({replica_info.status}). '
|
195
|
+
f'Skipping its termination as it could '
|
196
|
+
f'lead to a resource leak. '
|
197
|
+
f'(Use `sky serve down '
|
198
|
+
f'{self._service_name!r} --replica-id '
|
199
|
+
f'{replica_id} --purge` to '
|
200
|
+
'forcefully terminate the replica.)'
|
201
|
+
f'{colorama.Style.RESET_ALL}'
|
202
|
+
})
|
203
|
+
|
204
|
+
self._replica_manager.scale_down(replica_id, purge=purge)
|
205
|
+
|
206
|
+
action = 'terminated' if not purge else 'purged'
|
207
|
+
message = (f'{colorama.Fore.GREEN}Replica {replica_id} of service '
|
208
|
+
f'{self._service_name!r} is scheduled to be '
|
209
|
+
f'{action}.{colorama.Style.RESET_ALL}\n'
|
210
|
+
f'Please use {ux_utils.BOLD}sky serve status '
|
211
|
+
f'{self._service_name}{ux_utils.RESET_BOLD} '
|
212
|
+
f'to check the latest status.')
|
213
|
+
return responses.JSONResponse(status_code=200,
|
214
|
+
content={'message': message})
|
215
|
+
|
216
|
+
@self._app.exception_handler(Exception)
|
217
|
+
async def validation_exception_handler(
|
218
|
+
request: fastapi.Request, exc: Exception) -> fastapi.Response:
|
219
|
+
with ux_utils.enable_traceback():
|
220
|
+
logger.error(f'Error in controller: {exc!r}')
|
221
|
+
return responses.JSONResponse(
|
222
|
+
status_code=500,
|
223
|
+
content={
|
224
|
+
'message':
|
225
|
+
(f'Failed method {request.method} at URL {request.url}.'
|
226
|
+
f' Exception message is {exc!r}.')
|
227
|
+
},
|
228
|
+
)
|
229
|
+
|
160
230
|
threading.Thread(target=self._run_autoscaler).start()
|
161
231
|
|
162
232
|
logger.info('SkyServe Controller started on '
|
sky/serve/core.py
CHANGED
@@ -503,6 +503,53 @@ def down(
|
|
503
503
|
sky_logging.print(stdout)
|
504
504
|
|
505
505
|
|
506
|
+
@usage_lib.entrypoint
|
507
|
+
def terminate_replica(service_name: str, replica_id: int, purge: bool) -> None:
|
508
|
+
"""Tear down a specific replica for the given service.
|
509
|
+
|
510
|
+
Args:
|
511
|
+
service_name: Name of the service.
|
512
|
+
replica_id: ID of replica to terminate.
|
513
|
+
purge: Whether to terminate replicas in a failed status. These replicas
|
514
|
+
may lead to resource leaks, so we require the user to explicitly
|
515
|
+
specify this flag to make sure they are aware of this potential
|
516
|
+
resource leak.
|
517
|
+
|
518
|
+
Raises:
|
519
|
+
sky.exceptions.ClusterNotUpError: if the sky serve controller is not up.
|
520
|
+
RuntimeError: if failed to terminate the replica.
|
521
|
+
"""
|
522
|
+
handle = backend_utils.is_controller_accessible(
|
523
|
+
controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
|
524
|
+
stopped_message=
|
525
|
+
'No service is running now. Please spin up a service first.',
|
526
|
+
non_existent_message='No service is running now. '
|
527
|
+
'Please spin up a service first.',
|
528
|
+
)
|
529
|
+
|
530
|
+
backend = backend_utils.get_backend_from_handle(handle)
|
531
|
+
assert isinstance(backend, backends.CloudVmRayBackend)
|
532
|
+
|
533
|
+
code = serve_utils.ServeCodeGen.terminate_replica(service_name, replica_id,
|
534
|
+
purge)
|
535
|
+
returncode, stdout, stderr = backend.run_on_head(handle,
|
536
|
+
code,
|
537
|
+
require_outputs=True,
|
538
|
+
stream_logs=False,
|
539
|
+
separate_stderr=True)
|
540
|
+
|
541
|
+
try:
|
542
|
+
subprocess_utils.handle_returncode(returncode,
|
543
|
+
code,
|
544
|
+
'Failed to terminate the replica',
|
545
|
+
stderr,
|
546
|
+
stream_logs=True)
|
547
|
+
except exceptions.CommandError as e:
|
548
|
+
raise RuntimeError(e.error_msg) from e
|
549
|
+
|
550
|
+
sky_logging.print(stdout)
|
551
|
+
|
552
|
+
|
506
553
|
@usage_lib.entrypoint
|
507
554
|
def status(
|
508
555
|
service_names: Optional[Union[str,
|
sky/serve/replica_managers.py
CHANGED
@@ -247,6 +247,8 @@ class ReplicaStatusProperty:
|
|
247
247
|
is_scale_down: bool = False
|
248
248
|
# The replica's spot instance was preempted.
|
249
249
|
preempted: bool = False
|
250
|
+
# Whether the replica is purged.
|
251
|
+
purged: bool = False
|
250
252
|
|
251
253
|
def remove_terminated_replica(self) -> bool:
|
252
254
|
"""Whether to remove the replica record from the replica table.
|
@@ -307,6 +309,8 @@ class ReplicaStatusProperty:
|
|
307
309
|
return False
|
308
310
|
if self.preempted:
|
309
311
|
return False
|
312
|
+
if self.purged:
|
313
|
+
return False
|
310
314
|
return True
|
311
315
|
|
312
316
|
def to_replica_status(self) -> serve_state.ReplicaStatus:
|
@@ -590,7 +594,7 @@ class ReplicaManager:
|
|
590
594
|
"""
|
591
595
|
raise NotImplementedError
|
592
596
|
|
593
|
-
def scale_down(self, replica_id: int) -> None:
|
597
|
+
def scale_down(self, replica_id: int, purge: bool = False) -> None:
|
594
598
|
"""Scale down replica with replica_id."""
|
595
599
|
raise NotImplementedError
|
596
600
|
|
@@ -679,7 +683,8 @@ class SkyPilotReplicaManager(ReplicaManager):
|
|
679
683
|
replica_id: int,
|
680
684
|
sync_down_logs: bool,
|
681
685
|
replica_drain_delay_seconds: int,
|
682
|
-
is_scale_down: bool = False) -> None:
|
686
|
+
is_scale_down: bool = False,
|
687
|
+
purge: bool = False) -> None:
|
683
688
|
|
684
689
|
if replica_id in self._launch_process_pool:
|
685
690
|
info = serve_state.get_replica_info_from_id(self._service_name,
|
@@ -763,16 +768,18 @@ class SkyPilotReplicaManager(ReplicaManager):
|
|
763
768
|
)
|
764
769
|
info.status_property.sky_down_status = ProcessStatus.RUNNING
|
765
770
|
info.status_property.is_scale_down = is_scale_down
|
771
|
+
info.status_property.purged = purge
|
766
772
|
serve_state.add_or_update_replica(self._service_name, replica_id, info)
|
767
773
|
p.start()
|
768
774
|
self._down_process_pool[replica_id] = p
|
769
775
|
|
770
|
-
def scale_down(self, replica_id: int) -> None:
|
776
|
+
def scale_down(self, replica_id: int, purge: bool = False) -> None:
|
771
777
|
self._terminate_replica(
|
772
778
|
replica_id,
|
773
779
|
sync_down_logs=False,
|
774
780
|
replica_drain_delay_seconds=_DEFAULT_DRAIN_SECONDS,
|
775
|
-
is_scale_down=True)
|
781
|
+
is_scale_down=True,
|
782
|
+
purge=purge)
|
776
783
|
|
777
784
|
def _handle_preemption(self, info: ReplicaInfo) -> bool:
|
778
785
|
"""Handle preemption of the replica if any error happened.
|
@@ -911,6 +918,8 @@ class SkyPilotReplicaManager(ReplicaManager):
|
|
911
918
|
# since the user should fix the error before updating.
|
912
919
|
elif info.version != self.latest_version:
|
913
920
|
removal_reason = 'for version outdated'
|
921
|
+
elif info.status_property.purged:
|
922
|
+
removal_reason = 'for purge'
|
914
923
|
else:
|
915
924
|
logger.info(f'Termination of replica {replica_id} '
|
916
925
|
'finished. Replica info is kept since some '
|
sky/serve/serve_utils.py
CHANGED
@@ -313,6 +313,36 @@ def update_service_encoded(service_name: str, version: int, mode: str) -> str:
|
|
313
313
|
return common_utils.encode_payload(service_msg)
|
314
314
|
|
315
315
|
|
316
|
+
def terminate_replica(service_name: str, replica_id: int, purge: bool) -> str:
|
317
|
+
service_status = _get_service_status(service_name)
|
318
|
+
if service_status is None:
|
319
|
+
with ux_utils.print_exception_no_traceback():
|
320
|
+
raise ValueError(f'Service {service_name!r} does not exist.')
|
321
|
+
replica_info = serve_state.get_replica_info_from_id(service_name,
|
322
|
+
replica_id)
|
323
|
+
if replica_info is None:
|
324
|
+
with ux_utils.print_exception_no_traceback():
|
325
|
+
raise ValueError(
|
326
|
+
f'Replica {replica_id} for service {service_name} does not '
|
327
|
+
'exist.')
|
328
|
+
|
329
|
+
controller_port = service_status['controller_port']
|
330
|
+
resp = requests.post(
|
331
|
+
_CONTROLLER_URL.format(CONTROLLER_PORT=controller_port) +
|
332
|
+
'/controller/terminate_replica',
|
333
|
+
json={
|
334
|
+
'replica_id': replica_id,
|
335
|
+
'purge': purge,
|
336
|
+
})
|
337
|
+
|
338
|
+
message: str = resp.json()['message']
|
339
|
+
if resp.status_code != 200:
|
340
|
+
with ux_utils.print_exception_no_traceback():
|
341
|
+
raise ValueError(f'Failed to terminate replica {replica_id} '
|
342
|
+
f'in {service_name}. Reason:\n{message}')
|
343
|
+
return message
|
344
|
+
|
345
|
+
|
316
346
|
def _get_service_status(
|
317
347
|
service_name: str,
|
318
348
|
with_replica_info: bool = True) -> Optional[Dict[str, Any]]:
|
@@ -735,7 +765,7 @@ def _get_replicas(service_record: Dict[str, Any]) -> str:
|
|
735
765
|
|
736
766
|
|
737
767
|
def get_endpoint(service_record: Dict[str, Any]) -> str:
|
738
|
-
# Don't use backend_utils.
|
768
|
+
# Don't use backend_utils.is_controller_accessible since it is too slow.
|
739
769
|
handle = global_user_state.get_handle_from_cluster_name(
|
740
770
|
SKY_SERVE_CONTROLLER_NAME)
|
741
771
|
assert isinstance(handle, backends.CloudVmRayResourceHandle)
|
@@ -915,6 +945,18 @@ class ServeCodeGen:
|
|
915
945
|
]
|
916
946
|
return cls._build(code)
|
917
947
|
|
948
|
+
@classmethod
|
949
|
+
def terminate_replica(cls, service_name: str, replica_id: int,
|
950
|
+
purge: bool) -> str:
|
951
|
+
code = [
|
952
|
+
f'(lambda: print(serve_utils.terminate_replica({service_name!r}, '
|
953
|
+
f'{replica_id}, {purge}), end="", flush=True) '
|
954
|
+
'if getattr(constants, "SERVE_VERSION", 0) >= 2 else '
|
955
|
+
f'exec("raise RuntimeError('
|
956
|
+
f'{constants.TERMINATE_REPLICA_VERSION_MISMATCH_ERROR!r})"))()'
|
957
|
+
]
|
958
|
+
return cls._build(code)
|
959
|
+
|
918
960
|
@classmethod
|
919
961
|
def wait_service_registration(cls, service_name: str, job_id: int) -> str:
|
920
962
|
code = [
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.dev20241018
|
3
|
+
Version: 1.0.0.dev20241020
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -153,30 +153,32 @@ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "vsphere"
|
|
153
153
|
|
154
154
|
----
|
155
155
|
:fire: *News* :fire:
|
156
|
-
- [
|
157
|
-
- [Sep
|
158
|
-
- [
|
159
|
-
- [Jun
|
160
|
-
- [Apr
|
161
|
-
- [Apr
|
162
|
-
- [Feb
|
163
|
-
- [Feb
|
164
|
-
- [Dec
|
165
|
-
- [Nov
|
156
|
+
- [Oct 2024] :tada: **SkyPilot crossed 1M+ downloads** :tada:: Thank you to our community! [**Twitter/X**](https://x.com/skypilot_org/status/1844770841718067638)
|
157
|
+
- [Sep 2024] Point, Launch and Serve **Llama 3.2** on Kubernetes or Any Cloud: [**example**](./llm/llama-3_2/)
|
158
|
+
- [Sep 2024] Run and deploy [**Pixtral**](./llm/pixtral), the first open-source multimodal model from Mistral AI.
|
159
|
+
- [Jun 2024] Reproduce **GPT** with [llm.c](https://github.com/karpathy/llm.c/discussions/481) on any cloud: [**guide**](./llm/gpt-2/)
|
160
|
+
- [Apr 2024] Serve [**Qwen-110B**](https://qwenlm.github.io/blog/qwen1.5-110b/) on your infra: [**example**](./llm/qwen/)
|
161
|
+
- [Apr 2024] Using [**Ollama**](https://github.com/ollama/ollama) to deploy quantized LLMs on CPUs and GPUs: [**example**](./llm/ollama/)
|
162
|
+
- [Feb 2024] Deploying and scaling [**Gemma**](https://blog.google/technology/developers/gemma-open-models/) with SkyServe: [**example**](./llm/gemma/)
|
163
|
+
- [Feb 2024] Serving [**Code Llama 70B**](https://ai.meta.com/blog/code-llama-large-language-model-coding/) with vLLM and SkyServe: [**example**](./llm/codellama/)
|
164
|
+
- [Dec 2023] [**Mixtral 8x7B**](https://mistral.ai/news/mixtral-of-experts/), a high quality sparse mixture-of-experts model, was released by Mistral AI! Deploy via SkyPilot on any cloud: [**example**](./llm/mixtral/)
|
165
|
+
- [Nov 2023] Using [**Axolotl**](https://github.com/OpenAccess-AI-Collective/axolotl) to finetune Mistral 7B on the cloud (on-demand and spot): [**example**](./llm/axolotl/)
|
166
|
+
|
167
|
+
**LLM Finetuning Cookbooks**: Finetuning Llama 2 / Llama 3.1 in your own cloud environment, privately: Llama 2 [**example**](./llm/vicuna-llama-2/) and [**blog**](https://blog.skypilot.co/finetuning-llama2-operational-guide/); Llama 3.1 [**example**](./llm/llama-3_1-finetuning/) and [**blog**](https://blog.skypilot.co/finetune-llama-3_1-on-your-infra/)
|
166
168
|
|
167
169
|
<details>
|
168
170
|
<summary>Archived</summary>
|
169
171
|
|
170
|
-
- [
|
171
|
-
- [
|
172
|
-
- [
|
173
|
-
- [
|
174
|
-
- [
|
175
|
-
- [Sep
|
176
|
-
- [
|
177
|
-
- [
|
178
|
-
- [
|
179
|
-
- [
|
172
|
+
- [Jul 2024] [**Finetune**](./llm/llama-3_1-finetuning/) and [**serve**](./llm/llama-3_1/) **Llama 3.1** on your infra
|
173
|
+
- [Apr 2024] Serve and finetune [**Llama 3**](https://skypilot.readthedocs.io/en/latest/gallery/llms/llama-3.html) on any cloud or Kubernetes: [**example**](./llm/llama-3/)
|
174
|
+
- [Mar 2024] Serve and deploy [**Databricks DBRX**](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) on your infra: [**example**](./llm/dbrx/)
|
175
|
+
- [Feb 2024] Speed up your LLM deployments with [**SGLang**](https://github.com/sgl-project/sglang) for 5x throughput on SkyServe: [**example**](./llm/sglang/)
|
176
|
+
- [Dec 2023] Using [**LoRAX**](https://github.com/predibase/lorax) to serve 1000s of finetuned LLMs on a single instance in the cloud: [**example**](./llm/lorax/)
|
177
|
+
- [Sep 2023] [**Mistral 7B**](https://mistral.ai/news/announcing-mistral-7b/), a high-quality open LLM, was released! Deploy via SkyPilot on any cloud: [**Mistral docs**](https://docs.mistral.ai/self-deployment/skypilot)
|
178
|
+
- [Sep 2023] Case study: [**Covariant**](https://covariant.ai/) transformed AI development on the cloud using SkyPilot, delivering models 4x faster cost-effectively: [**read the case study**](https://blog.skypilot.co/covariant/)
|
179
|
+
- [Jul 2023] Self-Hosted **Llama-2 Chatbot** on Any Cloud: [**example**](./llm/llama-2/)
|
180
|
+
- [Jun 2023] Serving LLM 24x Faster On the Cloud [**with vLLM**](https://vllm.ai/) and SkyPilot: [**example**](./llm/vllm/), [**blog post**](https://blog.skypilot.co/serving-llm-24x-faster-on-the-cloud-with-vllm-and-skypilot/)
|
181
|
+
- [Apr 2023] [SkyPilot YAMLs](./llm/vicuna/) for finetuning & serving the [Vicuna LLM](https://lmsys.org/blog/2023-03-30-vicuna/) with a single command!
|
180
182
|
|
181
183
|
</details>
|
182
184
|
|
{skypilot_nightly-1.0.0.dev20241018.dist-info → skypilot_nightly-1.0.0.dev20241020.dist-info}/RECORD
RENAMED
@@ -1,8 +1,8 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=njbGTeVXmuel8rNQYbPE9POlsdZUizsa3jEcGfwJklE,5854
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=pAdCT60OxxiXI9KXDyP2lQ9u9vMc6aMtq5Xi2h_hbdw,20984
|
4
4
|
sky/check.py,sha256=jLMIIJrseaZj1_o5WkbaD9XdyXIlCaT6pyAaIFdhdmA,9079
|
5
|
-
sky/cli.py,sha256=
|
5
|
+
sky/cli.py,sha256=XcQeVtH5J7xcARGagYicmTUfd2145jN2nvnZaZXtZlI,209981
|
6
6
|
sky/cloud_stores.py,sha256=RjFgmRhUh1Kk__f6g3KxzLp9s7dA0pFK4W1AukEuUaw,21153
|
7
7
|
sky/core.py,sha256=DW9OGE2kS2CmsvQ1grrpRnNFS3woMGWSHu5GE99e-I4,38190
|
8
8
|
sky/dag.py,sha256=WLFWr5hfrwjd31uYlNvI-zWUk7tLaT_gzJn4LzbVtkE,2780
|
@@ -56,7 +56,7 @@ sky/clouds/runpod.py,sha256=lstUC6f4JDhtcH9NfwkbpCJMmfmvMigoanhPXPbTYds,11540
|
|
56
56
|
sky/clouds/scp.py,sha256=2KLTuNSMdBzK8CLwSesv7efOuiLidIMoyNG4AOt5Sqw,15870
|
57
57
|
sky/clouds/vsphere.py,sha256=7eZFYIDtY5sX_ATr8h7kwwkY9t8Z-EYMJ9HCjoRBoxI,12309
|
58
58
|
sky/clouds/service_catalog/__init__.py,sha256=e0K-c64jQV9d6zly5OnIXMsYaZXs_Ko9osAbDaRlOOw,14743
|
59
|
-
sky/clouds/service_catalog/aws_catalog.py,sha256=
|
59
|
+
sky/clouds/service_catalog/aws_catalog.py,sha256=1wX1-wOMw2LZ7RkV_Ah7c42RLRYm-m5_GAXzn32M5a8,13038
|
60
60
|
sky/clouds/service_catalog/azure_catalog.py,sha256=VJi3yfhZy9Sc6UfcLAc8xIoTlUlUr090TODkCZyyHFw,7311
|
61
61
|
sky/clouds/service_catalog/common.py,sha256=PA3llB0zZh4v0DO_gDDCKGhRIBx16CAp2WJZNxhjNOA,27266
|
62
62
|
sky/clouds/service_catalog/config.py,sha256=ylzqewdEBjDg4awvFek6ldYmFrnvD2bVGLZuLPvEVYA,1793
|
@@ -171,16 +171,16 @@ sky/provision/vsphere/common/service_manager_factory.py,sha256=YkvfHiRXFK_Nb406z
|
|
171
171
|
sky/provision/vsphere/common/ssl_helper.py,sha256=TYzN9K0i_Mk_17PKGyGPgvOGfoizysuuIeYapcy_tWE,795
|
172
172
|
sky/provision/vsphere/common/vapiconnect.py,sha256=R2I1ZWBA19d11fZ_FrIzQT8E1aLl1HU4Rdcj8Z5r3NE,2932
|
173
173
|
sky/provision/vsphere/common/vim_utils.py,sha256=EMWLS8ILpdx6XwUZ9I53y0B_1yFrRrlr4jjIMT84hAc,17877
|
174
|
-
sky/serve/__init__.py,sha256=
|
174
|
+
sky/serve/__init__.py,sha256=gFZt7W3UPMi4qvYe2xgkHg1VxbR1WGavKyWLBUD3mpg,1731
|
175
175
|
sky/serve/autoscalers.py,sha256=khY1oZ22PRaUQNsLCoNKH178X_NiJw0LSLOKr7_LNgY,30275
|
176
|
-
sky/serve/constants.py,sha256=
|
177
|
-
sky/serve/controller.py,sha256=
|
178
|
-
sky/serve/core.py,sha256=
|
176
|
+
sky/serve/constants.py,sha256=7MflfgTHO9gDSux93U4BmNeEMWXxZB4q7I54KUwgp-s,4651
|
177
|
+
sky/serve/controller.py,sha256=R5iIEGEEFtbm_6MvSGelYZP-vSmW0cSFuy64OexUc4g,11719
|
178
|
+
sky/serve/core.py,sha256=pz62ERWyHcg2p-rtzVjBZaWmKrK6Hx213YPoa_J5Tlo,31097
|
179
179
|
sky/serve/load_balancer.py,sha256=aUfDsgUT_fYrchCwJCeunMPXmAkwJAY58BEu-IN2FaA,11571
|
180
180
|
sky/serve/load_balancing_policies.py,sha256=ExdwH_pxPYpJ6CkoTQCOPSa4lzwbq1LFFMKzmIu8ryk,2331
|
181
|
-
sky/serve/replica_managers.py,sha256=
|
181
|
+
sky/serve/replica_managers.py,sha256=1xYDK9Te5wFEF5hUK0gyNIUib0MY-HScLHUBDlTSl-k,57774
|
182
182
|
sky/serve/serve_state.py,sha256=5BZSKKKxQRk-0mku17Ch4Veu4qOhaFvaOJY3zrZCkLw,19315
|
183
|
-
sky/serve/serve_utils.py,sha256=
|
183
|
+
sky/serve/serve_utils.py,sha256=egGb4HB4yMyFISqZgMWnoHH8AfuLGt3xq4raU8V0qds,39755
|
184
184
|
sky/serve/service.py,sha256=fkfJvNJ2BO6rfV0TblZG-QkOXaCyZlpkwbGgrsTzf2w,11872
|
185
185
|
sky/serve/service_spec.py,sha256=iRhW95SERvb4NWtV10uCuhgvW31HuSAmZZ55OX0WK8s,15309
|
186
186
|
sky/setup_files/MANIFEST.in,sha256=CXz8lIJMgWlH9TvYgzIL3vPFtSDoQq-UMfD9K62rtH4,590
|
@@ -274,9 +274,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=KPqp23B-zQ2SZK03jdHeF9fLTog
|
|
274
274
|
sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
|
275
275
|
sky/utils/kubernetes/rsync_helper.sh,sha256=aRMa_0JRHtXFOPtEg4rFAwR1t57wvvAoGZhn3H3BtGk,1059
|
276
276
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
|
277
|
-
skypilot_nightly-1.0.0.
|
278
|
-
skypilot_nightly-1.0.0.
|
279
|
-
skypilot_nightly-1.0.0.
|
280
|
-
skypilot_nightly-1.0.0.
|
281
|
-
skypilot_nightly-1.0.0.
|
282
|
-
skypilot_nightly-1.0.0.
|
277
|
+
skypilot_nightly-1.0.0.dev20241020.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
278
|
+
skypilot_nightly-1.0.0.dev20241020.dist-info/METADATA,sha256=L8MmlJIr14EcjFiMMWqkaWvJsVZnm_SmhgzceSuhdRs,19540
|
279
|
+
skypilot_nightly-1.0.0.dev20241020.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
|
280
|
+
skypilot_nightly-1.0.0.dev20241020.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
281
|
+
skypilot_nightly-1.0.0.dev20241020.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
282
|
+
skypilot_nightly-1.0.0.dev20241020.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20241018.dist-info → skypilot_nightly-1.0.0.dev20241020.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|