skypilot-nightly 1.0.0.dev20241018__py3-none-any.whl → 1.0.0.dev20241020__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '71a95f4bf7f1446e80bb5c24d23c1695bc4fc031'
8
+ _SKYPILOT_COMMIT_SHA = 'c6ae536d8dfedc3bbcf427a81480382b9d5f4c29'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20241018'
38
+ __version__ = '1.0.0.dev20241020'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/cli.py CHANGED
@@ -4380,9 +4380,14 @@ def serve_status(all: bool, endpoint: bool, service_names: List[str]):
4380
4380
  default=False,
4381
4381
  required=False,
4382
4382
  help='Skip confirmation prompt.')
4383
+ @click.option('--replica-id',
4384
+ default=None,
4385
+ type=int,
4386
+ help='Tear down a given replica')
4383
4387
  # pylint: disable=redefined-builtin
4384
- def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool):
4385
- """Teardown service(s).
4388
+ def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool,
4389
+ replica_id: Optional[int]):
4390
+ """Teardown service(s) or a replica.
4386
4391
 
4387
4392
  SERVICE_NAMES is the name of the service (or glob pattern) to tear down. If
4388
4393
  both SERVICE_NAMES and ``--all`` are supplied, the latter takes precedence.
@@ -4408,6 +4413,12 @@ def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool):
4408
4413
  \b
4409
4414
  # Forcefully tear down a service in failed status.
4410
4415
  sky serve down failed-service --purge
4416
+ \b
4417
+ # Tear down a specific replica
4418
+ sky serve down my-service --replica-id 1
4419
+ \b
4420
+ # Forcefully tear down a specific replica, even in failed status.
4421
+ sky serve down my-service --replica-id 1 --purge
4411
4422
  """
4412
4423
  if sum([len(service_names) > 0, all]) != 1:
4413
4424
  argument_str = f'SERVICE_NAMES={",".join(service_names)}' if len(
@@ -4417,22 +4428,45 @@ def serve_down(service_names: List[str], all: bool, purge: bool, yes: bool):
4417
4428
  'Can only specify one of SERVICE_NAMES or --all. '
4418
4429
  f'Provided {argument_str!r}.')
4419
4430
 
4431
+ replica_id_is_defined = replica_id is not None
4432
+ if replica_id_is_defined:
4433
+ if len(service_names) != 1:
4434
+ service_names_str = ', '.join(service_names)
4435
+ raise click.UsageError(f'The --replica-id option can only be used '
4436
+ f'with a single service name. Got: '
4437
+ f'{service_names_str}.')
4438
+ if all:
4439
+ raise click.UsageError('The --replica-id option cannot be used '
4440
+ 'with the --all option.')
4441
+
4420
4442
  backend_utils.is_controller_accessible(
4421
4443
  controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
4422
4444
  stopped_message='All services should have been terminated.',
4423
4445
  exit_if_not_accessible=True)
4424
4446
 
4425
4447
  if not yes:
4426
- quoted_service_names = [f'{name!r}' for name in service_names]
4427
- service_identity_str = f'service(s) {", ".join(quoted_service_names)}'
4428
- if all:
4429
- service_identity_str = 'all services'
4430
- click.confirm(f'Terminating {service_identity_str}. Proceed?',
4431
- default=True,
4432
- abort=True,
4433
- show_default=True)
4434
-
4435
- serve_lib.down(service_names=service_names, all=all, purge=purge)
4448
+ if replica_id_is_defined:
4449
+ click.confirm(
4450
+ f'Terminating replica ID {replica_id} in '
4451
+ f'{service_names[0]!r}. Proceed?',
4452
+ default=True,
4453
+ abort=True,
4454
+ show_default=True)
4455
+ else:
4456
+ quoted_service_names = [f'{name!r}' for name in service_names]
4457
+ service_identity_str = (f'service(s) '
4458
+ f'{", ".join(quoted_service_names)}')
4459
+ if all:
4460
+ service_identity_str = 'all services'
4461
+ click.confirm(f'Terminating {service_identity_str}. Proceed?',
4462
+ default=True,
4463
+ abort=True,
4464
+ show_default=True)
4465
+
4466
+ if replica_id_is_defined:
4467
+ serve_lib.terminate_replica(service_names[0], replica_id, purge)
4468
+ else:
4469
+ serve_lib.down(service_names=service_names, all=all, purge=purge)
4436
4470
 
4437
4471
 
4438
4472
  @serve.command('logs', cls=_DocumentedCodeCommand)
@@ -308,7 +308,17 @@ def list_accelerators(
308
308
 
309
309
  def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
310
310
  """Returns the image id from the tag."""
311
- return common.get_image_id_from_tag_impl(_image_df, tag, region)
311
+ global _image_df
312
+
313
+ image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
314
+ if image_id is None:
315
+ # Refresh the image catalog and try again, if the image tag is not
316
+ # found.
317
+ logger.debug('Refreshing the image catalog and trying again.')
318
+ _image_df = common.read_catalog('aws/images.csv',
319
+ pull_frequency_hours=0)
320
+ image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
321
+ return image_id
312
322
 
313
323
 
314
324
  def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
sky/serve/__init__.py CHANGED
@@ -8,6 +8,7 @@ from sky.serve.constants import SKYSERVE_METADATA_DIR
8
8
  from sky.serve.core import down
9
9
  from sky.serve.core import status
10
10
  from sky.serve.core import tail_logs
11
+ from sky.serve.core import terminate_replica
11
12
  from sky.serve.core import up
12
13
  from sky.serve.core import update
13
14
  from sky.serve.serve_state import ReplicaStatus
@@ -42,6 +43,7 @@ __all__ = [
42
43
  'SKY_SERVE_CONTROLLER_NAME',
43
44
  'SKYSERVE_METADATA_DIR',
44
45
  'status',
46
+ 'terminate_replica',
45
47
  'tail_logs',
46
48
  'up',
47
49
  'update',
sky/serve/constants.py CHANGED
@@ -92,4 +92,11 @@ REPLICA_ID_ENV_VAR = 'SKYPILOT_SERVE_REPLICA_ID'
92
92
  # change for the serve_utils.ServeCodeGen, we need to bump this version, so that
93
93
  # the user can be notified to update their SkyPilot serve version on the remote
94
94
  # cluster.
95
- SERVE_VERSION = 1
95
+ # Changelog:
96
+ # v1.0 - Introduce rolling update.
97
+ # v2.0 - Added terminate-replica feature.
98
+ SERVE_VERSION = 2
99
+
100
+ TERMINATE_REPLICA_VERSION_MISMATCH_ERROR = (
101
+ 'The version of service is outdated and does not support manually '
102
+ 'terminating replicas. Please terminate the service and spin up again.')
sky/serve/controller.py CHANGED
@@ -9,6 +9,7 @@ import time
9
9
  import traceback
10
10
  from typing import Any, Dict, List
11
11
 
12
+ import colorama
12
13
  import fastapi
13
14
  from fastapi import responses
14
15
  import uvicorn
@@ -157,6 +158,75 @@ class SkyServeController:
157
158
  return responses.JSONResponse(content={'message': 'Error'},
158
159
  status_code=500)
159
160
 
161
+ @self._app.post('/controller/terminate_replica')
162
+ async def terminate_replica(
163
+ request: fastapi.Request) -> fastapi.Response:
164
+ request_data = await request.json()
165
+ replica_id = request_data['replica_id']
166
+ assert isinstance(replica_id,
167
+ int), 'Error: replica ID must be an integer.'
168
+ purge = request_data['purge']
169
+ assert isinstance(purge, bool), 'Error: purge must be a boolean.'
170
+ replica_info = serve_state.get_replica_info_from_id(
171
+ self._service_name, replica_id)
172
+ assert replica_info is not None, (f'Error: replica '
173
+ f'{replica_id} does not exist.')
174
+ replica_status = replica_info.status
175
+
176
+ if replica_status == serve_state.ReplicaStatus.SHUTTING_DOWN:
177
+ return responses.JSONResponse(
178
+ status_code=409,
179
+ content={
180
+ 'message':
181
+ f'Replica {replica_id} of service '
182
+ f'{self._service_name!r} is already in the process '
183
+ f'of terminating. Skip terminating now.'
184
+ })
185
+
186
+ if (replica_status in serve_state.ReplicaStatus.failed_statuses()
187
+ and not purge):
188
+ return responses.JSONResponse(
189
+ status_code=409,
190
+ content={
191
+ 'message': f'{colorama.Fore.YELLOW}Replica '
192
+ f'{replica_id} of service '
193
+ f'{self._service_name!r} is in failed '
194
+ f'status ({replica_info.status}). '
195
+ f'Skipping its termination as it could '
196
+ f'lead to a resource leak. '
197
+ f'(Use `sky serve down '
198
+ f'{self._service_name!r} --replica-id '
199
+ f'{replica_id} --purge` to '
200
+ 'forcefully terminate the replica.)'
201
+ f'{colorama.Style.RESET_ALL}'
202
+ })
203
+
204
+ self._replica_manager.scale_down(replica_id, purge=purge)
205
+
206
+ action = 'terminated' if not purge else 'purged'
207
+ message = (f'{colorama.Fore.GREEN}Replica {replica_id} of service '
208
+ f'{self._service_name!r} is scheduled to be '
209
+ f'{action}.{colorama.Style.RESET_ALL}\n'
210
+ f'Please use {ux_utils.BOLD}sky serve status '
211
+ f'{self._service_name}{ux_utils.RESET_BOLD} '
212
+ f'to check the latest status.')
213
+ return responses.JSONResponse(status_code=200,
214
+ content={'message': message})
215
+
216
+ @self._app.exception_handler(Exception)
217
+ async def validation_exception_handler(
218
+ request: fastapi.Request, exc: Exception) -> fastapi.Response:
219
+ with ux_utils.enable_traceback():
220
+ logger.error(f'Error in controller: {exc!r}')
221
+ return responses.JSONResponse(
222
+ status_code=500,
223
+ content={
224
+ 'message':
225
+ (f'Failed method {request.method} at URL {request.url}.'
226
+ f' Exception message is {exc!r}.')
227
+ },
228
+ )
229
+
160
230
  threading.Thread(target=self._run_autoscaler).start()
161
231
 
162
232
  logger.info('SkyServe Controller started on '
sky/serve/core.py CHANGED
@@ -503,6 +503,53 @@ def down(
503
503
  sky_logging.print(stdout)
504
504
 
505
505
 
506
+ @usage_lib.entrypoint
507
+ def terminate_replica(service_name: str, replica_id: int, purge: bool) -> None:
508
+ """Tear down a specific replica for the given service.
509
+
510
+ Args:
511
+ service_name: Name of the service.
512
+ replica_id: ID of replica to terminate.
513
+ purge: Whether to terminate replicas in a failed status. These replicas
514
+ may lead to resource leaks, so we require the user to explicitly
515
+ specify this flag to make sure they are aware of this potential
516
+ resource leak.
517
+
518
+ Raises:
519
+ sky.exceptions.ClusterNotUpError: if the sky serve controller is not up.
520
+ RuntimeError: if failed to terminate the replica.
521
+ """
522
+ handle = backend_utils.is_controller_accessible(
523
+ controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
524
+ stopped_message=
525
+ 'No service is running now. Please spin up a service first.',
526
+ non_existent_message='No service is running now. '
527
+ 'Please spin up a service first.',
528
+ )
529
+
530
+ backend = backend_utils.get_backend_from_handle(handle)
531
+ assert isinstance(backend, backends.CloudVmRayBackend)
532
+
533
+ code = serve_utils.ServeCodeGen.terminate_replica(service_name, replica_id,
534
+ purge)
535
+ returncode, stdout, stderr = backend.run_on_head(handle,
536
+ code,
537
+ require_outputs=True,
538
+ stream_logs=False,
539
+ separate_stderr=True)
540
+
541
+ try:
542
+ subprocess_utils.handle_returncode(returncode,
543
+ code,
544
+ 'Failed to terminate the replica',
545
+ stderr,
546
+ stream_logs=True)
547
+ except exceptions.CommandError as e:
548
+ raise RuntimeError(e.error_msg) from e
549
+
550
+ sky_logging.print(stdout)
551
+
552
+
506
553
  @usage_lib.entrypoint
507
554
  def status(
508
555
  service_names: Optional[Union[str,
@@ -247,6 +247,8 @@ class ReplicaStatusProperty:
247
247
  is_scale_down: bool = False
248
248
  # The replica's spot instance was preempted.
249
249
  preempted: bool = False
250
+ # Whether the replica is purged.
251
+ purged: bool = False
250
252
 
251
253
  def remove_terminated_replica(self) -> bool:
252
254
  """Whether to remove the replica record from the replica table.
@@ -307,6 +309,8 @@ class ReplicaStatusProperty:
307
309
  return False
308
310
  if self.preempted:
309
311
  return False
312
+ if self.purged:
313
+ return False
310
314
  return True
311
315
 
312
316
  def to_replica_status(self) -> serve_state.ReplicaStatus:
@@ -590,7 +594,7 @@ class ReplicaManager:
590
594
  """
591
595
  raise NotImplementedError
592
596
 
593
- def scale_down(self, replica_id: int) -> None:
597
+ def scale_down(self, replica_id: int, purge: bool = False) -> None:
594
598
  """Scale down replica with replica_id."""
595
599
  raise NotImplementedError
596
600
 
@@ -679,7 +683,8 @@ class SkyPilotReplicaManager(ReplicaManager):
679
683
  replica_id: int,
680
684
  sync_down_logs: bool,
681
685
  replica_drain_delay_seconds: int,
682
- is_scale_down: bool = False) -> None:
686
+ is_scale_down: bool = False,
687
+ purge: bool = False) -> None:
683
688
 
684
689
  if replica_id in self._launch_process_pool:
685
690
  info = serve_state.get_replica_info_from_id(self._service_name,
@@ -763,16 +768,18 @@ class SkyPilotReplicaManager(ReplicaManager):
763
768
  )
764
769
  info.status_property.sky_down_status = ProcessStatus.RUNNING
765
770
  info.status_property.is_scale_down = is_scale_down
771
+ info.status_property.purged = purge
766
772
  serve_state.add_or_update_replica(self._service_name, replica_id, info)
767
773
  p.start()
768
774
  self._down_process_pool[replica_id] = p
769
775
 
770
- def scale_down(self, replica_id: int) -> None:
776
+ def scale_down(self, replica_id: int, purge: bool = False) -> None:
771
777
  self._terminate_replica(
772
778
  replica_id,
773
779
  sync_down_logs=False,
774
780
  replica_drain_delay_seconds=_DEFAULT_DRAIN_SECONDS,
775
- is_scale_down=True)
781
+ is_scale_down=True,
782
+ purge=purge)
776
783
 
777
784
  def _handle_preemption(self, info: ReplicaInfo) -> bool:
778
785
  """Handle preemption of the replica if any error happened.
@@ -911,6 +918,8 @@ class SkyPilotReplicaManager(ReplicaManager):
911
918
  # since the user should have fixed the error before the update.
912
919
  elif info.version != self.latest_version:
913
920
  removal_reason = 'for version outdated'
921
+ elif info.status_property.purged:
922
+ removal_reason = 'for purge'
914
923
  else:
915
924
  logger.info(f'Termination of replica {replica_id} '
916
925
  'finished. Replica info is kept since some '
sky/serve/serve_utils.py CHANGED
@@ -313,6 +313,36 @@ def update_service_encoded(service_name: str, version: int, mode: str) -> str:
313
313
  return common_utils.encode_payload(service_msg)
314
314
 
315
315
 
316
+ def terminate_replica(service_name: str, replica_id: int, purge: bool) -> str:
317
+ service_status = _get_service_status(service_name)
318
+ if service_status is None:
319
+ with ux_utils.print_exception_no_traceback():
320
+ raise ValueError(f'Service {service_name!r} does not exist.')
321
+ replica_info = serve_state.get_replica_info_from_id(service_name,
322
+ replica_id)
323
+ if replica_info is None:
324
+ with ux_utils.print_exception_no_traceback():
325
+ raise ValueError(
326
+ f'Replica {replica_id} for service {service_name} does not '
327
+ 'exist.')
328
+
329
+ controller_port = service_status['controller_port']
330
+ resp = requests.post(
331
+ _CONTROLLER_URL.format(CONTROLLER_PORT=controller_port) +
332
+ '/controller/terminate_replica',
333
+ json={
334
+ 'replica_id': replica_id,
335
+ 'purge': purge,
336
+ })
337
+
338
+ message: str = resp.json()['message']
339
+ if resp.status_code != 200:
340
+ with ux_utils.print_exception_no_traceback():
341
+ raise ValueError(f'Failed to terminate replica {replica_id} '
342
+ f'in {service_name}. Reason:\n{message}')
343
+ return message
344
+
345
+
316
346
  def _get_service_status(
317
347
  service_name: str,
318
348
  with_replica_info: bool = True) -> Optional[Dict[str, Any]]:
@@ -735,7 +765,7 @@ def _get_replicas(service_record: Dict[str, Any]) -> str:
735
765
 
736
766
 
737
767
  def get_endpoint(service_record: Dict[str, Any]) -> str:
738
- # Don't use backend_utils.is_controller_up since it is too slow.
768
+ # Don't use backend_utils.is_controller_accessible since it is too slow.
739
769
  handle = global_user_state.get_handle_from_cluster_name(
740
770
  SKY_SERVE_CONTROLLER_NAME)
741
771
  assert isinstance(handle, backends.CloudVmRayResourceHandle)
@@ -915,6 +945,18 @@ class ServeCodeGen:
915
945
  ]
916
946
  return cls._build(code)
917
947
 
948
+ @classmethod
949
+ def terminate_replica(cls, service_name: str, replica_id: int,
950
+ purge: bool) -> str:
951
+ code = [
952
+ f'(lambda: print(serve_utils.terminate_replica({service_name!r}, '
953
+ f'{replica_id}, {purge}), end="", flush=True) '
954
+ 'if getattr(constants, "SERVE_VERSION", 0) >= 2 else '
955
+ f'exec("raise RuntimeError('
956
+ f'{constants.TERMINATE_REPLICA_VERSION_MISMATCH_ERROR!r})"))()'
957
+ ]
958
+ return cls._build(code)
959
+
918
960
  @classmethod
919
961
  def wait_service_registration(cls, service_name: str, job_id: int) -> str:
920
962
  code = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20241018
3
+ Version: 1.0.0.dev20241020
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -153,30 +153,32 @@ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "vsphere"
153
153
 
154
154
  ----
155
155
  :fire: *News* :fire:
156
- - [Sep, 2024] Point, Launch and Serve **Llama 3.2** on Kubernetes or Any Cloud: [**example**](./llm/llama-3_2/)
157
- - [Sep, 2024] Run and deploy [**Pixtral**](./llm/pixtral), the first open-source multimodal model from Mistral AI.
158
- - [Jul, 2024] [**Finetune**](./llm/llama-3_1-finetuning/) and [**serve**](./llm/llama-3_1/) **Llama 3.1** on your infra
159
- - [Jun, 2024] Reproduce **GPT** with [llm.c](https://github.com/karpathy/llm.c/discussions/481) on any cloud: [**guide**](./llm/gpt-2/)
160
- - [Apr, 2024] Serve **Qwen-110B** on your infra: [**example**](./llm/qwen/)
161
- - [Apr, 2024] Using **Ollama** to deploy quantized LLMs on CPUs and GPUs: [**example**](./llm/ollama/)
162
- - [Feb, 2024] Deploying and scaling **Gemma** with SkyServe: [**example**](./llm/gemma/)
163
- - [Feb, 2024] Serving **Code Llama 70B** with vLLM and SkyServe: [**example**](./llm/codellama/)
164
- - [Dec, 2023] **Mixtral 8x7B**, a high quality sparse mixture-of-experts model, was released by Mistral AI! Deploy via SkyPilot on any cloud: [**example**](./llm/mixtral/)
165
- - [Nov, 2023] Using **Axolotl** to finetune Mistral 7B on the cloud (on-demand and spot): [**example**](./llm/axolotl/)
156
+ - [Oct 2024] :tada: **SkyPilot crossed 1M+ downloads** :tada:: Thank you to our community! [**Twitter/X**](https://x.com/skypilot_org/status/1844770841718067638)
157
+ - [Sep 2024] Point, Launch and Serve **Llama 3.2** on Kubernetes or Any Cloud: [**example**](./llm/llama-3_2/)
158
+ - [Sep 2024] Run and deploy [**Pixtral**](./llm/pixtral), the first open-source multimodal model from Mistral AI.
159
+ - [Jun 2024] Reproduce **GPT** with [llm.c](https://github.com/karpathy/llm.c/discussions/481) on any cloud: [**guide**](./llm/gpt-2/)
160
+ - [Apr 2024] Serve [**Qwen-110B**](https://qwenlm.github.io/blog/qwen1.5-110b/) on your infra: [**example**](./llm/qwen/)
161
+ - [Apr 2024] Using [**Ollama**](https://github.com/ollama/ollama) to deploy quantized LLMs on CPUs and GPUs: [**example**](./llm/ollama/)
162
+ - [Feb 2024] Deploying and scaling [**Gemma**](https://blog.google/technology/developers/gemma-open-models/) with SkyServe: [**example**](./llm/gemma/)
163
+ - [Feb 2024] Serving [**Code Llama 70B**](https://ai.meta.com/blog/code-llama-large-language-model-coding/) with vLLM and SkyServe: [**example**](./llm/codellama/)
164
+ - [Dec 2023] [**Mixtral 8x7B**](https://mistral.ai/news/mixtral-of-experts/), a high quality sparse mixture-of-experts model, was released by Mistral AI! Deploy via SkyPilot on any cloud: [**example**](./llm/mixtral/)
165
+ - [Nov 2023] Using [**Axolotl**](https://github.com/OpenAccess-AI-Collective/axolotl) to finetune Mistral 7B on the cloud (on-demand and spot): [**example**](./llm/axolotl/)
166
+
167
+ **LLM Finetuning Cookbooks**: Finetuning Llama 2 / Llama 3.1 in your own cloud environment, privately: Llama 2 [**example**](./llm/vicuna-llama-2/) and [**blog**](https://blog.skypilot.co/finetuning-llama2-operational-guide/); Llama 3.1 [**example**](./llm/llama-3_1-finetuning/) and [**blog**](https://blog.skypilot.co/finetune-llama-3_1-on-your-infra/)
166
168
 
167
169
  <details>
168
170
  <summary>Archived</summary>
169
171
 
170
- - [Apr, 2024] Serve and finetune [**Llama 3**](https://skypilot.readthedocs.io/en/latest/gallery/llms/llama-3.html) on any cloud or Kubernetes: [**example**](./llm/llama-3/)
171
- - [Mar, 2024] Serve and deploy [**Databricks DBRX**](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) on your infra: [**example**](./llm/dbrx/)
172
- - [Feb, 2024] Speed up your LLM deployments with [**SGLang**](https://github.com/sgl-project/sglang) for 5x throughput on SkyServe: [**example**](./llm/sglang/)
173
- - [Dec, 2023] Using [**LoRAX**](https://github.com/predibase/lorax) to serve 1000s of finetuned LLMs on a single instance in the cloud: [**example**](./llm/lorax/)
174
- - [Sep, 2023] [**Mistral 7B**](https://mistral.ai/news/announcing-mistral-7b/), a high-quality open LLM, was released! Deploy via SkyPilot on any cloud: [**Mistral docs**](https://docs.mistral.ai/self-deployment/skypilot)
175
- - [Sep, 2023] Case study: [**Covariant**](https://covariant.ai/) transformed AI development on the cloud using SkyPilot, delivering models 4x faster cost-effectively: [**read the case study**](https://blog.skypilot.co/covariant/)
176
- - [Aug, 2023] **Finetuning Cookbook**: Finetuning Llama 2 in your own cloud environment, privately: [**example**](./llm/vicuna-llama-2/), [**blog post**](https://blog.skypilot.co/finetuning-llama2-operational-guide/)
177
- - [July, 2023] Self-Hosted **Llama-2 Chatbot** on Any Cloud: [**example**](./llm/llama-2/)
178
- - [June, 2023] Serving LLM 24x Faster On the Cloud [**with vLLM**](https://vllm.ai/) and SkyPilot: [**example**](./llm/vllm/), [**blog post**](https://blog.skypilot.co/serving-llm-24x-faster-on-the-cloud-with-vllm-and-skypilot/)
179
- - [April, 2023] [SkyPilot YAMLs](./llm/vicuna/) for finetuning & serving the [Vicuna LLM](https://lmsys.org/blog/2023-03-30-vicuna/) with a single command!
172
+ - [Jul 2024] [**Finetune**](./llm/llama-3_1-finetuning/) and [**serve**](./llm/llama-3_1/) **Llama 3.1** on your infra
173
+ - [Apr 2024] Serve and finetune [**Llama 3**](https://skypilot.readthedocs.io/en/latest/gallery/llms/llama-3.html) on any cloud or Kubernetes: [**example**](./llm/llama-3/)
174
+ - [Mar 2024] Serve and deploy [**Databricks DBRX**](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) on your infra: [**example**](./llm/dbrx/)
175
+ - [Feb 2024] Speed up your LLM deployments with [**SGLang**](https://github.com/sgl-project/sglang) for 5x throughput on SkyServe: [**example**](./llm/sglang/)
176
+ - [Dec 2023] Using [**LoRAX**](https://github.com/predibase/lorax) to serve 1000s of finetuned LLMs on a single instance in the cloud: [**example**](./llm/lorax/)
177
+ - [Sep 2023] [**Mistral 7B**](https://mistral.ai/news/announcing-mistral-7b/), a high-quality open LLM, was released! Deploy via SkyPilot on any cloud: [**Mistral docs**](https://docs.mistral.ai/self-deployment/skypilot)
178
+ - [Sep 2023] Case study: [**Covariant**](https://covariant.ai/) transformed AI development on the cloud using SkyPilot, delivering models 4x faster cost-effectively: [**read the case study**](https://blog.skypilot.co/covariant/)
179
+ - [Jul 2023] Self-Hosted **Llama-2 Chatbot** on Any Cloud: [**example**](./llm/llama-2/)
180
+ - [Jun 2023] Serving LLM 24x Faster On the Cloud [**with vLLM**](https://vllm.ai/) and SkyPilot: [**example**](./llm/vllm/), [**blog post**](https://blog.skypilot.co/serving-llm-24x-faster-on-the-cloud-with-vllm-and-skypilot/)
181
+ - [Apr 2023] [SkyPilot YAMLs](./llm/vicuna/) for finetuning & serving the [Vicuna LLM](https://lmsys.org/blog/2023-03-30-vicuna/) with a single command!
180
182
 
181
183
  </details>
182
184
 
@@ -1,8 +1,8 @@
1
- sky/__init__.py,sha256=ooJaoPt0Vq10nF2ftXGThCKQFJ2HbgQNKQ7Dp6Qg6s4,5854
1
+ sky/__init__.py,sha256=njbGTeVXmuel8rNQYbPE9POlsdZUizsa3jEcGfwJklE,5854
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=pAdCT60OxxiXI9KXDyP2lQ9u9vMc6aMtq5Xi2h_hbdw,20984
4
4
  sky/check.py,sha256=jLMIIJrseaZj1_o5WkbaD9XdyXIlCaT6pyAaIFdhdmA,9079
5
- sky/cli.py,sha256=PJR6W92twf89j17OWLQJ9RawdazJcGslfW2L_fLB2PM,208545
5
+ sky/cli.py,sha256=XcQeVtH5J7xcARGagYicmTUfd2145jN2nvnZaZXtZlI,209981
6
6
  sky/cloud_stores.py,sha256=RjFgmRhUh1Kk__f6g3KxzLp9s7dA0pFK4W1AukEuUaw,21153
7
7
  sky/core.py,sha256=DW9OGE2kS2CmsvQ1grrpRnNFS3woMGWSHu5GE99e-I4,38190
8
8
  sky/dag.py,sha256=WLFWr5hfrwjd31uYlNvI-zWUk7tLaT_gzJn4LzbVtkE,2780
@@ -56,7 +56,7 @@ sky/clouds/runpod.py,sha256=lstUC6f4JDhtcH9NfwkbpCJMmfmvMigoanhPXPbTYds,11540
56
56
  sky/clouds/scp.py,sha256=2KLTuNSMdBzK8CLwSesv7efOuiLidIMoyNG4AOt5Sqw,15870
57
57
  sky/clouds/vsphere.py,sha256=7eZFYIDtY5sX_ATr8h7kwwkY9t8Z-EYMJ9HCjoRBoxI,12309
58
58
  sky/clouds/service_catalog/__init__.py,sha256=e0K-c64jQV9d6zly5OnIXMsYaZXs_Ko9osAbDaRlOOw,14743
59
- sky/clouds/service_catalog/aws_catalog.py,sha256=Ab8VAmSiuV37k6LLNBtjTYoe_UDMJRgXe1Vk3wiUO_U,12605
59
+ sky/clouds/service_catalog/aws_catalog.py,sha256=1wX1-wOMw2LZ7RkV_Ah7c42RLRYm-m5_GAXzn32M5a8,13038
60
60
  sky/clouds/service_catalog/azure_catalog.py,sha256=VJi3yfhZy9Sc6UfcLAc8xIoTlUlUr090TODkCZyyHFw,7311
61
61
  sky/clouds/service_catalog/common.py,sha256=PA3llB0zZh4v0DO_gDDCKGhRIBx16CAp2WJZNxhjNOA,27266
62
62
  sky/clouds/service_catalog/config.py,sha256=ylzqewdEBjDg4awvFek6ldYmFrnvD2bVGLZuLPvEVYA,1793
@@ -171,16 +171,16 @@ sky/provision/vsphere/common/service_manager_factory.py,sha256=YkvfHiRXFK_Nb406z
171
171
  sky/provision/vsphere/common/ssl_helper.py,sha256=TYzN9K0i_Mk_17PKGyGPgvOGfoizysuuIeYapcy_tWE,795
172
172
  sky/provision/vsphere/common/vapiconnect.py,sha256=R2I1ZWBA19d11fZ_FrIzQT8E1aLl1HU4Rdcj8Z5r3NE,2932
173
173
  sky/provision/vsphere/common/vim_utils.py,sha256=EMWLS8ILpdx6XwUZ9I53y0B_1yFrRrlr4jjIMT84hAc,17877
174
- sky/serve/__init__.py,sha256=Qg_XPOtQsUxiN-Q3njHZRfzoMcQ_KKU1QthkiTbESDw,1661
174
+ sky/serve/__init__.py,sha256=gFZt7W3UPMi4qvYe2xgkHg1VxbR1WGavKyWLBUD3mpg,1731
175
175
  sky/serve/autoscalers.py,sha256=khY1oZ22PRaUQNsLCoNKH178X_NiJw0LSLOKr7_LNgY,30275
176
- sky/serve/constants.py,sha256=OansIC7a0Pwat-Y5SF43T9phad_EvyjKO3peZgKFEHk,4367
177
- sky/serve/controller.py,sha256=gfE_gB7wxE1VxvnYqw_-KcMGc6X2kufl-NLR7sWdzdY,8172
178
- sky/serve/core.py,sha256=yebcgmafGwKppXA1vyJdnbWdOg5BSlh87pKL9gkzHPE,29066
176
+ sky/serve/constants.py,sha256=7MflfgTHO9gDSux93U4BmNeEMWXxZB4q7I54KUwgp-s,4651
177
+ sky/serve/controller.py,sha256=R5iIEGEEFtbm_6MvSGelYZP-vSmW0cSFuy64OexUc4g,11719
178
+ sky/serve/core.py,sha256=pz62ERWyHcg2p-rtzVjBZaWmKrK6Hx213YPoa_J5Tlo,31097
179
179
  sky/serve/load_balancer.py,sha256=aUfDsgUT_fYrchCwJCeunMPXmAkwJAY58BEu-IN2FaA,11571
180
180
  sky/serve/load_balancing_policies.py,sha256=ExdwH_pxPYpJ6CkoTQCOPSa4lzwbq1LFFMKzmIu8ryk,2331
181
- sky/serve/replica_managers.py,sha256=dO962WZ_6YWRDpyNemY7SzC7fZHlNfoL4kUS3MaKwDo,57405
181
+ sky/serve/replica_managers.py,sha256=1xYDK9Te5wFEF5hUK0gyNIUib0MY-HScLHUBDlTSl-k,57774
182
182
  sky/serve/serve_state.py,sha256=5BZSKKKxQRk-0mku17Ch4Veu4qOhaFvaOJY3zrZCkLw,19315
183
- sky/serve/serve_utils.py,sha256=im_1cJoJmufFxkBVnhK4nI6XlHvEXersQyIivNruJJc,38009
183
+ sky/serve/serve_utils.py,sha256=egGb4HB4yMyFISqZgMWnoHH8AfuLGt3xq4raU8V0qds,39755
184
184
  sky/serve/service.py,sha256=fkfJvNJ2BO6rfV0TblZG-QkOXaCyZlpkwbGgrsTzf2w,11872
185
185
  sky/serve/service_spec.py,sha256=iRhW95SERvb4NWtV10uCuhgvW31HuSAmZZ55OX0WK8s,15309
186
186
  sky/setup_files/MANIFEST.in,sha256=CXz8lIJMgWlH9TvYgzIL3vPFtSDoQq-UMfD9K62rtH4,590
@@ -274,9 +274,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=KPqp23B-zQ2SZK03jdHeF9fLTog
274
274
  sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
275
275
  sky/utils/kubernetes/rsync_helper.sh,sha256=aRMa_0JRHtXFOPtEg4rFAwR1t57wvvAoGZhn3H3BtGk,1059
276
276
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
277
- skypilot_nightly-1.0.0.dev20241018.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
278
- skypilot_nightly-1.0.0.dev20241018.dist-info/METADATA,sha256=hKti-qYovHe9BXjvZnYoV-88kOo2Qz0-xTDwY08RzrM,18945
279
- skypilot_nightly-1.0.0.dev20241018.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
280
- skypilot_nightly-1.0.0.dev20241018.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
281
- skypilot_nightly-1.0.0.dev20241018.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
282
- skypilot_nightly-1.0.0.dev20241018.dist-info/RECORD,,
277
+ skypilot_nightly-1.0.0.dev20241020.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
278
+ skypilot_nightly-1.0.0.dev20241020.dist-info/METADATA,sha256=L8MmlJIr14EcjFiMMWqkaWvJsVZnm_SmhgzceSuhdRs,19540
279
+ skypilot_nightly-1.0.0.dev20241020.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
280
+ skypilot_nightly-1.0.0.dev20241020.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
281
+ skypilot_nightly-1.0.0.dev20241020.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
282
+ skypilot_nightly-1.0.0.dev20241020.dist-info/RECORD,,