skypilot-nightly 1.0.0.dev20240924__py3-none-any.whl → 1.0.0.dev20240926__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +23 -5
- sky/provision/kubernetes/instance.py +15 -46
- sky/provision/kubernetes/utils.py +143 -0
- sky/provision/paperspace/constants.py +6 -0
- {skypilot_nightly-1.0.0.dev20240924.dist-info → skypilot_nightly-1.0.0.dev20240926.dist-info}/METADATA +5 -4
- {skypilot_nightly-1.0.0.dev20240924.dist-info → skypilot_nightly-1.0.0.dev20240926.dist-info}/RECORD +11 -11
- {skypilot_nightly-1.0.0.dev20240924.dist-info → skypilot_nightly-1.0.0.dev20240926.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20240924.dist-info → skypilot_nightly-1.0.0.dev20240926.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20240924.dist-info → skypilot_nightly-1.0.0.dev20240926.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20240924.dist-info → skypilot_nightly-1.0.0.dev20240926.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'e95332b9eb8de4cdcac464ff704bf64f3285e776'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20240926'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
@@ -48,6 +48,7 @@ from sky.provision import common as provision_common
|
|
48
48
|
from sky.provision import instance_setup
|
49
49
|
from sky.provision import metadata_utils
|
50
50
|
from sky.provision import provisioner
|
51
|
+
from sky.provision.kubernetes import utils as kubernetes_utils
|
51
52
|
from sky.skylet import autostop_lib
|
52
53
|
from sky.skylet import constants
|
53
54
|
from sky.skylet import job_lib
|
@@ -4147,11 +4148,21 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
4147
4148
|
idle_minutes_to_autostop >= 0):
|
4148
4149
|
# We should hit this code path only for the controllers on
|
4149
4150
|
# Kubernetes and RunPod clusters.
|
4150
|
-
|
4151
|
-
handle.cluster_name)
|
4152
|
-
|
4153
|
-
|
4154
|
-
|
4151
|
+
controller = controller_utils.Controllers.from_name(
|
4152
|
+
handle.cluster_name)
|
4153
|
+
assert (controller is not None), handle.cluster_name
|
4154
|
+
if (controller
|
4155
|
+
== controller_utils.Controllers.SKY_SERVE_CONTROLLER and
|
4156
|
+
isinstance(handle.launched_resources.cloud,
|
4157
|
+
clouds.Kubernetes)):
|
4158
|
+
# For SkyServe controllers on Kubernetes: override autostop
|
4159
|
+
# behavior to force autodown (instead of no-op)
|
4160
|
+
# to avoid dangling controllers.
|
4161
|
+
down = True
|
4162
|
+
else:
|
4163
|
+
logger.info('Auto-stop is not supported for Kubernetes '
|
4164
|
+
'and RunPod clusters. Skipping.')
|
4165
|
+
return
|
4155
4166
|
|
4156
4167
|
# Check if we're stopping spot
|
4157
4168
|
assert (handle.launched_resources is not None and
|
@@ -4170,6 +4181,13 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
4170
4181
|
global_user_state.set_cluster_autostop_value(
|
4171
4182
|
handle.cluster_name, idle_minutes_to_autostop, down)
|
4172
4183
|
|
4184
|
+
# Add/Remove autodown annotations to/from Kubernetes pods.
|
4185
|
+
if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
|
4186
|
+
kubernetes_utils.set_autodown_annotations(
|
4187
|
+
handle=handle,
|
4188
|
+
idle_minutes_to_autostop=idle_minutes_to_autostop,
|
4189
|
+
down=down)
|
4190
|
+
|
4173
4191
|
def is_definitely_autostopping(self,
|
4174
4192
|
handle: CloudVmRayResourceHandle,
|
4175
4193
|
stream_logs: bool = True) -> bool:
|
@@ -28,42 +28,6 @@ TAG_RAY_CLUSTER_NAME = 'ray-cluster-name'
|
|
28
28
|
TAG_SKYPILOT_CLUSTER_NAME = 'skypilot-cluster-name'
|
29
29
|
TAG_POD_INITIALIZED = 'skypilot-initialized'
|
30
30
|
|
31
|
-
POD_STATUSES = {
|
32
|
-
'Pending', 'Running', 'Succeeded', 'Failed', 'Unknown', 'Terminating'
|
33
|
-
}
|
34
|
-
|
35
|
-
|
36
|
-
def to_label_selector(tags):
|
37
|
-
label_selector = ''
|
38
|
-
for k, v in tags.items():
|
39
|
-
if label_selector != '':
|
40
|
-
label_selector += ','
|
41
|
-
label_selector += '{}={}'.format(k, v)
|
42
|
-
return label_selector
|
43
|
-
|
44
|
-
|
45
|
-
def _filter_pods(namespace: str, context: str, tag_filters: Dict[str, str],
|
46
|
-
status_filters: Optional[List[str]]) -> Dict[str, Any]:
|
47
|
-
"""Filters pods by tags and status."""
|
48
|
-
non_included_pod_statuses = POD_STATUSES.copy()
|
49
|
-
|
50
|
-
field_selector = ''
|
51
|
-
if status_filters is not None:
|
52
|
-
non_included_pod_statuses -= set(status_filters)
|
53
|
-
field_selector = ','.join(
|
54
|
-
[f'status.phase!={status}' for status in non_included_pod_statuses])
|
55
|
-
|
56
|
-
label_selector = to_label_selector(tag_filters)
|
57
|
-
pod_list = kubernetes.core_api(context).list_namespaced_pod(
|
58
|
-
namespace, field_selector=field_selector, label_selector=label_selector)
|
59
|
-
|
60
|
-
# Don't return pods marked for deletion,
|
61
|
-
# i.e. pods with non-null metadata.DeletionTimestamp.
|
62
|
-
pods = [
|
63
|
-
pod for pod in pod_list.items if pod.metadata.deletion_timestamp is None
|
64
|
-
]
|
65
|
-
return {pod.metadata.name: pod for pod in pods}
|
66
|
-
|
67
31
|
|
68
32
|
def _get_head_pod_name(pods: Dict[str, Any]) -> Optional[str]:
|
69
33
|
head_pod_name = None
|
@@ -475,7 +439,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
|
|
475
439
|
pod_spec['metadata']['labels'].update(
|
476
440
|
{TAG_SKYPILOT_CLUSTER_NAME: cluster_name_on_cloud})
|
477
441
|
|
478
|
-
terminating_pods =
|
442
|
+
terminating_pods = kubernetes_utils.filter_pods(namespace, context, tags,
|
443
|
+
['Terminating'])
|
479
444
|
start_time = time.time()
|
480
445
|
while (len(terminating_pods) > 0 and
|
481
446
|
time.time() - start_time < _TIMEOUT_FOR_POD_TERMINATION):
|
@@ -483,8 +448,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
|
|
483
448
|
'terminating pods. Waiting them to finish: '
|
484
449
|
f'{list(terminating_pods.keys())}')
|
485
450
|
time.sleep(POLL_INTERVAL)
|
486
|
-
terminating_pods =
|
487
|
-
|
451
|
+
terminating_pods = kubernetes_utils.filter_pods(namespace, context,
|
452
|
+
tags, ['Terminating'])
|
488
453
|
|
489
454
|
if len(terminating_pods) > 0:
|
490
455
|
# If there are still terminating pods, we force delete them.
|
@@ -501,8 +466,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
|
|
501
466
|
_request_timeout=config_lib.DELETION_TIMEOUT,
|
502
467
|
grace_period_seconds=0)
|
503
468
|
|
504
|
-
running_pods =
|
505
|
-
|
469
|
+
running_pods = kubernetes_utils.filter_pods(namespace, context, tags,
|
470
|
+
['Pending', 'Running'])
|
506
471
|
head_pod_name = _get_head_pod_name(running_pods)
|
507
472
|
logger.debug(f'Found {len(running_pods)} existing pods: '
|
508
473
|
f'{list(running_pods.keys())}')
|
@@ -583,7 +548,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
|
|
583
548
|
if head_pod_name is None:
|
584
549
|
head_pod_name = pod.metadata.name
|
585
550
|
|
586
|
-
wait_pods_dict =
|
551
|
+
wait_pods_dict = kubernetes_utils.filter_pods(namespace, context, tags,
|
552
|
+
['Pending'])
|
587
553
|
wait_pods = list(wait_pods_dict.values())
|
588
554
|
|
589
555
|
networking_mode = network_utils.get_networking_mode(
|
@@ -613,8 +579,9 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
|
|
613
579
|
logger.debug(f'run_instances: all pods are scheduled and running: '
|
614
580
|
f'{list(wait_pods_dict.keys())}')
|
615
581
|
|
616
|
-
running_pods =
|
617
|
-
|
582
|
+
running_pods = kubernetes_utils.filter_pods(namespace, context, tags,
|
583
|
+
['Running'])
|
584
|
+
initialized_pods = kubernetes_utils.filter_pods(namespace, context, {
|
618
585
|
TAG_POD_INITIALIZED: 'true',
|
619
586
|
**tags
|
620
587
|
}, ['Running'])
|
@@ -722,7 +689,7 @@ def terminate_instances(
|
|
722
689
|
tag_filters = {
|
723
690
|
TAG_RAY_CLUSTER_NAME: cluster_name_on_cloud,
|
724
691
|
}
|
725
|
-
pods =
|
692
|
+
pods = kubernetes_utils.filter_pods(namespace, context, tag_filters, None)
|
726
693
|
|
727
694
|
def _is_head(pod) -> bool:
|
728
695
|
return pod.metadata.labels[constants.TAG_RAY_NODE_KIND] == 'head'
|
@@ -746,7 +713,9 @@ def get_cluster_info(
|
|
746
713
|
TAG_RAY_CLUSTER_NAME: cluster_name_on_cloud,
|
747
714
|
}
|
748
715
|
|
749
|
-
running_pods =
|
716
|
+
running_pods = kubernetes_utils.filter_pods(namespace, context, tag_filters,
|
717
|
+
['Running'])
|
718
|
+
|
750
719
|
pods: Dict[str, List[common.InstanceInfo]] = {}
|
751
720
|
head_pod_name = None
|
752
721
|
|
@@ -6,6 +6,7 @@ import os
|
|
6
6
|
import re
|
7
7
|
import shutil
|
8
8
|
import subprocess
|
9
|
+
import typing
|
9
10
|
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
10
11
|
from urllib.parse import urlparse
|
11
12
|
|
@@ -17,6 +18,7 @@ from sky import exceptions
|
|
17
18
|
from sky import sky_logging
|
18
19
|
from sky import skypilot_config
|
19
20
|
from sky.adaptors import kubernetes
|
21
|
+
from sky.provision import constants as provision_constants
|
20
22
|
from sky.provision.kubernetes import network_utils
|
21
23
|
from sky.skylet import constants
|
22
24
|
from sky.utils import common_utils
|
@@ -25,6 +27,9 @@ from sky.utils import kubernetes_enums
|
|
25
27
|
from sky.utils import schemas
|
26
28
|
from sky.utils import ux_utils
|
27
29
|
|
30
|
+
if typing.TYPE_CHECKING:
|
31
|
+
from sky import backends
|
32
|
+
|
28
33
|
# TODO(romilb): Move constants to constants.py
|
29
34
|
DEFAULT_NAMESPACE = 'default'
|
30
35
|
|
@@ -64,6 +69,16 @@ PORT_FORWARD_PROXY_CMD_VERSION = 2
|
|
64
69
|
PORT_FORWARD_PROXY_CMD_PATH = ('~/.sky/kubernetes-port-forward-proxy-command-'
|
65
70
|
f'v{PORT_FORWARD_PROXY_CMD_VERSION}.sh')
|
66
71
|
|
72
|
+
POD_STATUSES = {
|
73
|
+
'Pending', 'Running', 'Succeeded', 'Failed', 'Unknown', 'Terminating'
|
74
|
+
}
|
75
|
+
AUTODOWN_ANNOTATION_KEY = 'skypilot.co/autodown'
|
76
|
+
IDLE_MINUTES_TO_AUTOSTOP_ANNOTATION_KEY = (
|
77
|
+
'skypilot.co/idle_minutes_to_autostop')
|
78
|
+
ANNOTATIONS_POD_NOT_FOUND_ERROR_MSG = ('Pod {pod_name} not found in namespace '
|
79
|
+
'{namespace} while trying to {action} '
|
80
|
+
'an annotation {annotation}.')
|
81
|
+
|
67
82
|
logger = sky_logging.init_logger(__name__)
|
68
83
|
|
69
84
|
|
@@ -1748,11 +1763,139 @@ def get_kubernetes_node_info() -> Dict[str, KubernetesNodeInfo]:
|
|
1748
1763
|
return node_info_dict
|
1749
1764
|
|
1750
1765
|
|
1766
|
+
def to_label_selector(tags):
|
1767
|
+
label_selector = ''
|
1768
|
+
for k, v in tags.items():
|
1769
|
+
if label_selector != '':
|
1770
|
+
label_selector += ','
|
1771
|
+
label_selector += '{}={}'.format(k, v)
|
1772
|
+
return label_selector
|
1773
|
+
|
1774
|
+
|
1751
1775
|
def get_namespace_from_config(provider_config: Dict[str, Any]) -> str:
|
1752
1776
|
return provider_config.get('namespace',
|
1753
1777
|
get_current_kube_config_context_namespace())
|
1754
1778
|
|
1755
1779
|
|
1780
|
+
def filter_pods(namespace: str,
|
1781
|
+
context: str,
|
1782
|
+
tag_filters: Dict[str, str],
|
1783
|
+
status_filters: Optional[List[str]] = None) -> Dict[str, Any]:
|
1784
|
+
"""Filters pods by tags and status."""
|
1785
|
+
non_included_pod_statuses = POD_STATUSES.copy()
|
1786
|
+
|
1787
|
+
field_selector = ''
|
1788
|
+
if status_filters is not None:
|
1789
|
+
non_included_pod_statuses -= set(status_filters)
|
1790
|
+
field_selector = ','.join(
|
1791
|
+
[f'status.phase!={status}' for status in non_included_pod_statuses])
|
1792
|
+
|
1793
|
+
label_selector = to_label_selector(tag_filters)
|
1794
|
+
pod_list = kubernetes.core_api(context).list_namespaced_pod(
|
1795
|
+
namespace, field_selector=field_selector, label_selector=label_selector)
|
1796
|
+
|
1797
|
+
# Don't return pods marked for deletion,
|
1798
|
+
# i.e. pods with non-null metadata.DeletionTimestamp.
|
1799
|
+
pods = [
|
1800
|
+
pod for pod in pod_list.items if pod.metadata.deletion_timestamp is None
|
1801
|
+
]
|
1802
|
+
return {pod.metadata.name: pod for pod in pods}
|
1803
|
+
|
1804
|
+
|
1805
|
+
def _remove_pod_annotation(pod: Any, annotation_key: str,
|
1806
|
+
namespace: str) -> None:
|
1807
|
+
"""Removes specified Annotations from a Kubernetes pod."""
|
1808
|
+
try:
|
1809
|
+
# Remove the specified annotation
|
1810
|
+
if pod.metadata.annotations:
|
1811
|
+
if annotation_key in pod.metadata.annotations:
|
1812
|
+
# Patch the pod with the updated metadata.
|
1813
|
+
body = {'metadata': {'annotations': {annotation_key: None}}}
|
1814
|
+
kubernetes.core_api().patch_namespaced_pod(
|
1815
|
+
name=pod.metadata.name,
|
1816
|
+
namespace=namespace,
|
1817
|
+
body=body,
|
1818
|
+
_request_timeout=kubernetes.API_TIMEOUT)
|
1819
|
+
|
1820
|
+
except kubernetes.api_exception() as e:
|
1821
|
+
if e.status == 404:
|
1822
|
+
logger.warning(
|
1823
|
+
ANNOTATIONS_POD_NOT_FOUND_ERROR_MSG.format(
|
1824
|
+
pod_name=pod.metadata.name,
|
1825
|
+
namespace=namespace,
|
1826
|
+
action='remove',
|
1827
|
+
annotation=annotation_key))
|
1828
|
+
else:
|
1829
|
+
with ux_utils.print_exception_no_traceback():
|
1830
|
+
raise
|
1831
|
+
|
1832
|
+
|
1833
|
+
def _add_pod_annotation(pod: Any, annotation: Dict[str, str],
|
1834
|
+
namespace: str) -> None:
|
1835
|
+
"""Adds specified Annotations on a Kubernetes pod."""
|
1836
|
+
try:
|
1837
|
+
# Patch the pod with the updated metadata
|
1838
|
+
body = {'metadata': {'annotations': annotation}}
|
1839
|
+
kubernetes.core_api().patch_namespaced_pod(
|
1840
|
+
name=pod.metadata.name,
|
1841
|
+
namespace=namespace,
|
1842
|
+
body=body,
|
1843
|
+
_request_timeout=kubernetes.API_TIMEOUT)
|
1844
|
+
|
1845
|
+
except kubernetes.api_exception() as e:
|
1846
|
+
if e.status == 404:
|
1847
|
+
logger.warning(
|
1848
|
+
ANNOTATIONS_POD_NOT_FOUND_ERROR_MSG.format(
|
1849
|
+
pod_name=pod.metadata.name,
|
1850
|
+
namespace=namespace,
|
1851
|
+
action='add',
|
1852
|
+
annotation=annotation))
|
1853
|
+
else:
|
1854
|
+
with ux_utils.print_exception_no_traceback():
|
1855
|
+
raise
|
1856
|
+
|
1857
|
+
|
1858
|
+
def set_autodown_annotations(handle: 'backends.CloudVmRayResourceHandle',
|
1859
|
+
idle_minutes_to_autostop: Optional[int],
|
1860
|
+
down: bool = False) -> None:
|
1861
|
+
"""Adds or removes Annotations of autodown on Kubernetes pods."""
|
1862
|
+
tags = {
|
1863
|
+
provision_constants.TAG_RAY_CLUSTER_NAME: handle.cluster_name_on_cloud,
|
1864
|
+
}
|
1865
|
+
ray_config = common_utils.read_yaml(handle.cluster_yaml)
|
1866
|
+
provider_config = ray_config['provider']
|
1867
|
+
namespace = get_namespace_from_config(provider_config)
|
1868
|
+
context = get_context_from_config(provider_config)
|
1869
|
+
running_pods = filter_pods(namespace, context, tags)
|
1870
|
+
|
1871
|
+
for _, pod in running_pods.items():
|
1872
|
+
if down:
|
1873
|
+
idle_minutes_to_autostop_annotation = {
|
1874
|
+
IDLE_MINUTES_TO_AUTOSTOP_ANNOTATION_KEY:
|
1875
|
+
str(idle_minutes_to_autostop)
|
1876
|
+
}
|
1877
|
+
autodown_annotation = {AUTODOWN_ANNOTATION_KEY: 'true'}
|
1878
|
+
_add_pod_annotation(pod=pod,
|
1879
|
+
annotation=idle_minutes_to_autostop_annotation,
|
1880
|
+
namespace=namespace)
|
1881
|
+
_add_pod_annotation(pod=pod,
|
1882
|
+
annotation=autodown_annotation,
|
1883
|
+
namespace=namespace)
|
1884
|
+
|
1885
|
+
# If idle_minutes_to_autostop is negative, it indicates a request to
|
1886
|
+
# cancel autostop using the --cancel flag with the `sky autostop`
|
1887
|
+
# command.
|
1888
|
+
elif (idle_minutes_to_autostop is not None and
|
1889
|
+
idle_minutes_to_autostop < 0):
|
1890
|
+
_remove_pod_annotation(
|
1891
|
+
pod=pod,
|
1892
|
+
annotation_key=IDLE_MINUTES_TO_AUTOSTOP_ANNOTATION_KEY,
|
1893
|
+
namespace=namespace)
|
1894
|
+
_remove_pod_annotation(pod=pod,
|
1895
|
+
annotation_key=AUTODOWN_ANNOTATION_KEY,
|
1896
|
+
namespace=namespace)
|
1897
|
+
|
1898
|
+
|
1756
1899
|
def get_context_from_config(provider_config: Dict[str, Any]) -> str:
|
1757
1900
|
return provider_config.get('context',
|
1758
1901
|
get_current_kube_config_context_name())
|
@@ -19,6 +19,12 @@ INSTANCE_TO_TEMPLATEID = {
|
|
19
19
|
'V100-32Gx2': 'twnlo3zj',
|
20
20
|
'V100-32G': 'twnlo3zj',
|
21
21
|
'V100': 'twnlo3zj',
|
22
|
+
'GPU+': 'twnlo3zj',
|
23
|
+
'P4000': 'twnlo3zj',
|
24
|
+
'P4000x2': 'twnlo3zj',
|
25
|
+
'A4000': 'twnlo3zj',
|
26
|
+
'A4000x2': 'twnlo3zj',
|
27
|
+
'A4000x4': 'twnlo3zj',
|
22
28
|
**CPU_INSTANCES_TEMPLATEID
|
23
29
|
}
|
24
30
|
NVLINK_INSTANCES = {
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.
|
3
|
+
Version: 1.0.0.dev20240926
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -153,10 +153,10 @@ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "vsphere"
|
|
153
153
|
|
154
154
|
----
|
155
155
|
:fire: *News* :fire:
|
156
|
+
- [Sep, 2024] Point, Launch and Serve **Llama 3.2** on Kubernetes or Any Cloud: [**example**](./llm/llama-3_2/)
|
156
157
|
- [Sep, 2024] Run and deploy [Pixtral](./llm/pixtral), the first open-source multimodal model from Mistral AI.
|
157
158
|
- [Jul, 2024] [Finetune](./llm/llama-3_1-finetuning/) and [serve](./llm/llama-3_1/) **Llama 3.1** on your infra
|
158
159
|
- [Jun, 2024] Reproduce **GPT** with [llm.c](https://github.com/karpathy/llm.c/discussions/481) on any cloud: [**guide**](./llm/gpt-2/)
|
159
|
-
- [Apr, 2024] Serve and finetune [**Llama 3**](https://skypilot.readthedocs.io/en/latest/gallery/llms/llama-3.html) on any cloud or Kubernetes: [**example**](./llm/llama-3/)
|
160
160
|
- [Apr, 2024] Serve [**Qwen-110B**](https://qwenlm.github.io/blog/qwen1.5-110b/) on your infra: [**example**](./llm/qwen/)
|
161
161
|
- [Apr, 2024] Using [**Ollama**](https://github.com/ollama/ollama) to deploy quantized LLMs on CPUs and GPUs: [**example**](./llm/ollama/)
|
162
162
|
- [Feb, 2024] Deploying and scaling [**Gemma**](https://blog.google/technology/developers/gemma-open-models/) with SkyServe: [**example**](./llm/gemma/)
|
@@ -168,7 +168,8 @@ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "vsphere"
|
|
168
168
|
|
169
169
|
<details>
|
170
170
|
<summary>Archived</summary>
|
171
|
-
|
171
|
+
|
172
|
+
- [Apr, 2024] Serve and finetune [**Llama 3**](https://skypilot.readthedocs.io/en/latest/gallery/llms/llama-3.html) on any cloud or Kubernetes: [**example**](./llm/llama-3/)
|
172
173
|
- [Mar, 2024] Serve and deploy [**Databricks DBRX**](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) on your infra: [**example**](./llm/dbrx/)
|
173
174
|
- [Feb, 2024] Speed up your LLM deployments with [**SGLang**](https://github.com/sgl-project/sglang) for 5x throughput on SkyServe: [**example**](./llm/sglang/)
|
174
175
|
- [Dec, 2023] Using [**LoRAX**](https://github.com/predibase/lorax) to serve 1000s of finetuned LLMs on a single instance in the cloud: [**example**](./llm/lorax/)
|
@@ -303,7 +304,7 @@ Runnable examples:
|
|
303
304
|
- [LocalGPT](./llm/localgpt)
|
304
305
|
- [Falcon](./llm/falcon)
|
305
306
|
- Add yours here & see more in [`llm/`](./llm)!
|
306
|
-
- Framework examples: [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/nemo.yaml), [programmatic grid search](https://github.com/skypilot-org/skypilot/blob/master/examples/huggingface_glue_imdb_grid_search_app.py), [Docker](https://github.com/skypilot-org/skypilot/blob/master/examples/docker/echo_app.yaml), [Cog](https://github.com/skypilot-org/skypilot/blob/master/examples/cog/), [Unsloth](https://github.com/skypilot-org/skypilot/blob/master/examples/unsloth/unsloth.yaml), [Ollama](https://github.com/skypilot-org/skypilot/blob/master/llm/ollama), [llm.c](https://github.com/skypilot-org/skypilot/tree/master/llm/gpt-2) and [many more (`examples/`)](./examples).
|
307
|
+
- Framework examples: [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/nemo.yaml), [programmatic grid search](https://github.com/skypilot-org/skypilot/blob/master/examples/huggingface_glue_imdb_grid_search_app.py), [Docker](https://github.com/skypilot-org/skypilot/blob/master/examples/docker/echo_app.yaml), [Cog](https://github.com/skypilot-org/skypilot/blob/master/examples/cog/), [Unsloth](https://github.com/skypilot-org/skypilot/blob/master/examples/unsloth/unsloth.yaml), [Ollama](https://github.com/skypilot-org/skypilot/blob/master/llm/ollama), [llm.c](https://github.com/skypilot-org/skypilot/tree/master/llm/gpt-2), [Airflow](./examples/airflow/training_workflow) and [many more (`examples/`)](./examples).
|
307
308
|
|
308
309
|
Case Studies and Integrations: [Community Spotlights](https://blog.skypilot.co/community/)
|
309
310
|
|
{skypilot_nightly-1.0.0.dev20240924.dist-info → skypilot_nightly-1.0.0.dev20240926.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=AgGTrAKHH0SdDTVNhZzABnW6Sk2pF4PQwqyUWXZifxg,5854
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=yvpdkXS9htf-X83DPCiSG3mQ41y0zV1BQ0YgOMgTYBU,20612
|
4
4
|
sky/check.py,sha256=jLMIIJrseaZj1_o5WkbaD9XdyXIlCaT6pyAaIFdhdmA,9079
|
@@ -31,7 +31,7 @@ sky/adaptors/vsphere.py,sha256=zJP9SeObEoLrpgHW2VHvZE48EhgVf8GfAEIwBeaDMfM,2129
|
|
31
31
|
sky/backends/__init__.py,sha256=UDjwbUgpTRApbPJnNfR786GadUuwgRk3vsWoVu5RB_c,536
|
32
32
|
sky/backends/backend.py,sha256=xtxR6boDv1o-uSCjbJhOMkKMnZvBZh3gExx4khFWPTI,5932
|
33
33
|
sky/backends/backend_utils.py,sha256=W11gOb3v6Z2PLFu5YbFHDCckjgfwiSieBiDtmMeJXpE,126590
|
34
|
-
sky/backends/cloud_vm_ray_backend.py,sha256=
|
34
|
+
sky/backends/cloud_vm_ray_backend.py,sha256=fmM6OoyVN5N-Au9KuFL9u5714ta9A4BB7KwYzZF4_ps,233279
|
35
35
|
sky/backends/docker_utils.py,sha256=Hyw1YY20EyghhEbYx6O2FIMDcGkNzBzV9TM7LFynei8,8358
|
36
36
|
sky/backends/local_docker_backend.py,sha256=H4GBo0KFUC_EEf-ziv1OUbfAkOI5BrwkYs9fYOxSoNw,16741
|
37
37
|
sky/backends/wheel_utils.py,sha256=3QS4T_Ydvo4DbYhogtyADyNBEf04I6jUCL71M285shQ,7963
|
@@ -138,15 +138,15 @@ sky/provision/gcp/instance_utils.py,sha256=veRBr6Oziv0KaUdC4acuWeaOremNV0gMYCCHa
|
|
138
138
|
sky/provision/gcp/mig_utils.py,sha256=oFpcFZoapHMILSE4iIm8V5bxP1RhbMHRF7cciqq8qAk,7883
|
139
139
|
sky/provision/kubernetes/__init__.py,sha256=y6yVfii81WYG3ROxv4hiIj-ydinS5-xGxLvXnARVQoI,719
|
140
140
|
sky/provision/kubernetes/config.py,sha256=gC1FeW-cyeebphY6sq2BGVF8QKZujUKyH7qe9TAAoPM,29024
|
141
|
-
sky/provision/kubernetes/instance.py,sha256=
|
141
|
+
sky/provision/kubernetes/instance.py,sha256=YdcZ2vhxJPXzT1D8FuCIUyjdkK6VjsG4_qm3dDbygGw,38204
|
142
142
|
sky/provision/kubernetes/network.py,sha256=Y4ZbxpkXPj_t79uvbwfyCzEp6PYaG_UkhTu7rNifuCs,11636
|
143
143
|
sky/provision/kubernetes/network_utils.py,sha256=pRfi4dsxA-8PzxAarKX_k43od79r60wue9MwPsn1svI,11223
|
144
|
-
sky/provision/kubernetes/utils.py,sha256=
|
144
|
+
sky/provision/kubernetes/utils.py,sha256=TzODbdNeMoVEpFVLbI-UwaaQDYLb6F8hsL8m2O3n-SU,79865
|
145
145
|
sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml,sha256=AMzYzlY0JIlfBWj5eX054Rc1XDW2thUcLSOGMJVhIdA,229
|
146
146
|
sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml,sha256=RtTq4F1QUmR2Uunb6zuuRaPhV7hpesz4saHjn3Ncsb4,2010
|
147
147
|
sky/provision/paperspace/__init__.py,sha256=1nbUPWio7UA5gCQkO_rfEDfgXT17u5OtuByxQx4Ez6g,598
|
148
148
|
sky/provision/paperspace/config.py,sha256=oNmffSt-V466pE0DmML8hOCX1CiA24jAqE5JEKuqpyI,1541
|
149
|
-
sky/provision/paperspace/constants.py,sha256=
|
149
|
+
sky/provision/paperspace/constants.py,sha256=NcLJGivJxshJwhR28yVHysWQ2gtMAkTVmHC91d3kyKM,957
|
150
150
|
sky/provision/paperspace/instance.py,sha256=8qkvZt2-gBYCiDpT9-lztaP2_DgDdYQAAHFvK62nPNk,12045
|
151
151
|
sky/provision/paperspace/utils.py,sha256=Bl3POslZjtZU_wbBIXid7ubhRy2j5kpsesR85q7MN5w,9428
|
152
152
|
sky/provision/runpod/__init__.py,sha256=6HYvHI27EaLrX1SS0vWVhdLu5HDBeZCdvAeDJuwM5pk,556
|
@@ -272,9 +272,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=KPqp23B-zQ2SZK03jdHeF9fLTog
|
|
272
272
|
sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
|
273
273
|
sky/utils/kubernetes/rsync_helper.sh,sha256=Ma-N9a271fTfdgP5-8XIQL7KPf8IPUo-uY004PCdUFo,747
|
274
274
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
|
275
|
-
skypilot_nightly-1.0.0.
|
276
|
-
skypilot_nightly-1.0.0.
|
277
|
-
skypilot_nightly-1.0.0.
|
278
|
-
skypilot_nightly-1.0.0.
|
279
|
-
skypilot_nightly-1.0.0.
|
280
|
-
skypilot_nightly-1.0.0.
|
275
|
+
skypilot_nightly-1.0.0.dev20240926.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
276
|
+
skypilot_nightly-1.0.0.dev20240926.dist-info/METADATA,sha256=ef5NIiwOn0pe5BXLrBXC3m5yKTTuzd47GB4ie8eNOug,19177
|
277
|
+
skypilot_nightly-1.0.0.dev20240926.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
278
|
+
skypilot_nightly-1.0.0.dev20240926.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
279
|
+
skypilot_nightly-1.0.0.dev20240926.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
280
|
+
skypilot_nightly-1.0.0.dev20240926.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20240924.dist-info → skypilot_nightly-1.0.0.dev20240926.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|