skypilot-nightly 1.0.0.dev20241123__py3-none-any.whl → 1.0.0.dev20241124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +48 -15
- sky/templates/kubernetes-ray.yml.j2 +13 -3
- {skypilot_nightly-1.0.0.dev20241123.dist-info → skypilot_nightly-1.0.0.dev20241124.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241123.dist-info → skypilot_nightly-1.0.0.dev20241124.dist-info}/RECORD +9 -9
- {skypilot_nightly-1.0.0.dev20241123.dist-info → skypilot_nightly-1.0.0.dev20241124.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241123.dist-info → skypilot_nightly-1.0.0.dev20241124.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241123.dist-info → skypilot_nightly-1.0.0.dev20241124.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241123.dist-info → skypilot_nightly-1.0.0.dev20241124.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '44625e0e62b70e052072851129507a06ff60636d'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20241124'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
@@ -65,9 +65,14 @@ def list_accelerators(
|
|
65
65
|
# TODO(romilb): We should consider putting a lru_cache() with TTL to
|
66
66
|
# avoid multiple calls to kubernetes API in a short period of time (e.g.,
|
67
67
|
# from the optimizer).
|
68
|
-
return
|
69
|
-
|
70
|
-
|
68
|
+
return _list_accelerators(gpus_only,
|
69
|
+
name_filter,
|
70
|
+
region_filter,
|
71
|
+
quantity_filter,
|
72
|
+
case_sensitive,
|
73
|
+
all_regions,
|
74
|
+
require_price,
|
75
|
+
realtime=False)[0]
|
71
76
|
|
72
77
|
|
73
78
|
def list_accelerators_realtime(
|
@@ -78,10 +83,36 @@ def list_accelerators_realtime(
|
|
78
83
|
case_sensitive: bool = True,
|
79
84
|
all_regions: bool = False,
|
80
85
|
require_price: bool = True
|
86
|
+
) -> Tuple[Dict[str, List[common.InstanceTypeInfo]], Dict[str, int], Dict[str,
|
87
|
+
int]]:
|
88
|
+
return _list_accelerators(gpus_only,
|
89
|
+
name_filter,
|
90
|
+
region_filter,
|
91
|
+
quantity_filter,
|
92
|
+
case_sensitive,
|
93
|
+
all_regions,
|
94
|
+
require_price,
|
95
|
+
realtime=True)
|
96
|
+
|
97
|
+
|
98
|
+
def _list_accelerators(
|
99
|
+
gpus_only: bool,
|
100
|
+
name_filter: Optional[str],
|
101
|
+
region_filter: Optional[str],
|
102
|
+
quantity_filter: Optional[int],
|
103
|
+
case_sensitive: bool = True,
|
104
|
+
all_regions: bool = False,
|
105
|
+
require_price: bool = True,
|
106
|
+
realtime: bool = False
|
81
107
|
) -> Tuple[Dict[str, List[common.InstanceTypeInfo]], Dict[str, int], Dict[str,
|
82
108
|
int]]:
|
83
109
|
"""List accelerators in the Kubernetes cluster.
|
84
110
|
|
111
|
+
If realtime is True, the function will query the cluster to fetch real-time
|
112
|
+
GPU usage, which is returned in total_accelerators_available. Note that
|
113
|
+
this may require an expensive list_pod_for_all_namespaces call, which
|
114
|
+
requires cluster-wide pod read permissions.
|
115
|
+
|
85
116
|
If the user does not have sufficient permissions to list pods in all
|
86
117
|
namespaces, the function will return free GPUs as -1.
|
87
118
|
"""
|
@@ -115,18 +146,20 @@ def list_accelerators_realtime(
|
|
115
146
|
accelerators_qtys: Set[Tuple[str, int]] = set()
|
116
147
|
keys = lf.get_label_keys()
|
117
148
|
nodes = kubernetes_utils.get_kubernetes_nodes(context)
|
118
|
-
|
119
|
-
|
120
|
-
pods
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
149
|
+
pods = None
|
150
|
+
if realtime:
|
151
|
+
# Get the pods to get the real-time GPU usage
|
152
|
+
try:
|
153
|
+
pods = kubernetes_utils.get_all_pods_in_kubernetes_cluster(context)
|
154
|
+
except kubernetes.api_exception() as e:
|
155
|
+
if e.status == 403:
|
156
|
+
logger.warning(
|
157
|
+
'Failed to get pods in the Kubernetes cluster '
|
158
|
+
'(forbidden). Please check if your account has '
|
159
|
+
'necessary permissions to list pods. Realtime GPU '
|
160
|
+
'availability information may be incorrect.')
|
161
|
+
else:
|
162
|
+
raise
|
130
163
|
# Total number of GPUs in the cluster
|
131
164
|
total_accelerators_capacity: Dict[str, int] = {}
|
132
165
|
# Total number of GPUs currently available in the cluster
|
@@ -327,6 +327,13 @@ available_node_types:
|
|
327
327
|
command: ["/bin/bash", "-c", "--"]
|
328
328
|
args:
|
329
329
|
- |
|
330
|
+
# For backwards compatibility, we put a marker file in the pod
|
331
|
+
# to indicate that the pod is running with the changes introduced
|
332
|
+
# in project nimbus: https://github.com/skypilot-org/skypilot/pull/4393
|
333
|
+
# TODO: Remove this marker file and its usage in setup_commands
|
334
|
+
# after v0.10.0 release.
|
335
|
+
touch /tmp/skypilot_is_nimbus
|
336
|
+
|
330
337
|
# Helper function to conditionally use sudo
|
331
338
|
# TODO(zhwu): consolidate the two prefix_cmd and sudo replacements
|
332
339
|
prefix_cmd() { if [ $(id -u) -ne 0 ]; then echo "sudo"; else echo ""; fi; }
|
@@ -575,9 +582,12 @@ setup_commands:
|
|
575
582
|
STEPS=("apt-ssh-setup" "runtime-setup" "env-setup")
|
576
583
|
start_epoch=$(date +%s);
|
577
584
|
echo "=== Logs for asynchronous ray and skypilot installation ===";
|
578
|
-
[ -f /tmp/
|
579
|
-
|
580
|
-
|
585
|
+
if [ -f /tmp/skypilot_is_nimbus ]; then
|
586
|
+
echo "=== Logs for asynchronous ray and skypilot installation ===";
|
587
|
+
[ -f /tmp/ray_skypilot_installation_complete ] && cat /tmp/${STEPS[1]}.log ||
|
588
|
+
{ tail -f -n +1 /tmp/${STEPS[1]}.log & TAIL_PID=$!; echo "Tail PID: $TAIL_PID"; until [ -f /tmp/ray_skypilot_installation_complete ]; do sleep 0.5; done; kill $TAIL_PID || true; };
|
589
|
+
[ -f /tmp/${STEPS[1]}.failed ] && { echo "Error: ${STEPS[1]} failed. Exiting."; exit 1; } || true;
|
590
|
+
fi
|
581
591
|
end_epoch=$(date +%s);
|
582
592
|
echo "=== Ray and skypilot dependencies installation completed in $(($end_epoch - $start_epoch)) secs ===";
|
583
593
|
start_epoch=$(date +%s);
|
{skypilot_nightly-1.0.0.dev20241123.dist-info → skypilot_nightly-1.0.0.dev20241124.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=gSpB4bqIgwDeXSZzO0gyRjKn3ALUrIUX0wpZ0-dmVU8,5882
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=pAdCT60OxxiXI9KXDyP2lQ9u9vMc6aMtq5Xi2h_hbdw,20984
|
4
4
|
sky/check.py,sha256=D3Y3saIFAYVvPxuBHnVgJEO0fUVDxgjwuMBaO-D778k,9472
|
@@ -65,7 +65,7 @@ sky/clouds/service_catalog/cudo_catalog.py,sha256=V_takvL6dWTGQaTLCEvjKIotCDPnMu
|
|
65
65
|
sky/clouds/service_catalog/fluidstack_catalog.py,sha256=21-cvrYEYTIi7n3ZNF2e7_0QX-PF4BkhlVJUWQOvKrY,5059
|
66
66
|
sky/clouds/service_catalog/gcp_catalog.py,sha256=v_5fsB3dB9oD8U7lBKnCe5ii6AUWEOiQjNarMnU_qLA,24379
|
67
67
|
sky/clouds/service_catalog/ibm_catalog.py,sha256=1iK0KvbI82U7sySb7chr-qm_16x3tTnZ6nIo7o76ouc,4493
|
68
|
-
sky/clouds/service_catalog/kubernetes_catalog.py,sha256=
|
68
|
+
sky/clouds/service_catalog/kubernetes_catalog.py,sha256=4MsPXyzpwncwiBmndnbYAMpf2yAP2xINeurM6AaVV2k,12335
|
69
69
|
sky/clouds/service_catalog/lambda_catalog.py,sha256=2R-ccu63BbdvO6X80MtxiniA-jLewXb6I0Ye1rYD9fY,5302
|
70
70
|
sky/clouds/service_catalog/oci_catalog.py,sha256=cyA6ZqwHGOKuPxUl_dKmFGdeWdQGMrvl_-o2MtyF998,8580
|
71
71
|
sky/clouds/service_catalog/paperspace_catalog.py,sha256=MOlfoGRChjEwMzu4nRAho8DrIwwUJ3QlRzrMA1RLqvE,3789
|
@@ -228,7 +228,7 @@ sky/templates/jobs-controller.yaml.j2,sha256=Gu3ogFxFYr09VEXP-6zEbrCUOFo1aYxWEjA
|
|
228
228
|
sky/templates/kubernetes-ingress.yml.j2,sha256=73iDklVDWBMbItg0IexCa6_ClXPJOxw7PWz3leku4nE,1340
|
229
229
|
sky/templates/kubernetes-loadbalancer.yml.j2,sha256=IxrNYM366N01bbkJEbZ_UPYxUP8wyVEbRNFHRsBuLsw,626
|
230
230
|
sky/templates/kubernetes-port-forward-proxy-command.sh,sha256=HlG7CPBBedCVBlL9qv0erW_eKm6Irj0LFyaAWuJW_lc,3148
|
231
|
-
sky/templates/kubernetes-ray.yml.j2,sha256=
|
231
|
+
sky/templates/kubernetes-ray.yml.j2,sha256=dQrNwyOAm46To7GA4s8oHftBnWIOOeRJXTHfhG7lgHo,28555
|
232
232
|
sky/templates/kubernetes-ssh-jump.yml.j2,sha256=k5W5sOIMppU7dDkJMwPlqsUcb92y7L5_TVG3hkgMy8M,2747
|
233
233
|
sky/templates/lambda-ray.yml.j2,sha256=HyvO_tX2vxwSsc4IFVSqGuIbjLMk0bevP9bcxb8ZQII,4498
|
234
234
|
sky/templates/local-ray.yml.j2,sha256=FNHeyHF6nW9nU9QLIZceUWfvrFTTcO51KqhTnYCEFaA,1185
|
@@ -275,9 +275,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
|
|
275
275
|
sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
|
276
276
|
sky/utils/kubernetes/rsync_helper.sh,sha256=hyYDaYSNxYaNvzUQBzC8AidB7nDeojizjkzc_CTxycY,1077
|
277
277
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
|
278
|
-
skypilot_nightly-1.0.0.
|
279
|
-
skypilot_nightly-1.0.0.
|
280
|
-
skypilot_nightly-1.0.0.
|
281
|
-
skypilot_nightly-1.0.0.
|
282
|
-
skypilot_nightly-1.0.0.
|
283
|
-
skypilot_nightly-1.0.0.
|
278
|
+
skypilot_nightly-1.0.0.dev20241124.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
279
|
+
skypilot_nightly-1.0.0.dev20241124.dist-info/METADATA,sha256=tozaR63Pdm3PXACP_UuuNZKxZXFfxVq0T9eR1-G1YZI,20222
|
280
|
+
skypilot_nightly-1.0.0.dev20241124.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
281
|
+
skypilot_nightly-1.0.0.dev20241124.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
282
|
+
skypilot_nightly-1.0.0.dev20241124.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
283
|
+
skypilot_nightly-1.0.0.dev20241124.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20241123.dist-info → skypilot_nightly-1.0.0.dev20241124.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|