skypilot-nightly 1.0.0.dev20241202__py3-none-any.whl → 1.0.0.dev20241204__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +4 -2
- sky/backends/backend_utils.py +5 -4
- sky/backends/cloud_vm_ray_backend.py +27 -7
- sky/cli.py +11 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +3 -4
- sky/core.py +25 -18
- sky/exceptions.py +7 -0
- sky/execution.py +3 -2
- sky/jobs/controller.py +28 -8
- sky/jobs/core.py +61 -35
- sky/jobs/recovery_strategy.py +2 -1
- sky/jobs/state.py +33 -1
- sky/jobs/utils.py +16 -2
- sky/setup_files/dependencies.py +141 -0
- sky/setup_files/setup.py +12 -124
- sky/skylet/constants.py +36 -11
- sky/skylet/log_lib.py +3 -1
- sky/skylet/log_lib.pyi +3 -0
- sky/templates/kubernetes-ray.yml.j2 +1 -1
- sky/utils/controller_utils.py +60 -98
- {skypilot_nightly-1.0.0.dev20241202.dist-info → skypilot_nightly-1.0.0.dev20241204.dist-info}/METADATA +3 -2
- {skypilot_nightly-1.0.0.dev20241202.dist-info → skypilot_nightly-1.0.0.dev20241204.dist-info}/RECORD +26 -25
- {skypilot_nightly-1.0.0.dev20241202.dist-info → skypilot_nightly-1.0.0.dev20241204.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241202.dist-info → skypilot_nightly-1.0.0.dev20241204.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241202.dist-info → skypilot_nightly-1.0.0.dev20241204.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241202.dist-info → skypilot_nightly-1.0.0.dev20241204.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,141 @@
|
|
1
|
+
"""Dependencies for SkyPilot.
|
2
|
+
|
3
|
+
This file is imported by setup.py, so:
|
4
|
+
- It may not be able to import other skypilot modules, since sys.path may not be
|
5
|
+
correct.
|
6
|
+
- It should not import any dependencies, as they may not be installed yet.
|
7
|
+
"""
|
8
|
+
from typing import Dict, List
|
9
|
+
|
10
|
+
install_requires = [
|
11
|
+
'wheel',
|
12
|
+
'cachetools',
|
13
|
+
# NOTE: ray requires click>=7.0.
|
14
|
+
'click >= 7.0',
|
15
|
+
'colorama',
|
16
|
+
'cryptography',
|
17
|
+
# Jinja has a bug in older versions because of the lack of pinning
|
18
|
+
# the version of the underlying markupsafe package. See:
|
19
|
+
# https://github.com/pallets/jinja/issues/1585
|
20
|
+
'jinja2 >= 3.0',
|
21
|
+
'jsonschema',
|
22
|
+
'networkx',
|
23
|
+
'pandas>=1.3.0',
|
24
|
+
'pendulum',
|
25
|
+
# PrettyTable with version >=2.0.0 is required for the support of
|
26
|
+
# `add_rows` method.
|
27
|
+
'PrettyTable >= 2.0.0',
|
28
|
+
'python-dotenv',
|
29
|
+
'rich',
|
30
|
+
'tabulate',
|
31
|
+
# Light weight requirement, can be replaced with "typing" once
|
32
|
+
# we deprecate Python 3.7 (this will take a while).
|
33
|
+
'typing_extensions',
|
34
|
+
'filelock >= 3.6.0',
|
35
|
+
'packaging',
|
36
|
+
'psutil',
|
37
|
+
'pulp',
|
38
|
+
# Cython 3.0 release breaks PyYAML 5.4.*
|
39
|
+
# (https://github.com/yaml/pyyaml/issues/601)
|
40
|
+
# <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414
|
41
|
+
'pyyaml > 3.13, != 5.4.*',
|
42
|
+
'requests',
|
43
|
+
]
|
44
|
+
|
45
|
+
local_ray = [
|
46
|
+
# Lower version of ray will cause dependency conflict for
|
47
|
+
# click/grpcio/protobuf.
|
48
|
+
# Excluded 2.6.0 as it has a bug in the cluster launcher:
|
49
|
+
# https://github.com/ray-project/ray/releases/tag/ray-2.6.1
|
50
|
+
'ray[default] >= 2.2.0, != 2.6.0',
|
51
|
+
]
|
52
|
+
|
53
|
+
remote = [
|
54
|
+
# Adopted from ray's setup.py:
|
55
|
+
# https://github.com/ray-project/ray/blob/ray-2.4.0/python/setup.py
|
56
|
+
# SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard
|
57
|
+
# fails to start when ray start is called (#2054).
|
58
|
+
# Tracking issue: https://github.com/ray-project/ray/issues/30984
|
59
|
+
'grpcio >= 1.32.0, <= 1.49.1, != 1.48.0; python_version < \'3.10\' and sys_platform == \'darwin\'', # noqa:E501 pylint: disable=line-too-long
|
60
|
+
'grpcio >= 1.42.0, <= 1.49.1, != 1.48.0; python_version >= \'3.10\' and sys_platform == \'darwin\'', # noqa:E501 pylint: disable=line-too-long
|
61
|
+
# Original issue: https://github.com/ray-project/ray/issues/33833
|
62
|
+
'grpcio >= 1.32.0, <= 1.51.3, != 1.48.0; python_version < \'3.10\' and sys_platform != \'darwin\'', # noqa:E501 pylint: disable=line-too-long
|
63
|
+
'grpcio >= 1.42.0, <= 1.51.3, != 1.48.0; python_version >= \'3.10\' and sys_platform != \'darwin\'', # noqa:E501 pylint: disable=line-too-long
|
64
|
+
# Adopted from ray's setup.py:
|
65
|
+
# https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L343
|
66
|
+
'protobuf >= 3.15.3, != 3.19.5',
|
67
|
+
# Some pydantic versions are not compatible with ray. Adopted from ray's
|
68
|
+
# setup.py:
|
69
|
+
# https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254
|
70
|
+
'pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3',
|
71
|
+
]
|
72
|
+
|
73
|
+
# NOTE: Change the templates/jobs-controller.yaml.j2 file if any of the
|
74
|
+
# following packages dependencies are changed.
|
75
|
+
aws_dependencies = [
|
76
|
+
# botocore does not work with urllib3>=2.0.0, according to
|
77
|
+
# https://github.com/boto/botocore/issues/2926
|
78
|
+
# We have to explicitly pin the version to optimize the time for
|
79
|
+
# poetry install. See https://github.com/orgs/python-poetry/discussions/7937
|
80
|
+
'urllib3<2',
|
81
|
+
# NOTE: this installs CLI V1. To use AWS SSO (e.g., `aws sso login`), users
|
82
|
+
# should instead use CLI V2 which is not pip-installable. See
|
83
|
+
# https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html.
|
84
|
+
'awscli>=1.27.10',
|
85
|
+
'botocore>=1.29.10',
|
86
|
+
'boto3>=1.26.1',
|
87
|
+
# NOTE: required by awscli. To avoid ray automatically installing
|
88
|
+
# the latest version.
|
89
|
+
'colorama < 0.4.5',
|
90
|
+
]
|
91
|
+
|
92
|
+
# azure-cli cannot be installed normally by uv, so we need to work around it in
|
93
|
+
# a few places.
|
94
|
+
AZURE_CLI = 'azure-cli>=2.65.0'
|
95
|
+
|
96
|
+
extras_require: Dict[str, List[str]] = {
|
97
|
+
'aws': aws_dependencies,
|
98
|
+
# TODO(zongheng): azure-cli is huge and takes a long time to install.
|
99
|
+
# Tracked in: https://github.com/Azure/azure-cli/issues/7387
|
100
|
+
# azure-identity is needed in node_provider.
|
101
|
+
# We need azure-identity>=1.13.0 to enable the customization of the
|
102
|
+
# timeout of AzureCliCredential.
|
103
|
+
'azure': [
|
104
|
+
AZURE_CLI,
|
105
|
+
'azure-core>=1.31.0',
|
106
|
+
'azure-identity>=1.19.0',
|
107
|
+
'azure-mgmt-network>=27.0.0',
|
108
|
+
'azure-mgmt-compute>=33.0.0',
|
109
|
+
'azure-storage-blob>=12.23.1',
|
110
|
+
'msgraph-sdk',
|
111
|
+
] + local_ray,
|
112
|
+
# We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd'
|
113
|
+
# parameter for stopping instances. Reference:
|
114
|
+
# https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6
|
115
|
+
'gcp': ['google-api-python-client>=2.69.0', 'google-cloud-storage'],
|
116
|
+
'ibm': [
|
117
|
+
'ibm-cloud-sdk-core', 'ibm-vpc', 'ibm-platform-services', 'ibm-cos-sdk'
|
118
|
+
] + local_ray,
|
119
|
+
'docker': ['docker'] + local_ray,
|
120
|
+
'lambda': local_ray,
|
121
|
+
'cloudflare': aws_dependencies,
|
122
|
+
'scp': local_ray,
|
123
|
+
'oci': ['oci'] + local_ray,
|
124
|
+
'kubernetes': ['kubernetes>=20.0.0'],
|
125
|
+
'remote': remote,
|
126
|
+
'runpod': ['runpod>=1.5.1'],
|
127
|
+
'fluidstack': [], # No dependencies needed for fluidstack
|
128
|
+
'cudo': ['cudo-compute>=0.1.10'],
|
129
|
+
'paperspace': [], # No dependencies needed for paperspace
|
130
|
+
'vsphere': [
|
131
|
+
'pyvmomi==8.0.1.0.2',
|
132
|
+
# vsphere-automation-sdk is also required, but it does not have
|
133
|
+
# pypi release, which cause failure of our pypi release.
|
134
|
+
# https://peps.python.org/pep-0440/#direct-references
|
135
|
+
# We have the instruction for its installation in our
|
136
|
+
# docs instead.
|
137
|
+
# 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0' pylint: disable=line-too-long
|
138
|
+
],
|
139
|
+
}
|
140
|
+
|
141
|
+
extras_require['all'] = sum(extras_require.values(), [])
|
sky/setup_files/setup.py
CHANGED
@@ -18,19 +18,28 @@ import io
|
|
18
18
|
import os
|
19
19
|
import platform
|
20
20
|
import re
|
21
|
+
import runpy
|
21
22
|
import subprocess
|
22
23
|
import sys
|
23
|
-
from typing import Dict, List
|
24
24
|
|
25
25
|
import setuptools
|
26
26
|
|
27
|
+
# __file__ is setup.py at the root of the repo. We shouldn't assume it's a
|
28
|
+
# symlink - e.g. in the sdist it's resolved to a normal file.
|
27
29
|
ROOT_DIR = os.path.dirname(__file__)
|
30
|
+
DEPENDENCIES_FILE_PATH = os.path.join(ROOT_DIR, 'sky', 'setup_files',
|
31
|
+
'dependencies.py')
|
28
32
|
INIT_FILE_PATH = os.path.join(ROOT_DIR, 'sky', '__init__.py')
|
29
33
|
_COMMIT_FAILURE_MESSAGE = (
|
30
34
|
'WARNING: SkyPilot fail to {verb} the commit hash in '
|
31
35
|
f'{INIT_FILE_PATH!r} (SkyPilot can still be normally used): '
|
32
36
|
'{error}')
|
33
37
|
|
38
|
+
# setuptools does not include the script dir on the search path, so we can't
|
39
|
+
# just do `import dependencies`. Instead, use runpy to manually load it. Note:
|
40
|
+
# dependencies here is a dict, not a module, so we access it by subscripting.
|
41
|
+
dependencies = runpy.run_path(DEPENDENCIES_FILE_PATH)
|
42
|
+
|
34
43
|
original_init_content = None
|
35
44
|
|
36
45
|
system = platform.system()
|
@@ -130,127 +139,6 @@ def parse_readme(readme: str) -> str:
|
|
130
139
|
return readme
|
131
140
|
|
132
141
|
|
133
|
-
install_requires = [
|
134
|
-
'wheel',
|
135
|
-
'cachetools',
|
136
|
-
# NOTE: ray requires click>=7.0.
|
137
|
-
'click >= 7.0',
|
138
|
-
'colorama',
|
139
|
-
'cryptography',
|
140
|
-
# Jinja has a bug in older versions because of the lack of pinning
|
141
|
-
# the version of the underlying markupsafe package. See:
|
142
|
-
# https://github.com/pallets/jinja/issues/1585
|
143
|
-
'jinja2 >= 3.0',
|
144
|
-
'jsonschema',
|
145
|
-
'networkx',
|
146
|
-
'pandas>=1.3.0',
|
147
|
-
'pendulum',
|
148
|
-
# PrettyTable with version >=2.0.0 is required for the support of
|
149
|
-
# `add_rows` method.
|
150
|
-
'PrettyTable >= 2.0.0',
|
151
|
-
'python-dotenv',
|
152
|
-
'rich',
|
153
|
-
'tabulate',
|
154
|
-
# Light weight requirement, can be replaced with "typing" once
|
155
|
-
# we deprecate Python 3.7 (this will take a while).
|
156
|
-
'typing_extensions',
|
157
|
-
'filelock >= 3.6.0',
|
158
|
-
'packaging',
|
159
|
-
'psutil',
|
160
|
-
'pulp',
|
161
|
-
# Cython 3.0 release breaks PyYAML 5.4.* (https://github.com/yaml/pyyaml/issues/601)
|
162
|
-
# <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414
|
163
|
-
'pyyaml > 3.13, != 5.4.*',
|
164
|
-
'requests',
|
165
|
-
]
|
166
|
-
|
167
|
-
local_ray = [
|
168
|
-
# Lower version of ray will cause dependency conflict for
|
169
|
-
# click/grpcio/protobuf.
|
170
|
-
# Excluded 2.6.0 as it has a bug in the cluster launcher:
|
171
|
-
# https://github.com/ray-project/ray/releases/tag/ray-2.6.1
|
172
|
-
'ray[default] >= 2.2.0, != 2.6.0',
|
173
|
-
]
|
174
|
-
|
175
|
-
remote = [
|
176
|
-
# Adopted from ray's setup.py: https://github.com/ray-project/ray/blob/ray-2.4.0/python/setup.py
|
177
|
-
# SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard fails to start when
|
178
|
-
# ray start is called (#2054).
|
179
|
-
# Tracking issue: https://github.com/ray-project/ray/issues/30984
|
180
|
-
"grpcio >= 1.32.0, <= 1.49.1, != 1.48.0; python_version < '3.10' and sys_platform == 'darwin'", # noqa:E501
|
181
|
-
"grpcio >= 1.42.0, <= 1.49.1, != 1.48.0; python_version >= '3.10' and sys_platform == 'darwin'", # noqa:E501
|
182
|
-
# Original issue: https://github.com/ray-project/ray/issues/33833
|
183
|
-
"grpcio >= 1.32.0, <= 1.51.3, != 1.48.0; python_version < '3.10' and sys_platform != 'darwin'", # noqa:E501
|
184
|
-
"grpcio >= 1.42.0, <= 1.51.3, != 1.48.0; python_version >= '3.10' and sys_platform != 'darwin'", # noqa:E501
|
185
|
-
# Adopted from ray's setup.py:
|
186
|
-
# https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L343
|
187
|
-
'protobuf >= 3.15.3, != 3.19.5',
|
188
|
-
# Some pydantic versions are not compatible with ray. Adopted from ray's
|
189
|
-
# setup.py: https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254
|
190
|
-
'pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3',
|
191
|
-
]
|
192
|
-
|
193
|
-
# NOTE: Change the templates/jobs-controller.yaml.j2 file if any of the
|
194
|
-
# following packages dependencies are changed.
|
195
|
-
aws_dependencies = [
|
196
|
-
# botocore does not work with urllib3>=2.0.0, according to https://github.com/boto/botocore/issues/2926
|
197
|
-
# We have to explicitly pin the version to optimize the time for
|
198
|
-
# poetry install. See https://github.com/orgs/python-poetry/discussions/7937
|
199
|
-
'urllib3<2',
|
200
|
-
# NOTE: this installs CLI V1. To use AWS SSO (e.g., `aws sso login`), users
|
201
|
-
# should instead use CLI V2 which is not pip-installable. See
|
202
|
-
# https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html.
|
203
|
-
'awscli>=1.27.10',
|
204
|
-
'botocore>=1.29.10',
|
205
|
-
'boto3>=1.26.1',
|
206
|
-
# NOTE: required by awscli. To avoid ray automatically installing
|
207
|
-
# the latest version.
|
208
|
-
'colorama < 0.4.5',
|
209
|
-
]
|
210
|
-
|
211
|
-
extras_require: Dict[str, List[str]] = {
|
212
|
-
'aws': aws_dependencies,
|
213
|
-
# TODO(zongheng): azure-cli is huge and takes a long time to install.
|
214
|
-
# Tracked in: https://github.com/Azure/azure-cli/issues/7387
|
215
|
-
# azure-identity is needed in node_provider.
|
216
|
-
# We need azure-identity>=1.13.0 to enable the customization of the
|
217
|
-
# timeout of AzureCliCredential.
|
218
|
-
'azure': [
|
219
|
-
'azure-cli>=2.65.0', 'azure-core>=1.31.0', 'azure-identity>=1.19.0',
|
220
|
-
'azure-mgmt-network>=27.0.0', 'azure-mgmt-compute>=33.0.0',
|
221
|
-
'azure-storage-blob>=12.23.1', 'msgraph-sdk'
|
222
|
-
] + local_ray,
|
223
|
-
# We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd'
|
224
|
-
# parameter for stopping instances.
|
225
|
-
# Reference: https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6
|
226
|
-
'gcp': ['google-api-python-client>=2.69.0', 'google-cloud-storage'],
|
227
|
-
'ibm': [
|
228
|
-
'ibm-cloud-sdk-core', 'ibm-vpc', 'ibm-platform-services', 'ibm-cos-sdk'
|
229
|
-
] + local_ray,
|
230
|
-
'docker': ['docker'] + local_ray,
|
231
|
-
'lambda': local_ray,
|
232
|
-
'cloudflare': aws_dependencies,
|
233
|
-
'scp': local_ray,
|
234
|
-
'oci': ['oci'] + local_ray,
|
235
|
-
'kubernetes': ['kubernetes>=20.0.0'],
|
236
|
-
'remote': remote,
|
237
|
-
'runpod': ['runpod>=1.5.1'],
|
238
|
-
'fluidstack': [], # No dependencies needed for fluidstack
|
239
|
-
'cudo': ['cudo-compute>=0.1.10'],
|
240
|
-
'paperspace': [], # No dependencies needed for paperspace
|
241
|
-
'vsphere': [
|
242
|
-
'pyvmomi==8.0.1.0.2',
|
243
|
-
# vsphere-automation-sdk is also required, but it does not have
|
244
|
-
# pypi release, which cause failure of our pypi release.
|
245
|
-
# https://peps.python.org/pep-0440/#direct-references
|
246
|
-
# We have the instruction for its installation in our
|
247
|
-
# docs instead.
|
248
|
-
# 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0'
|
249
|
-
],
|
250
|
-
}
|
251
|
-
|
252
|
-
extras_require['all'] = sum(extras_require.values(), [])
|
253
|
-
|
254
142
|
long_description = ''
|
255
143
|
readme_filepath = 'README.md'
|
256
144
|
# When sky/backends/wheel_utils.py builds wheels, it will not contain the
|
@@ -277,8 +165,8 @@ setuptools.setup(
|
|
277
165
|
long_description_content_type='text/markdown',
|
278
166
|
setup_requires=['wheel'],
|
279
167
|
requires_python='>=3.7',
|
280
|
-
install_requires=install_requires,
|
281
|
-
extras_require=extras_require,
|
168
|
+
install_requires=dependencies['install_requires'],
|
169
|
+
extras_require=dependencies['extras_require'],
|
282
170
|
entry_points={
|
283
171
|
'console_scripts': ['sky = sky.cli:cli'],
|
284
172
|
},
|
sky/skylet/constants.py
CHANGED
@@ -4,6 +4,7 @@ from typing import List, Tuple
|
|
4
4
|
from packaging import version
|
5
5
|
|
6
6
|
import sky
|
7
|
+
from sky.setup_files import dependencies
|
7
8
|
|
8
9
|
SKY_LOGS_DIRECTORY = '~/sky_logs'
|
9
10
|
SKY_REMOTE_WORKDIR = '~/sky_workdir'
|
@@ -39,6 +40,8 @@ SKY_GET_PYTHON_PATH_CMD = (f'[ -s {SKY_PYTHON_PATH_FILE} ] && '
|
|
39
40
|
'which python3')
|
40
41
|
# Python executable, e.g., /opt/conda/bin/python3
|
41
42
|
SKY_PYTHON_CMD = f'$({SKY_GET_PYTHON_PATH_CMD})'
|
43
|
+
# Prefer SKY_UV_PIP_CMD, which is faster.
|
44
|
+
# TODO(cooperc): remove remaining usage (GCP TPU setup).
|
42
45
|
SKY_PIP_CMD = f'{SKY_PYTHON_CMD} -m pip'
|
43
46
|
# Ray executable, e.g., /opt/conda/bin/ray
|
44
47
|
# We need to add SKY_PYTHON_CMD before ray executable because:
|
@@ -50,6 +53,14 @@ SKY_RAY_CMD = (f'{SKY_PYTHON_CMD} $([ -s {SKY_RAY_PATH_FILE} ] && '
|
|
50
53
|
SKY_REMOTE_PYTHON_ENV_NAME = 'skypilot-runtime'
|
51
54
|
SKY_REMOTE_PYTHON_ENV = f'~/{SKY_REMOTE_PYTHON_ENV_NAME}'
|
52
55
|
ACTIVATE_SKY_REMOTE_PYTHON_ENV = f'source {SKY_REMOTE_PYTHON_ENV}/bin/activate'
|
56
|
+
# uv is used for venv and pip, much faster than python implementations.
|
57
|
+
SKY_UV_INSTALL_DIR = '"$HOME/.local/bin"'
|
58
|
+
SKY_UV_CMD = f'{SKY_UV_INSTALL_DIR}/uv'
|
59
|
+
# This won't reinstall uv if it's already installed, so it's safe to re-run.
|
60
|
+
SKY_UV_INSTALL_CMD = (f'{SKY_UV_CMD} -V >/dev/null 2>&1 || '
|
61
|
+
'curl -LsSf https://astral.sh/uv/install.sh '
|
62
|
+
f'| UV_INSTALL_DIR={SKY_UV_INSTALL_DIR} sh')
|
63
|
+
SKY_UV_PIP_CMD = f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} pip'
|
53
64
|
# Deleting the SKY_REMOTE_PYTHON_ENV_NAME from the PATH to deactivate the
|
54
65
|
# environment. `deactivate` command does not work when conda is used.
|
55
66
|
DEACTIVATE_SKY_REMOTE_PYTHON_ENV = (
|
@@ -148,28 +159,30 @@ CONDA_INSTALLATION_COMMANDS = (
|
|
148
159
|
'echo "Creating conda env with Python 3.10" && '
|
149
160
|
f'conda create -y -n {SKY_REMOTE_PYTHON_ENV_NAME} python=3.10 && '
|
150
161
|
f'conda activate {SKY_REMOTE_PYTHON_ENV_NAME};'
|
162
|
+
# Install uv for venv management and pip installation.
|
163
|
+
f'{SKY_UV_INSTALL_CMD};'
|
151
164
|
# Create a separate conda environment for SkyPilot dependencies.
|
152
165
|
f'[ -d {SKY_REMOTE_PYTHON_ENV} ] || '
|
153
166
|
# Do NOT use --system-site-packages here, because if users upgrade any
|
154
167
|
# packages in the base env, they interfere with skypilot dependencies.
|
155
168
|
# Reference: https://github.com/skypilot-org/skypilot/issues/4097
|
156
|
-
|
169
|
+
# --seed will include pip and setuptools, which are present in venvs created
|
170
|
+
# with python -m venv.
|
171
|
+
f'{SKY_UV_CMD} venv --seed {SKY_REMOTE_PYTHON_ENV};'
|
157
172
|
f'echo "$(echo {SKY_REMOTE_PYTHON_ENV})/bin/python" > {SKY_PYTHON_PATH_FILE};'
|
158
173
|
)
|
159
174
|
|
160
175
|
_sky_version = str(version.parse(sky.__version__))
|
161
176
|
RAY_STATUS = f'RAY_ADDRESS=127.0.0.1:{SKY_REMOTE_RAY_PORT} {SKY_RAY_CMD} status'
|
162
177
|
RAY_INSTALLATION_COMMANDS = (
|
178
|
+
f'{SKY_UV_INSTALL_CMD};'
|
163
179
|
'mkdir -p ~/sky_workdir && mkdir -p ~/.sky/sky_app;'
|
164
|
-
# Disable the pip version check to avoid the warning message, which makes
|
165
|
-
# the output hard to read.
|
166
|
-
'export PIP_DISABLE_PIP_VERSION_CHECK=1;'
|
167
180
|
# Print the PATH in provision.log to help debug PATH issues.
|
168
181
|
'echo PATH=$PATH; '
|
169
182
|
# Install setuptools<=69.5.1 to avoid the issue with the latest setuptools
|
170
183
|
# causing the error:
|
171
184
|
# ImportError: cannot import name 'packaging' from 'pkg_resources'"
|
172
|
-
f'{
|
185
|
+
f'{SKY_UV_PIP_CMD} install "setuptools<70"; '
|
173
186
|
# Backward compatibility for ray upgrade (#3248): do not upgrade ray if the
|
174
187
|
# ray cluster is already running, to avoid the ray cluster being restarted.
|
175
188
|
#
|
@@ -183,10 +196,10 @@ RAY_INSTALLATION_COMMANDS = (
|
|
183
196
|
# latest ray port 6380, but those existing cluster launched before #1790
|
184
197
|
# that has ray cluster on the default port 6379 will be upgraded and
|
185
198
|
# restarted.
|
186
|
-
f'{
|
199
|
+
f'{SKY_UV_PIP_CMD} list | grep "ray " | '
|
187
200
|
f'grep {SKY_REMOTE_RAY_VERSION} 2>&1 > /dev/null '
|
188
201
|
f'|| {RAY_STATUS} || '
|
189
|
-
f'{
|
202
|
+
f'{SKY_UV_PIP_CMD} install -U ray[default]=={SKY_REMOTE_RAY_VERSION}; ' # pylint: disable=line-too-long
|
190
203
|
# In some envs, e.g. pip does not have permission to write under /opt/conda
|
191
204
|
# ray package will be installed under ~/.local/bin. If the user's PATH does
|
192
205
|
# not include ~/.local/bin (the pip install will have the output: `WARNING:
|
@@ -202,10 +215,22 @@ RAY_INSTALLATION_COMMANDS = (
|
|
202
215
|
f'which ray > {SKY_RAY_PATH_FILE} || exit 1; }}; ')
|
203
216
|
|
204
217
|
SKYPILOT_WHEEL_INSTALLATION_COMMANDS = (
|
205
|
-
f'{
|
218
|
+
f'{SKY_UV_INSTALL_CMD};'
|
219
|
+
f'{{ {SKY_UV_PIP_CMD} list | grep "skypilot " && '
|
206
220
|
'[ "$(cat ~/.sky/wheels/current_sky_wheel_hash)" == "{sky_wheel_hash}" ]; } || ' # pylint: disable=line-too-long
|
207
|
-
f'{{ {
|
208
|
-
|
221
|
+
f'{{ {SKY_UV_PIP_CMD} uninstall skypilot; '
|
222
|
+
# uv cannot install azure-cli normally, since it depends on pre-release
|
223
|
+
# packages. Manually install azure-cli with the --prerelease=allow flag
|
224
|
+
# first. This will allow skypilot to successfully install. See
|
225
|
+
# https://docs.astral.sh/uv/pip/compatibility/#pre-release-compatibility.
|
226
|
+
# We don't want to use --prerelease=allow for all packages, because it will
|
227
|
+
# cause uv to use pre-releases for some other packages that have sufficient
|
228
|
+
# stable releases.
|
229
|
+
'if [ "{cloud}" = "azure" ]; then '
|
230
|
+
f'{SKY_UV_PIP_CMD} install --prerelease=allow "{dependencies.AZURE_CLI}";'
|
231
|
+
'fi;'
|
232
|
+
# Install skypilot from wheel
|
233
|
+
f'{SKY_UV_PIP_CMD} install "$(echo ~/.sky/wheels/{{sky_wheel_hash}}/'
|
209
234
|
f'skypilot-{_sky_version}*.whl)[{{cloud}}, remote]" && '
|
210
235
|
'echo "{sky_wheel_hash}" > ~/.sky/wheels/current_sky_wheel_hash || '
|
211
236
|
'exit 1; }; ')
|
@@ -220,7 +245,7 @@ RAY_SKYPILOT_INSTALLATION_COMMANDS = (
|
|
220
245
|
# The ray installation above can be skipped due to the existing ray cluster
|
221
246
|
# for backward compatibility. In this case, we should not patch the ray
|
222
247
|
# files.
|
223
|
-
f'{
|
248
|
+
f'{SKY_UV_PIP_CMD} list | grep "ray " | '
|
224
249
|
f'grep {SKY_REMOTE_RAY_VERSION} 2>&1 > /dev/null && '
|
225
250
|
f'{{ {SKY_PYTHON_CMD} -c '
|
226
251
|
'"from sky.skylet.ray_patches import patch; patch()" || exit 1; }; ')
|
sky/skylet/log_lib.py
CHANGED
@@ -34,6 +34,8 @@ PEEK_HEAD_LINES_FOR_START_STREAM = 20
|
|
34
34
|
|
35
35
|
logger = sky_logging.init_logger(__name__)
|
36
36
|
|
37
|
+
LOG_FILE_START_STREAMING_AT = 'Waiting for task resources on '
|
38
|
+
|
37
39
|
|
38
40
|
class _ProcessingArgs:
|
39
41
|
"""Arguments for processing logs."""
|
@@ -435,7 +437,7 @@ def tail_logs(job_id: Optional[int],
|
|
435
437
|
time.sleep(_SKY_LOG_WAITING_GAP_SECONDS)
|
436
438
|
status = job_lib.update_job_status([job_id], silent=True)[0]
|
437
439
|
|
438
|
-
start_stream_at =
|
440
|
+
start_stream_at = LOG_FILE_START_STREAMING_AT
|
439
441
|
# Explicitly declare the type to avoid mypy warning.
|
440
442
|
lines: Iterable[str] = []
|
441
443
|
if follow and status in [
|
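sky/skylet/log_lib.pyi
CHANGED
[Hunk not captured in this extract; the file summary above lists +3 -0 for this file.]
sky/templates/kubernetes-ray.yml.j2
CHANGED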
@@ -414,7 +414,7 @@ available_node_types:
|
|
414
414
|
done
|
415
415
|
{{ conda_installation_commands }}
|
416
416
|
{{ ray_installation_commands }}
|
417
|
-
|
417
|
+
VIRTUAL_ENV=~/skypilot-runtime ~/.local/bin/uv pip install skypilot[kubernetes,remote]
|
418
418
|
touch /tmp/ray_skypilot_installation_complete
|
419
419
|
echo "=== Ray and skypilot installation completed ==="
|
420
420
|
|