skypilot-nightly 1.0.0.dev20241004__py3-none-any.whl → 1.0.0.dev20241006__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/jobs/state.py +12 -4
- sky/serve/autoscalers.py +4 -0
- sky/serve/controller.py +10 -8
- sky/serve/load_balancer.py +1 -1
- sky/serve/replica_managers.py +3 -1
- sky/serve/serve_state.py +12 -4
- sky/serve/service.py +12 -10
- {skypilot_nightly-1.0.0.dev20241004.dist-info → skypilot_nightly-1.0.0.dev20241006.dist-info}/METADATA +2 -2
- {skypilot_nightly-1.0.0.dev20241004.dist-info → skypilot_nightly-1.0.0.dev20241006.dist-info}/RECORD +14 -14
- {skypilot_nightly-1.0.0.dev20241004.dist-info → skypilot_nightly-1.0.0.dev20241006.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241004.dist-info → skypilot_nightly-1.0.0.dev20241006.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241004.dist-info → skypilot_nightly-1.0.0.dev20241006.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241004.dist-info → skypilot_nightly-1.0.0.dev20241006.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'f4886bed755a3a6ba62554ef359fbe1dcd174d78'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20241006'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/jobs/state.py
CHANGED
@@ -20,10 +20,18 @@ CallbackType = Callable[[str], None]
|
|
20
20
|
|
21
21
|
logger = sky_logging.init_logger(__name__)
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
_DB_PATH
|
23
|
+
|
24
|
+
def _get_db_path() -> str:
|
25
|
+
"""Workaround to collapse multi-step Path ops for type checker.
|
26
|
+
Ensures _DB_PATH is str, avoiding Union[Path, str] inference.
|
27
|
+
"""
|
28
|
+
path = pathlib.Path('~/.sky/spot_jobs.db')
|
29
|
+
path = path.expanduser().absolute()
|
30
|
+
path.parents[0].mkdir(parents=True, exist_ok=True)
|
31
|
+
return str(path)
|
32
|
+
|
33
|
+
|
34
|
+
_DB_PATH = _get_db_path()
|
27
35
|
|
28
36
|
# Module-level connection/cursor; thread-safe as the module is only imported
|
29
37
|
# once.
|
sky/serve/autoscalers.py
CHANGED
@@ -131,6 +131,10 @@ class Autoscaler:
|
|
131
131
|
"""Load dynamic states to autoscaler."""
|
132
132
|
raise NotImplementedError
|
133
133
|
|
134
|
+
def get_decision_interval(self) -> int:
|
135
|
+
"""Get the decision interval for the autoscaler."""
|
136
|
+
raise NotImplementedError
|
137
|
+
|
134
138
|
def load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
|
135
139
|
"""Load dynamic states to autoscaler."""
|
136
140
|
self.latest_version_ever_ready = dynamic_states.pop(
|
sky/serve/controller.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
Responsible for autoscaling and replica management.
|
4
4
|
"""
|
5
|
+
import contextlib
|
5
6
|
import logging
|
6
7
|
import threading
|
7
8
|
import time
|
@@ -49,7 +50,14 @@ class SkyServeController:
|
|
49
50
|
autoscalers.Autoscaler.from_spec(service_name, service_spec))
|
50
51
|
self._host = host
|
51
52
|
self._port = port
|
52
|
-
self._app = fastapi.FastAPI()
|
53
|
+
self._app = fastapi.FastAPI(lifespan=self.lifespan)
|
54
|
+
|
55
|
+
@contextlib.asynccontextmanager
|
56
|
+
async def lifespan(self, _: fastapi.FastAPI):
|
57
|
+
uvicorn_access_logger = logging.getLogger('uvicorn.access')
|
58
|
+
for handler in uvicorn_access_logger.handlers:
|
59
|
+
handler.setFormatter(sky_logging.FORMATTER)
|
60
|
+
yield
|
53
61
|
|
54
62
|
def _run_autoscaler(self):
|
55
63
|
logger.info('Starting autoscaler.')
|
@@ -142,18 +150,12 @@ class SkyServeController:
|
|
142
150
|
f'{common_utils.format_exception(e)}')
|
143
151
|
return {'message': 'Error'}
|
144
152
|
|
145
|
-
@self._app.on_event('startup')
|
146
|
-
def configure_logger():
|
147
|
-
uvicorn_access_logger = logging.getLogger('uvicorn.access')
|
148
|
-
for handler in uvicorn_access_logger.handlers:
|
149
|
-
handler.setFormatter(sky_logging.FORMATTER)
|
150
|
-
|
151
153
|
threading.Thread(target=self._run_autoscaler).start()
|
152
154
|
|
153
155
|
logger.info('SkyServe Controller started on '
|
154
156
|
f'http://{self._host}:{self._port}')
|
155
157
|
|
156
|
-
uvicorn.run(self._app, host=
|
158
|
+
uvicorn.run(self._app, host=self._host, port=self._port)
|
157
159
|
|
158
160
|
|
159
161
|
# TODO(tian): Probably we should support service that will stop the VM in
|
sky/serve/load_balancer.py
CHANGED
sky/serve/replica_managers.py
CHANGED
@@ -36,6 +36,7 @@ from sky.utils import env_options
|
|
36
36
|
from sky.utils import ux_utils
|
37
37
|
|
38
38
|
if typing.TYPE_CHECKING:
|
39
|
+
from sky import resources
|
39
40
|
from sky.serve import service_spec
|
40
41
|
|
41
42
|
logger = sky_logging.init_logger(__name__)
|
@@ -172,9 +173,10 @@ def _get_resources_ports(task_yaml: str) -> str:
|
|
172
173
|
task = sky.Task.from_yaml(task_yaml)
|
173
174
|
# Already checked all ports are the same in sky.serve.core.up
|
174
175
|
assert len(task.resources) >= 1, task
|
175
|
-
task_resources = list(task.resources)[0]
|
176
|
+
task_resources: 'resources.Resources' = list(task.resources)[0]
|
176
177
|
# Already checked the resources have and only have one port
|
177
178
|
# before upload the task yaml.
|
179
|
+
assert task_resources.ports is not None
|
178
180
|
return task_resources.ports[0]
|
179
181
|
|
180
182
|
|
sky/serve/serve_state.py
CHANGED
@@ -17,10 +17,18 @@ if typing.TYPE_CHECKING:
|
|
17
17
|
from sky.serve import replica_managers
|
18
18
|
from sky.serve import service_spec
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
_DB_PATH
|
20
|
+
|
21
|
+
def _get_db_path() -> str:
|
22
|
+
"""Workaround to collapse multi-step Path ops for type checker.
|
23
|
+
Ensures _DB_PATH is str, avoiding Union[Path, str] inference.
|
24
|
+
"""
|
25
|
+
path = pathlib.Path(constants.SKYSERVE_METADATA_DIR) / 'services.db'
|
26
|
+
path = path.expanduser().absolute()
|
27
|
+
path.parents[0].mkdir(parents=True, exist_ok=True)
|
28
|
+
return str(path)
|
29
|
+
|
30
|
+
|
31
|
+
_DB_PATH: str = _get_db_path()
|
24
32
|
|
25
33
|
|
26
34
|
def create_table(cursor: 'sqlite3.Cursor', conn: 'sqlite3.Connection') -> None:
|
sky/serve/service.py
CHANGED
@@ -9,7 +9,7 @@ import pathlib
|
|
9
9
|
import shutil
|
10
10
|
import time
|
11
11
|
import traceback
|
12
|
-
from typing import Dict
|
12
|
+
from typing import Dict
|
13
13
|
|
14
14
|
import filelock
|
15
15
|
|
@@ -116,15 +116,17 @@ def _cleanup(service_name: str) -> bool:
|
|
116
116
|
logger.error(f'Replica {info.replica_id} failed to terminate.')
|
117
117
|
versions = serve_state.get_service_versions(service_name)
|
118
118
|
serve_state.remove_service_versions(service_name)
|
119
|
-
|
120
|
-
|
119
|
+
|
120
|
+
def cleanup_version_storage(version: int) -> bool:
|
121
121
|
task_yaml: str = serve_utils.generate_task_yaml_file_name(
|
122
122
|
service_name, version)
|
123
123
|
logger.info(f'Cleaning up storage for version {version}, '
|
124
124
|
f'task_yaml: {task_yaml}')
|
125
|
-
|
126
|
-
|
125
|
+
return cleanup_storage(task_yaml)
|
126
|
+
|
127
|
+
if not all(map(cleanup_version_storage, versions)):
|
127
128
|
failed = True
|
129
|
+
|
128
130
|
return failed
|
129
131
|
|
130
132
|
|
@@ -213,6 +215,7 @@ def _start(service_name: str, tmp_task_yaml: str, job_id: int):
|
|
213
215
|
|
214
216
|
# TODO(tian): Support HTTPS.
|
215
217
|
controller_addr = f'http://{controller_host}:{controller_port}'
|
218
|
+
|
216
219
|
load_balancer_port = common_utils.find_free_port(
|
217
220
|
constants.LOAD_BALANCER_PORT_START)
|
218
221
|
|
@@ -236,13 +239,12 @@ def _start(service_name: str, tmp_task_yaml: str, job_id: int):
|
|
236
239
|
serve_state.set_service_status_and_active_versions(
|
237
240
|
service_name, serve_state.ServiceStatus.SHUTTING_DOWN)
|
238
241
|
finally:
|
239
|
-
process_to_kill: List[multiprocessing.Process] = []
|
240
|
-
if load_balancer_process is not None:
|
241
|
-
process_to_kill.append(load_balancer_process)
|
242
|
-
if controller_process is not None:
|
243
|
-
process_to_kill.append(controller_process)
|
244
242
|
# Kill load balancer process first since it will raise errors if failed
|
245
243
|
# to connect to the controller. Then the controller process.
|
244
|
+
process_to_kill = [
|
245
|
+
proc for proc in [load_balancer_process, controller_process]
|
246
|
+
if proc is not None
|
247
|
+
]
|
246
248
|
subprocess_utils.kill_children_processes(
|
247
249
|
[process.pid for process in process_to_kill], force=True)
|
248
250
|
for process in process_to_kill:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.
|
3
|
+
Version: 1.0.0.dev20241006
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -153,7 +153,7 @@ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "vsphere"
|
|
153
153
|
|
154
154
|
----
|
155
155
|
:fire: *News* :fire:
|
156
|
-
- [Sep, 2024] Point, Launch and Serve **Llama 3.2** on
|
156
|
+
- [Sep, 2024] Point, Launch and Serve **Llama 3.2** on Kubernetes or Any Cloud: [**example**](./llm/llama-3_2/)
|
157
157
|
- [Sep, 2024] Run and deploy [**Pixtral**](./llm/pixtral), the first open-source multimodal model from Mistral AI.
|
158
158
|
- [Jul, 2024] [**Finetune**](./llm/llama-3_1-finetuning/) and [**serve**](./llm/llama-3_1/) **Llama 3.1** on your infra
|
159
159
|
- [Jun, 2024] Reproduce **GPT** with [llm.c](https://github.com/karpathy/llm.c/discussions/481) on any cloud: [**guide**](./llm/gpt-2/)
|
{skypilot_nightly-1.0.0.dev20241004.dist-info → skypilot_nightly-1.0.0.dev20241006.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=Xtdc-QY_FgNqcqj_CNhowHLdYnKutlnzRxVUFB21BJc,5854
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=TfKkVnmRIetATSEVQFp-rOOIRGqVig2i8faSQQt_ixA,20974
|
4
4
|
sky/check.py,sha256=jLMIIJrseaZj1_o5WkbaD9XdyXIlCaT6pyAaIFdhdmA,9079
|
@@ -98,7 +98,7 @@ sky/jobs/constants.py,sha256=YLgcCg_RHSYr_rfsI_4UIdXk78KKKOK29Oem88t5j8I,1350
|
|
98
98
|
sky/jobs/controller.py,sha256=k28bbicxtML6p1YxSetk-1nhBHPCubpvLWJsh7TtU9c,26701
|
99
99
|
sky/jobs/core.py,sha256=Q5ExRWnF7yAYWJxwnB9NfAGBVDNqKYBCrWsypiMLCpY,13637
|
100
100
|
sky/jobs/recovery_strategy.py,sha256=G3iFicEajB-l9FefvcqjqPIazb1X8BJ_AgVmD5bDV2w,25556
|
101
|
-
sky/jobs/state.py,sha256=
|
101
|
+
sky/jobs/state.py,sha256=C6R5Yq7ftBqGPa_71tUjflBMKAaJ1FTTdbgjAwmbJsI,23231
|
102
102
|
sky/jobs/utils.py,sha256=ZB2dJxtJ4hbCRdxHmy8wrmtXIvvGGE80kk5BQTOQWkQ,35653
|
103
103
|
sky/jobs/dashboard/dashboard.py,sha256=HFShuaxKir97QTeK2x37h6bsY6ncaFaNEg1USZqJPdc,3050
|
104
104
|
sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
|
@@ -169,16 +169,16 @@ sky/provision/vsphere/common/ssl_helper.py,sha256=TYzN9K0i_Mk_17PKGyGPgvOGfoizys
|
|
169
169
|
sky/provision/vsphere/common/vapiconnect.py,sha256=R2I1ZWBA19d11fZ_FrIzQT8E1aLl1HU4Rdcj8Z5r3NE,2932
|
170
170
|
sky/provision/vsphere/common/vim_utils.py,sha256=EMWLS8ILpdx6XwUZ9I53y0B_1yFrRrlr4jjIMT84hAc,17877
|
171
171
|
sky/serve/__init__.py,sha256=Qg_XPOtQsUxiN-Q3njHZRfzoMcQ_KKU1QthkiTbESDw,1661
|
172
|
-
sky/serve/autoscalers.py,sha256=
|
172
|
+
sky/serve/autoscalers.py,sha256=khY1oZ22PRaUQNsLCoNKH178X_NiJw0LSLOKr7_LNgY,30275
|
173
173
|
sky/serve/constants.py,sha256=OansIC7a0Pwat-Y5SF43T9phad_EvyjKO3peZgKFEHk,4367
|
174
|
-
sky/serve/controller.py,sha256=
|
174
|
+
sky/serve/controller.py,sha256=NgJqRESccAQCUfW6yoxZbIOxzdsByVuRqTBCyvfmyOI,7706
|
175
175
|
sky/serve/core.py,sha256=cW2SNMPMbGtOcqASHnL__B12BCIErUilGtFw3olQbjk,28947
|
176
|
-
sky/serve/load_balancer.py,sha256=
|
176
|
+
sky/serve/load_balancer.py,sha256=aUfDsgUT_fYrchCwJCeunMPXmAkwJAY58BEu-IN2FaA,11571
|
177
177
|
sky/serve/load_balancing_policies.py,sha256=ExdwH_pxPYpJ6CkoTQCOPSa4lzwbq1LFFMKzmIu8ryk,2331
|
178
|
-
sky/serve/replica_managers.py,sha256=
|
179
|
-
sky/serve/serve_state.py,sha256=
|
178
|
+
sky/serve/replica_managers.py,sha256=dO962WZ_6YWRDpyNemY7SzC7fZHlNfoL4kUS3MaKwDo,57405
|
179
|
+
sky/serve/serve_state.py,sha256=5BZSKKKxQRk-0mku17Ch4Veu4qOhaFvaOJY3zrZCkLw,19315
|
180
180
|
sky/serve/serve_utils.py,sha256=scZzEpJtk-Si05VuzIB212DPovuMHFV2b-T-xWNZ2Yw,37785
|
181
|
-
sky/serve/service.py,sha256=
|
181
|
+
sky/serve/service.py,sha256=fkfJvNJ2BO6rfV0TblZG-QkOXaCyZlpkwbGgrsTzf2w,11872
|
182
182
|
sky/serve/service_spec.py,sha256=iRhW95SERvb4NWtV10uCuhgvW31HuSAmZZ55OX0WK8s,15309
|
183
183
|
sky/setup_files/MANIFEST.in,sha256=BAR1TfVIHwBFfV3najggE8HDXTJyO3fNN0Yhu5aTitI,634
|
184
184
|
sky/setup_files/setup.py,sha256=o4IgiwFoTB6Sdn3MmOirUIS0OSkoh6qo_0vrgcmrYA4,12093
|
@@ -273,9 +273,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=KPqp23B-zQ2SZK03jdHeF9fLTog
|
|
273
273
|
sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
|
274
274
|
sky/utils/kubernetes/rsync_helper.sh,sha256=Ma-N9a271fTfdgP5-8XIQL7KPf8IPUo-uY004PCdUFo,747
|
275
275
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
|
276
|
-
skypilot_nightly-1.0.0.
|
277
|
-
skypilot_nightly-1.0.0.
|
278
|
-
skypilot_nightly-1.0.0.
|
279
|
-
skypilot_nightly-1.0.0.
|
280
|
-
skypilot_nightly-1.0.0.
|
281
|
-
skypilot_nightly-1.0.0.
|
276
|
+
skypilot_nightly-1.0.0.dev20241006.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
277
|
+
skypilot_nightly-1.0.0.dev20241006.dist-info/METADATA,sha256=2utuR-W951m23X1syJR0VXTp8XiM4Gta_L2jnGEFt38,18945
|
278
|
+
skypilot_nightly-1.0.0.dev20241006.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
279
|
+
skypilot_nightly-1.0.0.dev20241006.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
280
|
+
skypilot_nightly-1.0.0.dev20241006.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
281
|
+
skypilot_nightly-1.0.0.dev20241006.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20241004.dist-info → skypilot_nightly-1.0.0.dev20241006.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|