b10-transfer 0.1.8__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/PKG-INFO +1 -1
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/pyproject.toml +4 -1
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/__init__.py +1 -1
- b10_transfer-0.2.0/src/b10_transfer/cache_cli.py +124 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/README.md +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/archive.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/cache.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/cleanup.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/constants.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/core.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/environment.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/info.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/logging_utils.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/space_monitor.py +0 -0
- {b10_transfer-0.1.8 → b10_transfer-0.2.0}/src/b10_transfer/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: b10-transfer
|
3
|
-
Version: 0.1.8
|
3
|
+
Version: 0.2.0
|
4
4
|
Summary: Distributed PyTorch file transfer for Baseten - Environment-aware, lock-free file transfer management
|
5
5
|
License: MIT
|
6
6
|
Keywords: pytorch,file-transfer,cache,machine-learning,inference
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
4
4
|
|
5
5
|
[tool.poetry]
|
6
6
|
name = "b10-transfer"
|
7
|
-
version = "0.1.8"
|
7
|
+
version = "0.2.0"
|
8
8
|
description = "Distributed PyTorch file transfer for Baseten - Environment-aware, lock-free file transfer management"
|
9
9
|
authors = ["Shounak Ray <shounak.noreply@baseten.co>", "Fred Liu <fred.liu.noreply@baseten.co>"]
|
10
10
|
maintainers = ["Fred Liu <fred.liu.noreply@baseten.co>", "Shounak Ray <shounak.noreply@baseten.co>"]
|
@@ -28,6 +28,9 @@ classifiers = [
|
|
28
28
|
]
|
29
29
|
packages = [{include = "b10_transfer", from = "src"}]
|
30
30
|
|
31
|
+
[tool.poetry.scripts]
|
32
|
+
b10-transfer = "b10_transfer.cache_cli:main"
|
33
|
+
|
31
34
|
[tool.poetry.dependencies]
|
32
35
|
python = "^3.9"
|
33
36
|
torch = ">=2.0.0"
|
@@ -0,0 +1,124 @@
|
|
1
|
+
# src/b10_transfer/cache_cli.py
|
2
|
+
from __future__ import annotations
|
3
|
+
|
4
|
+
import logging
|
5
|
+
import os
|
6
|
+
import sys
|
7
|
+
import time
|
8
|
+
import urllib.error
|
9
|
+
import urllib.request
|
10
|
+
from dataclasses import dataclass
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass(frozen=True)
class WaitCfg:
    """Immutable settings for the vLLM readiness wait."""

    # Endpoint that is probed with an HTTP GET.
    url: str
    # Overall wait budget in seconds, measured on the monotonic clock.
    timeout_s: float
    # Pause between two consecutive probes, in seconds.
    interval_s: float
    # Logging level name handed to the logging setup.
    loglevel: str
|
19
|
+
|
20
|
+
|
21
|
+
# Readiness endpoint polled before the compile cache is saved.
DEFAULT_URL = os.getenv("B10_TRANSFER_VLLM_URL", "http://127.0.0.1:8000/v1/models")
# Overall wait budget in seconds (30m default).
DEFAULT_TIMEOUT_S = float(os.getenv("B10_TRANSFER_TIMEOUT_S", "1800"))
# Pause between readiness probes, in seconds.
DEFAULT_INTERVAL_S = float(os.getenv("B10_TRANSFER_INTERVAL_S", "2"))
# Level name for the CLI logger; upper-cased so it matches the logging constants.
DEFAULT_LOGLEVEL = os.getenv("B10_TRANSFER_CLI_LOGLEVEL", "INFO").upper()

# Expand "~" here: the value is exported verbatim as TORCHINDUCTOR_CACHE_DIR,
# and an environment variable is not tilde-expanded by the shell or the OS.
VLLM_CACHE_DIR = os.path.expanduser(os.getenv("VLLM_CACHE_ROOT", "~/.cache/vllm"))
|
28
|
+
|
29
|
+
|
30
|
+
def _setup_logging(level: str) -> logging.Logger:
    """Configure root logging and return the CLI logger.

    Args:
        level: Name of a logging level such as "INFO" or "debug"
            (case-insensitive). Anything that does not resolve to an
            integer level constant falls back to ``logging.INFO``.

    Returns:
        The logger named ``"b10_transfer.cli"``.
    """
    # A plain getattr() on the logging module can hand back non-level
    # attributes (e.g. the function ``logging.debug`` for "debug", or the
    # string ``logging.BASIC_FORMAT``), which basicConfig() rejects.
    resolved = getattr(logging, str(level).upper(), None)
    if isinstance(resolved, bool) or not isinstance(resolved, int):
        resolved = logging.INFO

    logging.basicConfig(
        level=resolved,
        format="%(asctime)s | %(levelname)s | %(message)s",
    )
    return logging.getLogger("b10_transfer.cli")
|
36
|
+
|
37
|
+
|
38
|
+
def _http_ok(url: str, logger: logging.Logger, *, timeout: float = 5.0) -> bool:
    """Return True if vLLM readiness looks good.

    We consider it 'ready' if GET <url> returns 200.

    Args:
        url: Endpoint to probe.
        logger: Receives a debug record describing any probe failure.
        timeout: Per-request timeout in seconds passed to ``urlopen``.

    Returns:
        True only for a 200 response; False for any other status and for
        every error raised while probing (this function never raises).
    """
    try:
        req = urllib.request.Request(url, method="GET")
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.status == 200
    except urllib.error.URLError as e:
        # HTTPError subclasses URLError, so non-2xx responses land here too.
        logger.debug("Readiness probe failed: %s", e)
        return False
    except Exception as e:
        # Best-effort probe: malformed URLs, socket timeouts, etc. simply
        # count as "not ready yet" so the caller can keep polling.
        logger.debug("Unexpected readiness error: %s", e)
        return False
|
56
|
+
|
57
|
+
|
58
|
+
def _wait_for_ready(cfg: WaitCfg, logger: logging.Logger) -> bool:
    """Poll ``cfg.url`` until it reports ready or the wait budget is spent.

    Args:
        cfg: Probe URL, overall timeout and polling interval (seconds).
        logger: Receives progress, success and timeout records.

    Returns:
        True as soon as a probe succeeds, False once ``cfg.timeout_s``
        seconds have elapsed without a successful probe.
    """
    deadline = time.monotonic() + cfg.timeout_s
    # time.sleep() raises ValueError for negative durations.
    pause = max(cfg.interval_s, 0.0)

    logger.info(
        "Waiting for vLLM readiness at %s (timeout=%.0fs, interval=%.1fs)",
        cfg.url,
        cfg.timeout_s,
        cfg.interval_s,
    )

    while True:
        if _http_ok(cfg.url, logger):
            logger.info("vLLM reported ready at %s", cfg.url)
            return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            logger.error(
                "Timed out after %.0fs waiting for vLLM readiness.", cfg.timeout_s
            )
            return False

        # Never sleep past the deadline; the last probe then happens right
        # at the deadline instead of up to one interval after it.
        time.sleep(min(pause, remaining))
|
78
|
+
|
79
|
+
|
80
|
+
def main() -> None:
    """Entry point: preload the compile cache, wait for vLLM, save the cache.

    Steps:
        1. Point ``TORCHINDUCTOR_CACHE_DIR`` at the vLLM cache directory.
        2. Call ``load_compile_cache()``; a failure is logged and ignored.
        3. Poll the readiness URL until it answers or the timeout expires.
        4. Call ``save_compile_cache()``.

    Exit codes (via ``sys.exit``):
        3: the readiness wait raised an unexpected exception.
        4: the readiness wait timed out.
        5: ``save_compile_cache()`` raised.
    """
    # Configure torch compile cache location. Expand "~" explicitly because
    # an environment variable value is not tilde-expanded for us.
    os.environ["TORCHINDUCTOR_CACHE_DIR"] = os.path.expanduser(VLLM_CACHE_DIR)

    # Import here to allow environment variables to be set before the imported script uses them.
    # The module lives inside the b10_transfer package, so it must be imported
    # through the package; a bare ``from cache import ...`` only resolves when
    # a top-level ``cache`` module happens to be on sys.path.
    # NOTE(review): assumes b10_transfer.cache exposes both functions — confirm.
    from b10_transfer.cache import load_compile_cache, save_compile_cache

    cfg = WaitCfg(
        url=DEFAULT_URL,
        timeout_s=DEFAULT_TIMEOUT_S,
        interval_s=DEFAULT_INTERVAL_S,
        loglevel=DEFAULT_LOGLEVEL,
    )

    logger = _setup_logging(cfg.loglevel)

    # 1) Preload any existing cache (non-fatal on error)
    try:
        logger.info("Calling load_compile_cache() …")
        load_compile_cache()
        logger.info("load_compile_cache() returned.")
    except Exception as e:
        logger.exception("load_compile_cache() failed: %s", e)

    # 2) Wait for vLLM HTTP to be ready
    try:
        ready = _wait_for_ready(cfg, logger)
    except Exception as e:
        logger.exception("Readiness wait crashed: %s", e)
        sys.exit(3)

    if not ready:
        # Loop timed out. Safe exit.
        sys.exit(4)

    # 3) Save compile cache
    try:
        logger.info("Calling save_compile_cache() …")
        save_compile_cache()
        logger.info("save_compile_cache() completed.")
    except Exception as e:
        logger.exception("save_compile_cache() failed: %s", e)
        sys.exit(5)

    logger.info("vLLM automatic torch compile cache done.")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|