b10-transfer 0.1.8__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: b10-transfer
3
- Version: 0.1.8
3
+ Version: 0.2.1
4
4
  Summary: Distributed PyTorch file transfer for Baseten - Environment-aware, lock-free file transfer management
5
5
  License: MIT
6
6
  Keywords: pytorch,file-transfer,cache,machine-learning,inference
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "b10-transfer"
7
- version = "0.1.8"
7
+ version = "0.2.1"
8
8
  description = "Distributed PyTorch file transfer for Baseten - Environment-aware, lock-free file transfer management"
9
9
  authors = ["Shounak Ray <shounak.noreply@baseten.co>", "Fred Liu <fred.liu.noreply@baseten.co>"]
10
10
  maintainers = ["Fred Liu <fred.liu.noreply@baseten.co>", "Shounak Ray <shounak.noreply@baseten.co>"]
@@ -28,6 +28,9 @@ classifiers = [
28
28
  ]
29
29
  packages = [{include = "b10_transfer", from = "src"}]
30
30
 
31
+ [tool.poetry.scripts]
32
+ b10-transfer = "b10_transfer.cache_cli:main"
33
+
31
34
  [tool.poetry.dependencies]
32
35
  python = "^3.9"
33
36
  torch = ">=2.0.0"
@@ -9,7 +9,7 @@ from .constants import OperationStatus
9
9
  from .logging_utils import get_b10_logger
10
10
 
11
11
  # Version
12
- __version__ = "0.1.8"
12
+ __version__ = "0.2.1"
13
13
 
14
14
  __all__ = [
15
15
  "CacheError",
@@ -0,0 +1,124 @@
1
+ # src/b10_transfer/cache_cli.py
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import os
6
+ import sys
7
+ import time
8
+ import urllib.error
9
+ import urllib.request
10
+ from dataclasses import dataclass
11
+
12
+
13
@dataclass(frozen=True)
class WaitCfg:
    """Immutable configuration for the vLLM readiness-wait loop."""

    # Endpoint probed with GET; considered ready when it answers HTTP 200.
    url: str
    # Total time budget in seconds before the wait gives up.
    timeout_s: float
    # Sleep in seconds between consecutive readiness probes.
    interval_s: float
    # Logging level name (already upper-cased by the caller, e.g. "INFO").
    loglevel: str
19
+
20
+
21
# Readiness-probe and logging defaults; each is overridable via environment.
DEFAULT_URL = os.getenv("B10_TRANSFER_VLLM_URL", "http://127.0.0.1:8000/v1/models")
DEFAULT_TIMEOUT_S = float(os.getenv("B10_TRANSFER_TIMEOUT_S", "1800"))  # 30m default
DEFAULT_INTERVAL_S = float(os.getenv("B10_TRANSFER_INTERVAL_S", "2"))
DEFAULT_LOGLEVEL = os.getenv("B10_TRANSFER_CLI_LOGLEVEL", "INFO").upper()


# Torch-inductor cache root. expanduser() is required: environment variables
# are not shell-expanded, so without it the "~" in the default value would be
# used verbatim and torch would create a literal "~" directory in the CWD.
# (expanduser is a no-op for already-absolute user-supplied paths.)
VLLM_CACHE_DIR = os.path.expanduser(os.getenv("VLLM_CACHE_ROOT", "~/.cache/vllm"))
28
+
29
+
30
+ def _setup_logging(level: str) -> logging.Logger:
31
+ logging.basicConfig(
32
+ level=getattr(logging, level, logging.INFO),
33
+ format="%(asctime)s | %(levelname)s | %(message)s",
34
+ )
35
+ return logging.getLogger("b10_transfer.cli")
36
+
37
+
38
+ def _http_ok(url: str, logger: logging.Logger) -> bool:
39
+ """
40
+ Return True if vLLM readiness looks good.
41
+
42
+ We consider it 'ready' if GET <url> returns 200.
43
+ """
44
+ try:
45
+ req = urllib.request.Request(url, method="GET")
46
+ with urllib.request.urlopen(req, timeout=5) as resp:
47
+ if resp.status != 200:
48
+ return False
49
+ return True
50
+ except (urllib.error.URLError, urllib.error.HTTPError) as e:
51
+ logger.debug("Readiness probe failed: %s", e)
52
+ return False
53
+ except Exception as e:
54
+ logger.debug("Unexpected readiness error: %s", e)
55
+ return False
56
+
57
+
58
def _wait_for_ready(cfg: WaitCfg, logger: logging.Logger) -> bool:
    """Poll cfg.url until it reports ready or cfg.timeout_s elapses.

    Probes via _http_ok every cfg.interval_s seconds. Returns True on the
    first successful probe, False once the monotonic deadline is exceeded.
    """
    started = time.monotonic()
    logger.info(
        "Waiting for vLLM readiness at %s (timeout=%.0fs, interval=%.1fs)",
        cfg.url,
        cfg.timeout_s,
        cfg.interval_s,
    )

    while not _http_ok(cfg.url, logger):
        elapsed = time.monotonic() - started
        if elapsed > cfg.timeout_s:
            logger.error(
                "Timed out after %.0fs waiting for vLLM readiness.", cfg.timeout_s
            )
            return False
        time.sleep(cfg.interval_s)

    logger.info("vLLM reported ready at %s", cfg.url)
    return True
78
+
79
+
80
def main() -> None:
    """CLI entry point: preload the compile cache, wait for vLLM, save it.

    Exit codes:
        3 — the readiness wait itself crashed,
        4 — vLLM never became ready within the timeout,
        5 — saving the compile cache failed,
        0 — otherwise (a failed preload is non-fatal).
    """
    # Configure torch compile cache location. expanduser() matters here:
    # environment variables are not shell-expanded, so a default such as
    # "~/.cache/vllm" would otherwise be used verbatim (it is a no-op for
    # already-expanded absolute paths).
    os.environ["TORCHINDUCTOR_CACHE_DIR"] = os.path.expanduser(VLLM_CACHE_DIR)

    # Import here so the environment variable above is set before the
    # imported module reads it.
    from .cache import load_compile_cache, save_compile_cache

    cfg = WaitCfg(
        url=DEFAULT_URL,
        timeout_s=DEFAULT_TIMEOUT_S,
        interval_s=DEFAULT_INTERVAL_S,
        loglevel=DEFAULT_LOGLEVEL,
    )

    logger = _setup_logging(cfg.loglevel)

    # 1) Preload any existing cache (non-fatal on error).
    try:
        logger.info("Calling load_compile_cache() …")
        load_compile_cache()
        logger.info("load_compile_cache() returned.")
    except Exception as e:
        logger.exception("load_compile_cache() failed: %s", e)

    # 2) Wait for vLLM HTTP to be ready.
    try:
        ready = _wait_for_ready(cfg, logger)
    except Exception as e:
        logger.exception("Readiness wait crashed: %s", e)
        sys.exit(3)

    if not ready:
        # Loop timed out. Safe exit.
        sys.exit(4)

    # 3) Save compile cache.
    try:
        logger.info("Calling save_compile_cache() …")
        save_compile_cache()
        logger.info("save_compile_cache() completed.")
    except Exception as e:
        logger.exception("save_compile_cache() failed: %s", e)
        sys.exit(5)

    logger.info("vLLM automatic torch compile cache done.")
File without changes