llmboost-hub 0.1.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
@@ -0,0 +1,372 @@
+ import click
+ import subprocess
+ import time
+ from typing import Optional
+ import os
+ import shutil
+ from datetime import datetime
+
+ from llmboost_hub.commands.run import do_run
+ from llmboost_hub.utils.container_utils import (
+     container_name_for_model,
+     is_container_running,
+     is_model_tuning,
+ )
+ from llmboost_hub.commands.completions import complete_model_names
+ from llmboost_hub.utils.config import config
+ from llmboost_hub.utils.gpu_info import get_gpu_count
+ from llmboost_hub.commands.stop import do_stop
+
+
+ def _collect_error_logs(cname: str, max_lines: int = 200) -> str:
+     """
+     Collect recent error log lines from the container.
+
+     Strategy:
+     - Grep case-insensitively for 'error' across `worker*.log` under
+       `config.LLMBOOST_LOGS_DIR` and tail the last `max_lines` matches.
+     - Fall back to tailing any `*.log` if the grep yields nothing.
+
+     Args:
+         cname: Target container name.
+         max_lines: Maximum number of lines to include.
+
+     Returns:
+         Concatenated recent log lines, or an empty string on failure or when
+         no logs are found.
+     """
+     try:
+         grep_cmd = [
+             "docker",
+             "exec",
+             cname,
+             "sh",
+             "-lc",
+             f"grep -i 'error' -r {config.LLMBOOST_LOGS_DIR}/worker*.log 2>/dev/null | tail -n {max_lines}",
+         ]
+         out = subprocess.check_output(grep_cmd, text=True, stderr=subprocess.DEVNULL).strip()
+         if out:
+             return out
+     except Exception:
+         # Grep failed or the shell pipeline returned non-zero; fall through to
+         # the generic tail fallback below.
+         pass
+
+     try:
+         tail_cmd = [
+             "docker",
+             "exec",
+             cname,
+             "sh",
+             "-lc",
+             f"tail -n {max_lines} {config.LLMBOOST_LOGS_DIR}/*.log 2>/dev/null",
+         ]
+         out = subprocess.check_output(tail_cmd, text=True, stderr=subprocess.DEVNULL).strip()
+         return out
+     except Exception:
+         return ""
+
+
+ def do_tune(
+     model: str,
+     lbh_workspace: Optional[str],
+     verbose: bool = False,
+     metrics: str = "latency",
+     algorithm: str = "mb_algorithm",
+     wait_timeout: float = 600.0,
+     poll_interval: float = 1.0,
+     detached: bool = False,
+     gui: bool = False,
+     image: Optional[str] = None,
+     model_path: Optional[str] = None,
+     restart: bool = False,
+     n_tuners: Optional[int] = None,
+     merge_db: bool = False,
+ ) -> dict:
+     """
+     Start the autotuner inside the model container and optionally wait for it.
+
+     Args:
+         model: Model identifier.
+         lbh_workspace: Optional override for the workspace mount path.
+         verbose: If True, echo detailed logs and commands.
+         metrics: Primary optimization metric (`latency` or `throughput`).
+         algorithm: Autotuning algorithm identifier.
+         wait_timeout: Max seconds to wait for completion (ignored when detached).
+         poll_interval: Seconds between tuning status checks.
+         detached: If True, return right after starting the tuner.
+         gui: If True, print the diagnostics GUI URL using `config.LBH_GUI_PORT`.
+         image: If set, force a specific docker image for the model.
+         model_path: If set, local HF model directory to mount inside the container.
+         restart: If True, restart the container if it is already running.
+         n_tuners: Number of parallel tuners; defaults to the GPU count when None.
+         merge_db: If True, merge the container DB into the host DB and exit
+             (no tuning occurs).
+
+     Returns:
+         Dict: {"returncode": int, "container_name": str, "error": str | None}
+     """
+     cname = container_name_for_model(model)
+     # Ensure the container is running; otherwise start it via `lbh run`.
+     if not is_container_running(cname):
+         if verbose:
+             click.echo(f"[tune] No running container for {model}; starting via lbh run...")
+         # Pass through the forced image when provided.
+         res = do_run(
+             model,
+             lbh_workspace,
+             verbose=verbose,
+             image=image,
+             model_path=model_path,
+             restart=restart,
+             docker_args=(),  # use empty docker_args
+         )
+         if res["returncode"] != 0:
+             return {"returncode": res["returncode"], "container_name": "", "error": res["error"]}
+         time.sleep(1)
+         if not is_container_running(cname):
+             return {"returncode": 1, "container_name": "", "error": "Failed to start container."}
+
+     # Optional: print a GUI URL hint.
+     if gui:
+         try:
+             gui_port = config.LBH_GUI_PORT
+             click.echo(f"[tune] Diagnostics GUI: http://localhost:{gui_port}")
+         except Exception:
+             # Ignore failures to read/format the port.
+             pass
+
+     # Default n_tuners to the GPU count if not provided.
+     if not n_tuners:
+         n_tuners = get_gpu_count()
+
+     # Launch the tuner detached (`-d`) inside the container so the host can
+     # poll for completion; `-d` is swapped for `-i` below when verbose
+     # attached output is requested.
+     exec_cmd = [
+         "docker",
+         "exec",
+         "-d",
+         cname,
+         "llmboost",
+         "tuner",
+         "--model",
+         model,
+         "--metrics",
+         metrics,
+         "--algorithm",
+         algorithm,
+         "--n-tuners",
+         f"{n_tuners}",
+     ]
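+
+     # Illustrative expansion of exec_cmd (placeholders in <...>; metric and
+     # algorithm shown at their do_tune defaults):
+     #   docker exec -d <cname> llmboost tuner --model <model> \
+     #       --metrics latency --algorithm mb_algorithm --n-tuners <gpu_count>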
+
+     # In verbose attached mode, prefer an interactive exec so logs surface on
+     # the caller's terminal: replace `-d` with `-i` in exec_cmd.
+     if verbose and not detached:
+         exec_cmd = [part if part != "-d" else "-i" for part in exec_cmd]
+
+     if merge_db:
+         # Ensure the backup directory exists on the host.
+         os.makedirs(os.path.join(config.LBH_HOME, config.TUNER_DB_BACKUPS_DIRNAME), exist_ok=True)
+         # Back up the host DB before merging,
+         # e.g. inference.db.20231123_153045.bak.
+         backup_file = os.path.join(
+             config.LBH_HOME,
+             config.TUNER_DB_BACKUPS_DIRNAME,
+             f"inference.db.{datetime.now().strftime('%Y%m%d_%H%M%S')}.bak",
+         )
+         shutil.copy2(config.LBH_TUNER_DB_PATH, backup_file)
+         click.echo(f"[tune] Backed up host tuner database to {backup_file} before merging.")
+
+         # Replace all args from `--model` onward with `--merge-db`, merging the
+         # container DB into the host DB. Run in the foreground (`-i`, not `-d`)
+         # so the success message below is accurate.
+         exec_cmd = exec_cmd[: exec_cmd.index("--model")] + [
+             "--merge-db",
+             config.LLMBOOST_TUNER_DB_BACKUP_PATH,
+         ]
+         exec_cmd = [part if part != "-d" else "-i" for part in exec_cmd]
+         if verbose:
+             click.echo(f"[tune] Merging tuner database: {' '.join(exec_cmd)}")
+         subprocess.run(exec_cmd, check=True)
+         click.echo("[tune] Merged tuner database from container into host database.")
+         # No tuning is performed in merge-db mode.
+         return {"returncode": 0, "container_name": cname, "error": None}
+
+     if verbose:
+         click.echo(f"[tune] Tuning model: {' '.join(exec_cmd)}")
+
+     # Start the tuner and handle launch failures.
+     start = time.time()
+     try:
+         # Detached by default; interactive (blocking) in verbose attached mode.
+         subprocess.run(exec_cmd, check=True)
+     except subprocess.CalledProcessError as e:
+         return {
+             "returncode": e.returncode,
+             "container_name": cname,
+             "error": f"Failed to start tuner inside container (exit {e.returncode})",
+         }
+
+     if detached:
+         # Return early when running in the background.
+         click.echo("[tune] Tuner started in background (detached).")
+         return {"returncode": 0, "container_name": cname, "error": None}
+
+     if not verbose:
+         # Poll for completion with minimal feedback.
+         click.echo(f"[tune] Waiting for tuning to complete (timeout {wait_timeout:.1f}s)...")
+         time.sleep(3.0)  # brief pause to let the process start
+     while (
+         is_container_running(cname)
+         and is_model_tuning(cname)
+         and (time.time() - start < wait_timeout)
+     ):
+         # Minimal progress feedback: elapsed time every 60s, a dot every 5s.
+         elapsed = int(time.time() - start)
+         if elapsed % 60 == 0:
+             click.echo(f"{elapsed}s.", nl=False)
+         elif elapsed % 5 == 0:
+             click.echo(".", nl=False)
+         time.sleep(max(0.1, float(poll_interval)))
+
+     # Handle the container unexpectedly stopping.
+     if not is_container_running(cname):
+         return {
+             "returncode": 1,
+             "container_name": cname,
+             "error": "Container stopped during tuning.",
+         }
+     # Timed out while still tuning: surface recent logs.
+     if is_model_tuning(cname):
+         logs = _collect_error_logs(cname, max_lines=200)
+         msg = (
+             f"Tuning did not complete within {wait_timeout:.1f} seconds.\n"
+             "NOTE: The tuning process may still be running in the background inside "
+             "the container. Increase --wait-timeout to wait longer."
+         )
+         if logs:
+             msg += f"\nRecent logs:\n{logs}"
+         return {"returncode": 0, "container_name": cname, "error": msg}
+
+     # Completed within the timeout.
+     elapsed = time.time() - start
+     click.echo(f"[tune] Tuning finished after {elapsed:.1f} seconds.")
+     return {"returncode": 0, "container_name": cname, "error": None}
+
+
+ @click.command(name="tune", context_settings={"help_option_names": ["-h", "--help"]})
+ @click.argument("model", required=True, shell_complete=complete_model_names)
+ @click.option(
+     "--lbh-workspace", type=click.Path(), help="Override workspace path mounted inside container."
+ )
+ @click.option(
+     "--metrics",
+     type=click.Choice(["throughput", "latency"]),
+     default="throughput",
+     show_default=True,
+     help="Primary optimization metric.",
+ )
+ @click.option(
+     "-a",
+     "--algorithm",
+     type=str,
+     default="mb_algorithm",
+     show_default=True,
+     help="Autotuning algorithm identifier.",
+ )
+ @click.option(
+     "--wait-timeout",
+     default=600.0,
+     show_default=True,
+     type=float,
+     help="Maximum seconds to wait for tuning to complete (ignored in detached mode).",
+ )
+ @click.option(
+     "--poll-interval",
+     default=1.0,
+     show_default=True,
+     type=float,
+     help="Seconds between tuning status checks.",
+ )
+ @click.option(
+     "-d",
+     "--detached",
+     is_flag=True,
+     help="Do not wait for tuning to complete; return immediately after starting the tuner.",
+ )
+ @click.option(
+     "-i",
+     "--image",
+     "forced_image",
+     type=str,
+     default=None,
+     help="Force a specific docker image (required when multiple images match the model).",
+ )
+ @click.option(
+     "-m",
+     "--model_path",
+     "model_path",
+     type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True),
+     default=None,
+     help="Local HF model directory to mount inside the container.",
+ )
+ @click.option(
+     "-r",
+     "--restart",
+     is_flag=True,
+     help="Stop and restart the container if it is already running.",
+ )
+ @click.option(
+     "--gui",
+     is_flag=True,
+     help="Print the localhost URL for the diagnostics GUI.",
+ )
+ @click.option(
+     "-n",
+     "--n-tuners",
+     type=int,
+     default=None,
+     help="Number of parallel tuners to run (defaults to the number of GPUs detected).",
+ )
+ @click.option(
+     "--merge-db",
+     is_flag=True,
+     help="Merge the container DB into the existing host DB (no tuning is performed when this flag is set).",
+ )
+ @click.pass_context
+ def tune(
+     ctx,
+     model,
+     lbh_workspace,
+     metrics,
+     algorithm,
+     wait_timeout,
+     poll_interval,
+     detached,
+     forced_image,
+     model_path,
+     restart,
+     gui,
+     n_tuners,
+     merge_db,
+ ):
+     """
+     Start autotuning for a given model inside its container.
+     """
+     verbose = ctx.obj.get("VERBOSE", False)
+
+     # Stop the existing container first when a restart is requested.
+     if restart:
+         stop_res = do_stop(model, None, verbose=verbose)
+         if stop_res["returncode"] != 0:
+             if is_container_running(container_name_for_model(model)):
+                 raise click.ClickException(
+                     stop_res.get("error") or "Failed to stop existing container"
+                 )
+
+     res = do_tune(
+         model=model,
+         lbh_workspace=lbh_workspace,
+         verbose=verbose,
+         metrics=metrics,
+         algorithm=algorithm,
+         wait_timeout=wait_timeout,
+         poll_interval=poll_interval,
+         detached=detached,
+         gui=gui,
+         image=forced_image,
+         model_path=model_path,
+         restart=restart,
+         n_tuners=n_tuners,
+         merge_db=merge_db,
+     )
+     if res["returncode"] != 0:
+         raise click.ClickException(res["error"] or "Tune failed")
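+
+
+ # Illustrative CLI invocations (assuming the installed entry point is `lbh`,
+ # as the runtime messages above suggest; <model> is a placeholder):
+ #   lbh tune <model> --metrics latency --wait-timeout 1200
+ #   lbh tune <model> -d           # start tuning and return immediately
+ #   lbh tune <model> --merge-db   # merge the container tuner DB into the host DB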
@@ -0,0 +1,220 @@
+ import os
+ from typing import Dict, Any
+ import logging
+ import yaml
+
+ _DEFAULT_HOME = "~/.llmboost_hub"
+ _CONFIG_FILENAME = "config.yaml"
+
+ log = logging.getLogger("CONFIG")
+
+
+ def expand_path(p: str) -> str:
+     """
+     Expand a path containing `~` to the user home directory.
+
+     Args:
+         p: Path string that may contain a leading `~`.
+
+     Returns:
+         The expanded absolute or relative path string.
+     """
+     return os.path.expanduser(p)
+
+
+ def ensure_home() -> str:
+     """
+     Ensure `LBH_HOME` exists and return its absolute path.
+
+     Resolution order:
+     - ENV `LBH_HOME` (if set)
+     - built-in default `~/.llmboost_hub`
+
+     Returns:
+         Absolute path to `LBH_HOME` (created if missing).
+     """
+     home_env = os.getenv("LBH_HOME", _DEFAULT_HOME)
+     home = expand_path(home_env)
+     os.makedirs(home, exist_ok=True)
+     return os.path.abspath(home)
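+
+ # Illustrative resolution (home directory is a placeholder):
+ #   LBH_HOME unset    -> ensure_home() == "/home/<user>/.llmboost_hub"
+ #   LBH_HOME=/opt/lbh -> ensure_home() == "/opt/lbh" (created if missing)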
+
+
+ def _get_home() -> str:
+     """Internal helper to return `LBH_HOME` (ensures existence)."""
+     return ensure_home()
+
+
+ def _config_path() -> str:
+     """Absolute path to `config.yaml` under `LBH_HOME`."""
+     return os.path.join(_get_home(), _CONFIG_FILENAME)
+
+
+ class _Constants:
+     # LLMBOOST paths are always inside the container.
+     CONTAINER_LBH_HOME = "/llmboost_hub"  # container lbh home
+     CONTAINER_USER_WORKSPACE = "/user_workspace"  # container user workspace mount point
+     LLMBOOST_WORKSPACE = "/workspace"  # container workspace dir
+     LLMBOOST_MODELS_DIR = f"{LLMBOOST_WORKSPACE}/models"  # container models dir
+     LLMBOOST_LOGS_DIR = f"{LLMBOOST_WORKSPACE}/logs"  # container logs dir
+     LLMBOOST_LICENSE_PATH = f"{LLMBOOST_WORKSPACE}/license.skm"  # container license path
+     LLMBOOST_TUNER_DB_PATH = f"{LLMBOOST_WORKSPACE}/data/inference.db"  # container tuner DB path
+     LLMBOOST_TUNER_DB_BACKUP_PATH = (
+         f"{LLMBOOST_WORKSPACE}/data/inference.db.bak"  # container tuner DB backup path
+     )
+     TUNER_DB_BACKUPS_DIRNAME = "tuner_db_backups"  # tuner DB backup dir name (host and container)
+
+
+ class _Defaults:
+     # LBH paths are always on the host.
+     LBH_HOME = _get_home()  # host lbh home
+     LBH_MODELS = os.path.join(_get_home(), "models")  # host models dir
+     LBH_MODELS_STAGING = os.path.join(_get_home(), "models", ".tmp")  # host staging dir
+     LBH_LICENSE_PATH = os.path.join(_get_home(), "license.skm")  # host license path
+     LBH_WORKSPACE = os.path.join(_get_home(), "workspace")  # host workspace dir
+     LBH_LOOKUP_URL = "https://docs.google.com/spreadsheets/d/1f8FTgGDJkI6hnJQsd-RhHtlGhYTx_p8AAvDLNbRRTV8/export?format=csv"  # lookup URL
+     LBH_LOOKUP_CACHE = os.path.join(_get_home(), "lookup_cache.csv")  # host lookup cache
+     LBH_LOOKUP_CACHE_TTL = 60  # seconds between cache refreshes
+     LBH_GUI_PORT = 8080  # GUI port
+     LBH_TUNER_DB_PATH = os.path.join(
+         _get_home(), os.path.basename(_Constants.LLMBOOST_TUNER_DB_PATH)
+     )  # host tuner DB path
+     LBH_AUTO_PREP = True  # whether to auto-prepare missing models on run
+
+
+ # Coerce env/config values to the expected type (handles bools, ints, and floats).
+ def _to_bool(v: Any) -> bool:
+     if isinstance(v, bool):
+         return v
+     if isinstance(v, (int, float)):
+         return bool(v)
+     if isinstance(v, str):
+         s = v.strip().lower()
+         if s in {"1", "true", "t", "yes", "y", "on"}:
+             return True
+         if s in {"0", "false", "f", "no", "n", "off", ""}:
+             return False
+     raise ValueError(f"Cannot parse boolean from: {v!r}")
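+
+ # Illustrative coercions:
+ #   _to_bool("Yes") -> True,  _to_bool("off") -> False,  _to_bool("maybe") -> ValueError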
+
+
+ def _coerce_to_type(value: Any, default: Any) -> Any:
+     """Coerce an env/config value to the type of `default` (bool, int, or float)."""
+     # Note: bool is a subclass of int, so handle bool before int.
+     if isinstance(default, bool):
+         try:
+             return _to_bool(value)
+         except Exception:
+             return default
+     if isinstance(default, int):
+         try:
+             return int(value)
+         except Exception:
+             return default
+     if isinstance(default, float):
+         try:
+             return float(value)
+         except Exception:
+             return default
+     return value
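+
+ # Illustrative coercions (string values as read from the environment):
+ #   _coerce_to_type("9090", 8080) -> 9090    (int default)
+ #   _coerce_to_type("0", True)    -> False   (bool default)
+ #   _coerce_to_type("oops", 60)   -> 60      (unparseable; falls back to the default)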
+
+
+ class _Config(_Defaults, _Constants):
+     _loaded_cfg = None
+
+     @staticmethod
+     def _resolve(cfg: Dict, key):
+         """
+         Resolve a config value for `key`.
+
+         Resolution order:
+         1) Environment variable (if set, even if "0"/"false")
+         2) config.yaml (if present, even if False/0)
+         3) Defaults (_Defaults), which are also written back to config.yaml
+            when the key is missing there.
+         """
+         # 1) ENV (use presence, not truthiness; coerce to the default's type).
+         v = os.getenv(key)
+         if v is not None:
+             return _coerce_to_type(v, getattr(_Defaults, key))
+
+         # 2) config.yaml (skip empty values; coerce to the default's type).
+         if key in cfg:
+             val = cfg.get(key)
+             if val is not None and val != "":
+                 return _coerce_to_type(val, getattr(_Defaults, key))
+
+         # 3) Fall back to the default, persisting it to config.yaml if missing.
+         if key not in cfg:
+             _write_config({**cfg, key: getattr(_Defaults, key)})
+         return getattr(_Defaults, key)
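+
+     # Illustrative precedence for one key (values are placeholders):
+     #   env LBH_GUI_PORT=9090          -> 9090, regardless of config.yaml
+     #   config.yaml LBH_GUI_PORT: 8081 -> 8081, when the env var is unset
+     #   neither present                -> the default 8080 (written back to config.yaml)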
+
+     def __init__(self):
+         """Load the config on instantiation and populate attributes from resolved values."""
+         loaded_cfg = _load_config(create_if_missing=True)
+         for key in dir(_Defaults):
+             if not key.startswith("_"):
+                 setattr(self, key, self._resolve(loaded_cfg, key))
+
+
+ def _write_config(cfg: Dict[str, Any]) -> None:
+     """
+     Write the given config mapping to `config.yaml` under `LBH_HOME`.
+
+     Args:
+         cfg: Mapping of key -> value to persist.
+     """
+     path = _config_path()
+     os.makedirs(os.path.dirname(path), exist_ok=True)
+     with open(path, "w", encoding="utf-8") as fh:
+         yaml.safe_dump(cfg, fh, sort_keys=True)
+
+
+ def _load_config(create_if_missing: bool = True) -> Dict[str, Any]:
+     """
+     Load `config.yaml` from `LBH_HOME` with safe defaults.
+
+     Behavior:
+     - If the file is missing and `create_if_missing=True`: create it with
+       defaults and return those defaults.
+     - If the file exists but is not a mapping: warn and rewrite defaults.
+     - On read error: warn and rewrite defaults.
+
+     Args:
+         create_if_missing: Whether to create a default config file if missing.
+
+     Returns:
+         The loaded config mapping (possibly defaults).
+     """
+     path = _config_path()
+     if not os.path.exists(path):
+         if create_if_missing:
+             # Bootstrap with defaults.
+             cfg = {key: getattr(_Defaults, key) for key in dir(_Defaults) if not key.startswith("_")}
+             _write_config(cfg)
+             log.info(f"Created default config at {path}")
+             return cfg
+         return {}
+     try:
+         with open(path, "r", encoding="utf-8") as fh:
+             data = yaml.safe_load(fh) or {}
+         if not isinstance(data, dict):
+             # Rewrite defaults to recover from invalid content.
+             log.warning(f"Config at {path} is not a mapping. Rewriting defaults.")
+             data = {key: getattr(_Defaults, key) for key in dir(_Defaults) if not key.startswith("_")}
+             _write_config(data)
+         return data
+     except Exception as e:
+         # Any error reading or parsing: recover by writing defaults.
+         log.warning(f"Failed to read config at {path}: {e}. Rewriting defaults.")
+         cfg = {key: getattr(_Defaults, key) for key in dir(_Defaults) if not key.startswith("_")}
+         _write_config(cfg)
+         return cfg
+
+
+ config = _Config()
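+
+ # Illustrative use from other modules (as in the tune command above):
+ #   from llmboost_hub.utils.config import config
+ #   config.LBH_GUI_PORT       # 8080 unless overridden via env or config.yaml
+ #   config.LLMBOOST_LOGS_DIR  # "/workspace/logs"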