llmboost-hub 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,303 @@
1
+ import click
2
+ import subprocess
3
+ import time
4
+ from typing import Optional
5
+ import socket
6
+
7
+ from llmboost_hub.commands.run import do_run
8
+ from llmboost_hub.utils.container_utils import (
9
+ container_name_for_model,
10
+ is_container_running,
11
+ is_model_initializing,
12
+ is_model_ready2serve,
13
+ )
14
+ from llmboost_hub.utils import gpu_info
15
+ from llmboost_hub.commands.stop import do_stop
16
+ from llmboost_hub.commands.completions import complete_model_names
17
+ from llmboost_hub.utils.config import config
18
+
19
+
20
+ def _collect_error_logs(cname: str, max_lines: int = 200) -> str:
21
+ """
22
+ Return recent error lines from inside the container for diagnosis.
23
+
24
+ Strategy:
25
+ - First: grep -i 'error' across `worker*.log` in `config.LLMBOOST_LOGS_DIR` and tail the last `max_lines` lines.
26
+ - Fallback: tail any `*.log` in `config.LLMBOOST_LOGS_DIR`.
27
+
28
+ Args:
29
+ cname: Container name.
30
+ max_lines: Maximum lines to return.
31
+
32
+ Returns:
33
+ Joined log lines, or empty string when unavailable.
34
+ """
35
+ try:
36
+ logs_dir = config.LLMBOOST_LOGS_DIR
37
+ grep_cmd = [
38
+ "docker",
39
+ "exec",
40
+ cname,
41
+ "sh",
42
+ "-lc",
43
+ f"grep -i 'error' -r {logs_dir}/worker*.log 2>/dev/null | tail -n {max_lines}",
44
+ ]
45
+ out = subprocess.check_output(grep_cmd, text=True, stderr=subprocess.DEVNULL).strip()
46
+ if out:
47
+ return out
48
+ except subprocess.CalledProcessError:
49
+ pass
50
+ except Exception:
51
+ pass
52
+
53
+ try:
54
+ logs_dir = config.LLMBOOST_LOGS_DIR
55
+ tail_cmd = [
56
+ "docker",
57
+ "exec",
58
+ cname,
59
+ "sh",
60
+ "-lc",
61
+ f"tail -n {max_lines} {logs_dir}/*.log 2>/dev/null",
62
+ ]
63
+ out = subprocess.check_output(tail_cmd, text=True, stderr=subprocess.DEVNULL).strip()
64
+ return out
65
+ except Exception:
66
+ return ""
67
+
68
+
69
def do_serve(
    model: str,
    lbh_workspace: str | None,
    verbose: bool = False,
    host: str = "0.0.0.0",
    port: int = 8080,
    wait_timeout: float = 600.0,
    poll_interval: float = 1.0,
    detached: bool = False,
    force: bool = False,
    image: Optional[str] = None,
    model_path: Optional[str] = None,
    restart: bool = False,
) -> dict:
    """
    Start llmboost serve in the container and optionally wait for readiness.

    Args:
        model: Target model identifier.
        lbh_workspace: Optional host path to mount as /user_workspace.
        verbose: If True, echo docker exec command and progress.
        host: Bind address passed to the service.
        port: Bind port passed to the service.
        wait_timeout: Max seconds to wait for readiness (ignored if detached).
        poll_interval: Seconds between readiness checks.
        detached: If True, start and return immediately without polling.
        force: If True, bypass pre-flight GPU utilization guard.
        image: If set, force a specific docker image for the model.
        model_path: If set, local HF model directory to mount inside the container.
        restart: If True, restart the container if it is already running.

    Returns:
        Dict: {returncode: int, container_name: str, error: str|None}
    """

    # Guard: prevent accidental start if GPUs are already in use (unless forced)
    if not force and gpu_info.any_gpu_in_use():
        return {
            "returncode": 1,
            "container_name": "",
            "error": "Detected non-zero GPU utilization (compute or VRAM). Decrease GPU memory utilization or reduce GPU memory used by other processes. Use -f/--force to bypass.",
        }

    cname = container_name_for_model(model)
    if not is_container_running(cname):
        # Start container if missing
        if verbose:
            click.echo(f"[serve] No running container for {model}; starting via lbh run...")
        res = do_run(
            model,
            lbh_workspace,
            verbose=verbose,
            image=image,
            model_path=model_path,
            restart=restart,
            docker_args=(),  # no extra docker args when auto-starting
        )
        if res["returncode"] != 0:
            return {"returncode": res["returncode"], "container_name": "", "error": res["error"]}
        time.sleep(1)  # give docker a moment to report the container as running
        if not is_container_running(cname):
            return {
                "returncode": 1,
                "container_name": "",
                "error": "Failed to start container for model.",
            }

    # Launch llmboost serve detached by default; switch to interactive on verbose+attached
    exec_cmd = [
        "docker",
        "exec",
        "-d",
        cname,
        "llmboost",
        "serve",
        "--host",
        host,
        "--port",
        str(port),
        "--model_name",
        model,
    ]

    if verbose and not detached:
        # Replace -d with -i to surface logs interactively during startup
        exec_cmd = [part if part != "-d" else "-i" for part in exec_cmd]

    if verbose:
        # Tag corrected from "[tune]" — this is the serve command.
        click.echo("[serve] Executing inside container:")
        click.echo(" ".join(exec_cmd))

    try:
        subprocess.run(exec_cmd, check=True)
    except subprocess.CalledProcessError as e:
        return {
            "returncode": e.returncode,
            "container_name": cname,
            "error": f"Failed to start service inside container (exit {e.returncode})\n{_collect_error_logs(cname, max_lines=200)}",
        }

    start = time.time()
    time.sleep(3.0)  # brief pause to let process start

    # If detached, return immediately (no readiness polling)
    if detached:
        click.echo(
            "[serve] Started llmboost serve in background (detached). Not waiting for readiness."
        )
        return {"returncode": 0, "container_name": cname, "error": None}

    click.echo(
        f"[serve] Waiting for server to become ready on {host}:{port} (timeout {wait_timeout:.1f}s)..."
    )

    # Poll until it is no longer initializing or timeout is reached
    while is_model_initializing(cname) and (time.time() - start < wait_timeout):
        elapsed = int(time.time() - start)
        if elapsed % 60 == 0:  # every minute
            click.echo(f"{elapsed}s.", nl=False)
        elif elapsed % 5 == 0:  # every 5 seconds
            click.echo(".", nl=False)
        time.sleep(max(0.1, float(poll_interval)))

    elapsed = time.time() - start
    if is_model_ready2serve(cname, host=host, port=port):
        click.echo(f"[serve] Server is ready after {elapsed:.1f} seconds.")
        return {"returncode": 0, "container_name": cname, "error": None}
    else:
        # Not ready within the window: surface recent container error logs.
        error_logs = _collect_error_logs(cname, max_lines=200)
        error_msg = f"Server failed to become ready within {wait_timeout:.1f} seconds."
        if error_logs:
            error_msg += f"\nRecent error logs:\n{error_logs}"
        return {"returncode": 1, "container_name": cname, "error": error_msg}
202
+
203
+
204
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("model", required=True, shell_complete=complete_model_names)
@click.option(
    "--lbh-workspace", type=click.Path(), help="Override workspace path mounted inside container."
)
@click.option("--host", default="0.0.0.0", show_default=True, help="Host address to bind to.")
@click.option("--port", default=8080, show_default=True, help="Port to bind to.", type=int)
@click.option(
    "--wait-timeout",
    default=600.0,
    show_default=True,
    type=float,
    help="Maximum seconds to wait for the server to become ready.",
)
@click.option(
    "--poll-interval",
    default=1.0,
    show_default=True,
    type=float,
    help="Seconds between readiness checks.",
)
@click.option(
    "-d",
    "--detached",
    is_flag=True,
    help="Do not wait for server readiness; return immediately after starting serve.",
)
@click.option(
    "-f",
    "--force",
    is_flag=True,
    help="Ignore GPU utilization checks before starting serve.",
)
@click.option(
    "-i",
    "--image",
    "forced_image",
    type=str,
    default=None,
    help="Force a specific docker image (required when multiple images match the model).",
)
@click.option(
    "-m",
    "--model_path",
    "model_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True),
    default=None,
    # f-prefix removed: the help string has no placeholders.
    help="Local HF model directory to mount inside the container.",
)
@click.option(
    "-r",
    "--restart",
    is_flag=True,
    help="Restart the container if it is running before starting.",
)
@click.pass_context
def serve(
    ctx,
    model,
    lbh_workspace,
    host,
    port,
    wait_timeout,
    poll_interval,
    detached,
    force,
    forced_image,
    model_path,
    restart,
):
    """
    Start llmboost server inside the model container.

    Optionally stops a running container first (-r/--restart), then delegates
    to do_serve and surfaces any failure as a ClickException.
    """
    verbose = ctx.obj.get("VERBOSE", False)

    # Restart if requested. A failed stop is only fatal when the container is
    # in fact still running afterwards (e.g. it was already stopped).
    if restart:
        stop_res = do_stop(model, None, verbose=verbose)
        if stop_res["returncode"] != 0:
            if is_container_running(container_name_for_model(model)):
                raise click.ClickException(
                    stop_res.get("error") or "Failed to stop existing container"
                )

    res = do_serve(
        model,
        lbh_workspace,
        verbose=verbose,
        host=host,
        port=port,
        wait_timeout=wait_timeout,
        poll_interval=poll_interval,
        detached=detached,
        force=force,
        image=forced_image,
        model_path=model_path,
        restart=restart,
    )
    if res["returncode"] != 0:
        raise click.ClickException(res["error"] or "Serve failed")
@@ -0,0 +1,34 @@
1
+ import click
2
+ import pandas as pd
3
+ import tabulate
4
+ from llmboost_hub.commands.list import do_list
5
+ from llmboost_hub.commands.completions import complete_model_names
6
+
7
+
8
@click.command(name="status", context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("model", required=False, default="", shell_complete=complete_model_names)
@click.pass_context
def status_cmd(ctx: click.Context, model: str | None):
    """
    Show a compact status table for models.
    """
    verbose = ctx.obj.get("VERBOSE", False)
    listing = do_list(query=model or "", verbose=verbose)
    images = listing.get("images_df")

    # Nothing to render when the listing is missing or empty.
    if not isinstance(images, pd.DataFrame) or images.empty:
        click.echo("Found 0 models")
        return

    # Restrict to the columns we display; bail out when neither exists.
    wanted = [name for name in ["status", "model"] if name in images.columns]
    if not wanted:
        click.echo("No status information available.")
        return

    table = images[wanted].reset_index(drop=True)
    table.index += 1  # human-friendly 1-based row numbering
    click.echo(f"Found {len(table)} models")
    rendered = tabulate.tabulate(
        table.values.tolist(),
        headers=list(table.columns),
        showindex=list(table.index),
        tablefmt="psql",
    )
    click.echo(rendered)
@@ -0,0 +1,59 @@
1
+ import click
2
+ import subprocess
3
+
4
+ from llmboost_hub.utils.container_utils import (
5
+ container_name_for_model,
6
+ is_container_running,
7
+ )
8
+ from llmboost_hub.commands.completions import complete_model_names
9
+
10
+
11
def do_stop(model: str, container: str | None, verbose: bool = False) -> dict:
    """
    Stop the model's container.

    Args:
        model: Model identifier (used when container is not directly provided).
        container: Optional explicit container name to stop.
        verbose: If True, echo the docker command.

    Returns:
        Dict: {returncode: int, container_name: str, error: str|None}
    """
    # Explicit container name wins; otherwise derive it from the model id.
    cname = container or container_name_for_model(model)

    # Fast-fail if target container is not running
    if not is_container_running(cname):
        return {
            "returncode": 1,
            "container_name": cname,
            "error": f"Container '{cname}' is not running.",
        }
    cmd = ["docker", "stop", cname]
    if verbose:
        # Log tag fixed: this is the stop command, not run.
        click.echo("[stop] " + " ".join(cmd))
    try:
        subprocess.run(cmd, check=True)
        return {"returncode": 0, "container_name": cname, "error": None}
    except subprocess.CalledProcessError as e:
        return {
            "returncode": e.returncode,
            "container_name": cname,
            "error": f"Docker stop failed (exit {e.returncode})",
        }
44
+
45
+
46
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("model", required=True, shell_complete=complete_model_names)
@click.option(
    "-c", "--container", "container", type=str, help="Container name to stop (overrides model)."
)
@click.pass_context
def stop(ctx: click.Context, model, container):
    """
    Stops a running container for a given model (or explicit name).
    """
    # Verbosity is configured once on the root command and shared via ctx.obj.
    verbose = ctx.obj.get("VERBOSE", False)
    # Delegate to do_stop; -c/--container takes precedence over the model name.
    res = do_stop(model, container, verbose=verbose)
    # Surface any failure (container not running, docker error) as a CLI error.
    if res["returncode"] != 0:
        raise click.ClickException(res["error"] or "Stop failed")
@@ -0,0 +1,45 @@
1
+ import click
2
+ import subprocess
3
+ import json
4
+ from llmboost_hub.commands.completions import complete_model_names
5
+
6
+
7
@click.command(name="test", context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("model", required=True, shell_complete=complete_model_names)
@click.option(
    "-q",
    "--query",
    "query_str",
    default="What are you?",
    show_default=True,
    help="User query prompt.",
)
@click.option(
    "-t", "--max_tokens", default=300, show_default=True, type=int, help="Max tokens in completion."
)
@click.option("--host", default="127.0.0.1", show_default=True, help="Host to call.")
@click.option("--port", default=8080, show_default=True, type=int, help="Port to call.")
@click.pass_context
def test_cmd(ctx: click.Context, model: str, query_str: str, max_tokens: int, host: str, port: int):
    """
    Call the running llmboost serve endpoint and print the raw JSON response.
    """
    # Target the OpenAI-compatible chat endpoint of the local server.
    url = f"http://{host}:{port}/v1/chat/completions"
    body = json.dumps(
        {
            "model": model,
            "messages": [{"role": "user", "content": query_str}],
            "max_tokens": max_tokens,
        }
    )

    # Delegate the HTTP request to curl (-sS keeps it quiet but surfaces errors).
    cmd = ["curl", "-sS", url, "-H", "Content-Type: application/json", "-d", body]
    if ctx.obj.get("VERBOSE", False):
        click.echo("[test] " + " ".join(cmd))

    # Run the request; convert a non-zero curl exit into a clear CLI error.
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        raise click.ClickException(f"curl failed (exit {e.returncode})")