@elvatis_com/openclaw-cli-bridge-elvatis 2.3.0 → 2.4.0

This diff shows the content of publicly released package versions as published to their respective public registries; it is provided for informational purposes only.
@@ -7,7 +7,7 @@ _Last updated: 2026-04-10_
  
  | Component | Version | Build | Tests | Status |
  |-----------|---------|-------|-------|--------|
- | openclaw-cli-bridge-elvatis | 2.3.0 | ✅ | ✅ | ✅ Stable |
+ | openclaw-cli-bridge-elvatis | 2.4.0 | ✅ | ✅ | ✅ Stable |
  <!-- /SECTION: plugin_status -->
  
  <!-- SECTION: release_state -->
@@ -15,9 +15,9 @@ _Last updated: 2026-04-10_
  
  | Platform | Published Version | Status |
  |----------|-------------------|--------|
- | GitHub | v2.3.0 | ✅ Pushed to main |
- | npm | 2.3.0 | Published (via CI) |
- | ClawHub | 2.3.0 | Published (via CI) |
+ | GitHub | v2.4.0 | ✅ Pushed to main |
+ | npm | 2.4.0 | Pending (via CI) |
+ | ClawHub | 2.4.0 | Pending (via CI) |
  <!-- /SECTION: release_state -->
  
  <!-- SECTION: open_tasks -->
@@ -31,6 +31,7 @@ _No open tasks._
  
  | Task | Title | Version |
  |------|-------|---------|
+ | T-020 | Metrics & health dashboard: request volume, latency, errors, token usage | 2.4.0 |
  | T-019 | Full-featured CLI bridge: tool calls + multimodal + autonomous execution | 2.3.0 |
  | T-018 | Fix vllm apiKey corruption (401) + harden config-patcher | 2.2.1 |
  | T-017 | Fix log spam, restart loops, CLI blocking | 2.2.0 |
@@ -2,7 +2,7 @@
    "id": "openclaw-cli-bridge-elvatis",
    "slug": "openclaw-cli-bridge-elvatis",
    "name": "OpenClaw CLI Bridge",
-   "version": "2.2.2",
+   "version": "2.4.0",
    "license": "MIT",
    "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
    "providers": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
-   "version": "2.3.0",
+   "version": "2.4.0",
    "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
    "type": "module",
    "openclaw": {
package/src/metrics.ts ADDED
@@ -0,0 +1,85 @@
+ /**
+  * metrics.ts
+  *
+  * In-memory metrics collector for the CLI bridge proxy.
+  * Tracks request counts, errors, latency, and token usage per model.
+  * Recording is O(1); building a snapshot is O(models), so the hot path never blocks the event loop.
+  */
+ 
+ export interface ModelMetrics {
+   model: string;
+   requests: number;
+   errors: number;
+   totalLatencyMs: number;
+   promptTokens: number;
+   completionTokens: number;
+   lastRequestAt: number | null;
+ }
+ 
+ export interface MetricsSnapshot {
+   startedAt: number;
+   totalRequests: number;
+   totalErrors: number;
+   models: ModelMetrics[]; // sorted by requests desc
+ }
+ 
+ class MetricsCollector {
+   private startedAt = Date.now();
+   private data = new Map<string, ModelMetrics>();
+ 
+   recordRequest(
+     model: string,
+     durationMs: number,
+     success: boolean,
+     promptTokens?: number,
+     completionTokens?: number,
+   ): void {
+     let entry = this.data.get(model);
+     if (!entry) {
+       entry = {
+         model,
+         requests: 0,
+         errors: 0,
+         totalLatencyMs: 0,
+         promptTokens: 0,
+         completionTokens: 0,
+         lastRequestAt: null,
+       };
+       this.data.set(model, entry);
+     }
+     entry.requests++;
+     if (!success) entry.errors++;
+     entry.totalLatencyMs += durationMs;
+     if (promptTokens) entry.promptTokens += promptTokens;
+     if (completionTokens) entry.completionTokens += completionTokens;
+     entry.lastRequestAt = Date.now();
+   }
+ 
+   getMetrics(): MetricsSnapshot {
+     let totalRequests = 0;
+     let totalErrors = 0;
+     const models: ModelMetrics[] = [];
+ 
+     for (const entry of this.data.values()) {
+       totalRequests += entry.requests;
+       totalErrors += entry.errors;
+       models.push({ ...entry }); // copy so callers cannot mutate live state
+     }
+ 
+     models.sort((a, b) => b.requests - a.requests);
+ 
+     return {
+       startedAt: this.startedAt,
+       totalRequests,
+       totalErrors,
+       models,
+     };
+   }
+ 
+   reset(): void {
+     this.startedAt = Date.now();
+     this.data.clear();
+   }
+ }
+ 
+ export const metrics = new MetricsCollector();
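The collector is exported as a process-wide singleton; callers time the upstream call themselves and report the outcome. A minimal usage sketch ("example-model" is a placeholder ID, not a model the package ships):

```ts
import { metrics } from "./metrics.js";

// Record a successful 1.2s request that used 350 prompt / 90 completion tokens.
metrics.recordRequest("example-model", 1_200, true, 350, 90);

// Record a failure; token counts are omitted on the error path.
metrics.recordRequest("example-model", 45_000, false);

const snap = metrics.getMetrics();
console.log(snap.totalRequests);             // 2
console.log(snap.totalErrors);               // 1
console.log(snap.models[0].totalLatencyMs);  // 46200
```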
@@ -19,6 +19,7 @@ import { chatgptComplete, chatgptCompleteStream, type ChatMessage as ChatGPTBrow
  import type { BrowserContext } from "playwright";
  import { renderStatusPage, type StatusProvider } from "./status-template.js";
  import { sessionManager } from "./session-manager.js";
+ import { metrics } from "./metrics.js";
  
  export type GrokCompleteOptions = Parameters<typeof grokComplete>[1];
  export type GrokCompleteStreamOptions = Parameters<typeof grokCompleteStream>[1];
@@ -222,6 +223,7 @@ async function handleRequest(
      chatgpt: sessionStatus("chatgpt", opts.getChatGPTContext, expiry.chatgpt),
    },
    models: CLI_MODELS.length,
+   metrics: metrics.getMetrics(),
  };
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
  res.end(JSON.stringify(health, null, 2));
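With this change the health payload embeds a `MetricsSnapshot` under `metrics`. An illustrative value, typed against the interface above (the model ID and all numbers are invented):

```ts
import type { MetricsSnapshot } from "./metrics.js";

const example: MetricsSnapshot = {
  startedAt: 1760000000000,
  totalRequests: 42,
  totalErrors: 1,
  models: [
    {
      model: "example-model",
      requests: 30,
      errors: 0,
      totalLatencyMs: 93_000, // avg latency = 93_000 / 30 = 3.1s
      promptTokens: 52_000,
      completionTokens: 18_400,
      lastRequestAt: 1760000500000,
    },
    // ...one entry per model, sorted by request count descending
  ],
};
```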
@@ -240,7 +242,7 @@ async function handleRequest(
    { name: "ChatGPT", icon: "◉", expiry: expiry.chatgpt, loginCmd: "/chatgpt-login", ctx: opts.getChatGPTContext?.() ?? null },
  ];
  
- const html = renderStatusPage({ version, port: opts.port, providers, models: CLI_MODELS, modelCommands: opts.modelCommands });
+ const html = renderStatusPage({ version, port: opts.port, providers, models: CLI_MODELS, modelCommands: opts.modelCommands, metrics: metrics.getMetrics() });
  res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
  res.end(html);
  return;
@@ -331,6 +333,7 @@ async function handleRequest(
  const grokMessages = messages as GrokChatMessage[];
  const doGrokComplete = opts._grokComplete ?? grokComplete;
  const doGrokCompleteStream = opts._grokCompleteStream ?? grokCompleteStream;
+ const grokStart = Date.now();
  try {
    if (stream) {
      res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -341,11 +344,13 @@ async function handleRequest(
        (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
        opts.log
      );
+     metrics.recordRequest(model, Date.now() - grokStart, true, result.promptTokens, result.completionTokens);
      sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
      res.write("data: [DONE]\n\n");
      res.end();
    } else {
      const result = await doGrokComplete(grokCtx, { messages: grokMessages, model: grokModel, timeoutMs }, opts.log);
+     metrics.recordRequest(model, Date.now() - grokStart, true, result.promptTokens, result.completionTokens);
      res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
      res.end(JSON.stringify({
        id, object: "chat.completion", created, model,
@@ -354,6 +359,7 @@ async function handleRequest(
      }));
    }
  } catch (err) {
+   metrics.recordRequest(model, Date.now() - grokStart, false);
    const msg = (err as Error).message;
    opts.warn(`[cli-bridge] Grok error for ${model}: ${msg}`);
    if (!res.headersSent) {
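The same pattern (a start timestamp before the try, a success record on each completion path, a failure record in the catch) repeats for the Gemini, Claude, and ChatGPT branches below. A hypothetical helper, not part of the package, that would express it once:

```ts
import { metrics } from "./metrics.js";

/**
 * Run an upstream call and record its duration and outcome under `model`.
 * Token counts are forwarded when the result carries them (the Grok results
 * expose promptTokens/completionTokens; the browser providers do not).
 */
async function timed<T extends { promptTokens?: number; completionTokens?: number }>(
  model: string,
  fn: () => Promise<T>,
): Promise<T> {
  const start = Date.now();
  try {
    const result = await fn();
    metrics.recordRequest(model, Date.now() - start, true, result.promptTokens, result.completionTokens);
    return result;
  } catch (err) {
    metrics.recordRequest(model, Date.now() - start, false);
    throw err;
  }
}
```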
@@ -380,6 +386,7 @@ async function handleRequest(
  const geminiMessages = messages as GeminiBrowserChatMessage[];
  const doGeminiComplete = opts._geminiComplete ?? geminiComplete;
  const doGeminiCompleteStream = opts._geminiCompleteStream ?? geminiCompleteStream;
+ const geminiStart = Date.now();
  try {
    if (stream) {
      res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -390,11 +397,13 @@ async function handleRequest(
        (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
        opts.log
      );
+     metrics.recordRequest(model, Date.now() - geminiStart, true);
      sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
      res.write("data: [DONE]\n\n");
      res.end();
    } else {
      const result = await doGeminiComplete(geminiCtx, { messages: geminiMessages, model, timeoutMs }, opts.log);
+     metrics.recordRequest(model, Date.now() - geminiStart, true);
      res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
      res.end(JSON.stringify({
        id, object: "chat.completion", created, model,
@@ -403,6 +412,7 @@ async function handleRequest(
      }));
    }
  } catch (err) {
+   metrics.recordRequest(model, Date.now() - geminiStart, false);
    const msg = (err as Error).message;
    opts.warn(`[cli-bridge] Gemini browser error for ${model}: ${msg}`);
    if (!res.headersSent) {
@@ -429,6 +439,7 @@ async function handleRequest(
  const claudeMessages = messages as ClaudeBrowserChatMessage[];
  const doClaudeComplete = opts._claudeComplete ?? claudeComplete;
  const doClaudeCompleteStream = opts._claudeCompleteStream ?? claudeCompleteStream;
+ const claudeStart = Date.now();
  try {
    if (stream) {
      res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -439,11 +450,13 @@ async function handleRequest(
        (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
        opts.log
      );
+     metrics.recordRequest(model, Date.now() - claudeStart, true);
      sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
      res.write("data: [DONE]\n\n");
      res.end();
    } else {
      const result = await doClaudeComplete(claudeCtx, { messages: claudeMessages, model, timeoutMs }, opts.log);
+     metrics.recordRequest(model, Date.now() - claudeStart, true);
      res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
      res.end(JSON.stringify({
        id, object: "chat.completion", created, model,
@@ -452,6 +465,7 @@ async function handleRequest(
      }));
    }
  } catch (err) {
+   metrics.recordRequest(model, Date.now() - claudeStart, false);
    const msg = (err as Error).message;
    opts.warn(`[cli-bridge] Claude browser error for ${model}: ${msg}`);
    if (!res.headersSent) {
@@ -479,6 +493,7 @@ async function handleRequest(
  const chatgptMessages = messages as ChatGPTBrowserChatMessage[];
  const doChatGPTComplete = opts._chatgptComplete ?? chatgptComplete;
  const doChatGPTCompleteStream = opts._chatgptCompleteStream ?? chatgptCompleteStream;
+ const chatgptStart = Date.now();
  try {
    if (stream) {
      res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -489,11 +504,13 @@ async function handleRequest(
        (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
        opts.log
      );
+     metrics.recordRequest(model, Date.now() - chatgptStart, true);
      sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
      res.write("data: [DONE]\n\n");
      res.end();
    } else {
      const result = await doChatGPTComplete(chatgptCtx, { messages: chatgptMessages, model: chatgptModel, timeoutMs }, opts.log);
+     metrics.recordRequest(model, Date.now() - chatgptStart, true);
      res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
      res.end(JSON.stringify({
        id, object: "chat.completion", created, model,
@@ -502,6 +519,7 @@ async function handleRequest(
      }));
    }
  } catch (err) {
+   metrics.recordRequest(model, Date.now() - chatgptStart, false);
    const msg = (err as Error).message;
    opts.warn(`[cli-bridge] ChatGPT browser error for ${model}: ${msg}`);
    if (!res.headersSent) {
@@ -546,6 +564,7 @@ async function handleRequest(
  const bitnetMessages = [{ role: "system", content: BITNET_SYSTEM }, ...truncated];
  const requestBody = JSON.stringify({ ...parsed, messages: bitnetMessages, tools: undefined });
  
+ const bitnetStart = Date.now();
  try {
    const targetUrl = new URL("/v1/chat/completions", bitnetUrl);
    const proxyRes = await new Promise<http.IncomingMessage>((resolve, reject) => {
@@ -566,6 +585,7 @@ async function handleRequest(
      proxyReq.end();
    });
  
+   metrics.recordRequest(model, Date.now() - bitnetStart, true);
    // Forward status + headers
    const fwdHeaders: Record<string, string> = { ...corsHeaders() };
    const ct = proxyRes.headers["content-type"];
@@ -577,6 +597,7 @@ async function handleRequest(
    res.writeHead(proxyRes.statusCode ?? 200, fwdHeaders);
    proxyRes.pipe(res);
  } catch (err) {
+   metrics.recordRequest(model, Date.now() - bitnetStart, false);
    const msg = (err as Error).message;
    if (msg.includes("ECONNREFUSED") || msg.includes("ECONNRESET") || msg.includes("ENOTFOUND")) {
      res.writeHead(503, { "Content-Type": "application/json", ...corsHeaders() });
@@ -603,33 +624,75 @@ async function handleRequest(
  let result: CliToolResult;
  let usedModel = model;
  const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined };
+ 
+ // ── Dynamic timeout: scale with conversation size ────────────────────────
+ const baseTimeout = opts.timeoutMs ?? 300_000; // 5 min default (was 120s)
+ const msgExtra = Math.max(0, cleanMessages.length - 10) * 2_000;
+ const toolExtra = (tools?.length ?? 0) * 5_000;
+ const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, 600_000);
+ 
+ // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
+ let sseHeadersSent = false;
+ let keepaliveInterval: ReturnType<typeof setInterval> | null = null;
+ if (stream) {
+   res.writeHead(200, {
+     "Content-Type": "text/event-stream",
+     "Cache-Control": "no-cache",
+     Connection: "keep-alive",
+     ...corsHeaders(),
+   });
+   sseHeadersSent = true;
+   res.write(": keepalive\n\n");
+   keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, 15_000);
+ }
+ 
+ const cliStart = Date.now();
  try {
-   result = await routeToCliRunner(model, cleanMessages, opts.timeoutMs ?? 120_000, routeOpts);
+   result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
+   metrics.recordRequest(model, Date.now() - cliStart, true);
  } catch (err) {
+   const primaryDuration = Date.now() - cliStart;
    const msg = (err as Error).message;
    // ── Model fallback: retry once with a lighter model if configured ────
    const fallbackModel = opts.modelFallbacks?.[model];
    if (fallbackModel) {
+     metrics.recordRequest(model, primaryDuration, false);
      opts.warn(`[cli-bridge] ${model} failed (${msg}), falling back to ${fallbackModel}`);
+     const fallbackStart = Date.now();
      try {
-       result = await routeToCliRunner(fallbackModel, cleanMessages, opts.timeoutMs ?? 120_000, routeOpts);
+       result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
+       metrics.recordRequest(fallbackModel, Date.now() - fallbackStart, true);
        usedModel = fallbackModel;
        opts.log(`[cli-bridge] fallback to ${fallbackModel} succeeded`);
      } catch (fallbackErr) {
+       metrics.recordRequest(fallbackModel, Date.now() - fallbackStart, false);
        const fallbackMsg = (fallbackErr as Error).message;
        opts.warn(`[cli-bridge] fallback ${fallbackModel} also failed: ${fallbackMsg}`);
-       res.writeHead(500, { "Content-Type": "application/json" });
-       res.end(JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } }));
+       if (sseHeadersSent) {
+         res.write(`data: ${JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } })}\n\n`);
+         res.write("data: [DONE]\n\n");
+         res.end();
+       } else {
+         res.writeHead(500, { "Content-Type": "application/json" });
+         res.end(JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } }));
+       }
        return;
      }
    } else {
+     metrics.recordRequest(model, primaryDuration, false);
      opts.warn(`[cli-bridge] CLI error for ${model}: ${msg}`);
-     res.writeHead(500, { "Content-Type": "application/json" });
-     res.end(JSON.stringify({ error: { message: msg, type: "cli_error" } }));
+     if (sseHeadersSent) {
+       res.write(`data: ${JSON.stringify({ error: { message: msg, type: "cli_error" } })}\n\n`);
+       res.write("data: [DONE]\n\n");
+       res.end();
+     } else {
+       res.writeHead(500, { "Content-Type": "application/json" });
+       res.end(JSON.stringify({ error: { message: msg, type: "cli_error" } }));
+     }
      return;
    }
  } finally {
-   // Clean up temp media files after response
+   if (keepaliveInterval) clearInterval(keepaliveInterval);
    cleanupMediaFiles(mediaFiles);
  }
  
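A worked example of the dynamic-timeout formula above, assuming no `opts.timeoutMs` override:

```ts
// A conversation with 30 messages and 4 tool definitions:
const baseTimeout = 300_000;                    // 5 min default
const msgExtra = Math.max(0, 30 - 10) * 2_000;  // 20 extra messages -> 40_000 ms
const toolExtra = 4 * 5_000;                    // 4 tools -> 20_000 ms
const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, 600_000);
// 300_000 + 40_000 + 20_000 = 360_000 ms (6 min), under the 10-minute hard cap.
console.log(effectiveTimeout); // 360000
```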
@@ -637,12 +700,7 @@ async function handleRequest(
  const finishReason = hasToolCalls ? "tool_calls" : "stop";
  
  if (stream) {
-   res.writeHead(200, {
-     "Content-Type": "text/event-stream",
-     "Cache-Control": "no-cache",
-     Connection: "keep-alive",
-     ...corsHeaders(),
-   });
+   // SSE headers already sent above — stream response chunks directly
  
    if (hasToolCalls) {
      // Stream tool_calls in OpenAI SSE format
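Two protocol notes on the streaming path: the `: keepalive` lines are SSE comments, which spec-compliant clients ignore, and once headers have been sent a CLI failure arrives as an in-stream `data:` event rather than an HTTP 500. A sketch of how a client might handle that event (the parsing shown is illustrative, not from the package):

```ts
function handleSseData(payload: string): void {
  if (payload === "[DONE]") return;
  const chunk = JSON.parse(payload);
  if (chunk.error) {
    // The bridge emits { error: { message, type: "cli_error" } } in-stream.
    throw new Error(`bridge error: ${chunk.error.message}`);
  }
  // ...otherwise treat as a normal OpenAI-style streaming chunk
}
```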
package/src/status-template.ts CHANGED
@@ -6,6 +6,7 @@
   */
  
  import type { BrowserContext } from "playwright";
+ import type { MetricsSnapshot } from "./metrics.js";
  
  export interface StatusProvider {
    name: string;
@@ -22,6 +23,8 @@ export interface StatusTemplateOptions {
    models: Array<{ id: string; name: string; contextWindow: number; maxTokens: number }>;
    /** Maps model ID → slash command name (e.g. "openai-codex/gpt-5.3-codex" → "/cli-codex") */
    modelCommands?: Record<string, string>;
+   /** In-memory metrics snapshot — optional for backward compat */
+   metrics?: MetricsSnapshot;
  }
  
  function statusBadge(p: StatusProvider): { label: string; color: string; dot: string } {
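Because `metrics` is optional, call sites that predate 2.4.0 keep type-checking unchanged. A sketch of both call shapes (the port number and the empty provider/model lists are placeholders):

```ts
import { renderStatusPage } from "./status-template.js";
import { metrics } from "./metrics.js";

// Pre-2.4.0 shape: still valid, renders the page without the metrics section.
const plain = renderStatusPage({ version: "2.4.0", port: 8790, providers: [], models: [] });

// 2.4.0 shape: passing a snapshot adds the summary cards and per-model table.
const withMetrics = renderStatusPage({
  version: "2.4.0",
  port: 8790,
  providers: [],
  models: [],
  metrics: metrics.getMetrics(),
});
```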
@@ -32,6 +35,114 @@ function statusBadge(p: StatusProvider): { label: string; color: string; dot: st
    return { label: "Logged in", color: "#3b82f6", dot: "🔵" };
  }
  
+ // ── Formatting helpers ──────────────────────────────────────────────────────
+ 
+ function formatDuration(ms: number): string {
+   if (ms < 1000) return `${Math.round(ms)}ms`;
+   if (ms < 60_000) return `${(ms / 1000).toFixed(1)}s`;
+   return `${(ms / 60_000).toFixed(1)}m`;
+ }
+ 
+ function formatTokens(n: number): string {
+   if (n === 0) return "—";
+   if (n < 1000) return String(n);
+   if (n < 1_000_000) return `${(n / 1000).toFixed(1)}k`;
+   return `${(n / 1_000_000).toFixed(2)}M`;
+ }
+ 
+ function timeAgo(epochMs: number | null): string {
+   if (!epochMs) return "—";
+   const diff = Date.now() - epochMs;
+   if (diff < 60_000) return "just now";
+   if (diff < 3_600_000) return `${Math.floor(diff / 60_000)}m ago`;
+   if (diff < 86_400_000) return `${Math.floor(diff / 3_600_000)}h ago`;
+   return `${Math.floor(diff / 86_400_000)}d ago`;
+ }
+ 
+ function formatUptime(startedAt: number): string {
+   const diff = Date.now() - startedAt;
+   const s = Math.floor(diff / 1000);
+   if (s < 60) return `${s}s`;
+   const m = Math.floor(s / 60);
+   if (m < 60) return `${m}m ${s % 60}s`;
+   const h = Math.floor(m / 60);
+   if (h < 24) return `${h}h ${m % 60}m`;
+   const d = Math.floor(h / 24);
+   return `${d}d ${h % 24}h`;
+ }
+ 
+ function escapeHtml(s: string): string {
+   return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+ }
+ 
+ // ── Metrics sections ────────────────────────────────────────────────────────
+ 
+ function renderMetricsSection(m: MetricsSnapshot): string {
+   const errorRate = m.totalRequests > 0 ? ((m.totalErrors / m.totalRequests) * 100).toFixed(1) : "0.0";
+   const totalTokens = m.models.reduce((sum, mod) => sum + mod.promptTokens + mod.completionTokens, 0);
+ 
+   // Summary cards
+   const summaryCards = `
+   <div class="summary-grid">
+     <div class="summary-card">
+       <div class="summary-value">${m.totalRequests}</div>
+       <div class="summary-label">Total Requests</div>
+     </div>
+     <div class="summary-card">
+       <div class="summary-value" style="color:${m.totalErrors > 0 ? '#ef4444' : '#22c55e'}">${errorRate}%</div>
+       <div class="summary-label">Error Rate</div>
+     </div>
+     <div class="summary-card">
+       <div class="summary-value">${formatTokens(totalTokens)}</div>
+       <div class="summary-label">Total Tokens</div>
+     </div>
+     <div class="summary-card">
+       <div class="summary-value">${formatUptime(m.startedAt)}</div>
+       <div class="summary-label">Uptime</div>
+     </div>
+   </div>`;
+ 
+   // Per-model stats table
+   let modelRows: string;
+   if (m.models.length === 0) {
+     modelRows = `<tr><td colspan="6" style="padding:16px;color:#6b7280;text-align:center;font-style:italic">No requests recorded yet.</td></tr>`;
+   } else {
+     modelRows = m.models.map(mod => {
+       const avgLatency = mod.requests > 0 ? mod.totalLatencyMs / mod.requests : 0;
+       const modErrorRate = mod.requests > 0 ? ((mod.errors / mod.requests) * 100).toFixed(1) : "0.0";
+       return `
+       <tr>
+         <td class="metrics-cell"><code style="color:#93c5fd">${escapeHtml(mod.model)}</code></td>
+         <td class="metrics-cell" style="text-align:right">${mod.requests}</td>
+         <td class="metrics-cell" style="text-align:right;color:${mod.errors > 0 ? '#ef4444' : '#6b7280'}">${mod.errors} <span style="color:#6b7280;font-size:11px">(${modErrorRate}%)</span></td>
+         <td class="metrics-cell" style="text-align:right">${formatDuration(avgLatency)}</td>
+         <td class="metrics-cell" style="text-align:right">${formatTokens(mod.promptTokens)} / ${formatTokens(mod.completionTokens)}</td>
+         <td class="metrics-cell" style="text-align:right;color:#9ca3af">${timeAgo(mod.lastRequestAt)}</td>
+       </tr>`;
+     }).join("");
+   }
+ 
+   const modelTable = `
+   <div class="card">
+     <div class="card-header">Per-Model Stats</div>
+     <table class="metrics-table">
+       <thead>
+         <tr style="background:#13151f">
+           <th class="metrics-th" style="text-align:left">Model</th>
+           <th class="metrics-th" style="text-align:right">Requests</th>
+           <th class="metrics-th" style="text-align:right">Errors</th>
+           <th class="metrics-th" style="text-align:right">Avg Latency</th>
+           <th class="metrics-th" style="text-align:right">Tokens (in/out)</th>
+           <th class="metrics-th" style="text-align:right">Last Request</th>
+         </tr>
+       </thead>
+       <tbody>${modelRows}</tbody>
+     </table>
+   </div>`;
+ 
+   return summaryCards + modelTable;
+ }
+ 
  export function renderStatusPage(opts: StatusTemplateOptions): string {
    const { version, port, providers, models } = opts;
  
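The formatting helpers are pure functions, so sample outputs pin down their behavior:

```ts
formatDuration(420);                   // "420ms"
formatDuration(3_100);                 // "3.1s"
formatDuration(95_000);                // "1.6m"
formatTokens(0);                       // "—" (rendered as an empty cell)
formatTokens(12_345);                  // "12.3k"
formatTokens(2_500_000);               // "2.50M"
formatUptime(Date.now() - 5_400_000);  // "1h 30m"
```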
@@ -66,6 +177,8 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
      return `<li style="margin:2px 0;font-size:13px;color:#d1d5db"><code style="color:#93c5fd">${m.id}</code>${cmdBadge}</li>`;
    }).join("");
  
+   const metricsHtml = opts.metrics ? renderMetricsSection(opts.metrics) : "";
+ 
    return `<!DOCTYPE html>
  <html lang="en">
  <head>
@@ -86,6 +199,13 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
      ul { list-style: none; padding: 12px 16px; }
      .footer { color: #374151; font-size: 12px; text-align: center; margin-top: 16px; }
      code { background: #1e2130; padding: 1px 5px; border-radius: 4px; }
+     .summary-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; margin-bottom: 24px; }
+     .summary-card { background: #1a1d27; border: 1px solid #2d3148; border-radius: 12px; padding: 20px 16px; text-align: center; }
+     .summary-value { font-size: 28px; font-weight: 700; color: #f9fafb; margin-bottom: 4px; }
+     .summary-label { font-size: 12px; color: #6b7280; text-transform: uppercase; letter-spacing: 0.05em; }
+     .metrics-table { width: 100%; border-collapse: collapse; }
+     .metrics-th { padding: 10px 16px; font-size: 12px; color: #4b5563; font-weight: 600; }
+     .metrics-cell { padding: 10px 16px; font-size: 13px; }
    </style>
  </head>
  <body>
@@ -107,6 +227,8 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
      </table>
    </div>
  
+   ${metricsHtml}
+ 
    <div class="models">
      <div class="card">
        <div class="card-header">CLI Models (${cliModels.length})</div>