@elvatis_com/openclaw-cli-bridge-elvatis 2.2.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
 
11
11
  import http from "node:http";
12
12
  import { randomBytes } from "node:crypto";
13
- import { type ChatMessage, routeToCliRunner } from "./cli-runner.js";
13
+ import { type ChatMessage, type CliToolResult, type ToolDefinition, routeToCliRunner, extractMultimodalParts, cleanupMediaFiles } from "./cli-runner.js";
14
14
  import { scheduleTokenRefresh, setAuthLogger, stopTokenRefresh } from "./claude-auth.js";
15
15
  import { grokComplete, grokCompleteStream, type ChatMessage as GrokChatMessage } from "./grok-client.js";
16
16
  import { geminiComplete, geminiCompleteStream, type ChatMessage as GeminiBrowserChatMessage } from "./gemini-browser.js";
@@ -19,6 +19,7 @@ import { chatgptComplete, chatgptCompleteStream, type ChatMessage as ChatGPTBrow
19
19
  import type { BrowserContext } from "playwright";
20
20
  import { renderStatusPage, type StatusProvider } from "./status-template.js";
21
21
  import { sessionManager } from "./session-manager.js";
22
+ import { metrics } from "./metrics.js";
22
23
 
23
24
  export type GrokCompleteOptions = Parameters<typeof grokComplete>[1];
24
25
  export type GrokCompleteStreamOptions = Parameters<typeof grokCompleteStream>[1];
@@ -222,6 +223,7 @@ async function handleRequest(
222
223
  chatgpt: sessionStatus("chatgpt", opts.getChatGPTContext, expiry.chatgpt),
223
224
  },
224
225
  models: CLI_MODELS.length,
226
+ metrics: metrics.getMetrics(),
225
227
  };
226
228
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
227
229
  res.end(JSON.stringify(health, null, 2));
@@ -240,7 +242,7 @@ async function handleRequest(
240
242
  { name: "ChatGPT", icon: "◉", expiry: expiry.chatgpt, loginCmd: "/chatgpt-login", ctx: opts.getChatGPTContext?.() ?? null },
241
243
  ];
242
244
 
243
- const html = renderStatusPage({ version, port: opts.port, providers, models: CLI_MODELS, modelCommands: opts.modelCommands });
245
+ const html = renderStatusPage({ version, port: opts.port, providers, models: CLI_MODELS, modelCommands: opts.modelCommands, metrics: metrics.getMetrics() });
244
246
  res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
245
247
  res.end(html);
246
248
  return;
@@ -258,9 +260,8 @@ async function handleRequest(
258
260
  object: "model",
259
261
  created: now,
260
262
  owned_by: "openclaw-cli-bridge",
261
- // CLI-proxy models stream plain text — no tool/function call support
262
263
  capabilities: {
263
- tools: !(m.id.startsWith("cli-gemini/") || m.id.startsWith("cli-claude/") || m.id.startsWith("openai-codex/") || m.id.startsWith("opencode/") || m.id.startsWith("pi/") || m.id.startsWith("local-bitnet/")),
264
+ tools: !m.id.startsWith("local-bitnet/"), // all CLI models support tools via prompt injection; only bitnet is text-only
264
265
  },
265
266
  })),
266
267
  })
@@ -296,9 +297,10 @@ async function handleRequest(
296
297
  return;
297
298
  }
298
299
 
299
- const { model, messages, stream = false } = parsed as { model: string; messages: ChatMessage[]; stream?: boolean; tools?: unknown; workdir?: string };
300
+ const { model, messages, stream = false } = parsed as { model: string; messages: ChatMessage[]; stream?: boolean; tools?: ToolDefinition[]; workdir?: string };
300
301
  const workdir = (parsed as { workdir?: string }).workdir;
301
- const hasTools = Array.isArray((parsed as { tools?: unknown }).tools) && (parsed as { tools?: unknown[] }).tools!.length > 0;
302
+ const tools = (parsed as { tools?: ToolDefinition[] }).tools;
303
+ const hasTools = Array.isArray(tools) && tools.length > 0;
302
304
 
303
305
  if (!model || !messages?.length) {
304
306
  res.writeHead(400, { "Content-Type": "application/json" });
@@ -306,23 +308,10 @@ async function handleRequest(
306
308
  return;
307
309
  }
308
310
 
309
- // CLI-proxy models (cli-gemini/*, cli-claude/*) are plain text completions —
310
- // they cannot process tool/function call schemas. Return a clear 400 so
311
- // OpenClaw can surface a meaningful error instead of getting a garbled response.
312
- const isCliModel = model.startsWith("cli-gemini/") || model.startsWith("cli-claude/") || model.startsWith("openai-codex/") || model.startsWith("opencode/") || model.startsWith("pi/"); // local-bitnet/* exempt: llama-server silently ignores tools
313
- if (hasTools && isCliModel) {
314
- res.writeHead(400, { "Content-Type": "application/json" });
315
- res.end(JSON.stringify({
316
- error: {
317
- message: `Model ${model} does not support tool/function calls. Use a native API model (e.g. github-copilot/gpt-5-mini) for agents that need tools.`,
318
- type: "invalid_request_error",
319
- code: "tools_not_supported",
320
- }
321
- }));
322
- return;
323
- }
311
+ // Extract multimodal content (images, audio) from messages into temp files
312
+ const { cleanMessages, mediaFiles } = extractMultimodalParts(messages);
324
313
 
325
- opts.log(`[cli-bridge] ${model} · ${messages.length} msg(s) · stream=${stream}${hasTools ? " · tools=unsupported→rejected" : ""}`);
314
+ opts.log(`[cli-bridge] ${model} · ${cleanMessages.length} msg(s) · stream=${stream}${hasTools ? ` · tools=${tools!.length}` : ""}${mediaFiles.length ? ` · media=${mediaFiles.length}` : ""}`);
326
315
 
327
316
  const id = `chatcmpl-cli-${randomBytes(6).toString("hex")}`;
328
317
  const created = Math.floor(Date.now() / 1000);
@@ -344,6 +333,7 @@ async function handleRequest(
344
333
  const grokMessages = messages as GrokChatMessage[];
345
334
  const doGrokComplete = opts._grokComplete ?? grokComplete;
346
335
  const doGrokCompleteStream = opts._grokCompleteStream ?? grokCompleteStream;
336
+ const grokStart = Date.now();
347
337
  try {
348
338
  if (stream) {
349
339
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -354,11 +344,13 @@ async function handleRequest(
354
344
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
355
345
  opts.log
356
346
  );
347
+ metrics.recordRequest(model, Date.now() - grokStart, true, result.promptTokens, result.completionTokens);
357
348
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
358
349
  res.write("data: [DONE]\n\n");
359
350
  res.end();
360
351
  } else {
361
352
  const result = await doGrokComplete(grokCtx, { messages: grokMessages, model: grokModel, timeoutMs }, opts.log);
353
+ metrics.recordRequest(model, Date.now() - grokStart, true, result.promptTokens, result.completionTokens);
362
354
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
363
355
  res.end(JSON.stringify({
364
356
  id, object: "chat.completion", created, model,
@@ -367,6 +359,7 @@ async function handleRequest(
367
359
  }));
368
360
  }
369
361
  } catch (err) {
362
+ metrics.recordRequest(model, Date.now() - grokStart, false);
370
363
  const msg = (err as Error).message;
371
364
  opts.warn(`[cli-bridge] Grok error for ${model}: ${msg}`);
372
365
  if (!res.headersSent) {
@@ -393,6 +386,7 @@ async function handleRequest(
393
386
  const geminiMessages = messages as GeminiBrowserChatMessage[];
394
387
  const doGeminiComplete = opts._geminiComplete ?? geminiComplete;
395
388
  const doGeminiCompleteStream = opts._geminiCompleteStream ?? geminiCompleteStream;
389
+ const geminiStart = Date.now();
396
390
  try {
397
391
  if (stream) {
398
392
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -403,11 +397,13 @@ async function handleRequest(
403
397
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
404
398
  opts.log
405
399
  );
400
+ metrics.recordRequest(model, Date.now() - geminiStart, true);
406
401
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
407
402
  res.write("data: [DONE]\n\n");
408
403
  res.end();
409
404
  } else {
410
405
  const result = await doGeminiComplete(geminiCtx, { messages: geminiMessages, model, timeoutMs }, opts.log);
406
+ metrics.recordRequest(model, Date.now() - geminiStart, true);
411
407
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
412
408
  res.end(JSON.stringify({
413
409
  id, object: "chat.completion", created, model,
@@ -416,6 +412,7 @@ async function handleRequest(
416
412
  }));
417
413
  }
418
414
  } catch (err) {
415
+ metrics.recordRequest(model, Date.now() - geminiStart, false);
419
416
  const msg = (err as Error).message;
420
417
  opts.warn(`[cli-bridge] Gemini browser error for ${model}: ${msg}`);
421
418
  if (!res.headersSent) {
@@ -442,6 +439,7 @@ async function handleRequest(
442
439
  const claudeMessages = messages as ClaudeBrowserChatMessage[];
443
440
  const doClaudeComplete = opts._claudeComplete ?? claudeComplete;
444
441
  const doClaudeCompleteStream = opts._claudeCompleteStream ?? claudeCompleteStream;
442
+ const claudeStart = Date.now();
445
443
  try {
446
444
  if (stream) {
447
445
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -452,11 +450,13 @@ async function handleRequest(
452
450
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
453
451
  opts.log
454
452
  );
453
+ metrics.recordRequest(model, Date.now() - claudeStart, true);
455
454
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
456
455
  res.write("data: [DONE]\n\n");
457
456
  res.end();
458
457
  } else {
459
458
  const result = await doClaudeComplete(claudeCtx, { messages: claudeMessages, model, timeoutMs }, opts.log);
459
+ metrics.recordRequest(model, Date.now() - claudeStart, true);
460
460
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
461
461
  res.end(JSON.stringify({
462
462
  id, object: "chat.completion", created, model,
@@ -465,6 +465,7 @@ async function handleRequest(
465
465
  }));
466
466
  }
467
467
  } catch (err) {
468
+ metrics.recordRequest(model, Date.now() - claudeStart, false);
468
469
  const msg = (err as Error).message;
469
470
  opts.warn(`[cli-bridge] Claude browser error for ${model}: ${msg}`);
470
471
  if (!res.headersSent) {
@@ -492,6 +493,7 @@ async function handleRequest(
492
493
  const chatgptMessages = messages as ChatGPTBrowserChatMessage[];
493
494
  const doChatGPTComplete = opts._chatgptComplete ?? chatgptComplete;
494
495
  const doChatGPTCompleteStream = opts._chatgptCompleteStream ?? chatgptCompleteStream;
496
+ const chatgptStart = Date.now();
495
497
  try {
496
498
  if (stream) {
497
499
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -502,11 +504,13 @@ async function handleRequest(
502
504
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
503
505
  opts.log
504
506
  );
507
+ metrics.recordRequest(model, Date.now() - chatgptStart, true);
505
508
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
506
509
  res.write("data: [DONE]\n\n");
507
510
  res.end();
508
511
  } else {
509
512
  const result = await doChatGPTComplete(chatgptCtx, { messages: chatgptMessages, model: chatgptModel, timeoutMs }, opts.log);
513
+ metrics.recordRequest(model, Date.now() - chatgptStart, true);
510
514
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
511
515
  res.end(JSON.stringify({
512
516
  id, object: "chat.completion", created, model,
@@ -515,6 +519,7 @@ async function handleRequest(
515
519
  }));
516
520
  }
517
521
  } catch (err) {
522
+ metrics.recordRequest(model, Date.now() - chatgptStart, false);
518
523
  const msg = (err as Error).message;
519
524
  opts.warn(`[cli-bridge] ChatGPT browser error for ${model}: ${msg}`);
520
525
  if (!res.headersSent) {
@@ -559,6 +564,7 @@ async function handleRequest(
559
564
  const bitnetMessages = [{ role: "system", content: BITNET_SYSTEM }, ...truncated];
560
565
  const requestBody = JSON.stringify({ ...parsed, messages: bitnetMessages, tools: undefined });
561
566
 
567
+ const bitnetStart = Date.now();
562
568
  try {
563
569
  const targetUrl = new URL("/v1/chat/completions", bitnetUrl);
564
570
  const proxyRes = await new Promise<http.IncomingMessage>((resolve, reject) => {
@@ -579,6 +585,7 @@ async function handleRequest(
579
585
  proxyReq.end();
580
586
  });
581
587
 
588
+ metrics.recordRequest(model, Date.now() - bitnetStart, true);
582
589
  // Forward status + headers
583
590
  const fwdHeaders: Record<string, string> = { ...corsHeaders() };
584
591
  const ct = proxyRes.headers["content-type"];
@@ -590,6 +597,7 @@ async function handleRequest(
590
597
  res.writeHead(proxyRes.statusCode ?? 200, fwdHeaders);
591
598
  proxyRes.pipe(res);
592
599
  } catch (err) {
600
+ metrics.recordRequest(model, Date.now() - bitnetStart, false);
593
601
  const msg = (err as Error).message;
594
602
  if (msg.includes("ECONNREFUSED") || msg.includes("ECONNRESET") || msg.includes("ENOTFOUND")) {
595
603
  res.writeHead(503, { "Content-Type": "application/json", ...corsHeaders() });
@@ -612,64 +620,141 @@ async function handleRequest(
612
620
  }
613
621
  // ─────────────────────────────────────────────────────────────────────────
614
622
 
615
- // ── CLI runner routing (Gemini / Claude Code) ─────────────────────────────
616
- let content: string;
623
+ // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
624
+ let result: CliToolResult;
617
625
  let usedModel = model;
626
+ const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined };
627
+
628
+ // ── Dynamic timeout: scale with conversation size ────────────────────────
629
+ const baseTimeout = opts.timeoutMs ?? 300_000; // 5 min default (was 120s)
630
+ const msgExtra = Math.max(0, cleanMessages.length - 10) * 2_000;
631
+ const toolExtra = (tools?.length ?? 0) * 5_000;
632
+ const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, 600_000);
633
+
634
+ // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
635
+ let sseHeadersSent = false;
636
+ let keepaliveInterval: ReturnType<typeof setInterval> | null = null;
637
+ if (stream) {
638
+ res.writeHead(200, {
639
+ "Content-Type": "text/event-stream",
640
+ "Cache-Control": "no-cache",
641
+ Connection: "keep-alive",
642
+ ...corsHeaders(),
643
+ });
644
+ sseHeadersSent = true;
645
+ res.write(": keepalive\n\n");
646
+ keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, 15_000);
647
+ }
648
+
649
+ const cliStart = Date.now();
618
650
  try {
619
- content = await routeToCliRunner(model, messages, opts.timeoutMs ?? 120_000, { workdir });
651
+ result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
652
+ metrics.recordRequest(model, Date.now() - cliStart, true);
620
653
  } catch (err) {
654
+ const primaryDuration = Date.now() - cliStart;
621
655
  const msg = (err as Error).message;
622
656
  // ── Model fallback: retry once with a lighter model if configured ────
623
657
  const fallbackModel = opts.modelFallbacks?.[model];
624
658
  if (fallbackModel) {
659
+ metrics.recordRequest(model, primaryDuration, false);
625
660
  opts.warn(`[cli-bridge] ${model} failed (${msg}), falling back to ${fallbackModel}`);
661
+ const fallbackStart = Date.now();
626
662
  try {
627
- content = await routeToCliRunner(fallbackModel, messages, opts.timeoutMs ?? 120_000, { workdir });
663
+ result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
664
+ metrics.recordRequest(fallbackModel, Date.now() - fallbackStart, true);
628
665
  usedModel = fallbackModel;
629
666
  opts.log(`[cli-bridge] fallback to ${fallbackModel} succeeded`);
630
667
  } catch (fallbackErr) {
668
+ metrics.recordRequest(fallbackModel, Date.now() - fallbackStart, false);
631
669
  const fallbackMsg = (fallbackErr as Error).message;
632
670
  opts.warn(`[cli-bridge] fallback ${fallbackModel} also failed: ${fallbackMsg}`);
633
- res.writeHead(500, { "Content-Type": "application/json" });
634
- res.end(JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } }));
671
+ if (sseHeadersSent) {
672
+ res.write(`data: ${JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } })}\n\n`);
673
+ res.write("data: [DONE]\n\n");
674
+ res.end();
675
+ } else {
676
+ res.writeHead(500, { "Content-Type": "application/json" });
677
+ res.end(JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } }));
678
+ }
635
679
  return;
636
680
  }
637
681
  } else {
682
+ metrics.recordRequest(model, primaryDuration, false);
638
683
  opts.warn(`[cli-bridge] CLI error for ${model}: ${msg}`);
639
- res.writeHead(500, { "Content-Type": "application/json" });
640
- res.end(JSON.stringify({ error: { message: msg, type: "cli_error" } }));
684
+ if (sseHeadersSent) {
685
+ res.write(`data: ${JSON.stringify({ error: { message: msg, type: "cli_error" } })}\n\n`);
686
+ res.write("data: [DONE]\n\n");
687
+ res.end();
688
+ } else {
689
+ res.writeHead(500, { "Content-Type": "application/json" });
690
+ res.end(JSON.stringify({ error: { message: msg, type: "cli_error" } }));
691
+ }
641
692
  return;
642
693
  }
694
+ } finally {
695
+ if (keepaliveInterval) clearInterval(keepaliveInterval);
696
+ cleanupMediaFiles(mediaFiles);
643
697
  }
644
698
 
645
- if (stream) {
646
- res.writeHead(200, {
647
- "Content-Type": "text/event-stream",
648
- "Cache-Control": "no-cache",
649
- Connection: "keep-alive",
650
- ...corsHeaders(),
651
- });
699
+ const hasToolCalls = !!(result.tool_calls?.length);
700
+ const finishReason = hasToolCalls ? "tool_calls" : "stop";
652
701
 
653
- // Role chunk
654
- sendSseChunk(res, { id, created, model: usedModel, delta: { role: "assistant" }, finish_reason: null });
702
+ if (stream) {
703
+ // SSE headers already sent above — stream response chunks directly
655
704
 
656
- // Content in chunks (~50 chars each for natural feel)
657
- const chunkSize = 50;
658
- for (let i = 0; i < content.length; i += chunkSize) {
705
+ if (hasToolCalls) {
706
+ // Stream tool_calls in OpenAI SSE format
707
+ const toolCalls = result.tool_calls!;
708
+ // Role chunk with all tool_calls (name + empty arguments)
659
709
  sendSseChunk(res, {
660
- id,
661
- created,
662
- model: usedModel,
663
- delta: { content: content.slice(i, i + chunkSize) },
710
+ id, created, model: usedModel,
711
+ delta: {
712
+ role: "assistant",
713
+ tool_calls: toolCalls.map((tc, idx) => ({
714
+ index: idx, id: tc.id, type: "function",
715
+ function: { name: tc.function.name, arguments: "" },
716
+ })),
717
+ },
664
718
  finish_reason: null,
665
719
  });
720
+ // Arguments chunks (one per tool call)
721
+ for (let idx = 0; idx < toolCalls.length; idx++) {
722
+ sendSseChunk(res, {
723
+ id, created, model: usedModel,
724
+ delta: {
725
+ tool_calls: [{ index: idx, function: { arguments: toolCalls[idx].function.arguments } }],
726
+ },
727
+ finish_reason: null,
728
+ });
729
+ }
730
+ // Stop chunk
731
+ sendSseChunk(res, { id, created, model: usedModel, delta: {}, finish_reason: "tool_calls" });
732
+ } else {
733
+ // Standard text streaming
734
+ sendSseChunk(res, { id, created, model: usedModel, delta: { role: "assistant" }, finish_reason: null });
735
+ const content = result.content ?? "";
736
+ const chunkSize = 50;
737
+ for (let i = 0; i < content.length; i += chunkSize) {
738
+ sendSseChunk(res, {
739
+ id, created, model: usedModel,
740
+ delta: { content: content.slice(i, i + chunkSize) },
741
+ finish_reason: null,
742
+ });
743
+ }
744
+ sendSseChunk(res, { id, created, model: usedModel, delta: {}, finish_reason: "stop" });
666
745
  }
667
746
 
668
- // Stop chunk
669
- sendSseChunk(res, { id, created, model: usedModel, delta: {}, finish_reason: "stop" });
670
747
  res.write("data: [DONE]\n\n");
671
748
  res.end();
672
749
  } else {
750
+ const message: Record<string, unknown> = { role: "assistant" };
751
+ if (hasToolCalls) {
752
+ message.content = null;
753
+ message.tool_calls = result.tool_calls;
754
+ } else {
755
+ message.content = result.content;
756
+ }
757
+
673
758
  const response = {
674
759
  id,
675
760
  object: "chat.completion",
@@ -678,8 +763,8 @@ async function handleRequest(
678
763
  choices: [
679
764
  {
680
765
  index: 0,
681
- message: { role: "assistant", content },
682
- finish_reason: "stop",
766
+ message,
767
+ finish_reason: finishReason,
683
768
  },
684
769
  ],
685
770
  usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { BrowserContext } from "playwright";
9
+ import type { MetricsSnapshot } from "./metrics.js";
9
10
 
10
11
  export interface StatusProvider {
11
12
  name: string;
@@ -22,6 +23,8 @@ export interface StatusTemplateOptions {
22
23
  models: Array<{ id: string; name: string; contextWindow: number; maxTokens: number }>;
23
24
  /** Maps model ID → slash command name (e.g. "openai-codex/gpt-5.3-codex" → "/cli-codex") */
24
25
  modelCommands?: Record<string, string>;
26
+ /** In-memory metrics snapshot — optional for backward compat */
27
+ metrics?: MetricsSnapshot;
25
28
  }
26
29
 
27
30
  function statusBadge(p: StatusProvider): { label: string; color: string; dot: string } {
@@ -32,6 +35,114 @@ function statusBadge(p: StatusProvider): { label: string; color: string; dot: st
32
35
  return { label: "Logged in", color: "#3b82f6", dot: "🔵" };
33
36
  }
34
37
 
38
+ // ── Formatting helpers ──────────────────────────────────────────────────────
39
+
40
+ function formatDuration(ms: number): string {
41
+ if (ms < 1000) return `${Math.round(ms)}ms`;
42
+ if (ms < 60_000) return `${(ms / 1000).toFixed(1)}s`;
43
+ return `${(ms / 60_000).toFixed(1)}m`;
44
+ }
45
+
46
+ function formatTokens(n: number): string {
47
+ if (n === 0) return "—";
48
+ if (n < 1000) return String(n);
49
+ if (n < 1_000_000) return `${(n / 1000).toFixed(1)}k`;
50
+ return `${(n / 1_000_000).toFixed(2)}M`;
51
+ }
52
+
53
+ function timeAgo(epochMs: number | null): string {
54
+ if (!epochMs) return "—";
55
+ const diff = Date.now() - epochMs;
56
+ if (diff < 60_000) return "just now";
57
+ if (diff < 3_600_000) return `${Math.floor(diff / 60_000)}m ago`;
58
+ if (diff < 86_400_000) return `${Math.floor(diff / 3_600_000)}h ago`;
59
+ return `${Math.floor(diff / 86_400_000)}d ago`;
60
+ }
61
+
62
+ function formatUptime(startedAt: number): string {
63
+ const diff = Date.now() - startedAt;
64
+ const s = Math.floor(diff / 1000);
65
+ if (s < 60) return `${s}s`;
66
+ const m = Math.floor(s / 60);
67
+ if (m < 60) return `${m}m ${s % 60}s`;
68
+ const h = Math.floor(m / 60);
69
+ if (h < 24) return `${h}h ${m % 60}m`;
70
+ const d = Math.floor(h / 24);
71
+ return `${d}d ${h % 24}h`;
72
+ }
73
+
74
+ function escapeHtml(s: string): string {
75
+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
76
+ }
77
+
78
+ // ── Metrics sections ────────────────────────────────────────────────────────
79
+
80
+ function renderMetricsSection(m: MetricsSnapshot): string {
81
+ const errorRate = m.totalRequests > 0 ? ((m.totalErrors / m.totalRequests) * 100).toFixed(1) : "0.0";
82
+ const totalTokens = m.models.reduce((sum, mod) => sum + mod.promptTokens + mod.completionTokens, 0);
83
+
84
+ // Summary cards
85
+ const summaryCards = `
86
+ <div class="summary-grid">
87
+ <div class="summary-card">
88
+ <div class="summary-value">${m.totalRequests}</div>
89
+ <div class="summary-label">Total Requests</div>
90
+ </div>
91
+ <div class="summary-card">
92
+ <div class="summary-value" style="color:${m.totalErrors > 0 ? '#ef4444' : '#22c55e'}">${errorRate}%</div>
93
+ <div class="summary-label">Error Rate</div>
94
+ </div>
95
+ <div class="summary-card">
96
+ <div class="summary-value">${formatTokens(totalTokens)}</div>
97
+ <div class="summary-label">Total Tokens</div>
98
+ </div>
99
+ <div class="summary-card">
100
+ <div class="summary-value">${formatUptime(m.startedAt)}</div>
101
+ <div class="summary-label">Uptime</div>
102
+ </div>
103
+ </div>`;
104
+
105
+ // Per-model stats table
106
+ let modelRows: string;
107
+ if (m.models.length === 0) {
108
+ modelRows = `<tr><td colspan="6" style="padding:16px;color:#6b7280;text-align:center;font-style:italic">No requests recorded yet.</td></tr>`;
109
+ } else {
110
+ modelRows = m.models.map(mod => {
111
+ const avgLatency = mod.requests > 0 ? mod.totalLatencyMs / mod.requests : 0;
112
+ const modErrorRate = mod.requests > 0 ? ((mod.errors / mod.requests) * 100).toFixed(1) : "0.0";
113
+ return `
114
+ <tr>
115
+ <td class="metrics-cell"><code style="color:#93c5fd">${escapeHtml(mod.model)}</code></td>
116
+ <td class="metrics-cell" style="text-align:right">${mod.requests}</td>
117
+ <td class="metrics-cell" style="text-align:right;color:${mod.errors > 0 ? '#ef4444' : '#6b7280'}">${mod.errors} <span style="color:#6b7280;font-size:11px">(${modErrorRate}%)</span></td>
118
+ <td class="metrics-cell" style="text-align:right">${formatDuration(avgLatency)}</td>
119
+ <td class="metrics-cell" style="text-align:right">${formatTokens(mod.promptTokens)} / ${formatTokens(mod.completionTokens)}</td>
120
+ <td class="metrics-cell" style="text-align:right;color:#9ca3af">${timeAgo(mod.lastRequestAt)}</td>
121
+ </tr>`;
122
+ }).join("");
123
+ }
124
+
125
+ const modelTable = `
126
+ <div class="card">
127
+ <div class="card-header">Per-Model Stats</div>
128
+ <table class="metrics-table">
129
+ <thead>
130
+ <tr style="background:#13151f">
131
+ <th class="metrics-th" style="text-align:left">Model</th>
132
+ <th class="metrics-th" style="text-align:right">Requests</th>
133
+ <th class="metrics-th" style="text-align:right">Errors</th>
134
+ <th class="metrics-th" style="text-align:right">Avg Latency</th>
135
+ <th class="metrics-th" style="text-align:right">Tokens (in/out)</th>
136
+ <th class="metrics-th" style="text-align:right">Last Request</th>
137
+ </tr>
138
+ </thead>
139
+ <tbody>${modelRows}</tbody>
140
+ </table>
141
+ </div>`;
142
+
143
+ return summaryCards + modelTable;
144
+ }
145
+
35
146
  export function renderStatusPage(opts: StatusTemplateOptions): string {
36
147
  const { version, port, providers, models } = opts;
37
148
 
@@ -66,6 +177,8 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
66
177
  return `<li style="margin:2px 0;font-size:13px;color:#d1d5db"><code style="color:#93c5fd">${m.id}</code>${cmdBadge}</li>`;
67
178
  }).join("");
68
179
 
180
+ const metricsHtml = opts.metrics ? renderMetricsSection(opts.metrics) : "";
181
+
69
182
  return `<!DOCTYPE html>
70
183
  <html lang="en">
71
184
  <head>
@@ -86,6 +199,13 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
86
199
  ul { list-style: none; padding: 12px 16px; }
87
200
  .footer { color: #374151; font-size: 12px; text-align: center; margin-top: 16px; }
88
201
  code { background: #1e2130; padding: 1px 5px; border-radius: 4px; }
202
+ .summary-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; margin-bottom: 24px; }
203
+ .summary-card { background: #1a1d27; border: 1px solid #2d3148; border-radius: 12px; padding: 20px 16px; text-align: center; }
204
+ .summary-value { font-size: 28px; font-weight: 700; color: #f9fafb; margin-bottom: 4px; }
205
+ .summary-label { font-size: 12px; color: #6b7280; text-transform: uppercase; letter-spacing: 0.05em; }
206
+ .metrics-table { width: 100%; border-collapse: collapse; }
207
+ .metrics-th { padding: 10px 16px; font-size: 12px; color: #4b5563; font-weight: 600; }
208
+ .metrics-cell { padding: 10px 16px; font-size: 13px; }
89
209
  </style>
90
210
  </head>
91
211
  <body>
@@ -107,6 +227,8 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
107
227
  </table>
108
228
  </div>
109
229
 
230
+ ${metricsHtml}
231
+
110
232
  <div class="models">
111
233
  <div class="card">
112
234
  <div class="card-header">CLI Models (${cliModels.length})</div>