@oh-my-pi/pi-coding-agent 15.11.4 → 15.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/CHANGELOG.md +82 -1
  2. package/dist/cli.js +520 -451
  3. package/dist/types/cli/bench-cli.d.ts +78 -0
  4. package/dist/types/cli/usage-cli.d.ts +10 -1
  5. package/dist/types/commands/bench.d.ts +29 -0
  6. package/dist/types/commands/usage.d.ts +9 -0
  7. package/dist/types/config/model-resolver.d.ts +3 -2
  8. package/dist/types/config/settings-schema.d.ts +125 -3
  9. package/dist/types/edit/renderer.d.ts +1 -0
  10. package/dist/types/modes/components/oauth-selector.d.ts +10 -1
  11. package/dist/types/modes/components/reset-usage-selector.d.ts +12 -0
  12. package/dist/types/modes/components/session-selector.d.ts +1 -1
  13. package/dist/types/modes/components/settings-selector.d.ts +8 -1
  14. package/dist/types/modes/components/snapcompact-shape-preview.d.ts +31 -0
  15. package/dist/types/modes/components/tool-execution.d.ts +18 -0
  16. package/dist/types/modes/controllers/selector-controller.d.ts +1 -0
  17. package/dist/types/modes/interactive-mode.d.ts +10 -0
  18. package/dist/types/modes/session-observer-registry.d.ts +2 -0
  19. package/dist/types/modes/setup-wizard/scenes/sign-in.d.ts +3 -0
  20. package/dist/types/modes/setup-wizard/scenes/types.d.ts +10 -1
  21. package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +3 -0
  22. package/dist/types/modes/types.d.ts +2 -0
  23. package/dist/types/modes/utils/context-usage.d.ts +6 -1
  24. package/dist/types/session/agent-session.d.ts +14 -1
  25. package/dist/types/session/auth-storage.d.ts +1 -1
  26. package/dist/types/session/codex-auto-reset.d.ts +107 -0
  27. package/dist/types/session/snapcompact-inline.d.ts +107 -4
  28. package/dist/types/slash-commands/helpers/reset-usage.d.ts +27 -0
  29. package/dist/types/task/render.d.ts +1 -0
  30. package/dist/types/tools/bash.d.ts +2 -0
  31. package/dist/types/tools/eval-render.d.ts +1 -0
  32. package/dist/types/tools/renderers.d.ts +13 -0
  33. package/dist/types/tools/ssh.d.ts +1 -0
  34. package/dist/types/tools/todo.d.ts +0 -11
  35. package/package.json +11 -11
  36. package/src/cli/bench-cli.ts +437 -0
  37. package/src/cli/usage-cli.ts +187 -16
  38. package/src/cli-commands.ts +1 -0
  39. package/src/commands/bench.ts +42 -0
  40. package/src/commands/usage.ts +8 -0
  41. package/src/config/model-registry.ts +52 -5
  42. package/src/config/model-resolver.ts +36 -5
  43. package/src/config/settings-schema.ts +148 -3
  44. package/src/config/settings.ts +9 -0
  45. package/src/edit/renderer.ts +5 -0
  46. package/src/hindsight/client.ts +26 -1
  47. package/src/hindsight/state.ts +6 -2
  48. package/src/internal-urls/docs-index.generated.ts +2 -2
  49. package/src/mcp/transports/stdio.ts +81 -7
  50. package/src/modes/components/oauth-selector.ts +67 -7
  51. package/src/modes/components/reset-usage-selector.ts +161 -0
  52. package/src/modes/components/session-selector.ts +8 -2
  53. package/src/modes/components/settings-selector.ts +89 -47
  54. package/src/modes/components/snapcompact-shape-preview-doc.md +11 -0
  55. package/src/modes/components/snapcompact-shape-preview.ts +192 -0
  56. package/src/modes/components/tool-execution.ts +26 -0
  57. package/src/modes/components/transcript-container.ts +23 -1
  58. package/src/modes/controllers/command-controller.ts +24 -1
  59. package/src/modes/controllers/input-controller.ts +8 -6
  60. package/src/modes/controllers/selector-controller.ts +72 -2
  61. package/src/modes/interactive-mode.ts +83 -0
  62. package/src/modes/session-observer-registry.ts +61 -3
  63. package/src/modes/setup-wizard/index.ts +1 -0
  64. package/src/modes/setup-wizard/scenes/glyph.ts +24 -6
  65. package/src/modes/setup-wizard/scenes/providers.ts +36 -2
  66. package/src/modes/setup-wizard/scenes/sign-in.ts +10 -1
  67. package/src/modes/setup-wizard/scenes/theme.ts +28 -1
  68. package/src/modes/setup-wizard/scenes/types.ts +10 -1
  69. package/src/modes/setup-wizard/scenes/web-search.ts +22 -6
  70. package/src/modes/setup-wizard/wizard-overlay.ts +38 -1
  71. package/src/modes/theme/theme.ts +2 -2
  72. package/src/modes/types.ts +2 -0
  73. package/src/modes/utils/context-usage.ts +75 -1
  74. package/src/prompts/bench.md +7 -0
  75. package/src/prompts/system/snapcompact-context-frames-note.md +1 -0
  76. package/src/prompts/system/snapcompact-context-stub.md +1 -0
  77. package/src/prompts/system/snapcompact-toolresult-note.md +1 -1
  78. package/src/prompts/tools/browser.md +33 -43
  79. package/src/prompts/tools/eval.md +27 -50
  80. package/src/prompts/tools/irc.md +29 -31
  81. package/src/prompts/tools/read.md +31 -37
  82. package/src/prompts/tools/todo.md +1 -2
  83. package/src/sdk.ts +4 -2
  84. package/src/session/agent-session.ts +136 -6
  85. package/src/session/auth-storage.ts +3 -0
  86. package/src/session/codex-auto-reset.ts +190 -0
  87. package/src/session/snapcompact-inline.ts +404 -75
  88. package/src/slash-commands/builtin-registry.ts +145 -8
  89. package/src/slash-commands/helpers/context-report.ts +28 -1
  90. package/src/slash-commands/helpers/reset-usage.ts +66 -0
  91. package/src/slash-commands/helpers/usage-report.ts +12 -0
  92. package/src/task/index.ts +30 -7
  93. package/src/task/render.ts +34 -19
  94. package/src/tools/bash.ts +3 -0
  95. package/src/tools/eval-render.ts +4 -0
  96. package/src/tools/renderers.ts +13 -0
  97. package/src/tools/ssh.ts +3 -0
  98. package/src/tools/todo.ts +8 -128
@@ -0,0 +1,437 @@
1
+ import type { ResolvedThinkingLevel } from "@oh-my-pi/pi-agent-core";
2
+ import type {
3
+ Api,
4
+ ApiKeyResolver,
5
+ AssistantMessage,
6
+ AssistantMessageEvent,
7
+ AssistantMessageEventStream,
8
+ Context,
9
+ Effort,
10
+ Model,
11
+ SimpleStreamOptions,
12
+ } from "@oh-my-pi/pi-ai";
13
+ import { streamSimple } from "@oh-my-pi/pi-ai";
14
+ import type { CanonicalModelVariant } from "@oh-my-pi/pi-catalog/identity";
15
+ import { replaceTabs, truncateToWidth } from "@oh-my-pi/pi-tui";
16
+ import { formatDuration, getProjectDir } from "@oh-my-pi/pi-utils";
17
+ import chalk from "chalk";
18
+ import type { ApiKeyResolverModel } from "../config/api-key-resolver";
19
+ import { type CanonicalModelQueryOptions, ModelRegistry } from "../config/model-registry";
20
+ import { formatModelString, getModelMatchPreferences, resolveCliModel } from "../config/model-resolver";
21
+ import { Settings } from "../config/settings";
22
+ import benchPrompt from "../prompts/bench.md" with { type: "text" };
23
+ import { discoverAuthStorage } from "../sdk";
24
+ import { resolveThinkingLevelForModel, shouldDisableReasoning, toReasoningEffort } from "../thinking";
25
+
26
// Defaults for the bench command:
// - DEFAULT_RUNS / DEFAULT_MAX_TOKENS are the fallbacks used when --runs / --max-tokens are omitted.
// - ERROR_WIDTH is the width failed-run error text is truncated to in per-run output.
// - BENCH_PROMPT is the bundled prompt (trimmed), used when no --prompt is given.
+ const DEFAULT_RUNS = 1;
27
+ const DEFAULT_MAX_TOKENS = 512;
28
+ const ERROR_WIDTH = 110;
29
+ const BENCH_PROMPT = benchPrompt.trim();
30
+
31
/**
 * Parsed arguments for the bench command.
 *
 * - `models`: model selectors to benchmark; at least one is required.
 * - `flags.runs` / `flags.maxTokens`: must be positive integers when given.
 * - `flags.prompt`: custom prompt; a blank value falls back to the bundled prompt.
 * - `flags.json`: when exactly `true`, the summary is printed as JSON and the
 *   human-readable per-run lines and table are suppressed.
 */
+ export interface BenchCommandArgs {
32
+ models: string[];
33
+ flags: {
34
+ runs?: number;
35
+ maxTokens?: number;
36
+ prompt?: string;
37
+ json?: boolean;
38
+ };
39
+ }
40
+
41
/**
 * The slice of the model registry the bench command relies on.
 *
 * `getApiKey` is awaited before each run to detect missing credentials, and
 * `resolver` supplies the `apiKey` option for the streamed request. The
 * registry is also handed to `resolveCliModel` for selector resolution; the
 * optional canonical-model methods are presumably consumed there (not visible
 * in this file — confirm against the resolver).
 */
+ export interface BenchModelRegistry {
42
+ getAll(): Model<Api>[];
43
+ getApiKey(model: Model<Api>, sessionId?: string): Promise<string | undefined>;
44
+ resolver(model: ApiKeyResolverModel, sessionId?: string): ApiKeyResolver;
45
+ resolveCanonicalModel?(canonicalId: string, options?: CanonicalModelQueryOptions): Model<Api> | undefined;
46
+ getCanonicalVariants?(canonicalId: string, options?: CanonicalModelQueryOptions): CanonicalModelVariant[];
47
+ getCanonicalId?(model: Model<Api>): string | undefined;
48
+ }
49
+
50
/**
 * Resources a bench invocation runs against.
 *
 * `settings` feeds the model-match preferences used during selector
 * resolution. `close`, when provided, is invoked once the command finishes,
 * whether it succeeded or threw.
 */
+ export interface BenchRuntime {
51
+ modelRegistry: BenchModelRegistry;
52
+ settings?: Settings;
53
+ close?: () => void;
54
+ }
55
+
56
/**
 * Measurements of one completed run (`ok: true` arm of the run-result union).
 *
 * `ttftMs` and `durationMs` are milliseconds; each numeric field is clamped to
 * 0 when the measured value is non-finite or not positive.
 */
+ export interface BenchRunSuccess {
57
+ ok: true;
58
+ ttftMs: number;
59
+ durationMs: number;
60
+ outputTokens: number;
61
+ /** Generation throughput measured over the post-first-token window. */
62
+ tokensPerSecond: number;
63
+ }
64
+
65
/**
 * A run that did not produce a measurement (`ok: false` arm of the run-result
 * union). `error` holds the message reported by the stream, a thrown
 * exception, or the missing-credentials notice.
 */
+ export interface BenchRunFailure {
66
+ ok: false;
+ error: string;
68
+ }
69
+
70
/** Outcome of a single run; discriminate on `ok`. */
+ export type BenchRunResult = BenchRunSuccess | BenchRunFailure;
71
+
72
/** Arithmetic means of each metric across a model's successful runs. */
+ export interface BenchAverages {
73
+ ttftMs: number;
74
+ durationMs: number;
75
+ outputTokens: number;
76
+ tokensPerSecond: number;
77
+ }
78
+
79
/**
 * Everything recorded for one benchmarked model: how it was selected, what it
 * resolved to, each run's outcome in execution order, and the averages.
 */
+ export interface BenchModelReport {
80
+ /** Selector as the user typed it (e.g. "opus" or "gemini-3.5:low"). */
81
+ selector: string;
82
+ /** Resolved `provider/id`. */
83
+ model: string;
84
+ /** Explicit thinking level from a `:level` selector suffix; undefined = provider default. */
85
+ thinking?: ResolvedThinkingLevel;
86
+ results: BenchRunResult[];
87
+ /** Averages over successful runs; null when every run failed. */
88
+ average: BenchAverages | null;
89
+ }
90
+
91
/**
 * Result of a whole bench invocation.
 *
 * `runs` and `maxTokens` are the effective values after defaults were applied;
 * `failures` is the number of failed run results summed over every model.
 */
+ export interface BenchSummary {
92
+ runs: number;
93
+ maxTokens: number;
94
+ models: BenchModelReport[];
95
+ failures: number;
96
+ }
97
+
98
/**
 * Shape of the streaming entry point used to issue a bench request. The
 * command defaults to the imported `streamSimple`; callers can inject another
 * implementation through the dependencies object.
 */
+ type BenchStreamSimple = (
99
+ model: Model<Api>,
100
+ context: Context,
101
+ options?: SimpleStreamOptions,
102
+ ) => AssistantMessageEventStream;
103
+
104
/**
 * Optional overrides for the command's side effects. Defaults when omitted:
 *
 * - `createRuntime`: the default runtime built from discovered auth storage.
 * - `randomSessionId`: `Bun.randomUUIDv7()`.
 * - `writeStdout` / `writeStderr`: `process.stdout.write` / `process.stderr.write`.
 * - `setExitCode`: assigns `process.exitCode`.
 * - `streamSimple`: the imported `streamSimple`.
 * - `now`: `performance.now()`.
 * - `stdoutIsTTY`: `process.stdout.isTTY === true`; controls the transient
 *   "streaming" progress line.
 */
+ export interface BenchDependencies {
105
+ createRuntime?: () => Promise<BenchRuntime>;
106
+ randomSessionId?: () => string;
107
+ writeStdout?: (text: string) => void;
108
+ writeStderr?: (text: string) => void;
109
+ setExitCode?: (code: number) => void;
110
+ streamSimple?: BenchStreamSimple;
111
+ now?: () => number;
112
+ stdoutIsTTY?: boolean;
113
+ }
114
+
115
/**
 * Best-effort conversion of a thrown value into a readable message.
 *
 * - `Error` instances yield their message, or their string form when the
 *   message is empty.
 * - Primitives are stringified.
 * - Other objects yield a non-empty string `message` property if they have
 *   one, then a meaningful custom string form, then their JSON form, so a
 *   rejected payload no longer collapses into "[object Object]".
 *
 * @param error - Whatever was caught.
 * @returns A message suitable for a failed-run line; never throws.
 */
function getErrorMessage(error: unknown): string {
	if (error instanceof Error) return error.message || String(error);
	if (typeof error !== "object" || error === null) return String(error);

	const { message } = error as { message?: unknown };
	if (typeof message === "string" && message) return message;

	let text: string;
	try {
		text = String(error);
	} catch {
		// Null-prototype objects have no toString, so String() itself throws.
		text = Object.prototype.toString.call(error);
	}
	// Anything other than the default "[object X]" tag is a deliberate string form.
	if (!text.startsWith("[object ")) return text;

	try {
		const serialized = JSON.stringify(error);
		if (typeof serialized === "string" && serialized !== "{}") return serialized;
	} catch {
		// Circular or otherwise unserializable: fall back to the tag below.
	}
	return text;
}
119
+
120
/**
 * Validate a numeric CLI flag that must be a positive integer.
 *
 * @param name - Flag name without the leading dashes, used in the error text.
 * @param value - Value supplied by the user, if any.
 * @param fallback - Returned untouched when `value` is undefined.
 * @returns `value` when valid, otherwise `fallback` when it was omitted.
 * @throws Error when `value` is present but not an integer greater than zero.
 */
function normalizePositiveInteger(name: string, value: number | undefined, fallback: number): number {
	if (value === undefined) {
		return fallback;
	}
	const isPositiveInteger = Number.isInteger(value) && value > 0;
	if (isPositiveInteger) {
		return value;
	}
	throw new Error(`Expected --${name} to be a positive integer, got ${value}`);
}
127
+
128
/**
 * Decide whether a stream event carries the first generated content.
 *
 * Delta events (text, thinking, tool call) count when their delta is
 * non-empty; text/thinking end events count when their content is non-empty.
 * Every other event type is ignored.
 */
function isFirstTokenEvent(event: AssistantMessageEvent): boolean {
	if (event.type === "text_delta" || event.type === "thinking_delta" || event.type === "toolcall_delta") {
		return event.delta.length > 0;
	}
	if (event.type === "text_end" || event.type === "thinking_end") {
		return event.content.length > 0;
	}
	return false;
}
141
+
142
/**
 * Output tokens per second, measured over the generation window.
 *
 * The window is the total duration minus time-to-first-token, which keeps
 * queueing and prefill latency out of the throughput figure. When that
 * difference is not positive (the response arrived as one chunk), the whole
 * duration is used instead. A non-positive window yields 0.
 */
function computeTokensPerSecond(outputTokens: number, durationMs: number, ttftMs: number): number {
	let windowMs = durationMs - ttftMs;
	if (!(windowMs > 0)) {
		windowMs = durationMs;
	}
	if (!(windowMs > 0)) {
		return 0;
	}
	return (outputTokens * 1000) / windowMs;
}
152
+
153
/**
 * Per-request inputs for a single bench run. `maxTokens` is the requested
 * ceiling; it is further capped by the model's own `maxTokens` when that is a
 * positive finite number.
 */
+ interface BenchRequestOptions {
154
+ apiKey: ApiKeyResolver;
155
+ sessionId: string;
156
+ prompt: string;
157
+ maxTokens: number;
158
+ /** Explicit effort from a `:level` selector suffix; absent = provider default. */
159
+ reasoning?: Effort;
160
+ /** Only set for an explicit `:off` suffix — some endpoints reject disablement. */
161
+ disableReasoning?: boolean;
162
+ }
163
+
164
/**
 * Issue one streamed request and measure it.
 *
 * @param model - Resolved model to call.
 * @param options - Credentials, session id, prompt and generation limits.
 * @param streamFn - Streaming implementation to invoke.
 * @param now - Clock returning milliseconds; used for locally measured timings.
 * @returns A success with TTFT, duration, output tokens and throughput, or a
 *   failure carrying the error message. Never rejects: thrown errors are
 *   converted into a failure result.
 */
async function runBenchRequest(
	model: Model<Api>,
	options: BenchRequestOptions,
	streamFn: BenchStreamSimple,
	now: () => number,
): Promise<BenchRunResult> {
	const startedAt = now();
	let firstTokenAt: number | undefined;
	// Measurements that are non-finite or not positive are reported as 0.
	const positiveOrZero = (value: number): number => (Number.isFinite(value) && value > 0 ? value : 0);
	const failed = (error: string): BenchRunFailure => ({ ok: false, error });

	try {
		const context: Context = {
			// Codex's Responses endpoint 400s with "Instructions are required" when no
			// system prompt is present — same guard as eval's completion bridge.
			systemPrompt: ["You are a helpful assistant."],
			messages: [{ role: "user", content: options.prompt, timestamp: Date.now(), attribution: "user" }],
		};
		const modelHasCap = Number.isFinite(model.maxTokens) && model.maxTokens > 0;
		const stream = streamFn(model, context, {
			apiKey: options.apiKey,
			sessionId: options.sessionId,
			maxTokens: modelHasCap ? Math.min(options.maxTokens, model.maxTokens) : options.maxTokens,
			reasoning: options.reasoning,
			disableReasoning: options.disableReasoning,
			// pi-ai opts every OpenRouter request into response caching (1h TTL).
			// Bench sends a byte-identical request each run, so within the TTL
			// OpenRouter replays the cached generation with zeroed usage — the run
			// shows "tokens 0, TPS 0.0" at line speed. Opt back out so every run
			// measures a fresh generation.
			headers: model.provider === "openrouter" ? { "X-OpenRouter-Cache": "false" } : undefined,
		});

		let doneMessage: AssistantMessage | undefined;
		for await (const event of stream) {
			// Stamp the first content-bearing event before inspecting anything else.
			if (firstTokenAt === undefined && isFirstTokenEvent(event)) {
				firstTokenAt = now();
			}
			if (event.type === "error") {
				return failed(event.error.errorMessage ?? "request failed");
			}
			if (event.type === "done") {
				doneMessage = event.message;
			}
		}

		// Streams that end without a "done" event still expose the final message.
		const message = doneMessage ?? (await stream.result());
		if (message.stopReason === "error" || message.errorMessage) {
			return failed(message.errorMessage ?? "request failed");
		}

		// Prefer timings reported on the message; fall back to local clock readings.
		const durationMs = positiveOrZero(message.duration ?? now() - startedAt);
		const locallyMeasuredTtft = firstTokenAt === undefined ? durationMs : firstTokenAt - startedAt;
		const ttftMs = positiveOrZero(message.ttft ?? locallyMeasuredTtft);
		const outputTokens = positiveOrZero(message.usage.output);

		return {
			ok: true,
			ttftMs,
			durationMs,
			outputTokens,
			tokensPerSecond: computeTokensPerSecond(outputTokens, durationMs, ttftMs),
		};
	} catch (error) {
		return failed(getErrorMessage(error));
	}
}
227
+
228
/**
 * Assemble the report for one model from its run results.
 *
 * Averages are arithmetic means over the successful runs only; when no run
 * succeeded the `average` field is `null`.
 */
function buildModelReport(
	selector: string,
	model: Model<Api>,
	thinking: ResolvedThinkingLevel | undefined,
	results: BenchRunResult[],
): BenchModelReport {
	let count = 0;
	let ttftTotal = 0;
	let durationTotal = 0;
	let tokenTotal = 0;
	let tpsTotal = 0;
	for (const run of results) {
		if (!run.ok) continue;
		count += 1;
		ttftTotal += run.ttftMs;
		durationTotal += run.durationMs;
		tokenTotal += run.outputTokens;
		tpsTotal += run.tokensPerSecond;
	}

	let average: BenchAverages | null = null;
	if (count > 0) {
		average = {
			ttftMs: ttftTotal / count,
			durationMs: durationTotal / count,
			outputTokens: tokenTotal / count,
			tokensPerSecond: tpsTotal / count,
		};
	}
	return { selector, model: formatModelString(model), thinking, results, average };
}
246
+
247
/** Format a millisecond value as a duration, rounded to whole ms and floored at zero. */
function formatMs(ms: number): string {
	const rounded = Math.round(ms);
	const nonNegative = Math.max(0, rounded);
	return formatDuration(nonNegative);
}
250
+
251
/**
 * Render the one-line summary printed after each run.
 *
 * @param result - Outcome of the run.
 * @param index - Zero-based run index; displayed one-based.
 * @param total - Number of runs requested.
 * @returns A green check line with the metrics, or a red cross line with the
 *   error flattened to a single line and truncated to `ERROR_WIDTH`.
 */
function formatRunLine(result: BenchRunResult, index: number, total: number): string {
	const prefix = chalk.dim(`run ${index + 1}/${total}`);
	if (!result.ok) {
		// Tabs and newlines would break the single-line layout.
		const singleLine = replaceTabs(result.error).replace(/\r?\n/g, " ");
		return ` ${chalk.red("✗")} ${prefix} ${chalk.red(truncateToWidth(singleLine, ERROR_WIDTH))}`;
	}
	const ttft = formatMs(result.ttftMs);
	const tps = result.tokensPerSecond.toFixed(1);
	const total_ = formatMs(result.durationMs);
	return ` ${chalk.green("✓")} ${prefix} ${chalk.dim("TTFT")} ${ttft} ${chalk.dim("TPS")} ${tps}/s ${chalk.dim("tokens")} ${result.outputTokens} ${chalk.dim("total")} ${total_}`;
}
258
+
259
/**
 * Render the comparison table shown after all runs.
 *
 * Models are ranked by average tokens/second, fastest first; models with no
 * successful run sort last and show "-" in every metric column. Rows with
 * failed runs get a red "(N failed)" suffix. The header row is dimmed.
 *
 * Column widths are computed once up front and both the header and the body
 * rows go through the same renderer, instead of re-scanning every row for
 * every cell.
 *
 * @param summary - Summary whose `models` are tabulated (not mutated).
 * @returns The table text, terminated by a newline.
 */
export function formatBenchTable(summary: BenchSummary): string {
	const ranked = [...summary.models].sort((a, b) => {
		if (a.average === null && b.average === null) return 0;
		if (a.average === null) return 1;
		if (b.average === null) return -1;
		return b.average.tokensPerSecond - a.average.tokensPerSecond;
	});
	const rows = ranked.map(report => ({
		model: report.model,
		ttft: report.average ? formatMs(report.average.ttftMs) : "-",
		tps: report.average ? `${report.average.tokensPerSecond.toFixed(1)}/s` : "-",
		tokens: report.average ? String(Math.round(report.average.outputTokens)) : "-",
		total: report.average ? formatMs(report.average.durationMs) : "-",
		failed: report.results.filter(result => !result.ok).length,
	}));

	const headers = { model: "model", ttft: "TTFT", tps: "TPS", tokens: "tokens", total: "total" } as const;
	type Column = keyof typeof headers;
	const columns: readonly Column[] = ["model", "ttft", "tps", "tokens", "total"];

	// Widest cell per column, header included; computed once for the whole table.
	const columnWidth = (key: Column): number =>
		rows.reduce((widest, row) => Math.max(widest, row[key].length), headers[key].length);
	const widths: Record<Column, number> = {
		model: columnWidth("model"),
		ttft: columnWidth("ttft"),
		tps: columnWidth("tps"),
		tokens: columnWidth("tokens"),
		total: columnWidth("total"),
	};

	// Pad every cell to its column width, then drop the padding after the last cell.
	const renderRow = (cells: Record<Column, string>): string =>
		columns
			.map(key => cells[key].padEnd(widths[key]))
			.join(" ")
			.trimEnd();

	const lines = [chalk.dim(renderRow(headers))];
	for (const row of rows) {
		const failedSuffix = row.failed > 0 ? ` ${chalk.red(`(${row.failed} failed)`)}` : "";
		lines.push(renderRow(row) + failedSuffix);
	}
	return `${lines.join("\n")}\n`;
}
304
+
305
/**
 * Build the runtime used when no `createRuntime` override is supplied:
 * discovered auth storage, settings initialised for the project directory,
 * and a model registry on top of that storage.
 *
 * The returned `close` releases the auth storage. If initialisation fails the
 * storage is closed before the error is rethrown.
 */
async function createDefaultRuntime(): Promise<BenchRuntime> {
	const authStorage = await discoverAuthStorage();
	const close = () => authStorage.close();
	try {
		const settings = await Settings.init({ cwd: getProjectDir() });
		return {
			modelRegistry: new ModelRegistry(authStorage),
			settings,
			close,
		};
	} catch (error) {
		close();
		throw error;
	}
}
320
+
321
/**
 * A selector paired with the model it resolved to and the thinking level
 * resolved for that model (undefined when none applies).
 */
+ interface BenchTarget {
322
+ selector: string;
323
+ model: Model<Api>;
324
+ thinking: ResolvedThinkingLevel | undefined;
325
+ }
326
+
327
/**
 * Resolve every selector to a concrete model before any request is sent.
 *
 * Resolution warnings are written to stderr in yellow. Failures are collected
 * across all selectors so the user sees every bad selector at once.
 *
 * @param selectors - Selectors as typed on the command line.
 * @param modelRegistry - Registry used for lookup.
 * @param settings - Source of model-match preferences, if available.
 * @param writeStderr - Sink for warnings.
 * @returns One target per selector, in input order.
 * @throws Error listing each selector that failed to resolve.
 */
function resolveBenchModels(
	selectors: string[],
	modelRegistry: BenchModelRegistry,
	settings: Settings | undefined,
	writeStderr: (text: string) => void,
): BenchTarget[] {
	const preferences = getModelMatchPreferences(settings);
	const targets: BenchTarget[] = [];
	const problems: string[] = [];

	for (const selector of selectors) {
		const outcome = resolveCliModel({ cliModel: selector, modelRegistry, preferences });
		if (outcome.error) {
			problems.push(`${selector}: ${outcome.error}`);
		} else if (!outcome.model) {
			problems.push(`${selector}: model not found`);
		} else {
			if (outcome.warning) {
				writeStderr(`${chalk.yellow(`Warning: ${outcome.warning}`)}\n`);
			}
			const thinking = resolveThinkingLevelForModel(outcome.model, outcome.thinkingLevel);
			targets.push({ selector, model: outcome.model, thinking });
		}
	}

	if (problems.length > 0) {
		const noun = problems.length === 1 ? "model" : "models";
		throw new Error(`Could not resolve ${noun}:\n${problems.join("\n")}`);
	}
	return targets;
}
358
+
359
/**
 * Entry point of the bench command: resolve the selectors, run each model the
 * requested number of times sequentially, print progress and results, and
 * return the summary.
 *
 * Output: in JSON mode only the summary is printed; otherwise a heading and
 * one line per run are printed for each model, followed by a ranking table
 * when more than one model or more than one run was requested. On a TTY a
 * transient "streaming" line is shown while a run is in flight. The exit code
 * is set to 1 when any run failed.
 *
 * @param command - Parsed selectors and flags.
 * @param deps - Optional overrides for I/O, clock, runtime and streaming.
 * @returns The summary that was printed.
 * @throws Error for invalid `--runs` / `--max-tokens`, for an empty selector
 *   list, or when a selector cannot be resolved.
 */
export async function runBenchCommand(command: BenchCommandArgs, deps: BenchDependencies = {}): Promise<BenchSummary> {
	const { flags } = command;
	const runs = normalizePositiveInteger("runs", flags.runs, DEFAULT_RUNS);
	const maxTokens = normalizePositiveInteger("max-tokens", flags.maxTokens, DEFAULT_MAX_TOKENS);
	const prompt = flags.prompt?.trim() || BENCH_PROMPT;
	const json = flags.json === true;

	const randomSessionId = deps.randomSessionId ?? (() => Bun.randomUUIDv7());
	const writeStdout = deps.writeStdout ?? ((text: string) => process.stdout.write(text));
	const writeStderr = deps.writeStderr ?? ((text: string) => process.stderr.write(text));
	const setExitCode =
		deps.setExitCode ??
		((code: number) => {
			process.exitCode = code;
		});
	const streamFn = deps.streamSimple ?? streamSimple;
	const now = deps.now ?? (() => performance.now());
	const interactive = deps.stdoutIsTTY ?? process.stdout.isTTY === true;

	if (command.models.length === 0) {
		throw new Error("Pass at least one model selector, e.g. `omp bench opus gpt-5.2`");
	}

	const createRuntime = deps.createRuntime ?? createDefaultRuntime;
	const runtime = await createRuntime();
	try {
		const targets = resolveBenchModels(command.models, runtime.modelRegistry, runtime.settings, writeStderr);
		const reports: BenchModelReport[] = [];

		for (const target of targets) {
			const { selector, model, thinking } = target;
			if (!json) {
				// Show the selector only when it differs from the resolved name.
				const resolvedName = formatModelString(model);
				const selectorNote = selector === resolvedName ? "" : chalk.dim(` (${selector})`);
				writeStdout(`${chalk.bold(resolvedName)}${selectorNote}\n`);
			}

			const results: BenchRunResult[] = [];
			for (let run = 0; run < runs; run++) {
				const sessionId = randomSessionId();
				const initialKey = await runtime.modelRegistry.getApiKey(model, sessionId);
				if (!initialKey) {
					const missingCredentials: BenchRunFailure = {
						ok: false,
						error: `No credentials for provider "${model.provider}". Run \`omp\` and use /login, or set the provider API key.`,
					};
					results.push(missingCredentials);
					if (!json) {
						writeStdout(`${formatRunLine(missingCredentials, run, runs)}\n`);
					}
					break; // remaining runs would fail identically
				}

				const showProgress = !json && interactive;
				if (showProgress) {
					writeStdout(chalk.dim(` … run ${run + 1}/${runs} streaming`));
				}

				const outcome = await runBenchRequest(
					model,
					{
						apiKey: runtime.modelRegistry.resolver(model, sessionId),
						sessionId,
						prompt,
						maxTokens,
						reasoning: toReasoningEffort(thinking),
						disableReasoning: shouldDisableReasoning(thinking) ? true : undefined,
					},
					streamFn,
					now,
				);
				results.push(outcome);

				if (!json) {
					// Erase the transient progress line before printing the result.
					if (interactive) writeStdout("\r\x1b[2K");
					writeStdout(`${formatRunLine(outcome, run, runs)}\n`);
				}
			}
			reports.push(buildModelReport(selector, model, thinking, results));
		}

		let failures = 0;
		for (const report of reports) {
			failures += report.results.filter(result => !result.ok).length;
		}
		const summary: BenchSummary = { runs, maxTokens, models: reports, failures };

		if (json) {
			writeStdout(`${JSON.stringify(summary, null, 2)}\n`);
		} else if (reports.length > 1 || runs > 1) {
			writeStdout(`\n${formatBenchTable(summary)}`);
		}
		if (failures > 0) setExitCode(1);
		return summary;
	} finally {
		runtime.close?.();
	}
}
@@ -7,7 +7,14 @@
7
7
  * credentials produced no usage report are listed too, so the output
8
8
  * always covers the full credential pool.
9
9
  */
10
- import type { AuthStorage, UsageLimit, UsageReport, UsageUnit } from "@oh-my-pi/pi-ai";
10
+ import {
11
+ type AuthStorage,
12
+ resolveUsedFraction,
13
+ type UsageHistoryEntry,
14
+ type UsageLimit,
15
+ type UsageReport,
16
+ type UsageUnit,
17
+ } from "@oh-my-pi/pi-ai";
11
18
  import { formatDuration, formatNumber } from "@oh-my-pi/pi-utils";
12
19
  import chalk from "chalk";
13
20
  import { ModelRegistry } from "../config/model-registry";
@@ -19,6 +26,10 @@ export interface UsageCommandArgs {
19
26
  json?: boolean;
20
27
  provider?: string;
21
28
  redact?: boolean;
29
+ /** Show recorded usage-limit history instead of a live snapshot. */
30
+ history?: boolean;
31
+ /** History window in days (with `history`). */
32
+ days?: number;
22
33
  }
23
34
 
24
35
  /** Identity slice of a stored credential, for "every account" coverage. */
@@ -139,20 +150,9 @@ function collectIdentityStrings(reports: UsageReport[], accounts: UsageAccountId
139
150
 
140
151
  type LimitStatus = NonNullable<UsageLimit["status"]>;
141
152
 
142
- function resolveFraction(limit: UsageLimit): number | undefined {
143
- const amount = limit.amount;
144
- if (amount.usedFraction !== undefined) return amount.usedFraction;
145
- if (amount.used !== undefined && amount.limit !== undefined && amount.limit > 0) {
146
- return amount.used / amount.limit;
147
- }
148
- if (amount.unit === "percent" && amount.used !== undefined) return amount.used / 100;
149
- if (amount.remainingFraction !== undefined) return Math.max(0, 1 - amount.remainingFraction);
150
- return undefined;
151
- }
152
-
153
153
  function resolveStatus(limit: UsageLimit): LimitStatus {
154
154
  if (limit.status && limit.status !== "unknown") return limit.status;
155
- const fraction = resolveFraction(limit);
155
+ const fraction = resolveUsedFraction(limit);
156
156
  if (fraction === undefined) return "unknown";
157
157
  if (fraction >= 1) return "exhausted";
158
158
  if (fraction >= 0.8) return "warning";
@@ -208,7 +208,7 @@ function describeAmount(limit: UsageLimit): string {
208
208
  } else if (absoluteUnit && amount.remaining !== undefined) {
209
209
  parts.push(`${formatUnitValue(amount.remaining, amount.unit)}${UNIT_SUFFIX[amount.unit]} left`);
210
210
  }
211
- const fraction = resolveFraction(limit);
211
+ const fraction = resolveUsedFraction(limit);
212
212
  if (fraction !== undefined) {
213
213
  parts.push(`${(fraction * 100).toFixed(1)}% used`);
214
214
  } else if (amount.remainingFraction !== undefined) {
@@ -219,7 +219,7 @@ function describeAmount(limit: UsageLimit): string {
219
219
  }
220
220
 
221
221
  function renderBar(limit: UsageLimit): string {
222
- const fraction = resolveFraction(limit);
222
+ const fraction = resolveUsedFraction(limit);
223
223
  if (fraction === undefined) return chalk.dim("·".repeat(BAR_WIDTH));
224
224
  const clamped = Math.min(Math.max(fraction, 0), 1);
225
225
  const filled = Math.round(clamped * BAR_WIDTH);
@@ -325,6 +325,8 @@ function formatAccountHeader(
325
325
  let header = `${icon} ${chalk.bold(redaction?.get(label) ?? label)}`;
326
326
  const planType = report.metadata?.planType;
327
327
  if (typeof planType === "string" && planType) header += chalk.dim(` · plan: ${planType}`);
328
+ const savedResets = report.resetCredits?.availableCount ?? 0;
329
+ if (savedResets > 0) header += chalk.cyan(` · ✦ ${savedResets} saved reset${savedResets === 1 ? "" : "s"}`);
328
330
  if (report.fetchedAt && nowMs - report.fetchedAt > 90_000) {
329
331
  header += chalk.dim(` · fetched ${formatDuration(nowMs - report.fetchedAt)} ago`);
330
332
  }
@@ -375,7 +377,7 @@ export function computeProviderWindowStats(reports: UsageReport[]): ProviderWind
375
377
  for (const report of reports) {
376
378
  const accountMax = new Map<string, number>();
377
379
  for (const limit of report.limits) {
378
- const fraction = resolveFraction(limit);
380
+ const fraction = resolveUsedFraction(limit);
379
381
  if (fraction === undefined) continue;
380
382
  const durationMs = limit.window?.durationMs;
381
383
  const key =
@@ -482,6 +484,144 @@ export function formatUsageBreakdown(
482
484
  return lines.join("\n");
483
485
  }
484
486
 
487
// HISTORY_SPARK_WIDTH is the number of time buckets (one character each) in a history sparkline.
// SPARK_LEVELS lists the bar glyphs from lowest to highest used fraction.
+ const HISTORY_SPARK_WIDTH = 48;
488
+ const SPARK_LEVELS = ["▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"] as const;
489
+
490
/**
 * Snapshots of one usage limit for one account. `title` is the display name
 * derived from the most recently processed snapshot.
 */
+ interface HistorySeries {
491
+ title: string;
492
+ /** Snapshots ascending by recordedAt (listUsageHistory order). */
493
+ entries: UsageHistoryEntry[];
494
+ }
495
+
496
/**
 * One account's history within a provider. `label` is taken from the first
 * snapshot seen for the account; `series` is keyed by limit id.
 */
+ interface HistoryAccount {
497
+ label: string;
498
+ series: Map<string, HistorySeries>;
499
+ }
500
+
501
/**
 * Display title for a history series; counterpart of {@link limitTitle} for
 * history rows, which carry no scope/tier.
 *
 * The window label is appended in parentheses unless it is missing, is the
 * generic "quota window", or is already contained in the label
 * (case-insensitive).
 */
function historySeriesTitle(entry: UsageHistoryEntry): string {
	const { label, windowLabel } = entry;
	if (!windowLabel) return label;

	const loweredWindow = windowLabel.toLowerCase();
	const addsNothing = loweredWindow === "quota window" || label.toLowerCase().includes(loweredWindow);
	return addsNothing ? label : `${label} (${windowLabel})`;
}
510
+
511
/** Label for an account in history output: email, else account id, else the account key. */
function historyAccountLabel(entry: UsageHistoryEntry): string {
	const { email, accountId, accountKey } = entry;
	if (email != null) return email;
	if (accountId != null) return accountId;
	return accountKey;
}
514
+
515
/**
 * Status for a history data point.
 *
 * A recorded status other than "unknown" is used as-is. Otherwise the status
 * is derived from the used fraction: at or above 1 is "exhausted", at or
 * above 0.8 is "warning", anything lower is "ok", and a missing fraction is
 * "unknown".
 */
function historyStatus(fraction: number | undefined, status: UsageHistoryEntry["status"]): LimitStatus {
	const hasRecordedStatus = Boolean(status) && status !== "unknown";
	if (hasRecordedStatus && status) return status;
	if (fraction === undefined) return "unknown";
	return fraction >= 1 ? "exhausted" : fraction >= 0.8 ? "warning" : "ok";
}
522
+
523
/**
 * Render a fixed-width sparkline of used fraction over [sinceMs, nowMs].
 *
 * The window is split into `HISTORY_SPARK_WIDTH` equal buckets and each bucket
 * shows the peak fraction recorded in it, coloured by the status that fraction
 * implies. Snapshots outside the window land in the first or last bucket;
 * snapshots without a fraction are skipped; empty buckets render as dim dots.
 */
function renderHistorySparkline(entries: UsageHistoryEntry[], sinceMs: number, nowMs: number): string {
	const span = Math.max(1, nowMs - sinceMs);
	const lastBucket = HISTORY_SPARK_WIDTH - 1;
	const peaks: Array<number | undefined> = Array.from({ length: HISTORY_SPARK_WIDTH }, () => undefined);

	for (const { usedFraction, recordedAt } of entries) {
		if (usedFraction === undefined) continue;
		const position = Math.floor(((recordedAt - sinceMs) / span) * HISTORY_SPARK_WIDTH);
		const bucket = Math.min(lastBucket, Math.max(0, position));
		const known = peaks[bucket];
		peaks[bucket] = known === undefined ? usedFraction : Math.max(known, usedFraction);
	}

	let line = "";
	for (const peak of peaks) {
		if (peak === undefined) {
			line += chalk.dim("·");
			continue;
		}
		const clamped = Math.min(Math.max(peak, 0), 1);
		// A fraction of exactly 1 would index past the last glyph, hence the cap.
		const glyph = SPARK_LEVELS[Math.min(SPARK_LEVELS.length - 1, Math.floor(clamped * SPARK_LEVELS.length))];
		line += STATUS_COLOR[historyStatus(clamped, undefined)](glyph);
	}
	return line;
}
543
+
544
/**
 * Gather every identity string a history rendering could surface — input for
 * {@link buildRedactionMap}. Per entry, in order: email (if set), account id
 * (if set), then the account key. Duplicates are not removed.
 */
function collectHistoryIdentityStrings(entries: UsageHistoryEntry[]): string[] {
	return entries.flatMap(entry => {
		const identities: string[] = [];
		if (entry.email) identities.push(entry.email);
		if (entry.accountId) identities.push(entry.accountId);
		identities.push(entry.accountKey);
		return identities;
	});
}
554
+
555
/**
 * Render recorded usage-limit history: per provider, per account, one
 * peak-per-bucket sparkline per limit window plus latest/peak percentages and
 * the snapshot count.
 *
 * Providers, accounts and series are each sorted alphabetically. A series
 * title follows the last snapshot processed (labels can change across
 * snapshots), while an account label comes from its first snapshot.
 *
 * Latest and peak fractions are folded in a single pass over the series
 * rather than spread into `Math.max`, so a very long series cannot exceed the
 * engine's argument limit.
 *
 * @param entries - Snapshots, expected ascending by `recordedAt`.
 * @param sinceMs - Start of the rendered window (epoch ms).
 * @param nowMs - End of the rendered window (epoch ms).
 * @param redaction - Optional map from account label to masked label.
 * @returns The rendered text without a trailing newline.
 */
export function formatUsageHistory(
	entries: UsageHistoryEntry[],
	sinceMs: number,
	nowMs: number,
	redaction?: Map<string, string>,
): string {
	const providers = new Map<string, Map<string, HistoryAccount>>();
	for (const entry of entries) {
		let accounts = providers.get(entry.provider);
		if (!accounts) {
			accounts = new Map();
			providers.set(entry.provider, accounts);
		}
		let account = accounts.get(entry.accountKey);
		if (!account) {
			account = { label: historyAccountLabel(entry), series: new Map() };
			accounts.set(entry.accountKey, account);
		}
		let series = account.series.get(entry.limitId);
		if (!series) {
			series = { title: historySeriesTitle(entry), entries: [] };
			account.series.set(entry.limitId, series);
		}
		// Labels can change across snapshots (provider renames); latest wins.
		series.title = historySeriesTitle(entry);
		series.entries.push(entry);
	}

	const lines: string[] = [];
	lines.push(
		`${chalk.bold("Usage history")}${chalk.dim(` · last ${formatDuration(nowMs - sinceMs)} · peak per bucket`)}`,
	);

	for (const provider of [...providers.keys()].sort((a, b) => a.localeCompare(b))) {
		const accounts = providers.get(provider) ?? new Map<string, HistoryAccount>();
		lines.push("");
		lines.push(
			`${chalk.bold.cyan(formatProviderName(provider))} ${chalk.dim(`— ${accounts.size} ${accounts.size === 1 ? "account" : "accounts"}`)}`,
		);
		const sortedAccounts = [...accounts.values()].sort((a, b) => a.label.localeCompare(b.label));
		for (const account of sortedAccounts) {
			lines.push(` ${chalk.bold(redaction?.get(account.label) ?? account.label)}`);
			const labelWidth = [...account.series.values()].reduce((max, series) => Math.max(max, series.title.length), 0);
			const sortedSeries = [...account.series.values()].sort((a, b) => a.title.localeCompare(b.title));
			for (const series of sortedSeries) {
				// Entries are ascending, so the last defined fraction is the latest one.
				let latestFraction: number | undefined;
				let peakFraction: number | undefined;
				for (const { usedFraction } of series.entries) {
					if (usedFraction === undefined) continue;
					latestFraction = usedFraction;
					peakFraction = peakFraction === undefined ? usedFraction : Math.max(peakFraction, usedFraction);
				}
				const latestEntry = series.entries[series.entries.length - 1];
				const status = historyStatus(latestFraction, latestEntry?.status);
				const details: string[] = [];
				if (latestFraction !== undefined) details.push(`latest ${(latestFraction * 100).toFixed(1)}%`);
				if (peakFraction !== undefined) details.push(`peak ${(peakFraction * 100).toFixed(1)}%`);
				details.push(`${series.entries.length} snapshot${series.entries.length === 1 ? "" : "s"}`);
				lines.push(
					` ${STATUS_COLOR[status]("●")} ${series.title.padEnd(labelWidth)} ${renderHistorySparkline(series.entries, sinceMs, nowMs)} ${chalk.dim(details.join(" · "))}`,
				);
			}
		}
	}

	return lines.join("\n");
}
624
+
485
625
  function collectStoredAccounts(authStorage: AuthStorage): UsageAccountIdentity[] {
486
626
  const accounts: UsageAccountIdentity[] = [];
487
627
  const all = authStorage.getAll();
@@ -541,6 +681,37 @@ function redactReportForJson(
541
681
  export async function runUsageCommand(cmd: UsageCommandArgs): Promise<void> {
542
682
  const authStorage = await discoverAuthStorage();
543
683
  try {
684
+ if (cmd.history) {
685
+ const days = cmd.days !== undefined && Number.isFinite(cmd.days) && cmd.days > 0 ? cmd.days : 7;
686
+ const nowMs = Date.now();
687
+ const sinceMs = nowMs - days * 86_400_000;
688
+ const entries = authStorage.listUsageHistory({ sinceMs, provider: cmd.provider?.toLowerCase() });
689
+ const redaction = cmd.redact ? buildRedactionMap(collectHistoryIdentityStrings(entries)) : undefined;
690
+ if (cmd.json) {
691
+ const masked = redaction
692
+ ? entries.map(entry => ({
693
+ ...entry,
694
+ accountKey: redaction.get(entry.accountKey) ?? entry.accountKey,
695
+ email: maskIdentity(redaction, entry.email),
696
+ accountId: maskIdentity(redaction, entry.accountId),
697
+ }))
698
+ : entries;
699
+ process.stdout.write(`${JSON.stringify({ generatedAt: nowMs, sinceMs, entries: masked }, null, 2)}\n`);
700
+ return;
701
+ }
702
+ if (entries.length === 0) {
703
+ const scope = cmd.provider ? ` for provider "${cmd.provider}"` : "";
704
+ process.stderr.write(
705
+ chalk.yellow(
706
+ `No usage history recorded${scope} yet. Snapshots accumulate whenever usage is fetched (TUI footer, /usage, omp usage).\n`,
707
+ ),
708
+ );
709
+ process.exitCode = 1;
710
+ return;
711
+ }
712
+ process.stdout.write(`${formatUsageHistory(entries, sinceMs, nowMs, redaction)}\n`);
713
+ return;
714
+ }
544
715
  const modelRegistry = new ModelRegistry(authStorage);
545
716
  const reports =
546
717
  (await authStorage.fetchUsageReports({