@oh-my-pi/pi-coding-agent 15.11.6 → 15.11.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/CHANGELOG.md +57 -1
  2. package/dist/cli.js +431 -381
  3. package/dist/types/cli/args.d.ts +2 -0
  4. package/dist/types/cli/bench-cli.d.ts +78 -0
  5. package/dist/types/collab/crypto.d.ts +12 -0
  6. package/dist/types/collab/guest.d.ts +21 -0
  7. package/dist/types/collab/host.d.ts +13 -0
  8. package/dist/types/collab/protocol.d.ts +100 -0
  9. package/dist/types/collab/relay-client.d.ts +22 -0
  10. package/dist/types/commands/bench.d.ts +29 -0
  11. package/dist/types/commands/join.d.ts +12 -0
  12. package/dist/types/config/model-resolver.d.ts +3 -2
  13. package/dist/types/config/settings-schema.d.ts +93 -1
  14. package/dist/types/edit/renderer.d.ts +1 -0
  15. package/dist/types/extensibility/slash-commands.d.ts +1 -11
  16. package/dist/types/modes/components/agent-hub.d.ts +13 -0
  17. package/dist/types/modes/components/collab-prompt-message.d.ts +10 -0
  18. package/dist/types/modes/components/hook-selector.d.ts +4 -6
  19. package/dist/types/modes/components/oauth-selector.d.ts +10 -1
  20. package/dist/types/modes/components/segment-track.d.ts +11 -6
  21. package/dist/types/modes/components/settings-selector.d.ts +8 -1
  22. package/dist/types/modes/components/snapcompact-shape-preview.d.ts +31 -0
  23. package/dist/types/modes/components/status-line/component.d.ts +4 -1
  24. package/dist/types/modes/components/status-line/types.d.ts +9 -0
  25. package/dist/types/modes/components/tool-execution.d.ts +13 -9
  26. package/dist/types/modes/interactive-mode.d.ts +7 -0
  27. package/dist/types/modes/setup-wizard/scenes/sign-in.d.ts +3 -0
  28. package/dist/types/modes/setup-wizard/scenes/types.d.ts +10 -1
  29. package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +3 -0
  30. package/dist/types/modes/types.d.ts +8 -0
  31. package/dist/types/session/agent-session.d.ts +11 -0
  32. package/dist/types/session/session-manager.d.ts +21 -0
  33. package/dist/types/session/snapcompact-inline.d.ts +8 -3
  34. package/dist/types/slash-commands/builtin-registry.d.ts +9 -0
  35. package/dist/types/tools/bash.d.ts +2 -0
  36. package/dist/types/tools/eval-render.d.ts +1 -0
  37. package/dist/types/tools/renderers.d.ts +13 -0
  38. package/dist/types/tools/ssh.d.ts +1 -0
  39. package/package.json +14 -12
  40. package/scripts/bench-guard.ts +71 -0
  41. package/src/cli/args.ts +2 -0
  42. package/src/cli/bench-cli.ts +437 -0
  43. package/src/cli-commands.ts +2 -0
  44. package/src/collab/crypto.ts +57 -0
  45. package/src/collab/guest.ts +421 -0
  46. package/src/collab/host.ts +494 -0
  47. package/src/collab/protocol.ts +191 -0
  48. package/src/collab/relay-client.ts +216 -0
  49. package/src/commands/bench.ts +42 -0
  50. package/src/commands/join.ts +39 -0
  51. package/src/config/model-registry.ts +74 -19
  52. package/src/config/model-resolver.ts +36 -5
  53. package/src/config/settings-schema.ts +119 -1
  54. package/src/edit/renderer.ts +5 -0
  55. package/src/extensibility/slash-commands.ts +1 -97
  56. package/src/hindsight/client.ts +26 -1
  57. package/src/hindsight/state.ts +6 -2
  58. package/src/internal-urls/docs-index.generated.ts +4 -3
  59. package/src/main.ts +11 -2
  60. package/src/mcp/transports/stdio.ts +81 -7
  61. package/src/modes/components/agent-hub.ts +119 -22
  62. package/src/modes/components/assistant-message.ts +126 -6
  63. package/src/modes/components/collab-prompt-message.ts +30 -0
  64. package/src/modes/components/hook-selector.ts +4 -5
  65. package/src/modes/components/oauth-selector.ts +67 -7
  66. package/src/modes/components/segment-track.ts +44 -7
  67. package/src/modes/components/settings-selector.ts +27 -0
  68. package/src/modes/components/snapcompact-shape-preview-doc.md +11 -0
  69. package/src/modes/components/snapcompact-shape-preview.ts +192 -0
  70. package/src/modes/components/status-line/component.ts +21 -1
  71. package/src/modes/components/status-line/presets.ts +1 -1
  72. package/src/modes/components/status-line/segments.ts +13 -0
  73. package/src/modes/components/status-line/types.ts +10 -0
  74. package/src/modes/components/tips.txt +2 -1
  75. package/src/modes/components/tool-execution.ts +18 -10
  76. package/src/modes/controllers/input-controller.ts +80 -12
  77. package/src/modes/controllers/selector-controller.ts +6 -2
  78. package/src/modes/controllers/streaming-reveal.ts +7 -0
  79. package/src/modes/interactive-mode.ts +36 -4
  80. package/src/modes/setup-wizard/index.ts +1 -0
  81. package/src/modes/setup-wizard/scenes/glyph.ts +24 -6
  82. package/src/modes/setup-wizard/scenes/providers.ts +36 -2
  83. package/src/modes/setup-wizard/scenes/sign-in.ts +10 -1
  84. package/src/modes/setup-wizard/scenes/theme.ts +28 -1
  85. package/src/modes/setup-wizard/scenes/types.ts +10 -1
  86. package/src/modes/setup-wizard/scenes/web-search.ts +22 -6
  87. package/src/modes/setup-wizard/wizard-overlay.ts +38 -1
  88. package/src/modes/types.ts +8 -0
  89. package/src/modes/utils/context-usage.ts +1 -1
  90. package/src/modes/utils/ui-helpers.ts +7 -0
  91. package/src/prompts/bench.md +7 -0
  92. package/src/sdk.ts +240 -36
  93. package/src/session/agent-session.ts +22 -0
  94. package/src/session/session-manager.ts +44 -0
  95. package/src/session/snapcompact-inline.ts +20 -22
  96. package/src/slash-commands/builtin-registry.ts +210 -0
  97. package/src/tools/bash.ts +3 -0
  98. package/src/tools/eval-render.ts +4 -0
  99. package/src/tools/read.ts +38 -5
  100. package/src/tools/renderers.ts +13 -0
  101. package/src/tools/ssh.ts +3 -0
  102. package/src/tools/write.ts +13 -42
@@ -0,0 +1,437 @@
1
+ import type { ResolvedThinkingLevel } from "@oh-my-pi/pi-agent-core";
2
+ import type {
3
+ Api,
4
+ ApiKeyResolver,
5
+ AssistantMessage,
6
+ AssistantMessageEvent,
7
+ AssistantMessageEventStream,
8
+ Context,
9
+ Effort,
10
+ Model,
11
+ SimpleStreamOptions,
12
+ } from "@oh-my-pi/pi-ai";
13
+ import { streamSimple } from "@oh-my-pi/pi-ai";
14
+ import type { CanonicalModelVariant } from "@oh-my-pi/pi-catalog/identity";
15
+ import { replaceTabs, truncateToWidth } from "@oh-my-pi/pi-tui";
16
+ import { formatDuration, getProjectDir } from "@oh-my-pi/pi-utils";
17
+ import chalk from "chalk";
18
+ import type { ApiKeyResolverModel } from "../config/api-key-resolver";
19
+ import { type CanonicalModelQueryOptions, ModelRegistry } from "../config/model-registry";
20
+ import { formatModelString, getModelMatchPreferences, resolveCliModel } from "../config/model-resolver";
21
+ import { Settings } from "../config/settings";
22
+ import benchPrompt from "../prompts/bench.md" with { type: "text" };
23
+ import { discoverAuthStorage } from "../sdk";
24
+ import { resolveThinkingLevelForModel, shouldDisableReasoning, toReasoningEffort } from "../thinking";
25
+
26
/** Requests sent per model when `--runs` is not supplied. */
const DEFAULT_RUNS = 1;
/** Output-token cap per request when `--max-tokens` is not supplied. */
const DEFAULT_MAX_TOKENS = 512;
/** Width passed to `truncateToWidth` when printing a failed run's error text. */
const ERROR_WIDTH = 110;
/** Bundled prompt text (whitespace-trimmed), used when no `--prompt` is given. */
const BENCH_PROMPT = benchPrompt.trim();
30
+
31
/** Parsed input of the `bench` command. */
export interface BenchCommandArgs {
  /** Model selectors to benchmark; `runBenchCommand` rejects an empty list. */
  models: string[];
  flags: {
    /** Requests per model; must be a positive integer. Defaults to 1. */
    runs?: number;
    /** Output-token cap per request; must be a positive integer. Defaults to 512. */
    maxTokens?: number;
    /** Custom prompt; a missing or blank value falls back to the bundled prompt. */
    prompt?: string;
    /** When exactly `true`, print the summary as JSON instead of per-run lines. */
    json?: boolean;
  };
}
40
+
41
/**
 * The slice of a model registry that bench needs. The optional members are
 * canonical-model lookups that an implementation may omit.
 */
export interface BenchModelRegistry {
  /** Lists every known model. */
  getAll(): Model<Api>[];
  /** Looks up the API key for `model`; bench treats a falsy result as missing credentials. */
  getApiKey(model: Model<Api>, sessionId?: string): Promise<string | undefined>;
  /** Builds the key resolver that bench passes to the stream call as `apiKey`. */
  resolver(model: ApiKeyResolverModel, sessionId?: string): ApiKeyResolver;
  resolveCanonicalModel?(canonicalId: string, options?: CanonicalModelQueryOptions): Model<Api> | undefined;
  getCanonicalVariants?(canonicalId: string, options?: CanonicalModelQueryOptions): CanonicalModelVariant[];
  getCanonicalId?(model: Model<Api>): string | undefined;
}
49
+
50
/** Resources a bench invocation runs against. */
export interface BenchRuntime {
  /** Registry used to resolve selectors and credentials. */
  modelRegistry: BenchModelRegistry;
  /** Settings used to derive model-match preferences, when available. */
  settings?: Settings;
  /** Optional cleanup hook; `runBenchCommand` calls it in a `finally` block. */
  close?: () => void;
}
55
+
56
/** Measurements of one request that completed without error. */
export interface BenchRunSuccess {
  ok: true;
  /** Time to first token, in milliseconds. */
  ttftMs: number;
  /** Total request duration, in milliseconds. */
  durationMs: number;
  /** Output tokens reported in the response usage. */
  outputTokens: number;
  /** Generation throughput measured over the post-first-token window. */
  tokensPerSecond: number;
}
64
+
65
/** Outcome of one request that failed. */
export interface BenchRunFailure {
  ok: false;
  /** Human-readable failure reason. */
  error: string;
}
69
+
70
/** Outcome of a single run, discriminated by `ok`. */
export type BenchRunResult = BenchRunSuccess | BenchRunFailure;
71
+
72
/** Arithmetic means of each metric over a model's successful runs. */
export interface BenchAverages {
  /** Mean time to first token, in milliseconds. */
  ttftMs: number;
  /** Mean total duration, in milliseconds. */
  durationMs: number;
  /** Mean output-token count (may be fractional). */
  outputTokens: number;
  /** Mean of the per-run tokens-per-second values. */
  tokensPerSecond: number;
}
78
+
79
/** All results collected for one benchmarked model. */
export interface BenchModelReport {
  /** Selector as the user typed it (e.g. "opus" or "gemini-3.5:low"). */
  selector: string;
  /** Resolved `provider/id`. */
  model: string;
  /** Explicit thinking level from a `:level` selector suffix; undefined = provider default. */
  thinking?: ResolvedThinkingLevel;
  /** One entry per attempted run, in execution order. */
  results: BenchRunResult[];
  /** Averages over successful runs; null when every run failed. */
  average: BenchAverages | null;
}
90
+
91
/** Result of a whole bench invocation. */
export interface BenchSummary {
  /** Runs requested per model (after defaulting). */
  runs: number;
  /** Output-token cap requested per run (after defaulting). */
  maxTokens: number;
  /** One report per resolved model. */
  models: BenchModelReport[];
  /** Number of failed runs summed across all models. */
  failures: number;
}
97
+
98
/** Shape of the streaming call bench uses to issue a request; `streamSimple` is the default implementation. */
type BenchStreamSimple = (
  model: Model<Api>,
  context: Context,
  options?: SimpleStreamOptions,
) => AssistantMessageEventStream;
103
+
104
/** Injectable collaborators of `runBenchCommand`; every member has a default. */
export interface BenchDependencies {
  /** Builds the runtime; defaults to one backed by discovered auth storage. */
  createRuntime?: () => Promise<BenchRuntime>;
  /** Produces a session id per run; defaults to `Bun.randomUUIDv7()`. */
  randomSessionId?: () => string;
  /** Sink for regular output; defaults to `process.stdout.write`. */
  writeStdout?: (text: string) => void;
  /** Sink for warnings; defaults to `process.stderr.write`. */
  writeStderr?: (text: string) => void;
  /** Receives the exit code; defaults to assigning `process.exitCode`. */
  setExitCode?: (code: number) => void;
  /** Streaming implementation; defaults to `streamSimple`. */
  streamSimple?: BenchStreamSimple;
  /** Clock in milliseconds; defaults to `performance.now()`. */
  now?: () => number;
  /** Overrides TTY detection; defaults to `process.stdout.isTTY === true`. */
  stdoutIsTTY?: boolean;
}
114
+
115
/**
 * Converts a caught value into display text.
 *
 * @returns The `message` of an `Error` when it is non-empty, otherwise the
 *   value's `String(...)` form.
 */
function getErrorMessage(error: unknown): string {
  if (!(error instanceof Error) || !error.message) {
    return String(error);
  }
  return error.message;
}
119
+
120
/**
 * Validates an optional numeric flag.
 *
 * @param name Flag name without the leading dashes; used in the error text.
 * @param value Value supplied by the user, if any.
 * @param fallback Returned when `value` is undefined.
 * @returns `value` when it is a positive integer, or `fallback` when absent.
 * @throws Error when `value` is present but not a positive integer.
 */
function normalizePositiveInteger(name: string, value: number | undefined, fallback: number): number {
  if (value === undefined) return fallback;
  const isPositiveInteger = Number.isInteger(value) && value > 0;
  if (isPositiveInteger) return value;
  throw new Error(`Expected --${name} to be a positive integer, got ${value}`);
}
127
+
128
/**
 * Tells whether a stream event carries actual generated content, i.e. whether
 * it may mark the moment the first token arrived.
 *
 * Delta events count when their `delta` is non-empty; text/thinking end events
 * count when their `content` is non-empty. Every other event type is ignored.
 */
function isFirstTokenEvent(event: AssistantMessageEvent): boolean {
  if (event.type === "text_delta" || event.type === "thinking_delta" || event.type === "toolcall_delta") {
    return event.delta.length > 0;
  }
  if (event.type === "text_end" || event.type === "thinking_end") {
    return event.content.length > 0;
  }
  return false;
}
141
+
142
/**
 * Output tokens per second, measured over the decode window (total duration
 * minus time-to-first-token) so that queueing and prefill time do not drag the
 * figure down. If that window is not positive — e.g. the whole response landed
 * in one chunk — the total duration is used instead; if that is not positive
 * either, the result is 0.
 */
function computeTokensPerSecond(outputTokens: number, durationMs: number, ttftMs: number): number {
  let windowMs = durationMs - ttftMs;
  if (!(windowMs > 0)) windowMs = durationMs;
  if (!(windowMs > 0)) return 0;
  return (outputTokens * 1000) / windowMs;
}
152
+
153
/** Per-request inputs of `runBenchRequest`. */
interface BenchRequestOptions {
  /** Key resolver forwarded to the stream call. */
  apiKey: ApiKeyResolver;
  /** Session id forwarded to the stream call. */
  sessionId: string;
  /** Text sent as the single user message. */
  prompt: string;
  /** Requested output-token cap; lowered to the model's own limit when that is smaller. */
  maxTokens: number;
  /** Explicit effort from a `:level` selector suffix; absent = provider default. */
  reasoning?: Effort;
  /** Only set for an explicit `:off` suffix — some endpoints reject disablement. */
  disableReasoning?: boolean;
}
163
+
164
/**
 * Issues one benchmark request and measures it.
 *
 * The returned promise does not reject for request problems: an `error` stream
 * event, an error stop reason / error message on the final message, and any
 * exception thrown while streaming are all reported as a failure result.
 *
 * @param model Model to query.
 * @param options Prompt, credentials and limits for this request.
 * @param streamFn Streaming implementation to call.
 * @param now Millisecond clock used for locally observed timings.
 */
async function runBenchRequest(
  model: Model<Api>,
  options: BenchRequestOptions,
  streamFn: BenchStreamSimple,
  now: () => number,
): Promise<BenchRunResult> {
  // Negative, NaN and infinite measurements collapse to 0.
  const nonNegative = (value: number): number => (Number.isFinite(value) && value > 0 ? value : 0);
  const startedAt = now();
  try {
    const context: Context = {
      // Codex's Responses endpoint 400s with "Instructions are required" when no
      // system prompt is present — same guard as eval's completion bridge.
      systemPrompt: ["You are a helpful assistant."],
      messages: [{ role: "user", content: options.prompt, timestamp: Date.now(), attribution: "user" }],
    };
    const modelLimit = model.maxTokens;
    const hasModelLimit = Number.isFinite(modelLimit) && modelLimit > 0;
    const stream = streamFn(model, context, {
      apiKey: options.apiKey,
      sessionId: options.sessionId,
      maxTokens: hasModelLimit ? Math.min(options.maxTokens, modelLimit) : options.maxTokens,
      reasoning: options.reasoning,
      disableReasoning: options.disableReasoning,
      // pi-ai opts every OpenRouter request into response caching (1h TTL).
      // Bench sends a byte-identical request each run, so within the TTL
      // OpenRouter replays the cached generation with zeroed usage — the run
      // shows "tokens 0, TPS 0.0" at line speed. Opt back out so every run
      // measures a fresh generation.
      headers: model.provider === "openrouter" ? { "X-OpenRouter-Cache": "false" } : undefined,
    });

    let firstTokenAt: number | undefined;
    let finalMessage: AssistantMessage | undefined;
    for await (const event of stream) {
      // Timestamp only the first content-bearing event.
      if (firstTokenAt === undefined && isFirstTokenEvent(event)) {
        firstTokenAt = now();
      }
      switch (event.type) {
        case "error":
          return { ok: false, error: event.error.errorMessage ?? "request failed" };
        case "done":
          finalMessage = event.message;
          break;
      }
    }

    // No "done" event seen: ask the stream for its final message instead.
    const message = finalMessage ?? (await stream.result());
    if (message.stopReason === "error" || message.errorMessage) {
      return { ok: false, error: message.errorMessage ?? "request failed" };
    }

    // Prefer timings reported on the message; fall back to the local clock.
    const durationMs = nonNegative(message.duration ?? now() - startedAt);
    const observedTtft = firstTokenAt === undefined ? durationMs : firstTokenAt - startedAt;
    const ttftMs = nonNegative(message.ttft ?? observedTtft);
    const outputTokens = nonNegative(message.usage.output);
    return {
      ok: true,
      ttftMs,
      durationMs,
      outputTokens,
      tokensPerSecond: computeTokensPerSecond(outputTokens, durationMs, ttftMs),
    };
  } catch (error) {
    return { ok: false, error: getErrorMessage(error) };
  }
}
227
+
228
/**
 * Assembles the report for one model from its run results.
 *
 * @returns A report whose `average` is the per-metric mean over successful
 *   runs, or `null` when no run succeeded.
 */
function buildModelReport(
  selector: string,
  model: Model<Api>,
  thinking: ResolvedThinkingLevel | undefined,
  results: BenchRunResult[],
): BenchModelReport {
  const successes = results.filter((result): result is BenchRunSuccess => result.ok);
  let average: BenchAverages | null = null;
  const count = successes.length;
  if (count > 0) {
    // Accumulate all four sums in a single pass over the successful runs.
    let ttftSum = 0;
    let durationSum = 0;
    let tokenSum = 0;
    let tpsSum = 0;
    for (const run of successes) {
      ttftSum += run.ttftMs;
      durationSum += run.durationMs;
      tokenSum += run.outputTokens;
      tpsSum += run.tokensPerSecond;
    }
    average = {
      ttftMs: ttftSum / count,
      durationMs: durationSum / count,
      outputTokens: tokenSum / count,
      tokensPerSecond: tpsSum / count,
    };
  }
  return { selector, model: formatModelString(model), thinking, results, average };
}
246
+
247
/** Formats a millisecond value for display after rounding it and clamping it at zero. */
function formatMs(ms: number): string {
  const rounded = Math.round(ms);
  const clamped = Math.max(0, rounded);
  return formatDuration(clamped);
}
250
+
251
/**
 * Renders the one-line status of a single run.
 *
 * @param result Outcome of the run.
 * @param index Zero-based run index (displayed one-based).
 * @param total Number of runs requested for the model.
 */
function formatRunLine(result: BenchRunResult, index: number, total: number): string {
  const prefix = chalk.dim(`run ${index + 1}/${total}`);
  if (!result.ok) {
    // Flatten tabs and line breaks so the error stays on one line, then cap its width.
    const singleLine = replaceTabs(result.error).replace(/\r?\n/g, " ");
    const shortened = truncateToWidth(singleLine, ERROR_WIDTH);
    return ` ${chalk.red("✗")} ${prefix} ${chalk.red(shortened)}`;
  }
  const fields = [
    chalk.green("✓"),
    prefix,
    chalk.dim("TTFT"),
    formatMs(result.ttftMs),
    chalk.dim("TPS"),
    `${result.tokensPerSecond.toFixed(1)}/s`,
    chalk.dim("tokens"),
    String(result.outputTokens),
    chalk.dim("total"),
    formatMs(result.durationMs),
  ];
  return ` ${fields.join(" ")}`;
}
258
+
259
/**
 * Renders the comparison table printed at the end of a bench run.
 *
 * Models are ordered by average tokens/s, fastest first; models without any
 * successful run come last and show "-" in every metric column. Rows with
 * failed runs get a red "(N failed)" suffix. The header row is dimmed.
 *
 * @param summary Summary whose `models` are tabulated; it is not mutated.
 * @returns The table text, terminated by a newline.
 */
export function formatBenchTable(summary: BenchSummary): string {
  const ranked = [...summary.models].sort((a, b) => {
    if (a.average === null && b.average === null) return 0;
    if (a.average === null) return 1;
    if (b.average === null) return -1;
    return b.average.tokensPerSecond - a.average.tokensPerSecond;
  });
  const rows = ranked.map(report => ({
    model: report.model,
    ttft: report.average ? formatMs(report.average.ttftMs) : "-",
    tps: report.average ? `${report.average.tokensPerSecond.toFixed(1)}/s` : "-",
    tokens: report.average ? String(Math.round(report.average.outputTokens)) : "-",
    total: report.average ? formatMs(report.average.durationMs) : "-",
    failed: report.results.filter(result => !result.ok).length,
  }));
  const headers = { model: "model", ttft: "TTFT", tps: "TPS", tokens: "tokens", total: "total" } as const;
  type Column = keyof typeof headers;
  const columns: readonly Column[] = ["model", "ttft", "tps", "tokens", "total"];

  // Measure each column exactly once. A fold is used instead of
  // `Math.max(...spread)` so the cost is one pass per column and no
  // per-row argument list is built.
  const columnWidth = (key: Column): number =>
    rows.reduce((widest, row) => Math.max(widest, row[key].length), headers[key].length);
  const widths: Record<Column, number> = {
    model: columnWidth("model"),
    ttft: columnWidth("ttft"),
    tps: columnWidth("tps"),
    tokens: columnWidth("tokens"),
    total: columnWidth("total"),
  };

  // Pads every cell to its column width and drops the padding after the last cell.
  const renderRow = (cells: Record<Column, string>): string =>
    columns
      .map(key => cells[key].padEnd(widths[key]))
      .join(" ")
      .trimEnd();

  const lines = [chalk.dim(renderRow(headers))];
  for (const row of rows) {
    const failedSuffix = row.failed > 0 ? ` ${chalk.red(`(${row.failed} failed)`)}` : "";
    lines.push(renderRow(row) + failedSuffix);
  }
  return `${lines.join("\n")}\n`;
}
304
+
305
/**
 * Builds the runtime used when no `createRuntime` dependency is injected:
 * discovered auth storage, settings initialised for the project directory, and
 * a `ModelRegistry` on top of the auth storage.
 *
 * The auth storage is closed again if setup fails after it was opened;
 * otherwise closing it is delegated to the returned `close` hook.
 */
async function createDefaultRuntime(): Promise<BenchRuntime> {
  const authStorage = await discoverAuthStorage();
  let settings: Settings;
  let modelRegistry: ModelRegistry;
  try {
    settings = await Settings.init({ cwd: getProjectDir() });
    modelRegistry = new ModelRegistry(authStorage);
  } catch (error) {
    authStorage.close();
    throw error;
  }
  return {
    modelRegistry,
    settings,
    close: () => authStorage.close(),
  };
}
320
+
321
/** A selector together with the model and thinking level it resolved to. */
interface BenchTarget {
  /** Selector exactly as passed on the command line. */
  selector: string;
  /** Model the selector resolved to. */
  model: Model<Api>;
  /** Thinking level resolved for that model; may be undefined. */
  thinking: ResolvedThinkingLevel | undefined;
}
326
+
327
/**
 * Resolves every selector to a model before any request is sent.
 *
 * Resolution warnings are written to `writeStderr` as they occur. All
 * unresolved selectors are collected so a single error can list them together.
 *
 * @returns One target per selector, in input order.
 * @throws Error listing each selector that failed to resolve.
 */
function resolveBenchModels(
  selectors: string[],
  modelRegistry: BenchModelRegistry,
  settings: Settings | undefined,
  writeStderr: (text: string) => void,
): BenchTarget[] {
  const preferences = getModelMatchPreferences(settings);
  const targets: BenchTarget[] = [];
  const problems: string[] = [];
  for (const selector of selectors) {
    const outcome = resolveCliModel({ cliModel: selector, modelRegistry, preferences });
    if (outcome.error) {
      problems.push(`${selector}: ${outcome.error}`);
      continue;
    }
    const model = outcome.model;
    if (!model) {
      problems.push(`${selector}: model not found`);
      continue;
    }
    if (outcome.warning) {
      writeStderr(`${chalk.yellow(`Warning: ${outcome.warning}`)}\n`);
    }
    const thinking = resolveThinkingLevelForModel(model, outcome.thinkingLevel);
    targets.push({ selector, model, thinking });
  }
  if (problems.length === 0) return targets;
  const noun = problems.length === 1 ? "model" : "models";
  throw new Error(`Could not resolve ${noun}:\n${problems.join("\n")}`);
}
358
+
359
/**
 * Entry point of the `bench` command: benchmarks each selected model `runs`
 * times, one request after another, and reports TTFT / throughput per run.
 *
 * Output is either human-readable (a header and one line per run, plus a
 * ranking table when more than one model or run is involved) or, with
 * `flags.json`, a single JSON document. The exit code is set to 1 when any run
 * failed. A model without credentials records one failure and skips its
 * remaining runs.
 *
 * @param command Parsed selectors and flags.
 * @param deps Optional overrides for I/O, clock, runtime and streaming.
 * @returns The summary that was printed.
 * @throws Error for invalid `--runs` / `--max-tokens`, an empty selector list,
 *   or selectors that cannot be resolved.
 */
export async function runBenchCommand(command: BenchCommandArgs, deps: BenchDependencies = {}): Promise<BenchSummary> {
  const { flags } = command;
  const runs = normalizePositiveInteger("runs", flags.runs, DEFAULT_RUNS);
  const maxTokens = normalizePositiveInteger("max-tokens", flags.maxTokens, DEFAULT_MAX_TOKENS);
  const prompt = flags.prompt?.trim() || BENCH_PROMPT;
  const json = flags.json === true;
  const human = !json;

  const randomSessionId = deps.randomSessionId ?? (() => Bun.randomUUIDv7());
  const writeStdout = deps.writeStdout ?? ((text: string) => process.stdout.write(text));
  const writeStderr = deps.writeStderr ?? ((text: string) => process.stderr.write(text));
  const setExitCode =
    deps.setExitCode ??
    ((code: number) => {
      process.exitCode = code;
    });
  const streamFn = deps.streamSimple ?? streamSimple;
  const now = deps.now ?? (() => performance.now());
  const interactive = deps.stdoutIsTTY ?? process.stdout.isTTY === true;

  if (command.models.length === 0) {
    throw new Error("Pass at least one model selector, e.g. `omp bench opus gpt-5.2`");
  }

  const runtime = await (deps.createRuntime ?? createDefaultRuntime)();
  try {
    const registry = runtime.modelRegistry;
    const targets = resolveBenchModels(command.models, registry, runtime.settings, writeStderr);
    const reports: BenchModelReport[] = [];

    for (const target of targets) {
      const { selector, model, thinking } = target;
      if (human) {
        // Show the original selector only when it differs from the resolved name.
        const label = formatModelString(model);
        const resolvedNote = selector === label ? "" : chalk.dim(` (${selector})`);
        writeStdout(`${chalk.bold(label)}${resolvedNote}\n`);
      }

      const results: BenchRunResult[] = [];
      for (let run = 0; run < runs; run += 1) {
        const sessionId = randomSessionId();
        const initialKey = await registry.getApiKey(model, sessionId);
        if (!initialKey) {
          const failure: BenchRunFailure = {
            ok: false,
            error: `No credentials for provider "${model.provider}". Run \`omp\` and use /login, or set the provider API key.`,
          };
          results.push(failure);
          if (human) writeStdout(`${formatRunLine(failure, run, runs)}\n`);
          break; // remaining runs would fail identically
        }

        // Transient progress line; erased below once the run has finished.
        if (human && interactive) {
          writeStdout(chalk.dim(` … run ${run + 1}/${runs} streaming`));
        }
        const request: BenchRequestOptions = {
          apiKey: registry.resolver(model, sessionId),
          sessionId,
          prompt,
          maxTokens,
          reasoning: toReasoningEffort(thinking),
          disableReasoning: shouldDisableReasoning(thinking) ? true : undefined,
        };
        const result = await runBenchRequest(model, request, streamFn, now);
        results.push(result);
        if (human) {
          if (interactive) writeStdout("\r\x1b[2K");
          writeStdout(`${formatRunLine(result, run, runs)}\n`);
        }
      }
      reports.push(buildModelReport(selector, model, thinking, results));
    }

    let failures = 0;
    for (const report of reports) {
      for (const result of report.results) {
        if (!result.ok) failures += 1;
      }
    }

    const summary: BenchSummary = { runs, maxTokens, models: reports, failures };
    if (json) {
      writeStdout(`${JSON.stringify(summary, null, 2)}\n`);
    } else if (reports.length > 1 || runs > 1) {
      writeStdout(`\n${formatBenchTable(summary)}`);
    }
    if (failures > 0) setExitCode(1);
    return summary;
  } finally {
    runtime.close?.();
  }
}
@@ -16,6 +16,7 @@ export const commands: CommandEntry[] = [
16
16
  { name: "auth-broker", load: () => import("./commands/auth-broker").then(m => m.default) },
17
17
  { name: "auth-gateway", load: () => import("./commands/auth-gateway").then(m => m.default) },
18
18
  { name: "agents", load: () => import("./commands/agents").then(m => m.default) },
19
+ { name: "bench", load: () => import("./commands/bench").then(m => m.default) },
19
20
  { name: "commit", load: () => import("./commands/commit").then(m => m.default) },
20
21
  { name: "completions", load: () => import("./commands/completions").then(m => m.default) },
21
22
  { name: "__complete", load: () => import("./commands/complete").then(m => m.default) },
@@ -25,6 +26,7 @@ export const commands: CommandEntry[] = [
25
26
  { name: "gallery", load: () => import("./commands/gallery").then(m => m.default) },
26
27
  { name: "grievances", load: () => import("./commands/grievances").then(m => m.default) },
27
28
  { name: "install", load: () => import("./commands/install").then(m => m.default) },
29
+ { name: "join", load: () => import("./commands/join").then(m => m.default) },
28
30
  { name: "plugin", load: () => import("./commands/plugin").then(m => m.default) },
29
31
  { name: "setup", load: () => import("./commands/setup").then(m => m.default) },
30
32
  { name: "shell", load: () => import("./commands/shell").then(m => m.default) },
@@ -0,0 +1,57 @@
1
+ /**
2
+ * AES-256-GCM sealing for collab frames.
3
+ *
4
+ * The room key lives only in the link fragment; the relay sees opaque bytes.
5
+ * Sealed layout: `[12B IV][ciphertext+tag]`.
6
+ */
7
+ import type { CollabFrame } from "./protocol";
8
+
9
/** Web Crypto algorithm name used for importing keys, sealing and opening. */
const AES_ALGORITHM = "AES-GCM";
/** Byte length of the IV that prefixes every sealed frame. */
const IV_LENGTH = 12;
/** Byte length of a room key (256 bits). */
const KEY_LENGTH = 32;
/** Shared encoder for serialising frames to bytes. */
const TEXT_ENCODER = new TextEncoder();
/** Shared decoder for turning decrypted bytes back into text. */
const TEXT_DECODER = new TextDecoder();
14
+
15
/** Creates a fresh random room key of `KEY_LENGTH` bytes. */
export function generateRoomKey(): Uint8Array {
  // getRandomValues fills the array in place and hands the same array back.
  return crypto.getRandomValues(new Uint8Array(KEY_LENGTH));
}
20
+
21
/**
 * Imports raw room-key bytes as a non-extractable AES-GCM key usable for
 * encryption and decryption.
 *
 * Declared `async` so that a wrong key length surfaces as a rejected promise,
 * like every other failure of this function, instead of a synchronous throw
 * that `.catch()` handlers would miss.
 *
 * @param raw Key material; must be exactly `KEY_LENGTH` bytes.
 * @returns The imported key.
 * @throws Error (as a rejection) when `raw` has the wrong length.
 */
export async function importRoomKey(raw: Uint8Array): Promise<CryptoKey> {
  if (raw.byteLength !== KEY_LENGTH) {
    throw new Error(`Room key must be ${KEY_LENGTH} bytes, got ${raw.byteLength}`);
  }
  return crypto.subtle.importKey("raw", asStrict(raw), AES_ALGORITHM, false, ["encrypt", "decrypt"]);
}
27
+
28
/**
 * Encrypts a frame under the room key.
 *
 * The frame is serialised as JSON, encrypted with AES-GCM under a freshly
 * generated random IV, and returned as `[IV][ciphertext+tag]`.
 */
export async function seal(key: CryptoKey, frame: CollabFrame): Promise<Uint8Array> {
  const iv = crypto.getRandomValues(new Uint8Array(IV_LENGTH));
  const encoded = TEXT_ENCODER.encode(JSON.stringify(frame));
  const encrypted = await crypto.subtle.encrypt({ name: AES_ALGORITHM, iv }, key, encoded);
  const sealed = new Uint8Array(IV_LENGTH + encrypted.byteLength);
  sealed.set(iv, 0);
  sealed.set(new Uint8Array(encrypted), IV_LENGTH);
  return sealed;
}
38
+
39
/**
 * Inverse of {@link seal}: splits off the IV, decrypts the remainder and
 * parses the plaintext as JSON. Throws on auth failure or malformed input.
 *
 * NOTE(review): the parsed value is cast to `CollabFrame` without any shape
 * validation here — confirm callers validate it.
 */
export async function open(key: CryptoKey, data: Uint8Array): Promise<CollabFrame> {
  const hasPayload = data.byteLength > IV_LENGTH;
  if (!hasPayload) {
    throw new Error("Sealed frame too short");
  }
  const iv = asStrict(data.subarray(0, IV_LENGTH));
  const sealedBody = asStrict(data.subarray(IV_LENGTH));
  const decrypted = await crypto.subtle.decrypt({ name: AES_ALGORITHM, iv }, key, sealedBody);
  const json = TEXT_DECODER.decode(new Uint8Array(decrypted));
  return JSON.parse(json) as CollabFrame;
}
49
+
50
/**
 * Returns a `Uint8Array` that is backed by a plain `ArrayBuffer` it spans
 * completely. The input is returned as-is when it already qualifies; otherwise
 * (a partial view, or a non-`ArrayBuffer` backing store) its bytes are copied
 * into a new array.
 */
function asStrict(bytes: Uint8Array): Uint8Array<ArrayBuffer> {
  const { buffer, byteOffset, byteLength } = bytes;
  const spansOwnArrayBuffer =
    buffer instanceof ArrayBuffer && byteOffset === 0 && byteLength === buffer.byteLength;
  if (!spansOwnArrayBuffer) {
    const copy = new Uint8Array(byteLength);
    copy.set(bytes);
    return copy;
  }
  return bytes as Uint8Array<ArrayBuffer>;
}
+ }