@oh-my-pi/pi-coding-agent 15.9.1 → 15.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/CHANGELOG.md +68 -2
  2. package/dist/types/cli/classify-install-target.d.ts +5 -1
  3. package/dist/types/cli/dry-balance-cli.d.ts +104 -0
  4. package/dist/types/commands/dry-balance.d.ts +31 -0
  5. package/dist/types/config/model-registry.d.ts +2 -0
  6. package/dist/types/config/models-config-schema.d.ts +3 -0
  7. package/dist/types/config/settings-schema.d.ts +13 -4
  8. package/dist/types/config/settings.d.ts +11 -0
  9. package/dist/types/discovery/helpers.d.ts +1 -0
  10. package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +2 -3
  11. package/dist/types/hindsight/bank.d.ts +17 -9
  12. package/dist/types/hindsight/mental-models.d.ts +1 -1
  13. package/dist/types/hindsight/state.d.ts +9 -3
  14. package/dist/types/mcp/manager.d.ts +1 -1
  15. package/dist/types/modes/components/assistant-message.d.ts +11 -0
  16. package/dist/types/modes/components/custom-editor.d.ts +3 -1
  17. package/dist/types/modes/components/error-banner.d.ts +11 -0
  18. package/dist/types/modes/components/tool-execution.d.ts +15 -0
  19. package/dist/types/modes/components/transcript-container.d.ts +4 -2
  20. package/dist/types/modes/components/user-message.d.ts +1 -1
  21. package/dist/types/modes/image-references.d.ts +17 -0
  22. package/dist/types/modes/interactive-mode.d.ts +7 -0
  23. package/dist/types/modes/types.d.ts +7 -0
  24. package/dist/types/modes/utils/ui-helpers.d.ts +1 -0
  25. package/dist/types/session/agent-session.d.ts +9 -0
  26. package/dist/types/session/auth-storage.d.ts +2 -2
  27. package/dist/types/session/blob-store.d.ts +12 -11
  28. package/dist/types/session/session-manager.d.ts +5 -3
  29. package/dist/types/system-prompt.d.ts +2 -0
  30. package/dist/types/task/types.d.ts +2 -0
  31. package/dist/types/tiny/title-client.d.ts +16 -1
  32. package/dist/types/tool-discovery/mode.d.ts +8 -0
  33. package/dist/types/tools/archive-reader.d.ts +5 -1
  34. package/dist/types/tools/index.d.ts +16 -0
  35. package/dist/types/tools/path-utils.d.ts +11 -0
  36. package/dist/types/tui/hyperlink.d.ts +12 -0
  37. package/dist/types/web/search/render.d.ts +1 -2
  38. package/package.json +9 -9
  39. package/src/cli/classify-install-target.ts +31 -5
  40. package/src/cli/dry-balance-cli.ts +823 -0
  41. package/src/cli/plugin-cli.ts +45 -0
  42. package/src/cli/web-search-cli.ts +0 -1
  43. package/src/cli-commands.ts +1 -0
  44. package/src/commands/dry-balance.ts +43 -0
  45. package/src/config/model-registry.ts +60 -4
  46. package/src/config/models-config-schema.ts +2 -0
  47. package/src/config/settings-schema.ts +14 -4
  48. package/src/config/settings.ts +38 -0
  49. package/src/discovery/builtin-rules/ts-no-tiny-functions.md +1 -0
  50. package/src/discovery/github.ts +37 -1
  51. package/src/discovery/helpers.ts +3 -1
  52. package/src/eval/__tests__/agent-bridge.test.ts +72 -0
  53. package/src/eval/py/tool-bridge.ts +43 -5
  54. package/src/extensibility/custom-commands/bundled/ci-green/index.ts +31 -2
  55. package/src/extensibility/plugins/legacy-pi-compat.ts +245 -25
  56. package/src/hindsight/backend.ts +184 -35
  57. package/src/hindsight/bank.ts +32 -22
  58. package/src/hindsight/mental-models.ts +1 -1
  59. package/src/hindsight/state.ts +21 -7
  60. package/src/internal-urls/docs-index.generated.ts +6 -6
  61. package/src/internal-urls/omp-protocol.ts +8 -2
  62. package/src/main.ts +7 -1
  63. package/src/mcp/manager.ts +40 -21
  64. package/src/modes/components/assistant-message.ts +22 -0
  65. package/src/modes/components/custom-editor.ts +14 -2
  66. package/src/modes/components/error-banner.ts +33 -0
  67. package/src/modes/components/tool-execution.ts +44 -0
  68. package/src/modes/components/transcript-container.ts +102 -30
  69. package/src/modes/components/tree-selector.ts +29 -2
  70. package/src/modes/components/user-message.ts +9 -2
  71. package/src/modes/controllers/event-controller.ts +42 -3
  72. package/src/modes/controllers/input-controller.ts +41 -3
  73. package/src/modes/image-references.ts +111 -0
  74. package/src/modes/interactive-mode.ts +48 -13
  75. package/src/modes/setup-wizard/scenes/sign-in.ts +27 -7
  76. package/src/modes/types.ts +10 -1
  77. package/src/modes/utils/ui-helpers.ts +23 -2
  78. package/src/prompts/agents/explore.md +1 -0
  79. package/src/prompts/agents/librarian.md +1 -0
  80. package/src/prompts/ci-green-request.md +5 -3
  81. package/src/prompts/dry-balance-bench.md +8 -0
  82. package/src/prompts/system/project-prompt.md +1 -0
  83. package/src/sdk.ts +99 -18
  84. package/src/session/agent-session.ts +103 -19
  85. package/src/session/auth-storage.ts +4 -0
  86. package/src/session/blob-store.ts +96 -9
  87. package/src/session/session-manager.ts +19 -10
  88. package/src/system-prompt.ts +4 -0
  89. package/src/task/executor.ts +6 -2
  90. package/src/task/index.ts +8 -7
  91. package/src/task/types.ts +2 -0
  92. package/src/tiny/title-client.ts +7 -1
  93. package/src/tool-discovery/mode.ts +24 -0
  94. package/src/tools/archive-reader.ts +339 -31
  95. package/src/tools/bash.ts +3 -4
  96. package/src/tools/fetch.ts +29 -9
  97. package/src/tools/gh.ts +65 -11
  98. package/src/tools/index.ts +22 -8
  99. package/src/tools/job.ts +3 -3
  100. package/src/tools/memory-reflect.ts +2 -2
  101. package/src/tools/path-utils.ts +21 -0
  102. package/src/tools/read.ts +58 -12
  103. package/src/tools/search-tool-bm25.ts +4 -6
  104. package/src/tools/search.ts +78 -12
  105. package/src/tui/hyperlink.ts +42 -7
  106. package/src/utils/file-mentions.ts +7 -107
  107. package/src/utils/title-generator.ts +58 -37
  108. package/src/web/search/index.ts +2 -2
  109. package/src/web/search/render.ts +20 -52
@@ -0,0 +1,823 @@
1
+ import type {
2
+ Api,
3
+ AssistantMessage,
4
+ AssistantMessageEvent,
5
+ AssistantMessageEventStream,
6
+ Context,
7
+ Model,
8
+ OAuthAccess,
9
+ OAuthAccessResolution,
10
+ SimpleStreamOptions,
11
+ } from "@oh-my-pi/pi-ai";
12
+ import { streamSimple } from "@oh-my-pi/pi-ai";
13
+ import { replaceTabs, truncateToWidth } from "@oh-my-pi/pi-tui";
14
+ import { formatDuration, getProjectDir } from "@oh-my-pi/pi-utils";
15
+ import chalk from "chalk";
16
+ import type { CanonicalModelVariant } from "../config/model-equivalence";
17
+ import { type CanonicalModelQueryOptions, ModelRegistry } from "../config/model-registry";
18
+ import {
19
+ formatModelString,
20
+ type ModelMatchPreferences,
21
+ resolveAllowedModels,
22
+ resolveCliModel,
23
+ resolveModelRoleValue,
24
+ } from "../config/model-resolver";
25
+ import { Settings } from "../config/settings";
26
+ import dryBalanceBenchPrompt from "../prompts/dry-balance-bench.md" with { type: "text" };
27
+ import { discoverAuthStorage } from "../sdk";
28
+
29
/** Fallback for the `count` flag when it is not supplied. */
const DEFAULT_SAMPLE_COUNT = 100;
/** Fallback for the `concurrency` flag when it is not supplied. */
const DEFAULT_CONCURRENCY = 32;
/** Upper bound on the `maxTokens` option sent with a bench request. */
const BENCH_MAX_TOKENS = 512;
/** Delay between spinner repaints of the interactive bench progress view. */
const BENCH_RENDER_INTERVAL_MS = 80;
/** Width that account labels are truncated to in bench output lines. */
const BENCH_ACCOUNT_WIDTH = 60;
/** Width that error messages are truncated to in bench output lines. */
const BENCH_ERROR_WIDTH = 110;
/** Spinner glyphs cycled through while a bench request is in flight. */
const BENCH_SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] as const;
/** Bench prompt text with surrounding whitespace removed. */
const DRY_BALANCE_BENCH_PROMPT = dryBalanceBenchPrompt.trim();
37
+
38
/** Parsed command input accepted by `runDryBalanceCommand`. */
export interface DryBalanceCommandArgs {
	/** Model selector; consulted only when `flags.model` is not set. */
	model?: string;
	flags: {
		/** Model selector; takes precedence over the top-level `model`. */
		model?: string;
		/** Number of sampled attempts (positive integer). Not read in bench mode. */
		count?: number;
		/** Maximum parallel attempts (positive integer). Not read in bench mode. */
		concurrency?: number;
		/** Print the summary as JSON on stdout instead of formatted text. */
		json?: boolean;
		/** Run the bench flow instead of the sampling flow. */
		bench?: boolean;
	};
}
48
+
49
/** Options forwarded to the auth storage when resolving OAuth access. */
export interface DryBalanceAuthOptions {
	/** Filled from the selected model's `baseUrl` by this module. */
	baseUrl?: string;
	/** Filled from the selected model's `id` by this module. */
	modelId?: string;
	/** Optional abort signal; this module never sets it. */
	signal?: AbortSignal;
}
54
+
55
/** The slice of the auth storage that the dry-balance command depends on. */
export interface DryBalanceAuthStorage {
	/** Resolve a single OAuth access for `provider`; `undefined` when nothing resolves. */
	getOAuthAccess(
		provider: string,
		sessionId?: string,
		options?: DryBalanceAuthOptions,
	): Promise<OAuthAccess | undefined>;
	/** Optional bulk lookup; the bench flow prefers it over `getOAuthAccess` when present. */
	getOAuthAccesses?(provider: string, options?: DryBalanceAuthOptions): Promise<OAuthAccessResolution[]>;
}
63
+
64
/**
 * The slice of the model registry that the dry-balance command depends on.
 * This module reads `authStorage` and `getApiKey` directly; the registry is
 * also handed as a whole to the model-resolver helpers.
 */
export interface DryBalanceModelRegistry {
	authStorage: DryBalanceAuthStorage;
	getAll(): Model<Api>[];
	getAvailable(): Model<Api>[];
	getApiKey(model: Model<Api>, sessionId?: string): Promise<string | undefined>;
	getCanonicalVariants(canonicalId: string, options?: CanonicalModelQueryOptions): CanonicalModelVariant[];
	resolveCanonicalModel?(canonicalId: string, options?: CanonicalModelQueryOptions): Model<Api> | undefined;
	getCanonicalId?(model: Model<Api>): string | undefined;
}
73
+
74
/** Resources a dry-balance run needs, plus an optional disposer. */
export interface DryBalanceRuntime {
	modelRegistry: DryBalanceModelRegistry;
	/** Optional settings used during model resolution. */
	settings?: Settings;
	/** Called once in the command's `finally` block when provided. */
	close?: () => void;
}
79
+
80
/** How often one account was selected across the sampled attempts. */
export interface DryBalanceAccountStat {
	/** Account label. */
	account: string;
	/** Number of attempts that resolved to this account. */
	count: number;
	/** `count` as a percentage of all samples. */
	percent: number;
}
85
+
86
/** How often one failure reason occurred. */
export interface DryBalanceFailureStat {
	/** Failure message used as the grouping key. */
	reason: string;
	/** Number of occurrences of this reason. */
	count: number;
	/** `count` as a percentage of the total it was measured against. */
	percent: number;
}
91
+
92
/** Measurements from a bench request that completed without error. */
export interface DryBalanceBenchSuccessResult {
	ok: true;
	/** Account label the request was sent with. */
	account: string;
	/** Time to first token, in milliseconds. */
	ttftMs: number;
	/** Request duration, in milliseconds. */
	durationMs: number;
	/** Output token count taken from the message usage. */
	outputTokens: number;
	/** `outputTokens` per second over `durationMs`; 0 when the duration is 0. */
	tokensPerSecond: number;
}
100
+
101
/** Outcome of a bench request, or bench target, that failed. */
export interface DryBalanceBenchFailureResult {
	ok: false;
	/** Account label, when one is known. */
	account?: string;
	/** Failure message. */
	error: string;
}
106
+
107
/** Outcome of one bench request, discriminated by `ok`. */
export type DryBalanceBenchResult =
	| DryBalanceBenchSuccessResult
	| DryBalanceBenchFailureResult;
108
+
109
/** Aggregated view of all bench results. */
export interface DryBalanceBenchSummary {
	/** Number of bench results. */
	total: number;
	success: {
		total: number;
		/** Mean TTFT over successful results; `null` when there are none. */
		averageTtftMs: number | null;
		/** Mean tokens-per-second over successful results; `null` when there are none. */
		averageTokensPerSecond: number | null;
	};
	failure: {
		total: number;
		/** Failure messages grouped and counted. */
		reasons: DryBalanceFailureStat[];
	};
	/** The individual results that were aggregated. */
	results: DryBalanceBenchResult[];
}
122
+
123
/** Result returned by `runDryBalanceCommand`, and printed by it. */
export interface DryBalanceSummary {
	/** Model rendered through `formatModelString`. */
	model: string;
	provider: string;
	/** Sample count; in bench mode, the number of resolved targets. */
	samples: number;
	/** Concurrency used; in bench mode, the number of resolved targets. */
	concurrency: number;
	success: {
		total: number;
		/** Per-account selection counts. */
		accounts: DryBalanceAccountStat[];
	};
	failure: {
		total: number;
		/** Failure reasons grouped and counted. */
		reasons: DryBalanceFailureStat[];
	};
	/** Present only when the bench flow produced results. */
	bench?: DryBalanceBenchSummary;
}
138
+
139
/** Signature of the streaming function used to issue bench requests. */
type DryBalanceStreamSimple = (
	model: Model<Api>,
	context: Context,
	options?: SimpleStreamOptions,
) => AssistantMessageEventStream;
144
+
145
/**
 * Optional overrides for the collaborators of `runDryBalanceCommand`.
 * Any member left unset falls back to the default chosen inside that function.
 */
export interface DryBalanceDependencies {
	/** Builds the runtime (model registry, settings, disposer). */
	createRuntime?: () => Promise<DryBalanceRuntime>;
	/** Produces the session id used for each attempt or request. */
	randomSessionId?: () => string;
	/** Sink for stdout text. */
	writeStdout?: (text: string) => void;
	/** Sink for stderr text. */
	writeStderr?: (text: string) => void;
	/** Receives the exit code when failures were recorded. */
	setExitCode?: (code: number) => void;
	/** Streaming function used by bench requests. */
	streamSimple?: DryBalanceStreamSimple;
	/** Clock used for bench timing. */
	now?: () => number;
	/** Overrides TTY detection for stdout. */
	stdoutIsTTY?: boolean;
	/** Overrides TTY detection for stderr. */
	stderrIsTTY?: boolean;
}
156
+
157
/** Outcome of one credential-selection attempt, discriminated by `ok`. */
type DryBalanceAttemptResult =
	| { ok: true; account: string }
	| { ok: false; reason: string };
166
+
167
/** Display state of one row in the bench progress view, discriminated by `state`. */
type DryBalanceBenchProgressStatus =
	| { state: "waiting" }
	| { state: "running"; account: string }
	| { state: "success"; result: DryBalanceBenchSuccessResult }
	| { state: "failure"; result: DryBalanceBenchFailureResult };
172
+
173
/** Receiver for bench progress notifications. */
interface DryBalanceBenchProgressSink {
	/** Row `index` has started a request for `account`. */
	markRunning(index: number, account: string): void;
	/** Row `index` finished with `result`. */
	complete(index: number, result: DryBalanceBenchResult): void;
	/** No further updates will arrive. */
	close(): void;
}
178
+
179
/**
 * One account to bench: either a usable access token, or the error that
 * prevented resolving one.
 */
type DryBalanceBenchTarget =
	| { ok: true; account: string; accessToken: string }
	| { ok: false; account: string; error: string };
190
+
191
/**
 * Resolve a numeric flag to a strictly positive integer.
 *
 * @param name - Flag name without leading dashes; used in the error message.
 * @param value - Supplied value; when absent, `fallback` is used.
 * @param fallback - Default value (validated the same way as `value`).
 * @returns The resolved positive integer.
 * @throws Error when the resolved value is not an integer greater than zero.
 */
function normalizePositiveInteger(name: string, value: number | undefined, fallback: number): number {
	const candidate = value == null ? fallback : value;
	const acceptable = Number.isInteger(candidate) && candidate > 0;
	if (acceptable) return candidate;
	throw new Error(`--${name} must be a positive integer`);
}
198
+
199
/**
 * Produce a human-readable message for any thrown value.
 *
 * Resolution order: a non-empty `Error#message`; a non-empty string; a
 * non-empty string `message` property on any object; `String(error)`.
 * This function never throws: values whose string conversion fails (for
 * example objects without a prototype) yield "Unknown error".
 *
 * @param error - The caught value, of any type.
 * @returns A non-empty description of the error.
 */
function getErrorMessage(error: unknown): string {
	const fallback = "Unknown error";
	if (error instanceof Error && error.message) return error.message;
	if (typeof error === "string") return error || fallback;
	try {
		if (typeof error === "object" && error !== null) {
			// Thrown plain objects often carry a `message`; prefer it over "[object Object]".
			const candidate = (error as { message?: unknown }).message;
			if (typeof candidate === "string" && candidate) return candidate;
		}
		const text = String(error);
		return text ? text : fallback;
	} catch {
		// String() or a property getter threw; report a stable placeholder instead.
		return fallback;
	}
}
204
+
205
/**
 * Pick a display label for an OAuth access.
 *
 * @param access - Identity fields of the access.
 * @returns The first field that is neither `null` nor `undefined`, tried in
 *   the order email, accountId, projectId, enterpriseUrl; otherwise the
 *   placeholder "(unknown oauth account)".
 */
function extractAccount(access: {
	email?: string;
	accountId?: string;
	projectId?: string;
	enterpriseUrl?: string;
}): string {
	const fieldsByPriority = ["email", "accountId", "projectId", "enterpriseUrl"] as const;
	for (const field of fieldsByPriority) {
		const value = access[field];
		if (value != null) return value;
	}
	return "(unknown oauth account)";
}
213
+
214
/**
 * Build the key used to de-duplicate bench targets.
 *
 * @param access - Identity fields of a resolved access entry.
 * @returns The first of email, accountId, projectId, enterpriseUrl that is
 *   neither `null` nor `undefined`. Failing that, `credential:<id>` when a
 *   credential id is defined, else the access token, else the placeholder
 *   "(unknown oauth account)".
 */
function getBenchTargetKey(access: {
	credentialId?: number;
	email?: string;
	accountId?: string;
	projectId?: string;
	enterpriseUrl?: string;
	accessToken?: string;
}): string {
	const identityFields = ["email", "accountId", "projectId", "enterpriseUrl"] as const;
	for (const field of identityFields) {
		const value = access[field];
		if (value != null) return value;
	}
	if (access.credentialId !== undefined) {
		return `credential:${access.credentialId}`;
	}
	return access.accessToken ?? "(unknown oauth account)";
}
231
+
232
/**
 * Prepare free-form text for a single output line.
 *
 * Passes the text through `replaceTabs`, turns every LF or CRLF line break
 * into one space, and truncates the result with `truncateToWidth`.
 *
 * @param text - Raw text.
 * @param width - Width passed to `truncateToWidth`.
 */
function sanitizeBenchText(text: string, width: number): string {
	const withoutTabs = replaceTabs(text);
	const singleLine = withoutTabs.replace(/\r?\n/g, " ");
	return truncateToWidth(singleLine, width);
}
235
+
236
/**
 * Format a zero-based row index as a one-based, zero-padded label.
 *
 * @param index - Zero-based row index.
 * @param total - Row count; its decimal length sets the padding width.
 * @returns For example "#01" for index 0 of 10.
 */
function formatBenchIndex(index: number, total: number): string {
	const digits = `${total}`.length;
	const ordinal = `${index + 1}`.padStart(digits, "0");
	return "#" + ordinal;
}
239
+
240
/**
 * Render an account label for a bench output line.
 *
 * @param account - Account label; may be absent or empty.
 * @returns The sanitized label truncated to `BENCH_ACCOUNT_WIDTH`, or a
 *   dimmed "(no account)" placeholder when the label is absent or empty.
 */
function formatBenchAccount(account: string | undefined): string {
	if (!account) return chalk.dim("(no account)");
	return sanitizeBenchText(account, BENCH_ACCOUNT_WIDTH);
}
243
+
244
/**
 * Format a millisecond duration for display.
 *
 * The value is rounded to a whole number and clamped at zero before it is
 * handed to `formatDuration`.
 *
 * @param ms - Duration in milliseconds.
 */
function formatBenchDuration(ms: number): string {
	const wholeMs = Math.round(ms);
	const clamped = Math.max(0, wholeMs);
	return formatDuration(clamped);
}
247
+
248
/**
 * Format a tokens-per-second rate with one decimal place and a "/s" suffix.
 *
 * @param tokensPerSecond - Rate to format.
 */
function formatBenchTps(tokensPerSecond: number): string {
	const rate = tokensPerSecond.toFixed(1);
	return rate + "/s";
}
251
+
252
/** Type guard: narrows a bench result to the success variant via its `ok` flag. */
function isBenchSuccess(result: DryBalanceBenchResult): result is DryBalanceBenchSuccessResult {
	return result.ok === true;
}
255
+
256
/**
 * Decide whether a stream event carries generated content, so that it can
 * mark the time of the first token.
 *
 * @param event - Event from the assistant message stream.
 * @returns `true` for text/thinking/toolcall delta events with a non-empty
 *   `delta`, and for text/thinking end events with non-empty `content`;
 *   `false` for every other event.
 */
function isBenchFirstTokenEvent(event: AssistantMessageEvent): boolean {
	if (event.type === "text_delta" || event.type === "thinking_delta" || event.type === "toolcall_delta") {
		return event.delta.length > 0;
	}
	if (event.type === "text_end" || event.type === "thinking_end") {
		return event.content.length > 0;
	}
	return false;
}
269
+
270
/**
 * Choose the `maxTokens` value for a bench request.
 *
 * @param model - Model whose `maxTokens` limit is consulted.
 * @returns The smaller of `BENCH_MAX_TOKENS` and the model limit when that
 *   limit is a finite positive number; otherwise `BENCH_MAX_TOKENS`.
 */
function resolveBenchMaxTokens(model: Model<Api>): number {
	const modelLimit = model.maxTokens;
	if (!Number.isFinite(modelLimit) || modelLimit <= 0) {
		return BENCH_MAX_TOKENS;
	}
	return Math.min(BENCH_MAX_TOKENS, modelLimit);
}
275
+
276
/**
 * Clamp a millisecond measurement to a usable value.
 *
 * @param value - Raw measurement.
 * @returns `value` when it is finite and greater than zero, otherwise 0.
 */
function normalizeBenchMs(value: number): number {
	if (!Number.isFinite(value)) return 0;
	return value > 0 ? value : 0;
}
279
+
280
/**
 * Render the final output line for one bench result.
 *
 * @param index - Zero-based row index.
 * @param total - Row count (drives index padding).
 * @param result - Bench outcome to render.
 * @returns On success: a green check mark, the index, the account, then the
 *   TTFT and TPS figures. On failure: a red cross, the index, the account,
 *   then the error text truncated to `BENCH_ERROR_WIDTH` in red.
 */
function renderBenchResultLine(index: number, total: number, result: DryBalanceBenchResult): string {
	const label = formatBenchIndex(index, total);
	const who = formatBenchAccount(result.account);
	if (!result.ok) {
		const detail = chalk.red(sanitizeBenchText(result.error, BENCH_ERROR_WIDTH));
		return [chalk.red("✗"), label, who, detail].join(" ");
	}
	const parts = [
		chalk.green("✓"),
		label,
		who,
		chalk.dim("TTFT"),
		formatBenchDuration(result.ttftMs),
		chalk.dim("TPS"),
		formatBenchTps(result.tokensPerSecond),
	];
	return parts.join(" ");
}
291
+
292
/**
 * Render one row of the bench progress view.
 *
 * @param status - Current state of the row.
 * @param index - Zero-based row index.
 * @param total - Row count (drives index padding).
 * @param frame - Animation counter; selects the spinner glyph for running rows.
 * @returns A dimmed "waiting" line, a spinner line with the account and
 *   "sending request", or the finished-result line for completed rows.
 */
function renderBenchStatusLine(
	status: DryBalanceBenchProgressStatus,
	index: number,
	total: number,
	frame: number,
): string {
	// Finished rows share the renderer used for the final result lines.
	if (status.state === "success" || status.state === "failure") {
		return renderBenchResultLine(index, total, status.result);
	}
	const label = formatBenchIndex(index, total);
	if (status.state === "waiting") {
		return `${chalk.dim("○")} ${label} ${chalk.dim("waiting")}`;
	}
	const glyph = BENCH_SPINNER_FRAMES[frame % BENCH_SPINNER_FRAMES.length] ?? "*";
	return `${chalk.yellow(glyph)} ${label} ${formatBenchAccount(status.account)} ${chalk.dim("sending request")}`;
}
312
+
313
/**
 * Create the progress reporter for a bench run.
 *
 * Non-interactive sinks append one line per update. Interactive sinks paint
 * a header plus one row per request, repaint on every update and on a timer
 * (to animate the spinner), and repaint in place by moving the cursor up
 * over the previously written lines.
 *
 * @param total - Number of bench rows.
 * @param write - Output function that receives the rendered text.
 * @param interactive - Whether to use the in-place repainting view.
 * @returns The progress sink; `close()` stops the repaint timer and paints once more.
 */
function createBenchProgressSink(
	total: number,
	write: (text: string) => void,
	interactive: boolean,
): DryBalanceBenchProgressSink {
	const statuses: DryBalanceBenchProgressStatus[] = Array.from({ length: total }, () => ({ state: "waiting" }));
	const toFinishedStatus = (result: DryBalanceBenchResult): DryBalanceBenchProgressStatus =>
		result.ok ? { state: "success", result } : { state: "failure", result };

	if (!interactive) {
		return {
			markRunning(index, account) {
				const running: DryBalanceBenchProgressStatus = { state: "running", account };
				statuses[index] = running;
				write(`${renderBenchStatusLine(running, index, total, 0)}\n`);
			},
			complete(index, result) {
				statuses[index] = toFinishedStatus(result);
				write(`${renderBenchResultLine(index, total, result)}\n`);
			},
			close() {},
		};
	}

	let tick = 0;
	let paintedLines = 0;
	const repaint = (): void => {
		const rows = [
			chalk.bold("bench requests"),
			...statuses.map((status, index) => renderBenchStatusLine(status, index, total, tick)),
		];
		// Move the cursor back up over the previous paint before overwriting it.
		if (paintedLines > 0) write(`\x1b[${paintedLines}A`);
		// Each row starts with "erase line" so shorter text leaves no residue.
		write(`${rows.map(row => `\x1b[2K${row}`).join("\n")}\n`);
		paintedLines = rows.length;
	};

	repaint();
	let ticker: NodeJS.Timeout | undefined = setInterval(() => {
		tick += 1;
		repaint();
	}, BENCH_RENDER_INTERVAL_MS);
	ticker.unref?.();

	return {
		markRunning(index, account) {
			statuses[index] = { state: "running", account };
			repaint();
		},
		complete(index, result) {
			statuses[index] = toFinishedStatus(result);
			repaint();
		},
		close() {
			if (ticker) {
				clearInterval(ticker);
				ticker = undefined;
			}
			repaint();
		},
	};
}
369
+
370
/**
 * Send one streamed bench request and measure it.
 *
 * The request carries the bench prompt as a single user message. Failures
 * reported by the stream, by the final message, or thrown while streaming
 * are returned as failure results; this function does not reject.
 *
 * @param model - Model to call.
 * @param sessionId - Session id forwarded to the stream function.
 * @param account - Account label recorded on the result.
 * @param accessToken - Token passed as the request's `apiKey`.
 * @param streamFn - Streaming function used to issue the request.
 * @param now - Clock used for fallback timing.
 * @returns Timing and throughput on success, or the failure message.
 */
async function runBenchRequest(
	model: Model<Api>,
	sessionId: string,
	account: string,
	accessToken: string,
	streamFn: DryBalanceStreamSimple,
	now: () => number,
): Promise<DryBalanceBenchResult> {
	const failure = (error: string): DryBalanceBenchFailureResult => ({ ok: false, account, error });
	const startedAt = now();
	let firstTokenAt: number | undefined;
	try {
		const context: Context = {
			messages: [
				{
					role: "user",
					content: DRY_BALANCE_BENCH_PROMPT,
					timestamp: Date.now(),
					attribution: "user",
				},
			],
		};
		const stream = streamFn(model, context, {
			apiKey: accessToken,
			sessionId,
			maxTokens: resolveBenchMaxTokens(model),
			temperature: 0.2,
			disableReasoning: true,
			hideThinkingSummary: true,
		});

		let finalMessage: AssistantMessage | undefined;
		for await (const event of stream) {
			if (firstTokenAt === undefined && isBenchFirstTokenEvent(event)) {
				firstTokenAt = now();
			}
			switch (event.type) {
				case "error":
					return failure(event.error.errorMessage ?? "request failed");
				case "done":
					finalMessage = event.message;
					break;
			}
		}
		// No "done" event was seen: fall back to the stream's final result.
		if (finalMessage == null) {
			finalMessage = await stream.result();
		}
		if (finalMessage.stopReason === "error" || finalMessage.errorMessage) {
			return failure(finalMessage.errorMessage ?? "request failed");
		}

		// Prefer the timings reported on the message; otherwise use the local clock.
		const durationMs = normalizeBenchMs(finalMessage.duration ?? now() - startedAt);
		const fallbackTtft = firstTokenAt === undefined ? durationMs : firstTokenAt - startedAt;
		const ttftMs = normalizeBenchMs(finalMessage.ttft ?? fallbackTtft);
		const reportedOutput = finalMessage.usage.output;
		const outputTokens = Number.isFinite(reportedOutput) && reportedOutput > 0 ? reportedOutput : 0;
		const tokensPerSecond = durationMs > 0 ? (outputTokens * 1000) / durationMs : 0;
		return { ok: true, account, ttftMs, durationMs, outputTokens, tokensPerSecond };
	} catch (error) {
		return failure(getErrorMessage(error));
	}
}
433
+
434
/**
 * Resolve the list of accounts to bench for a model's provider.
 *
 * Uses the storage's bulk `getOAuthAccesses` when it exists; otherwise wraps
 * the single `getOAuthAccess` result (if any) as the only entry. Entries that
 * share a de-duplication key are collapsed to the first occurrence.
 *
 * @param model - Model whose provider, base URL and id drive the lookup.
 * @param authStorage - Auth storage to query.
 * @returns One target per distinct account, carrying either a token or an error.
 */
async function resolveBenchTargets(
	model: Model<Api>,
	authStorage: DryBalanceAuthStorage,
): Promise<DryBalanceBenchTarget[]> {
	const lookup: DryBalanceAuthOptions = { baseUrl: model.baseUrl, modelId: model.id };
	let resolved;
	if (authStorage.getOAuthAccesses) {
		resolved = await authStorage.getOAuthAccesses(model.provider, lookup);
	} else {
		const single = await authStorage.getOAuthAccess(model.provider, undefined, lookup);
		resolved = single ? [{ ok: true as const, ...single }] : [];
	}

	const seenKeys = new Set<string>();
	const targets: DryBalanceBenchTarget[] = [];
	for (const entry of resolved) {
		const key = getBenchTargetKey(entry);
		if (seenKeys.has(key)) continue;
		seenKeys.add(key);
		const account = extractAccount(entry);
		targets.push(
			entry.ok
				? { ok: true, account, accessToken: entry.accessToken }
				: { ok: false, account, error: entry.error },
		);
	}
	return targets;
}
464
+
465
/**
 * Run the bench request for every target, all started together.
 *
 * Targets that failed to resolve are reported as failures without issuing a
 * request. Progress is notified when a request starts and when each row
 * completes.
 *
 * @param model - Model to call.
 * @param targets - Resolved bench targets.
 * @param randomSessionId - Produces a session id per request.
 * @param progress - Optional progress sink.
 * @param streamFn - Streaming function used to issue requests.
 * @param now - Clock forwarded to the request runner.
 * @returns One result per target, in target order.
 */
async function runBenchTargets(
	model: Model<Api>,
	targets: DryBalanceBenchTarget[],
	randomSessionId: () => string,
	progress: DryBalanceBenchProgressSink | undefined,
	streamFn: DryBalanceStreamSimple,
	now: () => number,
): Promise<DryBalanceBenchResult[]> {
	const benchOne = async (target: DryBalanceBenchTarget, index: number): Promise<DryBalanceBenchResult> => {
		let outcome: DryBalanceBenchResult;
		if (target.ok) {
			progress?.markRunning(index, target.account);
			outcome = await runBenchRequest(
				model,
				randomSessionId(),
				target.account,
				target.accessToken,
				streamFn,
				now,
			);
		} else {
			outcome = { ok: false, account: target.account, error: target.error };
		}
		progress?.complete(index, outcome);
		return outcome;
	};
	return Promise.all(targets.map(benchOne));
}
498
+
499
/**
 * Build the default runtime: discovered auth storage, settings initialised
 * for the project directory, and a model registry on top of the storage.
 *
 * If settings or registry construction fails, the auth storage is closed
 * before the error is rethrown.
 *
 * @returns A runtime whose `close` closes the auth storage.
 */
async function createDefaultRuntime(): Promise<DryBalanceRuntime> {
	const authStorage = await discoverAuthStorage();
	let settings: Settings;
	let modelRegistry: ModelRegistry;
	try {
		settings = await Settings.init({ cwd: getProjectDir() });
		modelRegistry = new ModelRegistry(authStorage);
	} catch (error) {
		authStorage.close();
		throw error;
	}
	return {
		modelRegistry,
		settings,
		close: () => authStorage.close(),
	};
}
514
+
515
/**
 * Decide which model the command runs against.
 *
 * With an explicit selector the CLI resolver is authoritative. Without one,
 * the order is: the "default" model role resolved against the allowed
 * models, then the first allowed model for which an API key resolves, then
 * the first allowed model together with a warning.
 *
 * @param modelSelector - Explicit model selector, if any.
 * @param modelRegistry - Registry used for resolution and key lookup.
 * @param settings - Optional settings (usage order, model roles).
 * @param randomSessionId - Produces a session id for each key lookup.
 * @returns The chosen model and an optional warning.
 * @throws Error when the selector cannot be resolved or no models are allowed.
 */
async function resolveDryBalanceModel(
	modelSelector: string | undefined,
	modelRegistry: DryBalanceModelRegistry,
	settings: Settings | undefined,
	randomSessionId: () => string,
): Promise<{ model: Model<Api>; warning?: string }> {
	const preferences: ModelMatchPreferences = {
		usageOrder: settings?.getStorage()?.getModelUsageOrder(),
	};

	if (modelSelector) {
		const { error, model, warning } = resolveCliModel({
			cliModel: modelSelector,
			modelRegistry,
			preferences,
		});
		if (error) throw new Error(error);
		if (!model) throw new Error(`Model "${modelSelector}" not found`);
		return { model, warning };
	}

	const allowedModels = await resolveAllowedModels(modelRegistry, settings, preferences);
	if (allowedModels.length === 0) {
		throw new Error(
			"No models available. Use --model to select a model or configure enabledModels/default model settings.",
		);
	}

	const roleMatch = resolveModelRoleValue(settings?.getModelRole("default"), allowedModels, {
		settings,
		matchPreferences: preferences,
		modelRegistry,
	});
	if (roleMatch.model) {
		return { model: roleMatch.model, warning: roleMatch.warning };
	}

	// Probe allowed models in order and take the first one with a usable key.
	for (const candidate of allowedModels) {
		const key = await modelRegistry.getApiKey(candidate, randomSessionId());
		if (key) return { model: candidate };
	}

	const firstAllowed = allowedModels[0];
	return {
		model: firstAllowed,
		warning:
			"No allowed model had usable credentials during default resolution; dry-balance will report OAuth failures for the first allowed model.",
	};
}
562
+
563
/**
 * Perform one credential-selection attempt for a session id.
 *
 * @param model - Model whose provider, base URL and id drive the lookup.
 * @param modelRegistry - Registry whose auth storage is queried.
 * @param sessionId - Session id for this attempt.
 * @returns The selected account label, or the reason nothing was selected.
 *   Thrown errors are converted into failure results.
 */
async function runOneAttempt(
	model: Model<Api>,
	modelRegistry: DryBalanceModelRegistry,
	sessionId: string,
): Promise<DryBalanceAttemptResult> {
	try {
		// getOAuthAccess follows the same OAuth credential ranking, refresh,
		// usage-limit, broker, and session-sticky path as getApiKey(), but
		// yields the selected account's metadata rather than bearer bytes.
		const access = await modelRegistry.authStorage.getOAuthAccess(model.provider, sessionId, {
			baseUrl: model.baseUrl,
			modelId: model.id,
		});
		return access
			? { ok: true, account: extractAccount(access) }
			: { ok: false, reason: "no OAuth access resolved" };
	} catch (error) {
		return { ok: false, reason: getErrorMessage(error) };
	}
}
582
+
583
/**
 * Map `items` through an async function with bounded parallelism.
 *
 * Workers pull the next unclaimed index until the list is exhausted, so at
 * most `concurrency` calls to `fn` are in flight at once. Results are stored
 * at the index of their input.
 *
 * A `concurrency` that is zero, negative, fractional below one, or NaN is
 * treated as 1, so every item is always processed; values above
 * `items.length` are capped at `items.length`.
 *
 * @param items - Inputs to process.
 * @param concurrency - Maximum number of simultaneous `fn` calls.
 * @param fn - Async mapper receiving the item and its index.
 * @returns Results in input order.
 * @throws Rejects with the first error raised by `fn`.
 */
async function mapConcurrent<T, R>(
	items: T[],
	concurrency: number,
	fn: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
	const results = new Array<R>(items.length);
	if (items.length === 0) return results;

	// Guarantee at least one worker; otherwise nothing would run and the
	// caller would receive an array of holes with no error.
	const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
	const workerCount = Math.min(Math.max(1, requested), items.length);

	let nextIndex = 0;
	const worker = async (): Promise<void> => {
		while (nextIndex < items.length) {
			// Claim the index synchronously, before awaiting, so that no two
			// workers ever process the same item.
			const index = nextIndex;
			nextIndex += 1;
			results[index] = await fn(items[index], index);
		}
	};
	await Promise.all(Array.from({ length: workerCount }, () => worker()));
	return results;
}
603
+
604
/**
 * Convert a label-to-count map into rows sorted for display.
 *
 * Rows are ordered by descending count, with ties broken by label using
 * `localeCompare`. Percentages are relative to `samples`; when `samples` is
 * not a positive number every percentage is 0, instead of Infinity or NaN.
 *
 * @param map - Occurrence count per label.
 * @param samples - Denominator for the percentage column.
 * @returns Sorted rows of label, count and percent.
 */
function sortedStats(
	map: Map<string, number>,
	samples: number,
): Array<{ label: string; count: number; percent: number }> {
	// `samples > 0` is also false for NaN, so both bad denominators land on 0.
	const toPercent = (count: number): number => (samples > 0 ? (count / samples) * 100 : 0);
	return [...map.entries()]
		.map(([label, count]) => ({ label, count, percent: toPercent(count) }))
		.sort((left, right) => right.count - left.count || left.label.localeCompare(right.label));
}
612
+
613
/**
 * Aggregate bench results into a summary.
 *
 * @param results - Individual bench outcomes.
 * @returns `undefined` for an empty list; otherwise totals, the mean TTFT
 *   and tokens-per-second over successes (`null` when there are none), and
 *   failure messages grouped with percentages relative to all results.
 */
function summarizeBenchResults(results: DryBalanceBenchResult[]): DryBalanceBenchSummary | undefined {
	if (results.length === 0) return undefined;

	const successes: DryBalanceBenchSuccessResult[] = [];
	const failureCounts = new Map<string, number>();
	for (const result of results) {
		if (isBenchSuccess(result)) {
			successes.push(result);
			continue;
		}
		failureCounts.set(result.error, (failureCounts.get(result.error) ?? 0) + 1);
	}

	const mean = (values: number[]): number | null => {
		if (values.length === 0) return null;
		let sum = 0;
		for (const value of values) sum += value;
		return sum / values.length;
	};

	const reasons = sortedStats(failureCounts, results.length).map(({ label, count, percent }) => ({
		reason: label,
		count,
		percent,
	}));

	return {
		total: results.length,
		success: {
			total: successes.length,
			averageTtftMs: mean(successes.map(item => item.ttftMs)),
			averageTokensPerSecond: mean(successes.map(item => item.tokensPerSecond)),
		},
		failure: {
			total: results.length - successes.length,
			reasons,
		},
		results,
	};
}
642
+
643
/**
 * Aggregate attempt results into the command summary.
 *
 * @param model - Model the attempts were made against.
 * @param samples - Sample count reported in the summary and used as the
 *   percentage denominator.
 * @param concurrency - Concurrency reported in the summary.
 * @param results - Individual attempt outcomes.
 * @returns Summary with per-account and per-reason counts and percentages.
 */
function summarizeResults(
	model: Model<Api>,
	samples: number,
	concurrency: number,
	results: DryBalanceAttemptResult[],
): DryBalanceSummary {
	const accountCounts = new Map<string, number>();
	const reasonCounts = new Map<string, number>();
	let succeeded = 0;
	for (const result of results) {
		if (result.ok) {
			succeeded += 1;
			accountCounts.set(result.account, (accountCounts.get(result.account) ?? 0) + 1);
		} else {
			reasonCounts.set(result.reason, (reasonCounts.get(result.reason) ?? 0) + 1);
		}
	}

	const accounts: DryBalanceAccountStat[] = sortedStats(accountCounts, samples).map(
		({ label, count, percent }) => ({ account: label, count, percent }),
	);
	const reasons: DryBalanceFailureStat[] = sortedStats(reasonCounts, samples).map(
		({ label, count, percent }) => ({ reason: label, count, percent }),
	);

	return {
		model: formatModelString(model),
		provider: model.provider,
		samples,
		concurrency,
		success: { total: succeeded, accounts },
		failure: { total: results.length - succeeded, reasons },
	};
}
684
+
685
/**
 * Render count/percent/label rows as aligned text lines.
 *
 * Counts are right-aligned to the widest count; percentages carry one
 * decimal place and are right-aligned to six characters.
 *
 * @param rows - Rows to render.
 * @returns One line per row, or a single dimmed "(none)" line when empty.
 */
function formatRows(rows: Array<{ count: number; percent: number; label: string }>): string[] {
	if (rows.length === 0) return [` ${chalk.dim("(none)")}`];

	let countWidth = 0;
	for (const row of rows) {
		countWidth = Math.max(countWidth, row.count.toString().length);
	}

	const lines: string[] = [];
	for (const row of rows) {
		const countCell = row.count.toString().padStart(countWidth);
		const percentCell = `${row.percent.toFixed(1)}%`.padStart(6);
		lines.push(` ${countCell} ${percentCell} ${row.label}`);
	}
	return lines;
}
694
+
695
/**
 * Render a summary as the human-readable text report.
 *
 * The report lists the model, provider, sample count and concurrency, then
 * the success block (per-account rows) and the failure block (per-reason
 * rows). When bench data is present, a bench section follows with request
 * count, success count, average TTFT and TPS ("-" when unavailable), and
 * its own failure rows.
 *
 * @param summary - Summary to render.
 * @returns The report text, terminated by a newline.
 */
export function formatDryBalanceText(summary: DryBalanceSummary): string {
	const toFailureRows = (reasons: DryBalanceFailureStat[]) =>
		reasons.map(({ count, percent, reason }) => ({ count, percent, label: reason }));
	const failureHeading = (total: number): string =>
		`${total > 0 ? chalk.red("failure") : chalk.dim("failure")} ${total}`;

	const { success, failure, bench } = summary;
	const accountRows = success.accounts.map(({ count, percent, account }) => ({ count, percent, label: account }));

	const report: string[] = [];
	report.push(
		chalk.bold("dry-balance"),
		`model: ${summary.model}`,
		`provider: ${summary.provider}`,
		`samples: ${summary.samples}`,
		`concurrency: ${summary.concurrency}`,
		"",
		`${chalk.green("success")} ${success.total}`,
		...formatRows(accountRows),
		"",
		failureHeading(failure.total),
		...formatRows(toFailureRows(failure.reasons)),
	);

	if (bench) {
		const { averageTtftMs, averageTokensPerSecond } = bench.success;
		const avgTtft = averageTtftMs === null ? "-" : formatBenchDuration(averageTtftMs);
		const avgTps = averageTokensPerSecond === null ? "-" : formatBenchTps(averageTokensPerSecond);
		report.push(
			"",
			chalk.bold("bench"),
			`requests: ${bench.total}`,
			`${chalk.green("success")} ${bench.success.total}`,
			`avg TTFT: ${avgTtft}`,
			`avg TPS: ${avgTps}`,
			"",
			failureHeading(bench.failure.total),
			...formatRows(toFailureRows(bench.failure.reasons)),
		);
	}

	return `${report.join("\n")}\n`;
}
745
+
746
/**
 * Entry point of the dry-balance command.
 *
 * Sampling mode (default) runs `count` credential-selection attempts with
 * bounded concurrency and reports which account each attempt resolved to.
 * Bench mode resolves the provider's OAuth accounts, sends one streamed
 * request per account, and additionally reports TTFT and throughput. In
 * bench mode the `count` and `concurrency` flags are not read.
 *
 * The summary is written to stdout, as JSON when `flags.json` is set and as
 * text otherwise. Bench progress goes to stderr in JSON mode and to stdout
 * otherwise. Exit code 1 is reported when any failure was recorded. The
 * progress sink and the runtime are closed on every exit path.
 *
 * @param command - Parsed command arguments.
 * @param deps - Optional overrides for I/O, timing, and runtime creation.
 * @returns The computed summary.
 * @throws Error for invalid flags, an unresolvable model, or (bench mode)
 *   when no OAuth accounts resolve for the provider.
 */
export async function runDryBalanceCommand(
	command: DryBalanceCommandArgs,
	deps: DryBalanceDependencies = {},
): Promise<DryBalanceSummary> {
	const { flags } = command;
	const benchMode = flags.bench === true;

	// Flags are validated before the runtime exists, so a bad flag allocates nothing.
	let sampleCount = 0;
	let workerLimit = 0;
	if (!benchMode) {
		sampleCount = normalizePositiveInteger("count", flags.count, DEFAULT_SAMPLE_COUNT);
		workerLimit = Math.min(
			sampleCount,
			normalizePositiveInteger("concurrency", flags.concurrency, DEFAULT_CONCURRENCY),
		);
	}

	const nextSessionId = deps.randomSessionId ?? (() => Bun.randomUUIDv7());
	const emitOut = deps.writeStdout ?? ((text: string) => process.stdout.write(text));
	const emitErr = deps.writeStderr ?? ((text: string) => process.stderr.write(text));
	const reportExitCode =
		deps.setExitCode ??
		((code: number) => {
			process.exitCode = code;
		});
	const streamFn = deps.streamSimple ?? streamSimple;
	const clock = deps.now ?? (() => performance.now());
	const openRuntime = deps.createRuntime ?? createDefaultRuntime;
	const runtime = await openRuntime();

	let progress: DryBalanceBenchProgressSink | undefined;
	let progressClosed = false;
	// Idempotent: invoked on the success path and again from `finally`.
	const closeProgress = (): void => {
		if (!progressClosed) {
			progressClosed = true;
			progress?.close();
		}
	};

	try {
		const { modelRegistry } = runtime;
		const { model, warning } = await resolveDryBalanceModel(
			flags.model ?? command.model,
			modelRegistry,
			runtime.settings,
			nextSessionId,
		);
		if (warning) emitErr(`${chalk.yellow(`Warning: ${warning}`)}\n`);

		let attempts: DryBalanceAttemptResult[];
		let benchResults: DryBalanceBenchResult[] | undefined;
		if (benchMode) {
			const targets = await resolveBenchTargets(model, modelRegistry.authStorage);
			if (targets.length === 0) throw new Error(`No OAuth accounts resolved for provider ${model.provider}`);
			// In bench mode every target is one sample, and all run together.
			sampleCount = targets.length;
			workerLimit = targets.length;
			// Keep stdout clean for the JSON document by sending progress to stderr.
			const progressWrite = flags.json ? emitErr : emitOut;
			const progressInteractive = flags.json
				? (deps.stderrIsTTY ?? process.stderr.isTTY === true)
				: (deps.stdoutIsTTY ?? process.stdout.isTTY === true);
			progress = createBenchProgressSink(targets.length, progressWrite, progressInteractive);
			benchResults = await runBenchTargets(model, targets, nextSessionId, progress, streamFn, clock);
			attempts = targets.map(target =>
				target.ok ? { ok: true, account: target.account } : { ok: false, reason: target.error },
			);
		} else {
			const sessionIds = Array.from({ length: sampleCount }, () => nextSessionId());
			attempts = await mapConcurrent(sessionIds, workerLimit, sessionId =>
				runOneAttempt(model, modelRegistry, sessionId),
			);
		}
		closeProgress();

		const summary = summarizeResults(model, sampleCount, workerLimit, attempts);
		if (benchResults) {
			const benchSummary = summarizeBenchResults(benchResults);
			if (benchSummary) summary.bench = benchSummary;
		}

		const rendered = flags.json ? `${JSON.stringify(summary, null, 2)}\n` : formatDryBalanceText(summary);
		emitOut(rendered);

		const anyFailure = summary.failure.total > 0 || (summary.bench?.failure.total ?? 0) > 0;
		if (anyFailure) reportExitCode(1);
		return summary;
	} finally {
		closeProgress();
		runtime.close?.();
	}
}