@phnx-labs/agents-cli 1.20.17 → 1.20.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +1 -1
  3. package/dist/commands/budget.d.ts +14 -0
  4. package/dist/commands/budget.js +137 -0
  5. package/dist/commands/cost.d.ts +12 -0
  6. package/dist/commands/cost.js +139 -0
  7. package/dist/commands/exec.d.ts +20 -0
  8. package/dist/commands/exec.js +382 -5
  9. package/dist/commands/secrets.d.ts +15 -0
  10. package/dist/commands/secrets.js +343 -16
  11. package/dist/commands/sessions.js +4 -0
  12. package/dist/index.js +4 -0
  13. package/dist/lib/budget/config.d.ts +9 -0
  14. package/dist/lib/budget/config.js +115 -0
  15. package/dist/lib/budget/enforce.d.ts +94 -0
  16. package/dist/lib/budget/enforce.js +151 -0
  17. package/dist/lib/budget/ledger.d.ts +61 -0
  18. package/dist/lib/budget/ledger.js +107 -0
  19. package/dist/lib/budget/preflight.d.ts +110 -0
  20. package/dist/lib/budget/preflight.js +200 -0
  21. package/dist/lib/checkpoint.d.ts +54 -0
  22. package/dist/lib/checkpoint.js +56 -0
  23. package/dist/lib/cloud/rush.js +18 -0
  24. package/dist/lib/exec.d.ts +36 -0
  25. package/dist/lib/exec.js +192 -4
  26. package/dist/lib/git.d.ts +18 -0
  27. package/dist/lib/git.js +67 -4
  28. package/dist/lib/loop.d.ts +145 -0
  29. package/dist/lib/loop.js +330 -0
  30. package/dist/lib/mcp.d.ts +7 -0
  31. package/dist/lib/mcp.js +24 -0
  32. package/dist/lib/models.d.ts +11 -0
  33. package/dist/lib/models.js +21 -0
  34. package/dist/lib/plugins.js +5 -2
  35. package/dist/lib/pricing/cost.d.ts +46 -0
  36. package/dist/lib/pricing/cost.js +71 -0
  37. package/dist/lib/pricing/index.d.ts +8 -0
  38. package/dist/lib/pricing/index.js +8 -0
  39. package/dist/lib/pricing/prices.json +138 -0
  40. package/dist/lib/pricing/table.d.ts +17 -0
  41. package/dist/lib/pricing/table.js +73 -0
  42. package/dist/lib/secrets/Agents CLI.app/Contents/CodeResources +0 -0
  43. package/dist/lib/secrets/Agents CLI.app/Contents/MacOS/Agents CLI +0 -0
  44. package/dist/lib/secrets/agent.d.ts +147 -0
  45. package/dist/lib/secrets/agent.js +500 -0
  46. package/dist/lib/secrets/bundles.d.ts +58 -7
  47. package/dist/lib/secrets/bundles.js +264 -75
  48. package/dist/lib/secrets/filestore.d.ts +82 -0
  49. package/dist/lib/secrets/filestore.js +295 -0
  50. package/dist/lib/secrets/linux.d.ts +6 -24
  51. package/dist/lib/secrets/linux.js +22 -265
  52. package/dist/lib/session/db.d.ts +40 -0
  53. package/dist/lib/session/db.js +84 -2
  54. package/dist/lib/session/discover.d.ts +2 -0
  55. package/dist/lib/session/discover.js +126 -2
  56. package/dist/lib/session/render.d.ts +2 -0
  57. package/dist/lib/session/render.js +1 -1
  58. package/dist/lib/session/types.d.ts +4 -0
  59. package/dist/lib/teams/agents.d.ts +32 -0
  60. package/dist/lib/teams/agents.js +66 -3
  61. package/dist/lib/teams/api.js +20 -0
  62. package/dist/lib/teams/parsers.js +16 -4
  63. package/dist/lib/types.d.ts +48 -0
  64. package/dist/lib/workflows.d.ts +56 -0
  65. package/dist/lib/workflows.js +72 -5
  66. package/package.json +2 -1
@@ -20,6 +20,7 @@ import { walkForFiles } from '../fs-walk.js';
20
20
  import { getConfigSymlinkVersion } from '../shims.js';
21
21
  import { SESSION_AGENTS } from './types.js';
22
22
  import { extractSessionTopic } from './prompt.js';
23
+ import { costOfUsage } from '../pricing/index.js';
23
24
  import { getDB, getScanStampByPath, getScanStampsForPaths, recordScans, syncLabels, upsertSessionsBatch, querySessions, countSessions, ftsSearch, tryClaimScan, releaseScan, } from './db.js';
24
25
  const HOME = os.homedir();
25
26
  // Versions can live under either repo: the user repo (current canonical
@@ -108,6 +109,7 @@ function buildQueryOptions(options, agents, opts) {
108
109
  limit: opts.includeLimit ? (options?.limit ?? 50) : undefined,
109
110
  excludeTeamOrigin: options?.excludeTeamOrigin,
110
111
  onlyTeamOrigin: options?.onlyTeamOrigin,
112
+ sortBy: options?.sortBy,
111
113
  };
112
114
  }
113
115
  /** Resolve and canonicalize a working directory path (follows symlinks). */
@@ -402,6 +404,8 @@ async function readClaudeMeta(filePath, sessionId, account, label) {
402
404
  label,
403
405
  messageCount: scan.messageCount,
404
406
  tokenCount: scan.tokenCount,
407
+ costUsd: scan.costUsd,
408
+ durationMs: scan.durationMs,
405
409
  isTeamOrigin,
406
410
  };
407
411
  }
@@ -417,6 +421,8 @@ async function readClaudeMeta(filePath, sessionId, account, label) {
417
421
  label,
418
422
  messageCount: scan.messageCount,
419
423
  tokenCount: scan.tokenCount,
424
+ costUsd: scan.costUsd,
425
+ durationMs: scan.durationMs,
420
426
  topic: scan.topic,
421
427
  isTeamOrigin,
422
428
  };
@@ -529,6 +535,8 @@ async function readCodexMeta(filePath, account, currentVersion) {
529
535
  topic: scan.topic,
530
536
  messageCount: scan.messageCount,
531
537
  tokenCount: scan.tokenCount,
538
+ costUsd: scan.costUsd,
539
+ durationMs: scan.durationMs,
532
540
  account,
533
541
  };
534
542
  return { meta, content: scan.contentText || '' };
@@ -642,10 +650,15 @@ function readGeminiMeta(filePath, hashDir, projectMap, currentVersion) {
642
650
  const cwd = projectInfo?.path ? normalizeCwd(projectInfo.path) : undefined;
643
651
  const stat = safeStatSync(filePath);
644
652
  const messages = Array.isArray(session.messages) ? session.messages : [];
653
+ const sessionModel = typeof session.model === 'string' ? session.model : undefined;
645
654
  let topic;
646
655
  let messageCount = 0;
647
656
  let tokenCount = 0;
648
657
  let sawTokenCount = false;
658
+ let costUsd = 0;
659
+ let sawCost = false;
660
+ let firstTsMs;
661
+ let lastTsMs;
649
662
  const userTexts = [];
650
663
  for (const message of messages) {
651
664
  if (message.type === 'user') {
@@ -662,12 +675,43 @@ function readGeminiMeta(filePath, hashDir, projectMap, currentVersion) {
662
675
  messageCount++;
663
676
  }
664
677
  }
678
+ // Duration: messages carry a `timestamp` on most Gemini CLI versions.
679
+ const tsRaw = message.timestamp ?? message.time;
680
+ if (typeof tsRaw === 'string' || typeof tsRaw === 'number') {
681
+ const ms = new Date(tsRaw).getTime();
682
+ if (!Number.isNaN(ms)) {
683
+ if (firstTsMs === undefined || ms < firstTsMs)
684
+ firstTsMs = ms;
685
+ if (lastTsMs === undefined || ms > lastTsMs)
686
+ lastTsMs = ms;
687
+ }
688
+ }
665
689
  const total = getGeminiTokenCount(message.tokens);
666
690
  if (total !== null) {
667
691
  tokenCount += total;
668
692
  sawTokenCount = true;
669
693
  }
694
+ // Per-message cost: directional tokens × this message's model price.
695
+ const msgModel = (typeof message.model === 'string' ? message.model : undefined) || sessionModel;
696
+ const tk = message.tokens;
697
+ if (msgModel && tk && typeof tk === 'object') {
698
+ const c = costOfUsage({
699
+ model: msgModel,
700
+ inputTokens: typeof tk.input === 'number' ? tk.input : undefined,
701
+ outputTokens: (typeof tk.output === 'number' ? tk.output : 0) +
702
+ (typeof tk.thoughts === 'number' ? tk.thoughts : 0) +
703
+ (typeof tk.tool === 'number' ? tk.tool : 0),
704
+ cacheReadTokens: typeof tk.cached === 'number' ? tk.cached : undefined,
705
+ });
706
+ if (c > 0) {
707
+ costUsd += c;
708
+ sawCost = true;
709
+ }
710
+ }
670
711
  }
712
+ const durationMs = firstTsMs !== undefined && lastTsMs !== undefined && lastTsMs > firstTsMs
713
+ ? lastTsMs - firstTsMs
714
+ : undefined;
671
715
  const meta = {
672
716
  id: sessionId,
673
717
  shortId: sessionId.slice(0, 8),
@@ -680,6 +724,8 @@ function readGeminiMeta(filePath, hashDir, projectMap, currentVersion) {
680
724
  topic,
681
725
  messageCount,
682
726
  tokenCount: sawTokenCount ? tokenCount : undefined,
727
+ costUsd: sawCost ? costUsd : undefined,
728
+ durationMs,
683
729
  };
684
730
  return { meta, content: userTexts.join('\n') };
685
731
  }
@@ -1206,6 +1252,11 @@ async function scanClaudeSession(filePath) {
1206
1252
  let messageCount = 0;
1207
1253
  let tokenCount = 0;
1208
1254
  let sawTokenCount = false;
1255
+ let costUsd = 0;
1256
+ let sawCost = false;
1257
+ // Track the first and last timestamped event to derive wall-clock duration.
1258
+ let firstTsMs;
1259
+ let lastTsMs;
1209
1260
  const seenAssistantIds = new Set();
1210
1261
  const userTexts = [];
1211
1262
  try {
@@ -1224,6 +1275,16 @@ async function scanClaudeSession(filePath) {
1224
1275
  if (!entrypoint && typeof parsed.entrypoint === 'string') {
1225
1276
  entrypoint = parsed.entrypoint;
1226
1277
  }
1278
+ // Track duration across every timestamped event, not just the first.
1279
+ if (typeof parsed.timestamp === 'string') {
1280
+ const ms = new Date(parsed.timestamp).getTime();
1281
+ if (!Number.isNaN(ms)) {
1282
+ if (firstTsMs === undefined || ms < firstTsMs)
1283
+ firstTsMs = ms;
1284
+ if (lastTsMs === undefined || ms > lastTsMs)
1285
+ lastTsMs = ms;
1286
+ }
1287
+ }
1227
1288
  if (!timestamp && (parsed.type === 'user' || parsed.type === 'assistant') && parsed.timestamp) {
1228
1289
  timestamp = parsed.timestamp;
1229
1290
  cwd = parsed.cwd || '';
@@ -1252,17 +1313,37 @@ async function scanClaudeSession(filePath) {
1252
1313
  continue;
1253
1314
  seenAssistantIds.add(logicalId);
1254
1315
  messageCount++;
1255
- const usage = getClaudeUsageTotal(parsed.message?.usage || parsed.usage);
1316
+ const usageObj = parsed.message?.usage || parsed.usage;
1317
+ const usage = getClaudeUsageTotal(usageObj);
1256
1318
  if (usage !== null) {
1257
1319
  tokenCount += usage;
1258
1320
  sawTokenCount = true;
1259
1321
  }
1322
+ // Per-assistant-message cost: each event carries its own model, so we
1323
+ // multiply that event's raw token directions by that model's price.
1324
+ const model = parsed.message?.model;
1325
+ if (model && usageObj && typeof usageObj === 'object') {
1326
+ const eventCost = costOfUsage({
1327
+ model,
1328
+ inputTokens: usageObj.input_tokens,
1329
+ outputTokens: usageObj.output_tokens,
1330
+ cacheReadTokens: usageObj.cache_read_input_tokens,
1331
+ cacheCreationTokens: usageObj.cache_creation_input_tokens,
1332
+ });
1333
+ if (eventCost > 0) {
1334
+ costUsd += eventCost;
1335
+ sawCost = true;
1336
+ }
1337
+ }
1260
1338
  }
1261
1339
  }
1262
1340
  finally {
1263
1341
  rl.close();
1264
1342
  stream.destroy();
1265
1343
  }
1344
+ const durationMs = firstTsMs !== undefined && lastTsMs !== undefined && lastTsMs > firstTsMs
1345
+ ? lastTsMs - firstTsMs
1346
+ : undefined;
1266
1347
  return {
1267
1348
  timestamp,
1268
1349
  cwd,
@@ -1272,6 +1353,8 @@ async function scanClaudeSession(filePath) {
1272
1353
  entrypoint,
1273
1354
  messageCount,
1274
1355
  tokenCount: sawTokenCount ? tokenCount : undefined,
1356
+ costUsd: sawCost ? costUsd : undefined,
1357
+ durationMs,
1275
1358
  contentText: userTexts.length > 0 ? userTexts.join('\n') : undefined,
1276
1359
  };
1277
1360
  }
@@ -1287,6 +1370,10 @@ async function scanCodexSession(filePath) {
1287
1370
  let topic;
1288
1371
  let messageCount = 0;
1289
1372
  let tokenCount;
1373
+ let model;
1374
+ let lastTotalTokenUsage;
1375
+ let firstTsMs;
1376
+ let lastTsMs;
1290
1377
  const userTexts = [];
1291
1378
  try {
1292
1379
  for await (const line of rl) {
@@ -1299,6 +1386,16 @@ async function scanCodexSession(filePath) {
1299
1386
  catch {
1300
1387
  continue;
1301
1388
  }
1389
+ // Track duration across every timestamped event.
1390
+ if (typeof parsed.timestamp === 'string') {
1391
+ const ms = new Date(parsed.timestamp).getTime();
1392
+ if (!Number.isNaN(ms)) {
1393
+ if (firstTsMs === undefined || ms < firstTsMs)
1394
+ firstTsMs = ms;
1395
+ if (lastTsMs === undefined || ms > lastTsMs)
1396
+ lastTsMs = ms;
1397
+ }
1398
+ }
1302
1399
  if (parsed.type === 'session_meta') {
1303
1400
  const payload = parsed.payload || {};
1304
1401
  sessionId = payload.id || sessionId;
@@ -1306,6 +1403,7 @@ async function scanCodexSession(filePath) {
1306
1403
  cwd = payload.cwd || cwd;
1307
1404
  gitBranch = payload.git?.branch || gitBranch;
1308
1405
  version = payload.cli_version || payload.version || version;
1406
+ model = payload.model || model;
1309
1407
  continue;
1310
1408
  }
1311
1409
  if (parsed.type === 'response_item' && parsed.payload?.type === 'message') {
@@ -1324,9 +1422,18 @@ async function scanCodexSession(filePath) {
1324
1422
  continue;
1325
1423
  }
1326
1424
  if (parsed.type === 'event_msg' && parsed.payload?.type === 'token_count') {
1327
- const total = getCodexTokenCount(parsed.payload.info?.total_token_usage);
1425
+ const totalUsage = parsed.payload.info?.total_token_usage;
1426
+ const total = getCodexTokenCount(totalUsage);
1328
1427
  if (total !== null)
1329
1428
  tokenCount = total;
1429
+ // token_count is cumulative — keep the latest snapshot and price it once
1430
+ // after the stream, so we don't double-count across intermediate events.
1431
+ if (totalUsage && typeof totalUsage === 'object')
1432
+ lastTotalTokenUsage = totalUsage;
1433
+ // Codex also stamps the model on the rate_limits/token_count payload on
1434
+ // some versions; prefer session_meta but fall back to it.
1435
+ if (!model && typeof parsed.payload.info?.model === 'string')
1436
+ model = parsed.payload.info.model;
1330
1437
  }
1331
1438
  }
1332
1439
  }
@@ -1334,6 +1441,21 @@ async function scanCodexSession(filePath) {
1334
1441
  rl.close();
1335
1442
  stream.destroy();
1336
1443
  }
1444
+ // Price the final cumulative token snapshot once, against the session model.
1445
+ let costUsd;
1446
+ if (model && lastTotalTokenUsage) {
1447
+ const c = costOfUsage({
1448
+ model,
1449
+ inputTokens: lastTotalTokenUsage.input_tokens,
1450
+ outputTokens: (lastTotalTokenUsage.output_tokens ?? 0) + (lastTotalTokenUsage.reasoning_output_tokens ?? 0),
1451
+ cacheReadTokens: lastTotalTokenUsage.cached_input_tokens,
1452
+ });
1453
+ if (c > 0)
1454
+ costUsd = c;
1455
+ }
1456
+ const durationMs = firstTsMs !== undefined && lastTsMs !== undefined && lastTsMs > firstTsMs
1457
+ ? lastTsMs - firstTsMs
1458
+ : undefined;
1337
1459
  return {
1338
1460
  sessionId,
1339
1461
  timestamp,
@@ -1343,6 +1465,8 @@ async function scanCodexSession(filePath) {
1343
1465
  topic,
1344
1466
  messageCount,
1345
1467
  tokenCount,
1468
+ costUsd,
1469
+ durationMs,
1346
1470
  contentText: userTexts.length > 0 ? userTexts.join('\n') : undefined,
1347
1471
  };
1348
1472
  }
@@ -57,6 +57,8 @@ export interface SessionStats {
57
57
  }
58
58
  /** Compute aggregate statistics (turns, tools, tokens, duration) from session events. */
59
59
  export declare function computeSummaryStats(events: SessionEvent[]): SessionStats;
60
+ /** Format a duration in milliseconds as a human-readable string (e.g. '12 min', '2h 30min'). */
61
+ export declare function formatDuration(ms: number): string;
60
62
  /**
61
63
  * Return the stats line for a session summary header.
62
64
  * e.g. "221 turns · 198 tools (10 errors) · 67.5M cached / 361K out · 12 min"
@@ -218,7 +218,7 @@ function formatTokenCount(n) {
218
218
  return (m >= 100 ? Math.round(m) : parseFloat(m.toFixed(1))) + 'M';
219
219
  }
220
220
  /** Format a duration in milliseconds as a human-readable string (e.g. '12 min', '2h 30min'). */
221
- function formatDuration(ms) {
221
+ export function formatDuration(ms) {
222
222
  const totalMin = Math.round(ms / 60_000);
223
223
  if (totalMin < 1)
224
224
  return 'under 1 min';
@@ -52,6 +52,10 @@ export interface SessionMeta {
52
52
  gitBranch?: string;
53
53
  messageCount?: number;
54
54
  tokenCount?: number;
55
+ /** Total USD cost, computed at scan time from per-model token usage (issue #323). */
56
+ costUsd?: number;
57
+ /** Wall-clock duration in ms (lastTs − firstTs), persisted at scan time. */
58
+ durationMs?: number;
55
59
  version?: string;
56
60
  account?: string;
57
61
  topic?: string;
@@ -17,6 +17,14 @@ export declare enum AgentStatus {
17
17
  export type TaskType = 'plan' | 'implement' | 'test' | 'review' | 'bugfix' | 'docs';
18
18
  export declare const VALID_TASK_TYPES: readonly TaskType[];
19
19
  export type { AgentType } from './parsers.js';
20
+ /**
21
+ * Wrap a teammate argv in a POSIX shell command that runs it and then records
22
+ * the real exit code to `exitCodePath`. `echo $?` captures the status of the
23
+ * preceding command, so the sentinel reflects the underlying CLI's exit code,
24
+ * not the shell's. Single source of truth shared by launchProcess() and its
25
+ * test. See reapProcess() for how the sentinel is consumed.
26
+ */
27
+ export declare function buildSentinelCommand(cmd: string[], exitCodePath: string): string;
20
28
  /**
21
29
  * Capture a stable identifier for a process at the moment it was started.
22
30
  * Used to defeat PID reuse: a kill(pid, ...) is only safe when the process
@@ -118,6 +126,13 @@ export declare class AgentProcess {
118
126
  }>;
119
127
  getStdoutPath(): Promise<string>;
120
128
  getMetaPath(): Promise<string>;
129
+ /**
130
+ * Path to the exit-code sentinel. The launcher wraps the teammate command in
131
+ * a shell that writes the underlying CLI's `$?` here once it exits. Detached
132
+ * teammates can't be wait()ed on by the parent, so this file is the only
133
+ * durable record of the real exit status — see reapProcess().
134
+ */
135
+ getExitCodePath(): Promise<string>;
121
136
  toDict(): any;
122
137
  duration(): string | null;
123
138
  get events(): any[];
@@ -131,6 +146,23 @@ export declare class AgentProcess {
131
146
  static loadFromDisk(agentId: string, baseDir?: string | null): Promise<AgentProcess | null>;
132
147
  isProcessAlive(): boolean;
133
148
  updateStatusFromProcess(): Promise<void>;
149
+ /**
150
+ * Recover the teammate's exit status after its process is gone.
151
+ *
152
+ * The teammate is spawned detached + unref()'d (see launchProcess), so the
153
+ * parent never gets the child's exit code from the OS. Instead the launcher
154
+ * wraps the command in a shell that records `$?` to the exit-code sentinel.
155
+ * This reads that file:
156
+ * - still alive -> null (no verdict yet)
157
+ * - sentinel present -> the real exit code (0 = success)
158
+ * - sentinel absent -> 1 (the shell was killed before it could write
159
+ * it, e.g. SIGKILL on timeout/stop — a real
160
+ * failure)
161
+ *
162
+ * Returning a real code (not a hardcoded 1) is what lets agents whose stream
163
+ * never emits a parsed terminal event — kimi, antigravity, droid — be marked
164
+ * completed on success instead of falsely failed.
165
+ */
134
166
  private reapProcess;
135
167
  }
136
168
  /**
@@ -135,6 +135,25 @@ function hasTransitiveDep(byName, startName, targetName, seen = new Set()) {
135
135
  }
136
136
  return false;
137
137
  }
138
+ /**
139
+ * Single-quote a string for safe interpolation into a POSIX `sh -c` command.
140
+ * Wraps in single quotes and escapes embedded single quotes via the standard
141
+ * `'\''` close-escape-reopen idiom, so arbitrary prompts/paths can't break out
142
+ * of quoting or inject shell syntax.
143
+ */
144
+ function shSingleQuote(value) {
145
+ return `'${value.replace(/'/g, `'\\''`)}'`;
146
+ }
147
+ /**
148
+ * Wrap a teammate argv in a POSIX shell command that runs it and then records
149
+ * the real exit code to `exitCodePath`. `echo $?` captures the status of the
150
+ * preceding command, so the sentinel reflects the underlying CLI's exit code,
151
+ * not the shell's. Single source of truth shared by launchProcess() and its
152
+ * test. See reapProcess() for how the sentinel is consumed.
153
+ */
154
+ export function buildSentinelCommand(cmd, exitCodePath) {
155
+ return `${cmd.map(shSingleQuote).join(' ')}; echo $? > ${shSingleQuote(exitCodePath)}`;
156
+ }
138
157
  /**
139
158
  * Capture a stable identifier for a process at the moment it was started.
140
159
  * Used to defeat PID reuse: a kill(pid, ...) is only safe when the process
@@ -456,6 +475,15 @@ export class AgentProcess {
456
475
  async getMetaPath() {
457
476
  return path.join(await this.getAgentDir(), 'meta.json');
458
477
  }
478
+ /**
479
+ * Path to the exit-code sentinel. The launcher wraps the teammate command in
480
+ * a shell that writes the underlying CLI's `$?` here once it exits. Detached
481
+ * teammates can't be wait()ed on by the parent, so this file is the only
482
+ * durable record of the real exit status — see reapProcess().
483
+ */
484
+ async getExitCodePath() {
485
+ return path.join(await this.getAgentDir(), 'exit_code');
486
+ }
459
487
  toDict() {
460
488
  return {
461
489
  agent_id: this.agentId,
@@ -748,14 +776,37 @@ export class AgentProcess {
748
776
  }
749
777
  await this.saveMeta();
750
778
  }
779
+ /**
780
+ * Recover the teammate's exit status after its process is gone.
781
+ *
782
+ * The teammate is spawned detached + unref()'d (see launchProcess), so the
783
+ * parent never gets the child's exit code from the OS. Instead the launcher
784
+ * wraps the command in a shell that records `$?` to the exit-code sentinel.
785
+ * This reads that file:
786
+ * - still alive -> null (no verdict yet)
787
+ * - sentinel present -> the real exit code (0 = success)
788
+ * - sentinel absent -> 1 (the shell was killed before it could write
789
+ * it, e.g. SIGKILL on timeout/stop — a real
790
+ * failure)
791
+ *
792
+ * Returning a real code (not a hardcoded 1) is what lets agents whose stream
793
+ * never emits a parsed terminal event — kimi, antigravity, droid — be marked
794
+ * completed on success instead of falsely failed.
795
+ */
751
796
  async reapProcess() {
752
797
  if (!this.pid)
753
798
  return null;
754
- try {
755
- process.kill(this.pid, 0);
799
+ // isProcessAlive() applies the start-time guard, so a recycled PID now
800
+ // owned by an unrelated process doesn't read as still-alive.
801
+ if (this.isProcessAlive())
756
802
  return null;
803
+ try {
804
+ const raw = (await fs.readFile(await this.getExitCodePath(), 'utf-8')).trim();
805
+ const code = Number.parseInt(raw, 10);
806
+ return Number.isNaN(code) ? 1 : code;
757
807
  }
758
808
  catch {
809
+ // No sentinel: the shell died before recording $? (killed mid-run).
759
810
  return 1;
760
811
  }
761
812
  }
@@ -998,7 +1049,19 @@ export class AgentManager {
998
1049
  const stdoutPath = await agent.getStdoutPath();
999
1050
  const stdoutFile = await fs.open(stdoutPath, 'w');
1000
1051
  const stdoutFd = stdoutFile.fd;
1001
- const childProcess = spawn(cmd[0], cmd.slice(1), {
1052
+ // Wrap the teammate command in a shell that records the underlying CLI's
1053
+ // exit code to a sentinel file. Detached + unref()'d children can't be
1054
+ // wait()ed on by this parent, so the sentinel is the only durable record
1055
+ // of the real exit status — reapProcess() reads it to decide
1056
+ // completed-vs-failed for agents whose stream emits no parsed terminal
1057
+ // event (kimi, antigravity, droid). Remove any stale sentinel from a
1058
+ // prior run of the same agent id first so a restart can't read it.
1059
+ const exitCodePath = await agent.getExitCodePath();
1060
+ await fs.rm(exitCodePath, { force: true }).catch(() => { });
1061
+ const wrappedCmd = buildSentinelCommand(cmd, exitCodePath);
1062
+ // detached:true makes the shell the process-group leader, so stop()'s
1063
+ // `kill(-pid)` still reaches the underlying CLI through the group.
1064
+ const childProcess = spawn('/bin/sh', ['-c', wrappedCmd], {
1002
1065
  stdio: ['ignore', stdoutFd, stdoutFd],
1003
1066
  cwd: agent.cwd || undefined,
1004
1067
  detached: true,
@@ -139,6 +139,26 @@ export async function handleSpawn(manager, taskName, agentType, prompt, cwd, mod
139
139
  const resolvedMode = resolveMode(mode, defaultMode);
140
140
  const resolvedEffort = effort ?? 'medium';
141
141
  debug(`[spawn] Spawning ${agentType} agent for task "${taskName}" [${resolvedMode}] effort=${resolvedEffort}${profileName ? ` profile=${profileName}` : ''}...`);
142
+ // Budget pre-flight gate (issue #346). Teammates inherit the project's caps:
143
+ // before launching one, project its estimated cost onto current spend and
144
+ // refuse when on_exceed:block would be breached. Cross-vendor by construction
145
+ // — a Claude teammate and a Codex teammate draw down the same per_project /
146
+ // per_day pool. Dormant (no-op) when no caps are configured.
147
+ {
148
+ const gateCwd = cwd || workspaceDir || worktreePath || process.cwd();
149
+ const { runPreflightGate } = await import('../budget/preflight.js');
150
+ const gate = runPreflightGate({
151
+ agent: agentType,
152
+ model: model ?? `${agentType}-default`,
153
+ mode: resolvedMode,
154
+ prompt,
155
+ project: gateCwd,
156
+ cwd: gateCwd,
157
+ });
158
+ if (!gate.dormant && !gate.decision.allow) {
159
+ throw new Error(`[budget] BLOCKED teammate "${taskName}" (${agentType}): ${gate.decision.reason}`);
160
+ }
161
+ }
142
162
  const agent = await manager.spawn(taskName, agentType, prompt, cwd, resolvedMode, resolvedEffort, parentSessionId, workspaceDir, version, name, after, model, envOverrides, taskType, cloudProvider, cloudSessionId, cloudRepo, cloudBranch, worktreeName, worktreePath, profileName);
143
163
  debug(`[spawn] Spawned ${agentType} agent ${agent.agentId} for task "${taskName}"`);
144
164
  return {
@@ -917,9 +917,16 @@ function normalizeGrok(raw) {
917
917
  // - {"role":"assistant","content":"..."} → final message
918
918
  // - {"role":"assistant","tool_calls":[{"function":{"name":"Bash","arguments":"<json>"}}]} → tool use
919
919
  // - {"role":"tool","tool_call_id":"...","content":"..."} → tool result
920
- // - {"role":"meta","type":"session.resume_hint","session_id":"..."} → init / session id
921
- // Tool arguments are JSON-stringified inside `function.arguments` and must be
922
- // parsed before extracting paths/commands. Verified against live `kimi` runs.
920
+ // - {"role":"meta","type":"session.resume_hint","session_id":"..."} → terminal/result
921
+ // Kimi emits NO dedicated result/turn-complete event and NO init event. The
922
+ // `session.resume_hint` meta is its terminal marker: emitted exactly once, as
923
+ // the LAST line, on clean completion (it carries the `kimi -r <id>` resume
924
+ // command). We map it to a success `result` so the team runner resolves status
925
+ // from the stream; the run's exit code remains the safety net for crashes that
926
+ // never reach the hint. Tool arguments are JSON-stringified inside
927
+ // `function.arguments` and must be parsed before extracting paths/commands.
928
+ // Verified against live `kimi` runs (no-tool and tool-using) — see
929
+ // __tests__/testdata/kimi-stream-*.jsonl.
923
930
  function normalizeKimi(raw) {
924
931
  const timestamp = new Date().toISOString();
925
932
  if (!raw || typeof raw !== 'object') {
@@ -1044,9 +1051,14 @@ function normalizeKimi(raw) {
1044
1051
  if (role === 'meta') {
1045
1052
  const metaType = typeof raw.type === 'string' ? raw.type : '';
1046
1053
  if (metaType === 'session.resume_hint') {
1054
+ // Kimi's terminal marker (see header). Emit a success `result` so the
1055
+ // team runner's terminal-event detection resolves the teammate to
1056
+ // COMPLETED from the stream. session_id is preserved for cross-
1057
+ // referencing — readNewEvents() captures it off any event.
1047
1058
  return [{
1048
- type: 'init',
1059
+ type: 'result',
1049
1060
  agent: 'kimi',
1061
+ status: 'success',
1050
1062
  session_id: typeof raw.session_id === 'string' ? raw.session_id : null,
1051
1063
  timestamp: timestamp,
1052
1064
  }];
@@ -22,6 +22,43 @@ export interface RunDefaults {
22
22
  export type RunConfig = Partial<Record<AgentId, AgentRunConfig>> & {
23
23
  defaults?: Record<string, RunDefaults>;
24
24
  };
25
+ /**
26
+ * What to do when a configured budget cap would be exceeded (issue #346).
27
+ * `block` refuses to launch (or kills a running child) and exits non-zero so
28
+ * CI/headless/teams/cloud all inherit the decision. `warn` prints the overrun
29
+ * but proceeds — useful for soft rollout / observability-only.
30
+ */
31
+ export type BudgetOnExceed = 'block' | 'warn';
32
+ /**
33
+ * `budget:` block in agents.yaml — cross-vendor spend guardrails (issue #346).
34
+ *
35
+ * Resolution is project > user (same precedence as `run:`); see
36
+ * `resolveBudgetConfig` in lib/budget/config.ts. Every cap is in USD. A cap is
37
+ * "unset" when undefined — only set caps are enforced. `per_agent` caps apply
38
+ * to one agent's spend; the top-level caps (`per_run`, `per_day`,
39
+ * `per_project`) aggregate ACROSS every vendor the CLI dispatches, which is the
40
+ * cross-vendor property no single-vendor control has.
41
+ */
42
+ export interface BudgetConfig {
43
+ /** Display currency. Only "USD" is priced today; carried for forward-compat. */
44
+ currency?: string;
45
+ /** Hard cap on the estimated/actual cost of a single run. */
46
+ per_run?: number;
47
+ /** Hard cap on total spend attributed to the current day (local date). */
48
+ per_day?: number;
49
+ /** Per-agent daily caps, keyed by agent id (e.g. { claude: 30, codex: 20 }). */
50
+ per_agent?: Partial<Record<AgentId, number>>;
51
+ /** Hard cap on cumulative spend attributed to the current project. */
52
+ per_project?: number;
53
+ /** block (refuse/kill) or warn (proceed). Defaults to block. */
54
+ on_exceed?: BudgetOnExceed;
55
+ /**
56
+ * Interactive confirm threshold (USD). When a run's pre-flight estimate is at
57
+ * or above this, prompt before launching (unless --yes). Does NOT gate a hard
58
+ * block — a cap breach always blocks regardless of this value.
59
+ */
60
+ require_confirm_over?: number;
61
+ }
25
62
  /** Preview features that users can opt into via `agents beta`. */
26
63
  export type BetaFeatureName = 'drive' | 'factory';
27
64
  /** Subset of chalk color names used for agent-specific terminal output. */
@@ -210,6 +247,8 @@ export interface InstalledHook {
210
247
  export interface Manifest {
211
248
  agents?: Partial<Record<AgentId, string>>;
212
249
  run?: RunConfig;
250
+ /** Spend guardrails (issue #346). Project-local block overrides user. */
251
+ budget?: BudgetConfig;
213
252
  beta?: {
214
253
  enabled?: BetaFeatureName[];
215
254
  };
@@ -516,6 +555,15 @@ export interface ExtraRepoConfig {
516
555
  export interface Meta {
517
556
  agents?: Partial<Record<AgentId, string>>;
518
557
  run?: RunConfig;
558
+ /** macOS secrets-agent config. `auto` makes the first real keychain read of a
559
+ * `session`-tier bundle populate the broker so concurrent runs read silently. */
560
+ secrets?: {
561
+ agent?: {
562
+ auto?: boolean;
563
+ };
564
+ };
565
+ /** Spend guardrails (issue #346). User-global caps; project agents.yaml overrides. */
566
+ budget?: BudgetConfig;
519
567
  beta?: {
520
568
  enabled?: BetaFeatureName[];
521
569
  };
@@ -6,6 +6,21 @@
6
6
  * are composed at runtime by `agents run <workflow>`.
7
7
  */
8
8
  import type { AgentId } from './types.js';
9
+ /**
10
+ * The `loop:` block as it appears in WORKFLOW.md frontmatter (YAML, snake_case).
11
+ * Parsed defensively and translated to the camelCase LoopConfig the driver
12
+ * consumes (src/lib/loop.ts). See docs/07-entrypoints-and-loops.md.
13
+ */
14
+ export interface LoopConfigRaw {
15
+ /** Stop condition. Only `signal` is supported today. */
16
+ until?: 'signal';
17
+ /** Hard cap on iterations. */
18
+ max_iterations?: number;
19
+ /** Token hard-cap, enforced outside the agent. */
20
+ budget?: number;
21
+ /** Delay between iterations ("0" back-to-back, "30m" paces). */
22
+ interval?: string;
23
+ }
9
24
  /** Parsed WORKFLOW.md frontmatter. */
10
25
  export interface WorkflowFrontmatter {
11
26
  name: string;
@@ -22,6 +37,12 @@ export interface WorkflowFrontmatter {
22
37
  * Pass `--no-auto-secrets` to skip this injection.
23
38
  */
24
39
  secrets?: string[];
40
+ /**
41
+ * Optional loop block: wraps the workflow in a bounded until-condition loop
42
+ * (issue #332). When present, `agents run <workflow>` honors it without a
43
+ * `--loop` flag. Validated/coerced in parseWorkflowFrontmatter.
44
+ */
45
+ loop?: LoopConfigRaw;
25
46
  }
26
47
  /** A workflow found during repo discovery. */
27
48
  export interface DiscoveredWorkflow {
@@ -39,6 +60,41 @@ export interface InstalledWorkflow {
39
60
  }
40
61
  /** Parse WORKFLOW.md frontmatter from a workflow directory. Returns null if invalid. */
41
62
  export declare function parseWorkflowFrontmatter(workflowDir: string): WorkflowFrontmatter | null;
63
+ /**
64
+ * Defensively coerce a frontmatter `loop:` value into a LoopConfigRaw.
65
+ *
66
+ * Mirrors the asStringArray discipline above: a malformed field is dropped to
67
+ * undefined rather than passed through, so the loop driver never sees a bad
68
+ * shape. Returns undefined when `loop:` is absent or not an object, or when no
69
+ * recognized field survives coercion (an all-garbage block is treated as
70
+ * "no loop", not "empty loop").
71
+ *
72
+ * Field rules:
73
+ * - until: only the literal `signal` is accepted; anything else dropped.
74
+ * - max_iterations: a finite positive integer; non-numbers/<=0 dropped.
75
+ * - budget: a finite positive number (tokens); non-numbers/<=0 dropped.
76
+ * - interval: a string (e.g. "0", "30m"); non-strings dropped.
77
+ */
78
+ export declare function parseLoopBlock(v: unknown): LoopConfigRaw | undefined;
79
+ /**
80
+ * Decide which subagent .md stems a workflow may use, given the discovered
81
+ * subagent files and the parsed `allowedAgents` frontmatter. This is the
82
+ * fail-closed security boundary for issue #324:
83
+ *
84
+ * - `allowedAgents === undefined` (field absent) -> NO restriction; allow all.
85
+ * - `allowedAgents === []` (present, empty) -> allow ZERO; copy none.
86
+ * - `allowedAgents = [a, b]` -> allow only those stems.
87
+ *
88
+ * An explicit empty array must NEVER widen to "allow all" — that would copy
89
+ * every subagent definition into the run, granting MORE access than declared.
90
+ *
91
+ * `available` are the .md filenames found in subagents/ (e.g. `security.md`).
92
+ * Returns the stems to copy and any allowedAgents entries with no matching file.
93
+ */
94
+ export declare function resolveAllowedSubagents(available: string[], allowedAgents: string[] | undefined): {
95
+ allowedStems: string[];
96
+ missing: string[];
97
+ };
42
98
  /** Count subagent .md files in a workflow's subagents/ directory. */
43
99
  export declare function countWorkflowSubagents(workflowDir: string): number;
44
100
  /**