@bradtaylorsf/alpha-loop 1.14.0 → 1.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.md +73 -1
  2. package/dist/cli.js +37 -2
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/eval.d.ts +22 -0
  5. package/dist/commands/eval.js +105 -1
  6. package/dist/commands/eval.js.map +1 -1
  7. package/dist/commands/evolve-routing.d.ts +24 -0
  8. package/dist/commands/evolve-routing.js +320 -0
  9. package/dist/commands/evolve-routing.js.map +1 -0
  10. package/dist/commands/history.d.ts +2 -0
  11. package/dist/commands/history.js +95 -1
  12. package/dist/commands/history.js.map +1 -1
  13. package/dist/commands/init.d.ts +6 -0
  14. package/dist/commands/init.js +26 -1
  15. package/dist/commands/init.js.map +1 -1
  16. package/dist/commands/report.d.ts +7 -0
  17. package/dist/commands/report.js +27 -0
  18. package/dist/commands/report.js.map +1 -0
  19. package/dist/commands/scan.d.ts +1 -1
  20. package/dist/commands/scan.js.map +1 -1
  21. package/dist/engine/agents.d.ts +30 -8
  22. package/dist/engine/agents.js +94 -10
  23. package/dist/engine/agents.js.map +1 -1
  24. package/dist/engine/prerequisites.d.ts +40 -2
  25. package/dist/engine/prerequisites.js +126 -2
  26. package/dist/engine/prerequisites.js.map +1 -1
  27. package/dist/lib/agent.d.ts +39 -2
  28. package/dist/lib/agent.js +106 -4
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/config.d.ts +73 -1
  31. package/dist/lib/config.js +214 -1
  32. package/dist/lib/config.js.map +1 -1
  33. package/dist/lib/escalation.d.ts +102 -0
  34. package/dist/lib/escalation.js +241 -0
  35. package/dist/lib/escalation.js.map +1 -0
  36. package/dist/lib/eval-matrix.d.ts +125 -0
  37. package/dist/lib/eval-matrix.js +317 -0
  38. package/dist/lib/eval-matrix.js.map +1 -0
  39. package/dist/lib/eval-report.d.ts +12 -0
  40. package/dist/lib/eval-report.js +132 -0
  41. package/dist/lib/eval-report.js.map +1 -0
  42. package/dist/lib/eval-secret-scan.d.ts +41 -0
  43. package/dist/lib/eval-secret-scan.js +163 -0
  44. package/dist/lib/eval-secret-scan.js.map +1 -0
  45. package/dist/lib/eval.js +7 -4
  46. package/dist/lib/eval.js.map +1 -1
  47. package/dist/lib/hardware.d.ts +9 -0
  48. package/dist/lib/hardware.js +32 -0
  49. package/dist/lib/hardware.js.map +1 -0
  50. package/dist/lib/pipeline.d.ts +5 -1
  51. package/dist/lib/pipeline.js +217 -16
  52. package/dist/lib/pipeline.js.map +1 -1
  53. package/dist/lib/prerequisites.js +11 -3
  54. package/dist/lib/prerequisites.js.map +1 -1
  55. package/dist/lib/routing-history.d.ts +43 -0
  56. package/dist/lib/routing-history.js +112 -0
  57. package/dist/lib/routing-history.js.map +1 -0
  58. package/dist/lib/routing-promotion.d.ts +95 -0
  59. package/dist/lib/routing-promotion.js +229 -0
  60. package/dist/lib/routing-promotion.js.map +1 -0
  61. package/dist/lib/session.js +13 -0
  62. package/dist/lib/session.js.map +1 -1
  63. package/dist/lib/telemetry.d.ts +147 -0
  64. package/dist/lib/telemetry.js +353 -0
  65. package/dist/lib/telemetry.js.map +1 -0
  66. package/package.json +1 -1
@@ -5,7 +5,8 @@ import { mkdirSync, readFileSync, writeFileSync, unlinkSync, existsSync } from '
5
5
  import { join } from 'node:path';
6
6
  import { log } from './logger.js';
7
7
  import { exec } from './shell.js';
8
- import { spawnAgent } from './agent.js';
8
+ import { spawnAgent, buildEndpointEnv } from './agent.js';
9
+ import { classifyToolError, classifyToolErrors, EscalationTracker, defaultEscalationStatePath, appendEscalationEventToTrace, } from './escalation.js';
9
10
  import { setupWorktree, cleanupWorktree } from './worktree.js';
10
11
  import { assignIssue, labelIssue, commentIssue, createPR, mergePR, updateProjectStatus, getIssueComments, } from './github.js';
11
12
  import { buildImplementPrompt, buildReviewPrompt, buildAssumptionsPrompt, buildBatchPlanPrompt, buildBatchImplementPrompt, buildBatchReviewPrompt, } from './prompts.js';
@@ -13,8 +14,9 @@ import { runTests } from './testing.js';
13
14
  import { runVerify } from './verify.js';
14
15
  import { extractLearnings, getLearningContext } from './learning.js';
15
16
  import { saveResult, getPreviousResult } from './session.js';
16
- import { writeTrace, writeTraceMetadata, writeTraceToSubdir, writeConfigSnapshot, writeScores, writeCosts, computeScores, computeCosts, } from './traces.js';
17
- import { estimateCost } from './config.js';
17
+ import { writeTrace, writeTraceMetadata, writeTraceToSubdir, writeConfigSnapshot, writeScores, writeCosts, computeScores, computeCosts, runDir, } from './traces.js';
18
+ import { estimateCost, getFallbackPolicy, resolveRoutingStage, selectRoutingProfile } from './config.js';
19
+ import { buildStageTelemetry, writeStageTelemetry } from './telemetry.js';
18
20
  /** Max diff size to include in learning analysis. */
19
21
  const MAX_DIFF_CHARS = 10_000;
20
22
  /**
@@ -212,16 +214,184 @@ export function formatGateFindings(gate, gateType) {
212
214
  }
213
215
  return lines.join('\n');
214
216
  }
217
+ /**
218
+ * Build a StageTelemetry entry from the agent result and append it to the
219
+ * run's stages.jsonl file. Called alongside each stepCosts.push site so that
220
+ * every stage invocation — not just the top-level session — emits telemetry.
221
+ */
222
+ function recordStageTelemetry(session, issueNum, stage, agentResult, config, ctx) {
223
+ try {
224
+ const entry = buildStageTelemetry(agentResult, stage, config, {
225
+ endpoint: ctx?.endpointName,
226
+ endpointType: ctx?.endpointType,
227
+ profile: ctx?.profile,
228
+ issueNum,
229
+ });
230
+ writeStageTelemetry(runDir(session.name), entry);
231
+ }
232
+ catch {
233
+ /* telemetry is best-effort */
234
+ }
235
+ }
236
+ /**
237
+ * Invoke an agent with retry-with-escalation and the rolling-rate guardrail.
238
+ *
239
+ * Resolves the stage's routing (model + endpoint), runs the agent once, and
240
+ * if the output contains two or more classified tool-call errors escalates
241
+ * the turn to the configured fallback model. Escalation is scoped to the
242
+ * current turn: the caller's next invocation reverts to the primary model.
243
+ *
244
+ * Also honors the guardrail — when the stage is pinned to fallback because
245
+ * the recent error rate exceeded the threshold, the primary is skipped
246
+ * entirely and the fallback runs up-front.
247
+ */
248
+ async function spawnStageAgent(options, config, ctx) {
249
+ const policy = getFallbackPolicy(config);
250
+ const primary = resolveRoutingStage(config, ctx.stage);
251
+ const primaryModel = primary?.model ?? options.model;
252
+ const primaryEndpoint = primary?.endpoint;
253
+ const primaryEnv = primaryEndpoint ? buildEndpointEnv(primaryEndpoint, primaryModel) : undefined;
254
+ // Expose endpoint info back to the caller so it can tag telemetry.
255
+ const primaryEndpointName = config.routing?.stages?.[ctx.stage]?.endpoint;
256
+ ctx.endpointName = primaryEndpointName ?? 'default';
257
+ ctx.endpointType = primaryEndpoint?.type;
258
+ ctx.profile = selectRoutingProfile(config, ctx.issueNum);
259
+ const hasEscalateTarget = policy?.escalate_to !== undefined;
260
+ const escalateTo = policy?.escalate_to;
261
+ const escalateEndpoint = escalateTo
262
+ ? config.routing?.endpoints?.[escalateTo.endpoint]
263
+ : undefined;
264
+ const escalateEnv = escalateTo && escalateEndpoint
265
+ ? buildEndpointEnv(escalateEndpoint, escalateTo.model)
266
+ : undefined;
267
+ const nowIso = () => new Date().toISOString();
268
+ const recordEvent = (event) => {
269
+ ctx.events.push(event);
270
+ try {
271
+ appendEscalationEventToTrace(runDir(ctx.session.name), event);
272
+ }
273
+ catch { /* non-fatal */ }
274
+ };
275
+ // Guardrail: stage pinned to fallback — use fallback up-front.
276
+ const reverted = policy?.on_tool_error === 'escalate'
277
+ && hasEscalateTarget
278
+ && ctx.tracker.isStageReverted(ctx.stage);
279
+ if (reverted && escalateTo) {
280
+ recordEvent({
281
+ type: 'stage_revert_active',
282
+ stage: ctx.stage,
283
+ from_model: primaryModel,
284
+ to_model: escalateTo.model,
285
+ reason: 'rolling_error_rate_above_threshold',
286
+ turn_index: ctx.turnIndex,
287
+ issue: ctx.issueNum,
288
+ ts: nowIso(),
289
+ });
290
+ ctx.endpointName = escalateTo.endpoint;
291
+ ctx.endpointType = escalateEndpoint?.type;
292
+ const result = await spawnAgent({
293
+ ...options,
294
+ model: escalateTo.model,
295
+ env: escalateEnv,
296
+ });
297
+ ctx.tracker.recordTurn({
298
+ stage: ctx.stage,
299
+ errored: classifyToolError(result.output) !== null,
300
+ escalated: true,
301
+ windowSize: policy.escalation_window_issues,
302
+ });
303
+ return result;
304
+ }
305
+ // Normal primary invocation.
306
+ const firstResult = await spawnAgent({
307
+ ...options,
308
+ model: primaryModel,
309
+ env: primaryEnv,
310
+ });
311
+ const classified = classifyToolErrors(firstResult.output);
312
+ const errored = classified.length > 0;
313
+ let escalated = false;
314
+ let finalResult = firstResult;
315
+ if (policy?.on_tool_error === 'escalate'
316
+ && hasEscalateTarget
317
+ && escalateTo
318
+ && classified.length >= 2) {
319
+ const last = classified[classified.length - 1];
320
+ recordEvent({
321
+ type: 'escalation',
322
+ stage: ctx.stage,
323
+ from_model: primaryModel,
324
+ to_model: escalateTo.model,
325
+ reason: last.kind,
326
+ turn_index: ctx.turnIndex,
327
+ issue: ctx.issueNum,
328
+ ts: nowIso(),
329
+ });
330
+ ctx.endpointName = escalateTo.endpoint;
331
+ ctx.endpointType = escalateEndpoint?.type;
332
+ finalResult = await spawnAgent({
333
+ ...options,
334
+ model: escalateTo.model,
335
+ env: escalateEnv,
336
+ });
337
+ escalated = true;
338
+ }
339
+ if (policy) {
340
+ ctx.tracker.recordTurn({
341
+ stage: ctx.stage,
342
+ errored,
343
+ escalated,
344
+ windowSize: policy.escalation_window_issues,
345
+ });
346
+ if (ctx.tracker.maybeTriggerRevert(ctx.stage, policy)) {
347
+ const untilMs = ctx.tracker.revertUntil(ctx.stage) ?? 0;
348
+ recordEvent({
349
+ type: 'stage_revert',
350
+ stage: ctx.stage,
351
+ from_model: primaryModel,
352
+ to_model: escalateTo?.model ?? primaryModel,
353
+ reason: `rolling_error_rate_above_${policy.escalation_error_threshold}`,
354
+ turn_index: ctx.turnIndex,
355
+ issue: ctx.issueNum,
356
+ ts: nowIso(),
357
+ });
358
+ recordEvent({
359
+ type: 'needs_human_input',
360
+ stage: ctx.stage,
361
+ from_model: primaryModel,
362
+ to_model: escalateTo?.model ?? primaryModel,
363
+ reason: `stage_pinned_to_fallback_until_${new Date(untilMs).toISOString()}`,
364
+ turn_index: ctx.turnIndex,
365
+ issue: ctx.issueNum,
366
+ ts: nowIso(),
367
+ });
368
+ }
369
+ }
370
+ return finalResult;
371
+ }
215
372
  /**
216
373
  * Process a single issue through the full pipeline.
217
374
  * Steps: status → worktree → plan → implement → test+retry → verify+retry →
218
375
  * review → PR → learnings → update → auto-merge → cleanup
219
376
  */
220
- export async function processIssue(issueNum, title, body, config, session) {
377
+ export async function processIssue(issueNum, title, body, config, session, trackerOverride) {
221
378
  const startTime = Date.now();
222
379
  const projectDir = process.cwd();
223
380
  const stepCosts = [];
224
381
  const stepsCompleted = [];
382
+ const escalationEvents = [];
383
+ const tracker = trackerOverride ?? new EscalationTracker({
384
+ statePath: defaultEscalationStatePath(projectDir),
385
+ });
386
+ let turnCounter = 0;
387
+ const stageCtx = (stage) => ({
388
+ stage,
389
+ issueNum,
390
+ turnIndex: ++turnCounter,
391
+ session,
392
+ tracker,
393
+ events: escalationEvents,
394
+ });
225
395
  // Setup logging
226
396
  mkdirSync(session.logsDir, { recursive: true });
227
397
  const logFile = join(session.logsDir, `issue-${issueNum}.log`);
@@ -309,7 +479,8 @@ Rules:
309
479
  - Write ONLY the JSON file. Do not create any other files or make any code changes.`;
310
480
  // Trace the plan prompt
311
481
  tracePrompt(session.name, issueNum, 'plan', planPrompt);
312
- const planResult = await spawnAgent({
482
+ const planCtx = stageCtx('plan');
483
+ const planResult = await spawnStageAgent({
313
484
  agent: config.agent,
314
485
  model: config.model,
315
486
  prompt: planPrompt,
@@ -317,10 +488,11 @@ Rules:
317
488
  logFile: join(session.logsDir, `issue-${issueNum}-plan.log`),
318
489
  verbose: config.verbose,
319
490
  timeout: config.agentTimeout * 1000,
320
- });
491
+ }, config, planCtx);
321
492
  // Trace the plan output and costs
322
493
  traceOutput(session.name, issueNum, 'plan', planResult.output);
323
494
  stepCosts.push(buildStepCost('plan', issueNum, planResult, config));
495
+ recordStageTelemetry(session, issueNum, 'plan', planResult, config, planCtx);
324
496
  // Detect transient errors (usage limits) during planning
325
497
  if (planResult.exitCode !== 0 && isTransientError(planResult.output)) {
326
498
  log.warn(`Agent hit a transient error during planning for #${issueNum} — re-queuing`);
@@ -381,7 +553,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
381
553
  });
382
554
  // Trace the implement prompt
383
555
  tracePrompt(session.name, issueNum, 'implement', implementPrompt);
384
- const implResult = await spawnAgent({
556
+ const implCtx = stageCtx('build');
557
+ const implResult = await spawnStageAgent({
385
558
  agent: config.agent,
386
559
  model: config.model,
387
560
  prompt: implementPrompt,
@@ -389,10 +562,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
389
562
  logFile: join(session.logsDir, `issue-${issueNum}-implement.log`),
390
563
  verbose: config.verbose,
391
564
  timeout: config.agentTimeout * 1000,
392
- });
565
+ }, config, implCtx);
393
566
  // Trace the implement output and costs
394
567
  traceOutput(session.name, issueNum, 'implement', implResult.output);
395
568
  stepCosts.push(buildStepCost('implement', issueNum, implResult, config));
569
+ recordStageTelemetry(session, issueNum, 'implement', implResult, config, implCtx);
396
570
  if (implResult.exitCode !== 0) {
397
571
  // Auto-commit any uncommitted work before deciding on cleanup
398
572
  const dirtyCheck = exec('git status --porcelain', { cwd: worktreePath });
@@ -459,7 +633,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
459
633
  const fixPrompt = `Tests are failing for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}). Fix the failing tests.\n\nTest output:\n${testOutput}\n\nInstructions:\n1. Read the failing test output carefully and identify the ROOT CAUSE\n2. Fix ONLY code related to issue #${issueNum} — do NOT modify test infrastructure, build scripts, or unrelated files\n3. If tests fail due to environment issues (missing venv, wrong port, missing deps), fix only YOUR code — do NOT rewrite the test runner or package.json scripts\n4. Run the tests again to verify\n5. Commit your fixes with a DESCRIPTIVE message that explains WHAT you fixed and WHY it failed.\n Format: fix(#${issueNum}): <what you changed> — <why it was failing>\n Example: fix(#${issueNum}): use port 5435 for postgres — default 5432 conflicts with host service\n DO NOT use generic messages like "fix: resolve test failures"`;
460
634
  // Trace fix prompt
461
635
  tracePrompt(session.name, issueNum, `fix-${attempt}`, fixPrompt);
462
- const fixResult = await spawnAgent({
636
+ const fixCtx = stageCtx('test_write');
637
+ const fixResult = await spawnStageAgent({
463
638
  agent: config.agent,
464
639
  model: config.model,
465
640
  prompt: fixPrompt,
@@ -468,10 +643,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
468
643
  logFile: join(session.logsDir, `issue-${issueNum}-fix-${attempt}.log`),
469
644
  verbose: config.verbose,
470
645
  timeout: config.agentTimeout * 1000,
471
- });
646
+ }, config, fixCtx);
472
647
  // Trace fix output and costs
473
648
  traceOutput(session.name, issueNum, `fix-${attempt}`, fixResult.output);
474
649
  stepCosts.push(buildStepCost('test_fix', issueNum, fixResult, config));
650
+ recordStageTelemetry(session, issueNum, 'test_fix', fixResult, config, fixCtx);
475
651
  stepsCompleted.push(`fix-${attempt}`);
476
652
  // Auto-commit fixes
477
653
  const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
@@ -519,7 +695,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
519
695
  });
520
696
  // Trace review prompt
521
697
  tracePrompt(session.name, issueNum, `review${attempt > 1 ? `-${attempt}` : ''}`, reviewPrompt);
522
- const reviewResult = await spawnAgent({
698
+ const reviewCtx = stageCtx('review');
699
+ const reviewResult = await spawnStageAgent({
523
700
  agent: config.agent,
524
701
  model: config.reviewModel,
525
702
  prompt: reviewPrompt,
@@ -527,10 +704,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
527
704
  logFile: join(session.logsDir, `issue-${issueNum}-review${attempt > 1 ? `-${attempt}` : ''}.log`),
528
705
  verbose: config.verbose,
529
706
  timeout: config.agentTimeout * 1000,
530
- });
707
+ }, config, reviewCtx);
531
708
  // Trace review output and costs
532
709
  traceOutput(session.name, issueNum, `review${attempt > 1 ? `-${attempt}` : ''}`, reviewResult.output);
533
710
  stepCosts.push(buildStepCost('review', issueNum, reviewResult, config));
711
+ recordStageTelemetry(session, issueNum, 'review', reviewResult, config, reviewCtx);
534
712
  reviewOutput = reviewResult.output;
535
713
  }
536
714
  catch {
@@ -554,7 +732,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
554
732
  const fixPrompt = `The code review for issue #${issueNum} found problems that need to be fixed.\n\n${findings}\n\nInstructions:\n1. Address each finding listed above\n2. Run tests to make sure nothing is broken\n3. Commit your fixes with: git commit -m "fix(#${issueNum}): address review findings"`;
555
733
  // Trace review-fix prompt
556
734
  tracePrompt(session.name, issueNum, `review-fix-${attempt}`, fixPrompt);
557
- const reviewFixResult = await spawnAgent({
735
+ const reviewFixCtx = stageCtx('build');
736
+ const reviewFixResult = await spawnStageAgent({
558
737
  agent: config.agent,
559
738
  model: config.model,
560
739
  prompt: fixPrompt,
@@ -563,10 +742,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
563
742
  logFile: join(session.logsDir, `issue-${issueNum}-review-fix-${attempt}.log`),
564
743
  verbose: config.verbose,
565
744
  timeout: config.agentTimeout * 1000,
566
- });
745
+ }, config, reviewFixCtx);
567
746
  // Trace review-fix output and costs
568
747
  traceOutput(session.name, issueNum, `review-fix-${attempt}`, reviewFixResult.output);
569
748
  stepCosts.push(buildStepCost('review', issueNum, reviewFixResult, config));
749
+ recordStageTelemetry(session, issueNum, 'review_fix', reviewFixResult, config, reviewFixCtx);
570
750
  // Auto-commit if agent didn't
571
751
  const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
572
752
  if (fixStatus.stdout.trim()) {
@@ -647,7 +827,8 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
647
827
  const fixPrompt = `Live verification failed for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}).\n\n${findings}\n\nInstructions:\n1. Read the verification findings and identify the ROOT CAUSE\n2. Fix ONLY code related to issue #${issueNum}\n3. Run tests to make sure nothing is broken\n4. Commit your fixes with: git commit -m "fix(#${issueNum}): address verification findings"`;
648
828
  // Trace verify-fix prompt
649
829
  tracePrompt(session.name, issueNum, `verify-fix-${attempt}`, fixPrompt);
650
- const verifyFixResult = await spawnAgent({
830
+ const verifyFixCtx = stageCtx('test_exec');
831
+ const verifyFixResult = await spawnStageAgent({
651
832
  agent: config.agent,
652
833
  model: config.model,
653
834
  prompt: fixPrompt,
@@ -656,10 +837,11 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
656
837
  logFile: join(session.logsDir, `issue-${issueNum}-verify-fix-${attempt}.log`),
657
838
  verbose: config.verbose,
658
839
  timeout: config.agentTimeout * 1000,
659
- });
840
+ }, config, verifyFixCtx);
660
841
  // Trace verify-fix output and costs
661
842
  traceOutput(session.name, issueNum, `verify-fix-${attempt}`, verifyFixResult.output);
662
843
  stepCosts.push(buildStepCost('verify', issueNum, verifyFixResult, config));
844
+ recordStageTelemetry(session, issueNum, 'verify_fix', verifyFixResult, config, verifyFixCtx);
663
845
  // Auto-commit if agent didn't
664
846
  const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
665
847
  if (fixStatus.stdout.trim()) {
@@ -749,6 +931,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
749
931
  });
750
932
  traceOutput(session.name, issueNum, 'assumptions', assumptionsResult.output);
751
933
  stepCosts.push(buildStepCost('assumptions', issueNum, assumptionsResult, config));
934
+ recordStageTelemetry(session, issueNum, 'assumptions', assumptionsResult, config, {
935
+ profile: selectRoutingProfile(config, issueNum),
936
+ });
752
937
  if (assumptionsResult.exitCode === 0 && assumptionsResult.output.trim()) {
753
938
  commentIssue(config.repo, issueNum, `## AI Implementation Notes\n\n${assumptionsResult.output.trim()}\n\n---\n_Posted by alpha-loop for user validation._`);
754
939
  log.success('Posted assumptions/decisions comment');
@@ -912,6 +1097,7 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
912
1097
  verifySkipped,
913
1098
  duration,
914
1099
  filesChanged,
1100
+ escalationEvents: escalationEvents.length > 0 ? escalationEvents : undefined,
915
1101
  };
916
1102
  // Save result to session
917
1103
  saveResult(session, result);
@@ -1019,6 +1205,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
1019
1205
  });
1020
1206
  traceOutput(session.name, issues[0].number, 'batch-plan', planResult.output);
1021
1207
  stepCosts.push(buildStepCost('plan', issues[0].number, planResult, config));
1208
+ recordStageTelemetry(session, issues[0].number, 'batch-plan', planResult, config, {
1209
+ profile: selectRoutingProfile(config, issues[0].number),
1210
+ });
1022
1211
  if (planResult.exitCode !== 0 && isTransientError(planResult.output)) {
1023
1212
  log.warn('Agent hit a transient error during batch planning — re-queuing all issues');
1024
1213
  for (const issue of issues)
@@ -1073,6 +1262,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
1073
1262
  });
1074
1263
  traceOutput(session.name, issues[0].number, 'batch-implement', implResult.output);
1075
1264
  stepCosts.push(buildStepCost('implement', issues[0].number, implResult, config));
1265
+ recordStageTelemetry(session, issues[0].number, 'batch-implement', implResult, config, {
1266
+ profile: selectRoutingProfile(config, issues[0].number),
1267
+ });
1076
1268
  if (implResult.exitCode !== 0) {
1077
1269
  // Auto-commit any uncommitted work before deciding on cleanup
1078
1270
  const dirtyCheck = exec('git status --porcelain', { cwd: worktreePath });
@@ -1152,6 +1344,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
1152
1344
  });
1153
1345
  traceOutput(session.name, issues[0].number, `batch-fix-${attempt}`, fixResult.output);
1154
1346
  stepCosts.push(buildStepCost('test_fix', issues[0].number, fixResult, config));
1347
+ recordStageTelemetry(session, issues[0].number, 'batch-test_fix', fixResult, config, {
1348
+ profile: selectRoutingProfile(config, issues[0].number),
1349
+ });
1155
1350
  const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
1156
1351
  if (fixStatus.stdout.trim()) {
1157
1352
  exec('git add -A', { cwd: worktreePath });
@@ -1193,6 +1388,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
1193
1388
  });
1194
1389
  traceOutput(session.name, issues[0].number, `batch-review${attempt > 1 ? `-${attempt}` : ''}`, reviewResult.output);
1195
1390
  stepCosts.push(buildStepCost('review', issues[0].number, reviewResult, config));
1391
+ recordStageTelemetry(session, issues[0].number, 'batch-review', reviewResult, config, {
1392
+ profile: selectRoutingProfile(config, issues[0].number),
1393
+ });
1196
1394
  reviewOutput = reviewResult.output;
1197
1395
  }
1198
1396
  catch {
@@ -1224,6 +1422,9 @@ Do NOT redo work that is already committed. Build on top of existing progress.\n
1224
1422
  });
1225
1423
  traceOutput(session.name, issues[0].number, `batch-review-fix-${attempt}`, reviewFixResult.output);
1226
1424
  stepCosts.push(buildStepCost('review', issues[0].number, reviewFixResult, config));
1425
+ recordStageTelemetry(session, issues[0].number, 'batch-review_fix', reviewFixResult, config, {
1426
+ profile: selectRoutingProfile(config, issues[0].number),
1427
+ });
1227
1428
  const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
1228
1429
  if (fixStatus.stdout.trim()) {
1229
1430
  exec('git add -A', { cwd: worktreePath });