aiden-runtime 4.1.5 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/README.md +250 -847
  2. package/dist/api/server.js +32 -5
  3. package/dist/cli/v4/aidenCLI.js +351 -53
  4. package/dist/cli/v4/callbacks.js +170 -0
  5. package/dist/cli/v4/chatSession.js +138 -3
  6. package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
  7. package/dist/cli/v4/commands/browserDepth.js +45 -0
  8. package/dist/cli/v4/commands/cron.js +264 -0
  9. package/dist/cli/v4/commands/daemon.js +541 -0
  10. package/dist/cli/v4/commands/daemonStatus.js +253 -0
  11. package/dist/cli/v4/commands/help.js +7 -0
  12. package/dist/cli/v4/commands/index.js +20 -1
  13. package/dist/cli/v4/commands/runs.js +203 -0
  14. package/dist/cli/v4/commands/sandbox.js +48 -0
  15. package/dist/cli/v4/commands/suggestions.js +68 -0
  16. package/dist/cli/v4/commands/tce.js +41 -0
  17. package/dist/cli/v4/commands/trigger.js +378 -0
  18. package/dist/cli/v4/commands/update.js +95 -3
  19. package/dist/cli/v4/daemonAgentBuilder.js +142 -0
  20. package/dist/cli/v4/defaultSoul.js +1 -1
  21. package/dist/cli/v4/display/capabilityCard.js +26 -0
  22. package/dist/cli/v4/display.js +18 -8
  23. package/dist/cli/v4/replyRenderer.js +31 -23
  24. package/dist/cli/v4/updateBootPrompt.js +170 -0
  25. package/dist/core/playwrightBridge.js +129 -0
  26. package/dist/core/v4/aidenAgent.js +308 -4
  27. package/dist/core/v4/browserState.js +436 -0
  28. package/dist/core/v4/checkpoint.js +79 -0
  29. package/dist/core/v4/daemon/bootstrap.js +604 -0
  30. package/dist/core/v4/daemon/cleanShutdown.js +154 -0
  31. package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
  32. package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
  33. package/dist/core/v4/daemon/cron/migration.js +199 -0
  34. package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
  35. package/dist/core/v4/daemon/daemonConfig.js +90 -0
  36. package/dist/core/v4/daemon/db/connection.js +106 -0
  37. package/dist/core/v4/daemon/db/migrations.js +296 -0
  38. package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
  39. package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
  40. package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
  41. package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
  42. package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
  43. package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
  44. package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
  45. package/dist/core/v4/daemon/dispatcher/index.js +53 -0
  46. package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
  47. package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
  48. package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
  49. package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
  50. package/dist/core/v4/daemon/drain.js +156 -0
  51. package/dist/core/v4/daemon/eventLoopLag.js +73 -0
  52. package/dist/core/v4/daemon/health.js +159 -0
  53. package/dist/core/v4/daemon/idempotencyStore.js +204 -0
  54. package/dist/core/v4/daemon/index.js +179 -0
  55. package/dist/core/v4/daemon/instanceTracker.js +99 -0
  56. package/dist/core/v4/daemon/resourceRegistry.js +150 -0
  57. package/dist/core/v4/daemon/restartCode.js +32 -0
  58. package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
  59. package/dist/core/v4/daemon/runStore.js +114 -0
  60. package/dist/core/v4/daemon/runtimeLock.js +167 -0
  61. package/dist/core/v4/daemon/signals.js +50 -0
  62. package/dist/core/v4/daemon/supervisor.js +272 -0
  63. package/dist/core/v4/daemon/triggerBus.js +279 -0
  64. package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
  65. package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
  66. package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
  67. package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
  68. package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
  69. package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
  70. package/dist/core/v4/daemon/triggers/email/index.js +332 -0
  71. package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
  72. package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
  73. package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
  74. package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
  75. package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
  76. package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
  77. package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
  78. package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
  79. package/dist/core/v4/daemon/triggers/webhook.js +376 -0
  80. package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
  81. package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
  82. package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
  83. package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
  84. package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
  85. package/dist/core/v4/daemon/types.js +15 -0
  86. package/dist/core/v4/dockerSession.js +461 -0
  87. package/dist/core/v4/dryRun.js +117 -0
  88. package/dist/core/v4/failureClassifier.js +779 -0
  89. package/dist/core/v4/recoveryReport.js +449 -0
  90. package/dist/core/v4/runtimeToggles.js +187 -0
  91. package/dist/core/v4/sandboxConfig.js +285 -0
  92. package/dist/core/v4/sandboxFs.js +316 -0
  93. package/dist/core/v4/suggestionCatalog.js +41 -0
  94. package/dist/core/v4/suggestionEngine.js +210 -0
  95. package/dist/core/v4/toolRegistry.js +18 -0
  96. package/dist/core/v4/turnState.js +587 -0
  97. package/dist/core/v4/update/checkUpdate.js +63 -3
  98. package/dist/core/v4/update/installMethodDetect.js +115 -0
  99. package/dist/core/v4/update/registryClient.js +121 -0
  100. package/dist/core/v4/update/skipState.js +75 -0
  101. package/dist/core/v4/verifier.js +448 -0
  102. package/dist/core/version.js +1 -1
  103. package/dist/tools/v4/browser/_observer.js +224 -0
  104. package/dist/tools/v4/browser/browserBlocker.js +396 -0
  105. package/dist/tools/v4/browser/browserClick.js +18 -1
  106. package/dist/tools/v4/browser/browserClose.js +18 -1
  107. package/dist/tools/v4/browser/browserExtract.js +5 -1
  108. package/dist/tools/v4/browser/browserFill.js +17 -1
  109. package/dist/tools/v4/browser/browserGetUrl.js +5 -1
  110. package/dist/tools/v4/browser/browserNavigate.js +16 -1
  111. package/dist/tools/v4/browser/browserScreenshot.js +5 -1
  112. package/dist/tools/v4/browser/browserScroll.js +18 -1
  113. package/dist/tools/v4/browser/browserType.js +17 -1
  114. package/dist/tools/v4/browser/captchaCheck.js +5 -1
  115. package/dist/tools/v4/executeCode.js +1 -0
  116. package/dist/tools/v4/files/fileCopy.js +56 -2
  117. package/dist/tools/v4/files/fileDelete.js +38 -1
  118. package/dist/tools/v4/files/fileList.js +12 -1
  119. package/dist/tools/v4/files/fileMove.js +59 -2
  120. package/dist/tools/v4/files/filePatch.js +43 -1
  121. package/dist/tools/v4/files/fileRead.js +12 -1
  122. package/dist/tools/v4/files/fileWrite.js +41 -1
  123. package/dist/tools/v4/index.js +71 -58
  124. package/dist/tools/v4/memory/memoryAdd.js +14 -0
  125. package/dist/tools/v4/memory/memoryRemove.js +14 -0
  126. package/dist/tools/v4/memory/memoryReplace.js +15 -0
  127. package/dist/tools/v4/memory/sessionSummary.js +12 -0
  128. package/dist/tools/v4/process/processKill.js +19 -0
  129. package/dist/tools/v4/process/processList.js +1 -0
  130. package/dist/tools/v4/process/processLogRead.js +1 -0
  131. package/dist/tools/v4/process/processSpawn.js +13 -0
  132. package/dist/tools/v4/process/processWait.js +1 -0
  133. package/dist/tools/v4/sessions/recallSession.js +1 -0
  134. package/dist/tools/v4/sessions/sessionList.js +1 -0
  135. package/dist/tools/v4/sessions/sessionSearch.js +1 -0
  136. package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
  137. package/dist/tools/v4/skills/skillManage.js +13 -0
  138. package/dist/tools/v4/skills/skillView.js +1 -0
  139. package/dist/tools/v4/skills/skillsList.js +1 -0
  140. package/dist/tools/v4/subagent/subagentFanout.js +1 -0
  141. package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
  142. package/dist/tools/v4/system/appClose.js +13 -0
  143. package/dist/tools/v4/system/appInput.js +13 -0
  144. package/dist/tools/v4/system/appLaunch.js +13 -0
  145. package/dist/tools/v4/system/clipboardRead.js +1 -0
  146. package/dist/tools/v4/system/clipboardWrite.js +14 -0
  147. package/dist/tools/v4/system/mediaKey.js +12 -0
  148. package/dist/tools/v4/system/mediaSessions.js +1 -0
  149. package/dist/tools/v4/system/mediaTransport.js +13 -0
  150. package/dist/tools/v4/system/naturalEvents.js +1 -0
  151. package/dist/tools/v4/system/nowPlaying.js +1 -0
  152. package/dist/tools/v4/system/osProcessList.js +1 -0
  153. package/dist/tools/v4/system/screenshot.js +1 -0
  154. package/dist/tools/v4/system/systemInfo.js +1 -0
  155. package/dist/tools/v4/system/volumeSet.js +17 -0
  156. package/dist/tools/v4/terminal/shellExec.js +81 -9
  157. package/dist/tools/v4/web/deepResearch.js +1 -0
  158. package/dist/tools/v4/web/openUrl.js +1 -0
  159. package/dist/tools/v4/web/webFetch.js +1 -0
  160. package/dist/tools/v4/web/webPage.js +1 -0
  161. package/dist/tools/v4/web/webSearch.js +1 -0
  162. package/dist/tools/v4/web/youtubeSearch.js +1 -0
  163. package/package.json +7 -1
@@ -42,6 +42,32 @@
42
42
  */
43
43
  Object.defineProperty(exports, "__esModule", { value: true });
44
44
  exports.AidenAgent = void 0;
45
+ // v4.1.6 spike — Task Completion Engine (TCE) per-turn loop detector
46
+ // + recovery controller. Default ON as of v4.2 Phase 6 — set
47
+ // AIDEN_TCE=0 to disable. Zero
48
+ // behavioral change when disabled. See core/v4/turnState.ts.
49
+ const turnState_1 = require("./turnState");
50
+ // v4.2 Phase 1 — per-tool result verifier. Same TCE gate as
51
+ // TurnState (default ON, opt-out via AIDEN_TCE=0); classification
52
+ // feeds the recovery controller.
53
+ const verifier_1 = require("./verifier");
54
+ // v4.2 Phase 2 — tool-failure WHY-classifier. Runs after the verifier
55
+ // when verification.ok === false. Records-only; Phase 3 will act.
56
+ const failureClassifier_1 = require("./failureClassifier");
57
+ // v4.2 Phase 3 — structured RecoveryReport. Built ONLY when the
58
+ // recovery controller's surface stage fires (tool_loop); enriches the
59
+ // existing surface card with summary + category breakdown + dominant
60
+ // guidance. Implicitly gated by TCE being enabled (surface only
61
+ // reachable when TurnState is enabled — default ON as of Phase 6).
62
+ const recoveryReport_1 = require("./recoveryReport");
63
+ // v4.2 Phase 4 — checkpoint / restore. Lets the recovery controller
64
+ // roll conversation messages + TurnState internals back to before a
65
+ // looping tool started failing, so the model retries from a clean
66
+ // baseline. Hard-blocked on iterations containing mutating tools
67
+ // (never claim to undo executed side effects). All-no-op when
68
+ // TCE is opted out via AIDEN_TCE=0 — capture / mark / find /
69
+ // restore all short-circuit.
70
+ const checkpoint_1 = require("./checkpoint");
45
71
  const skillEnforcement_1 = require("./agent/skillEnforcement");
46
72
  const urlProvenance_1 = require("./agent/urlProvenance");
47
73
  const intentPreArm_1 = require("./agent/intentPreArm");
@@ -92,6 +118,7 @@ class AidenAgent {
92
118
  this.onSkillCandidate = opts.onSkillCandidate;
93
119
  this.resolveVerifiedFlag = opts.resolveVerifiedFlag;
94
120
  this.resolveToolset = opts.resolveToolset;
121
+ this.resolveMutates = opts.resolveMutates;
95
122
  this.promptBuilder = opts.promptBuilder;
96
123
  this.promptBuilderOptions = opts.promptBuilderOptions;
97
124
  this.contextCompressor = opts.contextCompressor;
@@ -108,6 +135,15 @@ class AidenAgent {
108
135
  this.onPromptBuilt = opts.onPromptBuilt;
109
136
  this.onProviderRequestStart = opts.onProviderRequestStart;
110
137
  this.lookupSkillRequiredTools = opts.lookupSkillRequiredTools;
138
+ // v4.5 Phase 7 — explicit sessionId. Existing access path
139
+ // `(this as { sessionId?: string }).sessionId` at line 751–752
140
+ // already reads from `this.sessionId`; setting it here keys
141
+ // docker / browser / TurnState per session for daemon-mode
142
+ // turns. Interactive REPL callers don't pass this and continue
143
+ // hitting the 'session' fallback.
144
+ if (typeof opts.sessionId === 'string' && opts.sessionId.length > 0) {
145
+ this.sessionId = opts.sessionId;
146
+ }
111
147
  // Phase v4.1.2-slice3: optional health registry (constructor-
112
148
  // injected per the slice3 decision tree — no singleton). When
113
149
  // wired, the caller already plumbed trackers into each subsystem
@@ -304,7 +340,21 @@ class AidenAgent {
304
340
  }
305
341
  }
306
342
  // 10. SkillTeacher post-loop observation + proposal.
343
+ //
344
+ // v4.1.6 Polish 2 — `handleProposal` previously ran INLINE here,
345
+ // awaiting `callbacks.promptUser` (an inquirer modal) before
346
+ // `runConversation` returned. That made the modal fire BEFORE
347
+ // chatSession rendered the agent's reply on screen, so users
348
+ // saw "Save this as a reusable skill?" pop up mid-turn — feels
349
+ // like an interruption.
350
+ //
351
+ // New flow: agent ONLY observes here. When a proposal needs user
352
+ // confirmation (tier_3_propose with a promptUser callback), the
353
+ // proposal is surfaced in `AidenAgentResult.skillProposal` and
354
+ // chatSession handles the prompt + create dance AFTER rendering
355
+ // the reply. Tier_4_auto still runs inline (no prompt needed).
307
356
  let skillCreated;
357
+ let skillProposal;
308
358
  if (this.skillTeacher) {
309
359
  try {
310
360
  const traceForTeacher = loopResult.toolCallTrace.map((entry, i) => ({
@@ -316,9 +366,20 @@ class AidenAgent {
316
366
  }));
317
367
  const proposal = await this.skillTeacher.observeTurn(history, traceForTeacher, loopResult.finishReason !== 'stop');
318
368
  if (proposal) {
319
- const result = await this.skillTeacher.handleProposal(proposal, this.skillTeacherCallbacks);
320
- if (result.created && result.skillName) {
321
- skillCreated = result.skillName;
369
+ // Defer to chatSession only when there's a prompt callback
370
+ // wired (tier_3_propose path). Otherwise run inline to
371
+ // preserve tier_4_auto and tier_off behaviour.
372
+ const hasPromptCallback = typeof this.skillTeacherCallbacks?.promptUser === 'function';
373
+ if (hasPromptCallback) {
374
+ // Surface the proposal back to chatSession; do NOT call
375
+ // handleProposal here.
376
+ skillProposal = proposal;
377
+ }
378
+ else {
379
+ const result = await this.skillTeacher.handleProposal(proposal, this.skillTeacherCallbacks);
380
+ if (result.created && result.skillName) {
381
+ skillCreated = result.skillName;
382
+ }
322
383
  }
323
384
  }
324
385
  }
@@ -369,11 +430,20 @@ class AidenAgent {
369
430
  toolCallTrace: loopResult.toolCallTrace,
370
431
  honestyFindings,
371
432
  skillCreated,
433
+ // v4.1.6 Polish 2 — deferred to chatSession's post-render
434
+ // handler when the SkillTeacher proposal needs user
435
+ // confirmation. Undefined when no proposal, when tier auto-
436
+ // handled inline, or when the teacher's observation faulted.
437
+ skillProposal,
372
438
  compressionEvents: this.compressionEvents,
373
439
  auxiliaryUsage: this.auxiliaryClient?.getUsage() ?? {},
374
440
  skillEnforcement: { ...this.skillEnforcementMetrics },
375
441
  urlProvenance: { ...this.urlProvenanceMetrics },
376
442
  emptyResponse: { ...this.emptyResponseMetrics },
443
+ // v4.1.6 spike (TCE) — surfaced when TurnState hit the surface
444
+ // threshold mid-turn. chatSession reads this to render the
445
+ // structured-failure card; undefined on all other finishReasons.
446
+ toolLoopCard: loopResult.toolLoopCard,
377
447
  };
378
448
  }
379
449
  // ── Private helpers ──────────────────────────────────────────────────
@@ -465,6 +535,10 @@ class AidenAgent {
465
535
  // off the same entry index.
466
536
  const fullTrace = [];
467
537
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
538
+ // v4.2 Phase 3 — turn start timestamp for RecoveryReport duration.
539
+ // Captured here so any code path (early-return / error / surface)
540
+ // can compute wallclock duration consistently.
541
+ const turnStartedAt = Date.now();
468
542
  let turnCount = 0;
469
543
  let toolCallCount = 0;
470
544
  let fallbackActivated = false;
@@ -473,7 +547,27 @@ class AidenAgent {
473
547
  let emptyRetriesUsed = 0;
474
548
  let finishReason = 'stop';
475
549
  let finalContent = '';
550
+ // v4.1.6 spike (TCE) — per-turn loop detection + recovery state.
551
+ // Default ON as of v4.2 Phase 6 — set AIDEN_TCE=0 to disable.
552
+ // When disabled, TurnState.recordToolCall short-circuits with
553
+ // `{kind: 'allow'}` and the entire v4.2 recovery surface stays
554
+ // dormant (zero behavioural change vs v4.1.6).
555
+ const turnState = new turnState_1.TurnState();
556
+ // v4.2 Phase 1 — per-tool verifier registry. Constructed
557
+ // unconditionally (cheap, no side effects) but only used to
558
+ // classify tool outcomes when TCE is enabled; verification args
559
+ // are populated only inside the gated branch below.
560
+ const verifierRegistry = (0, verifier_1.buildDefaultRegistry)();
561
+ // v4.2 Phase 2 — per-tool failure classifier. Same gating as
562
+ // the verifier; only runs when verification.ok === false. Phase 2
563
+ // records-only — Phase 3 wires recovery actions off the category.
564
+ const failureClassifier = (0, failureClassifier_1.buildDefaultClassifier)();
565
+ let toolLoopCard = undefined;
476
566
  while (true) {
567
+ // v4.1.6 spike — decrement cooldown counters once per iteration
568
+ // so cooled-down tools eventually return to the schemas. No-op
569
+ // when TCE is disabled.
570
+ turnState.advanceIteration();
477
571
  if (turnCount >= this.maxTurns) {
478
572
  finishReason = 'budget_exhausted';
479
573
  break;
@@ -491,9 +585,22 @@ class AidenAgent {
491
585
  this.onBudgetWarning?.('warning', turnCount, this.maxTurns);
492
586
  }
493
587
  // ── Provider call (stream or non-stream) ──────────────────────────
588
+ //
589
+ // v4.1.6 spike (TCE) — filter cooled-down tools out of the
590
+ // schemas we send to the provider. The model literally cannot
591
+ // see (and therefore cannot request) a cooled-down tool until
592
+ // its cooldown counter decrements to zero via
593
+ // `turnState.advanceIteration()`. No-op when TCE disabled
594
+ // (`getCooledDownTools()` returns []).
595
+ let effectiveTools = tools;
596
+ const cooledDown = turnState.getCooledDownTools();
597
+ if (cooledDown.length > 0) {
598
+ const cdSet = new Set(cooledDown);
599
+ effectiveTools = tools.filter((t) => !cdSet.has(t.name));
600
+ }
494
601
  let output;
495
602
  try {
496
- output = await this.callProvider(messages, tools, runOptions);
603
+ output = await this.callProvider(messages, effectiveTools, runOptions);
497
604
  }
498
605
  catch (err) {
499
606
  const error = err instanceof Error ? err : new Error(String(err));
@@ -511,6 +618,25 @@ class AidenAgent {
511
618
  }
512
619
  totalUsage.inputTokens += output.usage?.inputTokens ?? 0;
513
620
  totalUsage.outputTokens += output.usage?.outputTokens ?? 0;
621
+ // v4.2 Phase 4 — capture the state going INTO this iteration's
622
+ // tool dispatch. MUST run BEFORE `messages.push(assistantMsg)`
623
+ // so the checkpoint represents "the conversation before the
624
+ // model decided to call this iteration's tools". If rollback
625
+ // fires later, truncating `messages.length` to
626
+ // `checkpoint.messages.length` drops the assistant tool_call
627
+ // message together with its tool result messages — preserving
628
+ // tool_call/tool_result pairing in the rolled-back state.
629
+ //
630
+ // Capturing AFTER the assistant push (the prior placement) was
631
+ // a real bug: rollback would leave the assistant tool_call in
632
+ // history without its tool results, producing strict-provider
633
+ // 400 errors of the form "No tool output found for function
634
+ // call <id>". Tests in tests/v4/core/checkpoint-integration
635
+ // assert the post-rollback messages array contains zero orphan
636
+ // assistant tool_calls — this position is part of the contract.
637
+ //
638
+ // No-op when TCE is disabled (AIDEN_TCE=0) or checkpointDepth=0.
639
+ turnState.captureCheckpoint(messages, turnCount);
514
640
  // ── Append assistant message ──────────────────────────────────────
515
641
  const assistantMsg = output.toolCalls.length > 0
516
642
  ? { role: 'assistant', content: output.content ?? '', toolCalls: output.toolCalls }
@@ -585,8 +711,30 @@ class AidenAgent {
585
711
  }
586
712
  // ── Dispatch tools sequentially ──────────────────────────────────
587
713
  const turnToolMessages = [];
714
+ // v4.1.6 spike (TCE) — set when TurnState surfaces a tool_loop
715
+ // mid-batch. The agent stops dispatching remaining calls in the
716
+ // batch and breaks out of the outer iteration loop cleanly.
717
+ let surfaceDecision = null;
718
+ // v4.2 Phase 4 — set when TurnState's recovery controller asks
719
+ // for a rollback. The agent loop truncates messages + restores
720
+ // TurnState internals + pushes a corrective system message,
721
+ // then continues the outer iteration loop from a clean baseline.
722
+ let rollbackDecision = null;
588
723
  for (const call of output.toolCalls) {
589
724
  this.onToolCall?.(call, 'before');
725
+ // v4.2 Phase 4 — mark any active checkpoints as containing a
726
+ // mutating call BEFORE dispatch. Done pre-dispatch (not post)
727
+ // so that even if the tool throws / errors / produces a
728
+ // partial side effect, the mutation flag is set — rollback
729
+ // safety errs on the side of "this iteration mutated state".
730
+ // The mutability resolver is wired from the CLI's tool
731
+ // registry (`resolveMutates`); unknown tools return undefined,
732
+ // which we treat as non-mutating (leave the flag alone).
733
+ // Plugin authors should declare `mutates` honestly on their
734
+ // tool handlers — this is the structural enforcement point.
735
+ if (turnState.isEnabled() && this.resolveMutates?.(call.name) === true) {
736
+ turnState.markMutationOnLiveCheckpoint(call.name);
737
+ }
590
738
  let result;
591
739
  try {
592
740
  result = await this.toolExecutor(call);
@@ -600,11 +748,46 @@ class AidenAgent {
600
748
  };
601
749
  }
602
750
  toolCallCount += 1;
751
+ // v4.2 Phase 1 — verifier classification. Runs only when TCE
752
+ // is enabled; the registry resolves a per-tool verifier or
753
+ // falls back to the heuristic default. Synchronous + pure;
754
+ // no network, no side effects.
755
+ let verification;
756
+ let classification = null;
757
+ if (turnState.isEnabled()) {
758
+ try {
759
+ verification = verifierRegistry.resolve(call.name)(call.name, call.arguments, result);
760
+ }
761
+ catch {
762
+ // Defensive — a buggy verifier never breaks the agent loop.
763
+ verification = undefined;
764
+ }
765
+ // v4.2 Phase 2 — classify WHY when the verifier said !ok.
766
+ // classify(...) returns null for ok results, so happy-path
767
+ // calls incur zero classifier work.
768
+ if (verification && !verification.ok) {
769
+ try {
770
+ classification = failureClassifier.classify(verification, call.name, call.arguments, result);
771
+ }
772
+ catch {
773
+ // Defensive — a buggy classifier never breaks the loop.
774
+ classification = null;
775
+ }
776
+ }
777
+ }
603
778
  toolCallTrace.push({
604
779
  name: call.name,
605
780
  result: result.result,
606
781
  error: result.error,
607
782
  verified: this.resolveVerifiedFlag?.(result),
783
+ // v4.2 Phase 1 — verification surfaces alongside the trace
784
+ // entry for downstream callers (chatSession, loopTrace,
785
+ // future RecoveryReport). Undefined when TCE is off.
786
+ verification,
787
+ // v4.2 Phase 2 — classification surfaces alongside verification.
788
+ // Undefined for verifier-ok calls (classifier skips them) and
789
+ // when TCE is off.
790
+ classification: classification ?? undefined,
608
791
  });
609
792
  fullTrace.push({ name: call.name, args: call.arguments });
610
793
  // URL ledger ingest — extracts ids from result body for next turn.
@@ -623,6 +806,126 @@ class AidenAgent {
623
806
  ? `[error] ${result.error}`
624
807
  : stringifyToolResult(result.result),
625
808
  });
809
+ // v4.1.6 spike (TCE) — after the tool result lands in the
810
+ // message history, consult the recovery controller. Returns
811
+ // `allow` immediately when TCE disabled (zero overhead).
812
+ // v4.2 Phase 1 — pass the verifier outcome so TurnState's
813
+ // consecFailed counter can fast-fail on demonstrably failing
814
+ // tool calls before the slower signature/name counters fire.
815
+ // v4.2 Phase 2 — also pass the classification so TurnState
816
+ // records the WHY for Phase 3's RecoveryReport.
817
+ const recovery = turnState.recordToolCall(call.name, call.arguments, verification, classification);
818
+ if (recovery.kind === 'hint' && recovery.hintMessage) {
819
+ // Stage 1: append a corrective system message so the model
820
+ // sees it on the next provider call. Same pattern as the
821
+ // existing skill-enforcement + URL-provenance correctives.
822
+ turnToolMessages.push({
823
+ role: 'system',
824
+ content: recovery.hintMessage,
825
+ });
826
+ }
827
+ else if (recovery.kind === 'cooldown_with_rollback' && recovery.rollback) {
828
+ // v4.2 Phase 4 — controller asks us to roll back. Capture
829
+ // the decision; we apply it AFTER the inner dispatch loop
830
+ // exits so we don't leave partial turnToolMessages in a
831
+ // half-state. Break out of dispatch immediately — no point
832
+ // running more tools whose results we're about to drop.
833
+ rollbackDecision = recovery;
834
+ break;
835
+ }
836
+ else if (recovery.kind === 'cooldown' && recovery.cooldownMessage) {
837
+ // Stage 2: cooldown has already been recorded internally
838
+ // (next iteration's schema-filter step excludes this tool).
839
+ // Inject a system message announcing the cooldown so the
840
+ // model knows why the tool just disappeared from its menu.
841
+ turnToolMessages.push({
842
+ role: 'system',
843
+ content: recovery.cooldownMessage,
844
+ });
845
+ }
846
+ else if (recovery.kind === 'surface' && recovery.surfaceCard) {
847
+ // Stage 3: structured failure. Stop dispatching the rest of
848
+ // the batch — anything else is throwing good budget after
849
+ // bad. The outer loop reads `surfaceDecision` below and
850
+ // exits cleanly.
851
+ surfaceDecision = recovery;
852
+ break;
853
+ }
854
+ }
855
+ // v4.2 Phase 4 — apply rollback if the controller asked for it.
856
+ // Truncate messages to the captured snapshot length, restore
857
+ // TurnState internals, then push a corrective system message
858
+ // and continue the OUTER iteration loop. We deliberately drop
859
+ // any partial `turnToolMessages` collected before the rollback
860
+ // trigger — those are the noise we're trying to undo.
861
+ //
862
+ // Hard-block invariant: TurnState only emits
863
+ // `cooldown_with_rollback` when the target checkpoint has
864
+ // `containedMutations === false`, so we never get here for an
865
+ // iteration that ran a mutating tool. The optional
866
+ // `rollback.blockedBy` is empty in Phase 4 (kept on the type
867
+ // for a Phase 5+ soft-rollback variant).
868
+ if (rollbackDecision && rollbackDecision.rollback) {
869
+ const { checkpoint, blockedBy } = rollbackDecision.rollback;
870
+ // Truncate messages array to the captured length. The captured
871
+ // items are immutable Message references; we keep them as-is
872
+ // and just shorten the live array.
873
+ messages.length = checkpoint.messages.length;
874
+ // Restore TurnState mutable internals (stage / streaks /
875
+ // cooledDownTools / arrays). The cooled-down tools map is
876
+ // preserved as it was at checkpoint time — but the controller
877
+ // already added the looping tool to `cooledDownTools` before
878
+ // emitting the decision, so we need to RE-apply that cooldown
879
+ // after restore to honour the cooldown intent.
880
+ turnState.restoreInternalsFrom(checkpoint);
881
+ // Re-cool the tool that triggered the rollback so the next
882
+ // provider call sees the constrained schema.
883
+ if (rollbackDecision.toolName) {
884
+ turnState.reapplyCooldown(rollbackDecision.toolName);
885
+ }
886
+ // Inject corrective system message so the model sees what
887
+ // happened and why the tool just disappeared from its menu.
888
+ messages.push({
889
+ role: 'system',
890
+ content: (0, checkpoint_1.buildRollbackMessage)({
891
+ iteration: checkpoint.iteration,
892
+ toolName: rollbackDecision.toolName,
893
+ blockedBy,
894
+ }),
895
+ });
896
+ // Continue the outer iteration loop from the restored
897
+ // baseline. The next provider call gets the filtered tool
898
+ // schema (cooldown applied) and the corrective message.
899
+ continue;
900
+ }
901
+ // v4.1.6 spike (TCE) — terminal surface handling.
902
+ if (surfaceDecision && surfaceDecision.kind === 'surface') {
903
+ finishReason = 'tool_loop';
904
+ // v4.2 Phase 3 — enrich the base surface card with a
905
+ // structured RecoveryReport. Pure synthesis from TurnState's
906
+ // diagnostic snapshot + first-user-message goal + duration.
907
+ // Implicit gating: this branch is only reachable when
908
+ // TurnState is enabled, so AIDEN_TCE=0 (opt-out) never
909
+ // builds a report.
910
+ if (surfaceDecision.surfaceCard) {
911
+ const report = (0, recoveryReport_1.buildRecoveryReport)({
912
+ snapshot: turnState.getDiagnosticSnapshot(),
913
+ goal: (0, recoveryReport_1.extractGoal)(messages),
914
+ exitReason: 'tool_loop',
915
+ durationMs: Date.now() - turnStartedAt,
916
+ });
917
+ toolLoopCard = (0, recoveryReport_1.enrichCardWithReport)(surfaceDecision.surfaceCard, report);
918
+ }
919
+ else {
920
+ toolLoopCard = surfaceDecision.surfaceCard;
921
+ }
922
+ // Push the partial tool messages we collected so honesty +
923
+ // history downstream see the full sequence including the
924
+ // loop-trigger call. No final assistant message — the
925
+ // tool_loop card IS the user-facing surface.
926
+ messages.push(...turnToolMessages);
927
+ finalContent = '';
928
+ break;
626
929
  }
627
930
  // ── Iteration-budget injection on the LAST tool message ──────────
628
931
  if (this.iterationBudgetInjection && turnToolMessages.length > 0) {
@@ -645,6 +948,7 @@ class AidenAgent {
645
948
  totalUsage,
646
949
  toolCallTrace,
647
950
  fullTrace,
951
+ toolLoopCard,
648
952
  };
649
953
  }
650
954
  /**