aiden-runtime 4.1.4 → 4.5.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/README.md +250 -847
  2. package/dist/api/server.js +32 -5
  3. package/dist/cli/v4/aidenCLI.js +379 -53
  4. package/dist/cli/v4/callbacks.js +248 -0
  5. package/dist/cli/v4/chatSession.js +292 -4
  6. package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
  7. package/dist/cli/v4/commands/browserDepth.js +45 -0
  8. package/dist/cli/v4/commands/cron.js +264 -0
  9. package/dist/cli/v4/commands/daemon.js +541 -0
  10. package/dist/cli/v4/commands/daemonStatus.js +253 -0
  11. package/dist/cli/v4/commands/help.js +7 -0
  12. package/dist/cli/v4/commands/index.js +20 -1
  13. package/dist/cli/v4/commands/runs.js +203 -0
  14. package/dist/cli/v4/commands/sandbox.js +48 -0
  15. package/dist/cli/v4/commands/suggestions.js +68 -0
  16. package/dist/cli/v4/commands/tce.js +41 -0
  17. package/dist/cli/v4/commands/trigger.js +378 -0
  18. package/dist/cli/v4/commands/update.js +95 -3
  19. package/dist/cli/v4/daemonAgentBuilder.js +142 -0
  20. package/dist/cli/v4/defaultSoul.js +75 -3
  21. package/dist/cli/v4/display/capabilityCard.js +26 -0
  22. package/dist/cli/v4/display/progressBar.js +41 -8
  23. package/dist/cli/v4/display.js +258 -15
  24. package/dist/cli/v4/replyRenderer.js +31 -23
  25. package/dist/cli/v4/toolPreview.js +10 -0
  26. package/dist/cli/v4/updateBootPrompt.js +170 -0
  27. package/dist/core/playwrightBridge.js +129 -0
  28. package/dist/core/toolRegistry.js +7 -1
  29. package/dist/core/v4/aidenAgent.js +371 -4
  30. package/dist/core/v4/browserState.js +436 -0
  31. package/dist/core/v4/checkpoint.js +79 -0
  32. package/dist/core/v4/daemon/bootstrap.js +604 -0
  33. package/dist/core/v4/daemon/cleanShutdown.js +154 -0
  34. package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
  35. package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
  36. package/dist/core/v4/daemon/cron/migration.js +199 -0
  37. package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
  38. package/dist/core/v4/daemon/daemonConfig.js +90 -0
  39. package/dist/core/v4/daemon/db/connection.js +106 -0
  40. package/dist/core/v4/daemon/db/migrations.js +296 -0
  41. package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
  42. package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
  43. package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
  44. package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
  45. package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
  46. package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
  47. package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
  48. package/dist/core/v4/daemon/dispatcher/index.js +53 -0
  49. package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
  50. package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
  51. package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
  52. package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
  53. package/dist/core/v4/daemon/drain.js +156 -0
  54. package/dist/core/v4/daemon/eventLoopLag.js +73 -0
  55. package/dist/core/v4/daemon/health.js +159 -0
  56. package/dist/core/v4/daemon/idempotencyStore.js +204 -0
  57. package/dist/core/v4/daemon/index.js +179 -0
  58. package/dist/core/v4/daemon/instanceTracker.js +99 -0
  59. package/dist/core/v4/daemon/resourceRegistry.js +150 -0
  60. package/dist/core/v4/daemon/restartCode.js +32 -0
  61. package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
  62. package/dist/core/v4/daemon/runStore.js +114 -0
  63. package/dist/core/v4/daemon/runtimeLock.js +167 -0
  64. package/dist/core/v4/daemon/signals.js +50 -0
  65. package/dist/core/v4/daemon/supervisor.js +272 -0
  66. package/dist/core/v4/daemon/triggerBus.js +279 -0
  67. package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
  68. package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
  69. package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
  70. package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
  71. package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
  72. package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
  73. package/dist/core/v4/daemon/triggers/email/index.js +332 -0
  74. package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
  75. package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
  76. package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
  77. package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
  78. package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
  79. package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
  80. package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
  81. package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
  82. package/dist/core/v4/daemon/triggers/webhook.js +376 -0
  83. package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
  84. package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
  85. package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
  86. package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
  87. package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
  88. package/dist/core/v4/daemon/types.js +15 -0
  89. package/dist/core/v4/dockerSession.js +461 -0
  90. package/dist/core/v4/dryRun.js +117 -0
  91. package/dist/core/v4/failureClassifier.js +779 -0
  92. package/dist/core/v4/loopTrace.js +257 -0
  93. package/dist/core/v4/recoveryReport.js +449 -0
  94. package/dist/core/v4/runtimeToggles.js +187 -0
  95. package/dist/core/v4/sandboxConfig.js +285 -0
  96. package/dist/core/v4/sandboxFs.js +316 -0
  97. package/dist/core/v4/suggestionCatalog.js +41 -0
  98. package/dist/core/v4/suggestionEngine.js +210 -0
  99. package/dist/core/v4/toolRegistry.js +18 -0
  100. package/dist/core/v4/turnState.js +587 -0
  101. package/dist/core/v4/update/checkUpdate.js +63 -3
  102. package/dist/core/v4/update/installMethodDetect.js +115 -0
  103. package/dist/core/v4/update/registryClient.js +121 -0
  104. package/dist/core/v4/update/skipState.js +75 -0
  105. package/dist/core/v4/verifier.js +448 -0
  106. package/dist/core/version.js +1 -1
  107. package/dist/core/webSearch.js +64 -24
  108. package/dist/tools/v4/browser/_observer.js +224 -0
  109. package/dist/tools/v4/browser/browserBlocker.js +396 -0
  110. package/dist/tools/v4/browser/browserClick.js +18 -1
  111. package/dist/tools/v4/browser/browserClose.js +18 -1
  112. package/dist/tools/v4/browser/browserExtract.js +5 -1
  113. package/dist/tools/v4/browser/browserFill.js +17 -1
  114. package/dist/tools/v4/browser/browserGetUrl.js +5 -1
  115. package/dist/tools/v4/browser/browserNavigate.js +16 -1
  116. package/dist/tools/v4/browser/browserScreenshot.js +5 -1
  117. package/dist/tools/v4/browser/browserScroll.js +18 -1
  118. package/dist/tools/v4/browser/browserType.js +17 -1
  119. package/dist/tools/v4/browser/captchaCheck.js +5 -1
  120. package/dist/tools/v4/executeCode.js +1 -0
  121. package/dist/tools/v4/files/fileCopy.js +56 -2
  122. package/dist/tools/v4/files/fileDelete.js +38 -1
  123. package/dist/tools/v4/files/fileList.js +12 -1
  124. package/dist/tools/v4/files/fileMove.js +59 -2
  125. package/dist/tools/v4/files/filePatch.js +43 -1
  126. package/dist/tools/v4/files/fileRead.js +12 -1
  127. package/dist/tools/v4/files/fileWrite.js +41 -1
  128. package/dist/tools/v4/index.js +71 -58
  129. package/dist/tools/v4/memory/memoryAdd.js +14 -0
  130. package/dist/tools/v4/memory/memoryRemove.js +14 -0
  131. package/dist/tools/v4/memory/memoryReplace.js +15 -0
  132. package/dist/tools/v4/memory/sessionSummary.js +12 -0
  133. package/dist/tools/v4/process/processKill.js +19 -0
  134. package/dist/tools/v4/process/processList.js +1 -0
  135. package/dist/tools/v4/process/processLogRead.js +1 -0
  136. package/dist/tools/v4/process/processSpawn.js +13 -0
  137. package/dist/tools/v4/process/processWait.js +1 -0
  138. package/dist/tools/v4/sessions/recallSession.js +1 -0
  139. package/dist/tools/v4/sessions/sessionList.js +1 -0
  140. package/dist/tools/v4/sessions/sessionSearch.js +1 -0
  141. package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
  142. package/dist/tools/v4/skills/skillManage.js +13 -0
  143. package/dist/tools/v4/skills/skillView.js +1 -0
  144. package/dist/tools/v4/skills/skillsList.js +1 -0
  145. package/dist/tools/v4/subagent/subagentFanout.js +1 -0
  146. package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
  147. package/dist/tools/v4/system/appClose.js +13 -0
  148. package/dist/tools/v4/system/appInput.js +13 -0
  149. package/dist/tools/v4/system/appLaunch.js +13 -0
  150. package/dist/tools/v4/system/clipboardRead.js +1 -0
  151. package/dist/tools/v4/system/clipboardWrite.js +14 -0
  152. package/dist/tools/v4/system/mediaKey.js +12 -0
  153. package/dist/tools/v4/system/mediaSessions.js +1 -0
  154. package/dist/tools/v4/system/mediaTransport.js +13 -0
  155. package/dist/tools/v4/system/naturalEvents.js +1 -0
  156. package/dist/tools/v4/system/nowPlaying.js +1 -0
  157. package/dist/tools/v4/system/osProcessList.js +1 -0
  158. package/dist/tools/v4/system/screenshot.js +1 -0
  159. package/dist/tools/v4/system/systemInfo.js +1 -0
  160. package/dist/tools/v4/system/volumeSet.js +17 -0
  161. package/dist/tools/v4/terminal/shellExec.js +81 -9
  162. package/dist/tools/v4/web/deepResearch.js +1 -0
  163. package/dist/tools/v4/web/openUrl.js +1 -0
  164. package/dist/tools/v4/web/webFetch.js +1 -0
  165. package/dist/tools/v4/web/webPage.js +1 -0
  166. package/dist/tools/v4/web/webSearch.js +1 -0
  167. package/dist/tools/v4/web/youtubeSearch.js +1 -0
  168. package/package.json +7 -1
  169. package/plugins/aiden-plugin-cdp-browser/.granted-permissions.json +8 -0
@@ -42,6 +42,32 @@
42
42
  */
43
43
  Object.defineProperty(exports, "__esModule", { value: true });
44
44
  exports.AidenAgent = void 0;
45
+ // v4.1.6 spike — Task Completion Engine (TCE) per-turn loop detector
46
+ // + recovery controller. Default ON as of v4.2 Phase 6 — set
47
+ // AIDEN_TCE=0 to disable. Zero
48
+ // behavioral change when disabled (AIDEN_TCE=0). See core/v4/turnState.ts.
49
+ const turnState_1 = require("./turnState");
50
+ // v4.2 Phase 1 — per-tool result verifier. Same TCE gate as
51
+ // TurnState (default ON, opt-out via AIDEN_TCE=0); classification
52
+ // feeds the recovery controller.
53
+ const verifier_1 = require("./verifier");
54
+ // v4.2 Phase 2 — tool-failure WHY-classifier. Runs after the verifier
55
+ // when verification.ok === false. Records-only; Phase 3 will act.
56
+ const failureClassifier_1 = require("./failureClassifier");
57
+ // v4.2 Phase 3 — structured RecoveryReport. Built ONLY when the
58
+ // recovery controller's surface stage fires (tool_loop); enriches the
59
+ // existing surface card with summary + category breakdown + dominant
60
+ // guidance. Implicitly gated by TCE being enabled (surface only
61
+ // reachable when TurnState is enabled — default ON as of Phase 6).
62
+ const recoveryReport_1 = require("./recoveryReport");
63
+ // v4.2 Phase 4 — checkpoint / restore. Lets the recovery controller
64
+ // roll conversation messages + TurnState internals back to before a
65
+ // looping tool started failing, so the model retries from a clean
66
+ // baseline. Hard-blocked on iterations containing mutating tools
67
+ // (never claim to undo executed side effects). All-no-op when
68
+ // TCE is opted out via AIDEN_TCE=0 — capture / mark / find /
69
+ // restore all short-circuit.
70
+ const checkpoint_1 = require("./checkpoint");
45
71
  const skillEnforcement_1 = require("./agent/skillEnforcement");
46
72
  const urlProvenance_1 = require("./agent/urlProvenance");
47
73
  const intentPreArm_1 = require("./agent/intentPreArm");
@@ -92,6 +118,7 @@ class AidenAgent {
92
118
  this.onSkillCandidate = opts.onSkillCandidate;
93
119
  this.resolveVerifiedFlag = opts.resolveVerifiedFlag;
94
120
  this.resolveToolset = opts.resolveToolset;
121
+ this.resolveMutates = opts.resolveMutates;
95
122
  this.promptBuilder = opts.promptBuilder;
96
123
  this.promptBuilderOptions = opts.promptBuilderOptions;
97
124
  this.contextCompressor = opts.contextCompressor;
@@ -103,7 +130,20 @@ class AidenAgent {
103
130
  this.onCompression = opts.onCompression;
104
131
  this.refreshMemorySnapshot = opts.refreshMemorySnapshot;
105
132
  this.onMemoryRefresh = opts.onMemoryRefresh;
133
+ // v4.1.5 Issue K — phase hooks (all optional, fire defensively).
134
+ this.onMemoryRefreshStart = opts.onMemoryRefreshStart;
135
+ this.onPromptBuilt = opts.onPromptBuilt;
136
+ this.onProviderRequestStart = opts.onProviderRequestStart;
106
137
  this.lookupSkillRequiredTools = opts.lookupSkillRequiredTools;
138
+ // v4.5 Phase 7 — explicit sessionId. Existing access path
139
+ // `(this as { sessionId?: string }).sessionId` at line 751–752
140
+ // already reads from `this.sessionId`; setting it here keys
141
+ // docker / browser / TurnState per session for daemon-mode
142
+ // turns. Interactive REPL callers don't pass this and continue
143
+ // hitting the 'session' fallback.
144
+ if (typeof opts.sessionId === 'string' && opts.sessionId.length > 0) {
145
+ this.sessionId = opts.sessionId;
146
+ }
107
147
  // Phase v4.1.2-slice3: optional health registry (constructor-
108
148
  // injected per the slice3 decision tree — no singleton). When
109
149
  // wired, the caller already plumbed trackers into each subsystem
@@ -300,7 +340,21 @@ class AidenAgent {
300
340
  }
301
341
  }
302
342
  // 10. SkillTeacher post-loop observation + proposal.
343
+ //
344
+ // v4.1.6 Polish 2 — `handleProposal` previously ran INLINE here,
345
+ // awaiting `callbacks.promptUser` (an inquirer modal) before
346
+ // `runConversation` returned. That made the modal fire BEFORE
347
+ // chatSession rendered the agent's reply on screen, so users
348
+ // saw "Save this as a reusable skill?" pop up mid-turn — feels
349
+ // like an interruption.
350
+ //
351
+ // New flow: agent ONLY observes here. When a proposal needs user
352
+ // confirmation (tier_3_propose with a promptUser callback), the
353
+ // proposal is surfaced in `AidenAgentResult.skillProposal` and
354
+ // chatSession handles the prompt + create dance AFTER rendering
355
+ // the reply. Tier_4_auto still runs inline (no prompt needed).
303
356
  let skillCreated;
357
+ let skillProposal;
304
358
  if (this.skillTeacher) {
305
359
  try {
306
360
  const traceForTeacher = loopResult.toolCallTrace.map((entry, i) => ({
@@ -312,9 +366,20 @@ class AidenAgent {
312
366
  }));
313
367
  const proposal = await this.skillTeacher.observeTurn(history, traceForTeacher, loopResult.finishReason !== 'stop');
314
368
  if (proposal) {
315
- const result = await this.skillTeacher.handleProposal(proposal, this.skillTeacherCallbacks);
316
- if (result.created && result.skillName) {
317
- skillCreated = result.skillName;
369
+ // Defer to chatSession only when there's a prompt callback
370
+ // wired (tier_3_propose path). Otherwise run inline to
371
+ // preserve tier_4_auto and tier_off behaviour.
372
+ const hasPromptCallback = typeof this.skillTeacherCallbacks?.promptUser === 'function';
373
+ if (hasPromptCallback) {
374
+ // Surface the proposal back to chatSession; do NOT call
375
+ // handleProposal here.
376
+ skillProposal = proposal;
377
+ }
378
+ else {
379
+ const result = await this.skillTeacher.handleProposal(proposal, this.skillTeacherCallbacks);
380
+ if (result.created && result.skillName) {
381
+ skillCreated = result.skillName;
382
+ }
318
383
  }
319
384
  }
320
385
  }
@@ -365,11 +430,20 @@ class AidenAgent {
365
430
  toolCallTrace: loopResult.toolCallTrace,
366
431
  honestyFindings,
367
432
  skillCreated,
433
+ // v4.1.6 Polish 2 — deferred to chatSession's post-render
434
+ // handler when the SkillTeacher proposal needs user
435
+ // confirmation. Undefined when no proposal, when tier auto-
436
+ // handled inline, or when the teacher's observation faulted.
437
+ skillProposal,
368
438
  compressionEvents: this.compressionEvents,
369
439
  auxiliaryUsage: this.auxiliaryClient?.getUsage() ?? {},
370
440
  skillEnforcement: { ...this.skillEnforcementMetrics },
371
441
  urlProvenance: { ...this.urlProvenanceMetrics },
372
442
  emptyResponse: { ...this.emptyResponseMetrics },
443
+ // v4.1.6 spike (TCE) — surfaced when TurnState hit the surface
444
+ // threshold mid-turn. chatSession reads this to render the
445
+ // structured-failure card; undefined on all other finishReasons.
446
+ toolLoopCard: loopResult.toolLoopCard,
373
447
  };
374
448
  }
375
449
  // ── Private helpers ──────────────────────────────────────────────────
@@ -386,6 +460,14 @@ class AidenAgent {
386
460
  // / 'user' need a snapshot refresh first.
387
461
  const needsSnapshot = this.memoryDirty.has('memory') || this.memoryDirty.has('user');
388
462
  if (needsSnapshot && this.refreshMemorySnapshot) {
463
+ // v4.1.5 Issue K — fire BEFORE the file I/O so the display layer
464
+ // can switch the activity verb to "refreshing memory" while the
465
+ // read is in flight. Defensive try/catch so a misbehaving hook
466
+ // never blocks the refresh.
467
+ try {
468
+ this.onMemoryRefreshStart?.();
469
+ }
470
+ catch { /* defensive */ }
389
471
  let snapshot;
390
472
  try {
391
473
  snapshot = await this.refreshMemorySnapshot();
@@ -410,6 +492,21 @@ class AidenAgent {
410
492
  if (this.cachedSystemPrompt !== null)
411
493
  return this.cachedSystemPrompt;
412
494
  this.cachedSystemPrompt = await this.promptBuilder.build(this.promptBuilderOptions);
495
+ // v4.1.5 Issue K — fire AFTER the prompt has been assembled, with
496
+ // cardinality so the display layer can surface "preparing prompt:
497
+ // N tools, M skills" or similar. Only fires when the cache MISSED
498
+ // (which is what made us actually build); cached returns skip the
499
+ // hook because nothing was prepared this turn. Defensive try/catch.
500
+ if (this.onPromptBuilt) {
501
+ try {
502
+ this.onPromptBuilt({
503
+ tools: this.tools.length,
504
+ skills: this.promptBuilderOptions.skillsList?.length ?? 0,
505
+ memoryFacts: countMemoryFacts(this.promptBuilderOptions.memorySnapshot),
506
+ });
507
+ }
508
+ catch { /* defensive */ }
509
+ }
413
510
  return this.cachedSystemPrompt;
414
511
  }
415
512
  async narrowTools(userMsg, history) {
@@ -438,6 +535,10 @@ class AidenAgent {
438
535
  // off the same entry index.
439
536
  const fullTrace = [];
440
537
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
538
+ // v4.2 Phase 3 — turn start timestamp for RecoveryReport duration.
539
+ // Captured here so any code path (early-return / error / surface)
540
+ // can compute wallclock duration consistently.
541
+ const turnStartedAt = Date.now();
441
542
  let turnCount = 0;
442
543
  let toolCallCount = 0;
443
544
  let fallbackActivated = false;
@@ -446,7 +547,27 @@ class AidenAgent {
446
547
  let emptyRetriesUsed = 0;
447
548
  let finishReason = 'stop';
448
549
  let finalContent = '';
550
+ // v4.1.6 spike (TCE) — per-turn loop detection + recovery state.
551
+ // Default ON as of v4.2 Phase 6 — set AIDEN_TCE=0 to disable.
552
+ // When disabled, TurnState.recordToolCall short-circuits with
553
+ // `{kind: 'allow'}` and the entire v4.2 recovery surface stays
554
+ // dormant (zero behavioural change vs v4.1.6).
555
+ const turnState = new turnState_1.TurnState();
556
+ // v4.2 Phase 1 — per-tool verifier registry. Constructed
557
+ // unconditionally (cheap, no side effects) but only used to
558
+ // classify tool outcomes when TCE is enabled; verification args
559
+ // are passed to TurnState only inside the gated branch below.
560
+ const verifierRegistry = (0, verifier_1.buildDefaultRegistry)();
561
+ // v4.2 Phase 2 — per-tool failure classifier. Same gating as
562
+ // the verifier; only runs when verification.ok === false. Phase 2
563
+ // records-only — Phase 3 wires recovery actions off the category.
564
+ const failureClassifier = (0, failureClassifier_1.buildDefaultClassifier)();
565
+ let toolLoopCard = undefined;
449
566
  while (true) {
567
+ // v4.1.6 spike — decrement cooldown counters once per iteration
568
+ // so cooled-down tools eventually return to the schemas. No-op
569
+ // when TCE is disabled.
570
+ turnState.advanceIteration();
450
571
  if (turnCount >= this.maxTurns) {
451
572
  finishReason = 'budget_exhausted';
452
573
  break;
@@ -464,9 +585,22 @@ class AidenAgent {
464
585
  this.onBudgetWarning?.('warning', turnCount, this.maxTurns);
465
586
  }
466
587
  // ── Provider call (stream or non-stream) ──────────────────────────
588
+ //
589
+ // v4.1.6 spike (TCE) — filter cooled-down tools out of the
590
+ // schemas we send to the provider. The model literally cannot
591
+ // see (and therefore cannot request) a cooled-down tool until
592
+ // its cooldown counter decrements to zero via
593
+ // `turnState.advanceIteration()`. No-op when TCE disabled
594
+ // (`getCooledDownTools()` returns []).
595
+ let effectiveTools = tools;
596
+ const cooledDown = turnState.getCooledDownTools();
597
+ if (cooledDown.length > 0) {
598
+ const cdSet = new Set(cooledDown);
599
+ effectiveTools = tools.filter((t) => !cdSet.has(t.name));
600
+ }
467
601
  let output;
468
602
  try {
469
- output = await this.callProvider(messages, tools, runOptions);
603
+ output = await this.callProvider(messages, effectiveTools, runOptions);
470
604
  }
471
605
  catch (err) {
472
606
  const error = err instanceof Error ? err : new Error(String(err));
@@ -484,6 +618,25 @@ class AidenAgent {
484
618
  }
485
619
  totalUsage.inputTokens += output.usage?.inputTokens ?? 0;
486
620
  totalUsage.outputTokens += output.usage?.outputTokens ?? 0;
621
+ // v4.2 Phase 4 — capture the state going INTO this iteration's
622
+ // tool dispatch. MUST run BEFORE `messages.push(assistantMsg)`
623
+ // so the checkpoint represents "the conversation before the
624
+ // model decided to call this iteration's tools". If rollback
625
+ // fires later, truncating `messages.length` to
626
+ // `checkpoint.messages.length` drops the assistant tool_call
627
+ // message together with its tool result messages — preserving
628
+ // tool_call/tool_result pairing in the rolled-back state.
629
+ //
630
+ // Capturing AFTER the assistant push (the prior placement) was
631
+ // a real bug: rollback would leave the assistant tool_call in
632
+ // history without its tool results, producing strict-provider
633
+ // 400 errors of the form "No tool output found for function
634
+ // call <id>". Tests in tests/v4/core/checkpoint-integration
635
+ // assert the post-rollback messages array contains zero orphan
636
+ // assistant tool_calls — this position is part of the contract.
637
+ //
638
+ // No-op when TCE is disabled (AIDEN_TCE=0) or checkpointDepth=0.
639
+ turnState.captureCheckpoint(messages, turnCount);
487
640
  // ── Append assistant message ──────────────────────────────────────
488
641
  const assistantMsg = output.toolCalls.length > 0
489
642
  ? { role: 'assistant', content: output.content ?? '', toolCalls: output.toolCalls }
@@ -558,8 +711,30 @@ class AidenAgent {
558
711
  }
559
712
  // ── Dispatch tools sequentially ──────────────────────────────────
560
713
  const turnToolMessages = [];
714
+ // v4.1.6 spike (TCE) — set when TurnState surfaces a tool_loop
715
+ // mid-batch. The agent stops dispatching remaining calls in the
716
+ // batch and breaks out of the outer iteration loop cleanly.
717
+ let surfaceDecision = null;
718
+ // v4.2 Phase 4 — set when TurnState's recovery controller asks
719
+ // for a rollback. The agent loop truncates messages + restores
720
+ // TurnState internals + pushes a corrective system message,
721
+ // then continues the outer iteration loop from a clean baseline.
722
+ let rollbackDecision = null;
561
723
  for (const call of output.toolCalls) {
562
724
  this.onToolCall?.(call, 'before');
725
+ // v4.2 Phase 4 — mark any active checkpoints as containing a
726
+ // mutating call BEFORE dispatch. Done pre-dispatch (not post)
727
+ // so that even if the tool throws / errors / produces a
728
+ // partial side effect, the mutation flag is set — rollback
729
+ // safety errs on the side of "this iteration mutated state".
730
+ // The mutability resolver is wired from the CLI's tool
731
+ // registry (`resolveMutates`); unknown tools return undefined,
732
+ // which we treat as non-mutating (leave the flag alone).
733
+ // Plugin authors should declare `mutates` honestly on their
734
+ // tool handlers — this is the structural enforcement point.
735
+ if (turnState.isEnabled() && this.resolveMutates?.(call.name) === true) {
736
+ turnState.markMutationOnLiveCheckpoint(call.name);
737
+ }
563
738
  let result;
564
739
  try {
565
740
  result = await this.toolExecutor(call);
@@ -573,11 +748,46 @@ class AidenAgent {
573
748
  };
574
749
  }
575
750
  toolCallCount += 1;
751
+ // v4.2 Phase 1 — verifier classification. Runs only when TCE
752
+ // is enabled; the registry resolves a per-tool verifier or
753
+ // falls back to the heuristic default. Synchronous + pure;
754
+ // no network, no side effects.
755
+ let verification;
756
+ let classification = null;
757
+ if (turnState.isEnabled()) {
758
+ try {
759
+ verification = verifierRegistry.resolve(call.name)(call.name, call.arguments, result);
760
+ }
761
+ catch {
762
+ // Defensive — a buggy verifier never breaks the agent loop.
763
+ verification = undefined;
764
+ }
765
+ // v4.2 Phase 2 — classify WHY when the verifier said !ok.
766
+ // classify(...) returns null for ok results, so happy-path
767
+ // calls incur zero classifier work.
768
+ if (verification && !verification.ok) {
769
+ try {
770
+ classification = failureClassifier.classify(verification, call.name, call.arguments, result);
771
+ }
772
+ catch {
773
+ // Defensive — a buggy classifier never breaks the loop.
774
+ classification = null;
775
+ }
776
+ }
777
+ }
576
778
  toolCallTrace.push({
577
779
  name: call.name,
578
780
  result: result.result,
579
781
  error: result.error,
580
782
  verified: this.resolveVerifiedFlag?.(result),
783
+ // v4.2 Phase 1 — verification surfaces alongside the trace
784
+ // entry for downstream callers (chatSession, loopTrace,
785
+ // future RecoveryReport). Undefined when TCE is off.
786
+ verification,
787
+ // v4.2 Phase 2 — classification surfaces alongside verification.
788
+ // Undefined for verifier-ok calls (classifier skips them) and
789
+ // when TCE is off.
790
+ classification: classification ?? undefined,
581
791
  });
582
792
  fullTrace.push({ name: call.name, args: call.arguments });
583
793
  // URL ledger ingest — extracts ids from result body for next turn.
@@ -596,6 +806,126 @@ class AidenAgent {
596
806
  ? `[error] ${result.error}`
597
807
  : stringifyToolResult(result.result),
598
808
  });
809
+ // v4.1.6 spike (TCE) — after the tool result lands in the
810
+ // message history, consult the recovery controller. Returns
811
+ // `allow` immediately when TCE disabled (zero overhead).
812
+ // v4.2 Phase 1 — pass the verifier outcome so TurnState's
813
+ // consecFailed counter can fast-fail on demonstrably failing
814
+ // tool calls before the slower signature/name counters fire.
815
+ // v4.2 Phase 2 — also pass the classification so TurnState
816
+ // records the WHY for Phase 3's RecoveryReport.
817
+ const recovery = turnState.recordToolCall(call.name, call.arguments, verification, classification);
818
+ if (recovery.kind === 'hint' && recovery.hintMessage) {
819
+ // Stage 1: append a corrective system message so the model
820
+ // sees it on the next provider call. Same pattern as the
821
+ // existing skill-enforcement + URL-provenance correctives.
822
+ turnToolMessages.push({
823
+ role: 'system',
824
+ content: recovery.hintMessage,
825
+ });
826
+ }
827
+ else if (recovery.kind === 'cooldown_with_rollback' && recovery.rollback) {
828
+ // v4.2 Phase 4 — controller asks us to roll back. Capture
829
+ // the decision; we apply it AFTER the inner dispatch loop
830
+ // exits so we don't leave partial turnToolMessages in a
831
+ // half-state. Break out of dispatch immediately — no point
832
+ // running more tools whose results we're about to drop.
833
+ rollbackDecision = recovery;
834
+ break;
835
+ }
836
+ else if (recovery.kind === 'cooldown' && recovery.cooldownMessage) {
837
+ // Stage 2: cooldown has already been recorded internally
838
+ // (next iteration's schema-filter step excludes this tool).
839
+ // Inject a system message announcing the cooldown so the
840
+ // model knows why the tool just disappeared from its menu.
841
+ turnToolMessages.push({
842
+ role: 'system',
843
+ content: recovery.cooldownMessage,
844
+ });
845
+ }
846
+ else if (recovery.kind === 'surface' && recovery.surfaceCard) {
847
+ // Stage 3: structured failure. Stop dispatching the rest of
848
+ // the batch — anything else is throwing good budget after
849
+ // bad. The outer loop reads `surfaceDecision` below and
850
+ // exits cleanly.
851
+ surfaceDecision = recovery;
852
+ break;
853
+ }
854
+ }
855
+ // v4.2 Phase 4 — apply rollback if the controller asked for it.
856
+ // Truncate messages to the captured snapshot length, restore
857
+ // TurnState internals, then push a corrective system message
858
+ // and continue the OUTER iteration loop. We deliberately drop
859
+ // any partial `turnToolMessages` collected before the rollback
860
+ // trigger — those are the noise we're trying to undo.
861
+ //
862
+ // Hard-block invariant: TurnState only emits
863
+ // `cooldown_with_rollback` when the target checkpoint has
864
+ // `containedMutations === false`, so we never get here for an
865
+ // iteration that ran a mutating tool. The optional
866
+ // `rollback.blockedBy` is empty in Phase 4 (kept on the type
867
+ // for a Phase 5+ soft-rollback variant).
868
+ if (rollbackDecision && rollbackDecision.rollback) {
869
+ const { checkpoint, blockedBy } = rollbackDecision.rollback;
870
+ // Truncate messages array to the captured length. The captured
871
+ // items are immutable Message references; we keep them as-is
872
+ // and just shorten the live array.
873
+ messages.length = checkpoint.messages.length;
874
+ // Restore TurnState mutable internals (stage / streaks /
875
+ // cooledDownTools / arrays). The cooled-down tools map is
876
+ // preserved as it was at checkpoint time — but the controller
877
+ // already added the looping tool to `cooledDownTools` before
878
+ // emitting the decision, so we need to RE-apply that cooldown
879
+ // after restore to honour the cooldown intent.
880
+ turnState.restoreInternalsFrom(checkpoint);
881
+ // Re-cool the tool that triggered the rollback so the next
882
+ // provider call sees the constrained schema.
883
+ if (rollbackDecision.toolName) {
884
+ turnState.reapplyCooldown(rollbackDecision.toolName);
885
+ }
886
+ // Inject corrective system message so the model sees what
887
+ // happened and why the tool just disappeared from its menu.
888
+ messages.push({
889
+ role: 'system',
890
+ content: (0, checkpoint_1.buildRollbackMessage)({
891
+ iteration: checkpoint.iteration,
892
+ toolName: rollbackDecision.toolName,
893
+ blockedBy,
894
+ }),
895
+ });
896
+ // Continue the outer iteration loop from the restored
897
+ // baseline. The next provider call gets the filtered tool
898
+ // schema (cooldown applied) and the corrective message.
899
+ continue;
900
+ }
901
+ // v4.1.6 spike (TCE) — terminal surface handling.
902
+ if (surfaceDecision && surfaceDecision.kind === 'surface') {
903
+ finishReason = 'tool_loop';
904
+ // v4.2 Phase 3 — enrich the base surface card with a
905
+ // structured RecoveryReport. Pure synthesis from TurnState's
906
+ // diagnostic snapshot + first-user-message goal + duration.
907
+ // Implicit gating: this branch is only reachable when
908
+ // TurnState is enabled, so AIDEN_TCE=0 (opt-out) never
909
+ // builds a report.
910
+ if (surfaceDecision.surfaceCard) {
911
+ const report = (0, recoveryReport_1.buildRecoveryReport)({
912
+ snapshot: turnState.getDiagnosticSnapshot(),
913
+ goal: (0, recoveryReport_1.extractGoal)(messages),
914
+ exitReason: 'tool_loop',
915
+ durationMs: Date.now() - turnStartedAt,
916
+ });
917
+ toolLoopCard = (0, recoveryReport_1.enrichCardWithReport)(surfaceDecision.surfaceCard, report);
918
+ }
919
+ else {
920
+ toolLoopCard = surfaceDecision.surfaceCard;
921
+ }
922
+ // Push the partial tool messages we collected so honesty +
923
+ // history downstream see the full sequence including the
924
+ // loop-trigger call. No final assistant message — the
925
+ // tool_loop card IS the user-facing surface.
926
+ messages.push(...turnToolMessages);
927
+ finalContent = '';
928
+ break;
599
929
  }
600
930
  // ── Iteration-budget injection on the LAST tool message ──────────
601
931
  if (this.iterationBudgetInjection && turnToolMessages.length > 0) {
@@ -618,6 +948,7 @@ class AidenAgent {
618
948
  totalUsage,
619
949
  toolCallTrace,
620
950
  fullTrace,
951
+ toolLoopCard,
621
952
  };
622
953
  }
623
954
  /**
@@ -629,6 +960,18 @@ class AidenAgent {
629
960
  */
630
961
  async callProvider(messages, tools, runOptions) {
631
962
  const wantStream = runOptions.stream === true && typeof this.provider.callStream === 'function';
963
+ // v4.1.5 Issue K — fire just before the HTTP request opens, so the
964
+ // display layer can transition the activity verb from local-prep
965
+ // ("preparing prompt", "selecting tools") to a network verb
966
+ // ("calling provider"). The wait for TTFT (time-to-first-token) is
967
+ // the longest gap in most turns and is what the wave bar covers.
968
+ // Fires for both streaming and non-streaming paths — caller may use
969
+ // it to add a one-shot indicator on non-streaming providers too.
970
+ // Defensive try/catch (a misbehaving hook must not block dispatch).
971
+ try {
972
+ this.onProviderRequestStart?.(this.providerId);
973
+ }
974
+ catch { /* defensive */ }
632
975
  if (!wantStream) {
633
976
  return this.provider.call({ messages, tools });
634
977
  }
@@ -671,6 +1014,30 @@ class AidenAgent {
671
1014
  }
672
1015
  exports.AidenAgent = AidenAgent;
673
1016
  // ── Free helpers ────────────────────────────────────────────────────────
1017
+ /**
1018
+ * v4.1.5 Issue K — best-effort count of "memory facts" from a
1019
+ * MemorySnapshot. Counts markdown bullet-list lines (`- `) in both
1020
+ * MEMORY.md and USER.md. This is a fuzzy proxy — the agent stores
1021
+ * facts as bullets by convention but free-form prose can also carry
1022
+ * fact-like content. Surfaced verbatim to the display layer; treat as
1023
+ * "approximately N items in the persistent memory file" rather than
1024
+ * a precise inventory.
1025
+ */
1026
+ function countMemoryFacts(snapshot) {
1027
+ if (!snapshot || typeof snapshot !== 'object')
1028
+ return 0;
1029
+ const s = snapshot;
1030
+ let count = 0;
1031
+ for (const md of [s.memoryMd, s.userMd]) {
1032
+ if (typeof md !== 'string' || md.length === 0)
1033
+ continue;
1034
+ for (const line of md.split('\n')) {
1035
+ if (line.trim().startsWith('- '))
1036
+ count += 1;
1037
+ }
1038
+ }
1039
+ return count;
1040
+ }
674
1041
  function lastUserMessageContent(history) {
675
1042
  for (let i = history.length - 1; i >= 0; i--) {
676
1043
  const m = history[i];