aiden-runtime 4.1.5 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/README.md +250 -847
  2. package/dist/api/server.js +32 -5
  3. package/dist/cli/v4/aidenCLI.js +351 -53
  4. package/dist/cli/v4/callbacks.js +170 -0
  5. package/dist/cli/v4/chatSession.js +138 -3
  6. package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
  7. package/dist/cli/v4/commands/browserDepth.js +45 -0
  8. package/dist/cli/v4/commands/cron.js +264 -0
  9. package/dist/cli/v4/commands/daemon.js +541 -0
  10. package/dist/cli/v4/commands/daemonStatus.js +253 -0
  11. package/dist/cli/v4/commands/help.js +7 -0
  12. package/dist/cli/v4/commands/index.js +20 -1
  13. package/dist/cli/v4/commands/runs.js +203 -0
  14. package/dist/cli/v4/commands/sandbox.js +48 -0
  15. package/dist/cli/v4/commands/suggestions.js +68 -0
  16. package/dist/cli/v4/commands/tce.js +41 -0
  17. package/dist/cli/v4/commands/trigger.js +378 -0
  18. package/dist/cli/v4/commands/update.js +95 -3
  19. package/dist/cli/v4/daemonAgentBuilder.js +142 -0
  20. package/dist/cli/v4/defaultSoul.js +1 -1
  21. package/dist/cli/v4/display/capabilityCard.js +26 -0
  22. package/dist/cli/v4/display.js +18 -8
  23. package/dist/cli/v4/replyRenderer.js +31 -23
  24. package/dist/cli/v4/updateBootPrompt.js +170 -0
  25. package/dist/core/playwrightBridge.js +129 -0
  26. package/dist/core/v4/aidenAgent.js +308 -4
  27. package/dist/core/v4/browserState.js +436 -0
  28. package/dist/core/v4/checkpoint.js +79 -0
  29. package/dist/core/v4/daemon/bootstrap.js +604 -0
  30. package/dist/core/v4/daemon/cleanShutdown.js +154 -0
  31. package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
  32. package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
  33. package/dist/core/v4/daemon/cron/migration.js +199 -0
  34. package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
  35. package/dist/core/v4/daemon/daemonConfig.js +90 -0
  36. package/dist/core/v4/daemon/db/connection.js +106 -0
  37. package/dist/core/v4/daemon/db/migrations.js +296 -0
  38. package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
  39. package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
  40. package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
  41. package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
  42. package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
  43. package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
  44. package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
  45. package/dist/core/v4/daemon/dispatcher/index.js +53 -0
  46. package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
  47. package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
  48. package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
  49. package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
  50. package/dist/core/v4/daemon/drain.js +156 -0
  51. package/dist/core/v4/daemon/eventLoopLag.js +73 -0
  52. package/dist/core/v4/daemon/health.js +159 -0
  53. package/dist/core/v4/daemon/idempotencyStore.js +204 -0
  54. package/dist/core/v4/daemon/index.js +179 -0
  55. package/dist/core/v4/daemon/instanceTracker.js +99 -0
  56. package/dist/core/v4/daemon/resourceRegistry.js +150 -0
  57. package/dist/core/v4/daemon/restartCode.js +32 -0
  58. package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
  59. package/dist/core/v4/daemon/runStore.js +114 -0
  60. package/dist/core/v4/daemon/runtimeLock.js +167 -0
  61. package/dist/core/v4/daemon/signals.js +50 -0
  62. package/dist/core/v4/daemon/supervisor.js +272 -0
  63. package/dist/core/v4/daemon/triggerBus.js +279 -0
  64. package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
  65. package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
  66. package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
  67. package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
  68. package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
  69. package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
  70. package/dist/core/v4/daemon/triggers/email/index.js +332 -0
  71. package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
  72. package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
  73. package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
  74. package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
  75. package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
  76. package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
  77. package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
  78. package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
  79. package/dist/core/v4/daemon/triggers/webhook.js +376 -0
  80. package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
  81. package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
  82. package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
  83. package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
  84. package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
  85. package/dist/core/v4/daemon/types.js +15 -0
  86. package/dist/core/v4/dockerSession.js +461 -0
  87. package/dist/core/v4/dryRun.js +117 -0
  88. package/dist/core/v4/failureClassifier.js +779 -0
  89. package/dist/core/v4/recoveryReport.js +449 -0
  90. package/dist/core/v4/runtimeToggles.js +187 -0
  91. package/dist/core/v4/sandboxConfig.js +285 -0
  92. package/dist/core/v4/sandboxFs.js +316 -0
  93. package/dist/core/v4/suggestionCatalog.js +41 -0
  94. package/dist/core/v4/suggestionEngine.js +210 -0
  95. package/dist/core/v4/toolRegistry.js +18 -0
  96. package/dist/core/v4/turnState.js +587 -0
  97. package/dist/core/v4/update/checkUpdate.js +63 -3
  98. package/dist/core/v4/update/installMethodDetect.js +115 -0
  99. package/dist/core/v4/update/registryClient.js +121 -0
  100. package/dist/core/v4/update/skipState.js +75 -0
  101. package/dist/core/v4/verifier.js +448 -0
  102. package/dist/core/version.js +1 -1
  103. package/dist/tools/v4/browser/_observer.js +224 -0
  104. package/dist/tools/v4/browser/browserBlocker.js +396 -0
  105. package/dist/tools/v4/browser/browserClick.js +18 -1
  106. package/dist/tools/v4/browser/browserClose.js +18 -1
  107. package/dist/tools/v4/browser/browserExtract.js +5 -1
  108. package/dist/tools/v4/browser/browserFill.js +17 -1
  109. package/dist/tools/v4/browser/browserGetUrl.js +5 -1
  110. package/dist/tools/v4/browser/browserNavigate.js +16 -1
  111. package/dist/tools/v4/browser/browserScreenshot.js +5 -1
  112. package/dist/tools/v4/browser/browserScroll.js +18 -1
  113. package/dist/tools/v4/browser/browserType.js +17 -1
  114. package/dist/tools/v4/browser/captchaCheck.js +5 -1
  115. package/dist/tools/v4/executeCode.js +1 -0
  116. package/dist/tools/v4/files/fileCopy.js +56 -2
  117. package/dist/tools/v4/files/fileDelete.js +38 -1
  118. package/dist/tools/v4/files/fileList.js +12 -1
  119. package/dist/tools/v4/files/fileMove.js +59 -2
  120. package/dist/tools/v4/files/filePatch.js +43 -1
  121. package/dist/tools/v4/files/fileRead.js +12 -1
  122. package/dist/tools/v4/files/fileWrite.js +41 -1
  123. package/dist/tools/v4/index.js +71 -58
  124. package/dist/tools/v4/memory/memoryAdd.js +14 -0
  125. package/dist/tools/v4/memory/memoryRemove.js +14 -0
  126. package/dist/tools/v4/memory/memoryReplace.js +15 -0
  127. package/dist/tools/v4/memory/sessionSummary.js +12 -0
  128. package/dist/tools/v4/process/processKill.js +19 -0
  129. package/dist/tools/v4/process/processList.js +1 -0
  130. package/dist/tools/v4/process/processLogRead.js +1 -0
  131. package/dist/tools/v4/process/processSpawn.js +13 -0
  132. package/dist/tools/v4/process/processWait.js +1 -0
  133. package/dist/tools/v4/sessions/recallSession.js +1 -0
  134. package/dist/tools/v4/sessions/sessionList.js +1 -0
  135. package/dist/tools/v4/sessions/sessionSearch.js +1 -0
  136. package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
  137. package/dist/tools/v4/skills/skillManage.js +13 -0
  138. package/dist/tools/v4/skills/skillView.js +1 -0
  139. package/dist/tools/v4/skills/skillsList.js +1 -0
  140. package/dist/tools/v4/subagent/subagentFanout.js +1 -0
  141. package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
  142. package/dist/tools/v4/system/appClose.js +13 -0
  143. package/dist/tools/v4/system/appInput.js +13 -0
  144. package/dist/tools/v4/system/appLaunch.js +13 -0
  145. package/dist/tools/v4/system/clipboardRead.js +1 -0
  146. package/dist/tools/v4/system/clipboardWrite.js +14 -0
  147. package/dist/tools/v4/system/mediaKey.js +12 -0
  148. package/dist/tools/v4/system/mediaSessions.js +1 -0
  149. package/dist/tools/v4/system/mediaTransport.js +13 -0
  150. package/dist/tools/v4/system/naturalEvents.js +1 -0
  151. package/dist/tools/v4/system/nowPlaying.js +1 -0
  152. package/dist/tools/v4/system/osProcessList.js +1 -0
  153. package/dist/tools/v4/system/screenshot.js +1 -0
  154. package/dist/tools/v4/system/systemInfo.js +1 -0
  155. package/dist/tools/v4/system/volumeSet.js +17 -0
  156. package/dist/tools/v4/terminal/shellExec.js +81 -9
  157. package/dist/tools/v4/web/deepResearch.js +1 -0
  158. package/dist/tools/v4/web/openUrl.js +1 -0
  159. package/dist/tools/v4/web/webFetch.js +1 -0
  160. package/dist/tools/v4/web/webPage.js +1 -0
  161. package/dist/tools/v4/web/webSearch.js +1 -0
  162. package/dist/tools/v4/web/youtubeSearch.js +1 -0
  163. package/package.json +7 -1
@@ -0,0 +1,587 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * core/v4/turnState.ts — v4.1.6 spike: Task Completion Engine (TCE)
10
+ * loop detection + recovery controller.
11
+ *
12
+ * One TurnState instance lives per `runConversation` call. **Default
13
+ * ON** as of v4.2 Phase 6 — set `AIDEN_TCE=0` to disable. Zero
14
+ * behavioral change vs v4.1.6 when disabled.
15
+ *
16
+ * Concept: per-turn state object that the agent loop consults after
17
+ * each tool dispatch. Tracks how often the model is repeating itself
18
+ * — both at the precise-call level (same tool name + identical args)
19
+ * AND at the same-tool-name level (any args). Returns a typed
20
+ * recovery decision so the agent loop can act on it.
21
+ *
22
+ * Two counters by design (the layered-budget pattern):
23
+ *
24
+ * - `consecSignature`: same name + same args-hash run length.
25
+ * Resets when EITHER name or args change. Catches precise loops
26
+ * where the model literally repeats the identical call.
27
+ *
28
+ * - `consecName`: same tool name run length (any args).
29
+ * Resets only when the tool name changes. Catches broader
30
+ * "fishing" patterns where the model probes a tool with
31
+ * different args repeatedly without making progress.
32
+ *
33
+ * Hint stage uses signature counting (precise — fires only on
34
+ * genuine identical-call loops; not on legitimate skill exploration
35
+ * via `skill_view` with different names). Cooldown + surface use
36
+ * name counting (broader — catches the reported 30-skill_view
37
+ * failure mode regardless of args).
38
+ *
39
+ * Three escalating recovery stages, monotonic (once hinted, can
40
+ * escalate to cooldown then surface; never re-fires the same stage):
41
+ *
42
+ * Stage 1 — HINT (signature ≥ 5): inject `role: 'system'` message
43
+ * into the conversation suggesting the model reconsider.
44
+ *
45
+ * Stage 2 — COOLDOWN (name ≥ 8): mark the tool cooled-down for N
46
+ * iterations. Agent filters the tool out of the schemas passed
47
+ * to the provider, so the model literally cannot call it.
48
+ *
49
+ * Stage 3 — SURFACE (name ≥ 11): return a structured-failure card.
50
+ * Agent ends the turn cleanly via `finishReason = 'tool_loop'`;
51
+ * chatSession renders a capability-card-style failure surface.
52
+ *
53
+ * Thresholds are tunable via constructor options. Pure module — no
54
+ * Display dependency, no event-emitter side effects. Safe to import
55
+ * from anywhere in the codebase.
56
+ */
57
+ var __importDefault = (this && this.__importDefault) || function (mod) {
58
+ return (mod && mod.__esModule) ? mod : { "default": mod };
59
+ };
60
+ Object.defineProperty(exports, "__esModule", { value: true });
61
+ exports.TurnState = void 0;
62
+ const node_crypto_1 = __importDefault(require("node:crypto"));
63
+ // ── Implementation ──────────────────────────────────────────────────────────
64
/**
 * Per-turn loop-detection and recovery controller.
 *
 * The agent loop reports every tool dispatch through `recordToolCall` and
 * receives a decision (`allow`, `hint`, `cooldown`, `cooldown_with_rollback`
 * or `surface`). The class keeps three independent streak counters (same
 * tool name, same name + args signature, consecutive verified failures), a
 * map of cooled-down tools, and a bounded buffer of checkpoints that a
 * cooldown decision may offer as a rollback target.
 *
 * When constructed disabled, every mutating method is a no-op and
 * `recordToolCall` always answers `{ kind: 'allow', consecutive: 0 }`.
 */
class TurnState {
    /**
     * @param {object} [opts]
     * @param {boolean} [opts.enabled] Explicit on/off override; when omitted
     *   the runtimeToggles module (or the AIDEN_TCE env var) decides.
     * @param {number} [opts.hintConsecThreshold=5] Signature streak that triggers a hint.
     * @param {number} [opts.cooldownConsecThreshold=8] Name streak that triggers a cooldown.
     * @param {number} [opts.surfaceConsecThreshold=11] Name streak that triggers a surface.
     * @param {number} [opts.cooldownIterations=3] Iterations a cooled-down tool stays hidden.
     * @param {number} [opts.failedConsecThreshold=3] Verified-failure streak that triggers a hint.
     * @param {number} [opts.checkpointDepth=3] Max checkpoints retained; 0 disables the buffer.
     */
    constructor(opts = {}) {
        this.stage = 'none';
        this.toolCalls = [];
        this.successfulTools = new Set();
        // Layered streak tracking: name-only streak and name+args streak.
        this.consecName = { name: null, count: 0 };
        this.consecSignature = { signature: null, count: 0 };
        // Verifier-driven failure streak. Kept separate from the two streaks
        // above because a failing tool is not necessarily retried with
        // identical arguments.
        this.consecFailed = { name: null, count: 0 };
        this.cooledDownTools = new Map();
        this.recoveryEvents = [];
        // Only calls that supplied a verification argument are logged here,
        // so the array never contains `undefined` placeholders.
        this.verifications = [];
        // Only calls that supplied a classification argument are logged here.
        this.classifications = [];
        // Bounded buffer of per-iteration checkpoints, newest at the tail.
        this.checkpoints = [];
        // An explicit boolean override always wins. Otherwise ask the
        // runtimeToggles singleton, and if that module cannot be loaded fall
        // back to the env var, where only the literal string '0' disables.
        if (typeof opts.enabled === 'boolean') {
            this.enabled = opts.enabled;
        }
        else {
            try {
                // eslint-disable-next-line @typescript-eslint/no-var-requires
                const rt = require('./runtimeToggles');
                this.enabled = rt.getRuntimeToggles().isEnabled('tce');
            }
            catch {
                // runtimeToggles unavailable — fall back to a direct env read.
                this.enabled = process.env.AIDEN_TCE !== '0';
            }
        }
        this.hintConsec = opts.hintConsecThreshold ?? 5;
        this.cooldownConsec = opts.cooldownConsecThreshold ?? 8;
        this.surfaceConsec = opts.surfaceConsecThreshold ?? 11;
        this.cooldownIters = opts.cooldownIterations ?? 3;
        this.failedConsec = opts.failedConsecThreshold ?? 3;
        // checkpointDepth = 0 disables the buffer entirely. The value is
        // normalized to a non-negative integer: a NaN depth would make both
        // the `=== 0` guard and the `length > depth` trim comparison false,
        // letting the buffer grow without bound, so NaN falls back to the
        // default of 3.
        const requestedDepth = Number(opts.checkpointDepth ?? 3);
        this.checkpointDepth = Number.isNaN(requestedDepth)
            ? 3
            : Math.max(0, Math.floor(requestedDepth));
    }
    /** @returns {boolean} Whether this controller is active. */
    isEnabled() {
        return this.enabled;
    }
    /**
     * Record one tool dispatch, update the streak counters, and decide which
     * recovery action (if any) applies.
     *
     * When disabled, returns `{ kind: 'allow', consecutive: 0 }` immediately
     * without mutating any state.
     *
     * @param {string} name Tool name.
     * @param {*} args Tool arguments; hashed via `canonicalArgsHash`.
     * @param {object} [verification] Optional verifier result. `ok` truthy
     *   resets the failure streak, falsy increments it; `reason` and
     *   `suggestion` are forwarded into the failure hint message.
     * @param {*} [classification] Optional failure classification; logged only.
     * @returns {object} Decision with `kind` and `consecutive`, plus
     *   `toolName` and a stage-specific payload (`hintMessage`,
     *   `cooldownMessage`, `rollback` or `surfaceCard`) for non-allow kinds.
     */
    recordToolCall(name, args, verification, classification) {
        if (!this.enabled) {
            return { kind: 'allow', consecutive: 0 };
        }
        const argsHash = canonicalArgsHash(args);
        const signature = `${name}::${argsHash}`;
        const ts = Date.now();
        this.toolCalls.push({ name, argsHash, ts });
        // Name streak: resets only on tool-name change.
        if (this.consecName.name === name) {
            this.consecName.count += 1;
        }
        else {
            this.consecName = { name, count: 1 };
        }
        // Signature streak: resets on EITHER name or args change.
        if (this.consecSignature.signature === signature) {
            this.consecSignature.count += 1;
        }
        else {
            this.consecSignature = { signature, count: 1 };
        }
        // Failure streak: reset on a verified-ok call, increment on a
        // verified-fail call for the same tool, restart at 1 for a new tool.
        // Calls without a verification leave the counter untouched unless
        // the tool name changed.
        if (verification) {
            this.verifications.push({ name, verification, ts });
            if (verification.ok) {
                this.consecFailed = { name, count: 0 };
            }
            else if (this.consecFailed.name === name) {
                this.consecFailed.count += 1;
            }
            else {
                this.consecFailed = { name, count: 1 };
            }
        }
        else if (this.consecFailed.name !== name) {
            // Name change with no verification — reset so the failed counter
            // stays aligned with `consecName`.
            this.consecFailed = { name: null, count: 0 };
        }
        // Record-only: no recovery action is derived from the classification.
        if (classification) {
            this.classifications.push({ name, classification, ts });
        }
        // Remember tools dispatched before any cooldown/surface stage; they
        // feed the surface card's `canStill` list.
        // NOTE(review): this also records calls whose verification was not
        // ok, despite the field name — confirm whether that is intended.
        if (this.stage === 'none' || this.stage === 'hinted') {
            this.successfulTools.add(name);
        }
        // ── Stage transition gate ────────────────────────────────────────
        // Surface (highest priority): name streak reached the surface
        // threshold and we have not surfaced yet.
        if (this.stage !== 'surfaced' && this.consecName.count >= this.surfaceConsec) {
            this.stage = 'surfaced';
            const decision = {
                kind: 'surface',
                toolName: name,
                consecutive: this.consecName.count,
                surfaceCard: this.buildSurfaceCard(name, this.consecName.count),
            };
            this.recoveryEvents.push({ stage: 'surfaced', toolName: name, count: this.consecName.count, ts });
            return decision;
        }
        // Cooldown: name streak reached the cooldown threshold, the tool is
        // not currently cooled down, and we have not surfaced.
        if (this.stage !== 'surfaced' &&
            this.consecName.count >= this.cooldownConsec &&
            !this.cooledDownTools.has(name)) {
            this.stage = 'cooldown';
            this.cooledDownTools.set(name, this.cooldownIters);
            // Offer a rollback target only when a checkpoint with no
            // recorded mutations exists; otherwise plain cooldown.
            const restorable = this.findRestorableCheckpoint();
            const baseDecision = {
                kind: 'cooldown',
                toolName: name,
                consecutive: this.consecName.count,
                cooldownMessage: buildCooldownMessage(name, this.cooldownIters),
            };
            this.recoveryEvents.push({ stage: 'cooldown', toolName: name, count: this.consecName.count, ts });
            if (restorable) {
                return {
                    ...baseDecision,
                    kind: 'cooldown_with_rollback',
                    rollback: {
                        checkpoint: restorable,
                        blockedBy: [], // only mutation-free checkpoints are ever returned
                    },
                };
            }
            return baseDecision;
        }
        // Verifier-driven hint: consecutive verified failures of this tool
        // reached the failure threshold while still in the `none` stage.
        if (this.stage === 'none' &&
            this.consecFailed.name === name &&
            this.consecFailed.count >= this.failedConsec) {
            this.stage = 'hinted';
            const decision = {
                kind: 'hint',
                toolName: name,
                consecutive: this.consecFailed.count,
                hintMessage: buildFailedHintMessage(name, this.consecFailed.count, verification),
            };
            this.recoveryEvents.push({ stage: 'hinted', toolName: name, count: this.consecFailed.count, ts });
            return decision;
        }
        // Signature-driven hint: identical name + args repeated up to the
        // hint threshold while still in the `none` stage.
        if (this.stage === 'none' && this.consecSignature.count >= this.hintConsec) {
            this.stage = 'hinted';
            const decision = {
                kind: 'hint',
                toolName: name,
                consecutive: this.consecSignature.count,
                hintMessage: buildHintMessage(name, this.consecSignature.count),
            };
            this.recoveryEvents.push({ stage: 'hinted', toolName: name, count: this.consecSignature.count, ts });
            return decision;
        }
        return { kind: 'allow', consecutive: this.consecName.count };
    }
    /**
     * Names of the tools currently cooled down.
     *
     * @returns {string[]} Fresh array; empty when disabled.
     */
    getCooledDownTools() {
        if (!this.enabled)
            return [];
        return [...this.cooledDownTools.keys()];
    }
    /**
     * Decrement every cooled-down tool's remaining-iteration counter and
     * drop the ones that have served their cooldown. No-op when disabled.
     */
    advanceIteration() {
        if (!this.enabled)
            return;
        for (const [name, remaining] of this.cooledDownTools.entries()) {
            if (remaining <= 1) {
                this.cooledDownTools.delete(name);
            }
            else {
                this.cooledDownTools.set(name, remaining - 1);
            }
        }
    }
    // ── Checkpoint / restore API ───────────────────────────────────────────
    /**
     * Capture a checkpoint: a shallow copy of `messages` plus a snapshot of
     * this controller's internal state. The oldest checkpoints are dropped
     * once more than `checkpointDepth` are held.
     *
     * No-op when disabled or when `checkpointDepth === 0`.
     *
     * @param {Array} messages Conversation messages; the array is copied, the items are shared.
     * @param {number} iteration Iteration index stored on the checkpoint.
     */
    captureCheckpoint(messages, iteration) {
        if (!this.enabled || this.checkpointDepth === 0)
            return;
        const checkpoint = {
            iteration,
            ts: Date.now(),
            messages: [...messages],
            turnStateSnapshot: this.captureInternalSnapshot(),
            containedMutations: false,
            mutatingToolsSinceCheckpoint: [],
        };
        this.checkpoints.push(checkpoint);
        while (this.checkpoints.length > this.checkpointDepth) {
            this.checkpoints.shift();
        }
    }
    /**
     * Flag EVERY checkpoint currently in the buffer as having seen a
     * mutating tool, and record the tool name on each (deduplicated).
     * Rolling back to any of them would require un-doing this mutation.
     *
     * No-op when disabled or when the buffer is empty.
     *
     * @param {string} toolName Name of the mutating tool.
     */
    markMutationOnLiveCheckpoint(toolName) {
        if (!this.enabled || this.checkpoints.length === 0)
            return;
        for (const cp of this.checkpoints) {
            if (!cp.containedMutations) {
                cp.containedMutations = true;
            }
            const mutating = cp.mutatingToolsSinceCheckpoint;
            if (!mutating.includes(toolName)) {
                mutating.push(toolName);
            }
        }
    }
    /**
     * Find the newest checkpoint whose `containedMutations` flag is still
     * false, walking the buffer from newest to oldest.
     *
     * @returns {object|null} The checkpoint, or null when disabled, when the
     *   buffer is empty, or when every checkpoint has seen a mutation.
     */
    findRestorableCheckpoint() {
        if (!this.enabled || this.checkpoints.length === 0)
            return null;
        for (let i = this.checkpoints.length - 1; i >= 0; i -= 1) {
            const cp = this.checkpoints[i];
            if (!cp.containedMutations)
                return cp;
        }
        return null;
    }
    /**
     * Restore this controller's internal state from a checkpoint's
     * snapshot, then trim the buffer to drop that checkpoint and every
     * newer one. This method does not touch any messages array; that is
     * left to the caller.
     *
     * No-op when disabled. If the checkpoint is no longer in the buffer,
     * the state is still restored and only the trimming step is skipped.
     *
     * @param {object} checkpoint A checkpoint produced by `captureCheckpoint`.
     */
    restoreInternalsFrom(checkpoint) {
        if (!this.enabled)
            return;
        const snap = checkpoint.turnStateSnapshot;
        this.stage = snap.stage;
        this.consecName = { ...snap.consecName };
        this.consecSignature = { ...snap.consecSignature };
        this.consecFailed = { ...snap.consecFailed };
        this.cooledDownTools = new Map(snap.cooledDownTools.map(([k, v]) => [k, v]));
        this.toolCalls = [...snap.toolCalls];
        this.successfulTools = new Set(snap.successfulTools);
        this.recoveryEvents = [...snap.recoveryEvents];
        this.verifications = [...snap.verifications];
        this.classifications = [...snap.classifications];
        // Trim the buffer to remove `checkpoint` and everything newer.
        const idx = this.checkpoints.indexOf(checkpoint);
        if (idx >= 0) {
            this.checkpoints = this.checkpoints.slice(0, idx);
        }
    }
    /**
     * @returns {object[]} Fresh array of the checkpoints currently held
     *   (the checkpoint objects themselves are shared, not copied).
     */
    getCheckpoints() {
        return [...this.checkpoints];
    }
    /**
     * Re-apply a cooldown for `toolName` with the full configured duration,
     * and promote the stage to 'cooldown' if it is currently 'none' or
     * 'hinted'. Needed after `restoreInternalsFrom`, which overwrites
     * `cooledDownTools` and `stage` with the snapshot's values.
     *
     * No-op when disabled.
     *
     * @param {string} toolName Tool to cool down.
     */
    reapplyCooldown(toolName) {
        if (!this.enabled)
            return;
        this.cooledDownTools.set(toolName, this.cooldownIters);
        if (this.stage === 'none' || this.stage === 'hinted') {
            this.stage = 'cooldown';
        }
    }
    /**
     * Internal: copy the current mutable state into a snapshot object for
     * embedding in a checkpoint. The Map and Set are flattened to arrays;
     * log arrays are shallow-copied (entries are shared).
     *
     * @returns {object} Snapshot consumed by `restoreInternalsFrom`.
     */
    captureInternalSnapshot() {
        return {
            stage: this.stage,
            consecName: { ...this.consecName },
            consecSignature: { ...this.consecSignature },
            consecFailed: { ...this.consecFailed },
            cooledDownTools: [...this.cooledDownTools.entries()].map(([k, v]) => [k, v]),
            toolCalls: [...this.toolCalls],
            successfulTools: [...this.successfulTools],
            recoveryEvents: [...this.recoveryEvents],
            verifications: [...this.verifications],
            classifications: [...this.classifications],
        };
    }
    // ── Diagnostic snapshot ────────────────────────────────────────────────
    /**
     * Read-only diagnostic view of the controller's state and thresholds.
     *
     * @returns {object} Copies of the counters and logs plus a `thresholds` object.
     */
    getDiagnosticSnapshot() {
        return {
            enabled: this.enabled,
            stage: this.stage,
            consecName: { ...this.consecName },
            consecSignature: { ...this.consecSignature },
            consecFailed: { ...this.consecFailed },
            cooledDownTools: [...this.cooledDownTools.entries()].map(([name, iterationsRemaining]) => ({ name, iterationsRemaining })),
            toolCalls: [...this.toolCalls],
            successfulTools: [...this.successfulTools],
            recoveryEvents: [...this.recoveryEvents],
            verifications: [...this.verifications],
            classifications: [...this.classifications],
            thresholds: {
                hintConsec: this.hintConsec,
                cooldownConsec: this.cooldownConsec,
                surfaceConsec: this.surfaceConsec,
                cooldownIters: this.cooldownIters,
                failedConsec: this.failedConsec,
            },
        };
    }
    /**
     * Build the structured-failure surface card.
     *
     * @param {string} loopingTool Tool that triggered the surface stage.
     * @param {number} count Consecutive-call count reported on the card.
     * @returns {{title: string, canStill: string[], cannotReliably: string[], fix: string}}
     */
    buildSurfaceCard(loopingTool, count) {
        const canStillItems = [];
        for (const t of this.successfulTools) {
            if (t === loopingTool)
                continue;
            canStillItems.push(`Reuse \`${t}\` (called earlier this turn)`);
        }
        if (canStillItems.length === 0) {
            canStillItems.push('Try a different approach without this tool');
        }
        return {
            title: `Stuck on repeated tool calls`,
            canStill: canStillItems,
            cannotReliably: [
                `Call \`${loopingTool}\` again this turn — fired ${count}× consecutively without making progress`,
            ],
            fix: `Rephrase the request to be more specific about which tool/result you want, ` +
                `or try a different angle (e.g. ask for a concrete output rather than discovery).`,
        };
    }
}
532
+ exports.TurnState = TurnState;
533
+ // ── Internal helpers ────────────────────────────────────────────────────────
534
/**
 * Stable, canonical hash of tool arguments: sha256 over the
 * key-sorted serialization produced by `canonicalStringify`,
 * truncated to the first 12 hex characters.
 *
 * Never throws on odd argument shapes. If serialization throws
 * (e.g. circular references, BigInt values) or does not yield a
 * string, the hash input falls back to `String(args)`; if that
 * conversion throws as well (e.g. a null-prototype object), it
 * falls back to the `Object.prototype.toString` tag.
 *
 * @param {*} args Tool arguments of any shape.
 * @returns {string} 12 lowercase hex characters.
 */
function canonicalArgsHash(args) {
    let serialized;
    try {
        serialized = canonicalStringify(args);
    }
    catch {
        serialized = undefined;
    }
    // hash.update() rejects non-string/non-buffer input, so make sure a
    // string reaches it even when serialization failed or produced
    // something else.
    if (typeof serialized !== 'string') {
        try {
            serialized = String(args);
        }
        catch {
            serialized = Object.prototype.toString.call(args);
        }
    }
    return node_crypto_1.default.createHash('sha256').update(serialized).digest('hex').slice(0, 12);
}
554
/**
 * Serialize a value to a canonical JSON-like string in which object
 * keys are emitted in sorted order at every nesting level, so
 * `{a:1, b:2}` and `{b:2, a:1}` serialize identically.
 *
 * `null` and `undefined` both serialize as `null`. Functions and
 * symbols, for which `JSON.stringify` yields `undefined`, are
 * rendered with `String(value)` so the result is always a string.
 *
 * @param {*} value Any value.
 * @returns {string} Canonical serialization.
 * @throws {TypeError} For BigInt values (propagated from `JSON.stringify`).
 * @throws {RangeError} For circular structures (unbounded recursion).
 */
function canonicalStringify(value) {
    if (value === null || value === undefined)
        return 'null';
    if (typeof value !== 'object') {
        // JSON.stringify returns undefined (not a string) for functions
        // and symbols; fall back to their String() form.
        return JSON.stringify(value) ?? String(value);
    }
    if (Array.isArray(value)) {
        return `[${value.map((item) => canonicalStringify(item)).join(',')}]`;
    }
    const body = Object.keys(value)
        .sort()
        .map((key) => `${JSON.stringify(key)}:${canonicalStringify(value[key])}`)
        .join(',');
    return `{${body}}`;
}
566
/**
 * Compose the hint text shown when the same tool has been called
 * repeatedly with identical arguments.
 *
 * @param {string} toolName Tool being repeated.
 * @param {number} count Length of the repetition streak.
 * @returns {string} Single-line `[tce]`-prefixed message.
 */
function buildHintMessage(toolName, count) {
    const sentences = [
        `[tce] You've called \`${toolName}\` ${count} times in a row with the same arguments.`,
        'This looks like a loop. Reconsider your approach — try a different tool, change the arguments,',
        'or answer with what you know if no tool will make progress.',
    ];
    return sentences.join(' ');
}
571
/**
 * Compose the hint text for a streak of verified failures. Unlike the
 * repetition hint, this one tells the model its call is failing, and
 * includes the verifier's reason and suggestion when they are present.
 *
 * @param {string} toolName Tool that keeps failing.
 * @param {number} count Length of the failure streak.
 * @param {object} [verification] Latest verifier result; its truthy
 *   `reason` and `suggestion` fields are woven into the message.
 * @returns {string} Single-line `[tce]`-prefixed message.
 */
function buildFailedHintMessage(toolName, count, verification) {
    let message = `[tce] \`${toolName}\` has failed ${count} times in a row.`;
    if (verification?.reason) {
        message += ` Latest reason: "${verification.reason}".`;
    }
    message += ' Stop retrying it unchanged — change the arguments, switch to a different tool, ';
    message += 'or answer with what you have if no tool can make progress.';
    if (verification?.suggestion) {
        message += ` ${verification.suggestion}`;
    }
    return message;
}
584
/**
 * Compose the notice telling the model that a tool has been disabled
 * for a number of iterations.
 *
 * @param {string} toolName Tool being cooled down.
 * @param {number} cooldownIters Iterations the tool stays disabled.
 * @returns {string} Single-line `[tce]`-prefixed message.
 */
function buildCooldownMessage(toolName, cooldownIters) {
    const notice = `[tce] \`${toolName}\` is now disabled for the next ${cooldownIters} iteration(s) because it's been`;
    const advice = 'called repeatedly without making progress. Use a different tool or answer with what you have.';
    return `${notice} ${advice}`;
}