aiden-runtime 4.1.5 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -847
- package/dist/api/server.js +32 -5
- package/dist/cli/v4/aidenCLI.js +351 -53
- package/dist/cli/v4/callbacks.js +170 -0
- package/dist/cli/v4/chatSession.js +138 -3
- package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
- package/dist/cli/v4/commands/browserDepth.js +45 -0
- package/dist/cli/v4/commands/cron.js +264 -0
- package/dist/cli/v4/commands/daemon.js +541 -0
- package/dist/cli/v4/commands/daemonStatus.js +253 -0
- package/dist/cli/v4/commands/help.js +7 -0
- package/dist/cli/v4/commands/index.js +20 -1
- package/dist/cli/v4/commands/runs.js +203 -0
- package/dist/cli/v4/commands/sandbox.js +48 -0
- package/dist/cli/v4/commands/suggestions.js +68 -0
- package/dist/cli/v4/commands/tce.js +41 -0
- package/dist/cli/v4/commands/trigger.js +378 -0
- package/dist/cli/v4/commands/update.js +95 -3
- package/dist/cli/v4/daemonAgentBuilder.js +142 -0
- package/dist/cli/v4/defaultSoul.js +1 -1
- package/dist/cli/v4/display/capabilityCard.js +26 -0
- package/dist/cli/v4/display.js +18 -8
- package/dist/cli/v4/replyRenderer.js +31 -23
- package/dist/cli/v4/updateBootPrompt.js +170 -0
- package/dist/core/playwrightBridge.js +129 -0
- package/dist/core/v4/aidenAgent.js +308 -4
- package/dist/core/v4/browserState.js +436 -0
- package/dist/core/v4/checkpoint.js +79 -0
- package/dist/core/v4/daemon/bootstrap.js +604 -0
- package/dist/core/v4/daemon/cleanShutdown.js +154 -0
- package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
- package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
- package/dist/core/v4/daemon/cron/migration.js +199 -0
- package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
- package/dist/core/v4/daemon/daemonConfig.js +90 -0
- package/dist/core/v4/daemon/db/connection.js +106 -0
- package/dist/core/v4/daemon/db/migrations.js +296 -0
- package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
- package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
- package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
- package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
- package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
- package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
- package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
- package/dist/core/v4/daemon/dispatcher/index.js +53 -0
- package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
- package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
- package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
- package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
- package/dist/core/v4/daemon/drain.js +156 -0
- package/dist/core/v4/daemon/eventLoopLag.js +73 -0
- package/dist/core/v4/daemon/health.js +159 -0
- package/dist/core/v4/daemon/idempotencyStore.js +204 -0
- package/dist/core/v4/daemon/index.js +179 -0
- package/dist/core/v4/daemon/instanceTracker.js +99 -0
- package/dist/core/v4/daemon/resourceRegistry.js +150 -0
- package/dist/core/v4/daemon/restartCode.js +32 -0
- package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
- package/dist/core/v4/daemon/runStore.js +114 -0
- package/dist/core/v4/daemon/runtimeLock.js +167 -0
- package/dist/core/v4/daemon/signals.js +50 -0
- package/dist/core/v4/daemon/supervisor.js +272 -0
- package/dist/core/v4/daemon/triggerBus.js +279 -0
- package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
- package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
- package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
- package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
- package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
- package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
- package/dist/core/v4/daemon/triggers/email/index.js +332 -0
- package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
- package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
- package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
- package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
- package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
- package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
- package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
- package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
- package/dist/core/v4/daemon/triggers/webhook.js +376 -0
- package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
- package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
- package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
- package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
- package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
- package/dist/core/v4/daemon/types.js +15 -0
- package/dist/core/v4/dockerSession.js +461 -0
- package/dist/core/v4/dryRun.js +117 -0
- package/dist/core/v4/failureClassifier.js +779 -0
- package/dist/core/v4/recoveryReport.js +449 -0
- package/dist/core/v4/runtimeToggles.js +187 -0
- package/dist/core/v4/sandboxConfig.js +285 -0
- package/dist/core/v4/sandboxFs.js +316 -0
- package/dist/core/v4/suggestionCatalog.js +41 -0
- package/dist/core/v4/suggestionEngine.js +210 -0
- package/dist/core/v4/toolRegistry.js +18 -0
- package/dist/core/v4/turnState.js +587 -0
- package/dist/core/v4/update/checkUpdate.js +63 -3
- package/dist/core/v4/update/installMethodDetect.js +115 -0
- package/dist/core/v4/update/registryClient.js +121 -0
- package/dist/core/v4/update/skipState.js +75 -0
- package/dist/core/v4/verifier.js +448 -0
- package/dist/core/version.js +1 -1
- package/dist/tools/v4/browser/_observer.js +224 -0
- package/dist/tools/v4/browser/browserBlocker.js +396 -0
- package/dist/tools/v4/browser/browserClick.js +18 -1
- package/dist/tools/v4/browser/browserClose.js +18 -1
- package/dist/tools/v4/browser/browserExtract.js +5 -1
- package/dist/tools/v4/browser/browserFill.js +17 -1
- package/dist/tools/v4/browser/browserGetUrl.js +5 -1
- package/dist/tools/v4/browser/browserNavigate.js +16 -1
- package/dist/tools/v4/browser/browserScreenshot.js +5 -1
- package/dist/tools/v4/browser/browserScroll.js +18 -1
- package/dist/tools/v4/browser/browserType.js +17 -1
- package/dist/tools/v4/browser/captchaCheck.js +5 -1
- package/dist/tools/v4/executeCode.js +1 -0
- package/dist/tools/v4/files/fileCopy.js +56 -2
- package/dist/tools/v4/files/fileDelete.js +38 -1
- package/dist/tools/v4/files/fileList.js +12 -1
- package/dist/tools/v4/files/fileMove.js +59 -2
- package/dist/tools/v4/files/filePatch.js +43 -1
- package/dist/tools/v4/files/fileRead.js +12 -1
- package/dist/tools/v4/files/fileWrite.js +41 -1
- package/dist/tools/v4/index.js +71 -58
- package/dist/tools/v4/memory/memoryAdd.js +14 -0
- package/dist/tools/v4/memory/memoryRemove.js +14 -0
- package/dist/tools/v4/memory/memoryReplace.js +15 -0
- package/dist/tools/v4/memory/sessionSummary.js +12 -0
- package/dist/tools/v4/process/processKill.js +19 -0
- package/dist/tools/v4/process/processList.js +1 -0
- package/dist/tools/v4/process/processLogRead.js +1 -0
- package/dist/tools/v4/process/processSpawn.js +13 -0
- package/dist/tools/v4/process/processWait.js +1 -0
- package/dist/tools/v4/sessions/recallSession.js +1 -0
- package/dist/tools/v4/sessions/sessionList.js +1 -0
- package/dist/tools/v4/sessions/sessionSearch.js +1 -0
- package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
- package/dist/tools/v4/skills/skillManage.js +13 -0
- package/dist/tools/v4/skills/skillView.js +1 -0
- package/dist/tools/v4/skills/skillsList.js +1 -0
- package/dist/tools/v4/subagent/subagentFanout.js +1 -0
- package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
- package/dist/tools/v4/system/appClose.js +13 -0
- package/dist/tools/v4/system/appInput.js +13 -0
- package/dist/tools/v4/system/appLaunch.js +13 -0
- package/dist/tools/v4/system/clipboardRead.js +1 -0
- package/dist/tools/v4/system/clipboardWrite.js +14 -0
- package/dist/tools/v4/system/mediaKey.js +12 -0
- package/dist/tools/v4/system/mediaSessions.js +1 -0
- package/dist/tools/v4/system/mediaTransport.js +13 -0
- package/dist/tools/v4/system/naturalEvents.js +1 -0
- package/dist/tools/v4/system/nowPlaying.js +1 -0
- package/dist/tools/v4/system/osProcessList.js +1 -0
- package/dist/tools/v4/system/screenshot.js +1 -0
- package/dist/tools/v4/system/systemInfo.js +1 -0
- package/dist/tools/v4/system/volumeSet.js +17 -0
- package/dist/tools/v4/terminal/shellExec.js +81 -9
- package/dist/tools/v4/web/deepResearch.js +1 -0
- package/dist/tools/v4/web/openUrl.js +1 -0
- package/dist/tools/v4/web/webFetch.js +1 -0
- package/dist/tools/v4/web/webPage.js +1 -0
- package/dist/tools/v4/web/webSearch.js +1 -0
- package/dist/tools/v4/web/youtubeSearch.js +1 -0
- package/package.json +7 -1
|
@@ -0,0 +1,587 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* core/v4/turnState.ts — v4.1.6 spike: Task Completion Engine (TCE)
|
|
10
|
+
* loop detection + recovery controller.
|
|
11
|
+
*
|
|
12
|
+
* One TurnState instance lives per `runConversation` call. **Default
|
|
13
|
+
* ON** as of v4.2 Phase 6 — set `AIDEN_TCE=0` to disable. Zero
|
|
14
|
+
* behavioral change vs v4.1.6 when disabled.
|
|
15
|
+
*
|
|
16
|
+
* Concept: per-turn state object that the agent loop consults after
|
|
17
|
+
* each tool dispatch. Tracks how often the model is repeating itself
|
|
18
|
+
* — both at the precise-call level (same tool name + identical args)
|
|
19
|
+
* AND at the same-tool-name level (any args). Returns a typed
|
|
20
|
+
* recovery decision so the agent loop can act on it.
|
|
21
|
+
*
|
|
22
|
+
* Two counters by design (the layered-budget pattern):
|
|
23
|
+
*
|
|
24
|
+
* - `consecSignature`: same name + same args-hash run length.
|
|
25
|
+
* Resets when EITHER name or args change. Catches precise loops
|
|
26
|
+
* where the model literally repeats the identical call.
|
|
27
|
+
*
|
|
28
|
+
* - `consecName`: same tool name run length (any args).
|
|
29
|
+
* Resets only when the tool name changes. Catches broader
|
|
30
|
+
* "fishing" patterns where the model probes a tool with
|
|
31
|
+
* different args repeatedly without making progress.
|
|
32
|
+
*
|
|
33
|
+
* Hint stage uses signature counting (precise — fires only on
|
|
34
|
+
* genuine identical-call loops; not on legitimate skill exploration
|
|
35
|
+
* via `skill_view` with different names). Cooldown + surface use
|
|
36
|
+
* name counting (broader — catches the reported 30-skill_view
|
|
37
|
+
* failure mode regardless of args).
|
|
38
|
+
*
|
|
39
|
+
* Three escalating recovery stages, monotonic (once hinted, can
|
|
40
|
+
* escalate to cooldown then surface; never re-fires the same stage):
|
|
41
|
+
*
|
|
42
|
+
* Stage 1 — HINT (signature ≥ 5): inject `role: 'system'` message
|
|
43
|
+
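* into the conversation suggesting the model reconsider. A second,
* verifier-driven hint fires earlier (by default after 3 consecutive
* verified failures of the same tool) with its own message.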
|
|
44
|
+
*
|
|
45
|
+
* Stage 2 — COOLDOWN (name ≥ 8): mark the tool cooled-down for N
|
|
46
|
+
* iterations. Agent filters the tool out of the schemas passed
|
|
47
|
+
* to the provider, so the model literally cannot call it.
|
|
48
|
+
*
|
|
49
|
+
* Stage 3 — SURFACE (name ≥ 11): return a structured-failure card.
|
|
50
|
+
* Agent ends the turn cleanly via `finishReason = 'tool_loop'`;
|
|
51
|
+
* chatSession renders a capability-card-style failure surface.
|
|
52
|
+
*
|
|
53
|
+
* Thresholds are tunable via constructor options. Pure module — no
|
|
54
|
+
* Display dependency, no event-emitter side effects. Safe to import
|
|
55
|
+
* from anywhere in the codebase.
|
|
56
|
+
*/
|
|
57
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
58
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
59
|
+
};
|
|
60
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
61
|
+
exports.TurnState = void 0;
|
|
62
|
+
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
63
|
+
// ── Implementation ──────────────────────────────────────────────────────────
|
|
64
|
+
class TurnState {
|
|
65
|
+
constructor(opts = {}) {
|
|
66
|
+
this.stage = 'none';
|
|
67
|
+
this.toolCalls = [];
|
|
68
|
+
this.successfulTools = new Set();
|
|
69
|
+
// Layered streak tracking — see module docstring for rationale.
|
|
70
|
+
this.consecName = { name: null, count: 0 };
|
|
71
|
+
this.consecSignature = { signature: null, count: 0 };
|
|
72
|
+
/**
|
|
73
|
+
* v4.2 Phase 1 — verifier-driven failure streak. Resets on tool
|
|
74
|
+
* name change OR on a verified-ok call. Independent of the other
|
|
75
|
+
* two streaks because a failing tool isn't necessarily called with
|
|
76
|
+
* identical args (model often varies args between retries).
|
|
77
|
+
*/
|
|
78
|
+
this.consecFailed = { name: null, count: 0 };
|
|
79
|
+
this.cooledDownTools = new Map();
|
|
80
|
+
this.recoveryEvents = [];
|
|
81
|
+
/**
|
|
82
|
+
* v4.2 Phase 1 — append-only verifier log, parallel to `toolCalls`.
|
|
83
|
+
* Only entries whose `recordToolCall(...)` was given a verification
|
|
84
|
+
* argument land here; this keeps the array semantically clean for
|
|
85
|
+
* downstream callers (no `undefined` placeholders).
|
|
86
|
+
*/
|
|
87
|
+
this.verifications = [];
|
|
88
|
+
/**
|
|
89
|
+
* v4.2 Phase 2 — append-only classification log. Only populated
|
|
90
|
+
* when a classifier was supplied to `recordToolCall(...)` AND the
|
|
91
|
+
* verifier marked the call as `!ok`. Semantically clean — no
|
|
92
|
+
* `undefined` placeholders for ok calls.
|
|
93
|
+
*/
|
|
94
|
+
this.classifications = [];
|
|
95
|
+
/**
|
|
96
|
+
* v4.2 Phase 4 — ring buffer of per-iteration checkpoints. Newest
|
|
97
|
+
* at the tail. Length is bounded by `checkpointDepth`; older
|
|
98
|
+
* entries are dropped from the head when capacity is exceeded.
|
|
99
|
+
* The "live" checkpoint (the one capturing the current iteration's
|
|
100
|
+
* mutation flag) is always `checkpoints[checkpoints.length - 1]`.
|
|
101
|
+
*/
|
|
102
|
+
this.checkpoints = [];
|
|
103
|
+
// v4.2 Phase 6 — TCE is ON by default. Strict `'0'` opt-out
|
|
104
|
+
// semantic: env var must be literally the string `'0'` to
|
|
105
|
+
// disable; everything else (unset, `'1'`, empty string, junk)
|
|
106
|
+
// enables. The opts.enabled override still wins when explicitly
|
|
107
|
+
// passed by callers (test fixtures, embedded usage).
|
|
108
|
+
// v4.5 Phase 8a — route through the runtimeToggles singleton so
|
|
109
|
+
// /tce slash-command flips and config.yaml overrides take effect
|
|
110
|
+
// on the next constructed TurnState. The explicit opts.enabled
|
|
111
|
+
// override still wins for test fixtures + embedded usage.
|
|
112
|
+
if (typeof opts.enabled === 'boolean') {
|
|
113
|
+
this.enabled = opts.enabled;
|
|
114
|
+
}
|
|
115
|
+
else {
|
|
116
|
+
try {
|
|
117
|
+
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
118
|
+
const rt = require('./runtimeToggles');
|
|
119
|
+
this.enabled = rt.getRuntimeToggles().isEnabled('tce');
|
|
120
|
+
}
|
|
121
|
+
catch {
|
|
122
|
+
// runtimeToggles unavailable (rare — circular import or test
|
|
123
|
+
// harness without core/v4 wired). Fall back to direct env read.
|
|
124
|
+
this.enabled = process.env.AIDEN_TCE !== '0';
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
this.hintConsec = opts.hintConsecThreshold ?? 5;
|
|
128
|
+
this.cooldownConsec = opts.cooldownConsecThreshold ?? 8;
|
|
129
|
+
this.surfaceConsec = opts.surfaceConsecThreshold ?? 11;
|
|
130
|
+
this.cooldownIters = opts.cooldownIterations ?? 3;
|
|
131
|
+
this.failedConsec = opts.failedConsecThreshold ?? 3;
|
|
132
|
+
// checkpointDepth = 0 disables the buffer entirely (useful for
|
|
133
|
+
// tests that want Phase 1-3 behavior with TCE enabled). Otherwise
|
|
134
|
+
// default 3 per Q-CP2 approval.
|
|
135
|
+
this.checkpointDepth = Math.max(0, opts.checkpointDepth ?? 3);
|
|
136
|
+
}
|
|
137
|
+
isEnabled() {
|
|
138
|
+
return this.enabled;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Called after each tool's executor resolves. Updates the streak
|
|
142
|
+
* counters, decides which recovery action (if any) applies, and
|
|
143
|
+
* returns the decision for the agent loop to act on.
|
|
144
|
+
*
|
|
145
|
+
* When `enabled === false`, returns `{kind: 'allow'}` immediately
|
|
146
|
+
* without any state mutation — guarantees zero behavioral change
|
|
147
|
+
* when TCE is opted out via `AIDEN_TCE=0`.
|
|
148
|
+
*
|
|
149
|
+
* v4.2 Phase 1 — optional `verification` argument lets the verifier
|
|
150
|
+
* layer feed its classification into the controller. When provided
|
|
151
|
+
* and `!verification.ok`, the `consecFailed` counter increments;
|
|
152
|
+
* when `verification.ok`, it resets. Callers that don't pass a
|
|
153
|
+
* verification get the original v4.1.6 behavior unchanged.
|
|
154
|
+
*
|
|
155
|
+
* v4.2 Phase 2 — optional `classification` argument records WHY a
|
|
156
|
+
* call failed. Phase 2 only logs it (for Phase 3's RecoveryReport
|
|
157
|
+
* to consume); no counter or recovery action fires off classification.
|
|
158
|
+
*/
|
|
159
|
+
recordToolCall(name, args, verification, classification) {
|
|
160
|
+
if (!this.enabled) {
|
|
161
|
+
return { kind: 'allow', consecutive: 0 };
|
|
162
|
+
}
|
|
163
|
+
const argsHash = canonicalArgsHash(args);
|
|
164
|
+
const signature = `${name}::${argsHash}`;
|
|
165
|
+
const ts = Date.now();
|
|
166
|
+
this.toolCalls.push({ name, argsHash, ts });
|
|
167
|
+
// Update name streak: resets only on tool-name change.
|
|
168
|
+
if (this.consecName.name === name) {
|
|
169
|
+
this.consecName.count += 1;
|
|
170
|
+
}
|
|
171
|
+
else {
|
|
172
|
+
this.consecName = { name, count: 1 };
|
|
173
|
+
}
|
|
174
|
+
// Update signature streak: resets on EITHER name or args change.
|
|
175
|
+
if (this.consecSignature.signature === signature) {
|
|
176
|
+
this.consecSignature.count += 1;
|
|
177
|
+
}
|
|
178
|
+
else {
|
|
179
|
+
this.consecSignature = { signature, count: 1 };
|
|
180
|
+
}
|
|
181
|
+
// v4.2 Phase 1 — update verifier-driven failure streak. Reset on
|
|
182
|
+
// name change OR on a verified-ok call; increment on verified-fail.
|
|
183
|
+
// Calls without a verification leave the counter untouched (so a
|
|
184
|
+
// mid-turn migration from un-verified to verified callers doesn't
|
|
185
|
+
// produce spurious resets).
|
|
186
|
+
if (verification) {
|
|
187
|
+
this.verifications.push({ name, verification, ts });
|
|
188
|
+
if (verification.ok) {
|
|
189
|
+
this.consecFailed = { name, count: 0 };
|
|
190
|
+
}
|
|
191
|
+
else {
|
|
192
|
+
if (this.consecFailed.name === name) {
|
|
193
|
+
this.consecFailed.count += 1;
|
|
194
|
+
}
|
|
195
|
+
else {
|
|
196
|
+
this.consecFailed = { name, count: 1 };
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
else if (this.consecFailed.name !== name) {
|
|
201
|
+
// Name change with no verification — reset the failed counter
|
|
202
|
+
// to keep it semantically aligned with `consecName`.
|
|
203
|
+
this.consecFailed = { name: null, count: 0 };
|
|
204
|
+
}
|
|
205
|
+
// v4.2 Phase 2 — record-only. Classifier output lands here for
|
|
206
|
+
// Phase 3 to consume; no recovery action fires off this in Phase 2.
|
|
207
|
+
if (classification) {
|
|
208
|
+
this.classifications.push({ name, classification, ts });
|
|
209
|
+
}
|
|
210
|
+
// Track which distinct tools have run in this turn (for surface
|
|
211
|
+
// card's `canStill` list — tools the model used productively
|
|
212
|
+
// before getting stuck).
|
|
213
|
+
if (this.stage === 'none' || this.stage === 'hinted') {
|
|
214
|
+
this.successfulTools.add(name);
|
|
215
|
+
}
|
|
216
|
+
// ── Stage transition gate (monotonic) ────────────────────────────
|
|
217
|
+
// Surface (highest priority): name-streak crosses the surface
|
|
218
|
+
// threshold AND we haven't already surfaced.
|
|
219
|
+
if (this.stage !== 'surfaced' && this.consecName.count >= this.surfaceConsec) {
|
|
220
|
+
this.stage = 'surfaced';
|
|
221
|
+
const decision = {
|
|
222
|
+
kind: 'surface',
|
|
223
|
+
toolName: name,
|
|
224
|
+
consecutive: this.consecName.count,
|
|
225
|
+
surfaceCard: this.buildSurfaceCard(name, this.consecName.count),
|
|
226
|
+
};
|
|
227
|
+
this.recoveryEvents.push({ stage: 'surfaced', toolName: name, count: this.consecName.count, ts });
|
|
228
|
+
return decision;
|
|
229
|
+
}
|
|
230
|
+
// Cooldown: name-streak crosses cooldown threshold AND tool not
|
|
231
|
+
// already cooled-down AND we haven't escalated past cooldown.
|
|
232
|
+
if (this.stage !== 'surfaced' &&
|
|
233
|
+
this.consecName.count >= this.cooldownConsec &&
|
|
234
|
+
!this.cooledDownTools.has(name)) {
|
|
235
|
+
this.stage = 'cooldown';
|
|
236
|
+
this.cooledDownTools.set(name, this.cooldownIters);
|
|
237
|
+
// v4.2 Phase 4 — look for a restorable checkpoint. The cooldown
|
|
238
|
+
// stage benefits from rolling back to a clean baseline before
|
|
239
|
+
// the looping tool started failing, but ONLY when no mutating
|
|
240
|
+
// tools ran in the target iteration's window (HARD BLOCK per
|
|
241
|
+
// Q-CP3). Falls back gracefully to plain cooldown when no
|
|
242
|
+
// restorable checkpoint exists.
|
|
243
|
+
const restorable = this.findRestorableCheckpoint();
|
|
244
|
+
const baseDecision = {
|
|
245
|
+
kind: 'cooldown',
|
|
246
|
+
toolName: name,
|
|
247
|
+
consecutive: this.consecName.count,
|
|
248
|
+
cooldownMessage: buildCooldownMessage(name, this.cooldownIters),
|
|
249
|
+
};
|
|
250
|
+
this.recoveryEvents.push({ stage: 'cooldown', toolName: name, count: this.consecName.count, ts });
|
|
251
|
+
if (restorable) {
|
|
252
|
+
return {
|
|
253
|
+
...baseDecision,
|
|
254
|
+
kind: 'cooldown_with_rollback',
|
|
255
|
+
rollback: {
|
|
256
|
+
checkpoint: restorable,
|
|
257
|
+
blockedBy: [], // hard block means we only return checkpoints with zero mutations
|
|
258
|
+
},
|
|
259
|
+
};
|
|
260
|
+
}
|
|
261
|
+
return baseDecision;
|
|
262
|
+
}
|
|
263
|
+
// v4.2 Phase 1 — verifier-driven HINT. Fires faster than the
|
|
264
|
+
// signature-based hint when the verifier flags consecutive
|
|
265
|
+
// failures. Distinct hint message so the model sees a different
|
|
266
|
+
// corrective signal ("you're failing" vs "you're repeating").
|
|
267
|
+
if (this.stage === 'none' &&
|
|
268
|
+
this.consecFailed.name === name &&
|
|
269
|
+
this.consecFailed.count >= this.failedConsec) {
|
|
270
|
+
this.stage = 'hinted';
|
|
271
|
+
const decision = {
|
|
272
|
+
kind: 'hint',
|
|
273
|
+
toolName: name,
|
|
274
|
+
consecutive: this.consecFailed.count,
|
|
275
|
+
hintMessage: buildFailedHintMessage(name, this.consecFailed.count, verification),
|
|
276
|
+
};
|
|
277
|
+
this.recoveryEvents.push({ stage: 'hinted', toolName: name, count: this.consecFailed.count, ts });
|
|
278
|
+
return decision;
|
|
279
|
+
}
|
|
280
|
+
// Hint: signature-streak (precise) crosses hint threshold AND
|
|
281
|
+
// we're still in the `none` stage. Use signature here to avoid
|
|
282
|
+
// false-positives on legitimate skill exploration (different
|
|
283
|
+
// skill names through `skill_view` shouldn't trigger).
|
|
284
|
+
if (this.stage === 'none' && this.consecSignature.count >= this.hintConsec) {
|
|
285
|
+
this.stage = 'hinted';
|
|
286
|
+
const decision = {
|
|
287
|
+
kind: 'hint',
|
|
288
|
+
toolName: name,
|
|
289
|
+
consecutive: this.consecSignature.count,
|
|
290
|
+
hintMessage: buildHintMessage(name, this.consecSignature.count),
|
|
291
|
+
};
|
|
292
|
+
this.recoveryEvents.push({ stage: 'hinted', toolName: name, count: this.consecSignature.count, ts });
|
|
293
|
+
return decision;
|
|
294
|
+
}
|
|
295
|
+
return { kind: 'allow', consecutive: this.consecName.count };
|
|
296
|
+
}
|
|
297
|
+
/**
|
|
298
|
+
* Tools currently cooled-down. Agent filters these out of the
|
|
299
|
+
* tool schemas passed to the next provider call so the model
|
|
300
|
+
* literally cannot request them.
|
|
301
|
+
*/
|
|
302
|
+
getCooledDownTools() {
|
|
303
|
+
if (!this.enabled)
|
|
304
|
+
return [];
|
|
305
|
+
return [...this.cooledDownTools.keys()];
|
|
306
|
+
}
|
|
307
|
+
/**
|
|
308
|
+
* Called once per agent loop iteration. Decrements each cooled-
|
|
309
|
+
* down tool's remaining-iteration counter; drops tools that have
|
|
310
|
+
* served their cooldown. No-op when disabled.
|
|
311
|
+
*/
|
|
312
|
+
advanceIteration() {
|
|
313
|
+
if (!this.enabled)
|
|
314
|
+
return;
|
|
315
|
+
for (const [name, remaining] of this.cooledDownTools.entries()) {
|
|
316
|
+
if (remaining <= 1) {
|
|
317
|
+
this.cooledDownTools.delete(name);
|
|
318
|
+
}
|
|
319
|
+
else {
|
|
320
|
+
this.cooledDownTools.set(name, remaining - 1);
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
// ── Phase 4 — checkpoint / restore API ─────────────────────────────────
|
|
325
|
+
/**
|
|
326
|
+
* Capture the state going INTO an iteration's tool dispatch. Called
|
|
327
|
+
* by the agent loop after the assistant message is pushed but
|
|
328
|
+
* before the for-each-tool dispatch loop begins. The captured
|
|
329
|
+
* `messages` argument is shallow-cloned (item references shared;
|
|
330
|
+
* the array reference is new — items are treated as immutable
|
|
331
|
+
* Message objects downstream).
|
|
332
|
+
*
|
|
333
|
+
* No-op when TCE is disabled (opt-out via `AIDEN_TCE=0`) OR when
|
|
334
|
+
* `checkpointDepth === 0`.
|
|
335
|
+
* Ring-buffer rolls over once depth is exceeded.
|
|
336
|
+
*/
|
|
337
|
+
captureCheckpoint(messages, iteration) {
|
|
338
|
+
if (!this.enabled || this.checkpointDepth === 0)
|
|
339
|
+
return;
|
|
340
|
+
const checkpoint = {
|
|
341
|
+
iteration,
|
|
342
|
+
ts: Date.now(),
|
|
343
|
+
messages: [...messages],
|
|
344
|
+
turnStateSnapshot: this.captureInternalSnapshot(),
|
|
345
|
+
containedMutations: false,
|
|
346
|
+
mutatingToolsSinceCheckpoint: [],
|
|
347
|
+
};
|
|
348
|
+
this.checkpoints.push(checkpoint);
|
|
349
|
+
while (this.checkpoints.length > this.checkpointDepth) {
|
|
350
|
+
this.checkpoints.shift();
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
/**
|
|
354
|
+
* Flag the LIVE checkpoint (the most recently captured one) as
|
|
355
|
+
* having seen a mutating tool dispatch. Called by the agent loop
|
|
356
|
+
* just before dispatching any tool with `ToolHandler.mutates ===
|
|
357
|
+
* true`. Sets `containedMutations` on the live checkpoint AND on
|
|
358
|
+
* every older checkpoint that's still in the ring buffer — those
|
|
359
|
+
* older checkpoints would otherwise be eligible for rollback even
|
|
360
|
+
* though the iterations between them contained mutating tools.
|
|
361
|
+
*
|
|
362
|
+
* No-op when disabled or when the ring buffer is empty.
|
|
363
|
+
*/
|
|
364
|
+
markMutationOnLiveCheckpoint(toolName) {
|
|
365
|
+
if (!this.enabled || this.checkpoints.length === 0)
|
|
366
|
+
return;
|
|
367
|
+
// Mark every checkpoint currently in the buffer — rolling back to
|
|
368
|
+
// ANY of them would require un-doing this mutation.
|
|
369
|
+
for (const cp of this.checkpoints) {
|
|
370
|
+
if (!cp.containedMutations) {
|
|
371
|
+
// Re-assign with mutated copy; Checkpoint fields are typed
|
|
372
|
+
// readonly on the public type but we own them internally.
|
|
373
|
+
cp.containedMutations = true;
|
|
374
|
+
}
|
|
375
|
+
const mutating = cp.mutatingToolsSinceCheckpoint;
|
|
376
|
+
if (!mutating.includes(toolName)) {
|
|
377
|
+
mutating.push(toolName);
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
/**
|
|
382
|
+
* Find the most recent checkpoint that's safe to roll back to. A
|
|
383
|
+
* checkpoint is safe when `containedMutations === false` — no
|
|
384
|
+
* mutating tool has run since it was captured. Returns null when
|
|
385
|
+
* no such checkpoint exists (caller falls back to plain cooldown
|
|
386
|
+
* per Q-CP3 hard block).
|
|
387
|
+
*
|
|
388
|
+
* Walks the ring buffer from newest to oldest; the first restorable
|
|
389
|
+
* checkpoint is returned. Disabled / empty buffer → null.
|
|
390
|
+
*/
|
|
391
|
+
findRestorableCheckpoint() {
|
|
392
|
+
if (!this.enabled || this.checkpoints.length === 0)
|
|
393
|
+
return null;
|
|
394
|
+
for (let i = this.checkpoints.length - 1; i >= 0; i -= 1) {
|
|
395
|
+
const cp = this.checkpoints[i];
|
|
396
|
+
if (!cp.containedMutations)
|
|
397
|
+
return cp;
|
|
398
|
+
}
|
|
399
|
+
return null;
|
|
400
|
+
}
|
|
401
|
+
/**
|
|
402
|
+
* Restore TurnState internals from a previously-captured checkpoint.
|
|
403
|
+
* The caller is responsible for truncating the messages array to
|
|
404
|
+
* `checkpoint.messages.length`. After restore, the ring buffer is
|
|
405
|
+
* trimmed to remove the checkpoint AND every newer entry — those
|
|
406
|
+
* captures correspond to iterations that no longer happened from
|
|
407
|
+
* the controller's perspective.
|
|
408
|
+
*
|
|
409
|
+
* No-op when disabled. Safe to call with a checkpoint that's no
|
|
410
|
+
* longer in the buffer (e.g. dropped by the ring rollover) — the
|
|
411
|
+
* snapshot data is still valid; only the buffer-trimming step is
|
|
412
|
+
* skipped.
|
|
413
|
+
*/
|
|
414
|
+
restoreInternalsFrom(checkpoint) {
|
|
415
|
+
if (!this.enabled)
|
|
416
|
+
return;
|
|
417
|
+
const snap = checkpoint.turnStateSnapshot;
|
|
418
|
+
this.stage = snap.stage;
|
|
419
|
+
this.consecName = { ...snap.consecName };
|
|
420
|
+
this.consecSignature = { ...snap.consecSignature };
|
|
421
|
+
this.consecFailed = { ...snap.consecFailed };
|
|
422
|
+
this.cooledDownTools = new Map(snap.cooledDownTools.map(([k, v]) => [k, v]));
|
|
423
|
+
this.toolCalls = [...snap.toolCalls];
|
|
424
|
+
this.successfulTools = new Set(snap.successfulTools);
|
|
425
|
+
this.recoveryEvents = [...snap.recoveryEvents];
|
|
426
|
+
this.verifications = [...snap.verifications];
|
|
427
|
+
this.classifications = [...snap.classifications];
|
|
428
|
+
// Trim the ring buffer to remove `checkpoint` and everything newer.
|
|
429
|
+
const idx = this.checkpoints.indexOf(checkpoint);
|
|
430
|
+
if (idx >= 0) {
|
|
431
|
+
this.checkpoints = this.checkpoints.slice(0, idx);
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
/**
|
|
435
|
+
* Read-only view of the live ring buffer. Public for tests + future
|
|
436
|
+
* diagnostic surfaces. Returns a fresh array; mutation is harmless.
|
|
437
|
+
*/
|
|
438
|
+
getCheckpoints() {
|
|
439
|
+
return [...this.checkpoints];
|
|
440
|
+
}
|
|
441
|
+
/**
|
|
442
|
+
* v4.2 Phase 4 — re-apply a cooldown after a rollback. Called by
|
|
443
|
+
* the agent loop AFTER `restoreInternalsFrom`, because restore
|
|
444
|
+
* replaces `cooledDownTools` with the checkpoint's snapshot (which
|
|
445
|
+
* was captured BEFORE the cooldown decision was emitted).
|
|
446
|
+
*
|
|
447
|
+
* Without this re-apply, the cooldown intent of the recovery
|
|
448
|
+
* decision would be silently dropped post-rollback. We want the
|
|
449
|
+
* NEXT iteration to see the constrained tool schema, which is the
|
|
450
|
+
* whole point of cooldown_with_rollback.
|
|
451
|
+
*
|
|
452
|
+
* Also re-promotes the stage to 'cooldown' so subsequent calls
|
|
453
|
+
* within the same turn don't re-trigger the same recovery
|
|
454
|
+
* (monotonic stage discipline preserved).
|
|
455
|
+
*
|
|
456
|
+
* No-op when disabled.
|
|
457
|
+
*/
|
|
458
|
+
reapplyCooldown(toolName) {
|
|
459
|
+
if (!this.enabled)
|
|
460
|
+
return;
|
|
461
|
+
this.cooledDownTools.set(toolName, this.cooldownIters);
|
|
462
|
+
if (this.stage === 'none' || this.stage === 'hinted') {
|
|
463
|
+
this.stage = 'cooldown';
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
/**
|
|
467
|
+
* Internal: capture the current mutable state into an immutable
|
|
468
|
+
* snapshot suitable for embedding in a Checkpoint. Deep-clones
|
|
469
|
+
* Maps + Sets; arrays are shallow-cloned because the items are
|
|
470
|
+
* treated as immutable downstream.
|
|
471
|
+
*/
|
|
472
|
+
captureInternalSnapshot() {
|
|
473
|
+
return {
|
|
474
|
+
stage: this.stage,
|
|
475
|
+
consecName: { ...this.consecName },
|
|
476
|
+
consecSignature: { ...this.consecSignature },
|
|
477
|
+
consecFailed: { ...this.consecFailed },
|
|
478
|
+
cooledDownTools: [...this.cooledDownTools.entries()].map(([k, v]) => [k, v]),
|
|
479
|
+
toolCalls: [...this.toolCalls],
|
|
480
|
+
successfulTools: [...this.successfulTools],
|
|
481
|
+
recoveryEvents: [...this.recoveryEvents],
|
|
482
|
+
verifications: [...this.verifications],
|
|
483
|
+
classifications: [...this.classifications],
|
|
484
|
+
};
|
|
485
|
+
}
|
|
486
|
+
// ── Diagnostic snapshot ────────────────────────────────────────────────
|
|
487
|
+
/** Diagnostic snapshot for tests + future debug surfacing. Pure read. */
|
|
488
|
+
getDiagnosticSnapshot() {
|
|
489
|
+
return {
|
|
490
|
+
enabled: this.enabled,
|
|
491
|
+
stage: this.stage,
|
|
492
|
+
consecName: { ...this.consecName },
|
|
493
|
+
consecSignature: { ...this.consecSignature },
|
|
494
|
+
consecFailed: { ...this.consecFailed },
|
|
495
|
+
cooledDownTools: [...this.cooledDownTools.entries()].map(([name, iterationsRemaining]) => ({ name, iterationsRemaining })),
|
|
496
|
+
toolCalls: [...this.toolCalls],
|
|
497
|
+
successfulTools: [...this.successfulTools],
|
|
498
|
+
recoveryEvents: [...this.recoveryEvents],
|
|
499
|
+
verifications: [...this.verifications],
|
|
500
|
+
classifications: [...this.classifications],
|
|
501
|
+
thresholds: {
|
|
502
|
+
hintConsec: this.hintConsec,
|
|
503
|
+
cooldownConsec: this.cooldownConsec,
|
|
504
|
+
surfaceConsec: this.surfaceConsec,
|
|
505
|
+
cooldownIters: this.cooldownIters,
|
|
506
|
+
failedConsec: this.failedConsec,
|
|
507
|
+
},
|
|
508
|
+
};
|
|
509
|
+
}
|
|
510
|
+
/** Build the structured-failure surface card for the chat layer. */
|
|
511
|
+
buildSurfaceCard(loopingTool, count) {
|
|
512
|
+
const canStillItems = [];
|
|
513
|
+
for (const t of this.successfulTools) {
|
|
514
|
+
if (t === loopingTool)
|
|
515
|
+
continue;
|
|
516
|
+
canStillItems.push(`Reuse \`${t}\` (called earlier this turn)`);
|
|
517
|
+
}
|
|
518
|
+
if (canStillItems.length === 0) {
|
|
519
|
+
canStillItems.push('Try a different approach without this tool');
|
|
520
|
+
}
|
|
521
|
+
return {
|
|
522
|
+
title: `Stuck on repeated tool calls`,
|
|
523
|
+
canStill: canStillItems,
|
|
524
|
+
cannotReliably: [
|
|
525
|
+
`Call \`${loopingTool}\` again this turn — fired ${count}× consecutively without making progress`,
|
|
526
|
+
],
|
|
527
|
+
fix: `Rephrase the request to be more specific about which tool/result you want, ` +
|
|
528
|
+
`or try a different angle (e.g. ask for a concrete output rather than discovery).`,
|
|
529
|
+
};
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
// Expose the TurnState class on the CommonJS exports object.
exports.TurnState = TurnState;
|
|
533
|
+
// ── Internal helpers ────────────────────────────────────────────────────────
|
|
534
|
+
/**
 * Short, key-order-insensitive fingerprint of a tool call's arguments.
 *
 * The arguments are serialized with `canonicalStringify` (object keys
 * sorted recursively, so `{a:1, b:2}` and `{b:2, a:1}` hash the same),
 * hashed with sha256, and truncated to the first 12 hex characters.
 *
 * Never throws because of the shape of `args`:
 *   - if canonical serialization throws (circular references, BigInt
 *     values, ...) or yields something that is not a string, the
 *     input falls back to `String(args)`;
 *   - if `String(args)` itself throws (e.g. an object without a usable
 *     `toString`), the `Object.prototype.toString` tag is used instead.
 *
 * @param {*} args - Tool arguments of any shape.
 * @returns {string} 12 lowercase hex characters.
 */
function canonicalArgsHash(args) {
    let serialized;
    try {
        serialized = canonicalStringify(args);
    }
    catch {
        // Handled by the string check below.
        serialized = undefined;
    }
    // hash.update() rejects non-string/non-buffer data, so make sure a
    // string is always passed to it.
    if (typeof serialized !== 'string') {
        try {
            serialized = String(args);
        }
        catch {
            serialized = Object.prototype.toString.call(args);
        }
    }
    return node_crypto_1.default.createHash('sha256').update(serialized).digest('hex').slice(0, 12);
}
|
|
554
|
+
/**
 * Serialize a value to a JSON-like string whose output does not depend
 * on object key insertion order.
 *
 * - `null` and `undefined` both serialize to `'null'`.
 * - Primitives go through `JSON.stringify`. Functions and symbols, for
 *   which `JSON.stringify` returns `undefined`, serialize to `'null'`
 *   so the result is always a string.
 * - Arrays keep their element order.
 * - Any other object is serialized from its own enumerable string keys,
 *   sorted, recursing into each value.
 *
 * @param {*} value - Value to serialize.
 * @returns {string} Canonical serialization.
 * @throws {TypeError} For BigInt values (raised by `JSON.stringify`).
 * @throws {RangeError} For circular structures (unbounded recursion).
 */
function canonicalStringify(value) {
    if (value === null || value === undefined) {
        return 'null';
    }
    if (typeof value !== 'object') {
        // JSON.stringify returns undefined, not a string, for functions
        // and symbols; normalise that to 'null'.
        return JSON.stringify(value) ?? 'null';
    }
    if (Array.isArray(value)) {
        return `[${value.map((item) => canonicalStringify(item)).join(',')}]`;
    }
    const members = Object.keys(value)
        .sort()
        .map((key) => `${JSON.stringify(key)}:${canonicalStringify(value[key])}`);
    return `{${members.join(',')}}`;
}
|
|
566
|
+
/**
 * Compose the hint shown when the same tool has been called repeatedly
 * with identical arguments.
 *
 * @param {string} toolName - Name of the repeating tool.
 * @param {number} count - How many consecutive calls were observed.
 * @returns {string} The hint text, prefixed with `[tce]`.
 */
function buildHintMessage(toolName, count) {
    const sentences = [
        `[tce] You've called \`${toolName}\` ${count} times in a row with the same arguments.`,
        'This looks like a loop. Reconsider your approach — try a different tool, change the arguments,',
        'or answer with what you know if no tool will make progress.',
    ];
    return sentences.join(' ');
}
|
|
571
|
+
/**
 * v4.2 Phase 1 — hint driven by the verifier rather than by call
 * signatures. It reports that the call keeps failing (not that it keeps
 * repeating), which is the more accurate diagnosis when the failure
 * streak is what triggered the hint.
 *
 * @param {string} toolName - Name of the failing tool.
 * @param {number} count - Number of consecutive failures.
 * @param {{reason?: string, suggestion?: string}} [verification] -
 *   Optional verification result; a truthy `reason` and/or `suggestion`
 *   is woven into the message.
 * @returns {string} The hint text, prefixed with `[tce]`.
 */
function buildFailedHintMessage(toolName, count, verification) {
    const { reason, suggestion } = verification ?? {};
    let reasonNote = '';
    if (reason) {
        reasonNote = ` Latest reason: "${reason}".`;
    }
    let suggestionNote = '';
    if (suggestion) {
        suggestionNote = ` ${suggestion}`;
    }
    const headline = `[tce] \`${toolName}\` has failed ${count} times in a row.${reasonNote}`;
    const advice = [
        'Stop retrying it unchanged — change the arguments, switch to a different tool,',
        'or answer with what you have if no tool can make progress.',
    ].join(' ');
    return `${headline} ${advice}${suggestionNote}`;
}
|
|
584
|
+
/**
 * Compose the notice announcing that a tool has been disabled for a
 * number of iterations.
 *
 * @param {string} toolName - Name of the tool placed on cooldown.
 * @param {number} cooldownIters - Number of iterations stated in the notice.
 * @returns {string} The notice text, prefixed with `[tce]`.
 */
function buildCooldownMessage(toolName, cooldownIters) {
    const notice = `[tce] \`${toolName}\` is now disabled for the next ${cooldownIters} iteration(s) because it's been`;
    const advice = 'called repeatedly without making progress. Use a different tool or answer with what you have.';
    return `${notice} ${advice}`;
}
|