aiden-runtime 4.1.5 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -847
- package/dist/api/server.js +32 -5
- package/dist/cli/v4/aidenCLI.js +351 -53
- package/dist/cli/v4/callbacks.js +170 -0
- package/dist/cli/v4/chatSession.js +138 -3
- package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
- package/dist/cli/v4/commands/browserDepth.js +45 -0
- package/dist/cli/v4/commands/cron.js +264 -0
- package/dist/cli/v4/commands/daemon.js +541 -0
- package/dist/cli/v4/commands/daemonStatus.js +253 -0
- package/dist/cli/v4/commands/help.js +7 -0
- package/dist/cli/v4/commands/index.js +20 -1
- package/dist/cli/v4/commands/runs.js +203 -0
- package/dist/cli/v4/commands/sandbox.js +48 -0
- package/dist/cli/v4/commands/suggestions.js +68 -0
- package/dist/cli/v4/commands/tce.js +41 -0
- package/dist/cli/v4/commands/trigger.js +378 -0
- package/dist/cli/v4/commands/update.js +95 -3
- package/dist/cli/v4/daemonAgentBuilder.js +142 -0
- package/dist/cli/v4/defaultSoul.js +1 -1
- package/dist/cli/v4/display/capabilityCard.js +26 -0
- package/dist/cli/v4/display.js +18 -8
- package/dist/cli/v4/replyRenderer.js +31 -23
- package/dist/cli/v4/updateBootPrompt.js +170 -0
- package/dist/core/playwrightBridge.js +129 -0
- package/dist/core/v4/aidenAgent.js +308 -4
- package/dist/core/v4/browserState.js +436 -0
- package/dist/core/v4/checkpoint.js +79 -0
- package/dist/core/v4/daemon/bootstrap.js +604 -0
- package/dist/core/v4/daemon/cleanShutdown.js +154 -0
- package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
- package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
- package/dist/core/v4/daemon/cron/migration.js +199 -0
- package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
- package/dist/core/v4/daemon/daemonConfig.js +90 -0
- package/dist/core/v4/daemon/db/connection.js +106 -0
- package/dist/core/v4/daemon/db/migrations.js +296 -0
- package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
- package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
- package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
- package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
- package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
- package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
- package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
- package/dist/core/v4/daemon/dispatcher/index.js +53 -0
- package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
- package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
- package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
- package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
- package/dist/core/v4/daemon/drain.js +156 -0
- package/dist/core/v4/daemon/eventLoopLag.js +73 -0
- package/dist/core/v4/daemon/health.js +159 -0
- package/dist/core/v4/daemon/idempotencyStore.js +204 -0
- package/dist/core/v4/daemon/index.js +179 -0
- package/dist/core/v4/daemon/instanceTracker.js +99 -0
- package/dist/core/v4/daemon/resourceRegistry.js +150 -0
- package/dist/core/v4/daemon/restartCode.js +32 -0
- package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
- package/dist/core/v4/daemon/runStore.js +114 -0
- package/dist/core/v4/daemon/runtimeLock.js +167 -0
- package/dist/core/v4/daemon/signals.js +50 -0
- package/dist/core/v4/daemon/supervisor.js +272 -0
- package/dist/core/v4/daemon/triggerBus.js +279 -0
- package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
- package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
- package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
- package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
- package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
- package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
- package/dist/core/v4/daemon/triggers/email/index.js +332 -0
- package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
- package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
- package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
- package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
- package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
- package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
- package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
- package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
- package/dist/core/v4/daemon/triggers/webhook.js +376 -0
- package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
- package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
- package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
- package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
- package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
- package/dist/core/v4/daemon/types.js +15 -0
- package/dist/core/v4/dockerSession.js +461 -0
- package/dist/core/v4/dryRun.js +117 -0
- package/dist/core/v4/failureClassifier.js +779 -0
- package/dist/core/v4/recoveryReport.js +449 -0
- package/dist/core/v4/runtimeToggles.js +187 -0
- package/dist/core/v4/sandboxConfig.js +285 -0
- package/dist/core/v4/sandboxFs.js +316 -0
- package/dist/core/v4/suggestionCatalog.js +41 -0
- package/dist/core/v4/suggestionEngine.js +210 -0
- package/dist/core/v4/toolRegistry.js +18 -0
- package/dist/core/v4/turnState.js +587 -0
- package/dist/core/v4/update/checkUpdate.js +63 -3
- package/dist/core/v4/update/installMethodDetect.js +115 -0
- package/dist/core/v4/update/registryClient.js +121 -0
- package/dist/core/v4/update/skipState.js +75 -0
- package/dist/core/v4/verifier.js +448 -0
- package/dist/core/version.js +1 -1
- package/dist/tools/v4/browser/_observer.js +224 -0
- package/dist/tools/v4/browser/browserBlocker.js +396 -0
- package/dist/tools/v4/browser/browserClick.js +18 -1
- package/dist/tools/v4/browser/browserClose.js +18 -1
- package/dist/tools/v4/browser/browserExtract.js +5 -1
- package/dist/tools/v4/browser/browserFill.js +17 -1
- package/dist/tools/v4/browser/browserGetUrl.js +5 -1
- package/dist/tools/v4/browser/browserNavigate.js +16 -1
- package/dist/tools/v4/browser/browserScreenshot.js +5 -1
- package/dist/tools/v4/browser/browserScroll.js +18 -1
- package/dist/tools/v4/browser/browserType.js +17 -1
- package/dist/tools/v4/browser/captchaCheck.js +5 -1
- package/dist/tools/v4/executeCode.js +1 -0
- package/dist/tools/v4/files/fileCopy.js +56 -2
- package/dist/tools/v4/files/fileDelete.js +38 -1
- package/dist/tools/v4/files/fileList.js +12 -1
- package/dist/tools/v4/files/fileMove.js +59 -2
- package/dist/tools/v4/files/filePatch.js +43 -1
- package/dist/tools/v4/files/fileRead.js +12 -1
- package/dist/tools/v4/files/fileWrite.js +41 -1
- package/dist/tools/v4/index.js +71 -58
- package/dist/tools/v4/memory/memoryAdd.js +14 -0
- package/dist/tools/v4/memory/memoryRemove.js +14 -0
- package/dist/tools/v4/memory/memoryReplace.js +15 -0
- package/dist/tools/v4/memory/sessionSummary.js +12 -0
- package/dist/tools/v4/process/processKill.js +19 -0
- package/dist/tools/v4/process/processList.js +1 -0
- package/dist/tools/v4/process/processLogRead.js +1 -0
- package/dist/tools/v4/process/processSpawn.js +13 -0
- package/dist/tools/v4/process/processWait.js +1 -0
- package/dist/tools/v4/sessions/recallSession.js +1 -0
- package/dist/tools/v4/sessions/sessionList.js +1 -0
- package/dist/tools/v4/sessions/sessionSearch.js +1 -0
- package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
- package/dist/tools/v4/skills/skillManage.js +13 -0
- package/dist/tools/v4/skills/skillView.js +1 -0
- package/dist/tools/v4/skills/skillsList.js +1 -0
- package/dist/tools/v4/subagent/subagentFanout.js +1 -0
- package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
- package/dist/tools/v4/system/appClose.js +13 -0
- package/dist/tools/v4/system/appInput.js +13 -0
- package/dist/tools/v4/system/appLaunch.js +13 -0
- package/dist/tools/v4/system/clipboardRead.js +1 -0
- package/dist/tools/v4/system/clipboardWrite.js +14 -0
- package/dist/tools/v4/system/mediaKey.js +12 -0
- package/dist/tools/v4/system/mediaSessions.js +1 -0
- package/dist/tools/v4/system/mediaTransport.js +13 -0
- package/dist/tools/v4/system/naturalEvents.js +1 -0
- package/dist/tools/v4/system/nowPlaying.js +1 -0
- package/dist/tools/v4/system/osProcessList.js +1 -0
- package/dist/tools/v4/system/screenshot.js +1 -0
- package/dist/tools/v4/system/systemInfo.js +1 -0
- package/dist/tools/v4/system/volumeSet.js +17 -0
- package/dist/tools/v4/terminal/shellExec.js +81 -9
- package/dist/tools/v4/web/deepResearch.js +1 -0
- package/dist/tools/v4/web/openUrl.js +1 -0
- package/dist/tools/v4/web/webFetch.js +1 -0
- package/dist/tools/v4/web/webPage.js +1 -0
- package/dist/tools/v4/web/webSearch.js +1 -0
- package/dist/tools/v4/web/youtubeSearch.js +1 -0
- package/package.json +7 -1
|
@@ -42,6 +42,32 @@
|
|
|
42
42
|
*/
|
|
43
43
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
44
|
exports.AidenAgent = void 0;
|
|
45
|
+
// v4.1.6 spike — Task Completion Engine (TCE) per-turn loop detector
|
|
46
|
+
// + recovery controller. Default ON as of v4.2 Phase 6 — set
|
|
47
|
+
// AIDEN_TCE=0 to disable. Zero
|
|
48
|
+
// behavioral change when disabled. See core/v4/turnState.ts.
|
|
49
|
+
const turnState_1 = require("./turnState");
|
|
50
|
+
// v4.2 Phase 1 — per-tool result verifier. Same TCE gate as
|
|
51
|
+
// TurnState (default ON, opt-out via AIDEN_TCE=0); classification
|
|
52
|
+
// feeds the recovery controller.
|
|
53
|
+
const verifier_1 = require("./verifier");
|
|
54
|
+
// v4.2 Phase 2 — tool-failure WHY-classifier. Runs after the verifier
|
|
55
|
+
// when verification.ok === false. Records-only; Phase 3 will act.
|
|
56
|
+
const failureClassifier_1 = require("./failureClassifier");
|
|
57
|
+
// v4.2 Phase 3 — structured RecoveryReport. Built ONLY when the
|
|
58
|
+
// recovery controller's surface stage fires (tool_loop); enriches the
|
|
59
|
+
// existing surface card with summary + category breakdown + dominant
|
|
60
|
+
// guidance. Implicitly gated by TCE being enabled (surface only
|
|
61
|
+
// reachable when TurnState is enabled — default ON as of Phase 6).
|
|
62
|
+
const recoveryReport_1 = require("./recoveryReport");
|
|
63
|
+
// v4.2 Phase 4 — checkpoint / restore. Lets the recovery controller
|
|
64
|
+
// roll conversation messages + TurnState internals back to before a
|
|
65
|
+
// looping tool started failing, so the model retries from a clean
|
|
66
|
+
// baseline. Hard-blocked on iterations containing mutating tools
|
|
67
|
+
// (never claim to undo executed side effects). All-no-op when
|
|
68
|
+
// TCE is opted out via AIDEN_TCE=0 — capture / mark / find /
|
|
69
|
+
// restore all short-circuit.
|
|
70
|
+
const checkpoint_1 = require("./checkpoint");
|
|
45
71
|
const skillEnforcement_1 = require("./agent/skillEnforcement");
|
|
46
72
|
const urlProvenance_1 = require("./agent/urlProvenance");
|
|
47
73
|
const intentPreArm_1 = require("./agent/intentPreArm");
|
|
@@ -92,6 +118,7 @@ class AidenAgent {
|
|
|
92
118
|
this.onSkillCandidate = opts.onSkillCandidate;
|
|
93
119
|
this.resolveVerifiedFlag = opts.resolveVerifiedFlag;
|
|
94
120
|
this.resolveToolset = opts.resolveToolset;
|
|
121
|
+
this.resolveMutates = opts.resolveMutates;
|
|
95
122
|
this.promptBuilder = opts.promptBuilder;
|
|
96
123
|
this.promptBuilderOptions = opts.promptBuilderOptions;
|
|
97
124
|
this.contextCompressor = opts.contextCompressor;
|
|
@@ -108,6 +135,15 @@ class AidenAgent {
|
|
|
108
135
|
this.onPromptBuilt = opts.onPromptBuilt;
|
|
109
136
|
this.onProviderRequestStart = opts.onProviderRequestStart;
|
|
110
137
|
this.lookupSkillRequiredTools = opts.lookupSkillRequiredTools;
|
|
138
|
+
// v4.5 Phase 7 — explicit sessionId. Existing access path
|
|
139
|
+
// `(this as { sessionId?: string }).sessionId` at line 751–752
|
|
140
|
+
// already reads from `this.sessionId`; setting it here keys
|
|
141
|
+
// docker / browser / TurnState per session for daemon-mode
|
|
142
|
+
// turns. Interactive REPL callers don't pass this and continue
|
|
143
|
+
// hitting the 'session' fallback.
|
|
144
|
+
if (typeof opts.sessionId === 'string' && opts.sessionId.length > 0) {
|
|
145
|
+
this.sessionId = opts.sessionId;
|
|
146
|
+
}
|
|
111
147
|
// Phase v4.1.2-slice3: optional health registry (constructor-
|
|
112
148
|
// injected per the slice3 decision tree — no singleton). When
|
|
113
149
|
// wired, the caller already plumbed trackers into each subsystem
|
|
@@ -304,7 +340,21 @@ class AidenAgent {
|
|
|
304
340
|
}
|
|
305
341
|
}
|
|
306
342
|
// 10. SkillTeacher post-loop observation + proposal.
|
|
343
|
+
//
|
|
344
|
+
// v4.1.6 Polish 2 — `handleProposal` previously ran INLINE here,
|
|
345
|
+
// awaiting `callbacks.promptUser` (an inquirer modal) before
|
|
346
|
+
// `runConversation` returned. That made the modal fire BEFORE
|
|
347
|
+
// chatSession rendered the agent's reply on screen, so users
|
|
348
|
+
// saw "Save this as a reusable skill?" pop up mid-turn — feels
|
|
349
|
+
// like an interruption.
|
|
350
|
+
//
|
|
351
|
+
// New flow: agent ONLY observes here. When a proposal needs user
|
|
352
|
+
// confirmation (tier_3_propose with a promptUser callback), the
|
|
353
|
+
// proposal is surfaced in `AidenAgentResult.skillProposal` and
|
|
354
|
+
// chatSession handles the prompt + create dance AFTER rendering
|
|
355
|
+
// the reply. Tier_4_auto still runs inline (no prompt needed).
|
|
307
356
|
let skillCreated;
|
|
357
|
+
let skillProposal;
|
|
308
358
|
if (this.skillTeacher) {
|
|
309
359
|
try {
|
|
310
360
|
const traceForTeacher = loopResult.toolCallTrace.map((entry, i) => ({
|
|
@@ -316,9 +366,20 @@ class AidenAgent {
|
|
|
316
366
|
}));
|
|
317
367
|
const proposal = await this.skillTeacher.observeTurn(history, traceForTeacher, loopResult.finishReason !== 'stop');
|
|
318
368
|
if (proposal) {
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
369
|
+
// Defer to chatSession only when there's a prompt callback
|
|
370
|
+
// wired (tier_3_propose path). Otherwise run inline to
|
|
371
|
+
// preserve tier_4_auto and tier_off behaviour.
|
|
372
|
+
const hasPromptCallback = typeof this.skillTeacherCallbacks?.promptUser === 'function';
|
|
373
|
+
if (hasPromptCallback) {
|
|
374
|
+
// Surface the proposal back to chatSession; do NOT call
|
|
375
|
+
// handleProposal here.
|
|
376
|
+
skillProposal = proposal;
|
|
377
|
+
}
|
|
378
|
+
else {
|
|
379
|
+
const result = await this.skillTeacher.handleProposal(proposal, this.skillTeacherCallbacks);
|
|
380
|
+
if (result.created && result.skillName) {
|
|
381
|
+
skillCreated = result.skillName;
|
|
382
|
+
}
|
|
322
383
|
}
|
|
323
384
|
}
|
|
324
385
|
}
|
|
@@ -369,11 +430,20 @@ class AidenAgent {
|
|
|
369
430
|
toolCallTrace: loopResult.toolCallTrace,
|
|
370
431
|
honestyFindings,
|
|
371
432
|
skillCreated,
|
|
433
|
+
// v4.1.6 Polish 2 — deferred to chatSession's post-render
|
|
434
|
+
// handler when the SkillTeacher proposal needs user
|
|
435
|
+
// confirmation. Undefined when no proposal, when tier auto-
|
|
436
|
+
// handled inline, or when the teacher's observation faulted.
|
|
437
|
+
skillProposal,
|
|
372
438
|
compressionEvents: this.compressionEvents,
|
|
373
439
|
auxiliaryUsage: this.auxiliaryClient?.getUsage() ?? {},
|
|
374
440
|
skillEnforcement: { ...this.skillEnforcementMetrics },
|
|
375
441
|
urlProvenance: { ...this.urlProvenanceMetrics },
|
|
376
442
|
emptyResponse: { ...this.emptyResponseMetrics },
|
|
443
|
+
// v4.1.6 spike (TCE) — surfaced when TurnState hit the surface
|
|
444
|
+
// threshold mid-turn. chatSession reads this to render the
|
|
445
|
+
// structured-failure card; undefined on all other finishReasons.
|
|
446
|
+
toolLoopCard: loopResult.toolLoopCard,
|
|
377
447
|
};
|
|
378
448
|
}
|
|
379
449
|
// ── Private helpers ──────────────────────────────────────────────────
|
|
@@ -465,6 +535,10 @@ class AidenAgent {
|
|
|
465
535
|
// off the same entry index.
|
|
466
536
|
const fullTrace = [];
|
|
467
537
|
const totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
538
|
+
// v4.2 Phase 3 — turn start timestamp for RecoveryReport duration.
|
|
539
|
+
// Captured here so any code path (early-return / error / surface)
|
|
540
|
+
// can compute wallclock duration consistently.
|
|
541
|
+
const turnStartedAt = Date.now();
|
|
468
542
|
let turnCount = 0;
|
|
469
543
|
let toolCallCount = 0;
|
|
470
544
|
let fallbackActivated = false;
|
|
@@ -473,7 +547,27 @@ class AidenAgent {
|
|
|
473
547
|
let emptyRetriesUsed = 0;
|
|
474
548
|
let finishReason = 'stop';
|
|
475
549
|
let finalContent = '';
|
|
550
|
+
// v4.1.6 spike (TCE) — per-turn loop detection + recovery state.
|
|
551
|
+
// Default ON as of v4.2 Phase 6 — set AIDEN_TCE=0 to disable.
|
|
552
|
+
// When disabled, TurnState.recordToolCall short-circuits with
|
|
553
|
+
// `{kind: 'allow'}` and the entire v4.2 recovery surface stays
|
|
554
|
+
// dormant (zero behavioural change vs v4.1.6).
|
|
555
|
+
const turnState = new turnState_1.TurnState();
|
|
556
|
+
// v4.2 Phase 1 — per-tool verifier registry. Constructed
|
|
557
|
+
// unconditionally (cheap, no side effects) but only used to
|
|
558
|
+
// classify tool outcomes when TCE is enabled; verification args
|
|
559
|
+
// are passed to TurnState only inside the gated branch below.
|
|
560
|
+
const verifierRegistry = (0, verifier_1.buildDefaultRegistry)();
|
|
561
|
+
// v4.2 Phase 2 — per-tool failure classifier. Same gating as
|
|
562
|
+
// the verifier; only runs when verification.ok === false. Phase 2
|
|
563
|
+
// records-only — Phase 3 wires recovery actions off the category.
|
|
564
|
+
const failureClassifier = (0, failureClassifier_1.buildDefaultClassifier)();
|
|
565
|
+
let toolLoopCard = undefined;
|
|
476
566
|
while (true) {
|
|
567
|
+
// v4.1.6 spike — decrement cooldown counters once per iteration
|
|
568
|
+
// so cooled-down tools eventually return to the schemas. No-op
|
|
569
|
+
// when TCE is disabled.
|
|
570
|
+
turnState.advanceIteration();
|
|
477
571
|
if (turnCount >= this.maxTurns) {
|
|
478
572
|
finishReason = 'budget_exhausted';
|
|
479
573
|
break;
|
|
@@ -491,9 +585,22 @@ class AidenAgent {
|
|
|
491
585
|
this.onBudgetWarning?.('warning', turnCount, this.maxTurns);
|
|
492
586
|
}
|
|
493
587
|
// ── Provider call (stream or non-stream) ──────────────────────────
|
|
588
|
+
//
|
|
589
|
+
// v4.1.6 spike (TCE) — filter cooled-down tools out of the
|
|
590
|
+
// schemas we send to the provider. The model literally cannot
|
|
591
|
+
// see (and therefore cannot request) a cooled-down tool until
|
|
592
|
+
// its cooldown counter decrements to zero via
|
|
593
|
+
// `turnState.advanceIteration()`. No-op when TCE disabled
|
|
594
|
+
// (`getCooledDownTools()` returns []).
|
|
595
|
+
let effectiveTools = tools;
|
|
596
|
+
const cooledDown = turnState.getCooledDownTools();
|
|
597
|
+
if (cooledDown.length > 0) {
|
|
598
|
+
const cdSet = new Set(cooledDown);
|
|
599
|
+
effectiveTools = tools.filter((t) => !cdSet.has(t.name));
|
|
600
|
+
}
|
|
494
601
|
let output;
|
|
495
602
|
try {
|
|
496
|
-
output = await this.callProvider(messages,
|
|
603
|
+
output = await this.callProvider(messages, effectiveTools, runOptions);
|
|
497
604
|
}
|
|
498
605
|
catch (err) {
|
|
499
606
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
@@ -511,6 +618,25 @@ class AidenAgent {
|
|
|
511
618
|
}
|
|
512
619
|
totalUsage.inputTokens += output.usage?.inputTokens ?? 0;
|
|
513
620
|
totalUsage.outputTokens += output.usage?.outputTokens ?? 0;
|
|
621
|
+
// v4.2 Phase 4 — capture the state going INTO this iteration's
|
|
622
|
+
// tool dispatch. MUST run BEFORE `messages.push(assistantMsg)`
|
|
623
|
+
// so the checkpoint represents "the conversation before the
|
|
624
|
+
// model decided to call this iteration's tools". If rollback
|
|
625
|
+
// fires later, truncating `messages.length` to
|
|
626
|
+
// `checkpoint.messages.length` drops the assistant tool_call
|
|
627
|
+
// message together with its tool result messages — preserving
|
|
628
|
+
// tool_call/tool_result pairing in the rolled-back state.
|
|
629
|
+
//
|
|
630
|
+
// Capturing AFTER the assistant push (the prior placement) was
|
|
631
|
+
// a real bug: rollback would leave the assistant tool_call in
|
|
632
|
+
// history without its tool results, producing strict-provider
|
|
633
|
+
// 400 errors of the form "No tool output found for function
|
|
634
|
+
// call <id>". Tests in tests/v4/core/checkpoint-integration
|
|
635
|
+
// assert the post-rollback messages array contains zero orphan
|
|
636
|
+
// assistant tool_calls — this position is part of the contract.
|
|
637
|
+
//
|
|
638
|
+
// No-op when TCE is disabled (AIDEN_TCE=0) or checkpointDepth=0.
|
|
639
|
+
turnState.captureCheckpoint(messages, turnCount);
|
|
514
640
|
// ── Append assistant message ──────────────────────────────────────
|
|
515
641
|
const assistantMsg = output.toolCalls.length > 0
|
|
516
642
|
? { role: 'assistant', content: output.content ?? '', toolCalls: output.toolCalls }
|
|
@@ -585,8 +711,30 @@ class AidenAgent {
|
|
|
585
711
|
}
|
|
586
712
|
// ── Dispatch tools sequentially ──────────────────────────────────
|
|
587
713
|
const turnToolMessages = [];
|
|
714
|
+
// v4.1.6 spike (TCE) — set when TurnState surfaces a tool_loop
|
|
715
|
+
// mid-batch. The agent stops dispatching remaining calls in the
|
|
716
|
+
// batch and breaks out of the outer iteration loop cleanly.
|
|
717
|
+
let surfaceDecision = null;
|
|
718
|
+
// v4.2 Phase 4 — set when TurnState's recovery controller asks
|
|
719
|
+
// for a rollback. The agent loop truncates messages + restores
|
|
720
|
+
// TurnState internals + pushes a corrective system message,
|
|
721
|
+
// then continues the outer iteration loop from a clean baseline.
|
|
722
|
+
let rollbackDecision = null;
|
|
588
723
|
for (const call of output.toolCalls) {
|
|
589
724
|
this.onToolCall?.(call, 'before');
|
|
725
|
+
// v4.2 Phase 4 — mark any active checkpoints as containing a
|
|
726
|
+
// mutating call BEFORE dispatch. Done pre-dispatch (not post)
|
|
727
|
+
// so that even if the tool throws / errors / produces a
|
|
728
|
+
// partial side effect, the mutation flag is set — rollback
|
|
729
|
+
// safety errs on the side of "this iteration mutated state".
|
|
730
|
+
// The mutability resolver is wired from the CLI's tool
|
|
731
|
+
// registry (`resolveMutates`); unknown tools return undefined,
|
|
732
|
+
// which we treat as non-mutating (leave the flag alone).
|
|
733
|
+
// Plugin authors should declare `mutates` honestly on their
|
|
734
|
+
// tool handlers — this is the structural enforcement point.
|
|
735
|
+
if (turnState.isEnabled() && this.resolveMutates?.(call.name) === true) {
|
|
736
|
+
turnState.markMutationOnLiveCheckpoint(call.name);
|
|
737
|
+
}
|
|
590
738
|
let result;
|
|
591
739
|
try {
|
|
592
740
|
result = await this.toolExecutor(call);
|
|
@@ -600,11 +748,46 @@ class AidenAgent {
|
|
|
600
748
|
};
|
|
601
749
|
}
|
|
602
750
|
toolCallCount += 1;
|
|
751
|
+
// v4.2 Phase 1 — verifier classification. Runs only when TCE
|
|
752
|
+
// is enabled; the registry resolves a per-tool verifier or
|
|
753
|
+
// falls back to the heuristic default. Synchronous + pure;
|
|
754
|
+
// no network, no side effects.
|
|
755
|
+
let verification;
|
|
756
|
+
let classification = null;
|
|
757
|
+
if (turnState.isEnabled()) {
|
|
758
|
+
try {
|
|
759
|
+
verification = verifierRegistry.resolve(call.name)(call.name, call.arguments, result);
|
|
760
|
+
}
|
|
761
|
+
catch {
|
|
762
|
+
// Defensive — a buggy verifier never breaks the agent loop.
|
|
763
|
+
verification = undefined;
|
|
764
|
+
}
|
|
765
|
+
// v4.2 Phase 2 — classify WHY when the verifier said !ok.
|
|
766
|
+
// classify(...) returns null for ok results, so happy-path
|
|
767
|
+
// calls incur zero classifier work.
|
|
768
|
+
if (verification && !verification.ok) {
|
|
769
|
+
try {
|
|
770
|
+
classification = failureClassifier.classify(verification, call.name, call.arguments, result);
|
|
771
|
+
}
|
|
772
|
+
catch {
|
|
773
|
+
// Defensive — a buggy classifier never breaks the loop.
|
|
774
|
+
classification = null;
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
}
|
|
603
778
|
toolCallTrace.push({
|
|
604
779
|
name: call.name,
|
|
605
780
|
result: result.result,
|
|
606
781
|
error: result.error,
|
|
607
782
|
verified: this.resolveVerifiedFlag?.(result),
|
|
783
|
+
// v4.2 Phase 1 — verification surfaces alongside the trace
|
|
784
|
+
// entry for downstream callers (chatSession, loopTrace,
|
|
785
|
+
// future RecoveryReport). Undefined when TCE is off.
|
|
786
|
+
verification,
|
|
787
|
+
// v4.2 Phase 2 — classification surfaces alongside verification.
|
|
788
|
+
// Undefined for verifier-ok calls (classifier skips them) and
|
|
789
|
+
// when TCE is off.
|
|
790
|
+
classification: classification ?? undefined,
|
|
608
791
|
});
|
|
609
792
|
fullTrace.push({ name: call.name, args: call.arguments });
|
|
610
793
|
// URL ledger ingest — extracts ids from result body for next turn.
|
|
@@ -623,6 +806,126 @@ class AidenAgent {
|
|
|
623
806
|
? `[error] ${result.error}`
|
|
624
807
|
: stringifyToolResult(result.result),
|
|
625
808
|
});
|
|
809
|
+
// v4.1.6 spike (TCE) — after the tool result lands in the
|
|
810
|
+
// message history, consult the recovery controller. Returns
|
|
811
|
+
// `allow` immediately when TCE disabled (zero overhead).
|
|
812
|
+
// v4.2 Phase 1 — pass the verifier outcome so TurnState's
|
|
813
|
+
// consecFailed counter can fast-fail on demonstrably failing
|
|
814
|
+
// tool calls before the slower signature/name counters fire.
|
|
815
|
+
// v4.2 Phase 2 — also pass the classification so TurnState
|
|
816
|
+
// records the WHY for Phase 3's RecoveryReport.
|
|
817
|
+
const recovery = turnState.recordToolCall(call.name, call.arguments, verification, classification);
|
|
818
|
+
if (recovery.kind === 'hint' && recovery.hintMessage) {
|
|
819
|
+
// Stage 1: append a corrective system message so the model
|
|
820
|
+
// sees it on the next provider call. Same pattern as the
|
|
821
|
+
// existing skill-enforcement + URL-provenance correctives.
|
|
822
|
+
turnToolMessages.push({
|
|
823
|
+
role: 'system',
|
|
824
|
+
content: recovery.hintMessage,
|
|
825
|
+
});
|
|
826
|
+
}
|
|
827
|
+
else if (recovery.kind === 'cooldown_with_rollback' && recovery.rollback) {
|
|
828
|
+
// v4.2 Phase 4 — controller asks us to roll back. Capture
|
|
829
|
+
// the decision; we apply it AFTER the inner dispatch loop
|
|
830
|
+
// exits so we don't leave partial turnToolMessages in a
|
|
831
|
+
// half-state. Break out of dispatch immediately — no point
|
|
832
|
+
// running more tools whose results we're about to drop.
|
|
833
|
+
rollbackDecision = recovery;
|
|
834
|
+
break;
|
|
835
|
+
}
|
|
836
|
+
else if (recovery.kind === 'cooldown' && recovery.cooldownMessage) {
|
|
837
|
+
// Stage 2: cooldown has already been recorded internally
|
|
838
|
+
// (next iteration's schema-filter step excludes this tool).
|
|
839
|
+
// Inject a system message announcing the cooldown so the
|
|
840
|
+
// model knows why the tool just disappeared from its menu.
|
|
841
|
+
turnToolMessages.push({
|
|
842
|
+
role: 'system',
|
|
843
|
+
content: recovery.cooldownMessage,
|
|
844
|
+
});
|
|
845
|
+
}
|
|
846
|
+
else if (recovery.kind === 'surface' && recovery.surfaceCard) {
|
|
847
|
+
// Stage 3: structured failure. Stop dispatching the rest of
|
|
848
|
+
// the batch — anything else is throwing good budget after
|
|
849
|
+
// bad. The outer loop reads `surfaceDecision` below and
|
|
850
|
+
// exits cleanly.
|
|
851
|
+
surfaceDecision = recovery;
|
|
852
|
+
break;
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
// v4.2 Phase 4 — apply rollback if the controller asked for it.
|
|
856
|
+
// Truncate messages to the captured snapshot length, restore
|
|
857
|
+
// TurnState internals, then push a corrective system message
|
|
858
|
+
// and continue the OUTER iteration loop. We deliberately drop
|
|
859
|
+
// any partial `turnToolMessages` collected before the rollback
|
|
860
|
+
// trigger — those are the noise we're trying to undo.
|
|
861
|
+
//
|
|
862
|
+
// Hard-block invariant: TurnState only emits
|
|
863
|
+
// `cooldown_with_rollback` when the target checkpoint has
|
|
864
|
+
// `containedMutations === false`, so we never get here for an
|
|
865
|
+
// iteration that ran a mutating tool. The optional
|
|
866
|
+
// `rollback.blockedBy` is empty in Phase 4 (kept on the type
|
|
867
|
+
// for a Phase 5+ soft-rollback variant).
|
|
868
|
+
if (rollbackDecision && rollbackDecision.rollback) {
|
|
869
|
+
const { checkpoint, blockedBy } = rollbackDecision.rollback;
|
|
870
|
+
// Truncate messages array to the captured length. The captured
|
|
871
|
+
// items are immutable Message references; we keep them as-is
|
|
872
|
+
// and just shorten the live array.
|
|
873
|
+
messages.length = checkpoint.messages.length;
|
|
874
|
+
// Restore TurnState mutable internals (stage / streaks /
|
|
875
|
+
// cooledDownTools / arrays). The cooled-down tools map is
|
|
876
|
+
// preserved as it was at checkpoint time — but the controller
|
|
877
|
+
// already added the looping tool to `cooledDownTools` before
|
|
878
|
+
// emitting the decision, so we need to RE-apply that cooldown
|
|
879
|
+
// after restore to honour the cooldown intent.
|
|
880
|
+
turnState.restoreInternalsFrom(checkpoint);
|
|
881
|
+
// Re-cool the tool that triggered the rollback so the next
|
|
882
|
+
// provider call sees the constrained schema.
|
|
883
|
+
if (rollbackDecision.toolName) {
|
|
884
|
+
turnState.reapplyCooldown(rollbackDecision.toolName);
|
|
885
|
+
}
|
|
886
|
+
// Inject corrective system message so the model sees what
|
|
887
|
+
// happened and why the tool just disappeared from its menu.
|
|
888
|
+
messages.push({
|
|
889
|
+
role: 'system',
|
|
890
|
+
content: (0, checkpoint_1.buildRollbackMessage)({
|
|
891
|
+
iteration: checkpoint.iteration,
|
|
892
|
+
toolName: rollbackDecision.toolName,
|
|
893
|
+
blockedBy,
|
|
894
|
+
}),
|
|
895
|
+
});
|
|
896
|
+
// Continue the outer iteration loop from the restored
|
|
897
|
+
// baseline. The next provider call gets the filtered tool
|
|
898
|
+
// schema (cooldown applied) and the corrective message.
|
|
899
|
+
continue;
|
|
900
|
+
}
|
|
901
|
+
// v4.1.6 spike (TCE) — terminal surface handling.
|
|
902
|
+
if (surfaceDecision && surfaceDecision.kind === 'surface') {
|
|
903
|
+
finishReason = 'tool_loop';
|
|
904
|
+
// v4.2 Phase 3 — enrich the base surface card with a
|
|
905
|
+
// structured RecoveryReport. Pure synthesis from TurnState's
|
|
906
|
+
// diagnostic snapshot + first-user-message goal + duration.
|
|
907
|
+
// Implicit gating: this branch is only reachable when
|
|
908
|
+
// TurnState is enabled, so AIDEN_TCE=0 (opt-out) never
|
|
909
|
+
// builds a report.
|
|
910
|
+
if (surfaceDecision.surfaceCard) {
|
|
911
|
+
const report = (0, recoveryReport_1.buildRecoveryReport)({
|
|
912
|
+
snapshot: turnState.getDiagnosticSnapshot(),
|
|
913
|
+
goal: (0, recoveryReport_1.extractGoal)(messages),
|
|
914
|
+
exitReason: 'tool_loop',
|
|
915
|
+
durationMs: Date.now() - turnStartedAt,
|
|
916
|
+
});
|
|
917
|
+
toolLoopCard = (0, recoveryReport_1.enrichCardWithReport)(surfaceDecision.surfaceCard, report);
|
|
918
|
+
}
|
|
919
|
+
else {
|
|
920
|
+
toolLoopCard = surfaceDecision.surfaceCard;
|
|
921
|
+
}
|
|
922
|
+
// Push the partial tool messages we collected so honesty +
|
|
923
|
+
// history downstream see the full sequence including the
|
|
924
|
+
// loop-trigger call. No final assistant message — the
|
|
925
|
+
// tool_loop card IS the user-facing surface.
|
|
926
|
+
messages.push(...turnToolMessages);
|
|
927
|
+
finalContent = '';
|
|
928
|
+
break;
|
|
626
929
|
}
|
|
627
930
|
// ── Iteration-budget injection on the LAST tool message ──────────
|
|
628
931
|
if (this.iterationBudgetInjection && turnToolMessages.length > 0) {
|
|
@@ -645,6 +948,7 @@ class AidenAgent {
|
|
|
645
948
|
totalUsage,
|
|
646
949
|
toolCallTrace,
|
|
647
950
|
fullTrace,
|
|
951
|
+
toolLoopCard,
|
|
648
952
|
};
|
|
649
953
|
}
|
|
650
954
|
/**
|