aiden-runtime 4.1.4 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -847
- package/dist/api/server.js +32 -5
- package/dist/cli/v4/aidenCLI.js +379 -53
- package/dist/cli/v4/callbacks.js +248 -0
- package/dist/cli/v4/chatSession.js +292 -4
- package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
- package/dist/cli/v4/commands/browserDepth.js +45 -0
- package/dist/cli/v4/commands/cron.js +264 -0
- package/dist/cli/v4/commands/daemon.js +541 -0
- package/dist/cli/v4/commands/daemonStatus.js +253 -0
- package/dist/cli/v4/commands/help.js +7 -0
- package/dist/cli/v4/commands/index.js +20 -1
- package/dist/cli/v4/commands/runs.js +203 -0
- package/dist/cli/v4/commands/sandbox.js +48 -0
- package/dist/cli/v4/commands/suggestions.js +68 -0
- package/dist/cli/v4/commands/tce.js +41 -0
- package/dist/cli/v4/commands/trigger.js +378 -0
- package/dist/cli/v4/commands/update.js +95 -3
- package/dist/cli/v4/daemonAgentBuilder.js +142 -0
- package/dist/cli/v4/defaultSoul.js +75 -3
- package/dist/cli/v4/display/capabilityCard.js +26 -0
- package/dist/cli/v4/display/progressBar.js +41 -8
- package/dist/cli/v4/display.js +258 -15
- package/dist/cli/v4/replyRenderer.js +31 -23
- package/dist/cli/v4/toolPreview.js +10 -0
- package/dist/cli/v4/updateBootPrompt.js +170 -0
- package/dist/core/playwrightBridge.js +129 -0
- package/dist/core/toolRegistry.js +7 -1
- package/dist/core/v4/aidenAgent.js +371 -4
- package/dist/core/v4/browserState.js +436 -0
- package/dist/core/v4/checkpoint.js +79 -0
- package/dist/core/v4/daemon/bootstrap.js +604 -0
- package/dist/core/v4/daemon/cleanShutdown.js +154 -0
- package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
- package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
- package/dist/core/v4/daemon/cron/migration.js +199 -0
- package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
- package/dist/core/v4/daemon/daemonConfig.js +90 -0
- package/dist/core/v4/daemon/db/connection.js +106 -0
- package/dist/core/v4/daemon/db/migrations.js +296 -0
- package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
- package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
- package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
- package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
- package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
- package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
- package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
- package/dist/core/v4/daemon/dispatcher/index.js +53 -0
- package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
- package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
- package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
- package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
- package/dist/core/v4/daemon/drain.js +156 -0
- package/dist/core/v4/daemon/eventLoopLag.js +73 -0
- package/dist/core/v4/daemon/health.js +159 -0
- package/dist/core/v4/daemon/idempotencyStore.js +204 -0
- package/dist/core/v4/daemon/index.js +179 -0
- package/dist/core/v4/daemon/instanceTracker.js +99 -0
- package/dist/core/v4/daemon/resourceRegistry.js +150 -0
- package/dist/core/v4/daemon/restartCode.js +32 -0
- package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
- package/dist/core/v4/daemon/runStore.js +114 -0
- package/dist/core/v4/daemon/runtimeLock.js +167 -0
- package/dist/core/v4/daemon/signals.js +50 -0
- package/dist/core/v4/daemon/supervisor.js +272 -0
- package/dist/core/v4/daemon/triggerBus.js +279 -0
- package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
- package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
- package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
- package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
- package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
- package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
- package/dist/core/v4/daemon/triggers/email/index.js +332 -0
- package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
- package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
- package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
- package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
- package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
- package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
- package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
- package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
- package/dist/core/v4/daemon/triggers/webhook.js +376 -0
- package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
- package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
- package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
- package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
- package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
- package/dist/core/v4/daemon/types.js +15 -0
- package/dist/core/v4/dockerSession.js +461 -0
- package/dist/core/v4/dryRun.js +117 -0
- package/dist/core/v4/failureClassifier.js +779 -0
- package/dist/core/v4/loopTrace.js +257 -0
- package/dist/core/v4/recoveryReport.js +449 -0
- package/dist/core/v4/runtimeToggles.js +187 -0
- package/dist/core/v4/sandboxConfig.js +285 -0
- package/dist/core/v4/sandboxFs.js +316 -0
- package/dist/core/v4/suggestionCatalog.js +41 -0
- package/dist/core/v4/suggestionEngine.js +210 -0
- package/dist/core/v4/toolRegistry.js +18 -0
- package/dist/core/v4/turnState.js +587 -0
- package/dist/core/v4/update/checkUpdate.js +63 -3
- package/dist/core/v4/update/installMethodDetect.js +115 -0
- package/dist/core/v4/update/registryClient.js +121 -0
- package/dist/core/v4/update/skipState.js +75 -0
- package/dist/core/v4/verifier.js +448 -0
- package/dist/core/version.js +1 -1
- package/dist/core/webSearch.js +64 -24
- package/dist/tools/v4/browser/_observer.js +224 -0
- package/dist/tools/v4/browser/browserBlocker.js +396 -0
- package/dist/tools/v4/browser/browserClick.js +18 -1
- package/dist/tools/v4/browser/browserClose.js +18 -1
- package/dist/tools/v4/browser/browserExtract.js +5 -1
- package/dist/tools/v4/browser/browserFill.js +17 -1
- package/dist/tools/v4/browser/browserGetUrl.js +5 -1
- package/dist/tools/v4/browser/browserNavigate.js +16 -1
- package/dist/tools/v4/browser/browserScreenshot.js +5 -1
- package/dist/tools/v4/browser/browserScroll.js +18 -1
- package/dist/tools/v4/browser/browserType.js +17 -1
- package/dist/tools/v4/browser/captchaCheck.js +5 -1
- package/dist/tools/v4/executeCode.js +1 -0
- package/dist/tools/v4/files/fileCopy.js +56 -2
- package/dist/tools/v4/files/fileDelete.js +38 -1
- package/dist/tools/v4/files/fileList.js +12 -1
- package/dist/tools/v4/files/fileMove.js +59 -2
- package/dist/tools/v4/files/filePatch.js +43 -1
- package/dist/tools/v4/files/fileRead.js +12 -1
- package/dist/tools/v4/files/fileWrite.js +41 -1
- package/dist/tools/v4/index.js +71 -58
- package/dist/tools/v4/memory/memoryAdd.js +14 -0
- package/dist/tools/v4/memory/memoryRemove.js +14 -0
- package/dist/tools/v4/memory/memoryReplace.js +15 -0
- package/dist/tools/v4/memory/sessionSummary.js +12 -0
- package/dist/tools/v4/process/processKill.js +19 -0
- package/dist/tools/v4/process/processList.js +1 -0
- package/dist/tools/v4/process/processLogRead.js +1 -0
- package/dist/tools/v4/process/processSpawn.js +13 -0
- package/dist/tools/v4/process/processWait.js +1 -0
- package/dist/tools/v4/sessions/recallSession.js +1 -0
- package/dist/tools/v4/sessions/sessionList.js +1 -0
- package/dist/tools/v4/sessions/sessionSearch.js +1 -0
- package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
- package/dist/tools/v4/skills/skillManage.js +13 -0
- package/dist/tools/v4/skills/skillView.js +1 -0
- package/dist/tools/v4/skills/skillsList.js +1 -0
- package/dist/tools/v4/subagent/subagentFanout.js +1 -0
- package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
- package/dist/tools/v4/system/appClose.js +13 -0
- package/dist/tools/v4/system/appInput.js +13 -0
- package/dist/tools/v4/system/appLaunch.js +13 -0
- package/dist/tools/v4/system/clipboardRead.js +1 -0
- package/dist/tools/v4/system/clipboardWrite.js +14 -0
- package/dist/tools/v4/system/mediaKey.js +12 -0
- package/dist/tools/v4/system/mediaSessions.js +1 -0
- package/dist/tools/v4/system/mediaTransport.js +13 -0
- package/dist/tools/v4/system/naturalEvents.js +1 -0
- package/dist/tools/v4/system/nowPlaying.js +1 -0
- package/dist/tools/v4/system/osProcessList.js +1 -0
- package/dist/tools/v4/system/screenshot.js +1 -0
- package/dist/tools/v4/system/systemInfo.js +1 -0
- package/dist/tools/v4/system/volumeSet.js +17 -0
- package/dist/tools/v4/terminal/shellExec.js +81 -9
- package/dist/tools/v4/web/deepResearch.js +1 -0
- package/dist/tools/v4/web/openUrl.js +1 -0
- package/dist/tools/v4/web/webFetch.js +1 -0
- package/dist/tools/v4/web/webPage.js +1 -0
- package/dist/tools/v4/web/webSearch.js +1 -0
- package/dist/tools/v4/web/youtubeSearch.js +1 -0
- package/package.json +7 -1
- package/plugins/aiden-plugin-cdp-browser/.granted-permissions.json +8 -0
|
@@ -42,6 +42,32 @@
|
|
|
42
42
|
*/
|
|
43
43
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
44
|
exports.AidenAgent = void 0;
|
|
45
|
+
// v4.1.6 spike — Task Completion Engine (TCE) per-turn loop detector
|
|
46
|
+
// + recovery controller. Default ON as of v4.2 Phase 6 — set
|
|
47
|
+
// AIDEN_TCE=0 to disable. Zero
|
|
48
|
+
// behavioral change when disabled. See core/v4/turnState.ts.
|
|
49
|
+
const turnState_1 = require("./turnState");
|
|
50
|
+
// v4.2 Phase 1 — per-tool result verifier. Same TCE gate as
|
|
51
|
+
// TurnState (default ON, opt-out via AIDEN_TCE=0); classification
|
|
52
|
+
// feeds the recovery controller.
|
|
53
|
+
const verifier_1 = require("./verifier");
|
|
54
|
+
// v4.2 Phase 2 — tool-failure WHY-classifier. Runs after the verifier
|
|
55
|
+
// when verification.ok === false. Records-only; Phase 3 will act.
|
|
56
|
+
const failureClassifier_1 = require("./failureClassifier");
|
|
57
|
+
// v4.2 Phase 3 — structured RecoveryReport. Built ONLY when the
|
|
58
|
+
// recovery controller's surface stage fires (tool_loop); enriches the
|
|
59
|
+
// existing surface card with summary + category breakdown + dominant
|
|
60
|
+
// guidance. Implicitly gated by TCE being enabled (surface only
|
|
61
|
+
// reachable when TurnState is enabled — default ON as of Phase 6).
|
|
62
|
+
const recoveryReport_1 = require("./recoveryReport");
|
|
63
|
+
// v4.2 Phase 4 — checkpoint / restore. Lets the recovery controller
|
|
64
|
+
// roll conversation messages + TurnState internals back to before a
|
|
65
|
+
// looping tool started failing, so the model retries from a clean
|
|
66
|
+
// baseline. Hard-blocked on iterations containing mutating tools
|
|
67
|
+
// (never claim to undo executed side effects). All-no-op when
|
|
68
|
+
// TCE is opted out via AIDEN_TCE=0 — capture / mark / find /
|
|
69
|
+
// restore all short-circuit.
|
|
70
|
+
const checkpoint_1 = require("./checkpoint");
|
|
45
71
|
const skillEnforcement_1 = require("./agent/skillEnforcement");
|
|
46
72
|
const urlProvenance_1 = require("./agent/urlProvenance");
|
|
47
73
|
const intentPreArm_1 = require("./agent/intentPreArm");
|
|
@@ -92,6 +118,7 @@ class AidenAgent {
|
|
|
92
118
|
this.onSkillCandidate = opts.onSkillCandidate;
|
|
93
119
|
this.resolveVerifiedFlag = opts.resolveVerifiedFlag;
|
|
94
120
|
this.resolveToolset = opts.resolveToolset;
|
|
121
|
+
this.resolveMutates = opts.resolveMutates;
|
|
95
122
|
this.promptBuilder = opts.promptBuilder;
|
|
96
123
|
this.promptBuilderOptions = opts.promptBuilderOptions;
|
|
97
124
|
this.contextCompressor = opts.contextCompressor;
|
|
@@ -103,7 +130,20 @@ class AidenAgent {
|
|
|
103
130
|
this.onCompression = opts.onCompression;
|
|
104
131
|
this.refreshMemorySnapshot = opts.refreshMemorySnapshot;
|
|
105
132
|
this.onMemoryRefresh = opts.onMemoryRefresh;
|
|
133
|
+
// v4.1.5 Issue K — phase hooks (all optional, fire defensively).
|
|
134
|
+
this.onMemoryRefreshStart = opts.onMemoryRefreshStart;
|
|
135
|
+
this.onPromptBuilt = opts.onPromptBuilt;
|
|
136
|
+
this.onProviderRequestStart = opts.onProviderRequestStart;
|
|
106
137
|
this.lookupSkillRequiredTools = opts.lookupSkillRequiredTools;
|
|
138
|
+
// v4.5 Phase 7 — explicit sessionId. Existing access path
|
|
139
|
+
// `(this as { sessionId?: string }).sessionId` at line 751–752
|
|
140
|
+
// already reads from `this.sessionId`; setting it here keys
|
|
141
|
+
// docker / browser / TurnState per session for daemon-mode
|
|
142
|
+
// turns. Interactive REPL callers don't pass this and continue
|
|
143
|
+
// hitting the 'session' fallback.
|
|
144
|
+
if (typeof opts.sessionId === 'string' && opts.sessionId.length > 0) {
|
|
145
|
+
this.sessionId = opts.sessionId;
|
|
146
|
+
}
|
|
107
147
|
// Phase v4.1.2-slice3: optional health registry (constructor-
|
|
108
148
|
// injected per the slice3 decision tree — no singleton). When
|
|
109
149
|
// wired, the caller already plumbed trackers into each subsystem
|
|
@@ -300,7 +340,21 @@ class AidenAgent {
|
|
|
300
340
|
}
|
|
301
341
|
}
|
|
302
342
|
// 10. SkillTeacher post-loop observation + proposal.
|
|
343
|
+
//
|
|
344
|
+
// v4.1.6 Polish 2 — `handleProposal` previously ran INLINE here,
|
|
345
|
+
// awaiting `callbacks.promptUser` (an inquirer modal) before
|
|
346
|
+
// `runConversation` returned. That made the modal fire BEFORE
|
|
347
|
+
// chatSession rendered the agent's reply on screen, so users
|
|
348
|
+
// saw "Save this as a reusable skill?" pop up mid-turn — feels
|
|
349
|
+
// like an interruption.
|
|
350
|
+
//
|
|
351
|
+
// New flow: agent ONLY observes here. When a proposal needs user
|
|
352
|
+
// confirmation (tier_3_propose with a promptUser callback), the
|
|
353
|
+
// proposal is surfaced in `AidenAgentResult.skillProposal` and
|
|
354
|
+
// chatSession handles the prompt + create dance AFTER rendering
|
|
355
|
+
// the reply. Tier_4_auto still runs inline (no prompt needed).
|
|
303
356
|
let skillCreated;
|
|
357
|
+
let skillProposal;
|
|
304
358
|
if (this.skillTeacher) {
|
|
305
359
|
try {
|
|
306
360
|
const traceForTeacher = loopResult.toolCallTrace.map((entry, i) => ({
|
|
@@ -312,9 +366,20 @@ class AidenAgent {
|
|
|
312
366
|
}));
|
|
313
367
|
const proposal = await this.skillTeacher.observeTurn(history, traceForTeacher, loopResult.finishReason !== 'stop');
|
|
314
368
|
if (proposal) {
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
369
|
+
// Defer to chatSession only when there's a prompt callback
|
|
370
|
+
// wired (tier_3_propose path). Otherwise run inline to
|
|
371
|
+
// preserve tier_4_auto and tier_off behaviour.
|
|
372
|
+
const hasPromptCallback = typeof this.skillTeacherCallbacks?.promptUser === 'function';
|
|
373
|
+
if (hasPromptCallback) {
|
|
374
|
+
// Surface the proposal back to chatSession; do NOT call
|
|
375
|
+
// handleProposal here.
|
|
376
|
+
skillProposal = proposal;
|
|
377
|
+
}
|
|
378
|
+
else {
|
|
379
|
+
const result = await this.skillTeacher.handleProposal(proposal, this.skillTeacherCallbacks);
|
|
380
|
+
if (result.created && result.skillName) {
|
|
381
|
+
skillCreated = result.skillName;
|
|
382
|
+
}
|
|
318
383
|
}
|
|
319
384
|
}
|
|
320
385
|
}
|
|
@@ -365,11 +430,20 @@ class AidenAgent {
|
|
|
365
430
|
toolCallTrace: loopResult.toolCallTrace,
|
|
366
431
|
honestyFindings,
|
|
367
432
|
skillCreated,
|
|
433
|
+
// v4.1.6 Polish 2 — deferred to chatSession's post-render
|
|
434
|
+
// handler when the SkillTeacher proposal needs user
|
|
435
|
+
// confirmation. Undefined when no proposal, when tier auto-
|
|
436
|
+
// handled inline, or when the teacher's observation faulted.
|
|
437
|
+
skillProposal,
|
|
368
438
|
compressionEvents: this.compressionEvents,
|
|
369
439
|
auxiliaryUsage: this.auxiliaryClient?.getUsage() ?? {},
|
|
370
440
|
skillEnforcement: { ...this.skillEnforcementMetrics },
|
|
371
441
|
urlProvenance: { ...this.urlProvenanceMetrics },
|
|
372
442
|
emptyResponse: { ...this.emptyResponseMetrics },
|
|
443
|
+
// v4.1.6 spike (TCE) — surfaced when TurnState hit the surface
|
|
444
|
+
// threshold mid-turn. chatSession reads this to render the
|
|
445
|
+
// structured-failure card; undefined on all other finishReasons.
|
|
446
|
+
toolLoopCard: loopResult.toolLoopCard,
|
|
373
447
|
};
|
|
374
448
|
}
|
|
375
449
|
// ── Private helpers ──────────────────────────────────────────────────
|
|
@@ -386,6 +460,14 @@ class AidenAgent {
|
|
|
386
460
|
// / 'user' need a snapshot refresh first.
|
|
387
461
|
const needsSnapshot = this.memoryDirty.has('memory') || this.memoryDirty.has('user');
|
|
388
462
|
if (needsSnapshot && this.refreshMemorySnapshot) {
|
|
463
|
+
// v4.1.5 Issue K — fire BEFORE the file I/O so the display layer
|
|
464
|
+
// can switch the activity verb to "refreshing memory" while the
|
|
465
|
+
// read is in flight. Defensive try/catch so a misbehaving hook
|
|
466
|
+
// never blocks the refresh.
|
|
467
|
+
try {
|
|
468
|
+
this.onMemoryRefreshStart?.();
|
|
469
|
+
}
|
|
470
|
+
catch { /* defensive */ }
|
|
389
471
|
let snapshot;
|
|
390
472
|
try {
|
|
391
473
|
snapshot = await this.refreshMemorySnapshot();
|
|
@@ -410,6 +492,21 @@ class AidenAgent {
|
|
|
410
492
|
if (this.cachedSystemPrompt !== null)
|
|
411
493
|
return this.cachedSystemPrompt;
|
|
412
494
|
this.cachedSystemPrompt = await this.promptBuilder.build(this.promptBuilderOptions);
|
|
495
|
+
// v4.1.5 Issue K — fire AFTER the prompt has been assembled, with
|
|
496
|
+
// cardinality so the display layer can surface "preparing prompt:
|
|
497
|
+
// N tools, M skills" or similar. Only fires when the cache MISSED
|
|
498
|
+
// (which is what made us actually build); cached returns skip the
|
|
499
|
+
// hook because nothing was prepared this turn. Defensive try/catch.
|
|
500
|
+
if (this.onPromptBuilt) {
|
|
501
|
+
try {
|
|
502
|
+
this.onPromptBuilt({
|
|
503
|
+
tools: this.tools.length,
|
|
504
|
+
skills: this.promptBuilderOptions.skillsList?.length ?? 0,
|
|
505
|
+
memoryFacts: countMemoryFacts(this.promptBuilderOptions.memorySnapshot),
|
|
506
|
+
});
|
|
507
|
+
}
|
|
508
|
+
catch { /* defensive */ }
|
|
509
|
+
}
|
|
413
510
|
return this.cachedSystemPrompt;
|
|
414
511
|
}
|
|
415
512
|
async narrowTools(userMsg, history) {
|
|
@@ -438,6 +535,10 @@ class AidenAgent {
|
|
|
438
535
|
// off the same entry index.
|
|
439
536
|
const fullTrace = [];
|
|
440
537
|
const totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
538
|
+
// v4.2 Phase 3 — turn start timestamp for RecoveryReport duration.
|
|
539
|
+
// Captured here so any code path (early-return / error / surface)
|
|
540
|
+
// can compute wallclock duration consistently.
|
|
541
|
+
const turnStartedAt = Date.now();
|
|
441
542
|
let turnCount = 0;
|
|
442
543
|
let toolCallCount = 0;
|
|
443
544
|
let fallbackActivated = false;
|
|
@@ -446,7 +547,27 @@ class AidenAgent {
|
|
|
446
547
|
let emptyRetriesUsed = 0;
|
|
447
548
|
let finishReason = 'stop';
|
|
448
549
|
let finalContent = '';
|
|
550
|
+
// v4.1.6 spike (TCE) — per-turn loop detection + recovery state.
|
|
551
|
+
// Default ON as of v4.2 Phase 6 — set AIDEN_TCE=0 to disable.
|
|
552
|
+
// When disabled, TurnState.recordToolCall short-circuits with
|
|
553
|
+
// `{kind: 'allow'}` and the entire v4.2 recovery surface stays
|
|
554
|
+
// dormant (zero behavioural change vs v4.1.6).
|
|
555
|
+
const turnState = new turnState_1.TurnState();
|
|
556
|
+
// v4.2 Phase 1 — per-tool verifier registry. Constructed
|
|
557
|
+
// unconditionally (cheap, no side effects) but only used to
|
|
558
|
+
// classify tool outcomes when TCE is enabled; verification args
|
|
559
|
+
// are computed only inside the gated branch below (undefined otherwise).
|
|
560
|
+
const verifierRegistry = (0, verifier_1.buildDefaultRegistry)();
|
|
561
|
+
// v4.2 Phase 2 — per-tool failure classifier. Same gating as
|
|
562
|
+
// the verifier; only runs when verification.ok === false. Phase 2
|
|
563
|
+
// records-only — Phase 3 wires recovery actions off the category.
|
|
564
|
+
const failureClassifier = (0, failureClassifier_1.buildDefaultClassifier)();
|
|
565
|
+
let toolLoopCard = undefined;
|
|
449
566
|
while (true) {
|
|
567
|
+
// v4.1.6 spike — decrement cooldown counters once per iteration
|
|
568
|
+
// so cooled-down tools eventually return to the schemas. No-op
|
|
569
|
+
// when TCE is disabled.
|
|
570
|
+
turnState.advanceIteration();
|
|
450
571
|
if (turnCount >= this.maxTurns) {
|
|
451
572
|
finishReason = 'budget_exhausted';
|
|
452
573
|
break;
|
|
@@ -464,9 +585,22 @@ class AidenAgent {
|
|
|
464
585
|
this.onBudgetWarning?.('warning', turnCount, this.maxTurns);
|
|
465
586
|
}
|
|
466
587
|
// ── Provider call (stream or non-stream) ──────────────────────────
|
|
588
|
+
//
|
|
589
|
+
// v4.1.6 spike (TCE) — filter cooled-down tools out of the
|
|
590
|
+
// schemas we send to the provider. The model literally cannot
|
|
591
|
+
// see (and therefore cannot request) a cooled-down tool until
|
|
592
|
+
// its cooldown counter decrements to zero via
|
|
593
|
+
// `turnState.advanceIteration()`. No-op when TCE disabled
|
|
594
|
+
// (`getCooledDownTools()` returns []).
|
|
595
|
+
let effectiveTools = tools;
|
|
596
|
+
const cooledDown = turnState.getCooledDownTools();
|
|
597
|
+
if (cooledDown.length > 0) {
|
|
598
|
+
const cdSet = new Set(cooledDown);
|
|
599
|
+
effectiveTools = tools.filter((t) => !cdSet.has(t.name));
|
|
600
|
+
}
|
|
467
601
|
let output;
|
|
468
602
|
try {
|
|
469
|
-
output = await this.callProvider(messages,
|
|
603
|
+
output = await this.callProvider(messages, effectiveTools, runOptions);
|
|
470
604
|
}
|
|
471
605
|
catch (err) {
|
|
472
606
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
@@ -484,6 +618,25 @@ class AidenAgent {
|
|
|
484
618
|
}
|
|
485
619
|
totalUsage.inputTokens += output.usage?.inputTokens ?? 0;
|
|
486
620
|
totalUsage.outputTokens += output.usage?.outputTokens ?? 0;
|
|
621
|
+
// v4.2 Phase 4 — capture the state going INTO this iteration's
|
|
622
|
+
// tool dispatch. MUST run BEFORE `messages.push(assistantMsg)`
|
|
623
|
+
// so the checkpoint represents "the conversation before the
|
|
624
|
+
// model decided to call this iteration's tools". If rollback
|
|
625
|
+
// fires later, truncating `messages.length` to
|
|
626
|
+
// `checkpoint.messages.length` drops the assistant tool_call
|
|
627
|
+
// message together with its tool result messages — preserving
|
|
628
|
+
// tool_call/tool_result pairing in the rolled-back state.
|
|
629
|
+
//
|
|
630
|
+
// Capturing AFTER the assistant push (the prior placement) was
|
|
631
|
+
// a real bug: rollback would leave the assistant tool_call in
|
|
632
|
+
// history without its tool results, producing strict-provider
|
|
633
|
+
// 400 errors of the form "No tool output found for function
|
|
634
|
+
// call <id>". Tests in tests/v4/core/checkpoint-integration
|
|
635
|
+
// assert the post-rollback messages array contains zero orphan
|
|
636
|
+
// assistant tool_calls — this position is part of the contract.
|
|
637
|
+
//
|
|
638
|
+
// No-op when TCE is disabled (AIDEN_TCE=0) or checkpointDepth=0.
|
|
639
|
+
turnState.captureCheckpoint(messages, turnCount);
|
|
487
640
|
// ── Append assistant message ──────────────────────────────────────
|
|
488
641
|
const assistantMsg = output.toolCalls.length > 0
|
|
489
642
|
? { role: 'assistant', content: output.content ?? '', toolCalls: output.toolCalls }
|
|
@@ -558,8 +711,30 @@ class AidenAgent {
|
|
|
558
711
|
}
|
|
559
712
|
// ── Dispatch tools sequentially ──────────────────────────────────
|
|
560
713
|
const turnToolMessages = [];
|
|
714
|
+
// v4.1.6 spike (TCE) — set when TurnState surfaces a tool_loop
|
|
715
|
+
// mid-batch. The agent stops dispatching remaining calls in the
|
|
716
|
+
// batch and breaks out of the outer iteration loop cleanly.
|
|
717
|
+
let surfaceDecision = null;
|
|
718
|
+
// v4.2 Phase 4 — set when TurnState's recovery controller asks
|
|
719
|
+
// for a rollback. The agent loop truncates messages + restores
|
|
720
|
+
// TurnState internals + pushes a corrective system message,
|
|
721
|
+
// then continues the outer iteration loop from a clean baseline.
|
|
722
|
+
let rollbackDecision = null;
|
|
561
723
|
for (const call of output.toolCalls) {
|
|
562
724
|
this.onToolCall?.(call, 'before');
|
|
725
|
+
// v4.2 Phase 4 — mark any active checkpoints as containing a
|
|
726
|
+
// mutating call BEFORE dispatch. Done pre-dispatch (not post)
|
|
727
|
+
// so that even if the tool throws / errors / produces a
|
|
728
|
+
// partial side effect, the mutation flag is set — rollback
|
|
729
|
+
// safety errs on the side of "this iteration mutated state".
|
|
730
|
+
// The mutability resolver is wired from the CLI's tool
|
|
731
|
+
// registry (`resolveMutates`); unknown tools return undefined,
|
|
732
|
+
// which we treat as non-mutating (leave the flag alone).
|
|
733
|
+
// Plugin authors should declare `mutates` honestly on their
|
|
734
|
+
// tool handlers — this is the structural enforcement point.
|
|
735
|
+
if (turnState.isEnabled() && this.resolveMutates?.(call.name) === true) {
|
|
736
|
+
turnState.markMutationOnLiveCheckpoint(call.name);
|
|
737
|
+
}
|
|
563
738
|
let result;
|
|
564
739
|
try {
|
|
565
740
|
result = await this.toolExecutor(call);
|
|
@@ -573,11 +748,46 @@ class AidenAgent {
|
|
|
573
748
|
};
|
|
574
749
|
}
|
|
575
750
|
toolCallCount += 1;
|
|
751
|
+
// v4.2 Phase 1 — verifier classification. Runs only when TCE
|
|
752
|
+
// is enabled; the registry resolves a per-tool verifier or
|
|
753
|
+
// falls back to the heuristic default. Synchronous + pure;
|
|
754
|
+
// no network, no side effects.
|
|
755
|
+
let verification;
|
|
756
|
+
let classification = null;
|
|
757
|
+
if (turnState.isEnabled()) {
|
|
758
|
+
try {
|
|
759
|
+
verification = verifierRegistry.resolve(call.name)(call.name, call.arguments, result);
|
|
760
|
+
}
|
|
761
|
+
catch {
|
|
762
|
+
// Defensive — a buggy verifier never breaks the agent loop.
|
|
763
|
+
verification = undefined;
|
|
764
|
+
}
|
|
765
|
+
// v4.2 Phase 2 — classify WHY when the verifier said !ok.
|
|
766
|
+
// classify(...) returns null for ok results, so happy-path
|
|
767
|
+
// calls incur zero classifier work.
|
|
768
|
+
if (verification && !verification.ok) {
|
|
769
|
+
try {
|
|
770
|
+
classification = failureClassifier.classify(verification, call.name, call.arguments, result);
|
|
771
|
+
}
|
|
772
|
+
catch {
|
|
773
|
+
// Defensive — a buggy classifier never breaks the loop.
|
|
774
|
+
classification = null;
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
}
|
|
576
778
|
toolCallTrace.push({
|
|
577
779
|
name: call.name,
|
|
578
780
|
result: result.result,
|
|
579
781
|
error: result.error,
|
|
580
782
|
verified: this.resolveVerifiedFlag?.(result),
|
|
783
|
+
// v4.2 Phase 1 — verification surfaces alongside the trace
|
|
784
|
+
// entry for downstream callers (chatSession, loopTrace,
|
|
785
|
+
// future RecoveryReport). Undefined when TCE is off.
|
|
786
|
+
verification,
|
|
787
|
+
// v4.2 Phase 2 — classification surfaces alongside verification.
|
|
788
|
+
// Undefined for verifier-ok calls (classifier skips them) and
|
|
789
|
+
// when TCE is off.
|
|
790
|
+
classification: classification ?? undefined,
|
|
581
791
|
});
|
|
582
792
|
fullTrace.push({ name: call.name, args: call.arguments });
|
|
583
793
|
// URL ledger ingest — extracts ids from result body for next turn.
|
|
@@ -596,6 +806,126 @@ class AidenAgent {
|
|
|
596
806
|
? `[error] ${result.error}`
|
|
597
807
|
: stringifyToolResult(result.result),
|
|
598
808
|
});
|
|
809
|
+
// v4.1.6 spike (TCE) — after the tool result lands in the
|
|
810
|
+
// message history, consult the recovery controller. Returns
|
|
811
|
+
// `allow` immediately when TCE disabled (zero overhead).
|
|
812
|
+
// v4.2 Phase 1 — pass the verifier outcome so TurnState's
|
|
813
|
+
// consecFailed counter can fast-fail on demonstrably failing
|
|
814
|
+
// tool calls before the slower signature/name counters fire.
|
|
815
|
+
// v4.2 Phase 2 — also pass the classification so TurnState
|
|
816
|
+
// records the WHY for Phase 3's RecoveryReport.
|
|
817
|
+
const recovery = turnState.recordToolCall(call.name, call.arguments, verification, classification);
|
|
818
|
+
if (recovery.kind === 'hint' && recovery.hintMessage) {
|
|
819
|
+
// Stage 1: append a corrective system message so the model
|
|
820
|
+
// sees it on the next provider call. Same pattern as the
|
|
821
|
+
// existing skill-enforcement + URL-provenance correctives.
|
|
822
|
+
turnToolMessages.push({
|
|
823
|
+
role: 'system',
|
|
824
|
+
content: recovery.hintMessage,
|
|
825
|
+
});
|
|
826
|
+
}
|
|
827
|
+
else if (recovery.kind === 'cooldown_with_rollback' && recovery.rollback) {
|
|
828
|
+
// v4.2 Phase 4 — controller asks us to roll back. Capture
|
|
829
|
+
// the decision; we apply it AFTER the inner dispatch loop
|
|
830
|
+
// exits so we don't leave partial turnToolMessages in a
|
|
831
|
+
// half-state. Break out of dispatch immediately — no point
|
|
832
|
+
// running more tools whose results we're about to drop.
|
|
833
|
+
rollbackDecision = recovery;
|
|
834
|
+
break;
|
|
835
|
+
}
|
|
836
|
+
else if (recovery.kind === 'cooldown' && recovery.cooldownMessage) {
|
|
837
|
+
// Stage 2: cooldown has already been recorded internally
|
|
838
|
+
// (next iteration's schema-filter step excludes this tool).
|
|
839
|
+
// Inject a system message announcing the cooldown so the
|
|
840
|
+
// model knows why the tool just disappeared from its menu.
|
|
841
|
+
turnToolMessages.push({
|
|
842
|
+
role: 'system',
|
|
843
|
+
content: recovery.cooldownMessage,
|
|
844
|
+
});
|
|
845
|
+
}
|
|
846
|
+
else if (recovery.kind === 'surface' && recovery.surfaceCard) {
|
|
847
|
+
// Stage 3: structured failure. Stop dispatching the rest of
|
|
848
|
+
// the batch — anything else is throwing good budget after
|
|
849
|
+
// bad. The outer loop reads `surfaceDecision` below and
|
|
850
|
+
// exits cleanly.
|
|
851
|
+
surfaceDecision = recovery;
|
|
852
|
+
break;
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
// v4.2 Phase 4 — apply rollback if the controller asked for it.
|
|
856
|
+
// Truncate messages to the captured snapshot length, restore
|
|
857
|
+
// TurnState internals, then push a corrective system message
|
|
858
|
+
// and continue the OUTER iteration loop. We deliberately drop
|
|
859
|
+
// any partial `turnToolMessages` collected before the rollback
|
|
860
|
+
// trigger — those are the noise we're trying to undo.
|
|
861
|
+
//
|
|
862
|
+
// Hard-block invariant: TurnState only emits
|
|
863
|
+
// `cooldown_with_rollback` when the target checkpoint has
|
|
864
|
+
// `containedMutations === false`, so we never get here for an
|
|
865
|
+
// iteration that ran a mutating tool. The optional
|
|
866
|
+
// `rollback.blockedBy` is empty in Phase 4 (kept on the type
|
|
867
|
+
// for a Phase 5+ soft-rollback variant).
|
|
868
|
+
if (rollbackDecision && rollbackDecision.rollback) {
|
|
869
|
+
const { checkpoint, blockedBy } = rollbackDecision.rollback;
|
|
870
|
+
// Truncate messages array to the captured length. The captured
|
|
871
|
+
// items are immutable Message references; we keep them as-is
|
|
872
|
+
// and just shorten the live array.
|
|
873
|
+
messages.length = checkpoint.messages.length;
|
|
874
|
+
// Restore TurnState mutable internals (stage / streaks /
|
|
875
|
+
// cooledDownTools / arrays). The cooled-down tools map is
|
|
876
|
+
// preserved as it was at checkpoint time — but the controller
|
|
877
|
+
// already added the looping tool to `cooledDownTools` before
|
|
878
|
+
// emitting the decision, so we need to RE-apply that cooldown
|
|
879
|
+
// after restore to honour the cooldown intent.
|
|
880
|
+
turnState.restoreInternalsFrom(checkpoint);
|
|
881
|
+
// Re-cool the tool that triggered the rollback so the next
|
|
882
|
+
// provider call sees the constrained schema.
|
|
883
|
+
if (rollbackDecision.toolName) {
|
|
884
|
+
turnState.reapplyCooldown(rollbackDecision.toolName);
|
|
885
|
+
}
|
|
886
|
+
// Inject corrective system message so the model sees what
|
|
887
|
+
// happened and why the tool just disappeared from its menu.
|
|
888
|
+
messages.push({
|
|
889
|
+
role: 'system',
|
|
890
|
+
content: (0, checkpoint_1.buildRollbackMessage)({
|
|
891
|
+
iteration: checkpoint.iteration,
|
|
892
|
+
toolName: rollbackDecision.toolName,
|
|
893
|
+
blockedBy,
|
|
894
|
+
}),
|
|
895
|
+
});
|
|
896
|
+
// Continue the outer iteration loop from the restored
|
|
897
|
+
// baseline. The next provider call gets the filtered tool
|
|
898
|
+
// schema (cooldown applied) and the corrective message.
|
|
899
|
+
continue;
|
|
900
|
+
}
|
|
901
|
+
// v4.1.6 spike (TCE) — terminal surface handling.
|
|
902
|
+
if (surfaceDecision && surfaceDecision.kind === 'surface') {
|
|
903
|
+
finishReason = 'tool_loop';
|
|
904
|
+
// v4.2 Phase 3 — enrich the base surface card with a
|
|
905
|
+
// structured RecoveryReport. Pure synthesis from TurnState's
|
|
906
|
+
// diagnostic snapshot + first-user-message goal + duration.
|
|
907
|
+
// Implicit gating: this branch is only reachable when
|
|
908
|
+
// TurnState is enabled, so AIDEN_TCE=0 (opt-out) never
|
|
909
|
+
// builds a report.
|
|
910
|
+
if (surfaceDecision.surfaceCard) {
|
|
911
|
+
const report = (0, recoveryReport_1.buildRecoveryReport)({
|
|
912
|
+
snapshot: turnState.getDiagnosticSnapshot(),
|
|
913
|
+
goal: (0, recoveryReport_1.extractGoal)(messages),
|
|
914
|
+
exitReason: 'tool_loop',
|
|
915
|
+
durationMs: Date.now() - turnStartedAt,
|
|
916
|
+
});
|
|
917
|
+
toolLoopCard = (0, recoveryReport_1.enrichCardWithReport)(surfaceDecision.surfaceCard, report);
|
|
918
|
+
}
|
|
919
|
+
else {
|
|
920
|
+
toolLoopCard = surfaceDecision.surfaceCard;
|
|
921
|
+
}
|
|
922
|
+
// Push the partial tool messages we collected so honesty +
|
|
923
|
+
// history downstream see the full sequence including the
|
|
924
|
+
// loop-trigger call. No final assistant message — the
|
|
925
|
+
// tool_loop card IS the user-facing surface.
|
|
926
|
+
messages.push(...turnToolMessages);
|
|
927
|
+
finalContent = '';
|
|
928
|
+
break;
|
|
599
929
|
}
|
|
600
930
|
// ── Iteration-budget injection on the LAST tool message ──────────
|
|
601
931
|
if (this.iterationBudgetInjection && turnToolMessages.length > 0) {
|
|
@@ -618,6 +948,7 @@ class AidenAgent {
|
|
|
618
948
|
totalUsage,
|
|
619
949
|
toolCallTrace,
|
|
620
950
|
fullTrace,
|
|
951
|
+
toolLoopCard,
|
|
621
952
|
};
|
|
622
953
|
}
|
|
623
954
|
/**
|
|
@@ -629,6 +960,18 @@ class AidenAgent {
|
|
|
629
960
|
*/
|
|
630
961
|
async callProvider(messages, tools, runOptions) {
|
|
631
962
|
const wantStream = runOptions.stream === true && typeof this.provider.callStream === 'function';
|
|
963
|
+
// v4.1.5 Issue K — fire just before the HTTP request opens, so the
|
|
964
|
+
// display layer can transition the activity verb from local-prep
|
|
965
|
+
// ("preparing prompt", "selecting tools") to a network verb
|
|
966
|
+
// ("calling provider"). The wait for TTFT (time-to-first-token) is
|
|
967
|
+
// the longest gap in most turns and is what the wave bar covers.
|
|
968
|
+
// Fires for both streaming and non-streaming paths — caller may use
|
|
969
|
+
// it to add a one-shot indicator on non-streaming providers too.
|
|
970
|
+
// Defensive try/catch (a misbehaving hook must not block dispatch).
|
|
971
|
+
try {
|
|
972
|
+
this.onProviderRequestStart?.(this.providerId);
|
|
973
|
+
}
|
|
974
|
+
catch { /* defensive */ }
|
|
632
975
|
if (!wantStream) {
|
|
633
976
|
return this.provider.call({ messages, tools });
|
|
634
977
|
}
|
|
@@ -671,6 +1014,30 @@ class AidenAgent {
|
|
|
671
1014
|
}
|
|
672
1015
|
exports.AidenAgent = AidenAgent;
|
|
673
1016
|
// ── Free helpers ────────────────────────────────────────────────────────
|
|
1017
|
+
/**
 * v4.1.5 Issue K — approximate tally of the "memory facts" carried by a
 * MemorySnapshot.
 *
 * A "fact" is any line that, once surrounding whitespace is trimmed,
 * begins with the markdown bullet marker `- `. Both the `memoryMd` and
 * `userMd` fields of the snapshot are scanned; a field that is not a
 * non-empty string contributes nothing. The result is a fuzzy proxy
 * (facts are bullets by convention only, and prose can hold fact-like
 * content too), so read it as "roughly N items in persistent memory",
 * not as an exact inventory.
 *
 * @param {unknown} snapshot - Candidate snapshot; anything falsy or
 *   non-object yields 0.
 * @returns {number} Number of bullet lines found across both documents.
 */
function countMemoryFacts(snapshot) {
    if (!snapshot || typeof snapshot !== 'object') {
        return 0;
    }
    // A line qualifies when its trimmed form opens with "- ".
    const isBulletLine = (text) => text.trim().startsWith('- ');
    return [snapshot.memoryMd, snapshot.userMd]
        .filter((doc) => typeof doc === 'string' && doc.length > 0)
        .reduce((total, doc) => total + doc.split('\n').filter(isBulletLine).length, 0);
}
|
|
674
1041
|
function lastUserMessageContent(history) {
|
|
675
1042
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
676
1043
|
const m = history[i];
|