aiden-runtime 4.1.5 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -847
- package/dist/api/server.js +32 -5
- package/dist/cli/v4/aidenCLI.js +351 -53
- package/dist/cli/v4/callbacks.js +170 -0
- package/dist/cli/v4/chatSession.js +138 -3
- package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
- package/dist/cli/v4/commands/browserDepth.js +45 -0
- package/dist/cli/v4/commands/cron.js +264 -0
- package/dist/cli/v4/commands/daemon.js +541 -0
- package/dist/cli/v4/commands/daemonStatus.js +253 -0
- package/dist/cli/v4/commands/help.js +7 -0
- package/dist/cli/v4/commands/index.js +20 -1
- package/dist/cli/v4/commands/runs.js +203 -0
- package/dist/cli/v4/commands/sandbox.js +48 -0
- package/dist/cli/v4/commands/suggestions.js +68 -0
- package/dist/cli/v4/commands/tce.js +41 -0
- package/dist/cli/v4/commands/trigger.js +378 -0
- package/dist/cli/v4/commands/update.js +95 -3
- package/dist/cli/v4/daemonAgentBuilder.js +142 -0
- package/dist/cli/v4/defaultSoul.js +1 -1
- package/dist/cli/v4/display/capabilityCard.js +26 -0
- package/dist/cli/v4/display.js +18 -8
- package/dist/cli/v4/replyRenderer.js +31 -23
- package/dist/cli/v4/updateBootPrompt.js +170 -0
- package/dist/core/playwrightBridge.js +129 -0
- package/dist/core/v4/aidenAgent.js +308 -4
- package/dist/core/v4/browserState.js +436 -0
- package/dist/core/v4/checkpoint.js +79 -0
- package/dist/core/v4/daemon/bootstrap.js +604 -0
- package/dist/core/v4/daemon/cleanShutdown.js +154 -0
- package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
- package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
- package/dist/core/v4/daemon/cron/migration.js +199 -0
- package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
- package/dist/core/v4/daemon/daemonConfig.js +90 -0
- package/dist/core/v4/daemon/db/connection.js +106 -0
- package/dist/core/v4/daemon/db/migrations.js +296 -0
- package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
- package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
- package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
- package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
- package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
- package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
- package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
- package/dist/core/v4/daemon/dispatcher/index.js +53 -0
- package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
- package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
- package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
- package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
- package/dist/core/v4/daemon/drain.js +156 -0
- package/dist/core/v4/daemon/eventLoopLag.js +73 -0
- package/dist/core/v4/daemon/health.js +159 -0
- package/dist/core/v4/daemon/idempotencyStore.js +204 -0
- package/dist/core/v4/daemon/index.js +179 -0
- package/dist/core/v4/daemon/instanceTracker.js +99 -0
- package/dist/core/v4/daemon/resourceRegistry.js +150 -0
- package/dist/core/v4/daemon/restartCode.js +32 -0
- package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
- package/dist/core/v4/daemon/runStore.js +114 -0
- package/dist/core/v4/daemon/runtimeLock.js +167 -0
- package/dist/core/v4/daemon/signals.js +50 -0
- package/dist/core/v4/daemon/supervisor.js +272 -0
- package/dist/core/v4/daemon/triggerBus.js +279 -0
- package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
- package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
- package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
- package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
- package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
- package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
- package/dist/core/v4/daemon/triggers/email/index.js +332 -0
- package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
- package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
- package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
- package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
- package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
- package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
- package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
- package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
- package/dist/core/v4/daemon/triggers/webhook.js +376 -0
- package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
- package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
- package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
- package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
- package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
- package/dist/core/v4/daemon/types.js +15 -0
- package/dist/core/v4/dockerSession.js +461 -0
- package/dist/core/v4/dryRun.js +117 -0
- package/dist/core/v4/failureClassifier.js +779 -0
- package/dist/core/v4/recoveryReport.js +449 -0
- package/dist/core/v4/runtimeToggles.js +187 -0
- package/dist/core/v4/sandboxConfig.js +285 -0
- package/dist/core/v4/sandboxFs.js +316 -0
- package/dist/core/v4/suggestionCatalog.js +41 -0
- package/dist/core/v4/suggestionEngine.js +210 -0
- package/dist/core/v4/toolRegistry.js +18 -0
- package/dist/core/v4/turnState.js +587 -0
- package/dist/core/v4/update/checkUpdate.js +63 -3
- package/dist/core/v4/update/installMethodDetect.js +115 -0
- package/dist/core/v4/update/registryClient.js +121 -0
- package/dist/core/v4/update/skipState.js +75 -0
- package/dist/core/v4/verifier.js +448 -0
- package/dist/core/version.js +1 -1
- package/dist/tools/v4/browser/_observer.js +224 -0
- package/dist/tools/v4/browser/browserBlocker.js +396 -0
- package/dist/tools/v4/browser/browserClick.js +18 -1
- package/dist/tools/v4/browser/browserClose.js +18 -1
- package/dist/tools/v4/browser/browserExtract.js +5 -1
- package/dist/tools/v4/browser/browserFill.js +17 -1
- package/dist/tools/v4/browser/browserGetUrl.js +5 -1
- package/dist/tools/v4/browser/browserNavigate.js +16 -1
- package/dist/tools/v4/browser/browserScreenshot.js +5 -1
- package/dist/tools/v4/browser/browserScroll.js +18 -1
- package/dist/tools/v4/browser/browserType.js +17 -1
- package/dist/tools/v4/browser/captchaCheck.js +5 -1
- package/dist/tools/v4/executeCode.js +1 -0
- package/dist/tools/v4/files/fileCopy.js +56 -2
- package/dist/tools/v4/files/fileDelete.js +38 -1
- package/dist/tools/v4/files/fileList.js +12 -1
- package/dist/tools/v4/files/fileMove.js +59 -2
- package/dist/tools/v4/files/filePatch.js +43 -1
- package/dist/tools/v4/files/fileRead.js +12 -1
- package/dist/tools/v4/files/fileWrite.js +41 -1
- package/dist/tools/v4/index.js +71 -58
- package/dist/tools/v4/memory/memoryAdd.js +14 -0
- package/dist/tools/v4/memory/memoryRemove.js +14 -0
- package/dist/tools/v4/memory/memoryReplace.js +15 -0
- package/dist/tools/v4/memory/sessionSummary.js +12 -0
- package/dist/tools/v4/process/processKill.js +19 -0
- package/dist/tools/v4/process/processList.js +1 -0
- package/dist/tools/v4/process/processLogRead.js +1 -0
- package/dist/tools/v4/process/processSpawn.js +13 -0
- package/dist/tools/v4/process/processWait.js +1 -0
- package/dist/tools/v4/sessions/recallSession.js +1 -0
- package/dist/tools/v4/sessions/sessionList.js +1 -0
- package/dist/tools/v4/sessions/sessionSearch.js +1 -0
- package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
- package/dist/tools/v4/skills/skillManage.js +13 -0
- package/dist/tools/v4/skills/skillView.js +1 -0
- package/dist/tools/v4/skills/skillsList.js +1 -0
- package/dist/tools/v4/subagent/subagentFanout.js +1 -0
- package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
- package/dist/tools/v4/system/appClose.js +13 -0
- package/dist/tools/v4/system/appInput.js +13 -0
- package/dist/tools/v4/system/appLaunch.js +13 -0
- package/dist/tools/v4/system/clipboardRead.js +1 -0
- package/dist/tools/v4/system/clipboardWrite.js +14 -0
- package/dist/tools/v4/system/mediaKey.js +12 -0
- package/dist/tools/v4/system/mediaSessions.js +1 -0
- package/dist/tools/v4/system/mediaTransport.js +13 -0
- package/dist/tools/v4/system/naturalEvents.js +1 -0
- package/dist/tools/v4/system/nowPlaying.js +1 -0
- package/dist/tools/v4/system/osProcessList.js +1 -0
- package/dist/tools/v4/system/screenshot.js +1 -0
- package/dist/tools/v4/system/systemInfo.js +1 -0
- package/dist/tools/v4/system/volumeSet.js +17 -0
- package/dist/tools/v4/terminal/shellExec.js +81 -9
- package/dist/tools/v4/web/deepResearch.js +1 -0
- package/dist/tools/v4/web/openUrl.js +1 -0
- package/dist/tools/v4/web/webFetch.js +1 -0
- package/dist/tools/v4/web/webPage.js +1 -0
- package/dist/tools/v4/web/webSearch.js +1 -0
- package/dist/tools/v4/web/youtubeSearch.js +1 -0
- package/package.json +7 -1
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* core/v4/browserState.ts — v4.3 Phase 1: Page-state observer.
|
|
10
|
+
*
|
|
11
|
+
* Per-agent-session observer that captures structured browser-page
|
|
12
|
+
* state before and after every browser tool action. The captured
|
|
13
|
+
* states embed on the tool result as a `browserState` sidecar; Phase 5
|
|
14
|
+
* will use the sidecar to classify "tool succeeded but UI did nothing"
|
|
15
|
+
* cases that currently look identical to genuine success.
|
|
16
|
+
*
|
|
17
|
+
* Three production rules from the consult shape this module:
|
|
18
|
+
*
|
|
19
|
+
* - **Element refs are leases, not identifiers.** ElementLease defined
|
|
20
|
+
* here, validated in Phase 2 — carries snapshot_id + frame_id +
|
|
21
|
+
* visible_text_hash + bbox so mismatches signal "DOM changed since
|
|
22
|
+
* we took this ref".
|
|
23
|
+
*
|
|
24
|
+
* - **Frame_id is part of the contract.** Iframe blindness is a real
|
|
25
|
+
* gap; BrowserStateSnapshot carries frame_id + frame_tree_hash so
|
|
26
|
+
* cross-frame DOM churn is observable.
|
|
27
|
+
*
|
|
28
|
+
* - **Never equate tool success with UI progress.** ActionResult
|
|
29
|
+
* includes progress_score + maybe_noop + needs_verifier; a tool
|
|
30
|
+
* returning success:true AND maybe_noop:true is the structural
|
|
31
|
+
* signal for "click executed but nothing changed".
|
|
32
|
+
*
|
|
33
|
+
* **Default ON** as of v4.3 Phase 6 — set `AIDEN_BROWSER_DEPTH=0`
|
|
34
|
+
* to disable. Symmetric with v4.2 Phase 6's TCE flip. When disabled,
|
|
35
|
+
* `captureState()` returns null and the HOC wrapper
|
|
36
|
+
* (`tools/v4/browser/_observer.ts`) skips snapshot work entirely.
|
|
37
|
+
* Zero behavioural change vs v4.2.5 when disabled.
|
|
38
|
+
*
|
|
39
|
+
* Pure module — types + class + helpers. No I/O on the disabled path;
|
|
40
|
+
* two `page.evaluate()` calls per action when enabled (URL + title +
|
|
41
|
+
* innerText hash + recursive iframe URL walk). Latency ~5-15ms per
|
|
42
|
+
* snapshot; observer overhead per action ~10-30ms total.
|
|
43
|
+
*
|
|
44
|
+
* Reference notes: the snapshot shape (URL/title/dom_hash/frame_id)
|
|
45
|
+
* mirrors a pattern seen in a comparable reference system; the
|
|
46
|
+
* ElementLease shape was contributed by a downstream consult. Aiden
|
|
47
|
+
* keeps the typing clean and the implementation Aiden-shaped.
|
|
48
|
+
*/
|
|
49
|
+
// ── TypeScript-emitted CommonJS interop helpers ────────────────────────────
// Each helper reuses an implementation already attached to `this` when one
// exists, and otherwise defines its own.

// Exposes property `k` of module `m` on object `o` under the name `k2`
// (defaults to `k`). Where `Object.create` exists the binding is a property
// descriptor — a live getter unless `m` already supplies a usable accessor —
// otherwise it falls back to a plain value copy.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// Sets `o.default` to `v` (enumerable property when `Object.create` exists,
// plain assignment otherwise).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// Wraps a required module in a namespace-like object: modules flagged
// `__esModule` are returned untouched; otherwise every own key except
// "default" is bound onto a fresh object and the module itself becomes
// that object's `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first call with `Object.getOwnPropertyNames`, or with
    // a `for...in` + hasOwnProperty fallback when that is unavailable.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
// Returns `mod` unchanged when it is flagged `__esModule`; otherwise wraps it
// as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
|
|
85
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
86
|
+
exports.BrowserState = void 0;
|
|
87
|
+
exports.sha256Hex = sha256Hex;
|
|
88
|
+
exports.normalizeUrl = normalizeUrl;
|
|
89
|
+
exports.createBrowserState = createBrowserState;
|
|
90
|
+
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
91
|
+
// ── Helpers (exported for tests + ElementLease lifecycle in Phase 2) ───────
|
|
92
|
+
// Maximum number of characters `BrowserState.hashText` passes to `sha256Hex`;
// longer text is sliced to this length before hashing.
const SHORT_TEXT_HASH_CAP = 5000;
// Query-parameter names that `normalizeUrl` removes. Lookups are done on the
// lowercased key, so every entry here must be lowercase.
const TRACKING_PARAMS = new Set([
    'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
    'gclid', 'fbclid', 'mc_eid', 'mc_cid', '_ga', 'ref', '_hsenc', '_hsmi',
    'igshid', 'msclkid', 'yclid',
]);
|
|
98
|
+
/**
 * Hash a string with SHA-256 and return the digest as lowercase hex.
 *
 * The input is hashed as given — no truncation happens here, so callers
 * that want a bounded cost must slice the text before calling.
 *
 * @param {string} input - text to hash.
 * @returns {string} 64-character hex digest.
 */
function sha256Hex(input) {
    const hasher = node_crypto_1.default.createHash('sha256');
    hasher.update(input);
    return hasher.digest('hex');
}
|
|
105
|
+
/**
 * Canonicalise a URL for comparison purposes.
 *
 * Removes the fragment, drops every query parameter whose lowercased name
 * is in `TRACKING_PARAMS`, and — only for a bare root path (`/`) with no
 * remaining query — removes the trailing slash. Input that `URL` cannot
 * parse is returned unchanged instead of throwing.
 *
 * @param {string} raw - URL to normalise.
 * @returns {string} normalised URL, or `raw` when it is unparseable.
 */
function normalizeUrl(raw) {
    let parsed;
    try {
        parsed = new URL(raw);
    }
    catch {
        // Unparseable input is passed through as-is.
        return raw;
    }
    parsed.hash = '';
    // Keep the surviving parameters in their original order.
    const kept = [...parsed.searchParams].filter(([key]) => !TRACKING_PARAMS.has(key.toLowerCase()));
    parsed.search = new URLSearchParams(kept).toString();
    const serialized = parsed.toString();
    // Only "scheme://host/" with an empty query loses its trailing slash.
    const isBareRoot = serialized.endsWith('/') && !parsed.search && parsed.pathname === '/';
    return isBareRoot ? serialized.slice(0, -1) : serialized;
}
|
|
131
|
+
// ── Snapshot-pair evidence + score ─────────────────────────────────────────
|
|
132
|
+
// Weight assigned to each evidence name. `computeProgressScore` returns the
// largest weight among the evidence names that are present (weights are not
// summed), so the score is 0 when no evidence was recorded.
const PROGRESS_WEIGHTS = [
    ['url_changed', 0.8],
    ['normalized_url_changed', 0.7],
    ['dom_hash_changed', 0.6],
    ['frame_tree_changed', 0.5],
    ['title_changed', 0.4],
];
|
|
139
|
+
/**
 * List which observable page properties differ between two snapshots.
 *
 * Fields are compared with strict inequality, and the resulting labels are
 * always emitted in the order: url, normalized_url, title, dom_text_hash,
 * frame_tree_hash.
 *
 * @param {object} pre - snapshot taken before the action.
 * @param {object} post - snapshot taken after the action.
 * @returns {string[]} evidence labels for every field that changed.
 */
function computeEvidence(pre, post) {
    // [snapshot field, evidence label emitted when the field differs]
    const comparisons = [
        ['url', 'url_changed'],
        ['normalized_url', 'normalized_url_changed'],
        ['title', 'title_changed'],
        ['dom_text_hash', 'dom_hash_changed'],
        ['frame_tree_hash', 'frame_tree_changed'],
    ];
    return comparisons
        .filter(([field]) => pre[field] !== post[field])
        .map(([, label]) => label);
}
|
|
153
|
+
/**
 * Reduce an evidence list to a single progress score.
 *
 * The score is the highest weight in `PROGRESS_WEIGHTS` whose evidence name
 * appears in `evidence`; weights are never added together. An empty or
 * unrecognised evidence list scores 0.
 *
 * @param {string[]} evidence - labels produced by `computeEvidence`.
 * @returns {number} the maximum matching weight, or 0.
 */
function computeProgressScore(evidence) {
    return PROGRESS_WEIGHTS.reduce(
        (best, [signal, weight]) => (evidence.includes(signal) && weight > best ? weight : best),
        0,
    );
}
|
|
161
|
+
// ── BrowserState class ─────────────────────────────────────────────────────
|
|
162
|
+
// `BrowserState.buildActionResult` sets `needs_verifier` when the progress
// score is below this value (or when no evidence was recorded at all).
const NEEDS_VERIFIER_THRESHOLD = 0.3;
|
|
163
|
+
/**
 * Per-agent-session page-state observer.
 *
 * Captures a structured snapshot of the current page through a lazily
 * loaded bridge, tracks per-tab metadata, and turns a pre/post snapshot
 * pair into an ActionResult sidecar. The enabled flag is resolved once, at
 * construction; when disabled, the capture and reconcile methods return
 * immediately without touching the bridge.
 *
 * The observer methods (`captureState`, `reconcileTabs`) never throw or
 * reject: any bridge failure or malformed bridge response degrades to
 * "no snapshot" / "no reconciliation" so the wrapped tool is unaffected.
 */
class BrowserState {
    /**
     * @param {{ enabled?: boolean }} [opts] - `enabled`, when a boolean,
     *   overrides every other source (intended for tests / embedded use).
     */
    constructor(opts = {}) {
        this.snapshotCounter = 0;
        /** Per-tab metadata, keyed by the bridge-reported `tab_id`. */
        this.tabs = new Map();
        /** `tab_id` of the tab the bridge last reported as active, or null. */
        this.activeTabId = null;
        if (typeof opts.enabled === 'boolean') {
            this.enabled = opts.enabled;
        }
        else {
            try {
                // Resolved lazily so that a missing or failing runtimeToggles
                // module falls back to the environment variable below.
                // eslint-disable-next-line @typescript-eslint/no-var-requires
                const rt = require('./runtimeToggles');
                this.enabled = rt.getRuntimeToggles().isEnabled('browser_depth');
            }
            catch {
                // Strict opt-out: only the literal string '0' disables.
                this.enabled = process.env.AIDEN_BROWSER_DEPTH !== '0';
            }
        }
    }
    /** @returns {boolean} whether this observer is active. */
    isEnabled() {
        return this.enabled;
    }
    /**
     * Install the function used to obtain the bridge module.
     *
     * @param {() => Promise<object>} loader - resolves to an object exposing
     *   `pwSnapshotHash()` and, optionally, `pwSnapshotTabs()`.
     */
    setBridgeLoader(loader) {
        this.bridgeLoader = loader;
    }
    /**
     * Capture the current page state.
     *
     * Resolves to null when the observer is disabled, no bridge loader is
     * set, loading the bridge or calling `pwSnapshotHash` fails, or the
     * bridge answers with nothing / `ok` falsy. Never rejects.
     *
     * @returns {Promise<object|null>} the snapshot, or null.
     */
    async captureState() {
        if (!this.enabled)
            return null;
        if (!this.bridgeLoader)
            return null;
        let result;
        try {
            const bridge = await this.bridgeLoader();
            result = await bridge.pwSnapshotHash();
        }
        catch {
            return null;
        }
        // A bridge that resolves to undefined/null is treated like ok:false
        // rather than being dereferenced.
        if (!result || !result.ok)
            return null;
        this.snapshotCounter += 1;
        const url = result.url ?? '';
        const title = result.title ?? '';
        const snapshot = {
            url,
            normalized_url: normalizeUrl(url),
            title,
            dom_text_hash: result.dom_text_hash ?? '',
            frame_id: 'main',
            frame_tree_hash: result.frame_tree_hash ?? '',
            // Monotonic per-instance counter, not a wall-clock time.
            ts: this.snapshotCounter,
        };
        // Tab reconciliation runs only after a successful snapshot and is
        // best-effort: a failure there must not discard the snapshot.
        try {
            await this.reconcileTabs(snapshot.dom_text_hash);
        }
        catch {
            // Intentionally ignored — tab bookkeeping is secondary.
        }
        return snapshot;
    }
    /**
     * Reconcile the tabs map against the bridge's current tab list.
     *
     * Adds tabs seen for the first time, refreshes metadata and
     * `last_seen_ts` for known tabs, records `activeSnapshotHash` on the
     * active tab, removes tabs the bridge no longer reports, and updates
     * `activeTabId`.
     *
     * Does nothing when disabled, when no loader is set, when the bridge is
     * missing or lacks `pwSnapshotTabs`, when the call fails, or when the
     * response is empty, `ok` falsy, or carries a non-iterable `tabs`.
     * Null/undefined entries inside `tabs` are skipped. Never rejects.
     *
     * @param {string} [activeSnapshotHash] - DOM text hash of the active
     *   tab's latest snapshot; ignored when falsy.
     * @returns {Promise<void>}
     */
    async reconcileTabs(activeSnapshotHash) {
        if (!this.enabled)
            return;
        if (!this.bridgeLoader)
            return;
        let result;
        try {
            const bridge = await this.bridgeLoader();
            if (!bridge || !bridge.pwSnapshotTabs)
                return;
            result = await bridge.pwSnapshotTabs();
        }
        catch {
            return;
        }
        if (!result || !result.ok || !result.tabs)
            return;
        const reported = result.tabs;
        if (typeof reported[Symbol.iterator] !== 'function')
            return;
        const now = Date.now();
        const seenIds = new Set();
        let activeId = null;
        for (const t of reported) {
            if (!t)
                continue; // malformed entry — skip instead of throwing
            seenIds.add(t.tab_id);
            if (t.is_active)
                activeId = t.tab_id;
            const existing = this.tabs.get(t.tab_id);
            if (existing) {
                existing.url = t.url;
                existing.title = t.title;
                existing.is_active = t.is_active;
                existing.opener_id = t.opener_id;
                existing.last_seen_ts = now;
                if (t.is_active && activeSnapshotHash) {
                    existing.last_snapshot_hash = activeSnapshotHash;
                }
            }
            else {
                const fresh = {
                    tab_id: t.tab_id,
                    url: t.url,
                    title: t.title,
                    is_active: t.is_active,
                    opener_id: t.opener_id,
                    created_ts: now,
                    last_seen_ts: now,
                };
                if (t.is_active && activeSnapshotHash) {
                    fresh.last_snapshot_hash = activeSnapshotHash;
                }
                this.tabs.set(t.tab_id, fresh);
            }
        }
        // Drop closed tabs: anything not reported in this pass. Keys are
        // copied first because the map is mutated while walking them.
        for (const id of [...this.tabs.keys()]) {
            if (!seenIds.has(id))
                this.tabs.delete(id);
        }
        this.activeTabId = activeId;
    }
    /**
     * Record or clear the active tab's `last_blocker`.
     *
     * No-op when disabled, when there is no active tab, or when the active
     * tab id is not in the map.
     *
     * @param {object|null} blocker - value to store; `null` removes the
     *   field from the tab record.
     */
    updateActiveTabBlocker(blocker) {
        if (!this.enabled || !this.activeTabId)
            return;
        const tab = this.tabs.get(this.activeTabId);
        if (!tab)
            return;
        if (blocker === null) {
            delete tab.last_blocker;
        }
        else {
            tab.last_blocker = blocker;
        }
    }
    /**
     * @returns {object[]} shallow copies of every tracked tab, in map
     *   insertion order. Mutating the copies does not affect the observer.
     */
    getTabs() {
        return [...this.tabs.values()].map((t) => ({ ...t }));
    }
    /** @returns {object|null} shallow copy of the active tab, or null. */
    getActiveTab() {
        if (!this.activeTabId)
            return null;
        const tab = this.tabs.get(this.activeTabId);
        return tab ? { ...tab } : null;
    }
    /**
     * @param {string} tabId - id to look up.
     * @returns {object|null} shallow copy of the tab, or null if unknown.
     */
    getTab(tabId) {
        const tab = this.tabs.get(tabId);
        return tab ? { ...tab } : null;
    }
    /**
     * Build the ActionResult sidecar from a pre/post snapshot pair.
     *
     * @param {{ pre: object|null, post: object|null }} input
     * @returns {object|null} `{ pre_state, post_state, progress_score,
     *   evidence, maybe_noop, needs_verifier }`, or null when either
     *   snapshot is missing (caller should then omit the sidecar).
     */
    buildActionResult(input) {
        if (!input.pre || !input.post)
            return null;
        const evidence = computeEvidence(input.pre, input.post);
        const progress_score = computeProgressScore(evidence);
        // No observable difference at all: the action may have done nothing.
        const maybe_noop = evidence.length === 0;
        const needs_verifier = maybe_noop || progress_score < NEEDS_VERIFIER_THRESHOLD;
        return {
            pre_state: input.pre,
            post_state: input.post,
            progress_score,
            evidence,
            maybe_noop,
            needs_verifier,
        };
    }
    /**
     * Evidence labels describing how `post` differs from `pre`, without
     * building a full ActionResult.
     *
     * @param {object|null} pre
     * @param {object|null} post
     * @returns {string[]} same labels as `buildActionResult().evidence`;
     *   `[]` when either snapshot is missing.
     */
    computeStateDelta(pre, post) {
        if (!pre || !post)
            return [];
        return computeEvidence(pre, post);
    }
    /** Instance-level alias for the module's `normalizeUrl` helper. */
    normalizeUrl(raw) {
        return normalizeUrl(raw);
    }
    /**
     * SHA-256 hex digest of at most the first `SHORT_TEXT_HASH_CAP`
     * characters of `text`.
     *
     * @param {string} text
     * @returns {string}
     */
    hashText(text) {
        return sha256Hex(text.slice(0, SHORT_TEXT_HASH_CAP));
    }
}
|
|
425
|
+
exports.BrowserState = BrowserState;
|
|
426
|
+
// ── Factory ────────────────────────────────────────────────────────────────
|
|
427
|
+
/**
 * Build a `BrowserState` that loads the production `../playwrightBridge`
 * module on demand.
 *
 * The bridge is not required here; it is resolved inside a promise callback
 * each time the loader is invoked. Per the module's design notes, a single
 * instance is meant to be shared by the browser tool wrappers in
 * `tools/v4/browser/_observer.ts`.
 *
 * @returns {BrowserState} observer with the default bridge loader installed.
 */
function createBrowserState() {
    const loadBridge = () => Promise.resolve().then(() => __importStar(require('../playwrightBridge')));
    const observer = new BrowserState();
    observer.setBridgeLoader(loadBridge);
    return observer;
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* core/v4/checkpoint.ts — v4.2 Phase 4: Checkpoint data types +
|
|
10
|
+
* rollback helpers.
|
|
11
|
+
*
|
|
12
|
+
* Phase 4 is the first phase that MUTATES agent state. Phases 1-3
|
|
13
|
+
* recorded data and synthesised reports; Phase 4 lets the recovery
|
|
14
|
+
* controller restore conversation messages + TurnState internals to
|
|
15
|
+
* an earlier iteration when a tool loop is detected, so the model
|
|
16
|
+
* retries from a clean baseline instead of accumulating noise.
|
|
17
|
+
*
|
|
18
|
+
* Critical constraint: **rollback NEVER claims to undo executed
|
|
19
|
+
* side effects**. A file_write that already happened is still on
|
|
20
|
+
* disk — rollback only affects in-memory conversation state, not
|
|
21
|
+
* the world. Enforcement is structural via the
|
|
22
|
+
* `containedMutations` flag — iterations that ran any mutating
|
|
23
|
+
* tool (`ToolHandler.mutates === true`) are not eligible for
|
|
24
|
+
* rollback at all (HARD BLOCK per Q-CP3 approval).
|
|
25
|
+
*
|
|
26
|
+
* Storage is in-memory only, ring buffer of configurable depth
|
|
27
|
+
* (default 3). Disk-backed checkpoints are out of scope for v4.2 —
|
|
28
|
+
* adds I/O, crash-recovery complexity, and a serialization contract
|
|
29
|
+
* that the spike doesn't need.
|
|
30
|
+
*
|
|
31
|
+
* Provider cache safety: a restored message array is a strict
|
|
32
|
+
* prefix of the pre-rollback state. Anthropic prompt caching keys
|
|
33
|
+
* on the message prefix and handles prefix matches natively — no
|
|
34
|
+
* cache invalidation concern. OpenAI / Ollama / Groq don't use
|
|
35
|
+
* prefix caching at the wire level.
|
|
36
|
+
*
|
|
37
|
+
* Reference-system note: a comparable reference system has no
|
|
38
|
+
* checkpoint/restore primitive — only a counter-refund pattern that
|
|
39
|
+
* gives back iteration budget when a "cheap RPC" tool ran. Aiden's
|
|
40
|
+
* Phase 4 is genuinely new ground. No code patterns ported.
|
|
41
|
+
*
|
|
42
|
+
* Pure module — only types, frozen-data factories, and a
|
|
43
|
+
* deterministic message builder. No I/O, no async, no
|
|
44
|
+
* side effects.
|
|
45
|
+
*/
|
|
46
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
47
|
+
exports.buildRollbackMessage = buildRollbackMessage;
|
|
48
|
+
// ── Rollback message builder ───────────────────────────────────────────────
|
|
49
|
+
/**
 * Compose the corrective message inserted into history after a rollback.
 *
 * The message states where the turn was rolled back to (a specific
 * iteration when `iteration > 0`, otherwise the start of the turn), names
 * the tool that was failing and is now cooled down (or a generic phrase
 * when `toolName` is falsy), and — when `blockedBy` is non-empty — appends
 * a note that those tools' real-world side effects cannot be undone.
 *
 * Deterministic: identical input always yields the identical string.
 *
 * @param {{ iteration: number, toolName?: string, blockedBy?: string[] }} input
 * @returns {string} the single-line rollback message.
 */
function buildRollbackMessage(input) {
    const { iteration, toolName, blockedBy } = input;
    const quotedTool = toolName ? `\`${toolName}\`` : null;
    // Wording used when blaming the tool vs. when it is the sentence subject.
    const culprit = quotedTool ?? 'the looping tool';
    const subject = quotedTool ?? 'That tool';
    const destination = iteration > 0
        ? `to iteration ${iteration}`
        : 'to the start of this turn';
    let message = `[tce] Rolled back ${destination} because ${culprit} was failing repeatedly. `
        + `${subject} is now cooled down — `
        + `try a different approach with the tools that remain.`;
    if (blockedBy && blockedBy.length > 0) {
        message += ` Note: ${blockedBy.join(', ')} ran during this turn and produced real-world `
            + `side effects that this rollback CANNOT undo. Those effects persist.`;
    }
    return message;
}
|