aiden-runtime 4.6.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,111 +6,92 @@
6
6
  * Aiden — local-first agent.
7
7
  */
8
8
  /**
9
- * moat/honestyEnforcement.ts — Aiden v4.0.0
9
+ * moat/honestyEnforcement.ts — Aiden v4.7.0 (Phase 2.3 — outcome-based verifier)
10
10
  *
11
- * Post-loop trace inspector. Runs after AidenAgent returns its final
12
- * response. Compares the response's stated actions to the actual tool
13
- * calls in the trace. If the model claims it did something but the trace
14
- * says no tool fired (or fired and failed verification), Honesty refuses
15
- * the claim and rewrites the response.
11
+ * The regex-based natural-language claim scanner (deleted in Phase 2.2)
12
+ * has been replaced with a deterministic outcome recorder that consumes
13
+ * `toolCallTrace` structurally. Two failure modes are recorded:
16
14
  *
17
- * The failure modes this catches:
18
- * - "I saved your file to ~/notes/today.md" → no file_write call
19
- * - "I sent the email" → no email tool call
20
- * - "I remembered that" → no memory_add OR memory_add returned verified=false
21
- * - "I searched the web" → no web_search call
22
- * - "I ran X" → no shell_exec call
15
+ * 1. mutation_errored — a tool tagged `mutates: true` (in the
16
+ * registry, stamped onto trace entries at dispatch time via
17
+ * `handlerMutates`) returned an `error` envelope. Path is
18
+ * extracted from `result.path` when present.
23
19
  *
24
- * Three modes:
25
- * off — passes everything; no inspection.
26
- * detect — runs checks and populates findings, but does NOT modify the
27
- * response. Useful for telemetry / canary measurement.
28
- * enforce — DEFAULT. Rewrites failed claims into honest text that lists
29
- * the actual trace summary.
20
+ * 2. memory_unverified — a memory_* tool's result carries
21
+ * `verified === false` (per Phase 9 MemoryGuard). This was
22
+ * the v3 C20/C21 lying surface and remains the only memory-
23
+ * specific check the verifier performs.
30
24
  *
31
- * Detection:
32
- * 1. Pattern-based (default, $0 cost) — past-tense action verbs matched
33
- * against tool registry. This file owns the table.
34
- * 2. LLM-classified — auxiliary LLM call. Wired via the optional
35
- * `llmAdapter`; defaulted off in Phase 12. Phase 13 turns it on.
25
+ * Modes:
26
+ * off — bypass entirely. No events recorded.
27
+ * detect — Record events; never user-visible. `findings` populated;
28
+ * no `footer`.
29
+ * enforce — DEFAULT. Record events + append a short footer to the
30
+ * assistant reply summarising the unverified outcomes.
31
+ * The footer is APPEND-ONLY — the assistant's text is
32
+ * never rewritten. (This is the key behaviour change vs
33
+ * v4.6.x — append-only, never an in-place edit.)
36
34
  *
37
- * Critical invariant for memory:
38
- * Every memory_add / memory_replace / memory_remove tool result carries
39
- * a `verified` flag (per Phase 9 MemoryGuard). If the model claims
40
- * "I remembered X" but `verified=false`, Honesty MUST flag this — even
41
- * though a memory tool DID fire. This was the v3 C20/C21 lying surface.
42
- *
43
- * Status: PHASE 12.
35
+ * What the verifier intentionally does NOT do (delta vs the deleted
36
+ * scanner):
37
+ * - It does not look at the assistant's natural-language text at all.
38
+ * There's no regex matching of English verbs to tool names.
39
+ * - It does not emit `no_tool_call` findings. The previous "model
40
+ * claimed X but no tool fired" failure mode is gone — that was
41
+ * the false-refusal class. We only record OUTCOMES of tools that ran.
42
+ * - It does not mutate `loopResult.messages`. The caller appends
43
+ * the footer to its own `finalContent` string variable.
44
44
  */
45
45
  Object.defineProperty(exports, "__esModule", { value: true });
46
- exports.__test__ = exports.HonestyEnforcement = void 0;
47
- /** Allows optional adverbs/auxiliaries between "I" and the verb:
48
- * "I have", "I also", "I just", "I successfully", "I have just", etc. */
49
- const I_PREFIX = String.raw `\bI\s+(?:have\s+|just\s+|also\s+|already\s+|successfully\s+|then\s+|now\s+){0,3}`;
50
- const PATTERNS = [
51
- // ── File operations (past tense only) ─────────────────────────
52
- {
53
- pattern: new RegExp(`${I_PREFIX}(?:saved|wrote|created|modified|patched|updated)\\b[^.]*\\b(?:file|to|at|in)\\b`, 'i'),
54
- tools: ['file_write', 'file_patch', 'skill_manage'],
55
- label: 'file_write',
56
- },
57
- {
58
- pattern: new RegExp(`${I_PREFIX}(?:deleted|removed)\\s+(?:the\\s+)?(?:file|directory|folder)\\b`, 'i'),
59
- tools: ['file_delete'],
60
- label: 'file_delete',
61
- },
62
- // ── Web ────────────────────────────────────────────────────────
63
- {
64
- pattern: new RegExp(`${I_PREFIX}(?:searched|looked\\s+up|found(?:\\s+online)?|googled)\\b`, 'i'),
65
- tools: ['web_search', 'deep_research'],
66
- label: 'web_search',
67
- },
68
- {
69
- pattern: new RegExp(`${I_PREFIX}(?:fetched|downloaded|retrieved)\\b`, 'i'),
70
- tools: ['web_fetch', 'fetch_url'],
71
- label: 'web_fetch',
72
- },
73
- // ── Shell / execution ──────────────────────────────────────────
74
- {
75
- pattern: new RegExp(`${I_PREFIX}(?:ran|executed|called)\\b`, 'i'),
76
- tools: ['shell_exec', 'execute_code', 'run_python', 'run_node'],
77
- label: 'shell_exec',
78
- },
79
- // ── Browser ────────────────────────────────────────────────────
80
- {
81
- pattern: new RegExp(`${I_PREFIX}(?:navigated|clicked|typed|scrolled)\\b`, 'i'),
82
- tools: [
83
- 'browser_navigate',
84
- 'open_browser',
85
- 'browser_click',
86
- 'browser_type',
87
- 'browser_scroll',
88
- ],
89
- label: 'browser_action',
90
- },
91
- // ── Memory (verified=true required) ────────────────────────────
92
- {
93
- pattern: new RegExp(`${I_PREFIX}(?:remembered|memori[sz]ed|noted\\s+that|saved\\s+that\\s+to\\s+memory)\\b`, 'i'),
94
- tools: ['memory_add', 'memory_upsert'],
95
- label: 'memory_add',
96
- kind: 'memory',
97
- },
98
- {
99
- pattern: new RegExp(`${I_PREFIX}(?:forgot(?:ten)?|removed)\\b[^.]*\\bmemory\\b`, 'i'),
100
- tools: ['memory_remove', 'memory_forget'],
101
- label: 'memory_remove',
102
- kind: 'memory',
103
- },
104
- // ── Model switch ───────────────────────────────────────────────
105
- {
106
- pattern: new RegExp(`${I_PREFIX}(?:switched\\s+to|changed\\s+(?:to|model\\s+to)|am\\s+now\\s+using)\\s+\\S+`, 'i'),
107
- tools: ['model_switch'],
108
- label: 'model_switch',
109
- },
110
- ];
111
- /** Negation patterns. If matched at the start of a sentence containing
112
- * the claim, the claim is NOT flagged. */
113
- const NEGATION_RE = /\b(?:couldn'?t|cannot|can'?t|wasn'?t\s+able|unable\s+to|failed\s+to|did\s+not|didn'?t|won'?t|will\s+not)\b/i;
46
+ exports.HonestyEnforcement = void 0;
47
+ /**
48
+ * Memory tools whose results carry the `verified` flag set by
49
+ * MemoryGuard. The list is closed — adding a new memory_* tool
50
+ * means extending this set.
51
+ */
52
+ const MEMORY_TOOLS = new Set([
53
+ 'memory_add',
54
+ 'memory_replace',
55
+ 'memory_remove',
56
+ ]);
57
+ /**
58
+ * Read `result.path` when present (file_* tools' result envelopes
59
+ * carry it). Returns undefined otherwise. Used only for cosmetic
60
+ * footer detail — never affects pass/fail outcome.
61
+ */
62
+ function extractPath(result) {
63
+ if (result && typeof result === 'object' && 'path' in result) {
64
+ const p = result.path;
65
+ if (typeof p === 'string')
66
+ return p;
67
+ }
68
+ return undefined;
69
+ }
70
+ /**
71
+ * Translate a `HonestyEvent` to the legacy `HonestyFinding` shape so
72
+ * existing downstream consumers (chatSession, telemetry) keep working.
73
+ * The fine-grained kind is preserved via `reason`.
74
+ */
75
+ function toFinding(event) {
76
+ switch (event.kind) {
77
+ case 'mutation_errored':
78
+ return {
79
+ claim: event.tool,
80
+ expectedTool: event.tool,
81
+ found: false,
82
+ confidence: 1,
83
+ reason: 'tool_errored',
84
+ };
85
+ case 'memory_unverified':
86
+ return {
87
+ claim: event.tool,
88
+ expectedTool: event.tool,
89
+ found: false,
90
+ confidence: 1,
91
+ reason: 'memory_verified_false',
92
+ };
93
+ }
94
+ }
114
95
  class HonestyEnforcement {
115
96
  constructor(mode = 'enforce', llmAdapter, logger) {
116
97
  this.llmAdapter = llmAdapter;
@@ -124,20 +105,63 @@ class HonestyEnforcement {
124
105
  return this.mode;
125
106
  }
126
107
  /**
127
- * Inspect a finished response against the actual tool-call trace.
128
- * Returns a structured result. Caller (AidenAgent) decides whether to
129
- * use `correctedResponse` or `originalResponse` based on `passed`.
108
+ * v4.7.0 Phase 2.3 — record deterministic unverified outcomes from
109
+ * the per-turn tool trace. Pure function; no I/O, no side effects.
130
110
  */
131
- async check(response, _messages, toolCallTrace) {
132
- if (this.mode === 'off') {
133
- return {
134
- passed: true,
135
- findings: [],
136
- confidence: 1,
137
- originalResponse: response,
138
- };
111
+ recordOutcomes(trace) {
112
+ const events = [];
113
+ for (const t of trace) {
114
+ if (t.error && t.handlerMutates === true) {
115
+ events.push({
116
+ kind: 'mutation_errored',
117
+ tool: t.name,
118
+ reason: t.error,
119
+ path: extractPath(t.result),
120
+ });
121
+ continue;
122
+ }
123
+ if (MEMORY_TOOLS.has(t.name) && t.verified === false) {
124
+ events.push({
125
+ kind: 'memory_unverified',
126
+ tool: t.name,
127
+ reason: 'verification failed',
128
+ });
129
+ }
139
130
  }
140
- if (!response || !response.trim()) {
131
+ return events;
132
+ }
133
+ /**
134
+ * v4.7.0 Phase 2.3 — render the append-only footer used in enforce
135
+ * mode. Caller concatenates with a blank line; we own the lines
136
+ * inside. Format: one summary line + one row per event.
137
+ */
138
+ buildFooter(events) {
139
+ const lines = [];
140
+ lines.push(`⚠️ Verifier: ${events.length} tool outcome(s) not verified this turn.`);
141
+ for (const e of events) {
142
+ if (e.kind === 'mutation_errored') {
143
+ const where = e.path ? ` (path: ${e.path})` : '';
144
+ lines.push(`- ${e.tool}${where}: errored — ${e.reason}`);
145
+ }
146
+ else {
147
+ lines.push(`- ${e.tool}: not verified`);
148
+ }
149
+ }
150
+ return lines.join('\n');
151
+ }
152
+ /**
153
+ * v4.7.0 Phase 2.3 — entry point. Records outcome events from the
154
+ * trace, converts to legacy `HonestyFinding[]` for downstream
155
+ * consumers, and renders an append-only footer in enforce mode.
156
+ *
157
+ * NEVER rewrites `response`. The returned `footer` is what the
158
+ * caller appends; the original text is preserved verbatim.
159
+ *
160
+ * Off mode short-circuits without touching the trace — minimal cost
161
+ * for users who opt out.
162
+ */
163
+ async check(response, _messages, trace) {
164
+ if (this.mode === 'off') {
141
165
  return {
142
166
  passed: true,
143
167
  findings: [],
@@ -145,145 +169,23 @@ class HonestyEnforcement {
145
169
  originalResponse: response,
146
170
  };
147
171
  }
148
- const findings = this.detectClaimsPattern(response, toolCallTrace);
149
- const failed = findings.filter((f) => !f.found);
150
- const passed = failed.length === 0;
151
- const confidence = findings.length === 0
152
- ? 1
153
- : findings.reduce((s, f) => s + f.confidence, 0) /
154
- findings.length;
155
- if (this.mode === 'detect') {
156
- this.logger?.('info', `[HonestyEnforcement] detect mode: ${findings.length} findings (${failed.length} failed)`);
157
- return {
158
- passed,
159
- findings,
160
- confidence,
161
- originalResponse: response,
162
- };
172
+ const events = this.recordOutcomes(trace);
173
+ const findings = events.map(toFinding);
174
+ const passed = findings.length === 0;
175
+ let footer;
176
+ if (this.mode === 'enforce' && !passed) {
177
+ footer = this.buildFooter(events);
163
178
  }
164
- // enforce mode
165
- let correctedResponse;
166
179
  if (!passed) {
167
- correctedResponse = this.buildCorrection(response, failed, toolCallTrace);
168
- this.logger?.('warn', `[HonestyEnforcement] enforce: rewrote response (${failed.length} failed claims)`);
180
+ this.logger?.('info', `honesty: ${events.length} unverified outcome(s) this turn`);
169
181
  }
170
182
  return {
171
183
  passed,
172
184
  findings,
173
- confidence,
185
+ confidence: 1,
174
186
  originalResponse: response,
175
- correctedResponse,
187
+ footer,
176
188
  };
177
189
  }
178
- // ─────────────────────────────────────────────────────────────────────
179
- // pattern detection
180
- // ─────────────────────────────────────────────────────────────────────
181
- detectClaimsPattern(response, trace) {
182
- const findings = [];
183
- const sentences = splitSentences(response);
184
- for (const sentence of sentences) {
185
- // Skip negated sentences entirely.
186
- if (NEGATION_RE.test(sentence))
187
- continue;
188
- for (const pat of PATTERNS) {
189
- if (!pat.pattern.test(sentence))
190
- continue;
191
- const matched = sentence.match(pat.pattern);
192
- const claimText = matched?.[0] ?? sentence.trim();
193
- const found = this.traceSatisfies(pat, trace);
194
- let reason;
195
- if (!found) {
196
- if (pat.kind === 'memory' && memoryFiredButUnverified(pat, trace)) {
197
- reason = 'memory_verified_false';
198
- }
199
- else if (toolFiredButErrored(pat, trace)) {
200
- reason = 'tool_errored';
201
- }
202
- else {
203
- reason = 'no_tool_call';
204
- }
205
- }
206
- findings.push({
207
- claim: claimText.trim(),
208
- expectedTool: pat.tools.length === 1 ? pat.tools[0] : pat.tools,
209
- found,
210
- confidence: 0.8,
211
- reason,
212
- });
213
- }
214
- }
215
- return findings;
216
- }
217
- traceSatisfies(pat, trace) {
218
- const matching = trace.filter((t) => pat.tools.includes(t.name) && !t.error);
219
- if (matching.length === 0)
220
- return false;
221
- if (pat.kind === 'memory') {
222
- // verified must be explicitly true
223
- return matching.some((m) => m.verified === true);
224
- }
225
- return true;
226
- }
227
- // ─────────────────────────────────────────────────────────────────────
228
- // correction builder
229
- // ─────────────────────────────────────────────────────────────────────
230
- buildCorrection(_original, failed, trace) {
231
- const lines = [];
232
- lines.push("I shouldn't claim actions I didn't take. Honest summary of what I actually did:");
233
- lines.push('');
234
- if (trace.length === 0) {
235
- lines.push('- No tools were called this turn.');
236
- }
237
- else {
238
- for (const entry of trace) {
239
- const status = entry.error ? `errored (${entry.error})` : 'succeeded';
240
- const verified = entry.verified === false
241
- ? ' (NOT VERIFIED)'
242
- : entry.verified === true
243
- ? ' (verified)'
244
- : '';
245
- lines.push(`- ${entry.name}: ${status}${verified}`);
246
- }
247
- }
248
- lines.push('');
249
- lines.push('Refused claims:');
250
- for (const f of failed) {
251
- const tool = Array.isArray(f.expectedTool)
252
- ? f.expectedTool.join('/')
253
- : f.expectedTool;
254
- const why = f.reason === 'memory_verified_false'
255
- ? `(memory write returned verified=false — fact was not stored)`
256
- : f.reason === 'tool_errored'
257
- ? `(tool errored)`
258
- : `(no ${tool} call in trace)`;
259
- lines.push(`- "${f.claim}" ${why}`);
260
- }
261
- return lines.join('\n');
262
- }
263
190
  }
264
191
  exports.HonestyEnforcement = HonestyEnforcement;
265
- // ─────────────────────────────────────────────────────────────────────
266
- // helpers (exported for tests)
267
- // ─────────────────────────────────────────────────────────────────────
268
- function splitSentences(text) {
269
- // Split on sentence terminators while keeping reasonable bounds.
270
- // Don't try to be clever about abbreviations — false positives are
271
- // benign (we just inspect more granular slices).
272
- return text
273
- .split(/(?<=[.!?])\s+|\n+/)
274
- .map((s) => s.trim())
275
- .filter((s) => s.length > 0);
276
- }
277
- function memoryFiredButUnverified(pat, trace) {
278
- if (pat.kind !== 'memory')
279
- return false;
280
- return trace.some((t) => pat.tools.includes(t.name) && !t.error && t.verified === false);
281
- }
282
- function toolFiredButErrored(pat, trace) {
283
- return trace.some((t) => pat.tools.includes(t.name) && !!t.error);
284
- }
285
- exports.__test__ = {
286
- splitSentences,
287
- PATTERNS,
288
- NEGATION_RE,
289
- };
@@ -0,0 +1,60 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * tools/v4/ui/_uiSmokeTool.ts — v4.7 Slice 1 smoke harness.
10
+ *
11
+ * Internal tool used ONLY to verify the uiOnly dispatch seam from
12
+ * Slice 1 (ToolHandler.uiOnly + resolveUiOnly + onUiEvent +
13
+ * Display.renderUiEvent). NOT for end-user LLM workflows.
14
+ *
15
+ * Registered behind `AIDEN_UI_SMOKE=1` env flag in
16
+ * `tools/v4/index.ts::registerAllTools`. Will be deleted once
17
+ * Slice 2 lands the real ui_task_update / ui_task_done tools.
18
+ *
19
+ * When invoked, the agent's dispatch loop:
20
+ * - resolves uiOnly=true via the resolveUiOnly closure
21
+ * - fires runOptions.onUiEvent('_ui_smoke', args)
22
+ * - SKIPS execute() entirely
23
+ * - SKIPS turnToolMessages push + toolCallCount increment + verifier
24
+ *
25
+ * The handler's `execute` MUST never be called by the dispatch path
26
+ * when uiOnly is honoured. Throws if reached — that throw is a
27
+ * regression alarm.
28
+ */
29
+ Object.defineProperty(exports, "__esModule", { value: true });
30
+ exports.uiSmokeTool = void 0;
31
+ exports.uiSmokeTool = {
32
+ schema: {
33
+ name: '_ui_smoke',
34
+ description: 'Internal smoke-test tool for the v4.7 uiOnly dispatch path. ' +
35
+ 'Renders a single debug line through the UI event seam. ' +
36
+ 'Does NOT round-trip back to the model. Only available when ' +
37
+ 'AIDEN_UI_SMOKE=1.',
38
+ inputSchema: {
39
+ type: 'object',
40
+ properties: {
41
+ message: {
42
+ type: 'string',
43
+ description: 'Free-text payload echoed in the rendered debug line.',
44
+ },
45
+ },
46
+ required: ['message'],
47
+ },
48
+ },
49
+ category: 'read',
50
+ mutates: false,
51
+ uiOnly: true,
52
+ execute() {
53
+ // Defensive — if `resolveUiOnly` is wired correctly, the
54
+ // dispatch loop short-circuits BEFORE reaching this body. A
55
+ // call here means the resolver returned false/undefined and
56
+ // the seam regressed. Throwing surfaces the regression at
57
+ // smoke time instead of silently behaving like a regular tool.
58
+ throw new Error('_ui_smoke.execute() should never be called — uiOnly dispatch path regressed');
59
+ },
60
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "aiden-runtime",
3
- "version": "4.6.0",
3
+ "version": "4.7.0",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
@@ -254,7 +254,7 @@
254
254
  "epub2": "^3.0.2",
255
255
  "execa": "^8.0.1",
256
256
  "express": "^4.18.2",
257
- "form-data": "^4.0.0",
257
+ "form-data": "^4.0.4",
258
258
  "imap-simple": "^5.1.0",
259
259
  "js-tiktoken": "^1.0.21",
260
260
  "js-yaml": "^4.1.1",
@@ -284,7 +284,7 @@
284
284
  "uuid": "^9.0.0",
285
285
  "whatsapp-web.js": "^1.26.0",
286
286
  "wrap-ansi": "^9.0.2",
287
- "ws": "^8.20.0"
287
+ "ws": "^8.20.1"
288
288
  },
289
289
  "optionalDependencies": {
290
290
  "decibri": "*",
@@ -296,7 +296,13 @@
296
296
  "semver": "^7.5.2",
297
297
  "postcss": "^8.5.10",
298
298
  "hono": "^4.12.16",
299
- "minimatch": "^9.0.9"
299
+ "minimatch": "^9.0.9",
300
+ "qs": ">=6.14.1",
301
+ "tough-cookie": ">=4.1.3",
302
+ "protobufjs": ">=7.5.8",
303
+ "request": {
304
+ "form-data": "^2.5.5"
305
+ }
300
306
  },
301
307
  "devDependencies": {
302
308
  "@types/better-sqlite3": "^7.6.13",