aiden-runtime 4.6.1 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1470,6 +1470,10 @@ async function buildAgentRuntime(cliOpts, opts) {
1470
1470
  resolveVerifiedFlag,
1471
1471
  resolveToolset,
1472
1472
  resolveMutates,
1473
+ // v4.7.0 Phase 2.4 — share the REPL's config-resolved honesty mode
1474
+ // with daemon-built agents so autonomous turns honour the same
1475
+ // setting interactive turns do.
1476
+ honestyMode,
1473
1477
  maxTurns: config.getValue('agent.max_turns', 90),
1474
1478
  });
1475
1479
  // Phase v4.1.2 alive-core: SOUL.md file watcher. Best-effort —
@@ -40,6 +40,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
40
40
  exports.buildDaemonAgentBuilder = buildDaemonAgentBuilder;
41
41
  const aidenAgent_1 = require("../../core/v4/aidenAgent");
42
42
  const approvalEngine_1 = require("../../moat/approvalEngine");
43
+ const honestyEnforcement_1 = require("../../moat/honestyEnforcement");
43
44
  // ── Implementation ─────────────────────────────────────────────────────────
44
45
  const DEFAULT_MAX_TURNS = 90;
45
46
  /**
@@ -112,11 +113,18 @@ function buildDaemonAgentBuilder(deps) {
112
113
  resolveMutates: deps.resolveMutates,
113
114
  // Memory snapshot refresh — daemon agent doesn't track dirty
114
115
  // bits because each instance is short-lived; we provide the
115
- // refresh callback so honestyEnforcement-style consumers (when
116
- // we add them) can still rebuild.
116
+ // refresh callback so honestyEnforcement (and any future
117
+ // consumer that needs a current memory snapshot) can rebuild.
117
118
  refreshMemorySnapshot: () => deps.memoryManager.loadSnapshot(),
118
- // Scope cuts (Phase 7b): no plannerGuard, no honestyEnforcement,
119
- // no skillTeacher, no skillMiner. These add LLM calls + state
119
+ // v4.7.0 Phase 2.4 — HonestyEnforcement is now structural
120
+ // (reads tool trace only, no natural-language scanning) and
121
+ // cheap enough to run on autonomous daemon turns. Mode mirrors
122
+ // the REPL's config-resolved value (default 'enforce'); the
123
+ // footer appended in enforce mode is captured by the daemon
124
+ // dispatcher's run_events and surfaced in the channel reply.
125
+ honestyEnforcement: new honestyEnforcement_1.HonestyEnforcement(deps.honestyMode ?? 'enforce'),
126
+ // Scope cuts (Phase 7b, still deferred): no plannerGuard, no
127
+ // skillTeacher, no skillMiner. These add LLM calls + state
120
128
  // that don't fit the daemon's "fire and act" pattern.
121
129
  });
122
130
  // Q-P7b-4(b) — minimal per-turn stdout line for tail-friendly
@@ -372,25 +372,21 @@ class AidenAgent {
372
372
  // 8. Run the tool-calling loop.
373
373
  const loopResult = await this.runTurnLoop(messages, narrowedTools, trackers, options);
374
374
  // 9. Honesty post-loop scan (only if loop ended with a normal stop).
375
+ //
376
+ // v4.7.0 Phase 2.3 — the verifier now records deterministic
377
+ // outcome events from `toolCallTrace` (not regex over the
378
+ // assistant's text). When `findings.length > 0` AND mode is
379
+ // `enforce`, it returns an append-only `footer` we concatenate
380
+ // to `finalContent`. The model's text is NEVER rewritten —
381
+ // that was the v4.6.x failure mode this verifier replaces.
375
382
  let honestyFindings;
376
383
  let finalContent = loopResult.finalContent;
377
384
  if (this.honestyEnforcement && loopResult.finishReason === 'stop') {
378
385
  try {
379
386
  const scan = await this.honestyEnforcement.check(finalContent, loopResult.messages, loopResult.toolCallTrace);
380
- if (!scan.passed) {
381
- honestyFindings = scan.findings;
382
- if (scan.correctedResponse) {
383
- finalContent = scan.correctedResponse;
384
- // Reflect the corrected text in the message history too so
385
- // /debug-prompt and /usage agree on the final string.
386
- for (let i = loopResult.messages.length - 1; i >= 0; i--) {
387
- const m = loopResult.messages[i];
388
- if (m.role === 'assistant' && (!m.toolCalls || m.toolCalls.length === 0)) {
389
- loopResult.messages[i].content = finalContent;
390
- break;
391
- }
392
- }
393
- }
387
+ honestyFindings = scan.findings;
388
+ if (scan.footer) {
389
+ finalContent = `${finalContent}\n\n${scan.footer}`;
394
390
  }
395
391
  }
396
392
  catch {
@@ -970,6 +966,15 @@ class AidenAgent {
970
966
  result: result.result,
971
967
  error: result.error,
972
968
  verified: this.resolveVerifiedFlag?.(result),
969
+ // v4.7.0 Phase 2.3 — stamp the handler's `mutates` flag
970
+ // at dispatch time so the post-loop honesty verifier can
971
+ // distinguish mutating vs read-only failures without
972
+ // needing a registry handle. Defaults to `false` for
973
+ // unknown tools (the resolver returns undefined) — read-
974
+ // only tools that error are surfaced via the tool-trail
975
+ // row already; the verifier deliberately stays quiet
976
+ // about them.
977
+ handlerMutates: this.resolveMutates?.(call.name) ?? false,
973
978
  // v4.2 Phase 1 — verification surfaces alongside the trace
974
979
  // entry for downstream callers (chatSession, loopTrace,
975
980
  // future RecoveryReport). Undefined when TCE is off.
@@ -94,7 +94,7 @@ function expandPathInline(input, cwd) {
94
94
  }
95
95
  /**
96
96
  * Boundary-aware containment check. `path.relative` avoids the
97
- * `/home/user-evil` vs `/home/user` false positive that a naive
97
+ * `<root>/user-evil` vs `<root>/user` false positive that a naive
98
98
  * `startsWith` would produce.
99
99
  */
100
100
  function isWithin(child, parent) {
@@ -33,6 +33,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
33
33
  exports.ProviderNotFoundError = exports.SUBAGENT_BLOCKED_TOOL_NAMES = void 0;
34
34
  exports.buildChildAgent = buildChildAgent;
35
35
  const approvalEngine_1 = require("../../../moat/approvalEngine");
36
+ const honestyEnforcement_1 = require("../../../moat/honestyEnforcement");
36
37
  const aidenAgent_1 = require("../aidenAgent");
37
38
  const providerFallback_1 = require("../providerFallback");
38
39
  // ── Hard-coded blocklist (Q5 from design doc §2) ────────────────────────────
@@ -177,10 +178,16 @@ function buildChildAgent(deps, input) {
177
178
  // Pure no-op when runStore is absent (unit tests of buildChildAgent).
178
179
  const onToolCall = buildOnToolCall(deps);
179
180
  // ── 7. Build the child agent ─────────────────────────────────────────────
180
- // Focused worker config: omit plannerGuard, honestyEnforcement,
181
- // skillTeacher, skillMiner, contextCompressor, promptCaching,
182
- // promptBuilder. Match the daemon agent's "act on the task, don't
183
- // self-improve" shape.
181
+ // Focused worker config: omit plannerGuard, skillTeacher, skillMiner,
182
+ // contextCompressor, promptCaching, promptBuilder. Match the daemon
183
+ // agent's "act on the task, don't self-improve" shape.
184
+ //
185
+ // v4.7.0: HonestyEnforcement is now structural (reads tool trace only,
186
+ // no natural-language scanning) and cheap enough to run in subagents.
187
+ // Mode is 'detect' here — events are captured into the child's run
188
+ // record but never produce user-visible output (subagents have no
189
+ // chat surface; the parent assembles their summary).
190
+ const childHonestyEnforcement = new honestyEnforcement_1.HonestyEnforcement('detect');
184
191
  const agent = new aidenAgent_1.AidenAgent({
185
192
  provider: childProvider,
186
193
  tools: childTools,
@@ -192,6 +199,7 @@ function buildChildAgent(deps, input) {
192
199
  resolveVerifiedFlag: deps.resolveVerifiedFlag,
193
200
  resolveToolset: deps.resolveToolset,
194
201
  resolveMutates: deps.resolveMutates,
202
+ honestyEnforcement: childHonestyEnforcement,
195
203
  onToolCall,
196
204
  // iterationBudgetInjection inherits the default (true) — child
197
205
  // sees its own remaining-budget hint near the end of the run.
@@ -2,4 +2,4 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.VERSION = void 0;
4
4
  // AUTO-GENERATED by scripts/inject-version.js — do not edit by hand
5
- exports.VERSION = '4.6.1';
5
+ exports.VERSION = '4.7.0';
@@ -6,111 +6,92 @@
6
6
  * Aiden — local-first agent.
7
7
  */
8
8
  /**
9
- * moat/honestyEnforcement.ts — Aiden v4.0.0
9
+ * moat/honestyEnforcement.ts — Aiden v4.7.0 (Phase 2.3 — outcome-based verifier)
10
10
  *
11
- * Post-loop trace inspector. Runs after AidenAgent returns its final
12
- * response. Compares the response's stated actions to the actual tool
13
- * calls in the trace. If the model claims it did something but the trace
14
- * says no tool fired (or fired and failed verification), Honesty refuses
15
- * the claim and rewrites the response.
11
+ * The regex-based natural-language claim scanner (deleted in Phase 2.2)
12
+ * has been replaced with a deterministic outcome recorder that consumes
13
+ * `toolCallTrace` structurally. Two failure modes are recorded:
16
14
  *
17
- * The failure modes this catches:
18
- * - "I saved your file to ~/notes/today.md" → no file_write call
19
- * - "I sent the email" → no email tool call
20
- * - "I remembered that" → no memory_add OR memory_add returned verified=false
21
- * - "I searched the web" → no web_search call
22
- * - "I ran X" → no shell_exec call
15
+ * 1. mutation_errored — a tool tagged `mutates: true` (in the
16
+ * registry, stamped onto trace entries at dispatch time via
17
+ * `handlerMutates`) returned an `error` envelope. Path is
18
+ * extracted from `result.path` when present.
23
19
  *
24
- * Three modes:
25
- * off — passes everything; no inspection.
26
- * detect — runs checks and populates findings, but does NOT modify the
27
- * response. Useful for telemetry / canary measurement.
28
- * enforce — DEFAULT. Rewrites failed claims into honest text that lists
29
- * the actual trace summary.
20
+ * 2. memory_unverified — a memory_* tool's result carries
21
+ * `verified === false` (per Phase 9 MemoryGuard). This was
22
+ * the v3 C20/C21 lying surface and remains the only memory-
23
+ * specific check the verifier performs.
30
24
  *
31
- * Detection:
32
- * 1. Pattern-based (default, $0 cost) — past-tense action verbs matched
33
- * against tool registry. This file owns the table.
34
- * 2. LLM-classified — auxiliary LLM call. Wired via the optional
35
- * `llmAdapter`; defaulted off in Phase 12. Phase 13 turns it on.
25
+ * Modes:
26
+ * off — bypass entirely. No events recorded.
27
+ * detect — Record events; never user-visible. `findings` populated;
28
+ * no `footer`.
29
+ * enforce — DEFAULT. Record events + append a short footer to the
30
+ * assistant reply summarising the unverified outcomes.
31
+ * The footer is APPEND-ONLY — the assistant's text is
32
+ * never rewritten. (This is the key behaviour change vs
33
+ * v4.6.x — append-only, never an in-place edit.)
36
34
  *
37
- * Critical invariant for memory:
38
- * Every memory_add / memory_replace / memory_remove tool result carries
39
- * a `verified` flag (per Phase 9 MemoryGuard). If the model claims
40
- * "I remembered X" but `verified=false`, Honesty MUST flag this — even
41
- * though a memory tool DID fire. This was the v3 C20/C21 lying surface.
42
- *
43
- * Status: PHASE 12.
35
+ * What the verifier intentionally does NOT do (delta vs the deleted
36
+ * scanner):
37
+ * - It does not look at the assistant's natural-language text at all.
38
+ * There's no regex matching of English verbs to tool names.
39
+ * - It does not emit `no_tool_call` findings. The previous "model
40
+ * claimed X but no tool fired" failure mode is gone — that was
41
+ * the false-refusal class. We only record OUTCOMES that ran.
42
+ * - It does not mutate `loopResult.messages`. The caller appends
43
+ * the footer to its own `finalContent` string variable.
44
44
  */
45
45
  Object.defineProperty(exports, "__esModule", { value: true });
46
- exports.__test__ = exports.HonestyEnforcement = void 0;
47
- /** Allows optional adverbs/auxiliaries between "I" and the verb:
48
- * "I have", "I also", "I just", "I successfully", "I have just", etc. */
49
- const I_PREFIX = String.raw `\bI\s+(?:have\s+|just\s+|also\s+|already\s+|successfully\s+|then\s+|now\s+){0,3}`;
50
- const PATTERNS = [
51
- // ── File operations (past tense only) ─────────────────────────
52
- {
53
- pattern: new RegExp(`${I_PREFIX}(?:saved|wrote|created|modified|patched|updated)\\b[^.]*\\b(?:file|to|at|in)\\b`, 'i'),
54
- tools: ['file_write', 'file_patch', 'skill_manage'],
55
- label: 'file_write',
56
- },
57
- {
58
- pattern: new RegExp(`${I_PREFIX}(?:deleted|removed)\\s+(?:the\\s+)?(?:file|directory|folder)\\b`, 'i'),
59
- tools: ['file_delete'],
60
- label: 'file_delete',
61
- },
62
- // ── Web ────────────────────────────────────────────────────────
63
- {
64
- pattern: new RegExp(`${I_PREFIX}(?:searched|looked\\s+up|found(?:\\s+online)?|googled)\\b`, 'i'),
65
- tools: ['web_search', 'deep_research'],
66
- label: 'web_search',
67
- },
68
- {
69
- pattern: new RegExp(`${I_PREFIX}(?:fetched|downloaded|retrieved)\\b`, 'i'),
70
- tools: ['web_fetch', 'fetch_url'],
71
- label: 'web_fetch',
72
- },
73
- // ── Shell / execution ──────────────────────────────────────────
74
- {
75
- pattern: new RegExp(`${I_PREFIX}(?:ran|executed|called)\\b`, 'i'),
76
- tools: ['shell_exec', 'execute_code', 'run_python', 'run_node'],
77
- label: 'shell_exec',
78
- },
79
- // ── Browser ────────────────────────────────────────────────────
80
- {
81
- pattern: new RegExp(`${I_PREFIX}(?:navigated|clicked|typed|scrolled)\\b`, 'i'),
82
- tools: [
83
- 'browser_navigate',
84
- 'open_browser',
85
- 'browser_click',
86
- 'browser_type',
87
- 'browser_scroll',
88
- ],
89
- label: 'browser_action',
90
- },
91
- // ── Memory (verified=true required) ────────────────────────────
92
- {
93
- pattern: new RegExp(`${I_PREFIX}(?:remembered|memori[sz]ed|noted\\s+that|saved\\s+that\\s+to\\s+memory)\\b`, 'i'),
94
- tools: ['memory_add', 'memory_upsert'],
95
- label: 'memory_add',
96
- kind: 'memory',
97
- },
98
- {
99
- pattern: new RegExp(`${I_PREFIX}(?:forgot(?:ten)?|removed)\\b[^.]*\\bmemory\\b`, 'i'),
100
- tools: ['memory_remove', 'memory_forget'],
101
- label: 'memory_remove',
102
- kind: 'memory',
103
- },
104
- // ── Model switch ───────────────────────────────────────────────
105
- {
106
- pattern: new RegExp(`${I_PREFIX}(?:switched\\s+to|changed\\s+(?:to|model\\s+to)|am\\s+now\\s+using)\\s+\\S+`, 'i'),
107
- tools: ['model_switch'],
108
- label: 'model_switch',
109
- },
110
- ];
111
- /** Negation patterns. If matched at the start of a sentence containing
112
- * the claim, the claim is NOT flagged. */
113
- const NEGATION_RE = /\b(?:couldn'?t|cannot|can'?t|wasn'?t\s+able|unable\s+to|failed\s+to|did\s+not|didn'?t|won'?t|will\s+not)\b/i;
46
+ exports.HonestyEnforcement = void 0;
47
+ /**
48
+ * Memory tools whose results carry the `verified` flag set by
49
+ * MemoryGuard. The list is closed — adding a new memory_* tool
50
+ * means extending this set.
51
+ */
52
+ const MEMORY_TOOLS = new Set([
53
+ 'memory_add',
54
+ 'memory_replace',
55
+ 'memory_remove',
56
+ ]);
57
+ /**
58
+ * Read `result.path` when present (file_* tools' result envelopes
59
+ * carry it). Returns undefined otherwise. Used only for cosmetic
60
+ * footer detail — never affects pass/fail outcome.
61
+ */
62
+ function extractPath(result) {
63
+ if (result && typeof result === 'object' && 'path' in result) {
64
+ const p = result.path;
65
+ if (typeof p === 'string')
66
+ return p;
67
+ }
68
+ return undefined;
69
+ }
70
+ /**
71
+ * Translate a `HonestyEvent` to the legacy `HonestyFinding` shape so
72
+ * existing downstream consumers (chatSession, telemetry) keep working.
73
+ * The fine-grained kind is preserved via `reason`.
74
+ */
75
+ function toFinding(event) {
76
+ switch (event.kind) {
77
+ case 'mutation_errored':
78
+ return {
79
+ claim: event.tool,
80
+ expectedTool: event.tool,
81
+ found: false,
82
+ confidence: 1,
83
+ reason: 'tool_errored',
84
+ };
85
+ case 'memory_unverified':
86
+ return {
87
+ claim: event.tool,
88
+ expectedTool: event.tool,
89
+ found: false,
90
+ confidence: 1,
91
+ reason: 'memory_verified_false',
92
+ };
93
+ }
94
+ }
114
95
  class HonestyEnforcement {
115
96
  constructor(mode = 'enforce', llmAdapter, logger) {
116
97
  this.llmAdapter = llmAdapter;
@@ -124,20 +105,63 @@ class HonestyEnforcement {
124
105
  return this.mode;
125
106
  }
126
107
  /**
127
- * Inspect a finished response against the actual tool-call trace.
128
- * Returns a structured result. Caller (AidenAgent) decides whether to
129
- * use `correctedResponse` or `originalResponse` based on `passed`.
108
+ * v4.7.0 Phase 2.3 — record deterministic unverified outcomes from
109
+ * the per-turn tool trace. Pure function; no I/O, no side effects.
130
110
  */
131
- async check(response, _messages, toolCallTrace) {
132
- if (this.mode === 'off') {
133
- return {
134
- passed: true,
135
- findings: [],
136
- confidence: 1,
137
- originalResponse: response,
138
- };
111
+ recordOutcomes(trace) {
112
+ const events = [];
113
+ for (const t of trace) {
114
+ if (t.error && t.handlerMutates === true) {
115
+ events.push({
116
+ kind: 'mutation_errored',
117
+ tool: t.name,
118
+ reason: t.error,
119
+ path: extractPath(t.result),
120
+ });
121
+ continue;
122
+ }
123
+ if (MEMORY_TOOLS.has(t.name) && t.verified === false) {
124
+ events.push({
125
+ kind: 'memory_unverified',
126
+ tool: t.name,
127
+ reason: 'verification failed',
128
+ });
129
+ }
139
130
  }
140
- if (!response || !response.trim()) {
131
+ return events;
132
+ }
133
+ /**
134
+ * v4.7.0 Phase 2.3 — render the append-only footer used in enforce
135
+ * mode. Caller concatenates with a blank line; we own the lines
136
+ * inside. Format: one summary line + one row per event.
137
+ */
138
+ buildFooter(events) {
139
+ const lines = [];
140
+ lines.push(`⚠️ Verifier: ${events.length} tool outcome(s) not verified this turn.`);
141
+ for (const e of events) {
142
+ if (e.kind === 'mutation_errored') {
143
+ const where = e.path ? ` (path: ${e.path})` : '';
144
+ lines.push(`- ${e.tool}${where}: errored — ${e.reason}`);
145
+ }
146
+ else {
147
+ lines.push(`- ${e.tool}: not verified`);
148
+ }
149
+ }
150
+ return lines.join('\n');
151
+ }
152
+ /**
153
+ * v4.7.0 Phase 2.3 — entry point. Records outcome events from the
154
+ * trace, converts to legacy `HonestyFinding[]` for downstream
155
+ * consumers, and renders an append-only footer in enforce mode.
156
+ *
157
+ * NEVER rewrites `response`. The returned `footer` is what the
158
+ * caller appends; the original text is preserved verbatim.
159
+ *
160
+ * Off mode short-circuits without touching the trace — minimal cost
161
+ * for users who opt out.
162
+ */
163
+ async check(response, _messages, trace) {
164
+ if (this.mode === 'off') {
141
165
  return {
142
166
  passed: true,
143
167
  findings: [],
@@ -145,145 +169,23 @@ class HonestyEnforcement {
145
169
  originalResponse: response,
146
170
  };
147
171
  }
148
- const findings = this.detectClaimsPattern(response, toolCallTrace);
149
- const failed = findings.filter((f) => !f.found);
150
- const passed = failed.length === 0;
151
- const confidence = findings.length === 0
152
- ? 1
153
- : findings.reduce((s, f) => s + f.confidence, 0) /
154
- findings.length;
155
- if (this.mode === 'detect') {
156
- this.logger?.('info', `[HonestyEnforcement] detect mode: ${findings.length} findings (${failed.length} failed)`);
157
- return {
158
- passed,
159
- findings,
160
- confidence,
161
- originalResponse: response,
162
- };
172
+ const events = this.recordOutcomes(trace);
173
+ const findings = events.map(toFinding);
174
+ const passed = findings.length === 0;
175
+ let footer;
176
+ if (this.mode === 'enforce' && !passed) {
177
+ footer = this.buildFooter(events);
163
178
  }
164
- // enforce mode
165
- let correctedResponse;
166
179
  if (!passed) {
167
- correctedResponse = this.buildCorrection(response, failed, toolCallTrace);
168
- this.logger?.('warn', `[HonestyEnforcement] enforce: rewrote response (${failed.length} failed claims)`);
180
+ this.logger?.('info', `honesty: ${events.length} unverified outcome(s) this turn`);
169
181
  }
170
182
  return {
171
183
  passed,
172
184
  findings,
173
- confidence,
185
+ confidence: 1,
174
186
  originalResponse: response,
175
- correctedResponse,
187
+ footer,
176
188
  };
177
189
  }
178
- // ─────────────────────────────────────────────────────────────────────
179
- // pattern detection
180
- // ─────────────────────────────────────────────────────────────────────
181
- detectClaimsPattern(response, trace) {
182
- const findings = [];
183
- const sentences = splitSentences(response);
184
- for (const sentence of sentences) {
185
- // Skip negated sentences entirely.
186
- if (NEGATION_RE.test(sentence))
187
- continue;
188
- for (const pat of PATTERNS) {
189
- if (!pat.pattern.test(sentence))
190
- continue;
191
- const matched = sentence.match(pat.pattern);
192
- const claimText = matched?.[0] ?? sentence.trim();
193
- const found = this.traceSatisfies(pat, trace);
194
- let reason;
195
- if (!found) {
196
- if (pat.kind === 'memory' && memoryFiredButUnverified(pat, trace)) {
197
- reason = 'memory_verified_false';
198
- }
199
- else if (toolFiredButErrored(pat, trace)) {
200
- reason = 'tool_errored';
201
- }
202
- else {
203
- reason = 'no_tool_call';
204
- }
205
- }
206
- findings.push({
207
- claim: claimText.trim(),
208
- expectedTool: pat.tools.length === 1 ? pat.tools[0] : pat.tools,
209
- found,
210
- confidence: 0.8,
211
- reason,
212
- });
213
- }
214
- }
215
- return findings;
216
- }
217
- traceSatisfies(pat, trace) {
218
- const matching = trace.filter((t) => pat.tools.includes(t.name) && !t.error);
219
- if (matching.length === 0)
220
- return false;
221
- if (pat.kind === 'memory') {
222
- // verified must be explicitly true
223
- return matching.some((m) => m.verified === true);
224
- }
225
- return true;
226
- }
227
- // ─────────────────────────────────────────────────────────────────────
228
- // correction builder
229
- // ─────────────────────────────────────────────────────────────────────
230
- buildCorrection(_original, failed, trace) {
231
- const lines = [];
232
- lines.push("I shouldn't claim actions I didn't take. Honest summary of what I actually did:");
233
- lines.push('');
234
- if (trace.length === 0) {
235
- lines.push('- No tools were called this turn.');
236
- }
237
- else {
238
- for (const entry of trace) {
239
- const status = entry.error ? `errored (${entry.error})` : 'succeeded';
240
- const verified = entry.verified === false
241
- ? ' (NOT VERIFIED)'
242
- : entry.verified === true
243
- ? ' (verified)'
244
- : '';
245
- lines.push(`- ${entry.name}: ${status}${verified}`);
246
- }
247
- }
248
- lines.push('');
249
- lines.push('Refused claims:');
250
- for (const f of failed) {
251
- const tool = Array.isArray(f.expectedTool)
252
- ? f.expectedTool.join('/')
253
- : f.expectedTool;
254
- const why = f.reason === 'memory_verified_false'
255
- ? `(memory write returned verified=false — fact was not stored)`
256
- : f.reason === 'tool_errored'
257
- ? `(tool errored)`
258
- : `(no ${tool} call in trace)`;
259
- lines.push(`- "${f.claim}" ${why}`);
260
- }
261
- return lines.join('\n');
262
- }
263
190
  }
264
191
  exports.HonestyEnforcement = HonestyEnforcement;
265
- // ─────────────────────────────────────────────────────────────────────
266
- // helpers (exported for tests)
267
- // ─────────────────────────────────────────────────────────────────────
268
- function splitSentences(text) {
269
- // Split on sentence terminators while keeping reasonable bounds.
270
- // Don't try to be clever about abbreviations — false positives are
271
- // benign (we just inspect more granular slices).
272
- return text
273
- .split(/(?<=[.!?])\s+|\n+/)
274
- .map((s) => s.trim())
275
- .filter((s) => s.length > 0);
276
- }
277
- function memoryFiredButUnverified(pat, trace) {
278
- if (pat.kind !== 'memory')
279
- return false;
280
- return trace.some((t) => pat.tools.includes(t.name) && !t.error && t.verified === false);
281
- }
282
- function toolFiredButErrored(pat, trace) {
283
- return trace.some((t) => pat.tools.includes(t.name) && !!t.error);
284
- }
285
- exports.__test__ = {
286
- splitSentences,
287
- PATTERNS,
288
- NEGATION_RE,
289
- };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "aiden-runtime",
3
- "version": "4.6.1",
3
+ "version": "4.7.0",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
@@ -254,7 +254,7 @@
254
254
  "epub2": "^3.0.2",
255
255
  "execa": "^8.0.1",
256
256
  "express": "^4.18.2",
257
- "form-data": "^4.0.0",
257
+ "form-data": "^4.0.4",
258
258
  "imap-simple": "^5.1.0",
259
259
  "js-tiktoken": "^1.0.21",
260
260
  "js-yaml": "^4.1.1",
@@ -284,7 +284,7 @@
284
284
  "uuid": "^9.0.0",
285
285
  "whatsapp-web.js": "^1.26.0",
286
286
  "wrap-ansi": "^9.0.2",
287
- "ws": "^8.20.0"
287
+ "ws": "^8.20.1"
288
288
  },
289
289
  "optionalDependencies": {
290
290
  "decibri": "*",
@@ -296,7 +296,13 @@
296
296
  "semver": "^7.5.2",
297
297
  "postcss": "^8.5.10",
298
298
  "hono": "^4.12.16",
299
- "minimatch": "^9.0.9"
299
+ "minimatch": "^9.0.9",
300
+ "qs": ">=6.14.1",
301
+ "tough-cookie": ">=4.1.3",
302
+ "protobufjs": ">=7.5.8",
303
+ "request": {
304
+ "form-data": "^2.5.5"
305
+ }
300
306
  },
301
307
  "devDependencies": {
302
308
  "@types/better-sqlite3": "^7.6.13",