aiden-runtime 4.6.1 → 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +488 -265
- package/dist/cli/v4/aidenCLI.js +4 -0
- package/dist/cli/v4/daemonAgentBuilder.js +12 -4
- package/dist/core/v4/aidenAgent.js +19 -14
- package/dist/core/v4/sandboxFs.js +1 -1
- package/dist/core/v4/subagent/childBuilder.js +12 -4
- package/dist/core/version.js +1 -1
- package/dist/moat/honestyEnforcement.js +143 -241
- package/package.json +10 -4
package/dist/cli/v4/aidenCLI.js
CHANGED
|
@@ -1470,6 +1470,10 @@ async function buildAgentRuntime(cliOpts, opts) {
|
|
|
1470
1470
|
resolveVerifiedFlag,
|
|
1471
1471
|
resolveToolset,
|
|
1472
1472
|
resolveMutates,
|
|
1473
|
+
// v4.7.0 Phase 2.4 — share the REPL's config-resolved honesty mode
|
|
1474
|
+
// with daemon-built agents so autonomous turns honour the same
|
|
1475
|
+
// setting interactive turns do.
|
|
1476
|
+
honestyMode,
|
|
1473
1477
|
maxTurns: config.getValue('agent.max_turns', 90),
|
|
1474
1478
|
});
|
|
1475
1479
|
// Phase v4.1.2 alive-core: SOUL.md file watcher. Best-effort —
|
|
package/dist/cli/v4/daemonAgentBuilder.js
CHANGED
|
@@ -40,6 +40,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
40
40
|
exports.buildDaemonAgentBuilder = buildDaemonAgentBuilder;
|
|
41
41
|
const aidenAgent_1 = require("../../core/v4/aidenAgent");
|
|
42
42
|
const approvalEngine_1 = require("../../moat/approvalEngine");
|
|
43
|
+
const honestyEnforcement_1 = require("../../moat/honestyEnforcement");
|
|
43
44
|
// ── Implementation ─────────────────────────────────────────────────────────
|
|
44
45
|
const DEFAULT_MAX_TURNS = 90;
|
|
45
46
|
/**
|
|
@@ -112,11 +113,18 @@ function buildDaemonAgentBuilder(deps) {
|
|
|
112
113
|
resolveMutates: deps.resolveMutates,
|
|
113
114
|
// Memory snapshot refresh — daemon agent doesn't track dirty
|
|
114
115
|
// bits because each instance is short-lived; we provide the
|
|
115
|
-
// refresh callback so honestyEnforcement
|
|
116
|
-
//
|
|
116
|
+
// refresh callback so honestyEnforcement (and any future
|
|
117
|
+
// consumer that needs a current memory snapshot) can rebuild.
|
|
117
118
|
refreshMemorySnapshot: () => deps.memoryManager.loadSnapshot(),
|
|
118
|
-
//
|
|
119
|
-
//
|
|
119
|
+
// v4.7.0 Phase 2.4 — HonestyEnforcement is now structural
|
|
120
|
+
// (reads tool trace only, no natural-language scanning) and
|
|
121
|
+
// cheap enough to run on autonomous daemon turns. Mode mirrors
|
|
122
|
+
// the REPL's config-resolved value (default 'enforce'); the
|
|
123
|
+
// footer appended in enforce mode is captured by the daemon
|
|
124
|
+
// dispatcher's run_events and surfaced in the channel reply.
|
|
125
|
+
honestyEnforcement: new honestyEnforcement_1.HonestyEnforcement(deps.honestyMode ?? 'enforce'),
|
|
126
|
+
// Scope cuts (Phase 7b, still deferred): no plannerGuard, no
|
|
127
|
+
// skillTeacher, no skillMiner. These add LLM calls + state
|
|
120
128
|
// that don't fit the daemon's "fire and act" pattern.
|
|
121
129
|
});
|
|
122
130
|
// Q-P7b-4(b) — minimal per-turn stdout line for tail-friendly
|
|
package/dist/core/v4/aidenAgent.js
CHANGED
|
@@ -372,25 +372,21 @@ class AidenAgent {
|
|
|
372
372
|
// 8. Run the tool-calling loop.
|
|
373
373
|
const loopResult = await this.runTurnLoop(messages, narrowedTools, trackers, options);
|
|
374
374
|
// 9. Honesty post-loop scan (only if loop ended with a normal stop).
|
|
375
|
+
//
|
|
376
|
+
// v4.7.0 Phase 2.3 — the verifier now records deterministic
|
|
377
|
+
// outcome events from `toolCallTrace` (not regex over the
|
|
378
|
+
// assistant's text). When `findings.length > 0` AND mode is
|
|
379
|
+
// `enforce`, it returns an append-only `footer` we concatenate
|
|
380
|
+
// to `finalContent`. The model's text is NEVER rewritten —
|
|
381
|
+
// that was the v4.6.x failure mode this verifier replaces.
|
|
375
382
|
let honestyFindings;
|
|
376
383
|
let finalContent = loopResult.finalContent;
|
|
377
384
|
if (this.honestyEnforcement && loopResult.finishReason === 'stop') {
|
|
378
385
|
try {
|
|
379
386
|
const scan = await this.honestyEnforcement.check(finalContent, loopResult.messages, loopResult.toolCallTrace);
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
finalContent = scan.correctedResponse;
|
|
384
|
-
// Reflect the corrected text in the message history too so
|
|
385
|
-
// /debug-prompt and /usage agree on the final string.
|
|
386
|
-
for (let i = loopResult.messages.length - 1; i >= 0; i--) {
|
|
387
|
-
const m = loopResult.messages[i];
|
|
388
|
-
if (m.role === 'assistant' && (!m.toolCalls || m.toolCalls.length === 0)) {
|
|
389
|
-
loopResult.messages[i].content = finalContent;
|
|
390
|
-
break;
|
|
391
|
-
}
|
|
392
|
-
}
|
|
393
|
-
}
|
|
387
|
+
honestyFindings = scan.findings;
|
|
388
|
+
if (scan.footer) {
|
|
389
|
+
finalContent = `${finalContent}\n\n${scan.footer}`;
|
|
394
390
|
}
|
|
395
391
|
}
|
|
396
392
|
catch {
|
|
@@ -970,6 +966,15 @@ class AidenAgent {
|
|
|
970
966
|
result: result.result,
|
|
971
967
|
error: result.error,
|
|
972
968
|
verified: this.resolveVerifiedFlag?.(result),
|
|
969
|
+
// v4.7.0 Phase 2.3 — stamp the handler's `mutates` flag
|
|
970
|
+
// at dispatch time so the post-loop honesty verifier can
|
|
971
|
+
// distinguish mutating vs read-only failures without
|
|
972
|
+
// needing a registry handle. Defaults to `false` for
|
|
973
|
+
// unknown tools (the resolver returns undefined) — read-
|
|
974
|
+
// only tools that error are surfaced via the tool-trail
|
|
975
|
+
// row already; the verifier deliberately stays quiet
|
|
976
|
+
// about them.
|
|
977
|
+
handlerMutates: this.resolveMutates?.(call.name) ?? false,
|
|
973
978
|
// v4.2 Phase 1 — verification surfaces alongside the trace
|
|
974
979
|
// entry for downstream callers (chatSession, loopTrace,
|
|
975
980
|
// future RecoveryReport). Undefined when TCE is off.
|
|
package/dist/core/v4/sandboxFs.js
CHANGED
|
@@ -94,7 +94,7 @@ function expandPathInline(input, cwd) {
|
|
|
94
94
|
}
|
|
95
95
|
/**
|
|
96
96
|
* Boundary-aware containment check. `path.relative` avoids the
|
|
97
|
-
*
|
|
97
|
+
* `<root>/user-evil` vs `<root>/user` false positive that a naive
|
|
98
98
|
* `startsWith` would produce.
|
|
99
99
|
*/
|
|
100
100
|
function isWithin(child, parent) {
|
|
package/dist/core/v4/subagent/childBuilder.js
CHANGED
|
@@ -33,6 +33,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
33
33
|
exports.ProviderNotFoundError = exports.SUBAGENT_BLOCKED_TOOL_NAMES = void 0;
|
|
34
34
|
exports.buildChildAgent = buildChildAgent;
|
|
35
35
|
const approvalEngine_1 = require("../../../moat/approvalEngine");
|
|
36
|
+
const honestyEnforcement_1 = require("../../../moat/honestyEnforcement");
|
|
36
37
|
const aidenAgent_1 = require("../aidenAgent");
|
|
37
38
|
const providerFallback_1 = require("../providerFallback");
|
|
38
39
|
// ── Hard-coded blocklist (Q5 from design doc §2) ────────────────────────────
|
|
@@ -177,10 +178,16 @@ function buildChildAgent(deps, input) {
|
|
|
177
178
|
// Pure no-op when runStore is absent (unit tests of buildChildAgent).
|
|
178
179
|
const onToolCall = buildOnToolCall(deps);
|
|
179
180
|
// ── 7. Build the child agent ─────────────────────────────────────────────
|
|
180
|
-
// Focused worker config: omit plannerGuard,
|
|
181
|
-
//
|
|
182
|
-
//
|
|
183
|
-
//
|
|
181
|
+
// Focused worker config: omit plannerGuard, skillTeacher, skillMiner,
|
|
182
|
+
// contextCompressor, promptCaching, promptBuilder. Match the daemon
|
|
183
|
+
// agent's "act on the task, don't self-improve" shape.
|
|
184
|
+
//
|
|
185
|
+
// v4.7.0: HonestyEnforcement is now structural (reads tool trace only,
|
|
186
|
+
// no natural-language scanning) and cheap enough to run in subagents.
|
|
187
|
+
// Mode is 'detect' here — events are captured into the child's run
|
|
188
|
+
// record but never produce user-visible output (subagents have no
|
|
189
|
+
// chat surface; the parent assembles their summary).
|
|
190
|
+
const childHonestyEnforcement = new honestyEnforcement_1.HonestyEnforcement('detect');
|
|
184
191
|
const agent = new aidenAgent_1.AidenAgent({
|
|
185
192
|
provider: childProvider,
|
|
186
193
|
tools: childTools,
|
|
@@ -192,6 +199,7 @@ function buildChildAgent(deps, input) {
|
|
|
192
199
|
resolveVerifiedFlag: deps.resolveVerifiedFlag,
|
|
193
200
|
resolveToolset: deps.resolveToolset,
|
|
194
201
|
resolveMutates: deps.resolveMutates,
|
|
202
|
+
honestyEnforcement: childHonestyEnforcement,
|
|
195
203
|
onToolCall,
|
|
196
204
|
// iterationBudgetInjection inherits the default (true) — child
|
|
197
205
|
// sees its own remaining-budget hint near the end of the run.
|
package/dist/core/version.js
CHANGED
(hunk not included in this extract)
package/dist/moat/honestyEnforcement.js
CHANGED
|
@@ -6,111 +6,92 @@
|
|
|
6
6
|
* Aiden — local-first agent.
|
|
7
7
|
*/
|
|
8
8
|
/**
|
|
9
|
-
* moat/honestyEnforcement.ts — Aiden v4.0.
|
|
9
|
+
* moat/honestyEnforcement.ts — Aiden v4.7.0 (Phase 2.3 — outcome-based verifier)
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* says no tool fired (or fired and failed verification), Honesty refuses
|
|
15
|
-
* the claim and rewrites the response.
|
|
11
|
+
* The regex-based natural-language claim scanner (deleted in Phase 2.2)
|
|
12
|
+
* has been replaced with a deterministic outcome recorder that consumes
|
|
13
|
+
* `toolCallTrace` structurally. Two failure modes are recorded:
|
|
16
14
|
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
* - "I searched the web" → no web_search call
|
|
22
|
-
* - "I ran X" → no shell_exec call
|
|
15
|
+
* 1. mutation_errored — a tool tagged `mutates: true` (in the
|
|
16
|
+
* registry, stamped onto trace entries at dispatch time via
|
|
17
|
+
* `handlerMutates`) returned an `error` envelope. Path is
|
|
18
|
+
* extracted from `result.path` when present.
|
|
23
19
|
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
* enforce — DEFAULT. Rewrites failed claims into honest text that lists
|
|
29
|
-
* the actual trace summary.
|
|
20
|
+
* 2. memory_unverified — a memory_* tool's result carries
|
|
21
|
+
* `verified === false` (per Phase 9 MemoryGuard). This was
|
|
22
|
+
* the v3 C20/C21 lying surface and remains the only memory-
|
|
23
|
+
* specific check the verifier performs.
|
|
30
24
|
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
-
*
|
|
25
|
+
* Modes:
|
|
26
|
+
* off — bypass entirely. No events recorded.
|
|
27
|
+
* detect — Record events; never user-visible. `findings` populated;
|
|
28
|
+
* no `footer`.
|
|
29
|
+
* enforce — DEFAULT. Record events + append a short footer to the
|
|
30
|
+
* assistant reply summarising the unverified outcomes.
|
|
31
|
+
* The footer is APPEND-ONLY — the assistant's text is
|
|
32
|
+
* never rewritten. (This is the key behaviour change vs
|
|
33
|
+
* v4.6.x — append-only, never an in-place edit.)
|
|
36
34
|
*
|
|
37
|
-
*
|
|
38
|
-
*
|
|
39
|
-
*
|
|
40
|
-
*
|
|
41
|
-
*
|
|
42
|
-
*
|
|
43
|
-
*
|
|
35
|
+
* What the verifier intentionally does NOT do (delta vs the deleted
|
|
36
|
+
* scanner):
|
|
37
|
+
* - It does not look at the assistant's natural-language text at all.
|
|
38
|
+
* There's no regex matching of English verbs to tool names.
|
|
39
|
+
* - It does not emit `no_tool_call` findings. The previous "model
|
|
40
|
+
* claimed X but no tool fired" failure mode is gone — that was
|
|
41
|
+
* the false-refusal class. We only record OUTCOMES that ran.
|
|
42
|
+
* - It does not mutate `loopResult.messages`. The caller appends
|
|
43
|
+
* the footer to its own `finalContent` string variable.
|
|
44
44
|
*/
|
|
45
45
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
46
|
-
exports.
|
|
47
|
-
/**
|
|
48
|
-
*
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
{
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
label: 'memory_add',
|
|
96
|
-
kind: 'memory',
|
|
97
|
-
},
|
|
98
|
-
{
|
|
99
|
-
pattern: new RegExp(`${I_PREFIX}(?:forgot(?:ten)?|removed)\\b[^.]*\\bmemory\\b`, 'i'),
|
|
100
|
-
tools: ['memory_remove', 'memory_forget'],
|
|
101
|
-
label: 'memory_remove',
|
|
102
|
-
kind: 'memory',
|
|
103
|
-
},
|
|
104
|
-
// ── Model switch ───────────────────────────────────────────────
|
|
105
|
-
{
|
|
106
|
-
pattern: new RegExp(`${I_PREFIX}(?:switched\\s+to|changed\\s+(?:to|model\\s+to)|am\\s+now\\s+using)\\s+\\S+`, 'i'),
|
|
107
|
-
tools: ['model_switch'],
|
|
108
|
-
label: 'model_switch',
|
|
109
|
-
},
|
|
110
|
-
];
|
|
111
|
-
/** Negation patterns. If matched at the start of a sentence containing
|
|
112
|
-
* the claim, the claim is NOT flagged. */
|
|
113
|
-
const NEGATION_RE = /\b(?:couldn'?t|cannot|can'?t|wasn'?t\s+able|unable\s+to|failed\s+to|did\s+not|didn'?t|won'?t|will\s+not)\b/i;
|
|
46
|
+
exports.HonestyEnforcement = void 0;
|
|
47
|
+
/**
|
|
48
|
+
* Memory tools whose results carry the `verified` flag set by
|
|
49
|
+
* MemoryGuard. The list is closed — adding a new memory_* tool
|
|
50
|
+
* means extending this set.
|
|
51
|
+
*/
|
|
52
|
+
const MEMORY_TOOLS = new Set([
|
|
53
|
+
'memory_add',
|
|
54
|
+
'memory_replace',
|
|
55
|
+
'memory_remove',
|
|
56
|
+
]);
|
|
57
|
+
/**
|
|
58
|
+
* Read `result.path` when present (file_* tools' result envelopes
|
|
59
|
+
* carry it). Returns undefined otherwise. Used only for cosmetic
|
|
60
|
+
* footer detail — never affects pass/fail outcome.
|
|
61
|
+
*/
|
|
62
|
+
function extractPath(result) {
|
|
63
|
+
if (result && typeof result === 'object' && 'path' in result) {
|
|
64
|
+
const p = result.path;
|
|
65
|
+
if (typeof p === 'string')
|
|
66
|
+
return p;
|
|
67
|
+
}
|
|
68
|
+
return undefined;
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* Translate a `HonestyEvent` to the legacy `HonestyFinding` shape so
|
|
72
|
+
* existing downstream consumers (chatSession, telemetry) keep working.
|
|
73
|
+
* The fine-grained kind is preserved via `reason`.
|
|
74
|
+
*/
|
|
75
|
+
function toFinding(event) {
|
|
76
|
+
switch (event.kind) {
|
|
77
|
+
case 'mutation_errored':
|
|
78
|
+
return {
|
|
79
|
+
claim: event.tool,
|
|
80
|
+
expectedTool: event.tool,
|
|
81
|
+
found: false,
|
|
82
|
+
confidence: 1,
|
|
83
|
+
reason: 'tool_errored',
|
|
84
|
+
};
|
|
85
|
+
case 'memory_unverified':
|
|
86
|
+
return {
|
|
87
|
+
claim: event.tool,
|
|
88
|
+
expectedTool: event.tool,
|
|
89
|
+
found: false,
|
|
90
|
+
confidence: 1,
|
|
91
|
+
reason: 'memory_verified_false',
|
|
92
|
+
};
|
|
93
|
+
}
|
|
94
|
+
}
|
|
114
95
|
class HonestyEnforcement {
|
|
115
96
|
constructor(mode = 'enforce', llmAdapter, logger) {
|
|
116
97
|
this.llmAdapter = llmAdapter;
|
|
@@ -124,20 +105,63 @@ class HonestyEnforcement {
|
|
|
124
105
|
return this.mode;
|
|
125
106
|
}
|
|
126
107
|
/**
|
|
127
|
-
*
|
|
128
|
-
*
|
|
129
|
-
* use `correctedResponse` or `originalResponse` based on `passed`.
|
|
108
|
+
* v4.7.0 Phase 2.3 — record deterministic unverified outcomes from
|
|
109
|
+
* the per-turn tool trace. Pure function; no I/O, no side effects.
|
|
130
110
|
*/
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
111
|
+
recordOutcomes(trace) {
|
|
112
|
+
const events = [];
|
|
113
|
+
for (const t of trace) {
|
|
114
|
+
if (t.error && t.handlerMutates === true) {
|
|
115
|
+
events.push({
|
|
116
|
+
kind: 'mutation_errored',
|
|
117
|
+
tool: t.name,
|
|
118
|
+
reason: t.error,
|
|
119
|
+
path: extractPath(t.result),
|
|
120
|
+
});
|
|
121
|
+
continue;
|
|
122
|
+
}
|
|
123
|
+
if (MEMORY_TOOLS.has(t.name) && t.verified === false) {
|
|
124
|
+
events.push({
|
|
125
|
+
kind: 'memory_unverified',
|
|
126
|
+
tool: t.name,
|
|
127
|
+
reason: 'verification failed',
|
|
128
|
+
});
|
|
129
|
+
}
|
|
139
130
|
}
|
|
140
|
-
|
|
131
|
+
return events;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* v4.7.0 Phase 2.3 — render the append-only footer used in enforce
|
|
135
|
+
* mode. Caller concatenates with a blank line; we own the lines
|
|
136
|
+
* inside. Format: one summary line + one row per event.
|
|
137
|
+
*/
|
|
138
|
+
buildFooter(events) {
|
|
139
|
+
const lines = [];
|
|
140
|
+
lines.push(`⚠️ Verifier: ${events.length} tool outcome(s) not verified this turn.`);
|
|
141
|
+
for (const e of events) {
|
|
142
|
+
if (e.kind === 'mutation_errored') {
|
|
143
|
+
const where = e.path ? ` (path: ${e.path})` : '';
|
|
144
|
+
lines.push(`- ${e.tool}${where}: errored — ${e.reason}`);
|
|
145
|
+
}
|
|
146
|
+
else {
|
|
147
|
+
lines.push(`- ${e.tool}: not verified`);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
return lines.join('\n');
|
|
151
|
+
}
|
|
152
|
+
/**
|
|
153
|
+
* v4.7.0 Phase 2.3 — entry point. Records outcome events from the
|
|
154
|
+
* trace, converts to legacy `HonestyFinding[]` for downstream
|
|
155
|
+
* consumers, and renders an append-only footer in enforce mode.
|
|
156
|
+
*
|
|
157
|
+
* NEVER rewrites `response`. The returned `footer` is what the
|
|
158
|
+
* caller appends; the original text is preserved verbatim.
|
|
159
|
+
*
|
|
160
|
+
* Off mode short-circuits without touching the trace — minimal cost
|
|
161
|
+
* for users who opt out.
|
|
162
|
+
*/
|
|
163
|
+
async check(response, _messages, trace) {
|
|
164
|
+
if (this.mode === 'off') {
|
|
141
165
|
return {
|
|
142
166
|
passed: true,
|
|
143
167
|
findings: [],
|
|
@@ -145,145 +169,23 @@ class HonestyEnforcement {
|
|
|
145
169
|
originalResponse: response,
|
|
146
170
|
};
|
|
147
171
|
}
|
|
148
|
-
const
|
|
149
|
-
const
|
|
150
|
-
const passed =
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
findings.length;
|
|
155
|
-
if (this.mode === 'detect') {
|
|
156
|
-
this.logger?.('info', `[HonestyEnforcement] detect mode: ${findings.length} findings (${failed.length} failed)`);
|
|
157
|
-
return {
|
|
158
|
-
passed,
|
|
159
|
-
findings,
|
|
160
|
-
confidence,
|
|
161
|
-
originalResponse: response,
|
|
162
|
-
};
|
|
172
|
+
const events = this.recordOutcomes(trace);
|
|
173
|
+
const findings = events.map(toFinding);
|
|
174
|
+
const passed = findings.length === 0;
|
|
175
|
+
let footer;
|
|
176
|
+
if (this.mode === 'enforce' && !passed) {
|
|
177
|
+
footer = this.buildFooter(events);
|
|
163
178
|
}
|
|
164
|
-
// enforce mode
|
|
165
|
-
let correctedResponse;
|
|
166
179
|
if (!passed) {
|
|
167
|
-
|
|
168
|
-
this.logger?.('warn', `[HonestyEnforcement] enforce: rewrote response (${failed.length} failed claims)`);
|
|
180
|
+
this.logger?.('info', `honesty: ${events.length} unverified outcome(s) this turn`);
|
|
169
181
|
}
|
|
170
182
|
return {
|
|
171
183
|
passed,
|
|
172
184
|
findings,
|
|
173
|
-
confidence,
|
|
185
|
+
confidence: 1,
|
|
174
186
|
originalResponse: response,
|
|
175
|
-
|
|
187
|
+
footer,
|
|
176
188
|
};
|
|
177
189
|
}
|
|
178
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
179
|
-
// pattern detection
|
|
180
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
181
|
-
detectClaimsPattern(response, trace) {
|
|
182
|
-
const findings = [];
|
|
183
|
-
const sentences = splitSentences(response);
|
|
184
|
-
for (const sentence of sentences) {
|
|
185
|
-
// Skip negated sentences entirely.
|
|
186
|
-
if (NEGATION_RE.test(sentence))
|
|
187
|
-
continue;
|
|
188
|
-
for (const pat of PATTERNS) {
|
|
189
|
-
if (!pat.pattern.test(sentence))
|
|
190
|
-
continue;
|
|
191
|
-
const matched = sentence.match(pat.pattern);
|
|
192
|
-
const claimText = matched?.[0] ?? sentence.trim();
|
|
193
|
-
const found = this.traceSatisfies(pat, trace);
|
|
194
|
-
let reason;
|
|
195
|
-
if (!found) {
|
|
196
|
-
if (pat.kind === 'memory' && memoryFiredButUnverified(pat, trace)) {
|
|
197
|
-
reason = 'memory_verified_false';
|
|
198
|
-
}
|
|
199
|
-
else if (toolFiredButErrored(pat, trace)) {
|
|
200
|
-
reason = 'tool_errored';
|
|
201
|
-
}
|
|
202
|
-
else {
|
|
203
|
-
reason = 'no_tool_call';
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
findings.push({
|
|
207
|
-
claim: claimText.trim(),
|
|
208
|
-
expectedTool: pat.tools.length === 1 ? pat.tools[0] : pat.tools,
|
|
209
|
-
found,
|
|
210
|
-
confidence: 0.8,
|
|
211
|
-
reason,
|
|
212
|
-
});
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
return findings;
|
|
216
|
-
}
|
|
217
|
-
traceSatisfies(pat, trace) {
|
|
218
|
-
const matching = trace.filter((t) => pat.tools.includes(t.name) && !t.error);
|
|
219
|
-
if (matching.length === 0)
|
|
220
|
-
return false;
|
|
221
|
-
if (pat.kind === 'memory') {
|
|
222
|
-
// verified must be explicitly true
|
|
223
|
-
return matching.some((m) => m.verified === true);
|
|
224
|
-
}
|
|
225
|
-
return true;
|
|
226
|
-
}
|
|
227
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
228
|
-
// correction builder
|
|
229
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
230
|
-
buildCorrection(_original, failed, trace) {
|
|
231
|
-
const lines = [];
|
|
232
|
-
lines.push("I shouldn't claim actions I didn't take. Honest summary of what I actually did:");
|
|
233
|
-
lines.push('');
|
|
234
|
-
if (trace.length === 0) {
|
|
235
|
-
lines.push('- No tools were called this turn.');
|
|
236
|
-
}
|
|
237
|
-
else {
|
|
238
|
-
for (const entry of trace) {
|
|
239
|
-
const status = entry.error ? `errored (${entry.error})` : 'succeeded';
|
|
240
|
-
const verified = entry.verified === false
|
|
241
|
-
? ' (NOT VERIFIED)'
|
|
242
|
-
: entry.verified === true
|
|
243
|
-
? ' (verified)'
|
|
244
|
-
: '';
|
|
245
|
-
lines.push(`- ${entry.name}: ${status}${verified}`);
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
lines.push('');
|
|
249
|
-
lines.push('Refused claims:');
|
|
250
|
-
for (const f of failed) {
|
|
251
|
-
const tool = Array.isArray(f.expectedTool)
|
|
252
|
-
? f.expectedTool.join('/')
|
|
253
|
-
: f.expectedTool;
|
|
254
|
-
const why = f.reason === 'memory_verified_false'
|
|
255
|
-
? `(memory write returned verified=false — fact was not stored)`
|
|
256
|
-
: f.reason === 'tool_errored'
|
|
257
|
-
? `(tool errored)`
|
|
258
|
-
: `(no ${tool} call in trace)`;
|
|
259
|
-
lines.push(`- "${f.claim}" ${why}`);
|
|
260
|
-
}
|
|
261
|
-
return lines.join('\n');
|
|
262
|
-
}
|
|
263
190
|
}
|
|
264
191
|
exports.HonestyEnforcement = HonestyEnforcement;
|
|
265
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
266
|
-
// helpers (exported for tests)
|
|
267
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
268
|
-
function splitSentences(text) {
|
|
269
|
-
// Split on sentence terminators while keeping reasonable bounds.
|
|
270
|
-
// Don't try to be clever about abbreviations — false positives are
|
|
271
|
-
// benign (we just inspect more granular slices).
|
|
272
|
-
return text
|
|
273
|
-
.split(/(?<=[.!?])\s+|\n+/)
|
|
274
|
-
.map((s) => s.trim())
|
|
275
|
-
.filter((s) => s.length > 0);
|
|
276
|
-
}
|
|
277
|
-
function memoryFiredButUnverified(pat, trace) {
|
|
278
|
-
if (pat.kind !== 'memory')
|
|
279
|
-
return false;
|
|
280
|
-
return trace.some((t) => pat.tools.includes(t.name) && !t.error && t.verified === false);
|
|
281
|
-
}
|
|
282
|
-
function toolFiredButErrored(pat, trace) {
|
|
283
|
-
return trace.some((t) => pat.tools.includes(t.name) && !!t.error);
|
|
284
|
-
}
|
|
285
|
-
exports.__test__ = {
|
|
286
|
-
splitSentences,
|
|
287
|
-
PATTERNS,
|
|
288
|
-
NEGATION_RE,
|
|
289
|
-
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "aiden-runtime",
|
|
3
|
-
"version": "4.6.1",
|
|
3
|
+
"version": "4.7.0",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
@@ -254,7 +254,7 @@
|
|
|
254
254
|
"epub2": "^3.0.2",
|
|
255
255
|
"execa": "^8.0.1",
|
|
256
256
|
"express": "^4.18.2",
|
|
257
|
-
"form-data": "^4.0.
|
|
257
|
+
"form-data": "^4.0.4",
|
|
258
258
|
"imap-simple": "^5.1.0",
|
|
259
259
|
"js-tiktoken": "^1.0.21",
|
|
260
260
|
"js-yaml": "^4.1.1",
|
|
@@ -284,7 +284,7 @@
|
|
|
284
284
|
"uuid": "^9.0.0",
|
|
285
285
|
"whatsapp-web.js": "^1.26.0",
|
|
286
286
|
"wrap-ansi": "^9.0.2",
|
|
287
|
-
"ws": "^8.20.
|
|
287
|
+
"ws": "^8.20.1"
|
|
288
288
|
},
|
|
289
289
|
"optionalDependencies": {
|
|
290
290
|
"decibri": "*",
|
|
@@ -296,7 +296,13 @@
|
|
|
296
296
|
"semver": "^7.5.2",
|
|
297
297
|
"postcss": "^8.5.10",
|
|
298
298
|
"hono": "^4.12.16",
|
|
299
|
-
"minimatch": "^9.0.9"
|
|
299
|
+
"minimatch": "^9.0.9",
|
|
300
|
+
"qs": ">=6.14.1",
|
|
301
|
+
"tough-cookie": ">=4.1.3",
|
|
302
|
+
"protobufjs": ">=7.5.8",
|
|
303
|
+
"request": {
|
|
304
|
+
"form-data": "^2.5.5"
|
|
305
|
+
}
|
|
300
306
|
},
|
|
301
307
|
"devDependencies": {
|
|
302
308
|
"@types/better-sqlite3": "^7.6.13",
|