@genesislcap/ai-assistant 14.451.3-alpha-861508d.0 → 14.451.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,6 +48,7 @@ export const META_EVENT_IMPORTANCE = {
48
48
  'turn.start': 'normal',
49
49
  'turn.end': 'normal',
50
50
  'turn.retry': 'normal',
51
+ 'tool.unresolved': 'normal',
51
52
  'agent.handoff': 'normal',
52
53
  'agent.pinned': 'normal',
53
54
  'agent.unpinned': 'normal',
@@ -67,7 +68,7 @@ export const META_EVENT_IMPORTANCE = {
67
68
  * allowed to float above this cap rather than lose a failure signal; in normal
68
69
  * use the frequent `low`/`normal` events keep it near the cap. Entries are cheap.
69
70
  */
70
- const DEFAULT_MAX_META_EVENTS = 400;
71
+ const DEFAULT_MAX_META_EVENTS = 800;
71
72
  const registry = new Map();
72
73
  /**
73
74
  * Append a meta event to the timeline for `key`. Once the buffer exceeds
@@ -138,7 +139,7 @@ export const DEBUG_LOG_README = [
138
139
  "kind:'turn'.`agentSnapshot` — the active agent's own view of its internal state, captured at that turn. An agent opts into this by exposing a `getDebugSnapshot()` that returns JSON-serializable per-state info; stateful/flow agents wire it automatically, so you can watch a flow advance turn-by-turn (e.g. current step, cursor, collected fields, pending changes). Absent for agents that don't expose one.",
139
140
  "kind:'event' — a meta/lifecycle event. `type` names it (see below); `detail` carries structured data. `detail.placement` is the emitting UI instance: 'bubble' (collapsed), 'panel' (popped-out), or 'standalone'.",
140
141
  "Each 'event' also has an `importance`: 'high' (failures/limits — turn.error, tool.failed, file.read-failed, suggestions.failed, context.threshold-crossed), 'normal' (session flow — connects, turns, retries, handoffs, agent/provider changes, interactions), or 'low' (skippable UI/bookkeeping noise — panel.toggled, attachment.added, driver.wired/unwired, context.updated). To skim, ignore importance:'low'; to triage a failure, filter to importance:'high' then read the nearby messages and turns. A 'high' turn.error is often preceded by one or more 'normal' turn.retry events for the same reason — read them together to see how many attempts were made before bailing. 'message' and 'turn' entries carry no importance — they are the substance, always read them.",
141
- 'Event types: assistant.connected/disconnected (mount + placement + whether the session was created or restored), assistant.popout/popin (window placement), driver.created/wired/unwired (which driver is live and why it stops/starts responding across a popout), state.changed (idle↔loading), turn.start/turn.end (turn boundary; turn.end carries durationMs), turn.retry (a recoverable in-turn retry — detail.reason plus attempt/maxAttempts; for malformed calls also finishMessage), turn.error (a turn failed or hit a guardrail — detail.reason is one of exception/malformed-function-call/empty-response/unknown-tool-limit/max-iterations, plus reason-specific diagnostics: attempts, finishMessage, unknownTools + availableTools, iterations + limit, or name + message for exceptions), tool.failed (a tool threw), agent.handoff (routing; from=null is the initial activation), agent.pinned/unpinned (forced routing), provider.selected (model/provider for the upcoming turns), interaction.requested/resolved (blocking user widgets — explain quiet gaps), context.updated/threshold-crossed (token + cost), panel.toggled, attachment.added, file.read-failed, suggestions.failed.',
142
+ 'Event types: assistant.connected/disconnected (mount + placement + whether the session was created or restored), assistant.popout/popin (window placement), driver.created/wired/unwired (which driver is live and why it stops/starts responding across a popout), state.changed (idle↔loading), turn.start/turn.end (turn boundary; turn.end carries durationMs), turn.retry (a recoverable in-turn retry — detail.reason plus attempt/maxAttempts; for malformed calls also finishMessage), turn.error (a turn failed or hit a guardrail — detail.reason is one of exception/malformed-function-call/empty-response/unknown-tool-limit/max-iterations, plus reason-specific diagnostics: attempts, finishMessage, unknownTools (split into staleTools — real earlier this activation but retired by the current state or hidden behind an open exclusive fold — and hallucinatedTools — never advertised) + availableTools, iterations + limit, or name + message for exceptions), tool.failed (a tool threw), tool.unresolved (the model called a tool that could not be dispatched — detail.kind is folded/fold-hidden/stale/unknown, plus tool + agent and, for the counted kinds, the consecutive streak; the recurring lead-up to an unknown-tool-limit turn.error), agent.handoff (routing; from=null is the initial activation), agent.pinned/unpinned (forced routing), provider.selected (model/provider for the upcoming turns), interaction.requested/resolved (blocking user widgets — explain quiet gaps), context.updated/threshold-crossed (token + cost), panel.toggled, attachment.added, file.read-failed, suggestions.failed.',
142
143
  "`meta` holds context captured at export time: agentSummary (full agent configs), context (active model, token usage, session cost), activeDebugSnapshot (the active agent's `getDebugSnapshot()` taken fresh at export — reflects state NOW, which may have advanced beyond the last turn's agentSnapshot), debug (optional host-supplied debug state), host, and the export timestamp.",
143
144
  'To debug a failure: find the last turn.error or tool.failed, then read upward for the user message, the turn(s), and the agent/provider/state events that led into it.',
144
145
  ];
@@ -1 +1 @@
1
- {"root":["../src/index.ts","../src/channel/ai-activity-bus.ts","../src/channel/ai-activity-channel.ts","../src/components/halo-overlay.ts","../src/components/activity-halo/activity-halo.ts","../src/components/agent-picker/agent-picker.constants.ts","../src/components/agent-picker/agent-picker.styles.ts","../src/components/agent-picker/agent-picker.template.ts","../src/components/agent-picker/agent-picker.ts","../src/components/agent-picker/index.ts","../src/components/ai-driver/ai-driver.ts","../src/components/ai-driver/index.ts","../src/components/chat-bubble/chat-bubble.styles.ts","../src/components/chat-bubble/chat-bubble.template.ts","../src/components/chat-bubble/chat-bubble.ts","../src/components/chat-bubble/index.ts","../src/components/chat-driver/chat-driver.ts","../src/components/chat-driver/index.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.styles.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.template.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.test.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.ts","../src/components/chat-interaction-wrapper/index.ts","../src/components/chat-markdown/chat-markdown.ts","../src/components/chat-markdown/index.ts","../src/components/orchestrating-driver/index.ts","../src/components/orchestrating-driver/orchestrating-driver.ts","../src/components/popout-manager/index.ts","../src/components/popout-manager/popout-manager.ts","../src/config/config.ts","../src/config/define-stateful-agent.ts","../src/config/fallback-agents.ts","../src/config/index.ts","../src/config/validate-providers.test.ts","../src/config/validate-providers.ts","../src/main/index.ts","../src/main/main.styles.ts","../src/main/main.template.ts","../src/main/main.ts","../src/main/main.types.ts","../src/state/ai-assistant-slice.ts","../src/state/debug-event-log.ts","../src/state/driver-registry.ts","../src/state/session-store.ts","../src/styles/ai-colours.ts","../src/styles/index.ts","../src/styles/styles.ts","../src/suggestions/chat-suggestions.ts","../src/tags/index.ts","../src/types/ai-chat-widget.ts","../src/utils/animated-panel-toggle.ts","../src/utils/history-transform.ts","../src/utils/index.ts","../src/utils/logger.ts","../src/utils/message-partition.test.ts","../src/utils/message-partition.ts","../src/utils/sum-costs.test.ts","../src/utils/sum-costs.ts","../src/utils/tool-fold.ts"],"version":"5.9.2"}
1
+ {"root":["../src/index.ts","../src/channel/ai-activity-bus.ts","../src/channel/ai-activity-channel.ts","../src/components/halo-overlay.ts","../src/components/activity-halo/activity-halo.ts","../src/components/agent-picker/agent-picker.constants.ts","../src/components/agent-picker/agent-picker.styles.ts","../src/components/agent-picker/agent-picker.template.ts","../src/components/agent-picker/agent-picker.ts","../src/components/agent-picker/index.ts","../src/components/ai-driver/ai-driver.ts","../src/components/ai-driver/index.ts","../src/components/chat-bubble/chat-bubble.styles.ts","../src/components/chat-bubble/chat-bubble.template.ts","../src/components/chat-bubble/chat-bubble.ts","../src/components/chat-bubble/index.ts","../src/components/chat-driver/align-event-globals.ts","../src/components/chat-driver/chat-driver.test.ts","../src/components/chat-driver/chat-driver.ts","../src/components/chat-driver/index.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.styles.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.template.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.test.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.ts","../src/components/chat-interaction-wrapper/index.ts","../src/components/chat-markdown/chat-markdown.ts","../src/components/chat-markdown/index.ts","../src/components/orchestrating-driver/index.ts","../src/components/orchestrating-driver/orchestrating-driver.ts","../src/components/popout-manager/index.ts","../src/components/popout-manager/popout-manager.ts","../src/config/config.ts","../src/config/define-stateful-agent.ts","../src/config/fallback-agents.ts","../src/config/index.ts","../src/config/validate-providers.test.ts","../src/config/validate-providers.ts","../src/main/index.ts","../src/main/main.styles.ts","../src/main/main.template.ts","../src/main/main.ts","../src/main/main.types.ts","../src/state/ai-assistant-slice.ts","../src/state/debug-event-log.ts","../src/state/driver-registry.ts","../src/state/session-store.ts","../src/styles/ai-colours.ts","../src/styles/index.ts","../src/styles/styles.ts","../src/suggestions/chat-suggestions.ts","../src/tags/index.ts","../src/types/ai-chat-widget.ts","../src/utils/animated-panel-toggle.ts","../src/utils/history-transform.ts","../src/utils/index.ts","../src/utils/logger.ts","../src/utils/message-partition.test.ts","../src/utils/message-partition.ts","../src/utils/sum-costs.test.ts","../src/utils/sum-costs.ts","../src/utils/tool-fold.ts"],"version":"5.9.2"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@genesislcap/ai-assistant",
3
3
  "description": "Genesis AI Assistant micro-frontend",
4
- "version": "14.451.3-alpha-861508d.0",
4
+ "version": "14.451.4",
5
5
  "license": "SEE LICENSE IN license.txt",
6
6
  "main": "dist/esm/index.js",
7
7
  "types": "dist/ai-assistant.d.ts",
@@ -64,24 +64,24 @@
64
64
  }
65
65
  },
66
66
  "devDependencies": {
67
- "@genesislcap/foundation-testing": "14.451.3-alpha-861508d.0",
68
- "@genesislcap/genx": "14.451.3-alpha-861508d.0",
69
- "@genesislcap/rollup-builder": "14.451.3-alpha-861508d.0",
70
- "@genesislcap/ts-builder": "14.451.3-alpha-861508d.0",
71
- "@genesislcap/uvu-playwright-builder": "14.451.3-alpha-861508d.0",
72
- "@genesislcap/vite-builder": "14.451.3-alpha-861508d.0",
73
- "@genesislcap/webpack-builder": "14.451.3-alpha-861508d.0",
67
+ "@genesislcap/foundation-testing": "14.451.4",
68
+ "@genesislcap/genx": "14.451.4",
69
+ "@genesislcap/rollup-builder": "14.451.4",
70
+ "@genesislcap/ts-builder": "14.451.4",
71
+ "@genesislcap/uvu-playwright-builder": "14.451.4",
72
+ "@genesislcap/vite-builder": "14.451.4",
73
+ "@genesislcap/webpack-builder": "14.451.4",
74
74
  "@types/dompurify": "^3.0.5",
75
75
  "@types/marked": "^5.0.2"
76
76
  },
77
77
  "dependencies": {
78
- "@genesislcap/foundation-ai": "14.451.3-alpha-861508d.0",
79
- "@genesislcap/foundation-logger": "14.451.3-alpha-861508d.0",
80
- "@genesislcap/foundation-redux": "14.451.3-alpha-861508d.0",
81
- "@genesislcap/foundation-ui": "14.451.3-alpha-861508d.0",
82
- "@genesislcap/foundation-utils": "14.451.3-alpha-861508d.0",
83
- "@genesislcap/rapid-design-system": "14.451.3-alpha-861508d.0",
84
- "@genesislcap/web-core": "14.451.3-alpha-861508d.0",
78
+ "@genesislcap/foundation-ai": "14.451.4",
79
+ "@genesislcap/foundation-logger": "14.451.4",
80
+ "@genesislcap/foundation-redux": "14.451.4",
81
+ "@genesislcap/foundation-ui": "14.451.4",
82
+ "@genesislcap/foundation-utils": "14.451.4",
83
+ "@genesislcap/rapid-design-system": "14.451.4",
84
+ "@genesislcap/web-core": "14.451.4",
85
85
  "dompurify": "^3.3.1",
86
86
  "marked": "^17.0.3"
87
87
  },
@@ -93,5 +93,5 @@
93
93
  "publishConfig": {
94
94
  "access": "public"
95
95
  },
96
- "gitHead": "6952480d23f47b3830abff2b8afd8e018fdf2b92"
96
+ "gitHead": "265d5fa00ae476a7713d78707f53f393f5eeb647"
97
97
  }
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Test-only side effect: align the global `EventTarget` with jsdom's before any
3
+ * module that `extends EventTarget` is evaluated.
4
+ *
5
+ * The node test runner's jsdom setup installs `globalThis.CustomEvent` from
6
+ * jsdom but leaves `globalThis.EventTarget` as Node's native class. A class that
7
+ * `extends EventTarget` (e.g. {@link ChatDriver}) then inherits Node's native
8
+ * `dispatchEvent`, which rejects the jsdom `CustomEvent` instances it is handed
9
+ * ("The 'event' argument must be an instance of Event. Received an instance of
10
+ * CustomEvent"). Pointing `EventTarget` at jsdom's keeps the whole event family
11
+ * in one realm.
12
+ *
13
+ * No-op in a real browser, where `window.EventTarget === globalThis.EventTarget`
14
+ * already. Import this BEFORE importing anything that subclasses `EventTarget`.
15
+ */
16
+ const jsdomWindow = (globalThis as { window?: { EventTarget?: typeof EventTarget } }).window;
17
+ if (jsdomWindow?.EventTarget && globalThis.EventTarget !== jsdomWindow.EventTarget) {
18
+ Object.defineProperty(globalThis, 'EventTarget', {
19
+ value: jsdomWindow.EventTarget,
20
+ configurable: true,
21
+ writable: true,
22
+ });
23
+ }
@@ -0,0 +1,315 @@
1
+ import type {
2
+ AIProvider,
3
+ AIProviderRegistry,
4
+ ChatMessage,
5
+ ChatRequestOptions,
6
+ ChatToolCall,
7
+ ChatToolDefinition,
8
+ } from '@genesislcap/foundation-ai';
9
+ import { isChatToolCallUnknown } from '@genesislcap/foundation-ai';
10
+ import { assert, createLogicSuite } from '@genesislcap/foundation-testing';
11
+ import { agenticActivityBus } from '../../channel/ai-activity-bus';
12
+ import type { AgentConfig } from '../../config/config';
13
+ import { clearMetaEventRegistry, getMetaEvents } from '../../state/debug-event-log';
14
+ import { createToolFold } from '../../utils/tool-fold';
15
+ // Side-effect import — MUST come before `./chat-driver` so the driver subclasses
16
+ // jsdom's EventTarget rather than Node's native one (see the file). None of the
17
+ // imports above pull in the driver, so its realm is still set before evaluation.
18
+ import './align-event-globals';
19
+ import { ChatDriver } from './chat-driver';
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Test harness
23
+ //
24
+ // The driver calls `provider.chat(history, userMessage, options)` once per
25
+ // tool-loop iteration and inspects the returned ChatMessage: a message with
26
+ // `toolCalls` keeps the loop running; one without ends the turn. So a fake
27
+ // provider that replays a scripted sequence of ChatMessages is enough to drive
28
+ // any tool-loop path. We capture the advertised tool names per call so tests
29
+ // can assert per-state narrowing actually happened.
30
+ // ---------------------------------------------------------------------------
31
+
32
+ interface ScriptedProvider extends AIProvider {
33
+ /** Tool names advertised to the model on each `chat()` call, in order. */
34
+ advertisedPerCall: string[][];
35
+ }
36
+
37
+ const scriptedProvider = (responses: ChatMessage[]): ScriptedProvider => {
38
+ const queue = [...responses];
39
+ const advertisedPerCall: string[][] = [];
40
+ return {
41
+ advertisedPerCall,
42
+ chat: async (
43
+ _history: ChatMessage[],
44
+ _userMessage: string,
45
+ options?: ChatRequestOptions,
46
+ ): Promise<ChatMessage> => {
47
+ advertisedPerCall.push((options?.tools ?? []).map((t) => t.name));
48
+ // Once the script is exhausted, end the turn with a plain text reply.
49
+ return queue.shift() ?? { role: 'assistant', content: 'done' };
50
+ },
51
+ };
52
+ };
53
+
54
+ const makeRegistry = (provider: AIProvider): AIProviderRegistry => ({
55
+ get: () => provider,
56
+ default: () => provider,
57
+ defaultName: () => 'test',
58
+ names: () => ['test'],
59
+ getStatus: async () => null,
60
+ listStatuses: async () => [],
61
+ });
62
+
63
+ const def = (name: string): ChatToolDefinition => ({
64
+ name,
65
+ description: `${name} tool`,
66
+ parameters: { type: 'object', properties: {} },
67
+ });
68
+
69
+ /** An assistant turn that calls a single tool. `content` is empty so the driver
70
+ * does not treat it as a thinking step (which would split it into two messages). */
71
+ const callsTool = (name: string, id: string): ChatMessage => ({
72
+ role: 'assistant',
73
+ content: '',
74
+ toolCalls: [{ id, name, args: {} }],
75
+ });
76
+
77
+ const agent = (overrides: Partial<AgentConfig> & { name: string }): AgentConfig =>
78
+ ({ description: 'test agent', ...overrides }) as AgentConfig;
79
+
80
+ const makeDriver = (config: AgentConfig, provider: AIProvider, sessionKey = ''): ChatDriver => {
81
+ const driver = new ChatDriver(
82
+ makeRegistry(provider),
83
+ {},
84
+ [],
85
+ undefined,
86
+ undefined,
87
+ 50,
88
+ 5,
89
+ undefined,
90
+ sessionKey,
91
+ );
92
+ driver.applyAgent(config);
93
+ return driver;
94
+ };
95
+
96
+ /** All tool calls across the whole conversation, flattened. */
97
+ const allToolCalls = (driver: ChatDriver): ChatToolCall[] =>
98
+ driver.getHistory().flatMap((m) => m.toolCalls ?? []);
99
+
100
+ /** Tool-result message contents, in order. */
101
+ const toolResultContents = (driver: ChatDriver): string[] =>
102
+ driver
103
+ .getHistory()
104
+ .filter((m) => m.role === 'tool' && m.toolResult)
105
+ .map((m) => m.toolResult!.content);
106
+
107
+ /** `tool.unresolved` meta-event details recorded for a session (download-log surface). */
108
+ const unresolvedEvents = (sessionKey: string): Array<Record<string, unknown>> =>
109
+ getMetaEvents(sessionKey)
110
+ .filter((e) => e.type === 'tool.unresolved')
111
+ .map((e) => e.detail ?? {});
112
+
113
+ // ---------------------------------------------------------------------------
114
+ // stale tool detection — stateful agent advances past a tool's state
115
+ // ---------------------------------------------------------------------------
116
+
117
+ const stale = createLogicSuite('ChatDriver stale-tool detection');
118
+
119
+ // The driver imports the `agenticActivityBus` singleton, which opens a
120
+ // BroadcastChannel at module load. An open channel keeps the test page alive
121
+ // and hangs the runner, so close it once the suite finishes.
122
+ stale.after(() => {
123
+ agenticActivityBus.close();
124
+ });
125
+
126
+ stale('guides the model when it calls a tool that an earlier state exposed', async () => {
127
+ // State A exposes tool_a; calling it advances to state B, which exposes only
128
+ // tool_b. A factory-form agent narrows the tool set per turn, mirroring how
129
+ // `defineStatefulAgent` works.
130
+ let state: 'A' | 'B' = 'A';
131
+ const config = agent({
132
+ name: 'Stateful',
133
+ toolDefinitions: () => (state === 'A' ? [def('tool_a')] : [def('tool_b')]),
134
+ toolHandlers: () =>
135
+ state === 'A'
136
+ ? {
137
+ tool_a: async () => {
138
+ state = 'B';
139
+ return 'advanced to B';
140
+ },
141
+ }
142
+ : { tool_b: async () => 'b done' },
143
+ });
144
+
145
+ const provider = scriptedProvider([
146
+ callsTool('tool_a', 't1'), // real — advances A -> B
147
+ callsTool('tool_a', 't2'), // stale — tool_a no longer in state B
148
+ callsTool('tool_b', 't3'), // real — valid in state B
149
+ ]);
150
+ const sessionKey = 'stale-meta-test';
151
+ const driver = makeDriver(config, provider, sessionKey);
152
+
153
+ const result = await driver.sendMessage('go');
154
+ assert.is(result.reason, 'done');
155
+
156
+ // The per-state narrowing actually happened: tool_a advertised first, tool_b later.
157
+ assert.equal(provider.advertisedPerCall[0], ['tool_a']);
158
+ assert.ok(
159
+ provider.advertisedPerCall.some(
160
+ (tools) => tools.includes('tool_b') && !tools.includes('tool_a'),
161
+ ),
162
+ 'a later turn should advertise tool_b without tool_a',
163
+ );
164
+
165
+ // The retried tool_a got stale guidance — not "Unknown tool".
166
+ const staleGuidance = toolResultContents(driver).find((c) =>
167
+ c.includes('was available earlier but is not part of the current step'),
168
+ );
169
+ assert.ok(staleGuidance, 'a previously-available tool should receive stale guidance');
170
+ assert.not.ok(
171
+ toolResultContents(driver).some((c) => c.startsWith('Unknown tool:')),
172
+ 'a previously-available tool must not be reported as a hallucination',
173
+ );
174
+
175
+ // The retried call is flagged unknown + stale for the UI.
176
+ const retried = allToolCalls(driver).filter(
177
+ (tc) => tc.name === 'tool_a' && isChatToolCallUnknown(tc),
178
+ );
179
+ assert.is(retried.length, 1, 'exactly one tool_a call should be flagged unknown');
180
+ assert.ok(isChatToolCallUnknown(retried[0]) && retried[0].stale === true, 'and marked stale');
181
+
182
+ // The occurrence is recorded to the meta-event log for the download log.
183
+ assert.ok(
184
+ unresolvedEvents(sessionKey).some((d) => d.kind === 'stale' && d.tool === 'tool_a'),
185
+ 'a stale tool.unresolved meta event should be recorded',
186
+ );
187
+ });
188
+
189
+ stale('reports a never-seen tool as a hallucinated unknown tool', async () => {
190
+ const config = agent({
191
+ name: 'Static',
192
+ toolDefinitions: [def('real_tool')],
193
+ toolHandlers: { real_tool: async () => 'ok' },
194
+ });
195
+ const provider = scriptedProvider([callsTool('made_up', 'm1')]);
196
+ const sessionKey = 'hallucination-meta-test';
197
+ const driver = makeDriver(config, provider, sessionKey);
198
+
199
+ await driver.sendMessage('go');
200
+
201
+ assert.ok(
202
+ toolResultContents(driver).includes('Unknown tool: made_up'),
203
+ 'a tool never advertised should be reported as unknown',
204
+ );
205
+ const call = allToolCalls(driver).find((tc) => tc.name === 'made_up');
206
+ assert.ok(call && isChatToolCallUnknown(call), 'the call should be flagged unknown');
207
+ assert.not.ok(
208
+ (call as { stale?: boolean }).stale,
209
+ 'a hallucinated tool must NOT be flagged stale',
210
+ );
211
+
212
+ assert.ok(
213
+ unresolvedEvents(sessionKey).some((d) => d.kind === 'unknown' && d.tool === 'made_up'),
214
+ 'an unknown tool.unresolved meta event should be recorded',
215
+ );
216
+ });
217
+
218
+ stale('points the model at the close tool when an exclusive fold hides a base tool', async () => {
219
+ const fold = createToolFold({
220
+ name: 'my_fold',
221
+ tools: [def('inner_tool')],
222
+ handlers: { inner_tool: async () => 'inner done' },
223
+ // exclusive defaults to true — opening it removes base_tool from the set.
224
+ });
225
+ const config = agent({
226
+ name: 'Folded',
227
+ toolDefinitions: [def('base_tool'), fold.definition],
228
+ toolHandlers: { base_tool: async () => 'base done', ...fold.handler },
229
+ });
230
+
231
+ const provider = scriptedProvider([
232
+ callsTool('my_fold', 'f1'), // open the exclusive fold — base_tool now hidden
233
+ callsTool('base_tool', 'b1'), // hidden behind the open fold
234
+ ]);
235
+ const sessionKey = 'fold-meta-test';
236
+ const driver = makeDriver(config, provider, sessionKey);
237
+
238
+ await driver.sendMessage('go');
239
+
240
+ // Target the base_tool result specifically — the fold-open result also
241
+ // mentions my_fold, so match on the tool call id rather than substring.
242
+ const guidance = driver
243
+ .getHistory()
244
+ .find((m) => m.role === 'tool' && m.toolResult?.toolCallId === 'b1')?.toolResult?.content;
245
+ assert.ok(guidance, 'calling a fold-hidden tool should produce guidance');
246
+ assert.match(guidance!, /not available while the "my_fold" fold is open/);
247
+ assert.match(guidance!, /close_my_fold/);
248
+
249
+ const hidden = allToolCalls(driver).find(
250
+ (tc) => tc.name === 'base_tool' && isChatToolCallUnknown(tc),
251
+ );
252
+ assert.ok(
253
+ hidden && isChatToolCallUnknown(hidden) && hidden.stale === true,
254
+ 'the hidden call is stale',
255
+ );
256
+
257
+ assert.ok(
258
+ unresolvedEvents(sessionKey).some(
259
+ (d) => d.kind === 'fold-hidden' && d.tool === 'base_tool' && d.fold === 'my_fold',
260
+ ),
261
+ 'a fold-hidden tool.unresolved meta event should be recorded',
262
+ );
263
+ });
264
+
265
+ stale('splits stale vs hallucinated tools on the unknown-tool-limit error', async () => {
266
+ const sessionKey = 'stale-limit-test';
267
+ clearMetaEventRegistry();
268
+
269
+ let state: 'A' | 'B' = 'A';
270
+ const config = agent({
271
+ name: 'Stateful',
272
+ toolDefinitions: () => (state === 'A' ? [def('tool_a')] : [def('tool_b')]),
273
+ toolHandlers: () =>
274
+ state === 'A'
275
+ ? {
276
+ tool_a: async () => {
277
+ state = 'B';
278
+ return 'advanced to B';
279
+ },
280
+ }
281
+ : { tool_b: async () => 'b done' },
282
+ });
283
+
284
+ // One real call to advance to B, then 5 consecutive stale calls — the 5th
285
+ // trips DEFAULT_MAX_UNKNOWN_TOOL_CALLS and ends the turn.
286
+ const provider = scriptedProvider([
287
+ callsTool('tool_a', 'real'),
288
+ ...Array.from({ length: 5 }, (_unused, i) => callsTool('tool_a', `stale-${i}`)),
289
+ ]);
290
+ const driver = makeDriver(config, provider, sessionKey);
291
+
292
+ const result = await driver.sendMessage('go');
293
+ assert.is(result.reason, 'done');
294
+
295
+ const limitError = getMetaEvents(sessionKey).find(
296
+ (e) => e.type === 'turn.error' && e.detail?.reason === 'unknown-tool-limit',
297
+ );
298
+ assert.ok(limitError, 'hitting the limit should record an unknown-tool-limit turn.error');
299
+ const detail = limitError!.detail!;
300
+ assert.equal(detail.staleTools, ['tool_a'], 'tool_a should be classified as stale');
301
+ assert.equal(detail.hallucinatedTools, [], 'nothing was hallucinated');
302
+
303
+ // Every stale attempt — not just the final limit error — is in the download log.
304
+ assert.is(
305
+ unresolvedEvents(sessionKey).filter((d) => d.kind === 'stale').length,
306
+ 5,
307
+ 'each stale attempt should be recorded as its own tool.unresolved event',
308
+ );
309
+
310
+ // The user-facing turn ends with the apology, not a crash.
311
+ const last = driver.getHistory().at(-1);
312
+ assert.ok(last?.role === 'assistant' && last.content.startsWith("I'm sorry"));
313
+ });
314
+
315
+ stale.run();
@@ -192,6 +192,21 @@ export class ChatDriver extends EventTarget implements AiDriver {
192
192
  * hallucinated. Reset alongside `consecutiveUnknownToolCalls`.
193
193
  */
194
194
  private readonly recentUnknownToolNames = new Set<string>();
195
+ /**
196
+ * Union of every tool name advertised at any point during the current agent
197
+ * activation. Lets the unknown-tool path tell a *stale* call (a real tool from
198
+ * an earlier state, now retired — or one an open exclusive fold is hiding)
199
+ * apart from a *hallucinated* one. Reset on agent swap in `applyAgent`.
200
+ */
201
+ private readonly everSeenToolNames = new Set<string>();
202
+ /**
203
+ * Subset of the current unknown-tool streak that was stale (previously
204
+ * available) rather than hallucinated — surfaced separately on the
205
+ * `unknown-tool-limit` turn.error so triage can tell a state/prompt-design
206
+ * problem from a model that's inventing tools. Reset alongside
207
+ * `recentUnknownToolNames`.
208
+ */
209
+ private readonly recentStaleToolNames = new Set<string>();
195
210
  private readonly maxFoldOperations: number;
196
211
 
197
212
  /** Sub-agents declared on the active agent config, keyed by name. */
@@ -320,6 +335,10 @@ export class ChatDriver extends EventTarget implements AiDriver {
320
335
  // Reset fold state when agent changes — each specialist starts fresh
321
336
  this.foldStack = [];
322
337
  this.consecutiveFoldOps = 0;
338
+ // Forget the previous agent's tools — "previously available" is scoped to
339
+ // the current activation, so a stateful agent accumulates its tools across
340
+ // states while a swap to a different specialist starts clean.
341
+ this.everSeenToolNames.clear();
323
342
  }
324
343
 
325
344
  /**
@@ -954,6 +973,22 @@ export class ChatDriver extends EventTarget implements AiDriver {
954
973
  return null;
955
974
  }
956
975
 
976
+ /**
977
+ * If an open fold is hiding a previously-available tool, return the name of
978
+ * the fold to close to start getting it back. Only exclusive folds hide tools
979
+ * (they replace the tool set on open rather than extending it), so a base tool
980
+ * that was visible before the fold opened now sits in a fold-stack frame's
981
+ * `previousHandlers` but not in the live handler map. Only the top fold's
982
+ * `close_` tool is active, so that's always the actionable next step — even
983
+ * when the tool lives further down the stack, closing repeatedly walks back to
984
+ * it. Returns null when no open fold accounts for the tool.
985
+ */
986
+ private foldHidingTool(toolName: string): string | null {
987
+ if (this.foldStack.length === 0) return null;
988
+ const hidden = this.foldStack.some((f) => f.previousHandlers[toolName]);
989
+ return hidden ? this.foldStack[this.foldStack.length - 1].foldName : null;
990
+ }
991
+
957
992
  /**
958
993
  * Install the fold's inner tool set, replacing (exclusive) or extending (non-exclusive)
959
994
  * the current tool set. Also injects the close tool. Does NOT touch the fold stack.
@@ -1133,6 +1168,12 @@ export class ChatDriver extends EventTarget implements AiDriver {
1133
1168
  this.toolHandlers = await this.toolHandlersFactory(promptCtx);
1134
1169
  }
1135
1170
 
1171
+ // Record everything advertised this turn so the unknown-tool path can tell
1172
+ // a stale tool (real earlier, retired now) from a hallucinated one. Runs
1173
+ // for both the static and factory cases; folds also flow through here as
1174
+ // their inner tools become visible on the iteration after they open.
1175
+ for (const def of this.toolDefinitions) this.everSeenToolNames.add(def.name);
1176
+
1136
1177
  const resolvedSystemPrompt =
1137
1178
  typeof this.systemPrompt === 'function'
1138
1179
  ? // oxlint-disable-next-line no-await-in-loop
@@ -1287,6 +1328,9 @@ export class ChatDriver extends EventTarget implements AiDriver {
1287
1328
  { toolCallId: string; content: string; subAgentTrace?: ChatMessage[] }
1288
1329
  >();
1289
1330
  const unknownToolIds = new Set<string>();
1331
+ // Subset of unknownToolIds that were stale (previously available) rather
1332
+ // than hallucinated — drives the `stale` UI flag back-patched below.
1333
+ const staleToolIds = new Set<string>();
1290
1334
  let anyRealToolExecuted = false;
1291
1335
  let hitUnknownToolLimit = false;
1292
1336
 
@@ -1342,23 +1386,80 @@ export class ChatDriver extends EventTarget implements AiDriver {
1342
1386
  logger.debug(
1343
1387
  `ChatDriver: model called folded tool "${tc.name}" — guiding to open "${containingFold}"`,
1344
1388
  );
1389
+ recordMetaEvent(this.sessionKey, 'tool.unresolved', {
1390
+ tool: tc.name,
1391
+ agent: this.activeAgentName,
1392
+ kind: 'folded',
1393
+ fold: containingFold,
1394
+ });
1345
1395
  executedById.set(tc.id, {
1346
1396
  toolCallId: tc.id,
1347
1397
  content: `"${tc.name}" is not directly available. It is inside the "${containingFold}" fold. Call ${containingFold} first to access it.`,
1348
1398
  });
1349
1399
  // Guidance does not count as a real iteration or fold op
1350
1400
  iterations -= 1;
1351
- } else {
1401
+ return;
1402
+ }
1403
+
1404
+ // Not in any registered fold. If the tool was advertised earlier
1405
+ // in this agent's lifetime it's *stale* (a stateful agent moved on,
1406
+ // or an exclusive fold is hiding it) rather than hallucinated — a
1407
+ // distinction worth making, because the model should stop retrying
1408
+ // a retired tool rather than treat the failure as a typo. Stale
1409
+ // calls still count toward the same unknown-tool limit (loop
1410
+ // protection); only the guidance and telemetry differ.
1411
+ if (this.everSeenToolNames.has(tc.name)) {
1352
1412
  this.consecutiveUnknownToolCalls += 1;
1353
- logger.warn(
1354
- `ChatDriver: no handler registered for tool "${tc.name}" (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS}). Available tools: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}`,
1355
- );
1356
- executedById.set(tc.id, { toolCallId: tc.id, content: `Unknown tool: ${tc.name}` });
1413
+ const hidingFold = this.foldHidingTool(tc.name);
1414
+ let content: string;
1415
+ if (hidingFold) {
1416
+ content = `"${tc.name}" is not available while the "${hidingFold}" fold is open. Call close_${hidingFold} to return to the previous set of tools, then call ${tc.name}.`;
1417
+ logger.warn(
1418
+ `ChatDriver: tool "${tc.name}" is hidden behind open fold "${hidingFold}" (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS})`,
1419
+ );
1420
+ } else {
1421
+ content = `"${tc.name}" was available earlier but is not part of the current step — that step is complete, so do not call it again. Continue with the tools available now: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}.`;
1422
+ logger.warn(
1423
+ `ChatDriver: stale tool "${tc.name}" — advertised earlier this activation but retired in the current state (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS})`,
1424
+ );
1425
+ }
1426
+ recordMetaEvent(this.sessionKey, 'tool.unresolved', {
1427
+ tool: tc.name,
1428
+ agent: this.activeAgentName,
1429
+ kind: hidingFold ? 'fold-hidden' : 'stale',
1430
+ fold: hidingFold ?? undefined,
1431
+ consecutive: this.consecutiveUnknownToolCalls,
1432
+ max: DEFAULT_MAX_UNKNOWN_TOOL_CALLS,
1433
+ });
1434
+ executedById.set(tc.id, { toolCallId: tc.id, content });
1357
1435
  unknownToolIds.add(tc.id);
1436
+ staleToolIds.add(tc.id);
1358
1437
  this.recentUnknownToolNames.add(tc.name);
1438
+ this.recentStaleToolNames.add(tc.name);
1359
1439
  if (this.consecutiveUnknownToolCalls >= DEFAULT_MAX_UNKNOWN_TOOL_CALLS) {
1360
1440
  hitUnknownToolLimit = true;
1361
1441
  }
1442
+ return;
1443
+ }
1444
+
1445
+ // Never advertised — a hallucinated tool name.
1446
+ this.consecutiveUnknownToolCalls += 1;
1447
+ logger.warn(
1448
+ `ChatDriver: no handler registered for tool "${tc.name}" (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS}). Available tools: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}`,
1449
+ );
1450
+ recordMetaEvent(this.sessionKey, 'tool.unresolved', {
1451
+ tool: tc.name,
1452
+ agent: this.activeAgentName,
1453
+ kind: 'unknown',
1454
+ consecutive: this.consecutiveUnknownToolCalls,
1455
+ max: DEFAULT_MAX_UNKNOWN_TOOL_CALLS,
1456
+ availableTools: Object.keys(this.toolHandlers),
1457
+ });
1458
+ executedById.set(tc.id, { toolCallId: tc.id, content: `Unknown tool: ${tc.name}` });
1459
+ unknownToolIds.add(tc.id);
1460
+ this.recentUnknownToolNames.add(tc.name);
1461
+ if (this.consecutiveUnknownToolCalls >= DEFAULT_MAX_UNKNOWN_TOOL_CALLS) {
1462
+ hitUnknownToolLimit = true;
1362
1463
  }
1363
1464
  return;
1364
1465
  }
@@ -1396,6 +1497,7 @@ export class ChatDriver extends EventTarget implements AiDriver {
1396
1497
  this.consecutiveFoldOps = 0;
1397
1498
  this.consecutiveUnknownToolCalls = 0;
1398
1499
  this.recentUnknownToolNames.clear();
1500
+ this.recentStaleToolNames.clear();
1399
1501
  }
1400
1502
 
1401
1503
  // Tag tool calls with fold UI metadata before appending results
@@ -1447,6 +1549,9 @@ export class ChatDriver extends EventTarget implements AiDriver {
1447
1549
  foldPath: !isFoldOpen && !isFoldClose && foldPath.length > 0 ? foldPath : undefined,
1448
1550
  unknown: isUnknown || undefined,
1449
1551
  availableTools: isUnknown ? availableToolNames : undefined,
1552
+ // Distinguish a retired tool from a hallucinated one so the UI can
1553
+ // say "no longer available here" rather than "does not exist".
1554
+ stale: staleToolIds.has(tc.id) || undefined,
1450
1555
  subAgentTrace: executedById.get(tc.id)?.subAgentTrace,
1451
1556
  };
1452
1557
  });
@@ -1470,10 +1575,25 @@ export class ChatDriver extends EventTarget implements AiDriver {
1470
1575
  .map((tc) => tc.name),
1471
1576
  ]),
1472
1577
  ];
1578
+ // Stale tools were real earlier this activation; hallucinated tools
1579
+ // never existed. The hard stop counts both the same way, but the split
1580
+ // tells a triager whether the cause is a state/prompt-design problem
1581
+ // (stale) or a model inventing tool names (hallucinated).
1582
+ const staleTools = [
1583
+ ...new Set([
1584
+ ...this.recentStaleToolNames,
1585
+ ...(response.toolCalls ?? [])
1586
+ .filter((tc) => staleToolIds.has(tc.id))
1587
+ .map((tc) => tc.name),
1588
+ ]),
1589
+ ];
1590
+ const hallucinatedTools = unknownTools.filter((t) => !staleTools.includes(t));
1473
1591
  recordTurnError(this.sessionKey, 'unknown-tool-limit', {
1474
1592
  agent: this.activeAgentName,
1475
1593
  provider: this.lastResolvedProviderName,
1476
1594
  unknownTools,
1595
+ staleTools,
1596
+ hallucinatedTools,
1477
1597
  availableTools: Object.keys(this.toolHandlers),
1478
1598
  });
1479
1599
  this.appendToHistory({