npm - @genesislcap/ai-assistant - Versions diffs - 14.451.3-alpha-861508d.0 → 14.451.4 - Mend

@genesislcap/ai-assistant 14.451.3-alpha-861508d.0 → 14.451.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/dist/esm/state/debug-event-log.js CHANGED Viewed

@@ -48,6 +48,7 @@ export const META_EVENT_IMPORTANCE = {
     'turn.start': 'normal',
     'turn.end': 'normal',
     'turn.retry': 'normal',
+    'tool.unresolved': 'normal',
     'agent.handoff': 'normal',
     'agent.pinned': 'normal',
     'agent.unpinned': 'normal',
@@ -67,7 +68,7 @@ export const META_EVENT_IMPORTANCE = {
  * allowed to float above this cap rather than lose a failure signal; in normal
  * use the frequent `low`/`normal` events keep it near the cap. Entries are cheap.
  */
-const DEFAULT_MAX_META_EVENTS = 400;
+const DEFAULT_MAX_META_EVENTS = 800;
 const registry = new Map();
 /**
  * Append a meta event to the timeline for `key`. Once the buffer exceeds
@@ -138,7 +139,7 @@ export const DEBUG_LOG_README = [
     "kind:'turn'.`agentSnapshot` — the active agent's own view of its internal state, captured at that turn. An agent opts into this by exposing a `getDebugSnapshot()` that returns JSON-serializable per-state info; stateful/flow agents wire it automatically, so you can watch a flow advance turn-by-turn (e.g. current step, cursor, collected fields, pending changes). Absent for agents that don't expose one.",
     "kind:'event' — a meta/lifecycle event. `type` names it (see below); `detail` carries structured data. `detail.placement` is the emitting UI instance: 'bubble' (collapsed), 'panel' (popped-out), or 'standalone'.",
     "Each 'event' also has an `importance`: 'high' (failures/limits — turn.error, tool.failed, file.read-failed, suggestions.failed, context.threshold-crossed), 'normal' (session flow — connects, turns, retries, handoffs, agent/provider changes, interactions), or 'low' (skippable UI/bookkeeping noise — panel.toggled, attachment.added, driver.wired/unwired, context.updated). To skim, ignore importance:'low'; to triage a failure, filter to importance:'high' then read the nearby messages and turns. A 'high' turn.error is often preceded by one or more 'normal' turn.retry events for the same reason — read them together to see how many attempts were made before bailing. 'message' and 'turn' entries carry no importance — they are the substance, always read them.",
-    'Event types: assistant.connected/disconnected (mount + placement + whether the session was created or restored), assistant.popout/popin (window placement), driver.created/wired/unwired (which driver is live and why it stops/starts responding across a popout), state.changed (idle↔loading), turn.start/turn.end (turn boundary; turn.end carries durationMs), turn.retry (a recoverable in-turn retry — detail.reason plus attempt/maxAttempts; for malformed calls also finishMessage), turn.error (a turn failed or hit a guardrail — detail.reason is one of exception/malformed-function-call/empty-response/unknown-tool-limit/max-iterations, plus reason-specific diagnostics: attempts, finishMessage, unknownTools + availableTools, iterations + limit, or name + message for exceptions), tool.failed (a tool threw), agent.handoff (routing; from=null is the initial activation), agent.pinned/unpinned (forced routing), provider.selected (model/provider for the upcoming turns), interaction.requested/resolved (blocking user widgets — explain quiet gaps), context.updated/threshold-crossed (token + cost), panel.toggled, attachment.added, file.read-failed, suggestions.failed.',
+    'Event types: assistant.connected/disconnected (mount + placement + whether the session was created or restored), assistant.popout/popin (window placement), driver.created/wired/unwired (which driver is live and why it stops/starts responding across a popout), state.changed (idle↔loading), turn.start/turn.end (turn boundary; turn.end carries durationMs), turn.retry (a recoverable in-turn retry — detail.reason plus attempt/maxAttempts; for malformed calls also finishMessage), turn.error (a turn failed or hit a guardrail — detail.reason is one of exception/malformed-function-call/empty-response/unknown-tool-limit/max-iterations, plus reason-specific diagnostics: attempts, finishMessage, unknownTools (split into staleTools — real earlier this activation but retired by the current state or hidden behind an open exclusive fold — and hallucinatedTools — never advertised) + availableTools, iterations + limit, or name + message for exceptions), tool.failed (a tool threw), tool.unresolved (the model called a tool that could not be dispatched — detail.kind is folded/fold-hidden/stale/unknown, plus tool + agent and, for the counted kinds, the consecutive streak; the recurring lead-up to an unknown-tool-limit turn.error), agent.handoff (routing; from=null is the initial activation), agent.pinned/unpinned (forced routing), provider.selected (model/provider for the upcoming turns), interaction.requested/resolved (blocking user widgets — explain quiet gaps), context.updated/threshold-crossed (token + cost), panel.toggled, attachment.added, file.read-failed, suggestions.failed.',
     "`meta` holds context captured at export time: agentSummary (full agent configs), context (active model, token usage, session cost), activeDebugSnapshot (the active agent's `getDebugSnapshot()` taken fresh at export — reflects state NOW, which may have advanced beyond the last turn's agentSnapshot), debug (optional host-supplied debug state), host, and the export timestamp.",
     'To debug a failure: find the last turn.error or tool.failed, then read upward for the user message, the turn(s), and the agent/provider/state events that led into it.',
 ];

package/dist/tsconfig.tsbuildinfo CHANGED Viewed

	@@ -1 +1 @@
1	- {"root":["../src/index.ts","../src/channel/ai-activity-bus.ts","../src/channel/ai-activity-channel.ts","../src/components/halo-overlay.ts","../src/components/activity-halo/activity-halo.ts","../src/components/agent-picker/agent-picker.constants.ts","../src/components/agent-picker/agent-picker.styles.ts","../src/components/agent-picker/agent-picker.template.ts","../src/components/agent-picker/agent-picker.ts","../src/components/agent-picker/index.ts","../src/components/ai-driver/ai-driver.ts","../src/components/ai-driver/index.ts","../src/components/chat-bubble/chat-bubble.styles.ts","../src/components/chat-bubble/chat-bubble.template.ts","../src/components/chat-bubble/chat-bubble.ts","../src/components/chat-bubble/index.ts","../src/components/chat-driver/chat-driver.ts","../src/components/chat-driver/index.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.styles.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.template.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.test.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.ts","../src/components/chat-interaction-wrapper/index.ts","../src/components/chat-markdown/chat-markdown.ts","../src/components/chat-markdown/index.ts","../src/components/orchestrating-driver/index.ts","../src/components/orchestrating-driver/orchestrating-driver.ts","../src/components/popout-manager/index.ts","../src/components/popout-manager/popout-manager.ts","../src/config/config.ts","../src/config/define-stateful-agent.ts","../src/config/fallback-agents.ts","../src/config/index.ts","../src/config/validate-providers.test.ts","../src/config/validate-providers.ts","../src/main/index.ts","../src/main/main.styles.ts","../src/main/main.template.ts","../src/main/main.ts","../src/main/main.types.ts","../src/state/ai-assistant-slice.ts","../src/state/debug-event-log.ts","../src/state/driver-registry.ts","../src/state/session-store.ts","../src/styles/ai-colours.ts","../src/styles/index.ts","../src/styles/styles.ts","../src/suggestions/chat-suggestions.ts","../src/tags/index.ts","../src/types/ai-chat-widget.ts","../src/utils/animated-panel-toggle.ts","../src/utils/history-transform.ts","../src/utils/index.ts","../src/utils/logger.ts","../src/utils/message-partition.test.ts","../src/utils/message-partition.ts","../src/utils/sum-costs.test.ts","../src/utils/sum-costs.ts","../src/utils/tool-fold.ts"],"version":"5.9.2"}
1	+ {"root":["../src/index.ts","../src/channel/ai-activity-bus.ts","../src/channel/ai-activity-channel.ts","../src/components/halo-overlay.ts","../src/components/activity-halo/activity-halo.ts","../src/components/agent-picker/agent-picker.constants.ts","../src/components/agent-picker/agent-picker.styles.ts","../src/components/agent-picker/agent-picker.template.ts","../src/components/agent-picker/agent-picker.ts","../src/components/agent-picker/index.ts","../src/components/ai-driver/ai-driver.ts","../src/components/ai-driver/index.ts","../src/components/chat-bubble/chat-bubble.styles.ts","../src/components/chat-bubble/chat-bubble.template.ts","../src/components/chat-bubble/chat-bubble.ts","../src/components/chat-bubble/index.ts","../src/components/chat-driver/align-event-globals.ts","../src/components/chat-driver/chat-driver.test.ts","../src/components/chat-driver/chat-driver.ts","../src/components/chat-driver/index.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.styles.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.template.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.test.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.ts","../src/components/chat-interaction-wrapper/index.ts","../src/components/chat-markdown/chat-markdown.ts","../src/components/chat-markdown/index.ts","../src/components/orchestrating-driver/index.ts","../src/components/orchestrating-driver/orchestrating-driver.ts","../src/components/popout-manager/index.ts","../src/components/popout-manager/popout-manager.ts","../src/config/config.ts","../src/config/define-stateful-agent.ts","../src/config/fallback-agents.ts","../src/config/index.ts","../src/config/validate-providers.test.ts","../src/config/validate-providers.ts","../src/main/index.ts","../src/main/main.styles.ts","../src/main/main.template.ts","../src/main/main.ts","../src/main/main.types.ts","../src/state/ai-assistant-slice.ts","../src/state/debug-event-log.ts","../src/state/driver-registry.ts","../src/state/session-store.ts","../src/styles/ai-colours.ts","../src/styles/index.ts","../src/styles/styles.ts","../src/suggestions/chat-suggestions.ts","../src/tags/index.ts","../src/types/ai-chat-widget.ts","../src/utils/animated-panel-toggle.ts","../src/utils/history-transform.ts","../src/utils/index.ts","../src/utils/logger.ts","../src/utils/message-partition.test.ts","../src/utils/message-partition.ts","../src/utils/sum-costs.test.ts","../src/utils/sum-costs.ts","../src/utils/tool-fold.ts"],"version":"5.9.2"}

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@genesislcap/ai-assistant",
   "description": "Genesis AI Assistant micro-frontend",
-  "version": "14.451.3-alpha-861508d.0",
+  "version": "14.451.4",
   "license": "SEE LICENSE IN license.txt",
   "main": "dist/esm/index.js",
   "types": "dist/ai-assistant.d.ts",
@@ -64,24 +64,24 @@
     }
   },
   "devDependencies": {
-    "@genesislcap/foundation-testing": "14.451.3-alpha-861508d.0",
-    "@genesislcap/genx": "14.451.3-alpha-861508d.0",
-    "@genesislcap/rollup-builder": "14.451.3-alpha-861508d.0",
-    "@genesislcap/ts-builder": "14.451.3-alpha-861508d.0",
-    "@genesislcap/uvu-playwright-builder": "14.451.3-alpha-861508d.0",
-    "@genesislcap/vite-builder": "14.451.3-alpha-861508d.0",
-    "@genesislcap/webpack-builder": "14.451.3-alpha-861508d.0",
+    "@genesislcap/foundation-testing": "14.451.4",
+    "@genesislcap/genx": "14.451.4",
+    "@genesislcap/rollup-builder": "14.451.4",
+    "@genesislcap/ts-builder": "14.451.4",
+    "@genesislcap/uvu-playwright-builder": "14.451.4",
+    "@genesislcap/vite-builder": "14.451.4",
+    "@genesislcap/webpack-builder": "14.451.4",
     "@types/dompurify": "^3.0.5",
     "@types/marked": "^5.0.2"
   },
   "dependencies": {
-    "@genesislcap/foundation-ai": "14.451.3-alpha-861508d.0",
-    "@genesislcap/foundation-logger": "14.451.3-alpha-861508d.0",
-    "@genesislcap/foundation-redux": "14.451.3-alpha-861508d.0",
-    "@genesislcap/foundation-ui": "14.451.3-alpha-861508d.0",
-    "@genesislcap/foundation-utils": "14.451.3-alpha-861508d.0",
-    "@genesislcap/rapid-design-system": "14.451.3-alpha-861508d.0",
-    "@genesislcap/web-core": "14.451.3-alpha-861508d.0",
+    "@genesislcap/foundation-ai": "14.451.4",
+    "@genesislcap/foundation-logger": "14.451.4",
+    "@genesislcap/foundation-redux": "14.451.4",
+    "@genesislcap/foundation-ui": "14.451.4",
+    "@genesislcap/foundation-utils": "14.451.4",
+    "@genesislcap/rapid-design-system": "14.451.4",
+    "@genesislcap/web-core": "14.451.4",
     "dompurify": "^3.3.1",
     "marked": "^17.0.3"
   },
@@ -93,5 +93,5 @@
   "publishConfig": {
     "access": "public"
   },
-  "gitHead": "6952480d23f47b3830abff2b8afd8e018fdf2b92"
+  "gitHead": "265d5fa00ae476a7713d78707f53f393f5eeb647"
 }

package/src/components/chat-driver/align-event-globals.ts ADDED Viewed

@@ -0,0 +1,23 @@
+/**
+ * Test-only side effect: align the global `EventTarget` with jsdom's before any
+ * module that `extends EventTarget` is evaluated.
+ *
+ * The node test runner's jsdom setup installs `globalThis.CustomEvent` from
+ * jsdom but leaves `globalThis.EventTarget` as Node's native class. A class that
+ * `extends EventTarget` (e.g. {@link ChatDriver}) then inherits Node's native
+ * `dispatchEvent`, which rejects the jsdom `CustomEvent` instances it is handed
+ * ("The 'event' argument must be an instance of Event. Received an instance of
+ * CustomEvent"). Pointing `EventTarget` at jsdom's keeps the whole event family
+ * in one realm.
+ *
+ * No-op in a real browser, where `window.EventTarget === globalThis.EventTarget`
+ * already. Import this BEFORE importing anything that subclasses `EventTarget`.
+ */
+const jsdomWindow = (globalThis as { window?: { EventTarget?: typeof EventTarget } }).window;
+if (jsdomWindow?.EventTarget && globalThis.EventTarget !== jsdomWindow.EventTarget) {
+  Object.defineProperty(globalThis, 'EventTarget', {
+    value: jsdomWindow.EventTarget,
+    configurable: true,
+    writable: true,
+  });
+}

package/src/components/chat-driver/chat-driver.test.ts ADDED Viewed

@@ -0,0 +1,315 @@
+import type {
+  AIProvider,
+  AIProviderRegistry,
+  ChatMessage,
+  ChatRequestOptions,
+  ChatToolCall,
+  ChatToolDefinition,
+} from '@genesislcap/foundation-ai';
+import { isChatToolCallUnknown } from '@genesislcap/foundation-ai';
+import { assert, createLogicSuite } from '@genesislcap/foundation-testing';
+import { agenticActivityBus } from '../../channel/ai-activity-bus';
+import type { AgentConfig } from '../../config/config';
+import { clearMetaEventRegistry, getMetaEvents } from '../../state/debug-event-log';
+import { createToolFold } from '../../utils/tool-fold';
+// Side-effect import — MUST come before `./chat-driver` so the driver subclasses
+// jsdom's EventTarget, not Node's native one (see `./align-event-globals`). None of
+// the imports above pull in the driver, so the global is patched before it evaluates.
+import './align-event-globals';
+import { ChatDriver } from './chat-driver';
+// ---------------------------------------------------------------------------
+// Test harness
+//
+// The driver calls `provider.chat(history, userMessage, options)` once per
+// tool-loop iteration and inspects the returned ChatMessage: a message with
+// `toolCalls` keeps the loop running; one without ends the turn. So a fake
+// provider that replays a scripted sequence of ChatMessages is enough to drive
+// any tool-loop path. We capture the advertised tool names per call so tests
+// can assert per-state narrowing actually happened.
+// ---------------------------------------------------------------------------
+interface ScriptedProvider extends AIProvider {
+  /** Tool names advertised to the model on each `chat()` call, in order. */
+  advertisedPerCall: string[][];
+}
+const scriptedProvider = (responses: ChatMessage[]): ScriptedProvider => {
+  const queue = [...responses];
+  const advertisedPerCall: string[][] = [];
+  return {
+    advertisedPerCall,
+    chat: async (
+      _history: ChatMessage[],
+      _userMessage: string,
+      options?: ChatRequestOptions,
+    ): Promise<ChatMessage> => {
+      advertisedPerCall.push((options?.tools ?? []).map((t) => t.name));
+      // Once the script is exhausted, end the turn with a plain text reply.
+      return queue.shift() ?? { role: 'assistant', content: 'done' };
+    },
+  };
+};
+const makeRegistry = (provider: AIProvider): AIProviderRegistry => ({
+  get: () => provider,
+  default: () => provider,
+  defaultName: () => 'test',
+  names: () => ['test'],
+  getStatus: async () => null,
+  listStatuses: async () => [],
+});
+const def = (name: string): ChatToolDefinition => ({
+  name,
+  description: `${name} tool`,
+  parameters: { type: 'object', properties: {} },
+});
+/** An assistant turn that calls a single tool. `content` is empty so the driver
+ * does not treat it as a thinking step (which would split it into two messages). */
+const callsTool = (name: string, id: string): ChatMessage => ({
+  role: 'assistant',
+  content: '',
+  toolCalls: [{ id, name, args: {} }],
+});
+const agent = (overrides: Partial<AgentConfig> & { name: string }): AgentConfig =>
+  ({ description: 'test agent', ...overrides }) as AgentConfig;
+const makeDriver = (config: AgentConfig, provider: AIProvider, sessionKey = ''): ChatDriver => {
+  const driver = new ChatDriver(
+    makeRegistry(provider),
+    {},
+    [],
+    undefined,
+    undefined,
+    50,
+    5,
+    undefined,
+    sessionKey,
+  );
+  driver.applyAgent(config);
+  return driver;
+};
+/** All tool calls across the whole conversation, flattened. */
+const allToolCalls = (driver: ChatDriver): ChatToolCall[] =>
+  driver.getHistory().flatMap((m) => m.toolCalls ?? []);
+/** Tool-result message contents, in order. */
+const toolResultContents = (driver: ChatDriver): string[] =>
+  driver
+    .getHistory()
+    .filter((m) => m.role === 'tool' && m.toolResult)
+    .map((m) => m.toolResult!.content);
+/** `tool.unresolved` meta-event details recorded for a session (download-log surface). */
+const unresolvedEvents = (sessionKey: string): Array<Record<string, unknown>> =>
+  getMetaEvents(sessionKey)
+    .filter((e) => e.type === 'tool.unresolved')
+    .map((e) => e.detail ?? {});
+// ---------------------------------------------------------------------------
+// stale tool detection — stateful agent advances past a tool's state
+// ---------------------------------------------------------------------------
+const stale = createLogicSuite('ChatDriver stale-tool detection');
+// The driver imports the `agenticActivityBus` singleton, which opens a
+// BroadcastChannel at module load. An open channel keeps the test page alive
+// and hangs the runner, so close it once the suite finishes.
+stale.after(() => {
+  agenticActivityBus.close();
+});
+stale('guides the model when it calls a tool that an earlier state exposed', async () => {
+  // State A exposes tool_a; calling it advances to state B, which exposes only
+  // tool_b. A factory-form agent narrows the tool set per turn, mirroring how
+  // `defineStatefulAgent` works.
+  let state: 'A' | 'B' = 'A';
+  const config = agent({
+    name: 'Stateful',
+    toolDefinitions: () => (state === 'A' ? [def('tool_a')] : [def('tool_b')]),
+    toolHandlers: () =>
+      state === 'A'
+        ? {
+            tool_a: async () => {
+              state = 'B';
+              return 'advanced to B';
+            },
+          }
+        : { tool_b: async () => 'b done' },
+  });
+  const provider = scriptedProvider([
+    callsTool('tool_a', 't1'), // real — advances A -> B
+    callsTool('tool_a', 't2'), // stale — tool_a no longer in state B
+    callsTool('tool_b', 't3'), // real — valid in state B
+  ]);
+  const sessionKey = 'stale-meta-test';
+  const driver = makeDriver(config, provider, sessionKey);
+  const result = await driver.sendMessage('go');
+  assert.is(result.reason, 'done');
+  // The per-state narrowing actually happened: tool_a advertised first, tool_b later.
+  assert.equal(provider.advertisedPerCall[0], ['tool_a']);
+  assert.ok(
+    provider.advertisedPerCall.some(
+      (tools) => tools.includes('tool_b') && !tools.includes('tool_a'),
+    ),
+    'a later turn should advertise tool_b without tool_a',
+  );
+  // The retried tool_a got stale guidance — not "Unknown tool".
+  const staleGuidance = toolResultContents(driver).find((c) =>
+    c.includes('was available earlier but is not part of the current step'),
+  );
+  assert.ok(staleGuidance, 'a previously-available tool should receive stale guidance');
+  assert.not.ok(
+    toolResultContents(driver).some((c) => c.startsWith('Unknown tool:')),
+    'a previously-available tool must not be reported as a hallucination',
+  );
+  // The retried call is flagged unknown + stale for the UI.
+  const retried = allToolCalls(driver).filter(
+    (tc) => tc.name === 'tool_a' && isChatToolCallUnknown(tc),
+  );
+  assert.is(retried.length, 1, 'exactly one tool_a call should be flagged unknown');
+  assert.ok(isChatToolCallUnknown(retried[0]) && retried[0].stale === true, 'and marked stale');
+  // The occurrence is recorded to the meta-event log for the download log.
+  assert.ok(
+    unresolvedEvents(sessionKey).some((d) => d.kind === 'stale' && d.tool === 'tool_a'),
+    'a stale tool.unresolved meta event should be recorded',
+  );
+});
+stale('reports a never-seen tool as a hallucinated unknown tool', async () => {
+  const config = agent({
+    name: 'Static',
+    toolDefinitions: [def('real_tool')],
+    toolHandlers: { real_tool: async () => 'ok' },
+  });
+  const provider = scriptedProvider([callsTool('made_up', 'm1')]);
+  const sessionKey = 'hallucination-meta-test';
+  const driver = makeDriver(config, provider, sessionKey);
+  await driver.sendMessage('go');
+  assert.ok(
+    toolResultContents(driver).includes('Unknown tool: made_up'),
+    'a tool never advertised should be reported as unknown',
+  );
+  const call = allToolCalls(driver).find((tc) => tc.name === 'made_up');
+  assert.ok(call && isChatToolCallUnknown(call), 'the call should be flagged unknown');
+  assert.not.ok(
+    (call as { stale?: boolean }).stale,
+    'a hallucinated tool must NOT be flagged stale',
+  );
+  assert.ok(
+    unresolvedEvents(sessionKey).some((d) => d.kind === 'unknown' && d.tool === 'made_up'),
+    'an unknown tool.unresolved meta event should be recorded',
+  );
+});
+stale('points the model at the close tool when an exclusive fold hides a base tool', async () => {
+  const fold = createToolFold({
+    name: 'my_fold',
+    tools: [def('inner_tool')],
+    handlers: { inner_tool: async () => 'inner done' },
+    // exclusive defaults to true — opening it removes base_tool from the set.
+  });
+  const config = agent({
+    name: 'Folded',
+    toolDefinitions: [def('base_tool'), fold.definition],
+    toolHandlers: { base_tool: async () => 'base done', ...fold.handler },
+  });
+  const provider = scriptedProvider([
+    callsTool('my_fold', 'f1'), // open the exclusive fold — base_tool now hidden
+    callsTool('base_tool', 'b1'), // hidden behind the open fold
+  ]);
+  const sessionKey = 'fold-meta-test';
+  const driver = makeDriver(config, provider, sessionKey);
+  await driver.sendMessage('go');
+  // Target the base_tool result specifically — the fold-open result also
+  // mentions my_fold, so match on the tool call id rather than substring.
+  const guidance = driver
+    .getHistory()
+    .find((m) => m.role === 'tool' && m.toolResult?.toolCallId === 'b1')?.toolResult?.content;
+  assert.ok(guidance, 'calling a fold-hidden tool should produce guidance');
+  assert.match(guidance!, /not available while the "my_fold" fold is open/);
+  assert.match(guidance!, /close_my_fold/);
+  const hidden = allToolCalls(driver).find(
+    (tc) => tc.name === 'base_tool' && isChatToolCallUnknown(tc),
+  );
+  assert.ok(
+    hidden && isChatToolCallUnknown(hidden) && hidden.stale === true,
+    'the hidden call is stale',
+  );
+  assert.ok(
+    unresolvedEvents(sessionKey).some(
+      (d) => d.kind === 'fold-hidden' && d.tool === 'base_tool' && d.fold === 'my_fold',
+    ),
+    'a fold-hidden tool.unresolved meta event should be recorded',
+  );
+});
+stale('splits stale vs hallucinated tools on the unknown-tool-limit error', async () => {
+  const sessionKey = 'stale-limit-test';
+  clearMetaEventRegistry();
+  let state: 'A' | 'B' = 'A';
+  const config = agent({
+    name: 'Stateful',
+    toolDefinitions: () => (state === 'A' ? [def('tool_a')] : [def('tool_b')]),
+    toolHandlers: () =>
+      state === 'A'
+        ? {
+            tool_a: async () => {
+              state = 'B';
+              return 'advanced to B';
+            },
+          }
+        : { tool_b: async () => 'b done' },
+  });
+  // One real call to advance to B, then 5 consecutive stale calls — the 5th
+  // trips DEFAULT_MAX_UNKNOWN_TOOL_CALLS and ends the turn.
+  const provider = scriptedProvider([
+    callsTool('tool_a', 'real'),
+    ...Array.from({ length: 5 }, (_unused, i) => callsTool('tool_a', `stale-${i}`)),
+  ]);
+  const driver = makeDriver(config, provider, sessionKey);
+  const result = await driver.sendMessage('go');
+  assert.is(result.reason, 'done');
+  const limitError = getMetaEvents(sessionKey).find(
+    (e) => e.type === 'turn.error' && e.detail?.reason === 'unknown-tool-limit',
+  );
+  assert.ok(limitError, 'hitting the limit should record an unknown-tool-limit turn.error');
+  const detail = limitError!.detail!;
+  assert.equal(detail.staleTools, ['tool_a'], 'tool_a should be classified as stale');
+  assert.equal(detail.hallucinatedTools, [], 'nothing was hallucinated');
+  // Every stale attempt — not just the final limit error — is in the download log.
+  assert.is(
+    unresolvedEvents(sessionKey).filter((d) => d.kind === 'stale').length,
+    5,
+    'each stale attempt should be recorded as its own tool.unresolved event',
+  );
+  // The user-facing turn ends with the apology, not a crash.
+  const last = driver.getHistory().at(-1);
+  assert.ok(last?.role === 'assistant' && last.content.startsWith("I'm sorry"));
+});
+stale.run();

package/src/components/chat-driver/chat-driver.ts CHANGED Viewed

@@ -192,6 +192,21 @@ export class ChatDriver extends EventTarget implements AiDriver {
    * hallucinated. Reset alongside `consecutiveUnknownToolCalls`.
    */
   private readonly recentUnknownToolNames = new Set<string>();
+  /**
+   * Union of every tool name advertised at any point during the current agent
+   * activation. Lets the unknown-tool path tell a *stale* call (a real tool from
+   * an earlier state, now retired — or one an open exclusive fold is hiding)
+   * apart from a *hallucinated* one. Reset on agent swap in `applyAgent`.
+   */
+  private readonly everSeenToolNames = new Set<string>();
+  /**
+   * Subset of the current unknown-tool streak that was stale (previously
+   * available) rather than hallucinated — surfaced separately on the
+   * `unknown-tool-limit` turn.error so triage can tell a state/prompt-design
+   * problem from a model that's inventing tools. Reset alongside
+   * `recentUnknownToolNames`.
+   */
+  private readonly recentStaleToolNames = new Set<string>();
   private readonly maxFoldOperations: number;
   /** Sub-agents declared on the active agent config, keyed by name. */
@@ -320,6 +335,10 @@ export class ChatDriver extends EventTarget implements AiDriver {
     // Reset fold state when agent changes — each specialist starts fresh
     this.foldStack = [];
     this.consecutiveFoldOps = 0;
+    // Forget the previous agent's tools — "previously available" is scoped to
+    // the current activation, so a stateful agent accumulates its tools across
+    // states while a swap to a different specialist starts clean.
+    this.everSeenToolNames.clear();
   }
   /**
@@ -954,6 +973,22 @@ export class ChatDriver extends EventTarget implements AiDriver {
     return null;
   }
+  /**
+   * If an open fold is hiding a previously-available tool, return the name of
+   * the fold to close first to get it back. Only exclusive folds hide tools
+   * (they replace the tool set on open rather than extending it), so a base tool
+   * that was visible before the fold opened now sits in a fold-stack frame's
+   * `previousHandlers` but not in the live handler map. Only the top fold's
+   * `close_` tool is active, so that's always the actionable next step — even
+   * when the tool lives further down the stack, closing repeatedly walks back to
+   * it. Returns null when no open fold accounts for the tool.
+   */
+  private foldHidingTool(toolName: string): string | null {
+    if (this.foldStack.length === 0) return null;
+    const hidden = this.foldStack.some((f) => f.previousHandlers[toolName]);
+    return hidden ? this.foldStack[this.foldStack.length - 1].foldName : null;
+  }
   /**
    * Install the fold's inner tool set, replacing (exclusive) or extending (non-exclusive)
    * the current tool set. Also injects the close tool. Does NOT touch the fold stack.
@@ -1133,6 +1168,12 @@ export class ChatDriver extends EventTarget implements AiDriver {
         this.toolHandlers = await this.toolHandlersFactory(promptCtx);
       }
+      // Record everything advertised this turn so the unknown-tool path can tell
+      // a stale tool (real earlier, retired now) from a hallucinated one. Runs
+      // for both the static and factory cases; folds also flow through here as
+      // their inner tools become visible on the iteration after they open.
+      for (const def of this.toolDefinitions) this.everSeenToolNames.add(def.name);
       const resolvedSystemPrompt =
         typeof this.systemPrompt === 'function'
           ? // oxlint-disable-next-line no-await-in-loop
@@ -1287,6 +1328,9 @@ export class ChatDriver extends EventTarget implements AiDriver {
         { toolCallId: string; content: string; subAgentTrace?: ChatMessage[] }
       >();
       const unknownToolIds = new Set<string>();
+      // Subset of unknownToolIds that were stale (previously available) rather
+      // than hallucinated — drives the `stale` UI flag back-patched below.
+      const staleToolIds = new Set<string>();
       let anyRealToolExecuted = false;
       let hitUnknownToolLimit = false;
@@ -1342,23 +1386,80 @@ export class ChatDriver extends EventTarget implements AiDriver {
                 logger.debug(
                   `ChatDriver: model called folded tool "${tc.name}" — guiding to open "${containingFold}"`,
                 );
+                recordMetaEvent(this.sessionKey, 'tool.unresolved', {
+                  tool: tc.name,
+                  agent: this.activeAgentName,
+                  kind: 'folded',
+                  fold: containingFold,
+                });
                 executedById.set(tc.id, {
                   toolCallId: tc.id,
                   content: `"${tc.name}" is not directly available. It is inside the "${containingFold}" fold. Call ${containingFold} first to access it.`,
                 });
                 // Guidance does not count as a real iteration or fold op
                 iterations -= 1;
-              } else {
+                return;
+              }
+              // Not in any registered fold. If the tool was advertised earlier
+              // in this agent's lifetime it's *stale* (a stateful agent moved on,
+              // or an exclusive fold is hiding it) rather than hallucinated — a
+              // distinction worth making, because the model should stop retrying
+              // a retired tool rather than treat the failure as a typo. Stale
+              // calls still count toward the same unknown-tool limit (loop
+              // protection); only the guidance and telemetry differ.
+              if (this.everSeenToolNames.has(tc.name)) {
                 this.consecutiveUnknownToolCalls += 1;
-                logger.warn(
-                  `ChatDriver: no handler registered for tool "${tc.name}" (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS}). Available tools: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}`,
-                );
-                executedById.set(tc.id, { toolCallId: tc.id, content: `Unknown tool: ${tc.name}` });
+                const hidingFold = this.foldHidingTool(tc.name);
+                let content: string;
+                if (hidingFold) {
+                  content = `"${tc.name}" is not available while the "${hidingFold}" fold is open. Call close_${hidingFold} to return to the previous set of tools, then call ${tc.name}.`;
+                  logger.warn(
+                    `ChatDriver: tool "${tc.name}" is hidden behind open fold "${hidingFold}" (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS})`,
+                  );
+                } else {
+                  content = `"${tc.name}" was available earlier but is not part of the current step — that step is complete, so do not call it again. Continue with the tools available now: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}.`;
+                  logger.warn(
+                    `ChatDriver: stale tool "${tc.name}" — advertised earlier this activation but retired in the current state (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS})`,
+                  );
+                }
+                recordMetaEvent(this.sessionKey, 'tool.unresolved', {
+                  tool: tc.name,
+                  agent: this.activeAgentName,
+                  kind: hidingFold ? 'fold-hidden' : 'stale',
+                  fold: hidingFold ?? undefined,
+                  consecutive: this.consecutiveUnknownToolCalls,
+                  max: DEFAULT_MAX_UNKNOWN_TOOL_CALLS,
+                });
+                executedById.set(tc.id, { toolCallId: tc.id, content });
                 unknownToolIds.add(tc.id);
+                staleToolIds.add(tc.id);
                 this.recentUnknownToolNames.add(tc.name);
+                this.recentStaleToolNames.add(tc.name);
                 if (this.consecutiveUnknownToolCalls >= DEFAULT_MAX_UNKNOWN_TOOL_CALLS) {
                   hitUnknownToolLimit = true;
                 }
+                return;
+              }
+              // Never advertised — a hallucinated tool name.
+              this.consecutiveUnknownToolCalls += 1;
+              logger.warn(
+                `ChatDriver: no handler registered for tool "${tc.name}" (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS}). Available tools: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}`,
+              );
+              recordMetaEvent(this.sessionKey, 'tool.unresolved', {
+                tool: tc.name,
+                agent: this.activeAgentName,
+                kind: 'unknown',
+                consecutive: this.consecutiveUnknownToolCalls,
+                max: DEFAULT_MAX_UNKNOWN_TOOL_CALLS,
+                availableTools: Object.keys(this.toolHandlers),
+              });
+              executedById.set(tc.id, { toolCallId: tc.id, content: `Unknown tool: ${tc.name}` });
+              unknownToolIds.add(tc.id);
+              this.recentUnknownToolNames.add(tc.name);
+              if (this.consecutiveUnknownToolCalls >= DEFAULT_MAX_UNKNOWN_TOOL_CALLS) {
+                hitUnknownToolLimit = true;
               }
               return;
             }
@@ -1396,6 +1497,7 @@ export class ChatDriver extends EventTarget implements AiDriver {
         this.consecutiveFoldOps = 0;
         this.consecutiveUnknownToolCalls = 0;
         this.recentUnknownToolNames.clear();
+        this.recentStaleToolNames.clear();
       }
       // Tag tool calls with fold UI metadata before appending results
@@ -1447,6 +1549,9 @@ export class ChatDriver extends EventTarget implements AiDriver {
             foldPath: !isFoldOpen && !isFoldClose && foldPath.length > 0 ? foldPath : undefined,
             unknown: isUnknown || undefined,
             availableTools: isUnknown ? availableToolNames : undefined,
+            // Distinguish a retired tool from a hallucinated one so the UI can
+            // say "no longer available here" rather than "does not exist".
+            stale: staleToolIds.has(tc.id) || undefined,
             subAgentTrace: executedById.get(tc.id)?.subAgentTrace,
           };
         });
@@ -1470,10 +1575,25 @@ export class ChatDriver extends EventTarget implements AiDriver {
               .map((tc) => tc.name),
           ]),
         ];
+        // Stale tools were real earlier this activation; hallucinated tools
+        // never existed. The hard stop counts both the same way, but the split
+        // tells a triager whether the cause is a state/prompt-design problem
+        // (stale) or a model inventing tool names (hallucinated).
+        const staleTools = [
+          ...new Set([
+            ...this.recentStaleToolNames,
+            ...(response.toolCalls ?? [])
+              .filter((tc) => staleToolIds.has(tc.id))
+              .map((tc) => tc.name),
+          ]),
+        ];
+        const hallucinatedTools = unknownTools.filter((t) => !staleTools.includes(t));
         recordTurnError(this.sessionKey, 'unknown-tool-limit', {
           agent: this.activeAgentName,
           provider: this.lastResolvedProviderName,
           unknownTools,
+          staleTools,
+          hallucinatedTools,
           availableTools: Object.keys(this.toolHandlers),
         });
         this.appendToHistory({