@genesislcap/ai-assistant 14.451.3 → 14.451.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-assistant.d.ts +27 -1
- package/dist/dts/components/chat-driver/align-event-globals.d.ts +19 -0
- package/dist/dts/components/chat-driver/align-event-globals.d.ts.map +1 -0
- package/dist/dts/components/chat-driver/chat-driver.d.ts +26 -0
- package/dist/dts/components/chat-driver/chat-driver.d.ts.map +1 -1
- package/dist/dts/components/chat-driver/chat-driver.test.d.ts +2 -0
- package/dist/dts/components/chat-driver/chat-driver.test.d.ts.map +1 -0
- package/dist/dts/main/main.template.d.ts.map +1 -1
- package/dist/dts/state/debug-event-log.d.ts +3 -2
- package/dist/dts/state/debug-event-log.d.ts.map +1 -1
- package/dist/esm/components/chat-driver/align-event-globals.js +23 -0
- package/dist/esm/components/chat-driver/chat-driver.js +119 -5
- package/dist/esm/components/chat-driver/chat-driver.test.js +196 -0
- package/dist/esm/main/main.template.js +5 -1
- package/dist/esm/state/debug-event-log.js +3 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +16 -16
- package/src/components/chat-driver/align-event-globals.ts +23 -0
- package/src/components/chat-driver/chat-driver.test.ts +315 -0
- package/src/components/chat-driver/chat-driver.ts +125 -5
- package/src/main/main.template.ts +5 -1
- package/src/state/debug-event-log.ts +6 -3
|
@@ -48,6 +48,7 @@ export const META_EVENT_IMPORTANCE = {
|
|
|
48
48
|
'turn.start': 'normal',
|
|
49
49
|
'turn.end': 'normal',
|
|
50
50
|
'turn.retry': 'normal',
|
|
51
|
+
'tool.unresolved': 'normal',
|
|
51
52
|
'agent.handoff': 'normal',
|
|
52
53
|
'agent.pinned': 'normal',
|
|
53
54
|
'agent.unpinned': 'normal',
|
|
@@ -67,7 +68,7 @@ export const META_EVENT_IMPORTANCE = {
|
|
|
67
68
|
* allowed to float above this cap rather than lose a failure signal; in normal
|
|
68
69
|
* use the frequent `low`/`normal` events keep it near the cap. Entries are cheap.
|
|
69
70
|
*/
|
|
70
|
-
const DEFAULT_MAX_META_EVENTS =
|
|
71
|
+
const DEFAULT_MAX_META_EVENTS = 800;
|
|
71
72
|
const registry = new Map();
|
|
72
73
|
/**
|
|
73
74
|
* Append a meta event to the timeline for `key`. Once the buffer exceeds
|
|
@@ -138,7 +139,7 @@ export const DEBUG_LOG_README = [
|
|
|
138
139
|
"kind:'turn'.`agentSnapshot` — the active agent's own view of its internal state, captured at that turn. An agent opts into this by exposing a `getDebugSnapshot()` that returns JSON-serializable per-state info; stateful/flow agents wire it automatically, so you can watch a flow advance turn-by-turn (e.g. current step, cursor, collected fields, pending changes). Absent for agents that don't expose one.",
|
|
139
140
|
"kind:'event' — a meta/lifecycle event. `type` names it (see below); `detail` carries structured data. `detail.placement` is the emitting UI instance: 'bubble' (collapsed), 'panel' (popped-out), or 'standalone'.",
|
|
140
141
|
"Each 'event' also has an `importance`: 'high' (failures/limits — turn.error, tool.failed, file.read-failed, suggestions.failed, context.threshold-crossed), 'normal' (session flow — connects, turns, retries, handoffs, agent/provider changes, interactions), or 'low' (skippable UI/bookkeeping noise — panel.toggled, attachment.added, driver.wired/unwired, context.updated). To skim, ignore importance:'low'; to triage a failure, filter to importance:'high' then read the nearby messages and turns. A 'high' turn.error is often preceded by one or more 'normal' turn.retry events for the same reason — read them together to see how many attempts were made before bailing. 'message' and 'turn' entries carry no importance — they are the substance, always read them.",
|
|
141
|
-
'Event types: assistant.connected/disconnected (mount + placement + whether the session was created or restored), assistant.popout/popin (window placement), driver.created/wired/unwired (which driver is live and why it stops/starts responding across a popout), state.changed (idle↔loading), turn.start/turn.end (turn boundary; turn.end carries durationMs), turn.retry (a recoverable in-turn retry — detail.reason plus attempt/maxAttempts; for malformed calls also finishMessage), turn.error (a turn failed or hit a guardrail — detail.reason is one of exception/malformed-function-call/empty-response/unknown-tool-limit/max-iterations, plus reason-specific diagnostics: attempts, finishMessage, unknownTools + availableTools, iterations + limit, or name + message for exceptions), tool.failed (a tool threw), agent.handoff (routing; from=null is the initial activation), agent.pinned/unpinned (forced routing), provider.selected (model/provider for the upcoming turns), interaction.requested/resolved (blocking user widgets — explain quiet gaps), context.updated/threshold-crossed (token + cost), panel.toggled, attachment.added, file.read-failed, suggestions.failed.',
|
|
142
|
+
'Event types: assistant.connected/disconnected (mount + placement + whether the session was created or restored), assistant.popout/popin (window placement), driver.created/wired/unwired (which driver is live and why it stops/starts responding across a popout), state.changed (idle↔loading), turn.start/turn.end (turn boundary; turn.end carries durationMs), turn.retry (a recoverable in-turn retry — detail.reason plus attempt/maxAttempts; for malformed calls also finishMessage), turn.error (a turn failed or hit a guardrail — detail.reason is one of exception/malformed-function-call/empty-response/unknown-tool-limit/max-iterations, plus reason-specific diagnostics: attempts, finishMessage, unknownTools (split into staleTools — real earlier this activation but retired by the current state or hidden behind an open exclusive fold — and hallucinatedTools — never advertised) + availableTools, iterations + limit, or name + message for exceptions), tool.failed (a tool threw), tool.unresolved (the model called a tool that could not be dispatched — detail.kind is folded/fold-hidden/stale/unknown, plus tool + agent and, for the counted kinds, the consecutive streak; the recurring lead-up to an unknown-tool-limit turn.error), agent.handoff (routing; from=null is the initial activation), agent.pinned/unpinned (forced routing), provider.selected (model/provider for the upcoming turns), interaction.requested/resolved (blocking user widgets — explain quiet gaps), context.updated/threshold-crossed (token + cost), panel.toggled, attachment.added, file.read-failed, suggestions.failed.',
|
|
142
143
|
"`meta` holds context captured at export time: agentSummary (full agent configs), context (active model, token usage, session cost), activeDebugSnapshot (the active agent's `getDebugSnapshot()` taken fresh at export — reflects state NOW, which may have advanced beyond the last turn's agentSnapshot), debug (optional host-supplied debug state), host, and the export timestamp.",
|
|
143
144
|
'To debug a failure: find the last turn.error or tool.failed, then read upward for the user message, the turn(s), and the agent/provider/state events that led into it.',
|
|
144
145
|
];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"root":["../src/index.ts","../src/channel/ai-activity-bus.ts","../src/channel/ai-activity-channel.ts","../src/components/halo-overlay.ts","../src/components/activity-halo/activity-halo.ts","../src/components/agent-picker/agent-picker.constants.ts","../src/components/agent-picker/agent-picker.styles.ts","../src/components/agent-picker/agent-picker.template.ts","../src/components/agent-picker/agent-picker.ts","../src/components/agent-picker/index.ts","../src/components/ai-driver/ai-driver.ts","../src/components/ai-driver/index.ts","../src/components/chat-bubble/chat-bubble.styles.ts","../src/components/chat-bubble/chat-bubble.template.ts","../src/components/chat-bubble/chat-bubble.ts","../src/components/chat-bubble/index.ts","../src/components/chat-driver/chat-driver.ts","../src/components/chat-driver/index.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.styles.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.template.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.test.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.ts","../src/components/chat-interaction-wrapper/index.ts","../src/components/chat-markdown/chat-markdown.ts","../src/components/chat-markdown/index.ts","../src/components/orchestrating-driver/index.ts","../src/components/orchestrating-driver/orchestrating-driver.ts","../src/components/popout-manager/index.ts","../src/components/popout-manager/popout-manager.ts","../src/config/config.ts","../src/config/define-stateful-agent.ts","../src/config/fallback-agents.ts","../src/config/index.ts","../src/config/validate-providers.test.ts","../src/config/validate-providers.ts","../src/main/index.ts","../src/main/main.styles.ts","../src/main/main.template.ts","../src/main/main.ts","../src/main/main.types.ts","../src/state/ai-assistant-slice.ts","../src/state/debug-event-log.ts","../src/state/driver-registry.ts","../src/state/session-store.ts","../src/styles/ai-colours.t
s","../src/styles/index.ts","../src/styles/styles.ts","../src/suggestions/chat-suggestions.ts","../src/tags/index.ts","../src/types/ai-chat-widget.ts","../src/utils/animated-panel-toggle.ts","../src/utils/history-transform.ts","../src/utils/index.ts","../src/utils/logger.ts","../src/utils/message-partition.test.ts","../src/utils/message-partition.ts","../src/utils/sum-costs.test.ts","../src/utils/sum-costs.ts","../src/utils/tool-fold.ts"],"version":"5.9.2"}
|
|
1
|
+
{"root":["../src/index.ts","../src/channel/ai-activity-bus.ts","../src/channel/ai-activity-channel.ts","../src/components/halo-overlay.ts","../src/components/activity-halo/activity-halo.ts","../src/components/agent-picker/agent-picker.constants.ts","../src/components/agent-picker/agent-picker.styles.ts","../src/components/agent-picker/agent-picker.template.ts","../src/components/agent-picker/agent-picker.ts","../src/components/agent-picker/index.ts","../src/components/ai-driver/ai-driver.ts","../src/components/ai-driver/index.ts","../src/components/chat-bubble/chat-bubble.styles.ts","../src/components/chat-bubble/chat-bubble.template.ts","../src/components/chat-bubble/chat-bubble.ts","../src/components/chat-bubble/index.ts","../src/components/chat-driver/align-event-globals.ts","../src/components/chat-driver/chat-driver.test.ts","../src/components/chat-driver/chat-driver.ts","../src/components/chat-driver/index.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.styles.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.template.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.test.ts","../src/components/chat-interaction-wrapper/chat-interaction-wrapper.ts","../src/components/chat-interaction-wrapper/index.ts","../src/components/chat-markdown/chat-markdown.ts","../src/components/chat-markdown/index.ts","../src/components/orchestrating-driver/index.ts","../src/components/orchestrating-driver/orchestrating-driver.ts","../src/components/popout-manager/index.ts","../src/components/popout-manager/popout-manager.ts","../src/config/config.ts","../src/config/define-stateful-agent.ts","../src/config/fallback-agents.ts","../src/config/index.ts","../src/config/validate-providers.test.ts","../src/config/validate-providers.ts","../src/main/index.ts","../src/main/main.styles.ts","../src/main/main.template.ts","../src/main/main.ts","../src/main/main.types.ts","../src/state/ai-assistant-slice.ts","../src/state/debug-
event-log.ts","../src/state/driver-registry.ts","../src/state/session-store.ts","../src/styles/ai-colours.ts","../src/styles/index.ts","../src/styles/styles.ts","../src/suggestions/chat-suggestions.ts","../src/tags/index.ts","../src/types/ai-chat-widget.ts","../src/utils/animated-panel-toggle.ts","../src/utils/history-transform.ts","../src/utils/index.ts","../src/utils/logger.ts","../src/utils/message-partition.test.ts","../src/utils/message-partition.ts","../src/utils/sum-costs.test.ts","../src/utils/sum-costs.ts","../src/utils/tool-fold.ts"],"version":"5.9.2"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@genesislcap/ai-assistant",
|
|
3
3
|
"description": "Genesis AI Assistant micro-frontend",
|
|
4
|
-
"version": "14.451.
|
|
4
|
+
"version": "14.451.4",
|
|
5
5
|
"license": "SEE LICENSE IN license.txt",
|
|
6
6
|
"main": "dist/esm/index.js",
|
|
7
7
|
"types": "dist/ai-assistant.d.ts",
|
|
@@ -64,24 +64,24 @@
|
|
|
64
64
|
}
|
|
65
65
|
},
|
|
66
66
|
"devDependencies": {
|
|
67
|
-
"@genesislcap/foundation-testing": "14.451.
|
|
68
|
-
"@genesislcap/genx": "14.451.
|
|
69
|
-
"@genesislcap/rollup-builder": "14.451.
|
|
70
|
-
"@genesislcap/ts-builder": "14.451.
|
|
71
|
-
"@genesislcap/uvu-playwright-builder": "14.451.
|
|
72
|
-
"@genesislcap/vite-builder": "14.451.
|
|
73
|
-
"@genesislcap/webpack-builder": "14.451.
|
|
67
|
+
"@genesislcap/foundation-testing": "14.451.4",
|
|
68
|
+
"@genesislcap/genx": "14.451.4",
|
|
69
|
+
"@genesislcap/rollup-builder": "14.451.4",
|
|
70
|
+
"@genesislcap/ts-builder": "14.451.4",
|
|
71
|
+
"@genesislcap/uvu-playwright-builder": "14.451.4",
|
|
72
|
+
"@genesislcap/vite-builder": "14.451.4",
|
|
73
|
+
"@genesislcap/webpack-builder": "14.451.4",
|
|
74
74
|
"@types/dompurify": "^3.0.5",
|
|
75
75
|
"@types/marked": "^5.0.2"
|
|
76
76
|
},
|
|
77
77
|
"dependencies": {
|
|
78
|
-
"@genesislcap/foundation-ai": "14.451.
|
|
79
|
-
"@genesislcap/foundation-logger": "14.451.
|
|
80
|
-
"@genesislcap/foundation-redux": "14.451.
|
|
81
|
-
"@genesislcap/foundation-ui": "14.451.
|
|
82
|
-
"@genesislcap/foundation-utils": "14.451.
|
|
83
|
-
"@genesislcap/rapid-design-system": "14.451.
|
|
84
|
-
"@genesislcap/web-core": "14.451.
|
|
78
|
+
"@genesislcap/foundation-ai": "14.451.4",
|
|
79
|
+
"@genesislcap/foundation-logger": "14.451.4",
|
|
80
|
+
"@genesislcap/foundation-redux": "14.451.4",
|
|
81
|
+
"@genesislcap/foundation-ui": "14.451.4",
|
|
82
|
+
"@genesislcap/foundation-utils": "14.451.4",
|
|
83
|
+
"@genesislcap/rapid-design-system": "14.451.4",
|
|
84
|
+
"@genesislcap/web-core": "14.451.4",
|
|
85
85
|
"dompurify": "^3.3.1",
|
|
86
86
|
"marked": "^17.0.3"
|
|
87
87
|
},
|
|
@@ -93,5 +93,5 @@
|
|
|
93
93
|
"publishConfig": {
|
|
94
94
|
"access": "public"
|
|
95
95
|
},
|
|
96
|
-
"gitHead": "
|
|
96
|
+
"gitHead": "265d5fa00ae476a7713d78707f53f393f5eeb647"
|
|
97
97
|
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Test-only module, imported purely for its side effect: it makes the global
 * `EventTarget` the same class as jsdom's `window.EventTarget`.
 *
 * Why this is needed: under the node test runner the jsdom setup provides
 * `globalThis.CustomEvent` from jsdom while `globalThis.EventTarget` remains
 * Node's native class. Anything that `extends EventTarget` (for example
 * {@link ChatDriver}) would then use Node's native `dispatchEvent`, which
 * refuses the jsdom `CustomEvent` instances passed to it ("The 'event'
 * argument must be an instance of Event. Received an instance of
 * CustomEvent"). Swapping the global keeps every event class in one realm.
 *
 * In a real browser `window.EventTarget === globalThis.EventTarget` already,
 * so nothing happens. This module has to be imported BEFORE anything that
 * subclasses `EventTarget`.
 */
const jsdomWindow = (globalThis as { window?: { EventTarget?: typeof EventTarget } }).window;
if (jsdomWindow?.EventTarget) {
  // Only redefine when the two classes really differ.
  if (globalThis.EventTarget !== jsdomWindow.EventTarget) {
    Object.defineProperty(globalThis, 'EventTarget', {
      configurable: true,
      writable: true,
      value: jsdomWindow.EventTarget,
    });
  }
}
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
AIProvider,
|
|
3
|
+
AIProviderRegistry,
|
|
4
|
+
ChatMessage,
|
|
5
|
+
ChatRequestOptions,
|
|
6
|
+
ChatToolCall,
|
|
7
|
+
ChatToolDefinition,
|
|
8
|
+
} from '@genesislcap/foundation-ai';
|
|
9
|
+
import { isChatToolCallUnknown } from '@genesislcap/foundation-ai';
|
|
10
|
+
import { assert, createLogicSuite } from '@genesislcap/foundation-testing';
|
|
11
|
+
import { agenticActivityBus } from '../../channel/ai-activity-bus';
|
|
12
|
+
import type { AgentConfig } from '../../config/config';
|
|
13
|
+
import { clearMetaEventRegistry, getMetaEvents } from '../../state/debug-event-log';
|
|
14
|
+
import { createToolFold } from '../../utils/tool-fold';
|
|
15
|
+
// Side-effect import — MUST come before `./chat-driver` so the driver subclasses
|
|
16
|
+
// jsdom's EventTarget rather than Node's native one (see the file). None of the
|
|
17
|
+
// imports above pull in the driver, so its realm is still set before evaluation.
|
|
18
|
+
import './align-event-globals';
|
|
19
|
+
import { ChatDriver } from './chat-driver';
|
|
20
|
+
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Test harness
|
|
23
|
+
//
|
|
24
|
+
// The driver calls `provider.chat(history, userMessage, options)` once per
|
|
25
|
+
// tool-loop iteration and inspects the returned ChatMessage: a message with
|
|
26
|
+
// `toolCalls` keeps the loop running; one without ends the turn. So a fake
|
|
27
|
+
// provider that replays a scripted sequence of ChatMessages is enough to drive
|
|
28
|
+
// any tool-loop path. We capture the advertised tool names per call so tests
|
|
29
|
+
// can assert per-state narrowing actually happened.
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
/** An {@link AIProvider} test double that also records what it was offered. */
interface ScriptedProvider extends AIProvider {
  /** One entry per `chat()` call, in call order: the tool names advertised on that call. */
  advertisedPerCall: string[][];
}

/**
 * Build a fake provider that replays `responses`, one per `chat()` call.
 *
 * The caller's array is copied, so it is never mutated. Each call records the
 * names of the tools passed in `options.tools` (an empty list when none were
 * passed) before answering.
 */
const scriptedProvider = (responses: ChatMessage[]): ScriptedProvider => {
  const remaining = responses.slice();
  const advertisedPerCall: string[][] = [];

  async function chat(
    _history: ChatMessage[],
    _userMessage: string,
    options?: ChatRequestOptions,
  ): Promise<ChatMessage> {
    const offered = options?.tools ?? [];
    advertisedPerCall.push(offered.map((tool) => tool.name));
    const next = remaining.shift();
    // Script exhausted: finish the turn with a plain text reply.
    return next ?? { role: 'assistant', content: 'done' };
  }

  return { advertisedPerCall, chat };
};
|
|
53
|
+
|
|
54
|
+
/**
 * Wrap a single provider in a minimal {@link AIProviderRegistry}: every lookup
 * resolves to `provider`, the only registered name is 'test', and the status
 * queries report nothing.
 */
const makeRegistry = (provider: AIProvider): AIProviderRegistry => {
  const resolveProvider = () => provider;
  return {
    get: resolveProvider,
    default: resolveProvider,
    defaultName: () => 'test',
    names: () => ['test'],
    getStatus: async () => null,
    listStatuses: async () => [],
  };
};
|
|
62
|
+
|
|
63
|
+
/** Minimal tool definition called `name`, with an empty object parameter schema. */
const def = (name: string): ChatToolDefinition => {
  const description = `${name} tool`;
  const parameters = { type: 'object', properties: {} };
  return { name, description, parameters } as ChatToolDefinition;
};
|
|
68
|
+
|
|
69
|
+
/**
 * Scripted assistant message that invokes exactly one tool (`name`, call id
 * `id`, no arguments). The `content` is deliberately the empty string: with
 * text present the driver would treat the message as a thinking step and split
 * it into two messages.
 */
const callsTool = (name: string, id: string): ChatMessage => {
  const call = { id, name, args: {} };
  return { role: 'assistant', content: '', toolCalls: [call] };
};
|
|
76
|
+
|
|
77
|
+
/**
 * Build an {@link AgentConfig} for a test: a default description of
 * 'test agent' that any field in `overrides` (including `description`) replaces.
 */
const agent = (overrides: Partial<AgentConfig> & { name: string }): AgentConfig => {
  const defaults = { description: 'test agent' };
  return { ...defaults, ...overrides } as AgentConfig;
};
|
|
79
|
+
|
|
80
|
+
/**
 * Construct a {@link ChatDriver} backed by `provider`, apply `config` as its
 * agent, and return it. `sessionKey` defaults to the empty string.
 *
 * NOTE(review): the remaining constructor arguments are positional and their
 * meaning is not visible from this file — they are passed through unchanged.
 */
const makeDriver = (config: AgentConfig, provider: AIProvider, sessionKey = ''): ChatDriver => {
  const registry = makeRegistry(provider);
  const driver = new ChatDriver(
    registry,
    {},
    [],
    undefined,
    undefined,
    50,
    5,
    undefined,
    sessionKey,
  );
  driver.applyAgent(config);
  return driver;
};
|
|
95
|
+
|
|
96
|
+
/** Every tool call in the driver's history, in message order, as one flat list. */
const allToolCalls = (driver: ChatDriver): ChatToolCall[] => {
  const calls: ChatToolCall[] = [];
  for (const message of driver.getHistory()) {
    // Messages without tool calls contribute nothing.
    calls.push(...(message.toolCalls ?? []));
  }
  return calls;
};
|
|
99
|
+
|
|
100
|
+
/** The `content` of every tool-result message in the driver's history, in order. */
const toolResultContents = (driver: ChatDriver): string[] => {
  const contents: string[] = [];
  for (const message of driver.getHistory()) {
    if (message.role === 'tool' && message.toolResult) {
      contents.push(message.toolResult.content);
    }
  }
  return contents;
};
|
|
106
|
+
|
|
107
|
+
/**
 * The `detail` payload of each `tool.unresolved` meta event recorded for
 * `sessionKey` (the download-log surface). An event without a detail yields `{}`.
 */
const unresolvedEvents = (sessionKey: string): Array<Record<string, unknown>> => {
  const details: Array<Record<string, unknown>> = [];
  for (const event of getMetaEvents(sessionKey)) {
    if (event.type === 'tool.unresolved') {
      details.push(event.detail ?? {});
    }
  }
  return details;
};
|
|
112
|
+
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
114
|
+
// stale tool detection — stateful agent advances past a tool's state
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
const stale = createLogicSuite('ChatDriver stale-tool detection');

/**
 * Suite teardown. Importing the driver pulls in the `agenticActivityBus`
 * singleton, which opens a BroadcastChannel at module load; an open channel
 * keeps the test page alive and hangs the runner, so it is closed once the
 * suite has finished.
 */
function closeActivityBus(): void {
  agenticActivityBus.close();
}

stale.after(closeActivityBus);
|
|
125
|
+
|
|
126
|
+
stale('guides the model when it calls a tool that an earlier state exposed', async () => {
  // Two-state agent in factory form, narrowing its tool set per turn the way
  // `defineStatefulAgent` does: state A offers only tool_a, and running tool_a
  // moves the agent to state B, which offers only tool_b.
  let state: 'A' | 'B' = 'A';
  const config = agent({
    name: 'Stateful',
    toolDefinitions: () => [def(state === 'A' ? 'tool_a' : 'tool_b')],
    toolHandlers: () => {
      if (state !== 'A') {
        return { tool_b: async () => 'b done' };
      }
      return {
        tool_a: async () => {
          state = 'B';
          return 'advanced to B';
        },
      };
    },
  });

  const sessionKey = 'stale-meta-test';
  const provider = scriptedProvider([
    callsTool('tool_a', 't1'), // real: moves A -> B
    callsTool('tool_a', 't2'), // stale: tool_a is gone in state B
    callsTool('tool_b', 't3'), // real: valid in state B
  ]);
  const driver = makeDriver(config, provider, sessionKey);

  const { reason } = await driver.sendMessage('go');
  assert.is(reason, 'done');

  // Per-state narrowing really took place: tool_a first, tool_b alone later.
  const [firstAdvertised] = provider.advertisedPerCall;
  assert.equal(firstAdvertised, ['tool_a']);
  const narrowedToB = provider.advertisedPerCall.some(
    (tools) => tools.includes('tool_b') && !tools.includes('tool_a'),
  );
  assert.ok(narrowedToB, 'a later turn should advertise tool_b without tool_a');

  // The repeated tool_a call was answered with stale guidance, not "Unknown tool".
  const results = toolResultContents(driver);
  const staleGuidance = results.find((text) =>
    text.includes('was available earlier but is not part of the current step'),
  );
  assert.ok(staleGuidance, 'a previously-available tool should receive stale guidance');
  const reportedUnknown = results.some((text) => text.startsWith('Unknown tool:'));
  assert.not.ok(
    reportedUnknown,
    'a previously-available tool must not be reported as a hallucination',
  );

  // For the UI, the repeated call carries both the unknown and the stale flag.
  const retried = allToolCalls(driver).filter(
    (toolCall) => toolCall.name === 'tool_a' && isChatToolCallUnknown(toolCall),
  );
  assert.is(retried.length, 1, 'exactly one tool_a call should be flagged unknown');
  const [flagged] = retried;
  assert.ok(isChatToolCallUnknown(flagged) && flagged.stale === true, 'and marked stale');

  // The download log gets a meta event for the occurrence.
  const loggedStale = unresolvedEvents(sessionKey).some(
    (detail) => detail.kind === 'stale' && detail.tool === 'tool_a',
  );
  assert.ok(loggedStale, 'a stale tool.unresolved meta event should be recorded');
});
|
|
188
|
+
|
|
189
|
+
stale('reports a never-seen tool as a hallucinated unknown tool', async () => {
  // A static agent with one real tool; the scripted model calls a name that
  // was never advertised.
  const sessionKey = 'hallucination-meta-test';
  const config = agent({
    name: 'Static',
    toolDefinitions: [def('real_tool')],
    toolHandlers: { real_tool: async () => 'ok' },
  });
  const provider = scriptedProvider([callsTool('made_up', 'm1')]);
  const driver = makeDriver(config, provider, sessionKey);

  await driver.sendMessage('go');

  const reportedUnknown = toolResultContents(driver).includes('Unknown tool: made_up');
  assert.ok(reportedUnknown, 'a tool never advertised should be reported as unknown');

  const madeUpCall = allToolCalls(driver).find((toolCall) => toolCall.name === 'made_up');
  assert.ok(
    madeUpCall && isChatToolCallUnknown(madeUpCall),
    'the call should be flagged unknown',
  );
  const { stale: staleFlag } = madeUpCall as { stale?: boolean };
  assert.not.ok(staleFlag, 'a hallucinated tool must NOT be flagged stale');

  const loggedUnknown = unresolvedEvents(sessionKey).some(
    (detail) => detail.kind === 'unknown' && detail.tool === 'made_up',
  );
  assert.ok(loggedUnknown, 'an unknown tool.unresolved meta event should be recorded');
});
|
|
217
|
+
|
|
218
|
+
stale('points the model at the close tool when an exclusive fold hides a base tool', async () => {
  // `exclusive` is left at its default (true), so opening the fold takes
  // base_tool out of the advertised set.
  const fold = createToolFold({
    name: 'my_fold',
    tools: [def('inner_tool')],
    handlers: { inner_tool: async () => 'inner done' },
  });
  const config = agent({
    name: 'Folded',
    toolDefinitions: [def('base_tool'), fold.definition],
    toolHandlers: { base_tool: async () => 'base done', ...fold.handler },
  });

  const sessionKey = 'fold-meta-test';
  const provider = scriptedProvider([
    callsTool('my_fold', 'f1'), // opens the exclusive fold; base_tool is now hidden
    callsTool('base_tool', 'b1'), // call to the tool the open fold hides
  ]);
  const driver = makeDriver(config, provider, sessionKey);

  await driver.sendMessage('go');

  // Look the result up by tool-call id: the fold-open result mentions my_fold
  // as well, so a substring match would not single out the base_tool result.
  const hiddenCallResult = driver
    .getHistory()
    .find((message) => message.role === 'tool' && message.toolResult?.toolCallId === 'b1');
  const guidance = hiddenCallResult?.toolResult?.content;
  assert.ok(guidance, 'calling a fold-hidden tool should produce guidance');
  assert.match(guidance!, /not available while the "my_fold" fold is open/);
  assert.match(guidance!, /close_my_fold/);

  const maskedCall = allToolCalls(driver).find(
    (toolCall) => toolCall.name === 'base_tool' && isChatToolCallUnknown(toolCall),
  );
  assert.ok(
    maskedCall && isChatToolCallUnknown(maskedCall) && maskedCall.stale === true,
    'the hidden call is stale',
  );

  const loggedFoldHidden = unresolvedEvents(sessionKey).some(
    (detail) =>
      detail.kind === 'fold-hidden' && detail.tool === 'base_tool' && detail.fold === 'my_fold',
  );
  assert.ok(loggedFoldHidden, 'a fold-hidden tool.unresolved meta event should be recorded');
});
|
|
264
|
+
|
|
265
|
+
stale('splits stale vs hallucinated tools on the unknown-tool-limit error', async () => {
  const sessionKey = 'stale-limit-test';
  clearMetaEventRegistry();

  // Same two-state agent shape as the first test: tool_a exists only in state
  // A and running it moves the agent to state B (tool_b only).
  let state: 'A' | 'B' = 'A';
  const config = agent({
    name: 'Stateful',
    toolDefinitions: () => [def(state === 'A' ? 'tool_a' : 'tool_b')],
    toolHandlers: () => {
      if (state !== 'A') {
        return { tool_b: async () => 'b done' };
      }
      return {
        tool_a: async () => {
          state = 'B';
          return 'advanced to B';
        },
      };
    },
  });

  // One real call moves the agent to B; the next 5 calls are all stale, and
  // the 5th trips DEFAULT_MAX_UNKNOWN_TOOL_CALLS and ends the turn.
  const staleAttempts = [0, 1, 2, 3, 4].map((i) => callsTool('tool_a', `stale-${i}`));
  const provider = scriptedProvider([callsTool('tool_a', 'real'), ...staleAttempts]);
  const driver = makeDriver(config, provider, sessionKey);

  const { reason } = await driver.sendMessage('go');
  assert.is(reason, 'done');

  const limitError = getMetaEvents(sessionKey).find(
    (event) => event.type === 'turn.error' && event.detail?.reason === 'unknown-tool-limit',
  );
  assert.ok(limitError, 'hitting the limit should record an unknown-tool-limit turn.error');
  const { staleTools, hallucinatedTools } = limitError!.detail!;
  assert.equal(staleTools, ['tool_a'], 'tool_a should be classified as stale');
  assert.equal(hallucinatedTools, [], 'nothing was hallucinated');

  // The download log holds every stale attempt, not only the final limit error.
  const staleEventCount = unresolvedEvents(sessionKey).filter(
    (detail) => detail.kind === 'stale',
  ).length;
  assert.is(
    staleEventCount,
    5,
    'each stale attempt should be recorded as its own tool.unresolved event',
  );

  // From the user's side the turn closes with the apology rather than a crash.
  const finalMessage = driver.getHistory().at(-1);
  assert.ok(finalMessage?.role === 'assistant' && finalMessage.content.startsWith("I'm sorry"));
});
|
|
314
|
+
|
|
315
|
+
// Every test of the suite is registered above; start the run.
stale.run();
|
|
@@ -192,6 +192,21 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
192
192
|
* hallucinated. Reset alongside `consecutiveUnknownToolCalls`.
|
|
193
193
|
*/
|
|
194
194
|
private readonly recentUnknownToolNames = new Set<string>();
|
|
195
|
+
/**
|
|
196
|
+
* Union of every tool name advertised at any point during the current agent
|
|
197
|
+
* activation. Lets the unknown-tool path tell a *stale* call (a real tool from
|
|
198
|
+
* an earlier state, now retired — or one an open exclusive fold is hiding)
|
|
199
|
+
* apart from a *hallucinated* one. Reset on agent swap in `applyAgent`.
|
|
200
|
+
*/
|
|
201
|
+
private readonly everSeenToolNames = new Set<string>();
|
|
202
|
+
/**
|
|
203
|
+
* Subset of the current unknown-tool streak that was stale (previously
|
|
204
|
+
* available) rather than hallucinated — surfaced separately on the
|
|
205
|
+
* `unknown-tool-limit` turn.error so triage can tell a state/prompt-design
|
|
206
|
+
* problem from a model that's inventing tools. Reset alongside
|
|
207
|
+
* `recentUnknownToolNames`.
|
|
208
|
+
*/
|
|
209
|
+
private readonly recentStaleToolNames = new Set<string>();
|
|
195
210
|
private readonly maxFoldOperations: number;
|
|
196
211
|
|
|
197
212
|
/** Sub-agents declared on the active agent config, keyed by name. */
|
|
@@ -320,6 +335,10 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
320
335
|
// Reset fold state when agent changes — each specialist starts fresh
|
|
321
336
|
this.foldStack = [];
|
|
322
337
|
this.consecutiveFoldOps = 0;
|
|
338
|
+
// Forget the previous agent's tools — "previously available" is scoped to
|
|
339
|
+
// the current activation, so a stateful agent accumulates its tools across
|
|
340
|
+
// states while a swap to a different specialist starts clean.
|
|
341
|
+
this.everSeenToolNames.clear();
|
|
323
342
|
}
|
|
324
343
|
|
|
325
344
|
/**
|
|
@@ -954,6 +973,22 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
954
973
|
return null;
|
|
955
974
|
}
|
|
956
975
|
|
|
976
|
+
/**
|
|
977
|
+
* If an open fold is hiding a previously-available tool, return the name of
|
|
978
|
+
* the fold to close to start getting it back. Only exclusive folds hide tools
|
|
979
|
+
* (they replace the tool set on open rather than extending it), so a base tool
|
|
980
|
+
* that was visible before the fold opened now sits in a fold-stack frame's
|
|
981
|
+
* `previousHandlers` but not in the live handler map. Only the top fold's
|
|
982
|
+
* `close_` tool is active, so that's always the actionable next step — even
|
|
983
|
+
* when the tool lives further down the stack, closing repeatedly walks back to
|
|
984
|
+
* it. Returns null when no open fold accounts for the tool.
|
|
985
|
+
*/
|
|
986
|
+
private foldHidingTool(toolName: string): string | null {
  // With no open folds there is nothing that could be hiding the tool.
  if (this.foldStack.length === 0) return null;
  // Match own keys only. The name comes from a model tool call, and a bare
  // index lookup would also resolve inherited members such as "constructor"
  // or "toString", wrongly reporting a fold as hiding a tool that was never
  // registered. The truthiness check keeps the original rule that an entry
  // without a handler does not count.
  const hidden = this.foldStack.some(
    (f) =>
      Object.prototype.hasOwnProperty.call(f.previousHandlers, toolName) &&
      Boolean(f.previousHandlers[toolName]),
  );
  // Whichever frame matched, the fold reported is always the top of the stack.
  return hidden ? this.foldStack[this.foldStack.length - 1].foldName : null;
}
|
|
991
|
+
|
|
957
992
|
/**
|
|
958
993
|
* Install the fold's inner tool set, replacing (exclusive) or extending (non-exclusive)
|
|
959
994
|
* the current tool set. Also injects the close tool. Does NOT touch the fold stack.
|
|
@@ -1133,6 +1168,12 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1133
1168
|
this.toolHandlers = await this.toolHandlersFactory(promptCtx);
|
|
1134
1169
|
}
|
|
1135
1170
|
|
|
1171
|
+
// Record everything advertised this turn so the unknown-tool path can tell
|
|
1172
|
+
// a stale tool (real earlier, retired now) from a hallucinated one. Runs
|
|
1173
|
+
// for both the static and factory cases; folds also flow through here as
|
|
1174
|
+
// their inner tools become visible on the iteration after they open.
|
|
1175
|
+
for (const def of this.toolDefinitions) this.everSeenToolNames.add(def.name);
|
|
1176
|
+
|
|
1136
1177
|
const resolvedSystemPrompt =
|
|
1137
1178
|
typeof this.systemPrompt === 'function'
|
|
1138
1179
|
? // oxlint-disable-next-line no-await-in-loop
|
|
@@ -1287,6 +1328,9 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1287
1328
|
{ toolCallId: string; content: string; subAgentTrace?: ChatMessage[] }
|
|
1288
1329
|
>();
|
|
1289
1330
|
const unknownToolIds = new Set<string>();
|
|
1331
|
+
// Subset of unknownToolIds that were stale (previously available) rather
|
|
1332
|
+
// than hallucinated — drives the `stale` UI flag back-patched below.
|
|
1333
|
+
const staleToolIds = new Set<string>();
|
|
1290
1334
|
let anyRealToolExecuted = false;
|
|
1291
1335
|
let hitUnknownToolLimit = false;
|
|
1292
1336
|
|
|
@@ -1342,23 +1386,80 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1342
1386
|
logger.debug(
|
|
1343
1387
|
`ChatDriver: model called folded tool "${tc.name}" — guiding to open "${containingFold}"`,
|
|
1344
1388
|
);
|
|
1389
|
+
recordMetaEvent(this.sessionKey, 'tool.unresolved', {
|
|
1390
|
+
tool: tc.name,
|
|
1391
|
+
agent: this.activeAgentName,
|
|
1392
|
+
kind: 'folded',
|
|
1393
|
+
fold: containingFold,
|
|
1394
|
+
});
|
|
1345
1395
|
executedById.set(tc.id, {
|
|
1346
1396
|
toolCallId: tc.id,
|
|
1347
1397
|
content: `"${tc.name}" is not directly available. It is inside the "${containingFold}" fold. Call ${containingFold} first to access it.`,
|
|
1348
1398
|
});
|
|
1349
1399
|
// Guidance does not count as a real iteration or fold op
|
|
1350
1400
|
iterations -= 1;
|
|
1351
|
-
|
|
1401
|
+
return;
|
|
1402
|
+
}
|
|
1403
|
+
|
|
1404
|
+
// Not in any registered fold. If the tool was advertised earlier
|
|
1405
|
+
// in this agent's lifetime it's *stale* (a stateful agent moved on,
|
|
1406
|
+
// or an exclusive fold is hiding it) rather than hallucinated — a
|
|
1407
|
+
// distinction worth making, because the model should stop retrying
|
|
1408
|
+
// a retired tool rather than treat the failure as a typo. Stale
|
|
1409
|
+
// calls still count toward the same unknown-tool limit (loop
|
|
1410
|
+
// protection); only the guidance and telemetry differ.
|
|
1411
|
+
if (this.everSeenToolNames.has(tc.name)) {
|
|
1352
1412
|
this.consecutiveUnknownToolCalls += 1;
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
)
|
|
1356
|
-
|
|
1413
|
+
const hidingFold = this.foldHidingTool(tc.name);
|
|
1414
|
+
let content: string;
|
|
1415
|
+
if (hidingFold) {
|
|
1416
|
+
content = `"${tc.name}" is not available while the "${hidingFold}" fold is open. Call close_${hidingFold} to return to the previous set of tools, then call ${tc.name}.`;
|
|
1417
|
+
logger.warn(
|
|
1418
|
+
`ChatDriver: tool "${tc.name}" is hidden behind open fold "${hidingFold}" (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS})`,
|
|
1419
|
+
);
|
|
1420
|
+
} else {
|
|
1421
|
+
content = `"${tc.name}" was available earlier but is not part of the current step — that step is complete, so do not call it again. Continue with the tools available now: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}.`;
|
|
1422
|
+
logger.warn(
|
|
1423
|
+
`ChatDriver: stale tool "${tc.name}" — advertised earlier this activation but retired in the current state (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS})`,
|
|
1424
|
+
);
|
|
1425
|
+
}
|
|
1426
|
+
recordMetaEvent(this.sessionKey, 'tool.unresolved', {
|
|
1427
|
+
tool: tc.name,
|
|
1428
|
+
agent: this.activeAgentName,
|
|
1429
|
+
kind: hidingFold ? 'fold-hidden' : 'stale',
|
|
1430
|
+
fold: hidingFold ?? undefined,
|
|
1431
|
+
consecutive: this.consecutiveUnknownToolCalls,
|
|
1432
|
+
max: DEFAULT_MAX_UNKNOWN_TOOL_CALLS,
|
|
1433
|
+
});
|
|
1434
|
+
executedById.set(tc.id, { toolCallId: tc.id, content });
|
|
1357
1435
|
unknownToolIds.add(tc.id);
|
|
1436
|
+
staleToolIds.add(tc.id);
|
|
1358
1437
|
this.recentUnknownToolNames.add(tc.name);
|
|
1438
|
+
this.recentStaleToolNames.add(tc.name);
|
|
1359
1439
|
if (this.consecutiveUnknownToolCalls >= DEFAULT_MAX_UNKNOWN_TOOL_CALLS) {
|
|
1360
1440
|
hitUnknownToolLimit = true;
|
|
1361
1441
|
}
|
|
1442
|
+
return;
|
|
1443
|
+
}
|
|
1444
|
+
|
|
1445
|
+
// Never advertised — a hallucinated tool name.
|
|
1446
|
+
this.consecutiveUnknownToolCalls += 1;
|
|
1447
|
+
logger.warn(
|
|
1448
|
+
`ChatDriver: no handler registered for tool "${tc.name}" (${this.consecutiveUnknownToolCalls}/${DEFAULT_MAX_UNKNOWN_TOOL_CALLS}). Available tools: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}`,
|
|
1449
|
+
);
|
|
1450
|
+
recordMetaEvent(this.sessionKey, 'tool.unresolved', {
|
|
1451
|
+
tool: tc.name,
|
|
1452
|
+
agent: this.activeAgentName,
|
|
1453
|
+
kind: 'unknown',
|
|
1454
|
+
consecutive: this.consecutiveUnknownToolCalls,
|
|
1455
|
+
max: DEFAULT_MAX_UNKNOWN_TOOL_CALLS,
|
|
1456
|
+
availableTools: Object.keys(this.toolHandlers),
|
|
1457
|
+
});
|
|
1458
|
+
executedById.set(tc.id, { toolCallId: tc.id, content: `Unknown tool: ${tc.name}` });
|
|
1459
|
+
unknownToolIds.add(tc.id);
|
|
1460
|
+
this.recentUnknownToolNames.add(tc.name);
|
|
1461
|
+
if (this.consecutiveUnknownToolCalls >= DEFAULT_MAX_UNKNOWN_TOOL_CALLS) {
|
|
1462
|
+
hitUnknownToolLimit = true;
|
|
1362
1463
|
}
|
|
1363
1464
|
return;
|
|
1364
1465
|
}
|
|
@@ -1396,6 +1497,7 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1396
1497
|
this.consecutiveFoldOps = 0;
|
|
1397
1498
|
this.consecutiveUnknownToolCalls = 0;
|
|
1398
1499
|
this.recentUnknownToolNames.clear();
|
|
1500
|
+
this.recentStaleToolNames.clear();
|
|
1399
1501
|
}
|
|
1400
1502
|
|
|
1401
1503
|
// Tag tool calls with fold UI metadata before appending results
|
|
@@ -1447,6 +1549,9 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1447
1549
|
foldPath: !isFoldOpen && !isFoldClose && foldPath.length > 0 ? foldPath : undefined,
|
|
1448
1550
|
unknown: isUnknown || undefined,
|
|
1449
1551
|
availableTools: isUnknown ? availableToolNames : undefined,
|
|
1552
|
+
// Distinguish a retired tool from a hallucinated one so the UI can
|
|
1553
|
+
// say "no longer available here" rather than "does not exist".
|
|
1554
|
+
stale: staleToolIds.has(tc.id) || undefined,
|
|
1450
1555
|
subAgentTrace: executedById.get(tc.id)?.subAgentTrace,
|
|
1451
1556
|
};
|
|
1452
1557
|
});
|
|
@@ -1470,10 +1575,25 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1470
1575
|
.map((tc) => tc.name),
|
|
1471
1576
|
]),
|
|
1472
1577
|
];
|
|
1578
|
+
// Stale tools were real earlier this activation; hallucinated tools
|
|
1579
|
+
// never existed. The hard stop counts both the same way, but the split
|
|
1580
|
+
// tells a triager whether the cause is a state/prompt-design problem
|
|
1581
|
+
// (stale) or a model inventing tool names (hallucinated).
|
|
1582
|
+
const staleTools = [
|
|
1583
|
+
...new Set([
|
|
1584
|
+
...this.recentStaleToolNames,
|
|
1585
|
+
...(response.toolCalls ?? [])
|
|
1586
|
+
.filter((tc) => staleToolIds.has(tc.id))
|
|
1587
|
+
.map((tc) => tc.name),
|
|
1588
|
+
]),
|
|
1589
|
+
];
|
|
1590
|
+
const hallucinatedTools = unknownTools.filter((t) => !staleTools.includes(t));
|
|
1473
1591
|
recordTurnError(this.sessionKey, 'unknown-tool-limit', {
|
|
1474
1592
|
agent: this.activeAgentName,
|
|
1475
1593
|
provider: this.lastResolvedProviderName,
|
|
1476
1594
|
unknownTools,
|
|
1595
|
+
staleTools,
|
|
1596
|
+
hallucinatedTools,
|
|
1477
1597
|
availableTools: Object.keys(this.toolHandlers),
|
|
1478
1598
|
});
|
|
1479
1599
|
this.appendToHistory({
|