@genesislcap/ai-assistant 14.467.1 → 14.467.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-assistant.api.json +39 -53
- package/dist/ai-assistant.d.ts +20 -25
- package/dist/dts/components/chat-driver/chat-driver.d.ts.map +1 -1
- package/dist/dts/index.d.ts +1 -0
- package/dist/dts/index.d.ts.map +1 -1
- package/dist/dts/main/main.d.ts +1 -20
- package/dist/dts/main/main.d.ts.map +1 -1
- package/dist/dts/state/debug-event-log.d.ts +16 -0
- package/dist/dts/state/debug-event-log.d.ts.map +1 -1
- package/dist/dts/state/debug-event-log.test.d.ts +2 -0
- package/dist/dts/state/debug-event-log.test.d.ts.map +1 -0
- package/dist/dts/utils/flatten-sub-agent-messages.d.ts +51 -0
- package/dist/dts/utils/flatten-sub-agent-messages.d.ts.map +1 -0
- package/dist/dts/utils/flatten-sub-agent-messages.test.d.ts +2 -0
- package/dist/dts/utils/flatten-sub-agent-messages.test.d.ts.map +1 -0
- package/dist/dts/utils/strip-agent-handlers.d.ts +29 -0
- package/dist/dts/utils/strip-agent-handlers.d.ts.map +1 -0
- package/dist/dts/utils/strip-agent-handlers.test.d.ts +2 -0
- package/dist/dts/utils/strip-agent-handlers.test.d.ts.map +1 -0
- package/dist/esm/components/chat-driver/chat-driver.js +48 -12
- package/dist/esm/components/chat-driver/chat-driver.test.js +29 -0
- package/dist/esm/main/main.js +14 -38
- package/dist/esm/state/debug-event-log.js +47 -0
- package/dist/esm/state/debug-event-log.test.js +67 -0
- package/dist/esm/utils/flatten-sub-agent-messages.js +49 -0
- package/dist/esm/utils/flatten-sub-agent-messages.test.js +139 -0
- package/dist/esm/utils/strip-agent-handlers.js +51 -0
- package/dist/esm/utils/strip-agent-handlers.test.js +81 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +16 -16
- package/src/components/chat-driver/chat-driver.test.ts +43 -0
- package/src/components/chat-driver/chat-driver.ts +64 -10
- package/src/index.ts +1 -0
- package/src/main/main.ts +16 -37
- package/src/state/debug-event-log.test.ts +89 -0
- package/src/state/debug-event-log.ts +48 -0
- package/src/utils/flatten-sub-agent-messages.test.ts +163 -0
- package/src/utils/flatten-sub-agent-messages.ts +88 -0
- package/src/utils/strip-agent-handlers.test.ts +99 -0
- package/src/utils/strip-agent-handlers.ts +52 -0
|
@@ -824,6 +824,49 @@ subagent(
|
|
|
824
824
|
},
|
|
825
825
|
);
|
|
826
826
|
|
|
827
|
+
subagent(
|
|
828
|
+
"folds the sub-agent's high-value meta events into the parent session, breadcrumbed",
|
|
829
|
+
async () => {
|
|
830
|
+
const sessionKey = 'subagent-meta-harvest-test';
|
|
831
|
+
clearMetaEventRegistry();
|
|
832
|
+
|
|
833
|
+
const worker = agent({
|
|
834
|
+
name: 'worker',
|
|
835
|
+
toolDefinitions: [def('real')],
|
|
836
|
+
toolHandlers: { real: async () => 'ok' },
|
|
837
|
+
});
|
|
838
|
+
const parent = delegatingParent(worker, () => {});
|
|
839
|
+
// The worker trips the unknown-tool limit — emitting tool.unresolved + a
|
|
840
|
+
// turn.error under its own transient session, which the parent harvests.
|
|
841
|
+
const provider = scriptedProvider([
|
|
842
|
+
callsTool('delegate', 'd1'),
|
|
843
|
+
...Array.from({ length: 5 }, (_unused, i) => callsTool('made_up', `u${i}`)),
|
|
844
|
+
]);
|
|
845
|
+
|
|
846
|
+
await makeDriver(parent, provider, sessionKey).sendMessage('go');
|
|
847
|
+
|
|
848
|
+
const events = getMetaEvents(sessionKey);
|
|
849
|
+
// The sub-agent's turn.error is folded onto the parent timeline, breadcrumbed.
|
|
850
|
+
const harvestedError = events.find(
|
|
851
|
+
(e) => e.type === 'turn.error' && e.detail?.subAgent === 'boss › worker',
|
|
852
|
+
);
|
|
853
|
+
assert.ok(harvestedError, "the sub-agent's turn.error is folded in, breadcrumbed");
|
|
854
|
+
assert.is(harvestedError!.detail?.reason, 'unknown-tool-limit');
|
|
855
|
+
// ... as are its per-attempt tool.unresolved signals.
|
|
856
|
+
assert.ok(
|
|
857
|
+
events.some((e) => e.type === 'tool.unresolved' && e.detail?.subAgent === 'boss › worker'),
|
|
858
|
+
"the sub-agent's tool.unresolved events are folded in too",
|
|
859
|
+
);
|
|
860
|
+
// High-volume, message-derivable events are NOT merged.
|
|
861
|
+
assert.not.ok(
|
|
862
|
+
events.some((e) => e.type === 'turn.start' && e.detail?.subAgent === 'boss › worker'),
|
|
863
|
+
'turn.start is intentionally excluded from the harvest',
|
|
864
|
+
);
|
|
865
|
+
// The child's transient bucket is cleared, never polluting the empty-key sink.
|
|
866
|
+
assert.is(getMetaEvents('').length, 0);
|
|
867
|
+
},
|
|
868
|
+
);
|
|
869
|
+
|
|
827
870
|
subagent(
|
|
828
871
|
'defaults to { ok: false, reason: "max_iterations" } when the sub-agent ends without completing',
|
|
829
872
|
async () => {
|
|
@@ -31,7 +31,15 @@ import type {
|
|
|
31
31
|
UnresolvedToolInput,
|
|
32
32
|
} from '../../config/config';
|
|
33
33
|
import { resolveChatProvider } from '../../config/validate-providers';
|
|
34
|
-
import {
|
|
34
|
+
import {
|
|
35
|
+
clearSession,
|
|
36
|
+
getMetaEvents,
|
|
37
|
+
mergeMetaEvents,
|
|
38
|
+
type MetaEventType,
|
|
39
|
+
recordMetaEvent,
|
|
40
|
+
recordTurnError,
|
|
41
|
+
recordTurnRetry,
|
|
42
|
+
} from '../../state/debug-event-log';
|
|
35
43
|
import { applyHistoryCap } from '../../utils/history-transform';
|
|
36
44
|
import { logger } from '../../utils/logger';
|
|
37
45
|
import { TOOL_FOLD_SYMBOL, type ToolFold } from '../../utils/tool-fold';
|
|
@@ -64,6 +72,23 @@ const MAX_EMPTY_RESPONSE_RETRIES = 3;
|
|
|
64
72
|
const MAX_SETUP_TRANSPORT_RETRIES = 3;
|
|
65
73
|
const SUGGESTIONS_HISTORY_WINDOW = 8;
|
|
66
74
|
|
|
75
|
+
/**
|
|
76
|
+
* Sub-agent meta events worth folding into the parent's debug timeline: the
|
|
77
|
+
* per-attempt and per-failure signals that do NOT otherwise surface in the
|
|
78
|
+
* sub-agent's (now hoisted) messages — a retried-away malformed/empty attempt
|
|
79
|
+
* produces no message, and the stale-vs-hallucinated/streak diagnostics live only
|
|
80
|
+
* on the event. High-volume, message-derivable events (turn.start/turn.end,
|
|
81
|
+
* provider.selected, context.updated) are intentionally excluded: read the
|
|
82
|
+
* sub-agent's hoisted messages for model/tokens/cost and turn-by-turn activity.
|
|
83
|
+
* See `ChatDriver.invokeSubAgent`.
|
|
84
|
+
*/
|
|
85
|
+
const HARVESTED_SUBAGENT_EVENTS: ReadonlySet<MetaEventType> = new Set([
|
|
86
|
+
'turn.retry',
|
|
87
|
+
'turn.error',
|
|
88
|
+
'tool.failed',
|
|
89
|
+
'tool.unresolved',
|
|
90
|
+
]);
|
|
91
|
+
|
|
67
92
|
/** Name reserved for the cross-agent handoff tool — injected by OrchestratingDriver. */
|
|
68
93
|
export const REQUEST_CONTINUATION_TOOL = 'request_continuation';
|
|
69
94
|
|
|
@@ -1288,7 +1313,23 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1288
1313
|
...(subConfig.primerHistory ?? []),
|
|
1289
1314
|
];
|
|
1290
1315
|
|
|
1291
|
-
|
|
1316
|
+
// Unique per-invocation id — reused for the lifecycle event bracket below —
|
|
1317
|
+
// and a child session key derived from it. The child files its meta events
|
|
1318
|
+
// under its own bucket (rather than the shared empty-key sink), so they can
|
|
1319
|
+
// be harvested into THIS session on completion and then discarded.
|
|
1320
|
+
const invocationId = crypto.randomUUID();
|
|
1321
|
+
const childSessionKey = `${this.sessionKey}::sub:${invocationId}`;
|
|
1322
|
+
const child = new ChatDriver(
|
|
1323
|
+
this.providerRegistry,
|
|
1324
|
+
{},
|
|
1325
|
+
[],
|
|
1326
|
+
undefined,
|
|
1327
|
+
undefined,
|
|
1328
|
+
undefined,
|
|
1329
|
+
undefined,
|
|
1330
|
+
undefined,
|
|
1331
|
+
childSessionKey,
|
|
1332
|
+
);
|
|
1292
1333
|
// Mark before the first turn so the child forces tool use and reports a
|
|
1293
1334
|
// typed failure (rather than user-facing text) if it never completes.
|
|
1294
1335
|
child.markAsSubAgent();
|
|
@@ -1330,9 +1371,6 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1330
1371
|
child.addEventListener('history-updated', forwardTrace);
|
|
1331
1372
|
child.addEventListener('provider-changed', forwardProviderChanged);
|
|
1332
1373
|
|
|
1333
|
-
// Unique per-invocation id so listeners can pair start/stop reliably even
|
|
1334
|
-
// when the same sub-agent runs multiple times in parallel.
|
|
1335
|
-
const invocationId = crypto.randomUUID();
|
|
1336
1374
|
const chatInputDuringExecution = options?.chatInputDuringExecution;
|
|
1337
1375
|
const lifecycleDetail = { name, invocationId, chatInputDuringExecution };
|
|
1338
1376
|
|
|
@@ -1389,14 +1427,30 @@ export class ChatDriver extends EventTarget implements AiDriver {
|
|
|
1389
1427
|
// settled) lifecycle, so the snapshot/completion reads below still work.
|
|
1390
1428
|
child.dispose();
|
|
1391
1429
|
this.dispatchEvent(new CustomEvent('sub-agent-stop', { detail: lifecycleDetail }));
|
|
1430
|
+
// Capture the child's diagnostics into THIS session, then ALWAYS discard its
|
|
1431
|
+
// transient bucket — done in the `finally` so an unexpected `sendMessage`
|
|
1432
|
+
// rejection (which propagates out of this method) can't orphan the bucket in
|
|
1433
|
+
// the registry, and a crashed sub-agent still leaves its turns/events behind.
|
|
1434
|
+
// Forward the child's per-LLM-call snapshots so they show as `kind:'turn'`
|
|
1435
|
+
// entries in the exported debug log, re-numbered under the activating parent turn.
|
|
1436
|
+
this.forwardSubAgentSnapshots(child.getTurnSnapshots());
|
|
1437
|
+
// Fold the sub-agent's high-value meta events (retries/errors/tool failures —
|
|
1438
|
+
// see HARVESTED_SUBAGENT_EVENTS) into THIS session, preserving their original
|
|
1439
|
+
// timestamps so they interleave within the subagent.started→completed/failed
|
|
1440
|
+
// bracket. Each is breadcrumbed `"<parent> › <sub-agent>"`, composing for a
|
|
1441
|
+
// nested sub-agent whose own breadcrumb the child already merged.
|
|
1442
|
+
const parentName = this.activeAgentName ?? '?';
|
|
1443
|
+
const harvested = getMetaEvents(childSessionKey)
|
|
1444
|
+
.filter((e) => HARVESTED_SUBAGENT_EVENTS.has(e.type))
|
|
1445
|
+
.map((e) => {
|
|
1446
|
+
const existing = e.detail?.subAgent as string | undefined;
|
|
1447
|
+
return { ...e, detail: { ...e.detail, subAgent: `${parentName} › ${existing ?? name}` } };
|
|
1448
|
+
});
|
|
1449
|
+
mergeMetaEvents(this.sessionKey, harvested);
|
|
1450
|
+
clearSession(childSessionKey);
|
|
1392
1451
|
}
|
|
1393
1452
|
|
|
1394
1453
|
const trace = child.getHistory() as ChatMessage[];
|
|
1395
|
-
// Forward the child's per-LLM-call snapshots onto this (parent) driver's
|
|
1396
|
-
// buffer so they show as `kind:'turn'` entries in the exported debug log,
|
|
1397
|
-
// re-numbered under the activating parent turn. Runs for both success and
|
|
1398
|
-
// failure so the sub-agent's turns are always visible.
|
|
1399
|
-
this.forwardSubAgentSnapshots(child.getTurnSnapshots());
|
|
1400
1454
|
|
|
1401
1455
|
if (timedOut) {
|
|
1402
1456
|
// Same failure shape as any other non-completion — the parent handler
|
package/src/index.ts
CHANGED
|
@@ -12,5 +12,6 @@ export * from './config/config';
|
|
|
12
12
|
export * from './config/define-stateful-agent';
|
|
13
13
|
export * from './config/fallback-agents';
|
|
14
14
|
export * from './utils/tool-fold';
|
|
15
|
+
export type { TimelineMessage } from './utils/flatten-sub-agent-messages';
|
|
15
16
|
export type { AiChatWidget } from './types/ai-chat-widget';
|
|
16
17
|
export { AiChatMarkdown } from './components/chat-markdown/chat-markdown';
|
package/src/main/main.ts
CHANGED
|
@@ -70,8 +70,10 @@ import {
|
|
|
70
70
|
import { ChatSuggestions } from '../suggestions/chat-suggestions';
|
|
71
71
|
import { AnimatedPanelToggle } from '../utils/animated-panel-toggle';
|
|
72
72
|
import { resolveExclusiveLoadingStyle } from '../utils/animation-exclusivity';
|
|
73
|
+
import { flattenSubAgentMessages } from '../utils/flatten-sub-agent-messages';
|
|
73
74
|
import { logger } from '../utils/logger';
|
|
74
75
|
import { filterVisibleMessages, trailingInteractionRow } from '../utils/message-partition';
|
|
76
|
+
import { stripAgentHandlers } from '../utils/strip-agent-handlers';
|
|
75
77
|
import { sumCosts } from '../utils/sum-costs';
|
|
76
78
|
import { expandToolTree } from '../utils/tool-fold';
|
|
77
79
|
import { styles } from './main.styles';
|
|
@@ -150,36 +152,6 @@ avoidTreeShaking(
|
|
|
150
152
|
AgentPicker,
|
|
151
153
|
);
|
|
152
154
|
|
|
153
|
-
/**
|
|
154
|
-
* Recursively strips non-serializable fields from an agent before storing in
|
|
155
|
-
* Redux. Drops **every function-valued property** — `toolHandlers`, the
|
|
156
|
-
* lifecycle/dispatch hooks (`onActivate`, `onDeactivate`, `getDebugSnapshot`,
|
|
157
|
-
* `onUnresolvedTool`), and the function form of the per-turn resolvers
|
|
158
|
-
* (`systemPrompt`, `toolDefinitions`, `displayName`, `provider`, `temperature`,
|
|
159
|
-
* `toolChoice`). Static forms (string / number / array / plain object) pass
|
|
160
|
-
* through unchanged; `subAgents` are stripped recursively.
|
|
161
|
-
*
|
|
162
|
-
* Filtering by *value* (any function) rather than by an explicit field list
|
|
163
|
-
* means a new function-valued field added to `AgentConfig` is handled
|
|
164
|
-
* automatically and can never leak a live function into serialized store
|
|
165
|
-
* state — no denylist to keep in sync. The live config on the driver stays the
|
|
166
|
-
* source of truth; the slice only holds this serializable projection, and
|
|
167
|
-
* functions are never read back from it.
|
|
168
|
-
*/
|
|
169
|
-
function stripHandlers(agent: AgentConfig): Omit<AgentConfig, 'toolHandlers'> {
|
|
170
|
-
const serializable: Record<string, unknown> = {};
|
|
171
|
-
for (const [key, value] of Object.entries(agent)) {
|
|
172
|
-
// `subAgents` is handled separately (recursively, below); drop everything
|
|
173
|
-
// function-valued.
|
|
174
|
-
if (key === 'subAgents' || typeof value === 'function') continue;
|
|
175
|
-
serializable[key] = value;
|
|
176
|
-
}
|
|
177
|
-
if (agent.subAgents?.length) {
|
|
178
|
-
serializable.subAgents = agent.subAgents.map(stripHandlers);
|
|
179
|
-
}
|
|
180
|
-
return serializable as unknown as Omit<AgentConfig, 'toolHandlers'>;
|
|
181
|
-
}
|
|
182
|
-
|
|
183
155
|
/**
|
|
184
156
|
* Foundation AI Assistant component.
|
|
185
157
|
*
|
|
@@ -375,7 +347,9 @@ export class FoundationAiAssistant extends GenesisElement {
|
|
|
375
347
|
// and Redux serializable-state middleware will warn. toolHandlers are never read
|
|
376
348
|
// back from the store; they are always sourced from this.agents when the driver
|
|
377
349
|
// is built.
|
|
378
|
-
this._sessionRef?.actions.aiAssistant.setActiveAgent(
|
|
350
|
+
this._sessionRef?.actions.aiAssistant.setActiveAgent(
|
|
351
|
+
value ? stripAgentHandlers(value) : undefined,
|
|
352
|
+
);
|
|
379
353
|
}
|
|
380
354
|
|
|
381
355
|
get suggestionsState(): SuggestionsState {
|
|
@@ -1587,9 +1561,13 @@ export class FoundationAiAssistant extends GenesisElement {
|
|
|
1587
1561
|
turn: 1,
|
|
1588
1562
|
message: 2,
|
|
1589
1563
|
};
|
|
1564
|
+
// Sub-agent conversations are stored nested on the parent tool call's
|
|
1565
|
+
// `subAgentTrace`; `flattenSubAgentMessages` hoists them to top-level
|
|
1566
|
+
// `kind: 'message'` entries (breadcrumbed + correlated, the nested copy moved
|
|
1567
|
+
// out — not duplicated) so the timeline reads as one chronological sequence.
|
|
1590
1568
|
const messages = this.driver?.getRawHistory?.() ?? this.messages;
|
|
1591
1569
|
const timeline = [
|
|
1592
|
-
...messages
|
|
1570
|
+
...flattenSubAgentMessages(messages),
|
|
1593
1571
|
...turns,
|
|
1594
1572
|
...(stateKey ? getMetaEvents(stateKey) : []).map((e) => ({ kind: 'event' as const, ...e })),
|
|
1595
1573
|
].sort((a, b) => {
|
|
@@ -1609,12 +1587,13 @@ export class FoundationAiAssistant extends GenesisElement {
|
|
|
1609
1587
|
meta: {
|
|
1610
1588
|
timestamp,
|
|
1611
1589
|
host: window.location.host,
|
|
1612
|
-
//
|
|
1613
|
-
// hooks, onUnresolvedTool, function-form resolvers)
|
|
1614
|
-
//
|
|
1615
|
-
//
|
|
1590
|
+
// stripAgentHandlers drops every function-valued field (handlers, lifecycle
|
|
1591
|
+
// hooks, onUnresolvedTool, function-form resolvers) AND object handler bags
|
|
1592
|
+
// like an object-form `toolHandlers`, and recurses subAgents — no manual
|
|
1593
|
+
// exclusion list to keep in sync. We only override toolDefinitions
|
|
1594
|
+
// afterwards to expand the fold tree for the log.
|
|
1616
1595
|
agentSummary: this.agents?.map((a) => ({
|
|
1617
|
-
...
|
|
1596
|
+
...stripAgentHandlers(a),
|
|
1618
1597
|
toolDefinitions: Array.isArray(a.toolDefinitions)
|
|
1619
1598
|
? typeof a.toolHandlers === 'function'
|
|
1620
1599
|
? // Static defs + dynamic handlers — can't walk fold tree
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { assert, createLogicSuite } from '@genesislcap/foundation-testing';
|
|
2
|
+
import {
|
|
3
|
+
clearMetaEventRegistry,
|
|
4
|
+
clearSession,
|
|
5
|
+
getMetaEvents,
|
|
6
|
+
type MetaEvent,
|
|
7
|
+
mergeMetaEvents,
|
|
8
|
+
recordMetaEvent,
|
|
9
|
+
} from './debug-event-log';
|
|
10
|
+
|
|
11
|
+
const event = (overrides: Partial<MetaEvent> = {}): MetaEvent => ({
|
|
12
|
+
index: 0,
|
|
13
|
+
timestamp: '2026-06-19T16:00:00.000Z',
|
|
14
|
+
type: 'turn.retry',
|
|
15
|
+
importance: 'normal',
|
|
16
|
+
...overrides,
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
const suite = createLogicSuite('debug-event-log merge/clear');
|
|
20
|
+
|
|
21
|
+
suite('mergeMetaEvents is a no-op for an empty batch', () => {
|
|
22
|
+
clearMetaEventRegistry();
|
|
23
|
+
mergeMetaEvents('k', []);
|
|
24
|
+
assert.is(getMetaEvents('k').length, 0);
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
suite('mergeMetaEvents preserves each event timestamp and re-indexes monotonically', () => {
|
|
28
|
+
clearMetaEventRegistry();
|
|
29
|
+
mergeMetaEvents('k', [
|
|
30
|
+
event({ index: 7, timestamp: '2026-06-19T16:00:01.000Z', detail: { a: 1 } }),
|
|
31
|
+
event({ index: 99, timestamp: '2026-06-19T16:00:02.000Z', detail: { b: 2 } }),
|
|
32
|
+
]);
|
|
33
|
+
const out = getMetaEvents('k');
|
|
34
|
+
assert.is(out.length, 2);
|
|
35
|
+
// original timestamps kept (not re-stamped) ...
|
|
36
|
+
assert.is(out[0].timestamp, '2026-06-19T16:00:01.000Z');
|
|
37
|
+
assert.is(out[1].timestamp, '2026-06-19T16:00:02.000Z');
|
|
38
|
+
// ... indices re-assigned from the target's counter, not carried from source.
|
|
39
|
+
assert.is(out[0].index, 0);
|
|
40
|
+
assert.is(out[1].index, 1);
|
|
41
|
+
assert.equal(out[0].detail, { a: 1 });
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
suite('mergeMetaEvents appends after existing events with a continuing index', () => {
|
|
45
|
+
clearMetaEventRegistry();
|
|
46
|
+
recordMetaEvent('k', 'turn.start'); // index 0, real timestamp
|
|
47
|
+
mergeMetaEvents('k', [event({ type: 'turn.error', importance: 'high' })]);
|
|
48
|
+
const out = getMetaEvents('k');
|
|
49
|
+
assert.is(out.length, 2);
|
|
50
|
+
assert.is(out[0].type, 'turn.start');
|
|
51
|
+
assert.is(out[1].type, 'turn.error');
|
|
52
|
+
assert.is(out[1].index, 1); // continues the monotonic counter
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
suite('mergeMetaEvents into a fresh key creates the bucket', () => {
|
|
56
|
+
clearMetaEventRegistry();
|
|
57
|
+
mergeMetaEvents('brand-new', [event()]);
|
|
58
|
+
assert.is(getMetaEvents('brand-new').length, 1);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
suite('mergeMetaEvents evicts oldest non-high events past the cap but keeps every high', () => {
|
|
62
|
+
clearMetaEventRegistry();
|
|
63
|
+
const highStamp = '2026-06-19T16:30:00.000Z';
|
|
64
|
+
// One `high` failure first, then a flood of `normal` events well past the cap.
|
|
65
|
+
const batch: MetaEvent[] = [
|
|
66
|
+
event({ type: 'turn.error', importance: 'high', timestamp: highStamp }),
|
|
67
|
+
];
|
|
68
|
+
for (let i = 0; i < 2000; i += 1) {
|
|
69
|
+
batch.push(event({ type: 'turn.retry', importance: 'normal' }));
|
|
70
|
+
}
|
|
71
|
+
mergeMetaEvents('k', batch);
|
|
72
|
+
const out = getMetaEvents('k');
|
|
73
|
+
// Eviction ran (bounded below what we merged) ...
|
|
74
|
+
assert.ok(out.length < 2001);
|
|
75
|
+
// ... and the lone high-importance failure was never dropped.
|
|
76
|
+
assert.is(out.filter((e) => e.importance === 'high').length, 1);
|
|
77
|
+
assert.ok(out.some((e) => e.timestamp === highStamp));
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
suite('clearSession drops one key without touching others', () => {
|
|
81
|
+
clearMetaEventRegistry();
|
|
82
|
+
recordMetaEvent('child', 'turn.retry');
|
|
83
|
+
recordMetaEvent('parent', 'turn.start');
|
|
84
|
+
clearSession('child');
|
|
85
|
+
assert.is(getMetaEvents('child').length, 0);
|
|
86
|
+
assert.is(getMetaEvents('parent').length, 1);
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
suite.run();
|
|
@@ -237,6 +237,52 @@ export function getMetaEvents(key: string): ReadonlyArray<MetaEvent> {
|
|
|
237
237
|
return registry.get(key)?.events ?? [];
|
|
238
238
|
}
|
|
239
239
|
|
|
240
|
+
/**
|
|
241
|
+
* Merge pre-built meta events into the timeline for `targetKey`, **preserving each
|
|
242
|
+
* event's original `timestamp`** (so it interleaves chronologically on export)
|
|
243
|
+
* while re-indexing onto the target buffer's monotonic counter. Used to fold a
|
|
244
|
+
* sub-agent's harvested events into the parent session — see
|
|
245
|
+
* `ChatDriver.invokeSubAgent`. Unlike {@link recordMetaEvent} it does not stamp a
|
|
246
|
+
* fresh timestamp; the same non-`high` eviction policy is applied once after the
|
|
247
|
+
* batch so the buffer stays bounded.
|
|
248
|
+
*/
|
|
249
|
+
export function mergeMetaEvents(targetKey: string, events: readonly MetaEvent[]): void {
|
|
250
|
+
if (events.length === 0) return;
|
|
251
|
+
let buffer = registry.get(targetKey);
|
|
252
|
+
if (!buffer) {
|
|
253
|
+
buffer = { events: [], next: 0 };
|
|
254
|
+
registry.set(targetKey, buffer);
|
|
255
|
+
}
|
|
256
|
+
for (const event of events) {
|
|
257
|
+
buffer.events.push({ ...event, index: buffer.next });
|
|
258
|
+
buffer.next += 1;
|
|
259
|
+
}
|
|
260
|
+
// Evict oldest non-`high` events until back under the cap — same policy as
|
|
261
|
+
// recordMetaEvent; `high` events (failures/limits) are never dropped. Single
|
|
262
|
+
// pass: take the overflow count, then drop that many of the oldest non-`high`
|
|
263
|
+
// events in order (if there aren't enough non-`high`, the buffer floats above
|
|
264
|
+
// the cap rather than losing a failure signal).
|
|
265
|
+
let toEvict = buffer.events.length - DEFAULT_MAX_META_EVENTS;
|
|
266
|
+
if (toEvict > 0) {
|
|
267
|
+
buffer.events = buffer.events.filter((e) => {
|
|
268
|
+
if (toEvict > 0 && e.importance !== 'high') {
|
|
269
|
+
toEvict -= 1;
|
|
270
|
+
return false;
|
|
271
|
+
}
|
|
272
|
+
return true;
|
|
273
|
+
});
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
* Drop the entire timeline for `key`. Used to discard a sub-agent's transient
|
|
279
|
+
* per-invocation session once its events have been harvested into the parent, so
|
|
280
|
+
* the module registry doesn't accumulate one orphaned bucket per sub-agent run.
|
|
281
|
+
*/
|
|
282
|
+
export function clearSession(key: string): void {
|
|
283
|
+
registry.delete(key);
|
|
284
|
+
}
|
|
285
|
+
|
|
240
286
|
/**
|
|
241
287
|
* Human/agent-facing guide emitted as the first key of the exported debug log,
|
|
242
288
|
* so whoever opens the JSON (often an AI agent) knows how to read it without
|
|
@@ -248,11 +294,13 @@ export const DEBUG_LOG_README: readonly string[] = [
|
|
|
248
294
|
'`timeline` is the entire session as one array, already sorted chronologically by `timestamp` (ISO 8601). Every entry has a `kind`.',
|
|
249
295
|
'Timestamps are millisecond-resolution; entries that share the same millisecond are ordered by a fixed kind rank (event, then turn, then message), which is a heuristic and may not reflect exact causal order within that millisecond — e.g. a user message and the turn it triggered, or a final assistant message and its turn.end event, can appear in either order depending on whether they landed in the same millisecond. Read the logical structure of a turn rather than over-interpreting the micro-ordering of co-timestamped entries of different kinds.',
|
|
250
296
|
"kind:'message' — the conversation. `role` is user/assistant/tool/system-event/synthetic-user; `agentName` says which agent produced it; `toolCalls`/`toolResult`/`interaction` carry tool and widget activity; `inputTokens`/`outputTokens`/`cost` are per-message LLM usage, and `externalCostUsd` is any non-LLM cost a widget reported for its own external service calls (folded into the session cost total alongside `cost`). On model-produced assistant messages, `model` is the concrete model id that generated it (e.g. 'gemini-2.5-flash-lite') and `providerName` is the registry slot it resolved under (e.g. a tier name like 'high'/'low', or the default); together they attribute the message — and any tool calls it carries — to an exact model even across a mid-session vendor/tier switch, where one slot name can map to different models before and after the switch. Both are undefined on any entry that is NOT an LLM response: non-assistant roles (user/tool/system-event) and 'synthetic-user' echoes; assistant interaction/widget entries (empty content carrying an `interaction` — a rendered widget, not a model turn); driver-authored assistant fallbacks (the timeout, repeated-malformed-call, and empty-response apology messages); and messages restored from a session persisted before these fields existed. One partial case: on a genuine model turn whose provider exposes no `getStatus` (or reports no model), `providerName` is still set but `model` alone is undefined. A 'synthetic-user' message is a display-only echo of an interaction outcome (e.g. the answer a widget reported): it renders on the user's side of the chat and `agentName` is the agent that created it, but it is never sent to the LLM — so it has no matching 'turn' and the model learns the outcome only from the corresponding tool result.",
|
|
297
|
+
"Sub-agent messages appear inline. When a tool delegates to a sub-agent (via `requestSubAgent`), the sub-agent's whole conversation — its own assistant/tool messages, each with their own `content`/`thinking`/`toolCalls`/`toolResult` and per-message `model`/`providerName`/`inputTokens`/`outputTokens`/`cost` — is hoisted into the timeline as ordinary kind:'message' entries, interleaved by timestamp right after the tool call that spawned them (so you read the delegation top-to-bottom). A hoisted entry is marked: `subAgentDepth` is its delegation depth (1 for a sub-agent, 2 for a sub-agent's sub-agent, …), `subAgentOf` is the id of the parent tool call that spawned it (correlates it back even when two sub-agents run in one parent turn), `subAgentName` is the sub-agent's own name, and `agentName` is rewritten to a `\"<parent> › <sub-agent>\"` breadcrumb (composing when nested, e.g. `\"UI Builder › Planner › Grounding\"`). The sub-agent's per-LLM-call snapshots also surface as kind:'turn' entries with an N-M `turnIndex`, and subagent.started/completed (or subagent.failed) events bracket the run. Per-message `cost` on hoisted entries is already part of the session total (it is summed from the un-flattened history), so summing the top-level timeline does NOT double-count.",
|
|
251
298
|
"kind:'turn' — one LLM call. `turnIndex` is a string: a top-level turn is the bare counter ('0', '1', …); a sub-agent's turns are numbered under the parent turn that activated them ('3-1', '3-2', …, and a nested sub-agent contributes '3-2-1', …), and `agentName` names the agent that ran the turn. `systemPrompt` and `toolNames` are what the model saw. A systemPrompt of '<repeated — identical to turn N>' was byte-identical to turn N and de-duplicated; the full prompt is shown whenever it changes (often because a stateful agent advanced), so prompt evolution is visible.",
|
|
252
299
|
"kind:'turn'.`agentSnapshot` — the active agent's own view of its internal state, captured at that turn. An agent opts into this by exposing a `getDebugSnapshot()` that returns JSON-serializable per-state info; stateful/flow agents wire it automatically, so you can watch a flow advance turn-by-turn (e.g. current step, cursor, collected fields, pending changes). Absent for agents that don't expose one.",
|
|
253
300
|
"kind:'event' — a meta/lifecycle event. `type` names it (see below); `detail` carries structured data. `detail.placement` is the emitting UI instance: 'bubble' (collapsed), 'panel' (popped-out), or 'standalone'.",
|
|
254
301
|
"Each 'event' also has an `importance`: 'high' (failures/limits — turn.error, tool.failed, subagent.failed, file.read-failed, suggestions.failed, context.threshold-crossed), 'normal' (session flow — connects, turns, retries, handoffs, agent/provider changes, interactions, sub-agent start/complete), or 'low' (skippable UI/bookkeeping noise — panel.toggled, attachment.added, driver.wired/unwired, context.updated). To skim, ignore importance:'low'; to triage a failure, filter to importance:'high' then read the nearby messages and turns. A 'high' turn.error is often preceded by one or more 'normal' turn.retry events for the same reason — read them together to see how many attempts were made before bailing. 'message' and 'turn' entries carry no importance — they are the substance, always read them.",
|
|
255
302
|
'Event types: assistant.connected/disconnected (mount + placement + whether the session was created or restored), assistant.popout/popin (window placement), driver.created/wired/unwired (which driver is live and why it stops/starts responding across a popout), state.changed (idle↔loading), turn.start/turn.end (turn boundary; turn.end carries durationMs), turn.retry (a recoverable in-turn retry — detail.reason plus attempt/maxAttempts; for malformed calls also finishMessage; for empty responses also the provider finishReason + thoughtsTokens + parts breakdown), turn.error (a turn failed or hit a guardrail — detail.reason is one of exception/malformed-function-call/empty-response/unknown-tool-limit/max-iterations, plus reason-specific diagnostics: attempts (for empty-response also finishReason + thoughtsTokens + a parts breakdown, distinguishing a thinking-only STOP from a truly empty turn), finishMessage, unknownTools (split into staleTools — real earlier this activation but retired by the current state or hidden behind an open exclusive fold — and hallucinatedTools — never advertised) + availableTools, iterations + limit, or name + message for exceptions), tool.failed (a tool threw), tool.unresolved (the model called a tool that could not be dispatched — detail.kind is folded/fold-hidden/stale/unknown, plus tool + agent and, for the counted kinds, the consecutive streak; the recurring lead-up to an unknown-tool-limit turn.error), subagent.started/completed/failed (the lifecycle of a `requestSubAgent` delegation — detail.agent names the sub-agent; these bracket the sub-agent turns that appear as kind:turn entries with an N-M `turnIndex`; subagent.failed also carries detail.reason, one of max_iterations/malformed_tool_call/empty_response/unknown_tool_limit/timeout), agent.handoff (routing; from=null is the initial activation), agent.pinned/unpinned (forced routing), provider.selected (model/provider for the upcoming turns), interaction.requested/resolved (blocking 
user widgets — explain quiet gaps; note that when a sub-agent opens a widget, detail.agent — and the agentName on the interaction message — is the HOST agent that owns the widget, NOT the sub-agent that asked, because widgets render and resolve on the host driver), context.updated/threshold-crossed (token + cost), panel.toggled, attachment.added, file.read-failed, suggestions.failed.',
|
|
303
|
+
'Sub-agent meta events: a sub-agent\'s own turn.retry/turn.error/tool.failed/tool.unresolved events are merged into this same timeline, tagged with `detail.subAgent` — a `"<parent> › <sub-agent>"` breadcrumb that composes when nested (e.g. `"UI Builder › Planner › Grounding"`) — and interleaved by their original timestamps within the subagent.started→completed/failed bracket. These are the per-attempt/per-failure signals that do NOT appear among the sub-agent\'s (hoisted) messages: a malformed/empty attempt that gets retried produces no message, and the stale-vs-hallucinated split and streak counts live only on the event. A sub-agent\'s high-volume, message-derivable events (turn.start/turn.end, provider.selected, context.updated) are intentionally NOT merged — read its hoisted messages for model/tokens/cost and turn-by-turn activity, and the bracketing subagent.* events for the run\'s span.',
|
|
256
304
|
"`meta` holds context captured at export time: agentSummary (full agent configs), context (active model, token usage, session cost), activeDebugSnapshot (the active agent's `getDebugSnapshot()` taken fresh at export — reflects state NOW, which may have advanced beyond the last turn's agentSnapshot), debug (optional host-supplied debug state), host, and the export timestamp.",
|
|
257
305
|
'To debug a failure: find the last turn.error or tool.failed, then read upward for the user message, the turn(s), and the agent/provider/state events that led into it.',
|
|
258
306
|
];
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import type { ChatMessage } from '@genesislcap/foundation-ai';
|
|
2
|
+
import { assert, createLogicSuite } from '@genesislcap/foundation-testing';
|
|
3
|
+
import { flattenSubAgentMessages } from './flatten-sub-agent-messages';
|
|
4
|
+
|
|
5
|
+
const msg = (overrides: Partial<ChatMessage> = {}): ChatMessage => ({
|
|
6
|
+
role: 'assistant',
|
|
7
|
+
content: '',
|
|
8
|
+
...overrides,
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
/** A parent assistant message whose single tool call delegated to a sub-agent. */
|
|
12
|
+
const delegating = (id: string, agentName: string, trace: ChatMessage[]): ChatMessage =>
|
|
13
|
+
msg({
|
|
14
|
+
agentName,
|
|
15
|
+
toolCalls: [{ id, name: 'requestSubAgent', args: {}, subAgentTrace: trace }],
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
const suite = createLogicSuite('flattenSubAgentMessages');
|
|
19
|
+
|
|
20
|
+
suite('returns an empty array for an empty list', () => {
|
|
21
|
+
assert.equal(flattenSubAgentMessages([]), []);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
suite('emits top-level messages unchanged and tags none as sub-agent', () => {
|
|
25
|
+
const out = flattenSubAgentMessages([
|
|
26
|
+
msg({ role: 'user', content: 'hi', agentName: 'UI Builder' }),
|
|
27
|
+
msg({ content: 'hello', agentName: 'UI Builder' }),
|
|
28
|
+
]);
|
|
29
|
+
assert.is(out.length, 2);
|
|
30
|
+
assert.is(out[0].kind, 'message');
|
|
31
|
+
assert.is(out[0].agentName, 'UI Builder');
|
|
32
|
+
// No depth/marker fields on top-level entries.
|
|
33
|
+
assert.is(out[0].subAgentDepth, undefined);
|
|
34
|
+
assert.is(out[0].subAgentName, undefined);
|
|
35
|
+
assert.is(out[1].subAgentOf, undefined);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
suite('hoists a sub-agent trace to top-level entries after the spawning message', () => {
|
|
39
|
+
const trace = [
|
|
40
|
+
msg({
|
|
41
|
+
agentName: 'UI Architecture Planner',
|
|
42
|
+
content: '',
|
|
43
|
+
toolCalls: [{ id: 'g', name: 'grep_source', args: {} }],
|
|
44
|
+
}),
|
|
45
|
+
msg({
|
|
46
|
+
role: 'tool',
|
|
47
|
+
agentName: 'UI Architecture Planner',
|
|
48
|
+
toolResult: { toolCallId: 'g', content: 'result' },
|
|
49
|
+
}),
|
|
50
|
+
];
|
|
51
|
+
const out = flattenSubAgentMessages([
|
|
52
|
+
delegating('tc1', 'UI Builder', trace),
|
|
53
|
+
msg({
|
|
54
|
+
role: 'tool',
|
|
55
|
+
agentName: 'UI Builder',
|
|
56
|
+
toolResult: { toolCallId: 'tc1', content: 'plan done' },
|
|
57
|
+
}),
|
|
58
|
+
]);
|
|
59
|
+
|
|
60
|
+
// parent tool-call msg, then 2 hoisted children, then the parent tool-result msg.
|
|
61
|
+
assert.is(out.length, 4);
|
|
62
|
+
assert.is(out[0].agentName, 'UI Builder'); // parent unchanged
|
|
63
|
+
assert.is(out[1].agentName, 'UI Builder › UI Architecture Planner');
|
|
64
|
+
assert.is(out[2].agentName, 'UI Builder › UI Architecture Planner');
|
|
65
|
+
assert.is(out[3].agentName, 'UI Builder'); // parent tool-result, back at top level
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
suite('marks hoisted entries with depth, raw name, and the spawning tool-call id', () => {
|
|
69
|
+
const out = flattenSubAgentMessages([
|
|
70
|
+
delegating('tc1', 'UI Builder', [msg({ agentName: 'UI Architecture Planner', content: 'x' })]),
|
|
71
|
+
]);
|
|
72
|
+
const child = out[1];
|
|
73
|
+
assert.is(child.subAgentDepth, 1);
|
|
74
|
+
assert.is(child.subAgentName, 'UI Architecture Planner');
|
|
75
|
+
assert.is(child.subAgentOf, 'tc1');
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
suite(
|
|
79
|
+
'moves the trace out — the emitted parent tool call no longer carries it (no duplication)',
|
|
80
|
+
() => {
|
|
81
|
+
const trace = [msg({ agentName: 'Planner', content: 'only once' })];
|
|
82
|
+
const out = flattenSubAgentMessages([delegating('tc1', 'UI Builder', trace)]);
|
|
83
|
+
// The trace is stripped from the emitted tool call ...
|
|
84
|
+
assert.is(out[0].toolCalls![0].subAgentTrace, undefined);
|
|
85
|
+
// ... and surfaces exactly once, as a hoisted top-level entry.
|
|
86
|
+
const hoisted = out.filter((e) => e.subAgentDepth === 1);
|
|
87
|
+
assert.is(hoisted.length, 1);
|
|
88
|
+
assert.is(hoisted[0].content, 'only once');
|
|
89
|
+
},
|
|
90
|
+
);
|
|
91
|
+
|
|
92
|
+
suite('preserves per-message usage fields on hoisted entries', () => {
|
|
93
|
+
const out = flattenSubAgentMessages([
|
|
94
|
+
delegating('tc1', 'UI Builder', [
|
|
95
|
+
msg({
|
|
96
|
+
agentName: 'Planner',
|
|
97
|
+
model: 'claude-sonnet-4-6',
|
|
98
|
+
inputTokens: 100,
|
|
99
|
+
outputTokens: 20,
|
|
100
|
+
cost: 0.5,
|
|
101
|
+
}),
|
|
102
|
+
]),
|
|
103
|
+
]);
|
|
104
|
+
const child = out[1];
|
|
105
|
+
assert.is(child.model, 'claude-sonnet-4-6');
|
|
106
|
+
assert.is(child.inputTokens, 100);
|
|
107
|
+
assert.is(child.outputTokens, 20);
|
|
108
|
+
assert.is(child.cost, 0.5);
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
suite('composes the breadcrumb for nested sub-agents', () => {
|
|
112
|
+
const grandchild = [msg({ agentName: 'Grounding', content: 'deep' })];
|
|
113
|
+
const childTrace = [delegating('tc2', 'Planner', grandchild)];
|
|
114
|
+
const out = flattenSubAgentMessages([delegating('tc1', 'UI Builder', childTrace)]);
|
|
115
|
+
|
|
116
|
+
// top: UI Builder | depth1: Planner | depth2: Grounding
|
|
117
|
+
const depth1 = out.find((e) => e.subAgentDepth === 1)!;
|
|
118
|
+
const depth2 = out.find((e) => e.subAgentDepth === 2)!;
|
|
119
|
+
assert.is(depth1.agentName, 'UI Builder › Planner');
|
|
120
|
+
assert.is(depth2.agentName, 'UI Builder › Planner › Grounding');
|
|
121
|
+
assert.is(depth2.subAgentName, 'Grounding');
|
|
122
|
+
assert.is(depth2.subAgentOf, 'tc2');
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
suite('correlates two sub-agents spawned in the same parent turn', () => {
|
|
126
|
+
const parent = msg({
|
|
127
|
+
agentName: 'UI Builder',
|
|
128
|
+
toolCalls: [
|
|
129
|
+
{
|
|
130
|
+
id: 'a',
|
|
131
|
+
name: 'requestSubAgent',
|
|
132
|
+
args: {},
|
|
133
|
+
subAgentTrace: [msg({ agentName: 'Planner', content: 'p' })],
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
id: 'b',
|
|
137
|
+
name: 'requestSubAgent',
|
|
138
|
+
args: {},
|
|
139
|
+
subAgentTrace: [msg({ agentName: 'Reviewer', content: 'r' })],
|
|
140
|
+
},
|
|
141
|
+
],
|
|
142
|
+
});
|
|
143
|
+
const out = flattenSubAgentMessages([parent]);
|
|
144
|
+
const planner = out.find((e) => e.subAgentName === 'Planner')!;
|
|
145
|
+
const reviewer = out.find((e) => e.subAgentName === 'Reviewer')!;
|
|
146
|
+
assert.is(planner.subAgentOf, 'a');
|
|
147
|
+
assert.is(reviewer.subAgentOf, 'b');
|
|
148
|
+
assert.is(planner.agentName, 'UI Builder › Planner');
|
|
149
|
+
assert.is(reviewer.agentName, 'UI Builder › Reviewer');
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
suite('falls back gracefully when the spawning message has no agentName', () => {
|
|
153
|
+
const out = flattenSubAgentMessages([
|
|
154
|
+
delegating('tc1', undefined as unknown as string, [
|
|
155
|
+
msg({ agentName: 'Planner', content: 'x' }),
|
|
156
|
+
]),
|
|
157
|
+
]);
|
|
158
|
+
// No parent name to prefix → show the sub-agent's own name unbreadcrumbed.
|
|
159
|
+
assert.is(out[1].agentName, 'Planner');
|
|
160
|
+
assert.is(out[1].subAgentName, 'Planner');
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
suite.run();
|