npm - @genesislcap/ai-assistant - Versions diffs - 14.467.1 → 14.467.2 - Mend

@genesislcap/ai-assistant 14.467.1 → 14.467.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/dist/ai-assistant.api.json +39 -53
package/dist/ai-assistant.d.ts +20 -25
package/dist/dts/components/chat-driver/chat-driver.d.ts.map +1 -1
package/dist/dts/index.d.ts +1 -0
package/dist/dts/index.d.ts.map +1 -1
package/dist/dts/main/main.d.ts +1 -20
package/dist/dts/main/main.d.ts.map +1 -1
package/dist/dts/state/debug-event-log.d.ts +16 -0
package/dist/dts/state/debug-event-log.d.ts.map +1 -1
package/dist/dts/state/debug-event-log.test.d.ts +2 -0
package/dist/dts/state/debug-event-log.test.d.ts.map +1 -0
package/dist/dts/utils/flatten-sub-agent-messages.d.ts +51 -0
package/dist/dts/utils/flatten-sub-agent-messages.d.ts.map +1 -0
package/dist/dts/utils/flatten-sub-agent-messages.test.d.ts +2 -0
package/dist/dts/utils/flatten-sub-agent-messages.test.d.ts.map +1 -0
package/dist/dts/utils/strip-agent-handlers.d.ts +29 -0
package/dist/dts/utils/strip-agent-handlers.d.ts.map +1 -0
package/dist/dts/utils/strip-agent-handlers.test.d.ts +2 -0
package/dist/dts/utils/strip-agent-handlers.test.d.ts.map +1 -0
package/dist/esm/components/chat-driver/chat-driver.js +48 -12
package/dist/esm/components/chat-driver/chat-driver.test.js +29 -0
package/dist/esm/main/main.js +14 -38
package/dist/esm/state/debug-event-log.js +47 -0
package/dist/esm/state/debug-event-log.test.js +67 -0
package/dist/esm/utils/flatten-sub-agent-messages.js +49 -0
package/dist/esm/utils/flatten-sub-agent-messages.test.js +139 -0
package/dist/esm/utils/strip-agent-handlers.js +51 -0
package/dist/esm/utils/strip-agent-handlers.test.js +81 -0
package/dist/tsconfig.tsbuildinfo +1 -1
package/package.json +16 -16
package/src/components/chat-driver/chat-driver.test.ts +43 -0
package/src/components/chat-driver/chat-driver.ts +64 -10
package/src/index.ts +1 -0
package/src/main/main.ts +16 -37
package/src/state/debug-event-log.test.ts +89 -0
package/src/state/debug-event-log.ts +48 -0
package/src/utils/flatten-sub-agent-messages.test.ts +163 -0
package/src/utils/flatten-sub-agent-messages.ts +88 -0
package/src/utils/strip-agent-handlers.test.ts +99 -0
package/src/utils/strip-agent-handlers.ts +52 -0

package/src/components/chat-driver/chat-driver.test.ts CHANGED Viewed

@@ -824,6 +824,49 @@ subagent(
   },
 );
+subagent(
+  "folds the sub-agent's high-value meta events into the parent session, breadcrumbed",
+  async () => {
+    const sessionKey = 'subagent-meta-harvest-test';
+    clearMetaEventRegistry();
+    const worker = agent({
+      name: 'worker',
+      toolDefinitions: [def('real')], // 'real' is the only tool this worker defines
+      toolHandlers: { real: async () => 'ok' },
+    });
+    const parent = delegatingParent(worker, () => {});
+    // The worker trips the unknown-tool limit — emitting tool.unresolved + a
+    // turn.error under its own transient session, which the parent harvests.
+    const provider = scriptedProvider([
+      callsTool('delegate', 'd1'),
+      ...Array.from({ length: 5 }, (_unused, i) => callsTool('made_up', `u${i}`)), // five scripted calls to 'made_up', a tool the worker does not define
+    ]);
+    await makeDriver(parent, provider, sessionKey).sendMessage('go');
+    const events = getMetaEvents(sessionKey);
+    // The sub-agent's turn.error is folded onto the parent timeline, breadcrumbed.
+    const harvestedError = events.find(
+      (e) => e.type === 'turn.error' && e.detail?.subAgent === 'boss › worker', // NOTE(review): 'boss' is presumably the parent name set by delegatingParent — confirm
+    );
+    assert.ok(harvestedError, "the sub-agent's turn.error is folded in, breadcrumbed");
+    assert.is(harvestedError!.detail?.reason, 'unknown-tool-limit');
+    // ... as are its per-attempt tool.unresolved signals.
+    assert.ok(
+      events.some((e) => e.type === 'tool.unresolved' && e.detail?.subAgent === 'boss › worker'),
+      "the sub-agent's tool.unresolved events are folded in too",
+    );
+    // High-volume, message-derivable events are NOT merged.
+    assert.not.ok(
+      events.some((e) => e.type === 'turn.start' && e.detail?.subAgent === 'boss › worker'),
+      'turn.start is intentionally excluded from the harvest',
+    );
+    // The child's transient bucket is cleared, never polluting the empty-key sink.
+    assert.is(getMetaEvents('').length, 0); // only the empty-key bucket is inspected here, not the child's own key
+  },
+);
 subagent(
   'defaults to { ok: false, reason: "max_iterations" } when the sub-agent ends without completing',
   async () => {

package/src/components/chat-driver/chat-driver.ts CHANGED Viewed

@@ -31,7 +31,15 @@ import type {
   UnresolvedToolInput,
 } from '../../config/config';
 import { resolveChatProvider } from '../../config/validate-providers';
-import { recordMetaEvent, recordTurnError, recordTurnRetry } from '../../state/debug-event-log';
+import {
+  clearSession,
+  getMetaEvents,
+  mergeMetaEvents,
+  type MetaEventType,
+  recordMetaEvent,
+  recordTurnError,
+  recordTurnRetry,
+} from '../../state/debug-event-log';
 import { applyHistoryCap } from '../../utils/history-transform';
 import { logger } from '../../utils/logger';
 import { TOOL_FOLD_SYMBOL, type ToolFold } from '../../utils/tool-fold';
@@ -64,6 +72,23 @@ const MAX_EMPTY_RESPONSE_RETRIES = 3;
 const MAX_SETUP_TRANSPORT_RETRIES = 3;
 const SUGGESTIONS_HISTORY_WINDOW = 8;
+/**
+ * Sub-agent meta events worth folding into the parent's debug timeline: the
+ * per-attempt and per-failure signals that do NOT otherwise surface in the
+ * sub-agent's (now hoisted) messages — a retried-away malformed/empty attempt
+ * produces no message, and the stale-vs-hallucinated/streak diagnostics live only
+ * on the event. High-volume, message-derivable events (turn.start/turn.end,
+ * provider.selected, context.updated) are intentionally excluded: read the
+ * sub-agent's hoisted messages for model/tokens/cost and turn-by-turn activity.
+ * See `ChatDriver.invokeSubAgent`.
+ *
+ * Acts as an allow-list: a child event is harvested only when its `type` is a
+ * member of this set.
+ */
+const HARVESTED_SUBAGENT_EVENTS: ReadonlySet<MetaEventType> = new Set([
+  'turn.retry', // a recoverable in-turn retry
+  'turn.error', // a turn failed or hit a guardrail
+  'tool.failed', // a tool threw
+  'tool.unresolved', // the model called a tool that could not be dispatched
+]);
 /** Name reserved for the cross-agent handoff tool — injected by OrchestratingDriver. */
 export const REQUEST_CONTINUATION_TOOL = 'request_continuation';
@@ -1288,7 +1313,23 @@ export class ChatDriver extends EventTarget implements AiDriver {
       ...(subConfig.primerHistory ?? []),
     ];
-    const child = new ChatDriver(this.providerRegistry);
+    // Unique per-invocation id — reused for the lifecycle event bracket below —
+    // and a child session key derived from it. The child files its meta events
+    // under its own bucket (rather than the shared empty-key sink), so they can
+    // be harvested into THIS session on completion and then discarded.
+    const invocationId = crypto.randomUUID();
+    const childSessionKey = `${this.sessionKey}::sub:${invocationId}`;
+    const child = new ChatDriver(
+      this.providerRegistry,
+      {},
+      [],
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      childSessionKey,
+    );
     // Mark before the first turn so the child forces tool use and reports a
     // typed failure (rather than user-facing text) if it never completes.
     child.markAsSubAgent();
@@ -1330,9 +1371,6 @@ export class ChatDriver extends EventTarget implements AiDriver {
     child.addEventListener('history-updated', forwardTrace);
     child.addEventListener('provider-changed', forwardProviderChanged);
-    // Unique per-invocation id so listeners can pair start/stop reliably even
-    // when the same sub-agent runs multiple times in parallel.
-    const invocationId = crypto.randomUUID();
     const chatInputDuringExecution = options?.chatInputDuringExecution;
     const lifecycleDetail = { name, invocationId, chatInputDuringExecution };
@@ -1389,14 +1427,30 @@ export class ChatDriver extends EventTarget implements AiDriver {
       // settled) lifecycle, so the snapshot/completion reads below still work.
       child.dispose();
       this.dispatchEvent(new CustomEvent('sub-agent-stop', { detail: lifecycleDetail }));
+      // Capture the child's diagnostics into THIS session, then ALWAYS discard its
+      // transient bucket — done in the `finally` so an unexpected `sendMessage`
+      // rejection (which propagates out of this method) can't orphan the bucket in
+      // the registry, and a crashed sub-agent still leaves its turns/events behind.
+      // Forward the child's per-LLM-call snapshots so they show as `kind:'turn'`
+      // entries in the exported debug log, re-numbered under the activating parent turn.
+      this.forwardSubAgentSnapshots(child.getTurnSnapshots());
+      // Fold the sub-agent's high-value meta events (retries/errors/tool failures —
+      // see HARVESTED_SUBAGENT_EVENTS) into THIS session, preserving their original
+      // timestamps so they interleave within the subagent.started→completed/failed
+      // bracket. Each is breadcrumbed `"<parent> › <sub-agent>"`, composing for a
+      // nested sub-agent whose own breadcrumb the child already merged.
+      const parentName = this.activeAgentName ?? '?';
+      const harvested = getMetaEvents(childSessionKey)
+        .filter((e) => HARVESTED_SUBAGENT_EVENTS.has(e.type))
+        .map((e) => {
+          const existing = e.detail?.subAgent as string | undefined;
+          return { ...e, detail: { ...e.detail, subAgent: `${parentName} › ${existing ?? name}` } };
+        });
+      mergeMetaEvents(this.sessionKey, harvested);
+      clearSession(childSessionKey);
     }
     const trace = child.getHistory() as ChatMessage[];
-    // Forward the child's per-LLM-call snapshots onto this (parent) driver's
-    // buffer so they show as `kind:'turn'` entries in the exported debug log,
-    // re-numbered under the activating parent turn. Runs for both success and
-    // failure so the sub-agent's turns are always visible.
-    this.forwardSubAgentSnapshots(child.getTurnSnapshots());
     if (timedOut) {
       // Same failure shape as any other non-completion — the parent handler

package/src/index.ts CHANGED Viewed

@@ -12,5 +12,6 @@ export * from './config/config';
 export * from './config/define-stateful-agent';
 export * from './config/fallback-agents';
 export * from './utils/tool-fold';
+export type { TimelineMessage } from './utils/flatten-sub-agent-messages';
 export type { AiChatWidget } from './types/ai-chat-widget';
 export { AiChatMarkdown } from './components/chat-markdown/chat-markdown';

package/src/main/main.ts CHANGED Viewed

@@ -70,8 +70,10 @@ import {
 import { ChatSuggestions } from '../suggestions/chat-suggestions';
 import { AnimatedPanelToggle } from '../utils/animated-panel-toggle';
 import { resolveExclusiveLoadingStyle } from '../utils/animation-exclusivity';
+import { flattenSubAgentMessages } from '../utils/flatten-sub-agent-messages';
 import { logger } from '../utils/logger';
 import { filterVisibleMessages, trailingInteractionRow } from '../utils/message-partition';
+import { stripAgentHandlers } from '../utils/strip-agent-handlers';
 import { sumCosts } from '../utils/sum-costs';
 import { expandToolTree } from '../utils/tool-fold';
 import { styles } from './main.styles';
@@ -150,36 +152,6 @@ avoidTreeShaking(
   AgentPicker,
 );
-/**
- * Recursively strips non-serializable fields from an agent before storing in
- * Redux. Drops **every function-valued property** — `toolHandlers`, the
- * lifecycle/dispatch hooks (`onActivate`, `onDeactivate`, `getDebugSnapshot`,
- * `onUnresolvedTool`), and the function form of the per-turn resolvers
- * (`systemPrompt`, `toolDefinitions`, `displayName`, `provider`, `temperature`,
- * `toolChoice`). Static forms (string / number / array / plain object) pass
- * through unchanged; `subAgents` are stripped recursively.
- *
- * Filtering by *value* (any function) rather than by an explicit field list
- * means a new function-valued field added to `AgentConfig` is handled
- * automatically and can never leak a live function into serialized store
- * state — no denylist to keep in sync. The live config on the driver stays the
- * source of truth; the slice only holds this serializable projection, and
- * functions are never read back from it.
- */
-function stripHandlers(agent: AgentConfig): Omit<AgentConfig, 'toolHandlers'> {
-  const serializable: Record<string, unknown> = {};
-  for (const [key, value] of Object.entries(agent)) {
-    // `subAgents` is handled separately (recursively, below); drop everything
-    // function-valued.
-    if (key === 'subAgents' || typeof value === 'function') continue;
-    serializable[key] = value;
-  }
-  if (agent.subAgents?.length) {
-    serializable.subAgents = agent.subAgents.map(stripHandlers);
-  }
-  return serializable as unknown as Omit<AgentConfig, 'toolHandlers'>;
-}
 /**
  * Foundation AI Assistant component.
  *
@@ -375,7 +347,9 @@ export class FoundationAiAssistant extends GenesisElement {
     // and Redux serializable-state middleware will warn. toolHandlers are never read
     // back from the store; they are always sourced from this.agents when the driver
     // is built.
-    this._sessionRef?.actions.aiAssistant.setActiveAgent(value ? stripHandlers(value) : undefined);
+    this._sessionRef?.actions.aiAssistant.setActiveAgent(
+      value ? stripAgentHandlers(value) : undefined,
+    );
   }
   get suggestionsState(): SuggestionsState {
@@ -1587,9 +1561,13 @@ export class FoundationAiAssistant extends GenesisElement {
       turn: 1,
       message: 2,
     };
+    // Sub-agent conversations are stored nested on the parent tool call's
+    // `subAgentTrace`; `flattenSubAgentMessages` hoists them to top-level
+    // `kind: 'message'` entries (breadcrumbed + correlated, the nested copy moved
+    // out — not duplicated) so the timeline reads as one chronological sequence.
     const messages = this.driver?.getRawHistory?.() ?? this.messages;
     const timeline = [
-      ...messages.map((m) => ({ kind: 'message' as const, ...m })),
+      ...flattenSubAgentMessages(messages),
       ...turns,
       ...(stateKey ? getMetaEvents(stateKey) : []).map((e) => ({ kind: 'event' as const, ...e })),
     ].sort((a, b) => {
@@ -1609,12 +1587,13 @@ export class FoundationAiAssistant extends GenesisElement {
       meta: {
         timestamp,
         host: window.location.host,
-        // stripHandlers drops every function-valued field (handlers, lifecycle
-        // hooks, onUnresolvedTool, function-form resolvers) and recurses
-        // subAgents — no manual exclusion list to keep in sync. We only override
-        // toolDefinitions afterwards to expand the fold tree for the log.
+        // stripAgentHandlers drops every function-valued field (handlers, lifecycle
+        // hooks, onUnresolvedTool, function-form resolvers) AND object handler bags
+        // like an object-form `toolHandlers`, and recurses subAgents — no manual
+        // exclusion list to keep in sync. We only override toolDefinitions
+        // afterwards to expand the fold tree for the log.
         agentSummary: this.agents?.map((a) => ({
-          ...stripHandlers(a),
+          ...stripAgentHandlers(a),
           toolDefinitions: Array.isArray(a.toolDefinitions)
             ? typeof a.toolHandlers === 'function'
               ? // Static defs + dynamic handlers — can't walk fold tree

package/src/state/debug-event-log.test.ts ADDED Viewed

@@ -0,0 +1,89 @@
+import { assert, createLogicSuite } from '@genesislcap/foundation-testing';
+import {
+  clearMetaEventRegistry,
+  clearSession,
+  getMetaEvents,
+  type MetaEvent,
+  mergeMetaEvents,
+  recordMetaEvent,
+} from './debug-event-log';
+const event = (overrides: Partial<MetaEvent> = {}): MetaEvent => ({ // test fixture: a baseline MetaEvent that each test tweaks via `overrides`
+  index: 0,
+  timestamp: '2026-06-19T16:00:00.000Z',
+  type: 'turn.retry',
+  importance: 'normal',
+  ...overrides, // spread last so any supplied field replaces the default above
+});
+const suite = createLogicSuite('debug-event-log merge/clear');
+suite('mergeMetaEvents is a no-op for an empty batch', () => {
+  clearMetaEventRegistry();
+  mergeMetaEvents('k', []);
+  assert.is(getMetaEvents('k').length, 0);
+});
+suite('mergeMetaEvents preserves each event timestamp and re-indexes monotonically', () => {
+  clearMetaEventRegistry();
+  mergeMetaEvents('k', [
+    event({ index: 7, timestamp: '2026-06-19T16:00:01.000Z', detail: { a: 1 } }), // source indices 7 and 99 are deliberately arbitrary
+    event({ index: 99, timestamp: '2026-06-19T16:00:02.000Z', detail: { b: 2 } }),
+  ]);
+  const out = getMetaEvents('k');
+  assert.is(out.length, 2);
+  // original timestamps kept (not re-stamped) ...
+  assert.is(out[0].timestamp, '2026-06-19T16:00:01.000Z');
+  assert.is(out[1].timestamp, '2026-06-19T16:00:02.000Z');
+  // ... indices re-assigned from the target's counter, not carried from source.
+  assert.is(out[0].index, 0);
+  assert.is(out[1].index, 1);
+  assert.equal(out[0].detail, { a: 1 });
+});
+suite('mergeMetaEvents appends after existing events with a continuing index', () => {
+  clearMetaEventRegistry();
+  recordMetaEvent('k', 'turn.start'); // index 0, real timestamp
+  mergeMetaEvents('k', [event({ type: 'turn.error', importance: 'high' })]);
+  const out = getMetaEvents('k');
+  assert.is(out.length, 2);
+  assert.is(out[0].type, 'turn.start');
+  assert.is(out[1].type, 'turn.error');
+  assert.is(out[1].index, 1); // continues the monotonic counter
+});
+suite('mergeMetaEvents into a fresh key creates the bucket', () => {
+  clearMetaEventRegistry();
+  mergeMetaEvents('brand-new', [event()]);
+  assert.is(getMetaEvents('brand-new').length, 1);
+});
+suite('mergeMetaEvents evicts oldest non-high events past the cap but keeps every high', () => {
+  clearMetaEventRegistry();
+  const highStamp = '2026-06-19T16:30:00.000Z';
+  // One `high` failure first, then a flood of `normal` events well past the cap.
+  const batch: MetaEvent[] = [
+    event({ type: 'turn.error', importance: 'high', timestamp: highStamp }),
+  ];
+  for (let i = 0; i < 2000; i += 1) {
+    batch.push(event({ type: 'turn.retry', importance: 'normal' }));
+  }
+  mergeMetaEvents('k', batch); // 2001 events in a single batch: 1 high + 2000 normal
+  const out = getMetaEvents('k');
+  // Eviction ran (bounded below what we merged) ...
+  assert.ok(out.length < 2001); // NOTE(review): only holds while DEFAULT_MAX_META_EVENTS is below 2001 — confirm the cap
+  // ... and the lone high-importance failure was never dropped.
+  assert.is(out.filter((e) => e.importance === 'high').length, 1);
+  assert.ok(out.some((e) => e.timestamp === highStamp)); // the fixture's default timestamp differs, so only the high event can match
+});
+suite('clearSession drops one key without touching others', () => {
+  clearMetaEventRegistry();
+  recordMetaEvent('child', 'turn.retry');
+  recordMetaEvent('parent', 'turn.start');
+  clearSession('child');
+  assert.is(getMetaEvents('child').length, 0);
+  assert.is(getMetaEvents('parent').length, 1);
+});
+suite.run();

package/src/state/debug-event-log.ts CHANGED Viewed

@@ -237,6 +237,52 @@ export function getMetaEvents(key: string): ReadonlyArray<MetaEvent> {
   return registry.get(key)?.events ?? [];
 }
+/**
+ * Merge pre-built meta events into the timeline for `targetKey`, **preserving each
+ * event's original `timestamp`** (so it interleaves chronologically on export)
+ * while re-indexing onto the target buffer's monotonic counter. Used to fold a
+ * sub-agent's harvested events into the parent session — see
+ * `ChatDriver.invokeSubAgent`. Unlike {@link recordMetaEvent} it does not stamp a
+ * fresh timestamp; the same non-`high` eviction policy is applied once after the
+ * batch so the buffer stays bounded.
+ *
+ * @param targetKey - Registry key of the timeline that receives the events; a
+ *   bucket is created when none exists yet.
+ * @param events - Events to append, in the order given. An empty array returns
+ *   immediately, before any bucket is looked up or created. Each event is
+ *   shallow-copied with a new `index`, so its `detail` object is shared with the
+ *   caller's event rather than cloned.
+ *
+ * Eviction walks the buffer in array order, so "oldest" means earliest position
+ * in the buffer, not earliest `timestamp` — merged events sit at the end of the
+ * buffer even when their timestamps predate existing entries. When eviction
+ * runs, `buffer.events` is replaced by a new array rather than mutated in place.
+ */
+export function mergeMetaEvents(targetKey: string, events: readonly MetaEvent[]): void {
+  if (events.length === 0) return;
+  let buffer = registry.get(targetKey);
+  if (!buffer) {
+    buffer = { events: [], next: 0 };
+    registry.set(targetKey, buffer);
+  }
+  for (const event of events) {
+    buffer.events.push({ ...event, index: buffer.next }); // the source `index` is overwritten; every other field is carried over as-is
+    buffer.next += 1;
+  }
+  // Evict oldest non-`high` events until back under the cap — same policy as
+  // recordMetaEvent; `high` events (failures/limits) are never dropped. Single
+  // pass: take the overflow count, then drop that many of the oldest non-`high`
+  // events in order (if there aren't enough non-`high`, the buffer floats above
+  // the cap rather than losing a failure signal).
+  let toEvict = buffer.events.length - DEFAULT_MAX_META_EVENTS;
+  if (toEvict > 0) {
+    buffer.events = buffer.events.filter((e) => {
+      if (toEvict > 0 && e.importance !== 'high') {
+        toEvict -= 1; // the callback counts down the remaining overflow as it drops entries
+        return false;
+      }
+      return true;
+    });
+  }
+}
+/**
+ * Drop the entire timeline for `key`. Used to discard a sub-agent's transient
+ * per-invocation session once its events have been harvested into the parent, so
+ * the module registry doesn't accumulate one orphaned bucket per sub-agent run.
+ *
+ * @param key - Registry key whose bucket is removed; only this key is deleted.
+ *   NOTE(review): `registry` is declared outside this view — presumably a `Map`,
+ *   in which case deleting an absent key is a harmless no-op; confirm.
+ */
+export function clearSession(key: string): void {
+  registry.delete(key);
+}
 /**
  * Human/agent-facing guide emitted as the first key of the exported debug log,
  * so whoever opens the JSON (often an AI agent) knows how to read it without
@@ -248,11 +294,13 @@ export const DEBUG_LOG_README: readonly string[] = [
   '`timeline` is the entire session as one array, already sorted chronologically by `timestamp` (ISO 8601). Every entry has a `kind`.',
   'Timestamps are millisecond-resolution; entries that share the same millisecond are ordered by a fixed kind rank (event, then turn, then message), which is a heuristic and may not reflect exact causal order within that millisecond — e.g. a user message and the turn it triggered, or a final assistant message and its turn.end event, can appear in either order depending on whether they landed in the same millisecond. Read the logical structure of a turn rather than over-interpreting the micro-ordering of co-timestamped entries of different kinds.',
   "kind:'message' — the conversation. `role` is user/assistant/tool/system-event/synthetic-user; `agentName` says which agent produced it; `toolCalls`/`toolResult`/`interaction` carry tool and widget activity; `inputTokens`/`outputTokens`/`cost` are per-message LLM usage, and `externalCostUsd` is any non-LLM cost a widget reported for its own external service calls (folded into the session cost total alongside `cost`). On model-produced assistant messages, `model` is the concrete model id that generated it (e.g. 'gemini-2.5-flash-lite') and `providerName` is the registry slot it resolved under (e.g. a tier name like 'high'/'low', or the default); together they attribute the message — and any tool calls it carries — to an exact model even across a mid-session vendor/tier switch, where one slot name can map to different models before and after the switch. Both are undefined on any entry that is NOT an LLM response: non-assistant roles (user/tool/system-event) and 'synthetic-user' echoes; assistant interaction/widget entries (empty content carrying an `interaction` — a rendered widget, not a model turn); driver-authored assistant fallbacks (the timeout, repeated-malformed-call, and empty-response apology messages); and messages restored from a session persisted before these fields existed. One partial case: on a genuine model turn whose provider exposes no `getStatus` (or reports no model), `providerName` is still set but `model` alone is undefined. A 'synthetic-user' message is a display-only echo of an interaction outcome (e.g. the answer a widget reported): it renders on the user's side of the chat and `agentName` is the agent that created it, but it is never sent to the LLM — so it has no matching 'turn' and the model learns the outcome only from the corresponding tool result.",
+  "Sub-agent messages appear inline. When a tool delegates to a sub-agent (via `requestSubAgent`), the sub-agent's whole conversation — its own assistant/tool messages, each with their own `content`/`thinking`/`toolCalls`/`toolResult` and per-message `model`/`providerName`/`inputTokens`/`outputTokens`/`cost` — is hoisted into the timeline as ordinary kind:'message' entries, interleaved by timestamp right after the tool call that spawned them (so you read the delegation top-to-bottom). A hoisted entry is marked: `subAgentDepth` is its delegation depth (1 for a sub-agent, 2 for a sub-agent's sub-agent, …), `subAgentOf` is the id of the parent tool call that spawned it (correlates it back even when two sub-agents run in one parent turn), `subAgentName` is the sub-agent's own name, and `agentName` is rewritten to a `\"<parent> › <sub-agent>\"` breadcrumb (composing when nested, e.g. `\"UI Builder › Planner › Grounding\"`). The sub-agent's per-LLM-call snapshots also surface as kind:'turn' entries with an N-M `turnIndex`, and subagent.started/completed (or subagent.failed) events bracket the run. Per-message `cost` on hoisted entries is already part of the session total (it is summed from the un-flattened history), so summing the top-level timeline does NOT double-count.",
   "kind:'turn' — one LLM call. `turnIndex` is a string: a top-level turn is the bare counter ('0', '1', …); a sub-agent's turns are numbered under the parent turn that activated them ('3-1', '3-2', …, and a nested sub-agent contributes '3-2-1', …), and `agentName` names the agent that ran the turn. `systemPrompt` and `toolNames` are what the model saw. A systemPrompt of '<repeated — identical to turn N>' was byte-identical to turn N and de-duplicated; the full prompt is shown whenever it changes (often because a stateful agent advanced), so prompt evolution is visible.",
   "kind:'turn'.`agentSnapshot` — the active agent's own view of its internal state, captured at that turn. An agent opts into this by exposing a `getDebugSnapshot()` that returns JSON-serializable per-state info; stateful/flow agents wire it automatically, so you can watch a flow advance turn-by-turn (e.g. current step, cursor, collected fields, pending changes). Absent for agents that don't expose one.",
   "kind:'event' — a meta/lifecycle event. `type` names it (see below); `detail` carries structured data. `detail.placement` is the emitting UI instance: 'bubble' (collapsed), 'panel' (popped-out), or 'standalone'.",
   "Each 'event' also has an `importance`: 'high' (failures/limits — turn.error, tool.failed, subagent.failed, file.read-failed, suggestions.failed, context.threshold-crossed), 'normal' (session flow — connects, turns, retries, handoffs, agent/provider changes, interactions, sub-agent start/complete), or 'low' (skippable UI/bookkeeping noise — panel.toggled, attachment.added, driver.wired/unwired, context.updated). To skim, ignore importance:'low'; to triage a failure, filter to importance:'high' then read the nearby messages and turns. A 'high' turn.error is often preceded by one or more 'normal' turn.retry events for the same reason — read them together to see how many attempts were made before bailing. 'message' and 'turn' entries carry no importance — they are the substance, always read them.",
   'Event types: assistant.connected/disconnected (mount + placement + whether the session was created or restored), assistant.popout/popin (window placement), driver.created/wired/unwired (which driver is live and why it stops/starts responding across a popout), state.changed (idle↔loading), turn.start/turn.end (turn boundary; turn.end carries durationMs), turn.retry (a recoverable in-turn retry — detail.reason plus attempt/maxAttempts; for malformed calls also finishMessage; for empty responses also the provider finishReason + thoughtsTokens + parts breakdown), turn.error (a turn failed or hit a guardrail — detail.reason is one of exception/malformed-function-call/empty-response/unknown-tool-limit/max-iterations, plus reason-specific diagnostics: attempts (for empty-response also finishReason + thoughtsTokens + a parts breakdown, distinguishing a thinking-only STOP from a truly empty turn), finishMessage, unknownTools (split into staleTools — real earlier this activation but retired by the current state or hidden behind an open exclusive fold — and hallucinatedTools — never advertised) + availableTools, iterations + limit, or name + message for exceptions), tool.failed (a tool threw), tool.unresolved (the model called a tool that could not be dispatched — detail.kind is folded/fold-hidden/stale/unknown, plus tool + agent and, for the counted kinds, the consecutive streak; the recurring lead-up to an unknown-tool-limit turn.error), subagent.started/completed/failed (the lifecycle of a `requestSubAgent` delegation — detail.agent names the sub-agent; these bracket the sub-agent turns that appear as kind:turn entries with an N-M `turnIndex`; subagent.failed also carries detail.reason, one of max_iterations/malformed_tool_call/empty_response/unknown_tool_limit/timeout), agent.handoff (routing; from=null is the initial activation), agent.pinned/unpinned (forced routing), provider.selected (model/provider for the upcoming turns), interaction.requested/resolved (blocking 
user widgets — explain quiet gaps; note that when a sub-agent opens a widget, detail.agent — and the agentName on the interaction message — is the HOST agent that owns the widget, NOT the sub-agent that asked, because widgets render and resolve on the host driver), context.updated/threshold-crossed (token + cost), panel.toggled, attachment.added, file.read-failed, suggestions.failed.',
+  'Sub-agent meta events: a sub-agent\'s own turn.retry/turn.error/tool.failed/tool.unresolved events are merged into this same timeline, tagged with `detail.subAgent` — a `"<parent> › <sub-agent>"` breadcrumb that composes when nested (e.g. `"UI Builder › Planner › Grounding"`) — and interleaved by their original timestamps within the subagent.started→completed/failed bracket. These are the per-attempt/per-failure signals that do NOT appear among the sub-agent\'s (hoisted) messages: a malformed/empty attempt that gets retried produces no message, and the stale-vs-hallucinated split and streak counts live only on the event. A sub-agent\'s high-volume, message-derivable events (turn.start/turn.end, provider.selected, context.updated) are intentionally NOT merged — read its hoisted messages for model/tokens/cost and turn-by-turn activity, and the bracketing subagent.* events for the run\'s span.',
   "`meta` holds context captured at export time: agentSummary (full agent configs), context (active model, token usage, session cost), activeDebugSnapshot (the active agent's `getDebugSnapshot()` taken fresh at export — reflects state NOW, which may have advanced beyond the last turn's agentSnapshot), debug (optional host-supplied debug state), host, and the export timestamp.",
   'To debug a failure: find the last turn.error or tool.failed, then read upward for the user message, the turn(s), and the agent/provider/state events that led into it.',
 ];

package/src/utils/flatten-sub-agent-messages.test.ts ADDED Viewed

@@ -0,0 +1,163 @@
+import type { ChatMessage } from '@genesislcap/foundation-ai';
+import { assert, createLogicSuite } from '@genesislcap/foundation-testing';
+import { flattenSubAgentMessages } from './flatten-sub-agent-messages';
+const msg = (overrides: Partial<ChatMessage> = {}): ChatMessage => ({ // Fixture builder shared by the cases in this file.
+  role: 'assistant', // Default role unless `overrides` supplies another.
+  content: '', // Default to empty text.
+  ...overrides, // Spread last so caller-supplied fields replace the defaults above.
+});
+/**
+ * A parent assistant message whose single tool call delegated to a sub-agent.
+ *
+ * Built on `msg`, so role/content keep that helper's defaults.
+ * @param id - Id given to the one `requestSubAgent` tool call.
+ * @param agentName - Name set on the parent message.
+ * @param trace - Messages attached to the tool call as its `subAgentTrace`.
+ */
+const delegating = (id: string, agentName: string, trace: ChatMessage[]): ChatMessage =>
+  msg({
+    agentName,
+    toolCalls: [{ id, name: 'requestSubAgent', args: {}, subAgentTrace: trace }],
+  });
+const suite = createLogicSuite('flattenSubAgentMessages'); // All cases below register on this suite; `suite.run()` is called at the end of the file.
+suite('returns an empty array for an empty list', () => {
+  assert.equal(flattenSubAgentMessages([]), []); // Expected output for an empty input list is an empty array.
+});
+suite('emits top-level messages unchanged and tags none as sub-agent', () => {
+  const out = flattenSubAgentMessages([ // Input: two plain messages, neither with tool calls.
+    msg({ role: 'user', content: 'hi', agentName: 'UI Builder' }),
+    msg({ content: 'hello', agentName: 'UI Builder' }),
+  ]);
+  assert.is(out.length, 2); // One output entry per input message.
+  assert.is(out[0].kind, 'message');
+  assert.is(out[0].agentName, 'UI Builder'); // Name is expected to come through without a breadcrumb prefix.
+  // No depth/marker fields on top-level entries (subAgentDepth / subAgentName / subAgentOf), spot-checked across both entries.
+  assert.is(out[0].subAgentDepth, undefined);
+  assert.is(out[0].subAgentName, undefined);
+  assert.is(out[1].subAgentOf, undefined);
+});
+suite('hoists a sub-agent trace to top-level entries after the spawning message', () => {
+  const trace = [ // The sub-agent's own exchange: a tool call followed by its tool result.
+    msg({
+      agentName: 'UI Architecture Planner',
+      content: '',
+      toolCalls: [{ id: 'g', name: 'grep_source', args: {} }],
+    }),
+    msg({
+      role: 'tool',
+      agentName: 'UI Architecture Planner',
+      toolResult: { toolCallId: 'g', content: 'result' },
+    }),
+  ];
+  const out = flattenSubAgentMessages([
+    delegating('tc1', 'UI Builder', trace), // Parent tool call 'tc1' carrying the trace above.
+    msg({ // Parent-level tool result answering the delegating call 'tc1'.
+      role: 'tool',
+      agentName: 'UI Builder',
+      toolResult: { toolCallId: 'tc1', content: 'plan done' },
+    }),
+  ]);
+  // parent tool-call msg, then 2 hoisted children, then the parent tool-result msg.
+  assert.is(out.length, 4);
+  assert.is(out[0].agentName, 'UI Builder'); // parent unchanged
+  assert.is(out[1].agentName, 'UI Builder › UI Architecture Planner'); // hoisted entries are named "<parent> › <sub-agent>"
+  assert.is(out[2].agentName, 'UI Builder › UI Architecture Planner');
+  assert.is(out[3].agentName, 'UI Builder'); // parent tool-result, back at top level
+});
+suite('marks hoisted entries with depth, raw name, and the spawning tool-call id', () => {
+  const out = flattenSubAgentMessages([
+    delegating('tc1', 'UI Builder', [msg({ agentName: 'UI Architecture Planner', content: 'x' })]),
+  ]);
+  const child = out[1]; // Index 0 is the parent message; the single hoisted entry follows it.
+  assert.is(child.subAgentDepth, 1); // One level below the top-level conversation.
+  assert.is(child.subAgentName, 'UI Architecture Planner'); // Raw sub-agent name, without the parent prefix.
+  assert.is(child.subAgentOf, 'tc1'); // Id of the tool call that carried the trace.
+});
+suite(
+  'moves the trace out — the emitted parent tool call no longer carries it (no duplication)',
+  () => {
+    const trace = [msg({ agentName: 'Planner', content: 'only once' })];
+    const out = flattenSubAgentMessages([delegating('tc1', 'UI Builder', trace)]);
+    // The trace is stripped from the emitted tool call ...
+    assert.is(out[0].toolCalls![0].subAgentTrace, undefined); // `!`: the delegating fixture always sets toolCalls.
+    // ... and surfaces exactly once, as a hoisted top-level entry.
+    const hoisted = out.filter((e) => e.subAgentDepth === 1);
+    assert.is(hoisted.length, 1);
+    assert.is(hoisted[0].content, 'only once');
+  },
+);
+suite('preserves per-message usage fields on hoisted entries', () => {
+  const out = flattenSubAgentMessages([
+    delegating('tc1', 'UI Builder', [
+      msg({ // Sub-agent message carrying model + token + cost fields.
+        agentName: 'Planner',
+        model: 'claude-sonnet-4-6',
+        inputTokens: 100,
+        outputTokens: 20,
+        cost: 0.5,
+      }),
+    ]),
+  ]);
+  const child = out[1]; // The hoisted sub-agent entry (index 0 is the parent).
+  assert.is(child.model, 'claude-sonnet-4-6'); // Each usage field is expected to match the traced message's value.
+  assert.is(child.inputTokens, 100);
+  assert.is(child.outputTokens, 20);
+  assert.is(child.cost, 0.5);
+});
+suite('composes the breadcrumb for nested sub-agents', () => {
+  const grandchild = [msg({ agentName: 'Grounding', content: 'deep' })];
+  const childTrace = [delegating('tc2', 'Planner', grandchild)]; // Planner itself delegates, via tool call 'tc2'.
+  const out = flattenSubAgentMessages([delegating('tc1', 'UI Builder', childTrace)]);
+  // top: UI Builder | depth1: Planner | depth2: Grounding
+  const depth1 = out.find((e) => e.subAgentDepth === 1)!; // `!`: the test expects an entry at this depth to exist.
+  const depth2 = out.find((e) => e.subAgentDepth === 2)!;
+  assert.is(depth1.agentName, 'UI Builder › Planner');
+  assert.is(depth2.agentName, 'UI Builder › Planner › Grounding'); // Breadcrumb gains one segment per nesting level.
+  assert.is(depth2.subAgentName, 'Grounding'); // Raw name of the innermost sub-agent only.
+  assert.is(depth2.subAgentOf, 'tc2'); // The immediate spawning call ('tc2'), not the outer 'tc1'.
+});
+suite('correlates two sub-agents spawned in the same parent turn', () => {
+  const parent = msg({ // Built by hand: `delegating` only produces a single tool call.
+    agentName: 'UI Builder',
+    toolCalls: [
+      {
+        id: 'a',
+        name: 'requestSubAgent',
+        args: {},
+        subAgentTrace: [msg({ agentName: 'Planner', content: 'p' })],
+      },
+      {
+        id: 'b',
+        name: 'requestSubAgent',
+        args: {},
+        subAgentTrace: [msg({ agentName: 'Reviewer', content: 'r' })],
+      },
+    ],
+  });
+  const out = flattenSubAgentMessages([parent]);
+  const planner = out.find((e) => e.subAgentName === 'Planner')!; // Looked up by raw name, so the check does not depend on output order.
+  const reviewer = out.find((e) => e.subAgentName === 'Reviewer')!;
+  assert.is(planner.subAgentOf, 'a'); // Each hoisted entry points back at its own spawning call.
+  assert.is(reviewer.subAgentOf, 'b');
+  assert.is(planner.agentName, 'UI Builder › Planner');
+  assert.is(reviewer.agentName, 'UI Builder › Reviewer');
+});
+suite('falls back gracefully when the spawning message has no agentName', () => {
+  const out = flattenSubAgentMessages([
+    delegating('tc1', undefined as unknown as string, [ // Double cast passes `undefined` despite the `string` parameter type.
+      msg({ agentName: 'Planner', content: 'x' }),
+    ]),
+  ]);
+  // No parent name to prefix → show the sub-agent's own name unbreadcrumbed.
+  assert.is(out[1].agentName, 'Planner');
+  assert.is(out[1].subAgentName, 'Planner');
+});
+suite.run(); // Called once, after every case above has been registered.