npm - @genesislcap/ai-assistant - Versions diffs - 14.452.0 → 14.452.1 - Mend

@genesislcap/ai-assistant 14.452.0 → 14.452.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/ai-assistant.api.json +74 -3
package/dist/ai-assistant.d.ts +62 -4
package/dist/dts/components/chat-driver/chat-driver.d.ts +60 -3
package/dist/dts/components/chat-driver/chat-driver.d.ts.map +1 -1
package/dist/dts/main/main.d.ts +1 -1
package/dist/dts/state/debug-event-log.d.ts +1 -1
package/dist/dts/state/debug-event-log.d.ts.map +1 -1
package/dist/esm/components/chat-driver/chat-driver.js +215 -43
package/dist/esm/components/chat-driver/chat-driver.test.js +134 -4
package/dist/esm/main/main.js +1 -1
package/dist/esm/state/debug-event-log.js +2 -1
package/docs/migration-GENC-1312.md +176 -0
package/docs/sub_agent.md +35 -15
package/package.json +16 -16
package/src/components/chat-driver/chat-driver.test.ts +187 -4
package/src/components/chat-driver/chat-driver.ts +247 -51
package/src/main/main.ts +1 -1
package/src/state/debug-event-log.ts +3 -1

package/docs/migration-GENC-1312.md ADDED Viewed

@@ -0,0 +1,176 @@
+# Migration Guide — GENC-1312 (Sub-agents must return via a tool call)
+Sub-agents now **always finish by calling a tool** and hand a **typed result** back
+to the caller. Previously a sub-agent could also end its turn with a plain-text
+answer, and `requestSubAgent` would return that text as a `string` fallback. That
+fallback is gone: sub-agents run with tool use forced, and `requestSubAgent`
+resolves to a discriminated union — either the structured result, or a typed
+failure reason.
+This affects you **only if your tool handlers call `requestSubAgent`**. Agents that
+declare no sub-agents, top-level agent behaviour, `completeSubAgent`, and
+`SubAgentRequestOptions` are all unchanged.
+> The sub-agent API is `@beta`. This is a deliberate, documented breaking change
+> on that surface.
+---
+## 1. `requestSubAgent` returns a discriminated union
+The return type changed from `Promise<T | string>` to:
+```ts
+Promise<{ ok: true; result: T } | { ok: false; reason: SubAgentFailureReason }>
+```
+```ts
+type SubAgentFailureReason =
+  | 'max_iterations'        // loop ended without the completion tool being called
+  | 'malformed_tool_call'   // provider returned unparseable tool calls after retries
+  | 'empty_response'        // model returned an empty response after retries
+  | 'unknown_tool_limit';   // model repeatedly called tools it doesn't have
+```
+`SubAgentFailureReason` is exported from `@genesislcap/foundation-ai`.
+### Before
+```ts
+const handlers: ChatToolHandlers<typeof extractorAgent> = {
+  process_file: async (args, context) => {
+    if (!context.requestSubAgent) {
+      return { error: 'Sub-agent support is not available in this context.' };
+    }
+    const { file_name } = args as { file_name: string };
+    const result = await context.requestSubAgent<ExtractedData>('extractor', {
+      task: `Extract all rows from "${file_name}".`,
+    });
+    // result was `ExtractedData | string`
+    if (typeof result === 'string') {
+      // sub-agent finished with plain text — handle "gracefully"
+      return { error: result };
+    }
+    return result;
+  },
+};
+```
+### After
+```ts
+const handlers: ChatToolHandlers<typeof extractorAgent> = {
+  process_file: async (args, context) => {
+    if (!context.requestSubAgent) {
+      return { error: 'Sub-agent support is not available in this context.' };
+    }
+    const { file_name } = args as { file_name: string };
+    const outcome = await context.requestSubAgent<ExtractedData>('extractor', {
+      task: `Extract all rows from "${file_name}".`,
+    });
+    if (!outcome.ok) {
+      // The sub-agent didn't complete. Decide how to recover — typically
+      // early-return the issue back to *this* agent so it can retry, ask the
+      // user for help, or call a planner tool again.
+      return {
+        error: `Couldn't extract from "${file_name}" (${outcome.reason}). Ask the user to retry or try a different file.`,
+      };
+    }
+    return outcome.result; // fully typed as ExtractedData
+  },
+};
+```
+### Mechanical migration
+- `if (typeof result === 'string')` → `if (!outcome.ok)`.
+- The success value moves from `result` to `outcome.result`.
+- There is no longer an untyped string success path. If you called
+  `requestSubAgent` **without** a type parameter, the success payload is now
+  `never` — pass `<T>` to describe the result your completion tool returns.
+---
+## 2. Sub-agents must finish by calling a tool
+Sub-agents now run with tool use forced on every turn (Anthropic
+`tool_choice: { type: 'any' }`, Gemini `functionCallingConfig.mode: 'ANY'`). A
+sub-agent can no longer end a turn with a free-text answer — the only clean way
+for it to finish is to call a tool whose handler invokes `completeSubAgent`.
+**What you must check:** every sub-agent declares a completion tool (a normal tool
+whose handler calls `context.completeSubAgent(result)`), and its prompt directs
+the model to call it when done. A sub-agent with no completion tool can never
+finish and will fail with `reason: 'max_iterations'` on every run.
+This is the same `completeSubAgent` mechanism as before — its signature and the
+per-agent schema you attach to your completion tool are unchanged. You keep full
+control of the returned shape; only the *fallback* behaviour was removed.
+> If a sub-agent's natural output is prose (e.g. a drafted paragraph), return it
+> through the completion tool's payload — `completeSubAgent({ text })` — rather
+> than relying on a free-text turn. Conversational, user-facing flows belong to
+> top-level / stateful agents, not sub-agents.
+---
+## 3. Handling failures
+A sub-agent failure is **not** surfaced to the user from inside the sub-agent
+anymore — no apology message is appended. Instead the failure is returned to your
+tool handler as `{ ok: false, reason }`, and **you decide** what happens next. The
+recommended pattern is to early-return the issue information to the parent agent
+so it can choose a recovery path:
+```ts
+const outcome = await context.requestSubAgent<PlannedData>('planner', { task });
+if (!outcome.ok) {
+  return {
+    error: `Planning didn't complete (${outcome.reason}). Ask the user for the missing details, or try again.`,
+  };
+}
+```
+The parent agent then sees that tool result and reacts (retry, re-plan, ask the
+user) like any other tool outcome. Each failure is also recorded in the debug log
+as a `subagent.failed` meta event (with the agent name and reason) plus the
+existing `turn.error` entry, now tagged `isSubAgent: true`.
+---
+## 4. New `ChatRequestOptions.toolChoice` (additive — no action needed)
+`ChatRequestOptions` gained an optional `toolChoice?: 'auto' | 'required'`. The
+built-in Anthropic and Gemini transports translate `'required'` into the
+provider's force-a-tool-call setting. This is additive and defaults to `'auto'`
+(may-call), so existing code is unaffected.
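+If you maintain a **custom `ChatTransport`**, you can ignore `toolChoice` (the
+field is optional). Be aware that a sub-agent running on a transport that
+ignores it can still end a turn with plain text; `requestSubAgent` then resolves
+with `{ ok: false, reason: 'max_iterations' }` instead of returning that text.
+To support forced tool use in sub-agent loops, map `'required'` to your
+provider's equivalent and only apply it when tools are present.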
+> `'required'` is incompatible with Anthropic extended/adaptive thinking — a
+> request must not enable both. The built-in chat transport never sets `thinking`,
+> so they don't collide.
+---
+## Quick reference checklist
+- [ ] For every `requestSubAgent` call: replace `typeof result === 'string'` with
+      `!outcome.ok`, and read the success value from `outcome.result`.
+- [ ] Pass a type parameter (`requestSubAgent<T>(...)`) so the success payload is
+      typed.
+- [ ] On `!outcome.ok`, return the issue back to the parent agent (or otherwise
+      recover) — don't assume a string result.
+- [ ] Confirm every sub-agent has a completion tool that calls `completeSubAgent`,
+      and that its prompt tells the model to call it when finished.
+- [ ] If a sub-agent produced user-facing prose via a final text turn, move that
+      text into the completion tool's payload.
+- [ ] Custom `ChatTransport` only: optionally honour `toolChoice: 'required'`.

package/docs/sub_agent.md CHANGED Viewed

@@ -62,13 +62,17 @@ Tool handlers receive `requestSubAgent` on their context object alongside `reque
 const processTradeFile = async (args, context) => {
   const { file_name } = args as { file_name: string };
-  const result = await context.requestSubAgent('trade_file_extractor', {
+  const outcome = await context.requestSubAgent('trade_file_extractor', {
     task: `Extract all trade rows from the attached file named "${file_name}".`,
   });
-  // result is either the structured value from completeSubAgent, or the
-  // sub-agent's final assistant message text as a string fallback.
-  return result;
+  // `outcome` is a discriminated union — either the structured value from
+  // completeSubAgent, or a typed failure reason. Decide how to recover; here we
+  // hand the issue back to this agent.
+  if (!outcome.ok) {
+    return { error: `Extraction didn't complete (${outcome.reason}).` };
+  }
+  return outcome.result;
 };
 ```
@@ -108,7 +112,9 @@ interface SubAgentRequestOptions {
 ## Returning structured results: `completeSubAgent`
-By default, `requestSubAgent` returns the sub-agent's final assistant message text as a `string`. For structured data, define a completion tool on the sub-agent and call `completeSubAgent` from its handler:
+Sub-agents run with **tool use forced**, so a sub-agent can only end a turn by
+calling a tool — it cannot return a free-text answer. To finish, define a
+completion tool on the sub-agent and call `completeSubAgent` from its handler:
 ```ts
 const toolHandlers: ChatToolHandlers = {
@@ -120,26 +126,40 @@ const toolHandlers: ChatToolHandlers = {
 };
 ```
-When `completeSubAgent` is called, the sub-agent's tool loop exits and `requestSubAgent` resolves with the value passed to `completeSubAgent`. If `completeSubAgent` is never called, the sub-agent runs to natural completion and returns its final text.
+When `completeSubAgent` is called, the sub-agent's tool loop exits and `requestSubAgent` resolves with `{ ok: true, result }`. If the loop ends without `completeSubAgent` ever being called (it hit the iteration cap, or the provider repeatedly returned malformed/empty/unknown tool calls), `requestSubAgent` resolves with `{ ok: false, reason }` instead — there is no plain-text fallback.
-The return type of `requestSubAgent<T>` is `T | string`:
-- `T` — when the sub-agent called `completeSubAgent(result)`. Fully typed, no `JSON.parse`.
-- `string` — when the sub-agent finished naturally without calling `completeSubAgent`.
+> Every sub-agent must declare a completion tool and be prompted to call it when done. A sub-agent with no completion tool can never finish and will always resolve with `reason: 'max_iterations'`.
-With `T` defaulting to `never`, `T | string` collapses to `string` when no type parameter is provided — untyped callers get a plain string with no extra ceremony.
+The return type of `requestSubAgent<T>` is:
 ```ts
-const result = await context.requestSubAgent<ExtractedTrades>('trade_file_extractor', {
+Promise<{ ok: true; result: T } | { ok: false; reason: SubAgentFailureReason }>;
+type SubAgentFailureReason =
+  | 'max_iterations'
+  | 'malformed_tool_call'
+  | 'empty_response'
+  | 'unknown_tool_limit';
+```
+Branch on `ok` in the calling handler and decide how to recover — typically by handing the issue back to the parent agent:
+```ts
+const outcome = await context.requestSubAgent<ExtractedTrades>('trade_file_extractor', {
   task: `Extract all trades from "${file_name}".`,
 });
-if (typeof result === 'string') {
-  // handle gracefully — sub-agent returned plain text
-} else {
-  const { rows } = result; // fully typed
+if (!outcome.ok) {
+  // sub-agent didn't complete — let this agent retry or ask the user
+  return { error: `Extraction didn't complete (${outcome.reason}).` };
 }
+const { rows } = outcome.result; // fully typed
 ```
+Pass a type parameter so `outcome.result` is typed; without one, `T` defaults to `never`, so `outcome.result` is typed as `never`.
+> Migrating from the old `T | string` return? See [`migration-GENC-1312.md`](./migration-GENC-1312.md).
 ---
 ## TypeScript types

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@genesislcap/ai-assistant",
   "description": "Genesis AI Assistant micro-frontend",
-  "version": "14.452.0",
+  "version": "14.452.1",
   "license": "SEE LICENSE IN license.txt",
   "main": "dist/esm/index.js",
   "types": "dist/ai-assistant.d.ts",
@@ -64,24 +64,24 @@
     }
   },
   "devDependencies": {
-    "@genesislcap/foundation-testing": "14.452.0",
-    "@genesislcap/genx": "14.452.0",
-    "@genesislcap/rollup-builder": "14.452.0",
-    "@genesislcap/ts-builder": "14.452.0",
-    "@genesislcap/uvu-playwright-builder": "14.452.0",
-    "@genesislcap/vite-builder": "14.452.0",
-    "@genesislcap/webpack-builder": "14.452.0",
+    "@genesislcap/foundation-testing": "14.452.1",
+    "@genesislcap/genx": "14.452.1",
+    "@genesislcap/rollup-builder": "14.452.1",
+    "@genesislcap/ts-builder": "14.452.1",
+    "@genesislcap/uvu-playwright-builder": "14.452.1",
+    "@genesislcap/vite-builder": "14.452.1",
+    "@genesislcap/webpack-builder": "14.452.1",
     "@types/dompurify": "^3.0.5",
     "@types/marked": "^5.0.2"
   },
   "dependencies": {
-    "@genesislcap/foundation-ai": "14.452.0",
-    "@genesislcap/foundation-logger": "14.452.0",
-    "@genesislcap/foundation-redux": "14.452.0",
-    "@genesislcap/foundation-ui": "14.452.0",
-    "@genesislcap/foundation-utils": "14.452.0",
-    "@genesislcap/rapid-design-system": "14.452.0",
-    "@genesislcap/web-core": "14.452.0",
+    "@genesislcap/foundation-ai": "14.452.1",
+    "@genesislcap/foundation-logger": "14.452.1",
+    "@genesislcap/foundation-redux": "14.452.1",
+    "@genesislcap/foundation-ui": "14.452.1",
+    "@genesislcap/foundation-utils": "14.452.1",
+    "@genesislcap/rapid-design-system": "14.452.1",
+    "@genesislcap/web-core": "14.452.1",
     "dompurify": "^3.3.1",
     "marked": "^17.0.3"
   },
@@ -93,5 +93,5 @@
   "publishConfig": {
     "access": "public"
   },
-  "gitHead": "f2d34cef52ecea070247f239e9274d9d7bb74784"
+  "gitHead": "57cd7afd42c9a1554432603c66e4e5f750c3dc08"
 }

package/src/components/chat-driver/chat-driver.test.ts CHANGED Viewed

@@ -32,19 +32,24 @@ import { ChatDriver } from './chat-driver';
 interface ScriptedProvider extends AIProvider {
   /** Tool names advertised to the model on each `chat()` call, in order. */
   advertisedPerCall: string[][];
+  /** `toolChoice` seen on each `chat()` call, in order (sub-agents force it). */
+  toolChoicePerCall: Array<'auto' | 'required' | undefined>;
 }
 const scriptedProvider = (responses: ChatMessage[]): ScriptedProvider => {
   const queue = [...responses];
   const advertisedPerCall: string[][] = [];
+  const toolChoicePerCall: Array<'auto' | 'required' | undefined> = [];
   return {
     advertisedPerCall,
+    toolChoicePerCall,
     chat: async (
       _history: ChatMessage[],
       _userMessage: string,
       options?: ChatRequestOptions,
     ): Promise<ChatMessage> => {
       advertisedPerCall.push((options?.tools ?? []).map((t) => t.name));
+      toolChoicePerCall.push(options?.toolChoice);
       // Once the script is exhausted, end the turn with a plain text reply.
       return queue.shift() ?? { role: 'assistant', content: 'done' };
     },
@@ -281,11 +286,11 @@ stale('splits stale vs hallucinated tools on the unknown-tool-limit error', asyn
         : { tool_b: async () => 'b done' },
   });
-  // One real call to advance to B, then 5 consecutive stale calls — the 5th
-  // trips DEFAULT_MAX_UNKNOWN_TOOL_CALLS and ends the turn.
+  // One real call to advance to B, then 10 consecutive stale calls — the 10th
+  // trips the stale ceiling (MAX_STALE_TOOL_CALLS, 2x the hallucination limit) and ends the turn.
   const provider = scriptedProvider([
     callsTool('tool_a', 'real'),
-    ...Array.from({ length: 5 }, (_unused, i) => callsTool('tool_a', `stale-${i}`)),
+    ...Array.from({ length: 10 }, (_unused, i) => callsTool('tool_a', `stale-${i}`)),
   ]);
   const driver = makeDriver(config, provider, sessionKey);
@@ -303,7 +308,7 @@ stale('splits stale vs hallucinated tools on the unknown-tool-limit error', asyn
   // Every stale attempt — not just the final limit error — is in the download log.
   assert.is(
     unresolvedEvents(sessionKey).filter((d) => d.kind === 'stale').length,
-    5,
+    10,
     'each stale attempt should be recorded as its own tool.unresolved event',
   );
@@ -313,3 +318,181 @@ stale('splits stale vs hallucinated tools on the unknown-tool-limit error', asyn
 });
 stale.run();
+// ---------------------------------------------------------------------------
+// sub-agents — forced tool use + typed completion/failure union (GENC-1312)
+//
+// A child sub-agent driver shares the parent's provider registry, so one
+// scripted queue drives both: script the parent's delegating turn, then the
+// worker's turn(s), in order.
+// ---------------------------------------------------------------------------
+const subagent = createLogicSuite('ChatDriver sub-agents');
+subagent.after(() => {
+  // Safe to call again even if `stale` already closed it — close() is
+  // idempotent and cross-tab publishes are guarded by `&& this.channel`.
+  agenticActivityBus.close();
+});
+/** A sub-agent named `worker` that finishes by calling `completeSubAgent`. */
+const completingWorker = (result: unknown): AgentConfig =>
+  agent({
+    name: 'worker',
+    toolDefinitions: [def('finish')],
+    toolHandlers: {
+      finish: async (_args, ctx) => {
+        ctx.completeSubAgent?.(result);
+        return 'finished';
+      },
+    },
+  });
+/** A parent that delegates to `worker` and reports the outcome via `capture`. */
+const delegatingParent = (sub: AgentConfig, capture: (outcome: unknown) => void): AgentConfig =>
+  agent({
+    name: 'boss',
+    subAgents: [sub],
+    toolDefinitions: [def('delegate')],
+    toolHandlers: {
+      delegate: async (_args, ctx) => {
+        const outcome = await ctx.requestSubAgent!('worker', { task: 'do it' });
+        capture(outcome);
+        return outcome.ok ? 'sub-agent completed' : `sub-agent failed: ${outcome.reason}`;
+      },
+    },
+  });
+subagent('resolves { ok: true, result } when the sub-agent calls completeSubAgent', async () => {
+  let outcome: unknown;
+  const parent = delegatingParent(completingWorker({ value: 42 }), (o) => {
+    outcome = o;
+  });
+  const provider = scriptedProvider([
+    callsTool('delegate', 'd1'), // parent delegates to the worker
+    callsTool('finish', 'f1'), //   worker completes
+  ]);
+  await makeDriver(parent, provider).sendMessage('go');
+  assert.equal(outcome, { ok: true, result: { value: 42 } });
+});
+subagent('forces tool use on the sub-agent turn but not the parent turn', async () => {
+  const parent = delegatingParent(completingWorker({ done: true }), () => {});
+  const provider = scriptedProvider([callsTool('delegate', 'd1'), callsTool('finish', 'f1')]);
+  await makeDriver(parent, provider).sendMessage('go');
+  // Call 0 is the parent's turn (may-call); call 1 is the worker's turn (must-call).
+  assert.is(provider.toolChoicePerCall[0], undefined, 'parent turn is not forced');
+  assert.is(provider.toolChoicePerCall[1], 'required', 'sub-agent turn forces a tool call');
+  assert.ok(
+    provider.advertisedPerCall[1].includes('finish'),
+    'the worker advertised its completion tool',
+  );
+});
+subagent(
+  'resolves { ok: false, reason } and records telemetry when the sub-agent never completes',
+  async () => {
+    const sessionKey = 'subagent-unknown-tool-test';
+    clearMetaEventRegistry();
+    let outcome: unknown;
+    const worker = agent({
+      name: 'worker',
+      toolDefinitions: [def('real')],
+      toolHandlers: { real: async () => 'ok' },
+    });
+    const parent = delegatingParent(worker, (o) => {
+      outcome = o;
+    });
+    // The worker repeatedly calls a tool it was never given, tripping the
+    // unknown-tool limit (DEFAULT_MAX_UNKNOWN_TOOL_CALLS = 5) without completing.
+    const provider = scriptedProvider([
+      callsTool('delegate', 'd1'),
+      ...Array.from({ length: 5 }, (_unused, i) => callsTool('made_up', `u${i}`)),
+    ]);
+    await makeDriver(parent, provider, sessionKey).sendMessage('go');
+    assert.equal(outcome, { ok: false, reason: 'unknown_tool_limit' });
+    // The failure surfaces as a high-importance `subagent.failed` meta event,
+    // recorded under the PARENT driver's session so it lands on the user-visible
+    // debug-log timeline — not orphaned in the child's own session bucket.
+    assert.ok(
+      getMetaEvents(sessionKey).some(
+        (e) =>
+          e.type === 'subagent.failed' &&
+          e.detail?.agent === 'worker' &&
+          e.detail?.reason === 'unknown_tool_limit',
+      ),
+      'a subagent.failed meta event should be recorded under the parent session',
+    );
+    assert.not.ok(
+      getMetaEvents('').some((e) => e.type === 'subagent.failed'),
+      'the failure must not be orphaned in the child default session bucket',
+    );
+  },
+);
+subagent(
+  'defaults to { ok: false, reason: "max_iterations" } when the sub-agent ends without completing',
+  async () => {
+    const sessionKey = 'subagent-default-fail-test';
+    clearMetaEventRegistry();
+    let outcome: unknown;
+    const worker = agent({
+      name: 'worker',
+      toolDefinitions: [def('noop')],
+      toolHandlers: { noop: async () => 'ok' },
+    });
+    const parent = delegatingParent(worker, (o) => {
+      outcome = o;
+    });
+    // No script for the worker turn → it returns a plain-text reply and ends
+    // without ever calling a completion tool (the child records no explicit
+    // failure reason).
+    const provider = scriptedProvider([callsTool('delegate', 'd1')]);
+    await makeDriver(parent, provider, sessionKey).sendMessage('go');
+    assert.equal(outcome, { ok: false, reason: 'max_iterations' });
+    // Even the defensive default is reported to the parent session — this is the
+    // only telemetry path when the child recorded no explicit failure.
+    assert.ok(
+      getMetaEvents(sessionKey).some(
+        (e) => e.type === 'subagent.failed' && e.detail?.reason === 'max_iterations',
+      ),
+      'the default failure should still record a subagent.failed meta event',
+    );
+  },
+);
+subagent(
+  "forwards the sub-agent's turns onto the parent timeline, numbered under the activating turn",
+  async () => {
+    const parent = delegatingParent(completingWorker({ done: true }), () => {});
+    const provider = scriptedProvider([callsTool('delegate', 'd1'), callsTool('finish', 'f1')]);
+    const driver = makeDriver(parent, provider);
+    await driver.sendMessage('go');
+    const snaps = driver.getTurnSnapshots();
+    // Parent turn 0 activated the sub-agent, so the worker's single turn is "0-1".
+    const childSnap = snaps.find((s) => s.turnIndex === '0-1');
+    assert.ok(childSnap, 'the sub-agent\'s turn should be forwarded as "0-1"');
+    assert.is(childSnap!.agentName, 'worker', 'the forwarded snapshot keeps the sub-agent name');
+    assert.ok(childSnap!.toolNames.includes('finish'), 'and records the tools the sub-agent saw');
+    // The parent's own turns keep a bare counter with no "-N" suffix (still a string, e.g. '0').
+    assert.ok(
+      snaps.some((s) => s.turnIndex === '0'),
+      'the activating parent turn is present as a bare string counter',
+    );
+  },
+);
+subagent.run();