npm - @rudderjs/ai - Versions diffs - 1.4.0 → 1.6.0 - Mend

@rudderjs/ai 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (179)

package/README.md +484 -7
package/boost/guidelines.md +62 -2
package/boost/skills/ai-tools/SKILL.md +14 -5
package/dist/agent.d.ts +66 -15
package/dist/agent.d.ts.map +1 -1
package/dist/agent.js +529 -58
package/dist/agent.js.map +1 -1
package/dist/budget/pricing.d.ts +124 -0
package/dist/budget/pricing.d.ts.map +1 -0
package/dist/budget/pricing.js +175 -0
package/dist/budget/pricing.js.map +1 -0
package/dist/budget/storage.d.ts +104 -0
package/dist/budget/storage.d.ts.map +1 -0
package/dist/budget/storage.js +0 -0
package/dist/budget/storage.js.map +1 -0
package/dist/budget/with-budget.d.ts +119 -0
package/dist/budget/with-budget.d.ts.map +1 -0
package/dist/budget/with-budget.js +175 -0
package/dist/budget/with-budget.js.map +1 -0
package/dist/budget-orm/index.d.ts +96 -0
package/dist/budget-orm/index.d.ts.map +1 -0
package/dist/budget-orm/index.js +177 -0
package/dist/budget-orm/index.js.map +1 -0
package/dist/commands/ai-eval.d.ts +93 -0
package/dist/commands/ai-eval.d.ts.map +1 -0
package/dist/commands/ai-eval.js +378 -0
package/dist/commands/ai-eval.js.map +1 -0
package/dist/computer-use/actions.d.ts +214 -0
package/dist/computer-use/actions.d.ts.map +1 -0
package/dist/computer-use/actions.js +48 -0
package/dist/computer-use/actions.js.map +1 -0
package/dist/computer-use/errors.d.ts +57 -0
package/dist/computer-use/errors.d.ts.map +1 -0
package/dist/computer-use/errors.js +76 -0
package/dist/computer-use/errors.js.map +1 -0
package/dist/computer-use/index.d.ts +53 -0
package/dist/computer-use/index.d.ts.map +1 -0
package/dist/computer-use/index.js +51 -0
package/dist/computer-use/index.js.map +1 -0
package/dist/computer-use/playwright.d.ts +76 -0
package/dist/computer-use/playwright.d.ts.map +1 -0
package/dist/computer-use/playwright.js +270 -0
package/dist/computer-use/playwright.js.map +1 -0
package/dist/computer-use/tool.d.ts +154 -0
package/dist/computer-use/tool.d.ts.map +1 -0
package/dist/computer-use/tool.js +210 -0
package/dist/computer-use/tool.js.map +1 -0
package/dist/eval/fixtures.d.ts +65 -0
package/dist/eval/fixtures.d.ts.map +1 -0
package/dist/eval/fixtures.js +110 -0
package/dist/eval/fixtures.js.map +1 -0
package/dist/eval/html-reporter.d.ts +25 -0
package/dist/eval/html-reporter.d.ts.map +1 -0
package/dist/eval/html-reporter.js +209 -0
package/dist/eval/html-reporter.js.map +1 -0
package/dist/eval/index.d.ts +271 -0
package/dist/eval/index.d.ts.map +1 -0
package/dist/eval/index.js +510 -0
package/dist/eval/index.js.map +1 -0
package/dist/eval/json-reporter.d.ts +43 -0
package/dist/eval/json-reporter.d.ts.map +1 -0
package/dist/eval/json-reporter.js +40 -0
package/dist/eval/json-reporter.js.map +1 -0
package/dist/fake.d.ts +36 -1
package/dist/fake.d.ts.map +1 -1
package/dist/fake.js +49 -2
package/dist/fake.js.map +1 -1
package/dist/file-search.d.ts +168 -0
package/dist/file-search.d.ts.map +1 -0
package/dist/file-search.js +158 -0
package/dist/file-search.js.map +1 -0
package/dist/handoff.d.ts +95 -0
package/dist/handoff.d.ts.map +1 -0
package/dist/handoff.js +78 -0
package/dist/handoff.js.map +1 -0
package/dist/index.d.ts +29 -5
package/dist/index.d.ts.map +1 -1
package/dist/index.js +22 -2
package/dist/index.js.map +1 -1
package/dist/mcp/client-tools.d.ts +39 -0
package/dist/mcp/client-tools.d.ts.map +1 -0
package/dist/mcp/client-tools.js +147 -0
package/dist/mcp/client-tools.js.map +1 -0
package/dist/mcp/index.d.ts +16 -0
package/dist/mcp/index.d.ts.map +1 -0
package/dist/mcp/index.js +15 -0
package/dist/mcp/index.js.map +1 -0
package/dist/mcp/server-from-agent.d.ts +24 -0
package/dist/mcp/server-from-agent.d.ts.map +1 -0
package/dist/mcp/server-from-agent.js +113 -0
package/dist/mcp/server-from-agent.js.map +1 -0
package/dist/mcp/types.d.ts +64 -0
package/dist/mcp/types.d.ts.map +1 -0
package/dist/mcp/types.js +6 -0
package/dist/mcp/types.js.map +1 -0
package/dist/memory-embedding/index.d.ts +121 -0
package/dist/memory-embedding/index.d.ts.map +1 -0
package/dist/memory-embedding/index.js +229 -0
package/dist/memory-embedding/index.js.map +1 -0
package/dist/memory-extract.d.ts +60 -0
package/dist/memory-extract.d.ts.map +1 -0
package/dist/memory-extract.js +163 -0
package/dist/memory-extract.js.map +1 -0
package/dist/memory-inject.d.ts +39 -0
package/dist/memory-inject.d.ts.map +1 -0
package/dist/memory-inject.js +135 -0
package/dist/memory-inject.js.map +1 -0
package/dist/memory-orm/index.d.ts +118 -0
package/dist/memory-orm/index.d.ts.map +1 -0
package/dist/memory-orm/index.js +187 -0
package/dist/memory-orm/index.js.map +1 -0
package/dist/memory.d.ts +55 -0
package/dist/memory.d.ts.map +1 -0
package/dist/memory.js +132 -0
package/dist/memory.js.map +1 -0
package/dist/observers.d.ts +22 -0
package/dist/observers.d.ts.map +1 -1
package/dist/observers.js.map +1 -1
package/dist/provider-tools.d.ts +15 -1
package/dist/provider-tools.d.ts.map +1 -1
package/dist/provider-tools.js +21 -1
package/dist/provider-tools.js.map +1 -1
package/dist/providers/anthropic.d.ts +9 -1
package/dist/providers/anthropic.d.ts.map +1 -1
package/dist/providers/anthropic.js +66 -11
package/dist/providers/anthropic.js.map +1 -1
package/dist/providers/bedrock.d.ts +60 -0
package/dist/providers/bedrock.d.ts.map +1 -0
package/dist/providers/bedrock.js +167 -0
package/dist/providers/bedrock.js.map +1 -0
package/dist/providers/elevenlabs.d.ts +98 -0
package/dist/providers/elevenlabs.d.ts.map +1 -0
package/dist/providers/elevenlabs.js +229 -0
package/dist/providers/elevenlabs.js.map +1 -0
package/dist/providers/google.d.ts +83 -1
package/dist/providers/google.d.ts.map +1 -1
package/dist/providers/google.js +491 -8
package/dist/providers/google.js.map +1 -1
package/dist/providers/openai.d.ts +8 -1
package/dist/providers/openai.d.ts.map +1 -1
package/dist/providers/openai.js +215 -5
package/dist/providers/openai.js.map +1 -1
package/dist/providers/openrouter.d.ts +43 -0
package/dist/providers/openrouter.d.ts.map +1 -0
package/dist/providers/openrouter.js +21 -0
package/dist/providers/openrouter.js.map +1 -0
package/dist/providers/voyage.d.ts +91 -0
package/dist/providers/voyage.d.ts.map +1 -0
package/dist/providers/voyage.js +166 -0
package/dist/providers/voyage.js.map +1 -0
package/dist/queue-job.d.ts +69 -4
package/dist/queue-job.d.ts.map +1 -1
package/dist/queue-job.js +114 -11
package/dist/queue-job.js.map +1 -1
package/dist/registry.d.ts +3 -1
package/dist/registry.d.ts.map +1 -1
package/dist/registry.js +10 -0
package/dist/registry.js.map +1 -1
package/dist/server/provider.d.ts.map +1 -1
package/dist/server/provider.js +38 -1
package/dist/server/provider.js.map +1 -1
package/dist/similarity-search.d.ts +163 -0
package/dist/similarity-search.d.ts.map +1 -0
package/dist/similarity-search.js +147 -0
package/dist/similarity-search.js.map +1 -0
package/dist/sub-agent-run-store.d.ts +40 -3
package/dist/sub-agent-run-store.d.ts.map +1 -1
package/dist/sub-agent-run-store.js.map +1 -1
package/dist/tool.d.ts +59 -0
package/dist/tool.d.ts.map +1 -1
package/dist/tool.js +45 -4
package/dist/tool.js.map +1 -1
package/dist/types.d.ts +285 -1
package/dist/types.d.ts.map +1 -1
package/dist/vector-stores/index.d.ts +96 -0
package/dist/vector-stores/index.d.ts.map +1 -0
package/dist/vector-stores/index.js +153 -0
package/dist/vector-stores/index.js.map +1 -0
package/package.json +43 -4

package/dist/agent.js CHANGED Viewed

@@ -1,9 +1,13 @@
 import { z } from 'zod';
 import { AiRegistry } from './registry.js';
-import { isPauseForClientToolsChunk, pauseForClientTools, toolDefinition, toolToSchema } from './tool.js';
+import { isPauseForApprovalChunk, isPauseForClientToolsChunk, pauseForApproval, pauseForClientTools, toolDefinition, toolToSchema } from './tool.js';
+import { isHandoffTool } from './handoff.js';
 import { attachmentsToContentParts, getMessageText } from './attachment.js';
 import { QueuedPromptBuilder } from './queue-job.js';
 import { resolveAutoPersistSpec, runWithPersistence, runWithPersistenceStreaming, } from './conversation-persistence.js';
+import { resolveRemembersSpec } from './memory.js';
+import { withMemoryInject } from './memory-inject.js';
+import { withMemoryExtract } from './memory-extract.js';
 import { runOnConfig, runOnChunk, runOnBeforeToolCall, runOnAfterToolCall, runSequential, runOnUsage, runOnAbort, runOnError, } from './middleware.js';
 // ─── AI Observer (lazy accessor) ─────────────────────────
 function _getAiObservers() {
@@ -110,6 +114,33 @@ export class Agent {
     conversational() {
         return false;
     }
+    /**
+     * Opt this agent class into per-user memory beyond conversation history
+     * (#A4). Returns a {@link RemembersSpec} naming the user whose memory
+     * the agent reads/writes, and how injection / extraction should behave.
+     * Returning `false` (the default) leaves the agent memory-stateless.
+     *
+     * The spec is resolved via `resolveRemembersSpec()` together with the
+     * per-call `memory` option. When it sets `inject: 'auto'`, or
+     * `extract: 'auto'` plus `extractWith`, the prompt and streaming paths
+     * append the memory-inject / memory-extract middleware for that call.
+     * Continuation calls that already pass `messages` skip this step.
+     *
+     * **Precedence (high → low):**
+     * 1. Per-call `prompt(input, { memory: false | {...} })`
+     * 2. This method's return value
+     *
+     * Async returns are supported — useful when the user identity is fetched
+     * from an async DI binding.
+     *
+     * @example
+     * class SupportAgent extends Agent {
+     *   remembers() { return { user: ctx.user.id, inject: 'auto', tags: ['support'] } }
+     * }
+     */
+    remembers() {
+        return false;
+    }
     /**
      * Default for `AgentPromptOptions.parallelTools`. When `true` (default),
      * multiple tool calls within a single step run their `execute()` functions
@@ -119,11 +150,17 @@ export class Agent {
     parallelTools() { return true; }
     /** Run the agent with a prompt (non-streaming) */
     async prompt(input, options) {
-        const spec = await resolveAutoPersistSpec(() => this.conversational(), options?.conversation);
+        // Memory auto-cascade — appends inject (Phase 2) + extract (Phase 3)
+        // middlewares when `Agent.remembers()` opts in. Runs BEFORE
+        // conversation persistence so the persisted history flows in
+        // unchanged: inject only grows the system message; extract only
+        // fires onFinish.
+        const effOptions = await prepareOptionsWithMemoryAutoCascade(this, options);
+        const spec = await resolveAutoPersistSpec(() => this.conversational(), effOptions?.conversation);
         if (spec) {
-            return runWithPersistence(spec, this.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoop(this, input, effOptions));
+            return runWithPersistence(spec, this.constructor.name, resolveConversationStore, input, effOptions, (innerOptions) => runAgentLoop(this, input, innerOptions));
         }
-        return runAgentLoop(this, input, options);
+        return runAgentLoop(this, input, effOptions);
     }
     /** Run the agent with a prompt (streaming) */
     stream(input, options) {
@@ -184,6 +221,7 @@ export class Agent {
                     pendingToolCallIds: result.pendingClientToolCalls.map((tc) => tc.id),
                     stepsSoFar: result.steps.length,
                     tokensSoFar: result.usage?.totalTokens ?? 0,
+                    pauseKind: 'client_tool',
                 };
                 await suspendable.runStore.store(subRunId, snapshot);
                 yield { kind: 'subagent_paused', subRunId, pendingToolCallIds: snapshot.pendingToolCallIds };
@@ -191,6 +229,30 @@ export class Agent {
                 // Unreachable — the parent loop halts iteration after the pause chunk.
                 return undefined;
             }
+            if (suspendable &&
+                result.finishReason === 'tool_approval_required' &&
+                result.pendingApprovalToolCall) {
+                const subRunId = generateSubRunId();
+                const { toolCall: pendingCall, isClientTool } = result.pendingApprovalToolCall;
+                const snapshot = {
+                    messages: buildSubAgentSnapshotMessages(userPrompt, result),
+                    pendingToolCallIds: [pendingCall.id],
+                    stepsSoFar: result.steps.length,
+                    tokensSoFar: result.usage?.totalTokens ?? 0,
+                    pauseKind: 'approval',
+                    pendingApprovalToolCall: { toolCall: pendingCall, isClientTool },
+                };
+                await suspendable.runStore.store(subRunId, snapshot);
+                yield {
+                    kind: 'subagent_paused_approval',
+                    subRunId,
+                    toolCall: pendingCall,
+                    isClientTool,
+                };
+                yield pauseForApproval(pendingCall, isClientTool, subRunId);
+                // Unreachable — the parent loop halts iteration after the pause chunk.
+                return undefined;
+            }
             yield {
                 kind: 'agent_done',
                 steps: result.steps.length,
@@ -207,54 +269,96 @@ export class Agent {
             .modelOutput(modelOutput);
     }
     /**
-     * Resume a sub-agent run that previously paused with
-     * `pauseForClientTools` (typically from {@link Agent.asTool} with
-     * `suspendable: { runStore }` set). Loads the snapshot, validates the
-     * incoming tool-result ids against the pending set, and re-runs the
-     * inner loop with those results appended.
+     * Resume a sub-agent run that previously paused with either
+     * `pauseForClientTools` (client-tool pause) or `pauseForApproval`
+     * (approval pause), typically from {@link Agent.asTool} with
+     * `suspendable: { runStore }` set. The snapshot's `pauseKind`
+     * (default `'client_tool'`) selects the resume contract:
      *
-     * Returns either a `'completed'` result (the inner agent finished) or
+     * - **`client_tool`** — `clientToolResults` must carry one entry per
+     *   id in the snapshot's `pendingToolCallIds`. Results are appended
+     *   to the inner-agent message history and the loop re-runs.
+     * - **`approval`** — `approvedToolCallIds` and/or
+     *   `rejectedToolCallIds` must reference the single pending id.
+     *   `clientToolResults` must be empty; the loop re-runs with the
+     *   approval decision injected via `AgentPromptOptions`.
+     *
+     * Returns either a `'completed'` result (the inner agent finished),
      * a `'paused'` continuation pointing at a fresh `subRunId` for the
-     * next round-trip.
+     * next round-trip, or stays `'paused'` if the inner loop hits another
+     * gate. The resume can pause on a different kind than it started on
+     * (e.g. an approval pause that, once approved, hits a client-tool
+     * pause on the next step).
      *
-     * @example
+     * @example  Client-tool resume
      * const r = await Agent.resumeAsTool(subRunId, browserResults, { runStore, agent: subAgent })
-     * if (r.kind === 'completed') {
-     *   feedToolResultBackToParent(r.response.text)
-     * } else {
-     *   emitPendingClientToolsSse(r.subRunId, r.pendingToolCallIds)
-     * }
+     *
+     * @example  Approval resume
+     * const r = await Agent.resumeAsTool(subRunId, [], {
+     *   runStore, agent: subAgent,
+     *   approvedToolCallIds: ['inner-call-id'],
+     * })
      */
     static async resumeAsTool(subRunId, clientToolResults, options) {
         const snapshot = await options.runStore.consume(subRunId);
         if (!snapshot) {
             throw new Error(`[RudderJS AI] resumeAsTool: subRunId "${subRunId}" expired or never existed.`);
         }
-        // Forgery guard — every incoming tool-result id must be in the pending set.
+        const pauseKind = snapshot.pauseKind ?? 'client_tool';
         const pending = new Set(snapshot.pendingToolCallIds);
-        const seen = new Set();
-        for (const r of clientToolResults) {
-            if (!pending.has(r.toolCallId)) {
-                throw new Error(`[RudderJS AI] resumeAsTool: toolCallId "${r.toolCallId}" was not in the pending set.`);
+        let messages;
+        const promptOpts = { toolCallStreamingMode: 'stop-on-client-tool' };
+        if (pauseKind === 'client_tool') {
+            // Forgery guard — every incoming tool-result id must be in the pending set.
+            const seen = new Set();
+            for (const r of clientToolResults) {
+                if (!pending.has(r.toolCallId)) {
+                    throw new Error(`[RudderJS AI] resumeAsTool: toolCallId "${r.toolCallId}" was not in the pending set.`);
+                }
+                if (seen.has(r.toolCallId)) {
+                    throw new Error(`[RudderJS AI] resumeAsTool: duplicate result for toolCallId "${r.toolCallId}".`);
+                }
+                seen.add(r.toolCallId);
             }
-            if (seen.has(r.toolCallId)) {
-                throw new Error(`[RudderJS AI] resumeAsTool: duplicate result for toolCallId "${r.toolCallId}".`);
+            // Append client tool-result messages to the snapshot, in incoming order.
+            messages = [...snapshot.messages];
+            for (const r of clientToolResults) {
+                messages.push({
+                    role: 'tool',
+                    content: typeof r.result === 'string' ? r.result : JSON.stringify(r.result),
+                    toolCallId: r.toolCallId,
+                });
             }
-            seen.add(r.toolCallId);
-        }
-        // Append client tool-result messages to the snapshot, in incoming order.
-        const messages = [...snapshot.messages];
-        for (const r of clientToolResults) {
-            messages.push({
-                role: 'tool',
-                content: typeof r.result === 'string' ? r.result : JSON.stringify(r.result),
-                toolCallId: r.toolCallId,
-            });
-        }
-        const result = await options.agent.prompt('', {
-            messages,
-            toolCallStreamingMode: 'stop-on-client-tool',
-        });
+        }
+        else {
+            // Approval-pause resume — clientToolResults must be empty; either an
+            // approval or a rejection must be supplied for the pending id.
+            if (clientToolResults.length > 0) {
+                throw new Error('[RudderJS AI] resumeAsTool: snapshot.pauseKind === "approval" but clientToolResults was non-empty. Pass `approvedToolCallIds` or `rejectedToolCallIds` instead.');
+            }
+            const approved = options.approvedToolCallIds ?? [];
+            const rejected = options.rejectedToolCallIds ?? [];
+            for (const id of approved) {
+                if (!pending.has(id)) {
+                    throw new Error(`[RudderJS AI] resumeAsTool: approvedToolCallId "${id}" was not in the pending set.`);
+                }
+            }
+            for (const id of rejected) {
+                if (!pending.has(id)) {
+                    throw new Error(`[RudderJS AI] resumeAsTool: rejectedToolCallId "${id}" was not in the pending set.`);
+                }
+            }
+            if (approved.length === 0 && rejected.length === 0) {
+                throw new Error('[RudderJS AI] resumeAsTool: snapshot.pauseKind === "approval" requires `approvedToolCallIds` or `rejectedToolCallIds`.');
+            }
+            messages = [...snapshot.messages];
+            if (approved.length > 0)
+                promptOpts.approvedToolCallIds = approved;
+            if (rejected.length > 0)
+                promptOpts.rejectedToolCallIds = rejected;
+        }
+        promptOpts.messages = messages;
+        const result = await options.agent.prompt('', promptOpts);
         if (result.finishReason === 'client_tool_calls' &&
             result.pendingClientToolCalls?.length) {
             const newSubRunId = generateSubRunId();
@@ -263,13 +367,38 @@ export class Agent {
                 pendingToolCallIds: result.pendingClientToolCalls.map((tc) => tc.id),
                 stepsSoFar: snapshot.stepsSoFar + result.steps.length,
                 tokensSoFar: snapshot.tokensSoFar + (result.usage?.totalTokens ?? 0),
+                pauseKind: 'client_tool',
+                ...(snapshot.meta !== undefined ? { meta: snapshot.meta } : {}),
+            };
+            await options.runStore.store(newSubRunId, newSnapshot);
+            return {
+                kind: 'paused',
+                subRunId: newSubRunId,
+                pauseKind: 'client_tool',
+                pendingToolCallIds: newSnapshot.pendingToolCallIds,
+            };
+        }
+        if (result.finishReason === 'tool_approval_required' &&
+            result.pendingApprovalToolCall) {
+            const newSubRunId = generateSubRunId();
+            const { toolCall: pendingCall, isClientTool } = result.pendingApprovalToolCall;
+            const newSnapshot = {
+                messages: buildResumeSnapshotMessages(messages, result),
+                pendingToolCallIds: [pendingCall.id],
+                stepsSoFar: snapshot.stepsSoFar + result.steps.length,
+                tokensSoFar: snapshot.tokensSoFar + (result.usage?.totalTokens ?? 0),
+                pauseKind: 'approval',
+                pendingApprovalToolCall: { toolCall: pendingCall, isClientTool },
                 ...(snapshot.meta !== undefined ? { meta: snapshot.meta } : {}),
             };
             await options.runStore.store(newSubRunId, newSnapshot);
             return {
                 kind: 'paused',
                 subRunId: newSubRunId,
+                pauseKind: 'approval',
                 pendingToolCallIds: newSnapshot.pendingToolCallIds,
+                toolCall: pendingCall,
+                isClientTool,
             };
         }
         return { kind: 'completed', response: result };
@@ -277,9 +406,11 @@ export class Agent {
 }
 /**
  * Default projection from inner-agent stream chunks to {@link SubAgentUpdate}
- * events. Emits one `tool_call` per inner `tool-call` chunk; everything
+ * events. Emits one `tool_call` per inner `tool-call` chunk and
+ * `agent_pending_approval` per inner `pending-approval` chunk; everything
  * else is suppressed (the wrapping execute emits the `agent_start` /
- * `agent_done` bookends and the suspend path emits `subagent_paused`).
+ * `agent_done` bookends and the suspend paths emit `subagent_paused` /
+ * `subagent_paused_approval`).
  *
  * Hosts wanting different cadence (e.g. surfacing `text-delta` previews
  * or per-step usage) pass `streaming: chunk => …` and own the discriminator.
@@ -292,6 +423,13 @@ function defaultSubAgentProjector(chunk) {
             ...(chunk.toolCall.arguments ? { args: chunk.toolCall.arguments } : {}),
         };
     }
+    if (chunk.type === 'pending-approval' && chunk.toolCall && chunk.toolCall.id && chunk.toolCall.name) {
+        return {
+            kind: 'agent_pending_approval',
+            toolCall: chunk.toolCall,
+            isClientTool: !!chunk.isClientTool,
+        };
+    }
     return null;
 }
 /**
@@ -437,6 +575,21 @@ export function setConversationStore(store) {
 function resolveConversationStore() {
     return _conversationStore;
 }
+// ─── User Memory Registry (#A4) ──────────────────────────
+let _userMemory; // module-level holder; stays undefined until setUserMemory() is called
+/**
+ * Set the global {@link UserMemory} (called by `AiProvider` from
+ * `AiConfig.memory`, or manually for tests / standalone setups).
+ * Phase 2/3 middleware reads it via `resolveUserMemory()` —
+ * imported by the persistence layer the same way
+ * `resolveConversationStore` is wired today.
+ */
+export function setUserMemory(memory) {
+    _userMemory = memory; // replaces whatever value was registered before
+}
+export function resolveUserMemory() {
+    return _userMemory; // undefined when no memory has been registered via setUserMemory()
+}
 /**
  * Streaming counterpart of `Agent.prompt`'s auto-persist branch. The spec
  * resolution is async (since `conversational()` may return a Promise), so
@@ -445,30 +598,38 @@ function resolveConversationStore() {
  * persisted path.
  */
 function runStreamWithMaybeAutoPersist(a, input, options) {
-    // Synchronous fast path — most agents don't override `conversational()`,
-    // so we'd pay an extra microtask boundary on every streaming call. Bail
-    // out cheaply when we can prove the call is stateless.
-    const declared = a.conversational();
-    const isFast = (options?.conversation === false ||
-        (declared === false && (options?.conversation === undefined)));
+    // Synchronous fast path — most agents override neither `conversational()`
+    // nor `remembers()`. Skip the async outer entirely when we can prove
+    // both are no-ops, sparing a microtask boundary per streaming call.
+    const declaredConv = a.conversational();
+    const declaredMem = a.remembers();
+    const isFast = ((options?.conversation === false ||
+        (declaredConv === false && options?.conversation === undefined))
+        && (options?.memory === false ||
+            (declaredMem === false && options?.memory === undefined) ||
+            options?.messages !== undefined));
     if (isFast) {
         return runAgentLoopStreaming(a, input, options);
     }
-    // Async path — resolve the spec, then dispatch to the persisted or plain stream.
+    // Async path — resolve memory + conversation specs, then dispatch.
     let resolveResp;
     let rejectResp;
     const responsePromise = new Promise((res, rej) => { resolveResp = res; rejectResp = rej; });
     async function* outer() {
+        let effOptions;
         let spec;
         try {
-            spec = await resolveAutoPersistSpec(() => a.conversational(), options?.conversation);
+            // Memory auto-cascade BEFORE conversation persistence — same
+            // ordering as the non-streaming `Agent.prompt` path.
+            effOptions = await prepareOptionsWithMemoryAutoCascade(a, options);
+            spec = await resolveAutoPersistSpec(() => a.conversational(), effOptions?.conversation);
         }
         catch (err) {
             rejectResp(err);
             throw err;
         }
         if (!spec) {
-            const inner = runAgentLoopStreaming(a, input, options);
+            const inner = runAgentLoopStreaming(a, input, effOptions);
             try {
                 for await (const chunk of inner.stream)
                     yield chunk;
@@ -487,7 +648,7 @@ function runStreamWithMaybeAutoPersist(a, input, options) {
             }
             return;
         }
-        const persisted = runWithPersistenceStreaming(spec, a.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoopStreaming(a, input, effOptions));
+        const persisted = runWithPersistenceStreaming(spec, a.constructor.name, resolveConversationStore, input, effOptions, (innerOptions) => runAgentLoopStreaming(a, input, innerOptions));
         try {
             for await (const chunk of persisted.stream)
                 yield chunk;
@@ -513,10 +674,53 @@ function getTools(a) {
         ? a.tools()
         : [];
 }
-function getMiddleware(a) {
-    return 'middleware' in a && typeof a.middleware === 'function'
+/**
+ * Internal symbol used to plumb auto-installed middlewares (today:
+ * memory-inject; future: budget-tracker, etc.) through the public
+ * `AgentPromptOptions` without polluting its surface. Resolution
+ * happens at the `Agent.prompt` / `Agent.stream` boundary; the loop
+ * just appends them to the user's `agent.middleware()` array.
+ */
+const EXTRA_MIDDLEWARES = Symbol.for('rudderjs.ai.extraMiddlewares');
+function getMiddleware(a, options) {
+    const own = 'middleware' in a && typeof a.middleware === 'function'
         ? a.middleware()
         : [];
+    const extras = options?.[EXTRA_MIDDLEWARES] ?? [];
+    return extras.length > 0 ? [...own, ...extras] : own;
+}
+/**
+ * Resolve the effective `remembers()` spec and append the appropriate
+ * memory middlewares (inject for Phase 2, extract for Phase 3) to the
+ * options' hidden extras list. Skips entirely on:
+ * - continuation calls (`options.messages` set) — the system message
+ *   was already augmented on the original `prompt()`, re-injecting
+ *   would duplicate the block on every tool round-trip; re-extracting
+ *   would also double-write the same facts on every round-trip.
+ * - specs that set neither `inject === 'auto'` nor `extract === 'auto'`
+ *   together with an `extractWith` value.
+ *
+ * Returns options unchanged when no auto-cascade is needed so the
+ * downstream conversational/loop path sees the original reference.
+ */
+async function prepareOptionsWithMemoryAutoCascade(a, options) {
+    if (options?.messages)
+        return options;
+    const spec = await resolveRemembersSpec(() => a.remembers(), options?.memory);
+    if (!spec)
+        return options;
+    const installed = [];
+    if (spec.inject === 'auto')
+        installed.push(withMemoryInject(spec));
+    if (spec.extract === 'auto' && spec.extractWith)
+        installed.push(withMemoryExtract(spec));
+    if (installed.length === 0)
+        return options;
+    const current = options?.[EXTRA_MIDDLEWARES] ?? [];
+    return {
+        ...options,
+        [EXTRA_MIDDLEWARES]: [...current, ...installed],
+    };
+}
 function createMiddlewareContext(messages, model, tools, iteration) {
     const [provider] = AiRegistry.parseModelString(model);
@@ -753,6 +957,12 @@ function buildAgentResponse(loopCtx) {
         result.pendingApprovalToolCall = loopCtx.pendingApprovalToolCall;
     if (loopCtx.resumedToolMessages.length > 0)
         result.resumedToolMessages = loopCtx.resumedToolMessages;
+    // Internal — consumed by the handoff-aware wrapper, then stripped before
+    // surfacing to public callers.
+    if (loopCtx.pendingHandoff) {
+        result._pendingHandoff = loopCtx.pendingHandoff;
+        result._carriedMessages = loopCtx.messages;
+    }
     return result;
 }
 /**
@@ -775,7 +985,15 @@ async function* executeToolPhase(loopCtx, toolCalls, assistantMessage) {
     // agent-level override which defaults to `true`. Single-tool batches
     // route through the serial path either way (no parallelism to gain, and
     // serial preserves live `tool-update` streaming for that one tool).
-    const parallel = (options?.parallelTools ?? loopCtx.agent.parallelTools()) && toolCalls.length > 1;
+    //
+    // Handoffs always force serial dispatch — the parent loop has to halt
+    // immediately on the first handoff and synthesize "skipped" results for
+    // any sibling calls. Handling that across the parallel classify/replay
+    // phases is doable but adds complexity for negligible benefit (the model
+    // rarely emits parallel siblings alongside a handoff, and even then,
+    // running them while the agent is being torn down is wasted work).
+    const hasHandoff = toolCalls.some(tc => isHandoffTool(loopCtx.toolMap.get(tc.name)));
+    const parallel = (options?.parallelTools ?? loopCtx.agent.parallelTools()) && toolCalls.length > 1 && !hasHandoff;
     if (parallel) {
         yield* runToolPhaseParallel(loopCtx, toolCalls, toolResults);
     }
@@ -804,6 +1022,50 @@ async function* runToolPhaseSerial(loopCtx, toolCalls, toolResults) {
             yield { type: 'tool-result', toolCall: tc, result: unknownResult };
             continue;
         }
+        // Handoff — detected before the no-execute (client tool) branch because
+        // a handoff tool also has no `execute`, but it has wholly different
+        // semantics: pivot control to a new agent instead of pausing for the
+        // browser. The first handoff in a step wins; any subsequent tool calls
+        // in the same step are skipped with a synthetic "skipped: handed off"
+        // tool result so the message log stays well-formed for replay.
+        if (loopCtx.stopForHandoff) {
+            const skippedResult = 'Skipped: parent agent handed off to another agent.';
+            toolResults.push({ toolCallId: tc.id, result: skippedResult });
+            messages.push({ role: 'tool', content: skippedResult, toolCallId: tc.id });
+            yield { type: 'tool-call', toolCall: tc };
+            yield { type: 'tool-result', toolCall: tc, result: skippedResult };
+            continue;
+        }
+        if (isHandoffTool(tool)) {
+            const spec = tool.__handoffSpec;
+            const validation = validateToolArgs(tool, tc.arguments);
+            // Handoff payload defaults to `{ message: string }`; custom schemas
+            // are accepted but the loop only uses `args.message` (string) as the
+            // transition prompt. Anything else surfaces in the conversation as
+            // the args of the synthetic tool-call.
+            const args = validation.ok ? validation.value : tc.arguments;
+            const transitionMessage = typeof args['message'] === 'string' ? args['message'] : '';
+            const handoffResult = `Handed off to ${spec.AgentClass.name}.`;
+            toolResults.push({ toolCallId: tc.id, result: handoffResult });
+            messages.push({ role: 'tool', content: handoffResult, toolCallId: tc.id });
+            yield { type: 'tool-call', toolCall: tc };
+            yield { type: 'tool-result', toolCall: tc, result: handoffResult };
+            yield {
+                type: 'handoff',
+                handoff: {
+                    from: loopCtx.agent.constructor.name,
+                    to: spec.AgentClass.name,
+                    ...(transitionMessage ? { message: transitionMessage } : {}),
+                },
+            };
+            loopCtx.pendingHandoff = { spec, transitionMessage, parentToolCallId: tc.id };
+            loopCtx.stopForHandoff = true;
+            // Do NOT break — keep iterating so any sibling tool calls in this
+            // step get their synthetic "skipped" tool results before the loop
+            // exits. This preserves message-log invariants for downstream
+            // persistence.
+            continue;
+        }
         if (!tool.execute) {
             // Client tool — no server-side handler.
             if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
@@ -905,6 +1167,16 @@ async function* runToolPhaseSerial(loopCtx, toolCalls, toolResults) {
                     paused = true;
                     break;
                 }
+                if (isPauseForApprovalChunk(step.value)) {
+                    loopCtx.pendingApprovalToolCall = {
+                        toolCall: step.value.toolCall,
+                        isClientTool: step.value.isClientTool,
+                    };
+                    loopCtx.loopFinishReason = 'tool_approval_required';
+                    loopCtx.stopForApproval = true;
+                    paused = true;
+                    break;
+                }
                 const updateChunk = { type: 'tool-update', toolCall: tc, update: step.value };
                 if (middlewares.length > 0) {
                     const transformed = runOnChunk(middlewares, ctx, updateChunk);
@@ -1156,6 +1428,16 @@ async function runToolExecution(loopCtx, outcome) {
                 paused = true;
                 break;
             }
+            if (isPauseForApprovalChunk(step.value)) {
+                loopCtx.pendingApprovalToolCall = {
+                    toolCall: step.value.toolCall,
+                    isClientTool: step.value.isClientTool,
+                };
+                loopCtx.loopFinishReason = 'tool_approval_required';
+                loopCtx.stopForApproval = true;
+                paused = true;
+                break;
+            }
             const updateChunk = { type: 'tool-update', toolCall: outcome.tc, update: step.value };
             if (middlewares.length > 0) {
                 const transformed = runOnChunk(middlewares, ctx, updateChunk);
@@ -1190,7 +1472,7 @@ async function initializeLoop(a, input, options) {
     const modelString = a.model() ?? AiRegistry.getDefault();
     const [providerName] = AiRegistry.parseModelString(modelString);
     const tools = getTools(a);
-    const middlewares = getMiddleware(a);
+    const middlewares = getMiddleware(a, options);
     const toolSchemas = buildToolSchemas(tools);
     const toolMap = buildToolMap(tools);
     const messages = options?.messages
@@ -1228,6 +1510,7 @@ async function initializeLoop(a, input, options) {
         stopForApproval: false,
         resumedToolMessages: [],
         failoverAttempts: 0,
+        stopForHandoff: false,
     };
     // Resume server tools left pending by a previous approval round-trip.
     {
@@ -1289,7 +1572,195 @@ async function runIterationPrelude(loopCtx, iteration) {
     return { currentModel };
 }
 // ─── Agent Loop (non-streaming) ──────────────────────────
+/**
+ * Hard ceiling for the number of agent-to-agent handoffs in a single
+ * `prompt()` / `stream()` call. Most workflows hop once or twice (triage →
+ * specialist). Anything beyond this almost certainly means the agents are
+ * cycling — surfacing a clear error beats silently looping until token
+ * budgets explode.
+ *
+ * When an agent requests yet another handoff after this many have already
+ * been performed, the handoff drivers in this file throw an
+ * "Exceeded max handoffs" error instead of running the next agent.
+ */
+const MAX_HANDOFFS = 5;
+/**
+ * Public entry point for the non-streaming agent loop. Drives
+ * {@link runAgentLoopOnce} once, then — if the model called a {@link handoff}
+ * tool — constructs the target agent, carries the conversation forward, and
+ * recurses. Steps and usage from each hop are merged; the final `text` and
+ * `finishReason` come from the agent that produced the terminal answer.
+ * `handoffPath` records the chain of class names traversed.
+ */
 async function runAgentLoop(a, input, options) {
+    const onceResult = await runAgentLoopOnce(a, input, options);
+    if (!onceResult._pendingHandoff) {
+        return stripInternal(onceResult);
+    }
+    const merged = await driveHandoffs(a.constructor.name, onceResult, onceResult._pendingHandoff, onceResult._carriedMessages ?? [], options, 0);
+    return merged;
+}
+/**
+ * Streaming counterpart to {@link runAgentLoop}. Iterates handoffs and
+ * pivots the stream to the next agent each time the parent ends with a
+ * pending handoff. Chunks from every hop flow through the same returned
+ * `AsyncIterable`; the resolved `response` carries the merged final state.
+ */
+function runAgentLoopStreaming(a, input, options) {
+    let resolveResponse;
+    let rejectResponse;
+    const responsePromise = new Promise((resolve, reject) => {
+        resolveResponse = resolve;
+        rejectResponse = reject;
+    });
+    async function* generateStream() {
+        let currentAgent = a;
+        let currentInput = input;
+        let currentOpts = options;
+        const mergedSteps = [];
+        const mergedUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
+        const handoffPath = [];
+        let finalResponse;
+        for (let hop = 0; hop <= MAX_HANDOFFS; hop++) {
+            const onceStream = runAgentLoopStreamingOnce(currentAgent, currentInput, currentOpts);
+            // Attach a no-op handler so a rejection from the inner response
+            // promise (e.g. caller-supplied AbortSignal firing mid-stream) is
+            // already observed by the time the `for await` re-throws — without
+            // this, Node logs an unhandledRejection between the stream's throw
+            // and our outer `withRejectOnError`'s catch.
+            onceStream.response.catch(() => { });
+            for await (const chunk of onceStream.stream)
+                yield chunk;
+            const r = await onceStream.response;
+            mergedSteps.push(...r.steps);
+            addUsage(mergedUsage, r.usage);
+            if (r._pendingHandoff && hop < MAX_HANDOFFS) {
+                handoffPath.push(currentAgent.constructor.name);
+                const ChildClass = r._pendingHandoff.spec.AgentClass;
+                currentAgent = new ChildClass();
+                currentInput = r._pendingHandoff.transitionMessage;
+                currentOpts = buildHandoffChildOptions(options, r._carriedMessages ?? []);
+                continue;
+            }
+            if (r._pendingHandoff) {
+                throw new Error(`[RudderJS AI] Exceeded max handoffs (${MAX_HANDOFFS}). Likely a cycle between agents.`);
+            }
+            finalResponse = handoffPath.length === 0
+                ? stripInternal(r)
+                : mergeFinalHandoff(stripInternal(r), mergedSteps, mergedUsage, handoffPath, currentAgent.constructor.name);
+            break;
+        }
+        if (!finalResponse) {
+            throw new Error(`[RudderJS AI] Exceeded max handoffs (${MAX_HANDOFFS}). Likely a cycle between agents.`);
+        }
+        resolveResponse(finalResponse);
+    }
+    async function* withRejectOnError() {
+        try {
+            yield* generateStream();
+        }
+        catch (err) {
+            rejectResponse(err);
+            throw err;
+        }
+    }
+    return {
+        stream: withRejectOnError(),
+        response: responsePromise,
+    };
+}
+/**
+ * Iteratively drive pending handoffs, carrying steps + usage forward.
+ * Used by the non-streaming path. (Streaming has its own iterative driver
+ * inline in {@link runAgentLoopStreaming} so chunks can flow as each hop's
+ * loop runs.)
+ */
+async function driveHandoffs(rootName, rootResult, pending, carriedMessages, origOptions, startHopCount) {
+    const mergedSteps = [...rootResult.steps];
+    const mergedUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
+    addUsage(mergedUsage, rootResult.usage);
+    const handoffPath = [rootName];
+    let currentPending = pending;
+    let currentCarried = carriedMessages;
+    let hopCount = startHopCount;
+    for (;;) {
+        if (hopCount >= MAX_HANDOFFS) {
+            throw new Error(`[RudderJS AI] Exceeded max handoffs (${MAX_HANDOFFS}). Likely a cycle between agents.`);
+        }
+        const ChildClass = currentPending.spec.AgentClass;
+        handoffPath.push(ChildClass.name);
+        const child = new ChildClass();
+        const childOpts = buildHandoffChildOptions(origOptions, currentCarried);
+        const childOnce = await runAgentLoopOnce(child, currentPending.transitionMessage, childOpts);
+        mergedSteps.push(...childOnce.steps);
+        addUsage(mergedUsage, childOnce.usage);
+        if (childOnce._pendingHandoff) {
+            currentPending = childOnce._pendingHandoff;
+            currentCarried = childOnce._carriedMessages ?? [];
+            hopCount++;
+            continue;
+        }
+        return {
+            ...stripInternal(childOnce),
+            steps: mergedSteps,
+            usage: mergedUsage,
+            handoffPath,
+        };
+    }
+}
+/** Merge the terminal hop's response with carried steps / usage / path. */
+function mergeFinalHandoff(terminal, mergedSteps, mergedUsage, pathPrefix, terminalName) {
+    return {
+        ...terminal,
+        steps: mergedSteps,
+        usage: mergedUsage,
+        handoffPath: [...pathPrefix, terminalName],
+    };
+}
+/**
+ * Build the {@link AgentPromptOptions} for a child agent invoked via
+ * handoff. The parent's carried message log replaces the child's input
+ * (so the child sees the full conversation up to the handoff point) but
+ * the child still prepends its own `instructions()` as the system message
+ * during {@link initializeLoop}, so we drop the parent's leading system
+ * message to avoid double-prefixing.
+ *
+ * Per-call options that make sense to carry across (signal, attachments,
+ * tool/middleware overrides) are preserved; `messages` and `history` are
+ * deliberately overridden.
+ */
+function buildHandoffChildOptions(parentOptions, carriedMessages) {
+    const stripped = carriedMessages.length > 0 && carriedMessages[0]?.role === 'system'
+        ? carriedMessages.slice(1)
+        : carriedMessages;
+    // We append the model's transition message as the next user message so
+    // the child has something concrete to respond to (it's also passed as
+    // `currentInput` below — but feeding it via `messages` mode keeps the
+    // history coherent and prevents `initializeLoop` from also prepending
+    // an `input` user message).
+    return {
+        ...(parentOptions ?? {}),
+        messages: stripped,
+    };
+}
+/** Strip the internal `_pendingHandoff` / `_carriedMessages` fields before surfacing the response to public callers. */
+function stripInternal(r) {
+    const out = {
+        text: r.text,
+        steps: r.steps,
+        usage: r.usage,
+    };
+    if (r.conversationId !== undefined)
+        out.conversationId = r.conversationId;
+    if (r.finishReason !== undefined)
+        out.finishReason = r.finishReason;
+    if (r.pendingClientToolCalls !== undefined)
+        out.pendingClientToolCalls = r.pendingClientToolCalls;
+    if (r.pendingApprovalToolCall !== undefined)
+        out.pendingApprovalToolCall = r.pendingApprovalToolCall;
+    if (r.resumedToolMessages !== undefined)
+        out.resumedToolMessages = r.resumedToolMessages;
+    if (r.handoffPath !== undefined)
+        out.handoffPath = r.handoffPath;
+    return out;
+}
+async function runAgentLoopOnce(a, input, options) {
     const { loopCtx, stopConditions } = await initializeLoop(a, input, options);
     const { ctx, middlewares, messages, steps, totalUsage } = loopCtx;
     try {
@@ -1333,7 +1804,7 @@ async function runAgentLoop(a, input, options) {
                 };
                 steps.push(step);
                 emitObserverStepCompleted(loopCtx, iteration, false);
-                if (loopCtx.stopForClientTools || loopCtx.stopForApproval)
+                if (loopCtx.stopForClientTools || loopCtx.stopForApproval || loopCtx.stopForHandoff)
                     break;
                 const shouldStop = stopConditions.some(cond => cond({ steps, iteration, lastMessage: response.message }));
                 if (shouldStop || response.finishReason !== 'tool_calls') {
@@ -1357,7 +1828,7 @@ async function runAgentLoop(a, input, options) {
     return result;
 }
 // ─── Agent Loop (streaming) ──────────────────────────────
-function runAgentLoopStreaming(a, input, options) {
+function runAgentLoopStreamingOnce(a, input, options) {
     let resolveResponse;
     let rejectResponse;
     const responsePromise = new Promise((resolve, reject) => {
@@ -1463,7 +1934,7 @@ function runAgentLoopStreaming(a, input, options) {
                     };
                     steps.push(step);
                     emitObserverStepCompleted(loopCtx, iteration, true);
-                    if (loopCtx.stopForClientTools || loopCtx.stopForApproval)
+                    if (loopCtx.stopForClientTools || loopCtx.stopForApproval || loopCtx.stopForHandoff)
                         break;
                     const shouldStop = stopConditions.some(cond => cond({ steps, iteration, lastMessage: step.message }));
                     if (shouldStop || finishReason !== 'tool_calls')