@rudderjs/ai 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +101 -8
  2. package/boost/guidelines.md +14 -2
  3. package/boost/skills/ai-tools/SKILL.md +14 -5
  4. package/dist/agent.d.ts +97 -1
  5. package/dist/agent.d.ts.map +1 -1
  6. package/dist/agent.js +692 -83
  7. package/dist/agent.js.map +1 -1
  8. package/dist/conversation-persistence.d.ts +46 -0
  9. package/dist/conversation-persistence.d.ts.map +1 -0
  10. package/dist/conversation-persistence.js +152 -0
  11. package/dist/conversation-persistence.js.map +1 -0
  12. package/dist/conversation.d.ts +2 -7
  13. package/dist/conversation.d.ts.map +1 -1
  14. package/dist/conversation.js +3 -1
  15. package/dist/conversation.js.map +1 -1
  16. package/dist/handoff.d.ts +95 -0
  17. package/dist/handoff.d.ts.map +1 -0
  18. package/dist/handoff.js +78 -0
  19. package/dist/handoff.js.map +1 -0
  20. package/dist/index.d.ts +8 -3
  21. package/dist/index.d.ts.map +1 -1
  22. package/dist/index.js +7 -1
  23. package/dist/index.js.map +1 -1
  24. package/dist/providers/anthropic.d.ts +9 -1
  25. package/dist/providers/anthropic.d.ts.map +1 -1
  26. package/dist/providers/anthropic.js +5 -5
  27. package/dist/providers/anthropic.js.map +1 -1
  28. package/dist/providers/bedrock.d.ts +60 -0
  29. package/dist/providers/bedrock.d.ts.map +1 -0
  30. package/dist/providers/bedrock.js +167 -0
  31. package/dist/providers/bedrock.js.map +1 -0
  32. package/dist/providers/openai.d.ts +5 -0
  33. package/dist/providers/openai.d.ts.map +1 -1
  34. package/dist/providers/openai.js +6 -0
  35. package/dist/providers/openai.js.map +1 -1
  36. package/dist/providers/openrouter.d.ts +43 -0
  37. package/dist/providers/openrouter.d.ts.map +1 -0
  38. package/dist/providers/openrouter.js +21 -0
  39. package/dist/providers/openrouter.js.map +1 -0
  40. package/dist/server/provider.d.ts.map +1 -1
  41. package/dist/server/provider.js +15 -0
  42. package/dist/server/provider.js.map +1 -1
  43. package/dist/sub-agent-run-store.d.ts +143 -0
  44. package/dist/sub-agent-run-store.d.ts.map +1 -0
  45. package/dist/sub-agent-run-store.js +80 -0
  46. package/dist/sub-agent-run-store.js.map +1 -0
  47. package/dist/tool.d.ts +59 -0
  48. package/dist/tool.d.ts.map +1 -1
  49. package/dist/tool.js +32 -0
  50. package/dist/tool.js.map +1 -1
  51. package/dist/types.d.ts +136 -7
  52. package/dist/types.d.ts.map +1 -1
  53. package/package.json +3 -2
package/dist/agent.js CHANGED
@@ -1,8 +1,10 @@
1
1
  import { z } from 'zod';
2
2
  import { AiRegistry } from './registry.js';
3
- import { isPauseForClientToolsChunk, toolDefinition, toolToSchema } from './tool.js';
3
+ import { isPauseForApprovalChunk, isPauseForClientToolsChunk, pauseForApproval, pauseForClientTools, toolDefinition, toolToSchema } from './tool.js';
4
+ import { isHandoffTool } from './handoff.js';
4
5
  import { attachmentsToContentParts, getMessageText } from './attachment.js';
5
6
  import { QueuedPromptBuilder } from './queue-job.js';
7
+ import { resolveAutoPersistSpec, runWithPersistence, runWithPersistenceStreaming, } from './conversation-persistence.js';
6
8
  import { runOnConfig, runOnChunk, runOnBeforeToolCall, runOnAfterToolCall, runSequential, runOnUsage, runOnAbort, runOnError, } from './middleware.js';
7
9
  // ─── AI Observer (lazy accessor) ─────────────────────────
8
10
  function _getAiObservers() {
@@ -79,6 +81,36 @@ export class Agent {
79
81
  * }
80
82
  */
81
83
  cacheable() { return undefined; }
84
+ /**
85
+ * Opt into auto-persisted conversation behavior. Override on a subclass
86
+ * to declare *which* user owns the thread and (optionally) which
87
+ * specific thread, and the framework will load history before each
88
+ * `prompt()`/`stream()` call and append the new turn after it — without
89
+ * any caller having to remember `forUser()` / `continue()`.
90
+ *
91
+ * Returning `false` (the default) disables auto-persist; the agent runs
92
+ * stateless. Returning a {@link ConversationalSpec} opts in:
93
+ *
94
+ * @example
95
+ * class ChatAgent extends Agent {
96
+ * conversational() {
97
+ * return { user: Auth.user()?.id } // null user → falsy → opt-out
98
+ * }
99
+ * }
100
+ *
101
+ * await new ChatAgent().prompt('Hi') // auto-loads + auto-saves
102
+ *
103
+ * **Precedence (high → low):**
104
+ * 1. Explicit `agent.forUser(id).prompt()` / `agent.continue(id).prompt()`
105
+ * 2. Per-call `prompt(input, { conversation: false | {...} })`
106
+ * 3. This method's return value
107
+ *
108
+ * Async returns are supported — useful when the user identity is fetched
109
+ * from an async DI binding.
110
+ */
111
+ conversational() {
112
+ return false;
113
+ }
82
114
  /**
83
115
  * Default for `AgentPromptOptions.parallelTools`. When `true` (default),
84
116
  * multiple tool calls within a single step run their `execute()` functions
@@ -88,11 +120,15 @@ export class Agent {
88
120
  parallelTools() { return true; }
89
121
  /** Run the agent with a prompt (non-streaming) */
90
122
  async prompt(input, options) {
123
+ const spec = await resolveAutoPersistSpec(() => this.conversational(), options?.conversation);
124
+ if (spec) {
125
+ return runWithPersistence(spec, this.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoop(this, input, effOptions));
126
+ }
91
127
  return runAgentLoop(this, input, options);
92
128
  }
93
129
  /** Run the agent with a prompt (streaming) */
94
130
  stream(input, options) {
95
- return runAgentLoopStreaming(this, input, options);
131
+ return runStreamWithMaybeAutoPersist(this, input, options);
96
132
  }
97
133
  /** Queue the prompt for background execution */
98
134
  queue(input, options) {
@@ -107,17 +143,302 @@ export class Agent {
107
143
  return new ConversableAgent(this).continue(conversationId);
108
144
  }
109
145
  asTool(options) {
146
+ if (options.suspendable && !options.streaming) {
147
+ throw new Error('[RudderJS AI] asTool: `suspendable` requires `streaming: true` (or a projector). Silent suspend would leave the parent UI with no progress signal between sub-agent invocations.');
148
+ }
110
149
  const schema = options.inputSchema ?? z.object({ prompt: z.string() });
111
150
  const promptOf = options.prompt ?? ((input) => input.prompt);
112
151
  const modelOutput = options.modelOutput ?? ((response) => response.text);
152
+ if (!options.streaming) {
153
+ // 1.2.0 zero-config path — single prompt() call, single AgentResponse out.
154
+ return toolDefinition({
155
+ name: options.name,
156
+ description: options.description,
157
+ inputSchema: schema,
158
+ })
159
+ .server((input) => this.prompt(promptOf(input)))
160
+ .modelOutput(modelOutput);
161
+ }
162
+ const project = options.streaming === true ? defaultSubAgentProjector : options.streaming;
163
+ const innerAgent = this; // eslint-disable-line @typescript-eslint/no-this-alias
164
+ const agentName = options.name;
165
+ const suspendable = options.suspendable;
166
+ const generatorExecute = async function* (input) {
167
+ const userPrompt = promptOf(input);
168
+ yield { kind: 'agent_start', agentName };
169
+ const streamOpts = suspendable
170
+ ? { toolCallStreamingMode: 'stop-on-client-tool' }
171
+ : undefined;
172
+ const { stream, response } = innerAgent.stream(userPrompt, streamOpts);
173
+ for await (const chunk of stream) {
174
+ const update = project(chunk);
175
+ if (update)
176
+ yield update;
177
+ }
178
+ const result = await response;
179
+ if (suspendable &&
180
+ result.finishReason === 'client_tool_calls' &&
181
+ result.pendingClientToolCalls?.length) {
182
+ const subRunId = generateSubRunId();
183
+ const snapshot = {
184
+ messages: buildSubAgentSnapshotMessages(userPrompt, result),
185
+ pendingToolCallIds: result.pendingClientToolCalls.map((tc) => tc.id),
186
+ stepsSoFar: result.steps.length,
187
+ tokensSoFar: result.usage?.totalTokens ?? 0,
188
+ pauseKind: 'client_tool',
189
+ };
190
+ await suspendable.runStore.store(subRunId, snapshot);
191
+ yield { kind: 'subagent_paused', subRunId, pendingToolCallIds: snapshot.pendingToolCallIds };
192
+ yield pauseForClientTools(result.pendingClientToolCalls, subRunId);
193
+ // Unreachable — the parent loop halts iteration after the pause chunk.
194
+ return undefined;
195
+ }
196
+ if (suspendable &&
197
+ result.finishReason === 'tool_approval_required' &&
198
+ result.pendingApprovalToolCall) {
199
+ const subRunId = generateSubRunId();
200
+ const { toolCall: pendingCall, isClientTool } = result.pendingApprovalToolCall;
201
+ const snapshot = {
202
+ messages: buildSubAgentSnapshotMessages(userPrompt, result),
203
+ pendingToolCallIds: [pendingCall.id],
204
+ stepsSoFar: result.steps.length,
205
+ tokensSoFar: result.usage?.totalTokens ?? 0,
206
+ pauseKind: 'approval',
207
+ pendingApprovalToolCall: { toolCall: pendingCall, isClientTool },
208
+ };
209
+ await suspendable.runStore.store(subRunId, snapshot);
210
+ yield {
211
+ kind: 'subagent_paused_approval',
212
+ subRunId,
213
+ toolCall: pendingCall,
214
+ isClientTool,
215
+ };
216
+ yield pauseForApproval(pendingCall, isClientTool, subRunId);
217
+ // Unreachable — the parent loop halts iteration after the pause chunk.
218
+ return undefined;
219
+ }
220
+ yield {
221
+ kind: 'agent_done',
222
+ steps: result.steps.length,
223
+ tokens: result.usage?.totalTokens ?? 0,
224
+ };
225
+ return result;
226
+ };
113
227
  return toolDefinition({
114
228
  name: options.name,
115
229
  description: options.description,
116
230
  inputSchema: schema,
117
231
  })
118
- .server((input) => this.prompt(promptOf(input)))
232
+ .server(generatorExecute)
119
233
  .modelOutput(modelOutput);
120
234
  }
235
+ /**
236
+ * Resume a sub-agent run that previously paused with either
237
+ * `pauseForClientTools` (client-tool pause) or `pauseForApproval`
238
+ * (approval pause), typically from {@link Agent.asTool} with
239
+ * `suspendable: { runStore }` set. The snapshot's `pauseKind`
240
+ * (default `'client_tool'`) selects the resume contract:
241
+ *
242
+ * - **`client_tool`** — `clientToolResults` must carry one entry per
243
+ * id in the snapshot's `pendingToolCallIds`. Results are appended
244
+ * to the inner-agent message history and the loop re-runs.
245
+ * - **`approval`** — `approvedToolCallIds` and/or
246
+ * `rejectedToolCallIds` must reference the single pending id.
247
+ * `clientToolResults` must be empty; the loop re-runs with the
248
+ * approval decision injected via `AgentPromptOptions`.
249
+ *
250
+ * Returns either a `'completed'` result (the inner agent finished),
251
+ * a `'paused'` continuation pointing at a fresh `subRunId` for the
252
+ * next round-trip, or stays `'paused'` if the inner loop hits another
253
+ * gate. The resume can pause on a different kind than it started on
254
+ * (e.g. an approval pause that, once approved, hits a client-tool
255
+ * pause on the next step).
256
+ *
257
+ * @example Client-tool resume
258
+ * const r = await Agent.resumeAsTool(subRunId, browserResults, { runStore, agent: subAgent })
259
+ *
260
+ * @example Approval resume
261
+ * const r = await Agent.resumeAsTool(subRunId, [], {
262
+ * runStore, agent: subAgent,
263
+ * approvedToolCallIds: ['inner-call-id'],
264
+ * })
265
+ */
266
+ static async resumeAsTool(subRunId, clientToolResults, options) {
267
+ const snapshot = await options.runStore.consume(subRunId);
268
+ if (!snapshot) {
269
+ throw new Error(`[RudderJS AI] resumeAsTool: subRunId "${subRunId}" expired or never existed.`);
270
+ }
271
+ const pauseKind = snapshot.pauseKind ?? 'client_tool';
272
+ const pending = new Set(snapshot.pendingToolCallIds);
273
+ let messages;
274
+ const promptOpts = { toolCallStreamingMode: 'stop-on-client-tool' };
275
+ if (pauseKind === 'client_tool') {
276
+ // Forgery guard — every incoming tool-result id must be in the pending set.
277
+ const seen = new Set();
278
+ for (const r of clientToolResults) {
279
+ if (!pending.has(r.toolCallId)) {
280
+ throw new Error(`[RudderJS AI] resumeAsTool: toolCallId "${r.toolCallId}" was not in the pending set.`);
281
+ }
282
+ if (seen.has(r.toolCallId)) {
283
+ throw new Error(`[RudderJS AI] resumeAsTool: duplicate result for toolCallId "${r.toolCallId}".`);
284
+ }
285
+ seen.add(r.toolCallId);
286
+ }
287
+ // Append client tool-result messages to the snapshot, in incoming order.
288
+ messages = [...snapshot.messages];
289
+ for (const r of clientToolResults) {
290
+ messages.push({
291
+ role: 'tool',
292
+ content: typeof r.result === 'string' ? r.result : JSON.stringify(r.result),
293
+ toolCallId: r.toolCallId,
294
+ });
295
+ }
296
+ }
297
+ else {
298
+ // Approval-pause resume — clientToolResults must be empty; either an
299
+ // approval or a rejection must be supplied for the pending id.
300
+ if (clientToolResults.length > 0) {
301
+ throw new Error('[RudderJS AI] resumeAsTool: snapshot.pauseKind === "approval" but clientToolResults was non-empty. Pass `approvedToolCallIds` or `rejectedToolCallIds` instead.');
302
+ }
303
+ const approved = options.approvedToolCallIds ?? [];
304
+ const rejected = options.rejectedToolCallIds ?? [];
305
+ for (const id of approved) {
306
+ if (!pending.has(id)) {
307
+ throw new Error(`[RudderJS AI] resumeAsTool: approvedToolCallId "${id}" was not in the pending set.`);
308
+ }
309
+ }
310
+ for (const id of rejected) {
311
+ if (!pending.has(id)) {
312
+ throw new Error(`[RudderJS AI] resumeAsTool: rejectedToolCallId "${id}" was not in the pending set.`);
313
+ }
314
+ }
315
+ if (approved.length === 0 && rejected.length === 0) {
316
+ throw new Error('[RudderJS AI] resumeAsTool: snapshot.pauseKind === "approval" requires `approvedToolCallIds` or `rejectedToolCallIds`.');
317
+ }
318
+ messages = [...snapshot.messages];
319
+ if (approved.length > 0)
320
+ promptOpts.approvedToolCallIds = approved;
321
+ if (rejected.length > 0)
322
+ promptOpts.rejectedToolCallIds = rejected;
323
+ }
324
+ promptOpts.messages = messages;
325
+ const result = await options.agent.prompt('', promptOpts);
326
+ if (result.finishReason === 'client_tool_calls' &&
327
+ result.pendingClientToolCalls?.length) {
328
+ const newSubRunId = generateSubRunId();
329
+ const newSnapshot = {
330
+ messages: buildResumeSnapshotMessages(messages, result),
331
+ pendingToolCallIds: result.pendingClientToolCalls.map((tc) => tc.id),
332
+ stepsSoFar: snapshot.stepsSoFar + result.steps.length,
333
+ tokensSoFar: snapshot.tokensSoFar + (result.usage?.totalTokens ?? 0),
334
+ pauseKind: 'client_tool',
335
+ ...(snapshot.meta !== undefined ? { meta: snapshot.meta } : {}),
336
+ };
337
+ await options.runStore.store(newSubRunId, newSnapshot);
338
+ return {
339
+ kind: 'paused',
340
+ subRunId: newSubRunId,
341
+ pauseKind: 'client_tool',
342
+ pendingToolCallIds: newSnapshot.pendingToolCallIds,
343
+ };
344
+ }
345
+ if (result.finishReason === 'tool_approval_required' &&
346
+ result.pendingApprovalToolCall) {
347
+ const newSubRunId = generateSubRunId();
348
+ const { toolCall: pendingCall, isClientTool } = result.pendingApprovalToolCall;
349
+ const newSnapshot = {
350
+ messages: buildResumeSnapshotMessages(messages, result),
351
+ pendingToolCallIds: [pendingCall.id],
352
+ stepsSoFar: snapshot.stepsSoFar + result.steps.length,
353
+ tokensSoFar: snapshot.tokensSoFar + (result.usage?.totalTokens ?? 0),
354
+ pauseKind: 'approval',
355
+ pendingApprovalToolCall: { toolCall: pendingCall, isClientTool },
356
+ ...(snapshot.meta !== undefined ? { meta: snapshot.meta } : {}),
357
+ };
358
+ await options.runStore.store(newSubRunId, newSnapshot);
359
+ return {
360
+ kind: 'paused',
361
+ subRunId: newSubRunId,
362
+ pauseKind: 'approval',
363
+ pendingToolCallIds: newSnapshot.pendingToolCallIds,
364
+ toolCall: pendingCall,
365
+ isClientTool,
366
+ };
367
+ }
368
+ return { kind: 'completed', response: result };
369
+ }
370
+ }
371
+ /**
372
+ * Default projection from inner-agent stream chunks to {@link SubAgentUpdate}
373
+ * events. Emits one `tool_call` per inner `tool-call` chunk and
374
+ * `agent_pending_approval` per inner `pending-approval` chunk; everything
375
+ * else is suppressed (the wrapping execute emits the `agent_start` /
376
+ * `agent_done` bookends and the suspend paths emit `subagent_paused` /
377
+ * `subagent_paused_approval`).
378
+ *
379
+ * Hosts wanting different cadence (e.g. surfacing `text-delta` previews
380
+ * or per-step usage) pass `streaming: chunk => …` and own the discriminator.
381
+ */
382
+ function defaultSubAgentProjector(chunk) {
383
+ if (chunk.type === 'tool-call' && chunk.toolCall?.name) {
384
+ return {
385
+ kind: 'tool_call',
386
+ tool: chunk.toolCall.name,
387
+ ...(chunk.toolCall.arguments ? { args: chunk.toolCall.arguments } : {}),
388
+ };
389
+ }
390
+ if (chunk.type === 'pending-approval' && chunk.toolCall && chunk.toolCall.id && chunk.toolCall.name) {
391
+ return {
392
+ kind: 'agent_pending_approval',
393
+ toolCall: chunk.toolCall,
394
+ isClientTool: !!chunk.isClientTool,
395
+ };
396
+ }
397
+ return null;
398
+ }
399
+ /**
400
+ * Reconstruct the inner-agent message history at the point the loop
401
+ * paused, so a subsequent {@link Agent.resumeAsTool} can rerun the loop
402
+ * with the appended client tool results. The shape is `[user, …(message
403
+ * + serverToolResults)*]` — system messages are omitted because the
404
+ * `messages` mode of the agent loop prepends `system` itself.
405
+ *
406
+ * Each step's `message` includes ALL `toolCalls` (server + client).
407
+ * Server-side `toolResults` are interleaved; client-side calls remain
408
+ * unfulfilled until resume appends their results.
409
+ */
410
+ function buildSubAgentSnapshotMessages(userPrompt, response) {
411
+ const out = [{ role: 'user', content: userPrompt }];
412
+ for (const step of response.steps) {
413
+ out.push(step.message);
414
+ for (const tr of step.toolResults) {
415
+ const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
416
+ out.push({ role: 'tool', content: resultStr, toolCallId: tr.toolCallId });
417
+ }
418
+ }
419
+ return out;
420
+ }
421
+ /**
422
+ * Snapshot reconstruction for a resume-time pause. The `priorMessages`
423
+ * already include the original user prompt + every step prior to the
424
+ * resume call. Append the freshly-completed steps' messages and any
425
+ * server-side tool results so the next resume sees the full history.
426
+ */
427
+ function buildResumeSnapshotMessages(priorMessages, response) {
428
+ const out = [...priorMessages];
429
+ for (const step of response.steps) {
430
+ out.push(step.message);
431
+ for (const tr of step.toolResults) {
432
+ const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
433
+ out.push({ role: 'tool', content: resultStr, toolCallId: tr.toolCallId });
434
+ }
435
+ }
436
+ return out;
437
+ }
438
+ function generateSubRunId() {
439
+ if (typeof globalThis.crypto?.randomUUID === 'function')
440
+ return globalThis.crypto.randomUUID();
441
+ return `sub-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 12)}`;
121
442
  }
122
443
  // ─── Conversable Agent (conversation persistence) ───────
123
444
  /**
@@ -140,84 +461,35 @@ export class ConversableAgent {
140
461
  return this;
141
462
  }
142
463
  async prompt(input, options) {
143
- const store = resolveConversationStore();
144
- if (!store)
145
- throw new Error('[RudderJS AI] No ConversationStore registered. Register one via the DI container with key "ai.conversations".');
146
- // Load or create conversation
147
- let history = options?.history ?? [];
148
- if (this._conversationId) {
149
- history = [...(await store.load(this._conversationId)), ...history];
150
- }
151
- else {
152
- const meta = this._userId ? { userId: this._userId } : undefined;
153
- this._conversationId = await store.create(undefined, meta);
154
- }
155
- const response = await runAgentLoop(this.agent, input, { ...options, history });
156
- // Persist messages
157
- const newMessages = [
158
- { role: 'user', content: input },
159
- ...response.steps.flatMap(s => {
160
- const msgs = [s.message];
161
- for (const tr of s.toolResults) {
162
- const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
163
- msgs.push({ role: 'tool', content: resultStr, toolCallId: tr.toolCallId });
164
- }
165
- return msgs;
166
- }),
167
- ];
168
- await store.append(this._conversationId, newMessages);
169
- return { text: response.text, steps: response.steps, usage: response.usage, conversationId: this._conversationId };
464
+ const spec = this.toSpec();
465
+ return runWithPersistence(spec, this.agent.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoop(this.agent, input, effOptions)).then((r) => {
466
+ // Track the resolved id back on the wrapper so a subsequent
467
+ // `wrapper.prompt()` call resumes the same thread.
468
+ if (r.conversationId)
469
+ this._conversationId = r.conversationId;
470
+ return r;
471
+ });
170
472
  }
171
473
  stream(input, options) {
172
- const store = resolveConversationStore();
173
- if (!store)
174
- throw new Error('[RudderJS AI] No ConversationStore registered. Register one via the DI container with key "ai.conversations".');
175
- // We need to handle async setup, so wrap the streaming
176
- let resolveReady;
177
- const ready = new Promise(r => { resolveReady = r; });
178
- let loadedHistory = [];
179
- let convId = this._conversationId;
180
- // Kick off async setup
181
- const setupPromise = (async () => {
182
- if (convId) {
183
- loadedHistory = await store.load(convId);
184
- }
185
- else {
186
- const meta = this._userId ? { userId: this._userId } : undefined;
187
- convId = await store.create(undefined, meta);
188
- this._conversationId = convId;
189
- }
190
- resolveReady();
191
- })();
192
- let resolveResponse;
193
- const responsePromise = new Promise(r => { resolveResponse = r; });
194
- const self = this; // eslint-disable-line @typescript-eslint/no-this-alias
195
- const storeRef = store;
196
- async function* generateStream() {
197
- await setupPromise;
198
- const history = [...loadedHistory, ...(options?.history ?? [])];
199
- const inner = runAgentLoopStreaming(self.agent, input, { ...options, history });
200
- for await (const chunk of inner.stream) {
201
- yield chunk;
202
- }
203
- const response = await inner.response;
204
- // Persist messages
205
- const newMessages = [
206
- { role: 'user', content: input },
207
- ...response.steps.flatMap(s => {
208
- const msgs = [s.message];
209
- for (const tr of s.toolResults) {
210
- const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
211
- msgs.push({ role: 'tool', content: resultStr, toolCallId: tr.toolCallId });
212
- }
213
- return msgs;
214
- }),
215
- ];
216
- await storeRef.append(convId, newMessages);
217
- const result = { text: response.text, steps: response.steps, usage: response.usage, conversationId: convId };
218
- resolveResponse(result);
219
- }
220
- return { stream: generateStream(), response: responsePromise };
474
+ const spec = this.toSpec();
475
+ const persisted = runWithPersistenceStreaming(spec, this.agent.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoopStreaming(this.agent, input, effOptions));
476
+ // Update the wrapper's id once the run completes.
477
+ persisted.response.then((r) => { if (r.conversationId)
478
+ this._conversationId = r.conversationId; }, () => { });
479
+ return persisted;
480
+ }
481
+ /**
482
+ * Translate the wrapper's explicit-form state (`forUser` / `continue`)
483
+ * into a {@link ConversationalSpec}. The explicit chain bypasses the
484
+ * agent's `conversational()` declaration entirely — `forUser` always
485
+ * wins over class defaults.
486
+ */
487
+ toSpec() {
488
+ if (this._conversationId)
489
+ return { user: this._userId ?? '', id: this._conversationId };
490
+ if (this._userId)
491
+ return { user: this._userId };
492
+ throw new Error('[RudderJS AI] ConversableAgent requires forUser() or continue() to be called before prompt().');
221
493
  }
222
494
  }
223
495
  // ─── Anonymous Agent ─────────────────────────────────────
@@ -267,6 +539,76 @@ export function setConversationStore(store) {
267
539
  function resolveConversationStore() {
268
540
  return _conversationStore;
269
541
  }
542
+ /**
543
+ * Streaming counterpart of `Agent.prompt`'s auto-persist branch. The spec
544
+ * resolution is async (since `conversational()` may return a Promise), so
545
+ * we defer the decision into the outer wrapper that handles the inner
546
+ * stream's setup the same way `runWithPersistenceStreaming` does for the
547
+ * persisted path.
548
+ */
549
+ function runStreamWithMaybeAutoPersist(a, input, options) {
550
+ // Synchronous fast path — most agents don't override `conversational()`,
551
+ // so we'd pay an extra microtask boundary on every streaming call. Bail
552
+ // out cheaply when we can prove the call is stateless.
553
+ const declared = a.conversational();
554
+ const isFast = (options?.conversation === false ||
555
+ (declared === false && (options?.conversation === undefined)));
556
+ if (isFast) {
557
+ return runAgentLoopStreaming(a, input, options);
558
+ }
559
+ // Async path — resolve the spec, then dispatch to the persisted or plain stream.
560
+ let resolveResp;
561
+ let rejectResp;
562
+ const responsePromise = new Promise((res, rej) => { resolveResp = res; rejectResp = rej; });
563
+ async function* outer() {
564
+ let spec;
565
+ try {
566
+ spec = await resolveAutoPersistSpec(() => a.conversational(), options?.conversation);
567
+ }
568
+ catch (err) {
569
+ rejectResp(err);
570
+ throw err;
571
+ }
572
+ if (!spec) {
573
+ const inner = runAgentLoopStreaming(a, input, options);
574
+ try {
575
+ for await (const chunk of inner.stream)
576
+ yield chunk;
577
+ }
578
+ catch (err) {
579
+ rejectResp(err);
580
+ throw err;
581
+ }
582
+ try {
583
+ const r = await inner.response;
584
+ resolveResp(r);
585
+ }
586
+ catch (err) {
587
+ rejectResp(err);
588
+ throw err;
589
+ }
590
+ return;
591
+ }
592
+ const persisted = runWithPersistenceStreaming(spec, a.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoopStreaming(a, input, effOptions));
593
+ try {
594
+ for await (const chunk of persisted.stream)
595
+ yield chunk;
596
+ }
597
+ catch (err) {
598
+ rejectResp(err);
599
+ throw err;
600
+ }
601
+ try {
602
+ const r = await persisted.response;
603
+ resolveResp(r);
604
+ }
605
+ catch (err) {
606
+ rejectResp(err);
607
+ throw err;
608
+ }
609
+ }
610
+ return { stream: outer(), response: responsePromise };
611
+ }
270
612
  // ─── Helpers ─────────────────────────────────────────────
271
613
  function getTools(a) {
272
614
  return 'tools' in a && typeof a.tools === 'function'
@@ -513,6 +855,12 @@ function buildAgentResponse(loopCtx) {
513
855
  result.pendingApprovalToolCall = loopCtx.pendingApprovalToolCall;
514
856
  if (loopCtx.resumedToolMessages.length > 0)
515
857
  result.resumedToolMessages = loopCtx.resumedToolMessages;
858
+ // Internal — consumed by the handoff-aware wrapper, then stripped before
859
+ // surfacing to public callers.
860
+ if (loopCtx.pendingHandoff) {
861
+ result._pendingHandoff = loopCtx.pendingHandoff;
862
+ result._carriedMessages = loopCtx.messages;
863
+ }
516
864
  return result;
517
865
  }
518
866
  /**
@@ -535,7 +883,15 @@ async function* executeToolPhase(loopCtx, toolCalls, assistantMessage) {
535
883
  // agent-level override which defaults to `true`. Single-tool batches
536
884
  // route through the serial path either way (no parallelism to gain, and
537
885
  // serial preserves live `tool-update` streaming for that one tool).
538
- const parallel = (options?.parallelTools ?? loopCtx.agent.parallelTools()) && toolCalls.length > 1;
886
+ //
887
+ // Handoffs always force serial dispatch — the parent loop has to halt
888
+ // immediately on the first handoff and synthesize "skipped" results for
889
+ // any sibling calls. Handling that across the parallel classify/replay
890
+ // phases is doable but adds complexity for negligible benefit (the model
891
+ // rarely emits parallel siblings alongside a handoff, and even then,
892
+ // running them while the agent is being torn down is wasted work).
893
+ const hasHandoff = toolCalls.some(tc => isHandoffTool(loopCtx.toolMap.get(tc.name)));
894
+ const parallel = (options?.parallelTools ?? loopCtx.agent.parallelTools()) && toolCalls.length > 1 && !hasHandoff;
539
895
  if (parallel) {
540
896
  yield* runToolPhaseParallel(loopCtx, toolCalls, toolResults);
541
897
  }
@@ -564,6 +920,50 @@ async function* runToolPhaseSerial(loopCtx, toolCalls, toolResults) {
564
920
  yield { type: 'tool-result', toolCall: tc, result: unknownResult };
565
921
  continue;
566
922
  }
923
+ // Handoff — detected before the no-execute (client tool) branch because
924
+ // a handoff tool also has no `execute`, but it has wholly different
925
+ // semantics: pivot control to a new agent instead of pausing for the
926
+ // browser. The first handoff in a step wins; any subsequent tool calls
927
+ // in the same step are skipped with a synthetic "skipped: handed off"
928
+ // tool result so the message log stays well-formed for replay.
929
+ if (loopCtx.stopForHandoff) {
930
+ const skippedResult = 'Skipped: parent agent handed off to another agent.';
931
+ toolResults.push({ toolCallId: tc.id, result: skippedResult });
932
+ messages.push({ role: 'tool', content: skippedResult, toolCallId: tc.id });
933
+ yield { type: 'tool-call', toolCall: tc };
934
+ yield { type: 'tool-result', toolCall: tc, result: skippedResult };
935
+ continue;
936
+ }
937
+ if (isHandoffTool(tool)) {
938
+ const spec = tool.__handoffSpec;
939
+ const validation = validateToolArgs(tool, tc.arguments);
940
+ // Handoff payload defaults to `{ message: string }`; custom schemas
941
+ // are accepted but the loop only uses `args.message` (string) as the
942
+ // transition prompt. Anything else surfaces in the conversation as
943
+ // the args of the synthetic tool-call.
944
+ const args = validation.ok ? validation.value : tc.arguments;
945
+ const transitionMessage = typeof args['message'] === 'string' ? args['message'] : '';
946
+ const handoffResult = `Handed off to ${spec.AgentClass.name}.`;
947
+ toolResults.push({ toolCallId: tc.id, result: handoffResult });
948
+ messages.push({ role: 'tool', content: handoffResult, toolCallId: tc.id });
949
+ yield { type: 'tool-call', toolCall: tc };
950
+ yield { type: 'tool-result', toolCall: tc, result: handoffResult };
951
+ yield {
952
+ type: 'handoff',
953
+ handoff: {
954
+ from: loopCtx.agent.constructor.name,
955
+ to: spec.AgentClass.name,
956
+ ...(transitionMessage ? { message: transitionMessage } : {}),
957
+ },
958
+ };
959
+ loopCtx.pendingHandoff = { spec, transitionMessage, parentToolCallId: tc.id };
960
+ loopCtx.stopForHandoff = true;
961
+ // Do NOT break — keep iterating so any sibling tool calls in this
962
+ // step get their synthetic "skipped" tool results before the loop
963
+ // exits. This preserves message-log invariants for downstream
964
+ // persistence.
965
+ continue;
966
+ }
567
967
  if (!tool.execute) {
568
968
  // Client tool — no server-side handler.
569
969
  if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
@@ -665,6 +1065,16 @@ async function* runToolPhaseSerial(loopCtx, toolCalls, toolResults) {
665
1065
  paused = true;
666
1066
  break;
667
1067
  }
1068
+ if (isPauseForApprovalChunk(step.value)) {
1069
+ loopCtx.pendingApprovalToolCall = {
1070
+ toolCall: step.value.toolCall,
1071
+ isClientTool: step.value.isClientTool,
1072
+ };
1073
+ loopCtx.loopFinishReason = 'tool_approval_required';
1074
+ loopCtx.stopForApproval = true;
1075
+ paused = true;
1076
+ break;
1077
+ }
668
1078
  const updateChunk = { type: 'tool-update', toolCall: tc, update: step.value };
669
1079
  if (middlewares.length > 0) {
670
1080
  const transformed = runOnChunk(middlewares, ctx, updateChunk);
@@ -916,6 +1326,16 @@ async function runToolExecution(loopCtx, outcome) {
916
1326
  paused = true;
917
1327
  break;
918
1328
  }
1329
+ if (isPauseForApprovalChunk(step.value)) {
1330
+ loopCtx.pendingApprovalToolCall = {
1331
+ toolCall: step.value.toolCall,
1332
+ isClientTool: step.value.isClientTool,
1333
+ };
1334
+ loopCtx.loopFinishReason = 'tool_approval_required';
1335
+ loopCtx.stopForApproval = true;
1336
+ paused = true;
1337
+ break;
1338
+ }
919
1339
  const updateChunk = { type: 'tool-update', toolCall: outcome.tc, update: step.value };
920
1340
  if (middlewares.length > 0) {
921
1341
  const transformed = runOnChunk(middlewares, ctx, updateChunk);
@@ -988,6 +1408,7 @@ async function initializeLoop(a, input, options) {
988
1408
  stopForApproval: false,
989
1409
  resumedToolMessages: [],
990
1410
  failoverAttempts: 0,
1411
+ stopForHandoff: false,
991
1412
  };
992
1413
  // Resume server tools left pending by a previous approval round-trip.
993
1414
  {
@@ -1049,7 +1470,195 @@ async function runIterationPrelude(loopCtx, iteration) {
1049
1470
  return { currentModel };
1050
1471
  }
1051
1472
  // ─── Agent Loop (non-streaming) ──────────────────────────
1473
/**
 * Upper bound on agent-to-agent handoffs within a single `prompt()` /
 * `stream()` invocation. Real workflows rarely hop more than once or twice
 * (e.g. a triage agent routing to a specialist), so reaching this ceiling
 * almost always means the agents are bouncing the conversation back and
 * forth. A descriptive error beats letting the cycle burn token budget.
 */
const MAX_HANDOFFS = 5;
1481
/**
 * Non-streaming entry point for the agent loop. Runs
 * {@link runAgentLoopOnce} a single time; if the model invoked a
 * {@link handoff} tool, the pending handoff is driven to completion by
 * {@link driveHandoffs}, which constructs each target agent in turn and
 * folds every hop's steps and usage into the final result. Otherwise the
 * one-shot result is returned directly, with internal bookkeeping fields
 * (`_pendingHandoff` / `_carriedMessages`) stripped. `handoffPath` on the
 * merged result records the chain of agent class names traversed.
 */
async function runAgentLoop(a, input, options) {
    const first = await runAgentLoopOnce(a, input, options);
    const pending = first._pendingHandoff;
    if (!pending) {
        return stripInternal(first);
    }
    const carried = first._carriedMessages ?? [];
    return await driveHandoffs(a.constructor.name, first, pending, carried, options, 0);
}
1497
/**
 * Streaming counterpart to {@link runAgentLoop}. Runs one streaming loop
 * per agent and pivots to the next agent whenever a hop ends with a
 * pending handoff. Chunks from every hop are emitted through a single
 * `AsyncIterable`; the `response` promise settles with the merged terminal
 * state once the stream is drained, or rejects if any hop throws.
 */
function runAgentLoopStreaming(a, input, options) {
    let resolveResponse;
    let rejectResponse;
    const responsePromise = new Promise((resolve, reject) => {
        resolveResponse = resolve;
        rejectResponse = reject;
    });
    async function* generateStream() {
        let currentAgent = a;
        let currentInput = input;
        let currentOpts = options;
        const mergedSteps = [];
        const mergedUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
        const handoffPath = [];
        let finalResponse;
        for (let hop = 0; hop <= MAX_HANDOFFS; hop++) {
            const inner = runAgentLoopStreamingOnce(currentAgent, currentInput, currentOpts);
            // Observe the inner response promise up front so that a rejection
            // (e.g. the caller's AbortSignal firing mid-stream) has already
            // been handled by the time the `for await` re-throws. Without
            // this, Node reports an unhandledRejection in the gap before
            // `withRejectOnError` catches.
            inner.response.catch(() => { });
            for await (const chunk of inner.stream) {
                yield chunk;
            }
            const hopResult = await inner.response;
            mergedSteps.push(...hopResult.steps);
            addUsage(mergedUsage, hopResult.usage);
            const pending = hopResult._pendingHandoff;
            if (pending) {
                if (hop >= MAX_HANDOFFS) {
                    throw new Error(`[RudderJS AI] Exceeded max handoffs (${MAX_HANDOFFS}). Likely a cycle between agents.`);
                }
                handoffPath.push(currentAgent.constructor.name);
                const TargetClass = pending.spec.AgentClass;
                currentAgent = new TargetClass();
                currentInput = pending.transitionMessage;
                // `messages` is replaced per hop, so basing each child's
                // options on the caller's original `options` matches the
                // non-streaming driver's behavior.
                currentOpts = buildHandoffChildOptions(options, hopResult._carriedMessages ?? []);
                continue;
            }
            finalResponse = handoffPath.length === 0
                ? stripInternal(hopResult)
                : mergeFinalHandoff(stripInternal(hopResult), mergedSteps, mergedUsage, handoffPath, currentAgent.constructor.name);
            break;
        }
        if (!finalResponse) {
            // Defensive: unreachable given the in-loop ceiling check above.
            throw new Error(`[RudderJS AI] Exceeded max handoffs (${MAX_HANDOFFS}). Likely a cycle between agents.`);
        }
        resolveResponse(finalResponse);
    }
    async function* withRejectOnError() {
        try {
            yield* generateStream();
        }
        catch (err) {
            rejectResponse(err);
            throw err;
        }
    }
    return {
        stream: withRejectOnError(),
        response: responsePromise,
    };
}
1566
/**
 * Iterative driver for pending handoffs on the non-streaming path. Each
 * iteration constructs the next target agent, replays the carried
 * conversation into it, and folds that hop's steps and usage into the
 * running totals. Terminates when a hop completes without requesting
 * another handoff, or throws once {@link MAX_HANDOFFS} is reached.
 * (Streaming has its own inline driver in {@link runAgentLoopStreaming}
 * so chunks can flow while each hop's loop runs.)
 */
async function driveHandoffs(rootName, rootResult, pending, carriedMessages, origOptions, startHopCount) {
    const mergedSteps = [...rootResult.steps];
    const mergedUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
    addUsage(mergedUsage, rootResult.usage);
    const handoffPath = [rootName];
    let nextPending = pending;
    let nextCarried = carriedMessages;
    let hops = startHopCount;
    while (true) {
        if (hops >= MAX_HANDOFFS) {
            throw new Error(`[RudderJS AI] Exceeded max handoffs (${MAX_HANDOFFS}). Likely a cycle between agents.`);
        }
        const TargetClass = nextPending.spec.AgentClass;
        handoffPath.push(TargetClass.name);
        const targetAgent = new TargetClass();
        const targetOpts = buildHandoffChildOptions(origOptions, nextCarried);
        const hopResult = await runAgentLoopOnce(targetAgent, nextPending.transitionMessage, targetOpts);
        mergedSteps.push(...hopResult.steps);
        addUsage(mergedUsage, hopResult.usage);
        if (!hopResult._pendingHandoff) {
            return {
                ...stripInternal(hopResult),
                steps: mergedSteps,
                usage: mergedUsage,
                handoffPath,
            };
        }
        nextPending = hopResult._pendingHandoff;
        nextCarried = hopResult._carriedMessages ?? [];
        hops++;
    }
}
1605
/** Combine the terminal hop's response with the accumulated steps, usage, and handoff path. */
function mergeFinalHandoff(terminal, mergedSteps, mergedUsage, pathPrefix, terminalName) {
    const handoffPath = [...pathPrefix, terminalName];
    return Object.assign({}, terminal, {
        steps: mergedSteps,
        usage: mergedUsage,
        handoffPath,
    });
}
1614
/**
 * Derive the per-call options for a child agent entered via handoff.
 *
 * The parent's carried message log becomes the child's `messages`, so the
 * child sees the whole conversation up to the handoff point. The child
 * still prepends its own `instructions()` system message in
 * {@link initializeLoop}, so a leading system message inherited from the
 * parent is dropped here to avoid double-prefixing. Every other per-call
 * option (signal, attachments, tool/middleware overrides) passes through
 * untouched.
 *
 * Feeding the conversation via `messages` mode keeps the history coherent
 * and prevents `initializeLoop` from also prepending the transition
 * message as an extra `input` user message.
 *
 * NOTE(review): only `messages` is replaced here — a parent-supplied
 * `history` option passes through unchanged; confirm `messages` takes
 * precedence over `history` in `initializeLoop`.
 */
function buildHandoffChildOptions(parentOptions, carriedMessages) {
    let history = carriedMessages;
    if (history.length > 0 && history[0]?.role === 'system') {
        history = history.slice(1);
    }
    const base = parentOptions ?? {};
    return { ...base, messages: history };
}
1640
/**
 * Build the public response object: copies the core fields plus any
 * optional ones that are present, leaving behind internal bookkeeping
 * such as `_pendingHandoff` / `_carriedMessages`.
 */
function stripInternal(r) {
    const out = {
        text: r.text,
        steps: r.steps,
        usage: r.usage,
    };
    // Optional fields are only copied when defined so the public shape
    // stays free of `undefined`-valued keys.
    const optionalKeys = [
        'conversationId',
        'finishReason',
        'pendingClientToolCalls',
        'pendingApprovalToolCall',
        'resumedToolMessages',
        'handoffPath',
    ];
    for (const key of optionalKeys) {
        if (r[key] !== undefined) {
            out[key] = r[key];
        }
    }
    return out;
}
1661
+ async function runAgentLoopOnce(a, input, options) {
1053
1662
  const { loopCtx, stopConditions } = await initializeLoop(a, input, options);
1054
1663
  const { ctx, middlewares, messages, steps, totalUsage } = loopCtx;
1055
1664
  try {
@@ -1093,7 +1702,7 @@ async function runAgentLoop(a, input, options) {
1093
1702
  };
1094
1703
  steps.push(step);
1095
1704
  emitObserverStepCompleted(loopCtx, iteration, false);
1096
- if (loopCtx.stopForClientTools || loopCtx.stopForApproval)
1705
+ if (loopCtx.stopForClientTools || loopCtx.stopForApproval || loopCtx.stopForHandoff)
1097
1706
  break;
1098
1707
  const shouldStop = stopConditions.some(cond => cond({ steps, iteration, lastMessage: response.message }));
1099
1708
  if (shouldStop || response.finishReason !== 'tool_calls') {
@@ -1117,7 +1726,7 @@ async function runAgentLoop(a, input, options) {
1117
1726
  return result;
1118
1727
  }
1119
1728
  // ─── Agent Loop (streaming) ──────────────────────────────
1120
- function runAgentLoopStreaming(a, input, options) {
1729
+ function runAgentLoopStreamingOnce(a, input, options) {
1121
1730
  let resolveResponse;
1122
1731
  let rejectResponse;
1123
1732
  const responsePromise = new Promise((resolve, reject) => {
@@ -1223,7 +1832,7 @@ function runAgentLoopStreaming(a, input, options) {
1223
1832
  };
1224
1833
  steps.push(step);
1225
1834
  emitObserverStepCompleted(loopCtx, iteration, true);
1226
- if (loopCtx.stopForClientTools || loopCtx.stopForApproval)
1835
+ if (loopCtx.stopForClientTools || loopCtx.stopForApproval || loopCtx.stopForHandoff)
1227
1836
  break;
1228
1837
  const shouldStop = stopConditions.some(cond => cond({ steps, iteration, lastMessage: step.message }));
1229
1838
  if (shouldStop || finishReason !== 'tool_calls')