@rudderjs/ai 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +74 -2
  2. package/boost/guidelines.md +24 -1
  3. package/dist/agent.d.ts +104 -1
  4. package/dist/agent.d.ts.map +1 -1
  5. package/dist/agent.js +377 -79
  6. package/dist/agent.js.map +1 -1
  7. package/dist/conversation-persistence.d.ts +46 -0
  8. package/dist/conversation-persistence.d.ts.map +1 -0
  9. package/dist/conversation-persistence.js +152 -0
  10. package/dist/conversation-persistence.js.map +1 -0
  11. package/dist/conversation.d.ts +2 -7
  12. package/dist/conversation.d.ts.map +1 -1
  13. package/dist/conversation.js +3 -1
  14. package/dist/conversation.js.map +1 -1
  15. package/dist/index.d.ts +2 -1
  16. package/dist/index.d.ts.map +1 -1
  17. package/dist/index.js +2 -0
  18. package/dist/index.js.map +1 -1
  19. package/dist/providers/anthropic.d.ts +3 -0
  20. package/dist/providers/anthropic.d.ts.map +1 -1
  21. package/dist/providers/anthropic.js +61 -10
  22. package/dist/providers/anthropic.js.map +1 -1
  23. package/dist/providers/google-cache-registry.d.ts +145 -0
  24. package/dist/providers/google-cache-registry.d.ts.map +1 -0
  25. package/dist/providers/google-cache-registry.js +209 -0
  26. package/dist/providers/google-cache-registry.js.map +1 -0
  27. package/dist/providers/google.d.ts +21 -2
  28. package/dist/providers/google.d.ts.map +1 -1
  29. package/dist/providers/google.js +90 -36
  30. package/dist/providers/google.js.map +1 -1
  31. package/dist/providers/openai.d.ts +10 -1
  32. package/dist/providers/openai.d.ts.map +1 -1
  33. package/dist/providers/openai.js +57 -6
  34. package/dist/providers/openai.js.map +1 -1
  35. package/dist/server/provider.d.ts +8 -0
  36. package/dist/server/provider.d.ts.map +1 -1
  37. package/dist/server/provider.js +17 -1
  38. package/dist/server/provider.js.map +1 -1
  39. package/dist/sub-agent-run-store.d.ts +106 -0
  40. package/dist/sub-agent-run-store.d.ts.map +1 -0
  41. package/dist/sub-agent-run-store.js +80 -0
  42. package/dist/sub-agent-run-store.js.map +1 -0
  43. package/dist/types.d.ts +174 -6
  44. package/dist/types.d.ts.map +1 -1
  45. package/dist/util/hash.d.ts +11 -0
  46. package/dist/util/hash.d.ts.map +1 -0
  47. package/dist/util/hash.js +23 -0
  48. package/dist/util/hash.js.map +1 -0
  49. package/package.json +1 -1
package/dist/agent.js CHANGED
@@ -1,8 +1,9 @@
1
1
  import { z } from 'zod';
2
2
  import { AiRegistry } from './registry.js';
3
- import { isPauseForClientToolsChunk, toolDefinition, toolToSchema } from './tool.js';
3
+ import { isPauseForClientToolsChunk, pauseForClientTools, toolDefinition, toolToSchema } from './tool.js';
4
4
  import { attachmentsToContentParts, getMessageText } from './attachment.js';
5
5
  import { QueuedPromptBuilder } from './queue-job.js';
6
+ import { resolveAutoPersistSpec, runWithPersistence, runWithPersistenceStreaming, } from './conversation-persistence.js';
6
7
  import { runOnConfig, runOnChunk, runOnBeforeToolCall, runOnAfterToolCall, runSequential, runOnUsage, runOnAbort, runOnError, } from './middleware.js';
7
8
  // ─── AI Observer (lazy accessor) ─────────────────────────
8
9
  function _getAiObservers() {
@@ -55,6 +56,60 @@ export class Agent {
55
56
  temperature() { return undefined; }
56
57
  /** Max tokens for response */
57
58
  maxTokens() { return undefined; }
59
+ /**
60
+ * Declarative prompt-cache configuration.
61
+ *
62
+ * Override on a subclass to mark stable parts of the prompt as cacheable
63
+ * — provider adapters translate to native primitives (Anthropic
64
+ * `cache_control`, OpenAI `prompt_cache_key`, Google `cachedContent`)
65
+ * so cache hits can save 50–90% on input tokens for long system prompts,
66
+ * tool definitions, or stable conversation context.
67
+ *
68
+ * Returning `undefined` (the default) means no caching. Per-call override
69
+ * via `agent.prompt(input, { cache: false })` disables caching for that
70
+ * call; passing a {@link CacheableConfig} for `cache` replaces the agent
71
+ * default for that call.
72
+ *
73
+ * @example
74
+ * class SupportAgent extends Agent {
75
+ * instructions() { return LONG_SYSTEM_PROMPT }
76
+ * tools() { return [tool1, tool2, tool3] }
77
+ * cacheable() {
78
+ * return { instructions: true, tools: true }
79
+ * }
80
+ * }
81
+ */
82
+ cacheable() { return undefined; }
83
+ /**
84
+ * Opt into auto-persisted conversation behavior. Override on a subclass
85
+ * to declare *which* user owns the thread and (optionally) which
86
+ * specific thread, and the framework will load history before each
87
+ * `prompt()`/`stream()` call and append the new turn after it — without
88
+ * any caller having to remember `forUser()` / `continue()`.
89
+ *
90
+ * Returning `false` (the default) disables auto-persist; the agent runs
91
+ * stateless. Returning a {@link ConversationalSpec} opts in:
92
+ *
93
+ * @example
94
+ * class ChatAgent extends Agent {
95
+ * conversational() {
96
+ * return { user: Auth.user()?.id } // null user → falsy → opt-out
97
+ * }
98
+ * }
99
+ *
100
+ * await new ChatAgent().prompt('Hi') // auto-loads + auto-saves
101
+ *
102
+ * **Precedence (high → low):**
103
+ * 1. Explicit `agent.forUser(id).prompt()` / `agent.continue(id).prompt()`
104
+ * 2. Per-call `prompt(input, { conversation: false | {...} })`
105
+ * 3. This method's return value
106
+ *
107
+ * Async returns are supported — useful when the user identity is fetched
108
+ * from an async DI binding.
109
+ */
110
+ conversational() {
111
+ return false;
112
+ }
58
113
  /**
59
114
  * Default for `AgentPromptOptions.parallelTools`. When `true` (default),
60
115
  * multiple tool calls within a single step run their `execute()` functions
@@ -64,11 +119,15 @@ export class Agent {
64
119
  parallelTools() { return true; }
65
120
  /** Run the agent with a prompt (non-streaming) */
66
121
  async prompt(input, options) {
122
+ const spec = await resolveAutoPersistSpec(() => this.conversational(), options?.conversation);
123
+ if (spec) {
124
+ return runWithPersistence(spec, this.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoop(this, input, effOptions));
125
+ }
67
126
  return runAgentLoop(this, input, options);
68
127
  }
69
128
  /** Run the agent with a prompt (streaming) */
70
129
  stream(input, options) {
71
- return runAgentLoopStreaming(this, input, options);
130
+ return runStreamWithMaybeAutoPersist(this, input, options);
72
131
  }
73
132
  /** Queue the prompt for background execution */
74
133
  queue(input, options) {
@@ -83,17 +142,201 @@ export class Agent {
83
142
  return new ConversableAgent(this).continue(conversationId);
84
143
  }
85
144
  asTool(options) {
145
+ if (options.suspendable && !options.streaming) {
146
+ throw new Error('[RudderJS AI] asTool: `suspendable` requires `streaming: true` (or a projector). Silent suspend would leave the parent UI with no progress signal between sub-agent invocations.');
147
+ }
86
148
  const schema = options.inputSchema ?? z.object({ prompt: z.string() });
87
149
  const promptOf = options.prompt ?? ((input) => input.prompt);
88
150
  const modelOutput = options.modelOutput ?? ((response) => response.text);
151
+ if (!options.streaming) {
152
+ // 1.2.0 zero-config path — single prompt() call, single AgentResponse out.
153
+ return toolDefinition({
154
+ name: options.name,
155
+ description: options.description,
156
+ inputSchema: schema,
157
+ })
158
+ .server((input) => this.prompt(promptOf(input)))
159
+ .modelOutput(modelOutput);
160
+ }
161
+ const project = options.streaming === true ? defaultSubAgentProjector : options.streaming;
162
+ const innerAgent = this; // eslint-disable-line @typescript-eslint/no-this-alias
163
+ const agentName = options.name;
164
+ const suspendable = options.suspendable;
165
+ const generatorExecute = async function* (input) {
166
+ const userPrompt = promptOf(input);
167
+ yield { kind: 'agent_start', agentName };
168
+ const streamOpts = suspendable
169
+ ? { toolCallStreamingMode: 'stop-on-client-tool' }
170
+ : undefined;
171
+ const { stream, response } = innerAgent.stream(userPrompt, streamOpts);
172
+ for await (const chunk of stream) {
173
+ const update = project(chunk);
174
+ if (update)
175
+ yield update;
176
+ }
177
+ const result = await response;
178
+ if (suspendable &&
179
+ result.finishReason === 'client_tool_calls' &&
180
+ result.pendingClientToolCalls?.length) {
181
+ const subRunId = generateSubRunId();
182
+ const snapshot = {
183
+ messages: buildSubAgentSnapshotMessages(userPrompt, result),
184
+ pendingToolCallIds: result.pendingClientToolCalls.map((tc) => tc.id),
185
+ stepsSoFar: result.steps.length,
186
+ tokensSoFar: result.usage?.totalTokens ?? 0,
187
+ };
188
+ await suspendable.runStore.store(subRunId, snapshot);
189
+ yield { kind: 'subagent_paused', subRunId, pendingToolCallIds: snapshot.pendingToolCallIds };
190
+ yield pauseForClientTools(result.pendingClientToolCalls, subRunId);
191
+ // Unreachable — the parent loop halts iteration after the pause chunk.
192
+ return undefined;
193
+ }
194
+ yield {
195
+ kind: 'agent_done',
196
+ steps: result.steps.length,
197
+ tokens: result.usage?.totalTokens ?? 0,
198
+ };
199
+ return result;
200
+ };
89
201
  return toolDefinition({
90
202
  name: options.name,
91
203
  description: options.description,
92
204
  inputSchema: schema,
93
205
  })
94
- .server((input) => this.prompt(promptOf(input)))
206
+ .server(generatorExecute)
95
207
  .modelOutput(modelOutput);
96
208
  }
209
+ /**
210
+ * Resume a sub-agent run that previously paused with
211
+ * `pauseForClientTools` (typically from {@link Agent.asTool} with
212
+ * `suspendable: { runStore }` set). Loads the snapshot, validates the
213
+ * incoming tool-result ids against the pending set, and re-runs the
214
+ * inner loop with those results appended.
215
+ *
216
+ * Returns either a `'completed'` result (the inner agent finished) or
217
+ * a `'paused'` continuation pointing at a fresh `subRunId` for the
218
+ * next round-trip.
219
+ *
220
+ * @example
221
+ * const r = await Agent.resumeAsTool(subRunId, browserResults, { runStore, agent: subAgent })
222
+ * if (r.kind === 'completed') {
223
+ * feedToolResultBackToParent(r.response.text)
224
+ * } else {
225
+ * emitPendingClientToolsSse(r.subRunId, r.pendingToolCallIds)
226
+ * }
227
+ */
228
+ static async resumeAsTool(subRunId, clientToolResults, options) {
229
+ const snapshot = await options.runStore.consume(subRunId);
230
+ if (!snapshot) {
231
+ throw new Error(`[RudderJS AI] resumeAsTool: subRunId "${subRunId}" expired or never existed.`);
232
+ }
233
+ // Forgery guard — every incoming tool-result id must be in the pending set.
234
+ const pending = new Set(snapshot.pendingToolCallIds);
235
+ const seen = new Set();
236
+ for (const r of clientToolResults) {
237
+ if (!pending.has(r.toolCallId)) {
238
+ throw new Error(`[RudderJS AI] resumeAsTool: toolCallId "${r.toolCallId}" was not in the pending set.`);
239
+ }
240
+ if (seen.has(r.toolCallId)) {
241
+ throw new Error(`[RudderJS AI] resumeAsTool: duplicate result for toolCallId "${r.toolCallId}".`);
242
+ }
243
+ seen.add(r.toolCallId);
244
+ }
245
+ // Append client tool-result messages to the snapshot, in incoming order.
246
+ const messages = [...snapshot.messages];
247
+ for (const r of clientToolResults) {
248
+ messages.push({
249
+ role: 'tool',
250
+ content: typeof r.result === 'string' ? r.result : JSON.stringify(r.result),
251
+ toolCallId: r.toolCallId,
252
+ });
253
+ }
254
+ const result = await options.agent.prompt('', {
255
+ messages,
256
+ toolCallStreamingMode: 'stop-on-client-tool',
257
+ });
258
+ if (result.finishReason === 'client_tool_calls' &&
259
+ result.pendingClientToolCalls?.length) {
260
+ const newSubRunId = generateSubRunId();
261
+ const newSnapshot = {
262
+ messages: buildResumeSnapshotMessages(messages, result),
263
+ pendingToolCallIds: result.pendingClientToolCalls.map((tc) => tc.id),
264
+ stepsSoFar: snapshot.stepsSoFar + result.steps.length,
265
+ tokensSoFar: snapshot.tokensSoFar + (result.usage?.totalTokens ?? 0),
266
+ ...(snapshot.meta !== undefined ? { meta: snapshot.meta } : {}),
267
+ };
268
+ await options.runStore.store(newSubRunId, newSnapshot);
269
+ return {
270
+ kind: 'paused',
271
+ subRunId: newSubRunId,
272
+ pendingToolCallIds: newSnapshot.pendingToolCallIds,
273
+ };
274
+ }
275
+ return { kind: 'completed', response: result };
276
+ }
277
+ }
278
+ /**
279
+ * Default projection from inner-agent stream chunks to {@link SubAgentUpdate}
280
+ * events. Emits one `tool_call` per inner `tool-call` chunk; everything
281
+ * else is suppressed (the wrapping execute emits the `agent_start` /
282
+ * `agent_done` bookends and the suspend path emits `subagent_paused`).
283
+ *
284
+ * Hosts wanting different cadence (e.g. surfacing `text-delta` previews
285
+ * or per-step usage) pass `streaming: chunk => …` and own the discriminator.
286
+ */
287
+ function defaultSubAgentProjector(chunk) {
288
+ if (chunk.type === 'tool-call' && chunk.toolCall?.name) {
289
+ return {
290
+ kind: 'tool_call',
291
+ tool: chunk.toolCall.name,
292
+ ...(chunk.toolCall.arguments ? { args: chunk.toolCall.arguments } : {}),
293
+ };
294
+ }
295
+ return null;
296
+ }
297
+ /**
298
+ * Reconstruct the inner-agent message history at the point the loop
299
+ * paused, so a subsequent {@link Agent.resumeAsTool} can rerun the loop
300
+ * with the appended client tool results. The shape is `[user, …(message
301
+ * + serverToolResults)*]` — system messages are omitted because the
302
+ * `messages` mode of the agent loop prepends `system` itself.
303
+ *
304
+ * Each step's `message` includes ALL `toolCalls` (server + client).
305
+ * Server-side `toolResults` are interleaved; client-side calls remain
306
+ * unfulfilled until resume appends their results.
307
+ */
308
+ function buildSubAgentSnapshotMessages(userPrompt, response) {
309
+ const out = [{ role: 'user', content: userPrompt }];
310
+ for (const step of response.steps) {
311
+ out.push(step.message);
312
+ for (const tr of step.toolResults) {
313
+ const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
314
+ out.push({ role: 'tool', content: resultStr, toolCallId: tr.toolCallId });
315
+ }
316
+ }
317
+ return out;
318
+ }
319
+ /**
320
+ * Snapshot reconstruction for a resume-time pause. The `priorMessages`
321
+ * already include the original user prompt + every step prior to the
322
+ * resume call. Append the freshly-completed steps' messages and any
323
+ * server-side tool results so the next resume sees the full history.
324
+ */
325
+ function buildResumeSnapshotMessages(priorMessages, response) {
326
+ const out = [...priorMessages];
327
+ for (const step of response.steps) {
328
+ out.push(step.message);
329
+ for (const tr of step.toolResults) {
330
+ const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
331
+ out.push({ role: 'tool', content: resultStr, toolCallId: tr.toolCallId });
332
+ }
333
+ }
334
+ return out;
335
+ }
336
+ function generateSubRunId() {
337
+ if (typeof globalThis.crypto?.randomUUID === 'function')
338
+ return globalThis.crypto.randomUUID();
339
+ return `sub-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 12)}`;
97
340
  }
98
341
  // ─── Conversable Agent (conversation persistence) ───────
99
342
  /**
@@ -116,84 +359,35 @@ export class ConversableAgent {
116
359
  return this;
117
360
  }
118
361
  async prompt(input, options) {
119
- const store = resolveConversationStore();
120
- if (!store)
121
- throw new Error('[RudderJS AI] No ConversationStore registered. Register one via the DI container with key "ai.conversations".');
122
- // Load or create conversation
123
- let history = options?.history ?? [];
124
- if (this._conversationId) {
125
- history = [...(await store.load(this._conversationId)), ...history];
126
- }
127
- else {
128
- const meta = this._userId ? { userId: this._userId } : undefined;
129
- this._conversationId = await store.create(undefined, meta);
130
- }
131
- const response = await runAgentLoop(this.agent, input, { ...options, history });
132
- // Persist messages
133
- const newMessages = [
134
- { role: 'user', content: input },
135
- ...response.steps.flatMap(s => {
136
- const msgs = [s.message];
137
- for (const tr of s.toolResults) {
138
- const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
139
- msgs.push({ role: 'tool', content: resultStr, toolCallId: tr.toolCallId });
140
- }
141
- return msgs;
142
- }),
143
- ];
144
- await store.append(this._conversationId, newMessages);
145
- return { text: response.text, steps: response.steps, usage: response.usage, conversationId: this._conversationId };
362
+ const spec = this.toSpec();
363
+ return runWithPersistence(spec, this.agent.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoop(this.agent, input, effOptions)).then((r) => {
364
+ // Track the resolved id back on the wrapper so a subsequent
365
+ // `wrapper.prompt()` call resumes the same thread.
366
+ if (r.conversationId)
367
+ this._conversationId = r.conversationId;
368
+ return r;
369
+ });
146
370
  }
147
371
  stream(input, options) {
148
- const store = resolveConversationStore();
149
- if (!store)
150
- throw new Error('[RudderJS AI] No ConversationStore registered. Register one via the DI container with key "ai.conversations".');
151
- // We need to handle async setup, so wrap the streaming
152
- let resolveReady;
153
- const ready = new Promise(r => { resolveReady = r; });
154
- let loadedHistory = [];
155
- let convId = this._conversationId;
156
- // Kick off async setup
157
- const setupPromise = (async () => {
158
- if (convId) {
159
- loadedHistory = await store.load(convId);
160
- }
161
- else {
162
- const meta = this._userId ? { userId: this._userId } : undefined;
163
- convId = await store.create(undefined, meta);
164
- this._conversationId = convId;
165
- }
166
- resolveReady();
167
- })();
168
- let resolveResponse;
169
- const responsePromise = new Promise(r => { resolveResponse = r; });
170
- const self = this; // eslint-disable-line @typescript-eslint/no-this-alias
171
- const storeRef = store;
172
- async function* generateStream() {
173
- await setupPromise;
174
- const history = [...loadedHistory, ...(options?.history ?? [])];
175
- const inner = runAgentLoopStreaming(self.agent, input, { ...options, history });
176
- for await (const chunk of inner.stream) {
177
- yield chunk;
178
- }
179
- const response = await inner.response;
180
- // Persist messages
181
- const newMessages = [
182
- { role: 'user', content: input },
183
- ...response.steps.flatMap(s => {
184
- const msgs = [s.message];
185
- for (const tr of s.toolResults) {
186
- const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
187
- msgs.push({ role: 'tool', content: resultStr, toolCallId: tr.toolCallId });
188
- }
189
- return msgs;
190
- }),
191
- ];
192
- await storeRef.append(convId, newMessages);
193
- const result = { text: response.text, steps: response.steps, usage: response.usage, conversationId: convId };
194
- resolveResponse(result);
195
- }
196
- return { stream: generateStream(), response: responsePromise };
372
+ const spec = this.toSpec();
373
+ const persisted = runWithPersistenceStreaming(spec, this.agent.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoopStreaming(this.agent, input, effOptions));
374
+ // Update the wrapper's id once the run completes.
375
+ persisted.response.then((r) => { if (r.conversationId)
376
+ this._conversationId = r.conversationId; }, () => { });
377
+ return persisted;
378
+ }
379
+ /**
380
+ * Translate the wrapper's explicit-form state (`forUser` / `continue`)
381
+ * into a {@link ConversationalSpec}. The explicit chain bypasses the
382
+ * agent's `conversational()` declaration entirely — `forUser` always
383
+ * wins over class defaults.
384
+ */
385
+ toSpec() {
386
+ if (this._conversationId)
387
+ return { user: this._userId ?? '', id: this._conversationId };
388
+ if (this._userId)
389
+ return { user: this._userId };
390
+ throw new Error('[RudderJS AI] ConversableAgent requires forUser() or continue() to be called before prompt().');
197
391
  }
198
392
  }
199
393
  // ─── Anonymous Agent ─────────────────────────────────────
@@ -243,6 +437,76 @@ export function setConversationStore(store) {
243
437
  function resolveConversationStore() {
244
438
  return _conversationStore;
245
439
  }
440
+ /**
441
+ * Streaming counterpart of `Agent.prompt`'s auto-persist branch. The spec
442
+ * resolution is async (since `conversational()` may return a Promise), so
443
+ * we defer the decision into the outer wrapper that handles the inner
444
+ * stream's setup the same way `runWithPersistenceStreaming` does for the
445
+ * persisted path.
446
+ */
447
+ function runStreamWithMaybeAutoPersist(a, input, options) {
448
+ // Synchronous fast path — most agents don't override `conversational()`,
449
+ // so we'd pay an extra microtask boundary on every streaming call. Bail
450
+ // out cheaply when we can prove the call is stateless.
451
+ const declared = a.conversational();
452
+ const isFast = (options?.conversation === false ||
453
+ (declared === false && (options?.conversation === undefined)));
454
+ if (isFast) {
455
+ return runAgentLoopStreaming(a, input, options);
456
+ }
457
+ // Async path — resolve the spec, then dispatch to the persisted or plain stream.
458
+ let resolveResp;
459
+ let rejectResp;
460
+ const responsePromise = new Promise((res, rej) => { resolveResp = res; rejectResp = rej; });
461
+ async function* outer() {
462
+ let spec;
463
+ try {
464
+ spec = await resolveAutoPersistSpec(() => a.conversational(), options?.conversation);
465
+ }
466
+ catch (err) {
467
+ rejectResp(err);
468
+ throw err;
469
+ }
470
+ if (!spec) {
471
+ const inner = runAgentLoopStreaming(a, input, options);
472
+ try {
473
+ for await (const chunk of inner.stream)
474
+ yield chunk;
475
+ }
476
+ catch (err) {
477
+ rejectResp(err);
478
+ throw err;
479
+ }
480
+ try {
481
+ const r = await inner.response;
482
+ resolveResp(r);
483
+ }
484
+ catch (err) {
485
+ rejectResp(err);
486
+ throw err;
487
+ }
488
+ return;
489
+ }
490
+ const persisted = runWithPersistenceStreaming(spec, a.constructor.name, resolveConversationStore, input, options, (effOptions) => runAgentLoopStreaming(a, input, effOptions));
491
+ try {
492
+ for await (const chunk of persisted.stream)
493
+ yield chunk;
494
+ }
495
+ catch (err) {
496
+ rejectResp(err);
497
+ throw err;
498
+ }
499
+ try {
500
+ const r = await persisted.response;
501
+ resolveResp(r);
502
+ }
503
+ catch (err) {
504
+ rejectResp(err);
505
+ throw err;
506
+ }
507
+ }
508
+ return { stream: outer(), response: responsePromise };
509
+ }
246
510
  // ─── Helpers ─────────────────────────────────────────────
247
511
  function getTools(a) {
248
512
  return 'tools' in a && typeof a.tools === 'function'
@@ -328,6 +592,7 @@ async function runFailover(loopCtx, currentModel, call) {
328
592
  temperature: loopCtx.agent.temperature(),
329
593
  maxTokens: loopCtx.agent.maxTokens(),
330
594
  signal: loopCtx.options?.signal,
595
+ cache: resolveCacheMarkers(loopCtx.agent, loopCtx.options),
331
596
  };
332
597
  return await call(adapter, modelId, reqOptions);
333
598
  }
@@ -344,6 +609,39 @@ async function runFailover(loopCtx, currentModel, call) {
344
609
  }
345
610
  throw lastError ?? new Error('No provider available');
346
611
  }
612
+ /**
613
+ * Merge agent-level `cacheable()` declaration with per-call override.
614
+ *
615
+ * - Per-call `cache: false` → returns `undefined` (caching disabled).
616
+ * - Per-call `cache: { ... }` → replaces the agent default.
617
+ * - Per-call omitted → uses `agent.cacheable()` unchanged.
618
+ *
619
+ * Returns `undefined` when no markers are set so the provider request
620
+ * carries no `cache` field at all.
621
+ */
622
+ function resolveCacheMarkers(agent, options) {
623
+ if (options && options.cache === false)
624
+ return undefined;
625
+ const perCall = options?.cache === false ? undefined : options?.cache;
626
+ const config = perCall ?? agent.cacheable();
627
+ if (!config)
628
+ return undefined;
629
+ const markers = {};
630
+ if (config.instructions)
631
+ markers.instructions = true;
632
+ if (config.tools)
633
+ markers.tools = true;
634
+ if (config.messages !== undefined && config.messages > 0) {
635
+ markers.messages = Math.floor(config.messages);
636
+ }
637
+ if (config.ttl)
638
+ markers.ttl = config.ttl;
639
+ // ttl alone with no region markers is meaningless — drop it.
640
+ const hasRegion = markers.instructions || markers.tools || (markers.messages && markers.messages > 0);
641
+ if (!hasRegion)
642
+ return undefined;
643
+ return markers;
644
+ }
347
645
  /** Emit the `agent.failed` observer event from the shared loop state. */
348
646
  function emitObserverFailed(loopCtx, err, streaming) {
349
647
  const obs = _getAiObservers();