@trigger.dev/sdk 4.5.0-rc.6 → 4.5.0-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commonjs/v3/ai.d.ts +171 -5
- package/dist/commonjs/v3/ai.js +309 -22
- package/dist/commonjs/v3/ai.js.map +1 -1
- package/dist/commonjs/v3/chat-server.d.ts +8 -0
- package/dist/commonjs/v3/chat-server.js +32 -10
- package/dist/commonjs/v3/chat-server.js.map +1 -1
- package/dist/commonjs/v3/chat-server.test.js +51 -0
- package/dist/commonjs/v3/chat-server.test.js.map +1 -1
- package/dist/commonjs/v3/createStartSessionAction.test.js +30 -0
- package/dist/commonjs/v3/createStartSessionAction.test.js.map +1 -1
- package/dist/commonjs/v3/sessions.d.ts +3 -2
- package/dist/commonjs/v3/sessions.js +3 -2
- package/dist/commonjs/v3/sessions.js.map +1 -1
- package/dist/commonjs/version.js +1 -1
- package/dist/esm/v3/ai.d.ts +171 -5
- package/dist/esm/v3/ai.js +309 -22
- package/dist/esm/v3/ai.js.map +1 -1
- package/dist/esm/v3/chat-server.d.ts +8 -0
- package/dist/esm/v3/chat-server.js +32 -10
- package/dist/esm/v3/chat-server.js.map +1 -1
- package/dist/esm/v3/chat-server.test.js +51 -0
- package/dist/esm/v3/chat-server.test.js.map +1 -1
- package/dist/esm/v3/createStartSessionAction.test.js +30 -0
- package/dist/esm/v3/createStartSessionAction.test.js.map +1 -1
- package/dist/esm/v3/sessions.d.ts +3 -2
- package/dist/esm/v3/sessions.js +3 -2
- package/dist/esm/v3/sessions.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/docs/ai/prompts.mdx +430 -0
- package/docs/ai-chat/actions.mdx +115 -0
- package/docs/ai-chat/anatomy.mdx +71 -0
- package/docs/ai-chat/backend.mdx +817 -0
- package/docs/ai-chat/background-injection.mdx +221 -0
- package/docs/ai-chat/changelog.mdx +850 -0
- package/docs/ai-chat/chat-local.mdx +174 -0
- package/docs/ai-chat/client-protocol.mdx +1081 -0
- package/docs/ai-chat/compaction.mdx +411 -0
- package/docs/ai-chat/custom-agents.mdx +364 -0
- package/docs/ai-chat/error-handling.mdx +415 -0
- package/docs/ai-chat/fast-starts.mdx +672 -0
- package/docs/ai-chat/frontend.mdx +580 -0
- package/docs/ai-chat/how-it-works.mdx +230 -0
- package/docs/ai-chat/lifecycle-hooks.mdx +530 -0
- package/docs/ai-chat/mcp.mdx +101 -0
- package/docs/ai-chat/overview.mdx +90 -0
- package/docs/ai-chat/patterns/branching-conversations.mdx +284 -0
- package/docs/ai-chat/patterns/code-sandbox.mdx +126 -0
- package/docs/ai-chat/patterns/database-persistence.mdx +414 -0
- package/docs/ai-chat/patterns/human-in-the-loop.mdx +275 -0
- package/docs/ai-chat/patterns/large-payloads.mdx +169 -0
- package/docs/ai-chat/patterns/oom-resilience.mdx +120 -0
- package/docs/ai-chat/patterns/persistence-and-replay.mdx +211 -0
- package/docs/ai-chat/patterns/recovery-boot.mdx +230 -0
- package/docs/ai-chat/patterns/skills.mdx +221 -0
- package/docs/ai-chat/patterns/sub-agents.mdx +383 -0
- package/docs/ai-chat/patterns/tool-result-auditing.mdx +148 -0
- package/docs/ai-chat/patterns/trusted-edge-signals.mdx +337 -0
- package/docs/ai-chat/patterns/version-upgrades.mdx +172 -0
- package/docs/ai-chat/pending-messages.mdx +343 -0
- package/docs/ai-chat/prompt-caching.mdx +206 -0
- package/docs/ai-chat/quick-start.mdx +161 -0
- package/docs/ai-chat/reference.mdx +909 -0
- package/docs/ai-chat/server-chat.mdx +263 -0
- package/docs/ai-chat/sessions.mdx +333 -0
- package/docs/ai-chat/testing.mdx +682 -0
- package/docs/ai-chat/tools.mdx +191 -0
- package/docs/ai-chat/types.mdx +242 -0
- package/docs/ai-chat/upgrade-guide.mdx +515 -0
- package/docs/apikeys.mdx +54 -0
- package/docs/building-with-ai.mdx +261 -0
- package/docs/bulk-actions.mdx +49 -0
- package/docs/changelog.mdx +6 -0
- package/docs/cli-deploy-commands.mdx +9 -0
- package/docs/cli-dev-commands.mdx +9 -0
- package/docs/cli-dev.mdx +8 -0
- package/docs/cli-init-commands.mdx +58 -0
- package/docs/cli-introduction.mdx +25 -0
- package/docs/cli-list-profiles-commands.mdx +42 -0
- package/docs/cli-login-commands.mdx +33 -0
- package/docs/cli-logout-commands.mdx +33 -0
- package/docs/cli-preview-archive.mdx +59 -0
- package/docs/cli-promote-commands.mdx +9 -0
- package/docs/cli-switch.mdx +43 -0
- package/docs/cli-update-commands.mdx +42 -0
- package/docs/cli-whoami-commands.mdx +33 -0
- package/docs/community.mdx +6 -0
- package/docs/config/config-file.mdx +602 -0
- package/docs/config/extensions/additionalFiles.mdx +38 -0
- package/docs/config/extensions/additionalPackages.mdx +40 -0
- package/docs/config/extensions/aptGet.mdx +34 -0
- package/docs/config/extensions/audioWaveform.mdx +20 -0
- package/docs/config/extensions/custom.mdx +380 -0
- package/docs/config/extensions/emitDecoratorMetadata.mdx +29 -0
- package/docs/config/extensions/esbuildPlugin.mdx +31 -0
- package/docs/config/extensions/ffmpeg.mdx +45 -0
- package/docs/config/extensions/lightpanda.mdx +56 -0
- package/docs/config/extensions/overview.mdx +67 -0
- package/docs/config/extensions/playwright.mdx +195 -0
- package/docs/config/extensions/prismaExtension.mdx +1014 -0
- package/docs/config/extensions/puppeteer.mdx +30 -0
- package/docs/config/extensions/pythonExtension.mdx +182 -0
- package/docs/config/extensions/syncEnvVars.mdx +291 -0
- package/docs/context.mdx +235 -0
- package/docs/database-connections.mdx +213 -0
- package/docs/deploy-environment-variables.mdx +435 -0
- package/docs/deployment/atomic-deployment.mdx +172 -0
- package/docs/deployment/overview.mdx +257 -0
- package/docs/deployment/preview-branches.mdx +224 -0
- package/docs/errors-retrying.mdx +379 -0
- package/docs/github-actions.mdx +222 -0
- package/docs/github-integration.mdx +136 -0
- package/docs/github-repo.mdx +8 -0
- package/docs/help-email.mdx +6 -0
- package/docs/help-slack.mdx +11 -0
- package/docs/hidden-tasks.mdx +56 -0
- package/docs/how-it-works.mdx +454 -0
- package/docs/how-to-reduce-your-spend.mdx +217 -0
- package/docs/idempotency.mdx +504 -0
- package/docs/introduction.mdx +223 -0
- package/docs/limits.mdx +241 -0
- package/docs/logging.mdx +195 -0
- package/docs/machines.mdx +952 -0
- package/docs/manual-setup.mdx +632 -0
- package/docs/mcp-agent-rules.mdx +41 -0
- package/docs/mcp-introduction.mdx +385 -0
- package/docs/mcp-tools.mdx +273 -0
- package/docs/migrating-from-v3.mdx +334 -0
- package/docs/observability/dashboards.mdx +102 -0
- package/docs/observability/query.mdx +585 -0
- package/docs/open-source-contributing.mdx +16 -0
- package/docs/open-source-self-hosting.mdx +541 -0
- package/docs/private-networking/aws-console-setup.mdx +304 -0
- package/docs/private-networking/overview.mdx +144 -0
- package/docs/private-networking/troubleshooting.mdx +78 -0
- package/docs/queue-concurrency.mdx +354 -0
- package/docs/quick-start.mdx +97 -0
- package/docs/realtime/auth.mdx +208 -0
- package/docs/realtime/backend/overview.mdx +45 -0
- package/docs/realtime/backend/streams.mdx +418 -0
- package/docs/realtime/backend/subscribe.mdx +225 -0
- package/docs/realtime/how-it-works.mdx +94 -0
- package/docs/realtime/overview.mdx +63 -0
- package/docs/realtime/react-hooks/overview.mdx +73 -0
- package/docs/realtime/react-hooks/streams.mdx +449 -0
- package/docs/realtime/react-hooks/subscribe.mdx +674 -0
- package/docs/realtime/react-hooks/swr.mdx +87 -0
- package/docs/realtime/react-hooks/triggering.mdx +194 -0
- package/docs/realtime/react-hooks/use-wait-token.mdx +34 -0
- package/docs/realtime/run-object.mdx +174 -0
- package/docs/replaying.mdx +72 -0
- package/docs/request-feature.mdx +6 -0
- package/docs/roadmap.mdx +6 -0
- package/docs/run-tests.mdx +20 -0
- package/docs/run-usage.mdx +113 -0
- package/docs/runs/heartbeats.mdx +38 -0
- package/docs/runs/max-duration.mdx +139 -0
- package/docs/runs/metadata.mdx +734 -0
- package/docs/runs/priority.mdx +31 -0
- package/docs/runs.mdx +396 -0
- package/docs/self-hosting/docker.mdx +458 -0
- package/docs/self-hosting/env/supervisor.mdx +74 -0
- package/docs/self-hosting/env/webapp.mdx +276 -0
- package/docs/self-hosting/kubernetes.mdx +601 -0
- package/docs/self-hosting/overview.mdx +108 -0
- package/docs/skills.mdx +85 -0
- package/docs/tags.mdx +120 -0
- package/docs/tasks/overview.mdx +697 -0
- package/docs/tasks/scheduled.mdx +382 -0
- package/docs/tasks/schemaTask.mdx +413 -0
- package/docs/tasks/streams.mdx +884 -0
- package/docs/triggering.mdx +1320 -0
- package/docs/troubleshooting-alerts.mdx +385 -0
- package/docs/troubleshooting-debugging-in-vscode.mdx +8 -0
- package/docs/troubleshooting-github-issues.mdx +6 -0
- package/docs/troubleshooting-uptime-status.mdx +6 -0
- package/docs/troubleshooting.mdx +398 -0
- package/docs/upgrading-packages.mdx +80 -0
- package/docs/vercel-integration.mdx +207 -0
- package/docs/versioning.mdx +56 -0
- package/docs/video-walkthrough.mdx +23 -0
- package/docs/wait-for-token.mdx +540 -0
- package/docs/wait-for.mdx +42 -0
- package/docs/wait-until.mdx +53 -0
- package/docs/wait.mdx +18 -0
- package/docs/writing-tasks-introduction.mdx +33 -0
- package/package.json +8 -5
- package/skills/trigger-authoring-chat-agent/SKILL.md +296 -0
- package/skills/trigger-authoring-tasks/SKILL.md +254 -0
- package/skills/trigger-chat-agent-advanced/SKILL.md +368 -0
- package/skills/trigger-cost-savings/SKILL.md +116 -0
- package/skills/trigger-realtime-and-frontend/SKILL.md +276 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Pending Messages"
|
|
3
|
+
sidebarTitle: "Pending Messages"
|
|
4
|
+
description: "Inject user messages mid-execution to steer agents between tool-call steps."
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
|
|
8
|
+
|
|
9
|
+
<RcBanner />
|
|
10
|
+
|
|
11
|
+
## Overview
|
|
12
|
+
|
|
13
|
+
When an AI agent is executing tool calls, users may want to send a message that **steers the agent mid-execution** — adding context, correcting course, or refining the request without waiting for the response to finish.
|
|
14
|
+
|
|
15
|
+
The `pendingMessages` option enables this by injecting user messages between tool-call steps via the AI SDK's `prepareStep`. Messages that arrive during streaming are queued and injected at the next step boundary. If there are no more step boundaries (single-step response or final text generation), the message becomes the next turn automatically.
|
|
16
|
+
|
|
17
|
+
## How it works
|
|
18
|
+
|
|
19
|
+
1. User sends a message while the agent is streaming
|
|
20
|
+
2. The message is sent to the backend via an input stream (`transport.sendPendingMessage`)
|
|
21
|
+
3. The backend queues it in the steering queue
|
|
22
|
+
4. At the next `prepareStep` boundary (between tool-call steps), `shouldInject` is called
|
|
23
|
+
5. If it returns `true`, the message is injected into the LLM's context
|
|
24
|
+
6. A `data-pending-message-injected` stream chunk confirms injection to the frontend
|
|
25
|
+
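7. If no further step boundary is reached (for example, the response makes no more tool calls) or `shouldInject` returns `false` at every remaining boundary, the message becomes the next turn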
|
|
26
|
+
|
|
27
|
+
## Backend: chat.agent
|
|
28
|
+
|
|
29
|
+
Add `pendingMessages` to your `chat.agent` configuration:
|
|
30
|
+
|
|
31
|
+
```ts
|
|
32
|
+
import { chat } from "@trigger.dev/sdk/ai";
|
|
33
|
+
import { streamText, stepCountIs } from "ai";
|
|
34
|
+
import { anthropic } from "@ai-sdk/anthropic";
|
|
35
|
+
|
|
36
|
+
export const myChat = chat.agent({
|
|
37
|
+
id: "my-chat",
|
|
38
|
+
pendingMessages: {
|
|
39
|
+
// Only inject when there are completed steps (tool calls happened)
|
|
40
|
+
shouldInject: ({ steps }) => steps.length > 0,
|
|
41
|
+
},
|
|
42
|
+
run: async ({ messages, signal }) => {
|
|
43
|
+
return streamText({
|
|
44
|
+
...chat.toStreamTextOptions({ registry }),
|
|
45
|
+
messages,
|
|
46
|
+
tools: { /* ... */ },
|
|
47
|
+
abortSignal: signal,
|
|
48
|
+
stopWhen: stepCountIs(15),
|
|
49
|
+
});
|
|
50
|
+
},
|
|
51
|
+
});
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The `prepareStep` for injection is automatically included when you spread `chat.toStreamTextOptions()`. If you provide your own `prepareStep` after the spread, it overrides the auto-injected one, so pending messages are no longer injected automatically.
|
|
55
|
+
|
|
56
|
+
### Options
|
|
57
|
+
|
|
58
|
+
| Option | Type | Description |
|
|
59
|
+
|--------|------|-------------|
|
|
60
|
+
| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean` | Decide whether to inject the batch. Called once per step boundary. If absent, no injection happens. |
|
|
61
|
+
| `prepare` | `(event: PendingMessagesBatchEvent) => ModelMessage[]` | Transform the batch before injection. Default: convert each message via `convertToModelMessages`. |
|
|
62
|
+
| `onReceived` | `(event) => void` | Called when a message arrives during streaming (per-message). |
|
|
63
|
+
| `onInjected` | `(event) => void` | Called after a batch is injected. |
|
|
64
|
+
|
|
65
|
+
### shouldInject
|
|
66
|
+
|
|
67
|
+
Called once per step boundary with the full batch of pending messages. Return `true` to inject all of them, `false` to skip (they'll be available at the next boundary or become the next turn).
|
|
68
|
+
|
|
69
|
+
```ts
|
|
70
|
+
pendingMessages: {
|
|
71
|
+
// Always inject
|
|
72
|
+
shouldInject: () => true,
|
|
73
|
+
|
|
74
|
+
// Only inject after tool calls
|
|
75
|
+
shouldInject: ({ steps }) => steps.length > 0,
|
|
76
|
+
|
|
77
|
+
// Only inject if there's one message
|
|
78
|
+
shouldInject: ({ messages }) => messages.length === 1,
|
|
79
|
+
},
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
The event includes:
|
|
83
|
+
|
|
84
|
+
| Field | Type | Description |
|
|
85
|
+
|-------|------|-------------|
|
|
86
|
+
| `messages` | `UIMessage[]` | All pending messages (batch) |
|
|
87
|
+
| `modelMessages` | `ModelMessage[]` | Current conversation |
|
|
88
|
+
| `steps` | `CompactionStep[]` | Completed steps |
|
|
89
|
+
| `stepNumber` | `number` | Current step (0-indexed) |
|
|
90
|
+
| `chatId` | `string` | Chat session ID |
|
|
91
|
+
| `turn` | `number` | Current turn |
|
|
92
|
+
| `clientData` | `unknown` | Frontend metadata |
|
|
93
|
+
|
|
94
|
+
### prepare
|
|
95
|
+
|
|
96
|
+
Transform the batch of pending messages before they're injected into the LLM's context. By default, each UIMessage is converted to ModelMessages individually. Use `prepare` to combine multiple messages or add context:
|
|
97
|
+
|
|
98
|
+
```ts
|
|
99
|
+
pendingMessages: {
|
|
100
|
+
shouldInject: ({ steps }) => steps.length > 0,
|
|
101
|
+
prepare: ({ messages }) => [{
|
|
102
|
+
role: "user",
|
|
103
|
+
content: messages.length === 1
|
|
104
|
+
? messages[0].parts[0]?.text ?? ""
|
|
105
|
+
: `The user sent ${messages.length} messages:\n${
|
|
106
|
+
messages.map((m, i) => `${i + 1}. ${m.parts[0]?.text}`).join("\n")
|
|
107
|
+
}`,
|
|
108
|
+
}],
|
|
109
|
+
},
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Stream chunk
|
|
113
|
+
|
|
114
|
+
When messages are injected, the SDK automatically writes a `data-pending-message-injected` stream chunk containing the message IDs and text. The frontend uses this to:
|
|
115
|
+
- Confirm which messages were injected
|
|
116
|
+
- Remove them from the pending overlay
|
|
117
|
+
- Render them inline at the injection point in the assistant response
|
|
118
|
+
|
|
119
|
+
A "pending message injected" span also appears in the run trace.
|
|
120
|
+
|
|
121
|
+
## Backend: chat.createSession
|
|
122
|
+
|
|
123
|
+
Pass `pendingMessages` to the session options:
|
|
124
|
+
|
|
125
|
+
```ts
|
|
126
|
+
const session = chat.createSession(payload, {
|
|
127
|
+
signal,
|
|
128
|
+
idleTimeoutInSeconds: 60,
|
|
129
|
+
pendingMessages: {
|
|
130
|
+
shouldInject: () => true,
|
|
131
|
+
},
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
for await (const turn of session) {
|
|
135
|
+
const result = streamText({
|
|
136
|
+
model: anthropic("claude-sonnet-4-5"),
|
|
137
|
+
messages: turn.messages,
|
|
138
|
+
abortSignal: turn.signal,
|
|
139
|
+
prepareStep: turn.prepareStep(), // Handles injection + compaction
|
|
140
|
+
stopWhen: stepCountIs(15),
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
await turn.complete(result);
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Use `turn.prepareStep()` to get a prepareStep function that handles both injection and compaction. Users who spread `chat.toStreamTextOptions()` get it automatically.
|
|
148
|
+
|
|
149
|
+
## Backend: MessageAccumulator (raw task)
|
|
150
|
+
|
|
151
|
+
Pass `pendingMessages` to the constructor and wire up the message listener manually:
|
|
152
|
+
|
|
153
|
+
```ts
|
|
154
|
+
const conversation = new chat.MessageAccumulator({
|
|
155
|
+
pendingMessages: {
|
|
156
|
+
shouldInject: () => true,
|
|
157
|
+
prepare: ({ messages }) => [{
|
|
158
|
+
role: "user",
|
|
159
|
+
content: `[Steering]: ${messages.map(m => m.parts[0]?.text).join(", ")}`,
|
|
160
|
+
}],
|
|
161
|
+
},
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
for (let turn = 0; turn < 100; turn++) {
|
|
165
|
+
// The wire payload carries at most one new message per turn.
|
|
166
|
+
const messages = await conversation.addIncoming(
|
|
167
|
+
payload.message ? [payload.message] : [],
|
|
168
|
+
payload.trigger,
|
|
169
|
+
turn
|
|
170
|
+
);
|
|
171
|
+
|
|
172
|
+
// Listen for steering messages during streaming
|
|
173
|
+
const sub = chat.messages.on(async (msg) => {
|
|
174
|
+
if (msg.message) await conversation.steerAsync(msg.message);
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
const result = streamText({
|
|
178
|
+
model: anthropic("claude-sonnet-4-5"),
|
|
179
|
+
messages,
|
|
180
|
+
prepareStep: conversation.prepareStep(), // Handles injection + compaction
|
|
181
|
+
stopWhen: stepCountIs(15),
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
const response = await chat.pipeAndCapture(result);
|
|
185
|
+
sub.off();
|
|
186
|
+
|
|
187
|
+
if (response) await conversation.addResponse(response);
|
|
188
|
+
await chat.writeTurnComplete();
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### MessageAccumulator methods
|
|
193
|
+
|
|
194
|
+
| Method | Description |
|
|
195
|
+
|--------|-------------|
|
|
196
|
+
| `steer(message, modelMessages?)` | Queue a UIMessage for injection (sync) |
|
|
197
|
+
| `steerAsync(message)` | Queue a UIMessage, converting to model messages automatically |
|
|
198
|
+
| `drainSteering()` | Get and clear unconsumed steering messages |
|
|
199
|
+
| `prepareStep()` | Returns a prepareStep function handling injection + compaction |
|
|
200
|
+
|
|
201
|
+
## Frontend: usePendingMessages hook
|
|
202
|
+
|
|
203
|
+
The `usePendingMessages` hook manages all the frontend complexity — tracking pending messages, detecting injections, and handling the turn lifecycle.
|
|
204
|
+
|
|
205
|
+
```tsx
|
|
206
|
+
import { useChat } from "@ai-sdk/react";
|
|
207
|
+
import { useTriggerChatTransport, usePendingMessages } from "@trigger.dev/sdk/chat/react";
|
|
208
|
+
|
|
209
|
+
function Chat({ chatId }: { chatId: string }) {
|
|
210
|
+
const transport = useTriggerChatTransport({
|
|
211
|
+
task: "my-chat",
|
|
212
|
+
accessToken: ({ chatId }) => mintChatAccessToken(chatId),
|
|
213
|
+
startSession: ({ chatId, clientData }) =>
|
|
214
|
+
startChatSession({ chatId, clientData }),
|
|
215
|
+
});
|
|
216
|
+
|
|
217
|
+
const { messages, setMessages, sendMessage, stop, status } = useChat({
|
|
218
|
+
id: chatId,
|
|
219
|
+
transport,
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
const pending = usePendingMessages({
|
|
223
|
+
transport,
|
|
224
|
+
chatId,
|
|
225
|
+
status,
|
|
226
|
+
messages,
|
|
227
|
+
setMessages,
|
|
228
|
+
sendMessage,
|
|
229
|
+
metadata: { model: "gpt-4o" },
|
|
230
|
+
});
|
|
231
|
+
|
|
232
|
+
return (
|
|
233
|
+
<div>
|
|
234
|
+
{/* Render messages */}
|
|
235
|
+
{messages.map((msg) => (
|
|
236
|
+
<div key={msg.id}>
|
|
237
|
+
{msg.role === "assistant" ? (
|
|
238
|
+
msg.parts.map((part, i) =>
|
|
239
|
+
pending.isInjectionPoint(part) ? (
|
|
240
|
+
// Render injected messages inline at the injection point
|
|
241
|
+
<div key={i}>
|
|
242
|
+
{pending.getInjectedMessages(part).map((m) => (
|
|
243
|
+
<div key={m.id} className="injected-message">{m.text}</div>
|
|
244
|
+
))}
|
|
245
|
+
</div>
|
|
246
|
+
) : (
|
|
247
|
+
<Part key={i} part={part} />
|
|
248
|
+
)
|
|
249
|
+
)
|
|
250
|
+
) : (
|
|
251
|
+
<UserMessage msg={msg} />
|
|
252
|
+
)}
|
|
253
|
+
</div>
|
|
254
|
+
))}
|
|
255
|
+
|
|
256
|
+
{/* Render pending messages */}
|
|
257
|
+
{pending.pending.map((msg) => (
|
|
258
|
+
<div key={msg.id}>
|
|
259
|
+
<span>{msg.text}</span>
|
|
260
|
+
<span>{msg.mode === "steering" ? "Steering" : "Queued"}</span>
|
|
261
|
+
{msg.mode === "queued" && status === "streaming" && (
|
|
262
|
+
<button onClick={() => pending.promoteToSteering(msg.id)}>
|
|
263
|
+
Steer instead
|
|
264
|
+
</button>
|
|
265
|
+
)}
|
|
266
|
+
</div>
|
|
267
|
+
))}
|
|
268
|
+
|
|
269
|
+
{/* Send form */}
|
|
270
|
+
<form onSubmit={(e) => {
|
|
271
|
+
e.preventDefault();
|
|
272
|
+
pending.steer(input); // Steers during streaming, sends normally when ready
|
|
273
|
+
setInput("");
|
|
274
|
+
}}>
|
|
275
|
+
<input value={input} onChange={(e) => setInput(e.target.value)} />
|
|
276
|
+
<button type="submit">Send</button>
|
|
277
|
+
{status === "streaming" && (
|
|
278
|
+
<button type="button" onClick={() => { pending.queue(input); setInput(""); }}>
|
|
279
|
+
Queue
|
|
280
|
+
</button>
|
|
281
|
+
)}
|
|
282
|
+
</form>
|
|
283
|
+
</div>
|
|
284
|
+
);
|
|
285
|
+
}
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### Hook API
|
|
289
|
+
|
|
290
|
+
| Property/Method | Type | Description |
|
|
291
|
+
|----------------|------|-------------|
|
|
292
|
+
| `pending` | `PendingMessage[]` | Current pending messages with `id`, `text`, `mode`, and `injected` status |
|
|
293
|
+
| `steer(text)` | `(text: string) => void` | Send a steering message during streaming, or a normal message when the chat is ready |
|
|
294
|
+
| `queue(text)` | `(text: string) => void` | Queue for next turn during streaming, or send normally when ready |
|
|
295
|
+
| `promoteToSteering(id)` | `(id: string) => void` | Convert a queued message to steering (sends via input stream immediately) |
|
|
296
|
+
| `isInjectionPoint(part)` | `(part: unknown) => boolean` | Check if an assistant message part is an injection confirmation |
|
|
297
|
+
| `getInjectedMessageIds(part)` | `(part: unknown) => string[]` | Get message IDs from an injection point |
|
|
298
|
+
| `getInjectedMessages(part)` | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point |
|
|
299
|
+
|
|
300
|
+
### PendingMessage
|
|
301
|
+
|
|
302
|
+
| Field | Type | Description |
|
|
303
|
+
|-------|------|-------------|
|
|
304
|
+
| `id` | `string` | Unique message ID |
|
|
305
|
+
| `text` | `string` | Message text |
|
|
306
|
+
| `mode` | `"steering" \| "queued"` | How the message is being handled |
|
|
307
|
+
| `injected` | `boolean` | Whether the backend confirmed injection |
|
|
308
|
+
|
|
309
|
+
### Message lifecycle
|
|
310
|
+
|
|
311
|
+
- **Steering messages** are sent via `transport.sendPendingMessage()` immediately. They appear as purple pending bubbles. If injected, they disappear from the overlay and render inline at the injection point. If not injected (no more step boundaries), they auto-send as the next turn when the response finishes.
|
|
312
|
+
|
|
313
|
+
- **Queued messages** stay client-side until the turn completes, then auto-send as the next turn via `sendMessage()`. They can be promoted to steering mid-stream by clicking "Steer instead".
|
|
314
|
+
|
|
315
|
+
- **Promoted messages** are queued messages that were converted to steering. They get sent via input stream immediately and follow the steering lifecycle from that point.
|
|
316
|
+
|
|
317
|
+
## Transport: sendPendingMessage
|
|
318
|
+
|
|
319
|
+
The `TriggerChatTransport` exposes a `sendPendingMessage` method for sending messages via input stream without disrupting the active stream subscription:
|
|
320
|
+
|
|
321
|
+
```ts
|
|
322
|
+
const sent = await transport.sendPendingMessage(chatId, {
|
|
323
|
+
id: crypto.randomUUID(),
|
|
324
|
+
role: "user",
|
|
325
|
+
parts: [{ type: "text", text: "and compare to vercel" }],
|
|
326
|
+
}, { model: "gpt-4o" });
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
Unlike `sendMessage()` from `useChat`, this does NOT:
|
|
330
|
+
- Add the message to useChat's local state
|
|
331
|
+
- Cancel the active stream subscription
|
|
332
|
+
- Start a new response stream
|
|
333
|
+
|
|
334
|
+
The `usePendingMessages` hook calls this internally — you typically don't need to use it directly.
|
|
335
|
+
|
|
336
|
+
## Coexistence with compaction
|
|
337
|
+
|
|
338
|
+
Pending message injection and compaction both use `prepareStep`. When both are configured, the auto-injected `prepareStep` handles them in order:
|
|
339
|
+
|
|
340
|
+
1. **Compaction** runs first — checks threshold, generates summary if needed
|
|
341
|
+
2. **Injection** runs second — pending messages are appended to either the compacted or original messages
|
|
342
|
+
|
|
343
|
+
This means injected messages are always included after compaction, ensuring the LLM sees both the compressed history and the new steering input.
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Prompt caching"
|
|
3
|
+
sidebarTitle: "Prompt caching"
|
|
4
|
+
description: "Cache the stable prefix of your agent's prompt with Anthropic prompt caching to cut token cost and latency on every turn."
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
import RcBanner from "/snippets/ai-chat-rc-banner.mdx";
|
|
8
|
+
|
|
9
|
+
<RcBanner />
|
|
10
|
+
|
|
11
|
+
**Prompt caching lets a provider reuse the unchanged prefix of your prompt across requests, billing it at a fraction of the input price and skipping re-processing.** With Anthropic, cache reads cost ~10% of the base input-token price, so a long, stable system prompt or a growing conversation history pays full price once and reads cheaply on every turn after.
|
|
12
|
+
|
|
13
|
+
Caching is a **byte-exact prefix match**: any change in the prefix invalidates everything after it. A multi-turn agent is the ideal case — the system prompt, tools, and earlier turns are identical turn over turn, so the cacheable prefix only grows. `chat.agent` is built to keep that prefix stable across turns, suspends, and resumes; this page shows how to place the cache breakpoints and verify they're hitting.
|
|
14
|
+
|
|
15
|
+
Caching is provider-specific. This guide covers Anthropic (`@ai-sdk/anthropic`), where you opt in per breakpoint with `providerOptions.anthropic.cacheControl`. Other providers cache differently, and most cache automatically — see [Other providers](#other-providers).
|
|
16
|
+
|
|
17
|
+
## What you cache, and where
|
|
18
|
+
|
|
19
|
+
A request renders as `tools` → `system` → `messages`. There are three prefix regions worth caching, in order:
|
|
20
|
+
|
|
21
|
+
| Region | How to cache it | Stability |
|
|
22
|
+
| --- | --- | --- |
|
|
23
|
+
| System prompt (+ tools) | `cacheControl` / `systemProviderOptions` on `chat.toStreamTextOptions()`, or `providerOptions` on `chat.prompt.set()` | Set once, never changes — the highest-value target |
|
|
24
|
+
| Conversation history | `prepareMessages` adds a breakpoint to the last message | Grows append-only across turns |
|
|
25
|
+
| Tool definitions | No separate option — tools render ahead of `system`, so a later breakpoint covers them as long as your tool set doesn't change between turns | Render at position 0 — changing them invalidates everything |
|
|
26
|
+
|
|
27
|
+
`chat.agent` preserves `providerOptions` through message persistence and rehydration, so a breakpoint you place survives a suspend/resume or a page refresh. The recommended way to place message breakpoints is `prepareMessages` (below) rather than baking `cacheControl` into stored messages — `prepareMessages` runs on every prompt-assembly path, including after compaction, so the breakpoint is always in the right place.
|
|
28
|
+
|
|
29
|
+
## Cache the system prompt
|
|
30
|
+
|
|
31
|
+
The system prompt (your `chat.prompt` text plus any skills preamble) is usually the largest stable block, so it's the first thing to cache. `chat.toStreamTextOptions()` returns `system` as a plain string by default; opt into caching and it returns a structured system message carrying the cache breakpoint instead.
|
|
32
|
+
|
|
33
|
+
<Note>
|
|
34
|
+
System-prompt caching needs AI SDK v6 or later, where the `system` parameter accepts a structured message. On AI SDK v5 `system` is a plain string, so these options won't apply a breakpoint to the system block — cache the conversation via `prepareMessages` instead.
|
|
35
|
+
</Note>
|
|
36
|
+
|
|
37
|
+
There are three ways to opt in, depending on where you'd rather express it.
|
|
38
|
+
|
|
39
|
+
**`cacheControl` at the `streamText` call site** — the Anthropic-flavored one-liner:
|
|
40
|
+
|
|
41
|
+
```ts /trigger/chat.ts
|
|
42
|
+
import { chat } from "@trigger.dev/sdk/ai";
|
|
43
|
+
import { streamText } from "ai";
|
|
44
|
+
import { anthropic } from "@ai-sdk/anthropic";
|
|
45
|
+
|
|
46
|
+
export const myChat = chat.agent({
|
|
47
|
+
id: "my-chat",
|
|
48
|
+
onChatStart: async () => {
|
|
49
|
+
chat.prompt.set(SYSTEM_PROMPT); // a large, stable instruction block
|
|
50
|
+
},
|
|
51
|
+
run: async ({ messages, signal }) => {
|
|
52
|
+
return streamText({
|
|
53
|
+
model: anthropic("claude-sonnet-4-6"),
|
|
54
|
+
// Caches the system block with a 5-minute breakpoint.
|
|
55
|
+
...chat.toStreamTextOptions({ cacheControl: { type: "ephemeral" } }),
|
|
56
|
+
messages,
|
|
57
|
+
abortSignal: signal,
|
|
58
|
+
});
|
|
59
|
+
},
|
|
60
|
+
});
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**`systemProviderOptions`** is the provider-agnostic form — pass the raw `providerOptions` so it composes with any provider:
|
|
64
|
+
|
|
65
|
+
```ts /trigger/chat.ts
|
|
66
|
+
return streamText({
|
|
67
|
+
model: anthropic("claude-sonnet-4-6"),
|
|
68
|
+
...chat.toStreamTextOptions({
|
|
69
|
+
systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
|
|
70
|
+
}),
|
|
71
|
+
messages,
|
|
72
|
+
abortSignal: signal,
|
|
73
|
+
});
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**`providerOptions` on `chat.prompt.set()`** co-locates the intent with where the prompt is defined. It carries through to `toStreamTextOptions()` with no call-site change:
|
|
77
|
+
|
|
78
|
+
```ts /trigger/chat.ts
|
|
79
|
+
onChatStart: async () => {
|
|
80
|
+
chat.prompt.set(SYSTEM_PROMPT, {
|
|
81
|
+
providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
|
|
82
|
+
});
|
|
83
|
+
},
|
|
84
|
+
run: async ({ messages, signal }) => {
|
|
85
|
+
return streamText({
|
|
86
|
+
model: anthropic("claude-sonnet-4-6"),
|
|
87
|
+
...chat.toStreamTextOptions(), // already cached
|
|
88
|
+
messages,
|
|
89
|
+
abortSignal: signal,
|
|
90
|
+
});
|
|
91
|
+
},
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
If more than one is set, the call-site option wins: `systemProviderOptions` overrides `cacheControl`, and both override `chat.prompt.set`'s `providerOptions`. There's no deep merge — the most specific option replaces the rest.
|
|
95
|
+
|
|
96
|
+
<Note>
|
|
97
|
+
Use the 1-hour cache for prefixes that sit idle longer than 5 minutes between turns: `cacheControl: { type: "ephemeral", ttl: "1h" }`. Cache writes cost more (2× the base input price vs 1.25× for the 5-minute cache), so it pays off only when reads span the longer window.
|
|
98
|
+
</Note>
|
|
99
|
+
|
|
100
|
+
## Cache the conversation history
|
|
101
|
+
|
|
102
|
+
Place a breakpoint on the last message and the entire conversation prefix up to that point is cached, so the next turn reads it back instead of re-processing it. Do this in [`prepareMessages`](/ai-chat/reference#chatagentoptions) — you define the transform once, and `chat.agent` applies it on every path that builds a prompt (each turn, and both compaction rebuild paths), so the breakpoint always lands on the real last message.
|
|
103
|
+
|
|
104
|
+
```ts /trigger/chat.ts
|
|
105
|
+
export const myChat = chat.agent({
|
|
106
|
+
id: "my-chat",
|
|
107
|
+
prepareMessages: async ({ messages }) => {
|
|
108
|
+
if (messages.length === 0) return messages;
|
|
109
|
+
const last = messages[messages.length - 1];
|
|
110
|
+
return [
|
|
111
|
+
...messages.slice(0, -1),
|
|
112
|
+
{
|
|
113
|
+
...last,
|
|
114
|
+
providerOptions: {
|
|
115
|
+
...last.providerOptions,
|
|
116
|
+
anthropic: { cacheControl: { type: "ephemeral" } },
|
|
117
|
+
},
|
|
118
|
+
},
|
|
119
|
+
];
|
|
120
|
+
},
|
|
121
|
+
run: async ({ messages, signal }) => {
|
|
122
|
+
return streamText({
|
|
123
|
+
model: anthropic("claude-sonnet-4-6"),
|
|
124
|
+
...chat.toStreamTextOptions({ cacheControl: { type: "ephemeral" } }),
|
|
125
|
+
messages,
|
|
126
|
+
abortSignal: signal,
|
|
127
|
+
});
|
|
128
|
+
},
|
|
129
|
+
});
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
The system breakpoint and the conversation breakpoint compose: the system block is cached once for the life of the chat, and each turn extends the cached message prefix.
|
|
133
|
+
|
|
134
|
+
<Note>
|
|
135
|
+
Anthropic allows **at most 4** cache breakpoints per request, and a prefix must be at least ~1024 tokens (model-dependent) to cache at all — shorter prefixes silently don't cache. One system breakpoint plus one rolling message breakpoint is the typical setup and leaves headroom.
|
|
136
|
+
</Note>
|
|
137
|
+
|
|
138
|
+
## Caching and compaction
|
|
139
|
+
|
|
140
|
+
Compaction rewrites the conversation prefix — it replaces earlier turns with a summary — so it necessarily invalidates the cached message prefix at that point. That's a one-time reset, not a regression: because `prepareMessages` also runs on the compaction rebuild and result paths, the new (shorter) prefix gets a fresh breakpoint and re-warms on the next turn. Your system-prompt cache is unaffected — compaction never touches the system block. See [Compaction](/ai-chat/compaction) for how the summary is produced.
|
|
141
|
+
|
|
142
|
+
## Other providers
|
|
143
|
+
|
|
144
|
+
Caching is provider-specific, and most providers don't use per-block breakpoints at all:
|
|
145
|
+
|
|
146
|
+
- **OpenAI** and **Google Gemini** cache automatically. OpenAI caches any prompt prefix over 1024 tokens; Gemini 2.5 caches implicitly (1024 tokens on Flash, 2048 on Pro). Neither needs a breakpoint, so the system-caching options above are a no-op for them — `chat.agent` already gives automatic caching exactly what it needs: a byte-stable prefix that only grows across turns. Keep the system prompt frozen and the prefix over the model's minimum and reads happen on their own. (OpenAI's optional `providerOptions.openai.promptCacheKey` improves hit-routing across requests; it's a top-level option, not a system-block breakpoint.)
|
|
147
|
+
|
|
148
|
+
- **Anthropic** and **Amazon Bedrock** take an explicit breakpoint on the system block — Anthropic via `cacheControl`, Bedrock via `cachePoint`. Both go through the provider-agnostic `systemProviderOptions`:
|
|
149
|
+
|
|
150
|
+
```ts /trigger/chat.ts
|
|
151
|
+
// Amazon Bedrock
|
|
152
|
+
return streamText({
|
|
153
|
+
...chat.toStreamTextOptions({
|
|
154
|
+
systemProviderOptions: { bedrock: { cachePoint: { type: "default" } } },
|
|
155
|
+
}),
|
|
156
|
+
messages,
|
|
157
|
+
});
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
The `cacheControl` shorthand is Anthropic-only; `systemProviderOptions` (and `chat.prompt.set`'s `providerOptions`) is the form to reach for on any other breakpoint-based provider.
|
|
161
|
+
|
|
162
|
+
Usage reporting is normalized. Each provider reports cache tokens under its own provider-specific field, but the AI SDK maps them into the same `inputTokenDetails.cacheReadTokens` / `cacheWriteTokens` that `previousTurnUsage` and `totalUsage` carry and the dashboard shows — so the [verify step](#verify-caching-is-working) is the same regardless of provider.
|
|
163
|
+
|
|
164
|
+
## Verify caching is working
|
|
165
|
+
|
|
166
|
+
The turn's usage carries cache token counts. `chat.agent` accumulates them across turns and hands them to `run` as `previousTurnUsage` (last turn) and `totalUsage` (whole chat), both `LanguageModelUsage`:
|
|
167
|
+
|
|
168
|
+
```ts /trigger/chat.ts
|
|
169
|
+
run: async ({ messages, signal, previousTurnUsage }) => {
|
|
170
|
+
// After turn 1, cacheReadTokens should be > 0 on a stable prefix.
|
|
171
|
+
console.log("cache read", previousTurnUsage?.inputTokenDetails?.cacheReadTokens);
|
|
172
|
+
console.log("cache write", previousTurnUsage?.inputTokenDetails?.cacheWriteTokens);
|
|
173
|
+
|
|
174
|
+
return streamText({
|
|
175
|
+
model: anthropic("claude-sonnet-4-6"),
|
|
176
|
+
...chat.toStreamTextOptions({ cacheControl: { type: "ephemeral" } }),
|
|
177
|
+
messages,
|
|
178
|
+
abortSignal: signal,
|
|
179
|
+
});
|
|
180
|
+
},
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
The first turn writes the cache (`cacheWriteTokens > 0`, `cacheReadTokens` is 0). Every turn after, on an unchanged prefix, reads it (`cacheReadTokens > 0`). The dashboard surfaces the same numbers on the AI span as **Cache write** and **Cache read**, so you can confirm hits per run without logging.
|
|
184
|
+
|
|
185
|
+
If `cacheReadTokens` stays 0 across turns even though the prefix should be identical, something is silently changing the prefix bytes between turns — see the warning below.
|
|
186
|
+
|
|
187
|
+
<Warning>
|
|
188
|
+
Anything that changes the prefix between turns silently kills the cache. Keep the system prompt **byte-stable** — never interpolate a timestamp, request ID, or per-turn value into `chat.prompt`. Don't change the **model** or the **tool set** mid-conversation (tools render at position 0, so adding one invalidates everything after). Inject dynamic per-turn context as a late message via [pending messages](/ai-chat/pending-messages) or [background injection](/ai-chat/background-injection), not into the cached prefix.
|
|
189
|
+
</Warning>
|
|
190
|
+
|
|
191
|
+
## Next steps
|
|
192
|
+
|
|
193
|
+
<CardGroup cols={2}>
|
|
194
|
+
<Card title="Compaction" icon="compress" href="/ai-chat/compaction">
|
|
195
|
+
Keep long conversations within token limits — and re-warm the cache after.
|
|
196
|
+
</Card>
|
|
197
|
+
<Card title="Fast starts" icon="bolt" href="/ai-chat/fast-starts">
|
|
198
|
+
Cut cold-start latency so a cached prefix is the only thing between a message and a reply.
|
|
199
|
+
</Card>
|
|
200
|
+
<Card title="chat.agent reference" icon="book" href="/ai-chat/reference#chatagentoptions">
|
|
201
|
+
Full option surface, including `prepareMessages` and `toStreamTextOptions`.
|
|
202
|
+
</Card>
|
|
203
|
+
<Card title="Building agents: backend" icon="server" href="/ai-chat/backend">
|
|
204
|
+
The three ways to build a chat backend and when to reach for each.
|
|
205
|
+
</Card>
|
|
206
|
+
</CardGroup>
|