@rudderjs/ai 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/README.md +484 -7
  2. package/boost/guidelines.md +62 -2
  3. package/boost/skills/ai-tools/SKILL.md +14 -5
  4. package/dist/agent.d.ts +66 -15
  5. package/dist/agent.d.ts.map +1 -1
  6. package/dist/agent.js +529 -58
  7. package/dist/agent.js.map +1 -1
  8. package/dist/budget/pricing.d.ts +124 -0
  9. package/dist/budget/pricing.d.ts.map +1 -0
  10. package/dist/budget/pricing.js +175 -0
  11. package/dist/budget/pricing.js.map +1 -0
  12. package/dist/budget/storage.d.ts +104 -0
  13. package/dist/budget/storage.d.ts.map +1 -0
  14. package/dist/budget/storage.js +0 -0
  15. package/dist/budget/storage.js.map +1 -0
  16. package/dist/budget/with-budget.d.ts +119 -0
  17. package/dist/budget/with-budget.d.ts.map +1 -0
  18. package/dist/budget/with-budget.js +175 -0
  19. package/dist/budget/with-budget.js.map +1 -0
  20. package/dist/budget-orm/index.d.ts +96 -0
  21. package/dist/budget-orm/index.d.ts.map +1 -0
  22. package/dist/budget-orm/index.js +177 -0
  23. package/dist/budget-orm/index.js.map +1 -0
  24. package/dist/commands/ai-eval.d.ts +93 -0
  25. package/dist/commands/ai-eval.d.ts.map +1 -0
  26. package/dist/commands/ai-eval.js +378 -0
  27. package/dist/commands/ai-eval.js.map +1 -0
  28. package/dist/computer-use/actions.d.ts +214 -0
  29. package/dist/computer-use/actions.d.ts.map +1 -0
  30. package/dist/computer-use/actions.js +48 -0
  31. package/dist/computer-use/actions.js.map +1 -0
  32. package/dist/computer-use/errors.d.ts +57 -0
  33. package/dist/computer-use/errors.d.ts.map +1 -0
  34. package/dist/computer-use/errors.js +76 -0
  35. package/dist/computer-use/errors.js.map +1 -0
  36. package/dist/computer-use/index.d.ts +53 -0
  37. package/dist/computer-use/index.d.ts.map +1 -0
  38. package/dist/computer-use/index.js +51 -0
  39. package/dist/computer-use/index.js.map +1 -0
  40. package/dist/computer-use/playwright.d.ts +76 -0
  41. package/dist/computer-use/playwright.d.ts.map +1 -0
  42. package/dist/computer-use/playwright.js +270 -0
  43. package/dist/computer-use/playwright.js.map +1 -0
  44. package/dist/computer-use/tool.d.ts +154 -0
  45. package/dist/computer-use/tool.d.ts.map +1 -0
  46. package/dist/computer-use/tool.js +210 -0
  47. package/dist/computer-use/tool.js.map +1 -0
  48. package/dist/eval/fixtures.d.ts +65 -0
  49. package/dist/eval/fixtures.d.ts.map +1 -0
  50. package/dist/eval/fixtures.js +110 -0
  51. package/dist/eval/fixtures.js.map +1 -0
  52. package/dist/eval/html-reporter.d.ts +25 -0
  53. package/dist/eval/html-reporter.d.ts.map +1 -0
  54. package/dist/eval/html-reporter.js +209 -0
  55. package/dist/eval/html-reporter.js.map +1 -0
  56. package/dist/eval/index.d.ts +271 -0
  57. package/dist/eval/index.d.ts.map +1 -0
  58. package/dist/eval/index.js +510 -0
  59. package/dist/eval/index.js.map +1 -0
  60. package/dist/eval/json-reporter.d.ts +43 -0
  61. package/dist/eval/json-reporter.d.ts.map +1 -0
  62. package/dist/eval/json-reporter.js +40 -0
  63. package/dist/eval/json-reporter.js.map +1 -0
  64. package/dist/fake.d.ts +36 -1
  65. package/dist/fake.d.ts.map +1 -1
  66. package/dist/fake.js +49 -2
  67. package/dist/fake.js.map +1 -1
  68. package/dist/file-search.d.ts +168 -0
  69. package/dist/file-search.d.ts.map +1 -0
  70. package/dist/file-search.js +158 -0
  71. package/dist/file-search.js.map +1 -0
  72. package/dist/handoff.d.ts +95 -0
  73. package/dist/handoff.d.ts.map +1 -0
  74. package/dist/handoff.js +78 -0
  75. package/dist/handoff.js.map +1 -0
  76. package/dist/index.d.ts +29 -5
  77. package/dist/index.d.ts.map +1 -1
  78. package/dist/index.js +22 -2
  79. package/dist/index.js.map +1 -1
  80. package/dist/mcp/client-tools.d.ts +39 -0
  81. package/dist/mcp/client-tools.d.ts.map +1 -0
  82. package/dist/mcp/client-tools.js +147 -0
  83. package/dist/mcp/client-tools.js.map +1 -0
  84. package/dist/mcp/index.d.ts +16 -0
  85. package/dist/mcp/index.d.ts.map +1 -0
  86. package/dist/mcp/index.js +15 -0
  87. package/dist/mcp/index.js.map +1 -0
  88. package/dist/mcp/server-from-agent.d.ts +24 -0
  89. package/dist/mcp/server-from-agent.d.ts.map +1 -0
  90. package/dist/mcp/server-from-agent.js +113 -0
  91. package/dist/mcp/server-from-agent.js.map +1 -0
  92. package/dist/mcp/types.d.ts +64 -0
  93. package/dist/mcp/types.d.ts.map +1 -0
  94. package/dist/mcp/types.js +6 -0
  95. package/dist/mcp/types.js.map +1 -0
  96. package/dist/memory-embedding/index.d.ts +121 -0
  97. package/dist/memory-embedding/index.d.ts.map +1 -0
  98. package/dist/memory-embedding/index.js +229 -0
  99. package/dist/memory-embedding/index.js.map +1 -0
  100. package/dist/memory-extract.d.ts +60 -0
  101. package/dist/memory-extract.d.ts.map +1 -0
  102. package/dist/memory-extract.js +163 -0
  103. package/dist/memory-extract.js.map +1 -0
  104. package/dist/memory-inject.d.ts +39 -0
  105. package/dist/memory-inject.d.ts.map +1 -0
  106. package/dist/memory-inject.js +135 -0
  107. package/dist/memory-inject.js.map +1 -0
  108. package/dist/memory-orm/index.d.ts +118 -0
  109. package/dist/memory-orm/index.d.ts.map +1 -0
  110. package/dist/memory-orm/index.js +187 -0
  111. package/dist/memory-orm/index.js.map +1 -0
  112. package/dist/memory.d.ts +55 -0
  113. package/dist/memory.d.ts.map +1 -0
  114. package/dist/memory.js +132 -0
  115. package/dist/memory.js.map +1 -0
  116. package/dist/observers.d.ts +22 -0
  117. package/dist/observers.d.ts.map +1 -1
  118. package/dist/observers.js.map +1 -1
  119. package/dist/provider-tools.d.ts +15 -1
  120. package/dist/provider-tools.d.ts.map +1 -1
  121. package/dist/provider-tools.js +21 -1
  122. package/dist/provider-tools.js.map +1 -1
  123. package/dist/providers/anthropic.d.ts +9 -1
  124. package/dist/providers/anthropic.d.ts.map +1 -1
  125. package/dist/providers/anthropic.js +66 -11
  126. package/dist/providers/anthropic.js.map +1 -1
  127. package/dist/providers/bedrock.d.ts +60 -0
  128. package/dist/providers/bedrock.d.ts.map +1 -0
  129. package/dist/providers/bedrock.js +167 -0
  130. package/dist/providers/bedrock.js.map +1 -0
  131. package/dist/providers/elevenlabs.d.ts +98 -0
  132. package/dist/providers/elevenlabs.d.ts.map +1 -0
  133. package/dist/providers/elevenlabs.js +229 -0
  134. package/dist/providers/elevenlabs.js.map +1 -0
  135. package/dist/providers/google.d.ts +83 -1
  136. package/dist/providers/google.d.ts.map +1 -1
  137. package/dist/providers/google.js +491 -8
  138. package/dist/providers/google.js.map +1 -1
  139. package/dist/providers/openai.d.ts +8 -1
  140. package/dist/providers/openai.d.ts.map +1 -1
  141. package/dist/providers/openai.js +215 -5
  142. package/dist/providers/openai.js.map +1 -1
  143. package/dist/providers/openrouter.d.ts +43 -0
  144. package/dist/providers/openrouter.d.ts.map +1 -0
  145. package/dist/providers/openrouter.js +21 -0
  146. package/dist/providers/openrouter.js.map +1 -0
  147. package/dist/providers/voyage.d.ts +91 -0
  148. package/dist/providers/voyage.d.ts.map +1 -0
  149. package/dist/providers/voyage.js +166 -0
  150. package/dist/providers/voyage.js.map +1 -0
  151. package/dist/queue-job.d.ts +69 -4
  152. package/dist/queue-job.d.ts.map +1 -1
  153. package/dist/queue-job.js +114 -11
  154. package/dist/queue-job.js.map +1 -1
  155. package/dist/registry.d.ts +3 -1
  156. package/dist/registry.d.ts.map +1 -1
  157. package/dist/registry.js +10 -0
  158. package/dist/registry.js.map +1 -1
  159. package/dist/server/provider.d.ts.map +1 -1
  160. package/dist/server/provider.js +38 -1
  161. package/dist/server/provider.js.map +1 -1
  162. package/dist/similarity-search.d.ts +163 -0
  163. package/dist/similarity-search.d.ts.map +1 -0
  164. package/dist/similarity-search.js +147 -0
  165. package/dist/similarity-search.js.map +1 -0
  166. package/dist/sub-agent-run-store.d.ts +40 -3
  167. package/dist/sub-agent-run-store.d.ts.map +1 -1
  168. package/dist/sub-agent-run-store.js.map +1 -1
  169. package/dist/tool.d.ts +59 -0
  170. package/dist/tool.d.ts.map +1 -1
  171. package/dist/tool.js +45 -4
  172. package/dist/tool.js.map +1 -1
  173. package/dist/types.d.ts +285 -1
  174. package/dist/types.d.ts.map +1 -1
  175. package/dist/vector-stores/index.d.ts +96 -0
  176. package/dist/vector-stores/index.d.ts.map +1 -0
  177. package/dist/vector-stores/index.js +153 -0
  178. package/dist/vector-stores/index.js.map +1 -0
  179. package/package.json +43 -4
package/README.md CHANGED
@@ -11,11 +11,12 @@ pnpm add @rudderjs/ai
11
11
  Install the provider SDK(s) you need:
12
12
 
13
13
  ```bash
14
- pnpm add @anthropic-ai/sdk # Anthropic (Claude)
15
- pnpm add openai # OpenAI (GPT)
16
- pnpm add @google/genai # Google (Gemini)
17
- pnpm add cohere-ai # Cohere (reranking + embeddings)
18
- # Ollama, Jina no extra package needed
14
+ pnpm add @anthropic-ai/sdk # Anthropic (Claude)
15
+ pnpm add openai # OpenAI (GPT) — also used for OpenRouter / Mistral / DeepSeek / Groq / xAI / Ollama
16
+ pnpm add @google/genai # Google (Gemini)
17
+ pnpm add cohere-ai # Cohere (reranking + embeddings)
18
+ pnpm add @aws-sdk/client-bedrock-runtime # AWS Bedrock
19
+ # Jina — no extra package needed
19
20
  ```
20
21
 
21
22
  ## Runtime Compatibility
@@ -45,6 +46,17 @@ export default {
45
46
  ollama: { driver: 'ollama', baseUrl: 'http://localhost:11434' },
46
47
  cohere: { driver: 'cohere', apiKey: process.env.COHERE_API_KEY! },
47
48
  jina: { driver: 'jina', apiKey: process.env.JINA_API_KEY! },
49
+ openrouter: {
50
+ driver: 'openrouter',
51
+ apiKey: process.env.OPENROUTER_API_KEY!,
52
+ siteUrl: process.env.APP_URL, // optional — sent as HTTP-Referer
53
+ siteName: 'My App', // optional — sent as X-Title
54
+ },
55
+ bedrock: {
56
+ driver: 'bedrock',
57
+ region: process.env.AWS_REGION ?? 'us-east-1',
58
+ // credentials are read from the AWS chain (env, IAM, ~/.aws/credentials)
59
+ },
48
60
  },
49
61
  }
50
62
 
@@ -231,7 +243,59 @@ new Researcher().asTool({
231
243
  })
232
244
  ```
233
245
 
234
- The wrapped subagent runs via `prompt()` (non-streaming) by default — to surface inner-agent progress as `tool-update` chunks in the parent stream, pass `streaming: true` (or a custom `(chunk) => SubAgentUpdate | null` projector). When the sub-agent's model emits a *client* tool call, opt into the suspend/resume protocol with `suspendable: { runStore }` — the parent loop halts with the inner agent's `pendingClientToolCalls`, the snapshot persists in the run store, and the host resumes via `Agent.resumeAsTool(subRunId, browserResults, { runStore, agent })`. See `docs/guide/ai.md` for the full flow. `InMemorySubAgentRunStore` works for tests; `CachedSubAgentRunStore` plugs into `@rudderjs/cache` for cross-process persistence. Suspend without streaming throws at builder time.
246
+ The wrapped subagent runs via `prompt()` (non-streaming) by default — to surface inner-agent progress as `tool-update` chunks in the parent stream, pass `streaming: true` (or a custom `(chunk) => SubAgentUpdate | null` projector). Pass `suspendable: { runStore }` to opt into the propagation protocol when the sub-agent pauses on a **client tool call** (`finishReason: 'client_tool_calls'`) or an **approval gate** (`finishReason: 'tool_approval_required'`) — the parent loop halts, the snapshot persists in the run store with a `pauseKind: 'client_tool' | 'approval'` discriminator, and the host resumes via `Agent.resumeAsTool(subRunId, results, { runStore, agent, approvedToolCallIds? })`. See `docs/guide/ai.md` for the full flow. `InMemorySubAgentRunStore` works for tests; `CachedSubAgentRunStore` plugs into `@rudderjs/cache` for cross-process persistence. Suspend without streaming throws at builder time.
247
+
248
+ ### Handoffs — `handoff()`
249
+
250
+ Sometimes a parent agent shouldn't *call* a specialist and incorporate its result — it should *step out* and let the specialist own the rest of the conversation. That's a handoff.
251
+
252
+ ```ts
253
+ import { Agent, handoff } from '@rudderjs/ai'
254
+
255
+ class SalesAgent extends Agent {
256
+ instructions() { return 'You handle pricing, plans, and upgrades.' }
257
+ }
258
+ class SupportAgent extends Agent {
259
+ instructions() { return 'You triage bugs and walk users through fixes.' }
260
+ }
261
+
262
+ class TriageAgent extends Agent {
263
+ instructions() { return 'Greet the user, then route them to the right specialist.' }
264
+ tools() {
265
+ return [
266
+ handoff(SalesAgent, { when: 'pricing or sales questions' }),
267
+ handoff(SupportAgent, { when: 'bug reports or technical issues' }),
268
+ ]
269
+ }
270
+ }
271
+
272
+ const r = await new TriageAgent().prompt('What does the Pro plan cost?')
273
+ console.log(r.text) // "The Pro plan is $49/month..." (from SalesAgent)
274
+ console.log(r.handoffPath) // ['TriageAgent', 'SalesAgent']
275
+ ```
276
+
277
+ How it differs from `asTool`:
278
+
279
+ | | `asTool` (call-and-return) | `handoff` (control transfer) |
280
+ |---|---|---|
281
+ | Parent loop | continues after subagent finishes | ends |
282
+ | Conversation owner | parent | child |
283
+ | Final `text` | parent's | last child in the chain |
284
+ | `r.steps` | parent steps + a single tool-result step for the subagent | parent steps + each agent's steps merged in order |
285
+ | Use case | "look something up and use it" | "transfer to the right specialist" |
286
+
287
+ By default, the model writes a transition message (`{ message: string }`) that becomes the child's first user message. The full prior conversation flows through to the child — but the child uses its own `instructions()` as the system message. Multi-hop is supported (Triage → Sales → Billing); cycles are bounded by `MAX_HANDOFFS = 5` and surface a clear error.
288
+
289
+ ```ts
290
+ // Custom name + payload
291
+ handoff(SalesAgent, {
292
+ name: 'pivotToSales',
293
+ description: 'Transfer the user to a sales specialist.',
294
+ inputSchema: z.object({ urgency: z.enum(['low', 'high']), context: z.string() }),
295
+ })
296
+ ```
297
+
298
+ In `agent.stream()`, a `'handoff'` `StreamChunk` is emitted right before control transfers, with `{ from, to, message? }` for UIs to render a transition indicator before the next agent's chunks arrive.
235
299
 
236
300
  ### Tool execution context
237
301
 
@@ -316,6 +380,17 @@ internally. Tool authors should construct chunks via the
316
380
  `pauseForClientTools()` factory rather than by hand so future shape
317
381
  changes stay source-compatible.
318
382
 
383
+ **Approval pauses:** the sibling `pauseForApproval(toolCall, isClientTool, resumeHandle?)`
384
+ chunk halts the parent loop when a sub-agent's inner approval gate fires
385
+ (inner `finishReason === 'tool_approval_required'`). The parent's loop
386
+ sets `loopFinishReason = 'tool_approval_required'` and surfaces the
387
+ gated call on `pendingApprovalToolCall`. The wrapping `asTool({ suspendable })`
388
+ generator persists a snapshot with `pauseKind: 'approval'` and yields
389
+ this chunk automatically — hand-rolled tools that wrap their own
390
+ approval-gated sub-agents can yield it directly. Resume with
391
+ `Agent.resumeAsTool(subRunId, [], { runStore, agent, approvedToolCallIds: [...] })`
392
+ (or `rejectedToolCallIds`).
393
+
319
394
  **Resuming:** that's caller territory — `@rudderjs/ai` knows nothing about
320
395
  the resume protocol. The canonical implementation is in
321
396
  `@rudderjs/panels`'s `subAgentResume.ts`, which uses a runStore to persist
@@ -498,6 +573,38 @@ const agent = AI.agent({
498
573
  })
499
574
  ```
500
575
 
576
+ ### Hosted vector stores + `fileSearch`
577
+
578
+ `VectorStores` is a CRUD façade over the provider's hosted vector store; `fileSearch({ stores })` is the agent tool that queries them. The provider runs ingestion, chunking, embedding, and retrieval server-side — no embedding pipeline, no pgvector setup, no `execute` to write. Supported on **OpenAI** (`vectorStores.*`) and **Gemini** (`fileSearchStores`). Same façade, same agent surface.
579
+
580
+ ```ts
581
+ import { Agent, VectorStores, fileSearch } from '@rudderjs/ai'
582
+
583
+ // 1. Manage the store
584
+ const kb = await VectorStores.create('Knowledge Base') // OpenAI default
585
+ await kb.add({ filePath: './report.pdf', attributes: { author: 'Alice', year: 2026 } })
586
+
587
+ // 2. Use it as an agent tool
588
+ class SupportAgent extends Agent {
589
+ model() { return 'openai/gpt-4o' } // or 'google/gemini-2.5-flash'
590
+ tools() {
591
+ return [
592
+ fileSearch({
593
+ stores: [kb.id],
594
+ where: { author: 'Alice', year: 2026 }, // server-side metadata filter
595
+ maxResults: 10,
596
+ }),
597
+ ]
598
+ }
599
+ }
600
+ ```
601
+
602
+ **Provider override:** pass `{ provider: 'google' }` to `VectorStores.create(...)` for Gemini. Store ids are full resource paths (`fileSearchStores/foo-bar`) on Gemini, opaque (`vs_abc123`) on OpenAI — apps pass them back verbatim through the same `VectorStores` API.
603
+
604
+ **Self-hosted RAG fallback.** `fileSearch({ ..., fallback: { model, column, embedWith } })` lifts a `similaritySearch` `execute` onto the tool. Providers that recognize the file-search hint (OpenAI, Gemini) still emit their native block; other providers serialize the tool as a function-call and run the fallback against a local pgvector model. Same agent prompt across hosted and self-hosted RAG.
605
+
606
+ Full surface (provider-differences table, `where`/filter shapes, testing with `AiFake`): the framework's [Vector Stores guide](https://github.com/rudderjs/rudder/blob/main/docs/guide/vector-stores.md).
607
+
501
608
  ### Reranking
502
609
 
503
610
  Reorder documents by relevance to a query — useful for RAG pipelines:
@@ -589,6 +696,54 @@ for await (const chunk of stream) {
589
696
  const final = await response // full AgentResponse when stream completes
590
697
  ```
591
698
 
699
+ ### Queued prompts (`agent.queue()`)
700
+
701
+ Push the agent run onto the queue for background execution. Returns a builder so you can configure the queue, attach success/failure callbacks, and (optionally) stream progress to a broadcast channel as it runs.
702
+
703
+ Requires `@rudderjs/queue` (and `@rudderjs/broadcast` if you call `.broadcast()`).
704
+
705
+ ```ts
706
+ // Fire-and-forget background run
707
+ await new SupportAgent()
708
+ .queue('Help with refund request')
709
+ .onQueue('ai')
710
+ .send()
711
+
712
+ // With success/failure callbacks
713
+ await new ResearchAgent()
714
+ .queue('Research GPT-5 architecture')
715
+ .then(response => console.log('Done:', response.text))
716
+ .catch(error => console.error('Failed:', error))
717
+ .send()
718
+ ```
719
+
720
+ #### Stream progress to a broadcast channel — `.broadcast(channel)`
721
+
722
+ Background AI work + live UI without polling. Each stream chunk is broadcast to the channel as the job runs; the final response is broadcast as a `done` event:
723
+
724
+ ```ts
725
+ await new SupportAgent()
726
+ .queue('Help with refund request')
727
+ .broadcast(`user.${userId}.support`)
728
+ .send()
729
+
730
+ // Subscribers on `user.${userId}.support` receive:
731
+ // { event: 'chunk', data: <StreamChunk> } // one per stream chunk (text-delta, tool-call, ...)
732
+ // { event: 'done', data: <AgentResponse> } // final result, after the loop ends
733
+ // { event: 'error', data: { message } } // on failure
734
+ ```
735
+
736
+ The wire shape matches the framework's normal `StreamChunk` types — the same `text-delta` / `tool-call` / `tool-result` shapes you'd iterate from `agent.stream()`. Frontends can subscribe to the channel and reuse their existing chunk-handling code.
737
+
738
+ Pass `eventPrefix` to namespace events when the channel carries other unrelated messages:
739
+
740
+ ```ts
741
+ .broadcast('shared-channel', { eventPrefix: 'agent.' })
742
+ // emits 'agent.chunk', 'agent.done', 'agent.error'
743
+ ```
744
+
745
+ **Process model:** `@rudderjs/broadcast`'s `broadcast()` writes to the WS server in the same process. In the typical RudderJS dev setup (single process running both web + `queue:work`) this works out of the box. Production deployments that run the queue worker as a separate process from the broadcast WS server will need a pub/sub bridge (Redis, Reverb, etc.) — outside the scope of v1.
746
+
592
747
  ### Conversation History
593
748
 
594
749
  Pass message history to maintain context across turns:
@@ -619,6 +774,152 @@ await new ChatAgent().prompt('Continue?') // resumes same thread (per user + cl
619
774
 
620
775
  Returning `false` (the default) keeps the agent stateless. Async returns are awaited; an optional `historyLimit` caps loaded messages. Per-call escape hatches: `prompt(input, { conversation: false })` or `agent.forUser(id).prompt()` / `agent.continue(id).prompt()` — explicit always wins. See `docs/guide/ai.md` for the full precedence chain.
621
776
 
777
+ ### User memory beyond conversation history (Mem0-style)
778
+
779
+ Conversation history persists messages; user memory persists **facts** that should travel across conversations. Useful when the agent needs to remember "Alice's project is named Foo" in a brand-new thread without replaying the entire prior session.
780
+
781
+ ```ts
782
+ import type { UserMemory } from '@rudderjs/ai'
783
+ import { MemoryUserMemory } from '@rudderjs/ai'
784
+
785
+ // config/ai.ts — wire a backend
786
+ export default {
787
+ default: 'anthropic/claude-sonnet-4-5',
788
+ providers: { /* ... */ },
789
+ memory: new MemoryUserMemory(), // in-process; swap for an ORM- or embedding-backed store in production
790
+ } satisfies AiConfig
791
+
792
+ // Use it directly
793
+ const memory = app().make<UserMemory>('ai.memory')
794
+ await memory.remember('user_123', 'Project name is Foo', { tags: ['project'] })
795
+ const facts = await memory.recall('user_123', 'project')
796
+ //=> [{ fact: 'Project name is Foo', tags: ['project'], ... }]
797
+ ```
798
+
799
+ Or declare on an agent class to opt into auto-inject — relevant facts get prepended to the system prompt before each turn, with no plumbing on the caller's side:
800
+
801
+ ```ts
802
+ class SupportAgent extends Agent {
803
+ remembers() {
804
+ return {
805
+ user: ctx.user.id,
806
+ inject: 'auto', // recall + prepend matching facts before each model call
807
+ tags: ['support'], // recall scope
808
+ injectLimit: 5, // cap facts per turn
809
+ injectTokenBudget: 400, // hard token cap; lowest-score facts drop first
810
+ }
811
+ }
812
+ }
813
+
814
+ await new SupportAgent().prompt('Where is my project deployed?')
815
+ // system prompt sent to the model:
816
+ // "You are a support agent.\n\n
817
+ // <user-memory>\n
818
+ // - Project Foo deploys to fly.io us-east\n
819
+ // - …\n
820
+ // </user-memory>"
821
+ ```
822
+
823
+ The auto-cascade runs in `Agent.prompt` / `Agent.stream`, before conversation persistence. `withMemoryInject(spec)` is also exported so you can drop it into `agent.middleware()` manually if you want full control.
824
+
825
+ **Continuation note:** when you pass `options.messages` (e.g. resuming after a client-tool round-trip), both auto-inject and auto-extract are skipped — the system prompt was already augmented on the original turn, and re-extracting would write the same facts twice.
826
+
827
+ #### Auto-extract — distill facts from each turn
828
+
829
+ Set `extract: 'auto'` (plus an `extractWith` model) to have a small model pull durable facts from each successful turn:
830
+
831
+ ```ts
832
+ class SupportAgent extends Agent {
833
+ remembers() {
834
+ return {
835
+ user: ctx.user.id,
836
+ inject: 'auto',
837
+ extract: 'auto',
838
+ extractWith: 'anthropic/claude-haiku-4-5', // small model for fact distillation
839
+ tags: ['support'],
840
+ }
841
+ }
842
+ }
843
+
844
+ await new SupportAgent().prompt('hey, my project is named Foo and lives at /var/www/foo')
845
+ // On success, the small model is asked to distill durable facts. Survivors above
846
+ // the confidence threshold (default 0.7) get written via `mem.remember()`:
847
+ // - "Project name is Foo" (score ~0.95, tags: ['support', 'project'])
848
+ ```
849
+
850
+ Failures (network, JSON parse, schema mismatch, store write) route through `MemoryExtractOptions.onError` and never break the parent run. Failed parent runs do NOT trigger extract.
851
+
852
+ **Poisoning mitigation** — auto-extraction trusts the user's own conversation as input. The default 0.7 confidence threshold is the v1 defense against adversarial "facts." Pair with `MemoryExtractOptions.onExtracted` for an audit log when shipping to production, and tighten the threshold for high-risk domains.
853
+
854
+ #### Production backend — `OrmUserMemory`
855
+
856
+ For production, swap `MemoryUserMemory` for `OrmUserMemory` (subpath `@rudderjs/ai/memory-orm`) — persists rows via your registered `@rudderjs/orm` adapter (Prisma today; Drizzle once you wire the tables):
857
+
858
+ ```ts
859
+ // config/ai.ts
860
+ import type { AiConfig } from '@rudderjs/ai'
861
+ import { OrmUserMemory } from '@rudderjs/ai/memory-orm'
862
+
863
+ export default {
864
+ default: 'anthropic/claude-sonnet-4-5',
865
+ providers: { /* ... */ },
866
+ memory: new OrmUserMemory(),
867
+ } satisfies AiConfig
868
+ ```
869
+
870
+ Add the schema to your Prisma file (or import the reference string `userMemoryPrismaSchema` from `@rudderjs/ai/memory-orm`):
871
+
872
+ ```prisma
873
+ model UserMemory {
874
+ id String @id @default(cuid())
875
+ userId String
876
+ fact String
877
+ /// JSON-encoded `string[]` of tags, or null
878
+ tags String?
879
+ /// Confidence score in [0, 1] — extract sets this from the model's self-rating
880
+ score Float?
881
+ /// Phase 5 — vector embedding for cosine recall (nullable so Phase 4 ignores it)
882
+ embedding Bytes?
883
+ createdAt DateTime @default(now())
884
+ updatedAt DateTime @updatedAt
885
+
886
+ @@index([userId])
887
+ }
888
+ ```
889
+
890
+ Then run `pnpm exec prisma db push` (quick sync in dev) or `pnpm exec prisma migrate dev` to generate a migration, and apply it in production with `pnpm exec prisma migrate deploy`. The `embedding Bytes?` column is intentionally nullable — Phase 5's `EmbeddingUserMemory` populates it without forcing a follow-up migration.
891
+
892
+ `OrmUserMemory.recall()` uses **OR-of-LIKE token overlap** on the `fact` column — the same semantics as `MemoryUserMemory`. Tag-array filtering happens JS-side after fetch (pushing tags into the WHERE is adapter-specific; that lands in a follow-up).
893
+
894
+ #### Embedding backend — `EmbeddingUserMemory` (Phase 5)
895
+
896
+ For semantic recall ("Where do I deploy?" matching "Project Foo lives at fly.io"), wrap `OrmUserMemory` with `EmbeddingUserMemory` from `@rudderjs/ai/memory-embedding`:
897
+
898
+ ```ts
899
+ import { OrmUserMemory } from '@rudderjs/ai/memory-orm'
900
+ import { EmbeddingUserMemory } from '@rudderjs/ai/memory-embedding'
901
+
902
+ export default {
903
+ default: 'anthropic/claude-sonnet-4-5',
904
+ providers: { /* ... */ },
905
+ memory: new EmbeddingUserMemory({
906
+ inner: new OrmUserMemory(),
907
+ model: 'openai/text-embedding-3-small',
908
+ threshold: 0.5, // cosine floor; matches below get dropped
909
+ }),
910
+ } satisfies AiConfig
911
+ ```
912
+
913
+ `remember()` embeds the fact via `AI.embed()` and writes the Float32-packed vector into the row's `embedding` column. `recall()` embeds the query and ranks all of the user's facts by **pure-JS cosine similarity** (acceptable up to a few thousand facts/user; for larger workloads, B7 lands a pgvector-backed variant).
914
+
915
+ **GDPR right-to-be-forgotten cascades automatically** — the embedding lives in the same row as the fact, so `forget()` / `forgetAll()` delete both. No second store to keep in sync.
916
+
917
+ **Backward compat with Phase 4:** rows persisted before `EmbeddingUserMemory` was wired in have `embedding === null`. The default `nullEmbeddingFallback: 'token-overlap'` falls back to the same token-overlap matching `MemoryUserMemory` uses, so upgrading from `OrmUserMemory` doesn't lose recall on existing rows. New `remember()` calls populate the embedding column going forward. Set `nullEmbeddingFallback: 'skip'` to drop pre-embedding rows entirely.
918
+
919
+ `embed()` failures (network down, missing peer SDK) are swallowed: `remember()` still persists the entry with `embedding === null`, and `recall()` falls back to token-overlap. The parent prompt never breaks because of memory work.
920
+
921
+ **A4 status (all phases shipped):** interface, in-process backend, per-call/class declaration, auto-inject, auto-extract, ORM-backed `OrmUserMemory`, and embedding-backed `EmbeddingUserMemory` all ship today. The roadmap item is complete.
922
+
622
923
  ### Model Selection
623
924
 
624
925
  Configure available models for user selection (used by `@rudderjs/panels` chat UI):
@@ -703,6 +1004,179 @@ await new ChatAgent().prompt('again') // throws "Stray prompt: no scripted res
703
1004
 
704
1005
  Under strict mode, only `respondWithSequence` entries count as valid responses; ambient `respondWith` is ignored. Force a single-step script via `respondWithSequence([{ text: '...' }])` if you want exact-one-prompt tests with content.
705
1006
 
1007
+ ### Evals — `@rudderjs/ai/eval`
1008
+
1009
+ `AiFake` proves the agent's wiring works; **evals** prove the agent does the right thing on real models. Define a suite of input cases + assertions, run them against any `Agent`, get a console report with pass/fail + cost + tokens:
1010
+
1011
+ ```ts
1012
+ // evals/support-agent.eval.ts
1013
+ import { evalSuite, llmJudge, exactMatch, regex } from '@rudderjs/ai/eval'
1014
+ import { SupportAgent } from '../app/Agents/SupportAgent.js'
1015
+
1016
+ export default evalSuite('SupportAgent', {
1017
+ agent: () => new SupportAgent(),
1018
+ cases: [
1019
+ { name: 'password reset',
1020
+ input: 'How do I reset my password?',
1021
+ assert: llmJudge('mentions a password reset link') },
1022
+ { name: 'price',
1023
+ input: 'How much does this cost?',
1024
+ assert: exactMatch('$99/month') },
1025
+ { name: 'support email',
1026
+ input: 'How do I contact support?',
1027
+ assert: regex(/support@example\.com/) },
1028
+ ],
1029
+ })
1030
+ ```
1031
+
1032
+ Run via the CLI (Phase 2):
1033
+
1034
+ ```bash
1035
+ pnpm rudder ai:eval # all suites under evals/**/*.eval.ts
1036
+ pnpm rudder ai:eval support # only suites whose name includes "support"
1037
+ pnpm rudder ai:eval --bail # stop on first failing suite
1038
+ pnpm rudder ai:eval --json # machine-readable envelope to stdout
1039
+ ```
1040
+
1041
+ ```text
1042
+ SupportAgent (3 cases, 2.3s, $0.007)
1043
+ ✓ password reset 1.2s $0.003 tokens: 487
1044
+ ✓ price 0.8s $0.002 tokens: 312
1045
+ ✗ support email 1.1s $0.002 tokens: 425
1046
+ pattern /support@example\.com/ did not match "Reach us at hello@…"
1047
+
1048
+ 2 passed, 1 failed
1049
+ total: $0.007 • cumulative tokens: 1,224
1050
+ ```
1051
+
1052
+ Exits 0 when every case passes, 1 on any failure. `--json` emits `{ suites: [{ suite, passed, failed, cases: [{ name, status, pass, score?, reason?, tokens, cost, duration }] }] }` to stdout — pipe directly into `jq` for CI gates.
1053
+
1054
+ Override the discovery pattern via `config('ai').eval.pattern` (`'evals/**/*.eval.ts'` by default; supports `<dir>/**/*<suffix>` and `*<suffix>` shapes).
1055
+
1056
+ Or run programmatically:
1057
+
1058
+ ```ts
1059
+ import { runSuite, reportConsole, reportJson } from '@rudderjs/ai/eval'
1060
+ import suite from './evals/support-agent.eval.ts'
1061
+
1062
+ reportConsole(await runSuite(suite))
1063
+ // reportJson(await runSuite(suite)) // structured envelope for CI scripts
1064
+ ```
1065
+
1066
+ **Built-in metrics:**
1067
+
1068
+ | Metric | Behavior |
1069
+ |---|---|
1070
+ | `exactMatch(string)` | `response.text === expected` |
1071
+ | `regex(RegExp)` | `pattern.test(response.text)` |
1072
+ | `llmJudge(criterion, opts?)` | Asks a small model whether the response satisfies a natural-language criterion. Returns the judge's reasoning in `reason` so failures are debuggable. |
1073
+ | `jsonShape(zodSchema)` | Strips ```` ``` ```` fences, parses, runs zod `safeParse`. Surfaces the zod issue path on failure. Pairs with `Output.object({ schema })` on the agent. |
1074
+ | `semanticMatch(reference, opts?)` | Embeds reference + response via `AI.embed()`, then compares their cosine similarity against `opts.threshold` (default `0.85`). Embed cost rolls into the case's cost rollup. Requires a provider with `createEmbedding()` (openai/google/mistral/cohere/jina). |
1075
+ | `tokenCost(threshold)` | Passes when `response.usage.totalTokens <= threshold`. Detects prompt-size regressions before they show up as a billing surprise. |
1076
+
1077
+ `compose(...metrics)` runs them in order, short-circuits on the first failure, surfaces its reason. Useful for "must be valid JSON AND under budget" assertions:
1078
+
1079
+ ```ts
1080
+ { input: '…',
1081
+ assert: compose(jsonShape(SummarySchema), tokenCost(800)) }
1082
+ ```
1083
+
1084
+ User-defined metrics implement `(response, ctx) => MetricResult` — no inheritance, no decorators. The catalog is just a starting set.
1085
+
1086
+ **Failure semantics:** the runner never throws upward. Agent errors AND assertion throws become `failed` rows with the message in `reason`. Per-case `timeout` (ms) caps long runs. Per-case `agent` factory overrides the suite default — useful for stress-testing one case against a different model.
1087
+
1088
+ **Record + replay:**
1089
+
1090
+ ```bash
1091
+ pnpm rudder ai:eval --record support # run live, save fixtures
1092
+ pnpm rudder ai:eval --replay support # zero API calls, deterministic
1093
+ ```
1094
+
1095
+ `--record` runs each matching case against the real provider and writes assistant turns (text + tool calls) to `evals/__fixtures__/<suite>/<case>.json` (commit these alongside the suite for diffable regression history). `--replay` swaps the runtime with `AiFake` and feeds each case its recorded fixture — same agent code path, scripted responses. Cases without a fixture fall through to a normal run with a stderr warning. The two modes are mutually exclusive.
1096
+
1097
+ **Telescope hook:** `aiObservers` emits an `agent.eval.completed` event after every case (passing, failing, skipped). Telescope's AI collector aggregates pass-rate per `(suite, case)` over time.
1098
+
1099
+ **HTML report:**
1100
+
1101
+ ```bash
1102
+ pnpm rudder ai:eval --html report.html # write a self-contained HTML report
1103
+ ```
1104
+
1105
+ Self-contained HTML (inline CSS + vanilla JS, no external assets), pasteable into PR comments / Slack threads, openable offline. Coexists with `--json` (JSON to stdout, HTML to disk). Click any case row to expand the prompt + response.
1106
+
1107
+ Annotate suites with optional metadata:
1108
+
1109
+ ```ts
1110
+ export default evalSuite('SupportAgent', {
1111
+ agent: () => new SupportAgent(),
1112
+ cases: [/* … */],
1113
+ metadata: {
1114
+ owner: '@billing-team',
1115
+ lastReviewed: '2026-05-01',
1116
+ ticket: 'AI-42',
1117
+ },
1118
+ })
1119
+ ```
1120
+
1121
+ ### MCP integration
1122
+
1123
+ `@rudderjs/ai/mcp` bridges agents and Model Context Protocol servers in both directions. Optional peer: `@modelcontextprotocol/sdk`.
1124
+
1125
+ ```ts
1126
+ import { mcpClientTools, mcpServerFromAgent } from '@rudderjs/ai/mcp'
1127
+ ```
1128
+
1129
+ #### Consume MCP tools in an Agent — `mcpClientTools(transport, opts?)`
1130
+
1131
+ Connect to a remote MCP server and surface its tools to an agent.
1132
+
1133
+ ```ts
1134
+ // HTTP transport
1135
+ const tools = await mcpClientTools('https://api.example.com/mcp')
1136
+
1137
+ // Local subprocess (stdio)
1138
+ const tools = await mcpClientTools({ command: 'npx', args: ['some-mcp-server'] })
1139
+
1140
+ // Already-connected SDK Client (caller owns lifecycle)
1141
+ const tools = await mcpClientTools(myClient)
1142
+
1143
+ class ResearchAgent extends Agent {
1144
+ instructions() { return 'You have access to remote tools via MCP.' }
1145
+ tools() { return tools }
1146
+ }
1147
+ ```
1148
+
1149
+ The remote server's JSON Schema flows directly to providers via the `jsonSchema` passthrough field on `ToolDefinitionOptions` — no zod round-trip. When this connector owns the underlying client (URL or stdio transport), the returned array exposes a non-enumerable `close()` for shutdown:
1150
+
1151
+ ```ts
1152
+ const tools = await mcpClientTools('https://api.example.com/mcp')
1153
+ // ... use tools in agent ...
1154
+ await tools.close?.()
1155
+ ```
1156
+
1157
+ Options: `filter` (drop tools by name), `namePrefix` (avoid collisions across multiple servers), `streaming` (forward MCP `notifications/progress` as `tool-update` chunks; default `true`).
1158
+
1159
+ #### Expose an Agent as an MCP server — `mcpServerFromAgent(AgentClass, opts?)`
1160
+
1161
+ Wrap an `Agent` so external MCP clients (Claude Desktop, Cursor, etc.) can call it. Returns a `McpServer` from `@modelcontextprotocol/sdk` — connect with any SDK transport.
1162
+
1163
+ ```ts
1164
+ import { mcpServerFromAgent } from '@rudderjs/ai/mcp'
1165
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
1166
+
1167
+ const server = await mcpServerFromAgent(ResearchAgent)
1168
+ await server.connect(new StdioServerTransport())
1169
+ ```
1170
+
1171
+ Three exposure modes via `opts.expose`:
1172
+ - `'tools'` *(default)* — one MCP tool per `agent.tools()` entry; the wrapping agent isn't called, individual tools execute directly
1173
+ - `'agent'` — one MCP tool that runs the whole agent (`prompt(text) → response.text`); use this to ship an agent that any MCP-aware client can call
1174
+ - `'both'` — individual tools and the agent prompt-tool side by side
1175
+
1176
+ Other options: `name`, `version`, `instructions` (defaults to `agent.instructions()`), `agentToolName` (renames the prompt-tool when `expose: 'agent' | 'both'`).
1177
+
1178
+ Approval gates (`needsApproval: true`) are dropped on the MCP side — there's no MCP-protocol way to forward "this tool needs human approval" to a remote client. The gate fires only inside the wrapping agent, not for external MCP callers.
1179
+
706
1180
  ## Providers
707
1181
 
708
1182
  | Provider | SDK | Model String | Text | Embeddings | Images | TTS/STT | Reranking | Files |
@@ -718,11 +1192,14 @@ Under strict mode, only `respondWithSequence` entries count as valid responses;
718
1192
  | xAI | *(none)* | `xai/grok-3` | ✓ | | | | | |
719
1193
  | Mistral | *(none)* | `mistral/mistral-large` | ✓ | ✓ | | | | |
720
1194
  | Azure OpenAI | `openai` | `azure/gpt-4o` | ✓ | | | | | |
1195
+ | OpenRouter | `openai` | `openrouter/anthropic/claude-3.5-sonnet` | ✓ | | | | | |
1196
+ | AWS Bedrock | `@aws-sdk/client-bedrock-runtime` | `bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0` | ✓ | | | | | |
721
1197
 
722
1198
  ## Notes
723
1199
 
724
1200
  - Provider SDKs are optional dependencies — install only what you use
725
1201
  - `exactOptionalPropertyTypes` compatible
726
1202
  - All adapters lazy-load their SDK on first use
727
- - Ollama, Groq, DeepSeek, xAI, Mistral reuse the OpenAI adapter (OpenAI-compatible API)
1203
+ - Ollama, Groq, DeepSeek, xAI, Mistral, OpenRouter reuse the OpenAI adapter (OpenAI-compatible API)
728
1204
  - Cohere requires `cohere-ai` SDK; Jina uses direct HTTP (no SDK needed)
1205
+ - Bedrock uses the AWS credential chain (env vars / IAM roles / `~/.aws/credentials`); v1 supports Anthropic Claude models on Bedrock
@@ -29,7 +29,7 @@ await agent('You are helpful.').prompt('Hello') // simplest form
29
29
  Configure providers in `config/ai.ts`. The Node-only `AiProvider` lives at `@rudderjs/ai/server` (the main `@rudderjs/ai` entry is runtime-agnostic and has no provider class):
30
30
 
31
31
  ```ts
32
- // config/ai.ts — providers: anthropic, openai, google, ollama, deepseek, xai, groq, mistral, azure
32
+ // config/ai.ts — providers: anthropic, openai, google, ollama, deepseek, xai, groq, mistral, azure, openrouter, bedrock
33
33
  import type { AiConfig } from '@rudderjs/ai'
34
34
 
35
35
  export default {
@@ -91,7 +91,7 @@ class Planner extends Agent implements HasTools {
91
91
  }
92
92
  ```
93
93
 
94
- By default the subagent runs via `prompt()` (non-streaming). Pass `streaming: true` to surface inner progress as `tool-update` chunks (default projection emits `agent_start` / `tool_call` / `agent_done`); pass `(chunk) => SubAgentUpdate | null` for a custom projector. To propagate inner client-tool calls upward through the parent loop, also pass `suspendable: { runStore }` (suspend without streaming throws at builder time) — the host's continuation calls `Agent.resumeAsTool(subRunId, results, { runStore, agent })` to resume the inner agent with the browser's results. `InMemorySubAgentRunStore` works for tests; `CachedSubAgentRunStore` plugs into `@rudderjs/cache` for multi-worker persistence.
94
+ By default the subagent runs via `prompt()` (non-streaming). Pass `streaming: true` to surface inner progress as `tool-update` chunks (default projection emits `agent_start` / `tool_call` / `agent_done`, plus `agent_pending_approval` for inner approval gates); pass `(chunk) => SubAgentUpdate | null` for a custom projector. To propagate inner pauses upward through the parent loop, also pass `suspendable: { runStore }` (suspend without streaming throws at builder time) — `asTool` handles BOTH client-tool pauses AND approval pauses symmetrically, persisting a snapshot with a `pauseKind: 'client_tool' | 'approval'` discriminator. The host's continuation calls `Agent.resumeAsTool(subRunId, results, { runStore, agent })` for client-tool resumes, or `Agent.resumeAsTool(subRunId, [], { runStore, agent, approvedToolCallIds: [...] })` (or `rejectedToolCallIds`) for approval resumes. The returned `'paused'` variant carries `pauseKind` so the host can route the next round-trip correctly. `InMemorySubAgentRunStore` works for tests; `CachedSubAgentRunStore` plugs into `@rudderjs/cache` for multi-worker persistence.
95
95
 
96
96
  ### Middleware
97
97
 
@@ -162,6 +162,65 @@ for await (const chunk of stream) {
162
162
  const final = await response // full AgentResponse after stream ends
163
163
  ```
164
164
 
165
+ ### MCP integration (`@rudderjs/ai/mcp`)
166
+
167
+ Bridge agents and Model Context Protocol servers in both directions. Optional peer: `@modelcontextprotocol/sdk`.
168
+
169
+ **Consume MCP tools in an agent:**
170
+
171
+ ```ts
172
+ import { mcpClientTools } from '@rudderjs/ai/mcp'
173
+
174
+ const tools = await mcpClientTools('https://api.example.com/mcp')
175
+ // or: await mcpClientTools({ command: 'npx', args: ['some-mcp-server'] })
176
+
177
+ class ResearchAgent extends Agent {
178
+ instructions() { return 'You can call remote MCP tools.' }
179
+ tools() { return tools }
180
+ }
181
+ ```
182
+
183
+ **Expose an agent as an MCP server** (callable from Claude Desktop, Cursor, etc.):
184
+
185
+ ```ts
186
+ import { mcpServerFromAgent } from '@rudderjs/ai/mcp'
187
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
188
+
189
+ // Default: each agent.tools() entry becomes an MCP tool
190
+ const server = await mcpServerFromAgent(MyAgent)
191
+
192
+ // Or expose the whole agent as one prompt-tool
193
+ const promptServer = await mcpServerFromAgent(MyAgent, { expose: 'agent' })
194
+
195
+ await server.connect(new StdioServerTransport())
196
+ ```
197
+
198
+ When `mcpClientTools` owns the underlying client (URL or stdio transport), the returned array exposes `close()` for shutdown — call it when the agent is done. With a caller-provided client, lifecycle stays with the caller.
199
+
200
+ ### Queued prompts + live broadcast (`agent.queue().broadcast()`)
201
+
202
+ `agent.queue(input)` ships the run to the background queue (`@rudderjs/queue`). Add `.broadcast(channel)` to stream chunks to a `@rudderjs/broadcast` channel as the job runs — background AI work + live UI without polling.
203
+
204
+ ```ts
205
+ import { agent } from '@rudderjs/ai'
206
+
207
+ // Plain queued — no live updates
208
+ await agent('You help with refunds.')
209
+ .queue('Process refund for order #1234')
210
+ .onQueue('ai')
211
+ .send()
212
+
213
+ // Stream chunks to the user's channel as they arrive
214
+ await new SupportAgent()
215
+ .queue('Help with refund request')
216
+ .broadcast(`user.${userId}.support`)
217
+ .send()
218
+ ```
219
+
220
+ Subscribers on the channel receive `chunk` events (one per `StreamChunk`), then a `done` event with the final `AgentResponse`, or an `error` event on failure. Optional `eventPrefix` namespaces events: `.broadcast('chan', { eventPrefix: 'agent.' })` emits `agent.chunk` / `agent.done` / `agent.error`.
221
+
222
+ `@rudderjs/broadcast`'s WebSocket state is process-local — running the web server and `queue:work` in the same process works out of the box; a separate worker process needs a pub/sub bridge, which is not available yet.
223
+
165
224
  ### Structured Output
166
225
 
167
226
  Use `Output` to constrain responses to typed schemas:
@@ -195,6 +254,7 @@ import { Image, Document } from '@rudderjs/ai' // attachments
195
254
  import { MemoryConversationStore, setConversationStore } from '@rudderjs/ai'
196
255
  import { Output } from '@rudderjs/ai' // structured output
197
256
  import { AiRegistry } from '@rudderjs/ai' // provider registry
257
+ import { mcpClientTools, mcpServerFromAgent } from '@rudderjs/ai/mcp' // MCP bridge (Node)
198
258
  import { stepCountIs, hasToolCall } from '@rudderjs/ai' // stop conditions
199
259
  import type { AgentResponse, AiConfig, AiMiddleware, AnyTool, HasTools, HasMiddleware } from '@rudderjs/ai'
200
260
  ```