@providerprotocol/ai 0.0.34 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/README.md +542 -3
  2. package/dist/anthropic/index.d.ts +2 -1
  3. package/dist/anthropic/index.js +151 -145
  4. package/dist/anthropic/index.js.map +1 -1
  5. package/dist/cerebras/index.d.ts +392 -0
  6. package/dist/cerebras/index.js +648 -0
  7. package/dist/cerebras/index.js.map +1 -0
  8. package/dist/chunk-3GWM5GR3.js +153 -0
  9. package/dist/chunk-3GWM5GR3.js.map +1 -0
  10. package/dist/chunk-4OGB7JZA.js +157 -0
  11. package/dist/chunk-4OGB7JZA.js.map +1 -0
  12. package/dist/chunk-7DXVRILR.js +49 -0
  13. package/dist/chunk-7DXVRILR.js.map +1 -0
  14. package/dist/{chunk-3C7O2RNO.js → chunk-A2IM7PGT.js} +6 -4
  15. package/dist/{chunk-3C7O2RNO.js.map → chunk-A2IM7PGT.js.map} +1 -1
  16. package/dist/{chunk-3D6XGGVG.js → chunk-ARVM24K2.js} +2 -2
  17. package/dist/{chunk-4J6OFUKX.js → chunk-AY55T37A.js} +70 -162
  18. package/dist/chunk-AY55T37A.js.map +1 -0
  19. package/dist/{chunk-ILR2D5PN.js → chunk-BRP5XJ6Q.js} +2 -86
  20. package/dist/chunk-BRP5XJ6Q.js.map +1 -0
  21. package/dist/chunk-C4JP64VW.js +298 -0
  22. package/dist/chunk-C4JP64VW.js.map +1 -0
  23. package/dist/chunk-COS4ON4G.js +111 -0
  24. package/dist/chunk-COS4ON4G.js.map +1 -0
  25. package/dist/chunk-ETBFOLQN.js +34 -0
  26. package/dist/chunk-ETBFOLQN.js.map +1 -0
  27. package/dist/chunk-HB4ZIH3T.js +31 -0
  28. package/dist/chunk-HB4ZIH3T.js.map +1 -0
  29. package/dist/chunk-I53CI6ZZ.js +142 -0
  30. package/dist/chunk-I53CI6ZZ.js.map +1 -0
  31. package/dist/chunk-IDZOVWP3.js +29 -0
  32. package/dist/chunk-IDZOVWP3.js.map +1 -0
  33. package/dist/chunk-JA3UZALR.js +88 -0
  34. package/dist/chunk-JA3UZALR.js.map +1 -0
  35. package/dist/{chunk-WAKD3OO5.js → chunk-N5DX5JW3.js} +31 -31
  36. package/dist/chunk-N5DX5JW3.js.map +1 -0
  37. package/dist/chunk-OIEWDFQU.js +97 -0
  38. package/dist/chunk-OIEWDFQU.js.map +1 -0
  39. package/dist/{chunk-TOJCZMVU.js → chunk-PMK5LZ5Z.js} +40 -40
  40. package/dist/chunk-PMK5LZ5Z.js.map +1 -0
  41. package/dist/chunk-UFFJDYCE.js +94 -0
  42. package/dist/chunk-UFFJDYCE.js.map +1 -0
  43. package/dist/chunk-VGKZIGVI.js +222 -0
  44. package/dist/chunk-VGKZIGVI.js.map +1 -0
  45. package/dist/chunk-VOEWHQUB.js +31 -0
  46. package/dist/chunk-VOEWHQUB.js.map +1 -0
  47. package/dist/{chunk-KUPF5KHT.js → chunk-Y5H7C5J4.js} +2 -2
  48. package/dist/chunk-ZI67WIQS.js +30 -0
  49. package/dist/chunk-ZI67WIQS.js.map +1 -0
  50. package/dist/{embedding-D2BYIehX.d.ts → embedding-CW6SaOOz.d.ts} +1 -1
  51. package/dist/google/index.d.ts +2 -1
  52. package/dist/google/index.js +202 -199
  53. package/dist/google/index.js.map +1 -1
  54. package/dist/groq/index.d.ts +410 -0
  55. package/dist/groq/index.js +649 -0
  56. package/dist/groq/index.js.map +1 -0
  57. package/dist/http/index.d.ts +3 -2
  58. package/dist/http/index.js +5 -4
  59. package/dist/image-stream-C0ciACM2.d.ts +11 -0
  60. package/dist/index.d.ts +8 -118
  61. package/dist/index.js +518 -767
  62. package/dist/index.js.map +1 -1
  63. package/dist/{llm-BQJZj3cD.d.ts → llm-DwbUK7un.d.ts} +12 -1632
  64. package/dist/middleware/logging/index.d.ts +76 -0
  65. package/dist/middleware/logging/index.js +74 -0
  66. package/dist/middleware/logging/index.js.map +1 -0
  67. package/dist/middleware/parsed-object/index.d.ts +45 -0
  68. package/dist/middleware/parsed-object/index.js +73 -0
  69. package/dist/middleware/parsed-object/index.js.map +1 -0
  70. package/dist/middleware/pubsub/index.d.ts +104 -0
  71. package/dist/middleware/pubsub/index.js +230 -0
  72. package/dist/middleware/pubsub/index.js.map +1 -0
  73. package/dist/middleware/pubsub/server/express/index.d.ts +52 -0
  74. package/dist/middleware/pubsub/server/express/index.js +11 -0
  75. package/dist/middleware/pubsub/server/express/index.js.map +1 -0
  76. package/dist/middleware/pubsub/server/fastify/index.d.ts +53 -0
  77. package/dist/middleware/pubsub/server/fastify/index.js +11 -0
  78. package/dist/middleware/pubsub/server/fastify/index.js.map +1 -0
  79. package/dist/middleware/pubsub/server/h3/index.d.ts +56 -0
  80. package/dist/middleware/pubsub/server/h3/index.js +11 -0
  81. package/dist/middleware/pubsub/server/h3/index.js.map +1 -0
  82. package/dist/middleware/pubsub/server/index.d.ts +78 -0
  83. package/dist/middleware/pubsub/server/index.js +34 -0
  84. package/dist/middleware/pubsub/server/index.js.map +1 -0
  85. package/dist/middleware/pubsub/server/webapi/index.d.ts +53 -0
  86. package/dist/middleware/pubsub/server/webapi/index.js +11 -0
  87. package/dist/middleware/pubsub/server/webapi/index.js.map +1 -0
  88. package/dist/ollama/index.d.ts +2 -1
  89. package/dist/ollama/index.js +48 -45
  90. package/dist/ollama/index.js.map +1 -1
  91. package/dist/openai/index.d.ts +2 -1
  92. package/dist/openai/index.js +319 -313
  93. package/dist/openai/index.js.map +1 -1
  94. package/dist/openrouter/index.d.ts +2 -1
  95. package/dist/openrouter/index.js +379 -383
  96. package/dist/openrouter/index.js.map +1 -1
  97. package/dist/proxy/index.d.ts +10 -914
  98. package/dist/proxy/index.js +275 -1007
  99. package/dist/proxy/index.js.map +1 -1
  100. package/dist/proxy/server/express/index.d.ts +161 -0
  101. package/dist/proxy/server/express/index.js +24 -0
  102. package/dist/proxy/server/express/index.js.map +1 -0
  103. package/dist/proxy/server/fastify/index.d.ts +162 -0
  104. package/dist/proxy/server/fastify/index.js +24 -0
  105. package/dist/proxy/server/fastify/index.js.map +1 -0
  106. package/dist/proxy/server/h3/index.d.ts +189 -0
  107. package/dist/proxy/server/h3/index.js +28 -0
  108. package/dist/proxy/server/h3/index.js.map +1 -0
  109. package/dist/proxy/server/index.d.ts +151 -0
  110. package/dist/proxy/server/index.js +48 -0
  111. package/dist/proxy/server/index.js.map +1 -0
  112. package/dist/proxy/server/webapi/index.d.ts +278 -0
  113. package/dist/proxy/server/webapi/index.js +32 -0
  114. package/dist/proxy/server/webapi/index.js.map +1 -0
  115. package/dist/responses/index.d.ts +650 -0
  116. package/dist/responses/index.js +930 -0
  117. package/dist/responses/index.js.map +1 -0
  118. package/dist/{retry-8Ch-WWgX.d.ts → retry-YayV42GV.d.ts} +1 -1
  119. package/dist/stream-CecfVCPO.d.ts +1632 -0
  120. package/dist/types-C8Gciizr.d.ts +168 -0
  121. package/dist/utils/index.d.ts +53 -0
  122. package/dist/utils/index.js +7 -0
  123. package/dist/utils/index.js.map +1 -0
  124. package/dist/xai/index.d.ts +2 -1
  125. package/dist/xai/index.js +310 -310
  126. package/dist/xai/index.js.map +1 -1
  127. package/package.json +82 -4
  128. package/dist/chunk-4J6OFUKX.js.map +0 -1
  129. package/dist/chunk-ILR2D5PN.js.map +0 -1
  130. package/dist/chunk-TOJCZMVU.js.map +0 -1
  131. package/dist/chunk-WAKD3OO5.js.map +0 -1
  132. /package/dist/{chunk-3D6XGGVG.js.map → chunk-ARVM24K2.js.map} +0 -0
  133. /package/dist/{chunk-KUPF5KHT.js.map → chunk-Y5H7C5J4.js.map} +0 -0
package/README.md CHANGED
@@ -26,9 +26,12 @@ console.log(turn.response.text);
26
26
  | Google | `@providerprotocol/ai/google` | ✓ | ✓ | ✓ |
27
27
  | xAI | `@providerprotocol/ai/xai` | ✓ | | ✓ |
28
28
  | Ollama | `@providerprotocol/ai/ollama` | ✓ | ✓ | |
29
- | OpenRouter | `@providerprotocol/ai/openrouter` | ✓ | ✓ | |
29
+ | OpenRouter | `@providerprotocol/ai/openrouter` | ✓ | ✓ | |
30
+ | Groq | `@providerprotocol/ai/groq` | ✓ | | |
31
+ | Cerebras | `@providerprotocol/ai/cerebras` | ✓ | | |
32
+ | OpenResponses | `@providerprotocol/ai/responses` | ✓ | | |
30
33
 
31
- API keys are loaded automatically from environment variables (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, etc.).
34
+ API keys are loaded automatically from environment variables (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GROQ_API_KEY`, `CEREBRAS_API_KEY`, etc.).
32
35
 
33
36
  ## LLM
34
37
 
@@ -63,6 +66,7 @@ for await (const event of stream) {
63
66
  |-------|-------------|
64
67
  | `text_delta` | Incremental text output |
65
68
  | `reasoning_delta` | Incremental reasoning/thinking output |
69
+ | `object_delta` | Incremental structured output JSON |
66
70
  | `tool_call_delta` | Tool call arguments being streamed |
67
71
  | `tool_execution_start` | Tool execution has started |
68
72
  | `tool_execution_end` | Tool execution has completed |
@@ -123,12 +127,36 @@ console.log(turn.data); // { name: 'John', age: 30 }
123
127
  ### Multimodal Input
124
128
 
125
129
  ```typescript
126
- import { Image } from '@providerprotocol/ai';
130
+ import { Image, Document, Audio, Video } from '@providerprotocol/ai';
127
131
 
132
+ // Images
128
133
  const img = await Image.fromPath('./photo.png');
129
134
  const turn = await claude.generate([img, 'What is in this image?']);
135
+
136
+ // Documents (PDF, text)
137
+ const doc = await Document.fromPath('./report.pdf', 'Annual Report');
138
+ const docTurn = await claude.generate([doc.toBlock(), 'Summarize this document']);
139
+
140
+ // Audio (Google, OpenRouter)
141
+ const audio = await Audio.fromPath('./recording.mp3');
142
+ const audioTurn = await gemini.generate([audio.toBlock(), 'Transcribe this audio']);
143
+
144
+ // Video (Google, OpenRouter)
145
+ const video = await Video.fromPath('./clip.mp4');
146
+ const videoTurn = await gemini.generate([video.toBlock(), 'Describe this video']);
130
147
  ```
131
148
 
149
+ **Multimodal Support by Provider:**
150
+
151
+ | Provider | Image | Document | Audio | Video |
152
+ |----------|:-----:|:--------:|:-----:|:-----:|
153
+ | Anthropic | ✓ | PDF, Text | | |
154
+ | OpenAI | ✓ | PDF, Text | | |
155
+ | Google | ✓ | PDF, Text | ✓ | ✓ |
156
+ | OpenRouter | ✓ | PDF, Text | ✓ | ✓ |
157
+ | xAI | ✓ | | | |
158
+ | Groq | ✓ | | | |
159
+
132
160
  ## Anthropic Beta Features
133
161
 
134
162
  Anthropic provides beta features through the `betas` export. Enable them at the model level:
@@ -168,21 +196,164 @@ const thinker = llm({
168
196
  | `interleavedThinking` | Claude can think between tool calls |
169
197
  | `devFullThinking` | Developer mode for full thinking visibility |
170
198
  | `effort` | Control response thoroughness vs efficiency (Opus 4.5) |
199
+ | `computerUseLegacy` | Computer use for Claude 3.x models |
171
200
  | `computerUse` | Mouse, keyboard, screenshot control (Claude 4) |
201
+ | `computerUseOpus` | Computer use with extra commands (Opus 4.5) |
172
202
  | `codeExecution` | Python/Bash sandbox execution |
173
203
  | `tokenEfficientTools` | Up to 70% token reduction for tool calls |
174
204
  | `fineGrainedToolStreaming` | Stream tool args without buffering |
205
+ | `maxTokens35Sonnet` | 8,192 output tokens for Claude 3.5 Sonnet |
175
206
  | `output128k` | 128K token output length |
176
207
  | `context1m` | 1 million token context window (Sonnet 4) |
177
208
  | `promptCaching` | Reduced latency and costs via caching |
178
209
  | `extendedCacheTtl` | 1-hour cache TTL (vs 5-minute default) |
210
+ | `contextManagement` | Automatic tool call clearing for context |
211
+ | `modelContextWindowExceeded` | Handle exceeded context windows |
179
212
  | `advancedToolUse` | Tool Search, Programmatic Tool Calling |
180
213
  | `mcpClient` | Connect to remote MCP servers |
214
+ | `mcpClientLatest` | Updated MCP client |
181
215
  | `filesApi` | Upload and manage files |
182
216
  | `pdfs` | PDF document support |
217
+ | `tokenCounting` | Token counting endpoint |
183
218
  | `messageBatches` | Async batch processing at 50% cost |
184
219
  | `skills` | Agent Skills (PowerPoint, Excel, Word, PDF) |
185
220
 
221
+ ## Anthropic Built-in Tools
222
+
223
+ Use Anthropic's built-in tools directly with the `tools` export:
224
+
225
+ ```typescript
226
+ import { anthropic, betas, tools } from '@providerprotocol/ai/anthropic';
227
+ import { llm } from '@providerprotocol/ai';
228
+
229
+ // Web search with optional user location
230
+ const model = llm({
231
+ model: anthropic('claude-sonnet-4-20250514'),
232
+ params: {
233
+ tools: [tools.webSearch({ max_results: 5 })],
234
+ },
235
+ });
236
+
237
+ // Computer use (requires beta)
238
+ const computerModel = llm({
239
+ model: anthropic('claude-sonnet-4-20250514', {
240
+ betas: [betas.computerUse],
241
+ }),
242
+ params: {
243
+ tools: [tools.computer({ display_width: 1920, display_height: 1080, display_number: 1 })],
244
+ },
245
+ });
246
+
247
+ // Code execution (requires beta)
248
+ const codeModel = llm({
249
+ model: anthropic('claude-sonnet-4-20250514', {
250
+ betas: [betas.codeExecution],
251
+ }),
252
+ params: {
253
+ tools: [tools.codeExecution()],
254
+ },
255
+ });
256
+ ```
257
+
258
+ **Available Built-in Tools:**
259
+
260
+ | Tool | Description |
261
+ |------|-------------|
262
+ | `tools.webSearch()` | Search the web with optional max results and location |
263
+ | `tools.computer()` | Mouse, keyboard, and screenshot control |
264
+ | `tools.textEditor()` | Edit text files programmatically |
265
+ | `tools.bash()` | Execute bash commands |
266
+ | `tools.codeExecution()` | Run code in a sandboxed environment |
267
+ | `tools.toolSearch()` | Search through available tools |
268
+
269
+ ## Reasoning / Extended Thinking
270
+
271
+ Access model reasoning and extended thinking across providers with a unified API.
272
+
273
+ ### Anthropic
274
+
275
+ ```typescript
276
+ import { llm } from '@providerprotocol/ai';
277
+ import { anthropic } from '@providerprotocol/ai/anthropic';
278
+
279
+ const claude = llm({
280
+ model: anthropic('claude-sonnet-4-20250514'),
281
+ params: {
282
+ max_tokens: 16000,
283
+ thinking: {
284
+ type: 'enabled',
285
+ budget_tokens: 5000,
286
+ },
287
+ },
288
+ });
289
+
290
+ const turn = await claude.generate('Solve this complex problem...');
291
+ console.log(turn.response.reasoning); // Reasoning blocks
292
+ ```
293
+
294
+ ### OpenAI
295
+
296
+ ```typescript
297
+ import { llm } from '@providerprotocol/ai';
298
+ import { openai } from '@providerprotocol/ai/openai';
299
+
300
+ const gpt = llm({
301
+ model: openai('o3-mini'),
302
+ params: {
303
+ max_output_tokens: 4000,
304
+ reasoning: {
305
+ effort: 'medium',
306
+ summary: 'detailed',
307
+ },
308
+ },
309
+ });
310
+ ```
311
+
312
+ ### Google Gemini
313
+
314
+ ```typescript
315
+ import { llm } from '@providerprotocol/ai';
316
+ import { google } from '@providerprotocol/ai/google';
317
+
318
+ const gemini = llm({
319
+ model: google('gemini-2.5-flash'),
320
+ params: {
321
+ maxOutputTokens: 4000,
322
+ thinkingConfig: {
323
+ thinkingBudget: -1, // Dynamic
324
+ includeThoughts: true,
325
+ },
326
+ },
327
+ });
328
+ ```
329
+
330
+ ### Cerebras
331
+
332
+ ```typescript
333
+ import { llm } from '@providerprotocol/ai';
334
+ import { cerebras } from '@providerprotocol/ai/cerebras';
335
+
336
+ const model = llm({
337
+ model: cerebras('gpt-oss-120b'),
338
+ params: {
339
+ reasoning_effort: 'high',
340
+ reasoning_format: 'parsed',
341
+ },
342
+ });
343
+ ```
344
+
345
+ ### Streaming Reasoning
346
+
347
+ All providers emit `ReasoningDelta` events during streaming:
348
+
349
+ ```typescript
350
+ for await (const event of stream) {
351
+ if (event.type === 'reasoning_delta') {
352
+ process.stdout.write(event.delta.text);
353
+ }
354
+ }
355
+ ```
356
+
186
357
  ## Embeddings
187
358
 
188
359
  ```typescript
@@ -414,6 +585,163 @@ localStorage.setItem('conversation', JSON.stringify(json));
414
585
  const restored = Thread.fromJSON(JSON.parse(localStorage.getItem('conversation')));
415
586
  ```
416
587
 
588
+ ## Middleware
589
+
590
+ Compose request/response/stream transformations with the middleware system. Middleware is imported from dedicated entry points.
591
+
592
+ ### Parsed Object Middleware
593
+
594
+ Automatically parse streaming JSON from structured output and tool call events:
595
+
596
+ ```typescript
597
+ import { llm } from '@providerprotocol/ai';
598
+ import { anthropic } from '@providerprotocol/ai/anthropic';
599
+ import { parsedObjectMiddleware } from '@providerprotocol/ai/middleware/parsed-object';
600
+
601
+ const model = llm({
602
+ model: anthropic('claude-sonnet-4-20250514'),
603
+ structure: {
604
+ type: 'object',
605
+ properties: {
606
+ city: { type: 'string' },
607
+ country: { type: 'string' },
608
+ population: { type: 'number' },
609
+ },
610
+ required: ['city', 'country', 'population'],
611
+ },
612
+ middleware: [parsedObjectMiddleware()],
613
+ });
614
+
615
+ for await (const event of model.stream('What is the capital of France?')) {
616
+ if (event.type === 'object_delta') {
617
+ // Access incrementally parsed structured data
618
+ console.log(event.delta.parsed);
619
+ // { city: "Par" } -> { city: "Paris" } -> { city: "Paris", country: "Fr" } -> ...
620
+ }
621
+ }
622
+ ```
623
+
624
+ ### Logging Middleware
625
+
626
+ Add visibility into request lifecycle:
627
+
628
+ ```typescript
629
+ import { llm } from '@providerprotocol/ai';
630
+ import { anthropic } from '@providerprotocol/ai/anthropic';
631
+ import { loggingMiddleware } from '@providerprotocol/ai/middleware/logging';
632
+
633
+ const model = llm({
634
+ model: anthropic('claude-sonnet-4-20250514'),
635
+ middleware: [loggingMiddleware({ level: 'debug' })],
636
+ });
637
+
638
+ // Logs: [PP] [anthropic] Starting llm request (streaming)
639
+ // Logs: [PP] [anthropic] Completed in 1234ms
640
+ const result = await model.generate('Hello');
641
+ ```
642
+
643
+ ### Pub-Sub Middleware (Stream Resumption)
644
+
645
+ Enable reconnecting clients to catch up on missed events during active generation. The middleware buffers events and publishes them to subscribers.
646
+
647
+ ```typescript
648
+ import { llm } from '@providerprotocol/ai';
649
+ import { anthropic } from '@providerprotocol/ai/anthropic';
650
+ import { pubsubMiddleware, memoryAdapter } from '@providerprotocol/ai/middleware/pubsub';
651
+ import { webapi } from '@providerprotocol/ai/middleware/pubsub/server';
652
+
653
+ // Create a shared adapter instance
654
+ const adapter = memoryAdapter({ maxStreams: 1000 });
655
+
656
+ // Server route handling both new requests and reconnections
657
+ Bun.serve({
658
+ port: 3000,
659
+ async fetch(req) {
660
+ const { messages, streamId } = await req.json();
661
+ const exists = await adapter.exists(streamId);
662
+
663
+ if (!exists) {
664
+ // Start background generation (fire and forget)
665
+ const model = llm({
666
+ model: anthropic('claude-sonnet-4-20250514'),
667
+ middleware: [pubsubMiddleware({ adapter, streamId })],
668
+ });
669
+ consumeInBackground(model.stream(messages));
670
+ }
671
+
672
+ // Both new and reconnect: subscribe to events
673
+ return new Response(webapi.createSubscriberStream(streamId, adapter), {
674
+ headers: { 'Content-Type': 'text/event-stream' },
675
+ });
676
+ },
677
+ });
678
+ ```
679
+
680
+ **Framework Adapters:**
681
+
682
+ ```typescript
683
+ // Express
684
+ import { express } from '@providerprotocol/ai/middleware/pubsub/server';
685
+ app.post('/api/ai/reconnect', (req, res) => {
686
+ const { streamId } = req.body;
687
+ express.streamSubscriber(streamId, adapter, res);
688
+ });
689
+
690
+ // Fastify
691
+ import { fastify } from '@providerprotocol/ai/middleware/pubsub/server';
692
+ app.post('/api/ai/reconnect', (request, reply) => {
693
+ const { streamId } = request.body;
694
+ return fastify.streamSubscriber(streamId, adapter, reply);
695
+ });
696
+
697
+ // H3/Nuxt
698
+ import { h3 } from '@providerprotocol/ai/middleware/pubsub/server';
699
+ export default defineEventHandler(async (event) => {
700
+ const { streamId } = await readBody(event);
701
+ return h3.streamSubscriber(streamId, adapter, event);
702
+ });
703
+ ```
704
+
705
+ **Custom Adapters:**
706
+
707
+ Implement `PubSubAdapter` for custom backends (Redis, etc.):
708
+
709
+ ```typescript
710
+ import type { PubSubAdapter } from '@providerprotocol/ai/middleware/pubsub';
711
+
712
+ const redisAdapter: PubSubAdapter = {
713
+ async exists(streamId) { /* ... */ },
714
+ async create(streamId, metadata) { /* ... */ },
715
+ async append(streamId, event) { /* ... */ },
716
+ async markCompleted(streamId) { /* ... */ },
717
+ async isCompleted(streamId) { /* ... */ },
718
+ async getEvents(streamId) { /* ... */ },
719
+ async getStream(streamId) { /* ... */ },
720
+ subscribe(streamId, callback) { /* ... */ },
721
+ publish(streamId, event) { /* ... */ },
722
+ async remove(streamId) { /* ... */ },
723
+ async cleanup(maxAge) { /* ... */ },
724
+ };
725
+ ```
726
+
727
+ ### Combining Middleware
728
+
729
+ ```typescript
730
+ import { llm } from '@providerprotocol/ai';
731
+ import { anthropic } from '@providerprotocol/ai/anthropic';
732
+ import { loggingMiddleware } from '@providerprotocol/ai/middleware/logging';
733
+ import { parsedObjectMiddleware } from '@providerprotocol/ai/middleware/parsed-object';
734
+
735
+ const model = llm({
736
+ model: anthropic('claude-sonnet-4-20250514'),
737
+ structure: mySchema,
738
+ middleware: [
739
+ loggingMiddleware({ level: 'info' }),
740
+ parsedObjectMiddleware(),
741
+ ],
742
+ });
743
+ ```
744
+
417
745
  ## Error Handling
418
746
 
419
747
  All errors are normalized to `UPPError` with consistent error codes:
@@ -561,6 +889,74 @@ export default defineEventHandler(async (event) => {
561
889
  - Request/response logging, content filtering
562
890
  - Double-layer retry: client retries to proxy, server retries to AI provider
563
891
 
892
+ ## OpenAI API Modes
893
+
894
+ OpenAI supports two API endpoints. The Responses API is the default and recommended approach:
895
+
896
+ ```typescript
897
+ import { openai } from '@providerprotocol/ai/openai';
898
+
899
+ // Responses API (default, recommended)
900
+ openai('gpt-4o')
901
+
902
+ // Chat Completions API (legacy)
903
+ openai('gpt-4o', { api: 'completions' })
904
+ ```
905
+
906
+ The Responses API supports built-in tools and stateful conversations. Use completions for backward compatibility.
907
+
908
+ ## OpenAI Built-in Tools
909
+
910
+ With the Responses API, use OpenAI's built-in tools directly:
911
+
912
+ ```typescript
913
+ import { llm } from '@providerprotocol/ai';
914
+ import { openai, tools } from '@providerprotocol/ai/openai';
915
+
916
+ // Web search
917
+ const model = llm({
918
+ model: openai('gpt-4o'),
919
+ params: {
920
+ tools: [tools.webSearch()],
921
+ },
922
+ });
923
+
924
+ // File search with vector stores
925
+ const researchModel = llm({
926
+ model: openai('gpt-4o'),
927
+ params: {
928
+ tools: [tools.fileSearch({ vector_store_ids: ['vs_abc123'] })],
929
+ },
930
+ });
931
+
932
+ // Code interpreter
933
+ const codeModel = llm({
934
+ model: openai('gpt-4o'),
935
+ params: {
936
+ tools: [tools.codeInterpreter()],
937
+ },
938
+ });
939
+
940
+ // Image generation
941
+ const creativeModel = llm({
942
+ model: openai('gpt-4o'),
943
+ params: {
944
+ tools: [tools.imageGeneration()],
945
+ },
946
+ });
947
+ ```
948
+
949
+ **Available Built-in Tools:**
950
+
951
+ | Tool | Description |
952
+ |------|-------------|
953
+ | `tools.webSearch()` | Search the web with optional user location |
954
+ | `tools.fileSearch()` | Search uploaded files in vector stores |
955
+ | `tools.codeInterpreter()` | Execute code in a sandboxed environment |
956
+ | `tools.computer()` | Computer use with display configuration |
957
+ | `tools.imageGeneration()` | Generate images via DALL-E |
958
+ | `tools.mcp()` | Connect to MCP servers |
959
+
564
960
  ## xAI API Modes
565
961
 
566
962
  xAI supports multiple API compatibility modes:
@@ -578,6 +974,119 @@ xai('grok-3-fast', { api: 'responses' })
578
974
  xai('grok-3-fast', { api: 'messages' })
579
975
  ```
580
976
 
977
+ ## Groq
978
+
979
+ Fast inference with Llama, Gemma, and Mixtral models:
980
+
981
+ ```typescript
982
+ import { llm } from '@providerprotocol/ai';
983
+ import { groq } from '@providerprotocol/ai/groq';
984
+
985
+ const model = llm({
986
+ model: groq('llama-3.3-70b-versatile'),
987
+ params: { max_tokens: 1000 },
988
+ });
989
+
990
+ const turn = await model.generate('Hello!');
991
+ ```
992
+
993
+ **With web search:**
994
+
995
+ ```typescript
996
+ const searchModel = llm({
997
+ model: groq('llama-3.3-70b-versatile'),
998
+ params: {
999
+ search_settings: { mode: 'auto' },
1000
+ },
1001
+ });
1002
+ ```
1003
+
1004
+ **With RAG documents:**
1005
+
1006
+ ```typescript
1007
+ const ragModel = llm({
1008
+ model: groq('llama-3.3-70b-versatile'),
1009
+ params: {
1010
+ documents: [
1011
+ { title: 'Doc 1', content: 'Document content here...' },
1012
+ { title: 'Doc 2', content: 'More content...' },
1013
+ ],
1014
+ citation_options: { include: true },
1015
+ },
1016
+ });
1017
+ ```
1018
+
1019
+ **Capabilities:** Streaming, tool calling, structured output, image input (Llama 4 preview), web search, RAG with citations.
1020
+
1021
+ **Environment:** `GROQ_API_KEY`
1022
+
1023
+ ## Cerebras
1024
+
1025
+ Ultra-fast inference with Llama, Qwen, and GPT-OSS models:
1026
+
1027
+ ```typescript
1028
+ import { llm } from '@providerprotocol/ai';
1029
+ import { cerebras } from '@providerprotocol/ai/cerebras';
1030
+
1031
+ const model = llm({
1032
+ model: cerebras('llama-3.3-70b'),
1033
+ params: { max_completion_tokens: 1000 },
1034
+ });
1035
+
1036
+ const turn = await model.generate('Hello!');
1037
+ ```
1038
+
1039
+ **With reasoning (GPT-OSS):**
1040
+
1041
+ ```typescript
1042
+ const model = llm({
1043
+ model: cerebras('gpt-oss-120b'),
1044
+ params: {
1045
+ reasoning_effort: 'high',
1046
+ reasoning_format: 'parsed',
1047
+ },
1048
+ });
1049
+ ```
1050
+
1051
+ **Capabilities:** Streaming, tool calling, structured output, reasoning parameters.
1052
+
1053
+ **Environment:** `CEREBRAS_API_KEY`
1054
+
1055
+ ## OpenResponses Provider
1056
+
1057
+ Connect to any server implementing the [OpenResponses specification](https://www.openresponses.org):
1058
+
1059
+ ```typescript
1060
+ import { llm } from '@providerprotocol/ai';
1061
+ import { responses } from '@providerprotocol/ai/responses';
1062
+
1063
+ // Using with OpenAI
1064
+ const model = llm({
1065
+ model: responses('gpt-5.2', {
1066
+ host: 'https://api.openai.com/v1',
1067
+ apiKeyEnv: 'OPENAI_API_KEY',
1068
+ }),
1069
+ params: { max_output_tokens: 1000 },
1070
+ });
1071
+
1072
+ // Using with OpenRouter
1073
+ const routerModel = llm({
1074
+ model: responses('openai/gpt-4o', {
1075
+ host: 'https://openrouter.ai/api/v1',
1076
+ apiKeyEnv: 'OPENROUTER_API_KEY',
1077
+ }),
1078
+ });
1079
+
1080
+ // Using with self-hosted server
1081
+ const localModel = llm({
1082
+ model: responses('llama-3.3-70b', {
1083
+ host: 'http://localhost:8080/v1',
1084
+ }),
1085
+ });
1086
+ ```
1087
+
1088
+ **Capabilities:** Full multimodal support, streaming, tool calling, structured output, reasoning summaries.
1089
+
581
1090
  ## Alternative Import Style
582
1091
 
583
1092
  Use the `ai` namespace for a grouped import style:
@@ -607,6 +1116,14 @@ import type {
607
1116
  StreamEvent,
608
1117
  StreamResult,
609
1118
 
1119
+ // Content blocks
1120
+ TextBlock,
1121
+ ImageBlock,
1122
+ ReasoningBlock,
1123
+ DocumentBlock,
1124
+ AudioBlock,
1125
+ VideoBlock,
1126
+
610
1127
  // Modality results
611
1128
  EmbeddingResult,
612
1129
  ImageResult,
@@ -620,9 +1137,31 @@ import type {
620
1137
  KeyStrategy,
621
1138
  RetryStrategy,
622
1139
  LLMCapabilities,
1140
+
1141
+ // Middleware
1142
+ Middleware,
1143
+ MiddlewareContext,
1144
+ StreamContext,
623
1145
  } from '@providerprotocol/ai';
624
1146
  ```
625
1147
 
1148
+ **Type-Safe Enums:**
1149
+
1150
+ ```typescript
1151
+ import {
1152
+ StreamEventType,
1153
+ ErrorCode,
1154
+ ContentBlockType,
1155
+ MessageRole,
1156
+ ModalityType,
1157
+ } from '@providerprotocol/ai';
1158
+
1159
+ // Use instead of magic strings
1160
+ if (event.type === StreamEventType.TextDelta) { ... }
1161
+ if (error.code === ErrorCode.RateLimited) { ... }
1162
+ if (block.type === ContentBlockType.Text) { ... }
1163
+ ```
1164
+
626
1165
  ### Custom Providers
627
1166
 
628
1167
  Build custom providers with `createProvider`:
@@ -1,4 +1,5 @@
1
- import { P as Provider } from '../llm-BQJZj3cD.js';
1
+ import { d as Provider } from '../llm-DwbUK7un.js';
2
+ import '../stream-CecfVCPO.js';
2
3
 
3
4
  /**
4
5
  * @fileoverview Anthropic API type definitions.