@yourgpt/llm-sdk 2.5.0 → 2.5.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +19 -1
  2. package/dist/adapters/index.d.mts +4 -4
  3. package/dist/adapters/index.d.ts +4 -4
  4. package/dist/adapters/index.js +293 -23
  5. package/dist/adapters/index.mjs +293 -23
  6. package/dist/base-BYQKp9TW.d.mts +263 -0
  7. package/dist/base-Cxq3ni0t.d.ts +263 -0
  8. package/dist/fallback/index.d.mts +4 -4
  9. package/dist/fallback/index.d.ts +4 -4
  10. package/dist/index.d.mts +61 -8
  11. package/dist/index.d.ts +61 -8
  12. package/dist/index.js +71 -0
  13. package/dist/index.mjs +71 -0
  14. package/dist/providers/anthropic/index.d.mts +3 -3
  15. package/dist/providers/anthropic/index.d.ts +3 -3
  16. package/dist/providers/anthropic/index.js +360 -203
  17. package/dist/providers/anthropic/index.mjs +360 -203
  18. package/dist/providers/azure/index.d.mts +3 -3
  19. package/dist/providers/azure/index.d.ts +3 -3
  20. package/dist/providers/azure/index.js +49 -1
  21. package/dist/providers/azure/index.mjs +49 -1
  22. package/dist/providers/fireworks/index.d.mts +1 -1
  23. package/dist/providers/fireworks/index.d.ts +1 -1
  24. package/dist/providers/fireworks/index.js +56 -0
  25. package/dist/providers/fireworks/index.mjs +56 -0
  26. package/dist/providers/google/index.d.mts +3 -3
  27. package/dist/providers/google/index.d.ts +3 -3
  28. package/dist/providers/google/index.js +303 -207
  29. package/dist/providers/google/index.mjs +303 -207
  30. package/dist/providers/ollama/index.d.mts +4 -4
  31. package/dist/providers/ollama/index.d.ts +4 -4
  32. package/dist/providers/ollama/index.js +10 -2
  33. package/dist/providers/ollama/index.mjs +10 -2
  34. package/dist/providers/openai/index.d.mts +3 -3
  35. package/dist/providers/openai/index.d.ts +3 -3
  36. package/dist/providers/openai/index.js +318 -216
  37. package/dist/providers/openai/index.mjs +318 -216
  38. package/dist/providers/openrouter/index.d.mts +3 -3
  39. package/dist/providers/openrouter/index.d.ts +3 -3
  40. package/dist/providers/openrouter/index.js +308 -206
  41. package/dist/providers/openrouter/index.mjs +308 -206
  42. package/dist/providers/togetherai/index.d.mts +3 -3
  43. package/dist/providers/togetherai/index.d.ts +3 -3
  44. package/dist/providers/togetherai/index.js +308 -206
  45. package/dist/providers/togetherai/index.mjs +308 -206
  46. package/dist/providers/xai/index.d.mts +3 -3
  47. package/dist/providers/xai/index.d.ts +3 -3
  48. package/dist/providers/xai/index.js +307 -210
  49. package/dist/providers/xai/index.mjs +307 -210
  50. package/dist/{types-BctsnC3g.d.ts → types-BvkiJ1dd.d.mts} +2 -1
  51. package/dist/{types-38yolWJn.d.ts → types-ChORafYS.d.ts} +1 -1
  52. package/dist/types-D774b0dg.d.mts +1018 -0
  53. package/dist/types-D774b0dg.d.ts +1018 -0
  54. package/dist/{types-DRqxMIjF.d.mts → types-TMilS-Dz.d.ts} +2 -1
  55. package/dist/{types-D4YfrQJR.d.mts → types-mwMhCwOq.d.mts} +1 -1
  56. package/dist/yourgpt/index.d.mts +1 -1
  57. package/dist/yourgpt/index.d.ts +1 -1
  58. package/package.json +1 -1
  59. package/dist/base-D-U61JaB.d.mts +0 -788
  60. package/dist/base-iGi9Va6Z.d.ts +0 -788
  61. package/dist/types-CR8mi9I0.d.mts +0 -417
  62. package/dist/types-CR8mi9I0.d.ts +0 -417
package/README.md CHANGED
@@ -114,6 +114,24 @@ const runtime = createRuntime({
114
114
 
115
115
  When `search.enabled` is on, deferred tools can be discovered through a hidden `search_tools` server tool. Matching tools are loaded into the next loop iteration instead of sending every deferred tool definition up front.
116
116
 
117
+ ## Structured output, MCP, and reasoning effort
118
+
119
+ Pass `responseFormat`, `mcpServers`, and `reasoningEffort` on any `generateText()` / `streamText()` / `runtime.chat()` / `runtime.response()` call:
120
+
121
+ ```ts
122
+ const result = await runtime.response({
123
+ prompt: "Extract FAQs from this conversation.",
124
+ mcpServers: [{ label: "kb", url: "https://kb.example.com/sse" }],
125
+ reasoningEffort: "high",
126
+ responseFormat: {
127
+ type: "json_schema",
128
+ json_schema: { name, schema, strict: true },
129
+ },
130
+ });
131
+ ```
132
+
133
+ OpenAI routes through `/v1/responses` automatically when MCP or reasoning is set; Anthropic uses the `mcp-client-2025-11-20` beta and adaptive thinking on Claude 4.6/4.7. See the [Structured Output guide](https://copilot-sdk.yourgpt.ai/docs/llm-sdk/structured-output) for the full per-provider mapping.
134
+
117
135
  ## Documentation
118
136
 
119
137
  Visit **[copilot-sdk.yourgpt.ai](https://copilot-sdk.yourgpt.ai)** for full documentation:
@@ -121,7 +139,7 @@ Visit **[copilot-sdk.yourgpt.ai](https://copilot-sdk.yourgpt.ai)** for full docu
121
139
  - [All Providers](https://copilot-sdk.yourgpt.ai/docs/providers) - OpenAI, Anthropic, Google, xAI
122
140
  - [Server Setup](https://copilot-sdk.yourgpt.ai/docs/server) - Runtime, streaming, tools
123
141
  - [Tools](https://copilot-sdk.yourgpt.ai/docs/tools) - Server-side and client-side tools
124
- - [LLM SDK Reference](https://copilot-sdk.yourgpt.ai/docs/llm-sdk) - streamText, generateText
142
+ - [LLM SDK Reference](https://copilot-sdk.yourgpt.ai/docs/llm-sdk) - streamText, generateText, runtime.response()
125
143
 
126
144
  ## License
127
145
 
@@ -1,7 +1,7 @@
1
- import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-D-U61JaB.mjs';
2
- export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-D-U61JaB.mjs';
3
- import { d as OllamaModelOptions } from '../types-DRqxMIjF.mjs';
4
- import '../types-CR8mi9I0.mjs';
1
+ import { L as LLMAdapter, C as ChatCompletionRequest, a as CompletionResult } from '../base-BYQKp9TW.mjs';
2
+ export { A as AdapterFactory, l as AnthropicContentBlock, O as OpenAIContentBlock, j as attachmentToAnthropicDocument, i as attachmentToAnthropicImage, k as attachmentToOpenAIImage, f as formatMessages, c as formatMessagesForAnthropic, d as formatMessagesForOpenAI, b as formatTools, h as hasImageAttachments, g as hasMediaAttachments, m as messageToAnthropicContent, e as messageToOpenAIContent } from '../base-BYQKp9TW.mjs';
3
+ import { W as WebSearchConfig, h as StreamEvent } from '../types-D774b0dg.mjs';
4
+ import { d as OllamaModelOptions } from '../types-BvkiJ1dd.mjs';
5
5
  import 'zod';
6
6
 
7
7
  /**
@@ -1,7 +1,7 @@
1
- import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-iGi9Va6Z.js';
2
- export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-iGi9Va6Z.js';
3
- import { d as OllamaModelOptions } from '../types-BctsnC3g.js';
4
- import '../types-CR8mi9I0.js';
1
+ import { L as LLMAdapter, C as ChatCompletionRequest, a as CompletionResult } from '../base-Cxq3ni0t.js';
2
+ export { A as AdapterFactory, l as AnthropicContentBlock, O as OpenAIContentBlock, j as attachmentToAnthropicDocument, i as attachmentToAnthropicImage, k as attachmentToOpenAIImage, f as formatMessages, c as formatMessagesForAnthropic, d as formatMessagesForOpenAI, b as formatTools, h as hasImageAttachments, g as hasMediaAttachments, m as messageToAnthropicContent, e as messageToOpenAIContent } from '../base-Cxq3ni0t.js';
3
+ import { W as WebSearchConfig, h as StreamEvent } from '../types-D774b0dg.js';
4
+ import { d as OllamaModelOptions } from '../types-TMilS-Dz.js';
5
5
  import 'zod';
6
6
 
7
7
  /**
@@ -114,6 +114,190 @@ function normalizeObjectJsonSchema(schema) {
114
114
  }
115
115
  return normalized;
116
116
  }
117
/**
 * Report whether a model id names an OpenAI reasoning model.
 *
 * An id counts when it starts with `o1`, `o3`, `o4` or `gpt-5`
 * (case-insensitive). A missing or empty id never counts.
 *
 * @param {string | undefined | null} modelId - Model identifier to classify.
 * @returns {boolean} `true` when the id matches one of the prefixes.
 */
function isOpenAIReasoningModel(modelId) {
  const reasoningPrefix = /^(o1|o3|o4|gpt-5)/i;
  return modelId ? reasoningPrefix.test(modelId) : false;
}
121
/**
 * Build the token-limit / sampling fields for a Chat Completions payload.
 *
 * For ids accepted by `isOpenAIReasoningModel` only `max_completion_tokens`
 * is produced and `temperature` is left out; every other model gets
 * `max_tokens` together with `temperature`.
 *
 * @param {string | undefined} modelId - Model the payload is built for.
 * @param {number | undefined} maxTokens - Output token limit, passed through as-is.
 * @param {number | undefined} temperature - Sampling temperature, passed through as-is.
 * @returns {object} Fields meant to be spread into the request payload.
 */
function buildOpenAITokenParams(modelId, maxTokens, temperature) {
  const usesCompletionTokens = isOpenAIReasoningModel(modelId);
  if (!usesCompletionTokens) {
    return { max_tokens: maxTokens, temperature };
  }
  return { max_completion_tokens: maxTokens };
}
127
// Keywords whose value maps caller-chosen names to sub-schemas. The names are
// data, not schema keywords, so they must never be dropped or renamed.
const SCHEMA_NAME_MAP_KEYWORDS = /* @__PURE__ */ new Set([
  "properties",
  "patternProperties",
  "$defs",
  "definitions",
  "dependentSchemas"
]);
// Keywords whose value is instance data rather than a schema.
const SCHEMA_LITERAL_KEYWORDS = /* @__PURE__ */ new Set([
  "enum",
  "const",
  "default",
  "examples"
]);

/**
 * Recursively copy a JSON Schema while dropping and renaming keywords.
 *
 * Only schema keywords are filtered. Property names listed under
 * `properties` (and the other name-map keywords) are kept verbatim even when
 * they collide with a dropped keyword such as `pattern`, and literal values
 * under `enum` / `const` / `default` / `examples` are passed through
 * untouched (by reference).
 *
 * @param {unknown} schema - Schema node; arrays are mapped element-wise and
 *   non-object values are returned unchanged.
 * @param {Set<string>} keysToDrop - Keywords removed at every schema level.
 * @param {object} [options]
 * @param {Record<string, string>} [options.renameKeys] - Keyword renames,
 *   e.g. `{ oneOf: "anyOf" }`. Only own properties are consulted.
 * @param {boolean} [options.forceAdditionalPropertiesFalse] - When set, every
 *   node whose `type` is exactly `"object"` gets `additionalProperties: false`.
 * @returns {unknown} A filtered copy; the input is not mutated.
 */
function stripSchemaKeys(schema, keysToDrop, options = {}) {
  if (Array.isArray(schema)) {
    return schema.map((item) => stripSchemaKeys(item, keysToDrop, options));
  }
  if (!schema || typeof schema !== "object") return schema;
  const renameKeys = options.renameKeys;
  const out = {};
  for (const [key, value] of Object.entries(schema)) {
    if (keysToDrop.has(key)) continue;
    // Own-property check: a plain lookup would pick up inherited members such
    // as `toString` / `constructor` from Object.prototype.
    const mapped = renameKeys && Object.prototype.hasOwnProperty.call(renameKeys, key) ? renameKeys[key] : void 0;
    const renamed = mapped ?? key;
    if (SCHEMA_LITERAL_KEYWORDS.has(key)) {
      out[renamed] = value;
    } else if (SCHEMA_NAME_MAP_KEYWORDS.has(key) && value && typeof value === "object" && !Array.isArray(value)) {
      // Filter each sub-schema but leave the names themselves alone.
      out[renamed] = Object.fromEntries(
        Object.entries(value).map(([name, subSchema]) => [
          name,
          stripSchemaKeys(subSchema, keysToDrop, options)
        ])
      );
    } else {
      out[renamed] = stripSchemaKeys(value, keysToDrop, options);
    }
  }
  if (options.forceAdditionalPropertiesFalse && out.type === "object") {
    out.additionalProperties = false;
  }
  return out;
}
145
/**
 * Translate the SDK response format into the Chat Completions
 * `response_format` field.
 *
 * @param {object | undefined} rf - SDK response format; `type` is read, and
 *   `json_schema` is read for anything other than `"json_object"`.
 * @returns {object | undefined} `undefined` when no format was requested,
 *   `{ type: "json_object" }` for JSON mode, otherwise a `json_schema` entry
 *   whose schema went through `normalizeObjectJsonSchema` and whose `strict`
 *   flag defaults to `true`.
 */
function toOpenAIResponseFormat(rf) {
  if (!rf) return undefined;
  if (rf.type === "json_object") {
    return { type: "json_object" };
  }
  const { name, schema, strict } = rf.json_schema;
  const jsonSchema = {
    name,
    schema: normalizeObjectJsonSchema(schema),
    strict: strict ?? true
  };
  return { type: "json_schema", json_schema: jsonSchema };
}
157
/**
 * Translate the SDK response format into the Responses API `text.format`
 * value.
 *
 * `json_object` is forwarded as JSON mode instead of being dropped, so a
 * request that is routed to the Responses API keeps the output format the
 * caller asked for, matching what the Chat Completions path sends.
 *
 * @param {object | undefined} rf - SDK response format (`type` plus, for
 *   `"json_schema"`, a `json_schema` object).
 * @returns {object | undefined} `undefined` when no (or an unrecognised)
 *   format was requested, `{ type: "json_object" }` for JSON mode, otherwise
 *   a flat `json_schema` format whose schema went through
 *   `normalizeObjectJsonSchema` and whose `strict` flag defaults to `true`.
 */
function toOpenAIResponsesTextFormat(rf) {
  if (!rf) return void 0;
  if (rf.type === "json_object") return { type: "json_object" };
  if (rf.type !== "json_schema") return void 0;
  return {
    type: "json_schema",
    name: rf.json_schema.name,
    schema: normalizeObjectJsonSchema(rf.json_schema.schema),
    strict: rf.json_schema.strict ?? true
  };
}
166
// Schema keywords removed before a schema is handed to Anthropic.
var ANTHROPIC_UNSUPPORTED_KEYS = /* @__PURE__ */ new Set([
  // numeric bounds
  "minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum", "multipleOf",
  // string / array / object size limits
  "minLength", "maxLength", "minItems", "maxItems", "minProperties", "maxProperties",
  // everything else
  "pattern", "$schema"
]);

/**
 * Build the Anthropic `output_config` fragment for a structured-output
 * request.
 *
 * @param {object | undefined} rf - SDK response format.
 * @returns {object | undefined} `undefined` unless `rf.type` is
 *   `"json_schema"`; otherwise `{ format: { type: "json_schema", schema } }`
 *   where `schema` is the caller's schema run through `stripSchemaKeys` with
 *   the unsupported keywords removed, `oneOf` renamed to `anyOf`, and
 *   `additionalProperties: false` forced on object nodes.
 */
function toAnthropicOutputConfig(rf) {
  if (rf?.type !== "json_schema") return undefined;
  const stripOptions = {
    forceAdditionalPropertiesFalse: true,
    renameKeys: { oneOf: "anyOf" }
  };
  const schema = stripSchemaKeys(
    rf.json_schema.schema,
    ANTHROPIC_UNSUPPORTED_KEYS,
    stripOptions
  );
  return { format: { type: "json_schema", schema } };
}
198
// Schema keywords removed before a schema is handed to Gemini.
var GEMINI_UNSUPPORTED_KEYS = /* @__PURE__ */ new Set([
  // composition and references
  "oneOf", "anyOf", "$ref", "$defs", "definitions",
  // everything else
  "pattern", "$schema", "additionalProperties"
]);

/**
 * Produce the schema sent to Gemini for a structured-output request.
 *
 * @param {object | undefined} rf - SDK response format.
 * @returns {unknown} `undefined` unless `rf.type` is `"json_schema"`;
 *   otherwise the caller's schema run through `stripSchemaKeys` with the
 *   keywords in `GEMINI_UNSUPPORTED_KEYS` removed.
 */
function toGeminiSchema(rf) {
  if (rf?.type !== "json_schema") return undefined;
  const { schema } = rf.json_schema;
  return stripSchemaKeys(schema, GEMINI_UNSUPPORTED_KEYS);
}
215
/**
 * Translate the SDK response format into Ollama's `format` request field.
 *
 * @param {object | undefined} rf - SDK response format.
 * @returns {unknown} `undefined` when no format was requested, the string
 *   `"json"` for `json_object`, otherwise the caller's schema object itself
 *   (not copied).
 */
function toOllamaFormat(rf) {
  if (!rf) return undefined;
  return rf.type === "json_object" ? "json" : rf.json_schema.schema;
}
220
/**
 * Convert SDK MCP server definitions into Responses API `mcp` tool entries.
 *
 * @param {Array<object> | undefined} mcpServers - Entries with `label`,
 *   `url` and optional `headers`, `allowedTools`, `requireApproval`.
 * @returns {Array<object>} One tool per server; empty when nothing was
 *   supplied. `headers` / `allowed_tools` appear only when the source value
 *   is truthy, and `require_approval` defaults to `"never"`.
 */
function toOpenAIResponsesMcpTools(mcpServers) {
  if (!mcpServers || mcpServers.length === 0) return [];
  const toTool = (server) => {
    const tool = {
      type: "mcp",
      server_label: server.label,
      server_url: server.url
    };
    if (server.headers) tool.headers = server.headers;
    if (server.allowedTools) tool.allowed_tools = server.allowedTools;
    tool.require_approval = server.requireApproval ?? "never";
    return tool;
  };
  return mcpServers.map((server) => toTool(server));
}
231
/**
 * Convert SDK MCP server definitions into the pieces of an Anthropic request
 * that uses the `mcp-client-2025-11-20` beta.
 *
 * Every server gets a matching `mcp_toolset` entry, because the MCP connector
 * documentation for this beta requires each entry in `mcp_servers` to be
 * referenced by a toolset. When `allowedTools` is non-empty the toolset is
 * written as an allowlist (`default_config.enabled = false` plus one enabled
 * config per listed tool); without the default, listing tools would not
 * restrict anything.
 *
 * @param {Array<object> | undefined} mcpServers - Entries with `label`,
 *   `url` and optional `headers`, `allowedTools`.
 * @returns {{ mcpServers: object[], tools: object[], betas: string[] }}
 *   All three arrays are empty when no servers were supplied.
 */
function toAnthropicMcp(mcpServers) {
  if (!mcpServers || mcpServers.length === 0) {
    return { mcpServers: [], tools: [], betas: [] };
  }
  const serverEntries = [];
  const toolEntries = [];
  for (const mcp of mcpServers) {
    // Only the Authorization header is carried over: its value, minus a
    // leading "Bearer " prefix, becomes `authorization_token`.
    const authHeader = mcp.headers?.Authorization ?? mcp.headers?.authorization;
    const token = authHeader?.replace(/^Bearer\s+/i, "");
    serverEntries.push({
      type: "url",
      url: mcp.url,
      name: mcp.label,
      ...token ? { authorization_token: token } : {}
    });
    const toolset = {
      type: "mcp_toolset",
      mcp_server_name: mcp.label
    };
    if (mcp.allowedTools && mcp.allowedTools.length > 0) {
      toolset.default_config = { enabled: false };
      toolset.configs = Object.fromEntries(
        mcp.allowedTools.map((toolName) => [toolName, { enabled: true }])
      );
    }
    toolEntries.push(toolset);
  }
  return {
    mcpServers: serverEntries,
    tools: toolEntries,
    betas: ["mcp-client-2025-11-20"]
  };
}
262
/**
 * Check whether a value is one of the named reasoning-effort levels.
 *
 * @param {unknown} effort - Candidate value.
 * @returns {boolean} `true` only for the primitive strings `"minimal"`,
 *   `"low"`, `"medium"` and `"high"`.
 */
function isStringEffort(effort) {
  switch (effort) {
    case "minimal":
    case "low":
    case "medium":
    case "high":
      return true;
    default:
      return false;
  }
}
265
/**
 * Translate the SDK `reasoningEffort` option into the Responses API
 * `reasoning` object.
 *
 * @param {unknown} effort - A named level, `{ raw }`, or `{ budgetTokens }`.
 * @returns {unknown} `undefined` for a missing or unrecognised value;
 *   `effort.raw` verbatim for the raw form; otherwise
 *   `{ effort, summary: "auto" }`. A token budget maps to `"high"` at 16000
 *   and above, `"medium"` at 8000 and above, and `"low"` below that.
 */
function toOpenAIReasoning(effort) {
  if (!effort) return undefined;
  const isObject = typeof effort === "object";
  if (isObject && "raw" in effort) return effort.raw;
  if (isObject && "budgetTokens" in effort) {
    const { budgetTokens } = effort;
    let level = "low";
    if (budgetTokens >= 16e3) {
      level = "high";
    } else if (budgetTokens >= 8e3) {
      level = "medium";
    }
    return { effort: level, summary: "auto" };
  }
  return isStringEffort(effort) ? { effort, summary: "auto" } : undefined;
}
278
// Model ids (matched anywhere in the string, case-insensitive) that get
// adaptive thinking instead of a fixed token budget.
var ANTHROPIC_ADAPTIVE_MODELS = /(claude-opus-4-7|claude-opus-4-6|claude-sonnet-4-6)/i;

/**
 * Translate the SDK `reasoningEffort` option into Anthropic thinking
 * settings.
 *
 * @param {unknown} effort - A named level, `{ raw }`, or `{ budgetTokens }`.
 * @param {string | undefined} modelId - Model the request targets.
 * @returns {{ thinking?: unknown, outputConfigEffort?: string }}
 *   `{}` for a missing or unrecognised value. The raw form is passed through
 *   as `thinking`; an explicit budget becomes an `enabled` block with that
 *   budget. Named levels become `adaptive` thinking plus
 *   `outputConfigEffort` on adaptive models (`"minimal"` is sent as
 *   `"low"`), and a fixed budget elsewhere: 16000 / 8000 / 4000 / 2048 for
 *   high / medium / low / minimal.
 */
function toAnthropicThinking(effort, modelId) {
  if (!effort) return {};
  if (typeof effort === "object") {
    if ("raw" in effort) {
      return { thinking: effort.raw };
    }
    if ("budgetTokens" in effort) {
      return {
        thinking: { type: "enabled", budget_tokens: effort.budgetTokens }
      };
    }
  }
  if (!isStringEffort(effort)) return {};
  const supportsAdaptive = Boolean(modelId) && ANTHROPIC_ADAPTIVE_MODELS.test(modelId);
  if (supportsAdaptive) {
    return {
      thinking: { type: "adaptive" },
      outputConfigEffort: effort === "minimal" ? "low" : effort
    };
  }
  const budgetByEffort = { high: 16e3, medium: 8e3, low: 4e3 };
  return {
    thinking: { type: "enabled", budget_tokens: budgetByEffort[effort] ?? 2048 }
  };
}
117
301
  function formatTools(actions) {
118
302
  return actions.map((action) => ({
119
303
  type: "function",
@@ -395,6 +579,14 @@ var OpenAIAdapter = class _OpenAIAdapter {
395
579
  return this.client;
396
580
  }
397
581
  shouldUseResponsesApi(request) {
582
+ if (request.config?.mcpServers && request.config.mcpServers.length > 0 || request.config?.reasoningEffort !== void 0) {
583
+ if (this.provider !== "openai" && this.provider !== "azure") {
584
+ throw new Error(
585
+ `[llm-sdk] Provider "${this.provider}" does not support MCP servers or per-request reasoning effort. Use OpenAI or Anthropic for these features.`
586
+ );
587
+ }
588
+ return true;
589
+ }
398
590
  return request.providerToolOptions?.openai?.nativeToolSearch?.enabled === true && request.providerToolOptions.openai.nativeToolSearch.useResponsesApi !== false && Array.isArray(request.toolDefinitions) && request.toolDefinitions.length > 0;
399
591
  }
400
592
  buildResponsesInput(request) {
@@ -455,7 +647,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
455
647
  strict: true,
456
648
  defer_loading: tool.deferLoading === true
457
649
  }));
458
- return [{ type: "tool_search" }, ...nativeTools];
650
+ return nativeTools.length > 0 ? [{ type: "tool_search" }, ...nativeTools] : [];
459
651
  }
460
652
  parseResponsesResult(response) {
461
653
  const content = typeof response?.output_text === "string" ? response.output_text : "";
@@ -484,15 +676,33 @@ var OpenAIAdapter = class _OpenAIAdapter {
484
676
  async completeWithResponses(request) {
485
677
  const client = await this.getClient();
486
678
  const openaiToolOptions = request.providerToolOptions?.openai;
679
+ const responsesTextFormat = toOpenAIResponsesTextFormat(
680
+ request.config?.responseFormat
681
+ );
682
+ const mcpTools = toOpenAIResponsesMcpTools(request.config?.mcpServers);
683
+ const modelId = request.config?.model || this.model;
684
+ const reasoning = isOpenAIReasoningModel(modelId) ? toOpenAIReasoning(request.config?.reasoningEffort) : void 0;
685
+ if (request.config?.reasoningEffort && !isOpenAIReasoningModel(modelId)) {
686
+ console.warn(
687
+ `[llm-sdk] openai/${modelId} is not a reasoning model; \`reasoningEffort\` is ignored. Use o1/o3/o4/gpt-5.x for reasoning.`
688
+ );
689
+ }
690
+ const functionTools = this.buildResponsesTools(
691
+ request.toolDefinitions ?? []
692
+ );
693
+ const tools = [...functionTools, ...mcpTools];
487
694
  const payload = {
488
695
  model: request.config?.model || this.model,
489
696
  instructions: request.systemPrompt,
490
697
  input: this.buildResponsesInput(request),
491
- tools: this.buildResponsesTools(request.toolDefinitions ?? []),
698
+ tools: tools.length > 0 ? tools : void 0,
492
699
  tool_choice: openaiToolOptions?.toolChoice === "required" ? "required" : openaiToolOptions?.toolChoice === "auto" ? "auto" : void 0,
493
700
  parallel_tool_calls: openaiToolOptions?.parallelToolCalls,
494
701
  temperature: request.config?.temperature ?? this.config.temperature,
495
702
  max_output_tokens: request.config?.maxTokens ?? this.config.maxTokens,
703
+ ...responsesTextFormat ? { text: { format: responsesTextFormat } } : {},
704
+ ...reasoning ? { reasoning } : {},
705
+ store: false,
496
706
  stream: false
497
707
  };
498
708
  logProviderPayload("openai", "request payload", payload, request.debug);
@@ -614,14 +824,19 @@ var OpenAIAdapter = class _OpenAIAdapter {
614
824
  name: openaiToolOptions.toolChoice.name
615
825
  }
616
826
  } : openaiToolOptions?.toolChoice;
827
+ const modelIdForPayload = request.config?.model || this.model;
617
828
  const payload = {
618
- model: request.config?.model || this.model,
829
+ model: modelIdForPayload,
619
830
  messages,
620
831
  tools: tools.length > 0 ? tools : void 0,
621
832
  tool_choice: tools.length > 0 ? toolChoice : void 0,
622
833
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
623
- temperature: request.config?.temperature ?? this.config.temperature,
624
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
834
+ ...buildOpenAITokenParams(
835
+ modelIdForPayload,
836
+ request.config?.maxTokens ?? this.config.maxTokens,
837
+ request.config?.temperature ?? this.config.temperature
838
+ ),
839
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
625
840
  stream: true,
626
841
  stream_options: { include_usage: true }
627
842
  };
@@ -763,14 +978,19 @@ var OpenAIAdapter = class _OpenAIAdapter {
763
978
  name: openaiToolOptions.toolChoice.name
764
979
  }
765
980
  } : openaiToolOptions?.toolChoice;
981
+ const modelIdForCompletePayload = request.config?.model || this.model;
766
982
  const payload = {
767
- model: request.config?.model || this.model,
983
+ model: modelIdForCompletePayload,
768
984
  messages,
769
985
  tools: tools.length > 0 ? tools : void 0,
770
986
  tool_choice: tools.length > 0 ? toolChoice : void 0,
771
987
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
772
- temperature: request.config?.temperature ?? this.config.temperature,
773
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
988
+ ...buildOpenAITokenParams(
989
+ modelIdForCompletePayload,
990
+ request.config?.maxTokens ?? this.config.maxTokens,
991
+ request.config?.temperature ?? this.config.temperature
992
+ ),
993
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
774
994
  stream: false
775
995
  };
776
996
  logProviderPayload("openai", "request payload", payload, request.debug);
@@ -1046,7 +1266,9 @@ var AnthropicAdapter = class {
1046
1266
  * Build common request options for both streaming and non-streaming
1047
1267
  */
1048
1268
  buildRequestOptions(request) {
1049
- const systemMessage = request.systemPrompt || "";
1269
+ const responseFormat = request.config?.responseFormat;
1270
+ const jsonObjectSuffix = responseFormat?.type === "json_object" ? "\n\nRespond with a single JSON object and no other text." : "";
1271
+ const systemMessage = (request.systemPrompt || "") + jsonObjectSuffix;
1050
1272
  let messages;
1051
1273
  if (request.rawMessages && request.rawMessages.length > 0) {
1052
1274
  messages = this.convertToAnthropicMessages(request.rawMessages);
@@ -1127,32 +1349,58 @@ var AnthropicAdapter = class {
1127
1349
  if (serverToolConfiguration) {
1128
1350
  options.server_tool_configuration = serverToolConfiguration;
1129
1351
  }
1130
- if (this.config.thinking?.type === "enabled") {
1352
+ const modelForThinking = request.config?.model || this.model;
1353
+ const thinkingTranslation = toAnthropicThinking(
1354
+ request.config?.reasoningEffort,
1355
+ modelForThinking
1356
+ );
1357
+ const outputConfig = toAnthropicOutputConfig(responseFormat);
1358
+ if (outputConfig || thinkingTranslation.outputConfigEffort) {
1359
+ options.output_config = {
1360
+ ...outputConfig ?? {},
1361
+ ...thinkingTranslation.outputConfigEffort ? { effort: thinkingTranslation.outputConfigEffort } : {}
1362
+ };
1363
+ }
1364
+ if (thinkingTranslation.thinking) {
1365
+ options.thinking = thinkingTranslation.thinking;
1366
+ } else if (this.config.thinking?.type === "enabled") {
1131
1367
  options.thinking = {
1132
1368
  type: "enabled",
1133
1369
  budget_tokens: this.config.thinking.budgetTokens || 1e4
1134
1370
  };
1135
1371
  }
1136
- return { options, messages };
1372
+ const mcp = toAnthropicMcp(request.config?.mcpServers);
1373
+ const betas = [];
1374
+ if (mcp.mcpServers.length > 0) {
1375
+ options.mcp_servers = mcp.mcpServers;
1376
+ betas.push(...mcp.betas);
1377
+ if (mcp.tools.length > 0) {
1378
+ const existingTools = Array.isArray(options.tools) ? options.tools : [];
1379
+ options.tools = [...existingTools, ...mcp.tools];
1380
+ }
1381
+ }
1382
+ return { options, messages, betas };
1137
1383
  }
1138
1384
  /**
1139
1385
  * Non-streaming completion (for debugging/comparison with original studio-ai)
1140
1386
  */
1141
1387
  async complete(request) {
1142
1388
  const client = await this.getClient();
1143
- const { options } = this.buildRequestOptions(request);
1389
+ const { options, betas } = this.buildRequestOptions(request);
1144
1390
  const nonStreamingOptions = {
1145
1391
  ...options,
1146
1392
  stream: false
1147
1393
  };
1148
1394
  try {
1395
+ const finalOptions = betas.length > 0 ? { ...nonStreamingOptions, betas } : nonStreamingOptions;
1396
+ const messagesApi = betas.length > 0 ? client.beta.messages : client.messages;
1149
1397
  logProviderPayload(
1150
1398
  "anthropic",
1151
1399
  "request payload",
1152
- nonStreamingOptions,
1400
+ finalOptions,
1153
1401
  request.debug
1154
1402
  );
1155
- const response = await client.messages.create(nonStreamingOptions);
1403
+ const response = await messagesApi.create(finalOptions);
1156
1404
  logProviderPayload(
1157
1405
  "anthropic",
1158
1406
  "response payload",
@@ -1187,17 +1435,19 @@ var AnthropicAdapter = class {
1187
1435
  }
1188
1436
  async *stream(request) {
1189
1437
  const client = await this.getClient();
1190
- const { options } = this.buildRequestOptions(request);
1438
+ const { options, betas } = this.buildRequestOptions(request);
1191
1439
  const messageId = generateMessageId();
1192
1440
  yield { type: "message:start", id: messageId };
1193
1441
  try {
1442
+ const finalOptions = betas.length > 0 ? { ...options, betas } : options;
1443
+ const streamApi = betas.length > 0 ? client.beta.messages : client.messages;
1194
1444
  logProviderPayload(
1195
1445
  "anthropic",
1196
1446
  "request payload",
1197
- options,
1447
+ finalOptions,
1198
1448
  request.debug
1199
1449
  );
1200
- const stream = await client.messages.stream(options);
1450
+ const stream = await streamApi.stream(finalOptions);
1201
1451
  let currentToolUse = null;
1202
1452
  let isInThinkingBlock = false;
1203
1453
  const collectedCitations = [];
@@ -1487,12 +1737,14 @@ var OllamaAdapter = class {
1487
1737
  if (this.config.options) {
1488
1738
  Object.assign(ollamaOptions, this.config.options);
1489
1739
  }
1740
+ const ollamaFormat = toOllamaFormat(request.config?.responseFormat);
1490
1741
  const payload = {
1491
1742
  model: request.config?.model || this.model,
1492
1743
  messages,
1493
1744
  tools,
1494
1745
  stream: true,
1495
- options: ollamaOptions
1746
+ options: ollamaOptions,
1747
+ ...ollamaFormat !== void 0 ? { format: ollamaFormat } : {}
1496
1748
  };
1497
1749
  logProviderPayload("ollama", "request payload", payload, request.debug);
1498
1750
  const response = await fetch(`${this.baseUrl}/api/chat`, {
@@ -1783,6 +2035,12 @@ var GoogleAdapter = class {
1783
2035
  }
1784
2036
  const messageId = generateMessageId();
1785
2037
  yield { type: "message:start", id: messageId };
2038
+ const responseFormat = request.config?.responseFormat;
2039
+ const geminiSchema = toGeminiSchema(responseFormat);
2040
+ const responseFormatGenConfig = responseFormat ? {
2041
+ responseMimeType: "application/json",
2042
+ ...geminiSchema ? { responseJsonSchema: geminiSchema } : {}
2043
+ } : {};
1786
2044
  try {
1787
2045
  logProviderPayload(
1788
2046
  "google",
@@ -1794,7 +2052,8 @@ var GoogleAdapter = class {
1794
2052
  tools: toolsArray.length > 0 ? toolsArray : void 0,
1795
2053
  generationConfig: {
1796
2054
  temperature: request.config?.temperature ?? this.config.temperature,
1797
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
2055
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
2056
+ ...responseFormatGenConfig
1798
2057
  },
1799
2058
  messageParts: mergedContents[mergedContents.length - 1]?.parts
1800
2059
  },
@@ -1807,7 +2066,8 @@ var GoogleAdapter = class {
1807
2066
  tools: toolsArray.length > 0 ? toolsArray : void 0,
1808
2067
  generationConfig: {
1809
2068
  temperature: request.config?.temperature ?? this.config.temperature,
1810
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
2069
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
2070
+ ...responseFormatGenConfig
1811
2071
  }
1812
2072
  });
1813
2073
  const lastMessage = mergedContents[mergedContents.length - 1];
@@ -1974,6 +2234,12 @@ var GoogleAdapter = class {
1974
2234
  }
1975
2235
  }
1976
2236
  const tools = formatToolsForGemini(request.actions);
2237
+ const responseFormat = request.config?.responseFormat;
2238
+ const geminiSchema = toGeminiSchema(responseFormat);
2239
+ const responseFormatGenConfig = responseFormat ? {
2240
+ responseMimeType: "application/json",
2241
+ ...geminiSchema ? { responseJsonSchema: geminiSchema } : {}
2242
+ } : {};
1977
2243
  const payload = {
1978
2244
  model: modelId,
1979
2245
  history: mergedContents.slice(0, -1),
@@ -1981,7 +2247,8 @@ var GoogleAdapter = class {
1981
2247
  tools: tools ? [tools] : void 0,
1982
2248
  generationConfig: {
1983
2249
  temperature: request.config?.temperature ?? this.config.temperature,
1984
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
2250
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
2251
+ ...responseFormatGenConfig
1985
2252
  },
1986
2253
  messageParts: mergedContents[mergedContents.length - 1]?.parts
1987
2254
  };
@@ -1992,7 +2259,8 @@ var GoogleAdapter = class {
1992
2259
  tools: tools ? [tools] : void 0,
1993
2260
  generationConfig: {
1994
2261
  temperature: request.config?.temperature ?? this.config.temperature,
1995
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
2262
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
2263
+ ...responseFormatGenConfig
1996
2264
  }
1997
2265
  });
1998
2266
  const lastMessage = mergedContents[mergedContents.length - 1];
@@ -2131,6 +2399,7 @@ var AzureAdapter = class {
2131
2399
  tools,
2132
2400
  temperature: request.config?.temperature ?? this.config.temperature,
2133
2401
  max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
2402
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
2134
2403
  stream: true
2135
2404
  };
2136
2405
  logProviderPayload("azure", "request payload", payload, request.debug);
@@ -2230,7 +2499,8 @@ var AzureAdapter = class {
2230
2499
  messages,
2231
2500
  tools,
2232
2501
  temperature: request.config?.temperature ?? this.config.temperature,
2233
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens
2502
+ max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
2503
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat)
2234
2504
  };
2235
2505
  logProviderPayload("azure", "request payload", payload, request.debug);
2236
2506
  const response = await client.chat.completions.create(payload);