@yourgpt/llm-sdk 2.5.1-beta.0 → 2.5.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +19 -1
  2. package/dist/adapters/index.d.mts +4 -4
  3. package/dist/adapters/index.d.ts +4 -4
  4. package/dist/adapters/index.js +139 -12
  5. package/dist/adapters/index.mjs +139 -12
  6. package/dist/{base-tNgbBaSo.d.mts → base-BYQKp9TW.d.mts} +5 -1
  7. package/dist/{base-C58Dsr9p.d.ts → base-Cxq3ni0t.d.ts} +5 -1
  8. package/dist/fallback/index.d.mts +4 -4
  9. package/dist/fallback/index.d.ts +4 -4
  10. package/dist/index.d.mts +60 -8
  11. package/dist/index.d.ts +60 -8
  12. package/dist/index.js +59 -0
  13. package/dist/index.mjs +59 -0
  14. package/dist/providers/anthropic/index.d.mts +3 -3
  15. package/dist/providers/anthropic/index.d.ts +3 -3
  16. package/dist/providers/anthropic/index.js +91 -10
  17. package/dist/providers/anthropic/index.mjs +91 -10
  18. package/dist/providers/azure/index.d.mts +3 -3
  19. package/dist/providers/azure/index.d.ts +3 -3
  20. package/dist/providers/fireworks/index.d.mts +1 -1
  21. package/dist/providers/fireworks/index.d.ts +1 -1
  22. package/dist/providers/google/index.d.mts +3 -3
  23. package/dist/providers/google/index.d.ts +3 -3
  24. package/dist/providers/google/index.js +51 -2
  25. package/dist/providers/google/index.mjs +51 -2
  26. package/dist/providers/ollama/index.d.mts +4 -4
  27. package/dist/providers/ollama/index.d.ts +4 -4
  28. package/dist/providers/openai/index.d.mts +3 -3
  29. package/dist/providers/openai/index.d.ts +3 -3
  30. package/dist/providers/openai/index.js +51 -2
  31. package/dist/providers/openai/index.mjs +51 -2
  32. package/dist/providers/openrouter/index.d.mts +3 -3
  33. package/dist/providers/openrouter/index.d.ts +3 -3
  34. package/dist/providers/openrouter/index.js +51 -2
  35. package/dist/providers/openrouter/index.mjs +51 -2
  36. package/dist/providers/togetherai/index.d.mts +3 -3
  37. package/dist/providers/togetherai/index.d.ts +3 -3
  38. package/dist/providers/togetherai/index.js +51 -2
  39. package/dist/providers/togetherai/index.mjs +51 -2
  40. package/dist/providers/xai/index.d.mts +3 -3
  41. package/dist/providers/xai/index.d.ts +3 -3
  42. package/dist/providers/xai/index.js +51 -2
  43. package/dist/providers/xai/index.mjs +51 -2
  44. package/dist/{types-BSSiJW2o.d.mts → types-BvkiJ1dd.d.mts} +2 -2
  45. package/dist/{types-CCxPmkmK.d.ts → types-ChORafYS.d.ts} +1 -1
  46. package/dist/{types-BkQCSiIt.d.mts → types-D774b0dg.d.mts} +57 -2
  47. package/dist/{types-BkQCSiIt.d.ts → types-D774b0dg.d.ts} +57 -2
  48. package/dist/{types-BQ31QIsA.d.ts → types-TMilS-Dz.d.ts} +2 -2
  49. package/dist/{types-B6dhnguR.d.mts → types-mwMhCwOq.d.mts} +1 -1
  50. package/dist/yourgpt/index.d.mts +1 -1
  51. package/dist/yourgpt/index.d.ts +1 -1
  52. package/package.json +1 -1
package/README.md CHANGED
@@ -114,6 +114,24 @@ const runtime = createRuntime({
114
114
 
115
115
  When `search.enabled` is on, deferred tools can be discovered through a hidden `search_tools` server tool. Matching tools are loaded into the next loop iteration instead of sending every deferred tool definition up front.
116
116
 
117
+ ## Structured output, MCP, and reasoning effort
118
+
119
+ Pass `responseFormat`, `mcpServers`, and `reasoningEffort` on any `generateText()` / `streamText()` / `runtime.chat()` / `runtime.response()` call:
120
+
121
+ ```ts
122
+ const result = await runtime.response({
123
+ prompt: "Extract FAQs from this conversation.",
124
+ mcpServers: [{ label: "kb", url: "https://kb.example.com/sse" }],
125
+ reasoningEffort: "high",
126
+ responseFormat: {
127
+ type: "json_schema",
128
+ json_schema: { name, schema, strict: true },
129
+ },
130
+ });
131
+ ```
132
+
133
+ OpenAI routes through `/v1/responses` automatically when MCP or reasoning is set; Anthropic uses the `mcp-client-2025-11-20` beta and adaptive thinking on Claude 4.6/4.7. See the [Structured Output guide](https://copilot-sdk.yourgpt.ai/docs/llm-sdk/structured-output) for the full per-provider mapping.
134
+
117
135
  ## Documentation
118
136
 
119
137
  Visit **[copilot-sdk.yourgpt.ai](https://copilot-sdk.yourgpt.ai)** for full documentation:
@@ -121,7 +139,7 @@ Visit **[copilot-sdk.yourgpt.ai](https://copilot-sdk.yourgpt.ai)** for full docu
121
139
  - [All Providers](https://copilot-sdk.yourgpt.ai/docs/providers) - OpenAI, Anthropic, Google, xAI
122
140
  - [Server Setup](https://copilot-sdk.yourgpt.ai/docs/server) - Runtime, streaming, tools
123
141
  - [Tools](https://copilot-sdk.yourgpt.ai/docs/tools) - Server-side and client-side tools
124
- - [LLM SDK Reference](https://copilot-sdk.yourgpt.ai/docs/llm-sdk) - streamText, generateText
142
+ - [LLM SDK Reference](https://copilot-sdk.yourgpt.ai/docs/llm-sdk) - streamText, generateText, runtime.response()
125
143
 
126
144
  ## License
127
145
 
package/dist/adapters/index.d.mts CHANGED
@@ -1,7 +1,7 @@
1
- import { L as LLMAdapter, C as ChatCompletionRequest, a as CompletionResult } from '../base-tNgbBaSo.mjs';
2
- export { A as AdapterFactory, l as AnthropicContentBlock, O as OpenAIContentBlock, j as attachmentToAnthropicDocument, i as attachmentToAnthropicImage, k as attachmentToOpenAIImage, f as formatMessages, c as formatMessagesForAnthropic, d as formatMessagesForOpenAI, b as formatTools, h as hasImageAttachments, g as hasMediaAttachments, m as messageToAnthropicContent, e as messageToOpenAIContent } from '../base-tNgbBaSo.mjs';
3
- import { W as WebSearchConfig, g as StreamEvent } from '../types-BkQCSiIt.mjs';
4
- import { d as OllamaModelOptions } from '../types-BSSiJW2o.mjs';
1
+ import { L as LLMAdapter, C as ChatCompletionRequest, a as CompletionResult } from '../base-BYQKp9TW.mjs';
2
+ export { A as AdapterFactory, l as AnthropicContentBlock, O as OpenAIContentBlock, j as attachmentToAnthropicDocument, i as attachmentToAnthropicImage, k as attachmentToOpenAIImage, f as formatMessages, c as formatMessagesForAnthropic, d as formatMessagesForOpenAI, b as formatTools, h as hasImageAttachments, g as hasMediaAttachments, m as messageToAnthropicContent, e as messageToOpenAIContent } from '../base-BYQKp9TW.mjs';
3
+ import { W as WebSearchConfig, h as StreamEvent } from '../types-D774b0dg.mjs';
4
+ import { d as OllamaModelOptions } from '../types-BvkiJ1dd.mjs';
5
5
  import 'zod';
6
6
 
7
7
  /**
package/dist/adapters/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
- import { L as LLMAdapter, C as ChatCompletionRequest, a as CompletionResult } from '../base-C58Dsr9p.js';
2
- export { A as AdapterFactory, l as AnthropicContentBlock, O as OpenAIContentBlock, j as attachmentToAnthropicDocument, i as attachmentToAnthropicImage, k as attachmentToOpenAIImage, f as formatMessages, c as formatMessagesForAnthropic, d as formatMessagesForOpenAI, b as formatTools, h as hasImageAttachments, g as hasMediaAttachments, m as messageToAnthropicContent, e as messageToOpenAIContent } from '../base-C58Dsr9p.js';
3
- import { W as WebSearchConfig, g as StreamEvent } from '../types-BkQCSiIt.js';
4
- import { d as OllamaModelOptions } from '../types-BQ31QIsA.js';
1
+ import { L as LLMAdapter, C as ChatCompletionRequest, a as CompletionResult } from '../base-Cxq3ni0t.js';
2
+ export { A as AdapterFactory, l as AnthropicContentBlock, O as OpenAIContentBlock, j as attachmentToAnthropicDocument, i as attachmentToAnthropicImage, k as attachmentToOpenAIImage, f as formatMessages, c as formatMessagesForAnthropic, d as formatMessagesForOpenAI, b as formatTools, h as hasImageAttachments, g as hasMediaAttachments, m as messageToAnthropicContent, e as messageToOpenAIContent } from '../base-Cxq3ni0t.js';
3
+ import { W as WebSearchConfig, h as StreamEvent } from '../types-D774b0dg.js';
4
+ import { d as OllamaModelOptions } from '../types-TMilS-Dz.js';
5
5
  import 'zod';
6
6
 
7
7
  /**
package/dist/adapters/index.js CHANGED
@@ -217,6 +217,87 @@ function toOllamaFormat(rf) {
217
217
  if (rf.type === "json_object") return "json";
218
218
  return rf.json_schema.schema;
219
219
  }
220
+ function toOpenAIResponsesMcpTools(mcpServers) {
221
+ if (!mcpServers || mcpServers.length === 0) return [];
222
+ return mcpServers.map((mcp) => ({
223
+ type: "mcp",
224
+ server_label: mcp.label,
225
+ server_url: mcp.url,
226
+ ...mcp.headers ? { headers: mcp.headers } : {},
227
+ ...mcp.allowedTools ? { allowed_tools: mcp.allowedTools } : {},
228
+ require_approval: mcp.requireApproval ?? "never"
229
+ }));
230
+ }
231
+ function toAnthropicMcp(mcpServers) {
232
+ if (!mcpServers || mcpServers.length === 0) {
233
+ return { mcpServers: [], tools: [], betas: [] };
234
+ }
235
+ const serverEntries = [];
236
+ const toolEntries = [];
237
+ for (const mcp of mcpServers) {
238
+ const authHeader = mcp.headers?.Authorization ?? mcp.headers?.authorization;
239
+ const token = authHeader?.replace(/^Bearer\s+/i, "");
240
+ serverEntries.push({
241
+ type: "url",
242
+ url: mcp.url,
243
+ name: mcp.label,
244
+ ...token ? { authorization_token: token } : {}
245
+ });
246
+ if (mcp.allowedTools && mcp.allowedTools.length > 0) {
247
+ toolEntries.push({
248
+ type: "mcp_toolset",
249
+ mcp_server_name: mcp.label,
250
+ configs: Object.fromEntries(
251
+ mcp.allowedTools.map((toolName) => [toolName, {}])
252
+ )
253
+ });
254
+ }
255
+ }
256
+ return {
257
+ mcpServers: serverEntries,
258
+ tools: toolEntries,
259
+ betas: ["mcp-client-2025-11-20"]
260
+ };
261
+ }
262
+ function isStringEffort(effort) {
263
+ return typeof effort === "string" && (effort === "minimal" || effort === "low" || effort === "medium" || effort === "high");
264
+ }
265
+ function toOpenAIReasoning(effort) {
266
+ if (!effort) return void 0;
267
+ if (typeof effort === "object" && "raw" in effort) return effort.raw;
268
+ if (typeof effort === "object" && "budgetTokens" in effort) {
269
+ const budget = effort.budgetTokens;
270
+ const mapped = budget >= 16e3 ? "high" : budget >= 8e3 ? "medium" : "low";
271
+ return { effort: mapped, summary: "auto" };
272
+ }
273
+ if (isStringEffort(effort)) {
274
+ return { effort, summary: "auto" };
275
+ }
276
+ return void 0;
277
+ }
278
+ var ANTHROPIC_ADAPTIVE_MODELS = /(claude-opus-4-7|claude-opus-4-6|claude-sonnet-4-6)/i;
279
+ function toAnthropicThinking(effort, modelId) {
280
+ if (!effort) return {};
281
+ if (typeof effort === "object" && "raw" in effort) {
282
+ return { thinking: effort.raw };
283
+ }
284
+ const isAdaptive = !!modelId && ANTHROPIC_ADAPTIVE_MODELS.test(modelId);
285
+ if (typeof effort === "object" && "budgetTokens" in effort) {
286
+ return {
287
+ thinking: { type: "enabled", budget_tokens: effort.budgetTokens }
288
+ };
289
+ }
290
+ if (!isStringEffort(effort)) return {};
291
+ if (isAdaptive) {
292
+ const mapped = effort === "minimal" ? "low" : effort;
293
+ return {
294
+ thinking: { type: "adaptive" },
295
+ outputConfigEffort: mapped
296
+ };
297
+ }
298
+ const budget = effort === "high" ? 16e3 : effort === "medium" ? 8e3 : effort === "low" ? 4e3 : 2048;
299
+ return { thinking: { type: "enabled", budget_tokens: budget } };
300
+ }
220
301
  function formatTools(actions) {
221
302
  return actions.map((action) => ({
222
303
  type: "function",
@@ -498,6 +579,14 @@ var OpenAIAdapter = class _OpenAIAdapter {
498
579
  return this.client;
499
580
  }
500
581
  shouldUseResponsesApi(request) {
582
+ if (request.config?.mcpServers && request.config.mcpServers.length > 0 || request.config?.reasoningEffort !== void 0) {
583
+ if (this.provider !== "openai" && this.provider !== "azure") {
584
+ throw new Error(
585
+ `[llm-sdk] Provider "${this.provider}" does not support MCP servers or per-request reasoning effort. Use OpenAI or Anthropic for these features.`
586
+ );
587
+ }
588
+ return true;
589
+ }
501
590
  return request.providerToolOptions?.openai?.nativeToolSearch?.enabled === true && request.providerToolOptions.openai.nativeToolSearch.useResponsesApi !== false && Array.isArray(request.toolDefinitions) && request.toolDefinitions.length > 0;
502
591
  }
503
592
  buildResponsesInput(request) {
@@ -558,7 +647,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
558
647
  strict: true,
559
648
  defer_loading: tool.deferLoading === true
560
649
  }));
561
- return [{ type: "tool_search" }, ...nativeTools];
650
+ return nativeTools.length > 0 ? [{ type: "tool_search" }, ...nativeTools] : [];
562
651
  }
563
652
  parseResponsesResult(response) {
564
653
  const content = typeof response?.output_text === "string" ? response.output_text : "";
@@ -590,16 +679,30 @@ var OpenAIAdapter = class _OpenAIAdapter {
590
679
  const responsesTextFormat = toOpenAIResponsesTextFormat(
591
680
  request.config?.responseFormat
592
681
  );
682
+ const mcpTools = toOpenAIResponsesMcpTools(request.config?.mcpServers);
683
+ const modelId = request.config?.model || this.model;
684
+ const reasoning = isOpenAIReasoningModel(modelId) ? toOpenAIReasoning(request.config?.reasoningEffort) : void 0;
685
+ if (request.config?.reasoningEffort && !isOpenAIReasoningModel(modelId)) {
686
+ console.warn(
687
+ `[llm-sdk] openai/${modelId} is not a reasoning model; \`reasoningEffort\` is ignored. Use o1/o3/o4/gpt-5.x for reasoning.`
688
+ );
689
+ }
690
+ const functionTools = this.buildResponsesTools(
691
+ request.toolDefinitions ?? []
692
+ );
693
+ const tools = [...functionTools, ...mcpTools];
593
694
  const payload = {
594
695
  model: request.config?.model || this.model,
595
696
  instructions: request.systemPrompt,
596
697
  input: this.buildResponsesInput(request),
597
- tools: this.buildResponsesTools(request.toolDefinitions ?? []),
698
+ tools: tools.length > 0 ? tools : void 0,
598
699
  tool_choice: openaiToolOptions?.toolChoice === "required" ? "required" : openaiToolOptions?.toolChoice === "auto" ? "auto" : void 0,
599
700
  parallel_tool_calls: openaiToolOptions?.parallelToolCalls,
600
701
  temperature: request.config?.temperature ?? this.config.temperature,
601
702
  max_output_tokens: request.config?.maxTokens ?? this.config.maxTokens,
602
703
  ...responsesTextFormat ? { text: { format: responsesTextFormat } } : {},
704
+ ...reasoning ? { reasoning } : {},
705
+ store: false,
603
706
  stream: false
604
707
  };
605
708
  logProviderPayload("openai", "request payload", payload, request.debug);
@@ -1246,36 +1349,58 @@ var AnthropicAdapter = class {
1246
1349
  if (serverToolConfiguration) {
1247
1350
  options.server_tool_configuration = serverToolConfiguration;
1248
1351
  }
1352
+ const modelForThinking = request.config?.model || this.model;
1353
+ const thinkingTranslation = toAnthropicThinking(
1354
+ request.config?.reasoningEffort,
1355
+ modelForThinking
1356
+ );
1249
1357
  const outputConfig = toAnthropicOutputConfig(responseFormat);
1250
- if (outputConfig) {
1251
- options.output_config = outputConfig;
1358
+ if (outputConfig || thinkingTranslation.outputConfigEffort) {
1359
+ options.output_config = {
1360
+ ...outputConfig ?? {},
1361
+ ...thinkingTranslation.outputConfigEffort ? { effort: thinkingTranslation.outputConfigEffort } : {}
1362
+ };
1252
1363
  }
1253
- if (this.config.thinking?.type === "enabled") {
1364
+ if (thinkingTranslation.thinking) {
1365
+ options.thinking = thinkingTranslation.thinking;
1366
+ } else if (this.config.thinking?.type === "enabled") {
1254
1367
  options.thinking = {
1255
1368
  type: "enabled",
1256
1369
  budget_tokens: this.config.thinking.budgetTokens || 1e4
1257
1370
  };
1258
1371
  }
1259
- return { options, messages };
1372
+ const mcp = toAnthropicMcp(request.config?.mcpServers);
1373
+ const betas = [];
1374
+ if (mcp.mcpServers.length > 0) {
1375
+ options.mcp_servers = mcp.mcpServers;
1376
+ betas.push(...mcp.betas);
1377
+ if (mcp.tools.length > 0) {
1378
+ const existingTools = Array.isArray(options.tools) ? options.tools : [];
1379
+ options.tools = [...existingTools, ...mcp.tools];
1380
+ }
1381
+ }
1382
+ return { options, messages, betas };
1260
1383
  }
1261
1384
  /**
1262
1385
  * Non-streaming completion (for debugging/comparison with original studio-ai)
1263
1386
  */
1264
1387
  async complete(request) {
1265
1388
  const client = await this.getClient();
1266
- const { options } = this.buildRequestOptions(request);
1389
+ const { options, betas } = this.buildRequestOptions(request);
1267
1390
  const nonStreamingOptions = {
1268
1391
  ...options,
1269
1392
  stream: false
1270
1393
  };
1271
1394
  try {
1395
+ const finalOptions = betas.length > 0 ? { ...nonStreamingOptions, betas } : nonStreamingOptions;
1396
+ const messagesApi = betas.length > 0 ? client.beta.messages : client.messages;
1272
1397
  logProviderPayload(
1273
1398
  "anthropic",
1274
1399
  "request payload",
1275
- nonStreamingOptions,
1400
+ finalOptions,
1276
1401
  request.debug
1277
1402
  );
1278
- const response = await client.messages.create(nonStreamingOptions);
1403
+ const response = await messagesApi.create(finalOptions);
1279
1404
  logProviderPayload(
1280
1405
  "anthropic",
1281
1406
  "response payload",
@@ -1310,17 +1435,19 @@ var AnthropicAdapter = class {
1310
1435
  }
1311
1436
  async *stream(request) {
1312
1437
  const client = await this.getClient();
1313
- const { options } = this.buildRequestOptions(request);
1438
+ const { options, betas } = this.buildRequestOptions(request);
1314
1439
  const messageId = generateMessageId();
1315
1440
  yield { type: "message:start", id: messageId };
1316
1441
  try {
1442
+ const finalOptions = betas.length > 0 ? { ...options, betas } : options;
1443
+ const streamApi = betas.length > 0 ? client.beta.messages : client.messages;
1317
1444
  logProviderPayload(
1318
1445
  "anthropic",
1319
1446
  "request payload",
1320
- options,
1447
+ finalOptions,
1321
1448
  request.debug
1322
1449
  );
1323
- const stream = await client.messages.stream(options);
1450
+ const stream = await streamApi.stream(finalOptions);
1324
1451
  let currentToolUse = null;
1325
1452
  let isInThinkingBlock = false;
1326
1453
  const collectedCitations = [];
package/dist/adapters/index.mjs CHANGED
@@ -215,6 +215,87 @@ function toOllamaFormat(rf) {
215
215
  if (rf.type === "json_object") return "json";
216
216
  return rf.json_schema.schema;
217
217
  }
218
+ function toOpenAIResponsesMcpTools(mcpServers) {
219
+ if (!mcpServers || mcpServers.length === 0) return [];
220
+ return mcpServers.map((mcp) => ({
221
+ type: "mcp",
222
+ server_label: mcp.label,
223
+ server_url: mcp.url,
224
+ ...mcp.headers ? { headers: mcp.headers } : {},
225
+ ...mcp.allowedTools ? { allowed_tools: mcp.allowedTools } : {},
226
+ require_approval: mcp.requireApproval ?? "never"
227
+ }));
228
+ }
229
+ function toAnthropicMcp(mcpServers) {
230
+ if (!mcpServers || mcpServers.length === 0) {
231
+ return { mcpServers: [], tools: [], betas: [] };
232
+ }
233
+ const serverEntries = [];
234
+ const toolEntries = [];
235
+ for (const mcp of mcpServers) {
236
+ const authHeader = mcp.headers?.Authorization ?? mcp.headers?.authorization;
237
+ const token = authHeader?.replace(/^Bearer\s+/i, "");
238
+ serverEntries.push({
239
+ type: "url",
240
+ url: mcp.url,
241
+ name: mcp.label,
242
+ ...token ? { authorization_token: token } : {}
243
+ });
244
+ if (mcp.allowedTools && mcp.allowedTools.length > 0) {
245
+ toolEntries.push({
246
+ type: "mcp_toolset",
247
+ mcp_server_name: mcp.label,
248
+ configs: Object.fromEntries(
249
+ mcp.allowedTools.map((toolName) => [toolName, {}])
250
+ )
251
+ });
252
+ }
253
+ }
254
+ return {
255
+ mcpServers: serverEntries,
256
+ tools: toolEntries,
257
+ betas: ["mcp-client-2025-11-20"]
258
+ };
259
+ }
260
+ function isStringEffort(effort) {
261
+ return typeof effort === "string" && (effort === "minimal" || effort === "low" || effort === "medium" || effort === "high");
262
+ }
263
+ function toOpenAIReasoning(effort) {
264
+ if (!effort) return void 0;
265
+ if (typeof effort === "object" && "raw" in effort) return effort.raw;
266
+ if (typeof effort === "object" && "budgetTokens" in effort) {
267
+ const budget = effort.budgetTokens;
268
+ const mapped = budget >= 16e3 ? "high" : budget >= 8e3 ? "medium" : "low";
269
+ return { effort: mapped, summary: "auto" };
270
+ }
271
+ if (isStringEffort(effort)) {
272
+ return { effort, summary: "auto" };
273
+ }
274
+ return void 0;
275
+ }
276
+ var ANTHROPIC_ADAPTIVE_MODELS = /(claude-opus-4-7|claude-opus-4-6|claude-sonnet-4-6)/i;
277
+ function toAnthropicThinking(effort, modelId) {
278
+ if (!effort) return {};
279
+ if (typeof effort === "object" && "raw" in effort) {
280
+ return { thinking: effort.raw };
281
+ }
282
+ const isAdaptive = !!modelId && ANTHROPIC_ADAPTIVE_MODELS.test(modelId);
283
+ if (typeof effort === "object" && "budgetTokens" in effort) {
284
+ return {
285
+ thinking: { type: "enabled", budget_tokens: effort.budgetTokens }
286
+ };
287
+ }
288
+ if (!isStringEffort(effort)) return {};
289
+ if (isAdaptive) {
290
+ const mapped = effort === "minimal" ? "low" : effort;
291
+ return {
292
+ thinking: { type: "adaptive" },
293
+ outputConfigEffort: mapped
294
+ };
295
+ }
296
+ const budget = effort === "high" ? 16e3 : effort === "medium" ? 8e3 : effort === "low" ? 4e3 : 2048;
297
+ return { thinking: { type: "enabled", budget_tokens: budget } };
298
+ }
218
299
  function formatTools(actions) {
219
300
  return actions.map((action) => ({
220
301
  type: "function",
@@ -496,6 +577,14 @@ var OpenAIAdapter = class _OpenAIAdapter {
496
577
  return this.client;
497
578
  }
498
579
  shouldUseResponsesApi(request) {
580
+ if (request.config?.mcpServers && request.config.mcpServers.length > 0 || request.config?.reasoningEffort !== void 0) {
581
+ if (this.provider !== "openai" && this.provider !== "azure") {
582
+ throw new Error(
583
+ `[llm-sdk] Provider "${this.provider}" does not support MCP servers or per-request reasoning effort. Use OpenAI or Anthropic for these features.`
584
+ );
585
+ }
586
+ return true;
587
+ }
499
588
  return request.providerToolOptions?.openai?.nativeToolSearch?.enabled === true && request.providerToolOptions.openai.nativeToolSearch.useResponsesApi !== false && Array.isArray(request.toolDefinitions) && request.toolDefinitions.length > 0;
500
589
  }
501
590
  buildResponsesInput(request) {
@@ -556,7 +645,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
556
645
  strict: true,
557
646
  defer_loading: tool.deferLoading === true
558
647
  }));
559
- return [{ type: "tool_search" }, ...nativeTools];
648
+ return nativeTools.length > 0 ? [{ type: "tool_search" }, ...nativeTools] : [];
560
649
  }
561
650
  parseResponsesResult(response) {
562
651
  const content = typeof response?.output_text === "string" ? response.output_text : "";
@@ -588,16 +677,30 @@ var OpenAIAdapter = class _OpenAIAdapter {
588
677
  const responsesTextFormat = toOpenAIResponsesTextFormat(
589
678
  request.config?.responseFormat
590
679
  );
680
+ const mcpTools = toOpenAIResponsesMcpTools(request.config?.mcpServers);
681
+ const modelId = request.config?.model || this.model;
682
+ const reasoning = isOpenAIReasoningModel(modelId) ? toOpenAIReasoning(request.config?.reasoningEffort) : void 0;
683
+ if (request.config?.reasoningEffort && !isOpenAIReasoningModel(modelId)) {
684
+ console.warn(
685
+ `[llm-sdk] openai/${modelId} is not a reasoning model; \`reasoningEffort\` is ignored. Use o1/o3/o4/gpt-5.x for reasoning.`
686
+ );
687
+ }
688
+ const functionTools = this.buildResponsesTools(
689
+ request.toolDefinitions ?? []
690
+ );
691
+ const tools = [...functionTools, ...mcpTools];
591
692
  const payload = {
592
693
  model: request.config?.model || this.model,
593
694
  instructions: request.systemPrompt,
594
695
  input: this.buildResponsesInput(request),
595
- tools: this.buildResponsesTools(request.toolDefinitions ?? []),
696
+ tools: tools.length > 0 ? tools : void 0,
596
697
  tool_choice: openaiToolOptions?.toolChoice === "required" ? "required" : openaiToolOptions?.toolChoice === "auto" ? "auto" : void 0,
597
698
  parallel_tool_calls: openaiToolOptions?.parallelToolCalls,
598
699
  temperature: request.config?.temperature ?? this.config.temperature,
599
700
  max_output_tokens: request.config?.maxTokens ?? this.config.maxTokens,
600
701
  ...responsesTextFormat ? { text: { format: responsesTextFormat } } : {},
702
+ ...reasoning ? { reasoning } : {},
703
+ store: false,
601
704
  stream: false
602
705
  };
603
706
  logProviderPayload("openai", "request payload", payload, request.debug);
@@ -1244,36 +1347,58 @@ var AnthropicAdapter = class {
1244
1347
  if (serverToolConfiguration) {
1245
1348
  options.server_tool_configuration = serverToolConfiguration;
1246
1349
  }
1350
+ const modelForThinking = request.config?.model || this.model;
1351
+ const thinkingTranslation = toAnthropicThinking(
1352
+ request.config?.reasoningEffort,
1353
+ modelForThinking
1354
+ );
1247
1355
  const outputConfig = toAnthropicOutputConfig(responseFormat);
1248
- if (outputConfig) {
1249
- options.output_config = outputConfig;
1356
+ if (outputConfig || thinkingTranslation.outputConfigEffort) {
1357
+ options.output_config = {
1358
+ ...outputConfig ?? {},
1359
+ ...thinkingTranslation.outputConfigEffort ? { effort: thinkingTranslation.outputConfigEffort } : {}
1360
+ };
1250
1361
  }
1251
- if (this.config.thinking?.type === "enabled") {
1362
+ if (thinkingTranslation.thinking) {
1363
+ options.thinking = thinkingTranslation.thinking;
1364
+ } else if (this.config.thinking?.type === "enabled") {
1252
1365
  options.thinking = {
1253
1366
  type: "enabled",
1254
1367
  budget_tokens: this.config.thinking.budgetTokens || 1e4
1255
1368
  };
1256
1369
  }
1257
- return { options, messages };
1370
+ const mcp = toAnthropicMcp(request.config?.mcpServers);
1371
+ const betas = [];
1372
+ if (mcp.mcpServers.length > 0) {
1373
+ options.mcp_servers = mcp.mcpServers;
1374
+ betas.push(...mcp.betas);
1375
+ if (mcp.tools.length > 0) {
1376
+ const existingTools = Array.isArray(options.tools) ? options.tools : [];
1377
+ options.tools = [...existingTools, ...mcp.tools];
1378
+ }
1379
+ }
1380
+ return { options, messages, betas };
1258
1381
  }
1259
1382
  /**
1260
1383
  * Non-streaming completion (for debugging/comparison with original studio-ai)
1261
1384
  */
1262
1385
  async complete(request) {
1263
1386
  const client = await this.getClient();
1264
- const { options } = this.buildRequestOptions(request);
1387
+ const { options, betas } = this.buildRequestOptions(request);
1265
1388
  const nonStreamingOptions = {
1266
1389
  ...options,
1267
1390
  stream: false
1268
1391
  };
1269
1392
  try {
1393
+ const finalOptions = betas.length > 0 ? { ...nonStreamingOptions, betas } : nonStreamingOptions;
1394
+ const messagesApi = betas.length > 0 ? client.beta.messages : client.messages;
1270
1395
  logProviderPayload(
1271
1396
  "anthropic",
1272
1397
  "request payload",
1273
- nonStreamingOptions,
1398
+ finalOptions,
1274
1399
  request.debug
1275
1400
  );
1276
- const response = await client.messages.create(nonStreamingOptions);
1401
+ const response = await messagesApi.create(finalOptions);
1277
1402
  logProviderPayload(
1278
1403
  "anthropic",
1279
1404
  "response payload",
@@ -1308,17 +1433,19 @@ var AnthropicAdapter = class {
1308
1433
  }
1309
1434
  async *stream(request) {
1310
1435
  const client = await this.getClient();
1311
- const { options } = this.buildRequestOptions(request);
1436
+ const { options, betas } = this.buildRequestOptions(request);
1312
1437
  const messageId = generateMessageId();
1313
1438
  yield { type: "message:start", id: messageId };
1314
1439
  try {
1440
+ const finalOptions = betas.length > 0 ? { ...options, betas } : options;
1441
+ const streamApi = betas.length > 0 ? client.beta.messages : client.messages;
1315
1442
  logProviderPayload(
1316
1443
  "anthropic",
1317
1444
  "request payload",
1318
- options,
1445
+ finalOptions,
1319
1446
  request.debug
1320
1447
  );
1321
- const stream = await client.messages.stream(options);
1448
+ const stream = await streamApi.stream(finalOptions);
1322
1449
  let currentToolUse = null;
1323
1450
  let isInThinkingBlock = false;
1324
1451
  const collectedCitations = [];
package/dist/{base-tNgbBaSo.d.mts → base-BYQKp9TW.d.mts} CHANGED
@@ -1,4 +1,4 @@
1
- import { M as Message, A as ActionDefinition, d as ToolDefinition, R as ResponseFormat, W as WebSearchConfig, P as ProviderToolRuntimeOptions, g as StreamEvent, J as TokenUsage, X as LLMConfig, a6 as MessageAttachment } from './types-BkQCSiIt.mjs';
1
+ import { k as Message, A as ActionDefinition, d as ToolDefinition, R as ResponseFormat, M as McpServerConfig, g as ReasoningEffort, W as WebSearchConfig, P as ProviderToolRuntimeOptions, h as StreamEvent, O as TokenUsage, Z as LLMConfig, a8 as MessageAttachment } from './types-D774b0dg.mjs';
2
2
 
3
3
  /**
4
4
  * Request-level LLM configuration overrides
@@ -8,6 +8,10 @@ interface RequestLLMConfig {
8
8
  temperature?: number;
9
9
  maxTokens?: number;
10
10
  responseFormat?: ResponseFormat;
11
+ /** MCP servers exposed to the model for this request (provider-translated). */
12
+ mcpServers?: McpServerConfig[];
13
+ /** Reasoning/thinking effort knob (provider-translated). */
14
+ reasoningEffort?: ReasoningEffort;
11
15
  }
12
16
  /**
13
17
  * Chat completion request
package/dist/{base-C58Dsr9p.d.ts → base-Cxq3ni0t.d.ts} CHANGED
@@ -1,4 +1,4 @@
1
- import { M as Message, A as ActionDefinition, d as ToolDefinition, R as ResponseFormat, W as WebSearchConfig, P as ProviderToolRuntimeOptions, g as StreamEvent, J as TokenUsage, X as LLMConfig, a6 as MessageAttachment } from './types-BkQCSiIt.js';
1
+ import { k as Message, A as ActionDefinition, d as ToolDefinition, R as ResponseFormat, M as McpServerConfig, g as ReasoningEffort, W as WebSearchConfig, P as ProviderToolRuntimeOptions, h as StreamEvent, O as TokenUsage, Z as LLMConfig, a8 as MessageAttachment } from './types-D774b0dg.js';
2
2
 
3
3
  /**
4
4
  * Request-level LLM configuration overrides
@@ -8,6 +8,10 @@ interface RequestLLMConfig {
8
8
  temperature?: number;
9
9
  maxTokens?: number;
10
10
  responseFormat?: ResponseFormat;
11
+ /** MCP servers exposed to the model for this request (provider-translated). */
12
+ mcpServers?: McpServerConfig[];
13
+ /** Reasoning/thinking effort knob (provider-translated). */
14
+ reasoningEffort?: ReasoningEffort;
11
15
  }
12
16
  /**
13
17
  * Chat completion request
package/dist/fallback/index.d.mts CHANGED
@@ -1,7 +1,7 @@
1
- import { L as LLMAdapter } from '../base-tNgbBaSo.mjs';
2
- import { F as FallbackChainConfig, c as FallbackFailure, R as RoutingStore } from '../types-B6dhnguR.mjs';
3
- export { d as FallbackInfo, b as RetryBackoff, e as RetryInfo, a as RoutingStrategy } from '../types-B6dhnguR.mjs';
4
- import '../types-BkQCSiIt.mjs';
1
+ import { L as LLMAdapter } from '../base-BYQKp9TW.mjs';
2
+ import { F as FallbackChainConfig, c as FallbackFailure, R as RoutingStore } from '../types-mwMhCwOq.mjs';
3
+ export { d as FallbackInfo, b as RetryBackoff, e as RetryInfo, a as RoutingStrategy } from '../types-mwMhCwOq.mjs';
4
+ import '../types-D774b0dg.mjs';
5
5
  import 'zod';
6
6
 
7
7
  /**
package/dist/fallback/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
- import { L as LLMAdapter } from '../base-C58Dsr9p.js';
2
- import { F as FallbackChainConfig, c as FallbackFailure, R as RoutingStore } from '../types-CCxPmkmK.js';
3
- export { d as FallbackInfo, b as RetryBackoff, e as RetryInfo, a as RoutingStrategy } from '../types-CCxPmkmK.js';
4
- import '../types-BkQCSiIt.js';
1
+ import { L as LLMAdapter } from '../base-Cxq3ni0t.js';
2
+ import { F as FallbackChainConfig, c as FallbackFailure, R as RoutingStore } from '../types-ChORafYS.js';
3
+ export { d as FallbackInfo, b as RetryBackoff, e as RetryInfo, a as RoutingStrategy } from '../types-ChORafYS.js';
4
+ import '../types-D774b0dg.js';
5
5
  import 'zod';
6
6
 
7
7
  /**