@yourgpt/llm-sdk 2.1.10-alpha.0 → 2.5.1-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/dist/adapters/index.d.mts +4 -38
  2. package/dist/adapters/index.d.ts +4 -38
  3. package/dist/adapters/index.js +158 -325
  4. package/dist/adapters/index.mjs +158 -325
  5. package/dist/base-C58Dsr9p.d.ts +259 -0
  6. package/dist/base-tNgbBaSo.d.mts +259 -0
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +8 -7
  10. package/dist/index.d.ts +8 -7
  11. package/dist/index.js +35 -43
  12. package/dist/index.mjs +35 -43
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +271 -212
  16. package/dist/providers/anthropic/index.mjs +271 -212
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/azure/index.js +49 -1
  20. package/dist/providers/azure/index.mjs +49 -1
  21. package/dist/providers/fireworks/index.d.mts +1 -1
  22. package/dist/providers/fireworks/index.d.ts +1 -1
  23. package/dist/providers/fireworks/index.js +56 -0
  24. package/dist/providers/fireworks/index.mjs +56 -0
  25. package/dist/providers/google/index.d.mts +3 -3
  26. package/dist/providers/google/index.d.ts +3 -3
  27. package/dist/providers/google/index.js +254 -510
  28. package/dist/providers/google/index.mjs +254 -510
  29. package/dist/providers/ollama/index.d.mts +4 -4
  30. package/dist/providers/ollama/index.d.ts +4 -4
  31. package/dist/providers/ollama/index.js +10 -2
  32. package/dist/providers/ollama/index.mjs +10 -2
  33. package/dist/providers/openai/index.d.mts +3 -3
  34. package/dist/providers/openai/index.d.ts +3 -3
  35. package/dist/providers/openai/index.js +269 -529
  36. package/dist/providers/openai/index.mjs +269 -529
  37. package/dist/providers/openrouter/index.d.mts +3 -7
  38. package/dist/providers/openrouter/index.d.ts +3 -7
  39. package/dist/providers/openrouter/index.js +365 -902
  40. package/dist/providers/openrouter/index.mjs +365 -902
  41. package/dist/providers/togetherai/index.d.mts +3 -3
  42. package/dist/providers/togetherai/index.d.ts +3 -3
  43. package/dist/providers/togetherai/index.js +259 -509
  44. package/dist/providers/togetherai/index.mjs +259 -509
  45. package/dist/providers/xai/index.d.mts +3 -3
  46. package/dist/providers/xai/index.d.ts +3 -3
  47. package/dist/providers/xai/index.js +258 -513
  48. package/dist/providers/xai/index.mjs +258 -513
  49. package/dist/{types-BNCmlJMs.d.mts → types-B6dhnguR.d.mts} +1 -1
  50. package/dist/{types-DhktekQ3.d.ts → types-BQ31QIsA.d.ts} +2 -1
  51. package/dist/{types-CMMQ8s2O.d.mts → types-BSSiJW2o.d.mts} +2 -1
  52. package/dist/{base-DN1EfKnE.d.mts → types-BkQCSiIt.d.mts} +388 -214
  53. package/dist/{base-DuUNxtVg.d.ts → types-BkQCSiIt.d.ts} +388 -214
  54. package/dist/{types-Pj-vpmoT.d.ts → types-CCxPmkmK.d.ts} +1 -1
  55. package/dist/yourgpt/index.d.mts +1 -1
  56. package/dist/yourgpt/index.d.ts +1 -1
  57. package/package.json +1 -1
  58. package/dist/types-CMvvDo-E.d.mts +0 -428
  59. package/dist/types-CMvvDo-E.d.ts +0 -428
@@ -112,6 +112,109 @@ function normalizeObjectJsonSchema(schema) {
112
112
  }
113
113
  return normalized;
114
114
  }
115
/**
 * Reports whether a model id names an OpenAI reasoning model, i.e. its id
 * starts (case-insensitively) with `o1`, `o3`, `o4` or `gpt-5`.
 *
 * An optional leading `openai/` vendor prefix is ignored, so gateway-style ids
 * such as `openai/o3-mini` are classified the same way as the bare `o3-mini`.
 *
 * @param {string | null | undefined} modelId - Model identifier to classify.
 * @returns {boolean} True when the id belongs to one of the matched families;
 *   false for empty/missing ids and every other model.
 */
function isOpenAIReasoningModel(modelId) {
  if (!modelId) return false;
  // Non-capturing groups: optional vendor prefix, then the family prefix.
  return /^(?:openai\/)?(?:o1|o3|o4|gpt-5)/i.test(modelId);
}
119
/**
 * Chooses the token-limit fields to spread into a chat-completions payload.
 *
 * Models recognised by `isOpenAIReasoningModel` get only
 * `max_completion_tokens`; every other model gets `max_tokens` together with
 * `temperature`.
 *
 * @param {string} modelId - Model id the payload is being built for.
 * @param {number | undefined} maxTokens - Output token limit, passed through as-is.
 * @param {number | undefined} temperature - Sampling temperature; omitted for reasoning models.
 * @returns {object} Object holding the selected payload fields.
 */
function buildOpenAITokenParams(modelId, maxTokens, temperature) {
  const usesCompletionTokenLimit = isOpenAIReasoningModel(modelId);
  return usesCompletionTokenLimit
    ? { max_completion_tokens: maxTokens }
    : { max_tokens: maxTokens, temperature };
}
125
// Keywords whose value is a map of user-chosen names to sub-schemas. The names
// are data, so they must never be dropped or renamed as if they were keywords.
var SCHEMA_NAME_MAP_KEYWORDS = /* @__PURE__ */ new Set([
  "properties",
  "patternProperties",
  "$defs",
  "definitions",
  "dependentSchemas"
]);
// Keywords whose value is literal instance data rather than a schema.
var SCHEMA_LITERAL_KEYWORDS = /* @__PURE__ */ new Set([
  "enum",
  "const",
  "default",
  "examples"
]);
/**
 * Returns a copy of a JSON-Schema-like value with selected keywords removed
 * and, optionally, renamed.
 *
 * Arrays are mapped element-wise and non-object values are returned as-is.
 * Keyword filtering applies only at schema level:
 *  - entries of name maps (`properties`, `$defs`, ...) keep their names, and
 *    only their values are processed as schemas, so a property that happens to
 *    be called `pattern` or `oneOf` survives;
 *  - literal-data keywords (`enum`, `const`, `default`, `examples`) are carried
 *    over untouched (by reference, not deep-copied).
 *
 * @param {*} schema - Schema (or schema fragment) to process.
 * @param {Set<string>} keysToDrop - Keywords to omit from the output.
 * @param {object} [options]
 * @param {Object<string, string>} [options.renameKeys] - Keyword → replacement
 *   name; only own properties of this object are honoured.
 * @param {boolean} [options.forceAdditionalPropertiesFalse] - When truthy, every
 *   emitted schema whose `type` is exactly "object" gets
 *   `additionalProperties: false`.
 * @returns {*} The processed copy; the input is not mutated.
 */
function stripSchemaKeys(schema, keysToDrop, options = {}) {
  if (Array.isArray(schema)) {
    return schema.map((item) => stripSchemaKeys(item, keysToDrop, options));
  }
  if (!schema || typeof schema !== "object") return schema;
  const renameKeys = options.renameKeys;
  const out = {};
  for (const [key, value] of Object.entries(schema)) {
    if (keysToDrop.has(key)) continue;
    // Own-property check so inherited names ("constructor", "toString", ...)
    // are never picked up from Object.prototype as a rename target.
    const hasRename = renameKeys != null && Object.prototype.hasOwnProperty.call(renameKeys, key);
    const renamed = hasRename ? renameKeys[key] ?? key : key;
    if (SCHEMA_LITERAL_KEYWORDS.has(key)) {
      out[renamed] = value;
      continue;
    }
    const isNameMap = SCHEMA_NAME_MAP_KEYWORDS.has(key) && value && typeof value === "object" && !Array.isArray(value);
    if (isNameMap) {
      out[renamed] = Object.fromEntries(
        Object.entries(value).map(([name, subSchema]) => [
          name,
          stripSchemaKeys(subSchema, keysToDrop, options)
        ])
      );
      continue;
    }
    out[renamed] = stripSchemaKeys(value, keysToDrop, options);
  }
  if (options.forceAdditionalPropertiesFalse && out.type === "object") {
    out.additionalProperties = false;
  }
  return out;
}
143
/**
 * Converts the SDK's `responseFormat` config into the `response_format` field
 * of an OpenAI-style chat-completions payload.
 *
 * - missing config                → undefined (field omitted)
 * - `{ type: "json_object" }`     → `{ type: "json_object" }`
 * - `{ type: "json_schema", … }`  → json_schema wrapper with the schema passed
 *   through `normalizeObjectJsonSchema`; `strict` defaults to true
 * - any other type (e.g. "text")  → undefined, matching the sibling converters,
 *   instead of failing on a missing `json_schema` payload
 *
 * @param {object | null | undefined} rf - Response-format config.
 * @returns {object | undefined} Value for `response_format`, or undefined.
 */
function toOpenAIResponseFormat(rf) {
  if (!rf) return void 0;
  if (rf.type === "json_object") return { type: "json_object" };
  if (rf.type !== "json_schema") return void 0;
  const { name, schema, strict } = rf.json_schema;
  return {
    type: "json_schema",
    json_schema: {
      name,
      schema: normalizeObjectJsonSchema(schema),
      strict: strict ?? true
    }
  };
}
155
/**
 * Converts the SDK's `responseFormat` config into the value used for
 * `text.format` in an OpenAI Responses API payload.
 *
 * - missing config                → undefined (no `text` field is added)
 * - `{ type: "json_object" }`     → `{ type: "json_object" }`, so JSON mode is
 *   honoured here just as it is on the chat-completions path
 * - `{ type: "json_schema", … }`  → flattened json_schema format with the schema
 *   passed through `normalizeObjectJsonSchema`; `strict` defaults to true
 * - any other type, or a json_schema config without its payload → undefined
 *
 * @param {object | null | undefined} rf - Response-format config.
 * @returns {object | undefined} Value for `text.format`, or undefined.
 */
function toOpenAIResponsesTextFormat(rf) {
  if (!rf) return void 0;
  if (rf.type === "json_object") return { type: "json_object" };
  if (rf.type !== "json_schema" || !rf.json_schema) return void 0;
  const { name, schema, strict } = rf.json_schema;
  return {
    type: "json_schema",
    name,
    schema: normalizeObjectJsonSchema(schema),
    strict: strict ?? true
  };
}
164
// Schema keywords that toAnthropicOutputConfig strips before sending a schema.
// NOTE(review): presumably the keywords Anthropic's structured output rejects —
// confirm against the provider documentation.
var ANTHROPIC_UNSUPPORTED_KEYS = /* @__PURE__ */ new Set([
  // numeric constraints
  "minimum",
  "maximum",
  "exclusiveMinimum",
  "exclusiveMaximum",
  "multipleOf",
  // size constraints
  "minLength",
  "maxLength",
  "minItems",
  "maxItems",
  "minProperties",
  "maxProperties",
  // remaining keywords
  "pattern",
  "$schema"
]);
/**
 * Builds the value assigned to `output_config` for a json_schema response
 * format.
 *
 * The schema is run through `stripSchemaKeys` with
 * `ANTHROPIC_UNSUPPORTED_KEYS`, `oneOf` is renamed to `anyOf`, and
 * `additionalProperties: false` is forced on object schemas.
 *
 * @param {object | null | undefined} rf - Response-format config.
 * @returns {object | undefined} `{ format: { type: "json_schema", schema } }`,
 *   or undefined when `rf` is missing or is not of type "json_schema".
 */
function toAnthropicOutputConfig(rf) {
  const isJsonSchemaFormat = Boolean(rf) && rf.type === "json_schema";
  if (!isJsonSchemaFormat) return void 0;
  const stripOptions = {
    forceAdditionalPropertiesFalse: true,
    renameKeys: { oneOf: "anyOf" }
  };
  const schema = stripSchemaKeys(
    rf.json_schema.schema,
    ANTHROPIC_UNSUPPORTED_KEYS,
    stripOptions
  );
  const format = { type: "json_schema", schema };
  return { format };
}
196
// Schema keywords that toGeminiSchema strips before sending a schema.
// NOTE(review): presumably the keywords Gemini's response schema rejects —
// confirm against the provider documentation.
var GEMINI_UNSUPPORTED_KEYS = /* @__PURE__ */ new Set([
  // composition
  "oneOf",
  "anyOf",
  // references and definition containers
  "$ref",
  "$defs",
  "definitions",
  // remaining keywords
  "pattern",
  "$schema",
  "additionalProperties"
]);
/**
 * Produces the schema used for Gemini structured output from a json_schema
 * response format by removing every keyword in `GEMINI_UNSUPPORTED_KEYS` via
 * `stripSchemaKeys`.
 *
 * @param {object | null | undefined} rf - Response-format config.
 * @returns {object | undefined} The stripped schema, or undefined when `rf` is
 *   missing or is not of type "json_schema".
 */
function toGeminiSchema(rf) {
  const isJsonSchemaFormat = Boolean(rf) && rf.type === "json_schema";
  if (!isJsonSchemaFormat) return void 0;
  const sourceSchema = rf.json_schema.schema;
  return stripSchemaKeys(sourceSchema, GEMINI_UNSUPPORTED_KEYS);
}
213
/**
 * Converts the SDK's `responseFormat` config into the `format` field of an
 * Ollama chat payload.
 *
 * - missing config                → undefined (field omitted by the caller)
 * - `{ type: "json_object" }`     → the string "json"
 * - `{ type: "json_schema", … }`  → the raw schema object, unmodified
 * - any other type, or a json_schema config without its payload → undefined,
 *   rather than a TypeError from dereferencing a missing `json_schema`
 *
 * @param {object | null | undefined} rf - Response-format config.
 * @returns {"json" | object | undefined} Value for `format`, or undefined.
 */
function toOllamaFormat(rf) {
  if (!rf) return void 0;
  if (rf.type === "json_object") return "json";
  if (rf.type !== "json_schema") return void 0;
  return rf.json_schema?.schema;
}
115
218
  function formatTools(actions) {
116
219
  return actions.map((action) => ({
117
220
  type: "function",
@@ -380,7 +483,6 @@ var OpenAIAdapter = class _OpenAIAdapter {
380
483
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
381
484
  if (baseUrl.includes("x.ai")) return "xai";
382
485
  if (baseUrl.includes("azure")) return "azure";
383
- if (baseUrl.includes("openrouter.ai")) return "openrouter";
384
486
  return "openai";
385
487
  }
386
488
  async getClient() {
@@ -480,259 +582,12 @@ var OpenAIAdapter = class _OpenAIAdapter {
480
582
  rawResponse: response
481
583
  };
482
584
  }
483
- /**
484
- * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
485
- * reasoning content on the chat-completions endpoint. To surface reasoning
486
- * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
487
- * Responses API, which streams `response.reasoning_summary_text.delta` events.
488
- *
489
- * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
490
- * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
491
- */
492
- isOpenAIReasoningModelOnOpenRouter(activeModel) {
493
- if (this.provider !== "openrouter") return false;
494
- return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
495
- }
496
- /**
497
- * Convert ActionDefinition[] (the chat-completions tool shape used by the
498
- * adapter) to the Responses API tool shape.
499
- */
500
- buildResponsesToolsFromActions(actions) {
501
- if (!actions || actions.length === 0) return void 0;
502
- const formatted = formatTools(actions);
503
- return formatted.map((t) => ({
504
- type: "function",
505
- name: t.function.name,
506
- description: t.function.description,
507
- parameters: t.function.parameters
508
- }));
509
- }
510
- /**
511
- * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
512
- *
513
- * Maps Responses API SSE events back to the same StreamEvent shapes the
514
- * chat-completions path emits, so downstream consumers (processChunk.ts,
515
- * frontend tool handlers, plan approval, specialist delegations) see
516
- * identical events regardless of which path produced them.
517
- *
518
- * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
519
- * response.output_text.delta → message:delta
520
- * response.output_item.added (function_call) → action:start (queued buffer)
521
- * response.function_call_arguments.delta → action:args (progressive)
522
- * response.output_item.done (function_call) → final action:args + action:end
523
- * response.completed → message:end + done(usage)
524
- * response.error → error
525
- */
526
- async *streamWithResponsesAPI(request, activeModel, messageId) {
527
- const client = await this.getClient();
528
- const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
529
- const payload = {
530
- model: activeModel,
531
- input: this.buildResponsesInput(request),
532
- stream: true,
533
- reasoning: {
534
- effort: request.config?.reasoningEffort ?? "medium",
535
- summary: "auto"
536
- }
537
- };
538
- if (request.systemPrompt) payload.instructions = request.systemPrompt;
539
- if (typeof maxTokensValue === "number")
540
- payload.max_output_tokens = maxTokensValue;
541
- const tools = this.buildResponsesToolsFromActions(request.actions);
542
- if (tools && tools.length > 0) payload.tools = tools;
543
- logProviderPayload(
544
- "openai",
545
- "responses-api request payload",
546
- payload,
547
- request.debug
548
- );
549
- let stream;
550
- try {
551
- stream = await client.responses.create(payload);
552
- } catch (error) {
553
- yield {
554
- type: "error",
555
- message: error instanceof Error ? error.message : "Unknown error",
556
- code: "OPENAI_RESPONSES_ERROR"
557
- };
558
- return;
559
- }
560
- const toolBuffers = /* @__PURE__ */ new Map();
561
- const itemIdToCallId = /* @__PURE__ */ new Map();
562
- let usage;
563
- let reasoningStarted = false;
564
- let textStarted = false;
565
- let finishEmitted = false;
566
- const resolveCallId = (evt) => {
567
- if (evt?.call_id) return evt.call_id;
568
- if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
569
- if (evt?.item?.call_id) return evt.item.call_id;
570
- if (evt?.item?.id) return evt.item.id;
571
- return "";
572
- };
573
- try {
574
- for await (const evt of stream) {
575
- logProviderPayload(
576
- "openai",
577
- "responses-api stream chunk",
578
- evt,
579
- request.debug
580
- );
581
- if (request.signal?.aborted) break;
582
- const t = evt?.type ?? "";
583
- if (t === "response.reasoning_summary_text.delta") {
584
- const delta = evt.delta ?? "";
585
- if (!delta) continue;
586
- if (!reasoningStarted) {
587
- yield { type: "thinking:start" };
588
- reasoningStarted = true;
589
- }
590
- yield { type: "thinking:delta", content: delta };
591
- continue;
592
- }
593
- if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
594
- continue;
595
- }
596
- if (t === "response.output_text.delta") {
597
- const text = evt.delta ?? "";
598
- if (!text) continue;
599
- if (reasoningStarted && !textStarted) {
600
- yield { type: "thinking:end" };
601
- textStarted = true;
602
- }
603
- yield { type: "message:delta", content: text };
604
- continue;
605
- }
606
- if (t === "response.output_item.added") {
607
- const item = evt.item;
608
- if (item?.type === "function_call") {
609
- const callId = item.call_id ?? item.id ?? "";
610
- const itemId = item.id ?? callId;
611
- if (callId) {
612
- if (itemId && itemId !== callId) {
613
- itemIdToCallId.set(itemId, callId);
614
- }
615
- if (!toolBuffers.has(callId)) {
616
- toolBuffers.set(callId, {
617
- id: callId,
618
- name: item.name ?? "",
619
- arguments: item.arguments ?? "",
620
- emittedStart: false
621
- });
622
- }
623
- const buf = toolBuffers.get(callId);
624
- if (buf.name && !buf.emittedStart) {
625
- yield { type: "action:start", id: buf.id, name: buf.name };
626
- buf.emittedStart = true;
627
- }
628
- }
629
- }
630
- continue;
631
- }
632
- if (t === "response.function_call_arguments.delta") {
633
- const callId = resolveCallId(evt);
634
- const delta = evt.delta ?? "";
635
- if (!callId || !delta) continue;
636
- let buf = toolBuffers.get(callId);
637
- if (!buf) {
638
- buf = { id: callId, name: "", arguments: "", emittedStart: false };
639
- toolBuffers.set(callId, buf);
640
- }
641
- buf.arguments += delta;
642
- if (buf.emittedStart) {
643
- yield {
644
- type: "action:args",
645
- id: buf.id,
646
- args: buf.arguments
647
- };
648
- }
649
- continue;
650
- }
651
- if (t === "response.output_item.done") {
652
- const item = evt.item;
653
- if (item?.type === "function_call") {
654
- const callId = item.call_id ?? item.id ?? "";
655
- const buf = toolBuffers.get(callId);
656
- const name = buf?.name || item.name || "";
657
- const argsStr = buf?.arguments || item.arguments || "{}";
658
- if (callId && name) {
659
- if (!buf?.emittedStart) {
660
- yield { type: "action:start", id: callId, name };
661
- }
662
- yield {
663
- type: "action:args",
664
- id: callId,
665
- args: argsStr
666
- };
667
- yield {
668
- type: "action:end",
669
- id: callId,
670
- name
671
- };
672
- }
673
- toolBuffers.delete(callId);
674
- }
675
- continue;
676
- }
677
- if (t === "response.completed") {
678
- const u = evt.response?.usage;
679
- if (u) {
680
- usage = {
681
- prompt_tokens: u.input_tokens ?? 0,
682
- completion_tokens: u.output_tokens ?? 0,
683
- total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
684
- };
685
- }
686
- for (const buf of toolBuffers.values()) {
687
- if (!buf.id || !buf.name) continue;
688
- if (!buf.emittedStart) {
689
- yield { type: "action:start", id: buf.id, name: buf.name };
690
- }
691
- yield {
692
- type: "action:args",
693
- id: buf.id,
694
- args: buf.arguments || "{}"
695
- };
696
- yield { type: "action:end", id: buf.id, name: buf.name };
697
- }
698
- toolBuffers.clear();
699
- if (reasoningStarted && !textStarted) {
700
- yield { type: "thinking:end" };
701
- }
702
- yield { type: "message:end" };
703
- yield { type: "done", usage };
704
- finishEmitted = true;
705
- continue;
706
- }
707
- if (t === "response.error" || t === "error") {
708
- const msg = evt.error?.message || evt.message || "Responses API error";
709
- yield {
710
- type: "error",
711
- message: msg,
712
- code: "OPENAI_RESPONSES_ERROR"
713
- };
714
- return;
715
- }
716
- }
717
- } catch (error) {
718
- yield {
719
- type: "error",
720
- message: error instanceof Error ? error.message : "Unknown error",
721
- code: "OPENAI_RESPONSES_ERROR"
722
- };
723
- return;
724
- }
725
- if (!finishEmitted) {
726
- if (reasoningStarted && !textStarted) {
727
- yield { type: "thinking:end" };
728
- }
729
- yield { type: "message:end" };
730
- yield { type: "done", usage };
731
- }
732
- }
733
585
  async completeWithResponses(request) {
734
586
  const client = await this.getClient();
735
587
  const openaiToolOptions = request.providerToolOptions?.openai;
588
+ const responsesTextFormat = toOpenAIResponsesTextFormat(
589
+ request.config?.responseFormat
590
+ );
736
591
  const payload = {
737
592
  model: request.config?.model || this.model,
738
593
  instructions: request.systemPrompt,
@@ -742,6 +597,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
742
597
  parallel_tool_calls: openaiToolOptions?.parallelToolCalls,
743
598
  temperature: request.config?.temperature ?? this.config.temperature,
744
599
  max_output_tokens: request.config?.maxTokens ?? this.config.maxTokens,
600
+ ...responsesTextFormat ? { text: { format: responsesTextFormat } } : {},
745
601
  stream: false
746
602
  };
747
603
  logProviderPayload("openai", "request payload", payload, request.debug);
@@ -863,37 +719,21 @@ var OpenAIAdapter = class _OpenAIAdapter {
863
719
  name: openaiToolOptions.toolChoice.name
864
720
  }
865
721
  } : openaiToolOptions?.toolChoice;
866
- const isOpenRouter = this.provider === "openrouter";
867
- const activeModel = request.config?.model || this.model;
868
- const modelSlug = activeModel.replace("openai/", "");
869
- const isOSeries = /^o[1-9]/.test(modelSlug);
870
- const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
871
- if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
872
- yield* this.streamWithResponsesAPI(request, activeModel, messageId);
873
- return;
874
- }
875
- const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
722
+ const modelIdForPayload = request.config?.model || this.model;
876
723
  const payload = {
877
- model: activeModel,
724
+ model: modelIdForPayload,
878
725
  messages,
879
726
  tools: tools.length > 0 ? tools : void 0,
880
727
  tool_choice: tools.length > 0 ? toolChoice : void 0,
881
728
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
729
+ ...buildOpenAITokenParams(
730
+ modelIdForPayload,
731
+ request.config?.maxTokens ?? this.config.maxTokens,
732
+ request.config?.temperature ?? this.config.temperature
733
+ ),
734
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
882
735
  stream: true,
883
- stream_options: { include_usage: true },
884
- // o-series: use max_completion_tokens + reasoning_effort, no temperature
885
- // regular models: use max_tokens + temperature
886
- ...isOSeries ? {
887
- max_completion_tokens: maxTokensValue,
888
- reasoning_effort: request.config?.reasoningEffort ?? "medium"
889
- } : {
890
- temperature: request.config?.temperature ?? this.config.temperature,
891
- max_tokens: maxTokensValue
892
- },
893
- // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
894
- // When disableThinking=true we must explicitly send include_reasoning:false because
895
- // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
896
- ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
736
+ stream_options: { include_usage: true }
897
737
  };
898
738
  logProviderPayload("openai", "request payload", payload, request.debug);
899
739
  const stream = await client.chat.completions.create(payload);
@@ -901,7 +741,6 @@ var OpenAIAdapter = class _OpenAIAdapter {
901
741
  const collectedCitations = [];
902
742
  let citationIndex = 0;
903
743
  let usage;
904
- let adapterReasoningStarted = false;
905
744
  for await (const chunk of stream) {
906
745
  logProviderPayload("openai", "stream chunk", chunk, request.debug);
907
746
  if (request.signal?.aborted) {
@@ -912,22 +751,6 @@ var OpenAIAdapter = class _OpenAIAdapter {
912
751
  if (delta?.content) {
913
752
  yield { type: "message:delta", content: delta.content };
914
753
  }
915
- if (isOpenRouter) {
916
- const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
917
- if (rc) {
918
- const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
919
- if (rcText) {
920
- if (!adapterReasoningStarted) {
921
- yield { type: "thinking:start" };
922
- adapterReasoningStarted = true;
923
- }
924
- yield { type: "thinking:delta", content: rcText };
925
- }
926
- } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
927
- yield { type: "thinking:end" };
928
- adapterReasoningStarted = false;
929
- }
930
- }
931
754
  const annotations = delta?.annotations;
932
755
  if (annotations && annotations.length > 0) {
933
756
  for (const annotation of annotations) {
@@ -975,11 +798,6 @@ var OpenAIAdapter = class _OpenAIAdapter {
975
798
  };
976
799
  } else if (currentToolCall && toolCall.function?.arguments) {
977
800
  currentToolCall.arguments += toolCall.function.arguments;
978
- yield {
979
- type: "action:args",
980
- id: currentToolCall.id,
981
- args: currentToolCall.arguments
982
- };
983
801
  }
984
802
  }
985
803
  }
@@ -1055,24 +873,20 @@ var OpenAIAdapter = class _OpenAIAdapter {
1055
873
  name: openaiToolOptions.toolChoice.name
1056
874
  }
1057
875
  } : openaiToolOptions?.toolChoice;
1058
- const activeModel2 = request.config?.model || this.model;
1059
- const modelSlug2 = activeModel2.replace("openai/", "");
1060
- const isOSeries2 = /^o[1-9]/.test(modelSlug2);
1061
- const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
876
+ const modelIdForCompletePayload = request.config?.model || this.model;
1062
877
  const payload = {
1063
- model: activeModel2,
878
+ model: modelIdForCompletePayload,
1064
879
  messages,
1065
880
  tools: tools.length > 0 ? tools : void 0,
1066
881
  tool_choice: tools.length > 0 ? toolChoice : void 0,
1067
882
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
1068
- stream: false,
1069
- ...isOSeries2 ? {
1070
- max_completion_tokens: maxTokensValue2,
1071
- reasoning_effort: request.config?.reasoningEffort ?? "medium"
1072
- } : {
1073
- temperature: request.config?.temperature ?? this.config.temperature,
1074
- max_tokens: maxTokensValue2
1075
- }
883
+ ...buildOpenAITokenParams(
884
+ modelIdForCompletePayload,
885
+ request.config?.maxTokens ?? this.config.maxTokens,
886
+ request.config?.temperature ?? this.config.temperature
887
+ ),
888
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
889
+ stream: false
1076
890
  };
1077
891
  logProviderPayload("openai", "request payload", payload, request.debug);
1078
892
  const response = await client.chat.completions.create(payload);
@@ -1347,7 +1161,9 @@ var AnthropicAdapter = class {
1347
1161
  * Build common request options for both streaming and non-streaming
1348
1162
  */
1349
1163
  buildRequestOptions(request) {
1350
- const systemMessage = request.systemPrompt || "";
1164
+ const responseFormat = request.config?.responseFormat;
1165
+ const jsonObjectSuffix = responseFormat?.type === "json_object" ? "\n\nRespond with a single JSON object and no other text." : "";
1166
+ const systemMessage = (request.systemPrompt || "") + jsonObjectSuffix;
1351
1167
  let messages;
1352
1168
  if (request.rawMessages && request.rawMessages.length > 0) {
1353
1169
  messages = this.convertToAnthropicMessages(request.rawMessages);
@@ -1428,6 +1244,10 @@ var AnthropicAdapter = class {
1428
1244
  if (serverToolConfiguration) {
1429
1245
  options.server_tool_configuration = serverToolConfiguration;
1430
1246
  }
1247
+ const outputConfig = toAnthropicOutputConfig(responseFormat);
1248
+ if (outputConfig) {
1249
+ options.output_config = outputConfig;
1250
+ }
1431
1251
  if (this.config.thinking?.type === "enabled") {
1432
1252
  options.thinking = {
1433
1253
  type: "enabled",
@@ -1582,13 +1402,6 @@ var AnthropicAdapter = class {
1582
1402
  yield { type: "thinking:delta", content: event.delta.thinking };
1583
1403
  } else if (event.delta.type === "input_json_delta" && currentToolUse) {
1584
1404
  currentToolUse.input += event.delta.partial_json;
1585
- if (currentToolUse.name !== "web_search") {
1586
- yield {
1587
- type: "action:args",
1588
- id: currentToolUse.id,
1589
- args: currentToolUse.input
1590
- };
1591
- }
1592
1405
  }
1593
1406
  break;
1594
1407
  case "content_block_stop":
@@ -1795,12 +1608,14 @@ var OllamaAdapter = class {
1795
1608
  if (this.config.options) {
1796
1609
  Object.assign(ollamaOptions, this.config.options);
1797
1610
  }
1611
+ const ollamaFormat = toOllamaFormat(request.config?.responseFormat);
1798
1612
  const payload = {
1799
1613
  model: request.config?.model || this.model,
1800
1614
  messages,
1801
1615
  tools,
1802
1616
  stream: true,
1803
- options: ollamaOptions
1617
+ options: ollamaOptions,
1618
+ ...ollamaFormat !== void 0 ? { format: ollamaFormat } : {}
1804
1619
  };
1805
1620
  logProviderPayload("ollama", "request payload", payload, request.debug);
1806
1621
  const response = await fetch(`${this.baseUrl}/api/chat`, {
@@ -2091,6 +1906,12 @@ var GoogleAdapter = class {
2091
1906
  }
2092
1907
  const messageId = generateMessageId();
2093
1908
  yield { type: "message:start", id: messageId };
1909
+ const responseFormat = request.config?.responseFormat;
1910
+ const geminiSchema = toGeminiSchema(responseFormat);
1911
+ const responseFormatGenConfig = responseFormat ? {
1912
+ responseMimeType: "application/json",
1913
+ ...geminiSchema ? { responseJsonSchema: geminiSchema } : {}
1914
+ } : {};
2094
1915
  try {
2095
1916
  logProviderPayload(
2096
1917
  "google",
@@ -2102,7 +1923,8 @@ var GoogleAdapter = class {
2102
1923
  tools: toolsArray.length > 0 ? toolsArray : void 0,
2103
1924
  generationConfig: {
2104
1925
  temperature: request.config?.temperature ?? this.config.temperature,
2105
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
1926
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
1927
+ ...responseFormatGenConfig
2106
1928
  },
2107
1929
  messageParts: mergedContents[mergedContents.length - 1]?.parts
2108
1930
  },
@@ -2115,7 +1937,8 @@ var GoogleAdapter = class {
2115
1937
  tools: toolsArray.length > 0 ? toolsArray : void 0,
2116
1938
  generationConfig: {
2117
1939
  temperature: request.config?.temperature ?? this.config.temperature,
2118
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
1940
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
1941
+ ...responseFormatGenConfig
2119
1942
  }
2120
1943
  });
2121
1944
  const lastMessage = mergedContents[mergedContents.length - 1];
@@ -2282,6 +2105,12 @@ var GoogleAdapter = class {
2282
2105
  }
2283
2106
  }
2284
2107
  const tools = formatToolsForGemini(request.actions);
2108
+ const responseFormat = request.config?.responseFormat;
2109
+ const geminiSchema = toGeminiSchema(responseFormat);
2110
+ const responseFormatGenConfig = responseFormat ? {
2111
+ responseMimeType: "application/json",
2112
+ ...geminiSchema ? { responseJsonSchema: geminiSchema } : {}
2113
+ } : {};
2285
2114
  const payload = {
2286
2115
  model: modelId,
2287
2116
  history: mergedContents.slice(0, -1),
@@ -2289,7 +2118,8 @@ var GoogleAdapter = class {
2289
2118
  tools: tools ? [tools] : void 0,
2290
2119
  generationConfig: {
2291
2120
  temperature: request.config?.temperature ?? this.config.temperature,
2292
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
2121
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
2122
+ ...responseFormatGenConfig
2293
2123
  },
2294
2124
  messageParts: mergedContents[mergedContents.length - 1]?.parts
2295
2125
  };
@@ -2300,7 +2130,8 @@ var GoogleAdapter = class {
2300
2130
  tools: tools ? [tools] : void 0,
2301
2131
  generationConfig: {
2302
2132
  temperature: request.config?.temperature ?? this.config.temperature,
2303
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
2133
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
2134
+ ...responseFormatGenConfig
2304
2135
  }
2305
2136
  });
2306
2137
  const lastMessage = mergedContents[mergedContents.length - 1];
@@ -2439,6 +2270,7 @@ var AzureAdapter = class {
2439
2270
  tools,
2440
2271
  temperature: request.config?.temperature ?? this.config.temperature,
2441
2272
  max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
2273
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
2442
2274
  stream: true
2443
2275
  };
2444
2276
  logProviderPayload("azure", "request payload", payload, request.debug);
@@ -2538,7 +2370,8 @@ var AzureAdapter = class {
2538
2370
  messages,
2539
2371
  tools,
2540
2372
  temperature: request.config?.temperature ?? this.config.temperature,
2541
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens
2373
+ max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
2374
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat)
2542
2375
  };
2543
2376
  logProviderPayload("azure", "request payload", payload, request.debug);
2544
2377
  const response = await client.chat.completions.create(payload);