@yourgpt/llm-sdk 2.1.10-alpha.0 → 2.5.1-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/dist/adapters/index.d.mts +4 -38
  2. package/dist/adapters/index.d.ts +4 -38
  3. package/dist/adapters/index.js +158 -325
  4. package/dist/adapters/index.mjs +158 -325
  5. package/dist/base-C58Dsr9p.d.ts +259 -0
  6. package/dist/base-tNgbBaSo.d.mts +259 -0
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +8 -7
  10. package/dist/index.d.ts +8 -7
  11. package/dist/index.js +35 -43
  12. package/dist/index.mjs +35 -43
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +271 -212
  16. package/dist/providers/anthropic/index.mjs +271 -212
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/azure/index.js +49 -1
  20. package/dist/providers/azure/index.mjs +49 -1
  21. package/dist/providers/fireworks/index.d.mts +1 -1
  22. package/dist/providers/fireworks/index.d.ts +1 -1
  23. package/dist/providers/fireworks/index.js +56 -0
  24. package/dist/providers/fireworks/index.mjs +56 -0
  25. package/dist/providers/google/index.d.mts +3 -3
  26. package/dist/providers/google/index.d.ts +3 -3
  27. package/dist/providers/google/index.js +254 -510
  28. package/dist/providers/google/index.mjs +254 -510
  29. package/dist/providers/ollama/index.d.mts +4 -4
  30. package/dist/providers/ollama/index.d.ts +4 -4
  31. package/dist/providers/ollama/index.js +10 -2
  32. package/dist/providers/ollama/index.mjs +10 -2
  33. package/dist/providers/openai/index.d.mts +3 -3
  34. package/dist/providers/openai/index.d.ts +3 -3
  35. package/dist/providers/openai/index.js +269 -529
  36. package/dist/providers/openai/index.mjs +269 -529
  37. package/dist/providers/openrouter/index.d.mts +3 -7
  38. package/dist/providers/openrouter/index.d.ts +3 -7
  39. package/dist/providers/openrouter/index.js +365 -902
  40. package/dist/providers/openrouter/index.mjs +365 -902
  41. package/dist/providers/togetherai/index.d.mts +3 -3
  42. package/dist/providers/togetherai/index.d.ts +3 -3
  43. package/dist/providers/togetherai/index.js +259 -509
  44. package/dist/providers/togetherai/index.mjs +259 -509
  45. package/dist/providers/xai/index.d.mts +3 -3
  46. package/dist/providers/xai/index.d.ts +3 -3
  47. package/dist/providers/xai/index.js +258 -513
  48. package/dist/providers/xai/index.mjs +258 -513
  49. package/dist/{types-BNCmlJMs.d.mts → types-B6dhnguR.d.mts} +1 -1
  50. package/dist/{types-DhktekQ3.d.ts → types-BQ31QIsA.d.ts} +2 -1
  51. package/dist/{types-CMMQ8s2O.d.mts → types-BSSiJW2o.d.mts} +2 -1
  52. package/dist/{base-DN1EfKnE.d.mts → types-BkQCSiIt.d.mts} +388 -214
  53. package/dist/{base-DuUNxtVg.d.ts → types-BkQCSiIt.d.ts} +388 -214
  54. package/dist/{types-Pj-vpmoT.d.ts → types-CCxPmkmK.d.ts} +1 -1
  55. package/dist/yourgpt/index.d.mts +1 -1
  56. package/dist/yourgpt/index.d.ts +1 -1
  57. package/package.json +1 -1
  58. package/dist/types-CMvvDo-E.d.mts +0 -428
  59. package/dist/types-CMvvDo-E.d.ts +0 -428
@@ -114,6 +114,109 @@ function normalizeObjectJsonSchema(schema) {
114
114
  }
115
115
  return normalized;
116
116
  }
117
/**
 * Reports whether a model id starts with one of the prefixes this adapter
 * treats as an OpenAI reasoning family: `o1`, `o3`, `o4` or `gpt-5`
 * (matched case-insensitively at the start of the id).
 *
 * @param {string} [modelId] - Model identifier; a missing/empty id is not a reasoning model.
 * @returns {boolean} `true` when the id carries one of the reasoning prefixes.
 */
function isOpenAIReasoningModel(modelId) {
  const reasoningPrefix = /^(o1|o3|o4|gpt-5)/i;
  return Boolean(modelId) && reasoningPrefix.test(modelId);
}
121
/**
 * Builds the token-limit / sampling fields to spread into a chat-completions
 * payload.
 *
 * Models recognised by `isOpenAIReasoningModel` receive only
 * `max_completion_tokens`; every other model receives `max_tokens` together
 * with `temperature`.
 *
 * @param {string} [modelId] - Model identifier used to pick the parameter set.
 * @param {number} [maxTokens] - Output token limit (passed through as-is, may be undefined).
 * @param {number} [temperature] - Sampling temperature (only used for non-reasoning models).
 * @returns {object} Partial payload with the applicable fields.
 */
function buildOpenAITokenParams(modelId, maxTokens, temperature) {
  return isOpenAIReasoningModel(modelId)
    ? { max_completion_tokens: maxTokens }
    : { max_tokens: maxTokens, temperature };
}
127
/**
 * Keywords whose value maps arbitrary, user-chosen names to subschemas.
 * The names inside these maps are NOT schema keywords and must never be
 * dropped or renamed, even when they collide with a keyword such as
 * "pattern" or "oneOf".
 */
const SCHEMA_NAME_MAP_KEYWORDS = new Set([
  "properties",
  "patternProperties",
  "$defs",
  "definitions",
  "dependentSchemas"
]);

/**
 * Keywords whose value is literal data rather than a schema; their content is
 * carried over untouched.
 */
const SCHEMA_LITERAL_KEYWORDS = new Set(["enum", "const", "default", "examples"]);

/**
 * Returns a copy of a JSON schema with the given keywords removed (and
 * optionally renamed) at every nesting level.
 *
 * Only schema keywords are filtered. Names inside name→schema maps
 * (`properties`, `$defs`, ...) are preserved verbatim while their subschemas
 * are still processed, and literal data under `enum` / `const` / `default` /
 * `examples` is passed through by reference without being rewritten.
 *
 * @param {*} schema - Schema node: object, array of nodes, or a primitive (returned as-is).
 * @param {Set<string>} keysToDrop - Keywords to remove wherever they appear as keywords.
 * @param {object} [options]
 * @param {Object<string, string>} [options.renameKeys] - Keyword → replacement keyword.
 * @param {boolean} [options.forceAdditionalPropertiesFalse] - When set, every node whose
 *   resulting `type` is `"object"` gets `additionalProperties: false`.
 * @returns {*} The filtered copy; the input is not mutated.
 */
function stripSchemaKeys(schema, keysToDrop, options = {}) {
  if (Array.isArray(schema)) {
    return schema.map((item) => stripSchemaKeys(item, keysToDrop, options));
  }
  if (!schema || typeof schema !== "object") return schema;

  const renameKeys = options.renameKeys;
  const out = {};
  for (const [key, value] of Object.entries(schema)) {
    if (keysToDrop.has(key)) continue;
    // Own-property check so inherited members (e.g. "constructor") are never
    // mistaken for a rename rule.
    const hasRename =
      renameKeys != null && Object.prototype.hasOwnProperty.call(renameKeys, key);
    const renamed = hasRename ? (renameKeys[key] ?? key) : key;

    if (SCHEMA_LITERAL_KEYWORDS.has(key)) {
      // Literal data: not a schema, so nothing inside it is a keyword.
      out[renamed] = value;
    } else if (
      SCHEMA_NAME_MAP_KEYWORDS.has(key) &&
      value &&
      typeof value === "object" &&
      !Array.isArray(value)
    ) {
      // Keep every user-chosen name; only recurse into the subschemas.
      out[renamed] = Object.fromEntries(
        Object.entries(value).map(([name, subschema]) => [
          name,
          stripSchemaKeys(subschema, keysToDrop, options)
        ])
      );
    } else {
      out[renamed] = stripSchemaKeys(value, keysToDrop, options);
    }
  }
  if (options.forceAdditionalPropertiesFalse && out.type === "object") {
    out.additionalProperties = false;
  }
  return out;
}
145
/**
 * Converts the SDK response-format config into the `response_format` field of
 * a chat-completions payload.
 *
 * @param {object} [rf] - `{ type: "json_object" }` or
 *   `{ type: "json_schema", json_schema: { name, schema, strict? } }`.
 * @returns {object|undefined} The `response_format` value, or `undefined` when
 *   no format was requested or the config carries no `json_schema` payload
 *   (for example `{ type: "text" }`), so the request falls back to the
 *   provider default instead of throwing.
 */
function toOpenAIResponseFormat(rf) {
  if (!rf) return void 0;
  if (rf.type === "json_object") return { type: "json_object" };
  // Without a payload there is nothing to build a json_schema format from;
  // dereferencing it would raise a TypeError before the request is sent.
  if (!rf.json_schema) return void 0;
  const { name, schema, strict } = rf.json_schema;
  return {
    type: "json_schema",
    json_schema: {
      name,
      schema: normalizeObjectJsonSchema(schema),
      // Strict mode is the default unless the caller opts out explicitly.
      strict: strict ?? true
    }
  };
}
157
/**
 * Converts the SDK response-format config into the `text.format` value of a
 * Responses API payload.
 *
 * JSON-object mode is forwarded as `{ type: "json_object" }` so that it is
 * honoured on this path just like on the chat-completions path, rather than
 * being silently ignored.
 *
 * @param {object} [rf] - `{ type: "json_object" }` or
 *   `{ type: "json_schema", json_schema: { name, schema, strict? } }`.
 * @returns {object|undefined} The format object, or `undefined` when no
 *   format was requested, the type is not recognised, or a `json_schema`
 *   format carries no payload.
 */
function toOpenAIResponsesTextFormat(rf) {
  if (!rf) return void 0;
  if (rf.type === "json_object") return { type: "json_object" };
  if (rf.type !== "json_schema" || !rf.json_schema) return void 0;
  const { name, schema, strict } = rf.json_schema;
  // The Responses API takes name/schema/strict flattened next to `type`,
  // unlike chat-completions which nests them under `json_schema`.
  return {
    type: "json_schema",
    name,
    schema: normalizeObjectJsonSchema(schema),
    strict: strict ?? true
  };
}
166
/**
 * Schema keywords that `toAnthropicOutputConfig` removes from a JSON schema
 * before placing it in the Anthropic output configuration.
 */
var ANTHROPIC_UNSUPPORTED_KEYS = /* @__PURE__ */ new Set([
  // numeric bounds
  "minimum",
  "maximum",
  "exclusiveMinimum",
  "exclusiveMaximum",
  "multipleOf",
  // string length
  "minLength",
  "maxLength",
  // array / object size
  "minItems",
  "maxItems",
  "minProperties",
  "maxProperties",
  // misc
  "pattern",
  "$schema"
]);

/**
 * Builds the `output_config` option for an Anthropic request from the SDK
 * response-format config.
 *
 * The schema is run through `stripSchemaKeys` with the keyword set above,
 * `oneOf` renamed to `anyOf`, and `additionalProperties: false` forced on
 * object nodes.
 *
 * @param {object} [rf] - Response-format config; only `type: "json_schema"` produces a result.
 * @returns {object|undefined} `{ format: { type: "json_schema", schema } }`, or
 *   `undefined` for a missing config or any other format type.
 */
function toAnthropicOutputConfig(rf) {
  if (rf?.type !== "json_schema") return void 0;
  const stripOptions = {
    forceAdditionalPropertiesFalse: true,
    renameKeys: { oneOf: "anyOf" }
  };
  const schema = stripSchemaKeys(
    rf.json_schema.schema,
    ANTHROPIC_UNSUPPORTED_KEYS,
    stripOptions
  );
  return { format: { type: "json_schema", schema } };
}
198
/**
 * Schema keywords that `toGeminiSchema` removes from a JSON schema before it
 * is handed to the Gemini generation config.
 */
var GEMINI_UNSUPPORTED_KEYS = /* @__PURE__ */ new Set([
  // composition
  "oneOf",
  "anyOf",
  // references and definition containers
  "$ref",
  "$defs",
  "definitions",
  // misc
  "pattern",
  "$schema",
  "additionalProperties"
]);

/**
 * Produces the schema used for Gemini structured output from the SDK
 * response-format config, by running it through `stripSchemaKeys` with the
 * keyword set above (no renames, no forced `additionalProperties`).
 *
 * @param {object} [rf] - Response-format config; only `type: "json_schema"` produces a result.
 * @returns {object|undefined} The filtered schema, or `undefined` for a
 *   missing config or any other format type.
 */
function toGeminiSchema(rf) {
  if (rf?.type !== "json_schema") return void 0;
  const { schema } = rf.json_schema;
  return stripSchemaKeys(schema, GEMINI_UNSUPPORTED_KEYS);
}
215
/**
 * Converts the SDK response-format config into the `format` field of an
 * Ollama chat payload.
 *
 * @param {object} [rf] - `{ type: "json_object" }` or
 *   `{ type: "json_schema", json_schema: { schema } }`.
 * @returns {"json"|object|undefined} `"json"` for JSON-object mode, the raw
 *   schema for JSON-schema mode, or `undefined` when no format was requested
 *   or the config carries no `json_schema` payload (the caller omits `format`
 *   when the result is `undefined`).
 */
function toOllamaFormat(rf) {
  if (!rf) return void 0;
  if (rf.type === "json_object") return "json";
  // Optional chaining: a config without a payload yields "no format"
  // instead of a TypeError while building the request.
  return rf.json_schema?.schema;
}
117
220
  function formatTools(actions) {
118
221
  return actions.map((action) => ({
119
222
  type: "function",
@@ -382,7 +485,6 @@ var OpenAIAdapter = class _OpenAIAdapter {
382
485
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
383
486
  if (baseUrl.includes("x.ai")) return "xai";
384
487
  if (baseUrl.includes("azure")) return "azure";
385
- if (baseUrl.includes("openrouter.ai")) return "openrouter";
386
488
  return "openai";
387
489
  }
388
490
  async getClient() {
@@ -482,259 +584,12 @@ var OpenAIAdapter = class _OpenAIAdapter {
482
584
  rawResponse: response
483
585
  };
484
586
  }
485
- /**
486
- * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
487
- * reasoning content on the chat-completions endpoint. To surface reasoning
488
- * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
489
- * Responses API, which streams `response.reasoning_summary_text.delta` events.
490
- *
491
- * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
492
- * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
493
- */
494
- isOpenAIReasoningModelOnOpenRouter(activeModel) {
495
- if (this.provider !== "openrouter") return false;
496
- return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
497
- }
498
- /**
499
- * Convert ActionDefinition[] (the chat-completions tool shape used by the
500
- * adapter) to the Responses API tool shape.
501
- */
502
- buildResponsesToolsFromActions(actions) {
503
- if (!actions || actions.length === 0) return void 0;
504
- const formatted = formatTools(actions);
505
- return formatted.map((t) => ({
506
- type: "function",
507
- name: t.function.name,
508
- description: t.function.description,
509
- parameters: t.function.parameters
510
- }));
511
- }
512
- /**
513
- * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
514
- *
515
- * Maps Responses API SSE events back to the same StreamEvent shapes the
516
- * chat-completions path emits, so downstream consumers (processChunk.ts,
517
- * frontend tool handlers, plan approval, specialist delegations) see
518
- * identical events regardless of which path produced them.
519
- *
520
- * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
521
- * response.output_text.delta → message:delta
522
- * response.output_item.added (function_call) → action:start (queued buffer)
523
- * response.function_call_arguments.delta → action:args (progressive)
524
- * response.output_item.done (function_call) → final action:args + action:end
525
- * response.completed → message:end + done(usage)
526
- * response.error → error
527
- */
528
- async *streamWithResponsesAPI(request, activeModel, messageId) {
529
- const client = await this.getClient();
530
- const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
531
- const payload = {
532
- model: activeModel,
533
- input: this.buildResponsesInput(request),
534
- stream: true,
535
- reasoning: {
536
- effort: request.config?.reasoningEffort ?? "medium",
537
- summary: "auto"
538
- }
539
- };
540
- if (request.systemPrompt) payload.instructions = request.systemPrompt;
541
- if (typeof maxTokensValue === "number")
542
- payload.max_output_tokens = maxTokensValue;
543
- const tools = this.buildResponsesToolsFromActions(request.actions);
544
- if (tools && tools.length > 0) payload.tools = tools;
545
- logProviderPayload(
546
- "openai",
547
- "responses-api request payload",
548
- payload,
549
- request.debug
550
- );
551
- let stream;
552
- try {
553
- stream = await client.responses.create(payload);
554
- } catch (error) {
555
- yield {
556
- type: "error",
557
- message: error instanceof Error ? error.message : "Unknown error",
558
- code: "OPENAI_RESPONSES_ERROR"
559
- };
560
- return;
561
- }
562
- const toolBuffers = /* @__PURE__ */ new Map();
563
- const itemIdToCallId = /* @__PURE__ */ new Map();
564
- let usage;
565
- let reasoningStarted = false;
566
- let textStarted = false;
567
- let finishEmitted = false;
568
- const resolveCallId = (evt) => {
569
- if (evt?.call_id) return evt.call_id;
570
- if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
571
- if (evt?.item?.call_id) return evt.item.call_id;
572
- if (evt?.item?.id) return evt.item.id;
573
- return "";
574
- };
575
- try {
576
- for await (const evt of stream) {
577
- logProviderPayload(
578
- "openai",
579
- "responses-api stream chunk",
580
- evt,
581
- request.debug
582
- );
583
- if (request.signal?.aborted) break;
584
- const t = evt?.type ?? "";
585
- if (t === "response.reasoning_summary_text.delta") {
586
- const delta = evt.delta ?? "";
587
- if (!delta) continue;
588
- if (!reasoningStarted) {
589
- yield { type: "thinking:start" };
590
- reasoningStarted = true;
591
- }
592
- yield { type: "thinking:delta", content: delta };
593
- continue;
594
- }
595
- if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
596
- continue;
597
- }
598
- if (t === "response.output_text.delta") {
599
- const text = evt.delta ?? "";
600
- if (!text) continue;
601
- if (reasoningStarted && !textStarted) {
602
- yield { type: "thinking:end" };
603
- textStarted = true;
604
- }
605
- yield { type: "message:delta", content: text };
606
- continue;
607
- }
608
- if (t === "response.output_item.added") {
609
- const item = evt.item;
610
- if (item?.type === "function_call") {
611
- const callId = item.call_id ?? item.id ?? "";
612
- const itemId = item.id ?? callId;
613
- if (callId) {
614
- if (itemId && itemId !== callId) {
615
- itemIdToCallId.set(itemId, callId);
616
- }
617
- if (!toolBuffers.has(callId)) {
618
- toolBuffers.set(callId, {
619
- id: callId,
620
- name: item.name ?? "",
621
- arguments: item.arguments ?? "",
622
- emittedStart: false
623
- });
624
- }
625
- const buf = toolBuffers.get(callId);
626
- if (buf.name && !buf.emittedStart) {
627
- yield { type: "action:start", id: buf.id, name: buf.name };
628
- buf.emittedStart = true;
629
- }
630
- }
631
- }
632
- continue;
633
- }
634
- if (t === "response.function_call_arguments.delta") {
635
- const callId = resolveCallId(evt);
636
- const delta = evt.delta ?? "";
637
- if (!callId || !delta) continue;
638
- let buf = toolBuffers.get(callId);
639
- if (!buf) {
640
- buf = { id: callId, name: "", arguments: "", emittedStart: false };
641
- toolBuffers.set(callId, buf);
642
- }
643
- buf.arguments += delta;
644
- if (buf.emittedStart) {
645
- yield {
646
- type: "action:args",
647
- id: buf.id,
648
- args: buf.arguments
649
- };
650
- }
651
- continue;
652
- }
653
- if (t === "response.output_item.done") {
654
- const item = evt.item;
655
- if (item?.type === "function_call") {
656
- const callId = item.call_id ?? item.id ?? "";
657
- const buf = toolBuffers.get(callId);
658
- const name = buf?.name || item.name || "";
659
- const argsStr = buf?.arguments || item.arguments || "{}";
660
- if (callId && name) {
661
- if (!buf?.emittedStart) {
662
- yield { type: "action:start", id: callId, name };
663
- }
664
- yield {
665
- type: "action:args",
666
- id: callId,
667
- args: argsStr
668
- };
669
- yield {
670
- type: "action:end",
671
- id: callId,
672
- name
673
- };
674
- }
675
- toolBuffers.delete(callId);
676
- }
677
- continue;
678
- }
679
- if (t === "response.completed") {
680
- const u = evt.response?.usage;
681
- if (u) {
682
- usage = {
683
- prompt_tokens: u.input_tokens ?? 0,
684
- completion_tokens: u.output_tokens ?? 0,
685
- total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
686
- };
687
- }
688
- for (const buf of toolBuffers.values()) {
689
- if (!buf.id || !buf.name) continue;
690
- if (!buf.emittedStart) {
691
- yield { type: "action:start", id: buf.id, name: buf.name };
692
- }
693
- yield {
694
- type: "action:args",
695
- id: buf.id,
696
- args: buf.arguments || "{}"
697
- };
698
- yield { type: "action:end", id: buf.id, name: buf.name };
699
- }
700
- toolBuffers.clear();
701
- if (reasoningStarted && !textStarted) {
702
- yield { type: "thinking:end" };
703
- }
704
- yield { type: "message:end" };
705
- yield { type: "done", usage };
706
- finishEmitted = true;
707
- continue;
708
- }
709
- if (t === "response.error" || t === "error") {
710
- const msg = evt.error?.message || evt.message || "Responses API error";
711
- yield {
712
- type: "error",
713
- message: msg,
714
- code: "OPENAI_RESPONSES_ERROR"
715
- };
716
- return;
717
- }
718
- }
719
- } catch (error) {
720
- yield {
721
- type: "error",
722
- message: error instanceof Error ? error.message : "Unknown error",
723
- code: "OPENAI_RESPONSES_ERROR"
724
- };
725
- return;
726
- }
727
- if (!finishEmitted) {
728
- if (reasoningStarted && !textStarted) {
729
- yield { type: "thinking:end" };
730
- }
731
- yield { type: "message:end" };
732
- yield { type: "done", usage };
733
- }
734
- }
735
587
  async completeWithResponses(request) {
736
588
  const client = await this.getClient();
737
589
  const openaiToolOptions = request.providerToolOptions?.openai;
590
+ const responsesTextFormat = toOpenAIResponsesTextFormat(
591
+ request.config?.responseFormat
592
+ );
738
593
  const payload = {
739
594
  model: request.config?.model || this.model,
740
595
  instructions: request.systemPrompt,
@@ -744,6 +599,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
744
599
  parallel_tool_calls: openaiToolOptions?.parallelToolCalls,
745
600
  temperature: request.config?.temperature ?? this.config.temperature,
746
601
  max_output_tokens: request.config?.maxTokens ?? this.config.maxTokens,
602
+ ...responsesTextFormat ? { text: { format: responsesTextFormat } } : {},
747
603
  stream: false
748
604
  };
749
605
  logProviderPayload("openai", "request payload", payload, request.debug);
@@ -865,37 +721,21 @@ var OpenAIAdapter = class _OpenAIAdapter {
865
721
  name: openaiToolOptions.toolChoice.name
866
722
  }
867
723
  } : openaiToolOptions?.toolChoice;
868
- const isOpenRouter = this.provider === "openrouter";
869
- const activeModel = request.config?.model || this.model;
870
- const modelSlug = activeModel.replace("openai/", "");
871
- const isOSeries = /^o[1-9]/.test(modelSlug);
872
- const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
873
- if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
874
- yield* this.streamWithResponsesAPI(request, activeModel, messageId);
875
- return;
876
- }
877
- const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
724
+ const modelIdForPayload = request.config?.model || this.model;
878
725
  const payload = {
879
- model: activeModel,
726
+ model: modelIdForPayload,
880
727
  messages,
881
728
  tools: tools.length > 0 ? tools : void 0,
882
729
  tool_choice: tools.length > 0 ? toolChoice : void 0,
883
730
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
731
+ ...buildOpenAITokenParams(
732
+ modelIdForPayload,
733
+ request.config?.maxTokens ?? this.config.maxTokens,
734
+ request.config?.temperature ?? this.config.temperature
735
+ ),
736
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
884
737
  stream: true,
885
- stream_options: { include_usage: true },
886
- // o-series: use max_completion_tokens + reasoning_effort, no temperature
887
- // regular models: use max_tokens + temperature
888
- ...isOSeries ? {
889
- max_completion_tokens: maxTokensValue,
890
- reasoning_effort: request.config?.reasoningEffort ?? "medium"
891
- } : {
892
- temperature: request.config?.temperature ?? this.config.temperature,
893
- max_tokens: maxTokensValue
894
- },
895
- // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
896
- // When disableThinking=true we must explicitly send include_reasoning:false because
897
- // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
898
- ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
738
+ stream_options: { include_usage: true }
899
739
  };
900
740
  logProviderPayload("openai", "request payload", payload, request.debug);
901
741
  const stream = await client.chat.completions.create(payload);
@@ -903,7 +743,6 @@ var OpenAIAdapter = class _OpenAIAdapter {
903
743
  const collectedCitations = [];
904
744
  let citationIndex = 0;
905
745
  let usage;
906
- let adapterReasoningStarted = false;
907
746
  for await (const chunk of stream) {
908
747
  logProviderPayload("openai", "stream chunk", chunk, request.debug);
909
748
  if (request.signal?.aborted) {
@@ -914,22 +753,6 @@ var OpenAIAdapter = class _OpenAIAdapter {
914
753
  if (delta?.content) {
915
754
  yield { type: "message:delta", content: delta.content };
916
755
  }
917
- if (isOpenRouter) {
918
- const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
919
- if (rc) {
920
- const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
921
- if (rcText) {
922
- if (!adapterReasoningStarted) {
923
- yield { type: "thinking:start" };
924
- adapterReasoningStarted = true;
925
- }
926
- yield { type: "thinking:delta", content: rcText };
927
- }
928
- } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
929
- yield { type: "thinking:end" };
930
- adapterReasoningStarted = false;
931
- }
932
- }
933
756
  const annotations = delta?.annotations;
934
757
  if (annotations && annotations.length > 0) {
935
758
  for (const annotation of annotations) {
@@ -977,11 +800,6 @@ var OpenAIAdapter = class _OpenAIAdapter {
977
800
  };
978
801
  } else if (currentToolCall && toolCall.function?.arguments) {
979
802
  currentToolCall.arguments += toolCall.function.arguments;
980
- yield {
981
- type: "action:args",
982
- id: currentToolCall.id,
983
- args: currentToolCall.arguments
984
- };
985
803
  }
986
804
  }
987
805
  }
@@ -1057,24 +875,20 @@ var OpenAIAdapter = class _OpenAIAdapter {
1057
875
  name: openaiToolOptions.toolChoice.name
1058
876
  }
1059
877
  } : openaiToolOptions?.toolChoice;
1060
- const activeModel2 = request.config?.model || this.model;
1061
- const modelSlug2 = activeModel2.replace("openai/", "");
1062
- const isOSeries2 = /^o[1-9]/.test(modelSlug2);
1063
- const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
878
+ const modelIdForCompletePayload = request.config?.model || this.model;
1064
879
  const payload = {
1065
- model: activeModel2,
880
+ model: modelIdForCompletePayload,
1066
881
  messages,
1067
882
  tools: tools.length > 0 ? tools : void 0,
1068
883
  tool_choice: tools.length > 0 ? toolChoice : void 0,
1069
884
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
1070
- stream: false,
1071
- ...isOSeries2 ? {
1072
- max_completion_tokens: maxTokensValue2,
1073
- reasoning_effort: request.config?.reasoningEffort ?? "medium"
1074
- } : {
1075
- temperature: request.config?.temperature ?? this.config.temperature,
1076
- max_tokens: maxTokensValue2
1077
- }
885
+ ...buildOpenAITokenParams(
886
+ modelIdForCompletePayload,
887
+ request.config?.maxTokens ?? this.config.maxTokens,
888
+ request.config?.temperature ?? this.config.temperature
889
+ ),
890
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
891
+ stream: false
1078
892
  };
1079
893
  logProviderPayload("openai", "request payload", payload, request.debug);
1080
894
  const response = await client.chat.completions.create(payload);
@@ -1349,7 +1163,9 @@ var AnthropicAdapter = class {
1349
1163
  * Build common request options for both streaming and non-streaming
1350
1164
  */
1351
1165
  buildRequestOptions(request) {
1352
- const systemMessage = request.systemPrompt || "";
1166
+ const responseFormat = request.config?.responseFormat;
1167
+ const jsonObjectSuffix = responseFormat?.type === "json_object" ? "\n\nRespond with a single JSON object and no other text." : "";
1168
+ const systemMessage = (request.systemPrompt || "") + jsonObjectSuffix;
1353
1169
  let messages;
1354
1170
  if (request.rawMessages && request.rawMessages.length > 0) {
1355
1171
  messages = this.convertToAnthropicMessages(request.rawMessages);
@@ -1430,6 +1246,10 @@ var AnthropicAdapter = class {
1430
1246
  if (serverToolConfiguration) {
1431
1247
  options.server_tool_configuration = serverToolConfiguration;
1432
1248
  }
1249
+ const outputConfig = toAnthropicOutputConfig(responseFormat);
1250
+ if (outputConfig) {
1251
+ options.output_config = outputConfig;
1252
+ }
1433
1253
  if (this.config.thinking?.type === "enabled") {
1434
1254
  options.thinking = {
1435
1255
  type: "enabled",
@@ -1584,13 +1404,6 @@ var AnthropicAdapter = class {
1584
1404
  yield { type: "thinking:delta", content: event.delta.thinking };
1585
1405
  } else if (event.delta.type === "input_json_delta" && currentToolUse) {
1586
1406
  currentToolUse.input += event.delta.partial_json;
1587
- if (currentToolUse.name !== "web_search") {
1588
- yield {
1589
- type: "action:args",
1590
- id: currentToolUse.id,
1591
- args: currentToolUse.input
1592
- };
1593
- }
1594
1407
  }
1595
1408
  break;
1596
1409
  case "content_block_stop":
@@ -1797,12 +1610,14 @@ var OllamaAdapter = class {
1797
1610
  if (this.config.options) {
1798
1611
  Object.assign(ollamaOptions, this.config.options);
1799
1612
  }
1613
+ const ollamaFormat = toOllamaFormat(request.config?.responseFormat);
1800
1614
  const payload = {
1801
1615
  model: request.config?.model || this.model,
1802
1616
  messages,
1803
1617
  tools,
1804
1618
  stream: true,
1805
- options: ollamaOptions
1619
+ options: ollamaOptions,
1620
+ ...ollamaFormat !== void 0 ? { format: ollamaFormat } : {}
1806
1621
  };
1807
1622
  logProviderPayload("ollama", "request payload", payload, request.debug);
1808
1623
  const response = await fetch(`${this.baseUrl}/api/chat`, {
@@ -2093,6 +1908,12 @@ var GoogleAdapter = class {
2093
1908
  }
2094
1909
  const messageId = generateMessageId();
2095
1910
  yield { type: "message:start", id: messageId };
1911
+ const responseFormat = request.config?.responseFormat;
1912
+ const geminiSchema = toGeminiSchema(responseFormat);
1913
+ const responseFormatGenConfig = responseFormat ? {
1914
+ responseMimeType: "application/json",
1915
+ ...geminiSchema ? { responseJsonSchema: geminiSchema } : {}
1916
+ } : {};
2096
1917
  try {
2097
1918
  logProviderPayload(
2098
1919
  "google",
@@ -2104,7 +1925,8 @@ var GoogleAdapter = class {
2104
1925
  tools: toolsArray.length > 0 ? toolsArray : void 0,
2105
1926
  generationConfig: {
2106
1927
  temperature: request.config?.temperature ?? this.config.temperature,
2107
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
1928
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
1929
+ ...responseFormatGenConfig
2108
1930
  },
2109
1931
  messageParts: mergedContents[mergedContents.length - 1]?.parts
2110
1932
  },
@@ -2117,7 +1939,8 @@ var GoogleAdapter = class {
2117
1939
  tools: toolsArray.length > 0 ? toolsArray : void 0,
2118
1940
  generationConfig: {
2119
1941
  temperature: request.config?.temperature ?? this.config.temperature,
2120
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
1942
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
1943
+ ...responseFormatGenConfig
2121
1944
  }
2122
1945
  });
2123
1946
  const lastMessage = mergedContents[mergedContents.length - 1];
@@ -2284,6 +2107,12 @@ var GoogleAdapter = class {
2284
2107
  }
2285
2108
  }
2286
2109
  const tools = formatToolsForGemini(request.actions);
2110
+ const responseFormat = request.config?.responseFormat;
2111
+ const geminiSchema = toGeminiSchema(responseFormat);
2112
+ const responseFormatGenConfig = responseFormat ? {
2113
+ responseMimeType: "application/json",
2114
+ ...geminiSchema ? { responseJsonSchema: geminiSchema } : {}
2115
+ } : {};
2287
2116
  const payload = {
2288
2117
  model: modelId,
2289
2118
  history: mergedContents.slice(0, -1),
@@ -2291,7 +2120,8 @@ var GoogleAdapter = class {
2291
2120
  tools: tools ? [tools] : void 0,
2292
2121
  generationConfig: {
2293
2122
  temperature: request.config?.temperature ?? this.config.temperature,
2294
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
2123
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
2124
+ ...responseFormatGenConfig
2295
2125
  },
2296
2126
  messageParts: mergedContents[mergedContents.length - 1]?.parts
2297
2127
  };
@@ -2302,7 +2132,8 @@ var GoogleAdapter = class {
2302
2132
  tools: tools ? [tools] : void 0,
2303
2133
  generationConfig: {
2304
2134
  temperature: request.config?.temperature ?? this.config.temperature,
2305
- maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens
2135
+ maxOutputTokens: request.config?.maxTokens ?? this.config.maxTokens,
2136
+ ...responseFormatGenConfig
2306
2137
  }
2307
2138
  });
2308
2139
  const lastMessage = mergedContents[mergedContents.length - 1];
@@ -2441,6 +2272,7 @@ var AzureAdapter = class {
2441
2272
  tools,
2442
2273
  temperature: request.config?.temperature ?? this.config.temperature,
2443
2274
  max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
2275
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat),
2444
2276
  stream: true
2445
2277
  };
2446
2278
  logProviderPayload("azure", "request payload", payload, request.debug);
@@ -2540,7 +2372,8 @@ var AzureAdapter = class {
2540
2372
  messages,
2541
2373
  tools,
2542
2374
  temperature: request.config?.temperature ?? this.config.temperature,
2543
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens
2375
+ max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
2376
+ response_format: toOpenAIResponseFormat(request.config?.responseFormat)
2544
2377
  };
2545
2378
  logProviderPayload("azure", "request payload", payload, request.debug);
2546
2379
  const response = await client.chat.completions.create(payload);