@yourgpt/llm-sdk 2.1.8 → 2.1.10-alpha.0

Files changed (51)
  1. package/dist/adapters/index.d.mts +38 -4
  2. package/dist/adapters/index.d.ts +38 -4
  3. package/dist/adapters/index.js +318 -8
  4. package/dist/adapters/index.mjs +318 -8
  5. package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
  6. package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +7 -7
  10. package/dist/index.d.ts +7 -7
  11. package/dist/index.js +43 -23
  12. package/dist/index.mjs +43 -23
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +17 -0
  16. package/dist/providers/anthropic/index.mjs +17 -0
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/fireworks/index.d.mts +1 -1
  20. package/dist/providers/fireworks/index.d.ts +1 -1
  21. package/dist/providers/google/index.d.mts +3 -3
  22. package/dist/providers/google/index.d.ts +3 -3
  23. package/dist/providers/google/index.js +311 -8
  24. package/dist/providers/google/index.mjs +311 -8
  25. package/dist/providers/ollama/index.d.mts +4 -4
  26. package/dist/providers/ollama/index.d.ts +4 -4
  27. package/dist/providers/openai/index.d.mts +3 -3
  28. package/dist/providers/openai/index.d.ts +3 -3
  29. package/dist/providers/openai/index.js +321 -8
  30. package/dist/providers/openai/index.mjs +321 -8
  31. package/dist/providers/openrouter/index.d.mts +7 -3
  32. package/dist/providers/openrouter/index.d.ts +7 -3
  33. package/dist/providers/openrouter/index.js +601 -11
  34. package/dist/providers/openrouter/index.mjs +601 -11
  35. package/dist/providers/togetherai/index.d.mts +61 -2
  36. package/dist/providers/togetherai/index.d.ts +61 -2
  37. package/dist/providers/togetherai/index.js +1030 -2
  38. package/dist/providers/togetherai/index.mjs +1029 -2
  39. package/dist/providers/xai/index.d.mts +3 -3
  40. package/dist/providers/xai/index.d.ts +3 -3
  41. package/dist/providers/xai/index.js +311 -8
  42. package/dist/providers/xai/index.mjs +311 -8
  43. package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
  44. package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
  45. package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
  46. package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
  47. package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
  48. package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
  49. package/dist/yourgpt/index.d.mts +1 -1
  50. package/dist/yourgpt/index.d.ts +1 -1
  51. package/package.json +1 -1
@@ -163,8 +163,18 @@ function openai(modelId, options = {}) {
             name: tc.function?.name ?? "",
             arguments: tc.function?.arguments ?? ""
           };
+          yield {
+            type: "tool-call-start",
+            toolCallId: tc.id,
+            toolName: tc.function?.name ?? ""
+          };
         } else if (currentToolCall && tc.function?.arguments) {
           currentToolCall.arguments += tc.function.arguments;
+          yield {
+            type: "tool-call-delta",
+            toolCallId: currentToolCall.id,
+            argsText: currentToolCall.arguments
+          };
         }
       }
     }
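
Both new events ride alongside the existing message deltas, and `argsText` carries the full accumulated argument string rather than just the increment. A minimal consumer sketch in TypeScript; only the event names and fields come from the hunk above, the types and helper are inferred for illustration:

```ts
// Sketch: consuming the new tool-call events from the openai() stream.
// Event names/fields come from the hunk above; the union type is inferred.
type ToolCallEvent =
  | { type: "tool-call-start"; toolCallId: string; toolName: string }
  | { type: "tool-call-delta"; toolCallId: string; argsText: string };

async function collectToolCalls(stream: AsyncIterable<ToolCallEvent>) {
  const argsByCall = new Map<string, string>();
  for await (const event of stream) {
    if (event.type === "tool-call-start") {
      argsByCall.set(event.toolCallId, "");
    } else if (event.type === "tool-call-delta") {
      // argsText is the full accumulated argument string, not an increment,
      // so overwrite rather than append.
      argsByCall.set(event.toolCallId, event.argsText);
    }
  }
  return argsByCall;
}
```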
@@ -485,6 +495,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
     if (baseUrl.includes("x.ai")) return "xai";
     if (baseUrl.includes("azure")) return "azure";
+    if (baseUrl.includes("openrouter.ai")) return "openrouter";
     return "openai";
   }
   async getClient() {
@@ -584,6 +595,256 @@ var OpenAIAdapter = class _OpenAIAdapter {
       rawResponse: response
     };
   }
+  /**
+   * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+   * reasoning content on the chat-completions endpoint. To surface reasoning
+   * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+   * Responses API, which streams `response.reasoning_summary_text.delta` events.
+   *
+   * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+   * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+   */
+  isOpenAIReasoningModelOnOpenRouter(activeModel) {
+    if (this.provider !== "openrouter") return false;
+    return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+  }
+  /**
+   * Convert ActionDefinition[] (the chat-completions tool shape used by the
+   * adapter) to the Responses API tool shape.
+   */
+  buildResponsesToolsFromActions(actions) {
+    if (!actions || actions.length === 0) return void 0;
+    const formatted = formatTools(actions);
+    return formatted.map((t) => ({
+      type: "function",
+      name: t.function.name,
+      description: t.function.description,
+      parameters: t.function.parameters
+    }));
+  }
+  /**
+   * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+   *
+   * Maps Responses API SSE events back to the same StreamEvent shapes the
+   * chat-completions path emits, so downstream consumers (processChunk.ts,
+   * frontend tool handlers, plan approval, specialist delegations) see
+   * identical events regardless of which path produced them.
+   *
+   * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+   * response.output_text.delta → message:delta
+   * response.output_item.added (function_call) → action:start (queued buffer)
+   * response.function_call_arguments.delta → action:args (progressive)
+   * response.output_item.done (function_call) → final action:args + action:end
+   * response.completed → message:end + done(usage)
+   * response.error → error
+   */
+  async *streamWithResponsesAPI(request, activeModel, messageId) {
+    const client = await this.getClient();
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+    const payload = {
+      model: activeModel,
+      input: this.buildResponsesInput(request),
+      stream: true,
+      reasoning: {
+        effort: request.config?.reasoningEffort ?? "medium",
+        summary: "auto"
+      }
+    };
+    if (request.systemPrompt) payload.instructions = request.systemPrompt;
+    if (typeof maxTokensValue === "number")
+      payload.max_output_tokens = maxTokensValue;
+    const tools = this.buildResponsesToolsFromActions(request.actions);
+    if (tools && tools.length > 0) payload.tools = tools;
+    logProviderPayload(
+      "openai",
+      "responses-api request payload",
+      payload,
+      request.debug
+    );
+    let stream;
+    try {
+      stream = await client.responses.create(payload);
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    const toolBuffers = /* @__PURE__ */ new Map();
+    const itemIdToCallId = /* @__PURE__ */ new Map();
+    let usage;
+    let reasoningStarted = false;
+    let textStarted = false;
+    let finishEmitted = false;
+    const resolveCallId = (evt) => {
+      if (evt?.call_id) return evt.call_id;
+      if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+      if (evt?.item?.call_id) return evt.item.call_id;
+      if (evt?.item?.id) return evt.item.id;
+      return "";
+    };
+    try {
+      for await (const evt of stream) {
+        logProviderPayload(
+          "openai",
+          "responses-api stream chunk",
+          evt,
+          request.debug
+        );
+        if (request.signal?.aborted) break;
+        const t = evt?.type ?? "";
+        if (t === "response.reasoning_summary_text.delta") {
+          const delta = evt.delta ?? "";
+          if (!delta) continue;
+          if (!reasoningStarted) {
+            yield { type: "thinking:start" };
+            reasoningStarted = true;
+          }
+          yield { type: "thinking:delta", content: delta };
+          continue;
+        }
+        if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+          continue;
+        }
+        if (t === "response.output_text.delta") {
+          const text = evt.delta ?? "";
+          if (!text) continue;
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+            textStarted = true;
+          }
+          yield { type: "message:delta", content: text };
+          continue;
+        }
+        if (t === "response.output_item.added") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const itemId = item.id ?? callId;
+            if (callId) {
+              if (itemId && itemId !== callId) {
+                itemIdToCallId.set(itemId, callId);
+              }
+              if (!toolBuffers.has(callId)) {
+                toolBuffers.set(callId, {
+                  id: callId,
+                  name: item.name ?? "",
+                  arguments: item.arguments ?? "",
+                  emittedStart: false
+                });
+              }
+              const buf = toolBuffers.get(callId);
+              if (buf.name && !buf.emittedStart) {
+                yield { type: "action:start", id: buf.id, name: buf.name };
+                buf.emittedStart = true;
+              }
+            }
+          }
+          continue;
+        }
+        if (t === "response.function_call_arguments.delta") {
+          const callId = resolveCallId(evt);
+          const delta = evt.delta ?? "";
+          if (!callId || !delta) continue;
+          let buf = toolBuffers.get(callId);
+          if (!buf) {
+            buf = { id: callId, name: "", arguments: "", emittedStart: false };
+            toolBuffers.set(callId, buf);
+          }
+          buf.arguments += delta;
+          if (buf.emittedStart) {
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments
+            };
+          }
+          continue;
+        }
+        if (t === "response.output_item.done") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const buf = toolBuffers.get(callId);
+            const name = buf?.name || item.name || "";
+            const argsStr = buf?.arguments || item.arguments || "{}";
+            if (callId && name) {
+              if (!buf?.emittedStart) {
+                yield { type: "action:start", id: callId, name };
+              }
+              yield {
+                type: "action:args",
+                id: callId,
+                args: argsStr
+              };
+              yield {
+                type: "action:end",
+                id: callId,
+                name
+              };
+            }
+            toolBuffers.delete(callId);
+          }
+          continue;
+        }
+        if (t === "response.completed") {
+          const u = evt.response?.usage;
+          if (u) {
+            usage = {
+              prompt_tokens: u.input_tokens ?? 0,
+              completion_tokens: u.output_tokens ?? 0,
+              total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+            };
+          }
+          for (const buf of toolBuffers.values()) {
+            if (!buf.id || !buf.name) continue;
+            if (!buf.emittedStart) {
+              yield { type: "action:start", id: buf.id, name: buf.name };
+            }
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments || "{}"
+            };
+            yield { type: "action:end", id: buf.id, name: buf.name };
+          }
+          toolBuffers.clear();
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+          }
+          yield { type: "message:end" };
+          yield { type: "done", usage };
+          finishEmitted = true;
+          continue;
+        }
+        if (t === "response.error" || t === "error") {
+          const msg = evt.error?.message || evt.message || "Responses API error";
+          yield {
+            type: "error",
+            message: msg,
+            code: "OPENAI_RESPONSES_ERROR"
+          };
+          return;
+        }
+      }
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    if (!finishEmitted) {
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+      }
+      yield { type: "message:end" };
+      yield { type: "done", usage };
+    }
+  }
   async completeWithResponses(request) {
     const client = await this.getClient();
     const openaiToolOptions = request.providerToolOptions?.openai;
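
The doc comment in this hunk pins down the event contract between the two streaming paths. A hedged sketch of what a downstream consumer sees; the `StreamEvent` union is inferred from the events yielded in this diff, not copied from the SDK's published types:

```ts
// Inferred from the yields above; any field not shown there is an assumption.
type StreamEvent =
  | { type: "thinking:start" }
  | { type: "thinking:delta"; content: string }
  | { type: "thinking:end" }
  | { type: "message:delta"; content: string }
  | { type: "message:end" }
  | { type: "action:start"; id: string; name: string }
  | { type: "action:args"; id: string; args: string }
  | { type: "action:end"; id: string; name: string }
  | { type: "done"; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } }
  | { type: "error"; message: string; code?: string };

// The same loop works whether chat-completions or the Responses API produced
// the events; that equivalence is the point of the mapping above.
async function render(events: AsyncIterable<StreamEvent>): Promise<string> {
  let reasoning = "";
  let answer = "";
  for await (const e of events) {
    switch (e.type) {
      case "thinking:delta": reasoning += e.content; break; // reasoning summary text
      case "message:delta":  answer += e.content; break;    // visible assistant text
      case "action:args":    /* e.args holds the full accumulated JSON string */ break;
      case "error":          throw new Error(e.message);
    }
  }
  return answer;
}
```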
@@ -717,16 +978,37 @@ var OpenAIAdapter = class _OpenAIAdapter {
           name: openaiToolOptions.toolChoice.name
         }
       } : openaiToolOptions?.toolChoice;
+    const isOpenRouter = this.provider === "openrouter";
+    const activeModel = request.config?.model || this.model;
+    const modelSlug = activeModel.replace("openai/", "");
+    const isOSeries = /^o[1-9]/.test(modelSlug);
+    const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+    if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+      yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+      return;
+    }
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
       stream: true,
-      stream_options: { include_usage: true }
+      stream_options: { include_usage: true },
+      // o-series: use max_completion_tokens + reasoning_effort, no temperature
+      // regular models: use max_tokens + temperature
+      ...isOSeries ? {
+        max_completion_tokens: maxTokensValue,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue
+      },
+      // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+      // When disableThinking=true we must explicitly send include_reasoning:false because
+      // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+      ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const stream = await client.chat.completions.create(payload);
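
The spread above resolves to one of two mutually exclusive parameter sets: o-series models reject `temperature` and expect `max_completion_tokens` plus `reasoning_effort`, while everything else keeps the classic pair. The same decision in isolation (the helper name is illustrative; the regex and field names are taken from the hunk):

```ts
// Sketch of the o-series vs regular-model parameter split used above.
function samplingParams(model: string, maxTokens?: number, temperature?: number) {
  const slug = model.replace("openai/", "");
  return /^o[1-9]/.test(slug)
    ? { max_completion_tokens: maxTokens, reasoning_effort: "medium" as const }
    : { temperature, max_tokens: maxTokens };
}

samplingParams("openai/o3-mini", 1024);
// → { max_completion_tokens: 1024, reasoning_effort: "medium" }
samplingParams("gpt-4o", 1024, 0.7);
// → { temperature: 0.7, max_tokens: 1024 }
```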
@@ -734,6 +1016,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     const collectedCitations = [];
     let citationIndex = 0;
     let usage;
+    let adapterReasoningStarted = false;
     for await (const chunk of stream) {
       logProviderPayload("openai", "stream chunk", chunk, request.debug);
       if (request.signal?.aborted) {
@@ -744,6 +1027,22 @@ var OpenAIAdapter = class _OpenAIAdapter {
       if (delta?.content) {
         yield { type: "message:delta", content: delta.content };
       }
+      if (isOpenRouter) {
+        const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+        if (rc) {
+          const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+          if (rcText) {
+            if (!adapterReasoningStarted) {
+              yield { type: "thinking:start" };
+              adapterReasoningStarted = true;
+            }
+            yield { type: "thinking:delta", content: rcText };
+          }
+        } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+          yield { type: "thinking:end" };
+          adapterReasoningStarted = false;
+        }
+      }
       const annotations = delta?.annotations;
       if (annotations && annotations.length > 0) {
         for (const annotation of annotations) {
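
OpenRouter models surface reasoning under either `delta.reasoning_content` or `delta.reasoning`, and the value may be a plain string or an array of text parts; the branch above normalizes all of these before emitting `thinking:*` events. The same normalization in isolation (the parameter type is an assumption for the sketch):

```ts
// Sketch of the reasoning-delta normalization performed in the hunk above.
function reasoningText(delta: { reasoning_content?: unknown; reasoning?: unknown }): string {
  const rc = delta.reasoning_content ?? delta.reasoning ?? null;
  if (typeof rc === "string") return rc;                              // DeepSeek-style string delta
  if (Array.isArray(rc) && typeof rc[0]?.text === "string") return rc[0].text; // array-of-parts shape
  return "";
}
```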
@@ -791,6 +1090,11 @@ var OpenAIAdapter = class _OpenAIAdapter {
           };
         } else if (currentToolCall && toolCall.function?.arguments) {
          currentToolCall.arguments += toolCall.function.arguments;
+          yield {
+            type: "action:args",
+            id: currentToolCall.id,
+            args: currentToolCall.arguments
+          };
         }
       }
     }
@@ -866,15 +1170,24 @@ var OpenAIAdapter = class _OpenAIAdapter {
           name: openaiToolOptions.toolChoice.name
         }
       } : openaiToolOptions?.toolChoice;
+    const activeModel2 = request.config?.model || this.model;
+    const modelSlug2 = activeModel2.replace("openai/", "");
+    const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+    const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel2,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
-      stream: false
+      stream: false,
+      ...isOSeries2 ? {
+        max_completion_tokens: maxTokensValue2,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue2
+      }
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const response = await client.chat.completions.create(payload);
--- package/dist/providers/openrouter/index.d.mts
+++ package/dist/providers/openrouter/index.d.mts
@@ -1,7 +1,7 @@
-import { L as LanguageModel } from '../../types-CR8mi9I0.mjs';
-import { A as AIProvider } from '../../types-DRqxMIjF.mjs';
+import { L as LanguageModel } from '../../types-CMvvDo-E.mjs';
+import { A as AIProvider } from '../../types-CMMQ8s2O.mjs';
 import 'zod';
-import '../../base-D-U61JaB.mjs';
+import '../../base-DN1EfKnE.mjs';
 
 /**
  * OpenRouter Provider - Modern Pattern
@@ -39,6 +39,8 @@ interface OpenRouterProviderOptions {
     /** Order preference: 'price' | 'latency' | 'throughput' */
     order?: "price" | "latency" | "throughput";
   };
+  /** Disable extended thinking/reasoning (default: thinking enabled) */
+  disableThinking?: boolean;
 }
 /**
  * Create an OpenRouter language model
@@ -148,6 +150,8 @@ interface OpenRouterProviderConfig {
   siteUrl?: string;
   /** Your app name for OpenRouter rankings */
   appName?: string;
+  /** Disable extended thinking/reasoning */
+  disableThinking?: boolean;
 }
 /**
  * Create an OpenRouter provider (callable, Vercel AI SDK style)
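
Taken together with the adapter changes above, the new option would be used roughly like this. A hedged sketch: the import path and factory signature are assumptions based on this declaration file; only `disableThinking` and its default behavior come from the diff:

```ts
// Hypothetical usage; exact import path and factory shape may differ.
import { openrouter } from "@yourgpt/llm-sdk/providers/openrouter";

// Models that reason by default (Qwen3, DeepSeek-R1, ...) stay quiet:
// the adapter sends include_reasoning: false explicitly.
const quiet = openrouter("deepseek/deepseek-r1", { disableThinking: true });

// Default (thinking enabled): the adapter sends
// reasoning: { max_tokens: 8000 } and include_reasoning: true.
const verbose = openrouter("deepseek/deepseek-r1");
```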
--- package/dist/providers/openrouter/index.d.ts
+++ package/dist/providers/openrouter/index.d.ts
@@ -1,7 +1,7 @@
-import { L as LanguageModel } from '../../types-CR8mi9I0.js';
-import { A as AIProvider } from '../../types-BctsnC3g.js';
+import { L as LanguageModel } from '../../types-CMvvDo-E.js';
+import { A as AIProvider } from '../../types-DhktekQ3.js';
 import 'zod';
-import '../../base-iGi9Va6Z.js';
+import '../../base-DuUNxtVg.js';
 
 /**
  * OpenRouter Provider - Modern Pattern
@@ -39,6 +39,8 @@ interface OpenRouterProviderOptions {
     /** Order preference: 'price' | 'latency' | 'throughput' */
     order?: "price" | "latency" | "throughput";
   };
+  /** Disable extended thinking/reasoning (default: thinking enabled) */
+  disableThinking?: boolean;
 }
 /**
  * Create an OpenRouter language model
@@ -148,6 +150,8 @@ interface OpenRouterProviderConfig {
   siteUrl?: string;
   /** Your app name for OpenRouter rankings */
   appName?: string;
+  /** Disable extended thinking/reasoning */
+  disableThinking?: boolean;
 }
 /**
  * Create an OpenRouter provider (callable, Vercel AI SDK style)