@yourgpt/llm-sdk 2.1.8 → 2.1.10-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/adapters/index.d.mts +38 -4
  2. package/dist/adapters/index.d.ts +38 -4
  3. package/dist/adapters/index.js +318 -8
  4. package/dist/adapters/index.mjs +318 -8
  5. package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
  6. package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +7 -7
  10. package/dist/index.d.ts +7 -7
  11. package/dist/index.js +43 -23
  12. package/dist/index.mjs +43 -23
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +17 -0
  16. package/dist/providers/anthropic/index.mjs +17 -0
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/fireworks/index.d.mts +1 -1
  20. package/dist/providers/fireworks/index.d.ts +1 -1
  21. package/dist/providers/google/index.d.mts +3 -3
  22. package/dist/providers/google/index.d.ts +3 -3
  23. package/dist/providers/google/index.js +311 -8
  24. package/dist/providers/google/index.mjs +311 -8
  25. package/dist/providers/ollama/index.d.mts +4 -4
  26. package/dist/providers/ollama/index.d.ts +4 -4
  27. package/dist/providers/openai/index.d.mts +3 -3
  28. package/dist/providers/openai/index.d.ts +3 -3
  29. package/dist/providers/openai/index.js +321 -8
  30. package/dist/providers/openai/index.mjs +321 -8
  31. package/dist/providers/openrouter/index.d.mts +7 -3
  32. package/dist/providers/openrouter/index.d.ts +7 -3
  33. package/dist/providers/openrouter/index.js +601 -11
  34. package/dist/providers/openrouter/index.mjs +601 -11
  35. package/dist/providers/togetherai/index.d.mts +61 -2
  36. package/dist/providers/togetherai/index.d.ts +61 -2
  37. package/dist/providers/togetherai/index.js +1030 -2
  38. package/dist/providers/togetherai/index.mjs +1029 -2
  39. package/dist/providers/xai/index.d.mts +3 -3
  40. package/dist/providers/xai/index.d.ts +3 -3
  41. package/dist/providers/xai/index.js +311 -8
  42. package/dist/providers/xai/index.mjs +311 -8
  43. package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
  44. package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
  45. package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
  46. package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
  47. package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
  48. package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
  49. package/dist/yourgpt/index.d.mts +1 -1
  50. package/dist/yourgpt/index.d.ts +1 -1
  51. package/package.json +1 -1
@@ -504,6 +504,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
  if (baseUrl.includes("x.ai")) return "xai";
  if (baseUrl.includes("azure")) return "azure";
+ if (baseUrl.includes("openrouter.ai")) return "openrouter";
  return "openai";
  }
  async getClient() {
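
For context, the new branch slots into the adapter's substring-based provider detection. A minimal standalone sketch of that logic in TypeScript (the function name and return type are ours for illustration, not the SDK's public API):

    // Mirrors the baseUrl checks in the hunk above; illustrative only.
    function detectProvider(baseUrl: string): "google" | "xai" | "azure" | "openrouter" | "openai" {
      if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
      if (baseUrl.includes("x.ai")) return "xai";
      if (baseUrl.includes("azure")) return "azure";
      if (baseUrl.includes("openrouter.ai")) return "openrouter"; // new in 2.1.10-alpha.0
      return "openai"; // fallback: treat unknown endpoints as OpenAI-compatible
    }

    detectProvider("https://openrouter.ai/api/v1"); // → "openrouter"
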
@@ -603,6 +604,256 @@ var OpenAIAdapter = class _OpenAIAdapter {
  rawResponse: response
  };
  }
+ /**
+ * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+ * reasoning content on the chat-completions endpoint. To surface reasoning
+ * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+ * Responses API, which streams `response.reasoning_summary_text.delta` events.
+ *
+ * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+ * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+ */
+ isOpenAIReasoningModelOnOpenRouter(activeModel) {
+ if (this.provider !== "openrouter") return false;
+ return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+ }
+ /**
+ * Convert ActionDefinition[] (the chat-completions tool shape used by the
+ * adapter) to the Responses API tool shape.
+ */
+ buildResponsesToolsFromActions(actions) {
+ if (!actions || actions.length === 0) return void 0;
+ const formatted = formatTools(actions);
+ return formatted.map((t) => ({
+ type: "function",
+ name: t.function.name,
+ description: t.function.description,
+ parameters: t.function.parameters
+ }));
+ }
+ /**
+ * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+ *
+ * Maps Responses API SSE events back to the same StreamEvent shapes the
+ * chat-completions path emits, so downstream consumers (processChunk.ts,
+ * frontend tool handlers, plan approval, specialist delegations) see
+ * identical events regardless of which path produced them.
+ *
+ * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+ * response.output_text.delta → message:delta
+ * response.output_item.added (function_call) → action:start (queued buffer)
+ * response.function_call_arguments.delta → action:args (progressive)
+ * response.output_item.done (function_call) → final action:args + action:end
+ * response.completed → message:end + done(usage)
+ * response.error → error
+ */
+ async *streamWithResponsesAPI(request, activeModel, messageId) {
+ const client = await this.getClient();
+ const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+ const payload = {
+ model: activeModel,
+ input: this.buildResponsesInput(request),
+ stream: true,
+ reasoning: {
+ effort: request.config?.reasoningEffort ?? "medium",
+ summary: "auto"
+ }
+ };
+ if (request.systemPrompt) payload.instructions = request.systemPrompt;
+ if (typeof maxTokensValue === "number")
+ payload.max_output_tokens = maxTokensValue;
+ const tools = this.buildResponsesToolsFromActions(request.actions);
+ if (tools && tools.length > 0) payload.tools = tools;
+ logProviderPayload(
+ "openai",
+ "responses-api request payload",
+ payload,
+ request.debug
+ );
+ let stream;
+ try {
+ stream = await client.responses.create(payload);
+ } catch (error) {
+ yield {
+ type: "error",
+ message: error instanceof Error ? error.message : "Unknown error",
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ const toolBuffers = /* @__PURE__ */ new Map();
+ const itemIdToCallId = /* @__PURE__ */ new Map();
+ let usage;
+ let reasoningStarted = false;
+ let textStarted = false;
+ let finishEmitted = false;
+ const resolveCallId = (evt) => {
+ if (evt?.call_id) return evt.call_id;
+ if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+ if (evt?.item?.call_id) return evt.item.call_id;
+ if (evt?.item?.id) return evt.item.id;
+ return "";
+ };
+ try {
+ for await (const evt of stream) {
+ logProviderPayload(
+ "openai",
+ "responses-api stream chunk",
+ evt,
+ request.debug
+ );
+ if (request.signal?.aborted) break;
+ const t = evt?.type ?? "";
+ if (t === "response.reasoning_summary_text.delta") {
+ const delta = evt.delta ?? "";
+ if (!delta) continue;
+ if (!reasoningStarted) {
+ yield { type: "thinking:start" };
+ reasoningStarted = true;
+ }
+ yield { type: "thinking:delta", content: delta };
+ continue;
+ }
+ if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+ continue;
+ }
+ if (t === "response.output_text.delta") {
+ const text = evt.delta ?? "";
+ if (!text) continue;
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ textStarted = true;
+ }
+ yield { type: "message:delta", content: text };
+ continue;
+ }
+ if (t === "response.output_item.added") {
+ const item = evt.item;
+ if (item?.type === "function_call") {
+ const callId = item.call_id ?? item.id ?? "";
+ const itemId = item.id ?? callId;
+ if (callId) {
+ if (itemId && itemId !== callId) {
+ itemIdToCallId.set(itemId, callId);
+ }
+ if (!toolBuffers.has(callId)) {
+ toolBuffers.set(callId, {
+ id: callId,
+ name: item.name ?? "",
+ arguments: item.arguments ?? "",
+ emittedStart: false
+ });
+ }
+ const buf = toolBuffers.get(callId);
+ if (buf.name && !buf.emittedStart) {
+ yield { type: "action:start", id: buf.id, name: buf.name };
+ buf.emittedStart = true;
+ }
+ }
+ }
+ continue;
+ }
+ if (t === "response.function_call_arguments.delta") {
+ const callId = resolveCallId(evt);
+ const delta = evt.delta ?? "";
+ if (!callId || !delta) continue;
+ let buf = toolBuffers.get(callId);
+ if (!buf) {
+ buf = { id: callId, name: "", arguments: "", emittedStart: false };
+ toolBuffers.set(callId, buf);
+ }
+ buf.arguments += delta;
+ if (buf.emittedStart) {
+ yield {
+ type: "action:args",
+ id: buf.id,
+ args: buf.arguments
+ };
+ }
+ continue;
+ }
+ if (t === "response.output_item.done") {
+ const item = evt.item;
+ if (item?.type === "function_call") {
+ const callId = item.call_id ?? item.id ?? "";
+ const buf = toolBuffers.get(callId);
+ const name = buf?.name || item.name || "";
+ const argsStr = buf?.arguments || item.arguments || "{}";
+ if (callId && name) {
+ if (!buf?.emittedStart) {
+ yield { type: "action:start", id: callId, name };
+ }
+ yield {
+ type: "action:args",
+ id: callId,
+ args: argsStr
+ };
+ yield {
+ type: "action:end",
+ id: callId,
+ name
+ };
+ }
+ toolBuffers.delete(callId);
+ }
+ continue;
+ }
+ if (t === "response.completed") {
+ const u = evt.response?.usage;
+ if (u) {
+ usage = {
+ prompt_tokens: u.input_tokens ?? 0,
+ completion_tokens: u.output_tokens ?? 0,
+ total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+ };
+ }
+ for (const buf of toolBuffers.values()) {
+ if (!buf.id || !buf.name) continue;
+ if (!buf.emittedStart) {
+ yield { type: "action:start", id: buf.id, name: buf.name };
+ }
+ yield {
+ type: "action:args",
+ id: buf.id,
+ args: buf.arguments || "{}"
+ };
+ yield { type: "action:end", id: buf.id, name: buf.name };
+ }
+ toolBuffers.clear();
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ }
+ yield { type: "message:end" };
+ yield { type: "done", usage };
+ finishEmitted = true;
+ continue;
+ }
+ if (t === "response.error" || t === "error") {
+ const msg = evt.error?.message || evt.message || "Responses API error";
+ yield {
+ type: "error",
+ message: msg,
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ }
+ } catch (error) {
+ yield {
+ type: "error",
+ message: error instanceof Error ? error.message : "Unknown error",
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ if (!finishEmitted) {
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ }
+ yield { type: "message:end" };
+ yield { type: "done", usage };
+ }
+ }
  async completeWithResponses(request) {
  const client = await this.getClient();
  const openaiToolOptions = request.providerToolOptions?.openai;
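
To make the event mapping above concrete, here is a hedged consumer-side sketch of the unified StreamEvent stream both paths now produce. The union type below is inferred from the events yielded in this diff, not copied from the SDK's published typings:

    // Illustrative consumer; event shapes assumed from the yields above.
    type StreamEvent =
      | { type: "thinking:start" }
      | { type: "thinking:delta"; content: string }
      | { type: "thinking:end" }
      | { type: "message:delta"; content: string }
      | { type: "message:end" }
      | { type: "action:start"; id: string; name: string }
      | { type: "action:args"; id: string; args: string }
      | { type: "action:end"; id: string; name: string }
      | { type: "done"; usage?: unknown }
      | { type: "error"; message: string; code?: string };

    async function consume(stream: AsyncIterable<StreamEvent>) {
      let reasoning = "";
      let text = "";
      const toolArgs = new Map<string, string>(); // call id → latest cumulative args
      for await (const evt of stream) {
        switch (evt.type) {
          case "thinking:delta": reasoning += evt.content; break;    // reasoning summary text
          case "message:delta": text += evt.content; break;          // assistant answer text
          case "action:args": toolArgs.set(evt.id, evt.args); break; // args arrive cumulatively
          case "error": throw new Error(evt.message);
        }
      }
      return { reasoning, text, toolArgs };
    }

Note that action:args carries the full accumulated argument string each time (buf.arguments, not the raw delta), so overwriting the map entry is enough.
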
@@ -736,16 +987,37 @@ var OpenAIAdapter = class _OpenAIAdapter {
  name: openaiToolOptions.toolChoice.name
  }
  } : openaiToolOptions?.toolChoice;
+ const isOpenRouter = this.provider === "openrouter";
+ const activeModel = request.config?.model || this.model;
+ const modelSlug = activeModel.replace("openai/", "");
+ const isOSeries = /^o[1-9]/.test(modelSlug);
+ const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+ if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+ yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+ return;
+ }
+ const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
  const payload = {
- model: request.config?.model || this.model,
+ model: activeModel,
  messages,
  tools: tools.length > 0 ? tools : void 0,
  tool_choice: tools.length > 0 ? toolChoice : void 0,
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
- temperature: request.config?.temperature ?? this.config.temperature,
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
  stream: true,
- stream_options: { include_usage: true }
+ stream_options: { include_usage: true },
+ // o-series: use max_completion_tokens + reasoning_effort, no temperature
+ // regular models: use max_tokens + temperature
+ ...isOSeries ? {
+ max_completion_tokens: maxTokensValue,
+ reasoning_effort: request.config?.reasoningEffort ?? "medium"
+ } : {
+ temperature: request.config?.temperature ?? this.config.temperature,
+ max_tokens: maxTokensValue
+ },
+ // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+ // When disableThinking=true we must explicitly send include_reasoning:false because
+ // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+ ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
  };
  logProviderPayload("openai", "request payload", payload, request.debug);
  const stream = await client.chat.completions.create(payload);
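
The conditional spreads above produce two materially different chat-completions bodies. A hedged sketch of the resulting shapes, with made-up model names and values (the field names are exactly those the diff sets):

    // o-series / reasoning models: sampling params are dropped, since these
    // models reject temperature and use max_completion_tokens instead.
    const oSeriesPayload = {
      model: "openai/o3-mini",           // matches the /^o[1-9]/ slug test
      stream: true,
      stream_options: { include_usage: true },
      max_completion_tokens: 1024,       // replaces max_tokens
      reasoning_effort: "medium",
    };

    // Everything else keeps the classic sampling params.
    const regularPayload = {
      model: "openai/gpt-4o",
      stream: true,
      stream_options: { include_usage: true },
      temperature: 0.7,
      max_tokens: 1024,
    };

    // Non-OpenAI models routed through OpenRouter additionally receive
    //   { reasoning: { max_tokens: 8000 }, include_reasoning: true }  (8e3 in the diff)
    // or { include_reasoning: false } when disableThinking is set.
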
@@ -753,6 +1025,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
  const collectedCitations = [];
  let citationIndex = 0;
  let usage;
+ let adapterReasoningStarted = false;
  for await (const chunk of stream) {
  logProviderPayload("openai", "stream chunk", chunk, request.debug);
  if (request.signal?.aborted) {
@@ -763,6 +1036,22 @@ var OpenAIAdapter = class _OpenAIAdapter {
  if (delta?.content) {
  yield { type: "message:delta", content: delta.content };
  }
+ if (isOpenRouter) {
+ const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+ if (rc) {
+ const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+ if (rcText) {
+ if (!adapterReasoningStarted) {
+ yield { type: "thinking:start" };
+ adapterReasoningStarted = true;
+ }
+ yield { type: "thinking:delta", content: rcText };
+ }
+ } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+ yield { type: "thinking:end" };
+ adapterReasoningStarted = false;
+ }
+ }
  const annotations = delta?.annotations;
  if (annotations && annotations.length > 0) {
  for (const annotation of annotations) {
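
The block above normalizes two delta shapes OpenRouter passes through: reasoning_content as a plain string (the DeepSeek-style field) and reasoning as either a string or an array of { text } parts. A standalone sketch of that normalization, with the input type inferred from the checks in the diff:

    // Standalone version of the rc/rcText normalization above; illustrative only.
    type ReasoningDelta = {
      reasoning_content?: string;                    // DeepSeek-style plain string
      reasoning?: string | Array<{ text?: string }>; // string or parts array
    };

    function extractReasoningText(delta: ReasoningDelta): string {
      const rc = delta.reasoning_content ?? delta.reasoning;
      if (!rc) return "";
      if (typeof rc === "string") return rc;
      return rc[0]?.text ?? ""; // parts-array shape: take the first text part
    }

    extractReasoningText({ reasoning_content: "First, check..." });     // "First, check..."
    extractReasoningText({ reasoning: [{ text: "First, check..." }] }); // "First, check..."
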
@@ -810,6 +1099,11 @@ var OpenAIAdapter = class _OpenAIAdapter {
  };
  } else if (currentToolCall && toolCall.function?.arguments) {
  currentToolCall.arguments += toolCall.function.arguments;
+ yield {
+ type: "action:args",
+ id: currentToolCall.id,
+ args: currentToolCall.arguments
+ };
  }
  }
  }
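
Because each action:args event re-sends the full accumulated argument string rather than just the latest fragment, a consumer can attempt a parse on every event and keep the last one that succeeded. A hedged sketch of that pattern:

    // Illustrative: lenient progressive parsing of cumulative tool-call args.
    // JSON.parse throws on incomplete JSON, so keep the last good snapshot.
    let lastGoodArgs: unknown;

    function onActionArgs(cumulativeArgs: string): unknown {
      try {
        lastGoodArgs = JSON.parse(cumulativeArgs); // succeeds only on valid JSON
      } catch {
        // mid-stream fragment: keep the previous successful parse
      }
      return lastGoodArgs;
    }

    onActionArgs('{"query": "wea');           // undefined (still incomplete)
    onActionArgs('{"query": "weather NYC"}'); // { query: "weather NYC" }
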
@@ -885,15 +1179,24 @@ var OpenAIAdapter = class _OpenAIAdapter {
  name: openaiToolOptions.toolChoice.name
  }
  } : openaiToolOptions?.toolChoice;
+ const activeModel2 = request.config?.model || this.model;
+ const modelSlug2 = activeModel2.replace("openai/", "");
+ const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+ const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
  const payload = {
- model: request.config?.model || this.model,
+ model: activeModel2,
  messages,
  tools: tools.length > 0 ? tools : void 0,
  tool_choice: tools.length > 0 ? toolChoice : void 0,
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
- temperature: request.config?.temperature ?? this.config.temperature,
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
- stream: false
+ stream: false,
+ ...isOSeries2 ? {
+ max_completion_tokens: maxTokensValue2,
+ reasoning_effort: request.config?.reasoningEffort ?? "medium"
+ } : {
+ temperature: request.config?.temperature ?? this.config.temperature,
+ max_tokens: maxTokensValue2
+ }
  };
  logProviderPayload("openai", "request payload", payload, request.debug);
  const response = await client.chat.completions.create(payload);
@@ -1,7 +1,7 @@
- import { c as OllamaProviderConfig, A as AIProvider } from '../../types-DRqxMIjF.mjs';
- export { d as OllamaModelOptions } from '../../types-DRqxMIjF.mjs';
- import '../../base-D-U61JaB.mjs';
- import '../../types-CR8mi9I0.mjs';
+ import { c as OllamaProviderConfig, A as AIProvider } from '../../types-CMMQ8s2O.mjs';
+ export { d as OllamaModelOptions } from '../../types-CMMQ8s2O.mjs';
+ import '../../base-DN1EfKnE.mjs';
+ import '../../types-CMvvDo-E.mjs';
  import 'zod';

  /**
@@ -1,7 +1,7 @@
- import { c as OllamaProviderConfig, A as AIProvider } from '../../types-BctsnC3g.js';
- export { d as OllamaModelOptions } from '../../types-BctsnC3g.js';
- import '../../base-iGi9Va6Z.js';
- import '../../types-CR8mi9I0.js';
+ import { c as OllamaProviderConfig, A as AIProvider } from '../../types-DhktekQ3.js';
+ export { d as OllamaModelOptions } from '../../types-DhktekQ3.js';
+ import '../../base-DuUNxtVg.js';
+ import '../../types-CMvvDo-E.js';
  import 'zod';

  /**
@@ -1,7 +1,7 @@
- import { L as LanguageModel } from '../../types-CR8mi9I0.mjs';
- import { O as OpenAIProviderConfig, A as AIProvider } from '../../types-DRqxMIjF.mjs';
+ import { L as LanguageModel } from '../../types-CMvvDo-E.mjs';
+ import { O as OpenAIProviderConfig, A as AIProvider } from '../../types-CMMQ8s2O.mjs';
  import 'zod';
- import '../../base-D-U61JaB.mjs';
+ import '../../base-DN1EfKnE.mjs';

  /**
  * OpenAI Provider - Modern Pattern
@@ -1,7 +1,7 @@
- import { L as LanguageModel } from '../../types-CR8mi9I0.js';
- import { O as OpenAIProviderConfig, A as AIProvider } from '../../types-BctsnC3g.js';
+ import { L as LanguageModel } from '../../types-CMvvDo-E.js';
+ import { O as OpenAIProviderConfig, A as AIProvider } from '../../types-DhktekQ3.js';
  import 'zod';
- import '../../base-iGi9Va6Z.js';
+ import '../../base-DuUNxtVg.js';

  /**
  * OpenAI Provider - Modern Pattern