@yourgpt/llm-sdk 2.1.9 → 2.1.10-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/adapters/index.d.mts +38 -4
  2. package/dist/adapters/index.d.ts +38 -4
  3. package/dist/adapters/index.js +318 -8
  4. package/dist/adapters/index.mjs +318 -8
  5. package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
  6. package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +7 -7
  10. package/dist/index.d.ts +7 -7
  11. package/dist/index.js +43 -23
  12. package/dist/index.mjs +43 -23
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +17 -0
  16. package/dist/providers/anthropic/index.mjs +17 -0
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/fireworks/index.d.mts +1 -1
  20. package/dist/providers/fireworks/index.d.ts +1 -1
  21. package/dist/providers/google/index.d.mts +3 -3
  22. package/dist/providers/google/index.d.ts +3 -3
  23. package/dist/providers/google/index.js +311 -8
  24. package/dist/providers/google/index.mjs +311 -8
  25. package/dist/providers/ollama/index.d.mts +4 -4
  26. package/dist/providers/ollama/index.d.ts +4 -4
  27. package/dist/providers/openai/index.d.mts +3 -3
  28. package/dist/providers/openai/index.d.ts +3 -3
  29. package/dist/providers/openai/index.js +321 -8
  30. package/dist/providers/openai/index.mjs +321 -8
  31. package/dist/providers/openrouter/index.d.mts +7 -3
  32. package/dist/providers/openrouter/index.d.ts +7 -3
  33. package/dist/providers/openrouter/index.js +601 -11
  34. package/dist/providers/openrouter/index.mjs +601 -11
  35. package/dist/providers/togetherai/index.d.mts +3 -3
  36. package/dist/providers/togetherai/index.d.ts +3 -3
  37. package/dist/providers/togetherai/index.js +311 -8
  38. package/dist/providers/togetherai/index.mjs +311 -8
  39. package/dist/providers/xai/index.d.mts +3 -3
  40. package/dist/providers/xai/index.d.ts +3 -3
  41. package/dist/providers/xai/index.js +311 -8
  42. package/dist/providers/xai/index.mjs +311 -8
  43. package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
  44. package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
  45. package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
  46. package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
  47. package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
  48. package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
  49. package/dist/yourgpt/index.d.mts +1 -1
  50. package/dist/yourgpt/index.d.ts +1 -1
  51. package/package.json +1 -1
@@ -165,8 +165,18 @@ function openai(modelId, options = {}) {
             name: tc.function?.name ?? "",
             arguments: tc.function?.arguments ?? ""
           };
+          yield {
+            type: "tool-call-start",
+            toolCallId: tc.id,
+            toolName: tc.function?.name ?? ""
+          };
         } else if (currentToolCall && tc.function?.arguments) {
           currentToolCall.arguments += tc.function.arguments;
+          yield {
+            type: "tool-call-delta",
+            toolCallId: currentToolCall.id,
+            argsText: currentToolCall.arguments
+          };
         }
       }
     }
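The two new events give stream consumers progressive tool-call visibility: tool-call-start fires once per call, and tool-call-delta carries the accumulated (not incremental) argument text. A minimal consumer sketch; the event union and the async-iterable stream are inferred from the diff, so names not shown above are illustrative:

type ToolCallEvent =
  | { type: "tool-call-start"; toolCallId: string; toolName: string }
  | { type: "tool-call-delta"; toolCallId: string; argsText: string };

// Collect the latest argument text per tool call. argsText is the full
// buffer so far (the diff yields currentToolCall.arguments whole), so a
// consumer overwrites rather than appends.
async function collectToolArgs(events: AsyncIterable<ToolCallEvent>) {
  const argsById = new Map<string, string>();
  for await (const evt of events) {
    if (evt.type === "tool-call-start") argsById.set(evt.toolCallId, "");
    else argsById.set(evt.toolCallId, evt.argsText);
  }
  return argsById;
}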
@@ -487,6 +497,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
     if (baseUrl.includes("x.ai")) return "xai";
     if (baseUrl.includes("azure")) return "azure";
+    if (baseUrl.includes("openrouter.ai")) return "openrouter";
     return "openai";
   }
   async getClient() {
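For reference, detection is an ordered substring match on the configured base URL. A standalone restatement with example inputs (the function name is illustrative; the branch bodies are verbatim from the hunk above):

function detectProvider(baseUrl: string): string {
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
  if (baseUrl.includes("x.ai")) return "xai";
  if (baseUrl.includes("azure")) return "azure";
  if (baseUrl.includes("openrouter.ai")) return "openrouter"; // new in this release
  return "openai"; // default when nothing matches
}

detectProvider("https://openrouter.ai/api/v1"); // "openrouter"
detectProvider("https://api.openai.com/v1");    // "openai"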
@@ -586,6 +597,256 @@ var OpenAIAdapter = class _OpenAIAdapter {
       rawResponse: response
     };
   }
+  /**
+   * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+   * reasoning content on the chat-completions endpoint. To surface reasoning
+   * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+   * Responses API, which streams `response.reasoning_summary_text.delta` events.
+   *
+   * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+   * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+   */
+  isOpenAIReasoningModelOnOpenRouter(activeModel) {
+    if (this.provider !== "openrouter") return false;
+    return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+  }
+  /**
+   * Convert ActionDefinition[] (the chat-completions tool shape used by the
+   * adapter) to the Responses API tool shape.
+   */
+  buildResponsesToolsFromActions(actions) {
+    if (!actions || actions.length === 0) return void 0;
+    const formatted = formatTools(actions);
+    return formatted.map((t) => ({
+      type: "function",
+      name: t.function.name,
+      description: t.function.description,
+      parameters: t.function.parameters
+    }));
+  }
+  /**
+   * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+   *
+   * Maps Responses API SSE events back to the same StreamEvent shapes the
+   * chat-completions path emits, so downstream consumers (processChunk.ts,
+   * frontend tool handlers, plan approval, specialist delegations) see
+   * identical events regardless of which path produced them.
+   *
+   * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+   * response.output_text.delta → message:delta
+   * response.output_item.added (function_call) → action:start (queued buffer)
+   * response.function_call_arguments.delta → action:args (progressive)
+   * response.output_item.done (function_call) → final action:args + action:end
+   * response.completed → message:end + done(usage)
+   * response.error → error
+   */
+  async *streamWithResponsesAPI(request, activeModel, messageId) {
+    const client = await this.getClient();
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+    const payload = {
+      model: activeModel,
+      input: this.buildResponsesInput(request),
+      stream: true,
+      reasoning: {
+        effort: request.config?.reasoningEffort ?? "medium",
+        summary: "auto"
+      }
+    };
+    if (request.systemPrompt) payload.instructions = request.systemPrompt;
+    if (typeof maxTokensValue === "number")
+      payload.max_output_tokens = maxTokensValue;
+    const tools = this.buildResponsesToolsFromActions(request.actions);
+    if (tools && tools.length > 0) payload.tools = tools;
+    logProviderPayload(
+      "openai",
+      "responses-api request payload",
+      payload,
+      request.debug
+    );
+    let stream;
+    try {
+      stream = await client.responses.create(payload);
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    const toolBuffers = /* @__PURE__ */ new Map();
+    const itemIdToCallId = /* @__PURE__ */ new Map();
+    let usage;
+    let reasoningStarted = false;
+    let textStarted = false;
+    let finishEmitted = false;
+    const resolveCallId = (evt) => {
+      if (evt?.call_id) return evt.call_id;
+      if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+      if (evt?.item?.call_id) return evt.item.call_id;
+      if (evt?.item?.id) return evt.item.id;
+      return "";
+    };
+    try {
+      for await (const evt of stream) {
+        logProviderPayload(
+          "openai",
+          "responses-api stream chunk",
+          evt,
+          request.debug
+        );
+        if (request.signal?.aborted) break;
+        const t = evt?.type ?? "";
+        if (t === "response.reasoning_summary_text.delta") {
+          const delta = evt.delta ?? "";
+          if (!delta) continue;
+          if (!reasoningStarted) {
+            yield { type: "thinking:start" };
+            reasoningStarted = true;
+          }
+          yield { type: "thinking:delta", content: delta };
+          continue;
+        }
+        if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+          continue;
+        }
+        if (t === "response.output_text.delta") {
+          const text = evt.delta ?? "";
+          if (!text) continue;
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+            textStarted = true;
+          }
+          yield { type: "message:delta", content: text };
+          continue;
+        }
+        if (t === "response.output_item.added") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const itemId = item.id ?? callId;
+            if (callId) {
+              if (itemId && itemId !== callId) {
+                itemIdToCallId.set(itemId, callId);
+              }
+              if (!toolBuffers.has(callId)) {
+                toolBuffers.set(callId, {
+                  id: callId,
+                  name: item.name ?? "",
+                  arguments: item.arguments ?? "",
+                  emittedStart: false
+                });
+              }
+              const buf = toolBuffers.get(callId);
+              if (buf.name && !buf.emittedStart) {
+                yield { type: "action:start", id: buf.id, name: buf.name };
+                buf.emittedStart = true;
+              }
+            }
+          }
+          continue;
+        }
+        if (t === "response.function_call_arguments.delta") {
+          const callId = resolveCallId(evt);
+          const delta = evt.delta ?? "";
+          if (!callId || !delta) continue;
+          let buf = toolBuffers.get(callId);
+          if (!buf) {
+            buf = { id: callId, name: "", arguments: "", emittedStart: false };
+            toolBuffers.set(callId, buf);
+          }
+          buf.arguments += delta;
+          if (buf.emittedStart) {
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments
+            };
+          }
+          continue;
+        }
+        if (t === "response.output_item.done") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const buf = toolBuffers.get(callId);
+            const name = buf?.name || item.name || "";
+            const argsStr = buf?.arguments || item.arguments || "{}";
+            if (callId && name) {
+              if (!buf?.emittedStart) {
+                yield { type: "action:start", id: callId, name };
+              }
+              yield {
+                type: "action:args",
+                id: callId,
+                args: argsStr
+              };
+              yield {
+                type: "action:end",
+                id: callId,
+                name
+              };
+            }
+            toolBuffers.delete(callId);
+          }
+          continue;
+        }
+        if (t === "response.completed") {
+          const u = evt.response?.usage;
+          if (u) {
+            usage = {
+              prompt_tokens: u.input_tokens ?? 0,
+              completion_tokens: u.output_tokens ?? 0,
+              total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+            };
+          }
+          for (const buf of toolBuffers.values()) {
+            if (!buf.id || !buf.name) continue;
+            if (!buf.emittedStart) {
+              yield { type: "action:start", id: buf.id, name: buf.name };
+            }
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments || "{}"
+            };
+            yield { type: "action:end", id: buf.id, name: buf.name };
+          }
+          toolBuffers.clear();
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+          }
+          yield { type: "message:end" };
+          yield { type: "done", usage };
+          finishEmitted = true;
+          continue;
+        }
+        if (t === "response.error" || t === "error") {
+          const msg = evt.error?.message || evt.message || "Responses API error";
+          yield {
+            type: "error",
+            message: msg,
+            code: "OPENAI_RESPONSES_ERROR"
+          };
+          return;
+        }
+      }
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    if (!finishEmitted) {
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+      }
+      yield { type: "message:end" };
+      yield { type: "done", usage };
+    }
+  }
   async completeWithResponses(request) {
     const client = await this.getClient();
     const openaiToolOptions = request.providerToolOptions?.openai;
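Because streamWithResponsesAPI re-emits the same event shapes as the chat-completions path, consumers never branch on which API produced the stream. A hedged consumer sketch; the union below covers only the variants this method emits, and the usage type is simplified:

type StreamEvent =
  | { type: "thinking:start" }
  | { type: "thinking:delta"; content: string }
  | { type: "thinking:end" }
  | { type: "message:delta"; content: string }
  | { type: "message:end" }
  | { type: "action:start"; id: string; name: string }
  | { type: "action:args"; id: string; args: string }
  | { type: "action:end"; id: string; name: string }
  | { type: "done"; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } }
  | { type: "error"; message: string; code: string };

// Path-agnostic consumer. action:args always carries the full JSON
// accumulated so far, and a final full action:args precedes action:end
// (defaulting to "{}"), so parsing on action:end is safe.
async function consume(events: AsyncIterable<StreamEvent>) {
  let text = "";
  const args = new Map<string, string>();
  for await (const evt of events) {
    if (evt.type === "message:delta") text += evt.content;
    else if (evt.type === "action:args") args.set(evt.id, evt.args);
    else if (evt.type === "action:end") console.log(evt.name, JSON.parse(args.get(evt.id) ?? "{}"));
    else if (evt.type === "error") throw new Error(`${evt.code}: ${evt.message}`);
  }
  return text;
}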
@@ -719,16 +980,37 @@ var OpenAIAdapter = class _OpenAIAdapter {
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const isOpenRouter = this.provider === "openrouter";
+    const activeModel = request.config?.model || this.model;
+    const modelSlug = activeModel.replace("openai/", "");
+    const isOSeries = /^o[1-9]/.test(modelSlug);
+    const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+    if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+      yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+      return;
+    }
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
       stream: true,
-      stream_options: { include_usage: true }
+      stream_options: { include_usage: true },
+      // o-series: use max_completion_tokens + reasoning_effort, no temperature
+      // regular models: use max_tokens + temperature
+      ...isOSeries ? {
+        max_completion_tokens: maxTokensValue,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue
+      },
+      // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+      // When disableThinking=true we must explicitly send include_reasoning:false because
+      // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+      ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const stream = await client.chat.completions.create(payload);
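The o-series branch is keyed off a prefix test on the model slug, so it works with or without OpenRouter's "openai/" prefix. A quick check of the regex behavior, copied from the diff (model ids are just examples):

// Copied from the diff above; the wrapper function is illustrative.
const isOSeries = (model: string) => /^o[1-9]/.test(model.replace("openai/", ""));

isOSeries("openai/o3-mini"); // true  -> max_completion_tokens + reasoning_effort, no temperature
isOSeries("o1-preview");     // true  -> the "openai/" prefix is optional
isOSeries("openai/gpt-4o");  // false -> temperature + max_tokens
isOSeries("openai/gpt-5");   // false here, but on OpenRouter (with thinking enabled) gpt-5
                             // was already routed to streamWithResponsesAPI before this point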
@@ -736,6 +1018,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     const collectedCitations = [];
     let citationIndex = 0;
     let usage;
+    let adapterReasoningStarted = false;
     for await (const chunk of stream) {
       logProviderPayload("openai", "stream chunk", chunk, request.debug);
       if (request.signal?.aborted) {
@@ -746,6 +1029,22 @@ var OpenAIAdapter = class _OpenAIAdapter {
       if (delta?.content) {
         yield { type: "message:delta", content: delta.content };
       }
+      if (isOpenRouter) {
+        const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+        if (rc) {
+          const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+          if (rcText) {
+            if (!adapterReasoningStarted) {
+              yield { type: "thinking:start" };
+              adapterReasoningStarted = true;
+            }
+            yield { type: "thinking:delta", content: rcText };
+          }
+        } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+          yield { type: "thinking:end" };
+          adapterReasoningStarted = false;
+        }
+      }
       const annotations = delta?.annotations;
       if (annotations && annotations.length > 0) {
         for (const annotation of annotations) {
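OpenRouter surfaces reasoning either as a reasoning_content string or as a reasoning array of text parts, which is why the delta is normalized before any thinking event is emitted. A standalone restatement (the type name is illustrative; the logic mirrors the ternary above):

type ReasoningDelta = string | Array<{ text?: string }> | null;

function reasoningText(rc: ReasoningDelta): string {
  if (typeof rc === "string") return rc;
  if (Array.isArray(rc)) return rc[0]?.text ?? "";
  return ""; // empty string suppresses thinking:start / thinking:delta
}

reasoningText("Checking the docs...");             // "Checking the docs..."
reasoningText([{ text: "Checking the docs..." }]); // "Checking the docs..."
reasoningText(null);                               // ""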
@@ -793,6 +1092,11 @@ var OpenAIAdapter = class _OpenAIAdapter {
           };
         } else if (currentToolCall && toolCall.function?.arguments) {
           currentToolCall.arguments += toolCall.function.arguments;
+          yield {
+            type: "action:args",
+            id: currentToolCall.id,
+            args: currentToolCall.arguments
+          };
         }
       }
     }
@@ -868,15 +1172,24 @@ var OpenAIAdapter = class _OpenAIAdapter {
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const activeModel2 = request.config?.model || this.model;
+    const modelSlug2 = activeModel2.replace("openai/", "");
+    const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+    const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel2,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
-      stream: false
+      stream: false,
+      ...isOSeries2 ? {
+        max_completion_tokens: maxTokensValue2,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue2
+      }
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const response = await client.chat.completions.create(payload);