@yourgpt/llm-sdk 2.1.8 → 2.1.10-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/adapters/index.d.mts +38 -4
  2. package/dist/adapters/index.d.ts +38 -4
  3. package/dist/adapters/index.js +318 -8
  4. package/dist/adapters/index.mjs +318 -8
  5. package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
  6. package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +7 -7
  10. package/dist/index.d.ts +7 -7
  11. package/dist/index.js +43 -23
  12. package/dist/index.mjs +43 -23
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +17 -0
  16. package/dist/providers/anthropic/index.mjs +17 -0
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/fireworks/index.d.mts +1 -1
  20. package/dist/providers/fireworks/index.d.ts +1 -1
  21. package/dist/providers/google/index.d.mts +3 -3
  22. package/dist/providers/google/index.d.ts +3 -3
  23. package/dist/providers/google/index.js +311 -8
  24. package/dist/providers/google/index.mjs +311 -8
  25. package/dist/providers/ollama/index.d.mts +4 -4
  26. package/dist/providers/ollama/index.d.ts +4 -4
  27. package/dist/providers/openai/index.d.mts +3 -3
  28. package/dist/providers/openai/index.d.ts +3 -3
  29. package/dist/providers/openai/index.js +321 -8
  30. package/dist/providers/openai/index.mjs +321 -8
  31. package/dist/providers/openrouter/index.d.mts +7 -3
  32. package/dist/providers/openrouter/index.d.ts +7 -3
  33. package/dist/providers/openrouter/index.js +601 -11
  34. package/dist/providers/openrouter/index.mjs +601 -11
  35. package/dist/providers/togetherai/index.d.mts +61 -2
  36. package/dist/providers/togetherai/index.d.ts +61 -2
  37. package/dist/providers/togetherai/index.js +1030 -2
  38. package/dist/providers/togetherai/index.mjs +1029 -2
  39. package/dist/providers/xai/index.d.mts +3 -3
  40. package/dist/providers/xai/index.d.ts +3 -3
  41. package/dist/providers/xai/index.js +311 -8
  42. package/dist/providers/xai/index.mjs +311 -8
  43. package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
  44. package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
  45. package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
  46. package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
  47. package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
  48. package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
  49. package/dist/yourgpt/index.d.mts +1 -1
  50. package/dist/yourgpt/index.d.ts +1 -1
  51. package/package.json +1 -1
@@ -1,7 +1,7 @@
- import { L as LanguageModel } from '../../types-CR8mi9I0.mjs';
- import { X as XAIProviderConfig, A as AIProvider } from '../../types-DRqxMIjF.mjs';
+ import { L as LanguageModel } from '../../types-CMvvDo-E.mjs';
+ import { X as XAIProviderConfig, A as AIProvider } from '../../types-CMMQ8s2O.mjs';
  import 'zod';
- import '../../base-D-U61JaB.mjs';
+ import '../../base-DN1EfKnE.mjs';

  /**
  * xAI Provider - Modern Pattern
@@ -1,7 +1,7 @@
- import { L as LanguageModel } from '../../types-CR8mi9I0.js';
- import { X as XAIProviderConfig, A as AIProvider } from '../../types-BctsnC3g.js';
+ import { L as LanguageModel } from '../../types-CMvvDo-E.js';
+ import { X as XAIProviderConfig, A as AIProvider } from '../../types-DhktekQ3.js';
  import 'zod';
- import '../../base-iGi9Va6Z.js';
+ import '../../base-DuUNxtVg.js';

  /**
  * xAI Provider - Modern Pattern
@@ -455,6 +455,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
  if (baseUrl.includes("x.ai")) return "xai";
  if (baseUrl.includes("azure")) return "azure";
+ if (baseUrl.includes("openrouter.ai")) return "openrouter";
  return "openai";
  }
  async getClient() {
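
Restated as a standalone function (a sketch, not code from the package), the detection chain reads top-down, so the new openrouter.ai check must sit before the catch-all "openai" return:

    function detectProvider(baseUrl: string): "google" | "xai" | "azure" | "openrouter" | "openai" {
      if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
      if (baseUrl.includes("x.ai")) return "xai";
      if (baseUrl.includes("azure")) return "azure";
      if (baseUrl.includes("openrouter.ai")) return "openrouter"; // new in 2.1.10-alpha.0
      return "openai"; // fallback for api.openai.com and unknown hosts
    }

    detectProvider("https://openrouter.ai/api/v1"); // → "openrouter"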
@@ -554,6 +555,256 @@ var OpenAIAdapter = class _OpenAIAdapter {
  rawResponse: response
  };
  }
+ /**
+ * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+ * reasoning content on the chat-completions endpoint. To surface reasoning
+ * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+ * Responses API, which streams `response.reasoning_summary_text.delta` events.
+ *
+ * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+ * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+ */
+ isOpenAIReasoningModelOnOpenRouter(activeModel) {
+ if (this.provider !== "openrouter") return false;
+ return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+ }
+ /**
+ * Convert ActionDefinition[] (the chat-completions tool shape used by the
+ * adapter) to the Responses API tool shape.
+ */
+ buildResponsesToolsFromActions(actions) {
+ if (!actions || actions.length === 0) return void 0;
+ const formatted = formatTools(actions);
+ return formatted.map((t) => ({
+ type: "function",
+ name: t.function.name,
+ description: t.function.description,
+ parameters: t.function.parameters
+ }));
+ }
+ /**
+ * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+ *
+ * Maps Responses API SSE events back to the same StreamEvent shapes the
+ * chat-completions path emits, so downstream consumers (processChunk.ts,
+ * frontend tool handlers, plan approval, specialist delegations) see
+ * identical events regardless of which path produced them.
+ *
+ * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+ * response.output_text.delta → message:delta
+ * response.output_item.added (function_call) → action:start (queued buffer)
+ * response.function_call_arguments.delta → action:args (progressive)
+ * response.output_item.done (function_call) → final action:args + action:end
+ * response.completed → message:end + done(usage)
+ * response.error → error
+ */
+ async *streamWithResponsesAPI(request, activeModel, messageId) {
+ const client = await this.getClient();
+ const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+ const payload = {
+ model: activeModel,
+ input: this.buildResponsesInput(request),
+ stream: true,
+ reasoning: {
+ effort: request.config?.reasoningEffort ?? "medium",
+ summary: "auto"
+ }
+ };
+ if (request.systemPrompt) payload.instructions = request.systemPrompt;
+ if (typeof maxTokensValue === "number")
+ payload.max_output_tokens = maxTokensValue;
+ const tools = this.buildResponsesToolsFromActions(request.actions);
+ if (tools && tools.length > 0) payload.tools = tools;
+ logProviderPayload(
+ "openai",
+ "responses-api request payload",
+ payload,
+ request.debug
+ );
+ let stream;
+ try {
+ stream = await client.responses.create(payload);
+ } catch (error) {
+ yield {
+ type: "error",
+ message: error instanceof Error ? error.message : "Unknown error",
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ const toolBuffers = /* @__PURE__ */ new Map();
+ const itemIdToCallId = /* @__PURE__ */ new Map();
+ let usage;
+ let reasoningStarted = false;
+ let textStarted = false;
+ let finishEmitted = false;
+ const resolveCallId = (evt) => {
+ if (evt?.call_id) return evt.call_id;
+ if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+ if (evt?.item?.call_id) return evt.item.call_id;
+ if (evt?.item?.id) return evt.item.id;
+ return "";
+ };
+ try {
+ for await (const evt of stream) {
+ logProviderPayload(
+ "openai",
+ "responses-api stream chunk",
+ evt,
+ request.debug
+ );
+ if (request.signal?.aborted) break;
+ const t = evt?.type ?? "";
+ if (t === "response.reasoning_summary_text.delta") {
+ const delta = evt.delta ?? "";
+ if (!delta) continue;
+ if (!reasoningStarted) {
+ yield { type: "thinking:start" };
+ reasoningStarted = true;
+ }
+ yield { type: "thinking:delta", content: delta };
+ continue;
+ }
+ if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+ continue;
+ }
+ if (t === "response.output_text.delta") {
+ const text = evt.delta ?? "";
+ if (!text) continue;
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ textStarted = true;
+ }
+ yield { type: "message:delta", content: text };
+ continue;
+ }
+ if (t === "response.output_item.added") {
+ const item = evt.item;
+ if (item?.type === "function_call") {
+ const callId = item.call_id ?? item.id ?? "";
+ const itemId = item.id ?? callId;
+ if (callId) {
+ if (itemId && itemId !== callId) {
+ itemIdToCallId.set(itemId, callId);
+ }
+ if (!toolBuffers.has(callId)) {
+ toolBuffers.set(callId, {
+ id: callId,
+ name: item.name ?? "",
+ arguments: item.arguments ?? "",
+ emittedStart: false
+ });
+ }
+ const buf = toolBuffers.get(callId);
+ if (buf.name && !buf.emittedStart) {
+ yield { type: "action:start", id: buf.id, name: buf.name };
+ buf.emittedStart = true;
+ }
+ }
+ }
+ continue;
+ }
+ if (t === "response.function_call_arguments.delta") {
+ const callId = resolveCallId(evt);
+ const delta = evt.delta ?? "";
+ if (!callId || !delta) continue;
+ let buf = toolBuffers.get(callId);
+ if (!buf) {
+ buf = { id: callId, name: "", arguments: "", emittedStart: false };
+ toolBuffers.set(callId, buf);
+ }
+ buf.arguments += delta;
+ if (buf.emittedStart) {
+ yield {
+ type: "action:args",
+ id: buf.id,
+ args: buf.arguments
+ };
+ }
+ continue;
+ }
+ if (t === "response.output_item.done") {
+ const item = evt.item;
+ if (item?.type === "function_call") {
+ const callId = item.call_id ?? item.id ?? "";
+ const buf = toolBuffers.get(callId);
+ const name = buf?.name || item.name || "";
+ const argsStr = buf?.arguments || item.arguments || "{}";
+ if (callId && name) {
+ if (!buf?.emittedStart) {
+ yield { type: "action:start", id: callId, name };
+ }
+ yield {
+ type: "action:args",
+ id: callId,
+ args: argsStr
+ };
+ yield {
+ type: "action:end",
+ id: callId,
+ name
+ };
+ }
+ toolBuffers.delete(callId);
+ }
+ continue;
+ }
+ if (t === "response.completed") {
+ const u = evt.response?.usage;
+ if (u) {
+ usage = {
+ prompt_tokens: u.input_tokens ?? 0,
+ completion_tokens: u.output_tokens ?? 0,
+ total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+ };
+ }
+ for (const buf of toolBuffers.values()) {
+ if (!buf.id || !buf.name) continue;
+ if (!buf.emittedStart) {
+ yield { type: "action:start", id: buf.id, name: buf.name };
+ }
+ yield {
+ type: "action:args",
+ id: buf.id,
+ args: buf.arguments || "{}"
+ };
+ yield { type: "action:end", id: buf.id, name: buf.name };
+ }
+ toolBuffers.clear();
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ }
+ yield { type: "message:end" };
+ yield { type: "done", usage };
+ finishEmitted = true;
+ continue;
+ }
+ if (t === "response.error" || t === "error") {
+ const msg = evt.error?.message || evt.message || "Responses API error";
+ yield {
+ type: "error",
+ message: msg,
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ }
+ } catch (error) {
+ yield {
+ type: "error",
+ message: error instanceof Error ? error.message : "Unknown error",
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ if (!finishEmitted) {
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ }
+ yield { type: "message:end" };
+ yield { type: "done", usage };
+ }
+ }
  async completeWithResponses(request) {
  const client = await this.getClient();
  const openaiToolOptions = request.providerToolOptions?.openai;
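
The docblock above fixes the contract: whichever path produced the stream, consumers see the same event shapes. A minimal consumer sketch in TypeScript (the union below is reconstructed from the events this method yields; the exported type name and exact usage shape are assumptions):

    type StreamEvent =
      | { type: "thinking:start" }
      | { type: "thinking:delta"; content: string }
      | { type: "thinking:end" }
      | { type: "message:delta"; content: string }
      | { type: "message:end" }
      | { type: "action:start"; id: string; name: string }
      | { type: "action:args"; id: string; args: string }
      | { type: "action:end"; id: string; name: string }
      | { type: "done"; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } }
      | { type: "error"; message: string; code?: string };

    async function consume(events: AsyncIterable<StreamEvent>) {
      let reasoning = "";
      let answer = "";
      for await (const evt of events) {
        switch (evt.type) {
          case "thinking:delta": reasoning += evt.content; break; // summary text, never raw CoT
          case "message:delta": answer += evt.content; break;
          case "action:args": /* evt.args is the full accumulated JSON string so far */ break;
          case "done": console.log({ reasoning, answer, usage: evt.usage }); break;
          case "error": throw new Error(evt.message);
        }
      }
    }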
@@ -687,16 +938,37 @@ var OpenAIAdapter = class _OpenAIAdapter {
  name: openaiToolOptions.toolChoice.name
  }
  } : openaiToolOptions?.toolChoice;
+ const isOpenRouter = this.provider === "openrouter";
+ const activeModel = request.config?.model || this.model;
+ const modelSlug = activeModel.replace("openai/", "");
+ const isOSeries = /^o[1-9]/.test(modelSlug);
+ const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+ if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+ yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+ return;
+ }
+ const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
  const payload = {
- model: request.config?.model || this.model,
+ model: activeModel,
  messages,
  tools: tools.length > 0 ? tools : void 0,
  tool_choice: tools.length > 0 ? toolChoice : void 0,
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
- temperature: request.config?.temperature ?? this.config.temperature,
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
  stream: true,
- stream_options: { include_usage: true }
+ stream_options: { include_usage: true },
+ // o-series: use max_completion_tokens + reasoning_effort, no temperature
+ // regular models: use max_tokens + temperature
+ ...isOSeries ? {
+ max_completion_tokens: maxTokensValue,
+ reasoning_effort: request.config?.reasoningEffort ?? "medium"
+ } : {
+ temperature: request.config?.temperature ?? this.config.temperature,
+ max_tokens: maxTokensValue
+ },
+ // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+ // When disableThinking=true we must explicitly send include_reasoning:false because
+ // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+ ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
  };
  logProviderPayload("openai", "request payload", payload, request.debug);
  const stream = await client.chat.completions.create(payload);
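
The two spread branches above yield distinct chat-completions payload shapes. Illustrative literals (model names and numeric values are examples, not package defaults):

    const messages = [{ role: "user" as const, content: "Hello" }];

    // o-series model: reasoning params replace temperature/max_tokens
    const oSeriesPayload = {
      model: "o3-mini",
      messages,
      stream: true,
      stream_options: { include_usage: true },
      max_completion_tokens: 4096,   // instead of max_tokens
      reasoning_effort: "medium",    // from request.config?.reasoningEffort
    };

    // non-OpenAI model on OpenRouter, thinking enabled: OR reasoning params appended
    const openRouterPayload = {
      model: "qwen/qwen3-32b",
      messages,
      stream: true,
      stream_options: { include_usage: true },
      temperature: 0.7,
      max_tokens: 4096,
      reasoning: { max_tokens: 8000 }, // written as 8e3 in the compiled output
      include_reasoning: true,
    };

    // same model with disableThinking: true → only { include_reasoning: false } is added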
@@ -704,6 +976,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
  const collectedCitations = [];
  let citationIndex = 0;
  let usage;
+ let adapterReasoningStarted = false;
  for await (const chunk of stream) {
  logProviderPayload("openai", "stream chunk", chunk, request.debug);
  if (request.signal?.aborted) {
@@ -714,6 +987,22 @@ var OpenAIAdapter = class _OpenAIAdapter {
  if (delta?.content) {
  yield { type: "message:delta", content: delta.content };
  }
+ if (isOpenRouter) {
+ const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+ if (rc) {
+ const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+ if (rcText) {
+ if (!adapterReasoningStarted) {
+ yield { type: "thinking:start" };
+ adapterReasoningStarted = true;
+ }
+ yield { type: "thinking:delta", content: rcText };
+ }
+ } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+ yield { type: "thinking:end" };
+ adapterReasoningStarted = false;
+ }
+ }
  const annotations = delta?.annotations;
  if (annotations && annotations.length > 0) {
  for (const annotation of annotations) {
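
The rc extraction above tolerates several delta shapes that OpenRouter passes through from different upstream models. A runnable sketch (the three shapes are assumptions inferred from the code, not from OpenRouter documentation):

    const deltaA = { reasoning_content: "First, consider..." };    // DeepSeek-style field
    const deltaB = { reasoning: "First, consider..." };            // plain string
    const deltaC = { reasoning: [{ text: "First, consider..." }] }; // array-of-parts

    for (const delta of [deltaA, deltaB, deltaC] as any[]) {
      const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
      const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
      console.log(rcText); // "First, consider..." in all three cases
    }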
@@ -761,6 +1050,11 @@ var OpenAIAdapter = class _OpenAIAdapter {
  };
  } else if (currentToolCall && toolCall.function?.arguments) {
  currentToolCall.arguments += toolCall.function.arguments;
+ yield {
+ type: "action:args",
+ id: currentToolCall.id,
+ args: currentToolCall.arguments
+ };
  }
  }
  }
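
As on the Responses API path, each action:args event here re-sends the full accumulated argument string rather than a fragment, so a consumer overwrites instead of appending (sketch; the handler wiring is hypothetical):

    const pendingArgs = new Map<string, string>();
    function onEvent(evt: { type: string; id?: string; args?: string }) {
      if (evt.type === "action:args" && evt.id && evt.args !== undefined) {
        pendingArgs.set(evt.id, evt.args); // overwrite; evt.args is cumulative
        try {
          JSON.parse(evt.args); // parses only once the model finishes the call
        } catch {
          // still streaming; ignore parse errors until action:end
        }
      }
    }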
@@ -836,15 +1130,24 @@ var OpenAIAdapter = class _OpenAIAdapter {
  name: openaiToolOptions.toolChoice.name
  }
  } : openaiToolOptions?.toolChoice;
+ const activeModel2 = request.config?.model || this.model;
+ const modelSlug2 = activeModel2.replace("openai/", "");
+ const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+ const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
  const payload = {
- model: request.config?.model || this.model,
+ model: activeModel2,
  messages,
  tools: tools.length > 0 ? tools : void 0,
  tool_choice: tools.length > 0 ? toolChoice : void 0,
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
- temperature: request.config?.temperature ?? this.config.temperature,
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
- stream: false
+ stream: false,
+ ...isOSeries2 ? {
+ max_completion_tokens: maxTokensValue2,
+ reasoning_effort: request.config?.reasoningEffort ?? "medium"
+ } : {
+ temperature: request.config?.temperature ?? this.config.temperature,
+ max_tokens: maxTokensValue2
+ }
  };
  logProviderPayload("openai", "request payload", payload, request.debug);
  const response = await client.chat.completions.create(payload);