@yourgpt/llm-sdk 2.1.9 → 2.1.10-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/adapters/index.d.mts +38 -4
  2. package/dist/adapters/index.d.ts +38 -4
  3. package/dist/adapters/index.js +318 -8
  4. package/dist/adapters/index.mjs +318 -8
  5. package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
  6. package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +7 -7
  10. package/dist/index.d.ts +7 -7
  11. package/dist/index.js +43 -23
  12. package/dist/index.mjs +43 -23
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +17 -0
  16. package/dist/providers/anthropic/index.mjs +17 -0
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/fireworks/index.d.mts +1 -1
  20. package/dist/providers/fireworks/index.d.ts +1 -1
  21. package/dist/providers/google/index.d.mts +3 -3
  22. package/dist/providers/google/index.d.ts +3 -3
  23. package/dist/providers/google/index.js +311 -8
  24. package/dist/providers/google/index.mjs +311 -8
  25. package/dist/providers/ollama/index.d.mts +4 -4
  26. package/dist/providers/ollama/index.d.ts +4 -4
  27. package/dist/providers/openai/index.d.mts +3 -3
  28. package/dist/providers/openai/index.d.ts +3 -3
  29. package/dist/providers/openai/index.js +321 -8
  30. package/dist/providers/openai/index.mjs +321 -8
  31. package/dist/providers/openrouter/index.d.mts +7 -3
  32. package/dist/providers/openrouter/index.d.ts +7 -3
  33. package/dist/providers/openrouter/index.js +601 -11
  34. package/dist/providers/openrouter/index.mjs +601 -11
  35. package/dist/providers/togetherai/index.d.mts +3 -3
  36. package/dist/providers/togetherai/index.d.ts +3 -3
  37. package/dist/providers/togetherai/index.js +311 -8
  38. package/dist/providers/togetherai/index.mjs +311 -8
  39. package/dist/providers/xai/index.d.mts +3 -3
  40. package/dist/providers/xai/index.d.ts +3 -3
  41. package/dist/providers/xai/index.js +311 -8
  42. package/dist/providers/xai/index.mjs +311 -8
  43. package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
  44. package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
  45. package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
  46. package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
  47. package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
  48. package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
  49. package/dist/yourgpt/index.d.mts +1 -1
  50. package/dist/yourgpt/index.d.ts +1 -1
  51. package/package.json +1 -1
package/dist/providers/togetherai/index.d.mts
@@ -1,7 +1,7 @@
-import { L as LanguageModel } from '../../types-CR8mi9I0.mjs';
-import { A as AIProvider } from '../../types-DRqxMIjF.mjs';
+import { L as LanguageModel } from '../../types-CMvvDo-E.mjs';
+import { A as AIProvider } from '../../types-CMMQ8s2O.mjs';
 import 'zod';
-import '../../base-D-U61JaB.mjs';
+import '../../base-DN1EfKnE.mjs';
 
 /**
  * Together AI Provider
package/dist/providers/togetherai/index.d.ts
@@ -1,7 +1,7 @@
-import { L as LanguageModel } from '../../types-CR8mi9I0.js';
-import { A as AIProvider } from '../../types-BctsnC3g.js';
+import { L as LanguageModel } from '../../types-CMvvDo-E.js';
+import { A as AIProvider } from '../../types-DhktekQ3.js';
 import 'zod';
-import '../../base-iGi9Va6Z.js';
+import '../../base-DuUNxtVg.js';
 
 /**
  * Together AI Provider
@@ -425,6 +425,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
     if (baseUrl.includes("x.ai")) return "xai";
     if (baseUrl.includes("azure")) return "azure";
+    if (baseUrl.includes("openrouter.ai")) return "openrouter";
     return "openai";
   }
   async getClient() {
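
The new branch changes how an adapter constructed from a base URL alone is classified: OpenRouter endpoints previously fell through to "openai". A minimal sketch of the effect, using a hypothetical standalone inferProvider helper that mirrors the adapter's logic (illustration only; the real check lives on OpenAIAdapter):

// Hypothetical mirror of the adapter's base-URL inference.
function inferProvider(baseUrl: string): string {
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
  if (baseUrl.includes("x.ai")) return "xai";
  if (baseUrl.includes("azure")) return "azure";
  if (baseUrl.includes("openrouter.ai")) return "openrouter"; // new in 2.1.10-alpha.0
  return "openai";
}

inferProvider("https://openrouter.ai/api/v1"); // "openrouter" (2.1.9 returned "openai")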
@@ -524,6 +525,256 @@ var OpenAIAdapter = class _OpenAIAdapter {
       rawResponse: response
     };
   }
+  /**
+   * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+   * reasoning content on the chat-completions endpoint. To surface reasoning
+   * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+   * Responses API, which streams `response.reasoning_summary_text.delta` events.
+   *
+   * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+   * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+   */
+  isOpenAIReasoningModelOnOpenRouter(activeModel) {
+    if (this.provider !== "openrouter") return false;
+    return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+  }
+  /**
+   * Convert ActionDefinition[] (the chat-completions tool shape used by the
+   * adapter) to the Responses API tool shape.
+   */
+  buildResponsesToolsFromActions(actions) {
+    if (!actions || actions.length === 0) return void 0;
+    const formatted = formatTools(actions);
+    return formatted.map((t) => ({
+      type: "function",
+      name: t.function.name,
+      description: t.function.description,
+      parameters: t.function.parameters
+    }));
+  }
+  /**
+   * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+   *
+   * Maps Responses API SSE events back to the same StreamEvent shapes the
+   * chat-completions path emits, so downstream consumers (processChunk.ts,
+   * frontend tool handlers, plan approval, specialist delegations) see
+   * identical events regardless of which path produced them.
+   *
+   * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+   * response.output_text.delta → message:delta
+   * response.output_item.added (function_call) → action:start (queued buffer)
+   * response.function_call_arguments.delta → action:args (progressive)
+   * response.output_item.done (function_call) → final action:args + action:end
+   * response.completed → message:end + done(usage)
+   * response.error → error
+   */
+  async *streamWithResponsesAPI(request, activeModel, messageId) {
+    const client = await this.getClient();
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+    const payload = {
+      model: activeModel,
+      input: this.buildResponsesInput(request),
+      stream: true,
+      reasoning: {
+        effort: request.config?.reasoningEffort ?? "medium",
+        summary: "auto"
+      }
+    };
+    if (request.systemPrompt) payload.instructions = request.systemPrompt;
+    if (typeof maxTokensValue === "number")
+      payload.max_output_tokens = maxTokensValue;
+    const tools = this.buildResponsesToolsFromActions(request.actions);
+    if (tools && tools.length > 0) payload.tools = tools;
+    logProviderPayload(
+      "openai",
+      "responses-api request payload",
+      payload,
+      request.debug
+    );
+    let stream;
+    try {
+      stream = await client.responses.create(payload);
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    const toolBuffers = /* @__PURE__ */ new Map();
+    const itemIdToCallId = /* @__PURE__ */ new Map();
+    let usage;
+    let reasoningStarted = false;
+    let textStarted = false;
+    let finishEmitted = false;
+    const resolveCallId = (evt) => {
+      if (evt?.call_id) return evt.call_id;
+      if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+      if (evt?.item?.call_id) return evt.item.call_id;
+      if (evt?.item?.id) return evt.item.id;
+      return "";
+    };
+    try {
+      for await (const evt of stream) {
+        logProviderPayload(
+          "openai",
+          "responses-api stream chunk",
+          evt,
+          request.debug
+        );
+        if (request.signal?.aborted) break;
+        const t = evt?.type ?? "";
+        if (t === "response.reasoning_summary_text.delta") {
+          const delta = evt.delta ?? "";
+          if (!delta) continue;
+          if (!reasoningStarted) {
+            yield { type: "thinking:start" };
+            reasoningStarted = true;
+          }
+          yield { type: "thinking:delta", content: delta };
+          continue;
+        }
+        if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+          continue;
+        }
+        if (t === "response.output_text.delta") {
+          const text = evt.delta ?? "";
+          if (!text) continue;
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+            textStarted = true;
+          }
+          yield { type: "message:delta", content: text };
+          continue;
+        }
+        if (t === "response.output_item.added") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const itemId = item.id ?? callId;
+            if (callId) {
+              if (itemId && itemId !== callId) {
+                itemIdToCallId.set(itemId, callId);
+              }
+              if (!toolBuffers.has(callId)) {
+                toolBuffers.set(callId, {
+                  id: callId,
+                  name: item.name ?? "",
+                  arguments: item.arguments ?? "",
+                  emittedStart: false
+                });
+              }
+              const buf = toolBuffers.get(callId);
+              if (buf.name && !buf.emittedStart) {
+                yield { type: "action:start", id: buf.id, name: buf.name };
+                buf.emittedStart = true;
+              }
+            }
+          }
+          continue;
+        }
+        if (t === "response.function_call_arguments.delta") {
+          const callId = resolveCallId(evt);
+          const delta = evt.delta ?? "";
+          if (!callId || !delta) continue;
+          let buf = toolBuffers.get(callId);
+          if (!buf) {
+            buf = { id: callId, name: "", arguments: "", emittedStart: false };
+            toolBuffers.set(callId, buf);
+          }
+          buf.arguments += delta;
+          if (buf.emittedStart) {
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments
+            };
+          }
+          continue;
+        }
+        if (t === "response.output_item.done") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const buf = toolBuffers.get(callId);
+            const name = buf?.name || item.name || "";
+            const argsStr = buf?.arguments || item.arguments || "{}";
+            if (callId && name) {
+              if (!buf?.emittedStart) {
+                yield { type: "action:start", id: callId, name };
+              }
+              yield {
+                type: "action:args",
+                id: callId,
+                args: argsStr
+              };
+              yield {
+                type: "action:end",
+                id: callId,
+                name
+              };
+            }
+            toolBuffers.delete(callId);
+          }
+          continue;
+        }
+        if (t === "response.completed") {
+          const u = evt.response?.usage;
+          if (u) {
+            usage = {
+              prompt_tokens: u.input_tokens ?? 0,
+              completion_tokens: u.output_tokens ?? 0,
+              total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+            };
+          }
+          for (const buf of toolBuffers.values()) {
+            if (!buf.id || !buf.name) continue;
+            if (!buf.emittedStart) {
+              yield { type: "action:start", id: buf.id, name: buf.name };
+            }
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments || "{}"
+            };
+            yield { type: "action:end", id: buf.id, name: buf.name };
+          }
+          toolBuffers.clear();
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+          }
+          yield { type: "message:end" };
+          yield { type: "done", usage };
+          finishEmitted = true;
+          continue;
+        }
+        if (t === "response.error" || t === "error") {
+          const msg = evt.error?.message || evt.message || "Responses API error";
+          yield {
+            type: "error",
+            message: msg,
+            code: "OPENAI_RESPONSES_ERROR"
+          };
+          return;
+        }
+      }
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    if (!finishEmitted) {
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+      }
+      yield { type: "message:end" };
+      yield { type: "done", usage };
+    }
+  }
   async completeWithResponses(request) {
     const client = await this.getClient();
     const openaiToolOptions = request.providerToolOptions?.openai;
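
Both streaming paths now emit the same event vocabulary. As a sketch of what a downstream consumer sees, here is a StreamEvent union inferred from the events yielded above (the SDK's actual exported type may be broader; field names are taken verbatim from the diff) plus a minimal renderer:

// StreamEvent union inferred from the adapter code above (assumption:
// the SDK's real exported type may differ).
type StreamEvent =
  | { type: "thinking:start" }
  | { type: "thinking:delta"; content: string }
  | { type: "thinking:end" }
  | { type: "message:delta"; content: string }
  | { type: "message:end" }
  | { type: "action:start"; id: string; name: string }
  | { type: "action:args"; id: string; args: string }
  | { type: "action:end"; id: string; name: string }
  | { type: "done"; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } }
  | { type: "error"; message: string; code?: string };

// Minimal consumer: works unchanged whether the adapter took the
// chat-completions path or the Responses API path.
async function render(events: AsyncIterable<StreamEvent>): Promise<void> {
  for await (const evt of events) {
    switch (evt.type) {
      case "thinking:delta":
        process.stdout.write(evt.content); // reasoning summary text
        break;
      case "message:delta":
        process.stdout.write(evt.content); // answer text
        break;
      case "action:args":
        // evt.args is the cumulative argument buffer so far, not a delta
        break;
      case "done":
        console.log("\nusage:", evt.usage);
        break;
      case "error":
        console.error(`${evt.code ?? "ERROR"}: ${evt.message}`);
        break;
    }
  }
}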
@@ -657,16 +908,37 @@ var OpenAIAdapter = class _OpenAIAdapter {
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const isOpenRouter = this.provider === "openrouter";
+    const activeModel = request.config?.model || this.model;
+    const modelSlug = activeModel.replace("openai/", "");
+    const isOSeries = /^o[1-9]/.test(modelSlug);
+    const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+    if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+      yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+      return;
+    }
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
       stream: true,
-      stream_options: { include_usage: true }
+      stream_options: { include_usage: true },
+      // o-series: use max_completion_tokens + reasoning_effort, no temperature
+      // regular models: use max_tokens + temperature
+      ...isOSeries ? {
+        max_completion_tokens: maxTokensValue,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue
+      },
+      // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+      // When disableThinking=true we must explicitly send include_reasoning:false because
+      // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+      ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const stream = await client.chat.completions.create(payload);
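
The two spreads above are mutually exclusive parameter sets: o-series and other reasoning models take max_completion_tokens plus reasoning_effort and no temperature, while everything else keeps the classic temperature/max_tokens pair, with the OpenRouter reasoning knobs layered on top for non-OpenAI models. A sketch of the resulting payloads (values are illustrative; the keys mirror the diff above):

// Illustration only: made-up values, keys taken from the payload above.
const maxTokensValue = 4096;

// openai/o3-mini: /^o[1-9]/ matches the "o3-mini" slug after the prefix strip
const oSeriesPayload = {
  model: "openai/o3-mini",
  stream: true,
  stream_options: { include_usage: true },
  max_completion_tokens: maxTokensValue,
  reasoning_effort: "medium"
};

// deepseek/deepseek-r1 on OpenRouter with thinking enabled: the adapter also
// asks OpenRouter to return reasoning, capped at 8000 reasoning tokens
const openRouterReasoningPayload = {
  model: "deepseek/deepseek-r1",
  stream: true,
  stream_options: { include_usage: true },
  temperature: 0.7,
  max_tokens: maxTokensValue,
  reasoning: { max_tokens: 8e3 },
  include_reasoning: true
};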
@@ -674,6 +946,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     const collectedCitations = [];
     let citationIndex = 0;
     let usage;
+    let adapterReasoningStarted = false;
     for await (const chunk of stream) {
       logProviderPayload("openai", "stream chunk", chunk, request.debug);
       if (request.signal?.aborted) {
@@ -684,6 +957,22 @@ var OpenAIAdapter = class _OpenAIAdapter {
       if (delta?.content) {
         yield { type: "message:delta", content: delta.content };
       }
+      if (isOpenRouter) {
+        const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+        if (rc) {
+          const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+          if (rcText) {
+            if (!adapterReasoningStarted) {
+              yield { type: "thinking:start" };
+              adapterReasoningStarted = true;
+            }
+            yield { type: "thinking:delta", content: rcText };
+          }
+        } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+          yield { type: "thinking:end" };
+          adapterReasoningStarted = false;
+        }
+      }
       const annotations = delta?.annotations;
       if (annotations && annotations.length > 0) {
         for (const annotation of annotations) {
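
OpenRouter surfaces reasoning deltas under two field names (reasoning_content or reasoning) and in two shapes (a plain string, or an array of { text } blocks), and the branch above normalizes both before emitting thinking events. A standalone sketch of that normalization, as a hypothetical helper mirroring the rcText expression:

// Hypothetical mirror of the rcText normalization above (illustration only).
function extractReasoningText(rc: unknown): string {
  if (typeof rc === "string") return rc; // string-style deltas
  if (Array.isArray(rc) && rc[0]?.text) return rc[0].text; // block-style deltas
  return "";
}

extractReasoningText("thinking...");        // "thinking..."
extractReasoningText([{ text: "step 1" }]); // "step 1"
extractReasoningText(null);                 // ""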
@@ -731,6 +1020,11 @@ var OpenAIAdapter = class _OpenAIAdapter {
          };
        } else if (currentToolCall && toolCall.function?.arguments) {
          currentToolCall.arguments += toolCall.function.arguments;
+          yield {
+            type: "action:args",
+            id: currentToolCall.id,
+            args: currentToolCall.arguments
+          };
        }
      }
    }
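
With this change the chat-completions path also emits action:args on every argument fragment, carrying the cumulative buffer rather than the fragment itself. A consumer that wants live argument previews can attempt a parse per event and ignore buffers that are not yet valid JSON; a minimal sketch:

// The buffer grows toward valid JSON; intermediate states fail to parse.
function tryParseArgs(args: string): Record<string, unknown> | undefined {
  try {
    return JSON.parse(args);
  } catch {
    return undefined; // still streaming; wait for the next action:args event
  }
}

tryParseArgs('{"city": "Ber');      // undefined (incomplete)
tryParseArgs('{"city": "Berlin"}'); // { city: "Berlin" }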
@@ -806,15 +1100,24 @@ var OpenAIAdapter = class _OpenAIAdapter {
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const activeModel2 = request.config?.model || this.model;
+    const modelSlug2 = activeModel2.replace("openai/", "");
+    const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+    const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model: request.config?.model || this.model,
+      model: activeModel2,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
-      stream: false
+      stream: false,
+      ...isOSeries2 ? {
+        max_completion_tokens: maxTokensValue2,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue2
+      }
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const response = await client.chat.completions.create(payload);