@yourgpt/llm-sdk 2.1.8 → 2.1.10-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/adapters/index.d.mts +38 -4
  2. package/dist/adapters/index.d.ts +38 -4
  3. package/dist/adapters/index.js +318 -8
  4. package/dist/adapters/index.mjs +318 -8
  5. package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
  6. package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
  7. package/dist/fallback/index.d.mts +4 -4
  8. package/dist/fallback/index.d.ts +4 -4
  9. package/dist/index.d.mts +7 -7
  10. package/dist/index.d.ts +7 -7
  11. package/dist/index.js +43 -23
  12. package/dist/index.mjs +43 -23
  13. package/dist/providers/anthropic/index.d.mts +3 -3
  14. package/dist/providers/anthropic/index.d.ts +3 -3
  15. package/dist/providers/anthropic/index.js +17 -0
  16. package/dist/providers/anthropic/index.mjs +17 -0
  17. package/dist/providers/azure/index.d.mts +3 -3
  18. package/dist/providers/azure/index.d.ts +3 -3
  19. package/dist/providers/fireworks/index.d.mts +1 -1
  20. package/dist/providers/fireworks/index.d.ts +1 -1
  21. package/dist/providers/google/index.d.mts +3 -3
  22. package/dist/providers/google/index.d.ts +3 -3
  23. package/dist/providers/google/index.js +311 -8
  24. package/dist/providers/google/index.mjs +311 -8
  25. package/dist/providers/ollama/index.d.mts +4 -4
  26. package/dist/providers/ollama/index.d.ts +4 -4
  27. package/dist/providers/openai/index.d.mts +3 -3
  28. package/dist/providers/openai/index.d.ts +3 -3
  29. package/dist/providers/openai/index.js +321 -8
  30. package/dist/providers/openai/index.mjs +321 -8
  31. package/dist/providers/openrouter/index.d.mts +7 -3
  32. package/dist/providers/openrouter/index.d.ts +7 -3
  33. package/dist/providers/openrouter/index.js +601 -11
  34. package/dist/providers/openrouter/index.mjs +601 -11
  35. package/dist/providers/togetherai/index.d.mts +61 -2
  36. package/dist/providers/togetherai/index.d.ts +61 -2
  37. package/dist/providers/togetherai/index.js +1030 -2
  38. package/dist/providers/togetherai/index.mjs +1029 -2
  39. package/dist/providers/xai/index.d.mts +3 -3
  40. package/dist/providers/xai/index.d.ts +3 -3
  41. package/dist/providers/xai/index.js +311 -8
  42. package/dist/providers/xai/index.mjs +311 -8
  43. package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
  44. package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
  45. package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
  46. package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
  47. package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
  48. package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
  49. package/dist/yourgpt/index.d.mts +1 -1
  50. package/dist/yourgpt/index.d.ts +1 -1
  51. package/package.json +1 -1
@@ -1,7 +1,7 @@
- import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-D-U61JaB.mjs';
- export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-D-U61JaB.mjs';
- import { d as OllamaModelOptions } from '../types-DRqxMIjF.mjs';
- import '../types-CR8mi9I0.mjs';
+ import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-DN1EfKnE.mjs';
+ export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-DN1EfKnE.mjs';
+ import { d as OllamaModelOptions } from '../types-CMMQ8s2O.mjs';
+ import '../types-CMvvDo-E.mjs';
  import 'zod';

  /**
@@ -13,6 +13,8 @@ interface OpenAIAdapterConfig {
  baseUrl?: string;
  temperature?: number;
  maxTokens?: number;
+ /** Disable extended thinking/reasoning for OpenRouter models */
+ disableThinking?: boolean;
  /**
  * Enable native web search for GPT models.
  * Uses OpenAI's web_search_preview tool.
@@ -36,6 +38,38 @@ declare class OpenAIAdapter implements LLMAdapter {
  private buildResponsesInput;
  private buildResponsesTools;
  private parseResponsesResult;
+ /**
+ * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+ * reasoning content on the chat-completions endpoint. To surface reasoning
+ * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+ * Responses API, which streams `response.reasoning_summary_text.delta` events.
+ *
+ * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+ * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+ */
+ private isOpenAIReasoningModelOnOpenRouter;
+ /**
+ * Convert ActionDefinition[] (the chat-completions tool shape used by the
+ * adapter) to the Responses API tool shape.
+ */
+ private buildResponsesToolsFromActions;
+ /**
+ * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+ *
+ * Maps Responses API SSE events back to the same StreamEvent shapes the
+ * chat-completions path emits, so downstream consumers (processChunk.ts,
+ * frontend tool handlers, plan approval, specialist delegations) see
+ * identical events regardless of which path produced them.
+ *
+ * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+ * response.output_text.delta → message:delta
+ * response.output_item.added (function_call) → action:start (queued buffer)
+ * response.function_call_arguments.delta → action:args (progressive)
+ * response.output_item.done (function_call) → final action:args + action:end
+ * response.completed → message:end + done(usage)
+ * response.error → error
+ */
+ private streamWithResponsesAPI;
  private completeWithResponses;
  stream(request: ChatCompletionRequest): AsyncGenerator<StreamEvent>;
  complete(request: ChatCompletionRequest): Promise<CompletionResult>;
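The new surface is easiest to read from the consumer side. A minimal sketch follows, assuming the adapter is imported from the package's adapters entry point as declared above; the constructor's apiKey field, the message shape, and the model id are illustrative assumptions, not taken from this diff:

```ts
// Sketch only (not from the diff): exercises the new disableThinking flag
// plus the StreamEvent shapes the declarations above describe.
import { OpenAIAdapter } from "@yourgpt/llm-sdk/adapters";

const adapter = new OpenAIAdapter({
  apiKey: process.env.OPENROUTER_API_KEY, // assumed field name
  baseUrl: "https://openrouter.ai/api/v1", // detected as "openrouter"
  disableThinking: false // new in 2.1.10-alpha.0; true suppresses reasoning output
});

for await (const event of adapter.stream({
  messages: [{ role: "user", content: "Compare these two designs." }], // assumed shape
  config: { model: "openai/o3" } // openai/o* prefix routes to the Responses API path
})) {
  if (event.type === "thinking:delta") process.stdout.write(event.content); // reasoning summary
  if (event.type === "message:delta") process.stdout.write(event.content); // answer text
  if (event.type === "done") console.log("\nusage:", event.usage);
}
```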
@@ -1,7 +1,7 @@
- import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-iGi9Va6Z.js';
- export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-iGi9Va6Z.js';
- import { d as OllamaModelOptions } from '../types-BctsnC3g.js';
- import '../types-CR8mi9I0.js';
+ import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-DuUNxtVg.js';
+ export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-DuUNxtVg.js';
+ import { d as OllamaModelOptions } from '../types-DhktekQ3.js';
+ import '../types-CMvvDo-E.js';
  import 'zod';

  /**
@@ -13,6 +13,8 @@ interface OpenAIAdapterConfig {
  baseUrl?: string;
  temperature?: number;
  maxTokens?: number;
+ /** Disable extended thinking/reasoning for OpenRouter models */
+ disableThinking?: boolean;
  /**
  * Enable native web search for GPT models.
  * Uses OpenAI's web_search_preview tool.
@@ -36,6 +38,38 @@ declare class OpenAIAdapter implements LLMAdapter {
  private buildResponsesInput;
  private buildResponsesTools;
  private parseResponsesResult;
+ /**
+ * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+ * reasoning content on the chat-completions endpoint. To surface reasoning
+ * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+ * Responses API, which streams `response.reasoning_summary_text.delta` events.
+ *
+ * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+ * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+ */
+ private isOpenAIReasoningModelOnOpenRouter;
+ /**
+ * Convert ActionDefinition[] (the chat-completions tool shape used by the
+ * adapter) to the Responses API tool shape.
+ */
+ private buildResponsesToolsFromActions;
+ /**
+ * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+ *
+ * Maps Responses API SSE events back to the same StreamEvent shapes the
+ * chat-completions path emits, so downstream consumers (processChunk.ts,
+ * frontend tool handlers, plan approval, specialist delegations) see
+ * identical events regardless of which path produced them.
+ *
+ * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+ * response.output_text.delta → message:delta
+ * response.output_item.added (function_call) → action:start (queued buffer)
+ * response.function_call_arguments.delta → action:args (progressive)
+ * response.output_item.done (function_call) → final action:args + action:end
+ * response.completed → message:end + done(usage)
+ * response.error → error
+ */
+ private streamWithResponsesAPI;
  private completeWithResponses;
  stream(request: ChatCompletionRequest): AsyncGenerator<StreamEvent>;
  complete(request: ChatCompletionRequest): Promise<CompletionResult>;
@@ -382,6 +382,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
  if (baseUrl.includes("x.ai")) return "xai";
  if (baseUrl.includes("azure")) return "azure";
+ if (baseUrl.includes("openrouter.ai")) return "openrouter";
  return "openai";
  }
  async getClient() {
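For reference, the detection above reduces to a substring check over the configured base URL. This standalone restatement mirrors the hunk (the sample URL is an assumption, not part of the package):

```ts
// Mirrors the provider-detection logic in the hunk above; not an SDK export.
function detectProvider(baseUrl: string): string {
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
  if (baseUrl.includes("x.ai")) return "xai";
  if (baseUrl.includes("azure")) return "azure";
  if (baseUrl.includes("openrouter.ai")) return "openrouter"; // new in this release
  return "openai";
}

console.log(detectProvider("https://openrouter.ai/api/v1")); // "openrouter"
```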
@@ -481,6 +482,256 @@ var OpenAIAdapter = class _OpenAIAdapter {
  rawResponse: response
  };
  }
+ /**
+ * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+ * reasoning content on the chat-completions endpoint. To surface reasoning
+ * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+ * Responses API, which streams `response.reasoning_summary_text.delta` events.
+ *
+ * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+ * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+ */
+ isOpenAIReasoningModelOnOpenRouter(activeModel) {
+ if (this.provider !== "openrouter") return false;
+ return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+ }
+ /**
+ * Convert ActionDefinition[] (the chat-completions tool shape used by the
+ * adapter) to the Responses API tool shape.
+ */
+ buildResponsesToolsFromActions(actions) {
+ if (!actions || actions.length === 0) return void 0;
+ const formatted = formatTools(actions);
+ return formatted.map((t) => ({
+ type: "function",
+ name: t.function.name,
+ description: t.function.description,
+ parameters: t.function.parameters
+ }));
+ }
+ /**
+ * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+ *
+ * Maps Responses API SSE events back to the same StreamEvent shapes the
+ * chat-completions path emits, so downstream consumers (processChunk.ts,
+ * frontend tool handlers, plan approval, specialist delegations) see
+ * identical events regardless of which path produced them.
+ *
+ * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+ * response.output_text.delta → message:delta
+ * response.output_item.added (function_call) → action:start (queued buffer)
+ * response.function_call_arguments.delta → action:args (progressive)
+ * response.output_item.done (function_call) → final action:args + action:end
+ * response.completed → message:end + done(usage)
+ * response.error → error
+ */
+ async *streamWithResponsesAPI(request, activeModel, messageId) {
+ const client = await this.getClient();
+ const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+ const payload = {
+ model: activeModel,
+ input: this.buildResponsesInput(request),
+ stream: true,
+ reasoning: {
+ effort: request.config?.reasoningEffort ?? "medium",
+ summary: "auto"
+ }
+ };
+ if (request.systemPrompt) payload.instructions = request.systemPrompt;
+ if (typeof maxTokensValue === "number")
+ payload.max_output_tokens = maxTokensValue;
+ const tools = this.buildResponsesToolsFromActions(request.actions);
+ if (tools && tools.length > 0) payload.tools = tools;
+ logProviderPayload(
+ "openai",
+ "responses-api request payload",
+ payload,
+ request.debug
+ );
+ let stream;
+ try {
+ stream = await client.responses.create(payload);
+ } catch (error) {
+ yield {
+ type: "error",
+ message: error instanceof Error ? error.message : "Unknown error",
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ const toolBuffers = /* @__PURE__ */ new Map();
+ const itemIdToCallId = /* @__PURE__ */ new Map();
+ let usage;
+ let reasoningStarted = false;
+ let textStarted = false;
+ let finishEmitted = false;
+ const resolveCallId = (evt) => {
+ if (evt?.call_id) return evt.call_id;
+ if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+ if (evt?.item?.call_id) return evt.item.call_id;
+ if (evt?.item?.id) return evt.item.id;
+ return "";
+ };
+ try {
+ for await (const evt of stream) {
+ logProviderPayload(
+ "openai",
+ "responses-api stream chunk",
+ evt,
+ request.debug
+ );
+ if (request.signal?.aborted) break;
+ const t = evt?.type ?? "";
+ if (t === "response.reasoning_summary_text.delta") {
+ const delta = evt.delta ?? "";
+ if (!delta) continue;
+ if (!reasoningStarted) {
+ yield { type: "thinking:start" };
+ reasoningStarted = true;
+ }
+ yield { type: "thinking:delta", content: delta };
+ continue;
+ }
+ if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+ continue;
+ }
+ if (t === "response.output_text.delta") {
+ const text = evt.delta ?? "";
+ if (!text) continue;
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ textStarted = true;
+ }
+ yield { type: "message:delta", content: text };
+ continue;
+ }
+ if (t === "response.output_item.added") {
+ const item = evt.item;
+ if (item?.type === "function_call") {
+ const callId = item.call_id ?? item.id ?? "";
+ const itemId = item.id ?? callId;
+ if (callId) {
+ if (itemId && itemId !== callId) {
+ itemIdToCallId.set(itemId, callId);
+ }
+ if (!toolBuffers.has(callId)) {
+ toolBuffers.set(callId, {
+ id: callId,
+ name: item.name ?? "",
+ arguments: item.arguments ?? "",
+ emittedStart: false
+ });
+ }
+ const buf = toolBuffers.get(callId);
+ if (buf.name && !buf.emittedStart) {
+ yield { type: "action:start", id: buf.id, name: buf.name };
+ buf.emittedStart = true;
+ }
+ }
+ }
+ continue;
+ }
+ if (t === "response.function_call_arguments.delta") {
+ const callId = resolveCallId(evt);
+ const delta = evt.delta ?? "";
+ if (!callId || !delta) continue;
+ let buf = toolBuffers.get(callId);
+ if (!buf) {
+ buf = { id: callId, name: "", arguments: "", emittedStart: false };
+ toolBuffers.set(callId, buf);
+ }
+ buf.arguments += delta;
+ if (buf.emittedStart) {
+ yield {
+ type: "action:args",
+ id: buf.id,
+ args: buf.arguments
+ };
+ }
+ continue;
+ }
+ if (t === "response.output_item.done") {
+ const item = evt.item;
+ if (item?.type === "function_call") {
+ const callId = item.call_id ?? item.id ?? "";
+ const buf = toolBuffers.get(callId);
+ const name = buf?.name || item.name || "";
+ const argsStr = buf?.arguments || item.arguments || "{}";
+ if (callId && name) {
+ if (!buf?.emittedStart) {
+ yield { type: "action:start", id: callId, name };
+ }
+ yield {
+ type: "action:args",
+ id: callId,
+ args: argsStr
+ };
+ yield {
+ type: "action:end",
+ id: callId,
+ name
+ };
+ }
+ toolBuffers.delete(callId);
+ }
+ continue;
+ }
+ if (t === "response.completed") {
+ const u = evt.response?.usage;
+ if (u) {
+ usage = {
+ prompt_tokens: u.input_tokens ?? 0,
+ completion_tokens: u.output_tokens ?? 0,
+ total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+ };
+ }
+ for (const buf of toolBuffers.values()) {
+ if (!buf.id || !buf.name) continue;
+ if (!buf.emittedStart) {
+ yield { type: "action:start", id: buf.id, name: buf.name };
+ }
+ yield {
+ type: "action:args",
+ id: buf.id,
+ args: buf.arguments || "{}"
+ };
+ yield { type: "action:end", id: buf.id, name: buf.name };
+ }
+ toolBuffers.clear();
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ }
+ yield { type: "message:end" };
+ yield { type: "done", usage };
+ finishEmitted = true;
+ continue;
+ }
+ if (t === "response.error" || t === "error") {
+ const msg = evt.error?.message || evt.message || "Responses API error";
+ yield {
+ type: "error",
+ message: msg,
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ }
+ } catch (error) {
+ yield {
+ type: "error",
+ message: error instanceof Error ? error.message : "Unknown error",
+ code: "OPENAI_RESPONSES_ERROR"
+ };
+ return;
+ }
+ if (!finishEmitted) {
+ if (reasoningStarted && !textStarted) {
+ yield { type: "thinking:end" };
+ }
+ yield { type: "message:end" };
+ yield { type: "done", usage };
+ }
+ }
  async completeWithResponses(request) {
  const client = await this.getClient();
  const openaiToolOptions = request.providerToolOptions?.openai;
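Note that `action:args` carries the cumulative argument buffer, not just the latest delta. A sketch of how a downstream consumer might reassemble tool calls from this event sequence (the dispatch helper is hypothetical, not part of the package):

```ts
// Sketch (assumed consumer code, not from the package): rebuilds tool calls
// from the action:* events emitted by streamWithResponsesAPI above.
declare function dispatchTool(name: string, args: unknown): void; // placeholder

const pending = new Map<string, { name: string; args: string }>();

function handleEvent(event: { type: string; id?: string; name?: string; args?: string }) {
  switch (event.type) {
    case "action:start": // call id and tool name are known from here on
      pending.set(event.id!, { name: event.name!, args: "" });
      break;
    case "action:args": { // args is the full buffer so far: overwrite, don't append
      const call = pending.get(event.id!);
      if (call) call.args = event.args!;
      break;
    }
    case "action:end": { // arguments are complete, valid JSON at this point
      const done = pending.get(event.id!);
      pending.delete(event.id!);
      if (done) dispatchTool(done.name, JSON.parse(done.args || "{}"));
      break;
    }
  }
}
```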
@@ -614,16 +865,37 @@ var OpenAIAdapter = class _OpenAIAdapter {
  name: openaiToolOptions.toolChoice.name
  }
  } : openaiToolOptions?.toolChoice;
+ const isOpenRouter = this.provider === "openrouter";
+ const activeModel = request.config?.model || this.model;
+ const modelSlug = activeModel.replace("openai/", "");
+ const isOSeries = /^o[1-9]/.test(modelSlug);
+ const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+ if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+ yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+ return;
+ }
+ const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
  const payload = {
- model: request.config?.model || this.model,
+ model: activeModel,
  messages,
  tools: tools.length > 0 ? tools : void 0,
  tool_choice: tools.length > 0 ? toolChoice : void 0,
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
- temperature: request.config?.temperature ?? this.config.temperature,
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
  stream: true,
- stream_options: { include_usage: true }
+ stream_options: { include_usage: true },
+ // o-series: use max_completion_tokens + reasoning_effort, no temperature
+ // regular models: use max_tokens + temperature
+ ...isOSeries ? {
+ max_completion_tokens: maxTokensValue,
+ reasoning_effort: request.config?.reasoningEffort ?? "medium"
+ } : {
+ temperature: request.config?.temperature ?? this.config.temperature,
+ max_tokens: maxTokensValue
+ },
+ // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+ // When disableThinking=true we must explicitly send include_reasoning:false because
+ // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+ ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
  };
  logProviderPayload("openai", "request payload", payload, request.debug);
  const stream = await client.chat.completions.create(payload);
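Spelled out, the conditional spreads above produce two payload families. The literals below are illustrative (model ids, token budgets, and temperature are assumptions), but the field names match the diff:

```ts
// o-series model: no temperature; reasoning knobs instead of max_tokens.
const oSeriesPayload = {
  model: "o3-mini", // assumed id
  stream: true,
  stream_options: { include_usage: true },
  max_completion_tokens: 4096, // replaces max_tokens
  reasoning_effort: "medium"
};

// Non-OpenAI model routed through OpenRouter, with thinking enabled.
const openRouterPayload = {
  model: "deepseek/deepseek-r1", // assumed id
  stream: true,
  stream_options: { include_usage: true },
  temperature: 0.7, // assumed value
  max_tokens: 4096,
  reasoning: { max_tokens: 8000 }, // the diff's 8e3 reasoning budget
  include_reasoning: true // sent as false when disableThinking is set
};
```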
@@ -631,6 +903,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
  const collectedCitations = [];
  let citationIndex = 0;
  let usage;
+ let adapterReasoningStarted = false;
  for await (const chunk of stream) {
  logProviderPayload("openai", "stream chunk", chunk, request.debug);
  if (request.signal?.aborted) {
@@ -641,6 +914,22 @@ var OpenAIAdapter = class _OpenAIAdapter {
  if (delta?.content) {
  yield { type: "message:delta", content: delta.content };
  }
+ if (isOpenRouter) {
+ const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+ if (rc) {
+ const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+ if (rcText) {
+ if (!adapterReasoningStarted) {
+ yield { type: "thinking:start" };
+ adapterReasoningStarted = true;
+ }
+ yield { type: "thinking:delta", content: rcText };
+ }
+ } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+ yield { type: "thinking:end" };
+ adapterReasoningStarted = false;
+ }
+ }
  const annotations = delta?.annotations;
  if (annotations && annotations.length > 0) {
  for (const annotation of annotations) {
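The normalization above exists because OpenRouter models report reasoning in two shapes. This standalone restatement mirrors the extraction in the hunk (sample text is assumed):

```ts
// Mirrors the reasoning-delta normalization above; not an SDK export.
function extractReasoningText(delta: any): string {
  const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
  if (!rc) return "";
  if (typeof rc === "string") return rc; // plain-string reasoning_content form
  return Array.isArray(rc) && rc[0]?.text ? rc[0].text : ""; // array-of-parts form
}

extractReasoningText({ reasoning_content: "First, compare..." });      // "First, compare..."
extractReasoningText({ reasoning: [{ text: "First, compare..." }] });  // "First, compare..."
```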
@@ -688,6 +977,11 @@ var OpenAIAdapter = class _OpenAIAdapter {
  };
  } else if (currentToolCall && toolCall.function?.arguments) {
  currentToolCall.arguments += toolCall.function.arguments;
+ yield {
+ type: "action:args",
+ id: currentToolCall.id,
+ args: currentToolCall.arguments
+ };
  }
  }
  }
@@ -763,15 +1057,24 @@ var OpenAIAdapter = class _OpenAIAdapter {
  name: openaiToolOptions.toolChoice.name
  }
  } : openaiToolOptions?.toolChoice;
+ const activeModel2 = request.config?.model || this.model;
+ const modelSlug2 = activeModel2.replace("openai/", "");
+ const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+ const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
  const payload = {
- model: request.config?.model || this.model,
+ model: activeModel2,
  messages,
  tools: tools.length > 0 ? tools : void 0,
  tool_choice: tools.length > 0 ? toolChoice : void 0,
  parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
- temperature: request.config?.temperature ?? this.config.temperature,
- max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
- stream: false
+ stream: false,
+ ...isOSeries2 ? {
+ max_completion_tokens: maxTokensValue2,
+ reasoning_effort: request.config?.reasoningEffort ?? "medium"
+ } : {
+ temperature: request.config?.temperature ?? this.config.temperature,
+ max_tokens: maxTokensValue2
+ }
  };
  logProviderPayload("openai", "request payload", payload, request.debug);
  const response = await client.chat.completions.create(payload);
@@ -1281,6 +1584,13 @@ var AnthropicAdapter = class {
  yield { type: "thinking:delta", content: event.delta.thinking };
  } else if (event.delta.type === "input_json_delta" && currentToolUse) {
  currentToolUse.input += event.delta.partial_json;
+ if (currentToolUse.name !== "web_search") {
+ yield {
+ type: "action:args",
+ id: currentToolUse.id,
+ args: currentToolUse.input
+ };
+ }
  }
  break;
  case "content_block_stop":