@yourgpt/llm-sdk 2.1.8 → 2.1.10-alpha.0
- package/dist/adapters/index.d.mts +38 -4
- package/dist/adapters/index.d.ts +38 -4
- package/dist/adapters/index.js +318 -8
- package/dist/adapters/index.mjs +318 -8
- package/dist/{base-iGi9Va6Z.d.ts → base-DN1EfKnE.d.mts} +2 -1
- package/dist/{base-D-U61JaB.d.mts → base-DuUNxtVg.d.ts} +2 -1
- package/dist/fallback/index.d.mts +4 -4
- package/dist/fallback/index.d.ts +4 -4
- package/dist/index.d.mts +7 -7
- package/dist/index.d.ts +7 -7
- package/dist/index.js +43 -23
- package/dist/index.mjs +43 -23
- package/dist/providers/anthropic/index.d.mts +3 -3
- package/dist/providers/anthropic/index.d.ts +3 -3
- package/dist/providers/anthropic/index.js +17 -0
- package/dist/providers/anthropic/index.mjs +17 -0
- package/dist/providers/azure/index.d.mts +3 -3
- package/dist/providers/azure/index.d.ts +3 -3
- package/dist/providers/fireworks/index.d.mts +1 -1
- package/dist/providers/fireworks/index.d.ts +1 -1
- package/dist/providers/google/index.d.mts +3 -3
- package/dist/providers/google/index.d.ts +3 -3
- package/dist/providers/google/index.js +311 -8
- package/dist/providers/google/index.mjs +311 -8
- package/dist/providers/ollama/index.d.mts +4 -4
- package/dist/providers/ollama/index.d.ts +4 -4
- package/dist/providers/openai/index.d.mts +3 -3
- package/dist/providers/openai/index.d.ts +3 -3
- package/dist/providers/openai/index.js +321 -8
- package/dist/providers/openai/index.mjs +321 -8
- package/dist/providers/openrouter/index.d.mts +7 -3
- package/dist/providers/openrouter/index.d.ts +7 -3
- package/dist/providers/openrouter/index.js +601 -11
- package/dist/providers/openrouter/index.mjs +601 -11
- package/dist/providers/togetherai/index.d.mts +61 -2
- package/dist/providers/togetherai/index.d.ts +61 -2
- package/dist/providers/togetherai/index.js +1030 -2
- package/dist/providers/togetherai/index.mjs +1029 -2
- package/dist/providers/xai/index.d.mts +3 -3
- package/dist/providers/xai/index.d.ts +3 -3
- package/dist/providers/xai/index.js +311 -8
- package/dist/providers/xai/index.mjs +311 -8
- package/dist/{types-D4YfrQJR.d.mts → types-BNCmlJMs.d.mts} +1 -1
- package/dist/{types-DRqxMIjF.d.mts → types-CMMQ8s2O.d.mts} +1 -1
- package/dist/{types-CR8mi9I0.d.ts → types-CMvvDo-E.d.mts} +12 -1
- package/dist/{types-CR8mi9I0.d.mts → types-CMvvDo-E.d.ts} +12 -1
- package/dist/{types-BctsnC3g.d.ts → types-DhktekQ3.d.ts} +1 -1
- package/dist/{types-38yolWJn.d.ts → types-Pj-vpmoT.d.ts} +1 -1
- package/dist/yourgpt/index.d.mts +1 -1
- package/dist/yourgpt/index.d.ts +1 -1
- package/package.json +1 -1
package/dist/adapters/index.d.mts
CHANGED
@@ -1,7 +1,7 @@
-import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-
-export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-
-import { d as OllamaModelOptions } from '../types-
-import '../types-
+import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-DN1EfKnE.mjs';
+export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-DN1EfKnE.mjs';
+import { d as OllamaModelOptions } from '../types-CMMQ8s2O.mjs';
+import '../types-CMvvDo-E.mjs';
 import 'zod';
 
 /**
@@ -13,6 +13,8 @@ interface OpenAIAdapterConfig {
     baseUrl?: string;
     temperature?: number;
     maxTokens?: number;
+    /** Disable extended thinking/reasoning for OpenRouter models */
+    disableThinking?: boolean;
     /**
      * Enable native web search for GPT models.
      * Uses OpenAI's web_search_preview tool.
@@ -36,6 +38,38 @@ declare class OpenAIAdapter implements LLMAdapter {
     private buildResponsesInput;
     private buildResponsesTools;
     private parseResponsesResult;
+    /**
+     * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+     * reasoning content on the chat-completions endpoint. To surface reasoning
+     * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+     * Responses API, which streams `response.reasoning_summary_text.delta` events.
+     *
+     * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+     * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+     */
+    private isOpenAIReasoningModelOnOpenRouter;
+    /**
+     * Convert ActionDefinition[] (the chat-completions tool shape used by the
+     * adapter) to the Responses API tool shape.
+     */
+    private buildResponsesToolsFromActions;
+    /**
+     * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+     *
+     * Maps Responses API SSE events back to the same StreamEvent shapes the
+     * chat-completions path emits, so downstream consumers (processChunk.ts,
+     * frontend tool handlers, plan approval, specialist delegations) see
+     * identical events regardless of which path produced them.
+     *
+     * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+     * response.output_text.delta → message:delta
+     * response.output_item.added (function_call) → action:start (queued buffer)
+     * response.function_call_arguments.delta → action:args (progressive)
+     * response.output_item.done (function_call) → final action:args + action:end
+     * response.completed → message:end + done(usage)
+     * response.error → error
+     */
+    private streamWithResponsesAPI;
     private completeWithResponses;
     stream(request: ChatCompletionRequest): AsyncGenerator<StreamEvent>;
     complete(request: ChatCompletionRequest): Promise<CompletionResult>;
package/dist/adapters/index.d.ts
CHANGED
@@ -1,7 +1,7 @@
-import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-
-export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-
-import { d as OllamaModelOptions } from '../types-
-import '../types-
+import { L as LLMAdapter, W as WebSearchConfig, C as ChatCompletionRequest, S as StreamEvent, o as CompletionResult } from '../base-DuUNxtVg.js';
+export { e as AdapterFactory, B as AnthropicContentBlock, E as OpenAIContentBlock, y as attachmentToAnthropicDocument, x as attachmentToAnthropicImage, z as attachmentToOpenAIImage, p as formatMessages, r as formatMessagesForAnthropic, s as formatMessagesForOpenAI, q as formatTools, v as hasImageAttachments, w as hasMediaAttachments, t as messageToAnthropicContent, u as messageToOpenAIContent } from '../base-DuUNxtVg.js';
+import { d as OllamaModelOptions } from '../types-DhktekQ3.js';
+import '../types-CMvvDo-E.js';
 import 'zod';
 
 /**
@@ -13,6 +13,8 @@ interface OpenAIAdapterConfig {
     baseUrl?: string;
     temperature?: number;
     maxTokens?: number;
+    /** Disable extended thinking/reasoning for OpenRouter models */
+    disableThinking?: boolean;
     /**
      * Enable native web search for GPT models.
      * Uses OpenAI's web_search_preview tool.
@@ -36,6 +38,38 @@ declare class OpenAIAdapter implements LLMAdapter {
    private buildResponsesInput;
    private buildResponsesTools;
    private parseResponsesResult;
+    /**
+     * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+     * reasoning content on the chat-completions endpoint. To surface reasoning
+     * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+     * Responses API, which streams `response.reasoning_summary_text.delta` events.
+     *
+     * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+     * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+     */
+    private isOpenAIReasoningModelOnOpenRouter;
+    /**
+     * Convert ActionDefinition[] (the chat-completions tool shape used by the
+     * adapter) to the Responses API tool shape.
+     */
+    private buildResponsesToolsFromActions;
+    /**
+     * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+     *
+     * Maps Responses API SSE events back to the same StreamEvent shapes the
+     * chat-completions path emits, so downstream consumers (processChunk.ts,
+     * frontend tool handlers, plan approval, specialist delegations) see
+     * identical events regardless of which path produced them.
+     *
+     * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+     * response.output_text.delta → message:delta
+     * response.output_item.added (function_call) → action:start (queued buffer)
+     * response.function_call_arguments.delta → action:args (progressive)
+     * response.output_item.done (function_call) → final action:args + action:end
+     * response.completed → message:end + done(usage)
+     * response.error → error
+     */
+    private streamWithResponsesAPI;
     private completeWithResponses;
     stream(request: ChatCompletionRequest): AsyncGenerator<StreamEvent>;
     complete(request: ChatCompletionRequest): Promise<CompletionResult>;
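The JSDoc above defines the StreamEvent contract that both streaming paths emit. A minimal consumer sketch in TypeScript, assuming the `LLMAdapter` and `ChatCompletionRequest` types are re-exported from the package root and that event payloads carry the fields shown in the compiled output below (`content`, `id`, `args`, `usage`, `message`, `code`); this is an illustration, not code shipped in the package:

import type { LLMAdapter, ChatCompletionRequest } from '@yourgpt/llm-sdk';

// Drain an adapter stream; event shapes follow the JSDoc mapping above.
async function consume(adapter: LLMAdapter, request: ChatCompletionRequest): Promise<void> {
  for await (const event of adapter.stream(request)) {
    switch (event.type) {
      case "thinking:delta":  // reasoning summary text (never raw chain-of-thought)
      case "message:delta":   // assistant output text
        process.stdout.write(event.content);
        break;
      case "action:args":     // args is the full accumulated JSON string so far, not a fragment
        console.log(`tool call ${event.id} args:`, event.args);
        break;
      case "done":            // usage may be undefined if the provider omitted it
        console.log("usage:", event.usage);
        break;
      case "error":
        throw new Error(`${event.code}: ${event.message}`);
    }
  }
}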
package/dist/adapters/index.js
CHANGED
@@ -382,6 +382,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
     if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
     if (baseUrl.includes("x.ai")) return "xai";
     if (baseUrl.includes("azure")) return "azure";
+    if (baseUrl.includes("openrouter.ai")) return "openrouter";
     return "openai";
   }
   async getClient() {
@@ -481,6 +482,256 @@
       rawResponse: response
     };
   }
+  /**
+   * OpenAI reasoning models on OpenRouter (o1/o3/o4/gpt-5 family) hide their
+   * reasoning content on the chat-completions endpoint. To surface reasoning
+   * SUMMARIES (not raw CoT, which OpenAI never exposes) we have to use the
+   * Responses API, which streams `response.reasoning_summary_text.delta` events.
+   *
+   * Match by prefix on the OpenRouter model id. Excludes openai/gpt-4o,
+   * openai/gpt-4.1, openai/chatgpt-* — those continue on chat-completions.
+   */
+  isOpenAIReasoningModelOnOpenRouter(activeModel) {
+    if (this.provider !== "openrouter") return false;
+    return activeModel.startsWith("openai/o1") || activeModel.startsWith("openai/o3") || activeModel.startsWith("openai/o4") || activeModel.startsWith("openai/gpt-5");
+  }
+  /**
+   * Convert ActionDefinition[] (the chat-completions tool shape used by the
+   * adapter) to the Responses API tool shape.
+   */
+  buildResponsesToolsFromActions(actions) {
+    if (!actions || actions.length === 0) return void 0;
+    const formatted = formatTools(actions);
+    return formatted.map((t) => ({
+      type: "function",
+      name: t.function.name,
+      description: t.function.description,
+      parameters: t.function.parameters
+    }));
+  }
+  /**
+   * Streaming Responses API path for OpenAI reasoning models on OpenRouter.
+   *
+   * Maps Responses API SSE events back to the same StreamEvent shapes the
+   * chat-completions path emits, so downstream consumers (processChunk.ts,
+   * frontend tool handlers, plan approval, specialist delegations) see
+   * identical events regardless of which path produced them.
+   *
+   * response.reasoning_summary_text.delta → thinking:start (once) + thinking:delta
+   * response.output_text.delta → message:delta
+   * response.output_item.added (function_call) → action:start (queued buffer)
+   * response.function_call_arguments.delta → action:args (progressive)
+   * response.output_item.done (function_call) → final action:args + action:end
+   * response.completed → message:end + done(usage)
+   * response.error → error
+   */
+  async *streamWithResponsesAPI(request, activeModel, messageId) {
+    const client = await this.getClient();
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
+    const payload = {
+      model: activeModel,
+      input: this.buildResponsesInput(request),
+      stream: true,
+      reasoning: {
+        effort: request.config?.reasoningEffort ?? "medium",
+        summary: "auto"
+      }
+    };
+    if (request.systemPrompt) payload.instructions = request.systemPrompt;
+    if (typeof maxTokensValue === "number")
+      payload.max_output_tokens = maxTokensValue;
+    const tools = this.buildResponsesToolsFromActions(request.actions);
+    if (tools && tools.length > 0) payload.tools = tools;
+    logProviderPayload(
+      "openai",
+      "responses-api request payload",
+      payload,
+      request.debug
+    );
+    let stream;
+    try {
+      stream = await client.responses.create(payload);
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    const toolBuffers = /* @__PURE__ */ new Map();
+    const itemIdToCallId = /* @__PURE__ */ new Map();
+    let usage;
+    let reasoningStarted = false;
+    let textStarted = false;
+    let finishEmitted = false;
+    const resolveCallId = (evt) => {
+      if (evt?.call_id) return evt.call_id;
+      if (evt?.item_id) return itemIdToCallId.get(evt.item_id) ?? evt.item_id;
+      if (evt?.item?.call_id) return evt.item.call_id;
+      if (evt?.item?.id) return evt.item.id;
+      return "";
+    };
+    try {
+      for await (const evt of stream) {
+        logProviderPayload(
+          "openai",
+          "responses-api stream chunk",
+          evt,
+          request.debug
+        );
+        if (request.signal?.aborted) break;
+        const t = evt?.type ?? "";
+        if (t === "response.reasoning_summary_text.delta") {
+          const delta = evt.delta ?? "";
+          if (!delta) continue;
+          if (!reasoningStarted) {
+            yield { type: "thinking:start" };
+            reasoningStarted = true;
+          }
+          yield { type: "thinking:delta", content: delta };
+          continue;
+        }
+        if (t === "response.reasoning_summary_text.done" || t === "response.reasoning.done") {
+          continue;
+        }
+        if (t === "response.output_text.delta") {
+          const text = evt.delta ?? "";
+          if (!text) continue;
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+            textStarted = true;
+          }
+          yield { type: "message:delta", content: text };
+          continue;
+        }
+        if (t === "response.output_item.added") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const itemId = item.id ?? callId;
+            if (callId) {
+              if (itemId && itemId !== callId) {
+                itemIdToCallId.set(itemId, callId);
+              }
+              if (!toolBuffers.has(callId)) {
+                toolBuffers.set(callId, {
+                  id: callId,
+                  name: item.name ?? "",
+                  arguments: item.arguments ?? "",
+                  emittedStart: false
+                });
+              }
+              const buf = toolBuffers.get(callId);
+              if (buf.name && !buf.emittedStart) {
+                yield { type: "action:start", id: buf.id, name: buf.name };
+                buf.emittedStart = true;
+              }
+            }
+          }
+          continue;
+        }
+        if (t === "response.function_call_arguments.delta") {
+          const callId = resolveCallId(evt);
+          const delta = evt.delta ?? "";
+          if (!callId || !delta) continue;
+          let buf = toolBuffers.get(callId);
+          if (!buf) {
+            buf = { id: callId, name: "", arguments: "", emittedStart: false };
+            toolBuffers.set(callId, buf);
+          }
+          buf.arguments += delta;
+          if (buf.emittedStart) {
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments
+            };
+          }
+          continue;
+        }
+        if (t === "response.output_item.done") {
+          const item = evt.item;
+          if (item?.type === "function_call") {
+            const callId = item.call_id ?? item.id ?? "";
+            const buf = toolBuffers.get(callId);
+            const name = buf?.name || item.name || "";
+            const argsStr = buf?.arguments || item.arguments || "{}";
+            if (callId && name) {
+              if (!buf?.emittedStart) {
+                yield { type: "action:start", id: callId, name };
+              }
+              yield {
+                type: "action:args",
+                id: callId,
+                args: argsStr
+              };
+              yield {
+                type: "action:end",
+                id: callId,
+                name
+              };
+            }
+            toolBuffers.delete(callId);
+          }
+          continue;
+        }
+        if (t === "response.completed") {
+          const u = evt.response?.usage;
+          if (u) {
+            usage = {
+              prompt_tokens: u.input_tokens ?? 0,
+              completion_tokens: u.output_tokens ?? 0,
+              total_tokens: u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0)
+            };
+          }
+          for (const buf of toolBuffers.values()) {
+            if (!buf.id || !buf.name) continue;
+            if (!buf.emittedStart) {
+              yield { type: "action:start", id: buf.id, name: buf.name };
+            }
+            yield {
+              type: "action:args",
+              id: buf.id,
+              args: buf.arguments || "{}"
+            };
+            yield { type: "action:end", id: buf.id, name: buf.name };
+          }
+          toolBuffers.clear();
+          if (reasoningStarted && !textStarted) {
+            yield { type: "thinking:end" };
+          }
+          yield { type: "message:end" };
+          yield { type: "done", usage };
+          finishEmitted = true;
+          continue;
+        }
+        if (t === "response.error" || t === "error") {
+          const msg = evt.error?.message || evt.message || "Responses API error";
+          yield {
+            type: "error",
+            message: msg,
+            code: "OPENAI_RESPONSES_ERROR"
+          };
+          return;
+        }
+      }
+    } catch (error) {
+      yield {
+        type: "error",
+        message: error instanceof Error ? error.message : "Unknown error",
+        code: "OPENAI_RESPONSES_ERROR"
+      };
+      return;
+    }
+    if (!finishEmitted) {
+      if (reasoningStarted && !textStarted) {
+        yield { type: "thinking:end" };
+      }
+      yield { type: "message:end" };
+      yield { type: "done", usage };
+    }
+  }
   async completeWithResponses(request) {
     const client = await this.getClient();
     const openaiToolOptions = request.providerToolOptions?.openai;
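For reference, the guard in isOpenAIReasoningModelOnOpenRouter only fires when the detected provider is "openrouter"; the model-id check itself reduces to the prefix test below (restated in TypeScript for illustration, not exported by the package):

// Restatement of the prefix match above.
const routesToResponsesAPI = (id: string): boolean =>
  id.startsWith("openai/o1") ||
  id.startsWith("openai/o3") ||
  id.startsWith("openai/o4") ||
  id.startsWith("openai/gpt-5");

routesToResponsesAPI("openai/o3-mini");           // true  -> Responses API, reasoning summaries
routesToResponsesAPI("openai/gpt-5");             // true
routesToResponsesAPI("openai/gpt-4o");            // false -> stays on chat-completions
routesToResponsesAPI("openai/chatgpt-4o-latest"); // false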
@@ -614,16 +865,37 @@
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const isOpenRouter = this.provider === "openrouter";
+    const activeModel = request.config?.model || this.model;
+    const modelSlug = activeModel.replace("openai/", "");
+    const isOSeries = /^o[1-9]/.test(modelSlug);
+    const isOpenAIOnOpenRouter = isOpenRouter && activeModel.startsWith("openai/");
+    if (!this.config.disableThinking && this.isOpenAIReasoningModelOnOpenRouter(activeModel)) {
+      yield* this.streamWithResponsesAPI(request, activeModel, messageId);
+      return;
+    }
+    const maxTokensValue = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model:
+      model: activeModel,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-      temperature: request.config?.temperature ?? this.config.temperature,
-      max_tokens: request.config?.maxTokens ?? this.config.maxTokens,
       stream: true,
-      stream_options: { include_usage: true }
+      stream_options: { include_usage: true },
+      // o-series: use max_completion_tokens + reasoning_effort, no temperature
+      // regular models: use max_tokens + temperature
+      ...isOSeries ? {
+        max_completion_tokens: maxTokensValue,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue
+      },
+      // Non-OpenAI OpenRouter models support OR's reasoning/include_reasoning params.
+      // When disableThinking=true we must explicitly send include_reasoning:false because
+      // models like Qwen3 and DeepSeek-R1 reason by default even without the reasoning param.
+      ...isOpenRouter && !isOpenAIOnOpenRouter ? this.config.disableThinking ? { include_reasoning: false } : { reasoning: { max_tokens: 8e3 }, include_reasoning: true } : {}
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const stream = await client.chat.completions.create(payload);
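To make the branching above concrete, here is a sketch of how the two spread arms combine for a few sample model ids (the token and temperature values are assumed for illustration, not defaults from the package):

// Sketch of the payload branch above with assumed config values (4096 tokens, temp 0.7).
function buildPayloadExtras(model: string, disableThinking: boolean) {
  const isOpenRouter = true; // assume an OpenRouter base URL was detected
  const slug = model.replace("openai/", "");
  const isOSeries = /^o[1-9]/.test(slug);
  const isOpenAIOnOpenRouter = isOpenRouter && model.startsWith("openai/");
  return {
    ...(isOSeries
      ? { max_completion_tokens: 4096, reasoning_effort: "medium" } // o-series: no temperature
      : { temperature: 0.7, max_tokens: 4096 }),
    ...(isOpenRouter && !isOpenAIOnOpenRouter
      ? disableThinking
        ? { include_reasoning: false } // explicit opt-out: R1/Qwen3-style models reason by default
        : { reasoning: { max_tokens: 8000 }, include_reasoning: true }
      : {}),
  };
}

buildPayloadExtras("openai/o3-mini", false);
// -> { max_completion_tokens: 4096, reasoning_effort: "medium" }
buildPayloadExtras("deepseek/deepseek-r1", false);
// -> { temperature: 0.7, max_tokens: 4096, reasoning: { max_tokens: 8000 }, include_reasoning: true }
buildPayloadExtras("deepseek/deepseek-r1", true);
// -> { temperature: 0.7, max_tokens: 4096, include_reasoning: false }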
@@ -631,6 +903,7 @@
     const collectedCitations = [];
     let citationIndex = 0;
     let usage;
+    let adapterReasoningStarted = false;
     for await (const chunk of stream) {
       logProviderPayload("openai", "stream chunk", chunk, request.debug);
       if (request.signal?.aborted) {
@@ -641,6 +914,22 @@
       if (delta?.content) {
         yield { type: "message:delta", content: delta.content };
       }
+      if (isOpenRouter) {
+        const rc = delta?.reasoning_content ?? delta?.reasoning ?? null;
+        if (rc) {
+          const rcText = typeof rc === "string" ? rc : Array.isArray(rc) && rc[0]?.text ? rc[0].text : "";
+          if (rcText) {
+            if (!adapterReasoningStarted) {
+              yield { type: "thinking:start" };
+              adapterReasoningStarted = true;
+            }
+            yield { type: "thinking:delta", content: rcText };
+          }
+        } else if (adapterReasoningStarted && (delta?.content || choice?.finish_reason)) {
+          yield { type: "thinking:end" };
+          adapterReasoningStarted = false;
+        }
+      }
       const annotations = delta?.annotations;
       if (annotations && annotations.length > 0) {
         for (const annotation of annotations) {
@@ -688,6 +977,11 @@
         };
       } else if (currentToolCall && toolCall.function?.arguments) {
         currentToolCall.arguments += toolCall.function.arguments;
+        yield {
+          type: "action:args",
+          id: currentToolCall.id,
+          args: currentToolCall.arguments
+        };
       }
     }
   }
@@ -763,15 +1057,24 @@
         name: openaiToolOptions.toolChoice.name
       }
     } : openaiToolOptions?.toolChoice;
+    const activeModel2 = request.config?.model || this.model;
+    const modelSlug2 = activeModel2.replace("openai/", "");
+    const isOSeries2 = /^o[1-9]/.test(modelSlug2);
+    const maxTokensValue2 = request.config?.maxTokens ?? this.config.maxTokens;
     const payload = {
-      model:
+      model: activeModel2,
       messages,
       tools: tools.length > 0 ? tools : void 0,
       tool_choice: tools.length > 0 ? toolChoice : void 0,
       parallel_tool_calls: tools.length > 0 ? openaiToolOptions?.parallelToolCalls : void 0,
-
-
-
+      stream: false,
+      ...isOSeries2 ? {
+        max_completion_tokens: maxTokensValue2,
+        reasoning_effort: request.config?.reasoningEffort ?? "medium"
+      } : {
+        temperature: request.config?.temperature ?? this.config.temperature,
+        max_tokens: maxTokensValue2
+      }
     };
     logProviderPayload("openai", "request payload", payload, request.debug);
     const response = await client.chat.completions.create(payload);
@@ -1281,6 +1584,13 @@ var AnthropicAdapter = class {
           yield { type: "thinking:delta", content: event.delta.thinking };
         } else if (event.delta.type === "input_json_delta" && currentToolUse) {
           currentToolUse.input += event.delta.partial_json;
+          if (currentToolUse.name !== "web_search") {
+            yield {
+              type: "action:args",
+              id: currentToolUse.id,
+              args: currentToolUse.input
+            };
+          }
         }
         break;
       case "content_block_stop":