@librechat/agents 3.1.52 → 3.1.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +16 -5
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +59 -5
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +16 -2
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +16 -5
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +59 -5
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +16 -2
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +1 -0
- package/dist/esm/main.mjs.map +1 -1
- package/dist/types/index.d.ts +2 -0
- package/dist/types/llm/bedrock/utils/message_outputs.d.ts +1 -1
- package/dist/types/llm/google/index.d.ts +2 -3
- package/dist/types/llm/openrouter/index.d.ts +21 -1
- package/dist/types/llm/vertexai/index.d.ts +2 -1
- package/dist/types/types/llm.d.ts +7 -2
- package/package.json +1 -1
- package/src/index.ts +6 -0
- package/src/llm/bedrock/llm.spec.ts +233 -4
- package/src/llm/bedrock/utils/message_outputs.ts +51 -11
- package/src/llm/google/index.ts +2 -3
- package/src/llm/openrouter/index.ts +117 -6
- package/src/llm/openrouter/reasoning.test.ts +207 -0
- package/src/llm/vertexai/index.ts +20 -3
- package/src/scripts/bedrock-cache-debug.ts +250 -0
- package/src/specs/openrouter.simple.test.ts +163 -2
- package/src/types/llm.ts +7 -2
- package/src/utils/llmConfig.ts +3 -4
|
@@ -5,16 +5,24 @@ config();
|
|
|
5
5
|
import { expect, test, describe, jest } from '@jest/globals';
|
|
6
6
|
import {
|
|
7
7
|
AIMessage,
|
|
8
|
-
|
|
8
|
+
ToolMessage,
|
|
9
9
|
HumanMessage,
|
|
10
10
|
SystemMessage,
|
|
11
|
-
|
|
11
|
+
AIMessageChunk,
|
|
12
12
|
} from '@langchain/core/messages';
|
|
13
13
|
import { concat } from '@langchain/core/utils/stream';
|
|
14
14
|
import { ChatGenerationChunk } from '@langchain/core/outputs';
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
BedrockRuntimeClient,
|
|
17
|
+
ConverseCommand,
|
|
18
|
+
} from '@aws-sdk/client-bedrock-runtime';
|
|
19
|
+
import type { ConverseResponse } from '@aws-sdk/client-bedrock-runtime';
|
|
20
|
+
import {
|
|
21
|
+
convertConverseMessageToLangChainMessage,
|
|
22
|
+
handleConverseStreamMetadata,
|
|
23
|
+
convertToConverseMessages,
|
|
24
|
+
} from './utils';
|
|
16
25
|
import { CustomChatBedrockConverse, ServiceTierType } from './index';
|
|
17
|
-
import { convertToConverseMessages } from './utils';
|
|
18
26
|
|
|
19
27
|
jest.setTimeout(120000);
|
|
20
28
|
|
|
@@ -429,6 +437,164 @@ describe('CustomChatBedrockConverse', () => {
|
|
|
429
437
|
});
|
|
430
438
|
});
|
|
431
439
|
|
|
440
|
+
describe('handleConverseStreamMetadata - cache token extraction', () => {
|
|
441
|
+
test('should extract cacheReadInputTokens and cacheWriteInputTokens into input_token_details', () => {
|
|
442
|
+
const metadata = {
|
|
443
|
+
usage: {
|
|
444
|
+
inputTokens: 13,
|
|
445
|
+
outputTokens: 5,
|
|
446
|
+
totalTokens: 10849,
|
|
447
|
+
cacheReadInputTokens: 10831,
|
|
448
|
+
cacheWriteInputTokens: 0,
|
|
449
|
+
},
|
|
450
|
+
metrics: { latencyMs: 1000 },
|
|
451
|
+
};
|
|
452
|
+
|
|
453
|
+
const chunk = handleConverseStreamMetadata(metadata, {
|
|
454
|
+
streamUsage: true,
|
|
455
|
+
});
|
|
456
|
+
const msg = chunk.message as AIMessageChunk;
|
|
457
|
+
|
|
458
|
+
expect(msg.usage_metadata).toEqual({
|
|
459
|
+
input_tokens: 13,
|
|
460
|
+
output_tokens: 5,
|
|
461
|
+
total_tokens: 10849,
|
|
462
|
+
input_token_details: {
|
|
463
|
+
cache_read: 10831,
|
|
464
|
+
cache_creation: 0,
|
|
465
|
+
},
|
|
466
|
+
});
|
|
467
|
+
});
|
|
468
|
+
|
|
469
|
+
test('should not include input_token_details when no cache tokens present', () => {
|
|
470
|
+
const metadata = {
|
|
471
|
+
usage: {
|
|
472
|
+
inputTokens: 100,
|
|
473
|
+
outputTokens: 50,
|
|
474
|
+
totalTokens: 150,
|
|
475
|
+
},
|
|
476
|
+
metrics: { latencyMs: 500 },
|
|
477
|
+
};
|
|
478
|
+
|
|
479
|
+
const chunk = handleConverseStreamMetadata(metadata, {
|
|
480
|
+
streamUsage: true,
|
|
481
|
+
});
|
|
482
|
+
const msg = chunk.message as AIMessageChunk;
|
|
483
|
+
|
|
484
|
+
expect(msg.usage_metadata).toEqual({
|
|
485
|
+
input_tokens: 100,
|
|
486
|
+
output_tokens: 50,
|
|
487
|
+
total_tokens: 150,
|
|
488
|
+
});
|
|
489
|
+
expect(msg.usage_metadata?.input_token_details).toBeUndefined();
|
|
490
|
+
});
|
|
491
|
+
|
|
492
|
+
test('should include input_token_details when only cacheWriteInputTokens is present', () => {
|
|
493
|
+
const metadata = {
|
|
494
|
+
usage: {
|
|
495
|
+
inputTokens: 50,
|
|
496
|
+
outputTokens: 10,
|
|
497
|
+
totalTokens: 10060,
|
|
498
|
+
cacheWriteInputTokens: 10000,
|
|
499
|
+
},
|
|
500
|
+
metrics: { latencyMs: 800 },
|
|
501
|
+
};
|
|
502
|
+
|
|
503
|
+
const chunk = handleConverseStreamMetadata(metadata, {
|
|
504
|
+
streamUsage: true,
|
|
505
|
+
});
|
|
506
|
+
const msg = chunk.message as AIMessageChunk;
|
|
507
|
+
|
|
508
|
+
expect(msg.usage_metadata?.input_token_details).toEqual({
|
|
509
|
+
cache_read: 0,
|
|
510
|
+
cache_creation: 10000,
|
|
511
|
+
});
|
|
512
|
+
});
|
|
513
|
+
|
|
514
|
+
test('should return undefined usage_metadata when streamUsage is false', () => {
|
|
515
|
+
const metadata = {
|
|
516
|
+
usage: {
|
|
517
|
+
inputTokens: 13,
|
|
518
|
+
outputTokens: 5,
|
|
519
|
+
totalTokens: 10849,
|
|
520
|
+
cacheReadInputTokens: 10831,
|
|
521
|
+
cacheWriteInputTokens: 0,
|
|
522
|
+
},
|
|
523
|
+
metrics: { latencyMs: 1000 },
|
|
524
|
+
};
|
|
525
|
+
|
|
526
|
+
const chunk = handleConverseStreamMetadata(metadata, {
|
|
527
|
+
streamUsage: false,
|
|
528
|
+
});
|
|
529
|
+
const msg = chunk.message as AIMessageChunk;
|
|
530
|
+
|
|
531
|
+
expect(msg.usage_metadata).toBeUndefined();
|
|
532
|
+
});
|
|
533
|
+
});
|
|
534
|
+
|
|
535
|
+
describe('convertConverseMessageToLangChainMessage - cache token extraction', () => {
|
|
536
|
+
const makeResponseMetadata = (
|
|
537
|
+
usage: Record<string, number>
|
|
538
|
+
): Omit<ConverseResponse, 'output'> =>
|
|
539
|
+
({
|
|
540
|
+
usage,
|
|
541
|
+
stopReason: 'end_turn',
|
|
542
|
+
metrics: undefined,
|
|
543
|
+
$metadata: { requestId: 'test-id' },
|
|
544
|
+
}) as unknown as Omit<ConverseResponse, 'output'>;
|
|
545
|
+
|
|
546
|
+
test('should extract cache tokens in non-streaming response', () => {
|
|
547
|
+
const message = {
|
|
548
|
+
role: 'assistant' as const,
|
|
549
|
+
content: [{ text: 'Hello!' }],
|
|
550
|
+
};
|
|
551
|
+
|
|
552
|
+
const result = convertConverseMessageToLangChainMessage(
|
|
553
|
+
message,
|
|
554
|
+
makeResponseMetadata({
|
|
555
|
+
inputTokens: 20,
|
|
556
|
+
outputTokens: 5,
|
|
557
|
+
totalTokens: 10856,
|
|
558
|
+
cacheReadInputTokens: 10831,
|
|
559
|
+
cacheWriteInputTokens: 0,
|
|
560
|
+
})
|
|
561
|
+
);
|
|
562
|
+
|
|
563
|
+
expect(result.usage_metadata).toEqual({
|
|
564
|
+
input_tokens: 20,
|
|
565
|
+
output_tokens: 5,
|
|
566
|
+
total_tokens: 10856,
|
|
567
|
+
input_token_details: {
|
|
568
|
+
cache_read: 10831,
|
|
569
|
+
cache_creation: 0,
|
|
570
|
+
},
|
|
571
|
+
});
|
|
572
|
+
});
|
|
573
|
+
|
|
574
|
+
test('should not include input_token_details when no cache tokens in non-streaming response', () => {
|
|
575
|
+
const message = {
|
|
576
|
+
role: 'assistant' as const,
|
|
577
|
+
content: [{ text: 'Hello!' }],
|
|
578
|
+
};
|
|
579
|
+
|
|
580
|
+
const result = convertConverseMessageToLangChainMessage(
|
|
581
|
+
message,
|
|
582
|
+
makeResponseMetadata({
|
|
583
|
+
inputTokens: 100,
|
|
584
|
+
outputTokens: 50,
|
|
585
|
+
totalTokens: 150,
|
|
586
|
+
})
|
|
587
|
+
);
|
|
588
|
+
|
|
589
|
+
expect(result.usage_metadata).toEqual({
|
|
590
|
+
input_tokens: 100,
|
|
591
|
+
output_tokens: 50,
|
|
592
|
+
total_tokens: 150,
|
|
593
|
+
});
|
|
594
|
+
expect(result.usage_metadata?.input_token_details).toBeUndefined();
|
|
595
|
+
});
|
|
596
|
+
});
|
|
597
|
+
|
|
432
598
|
describe('convertToConverseMessages', () => {
|
|
433
599
|
test('should convert basic messages', () => {
|
|
434
600
|
const { converseMessages, converseSystem } = convertToConverseMessages([
|
|
@@ -647,4 +813,67 @@ describe.skip('Integration tests', () => {
|
|
|
647
813
|
expect(reasoningBlocks.length).toBeGreaterThanOrEqual(0);
|
|
648
814
|
}
|
|
649
815
|
});
|
|
816
|
+
|
|
817
|
+
test('cache tokens should populate input_token_details', async () => {
|
|
818
|
+
const client = new BedrockRuntimeClient({
|
|
819
|
+
region: integrationArgs.region,
|
|
820
|
+
credentials: integrationArgs.credentials,
|
|
821
|
+
});
|
|
822
|
+
|
|
823
|
+
// Large system prompt (>1024 tokens) to meet Bedrock's minimum cache threshold
|
|
824
|
+
const largeSystemPrompt = [
|
|
825
|
+
'You are an expert assistant.',
|
|
826
|
+
...Array(200).fill(
|
|
827
|
+
'This is padding content to exceed the minimum token threshold for Bedrock prompt caching. '
|
|
828
|
+
),
|
|
829
|
+
'When answering, be brief and direct.',
|
|
830
|
+
].join(' ');
|
|
831
|
+
|
|
832
|
+
const systemBlocks = [
|
|
833
|
+
{ text: largeSystemPrompt },
|
|
834
|
+
{ cachePoint: { type: 'default' as const } },
|
|
835
|
+
];
|
|
836
|
+
|
|
837
|
+
const converseArgs = {
|
|
838
|
+
modelId: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
|
|
839
|
+
system: systemBlocks,
|
|
840
|
+
inferenceConfig: { maxTokens: 50 },
|
|
841
|
+
};
|
|
842
|
+
|
|
843
|
+
// Call 1: populate the cache (may be a write or read if already warm)
|
|
844
|
+
await client.send(
|
|
845
|
+
new ConverseCommand({
|
|
846
|
+
...converseArgs,
|
|
847
|
+
messages: [{ role: 'user', content: [{ text: 'Say hello.' }] }],
|
|
848
|
+
})
|
|
849
|
+
);
|
|
850
|
+
|
|
851
|
+
// Call 2: should read from cache — this is the one we assert on
|
|
852
|
+
const response = await client.send(
|
|
853
|
+
new ConverseCommand({
|
|
854
|
+
...converseArgs,
|
|
855
|
+
messages: [
|
|
856
|
+
{ role: 'user', content: [{ text: 'Say hello.' }] },
|
|
857
|
+
{ role: 'assistant', content: [{ text: 'Hello!' }] },
|
|
858
|
+
{ role: 'user', content: [{ text: 'Say goodbye.' }] },
|
|
859
|
+
],
|
|
860
|
+
})
|
|
861
|
+
);
|
|
862
|
+
|
|
863
|
+
// Feed raw response through convertConverseMessageToLangChainMessage
|
|
864
|
+
const result = convertConverseMessageToLangChainMessage(
|
|
865
|
+
response.output!.message!,
|
|
866
|
+
response
|
|
867
|
+
);
|
|
868
|
+
|
|
869
|
+
expect(result.usage_metadata).toBeDefined();
|
|
870
|
+
expect(result.usage_metadata!.input_tokens).toBeGreaterThan(0);
|
|
871
|
+
expect(result.usage_metadata!.output_tokens).toBeGreaterThan(0);
|
|
872
|
+
|
|
873
|
+
// Cache should have been populated by call 1, so call 2 should show cache reads
|
|
874
|
+
expect(result.usage_metadata!.input_token_details).toBeDefined();
|
|
875
|
+
expect(
|
|
876
|
+
result.usage_metadata!.input_token_details!.cache_read
|
|
877
|
+
).toBeGreaterThan(0);
|
|
878
|
+
});
|
|
650
879
|
});
|
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
* Utility functions for converting Bedrock Converse responses to LangChain messages.
|
|
3
3
|
* Ported from @langchain/aws common.js
|
|
4
4
|
*/
|
|
5
|
-
import { AIMessage, AIMessageChunk } from '@langchain/core/messages';
|
|
6
5
|
import { ChatGenerationChunk } from '@langchain/core/outputs';
|
|
6
|
+
import { AIMessage, AIMessageChunk } from '@langchain/core/messages';
|
|
7
|
+
import type { UsageMetadata } from '@langchain/core/messages';
|
|
7
8
|
import type {
|
|
8
9
|
BedrockMessage,
|
|
9
10
|
ConverseResponse,
|
|
@@ -107,17 +108,38 @@ export function convertConverseMessageToLangChainMessage(
|
|
|
107
108
|
}
|
|
108
109
|
|
|
109
110
|
let tokenUsage:
|
|
110
|
-
| {
|
|
111
|
+
| {
|
|
112
|
+
input_tokens: number;
|
|
113
|
+
output_tokens: number;
|
|
114
|
+
total_tokens: number;
|
|
115
|
+
input_token_details?: {
|
|
116
|
+
cache_read: number;
|
|
117
|
+
cache_creation: number;
|
|
118
|
+
};
|
|
119
|
+
}
|
|
111
120
|
| undefined;
|
|
112
121
|
if (responseMetadata.usage != null) {
|
|
113
|
-
const
|
|
114
|
-
|
|
122
|
+
const usage = responseMetadata.usage as NonNullable<
|
|
123
|
+
typeof responseMetadata.usage
|
|
124
|
+
> & {
|
|
125
|
+
cacheReadInputTokens?: number;
|
|
126
|
+
cacheWriteInputTokens?: number;
|
|
127
|
+
};
|
|
128
|
+
const input_tokens = usage.inputTokens ?? 0;
|
|
129
|
+
const output_tokens = usage.outputTokens ?? 0;
|
|
130
|
+
const cacheRead = usage.cacheReadInputTokens;
|
|
131
|
+
const cacheWrite = usage.cacheWriteInputTokens;
|
|
115
132
|
tokenUsage = {
|
|
116
133
|
input_tokens,
|
|
117
134
|
output_tokens,
|
|
118
|
-
total_tokens:
|
|
119
|
-
responseMetadata.usage.totalTokens ?? input_tokens + output_tokens,
|
|
135
|
+
total_tokens: usage.totalTokens ?? input_tokens + output_tokens,
|
|
120
136
|
};
|
|
137
|
+
if (cacheRead != null || cacheWrite != null) {
|
|
138
|
+
tokenUsage.input_token_details = {
|
|
139
|
+
cache_read: cacheRead ?? 0,
|
|
140
|
+
cache_creation: cacheWrite ?? 0,
|
|
141
|
+
};
|
|
142
|
+
}
|
|
121
143
|
}
|
|
122
144
|
|
|
123
145
|
if (
|
|
@@ -285,19 +307,37 @@ export function handleConverseStreamMetadata(
|
|
|
285
307
|
metadata: ConverseStreamMetadataEvent,
|
|
286
308
|
extra: { streamUsage: boolean }
|
|
287
309
|
): ChatGenerationChunk {
|
|
288
|
-
const
|
|
289
|
-
|
|
290
|
-
|
|
310
|
+
const usage = metadata.usage as
|
|
311
|
+
| (NonNullable<ConverseStreamMetadataEvent['usage']> & {
|
|
312
|
+
cacheReadInputTokens?: number;
|
|
313
|
+
cacheWriteInputTokens?: number;
|
|
314
|
+
})
|
|
315
|
+
| undefined;
|
|
316
|
+
const inputTokens = usage?.inputTokens ?? 0;
|
|
317
|
+
const outputTokens = usage?.outputTokens ?? 0;
|
|
318
|
+
const cacheRead = usage?.cacheReadInputTokens;
|
|
319
|
+
const cacheWrite = usage?.cacheWriteInputTokens;
|
|
320
|
+
|
|
321
|
+
const usage_metadata: Record<string, unknown> = {
|
|
291
322
|
input_tokens: inputTokens,
|
|
292
323
|
output_tokens: outputTokens,
|
|
293
|
-
total_tokens:
|
|
324
|
+
total_tokens: usage?.totalTokens ?? inputTokens + outputTokens,
|
|
294
325
|
};
|
|
295
326
|
|
|
327
|
+
if (cacheRead != null || cacheWrite != null) {
|
|
328
|
+
usage_metadata.input_token_details = {
|
|
329
|
+
cache_read: cacheRead ?? 0,
|
|
330
|
+
cache_creation: cacheWrite ?? 0,
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
|
|
296
334
|
return new ChatGenerationChunk({
|
|
297
335
|
text: '',
|
|
298
336
|
message: new AIMessageChunk({
|
|
299
337
|
content: '',
|
|
300
|
-
usage_metadata: extra.streamUsage
|
|
338
|
+
usage_metadata: extra.streamUsage
|
|
339
|
+
? (usage_metadata as UsageMetadata)
|
|
340
|
+
: undefined,
|
|
301
341
|
response_metadata: {
|
|
302
342
|
// Use the same key as returned from the Converse API
|
|
303
343
|
metadata,
|
package/src/llm/google/index.ts
CHANGED
|
@@ -10,9 +10,8 @@ import type {
|
|
|
10
10
|
} from '@google/generative-ai';
|
|
11
11
|
import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
|
|
12
12
|
import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
|
|
13
|
-
import type { GeminiGenerationConfig } from '@langchain/google-common';
|
|
14
13
|
import type { GeminiApiUsageMetadata, InputTokenDetails } from './types';
|
|
15
|
-
import type { GoogleClientOptions } from '@/types';
|
|
14
|
+
import type { GoogleClientOptions, GoogleThinkingConfig } from '@/types';
|
|
16
15
|
import {
|
|
17
16
|
convertResponseContentToChatGenerationChunk,
|
|
18
17
|
convertBaseMessagesToContent,
|
|
@@ -20,7 +19,7 @@ import {
|
|
|
20
19
|
} from './utils/common';
|
|
21
20
|
|
|
22
21
|
export class CustomChatGoogleGenerativeAI extends ChatGoogleGenerativeAI {
|
|
23
|
-
thinkingConfig?:
|
|
22
|
+
thinkingConfig?: GoogleThinkingConfig;
|
|
24
23
|
|
|
25
24
|
/**
|
|
26
25
|
* Override to add gemini-3 model support for multimodal and function calling thought signatures
|
|
@@ -29,24 +29,135 @@ type OpenAIRoleEnum =
|
|
|
29
29
|
| 'function'
|
|
30
30
|
| 'tool';
|
|
31
31
|
|
|
32
|
-
export
|
|
32
|
+
export type OpenRouterReasoningEffort =
|
|
33
|
+
| 'xhigh'
|
|
34
|
+
| 'high'
|
|
35
|
+
| 'medium'
|
|
36
|
+
| 'low'
|
|
37
|
+
| 'minimal'
|
|
38
|
+
| 'none';
|
|
39
|
+
|
|
40
|
+
export interface OpenRouterReasoning {
|
|
41
|
+
effort?: OpenRouterReasoningEffort;
|
|
42
|
+
max_tokens?: number;
|
|
43
|
+
exclude?: boolean;
|
|
44
|
+
enabled?: boolean;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export interface ChatOpenRouterCallOptions
|
|
48
|
+
extends Omit<ChatOpenAICallOptions, 'reasoning'> {
|
|
49
|
+
/** @deprecated Use `reasoning` object instead */
|
|
33
50
|
include_reasoning?: boolean;
|
|
51
|
+
reasoning?: OpenRouterReasoning;
|
|
34
52
|
modelKwargs?: OpenAIChatInput['modelKwargs'];
|
|
35
53
|
}
|
|
54
|
+
|
|
55
|
+
/** invocationParams return type extended with OpenRouter reasoning */
|
|
56
|
+
export type OpenRouterInvocationParams = Omit<
|
|
57
|
+
OpenAIClient.Chat.ChatCompletionCreateParams,
|
|
58
|
+
'messages'
|
|
59
|
+
> & {
|
|
60
|
+
reasoning?: OpenRouterReasoning;
|
|
61
|
+
};
|
|
36
62
|
export class ChatOpenRouter extends ChatOpenAI {
|
|
63
|
+
private openRouterReasoning?: OpenRouterReasoning;
|
|
64
|
+
/** @deprecated Use `reasoning` object instead */
|
|
65
|
+
private includeReasoning?: boolean;
|
|
66
|
+
|
|
37
67
|
constructor(_fields: Partial<ChatOpenRouterCallOptions>) {
|
|
38
|
-
const {
|
|
68
|
+
const {
|
|
69
|
+
include_reasoning,
|
|
70
|
+
reasoning: openRouterReasoning,
|
|
71
|
+
modelKwargs = {},
|
|
72
|
+
...fields
|
|
73
|
+
} = _fields;
|
|
74
|
+
|
|
75
|
+
// Extract reasoning from modelKwargs if provided there (e.g., from LLMConfig)
|
|
76
|
+
const { reasoning: mkReasoning, ...restModelKwargs } = modelKwargs as {
|
|
77
|
+
reasoning?: OpenRouterReasoning;
|
|
78
|
+
} & Record<string, unknown>;
|
|
79
|
+
|
|
39
80
|
super({
|
|
40
81
|
...fields,
|
|
41
|
-
modelKwargs:
|
|
42
|
-
...modelKwargs,
|
|
43
|
-
include_reasoning,
|
|
44
|
-
},
|
|
82
|
+
modelKwargs: restModelKwargs,
|
|
45
83
|
});
|
|
84
|
+
|
|
85
|
+
// Merge reasoning config: modelKwargs.reasoning < constructor reasoning
|
|
86
|
+
if (mkReasoning != null || openRouterReasoning != null) {
|
|
87
|
+
this.openRouterReasoning = {
|
|
88
|
+
...mkReasoning,
|
|
89
|
+
...openRouterReasoning,
|
|
90
|
+
};
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
this.includeReasoning = include_reasoning;
|
|
46
94
|
}
|
|
47
95
|
static lc_name(): 'LibreChatOpenRouter' {
|
|
48
96
|
return 'LibreChatOpenRouter';
|
|
49
97
|
}
|
|
98
|
+
|
|
99
|
+
// @ts-expect-error - OpenRouter reasoning extends OpenAI Reasoning with additional
|
|
100
|
+
// effort levels ('xhigh' | 'none' | 'minimal') not in ReasoningEffort.
|
|
101
|
+
// The parent's generic conditional return type cannot be widened in an override.
|
|
102
|
+
override invocationParams(
|
|
103
|
+
options?: this['ParsedCallOptions'],
|
|
104
|
+
extra?: { streaming?: boolean }
|
|
105
|
+
): OpenRouterInvocationParams {
|
|
106
|
+
type MutableParams = Omit<
|
|
107
|
+
OpenAIClient.Chat.ChatCompletionCreateParams,
|
|
108
|
+
'messages'
|
|
109
|
+
> & { reasoning_effort?: string; reasoning?: OpenRouterReasoning };
|
|
110
|
+
|
|
111
|
+
const params = super.invocationParams(options, extra) as MutableParams;
|
|
112
|
+
|
|
113
|
+
// Remove the OpenAI-native reasoning_effort that the parent sets;
|
|
114
|
+
// OpenRouter uses a `reasoning` object instead
|
|
115
|
+
delete params.reasoning_effort;
|
|
116
|
+
|
|
117
|
+
// Build the OpenRouter reasoning config
|
|
118
|
+
const reasoning = this.buildOpenRouterReasoning(options);
|
|
119
|
+
if (reasoning != null) {
|
|
120
|
+
params.reasoning = reasoning;
|
|
121
|
+
} else {
|
|
122
|
+
delete params.reasoning;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
return params;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
private buildOpenRouterReasoning(
|
|
129
|
+
options?: this['ParsedCallOptions']
|
|
130
|
+
): OpenRouterReasoning | undefined {
|
|
131
|
+
let reasoning: OpenRouterReasoning | undefined;
|
|
132
|
+
|
|
133
|
+
// 1. Instance-level reasoning config (from constructor)
|
|
134
|
+
if (this.openRouterReasoning != null) {
|
|
135
|
+
reasoning = { ...this.openRouterReasoning };
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// 2. LangChain-style reasoning params (from parent's `this.reasoning`)
|
|
139
|
+
const lcReasoning = this.getReasoningParams(options);
|
|
140
|
+
if (lcReasoning?.effort != null) {
|
|
141
|
+
reasoning = {
|
|
142
|
+
...reasoning,
|
|
143
|
+
effort: lcReasoning.effort as OpenRouterReasoningEffort,
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// 3. Call-level reasoning override
|
|
148
|
+
const callReasoning = (options as ChatOpenRouterCallOptions | undefined)
|
|
149
|
+
?.reasoning;
|
|
150
|
+
if (callReasoning != null) {
|
|
151
|
+
reasoning = { ...reasoning, ...callReasoning };
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// 4. Legacy include_reasoning backward compatibility
|
|
155
|
+
if (reasoning == null && this.includeReasoning === true) {
|
|
156
|
+
reasoning = { enabled: true };
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
return reasoning;
|
|
160
|
+
}
|
|
50
161
|
protected override _convertOpenAIDeltaToBaseMessageChunk(
|
|
51
162
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
52
163
|
delta: Record<string, any>,
|