@librechat/agents 3.1.80 → 3.1.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +102 -35
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +13 -0
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +50 -13
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +17 -7
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/toolCache.cjs +55 -0
- package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -0
- package/dist/cjs/llm/vertexai/index.cjs +15 -15
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +70 -12
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +101 -34
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +13 -0
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +50 -14
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +17 -7
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/toolCache.mjs +53 -0
- package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -0
- package/dist/esm/llm/vertexai/index.mjs +15 -16
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +70 -12
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +6 -1
- package/dist/types/llm/openrouter/index.d.ts +1 -0
- package/dist/types/llm/openrouter/toolCache.d.ts +2 -0
- package/dist/types/llm/vertexai/index.d.ts +18 -1
- package/dist/types/tools/ToolNode.d.ts +5 -0
- package/dist/types/types/run.d.ts +2 -0
- package/package.json +2 -1
- package/src/agents/AgentContext.ts +146 -38
- package/src/agents/__tests__/AgentContext.test.ts +198 -0
- package/src/graphs/Graph.ts +24 -0
- package/src/llm/custom-chat-models.smoke.test.ts +76 -0
- package/src/llm/openai/deepseek.test.ts +14 -1
- package/src/llm/openai/index.ts +38 -12
- package/src/llm/openrouter/index.ts +22 -7
- package/src/llm/openrouter/reasoning.test.ts +33 -0
- package/src/llm/openrouter/toolCache.test.ts +83 -0
- package/src/llm/openrouter/toolCache.ts +89 -0
- package/src/llm/vertexai/fixThoughtSignatures.test.ts +154 -0
- package/src/llm/vertexai/index.ts +16 -22
- package/src/messages/cache.test.ts +127 -0
- package/src/scripts/openrouter_prompt_cache_live.ts +310 -0
- package/src/specs/agent-handoffs.live.test.ts +140 -0
- package/src/specs/agent-handoffs.test.ts +266 -2
- package/src/specs/openrouter.simple.test.ts +15 -8
- package/src/tools/ToolNode.ts +92 -13
- package/src/types/run.ts +2 -0
|
@@ -65,50 +65,44 @@ type AdditionalKwargs =
|
|
|
65
65
|
* - The signature for a functionCall part is an empty string
|
|
66
66
|
*
|
|
67
67
|
* This function correlates each "model" content block in the formatted request
|
|
68
|
-
* back to its originating AI message
|
|
69
|
-
* that the library failed to apply.
|
|
68
|
+
* back to its originating AI message by *position*, then re-attaches non-empty
|
|
69
|
+
* signatures that the library failed to apply. AI messages without signatures
|
|
70
|
+
* still consume their slot — filtering them out shifted later messages onto
|
|
71
|
+
* the wrong content block and dropped real signatures on the floor.
|
|
70
72
|
*/
|
|
71
|
-
function fixThoughtSignatures(
|
|
73
|
+
export function fixThoughtSignatures(
|
|
72
74
|
contents: GeminiContent[],
|
|
73
75
|
input: BaseMessage[]
|
|
74
76
|
): void {
|
|
75
|
-
//
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
isAIMessage(msg) &&
|
|
79
|
-
Array.isArray((msg.additional_kwargs as AdditionalKwargs)?.signatures) &&
|
|
80
|
-
(msg.additional_kwargs.signatures as string[]).length > 0
|
|
81
|
-
);
|
|
82
|
-
|
|
83
|
-
// Collect "model" content blocks from the formatted request, in order
|
|
77
|
+
// All AI messages, in order — non-signature ones still consume positional
|
|
78
|
+
// slots so later messages line up with their model content blocks.
|
|
79
|
+
const aiMessages = input.filter(isAIMessage);
|
|
84
80
|
const modelContents = contents.filter((c) => c.role === 'model');
|
|
85
81
|
|
|
86
|
-
// They should correspond 1:1 in order (both derived from the same input sequence)
|
|
87
82
|
const count = Math.min(aiMessages.length, modelContents.length);
|
|
88
83
|
for (let i = 0; i < count; i++) {
|
|
89
|
-
const
|
|
90
|
-
|
|
91
|
-
|
|
84
|
+
const signatures = (aiMessages[i].additional_kwargs as AdditionalKwargs)
|
|
85
|
+
?.signatures;
|
|
86
|
+
if (!Array.isArray(signatures) || signatures.length === 0) continue;
|
|
92
87
|
|
|
93
|
-
|
|
88
|
+
const content = modelContents[i];
|
|
94
89
|
const attachedSignatures = new Set(
|
|
95
90
|
content.parts
|
|
96
91
|
.map((p) => p.thoughtSignature)
|
|
97
92
|
.filter((s): s is string => s != null && s !== '')
|
|
98
93
|
);
|
|
99
|
-
const availableSignatures = signatures
|
|
100
|
-
(s) => s != null && s !== '' && !attachedSignatures.has(s)
|
|
94
|
+
const availableSignatures = signatures.filter(
|
|
95
|
+
(s): s is string => s != null && s !== '' && !attachedSignatures.has(s)
|
|
101
96
|
);
|
|
102
97
|
|
|
103
|
-
// Assign available signatures to functionCall parts missing one, in order
|
|
104
98
|
let sigIdx = 0;
|
|
105
99
|
for (const part of content.parts) {
|
|
106
100
|
if (
|
|
107
101
|
'functionCall' in part &&
|
|
108
102
|
(part.thoughtSignature == null || part.thoughtSignature === '') &&
|
|
109
|
-
sigIdx <
|
|
103
|
+
sigIdx < availableSignatures.length
|
|
110
104
|
) {
|
|
111
|
-
part.thoughtSignature = availableSignatures
|
|
105
|
+
part.thoughtSignature = availableSignatures[sigIdx];
|
|
112
106
|
sigIdx++;
|
|
113
107
|
}
|
|
114
108
|
}
|
|
@@ -14,9 +14,14 @@ import {
|
|
|
14
14
|
addBedrockCacheControl,
|
|
15
15
|
addCacheControl,
|
|
16
16
|
} from './cache';
|
|
17
|
+
import { _convertMessagesToOpenAIParams } from '@/llm/openai/utils';
|
|
17
18
|
import { toLangChainContent } from './langchain';
|
|
18
19
|
import { ContentTypes } from '@/common/enum';
|
|
19
20
|
|
|
21
|
+
type CacheControlBlock = MessageContentComplex & {
|
|
22
|
+
cache_control?: { type: 'ephemeral'; ttl?: '1h' };
|
|
23
|
+
};
|
|
24
|
+
|
|
20
25
|
describe('addCacheControl', () => {
|
|
21
26
|
test('should add cache control to the last two user messages with array content', () => {
|
|
22
27
|
const messages: AnthropicMessages = [
|
|
@@ -1483,3 +1488,125 @@ describe('LangChain message type preservation', () => {
|
|
|
1483
1488
|
expect((result[1] as AIMessage).tool_calls![0].name).toBe('navigate');
|
|
1484
1489
|
});
|
|
1485
1490
|
});
|
|
1491
|
+
|
|
1492
|
+
describe('OpenRouter prompt caching (reuses addCacheControl)', () => {
|
|
1493
|
+
it('adds cache_control to LangChain messages for OpenRouter (same format as Anthropic)', () => {
|
|
1494
|
+
const messages: BaseMessage[] = [
|
|
1495
|
+
new HumanMessage({ content: [{ type: 'text', text: 'System context' }] }),
|
|
1496
|
+
new AIMessage({ content: [{ type: 'text', text: 'Acknowledged' }] }),
|
|
1497
|
+
new HumanMessage({ content: [{ type: 'text', text: 'User query' }] }),
|
|
1498
|
+
];
|
|
1499
|
+
|
|
1500
|
+
const result = addCacheControl(messages);
|
|
1501
|
+
|
|
1502
|
+
const firstContent = result[0].content as MessageContentComplex[];
|
|
1503
|
+
const lastContent = result[2].content as MessageContentComplex[];
|
|
1504
|
+
|
|
1505
|
+
expect((firstContent[0] as CacheControlBlock).cache_control).toEqual({
|
|
1506
|
+
type: 'ephemeral',
|
|
1507
|
+
});
|
|
1508
|
+
expect((lastContent[0] as CacheControlBlock).cache_control).toEqual({
|
|
1509
|
+
type: 'ephemeral',
|
|
1510
|
+
});
|
|
1511
|
+
});
|
|
1512
|
+
|
|
1513
|
+
it('preserves cache_control through OpenAI message conversion used by OpenRouter', () => {
|
|
1514
|
+
const messages: BaseMessage[] = [
|
|
1515
|
+
new HumanMessage({
|
|
1516
|
+
content: [
|
|
1517
|
+
{
|
|
1518
|
+
type: 'text',
|
|
1519
|
+
text: 'Hello',
|
|
1520
|
+
cache_control: { type: 'ephemeral' },
|
|
1521
|
+
},
|
|
1522
|
+
],
|
|
1523
|
+
}),
|
|
1524
|
+
new AIMessage({ content: 'Hi there' }),
|
|
1525
|
+
new HumanMessage({
|
|
1526
|
+
content: [
|
|
1527
|
+
{
|
|
1528
|
+
type: 'text',
|
|
1529
|
+
text: 'Follow-up',
|
|
1530
|
+
cache_control: { type: 'ephemeral' },
|
|
1531
|
+
},
|
|
1532
|
+
],
|
|
1533
|
+
}),
|
|
1534
|
+
];
|
|
1535
|
+
|
|
1536
|
+
const converted = _convertMessagesToOpenAIParams(messages);
|
|
1537
|
+
|
|
1538
|
+
const firstUserContent = converted[0].content as CacheControlBlock[];
|
|
1539
|
+
const lastUserContent = converted[2].content as CacheControlBlock[];
|
|
1540
|
+
|
|
1541
|
+
expect(firstUserContent[0]).toHaveProperty('cache_control');
|
|
1542
|
+
expect(firstUserContent[0].cache_control).toEqual({ type: 'ephemeral' });
|
|
1543
|
+
expect(lastUserContent[0]).toHaveProperty('cache_control');
|
|
1544
|
+
expect(lastUserContent[0].cache_control).toEqual({ type: 'ephemeral' });
|
|
1545
|
+
});
|
|
1546
|
+
|
|
1547
|
+
it('end-to-end: addCacheControl then convert preserves breakpoints for OpenRouter', () => {
|
|
1548
|
+
const messages: BaseMessage[] = [
|
|
1549
|
+
new HumanMessage({ content: 'First message with context' }),
|
|
1550
|
+
new AIMessage({ content: 'Response' }),
|
|
1551
|
+
new HumanMessage({ content: 'Second question' }),
|
|
1552
|
+
];
|
|
1553
|
+
|
|
1554
|
+
const cached = addCacheControl(messages);
|
|
1555
|
+
const converted = _convertMessagesToOpenAIParams(
|
|
1556
|
+
cached,
|
|
1557
|
+
'anthropic/claude-sonnet-4-20250514'
|
|
1558
|
+
);
|
|
1559
|
+
|
|
1560
|
+
const firstUser = converted[0];
|
|
1561
|
+
const lastUser = converted[2];
|
|
1562
|
+
|
|
1563
|
+
expect(Array.isArray(firstUser.content)).toBe(true);
|
|
1564
|
+
expect(
|
|
1565
|
+
(firstUser.content as CacheControlBlock[])[0]
|
|
1566
|
+
).toHaveProperty('cache_control');
|
|
1567
|
+
|
|
1568
|
+
expect(Array.isArray(lastUser.content)).toBe(true);
|
|
1569
|
+
expect(
|
|
1570
|
+
(lastUser.content as CacheControlBlock[])[0]
|
|
1571
|
+
).toHaveProperty('cache_control');
|
|
1572
|
+
});
|
|
1573
|
+
|
|
1574
|
+
it('strips Bedrock cache before applying OpenRouter/Anthropic cache', () => {
|
|
1575
|
+
const messages: TestMsg[] = [
|
|
1576
|
+
{
|
|
1577
|
+
role: 'user',
|
|
1578
|
+
content: [
|
|
1579
|
+
{ type: ContentTypes.TEXT, text: 'First message' },
|
|
1580
|
+
{ cachePoint: { type: 'default' } },
|
|
1581
|
+
],
|
|
1582
|
+
},
|
|
1583
|
+
{
|
|
1584
|
+
role: 'assistant',
|
|
1585
|
+
content: [
|
|
1586
|
+
{ type: ContentTypes.TEXT, text: 'Response' },
|
|
1587
|
+
{ cachePoint: { type: 'default' } },
|
|
1588
|
+
],
|
|
1589
|
+
},
|
|
1590
|
+
{
|
|
1591
|
+
role: 'user',
|
|
1592
|
+
content: [{ type: ContentTypes.TEXT, text: 'Follow-up' }],
|
|
1593
|
+
},
|
|
1594
|
+
];
|
|
1595
|
+
|
|
1596
|
+
/** @ts-expect-error - Testing cross-provider compatibility */
|
|
1597
|
+
const result = addCacheControl(messages);
|
|
1598
|
+
|
|
1599
|
+
for (const msg of result) {
|
|
1600
|
+
if (Array.isArray(msg.content)) {
|
|
1601
|
+
expect(
|
|
1602
|
+
(msg.content as MessageContentComplex[]).some(
|
|
1603
|
+
(b) => 'cachePoint' in b
|
|
1604
|
+
)
|
|
1605
|
+
).toBe(false);
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
|
|
1609
|
+
const lastContent = result[2].content as MessageContentComplex[];
|
|
1610
|
+
expect('cache_control' in lastContent[0]).toBe(true);
|
|
1611
|
+
});
|
|
1612
|
+
});
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
import { config as loadEnv } from 'dotenv';
|
|
2
|
+
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
|
|
3
|
+
import type { AIMessage, BaseMessage } from '@langchain/core/messages';
|
|
4
|
+
import type { ClientOptions } from '@langchain/openai';
|
|
5
|
+
import type { GraphTools } from '@/types';
|
|
6
|
+
import type { ChatOpenRouterInput } from '@/llm/openrouter';
|
|
7
|
+
import { addCacheControl } from '@/messages/cache';
|
|
8
|
+
import { ChatOpenRouter } from '@/llm/openrouter';
|
|
9
|
+
import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
|
|
10
|
+
|
|
11
|
+
loadEnv({ path: process.env.DOTENV_CONFIG_PATH ?? '.env' });
|
|
12
|
+
|
|
13
|
+
type ModelCase = {
|
|
14
|
+
label: string;
|
|
15
|
+
model: string;
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
type CacheUsage = {
|
|
19
|
+
cacheCreation: number;
|
|
20
|
+
cacheRead: number;
|
|
21
|
+
inputTokens: number;
|
|
22
|
+
outputTokens: number;
|
|
23
|
+
totalTokens: number;
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
type OpenRouterTool = {
|
|
27
|
+
type: 'function';
|
|
28
|
+
function: {
|
|
29
|
+
name: string;
|
|
30
|
+
};
|
|
31
|
+
cache_control?: { type: 'ephemeral' };
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
const DEFAULT_MODEL_CASES: ModelCase[] = [
|
|
35
|
+
{ label: 'Anthropic Claude', model: 'anthropic/claude-haiku-4.5' },
|
|
36
|
+
{ label: 'Google Gemini', model: 'google/gemini-2.5-flash' },
|
|
37
|
+
{ label: 'Alibaba Qwen', model: 'qwen/qwen3-coder-flash' },
|
|
38
|
+
];
|
|
39
|
+
|
|
40
|
+
const apiKey = process.env.OPENROUTER_API_KEY;
|
|
41
|
+
const baseURL =
|
|
42
|
+
process.env.OPENROUTER_BASE_URL ?? 'https://openrouter.ai/api/v1';
|
|
43
|
+
const attempts = Number(process.env.OPENROUTER_PROMPT_CACHE_ATTEMPTS ?? '3');
|
|
44
|
+
const modelCases = (
|
|
45
|
+
process.env.OPENROUTER_PROMPT_CACHE_MODELS?.split(',').map((model) => ({
|
|
46
|
+
label: 'Custom',
|
|
47
|
+
model: model.trim(),
|
|
48
|
+
})) ?? DEFAULT_MODEL_CASES
|
|
49
|
+
).filter(({ model }) => model.length > 0);
|
|
50
|
+
|
|
51
|
+
if (apiKey == null || apiKey.length === 0) {
|
|
52
|
+
throw new Error('OPENROUTER_API_KEY is required');
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function buildStableReference(): string {
|
|
56
|
+
const paragraph =
|
|
57
|
+
'LibreChat OpenRouter prompt caching live validation reference. This paragraph is deliberately stable across repeated requests so OpenRouter can route the conversation to the same provider endpoint and reuse cached prompt tokens. It describes cache breakpoints, provider sticky routing, cache write metrics, cache read metrics, model-specific minimum prompt sizes, and the expected behavior of explicit per-message cache_control markers for supported OpenRouter providers.';
|
|
58
|
+
|
|
59
|
+
return Array.from({ length: 90 }, (_, index) => {
|
|
60
|
+
const section = index + 1;
|
|
61
|
+
return `Section ${section}. ${paragraph} Verification key ${section}: OPENROUTER_PROMPT_CACHE_LIVE_REFERENCE_${section}.`;
|
|
62
|
+
}).join('\n');
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
function buildStableToolDescription(): string {
|
|
66
|
+
const paragraph =
|
|
67
|
+
'Static OpenRouter tool contract for prompt cache validation. This tool description is stable across requests and intentionally verbose so provider-side prompt caching can write and then read a meaningful static tool-schema prefix while dynamic tools vary after the cache breakpoint.';
|
|
68
|
+
|
|
69
|
+
return Array.from({ length: 90 }, (_, index) => {
|
|
70
|
+
const section = index + 1;
|
|
71
|
+
return `Tool section ${section}. ${paragraph} Stable tool key ${section}: OPENROUTER_STATIC_TOOL_CACHE_REFERENCE_${section}.`;
|
|
72
|
+
}).join('\n');
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
function buildToolSet(attempt: number): GraphTools {
|
|
76
|
+
return [
|
|
77
|
+
{
|
|
78
|
+
type: 'function',
|
|
79
|
+
function: {
|
|
80
|
+
name: 'stable_reference_lookup',
|
|
81
|
+
description: buildStableToolDescription(),
|
|
82
|
+
parameters: {
|
|
83
|
+
type: 'object',
|
|
84
|
+
properties: {
|
|
85
|
+
query: {
|
|
86
|
+
type: 'string',
|
|
87
|
+
description: 'Stable lookup query.',
|
|
88
|
+
},
|
|
89
|
+
},
|
|
90
|
+
required: ['query'],
|
|
91
|
+
additionalProperties: false,
|
|
92
|
+
},
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
type: 'function',
|
|
97
|
+
function: {
|
|
98
|
+
name: `dynamic_runtime_tool_${attempt}`,
|
|
99
|
+
description: `Dynamic runtime tool ${attempt}; this varies between attempts and should sit after the cached static tool prefix.`,
|
|
100
|
+
parameters: {
|
|
101
|
+
type: 'object',
|
|
102
|
+
properties: {
|
|
103
|
+
value: {
|
|
104
|
+
type: 'string',
|
|
105
|
+
},
|
|
106
|
+
},
|
|
107
|
+
required: ['value'],
|
|
108
|
+
additionalProperties: false,
|
|
109
|
+
},
|
|
110
|
+
},
|
|
111
|
+
},
|
|
112
|
+
] as GraphTools;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
function buildMessages(model: string): BaseMessage[] {
|
|
116
|
+
const reference = buildStableReference();
|
|
117
|
+
const messages: BaseMessage[] = [
|
|
118
|
+
new SystemMessage(
|
|
119
|
+
'You are validating prompt caching. Answer with one concise sentence.'
|
|
120
|
+
),
|
|
121
|
+
new HumanMessage(
|
|
122
|
+
[
|
|
123
|
+
`For model ${model}, reply with exactly this format: cache live check ok.`,
|
|
124
|
+
'Use the stable reference below only to make this request large enough to cache.',
|
|
125
|
+
reference,
|
|
126
|
+
].join('\n\n')
|
|
127
|
+
),
|
|
128
|
+
];
|
|
129
|
+
|
|
130
|
+
return addCacheControl<BaseMessage>(messages);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
function getCacheUsage(message: AIMessage): CacheUsage {
|
|
134
|
+
const usage = message.usage_metadata;
|
|
135
|
+
const inputDetails = usage?.input_token_details;
|
|
136
|
+
|
|
137
|
+
return {
|
|
138
|
+
inputTokens: usage?.input_tokens ?? 0,
|
|
139
|
+
outputTokens: usage?.output_tokens ?? 0,
|
|
140
|
+
totalTokens: usage?.total_tokens ?? 0,
|
|
141
|
+
cacheRead: inputDetails?.cache_read ?? 0,
|
|
142
|
+
cacheCreation: inputDetails?.cache_creation ?? 0,
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
function hasCacheHit(usages: CacheUsage[]): boolean {
|
|
147
|
+
return usages.some(({ cacheRead }) => cacheRead > 0);
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
function hasCacheActivity(usages: CacheUsage[]): boolean {
|
|
151
|
+
return usages.some(
|
|
152
|
+
({ cacheCreation, cacheRead }) => cacheCreation > 0 || cacheRead > 0
|
|
153
|
+
);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
function log(message = ''): void {
|
|
157
|
+
process.stdout.write(`${message}\n`);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
function logError(message: string): void {
|
|
161
|
+
process.stderr.write(`${message}\n`);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
async function runCase({ label, model }: ModelCase): Promise<CacheUsage[]> {
|
|
165
|
+
const llmInput: ChatOpenRouterInput & { configuration: ClientOptions } = {
|
|
166
|
+
model,
|
|
167
|
+
apiKey,
|
|
168
|
+
maxTokens: 12,
|
|
169
|
+
temperature: 0,
|
|
170
|
+
promptCache: true,
|
|
171
|
+
streamUsage: true,
|
|
172
|
+
configuration: {
|
|
173
|
+
baseURL,
|
|
174
|
+
defaultHeaders: {
|
|
175
|
+
'HTTP-Referer': 'https://librechat.ai',
|
|
176
|
+
'X-Title': 'LibreChat OpenRouter Prompt Cache Live Test',
|
|
177
|
+
},
|
|
178
|
+
},
|
|
179
|
+
};
|
|
180
|
+
const llm = new ChatOpenRouter(llmInput);
|
|
181
|
+
const messages = buildMessages(model);
|
|
182
|
+
const usages: CacheUsage[] = [];
|
|
183
|
+
|
|
184
|
+
log(`\n${label}: ${model}`);
|
|
185
|
+
|
|
186
|
+
for (let attempt = 1; attempt <= attempts; attempt++) {
|
|
187
|
+
const started = Date.now();
|
|
188
|
+
const response = (await llm.invoke(messages)) as AIMessage;
|
|
189
|
+
const usage = getCacheUsage(response);
|
|
190
|
+
usages.push(usage);
|
|
191
|
+
|
|
192
|
+
log(
|
|
193
|
+
[
|
|
194
|
+
`attempt=${attempt}`,
|
|
195
|
+
`ms=${Date.now() - started}`,
|
|
196
|
+
`input=${usage.inputTokens}`,
|
|
197
|
+
`output=${usage.outputTokens}`,
|
|
198
|
+
`write=${usage.cacheCreation}`,
|
|
199
|
+
`read=${usage.cacheRead}`,
|
|
200
|
+
`total=${usage.totalTokens}`,
|
|
201
|
+
].join(' ')
|
|
202
|
+
);
|
|
203
|
+
|
|
204
|
+
if (hasCacheHit(usages)) {
|
|
205
|
+
return usages;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
return usages;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
async function runStaticToolCase(): Promise<CacheUsage[]> {
|
|
213
|
+
const model = 'anthropic/claude-haiku-4.5';
|
|
214
|
+
const usages: CacheUsage[] = [];
|
|
215
|
+
|
|
216
|
+
log(`\nStatic tools through OpenRouter: ${model}`);
|
|
217
|
+
|
|
218
|
+
for (let attempt = 1; attempt <= attempts; attempt++) {
|
|
219
|
+
const llmInput: ChatOpenRouterInput & { configuration: ClientOptions } = {
|
|
220
|
+
model,
|
|
221
|
+
apiKey,
|
|
222
|
+
maxTokens: 12,
|
|
223
|
+
temperature: 0,
|
|
224
|
+
promptCache: true,
|
|
225
|
+
streamUsage: true,
|
|
226
|
+
configuration: {
|
|
227
|
+
baseURL,
|
|
228
|
+
defaultHeaders: {
|
|
229
|
+
'HTTP-Referer': 'https://librechat.ai',
|
|
230
|
+
'X-Title': 'LibreChat OpenRouter Prompt Cache Live Test',
|
|
231
|
+
},
|
|
232
|
+
},
|
|
233
|
+
};
|
|
234
|
+
const llm = new ChatOpenRouter(llmInput);
|
|
235
|
+
const tools = partitionAndMarkOpenRouterToolCache(
|
|
236
|
+
buildToolSet(attempt),
|
|
237
|
+
(name) => name.startsWith('dynamic_runtime_tool_')
|
|
238
|
+
) as OpenRouterTool[];
|
|
239
|
+
const markedTool = tools.find((entry) => entry.cache_control != null);
|
|
240
|
+
if (markedTool?.function.name !== 'stable_reference_lookup') {
|
|
241
|
+
throw new Error('Static tool cache marker was not applied as expected');
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
const modelWithTools = llm.bindTools(tools);
|
|
245
|
+
const started = Date.now();
|
|
246
|
+
const response = (await modelWithTools.invoke([
|
|
247
|
+
new SystemMessage('Reply with exactly: cache live check ok.'),
|
|
248
|
+
new HumanMessage(
|
|
249
|
+
`Attempt ${attempt}. Do not call tools; only answer with the requested text.`
|
|
250
|
+
),
|
|
251
|
+
])) as AIMessage;
|
|
252
|
+
const usage = getCacheUsage(response);
|
|
253
|
+
usages.push(usage);
|
|
254
|
+
|
|
255
|
+
log(
|
|
256
|
+
[
|
|
257
|
+
`attempt=${attempt}`,
|
|
258
|
+
`ms=${Date.now() - started}`,
|
|
259
|
+
`input=${usage.inputTokens}`,
|
|
260
|
+
`output=${usage.outputTokens}`,
|
|
261
|
+
`write=${usage.cacheCreation}`,
|
|
262
|
+
`read=${usage.cacheRead}`,
|
|
263
|
+
`total=${usage.totalTokens}`,
|
|
264
|
+
].join(' ')
|
|
265
|
+
);
|
|
266
|
+
|
|
267
|
+
if (hasCacheHit(usages)) {
|
|
268
|
+
return usages;
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
return usages;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
async function main(): Promise<void> {
|
|
276
|
+
const results: Array<ModelCase & { usages: CacheUsage[] }> = [];
|
|
277
|
+
|
|
278
|
+
for (const modelCase of modelCases) {
|
|
279
|
+
const usages = await runCase(modelCase);
|
|
280
|
+
results.push({ ...modelCase, usages });
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
const staticToolUsages = await runStaticToolCase();
|
|
284
|
+
results.push({
|
|
285
|
+
label: 'Static tools',
|
|
286
|
+
model: 'anthropic/claude-haiku-4.5',
|
|
287
|
+
usages: staticToolUsages,
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
const failures = results.filter(({ usages }) => {
|
|
291
|
+
return !hasCacheActivity(usages) || !hasCacheHit(usages);
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
log('\nSummary');
|
|
295
|
+
for (const { label, model, usages } of results) {
|
|
296
|
+
const writes = usages.map(({ cacheCreation }) => cacheCreation).join(',');
|
|
297
|
+
const reads = usages.map(({ cacheRead }) => cacheRead).join(',');
|
|
298
|
+
log(`${label} ${model}: writes=[${writes}] reads=[${reads}]`);
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
if (failures.length > 0) {
|
|
302
|
+
const failedModels = failures.map(({ model }) => model).join(', ');
|
|
303
|
+
throw new Error(`Prompt caching was not confirmed for: ${failedModels}`);
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
main().catch((error: Error) => {
|
|
308
|
+
logError(error.message);
|
|
309
|
+
process.exit(1);
|
|
310
|
+
});
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
// src/specs/agent-handoffs.live.test.ts
|
|
2
|
+
/**
|
|
3
|
+
* Live handoff integration verification.
|
|
4
|
+
*
|
|
5
|
+
* Run with:
|
|
6
|
+
* RUN_HANDOFF_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- agent-handoffs.live.test.ts --runInBand
|
|
7
|
+
*/
|
|
8
|
+
import { config as dotenvConfig } from 'dotenv';
|
|
9
|
+
dotenvConfig();
|
|
10
|
+
|
|
11
|
+
import { HumanMessage } from '@langchain/core/messages';
|
|
12
|
+
import { describe, expect, it, jest } from '@jest/globals';
|
|
13
|
+
import type { BaseMessage, ToolMessage } from '@langchain/core/messages';
|
|
14
|
+
import type { RunnableConfig } from '@langchain/core/runnables';
|
|
15
|
+
import type * as t from '@/types';
|
|
16
|
+
import { Constants, Providers } from '@/common';
|
|
17
|
+
import { Run } from '@/run';
|
|
18
|
+
|
|
19
|
+
const shouldRunLive =
|
|
20
|
+
process.env.RUN_HANDOFF_LIVE_TESTS === '1' &&
|
|
21
|
+
process.env.ANTHROPIC_API_KEY != null &&
|
|
22
|
+
process.env.ANTHROPIC_API_KEY !== '';
|
|
23
|
+
|
|
24
|
+
const describeIfLive = shouldRunLive ? describe : describe.skip;
|
|
25
|
+
const modelName =
|
|
26
|
+
process.env.ANTHROPIC_HANDOFF_LIVE_MODEL ?? 'claude-sonnet-4-6';
|
|
27
|
+
|
|
28
|
+
function createAnthropicAgent(
|
|
29
|
+
agentId: string,
|
|
30
|
+
instructions: string
|
|
31
|
+
): t.AgentInputs {
|
|
32
|
+
return {
|
|
33
|
+
agentId,
|
|
34
|
+
provider: Providers.ANTHROPIC,
|
|
35
|
+
clientOptions: {
|
|
36
|
+
modelName,
|
|
37
|
+
apiKey: process.env.ANTHROPIC_API_KEY,
|
|
38
|
+
temperature: 0,
|
|
39
|
+
maxTokens: 256,
|
|
40
|
+
streaming: true,
|
|
41
|
+
},
|
|
42
|
+
instructions,
|
|
43
|
+
maxContextTokens: 8000,
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
function createStreamConfig(threadId: string): Partial<RunnableConfig> & {
|
|
48
|
+
version: 'v1' | 'v2';
|
|
49
|
+
streamMode: string;
|
|
50
|
+
} {
|
|
51
|
+
return {
|
|
52
|
+
configurable: { thread_id: threadId },
|
|
53
|
+
streamMode: 'values',
|
|
54
|
+
version: 'v2',
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
function contentToText(message: BaseMessage): string {
|
|
59
|
+
if (typeof message.content === 'string') {
|
|
60
|
+
return message.content;
|
|
61
|
+
}
|
|
62
|
+
if (!Array.isArray(message.content)) {
|
|
63
|
+
return '';
|
|
64
|
+
}
|
|
65
|
+
return message.content
|
|
66
|
+
.map((part) => {
|
|
67
|
+
if (typeof part === 'string') {
|
|
68
|
+
return part;
|
|
69
|
+
}
|
|
70
|
+
if ('text' in part && typeof part.text === 'string') {
|
|
71
|
+
return part.text;
|
|
72
|
+
}
|
|
73
|
+
return '';
|
|
74
|
+
})
|
|
75
|
+
.join('');
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
describeIfLive('Agent handoffs live integration', () => {
|
|
79
|
+
jest.setTimeout(120_000);
|
|
80
|
+
|
|
81
|
+
it('routes through a real Anthropic handoff and preserves instructions', async () => {
|
|
82
|
+
const nonce = `live-handoff-${Date.now()}`;
|
|
83
|
+
const expectedReply = `${nonce}-specialist-confirmed`;
|
|
84
|
+
const handoffToolName = `${Constants.LC_TRANSFER_TO_}specialist`;
|
|
85
|
+
const agents: t.AgentInputs[] = [
|
|
86
|
+
createAnthropicAgent(
|
|
87
|
+
'router',
|
|
88
|
+
`You are a routing agent. For every user request, your only valid action is to call the handoff tool named ${handoffToolName}. Do not answer directly.
|
|
89
|
+
|
|
90
|
+
When you call the handoff tool, include instructions telling the specialist to reply exactly with this marker and no extra words: ${expectedReply}`
|
|
91
|
+
),
|
|
92
|
+
createAnthropicAgent(
|
|
93
|
+
'specialist',
|
|
94
|
+
'You are the specialist. When you receive handoff instructions with a marker, reply exactly with that marker and no extra words.'
|
|
95
|
+
),
|
|
96
|
+
];
|
|
97
|
+
const edges: t.GraphEdge[] = [
|
|
98
|
+
{
|
|
99
|
+
from: 'router',
|
|
100
|
+
to: 'specialist',
|
|
101
|
+
edgeType: 'handoff',
|
|
102
|
+
description: 'Transfer to the specialist for the final response',
|
|
103
|
+
prompt:
|
|
104
|
+
'Instructions for the specialist. Include any exact marker that must be returned.',
|
|
105
|
+
promptKey: 'instructions',
|
|
106
|
+
},
|
|
107
|
+
];
|
|
108
|
+
const run = await Run.create({
|
|
109
|
+
runId: `${nonce}-run`,
|
|
110
|
+
graphConfig: { type: 'multi-agent', agents, edges },
|
|
111
|
+
returnContent: true,
|
|
112
|
+
skipCleanup: true,
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
await run.processStream(
|
|
116
|
+
{
|
|
117
|
+
messages: [
|
|
118
|
+
new HumanMessage(
|
|
119
|
+
`Please delegate this to the specialist. The final answer must be exactly: ${expectedReply}`
|
|
120
|
+
),
|
|
121
|
+
],
|
|
122
|
+
},
|
|
123
|
+
createStreamConfig(`${nonce}-thread`)
|
|
124
|
+
);
|
|
125
|
+
|
|
126
|
+
const messages = run.getRunMessages() ?? [];
|
|
127
|
+
const handoffMessage = messages.find(
|
|
128
|
+
(message): message is ToolMessage =>
|
|
129
|
+
message.getType() === 'tool' &&
|
|
130
|
+
(message as ToolMessage).name === handoffToolName
|
|
131
|
+
);
|
|
132
|
+
const finalText = messages
|
|
133
|
+
.filter((message) => message.getType() === 'ai')
|
|
134
|
+
.map(contentToText)
|
|
135
|
+
.join('\n');
|
|
136
|
+
|
|
137
|
+
expect(handoffMessage).toBeDefined();
|
|
138
|
+
expect(finalText).toContain(expectedReply);
|
|
139
|
+
});
|
|
140
|
+
});
|