@librechat/agents 3.1.96 → 3.1.98
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +60 -21
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/instrumentation.cjs +120 -9
- package/dist/cjs/instrumentation.cjs.map +1 -1
- package/dist/cjs/langfuse.cjs +30 -226
- package/dist/cjs/langfuse.cjs.map +1 -1
- package/dist/cjs/langfuseToolOutputTracing.cjs +476 -0
- package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -0
- package/dist/cjs/llm/bedrock/index.cjs +10 -0
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/toolCache.cjs +125 -0
- package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -0
- package/dist/cjs/messages/cache.cjs +17 -9
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/run.cjs +142 -69
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +26 -9
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +10 -6
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +62 -23
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/instrumentation.mjs +118 -9
- package/dist/esm/instrumentation.mjs.map +1 -1
- package/dist/esm/langfuse.mjs +28 -224
- package/dist/esm/langfuse.mjs.map +1 -1
- package/dist/esm/langfuseToolOutputTracing.mjs +468 -0
- package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -0
- package/dist/esm/llm/bedrock/index.mjs +10 -0
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/toolCache.mjs +122 -0
- package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -0
- package/dist/esm/messages/cache.mjs +17 -9
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/run.mjs +144 -71
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +26 -9
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +10 -6
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/types/graphs/Graph.d.ts +5 -1
- package/dist/types/instrumentation.d.ts +5 -1
- package/dist/types/langfuse.d.ts +6 -28
- package/dist/types/langfuseToolOutputTracing.d.ts +20 -0
- package/dist/types/llm/bedrock/index.d.ts +16 -0
- package/dist/types/llm/bedrock/toolCache.d.ts +4 -0
- package/dist/types/messages/cache.d.ts +2 -2
- package/dist/types/run.d.ts +5 -1
- package/dist/types/tools/ToolNode.d.ts +4 -1
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +2 -0
- package/dist/types/types/graph.d.ts +30 -0
- package/dist/types/types/llm.d.ts +2 -2
- package/dist/types/types/run.d.ts +6 -0
- package/dist/types/types/tools.d.ts +7 -0
- package/package.json +2 -1
- package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +332 -0
- package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +504 -0
- package/src/graphs/Graph.ts +104 -34
- package/src/instrumentation.ts +172 -11
- package/src/langfuse.ts +59 -324
- package/src/langfuseToolOutputTracing.ts +702 -0
- package/src/llm/bedrock/index.ts +32 -1
- package/src/llm/bedrock/llm.spec.ts +154 -1
- package/src/llm/bedrock/toolCache.test.ts +131 -0
- package/src/llm/bedrock/toolCache.ts +191 -0
- package/src/messages/cache.test.ts +97 -38
- package/src/messages/cache.ts +18 -10
- package/src/run.ts +190 -87
- package/src/specs/langfuse-callbacks.test.ts +178 -1
- package/src/specs/langfuse-config.test.ts +112 -76
- package/src/specs/langfuse-instrumentation.test.ts +283 -0
- package/src/specs/langfuse-metadata.test.ts +54 -1
- package/src/specs/langfuse-tool-output-tracing.test.ts +616 -0
- package/src/tools/ToolNode.ts +35 -8
- package/src/tools/__tests__/SubagentExecutor.test.ts +32 -0
- package/src/tools/__tests__/ToolNode.langfuse.test.ts +47 -0
- package/src/tools/subagent/SubagentExecutor.ts +11 -6
- package/src/types/graph.ts +32 -0
- package/src/types/llm.ts +2 -2
- package/src/types/run.ts +6 -0
- package/src/types/tools.ts +7 -0
|
@@ -195,6 +195,7 @@ export type StandardGraphInput = {
|
|
|
195
195
|
runId?: string;
|
|
196
196
|
signal?: AbortSignal;
|
|
197
197
|
agents: AgentInputs[];
|
|
198
|
+
langfuse?: LangfuseConfig;
|
|
198
199
|
tokenCounter?: TokenCounter;
|
|
199
200
|
indexTokenCountMap?: Record<string, number>;
|
|
200
201
|
calibrationRatio?: number;
|
|
@@ -288,11 +289,40 @@ export interface SubagentUpdateEvent {
|
|
|
288
289
|
/** ISO timestamp for ordering / display. */
|
|
289
290
|
timestamp: string;
|
|
290
291
|
}
|
|
292
|
+
/** Controls whether and how tool outputs are exported to Langfuse. */
export type LangfuseToolOutputTracingConfig = {
  /**
   * Toggles export of tool outputs to Langfuse; treated as `true` when
   * omitted. With `false`, tool spans are kept but their output is redacted.
   */
  enabled?: boolean;
  /**
   * Optional list of tool names whose outputs are redacted even while
   * `enabled` is true.
   */
  redactedToolNames?: string[];
  /**
   * How entries of `redactedToolNames` are matched against tool names:
   * `exact` (the default), or `partial` to redact any tool whose name
   * contains a configured value.
   */
  redactedToolNameMatchMode?: 'exact' | 'partial';
  /** Text substituted for a redacted tool output. */
  redactionText?: string;
};
|
|
311
|
+
/** Per-config switch for ToolNode callback tracing. */
export type LangfuseToolNodeTracingConfig = {
  /**
   * Override for ToolNode callback tracing. Because ToolNode spans are
   * exported through the env-backed Langfuse callback, setting this only
   * turns tracing on when that callback is configured.
   */
  enabled?: boolean;
};
|
|
291
319
|
/**
 * Langfuse settings: an on/off flag, credentials and endpoint, plus optional
 * tracing controls for ToolNode spans and tool outputs. Every field is
 * optional.
 */
export interface LangfuseConfig {
  enabled?: boolean;
  publicKey?: string;
  secretKey?: string;
  baseUrl?: string;
  /** ToolNode callback tracing override. */
  toolNodeTracing?: LangfuseToolNodeTracingConfig;
  /** Tool-output export and redaction settings. */
  toolOutputTracing?: LangfuseToolOutputTracingConfig;
}
|
|
297
327
|
export interface AgentInputs {
|
|
298
328
|
agentId: string;
|
|
@@ -65,7 +65,7 @@ export type BedrockAnthropicInput = ChatBedrockConverseInput & {
|
|
|
65
65
|
additionalModelRequestFields?: ChatBedrockConverseInput['additionalModelRequestFields'] & AnthropicReasoning;
|
|
66
66
|
promptCache?: boolean;
|
|
67
67
|
};
|
|
68
|
-
export type BedrockConverseClientOptions =
|
|
68
|
+
export type BedrockConverseClientOptions = BedrockAnthropicInput;
|
|
69
69
|
export type BedrockAnthropicClientOptions = BedrockAnthropicInput;
|
|
70
70
|
export type GoogleClientOptions = GoogleGenerativeAIChatInput & {
|
|
71
71
|
customHeaders?: RequestOptions['customHeaders'];
|
|
@@ -95,7 +95,7 @@ export type ProviderOptionsMap = {
|
|
|
95
95
|
[Providers.MISTRALAI]: MistralAIClientOptions;
|
|
96
96
|
[Providers.MISTRAL]: MistralAIClientOptions;
|
|
97
97
|
[Providers.OPENROUTER]: ChatOpenRouterCallOptions;
|
|
98
|
-
[Providers.BEDROCK]:
|
|
98
|
+
[Providers.BEDROCK]: BedrockAnthropicClientOptions;
|
|
99
99
|
[Providers.XAI]: XAIClientOptions;
|
|
100
100
|
[Providers.MOONSHOT]: OpenAIClientOptions;
|
|
101
101
|
};
|
|
@@ -104,6 +104,12 @@ export type StandardGraphConfig = Omit<MultiAgentGraphConfig, 'edges' | 'type'>
|
|
|
104
104
|
export type RunConfig = {
|
|
105
105
|
runId: string;
|
|
106
106
|
graphConfig: LegacyGraphConfig | StandardGraphConfig | MultiAgentGraphConfig;
|
|
107
|
+
/**
|
|
108
|
+
* Run-scoped Langfuse configuration. Per-agent `AgentInputs.langfuse`
|
|
109
|
+
* takes precedence for agent-specific spans; this object supplies defaults
|
|
110
|
+
* for run-wide tracing controls such as tool-output redaction.
|
|
111
|
+
*/
|
|
112
|
+
langfuse?: g.LangfuseConfig;
|
|
107
113
|
customHandlers?: Record<string, g.EventHandler>;
|
|
108
114
|
/**
|
|
109
115
|
* Pre-constructed hook registry for this run. Hooks fire at lifecycle
|
|
@@ -5,6 +5,7 @@ import type { HookRegistry } from '@/hooks';
|
|
|
5
5
|
import type { ToolOutputReferenceRegistry } from '@/tools/toolOutputReferences';
|
|
6
6
|
import type { MessageContentComplex, ToolErrorData } from './stream';
|
|
7
7
|
import type { HumanInTheLoopConfig } from './hitl';
|
|
8
|
+
import type { LangfuseConfig } from './graph';
|
|
8
9
|
/** Replacement type for `import type { ToolCall } from '@langchain/core/messages/tool'` in order to have stringified args typed */
|
|
9
10
|
export type CustomToolCall = {
|
|
10
11
|
name: string;
|
|
@@ -62,6 +63,12 @@ export type EagerEventToolCallChunkState = {
|
|
|
62
63
|
export type ToolNodeOptions = {
|
|
63
64
|
name?: string;
|
|
64
65
|
tags?: string[];
|
|
66
|
+
/** Enables LangChain/LangGraph tracing for this ToolNode. Defaults to false. */
|
|
67
|
+
trace?: boolean;
|
|
68
|
+
/** Run-level Langfuse config used to scope ToolNode trace redaction. */
|
|
69
|
+
runLangfuse?: LangfuseConfig;
|
|
70
|
+
/** Agent-level Langfuse config used to scope ToolNode trace redaction. */
|
|
71
|
+
agentLangfuse?: LangfuseConfig;
|
|
65
72
|
handleToolErrors?: boolean;
|
|
66
73
|
loadRuntimeTools?: ToolRefGenerator;
|
|
67
74
|
toolCallStepIds?: Map<string, string>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@librechat/agents",
|
|
3
|
-
"version": "3.1.96",
|
|
3
|
+
"version": "3.1.98",
|
|
4
4
|
"main": "./dist/cjs/main.cjs",
|
|
5
5
|
"module": "./dist/esm/main.mjs",
|
|
6
6
|
"types": "./dist/types/index.d.ts",
|
|
@@ -230,6 +230,7 @@
|
|
|
230
230
|
"@langfuse/langchain": "^5.3.0",
|
|
231
231
|
"@langfuse/otel": "^5.3.0",
|
|
232
232
|
"@langfuse/tracing": "^5.3.0",
|
|
233
|
+
"@opentelemetry/context-async-hooks": "2.7.1",
|
|
233
234
|
"@opentelemetry/sdk-node": "^0.218.0",
|
|
234
235
|
"@scarf/scarf": "^1.4.0",
|
|
235
236
|
"@types/diff": "^7.0.2",
|
|
@@ -9,6 +9,14 @@ import { config as dotenvConfig } from 'dotenv';
|
|
|
9
9
|
dotenvConfig();
|
|
10
10
|
|
|
11
11
|
import { describe, expect, it } from '@jest/globals';
|
|
12
|
+
import {
|
|
13
|
+
AIMessage,
|
|
14
|
+
BaseMessage,
|
|
15
|
+
HumanMessage,
|
|
16
|
+
SystemMessage,
|
|
17
|
+
ToolMessage,
|
|
18
|
+
type MessageContentComplex,
|
|
19
|
+
} from '@langchain/core/messages';
|
|
12
20
|
import type * as t from '@/types';
|
|
13
21
|
import {
|
|
14
22
|
runLiveTurn,
|
|
@@ -18,6 +26,9 @@ import {
|
|
|
18
26
|
waitForCachePropagation,
|
|
19
27
|
} from './promptCacheLiveHelpers';
|
|
20
28
|
import { Providers } from '@/common';
|
|
29
|
+
import { addCacheControl } from '@/messages/cache';
|
|
30
|
+
import { toLangChainContent } from '@/messages/langchain';
|
|
31
|
+
import { _convertMessagesToAnthropicPayload } from '@/llm/anthropic/utils/message_inputs';
|
|
21
32
|
|
|
22
33
|
const shouldRunLive =
|
|
23
34
|
process.env.RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS === '1' &&
|
|
@@ -46,6 +57,268 @@ function createClientOptions(): t.AnthropicClientOptions {
|
|
|
46
57
|
};
|
|
47
58
|
}
|
|
48
59
|
|
|
60
|
+
/**
 * Figures recorded for one benchmark request: token counts taken from the
 * response's `usage` object plus the measured request latency.
 */
type AnthropicCacheUsage = {
  /** `usage.input_tokens`, or 0 when absent. */
  inputTokens: number;
  /** `usage.output_tokens`, or 0 when absent. */
  outputTokens: number;
  /** `usage.cache_creation_input_tokens`, or 0 when absent or null. */
  cacheCreation: number;
  /** `usage.cache_read_input_tokens`, or 0 when absent or null. */
  cacheRead: number;
  /** Elapsed wall-clock time of the request, in milliseconds. */
  latencyMs: number;
};
|
|
67
|
+
|
|
68
|
+
/**
 * The slice of an Anthropic Messages response this benchmark reads: only the
 * optional `usage` object. The cache counters may be `null` as well as
 * missing.
 */
type AnthropicUsageResponse = {
  usage?: {
    input_tokens?: number;
    output_tokens?: number;
    cache_creation_input_tokens?: number | null;
    cache_read_input_tokens?: number | null;
  };
};
|
|
76
|
+
|
|
77
|
+
/**
 * Minimal structural view of the Anthropic SDK client used by the benchmark:
 * just `messages.create`, taking a request body and per-request headers and
 * resolving to a response that exposes `usage`.
 */
type AnthropicMessagesClient = {
  messages: {
    create: (
      request: Record<string, unknown>,
      options: { headers: Record<string, string> }
    ) => Promise<AnthropicUsageResponse>;
  };
};
|
|
85
|
+
|
|
86
|
+
/**
 * Tool definition attached to every benchmark request. Its name matches the
 * `lookup_cache_probe` tool calls in the synthetic conversation, and its
 * input is an object with a required integer `step`.
 */
const benchmarkTool = {
  name: 'lookup_cache_probe',
  description: 'Returns prompt cache benchmark data.',
  input_schema: {
    type: 'object',
    properties: { step: { type: 'integer' } },
    required: ['step'],
  },
};
|
|
97
|
+
|
|
98
|
+
/**
 * Rebuilds `message` as a new message of the same type carrying `content`.
 *
 * `additional_kwargs` and `response_metadata` are shallow-copied, and `id`
 * and `name` are carried over. AI messages also keep their `tool_calls`;
 * tool messages keep their `tool_call_id`. A message of any other type is
 * returned unchanged (with its original content).
 *
 * @param message - Message to copy.
 * @param content - Replacement content blocks, converted with
 *   `toLangChainContent` before use.
 * @returns The rebuilt message, or `message` itself for unhandled types.
 */
function cloneLiveMessage(
  message: BaseMessage,
  content: MessageContentComplex[]
): BaseMessage {
  const shared = {
    content: toLangChainContent(content),
    additional_kwargs: { ...message.additional_kwargs },
    response_metadata: { ...message.response_metadata },
    id: message.id,
    name: message.name,
  };

  switch (message.getType()) {
    case 'ai':
      return new AIMessage({
        ...shared,
        tool_calls: (message as AIMessage).tool_calls,
      });
    case 'human':
      return new HumanMessage(shared);
    case 'system':
      return new SystemMessage(shared);
    case 'tool':
      return new ToolMessage({
        ...shared,
        tool_call_id: (message as ToolMessage).tool_call_id,
      });
    default:
      return message;
  }
}
|
|
132
|
+
|
|
133
|
+
/**
 * Returns a copy of `messages` in which at most one block carries an
 * Anthropic `cache_control: { type: 'ephemeral' }` marker: the last
 * non-blank text of the most recent human message that has one.
 *
 * Walking from the end of the list:
 * - array content is rebuilt without bare `cachePoint` blocks (objects with
 *   `cachePoint` but no `type`) and without any existing `cache_control`
 *   properties;
 * - the marker goes on the first human message reached that has non-blank
 *   text, on its last non-blank text block or, for string content, on a
 *   single text block wrapping that string;
 * - only messages whose content actually changed are replaced, via
 *   `cloneLiveMessage`.
 *
 * The input array and its messages are not mutated.
 */
function addLatestUserOnlyAnthropicCacheControl(
  messages: BaseMessage[]
): BaseMessage[] {
  const result = [...messages];
  let placed = false;

  for (let position = result.length - 1; position >= 0; position -= 1) {
    const current = result[position];
    const original = current.content;
    const mayPlace = !placed && current.getType() === 'human';

    if (typeof original === 'string') {
      // String content is only rewritten when it receives the marker.
      if (!mayPlace || original.trim() === '') {
        continue;
      }
      placed = true;
      result[position] = cloneLiveMessage(current, [
        {
          type: 'text',
          text: original,
          cache_control: { type: 'ephemeral' },
        },
      ] as unknown as MessageContentComplex[]);
      continue;
    }

    if (!Array.isArray(original)) {
      continue;
    }

    const rebuilt: MessageContentComplex[] = [];
    let changed = false;
    let lastTextAt = -1;

    for (const block of original as MessageContentComplex[]) {
      const isBareCachePoint = 'cachePoint' in block && !('type' in block);
      if (isBareCachePoint) {
        // Dropped entirely from the rebuilt content.
        changed = true;
        continue;
      }

      const copy = { ...block };
      if ('cache_control' in copy) {
        delete (copy as Record<string, unknown>).cache_control;
        changed = true;
      }
      if ('type' in copy && copy.type === 'text') {
        const text = (copy as { text?: string }).text;
        if (text != null && text.trim() !== '') {
          // Index the block is about to occupy in `rebuilt`.
          lastTextAt = rebuilt.length;
        }
      }
      rebuilt.push(copy as MessageContentComplex);
    }

    if (mayPlace && lastTextAt >= 0) {
      (
        rebuilt[lastTextAt] as MessageContentComplex & {
          cache_control?: { type: 'ephemeral' };
        }
      ).cache_control = { type: 'ephemeral' };
      placed = true;
      changed = true;
    }

    if (changed) {
      result[position] = cloneLiveMessage(current, rebuilt);
    }
  }

  return result;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Builds `count` newline-separated filler lines, each prefixed with `label`
 * and its zero-based index. Used to pad benchmark prompts with deterministic
 * text.
 *
 * @param label - Prefix placed at the start of every line.
 * @param count - Number of lines to generate.
 * @returns The lines joined with `\n`; an empty string when no lines are
 *   generated.
 */
function repeated(label: string, count: number): string {
  const lineFor = (index: number): string =>
    `${label} reference ${index}: stable schema, metric definition, access policy, dashboard note, and query planning guidance.`;
  const lines = Array.from({ length: count }, (_unused, index) =>
    lineFor(index)
  );
  return lines.join('\n');
}
|
|
216
|
+
|
|
217
|
+
/**
 * Builds the synthetic conversation used by the cache placement benchmark:
 * a long first user turn that depends only on `nonce`, a short assistant
 * acknowledgement, a second user turn tagged with `marker`, then two
 * assistant tool-call / tool-result rounds for `lookup_cache_probe`.
 *
 * Everything from the second user turn onwards embeds `marker`, so two calls
 * with the same `nonce` but different markers share only the first two
 * messages.
 *
 * @param nonce - Identifier embedded in all generated filler text.
 * @param marker - Value embedded in the volatile part of the conversation
 *   and in the tool-call ids.
 */
function buildMultiTurnToolMessages({
  nonce,
  marker,
}: {
  nonce: string;
  marker: string;
}): BaseMessage[] {
  const volatileToolPayload = repeated(`${nonce} volatile-tool-${marker}`, 70);

  const firstUserText = [
    `Anthropic prompt cache placement benchmark ${nonce}.`,
    'This first user turn is intentionally stable across calls in the same benchmark case.',
    repeated(`${nonce} stable-user-context`, 190),
  ].join('\n');

  const latestUserText = [
    `Current user request marker: ${marker}.`,
    'Use the final tool result to answer with the marker only.',
    repeated(`${nonce} latest-user-${marker}`, 18),
  ].join('\n');

  const finalToolText = [
    `Final tool result marker: ${marker}.`,
    'Reply with the marker and no extra explanation.',
    volatileToolPayload,
  ].join('\n');

  // Both tool rounds share the same shape and differ only in the step number.
  const callIdFor = (step: number): string => `call_${marker}_${step}`;
  const probeRequest = (step: number): AIMessage =>
    new AIMessage({
      content: `I will inspect cache probe step ${step} for ${marker}.\n${volatileToolPayload}`,
      tool_calls: [
        {
          id: callIdFor(step),
          name: 'lookup_cache_probe',
          args: { step },
        },
      ],
    });
  const probeResult = (step: number, text: string): ToolMessage =>
    new ToolMessage({
      content: text,
      tool_call_id: callIdFor(step),
    });

  return [
    new HumanMessage(firstUserText),
    new AIMessage('I will keep this stable context in mind.'),
    new HumanMessage(latestUserText),
    probeRequest(1),
    probeResult(1, `Tool result 1 for ${marker}.\n${volatileToolPayload}`),
    probeRequest(2),
    probeResult(2, finalToolText),
  ];
}
|
|
274
|
+
|
|
275
|
+
/**
 * Converts the `usage` object of an Anthropic response into the benchmark's
 * usage record, treating every missing (or null) counter as 0.
 *
 * @param response - Response whose `usage` object is read.
 * @param latencyMs - Measured request latency, copied into the result.
 * @returns Token counters plus `latencyMs`.
 * @throws Error when `response.usage` is null or undefined.
 */
function extractCacheUsage(
  response: AnthropicUsageResponse,
  latencyMs: number
): AnthropicCacheUsage {
  const { usage } = response;
  if (usage == null) {
    throw new Error('Missing Anthropic usage metadata for cache benchmark');
  }

  const orZero = (value: number | null | undefined): number => value ?? 0;

  return {
    inputTokens: orZero(usage.input_tokens),
    outputTokens: orZero(usage.output_tokens),
    cacheCreation: orZero(usage.cache_creation_input_tokens),
    cacheRead: orZero(usage.cache_read_input_tokens),
    latencyMs,
  };
}
|
|
291
|
+
|
|
292
|
+
/**
 * Sends one benchmark request through `client.messages.create` and returns
 * its cache usage together with the measured latency.
 *
 * The LangChain messages are converted with
 * `_convertMessagesToAnthropicPayload`; the request also sets the model
 * (`modelName`, defined elsewhere in this file), a 16-token output cap,
 * temperature 0 and the benchmark tool. The prompt-caching beta header is
 * sent with the request. Timing starts after the payload has been built.
 *
 * @param client - Object exposing `messages.create`.
 * @param messages - Conversation to send.
 * @returns Usage counters extracted from the response, plus latency in ms.
 * @throws Error (from `extractCacheUsage`) when the response has no `usage`.
 */
async function runAnthropicCacheBenchmarkTurn({
  client,
  messages,
}: {
  client: AnthropicMessagesClient;
  messages: BaseMessage[];
}): Promise<AnthropicCacheUsage> {
  const request = {
    ..._convertMessagesToAnthropicPayload(messages),
    model: modelName,
    max_tokens: 16,
    temperature: 0,
    tools: [benchmarkTool],
  };
  const requestOptions = {
    headers: {
      'anthropic-beta': 'prompt-caching-2024-07-31',
    },
  };

  const startedAt = Date.now();
  const response = await client.messages.create(request, requestOptions);
  const latencyMs = Date.now() - startedAt;

  return extractCacheUsage(response, latencyMs);
}
|
|
321
|
+
|
|
49
322
|
describeIfLive('AgentContext Anthropic prompt cache live API', () => {
|
|
50
323
|
it('caches only the stable system prefix while dynamic tail changes', async () => {
|
|
51
324
|
const nonce = `agent-cache-live-${Date.now()}`;
|
|
@@ -109,4 +382,63 @@ describeIfLive('AgentContext Anthropic prompt cache live API', () => {
|
|
|
109
382
|
expect(second.text.toLowerCase()).toContain('bravo');
|
|
110
383
|
expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
|
|
111
384
|
}, 120_000);
|
|
385
|
+
|
|
386
|
+
// Live benchmark: runs the same two-call scenario (marker 'alpha', then
// 'bravo' after waiting for cache propagation) under two cache-marker
// strategies — the library's `addCacheControl` and the local
// latest-user-only variant — each with its own nonce so the cases cannot
// share cache entries. On the second call, the current strategy is expected
// to read more cached tokens and write fewer than latest-user-only.
it('compares current two-user cache placement against latest-user-only', async () => {
  const { default: Anthropic } = await import('@anthropic-ai/sdk');
  const client = new Anthropic({
    apiKey: process.env.ANTHROPIC_API_KEY,
  }) as unknown as AnthropicMessagesClient;

  const nonce = `anthropic-cache-placement-${Date.now()}`;
  const currentNonce = `${nonce}-current`;
  const latestOnlyNonce = `${nonce}-latest-only`;

  const runCurrent = (marker: string): Promise<AnthropicCacheUsage> =>
    runAnthropicCacheBenchmarkTurn({
      client,
      messages: addCacheControl(
        buildMultiTurnToolMessages({ nonce: currentNonce, marker })
      ),
    });
  const runLatestOnly = (marker: string): Promise<AnthropicCacheUsage> =>
    runAnthropicCacheBenchmarkTurn({
      client,
      messages: addLatestUserOnlyAnthropicCacheControl(
        buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker })
      ),
    });

  // Calls stay strictly sequential: each second call must observe the cache
  // written by its first call.
  const currentFirst = await runCurrent('alpha');
  await waitForCachePropagation();
  const currentSecond = await runCurrent('bravo');

  const latestOnlyFirst = await runLatestOnly('alpha');
  await waitForCachePropagation();
  const latestOnlySecond = await runLatestOnly('bravo');

  const report = {
    currentFirst,
    currentSecond,
    latestOnlyFirst,
    latestOnlySecond,
    cacheWriteDelta:
      currentSecond.cacheCreation - latestOnlySecond.cacheCreation,
  };
  process.stdout.write(
    `Anthropic cache placement benchmark ${JSON.stringify(report)}\n`
  );

  expect(currentSecond.cacheRead).toBeGreaterThan(0);
  expect(currentSecond.cacheRead).toBeGreaterThan(latestOnlySecond.cacheRead);
  expect(currentSecond.cacheCreation).toBeLessThan(
    latestOnlySecond.cacheCreation
  );
}, 180_000);
|
|
112
444
|
});
|