@librechat/agents 3.2.35 → 3.2.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +75 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/agents/projection.cjs +25 -0
- package/dist/cjs/agents/projection.cjs.map +1 -0
- package/dist/cjs/graphs/Graph.cjs +10 -26
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/langfuse.cjs +16 -5
- package/dist/cjs/langfuse.cjs.map +1 -1
- package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
- package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +118 -7
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +44 -4
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/main.cjs +7 -0
- package/dist/cjs/messages/budget.cjs +23 -0
- package/dist/cjs/messages/budget.cjs.map +1 -0
- package/dist/cjs/messages/cache.cjs +184 -0
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/index.cjs +1 -0
- package/dist/cjs/summarization/node.cjs +1 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +91 -2
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +4 -3
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
- package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +76 -3
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/agents/projection.mjs +25 -0
- package/dist/esm/agents/projection.mjs.map +1 -0
- package/dist/esm/graphs/Graph.mjs +9 -25
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/langfuse.mjs +16 -5
- package/dist/esm/langfuse.mjs.map +1 -1
- package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
- package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +118 -7
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +44 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -2
- package/dist/esm/messages/budget.mjs +23 -0
- package/dist/esm/messages/budget.mjs.map +1 -0
- package/dist/esm/messages/cache.mjs +182 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/index.mjs +1 -0
- package/dist/esm/summarization/node.mjs +2 -2
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +91 -2
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +4 -3
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/toolOutputReferences.mjs +28 -14
- package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +30 -1
- package/dist/types/agents/projection.d.ts +26 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/budget.d.ts +11 -0
- package/dist/types/messages/cache.d.ts +47 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/types.d.ts +7 -0
- package/dist/types/types/graph.d.ts +2 -0
- package/package.json +2 -1
- package/src/agents/AgentContext.ts +105 -4
- package/src/agents/__tests__/AgentContext.test.ts +232 -9
- package/src/agents/__tests__/projection.test.ts +73 -0
- package/src/agents/projection.ts +46 -0
- package/src/graphs/Graph.ts +66 -65
- package/src/index.ts +3 -0
- package/src/langfuse.ts +38 -4
- package/src/langfuseToolOutputTracing.ts +18 -0
- package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
- package/src/llm/anthropic/utils/message_inputs.ts +209 -19
- package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
- package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
- package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
- package/src/llm/bedrock/utils/message_inputs.ts +81 -4
- package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
- package/src/messages/budget.ts +32 -0
- package/src/messages/cache.tail.test.ts +340 -0
- package/src/messages/cache.ts +267 -1
- package/src/messages/index.ts +1 -0
- package/src/messages/tailCacheConversion.test.ts +161 -0
- package/src/scripts/bench-prompt-cache.ts +479 -0
- package/src/specs/langfuse-config.test.ts +69 -2
- package/src/specs/langfuse-metadata.test.ts +44 -0
- package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
- package/src/summarization/node.ts +2 -2
- package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
- package/src/tools/search/format.test.ts +242 -0
- package/src/tools/search/format.ts +122 -5
- package/src/tools/search/tool.ts +5 -1
- package/src/tools/search/types.ts +7 -0
- package/src/tools/toolOutputReferences.ts +34 -20
- package/src/types/graph.ts +2 -0
|
@@ -274,16 +274,11 @@ describe('AgentContext', () => {
|
|
|
274
274
|
new HumanMessage('First'),
|
|
275
275
|
new HumanMessage('Second'),
|
|
276
276
|
]);
|
|
277
|
-
const firstContent = result[1].content as TestSystemContentBlock[];
|
|
278
277
|
const secondContent = result[2].content as TestSystemContentBlock[];
|
|
279
278
|
|
|
280
279
|
expect(result).toHaveLength(3);
|
|
281
280
|
expect(result[0].content).toBe('Dynamic only');
|
|
282
|
-
expect(
|
|
283
|
-
type: 'text',
|
|
284
|
-
text: 'First',
|
|
285
|
-
cache_control: { type: 'ephemeral' },
|
|
286
|
-
});
|
|
281
|
+
expect(result[1].content).toBe('First');
|
|
287
282
|
expect(secondContent[0]).toMatchObject({
|
|
288
283
|
type: 'text',
|
|
289
284
|
text: 'Second',
|
|
@@ -686,7 +681,7 @@ describe('AgentContext', () => {
|
|
|
686
681
|
expect(result[8].content).toBe('Now answer without tools');
|
|
687
682
|
});
|
|
688
683
|
|
|
689
|
-
it('adds OpenRouter body cache
|
|
684
|
+
it('adds a single OpenRouter body cache point on the tail when there is no dynamic tail', async () => {
|
|
690
685
|
const ctx = createBasicContext({
|
|
691
686
|
agentConfig: {
|
|
692
687
|
provider: Providers.OPENROUTER,
|
|
@@ -702,9 +697,8 @@ describe('AgentContext', () => {
|
|
|
702
697
|
new HumanMessage('First'),
|
|
703
698
|
new HumanMessage('Second'),
|
|
704
699
|
]);
|
|
705
|
-
const firstContent = result[1].content as TestSystemContentBlock[];
|
|
706
700
|
const secondContent = result[2].content as TestSystemContentBlock[];
|
|
707
|
-
expect(
|
|
701
|
+
expect(result[1].content).toBe('First');
|
|
708
702
|
expect(secondContent[0]).toHaveProperty('cache_control');
|
|
709
703
|
});
|
|
710
704
|
|
|
@@ -2147,4 +2141,233 @@ describe('AgentContext', () => {
|
|
|
2147
2141
|
expect(ctx.lastCallUsage!.inputTokens).toBe(8005);
|
|
2148
2142
|
});
|
|
2149
2143
|
});
|
|
2144
|
+
|
|
2145
|
+
describe('projectContextUsage', () => {
|
|
2146
|
+
const countByChars = (msg: { content: unknown }): number => {
|
|
2147
|
+
const content =
|
|
2148
|
+
typeof msg.content === 'string'
|
|
2149
|
+
? msg.content
|
|
2150
|
+
: JSON.stringify(msg.content);
|
|
2151
|
+
return content.length;
|
|
2152
|
+
};
|
|
2153
|
+
|
|
2154
|
+
const buildBranch = (
|
|
2155
|
+
maxContextTokens: number,
|
|
2156
|
+
perMessageTokens: number,
|
|
2157
|
+
count: number,
|
|
2158
|
+
): { ctx: AgentContext; messages: AIMessage[] } => {
|
|
2159
|
+
const ctx = createBasicContext({ tokenCounter: countByChars });
|
|
2160
|
+
ctx.maxContextTokens = maxContextTokens;
|
|
2161
|
+
const messages: AIMessage[] = [];
|
|
2162
|
+
for (let i = 0; i < count; i++) {
|
|
2163
|
+
// countByChars counts content length, and projectContextUsage recounts
|
|
2164
|
+
// the supplied messages — so size content to the intended per-msg tokens.
|
|
2165
|
+
const content = 'x'.repeat(perMessageTokens);
|
|
2166
|
+
messages.push(
|
|
2167
|
+
i % 2 === 0
|
|
2168
|
+
? (new HumanMessage(content) as unknown as AIMessage)
|
|
2169
|
+
: new AIMessage(content),
|
|
2170
|
+
);
|
|
2171
|
+
}
|
|
2172
|
+
return { ctx, messages };
|
|
2173
|
+
};
|
|
2174
|
+
|
|
2175
|
+
it('returns null without a tokenizer or a window', () => {
|
|
2176
|
+
const noCounter = createBasicContext({});
|
|
2177
|
+
noCounter.maxContextTokens = 1000;
|
|
2178
|
+
expect(noCounter.projectContextUsage([new HumanMessage('hi')])).toBeNull();
|
|
2179
|
+
|
|
2180
|
+
const noWindow = createBasicContext({ tokenCounter: countByChars });
|
|
2181
|
+
noWindow.maxContextTokens = undefined;
|
|
2182
|
+
expect(noWindow.projectContextUsage([new HumanMessage('hi')])).toBeNull();
|
|
2183
|
+
});
|
|
2184
|
+
|
|
2185
|
+
it('keeps the whole branch and reports headroom when it fits', () => {
|
|
2186
|
+
const { ctx, messages } = buildBranch(100_000, 1_000, 4);
|
|
2187
|
+
const usage = ctx.projectContextUsage(messages);
|
|
2188
|
+
|
|
2189
|
+
expect(usage).not.toBeNull();
|
|
2190
|
+
expect(usage!.breakdown.messageCount).toBe(4);
|
|
2191
|
+
expect(usage!.breakdown.maxContextTokens).toBe(100_000);
|
|
2192
|
+
expect(usage!.remainingContextTokens).toBeGreaterThan(0);
|
|
2193
|
+
expect(usage!.breakdown.messageTokens).toBeGreaterThan(0);
|
|
2194
|
+
|
|
2195
|
+
const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
|
|
2196
|
+
const used = max - (usage!.remainingContextTokens ?? 0);
|
|
2197
|
+
expect(used).toBeLessThanOrEqual(max);
|
|
2198
|
+
});
|
|
2199
|
+
|
|
2200
|
+
it('prunes older messages when the branch exceeds the window', () => {
|
|
2201
|
+
const { ctx, messages } = buildBranch(3_000, 1_000, 6);
|
|
2202
|
+
const usage = ctx.projectContextUsage(messages);
|
|
2203
|
+
|
|
2204
|
+
expect(usage).not.toBeNull();
|
|
2205
|
+
expect(usage!.breakdown.messageCount).toBeGreaterThan(0);
|
|
2206
|
+
expect(usage!.breakdown.messageCount).toBeLessThan(6);
|
|
2207
|
+
expect(usage!.remainingContextTokens).toBeGreaterThanOrEqual(0);
|
|
2208
|
+
|
|
2209
|
+
const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
|
|
2210
|
+
expect(max - (usage!.remainingContextTokens ?? 0)).toBeLessThanOrEqual(max);
|
|
2211
|
+
});
|
|
2212
|
+
|
|
2213
|
+
it('does not mutate the context (local pruner, no field writes)', () => {
|
|
2214
|
+
const { ctx, messages } = buildBranch(3_000, 1_000, 6);
|
|
2215
|
+
const mapBefore = { ...ctx.indexTokenCountMap };
|
|
2216
|
+
|
|
2217
|
+
expect(ctx.pruneMessages).toBeUndefined();
|
|
2218
|
+
ctx.projectContextUsage(messages);
|
|
2219
|
+
|
|
2220
|
+
expect(ctx.pruneMessages).toBeUndefined();
|
|
2221
|
+
expect(ctx.indexTokenCountMap).toEqual(mapBefore);
|
|
2222
|
+
});
|
|
2223
|
+
|
|
2224
|
+
it('does not mutate the caller messages under context pressure', () => {
|
|
2225
|
+
const ctx = createBasicContext({ tokenCounter: countByChars });
|
|
2226
|
+
ctx.maxContextTokens = 400;
|
|
2227
|
+
const consumed = new ToolMessage({
|
|
2228
|
+
content: 'x'.repeat(20_000),
|
|
2229
|
+
tool_call_id: 't1',
|
|
2230
|
+
name: 'tool',
|
|
2231
|
+
});
|
|
2232
|
+
const messages: AIMessage[] = [
|
|
2233
|
+
new HumanMessage('question') as unknown as AIMessage,
|
|
2234
|
+
new AIMessage({
|
|
2235
|
+
content: '',
|
|
2236
|
+
tool_calls: [{ id: 't1', name: 'tool', args: {} }],
|
|
2237
|
+
}),
|
|
2238
|
+
consumed as unknown as AIMessage,
|
|
2239
|
+
new AIMessage('final answer'),
|
|
2240
|
+
];
|
|
2241
|
+
const originalRef = messages[2];
|
|
2242
|
+
const originalContent = (messages[2] as unknown as ToolMessage).content;
|
|
2243
|
+
|
|
2244
|
+
ctx.projectContextUsage(messages);
|
|
2245
|
+
|
|
2246
|
+
expect(messages[2]).toBe(originalRef);
|
|
2247
|
+
expect((messages[2] as unknown as ToolMessage).content).toBe(
|
|
2248
|
+
originalContent,
|
|
2249
|
+
);
|
|
2250
|
+
});
|
|
2251
|
+
|
|
2252
|
+
it('recounts the supplied branch, ignoring a stale context token map', () => {
|
|
2253
|
+
const ctx = createBasicContext({ tokenCounter: countByChars });
|
|
2254
|
+
ctx.maxContextTokens = 3_000;
|
|
2255
|
+
// Empty/stale map — if it were reused, every message would count as 0 and
|
|
2256
|
+
// nothing would prune. The fresh recount must drive pruning instead.
|
|
2257
|
+
ctx.indexTokenCountMap = {};
|
|
2258
|
+
const messages: AIMessage[] = [];
|
|
2259
|
+
for (let i = 0; i < 6; i++) {
|
|
2260
|
+
messages.push(new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage);
|
|
2261
|
+
}
|
|
2262
|
+
|
|
2263
|
+
const usage = ctx.projectContextUsage(messages);
|
|
2264
|
+
|
|
2265
|
+
expect(usage).not.toBeNull();
|
|
2266
|
+
expect(usage!.breakdown.messageCount).toBeLessThan(6);
|
|
2267
|
+
});
|
|
2268
|
+
|
|
2269
|
+
it('uses a caller-supplied token map when provided', () => {
|
|
2270
|
+
const { ctx, messages } = buildBranch(3_000, 1, 6);
|
|
2271
|
+
// Each message is ~1 char, so a recount would fit all 6. The supplied map
|
|
2272
|
+
// claims 1000 each, forcing a prune — proving the map is honored.
|
|
2273
|
+
const indexTokenCountMap: Record<string, number> = {};
|
|
2274
|
+
for (let i = 0; i < messages.length; i++) {
|
|
2275
|
+
indexTokenCountMap[String(i)] = 1_000;
|
|
2276
|
+
}
|
|
2277
|
+
|
|
2278
|
+
const usage = ctx.projectContextUsage(messages, { indexTokenCountMap });
|
|
2279
|
+
|
|
2280
|
+
expect(usage!.breakdown.messageCount).toBeLessThan(6);
|
|
2281
|
+
});
|
|
2282
|
+
|
|
2283
|
+
it('ignores this context live usage so projections are not recalibrated', () => {
|
|
2284
|
+
const build = (): { ctx: AgentContext; messages: AIMessage[] } => {
|
|
2285
|
+
const ctx = createBasicContext({ tokenCounter: countByChars });
|
|
2286
|
+
ctx.maxContextTokens = 5_000;
|
|
2287
|
+
const messages: AIMessage[] = [0, 1, 2].map(
|
|
2288
|
+
() => new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage,
|
|
2289
|
+
);
|
|
2290
|
+
return { ctx, messages };
|
|
2291
|
+
};
|
|
2292
|
+
|
|
2293
|
+
const clean = build();
|
|
2294
|
+
const cleanUsage = clean.ctx.projectContextUsage(clean.messages);
|
|
2295
|
+
|
|
2296
|
+
const dirty = build();
|
|
2297
|
+
dirty.ctx.currentUsage = {
|
|
2298
|
+
input_tokens: 4_000,
|
|
2299
|
+
output_tokens: 50,
|
|
2300
|
+
total_tokens: 4_050,
|
|
2301
|
+
};
|
|
2302
|
+
dirty.ctx.updateLastCallUsage({ input_tokens: 4_000, output_tokens: 50 });
|
|
2303
|
+
const dirtyUsage = dirty.ctx.projectContextUsage(dirty.messages);
|
|
2304
|
+
|
|
2305
|
+
expect(dirtyUsage!.remainingContextTokens).toBe(
|
|
2306
|
+
cleanUsage!.remainingContextTokens,
|
|
2307
|
+
);
|
|
2308
|
+
expect(dirtyUsage!.calibrationRatio).toBe(cleanUsage!.calibrationRatio);
|
|
2309
|
+
});
|
|
2310
|
+
|
|
2311
|
+
it('does not mutate AI message content arrays during projection', () => {
|
|
2312
|
+
const ctx = createBasicContext({
|
|
2313
|
+
agentConfig: {
|
|
2314
|
+
provider: Providers.ANTHROPIC,
|
|
2315
|
+
clientOptions: {
|
|
2316
|
+
model: 'claude-x',
|
|
2317
|
+
thinking: { type: 'enabled', budget_tokens: 1024 },
|
|
2318
|
+
} as never,
|
|
2319
|
+
},
|
|
2320
|
+
tokenCounter: countByChars,
|
|
2321
|
+
});
|
|
2322
|
+
ctx.maxContextTokens = 2_000;
|
|
2323
|
+
const aiContent = [
|
|
2324
|
+
{ type: 'thinking', thinking: 'step by step', signature: 'sig' },
|
|
2325
|
+
{ type: 'text', text: 'the answer' },
|
|
2326
|
+
];
|
|
2327
|
+
const ai = new AIMessage({ content: aiContent as never });
|
|
2328
|
+
const messages: AIMessage[] = [
|
|
2329
|
+
new HumanMessage('question') as unknown as AIMessage,
|
|
2330
|
+
ai,
|
|
2331
|
+
new HumanMessage('another') as unknown as AIMessage,
|
|
2332
|
+
];
|
|
2333
|
+
const contentRef = ai.content;
|
|
2334
|
+
const lenBefore = (ai.content as unknown[]).length;
|
|
2335
|
+
|
|
2336
|
+
ctx.projectContextUsage(messages);
|
|
2337
|
+
|
|
2338
|
+
expect(messages[1].content).toBe(contentRef);
|
|
2339
|
+
expect((messages[1].content as unknown[]).length).toBe(lenBefore);
|
|
2340
|
+
});
|
|
2341
|
+
|
|
2342
|
+
it('honors an explicit calibrationRatio seed', () => {
|
|
2343
|
+
const base = buildBranch(100_000, 1_000, 4);
|
|
2344
|
+
const baseUsage = base.ctx.projectContextUsage(base.messages);
|
|
2345
|
+
|
|
2346
|
+
const scaled = buildBranch(100_000, 1_000, 4);
|
|
2347
|
+
const scaledUsage = scaled.ctx.projectContextUsage(scaled.messages, {
|
|
2348
|
+
calibrationRatio: 3,
|
|
2349
|
+
});
|
|
2350
|
+
|
|
2351
|
+
expect(scaledUsage!.calibrationRatio).toBe(3);
|
|
2352
|
+
expect(scaledUsage!.remainingContextTokens).not.toBe(
|
|
2353
|
+
baseUsage!.remainingContextTokens,
|
|
2354
|
+
);
|
|
2355
|
+
});
|
|
2356
|
+
|
|
2357
|
+
it('refreshes a stale system runnable before projecting', () => {
|
|
2358
|
+
const ctx = createBasicContext({
|
|
2359
|
+
agentConfig: { instructions: 'system prompt' },
|
|
2360
|
+
tokenCounter: countByChars,
|
|
2361
|
+
});
|
|
2362
|
+
ctx.maxContextTokens = 5_000;
|
|
2363
|
+
ctx.initializeSystemRunnable();
|
|
2364
|
+
const systemBefore = ctx.systemMessageTokens;
|
|
2365
|
+
|
|
2366
|
+
// Adds a handoff preamble + marks stale, but defers the token recount.
|
|
2367
|
+
ctx.setHandoffContext('PriorAgent', ['SiblingA', 'SiblingB']);
|
|
2368
|
+
ctx.projectContextUsage([new HumanMessage('hi') as unknown as AIMessage]);
|
|
2369
|
+
|
|
2370
|
+
expect(ctx.systemMessageTokens).toBeGreaterThan(systemBefore);
|
|
2371
|
+
});
|
|
2372
|
+
});
|
|
2150
2373
|
});
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { AIMessage, HumanMessage } from '@langchain/core/messages';
|
|
2
|
+
import type * as t from '@/types';
|
|
3
|
+
import { Providers } from '@/common';
|
|
4
|
+
import { projectAgentContextUsage } from '../projection';
|
|
5
|
+
|
|
6
|
+
const countByChars = (msg: { content: unknown }): number => {
|
|
7
|
+
const content =
|
|
8
|
+
typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
|
|
9
|
+
return content.length;
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
const agent = (maxContextTokens: number): t.AgentInputs => ({
|
|
13
|
+
agentId: 'test-agent',
|
|
14
|
+
provider: Providers.OPENAI,
|
|
15
|
+
instructions: 'system prompt',
|
|
16
|
+
maxContextTokens,
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
const branch = (perMessageChars: number, count: number): AIMessage[] => {
|
|
20
|
+
const messages: AIMessage[] = [];
|
|
21
|
+
for (let i = 0; i < count; i++) {
|
|
22
|
+
const content = 'x'.repeat(perMessageChars);
|
|
23
|
+
messages.push(
|
|
24
|
+
i % 2 === 0
|
|
25
|
+
? (new HumanMessage(content) as unknown as AIMessage)
|
|
26
|
+
: new AIMessage(content),
|
|
27
|
+
);
|
|
28
|
+
}
|
|
29
|
+
return messages;
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
describe('projectAgentContextUsage', () => {
|
|
33
|
+
it('returns a budget snapshot for a branch that fits', async () => {
|
|
34
|
+
const usage = await projectAgentContextUsage({
|
|
35
|
+
agent: agent(100_000),
|
|
36
|
+
messages: branch(1_000, 4),
|
|
37
|
+
tokenCounter: countByChars,
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
expect(usage).not.toBeNull();
|
|
41
|
+
expect(usage!.breakdown.maxContextTokens).toBe(100_000);
|
|
42
|
+
expect(usage!.breakdown.messageCount).toBe(4);
|
|
43
|
+
expect(usage!.remainingContextTokens).toBeGreaterThan(0);
|
|
44
|
+
expect(usage!.agentId).toBe('test-agent');
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
it('prunes when the branch exceeds the window', async () => {
|
|
48
|
+
const usage = await projectAgentContextUsage({
|
|
49
|
+
agent: agent(3_000),
|
|
50
|
+
messages: branch(1_000, 6),
|
|
51
|
+
tokenCounter: countByChars,
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
expect(usage).not.toBeNull();
|
|
55
|
+
expect(usage!.breakdown.messageCount).toBeGreaterThan(0);
|
|
56
|
+
expect(usage!.breakdown.messageCount).toBeLessThan(6);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it('returns null without a context window', async () => {
|
|
60
|
+
const noWindow: t.AgentInputs = {
|
|
61
|
+
agentId: 'test-agent',
|
|
62
|
+
provider: Providers.OPENAI,
|
|
63
|
+
instructions: 'sys',
|
|
64
|
+
};
|
|
65
|
+
const usage = await projectAgentContextUsage({
|
|
66
|
+
agent: noWindow,
|
|
67
|
+
messages: branch(100, 2),
|
|
68
|
+
tokenCounter: countByChars,
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
expect(usage).toBeNull();
|
|
72
|
+
});
|
|
73
|
+
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
2
|
+
import type * as t from '@/types';
|
|
3
|
+
import { AgentContext } from './AgentContext';
|
|
4
|
+
|
|
5
|
+
export interface ProjectAgentContextUsageParams {
|
|
6
|
+
/** Same `AgentInputs` a run is built from (instructions, tools, model, window). */
|
|
7
|
+
agent: t.AgentInputs;
|
|
8
|
+
/** Branch messages to project, in send order (no leading system message). */
|
|
9
|
+
messages: BaseMessage[];
|
|
10
|
+
tokenCounter: t.TokenCounter;
|
|
11
|
+
/** Per-message counts aligned to `messages` (e.g. from `formatAgentMessages`).
|
|
12
|
+
* When omitted, counts are recounted via `tokenCounter`. */
|
|
13
|
+
indexTokenCountMap?: Record<string, number>;
|
|
14
|
+
/** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */
|
|
15
|
+
calibrationRatio?: number;
|
|
16
|
+
runId?: string;
|
|
17
|
+
agentId?: string;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Projects a pre-send context-usage snapshot for a branch under an agent config
|
|
22
|
+
* WITHOUT invoking the model — the host-side (page-load / branch-switch /
|
|
23
|
+
* window-switch) counterpart to the live `ON_CONTEXT_USAGE` event. Builds a
|
|
24
|
+
* throwaway `AgentContext` from the same `AgentInputs` a run uses, awaits its
|
|
25
|
+
* instruction/tool token accounting, then runs the shared pruner + budget math
|
|
26
|
+
* via `AgentContext.projectContextUsage` (which never mutates the supplied
|
|
27
|
+
* messages). Returns null when the config has no tokenizer or context window.
|
|
28
|
+
*/
|
|
29
|
+
export async function projectAgentContextUsage({
|
|
30
|
+
agent,
|
|
31
|
+
messages,
|
|
32
|
+
tokenCounter,
|
|
33
|
+
indexTokenCountMap,
|
|
34
|
+
calibrationRatio,
|
|
35
|
+
runId,
|
|
36
|
+
agentId,
|
|
37
|
+
}: ProjectAgentContextUsageParams): Promise<t.ContextUsageEvent | null> {
|
|
38
|
+
const context = AgentContext.fromConfig(agent, tokenCounter, indexTokenCountMap);
|
|
39
|
+
await context.tokenCalculationPromise;
|
|
40
|
+
return context.projectContextUsage(messages, {
|
|
41
|
+
runId,
|
|
42
|
+
agentId: agentId ?? agent.agentId,
|
|
43
|
+
calibrationRatio,
|
|
44
|
+
indexTokenCountMap,
|
|
45
|
+
});
|
|
46
|
+
}
|
package/src/graphs/Graph.ts
CHANGED
|
@@ -19,13 +19,14 @@ import {
|
|
|
19
19
|
convertMessagesToContent,
|
|
20
20
|
sanitizeOrphanToolBlocks,
|
|
21
21
|
extractToolDiscoveries,
|
|
22
|
-
|
|
22
|
+
addBedrockTailCacheControl,
|
|
23
23
|
formatArtifactPayload,
|
|
24
24
|
enforceOriginalContentCap,
|
|
25
25
|
formatContentStrings,
|
|
26
26
|
isLegacyConvertible,
|
|
27
27
|
createPruneMessages,
|
|
28
|
-
|
|
28
|
+
syncBudgetDerivedFields,
|
|
29
|
+
addTailCacheControl,
|
|
29
30
|
getMessageId,
|
|
30
31
|
makeIsDeferred,
|
|
31
32
|
partitionAndMarkAnthropicToolCache,
|
|
@@ -111,35 +112,6 @@ function trailingMutationStart(messages: BaseMessage[]): number {
|
|
|
111
112
|
return Math.max(0, Math.min(index, messages.length - 2));
|
|
112
113
|
}
|
|
113
114
|
|
|
114
|
-
/**
|
|
115
|
-
* Re-derives the breakdown fields coupled to the calibrated budget math so
|
|
116
|
-
* the snapshot stays internally consistent: the aggregate
|
|
117
|
-
* `instructionTokens`/`availableForMessages` reflect the pruner's effective
|
|
118
|
-
* (calibrated) overhead — component fields remain local estimates — and
|
|
119
|
-
* `messageTokens` mirrors `contextBudget - instructions - remaining`.
|
|
120
|
-
*/
|
|
121
|
-
function syncBudgetDerivedFields(usage: t.ContextUsageEvent): void {
|
|
122
|
-
const { breakdown, contextBudget, effectiveInstructionTokens } = usage;
|
|
123
|
-
if (effectiveInstructionTokens == null) {
|
|
124
|
-
return;
|
|
125
|
-
}
|
|
126
|
-
breakdown.instructionTokens = effectiveInstructionTokens;
|
|
127
|
-
if (contextBudget == null) {
|
|
128
|
-
return;
|
|
129
|
-
}
|
|
130
|
-
breakdown.availableForMessages = Math.max(
|
|
131
|
-
0,
|
|
132
|
-
contextBudget - effectiveInstructionTokens
|
|
133
|
-
);
|
|
134
|
-
if (usage.remainingContextTokens == null) {
|
|
135
|
-
return;
|
|
136
|
-
}
|
|
137
|
-
breakdown.messageTokens = Math.max(
|
|
138
|
-
0,
|
|
139
|
-
contextBudget - effectiveInstructionTokens - usage.remainingContextTokens
|
|
140
|
-
);
|
|
141
|
-
}
|
|
142
|
-
|
|
143
115
|
type ReasoningKey = 'reasoning_content' | 'reasoning';
|
|
144
116
|
type ReasoningSummary = { summary?: Array<{ text?: string }> };
|
|
145
117
|
type ReasoningDetail = { type?: string; text?: string };
|
|
@@ -1761,35 +1733,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1761
1733
|
}
|
|
1762
1734
|
}
|
|
1763
1735
|
|
|
1764
|
-
if (agentContext.provider === Providers.ANTHROPIC) {
|
|
1765
|
-
const anthropicOptions = agentContext.clientOptions as
|
|
1766
|
-
| t.AnthropicClientOptions
|
|
1767
|
-
| undefined;
|
|
1768
|
-
if (
|
|
1769
|
-
anthropicOptions?.promptCache === true &&
|
|
1770
|
-
!agentContext.systemRunnable
|
|
1771
|
-
) {
|
|
1772
|
-
finalMessages = addCacheControl<BaseMessage>(finalMessages);
|
|
1773
|
-
}
|
|
1774
|
-
} else if (agentContext.provider === Providers.BEDROCK) {
|
|
1775
|
-
const bedrockOptions = agentContext.clientOptions as
|
|
1776
|
-
| t.BedrockAnthropicClientOptions
|
|
1777
|
-
| undefined;
|
|
1778
|
-
if (bedrockOptions?.promptCache === true) {
|
|
1779
|
-
finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
|
|
1780
|
-
}
|
|
1781
|
-
} else if (agentContext.provider === Providers.OPENROUTER) {
|
|
1782
|
-
const openRouterOptions = agentContext.clientOptions as
|
|
1783
|
-
| t.ProviderOptionsMap[Providers.OPENROUTER]
|
|
1784
|
-
| undefined;
|
|
1785
|
-
if (
|
|
1786
|
-
openRouterOptions?.promptCache === true &&
|
|
1787
|
-
!agentContext.systemRunnable
|
|
1788
|
-
) {
|
|
1789
|
-
finalMessages = addCacheControl<BaseMessage>(finalMessages);
|
|
1790
|
-
}
|
|
1791
|
-
}
|
|
1792
|
-
|
|
1793
1736
|
if (
|
|
1794
1737
|
isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
|
|
1795
1738
|
) {
|
|
@@ -1811,13 +1754,53 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1811
1754
|
);
|
|
1812
1755
|
}
|
|
1813
1756
|
|
|
1814
|
-
//
|
|
1815
|
-
//
|
|
1816
|
-
//
|
|
1817
|
-
//
|
|
1757
|
+
// Determine the prompt-cache strategy up front. Two distinct facts:
|
|
1758
|
+
//
|
|
1759
|
+
// `providerPromptCacheEnabled` — prompt caching is on for this provider
|
|
1760
|
+
// at all. This drives orphan cleanup, because EVERY cached send must be
|
|
1761
|
+
// sanitized — including the system-runnable path, where AgentContext (not
|
|
1762
|
+
// this node) adds the body marker.
|
|
1763
|
+
//
|
|
1764
|
+
// `willAddTailCache` — THIS node will add the marker itself. Anthropic /
|
|
1765
|
+
// OpenRouter defer to the system runnable when one owns the system-prompt
|
|
1766
|
+
// breakpoint, so they exclude that case; Bedrock always marks here.
|
|
1767
|
+
const anthropicPromptCacheEnabled =
|
|
1768
|
+
agentContext.provider === Providers.ANTHROPIC &&
|
|
1769
|
+
(agentContext.clientOptions as t.AnthropicClientOptions | undefined)
|
|
1770
|
+
?.promptCache === true;
|
|
1771
|
+
const openRouterPromptCacheEnabled =
|
|
1772
|
+
agentContext.provider === Providers.OPENROUTER &&
|
|
1773
|
+
(
|
|
1774
|
+
agentContext.clientOptions as
|
|
1775
|
+
| t.ProviderOptionsMap[Providers.OPENROUTER]
|
|
1776
|
+
| undefined
|
|
1777
|
+
)?.promptCache === true;
|
|
1778
|
+
const bedrockPromptCacheEnabled =
|
|
1779
|
+
agentContext.provider === Providers.BEDROCK &&
|
|
1780
|
+
(
|
|
1781
|
+
agentContext.clientOptions as
|
|
1782
|
+
| t.BedrockAnthropicClientOptions
|
|
1783
|
+
| undefined
|
|
1784
|
+
)?.promptCache === true;
|
|
1785
|
+
const providerPromptCacheEnabled =
|
|
1786
|
+
anthropicPromptCacheEnabled ||
|
|
1787
|
+
openRouterPromptCacheEnabled ||
|
|
1788
|
+
bedrockPromptCacheEnabled;
|
|
1789
|
+
|
|
1790
|
+
// Intentionally broad: runs when the pruner wasn't used, when any
|
|
1791
|
+
// post-pruning transform (ensureThinkingBlock, etc.) reassigned
|
|
1792
|
+
// finalMessages, OR when this is a prompt-cached send. The last clause
|
|
1793
|
+
// matters because the marker is now applied AFTER this gate (and, for the
|
|
1794
|
+
// system-runnable path, in AgentContext entirely): without it, a cached
|
|
1795
|
+
// send whose pruner returned the context unchanged would skip cleanup and
|
|
1796
|
+
// could ship orphaned AI/tool pairs from persisted history.
|
|
1797
|
+
// sanitizeOrphanToolBlocks fast-paths to a Set diff check when no orphans
|
|
1798
|
+
// exist, so the cost is negligible.
|
|
1818
1799
|
const needsOrphanSanitize =
|
|
1819
1800
|
anthropicLike &&
|
|
1820
|
-
(!agentContext.pruneMessages ||
|
|
1801
|
+
(!agentContext.pruneMessages ||
|
|
1802
|
+
finalMessages !== messagesToUse ||
|
|
1803
|
+
providerPromptCacheEnabled);
|
|
1821
1804
|
if (needsOrphanSanitize) {
|
|
1822
1805
|
const beforeSanitize = finalMessages.length;
|
|
1823
1806
|
finalMessages = sanitizeOrphanToolBlocks(finalMessages);
|
|
@@ -1837,6 +1820,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1837
1820
|
}
|
|
1838
1821
|
}
|
|
1839
1822
|
|
|
1823
|
+
// Place the single tail prompt-cache breakpoint LAST, after thinking
|
|
1824
|
+
// normalization and orphan sanitization. ensureThinkingBlockInMessages can
|
|
1825
|
+
// fold a trailing non-thinking AI→Tool chain into a `[Previous agent
|
|
1826
|
+
// context]` HumanMessage whose builder copies text but not cache_control /
|
|
1827
|
+
// cachePoint, and sanitizeOrphanToolBlocks can drop the anchored block — so
|
|
1828
|
+
// marking earlier would let the only breakpoint vanish before the model
|
|
1829
|
+
// call (zero message caching). Anchoring on the final message list keeps
|
|
1830
|
+
// the marker on a block that actually ships. The system-runnable path
|
|
1831
|
+
// adds its body marker in AgentContext, so this node skips it there.
|
|
1832
|
+
if (
|
|
1833
|
+
(anthropicPromptCacheEnabled || openRouterPromptCacheEnabled) &&
|
|
1834
|
+
!agentContext.systemRunnable
|
|
1835
|
+
) {
|
|
1836
|
+
finalMessages = addTailCacheControl<BaseMessage>(finalMessages);
|
|
1837
|
+
} else if (bedrockPromptCacheEnabled) {
|
|
1838
|
+
finalMessages = addBedrockTailCacheControl<BaseMessage>(finalMessages);
|
|
1839
|
+
}
|
|
1840
|
+
|
|
1840
1841
|
if (
|
|
1841
1842
|
agentContext.lastStreamCall != null &&
|
|
1842
1843
|
agentContext.streamBuffer != null
|
package/src/index.ts
CHANGED
package/src/langfuse.ts
CHANGED
|
@@ -11,6 +11,7 @@ const TRACE_METADATA_MAX_LENGTH = 200;
|
|
|
11
11
|
const LANGFUSE_FORCE_FLUSH_ON_DISPOSE = 'LANGFUSE_FORCE_FLUSH_ON_DISPOSE';
|
|
12
12
|
|
|
13
13
|
export type LangfuseTraceMetadata = Record<string, string>;
|
|
14
|
+
type LangfuseMetadata = NonNullable<t.LangfuseConfig['metadata']>;
|
|
14
15
|
|
|
15
16
|
type LangfuseHandlerParams = {
|
|
16
17
|
userId?: string;
|
|
@@ -44,6 +45,13 @@ function hasLangfuseTracingConfig(langfuse?: t.LangfuseConfig): boolean {
|
|
|
44
45
|
);
|
|
45
46
|
}
|
|
46
47
|
|
|
48
|
+
function hasLangfuseTraceAttributes(langfuse?: t.LangfuseConfig): boolean {
|
|
49
|
+
return (
|
|
50
|
+
Object.keys(createTraceMetadata(langfuse?.metadata ?? {})).length > 0 ||
|
|
51
|
+
(mergeLangfuseTags(undefined, langfuse?.tags)?.length ?? 0) > 0
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
|
|
47
55
|
export function hasLangfuseConfigCredentials(
|
|
48
56
|
langfuse?: t.LangfuseConfig
|
|
49
57
|
): langfuse is t.LangfuseConfig & {
|
|
@@ -67,6 +75,7 @@ export function isExplicitLangfuseConfig(langfuse?: t.LangfuseConfig): boolean {
|
|
|
67
75
|
isPresent(langfuse?.publicKey) ||
|
|
68
76
|
isPresent(langfuse?.secretKey) ||
|
|
69
77
|
isPresent(langfuse?.baseUrl) ||
|
|
78
|
+
hasLangfuseTraceAttributes(langfuse) ||
|
|
70
79
|
hasLangfuseTracingConfig(langfuse)
|
|
71
80
|
);
|
|
72
81
|
}
|
|
@@ -110,6 +119,27 @@ export function createLangfuseTraceMetadata({
|
|
|
110
119
|
});
|
|
111
120
|
}
|
|
112
121
|
|
|
122
|
+
function mergeLangfuseTraceMetadata(
|
|
123
|
+
traceMetadata?: LangfuseTraceMetadata,
|
|
124
|
+
metadata?: LangfuseMetadata
|
|
125
|
+
): LangfuseTraceMetadata | undefined {
|
|
126
|
+
const merged = createTraceMetadata({
|
|
127
|
+
...(metadata ?? {}),
|
|
128
|
+
...(traceMetadata ?? {}),
|
|
129
|
+
});
|
|
130
|
+
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
function mergeLangfuseTags(
|
|
134
|
+
tags?: string[],
|
|
135
|
+
configTags?: string[]
|
|
136
|
+
): string[] | undefined {
|
|
137
|
+
const merged = [...(tags ?? []), ...(configTags ?? [])].filter(
|
|
138
|
+
(tag) => tag.trim() !== ''
|
|
139
|
+
);
|
|
140
|
+
return merged.length > 0 ? [...new Set(merged)] : undefined;
|
|
141
|
+
}
|
|
142
|
+
|
|
113
143
|
export function getLangfuseTraceName(
|
|
114
144
|
traceMetadata?: LangfuseTraceMetadata,
|
|
115
145
|
fallback: string = 'LibreChat Agent'
|
|
@@ -161,12 +191,16 @@ export function createLangfuseHandler({
|
|
|
161
191
|
return new CallbackHandler({
|
|
162
192
|
userId,
|
|
163
193
|
sessionId,
|
|
164
|
-
traceMetadata
|
|
165
|
-
|
|
194
|
+
traceMetadata: mergeLangfuseTraceMetadata(
|
|
195
|
+
traceMetadata,
|
|
196
|
+
langfuse?.metadata
|
|
197
|
+
),
|
|
198
|
+
tags: mergeLangfuseTags(tags, langfuse?.tags),
|
|
166
199
|
});
|
|
167
200
|
}
|
|
168
201
|
|
|
169
202
|
function createPropagateAttributeParams({
|
|
203
|
+
langfuse,
|
|
170
204
|
userId,
|
|
171
205
|
sessionId,
|
|
172
206
|
traceMetadata,
|
|
@@ -177,8 +211,8 @@ function createPropagateAttributeParams({
|
|
|
177
211
|
userId,
|
|
178
212
|
sessionId,
|
|
179
213
|
traceName,
|
|
180
|
-
tags,
|
|
181
|
-
metadata: traceMetadata,
|
|
214
|
+
tags: mergeLangfuseTags(tags, langfuse?.tags),
|
|
215
|
+
metadata: mergeLangfuseTraceMetadata(traceMetadata, langfuse?.metadata),
|
|
182
216
|
};
|
|
183
217
|
}
|
|
184
218
|
|
|
@@ -692,10 +692,28 @@ export function resolveLangfuseConfig(
|
|
|
692
692
|
...agentLangfuse.toolOutputTracing,
|
|
693
693
|
}
|
|
694
694
|
: undefined;
|
|
695
|
+
const metadata =
|
|
696
|
+
runLangfuse.metadata != null || agentLangfuse.metadata != null
|
|
697
|
+
? {
|
|
698
|
+
...runLangfuse.metadata,
|
|
699
|
+
...agentLangfuse.metadata,
|
|
700
|
+
}
|
|
701
|
+
: undefined;
|
|
702
|
+
const tags =
|
|
703
|
+
runLangfuse.tags != null || agentLangfuse.tags != null
|
|
704
|
+
? [
|
|
705
|
+
...new Set([
|
|
706
|
+
...(runLangfuse.tags ?? []),
|
|
707
|
+
...(agentLangfuse.tags ?? []),
|
|
708
|
+
]),
|
|
709
|
+
]
|
|
710
|
+
: undefined;
|
|
695
711
|
|
|
696
712
|
return {
|
|
697
713
|
...runLangfuse,
|
|
698
714
|
...agentLangfuse,
|
|
715
|
+
...(metadata != null ? { metadata } : {}),
|
|
716
|
+
...(tags != null ? { tags } : {}),
|
|
699
717
|
...(toolNodeTracing != null ? { toolNodeTracing } : {}),
|
|
700
718
|
...(toolOutputTracing != null ? { toolOutputTracing } : {}),
|
|
701
719
|
};
|