@librechat/agents 3.2.35 → 3.2.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/dist/cjs/agents/AgentContext.cjs +75 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/agents/projection.cjs +25 -0
  4. package/dist/cjs/agents/projection.cjs.map +1 -0
  5. package/dist/cjs/graphs/Graph.cjs +10 -26
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/langfuse.cjs +16 -5
  8. package/dist/cjs/langfuse.cjs.map +1 -1
  9. package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
  10. package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
  11. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +118 -7
  12. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +44 -4
  14. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  15. package/dist/cjs/main.cjs +7 -0
  16. package/dist/cjs/messages/budget.cjs +23 -0
  17. package/dist/cjs/messages/budget.cjs.map +1 -0
  18. package/dist/cjs/messages/cache.cjs +184 -0
  19. package/dist/cjs/messages/cache.cjs.map +1 -1
  20. package/dist/cjs/messages/index.cjs +1 -0
  21. package/dist/cjs/summarization/node.cjs +1 -1
  22. package/dist/cjs/summarization/node.cjs.map +1 -1
  23. package/dist/cjs/tools/search/format.cjs +91 -2
  24. package/dist/cjs/tools/search/format.cjs.map +1 -1
  25. package/dist/cjs/tools/search/tool.cjs +4 -3
  26. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  27. package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
  28. package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
  29. package/dist/esm/agents/AgentContext.mjs +76 -3
  30. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  31. package/dist/esm/agents/projection.mjs +25 -0
  32. package/dist/esm/agents/projection.mjs.map +1 -0
  33. package/dist/esm/graphs/Graph.mjs +9 -25
  34. package/dist/esm/graphs/Graph.mjs.map +1 -1
  35. package/dist/esm/langfuse.mjs +16 -5
  36. package/dist/esm/langfuse.mjs.map +1 -1
  37. package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
  38. package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
  39. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +118 -7
  40. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  41. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +44 -4
  42. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  43. package/dist/esm/main.mjs +4 -2
  44. package/dist/esm/messages/budget.mjs +23 -0
  45. package/dist/esm/messages/budget.mjs.map +1 -0
  46. package/dist/esm/messages/cache.mjs +182 -1
  47. package/dist/esm/messages/cache.mjs.map +1 -1
  48. package/dist/esm/messages/index.mjs +1 -0
  49. package/dist/esm/summarization/node.mjs +2 -2
  50. package/dist/esm/summarization/node.mjs.map +1 -1
  51. package/dist/esm/tools/search/format.mjs +91 -2
  52. package/dist/esm/tools/search/format.mjs.map +1 -1
  53. package/dist/esm/tools/search/tool.mjs +4 -3
  54. package/dist/esm/tools/search/tool.mjs.map +1 -1
  55. package/dist/esm/tools/toolOutputReferences.mjs +28 -14
  56. package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
  57. package/dist/types/agents/AgentContext.d.ts +30 -1
  58. package/dist/types/agents/projection.d.ts +26 -0
  59. package/dist/types/index.d.ts +1 -0
  60. package/dist/types/messages/budget.d.ts +11 -0
  61. package/dist/types/messages/cache.d.ts +47 -0
  62. package/dist/types/messages/index.d.ts +1 -0
  63. package/dist/types/tools/search/format.d.ts +4 -1
  64. package/dist/types/tools/search/types.d.ts +7 -0
  65. package/dist/types/types/graph.d.ts +2 -0
  66. package/package.json +2 -1
  67. package/src/agents/AgentContext.ts +105 -4
  68. package/src/agents/__tests__/AgentContext.test.ts +232 -9
  69. package/src/agents/__tests__/projection.test.ts +73 -0
  70. package/src/agents/projection.ts +46 -0
  71. package/src/graphs/Graph.ts +66 -65
  72. package/src/index.ts +3 -0
  73. package/src/langfuse.ts +38 -4
  74. package/src/langfuseToolOutputTracing.ts +18 -0
  75. package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
  76. package/src/llm/anthropic/utils/message_inputs.ts +209 -19
  77. package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
  78. package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
  79. package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
  80. package/src/llm/bedrock/utils/message_inputs.ts +81 -4
  81. package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
  82. package/src/messages/budget.ts +32 -0
  83. package/src/messages/cache.tail.test.ts +340 -0
  84. package/src/messages/cache.ts +267 -1
  85. package/src/messages/index.ts +1 -0
  86. package/src/messages/tailCacheConversion.test.ts +161 -0
  87. package/src/scripts/bench-prompt-cache.ts +479 -0
  88. package/src/specs/langfuse-config.test.ts +69 -2
  89. package/src/specs/langfuse-metadata.test.ts +44 -0
  90. package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
  91. package/src/summarization/node.ts +2 -2
  92. package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
  93. package/src/tools/search/format.test.ts +242 -0
  94. package/src/tools/search/format.ts +122 -5
  95. package/src/tools/search/tool.ts +5 -1
  96. package/src/tools/search/types.ts +7 -0
  97. package/src/tools/toolOutputReferences.ts +34 -20
  98. package/src/types/graph.ts +2 -0
@@ -274,16 +274,11 @@ describe('AgentContext', () => {
274
274
  new HumanMessage('First'),
275
275
  new HumanMessage('Second'),
276
276
  ]);
277
- const firstContent = result[1].content as TestSystemContentBlock[];
278
277
  const secondContent = result[2].content as TestSystemContentBlock[];
279
278
 
280
279
  expect(result).toHaveLength(3);
281
280
  expect(result[0].content).toBe('Dynamic only');
282
- expect(firstContent[0]).toMatchObject({
283
- type: 'text',
284
- text: 'First',
285
- cache_control: { type: 'ephemeral' },
286
- });
281
+ expect(result[1].content).toBe('First');
287
282
  expect(secondContent[0]).toMatchObject({
288
283
  type: 'text',
289
284
  text: 'Second',
@@ -686,7 +681,7 @@ describe('AgentContext', () => {
686
681
  expect(result[8].content).toBe('Now answer without tools');
687
682
  });
688
683
 
689
- it('adds OpenRouter body cache points when there is no dynamic tail', async () => {
684
+ it('adds a single OpenRouter body cache point on the tail when there is no dynamic tail', async () => {
690
685
  const ctx = createBasicContext({
691
686
  agentConfig: {
692
687
  provider: Providers.OPENROUTER,
@@ -702,9 +697,8 @@ describe('AgentContext', () => {
702
697
  new HumanMessage('First'),
703
698
  new HumanMessage('Second'),
704
699
  ]);
705
- const firstContent = result[1].content as TestSystemContentBlock[];
706
700
  const secondContent = result[2].content as TestSystemContentBlock[];
707
- expect(firstContent[0]).toHaveProperty('cache_control');
701
+ expect(result[1].content).toBe('First');
708
702
  expect(secondContent[0]).toHaveProperty('cache_control');
709
703
  });
710
704
 
@@ -2147,4 +2141,233 @@ describe('AgentContext', () => {
2147
2141
  expect(ctx.lastCallUsage!.inputTokens).toBe(8005);
2148
2142
  });
2149
2143
  });
2144
+
2145
+ describe('projectContextUsage', () => {
2146
+ const countByChars = (msg: { content: unknown }): number => {
2147
+ const content =
2148
+ typeof msg.content === 'string'
2149
+ ? msg.content
2150
+ : JSON.stringify(msg.content);
2151
+ return content.length;
2152
+ };
2153
+
2154
+ const buildBranch = (
2155
+ maxContextTokens: number,
2156
+ perMessageTokens: number,
2157
+ count: number,
2158
+ ): { ctx: AgentContext; messages: AIMessage[] } => {
2159
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2160
+ ctx.maxContextTokens = maxContextTokens;
2161
+ const messages: AIMessage[] = [];
2162
+ for (let i = 0; i < count; i++) {
2163
+ // countByChars counts content length, and projectContextUsage recounts
2164
+ // the supplied messages — so size content to the intended per-msg tokens.
2165
+ const content = 'x'.repeat(perMessageTokens);
2166
+ messages.push(
2167
+ i % 2 === 0
2168
+ ? (new HumanMessage(content) as unknown as AIMessage)
2169
+ : new AIMessage(content),
2170
+ );
2171
+ }
2172
+ return { ctx, messages };
2173
+ };
2174
+
2175
+ it('returns null without a tokenizer or a window', () => {
2176
+ const noCounter = createBasicContext({});
2177
+ noCounter.maxContextTokens = 1000;
2178
+ expect(noCounter.projectContextUsage([new HumanMessage('hi')])).toBeNull();
2179
+
2180
+ const noWindow = createBasicContext({ tokenCounter: countByChars });
2181
+ noWindow.maxContextTokens = undefined;
2182
+ expect(noWindow.projectContextUsage([new HumanMessage('hi')])).toBeNull();
2183
+ });
2184
+
2185
+ it('keeps the whole branch and reports headroom when it fits', () => {
2186
+ const { ctx, messages } = buildBranch(100_000, 1_000, 4);
2187
+ const usage = ctx.projectContextUsage(messages);
2188
+
2189
+ expect(usage).not.toBeNull();
2190
+ expect(usage!.breakdown.messageCount).toBe(4);
2191
+ expect(usage!.breakdown.maxContextTokens).toBe(100_000);
2192
+ expect(usage!.remainingContextTokens).toBeGreaterThan(0);
2193
+ expect(usage!.breakdown.messageTokens).toBeGreaterThan(0);
2194
+
2195
+ const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
2196
+ const used = max - (usage!.remainingContextTokens ?? 0);
2197
+ expect(used).toBeLessThanOrEqual(max);
2198
+ });
2199
+
2200
+ it('prunes older messages when the branch exceeds the window', () => {
2201
+ const { ctx, messages } = buildBranch(3_000, 1_000, 6);
2202
+ const usage = ctx.projectContextUsage(messages);
2203
+
2204
+ expect(usage).not.toBeNull();
2205
+ expect(usage!.breakdown.messageCount).toBeGreaterThan(0);
2206
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2207
+ expect(usage!.remainingContextTokens).toBeGreaterThanOrEqual(0);
2208
+
2209
+ const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
2210
+ expect(max - (usage!.remainingContextTokens ?? 0)).toBeLessThanOrEqual(max);
2211
+ });
2212
+
2213
+ it('does not mutate the context (local pruner, no field writes)', () => {
2214
+ const { ctx, messages } = buildBranch(3_000, 1_000, 6);
2215
+ const mapBefore = { ...ctx.indexTokenCountMap };
2216
+
2217
+ expect(ctx.pruneMessages).toBeUndefined();
2218
+ ctx.projectContextUsage(messages);
2219
+
2220
+ expect(ctx.pruneMessages).toBeUndefined();
2221
+ expect(ctx.indexTokenCountMap).toEqual(mapBefore);
2222
+ });
2223
+
2224
+ it('does not mutate the caller messages under context pressure', () => {
2225
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2226
+ ctx.maxContextTokens = 400;
2227
+ const consumed = new ToolMessage({
2228
+ content: 'x'.repeat(20_000),
2229
+ tool_call_id: 't1',
2230
+ name: 'tool',
2231
+ });
2232
+ const messages: AIMessage[] = [
2233
+ new HumanMessage('question') as unknown as AIMessage,
2234
+ new AIMessage({
2235
+ content: '',
2236
+ tool_calls: [{ id: 't1', name: 'tool', args: {} }],
2237
+ }),
2238
+ consumed as unknown as AIMessage,
2239
+ new AIMessage('final answer'),
2240
+ ];
2241
+ const originalRef = messages[2];
2242
+ const originalContent = (messages[2] as unknown as ToolMessage).content;
2243
+
2244
+ ctx.projectContextUsage(messages);
2245
+
2246
+ expect(messages[2]).toBe(originalRef);
2247
+ expect((messages[2] as unknown as ToolMessage).content).toBe(
2248
+ originalContent,
2249
+ );
2250
+ });
2251
+
2252
+ it('recounts the supplied branch, ignoring a stale context token map', () => {
2253
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2254
+ ctx.maxContextTokens = 3_000;
2255
+ // Empty/stale map — if it were reused, every message would count as 0 and
2256
+ // nothing would prune. The fresh recount must drive pruning instead.
2257
+ ctx.indexTokenCountMap = {};
2258
+ const messages: AIMessage[] = [];
2259
+ for (let i = 0; i < 6; i++) {
2260
+ messages.push(new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage);
2261
+ }
2262
+
2263
+ const usage = ctx.projectContextUsage(messages);
2264
+
2265
+ expect(usage).not.toBeNull();
2266
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2267
+ });
2268
+
2269
+ it('uses a caller-supplied token map when provided', () => {
2270
+ const { ctx, messages } = buildBranch(3_000, 1, 6);
2271
+ // Each message is ~1 char, so a recount would fit all 6. The supplied map
2272
+ // claims 1000 each, forcing a prune — proving the map is honored.
2273
+ const indexTokenCountMap: Record<string, number> = {};
2274
+ for (let i = 0; i < messages.length; i++) {
2275
+ indexTokenCountMap[String(i)] = 1_000;
2276
+ }
2277
+
2278
+ const usage = ctx.projectContextUsage(messages, { indexTokenCountMap });
2279
+
2280
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2281
+ });
2282
+
2283
+ it('ignores this context live usage so projections are not recalibrated', () => {
2284
+ const build = (): { ctx: AgentContext; messages: AIMessage[] } => {
2285
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2286
+ ctx.maxContextTokens = 5_000;
2287
+ const messages: AIMessage[] = [0, 1, 2].map(
2288
+ () => new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage,
2289
+ );
2290
+ return { ctx, messages };
2291
+ };
2292
+
2293
+ const clean = build();
2294
+ const cleanUsage = clean.ctx.projectContextUsage(clean.messages);
2295
+
2296
+ const dirty = build();
2297
+ dirty.ctx.currentUsage = {
2298
+ input_tokens: 4_000,
2299
+ output_tokens: 50,
2300
+ total_tokens: 4_050,
2301
+ };
2302
+ dirty.ctx.updateLastCallUsage({ input_tokens: 4_000, output_tokens: 50 });
2303
+ const dirtyUsage = dirty.ctx.projectContextUsage(dirty.messages);
2304
+
2305
+ expect(dirtyUsage!.remainingContextTokens).toBe(
2306
+ cleanUsage!.remainingContextTokens,
2307
+ );
2308
+ expect(dirtyUsage!.calibrationRatio).toBe(cleanUsage!.calibrationRatio);
2309
+ });
2310
+
2311
+ it('does not mutate AI message content arrays during projection', () => {
2312
+ const ctx = createBasicContext({
2313
+ agentConfig: {
2314
+ provider: Providers.ANTHROPIC,
2315
+ clientOptions: {
2316
+ model: 'claude-x',
2317
+ thinking: { type: 'enabled', budget_tokens: 1024 },
2318
+ } as never,
2319
+ },
2320
+ tokenCounter: countByChars,
2321
+ });
2322
+ ctx.maxContextTokens = 2_000;
2323
+ const aiContent = [
2324
+ { type: 'thinking', thinking: 'step by step', signature: 'sig' },
2325
+ { type: 'text', text: 'the answer' },
2326
+ ];
2327
+ const ai = new AIMessage({ content: aiContent as never });
2328
+ const messages: AIMessage[] = [
2329
+ new HumanMessage('question') as unknown as AIMessage,
2330
+ ai,
2331
+ new HumanMessage('another') as unknown as AIMessage,
2332
+ ];
2333
+ const contentRef = ai.content;
2334
+ const lenBefore = (ai.content as unknown[]).length;
2335
+
2336
+ ctx.projectContextUsage(messages);
2337
+
2338
+ expect(messages[1].content).toBe(contentRef);
2339
+ expect((messages[1].content as unknown[]).length).toBe(lenBefore);
2340
+ });
2341
+
2342
+ it('honors an explicit calibrationRatio seed', () => {
2343
+ const base = buildBranch(100_000, 1_000, 4);
2344
+ const baseUsage = base.ctx.projectContextUsage(base.messages);
2345
+
2346
+ const scaled = buildBranch(100_000, 1_000, 4);
2347
+ const scaledUsage = scaled.ctx.projectContextUsage(scaled.messages, {
2348
+ calibrationRatio: 3,
2349
+ });
2350
+
2351
+ expect(scaledUsage!.calibrationRatio).toBe(3);
2352
+ expect(scaledUsage!.remainingContextTokens).not.toBe(
2353
+ baseUsage!.remainingContextTokens,
2354
+ );
2355
+ });
2356
+
2357
+ it('refreshes a stale system runnable before projecting', () => {
2358
+ const ctx = createBasicContext({
2359
+ agentConfig: { instructions: 'system prompt' },
2360
+ tokenCounter: countByChars,
2361
+ });
2362
+ ctx.maxContextTokens = 5_000;
2363
+ ctx.initializeSystemRunnable();
2364
+ const systemBefore = ctx.systemMessageTokens;
2365
+
2366
+ // Adds a handoff preamble + marks stale, but defers the token recount.
2367
+ ctx.setHandoffContext('PriorAgent', ['SiblingA', 'SiblingB']);
2368
+ ctx.projectContextUsage([new HumanMessage('hi') as unknown as AIMessage]);
2369
+
2370
+ expect(ctx.systemMessageTokens).toBeGreaterThan(systemBefore);
2371
+ });
2372
+ });
2150
2373
  });
@@ -0,0 +1,73 @@
1
+ import { AIMessage, HumanMessage } from '@langchain/core/messages';
2
+ import type * as t from '@/types';
3
+ import { Providers } from '@/common';
4
+ import { projectAgentContextUsage } from '../projection';
5
+
6
+ const countByChars = (msg: { content: unknown }): number => {
7
+ const content =
8
+ typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
9
+ return content.length;
10
+ };
11
+
12
+ const agent = (maxContextTokens: number): t.AgentInputs => ({
13
+ agentId: 'test-agent',
14
+ provider: Providers.OPENAI,
15
+ instructions: 'system prompt',
16
+ maxContextTokens,
17
+ });
18
+
19
+ const branch = (perMessageChars: number, count: number): AIMessage[] => {
20
+ const messages: AIMessage[] = [];
21
+ for (let i = 0; i < count; i++) {
22
+ const content = 'x'.repeat(perMessageChars);
23
+ messages.push(
24
+ i % 2 === 0
25
+ ? (new HumanMessage(content) as unknown as AIMessage)
26
+ : new AIMessage(content),
27
+ );
28
+ }
29
+ return messages;
30
+ };
31
+
32
+ describe('projectAgentContextUsage', () => {
33
+ it('returns a budget snapshot for a branch that fits', async () => {
34
+ const usage = await projectAgentContextUsage({
35
+ agent: agent(100_000),
36
+ messages: branch(1_000, 4),
37
+ tokenCounter: countByChars,
38
+ });
39
+
40
+ expect(usage).not.toBeNull();
41
+ expect(usage!.breakdown.maxContextTokens).toBe(100_000);
42
+ expect(usage!.breakdown.messageCount).toBe(4);
43
+ expect(usage!.remainingContextTokens).toBeGreaterThan(0);
44
+ expect(usage!.agentId).toBe('test-agent');
45
+ });
46
+
47
+ it('prunes when the branch exceeds the window', async () => {
48
+ const usage = await projectAgentContextUsage({
49
+ agent: agent(3_000),
50
+ messages: branch(1_000, 6),
51
+ tokenCounter: countByChars,
52
+ });
53
+
54
+ expect(usage).not.toBeNull();
55
+ expect(usage!.breakdown.messageCount).toBeGreaterThan(0);
56
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
57
+ });
58
+
59
+ it('returns null without a context window', async () => {
60
+ const noWindow: t.AgentInputs = {
61
+ agentId: 'test-agent',
62
+ provider: Providers.OPENAI,
63
+ instructions: 'sys',
64
+ };
65
+ const usage = await projectAgentContextUsage({
66
+ agent: noWindow,
67
+ messages: branch(100, 2),
68
+ tokenCounter: countByChars,
69
+ });
70
+
71
+ expect(usage).toBeNull();
72
+ });
73
+ });
@@ -0,0 +1,46 @@
1
+ import type { BaseMessage } from '@langchain/core/messages';
2
+ import type * as t from '@/types';
3
+ import { AgentContext } from './AgentContext';
4
+
5
+ export interface ProjectAgentContextUsageParams {
6
+ /** Same `AgentInputs` a run is built from (instructions, tools, model, window). */
7
+ agent: t.AgentInputs;
8
+ /** Branch messages to project, in send order (no leading system message). */
9
+ messages: BaseMessage[];
10
+ tokenCounter: t.TokenCounter;
11
+ /** Per-message counts aligned to `messages` (e.g. from `formatAgentMessages`).
12
+ * When omitted, counts are recounted via `tokenCounter`. */
13
+ indexTokenCountMap?: Record<string, number>;
14
+ /** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */
15
+ calibrationRatio?: number;
16
+ runId?: string;
17
+ agentId?: string;
18
+ }
19
+
20
+ /**
21
+ * Projects a pre-send context-usage snapshot for a branch under an agent config
22
+ * WITHOUT invoking the model — the host-side (page-load / branch-switch /
23
+ * window-switch) counterpart to the live `ON_CONTEXT_USAGE` event. Builds a
24
+ * throwaway `AgentContext` from the same `AgentInputs` a run uses, awaits its
25
+ * instruction/tool token accounting, then runs the shared pruner + budget math
26
+ * via `AgentContext.projectContextUsage` (which never mutates the supplied
27
+ * messages). Returns null when the config has no tokenizer or context window.
28
+ */
29
+ export async function projectAgentContextUsage({
30
+ agent,
31
+ messages,
32
+ tokenCounter,
33
+ indexTokenCountMap,
34
+ calibrationRatio,
35
+ runId,
36
+ agentId,
37
+ }: ProjectAgentContextUsageParams): Promise<t.ContextUsageEvent | null> {
38
+ const context = AgentContext.fromConfig(agent, tokenCounter, indexTokenCountMap);
39
+ await context.tokenCalculationPromise;
40
+ return context.projectContextUsage(messages, {
41
+ runId,
42
+ agentId: agentId ?? agent.agentId,
43
+ calibrationRatio,
44
+ indexTokenCountMap,
45
+ });
46
+ }
@@ -19,13 +19,14 @@ import {
19
19
  convertMessagesToContent,
20
20
  sanitizeOrphanToolBlocks,
21
21
  extractToolDiscoveries,
22
- addBedrockCacheControl,
22
+ addBedrockTailCacheControl,
23
23
  formatArtifactPayload,
24
24
  enforceOriginalContentCap,
25
25
  formatContentStrings,
26
26
  isLegacyConvertible,
27
27
  createPruneMessages,
28
- addCacheControl,
28
+ syncBudgetDerivedFields,
29
+ addTailCacheControl,
29
30
  getMessageId,
30
31
  makeIsDeferred,
31
32
  partitionAndMarkAnthropicToolCache,
@@ -111,35 +112,6 @@ function trailingMutationStart(messages: BaseMessage[]): number {
111
112
  return Math.max(0, Math.min(index, messages.length - 2));
112
113
  }
113
114
 
114
- /**
115
- * Re-derives the breakdown fields coupled to the calibrated budget math so
116
- * the snapshot stays internally consistent: the aggregate
117
- * `instructionTokens`/`availableForMessages` reflect the pruner's effective
118
- * (calibrated) overhead — component fields remain local estimates — and
119
- * `messageTokens` mirrors `contextBudget - instructions - remaining`.
120
- */
121
- function syncBudgetDerivedFields(usage: t.ContextUsageEvent): void {
122
- const { breakdown, contextBudget, effectiveInstructionTokens } = usage;
123
- if (effectiveInstructionTokens == null) {
124
- return;
125
- }
126
- breakdown.instructionTokens = effectiveInstructionTokens;
127
- if (contextBudget == null) {
128
- return;
129
- }
130
- breakdown.availableForMessages = Math.max(
131
- 0,
132
- contextBudget - effectiveInstructionTokens
133
- );
134
- if (usage.remainingContextTokens == null) {
135
- return;
136
- }
137
- breakdown.messageTokens = Math.max(
138
- 0,
139
- contextBudget - effectiveInstructionTokens - usage.remainingContextTokens
140
- );
141
- }
142
-
143
115
  type ReasoningKey = 'reasoning_content' | 'reasoning';
144
116
  type ReasoningSummary = { summary?: Array<{ text?: string }> };
145
117
  type ReasoningDetail = { type?: string; text?: string };
@@ -1761,35 +1733,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1761
1733
  }
1762
1734
  }
1763
1735
 
1764
- if (agentContext.provider === Providers.ANTHROPIC) {
1765
- const anthropicOptions = agentContext.clientOptions as
1766
- | t.AnthropicClientOptions
1767
- | undefined;
1768
- if (
1769
- anthropicOptions?.promptCache === true &&
1770
- !agentContext.systemRunnable
1771
- ) {
1772
- finalMessages = addCacheControl<BaseMessage>(finalMessages);
1773
- }
1774
- } else if (agentContext.provider === Providers.BEDROCK) {
1775
- const bedrockOptions = agentContext.clientOptions as
1776
- | t.BedrockAnthropicClientOptions
1777
- | undefined;
1778
- if (bedrockOptions?.promptCache === true) {
1779
- finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
1780
- }
1781
- } else if (agentContext.provider === Providers.OPENROUTER) {
1782
- const openRouterOptions = agentContext.clientOptions as
1783
- | t.ProviderOptionsMap[Providers.OPENROUTER]
1784
- | undefined;
1785
- if (
1786
- openRouterOptions?.promptCache === true &&
1787
- !agentContext.systemRunnable
1788
- ) {
1789
- finalMessages = addCacheControl<BaseMessage>(finalMessages);
1790
- }
1791
- }
1792
-
1793
1736
  if (
1794
1737
  isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
1795
1738
  ) {
@@ -1811,13 +1754,53 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1811
1754
  );
1812
1755
  }
1813
1756
 
1814
- // Intentionally broad: runs when the pruner wasn't used OR any post-pruning
1815
- // transform (addCacheControl, ensureThinkingBlock, etc.) reassigned finalMessages.
1816
- // sanitizeOrphanToolBlocks fast-paths to a Set diff check when no orphans exist,
1817
- // so the cost is negligible and this acts as a safety net for Anthropic/Bedrock.
1757
+ // Determine the prompt-cache strategy up front. Two distinct facts:
1758
+ //
1759
+ // `providerPromptCacheEnabled` — prompt caching is on for this provider
1760
+ // at all. This drives orphan cleanup, because EVERY cached send must be
1761
+ // sanitized — including the system-runnable path, where AgentContext (not
1762
+ // this node) adds the body marker.
1763
+ //
1764
+ // `willAddTailCache` — THIS node will add the marker itself. Anthropic /
1765
+ // OpenRouter defer to the system runnable when one owns the system-prompt
1766
+ // breakpoint, so they exclude that case; Bedrock always marks here.
1767
+ const anthropicPromptCacheEnabled =
1768
+ agentContext.provider === Providers.ANTHROPIC &&
1769
+ (agentContext.clientOptions as t.AnthropicClientOptions | undefined)
1770
+ ?.promptCache === true;
1771
+ const openRouterPromptCacheEnabled =
1772
+ agentContext.provider === Providers.OPENROUTER &&
1773
+ (
1774
+ agentContext.clientOptions as
1775
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
1776
+ | undefined
1777
+ )?.promptCache === true;
1778
+ const bedrockPromptCacheEnabled =
1779
+ agentContext.provider === Providers.BEDROCK &&
1780
+ (
1781
+ agentContext.clientOptions as
1782
+ | t.BedrockAnthropicClientOptions
1783
+ | undefined
1784
+ )?.promptCache === true;
1785
+ const providerPromptCacheEnabled =
1786
+ anthropicPromptCacheEnabled ||
1787
+ openRouterPromptCacheEnabled ||
1788
+ bedrockPromptCacheEnabled;
1789
+
1790
+ // Intentionally broad: runs when the pruner wasn't used, when any
1791
+ // post-pruning transform (ensureThinkingBlock, etc.) reassigned
1792
+ // finalMessages, OR when this is a prompt-cached send. The last clause
1793
+ // matters because the marker is now applied AFTER this gate (and, for the
1794
+ // system-runnable path, in AgentContext entirely): without it, a cached
1795
+ // send whose pruner returned the context unchanged would skip cleanup and
1796
+ // could ship orphaned AI/tool pairs from persisted history.
1797
+ // sanitizeOrphanToolBlocks fast-paths to a Set diff check when no orphans
1798
+ // exist, so the cost is negligible.
1818
1799
  const needsOrphanSanitize =
1819
1800
  anthropicLike &&
1820
- (!agentContext.pruneMessages || finalMessages !== messagesToUse);
1801
+ (!agentContext.pruneMessages ||
1802
+ finalMessages !== messagesToUse ||
1803
+ providerPromptCacheEnabled);
1821
1804
  if (needsOrphanSanitize) {
1822
1805
  const beforeSanitize = finalMessages.length;
1823
1806
  finalMessages = sanitizeOrphanToolBlocks(finalMessages);
@@ -1837,6 +1820,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1837
1820
  }
1838
1821
  }
1839
1822
 
1823
+ // Place the single tail prompt-cache breakpoint LAST, after thinking
1824
+ // normalization and orphan sanitization. ensureThinkingBlockInMessages can
1825
+ // fold a trailing non-thinking AI→Tool chain into a `[Previous agent
1826
+ // context]` HumanMessage whose builder copies text but not cache_control /
1827
+ // cachePoint, and sanitizeOrphanToolBlocks can drop the anchored block — so
1828
+ // marking earlier would let the only breakpoint vanish before the model
1829
+ // call (zero message caching). Anchoring on the final message list keeps
1830
+ // the marker on a block that actually ships. The system-runnable path
1831
+ // adds its body marker in AgentContext, so this node skips it there.
1832
+ if (
1833
+ (anthropicPromptCacheEnabled || openRouterPromptCacheEnabled) &&
1834
+ !agentContext.systemRunnable
1835
+ ) {
1836
+ finalMessages = addTailCacheControl<BaseMessage>(finalMessages);
1837
+ } else if (bedrockPromptCacheEnabled) {
1838
+ finalMessages = addBedrockTailCacheControl<BaseMessage>(finalMessages);
1839
+ }
1840
+
1840
1841
  if (
1841
1842
  agentContext.lastStreamCall != null &&
1842
1843
  agentContext.streamBuffer != null
package/src/index.ts CHANGED
@@ -8,6 +8,9 @@ export * from './messages';
8
8
  /* Graphs */
9
9
  export * from './graphs';
10
10
 
11
+ /* Context-usage projection (host-side pre-send snapshot) */
12
+ export * from './agents/projection';
13
+
11
14
  /* Summarization */
12
15
  export * from './summarization';
13
16
 
package/src/langfuse.ts CHANGED
@@ -11,6 +11,7 @@ const TRACE_METADATA_MAX_LENGTH = 200;
11
11
  const LANGFUSE_FORCE_FLUSH_ON_DISPOSE = 'LANGFUSE_FORCE_FLUSH_ON_DISPOSE';
12
12
 
13
13
  export type LangfuseTraceMetadata = Record<string, string>;
14
+ type LangfuseMetadata = NonNullable<t.LangfuseConfig['metadata']>;
14
15
 
15
16
  type LangfuseHandlerParams = {
16
17
  userId?: string;
@@ -44,6 +45,13 @@ function hasLangfuseTracingConfig(langfuse?: t.LangfuseConfig): boolean {
44
45
  );
45
46
  }
46
47
 
48
+ function hasLangfuseTraceAttributes(langfuse?: t.LangfuseConfig): boolean {
49
+ return (
50
+ Object.keys(createTraceMetadata(langfuse?.metadata ?? {})).length > 0 ||
51
+ (mergeLangfuseTags(undefined, langfuse?.tags)?.length ?? 0) > 0
52
+ );
53
+ }
54
+
47
55
  export function hasLangfuseConfigCredentials(
48
56
  langfuse?: t.LangfuseConfig
49
57
  ): langfuse is t.LangfuseConfig & {
@@ -67,6 +75,7 @@ export function isExplicitLangfuseConfig(langfuse?: t.LangfuseConfig): boolean {
67
75
  isPresent(langfuse?.publicKey) ||
68
76
  isPresent(langfuse?.secretKey) ||
69
77
  isPresent(langfuse?.baseUrl) ||
78
+ hasLangfuseTraceAttributes(langfuse) ||
70
79
  hasLangfuseTracingConfig(langfuse)
71
80
  );
72
81
  }
@@ -110,6 +119,27 @@ export function createLangfuseTraceMetadata({
110
119
  });
111
120
  }
112
121
 
122
+ function mergeLangfuseTraceMetadata(
123
+ traceMetadata?: LangfuseTraceMetadata,
124
+ metadata?: LangfuseMetadata
125
+ ): LangfuseTraceMetadata | undefined {
126
+ const merged = createTraceMetadata({
127
+ ...(metadata ?? {}),
128
+ ...(traceMetadata ?? {}),
129
+ });
130
+ return Object.keys(merged).length > 0 ? merged : undefined;
131
+ }
132
+
133
+ function mergeLangfuseTags(
134
+ tags?: string[],
135
+ configTags?: string[]
136
+ ): string[] | undefined {
137
+ const merged = [...(tags ?? []), ...(configTags ?? [])].filter(
138
+ (tag) => tag.trim() !== ''
139
+ );
140
+ return merged.length > 0 ? [...new Set(merged)] : undefined;
141
+ }
142
+
113
143
  export function getLangfuseTraceName(
114
144
  traceMetadata?: LangfuseTraceMetadata,
115
145
  fallback: string = 'LibreChat Agent'
@@ -161,12 +191,16 @@ export function createLangfuseHandler({
161
191
  return new CallbackHandler({
162
192
  userId,
163
193
  sessionId,
164
- traceMetadata,
165
- tags,
194
+ traceMetadata: mergeLangfuseTraceMetadata(
195
+ traceMetadata,
196
+ langfuse?.metadata
197
+ ),
198
+ tags: mergeLangfuseTags(tags, langfuse?.tags),
166
199
  });
167
200
  }
168
201
 
169
202
  function createPropagateAttributeParams({
203
+ langfuse,
170
204
  userId,
171
205
  sessionId,
172
206
  traceMetadata,
@@ -177,8 +211,8 @@ function createPropagateAttributeParams({
177
211
  userId,
178
212
  sessionId,
179
213
  traceName,
180
- tags,
181
- metadata: traceMetadata,
214
+ tags: mergeLangfuseTags(tags, langfuse?.tags),
215
+ metadata: mergeLangfuseTraceMetadata(traceMetadata, langfuse?.metadata),
182
216
  };
183
217
  }
184
218
 
@@ -692,10 +692,28 @@ export function resolveLangfuseConfig(
692
692
  ...agentLangfuse.toolOutputTracing,
693
693
  }
694
694
  : undefined;
695
+ const metadata =
696
+ runLangfuse.metadata != null || agentLangfuse.metadata != null
697
+ ? {
698
+ ...runLangfuse.metadata,
699
+ ...agentLangfuse.metadata,
700
+ }
701
+ : undefined;
702
+ const tags =
703
+ runLangfuse.tags != null || agentLangfuse.tags != null
704
+ ? [
705
+ ...new Set([
706
+ ...(runLangfuse.tags ?? []),
707
+ ...(agentLangfuse.tags ?? []),
708
+ ]),
709
+ ]
710
+ : undefined;
695
711
 
696
712
  return {
697
713
  ...runLangfuse,
698
714
  ...agentLangfuse,
715
+ ...(metadata != null ? { metadata } : {}),
716
+ ...(tags != null ? { tags } : {}),
699
717
  ...(toolNodeTracing != null ? { toolNodeTracing } : {}),
700
718
  ...(toolOutputTracing != null ? { toolOutputTracing } : {}),
701
719
  };