@librechat/agents 3.2.35 → 3.2.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/agents/AgentContext.cjs +74 -1
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/agents/projection.cjs +25 -0
  4. package/dist/cjs/agents/projection.cjs.map +1 -0
  5. package/dist/cjs/graphs/Graph.cjs +3 -18
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +26 -4
  8. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +20 -0
  10. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/main.cjs +5 -0
  12. package/dist/cjs/messages/budget.cjs +23 -0
  13. package/dist/cjs/messages/budget.cjs.map +1 -0
  14. package/dist/cjs/messages/cache.cjs +1 -0
  15. package/dist/cjs/messages/cache.cjs.map +1 -1
  16. package/dist/cjs/messages/index.cjs +1 -0
  17. package/dist/cjs/tools/search/format.cjs +91 -2
  18. package/dist/cjs/tools/search/format.cjs.map +1 -1
  19. package/dist/cjs/tools/search/tool.cjs +4 -3
  20. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  21. package/dist/esm/agents/AgentContext.mjs +75 -2
  22. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  23. package/dist/esm/agents/projection.mjs +25 -0
  24. package/dist/esm/agents/projection.mjs.map +1 -0
  25. package/dist/esm/graphs/Graph.mjs +1 -16
  26. package/dist/esm/graphs/Graph.mjs.map +1 -1
  27. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +26 -4
  28. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  29. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +20 -0
  30. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  31. package/dist/esm/main.mjs +4 -2
  32. package/dist/esm/messages/budget.mjs +23 -0
  33. package/dist/esm/messages/budget.mjs.map +1 -0
  34. package/dist/esm/messages/cache.mjs +1 -1
  35. package/dist/esm/messages/cache.mjs.map +1 -1
  36. package/dist/esm/messages/index.mjs +1 -0
  37. package/dist/esm/tools/search/format.mjs +91 -2
  38. package/dist/esm/tools/search/format.mjs.map +1 -1
  39. package/dist/esm/tools/search/tool.mjs +4 -3
  40. package/dist/esm/tools/search/tool.mjs.map +1 -1
  41. package/dist/types/agents/AgentContext.d.ts +30 -1
  42. package/dist/types/agents/projection.d.ts +26 -0
  43. package/dist/types/index.d.ts +1 -0
  44. package/dist/types/messages/budget.d.ts +11 -0
  45. package/dist/types/messages/cache.d.ts +7 -0
  46. package/dist/types/messages/index.d.ts +1 -0
  47. package/dist/types/tools/search/format.d.ts +4 -1
  48. package/dist/types/tools/search/types.d.ts +7 -0
  49. package/package.json +1 -1
  50. package/src/agents/AgentContext.ts +103 -2
  51. package/src/agents/__tests__/AgentContext.test.ts +229 -0
  52. package/src/agents/__tests__/projection.test.ts +73 -0
  53. package/src/agents/projection.ts +46 -0
  54. package/src/graphs/Graph.ts +1 -29
  55. package/src/index.ts +3 -0
  56. package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
  57. package/src/llm/anthropic/utils/message_inputs.ts +78 -16
  58. package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
  59. package/src/llm/bedrock/utils/message_inputs.ts +35 -0
  60. package/src/messages/budget.ts +32 -0
  61. package/src/messages/cache.ts +1 -1
  62. package/src/messages/index.ts +1 -0
  63. package/src/tools/search/format.test.ts +242 -0
  64. package/src/tools/search/format.ts +122 -5
  65. package/src/tools/search/tool.ts +5 -1
  66. package/src/tools/search/types.ts +7 -0
package/dist/types/messages/index.d.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  export * from './core';
2
2
  export * from './ids';
3
3
  export * from './prune';
4
+ export * from './budget';
4
5
  export * from './format';
5
6
  export * from './cache';
6
7
  export * from './anthropicToolCache';
package/dist/types/tools/search/format.d.ts CHANGED
@@ -1,5 +1,8 @@
1
1
  import type * as t from './types';
2
- export declare function formatResultsForLLM(turn: number, results: t.SearchResultData): {
2
+ /** Resolves the per-search highlight budget from config, the
3
+ * `SEARCH_MAX_LLM_OUTPUT_CHARS` env var, or the default (50,000 chars). */
4
+ export declare function resolveMaxLLMOutputChars(maxOutputChars?: number): number;
5
+ export declare function formatResultsForLLM(turn: number, results: t.SearchResultData, maxOutputChars?: number): {
3
6
  output: string;
4
7
  references: t.ResultReference[];
5
8
  };
package/dist/types/tools/search/types.d.ts CHANGED
@@ -189,6 +189,13 @@ export type SafeSearchLevel = 0 | 1 | 2;
189
189
  export type Logger = WinstonLogger;
190
190
  export interface SearchToolConfig extends SearchConfig, ProcessSourcesConfig, FirecrawlConfig {
191
191
  tavilyScraperOptions?: TavilyScraperConfig;
192
+ /** Max chars of highlight content this tool feeds the MODEL per search (the
193
+ * dominant, otherwise-unbounded part of the output). Distinct from
194
+ * `maxContentLength`, which caps scraped/reranked content per source — full
195
+ * content always remains in the `WEB_SEARCH` artifact. Defaults to 50,000;
196
+ * also configurable via the `SEARCH_MAX_LLM_OUTPUT_CHARS` env var. Hosts that
197
+ * know the context window (e.g. LibreChat) pass a window-relative value. */
198
+ maxOutputChars?: number;
192
199
  logger?: Logger;
193
200
  safeSearch?: SafeSearchLevel;
194
201
  jinaApiKey?: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@librechat/agents",
3
- "version": "3.2.35",
3
+ "version": "3.2.36",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
package/src/agents/AgentContext.ts CHANGED
@@ -7,7 +7,6 @@ import type {
7
7
  BaseMessageFields,
8
8
  } from '@langchain/core/messages';
9
9
  import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
10
- import type { createPruneMessages } from '@/messages';
11
10
  import type * as t from '@/types';
12
11
  import {
13
12
  ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
@@ -19,10 +18,16 @@ import {
19
18
  import {
20
19
  addCacheControl,
21
20
  addCacheControlToStablePrefixMessages,
21
+ cloneMessage,
22
22
  } from '@/messages/cache';
23
23
  import { createSchemaOnlyTools } from '@/tools/schema';
24
24
  import { apportionTokenCounts } from '@/utils/tokens';
25
- import { DEFAULT_RESERVE_RATIO } from '@/messages';
25
+ import {
26
+ DEFAULT_RESERVE_RATIO,
27
+ createPruneMessages,
28
+ syncBudgetDerivedFields,
29
+ } from '@/messages';
30
+ import { isThinkingEnabled } from '@/llm/request';
26
31
  import { toJsonSchema } from '@/utils/schema';
27
32
 
28
33
  type AgentSystemTextBlock = {
@@ -1330,6 +1335,102 @@ export class AgentContext {
1330
1335
  return lines.join('\n');
1331
1336
  }
1332
1337
 
1338
+ /**
1339
+ * Projects the context-usage snapshot for an arbitrary message set WITHOUT
1340
+ * invoking the model — the pre-send / page-load / window-switch counterpart to
1341
+ * the live `ON_CONTEXT_USAGE` snapshot. Runs the same pruner + budget math the
1342
+ * graph uses (`createPruneMessages` → `getTokenBudgetBreakdown` →
1343
+ * `syncBudgetDerivedFields`) so projected numbers match a real call. Returns
1344
+ * null when the context lacks the tokenizer or window needed to prune. Omits
1345
+ * the live post-format reconciliation (provider-specific, invoke-time) — a
1346
+ * small, acceptable delta for a pre-send estimate.
1347
+ *
1348
+ * Safe to call off the hot path: the supplied `messages` are never mutated
1349
+ * (array-content messages are cloned into a freshly mapped array — the pruner both replaces tool-result slots and
1350
+ * unshifts reasoning blocks into AI content arrays in place), and this
1351
+ * context's own state is untouched apart from refreshing stale instruction
1352
+ * counts (idempotent, exactly what a real call does). Token counts are
1353
+ * recounted for the supplied messages (the context's `indexTokenCountMap` is
1354
+ * keyed to the live run's branch and would missum an arbitrary branch) unless
1355
+ * the caller passes a map it guarantees matches. Calibration is NOT re-derived
1356
+ * from this context's live usage (a fresh pruner would compare the prior
1357
+ * call's provider input against the whole projected branch); the learned
1358
+ * `calibrationRatio` is applied as a static seed, and callers may override it
1359
+ * with a persisted ratio via `opts.calibrationRatio`.
1360
+ */
1361
+ projectContextUsage(
1362
+ messages: BaseMessage[],
1363
+ opts?: {
1364
+ runId?: string;
1365
+ agentId?: string;
1366
+ calibrationRatio?: number;
1367
+ indexTokenCountMap?: Record<string, number | undefined>;
1368
+ }
1369
+ ): t.ContextUsageEvent | null {
1370
+ const tokenCounter = this.tokenCounter;
1371
+ if (tokenCounter == null || this.maxContextTokens == null) {
1372
+ return null;
1373
+ }
1374
+ /** Refresh stale system overhead (handoff/summary changes) so instruction
1375
+ * tokens match the prompt a real call would send. */
1376
+ this.initializeSystemRunnable();
1377
+ /** Clone array-content messages: the pruner unshifts reasoning blocks into
1378
+ * AI content arrays in place, which would otherwise corrupt the caller's
1379
+ * history. (Slot replacements land on the mapped array, not the caller's.) */
1380
+ const projected = messages.map((message) =>
1381
+ Array.isArray(message.content)
1382
+ ? cloneMessage(message, [...message.content])
1383
+ : message
1384
+ );
1385
+ let indexTokenCountMap = opts?.indexTokenCountMap;
1386
+ if (indexTokenCountMap == null) {
1387
+ indexTokenCountMap = {};
1388
+ for (let i = 0; i < messages.length; i++) {
1389
+ indexTokenCountMap[String(i)] = tokenCounter(messages[i]);
1390
+ }
1391
+ }
1392
+ const prune = createPruneMessages({
1393
+ startIndex: 0,
1394
+ provider: this.provider,
1395
+ tokenCounter,
1396
+ maxTokens: this.maxContextTokens,
1397
+ thinkingEnabled: isThinkingEnabled(this.provider, this.clientOptions),
1398
+ indexTokenCountMap,
1399
+ contextPruningConfig: this.contextPruningConfig,
1400
+ summarizationEnabled: this.summarizationEnabled,
1401
+ reserveRatio: this.summarizationConfig?.reserveRatio,
1402
+ calibrationRatio: opts?.calibrationRatio ?? this.calibrationRatio,
1403
+ getInstructionTokens: () => this.instructionTokens,
1404
+ });
1405
+ const {
1406
+ context,
1407
+ prePruneContextTokens,
1408
+ remainingContextTokens,
1409
+ contextBudget,
1410
+ effectiveInstructionTokens,
1411
+ calibrationRatio,
1412
+ } = prune({
1413
+ messages: projected,
1414
+ usageMetadata: undefined,
1415
+ lastCallUsage: undefined,
1416
+ totalTokensFresh: false,
1417
+ });
1418
+ const breakdown = this.getTokenBudgetBreakdown(messages);
1419
+ breakdown.messageCount = context.length;
1420
+ const usage: t.ContextUsageEvent = {
1421
+ runId: opts?.runId,
1422
+ agentId: opts?.agentId,
1423
+ breakdown,
1424
+ contextBudget,
1425
+ effectiveInstructionTokens,
1426
+ prePruneContextTokens,
1427
+ remainingContextTokens,
1428
+ calibrationRatio,
1429
+ };
1430
+ syncBudgetDerivedFields(usage);
1431
+ return usage;
1432
+ }
1433
+
1333
1434
  /**
1334
1435
  * Updates the last-call usage with data from the most recent LLM response.
1335
1436
  * Unlike `currentUsage` which accumulates, this captures only the single call.
package/src/agents/__tests__/AgentContext.test.ts CHANGED
@@ -2147,4 +2147,233 @@ describe('AgentContext', () => {
2147
2147
  expect(ctx.lastCallUsage!.inputTokens).toBe(8005);
2148
2148
  });
2149
2149
  });
2150
+
2151
+ describe('projectContextUsage', () => {
2152
+ const countByChars = (msg: { content: unknown }): number => {
2153
+ const content =
2154
+ typeof msg.content === 'string'
2155
+ ? msg.content
2156
+ : JSON.stringify(msg.content);
2157
+ return content.length;
2158
+ };
2159
+
2160
+ const buildBranch = (
2161
+ maxContextTokens: number,
2162
+ perMessageTokens: number,
2163
+ count: number,
2164
+ ): { ctx: AgentContext; messages: AIMessage[] } => {
2165
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2166
+ ctx.maxContextTokens = maxContextTokens;
2167
+ const messages: AIMessage[] = [];
2168
+ for (let i = 0; i < count; i++) {
2169
+ // countByChars counts content length, and projectContextUsage recounts
2170
+ // the supplied messages — so size content to the intended per-msg tokens.
2171
+ const content = 'x'.repeat(perMessageTokens);
2172
+ messages.push(
2173
+ i % 2 === 0
2174
+ ? (new HumanMessage(content) as unknown as AIMessage)
2175
+ : new AIMessage(content),
2176
+ );
2177
+ }
2178
+ return { ctx, messages };
2179
+ };
2180
+
2181
+ it('returns null without a tokenizer or a window', () => {
2182
+ const noCounter = createBasicContext({});
2183
+ noCounter.maxContextTokens = 1000;
2184
+ expect(noCounter.projectContextUsage([new HumanMessage('hi')])).toBeNull();
2185
+
2186
+ const noWindow = createBasicContext({ tokenCounter: countByChars });
2187
+ noWindow.maxContextTokens = undefined;
2188
+ expect(noWindow.projectContextUsage([new HumanMessage('hi')])).toBeNull();
2189
+ });
2190
+
2191
+ it('keeps the whole branch and reports headroom when it fits', () => {
2192
+ const { ctx, messages } = buildBranch(100_000, 1_000, 4);
2193
+ const usage = ctx.projectContextUsage(messages);
2194
+
2195
+ expect(usage).not.toBeNull();
2196
+ expect(usage!.breakdown.messageCount).toBe(4);
2197
+ expect(usage!.breakdown.maxContextTokens).toBe(100_000);
2198
+ expect(usage!.remainingContextTokens).toBeGreaterThan(0);
2199
+ expect(usage!.breakdown.messageTokens).toBeGreaterThan(0);
2200
+
2201
+ const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
2202
+ const used = max - (usage!.remainingContextTokens ?? 0);
2203
+ expect(used).toBeLessThanOrEqual(max);
2204
+ });
2205
+
2206
+ it('prunes older messages when the branch exceeds the window', () => {
2207
+ const { ctx, messages } = buildBranch(3_000, 1_000, 6);
2208
+ const usage = ctx.projectContextUsage(messages);
2209
+
2210
+ expect(usage).not.toBeNull();
2211
+ expect(usage!.breakdown.messageCount).toBeGreaterThan(0);
2212
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2213
+ expect(usage!.remainingContextTokens).toBeGreaterThanOrEqual(0);
2214
+
2215
+ const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
2216
+ expect(max - (usage!.remainingContextTokens ?? 0)).toBeLessThanOrEqual(max);
2217
+ });
2218
+
2219
+ it('does not mutate the context (local pruner, no field writes)', () => {
2220
+ const { ctx, messages } = buildBranch(3_000, 1_000, 6);
2221
+ const mapBefore = { ...ctx.indexTokenCountMap };
2222
+
2223
+ expect(ctx.pruneMessages).toBeUndefined();
2224
+ ctx.projectContextUsage(messages);
2225
+
2226
+ expect(ctx.pruneMessages).toBeUndefined();
2227
+ expect(ctx.indexTokenCountMap).toEqual(mapBefore);
2228
+ });
2229
+
2230
+ it('does not mutate the caller messages under context pressure', () => {
2231
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2232
+ ctx.maxContextTokens = 400;
2233
+ const consumed = new ToolMessage({
2234
+ content: 'x'.repeat(20_000),
2235
+ tool_call_id: 't1',
2236
+ name: 'tool',
2237
+ });
2238
+ const messages: AIMessage[] = [
2239
+ new HumanMessage('question') as unknown as AIMessage,
2240
+ new AIMessage({
2241
+ content: '',
2242
+ tool_calls: [{ id: 't1', name: 'tool', args: {} }],
2243
+ }),
2244
+ consumed as unknown as AIMessage,
2245
+ new AIMessage('final answer'),
2246
+ ];
2247
+ const originalRef = messages[2];
2248
+ const originalContent = (messages[2] as unknown as ToolMessage).content;
2249
+
2250
+ ctx.projectContextUsage(messages);
2251
+
2252
+ expect(messages[2]).toBe(originalRef);
2253
+ expect((messages[2] as unknown as ToolMessage).content).toBe(
2254
+ originalContent,
2255
+ );
2256
+ });
2257
+
2258
+ it('recounts the supplied branch, ignoring a stale context token map', () => {
2259
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2260
+ ctx.maxContextTokens = 3_000;
2261
+ // Empty/stale map — if it were reused, every message would count as 0 and
2262
+ // nothing would prune. The fresh recount must drive pruning instead.
2263
+ ctx.indexTokenCountMap = {};
2264
+ const messages: AIMessage[] = [];
2265
+ for (let i = 0; i < 6; i++) {
2266
+ messages.push(new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage);
2267
+ }
2268
+
2269
+ const usage = ctx.projectContextUsage(messages);
2270
+
2271
+ expect(usage).not.toBeNull();
2272
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2273
+ });
2274
+
2275
+ it('uses a caller-supplied token map when provided', () => {
2276
+ const { ctx, messages } = buildBranch(3_000, 1, 6);
2277
+ // Each message is ~1 char, so a recount would fit all 6. The supplied map
2278
+ // claims 1000 each, forcing a prune — proving the map is honored.
2279
+ const indexTokenCountMap: Record<string, number> = {};
2280
+ for (let i = 0; i < messages.length; i++) {
2281
+ indexTokenCountMap[String(i)] = 1_000;
2282
+ }
2283
+
2284
+ const usage = ctx.projectContextUsage(messages, { indexTokenCountMap });
2285
+
2286
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2287
+ });
2288
+
2289
+ it('ignores this context live usage so projections are not recalibrated', () => {
2290
+ const build = (): { ctx: AgentContext; messages: AIMessage[] } => {
2291
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2292
+ ctx.maxContextTokens = 5_000;
2293
+ const messages: AIMessage[] = [0, 1, 2].map(
2294
+ () => new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage,
2295
+ );
2296
+ return { ctx, messages };
2297
+ };
2298
+
2299
+ const clean = build();
2300
+ const cleanUsage = clean.ctx.projectContextUsage(clean.messages);
2301
+
2302
+ const dirty = build();
2303
+ dirty.ctx.currentUsage = {
2304
+ input_tokens: 4_000,
2305
+ output_tokens: 50,
2306
+ total_tokens: 4_050,
2307
+ };
2308
+ dirty.ctx.updateLastCallUsage({ input_tokens: 4_000, output_tokens: 50 });
2309
+ const dirtyUsage = dirty.ctx.projectContextUsage(dirty.messages);
2310
+
2311
+ expect(dirtyUsage!.remainingContextTokens).toBe(
2312
+ cleanUsage!.remainingContextTokens,
2313
+ );
2314
+ expect(dirtyUsage!.calibrationRatio).toBe(cleanUsage!.calibrationRatio);
2315
+ });
2316
+
2317
+ it('does not mutate AI message content arrays during projection', () => {
2318
+ const ctx = createBasicContext({
2319
+ agentConfig: {
2320
+ provider: Providers.ANTHROPIC,
2321
+ clientOptions: {
2322
+ model: 'claude-x',
2323
+ thinking: { type: 'enabled', budget_tokens: 1024 },
2324
+ } as never,
2325
+ },
2326
+ tokenCounter: countByChars,
2327
+ });
2328
+ ctx.maxContextTokens = 2_000;
2329
+ const aiContent = [
2330
+ { type: 'thinking', thinking: 'step by step', signature: 'sig' },
2331
+ { type: 'text', text: 'the answer' },
2332
+ ];
2333
+ const ai = new AIMessage({ content: aiContent as never });
2334
+ const messages: AIMessage[] = [
2335
+ new HumanMessage('question') as unknown as AIMessage,
2336
+ ai,
2337
+ new HumanMessage('another') as unknown as AIMessage,
2338
+ ];
2339
+ const contentRef = ai.content;
2340
+ const lenBefore = (ai.content as unknown[]).length;
2341
+
2342
+ ctx.projectContextUsage(messages);
2343
+
2344
+ expect(messages[1].content).toBe(contentRef);
2345
+ expect((messages[1].content as unknown[]).length).toBe(lenBefore);
2346
+ });
2347
+
2348
+ it('honors an explicit calibrationRatio seed', () => {
2349
+ const base = buildBranch(100_000, 1_000, 4);
2350
+ const baseUsage = base.ctx.projectContextUsage(base.messages);
2351
+
2352
+ const scaled = buildBranch(100_000, 1_000, 4);
2353
+ const scaledUsage = scaled.ctx.projectContextUsage(scaled.messages, {
2354
+ calibrationRatio: 3,
2355
+ });
2356
+
2357
+ expect(scaledUsage!.calibrationRatio).toBe(3);
2358
+ expect(scaledUsage!.remainingContextTokens).not.toBe(
2359
+ baseUsage!.remainingContextTokens,
2360
+ );
2361
+ });
2362
+
2363
+ it('refreshes a stale system runnable before projecting', () => {
2364
+ const ctx = createBasicContext({
2365
+ agentConfig: { instructions: 'system prompt' },
2366
+ tokenCounter: countByChars,
2367
+ });
2368
+ ctx.maxContextTokens = 5_000;
2369
+ ctx.initializeSystemRunnable();
2370
+ const systemBefore = ctx.systemMessageTokens;
2371
+
2372
+ // Adds a handoff preamble + marks stale, but defers the token recount.
2373
+ ctx.setHandoffContext('PriorAgent', ['SiblingA', 'SiblingB']);
2374
+ ctx.projectContextUsage([new HumanMessage('hi') as unknown as AIMessage]);
2375
+
2376
+ expect(ctx.systemMessageTokens).toBeGreaterThan(systemBefore);
2377
+ });
2378
+ });
2150
2379
  });
package/src/agents/__tests__/projection.test.ts ADDED
@@ -0,0 +1,73 @@
1
+ import { AIMessage, HumanMessage } from '@langchain/core/messages';
2
+ import type * as t from '@/types';
3
+ import { Providers } from '@/common';
4
+ import { projectAgentContextUsage } from '../projection';
5
+
6
+ const countByChars = (msg: { content: unknown }): number => {
7
+ const content =
8
+ typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
9
+ return content.length;
10
+ };
11
+
12
+ const agent = (maxContextTokens: number): t.AgentInputs => ({
13
+ agentId: 'test-agent',
14
+ provider: Providers.OPENAI,
15
+ instructions: 'system prompt',
16
+ maxContextTokens,
17
+ });
18
+
19
+ const branch = (perMessageChars: number, count: number): AIMessage[] => {
20
+ const messages: AIMessage[] = [];
21
+ for (let i = 0; i < count; i++) {
22
+ const content = 'x'.repeat(perMessageChars);
23
+ messages.push(
24
+ i % 2 === 0
25
+ ? (new HumanMessage(content) as unknown as AIMessage)
26
+ : new AIMessage(content),
27
+ );
28
+ }
29
+ return messages;
30
+ };
31
+
32
+ describe('projectAgentContextUsage', () => {
33
+ it('returns a budget snapshot for a branch that fits', async () => {
34
+ const usage = await projectAgentContextUsage({
35
+ agent: agent(100_000),
36
+ messages: branch(1_000, 4),
37
+ tokenCounter: countByChars,
38
+ });
39
+
40
+ expect(usage).not.toBeNull();
41
+ expect(usage!.breakdown.maxContextTokens).toBe(100_000);
42
+ expect(usage!.breakdown.messageCount).toBe(4);
43
+ expect(usage!.remainingContextTokens).toBeGreaterThan(0);
44
+ expect(usage!.agentId).toBe('test-agent');
45
+ });
46
+
47
+ it('prunes when the branch exceeds the window', async () => {
48
+ const usage = await projectAgentContextUsage({
49
+ agent: agent(3_000),
50
+ messages: branch(1_000, 6),
51
+ tokenCounter: countByChars,
52
+ });
53
+
54
+ expect(usage).not.toBeNull();
55
+ expect(usage!.breakdown.messageCount).toBeGreaterThan(0);
56
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
57
+ });
58
+
59
+ it('returns null without a context window', async () => {
60
+ const noWindow: t.AgentInputs = {
61
+ agentId: 'test-agent',
62
+ provider: Providers.OPENAI,
63
+ instructions: 'sys',
64
+ };
65
+ const usage = await projectAgentContextUsage({
66
+ agent: noWindow,
67
+ messages: branch(100, 2),
68
+ tokenCounter: countByChars,
69
+ });
70
+
71
+ expect(usage).toBeNull();
72
+ });
73
+ });
package/src/agents/projection.ts ADDED
@@ -0,0 +1,46 @@
1
+ import type { BaseMessage } from '@langchain/core/messages';
2
+ import type * as t from '@/types';
3
+ import { AgentContext } from './AgentContext';
4
+
5
+ export interface ProjectAgentContextUsageParams {
6
+ /** Same `AgentInputs` a run is built from (instructions, tools, model, window). */
7
+ agent: t.AgentInputs;
8
+ /** Branch messages to project, in send order (no leading system message). */
9
+ messages: BaseMessage[];
10
+ tokenCounter: t.TokenCounter;
11
+ /** Per-message counts aligned to `messages` (e.g. from `formatAgentMessages`).
12
+ * When omitted, counts are recounted via `tokenCounter`. */
13
+ indexTokenCountMap?: Record<string, number>;
14
+ /** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */
15
+ calibrationRatio?: number;
16
+ runId?: string;
17
+ agentId?: string;
18
+ }
19
+
20
+ /**
21
+ * Projects a pre-send context-usage snapshot for a branch under an agent config
22
+ * WITHOUT invoking the model — the host-side (page-load / branch-switch /
23
+ * window-switch) counterpart to the live `ON_CONTEXT_USAGE` event. Builds a
24
+ * throwaway `AgentContext` from the same `AgentInputs` a run uses, awaits its
25
+ * instruction/tool token accounting, then runs the shared pruner + budget math
26
+ * via `AgentContext.projectContextUsage` (which never mutates the supplied
27
+ * messages). Returns null when the config has no tokenizer or context window.
28
+ */
29
+ export async function projectAgentContextUsage({
30
+ agent,
31
+ messages,
32
+ tokenCounter,
33
+ indexTokenCountMap,
34
+ calibrationRatio,
35
+ runId,
36
+ agentId,
37
+ }: ProjectAgentContextUsageParams): Promise<t.ContextUsageEvent | null> {
38
+ const context = AgentContext.fromConfig(agent, tokenCounter, indexTokenCountMap);
39
+ await context.tokenCalculationPromise;
40
+ return context.projectContextUsage(messages, {
41
+ runId,
42
+ agentId: agentId ?? agent.agentId,
43
+ calibrationRatio,
44
+ indexTokenCountMap,
45
+ });
46
+ }
package/src/graphs/Graph.ts CHANGED
@@ -25,6 +25,7 @@ import {
25
25
  formatContentStrings,
26
26
  isLegacyConvertible,
27
27
  createPruneMessages,
28
+ syncBudgetDerivedFields,
28
29
  addCacheControl,
29
30
  getMessageId,
30
31
  makeIsDeferred,
@@ -111,35 +112,6 @@ function trailingMutationStart(messages: BaseMessage[]): number {
111
112
  return Math.max(0, Math.min(index, messages.length - 2));
112
113
  }
113
114
 
114
- /**
115
- * Re-derives the breakdown fields coupled to the calibrated budget math so
116
- * the snapshot stays internally consistent: the aggregate
117
- * `instructionTokens`/`availableForMessages` reflect the pruner's effective
118
- * (calibrated) overhead — component fields remain local estimates — and
119
- * `messageTokens` mirrors `contextBudget - instructions - remaining`.
120
- */
121
- function syncBudgetDerivedFields(usage: t.ContextUsageEvent): void {
122
- const { breakdown, contextBudget, effectiveInstructionTokens } = usage;
123
- if (effectiveInstructionTokens == null) {
124
- return;
125
- }
126
- breakdown.instructionTokens = effectiveInstructionTokens;
127
- if (contextBudget == null) {
128
- return;
129
- }
130
- breakdown.availableForMessages = Math.max(
131
- 0,
132
- contextBudget - effectiveInstructionTokens
133
- );
134
- if (usage.remainingContextTokens == null) {
135
- return;
136
- }
137
- breakdown.messageTokens = Math.max(
138
- 0,
139
- contextBudget - effectiveInstructionTokens - usage.remainingContextTokens
140
- );
141
- }
142
-
143
115
  type ReasoningKey = 'reasoning_content' | 'reasoning';
144
116
  type ReasoningSummary = { summary?: Array<{ text?: string }> };
145
117
  type ReasoningDetail = { type?: string; text?: string };
package/src/index.ts CHANGED
@@ -8,6 +8,9 @@ export * from './messages';
8
8
  /* Graphs */
9
9
  export * from './graphs';
10
10
 
11
+ /* Context-usage projection (host-side pre-send snapshot) */
12
+ export * from './agents/projection';
13
+
11
14
  /* Summarization */
12
15
  export * from './summarization';
13
16