@librechat/agents 3.1.77-dev.1 → 3.1.78-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. package/dist/cjs/common/enum.cjs +54 -0
  2. package/dist/cjs/common/enum.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +148 -4
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs +291 -0
  6. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs.map +1 -0
  7. package/dist/cjs/llm/openai/index.cjs +317 -1
  8. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  9. package/dist/cjs/main.cjs +90 -0
  10. package/dist/cjs/main.cjs.map +1 -1
  11. package/dist/cjs/messages/anthropicToolCache.cjs +102 -0
  12. package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -0
  13. package/dist/cjs/messages/prune.cjs +27 -0
  14. package/dist/cjs/messages/prune.cjs.map +1 -1
  15. package/dist/cjs/messages/recency.cjs +99 -0
  16. package/dist/cjs/messages/recency.cjs.map +1 -0
  17. package/dist/cjs/run.cjs +30 -0
  18. package/dist/cjs/run.cjs.map +1 -1
  19. package/dist/cjs/summarization/node.cjs +100 -6
  20. package/dist/cjs/summarization/node.cjs.map +1 -1
  21. package/dist/cjs/tools/ToolNode.cjs +635 -23
  22. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  23. package/dist/cjs/tools/local/CompileCheckTool.cjs +227 -0
  24. package/dist/cjs/tools/local/CompileCheckTool.cjs.map +1 -0
  25. package/dist/cjs/tools/local/FileCheckpointer.cjs +90 -0
  26. package/dist/cjs/tools/local/FileCheckpointer.cjs.map +1 -0
  27. package/dist/cjs/tools/local/LocalCodingTools.cjs +1098 -0
  28. package/dist/cjs/tools/local/LocalCodingTools.cjs.map +1 -0
  29. package/dist/cjs/tools/local/LocalExecutionEngine.cjs +1042 -0
  30. package/dist/cjs/tools/local/LocalExecutionEngine.cjs.map +1 -0
  31. package/dist/cjs/tools/local/LocalExecutionTools.cjs +122 -0
  32. package/dist/cjs/tools/local/LocalExecutionTools.cjs.map +1 -0
  33. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs +453 -0
  34. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs.map +1 -0
  35. package/dist/cjs/tools/local/attachments.cjs +183 -0
  36. package/dist/cjs/tools/local/attachments.cjs.map +1 -0
  37. package/dist/cjs/tools/local/bashAst.cjs +129 -0
  38. package/dist/cjs/tools/local/bashAst.cjs.map +1 -0
  39. package/dist/cjs/tools/local/editStrategies.cjs +188 -0
  40. package/dist/cjs/tools/local/editStrategies.cjs.map +1 -0
  41. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs +141 -0
  42. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs.map +1 -0
  43. package/dist/cjs/tools/local/syntaxCheck.cjs +182 -0
  44. package/dist/cjs/tools/local/syntaxCheck.cjs.map +1 -0
  45. package/dist/cjs/tools/local/textEncoding.cjs +30 -0
  46. package/dist/cjs/tools/local/textEncoding.cjs.map +1 -0
  47. package/dist/cjs/tools/local/workspaceFS.cjs +51 -0
  48. package/dist/cjs/tools/local/workspaceFS.cjs.map +1 -0
  49. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +1 -0
  50. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  51. package/dist/esm/common/enum.mjs +53 -1
  52. package/dist/esm/common/enum.mjs.map +1 -1
  53. package/dist/esm/graphs/Graph.mjs +149 -5
  54. package/dist/esm/graphs/Graph.mjs.map +1 -1
  55. package/dist/esm/hooks/createWorkspacePolicyHook.mjs +289 -0
  56. package/dist/esm/hooks/createWorkspacePolicyHook.mjs.map +1 -0
  57. package/dist/esm/llm/openai/index.mjs +318 -2
  58. package/dist/esm/llm/openai/index.mjs.map +1 -1
  59. package/dist/esm/main.mjs +17 -2
  60. package/dist/esm/main.mjs.map +1 -1
  61. package/dist/esm/messages/anthropicToolCache.mjs +99 -0
  62. package/dist/esm/messages/anthropicToolCache.mjs.map +1 -0
  63. package/dist/esm/messages/prune.mjs +26 -1
  64. package/dist/esm/messages/prune.mjs.map +1 -1
  65. package/dist/esm/messages/recency.mjs +97 -0
  66. package/dist/esm/messages/recency.mjs.map +1 -0
  67. package/dist/esm/run.mjs +30 -0
  68. package/dist/esm/run.mjs.map +1 -1
  69. package/dist/esm/summarization/node.mjs +100 -6
  70. package/dist/esm/summarization/node.mjs.map +1 -1
  71. package/dist/esm/tools/ToolNode.mjs +635 -23
  72. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  73. package/dist/esm/tools/local/CompileCheckTool.mjs +223 -0
  74. package/dist/esm/tools/local/CompileCheckTool.mjs.map +1 -0
  75. package/dist/esm/tools/local/FileCheckpointer.mjs +87 -0
  76. package/dist/esm/tools/local/FileCheckpointer.mjs.map +1 -0
  77. package/dist/esm/tools/local/LocalCodingTools.mjs +1075 -0
  78. package/dist/esm/tools/local/LocalCodingTools.mjs.map +1 -0
  79. package/dist/esm/tools/local/LocalExecutionEngine.mjs +1022 -0
  80. package/dist/esm/tools/local/LocalExecutionEngine.mjs.map +1 -0
  81. package/dist/esm/tools/local/LocalExecutionTools.mjs +117 -0
  82. package/dist/esm/tools/local/LocalExecutionTools.mjs.map +1 -0
  83. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs +448 -0
  84. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs.map +1 -0
  85. package/dist/esm/tools/local/attachments.mjs +180 -0
  86. package/dist/esm/tools/local/attachments.mjs.map +1 -0
  87. package/dist/esm/tools/local/bashAst.mjs +126 -0
  88. package/dist/esm/tools/local/bashAst.mjs.map +1 -0
  89. package/dist/esm/tools/local/editStrategies.mjs +185 -0
  90. package/dist/esm/tools/local/editStrategies.mjs.map +1 -0
  91. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs +137 -0
  92. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs.map +1 -0
  93. package/dist/esm/tools/local/syntaxCheck.mjs +179 -0
  94. package/dist/esm/tools/local/syntaxCheck.mjs.map +1 -0
  95. package/dist/esm/tools/local/textEncoding.mjs +27 -0
  96. package/dist/esm/tools/local/textEncoding.mjs.map +1 -0
  97. package/dist/esm/tools/local/workspaceFS.mjs +49 -0
  98. package/dist/esm/tools/local/workspaceFS.mjs.map +1 -0
  99. package/dist/esm/tools/subagent/SubagentExecutor.mjs +1 -0
  100. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  101. package/dist/types/common/enum.d.ts +39 -1
  102. package/dist/types/graphs/Graph.d.ts +34 -0
  103. package/dist/types/hooks/createWorkspacePolicyHook.d.ts +95 -0
  104. package/dist/types/hooks/index.d.ts +2 -0
  105. package/dist/types/index.d.ts +1 -0
  106. package/dist/types/llm/openai/index.d.ts +17 -0
  107. package/dist/types/messages/anthropicToolCache.d.ts +51 -0
  108. package/dist/types/messages/index.d.ts +2 -0
  109. package/dist/types/messages/prune.d.ts +11 -0
  110. package/dist/types/messages/recency.d.ts +64 -0
  111. package/dist/types/run.d.ts +21 -0
  112. package/dist/types/tools/ToolNode.d.ts +145 -2
  113. package/dist/types/tools/local/CompileCheckTool.d.ts +31 -0
  114. package/dist/types/tools/local/FileCheckpointer.d.ts +39 -0
  115. package/dist/types/tools/local/LocalCodingTools.d.ts +57 -0
  116. package/dist/types/tools/local/LocalExecutionEngine.d.ts +149 -0
  117. package/dist/types/tools/local/LocalExecutionTools.d.ts +9 -0
  118. package/dist/types/tools/local/LocalProgrammaticToolCalling.d.ts +21 -0
  119. package/dist/types/tools/local/attachments.d.ts +84 -0
  120. package/dist/types/tools/local/bashAst.d.ts +11 -0
  121. package/dist/types/tools/local/editStrategies.d.ts +28 -0
  122. package/dist/types/tools/local/index.d.ts +12 -0
  123. package/dist/types/tools/local/resolveLocalExecutionTools.d.ts +38 -0
  124. package/dist/types/tools/local/syntaxCheck.d.ts +42 -0
  125. package/dist/types/tools/local/textEncoding.d.ts +21 -0
  126. package/dist/types/tools/local/workspaceFS.d.ts +49 -0
  127. package/dist/types/types/hitl.d.ts +56 -27
  128. package/dist/types/types/run.d.ts +8 -1
  129. package/dist/types/types/summarize.d.ts +30 -0
  130. package/dist/types/types/tools.d.ts +341 -6
  131. package/package.json +21 -2
  132. package/src/common/enum.ts +54 -0
  133. package/src/graphs/Graph.ts +164 -6
  134. package/src/hooks/__tests__/compactHooks.test.ts +38 -2
  135. package/src/hooks/__tests__/createWorkspacePolicyHook.test.ts +393 -0
  136. package/src/hooks/createWorkspacePolicyHook.ts +355 -0
  137. package/src/hooks/index.ts +6 -0
  138. package/src/index.ts +1 -0
  139. package/src/llm/openai/deepseek.test.ts +479 -0
  140. package/src/llm/openai/index.ts +484 -1
  141. package/src/messages/__tests__/anthropicToolCache.test.ts +125 -0
  142. package/src/messages/__tests__/recency.test.ts +267 -0
  143. package/src/messages/anthropicToolCache.ts +116 -0
  144. package/src/messages/index.ts +2 -0
  145. package/src/messages/prune.ts +27 -1
  146. package/src/messages/recency.ts +155 -0
  147. package/src/run.ts +31 -0
  148. package/src/scripts/compare_pi_vs_ours.ts +840 -0
  149. package/src/scripts/local_engine.ts +166 -0
  150. package/src/scripts/local_engine_checkpointer.ts +205 -0
  151. package/src/scripts/local_engine_compile.ts +263 -0
  152. package/src/scripts/local_engine_hooks.ts +226 -0
  153. package/src/scripts/local_engine_image.ts +201 -0
  154. package/src/scripts/local_engine_ptc.ts +151 -0
  155. package/src/scripts/local_engine_workspace.ts +258 -0
  156. package/src/scripts/summarization-recency.ts +462 -0
  157. package/src/specs/prune.test.ts +39 -0
  158. package/src/summarization/__tests__/node.test.ts +499 -3
  159. package/src/summarization/node.ts +124 -7
  160. package/src/tools/ToolNode.ts +769 -20
  161. package/src/tools/__tests__/LocalExecutionTools.test.ts +2647 -0
  162. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +175 -0
  163. package/src/tools/__tests__/ToolNode.outputReferences.test.ts +114 -0
  164. package/src/tools/__tests__/ToolNode.session.test.ts +84 -0
  165. package/src/tools/__tests__/directToolHITLResumeScope.test.ts +467 -0
  166. package/src/tools/__tests__/directToolHooks.test.ts +411 -0
  167. package/src/tools/__tests__/localToolNames.test.ts +73 -0
  168. package/src/tools/__tests__/workspaceSeam.test.ts +134 -0
  169. package/src/tools/local/CompileCheckTool.ts +278 -0
  170. package/src/tools/local/FileCheckpointer.ts +93 -0
  171. package/src/tools/local/LocalCodingTools.ts +1342 -0
  172. package/src/tools/local/LocalExecutionEngine.ts +1329 -0
  173. package/src/tools/local/LocalExecutionTools.ts +167 -0
  174. package/src/tools/local/LocalProgrammaticToolCalling.ts +594 -0
  175. package/src/tools/local/__tests__/FileCheckpointer.test.ts +120 -0
  176. package/src/tools/local/__tests__/editStrategies.test.ts +134 -0
  177. package/src/tools/local/attachments.ts +251 -0
  178. package/src/tools/local/bashAst.ts +151 -0
  179. package/src/tools/local/editStrategies.ts +188 -0
  180. package/src/tools/local/index.ts +12 -0
  181. package/src/tools/local/resolveLocalExecutionTools.ts +208 -0
  182. package/src/tools/local/syntaxCheck.ts +243 -0
  183. package/src/tools/local/textEncoding.ts +37 -0
  184. package/src/tools/local/workspaceFS.ts +89 -0
  185. package/src/types/hitl.ts +56 -27
  186. package/src/types/run.ts +12 -1
  187. package/src/types/summarize.ts +31 -0
  188. package/src/types/tools.ts +359 -7
@@ -0,0 +1,462 @@
1
+ /* eslint-disable no-console */
2
+ /**
3
+ * Live multi-provider validation for the recency-window summarization
4
+ * change. Verifies two end-to-end behaviors against real APIs:
5
+ *
6
+ * 1. First-turn protection: a single oversized user message does not
7
+ * trigger summarization. Summary events MUST NOT fire. This
8
+ * addresses LibreChat issue #12940.
9
+ *
10
+ * 2. Multi-turn compaction: after enough turns accumulate, the
11
+ * summarizer fires on older content while the most recent two
12
+ * user-led turns are returned in `getRunMessages()` verbatim.
13
+ *
14
+ * IMPORTANT: env loading must happen *before* this module's imports
15
+ * resolve. The Bedrock AWS SDK in particular captures credentials
16
+ * during module init. Run with the dotenv preload + override flag:
17
+ *
18
+ * DOTENV_CONFIG_OVERRIDE=true node -r dotenv/config \
19
+ * --loader ./tsconfig-paths-bootstrap.mjs \
20
+ * --experimental-specifier-resolution=node \
21
+ * ./src/scripts/summarization-recency.ts --provider all
22
+ */
23
+ import { config as loadEnv } from 'dotenv';
24
+ // Override pre-existing env vars (some shells inject empty placeholders).
25
+ // This is a belt-and-suspenders second pass after the -r dotenv/config
26
+ // preload — covers the case where the script is invoked without preload.
27
+ loadEnv({ override: true });
28
+
29
+ // The Bedrock llmConfig requires BEDROCK_AWS_REGION specifically; default it
30
+ // to the standard cross-region-inference region when the user has bedrock
31
+ // credentials but the region knob is commented out.
32
/**
 * Fills in `BEDROCK_AWS_REGION` when a Bedrock access key is configured but
 * the region is unset or blank.
 *
 * The fallback is `AWS_DEFAULT_REGION` when it holds a non-empty value,
 * otherwise `'us-east-1'`. A blank `AWS_DEFAULT_REGION` is treated as missing
 * (a plain `??` would have copied the empty string through and left the
 * region blank).
 *
 * @param env - Environment map to read and update; defaults to `process.env`.
 */
function applyBedrockRegionDefault(env = process.env) {
  const regionMissing =
    env.BEDROCK_AWS_REGION == null || env.BEDROCK_AWS_REGION === '';
  const accessKeyPresent =
    env.BEDROCK_AWS_ACCESS_KEY_ID != null &&
    env.BEDROCK_AWS_ACCESS_KEY_ID !== '';
  if (!regionMissing || !accessKeyPresent) {
    return;
  }
  const fallbackRegion = env.AWS_DEFAULT_REGION;
  env.BEDROCK_AWS_REGION =
    fallbackRegion != null && fallbackRegion !== ''
      ? fallbackRegion
      : 'us-east-1';
}

applyBedrockRegionDefault();
41
+
42
+ import {
43
+ AIMessage,
44
+ HumanMessage,
45
+ type BaseMessage,
46
+ } from '@langchain/core/messages';
47
+ import yargs from 'yargs';
48
+ import { hideBin } from 'yargs/helpers';
49
+
50
+ import type * as t from '@/types';
51
+ import { GraphEvents, Providers } from '@/common';
52
+ import { Run } from '@/run';
53
+ import { createTokenCounter } from '@/utils/tokens';
54
+ import { getLLMConfig } from '@/utils/llmConfig';
55
+
56
/** One row of the provider matrix exercised by this script. */
interface ProviderEntry {
  /**
   * Label used for CLI selection and log output. It is also the key handed
   * to `getLLMConfig`, so it has to match an entry there.
   */
  name: string;
  /** Provider enum value forwarded as the summarization provider. */
  provider: Providers;
  /** Returns true when the credentials this provider needs are non-empty. */
  envCheck: () => boolean;
  /**
   * Context budget forwarded to the graph as `maxContextTokens`. It is meant
   * to be tight enough that the padded scenario messages overflow it: the
   * recency window should keep a lone first message verbatim, and
   * summarization should only kick in once later turns have accumulated.
   */
  maxContextTokens: number;
  /** When set, replaces the `model` field on the agent's llmConfig. */
  modelOverride?: string;
}
69
+
70
/** Reports whether the named environment variable holds a non-empty value. */
function hasEnvValue(key: string): boolean {
  const value = process.env[key];
  return value != null && value !== '';
}

/**
 * Providers this script knows how to exercise. Each entry is skipped at run
 * time when its `envCheck` reports missing credentials.
 */
const PROVIDERS: ProviderEntry[] = [
  {
    name: 'anthropic',
    provider: Providers.ANTHROPIC,
    envCheck: () => hasEnvValue('ANTHROPIC_API_KEY'),
    maxContextTokens: 2_000,
  },
  {
    // 'openAI' — must match the llmConfigs key
    name: Providers.OPENAI,
    provider: Providers.OPENAI,
    envCheck: () => hasEnvValue('OPENAI_API_KEY'),
    maxContextTokens: 2_000,
    modelOverride: 'gpt-5.4-mini',
  },
  {
    name: 'google',
    provider: Providers.GOOGLE,
    envCheck: () => hasEnvValue('GOOGLE_API_KEY'),
    maxContextTokens: 2_000,
  },
  {
    name: 'bedrock',
    provider: Providers.BEDROCK,
    // The Bedrock llmConfig reads BEDROCK_AWS_REGION specifically; if it's
    // missing, the SDK throws "Resolved credential object is not valid".
    envCheck: () =>
      [
        'BEDROCK_AWS_ACCESS_KEY_ID',
        'BEDROCK_AWS_SECRET_ACCESS_KEY',
        'BEDROCK_AWS_REGION',
      ].every(hasEnvValue),
    maxContextTokens: 2_000,
  },
  {
    name: Providers.OPENROUTER,
    provider: Providers.OPENROUTER,
    envCheck: () => hasEnvValue('OPENROUTER_API_KEY'),
    maxContextTokens: 2_000,
    modelOverride: 'moonshotai/kimi-k2.6',
  },
  {
    name: Providers.DEEPSEEK,
    provider: Providers.DEEPSEEK,
    envCheck: () => hasEnvValue('DEEPSEEK_API_KEY'),
    maxContextTokens: 2_000,
    modelOverride: 'deepseek-v4-flash',
  },
];
127
+
128
/** Per-scenario record of the summarization events observed during a run. */
interface ScenarioSpies {
  /** Payloads received with each `ON_SUMMARIZE_START` event, in order. */
  onSummarizeStart: Array<unknown>;
  /** Payloads received with each `ON_SUMMARIZE_COMPLETE` event, in order. */
  onSummarizeComplete: Array<unknown>;
}
132
+
133
/**
 * Builds the custom event handlers that record summarization events.
 *
 * @param spies - Collector whose arrays receive each event payload.
 * @returns A map keyed by graph event name; each value exposes a `handle`
 *   callback that appends the event data to the matching spy array.
 */
function buildHandlers(spies: ScenarioSpies): Record<string, unknown> {
  // The spy array is looked up on every call, not captured once, so the
  // handler always writes to whatever array `spies` currently holds.
  const recordUnder =
    (key: keyof ScenarioSpies) =>
      (_event: string, data: t.StreamEventData): void => {
        spies[key].push(data);
      };

  return {
    [GraphEvents.ON_SUMMARIZE_START]: {
      handle: recordUnder('onSummarizeStart'),
    },
    [GraphEvents.ON_SUMMARIZE_COMPLETE]: {
      handle: recordUnder('onSummarizeComplete'),
    },
  };
}
147
+
148
/** Creates a fresh, empty spy collector for a single scenario. */
function newSpies(): ScenarioSpies {
  const onSummarizeStart: Array<unknown> = [];
  const onSummarizeComplete: Array<unknown> = [];
  return { onSummarizeStart, onSummarizeComplete };
}
151
+
152
/** Module-level memo so the token counter is only created once per process. */
let cachedTokenCounter: t.TokenCounter | undefined;

/**
 * Returns the shared token counter, creating it on first use.
 *
 * @returns The memoized counter produced by `createTokenCounter`.
 */
async function getTokenCounter(): Promise<t.TokenCounter> {
  if (cachedTokenCounter != null) {
    return cachedTokenCounter;
  }
  const counter = await createTokenCounter();
  cachedTokenCounter = counter;
  return counter;
}
159
+
160
/**
 * Creates a summarization-enabled `Run` for one provider.
 *
 * @param entry - Provider row supplying the llmConfig key, optional model
 *   override, summarization provider, and context budget.
 * @param threadId - Accepted for call-site symmetry; not read in this body.
 * @param spies - Collector wired to the summarization event handlers.
 * @param retainTurns - When provided, forwarded as
 *   `summarizationConfig.retainRecent.turns`; otherwise that key is omitted.
 * @returns The run produced by `Run.create`.
 */
async function createRun({
  entry,
  threadId,
  spies,
  retainTurns,
}: {
  entry: ProviderEntry;
  threadId: string;
  spies: ScenarioSpies;
  retainTurns?: number;
}): Promise<Run<t.IState>> {
  const { modelOverride } = entry;
  const baseConfig = getLLMConfig(entry.name);
  const llmConfig =
    modelOverride == null
      ? baseConfig
      : { ...baseConfig, model: modelOverride };

  // Only add the retainRecent key when the caller asked for a specific
  // window; leaving it out keeps the library default.
  const recencyOverride =
    retainTurns == null ? {} : { retainRecent: { turns: retainTurns } };

  // tokenCounter is required for pruneMessages to be wired up
  // (Graph.ts gates createPruneMessages on it). Without prune, no
  // messagesToRefine, no summarization trigger.
  const tokenCounter = await getTokenCounter();

  return Run.create<t.IState>({
    runId: `recency-${entry.name}-${Date.now()}`,
    graphConfig: {
      type: 'standard',
      llmConfig,
      tools: [],
      instructions:
        'You are a brief assistant. Reply in 1-2 short sentences. Do not echo or restate the user message.',
      maxContextTokens: entry.maxContextTokens,
      summarizationEnabled: true,
      summarizationConfig: {
        provider: entry.provider,
        maxSummaryTokens: 400,
        ...recencyOverride,
      },
    },
    returnContent: false,
    tokenCounter,
    customHandlers: buildHandlers(spies) as never,
  });
}
203
+
204
/**
 * Sends one user message through the run and folds the reply into `history`.
 *
 * `history` is mutated in place: the new human message is appended before
 * streaming, and whatever `run.getRunMessages()` returns is appended after.
 *
 * @param run - Run to stream against.
 * @param history - Conversation so far; extended in place.
 * @param text - Content of the user message for this turn.
 * @param threadId - Passed as `configurable.thread_id`.
 * @returns The same `history` array that was passed in.
 */
async function runTurn(
  run: Run<t.IState>,
  history: BaseMessage[],
  text: string,
  threadId: string
): Promise<BaseMessage[]> {
  history.push(new HumanMessage(text));

  const streamConfig = {
    configurable: { thread_id: threadId },
    // Match the recursion limit used by src/specs/summarization.test.ts —
    // some providers re-cycle through agent ↔ summarize a few times
    // before settling and the default of 25 trips them.
    recursionLimit: 80,
    streamMode: 'values',
    version: 'v2',
  } as never;
  await run.processStream({ messages: history }, streamConfig);

  const produced = run.getRunMessages();
  if (produced == null) {
    return history;
  }
  history.push(...produced);
  return history;
}
226
+
227
/** Single filler phrase (28 characters) that the padding is built from. */
const PADDING_UNIT = 'Lorem ipsum dolor sit amet, ';

/** Filler text (400 repetitions, 11,200 characters) used to inflate prompts. */
const PADDING = PADDING_UNIT.repeat(400);
228
+
229
/** Outcome of one scenario run against one provider. */
interface ScenarioResult {
  /** Human-readable scenario title shown in the summary. */
  name: string;
  /** Name of the provider entry the scenario ran against. */
  provider: string;
  /** True only when every hard check of the scenario succeeded. */
  passed: boolean;
  /** Ordered OK / FAIL / WARN / note / EXCEPTION lines printed under the verdict. */
  details: string[];
}
235
+
236
/**
 * Scenario 1: a single oversized first user message must not trigger
 * summarization.
 *
 * Passes only when neither `ON_SUMMARIZE_START` nor `ON_SUMMARIZE_COMPLETE`
 * was observed. Any unexpected exception is recorded as an `EXCEPTION`
 * detail and leaves the scenario failed.
 *
 * @param entry - Provider to run against.
 * @returns The scenario verdict with its detail lines.
 */
async function scenarioFirstTurnProtection(
  entry: ProviderEntry
): Promise<ScenarioResult> {
  const details: string[] = [];
  const result: ScenarioResult = {
    name: 'first-turn protection (large single user message)',
    provider: entry.name,
    passed: false,
    details,
  };
  const threadId = `recency-1-${entry.name}-${Date.now()}`;
  const spies = newSpies();

  // Sized to overflow the configured 2K budget on a single message.
  // Old behavior: summarization fires and replaces the user's payload
  // with a generic summary (LibreChat issue #12940). New behavior:
  // recency window skips the LLM summarization call entirely and the
  // payload is preserved up to the prune step's truncation logic.
  const oversizedMessage = [
    'Here is a structured payload I need you to keep verbatim:',
    '',
    '<payload-MARKER-XYZ123>',
    PADDING,
    '</payload-MARKER-XYZ123>',
    '',
    'Reply OK so we can continue.',
  ].join('\n');

  try {
    const run = await createRun({ entry, threadId, spies });
    const history: BaseMessage[] = [];

    try {
      await runTurn(run, history, oversizedMessage, threadId);
    } catch (turnErr) {
      // An error whose message mentions `empty_messages` is tolerated: the
      // budget was simply too tight for the lone message, which surfaces as
      // a clear error instead of a silent summarization that destroys the
      // payload. What matters is whether ON_SUMMARIZE_START fired first.
      // NOTE(review): presumably raised by the prune step — confirm there.
      const message =
        turnErr instanceof Error ? turnErr.message : String(turnErr);
      if (!message.includes('empty_messages')) {
        throw turnErr;
      }
      details.push(
        'note: prune surfaced empty_messages error (expected when single message > budget)'
      );
    }

    const startCount = spies.onSummarizeStart.length;
    const completeCount = spies.onSummarizeComplete.length;

    details.push(
      startCount > 0
        ? `FAIL: ON_SUMMARIZE_START fired ${startCount}x — first user message was destroyed by summarization.`
        : 'OK: no ON_SUMMARIZE_START on first turn.'
    );
    if (completeCount > 0) {
      details.push(`FAIL: ON_SUMMARIZE_COMPLETE fired ${completeCount}x.`);
    }

    result.passed = startCount === 0 && completeCount === 0;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    details.push(`EXCEPTION: ${reason}`);
  }

  return result;
}
303
+
304
/**
 * Scenario 2: across several padded turns, summarization must fire and run
 * to completion while the run is configured to retain the two most recent
 * turns.
 *
 * Hard checks: at least one `ON_SUMMARIZE_START` and at least one
 * `ON_SUMMARIZE_COMPLETE` were observed. Each failed hard check adds its own
 * `FAIL:` detail line so the summary explains the verdict. Whether the final
 * reply mentions the most recent topic is reported as OK/WARN only and never
 * fails the scenario. Any exception is recorded as an `EXCEPTION` detail and
 * leaves the scenario failed.
 *
 * @param entry - Provider to run against.
 * @returns The scenario verdict with its detail lines.
 */
async function scenarioMultiTurnCompaction(
  entry: ProviderEntry
): Promise<ScenarioResult> {
  const result: ScenarioResult = {
    name: 'multi-turn compaction preserves the recency tail',
    provider: entry.name,
    passed: false,
    details: [],
  };
  const threadId = `recency-2-${entry.name}-${Date.now()}`;
  const spies = newSpies();

  try {
    const run = await createRun({ entry, threadId, spies, retainTurns: 2 });
    const history: BaseMessage[] = [];

    // Three padded turns (3,000 characters of filler each) followed by a
    // short recall question. The padded turns are what push the
    // conversation past the configured budget as exchanges accumulate.
    const filler = PADDING.slice(0, 3000);
    const prompts = [
      `Turn 1. Topic: ALPHA-BEACON. ${filler}\nReply only "noted alpha".`,
      `Turn 2. Topic: BETA-LIGHTHOUSE. ${filler}\nReply only "noted beta".`,
      `Turn 3. Topic: GAMMA-PARSEC. ${filler}\nReply only "noted gamma".`,
      `Turn 4. Final: which topic codenames have I mentioned? Reply with the comma-separated list of codenames you remember.`,
    ];
    // Turns must run strictly in order: each one extends `history`.
    for (const prompt of prompts) {
      await runTurn(run, history, prompt, threadId);
    }

    const startedCount = spies.onSummarizeStart.length;
    const completedCount = spies.onSummarizeComplete.length;
    result.details.push(
      `summarize start=${startedCount}, complete=${completedCount}`
    );

    if (startedCount === 0) {
      result.details.push(
        'FAIL: expected at least one summarization to fire across 4 turns at the configured budget.'
      );
      return result;
    }

    // Inspect the final assistant message for codename recall as a soft signal.
    const lastAi = [...history].reverse().find((m) => m instanceof AIMessage);
    let lastAiText = '';
    if (lastAi != null) {
      lastAiText =
        typeof lastAi.content === 'string'
          ? lastAi.content
          : JSON.stringify(lastAi.content);
    }
    result.details.push(
      `final-AI-snippet: ${lastAiText.slice(0, 200).replace(/\s+/g, ' ')}`
    );

    // The recency window keeps the most recent 2 turns verbatim, so the
    // model must still recall GAMMA and the turn-4 ask. ALPHA/BETA may
    // be remembered from the summary or forgotten — that's allowed.
    const normalizedReply = lastAiText.toLowerCase();
    const recallsRecent =
      normalizedReply.includes('gamma') || normalizedReply.includes('parsec');
    result.details.push(
      recallsRecent
        ? 'OK: recent-tail topic (GAMMA-PARSEC) recalled.'
        : 'WARN: recent-tail topic not in final response — could be model wording (not a hard fail).'
    );

    // Without this line a start-without-complete run would be marked FAIL
    // with nothing in the details explaining why.
    if (completedCount === 0) {
      result.details.push(
        'FAIL: ON_SUMMARIZE_START fired but ON_SUMMARIZE_COMPLETE never did — summarization did not finish.'
      );
    }

    result.passed = startedCount > 0 && completedCount > 0;
  } catch (err) {
    result.details.push(
      `EXCEPTION: ${err instanceof Error ? err.message : String(err)}`
    );
  }

  return result;
}
396
+
397
/**
 * Prints a PASS/FAIL line (plus detail lines) for every scenario result.
 *
 * @param results - Scenario outcomes, printed in the order given.
 * @returns True when every result passed (also true for an empty list).
 */
function summarize(results: ScenarioResult[]): boolean {
  console.log('\n========== Summary ==========');
  for (const { passed, provider, name, details } of results) {
    console.log(`[${passed ? 'PASS' : 'FAIL'}] ${provider}: ${name}`);
    details.forEach((detail) => {
      console.log(` ${detail}`);
    });
  }
  console.log('=============================\n');
  return results.every((outcome) => outcome.passed);
}
413
+
414
/**
 * CLI entry point: parses flags, runs the scenarios for each selected
 * provider that has credentials, prints the summary, and exits.
 *
 * Exit codes: 0 when every executed scenario passed, 1 when any failed,
 * 2 when `--provider` names an unknown provider.
 */
async function main(): Promise<void> {
  const argv = await yargs(hideBin(process.argv))
    .option('provider', {
      type: 'string',
      description: 'provider name, or "all" to run every configured provider',
      default: 'all',
    })
    .option('skip-multi', {
      type: 'boolean',
      description:
        'skip the multi-turn compaction scenario (faster smoke test)',
      default: false,
    })
    .help().argv;

  // Provider matching is case-insensitive.
  const requested = String(argv.provider).toLowerCase();
  let targets = PROVIDERS;
  if (requested !== 'all') {
    targets = PROVIDERS.filter(
      (candidate) => candidate.name.toLowerCase() === requested
    );
  }

  if (targets.length === 0) {
    const available = PROVIDERS.map((candidate) => candidate.name).join(', ');
    console.error(`unknown provider "${requested}". available: ${available}, all`);
    process.exit(2);
  }

  const skipMulti = argv['skip-multi'];
  const results: ScenarioResult[] = [];
  // Providers run one after another so their log output does not interleave.
  for (const entry of targets) {
    if (entry.envCheck()) {
      console.log(`\n----- provider: ${entry.name} -----`);
      results.push(await scenarioFirstTurnProtection(entry));
      if (!skipMulti) {
        results.push(await scenarioMultiTurnCompaction(entry));
      }
    } else {
      console.log(`skipping ${entry.name} — credentials not in .env`);
    }
  }

  process.exit(summarize(results) ? 0 : 1);
}

// Anything that escapes main() is logged and turned into a failing exit code.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
@@ -18,6 +18,8 @@ import {
18
18
  preFlightTruncateToolCallInputs,
19
19
  repairOrphanedToolMessages,
20
20
  sanitizeOrphanToolBlocks,
21
+ enforceOriginalContentCap,
22
+ ORIGINAL_CONTENT_MAX_CHARS,
21
23
  createPruneMessages,
22
24
  } from '@/messages/prune';
23
25
  import { getLLMConfig } from '@/utils/llmConfig';
@@ -1534,6 +1536,43 @@ describe('Prune Messages Tests', () => {
1534
1536
  expect(finalMessages?.length).toBeGreaterThan(0);
1535
1537
  });
1536
1538
  });
1539
+
1540
// Covers the character cap applied to the original-content map: nothing is
// touched below the cap, the oldest insertions go first above it, and the
// cap value itself is exported.
describe('enforceOriginalContentCap', () => {
  it('is a no-op when total chars are below the cap', () => {
    const map = new Map<number, string>();
    map.set(0, 'a'.repeat(100));
    map.set(1, 'b'.repeat(200));

    enforceOriginalContentCap(map);

    expect(map.size).toBe(2);
    expect(map.get(0)?.length).toBe(100);
    expect(map.get(1)?.length).toBe(200);
  });

  it('evicts oldest entries (by Map insertion order) until under the cap', () => {
    // Four entries of 700_000 chars each, inserted in key order 0..3:
    //   4 * 700_000 = 2_800_000  > 2_000_000 cap -> key 0 is dropped
    //   3 * 700_000 = 2_100_000  > cap           -> key 1 is dropped
    //   2 * 700_000 = 1_400_000 <= cap           -> keys 2 and 3 survive
    const big = 'x'.repeat(700_000);
    const map = new Map<number, string>([
      [0, big],
      [1, big],
      [2, big],
      [3, big],
    ]);

    enforceOriginalContentCap(map);

    expect([0, 1, 2, 3].map((key) => map.has(key))).toEqual([
      false,
      false,
      true,
      true,
    ]);
  });

  it('exposes the cap as a constant for callers', () => {
    expect(ORIGINAL_CONTENT_MAX_CHARS).toBe(2_000_000);
  });
});
1537
1576
  });
1538
1577
 
1539
1578
  describe('sanitizeOrphanToolBlocks', () => {