@townco/agent 0.1.50 → 0.1.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,16 +54,29 @@ const ToolSchema = z.union([
 ]);
 /** Hook configuration schema. */
 export const HookConfigSchema = z.object({
-    type: z.enum(["context_size"]),
+    type: z.enum(["context_size", "tool_response"]),
     setting: z
-        .object({
-            threshold: z.number().min(0).max(100),
-        })
+        .union([
+            // For context_size hooks
+            z.object({
+                threshold: z.number().min(0).max(100),
+            }),
+            // For tool_response hooks
+            z.object({
+                maxContextThreshold: z.number().min(0).max(100).optional(),
+                responseTruncationThreshold: z.number().min(0).max(100).optional(),
+            }),
+        ])
         .optional(),
     callback: z.string(),
 });
 /** Agent definition schema. */
 export const AgentDefinitionSchema = z.object({
+    /** Human-readable display name for the agent (shown in UI). */
+    displayName: z.string().optional(),
+    version: z.string().optional(),
+    description: z.string().optional(),
+    suggestedPrompts: z.array(z.string()).optional(),
     systemPrompt: z.string().nullable(),
     model: z.string(),
     tools: z.array(ToolSchema).optional(),
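
For orientation, a hook entry that exercises the new "tool_response" branch of HookConfigSchema could look like the sketch below. The values are illustrative only; the threshold numbers 80 and 30 match the defaults used by the compactor later in this diff, and "tool_response_compactor" is the callback name added to the registry at the end of the diff.

    // Sketch: a hook entry accepted by the updated HookConfigSchema.
    const toolResponseHook = {
      type: "tool_response" as const,
      setting: {
        maxContextThreshold: 80, // keep total context under 80% of the model window
        responseTruncationThreshold: 30, // cap one response at 30% of the compaction model window
      },
      callback: "tool_response_compactor",
    };
    HookConfigSchema.parse(toolResponseHook); // should validate without throwing
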
package/dist/index.js CHANGED
@@ -36,7 +36,7 @@ const exampleAgent = {
         {
             type: "context_size",
             setting: {
-                threshold: 95,
+                threshold: 80,
             },
             callback: "compaction_tool",
         },
@@ -2,6 +2,10 @@ import type { PromptRequest, PromptResponse, SessionNotification } from "@agentc
 import { z } from "zod";
 import type { ContentBlock } from "../acp-server/session-storage.js";
 export declare const zAgentRunnerParams: z.ZodObject<{
+    displayName: z.ZodOptional<z.ZodString>;
+    version: z.ZodOptional<z.ZodString>;
+    description: z.ZodOptional<z.ZodString>;
+    suggestedPrompts: z.ZodOptional<z.ZodArray<z.ZodString>>;
     systemPrompt: z.ZodNullable<z.ZodString>;
     model: z.ZodString;
     tools: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodUnion<readonly [z.ZodLiteral<"todo_write">, z.ZodLiteral<"get_weather">, z.ZodLiteral<"web_search">, z.ZodLiteral<"filesystem">]>, z.ZodObject<{
@@ -33,10 +37,14 @@ export declare const zAgentRunnerParams: z.ZodObject<{
     hooks: z.ZodOptional<z.ZodArray<z.ZodObject<{
         type: z.ZodEnum<{
             context_size: "context_size";
+            tool_response: "tool_response";
         }>;
-        setting: z.ZodOptional<z.ZodObject<{
+        setting: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
             threshold: z.ZodNumber;
-        }, z.core.$strip>>;
+        }, z.core.$strip>, z.ZodObject<{
+            maxContextThreshold: z.ZodOptional<z.ZodNumber>;
+            responseTruncationThreshold: z.ZodOptional<z.ZodNumber>;
+        }, z.core.$strip>]>>;
         callback: z.ZodString;
     }, z.core.$strip>>>;
 }, z.core.$strip>;
@@ -2,6 +2,10 @@ import { z } from "zod";
 import { HookConfigSchema, McpConfigSchema } from "../definition";
 import { zToolType } from "./tools";
 export const zAgentRunnerParams = z.object({
+    displayName: z.string().optional(),
+    version: z.string().optional(),
+    description: z.string().optional(),
+    suggestedPrompts: z.array(z.string()).optional(),
     systemPrompt: z.string().nullable(),
     model: z.string(),
     tools: z.array(zToolType).optional(),
@@ -20,4 +20,21 @@ export declare class HookExecutor {
      * Execute a context_size hook
      */
     private executeContextSizeHook;
+    /**
+     * Execute tool_response hooks when a tool returns output
+     */
+    executeToolResponseHooks(session: ReadonlySession, currentContextTokens: number, toolResponse: {
+        toolCallId: string;
+        toolName: string;
+        toolInput: Record<string, unknown>;
+        rawOutput: Record<string, unknown>;
+        outputTokens: number;
+    }): Promise<{
+        modifiedOutput?: Record<string, unknown>;
+        truncationWarning?: string;
+    }>;
+    /**
+     * Execute a single tool_response hook
+     */
+    private executeToolResponseHook;
 }
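
Going by this declaration, the agent loop would presumably call the new method right after a tool resolves, along these lines. This is a sketch only; the executor instance, session object, token counts, and tool data are placeholders, not code from the package.

    // Sketch: consuming executeToolResponseHooks after a tool call (hypothetical caller).
    const { modifiedOutput, truncationWarning } = await hookExecutor.executeToolResponseHooks(
      session,
      currentContextTokens,
      {
        toolCallId: "call_123",            // placeholder id
        toolName: "web_search",
        toolInput: { query: "example" },
        rawOutput: searchResult,           // the tool's raw JSON output (placeholder)
        outputTokens: 5234,                // placeholder token count for that output
      },
    );
    const finalOutput = modifiedOutput ?? searchResult; // fall back to the unmodified output
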
@@ -106,4 +106,70 @@ export class HookExecutor {
             };
         }
     }
+    /**
+     * Execute tool_response hooks when a tool returns output
+     */
+    async executeToolResponseHooks(session, currentContextTokens, toolResponse) {
+        logger.info(`Executing tool_response hooks - found ${this.hooks.length} hook(s)`, {
+            toolCallId: toolResponse.toolCallId,
+            toolName: toolResponse.toolName,
+            outputTokens: toolResponse.outputTokens,
+        });
+        for (const hook of this.hooks) {
+            if (hook.type === "tool_response") {
+                const result = await this.executeToolResponseHook(hook, session, currentContextTokens, toolResponse);
+                if (result) {
+                    return result;
+                }
+            }
+        }
+        return {}; // No modifications
+    }
+    /**
+     * Execute a single tool_response hook
+     */
+    async executeToolResponseHook(hook, session, currentContextTokens, toolResponse) {
+        const maxTokens = getModelMaxTokens(this.model);
+        try {
+            // Load and execute callback
+            const callback = await this.loadCallback(hook.callback);
+            // Pass hook settings through requestParams
+            const sessionWithSettings = {
+                ...session,
+                requestParams: {
+                    ...session.requestParams,
+                    hookSettings: hook.setting,
+                },
+            };
+            const hookContext = {
+                session: sessionWithSettings,
+                currentTokens: currentContextTokens,
+                maxTokens,
+                percentage: (currentContextTokens / maxTokens) * 100,
+                model: this.model,
+                toolResponse,
+            };
+            const result = await callback(hookContext);
+            // Extract modified output and warnings from metadata
+            if (result.metadata) {
+                const response = {};
+                if (result.metadata.modifiedOutput) {
+                    response.modifiedOutput = result.metadata.modifiedOutput;
+                }
+                if (result.metadata.truncationWarning) {
+                    response.truncationWarning = result.metadata
+                        .truncationWarning;
+                }
+                return response;
+            }
+            return null;
+        }
+        catch (error) {
+            logger.error("Tool response hook execution failed", {
+                callback: hook.callback,
+                error: error instanceof Error ? error.message : String(error),
+            });
+            return null; // Return original output on error
+        }
+    }
 }
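
The executor only inspects result.metadata.modifiedOutput and result.metadata.truncationWarning, so a custom tool_response callback could be as small as the sketch below. The callback shape mirrors the toolResponseCompactor added later in this diff; the callback name and the stripped field are hypothetical.

    // Sketch: a minimal tool_response hook callback honoring the metadata contract.
    const dropRawHtml = async (ctx) => {
      if (!ctx.toolResponse) {
        return { newContextEntry: null };
      }
      const { rawOutput, outputTokens } = ctx.toolResponse;
      const { html, ...rest } = rawOutput; // hypothetical field to strip from the output
      return {
        newContextEntry: null,
        metadata: {
          modifiedOutput: rest,
          truncationWarning: `Dropped raw HTML from the tool response (${outputTokens.toLocaleString()} tokens before stripping)`,
        },
      };
    };
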
@@ -99,7 +99,15 @@ Please provide your summary based on the conversation above, following this stru
     const summaryEntry = createFullMessageEntry("user", `This session is being continued from a previous conversation that ran out of context. The conversation is summarized below:\n${summaryText}`);
     // Set compactedUpTo to indicate all messages have been compacted into the summary
     const lastMessageIndex = messagesToCompact.length - 1;
-    const newContextEntry = createContextEntry([summaryEntry], undefined, lastMessageIndex, summaryTokens);
+    const newContextEntry = createContextEntry([summaryEntry], undefined, lastMessageIndex, {
+        // Store summary tokens in userMessagesTokens since the summary is a user message
+        systemPromptTokens: 0,
+        userMessagesTokens: summaryTokens,
+        assistantMessagesTokens: 0,
+        toolInputTokens: 0,
+        toolResultsTokens: 0,
+        totalEstimated: summaryTokens,
+    });
     return {
         newContextEntry,
         metadata: {
@@ -0,0 +1,6 @@
+import type { HookCallback } from "../types.js";
+/**
+ * Tool response compaction hook - compacts or truncates large tool responses
+ * to prevent context overflow
+ */
+export declare const toolResponseCompactor: HookCallback;
@@ -0,0 +1,461 @@
+import { ChatAnthropic } from "@langchain/anthropic";
+import { HumanMessage, SystemMessage } from "@langchain/core/messages";
+import { createLogger } from "@townco/core";
+import { countToolResultTokens } from "../../../utils/token-counter.js";
+const logger = createLogger("tool-response-compactor");
+// Haiku 4.5 for compaction (fast and cost-effective)
+const COMPACTION_MODEL = "claude-haiku-4-5-20251001";
+const COMPACTION_MODEL_CONTEXT = 200000; // Haiku context size for calculating truncation limits
+/**
+ * Tool response compaction hook - compacts or truncates large tool responses
+ * to prevent context overflow
+ */
+export const toolResponseCompactor = async (ctx) => {
+    // Only process if we have tool response data
+    if (!ctx.toolResponse) {
+        logger.warn("toolResponseCompactor called without tool response data");
+        return { newContextEntry: null };
+    }
+    const { toolCallId, toolName, toolInput, rawOutput, outputTokens } = ctx.toolResponse;
+    // Get settings from hook configuration
+    const settings = ctx.session.requestParams.hookSettings;
+    const maxContextThreshold = settings?.maxContextThreshold ?? 80;
+    const responseTruncationThreshold = settings?.responseTruncationThreshold ?? 30;
+    // Calculate actual token limits from percentages
+    const maxAllowedTotal = ctx.maxTokens * (maxContextThreshold / 100);
+    const availableSpace = maxAllowedTotal - ctx.currentTokens;
+    const projectedTotal = ctx.currentTokens + outputTokens;
+    const compactionLimit = COMPACTION_MODEL_CONTEXT * (responseTruncationThreshold / 100);
+    logger.info("Tool response compaction hook triggered", {
+        toolCallId,
+        toolName,
+        outputTokens,
+        currentContext: ctx.currentTokens,
+        maxAllowedTotal,
+        availableSpace,
+        projectedTotal,
+        compactionLimit,
+        settings,
+    });
+    // Case 0: Small response, no action needed
+    if (projectedTotal < maxAllowedTotal) {
+        logger.info("Tool response fits within threshold, no compaction needed");
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "none",
+                originalTokens: outputTokens,
+                finalTokens: outputTokens,
+            },
+        };
+    }
+    // Response would exceed threshold, need to compact or truncate
+    // Determine target size: fit within available space, but cap at compactionLimit for truncation
+    const targetSize = Math.min(availableSpace, compactionLimit);
+    // Case 2: Huge response, must truncate (too large for LLM compaction)
+    if (outputTokens >= compactionLimit) {
+        logger.warn("Tool response exceeds compaction capacity, truncating", {
+            outputTokens,
+            compactionLimit,
+            targetSize,
+            availableSpace,
+        });
+        const truncated = truncateToolResponse(rawOutput, targetSize);
+        const finalTokens = countToolResultTokens(truncated);
+        // Verify truncation stayed within boundaries
+        if (finalTokens > targetSize) {
+            logger.error("Truncation exceeded target size - this should not happen!", {
+                finalTokens,
+                targetSize,
+                excess: finalTokens - targetSize,
+            });
+            // Try more aggressive truncation (70% of target as emergency measure)
+            const emergencySize = Math.floor(targetSize * 0.7);
+            const emergencyTruncated = truncateToolResponse(rawOutput, emergencySize);
+            let emergencyTokens = countToolResultTokens(emergencyTruncated);
+            // Final safety check - if emergency truncation STILL exceeded target, use ultra-conservative fallback
+            if (emergencyTokens > targetSize) {
+                logger.error("Emergency truncation STILL exceeded target - using ultra-conservative fallback", {
+                    emergencyTokens,
+                    targetSize,
+                    emergencySize,
+                });
+                // Ultra-conservative: just return a simple error structure with the raw data sliced to 50% of target
+                const ultraConservativeSize = Math.floor(targetSize * 0.5);
+                return {
+                    newContextEntry: null,
+                    metadata: {
+                        action: "truncated",
+                        originalTokens: outputTokens,
+                        finalTokens: ultraConservativeSize, // Conservative estimate
+                        modifiedOutput: {
+                            _truncation_error: "Tool response was too large and could not be reliably truncated",
+                            _original_token_count: outputTokens,
+                            _target_token_count: targetSize,
+                            _partial_data: JSON.stringify(rawOutput).slice(0, ultraConservativeSize * 3),
+                        },
+                        truncationWarning: `Tool response was severely truncated from ${outputTokens.toLocaleString()} to ~${ultraConservativeSize.toLocaleString()} tokens (emergency truncation failed - data may be incomplete)`,
+                    },
+                };
+            }
+            return {
+                newContextEntry: null,
+                metadata: {
+                    action: "truncated",
+                    originalTokens: outputTokens,
+                    finalTokens: emergencyTokens,
+                    modifiedOutput: emergencyTruncated,
+                    truncationWarning: `Tool response was aggressively truncated from ${outputTokens.toLocaleString()} to ${emergencyTokens.toLocaleString()} tokens to fit within context limit (emergency truncation)`,
+                },
+            };
+        }
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "truncated",
+                originalTokens: outputTokens,
+                finalTokens,
+                modifiedOutput: truncated,
+                truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens to fit within context limit (available space: ${availableSpace.toLocaleString()} tokens)`,
+            },
+        };
+    }
+    // Case 1: Medium response, intelligent compaction
+    logger.info("Tool response requires intelligent compaction", {
+        outputTokens,
+        targetSize,
+        availableSpace,
+        compactionLimit,
+    });
+    try {
+        // Build conversation context (last 5 messages)
+        const recentMessages = ctx.session.messages.slice(-5);
+        const conversationContext = recentMessages
+            .map((msg) => {
+                const text = msg.content
+                    .filter((b) => b.type === "text")
+                    .map((b) => b.text)
+                    .join("\n");
+                return `${msg.role}: ${text}`;
+            })
+            .join("\n\n");
+        const compacted = await compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetSize);
+        let finalTokens = countToolResultTokens(compacted);
+        // Verify compaction stayed within boundaries
+        if (finalTokens > targetSize) {
+            logger.warn("LLM compaction exceeded target, falling back to truncation", {
+                finalTokens,
+                targetSize,
+                excess: finalTokens - targetSize,
+            });
+            // Fallback to truncation
+            const truncated = truncateToolResponse(compacted, targetSize);
+            finalTokens = countToolResultTokens(truncated);
+            return {
+                newContextEntry: null,
+                metadata: {
+                    action: "compacted_then_truncated",
+                    originalTokens: outputTokens,
+                    finalTokens,
+                    tokensSaved: outputTokens - finalTokens,
+                    modifiedOutput: truncated,
+                    truncationWarning: `Tool response was compacted then truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens to fit within context limit`,
+                },
+            };
+        }
+        logger.info("Successfully compacted tool response", {
+            originalTokens: outputTokens,
+            finalTokens,
+            targetSize,
+            tokensSaved: outputTokens - finalTokens,
+        });
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "compacted",
+                originalTokens: outputTokens,
+                finalTokens,
+                tokensSaved: outputTokens - finalTokens,
+                modifiedOutput: compacted,
+            },
+        };
+    }
+    catch (error) {
+        logger.error("Compaction failed, falling back to truncation", {
+            error: error instanceof Error ? error.message : String(error),
+        });
+        // Fallback to truncation with the same target size
+        const truncated = truncateToolResponse(rawOutput, targetSize);
+        let finalTokens = countToolResultTokens(truncated);
+        // Verify truncation stayed within boundaries
+        if (finalTokens > targetSize) {
+            logger.error("Fallback truncation exceeded target, using emergency truncation", {
+                finalTokens,
+                targetSize,
+            });
+            const emergencySize = Math.floor(targetSize * 0.7);
+            const emergencyTruncated = truncateToolResponse(rawOutput, emergencySize);
+            finalTokens = countToolResultTokens(emergencyTruncated);
+            // Final safety check
+            if (finalTokens > targetSize) {
+                logger.error("Emergency truncation STILL exceeded target - using ultra-conservative fallback");
+                const ultraConservativeSize = Math.floor(targetSize * 0.5);
+                return {
+                    newContextEntry: null,
+                    metadata: {
+                        action: "truncated",
+                        originalTokens: outputTokens,
+                        finalTokens: ultraConservativeSize,
+                        modifiedOutput: {
+                            _truncation_error: "Tool response was too large and could not be reliably truncated (compaction failed)",
+                            _original_token_count: outputTokens,
+                            _target_token_count: targetSize,
+                            _partial_data: JSON.stringify(rawOutput).slice(0, ultraConservativeSize * 3),
+                        },
+                        truncationWarning: `Tool response was severely truncated from ${outputTokens.toLocaleString()} to ~${ultraConservativeSize.toLocaleString()} tokens (compaction+emergency truncation failed)`,
+                    },
+                };
+            }
+            return {
+                newContextEntry: null,
+                metadata: {
+                    action: "truncated",
+                    originalTokens: outputTokens,
+                    finalTokens,
+                    modifiedOutput: emergencyTruncated,
+                    truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens (compaction failed, emergency truncation applied)`,
+                },
+            };
+        }
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "truncated",
+                originalTokens: outputTokens,
+                finalTokens,
+                modifiedOutput: truncated,
+                truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens (compaction failed)`,
+            },
+        };
+    }
+};
+/**
+ * Recursive LLM compaction with adaptive retries
+ */
+async function compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetTokens) {
+    const model = new ChatAnthropic({
+        model: COMPACTION_MODEL,
+        temperature: 0,
+    });
+    // Step 1: Understand what we're looking for (only need to do this once)
+    const analysisPrompt = `You are helping to manage context size in an agent conversation.
+
+A tool was just called with these parameters:
+Tool: ${toolName}
+Input: ${JSON.stringify(toolInput, null, 2)}
+
+Recent conversation context:
+${conversationContext}
+
+Based on the tool input and conversation context, what key information is the user looking for from this tool response?
+
+Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
+    const analysisResponse = await model.invoke([
+        new SystemMessage("You are a helpful assistant analyzing information needs."),
+        new HumanMessage(analysisPrompt),
+    ]);
+    const keyRequirements = typeof analysisResponse.content === "string"
+        ? analysisResponse.content
+        : "Extract relevant information";
+    logger.info("Identified key requirements for compaction", {
+        requirements: keyRequirements.substring(0, 200),
+    });
+    // Step 2: Recursively compact until we meet the target
+    let currentData = rawOutput;
+    let currentTokens = countToolResultTokens(rawOutput);
+    const maxAttempts = 4;
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+        const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
+        // Build the compaction prompt based on whether this is first attempt or retry
+        let compactionPrompt;
+        if (attempt === 0) {
+            // First attempt - compact from original
+            compactionPrompt = `You are helping to compact a large tool response to save context space.
+
+Key information needed:
+${keyRequirements}
+
+Tool response to compact (JSON):
+${JSON.stringify(currentData, null, 2)}
+
+Current size: ${currentTokens.toLocaleString()} tokens
+Target size: ${targetTokens.toLocaleString()} tokens (reduce by ${reductionNeeded}%)
+
+Your task: Create a compacted version that:
+1. Retains all information relevant to the key requirements above
+2. Removes or summarizes less relevant details
+3. Maintains the same JSON structure where possible
+4. Reduces the size to ${targetTokens.toLocaleString()} tokens or less
+5. Be aggressive in removing unnecessary data
+
+Return ONLY valid JSON (no explanation text).`;
+        }
+        else {
+            // Retry - need to compact further
+            compactionPrompt = `Your previous compaction was good but still too large.
+
+Key information needed:
+${keyRequirements}
+
+Previous compaction result (JSON):
+${JSON.stringify(currentData, null, 2)}
+
+Current size: ${currentTokens.toLocaleString()} tokens
+Target size: ${targetTokens.toLocaleString()} tokens
+You need to reduce by another ${reductionNeeded}%
+
+Your task: Further compact this data by:
+1. Being MORE aggressive in removing unnecessary details
+2. Summarizing verbose content more concisely
+3. Removing any redundant information
+4. Keeping ONLY the most essential data related to the key requirements
+5. Reduce to ${targetTokens.toLocaleString()} tokens or less
+
+Return ONLY valid JSON (no explanation text).`;
+        }
+        const compactionResponse = await model.invoke([
+            new SystemMessage("You are a helpful assistant compacting data."),
+            new HumanMessage(compactionPrompt),
+        ]);
+        // Extract and parse JSON
+        const responseText = typeof compactionResponse.content === "string"
+            ? compactionResponse.content
+            : JSON.stringify(compactionResponse.content);
+        const jsonMatch = responseText.match(/```(?:json)?\n([\s\S]*?)\n```/) || [
+            null,
+            responseText,
+        ];
+        const jsonText = jsonMatch[1] || responseText;
+        const compacted = JSON.parse(jsonText.trim());
+        const compactedTokens = countToolResultTokens(compacted);
+        logger.info(`LLM compaction attempt ${attempt + 1}/${maxAttempts}`, {
+            currentTokens,
+            compactedTokens,
+            targetTokens,
+            reductionAchieved: currentTokens - compactedTokens,
+        });
+        // Check if we've met the target
+        if (compactedTokens <= targetTokens) {
+            logger.info("LLM compaction succeeded", {
+                attempts: attempt + 1,
+                originalTokens: countToolResultTokens(rawOutput),
+                finalTokens: compactedTokens,
+                targetTokens,
+            });
+            return compacted;
+        }
+        // If we're within 5% of target, accept it (close enough)
+        if (compactedTokens <= targetTokens * 1.05) {
+            logger.info("LLM compaction close enough to target", {
+                attempts: attempt + 1,
+                finalTokens: compactedTokens,
+                targetTokens,
+                overshoot: compactedTokens - targetTokens,
+            });
+            return compacted;
+        }
+        // Still too large - prepare for another attempt
+        currentData = compacted;
+        currentTokens = compactedTokens;
+    }
+    // If we exhausted all attempts, return the last result anyway
+    logger.warn("LLM compaction exhausted attempts but did not meet target", {
+        finalTokens: currentTokens,
+        targetTokens,
+        overshoot: currentTokens - targetTokens,
+    });
+    return currentData;
+}
+/**
+ * Truncate tool response to target token count
+ * Uses iterative approach to ensure we stay under the target
+ */
+function truncateToolResponse(rawOutput, targetTokens) {
+    const currentTokens = countToolResultTokens(rawOutput);
+    if (currentTokens <= targetTokens) {
+        return rawOutput; // Already within limit
+    }
+    const outputString = JSON.stringify(rawOutput);
+    // Start with 70% of target to leave significant room for closing braces and metadata
+    let ratio = 0.7;
+    let lastResult = null;
+    // Iteratively truncate until we meet the target
+    for (let attempt = 0; attempt < 15; attempt++) {
+        // Calculate character limit based on ratio
+        const targetChars = Math.floor((targetTokens * ratio * outputString.length) / currentTokens);
+        // Truncate the JSON string
+        let truncated = outputString.slice(0, targetChars);
+        // Try to close any open JSON structures
+        const openBraces = (truncated.match(/{/g) || []).length;
+        const closeBraces = (truncated.match(/}/g) || []).length;
+        const openBrackets = (truncated.match(/\[/g) || []).length;
+        const closeBrackets = (truncated.match(/\]/g) || []).length;
+        truncated += "}".repeat(Math.max(0, openBraces - closeBraces));
+        truncated += "]".repeat(Math.max(0, openBrackets - closeBrackets));
+        try {
+            // Try to parse as valid JSON
+            const parsed = JSON.parse(truncated);
+            const parsedTokens = countToolResultTokens(parsed);
+            // Store the result
+            lastResult = { parsed, tokens: parsedTokens };
+            if (parsedTokens <= targetTokens) {
+                // Success! Add truncation notice
+                return {
+                    ...parsed,
+                    _truncation_notice: "... [TRUNCATED - response exceeded size limit]",
+                    _original_token_count: currentTokens,
+                    _truncated_token_count: parsedTokens,
+                };
+            }
+            // Still too large - calculate how much we need to reduce
+            // If we overshot, reduce ratio proportionally to how much we exceeded
+            const overshootRatio = parsedTokens / targetTokens; // e.g., 1.03 if we're 3% over
+            ratio = (ratio / overshootRatio) * 0.95; // Reduce by overshoot amount plus 5% safety margin
+            logger.debug("Truncation attempt resulted in overshoot, retrying", {
+                attempt,
+                targetTokens,
+                parsedTokens,
+                overshootRatio,
+                newRatio: ratio,
+            });
+        }
+        catch {
+            // JSON parse failed, try more aggressive truncation
+            ratio *= 0.85;
+        }
+    }
+    // If we exhausted all attempts, return the last successful parse (if any)
+    // or a very conservative fallback
+    if (lastResult && lastResult.tokens <= targetTokens * 1.1) {
+        // Within 10% of target - good enough
+        logger.warn("Truncation reached attempt limit but result is close enough", {
+            targetTokens,
+            actualTokens: lastResult.tokens,
+        });
+        return {
+            ...lastResult.parsed,
+            _truncation_notice: "... [TRUNCATED - response exceeded size limit]",
+            _original_token_count: currentTokens,
+            _truncated_token_count: lastResult.tokens,
+        };
+    }
+    // If all attempts failed, return a simple truncated structure
+    const safeChars = Math.floor(targetTokens * 3); // Very conservative
+    return {
+        truncated: true,
+        originalSize: currentTokens,
+        targetSize: targetTokens,
+        content: outputString.slice(0, safeChars),
+        warning: "Response was truncated due to size constraints (JSON parsing failed)",
+    };
+}
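
To make the threshold arithmetic concrete: assuming a 200,000-token model window, a current context of 120,000 tokens, and the default settings (maxContextThreshold 80, responseTruncationThreshold 30), the hook computes maxAllowedTotal = 200,000 × 0.80 = 160,000, availableSpace = 160,000 − 120,000 = 40,000, and compactionLimit = 200,000 × 0.30 = 60,000, so targetSize = min(40,000, 60,000) = 40,000 tokens. Under those numbers, a 50,000-token tool response would go through LLM compaction (it is below the 60,000-token compaction limit), while a 70,000-token response would be truncated directly.
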
@@ -1,10 +1,12 @@
 import { compactionTool } from "./predefined/compaction-tool";
+import { toolResponseCompactor } from "./predefined/tool-response-compactor";
 /**
  * Registry of predefined hook callbacks
  * Maps callback names to their implementations
  */
 export const HOOK_REGISTRY = {
     compaction_tool: compactionTool,
+    tool_response_compactor: toolResponseCompactor,
 };
 /**
  * Check if a callback name is a predefined hook
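
With the callback registered, an agent definition can opt into the behavior by name. A minimal sketch follows; the model id is a placeholder, and the hook entry simply follows HookConfigSchema from earlier in this diff.

    // Sketch: wiring the registered callback into an agent definition.
    const agentWithToolResponseHook = {
      systemPrompt: null,
      model: "claude-sonnet-4-5",   // placeholder model id
      hooks: [
        {
          type: "tool_response",
          setting: { maxContextThreshold: 80, responseTruncationThreshold: 30 },
          callback: "tool_response_compactor",
        },
      ],
    };
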