@townco/agent 0.1.50 → 0.1.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/acp-server/adapter.d.ts +10 -0
- package/dist/acp-server/adapter.js +287 -80
- package/dist/acp-server/http.js +8 -1
- package/dist/acp-server/session-storage.d.ts +17 -3
- package/dist/acp-server/session-storage.js +9 -0
- package/dist/definition/index.d.ts +16 -4
- package/dist/definition/index.js +17 -4
- package/dist/index.js +1 -1
- package/dist/runner/agent-runner.d.ts +10 -2
- package/dist/runner/agent-runner.js +4 -0
- package/dist/runner/hooks/executor.d.ts +17 -0
- package/dist/runner/hooks/executor.js +66 -0
- package/dist/runner/hooks/predefined/compaction-tool.js +9 -1
- package/dist/runner/hooks/predefined/tool-response-compactor.d.ts +6 -0
- package/dist/runner/hooks/predefined/tool-response-compactor.js +461 -0
- package/dist/runner/hooks/registry.js +2 -0
- package/dist/runner/hooks/types.d.ts +39 -3
- package/dist/runner/hooks/types.js +9 -4
- package/dist/runner/langchain/index.js +95 -76
- package/dist/scaffold/link-local.d.ts +1 -0
- package/dist/scaffold/link-local.js +54 -0
- package/dist/scaffold/project-scaffold.js +1 -0
- package/dist/templates/index.d.ts +7 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/context-size-calculator.d.ts +29 -0
- package/dist/utils/context-size-calculator.js +78 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.js +2 -0
- package/dist/utils/token-counter.d.ts +19 -0
- package/dist/utils/token-counter.js +44 -0
- package/index.ts +1 -1
- package/package.json +7 -6
- package/templates/index.ts +18 -6
package/dist/definition/index.js
CHANGED
|
@@ -54,16 +54,29 @@ const ToolSchema = z.union([
|
|
|
54
54
|
]);
|
|
55
55
|
/** Hook configuration schema. */
|
|
56
56
|
export const HookConfigSchema = z.object({
|
|
57
|
-
type: z.enum(["context_size"]),
|
|
57
|
+
type: z.enum(["context_size", "tool_response"]),
|
|
58
58
|
setting: z
|
|
59
|
-
.
|
|
60
|
-
|
|
61
|
-
|
|
59
|
+
.union([
|
|
60
|
+
// For context_size hooks
|
|
61
|
+
z.object({
|
|
62
|
+
threshold: z.number().min(0).max(100),
|
|
63
|
+
}),
|
|
64
|
+
// For tool_response hooks
|
|
65
|
+
z.object({
|
|
66
|
+
maxContextThreshold: z.number().min(0).max(100).optional(),
|
|
67
|
+
responseTruncationThreshold: z.number().min(0).max(100).optional(),
|
|
68
|
+
}),
|
|
69
|
+
])
|
|
62
70
|
.optional(),
|
|
63
71
|
callback: z.string(),
|
|
64
72
|
});
|
|
65
73
|
/** Agent definition schema. */
|
|
66
74
|
export const AgentDefinitionSchema = z.object({
|
|
75
|
+
/** Human-readable display name for the agent (shown in UI). */
|
|
76
|
+
displayName: z.string().optional(),
|
|
77
|
+
version: z.string().optional(),
|
|
78
|
+
description: z.string().optional(),
|
|
79
|
+
suggestedPrompts: z.array(z.string()).optional(),
|
|
67
80
|
systemPrompt: z.string().nullable(),
|
|
68
81
|
model: z.string(),
|
|
69
82
|
tools: z.array(ToolSchema).optional(),
|
package/dist/index.js
CHANGED
|
@@ -2,6 +2,10 @@ import type { PromptRequest, PromptResponse, SessionNotification } from "@agentc
|
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import type { ContentBlock } from "../acp-server/session-storage.js";
|
|
4
4
|
export declare const zAgentRunnerParams: z.ZodObject<{
|
|
5
|
+
displayName: z.ZodOptional<z.ZodString>;
|
|
6
|
+
version: z.ZodOptional<z.ZodString>;
|
|
7
|
+
description: z.ZodOptional<z.ZodString>;
|
|
8
|
+
suggestedPrompts: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
5
9
|
systemPrompt: z.ZodNullable<z.ZodString>;
|
|
6
10
|
model: z.ZodString;
|
|
7
11
|
tools: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodUnion<readonly [z.ZodLiteral<"todo_write">, z.ZodLiteral<"get_weather">, z.ZodLiteral<"web_search">, z.ZodLiteral<"filesystem">]>, z.ZodObject<{
|
|
@@ -33,10 +37,14 @@ export declare const zAgentRunnerParams: z.ZodObject<{
|
|
|
33
37
|
hooks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
34
38
|
type: z.ZodEnum<{
|
|
35
39
|
context_size: "context_size";
|
|
40
|
+
tool_response: "tool_response";
|
|
36
41
|
}>;
|
|
37
|
-
setting: z.ZodOptional<z.ZodObject<{
|
|
42
|
+
setting: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
|
|
38
43
|
threshold: z.ZodNumber;
|
|
39
|
-
}, z.core.$strip
|
|
44
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
45
|
+
maxContextThreshold: z.ZodOptional<z.ZodNumber>;
|
|
46
|
+
responseTruncationThreshold: z.ZodOptional<z.ZodNumber>;
|
|
47
|
+
}, z.core.$strip>]>>;
|
|
40
48
|
callback: z.ZodString;
|
|
41
49
|
}, z.core.$strip>>>;
|
|
42
50
|
}, z.core.$strip>;
|
|
@@ -2,6 +2,10 @@ import { z } from "zod";
|
|
|
2
2
|
import { HookConfigSchema, McpConfigSchema } from "../definition";
|
|
3
3
|
import { zToolType } from "./tools";
|
|
4
4
|
export const zAgentRunnerParams = z.object({
|
|
5
|
+
displayName: z.string().optional(),
|
|
6
|
+
version: z.string().optional(),
|
|
7
|
+
description: z.string().optional(),
|
|
8
|
+
suggestedPrompts: z.array(z.string()).optional(),
|
|
5
9
|
systemPrompt: z.string().nullable(),
|
|
6
10
|
model: z.string(),
|
|
7
11
|
tools: z.array(zToolType).optional(),
|
|
@@ -20,4 +20,21 @@ export declare class HookExecutor {
|
|
|
20
20
|
* Execute a context_size hook
|
|
21
21
|
*/
|
|
22
22
|
private executeContextSizeHook;
|
|
23
|
+
/**
|
|
24
|
+
* Execute tool_response hooks when a tool returns output
|
|
25
|
+
*/
|
|
26
|
+
executeToolResponseHooks(session: ReadonlySession, currentContextTokens: number, toolResponse: {
|
|
27
|
+
toolCallId: string;
|
|
28
|
+
toolName: string;
|
|
29
|
+
toolInput: Record<string, unknown>;
|
|
30
|
+
rawOutput: Record<string, unknown>;
|
|
31
|
+
outputTokens: number;
|
|
32
|
+
}): Promise<{
|
|
33
|
+
modifiedOutput?: Record<string, unknown>;
|
|
34
|
+
truncationWarning?: string;
|
|
35
|
+
}>;
|
|
36
|
+
/**
|
|
37
|
+
* Execute a single tool_response hook
|
|
38
|
+
*/
|
|
39
|
+
private executeToolResponseHook;
|
|
23
40
|
}
|
|
@@ -106,4 +106,70 @@ export class HookExecutor {
|
|
|
106
106
|
};
|
|
107
107
|
}
|
|
108
108
|
}
|
|
109
|
+
/**
|
|
110
|
+
* Execute tool_response hooks when a tool returns output
|
|
111
|
+
*/
|
|
112
|
+
async executeToolResponseHooks(session, currentContextTokens, toolResponse) {
|
|
113
|
+
logger.info(`Executing tool_response hooks - found ${this.hooks.length} hook(s)`, {
|
|
114
|
+
toolCallId: toolResponse.toolCallId,
|
|
115
|
+
toolName: toolResponse.toolName,
|
|
116
|
+
outputTokens: toolResponse.outputTokens,
|
|
117
|
+
});
|
|
118
|
+
for (const hook of this.hooks) {
|
|
119
|
+
if (hook.type === "tool_response") {
|
|
120
|
+
const result = await this.executeToolResponseHook(hook, session, currentContextTokens, toolResponse);
|
|
121
|
+
if (result) {
|
|
122
|
+
return result;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
return {}; // No modifications
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* Execute a single tool_response hook
|
|
130
|
+
*/
|
|
131
|
+
async executeToolResponseHook(hook, session, currentContextTokens, toolResponse) {
|
|
132
|
+
const maxTokens = getModelMaxTokens(this.model);
|
|
133
|
+
try {
|
|
134
|
+
// Load and execute callback
|
|
135
|
+
const callback = await this.loadCallback(hook.callback);
|
|
136
|
+
// Pass hook settings through requestParams
|
|
137
|
+
const sessionWithSettings = {
|
|
138
|
+
...session,
|
|
139
|
+
requestParams: {
|
|
140
|
+
...session.requestParams,
|
|
141
|
+
hookSettings: hook.setting,
|
|
142
|
+
},
|
|
143
|
+
};
|
|
144
|
+
const hookContext = {
|
|
145
|
+
session: sessionWithSettings,
|
|
146
|
+
currentTokens: currentContextTokens,
|
|
147
|
+
maxTokens,
|
|
148
|
+
percentage: (currentContextTokens / maxTokens) * 100,
|
|
149
|
+
model: this.model,
|
|
150
|
+
toolResponse,
|
|
151
|
+
};
|
|
152
|
+
const result = await callback(hookContext);
|
|
153
|
+
// Extract modified output and warnings from metadata
|
|
154
|
+
if (result.metadata) {
|
|
155
|
+
const response = {};
|
|
156
|
+
if (result.metadata.modifiedOutput) {
|
|
157
|
+
response.modifiedOutput = result.metadata.modifiedOutput;
|
|
158
|
+
}
|
|
159
|
+
if (result.metadata.truncationWarning) {
|
|
160
|
+
response.truncationWarning = result.metadata
|
|
161
|
+
.truncationWarning;
|
|
162
|
+
}
|
|
163
|
+
return response;
|
|
164
|
+
}
|
|
165
|
+
return null;
|
|
166
|
+
}
|
|
167
|
+
catch (error) {
|
|
168
|
+
logger.error("Tool response hook execution failed", {
|
|
169
|
+
callback: hook.callback,
|
|
170
|
+
error: error instanceof Error ? error.message : String(error),
|
|
171
|
+
});
|
|
172
|
+
return null; // Return original output on error
|
|
173
|
+
}
|
|
174
|
+
}
|
|
109
175
|
}
|
|
@@ -99,7 +99,15 @@ Please provide your summary based on the conversation above, following this stru
|
|
|
99
99
|
const summaryEntry = createFullMessageEntry("user", `This session is being continued from a previous conversation that ran out of context. The conversation is summarized below:\n${summaryText}`);
|
|
100
100
|
// Set compactedUpTo to indicate all messages have been compacted into the summary
|
|
101
101
|
const lastMessageIndex = messagesToCompact.length - 1;
|
|
102
|
-
const newContextEntry = createContextEntry([summaryEntry], undefined, lastMessageIndex,
|
|
102
|
+
const newContextEntry = createContextEntry([summaryEntry], undefined, lastMessageIndex, {
|
|
103
|
+
// Store summary tokens in userMessagesTokens since the summary is a user message
|
|
104
|
+
systemPromptTokens: 0,
|
|
105
|
+
userMessagesTokens: summaryTokens,
|
|
106
|
+
assistantMessagesTokens: 0,
|
|
107
|
+
toolInputTokens: 0,
|
|
108
|
+
toolResultsTokens: 0,
|
|
109
|
+
totalEstimated: summaryTokens,
|
|
110
|
+
});
|
|
103
111
|
return {
|
|
104
112
|
newContextEntry,
|
|
105
113
|
metadata: {
|
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
import { ChatAnthropic } from "@langchain/anthropic";
|
|
2
|
+
import { HumanMessage, SystemMessage } from "@langchain/core/messages";
|
|
3
|
+
import { createLogger } from "@townco/core";
|
|
4
|
+
import { countToolResultTokens } from "../../../utils/token-counter.js";
|
|
5
|
+
const logger = createLogger("tool-response-compactor");
|
|
6
|
+
// Haiku 4.5 for compaction (fast and cost-effective)
|
|
7
|
+
const COMPACTION_MODEL = "claude-haiku-4-5-20251001";
|
|
8
|
+
const COMPACTION_MODEL_CONTEXT = 200000; // Haiku context size for calculating truncation limits
|
|
9
|
+
/**
 * Tool response compaction hook - compacts or truncates large tool responses
 * to prevent context overflow.
 *
 * Settings are read from `ctx.session.requestParams.hookSettings`, both as
 * percentages: `maxContextThreshold` (default 80, of `ctx.maxTokens`) and
 * `responseTruncationThreshold` (default 30, of the compaction model's context).
 *
 * Strategy:
 *  - Case 0: the response fits under the context threshold -> no change.
 *  - Case 2: the response is at least as large as the compaction limit, or no
 *    context space is left at all -> truncate.
 *  - Case 1: otherwise -> compact with the LLM, falling back to truncation
 *    when compaction fails or overshoots the target.
 *
 * @param ctx - Hook context; reads `toolResponse`, `session`, `maxTokens`, `currentTokens`.
 * @returns `{ newContextEntry: null, metadata }`. `metadata.modifiedOutput`, when
 *   present, is the replacement tool output and `metadata.truncationWarning`
 *   describes any truncation that was applied.
 */
export const toolResponseCompactor = async (ctx) => {
    // Only process if we have tool response data
    if (!ctx.toolResponse) {
        logger.warn("toolResponseCompactor called without tool response data");
        return { newContextEntry: null };
    }
    const { toolCallId, toolName, toolInput, rawOutput, outputTokens } = ctx.toolResponse;
    // Get settings from hook configuration
    const settings = ctx.session.requestParams.hookSettings;
    const maxContextThreshold = settings?.maxContextThreshold ?? 80;
    const responseTruncationThreshold = settings?.responseTruncationThreshold ?? 30;
    // Calculate actual token limits from percentages
    const maxAllowedTotal = ctx.maxTokens * (maxContextThreshold / 100);
    const availableSpace = maxAllowedTotal - ctx.currentTokens;
    const projectedTotal = ctx.currentTokens + outputTokens;
    const compactionLimit = COMPACTION_MODEL_CONTEXT * (responseTruncationThreshold / 100);
    logger.info("Tool response compaction hook triggered", {
        toolCallId,
        toolName,
        outputTokens,
        currentContext: ctx.currentTokens,
        maxAllowedTotal,
        availableSpace,
        projectedTotal,
        compactionLimit,
        settings,
    });
    // Case 0: Small response, no action needed
    if (projectedTotal < maxAllowedTotal) {
        logger.info("Tool response fits within threshold, no compaction needed");
        return {
            newContextEntry: null,
            metadata: {
                action: "none",
                originalTokens: outputTokens,
                finalTokens: outputTokens,
            },
        };
    }
    // Response would exceed threshold, need to compact or truncate.
    // Target size: fit within available space, capped at compactionLimit.
    // availableSpace is negative once the context is already past the threshold;
    // clamp to 0 so no negative budget reaches slice()-based truncation, where a
    // negative end index would keep almost the entire payload.
    const targetSize = Math.max(0, Math.min(availableSpace, compactionLimit));
    const originalLabel = outputTokens.toLocaleString();
    /**
     * Truncate `data` to `targetSize`, escalating when a pass overshoots:
     * normal truncation -> truncation to 70% of the target -> a minimal error
     * structure holding a raw slice sized for 50% of the target.
     * Returns `{ stage, output, tokens }`, stage being "standard", "emergency" or "ultra".
     */
    const truncateWithFallbacks = (data, overshootMessage, errorNote) => {
        const truncated = truncateToolResponse(data, targetSize);
        const truncatedTokens = countToolResultTokens(truncated);
        if (truncatedTokens <= targetSize) {
            return { stage: "standard", output: truncated, tokens: truncatedTokens };
        }
        logger.error(overshootMessage, {
            finalTokens: truncatedTokens,
            targetSize,
            excess: truncatedTokens - targetSize,
        });
        // Try more aggressive truncation (70% of target as emergency measure)
        const emergencySize = Math.floor(targetSize * 0.7);
        const emergencyTruncated = truncateToolResponse(data, emergencySize);
        const emergencyTokens = countToolResultTokens(emergencyTruncated);
        if (emergencyTokens <= targetSize) {
            return { stage: "emergency", output: emergencyTruncated, tokens: emergencyTokens };
        }
        logger.error("Emergency truncation STILL exceeded target - using ultra-conservative fallback", {
            emergencyTokens,
            targetSize,
            emergencySize,
        });
        // Ultra-conservative: a simple error structure with the raw data sliced
        // to 50% of target. The token figure is an estimate, not a measurement.
        const ultraConservativeSize = Math.floor(targetSize * 0.5);
        return {
            stage: "ultra",
            tokens: ultraConservativeSize,
            output: {
                _truncation_error: errorNote,
                _original_token_count: outputTokens,
                _target_token_count: targetSize,
                _partial_data: JSON.stringify(data).slice(0, ultraConservativeSize * 3),
            },
        };
    };
    // Case 2: Huge response (too large for LLM compaction) or no room at all
    // (an LLM call cannot help when the budget is zero) -> must truncate.
    if (outputTokens >= compactionLimit || targetSize === 0) {
        logger.warn(outputTokens >= compactionLimit
            ? "Tool response exceeds compaction capacity, truncating"
            : "No context space left for tool response, truncating", {
            outputTokens,
            compactionLimit,
            targetSize,
            availableSpace,
        });
        const fallback = truncateWithFallbacks(rawOutput, "Truncation exceeded target size - this should not happen!", "Tool response was too large and could not be reliably truncated");
        const finalLabel = fallback.tokens.toLocaleString();
        const warnings = {
            standard: `Tool response was truncated from ${originalLabel} to ${finalLabel} tokens to fit within context limit (available space: ${availableSpace.toLocaleString()} tokens)`,
            emergency: `Tool response was aggressively truncated from ${originalLabel} to ${finalLabel} tokens to fit within context limit (emergency truncation)`,
            ultra: `Tool response was severely truncated from ${originalLabel} to ~${finalLabel} tokens (emergency truncation failed - data may be incomplete)`,
        };
        return {
            newContextEntry: null,
            metadata: {
                action: "truncated",
                originalTokens: outputTokens,
                finalTokens: fallback.tokens,
                modifiedOutput: fallback.output,
                truncationWarning: warnings[fallback.stage],
            },
        };
    }
    // Case 1: Medium response, intelligent compaction
    logger.info("Tool response requires intelligent compaction", {
        outputTokens,
        targetSize,
        availableSpace,
        compactionLimit,
    });
    try {
        // Build conversation context (text blocks of the last 5 messages)
        const conversationContext = ctx.session.messages
            .slice(-5)
            .map((msg) => {
            const text = msg.content
                .filter((b) => b.type === "text")
                .map((b) => b.text)
                .join("\n");
            return `${msg.role}: ${text}`;
        })
            .join("\n\n");
        const compacted = await compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetSize);
        const compactedTokens = countToolResultTokens(compacted);
        // Verify compaction stayed within boundaries
        if (compactedTokens > targetSize) {
            logger.warn("LLM compaction exceeded target, falling back to truncation", {
                finalTokens: compactedTokens,
                targetSize,
                excess: compactedTokens - targetSize,
            });
            // Truncate the compacted data, with the same overshoot safeguards as
            // the other truncation paths so the result is verified against the target.
            const fallback = truncateWithFallbacks(compacted, "Truncation of compacted response exceeded target, using emergency truncation", "Compacted tool response was still too large and could not be reliably truncated");
            const approx = fallback.stage === "ultra" ? "~" : "";
            return {
                newContextEntry: null,
                metadata: {
                    action: "compacted_then_truncated",
                    originalTokens: outputTokens,
                    finalTokens: fallback.tokens,
                    tokensSaved: outputTokens - fallback.tokens,
                    modifiedOutput: fallback.output,
                    truncationWarning: `Tool response was compacted then truncated from ${originalLabel} to ${approx}${fallback.tokens.toLocaleString()} tokens to fit within context limit`,
                },
            };
        }
        logger.info("Successfully compacted tool response", {
            originalTokens: outputTokens,
            finalTokens: compactedTokens,
            targetSize,
            tokensSaved: outputTokens - compactedTokens,
        });
        return {
            newContextEntry: null,
            metadata: {
                action: "compacted",
                originalTokens: outputTokens,
                finalTokens: compactedTokens,
                tokensSaved: outputTokens - compactedTokens,
                modifiedOutput: compacted,
            },
        };
    }
    catch (error) {
        logger.error("Compaction failed, falling back to truncation", {
            error: error instanceof Error ? error.message : String(error),
        });
        // Fallback to truncation of the raw output with the same target size
        const fallback = truncateWithFallbacks(rawOutput, "Fallback truncation exceeded target, using emergency truncation", "Tool response was too large and could not be reliably truncated (compaction failed)");
        const finalLabel = fallback.tokens.toLocaleString();
        const warnings = {
            standard: `Tool response was truncated from ${originalLabel} to ${finalLabel} tokens (compaction failed)`,
            emergency: `Tool response was truncated from ${originalLabel} to ${finalLabel} tokens (compaction failed, emergency truncation applied)`,
            ultra: `Tool response was severely truncated from ${originalLabel} to ~${finalLabel} tokens (compaction+emergency truncation failed)`,
        };
        return {
            newContextEntry: null,
            metadata: {
                action: "truncated",
                originalTokens: outputTokens,
                finalTokens: fallback.tokens,
                modifiedOutput: fallback.output,
                truncationWarning: warnings[fallback.stage],
            },
        };
    }
};
|
|
242
|
+
/**
 * Recursive LLM compaction with adaptive retries.
 *
 * First asks the compaction model which information matters for this tool
 * call, then asks it (up to 4 times) to rewrite the tool response as smaller
 * JSON, feeding each too-large result back in for the next attempt.
 *
 * @param rawOutput - Tool output to compact.
 * @param toolName - Name of the tool that produced the output.
 * @param toolInput - Input the tool was called with (embedded in the analysis prompt).
 * @param conversationContext - Recent conversation text (embedded in the analysis prompt).
 * @param targetTokens - Token budget the compacted output should fit in.
 * @returns The compacted data. It may still exceed `targetTokens` (a result up
 *   to 5% over is accepted, and the last result is returned when attempts run
 *   out), so callers must verify the size themselves.
 * @throws When a model call fails, or when the first compaction reply is not valid JSON.
 */
async function compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetTokens) {
    const originalTokens = countToolResultTokens(rawOutput);
    // Nothing to do if the payload already fits: skip the model calls entirely.
    // This also keeps the reduction percentage below from dividing by zero.
    if (originalTokens <= targetTokens) {
        logger.info("Tool response already within compaction target, skipping LLM compaction", {
            originalTokens,
            targetTokens,
        });
        return rawOutput;
    }
    const model = new ChatAnthropic({
        model: COMPACTION_MODEL,
        temperature: 0,
    });
    // Model replies may carry either a plain string or an array of content
    // blocks; collect the text blocks instead of serializing the block array
    // (which would later be parsed back as if it were the compacted data).
    const messageText = (content) => {
        if (typeof content === "string") {
            return content;
        }
        if (!Array.isArray(content)) {
            return "";
        }
        return content
            .map((block) => {
            if (typeof block === "string") {
                return block;
            }
            return block && block.type === "text" && typeof block.text === "string"
                ? block.text
                : "";
        })
            .join("");
    };
    // Step 1: Understand what we're looking for (only need to do this once)
    const analysisPrompt = `You are helping to manage context size in an agent conversation.

A tool was just called with these parameters:
Tool: ${toolName}
Input: ${JSON.stringify(toolInput, null, 2)}

Recent conversation context:
${conversationContext}

Based on the tool input and conversation context, what key information is the user looking for from this tool response?

Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
    const analysisResponse = await model.invoke([
        new SystemMessage("You are a helpful assistant analyzing information needs."),
        new HumanMessage(analysisPrompt),
    ]);
    const keyRequirements = messageText(analysisResponse.content).trim() || "Extract relevant information";
    logger.info("Identified key requirements for compaction", {
        requirements: keyRequirements.substring(0, 200),
    });
    // Step 2: Recursively compact until we meet the target
    let currentData = rawOutput;
    let currentTokens = originalTokens;
    const maxAttempts = 4;
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
        // Build the compaction prompt based on whether this is first attempt or retry
        let compactionPrompt;
        if (attempt === 0) {
            // First attempt - compact from original
            compactionPrompt = `You are helping to compact a large tool response to save context space.

Key information needed:
${keyRequirements}

Tool response to compact (JSON):
${JSON.stringify(currentData, null, 2)}

Current size: ${currentTokens.toLocaleString()} tokens
Target size: ${targetTokens.toLocaleString()} tokens (reduce by ${reductionNeeded}%)

Your task: Create a compacted version that:
1. Retains all information relevant to the key requirements above
2. Removes or summarizes less relevant details
3. Maintains the same JSON structure where possible
4. Reduces the size to ${targetTokens.toLocaleString()} tokens or less
5. Be aggressive in removing unnecessary data

Return ONLY valid JSON (no explanation text).`;
        }
        else {
            // Retry - need to compact further
            compactionPrompt = `Your previous compaction was good but still too large.

Key information needed:
${keyRequirements}

Previous compaction result (JSON):
${JSON.stringify(currentData, null, 2)}

Current size: ${currentTokens.toLocaleString()} tokens
Target size: ${targetTokens.toLocaleString()} tokens
You need to reduce by another ${reductionNeeded}%

Your task: Further compact this data by:
1. Being MORE aggressive in removing unnecessary details
2. Summarizing verbose content more concisely
3. Removing any redundant information
4. Keeping ONLY the most essential data related to the key requirements
5. Reduce to ${targetTokens.toLocaleString()} tokens or less

Return ONLY valid JSON (no explanation text).`;
        }
        const compactionResponse = await model.invoke([
            new SystemMessage("You are a helpful assistant compacting data."),
            new HumanMessage(compactionPrompt),
        ]);
        // Extract and parse JSON (the model may wrap it in a fenced code block)
        const responseText = messageText(compactionResponse.content);
        const fenced = responseText.match(/```(?:json)?\n([\s\S]*?)\n```/);
        const jsonText = (fenced && fenced[1]) || responseText;
        let compacted;
        try {
            compacted = JSON.parse(jsonText.trim());
        }
        catch (error) {
            if (attempt === 0) {
                // Nothing usable yet; let the caller fall back to truncation.
                throw error;
            }
            // A retry produced unparseable output: keep the previous valid
            // compaction rather than discarding the work already done.
            logger.warn("LLM compaction retry returned invalid JSON, keeping previous result", {
                attempt: attempt + 1,
                finalTokens: currentTokens,
                targetTokens,
                error: error instanceof Error ? error.message : String(error),
            });
            return currentData;
        }
        const compactedTokens = countToolResultTokens(compacted);
        logger.info(`LLM compaction attempt ${attempt + 1}/${maxAttempts}`, {
            currentTokens,
            compactedTokens,
            targetTokens,
            reductionAchieved: currentTokens - compactedTokens,
        });
        // Check if we've met the target
        if (compactedTokens <= targetTokens) {
            logger.info("LLM compaction succeeded", {
                attempts: attempt + 1,
                originalTokens,
                finalTokens: compactedTokens,
                targetTokens,
            });
            return compacted;
        }
        // If we're within 5% of target, accept it (close enough)
        if (compactedTokens <= targetTokens * 1.05) {
            logger.info("LLM compaction close enough to target", {
                attempts: attempt + 1,
                finalTokens: compactedTokens,
                targetTokens,
                overshoot: compactedTokens - targetTokens,
            });
            return compacted;
        }
        // Still too large - prepare for another attempt
        currentData = compacted;
        currentTokens = compactedTokens;
    }
    // If we exhausted all attempts, return the last result anyway
    logger.warn("LLM compaction exhausted attempts but did not meet target", {
        finalTokens: currentTokens,
        targetTokens,
        overshoot: currentTokens - targetTokens,
    });
    return currentData;
}
|
|
379
|
+
/**
 * Truncate tool response to target token count.
 *
 * Serializes the response, cuts the JSON text at a structurally safe point,
 * closes the containers that are still open, and retries with a smaller cut
 * until the annotated result fits the budget (at most 15 attempts).
 *
 * @param rawOutput - Tool output to truncate.
 * @param targetTokens - Token budget; zero, negative or NaN means "no room".
 * @returns One of:
 *   - `rawOutput` itself when it already fits;
 *   - the truncated data plus `_truncation_notice`, `_original_token_count` and
 *     `_truncated_token_count` (non-object data, e.g. an array, is placed
 *     under `content` so it is not spread into index keys);
 *   - a `{ truncated, originalSize, targetSize, content, warning }` structure
 *     holding a raw string slice when no parseable truncation fits.
 *   The last two forms are not guaranteed to fit (a result up to 10% over the
 *   budget is accepted after the final attempt), so callers must re-count.
 */
function truncateToolResponse(rawOutput, targetTokens) {
    const currentTokens = countToolResultTokens(rawOutput);
    if (currentTokens <= targetTokens) {
        return rawOutput; // Already within limit
    }
    // A negative budget would become a negative slice() end below, which keeps
    // everything except the last few characters instead of truncating.
    const budget = targetTokens > 0 ? targetTokens : 0;
    const outputString = JSON.stringify(rawOutput);
    // Cut a JSON prefix back to the last point where it can be closed validly
    // (right after an opening/closing bracket, or just before a comma) and
    // append the closers for every container still open, innermost first.
    // Brackets inside string literals are ignored. Returns null when the
    // prefix has no such point.
    const closeJsonPrefix = (prefix) => {
        let closers = ""; // pending closers, outermost first
        let inString = false;
        let escaped = false;
        let safeEnd = -1;
        let safeClosers = "";
        for (let i = 0; i < prefix.length; i++) {
            const ch = prefix[i];
            if (inString) {
                if (escaped) {
                    escaped = false;
                }
                else if (ch === "\\") {
                    escaped = true;
                }
                else if (ch === '"') {
                    inString = false;
                }
                continue;
            }
            if (ch === '"') {
                inString = true;
            }
            else if (ch === "{" || ch === "[") {
                closers += ch === "{" ? "}" : "]";
                safeEnd = i + 1;
                safeClosers = closers;
            }
            else if (ch === "}" || ch === "]") {
                closers = closers.slice(0, -1);
                safeEnd = i + 1;
                safeClosers = closers;
            }
            else if (ch === ",") {
                // Everything before a comma is a run of complete members/elements.
                safeEnd = i;
                safeClosers = closers;
            }
        }
        if (safeEnd < 0) {
            return null;
        }
        return prefix.slice(0, safeEnd) + [...safeClosers].reverse().join("");
    };
    // Attach the truncation metadata to the parsed data.
    const withNotice = (parsed, parsedTokens) => {
        const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
        return {
            ...(isPlainObject ? parsed : { content: parsed }),
            _truncation_notice: "... [TRUNCATED - response exceeded size limit]",
            _original_token_count: currentTokens,
            _truncated_token_count: parsedTokens,
        };
    };
    // Start with 70% of target to leave room for closing brackets and metadata
    let ratio = 0.7;
    let best = null;
    // Iteratively truncate until we meet the target
    for (let attempt = 0; attempt < 15; attempt++) {
        // Calculate character limit based on ratio
        const targetChars = Math.floor((budget * ratio * outputString.length) / currentTokens);
        if (targetChars <= 0) {
            break; // No room for any data; use the conservative fallback below
        }
        const repaired = closeJsonPrefix(outputString.slice(0, targetChars));
        let candidate = null;
        if (repaired !== null) {
            try {
                const parsed = JSON.parse(repaired);
                const result = withNotice(parsed, countToolResultTokens(parsed));
                // Measure the object that is actually returned, metadata included.
                candidate = { result, tokens: countToolResultTokens(result) };
            }
            catch {
                candidate = null;
            }
        }
        if (candidate === null) {
            // Could not produce parseable JSON, try more aggressive truncation
            ratio *= 0.85;
            continue;
        }
        if (candidate.tokens <= budget) {
            return candidate.result;
        }
        if (best === null || candidate.tokens < best.tokens) {
            best = candidate;
        }
        // Still too large - reduce ratio proportionally to the overshoot
        const overshootRatio = candidate.tokens / budget; // e.g., 1.03 if we're 3% over
        ratio = (ratio / overshootRatio) * 0.95; // Overshoot amount plus 5% safety margin
        logger.debug("Truncation attempt resulted in overshoot, retrying", {
            attempt,
            targetTokens,
            parsedTokens: candidate.tokens,
            overshootRatio,
            newRatio: ratio,
        });
    }
    // If we exhausted all attempts, return the smallest parsed result if it is
    // within 10% of target, or a very conservative fallback
    if (best !== null && best.tokens <= budget * 1.1) {
        logger.warn("Truncation reached attempt limit but result is close enough", {
            targetTokens,
            actualTokens: best.tokens,
        });
        return best.result;
    }
    // If all attempts failed, return a simple truncated structure
    const safeChars = Math.floor(budget * 3); // Very conservative
    return {
        truncated: true,
        originalSize: currentTokens,
        targetSize: targetTokens,
        content: outputString.slice(0, safeChars),
        warning: "Response was truncated due to size constraints (JSON parsing failed)",
    };
}
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { compactionTool } from "./predefined/compaction-tool";
|
|
2
|
+
import { toolResponseCompactor } from "./predefined/tool-response-compactor";
|
|
2
3
|
/**
|
|
3
4
|
* Registry of predefined hook callbacks
|
|
4
5
|
* Maps callback names to their implementations
|
|
5
6
|
*/
|
|
6
7
|
export const HOOK_REGISTRY = {
|
|
7
8
|
compaction_tool: compactionTool,
|
|
9
|
+
tool_response_compactor: toolResponseCompactor,
|
|
8
10
|
};
|
|
9
11
|
/**
|
|
10
12
|
* Check if a callback name is a predefined hook
|