@townco/agent 0.1.50 → 0.1.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/acp-server/adapter.d.ts +10 -0
- package/dist/acp-server/adapter.js +287 -80
- package/dist/acp-server/cli.d.ts +1 -3
- package/dist/acp-server/http.js +8 -1
- package/dist/acp-server/index.js +5 -0
- package/dist/acp-server/session-storage.d.ts +17 -3
- package/dist/acp-server/session-storage.js +9 -0
- package/dist/bin.js +0 -0
- package/dist/check-jaeger.d.ts +5 -0
- package/dist/check-jaeger.js +82 -0
- package/dist/definition/index.d.ts +16 -4
- package/dist/definition/index.js +17 -4
- package/dist/index.js +1 -1
- package/dist/run-subagents.d.ts +9 -0
- package/dist/run-subagents.js +110 -0
- package/dist/runner/agent-runner.d.ts +10 -2
- package/dist/runner/agent-runner.js +4 -0
- package/dist/runner/hooks/executor.d.ts +17 -0
- package/dist/runner/hooks/executor.js +66 -0
- package/dist/runner/hooks/predefined/compaction-tool.js +9 -1
- package/dist/runner/hooks/predefined/tool-response-compactor.d.ts +6 -0
- package/dist/runner/hooks/predefined/tool-response-compactor.js +461 -0
- package/dist/runner/hooks/registry.js +2 -0
- package/dist/runner/hooks/types.d.ts +39 -3
- package/dist/runner/hooks/types.js +9 -4
- package/dist/runner/index.d.ts +1 -3
- package/dist/runner/langchain/custom-stream-types.d.ts +36 -0
- package/dist/runner/langchain/custom-stream-types.js +23 -0
- package/dist/runner/langchain/index.js +102 -76
- package/dist/runner/langchain/otel-callbacks.js +67 -1
- package/dist/runner/langchain/tools/bash.d.ts +14 -0
- package/dist/runner/langchain/tools/bash.js +135 -0
- package/dist/scaffold/link-local.d.ts +1 -0
- package/dist/scaffold/link-local.js +54 -0
- package/dist/scaffold/project-scaffold.js +1 -0
- package/dist/telemetry/setup.d.ts +3 -1
- package/dist/telemetry/setup.js +33 -3
- package/dist/templates/index.d.ts +7 -0
- package/dist/test-telemetry.d.ts +5 -0
- package/dist/test-telemetry.js +88 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/context-size-calculator.d.ts +29 -0
- package/dist/utils/context-size-calculator.js +78 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.js +2 -0
- package/dist/utils/token-counter.d.ts +19 -0
- package/dist/utils/token-counter.js +44 -0
- package/index.ts +1 -1
- package/package.json +7 -6
- package/templates/index.ts +18 -6
- package/dist/definition/mcp.d.ts +0 -0
- package/dist/definition/mcp.js +0 -0
- package/dist/definition/tools/todo.d.ts +0 -49
- package/dist/definition/tools/todo.js +0 -80
- package/dist/definition/tools/web_search.d.ts +0 -4
- package/dist/definition/tools/web_search.js +0 -26
- package/dist/dev-agent/index.d.ts +0 -2
- package/dist/dev-agent/index.js +0 -18
- package/dist/example.d.ts +0 -2
- package/dist/example.js +0 -19
package/dist/runner/hooks/predefined/tool-response-compactor.js
@@ -0,0 +1,461 @@
+import { ChatAnthropic } from "@langchain/anthropic";
+import { HumanMessage, SystemMessage } from "@langchain/core/messages";
+import { createLogger } from "@townco/core";
+import { countToolResultTokens } from "../../../utils/token-counter.js";
+const logger = createLogger("tool-response-compactor");
+// Haiku 4.5 for compaction (fast and cost-effective)
+const COMPACTION_MODEL = "claude-haiku-4-5-20251001";
+const COMPACTION_MODEL_CONTEXT = 200000; // Haiku context size for calculating truncation limits
+/**
+ * Tool response compaction hook - compacts or truncates large tool responses
+ * to prevent context overflow
+ */
+export const toolResponseCompactor = async (ctx) => {
+    // Only process if we have tool response data
+    if (!ctx.toolResponse) {
+        logger.warn("toolResponseCompactor called without tool response data");
+        return { newContextEntry: null };
+    }
+    const { toolCallId, toolName, toolInput, rawOutput, outputTokens } = ctx.toolResponse;
+    // Get settings from hook configuration
+    const settings = ctx.session.requestParams.hookSettings;
+    const maxContextThreshold = settings?.maxContextThreshold ?? 80;
+    const responseTruncationThreshold = settings?.responseTruncationThreshold ?? 30;
+    // Calculate actual token limits from percentages
+    const maxAllowedTotal = ctx.maxTokens * (maxContextThreshold / 100);
+    const availableSpace = maxAllowedTotal - ctx.currentTokens;
+    const projectedTotal = ctx.currentTokens + outputTokens;
+    const compactionLimit = COMPACTION_MODEL_CONTEXT * (responseTruncationThreshold / 100);
+    logger.info("Tool response compaction hook triggered", {
+        toolCallId,
+        toolName,
+        outputTokens,
+        currentContext: ctx.currentTokens,
+        maxAllowedTotal,
+        availableSpace,
+        projectedTotal,
+        compactionLimit,
+        settings,
+    });
+    // Case 0: Small response, no action needed
+    if (projectedTotal < maxAllowedTotal) {
+        logger.info("Tool response fits within threshold, no compaction needed");
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "none",
+                originalTokens: outputTokens,
+                finalTokens: outputTokens,
+            },
+        };
+    }
+    // Response would exceed threshold, need to compact or truncate
+    // Determine target size: fit within available space, but cap at compactionLimit for truncation
+    const targetSize = Math.min(availableSpace, compactionLimit);
+    // Case 2: Huge response, must truncate (too large for LLM compaction)
+    if (outputTokens >= compactionLimit) {
+        logger.warn("Tool response exceeds compaction capacity, truncating", {
+            outputTokens,
+            compactionLimit,
+            targetSize,
+            availableSpace,
+        });
+        const truncated = truncateToolResponse(rawOutput, targetSize);
+        const finalTokens = countToolResultTokens(truncated);
+        // Verify truncation stayed within boundaries
+        if (finalTokens > targetSize) {
+            logger.error("Truncation exceeded target size - this should not happen!", {
+                finalTokens,
+                targetSize,
+                excess: finalTokens - targetSize,
+            });
+            // Try more aggressive truncation (70% of target as emergency measure)
+            const emergencySize = Math.floor(targetSize * 0.7);
+            const emergencyTruncated = truncateToolResponse(rawOutput, emergencySize);
+            let emergencyTokens = countToolResultTokens(emergencyTruncated);
+            // Final safety check - if emergency truncation STILL exceeded target, use ultra-conservative fallback
+            if (emergencyTokens > targetSize) {
+                logger.error("Emergency truncation STILL exceeded target - using ultra-conservative fallback", {
+                    emergencyTokens,
+                    targetSize,
+                    emergencySize,
+                });
+                // Ultra-conservative: just return a simple error structure with the raw data sliced to 50% of target
+                const ultraConservativeSize = Math.floor(targetSize * 0.5);
+                return {
+                    newContextEntry: null,
+                    metadata: {
+                        action: "truncated",
+                        originalTokens: outputTokens,
+                        finalTokens: ultraConservativeSize, // Conservative estimate
+                        modifiedOutput: {
+                            _truncation_error: "Tool response was too large and could not be reliably truncated",
+                            _original_token_count: outputTokens,
+                            _target_token_count: targetSize,
+                            _partial_data: JSON.stringify(rawOutput).slice(0, ultraConservativeSize * 3),
+                        },
+                        truncationWarning: `Tool response was severely truncated from ${outputTokens.toLocaleString()} to ~${ultraConservativeSize.toLocaleString()} tokens (emergency truncation failed - data may be incomplete)`,
+                    },
+                };
+            }
+            return {
+                newContextEntry: null,
+                metadata: {
+                    action: "truncated",
+                    originalTokens: outputTokens,
+                    finalTokens: emergencyTokens,
+                    modifiedOutput: emergencyTruncated,
+                    truncationWarning: `Tool response was aggressively truncated from ${outputTokens.toLocaleString()} to ${emergencyTokens.toLocaleString()} tokens to fit within context limit (emergency truncation)`,
+                },
+            };
+        }
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "truncated",
+                originalTokens: outputTokens,
+                finalTokens,
+                modifiedOutput: truncated,
+                truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens to fit within context limit (available space: ${availableSpace.toLocaleString()} tokens)`,
+            },
+        };
+    }
+    // Case 1: Medium response, intelligent compaction
+    logger.info("Tool response requires intelligent compaction", {
+        outputTokens,
+        targetSize,
+        availableSpace,
+        compactionLimit,
+    });
+    try {
+        // Build conversation context (last 5 messages)
+        const recentMessages = ctx.session.messages.slice(-5);
+        const conversationContext = recentMessages
+            .map((msg) => {
+                const text = msg.content
+                    .filter((b) => b.type === "text")
+                    .map((b) => b.text)
+                    .join("\n");
+                return `${msg.role}: ${text}`;
+            })
+            .join("\n\n");
+        const compacted = await compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetSize);
+        let finalTokens = countToolResultTokens(compacted);
+        // Verify compaction stayed within boundaries
+        if (finalTokens > targetSize) {
+            logger.warn("LLM compaction exceeded target, falling back to truncation", {
+                finalTokens,
+                targetSize,
+                excess: finalTokens - targetSize,
+            });
+            // Fallback to truncation
+            const truncated = truncateToolResponse(compacted, targetSize);
+            finalTokens = countToolResultTokens(truncated);
+            return {
+                newContextEntry: null,
+                metadata: {
+                    action: "compacted_then_truncated",
+                    originalTokens: outputTokens,
+                    finalTokens,
+                    tokensSaved: outputTokens - finalTokens,
+                    modifiedOutput: truncated,
+                    truncationWarning: `Tool response was compacted then truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens to fit within context limit`,
+                },
+            };
+        }
+        logger.info("Successfully compacted tool response", {
+            originalTokens: outputTokens,
+            finalTokens,
+            targetSize,
+            tokensSaved: outputTokens - finalTokens,
+        });
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "compacted",
+                originalTokens: outputTokens,
+                finalTokens,
+                tokensSaved: outputTokens - finalTokens,
+                modifiedOutput: compacted,
+            },
+        };
+    }
+    catch (error) {
+        logger.error("Compaction failed, falling back to truncation", {
+            error: error instanceof Error ? error.message : String(error),
+        });
+        // Fallback to truncation with the same target size
+        const truncated = truncateToolResponse(rawOutput, targetSize);
+        let finalTokens = countToolResultTokens(truncated);
+        // Verify truncation stayed within boundaries
+        if (finalTokens > targetSize) {
+            logger.error("Fallback truncation exceeded target, using emergency truncation", {
+                finalTokens,
+                targetSize,
+            });
+            const emergencySize = Math.floor(targetSize * 0.7);
+            const emergencyTruncated = truncateToolResponse(rawOutput, emergencySize);
+            finalTokens = countToolResultTokens(emergencyTruncated);
+            // Final safety check
+            if (finalTokens > targetSize) {
+                logger.error("Emergency truncation STILL exceeded target - using ultra-conservative fallback");
+                const ultraConservativeSize = Math.floor(targetSize * 0.5);
+                return {
+                    newContextEntry: null,
+                    metadata: {
+                        action: "truncated",
+                        originalTokens: outputTokens,
+                        finalTokens: ultraConservativeSize,
+                        modifiedOutput: {
+                            _truncation_error: "Tool response was too large and could not be reliably truncated (compaction failed)",
+                            _original_token_count: outputTokens,
+                            _target_token_count: targetSize,
+                            _partial_data: JSON.stringify(rawOutput).slice(0, ultraConservativeSize * 3),
+                        },
+                        truncationWarning: `Tool response was severely truncated from ${outputTokens.toLocaleString()} to ~${ultraConservativeSize.toLocaleString()} tokens (compaction+emergency truncation failed)`,
+                    },
+                };
+            }
+            return {
+                newContextEntry: null,
+                metadata: {
+                    action: "truncated",
+                    originalTokens: outputTokens,
+                    finalTokens,
+                    modifiedOutput: emergencyTruncated,
+                    truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens (compaction failed, emergency truncation applied)`,
+                },
+            };
+        }
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "truncated",
+                originalTokens: outputTokens,
+                finalTokens,
+                modifiedOutput: truncated,
+                truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens (compaction failed)`,
+            },
+        };
+    }
+};
+/**
+ * Recursive LLM compaction with adaptive retries
+ */
+async function compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetTokens) {
+    const model = new ChatAnthropic({
+        model: COMPACTION_MODEL,
+        temperature: 0,
+    });
+    // Step 1: Understand what we're looking for (only need to do this once)
+    const analysisPrompt = `You are helping to manage context size in an agent conversation.
+
+A tool was just called with these parameters:
+Tool: ${toolName}
+Input: ${JSON.stringify(toolInput, null, 2)}
+
+Recent conversation context:
+${conversationContext}
+
+Based on the tool input and conversation context, what key information is the user looking for from this tool response?
+
+Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
+    const analysisResponse = await model.invoke([
+        new SystemMessage("You are a helpful assistant analyzing information needs."),
+        new HumanMessage(analysisPrompt),
+    ]);
+    const keyRequirements = typeof analysisResponse.content === "string"
+        ? analysisResponse.content
+        : "Extract relevant information";
+    logger.info("Identified key requirements for compaction", {
+        requirements: keyRequirements.substring(0, 200),
+    });
+    // Step 2: Recursively compact until we meet the target
+    let currentData = rawOutput;
+    let currentTokens = countToolResultTokens(rawOutput);
+    const maxAttempts = 4;
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+        const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
+        // Build the compaction prompt based on whether this is first attempt or retry
+        let compactionPrompt;
+        if (attempt === 0) {
+            // First attempt - compact from original
+            compactionPrompt = `You are helping to compact a large tool response to save context space.
+
+Key information needed:
+${keyRequirements}
+
+Tool response to compact (JSON):
+${JSON.stringify(currentData, null, 2)}
+
+Current size: ${currentTokens.toLocaleString()} tokens
+Target size: ${targetTokens.toLocaleString()} tokens (reduce by ${reductionNeeded}%)
+
+Your task: Create a compacted version that:
+1. Retains all information relevant to the key requirements above
+2. Removes or summarizes less relevant details
+3. Maintains the same JSON structure where possible
+4. Reduces the size to ${targetTokens.toLocaleString()} tokens or less
+5. Be aggressive in removing unnecessary data
+
+Return ONLY valid JSON (no explanation text).`;
+        }
+        else {
+            // Retry - need to compact further
+            compactionPrompt = `Your previous compaction was good but still too large.
+
+Key information needed:
+${keyRequirements}
+
+Previous compaction result (JSON):
+${JSON.stringify(currentData, null, 2)}
+
+Current size: ${currentTokens.toLocaleString()} tokens
+Target size: ${targetTokens.toLocaleString()} tokens
+You need to reduce by another ${reductionNeeded}%
+
+Your task: Further compact this data by:
+1. Being MORE aggressive in removing unnecessary details
+2. Summarizing verbose content more concisely
+3. Removing any redundant information
+4. Keeping ONLY the most essential data related to the key requirements
+5. Reduce to ${targetTokens.toLocaleString()} tokens or less
+
+Return ONLY valid JSON (no explanation text).`;
+        }
+        const compactionResponse = await model.invoke([
+            new SystemMessage("You are a helpful assistant compacting data."),
+            new HumanMessage(compactionPrompt),
+        ]);
+        // Extract and parse JSON
+        const responseText = typeof compactionResponse.content === "string"
+            ? compactionResponse.content
+            : JSON.stringify(compactionResponse.content);
+        const jsonMatch = responseText.match(/```(?:json)?\n([\s\S]*?)\n```/) || [
+            null,
+            responseText,
+        ];
+        const jsonText = jsonMatch[1] || responseText;
+        const compacted = JSON.parse(jsonText.trim());
+        const compactedTokens = countToolResultTokens(compacted);
+        logger.info(`LLM compaction attempt ${attempt + 1}/${maxAttempts}`, {
+            currentTokens,
+            compactedTokens,
+            targetTokens,
+            reductionAchieved: currentTokens - compactedTokens,
+        });
+        // Check if we've met the target
+        if (compactedTokens <= targetTokens) {
+            logger.info("LLM compaction succeeded", {
+                attempts: attempt + 1,
+                originalTokens: countToolResultTokens(rawOutput),
+                finalTokens: compactedTokens,
+                targetTokens,
+            });
+            return compacted;
+        }
+        // If we're within 5% of target, accept it (close enough)
+        if (compactedTokens <= targetTokens * 1.05) {
+            logger.info("LLM compaction close enough to target", {
+                attempts: attempt + 1,
+                finalTokens: compactedTokens,
+                targetTokens,
+                overshoot: compactedTokens - targetTokens,
+            });
+            return compacted;
+        }
+        // Still too large - prepare for another attempt
+        currentData = compacted;
+        currentTokens = compactedTokens;
+    }
+    // If we exhausted all attempts, return the last result anyway
+    logger.warn("LLM compaction exhausted attempts but did not meet target", {
+        finalTokens: currentTokens,
+        targetTokens,
+        overshoot: currentTokens - targetTokens,
+    });
+    return currentData;
+}
+/**
+ * Truncate tool response to target token count
+ * Uses iterative approach to ensure we stay under the target
+ */
+function truncateToolResponse(rawOutput, targetTokens) {
+    const currentTokens = countToolResultTokens(rawOutput);
+    if (currentTokens <= targetTokens) {
+        return rawOutput; // Already within limit
+    }
+    const outputString = JSON.stringify(rawOutput);
+    // Start with 70% of target to leave significant room for closing braces and metadata
+    let ratio = 0.7;
+    let lastResult = null;
+    // Iteratively truncate until we meet the target
+    for (let attempt = 0; attempt < 15; attempt++) {
+        // Calculate character limit based on ratio
+        const targetChars = Math.floor((targetTokens * ratio * outputString.length) / currentTokens);
+        // Truncate the JSON string
+        let truncated = outputString.slice(0, targetChars);
+        // Try to close any open JSON structures
+        const openBraces = (truncated.match(/{/g) || []).length;
+        const closeBraces = (truncated.match(/}/g) || []).length;
+        const openBrackets = (truncated.match(/\[/g) || []).length;
+        const closeBrackets = (truncated.match(/\]/g) || []).length;
+        truncated += "}".repeat(Math.max(0, openBraces - closeBraces));
+        truncated += "]".repeat(Math.max(0, openBrackets - closeBrackets));
+        try {
+            // Try to parse as valid JSON
+            const parsed = JSON.parse(truncated);
+            const parsedTokens = countToolResultTokens(parsed);
+            // Store the result
+            lastResult = { parsed, tokens: parsedTokens };
+            if (parsedTokens <= targetTokens) {
+                // Success! Add truncation notice
+                return {
+                    ...parsed,
+                    _truncation_notice: "... [TRUNCATED - response exceeded size limit]",
+                    _original_token_count: currentTokens,
+                    _truncated_token_count: parsedTokens,
+                };
+            }
+            // Still too large - calculate how much we need to reduce
+            // If we overshot, reduce ratio proportionally to how much we exceeded
+            const overshootRatio = parsedTokens / targetTokens; // e.g., 1.03 if we're 3% over
+            ratio = (ratio / overshootRatio) * 0.95; // Reduce by overshoot amount plus 5% safety margin
+            logger.debug("Truncation attempt resulted in overshoot, retrying", {
+                attempt,
+                targetTokens,
+                parsedTokens,
+                overshootRatio,
+                newRatio: ratio,
+            });
+        }
+        catch {
+            // JSON parse failed, try more aggressive truncation
+            ratio *= 0.85;
+        }
+    }
+    // If we exhausted all attempts, return the last successful parse (if any)
+    // or a very conservative fallback
+    if (lastResult && lastResult.tokens <= targetTokens * 1.1) {
+        // Within 10% of target - good enough
+        logger.warn("Truncation reached attempt limit but result is close enough", {
+            targetTokens,
+            actualTokens: lastResult.tokens,
+        });
+        return {
+            ...lastResult.parsed,
+            _truncation_notice: "... [TRUNCATED - response exceeded size limit]",
+            _original_token_count: currentTokens,
+            _truncated_token_count: lastResult.tokens,
+        };
+    }
+    // If all attempts failed, return a simple truncated structure
+    const safeChars = Math.floor(targetTokens * 3); // Very conservative
+    return {
+        truncated: true,
+        originalSize: currentTokens,
+        targetSize: targetTokens,
+        content: outputString.slice(0, safeChars),
+        warning: "Response was truncated due to size constraints (JSON parsing failed)",
+    };
+}
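
To make the three branches above concrete, here is a small illustrative sketch (TypeScript) that redoes the budget arithmetic with the defaults from this file (maxContextThreshold 80, responseTruncationThreshold 30, COMPACTION_MODEL_CONTEXT 200000); the window size and token counts are assumed values, not taken from the package.

// Illustrative only: mirrors the budget math in toolResponseCompactor with assumed inputs.
const mainModelContext = 200_000; // assumed ctx.maxTokens
const currentTokens = 150_000;    // assumed tokens already in the session
const outputTokens = 40_000;      // assumed size of the incoming tool response

const maxAllowedTotal = mainModelContext * (80 / 100);        // 160,000
const availableSpace = maxAllowedTotal - currentTokens;       // 10,000
const projectedTotal = currentTokens + outputTokens;          // 190,000 -> exceeds 160,000
const compactionLimit = 200_000 * (30 / 100);                 // 60,000 (Haiku-based cap)
const targetSize = Math.min(availableSpace, compactionLimit); // 10,000

// Case 0: projectedTotal < maxAllowedTotal           -> response kept as-is
// Case 1: outputTokens < compactionLimit (40k < 60k) -> LLM compaction toward targetSize
// Case 2: outputTokens >= compactionLimit            -> iterative string truncation instead
console.log({ maxAllowedTotal, availableSpace, projectedTotal, compactionLimit, targetSize });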

package/dist/runner/hooks/registry.js
@@ -1,10 +1,12 @@
 import { compactionTool } from "./predefined/compaction-tool";
+import { toolResponseCompactor } from "./predefined/tool-response-compactor";
 /**
  * Registry of predefined hook callbacks
  * Maps callback names to their implementations
  */
 export const HOOK_REGISTRY = {
     compaction_tool: compactionTool,
+    tool_response_compactor: toolResponseCompactor,
 };
 /**
  * Check if a callback name is a predefined hook
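
A minimal lookup sketch for the registry above, assuming a direct import of this module (the relative specifier is illustrative, not taken from the package):

import { HOOK_REGISTRY } from "./registry.js"; // hypothetical import specifier

// Predefined callback names resolve straight from the registry; any other string is
// treated as a file path, per the HookConfig examples ("compaction_tool" or "./hooks/my_compaction_tool.ts").
const callback = HOOK_REGISTRY["tool_response_compactor"];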

package/dist/runner/hooks/types.d.ts
@@ -3,7 +3,7 @@ import type { SessionMessage } from "../agent-runner";
 /**
  * Hook types supported by the agent system
  */
-export type HookType = "context_size";
+export type HookType = "context_size" | "tool_response";
 /**
  * Settings for context_size hook
  */
@@ -13,6 +13,24 @@ export interface ContextSizeSettings {
     */
    threshold: number;
}
+/**
+ * Settings for tool_response hook
+ */
+export interface ToolResponseSettings {
+    /**
+     * Maximum % of main model context that tool response + current context can reach
+     * If adding the tool response would exceed this, compaction is triggered
+     * Default: 80
+     */
+    maxContextThreshold?: number | undefined;
+    /**
+     * Maximum % of compaction model context (Haiku: 200k) that a tool response can be
+     * to attempt LLM-based compaction. Larger responses are truncated instead.
+     * The truncation limit is also this percentage.
+     * Default: 30
+     */
+    responseTruncationThreshold?: number | undefined;
+}
 /**
  * Hook configuration in agent definition
  */
@@ -24,7 +42,7 @@ export interface HookConfig {
    /**
     * Optional hook-specific settings
     */
-    setting?: ContextSizeSettings | undefined;
+    setting?: ContextSizeSettings | ToolResponseSettings | undefined;
    /**
     * Callback reference - either a predefined hook name or a file path
     * Examples: "compaction_tool" or "./hooks/my_compaction_tool.ts"
@@ -72,6 +90,16 @@ export interface HookContext {
     * The model being used
     */
    model: string;
+    /**
+     * Tool response data (only for tool_response hooks)
+     */
+    toolResponse?: {
+        toolCallId: string;
+        toolName: string;
+        toolInput: Record<string, unknown>;
+        rawOutput: Record<string, unknown>;
+        outputTokens: number;
+    };
 }
 /**
  * Result returned by hook callbacks
@@ -106,7 +134,15 @@ export declare function createContextEntry(messages: Array<{
 } | {
     type: "full";
     message: SessionMessage;
-}>, timestamp?: string, compactedUpTo?: number,
+}>, timestamp?: string, compactedUpTo?: number, context_size?: {
+    systemPromptTokens: number;
+    userMessagesTokens: number;
+    assistantMessagesTokens: number;
+    toolInputTokens: number;
+    toolResultsTokens: number;
+    totalEstimated: number;
+    llmReportedInputTokens?: number | undefined;
+}): ContextEntry;
 /**
  * Helper function to create a full message entry for context
  * Use this when hooks need to inject new messages into context
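
For illustration, a minimal custom tool_response hook sketch against the context shape added above. The structural type is re-declared locally so the snippet stands alone (in the package it comes from this hooks/types module), and the { newContextEntry, metadata } return shape follows the toolResponseCompactor implementation earlier in this diff; the hook itself is hypothetical.

// Local stand-in mirroring the toolResponse fields added to HookContext (illustrative only).
type ToolResponseHookCtx = {
  currentTokens: number;
  maxTokens: number;
  model: string;
  toolResponse?: {
    toolCallId: string;
    toolName: string;
    toolInput: Record<string, unknown>;
    rawOutput: Record<string, unknown>;
    outputTokens: number;
  };
};

// A deliberately simple hook: record metadata for large tool outputs, change nothing.
export const flagLargeToolOutputs = async (ctx: ToolResponseHookCtx) => {
  if (!ctx.toolResponse || ctx.toolResponse.outputTokens <= 10_000) {
    return { newContextEntry: null };
  }
  return {
    newContextEntry: null,
    metadata: {
      action: "none",
      originalTokens: ctx.toolResponse.outputTokens,
      finalTokens: ctx.toolResponse.outputTokens,
    },
  };
};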

package/dist/runner/hooks/types.js
@@ -2,17 +2,22 @@
  * Helper function to create a new context entry
  * Use this when hooks want to create a new context snapshot
  */
-export function createContextEntry(messages, timestamp, compactedUpTo,
+export function createContextEntry(messages, timestamp, compactedUpTo, context_size) {
     const entry = {
         timestamp: timestamp || new Date().toISOString(),
         messages,
+        context_size: context_size || {
+            systemPromptTokens: 0,
+            userMessagesTokens: 0,
+            assistantMessagesTokens: 0,
+            toolInputTokens: 0,
+            toolResultsTokens: 0,
+            totalEstimated: 0,
+        },
     };
     if (compactedUpTo !== undefined) {
         entry.compactedUpTo = compactedUpTo;
     }
-    if (inputTokens !== undefined) {
-        entry.inputTokens = inputTokens;
-    }
     return entry;
 }
 /**
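
A usage sketch for the updated helper, assuming it is imported from this hooks/types module (the import specifier, the empty messages array, and the token numbers are placeholders):

import { createContextEntry } from "./types.js"; // hypothetical import specifier

// Callers that omit context_size now get an all-zero breakdown by default;
// here an explicit (made-up) breakdown is passed instead.
const entry = createContextEntry([], new Date().toISOString(), undefined, {
  systemPromptTokens: 1_200,
  userMessagesTokens: 3_400,
  assistantMessagesTokens: 2_100,
  toolInputTokens: 500,
  toolResultsTokens: 8_000,
  totalEstimated: 15_200,
});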

package/dist/runner/index.d.ts
CHANGED
@@ -1,6 +1,4 @@
 import type { AgentDefinition } from "../definition";
 import { type AgentRunner } from "./agent-runner";
 export type { AgentRunner };
-export declare const makeRunnerFromDefinition: (
-  definition: AgentDefinition,
-) => AgentRunner;
+export declare const makeRunnerFromDefinition: (definition: AgentDefinition) => AgentRunner;

package/dist/runner/langchain/custom-stream-types.d.ts
@@ -0,0 +1,36 @@
+import { z } from "zod";
+/**
+ * Custom stream events emitted by subagent tools via config.writer
+ */
+export declare const SubagentToolCallEventSchema: z.ZodObject<{
+    type: z.ZodLiteral<"tool_call">;
+    toolName: z.ZodString;
+}, z.core.$strip>;
+export declare const SubagentMessageEventSchema: z.ZodObject<{
+    type: z.ZodLiteral<"message">;
+    text: z.ZodString;
+}, z.core.$strip>;
+export declare const SubagentEventSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
+    type: z.ZodLiteral<"tool_call">;
+    toolName: z.ZodString;
+}, z.core.$strip>, z.ZodObject<{
+    type: z.ZodLiteral<"message">;
+    text: z.ZodString;
+}, z.core.$strip>], "type">;
+export type SubagentToolCallEvent = z.infer<typeof SubagentToolCallEventSchema>;
+export type SubagentMessageEvent = z.infer<typeof SubagentMessageEventSchema>;
+export type SubagentEvent = z.infer<typeof SubagentEventSchema>;
+/**
+ * Wrapper for subagent events that includes the parent tool call ID
+ */
+export declare const CustomStreamChunkSchema: z.ZodObject<{
+    parentToolCallId: z.ZodString;
+    event: z.ZodDiscriminatedUnion<[z.ZodObject<{
+        type: z.ZodLiteral<"tool_call">;
+        toolName: z.ZodString;
+    }, z.core.$strip>, z.ZodObject<{
+        type: z.ZodLiteral<"message">;
+        text: z.ZodString;
+    }, z.core.$strip>], "type">;
+}, z.core.$strip>;
+export type CustomStreamChunk = z.infer<typeof CustomStreamChunkSchema>;

package/dist/runner/langchain/custom-stream-types.js
@@ -0,0 +1,23 @@
+import { z } from "zod";
+/**
+ * Custom stream events emitted by subagent tools via config.writer
+ */
+export const SubagentToolCallEventSchema = z.object({
+    type: z.literal("tool_call"),
+    toolName: z.string(),
+});
+export const SubagentMessageEventSchema = z.object({
+    type: z.literal("message"),
+    text: z.string(),
+});
+export const SubagentEventSchema = z.discriminatedUnion("type", [
+    SubagentToolCallEventSchema,
+    SubagentMessageEventSchema,
+]);
+/**
+ * Wrapper for subagent events that includes the parent tool call ID
+ */
+export const CustomStreamChunkSchema = z.object({
+    parentToolCallId: z.string(),
+    event: SubagentEventSchema,
+});
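
A small validation sketch for these schemas, assuming they are imported from the module above (the import specifier and IDs are illustrative):

import { CustomStreamChunkSchema } from "./custom-stream-types.js"; // hypothetical specifier

// A chunk a subagent tool might emit via config.writer: a nested tool call
// attributed to the parent tool call that spawned the subagent.
const chunk = CustomStreamChunkSchema.parse({
  parentToolCallId: "toolu_parent_123", // made-up ID
  event: { type: "tool_call", toolName: "bash" },
});

// safeParse rejects events outside the discriminated union instead of throwing.
const bad = CustomStreamChunkSchema.safeParse({
  parentToolCallId: "toolu_parent_123",
  event: { type: "progress" },
});
console.log(chunk.event.type, bad.success); // "tool_call", false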