@townco/agent 0.1.102 → 0.1.104
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- package/dist/acp-server/adapter.d.ts +10 -0
- package/dist/acp-server/adapter.js +101 -31
- package/dist/definition/index.d.ts +17 -4
- package/dist/definition/index.js +19 -2
- package/dist/runner/agent-runner.d.ts +6 -2
- package/dist/runner/hooks/executor.d.ts +5 -3
- package/dist/runner/hooks/executor.js +190 -150
- package/dist/runner/hooks/loader.d.ts +13 -1
- package/dist/runner/hooks/loader.js +27 -0
- package/dist/runner/hooks/predefined/compaction-tool.d.ts +3 -1
- package/dist/runner/hooks/predefined/compaction-tool.js +38 -2
- package/dist/runner/hooks/predefined/context-validator.d.ts +57 -0
- package/dist/runner/hooks/predefined/context-validator.js +92 -0
- package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +2 -2
- package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +29 -0
- package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +29 -0
- package/dist/runner/hooks/predefined/mid-turn-compaction.d.ts +17 -0
- package/dist/runner/hooks/predefined/mid-turn-compaction.js +224 -0
- package/dist/runner/hooks/predefined/token-utils.d.ts +11 -0
- package/dist/runner/hooks/predefined/token-utils.js +13 -0
- package/dist/runner/hooks/predefined/tool-response-compactor.js +155 -25
- package/dist/runner/hooks/registry.js +2 -0
- package/dist/runner/hooks/types.d.ts +37 -4
- package/dist/runner/index.d.ts +6 -2
- package/dist/runner/langchain/index.js +60 -8
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +7 -7
package/dist/runner/hooks/predefined/tool-response-compactor.js
CHANGED
@@ -2,7 +2,9 @@ import Anthropic from "@anthropic-ai/sdk";
 import { createLogger } from "../../../logger.js";
 import { telemetry } from "../../../telemetry/index.js";
 import { countToolResultTokens } from "../../../utils/token-counter.js";
+import { isContextOverflowError, validatePromptFits, } from "./context-validator.js";
 import { extractDocumentContext } from "./document-context-extractor/index.js";
+import { applyTokenPadding } from "./token-utils.js";
 const logger = createLogger("tool-response-compactor");
 // Create Anthropic client directly (not using LangChain)
 // This ensures compaction LLM calls don't get captured by LangGraph's streaming
@@ -16,8 +18,8 @@ const COMPACTION_MODEL_CONTEXT = 200000; // Haiku context size for calculating t
  * Tool response compaction hook - compacts or truncates large tool responses
  * to prevent context overflow
  */
-//
-const
+// Default tools to skip compaction for
+const DEFAULT_SKIP_TOOLS = ["todo_write"];
 export const toolResponseCompactor = async (ctx) => {
     // Only process if we have tool response data
     if (!ctx.toolResponse) {
@@ -25,14 +27,15 @@ export const toolResponseCompactor = async (ctx) => {
         return { newContextEntry: null };
     }
     const { toolCallId, toolName, toolInput, rawOutput, outputTokens } = ctx.toolResponse;
-    //
-
-
+    // Get settings from callbackSetting - each callback has its own settings
+    const settings = ctx.callbackSetting;
+    const maxTokensSize = settings?.maxTokensSize ?? 20000; // Default: 20000 tokens
+    const skipTools = new Set(settings?.skipTools ?? DEFAULT_SKIP_TOOLS);
+    // Skip compaction for configured tools
+    if (skipTools.has(toolName)) {
+        logger.debug("Skipping compaction for tool", { toolName });
         return { newContextEntry: null };
     }
-    // Get settings from hook configuration
-    const settings = ctx.session.requestParams.hookSettings;
-    const maxTokensSize = settings?.maxTokensSize ?? 20000; // Default: 20000 tokens
     // Use maxTokensSize directly as it's now in tokens
     const maxAllowedResponseSize = maxTokensSize;
     // Calculate available space in context
@@ -42,12 +45,16 @@ export const toolResponseCompactor = async (ctx) => {
         ? Math.floor(availableSpace * 0.9)
         : maxAllowedResponseSize;
     // Calculate compaction limit: max response size that can fit in a single LLM compaction call
-
-    const
+    // Increased overhead to 25K to account for system prompt, conversation context, JSON serialization
+    const COMPACTION_OVERHEAD = 25000;
+    const compactionLimit = Math.floor((COMPACTION_MODEL_CONTEXT - COMPACTION_OVERHEAD) * 0.9); // ~157K tokens
+    // Apply 10% padding to output tokens to account for estimation inaccuracies
+    const paddedOutputTokens = applyTokenPadding(outputTokens);
     logger.info("Tool response compaction hook triggered", {
         toolCallId,
         toolName,
         outputTokens,
+        paddedOutputTokens,
         currentContext: ctx.currentTokens,
         maxTokens: ctx.maxTokens,
         maxAllowedResponseSize,
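For reference, the "~157K tokens" comment checks out: Math.floor((200000 - 25000) * 0.9) = 157500, using the COMPACTION_MODEL_CONTEXT of 200000 declared at the top of the file.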
@@ -56,8 +63,8 @@ export const toolResponseCompactor = async (ctx) => {
         compactionLimit,
         settings,
     });
-    // Case 0: Small response, no action needed
-    if (
+    // Case 0: Small response, no action needed (using padded tokens for safety)
+    if (paddedOutputTokens <= effectiveMaxResponseSize) {
         logger.info("Tool response fits within threshold, no compaction needed");
         return {
             newContextEntry: null,
@@ -83,7 +90,8 @@ export const toolResponseCompactor = async (ctx) => {
         contextAlreadyOverThreshold: availableSpace <= maxAllowedResponseSize,
     });
     // Case 2: Huge response - use document context extractor (with truncation fallback)
-
+    // Use padded tokens for safety margin
+    if (paddedOutputTokens >= compactionLimit) {
         logger.info("Tool response exceeds compaction capacity, using document context extractor", {
             outputTokens,
             compactionLimit,
@@ -180,23 +188,24 @@ export const toolResponseCompactor = async (ctx) => {
         const finalTokens = countToolResultTokens(compacted);
         // Verify compaction stayed within boundaries
         if (finalTokens > targetSize) {
-
+            // Compaction exceeded target - log warning but accept the result
+            // The next callback in the chain (compaction-tool) will handle context overflow if needed
+            logger.warn("LLM compaction exceeded target, accepting result anyway", {
                 finalTokens,
                 targetSize,
                 excess: finalTokens - targetSize,
                 toolName,
                 toolCallId,
             });
-            throw new Error(`LLM compaction for tool "${toolName}" exceeded target size. ` +
-                `Compacted to ${finalTokens.toLocaleString()} tokens but target was ${targetSize.toLocaleString()}. ` +
-                `Original response was ${outputTokens.toLocaleString()} tokens.`);
         }
-
-
-
-
-
-
+        else {
+            logger.info("Successfully compacted tool response", {
+                originalTokens: outputTokens,
+                finalTokens,
+                targetSize,
+                tokensSaved: outputTokens - finalTokens,
+            });
+        }
         return {
             newContextEntry: null,
             metadata: {
@@ -205,17 +214,20 @@ export const toolResponseCompactor = async (ctx) => {
                 finalTokens,
                 tokensSaved: outputTokens - finalTokens,
                 modifiedOutput: compacted,
+                compactionMethod: finalTokens > targetSize ? "llm_exceeded_target" : "llm",
             },
         };
     }
     catch (error) {
-
+        // Compaction failed - throw error to stop the callback chain
+        // The error will be returned to the agent so it can decide what to do
+        logger.error("Tool response compaction failed", {
            error: error instanceof Error ? error.message : String(error),
            toolName,
            toolCallId,
            outputTokens,
        });
-        throw new Error(`
+        throw new Error(`Tool response compaction failed for "${toolName}": ${error instanceof Error ? error.message : String(error)}. Response was ${outputTokens.toLocaleString()} tokens.`);
    }
};
/**
@@ -235,6 +247,17 @@ ${conversationContext}
 Based on the tool input and conversation context, what key information is the user looking for from this tool response?
 
 Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
+    // Pre-flight validation: ensure analysis prompt fits in compaction model's context
+    const analysisValidation = validatePromptFits(analysisPrompt, COMPACTION_MODEL, 0.1);
+    if (!analysisValidation.isValid) {
+        logger.warn("Analysis prompt too large for compaction model, using default requirements", {
+            promptTokens: analysisValidation.totalTokens,
+            maxAllowed: analysisValidation.maxAllowedTokens,
+        });
+        // Skip analysis and use generic requirements - will rely on compaction step
+        const keyRequirements = "Extract the most important and relevant information from this tool response.";
+        return compactWithLLMInternal(rawOutput, keyRequirements, targetTokens);
+    }
     // Create OTEL span for analysis call
     const analysisSpan = telemetry.startSpan("compaction.analysis", {
         "gen_ai.operation.name": "chat",
@@ -324,6 +347,19 @@ Your task: Further compact this data by:
 
 Return ONLY valid JSON (no explanation text).`;
         }
+        // Pre-flight validation: ensure compaction prompt fits in compaction model's context
+        const compactionValidation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+        if (!compactionValidation.isValid) {
+            logger.warn("Compaction prompt too large for LLM, cannot compact further", {
+                attempt: attempt + 1,
+                promptTokens: compactionValidation.totalTokens,
+                maxAllowed: compactionValidation.maxAllowedTokens,
+                currentDataTokens: currentTokens,
+            });
+            // Return what we have - the caller will need to handle this via chunking
+            throw new Error(`Compaction prompt exceeds model context (${compactionValidation.totalTokens} tokens > ${compactionValidation.maxAllowedTokens} max). ` +
+                `Current data is ${currentTokens} tokens.`);
+        }
         // Create OTEL span for compaction call
         const compactionSpan = telemetry.startSpan("compaction.compact", {
             "gen_ai.operation.name": "chat",
@@ -354,6 +390,16 @@ Return ONLY valid JSON (no explanation text).`;
         }
         catch (error) {
             telemetry.endSpan(compactionSpan, error);
+            // Check if this is a context overflow error from the API
+            if (isContextOverflowError(error)) {
+                logger.warn("Context overflow error from compaction API, returning current data", {
+                    attempt: attempt + 1,
+                    currentTokens,
+                    error: error instanceof Error ? error.message : String(error),
+                });
+                // Return what we have - better than crashing
+                return currentData;
+            }
             throw error;
         }
         // Extract and parse JSON
@@ -404,3 +450,87 @@ Return ONLY valid JSON (no explanation text).`;
     });
     return currentData;
 }
+/**
+ * Internal helper for compaction when analysis is skipped
+ * Uses generic key requirements and goes directly to compaction
+ */
+async function compactWithLLMInternal(rawOutput, keyRequirements, targetTokens) {
+    let currentData = rawOutput;
+    let currentTokens = countToolResultTokens(rawOutput);
+    const maxAttempts = 4;
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+        const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
+        const compactionPrompt = `You are helping to compact a large tool response to save context space.
+
+Key information needed:
+${keyRequirements}
+
+Data to compact (JSON):
+${JSON.stringify(currentData, null, 2)}
+
+Current size: ${currentTokens.toLocaleString()} tokens
+Target size: ${targetTokens.toLocaleString()} tokens (reduce by ${reductionNeeded}%)
+
+Your task: Create a compacted version that:
+1. Retains all information relevant to the key requirements above
+2. Removes or summarizes less relevant details
+3. Maintains the same JSON structure where possible
+4. Reduces the size to ${targetTokens.toLocaleString()} tokens or less
+5. Be aggressive in removing unnecessary data
+
+Return ONLY valid JSON (no explanation text).`;
+        // Pre-flight validation
+        const validation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+        if (!validation.isValid) {
+            logger.warn("Internal compaction prompt too large", {
+                attempt: attempt + 1,
+                promptTokens: validation.totalTokens,
+                maxAllowed: validation.maxAllowedTokens,
+            });
+            // Return what we have - caller will need to handle via chunking
+            return currentData;
+        }
+        const compactionSpan = telemetry.startSpan("compaction.compact.internal", {
+            "gen_ai.operation.name": "chat",
+            "gen_ai.provider.name": "anthropic",
+            "gen_ai.request.model": COMPACTION_MODEL,
+            "compaction.attempt": attempt + 1,
+            "compaction.target_tokens": targetTokens,
+            "compaction.current_tokens": currentTokens,
+        });
+        try {
+            const compactionResponse = await telemetry.withActiveSpanAsync(compactionSpan, () => anthropic.messages.create({
+                model: COMPACTION_MODEL,
+                max_tokens: 4096,
+                temperature: 0,
+                system: "You are a helpful assistant compacting data.",
+                messages: [{ role: "user", content: compactionPrompt }],
+            }));
+            telemetry.recordTokenUsage(compactionResponse.usage.input_tokens, compactionResponse.usage.output_tokens, compactionSpan);
+            telemetry.endSpan(compactionSpan);
+            const firstContent = compactionResponse.content[0];
+            const responseText = firstContent && firstContent.type === "text" ? firstContent.text : "";
+            const jsonMatch = responseText.match(/```(?:json)?\n([\s\S]*?)\n```/) || [
+                null,
+                responseText,
+            ];
+            const jsonText = jsonMatch[1] || responseText;
+            const compacted = JSON.parse(jsonText.trim());
+            const compactedTokens = countToolResultTokens(compacted);
+            if (compactedTokens <= targetTokens ||
+                compactedTokens <= targetTokens * 1.05) {
+                return compacted;
+            }
+            currentData = compacted;
+            currentTokens = compactedTokens;
+        }
+        catch (error) {
+            telemetry.endSpan(compactionSpan, error);
+            if (isContextOverflowError(error)) {
+                return currentData;
+            }
+            throw error;
+        }
+    }
+    return currentData;
+}
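The new token-utils.js and context-validator.js modules appear in the file list but their bodies are not included in this excerpt. Judging only from the call sites above (applyTokenPadding(outputTokens), validatePromptFits(prompt, COMPACTION_MODEL, 0.1), isContextOverflowError(error)), minimal sketches of what they might export could look like the following; the shapes are inferred, not confirmed by the diff. getModelContextWindow is the package's own helper (imported elsewhere in this diff from ../hooks/constants.js), while estimateTokens is a hypothetical stand-in for whatever counter the package actually uses.

// token-utils sketch: 10% safety padding on token estimates, matching the
// "Apply 10% padding" comment at the call site; the default ratio is an assumption.
export function applyTokenPadding(tokens: number, ratio = 0.1): number {
    return Math.ceil(tokens * (1 + ratio));
}

// context-validator sketch: the result fields are exactly the ones the compactor reads.
export interface PromptValidationResult {
    isValid: boolean;
    totalTokens: number;
    maxAllowedTokens: number;
}

export function validatePromptFits(prompt: string, model: string, safetyMargin: number): PromptValidationResult {
    const totalTokens = estimateTokens(prompt); // hypothetical token counter
    const maxAllowedTokens = Math.floor(getModelContextWindow(model) * (1 - safetyMargin));
    return { isValid: totalTokens <= maxAllowedTokens, totalTokens, maxAllowedTokens };
}

// context-validator sketch: classify provider errors that indicate the prompt
// exceeded the model context (the real matching logic is not shown in this diff).
export function isContextOverflowError(error: unknown): boolean {
    const message = error instanceof Error ? error.message : String(error);
    return /prompt is too long|context window|maximum context/i.test(message);
}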
package/dist/runner/hooks/registry.js
CHANGED

@@ -1,4 +1,5 @@
 import { compactionTool } from "./predefined/compaction-tool";
+import { midTurnCompaction } from "./predefined/mid-turn-compaction";
 import { toolResponseCompactor } from "./predefined/tool-response-compactor";
 /**
  * Registry of predefined hook callbacks
@@ -6,6 +7,7 @@ import { toolResponseCompactor } from "./predefined/tool-response-compactor";
  */
 export const HOOK_REGISTRY = {
     compaction_tool: compactionTool,
+    mid_turn_compaction: midTurnCompaction,
     tool_response_compactor: toolResponseCompactor,
 };
 /**
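executor.js changed substantially (+190/-150) but is not shown here. The new types below make the intended chain semantics reasonably clear: callbacks run in order, each receives only its own setting as ctx.callbackSetting, and toolResponseTokens is refreshed between callbacks. A rough sketch of that loop, illustrative only (loadCallbackFromFile is hypothetical, and the modifiedOutput handshake is inferred from the compactor's metadata above):

// Illustrative sketch, not the actual executor.js implementation.
for (const cb of hook.callbacks ?? []) {
    // Resolve a predefined name via HOOK_REGISTRY; otherwise treat it as a file path.
    const callback = HOOK_REGISTRY[cb.name] ?? (await loadCallbackFromFile(cb.name)); // loader helper assumed
    const result = await callback({ ...ctx, callbackSetting: cb.setting });
    const modified = result?.metadata?.modifiedOutput;
    if (modified && ctx.toolResponse) {
        // Later callbacks in the chain see the compacted output and an updated count.
        ctx.toolResponse.rawOutput = modified;
        ctx.toolResponseTokens = countToolResultTokens(modified);
    }
}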
package/dist/runner/hooks/types.d.ts
CHANGED

@@ -22,6 +22,7 @@ export interface ContextSizeSettings {
 }
 /**
  * Settings for tool_response hook
+ * @deprecated Use CallbackConfig.setting instead for individual callback settings
  */
 export interface ToolResponseSettings {
     /**
@@ -31,6 +32,21 @@ export interface ToolResponseSettings {
      */
     maxTokensSize?: number | undefined;
 }
+/**
+ * Individual callback configuration with its own settings
+ */
+export interface CallbackConfig {
+    /**
+     * Callback reference - either a predefined hook name or a file path
+     * Examples: "compaction_tool" or "./hooks/my_compaction_tool.ts"
+     */
+    name: string;
+    /**
+     * Callback-specific settings
+     * The callback is responsible for interpreting these settings
+     */
+    setting?: Record<string, unknown> | undefined;
+}
 /**
  * Hook configuration in agent definition
  */
@@ -40,14 +56,21 @@ export interface HookConfig {
      */
     type: HookType;
     /**
-     *
+     * @deprecated Use callbacks array instead for new configurations.
+     * Single callback reference - either a predefined hook name or a file path
+     * Examples: "compaction_tool" or "./hooks/my_compaction_tool.ts"
+     */
+    callback?: string | undefined;
+    /**
+     * @deprecated Use CallbackConfig.setting in callbacks array instead.
+     * Optional hook-specific settings (only used with deprecated callback field)
      */
     setting?: ContextSizeSettings | ToolResponseSettings | undefined;
     /**
-     *
-     *
+     * Array of callback configurations to execute in order.
+     * Each callback has its own settings and decides whether to run.
      */
-
+    callbacks?: CallbackConfig[] | undefined;
 }
 /**
  * Read-only view of an agent session for hooks
@@ -102,6 +125,11 @@ export interface HookContext {
      * Storage interface for hooks that need to persist data
      */
     storage?: HookStorageInterface | undefined;
+    /**
+     * Settings for the current callback being executed.
+     * Each callback receives its own settings from CallbackConfig.setting
+     */
+    callbackSetting?: Record<string, unknown> | undefined;
     /**
      * Tool response data (only for tool_response hooks)
      */
@@ -112,6 +140,11 @@ export interface HookContext {
         rawOutput: Record<string, unknown>;
         outputTokens: number;
     };
+    /**
+     * Token count of the pending tool response (only for tool_response hooks).
+     * This is recalculated after each callback in the chain to reflect any modifications.
+     */
+    toolResponseTokens?: number | undefined;
 }
 /**
  * Result returned by hook callbacks
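Put together, HookConfig now accepts both the deprecated single callback field and the new callbacks array. A tool_response hook using the new shape might look like this; the setting values are illustrative, but maxTokensSize and skipTools are the keys tool_response_compactor actually reads above:

const hook: HookConfig = {
    type: "tool_response",
    callbacks: [
        // Runs first; skips itself for todo_write via its own setting.
        { name: "tool_response_compactor", setting: { maxTokensSize: 20000, skipTools: ["todo_write"] } },
        // Runs next in the chain; per the compactor's comments, it handles
        // context overflow if compaction alone was not enough.
        { name: "compaction_tool" },
    ],
};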
package/dist/runner/index.d.ts
CHANGED
@@ -36,12 +36,16 @@ export declare const makeRunnerFromDefinition: (definition: {
     harnessImplementation?: "langchain" | undefined;
     hooks?: {
         type: "context_size" | "tool_response";
-        setting?: {
+        setting?: Record<string, unknown> | {
             threshold: number;
         } | {
             maxTokensSize?: number | undefined;
         } | undefined;
-        callback
+        callback?: string | undefined;
+        callbacks?: {
+            name: string;
+            setting?: Record<string, unknown> | undefined;
+        }[] | undefined;
     }[] | undefined;
     initialMessage?: {
         enabled: boolean;
package/dist/runner/langchain/index.js
CHANGED

@@ -8,6 +8,8 @@ import { z } from "zod";
 import { SUBAGENT_MODE_KEY } from "../../acp-server/adapter";
 import { createLogger } from "../../logger.js";
 import { telemetry } from "../../telemetry/index.js";
+import { calculateContextSize } from "../../utils/context-size-calculator.js";
+import { getModelContextWindow } from "../hooks/constants.js";
 import { bindGeneratorToAbortSignal, bindGeneratorToSessionContext, getAbortSignal, runWithAbortSignal, } from "../session-context";
 import { loadCustomToolModule, } from "../tool-loader.js";
 import { createModelFromString, detectProvider } from "./model-factory.js";
@@ -384,6 +386,53 @@ export class LangchainAgent {
             toolOverheadTokens,
             mcpOverheadTokens,
         };
+        // Calculate accurate context size for tool response compaction decisions
+        // This includes: system prompt, tool overhead, MCP overhead, and message history
+        const baseSystemPromptTokens = this.definition.systemPrompt
+            ? countTokens(this.definition.systemPrompt)
+            : 0;
+        // Estimate additional injection tokens based on enabled features
+        // These will be injected into the system prompt later
+        const hasWebSearchToolsForEstimate = builtInNames.includes("web_search") ||
+            builtInNames.includes("town_web_search");
+        const hasLibraryToolsForEstimate = enabledTools.some((t) => t.name.startsWith("library__") ||
+            t.name.includes("get_document") ||
+            t.name.includes("retrieve_document") ||
+            t.name.includes("search_document"));
+        const hasSubagentToolsForEstimate = enabledTools.some((t) => t.name === SUBAGENT_TOOL_NAME);
+        // Rough estimate for injection overhead (citations, subagent instructions, date/time)
+        // This is approximate but adds safety margin for the compaction decision
+        const injectionOverheadEstimate = (hasWebSearchToolsForEstimate ? 500 : 0) + // Citation instructions
+            (hasLibraryToolsForEstimate ? 400 : 0) + // Document citation instructions
+            (hasSubagentToolsForEstimate ? 300 : 0) + // Subagent citation instructions
+            200; // Date/time injection + safety margin
+        const systemPromptTokensEstimate = baseSystemPromptTokens +
+            todoInstructionsTokens +
+            injectionOverheadEstimate;
+        // Calculate message history tokens from context messages
+        const messageHistoryContext = calculateContextSize(req.contextMessages || [], undefined, // Don't double count system prompt
+        0, // Don't double count tool overhead
+        0);
+        const messageHistoryTokens = messageHistoryContext.userMessagesTokens +
+            messageHistoryContext.assistantMessagesTokens +
+            messageHistoryContext.toolInputTokens +
+            messageHistoryContext.toolResultsTokens;
+        // Total base context (everything except new tool outputs in this turn)
+        const baseContextTokens = systemPromptTokensEstimate +
+            toolOverheadTokens +
+            mcpOverheadTokens +
+            messageHistoryTokens;
+        const modelContextWindow = getModelContextWindow(this.definition.model);
+        _logger.debug("Calculated accurate context size for compaction", {
+            baseSystemPromptTokens,
+            injectionOverheadEstimate,
+            systemPromptTokensEstimate,
+            toolOverheadTokens,
+            mcpOverheadTokens,
+            messageHistoryTokens,
+            baseContextTokens,
+            modelContextWindow,
+        });
         // Wrap tools with response compaction if hook is configured
         const hooks = this.definition.hooks ?? [];
         const hasToolResponseHook = hooks.some((h) => h.type === "tool_response");
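As a worked example of the new estimate (all numbers illustrative): a 1,200-token system prompt plus 300 tokens of todo instructions, with web search, library, and subagent tools all enabled, gives systemPromptTokensEstimate = 1200 + 300 + (500 + 400 + 300 + 200) = 2,900; with, say, 3,000 tokens of tool overhead, 1,000 of MCP overhead, and 40,000 of message history, baseContextTokens = 46,900 before any tool output from the current turn is added.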
@@ -418,10 +467,9 @@ export class LangchainAgent {
                 cumulativeToolOutputTokens,
             });
             // Calculate current context including all tool outputs so far in this turn
+            // Uses accurate baseContextTokens calculated earlier (system prompt, tool overhead, MCP overhead, message history)
             // This ensures we account for multiple large tool calls in the same turn
-            const baseContextTokens = turnTokenUsage.inputTokens || 10000;
             const currentTokens = baseContextTokens + cumulativeToolOutputTokens;
-            const maxTokens = 200000; // Claude's limit
             // Build proper hook context with all required fields
             const hookContext = {
                 session: {
@@ -433,8 +481,8 @@ export class LangchainAgent {
                     },
                 },
                 currentTokens,
-                maxTokens,
-                percentage: (currentTokens /
+                maxTokens: modelContextWindow,
+                percentage: (currentTokens / modelContextWindow) * 100,
                 model: this.definition.model,
                 agent: this.definition,
                 toolResponse: {
@@ -628,14 +676,18 @@
         if (hasSubagentTool) {
             agentConfig.systemPrompt = `${agentConfig.systemPrompt ?? ""}\n\n${SUBAGENT_CITATION_INSTRUCTIONS}`;
         }
-        //
+        // Inject current date/time into system prompt
+        const currentDateTime = getCurrentDateTimeString();
+        // First, replace any template variables if they exist
         if (agentConfig.systemPrompt) {
-            const currentDateTime = getCurrentDateTimeString();
-            // Replace {{.CurrentDate}} template variable
             agentConfig.systemPrompt = agentConfig.systemPrompt.replace(/\{\{\.CurrentDate\}\}/g, currentDateTime);
-            // Replace {{.CurrentDateTime}} template variable (alias)
             agentConfig.systemPrompt = agentConfig.systemPrompt.replace(/\{\{\.CurrentDateTime\}\}/g, currentDateTime);
         }
+        // Always append current date/time information (if not already present via template)
+        const dateInfoLine = `The current date and time is ${currentDateTime}.`;
+        if (!agentConfig.systemPrompt?.includes(currentDateTime)) {
+            agentConfig.systemPrompt = `${agentConfig.systemPrompt ?? ""}\n\n${dateInfoLine}`;
+        }
         // Apply prompt parameters from request (user-selected per-message options)
         if (req.promptParameters && this.definition.promptParameters) {
             for (const param of this.definition.promptParameters) {
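Note the guard on the appended line: if the prompt contained {{.CurrentDate}} or {{.CurrentDateTime}}, the earlier replacement already embedded currentDateTime, so systemPrompt?.includes(currentDateTime) is true and no duplicate line is appended; prompts without a template get dateInfoLine added exactly once.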