@townco/agent 0.1.102 → 0.1.105
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/acp-server/adapter.d.ts +10 -0
- package/dist/acp-server/adapter.js +101 -31
- package/dist/definition/index.d.ts +17 -4
- package/dist/definition/index.js +19 -2
- package/dist/runner/agent-runner.d.ts +6 -2
- package/dist/runner/hooks/executor.d.ts +5 -3
- package/dist/runner/hooks/executor.js +190 -150
- package/dist/runner/hooks/loader.d.ts +13 -1
- package/dist/runner/hooks/loader.js +27 -0
- package/dist/runner/hooks/predefined/compaction-tool.d.ts +3 -1
- package/dist/runner/hooks/predefined/compaction-tool.js +38 -2
- package/dist/runner/hooks/predefined/context-validator.d.ts +57 -0
- package/dist/runner/hooks/predefined/context-validator.js +92 -0
- package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +2 -2
- package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +29 -0
- package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +29 -0
- package/dist/runner/hooks/predefined/mid-turn-compaction.d.ts +17 -0
- package/dist/runner/hooks/predefined/mid-turn-compaction.js +224 -0
- package/dist/runner/hooks/predefined/token-utils.d.ts +11 -0
- package/dist/runner/hooks/predefined/token-utils.js +13 -0
- package/dist/runner/hooks/predefined/tool-response-compactor.js +155 -25
- package/dist/runner/hooks/registry.js +2 -0
- package/dist/runner/hooks/types.d.ts +37 -4
- package/dist/runner/index.d.ts +6 -2
- package/dist/runner/langchain/index.js +60 -8
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
package/dist/runner/hooks/predefined/context-validator.js
@@ -0,0 +1,92 @@
+/**
+ * Context validation utilities for ensuring LLM calls don't exceed context limits.
+ *
+ * Key principle: Before passing content to ANY LLM, validate it's within
+ * the model's context limit minus a safety buffer.
+ */
+import { createLogger } from "../../../logger.js";
+import { countTokens } from "../../../utils/token-counter.js";
+import { getModelContextWindow } from "../constants.js";
+import { applyTokenPadding } from "./token-utils.js";
+const logger = createLogger("context-validator");
+/**
+ * Default buffer percentage (10%)
+ * Accounts for tokenizer estimation differences and API overhead
+ */
+export const DEFAULT_BUFFER_PERCENT = 0.1;
+/**
+ * Validates whether adding new content would exceed the model's context limit.
+ *
+ * @param contentTokens - Tokens in the new content to be added
+ * @param currentContextTokens - Tokens already in the context
+ * @param modelContextWindow - Model's full context window size
+ * @param bufferPercent - Safety buffer as a percentage (default 10%)
+ * @returns Validation result indicating if content fits
+ */
+export function validateContextFits(contentTokens, currentContextTokens, modelContextWindow, bufferPercent = DEFAULT_BUFFER_PERCENT) {
+    const maxAllowedTokens = Math.floor(modelContextWindow * (1 - bufferPercent));
+    const totalTokens = currentContextTokens + contentTokens;
+    // Apply 10% padding to account for token estimation inaccuracies
+    const paddedTotalTokens = applyTokenPadding(totalTokens);
+    const isValid = paddedTotalTokens <= maxAllowedTokens;
+    const result = {
+        isValid,
+        totalTokens,
+        maxAllowedTokens,
+        modelContextWindow,
+    };
+    if (!isValid) {
+        result.excess = paddedTotalTokens - maxAllowedTokens;
+    }
+    return result;
+}
+/**
+ * Validates whether a prompt string fits within a model's context limit.
+ * Convenience function that counts tokens from the prompt string.
+ *
+ * @param prompt - The prompt string to validate
+ * @param modelName - Name of the model to get context window for
+ * @param bufferPercent - Safety buffer as a percentage (default 10%)
+ * @returns Validation result indicating if prompt fits
+ */
+export function validatePromptFits(prompt, modelName, bufferPercent = DEFAULT_BUFFER_PERCENT) {
+    const promptTokens = countTokens(prompt);
+    const modelContextWindow = getModelContextWindow(modelName);
+    return validateContextFits(promptTokens, 0, // No existing context for a fresh prompt
+    modelContextWindow, bufferPercent);
+}
+/**
+ * Checks if an error is a context overflow error from the Anthropic API.
+ *
+ * @param error - The error to check
+ * @returns true if the error indicates context overflow
+ */
+export function isContextOverflowError(error) {
+    if (error instanceof Error) {
+        const message = error.message.toLowerCase();
+        return (message.includes("prompt is too long") ||
+            message.includes("context_length_exceeded") ||
+            message.includes("maximum context length") ||
+            (message.includes("tokens") && message.includes("maximum")));
+    }
+    return false;
+}
+/**
+ * Logs validation result with appropriate severity
+ */
+export function logValidationResult(result, context) {
+    if (result.isValid) {
+        logger.debug(`Context validation passed for ${context}`, {
+            totalTokens: result.totalTokens,
+            maxAllowed: result.maxAllowedTokens,
+            headroom: result.maxAllowedTokens - result.totalTokens,
+        });
+    }
+    else {
+        logger.warn(`Context validation failed for ${context}`, {
+            totalTokens: result.totalTokens,
+            maxAllowed: result.maxAllowedTokens,
+            excess: result.excess,
+        });
+    }
+}
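
The new validator is the backbone of the other changes in this release. A minimal usage sketch, assuming a hypothetical 200,000-token context window and made-up token counts; only the exported functions and result fields above come from the package, and import specifiers are relative to dist/runner/hooks/predefined/ as in the diff:

// Sketch: validating a tool response before it is appended to the context.
import { countTokens } from "../../../utils/token-counter.js";
import { logValidationResult, validateContextFits, } from "./context-validator.js";

const modelContextWindow = 200000;   // assumed model limit, for illustration
const currentContextTokens = 150000; // assumed tokens already in the session
const toolResult = { /* output of some tool call (placeholder) */ };
const contentTokens = countTokens(JSON.stringify(toolResult));

// Default 10% buffer: maxAllowedTokens = floor(200000 * 0.9) = 180000.
// The padded total is ceil((150000 + contentTokens) * 1.1), so the check fails
// once the new content pushes that padded total past 180000.
const result = validateContextFits(contentTokens, currentContextTokens, modelContextWindow);
logValidationResult(result, "tool-response");
if (!result.isValid) {
    console.warn(`Over budget by ${result.excess} tokens`);
}

isContextOverflowError is the runtime counterpart: the extractor and scorer catch blocks below use it to recognize the Anthropic "prompt is too long" / "context_length_exceeded" family of errors when the pre-flight estimate was off.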
package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js
@@ -18,11 +18,11 @@ export function calculateChunkSize(config) {
     // Reserve space for:
     // - System prompt (~500 tokens for extraction instructions)
     // - LLM response buffer (~2000 tokens for scoring/extraction output)
-    // - Safety margin (10%)
+    // - Safety margin (25% - increased from 10% to prevent context overflow)
     const responseBuffer = 2000;
     const overhead = systemPromptTokens + responseBuffer;
     const availableForChunk = modelContextSize - overhead;
-    const safeChunkSize = Math.floor(availableForChunk * 0.9);
+    const safeChunkSize = Math.floor(availableForChunk * 0.75);
     // Minimum chunk size to ensure meaningful content (10K tokens)
     const minChunkSize = 10000;
     return Math.max(safeChunkSize, minChunkSize);
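
To see what the wider margin buys, here is the visible arithmetic worked through with assumed inputs; only responseBuffer, the 0.75 factor, and the 10,000-token floor come from the diff above, while the context size and system-prompt count are illustrative guesses:

// Worked example with assumed inputs.
const modelContextSize = 200000;  // assumed
const systemPromptTokens = 500;   // per the "~500 tokens" comment (approximate)
const responseBuffer = 2000;
const availableForChunk = modelContextSize - (systemPromptTokens + responseBuffer); // 197500
const safeChunkSize = Math.floor(availableForChunk * 0.75); // 148125 (would be 177750 at the old 0.9)
const chunkSize = Math.max(safeChunkSize, 10000);           // 148125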
package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js
@@ -6,6 +6,8 @@
 import Anthropic from "@anthropic-ai/sdk";
 import { createLogger } from "../../../../logger.js";
 import { telemetry } from "../../../../telemetry/index.js";
+import { countTokens } from "../../../../utils/token-counter.js";
+import { isContextOverflowError, validateContextFits, } from "../context-validator.js";
 const logger = createLogger("content-extractor");
 // Create Anthropic client directly (not using LangChain)
 const anthropic = new Anthropic({
@@ -81,6 +83,22 @@ async function extractFromChunk(chunk, keyRequirements, totalChunks, config) {
     });
     try {
         const prompt = buildExtractionPrompt(chunk.content, keyRequirements, chunk.index, totalChunks, chunk.relevanceScore ?? 5);
+        // Pre-flight validation: ensure prompt fits in model context
+        const systemPromptTokens = countTokens(EXTRACTION_SYSTEM_PROMPT);
+        const promptTokens = countTokens(prompt);
+        const validation = validateContextFits(promptTokens, systemPromptTokens, config.modelContextSize, 0.1);
+        if (!validation.isValid) {
+            logger.warn("Extraction prompt too large for model context, skipping chunk", {
+                chunkIndex: chunk.index,
+                promptTokens: validation.totalTokens,
+                maxAllowed: validation.maxAllowedTokens,
+            });
+            // Return empty extraction for chunks we can't process
+            return {
+                extracted: `[Chunk ${chunk.index + 1} skipped: content too large (${validation.totalTokens} tokens > ${validation.maxAllowedTokens} max)]`,
+                keyFacts: [],
+            };
+        }
         const response = await telemetry.withActiveSpanAsync(span, () => anthropic.messages.create({
             model: config.model,
             max_tokens: 4096,
@@ -96,6 +114,17 @@ async function extractFromChunk(chunk, keyRequirements, totalChunks, config) {
     }
     catch (error) {
         telemetry.endSpan(span, error);
+        // Handle context overflow errors gracefully
+        if (isContextOverflowError(error)) {
+            logger.warn("Context overflow during chunk extraction, returning empty", {
+                chunkIndex: chunk.index,
+                error: error instanceof Error ? error.message : String(error),
+            });
+            return {
+                extracted: `[Chunk ${chunk.index + 1} skipped: context overflow]`,
+                keyFacts: [],
+            };
+        }
         logger.error("Failed to extract from chunk", {
             chunkIndex: chunk.index,
             error: error instanceof Error ? error.message : String(error),
package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js
@@ -7,6 +7,8 @@
 import Anthropic from "@anthropic-ai/sdk";
 import { createLogger } from "../../../../logger.js";
 import { telemetry } from "../../../../telemetry/index.js";
+import { countTokens } from "../../../../utils/token-counter.js";
+import { isContextOverflowError, validateContextFits, } from "../context-validator.js";
 const logger = createLogger("relevance-scorer");
 // Create Anthropic client directly (not using LangChain)
 // This ensures scoring LLM calls don't get captured by LangGraph's streaming
@@ -82,6 +84,22 @@ async function scoreChunk(chunk, keyRequirements, totalChunks, config) {
     });
     try {
         const prompt = buildScoringPrompt(chunk.content, keyRequirements, chunk.index, totalChunks);
+        // Pre-flight validation: ensure prompt fits in model context
+        const systemPromptTokens = countTokens(SCORING_SYSTEM_PROMPT);
+        const promptTokens = countTokens(prompt);
+        const validation = validateContextFits(promptTokens, systemPromptTokens, config.modelContextSize, 0.1);
+        if (!validation.isValid) {
+            logger.warn("Scoring prompt too large for model context, skipping chunk", {
+                chunkIndex: chunk.index,
+                promptTokens: validation.totalTokens,
+                maxAllowed: validation.maxAllowedTokens,
+            });
+            // Return medium relevance for chunks we can't score
+            return {
+                score: 5,
+                reason: `Chunk too large to score (${validation.totalTokens} tokens > ${validation.maxAllowedTokens} max)`,
+            };
+        }
         const response = await telemetry.withActiveSpanAsync(span, () => anthropic.messages.create({
             model: config.model,
             max_tokens: 256,
@@ -97,6 +115,17 @@ async function scoreChunk(chunk, keyRequirements, totalChunks, config) {
     }
     catch (error) {
         telemetry.endSpan(span, error);
+        // Handle context overflow errors gracefully
+        if (isContextOverflowError(error)) {
+            logger.warn("Context overflow during chunk scoring, returning medium relevance", {
+                chunkIndex: chunk.index,
+                error: error instanceof Error ? error.message : String(error),
+            });
+            return {
+                score: 5,
+                reason: `Chunk too large to score (context overflow)`,
+            };
+        }
         logger.error("Failed to score chunk", {
             chunkIndex: chunk.index,
             error: error instanceof Error ? error.message : String(error),
package/dist/runner/hooks/predefined/mid-turn-compaction.d.ts
@@ -0,0 +1,17 @@
+import { type HookCallback } from "../types";
+/**
+ * Mid-turn compaction callback that triggers when context + tool response
+ * would exceed the maximum limit. Unlike the regular compaction_tool,
+ * this callback signals that the current turn should be aborted and
+ * restarted with the compacted context.
+ *
+ * This is necessary because the LangChain agent builds messages once at
+ * the start of a turn and doesn't re-read from session.context mid-turn.
+ *
+ * When triggered, this callback:
+ * 1. Creates a compacted summary of the conversation
+ * 2. Returns requiresRestart: true in metadata
+ * 3. The adapter detects this and aborts the current turn
+ * 4. The turn is restarted with the compacted context
+ */
+export declare const midTurnCompaction: HookCallback;
package/dist/runner/hooks/predefined/mid-turn-compaction.js
@@ -0,0 +1,224 @@
+import { ChatAnthropic } from "@langchain/anthropic";
+import { HumanMessage, SystemMessage } from "@langchain/core/messages";
+import { createLogger } from "../../../logger.js";
+import { createContextEntry, createFullMessageEntry, } from "../types";
+import { applyTokenPadding } from "./token-utils.js";
+const logger = createLogger("mid-turn-compaction");
+/**
+ * Mid-turn compaction callback that triggers when context + tool response
+ * would exceed the maximum limit. Unlike the regular compaction_tool,
+ * this callback signals that the current turn should be aborted and
+ * restarted with the compacted context.
+ *
+ * This is necessary because the LangChain agent builds messages once at
+ * the start of a turn and doesn't re-read from session.context mid-turn.
+ *
+ * When triggered, this callback:
+ * 1. Creates a compacted summary of the conversation
+ * 2. Returns requiresRestart: true in metadata
+ * 3. The adapter detects this and aborts the current turn
+ * 4. The turn is restarted with the compacted context
+ */
+export const midTurnCompaction = async (ctx) => {
+    const settings = ctx.callbackSetting;
+    const threshold = settings?.threshold ?? 80;
+    // Calculate effective token usage including pending tool response
+    const toolResponseTokens = ctx.toolResponse?.outputTokens ?? 0;
+    const estimatedTokens = ctx.currentTokens + toolResponseTokens;
+    // Apply 10% padding to account for token estimation inaccuracies
+    const paddedTokens = applyTokenPadding(estimatedTokens);
+    const effectivePercentage = (paddedTokens / ctx.maxTokens) * 100;
+    // Check if we should trigger compaction
+    if (effectivePercentage < threshold) {
+        logger.debug("Context below threshold, no mid-turn compaction needed", {
+            currentTokens: ctx.currentTokens,
+            toolResponseTokens,
+            estimatedTokens,
+            paddedTokens,
+            effectivePercentage: effectivePercentage.toFixed(2),
+            threshold,
+        });
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "none",
+                reason: "below_threshold",
+                percentage: effectivePercentage,
+                threshold,
+            },
+        };
+    }
+    logger.warn("Mid-turn compaction triggered - context near limit", {
+        currentTokens: ctx.currentTokens,
+        toolResponseTokens,
+        estimatedTokens,
+        paddedTokens,
+        maxTokens: ctx.maxTokens,
+        effectivePercentage: `${effectivePercentage.toFixed(2)}%`,
+        threshold,
+        contextEntries: ctx.session.context.length,
+        totalMessages: ctx.session.messages.length,
+        model: ctx.model,
+    });
+    try {
+        // Create the LLM client using the same model as the agent
+        const model = new ChatAnthropic({
+            model: ctx.model,
+            temperature: 0,
+        });
+        // Build the conversation history to compact
+        const messagesToCompact = ctx.session.messages;
+        // Convert session messages to text for context, including tool calls and results
+        const conversationText = messagesToCompact
+            .map((msg) => {
+            const parts = [];
+            for (const block of msg.content) {
+                if (block.type === "text") {
+                    parts.push(block.text);
+                }
+                else if (block.type === "tool_call") {
+                    // Include tool call info
+                    parts.push(`[Tool: ${block.title}]`);
+                    if (block.rawInput) {
+                        parts.push(`Input: ${JSON.stringify(block.rawInput, null, 2)}`);
+                    }
+                    if (block.rawOutput) {
+                        // Summarize large outputs to avoid overwhelming the compaction LLM
+                        const outputStr = JSON.stringify(block.rawOutput);
+                        if (outputStr.length > 2000) {
+                            parts.push(`Output: [Large output - ${outputStr.length} chars]`);
+                        }
+                        else {
+                            parts.push(`Output: ${outputStr}`);
+                        }
+                    }
+                    if (block.error) {
+                        parts.push(`Error: ${block.error}`);
+                    }
+                }
+            }
+            return `${msg.role.toUpperCase()}:\n${parts.join("\n")}`;
+        })
+            .join("\n\n---\n\n");
+        // Add the pending tool response that triggered this compaction
+        let fullConversationText = conversationText;
+        if (ctx.toolResponse) {
+            const toolOutputStr = JSON.stringify(ctx.toolResponse.rawOutput);
+            // Truncate very large outputs to avoid overwhelming the compaction LLM
+            const truncatedOutput = toolOutputStr.length > 5000
+                ? toolOutputStr.substring(0, 5000) +
+                    `... [truncated, ${toolOutputStr.length} total chars]`
+                : toolOutputStr;
+            fullConversationText += `\n\n---\n\n[PENDING TOOL RESPONSE - This must be included in the summary]\nTool: ${ctx.toolResponse.toolName}\nTool Call ID: ${ctx.toolResponse.toolCallId}\nInput: ${JSON.stringify(ctx.toolResponse.toolInput, null, 2)}\nOutput: ${truncatedOutput}`;
+        }
+        // Create system prompt for compaction
+        const systemPrompt = new SystemMessage("You are a helpful AI assistant tasked with summarizing conversations.");
+        // Create detailed compaction instructions
+        const userPrompt = `Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
+This summary should be thorough in capturing important details, decisions, and context that would be essential for continuing the conversation/task without losing important information.
+
+IMPORTANT: This is a mid-turn compaction. The agent was in the middle of processing a tool response when context limits were reached. The summary must preserve:
+1. What task the agent was working on
+2. What tools were being used
+3. The current state of the work in progress
+4. Any pending operations that need to continue
+5. The key findings from the pending tool response (marked at the end of the conversation)
+
+Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:
+
+1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
+   - The user's explicit requests and intents
+   - Your approach to addressing the user's requests
+   - Key decisions and important concepts discussed
+   - Specific details that are important to remember
+   - Challenges encountered and how they were addressed
+   - Pay special attention to specific user feedback, especially if the user told you to do something differently
+2. Double-check for accuracy and completeness, addressing each required element thoroughly
+
+Your summary should include the following sections:
+
+1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
+2. Key Topics and Concepts: List all important topics, concepts, and themes discussed in the conversation
+3. Important Details and Information: Document specific details, information, or content that was shared, examined, or created. Pay special attention to the most recent messages and include important details where applicable
+4. Challenges and Solutions: List any challenges, issues, or obstacles that came up and how they were addressed. Pay special attention to specific user feedback, especially if the user asked for a different approach
+5. Problem Solving: Document problems solved and any ongoing work or troubleshooting efforts
+6. All User Messages: List ALL user messages (excluding tool results). These are critical for understanding the user's feedback and changing intent
+7. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on
+8. Current Work: Describe in detail precisely what was being worked on immediately before this summary, paying special attention to the most recent messages from both user and assistant. CRITICAL: Include the tool that was just called, its purpose, AND summarize the key findings from the pending tool response at the end of the conversation.
+9. Optional Next Step: List the next step related to the most recent work. IMPORTANT: ensure this step is DIRECTLY in line with the user's most recent explicit requests and the task you were working on. Include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off.
+10. Pending Tool Response: Summarize the key information from the tool response that was being processed when this compaction triggered (marked as [PENDING TOOL RESPONSE] at the end). This is critical for continuity.
+
+Here's the conversation to summarize:
+
+${fullConversationText}
+
+Please provide your summary based on the conversation above, following this structure and ensuring precision and thoroughness in your response.`;
+        const userMessage = new HumanMessage(userPrompt);
+        // Invoke the LLM
+        logger.info("Invoking LLM for mid-turn compaction summary");
+        const response = await model.invoke([systemPrompt, userMessage]);
+        // Extract the summary text from the response
+        const summaryText = typeof response.content === "string"
+            ? response.content
+            : Array.isArray(response.content)
+                ? response.content
+                    .filter((block) => typeof block === "object" &&
+                    block !== null &&
+                    "text" in block)
+                    .map((block) => block.text)
+                    .join("\n")
+                : "Failed to extract summary";
+        // Extract token usage from LLM response
+        const responseUsage = response
+            .usage_metadata;
+        const summaryTokens = responseUsage?.output_tokens ?? 0;
+        const inputTokensUsed = responseUsage?.input_tokens ?? ctx.currentTokens;
+        logger.info("Generated mid-turn compaction summary", {
+            originalMessages: messagesToCompact.length,
+            summaryLength: summaryText.length,
+            inputTokens: inputTokensUsed,
+            summaryTokens,
+            tokensSaved: inputTokensUsed - summaryTokens,
+        });
+        // Create a new context entry with the summary
+        // Mark it as a mid-turn compaction so the agent knows to continue
+        const summaryEntry = createFullMessageEntry("user", `This session is being continued from a previous conversation that ran out of context during a tool call. The conversation AND the pending tool response are summarized below. IMPORTANT: Continue from where you left off.\n\n${summaryText}`);
+        // Set compactedUpTo to indicate all messages have been compacted into the summary
+        const lastMessageIndex = messagesToCompact.length - 1;
+        const newContextEntry = createContextEntry([summaryEntry], undefined, lastMessageIndex, {
+            // Store summary tokens in userMessagesTokens since the summary is a user message
+            systemPromptTokens: 0,
+            userMessagesTokens: summaryTokens,
+            assistantMessagesTokens: 0,
+            toolInputTokens: 0,
+            toolResultsTokens: 0,
+            totalEstimated: summaryTokens,
+        });
+        return {
+            newContextEntry,
+            metadata: {
+                action: "compacted",
+                requiresRestart: true, // Signal to adapter to restart the turn
+                messagesRemoved: messagesToCompact.length - 1,
+                tokensBeforeCompaction: inputTokensUsed,
+                tokensSaved: inputTokensUsed - summaryTokens,
+                summaryTokens,
+                summaryGenerated: true,
+                midTurn: true,
+            },
+        };
+    }
+    catch (error) {
+        logger.error("Mid-turn compaction failed", {
+            error: error instanceof Error ? error.message : String(error),
+            stack: error instanceof Error ? error.stack : undefined,
+        });
+        return {
+            newContextEntry: null,
+            metadata: {
+                action: "failed",
+                error: error instanceof Error ? error.message : String(error),
+            },
+        };
+    }
+};
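
The JSDoc above describes a contract with the ACP adapter (which also changed in this release, though its diff is not reproduced here). A rough, hypothetical sketch of how a caller might honor that contract; the session, abortCurrentTurn, and restartTurn names are illustrative and not the package's actual adapter API:

// Hypothetical consumer of the hook result; only newContextEntry,
// metadata.action, and metadata.requiresRestart come from the diff above.
const result = await midTurnCompaction(hookContext);
if (result.metadata?.action === "compacted" && result.metadata.requiresRestart) {
    // Persist the compacted summary so the next turn is built from it...
    session.context.push(result.newContextEntry);
    // ...then abort the in-flight turn and re-run it, since the LangChain agent
    // only reads session.context when a turn starts.
    abortCurrentTurn();
    await restartTurn(session);
}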
package/dist/runner/hooks/predefined/token-utils.d.ts
@@ -0,0 +1,11 @@
+/**
+ * Utility functions for token calculations in hooks
+ */
+/**
+ * Apply 10% safety padding to estimated token counts.
+ * This accounts for potential inaccuracies in token estimation.
+ *
+ * @param estimatedTokens - The estimated token count
+ * @returns The padded token count (10% higher, rounded up)
+ */
+export declare function applyTokenPadding(estimatedTokens: number): number;
package/dist/runner/hooks/predefined/token-utils.js
@@ -0,0 +1,13 @@
+/**
+ * Utility functions for token calculations in hooks
+ */
+/**
+ * Apply 10% safety padding to estimated token counts.
+ * This accounts for potential inaccuracies in token estimation.
+ *
+ * @param estimatedTokens - The estimated token count
+ * @returns The padded token count (10% higher, rounded up)
+ */
+export function applyTokenPadding(estimatedTokens) {
+    return Math.ceil(estimatedTokens * 1.1);
+}