@townco/agent 0.1.122 → 0.1.123
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/acp-server/adapter.d.ts +1 -0
- package/dist/acp-server/adapter.js +133 -11
- package/dist/runner/agent-runner.d.ts +7 -0
- package/dist/runner/hooks/executor.js +1 -1
- package/dist/runner/hooks/predefined/context-validator.d.ts +1 -1
- package/dist/runner/hooks/predefined/context-validator.js +2 -2
- package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.d.ts +1 -1
- package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +3 -3
- package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +2 -2
- package/dist/runner/hooks/predefined/document-context-extractor/index.js +5 -5
- package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +2 -2
- package/dist/runner/hooks/predefined/tool-response-compactor.js +9 -9
- package/dist/runner/langchain/index.js +301 -9
- package/dist/runner/langchain/otel-callbacks.d.ts +5 -0
- package/dist/runner/langchain/otel-callbacks.js +8 -0
- package/dist/runner/langchain/tools/artifacts.d.ts +68 -0
- package/dist/runner/langchain/tools/artifacts.js +474 -0
- package/dist/runner/langchain/tools/conversation_search.d.ts +22 -0
- package/dist/runner/langchain/tools/conversation_search.js +137 -0
- package/dist/runner/langchain/tools/document_extract.js +1 -1
- package/dist/runner/langchain/tools/generate_image.d.ts +47 -0
- package/dist/runner/langchain/tools/generate_image.js +175 -0
- package/dist/runner/langchain/tools/port-utils.d.ts +8 -0
- package/dist/runner/langchain/tools/port-utils.js +35 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/context-size-calculator.d.ts +1 -1
- package/dist/utils/context-size-calculator.js +9 -14
- package/dist/utils/token-counter.d.ts +9 -7
- package/dist/utils/token-counter.js +30 -11
- package/dist/utils/tool-overhead-calculator.d.ts +2 -2
- package/dist/utils/tool-overhead-calculator.js +5 -4
- package/package.json +8 -7

package/dist/acp-server/adapter.d.ts
@@ -59,6 +59,7 @@ export declare class AgentAcpAdapter implements acp.Agent {
     private agentUiConfig;
     private currentToolOverheadTokens;
     private currentMcpOverheadTokens;
+    private currentSystemPromptTokens;
     constructor(agent: AgentRunner, connection: acp.AgentSideConnection, agentDir?: string, agentName?: string);
     /**
      * Extract tool metadata from the agent definition for exposing to clients.

package/dist/acp-server/adapter.js
@@ -5,7 +5,7 @@ import { getModelContextWindow, HookExecutor, loadHookCallback, } from "../runne
 import { getToolGroupChildren } from "../runner/langchain/index.js";
 import { telemetry } from "../telemetry/index.js";
 import { calculateContextSize, } from "../utils/context-size-calculator.js";
-import { countToolResultTokens } from "../utils/token-counter.js";
+import { countTokens, countToolResultTokens } from "../utils/token-counter.js";
 import { SessionStorage, } from "./session-storage.js";
 const logger = createLogger("adapter");
 /**
@@ -137,6 +137,7 @@ export class AgentAcpAdapter {
     agentUiConfig;
     currentToolOverheadTokens = 0; // Track tool overhead for current turn
     currentMcpOverheadTokens = 0; // Track MCP overhead for current turn
+    currentSystemPromptTokens = 0; // Track actual system prompt tokens after all injections
     constructor(agent, connection, agentDir, agentName) {
         this.connection = connection;
         this.sessions = new Map();
@@ -1080,6 +1081,7 @@ export class AgentAcpAdapter {
         // Reset tool overhead for new turn (will be set by harness)
         this.currentToolOverheadTokens = 0;
         this.currentMcpOverheadTokens = 0;
+        this.currentSystemPromptTokens = 0;
         // Generate a unique messageId for this assistant response
         const messageId = Math.random().toString(36).substring(2);
         // Convert prompt content blocks to session storage format
@@ -1163,10 +1165,20 @@ export class AgentAcpAdapter {
                 contextMessages.push(entry.message);
             }
         }
-        // Calculate context size -
-        const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+        // Calculate context size - use actual system prompt tokens from harness if available
+        const context_size = await calculateContextSize(contextMessages, undefined, // Don't use base prompt - we'll add actual tokens below
+        this.currentToolOverheadTokens, // Include tool overhead
         this.currentMcpOverheadTokens, // Include MCP overhead
         getModelContextWindow(this.agent.definition.model));
+        // Add actual system prompt tokens (includes all injections)
+        // If harness hasn't sent it yet, use base prompt as fallback
+        const systemPromptTokens = this.currentSystemPromptTokens > 0
+            ? this.currentSystemPromptTokens
+            : this.agent.definition.systemPrompt
+                ? await countTokens(this.agent.definition.systemPrompt)
+                : 0;
+        context_size.systemPromptTokens = systemPromptTokens;
+        context_size.totalEstimated += systemPromptTokens;
         const contextSnapshot = createContextSnapshot(session.messages.length - 1, // Exclude the newly added user message (it will be passed separately via prompt)
         new Date().toISOString(), previousContext, context_size);
         session.context.push(contextSnapshot);
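
Note: the new snapshot math above reduces to picking a token source and adding it to the running total. A minimal TypeScript sketch of that adjustment, assuming a hypothetical ContextSize shape named after the fields visible in this hunk (the real type lives in context-size-calculator.d.ts and may differ):

// Hypothetical shape; field names taken from the hunk above.
interface ContextSize {
  systemPromptTokens: number;
  toolOverheadTokens?: number;
  mcpOverheadTokens?: number;
  totalEstimated: number;
}

// Prefer the harness-reported count (it includes all injections);
// fall back to a count of the base prompt, else zero.
function applySystemPromptTokens(
  size: ContextSize,
  harnessReported: number,
  basePromptTokens: number,
): ContextSize {
  const systemPromptTokens =
    harnessReported > 0 ? harnessReported : basePromptTokens;
  return {
    ...size,
    systemPromptTokens,
    totalEstimated: size.totalEstimated + systemPromptTokens,
  };
}
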
@@ -1211,6 +1223,7 @@ export class AgentAcpAdapter {
             flushLogBuffer(true);
         };
         // Helper to save cancelled message to session
+        const sessionForTurn = session;
         const saveCancelledMessage = async () => {
             if (this.noSession)
                 return;
@@ -1242,14 +1255,15 @@ export class AgentAcpAdapter {
                 timestamp: new Date().toISOString(),
             };
             // Check if we already have a partial assistant message
-            const lastMessage = session.messages[session.messages.length - 1];
+            const lastMessage = sessionForTurn.messages[sessionForTurn.messages.length - 1];
             if (lastMessage && lastMessage.role === "assistant") {
-                session.messages[session.messages.length - 1] = cancelledMessage;
+                sessionForTurn.messages[sessionForTurn.messages.length - 1] =
+                    cancelledMessage;
             }
             else {
-                session.messages.push(cancelledMessage);
+                sessionForTurn.messages.push(cancelledMessage);
             }
-            await this.saveSessionToDisk(params.sessionId, session);
+            await this.saveSessionToDisk(params.sessionId, sessionForTurn);
             logger.info("Saved cancelled message to session", {
                 sessionId: params.sessionId,
                 contentBlocks: contentBlocks.length,
@@ -1338,6 +1352,8 @@ export class AgentAcpAdapter {
         const generator = this.agent.invoke(invokeParams);
         // Track the invocation span for parenting hook spans
         let invocationSpan = null;
+        // Track whether we've updated the initial context snapshot with overhead info
+        let initialSnapshotUpdated = false;
         // Manually iterate to capture the return value
         let iterResult = await generator.next();
         while (!iterResult.done) {
@@ -1373,6 +1389,90 @@ export class AgentAcpAdapter {
                 iterResult = await generator.next();
                 continue;
             }
+            // Capture system prompt overhead info if provided by harness
+            if ("sessionUpdate" in msg &&
+                msg.sessionUpdate === "system_prompt_overhead") {
+                const overheadInfo = msg;
+                this.currentSystemPromptTokens = overheadInfo.systemPromptTokens;
+                logger.debug("Received system prompt overhead from harness", {
+                    systemPromptTokens: this.currentSystemPromptTokens,
+                });
+                // Update the initial context snapshot with actual overhead values
+                // This happens after both tool_overhead_info and system_prompt_overhead are received
+                if (!initialSnapshotUpdated && session.context.length > 0) {
+                    const initialSnapshot = session.context[session.context.length - 1];
+                    if (initialSnapshot?.context_size) {
+                        // Calculate overhead delta
+                        const oldSystemPromptTokens = initialSnapshot.context_size.systemPromptTokens;
+                        const oldToolOverhead = initialSnapshot.context_size.toolOverheadTokens ?? 0;
+                        const oldMcpOverhead = initialSnapshot.context_size.mcpOverheadTokens ?? 0;
+                        // Update all overhead fields
+                        initialSnapshot.context_size.systemPromptTokens =
+                            this.currentSystemPromptTokens;
+                        initialSnapshot.context_size.toolOverheadTokens =
+                            this.currentToolOverheadTokens;
+                        initialSnapshot.context_size.mcpOverheadTokens =
+                            this.currentMcpOverheadTokens;
+                        // Recalculate total
+                        const oldTotal = initialSnapshot.context_size.totalEstimated;
+                        const overheadDelta = this.currentSystemPromptTokens -
+                            oldSystemPromptTokens +
+                            (this.currentToolOverheadTokens - oldToolOverhead) +
+                            (this.currentMcpOverheadTokens - oldMcpOverhead);
+                        initialSnapshot.context_size.totalEstimated =
+                            oldTotal + overheadDelta;
+                        logger.debug("Updated initial context snapshot with overhead", {
+                            systemPromptTokens: this.currentSystemPromptTokens,
+                            toolOverheadTokens: this.currentToolOverheadTokens,
+                            mcpOverheadTokens: this.currentMcpOverheadTokens,
+                            oldTotal,
+                            newTotal: initialSnapshot.context_size.totalEstimated,
+                            overheadDelta,
+                        });
+                        // Save updated snapshot
+                        await this.saveSessionToDisk(params.sessionId, session);
+                        initialSnapshotUpdated = true;
+                    }
+                }
+                // Don't send this update to client, it's internal metadata
+                iterResult = await generator.next();
+                continue;
+            }
+            // Capture actual token usage from API and compare with estimates
+            if ("sessionUpdate" in msg &&
+                msg.sessionUpdate === "actual_token_usage") {
+                const actualUsage = msg;
+                const totalActual = actualUsage.inputTokens + actualUsage.outputTokens;
+                // Get the most recent context entry's estimated total
+                const lastContext = session.context.length > 0
+                    ? session.context[session.context.length - 1]
+                    : null;
+                const estimatedTotal = lastContext?.context_size?.totalEstimated ?? 0;
+                // Calculate discrepancy
+                const discrepancy = totalActual - estimatedTotal;
+                const discrepancyPercent = estimatedTotal > 0
+                    ? ((discrepancy / totalActual) * 100).toFixed(1)
+                    : "N/A";
+                logger.warn("Token usage comparison (Actual vs Estimated)", {
+                    sessionId: params.sessionId,
+                    actual: {
+                        inputTokens: actualUsage.inputTokens,
+                        outputTokens: actualUsage.outputTokens,
+                        total: totalActual,
+                    },
+                    estimated: {
+                        total: estimatedTotal,
+                        breakdown: lastContext?.context_size,
+                    },
+                    discrepancy: {
+                        tokens: discrepancy,
+                        percent: discrepancyPercent,
+                    },
+                });
+                // Don't send this update to client, it's internal metadata
+                iterResult = await generator.next();
+                continue;
+            }
             // Extract and accumulate token usage from message chunks
             if ("sessionUpdate" in msg &&
                 msg.sessionUpdate === "agent_message_chunk" &&
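
Note: the actual-vs-estimated comparison above is a small pure computation. A sketch that mirrors it (as in the hunk, the percentage is taken against the actual total, not the estimate):

// Mirrors the discrepancy math in the hunk above.
function tokenDiscrepancy(
  inputTokens: number,
  outputTokens: number,
  estimatedTotal: number,
) {
  const totalActual = inputTokens + outputTokens;
  const discrepancy = totalActual - estimatedTotal;
  const percent =
    estimatedTotal > 0
      ? ((discrepancy / totalActual) * 100).toFixed(1)
      : "N/A";
  return { totalActual, discrepancy, percent };
}

// tokenDiscrepancy(12000, 800, 11500)
// -> { totalActual: 12800, discrepancy: 1300, percent: "10.2" }
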
@@ -1592,6 +1692,18 @@ export class AgentAcpAdapter {
                     toolCallBlock._meta.originalTokens =
                         compactionMeta.originalTokens;
                     toolCallBlock._meta.finalTokens = compactionMeta.finalTokens;
+                    // If the runner already saved the original content to artifacts, persist that path.
+                    if (typeof compactionMeta.originalContentPath === "string" &&
+                        compactionMeta.originalContentPath.length > 0) {
+                        toolCallBlock._meta.originalContentPath =
+                            compactionMeta.originalContentPath;
+                    }
+                    // Persist a short preview if provided (useful when original file isn't available)
+                    if (typeof compactionMeta.originalContentPreview === "string" &&
+                        compactionMeta.originalContentPreview.length > 0) {
+                        toolCallBlock._meta.originalContentPreview =
+                            compactionMeta.originalContentPreview;
+                    }
                 }
                 if (compactionMeta.originalContent &&
                     actuallyCompacted &&
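
Note: the _meta fields written above, collected as one sketch. The shape is inferred from this diff and is not a published type:

interface CompactionMetaFields {
  originalTokens?: number;
  finalTokens?: number;
  originalContentPath?: string; // set when the runner saved the original to artifacts
  originalContentPreview?: string; // short preview when the original file isn't available
}
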
@@ -1697,7 +1809,7 @@ export class AgentAcpAdapter {
                 },
             });
         }
-        const outputTokens = countToolResultTokens(rawOutput);
+        const outputTokens = await countToolResultTokens(rawOutput);
         // Create notification callback to stream hook events in real-time
         const sendHookNotification = (notification) => {
             this.connection.sessionUpdate({
@@ -1942,7 +2054,7 @@ export class AgentAcpAdapter {
             }
         }
         // Calculate context size - tool result is now in the message, but hasn't been sent to LLM yet
-        const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+        const context_size = await calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
         this.currentMcpOverheadTokens, // Include MCP overhead
         getModelContextWindow(this.agent.definition.model));
         // Create snapshot with a pointer to the partial message (not a full copy!)
@@ -2161,10 +2273,20 @@ export class AgentAcpAdapter {
                 contextMessages.push(entry.message);
             }
         }
-        // Calculate context size -
-        const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+        // Calculate context size - use actual system prompt tokens from harness if available
+        const context_size = await calculateContextSize(contextMessages, undefined, // Don't use base prompt - we'll add actual tokens below
+        this.currentToolOverheadTokens, // Include tool overhead
         this.currentMcpOverheadTokens, // Include MCP overhead
         getModelContextWindow(this.agent.definition.model));
+        // Add actual system prompt tokens (includes all injections)
+        // If harness hasn't sent it yet, use base prompt as fallback
+        const systemPromptTokens = this.currentSystemPromptTokens > 0
+            ? this.currentSystemPromptTokens
+            : this.agent.definition.systemPrompt
+                ? await countTokens(this.agent.definition.systemPrompt)
+                : 0;
+        context_size.systemPromptTokens = systemPromptTokens;
+        context_size.totalEstimated += systemPromptTokens;
         const contextSnapshot = createContextSnapshot(session.messages.length, new Date().toISOString(), previousContext, context_size);
         session.context.push(contextSnapshot);
         await this.saveSessionToDisk(params.sessionId, session);

package/dist/runner/agent-runner.d.ts
@@ -162,6 +162,13 @@ export type ExtendedSessionUpdate = (SessionNotification["update"] & {
     sessionUpdate: "tool_overhead_info";
     toolOverheadTokens: number;
     mcpOverheadTokens: number;
+} | {
+    sessionUpdate: "system_prompt_overhead";
+    systemPromptTokens: number;
+} | {
+    sessionUpdate: "actual_token_usage";
+    inputTokens: number;
+    outputTokens: number;
 } | {
     sessionUpdate: "__invocation_span";
     invocationSpan: Span;
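
Note: a sketch of narrowing the two new union members on the consumer side, assuming only the shape declared above; per the adapter hunks, neither update is forwarded to ACP clients:

type InternalUpdate =
  | { sessionUpdate: "system_prompt_overhead"; systemPromptTokens: number }
  | { sessionUpdate: "actual_token_usage"; inputTokens: number; outputTokens: number };

function handleInternalUpdate(msg: InternalUpdate): void {
  switch (msg.sessionUpdate) {
    case "system_prompt_overhead":
      // The adapter records this and patches the initial context snapshot.
      console.log("system prompt tokens:", msg.systemPromptTokens);
      break;
    case "actual_token_usage":
      // The adapter logs actual-vs-estimated token usage.
      console.log("actual total:", msg.inputTokens + msg.outputTokens);
      break;
  }
}
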

package/dist/runner/hooks/executor.js
@@ -231,7 +231,7 @@ export class HookExecutor {
         if (result.metadata?.modifiedOutput) {
             const newOutput = result.metadata.modifiedOutput;
             const newOutputTokens = result.metadata.finalTokens ??
-                countToolResultTokens(newOutput);
+                (await countToolResultTokens(newOutput));
             currentOutput = newOutput;
             currentToolResponse = {
                 ...currentToolResponse,

package/dist/runner/hooks/predefined/context-validator.d.ts
@@ -43,7 +43,7 @@ export declare function validateContextFits(contentTokens: number, currentContex
  * @param bufferPercent - Safety buffer as a percentage (default 10%)
  * @returns Validation result indicating if prompt fits
  */
-export declare function validatePromptFits(prompt: string, modelName: string, bufferPercent?: number): ValidationResult;
+export declare function validatePromptFits(prompt: string, modelName: string, bufferPercent?: number): Promise<ValidationResult>;
 /**
  * Checks if an error is a context overflow error from the Anthropic API.
  *

package/dist/runner/hooks/predefined/context-validator.js
@@ -49,8 +49,8 @@ export function validateContextFits(contentTokens, currentContextTokens, modelCo
  * @param bufferPercent - Safety buffer as a percentage (default 10%)
  * @returns Validation result indicating if prompt fits
  */
-export function validatePromptFits(prompt, modelName, bufferPercent = DEFAULT_BUFFER_PERCENT) {
-    const promptTokens = countTokens(prompt);
+export async function validatePromptFits(prompt, modelName, bufferPercent = DEFAULT_BUFFER_PERCENT) {
+    const promptTokens = await countTokens(prompt);
     const modelContextWindow = getModelContextWindow(modelName);
     return validateContextFits(promptTokens, 0, // No existing context for a fresh prompt
     modelContextWindow, bufferPercent);
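
Note: validatePromptFits, like countTokens, countToolResultTokens, and createChunks elsewhere in this release, now returns a Promise, so call sites must await it; an un-awaited call would branch on a Promise object. A before/after sketch (the import path is an assumption based on the dist layout above):

// Import path is illustrative; resolve it against your installed package layout.
import { validatePromptFits } from "@townco/agent/dist/runner/hooks/predefined/context-validator.js";

// 0.1.122: const result = validatePromptFits(prompt, model, 0.1);
// 0.1.123: the caller must be async.
async function promptFits(prompt: string, model: string): Promise<boolean> {
  const result = await validatePromptFits(prompt, model, 0.1);
  return result.isValid;
}
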

package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.d.ts
@@ -24,7 +24,7 @@ export declare function calculateMaxIterations(documentTokens: number, chunkSize
  * The overlap helps maintain context continuity at chunk boundaries,
  * ensuring the LLM doesn't miss information that spans boundaries.
  */
-export declare function createChunks(content: string, chunkSizeTokens: number, overlapTokens?: number): ChunkInfo[];
+export declare function createChunks(content: string, chunkSizeTokens: number, overlapTokens?: number): Promise<ChunkInfo[]>;
 /**
  * Get summary statistics about chunks
  */

package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js
@@ -42,9 +42,9 @@ export function calculateMaxIterations(documentTokens, chunkSizeTokens) {
  * The overlap helps maintain context continuity at chunk boundaries,
  * ensuring the LLM doesn't miss information that spans boundaries.
  */
-export function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
+export async function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
     const chunks = [];
-    const totalTokens = countTokens(content);
+    const totalTokens = await countTokens(content);
     // If content fits in a single chunk, return it as-is
     if (totalTokens <= chunkSizeTokens) {
         return [
@@ -87,7 +87,7 @@ export function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
         }
         // Extract chunk content
         const chunkContent = content.slice(currentOffset, endOffset);
-        const chunkTokens = countTokens(chunkContent);
+        const chunkTokens = await countTokens(chunkContent);
         chunks.push({
             index: chunkIndex,
             startOffset: currentOffset,

package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js
@@ -84,8 +84,8 @@ async function extractFromChunk(chunk, keyRequirements, totalChunks, config) {
     try {
         const prompt = buildExtractionPrompt(chunk.content, keyRequirements, chunk.index, totalChunks, chunk.relevanceScore ?? 5);
         // Pre-flight validation: ensure prompt fits in model context
-        const systemPromptTokens = countTokens(EXTRACTION_SYSTEM_PROMPT);
-        const promptTokens = countTokens(prompt);
+        const systemPromptTokens = await countTokens(EXTRACTION_SYSTEM_PROMPT);
+        const promptTokens = await countTokens(prompt);
         const validation = validateContextFits(promptTokens, systemPromptTokens, config.modelContextSize, 0.1);
         if (!validation.isValid) {
             logger.warn("Extraction prompt too large for model context, skipping chunk", {

package/dist/runner/hooks/predefined/document-context-extractor/index.js
@@ -75,7 +75,7 @@ Provide a concise list (3-5 bullet points) of the most important elements to ext
  * Perform final compaction of merged extractions if still too large
  */
 async function compactFinalResult(mergedContent, keyFacts, keyRequirements, targetTokens) {
-    const currentTokens = countTokens(mergedContent);
+    const currentTokens = await countTokens(mergedContent);
     // If already under target, return as structured result
     if (currentTokens <= targetTokens) {
         return {
@@ -154,7 +154,7 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
     const startTime = Date.now();
     // Convert output to string for processing
     const outputString = JSON.stringify(rawOutput, null, 2);
-    const originalTokens = countToolResultTokens(rawOutput);
+    const originalTokens = await countToolResultTokens(rawOutput);
     logger.info("Starting document context extraction", {
         toolName,
         toolCallId,
@@ -174,7 +174,7 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
     };
     // Calculate chunk size and create chunks
     const chunkSizeTokens = calculateChunkSize(config);
-    const chunks = createChunks(outputString, chunkSizeTokens, config.chunkOverlapTokens);
+    const chunks = await createChunks(outputString, chunkSizeTokens, config.chunkOverlapTokens);
     const chunkStats = getChunkStats(chunks);
     // Update max iterations based on actual chunk count
     config.maxIterations = calculateMaxIterations(originalTokens, chunkSizeTokens);
@@ -263,12 +263,12 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
     }
     const { content: mergedContent, keyFacts } = mergeExtractions(extractions, scoredChunks);
     logger.info("Extractions merged", {
-        mergedContentTokens: countTokens(mergedContent),
+        mergedContentTokens: await countTokens(mergedContent),
         keyFactsCount: keyFacts.length,
     });
     // Step 5: Final compaction if needed
     const result = await compactFinalResult(mergedContent, keyFacts, keyRequirements, targetTokens);
-    const finalTokens = countToolResultTokens(result);
+    const finalTokens = await countToolResultTokens(result);
     // Mark state as complete
     state = updateStatePhase(state, "complete");
     if (storage) {

package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js
@@ -85,8 +85,8 @@ async function scoreChunk(chunk, keyRequirements, totalChunks, config) {
     try {
         const prompt = buildScoringPrompt(chunk.content, keyRequirements, chunk.index, totalChunks);
         // Pre-flight validation: ensure prompt fits in model context
-        const systemPromptTokens = countTokens(SCORING_SYSTEM_PROMPT);
-        const promptTokens = countTokens(prompt);
+        const systemPromptTokens = await countTokens(SCORING_SYSTEM_PROMPT);
+        const promptTokens = await countTokens(prompt);
         const validation = validateContextFits(promptTokens, systemPromptTokens, config.modelContextSize, 0.1);
         if (!validation.isValid) {
             logger.warn("Scoring prompt too large for model context, skipping chunk", {

package/dist/runner/hooks/predefined/tool-response-compactor.js
@@ -185,7 +185,7 @@ export const toolResponseCompactor = async (ctx) => {
     })
         .join("\n\n");
     const compacted = await compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetSize);
-    const finalTokens = countToolResultTokens(compacted);
+    const finalTokens = await countToolResultTokens(compacted);
     // Verify compaction stayed within boundaries
     if (finalTokens > targetSize) {
         // Compaction exceeded target - log warning but accept the result
@@ -248,7 +248,7 @@ Based on the tool input and conversation context, what key information is the us
 
 Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
     // Pre-flight validation: ensure analysis prompt fits in compaction model's context
-    const analysisValidation = validatePromptFits(analysisPrompt, COMPACTION_MODEL, 0.1);
+    const analysisValidation = await validatePromptFits(analysisPrompt, COMPACTION_MODEL, 0.1);
     if (!analysisValidation.isValid) {
         logger.warn("Analysis prompt too large for compaction model, using default requirements", {
             promptTokens: analysisValidation.totalTokens,
@@ -296,7 +296,7 @@ Provide a concise list (3-5 bullet points) of the most important elements to ext
     });
     // Step 2: Recursively compact until we meet the target
     let currentData = rawOutput;
-    let currentTokens = countToolResultTokens(rawOutput);
+    let currentTokens = await countToolResultTokens(rawOutput);
     const maxAttempts = 4;
     for (let attempt = 0; attempt < maxAttempts; attempt++) {
         const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
@@ -348,7 +348,7 @@ Your task: Further compact this data by:
 Return ONLY valid JSON (no explanation text).`;
     }
     // Pre-flight validation: ensure compaction prompt fits in compaction model's context
-    const compactionValidation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+    const compactionValidation = await validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
     if (!compactionValidation.isValid) {
         logger.warn("Compaction prompt too large for LLM, cannot compact further", {
             attempt: attempt + 1,
@@ -411,7 +411,7 @@ Return ONLY valid JSON (no explanation text).`;
     ];
     const jsonText = jsonMatch[1] || responseText;
     const compacted = JSON.parse(jsonText.trim());
-    const compactedTokens = countToolResultTokens(compacted);
+    const compactedTokens = await countToolResultTokens(compacted);
     logger.info(`LLM compaction attempt ${attempt + 1}/${maxAttempts}`, {
         currentTokens,
         compactedTokens,
@@ -422,7 +422,7 @@ Return ONLY valid JSON (no explanation text).`;
     if (compactedTokens <= targetTokens) {
         logger.info("LLM compaction succeeded", {
             attempts: attempt + 1,
-            originalTokens: countToolResultTokens(rawOutput),
+            originalTokens: await countToolResultTokens(rawOutput),
             finalTokens: compactedTokens,
             targetTokens,
         });
@@ -456,7 +456,7 @@ Return ONLY valid JSON (no explanation text).`;
  */
 async function compactWithLLMInternal(rawOutput, keyRequirements, targetTokens) {
     let currentData = rawOutput;
-    let currentTokens = countToolResultTokens(rawOutput);
+    let currentTokens = await countToolResultTokens(rawOutput);
     const maxAttempts = 4;
     for (let attempt = 0; attempt < maxAttempts; attempt++) {
         const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
@@ -480,7 +480,7 @@ Your task: Create a compacted version that:
 
 Return ONLY valid JSON (no explanation text).`;
     // Pre-flight validation
-    const validation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+    const validation = await validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
     if (!validation.isValid) {
         logger.warn("Internal compaction prompt too large", {
             attempt: attempt + 1,
@@ -516,7 +516,7 @@ Return ONLY valid JSON (no explanation text).`;
     ];
     const jsonText = jsonMatch[1] || responseText;
     const compacted = JSON.parse(jsonText.trim());
-    const compactedTokens = countToolResultTokens(compacted);
+    const compactedTokens = await countToolResultTokens(compacted);
     if (compactedTokens <= targetTokens ||
         compactedTokens <= targetTokens * 1.05) {
         return compacted;