@townco/agent 0.1.122 → 0.1.123

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/acp-server/adapter.d.ts +1 -0
  2. package/dist/acp-server/adapter.js +133 -11
  3. package/dist/runner/agent-runner.d.ts +7 -0
  4. package/dist/runner/hooks/executor.js +1 -1
  5. package/dist/runner/hooks/predefined/context-validator.d.ts +1 -1
  6. package/dist/runner/hooks/predefined/context-validator.js +2 -2
  7. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.d.ts +1 -1
  8. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +3 -3
  9. package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +2 -2
  10. package/dist/runner/hooks/predefined/document-context-extractor/index.js +5 -5
  11. package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +2 -2
  12. package/dist/runner/hooks/predefined/tool-response-compactor.js +9 -9
  13. package/dist/runner/langchain/index.js +301 -9
  14. package/dist/runner/langchain/otel-callbacks.d.ts +5 -0
  15. package/dist/runner/langchain/otel-callbacks.js +8 -0
  16. package/dist/runner/langchain/tools/artifacts.d.ts +68 -0
  17. package/dist/runner/langchain/tools/artifacts.js +474 -0
  18. package/dist/runner/langchain/tools/conversation_search.d.ts +22 -0
  19. package/dist/runner/langchain/tools/conversation_search.js +137 -0
  20. package/dist/runner/langchain/tools/document_extract.js +1 -1
  21. package/dist/runner/langchain/tools/generate_image.d.ts +47 -0
  22. package/dist/runner/langchain/tools/generate_image.js +175 -0
  23. package/dist/runner/langchain/tools/port-utils.d.ts +8 -0
  24. package/dist/runner/langchain/tools/port-utils.js +35 -0
  25. package/dist/tsconfig.tsbuildinfo +1 -1
  26. package/dist/utils/context-size-calculator.d.ts +1 -1
  27. package/dist/utils/context-size-calculator.js +9 -14
  28. package/dist/utils/token-counter.d.ts +9 -7
  29. package/dist/utils/token-counter.js +30 -11
  30. package/dist/utils/tool-overhead-calculator.d.ts +2 -2
  31. package/dist/utils/tool-overhead-calculator.js +5 -4
  32. package/package.json +8 -7
@@ -59,6 +59,7 @@ export declare class AgentAcpAdapter implements acp.Agent {
  private agentUiConfig;
  private currentToolOverheadTokens;
  private currentMcpOverheadTokens;
+ private currentSystemPromptTokens;
  constructor(agent: AgentRunner, connection: acp.AgentSideConnection, agentDir?: string, agentName?: string);
  /**
  * Extract tool metadata from the agent definition for exposing to clients.
@@ -5,7 +5,7 @@ import { getModelContextWindow, HookExecutor, loadHookCallback, } from "../runne
  import { getToolGroupChildren } from "../runner/langchain/index.js";
  import { telemetry } from "../telemetry/index.js";
  import { calculateContextSize, } from "../utils/context-size-calculator.js";
- import { countToolResultTokens } from "../utils/token-counter.js";
+ import { countTokens, countToolResultTokens } from "../utils/token-counter.js";
  import { SessionStorage, } from "./session-storage.js";
  const logger = createLogger("adapter");
  /**
@@ -137,6 +137,7 @@ export class AgentAcpAdapter {
  agentUiConfig;
  currentToolOverheadTokens = 0; // Track tool overhead for current turn
  currentMcpOverheadTokens = 0; // Track MCP overhead for current turn
+ currentSystemPromptTokens = 0; // Track actual system prompt tokens after all injections
  constructor(agent, connection, agentDir, agentName) {
  this.connection = connection;
  this.sessions = new Map();
@@ -1080,6 +1081,7 @@ export class AgentAcpAdapter {
  // Reset tool overhead for new turn (will be set by harness)
  this.currentToolOverheadTokens = 0;
  this.currentMcpOverheadTokens = 0;
+ this.currentSystemPromptTokens = 0;
  // Generate a unique messageId for this assistant response
  const messageId = Math.random().toString(36).substring(2);
  // Convert prompt content blocks to session storage format
@@ -1163,10 +1165,20 @@ export class AgentAcpAdapter {
  contextMessages.push(entry.message);
  }
  }
- // Calculate context size - only estimated values
- const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+ // Calculate context size - use actual system prompt tokens from harness if available
+ const context_size = await calculateContextSize(contextMessages, undefined, // Don't use base prompt - we'll add actual tokens below
+ this.currentToolOverheadTokens, // Include tool overhead
  this.currentMcpOverheadTokens, // Include MCP overhead
  getModelContextWindow(this.agent.definition.model));
+ // Add actual system prompt tokens (includes all injections)
+ // If harness hasn't sent it yet, use base prompt as fallback
+ const systemPromptTokens = this.currentSystemPromptTokens > 0
+ ? this.currentSystemPromptTokens
+ : this.agent.definition.systemPrompt
+ ? await countTokens(this.agent.definition.systemPrompt)
+ : 0;
+ context_size.systemPromptTokens = systemPromptTokens;
+ context_size.totalEstimated += systemPromptTokens;
  const contextSnapshot = createContextSnapshot(session.messages.length - 1, // Exclude the newly added user message (it will be passed separately via prompt)
  new Date().toISOString(), previousContext, context_size);
  session.context.push(contextSnapshot);
@@ -1211,6 +1223,7 @@ export class AgentAcpAdapter {
  flushLogBuffer(true);
  };
  // Helper to save cancelled message to session
+ const sessionForTurn = session;
  const saveCancelledMessage = async () => {
  if (this.noSession)
  return;
@@ -1242,14 +1255,15 @@ export class AgentAcpAdapter {
  timestamp: new Date().toISOString(),
  };
  // Check if we already have a partial assistant message
- const lastMessage = session.messages[session.messages.length - 1];
+ const lastMessage = sessionForTurn.messages[sessionForTurn.messages.length - 1];
  if (lastMessage && lastMessage.role === "assistant") {
- session.messages[session.messages.length - 1] = cancelledMessage;
+ sessionForTurn.messages[sessionForTurn.messages.length - 1] =
+ cancelledMessage;
  }
  else {
- session.messages.push(cancelledMessage);
+ sessionForTurn.messages.push(cancelledMessage);
  }
- await this.saveSessionToDisk(params.sessionId, session);
+ await this.saveSessionToDisk(params.sessionId, sessionForTurn);
  logger.info("Saved cancelled message to session", {
  sessionId: params.sessionId,
  contentBlocks: contentBlocks.length,
@@ -1338,6 +1352,8 @@ export class AgentAcpAdapter {
  const generator = this.agent.invoke(invokeParams);
  // Track the invocation span for parenting hook spans
  let invocationSpan = null;
+ // Track whether we've updated the initial context snapshot with overhead info
+ let initialSnapshotUpdated = false;
  // Manually iterate to capture the return value
  let iterResult = await generator.next();
  while (!iterResult.done) {
@@ -1373,6 +1389,90 @@ export class AgentAcpAdapter {
  iterResult = await generator.next();
  continue;
  }
+ // Capture system prompt overhead info if provided by harness
+ if ("sessionUpdate" in msg &&
+ msg.sessionUpdate === "system_prompt_overhead") {
+ const overheadInfo = msg;
+ this.currentSystemPromptTokens = overheadInfo.systemPromptTokens;
+ logger.debug("Received system prompt overhead from harness", {
+ systemPromptTokens: this.currentSystemPromptTokens,
+ });
+ // Update the initial context snapshot with actual overhead values
+ // This happens after both tool_overhead_info and system_prompt_overhead are received
+ if (!initialSnapshotUpdated && session.context.length > 0) {
+ const initialSnapshot = session.context[session.context.length - 1];
+ if (initialSnapshot?.context_size) {
+ // Calculate overhead delta
+ const oldSystemPromptTokens = initialSnapshot.context_size.systemPromptTokens;
+ const oldToolOverhead = initialSnapshot.context_size.toolOverheadTokens ?? 0;
+ const oldMcpOverhead = initialSnapshot.context_size.mcpOverheadTokens ?? 0;
+ // Update all overhead fields
+ initialSnapshot.context_size.systemPromptTokens =
+ this.currentSystemPromptTokens;
+ initialSnapshot.context_size.toolOverheadTokens =
+ this.currentToolOverheadTokens;
+ initialSnapshot.context_size.mcpOverheadTokens =
+ this.currentMcpOverheadTokens;
+ // Recalculate total
+ const oldTotal = initialSnapshot.context_size.totalEstimated;
+ const overheadDelta = this.currentSystemPromptTokens -
+ oldSystemPromptTokens +
+ (this.currentToolOverheadTokens - oldToolOverhead) +
+ (this.currentMcpOverheadTokens - oldMcpOverhead);
+ initialSnapshot.context_size.totalEstimated =
+ oldTotal + overheadDelta;
+ logger.debug("Updated initial context snapshot with overhead", {
+ systemPromptTokens: this.currentSystemPromptTokens,
+ toolOverheadTokens: this.currentToolOverheadTokens,
+ mcpOverheadTokens: this.currentMcpOverheadTokens,
+ oldTotal,
+ newTotal: initialSnapshot.context_size.totalEstimated,
+ overheadDelta,
+ });
+ // Save updated snapshot
+ await this.saveSessionToDisk(params.sessionId, session);
+ initialSnapshotUpdated = true;
+ }
+ }
+ // Don't send this update to client, it's internal metadata
+ iterResult = await generator.next();
+ continue;
+ }
+ // Capture actual token usage from API and compare with estimates
+ if ("sessionUpdate" in msg &&
+ msg.sessionUpdate === "actual_token_usage") {
+ const actualUsage = msg;
+ const totalActual = actualUsage.inputTokens + actualUsage.outputTokens;
+ // Get the most recent context entry's estimated total
+ const lastContext = session.context.length > 0
+ ? session.context[session.context.length - 1]
+ : null;
+ const estimatedTotal = lastContext?.context_size?.totalEstimated ?? 0;
+ // Calculate discrepancy
+ const discrepancy = totalActual - estimatedTotal;
+ const discrepancyPercent = estimatedTotal > 0
+ ? ((discrepancy / totalActual) * 100).toFixed(1)
+ : "N/A";
+ logger.warn("Token usage comparison (Actual vs Estimated)", {
+ sessionId: params.sessionId,
+ actual: {
+ inputTokens: actualUsage.inputTokens,
+ outputTokens: actualUsage.outputTokens,
+ total: totalActual,
+ },
+ estimated: {
+ total: estimatedTotal,
+ breakdown: lastContext?.context_size,
+ },
+ discrepancy: {
+ tokens: discrepancy,
+ percent: discrepancyPercent,
+ },
+ });
+ // Don't send this update to client, it's internal metadata
+ iterResult = await generator.next();
+ continue;
+ }
  // Extract and accumulate token usage from message chunks
  if ("sessionUpdate" in msg &&
  msg.sessionUpdate === "agent_message_chunk" &&
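A minimal arithmetic sketch of the discrepancy calculation added above, using made-up numbers purely for illustration (they are not taken from this release):

    // Hypothetical figures; mirrors the math in the actual_token_usage branch above.
    const inputTokens = 9200;                         // actual input tokens reported by the API
    const outputTokens = 800;                         // actual output tokens reported by the API
    const totalActual = inputTokens + outputTokens;   // 10000
    const estimatedTotal = 9400;                      // totalEstimated from the last context snapshot
    const discrepancy = totalActual - estimatedTotal; // 600
    // Note the percentage is taken against the actual total, not the estimate.
    const discrepancyPercent = estimatedTotal > 0
        ? ((discrepancy / totalActual) * 100).toFixed(1)  // "6.0"
        : "N/A";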
@@ -1592,6 +1692,18 @@ export class AgentAcpAdapter {
  toolCallBlock._meta.originalTokens =
  compactionMeta.originalTokens;
  toolCallBlock._meta.finalTokens = compactionMeta.finalTokens;
+ // If the runner already saved the original content to artifacts, persist that path.
+ if (typeof compactionMeta.originalContentPath === "string" &&
+ compactionMeta.originalContentPath.length > 0) {
+ toolCallBlock._meta.originalContentPath =
+ compactionMeta.originalContentPath;
+ }
+ // Persist a short preview if provided (useful when original file isn't available)
+ if (typeof compactionMeta.originalContentPreview === "string" &&
+ compactionMeta.originalContentPreview.length > 0) {
+ toolCallBlock._meta.originalContentPreview =
+ compactionMeta.originalContentPreview;
+ }
  }
  if (compactionMeta.originalContent &&
  actuallyCompacted &&
@@ -1697,7 +1809,7 @@ export class AgentAcpAdapter {
  },
  });
  }
- const outputTokens = countToolResultTokens(rawOutput);
+ const outputTokens = await countToolResultTokens(rawOutput);
  // Create notification callback to stream hook events in real-time
  const sendHookNotification = (notification) => {
  this.connection.sessionUpdate({
@@ -1942,7 +2054,7 @@ export class AgentAcpAdapter {
  }
  }
  // Calculate context size - tool result is now in the message, but hasn't been sent to LLM yet
- const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+ const context_size = await calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
  this.currentMcpOverheadTokens, // Include MCP overhead
  getModelContextWindow(this.agent.definition.model));
  // Create snapshot with a pointer to the partial message (not a full copy!)
@@ -2161,10 +2273,20 @@ export class AgentAcpAdapter {
  contextMessages.push(entry.message);
  }
  }
- // Calculate context size - only estimated values
- const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+ // Calculate context size - use actual system prompt tokens from harness if available
+ const context_size = await calculateContextSize(contextMessages, undefined, // Don't use base prompt - we'll add actual tokens below
+ this.currentToolOverheadTokens, // Include tool overhead
  this.currentMcpOverheadTokens, // Include MCP overhead
  getModelContextWindow(this.agent.definition.model));
+ // Add actual system prompt tokens (includes all injections)
+ // If harness hasn't sent it yet, use base prompt as fallback
+ const systemPromptTokens = this.currentSystemPromptTokens > 0
+ ? this.currentSystemPromptTokens
+ : this.agent.definition.systemPrompt
+ ? await countTokens(this.agent.definition.systemPrompt)
+ : 0;
+ context_size.systemPromptTokens = systemPromptTokens;
+ context_size.totalEstimated += systemPromptTokens;
  const contextSnapshot = createContextSnapshot(session.messages.length, new Date().toISOString(), previousContext, context_size);
  session.context.push(contextSnapshot);
  await this.saveSessionToDisk(params.sessionId, session);
@@ -162,6 +162,13 @@ export type ExtendedSessionUpdate = (SessionNotification["update"] & {
  sessionUpdate: "tool_overhead_info";
  toolOverheadTokens: number;
  mcpOverheadTokens: number;
+ } | {
+ sessionUpdate: "system_prompt_overhead";
+ systemPromptTokens: number;
+ } | {
+ sessionUpdate: "actual_token_usage";
+ inputTokens: number;
+ outputTokens: number;
  } | {
  sessionUpdate: "__invocation_span";
  invocationSpan: Span;
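For orientation, a small TypeScript sketch of how a consumer could narrow the two new session update variants; the local type below only mirrors the union members shown in this hunk and is not the package's exported ExtendedSessionUpdate type:

    // Assumed local mirror of the two new variants, with field names as in the diff above.
    type OverheadUpdate =
        | { sessionUpdate: "system_prompt_overhead"; systemPromptTokens: number }
        | { sessionUpdate: "actual_token_usage"; inputTokens: number; outputTokens: number };

    function handleOverheadUpdate(msg: OverheadUpdate): void {
        if (msg.sessionUpdate === "system_prompt_overhead") {
            console.log("system prompt tokens:", msg.systemPromptTokens);
        } else {
            console.log("actual total tokens:", msg.inputTokens + msg.outputTokens);
        }
    }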
@@ -231,7 +231,7 @@ export class HookExecutor {
  if (result.metadata?.modifiedOutput) {
  const newOutput = result.metadata.modifiedOutput;
  const newOutputTokens = result.metadata.finalTokens ??
- countToolResultTokens(newOutput);
+ (await countToolResultTokens(newOutput));
  currentOutput = newOutput;
  currentToolResponse = {
  ...currentToolResponse,
@@ -43,7 +43,7 @@ export declare function validateContextFits(contentTokens: number, currentContex
  * @param bufferPercent - Safety buffer as a percentage (default 10%)
  * @returns Validation result indicating if prompt fits
  */
- export declare function validatePromptFits(prompt: string, modelName: string, bufferPercent?: number): ValidationResult;
+ export declare function validatePromptFits(prompt: string, modelName: string, bufferPercent?: number): Promise<ValidationResult>;
  /**
  * Checks if an error is a context overflow error from the Anthropic API.
  *
@@ -49,8 +49,8 @@ export function validateContextFits(contentTokens, currentContextTokens, modelCo
  * @param bufferPercent - Safety buffer as a percentage (default 10%)
  * @returns Validation result indicating if prompt fits
  */
- export function validatePromptFits(prompt, modelName, bufferPercent = DEFAULT_BUFFER_PERCENT) {
- const promptTokens = countTokens(prompt);
+ export async function validatePromptFits(prompt, modelName, bufferPercent = DEFAULT_BUFFER_PERCENT) {
+ const promptTokens = await countTokens(prompt);
  const modelContextWindow = getModelContextWindow(modelName);
  return validateContextFits(promptTokens, 0, // No existing context for a fresh prompt
  modelContextWindow, bufferPercent);
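Because validatePromptFits (like countTokens) now returns a promise, existing call sites need an await; a minimal before/after sketch, assuming an async caller that branches on isValid the way the predefined hooks in this release do:

    // 0.1.122: const validation = validatePromptFits(prompt, modelName, 0.1);
    // 0.1.123: the function is async, so the result must be awaited.
    const validation = await validatePromptFits(prompt, modelName, 0.1);
    if (!validation.isValid) {
        // skip the chunk or compact further, as the predefined hooks above do
    }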
@@ -24,7 +24,7 @@ export declare function calculateMaxIterations(documentTokens: number, chunkSize
  * The overlap helps maintain context continuity at chunk boundaries,
  * ensuring the LLM doesn't miss information that spans boundaries.
  */
- export declare function createChunks(content: string, chunkSizeTokens: number, overlapTokens?: number): ChunkInfo[];
+ export declare function createChunks(content: string, chunkSizeTokens: number, overlapTokens?: number): Promise<ChunkInfo[]>;
  /**
  * Get summary statistics about chunks
  */
@@ -42,9 +42,9 @@ export function calculateMaxIterations(documentTokens, chunkSizeTokens) {
  * The overlap helps maintain context continuity at chunk boundaries,
  * ensuring the LLM doesn't miss information that spans boundaries.
  */
- export function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
+ export async function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
  const chunks = [];
- const totalTokens = countTokens(content);
+ const totalTokens = await countTokens(content);
  // If content fits in a single chunk, return it as-is
  if (totalTokens <= chunkSizeTokens) {
  return [
@@ -87,7 +87,7 @@ export function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
  }
  // Extract chunk content
  const chunkContent = content.slice(currentOffset, endOffset);
- const chunkTokens = countTokens(chunkContent);
+ const chunkTokens = await countTokens(chunkContent);
  chunks.push({
  index: chunkIndex,
  startOffset: currentOffset,
@@ -84,8 +84,8 @@ async function extractFromChunk(chunk, keyRequirements, totalChunks, config) {
  try {
  const prompt = buildExtractionPrompt(chunk.content, keyRequirements, chunk.index, totalChunks, chunk.relevanceScore ?? 5);
  // Pre-flight validation: ensure prompt fits in model context
- const systemPromptTokens = countTokens(EXTRACTION_SYSTEM_PROMPT);
- const promptTokens = countTokens(prompt);
+ const systemPromptTokens = await countTokens(EXTRACTION_SYSTEM_PROMPT);
+ const promptTokens = await countTokens(prompt);
  const validation = validateContextFits(promptTokens, systemPromptTokens, config.modelContextSize, 0.1);
  if (!validation.isValid) {
  logger.warn("Extraction prompt too large for model context, skipping chunk", {
@@ -75,7 +75,7 @@ Provide a concise list (3-5 bullet points) of the most important elements to ext
  * Perform final compaction of merged extractions if still too large
  */
  async function compactFinalResult(mergedContent, keyFacts, keyRequirements, targetTokens) {
- const currentTokens = countTokens(mergedContent);
+ const currentTokens = await countTokens(mergedContent);
  // If already under target, return as structured result
  if (currentTokens <= targetTokens) {
  return {
@@ -154,7 +154,7 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
  const startTime = Date.now();
  // Convert output to string for processing
  const outputString = JSON.stringify(rawOutput, null, 2);
- const originalTokens = countToolResultTokens(rawOutput);
+ const originalTokens = await countToolResultTokens(rawOutput);
  logger.info("Starting document context extraction", {
  toolName,
  toolCallId,
@@ -174,7 +174,7 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
  };
  // Calculate chunk size and create chunks
  const chunkSizeTokens = calculateChunkSize(config);
- const chunks = createChunks(outputString, chunkSizeTokens, config.chunkOverlapTokens);
+ const chunks = await createChunks(outputString, chunkSizeTokens, config.chunkOverlapTokens);
  const chunkStats = getChunkStats(chunks);
  // Update max iterations based on actual chunk count
  config.maxIterations = calculateMaxIterations(originalTokens, chunkSizeTokens);
@@ -263,12 +263,12 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
  }
  const { content: mergedContent, keyFacts } = mergeExtractions(extractions, scoredChunks);
  logger.info("Extractions merged", {
- mergedContentTokens: countTokens(mergedContent),
+ mergedContentTokens: await countTokens(mergedContent),
  keyFactsCount: keyFacts.length,
  });
  // Step 5: Final compaction if needed
  const result = await compactFinalResult(mergedContent, keyFacts, keyRequirements, targetTokens);
- const finalTokens = countToolResultTokens(result);
+ const finalTokens = await countToolResultTokens(result);
  // Mark state as complete
  state = updateStatePhase(state, "complete");
  if (storage) {
@@ -85,8 +85,8 @@ async function scoreChunk(chunk, keyRequirements, totalChunks, config) {
  try {
  const prompt = buildScoringPrompt(chunk.content, keyRequirements, chunk.index, totalChunks);
  // Pre-flight validation: ensure prompt fits in model context
- const systemPromptTokens = countTokens(SCORING_SYSTEM_PROMPT);
- const promptTokens = countTokens(prompt);
+ const systemPromptTokens = await countTokens(SCORING_SYSTEM_PROMPT);
+ const promptTokens = await countTokens(prompt);
  const validation = validateContextFits(promptTokens, systemPromptTokens, config.modelContextSize, 0.1);
  if (!validation.isValid) {
  logger.warn("Scoring prompt too large for model context, skipping chunk", {
@@ -185,7 +185,7 @@ export const toolResponseCompactor = async (ctx) => {
  })
  .join("\n\n");
  const compacted = await compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetSize);
- const finalTokens = countToolResultTokens(compacted);
+ const finalTokens = await countToolResultTokens(compacted);
  // Verify compaction stayed within boundaries
  if (finalTokens > targetSize) {
  // Compaction exceeded target - log warning but accept the result
@@ -248,7 +248,7 @@ Based on the tool input and conversation context, what key information is the us

  Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
  // Pre-flight validation: ensure analysis prompt fits in compaction model's context
- const analysisValidation = validatePromptFits(analysisPrompt, COMPACTION_MODEL, 0.1);
+ const analysisValidation = await validatePromptFits(analysisPrompt, COMPACTION_MODEL, 0.1);
  if (!analysisValidation.isValid) {
  logger.warn("Analysis prompt too large for compaction model, using default requirements", {
  promptTokens: analysisValidation.totalTokens,
@@ -296,7 +296,7 @@ Provide a concise list (3-5 bullet points) of the most important elements to ext
  });
  // Step 2: Recursively compact until we meet the target
  let currentData = rawOutput;
- let currentTokens = countToolResultTokens(rawOutput);
+ let currentTokens = await countToolResultTokens(rawOutput);
  const maxAttempts = 4;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
  const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
@@ -348,7 +348,7 @@ Your task: Further compact this data by:
  Return ONLY valid JSON (no explanation text).`;
  }
  // Pre-flight validation: ensure compaction prompt fits in compaction model's context
- const compactionValidation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+ const compactionValidation = await validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
  if (!compactionValidation.isValid) {
  logger.warn("Compaction prompt too large for LLM, cannot compact further", {
  attempt: attempt + 1,
@@ -411,7 +411,7 @@ Return ONLY valid JSON (no explanation text).`;
  ];
  const jsonText = jsonMatch[1] || responseText;
  const compacted = JSON.parse(jsonText.trim());
- const compactedTokens = countToolResultTokens(compacted);
+ const compactedTokens = await countToolResultTokens(compacted);
  logger.info(`LLM compaction attempt ${attempt + 1}/${maxAttempts}`, {
  currentTokens,
  compactedTokens,
@@ -422,7 +422,7 @@ Return ONLY valid JSON (no explanation text).`;
  if (compactedTokens <= targetTokens) {
  logger.info("LLM compaction succeeded", {
  attempts: attempt + 1,
- originalTokens: countToolResultTokens(rawOutput),
+ originalTokens: await countToolResultTokens(rawOutput),
  finalTokens: compactedTokens,
  targetTokens,
  });
@@ -456,7 +456,7 @@ Return ONLY valid JSON (no explanation text).`;
  */
  async function compactWithLLMInternal(rawOutput, keyRequirements, targetTokens) {
  let currentData = rawOutput;
- let currentTokens = countToolResultTokens(rawOutput);
+ let currentTokens = await countToolResultTokens(rawOutput);
  const maxAttempts = 4;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
  const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
@@ -480,7 +480,7 @@ Your task: Create a compacted version that:

  Return ONLY valid JSON (no explanation text).`;
  // Pre-flight validation
- const validation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+ const validation = await validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
  if (!validation.isValid) {
  logger.warn("Internal compaction prompt too large", {
  attempt: attempt + 1,
@@ -516,7 +516,7 @@ Return ONLY valid JSON (no explanation text).`;
  ];
  const jsonText = jsonMatch[1] || responseText;
  const compacted = JSON.parse(jsonText.trim());
- const compactedTokens = countToolResultTokens(compacted);
+ const compactedTokens = await countToolResultTokens(compacted);
  if (compactedTokens <= targetTokens ||
  compactedTokens <= targetTokens * 1.05) {
  return compacted;