@townco/agent 0.1.101 → 0.1.104

Files changed (27)
  1. package/dist/acp-server/adapter.d.ts +10 -0
  2. package/dist/acp-server/adapter.js +101 -31
  3. package/dist/definition/index.d.ts +17 -4
  4. package/dist/definition/index.js +19 -2
  5. package/dist/runner/agent-runner.d.ts +6 -2
  6. package/dist/runner/hooks/executor.d.ts +5 -3
  7. package/dist/runner/hooks/executor.js +190 -150
  8. package/dist/runner/hooks/loader.d.ts +13 -1
  9. package/dist/runner/hooks/loader.js +27 -0
  10. package/dist/runner/hooks/predefined/compaction-tool.d.ts +3 -1
  11. package/dist/runner/hooks/predefined/compaction-tool.js +38 -2
  12. package/dist/runner/hooks/predefined/context-validator.d.ts +57 -0
  13. package/dist/runner/hooks/predefined/context-validator.js +92 -0
  14. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +2 -2
  15. package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +29 -0
  16. package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +29 -0
  17. package/dist/runner/hooks/predefined/mid-turn-compaction.d.ts +17 -0
  18. package/dist/runner/hooks/predefined/mid-turn-compaction.js +224 -0
  19. package/dist/runner/hooks/predefined/token-utils.d.ts +11 -0
  20. package/dist/runner/hooks/predefined/token-utils.js +13 -0
  21. package/dist/runner/hooks/predefined/tool-response-compactor.js +155 -25
  22. package/dist/runner/hooks/registry.js +2 -0
  23. package/dist/runner/hooks/types.d.ts +37 -4
  24. package/dist/runner/index.d.ts +6 -2
  25. package/dist/runner/langchain/index.js +60 -8
  26. package/dist/tsconfig.tsbuildinfo +1 -1
  27. package/package.json +7 -7
package/dist/runner/hooks/predefined/tool-response-compactor.js

@@ -2,7 +2,9 @@ import Anthropic from "@anthropic-ai/sdk";
  import { createLogger } from "../../../logger.js";
  import { telemetry } from "../../../telemetry/index.js";
  import { countToolResultTokens } from "../../../utils/token-counter.js";
+ import { isContextOverflowError, validatePromptFits, } from "./context-validator.js";
  import { extractDocumentContext } from "./document-context-extractor/index.js";
+ import { applyTokenPadding } from "./token-utils.js";
  const logger = createLogger("tool-response-compactor");
  // Create Anthropic client directly (not using LangChain)
  // This ensures compaction LLM calls don't get captured by LangGraph's streaming
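
Note: token-utils.js (+13 lines) is new in this release, and its source is not part of the hunks shown here. A minimal sketch consistent with its call sites below, assuming the "10% padding" mentioned in their comments is a flat multiplier:

    // Hypothetical reconstruction of token-utils; the shipped source is not
    // shown in this diff, so the factor below is an assumption taken from
    // the comments at the call sites ("Apply 10% padding to output tokens").
    const TOKEN_PADDING_FACTOR = 1.1;

    export function applyTokenPadding(tokens: number): number {
      // Round up so the padded estimate never understates the true count.
      return Math.ceil(tokens * TOKEN_PADDING_FACTOR);
    }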
@@ -16,8 +18,8 @@ const COMPACTION_MODEL_CONTEXT = 200000; // Haiku context size for calculating t
  * Tool response compaction hook - compacts or truncates large tool responses
  * to prevent context overflow
  */
- // Tools that should never be compacted (internal/small response tools)
- const SKIP_COMPACTION_TOOLS = new Set(["todo_write", "TodoWrite"]);
+ // Default tools to skip compaction for
+ const DEFAULT_SKIP_TOOLS = ["todo_write"];
  export const toolResponseCompactor = async (ctx) => {
  // Only process if we have tool response data
  if (!ctx.toolResponse) {
@@ -25,14 +27,15 @@ export const toolResponseCompactor = async (ctx) => {
  return { newContextEntry: null };
  }
  const { toolCallId, toolName, toolInput, rawOutput, outputTokens } = ctx.toolResponse;
- // Skip compaction for certain internal tools
- if (SKIP_COMPACTION_TOOLS.has(toolName)) {
- logger.debug("Skipping compaction for internal tool", { toolName });
+ // Get settings from callbackSetting - each callback has its own settings
+ const settings = ctx.callbackSetting;
+ const maxTokensSize = settings?.maxTokensSize ?? 20000; // Default: 20000 tokens
+ const skipTools = new Set(settings?.skipTools ?? DEFAULT_SKIP_TOOLS);
+ // Skip compaction for configured tools
+ if (skipTools.has(toolName)) {
+ logger.debug("Skipping compaction for tool", { toolName });
  return { newContextEntry: null };
  }
- // Get settings from hook configuration
- const settings = ctx.session.requestParams.hookSettings;
- const maxTokensSize = settings?.maxTokensSize ?? 20000; // Default: 20000 tokens
  // Use maxTokensSize directly as it's now in tokens
  const maxAllowedResponseSize = maxTokensSize;
  // Calculate available space in context
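
Note: settings now come from ctx.callbackSetting (per-callback) instead of the session-wide hookSettings. The field is typed Record<string, unknown>, so the shape below is a hypothetical TypeScript annotation inferred from the reads above:

    interface ToolResponseCompactorSettings {
      maxTokensSize?: number; // token budget per tool response; defaults to 20000
      skipTools?: string[];   // tool names exempt from compaction; defaults to ["todo_write"]
    }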
@@ -42,12 +45,16 @@ export const toolResponseCompactor = async (ctx) => {
  ? Math.floor(availableSpace * 0.9)
  : maxAllowedResponseSize;
  // Calculate compaction limit: max response size that can fit in a single LLM compaction call
- const COMPACTION_OVERHEAD = 10000;
- const compactionLimit = Math.floor((COMPACTION_MODEL_CONTEXT - COMPACTION_OVERHEAD) * 0.9); // ~175K tokens
+ // Increased overhead to 25K to account for system prompt, conversation context, JSON serialization
+ const COMPACTION_OVERHEAD = 25000;
+ const compactionLimit = Math.floor((COMPACTION_MODEL_CONTEXT - COMPACTION_OVERHEAD) * 0.9); // ~157K tokens
+ // Apply 10% padding to output tokens to account for estimation inaccuracies
+ const paddedOutputTokens = applyTokenPadding(outputTokens);
  logger.info("Tool response compaction hook triggered", {
  toolCallId,
  toolName,
  outputTokens,
+ paddedOutputTokens,
  currentContext: ctx.currentTokens,
  maxTokens: ctx.maxTokens,
  maxAllowedResponseSize,
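
Note: the limit arithmetic works out as follows. The old inline comment overstated its value ("~175K"); the new "~157K" comment is accurate:

    // old: Math.floor((200_000 - 10_000) * 0.9) === 171_000 tokens
    // new: Math.floor((200_000 - 25_000) * 0.9) === 157_500 tokens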
@@ -56,8 +63,8 @@ export const toolResponseCompactor = async (ctx) => {
  compactionLimit,
  settings,
  });
- // Case 0: Small response, no action needed
- if (outputTokens <= effectiveMaxResponseSize) {
+ // Case 0: Small response, no action needed (using padded tokens for safety)
+ if (paddedOutputTokens <= effectiveMaxResponseSize) {
  logger.info("Tool response fits within threshold, no compaction needed");
  return {
  newContextEntry: null,
@@ -83,7 +90,8 @@ export const toolResponseCompactor = async (ctx) => {
  contextAlreadyOverThreshold: availableSpace <= maxAllowedResponseSize,
  });
  // Case 2: Huge response - use document context extractor (with truncation fallback)
- if (outputTokens >= compactionLimit) {
+ // Use padded tokens for safety margin
+ if (paddedOutputTokens >= compactionLimit) {
  logger.info("Tool response exceeds compaction capacity, using document context extractor", {
  outputTokens,
  compactionLimit,
@@ -180,23 +188,24 @@ export const toolResponseCompactor = async (ctx) => {
  const finalTokens = countToolResultTokens(compacted);
  // Verify compaction stayed within boundaries
  if (finalTokens > targetSize) {
- logger.error("LLM compaction exceeded target", {
+ // Compaction exceeded target - log warning but accept the result
+ // The next callback in the chain (compaction-tool) will handle context overflow if needed
+ logger.warn("LLM compaction exceeded target, accepting result anyway", {
  finalTokens,
  targetSize,
  excess: finalTokens - targetSize,
  toolName,
  toolCallId,
  });
- throw new Error(`LLM compaction for tool "${toolName}" exceeded target size. ` +
- `Compacted to ${finalTokens.toLocaleString()} tokens but target was ${targetSize.toLocaleString()}. ` +
- `Original response was ${outputTokens.toLocaleString()} tokens.`);
  }
- logger.info("Successfully compacted tool response", {
- originalTokens: outputTokens,
- finalTokens,
- targetSize,
- tokensSaved: outputTokens - finalTokens,
- });
+ else {
+ logger.info("Successfully compacted tool response", {
+ originalTokens: outputTokens,
+ finalTokens,
+ targetSize,
+ tokensSaved: outputTokens - finalTokens,
+ });
+ }
  return {
  newContextEntry: null,
  metadata: {
@@ -205,17 +214,20 @@ export const toolResponseCompactor = async (ctx) => {
  finalTokens,
  tokensSaved: outputTokens - finalTokens,
  modifiedOutput: compacted,
+ compactionMethod: finalTokens > targetSize ? "llm_exceeded_target" : "llm",
  },
  };
  }
  catch (error) {
- logger.error("Compaction failed", {
+ // Compaction failed - throw error to stop the callback chain
+ // The error will be returned to the agent so it can decide what to do
+ logger.error("Tool response compaction failed", {
  error: error instanceof Error ? error.message : String(error),
  toolName,
  toolCallId,
  outputTokens,
  });
- throw new Error(`LLM compaction failed for tool "${toolName}": ${error instanceof Error ? error.message : String(error)}. Original response was ${outputTokens.toLocaleString()} tokens.`);
+ throw new Error(`Tool response compaction failed for "${toolName}": ${error instanceof Error ? error.message : String(error)}. Response was ${outputTokens.toLocaleString()} tokens.`);
  }
  };
  /**
@@ -235,6 +247,17 @@ ${conversationContext}
  Based on the tool input and conversation context, what key information is the user looking for from this tool response?
 
  Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
+ // Pre-flight validation: ensure analysis prompt fits in compaction model's context
+ const analysisValidation = validatePromptFits(analysisPrompt, COMPACTION_MODEL, 0.1);
+ if (!analysisValidation.isValid) {
+ logger.warn("Analysis prompt too large for compaction model, using default requirements", {
+ promptTokens: analysisValidation.totalTokens,
+ maxAllowed: analysisValidation.maxAllowedTokens,
+ });
+ // Skip analysis and use generic requirements - will rely on compaction step
+ const keyRequirements = "Extract the most important and relevant information from this tool response.";
+ return compactWithLLMInternal(rawOutput, keyRequirements, targetTokens);
+ }
  // Create OTEL span for analysis call
  const analysisSpan = telemetry.startSpan("compaction.analysis", {
  "gen_ai.operation.name": "chat",
@@ -324,6 +347,19 @@ Your task: Further compact this data by:
 
  Return ONLY valid JSON (no explanation text).`;
  }
+ // Pre-flight validation: ensure compaction prompt fits in compaction model's context
+ const compactionValidation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+ if (!compactionValidation.isValid) {
+ logger.warn("Compaction prompt too large for LLM, cannot compact further", {
+ attempt: attempt + 1,
+ promptTokens: compactionValidation.totalTokens,
+ maxAllowed: compactionValidation.maxAllowedTokens,
+ currentDataTokens: currentTokens,
+ });
+ // Return what we have - the caller will need to handle this via chunking
+ throw new Error(`Compaction prompt exceeds model context (${compactionValidation.totalTokens} tokens > ${compactionValidation.maxAllowedTokens} max). ` +
+ `Current data is ${currentTokens} tokens.`);
+ }
  // Create OTEL span for compaction call
  const compactionSpan = telemetry.startSpan("compaction.compact", {
  "gen_ai.operation.name": "chat",
@@ -354,6 +390,16 @@ Return ONLY valid JSON (no explanation text).`;
  }
  catch (error) {
  telemetry.endSpan(compactionSpan, error);
+ // Check if this is a context overflow error from the API
+ if (isContextOverflowError(error)) {
+ logger.warn("Context overflow error from compaction API, returning current data", {
+ attempt: attempt + 1,
+ currentTokens,
+ error: error instanceof Error ? error.message : String(error),
+ });
+ // Return what we have - better than crashing
+ return currentData;
+ }
  throw error;
  }
  // Extract and parse JSON
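
Note: isContextOverflowError is the other new export used above; its implementation is likewise not shown. A plausible sketch, assuming it matches overflow wording in provider error messages (Anthropic reports overflows as invalid_request errors with messages like "prompt is too long"):

    export function isContextOverflowError(error: unknown): boolean {
      const message = error instanceof Error ? error.message : String(error);
      // Assumed patterns; the shipped list of matched phrases may differ.
      return /prompt is too long|context window|maximum context length/i.test(message);
    }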
@@ -404,3 +450,87 @@ Return ONLY valid JSON (no explanation text).`;
  });
  return currentData;
  }
+ /**
+ * Internal helper for compaction when analysis is skipped
+ * Uses generic key requirements and goes directly to compaction
+ */
+ async function compactWithLLMInternal(rawOutput, keyRequirements, targetTokens) {
+ let currentData = rawOutput;
+ let currentTokens = countToolResultTokens(rawOutput);
+ const maxAttempts = 4;
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
+ const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
+ const compactionPrompt = `You are helping to compact a large tool response to save context space.
+
+ Key information needed:
+ ${keyRequirements}
+
+ Data to compact (JSON):
+ ${JSON.stringify(currentData, null, 2)}
+
+ Current size: ${currentTokens.toLocaleString()} tokens
+ Target size: ${targetTokens.toLocaleString()} tokens (reduce by ${reductionNeeded}%)
+
+ Your task: Create a compacted version that:
+ 1. Retains all information relevant to the key requirements above
+ 2. Removes or summarizes less relevant details
+ 3. Maintains the same JSON structure where possible
+ 4. Reduces the size to ${targetTokens.toLocaleString()} tokens or less
+ 5. Be aggressive in removing unnecessary data
+
+ Return ONLY valid JSON (no explanation text).`;
+ // Pre-flight validation
+ const validation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+ if (!validation.isValid) {
+ logger.warn("Internal compaction prompt too large", {
+ attempt: attempt + 1,
+ promptTokens: validation.totalTokens,
+ maxAllowed: validation.maxAllowedTokens,
+ });
+ // Return what we have - caller will need to handle via chunking
+ return currentData;
+ }
+ const compactionSpan = telemetry.startSpan("compaction.compact.internal", {
+ "gen_ai.operation.name": "chat",
+ "gen_ai.provider.name": "anthropic",
+ "gen_ai.request.model": COMPACTION_MODEL,
+ "compaction.attempt": attempt + 1,
+ "compaction.target_tokens": targetTokens,
+ "compaction.current_tokens": currentTokens,
+ });
+ try {
+ const compactionResponse = await telemetry.withActiveSpanAsync(compactionSpan, () => anthropic.messages.create({
+ model: COMPACTION_MODEL,
+ max_tokens: 4096,
+ temperature: 0,
+ system: "You are a helpful assistant compacting data.",
+ messages: [{ role: "user", content: compactionPrompt }],
+ }));
+ telemetry.recordTokenUsage(compactionResponse.usage.input_tokens, compactionResponse.usage.output_tokens, compactionSpan);
+ telemetry.endSpan(compactionSpan);
+ const firstContent = compactionResponse.content[0];
+ const responseText = firstContent && firstContent.type === "text" ? firstContent.text : "";
+ const jsonMatch = responseText.match(/```(?:json)?\n([\s\S]*?)\n```/) || [
+ null,
+ responseText,
+ ];
+ const jsonText = jsonMatch[1] || responseText;
+ const compacted = JSON.parse(jsonText.trim());
+ const compactedTokens = countToolResultTokens(compacted);
+ if (compactedTokens <= targetTokens ||
+ compactedTokens <= targetTokens * 1.05) {
+ return compacted;
+ }
+ currentData = compacted;
+ currentTokens = compactedTokens;
+ }
+ catch (error) {
+ telemetry.endSpan(compactionSpan, error);
+ if (isContextOverflowError(error)) {
+ return currentData;
+ }
+ throw error;
+ }
+ }
+ return currentData;
+ }
package/dist/runner/hooks/registry.js

@@ -1,4 +1,5 @@
  import { compactionTool } from "./predefined/compaction-tool";
+ import { midTurnCompaction } from "./predefined/mid-turn-compaction";
  import { toolResponseCompactor } from "./predefined/tool-response-compactor";
  /**
  * Registry of predefined hook callbacks
@@ -6,6 +7,7 @@ import { toolResponseCompactor } from "./predefined/tool-response-compactor";
  */
  export const HOOK_REGISTRY = {
  compaction_tool: compactionTool,
+ mid_turn_compaction: midTurnCompaction,
  tool_response_compactor: toolResponseCompactor,
  };
  /**
package/dist/runner/hooks/types.d.ts

@@ -22,6 +22,7 @@ export interface ContextSizeSettings {
  }
  /**
  * Settings for tool_response hook
+ * @deprecated Use CallbackConfig.setting instead for individual callback settings
  */
  export interface ToolResponseSettings {
  /**
@@ -31,6 +32,21 @@ export interface ToolResponseSettings {
  */
  maxTokensSize?: number | undefined;
  }
+ /**
+ * Individual callback configuration with its own settings
+ */
+ export interface CallbackConfig {
+ /**
+ * Callback reference - either a predefined hook name or a file path
+ * Examples: "compaction_tool" or "./hooks/my_compaction_tool.ts"
+ */
+ name: string;
+ /**
+ * Callback-specific settings
+ * The callback is responsible for interpreting these settings
+ */
+ setting?: Record<string, unknown> | undefined;
+ }
  /**
  * Hook configuration in agent definition
  */
@@ -40,14 +56,21 @@ export interface HookConfig {
  */
  type: HookType;
  /**
- * Optional hook-specific settings
+ * @deprecated Use callbacks array instead for new configurations.
+ * Single callback reference - either a predefined hook name or a file path
+ * Examples: "compaction_tool" or "./hooks/my_compaction_tool.ts"
+ */
+ callback?: string | undefined;
+ /**
+ * @deprecated Use CallbackConfig.setting in callbacks array instead.
+ * Optional hook-specific settings (only used with deprecated callback field)
  */
  setting?: ContextSizeSettings | ToolResponseSettings | undefined;
  /**
- * Callback reference - either a predefined hook name or a file path
- * Examples: "compaction_tool" or "./hooks/my_compaction_tool.ts"
+ * Array of callback configurations to execute in order.
+ * Each callback has its own settings and decides whether to run.
  */
- callback: string;
+ callbacks?: CallbackConfig[] | undefined;
  }
  /**
  * Read-only view of an agent session for hooks
@@ -102,6 +125,11 @@ export interface HookContext {
  * Storage interface for hooks that need to persist data
  */
  storage?: HookStorageInterface | undefined;
+ /**
+ * Settings for the current callback being executed.
+ * Each callback receives its own settings from CallbackConfig.setting
+ */
+ callbackSetting?: Record<string, unknown> | undefined;
  /**
  * Tool response data (only for tool_response hooks)
  */
@@ -112,6 +140,11 @@ export interface HookContext {
  rawOutput: Record<string, unknown>;
  outputTokens: number;
  };
+ /**
+ * Token count of the pending tool response (only for tool_response hooks).
+ * This is recalculated after each callback in the chain to reflect any modifications.
+ */
+ toolResponseTokens?: number | undefined;
  }
  /**
  * Result returned by hook callbacks
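
Note: taken together, these schema changes allow a hooks block like the following. The example is assembled from the HOOK_REGISTRY names and the settings actually read in tool-response-compactor.js; the context_size threshold is illustrative only:

    const hooks = [
      {
        type: "tool_response",
        callbacks: [
          {
            name: "tool_response_compactor",
            setting: { maxTokensSize: 20000, skipTools: ["todo_write"] },
          },
          { name: "compaction_tool" }, // next in the chain; handles overflow if compaction overshoots
        ],
      },
      // Deprecated single-callback form, still accepted:
      // { type: "context_size", callback: "compaction_tool", setting: { threshold: 150000 } },
    ];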
@@ -36,12 +36,16 @@ export declare const makeRunnerFromDefinition: (definition: {
  harnessImplementation?: "langchain" | undefined;
  hooks?: {
  type: "context_size" | "tool_response";
- setting?: {
+ setting?: Record<string, unknown> | {
  threshold: number;
  } | {
  maxTokensSize?: number | undefined;
  } | undefined;
- callback: string;
+ callback?: string | undefined;
+ callbacks?: {
+ name: string;
+ setting?: Record<string, unknown> | undefined;
+ }[] | undefined;
  }[] | undefined;
  initialMessage?: {
  enabled: boolean;
package/dist/runner/langchain/index.js

@@ -8,6 +8,8 @@ import { z } from "zod";
  import { SUBAGENT_MODE_KEY } from "../../acp-server/adapter";
  import { createLogger } from "../../logger.js";
  import { telemetry } from "../../telemetry/index.js";
+ import { calculateContextSize } from "../../utils/context-size-calculator.js";
+ import { getModelContextWindow } from "../hooks/constants.js";
  import { bindGeneratorToAbortSignal, bindGeneratorToSessionContext, getAbortSignal, runWithAbortSignal, } from "../session-context";
  import { loadCustomToolModule, } from "../tool-loader.js";
  import { createModelFromString, detectProvider } from "./model-factory.js";
@@ -384,6 +386,53 @@ export class LangchainAgent {
  toolOverheadTokens,
  mcpOverheadTokens,
  };
+ // Calculate accurate context size for tool response compaction decisions
+ // This includes: system prompt, tool overhead, MCP overhead, and message history
+ const baseSystemPromptTokens = this.definition.systemPrompt
+ ? countTokens(this.definition.systemPrompt)
+ : 0;
+ // Estimate additional injection tokens based on enabled features
+ // These will be injected into the system prompt later
+ const hasWebSearchToolsForEstimate = builtInNames.includes("web_search") ||
+ builtInNames.includes("town_web_search");
+ const hasLibraryToolsForEstimate = enabledTools.some((t) => t.name.startsWith("library__") ||
+ t.name.includes("get_document") ||
+ t.name.includes("retrieve_document") ||
+ t.name.includes("search_document"));
+ const hasSubagentToolsForEstimate = enabledTools.some((t) => t.name === SUBAGENT_TOOL_NAME);
+ // Rough estimate for injection overhead (citations, subagent instructions, date/time)
+ // This is approximate but adds safety margin for the compaction decision
+ const injectionOverheadEstimate = (hasWebSearchToolsForEstimate ? 500 : 0) + // Citation instructions
+ (hasLibraryToolsForEstimate ? 400 : 0) + // Document citation instructions
+ (hasSubagentToolsForEstimate ? 300 : 0) + // Subagent citation instructions
+ 200; // Date/time injection + safety margin
+ const systemPromptTokensEstimate = baseSystemPromptTokens +
+ todoInstructionsTokens +
+ injectionOverheadEstimate;
+ // Calculate message history tokens from context messages
+ const messageHistoryContext = calculateContextSize(req.contextMessages || [], undefined, // Don't double count system prompt
+ 0, // Don't double count tool overhead
+ 0);
+ const messageHistoryTokens = messageHistoryContext.userMessagesTokens +
+ messageHistoryContext.assistantMessagesTokens +
+ messageHistoryContext.toolInputTokens +
+ messageHistoryContext.toolResultsTokens;
+ // Total base context (everything except new tool outputs in this turn)
+ const baseContextTokens = systemPromptTokensEstimate +
+ toolOverheadTokens +
+ mcpOverheadTokens +
+ messageHistoryTokens;
+ const modelContextWindow = getModelContextWindow(this.definition.model);
+ _logger.debug("Calculated accurate context size for compaction", {
+ baseSystemPromptTokens,
+ injectionOverheadEstimate,
+ systemPromptTokensEstimate,
+ toolOverheadTokens,
+ mcpOverheadTokens,
+ messageHistoryTokens,
+ baseContextTokens,
+ modelContextWindow,
+ });
  // Wrap tools with response compaction if hook is configured
  const hooks = this.definition.hooks ?? [];
  const hasToolResponseHook = hooks.some((h) => h.type === "tool_response");
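
Note: a worked instance of the estimate above, for a hypothetical agent with web search and subagent tools enabled but no library tools:

    // injectionOverheadEstimate = 500 + 0 + 300 + 200 = 1000 tokens
    // baseContextTokens = (baseSystemPromptTokens + todoInstructionsTokens + 1000)
    //                   + toolOverheadTokens + mcpOverheadTokens + messageHistoryTokens
    // In the per-tool-call hook below: currentTokens = baseContextTokens + cumulativeToolOutputTokens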
@@ -418,10 +467,9 @@ export class LangchainAgent {
  cumulativeToolOutputTokens,
  });
  // Calculate current context including all tool outputs so far in this turn
+ // Uses accurate baseContextTokens calculated earlier (system prompt, tool overhead, MCP overhead, message history)
  // This ensures we account for multiple large tool calls in the same turn
- const baseContextTokens = turnTokenUsage.inputTokens || 10000;
  const currentTokens = baseContextTokens + cumulativeToolOutputTokens;
- const maxTokens = 200000; // Claude's limit
  // Build proper hook context with all required fields
  const hookContext = {
  session: {
@@ -433,8 +481,8 @@ export class LangchainAgent {
  },
  },
  currentTokens,
- maxTokens,
- percentage: (currentTokens / maxTokens) * 100,
+ maxTokens: modelContextWindow,
+ percentage: (currentTokens / modelContextWindow) * 100,
  model: this.definition.model,
  agent: this.definition,
  toolResponse: {
@@ -628,14 +676,18 @@ export class LangchainAgent {
  if (hasSubagentTool) {
  agentConfig.systemPrompt = `${agentConfig.systemPrompt ?? ""}\n\n${SUBAGENT_CITATION_INSTRUCTIONS}`;
  }
- // Process template variables in system prompt and inject current date/time
+ // Inject current date/time into system prompt
+ const currentDateTime = getCurrentDateTimeString();
+ // First, replace any template variables if they exist
  if (agentConfig.systemPrompt) {
- const currentDateTime = getCurrentDateTimeString();
- // Replace {{.CurrentDate}} template variable
  agentConfig.systemPrompt = agentConfig.systemPrompt.replace(/\{\{\.CurrentDate\}\}/g, currentDateTime);
- // Replace {{.CurrentDateTime}} template variable (alias)
  agentConfig.systemPrompt = agentConfig.systemPrompt.replace(/\{\{\.CurrentDateTime\}\}/g, currentDateTime);
  }
+ // Always append current date/time information (if not already present via template)
+ const dateInfoLine = `The current date and time is ${currentDateTime}.`;
+ if (!agentConfig.systemPrompt?.includes(currentDateTime)) {
+ agentConfig.systemPrompt = `${agentConfig.systemPrompt ?? ""}\n\n${dateInfoLine}`;
+ }
  // Apply prompt parameters from request (user-selected per-message options)
  if (req.promptParameters && this.definition.promptParameters) {
  for (const param of this.definition.promptParameters) {
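
Note: the net effect of the reworked date injection, traced through the branch above (the prompts are illustrative):

    // "Today is {{.CurrentDate}}."  -> placeholder replaced in place; the
    //   includes(currentDateTime) guard then sees the date, so nothing is appended.
    // "You are a helper."           -> "You are a helper.\n\nThe current date and time is <now>."
    // undefined system prompt       -> the date line (after a blank line) becomes the whole prompt.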