@townco/agent 0.1.83 → 0.1.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/dist/acp-server/adapter.js +140 -43
  2. package/dist/acp-server/http.js +55 -0
  3. package/dist/acp-server/session-storage.d.ts +31 -6
  4. package/dist/acp-server/session-storage.js +60 -1
  5. package/dist/definition/index.d.ts +2 -4
  6. package/dist/definition/index.js +1 -2
  7. package/dist/runner/agent-runner.d.ts +2 -3
  8. package/dist/runner/hooks/executor.d.ts +5 -2
  9. package/dist/runner/hooks/executor.js +26 -2
  10. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.d.ts +37 -0
  11. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +134 -0
  12. package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.d.ts +20 -0
  13. package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +171 -0
  14. package/dist/runner/hooks/predefined/document-context-extractor/extraction-state.d.ts +57 -0
  15. package/dist/runner/hooks/predefined/document-context-extractor/extraction-state.js +126 -0
  16. package/dist/runner/hooks/predefined/document-context-extractor/index.d.ts +22 -0
  17. package/dist/runner/hooks/predefined/document-context-extractor/index.js +338 -0
  18. package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.d.ts +19 -0
  19. package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +156 -0
  20. package/dist/runner/hooks/predefined/document-context-extractor/types.d.ts +130 -0
  21. package/dist/runner/hooks/predefined/document-context-extractor/types.js +8 -0
  22. package/dist/runner/hooks/predefined/tool-response-compactor.d.ts +0 -4
  23. package/dist/runner/hooks/predefined/tool-response-compactor.js +101 -222
  24. package/dist/runner/hooks/types.d.ts +18 -12
  25. package/dist/runner/langchain/index.js +64 -11
  26. package/dist/runner/langchain/tools/artifacts.js +6 -9
  27. package/dist/runner/langchain/tools/document_extract.d.ts +26 -0
  28. package/dist/runner/langchain/tools/document_extract.js +135 -0
  29. package/dist/runner/tools.d.ts +2 -2
  30. package/dist/runner/tools.js +1 -0
  31. package/dist/templates/index.d.ts +1 -2
  32. package/dist/tsconfig.tsbuildinfo +1 -1
  33. package/dist/utils/context-size-calculator.d.ts +1 -10
  34. package/dist/utils/context-size-calculator.js +1 -12
  35. package/package.json +6 -6
  36. package/templates/index.ts +1 -2
@@ -2,6 +2,7 @@ import Anthropic from "@anthropic-ai/sdk";
2
2
  import { createLogger } from "../../../logger.js";
3
3
  import { telemetry } from "../../../telemetry/index.js";
4
4
  import { countToolResultTokens } from "../../../utils/token-counter.js";
5
+ import { extractDocumentContext } from "./document-context-extractor/index.js";
5
6
  const logger = createLogger("tool-response-compactor");
6
7
  // Create Anthropic client directly (not using LangChain)
7
8
  // This ensures compaction LLM calls don't get captured by LangGraph's streaming
@@ -15,6 +16,8 @@ const COMPACTION_MODEL_CONTEXT = 200000; // Haiku context size for calculating t
15
16
  * Tool response compaction hook - compacts or truncates large tool responses
16
17
  * to prevent context overflow
17
18
  */
19
+ // Tools that should never be compacted (internal/small response tools)
20
+ const SKIP_COMPACTION_TOOLS = new Set(["todo_write", "TodoWrite"]);
18
21
  export const toolResponseCompactor = async (ctx) => {
19
22
  // Only process if we have tool response data
20
23
  if (!ctx.toolResponse) {
@@ -22,28 +25,39 @@ export const toolResponseCompactor = async (ctx) => {
22
25
  return { newContextEntry: null };
23
26
  }
24
27
  const { toolCallId, toolName, toolInput, rawOutput, outputTokens } = ctx.toolResponse;
28
+ // Skip compaction for certain internal tools
29
+ if (SKIP_COMPACTION_TOOLS.has(toolName)) {
30
+ logger.debug("Skipping compaction for internal tool", { toolName });
31
+ return { newContextEntry: null };
32
+ }
25
33
  // Get settings from hook configuration
26
34
  const settings = ctx.session.requestParams.hookSettings;
27
- const maxContextThreshold = settings?.maxContextThreshold ?? 80;
28
- const responseTruncationThreshold = settings?.responseTruncationThreshold ?? 30;
29
- // Calculate actual token limits from percentages
30
- const maxAllowedTotal = ctx.maxTokens * (maxContextThreshold / 100);
31
- const availableSpace = maxAllowedTotal - ctx.currentTokens;
32
- const projectedTotal = ctx.currentTokens + outputTokens;
33
- const compactionLimit = COMPACTION_MODEL_CONTEXT * (responseTruncationThreshold / 100);
35
+ const maxTokensSize = settings?.maxTokensSize ?? 20000; // Default: 20000 tokens
36
+ // Use maxTokensSize directly as it's now in tokens
37
+ const maxAllowedResponseSize = maxTokensSize;
38
+ // Calculate available space in context
39
+ const availableSpace = ctx.maxTokens - ctx.currentTokens;
40
+ // Failsafe: if available space is less than maxTokensSize, use availableSpace - 10%
41
+ const effectiveMaxResponseSize = availableSpace < maxAllowedResponseSize
42
+ ? Math.floor(availableSpace * 0.9)
43
+ : maxAllowedResponseSize;
44
+ // Calculate compaction limit: max response size that can fit in a single LLM compaction call
45
+ const COMPACTION_OVERHEAD = 10000;
46
+ const compactionLimit = Math.floor((COMPACTION_MODEL_CONTEXT - COMPACTION_OVERHEAD) * 0.9); // ~175K tokens
34
47
  logger.info("Tool response compaction hook triggered", {
35
48
  toolCallId,
36
49
  toolName,
37
50
  outputTokens,
38
51
  currentContext: ctx.currentTokens,
39
- maxAllowedTotal,
52
+ maxTokens: ctx.maxTokens,
53
+ maxAllowedResponseSize,
40
54
  availableSpace,
41
- projectedTotal,
55
+ effectiveMaxResponseSize,
42
56
  compactionLimit,
43
57
  settings,
44
58
  });
45
59
  // Case 0: Small response, no action needed
46
- if (projectedTotal < maxAllowedTotal) {
60
+ if (outputTokens <= effectiveMaxResponseSize) {
47
61
  logger.info("Tool response fits within threshold, no compaction needed");
48
62
  return {
49
63
  newContextEntry: null,
@@ -54,93 +68,99 @@ export const toolResponseCompactor = async (ctx) => {
54
68
  },
55
69
  };
56
70
  }
57
- // Response would exceed threshold, need to compact or truncate
58
- // Determine target size: fit within available space, but cap at compactionLimit for truncation
59
- // IMPORTANT: If context is already over threshold, availableSpace will be negative
71
+ // Response would exceed threshold, need to compact or extract
72
+ // Target size is the effectiveMaxResponseSize (what we want the final output to be)
73
+ // IMPORTANT: If context is already very full, availableSpace might be very small
60
74
  // In that case, use a minimum reasonable target size (e.g., 10% of the output or 1000 tokens)
61
75
  const minTargetSize = Math.max(Math.floor(outputTokens * 0.1), 1000);
62
- const targetSize = availableSpace > 0
63
- ? Math.min(availableSpace, compactionLimit)
64
- : minTargetSize;
76
+ const targetSize = effectiveMaxResponseSize > 0 ? effectiveMaxResponseSize : minTargetSize;
65
77
  logger.info("Calculated target size for compaction", {
66
78
  availableSpace,
79
+ effectiveMaxResponseSize,
67
80
  compactionLimit,
68
81
  minTargetSize,
69
82
  targetSize,
70
- contextAlreadyOverThreshold: availableSpace <= 0,
83
+ contextAlreadyOverThreshold: availableSpace <= maxAllowedResponseSize,
71
84
  });
72
- // Case 2: Huge response, must truncate (too large for LLM compaction)
85
+ // Case 2: Huge response - use document context extractor (with truncation fallback)
73
86
  if (outputTokens >= compactionLimit) {
74
- logger.warn("Tool response exceeds compaction capacity, truncating", {
87
+ logger.info("Tool response exceeds compaction capacity, using document context extractor", {
75
88
  outputTokens,
76
89
  compactionLimit,
77
90
  targetSize,
78
91
  availableSpace,
79
92
  });
80
- const truncated = truncateToolResponse(rawOutput, targetSize);
81
- const finalTokens = countToolResultTokens(truncated);
82
- // Verify truncation stayed within boundaries
83
- if (finalTokens > targetSize) {
84
- logger.error("Truncation exceeded target size - this should not happen!", {
85
- finalTokens,
86
- targetSize,
87
- excess: finalTokens - targetSize,
88
- });
89
- // Try more aggressive truncation (70% of target as emergency measure)
90
- const emergencySize = Math.floor(targetSize * 0.7);
91
- const emergencyTruncated = truncateToolResponse(rawOutput, emergencySize);
92
- const emergencyTokens = countToolResultTokens(emergencyTruncated);
93
- // Final safety check - if emergency truncation STILL exceeded target, use ultra-conservative fallback
94
- if (emergencyTokens > targetSize) {
95
- logger.error("Emergency truncation STILL exceeded target - using ultra-conservative fallback", {
96
- emergencyTokens,
97
- targetSize,
98
- emergencySize,
93
+ // Build conversation context for extraction
94
+ const recentMessages = ctx.session.messages.slice(-5);
95
+ const conversationContext = recentMessages
96
+ .map((msg) => {
97
+ const text = msg.content
98
+ .filter((b) => b.type === "text")
99
+ .map((b) => (b.type === "text" ? b.text : ""))
100
+ .join("\n");
101
+ return `${msg.role}: ${text}`;
102
+ })
103
+ .join("\n\n");
104
+ // Try document context extraction
105
+ try {
106
+ const extractionResult = await extractDocumentContext(rawOutput, toolName, toolCallId, toolInput, conversationContext, targetSize, ctx.sessionId ?? "unknown", ctx.storage);
107
+ if (extractionResult.success && extractionResult.extractedData) {
108
+ logger.info("Document context extraction succeeded", {
109
+ originalTokens: outputTokens,
110
+ finalTokens: extractionResult.extractedTokens,
111
+ chunksProcessed: extractionResult.metadata.chunksProcessed,
112
+ chunksExtractedFrom: extractionResult.metadata.chunksExtractedFrom,
99
113
  });
100
- // Ultra-conservative: just return a simple error structure with the raw data sliced to 50% of target
101
- const ultraConservativeSize = Math.floor(targetSize * 0.5);
102
114
  return {
103
115
  newContextEntry: null,
104
116
  metadata: {
105
- action: "truncated",
117
+ action: "compacted",
106
118
  originalTokens: outputTokens,
107
- finalTokens: ultraConservativeSize, // Conservative estimate
108
- modifiedOutput: {
109
- _truncation_error: "Tool response was too large and could not be reliably truncated",
110
- _original_token_count: outputTokens,
111
- _target_token_count: targetSize,
112
- _partial_data: JSON.stringify(rawOutput).slice(0, ultraConservativeSize * 3),
113
- },
114
- truncationWarning: `Tool response was severely truncated from ${outputTokens.toLocaleString()} to ~${ultraConservativeSize.toLocaleString()} tokens (emergency truncation failed - data may be incomplete)`,
119
+ finalTokens: extractionResult.extractedTokens,
120
+ tokensSaved: outputTokens - (extractionResult.extractedTokens ?? 0),
121
+ modifiedOutput: extractionResult.extractedData,
122
+ compactionMethod: "document_context_extraction",
123
+ extractionMetadata: extractionResult.metadata,
115
124
  },
116
125
  };
117
126
  }
118
- return {
119
- newContextEntry: null,
120
- metadata: {
121
- action: "truncated",
122
- originalTokens: outputTokens,
123
- finalTokens: emergencyTokens,
124
- modifiedOutput: emergencyTruncated,
125
- truncationWarning: `Tool response was aggressively truncated from ${outputTokens.toLocaleString()} to ${emergencyTokens.toLocaleString()} tokens to fit within context limit (emergency truncation)`,
126
- },
127
- };
127
+ // Extraction failed - throw error to terminate agent loop
128
+ logger.error("Document context extraction failed", {
129
+ error: extractionResult.error,
130
+ phase: extractionResult.metadata.phase,
131
+ toolName,
132
+ toolCallId,
133
+ outputTokens,
134
+ });
135
+ throw new Error(`Document context extraction failed for tool "${toolName}": ${extractionResult.error}. ` +
136
+ `Original response was ${outputTokens.toLocaleString()} tokens. ` +
137
+ `Full response saved to artifacts.`);
138
+ }
139
+ catch (extractionError) {
140
+ // Re-throw if it's already our error
141
+ if (extractionError instanceof Error &&
142
+ extractionError.message.includes("Document context extraction failed")) {
143
+ throw extractionError;
144
+ }
145
+ // Extraction threw an unexpected error - terminate agent loop
146
+ logger.error("Document context extraction threw an error", {
147
+ error: extractionError instanceof Error
148
+ ? extractionError.message
149
+ : String(extractionError),
150
+ toolName,
151
+ toolCallId,
152
+ outputTokens,
153
+ });
154
+ throw new Error(`Document context extraction failed for tool "${toolName}": ${extractionError instanceof Error
155
+ ? extractionError.message
156
+ : String(extractionError)}. Original response was ${outputTokens.toLocaleString()} tokens.`);
128
157
  }
129
- return {
130
- newContextEntry: null,
131
- metadata: {
132
- action: "truncated",
133
- originalTokens: outputTokens,
134
- finalTokens,
135
- modifiedOutput: truncated,
136
- truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens to fit within context limit (available space: ${availableSpace.toLocaleString()} tokens)`,
137
- },
138
- };
139
158
  }
140
159
  // Case 1: Medium response, intelligent compaction
141
160
  logger.info("Tool response requires intelligent compaction", {
142
161
  outputTokens,
143
162
  targetSize,
163
+ effectiveMaxResponseSize,
144
164
  availableSpace,
145
165
  compactionLimit,
146
166
  });
@@ -157,28 +177,19 @@ export const toolResponseCompactor = async (ctx) => {
157
177
  })
158
178
  .join("\n\n");
159
179
  const compacted = await compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetSize);
160
- let finalTokens = countToolResultTokens(compacted);
180
+ const finalTokens = countToolResultTokens(compacted);
161
181
  // Verify compaction stayed within boundaries
162
182
  if (finalTokens > targetSize) {
163
- logger.warn("LLM compaction exceeded target, falling back to truncation", {
183
+ logger.error("LLM compaction exceeded target", {
164
184
  finalTokens,
165
185
  targetSize,
166
186
  excess: finalTokens - targetSize,
187
+ toolName,
188
+ toolCallId,
167
189
  });
168
- // Fallback to truncation
169
- const truncated = truncateToolResponse(compacted, targetSize);
170
- finalTokens = countToolResultTokens(truncated);
171
- return {
172
- newContextEntry: null,
173
- metadata: {
174
- action: "compacted_then_truncated",
175
- originalTokens: outputTokens,
176
- finalTokens,
177
- tokensSaved: outputTokens - finalTokens,
178
- modifiedOutput: truncated,
179
- truncationWarning: `Tool response was compacted then truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens to fit within context limit`,
180
- },
181
- };
190
+ throw new Error(`LLM compaction for tool "${toolName}" exceeded target size. ` +
191
+ `Compacted to ${finalTokens.toLocaleString()} tokens but target was ${targetSize.toLocaleString()}. ` +
192
+ `Original response was ${outputTokens.toLocaleString()} tokens.`);
182
193
  }
183
194
  logger.info("Successfully compacted tool response", {
184
195
  originalTokens: outputTokens,
@@ -198,62 +209,13 @@ export const toolResponseCompactor = async (ctx) => {
198
209
  };
199
210
  }
200
211
  catch (error) {
201
- logger.error("Compaction failed, falling back to truncation", {
212
+ logger.error("Compaction failed", {
202
213
  error: error instanceof Error ? error.message : String(error),
214
+ toolName,
215
+ toolCallId,
216
+ outputTokens,
203
217
  });
204
- // Fallback to truncation with the same target size
205
- const truncated = truncateToolResponse(rawOutput, targetSize);
206
- let finalTokens = countToolResultTokens(truncated);
207
- // Verify truncation stayed within boundaries
208
- if (finalTokens > targetSize) {
209
- logger.error("Fallback truncation exceeded target, using emergency truncation", {
210
- finalTokens,
211
- targetSize,
212
- });
213
- const emergencySize = Math.floor(targetSize * 0.7);
214
- const emergencyTruncated = truncateToolResponse(rawOutput, emergencySize);
215
- finalTokens = countToolResultTokens(emergencyTruncated);
216
- // Final safety check
217
- if (finalTokens > targetSize) {
218
- logger.error("Emergency truncation STILL exceeded target - using ultra-conservative fallback");
219
- const ultraConservativeSize = Math.floor(targetSize * 0.5);
220
- return {
221
- newContextEntry: null,
222
- metadata: {
223
- action: "truncated",
224
- originalTokens: outputTokens,
225
- finalTokens: ultraConservativeSize,
226
- modifiedOutput: {
227
- _truncation_error: "Tool response was too large and could not be reliably truncated (compaction failed)",
228
- _original_token_count: outputTokens,
229
- _target_token_count: targetSize,
230
- _partial_data: JSON.stringify(rawOutput).slice(0, ultraConservativeSize * 3),
231
- },
232
- truncationWarning: `Tool response was severely truncated from ${outputTokens.toLocaleString()} to ~${ultraConservativeSize.toLocaleString()} tokens (compaction+emergency truncation failed)`,
233
- },
234
- };
235
- }
236
- return {
237
- newContextEntry: null,
238
- metadata: {
239
- action: "truncated",
240
- originalTokens: outputTokens,
241
- finalTokens,
242
- modifiedOutput: emergencyTruncated,
243
- truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens (compaction failed, emergency truncation applied)`,
244
- },
245
- };
246
- }
247
- return {
248
- newContextEntry: null,
249
- metadata: {
250
- action: "truncated",
251
- originalTokens: outputTokens,
252
- finalTokens,
253
- modifiedOutput: truncated,
254
- truncationWarning: `Tool response was truncated from ${outputTokens.toLocaleString()} to ${finalTokens.toLocaleString()} tokens (compaction failed)`,
255
- },
256
- };
218
+ throw new Error(`LLM compaction failed for tool "${toolName}": ${error instanceof Error ? error.message : String(error)}. Original response was ${outputTokens.toLocaleString()} tokens.`);
257
219
  }
258
220
  };
259
221
  /**
@@ -442,86 +404,3 @@ Return ONLY valid JSON (no explanation text).`;
442
404
  });
443
405
  return currentData;
444
406
  }
445
- /**
446
- * Truncate tool response to target token count
447
- * Uses iterative approach to ensure we stay under the target
448
- */
449
- function truncateToolResponse(rawOutput, targetTokens) {
450
- const currentTokens = countToolResultTokens(rawOutput);
451
- if (currentTokens <= targetTokens) {
452
- return rawOutput; // Already within limit
453
- }
454
- const outputString = JSON.stringify(rawOutput);
455
- // Start with 70% of target to leave significant room for closing braces and metadata
456
- let ratio = 0.7;
457
- let lastResult = null;
458
- // Iteratively truncate until we meet the target
459
- for (let attempt = 0; attempt < 15; attempt++) {
460
- // Calculate character limit based on ratio
461
- const targetChars = Math.floor((targetTokens * ratio * outputString.length) / currentTokens);
462
- // Truncate the JSON string
463
- let truncated = outputString.slice(0, targetChars);
464
- // Try to close any open JSON structures
465
- const openBraces = (truncated.match(/{/g) || []).length;
466
- const closeBraces = (truncated.match(/}/g) || []).length;
467
- const openBrackets = (truncated.match(/\[/g) || []).length;
468
- const closeBrackets = (truncated.match(/\]/g) || []).length;
469
- truncated += "}".repeat(Math.max(0, openBraces - closeBraces));
470
- truncated += "]".repeat(Math.max(0, openBrackets - closeBrackets));
471
- try {
472
- // Try to parse as valid JSON
473
- const parsed = JSON.parse(truncated);
474
- const parsedTokens = countToolResultTokens(parsed);
475
- // Store the result
476
- lastResult = { parsed, tokens: parsedTokens };
477
- if (parsedTokens <= targetTokens) {
478
- // Success! Add truncation notice
479
- return {
480
- ...parsed,
481
- _truncation_notice: "... [TRUNCATED - response exceeded size limit]",
482
- _original_token_count: currentTokens,
483
- _truncated_token_count: parsedTokens,
484
- };
485
- }
486
- // Still too large - calculate how much we need to reduce
487
- // If we overshot, reduce ratio proportionally to how much we exceeded
488
- const overshootRatio = parsedTokens / targetTokens; // e.g., 1.03 if we're 3% over
489
- ratio = (ratio / overshootRatio) * 0.95; // Reduce by overshoot amount plus 5% safety margin
490
- logger.debug("Truncation attempt resulted in overshoot, retrying", {
491
- attempt,
492
- targetTokens,
493
- parsedTokens,
494
- overshootRatio,
495
- newRatio: ratio,
496
- });
497
- }
498
- catch {
499
- // JSON parse failed, try more aggressive truncation
500
- ratio *= 0.85;
501
- }
502
- }
503
- // If we exhausted all attempts, return the last successful parse (if any)
504
- // or a very conservative fallback
505
- if (lastResult && lastResult.tokens <= targetTokens * 1.1) {
506
- // Within 10% of target - good enough
507
- logger.warn("Truncation reached attempt limit but result is close enough", {
508
- targetTokens,
509
- actualTokens: lastResult.tokens,
510
- });
511
- return {
512
- ...lastResult.parsed,
513
- _truncation_notice: "... [TRUNCATED - response exceeded size limit]",
514
- _original_token_count: currentTokens,
515
- _truncated_token_count: lastResult.tokens,
516
- };
517
- }
518
- // If all attempts failed, return a simple truncated structure
519
- const safeChars = Math.floor(targetTokens * 3); // Very conservative
520
- return {
521
- truncated: true,
522
- originalSize: currentTokens,
523
- targetSize: targetTokens,
524
- content: outputString.slice(0, safeChars),
525
- warning: "Response was truncated due to size constraints (JSON parsing failed)",
526
- };
527
- }
@@ -1,5 +1,11 @@
1
1
  import type { ContextEntry } from "../../acp-server/session-storage";
2
2
  import type { SessionMessage } from "../agent-runner";
3
+ /**
4
+ * Storage interface for hooks that need to persist data
5
+ */
6
+ export interface HookStorageInterface {
7
+ getArtifactsDir(sessionId: string): string;
8
+ }
3
9
  /**
4
10
  * Hook types supported by the agent system
5
11
  */
@@ -18,18 +24,11 @@ export interface ContextSizeSettings {
18
24
  */
19
25
  export interface ToolResponseSettings {
20
26
  /**
21
- * Maximum % of main model context that tool response + current context can reach
22
- * If adding the tool response would exceed this, compaction is triggered
23
- * Default: 80
24
- */
25
- maxContextThreshold?: number | undefined;
26
- /**
27
- * Maximum % of compaction model context (Haiku: 200k) that a tool response can be
28
- * to attempt LLM-based compaction. Larger responses are truncated instead.
29
- * The truncation limit is also this percentage.
30
- * Default: 30
27
+ * Maximum size of a tool response in tokens.
28
+ * Tool responses larger than this will trigger compaction/extraction.
29
+ * Default: 20000
31
30
  */
32
- responseTruncationThreshold?: number | undefined;
31
+ maxTokensSize?: number | undefined;
33
32
  }
34
33
  /**
35
34
  * Hook configuration in agent definition
@@ -90,6 +89,14 @@ export interface HookContext {
90
89
  * The model being used
91
90
  */
92
91
  model: string;
92
+ /**
93
+ * Session ID for the current session
94
+ */
95
+ sessionId?: string | undefined;
96
+ /**
97
+ * Storage interface for hooks that need to persist data
98
+ */
99
+ storage?: HookStorageInterface | undefined;
93
100
  /**
94
101
  * Tool response data (only for tool_response hooks)
95
102
  */
@@ -141,7 +148,6 @@ export declare function createContextEntry(messages: Array<{
141
148
  toolInputTokens: number;
142
149
  toolResultsTokens: number;
143
150
  totalEstimated: number;
144
- llmReportedInputTokens?: number | undefined;
145
151
  }): ContextEntry;
146
152
  /**
147
153
  * Helper function to create a full message entry for context
@@ -2,7 +2,7 @@ import { mkdir } from "node:fs/promises";
2
2
  import * as path from "node:path";
3
3
  import { MultiServerMCPClient } from "@langchain/mcp-adapters";
4
4
  import { context, propagation, trace } from "@opentelemetry/api";
5
- import { getShedAuth } from "@townco/core/auth";
5
+ import { ensureAuthenticated } from "@townco/core/auth";
6
6
  import { AIMessageChunk, createAgent, ToolMessage, tool, } from "langchain";
7
7
  import { z } from "zod";
8
8
  import { SUBAGENT_MODE_KEY } from "../../acp-server/adapter";
@@ -14,6 +14,7 @@ import { createModelFromString, detectProvider } from "./model-factory.js";
14
14
  import { makeOtelCallbacks } from "./otel-callbacks.js";
15
15
  import { makeArtifactsTools } from "./tools/artifacts";
16
16
  import { makeBrowserTools } from "./tools/browser";
17
+ import { makeDocumentExtractTool } from "./tools/document_extract";
17
18
  import { makeFilesystemTools } from "./tools/filesystem";
18
19
  import { makeGenerateImageTool, makeTownGenerateImageTool, } from "./tools/generate_image";
19
20
  import { SUBAGENT_TOOL_NAME } from "./tools/subagent";
@@ -42,6 +43,7 @@ export const TOOL_REGISTRY = {
42
43
  generate_image: () => makeGenerateImageTool(),
43
44
  town_generate_image: () => makeTownGenerateImageTool(),
44
45
  browser: () => makeBrowserTools(),
46
+ document_extract: () => makeDocumentExtractTool(),
45
47
  };
46
48
  // ============================================================================
47
49
  // Custom tool loading
@@ -356,7 +358,8 @@ export class LangchainAgent {
356
358
  // MCP tools - calculate overhead separately
357
359
  let mcpOverheadTokens = 0;
358
360
  if ((this.definition.mcps?.length ?? 0) > 0) {
359
- const mcpTools = await makeMcpToolsClient(this.definition.mcps).getTools();
361
+ const client = await makeMcpToolsClient(this.definition.mcps);
362
+ const mcpTools = await client.getTools();
360
363
  const mcpToolMetadata = mcpTools.map(extractToolMetadata);
361
364
  mcpOverheadTokens = estimateAllToolsOverhead(mcpToolMetadata);
362
365
  enabledTools.push(...mcpTools);
@@ -447,9 +450,25 @@ export class LangchainAgent {
447
450
  reduction: `${((1 - compactedTokens / outputTokens) * 100).toFixed(1)}%`,
448
451
  totalCumulativeTokens: cumulativeToolOutputTokens,
449
452
  });
450
- return typeof result === "string"
451
- ? modifiedOutput.content
452
- : JSON.stringify(modifiedOutput);
453
+ // Include compaction metadata in the output for the adapter to extract
454
+ // Also include original content so adapter can store it
455
+ const originalContentStr = typeof rawOutput === "object" &&
456
+ rawOutput !== null &&
457
+ "content" in rawOutput
458
+ ? String(rawOutput.content)
459
+ : JSON.stringify(rawOutput);
460
+ const outputWithMeta = {
461
+ ...modifiedOutput,
462
+ _compactionMeta: {
463
+ action: hookResult.metadata.action,
464
+ originalTokens: hookResult.metadata.originalTokens,
465
+ finalTokens: hookResult.metadata.finalTokens,
466
+ tokensSaved: hookResult.metadata.tokensSaved,
467
+ originalContent: originalContentStr,
468
+ },
469
+ };
470
+ // Always return JSON string to preserve metadata
471
+ return JSON.stringify(outputWithMeta);
453
472
  }
454
473
  // No compaction happened, count original size
455
474
  cumulativeToolOutputTokens += outputTokens;
@@ -1037,6 +1056,40 @@ export class LangchainAgent {
1037
1056
  _meta: { messageId: req.messageId },
1038
1057
  });
1039
1058
  // Buffer tool output separately
1059
+ // Check if the content contains compaction metadata and extract it
1060
+ let rawOutput = {
1061
+ content: aiMessage.content,
1062
+ };
1063
+ let compactionMeta;
1064
+ try {
1065
+ const parsed = JSON.parse(aiMessage.content);
1066
+ if (typeof parsed === "object" &&
1067
+ parsed !== null &&
1068
+ "_compactionMeta" in parsed) {
1069
+ // Extract compaction metadata to top level of rawOutput
1070
+ const { _compactionMeta, ...contentWithoutMeta } = parsed;
1071
+ compactionMeta = _compactionMeta;
1072
+ rawOutput = {
1073
+ content: JSON.stringify(contentWithoutMeta),
1074
+ _compactionMeta,
1075
+ };
1076
+ }
1077
+ }
1078
+ catch {
1079
+ // Not valid JSON, use original content
1080
+ }
1081
+ // For content display, use cleaned version if compaction occurred
1082
+ let displayContent = aiMessage.content;
1083
+ if (compactionMeta) {
1084
+ try {
1085
+ const parsed = JSON.parse(aiMessage.content);
1086
+ const { _compactionMeta: _, ...cleanParsed } = parsed;
1087
+ displayContent = JSON.stringify(cleanParsed);
1088
+ }
1089
+ catch {
1090
+ // Keep original if parsing fails
1091
+ }
1092
+ }
1040
1093
  pendingToolCallNotifications.push({
1041
1094
  sessionUpdate: "tool_output",
1042
1095
  toolCallId: aiMessage.tool_call_id,
@@ -1045,11 +1098,11 @@ export class LangchainAgent {
1045
1098
  type: "content",
1046
1099
  content: {
1047
1100
  type: "text",
1048
- text: aiMessage.content,
1101
+ text: displayContent,
1049
1102
  },
1050
1103
  },
1051
1104
  ],
1052
- rawOutput: { content: aiMessage.content },
1105
+ rawOutput,
1053
1106
  _meta: { messageId: req.messageId },
1054
1107
  });
1055
1108
  // Flush tool outputs after buffering
@@ -1119,11 +1172,11 @@ const modelRequestSchema = z.object({
1119
1172
  messages: z.array(z.any()),
1120
1173
  }),
1121
1174
  });
1122
- const makeMcpToolsClient = (mcpConfigs) => {
1123
- const mcpServers = mcpConfigs?.map((config) => {
1175
+ const makeMcpToolsClient = async (mcpConfigs) => {
1176
+ const mcpServers = await Promise.all((mcpConfigs ?? []).map(async (config) => {
1124
1177
  if (typeof config === "string") {
1125
1178
  // String configs use the centralized MCP proxy with auth
1126
- const shedAuth = getShedAuth();
1179
+ const shedAuth = await ensureAuthenticated();
1127
1180
  if (!shedAuth) {
1128
1181
  throw new Error("Not logged in. Run 'town login' or set SHED_API_KEY to use cloud MCP servers.");
1129
1182
  }
@@ -1155,7 +1208,7 @@ const makeMcpToolsClient = (mcpConfigs) => {
1155
1208
  args: config.args ?? [],
1156
1209
  },
1157
1210
  ];
1158
- });
1211
+ }));
1159
1212
  const client = new MultiServerMCPClient({
1160
1213
  // Global tool configuration options
1161
1214
  // Whether to throw on errors if a tool fails to load (optional, default: true)