@townco/agent 0.1.121 → 0.1.123

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
  import { EventEmitter } from "node:events";
- import { mkdir } from "node:fs/promises";
+ import { mkdir, writeFile } from "node:fs/promises";
  import * as path from "node:path";
  import { MultiServerMCPClient } from "@langchain/mcp-adapters";
  import { context, propagation, trace } from "@opentelemetry/api";
@@ -10,9 +10,11 @@ import { ContextOverflowError, SUBAGENT_MODE_KEY, } from "../../acp-server/adapt
  import { createLogger } from "../../logger.js";
  import { telemetry } from "../../telemetry/index.js";
  import { calculateContextSize } from "../../utils/context-size-calculator.js";
+ import { countToolResultTokens } from "../../utils/token-counter.js";
  import { getModelContextWindow } from "../hooks/constants.js";
+ import { HookExecutor, loadHookCallback, } from "../hooks/index.js";
  import { isContextOverflowError } from "../hooks/predefined/context-validator.js";
- import { bindGeneratorToAbortSignal, bindGeneratorToEmitUpdate, bindGeneratorToInvocationContext, bindGeneratorToSessionContext, getAbortSignal, getInvocationContext, runWithAbortSignal, } from "../session-context";
+ import { bindGeneratorToAbortSignal, bindGeneratorToEmitUpdate, bindGeneratorToInvocationContext, bindGeneratorToSessionContext, getAbortSignal, getInvocationContext, getSessionContext, runWithAbortSignal, } from "../session-context";
  import { loadCustomToolModule, } from "../tool-loader.js";
  import { createModelFromString, detectProvider } from "./model-factory.js";
  import { makeOtelCallbacks } from "./otel-callbacks.js";
@@ -35,6 +37,33 @@ const getWeather = tool(({ city }) => `It's always sunny in ${city}!`, {
  getWeather.prettyName = "Get Weather";
  // biome-ignore lint/suspicious/noExplicitAny: Need to add custom properties to LangChain tool
  getWeather.icon = "Cloud";
+ function isPlainRecord(v) {
+ return !!v && typeof v === "object" && !Array.isArray(v);
+ }
+ function stableStringify(value) {
+ const seen = new WeakSet();
+ const _stringify = (v) => {
+ if (v === null)
+ return null;
+ if (typeof v !== "object")
+ return v;
+ if (Array.isArray(v))
+ return v.map(_stringify);
+ const obj = v;
+ if (seen.has(obj))
+ return "[Circular]";
+ seen.add(obj);
+ const out = {};
+ for (const k of Object.keys(obj).sort()) {
+ out[k] = _stringify(obj[k]);
+ }
+ return out;
+ };
+ return JSON.stringify(_stringify(value));
+ }
+ function toolCallKey(toolName, args) {
+ return `${toolName}:${stableStringify(args)}`;
+ }
  export const TOOL_REGISTRY = {
  todo_write: () => makeTodoWriteTool(), // Factory function to create fresh instance per invocation
  get_weather: getWeather, // TODO: Convert to factory function for full concurrency safety
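
Note: the new stableStringify/toolCallKey helpers give each tool invocation a deterministic lookup key: object keys are sorted recursively and cycles collapse to "[Circular]", so the same arguments produce the same key regardless of property order. A minimal standalone TypeScript sketch of the idea (illustrative, not the package's exports):

// Sketch: deterministic serialization so argument order doesn't affect the key.
function stableKey(toolName: string, args: unknown): string {
  const seen = new WeakSet<object>();
  const walk = (v: unknown): unknown => {
    if (v === null || typeof v !== "object") return v;
    if (Array.isArray(v)) return v.map(walk);
    const obj = v as Record<string, unknown>;
    if (seen.has(obj)) return "[Circular]";
    seen.add(obj);
    const out: Record<string, unknown> = {};
    for (const k of Object.keys(obj).sort()) out[k] = walk(obj[k]);
    return out;
  };
  return `${toolName}:${JSON.stringify(walk(args))}`;
}

console.log(stableKey("get_weather", { city: "Oslo", units: "c" }) ===
  stableKey("get_weather", { units: "c", city: "Oslo" })); // true
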
@@ -225,11 +254,25 @@ export class LangchainAgent {
  // Create OTEL callbacks for instrumentation early so we can use them during tool wrapping
  // Track iteration index across LLM calls in this invocation
  const iterationIndexRef = { current: 0 };
+ // Track actual token usage from API responses for validation
+ let lastActualTokenUsage = null;
  otelCallbacks = makeOtelCallbacks({
  provider,
  model: effectiveModel,
  parentContext: invocationContext,
  iterationIndexRef,
+ emitTokenUsage: (data) => {
+ // Store actual token usage from API response
+ lastActualTokenUsage = {
+ inputTokens: data.inputTokens,
+ outputTokens: data.outputTokens,
+ };
+ _logger.info("Actual token usage from API", {
+ inputTokens: data.inputTokens,
+ outputTokens: data.outputTokens,
+ totalTokens: data.inputTokens + data.outputTokens,
+ });
+ },
  });
  // Track todo_write tool call IDs to suppress their tool_call notifications
  const todoWriteToolCallIds = new Set();
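
Note: lastActualTokenUsage is a closure variable that the emitTokenUsage callback overwrites on every LLM call, so the generator can later yield only the most recent usage. A minimal sketch of that capture pattern, with hypothetical names:

// Sketch: capture the latest usage report from a callback for later readout.
interface TokenUsage { inputTokens: number; outputTokens: number; }

function makeUsageTracker() {
  let last: TokenUsage | null = null;
  return {
    emitTokenUsage: (data: TokenUsage) => { last = data; }, // handed to callbacks
    takeLast: (): TokenUsage | null => last,                // read at end of turn
  };
}

const tracker = makeUsageTracker();
tracker.emitTokenUsage({ inputTokens: 1200, outputTokens: 340 });
console.log(tracker.takeLast()); // { inputTokens: 1200, outputTokens: 340 }
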
@@ -332,13 +375,13 @@ export class LangchainAgent {
  const { extractToolMetadata, estimateAllToolsOverhead } = await import("../../utils/tool-overhead-calculator.js");
  // Calculate overhead for non-MCP tools (built-in, custom, filesystem)
  const nonMcpToolMetadata = enabledTools.map(extractToolMetadata);
- const nonMcpToolDefinitionsTokens = estimateAllToolsOverhead(nonMcpToolMetadata);
+ const nonMcpToolDefinitionsTokens = await estimateAllToolsOverhead(nonMcpToolMetadata);
  // Calculate TODO_WRITE_INSTRUCTIONS overhead if applicable
  // Skip for subagents since the todo_write tool is filtered out for them
  const isSubagentForTokens = req.sessionMeta?.[SUBAGENT_MODE_KEY] === true;
  const hasTodoWriteTool = builtInNames.includes("todo_write") && !isSubagentForTokens;
  const todoInstructionsTokens = hasTodoWriteTool
- ? countTokens(TODO_WRITE_INSTRUCTIONS)
+ ? await countTokens(TODO_WRITE_INSTRUCTIONS)
  : 0;
  // Total non-MCP tool overhead: tool definitions + TODO instructions
  const toolOverheadTokens = nonMcpToolDefinitionsTokens + todoInstructionsTokens;
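
Note: the recurring "+ await" changes in this version suggest countTokens and estimateAllToolsOverhead now return Promises (token counters that load a tokenizer lazily are typically async). A hedged sketch of why each former synchronous call site needs an await, with a made-up tokenizer:

// Sketch (assumption): an async token counter forces `await` at every call site.
type Encoder = { encode(text: string): number[] };
let encoder: Encoder | null = null;

async function countTokens(text: string): Promise<number> {
  if (!encoder) {
    // Stand-in for a real lazy tokenizer load (e.g. a dynamic import).
    encoder = { encode: (s) => Array.from(s, (_, i) => i) };
  }
  return encoder.encode(text).length;
}

async function main(): Promise<void> {
  const todoInstructionsTokens = await countTokens("Keep the todo list current.");
  console.log(todoInstructionsTokens); // character count in this toy encoder
}
main();
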
@@ -347,8 +390,20 @@ export class LangchainAgent {
  if ((this.definition.mcps?.length ?? 0) > 0) {
  const client = await makeMcpToolsClient(this.definition.mcps);
  const mcpTools = await client.getTools();
+ _logger.info("MCP tools loaded", {
+ mcpCount: this.definition.mcps?.length ?? 0,
+ toolCount: mcpTools.length,
+ toolNames: mcpTools.map((t) => t.name),
+ });
  const mcpToolMetadata = mcpTools.map(extractToolMetadata);
- mcpOverheadTokens = estimateAllToolsOverhead(mcpToolMetadata);
+ mcpOverheadTokens = await estimateAllToolsOverhead(mcpToolMetadata);
+ _logger.warn("MCP tool overhead calculated", {
+ mcpToolCount: mcpTools.length,
+ estimatedTokens: mcpOverheadTokens,
+ avgTokensPerTool: mcpTools.length > 0
+ ? Math.round(mcpOverheadTokens / mcpTools.length)
+ : 0,
+ });
  enabledTools.push(...mcpTools);
  }
  _logger.debug("Calculated tool overhead for context sizing", {
@@ -368,7 +423,7 @@ export class LangchainAgent {
  // Calculate accurate context size for tool response compaction decisions
  // This includes: system prompt, tool overhead, MCP overhead, and message history
  const baseSystemPromptTokens = this.definition.systemPrompt
- ? countTokens(this.definition.systemPrompt)
+ ? await countTokens(this.definition.systemPrompt)
  : 0;
  // Estimate additional injection tokens based on enabled features
  // These will be injected into the system prompt later
@@ -389,7 +444,7 @@ export class LangchainAgent {
  todoInstructionsTokens +
  injectionOverheadEstimate;
  // Calculate message history tokens from context messages
- const messageHistoryContext = calculateContextSize(req.contextMessages || [], undefined, // Don't double count system prompt
+ const messageHistoryContext = await calculateContextSize(req.contextMessages || [], undefined, // Don't double count system prompt
  0, // Don't double count tool overhead
  0);
  const messageHistoryTokens = messageHistoryContext.userMessagesTokens +
@@ -415,6 +470,101 @@ export class LangchainAgent {
  // Hook execution removed from tool wrapper - hooks are now executed only at the adapter layer
  // The adapter has proper MidTurnRestartError handling that can restart the turn
  // Executing hooks here in the runner was causing restart signals to be caught as tool failures
+ // In-flight tool-response compaction (updates what the LLM sees in this same turn).
+ // We run ONLY the tool_response_compactor callback here, and we attach metadata separately
+ // to tool_output notifications for persistence/UI, without polluting the model-visible output.
+ const hooks = this.definition.hooks ?? [];
+ const inflightToolResponseHooks = hooks
+ .filter((h) => h.type === "tool_response")
+ .map((h) => {
+ // Keep only tool_response_compactor callbacks to avoid mid-turn restart logic here.
+ if (h.callbacks && h.callbacks.length > 0) {
+ const filtered = h.callbacks.filter((c) => c.name === "tool_response_compactor");
+ return filtered.length > 0 ? { ...h, callbacks: filtered } : null;
+ }
+ if (h.callback === "tool_response_compactor") {
+ return h;
+ }
+ return null;
+ })
+ .filter((h) => h !== null);
+ const hasInflightToolCompaction = inflightToolResponseHooks.length > 0;
+ const inflightContextTokensRef = { current: baseContextTokens };
+ const toolCallIdQueuesByKey = new Map();
+ const toolCallIdWaitersByKey = new Map();
+ const inflightCompactionMetaByToolCallId = new Map();
+ const registerToolCallId = (toolName, args, id) => {
+ const key = toolCallKey(toolName, args);
+ const waiters = toolCallIdWaitersByKey.get(key);
+ if (waiters && waiters.length > 0) {
+ const resolve = waiters.shift();
+ if (resolve) {
+ resolve(id);
+ return;
+ }
+ }
+ const q = toolCallIdQueuesByKey.get(key) ?? [];
+ q.push(id);
+ toolCallIdQueuesByKey.set(key, q);
+ };
+ const consumeToolCallId = async (toolName, args) => {
+ const key = toolCallKey(toolName, args);
+ const q = toolCallIdQueuesByKey.get(key);
+ if (q && q.length > 0) {
+ const id = q.shift();
+ if (id)
+ return id;
+ }
+ // Wait briefly for the updates stream to register the id.
+ return await new Promise((resolve) => {
+ const waiters = toolCallIdWaitersByKey.get(key) ?? [];
+ waiters.push(resolve);
+ toolCallIdWaitersByKey.set(key, waiters);
+ setTimeout(() => {
+ // Fallback if never registered (should be rare)
+ const fallback = `unknown_${Date.now()}_${Math.random()
+ .toString(36)
+ .slice(2, 8)}`;
+ // Remove this resolver from the waiters list if still present
+ const remaining = toolCallIdWaitersByKey.get(key) ?? [];
+ const idx = remaining.indexOf(resolve);
+ if (idx >= 0)
+ remaining.splice(idx, 1);
+ if (remaining.length === 0) {
+ toolCallIdWaitersByKey.delete(key);
+ }
+ else {
+ toolCallIdWaitersByKey.set(key, remaining);
+ }
+ resolve(fallback);
+ }, 1500);
+ });
+ };
+ const saveToolOriginalToArtifacts = async (sessionId, toolName, toolCallId, content) => {
+ try {
+ const { artifactsDir } = getSessionContext();
+ const toolDir = path.join(artifactsDir, `tool-${toolName}`);
+ await mkdir(toolDir, { recursive: true });
+ const filePath = path.join(toolDir, `${toolCallId}.original.txt`);
+ await writeFile(filePath, content, "utf-8");
+ // Match SessionStorage.saveToolOriginal relative path format:
+ return `${sessionId}/artifacts/tool-${toolName}/${toolCallId}.original.txt`;
+ }
+ catch (e) {
+ _logger.warn("Failed to save original tool output in runner", {
+ toolName,
+ toolCallId,
+ error: e instanceof Error ? e.message : String(e),
+ });
+ return null;
+ }
+ };
+ const inflightHookExecutor = hasInflightToolCompaction
+ ? new HookExecutor(inflightToolResponseHooks, this.definition.model, (callbackRef) => loadHookCallback(callbackRef, req.agentDir), undefined, // no streaming notifications from runner
+ this.definition, {
+ getArtifactsDir: (_sid) => getSessionContext().artifactsDir,
+ }, req.sessionId, req.agentDir)
+ : null;
  // Counter for subagent calls - used to create unique source ID ranges
  // Each subagent call gets a unique offset (1000, 2000, 3000, etc.)
  // to ensure sources never conflict with parent's sources (typically < 100)
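
Note: registerToolCallId/consumeToolCallId above form a keyed rendezvous: whichever side arrives first parks either the id (producer) or a resolver (consumer) under the shared key, and a 1.5 s timeout supplies a fallback id if the stream never registers one. A self-contained sketch of the same pattern:

// Sketch: producer/consumer rendezvous with a timeout fallback.
const queues = new Map<string, string[]>();
const waiters = new Map<string, Array<(id: string) => void>>();

function register(key: string, id: string): void {
  const waiter = waiters.get(key)?.shift();
  if (waiter) { waiter(id); return; }         // a consumer was already waiting
  const q = queues.get(key) ?? [];
  q.push(id);                                 // otherwise park the id
  queues.set(key, q);
}

function consume(key: string, timeoutMs = 1500): Promise<string> {
  const queued = queues.get(key)?.shift();
  if (queued) return Promise.resolve(queued); // the producer got here first
  return new Promise((resolve) => {
    const list = waiters.get(key) ?? [];
    list.push(resolve);
    waiters.set(key, list);
    setTimeout(() => {
      const remaining = waiters.get(key) ?? [];
      const idx = remaining.indexOf(resolve);
      if (idx >= 0) remaining.splice(idx, 1); // still pending: fall back
      resolve(`unknown_${Date.now()}`);       // resolving twice is a no-op
    }, timeoutMs);
  });
}

register("get_weather:{}", "call_123");
consume("get_weather:{}").then(console.log); // "call_123"
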
@@ -423,9 +573,118 @@ export class LangchainAgent {
  // All hook execution (compaction, restart logic) happens at the adapter layer
  const wrappedTools = enabledTools.map((originalTool) => {
  const wrappedFunc = async (input) => {
- // Execute the original tool and return raw result
+ // Execute the original tool and return result.
+ // If configured, compact large tool output *before* returning it to LangChain,
+ // so the next LLM step sees the compacted output in the same in-flight turn.
+ const toolCallId = hasInflightToolCompaction
+ ? await consumeToolCallId(originalTool.name, input)
+ : `unknown_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  const result = await originalTool.invoke(input);
- return result;
+ if (!inflightHookExecutor || !hasInflightToolCompaction) {
+ return result;
+ }
+ // Build hook input in the same shape the adapter uses.
+ const toolInput = isPlainRecord(input) ? input : {};
+ const rawOutput = isPlainRecord(result)
+ ? result
+ : typeof result === "string"
+ ? { content: result }
+ : { content: JSON.stringify(result) };
+ const outputTokens = await countToolResultTokens(rawOutput);
+ // Include current prompt as the last user message for better context.
+ const nowIso = new Date().toISOString();
+ const promptBlocksForHooks = req.prompt.map((block) => {
+ if (block.type === "text" && typeof block.text === "string") {
+ return { type: "text", text: block.text };
+ }
+ if (block.type === "image") {
+ const imageBlock = { type: "image" };
+ if (block.source)
+ imageBlock.source = block.source;
+ if (block.url)
+ imageBlock.url = block.url;
+ if (block.data)
+ imageBlock.data = block.data;
+ if (block.mimeType)
+ imageBlock.mimeType = block.mimeType;
+ return imageBlock;
+ }
+ return {
+ type: "text",
+ text: JSON.stringify(block),
+ };
+ });
+ const messagesForHooks = [
+ ...(req.contextMessages ?? []),
+ {
+ role: "user",
+ content: promptBlocksForHooks,
+ timestamp: nowIso,
+ },
+ ];
+ const hookResult = await inflightHookExecutor.executeToolResponseHooks({
+ messages: messagesForHooks,
+ context: [],
+ requestParams: {},
+ }, inflightContextTokensRef.current, {
+ toolCallId,
+ toolName: originalTool.name,
+ toolInput,
+ rawOutput,
+ outputTokens,
+ });
+ // Update inflight context token estimate for subsequent tool calls.
+ const finalOutput = hookResult.modifiedOutput ?? rawOutput;
+ const finalTokens = hookResult.metadata?.finalTokens;
+ const finalOutputTokens = finalTokens ?? (await countToolResultTokens(finalOutput));
+ inflightContextTokensRef.current += finalOutputTokens;
+ // If compaction happened, persist original output to artifacts and store meta
+ // for the adapter/UI (without polluting model-visible tool output).
+ const action = hookResult.metadata?.action;
+ const originalTokens = hookResult.metadata?.originalTokens;
+ const finalTokensMeta = hookResult.metadata?.finalTokens;
+ const didActuallyCompact = action &&
+ action !== "none" &&
+ action !== "no_action_needed" &&
+ originalTokens !== undefined &&
+ finalTokensMeta !== undefined &&
+ finalTokensMeta < originalTokens;
+ if (didActuallyCompact && req.sessionId) {
+ const originalContentStr = typeof rawOutput.content === "string"
+ ? rawOutput.content
+ : JSON.stringify(rawOutput);
+ const preview = originalContentStr.slice(0, 2000);
+ const originalContentPath = await saveToolOriginalToArtifacts(req.sessionId, originalTool.name, toolCallId, originalContentStr);
+ const meta = {
+ action: action ?? "compacted",
+ originalTokens,
+ finalTokens: finalTokensMeta,
+ originalContentPreview: preview,
+ };
+ const tokensSaved = hookResult.metadata?.tokensSaved ??
+ (originalTokens !== undefined && finalTokensMeta !== undefined
+ ? originalTokens - finalTokensMeta
+ : undefined);
+ if (typeof tokensSaved === "number") {
+ meta.tokensSaved = tokensSaved;
+ }
+ const compactionMethod = hookResult.metadata?.compactionMethod;
+ if (typeof compactionMethod === "string") {
+ meta.compactionMethod = compactionMethod;
+ }
+ if (typeof originalContentPath === "string") {
+ meta.originalContentPath = originalContentPath;
+ }
+ inflightCompactionMetaByToolCallId.set(toolCallId, meta);
+ }
+ // Return compacted output to LangChain (model-visible), without metadata.
+ if (typeof result === "string") {
+ if (typeof finalOutput.content === "string") {
+ return finalOutput.content;
+ }
+ return JSON.stringify(finalOutput);
+ }
+ return finalOutput;
  };
  // Create new tool with wrapped function
  // biome-ignore lint/suspicious/noExplicitAny: Need to pass function with dynamic signature
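
Note: stripped of the compaction specifics, the wrapper above follows a simple pattern: invoke the original tool, post-process the output, and preserve the caller-visible type (string tools keep returning strings). A hedged sketch with a generic post-processor standing in for the compaction hook:

// Sketch: wrap a tool function so its output can be rewritten before the model sees it.
type ToolFn = (input: unknown) => Promise<unknown>;

function wrapTool(invoke: ToolFn, postProcess: (out: unknown) => Promise<unknown>): ToolFn {
  return async (input) => {
    const result = await invoke(input);
    const processed = await postProcess(result);
    // Preserve the shape the caller expects: string in, string out.
    if (typeof result === "string") {
      return typeof processed === "string" ? processed : JSON.stringify(processed);
    }
    return processed;
  };
}

// Usage: truncate oversized outputs (a stand-in for the real compactor).
const wrapped = wrapTool(
  async () => "x".repeat(10_000),
  async (out) => (typeof out === "string" && out.length > 1_000 ? out.slice(0, 1_000) : out),
);
wrapped({}).then((out) => console.log((out as string).length)); // 1000
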
@@ -595,6 +854,20 @@ export class LangchainAgent {
  }
  }
  }
+ // Calculate final system prompt tokens after ALL modifications/injections
+ const finalSystemPromptTokens = agentConfig.systemPrompt
+ ? await countTokens(agentConfig.systemPrompt)
+ : 0;
+ _logger.debug("Final system prompt tokens after all injections", {
+ finalSystemPromptTokens,
+ baseSystemPromptTokens: baseSystemPromptTokens,
+ injectionOverhead: finalSystemPromptTokens - baseSystemPromptTokens,
+ });
+ // Yield system prompt overhead to adapter for accurate context tracking
+ yield {
+ sessionUpdate: "system_prompt_overhead",
+ systemPromptTokens: finalSystemPromptTokens,
+ };
  const agent = createAgent(agentConfig);
  // Build messages from context history if available, otherwise use just the prompt
  // Type includes tool messages for sending tool results
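
Note: the runner now yields bookkeeping updates (system_prompt_overhead here, actual_token_usage near the end of the turn) alongside normal content. The adapter's handling is not shown in this diff; a plausible sketch is a discriminated-union dispatch that folds these into context accounting instead of forwarding them as chat output:

// Sketch (assumption): how a consumer might dispatch on sessionUpdate.
type BookkeepingUpdate =
  | { sessionUpdate: "system_prompt_overhead"; systemPromptTokens: number }
  | { sessionUpdate: "actual_token_usage"; inputTokens: number; outputTokens: number };

function handleUpdate(u: BookkeepingUpdate, ctx: { estimatedTokens: number }): void {
  switch (u.sessionUpdate) {
    case "system_prompt_overhead":
      ctx.estimatedTokens += u.systemPromptTokens; // fold into the running estimate
      break;
    case "actual_token_usage":
      // Compare the local estimate with what the API actually reported.
      console.log("estimate drift:", ctx.estimatedTokens - u.inputTokens);
      break;
  }
}
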
@@ -859,6 +1132,9 @@ export class LangchainAgent {
  if (toolCall.id == null) {
  throw new Error(`Tool call is missing id: ${JSON.stringify(toolCall)}`);
  }
+ // Register toolCall id so the tool wrapper can associate it with the invocation.
+ // This enables in-flight tool-output compaction keyed by real tool_call_id.
+ registerToolCallId(toolCall.name, toolCall.args, toolCall.id);
  telemetry.log("info", `Tool call started: ${toolCall.name}`, {
  toolCallId: toolCall.id,
  toolName: toolCall.name,
@@ -1154,6 +1430,13 @@ export class LangchainAgent {
  catch {
  // Not valid JSON, use original content
  }
+ // If we compacted the tool result in the wrapper, attach meta here for the adapter/UI.
+ // This does NOT affect what LangChain/LLM saw (it already received the compacted output).
+ const inflightMeta = inflightCompactionMetaByToolCallId.get(aiMessage.tool_call_id);
+ if (inflightMeta) {
+ compactionMeta = { ...(compactionMeta ?? {}), ...inflightMeta };
+ rawOutput = { ...rawOutput, _compactionMeta: compactionMeta };
+ }
  // For content display, use cleaned version if compaction occurred
  let displayContent = aiMessage.content;
  if (compactionMeta) {
@@ -1235,6 +1518,15 @@ export class LangchainAgent {
  sessionId: req.sessionId,
  });
  telemetry.endSpan(invocationSpan);
+ // Yield actual token usage from API for comparison with estimates
+ if (lastActualTokenUsage !== null) {
+ const actualUsage = lastActualTokenUsage;
+ yield {
+ sessionUpdate: "actual_token_usage",
+ inputTokens: actualUsage.inputTokens,
+ outputTokens: actualUsage.outputTokens,
+ };
+ }
  return {
  stopReason: "end_turn",
  _meta: {
@@ -7,6 +7,11 @@ export interface OtelCallbackOptions {
  iterationIndexRef: {
  current: number;
  };
+ emitTokenUsage?: (data: {
+ sessionUpdate: "actual_token_usage";
+ inputTokens: number;
+ outputTokens: number;
+ }) => void;
  }
  /**
  * Creates OpenTelemetry callback handlers for LangChain LLM calls.
@@ -188,6 +188,14 @@ export function makeOtelCallbacks(opts) {
  ? tokenUsage.totalTokens - inputTokens
  : 0);
  telemetry.recordTokenUsage(inputTokens, outputTokens, chatSpan);
+ // Emit token usage to adapter for validation
+ if (opts.emitTokenUsage) {
+ opts.emitTokenUsage({
+ sessionUpdate: "actual_token_usage",
+ inputTokens,
+ outputTokens,
+ });
+ }
  }
  // Serialize output and attach to span
  const serializedOutput = serializeOutput(output);
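
Note: the two otel-callbacks changes pair up: the options type declares emitTokenUsage as optional, and the handler guards before invoking it. A compact sketch of that optional-callback contract:

// Sketch: optional callback in an options object, guarded at the call site.
interface CallbackOptions {
  emitTokenUsage?: (data: {
    sessionUpdate: "actual_token_usage";
    inputTokens: number;
    outputTokens: number;
  }) => void;
}

function onLlmEnd(opts: CallbackOptions, inputTokens: number, outputTokens: number): void {
  // Telemetry is recorded first in the real code; then the listener, if any, is notified.
  opts.emitTokenUsage?.({ sessionUpdate: "actual_token_usage", inputTokens, outputTokens });
}

onLlmEnd({ emitTokenUsage: (d) => console.log(d) }, 1200, 340);
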
@@ -37,7 +37,7 @@ const documentExtract = tool(async ({ session_id, file_path, query, target_token
  parsedContent = { content };
  }
  // Count tokens in the document
- const documentTokens = countTokens(content);
+ const documentTokens = await countTokens(content);
  logger.info("Document extraction requested", {
  filePath: file_path,
  documentTokens,