@townco/agent 0.1.121 → 0.1.123
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/acp-server/adapter.d.ts +1 -0
- package/dist/acp-server/adapter.js +133 -11
- package/dist/runner/agent-runner.d.ts +7 -0
- package/dist/runner/hooks/executor.js +1 -1
- package/dist/runner/hooks/predefined/context-validator.d.ts +1 -1
- package/dist/runner/hooks/predefined/context-validator.js +2 -2
- package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.d.ts +1 -1
- package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +3 -3
- package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +2 -2
- package/dist/runner/hooks/predefined/document-context-extractor/index.js +5 -5
- package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +2 -2
- package/dist/runner/hooks/predefined/tool-response-compactor.js +9 -9
- package/dist/runner/langchain/index.js +301 -9
- package/dist/runner/langchain/otel-callbacks.d.ts +5 -0
- package/dist/runner/langchain/otel-callbacks.js +8 -0
- package/dist/runner/langchain/tools/document_extract.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/context-size-calculator.d.ts +1 -1
- package/dist/utils/context-size-calculator.js +9 -14
- package/dist/utils/token-counter.d.ts +9 -7
- package/dist/utils/token-counter.js +30 -11
- package/dist/utils/tool-overhead-calculator.d.ts +2 -2
- package/dist/utils/tool-overhead-calculator.js +5 -4
- package/package.json +8 -7
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { EventEmitter } from "node:events";
|
|
2
|
-
import { mkdir } from "node:fs/promises";
|
|
2
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
3
3
|
import * as path from "node:path";
|
|
4
4
|
import { MultiServerMCPClient } from "@langchain/mcp-adapters";
|
|
5
5
|
import { context, propagation, trace } from "@opentelemetry/api";
|
|
@@ -10,9 +10,11 @@ import { ContextOverflowError, SUBAGENT_MODE_KEY, } from "../../acp-server/adapt
|
|
|
10
10
|
import { createLogger } from "../../logger.js";
|
|
11
11
|
import { telemetry } from "../../telemetry/index.js";
|
|
12
12
|
import { calculateContextSize } from "../../utils/context-size-calculator.js";
|
|
13
|
+
import { countToolResultTokens } from "../../utils/token-counter.js";
|
|
13
14
|
import { getModelContextWindow } from "../hooks/constants.js";
|
|
15
|
+
import { HookExecutor, loadHookCallback, } from "../hooks/index.js";
|
|
14
16
|
import { isContextOverflowError } from "../hooks/predefined/context-validator.js";
|
|
15
|
-
import { bindGeneratorToAbortSignal, bindGeneratorToEmitUpdate, bindGeneratorToInvocationContext, bindGeneratorToSessionContext, getAbortSignal, getInvocationContext, runWithAbortSignal, } from "../session-context";
|
|
17
|
+
import { bindGeneratorToAbortSignal, bindGeneratorToEmitUpdate, bindGeneratorToInvocationContext, bindGeneratorToSessionContext, getAbortSignal, getInvocationContext, getSessionContext, runWithAbortSignal, } from "../session-context";
|
|
16
18
|
import { loadCustomToolModule, } from "../tool-loader.js";
|
|
17
19
|
import { createModelFromString, detectProvider } from "./model-factory.js";
|
|
18
20
|
import { makeOtelCallbacks } from "./otel-callbacks.js";
|
|
@@ -35,6 +37,33 @@ const getWeather = tool(({ city }) => `It's always sunny in ${city}!`, {
|
|
|
35
37
|
getWeather.prettyName = "Get Weather";
|
|
36
38
|
// biome-ignore lint/suspicious/noExplicitAny: Need to add custom properties to LangChain tool
|
|
37
39
|
getWeather.icon = "Cloud";
|
|
40
|
+
/**
 * Reports whether `v` is a non-null object that is not an array.
 *
 * Note that this is a structural check only: class instances, Dates, Maps
 * and similar objects also pass, because only `null`, primitives, functions
 * and arrays are rejected.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {boolean} `true` for any non-array object, `false` otherwise.
 */
function isPlainRecord(v) {
    if (v === null || typeof v !== "object") {
        return false;
    }
    return !Array.isArray(v);
}
|
|
43
|
+
/**
 * Serializes `value` to JSON with object keys emitted in sorted order, so two
 * structurally equal values produce the same string regardless of the order
 * in which their keys were inserted.
 *
 * Cycles are replaced with the string "[Circular]". Only true cycles are
 * replaced: an object or array that is merely referenced more than once
 * (without containing itself) is serialized in full at every occurrence.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string | undefined} The JSON text, or `undefined` when
 *   `JSON.stringify` yields no output for the value (e.g. `undefined`).
 */
function stableStringify(value) {
    // Containers on the current recursion path, i.e. ancestors of the value
    // being visited. Entries are removed on the way back up so that shared
    // (non-cyclic) references are not mistaken for cycles.
    const ancestors = new WeakSet();
    const normalize = (v) => {
        if (v === null || typeof v !== "object") {
            return v;
        }
        // Checked for arrays as well as objects; a self-containing array
        // would otherwise recurse until the stack overflows.
        if (ancestors.has(v)) {
            return "[Circular]";
        }
        ancestors.add(v);
        let normalized;
        if (Array.isArray(v)) {
            normalized = v.map(normalize);
        }
        else {
            // Null prototype so that an own "__proto__" key is stored as a
            // regular property instead of being swallowed by the setter.
            normalized = Object.create(null);
            for (const k of Object.keys(v).sort()) {
                normalized[k] = normalize(v[k]);
            }
        }
        ancestors.delete(v);
        return normalized;
    };
    return JSON.stringify(normalize(value));
}
|
|
64
|
+
/**
 * Builds the lookup key used to pair a streamed tool call with the matching
 * tool invocation: the tool name, a colon, then the key-order-independent
 * serialization of the arguments.
 *
 * @param {string} toolName - Name of the tool being called.
 * @param {unknown} args - Arguments of the call.
 * @returns {string} Key of the form `<toolName>:<serialized args>`.
 */
function toolCallKey(toolName, args) {
    const serializedArgs = stableStringify(args);
    return `${toolName}:${serializedArgs}`;
}
|
|
38
67
|
export const TOOL_REGISTRY = {
|
|
39
68
|
todo_write: () => makeTodoWriteTool(), // Factory function to create fresh instance per invocation
|
|
40
69
|
get_weather: getWeather, // TODO: Convert to factory function for full concurrency safety
|
|
@@ -225,11 +254,25 @@ export class LangchainAgent {
|
|
|
225
254
|
// Create OTEL callbacks for instrumentation early so we can use them during tool wrapping
|
|
226
255
|
// Track iteration index across LLM calls in this invocation
|
|
227
256
|
const iterationIndexRef = { current: 0 };
|
|
257
|
+
// Track actual token usage from API responses for validation
|
|
258
|
+
let lastActualTokenUsage = null;
|
|
228
259
|
otelCallbacks = makeOtelCallbacks({
|
|
229
260
|
provider,
|
|
230
261
|
model: effectiveModel,
|
|
231
262
|
parentContext: invocationContext,
|
|
232
263
|
iterationIndexRef,
|
|
264
|
+
emitTokenUsage: (data) => {
|
|
265
|
+
// Store actual token usage from API response
|
|
266
|
+
lastActualTokenUsage = {
|
|
267
|
+
inputTokens: data.inputTokens,
|
|
268
|
+
outputTokens: data.outputTokens,
|
|
269
|
+
};
|
|
270
|
+
_logger.info("Actual token usage from API", {
|
|
271
|
+
inputTokens: data.inputTokens,
|
|
272
|
+
outputTokens: data.outputTokens,
|
|
273
|
+
totalTokens: data.inputTokens + data.outputTokens,
|
|
274
|
+
});
|
|
275
|
+
},
|
|
233
276
|
});
|
|
234
277
|
// Track todo_write tool call IDs to suppress their tool_call notifications
|
|
235
278
|
const todoWriteToolCallIds = new Set();
|
|
@@ -332,13 +375,13 @@ export class LangchainAgent {
|
|
|
332
375
|
const { extractToolMetadata, estimateAllToolsOverhead } = await import("../../utils/tool-overhead-calculator.js");
|
|
333
376
|
// Calculate overhead for non-MCP tools (built-in, custom, filesystem)
|
|
334
377
|
const nonMcpToolMetadata = enabledTools.map(extractToolMetadata);
|
|
335
|
-
const nonMcpToolDefinitionsTokens = estimateAllToolsOverhead(nonMcpToolMetadata);
|
|
378
|
+
const nonMcpToolDefinitionsTokens = await estimateAllToolsOverhead(nonMcpToolMetadata);
|
|
336
379
|
// Calculate TODO_WRITE_INSTRUCTIONS overhead if applicable
|
|
337
380
|
// Skip for subagents since the todo_write tool is filtered out for them
|
|
338
381
|
const isSubagentForTokens = req.sessionMeta?.[SUBAGENT_MODE_KEY] === true;
|
|
339
382
|
const hasTodoWriteTool = builtInNames.includes("todo_write") && !isSubagentForTokens;
|
|
340
383
|
const todoInstructionsTokens = hasTodoWriteTool
|
|
341
|
-
? countTokens(TODO_WRITE_INSTRUCTIONS)
|
|
384
|
+
? await countTokens(TODO_WRITE_INSTRUCTIONS)
|
|
342
385
|
: 0;
|
|
343
386
|
// Total non-MCP tool overhead: tool definitions + TODO instructions
|
|
344
387
|
const toolOverheadTokens = nonMcpToolDefinitionsTokens + todoInstructionsTokens;
|
|
@@ -347,8 +390,20 @@ export class LangchainAgent {
|
|
|
347
390
|
if ((this.definition.mcps?.length ?? 0) > 0) {
|
|
348
391
|
const client = await makeMcpToolsClient(this.definition.mcps);
|
|
349
392
|
const mcpTools = await client.getTools();
|
|
393
|
+
_logger.info("MCP tools loaded", {
|
|
394
|
+
mcpCount: this.definition.mcps?.length ?? 0,
|
|
395
|
+
toolCount: mcpTools.length,
|
|
396
|
+
toolNames: mcpTools.map((t) => t.name),
|
|
397
|
+
});
|
|
350
398
|
const mcpToolMetadata = mcpTools.map(extractToolMetadata);
|
|
351
|
-
mcpOverheadTokens = estimateAllToolsOverhead(mcpToolMetadata);
|
|
399
|
+
mcpOverheadTokens = await estimateAllToolsOverhead(mcpToolMetadata);
|
|
400
|
+
_logger.warn("MCP tool overhead calculated", {
|
|
401
|
+
mcpToolCount: mcpTools.length,
|
|
402
|
+
estimatedTokens: mcpOverheadTokens,
|
|
403
|
+
avgTokensPerTool: mcpTools.length > 0
|
|
404
|
+
? Math.round(mcpOverheadTokens / mcpTools.length)
|
|
405
|
+
: 0,
|
|
406
|
+
});
|
|
352
407
|
enabledTools.push(...mcpTools);
|
|
353
408
|
}
|
|
354
409
|
_logger.debug("Calculated tool overhead for context sizing", {
|
|
@@ -368,7 +423,7 @@ export class LangchainAgent {
|
|
|
368
423
|
// Calculate accurate context size for tool response compaction decisions
|
|
369
424
|
// This includes: system prompt, tool overhead, MCP overhead, and message history
|
|
370
425
|
const baseSystemPromptTokens = this.definition.systemPrompt
|
|
371
|
-
? countTokens(this.definition.systemPrompt)
|
|
426
|
+
? await countTokens(this.definition.systemPrompt)
|
|
372
427
|
: 0;
|
|
373
428
|
// Estimate additional injection tokens based on enabled features
|
|
374
429
|
// These will be injected into the system prompt later
|
|
@@ -389,7 +444,7 @@ export class LangchainAgent {
|
|
|
389
444
|
todoInstructionsTokens +
|
|
390
445
|
injectionOverheadEstimate;
|
|
391
446
|
// Calculate message history tokens from context messages
|
|
392
|
-
const messageHistoryContext = calculateContextSize(req.contextMessages || [], undefined, // Don't double count system prompt
|
|
447
|
+
const messageHistoryContext = await calculateContextSize(req.contextMessages || [], undefined, // Don't double count system prompt
|
|
393
448
|
0, // Don't double count tool overhead
|
|
394
449
|
0);
|
|
395
450
|
const messageHistoryTokens = messageHistoryContext.userMessagesTokens +
|
|
@@ -415,6 +470,101 @@ export class LangchainAgent {
|
|
|
415
470
|
// Hook execution removed from tool wrapper - hooks are now executed only at the adapter layer
|
|
416
471
|
// The adapter has proper MidTurnRestartError handling that can restart the turn
|
|
417
472
|
// Executing hooks here in the runner was causing restart signals to be caught as tool failures
|
|
473
|
+
// In-flight tool-response compaction (updates what the LLM sees in this same turn).
|
|
474
|
+
// We run ONLY the tool_response_compactor callback here, and we attach metadata separately
|
|
475
|
+
// to tool_output notifications for persistence/UI, without polluting the model-visible output.
|
|
476
|
+
const hooks = this.definition.hooks ?? [];
|
|
477
|
+
const inflightToolResponseHooks = hooks
|
|
478
|
+
.filter((h) => h.type === "tool_response")
|
|
479
|
+
.map((h) => {
|
|
480
|
+
// Keep only tool_response_compactor callbacks to avoid mid-turn restart logic here.
|
|
481
|
+
if (h.callbacks && h.callbacks.length > 0) {
|
|
482
|
+
const filtered = h.callbacks.filter((c) => c.name === "tool_response_compactor");
|
|
483
|
+
return filtered.length > 0 ? { ...h, callbacks: filtered } : null;
|
|
484
|
+
}
|
|
485
|
+
if (h.callback === "tool_response_compactor") {
|
|
486
|
+
return h;
|
|
487
|
+
}
|
|
488
|
+
return null;
|
|
489
|
+
})
|
|
490
|
+
.filter((h) => h !== null);
|
|
491
|
+
const hasInflightToolCompaction = inflightToolResponseHooks.length > 0;
|
|
492
|
+
const inflightContextTokensRef = { current: baseContextTokens };
|
|
493
|
+
const toolCallIdQueuesByKey = new Map();
|
|
494
|
+
const toolCallIdWaitersByKey = new Map();
|
|
495
|
+
const inflightCompactionMetaByToolCallId = new Map();
|
|
496
|
+
// Records the id of a tool call for the given tool name and arguments.
// If a tool invocation is already waiting for an id under the same key, the
// id is handed to the oldest waiter directly; otherwise it is appended to the
// per-key queue for a later consumer to pick up.
const registerToolCallId = (toolName, args, id) => {
    const callKey = toolCallKey(toolName, args);
    const pendingWaiter = toolCallIdWaitersByKey.get(callKey)?.shift();
    if (pendingWaiter) {
        pendingWaiter(id);
        return;
    }
    let queuedIds = toolCallIdQueuesByKey.get(callKey);
    if (queuedIds == null) {
        queuedIds = [];
        toolCallIdQueuesByKey.set(callKey, queuedIds);
    }
    queuedIds.push(id);
};
|
|
510
|
+
// Resolves the tool call id for an invocation of `toolName` with `args`.
//
// If an id was already registered under the same key it is returned
// immediately (oldest first). Otherwise a waiter is parked under that key and
// the returned promise resolves either when an id is registered or, after
// 1500 ms, with a generated `unknown_<timestamp>_<random>` fallback id.
//
// The fallback timer is cancelled as soon as the waiter receives a real id,
// and queue/waiter entries are removed from their maps once empty, so no
// timers or empty arrays are left behind for calls that were matched.
const consumeToolCallId = async (toolName, args) => {
    const key = toolCallKey(toolName, args);
    const queuedIds = toolCallIdQueuesByKey.get(key);
    if (queuedIds && queuedIds.length > 0) {
        const id = queuedIds.shift();
        if (queuedIds.length === 0) {
            toolCallIdQueuesByKey.delete(key);
        }
        if (id)
            return id;
    }
    // Wait briefly for the updates stream to register the id.
    return await new Promise((resolve) => {
        let fallbackTimer;
        // The function stored in the waiters list; whoever registers the id
        // calls it. It cancels the pending fallback before resolving.
        const waiter = (id) => {
            clearTimeout(fallbackTimer);
            // The registrar has already removed this waiter from the list;
            // drop the list itself if nothing else is waiting.
            if (toolCallIdWaitersByKey.get(key)?.length === 0) {
                toolCallIdWaitersByKey.delete(key);
            }
            resolve(id);
        };
        const waiters = toolCallIdWaitersByKey.get(key) ?? [];
        waiters.push(waiter);
        toolCallIdWaitersByKey.set(key, waiters);
        fallbackTimer = setTimeout(() => {
            // Fallback if never registered (should be rare)
            const fallback = `unknown_${Date.now()}_${Math.random()
                .toString(36)
                .slice(2, 8)}`;
            // Remove this waiter from the list if still present
            const remaining = toolCallIdWaitersByKey.get(key) ?? [];
            const idx = remaining.indexOf(waiter);
            if (idx >= 0)
                remaining.splice(idx, 1);
            if (remaining.length === 0) {
                toolCallIdWaitersByKey.delete(key);
            }
            resolve(fallback);
        }, 1500);
    });
};
|
|
543
|
+
// Writes the uncompacted tool output to
// `<artifactsDir>/tool-<toolName>/<toolCallId>.original.txt`, creating the
// directory if needed, and returns the session-relative path of that file.
// Any failure is logged as a warning and reported to the caller as `null`
// rather than thrown.
const saveToolOriginalToArtifacts = async (sessionId, toolName, toolCallId, content) => {
    try {
        const sessionContext = getSessionContext();
        const toolFolder = `tool-${toolName}`;
        const fileName = `${toolCallId}.original.txt`;
        const targetDir = path.join(sessionContext.artifactsDir, toolFolder);
        await mkdir(targetDir, { recursive: true });
        await writeFile(path.join(targetDir, fileName), content, "utf-8");
        // Match SessionStorage.saveToolOriginal relative path format:
        return `${sessionId}/artifacts/${toolFolder}/${fileName}`;
    }
    catch (e) {
        const reason = e instanceof Error ? e.message : String(e);
        _logger.warn("Failed to save original tool output in runner", {
            toolName,
            toolCallId,
            error: reason,
        });
        return null;
    }
};
|
|
562
|
+
const inflightHookExecutor = hasInflightToolCompaction
|
|
563
|
+
? new HookExecutor(inflightToolResponseHooks, this.definition.model, (callbackRef) => loadHookCallback(callbackRef, req.agentDir), undefined, // no streaming notifications from runner
|
|
564
|
+
this.definition, {
|
|
565
|
+
getArtifactsDir: (_sid) => getSessionContext().artifactsDir,
|
|
566
|
+
}, req.sessionId, req.agentDir)
|
|
567
|
+
: null;
|
|
418
568
|
// Counter for subagent calls - used to create unique source ID ranges
|
|
419
569
|
// Each subagent call gets a unique offset (1000, 2000, 3000, etc.)
|
|
420
570
|
// to ensure sources never conflict with parent's sources (typically < 100)
|
|
@@ -423,9 +573,118 @@ export class LangchainAgent {
|
|
|
423
573
|
// All hook execution (compaction, restart logic) happens at the adapter layer
|
|
424
574
|
const wrappedTools = enabledTools.map((originalTool) => {
|
|
425
575
|
// Runs the original tool and, when in-flight tool-response compaction is
// configured, passes the output through the tool_response hooks *before*
// handing it back to LangChain, so the next LLM step in the same turn sees
// the compacted output.
//
// Returns:
//   - the tool's result unchanged when compaction is not configured or the
//     hooks left the output untouched;
//   - otherwise the hook-modified output (as a string when the tool itself
//     returned a string).
// Compaction metadata is never added to the returned value; it is stored in
// `inflightCompactionMetaByToolCallId` keyed by tool call id instead.
const wrappedFunc = async (input) => {
    if (!inflightHookExecutor || !hasInflightToolCompaction) {
        // No compaction configured: plain pass-through, no tool call id needed.
        return await originalTool.invoke(input);
    }
    const toolCallId = await consumeToolCallId(originalTool.name, input);
    const result = await originalTool.invoke(input);
    // Build hook input in the same shape the adapter uses.
    const toolInput = isPlainRecord(input) ? input : {};
    let rawOutput;
    if (isPlainRecord(result)) {
        rawOutput = result;
    }
    else if (typeof result === "string") {
        rawOutput = { content: result };
    }
    else {
        rawOutput = { content: JSON.stringify(result) };
    }
    const outputTokens = await countToolResultTokens(rawOutput);
    // Include current prompt as the last user message for better context.
    const nowIso = new Date().toISOString();
    const promptBlocksForHooks = req.prompt.map((block) => {
        if (block.type === "text" && typeof block.text === "string") {
            return { type: "text", text: block.text };
        }
        if (block.type === "image") {
            const imageBlock = { type: "image" };
            if (block.source)
                imageBlock.source = block.source;
            if (block.url)
                imageBlock.url = block.url;
            if (block.data)
                imageBlock.data = block.data;
            if (block.mimeType)
                imageBlock.mimeType = block.mimeType;
            return imageBlock;
        }
        // Any other block type is passed to the hooks as its JSON text.
        return {
            type: "text",
            text: JSON.stringify(block),
        };
    });
    const messagesForHooks = [
        ...(req.contextMessages ?? []),
        {
            role: "user",
            content: promptBlocksForHooks,
            timestamp: nowIso,
        },
    ];
    const hookResult = await inflightHookExecutor.executeToolResponseHooks({
        messages: messagesForHooks,
        context: [],
        requestParams: {},
    }, inflightContextTokensRef.current, {
        toolCallId,
        toolName: originalTool.name,
        toolInput,
        rawOutput,
        outputTokens,
    });
    const finalOutput = hookResult.modifiedOutput ?? rawOutput;
    const { action, originalTokens, finalTokens, tokensSaved: reportedTokensSaved, compactionMethod, } = hookResult.metadata ?? {};
    // Update inflight context token estimate for subsequent tool calls.
    const finalOutputTokens = finalTokens ?? (await countToolResultTokens(finalOutput));
    inflightContextTokensRef.current += finalOutputTokens;
    // If compaction happened, persist original output to artifacts and store meta
    // for the adapter/UI (without polluting model-visible tool output).
    const didActuallyCompact = Boolean(action) &&
        action !== "none" &&
        action !== "no_action_needed" &&
        originalTokens !== undefined &&
        finalTokens !== undefined &&
        finalTokens < originalTokens;
    if (didActuallyCompact && req.sessionId) {
        const originalContentStr = typeof rawOutput.content === "string"
            ? rawOutput.content
            : JSON.stringify(rawOutput);
        const originalContentPath = await saveToolOriginalToArtifacts(req.sessionId, originalTool.name, toolCallId, originalContentStr);
        const meta = {
            action,
            originalTokens,
            finalTokens,
            originalContentPreview: originalContentStr.slice(0, 2000),
        };
        // Both token counts are known to be defined in this branch.
        const tokensSaved = reportedTokensSaved ?? originalTokens - finalTokens;
        if (typeof tokensSaved === "number") {
            meta.tokensSaved = tokensSaved;
        }
        if (typeof compactionMethod === "string") {
            meta.compactionMethod = compactionMethod;
        }
        if (typeof originalContentPath === "string") {
            meta.originalContentPath = originalContentPath;
        }
        inflightCompactionMetaByToolCallId.set(toolCallId, meta);
    }
    // Hooks left the output untouched: hand back exactly what the tool
    // produced, so results that are neither strings nor objects (e.g. arrays)
    // are not replaced by the `{ content: "<json>" }` wrapper built above.
    if (finalOutput === rawOutput) {
        return result;
    }
    // Return compacted output to LangChain (model-visible), without metadata.
    if (typeof result === "string") {
        return typeof finalOutput.content === "string"
            ? finalOutput.content
            : JSON.stringify(finalOutput);
    }
    return finalOutput;
};
|
|
430
689
|
// Create new tool with wrapped function
|
|
431
690
|
// biome-ignore lint/suspicious/noExplicitAny: Need to pass function with dynamic signature
|
|
@@ -595,6 +854,20 @@ export class LangchainAgent {
|
|
|
595
854
|
}
|
|
596
855
|
}
|
|
597
856
|
}
|
|
857
|
+
// Calculate final system prompt tokens after ALL modifications/injections
|
|
858
|
+
const finalSystemPromptTokens = agentConfig.systemPrompt
|
|
859
|
+
? await countTokens(agentConfig.systemPrompt)
|
|
860
|
+
: 0;
|
|
861
|
+
_logger.debug("Final system prompt tokens after all injections", {
|
|
862
|
+
finalSystemPromptTokens,
|
|
863
|
+
baseSystemPromptTokens: baseSystemPromptTokens,
|
|
864
|
+
injectionOverhead: finalSystemPromptTokens - baseSystemPromptTokens,
|
|
865
|
+
});
|
|
866
|
+
// Yield system prompt overhead to adapter for accurate context tracking
|
|
867
|
+
yield {
|
|
868
|
+
sessionUpdate: "system_prompt_overhead",
|
|
869
|
+
systemPromptTokens: finalSystemPromptTokens,
|
|
870
|
+
};
|
|
598
871
|
const agent = createAgent(agentConfig);
|
|
599
872
|
// Build messages from context history if available, otherwise use just the prompt
|
|
600
873
|
// Type includes tool messages for sending tool results
|
|
@@ -859,6 +1132,9 @@ export class LangchainAgent {
|
|
|
859
1132
|
if (toolCall.id == null) {
|
|
860
1133
|
throw new Error(`Tool call is missing id: ${JSON.stringify(toolCall)}`);
|
|
861
1134
|
}
|
|
1135
|
+
// Register toolCall id so the tool wrapper can associate it with the invocation.
|
|
1136
|
+
// This enables in-flight tool-output compaction keyed by real tool_call_id.
|
|
1137
|
+
registerToolCallId(toolCall.name, toolCall.args, toolCall.id);
|
|
862
1138
|
telemetry.log("info", `Tool call started: ${toolCall.name}`, {
|
|
863
1139
|
toolCallId: toolCall.id,
|
|
864
1140
|
toolName: toolCall.name,
|
|
@@ -1154,6 +1430,13 @@ export class LangchainAgent {
|
|
|
1154
1430
|
catch {
|
|
1155
1431
|
// Not valid JSON, use original content
|
|
1156
1432
|
}
|
|
1433
|
+
// If we compacted the tool result in the wrapper, attach meta here for the adapter/UI.
|
|
1434
|
+
// This does NOT affect what LangChain/LLM saw (it already received the compacted output).
|
|
1435
|
+
const inflightMeta = inflightCompactionMetaByToolCallId.get(aiMessage.tool_call_id);
|
|
1436
|
+
if (inflightMeta) {
|
|
1437
|
+
compactionMeta = { ...(compactionMeta ?? {}), ...inflightMeta };
|
|
1438
|
+
rawOutput = { ...rawOutput, _compactionMeta: compactionMeta };
|
|
1439
|
+
}
|
|
1157
1440
|
// For content display, use cleaned version if compaction occurred
|
|
1158
1441
|
let displayContent = aiMessage.content;
|
|
1159
1442
|
if (compactionMeta) {
|
|
@@ -1235,6 +1518,15 @@ export class LangchainAgent {
|
|
|
1235
1518
|
sessionId: req.sessionId,
|
|
1236
1519
|
});
|
|
1237
1520
|
telemetry.endSpan(invocationSpan);
|
|
1521
|
+
// Yield actual token usage from API for comparison with estimates
|
|
1522
|
+
if (lastActualTokenUsage !== null) {
|
|
1523
|
+
const actualUsage = lastActualTokenUsage;
|
|
1524
|
+
yield {
|
|
1525
|
+
sessionUpdate: "actual_token_usage",
|
|
1526
|
+
inputTokens: actualUsage.inputTokens,
|
|
1527
|
+
outputTokens: actualUsage.outputTokens,
|
|
1528
|
+
};
|
|
1529
|
+
}
|
|
1238
1530
|
return {
|
|
1239
1531
|
stopReason: "end_turn",
|
|
1240
1532
|
_meta: {
|
|
@@ -7,6 +7,11 @@ export interface OtelCallbackOptions {
|
|
|
7
7
|
iterationIndexRef: {
|
|
8
8
|
current: number;
|
|
9
9
|
};
|
|
10
|
+
emitTokenUsage?: (data: {
|
|
11
|
+
sessionUpdate: "actual_token_usage";
|
|
12
|
+
inputTokens: number;
|
|
13
|
+
outputTokens: number;
|
|
14
|
+
}) => void;
|
|
10
15
|
}
|
|
11
16
|
/**
|
|
12
17
|
* Creates OpenTelemetry callback handlers for LangChain LLM calls.
|
|
@@ -188,6 +188,14 @@ export function makeOtelCallbacks(opts) {
|
|
|
188
188
|
? tokenUsage.totalTokens - inputTokens
|
|
189
189
|
: 0);
|
|
190
190
|
telemetry.recordTokenUsage(inputTokens, outputTokens, chatSpan);
|
|
191
|
+
// Emit token usage to adapter for validation
|
|
192
|
+
if (opts.emitTokenUsage) {
|
|
193
|
+
opts.emitTokenUsage({
|
|
194
|
+
sessionUpdate: "actual_token_usage",
|
|
195
|
+
inputTokens,
|
|
196
|
+
outputTokens,
|
|
197
|
+
});
|
|
198
|
+
}
|
|
191
199
|
}
|
|
192
200
|
// Serialize output and attach to span
|
|
193
201
|
const serializedOutput = serializeOutput(output);
|
|
@@ -37,7 +37,7 @@ const documentExtract = tool(async ({ session_id, file_path, query, target_token
|
|
|
37
37
|
parsedContent = { content };
|
|
38
38
|
}
|
|
39
39
|
// Count tokens in the document
|
|
40
|
-
const documentTokens = countTokens(content);
|
|
40
|
+
const documentTokens = await countTokens(content);
|
|
41
41
|
logger.info("Document extraction requested", {
|
|
42
42
|
filePath: file_path,
|
|
43
43
|
documentTokens,
|