@townco/agent 0.1.72 → 0.1.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -258,6 +258,9 @@ export class AgentAcpAdapter {
258
258
  const subagentsMetadata = this.getSubagentsMetadata();
259
259
  response._meta = {
260
260
  ...response._meta,
261
+ // Include model and system prompt for Town Hall comparison feature
262
+ model: this.agent.definition.model,
263
+ systemPrompt: this.agent.definition.systemPrompt ?? null,
261
264
  ...(this.agentDescription
262
265
  ? { agentDescription: this.agentDescription }
263
266
  : {}),
@@ -276,11 +279,14 @@ export class AgentAcpAdapter {
276
279
  }
277
280
  async newSession(params) {
278
281
  const sessionId = Math.random().toString(36).substring(2);
282
+ // Extract configOverrides from _meta if provided (Town Hall comparison feature)
283
+ const configOverrides = params._meta?.configOverrides;
279
284
  this.sessions.set(sessionId, {
280
285
  pendingPrompt: null,
281
286
  messages: [],
282
287
  context: [],
283
288
  requestParams: params,
289
+ configOverrides,
284
290
  });
285
291
  // Note: Initial message is sent by the HTTP transport when SSE connection is established
286
292
  // This ensures the message is delivered after the client is ready to receive it
@@ -361,8 +367,27 @@ export class AgentAcpAdapter {
361
367
  ...(block.icon ? { icon: block.icon } : {}),
362
368
  ...(block.subline ? { subline: block.subline } : {}),
363
369
  ...(block.batchId ? { batchId: block.batchId } : {}),
370
+ // Include subagent data for replay - full content is sent via direct SSE
371
+ // bypassing PostgreSQL NOTIFY size limits
372
+ ...(block.subagentPort ? { subagentPort: block.subagentPort } : {}),
373
+ ...(block.subagentSessionId
374
+ ? { subagentSessionId: block.subagentSessionId }
375
+ : {}),
376
+ ...(block.subagentMessages
377
+ ? { subagentMessages: block.subagentMessages }
378
+ : {}),
364
379
  ...block._meta,
365
380
  };
381
+ // Debug: log subagent data being replayed
382
+ logger.info("Replaying tool_call", {
383
+ toolCallId: block.id,
384
+ title: block.title,
385
+ batchId: block.batchId,
386
+ hasSubagentPort: !!block.subagentPort,
387
+ hasSubagentSessionId: !!block.subagentSessionId,
388
+ hasSubagentMessages: !!block.subagentMessages,
389
+ subagentMessagesCount: block.subagentMessages?.length,
390
+ });
366
391
  this.connection.sessionUpdate({
367
392
  sessionId: params.sessionId,
368
393
  update: {
@@ -456,6 +481,7 @@ export class AgentAcpAdapter {
456
481
  async prompt(params) {
457
482
  const promptSpan = telemetry.startSpan("adapter.prompt", {
458
483
  "session.id": params.sessionId,
484
+ "agent.session_id": params.sessionId,
459
485
  });
460
486
  const spanContext = promptSpan
461
487
  ? trace.setSpan(context.active(), promptSpan)
@@ -516,6 +542,11 @@ export class AgentAcpAdapter {
516
542
  .filter((p) => p.type === "text")
517
543
  .map((p) => p.text)
518
544
  .join("\n");
545
+ // Store user message in span attribute for debugger UI
546
+ const promptSpan = trace.getActiveSpan();
547
+ if (promptSpan) {
548
+ promptSpan.setAttribute("user.message", userMessageText);
549
+ }
519
550
  logger.info("User message received", {
520
551
  sessionId: params.sessionId,
521
552
  messagePreview: userMessageText.slice(0, 100),
@@ -635,11 +666,26 @@ export class AgentAcpAdapter {
635
666
  if (session.requestParams._meta) {
636
667
  invokeParams.sessionMeta = session.requestParams._meta;
637
668
  }
669
+ // Pass config overrides if defined (Town Hall comparison feature)
670
+ if (session.configOverrides) {
671
+ invokeParams.configOverrides = session.configOverrides;
672
+ }
638
673
  const generator = this.agent.invoke(invokeParams);
674
+ // Track the invocation span for parenting hook spans
675
+ let invocationSpan = null;
639
676
  // Manually iterate to capture the return value
640
677
  let iterResult = await generator.next();
641
678
  while (!iterResult.done) {
642
679
  const msg = iterResult.value;
680
+ // Capture the invocation span so we can use it for parenting hook spans
681
+ if ("sessionUpdate" in msg &&
682
+ msg.sessionUpdate === "__invocation_span" &&
683
+ "invocationSpan" in msg) {
684
+ invocationSpan = msg.invocationSpan;
685
+ // Skip this internal message - don't yield it
686
+ iterResult = await generator.next();
687
+ continue;
688
+ }
643
689
  // Capture tool overhead info if provided by harness
644
690
  if ("sessionUpdate" in msg &&
645
691
  msg.sessionUpdate === "tool_overhead_info") {
@@ -720,6 +766,12 @@ export class AgentAcpAdapter {
720
766
  "icon" in toolCallMsg._meta
721
767
  ? String(toolCallMsg._meta.icon)
722
768
  : undefined;
769
+ const verbiage = toolCallMsg._meta &&
770
+ typeof toolCallMsg._meta === "object" &&
771
+ "verbiage" in toolCallMsg._meta &&
772
+ typeof toolCallMsg._meta.verbiage === "object"
773
+ ? toolCallMsg._meta.verbiage
774
+ : undefined;
723
775
  const batchId = toolCallMsg._meta &&
724
776
  typeof toolCallMsg._meta === "object" &&
725
777
  "batchId" in toolCallMsg._meta
@@ -752,6 +804,7 @@ export class AgentAcpAdapter {
752
804
  title: toolCallMsg.title || "Tool",
753
805
  ...(prettyName ? { prettyName } : {}),
754
806
  ...(icon ? { icon } : {}),
807
+ ...(verbiage ? { verbiage } : {}),
755
808
  ...(subline ? { subline } : {}),
756
809
  kind: toolCallMsg.kind || "other",
757
810
  status: toolCallMsg.status || "pending",
@@ -783,6 +836,41 @@ export class AgentAcpAdapter {
783
836
  toolCallBlock.status === "failed") {
784
837
  toolCallBlock.completedAt = Date.now();
785
838
  }
839
+ const meta = updateMsg._meta;
840
+ // Update batchId from _meta (comes from tool_call_update after preliminary tool_call)
841
+ if (meta?.batchId && !toolCallBlock.batchId) {
842
+ toolCallBlock.batchId = meta.batchId;
843
+ }
844
+ if (meta?.subagentPort) {
845
+ toolCallBlock.subagentPort = meta.subagentPort;
846
+ }
847
+ if (meta?.subagentSessionId) {
848
+ toolCallBlock.subagentSessionId = meta.subagentSessionId;
849
+ }
850
+ if (meta?.subagentMessages) {
851
+ logger.info("Storing subagent messages for session replay", {
852
+ toolCallId: updateMsg.toolCallId,
853
+ messageCount: meta.subagentMessages.length,
854
+ });
855
+ toolCallBlock.subagentMessages = meta.subagentMessages;
856
+ }
857
+ }
858
+ // Forward tool_call_update with _meta to the client (for subagent connection info, etc.)
859
+ if (updateMsg._meta) {
860
+ logger.info("Forwarding tool_call_update with _meta to client", {
861
+ toolCallId: updateMsg.toolCallId,
862
+ status: updateMsg.status,
863
+ _meta: updateMsg._meta,
864
+ });
865
+ this.connection.sessionUpdate({
866
+ sessionId: params.sessionId,
867
+ update: {
868
+ sessionUpdate: "tool_call_update",
869
+ toolCallId: updateMsg.toolCallId,
870
+ status: updateMsg.status,
871
+ _meta: updateMsg._meta,
872
+ },
873
+ });
786
874
  }
787
875
  // Forward tool_call_update with _meta to the client (for subagent connection info, etc.)
788
876
  if (updateMsg._meta) {
@@ -949,7 +1037,7 @@ export class AgentAcpAdapter {
949
1037
  toolResultsTokens: midTurnSnapshot.context_size.toolResultsTokens,
950
1038
  });
951
1039
  // Execute hooks mid-turn to check if compaction is needed
952
- const midTurnContextEntries = await this.executeHooksIfConfigured(session, params.sessionId, "mid_turn");
1040
+ const midTurnContextEntries = await this.executeHooksIfConfigured(session, params.sessionId, "mid_turn", invocationSpan);
953
1041
  // Append new context entries returned by hooks (e.g., compaction)
954
1042
  if (midTurnContextEntries.length > 0) {
955
1043
  logger.info(`Appending ${midTurnContextEntries.length} new context entries from mid_turn hooks`, {
@@ -1100,20 +1188,25 @@ export class AgentAcpAdapter {
1100
1188
  * Execute hooks if configured for this agent
1101
1189
  * Returns new context entries that should be appended to session.context
1102
1190
  */
1103
- async executeHooksIfConfigured(session, sessionId, executionPoint) {
1191
+ async executeHooksIfConfigured(session, sessionId, executionPoint, parentSpan) {
1104
1192
  // Check if hooks are configured and session persistence is enabled
1105
1193
  const hooks = this.agent.definition.hooks;
1106
1194
  if (this.noSession || !hooks || hooks.length === 0) {
1107
1195
  return [];
1108
1196
  }
1197
+ // If a parent span is provided (for mid-turn hooks), set it as active context
1198
+ const baseContext = parentSpan
1199
+ ? trace.setSpan(context.active(), parentSpan)
1200
+ : context.active();
1109
1201
  const hookSpan = telemetry.startSpan("adapter.executeHooks", {
1110
1202
  "hooks.executionPoint": executionPoint,
1111
1203
  "hooks.count": hooks.length,
1112
1204
  "session.id": sessionId,
1113
- });
1205
+ "agent.session_id": sessionId,
1206
+ }, baseContext);
1114
1207
  const spanContext = hookSpan
1115
- ? trace.setSpan(context.active(), hookSpan)
1116
- : context.active();
1208
+ ? trace.setSpan(baseContext, hookSpan)
1209
+ : baseContext;
1117
1210
  return context.with(spanContext, async () => {
1118
1211
  try {
1119
1212
  return await this._executeHooksImpl(session, sessionId, executionPoint, hooks);
@@ -45,6 +45,9 @@ function compressIfNeeded(rawMsg) {
45
45
  }
46
46
  // Use PGlite in-memory database for LISTEN/NOTIFY
47
47
  const pg = new PGlite();
48
+ // Store for oversized responses that can't go through PostgreSQL NOTIFY
49
+ // Key: request ID, Value: response object
50
+ const oversizedResponses = new Map();
48
51
  // Helper to create safe channel names from untrusted IDs
49
52
  function safeChannelName(prefix, id) {
50
53
  const hash = createHash("sha256").update(id).digest("hex").slice(0, 16);
@@ -133,26 +136,21 @@ export function makeHttpTransport(agent, agentDir, agentName) {
133
136
  const escapedPayload = payload.replace(/'/g, "''");
134
137
  // Check if even compressed payload is too large
135
138
  if (compressedSize > 7500) {
136
- logger.error("Response payload too large even after compression", {
139
+ logger.info("Response payload too large for NOTIFY, using direct storage", {
137
140
  requestId: rawMsg.id,
138
141
  originalSize,
139
142
  compressedSize,
140
143
  });
141
- // Send error response
142
- const errorResponse = {
144
+ // Store the response for direct retrieval by the HTTP handler
145
+ oversizedResponses.set(rawMsg.id, rawMsg);
146
+ // Send a small marker through NOTIFY to signal the response is ready
147
+ const markerResponse = {
143
148
  jsonrpc: "2.0",
144
149
  id: rawMsg.id,
145
- error: {
146
- code: -32603,
147
- message: "Response payload too large even after compression",
148
- data: {
149
- originalSize,
150
- compressedSize,
151
- },
152
- },
150
+ _oversized: true,
153
151
  };
154
- const errorPayload = JSON.stringify(errorResponse).replace(/'/g, "''");
155
- await pg.query(`NOTIFY ${channel}, '${errorPayload}'`);
152
+ const markerPayload = JSON.stringify(markerResponse).replace(/'/g, "''");
153
+ await pg.query(`NOTIFY ${channel}, '${markerPayload}'`);
156
154
  continue;
157
155
  }
158
156
  try {
@@ -227,6 +225,48 @@ export function makeHttpTransport(agent, agentDir, agentName) {
227
225
  }
228
226
  continue;
229
227
  }
228
+ // Check if this is a tool_call with subagentMessages - send directly via SSE
229
+ // to bypass PostgreSQL NOTIFY size limits (7500 bytes)
230
+ if (messageType === "session/update" &&
231
+ "params" in rawMsg &&
232
+ rawMsg.params != null &&
233
+ typeof rawMsg.params === "object" &&
234
+ "update" in rawMsg.params &&
235
+ rawMsg.params.update != null &&
236
+ typeof rawMsg.params.update === "object" &&
237
+ "sessionUpdate" in rawMsg.params.update &&
238
+ rawMsg.params.update.sessionUpdate === "tool_call" &&
239
+ "_meta" in rawMsg.params.update &&
240
+ rawMsg.params.update._meta != null &&
241
+ typeof rawMsg.params.update._meta === "object" &&
242
+ "subagentMessages" in rawMsg.params.update._meta) {
243
+ // Send subagent tool call directly via SSE, bypassing PostgreSQL NOTIFY
244
+ const stream = sseStreams.get(sessionId);
245
+ if (stream) {
246
+ try {
247
+ await stream.writeSSE({
248
+ event: "message",
249
+ data: JSON.stringify(rawMsg),
250
+ });
251
+ logger.debug("Sent subagent tool call directly via SSE", {
252
+ sessionId,
253
+ payloadSize: JSON.stringify(rawMsg).length,
254
+ });
255
+ }
256
+ catch (error) {
257
+ logger.error("Failed to send subagent tool call", {
258
+ error,
259
+ sessionId,
260
+ });
261
+ }
262
+ }
263
+ else {
264
+ logger.warn("No SSE stream found for subagent tool call", {
265
+ sessionId,
266
+ });
267
+ }
268
+ continue;
269
+ }
230
270
  // Other messages (notifications, requests from agent) go to
231
271
  // session-specific channel via PostgreSQL NOTIFY
232
272
  const channel = safeChannelName("notifications", sessionId);
@@ -513,6 +553,24 @@ export function makeHttpTransport(agent, agentDir, agentName) {
513
553
  };
514
554
  }
515
555
  }
556
+ // Check if this is an oversized response marker
557
+ if (rawResponse &&
558
+ typeof rawResponse === "object" &&
559
+ "_oversized" in rawResponse &&
560
+ rawResponse._oversized === true &&
561
+ "id" in rawResponse) {
562
+ // Retrieve the actual response from the oversized storage
563
+ const actualResponse = oversizedResponses.get(String(rawResponse.id));
564
+ if (actualResponse) {
565
+ oversizedResponses.delete(String(rawResponse.id));
566
+ logger.debug("Retrieved oversized response from storage", { id });
567
+ responseResolver(actualResponse);
568
+ return;
569
+ }
570
+ logger.error("Oversized response not found in storage", {
571
+ id: rawResponse.id,
572
+ });
573
+ }
516
574
  responseResolver(rawResponse);
517
575
  });
518
576
  // Write NDJSON line into the ACP inbound stream
@@ -553,7 +611,6 @@ export function makeHttpTransport(agent, agentDir, agentName) {
553
611
  logger.info("Starting HTTP server", { port });
554
612
  Bun.serve({
555
613
  fetch: app.fetch,
556
- hostname: Bun.env.BIND_HOST || "localhost",
557
614
  port,
558
615
  });
559
616
  logger.info("HTTP server listening", {
@@ -16,6 +16,40 @@ export interface ImageBlock {
16
16
  data?: string | undefined;
17
17
  mimeType?: string | undefined;
18
18
  }
19
+ /**
20
+ * Sub-agent tool call stored within a parent tool call's subagentMessages
21
+ */
22
+ export interface SubagentToolCallBlock {
23
+ id: string;
24
+ title: string;
25
+ prettyName?: string | undefined;
26
+ icon?: string | undefined;
27
+ status: "pending" | "in_progress" | "completed" | "failed";
28
+ }
29
+ /**
30
+ * Content block for sub-agent messages - either text or a tool call
31
+ */
32
+ export interface SubagentTextBlock {
33
+ type: "text";
34
+ text: string;
35
+ }
36
+ export interface SubagentToolCallContentBlock {
37
+ type: "tool_call";
38
+ toolCall: SubagentToolCallBlock;
39
+ }
40
+ export type SubagentContentBlock = SubagentTextBlock | SubagentToolCallContentBlock;
41
+ /**
42
+ * Sub-agent message stored for replay
43
+ */
44
+ export interface SubagentMessage {
45
+ id: string;
46
+ /** Accumulated text content (thinking) */
47
+ content: string;
48
+ /** Interleaved content blocks in arrival order */
49
+ contentBlocks?: SubagentContentBlock[] | undefined;
50
+ /** Tool calls made by the sub-agent */
51
+ toolCalls?: SubagentToolCallBlock[] | undefined;
52
+ }
19
53
  export interface ToolCallBlock {
20
54
  type: "tool_call";
21
55
  id: string;
@@ -37,6 +71,12 @@ export interface ToolCallBlock {
37
71
  originalTokens?: number;
38
72
  finalTokens?: number;
39
73
  };
74
+ /** Sub-agent HTTP port (for reference, not used in replay) */
75
+ subagentPort?: number | undefined;
76
+ /** Sub-agent session ID (for reference, not used in replay) */
77
+ subagentSessionId?: string | undefined;
78
+ /** Stored sub-agent messages for replay */
79
+ subagentMessages?: SubagentMessage[] | undefined;
40
80
  }
41
81
  export type ContentBlock = TextBlock | ImageBlock | ToolCallBlock;
42
82
  /**
@@ -26,6 +26,29 @@ const imageBlockSchema = z.object({
26
26
  data: z.string().optional(),
27
27
  mimeType: z.string().optional(),
28
28
  });
29
+ const subagentToolCallBlockSchema = z.object({
30
+ id: z.string(),
31
+ title: z.string(),
32
+ prettyName: z.string().optional(),
33
+ icon: z.string().optional(),
34
+ status: z.enum(["pending", "in_progress", "completed", "failed"]),
35
+ });
36
+ const subagentContentBlockSchema = z.discriminatedUnion("type", [
37
+ z.object({
38
+ type: z.literal("text"),
39
+ text: z.string(),
40
+ }),
41
+ z.object({
42
+ type: z.literal("tool_call"),
43
+ toolCall: subagentToolCallBlockSchema,
44
+ }),
45
+ ]);
46
+ const subagentMessageSchema = z.object({
47
+ id: z.string(),
48
+ content: z.string(),
49
+ contentBlocks: z.array(subagentContentBlockSchema).optional(),
50
+ toolCalls: z.array(subagentToolCallBlockSchema).optional(),
51
+ });
29
52
  const toolCallBlockSchema = z.object({
30
53
  type: z.literal("tool_call"),
31
54
  id: z.string(),
@@ -52,6 +75,9 @@ const toolCallBlockSchema = z.object({
52
75
  error: z.string().optional(),
53
76
  startedAt: z.number().optional(),
54
77
  completedAt: z.number().optional(),
78
+ subagentPort: z.number().optional(),
79
+ subagentSessionId: z.string().optional(),
80
+ subagentMessages: z.array(subagentMessageSchema).optional(),
55
81
  });
56
82
  const contentBlockSchema = z.discriminatedUnion("type", [
57
83
  textBlockSchema,
@@ -1,4 +1,5 @@
1
1
  import type { PromptRequest, PromptResponse, SessionNotification } from "@agentclientprotocol/sdk";
2
+ import type { Span } from "@opentelemetry/api";
2
3
  import { z } from "zod";
3
4
  import type { ContentBlock } from "../acp-server/session-storage.js";
4
5
  export declare const zAgentRunnerParams: z.ZodObject<{
@@ -8,7 +9,7 @@ export declare const zAgentRunnerParams: z.ZodObject<{
8
9
  suggestedPrompts: z.ZodOptional<z.ZodArray<z.ZodString>>;
9
10
  systemPrompt: z.ZodNullable<z.ZodString>;
10
11
  model: z.ZodString;
11
- tools: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodUnion<readonly [z.ZodLiteral<"todo_write">, z.ZodLiteral<"get_weather">, z.ZodLiteral<"web_search">, z.ZodLiteral<"filesystem">, z.ZodLiteral<"generate_image">, z.ZodLiteral<"browser">]>, z.ZodObject<{
12
+ tools: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodUnion<readonly [z.ZodLiteral<"todo_write">, z.ZodLiteral<"get_weather">, z.ZodLiteral<"web_search">, z.ZodLiteral<"town_web_search">, z.ZodLiteral<"filesystem">, z.ZodLiteral<"generate_image">, z.ZodLiteral<"browser">]>, z.ZodObject<{
12
13
  type: z.ZodLiteral<"custom">;
13
14
  modulePath: z.ZodString;
14
15
  }, z.core.$strip>, z.ZodObject<{
@@ -66,10 +67,16 @@ export interface SessionMessage {
66
67
  content: ContentBlock[];
67
68
  timestamp: string;
68
69
  }
70
+ export interface ConfigOverrides {
71
+ model?: string;
72
+ systemPrompt?: string;
73
+ tools?: string[];
74
+ }
69
75
  export type InvokeRequest = Omit<PromptRequest, "_meta"> & {
70
76
  messageId: string;
71
77
  sessionMeta?: Record<string, unknown>;
72
78
  contextMessages?: SessionMessage[];
79
+ configOverrides?: ConfigOverrides;
73
80
  };
74
81
  export interface TokenUsage {
75
82
  inputTokens?: number;
@@ -125,6 +132,9 @@ export type ExtendedSessionUpdate = (SessionNotification["update"] & {
125
132
  sessionUpdate: "tool_overhead_info";
126
133
  toolOverheadTokens: number;
127
134
  mcpOverheadTokens: number;
135
+ } | {
136
+ sessionUpdate: "__invocation_span";
137
+ invocationSpan: Span;
128
138
  } | AgentMessageChunkWithTokens | HookNotificationUpdate;
129
139
  /** Describes an object that can run an agent definition */
130
140
  export interface AgentRunner {
@@ -1,8 +1,13 @@
1
- import { ChatAnthropic } from "@langchain/anthropic";
2
- import { HumanMessage, SystemMessage } from "@langchain/core/messages";
1
+ import Anthropic from "@anthropic-ai/sdk";
3
2
  import { createLogger } from "../../../logger.js";
3
+ import { telemetry } from "../../../telemetry/index.js";
4
4
  import { countToolResultTokens } from "../../../utils/token-counter.js";
5
5
  const logger = createLogger("tool-response-compactor");
6
+ // Create Anthropic client directly (not using LangChain)
7
+ // This ensures compaction LLM calls don't get captured by LangGraph's streaming
8
+ const anthropic = new Anthropic({
9
+ apiKey: process.env.ANTHROPIC_API_KEY,
10
+ });
6
11
  // Haiku 4.5 for compaction (fast and cost-effective)
7
12
  const COMPACTION_MODEL = "claude-haiku-4-5-20251001";
8
13
  const COMPACTION_MODEL_CONTEXT = 200000; // Haiku context size for calculating truncation limits
@@ -243,10 +248,6 @@ export const toolResponseCompactor = async (ctx) => {
243
248
  * Recursive LLM compaction with adaptive retries
244
249
  */
245
250
  async function compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetTokens) {
246
- const model = new ChatAnthropic({
247
- model: COMPACTION_MODEL,
248
- temperature: 0,
249
- });
250
251
  // Step 1: Understand what we're looking for (only need to do this once)
251
252
  const analysisPrompt = `You are helping to manage context size in an agent conversation.
252
253
 
@@ -260,12 +261,38 @@ ${conversationContext}
260
261
  Based on the tool input and conversation context, what key information is the user looking for from this tool response?
261
262
 
262
263
  Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
263
- const analysisResponse = await model.invoke([
264
- new SystemMessage("You are a helpful assistant analyzing information needs."),
265
- new HumanMessage(analysisPrompt),
266
- ]);
267
- const keyRequirements = typeof analysisResponse.content === "string"
268
- ? analysisResponse.content
264
+ // Create OTEL span for analysis call
265
+ const analysisSpan = telemetry.startSpan("compaction.analysis", {
266
+ "gen_ai.operation.name": "chat",
267
+ "gen_ai.provider.name": "anthropic",
268
+ "gen_ai.request.model": COMPACTION_MODEL,
269
+ "gen_ai.system_instructions": "You are a helpful assistant analyzing information needs.",
270
+ });
271
+ let analysisResponse;
272
+ try {
273
+ analysisResponse = await telemetry.withActiveSpanAsync(analysisSpan, () => anthropic.messages.create({
274
+ model: COMPACTION_MODEL,
275
+ max_tokens: 1024,
276
+ temperature: 0,
277
+ system: "You are a helpful assistant analyzing information needs.",
278
+ messages: [
279
+ {
280
+ role: "user",
281
+ content: analysisPrompt,
282
+ },
283
+ ],
284
+ }));
285
+ // Record token usage
286
+ telemetry.recordTokenUsage(analysisResponse.usage.input_tokens, analysisResponse.usage.output_tokens, analysisSpan);
287
+ telemetry.endSpan(analysisSpan);
288
+ }
289
+ catch (error) {
290
+ telemetry.endSpan(analysisSpan, error);
291
+ throw error;
292
+ }
293
+ const firstContent = analysisResponse.content[0];
294
+ const keyRequirements = firstContent && firstContent.type === "text"
295
+ ? firstContent.text
269
296
  : "Extract relevant information";
270
297
  logger.info("Identified key requirements for compaction", {
271
298
  requirements: keyRequirements.substring(0, 200),
@@ -323,14 +350,41 @@ Your task: Further compact this data by:
323
350
 
324
351
  Return ONLY valid JSON (no explanation text).`;
325
352
  }
326
- const compactionResponse = await model.invoke([
327
- new SystemMessage("You are a helpful assistant compacting data."),
328
- new HumanMessage(compactionPrompt),
329
- ]);
353
+ // Create OTEL span for compaction call
354
+ const compactionSpan = telemetry.startSpan("compaction.compact", {
355
+ "gen_ai.operation.name": "chat",
356
+ "gen_ai.provider.name": "anthropic",
357
+ "gen_ai.request.model": COMPACTION_MODEL,
358
+ "gen_ai.system_instructions": "You are a helpful assistant compacting data.",
359
+ "compaction.attempt": attempt + 1,
360
+ "compaction.target_tokens": targetTokens,
361
+ "compaction.current_tokens": currentTokens,
362
+ });
363
+ let compactionResponse;
364
+ try {
365
+ compactionResponse = await telemetry.withActiveSpanAsync(compactionSpan, () => anthropic.messages.create({
366
+ model: COMPACTION_MODEL,
367
+ max_tokens: 4096,
368
+ temperature: 0,
369
+ system: "You are a helpful assistant compacting data.",
370
+ messages: [
371
+ {
372
+ role: "user",
373
+ content: compactionPrompt,
374
+ },
375
+ ],
376
+ }));
377
+ // Record token usage
378
+ telemetry.recordTokenUsage(compactionResponse.usage.input_tokens, compactionResponse.usage.output_tokens, compactionSpan);
379
+ telemetry.endSpan(compactionSpan);
380
+ }
381
+ catch (error) {
382
+ telemetry.endSpan(compactionSpan, error);
383
+ throw error;
384
+ }
330
385
  // Extract and parse JSON
331
- const responseText = typeof compactionResponse.content === "string"
332
- ? compactionResponse.content
333
- : JSON.stringify(compactionResponse.content);
386
+ const firstContent = compactionResponse.content[0];
387
+ const responseText = firstContent && firstContent.type === "text" ? firstContent.text : "";
334
388
  const jsonMatch = responseText.match(/```(?:json)?\n([\s\S]*?)\n```/) || [
335
389
  null,
336
390
  responseText,