@townco/agent 0.1.73 → 0.1.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -258,6 +258,9 @@ export class AgentAcpAdapter {
258
258
  const subagentsMetadata = this.getSubagentsMetadata();
259
259
  response._meta = {
260
260
  ...response._meta,
261
+ // Include model and system prompt for Town Hall comparison feature
262
+ model: this.agent.definition.model,
263
+ systemPrompt: this.agent.definition.systemPrompt ?? null,
261
264
  ...(this.agentDescription
262
265
  ? { agentDescription: this.agentDescription }
263
266
  : {}),
@@ -276,11 +279,14 @@ export class AgentAcpAdapter {
276
279
  }
277
280
  async newSession(params) {
278
281
  const sessionId = Math.random().toString(36).substring(2);
282
+ // Extract configOverrides from _meta if provided (Town Hall comparison feature)
283
+ const configOverrides = params._meta?.configOverrides;
279
284
  this.sessions.set(sessionId, {
280
285
  pendingPrompt: null,
281
286
  messages: [],
282
287
  context: [],
283
288
  requestParams: params,
289
+ configOverrides,
284
290
  });
285
291
  // Note: Initial message is sent by the HTTP transport when SSE connection is established
286
292
  // This ensures the message is delivered after the client is ready to receive it
@@ -475,6 +481,7 @@ export class AgentAcpAdapter {
475
481
  async prompt(params) {
476
482
  const promptSpan = telemetry.startSpan("adapter.prompt", {
477
483
  "session.id": params.sessionId,
484
+ "agent.session_id": params.sessionId,
478
485
  });
479
486
  const spanContext = promptSpan
480
487
  ? trace.setSpan(context.active(), promptSpan)
@@ -535,6 +542,11 @@ export class AgentAcpAdapter {
535
542
  .filter((p) => p.type === "text")
536
543
  .map((p) => p.text)
537
544
  .join("\n");
545
+ // Store user message in span attribute for debugger UI
546
+ const promptSpan = trace.getActiveSpan();
547
+ if (promptSpan) {
548
+ promptSpan.setAttribute("user.message", userMessageText);
549
+ }
538
550
  logger.info("User message received", {
539
551
  sessionId: params.sessionId,
540
552
  messagePreview: userMessageText.slice(0, 100),
@@ -654,11 +666,26 @@ export class AgentAcpAdapter {
654
666
  if (session.requestParams._meta) {
655
667
  invokeParams.sessionMeta = session.requestParams._meta;
656
668
  }
669
+ // Pass config overrides if defined (Town Hall comparison feature)
670
+ if (session.configOverrides) {
671
+ invokeParams.configOverrides = session.configOverrides;
672
+ }
657
673
  const generator = this.agent.invoke(invokeParams);
674
+ // Track the invocation span for parenting hook spans
675
+ let invocationSpan = null;
658
676
  // Manually iterate to capture the return value
659
677
  let iterResult = await generator.next();
660
678
  while (!iterResult.done) {
661
679
  const msg = iterResult.value;
680
+ // Capture the invocation span so we can use it for parenting hook spans
681
+ if ("sessionUpdate" in msg &&
682
+ msg.sessionUpdate === "__invocation_span" &&
683
+ "invocationSpan" in msg) {
684
+ invocationSpan = msg.invocationSpan;
685
+ // Skip this internal message - don't yield it
686
+ iterResult = await generator.next();
687
+ continue;
688
+ }
662
689
  // Capture tool overhead info if provided by harness
663
690
  if ("sessionUpdate" in msg &&
664
691
  msg.sessionUpdate === "tool_overhead_info") {
@@ -739,6 +766,12 @@ export class AgentAcpAdapter {
739
766
  "icon" in toolCallMsg._meta
740
767
  ? String(toolCallMsg._meta.icon)
741
768
  : undefined;
769
+ const verbiage = toolCallMsg._meta &&
770
+ typeof toolCallMsg._meta === "object" &&
771
+ "verbiage" in toolCallMsg._meta &&
772
+ typeof toolCallMsg._meta.verbiage === "object"
773
+ ? toolCallMsg._meta.verbiage
774
+ : undefined;
742
775
  const batchId = toolCallMsg._meta &&
743
776
  typeof toolCallMsg._meta === "object" &&
744
777
  "batchId" in toolCallMsg._meta
@@ -771,6 +804,7 @@ export class AgentAcpAdapter {
771
804
  title: toolCallMsg.title || "Tool",
772
805
  ...(prettyName ? { prettyName } : {}),
773
806
  ...(icon ? { icon } : {}),
807
+ ...(verbiage ? { verbiage } : {}),
774
808
  ...(subline ? { subline } : {}),
775
809
  kind: toolCallMsg.kind || "other",
776
810
  status: toolCallMsg.status || "pending",
@@ -1003,7 +1037,7 @@ export class AgentAcpAdapter {
1003
1037
  toolResultsTokens: midTurnSnapshot.context_size.toolResultsTokens,
1004
1038
  });
1005
1039
  // Execute hooks mid-turn to check if compaction is needed
1006
- const midTurnContextEntries = await this.executeHooksIfConfigured(session, params.sessionId, "mid_turn");
1040
+ const midTurnContextEntries = await this.executeHooksIfConfigured(session, params.sessionId, "mid_turn", invocationSpan);
1007
1041
  // Append new context entries returned by hooks (e.g., compaction)
1008
1042
  if (midTurnContextEntries.length > 0) {
1009
1043
  logger.info(`Appending ${midTurnContextEntries.length} new context entries from mid_turn hooks`, {
@@ -1154,20 +1188,25 @@ export class AgentAcpAdapter {
1154
1188
  * Execute hooks if configured for this agent
1155
1189
  * Returns new context entries that should be appended to session.context
1156
1190
  */
1157
- async executeHooksIfConfigured(session, sessionId, executionPoint) {
1191
+ async executeHooksIfConfigured(session, sessionId, executionPoint, parentSpan) {
1158
1192
  // Check if hooks are configured and session persistence is enabled
1159
1193
  const hooks = this.agent.definition.hooks;
1160
1194
  if (this.noSession || !hooks || hooks.length === 0) {
1161
1195
  return [];
1162
1196
  }
1197
+ // If a parent span is provided (for mid-turn hooks), set it as active context
1198
+ const baseContext = parentSpan
1199
+ ? trace.setSpan(context.active(), parentSpan)
1200
+ : context.active();
1163
1201
  const hookSpan = telemetry.startSpan("adapter.executeHooks", {
1164
1202
  "hooks.executionPoint": executionPoint,
1165
1203
  "hooks.count": hooks.length,
1166
1204
  "session.id": sessionId,
1167
- });
1205
+ "agent.session_id": sessionId,
1206
+ }, baseContext);
1168
1207
  const spanContext = hookSpan
1169
- ? trace.setSpan(context.active(), hookSpan)
1170
- : context.active();
1208
+ ? trace.setSpan(baseContext, hookSpan)
1209
+ : baseContext;
1171
1210
  return context.with(spanContext, async () => {
1172
1211
  try {
1173
1212
  return await this._executeHooksImpl(session, sessionId, executionPoint, hooks);
@@ -45,6 +45,9 @@ function compressIfNeeded(rawMsg) {
45
45
  }
46
46
  // Use PGlite in-memory database for LISTEN/NOTIFY
47
47
  const pg = new PGlite();
48
+ // Store for oversized responses that can't go through PostgreSQL NOTIFY
49
+ // Key: request ID, Value: response object
50
+ const oversizedResponses = new Map();
48
51
  // Helper to create safe channel names from untrusted IDs
49
52
  function safeChannelName(prefix, id) {
50
53
  const hash = createHash("sha256").update(id).digest("hex").slice(0, 16);
@@ -133,26 +136,21 @@ export function makeHttpTransport(agent, agentDir, agentName) {
133
136
  const escapedPayload = payload.replace(/'/g, "''");
134
137
  // Check if even compressed payload is too large
135
138
  if (compressedSize > 7500) {
136
- logger.error("Response payload too large even after compression", {
139
+ logger.info("Response payload too large for NOTIFY, using direct storage", {
137
140
  requestId: rawMsg.id,
138
141
  originalSize,
139
142
  compressedSize,
140
143
  });
141
- // Send error response
142
- const errorResponse = {
144
+ // Store the response for direct retrieval by the HTTP handler
145
+ oversizedResponses.set(rawMsg.id, rawMsg);
146
+ // Send a small marker through NOTIFY to signal the response is ready
147
+ const markerResponse = {
143
148
  jsonrpc: "2.0",
144
149
  id: rawMsg.id,
145
- error: {
146
- code: -32603,
147
- message: "Response payload too large even after compression",
148
- data: {
149
- originalSize,
150
- compressedSize,
151
- },
152
- },
150
+ _oversized: true,
153
151
  };
154
- const errorPayload = JSON.stringify(errorResponse).replace(/'/g, "''");
155
- await pg.query(`NOTIFY ${channel}, '${errorPayload}'`);
152
+ const markerPayload = JSON.stringify(markerResponse).replace(/'/g, "''");
153
+ await pg.query(`NOTIFY ${channel}, '${markerPayload}'`);
156
154
  continue;
157
155
  }
158
156
  try {
@@ -555,6 +553,24 @@ export function makeHttpTransport(agent, agentDir, agentName) {
555
553
  };
556
554
  }
557
555
  }
556
+ // Check if this is an oversized response marker
557
+ if (rawResponse &&
558
+ typeof rawResponse === "object" &&
559
+ "_oversized" in rawResponse &&
560
+ rawResponse._oversized === true &&
561
+ "id" in rawResponse) {
562
+ // Retrieve the actual response from the oversized storage
563
+ const actualResponse = oversizedResponses.get(String(rawResponse.id));
564
+ if (actualResponse) {
565
+ oversizedResponses.delete(String(rawResponse.id));
566
+ logger.debug("Retrieved oversized response from storage", { id });
567
+ responseResolver(actualResponse);
568
+ return;
569
+ }
570
+ logger.error("Oversized response not found in storage", {
571
+ id: rawResponse.id,
572
+ });
573
+ }
558
574
  responseResolver(rawResponse);
559
575
  });
560
576
  // Write NDJSON line into the ACP inbound stream
@@ -1,4 +1,5 @@
1
1
  import type { PromptRequest, PromptResponse, SessionNotification } from "@agentclientprotocol/sdk";
2
+ import type { Span } from "@opentelemetry/api";
2
3
  import { z } from "zod";
3
4
  import type { ContentBlock } from "../acp-server/session-storage.js";
4
5
  export declare const zAgentRunnerParams: z.ZodObject<{
@@ -8,7 +9,7 @@ export declare const zAgentRunnerParams: z.ZodObject<{
8
9
  suggestedPrompts: z.ZodOptional<z.ZodArray<z.ZodString>>;
9
10
  systemPrompt: z.ZodNullable<z.ZodString>;
10
11
  model: z.ZodString;
11
- tools: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodUnion<readonly [z.ZodLiteral<"todo_write">, z.ZodLiteral<"get_weather">, z.ZodLiteral<"web_search">, z.ZodLiteral<"filesystem">, z.ZodLiteral<"generate_image">, z.ZodLiteral<"browser">]>, z.ZodObject<{
12
+ tools: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodUnion<readonly [z.ZodLiteral<"todo_write">, z.ZodLiteral<"get_weather">, z.ZodLiteral<"web_search">, z.ZodLiteral<"town_web_search">, z.ZodLiteral<"filesystem">, z.ZodLiteral<"generate_image">, z.ZodLiteral<"browser">]>, z.ZodObject<{
12
13
  type: z.ZodLiteral<"custom">;
13
14
  modulePath: z.ZodString;
14
15
  }, z.core.$strip>, z.ZodObject<{
@@ -66,10 +67,16 @@ export interface SessionMessage {
66
67
  content: ContentBlock[];
67
68
  timestamp: string;
68
69
  }
70
+ export interface ConfigOverrides {
71
+ model?: string;
72
+ systemPrompt?: string;
73
+ tools?: string[];
74
+ }
69
75
  export type InvokeRequest = Omit<PromptRequest, "_meta"> & {
70
76
  messageId: string;
71
77
  sessionMeta?: Record<string, unknown>;
72
78
  contextMessages?: SessionMessage[];
79
+ configOverrides?: ConfigOverrides;
73
80
  };
74
81
  export interface TokenUsage {
75
82
  inputTokens?: number;
@@ -125,6 +132,9 @@ export type ExtendedSessionUpdate = (SessionNotification["update"] & {
125
132
  sessionUpdate: "tool_overhead_info";
126
133
  toolOverheadTokens: number;
127
134
  mcpOverheadTokens: number;
135
+ } | {
136
+ sessionUpdate: "__invocation_span";
137
+ invocationSpan: Span;
128
138
  } | AgentMessageChunkWithTokens | HookNotificationUpdate;
129
139
  /** Describes an object that can run an agent definition */
130
140
  export interface AgentRunner {
@@ -1,8 +1,13 @@
1
- import { ChatAnthropic } from "@langchain/anthropic";
2
- import { HumanMessage, SystemMessage } from "@langchain/core/messages";
1
+ import Anthropic from "@anthropic-ai/sdk";
3
2
  import { createLogger } from "../../../logger.js";
3
+ import { telemetry } from "../../../telemetry/index.js";
4
4
  import { countToolResultTokens } from "../../../utils/token-counter.js";
5
5
  const logger = createLogger("tool-response-compactor");
6
+ // Create Anthropic client directly (not using LangChain)
7
+ // This ensures compaction LLM calls don't get captured by LangGraph's streaming
8
+ const anthropic = new Anthropic({
9
+ apiKey: process.env.ANTHROPIC_API_KEY,
10
+ });
6
11
  // Haiku 4.5 for compaction (fast and cost-effective)
7
12
  const COMPACTION_MODEL = "claude-haiku-4-5-20251001";
8
13
  const COMPACTION_MODEL_CONTEXT = 200000; // Haiku context size for calculating truncation limits
@@ -243,10 +248,6 @@ export const toolResponseCompactor = async (ctx) => {
243
248
  * Recursive LLM compaction with adaptive retries
244
249
  */
245
250
  async function compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetTokens) {
246
- const model = new ChatAnthropic({
247
- model: COMPACTION_MODEL,
248
- temperature: 0,
249
- });
250
251
  // Step 1: Understand what we're looking for (only need to do this once)
251
252
  const analysisPrompt = `You are helping to manage context size in an agent conversation.
252
253
 
@@ -260,12 +261,38 @@ ${conversationContext}
260
261
  Based on the tool input and conversation context, what key information is the user looking for from this tool response?
261
262
 
262
263
  Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
263
- const analysisResponse = await model.invoke([
264
- new SystemMessage("You are a helpful assistant analyzing information needs."),
265
- new HumanMessage(analysisPrompt),
266
- ]);
267
- const keyRequirements = typeof analysisResponse.content === "string"
268
- ? analysisResponse.content
264
+ // Create OTEL span for analysis call
265
+ const analysisSpan = telemetry.startSpan("compaction.analysis", {
266
+ "gen_ai.operation.name": "chat",
267
+ "gen_ai.provider.name": "anthropic",
268
+ "gen_ai.request.model": COMPACTION_MODEL,
269
+ "gen_ai.system_instructions": "You are a helpful assistant analyzing information needs.",
270
+ });
271
+ let analysisResponse;
272
+ try {
273
+ analysisResponse = await telemetry.withActiveSpanAsync(analysisSpan, () => anthropic.messages.create({
274
+ model: COMPACTION_MODEL,
275
+ max_tokens: 1024,
276
+ temperature: 0,
277
+ system: "You are a helpful assistant analyzing information needs.",
278
+ messages: [
279
+ {
280
+ role: "user",
281
+ content: analysisPrompt,
282
+ },
283
+ ],
284
+ }));
285
+ // Record token usage
286
+ telemetry.recordTokenUsage(analysisResponse.usage.input_tokens, analysisResponse.usage.output_tokens, analysisSpan);
287
+ telemetry.endSpan(analysisSpan);
288
+ }
289
+ catch (error) {
290
+ telemetry.endSpan(analysisSpan, error);
291
+ throw error;
292
+ }
293
+ const firstContent = analysisResponse.content[0];
294
+ const keyRequirements = firstContent && firstContent.type === "text"
295
+ ? firstContent.text
269
296
  : "Extract relevant information";
270
297
  logger.info("Identified key requirements for compaction", {
271
298
  requirements: keyRequirements.substring(0, 200),
@@ -323,14 +350,41 @@ Your task: Further compact this data by:
323
350
 
324
351
  Return ONLY valid JSON (no explanation text).`;
325
352
  }
326
- const compactionResponse = await model.invoke([
327
- new SystemMessage("You are a helpful assistant compacting data."),
328
- new HumanMessage(compactionPrompt),
329
- ]);
353
+ // Create OTEL span for compaction call
354
+ const compactionSpan = telemetry.startSpan("compaction.compact", {
355
+ "gen_ai.operation.name": "chat",
356
+ "gen_ai.provider.name": "anthropic",
357
+ "gen_ai.request.model": COMPACTION_MODEL,
358
+ "gen_ai.system_instructions": "You are a helpful assistant compacting data.",
359
+ "compaction.attempt": attempt + 1,
360
+ "compaction.target_tokens": targetTokens,
361
+ "compaction.current_tokens": currentTokens,
362
+ });
363
+ let compactionResponse;
364
+ try {
365
+ compactionResponse = await telemetry.withActiveSpanAsync(compactionSpan, () => anthropic.messages.create({
366
+ model: COMPACTION_MODEL,
367
+ max_tokens: 4096,
368
+ temperature: 0,
369
+ system: "You are a helpful assistant compacting data.",
370
+ messages: [
371
+ {
372
+ role: "user",
373
+ content: compactionPrompt,
374
+ },
375
+ ],
376
+ }));
377
+ // Record token usage
378
+ telemetry.recordTokenUsage(compactionResponse.usage.input_tokens, compactionResponse.usage.output_tokens, compactionSpan);
379
+ telemetry.endSpan(compactionSpan);
380
+ }
381
+ catch (error) {
382
+ telemetry.endSpan(compactionSpan, error);
383
+ throw error;
384
+ }
330
385
  // Extract and parse JSON
331
- const responseText = typeof compactionResponse.content === "string"
332
- ? compactionResponse.content
333
- : JSON.stringify(compactionResponse.content);
386
+ const firstContent = compactionResponse.content[0];
387
+ const responseText = firstContent && firstContent.type === "text" ? firstContent.text : "";
334
388
  const jsonMatch = responseText.match(/```(?:json)?\n([\s\S]*?)\n```/) || [
335
389
  null,
336
390
  responseText,
@@ -1,6 +1,6 @@
1
1
  import { MultiServerMCPClient } from "@langchain/mcp-adapters";
2
2
  import { context, propagation, trace } from "@opentelemetry/api";
3
- import { loadAuthCredentials } from "@townco/core/auth";
3
+ import { getShedAuth } from "@townco/core/auth";
4
4
  import { AIMessageChunk, createAgent, ToolMessage, tool, } from "langchain";
5
5
  import { z } from "zod";
6
6
  import { SUBAGENT_MODE_KEY } from "../../acp-server/adapter";
@@ -15,7 +15,7 @@ import { makeGenerateImageTool } from "./tools/generate_image";
15
15
  import { SUBAGENT_TOOL_NAME } from "./tools/subagent";
16
16
  import { hashQuery, queryToToolCallId, subagentEvents, } from "./tools/subagent-connections";
17
17
  import { TODO_WRITE_TOOL_NAME, todoWrite } from "./tools/todo";
18
- import { makeWebSearchTools } from "./tools/web_search";
18
+ import { makeTownWebSearchTools, makeWebSearchTools } from "./tools/web_search";
19
19
  const _logger = createLogger("agent-runner");
20
20
  const getWeather = tool(({ city }) => `It's always sunny in ${city}!`, {
21
21
  name: "get_weather",
@@ -30,6 +30,7 @@ export const TOOL_REGISTRY = {
30
30
  todo_write: todoWrite,
31
31
  get_weather: getWeather,
32
32
  web_search: () => makeWebSearchTools(),
33
+ town_web_search: () => makeTownWebSearchTools(),
33
34
  filesystem: () => makeFilesystemTools(process.cwd()),
34
35
  generate_image: () => makeGenerateImageTool(),
35
36
  browser: () => makeBrowserTools(),
@@ -96,10 +97,6 @@ export class LangchainAgent {
96
97
  // Clear the buffer after flushing
97
98
  pendingToolCallNotifications.length = 0;
98
99
  }
99
- // Set session_id as a base attribute so all spans in this invocation include it
100
- telemetry.setBaseAttributes({
101
- "agent.session_id": req.sessionId,
102
- });
103
100
  const subagentUpdateQueue = [];
104
101
  let subagentUpdateResolver = null;
105
102
  const subagentMessagesQueue = [];
@@ -176,11 +173,17 @@ export class LangchainAgent {
176
173
  };
177
174
  }
178
175
  }
176
+ // Add agent.session_id as a base attribute so it propagates to all child spans
177
+ // We'll clear this in a finally block to prevent cross-contamination
178
+ telemetry.setBaseAttributes({
179
+ "agent.session_id": req.sessionId,
180
+ });
179
181
  // Start telemetry span for entire invocation
180
182
  const invocationSpan = telemetry.startSpan("agent.invoke", {
181
183
  "agent.model": this.definition.model,
182
184
  "agent.subagent": meta?.[SUBAGENT_MODE_KEY] === true,
183
185
  "agent.message_id": req.messageId,
186
+ "agent.session_id": req.sessionId,
184
187
  }, parentContext);
185
188
  // Create a context with the invocation span as active
186
189
  // This will be used when creating child spans (tool calls)
@@ -192,7 +195,29 @@ export class LangchainAgent {
192
195
  sessionId: req.sessionId,
193
196
  messageId: req.messageId,
194
197
  });
198
+ // Yield the invocation span to the adapter so it can use it for parenting hook spans
199
+ if (invocationSpan) {
200
+ yield {
201
+ sessionUpdate: "__invocation_span",
202
+ invocationSpan,
203
+ };
204
+ }
205
+ // Declare otelCallbacks outside try block so it's accessible in catch
206
+ let otelCallbacks = null;
195
207
  try {
208
+ // Determine effective model early so we can detect provider for callbacks
209
+ // Use override model if provided (Town Hall comparison feature)
210
+ const effectiveModel = req.configOverrides?.model ?? this.definition.model;
211
+ const provider = detectProvider(effectiveModel);
212
+ // Create OTEL callbacks for instrumentation early so we can use them during tool wrapping
213
+ // Track iteration index across LLM calls in this invocation
214
+ const iterationIndexRef = { current: 0 };
215
+ otelCallbacks = makeOtelCallbacks({
216
+ provider,
217
+ model: effectiveModel,
218
+ parentContext: invocationContext,
219
+ iterationIndexRef,
220
+ });
196
221
  // Track todo_write tool call IDs to suppress their tool_call notifications
197
222
  const todoWriteToolCallIds = new Set();
198
223
  // --------------------------------------------------------------------------
@@ -391,19 +416,34 @@ export class LangchainAgent {
391
416
  : wrappedTools;
392
417
  // Wrap tools with tracing so each tool executes within its own span context.
393
418
  // This ensures subagent spans are children of the Task tool span.
394
- const finalTools = filteredTools.map((t) => wrapToolWithTracing(t));
419
+ // Pass the context getter so tools can nest under the current iteration span.
420
+ let finalTools = filteredTools.map((t) => wrapToolWithTracing(t, otelCallbacks?.getCurrentIterationContext ??
421
+ (() => invocationContext)));
422
+ // Apply tool overrides if provided (Town Hall comparison feature)
423
+ if (req.configOverrides?.tools && req.configOverrides.tools.length > 0) {
424
+ const allowedToolNames = new Set(req.configOverrides.tools);
425
+ finalTools = finalTools.filter((t) => allowedToolNames.has(t.name));
426
+ _logger.debug("Applied tool override filter", {
427
+ requested: req.configOverrides.tools,
428
+ filtered: finalTools.map((t) => t.name),
429
+ });
430
+ }
395
431
  // Create the model instance using the factory
396
432
  // This detects the provider from the model string:
397
433
  // - "gemini-2.0-flash" → Google Generative AI
398
434
  // - "vertex-gemini-2.0-flash" → Vertex AI (strips prefix)
399
435
  // - "claude-sonnet-4-5-20250929" → Anthropic
400
- const model = createModelFromString(this.definition.model);
436
+ const model = createModelFromString(effectiveModel);
401
437
  const agentConfig = {
402
438
  model,
403
439
  tools: finalTools,
404
440
  };
405
- if (this.definition.systemPrompt) {
406
- agentConfig.systemPrompt = this.definition.systemPrompt;
441
+ // Use override system prompt if provided (Town Hall comparison feature)
442
+ const effectiveSystemPrompt = req.configOverrides?.systemPrompt !== undefined
443
+ ? req.configOverrides.systemPrompt
444
+ : this.definition.systemPrompt;
445
+ if (effectiveSystemPrompt) {
446
+ agentConfig.systemPrompt = effectiveSystemPrompt;
407
447
  }
408
448
  // Inject system prompt with optional TodoWrite instructions
409
449
  const hasTodoWrite = builtInNames.includes("todo_write");
@@ -411,8 +451,6 @@ export class LangchainAgent {
411
451
  agentConfig.systemPrompt = `${agentConfig.systemPrompt ?? ""}\n\n${TODO_WRITE_INSTRUCTIONS}`;
412
452
  }
413
453
  const agent = createAgent(agentConfig);
414
- // Add logging callbacks for model requests
415
- const provider = detectProvider(this.definition.model);
416
454
  // Build messages from context history if available, otherwise use just the prompt
417
455
  let messages;
418
456
  // Helper to convert content blocks to LangChain format
@@ -503,12 +541,6 @@ export class LangchainAgent {
503
541
  },
504
542
  ];
505
543
  }
506
- // Create OTEL callbacks for instrumentation
507
- const otelCallbacks = makeOtelCallbacks({
508
- provider,
509
- model: this.definition.model,
510
- parentContext: invocationContext,
511
- });
512
544
  // Create the stream within the invocation context so AsyncLocalStorage
513
545
  // propagates the context to all tool executions and callbacks
514
546
  const stream = context.with(invocationContext, () => agent.stream({ messages }, {
@@ -639,6 +671,7 @@ export class LangchainAgent {
639
671
  const matchingTool = finalTools.find((t) => t.name === toolCall.name);
640
672
  let prettyName = matchingTool?.prettyName;
641
673
  const icon = matchingTool?.icon;
674
+ const verbiage = matchingTool?.verbiage;
642
675
  // For the Task tool, use the displayName (or agentName as fallback) as the prettyName
643
676
  if (toolCall.name === SUBAGENT_TOOL_NAME &&
644
677
  toolCall.args &&
@@ -680,6 +713,7 @@ export class LangchainAgent {
680
713
  messageId: req.messageId,
681
714
  ...(prettyName ? { prettyName } : {}),
682
715
  ...(icon ? { icon } : {}),
716
+ ...(verbiage ? { verbiage } : {}),
683
717
  ...(batchId ? { batchId } : {}),
684
718
  },
685
719
  });
@@ -698,6 +732,7 @@ export class LangchainAgent {
698
732
  messageId: req.messageId,
699
733
  ...(prettyName ? { prettyName } : {}),
700
734
  ...(icon ? { icon } : {}),
735
+ ...(verbiage ? { verbiage } : {}),
701
736
  ...(batchId ? { batchId } : {}),
702
737
  },
703
738
  });
@@ -921,6 +956,8 @@ export class LangchainAgent {
921
956
  if (subagentUpdateResolver) {
922
957
  subagentUpdateResolver = null;
923
958
  }
959
+ // Clean up any remaining iteration span
960
+ otelCallbacks?.cleanup();
924
961
  // Log successful completion
925
962
  telemetry.log("info", "Agent invocation completed", {
926
963
  sessionId: req.sessionId,
@@ -937,6 +974,8 @@ export class LangchainAgent {
937
974
  // Clean up subagent event listeners on error
938
975
  subagentEvents.off("connection", onSubagentConnection);
939
976
  subagentEvents.off("messages", onSubagentMessages);
977
+ // Clean up any remaining iteration span
978
+ otelCallbacks?.cleanup();
940
979
  // Log error and end span with error status
941
980
  telemetry.log("error", "Agent invocation failed", {
942
981
  error: error instanceof Error ? error.message : String(error),
@@ -945,6 +984,10 @@ export class LangchainAgent {
945
984
  telemetry.endSpan(invocationSpan, error instanceof Error ? error : new Error(String(error)));
946
985
  throw error;
947
986
  }
987
+ finally {
988
+ // Clear agent.session_id from base attributes to prevent cross-contamination
989
+ telemetry.clearBaseAttribute("agent.session_id");
990
+ }
948
991
  }
949
992
  }
950
993
  const modelRequestSchema = z.object({
@@ -956,17 +999,17 @@ const makeMcpToolsClient = (mcpConfigs) => {
956
999
  const mcpServers = mcpConfigs?.map((config) => {
957
1000
  if (typeof config === "string") {
958
1001
  // String configs use the centralized MCP proxy with auth
959
- const credentials = loadAuthCredentials();
960
- if (!credentials) {
961
- throw new Error("Not logged in. Run 'town login' first to use cloud MCP servers.");
1002
+ const shedAuth = getShedAuth();
1003
+ if (!shedAuth) {
1004
+ throw new Error("Not logged in. Run 'town login' or set SHED_API_KEY to use cloud MCP servers.");
962
1005
  }
963
- const proxyUrl = process.env.MCP_PROXY_URL ?? `${credentials.shed_url}/mcp_proxy`;
1006
+ const proxyUrl = process.env.MCP_PROXY_URL ?? `${shedAuth.shedUrl}/mcp_proxy`;
964
1007
  return [
965
1008
  config,
966
1009
  {
967
1010
  url: `${proxyUrl}?server=${config}`,
968
1011
  headers: {
969
- Authorization: `Bearer ${credentials.access_token}`,
1012
+ Authorization: `Bearer ${shedAuth.accessToken}`,
970
1013
  },
971
1014
  },
972
1015
  ];
@@ -1059,18 +1102,22 @@ export { makeSubagentsTool } from "./tools/subagent.js";
1059
1102
  * This ensures the tool executes within its own span context,
1060
1103
  * so any child operations (like subagent spawning) become children
1061
1104
  * of the tool span rather than the parent invocation span.
1105
+ * @param originalTool The tool to wrap
1106
+ * @param getIterationContext Function that returns the current iteration context
1062
1107
  */
1063
- function wrapToolWithTracing(originalTool) {
1108
+ function wrapToolWithTracing(originalTool, getIterationContext) {
1064
1109
  const wrappedFunc = async (input) => {
1065
1110
  const toolInputJson = JSON.stringify(input);
1111
+ // Get the current iteration context so the tool span is created as a child
1112
+ const iterationContext = getIterationContext();
1066
1113
  const toolSpan = telemetry.startSpan("agent.tool_call", {
1067
1114
  "tool.name": originalTool.name,
1068
1115
  "tool.input": toolInputJson,
1069
- });
1116
+ }, iterationContext);
1070
1117
  // Create a context with the tool span as active
1071
1118
  const spanContext = toolSpan
1072
- ? trace.setSpan(context.active(), toolSpan)
1073
- : context.active();
1119
+ ? trace.setSpan(iterationContext, toolSpan)
1120
+ : iterationContext;
1074
1121
  try {
1075
1122
  // Execute within the tool span's context
1076
1123
  const result = await context.with(spanContext, () => originalTool.invoke(input));
@@ -1,7 +1,7 @@
1
1
  import { ChatAnthropic } from "@langchain/anthropic";
2
2
  import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
3
3
  import { ChatVertexAI } from "@langchain/google-vertexai";
4
- import { loadAuthCredentials } from "@townco/core/auth";
4
+ import { getShedAuth } from "@townco/core/auth";
5
5
  import { createLogger } from "../../logger.js";
6
6
  const logger = createLogger("model-factory");
7
7
  /**
@@ -24,17 +24,14 @@ export function createModelFromString(modelString) {
24
24
  // Check for town- prefix for proxied models via shed
25
25
  if (modelString.startsWith("town-")) {
26
26
  const actualModel = modelString.slice(5); // strip "town-"
27
- const credentials = loadAuthCredentials();
28
- if (!credentials) {
29
- throw new Error("Not logged in. Run 'town login' first.");
27
+ const shedAuth = getShedAuth();
28
+ if (!shedAuth) {
29
+ throw new Error("Not logged in. Run 'town login' or set SHED_API_KEY.");
30
30
  }
31
- const shedUrl = credentials.shed_url ??
32
- process.env.TOWN_SHED_URL ??
33
- "http://localhost:3000";
34
31
  return new ChatAnthropic({
35
32
  model: actualModel,
36
- anthropicApiUrl: `${shedUrl}/api/anthropic`,
37
- apiKey: credentials.access_token,
33
+ anthropicApiUrl: `${shedAuth.shedUrl}/api/anthropic`,
34
+ apiKey: shedAuth.accessToken,
38
35
  });
39
36
  }
40
37
  // Check if the model string uses provider prefix format
@@ -4,6 +4,9 @@ export interface OtelCallbackOptions {
4
4
  provider: string;
5
5
  model: string;
6
6
  parentContext: Context;
7
+ iterationIndexRef: {
8
+ current: number;
9
+ };
7
10
  }
8
11
  /**
9
12
  * Creates OpenTelemetry callback handlers for LangChain LLM calls.
@@ -15,4 +18,7 @@ export interface OtelCallbackOptions {
15
18
  * @param opts.parentContext - The parent OTEL context to create child spans under
16
19
  * @returns CallbackHandlerMethods object that can be passed to LangChain
17
20
  */
18
- export declare function makeOtelCallbacks(opts: OtelCallbackOptions): CallbackHandlerMethods;
21
+ export declare function makeOtelCallbacks(opts: OtelCallbackOptions): CallbackHandlerMethods & {
22
+ cleanup: () => void;
23
+ getCurrentIterationContext: () => Context;
24
+ };