@townco/agent 0.1.52 → 0.1.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dist/acp-server/adapter.d.ts +18 -0
  2. package/dist/acp-server/adapter.js +258 -19
  3. package/dist/acp-server/http.js +39 -1
  4. package/dist/acp-server/session-storage.d.ts +18 -1
  5. package/dist/acp-server/session-storage.js +25 -0
  6. package/dist/definition/index.d.ts +2 -2
  7. package/dist/definition/index.js +1 -0
  8. package/dist/runner/agent-runner.d.ts +11 -2
  9. package/dist/runner/langchain/index.d.ts +0 -1
  10. package/dist/runner/langchain/index.js +265 -64
  11. package/dist/runner/langchain/tools/generate_image.d.ts +28 -0
  12. package/dist/runner/langchain/tools/generate_image.js +135 -0
  13. package/dist/runner/langchain/tools/subagent.d.ts +6 -1
  14. package/dist/runner/langchain/tools/subagent.js +12 -2
  15. package/dist/runner/tools.d.ts +19 -2
  16. package/dist/runner/tools.js +9 -0
  17. package/dist/telemetry/index.js +7 -1
  18. package/dist/templates/index.d.ts +3 -0
  19. package/dist/templates/index.js +26 -4
  20. package/dist/tsconfig.tsbuildinfo +1 -1
  21. package/dist/utils/context-size-calculator.d.ts +9 -4
  22. package/dist/utils/context-size-calculator.js +23 -6
  23. package/dist/utils/tool-overhead-calculator.d.ts +30 -0
  24. package/dist/utils/tool-overhead-calculator.js +54 -0
  25. package/package.json +7 -6
  26. package/templates/index.ts +36 -5
  27. package/dist/check-jaeger.d.ts +0 -5
  28. package/dist/check-jaeger.js +0 -82
  29. package/dist/run-subagents.d.ts +0 -9
  30. package/dist/run-subagents.js +0 -110
  31. package/dist/runner/langchain/custom-stream-types.d.ts +0 -36
  32. package/dist/runner/langchain/custom-stream-types.js +0 -23
  33. package/dist/runner/langchain/tools/bash.d.ts +0 -14
  34. package/dist/runner/langchain/tools/bash.js +0 -135
  35. package/dist/scaffold/link-local.d.ts +0 -1
  36. package/dist/scaffold/link-local.js +0 -54
  37. package/dist/test-telemetry.d.ts +0 -5
  38. package/dist/test-telemetry.js +0 -88
  39. package/dist/utils/logger.d.ts +0 -39
  40. package/dist/utils/logger.js +0 -175
@@ -9,7 +9,8 @@ import { loadCustomToolModule, } from "../tool-loader.js";
9
9
  import { createModelFromString, detectProvider } from "./model-factory.js";
10
10
  import { makeOtelCallbacks } from "./otel-callbacks.js";
11
11
  import { makeFilesystemTools } from "./tools/filesystem";
12
- import { TASK_TOOL_NAME } from "./tools/subagent";
12
+ import { makeGenerateImageTool } from "./tools/generate_image";
13
+ import { SUBAGENT_TOOL_NAME } from "./tools/subagent";
13
14
  import { TODO_WRITE_TOOL_NAME, todoWrite } from "./tools/todo";
14
15
  import { makeWebSearchTools } from "./tools/web_search";
15
16
  const _logger = createLogger("agent-runner");
@@ -27,6 +28,7 @@ export const TOOL_REGISTRY = {
27
28
  get_weather: getWeather,
28
29
  web_search: () => makeWebSearchTools(),
29
30
  filesystem: () => makeFilesystemTools(process.cwd()),
31
+ generate_image: () => makeGenerateImageTool(),
30
32
  };
31
33
  // ============================================================================
32
34
  // Custom tool loading
@@ -54,7 +56,6 @@ async function loadCustomTools(modulePaths) {
54
56
  }
55
57
  export class LangchainAgent {
56
58
  definition;
57
- toolSpans = new Map();
58
59
  constructor(params) {
59
60
  this.definition = params;
60
61
  }
@@ -75,6 +76,8 @@ export class LangchainAgent {
75
76
  totalTokens: 0,
76
77
  };
77
78
  const countedMessageIds = new Set();
79
+ // Track tool calls for which we've emitted preliminary notifications (from early tool_use blocks)
80
+ const preliminaryToolCallIds = new Set();
78
81
  // Start telemetry span for entire invocation
79
82
  const invocationSpan = telemetry.startSpan("agent.invoke", {
80
83
  "agent.model": this.definition.model,
@@ -157,10 +160,41 @@ export class LangchainAgent {
157
160
  const customTools = await loadCustomTools(customToolPaths);
158
161
  enabledTools.push(...customTools);
159
162
  }
160
- // MCP tools
163
+ // Calculate tool overhead tokens for non-MCP tools
164
+ const { countTokens } = await import("../../utils/token-counter.js");
165
+ const { extractToolMetadata, estimateAllToolsOverhead } = await import("../../utils/tool-overhead-calculator.js");
166
+ // Calculate overhead for non-MCP tools (built-in, custom, filesystem)
167
+ const nonMcpToolMetadata = enabledTools.map(extractToolMetadata);
168
+ const nonMcpToolDefinitionsTokens = estimateAllToolsOverhead(nonMcpToolMetadata);
169
+ // Calculate TODO_WRITE_INSTRUCTIONS overhead if applicable
170
+ const hasTodoWriteTool = builtInNames.includes("todo_write");
171
+ const todoInstructionsTokens = hasTodoWriteTool
172
+ ? countTokens(TODO_WRITE_INSTRUCTIONS)
173
+ : 0;
174
+ // Total non-MCP tool overhead: tool definitions + TODO instructions
175
+ const toolOverheadTokens = nonMcpToolDefinitionsTokens + todoInstructionsTokens;
176
+ // MCP tools - calculate overhead separately
177
+ let mcpOverheadTokens = 0;
161
178
  if ((this.definition.mcps?.length ?? 0) > 0) {
162
- enabledTools.push(...(await makeMcpToolsClient(this.definition.mcps).getTools()));
179
+ const mcpTools = await makeMcpToolsClient(this.definition.mcps).getTools();
180
+ const mcpToolMetadata = mcpTools.map(extractToolMetadata);
181
+ mcpOverheadTokens = estimateAllToolsOverhead(mcpToolMetadata);
182
+ enabledTools.push(...mcpTools);
163
183
  }
184
+ _logger.debug("Calculated tool overhead for context sizing", {
185
+ enabledToolCount: enabledTools.length,
186
+ nonMcpToolDefinitionsTokens,
187
+ mcpToolDefinitionsTokens: mcpOverheadTokens,
188
+ todoInstructionsTokens,
189
+ totalNonMcpOverheadTokens: toolOverheadTokens,
190
+ totalMcpOverheadTokens: mcpOverheadTokens,
191
+ });
192
+ // Yield tool overhead info to adapter early in the turn
193
+ yield {
194
+ sessionUpdate: "tool_overhead_info",
195
+ toolOverheadTokens,
196
+ mcpOverheadTokens,
197
+ };
164
198
  // Wrap tools with response compaction if hook is configured
165
199
  const hooks = this.definition.hooks ?? [];
166
200
  const hasToolResponseHook = hooks.some((h) => h.type === "tool_response");
@@ -255,9 +289,12 @@ export class LangchainAgent {
255
289
  }
256
290
  // Filter tools if running in subagent mode
257
291
  const isSubagent = req.sessionMeta?.[SUBAGENT_MODE_KEY] === true;
258
- const finalTools = isSubagent
259
- ? wrappedTools.filter((t) => t.name !== TODO_WRITE_TOOL_NAME && t.name !== TASK_TOOL_NAME)
292
+ const filteredTools = isSubagent
293
+ ? wrappedTools.filter((t) => t.name !== TODO_WRITE_TOOL_NAME && t.name !== SUBAGENT_TOOL_NAME)
260
294
  : wrappedTools;
295
+ // Wrap tools with tracing so each tool executes within its own span context.
296
+ // This ensures subagent spans are children of the Task tool span.
297
+ const finalTools = filteredTools.map((t) => wrapToolWithTracing(t, req.sessionId));
261
298
  // Create the model instance using the factory
262
299
  // This detects the provider from the model string:
263
300
  // - "gemini-2.0-flash" → Google Generative AI
@@ -281,35 +318,93 @@ export class LangchainAgent {
281
318
  const provider = detectProvider(this.definition.model);
282
319
  // Build messages from context history if available, otherwise use just the prompt
283
320
  let messages;
321
+ // Helper to convert content blocks to LangChain format
322
+ // LangChain expects image_url type with data URL, not Claude's native image+source format
323
+ const convertContentBlocks = (blocks) => {
324
+ // Check if we have any image blocks
325
+ const hasImages = blocks.some((block) => block.type === "image");
326
+ if (!hasImages) {
327
+ // Simple text-only message
328
+ return blocks
329
+ .filter((block) => block.type === "text")
330
+ .map((block) => block.text)
331
+ .join("");
332
+ }
333
+ // Multi-modal message with images - return as content block array
334
+ // LangChain uses image_url type with data URL format
335
+ return blocks
336
+ .map((block) => {
337
+ if (block.type === "text") {
338
+ return {
339
+ type: "text",
340
+ text: block.text,
341
+ };
342
+ }
343
+ else if (block.type === "image") {
344
+ // Extract base64 data and media type from various formats
345
+ let base64Data;
346
+ let mediaType = "image/png";
347
+ // Check if it has the source format (Claude API format)
348
+ if ("source" in block && block.source) {
349
+ base64Data = block.source.data;
350
+ mediaType = block.source.media_type || "image/png";
351
+ }
352
+ // ACP format: { type: "image", data: "...", mimeType: "..." }
353
+ else if ("data" in block && block.data) {
354
+ base64Data = block.data;
355
+ if (block.mimeType) {
356
+ const mt = block.mimeType.toLowerCase();
357
+ if (mt === "image/jpeg" || mt === "image/jpg") {
358
+ mediaType = "image/jpeg";
359
+ }
360
+ else if (mt === "image/png") {
361
+ mediaType = "image/png";
362
+ }
363
+ else if (mt === "image/gif") {
364
+ mediaType = "image/gif";
365
+ }
366
+ else if (mt === "image/webp") {
367
+ mediaType = "image/webp";
368
+ }
369
+ }
370
+ }
371
+ if (base64Data) {
372
+ // LangChain format: image_url with data URL
373
+ return {
374
+ type: "image_url",
375
+ image_url: {
376
+ url: `data:${mediaType};base64,${base64Data}`,
377
+ },
378
+ };
379
+ }
380
+ }
381
+ return null;
382
+ })
383
+ .filter(Boolean);
384
+ };
284
385
  if (req.contextMessages && req.contextMessages.length > 0) {
285
386
  // Use context messages (already resolved from context entries)
286
387
  // Convert to LangChain format
287
388
  messages = req.contextMessages.map((msg) => ({
288
389
  type: msg.role === "user" ? "human" : "ai",
289
- // Extract text from content blocks
290
- content: msg.content
291
- .filter((block) => block.type === "text")
292
- .map((block) => block.text)
293
- .join(""),
390
+ content: convertContentBlocks(msg.content),
294
391
  }));
295
392
  // Add the current prompt as the final human message
296
- const currentPromptText = req.prompt
297
- .filter((promptMsg) => promptMsg.type === "text")
298
- .map((promptMsg) => promptMsg.text)
299
- .join("\n");
393
+ const promptContent = convertContentBlocks(req.prompt);
300
394
  messages.push({
301
395
  type: "human",
302
- content: currentPromptText,
396
+ content: promptContent,
303
397
  });
304
398
  }
305
399
  else {
306
400
  // Fallback: No context history, use just the prompt
307
- messages = req.prompt
308
- .filter((promptMsg) => promptMsg.type === "text")
309
- .map((promptMsg) => ({
310
- type: "human",
311
- content: promptMsg.text,
312
- }));
401
+ const promptContent = convertContentBlocks(req.prompt);
402
+ messages = [
403
+ {
404
+ type: "human",
405
+ content: promptContent,
406
+ },
407
+ ];
313
408
  }
314
409
  // Create OTEL callbacks for instrumentation
315
410
  const otelCallbacks = makeOtelCallbacks({
@@ -358,20 +453,15 @@ export class LangchainAgent {
358
453
  turnTokenUsage.totalTokens += tokenUsage.totalTokens ?? 0;
359
454
  countedMessageIds.add(msg.id);
360
455
  }
361
- for (const toolCall of msg.tool_calls ?? []) {
456
+ // Generate a batch ID if there are multiple tool calls (parallel execution)
457
+ const toolCalls = msg.tool_calls ?? [];
458
+ const batchId = toolCalls.length > 1
459
+ ? `batch-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
460
+ : undefined;
461
+ for (const toolCall of toolCalls) {
362
462
  if (toolCall.id == null) {
363
463
  throw new Error(`Tool call is missing id: ${JSON.stringify(toolCall)}`);
364
464
  }
365
- // Create tool span within the invocation context
366
- // This makes the tool span a child of the invocation span
367
- const toolInputJson = JSON.stringify(toolCall.args);
368
- const toolSpan = context.with(invocationContext, () => telemetry.startSpan("agent.tool_call", {
369
- "tool.name": toolCall.name,
370
- "tool.id": toolCall.id,
371
- "tool.input": toolInputJson,
372
- "agent.session_id": req.sessionId,
373
- }));
374
- this.toolSpans.set(toolCall.id, toolSpan);
375
465
  telemetry.log("info", `Tool call started: ${toolCall.name}`, {
376
466
  toolCallId: toolCall.id,
377
467
  toolName: toolCall.name,
@@ -404,22 +494,61 @@ export class LangchainAgent {
404
494
  // continue;
405
495
  //}
406
496
  const matchingTool = finalTools.find((t) => t.name === toolCall.name);
407
- const prettyName = matchingTool?.prettyName;
497
+ let prettyName = matchingTool?.prettyName;
408
498
  const icon = matchingTool?.icon;
409
- yield {
410
- sessionUpdate: "tool_call",
411
- toolCallId: toolCall.id,
412
- title: toolCall.name,
413
- kind: "other",
414
- status: "pending",
415
- rawInput: toolCall.args,
416
- ...(tokenUsage ? { tokenUsage } : {}),
417
- _meta: {
418
- messageId: req.messageId,
419
- ...(prettyName ? { prettyName } : {}),
420
- ...(icon ? { icon } : {}),
421
- },
422
- };
499
+ // For the Task tool, use the displayName (or agentName as fallback) as the prettyName
500
+ if (toolCall.name === SUBAGENT_TOOL_NAME &&
501
+ toolCall.args &&
502
+ typeof toolCall.args === "object" &&
503
+ "agentName" in toolCall.args &&
504
+ typeof toolCall.args.agentName === "string") {
505
+ const agentName = toolCall.args.agentName;
506
+ // Look up displayName from subagentConfigs in the original tool definition
507
+ // (not from matchingTool, which is a LangChain tool without subagentConfigs)
508
+ const taskTool = this.definition.tools?.find((t) => typeof t === "object" &&
509
+ t.type === "direct" &&
510
+ t.name === SUBAGENT_TOOL_NAME);
511
+ const subagentConfigs = taskTool?.subagentConfigs;
512
+ const subagentConfig = subagentConfigs?.find((config) => config.agentName === agentName);
513
+ prettyName = subagentConfig?.displayName ?? agentName;
514
+ }
515
+ // Check if we already emitted a preliminary notification from early tool_use block
516
+ const alreadyEmittedPreliminary = preliminaryToolCallIds.has(toolCall.id);
517
+ if (alreadyEmittedPreliminary) {
518
+ // Update the existing preliminary notification with full details
519
+ yield {
520
+ sessionUpdate: "tool_call_update",
521
+ toolCallId: toolCall.id,
522
+ title: toolCall.name,
523
+ rawInput: toolCall.args,
524
+ ...(tokenUsage ? { tokenUsage } : {}),
525
+ _meta: {
526
+ messageId: req.messageId,
527
+ ...(prettyName ? { prettyName } : {}),
528
+ ...(icon ? { icon } : {}),
529
+ ...(batchId ? { batchId } : {}),
530
+ },
531
+ };
532
+ }
533
+ else {
534
+ // Emit full tool_call notification (fallback for non-streaming scenarios)
535
+ yield {
536
+ sessionUpdate: "tool_call",
537
+ toolCallId: toolCall.id,
538
+ title: toolCall.name,
539
+ kind: "other",
540
+ status: "pending",
541
+ rawInput: toolCall.args,
542
+ ...(tokenUsage ? { tokenUsage } : {}),
543
+ _meta: {
544
+ messageId: req.messageId,
545
+ ...(prettyName ? { prettyName } : {}),
546
+ ...(icon ? { icon } : {}),
547
+ ...(batchId ? { batchId } : {}),
548
+ },
549
+ };
550
+ }
551
+ // Always emit in_progress status update
423
552
  yield {
424
553
  sessionUpdate: "tool_call_update",
425
554
  toolCallId: toolCall.id,
@@ -533,10 +662,26 @@ export class LangchainAgent {
533
662
  yield msgToYield;
534
663
  }
535
664
  else if (part.type === "tool_use") {
536
- // We don't care about tool use chunks -- do nothing
665
+ // Emit early notification for tool use as soon as we detect it
666
+ // The tool_use block contains { type, id, name, input }
667
+ const toolUseBlock = part;
668
+ if (toolUseBlock.id &&
669
+ toolUseBlock.name &&
670
+ !preliminaryToolCallIds.has(toolUseBlock.id)) {
671
+ preliminaryToolCallIds.add(toolUseBlock.id);
672
+ yield {
673
+ sessionUpdate: "tool_call",
674
+ toolCallId: toolUseBlock.id,
675
+ title: toolUseBlock.name,
676
+ kind: "other",
677
+ status: "pending",
678
+ rawInput: {}, // Args not available yet
679
+ _meta: { messageId: req.messageId },
680
+ };
681
+ }
537
682
  }
538
683
  else if (part.type === "input_json_delta") {
539
- // We don't care about tool use input delta chunks -- do nothing
684
+ // Input JSON delta chunks - we don't process these as tool_call is already emitted
540
685
  }
541
686
  else {
542
687
  throw new Error(`Unhandled AIMessageChunk content block type: ${part.type}\n${JSON.stringify(part)}`);
@@ -553,24 +698,22 @@ export class LangchainAgent {
553
698
  // Skip tool_call_update for todo_write tools
554
699
  continue;
555
700
  }
556
- // End telemetry span for this tool call
557
- const toolSpan = this.toolSpans.get(aiMessage.tool_call_id);
558
- if (toolSpan) {
559
- // Add tool output to span before ending
560
- telemetry.setSpanAttributes(toolSpan, {
561
- "tool.output": aiMessage.content,
562
- });
563
- telemetry.log("info", "Tool call completed", {
564
- toolCallId: aiMessage.tool_call_id,
565
- });
566
- telemetry.endSpan(toolSpan);
567
- this.toolSpans.delete(aiMessage.tool_call_id);
568
- }
701
+ // Check if the tool execution failed
702
+ // LangChain may set status: "error" OR the content may start with "Error:"
703
+ const contentLooksLikeError = typeof aiMessage.content === "string" &&
704
+ aiMessage.content.trim().startsWith("Error:");
705
+ const isError = aiMessage.status === "error" || contentLooksLikeError;
706
+ const status = isError ? "failed" : "completed";
707
+ telemetry.log(isError ? "error" : "info", `Tool call ${status}`, {
708
+ toolCallId: aiMessage.tool_call_id,
709
+ ...(isError ? { error: aiMessage.content } : {}),
710
+ });
569
711
  // Send status update (metadata only, no content)
570
712
  yield {
571
713
  sessionUpdate: "tool_call_update",
572
714
  toolCallId: aiMessage.tool_call_id,
573
- status: "completed",
715
+ status,
716
+ ...(isError ? { error: aiMessage.content } : {}),
574
717
  _meta: { messageId: req.messageId },
575
718
  };
576
719
  // Send tool output separately (via direct SSE, bypassing PostgreSQL NOTIFY)
@@ -632,6 +775,16 @@ const modelRequestSchema = z.object({
632
775
  });
633
776
  const makeMcpToolsClient = (mcpConfigs) => {
634
777
  const mcpServers = mcpConfigs?.map((config) => {
778
+ if (typeof config === "string") {
779
+ // Default to localhost:3000/mcp_proxy if not specified
780
+ const proxyUrl = process.env.MCP_PROXY_URL || "http://localhost:3000/mcp_proxy";
781
+ return [
782
+ config,
783
+ {
784
+ url: `${proxyUrl}?server=${config}`,
785
+ },
786
+ ];
787
+ }
635
788
  if (config.transport === "http") {
636
789
  return [
637
790
  config.name,
@@ -715,3 +868,51 @@ I've found some existing telemetry code. Let me mark the first todo as in_progre
715
868
  `.trim();
716
869
  // Re-export subagent tool utility
717
870
  export { makeSubagentsTool } from "./tools/subagent.js";
871
+ /**
872
+ * Wraps a LangChain tool with OpenTelemetry tracing.
873
+ * This ensures the tool executes within its own span context,
874
+ * so any child operations (like subagent spawning) become children
875
+ * of the tool span rather than the parent invocation span.
876
+ */
877
+ function wrapToolWithTracing(originalTool, sessionId) {
878
+ const wrappedFunc = async (input) => {
879
+ const toolInputJson = JSON.stringify(input);
880
+ const toolSpan = telemetry.startSpan("agent.tool_call", {
881
+ "tool.name": originalTool.name,
882
+ "tool.input": toolInputJson,
883
+ "agent.session_id": sessionId,
884
+ });
885
+ // Create a context with the tool span as active
886
+ const spanContext = toolSpan
887
+ ? trace.setSpan(context.active(), toolSpan)
888
+ : context.active();
889
+ try {
890
+ // Execute within the tool span's context
891
+ const result = await context.with(spanContext, () => originalTool.invoke(input));
892
+ const resultStr = typeof result === "string" ? result : JSON.stringify(result);
893
+ if (toolSpan) {
894
+ telemetry.setSpanAttributes(toolSpan, {
895
+ "tool.output": resultStr,
896
+ });
897
+ telemetry.endSpan(toolSpan);
898
+ }
899
+ return result;
900
+ }
901
+ catch (error) {
902
+ if (toolSpan) {
903
+ telemetry.endSpan(toolSpan, error);
904
+ }
905
+ throw error;
906
+ }
907
+ };
908
+ // Create new tool with wrapped function
909
+ const wrappedTool = tool(wrappedFunc, {
910
+ name: originalTool.name,
911
+ description: originalTool.description,
912
+ schema: originalTool.schema,
913
+ });
914
+ // Preserve metadata
915
+ wrappedTool.prettyName = originalTool.prettyName;
916
+ wrappedTool.icon = originalTool.icon;
917
+ return wrappedTool;
918
+ }
@@ -0,0 +1,28 @@
1
+ import { z } from "zod";
2
+ interface GenerateImageResult {
3
+ success: boolean;
4
+ filePath?: string | undefined;
5
+ fileName?: string | undefined;
6
+ imageUrl?: string | undefined;
7
+ textResponse?: string | undefined;
8
+ mimeType?: string | undefined;
9
+ error?: string | undefined;
10
+ }
11
+ export declare function makeGenerateImageTool(): import("langchain").DynamicStructuredTool<z.ZodObject<{
12
+ prompt: z.ZodString;
13
+ aspectRatio: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
14
+ "1:1": "1:1";
15
+ "3:4": "3:4";
16
+ "4:3": "4:3";
17
+ "9:16": "9:16";
18
+ "16:9": "16:9";
19
+ "5:4": "5:4";
20
+ }>>>;
21
+ }, z.core.$strip>, {
22
+ prompt: string;
23
+ aspectRatio: "1:1" | "3:4" | "4:3" | "9:16" | "16:9" | "5:4";
24
+ }, {
25
+ prompt: string;
26
+ aspectRatio?: "1:1" | "3:4" | "4:3" | "9:16" | "16:9" | "5:4" | undefined;
27
+ }, GenerateImageResult>;
28
+ export {};
@@ -0,0 +1,135 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import { GoogleGenAI } from "@google/genai";
4
+ import { tool } from "langchain";
5
+ import { z } from "zod";
6
+ let _genaiClient = null;
7
+ function getGenAIClient() {
8
+ if (_genaiClient) {
9
+ return _genaiClient;
10
+ }
11
+ const apiKey = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY;
12
+ if (!apiKey) {
13
+ throw new Error("GEMINI_API_KEY or GOOGLE_API_KEY environment variable is required to use the generate_image tool. " +
14
+ "Please set one of them to your Google AI API key.");
15
+ }
16
+ _genaiClient = new GoogleGenAI({ apiKey });
17
+ return _genaiClient;
18
+ }
19
+ export function makeGenerateImageTool() {
20
+ const generateImage = tool(async ({ prompt, aspectRatio = "1:1" }) => {
21
+ try {
22
+ const client = getGenAIClient();
23
+ // Use Gemini 3 Pro Image for image generation
24
+ // Note: imageConfig is a valid API option but not yet in the TypeScript types
25
+ // biome-ignore lint/suspicious/noExplicitAny: imageConfig not yet typed in @google/genai
26
+ const config = {
27
+ responseModalities: ["TEXT", "IMAGE"],
28
+ imageConfig: {
29
+ aspectRatio: aspectRatio,
30
+ },
31
+ };
32
+ const response = await client.models.generateContent({
33
+ model: "gemini-3-pro-image-preview",
34
+ contents: [{ text: prompt }],
35
+ config,
36
+ });
37
+ if (!response.candidates || response.candidates.length === 0) {
38
+ return {
39
+ success: false,
40
+ error: "No response from the model. The request may have been filtered.",
41
+ };
42
+ }
43
+ const candidate = response.candidates[0];
44
+ if (!candidate) {
45
+ return {
46
+ success: false,
47
+ error: "No candidate in the response.",
48
+ };
49
+ }
50
+ const parts = candidate.content?.parts;
51
+ if (!parts || parts.length === 0) {
52
+ return {
53
+ success: false,
54
+ error: "No content parts in the response.",
55
+ };
56
+ }
57
+ let imageData;
58
+ let textResponse;
59
+ let mimeType;
60
+ for (const part of parts) {
61
+ if (part.text) {
62
+ textResponse = part.text;
63
+ }
64
+ else if (part.inlineData) {
65
+ imageData = part.inlineData.data;
66
+ mimeType = part.inlineData.mimeType || "image/png";
67
+ }
68
+ }
69
+ if (!imageData) {
70
+ return {
71
+ success: false,
72
+ error: "No image was generated in the response.",
73
+ ...(textResponse ? { textResponse } : {}),
74
+ };
75
+ }
76
+ // Save image to disk in generated-images directory (relative to cwd)
77
+ const outputDir = join(process.cwd(), "generated-images");
78
+ await mkdir(outputDir, { recursive: true });
79
+ // Generate unique filename
80
+ const timestamp = Date.now();
81
+ const extension = mimeType === "image/jpeg" ? "jpg" : "png";
82
+ const fileName = `image-${timestamp}.${extension}`;
83
+ const filePath = join(outputDir, fileName);
84
+ // Save image to file
85
+ const buffer = Buffer.from(imageData, "base64");
86
+ await writeFile(filePath, buffer);
87
+ // Create URL for the static file server
88
+ // The agent HTTP server serves static files from the agent directory
89
+ const port = process.env.PORT || "3100";
90
+ const imageUrl = `http://localhost:${port}/static/generated-images/${fileName}`;
91
+ return {
92
+ success: true,
93
+ filePath,
94
+ fileName,
95
+ imageUrl,
96
+ ...(mimeType ? { mimeType } : {}),
97
+ ...(textResponse ? { textResponse } : {}),
98
+ };
99
+ }
100
+ catch (error) {
101
+ const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
102
+ return {
103
+ success: false,
104
+ error: `Image generation failed: ${errorMessage}`,
105
+ };
106
+ }
107
+ }, {
108
+ name: "GenerateImage",
109
+ description: "Generate an image based on a text prompt using Google's Gemini image generation model. " +
110
+ "Returns an imageUrl that can be displayed to the user. After calling this tool, " +
111
+ "include the imageUrl in your response as a markdown image like ![Description](imageUrl) " +
112
+ "so the user can see the generated image.\n" +
113
+ "- Creates images from detailed text descriptions\n" +
114
+ "- Supports various aspect ratios for different use cases\n" +
115
+ "- Be specific in prompts about style, composition, colors, and subjects\n" +
116
+ "\n" +
117
+ "Usage notes:\n" +
118
+ " - Provide detailed, specific prompts for best results\n" +
119
+ " - The generated image is saved and served via URL\n" +
120
+ " - Always display the result using markdown: ![description](imageUrl)\n",
121
+ schema: z.object({
122
+ prompt: z
123
+ .string()
124
+ .describe("A detailed description of the image to generate. Be specific about style, composition, colors, and subjects."),
125
+ aspectRatio: z
126
+ .enum(["1:1", "3:4", "4:3", "9:16", "16:9", "5:4"])
127
+ .optional()
128
+ .default("1:1")
129
+ .describe("The aspect ratio of the generated image."),
130
+ }),
131
+ });
132
+ generateImage.prettyName = "Generate Image";
133
+ generateImage.icon = "Image";
134
+ return generateImage;
135
+ }
@@ -2,19 +2,24 @@ import type { DirectTool } from "../../tools.js";
2
2
  /**
3
3
  * Name of the Task tool created by makeSubagentsTool
4
4
  */
5
- export declare const TASK_TOOL_NAME = "Task";
5
+ export declare const SUBAGENT_TOOL_NAME = "subagent";
6
6
  /**
7
7
  * Configuration for a single subagent - supports two variants:
8
8
  * 1. Agent name with optional working directory
9
9
  * 2. Direct path to agent's index.ts file
10
+ *
11
+ * The optional displayName field provides a human-readable name for the UI.
12
+ * If not provided, agentName will be used for display.
10
13
  */
11
14
  type SubagentConfig = {
12
15
  agentName: string;
13
16
  description: string;
17
+ displayName?: string;
14
18
  cwd?: string;
15
19
  } | {
16
20
  agentName: string;
17
21
  description: string;
22
+ displayName?: string;
18
23
  path: string;
19
24
  };
20
25
  /**