@oh-my-pi/pi-coding-agent 3.20.1 → 3.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/CHANGELOG.md +107 -8
  2. package/docs/custom-tools.md +3 -3
  3. package/docs/extensions.md +226 -220
  4. package/docs/hooks.md +2 -2
  5. package/docs/sdk.md +50 -53
  6. package/examples/custom-tools/README.md +2 -17
  7. package/examples/extensions/README.md +76 -74
  8. package/examples/extensions/todo.ts +2 -5
  9. package/examples/hooks/custom-compaction.ts +2 -4
  10. package/examples/hooks/handoff.ts +1 -1
  11. package/examples/hooks/qna.ts +1 -1
  12. package/examples/sdk/02-custom-model.ts +1 -1
  13. package/examples/sdk/README.md +7 -11
  14. package/package.json +6 -6
  15. package/src/cli/args.ts +9 -6
  16. package/src/cli/file-processor.ts +1 -1
  17. package/src/cli/list-models.ts +1 -1
  18. package/src/core/agent-session.ts +16 -5
  19. package/src/core/auth-storage.ts +1 -1
  20. package/src/core/compaction/branch-summarization.ts +2 -2
  21. package/src/core/compaction/compaction.ts +2 -2
  22. package/src/core/compaction/utils.ts +1 -1
  23. package/src/core/custom-tools/types.ts +1 -1
  24. package/src/core/custom-tools/wrapper.ts +0 -1
  25. package/src/core/extensions/index.ts +1 -6
  26. package/src/core/extensions/runner.ts +1 -1
  27. package/src/core/extensions/types.ts +1 -1
  28. package/src/core/extensions/wrapper.ts +1 -8
  29. package/src/core/file-mentions.ts +5 -8
  30. package/src/core/hooks/runner.ts +2 -2
  31. package/src/core/hooks/types.ts +1 -1
  32. package/src/core/messages.ts +1 -1
  33. package/src/core/model-registry.ts +1 -1
  34. package/src/core/model-resolver.ts +1 -1
  35. package/src/core/sdk.ts +64 -105
  36. package/src/core/session-manager.ts +18 -22
  37. package/src/core/settings-manager.ts +66 -1
  38. package/src/core/slash-commands.ts +12 -5
  39. package/src/core/system-prompt.ts +49 -36
  40. package/src/core/title-generator.ts +2 -2
  41. package/src/core/tools/ask.ts +98 -4
  42. package/src/core/tools/bash-interceptor.ts +11 -4
  43. package/src/core/tools/bash.ts +121 -5
  44. package/src/core/tools/context.ts +7 -0
  45. package/src/core/tools/edit-diff.ts +73 -24
  46. package/src/core/tools/edit.ts +221 -34
  47. package/src/core/tools/exa/render.ts +4 -16
  48. package/src/core/tools/find.ts +149 -5
  49. package/src/core/tools/gemini-image.ts +279 -56
  50. package/src/core/tools/git.ts +17 -3
  51. package/src/core/tools/grep.ts +185 -5
  52. package/src/core/tools/index.test.ts +180 -0
  53. package/src/core/tools/index.ts +96 -242
  54. package/src/core/tools/ls.ts +133 -5
  55. package/src/core/tools/lsp/index.ts +32 -29
  56. package/src/core/tools/lsp/render.ts +21 -22
  57. package/src/core/tools/notebook.ts +112 -4
  58. package/src/core/tools/output.ts +175 -15
  59. package/src/core/tools/read.ts +127 -25
  60. package/src/core/tools/render-utils.ts +241 -0
  61. package/src/core/tools/renderers.ts +40 -828
  62. package/src/core/tools/review.ts +26 -25
  63. package/src/core/tools/rulebook.ts +11 -3
  64. package/src/core/tools/task/agents.ts +28 -7
  65. package/src/core/tools/task/discovery.ts +0 -6
  66. package/src/core/tools/task/executor.ts +264 -254
  67. package/src/core/tools/task/index.ts +48 -208
  68. package/src/core/tools/task/render.ts +26 -11
  69. package/src/core/tools/task/types.ts +7 -12
  70. package/src/core/tools/task/worker-protocol.ts +17 -0
  71. package/src/core/tools/task/worker.ts +238 -0
  72. package/src/core/tools/truncate.ts +27 -1
  73. package/src/core/tools/web-fetch.ts +25 -49
  74. package/src/core/tools/web-search/index.ts +132 -46
  75. package/src/core/tools/web-search/providers/anthropic.ts +7 -2
  76. package/src/core/tools/web-search/providers/exa.ts +2 -1
  77. package/src/core/tools/web-search/providers/perplexity.ts +6 -1
  78. package/src/core/tools/web-search/render.ts +6 -4
  79. package/src/core/tools/web-search/types.ts +13 -0
  80. package/src/core/tools/write.ts +96 -14
  81. package/src/core/voice.ts +1 -1
  82. package/src/discovery/helpers.test.ts +1 -1
  83. package/src/index.ts +5 -16
  84. package/src/main.ts +5 -5
  85. package/src/modes/interactive/components/assistant-message.ts +1 -1
  86. package/src/modes/interactive/components/custom-message.ts +1 -1
  87. package/src/modes/interactive/components/extensions/inspector-panel.ts +25 -22
  88. package/src/modes/interactive/components/extensions/state-manager.ts +12 -0
  89. package/src/modes/interactive/components/footer.ts +1 -1
  90. package/src/modes/interactive/components/hook-message.ts +1 -1
  91. package/src/modes/interactive/components/model-selector.ts +1 -1
  92. package/src/modes/interactive/components/oauth-selector.ts +1 -1
  93. package/src/modes/interactive/components/settings-defs.ts +49 -0
  94. package/src/modes/interactive/components/status-line.ts +1 -1
  95. package/src/modes/interactive/components/tool-execution.ts +93 -538
  96. package/src/modes/interactive/interactive-mode.ts +19 -7
  97. package/src/modes/interactive/theme/theme.ts +4 -4
  98. package/src/modes/print-mode.ts +1 -1
  99. package/src/modes/rpc/rpc-client.ts +1 -1
  100. package/src/modes/rpc/rpc-types.ts +1 -1
  101. package/src/prompts/system-prompt.md +4 -0
  102. package/src/prompts/task.md +0 -7
  103. package/src/prompts/tools/gemini-image.md +5 -1
  104. package/src/prompts/tools/output.md +6 -2
  105. package/src/prompts/tools/task.md +68 -0
  106. package/src/prompts/tools/web-fetch.md +1 -0
  107. package/src/prompts/tools/web-search.md +2 -0
  108. package/src/utils/image-convert.ts +8 -2
  109. package/src/utils/image-magick.ts +247 -0
  110. package/src/utils/image-resize.ts +53 -13
  111. package/examples/custom-tools/question/index.ts +0 -84
  112. package/examples/custom-tools/subagent/README.md +0 -172
  113. package/examples/custom-tools/subagent/agents/planner.md +0 -37
  114. package/examples/custom-tools/subagent/agents/scout.md +0 -50
  115. package/examples/custom-tools/subagent/agents/worker.md +0 -24
  116. package/examples/custom-tools/subagent/agents.ts +0 -156
  117. package/examples/custom-tools/subagent/commands/implement-and-review.md +0 -10
  118. package/examples/custom-tools/subagent/commands/implement.md +0 -10
  119. package/examples/custom-tools/subagent/commands/scout-and-plan.md +0 -9
  120. package/examples/custom-tools/subagent/index.ts +0 -1002
  121. package/examples/sdk/05-tools.ts +0 -94
  122. package/examples/sdk/12-full-control.ts +0 -95
  123. package/src/prompts/browser.md +0 -71
@@ -1,12 +1,28 @@
1
1
  import { existsSync, type Stats, statSync } from "node:fs";
2
2
  import path from "node:path";
3
3
  import type { AgentTool } from "@oh-my-pi/pi-agent-core";
4
+ import type { Component } from "@oh-my-pi/pi-tui";
5
+ import { Text } from "@oh-my-pi/pi-tui";
4
6
  import { Type } from "@sinclair/typebox";
5
7
  import { globSync } from "glob";
8
+ import { getLanguageFromPath, type Theme } from "../../modes/interactive/theme/theme";
6
9
  import findDescription from "../../prompts/tools/find.md" with { type: "text" };
7
10
  import { ensureTool } from "../../utils/tools-manager";
11
+ import type { RenderResultOptions } from "../custom-tools/types";
8
12
  import { untilAborted } from "../utils";
13
+ import type { ToolSession } from "./index";
9
14
  import { resolveToCwd } from "./path-utils";
15
+ import {
16
+ formatCount,
17
+ formatEmptyMessage,
18
+ formatErrorMessage,
19
+ formatExpandHint,
20
+ formatMeta,
21
+ formatMoreItems,
22
+ formatScope,
23
+ formatTruncationSuffix,
24
+ PREVIEW_LIMITS,
25
+ } from "./render-utils";
10
26
  import { DEFAULT_MAX_BYTES, formatSize, type TruncationResult, truncateHead } from "./truncate";
11
27
 
12
28
  const findSchema = Type.Object({
@@ -40,7 +56,7 @@ export interface FindToolDetails {
40
56
  error?: string;
41
57
  }
42
58
 
43
- export function createFindTool(cwd: string): AgentTool<typeof findSchema> {
59
+ export function createFindTool(session: ToolSession): AgentTool<typeof findSchema> {
44
60
  return {
45
61
  name: "find",
46
62
  label: "Find",
@@ -72,9 +88,9 @@ export function createFindTool(cwd: string): AgentTool<typeof findSchema> {
72
88
  throw new Error("fd is not available and could not be downloaded");
73
89
  }
74
90
 
75
- const searchPath = resolveToCwd(searchDir || ".", cwd);
91
+ const searchPath = resolveToCwd(searchDir || ".", session.cwd);
76
92
  const scopePath = (() => {
77
- const relative = path.relative(cwd, searchPath).replace(/\\/g, "/");
93
+ const relative = path.relative(session.cwd, searchPath).replace(/\\/g, "/");
78
94
  return relative.length === 0 ? "." : relative;
79
95
  })();
80
96
  const effectiveLimit = limit ?? DEFAULT_LIMIT;
@@ -246,5 +262,133 @@ export function createFindTool(cwd: string): AgentTool<typeof findSchema> {
246
262
  };
247
263
  }
248
264
 
249
- /** Default find tool using process.cwd() - for backwards compatibility */
250
- export const findTool = createFindTool(process.cwd());
265
+ // =============================================================================
266
+ // TUI Renderer
267
+ // =============================================================================
268
+
269
+ interface FindRenderArgs {
270
+ pattern: string;
271
+ path?: string;
272
+ type?: string;
273
+ hidden?: boolean;
274
+ sortByMtime?: boolean;
275
+ limit?: number;
276
+ }
277
+
278
+ const COLLAPSED_LIST_LIMIT = PREVIEW_LIMITS.COLLAPSED_ITEMS;
279
+
280
+ export const findToolRenderer = {
281
+ renderCall(args: FindRenderArgs, uiTheme: Theme): Component {
282
+ const label = uiTheme.fg("toolTitle", uiTheme.bold("Find"));
283
+ let text = `${label} ${uiTheme.fg("accent", args.pattern || "*")}`;
284
+
285
+ const meta: string[] = [];
286
+ if (args.path) meta.push(`in ${args.path}`);
287
+ if (args.type && args.type !== "all") meta.push(`type:${args.type}`);
288
+ if (args.hidden) meta.push("hidden");
289
+ if (args.sortByMtime) meta.push("sort:mtime");
290
+ if (args.limit !== undefined) meta.push(`limit:${args.limit}`);
291
+
292
+ text += formatMeta(meta, uiTheme);
293
+
294
+ return new Text(text, 0, 0);
295
+ },
296
+
297
+ renderResult(
298
+ result: { content: Array<{ type: string; text?: string }>; details?: FindToolDetails },
299
+ { expanded }: RenderResultOptions,
300
+ uiTheme: Theme,
301
+ ): Component {
302
+ const details = result.details;
303
+
304
+ if (details?.error) {
305
+ return new Text(formatErrorMessage(details.error, uiTheme), 0, 0);
306
+ }
307
+
308
+ const hasDetailedData = details?.fileCount !== undefined;
309
+ const textContent = result.content?.find((c) => c.type === "text")?.text;
310
+
311
+ if (!hasDetailedData) {
312
+ if (!textContent || textContent.includes("No files matching") || textContent.trim() === "") {
313
+ return new Text(formatEmptyMessage("No files found", uiTheme), 0, 0);
314
+ }
315
+
316
+ const lines = textContent.split("\n").filter((l) => l.trim());
317
+ const maxLines = expanded ? lines.length : Math.min(lines.length, COLLAPSED_LIST_LIMIT);
318
+ const displayLines = lines.slice(0, maxLines);
319
+ const remaining = lines.length - maxLines;
320
+ const hasMore = remaining > 0;
321
+
322
+ const icon = uiTheme.styledSymbol("status.success", "success");
323
+ const summary = formatCount("file", lines.length);
324
+ const expandHint = formatExpandHint(expanded, hasMore, uiTheme);
325
+ let text = `${icon} ${uiTheme.fg("dim", summary)}${expandHint}`;
326
+
327
+ for (let i = 0; i < displayLines.length; i++) {
328
+ const isLast = i === displayLines.length - 1 && remaining === 0;
329
+ const branch = isLast ? uiTheme.tree.last : uiTheme.tree.branch;
330
+ text += `\n ${uiTheme.fg("dim", branch)} ${uiTheme.fg("accent", displayLines[i])}`;
331
+ }
332
+ if (remaining > 0) {
333
+ text += `\n ${uiTheme.fg("dim", uiTheme.tree.last)} ${uiTheme.fg("muted", formatMoreItems(remaining, "file", uiTheme))}`;
334
+ }
335
+ return new Text(text, 0, 0);
336
+ }
337
+
338
+ const fileCount = details?.fileCount ?? 0;
339
+ const truncated = details?.truncated ?? details?.truncation?.truncated ?? false;
340
+ const files = details?.files ?? [];
341
+
342
+ if (fileCount === 0) {
343
+ return new Text(formatEmptyMessage("No files found", uiTheme), 0, 0);
344
+ }
345
+
346
+ const icon = uiTheme.styledSymbol("status.success", "success");
347
+ const summaryText = formatCount("file", fileCount);
348
+ const scopeLabel = formatScope(details?.scopePath, uiTheme);
349
+ const maxFiles = expanded ? files.length : Math.min(files.length, COLLAPSED_LIST_LIMIT);
350
+ const hasMoreFiles = files.length > maxFiles;
351
+ const expandHint = formatExpandHint(expanded, hasMoreFiles, uiTheme);
352
+
353
+ let text = `${icon} ${uiTheme.fg("dim", summaryText)}${formatTruncationSuffix(truncated, uiTheme)}${scopeLabel}${expandHint}`;
354
+
355
+ const truncationReasons: string[] = [];
356
+ if (details?.resultLimitReached) {
357
+ truncationReasons.push(`limit ${details.resultLimitReached} results`);
358
+ }
359
+ if (details?.truncation?.truncated) {
360
+ truncationReasons.push("size limit");
361
+ }
362
+
363
+ const hasTruncation = truncationReasons.length > 0;
364
+
365
+ if (files.length > 0) {
366
+ for (let i = 0; i < maxFiles; i++) {
367
+ const isLast = i === maxFiles - 1 && !hasMoreFiles && !hasTruncation;
368
+ const branch = isLast ? uiTheme.tree.last : uiTheme.tree.branch;
369
+ const entry = files[i];
370
+ const isDir = entry.endsWith("/");
371
+ const entryPath = isDir ? entry.slice(0, -1) : entry;
372
+ const lang = isDir ? undefined : getLanguageFromPath(entryPath);
373
+ const entryIcon = isDir
374
+ ? uiTheme.fg("accent", uiTheme.icon.folder)
375
+ : uiTheme.fg("muted", uiTheme.getLangIcon(lang));
376
+ text += `\n ${uiTheme.fg("dim", branch)} ${entryIcon} ${uiTheme.fg("accent", entry)}`;
377
+ }
378
+
379
+ if (hasMoreFiles) {
380
+ const moreFilesBranch = hasTruncation ? uiTheme.tree.branch : uiTheme.tree.last;
381
+ text += `\n ${uiTheme.fg("dim", moreFilesBranch)} ${uiTheme.fg(
382
+ "muted",
383
+ formatMoreItems(files.length - maxFiles, "file", uiTheme),
384
+ )}`;
385
+ }
386
+ }
387
+
388
+ if (hasTruncation) {
389
+ text += `\n ${uiTheme.fg("dim", uiTheme.tree.last)} ${uiTheme.fg("warning", `truncated: ${truncationReasons.join(", ")}`)}`;
390
+ }
391
+
392
+ return new Text(text, 0, 0);
393
+ },
394
+ };
@@ -1,4 +1,7 @@
1
- import type { ImageContent, TextContent } from "@oh-my-pi/pi-ai";
1
+ import * as crypto from "node:crypto";
2
+ import * as fs from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
2
5
  import { type Static, Type } from "@sinclair/typebox";
3
6
  import geminiImageDescription from "../../prompts/tools/gemini-image.md" with { type: "text" };
4
7
  import { detectSupportedImageMimeTypeFromFile } from "../../utils/mime";
@@ -7,10 +10,17 @@ import { untilAborted } from "../utils";
7
10
  import { resolveReadPath } from "./path-utils";
8
11
  import { getEnv } from "./web-search/auth";
9
12
 
10
- const DEFAULT_MODEL = "gemini-2.5-flash-image";
13
+ const DEFAULT_MODEL = "gemini-3-pro-image-preview";
14
+ const DEFAULT_OPENROUTER_MODEL = "google/gemini-3-pro-image-preview";
11
15
  const DEFAULT_TIMEOUT_SECONDS = 120;
12
16
  const MAX_IMAGE_SIZE = 20 * 1024 * 1024;
13
17
 
18
+ type ImageProvider = "gemini" | "openrouter";
19
+ interface ImageApiKey {
20
+ provider: ImageProvider;
21
+ apiKey: string;
22
+ }
23
+
14
24
  const responseModalitySchema = Type.Union([Type.Literal("Image"), Type.Literal("Text")]);
15
25
  const aspectRatioSchema = Type.Union(
16
26
  [Type.Literal("1:1"), Type.Literal("3:4"), Type.Literal("4:3"), Type.Literal("9:16"), Type.Literal("16:9")],
@@ -34,13 +44,7 @@ export const geminiImageSchema = Type.Object(
34
44
  prompt: Type.String({ description: "Text prompt for image generation or editing." }),
35
45
  model: Type.Optional(
36
46
  Type.String({
37
- description: `Gemini image model. Default: ${DEFAULT_MODEL} (Nano Banana).`,
38
- }),
39
- ),
40
- response_modalities: Type.Optional(
41
- Type.Array(responseModalitySchema, {
42
- description: 'Response modalities (default: ["Image"]).',
43
- minItems: 1,
47
+ description: `Image model. Default: ${DEFAULT_MODEL} (direct Gemini) or ${DEFAULT_OPENROUTER_MODEL} (OpenRouter).`,
44
48
  }),
45
49
  ),
46
50
  aspect_ratio: Type.Optional(aspectRatioSchema),
@@ -100,9 +104,35 @@ interface GeminiGenerateContentResponse {
100
104
  usageMetadata?: GeminiUsageMetadata;
101
105
  }
102
106
 
107
+ interface OpenRouterImageUrl {
108
+ url: string;
109
+ }
110
+
111
+ interface OpenRouterContentPart {
112
+ type: "text" | "image_url";
113
+ text?: string;
114
+ image_url?: OpenRouterImageUrl;
115
+ }
116
+
117
+ interface OpenRouterMessage {
118
+ content?: string | OpenRouterContentPart[];
119
+ images?: Array<string | { image_url?: OpenRouterImageUrl }>;
120
+ }
121
+
122
+ interface OpenRouterChoice {
123
+ message?: OpenRouterMessage;
124
+ }
125
+
126
+ interface OpenRouterResponse {
127
+ choices?: OpenRouterChoice[];
128
+ }
129
+
103
130
  interface GeminiImageToolDetails {
131
+ provider: ImageProvider;
104
132
  model: string;
105
133
  imageCount: number;
134
+ imagePaths: string[];
135
+ images: InlineImageData[];
106
136
  responseText?: string;
107
137
  promptFeedback?: GeminiPromptFeedback;
108
138
  usage?: GeminiUsageMetadata;
@@ -125,12 +155,109 @@ function normalizeDataUrl(data: string): { data: string; mimeType?: string } {
125
155
  return { data: match[2] ?? "", mimeType: match[1] };
126
156
  }
127
157
 
128
- async function findGeminiApiKey(): Promise<string | null> {
158
+ function resolveOpenRouterModel(model: string): string {
159
+ return model.includes("/") ? model : `google/${model}`;
160
+ }
161
+
162
+ function toDataUrl(image: InlineImageData): string {
163
+ return `data:${image.mimeType};base64,${image.data}`;
164
+ }
165
+
166
+ async function loadImageFromUrl(imageUrl: string, signal?: AbortSignal): Promise<InlineImageData> {
167
+ if (imageUrl.startsWith("data:")) {
168
+ const normalized = normalizeDataUrl(imageUrl.trim());
169
+ if (!normalized.mimeType) {
170
+ throw new Error("mime_type is required when providing raw base64 data.");
171
+ }
172
+ if (!normalized.data) {
173
+ throw new Error("Image data is empty.");
174
+ }
175
+ return { data: normalized.data, mimeType: normalized.mimeType };
176
+ }
177
+
178
+ const response = await fetch(imageUrl, { signal });
179
+ if (!response.ok) {
180
+ const rawText = await response.text();
181
+ throw new Error(`Image download failed (${response.status}): ${rawText}`);
182
+ }
183
+ const contentType = response.headers.get("content-type")?.split(";")[0];
184
+ if (!contentType || !contentType.startsWith("image/")) {
185
+ throw new Error(`Unsupported image type from URL: ${imageUrl}`);
186
+ }
187
+ const buffer = Buffer.from(await response.arrayBuffer());
188
+ return { data: buffer.toString("base64"), mimeType: contentType };
189
+ }
190
+
191
+ function collectOpenRouterResponseText(message: OpenRouterMessage | undefined): string | undefined {
192
+ if (!message) return undefined;
193
+ if (typeof message.content === "string") {
194
+ const trimmed = message.content.trim();
195
+ return trimmed.length > 0 ? trimmed : undefined;
196
+ }
197
+ if (Array.isArray(message.content)) {
198
+ const texts = message.content
199
+ .filter((part) => part.type === "text")
200
+ .map((part) => part.text)
201
+ .filter((text): text is string => Boolean(text));
202
+ const combined = texts.join("\n").trim();
203
+ return combined.length > 0 ? combined : undefined;
204
+ }
205
+ return undefined;
206
+ }
207
+
208
+ function extractOpenRouterImageUrls(message: OpenRouterMessage | undefined): string[] {
209
+ const urls: string[] = [];
210
+ if (!message) return urls;
211
+ for (const image of message.images ?? []) {
212
+ if (typeof image === "string") {
213
+ urls.push(image);
214
+ continue;
215
+ }
216
+ if (image.image_url?.url) {
217
+ urls.push(image.image_url.url);
218
+ }
219
+ }
220
+ if (Array.isArray(message.content)) {
221
+ for (const part of message.content) {
222
+ if (part.type === "image_url" && part.image_url?.url) {
223
+ urls.push(part.image_url.url);
224
+ }
225
+ }
226
+ }
227
+ return urls;
228
+ }
229
+
230
+ /** Preferred provider set via settings (default: auto) */
231
+ let preferredImageProvider: ImageProvider | "auto" = "auto";
232
+
233
+ /** Set the preferred image provider from settings */
234
+ export function setPreferredImageProvider(provider: ImageProvider | "auto"): void {
235
+ preferredImageProvider = provider;
236
+ }
237
+
238
+ async function findImageApiKey(): Promise<ImageApiKey | null> {
239
+ // If a specific provider is preferred, try it first
240
+ if (preferredImageProvider === "gemini") {
241
+ const geminiKey = await getEnv("GEMINI_API_KEY");
242
+ if (geminiKey) return { provider: "gemini", apiKey: geminiKey };
243
+ const googleKey = await getEnv("GOOGLE_API_KEY");
244
+ if (googleKey) return { provider: "gemini", apiKey: googleKey };
245
+ // Fall through to auto-detect if preferred provider key not found
246
+ } else if (preferredImageProvider === "openrouter") {
247
+ const openRouterKey = await getEnv("OPENROUTER_API_KEY");
248
+ if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
249
+ // Fall through to auto-detect if preferred provider key not found
250
+ }
251
+
252
+ // Auto-detect: OpenRouter takes priority
253
+ const openRouterKey = await getEnv("OPENROUTER_API_KEY");
254
+ if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
255
+
129
256
  const geminiKey = await getEnv("GEMINI_API_KEY");
130
- if (geminiKey) return geminiKey;
257
+ if (geminiKey) return { provider: "gemini", apiKey: geminiKey };
131
258
 
132
259
  const googleKey = await getEnv("GOOGLE_API_KEY");
133
- if (googleKey) return googleKey;
260
+ if (googleKey) return { provider: "gemini", apiKey: googleKey };
134
261
 
135
262
  return null;
136
263
  }
@@ -174,8 +301,29 @@ async function resolveInputImage(input: ImageInput, cwd: string): Promise<Inline
174
301
  throw new Error("input_images entries must include either path or data.");
175
302
  }
176
303
 
177
- function buildResponseSummary(model: string, imageCount: number, responseText: string | undefined): string {
178
- const lines = [`Model: ${model}`, `Images: ${imageCount}`];
304
+ function getExtensionForMime(mimeType: string): string {
305
+ const map: Record<string, string> = {
306
+ "image/png": "png",
307
+ "image/jpeg": "jpg",
308
+ "image/gif": "gif",
309
+ "image/webp": "webp",
310
+ };
311
+ return map[mimeType] ?? "png";
312
+ }
313
+
314
+ function saveImageToTemp(image: InlineImageData): string {
315
+ const ext = getExtensionForMime(image.mimeType);
316
+ const filename = `omp-image-${crypto.randomUUID()}.${ext}`;
317
+ const filepath = join(tmpdir(), filename);
318
+ fs.writeFileSync(filepath, Buffer.from(image.data, "base64"));
319
+ return filepath;
320
+ }
321
+
322
+ function buildResponseSummary(model: string, imagePaths: string[], responseText: string | undefined): string {
323
+ const lines = [`Model: ${model}`, `Generated ${imagePaths.length} image(s):`];
324
+ for (const p of imagePaths) {
325
+ lines.push(` ${p}`);
326
+ }
179
327
  if (responseText) {
180
328
  lines.push("", responseText.trim());
181
329
  }
@@ -232,60 +380,137 @@ function createAbortController(
232
380
  }
233
381
 
234
382
  export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageToolDetails> = {
235
- name: "gemini_image",
236
- label: "Gemini Image",
383
+ name: "generate_image",
384
+ label: "GenerateImage",
237
385
  description: geminiImageDescription,
238
386
  parameters: geminiImageSchema,
239
387
  async execute(_toolCallId, params, _onUpdate, ctx, signal) {
240
388
  return untilAborted(signal, async () => {
241
- const apiKey = await findGeminiApiKey();
389
+ const apiKey = await findImageApiKey();
242
390
  if (!apiKey) {
243
- throw new Error("GEMINI_API_KEY not found.");
391
+ throw new Error("OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY not found.");
244
392
  }
245
393
 
246
- const model = params.model ?? DEFAULT_MODEL;
247
- const responseModalities = params.response_modalities ?? ["Image"];
394
+ const provider = apiKey.provider;
395
+ const model = params.model ?? (provider === "openrouter" ? DEFAULT_OPENROUTER_MODEL : DEFAULT_MODEL);
396
+ const resolvedModel = provider === "openrouter" ? resolveOpenRouterModel(model) : model;
248
397
  const cwd = ctx.sessionManager.getCwd();
249
398
 
250
- const parts = [] as Array<{ text?: string; inlineData?: InlineImageData }>;
399
+ const resolvedImages: InlineImageData[] = [];
251
400
  if (params.input_images?.length) {
252
401
  for (const input of params.input_images) {
253
- const image = await resolveInputImage(input, cwd);
254
- parts.push({ inlineData: image });
402
+ resolvedImages.push(await resolveInputImage(input, cwd));
255
403
  }
256
404
  }
257
- parts.push({ text: params.prompt });
258
-
259
- const generationConfig: {
260
- responseModalities: GeminiResponseModality[];
261
- imageConfig?: { aspectRatio?: string; imageSize?: string };
262
- } = {
263
- responseModalities,
264
- };
265
-
266
- if (params.aspect_ratio || params.image_size) {
267
- generationConfig.imageConfig = {
268
- aspectRatio: params.aspect_ratio,
269
- imageSize: params.image_size,
270
- };
271
- }
272
-
273
- const requestBody = {
274
- contents: [{ role: "user" as const, parts }],
275
- generationConfig,
276
- };
277
405
 
278
406
  const timeoutSeconds = params.timeout_seconds ?? DEFAULT_TIMEOUT_SECONDS;
279
407
  const { controller, cleanup } = createAbortController(signal, timeoutSeconds);
280
408
 
281
409
  try {
410
+ if (provider === "openrouter") {
411
+ const contentParts: OpenRouterContentPart[] = [{ type: "text", text: params.prompt }];
412
+ for (const image of resolvedImages) {
413
+ contentParts.push({ type: "image_url", image_url: { url: toDataUrl(image) } });
414
+ }
415
+
416
+ const requestBody = {
417
+ model: resolvedModel,
418
+ messages: [{ role: "user" as const, content: contentParts }],
419
+ };
420
+
421
+ const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
422
+ method: "POST",
423
+ headers: {
424
+ "Content-Type": "application/json",
425
+ Authorization: `Bearer ${apiKey.apiKey}`,
426
+ },
427
+ body: JSON.stringify(requestBody),
428
+ signal: controller.signal,
429
+ });
430
+
431
+ const rawText = await response.text();
432
+ if (!response.ok) {
433
+ let message = rawText;
434
+ try {
435
+ const parsed = JSON.parse(rawText) as { error?: { message?: string } };
436
+ message = parsed.error?.message ?? message;
437
+ } catch {
438
+ // Keep raw text.
439
+ }
440
+ throw new Error(`OpenRouter image request failed (${response.status}): ${message}`);
441
+ }
442
+
443
+ const data = JSON.parse(rawText) as OpenRouterResponse;
444
+ const message = data.choices?.[0]?.message;
445
+ const responseText = collectOpenRouterResponseText(message);
446
+ const imageUrls = extractOpenRouterImageUrls(message);
447
+ const inlineImages: InlineImageData[] = [];
448
+ for (const imageUrl of imageUrls) {
449
+ inlineImages.push(await loadImageFromUrl(imageUrl, controller.signal));
450
+ }
451
+
452
+ if (inlineImages.length === 0) {
453
+ const messageText = responseText ? `\n\n${responseText}` : "";
454
+ return {
455
+ content: [{ type: "text", text: `No image data returned.${messageText}` }],
456
+ details: {
457
+ provider,
458
+ model: resolvedModel,
459
+ imageCount: 0,
460
+ imagePaths: [],
461
+ images: [],
462
+ responseText,
463
+ },
464
+ };
465
+ }
466
+
467
+ const imagePaths = inlineImages.map(saveImageToTemp);
468
+
469
+ return {
470
+ content: [{ type: "text", text: buildResponseSummary(resolvedModel, imagePaths, responseText) }],
471
+ details: {
472
+ provider,
473
+ model: resolvedModel,
474
+ imageCount: inlineImages.length,
475
+ imagePaths,
476
+ images: inlineImages,
477
+ responseText,
478
+ },
479
+ };
480
+ }
481
+
482
+ const parts = [] as Array<{ text?: string; inlineData?: InlineImageData }>;
483
+ for (const image of resolvedImages) {
484
+ parts.push({ inlineData: image });
485
+ }
486
+ parts.push({ text: params.prompt });
487
+
488
+ const generationConfig: {
489
+ responseModalities: GeminiResponseModality[];
490
+ imageConfig?: { aspectRatio?: string; imageSize?: string };
491
+ } = {
492
+ responseModalities: ["Image"],
493
+ };
494
+
495
+ if (params.aspect_ratio || params.image_size) {
496
+ generationConfig.imageConfig = {
497
+ aspectRatio: params.aspect_ratio,
498
+ imageSize: params.image_size,
499
+ };
500
+ }
501
+
502
+ const requestBody = {
503
+ contents: [{ role: "user" as const, parts }],
504
+ generationConfig,
505
+ };
506
+
282
507
  const response = await fetch(
283
508
  `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(model)}:generateContent`,
284
509
  {
285
510
  method: "POST",
286
511
  headers: {
287
512
  "Content-Type": "application/json",
288
- "x-goog-api-key": apiKey,
513
+ "x-goog-api-key": apiKey.apiKey,
289
514
  },
290
515
  body: JSON.stringify(requestBody),
291
516
  signal: controller.signal,
@@ -308,18 +533,19 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
308
533
  const responseParts = combineParts(data);
309
534
  const responseText = collectResponseText(responseParts);
310
535
  const inlineImages = collectInlineImages(responseParts);
311
- const content: Array<TextContent | ImageContent> = [];
312
536
 
313
537
  if (inlineImages.length === 0) {
314
538
  const blocked = data.promptFeedback?.blockReason
315
539
  ? `Blocked: ${data.promptFeedback.blockReason}`
316
540
  : "No image data returned.";
317
- content.push({ type: "text", text: `${blocked}${responseText ? `\n\n${responseText}` : ""}` });
318
541
  return {
319
- content,
542
+ content: [{ type: "text", text: `${blocked}${responseText ? `\n\n${responseText}` : ""}` }],
320
543
  details: {
544
+ provider,
321
545
  model,
322
546
  imageCount: 0,
547
+ imagePaths: [],
548
+ images: [],
323
549
  responseText,
324
550
  promptFeedback: data.promptFeedback,
325
551
  usage: data.usageMetadata,
@@ -327,19 +553,16 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
327
553
  };
328
554
  }
329
555
 
330
- content.push({
331
- type: "text",
332
- text: buildResponseSummary(model, inlineImages.length, responseText),
333
- });
334
- for (const image of inlineImages) {
335
- content.push({ type: "image", data: image.data, mimeType: image.mimeType });
336
- }
556
+ const imagePaths = inlineImages.map(saveImageToTemp);
337
557
 
338
558
  return {
339
- content,
559
+ content: [{ type: "text", text: buildResponseSummary(model, imagePaths, responseText) }],
340
560
  details: {
561
+ provider,
341
562
  model,
342
563
  imageCount: inlineImages.length,
564
+ imagePaths,
565
+ images: inlineImages,
343
566
  responseText,
344
567
  promptFeedback: data.promptFeedback,
345
568
  usage: data.usageMetadata,
@@ -355,7 +578,7 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
355
578
  export async function getGeminiImageTools(): Promise<
356
579
  Array<CustomTool<typeof geminiImageSchema, GeminiImageToolDetails>>
357
580
  > {
358
- const apiKey = await findGeminiApiKey();
581
+ const apiKey = await findImageApiKey();
359
582
  if (!apiKey) return [];
360
583
  return [geminiImageTool];
361
584
  }
@@ -2,6 +2,7 @@ import type { AgentTool } from "@oh-my-pi/pi-agent-core";
2
2
  import { type GitParams, gitTool as gitToolCore, type ToolResponse } from "@oh-my-pi/pi-git-tool";
3
3
  import { type Static, Type } from "@sinclair/typebox";
4
4
  import gitDescription from "../../prompts/tools/git.md" with { type: "text" };
5
+ import type { ToolSession } from "./index";
5
6
 
6
7
  const gitSchema = Type.Object({
7
8
  operation: Type.Union([
@@ -192,14 +193,21 @@ const gitSchema = Type.Object({
192
193
 
193
194
  export type GitToolDetails = ToolResponse<unknown>;
194
195
 
195
- export function createGitTool(cwd: string): AgentTool<typeof gitSchema, GitToolDetails> {
196
+ export function createGitTool(session: ToolSession): AgentTool<typeof gitSchema, GitToolDetails> | null {
197
+ if (session.settings?.getGitToolEnabled() === false) {
198
+ return null;
199
+ }
196
200
  return {
197
201
  name: "git",
198
202
  label: "Git",
199
203
  description: gitDescription,
200
204
  parameters: gitSchema,
201
205
  execute: async (_toolCallId, params: Static<typeof gitSchema>, _signal?: AbortSignal) => {
202
- const result = await gitToolCore(params as GitParams, cwd);
206
+ if (params.operation === "commit" && !params.message) {
207
+ throw new Error("Git commit requires a message to avoid an interactive editor. Provide `message`.");
208
+ }
209
+
210
+ const result = await gitToolCore(params as GitParams, session.cwd);
203
211
  if ("error" in result) {
204
212
  const message = result._rendered ?? result.error;
205
213
  return { content: [{ type: "text", text: message }], details: result };
@@ -213,4 +221,10 @@ export function createGitTool(cwd: string): AgentTool<typeof gitSchema, GitToolD
213
221
  };
214
222
  }
215
223
 
216
- export const gitTool = createGitTool(process.cwd());
224
+ export const gitTool = createGitTool({
225
+ cwd: process.cwd(),
226
+ hasUI: false,
227
+ rulebookRules: [],
228
+ getSessionFile: () => null,
229
+ getSessionSpawns: () => null,
230
+ })!;