@oh-my-pi/pi-coding-agent 13.9.11 → 13.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +17 -0
  2. package/package.json +7 -7
  3. package/src/cli/args.ts +18 -16
  4. package/src/config/keybindings.ts +6 -0
  5. package/src/config/model-registry.ts +4 -4
  6. package/src/config/settings-schema.ts +10 -9
  7. package/src/debug/log-viewer.ts +11 -7
  8. package/src/exec/bash-executor.ts +15 -1
  9. package/src/internal-urls/docs-index.generated.ts +1 -1
  10. package/src/modes/components/agent-dashboard.ts +11 -8
  11. package/src/modes/components/extensions/extension-list.ts +16 -8
  12. package/src/modes/components/settings-defs.ts +2 -2
  13. package/src/modes/components/status-line.ts +5 -9
  14. package/src/modes/components/tree-selector.ts +4 -6
  15. package/src/modes/components/welcome.ts +1 -0
  16. package/src/modes/controllers/command-controller.ts +47 -42
  17. package/src/modes/controllers/event-controller.ts +12 -9
  18. package/src/modes/controllers/input-controller.ts +54 -1
  19. package/src/modes/interactive-mode.ts +4 -10
  20. package/src/modes/prompt-action-autocomplete.ts +201 -0
  21. package/src/modes/types.ts +1 -0
  22. package/src/modes/utils/ui-helpers.ts +12 -0
  23. package/src/patch/index.ts +1 -1
  24. package/src/prompts/system/system-prompt.md +97 -107
  25. package/src/prompts/tools/ast-edit.md +5 -2
  26. package/src/prompts/tools/ast-grep.md +5 -2
  27. package/src/prompts/tools/inspect-image-system.md +20 -0
  28. package/src/prompts/tools/inspect-image.md +32 -0
  29. package/src/session/agent-session.ts +33 -36
  30. package/src/session/compaction/compaction.ts +26 -29
  31. package/src/session/session-manager.ts +15 -7
  32. package/src/tools/bash-interactive.ts +8 -3
  33. package/src/tools/fetch.ts +5 -27
  34. package/src/tools/index.ts +4 -0
  35. package/src/tools/inspect-image-renderer.ts +103 -0
  36. package/src/tools/inspect-image.ts +168 -0
  37. package/src/tools/read.ts +62 -49
  38. package/src/tools/renderers.ts +2 -0
  39. package/src/utils/image-input.ts +264 -0
  40. package/src/web/kagi.ts +0 -42
  41. package/src/web/scrapers/youtube.ts +0 -17
  42. package/src/web/search/index.ts +3 -1
  43. package/src/web/search/provider.ts +4 -1
  44. package/src/web/search/providers/exa.ts +8 -0
  45. package/src/web/search/providers/tavily.ts +162 -0
  46. package/src/web/search/types.ts +1 -0
@@ -13,7 +13,11 @@ import {
13
13
  OPENAI_HEADERS,
14
14
  } from "@oh-my-pi/pi-ai/providers/openai-codex/constants";
15
15
  import { transformMessages } from "@oh-my-pi/pi-ai/providers/transform-messages";
16
- import { normalizeResponsesToolCallId } from "@oh-my-pi/pi-ai/utils";
16
+ import {
17
+ getOpenAIResponsesHistoryItems,
18
+ getOpenAIResponsesHistoryPayload,
19
+ normalizeResponsesToolCallId,
20
+ } from "@oh-my-pi/pi-ai/utils";
17
21
  import { logger } from "@oh-my-pi/pi-utils";
18
22
  import { renderPromptTemplate } from "../../config/prompt-templates";
19
23
  import compactionShortSummaryPrompt from "../../prompts/compaction/compaction-short-summary.md" with { type: "text" };
@@ -473,6 +477,7 @@ type OpenAiRemoteCompactionItem = {
473
477
  };
474
478
 
475
479
  /**
   * Remote-compaction state carried in a compaction's preserve data.
   *
   * `provider` is filled with `model.provider` by `requestOpenAiRemoteCompaction`; `compact()`
   * only reuses `replacementHistory` from a previous compaction when that value equals the
   * provider of the model now in use. It is optional because
   * `getPreservedOpenAiRemoteCompactionData` yields `undefined` for it when the stored value
   * is not a string.
   */
  interface OpenAiRemoteCompactionPreserveData {
480
+ provider?: string;
476
481
  replacementHistory: Array<Record<string, unknown>>;
477
482
  compactionItem: OpenAiRemoteCompactionItem;
478
483
  }
@@ -523,7 +528,7 @@ function getPreservedOpenAiRemoteCompactionData(
523
528
  ): OpenAiRemoteCompactionPreserveData | undefined {
524
529
  const candidate = preserveData?.[OPENAI_REMOTE_COMPACTION_PRESERVE_KEY];
525
530
  if (!candidate || typeof candidate !== "object") return undefined;
526
- const maybeData = candidate as { replacementHistory?: unknown; compactionItem?: unknown };
531
+ const maybeData = candidate as { provider?: unknown; replacementHistory?: unknown; compactionItem?: unknown };
527
532
  if (!Array.isArray(maybeData.replacementHistory)) return undefined;
528
533
  const maybeItem = maybeData.compactionItem;
529
534
  if (!maybeItem || typeof maybeItem !== "object") return undefined;
@@ -535,6 +540,7 @@ function getPreservedOpenAiRemoteCompactionData(
535
540
  return undefined;
536
541
  }
537
542
  return {
543
+ provider: typeof maybeData.provider === "string" ? maybeData.provider : undefined,
538
544
  replacementHistory: maybeData.replacementHistory as Array<Record<string, unknown>>,
539
545
  compactionItem: compactionItem as unknown as OpenAiRemoteCompactionItem,
540
546
  };
@@ -634,15 +640,6 @@ function trimOpenAiCompactInput(
634
640
  return trimmed;
635
641
  }
636
642
 
637
- function getOpenAIResponsesHistoryItems(
638
- providerPayload: { type?: string; items?: unknown } | undefined,
639
- ): Array<Record<string, unknown>> | undefined {
640
- if (providerPayload?.type !== "openaiResponsesHistory" || !Array.isArray(providerPayload.items)) {
641
- return undefined;
642
- }
643
- return providerPayload.items as Array<Record<string, unknown>>;
644
- }
645
-
646
643
  function collectKnownOpenAiCallIds(items: Array<Record<string, unknown>>): Set<string> {
647
644
  const knownCallIds = new Set<string>();
648
645
  for (const item of items) {
@@ -667,8 +664,8 @@ function buildOpenAiNativeHistory(
667
664
  let knownCallIds = collectKnownOpenAiCallIds(input);
668
665
  for (const message of transformedMessages) {
669
666
  if (message.role === "user" || message.role === "developer") {
670
- const providerPayload = (message as { providerPayload?: { type?: string; items?: unknown } }).providerPayload;
671
- const historyItems = getOpenAIResponsesHistoryItems(providerPayload);
667
+ const providerPayload = (message as { providerPayload?: AssistantMessage["providerPayload"] }).providerPayload;
668
+ const historyItems = getOpenAIResponsesHistoryItems(providerPayload, model.provider);
672
669
  if (historyItems) {
673
670
  input.push(...historyItems);
674
671
  knownCallIds = collectKnownOpenAiCallIds(input);
@@ -705,22 +702,22 @@ function buildOpenAiNativeHistory(
705
702
  }
706
703
 
707
704
  if (message.role === "assistant") {
708
- const providerPayload = (
709
- message as { providerPayload?: { type?: string; incremental?: boolean; items?: unknown } }
710
- ).providerPayload;
711
- const historyItems = getOpenAIResponsesHistoryItems(providerPayload);
712
- if (historyItems) {
713
- if (providerPayload?.incremental) {
714
- input.push(...historyItems);
705
+ const assistant = message as AssistantMessage;
706
+ const providerPayload = getOpenAIResponsesHistoryPayload(
707
+ assistant.providerPayload,
708
+ model.provider,
709
+ assistant.provider,
710
+ );
711
+ if (providerPayload) {
712
+ if (providerPayload.dt) {
713
+ input.push(...providerPayload.items);
715
714
  } else {
716
- input.splice(0, input.length, ...historyItems);
715
+ input.splice(0, input.length, ...providerPayload.items);
717
716
  }
718
717
  knownCallIds = collectKnownOpenAiCallIds(input);
719
718
  msgIndex++;
720
719
  continue;
721
720
  }
722
-
723
- const assistant = message as AssistantMessage;
724
721
  const isDifferentModel =
725
722
  assistant.model !== model.id && assistant.provider === model.provider && assistant.api === model.api;
726
723
 
@@ -889,7 +886,7 @@ async function requestOpenAiRemoteCompaction(
889
886
  });
890
887
  throw new Error("Remote compaction response missing compaction item");
891
888
  }
892
- return { replacementHistory, compactionItem };
889
+ return { provider: model.provider, replacementHistory, compactionItem };
893
890
  }
894
891
 
895
892
  interface RemoteCompactionRequest {
@@ -1224,11 +1221,11 @@ export async function compact(
1224
1221
  if (settings.remoteEnabled !== false && shouldUseOpenAiRemoteCompaction(model)) {
1225
1222
  const previousRemoteCompaction = getPreservedOpenAiRemoteCompactionData(previousPreserveData);
1226
1223
  const remoteMessages = [...messagesToSummarize, ...turnPrefixMessages, ...recentMessages];
1227
- const remoteHistory = buildOpenAiNativeHistory(
1228
- remoteMessages,
1229
- model,
1230
- previousRemoteCompaction?.replacementHistory,
1231
- );
1224
+ const previousReplacementHistory =
1225
+ previousRemoteCompaction?.provider === model.provider
1226
+ ? previousRemoteCompaction.replacementHistory
1227
+ : undefined;
1228
+ const remoteHistory = buildOpenAiNativeHistory(remoteMessages, model, previousReplacementHistory);
1232
1229
  if (remoteHistory.length > 0) {
1233
1230
  try {
1234
1231
  const remote = await requestOpenAiRemoteCompaction(
@@ -532,16 +532,19 @@ export function buildSessionContext(
532
532
  };
533
533
 
534
534
  if (compaction) {
535
- const remoteReplacementHistory = (() => {
535
+ const providerPayload: ProviderPayload | undefined = (() => {
536
536
  const candidate = compaction.preserveData?.openaiRemoteCompaction;
537
537
  if (!candidate || typeof candidate !== "object") return undefined;
538
- const replacementHistory = (candidate as { replacementHistory?: unknown }).replacementHistory;
539
- if (!Array.isArray(replacementHistory)) return undefined;
540
- return replacementHistory as Array<Record<string, unknown>>;
538
+ const remote = candidate as { provider?: unknown; replacementHistory?: unknown };
539
+ if (typeof remote.provider !== "string" || remote.provider.length === 0) return undefined;
540
+ if (!Array.isArray(remote.replacementHistory)) return undefined;
541
+ return {
542
+ type: "openaiResponsesHistory",
543
+ provider: remote.provider,
544
+ items: remote.replacementHistory as Array<Record<string, unknown>>,
545
+ };
541
546
  })();
542
- const providerPayload: ProviderPayload | undefined = remoteReplacementHistory
543
- ? { type: "openaiResponsesHistory", items: remoteReplacementHistory }
544
- : undefined;
547
+ const remoteReplacementHistory = providerPayload?.items;
545
548
 
546
549
  // Emit summary first
547
550
  messages.push(
@@ -897,6 +900,11 @@ async function truncateForPersistence<T>(obj: T, blobStore: BlobStore, key?: str
897
900
 
898
901
  if (typeof obj === "string") {
899
902
  if (obj.length > MAX_PERSIST_CHARS) {
903
+ // Cryptographic signatures must be preserved exactly or cleared entirely — never truncated.
904
+ // Truncation would produce an invalid signature that the API rejects.
905
+ if (key === "thinkingSignature" || key === "thoughtSignature" || key === "textSignature") {
906
+ return "" as T;
907
+ }
900
908
  const limit = Math.max(0, MAX_PERSIST_CHARS - TRUNCATION_NOTICE.length);
901
909
  return `${truncateString(obj, limit)}${TRUNCATION_NOTICE}` as T;
902
910
  }
@@ -2,6 +2,7 @@ import type { AgentToolContext } from "@oh-my-pi/pi-agent-core";
2
2
  import { type PtyRunResult, PtySession, sanitizeText } from "@oh-my-pi/pi-natives";
3
3
  import {
4
4
  type Component,
5
+ extractPrintableText,
5
6
  matchesKey,
6
7
  padding,
7
8
  parseKey,
@@ -32,12 +33,16 @@ const XtermTerminal = xterm.Terminal;
32
33
 
33
34
  function normalizeInputForPty(data: string, applicationCursorKeysMode: boolean): string {
34
35
  const kitty = parseKittySequence(data);
36
+ if (kitty?.eventType === 3) {
37
+ return "";
38
+ }
39
+ const printableText = extractPrintableText(data);
40
+ if (printableText) {
41
+ return printableText;
42
+ }
35
43
  if (!kitty) {
36
44
  return data;
37
45
  }
38
- if (kitty.eventType === 3) {
39
- return "";
40
- }
41
46
  const keyId = parseKey(data);
42
47
  if (!keyId) {
43
48
  return data;
@@ -15,7 +15,6 @@ import { renderStatusLine } from "../tui";
15
15
  import { CachedOutputBlock } from "../tui/output-block";
16
16
  import { formatDimensionNote, resizeImage } from "../utils/image-resize";
17
17
  import { ensureTool } from "../utils/tools-manager";
18
- import { summarizeUrlWithKagi } from "../web/kagi";
19
18
  import { specialHandlers } from "../web/scrapers";
20
19
  import type { RenderResult } from "../web/scrapers/types";
21
20
  import { finalizeOutput, loadPage, MAX_OUTPUT_CHARS } from "../web/scrapers/types";
@@ -466,13 +465,12 @@ function parseFeedToMarkdown(content: string, maxItems = 10): string {
466
465
  }
467
466
 
468
467
  /**
469
- * Render HTML to markdown using kagi, jina, trafilatura, lynx (in order of preference)
468
+ * Render HTML to markdown using jina, trafilatura, lynx (in order of preference)
470
469
  */
471
470
  async function renderHtmlToText(
472
471
  url: string,
473
472
  html: string,
474
473
  timeout: number,
475
- useKagiSummarizer: boolean,
476
474
  userSignal?: AbortSignal,
477
475
  ): Promise<{ content: string; ok: boolean; method: string }> {
478
476
  const signal = ptree.combineSignals(userSignal, timeout * 1000);
@@ -484,20 +482,7 @@ async function renderHtmlToText(
484
482
  signal,
485
483
  };
486
484
 
487
- // Try Kagi Universal Summarizer first (if enabled and KAGI_API_KEY is configured)
488
- if (useKagiSummarizer) {
489
- try {
490
- const kagiSummary = await summarizeUrlWithKagi(url, { signal });
491
- if (kagiSummary && kagiSummary.length > 100 && !isLowQualityOutput(kagiSummary)) {
492
- return { content: kagiSummary, ok: true, method: "kagi" };
493
- }
494
- } catch {
495
- // Kagi failed, continue to next method
496
- signal?.throwIfAborted();
497
- }
498
- }
499
-
500
- // Try jina next (reader API)
485
+ // Try jina first (reader API)
501
486
  try {
502
487
  const jinaUrl = `https://r.jina.ai/${url}`;
503
488
  const response = await fetch(jinaUrl, {
@@ -623,13 +608,7 @@ async function handleSpecialUrls(
623
608
  /**
624
609
  * Main render function implementing the full pipeline
625
610
  */
626
- async function renderUrl(
627
- url: string,
628
- timeout: number,
629
- raw: boolean,
630
- useKagiSummarizer: boolean,
631
- signal?: AbortSignal,
632
- ): Promise<FetchRenderResult> {
611
+ async function renderUrl(url: string, timeout: number, raw: boolean, signal?: AbortSignal): Promise<FetchRenderResult> {
633
612
  const notes: string[] = [];
634
613
  const fetchedAt = new Date().toISOString();
635
614
  if (signal?.aborted) {
@@ -973,7 +952,7 @@ async function renderUrl(
973
952
  }
974
953
 
975
954
  // 5E: Render HTML with lynx or html2text
976
- const htmlResult = await renderHtmlToText(finalUrl, rawContent, timeout, useKagiSummarizer, signal);
955
+ const htmlResult = await renderHtmlToText(finalUrl, rawContent, timeout, signal);
977
956
  if (!htmlResult.ok) {
978
957
  notes.push("html rendering failed (lynx/html2text unavailable)");
979
958
  const output = finalizeOutput(rawContent);
@@ -1113,8 +1092,7 @@ export class FetchTool implements AgentTool<typeof fetchSchema, FetchToolDetails
1113
1092
  throw new ToolAbortError();
1114
1093
  }
1115
1094
 
1116
- const useKagiSummarizer = this.session.settings.get("fetch.useKagiSummarizer");
1117
- const result = await renderUrl(url, effectiveTimeout, raw, useKagiSummarizer, signal);
1095
+ const result = await renderUrl(url, effectiveTimeout, raw, signal);
1118
1096
  const truncation = truncateHead(result.content, {
1119
1097
  maxBytes: DEFAULT_MAX_BYTES,
1120
1098
  maxLines: FETCH_DEFAULT_MAX_LINES,
@@ -27,6 +27,7 @@ import { ExitPlanModeTool } from "./exit-plan-mode";
27
27
  import { FetchTool } from "./fetch";
28
28
  import { FindTool } from "./find";
29
29
  import { GrepTool } from "./grep";
30
+ import { InspectImageTool } from "./inspect-image";
30
31
  import { NotebookTool } from "./notebook";
31
32
  import { wrapToolWithMetaNotice } from "./output-meta";
32
33
  import { PythonTool } from "./python";
@@ -62,6 +63,7 @@ export * from "./fetch";
62
63
  export * from "./find";
63
64
  export * from "./gemini-image";
64
65
  export * from "./grep";
66
+ export * from "./inspect-image";
65
67
  export * from "./notebook";
66
68
  export * from "./pending-action";
67
69
  export * from "./python";
@@ -170,6 +172,7 @@ export const BUILTIN_TOOLS: Record<string, ToolFactory> = {
170
172
  lsp: LspTool.createIf,
171
173
  notebook: s => new NotebookTool(s),
172
174
  read: s => new ReadTool(s),
175
+ inspect_image: s => new InspectImageTool(s),
173
176
  browser: s => new BrowserTool(s),
174
177
  checkpoint: CheckpointTool.createIf,
175
178
  rewind: RewindTool.createIf,
@@ -313,6 +316,7 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
313
316
  if (name === "ast_edit") return session.settings.get("astEdit.enabled");
314
317
  if (name === "render_mermaid") return session.settings.get("renderMermaid.enabled");
315
318
  if (name === "notebook") return session.settings.get("notebook.enabled");
319
+ if (name === "inspect_image") return session.settings.get("inspect_image.enabled");
316
320
  if (name === "fetch") return session.settings.get("fetch.enabled");
317
321
  if (name === "web_search") return session.settings.get("web_search.enabled");
318
322
  if (name === "lsp") return session.settings.get("lsp.enabled");
@@ -0,0 +1,103 @@
1
+ import type { Component } from "@oh-my-pi/pi-tui";
2
+ import { Text } from "@oh-my-pi/pi-tui";
3
+ import type { RenderResultOptions } from "../extensibility/custom-tools/types";
4
+ import type { Theme } from "../modes/theme/theme";
5
+ import { renderStatusLine } from "../tui";
6
+ import { formatExpandHint, replaceTabs, shortenPath, truncateToWidth } from "./render-utils";
7
+
8
/** Arguments of an `inspect_image` call that the renderer displays; either field may be missing. */
interface InspectImageRenderArgs {
	path?: string;
	question?: string;
}

/** Result metadata: `imagePath` feeds the header label, `model` and `mimeType` the dimmed footer. */
interface InspectImageRendererDetails {
	model: string;
	imagePath: string;
	mimeType: string;
}

/** Shape of the tool result read by `renderResult`. */
interface InspectImageRendererResult {
	content: Array<{ type: string; text?: string }>;
	details?: InspectImageRendererDetails;
	isError?: boolean;
}

/** Width the question preview is truncated to. */
const INSPECT_QUESTION_PREVIEW_WIDTH = 100;
/** Number of output lines shown when the result is collapsed. */
const INSPECT_OUTPUT_COLLAPSED_LINES = 4;
/** Number of output lines shown when the result is expanded. */
const INSPECT_OUTPUT_EXPANDED_LINES = 16;
/** Width each output line is truncated to. */
const INSPECT_OUTPUT_LINE_WIDTH = 120;

/** Formats the "Question:" line that hangs off the header using the requested tree connector. */
function renderInspectQuestionLine(uiTheme: Theme, connector: "last" | "branch", question: string): string {
	const glyph = uiTheme.fg("dim", uiTheme.tree[connector]);
	const label = uiTheme.fg("dim", "Question:");
	const preview = uiTheme.fg("accent", truncateToWidth(replaceTabs(question), INSPECT_QUESTION_PREVIEW_WIDTH));
	return ` ${glyph} ${label} ${preview}`;
}

/** TUI renderer for the `inspect_image` tool: a pending header for the call, a status block for the result. */
export const inspectImageToolRenderer = {
	/**
	 * Renders the in-flight call: a pending status line with the shortened path ("…" when no
	 * path is given) and, if a non-blank question is present, a preview of it underneath.
	 */
	renderCall(args: InspectImageRenderArgs, _options: RenderResultOptions, uiTheme: Theme): Component {
		const shownPath = args.path ? shortenPath(args.path) : "…";
		const header = renderStatusLine({ icon: "pending", title: "Inspect Image", description: shownPath }, uiTheme);
		const asked = args.question?.trim();
		const body = asked ? `${header}\n${renderInspectQuestionLine(uiTheme, "last", asked)}` : header;
		return new Text(body, 0, 0);
	},

	/**
	 * Renders the finished call: status header, optional question preview, the first text
	 * content (limited to the collapsed/expanded line budget, with a "more lines" hint when
	 * cut), and a dimmed footer listing model and MIME type when known.
	 */
	renderResult(
		result: InspectImageRendererResult,
		options: RenderResultOptions,
		uiTheme: Theme,
		args?: InspectImageRenderArgs,
	): Component {
		const meta = result.details;
		// Prefer the path reported by the tool over the one the caller asked for.
		const sourcePath = meta?.imagePath ?? args?.path ?? "";
		const footerParts = [meta?.model, meta?.mimeType].filter((part): part is string => Boolean(part));

		const lines: string[] = [
			renderStatusLine(
				{
					icon: result.isError ? "error" : "success",
					title: "Inspect Image",
					description: sourcePath ? shortenPath(sourcePath) : "image",
				},
				uiTheme,
			),
		];

		const asked = args?.question?.trim();
		if (asked) {
			lines.push(renderInspectQuestionLine(uiTheme, "branch", asked));
		}

		const answer = result.content.find(part => part.type === "text")?.text?.trimEnd() ?? "";
		if (answer) {
			lines.push("");
			const answerLines = replaceTabs(answer).split("\n");
			const budget = options.expanded ? INSPECT_OUTPUT_EXPANDED_LINES : INSPECT_OUTPUT_COLLAPSED_LINES;
			lines.push(
				...answerLines
					.slice(0, budget)
					.map(line => uiTheme.fg("toolOutput", truncateToWidth(line, INSPECT_OUTPUT_LINE_WIDTH))),
			);

			const hidden = answerLines.length - budget;
			if (hidden > 0) {
				const hint = formatExpandHint(uiTheme, options.expanded, true);
				lines.push(`${uiTheme.fg("dim", `… ${hidden} more lines`)}${hint ? ` ${hint}` : ""}`);
			}
		} else {
			lines.push(uiTheme.fg("dim", "(no output)"));
		}

		// The footer is the same for both the empty and the non-empty case.
		if (footerParts.length > 0) {
			lines.push("", uiTheme.fg("dim", footerParts.join(" · ")));
		}

		return new Text(lines.join("\n"), 0, 0);
	},
	mergeCallAndResult: true,
};
@@ -0,0 +1,168 @@
1
+ import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
2
+ import { type Api, type AssistantMessage, completeSimple, type Model } from "@oh-my-pi/pi-ai";
3
+ import { type Static, Type } from "@sinclair/typebox";
4
+ import { expandRoleAlias, resolveModelFromString } from "../config/model-resolver";
5
+ import { renderPromptTemplate } from "../config/prompt-templates";
6
+ import inspectImageDescription from "../prompts/tools/inspect-image.md" with { type: "text" };
7
+ import inspectImageSystemPromptTemplate from "../prompts/tools/inspect-image-system.md" with { type: "text" };
8
+ import {
9
+ ImageInputTooLargeError,
10
+ type LoadedImageInput,
11
+ loadImageInput,
12
+ MAX_IMAGE_INPUT_BYTES,
13
+ } from "../utils/image-input";
14
+ import type { ToolSession } from "./index";
15
+ import { ToolError } from "./tool-errors";
16
+
17
// Parameter schema for inspect_image: two string fields (`path`, `question`);
// `additionalProperties: false` disallows any other property.
+ const inspectImageSchema = Type.Object(
18
+ {
19
+ path: Type.String({ description: "Filesystem path to an image" }),
20
+ question: Type.String({ description: "Question to ask about the image" }),
21
+ },
22
+ { additionalProperties: false },
23
+ );
24
+
25
// Static TypeScript type derived from the schema above.
+ export type InspectImageParams = Static<typeof inspectImageSchema>;
26
+
27
// Details returned with a successful result. `execute` fills `model` with
// "<provider>/<id>" of the model used, and `imagePath` / `mimeType` with the
// `resolvedPath` / `mimeType` of the loaded image input.
+ export interface InspectImageToolDetails {
28
+ model: string;
29
+ imagePath: string;
30
+ mimeType: string;
31
+ }
32
+
33
/**
 * Collects the text of every `text` content part of an assistant message, in order,
 * concatenates them without a separator and trims the combined string.
 *
 * @param message Assistant message whose content parts are scanned.
 * @returns The trimmed concatenation; an empty string when there are no text parts.
 */
function extractResponseText(message: AssistantMessage): string {
	const pieces: string[] = [];
	for (const part of message.content) {
		if (part.type === "text") {
			pieces.push(part.text);
		}
	}
	return pieces.join("").trim();
}
40
+
41
/**
 * Tool that sends a single image plus a question to a model with image input and returns
 * the model's text answer.
 */
export class InspectImageTool implements AgentTool<typeof inspectImageSchema, InspectImageToolDetails> {
	readonly name = "inspect_image";
	readonly label = "InspectImage";
	readonly description: string;
	readonly parameters = inspectImageSchema;
	readonly strict = true;

	/**
	 * @param session Tool session supplying settings, the working directory and the model registry.
	 * @param completeImageRequest Completion function used for the model call; defaults to `completeSimple`.
	 */
	constructor(
		private readonly session: ToolSession,
		private readonly completeImageRequest: typeof completeSimple = completeSimple,
	) {
		this.description = renderPromptTemplate(inspectImageDescription);
	}

	/**
	 * Resolves a model, loads the image at `params.path` and asks the model `params.question`.
	 *
	 * Model candidates are tried in order: the `pi/vision` role, the `pi/default` role, the
	 * session's active model string, then the first available model. The first one that
	 * resolves is used and must list "image" among its inputs.
	 *
	 * @param params Image path and question.
	 * @param signal Optional cancellation signal; checked before the credential lookup and
	 *   before the model request, and forwarded to the request itself.
	 * @returns The model's text answer plus the model id, resolved image path and MIME type.
	 * @throws ToolError when images are blocked by settings, no registry/model/API key is
	 *   available, the resolved model lacks image input, the image is too large or not a
	 *   supported format, the call is cancelled, the request fails, or no text is returned.
	 */
	async execute(
		_toolCallId: string,
		params: InspectImageParams,
		signal?: AbortSignal,
		_onUpdate?: AgentToolUpdateCallback<InspectImageToolDetails>,
		_context?: AgentToolContext,
	): Promise<AgentToolResult<InspectImageToolDetails>> {
		// Same error as the post-request abort path, raised before doing avoidable work.
		const ensureNotAborted = (): void => {
			if (signal?.aborted) {
				throw new ToolError("inspect_image request aborted.");
			}
		};

		if (this.session.settings.get("images.blockImages")) {
			throw new ToolError(
				"Image submission is disabled by settings (images.blockImages=true). Disable it to use inspect_image.",
			);
		}

		const modelRegistry = this.session.modelRegistry;
		if (!modelRegistry) {
			throw new ToolError("Model registry is unavailable for inspect_image.");
		}

		const availableModels = modelRegistry.getAvailable();
		if (availableModels.length === 0) {
			throw new ToolError("No models available for inspect_image.");
		}

		const matchPreferences = { usageOrder: this.session.settings.getStorage()?.getModelUsageOrder() };
		const resolvePattern = (pattern: string | undefined): Model<Api> | undefined => {
			if (!pattern) return undefined;
			const expanded = expandRoleAlias(pattern, this.session.settings);
			return resolveModelFromString(expanded, availableModels, matchPreferences);
		};

		const activeModelPattern = this.session.getActiveModelString?.() ?? this.session.getModelString?.();
		const model =
			resolvePattern("pi/vision") ??
			resolvePattern("pi/default") ??
			resolvePattern(activeModelPattern) ??
			availableModels[0];
		if (!model) {
			throw new ToolError("Unable to resolve a model for inspect_image.");
		}

		if (!model.input.includes("image")) {
			throw new ToolError(
				`Resolved model ${model.provider}/${model.id} does not support image input. Configure a vision-capable model for modelRoles.vision.`,
			);
		}

		// Everything above is synchronous validation; stop here if the call was already cancelled
		// so no credential lookup or file read happens for a dead request.
		ensureNotAborted();

		const apiKey = await modelRegistry.getApiKey(model);
		if (!apiKey) {
			throw new ToolError(
				`No API key available for ${model.provider}/${model.id}. Configure credentials for this provider or choose another vision-capable model.`,
			);
		}

		let imageInput: LoadedImageInput | null;
		try {
			imageInput = await loadImageInput({
				path: params.path,
				cwd: this.session.cwd,
				autoResize: this.session.settings.get("images.autoResize"),
				maxBytes: MAX_IMAGE_INPUT_BYTES,
			});
		} catch (error) {
			// Only the size-limit failure is translated; any other load error propagates unchanged.
			if (error instanceof ImageInputTooLargeError) {
				throw new ToolError(error.message);
			}
			throw error;
		}

		if (!imageInput) {
			throw new ToolError("inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.");
		}

		// Cancellation may have arrived while the key or the image was being loaded.
		ensureNotAborted();

		const response = await this.completeImageRequest(
			model,
			{
				systemPrompt: renderPromptTemplate(inspectImageSystemPromptTemplate),
				messages: [
					{
						role: "user",
						content: [
							{ type: "image", data: imageInput.data, mimeType: imageInput.mimeType },
							{ type: "text", text: params.question },
						],
						timestamp: Date.now(),
					},
				],
			},
			{ apiKey, signal },
		);

		if (response.stopReason === "error") {
			throw new ToolError(response.errorMessage ?? "inspect_image request failed.");
		}
		if (response.stopReason === "aborted") {
			throw new ToolError("inspect_image request aborted.");
		}

		const text = extractResponseText(response);
		if (!text) {
			throw new ToolError("inspect_image model returned no text output.");
		}

		return {
			content: [{ type: "text", text }],
			details: {
				model: `${model.provider}/${model.id}`,
				imagePath: imageInput.resolvedPath,
				mimeType: imageInput.mimeType,
			},
		};
	}
}
167
+
168
+ export { inspectImageToolRenderer } from "./inspect-image-renderer";