@oh-my-pi/pi-coding-agent 13.9.12 → 13.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/src/cli/args.ts +17 -16
- package/src/config/model-registry.ts +14 -4
- package/src/config/settings-schema.ts +9 -0
- package/src/modes/controllers/event-controller.ts +12 -9
- package/src/modes/controllers/input-controller.ts +13 -0
- package/src/modes/interactive-mode.ts +2 -0
- package/src/modes/types.ts +1 -0
- package/src/modes/utils/ui-helpers.ts +12 -0
- package/src/prompts/system/system-prompt.md +97 -107
- package/src/prompts/tools/ast-edit.md +5 -2
- package/src/prompts/tools/ast-grep.md +5 -2
- package/src/prompts/tools/inspect-image-system.md +20 -0
- package/src/prompts/tools/inspect-image.md +32 -0
- package/src/session/compaction/compaction.ts +26 -29
- package/src/session/session-manager.ts +10 -7
- package/src/tools/index.ts +4 -0
- package/src/tools/inspect-image-renderer.ts +103 -0
- package/src/tools/inspect-image.ts +168 -0
- package/src/tools/read.ts +62 -49
- package/src/tools/renderers.ts +2 -0
- package/src/utils/image-input.ts +264 -0
|
@@ -8,11 +8,12 @@ Performs structural code search using AST matching via native ast-grep.
|
|
|
8
8
|
- Multiple patterns run in one native pass; results are merged and then `offset`/`limit` are applied to the combined match set
|
|
9
9
|
- Use `sel` only for contextual pattern mode; otherwise provide direct patterns
|
|
10
10
|
- In contextual pattern mode, results are returned for the selected node (`sel`), not the outer wrapper used to make the pattern parse
|
|
11
|
-
- For variadic arguments
|
|
11
|
+
- For variadic captures (arguments, fields, statement lists), use `$$$NAME` (not `$$NAME`)
|
|
12
12
|
- Patterns must parse as a single valid AST node for the target language; if a bare pattern fails, wrap it in valid context or use `sel`
|
|
13
|
+
- If ast-grep reports `Multiple AST nodes are detected`, your pattern is not a single parseable node; wrap method snippets in valid context (for example `class $_ { … }`) and use `sel` to target the inner node
|
|
13
14
|
- Patterns match AST structure, not text — whitespace/formatting differences are ignored
|
|
14
15
|
- When the same metavariable appears multiple times, all occurrences must match identical code
|
|
15
|
-
- For TypeScript declarations and methods, prefer shapes that tolerate annotations you do not care about, e.g. `async function $NAME($$$ARGS): $_ { $$$BODY }` or `class $_ { method(
|
|
16
|
+
- For TypeScript declarations and methods, prefer shapes that tolerate annotations you do not care about, e.g. `async function $NAME($$$ARGS): $_ { $$$BODY }` or `class $_ { method($ARG: $_): $_ { $$$BODY } }` instead of omitting annotations entirely
|
|
16
17
|
- Metavariables must be the sole content of an AST node; partial-text metavariables like `prefix$VAR`, `"hello $NAME"`, or `a $OP b` do NOT work — match the whole node instead
|
|
17
18
|
- `$$$` captures are lazy (non-greedy): they stop when the next element in the pattern can match; place the most specific node after `$$$` to control where capture ends
|
|
18
19
|
- `$_` is a non-capturing wildcard (matches any single node without binding); use it when you need to tolerate a node but don't need its value
|
|
@@ -38,6 +39,8 @@ Performs structural code search using AST matching via native ast-grep.
|
|
|
38
39
|
`{"pat":["foo()"],"sel":"identifier","lang":"typescript","path":"src/utils.ts"}`
|
|
39
40
|
- Match a TypeScript function declaration without caring about its exact return type:
|
|
40
41
|
`{"pat":["async function processItems($$$ARGS): $_ { $$$BODY }"],"sel":"function_declaration","lang":"typescript","path":"src/worker.ts"}`
|
|
42
|
+
- Match a TypeScript method body fragment by wrapping it in parseable context and selecting the method node:
|
|
43
|
+
`{"pat":["class $_ { async execute($INPUT: $_) { $$$BEFORE; const $PARSED = $_.parse($INPUT); $$$AFTER } }"],"sel":"method_definition","lang":"typescript","path":"src/tools/todo.ts"}`
|
|
41
44
|
- Loosest existence check for a symbol in one file:
|
|
42
45
|
`{"pat":["processItems"],"sel":"identifier","lang":"typescript","path":"src/worker.ts"}`
|
|
43
46
|
</examples>
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
You are an image-analysis assistant.
|
|
2
|
+
|
|
3
|
+
Core behavior:
|
|
4
|
+
- Be evidence-first: distinguish direct observations from inferences.
|
|
5
|
+
- If something is unclear, say that it is uncertain rather than guessing.
|
|
6
|
+
- Do not fabricate unreadable or occluded details.
|
|
7
|
+
- Keep output compact and useful.
|
|
8
|
+
|
|
9
|
+
Default output format (unless the question asks for a different format):
|
|
10
|
+
1) Answer
|
|
11
|
+
2) Key evidence
|
|
12
|
+
3) Caveats / uncertainty
|
|
13
|
+
|
|
14
|
+
For OCR-style requests:
|
|
15
|
+
- Preserve exact visible text, including casing and punctuation.
|
|
16
|
+
- If text is partially unreadable, mark the unreadable segments explicitly.
|
|
17
|
+
|
|
18
|
+
For UI/screenshot debugging requests:
|
|
19
|
+
- Focus on visible states, labels, toggles, error messages, disabled controls, and relevant affordances.
|
|
20
|
+
- Separate observed UI state from probable root cause.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Inspects an image file with a vision-capable model and returns compact text analysis.
|
|
2
|
+
|
|
3
|
+
<instruction>
|
|
4
|
+
- Use this for image understanding tasks (OCR, UI/screenshot debugging, scene/object questions)
|
|
5
|
+
- Provide `path` to the local image file
|
|
6
|
+
- Write a specific `question`:
|
|
7
|
+
- what to inspect
|
|
8
|
+
- constraints (for example: "quote visible text verbatim", "only report confirmed findings")
|
|
9
|
+
- desired output format (bullets/table/JSON/short answer)
|
|
10
|
+
- Keep `question` grounded in observable evidence, and ask the model to state its uncertainty when details are unclear
|
|
11
|
+
- Use this tool over `read` when the goal is image analysis
|
|
12
|
+
</instruction>
|
|
13
|
+
|
|
14
|
+
<examples>
|
|
15
|
+
- OCR with strict formatting:
|
|
16
|
+
- `{"path":"screenshots/error.png","question":"Extract all visible text verbatim. Return as bullet list in reading order."}`
|
|
17
|
+
- Screenshot debugging:
|
|
18
|
+
- `{"path":"screenshots/settings.png","question":"Identify the likely cause of the disabled Save button. Return: (1) observations, (2) likely cause, (3) confidence."}`
|
|
19
|
+
- Scene/object question:
|
|
20
|
+
- `{"path":"photos/shelf.jpg","question":"List all clearly visible product labels and their shelf positions (top/middle/bottom). If unreadable, say unreadable."}`
|
|
21
|
+
</examples>
|
|
22
|
+
|
|
23
|
+
<output>
|
|
24
|
+
- Returns text-only analysis from the vision model
|
|
25
|
+
- No image content blocks are returned in tool output
|
|
26
|
+
</output>
|
|
27
|
+
|
|
28
|
+
<critical>
|
|
29
|
+
- Parameters are strict: only `path` and `question` are allowed
|
|
30
|
+
- If image submission is blocked by settings, the tool will fail with an actionable error
|
|
31
|
+
- If the configured model does not support image input, configure a vision-capable model role before retrying
|
|
32
|
+
</critical>
|
|
@@ -13,7 +13,11 @@ import {
|
|
|
13
13
|
OPENAI_HEADERS,
|
|
14
14
|
} from "@oh-my-pi/pi-ai/providers/openai-codex/constants";
|
|
15
15
|
import { transformMessages } from "@oh-my-pi/pi-ai/providers/transform-messages";
|
|
16
|
-
import {
|
|
16
|
+
import {
|
|
17
|
+
getOpenAIResponsesHistoryItems,
|
|
18
|
+
getOpenAIResponsesHistoryPayload,
|
|
19
|
+
normalizeResponsesToolCallId,
|
|
20
|
+
} from "@oh-my-pi/pi-ai/utils";
|
|
17
21
|
import { logger } from "@oh-my-pi/pi-utils";
|
|
18
22
|
import { renderPromptTemplate } from "../../config/prompt-templates";
|
|
19
23
|
import compactionShortSummaryPrompt from "../../prompts/compaction/compaction-short-summary.md" with { type: "text" };
|
|
@@ -473,6 +477,7 @@ type OpenAiRemoteCompactionItem = {
|
|
|
473
477
|
};
|
|
474
478
|
|
|
475
479
|
interface OpenAiRemoteCompactionPreserveData {
|
|
480
|
+
provider?: string;
|
|
476
481
|
replacementHistory: Array<Record<string, unknown>>;
|
|
477
482
|
compactionItem: OpenAiRemoteCompactionItem;
|
|
478
483
|
}
|
|
@@ -523,7 +528,7 @@ function getPreservedOpenAiRemoteCompactionData(
|
|
|
523
528
|
): OpenAiRemoteCompactionPreserveData | undefined {
|
|
524
529
|
const candidate = preserveData?.[OPENAI_REMOTE_COMPACTION_PRESERVE_KEY];
|
|
525
530
|
if (!candidate || typeof candidate !== "object") return undefined;
|
|
526
|
-
const maybeData = candidate as { replacementHistory?: unknown; compactionItem?: unknown };
|
|
531
|
+
const maybeData = candidate as { provider?: unknown; replacementHistory?: unknown; compactionItem?: unknown };
|
|
527
532
|
if (!Array.isArray(maybeData.replacementHistory)) return undefined;
|
|
528
533
|
const maybeItem = maybeData.compactionItem;
|
|
529
534
|
if (!maybeItem || typeof maybeItem !== "object") return undefined;
|
|
@@ -535,6 +540,7 @@ function getPreservedOpenAiRemoteCompactionData(
|
|
|
535
540
|
return undefined;
|
|
536
541
|
}
|
|
537
542
|
return {
|
|
543
|
+
provider: typeof maybeData.provider === "string" ? maybeData.provider : undefined,
|
|
538
544
|
replacementHistory: maybeData.replacementHistory as Array<Record<string, unknown>>,
|
|
539
545
|
compactionItem: compactionItem as unknown as OpenAiRemoteCompactionItem,
|
|
540
546
|
};
|
|
@@ -634,15 +640,6 @@ function trimOpenAiCompactInput(
|
|
|
634
640
|
return trimmed;
|
|
635
641
|
}
|
|
636
642
|
|
|
637
|
-
function getOpenAIResponsesHistoryItems(
|
|
638
|
-
providerPayload: { type?: string; items?: unknown } | undefined,
|
|
639
|
-
): Array<Record<string, unknown>> | undefined {
|
|
640
|
-
if (providerPayload?.type !== "openaiResponsesHistory" || !Array.isArray(providerPayload.items)) {
|
|
641
|
-
return undefined;
|
|
642
|
-
}
|
|
643
|
-
return providerPayload.items as Array<Record<string, unknown>>;
|
|
644
|
-
}
|
|
645
|
-
|
|
646
643
|
function collectKnownOpenAiCallIds(items: Array<Record<string, unknown>>): Set<string> {
|
|
647
644
|
const knownCallIds = new Set<string>();
|
|
648
645
|
for (const item of items) {
|
|
@@ -667,8 +664,8 @@ function buildOpenAiNativeHistory(
|
|
|
667
664
|
let knownCallIds = collectKnownOpenAiCallIds(input);
|
|
668
665
|
for (const message of transformedMessages) {
|
|
669
666
|
if (message.role === "user" || message.role === "developer") {
|
|
670
|
-
const providerPayload = (message as { providerPayload?:
|
|
671
|
-
const historyItems = getOpenAIResponsesHistoryItems(providerPayload);
|
|
667
|
+
const providerPayload = (message as { providerPayload?: AssistantMessage["providerPayload"] }).providerPayload;
|
|
668
|
+
const historyItems = getOpenAIResponsesHistoryItems(providerPayload, model.provider);
|
|
672
669
|
if (historyItems) {
|
|
673
670
|
input.push(...historyItems);
|
|
674
671
|
knownCallIds = collectKnownOpenAiCallIds(input);
|
|
@@ -705,22 +702,22 @@ function buildOpenAiNativeHistory(
|
|
|
705
702
|
}
|
|
706
703
|
|
|
707
704
|
if (message.role === "assistant") {
|
|
708
|
-
const
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
705
|
+
const assistant = message as AssistantMessage;
|
|
706
|
+
const providerPayload = getOpenAIResponsesHistoryPayload(
|
|
707
|
+
assistant.providerPayload,
|
|
708
|
+
model.provider,
|
|
709
|
+
assistant.provider,
|
|
710
|
+
);
|
|
711
|
+
if (providerPayload) {
|
|
712
|
+
if (providerPayload.dt) {
|
|
713
|
+
input.push(...providerPayload.items);
|
|
715
714
|
} else {
|
|
716
|
-
input.splice(0, input.length, ...
|
|
715
|
+
input.splice(0, input.length, ...providerPayload.items);
|
|
717
716
|
}
|
|
718
717
|
knownCallIds = collectKnownOpenAiCallIds(input);
|
|
719
718
|
msgIndex++;
|
|
720
719
|
continue;
|
|
721
720
|
}
|
|
722
|
-
|
|
723
|
-
const assistant = message as AssistantMessage;
|
|
724
721
|
const isDifferentModel =
|
|
725
722
|
assistant.model !== model.id && assistant.provider === model.provider && assistant.api === model.api;
|
|
726
723
|
|
|
@@ -889,7 +886,7 @@ async function requestOpenAiRemoteCompaction(
|
|
|
889
886
|
});
|
|
890
887
|
throw new Error("Remote compaction response missing compaction item");
|
|
891
888
|
}
|
|
892
|
-
return { replacementHistory, compactionItem };
|
|
889
|
+
return { provider: model.provider, replacementHistory, compactionItem };
|
|
893
890
|
}
|
|
894
891
|
|
|
895
892
|
interface RemoteCompactionRequest {
|
|
@@ -1224,11 +1221,11 @@ export async function compact(
|
|
|
1224
1221
|
if (settings.remoteEnabled !== false && shouldUseOpenAiRemoteCompaction(model)) {
|
|
1225
1222
|
const previousRemoteCompaction = getPreservedOpenAiRemoteCompactionData(previousPreserveData);
|
|
1226
1223
|
const remoteMessages = [...messagesToSummarize, ...turnPrefixMessages, ...recentMessages];
|
|
1227
|
-
const
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
);
|
|
1224
|
+
const previousReplacementHistory =
|
|
1225
|
+
previousRemoteCompaction?.provider === model.provider
|
|
1226
|
+
? previousRemoteCompaction.replacementHistory
|
|
1227
|
+
: undefined;
|
|
1228
|
+
const remoteHistory = buildOpenAiNativeHistory(remoteMessages, model, previousReplacementHistory);
|
|
1232
1229
|
if (remoteHistory.length > 0) {
|
|
1233
1230
|
try {
|
|
1234
1231
|
const remote = await requestOpenAiRemoteCompaction(
|
|
@@ -532,16 +532,19 @@ export function buildSessionContext(
|
|
|
532
532
|
};
|
|
533
533
|
|
|
534
534
|
if (compaction) {
|
|
535
|
-
const
|
|
535
|
+
const providerPayload: ProviderPayload | undefined = (() => {
|
|
536
536
|
const candidate = compaction.preserveData?.openaiRemoteCompaction;
|
|
537
537
|
if (!candidate || typeof candidate !== "object") return undefined;
|
|
538
|
-
const
|
|
539
|
-
if (
|
|
540
|
-
|
|
538
|
+
const remote = candidate as { provider?: unknown; replacementHistory?: unknown };
|
|
539
|
+
if (typeof remote.provider !== "string" || remote.provider.length === 0) return undefined;
|
|
540
|
+
if (!Array.isArray(remote.replacementHistory)) return undefined;
|
|
541
|
+
return {
|
|
542
|
+
type: "openaiResponsesHistory",
|
|
543
|
+
provider: remote.provider,
|
|
544
|
+
items: remote.replacementHistory as Array<Record<string, unknown>>,
|
|
545
|
+
};
|
|
541
546
|
})();
|
|
542
|
-
const
|
|
543
|
-
? { type: "openaiResponsesHistory", items: remoteReplacementHistory }
|
|
544
|
-
: undefined;
|
|
547
|
+
const remoteReplacementHistory = providerPayload?.items;
|
|
545
548
|
|
|
546
549
|
// Emit summary first
|
|
547
550
|
messages.push(
|
package/src/tools/index.ts
CHANGED
|
@@ -27,6 +27,7 @@ import { ExitPlanModeTool } from "./exit-plan-mode";
|
|
|
27
27
|
import { FetchTool } from "./fetch";
|
|
28
28
|
import { FindTool } from "./find";
|
|
29
29
|
import { GrepTool } from "./grep";
|
|
30
|
+
import { InspectImageTool } from "./inspect-image";
|
|
30
31
|
import { NotebookTool } from "./notebook";
|
|
31
32
|
import { wrapToolWithMetaNotice } from "./output-meta";
|
|
32
33
|
import { PythonTool } from "./python";
|
|
@@ -62,6 +63,7 @@ export * from "./fetch";
|
|
|
62
63
|
export * from "./find";
|
|
63
64
|
export * from "./gemini-image";
|
|
64
65
|
export * from "./grep";
|
|
66
|
+
export * from "./inspect-image";
|
|
65
67
|
export * from "./notebook";
|
|
66
68
|
export * from "./pending-action";
|
|
67
69
|
export * from "./python";
|
|
@@ -170,6 +172,7 @@ export const BUILTIN_TOOLS: Record<string, ToolFactory> = {
|
|
|
170
172
|
lsp: LspTool.createIf,
|
|
171
173
|
notebook: s => new NotebookTool(s),
|
|
172
174
|
read: s => new ReadTool(s),
|
|
175
|
+
inspect_image: s => new InspectImageTool(s),
|
|
173
176
|
browser: s => new BrowserTool(s),
|
|
174
177
|
checkpoint: CheckpointTool.createIf,
|
|
175
178
|
rewind: RewindTool.createIf,
|
|
@@ -313,6 +316,7 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
|
|
|
313
316
|
if (name === "ast_edit") return session.settings.get("astEdit.enabled");
|
|
314
317
|
if (name === "render_mermaid") return session.settings.get("renderMermaid.enabled");
|
|
315
318
|
if (name === "notebook") return session.settings.get("notebook.enabled");
|
|
319
|
+
if (name === "inspect_image") return session.settings.get("inspect_image.enabled");
|
|
316
320
|
if (name === "fetch") return session.settings.get("fetch.enabled");
|
|
317
321
|
if (name === "web_search") return session.settings.get("web_search.enabled");
|
|
318
322
|
if (name === "lsp") return session.settings.get("lsp.enabled");
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import type { Component } from "@oh-my-pi/pi-tui";
|
|
2
|
+
import { Text } from "@oh-my-pi/pi-tui";
|
|
3
|
+
import type { RenderResultOptions } from "../extensibility/custom-tools/types";
|
|
4
|
+
import type { Theme } from "../modes/theme/theme";
|
|
5
|
+
import { renderStatusLine } from "../tui";
|
|
6
|
+
import { formatExpandHint, replaceTabs, shortenPath, truncateToWidth } from "./render-utils";
|
|
7
|
+
|
|
8
|
+
interface InspectImageRenderArgs {
|
|
9
|
+
path?: string;
|
|
10
|
+
question?: string;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
interface InspectImageRendererDetails {
|
|
14
|
+
model: string;
|
|
15
|
+
imagePath: string;
|
|
16
|
+
mimeType: string;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
interface InspectImageRendererResult {
|
|
20
|
+
content: Array<{ type: string; text?: string }>;
|
|
21
|
+
details?: InspectImageRendererDetails;
|
|
22
|
+
isError?: boolean;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const INSPECT_QUESTION_PREVIEW_WIDTH = 100;
|
|
26
|
+
const INSPECT_OUTPUT_COLLAPSED_LINES = 4;
|
|
27
|
+
const INSPECT_OUTPUT_EXPANDED_LINES = 16;
|
|
28
|
+
const INSPECT_OUTPUT_LINE_WIDTH = 120;
|
|
29
|
+
|
|
30
|
+
export const inspectImageToolRenderer = {
|
|
31
|
+
renderCall(args: InspectImageRenderArgs, _options: RenderResultOptions, uiTheme: Theme): Component {
|
|
32
|
+
const rawPath = args.path ?? "";
|
|
33
|
+
const pathDisplay = rawPath ? shortenPath(rawPath) : "…";
|
|
34
|
+
const header = renderStatusLine({ icon: "pending", title: "Inspect Image", description: pathDisplay }, uiTheme);
|
|
35
|
+
const question = args.question?.trim();
|
|
36
|
+
if (!question) {
|
|
37
|
+
return new Text(header, 0, 0);
|
|
38
|
+
}
|
|
39
|
+
const questionLine = ` ${uiTheme.fg("dim", uiTheme.tree.last)} ${uiTheme.fg("dim", "Question:")} ${uiTheme.fg("accent", truncateToWidth(replaceTabs(question), INSPECT_QUESTION_PREVIEW_WIDTH))}`;
|
|
40
|
+
return new Text(`${header}\n${questionLine}`, 0, 0);
|
|
41
|
+
},
|
|
42
|
+
|
|
43
|
+
renderResult(
|
|
44
|
+
result: InspectImageRendererResult,
|
|
45
|
+
options: RenderResultOptions,
|
|
46
|
+
uiTheme: Theme,
|
|
47
|
+
args?: InspectImageRenderArgs,
|
|
48
|
+
): Component {
|
|
49
|
+
const details = result.details;
|
|
50
|
+
const rawPath = details?.imagePath ?? args?.path ?? "";
|
|
51
|
+
const pathDisplay = rawPath ? shortenPath(rawPath) : "image";
|
|
52
|
+
const metaParts: string[] = [];
|
|
53
|
+
if (details?.model) metaParts.push(details.model);
|
|
54
|
+
if (details?.mimeType) metaParts.push(details.mimeType);
|
|
55
|
+
const header = renderStatusLine(
|
|
56
|
+
{
|
|
57
|
+
icon: result.isError ? "error" : "success",
|
|
58
|
+
title: "Inspect Image",
|
|
59
|
+
description: pathDisplay,
|
|
60
|
+
},
|
|
61
|
+
uiTheme,
|
|
62
|
+
);
|
|
63
|
+
|
|
64
|
+
const lines: string[] = [header];
|
|
65
|
+
const question = args?.question?.trim();
|
|
66
|
+
if (question) {
|
|
67
|
+
lines.push(
|
|
68
|
+
` ${uiTheme.fg("dim", uiTheme.tree.branch)} ${uiTheme.fg("dim", "Question:")} ${uiTheme.fg("accent", truncateToWidth(replaceTabs(question), INSPECT_QUESTION_PREVIEW_WIDTH))}`,
|
|
69
|
+
);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
const outputText = result.content.find(content => content.type === "text")?.text?.trimEnd() ?? "";
|
|
73
|
+
if (!outputText) {
|
|
74
|
+
lines.push(uiTheme.fg("dim", "(no output)"));
|
|
75
|
+
if (metaParts.length > 0) {
|
|
76
|
+
lines.push("");
|
|
77
|
+
lines.push(uiTheme.fg("dim", metaParts.join(" · ")));
|
|
78
|
+
}
|
|
79
|
+
return new Text(lines.join("\n"), 0, 0);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
lines.push("");
|
|
83
|
+
const outputLines = replaceTabs(outputText).split("\n");
|
|
84
|
+
const maxLines = options.expanded ? INSPECT_OUTPUT_EXPANDED_LINES : INSPECT_OUTPUT_COLLAPSED_LINES;
|
|
85
|
+
for (const line of outputLines.slice(0, maxLines)) {
|
|
86
|
+
lines.push(uiTheme.fg("toolOutput", truncateToWidth(line, INSPECT_OUTPUT_LINE_WIDTH)));
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if (outputLines.length > maxLines) {
|
|
90
|
+
const remaining = outputLines.length - maxLines;
|
|
91
|
+
const hint = formatExpandHint(uiTheme, options.expanded, true);
|
|
92
|
+
lines.push(`${uiTheme.fg("dim", `… ${remaining} more lines`)}${hint ? ` ${hint}` : ""}`);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
if (metaParts.length > 0) {
|
|
96
|
+
lines.push("");
|
|
97
|
+
lines.push(uiTheme.fg("dim", metaParts.join(" · ")));
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
return new Text(lines.join("\n"), 0, 0);
|
|
101
|
+
},
|
|
102
|
+
mergeCallAndResult: true,
|
|
103
|
+
};
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
|
|
2
|
+
import { type Api, type AssistantMessage, completeSimple, type Model } from "@oh-my-pi/pi-ai";
|
|
3
|
+
import { type Static, Type } from "@sinclair/typebox";
|
|
4
|
+
import { expandRoleAlias, resolveModelFromString } from "../config/model-resolver";
|
|
5
|
+
import { renderPromptTemplate } from "../config/prompt-templates";
|
|
6
|
+
import inspectImageDescription from "../prompts/tools/inspect-image.md" with { type: "text" };
|
|
7
|
+
import inspectImageSystemPromptTemplate from "../prompts/tools/inspect-image-system.md" with { type: "text" };
|
|
8
|
+
import {
|
|
9
|
+
ImageInputTooLargeError,
|
|
10
|
+
type LoadedImageInput,
|
|
11
|
+
loadImageInput,
|
|
12
|
+
MAX_IMAGE_INPUT_BYTES,
|
|
13
|
+
} from "../utils/image-input";
|
|
14
|
+
import type { ToolSession } from "./index";
|
|
15
|
+
import { ToolError } from "./tool-errors";
|
|
16
|
+
|
|
17
|
+
const inspectImageSchema = Type.Object(
|
|
18
|
+
{
|
|
19
|
+
path: Type.String({ description: "Filesystem path to an image" }),
|
|
20
|
+
question: Type.String({ description: "Question to ask about the image" }),
|
|
21
|
+
},
|
|
22
|
+
{ additionalProperties: false },
|
|
23
|
+
);
|
|
24
|
+
|
|
25
|
+
export type InspectImageParams = Static<typeof inspectImageSchema>;
|
|
26
|
+
|
|
27
|
+
export interface InspectImageToolDetails {
|
|
28
|
+
model: string;
|
|
29
|
+
imagePath: string;
|
|
30
|
+
mimeType: string;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
function extractResponseText(message: AssistantMessage): string {
|
|
34
|
+
return message.content
|
|
35
|
+
.filter(content => content.type === "text")
|
|
36
|
+
.map(content => content.text)
|
|
37
|
+
.join("")
|
|
38
|
+
.trim();
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export class InspectImageTool implements AgentTool<typeof inspectImageSchema, InspectImageToolDetails> {
|
|
42
|
+
readonly name = "inspect_image";
|
|
43
|
+
readonly label = "InspectImage";
|
|
44
|
+
readonly description: string;
|
|
45
|
+
readonly parameters = inspectImageSchema;
|
|
46
|
+
readonly strict = true;
|
|
47
|
+
|
|
48
|
+
constructor(
|
|
49
|
+
private readonly session: ToolSession,
|
|
50
|
+
private readonly completeImageRequest: typeof completeSimple = completeSimple,
|
|
51
|
+
) {
|
|
52
|
+
this.description = renderPromptTemplate(inspectImageDescription);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
async execute(
|
|
56
|
+
_toolCallId: string,
|
|
57
|
+
params: InspectImageParams,
|
|
58
|
+
signal?: AbortSignal,
|
|
59
|
+
_onUpdate?: AgentToolUpdateCallback<InspectImageToolDetails>,
|
|
60
|
+
_context?: AgentToolContext,
|
|
61
|
+
): Promise<AgentToolResult<InspectImageToolDetails>> {
|
|
62
|
+
if (this.session.settings.get("images.blockImages")) {
|
|
63
|
+
throw new ToolError(
|
|
64
|
+
"Image submission is disabled by settings (images.blockImages=true). Disable it to use inspect_image.",
|
|
65
|
+
);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const modelRegistry = this.session.modelRegistry;
|
|
69
|
+
if (!modelRegistry) {
|
|
70
|
+
throw new ToolError("Model registry is unavailable for inspect_image.");
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
const availableModels = modelRegistry.getAvailable();
|
|
74
|
+
if (availableModels.length === 0) {
|
|
75
|
+
throw new ToolError("No models available for inspect_image.");
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const matchPreferences = { usageOrder: this.session.settings.getStorage()?.getModelUsageOrder() };
|
|
79
|
+
const resolvePattern = (pattern: string | undefined): Model<Api> | undefined => {
|
|
80
|
+
if (!pattern) return undefined;
|
|
81
|
+
const expanded = expandRoleAlias(pattern, this.session.settings);
|
|
82
|
+
return resolveModelFromString(expanded, availableModels, matchPreferences);
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
const activeModelPattern = this.session.getActiveModelString?.() ?? this.session.getModelString?.();
|
|
86
|
+
const model =
|
|
87
|
+
resolvePattern("pi/vision") ??
|
|
88
|
+
resolvePattern("pi/default") ??
|
|
89
|
+
resolvePattern(activeModelPattern) ??
|
|
90
|
+
availableModels[0];
|
|
91
|
+
if (!model) {
|
|
92
|
+
throw new ToolError("Unable to resolve a model for inspect_image.");
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
if (!model.input.includes("image")) {
|
|
96
|
+
throw new ToolError(
|
|
97
|
+
`Resolved model ${model.provider}/${model.id} does not support image input. Configure a vision-capable model for modelRoles.vision.`,
|
|
98
|
+
);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const apiKey = await modelRegistry.getApiKey(model);
|
|
102
|
+
if (!apiKey) {
|
|
103
|
+
throw new ToolError(
|
|
104
|
+
`No API key available for ${model.provider}/${model.id}. Configure credentials for this provider or choose another vision-capable model.`,
|
|
105
|
+
);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
let imageInput: LoadedImageInput | null;
|
|
109
|
+
try {
|
|
110
|
+
imageInput = await loadImageInput({
|
|
111
|
+
path: params.path,
|
|
112
|
+
cwd: this.session.cwd,
|
|
113
|
+
autoResize: this.session.settings.get("images.autoResize"),
|
|
114
|
+
maxBytes: MAX_IMAGE_INPUT_BYTES,
|
|
115
|
+
});
|
|
116
|
+
} catch (error) {
|
|
117
|
+
if (error instanceof ImageInputTooLargeError) {
|
|
118
|
+
throw new ToolError(error.message);
|
|
119
|
+
}
|
|
120
|
+
throw error;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
if (!imageInput) {
|
|
124
|
+
throw new ToolError("inspect_image only supports PNG, JPEG, GIF, and WEBP files detected by file content.");
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
const response = await this.completeImageRequest(
|
|
128
|
+
model,
|
|
129
|
+
{
|
|
130
|
+
systemPrompt: renderPromptTemplate(inspectImageSystemPromptTemplate),
|
|
131
|
+
messages: [
|
|
132
|
+
{
|
|
133
|
+
role: "user",
|
|
134
|
+
content: [
|
|
135
|
+
{ type: "image", data: imageInput.data, mimeType: imageInput.mimeType },
|
|
136
|
+
{ type: "text", text: params.question },
|
|
137
|
+
],
|
|
138
|
+
timestamp: Date.now(),
|
|
139
|
+
},
|
|
140
|
+
],
|
|
141
|
+
},
|
|
142
|
+
{ apiKey, signal },
|
|
143
|
+
);
|
|
144
|
+
|
|
145
|
+
if (response.stopReason === "error") {
|
|
146
|
+
throw new ToolError(response.errorMessage ?? "inspect_image request failed.");
|
|
147
|
+
}
|
|
148
|
+
if (response.stopReason === "aborted") {
|
|
149
|
+
throw new ToolError("inspect_image request aborted.");
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
const text = extractResponseText(response);
|
|
153
|
+
if (!text) {
|
|
154
|
+
throw new ToolError("inspect_image model returned no text output.");
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
return {
|
|
158
|
+
content: [{ type: "text", text }],
|
|
159
|
+
details: {
|
|
160
|
+
model: `${model.provider}/${model.id}`,
|
|
161
|
+
imagePath: imageInput.resolvedPath,
|
|
162
|
+
mimeType: imageInput.mimeType,
|
|
163
|
+
},
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
export { inspectImageToolRenderer } from "./inspect-image-renderer";
|