@oh-my-pi/pi-coding-agent 3.20.1 → 3.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. package/CHANGELOG.md +107 -8
  2. package/docs/custom-tools.md +3 -3
  3. package/docs/extensions.md +226 -220
  4. package/docs/hooks.md +2 -2
  5. package/docs/sdk.md +50 -53
  6. package/examples/custom-tools/README.md +2 -17
  7. package/examples/extensions/README.md +76 -74
  8. package/examples/extensions/todo.ts +2 -5
  9. package/examples/hooks/custom-compaction.ts +2 -4
  10. package/examples/hooks/handoff.ts +1 -1
  11. package/examples/hooks/qna.ts +1 -1
  12. package/examples/sdk/02-custom-model.ts +1 -1
  13. package/examples/sdk/README.md +7 -11
  14. package/package.json +6 -6
  15. package/src/cli/args.ts +9 -6
  16. package/src/cli/file-processor.ts +1 -1
  17. package/src/cli/list-models.ts +1 -1
  18. package/src/core/agent-session.ts +16 -5
  19. package/src/core/auth-storage.ts +1 -1
  20. package/src/core/compaction/branch-summarization.ts +2 -2
  21. package/src/core/compaction/compaction.ts +2 -2
  22. package/src/core/compaction/utils.ts +1 -1
  23. package/src/core/custom-tools/types.ts +1 -1
  24. package/src/core/custom-tools/wrapper.ts +0 -1
  25. package/src/core/extensions/index.ts +1 -6
  26. package/src/core/extensions/runner.ts +1 -1
  27. package/src/core/extensions/types.ts +1 -1
  28. package/src/core/extensions/wrapper.ts +1 -8
  29. package/src/core/file-mentions.ts +5 -8
  30. package/src/core/hooks/runner.ts +2 -2
  31. package/src/core/hooks/types.ts +1 -1
  32. package/src/core/messages.ts +1 -1
  33. package/src/core/model-registry.ts +1 -1
  34. package/src/core/model-resolver.ts +1 -1
  35. package/src/core/sdk.ts +64 -105
  36. package/src/core/session-manager.ts +18 -22
  37. package/src/core/settings-manager.ts +66 -1
  38. package/src/core/slash-commands.ts +12 -5
  39. package/src/core/system-prompt.ts +49 -36
  40. package/src/core/title-generator.ts +2 -2
  41. package/src/core/tools/ask.ts +98 -4
  42. package/src/core/tools/bash-interceptor.ts +11 -4
  43. package/src/core/tools/bash.ts +121 -5
  44. package/src/core/tools/context.ts +7 -0
  45. package/src/core/tools/edit-diff.ts +73 -24
  46. package/src/core/tools/edit.ts +221 -34
  47. package/src/core/tools/exa/render.ts +4 -16
  48. package/src/core/tools/find.ts +149 -5
  49. package/src/core/tools/gemini-image.ts +279 -56
  50. package/src/core/tools/git.ts +17 -3
  51. package/src/core/tools/grep.ts +185 -5
  52. package/src/core/tools/index.test.ts +180 -0
  53. package/src/core/tools/index.ts +96 -242
  54. package/src/core/tools/ls.ts +133 -5
  55. package/src/core/tools/lsp/index.ts +32 -29
  56. package/src/core/tools/lsp/render.ts +21 -22
  57. package/src/core/tools/notebook.ts +112 -4
  58. package/src/core/tools/output.ts +175 -15
  59. package/src/core/tools/read.ts +127 -25
  60. package/src/core/tools/render-utils.ts +241 -0
  61. package/src/core/tools/renderers.ts +40 -828
  62. package/src/core/tools/review.ts +26 -25
  63. package/src/core/tools/rulebook.ts +11 -3
  64. package/src/core/tools/task/agents.ts +28 -7
  65. package/src/core/tools/task/discovery.ts +0 -6
  66. package/src/core/tools/task/executor.ts +264 -254
  67. package/src/core/tools/task/index.ts +48 -208
  68. package/src/core/tools/task/render.ts +26 -11
  69. package/src/core/tools/task/types.ts +7 -12
  70. package/src/core/tools/task/worker-protocol.ts +17 -0
  71. package/src/core/tools/task/worker.ts +238 -0
  72. package/src/core/tools/truncate.ts +27 -1
  73. package/src/core/tools/web-fetch.ts +25 -49
  74. package/src/core/tools/web-search/index.ts +132 -46
  75. package/src/core/tools/web-search/providers/anthropic.ts +7 -2
  76. package/src/core/tools/web-search/providers/exa.ts +2 -1
  77. package/src/core/tools/web-search/providers/perplexity.ts +6 -1
  78. package/src/core/tools/web-search/render.ts +6 -4
  79. package/src/core/tools/web-search/types.ts +13 -0
  80. package/src/core/tools/write.ts +96 -14
  81. package/src/core/voice.ts +1 -1
  82. package/src/discovery/helpers.test.ts +1 -1
  83. package/src/index.ts +5 -16
  84. package/src/main.ts +5 -5
  85. package/src/modes/interactive/components/assistant-message.ts +1 -1
  86. package/src/modes/interactive/components/custom-message.ts +1 -1
  87. package/src/modes/interactive/components/extensions/inspector-panel.ts +25 -22
  88. package/src/modes/interactive/components/extensions/state-manager.ts +12 -0
  89. package/src/modes/interactive/components/footer.ts +1 -1
  90. package/src/modes/interactive/components/hook-message.ts +1 -1
  91. package/src/modes/interactive/components/model-selector.ts +1 -1
  92. package/src/modes/interactive/components/oauth-selector.ts +1 -1
  93. package/src/modes/interactive/components/settings-defs.ts +49 -0
  94. package/src/modes/interactive/components/status-line.ts +1 -1
  95. package/src/modes/interactive/components/tool-execution.ts +93 -538
  96. package/src/modes/interactive/interactive-mode.ts +19 -7
  97. package/src/modes/interactive/theme/theme.ts +4 -4
  98. package/src/modes/print-mode.ts +1 -1
  99. package/src/modes/rpc/rpc-client.ts +1 -1
  100. package/src/modes/rpc/rpc-types.ts +1 -1
  101. package/src/prompts/system-prompt.md +4 -0
  102. package/src/prompts/task.md +0 -7
  103. package/src/prompts/tools/gemini-image.md +5 -1
  104. package/src/prompts/tools/output.md +6 -2
  105. package/src/prompts/tools/task.md +68 -0
  106. package/src/prompts/tools/web-fetch.md +1 -0
  107. package/src/prompts/tools/web-search.md +2 -0
  108. package/src/utils/image-convert.ts +8 -2
  109. package/src/utils/image-magick.ts +247 -0
  110. package/src/utils/image-resize.ts +53 -13
  111. package/examples/custom-tools/question/index.ts +0 -84
  112. package/examples/custom-tools/subagent/README.md +0 -172
  113. package/examples/custom-tools/subagent/agents/planner.md +0 -37
  114. package/examples/custom-tools/subagent/agents/scout.md +0 -50
  115. package/examples/custom-tools/subagent/agents/worker.md +0 -24
  116. package/examples/custom-tools/subagent/agents.ts +0 -156
  117. package/examples/custom-tools/subagent/commands/implement-and-review.md +0 -10
  118. package/examples/custom-tools/subagent/commands/implement.md +0 -10
  119. package/examples/custom-tools/subagent/commands/scout-and-plan.md +0 -9
  120. package/examples/custom-tools/subagent/index.ts +0 -1002
  121. package/examples/sdk/05-tools.ts +0 -94
  122. package/examples/sdk/12-full-control.ts +0 -95
  123. package/src/prompts/browser.md +0 -71
@@ -6,8 +6,8 @@
6
6
  import * as fs from "node:fs";
7
7
  import * as os from "node:os";
8
8
  import * as path from "node:path";
9
+ import type { AssistantMessage, ImageContent, Message, OAuthProvider } from "@mariozechner/pi-ai";
9
10
  import type { AgentMessage, ThinkingLevel } from "@oh-my-pi/pi-agent-core";
10
- import type { AssistantMessage, ImageContent, Message, OAuthProvider } from "@oh-my-pi/pi-ai";
11
11
  import type { SlashCommand } from "@oh-my-pi/pi-tui";
12
12
  import {
13
13
  CombinedAutocompleteProvider,
@@ -31,6 +31,7 @@ import { getRecentSessions, type SessionContext, SessionManager } from "../../co
31
31
  import { loadSlashCommands } from "../../core/slash-commands";
32
32
  import { detectNotificationProtocol, isNotificationSuppressed, sendNotification } from "../../core/terminal-notify";
33
33
  import { generateSessionTitle, setTerminalTitle } from "../../core/title-generator";
34
+ import { setPreferredImageProvider, setPreferredWebSearchProvider } from "../../core/tools/index";
34
35
  import type { TruncationResult } from "../../core/tools/truncate";
35
36
  import { VoiceSupervisor } from "../../core/voice-supervisor";
36
37
  import { disableProvider, enableProvider } from "../../discovery";
@@ -1559,7 +1560,10 @@ export class InteractiveMode {
1559
1560
  case "fileMention": {
1560
1561
  // Render compact file mention display
1561
1562
  for (const file of message.files) {
1562
- const text = `${theme.fg("dim", `${theme.tree.hook} `)}${theme.fg("muted", "Read")} ${theme.fg("accent", file.path)} ${theme.fg("dim", `(${file.lineCount} lines)`)}`;
1563
+ const text = `${theme.fg("dim", `${theme.tree.hook} `)}${theme.fg("muted", "Read")} ${theme.fg(
1564
+ "accent",
1565
+ file.path,
1566
+ )} ${theme.fg("dim", `(${file.lineCount} lines)`)}`;
1563
1567
  this.chatContainer.addChild(new Text(text, 0, 0));
1564
1568
  }
1565
1569
  break;
@@ -2362,6 +2366,14 @@ export class InteractiveMode {
2362
2366
  break;
2363
2367
  }
2364
2368
 
2369
+ // Provider settings - update runtime preferences
2370
+ case "webSearchProvider":
2371
+ setPreferredWebSearchProvider(value as "auto" | "exa" | "perplexity" | "anthropic");
2372
+ break;
2373
+ case "imageProvider":
2374
+ setPreferredImageProvider(value as "auto" | "gemini" | "openrouter");
2375
+ break;
2376
+
2365
2377
  // All other settings are handled by the definitions (get/set on SettingsManager)
2366
2378
  // No additional side effects needed
2367
2379
  }
@@ -2466,13 +2478,13 @@ export class InteractiveMode {
2466
2478
  return;
2467
2479
  }
2468
2480
 
2469
- // Ask about summarization
2481
+ // Ask about summarization (or skip if disabled in settings)
2470
2482
  done(); // Close selector first
2471
2483
 
2472
- const wantsSummary = await this.showHookConfirm(
2473
- "Summarize branch?",
2474
- "Create a summary of the branch you're leaving?",
2475
- );
2484
+ const branchSummariesEnabled = this.settingsManager.getBranchSummaryEnabled();
2485
+ const wantsSummary = branchSummariesEnabled
2486
+ ? await this.showHookConfirm("Summarize branch?", "Create a summary of the branch you're leaving?")
2487
+ : false;
2476
2488
 
2477
2489
  // Set up escape handler and loader if summarizing
2478
2490
  let summaryLoader: Loader | undefined;
@@ -1367,9 +1367,9 @@ export class Theme {
1367
1367
  return (str: string) => this.fg("bashMode", str);
1368
1368
  }
1369
1369
 
1370
- // -------------------------------------------------------------------------
1370
+ // ============================================================================
1371
1371
  // Symbol Methods
1372
- // -------------------------------------------------------------------------
1372
+ // ============================================================================
1373
1373
 
1374
1374
  /**
1375
1375
  * Get a symbol by key.
@@ -1392,9 +1392,9 @@ export class Theme {
1392
1392
  return this.symbolPreset;
1393
1393
  }
1394
1394
 
1395
- // -------------------------------------------------------------------------
1395
+ // ============================================================================
1396
1396
  // Symbol Category Accessors
1397
- // -------------------------------------------------------------------------
1397
+ // ============================================================================
1398
1398
 
1399
1399
  get status() {
1400
1400
  return {
@@ -6,7 +6,7 @@
6
6
  * - `omp --mode json "prompt"` - JSON event stream
7
7
  */
8
8
 
9
- import type { AssistantMessage, ImageContent } from "@oh-my-pi/pi-ai";
9
+ import type { AssistantMessage, ImageContent } from "@mariozechner/pi-ai";
10
10
  import type { AgentSession } from "../core/agent-session";
11
11
 
12
12
  /**
@@ -4,8 +4,8 @@
4
4
  * Spawns the agent in RPC mode and provides a typed API for all operations.
5
5
  */
6
6
 
7
+ import type { ImageContent } from "@mariozechner/pi-ai";
7
8
  import type { AgentEvent, AgentMessage, ThinkingLevel } from "@oh-my-pi/pi-agent-core";
8
- import type { ImageContent } from "@oh-my-pi/pi-ai";
9
9
  import type { Subprocess } from "bun";
10
10
  import type { SessionStats } from "../../core/agent-session";
11
11
  import type { BashResult } from "../../core/bash-executor";
@@ -5,8 +5,8 @@
5
5
  * Responses and events are emitted as JSON lines on stdout.
6
6
  */
7
7
 
8
+ import type { ImageContent, Model } from "@mariozechner/pi-ai";
8
9
  import type { AgentMessage, ThinkingLevel } from "@oh-my-pi/pi-agent-core";
9
- import type { ImageContent, Model } from "@oh-my-pi/pi-ai";
10
10
  import type { SessionStats } from "../../core/agent-session";
11
11
  import type { BashResult } from "../../core/bash-executor";
12
12
  import type { CompactionResult } from "../../core/compaction/index";
@@ -15,6 +15,10 @@ Core behavior:
15
15
  - If a command fails due to sandboxing or needs elevated access, request approval and rerun.
16
16
  - Follow project validation/testing guidance; if checks are not run, suggest them in next steps.
17
17
  - Resolve blockers before yielding; do not guess.
18
+ - Use tools to ground answers when external or deterministic info is needed; avoid speculation when a tool can verify.
19
+ - Ask for missing or ambiguous tool parameters instead of guessing; confirm before actions.
20
+ - Minimize tool calls and context usage by narrowing queries and summarizing only what is needed.
21
+ - After each tool result, check relevance; iterate or clarify if results conflict or are insufficient.
18
22
  - Use concise, scannable responses; include file paths in backticks; use short bullets for multi-item lists; avoid dumping large files.
19
23
 
20
24
  Documentation:
@@ -1,10 +1,3 @@
1
- ---
2
- name: task
3
- description: General-purpose subagent with full capabilities for delegated multi-step tasks
4
- spawns: explore
5
- model: default
6
- ---
7
-
8
1
  You are a worker agent for delegated tasks. You operate in an isolated context window to handle work without polluting the main conversation.
9
2
 
10
3
  Do what has been asked; nothing more, nothing less. Work autonomously using all available tools.
@@ -1,4 +1,8 @@
1
- Generate or edit images using Google Gemini image models ("Nano Banana").
1
+ Generate or edit images using Gemini image models directly or via OpenRouter.
2
2
 
3
3
  Provide a text prompt and optional input images. Use response modalities to request image-only output,
4
4
  set aspect ratio or image size, and choose the model explicitly when needed.
5
+
6
+ Prompt tips:
7
+ - Describe subject, composition, style, and lighting in full sentences.
8
+ - For edits, reference the input image and specify the exact changes.
@@ -1,16 +1,16 @@
1
- # TaskOutput
2
-
3
1
  Retrieves complete output from background tasks spawned with the Task tool.
4
2
 
5
3
  ## When to Use
6
4
 
7
5
  Use TaskOutput when:
6
+
8
7
  - Task tool returns truncated preview with "Output truncated" message
9
8
  - You need full output to debug errors or analyze detailed results
10
9
  - Task tool's summary shows substantial line/character counts but preview is incomplete
11
10
  - You're analyzing multi-step task output requiring full context
12
11
 
13
12
  Do NOT use when:
13
+
14
14
  - Task preview already shows complete output (no truncation indicator)
15
15
  - Summary alone answers your question
16
16
 
@@ -21,3 +21,7 @@ Do NOT use when:
21
21
  - `"raw"` (default): Full output with ANSI codes preserved
22
22
  - `"json"`: Structured object with metadata
23
23
  - `"stripped"`: Plain text with ANSI codes removed for parsing
24
+ - `offset` (optional): Line number to start reading from (1-indexed)
25
+ - `limit` (optional): Maximum number of lines to read
26
+
27
+ Use offset/limit for line ranges to reduce context usage on large outputs.
@@ -0,0 +1,68 @@
1
+ Launch a new agent to handle complex, multi-step tasks autonomously.
2
+
3
+ The Task tool launches specialized agents (workers) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it.
4
+
5
+ ## Available Agents
6
+
7
+ {{AGENTS_LIST}}
8
+
9
+ ## When NOT to Use
10
+
11
+ - Reading a specific file path → Use Read or Glob tool instead
12
+ - Searching for a specific class/function definition → Use Glob tool instead
13
+ - Searching code within 2-3 specific files → Use Read tool instead
14
+ - Tasks unrelated to the agent descriptions above
15
+
16
+ ## Usage Notes
17
+
18
+ - Always include a short description of the task in the task parameter
19
+ - **Plan-then-execute**: Put shared constraints in `context`, keep each task focused, specify output format and acceptance criteria
20
+ - **Minimize tool chatter**: Avoid repeating large context; use Output tool with output ids for full logs
21
+ - **Parallelize**: Launch multiple agents concurrently whenever possible
22
+ - **Results are intermediate data**: Agent findings provide context for YOU to perform actual work. Do not treat agent reports as "task complete" signals.
23
+ - **Stateless invocations**: Each agent runs autonomously and returns a single final message. Include all necessary context and specify exactly what information to return.
24
+ - **Trust outputs**: Agent results should generally be trusted
25
+ - **Clarify intent**: Tell the agent whether you expect code changes or just research (search, file reads, web fetches)
26
+ - **Proactive use**: If an agent description says to use it proactively, do so without waiting for explicit user request
27
+
28
+ ## Parameters
29
+
30
+ - `tasks`: Array of `{agent, task, description?, model?}` - tasks to run in parallel (max {{MAX_PARALLEL_TASKS}}, {{MAX_CONCURRENCY}} concurrent)
31
+ - `model`: (optional) Override the agent's default model with fuzzy matching (e.g., "sonnet", "codex", "5.2"). Supports comma-separated fallbacks: "gpt, opus" tries gpt first, then opus. Use "default" for omp's default model
32
+ - `context`: (optional) Shared context string prepended to all task prompts - use this to avoid repeating instructions
33
+
34
+ ## Examples
35
+
36
+ <example>
37
+ user: "Please write a function that checks if a number is prime"
38
+ assistant: Sure let me write a function that checks if a number is prime
39
+ assistant: I'm going to use the Write tool to write the following code:
40
+ <code>
41
+ function isPrime(n) {
42
+ if (n <= 1) return false
43
+ for (let i = 2; i * i <= n; i++) {
44
+ if (n % i === 0) return false
45
+ }
46
+ return true
47
+ }
48
+ </code>
49
+ <commentary>
50
+ Since a significant piece of code was written and the task was completed, now use the code-reviewer agent to review the code
51
+ </commentary>
52
+ assistant: Now let me use the code-reviewer agent to review the code
53
+ assistant: Uses the Task tool: { tasks: [{ agent: "code-reviewer", task: "Review the isPrime function" }] }
54
+ </example>
55
+
56
+ <example>
57
+ user: "Find all TODO comments in the codebase"
58
+ assistant: I'll use multiple explore agents to search different directories in parallel
59
+ assistant: Uses the Task tool:
60
+ {
61
+ "context": "Find all TODO comments. Return file:line:content format.",
62
+ "tasks": [
63
+ { "agent": "explore", "task": "Search in src/" },
64
+ { "agent": "explore", "task": "Search in lib/" },
65
+ { "agent": "explore", "task": "Search in tests/" }
66
+ ]
67
+ }
68
+ </example>
@@ -6,3 +6,4 @@ Use this tool when you need to:
6
6
  - Retrieve information from Stack Overflow, Wikipedia, Reddit, NPM, arXiv, or technical blogs
7
7
  - Access RSS/Atom feeds or JSON endpoints
8
8
  - Read PDF or DOCX files hosted at a URL
9
+ - Use `raw: true` for untouched HTML or debugging
@@ -3,6 +3,8 @@ Allows OMP to search the web and use the results to inform responses
3
3
  - Returns search result information formatted as search result blocks, including links as markdown hyperlinks
4
4
  - Use this tool for accessing information beyond Claude's knowledge cutoff
5
5
  - Searches are performed automatically within a single API call
6
+ - Prefer primary sources (papers, official docs) and corroborate key claims with multiple sources
7
+ - Include links for cited sources in the final response
6
8
 
7
9
  Common: system_prompt (guides response style)
8
10
  Anthropic-specific: max_tokens
@@ -1,6 +1,9 @@
1
+ import { convertToPngWithImageMagick } from "./image-magick.js";
2
+
1
3
  /**
2
4
  * Convert image to PNG format for terminal display.
3
5
  * Kitty graphics protocol requires PNG format (f=100).
6
+ * Uses sharp if available, falls back to ImageMagick (magick/convert).
4
7
  */
5
8
  export async function convertToPng(
6
9
  base64Data: string,
@@ -11,6 +14,7 @@ export async function convertToPng(
11
14
  return { data: base64Data, mimeType };
12
15
  }
13
16
 
17
+ // Try sharp first
14
18
  try {
15
19
  const sharp = (await import("sharp")).default;
16
20
  const buffer = Buffer.from(base64Data, "base64");
@@ -20,7 +24,9 @@ export async function convertToPng(
20
24
  mimeType: "image/png",
21
25
  };
22
26
  } catch {
23
- // Sharp not available or conversion failed
24
- return null;
27
+ // Sharp not available, try ImageMagick fallback
25
28
  }
29
+
30
+ // Fall back to ImageMagick
31
+ return convertToPngWithImageMagick(base64Data, mimeType);
26
32
  }
@@ -0,0 +1,247 @@
1
+ let imagemagickCommand: string | null | undefined;
2
+
3
+ /**
4
+ * Detect available ImageMagick command.
5
+ * Returns "magick" (IM7), "convert" (IM6), or null if unavailable; the result is cached after the first probe.
6
+ */
7
+ async function detectImageMagick(): Promise<string | null> {
8
+ if (imagemagickCommand !== undefined) {
9
+ return imagemagickCommand;
10
+ }
11
+
12
+ for (const cmd of ["magick", "convert"]) {
13
+ try {
14
+ const proc = Bun.spawn([cmd, "-version"], { stdout: "ignore", stderr: "ignore" });
15
+ const code = await proc.exited;
16
+ if (code === 0) {
17
+ imagemagickCommand = cmd;
18
+ return cmd;
19
+ }
20
+ } catch {}
21
+ }
22
+
23
+ imagemagickCommand = null;
24
+ return null;
25
+ }
26
+
27
+ /**
28
+ * Run an ImageMagick command with `input` piped to stdin and return its stdout as a Buffer; throws if the exit code is non-zero.
29
+ */
30
+ async function runImageMagick(cmd: string, args: string[], input: Buffer): Promise<Buffer> {
31
+ const proc = Bun.spawn([cmd, ...args], {
32
+ stdin: new Blob([input]),
33
+ stdout: "pipe",
34
+ stderr: "pipe",
35
+ });
36
+
37
+ const [stdout, stderr, exitCode] = await Promise.all([
38
+ new Response(proc.stdout).arrayBuffer(),
39
+ new Response(proc.stderr).text(),
40
+ proc.exited,
41
+ ]);
42
+
43
+ if (exitCode !== 0) {
44
+ throw new Error(`ImageMagick exited with code ${exitCode}: ${stderr}`);
45
+ }
46
+
47
+ return Buffer.from(stdout);
48
+ }
49
+
50
+ /**
51
+ * Convert image to PNG using ImageMagick.
52
+ * Returns null if ImageMagick is unavailable or conversion fails.
53
+ */
54
+ export async function convertToPngWithImageMagick(
55
+ base64Data: string,
56
+ _mimeType: string,
57
+ ): Promise<{ data: string; mimeType: string } | null> {
58
+ const cmd = await detectImageMagick();
59
+ if (!cmd) {
60
+ return null;
61
+ }
62
+
63
+ try {
64
+ const input = Buffer.from(base64Data, "base64");
65
+ // "-" reads from stdin, "png:-" writes PNG to stdout
66
+ const output = await runImageMagick(cmd, ["-", "png:-"], input);
67
+ return {
68
+ data: output.toString("base64"),
69
+ mimeType: "image/png",
70
+ };
71
+ } catch {
72
+ return null;
73
+ }
74
+ }
75
+
76
+ export interface ImageMagickResizeResult {
77
+ data: string; // base64
78
+ mimeType: string;
79
+ width: number;
80
+ height: number;
81
+ }
82
+
83
+ /**
84
+ * Get image dimensions using ImageMagick identify.
85
+ */
86
+ async function getImageDimensions(cmd: string, buffer: Buffer): Promise<{ width: number; height: number } | null> {
87
+ try {
88
+ // Use identify to get dimensions
89
+ const identifyCmd = cmd === "magick" ? "magick" : "identify";
90
+ const args = cmd === "magick" ? ["identify", "-format", "%w %h", "-"] : ["-format", "%w %h", "-"];
91
+
92
+ const output = await runImageMagick(identifyCmd, args, buffer);
93
+ const [w, h] = output.toString().trim().split(" ").map(Number);
94
+ if (Number.isFinite(w) && Number.isFinite(h)) {
95
+ return { width: w, height: h };
96
+ }
97
+ } catch {
98
+ // Fall through
99
+ }
100
+ return null;
101
+ }
102
+
103
+ /**
104
+ * Resize image using ImageMagick.
105
+ * Returns null if ImageMagick is unavailable, the operation fails, or the image is already within the given limits.
106
+ */
107
+ export async function resizeWithImageMagick(
108
+ base64Data: string,
109
+ _mimeType: string,
110
+ maxWidth: number,
111
+ maxHeight: number,
112
+ maxBytes: number,
113
+ jpegQuality: number,
114
+ ): Promise<ImageMagickResizeResult | null> {
115
+ const cmd = await detectImageMagick();
116
+ if (!cmd) {
117
+ return null;
118
+ }
119
+
120
+ try {
121
+ const input = Buffer.from(base64Data, "base64");
122
+
123
+ // Get original dimensions
124
+ const dims = await getImageDimensions(cmd, input);
125
+ if (!dims) {
126
+ return null;
127
+ }
128
+
129
+ // Check if already within limits
130
+ if (dims.width <= maxWidth && dims.height <= maxHeight && input.length <= maxBytes) {
131
+ return null; // Signal caller to use original
132
+ }
133
+
134
+ // Calculate target dimensions maintaining aspect ratio
135
+ let targetWidth = dims.width;
136
+ let targetHeight = dims.height;
137
+
138
+ if (targetWidth > maxWidth) {
139
+ targetHeight = Math.round((targetHeight * maxWidth) / targetWidth);
140
+ targetWidth = maxWidth;
141
+ }
142
+ if (targetHeight > maxHeight) {
143
+ targetWidth = Math.round((targetWidth * maxHeight) / targetHeight);
144
+ targetHeight = maxHeight;
145
+ }
146
+
147
+ // Try PNG first, then JPEG with decreasing quality
148
+ const attempts: Array<{ args: string[]; mimeType: string }> = [
149
+ { args: ["-", "-resize", `${targetWidth}x${targetHeight}>`, "png:-"], mimeType: "image/png" },
150
+ {
151
+ args: ["-", "-resize", `${targetWidth}x${targetHeight}>`, "-quality", String(jpegQuality), "jpeg:-"],
152
+ mimeType: "image/jpeg",
153
+ },
154
+ ];
155
+
156
+ // Add lower quality JPEG attempts
157
+ for (const q of [70, 55, 40]) {
158
+ attempts.push({
159
+ args: ["-", "-resize", `${targetWidth}x${targetHeight}>`, "-quality", String(q), "jpeg:-"],
160
+ mimeType: "image/jpeg",
161
+ });
162
+ }
163
+
164
+ let best: { buffer: Buffer; mimeType: string } | null = null;
165
+
166
+ for (const attempt of attempts) {
167
+ try {
168
+ const output = await runImageMagick(cmd, attempt.args, input);
169
+ if (output.length <= maxBytes) {
170
+ return {
171
+ data: output.toString("base64"),
172
+ mimeType: attempt.mimeType,
173
+ width: targetWidth,
174
+ height: targetHeight,
175
+ };
176
+ }
177
+ if (!best || output.length < best.buffer.length) {
178
+ best = { buffer: output, mimeType: attempt.mimeType };
179
+ }
180
+ } catch {}
181
+ }
182
+
183
+ // Try progressively smaller dimensions
184
+ const scaleSteps = [0.75, 0.5, 0.35, 0.25];
185
+ for (const scale of scaleSteps) {
186
+ const scaledWidth = Math.round(targetWidth * scale);
187
+ const scaledHeight = Math.round(targetHeight * scale);
188
+
189
+ if (scaledWidth < 100 || scaledHeight < 100) break;
190
+
191
+ for (const q of [85, 70, 55, 40]) {
192
+ try {
193
+ const output = await runImageMagick(
194
+ cmd,
195
+ ["-", "-resize", `${scaledWidth}x${scaledHeight}>`, "-quality", String(q), "jpeg:-"],
196
+ input,
197
+ );
198
+ if (output.length <= maxBytes) {
199
+ return {
200
+ data: output.toString("base64"),
201
+ mimeType: "image/jpeg",
202
+ width: scaledWidth,
203
+ height: scaledHeight,
204
+ };
205
+ }
206
+ if (!best || output.length < best.buffer.length) {
207
+ best = { buffer: output, mimeType: "image/jpeg" };
208
+ }
209
+ } catch {}
210
+ }
211
+ }
212
+
213
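+ // Return best attempt even if over limit. NOTE(review): width/height below are the unscaled target, even when `best` came from a scaled-down pass.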
214
+ if (best) {
215
+ return {
216
+ data: best.buffer.toString("base64"),
217
+ mimeType: best.mimeType,
218
+ width: targetWidth,
219
+ height: targetHeight,
220
+ };
221
+ }
222
+
223
+ return null;
224
+ } catch {
225
+ return null;
226
+ }
227
+ }
228
+
229
+ /**
230
+ * Get image dimensions using ImageMagick.
231
+ * Returns null if ImageMagick is unavailable.
232
+ */
233
+ export async function getImageDimensionsWithImageMagick(
234
+ base64Data: string,
235
+ ): Promise<{ width: number; height: number } | null> {
236
+ const cmd = await detectImageMagick();
237
+ if (!cmd) {
238
+ return null;
239
+ }
240
+
241
+ try {
242
+ const buffer = Buffer.from(base64Data, "base64");
243
+ return await getImageDimensions(cmd, buffer);
244
+ } catch {
245
+ return null;
246
+ }
247
+ }
@@ -1,4 +1,5 @@
1
- import type { ImageContent } from "@oh-my-pi/pi-ai";
1
+ import type { ImageContent } from "@mariozechner/pi-ai";
2
+ import { getImageDimensionsWithImageMagick, resizeWithImageMagick } from "./image-magick.js";
2
3
 
3
4
  export interface ImageResizeOptions {
4
5
  maxWidth?: number; // Default: 2000
@@ -27,6 +28,52 @@ const DEFAULT_OPTIONS: Required<ImageResizeOptions> = {
27
28
  jpegQuality: 80,
28
29
  };
29
30
 
31
+ /**
32
+ * Fallback resize using ImageMagick when sharp is unavailable.
33
+ */
34
+ async function resizeImageWithImageMagick(
35
+ img: ImageContent,
36
+ opts: Required<ImageResizeOptions>,
37
+ ): Promise<ResizedImage> {
38
+ // Try to get dimensions first
39
+ const dims = await getImageDimensionsWithImageMagick(img.data);
40
+ const originalWidth = dims?.width ?? 0;
41
+ const originalHeight = dims?.height ?? 0;
42
+
43
+ // Try to resize
44
+ const result = await resizeWithImageMagick(
45
+ img.data,
46
+ img.mimeType,
47
+ opts.maxWidth,
48
+ opts.maxHeight,
49
+ opts.maxBytes,
50
+ opts.jpegQuality,
51
+ );
52
+
53
+ if (result) {
54
+ return {
55
+ data: result.data,
56
+ mimeType: result.mimeType,
57
+ originalWidth,
58
+ originalHeight,
59
+ width: result.width,
60
+ height: result.height,
61
+ wasResized: true,
62
+ };
63
+ }
64
+
65
+ // ImageMagick not available or resize not needed - return original
66
+ return {
67
+ data: img.data,
68
+ mimeType: img.mimeType,
69
+ originalWidth,
70
+ originalHeight,
71
+ width: originalWidth,
72
+ height: originalHeight,
73
+ wasResized: false,
74
+ };
75
+ }
76
+
30
77
  /** Helper to pick the smaller of two buffers */
31
78
  function pickSmaller(
32
79
  a: { buffer: Buffer; mimeType: string },
@@ -56,17 +103,8 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
56
103
  try {
57
104
  sharp = (await import("sharp")).default;
58
105
  } catch {
59
- // Sharp not available - return original image
60
- // We can't get dimensions without sharp, so return 0s
61
- return {
62
- data: img.data,
63
- mimeType: img.mimeType,
64
- originalWidth: 0,
65
- originalHeight: 0,
66
- width: 0,
67
- height: 0,
68
- wasResized: false,
69
- };
106
+ // Sharp not available - try ImageMagick fallback
107
+ return resizeImageWithImageMagick(img, opts);
70
108
  }
71
109
 
72
110
  const sharpImg = sharp(buffer);
@@ -211,5 +249,7 @@ export function formatDimensionNote(result: ResizedImage): string | undefined {
211
249
  }
212
250
 
213
251
  const scale = result.originalWidth / result.width;
214
- return `[Image: original ${result.originalWidth}x${result.originalHeight}, displayed at ${result.width}x${result.height}. Multiply coordinates by ${scale.toFixed(2)} to map to original image.]`;
252
+ return `[Image: original ${result.originalWidth}x${result.originalHeight}, displayed at ${result.width}x${
253
+ result.height
254
+ }. Multiply coordinates by ${scale.toFixed(2)} to map to original image.]`;
215
255
  }