@oh-my-pi/pi-coding-agent 14.3.0 → 14.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120):
  1. package/CHANGELOG.md +98 -1
  2. package/package.json +7 -7
  3. package/src/autoresearch/prompt.md +1 -1
  4. package/src/commit/agentic/prompts/analyze-file.md +1 -1
  5. package/src/config/model-registry.ts +67 -15
  6. package/src/config/prompt-templates.ts +5 -5
  7. package/src/config/settings-schema.ts +4 -4
  8. package/src/cursor.ts +3 -8
  9. package/src/discovery/helpers.ts +3 -3
  10. package/src/edit/diff.ts +50 -47
  11. package/src/edit/index.ts +86 -57
  12. package/src/edit/line-hash.ts +743 -24
  13. package/src/edit/modes/apply-patch.ts +0 -9
  14. package/src/edit/modes/atom.ts +893 -0
  15. package/src/edit/modes/chunk.ts +14 -24
  16. package/src/edit/modes/hashline.ts +193 -146
  17. package/src/edit/modes/patch.ts +5 -9
  18. package/src/edit/modes/replace.ts +6 -11
  19. package/src/edit/renderer.ts +14 -10
  20. package/src/edit/streaming.ts +50 -16
  21. package/src/exec/bash-executor.ts +2 -4
  22. package/src/export/html/template.generated.ts +1 -1
  23. package/src/export/html/template.js +4 -12
  24. package/src/extensibility/custom-tools/types.ts +2 -0
  25. package/src/extensibility/custom-tools/wrapper.ts +2 -1
  26. package/src/internal-urls/docs-index.generated.ts +2 -2
  27. package/src/lsp/defaults.json +142 -652
  28. package/src/lsp/index.ts +1 -1
  29. package/src/mcp/render.ts +1 -8
  30. package/src/modes/components/assistant-message.ts +4 -0
  31. package/src/modes/components/diff.ts +23 -14
  32. package/src/modes/components/footer.ts +21 -16
  33. package/src/modes/components/session-selector.ts +3 -3
  34. package/src/modes/components/settings-defs.ts +6 -1
  35. package/src/modes/components/todo-reminder.ts +1 -8
  36. package/src/modes/components/tool-execution.ts +1 -4
  37. package/src/modes/controllers/selector-controller.ts +1 -1
  38. package/src/modes/print-mode.ts +8 -0
  39. package/src/prompts/agents/librarian.md +1 -1
  40. package/src/prompts/agents/reviewer.md +4 -4
  41. package/src/prompts/ci-green-request.md +1 -1
  42. package/src/prompts/review-request.md +1 -1
  43. package/src/prompts/system/subagent-system-prompt.md +3 -3
  44. package/src/prompts/system/subagent-yield-reminder.md +11 -0
  45. package/src/prompts/system/system-prompt.md +3 -0
  46. package/src/prompts/tools/ask.md +3 -2
  47. package/src/prompts/tools/ast-edit.md +16 -20
  48. package/src/prompts/tools/ast-grep.md +19 -24
  49. package/src/prompts/tools/atom.md +87 -0
  50. package/src/prompts/tools/chunk-edit.md +37 -161
  51. package/src/prompts/tools/debug.md +4 -5
  52. package/src/prompts/tools/exit-plan-mode.md +4 -5
  53. package/src/prompts/tools/find.md +4 -8
  54. package/src/prompts/tools/github.md +18 -0
  55. package/src/prompts/tools/grep.md +4 -5
  56. package/src/prompts/tools/hashline.md +22 -89
  57. package/src/prompts/tools/{gemini-image.md → image-gen.md} +1 -1
  58. package/src/prompts/tools/inspect-image.md +6 -6
  59. package/src/prompts/tools/lsp.md +1 -1
  60. package/src/prompts/tools/patch.md +12 -19
  61. package/src/prompts/tools/python.md +3 -2
  62. package/src/prompts/tools/read-chunk.md +2 -3
  63. package/src/prompts/tools/read.md +2 -2
  64. package/src/prompts/tools/ssh.md +8 -17
  65. package/src/prompts/tools/todo-write.md +54 -41
  66. package/src/sdk.ts +14 -9
  67. package/src/session/agent-session.ts +25 -2
  68. package/src/session/session-manager.ts +4 -1
  69. package/src/task/executor.ts +43 -48
  70. package/src/task/render.ts +11 -13
  71. package/src/tools/ask.ts +7 -7
  72. package/src/tools/ast-edit.ts +45 -41
  73. package/src/tools/ast-grep.ts +77 -85
  74. package/src/tools/bash.ts +8 -9
  75. package/src/tools/browser.ts +32 -30
  76. package/src/tools/calculator.ts +4 -4
  77. package/src/tools/cancel-job.ts +1 -1
  78. package/src/tools/checkpoint.ts +2 -2
  79. package/src/tools/debug.ts +41 -37
  80. package/src/tools/exit-plan-mode.ts +1 -1
  81. package/src/tools/find.ts +4 -4
  82. package/src/tools/gh-renderer.ts +12 -4
  83. package/src/tools/gh.ts +509 -697
  84. package/src/tools/grep.ts +116 -131
  85. package/src/tools/{gemini-image.ts → image-gen.ts} +459 -60
  86. package/src/tools/index.ts +14 -32
  87. package/src/tools/inspect-image.ts +3 -3
  88. package/src/tools/json-tree.ts +114 -114
  89. package/src/tools/match-line-format.ts +8 -7
  90. package/src/tools/notebook.ts +8 -7
  91. package/src/tools/poll-tool.ts +2 -1
  92. package/src/tools/python.ts +9 -23
  93. package/src/tools/read.ts +32 -25
  94. package/src/tools/render-mermaid.ts +1 -1
  95. package/src/tools/render-utils.ts +18 -0
  96. package/src/tools/renderers.ts +2 -2
  97. package/src/tools/report-tool-issue.ts +3 -2
  98. package/src/tools/resolve.ts +1 -1
  99. package/src/tools/review.ts +12 -10
  100. package/src/tools/search-tool-bm25.ts +2 -4
  101. package/src/tools/ssh.ts +4 -4
  102. package/src/tools/todo-write.ts +172 -147
  103. package/src/tools/vim.ts +14 -15
  104. package/src/tools/write.ts +4 -4
  105. package/src/tools/{submit-result.ts → yield.ts} +11 -13
  106. package/src/utils/edit-mode.ts +2 -1
  107. package/src/utils/file-display-mode.ts +10 -5
  108. package/src/utils/git.ts +9 -5
  109. package/src/utils/shell-snapshot.ts +2 -3
  110. package/src/vim/render.ts +4 -4
  111. package/src/prompts/system/subagent-submit-reminder.md +0 -11
  112. package/src/prompts/tools/gh-issue-view.md +0 -11
  113. package/src/prompts/tools/gh-pr-checkout.md +0 -12
  114. package/src/prompts/tools/gh-pr-diff.md +0 -12
  115. package/src/prompts/tools/gh-pr-push.md +0 -12
  116. package/src/prompts/tools/gh-pr-view.md +0 -11
  117. package/src/prompts/tools/gh-repo-view.md +0 -11
  118. package/src/prompts/tools/gh-run-watch.md +0 -12
  119. package/src/prompts/tools/gh-search-issues.md +0 -11
  120. package/src/prompts/tools/gh-search-prs.md +0 -11
@@ -1,6 +1,13 @@
1
1
  import * as os from "node:os";
2
2
  import * as path from "node:path";
3
- import { getAntigravityHeaders, getEnvApiKey, StringEnum } from "@oh-my-pi/pi-ai";
3
+ import { getAntigravityHeaders, getEnvApiKey, type Model, StringEnum } from "@oh-my-pi/pi-ai";
4
+ import {
5
+ CODEX_BASE_URL,
6
+ getCodexAccountId,
7
+ OPENAI_HEADER_VALUES,
8
+ OPENAI_HEADERS,
9
+ URL_PATHS,
10
+ } from "@oh-my-pi/pi-ai/providers/openai-codex/constants";
4
11
  import {
5
12
  $env,
6
13
  isEnoent,
@@ -12,9 +19,10 @@ import {
12
19
  untilAborted,
13
20
  } from "@oh-my-pi/pi-utils";
14
21
  import { type Static, Type } from "@sinclair/typebox";
15
- import type { ModelRegistry } from "../config/model-registry";
22
+ import packageJson from "../../package.json" with { type: "json" };
23
+ import { isAuthenticated, type ModelRegistry } from "../config/model-registry";
16
24
  import type { CustomTool } from "../extensibility/custom-tools/types";
17
- import geminiImageDescription from "../prompts/tools/gemini-image.md" with { type: "text" };
25
+ import imageGenDescription from "../prompts/tools/image-gen.md" with { type: "text" };
18
26
  import { resolveReadPath } from "./path-utils";
19
27
 
20
28
  const DEFAULT_MODEL = "gemini-3-pro-image-preview";
@@ -22,31 +30,37 @@ const DEFAULT_OPENROUTER_MODEL = "google/gemini-3-pro-image-preview";
22
30
  const DEFAULT_ANTIGRAVITY_MODEL = "gemini-3-pro-image";
23
31
  const IMAGE_TIMEOUT = 3 * 60 * 1000; // 3 minutes
24
32
  const MAX_IMAGE_SIZE = 35 * 1024 * 1024;
33
+ const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
34
+ const OPENAI_IMAGE_OUTPUT_FORMAT = "webp";
35
+ const OPENAI_IMAGE_MIME_TYPE = "image/webp";
25
36
 
26
37
  const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
27
38
  const IMAGE_SYSTEM_INSTRUCTION =
28
39
  "You are an AI image generator. Generate images based on user descriptions. Focus on creating high-quality, visually appealing images that match the user's request.";
29
40
 
30
- type ImageProvider = "antigravity" | "gemini" | "openrouter";
41
+ type ImageProvider = "antigravity" | "gemini" | "openai" | "openai-codex" | "openrouter";
31
42
  interface ImageApiKey {
32
43
  provider: ImageProvider;
33
44
  apiKey: string;
34
45
  projectId?: string;
46
+ model?: Model;
35
47
  }
36
48
 
37
49
  const responseModalitySchema = StringEnum(["IMAGE", "TEXT"]);
38
50
  const aspectRatioSchema = StringEnum(["1:1", "3:4", "4:3", "9:16", "16:9"], {
39
- description: "Aspect ratio (1:1, 3:4, 4:3, 9:16, 16:9).",
51
+ description: "aspect ratio",
52
+ examples: ["1:1", "3:4", "16:9"],
40
53
  });
41
54
  const imageSizeSchema = StringEnum(["1024x1024", "1536x1024", "1024x1536"], {
42
- description: "Image size, mainly for gemini-3-pro-image-preview.",
55
+ description: "image size",
56
+ examples: ["1024x1024", "1536x1024"],
43
57
  });
44
58
 
45
59
  const inputImageSchema = Type.Object(
46
60
  {
47
- path: Type.Optional(Type.String({ description: "Path to an input image file." })),
48
- data: Type.Optional(Type.String({ description: "Base64 image data or a data: URL." })),
49
- mime_type: Type.Optional(Type.String({ description: "Required for raw base64 data." })),
61
+ path: Type.Optional(Type.String({ description: "input image path" })),
62
+ data: Type.Optional(Type.String({ description: "base64 image data" })),
63
+ mime_type: Type.Optional(Type.String({ description: "mime type" })),
50
64
  },
51
65
  { additionalProperties: false },
52
66
  );
@@ -54,63 +68,64 @@ const inputImageSchema = Type.Object(
54
68
  const baseImageSchema = Type.Object(
55
69
  {
56
70
  subject: Type.String({
57
- description:
58
- "Main subject with key descriptors (e.g., 'A stoic robot barista with glowing blue optics', 'A weathered lighthouse on a rocky cliff').",
71
+ description: "main subject",
72
+ examples: ["a stoic robot barista", "a weathered lighthouse"],
59
73
  }),
60
74
  action: Type.Optional(
61
75
  Type.String({
62
- description: "What the subject is doing (e.g., 'pouring latte art', 'standing against crashing waves').",
76
+ description: "what subject is doing",
77
+ examples: ["pouring latte art", "standing against waves"],
63
78
  }),
64
79
  ),
65
80
  scene: Type.Optional(
66
81
  Type.String({
67
- description:
68
- "Location or environment (e.g., 'in a futuristic café on Mars', 'during a violent thunderstorm at dusk').",
82
+ description: "location or environment",
83
+ examples: ["futuristic café on mars", "thunderstorm at dusk"],
69
84
  }),
70
85
  ),
71
86
  composition: Type.Optional(
72
87
  Type.String({
73
- description:
74
- "Camera angle, framing, depth of field (e.g., 'low-angle close-up, shallow depth of field', 'wide establishing shot').",
88
+ description: "camera angle and framing",
89
+ examples: ["low-angle close-up", "wide establishing shot"],
75
90
  }),
76
91
  ),
77
92
  lighting: Type.Optional(
78
93
  Type.String({
79
- description:
80
- "Lighting setup and mood (e.g., 'warm rim lighting', 'golden hour backlight', 'hard noon shadows').",
94
+ description: "lighting setup",
95
+ examples: ["warm rim lighting", "golden hour backlight"],
81
96
  }),
82
97
  ),
83
98
  style: Type.Optional(
84
99
  Type.String({
85
- description:
86
- "Artistic style, mood, color grading, camera (e.g., 'film noir mood, cinematic color grading', 'Studio Ghibli watercolor', 'photorealistic').",
100
+ description: "artistic style",
101
+ examples: ["film noir", "studio ghibli watercolor", "photorealistic"],
87
102
  }),
88
103
  ),
89
104
  text: Type.Optional(
90
105
  Type.String({
91
- description:
92
- "Text to render in image with specs: exact wording in quotes, font style, color, placement (e.g., 'Headline \"URBAN EXPLORER\" in bold white sans-serif at top center').",
106
+ description: "text to render",
107
+ examples: ["headline 'urban explorer' top center"],
93
108
  }),
94
109
  ),
95
110
  changes: Type.Optional(
96
111
  Type.Array(Type.String(), {
97
- description:
98
- "For edits: specific changes to make, as well as, what to keep unchanged (e.g., ['Change the tie to green', 'Remove the car in background']). Use with input_images.",
112
+ description: "edits to make",
113
+ examples: [["change tie to green", "remove car"]],
99
114
  }),
100
115
  ),
101
116
  aspect_ratio: Type.Optional(aspectRatioSchema),
102
117
  image_size: Type.Optional(imageSizeSchema),
103
118
  input: Type.Optional(
104
119
  Type.Array(inputImageSchema, {
105
- description: "Optional input images for edits or variations.",
120
+ description: "input images",
106
121
  }),
107
122
  ),
108
123
  },
109
124
  { additionalProperties: false },
110
125
  );
111
126
 
112
- export const geminiImageSchema = baseImageSchema;
113
- export type GeminiImageParams = Static<typeof geminiImageSchema>;
127
+ export const imageGenSchema = baseImageSchema;
128
+ export type ImageGenParams = Static<typeof imageGenSchema>;
114
129
  export type GeminiResponseModality = Static<typeof responseModalitySchema>;
115
130
 
116
131
  /**
@@ -118,7 +133,7 @@ export type GeminiResponseModality = Static<typeof responseModalitySchema>;
118
133
  * For generation: builds "subject, action, scene. composition. lighting. camera. style."
119
134
  * For edits: appends change instructions and preserve directives.
120
135
  */
121
- function assemblePrompt(params: GeminiImageParams): string {
136
+ function assemblePrompt(params: ImageGenParams): string {
122
137
  const parts: string[] = [];
123
138
 
124
139
  // Core subject line: subject + action + scene
@@ -184,6 +199,90 @@ interface GeminiGenerateContentResponse {
184
199
  usageMetadata?: GeminiUsageMetadata;
185
200
  }
186
201
 
202
+ interface OpenAIResponsesUsage {
203
+ input_tokens?: number;
204
+ output_tokens?: number;
205
+ total_tokens?: number;
206
+ }
207
+
208
+ type ImageUsageMetadata = GeminiUsageMetadata | OpenAIResponsesUsage;
209
+
210
+ type OpenAIImageAction = "edit" | "generate";
211
+
212
+ interface OpenAIInputTextContent {
213
+ type: "input_text";
214
+ text: string;
215
+ }
216
+
217
+ interface OpenAIInputImageContent {
218
+ type: "input_image";
219
+ detail: "auto";
220
+ image_url: string;
221
+ }
222
+
223
+ type OpenAIInputContent = OpenAIInputTextContent | OpenAIInputImageContent;
224
+
225
+ interface OpenAIImageGenerationTool {
226
+ type: "image_generation";
227
+ action: OpenAIImageAction;
228
+ output_format: typeof OPENAI_IMAGE_OUTPUT_FORMAT;
229
+ size?: string;
230
+ }
231
+
232
+ interface OpenAIHostedImageRequest {
233
+ model: string;
234
+ instructions?: string;
235
+ input: Array<{ role: "user"; content: OpenAIInputContent[] }>;
236
+ tools: OpenAIImageGenerationTool[];
237
+ tool_choice: { type: "image_generation" };
238
+ store: false;
239
+ stream?: boolean;
240
+ }
241
+
242
+ interface OpenAIImageGenerationCall {
243
+ id?: string;
244
+ type: "image_generation_call";
245
+ result?: string;
246
+ revised_prompt?: string;
247
+ status?: string;
248
+ }
249
+
250
+ interface OpenAIOutputText {
251
+ type: "output_text" | "refusal";
252
+ text?: string;
253
+ refusal?: string;
254
+ }
255
+
256
+ interface OpenAIOutputMessage {
257
+ id?: string;
258
+ type: "message";
259
+ content?: OpenAIOutputText[];
260
+ }
261
+
262
+ type OpenAIResponseOutput = OpenAIImageGenerationCall | OpenAIOutputMessage;
263
+
264
+ interface OpenAIHostedImageResponse {
265
+ output?: OpenAIResponseOutput[];
266
+ usage?: OpenAIResponsesUsage;
267
+ error?: { code?: string; message?: string };
268
+ }
269
+
270
+ interface OpenAISseEvent {
271
+ type?: string;
272
+ item?: OpenAIResponseOutput;
273
+ response?: OpenAIHostedImageResponse;
274
+ code?: string;
275
+ message?: string;
276
+ error?: { code?: string; message?: string };
277
+ }
278
+
279
+ interface OpenAIHostedImageResult {
280
+ images: InlineImageData[];
281
+ responseText?: string;
282
+ revisedPrompt?: string;
283
+ usage?: OpenAIResponsesUsage;
284
+ }
285
+
187
286
  interface OpenRouterImageUrl {
188
287
  url: string;
189
288
  }
@@ -240,7 +339,7 @@ interface AntigravityResponseChunk {
240
339
  };
241
340
  }
242
341
 
243
- interface GeminiImageToolDetails {
342
+ interface ImageGenToolDetails {
244
343
  provider: ImageProvider;
245
344
  model: string;
246
345
  imageCount: number;
@@ -248,7 +347,8 @@ interface GeminiImageToolDetails {
248
347
  images: InlineImageData[];
249
348
  responseText?: string;
250
349
  promptFeedback?: GeminiPromptFeedback;
251
- usage?: GeminiUsageMetadata;
350
+ revisedPrompt?: string;
351
+ usage?: ImageUsageMetadata;
252
352
  }
253
353
 
254
354
  interface ImageInput {
@@ -379,26 +479,51 @@ async function findAntigravityCredentials(modelRegistry: ModelRegistry): Promise
379
479
  };
380
480
  }
381
481
 
382
- async function findImageApiKey(modelRegistry?: ModelRegistry): Promise<ImageApiKey | null> {
383
- // If a specific provider is preferred, try it first
384
- if (preferredImageProvider === "antigravity" && modelRegistry) {
482
+ async function findOpenAIHostedImageCredentials(
483
+ modelRegistry: ModelRegistry | undefined,
484
+ activeModel: Model | undefined,
485
+ sessionId?: string,
486
+ ): Promise<ImageApiKey | null> {
487
+ if (!modelRegistry || !isOpenAIHostedImageModel(activeModel)) return null;
488
+ const apiKey = await modelRegistry.getApiKey(activeModel, sessionId);
489
+ if (!isAuthenticated(apiKey)) return null;
490
+ return {
491
+ provider: getOpenAIHostedImageProvider(activeModel),
492
+ apiKey,
493
+ model: activeModel,
494
+ };
495
+ }
496
+
497
+ async function findImageApiKey(
498
+ modelRegistry?: ModelRegistry,
499
+ activeModel?: Model,
500
+ sessionId?: string,
501
+ ): Promise<ImageApiKey | null> {
502
+ // If a specific provider is preferred, try it first.
503
+ if (preferredImageProvider === "openai") {
504
+ const openAI = await findOpenAIHostedImageCredentials(modelRegistry, activeModel, sessionId);
505
+ if (openAI) return openAI;
506
+ // Fall through to auto-detect if preferred provider key not found.
507
+ } else if (preferredImageProvider === "antigravity" && modelRegistry) {
385
508
  const antigravity = await findAntigravityCredentials(modelRegistry);
386
509
  if (antigravity) return antigravity;
387
- // Fall through to auto-detect if preferred provider key not found
388
- }
389
- if (preferredImageProvider === "gemini") {
510
+ // Fall through to auto-detect if preferred provider key not found.
511
+ } else if (preferredImageProvider === "gemini") {
390
512
  const geminiKey = getEnvApiKey("google");
391
513
  if (geminiKey) return { provider: "gemini", apiKey: geminiKey };
392
514
  const googleKey = $env.GOOGLE_API_KEY;
393
515
  if (googleKey) return { provider: "gemini", apiKey: googleKey };
394
- // Fall through to auto-detect if preferred provider key not found
516
+ // Fall through to auto-detect if preferred provider key not found.
395
517
  } else if (preferredImageProvider === "openrouter") {
396
518
  const openRouterKey = getEnvApiKey("openrouter");
397
519
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
398
- // Fall through to auto-detect if preferred provider key not found
520
+ // Fall through to auto-detect if preferred provider key not found.
399
521
  }
400
522
 
401
- // Auto-detect: Antigravity takes priority, then OpenRouter, then Gemini
523
+ // Auto-detect: GPT hosted image generation, then Antigravity, OpenRouter, Gemini.
524
+ const openAI = await findOpenAIHostedImageCredentials(modelRegistry, activeModel, sessionId);
525
+ if (openAI) return openAI;
526
+
402
527
  if (modelRegistry) {
403
528
  const antigravity = await findAntigravityCredentials(modelRegistry);
404
529
  if (antigravity) return antigravity;
@@ -512,6 +637,226 @@ function collectInlineImages(parts: GeminiPart[]): InlineImageData[] {
512
637
  return images;
513
638
  }
514
639
 
640
+ function isOpenAIHostedImageModel(model: Model | undefined): model is Model {
641
+ if (!model) return false;
642
+ if (model.provider !== "openai" && model.provider !== "openai-codex") return false;
643
+ if (model.api !== "openai-responses" && model.api !== "openai-codex-responses") return false;
644
+ const modelId = model.id.toLowerCase();
645
+ return modelId.startsWith("gpt-") || modelId === "o3" || modelId.startsWith("o3-");
646
+ }
647
+
648
+ function getOpenAIHostedImageProvider(model: Model): ImageProvider {
649
+ return model.api === "openai-codex-responses" || model.provider === "openai-codex" ? "openai-codex" : "openai";
650
+ }
651
+
652
+ function resolveOpenAIImageSize(aspectRatio: string | undefined, imageSize: string | undefined): string | undefined {
653
+ if (imageSize) return imageSize;
654
+ switch (aspectRatio) {
655
+ case "1:1":
656
+ return "1024x1024";
657
+ case "3:4":
658
+ case "9:16":
659
+ return "1024x1536";
660
+ case "4:3":
661
+ case "16:9":
662
+ return "1536x1024";
663
+ default:
664
+ return undefined;
665
+ }
666
+ }
667
+
668
+ function buildOpenAIHostedImageRequest(
669
+ model: Model,
670
+ promptText: string,
671
+ params: ImageGenParams,
672
+ inputImages: InlineImageData[],
673
+ stream: boolean,
674
+ ): OpenAIHostedImageRequest {
675
+ const content: OpenAIInputContent[] = [{ type: "input_text", text: promptText }];
676
+ for (const image of inputImages) {
677
+ content.push({ type: "input_image", detail: "auto", image_url: toDataUrl(image) });
678
+ }
679
+
680
+ const size = resolveOpenAIImageSize(params.aspect_ratio, params.image_size);
681
+ const tool: OpenAIImageGenerationTool = {
682
+ type: "image_generation",
683
+ action: inputImages.length > 0 ? "edit" : "generate",
684
+ output_format: OPENAI_IMAGE_OUTPUT_FORMAT,
685
+ ...(size ? { size } : {}),
686
+ };
687
+
688
+ return {
689
+ model: model.id,
690
+ input: [{ role: "user", content }],
691
+ tools: [tool],
692
+ tool_choice: { type: "image_generation" },
693
+ store: false,
694
+ ...(stream
695
+ ? {
696
+ instructions:
697
+ "You are an AI image generator. Generate images based on user descriptions. Focus on creating high-quality, visually appealing images that match the user's request.",
698
+ }
699
+ : {}),
700
+ ...(stream ? { stream: true } : {}),
701
+ };
702
+ }
703
+
704
+ function createOpenAIInlineImage(data: string): InlineImageData {
705
+ const bytes = Buffer.from(data, "base64");
706
+ const mimeType = parseImageMetadata(bytes)?.mimeType ?? OPENAI_IMAGE_MIME_TYPE;
707
+ return { data, mimeType };
708
+ }
709
+
710
+ function collectOpenAIHostedImageResult(response: OpenAIHostedImageResponse): OpenAIHostedImageResult {
711
+ const images: InlineImageData[] = [];
712
+ const textParts: string[] = [];
713
+ let revisedPrompt: string | undefined;
714
+
715
+ for (const output of response.output ?? []) {
716
+ if (output.type === "image_generation_call") {
717
+ if (output.result) {
718
+ images.push(createOpenAIInlineImage(output.result));
719
+ }
720
+ if (output.revised_prompt) {
721
+ revisedPrompt = output.revised_prompt;
722
+ }
723
+ continue;
724
+ }
725
+
726
+ for (const part of output.content ?? []) {
727
+ if (part.type === "output_text" && part.text) {
728
+ textParts.push(part.text);
729
+ } else if (part.type === "refusal" && part.refusal) {
730
+ textParts.push(part.refusal);
731
+ }
732
+ }
733
+ }
734
+
735
+ const responseText = textParts.join("\n").trim();
736
+ return {
737
+ images,
738
+ revisedPrompt,
739
+ responseText: responseText.length > 0 ? responseText : undefined,
740
+ usage: response.usage,
741
+ };
742
+ }
743
+
744
+ function getOpenAIResponseErrorMessage(rawText: string): string {
745
+ try {
746
+ const parsed = JSON.parse(rawText) as { error?: { message?: string } };
747
+ return parsed.error?.message ?? rawText;
748
+ } catch {
749
+ return rawText;
750
+ }
751
+ }
752
+
753
+ function getOpenAIBaseUrl(model: Model): string {
754
+ const fallback =
755
+ model.api === "openai-codex-responses" || model.provider === "openai-codex"
756
+ ? CODEX_BASE_URL
757
+ : DEFAULT_OPENAI_BASE_URL;
758
+ return (model.baseUrl || fallback).replace(/\/+$/, "");
759
+ }
760
+
761
+ function getOpenAIResponsesUrl(model: Model): string {
762
+ const baseUrl = getOpenAIBaseUrl(model);
763
+ if (model.api !== "openai-codex-responses" && model.provider !== "openai-codex") {
764
+ return `${baseUrl}/responses`;
765
+ }
766
+ const baseWithSlash = baseUrl.endsWith("/") ? baseUrl : `${baseUrl}/`;
767
+ return new URL(URL_PATHS.RESPONSES.slice(1), baseWithSlash)
768
+ .toString()
769
+ .replace(URL_PATHS.RESPONSES, URL_PATHS.CODEX_RESPONSES);
770
+ }
771
+
772
+ function buildOpenAIImageHeaders(model: Model, apiKey: string, sessionId: string | undefined): Headers {
773
+ const headers = new Headers(model.headers ?? {});
774
+ headers.set("Content-Type", "application/json");
775
+ headers.set("Authorization", `Bearer ${apiKey}`);
776
+
777
+ if (model.api === "openai-codex-responses" || model.provider === "openai-codex") {
778
+ const accountId = getCodexAccountId(apiKey);
779
+ if (!accountId) {
780
+ throw new Error("Failed to extract accountId from OpenAI Codex token");
781
+ }
782
+ headers.delete("x-api-key");
783
+ headers.set(OPENAI_HEADERS.ACCOUNT_ID, accountId);
784
+ headers.set(OPENAI_HEADERS.BETA, OPENAI_HEADER_VALUES.BETA_RESPONSES);
785
+ headers.set(OPENAI_HEADERS.ORIGINATOR, OPENAI_HEADER_VALUES.ORIGINATOR_CODEX);
786
+ headers.set("User-Agent", `pi/${packageJson.version} (${os.platform()} ${os.release()}; ${os.arch()})`);
787
+ if (sessionId) {
788
+ headers.set(OPENAI_HEADERS.CONVERSATION_ID, sessionId);
789
+ headers.set(OPENAI_HEADERS.SESSION_ID, sessionId);
790
+ }
791
+ }
792
+
793
+ return headers;
794
+ }
795
+
796
+ async function parseOpenAIHostedImageSse(response: Response, signal?: AbortSignal): Promise<OpenAIHostedImageResult> {
797
+ if (!response.body) {
798
+ throw new Error("No response body");
799
+ }
800
+
801
+ const fallbackOutput: OpenAIResponseOutput[] = [];
802
+ let completedResponse: OpenAIHostedImageResponse | undefined;
803
+
804
+ for await (const event of readSseJson<OpenAISseEvent>(response.body, signal)) {
805
+ if (event.type === "error") {
806
+ const message = event.error?.message ?? event.message ?? "OpenAI image request failed";
807
+ throw new Error(message);
808
+ }
809
+ if (event.type === "response.failed") {
810
+ const message = event.response?.error?.message ?? "OpenAI image request failed";
811
+ throw new Error(message);
812
+ }
813
+ if (event.type === "response.output_item.done" && event.item) {
814
+ fallbackOutput.push(event.item);
815
+ }
816
+ if ((event.type === "response.completed" || event.type === "response.done") && event.response) {
817
+ completedResponse = event.response;
818
+ }
819
+ }
820
+
821
+ return collectOpenAIHostedImageResult(
822
+ completedResponse?.output?.length
823
+ ? completedResponse
824
+ : { output: fallbackOutput, usage: completedResponse?.usage },
825
+ );
826
+ }
827
+
828
+ async function generateOpenAIHostedImage(
829
+ apiKey: string,
830
+ model: Model,
831
+ params: ImageGenParams,
832
+ inputImages: InlineImageData[],
833
+ signal: AbortSignal | undefined,
834
+ sessionId: string | undefined,
835
+ ): Promise<OpenAIHostedImageResult> {
836
+ const promptText = assemblePrompt(params);
837
+ const stream = model.api === "openai-codex-responses" || model.provider === "openai-codex";
838
+ const requestBody = buildOpenAIHostedImageRequest(model, promptText, params, inputImages, stream);
839
+ const response = await fetch(getOpenAIResponsesUrl(model), {
840
+ method: "POST",
841
+ headers: buildOpenAIImageHeaders(model, apiKey, sessionId),
842
+ body: JSON.stringify(requestBody),
843
+ signal,
844
+ });
845
+
846
+ if (!response.ok) {
847
+ const errorText = await response.text();
848
+ throw new Error(`OpenAI image request failed (${response.status}): ${getOpenAIResponseErrorMessage(errorText)}`);
849
+ }
850
+
851
+ const contentType = response.headers.get("content-type") ?? "";
852
+ if (stream || contentType.includes("text/event-stream")) {
853
+ return parseOpenAIHostedImageSse(response, signal);
854
+ }
855
+
856
+ const data = (await response.json()) as OpenAIHostedImageResponse;
857
+ return collectOpenAIHostedImageResult(data);
858
+ }
859
+
515
860
  function combineParts(response: GeminiGenerateContentResponse): GeminiPart[] {
516
861
  const parts: GeminiPart[] = [];
517
862
  for (const candidate of response.candidates ?? []) {
@@ -568,8 +913,6 @@ interface AntigravitySseResult {
568
913
  usage?: GeminiUsageMetadata;
569
914
  }
570
915
 
571
- const _prefix = Buffer.from("data: ", "utf-8");
572
-
573
916
  async function parseAntigravitySseForImage(response: Response, signal?: AbortSignal): Promise<AntigravitySseResult> {
574
917
  if (!response.body) {
575
918
  throw new Error("No response body");
@@ -604,27 +947,31 @@ async function parseAntigravitySseForImage(response: Response, signal?: AbortSig
604
947
  return { images, text: textParts, usage };
605
948
  }
606
949
 
607
- export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageToolDetails> = {
950
+ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails> = {
608
951
  name: "generate_image",
609
952
  label: "GenerateImage",
610
- description: prompt.render(geminiImageDescription),
611
- parameters: geminiImageSchema,
953
+ strict: false,
954
+ description: prompt.render(imageGenDescription),
955
+ parameters: imageGenSchema,
612
956
  async execute(_toolCallId, params, _onUpdate, ctx, signal) {
613
957
  return untilAborted(signal, async () => {
614
- const apiKey = await findImageApiKey(ctx.modelRegistry);
958
+ const sessionId = ctx.sessionManager.getSessionId();
959
+ const apiKey = await findImageApiKey(ctx.modelRegistry, ctx.model, sessionId);
615
960
  if (!apiKey) {
616
961
  throw new Error(
617
- "No image API credentials found. Login with google-antigravity, or set OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
962
+ "No image API credentials found. Use a GPT Responses/Codex model with OpenAI credentials, login with google-antigravity, or set OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
618
963
  );
619
964
  }
620
965
 
621
966
  const provider = apiKey.provider;
622
967
  const model =
623
- provider === "antigravity"
624
- ? DEFAULT_ANTIGRAVITY_MODEL
625
- : provider === "openrouter"
626
- ? DEFAULT_OPENROUTER_MODEL
627
- : DEFAULT_MODEL;
968
+ provider === "openai" || provider === "openai-codex"
969
+ ? (apiKey.model?.id ?? "gpt")
970
+ : provider === "antigravity"
971
+ ? DEFAULT_ANTIGRAVITY_MODEL
972
+ : provider === "openrouter"
973
+ ? DEFAULT_OPENROUTER_MODEL
974
+ : DEFAULT_MODEL;
628
975
  const resolvedModel = provider === "openrouter" ? resolveOpenRouterModel(model) : model;
629
976
  const cwd = ctx.sessionManager.getCwd();
630
977
 
@@ -637,6 +984,56 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
637
984
 
638
985
  const requestSignal = ptree.combineSignals(signal, IMAGE_TIMEOUT);
639
986
 
987
+ if (provider === "openai" || provider === "openai-codex") {
988
+ if (!apiKey.model) {
989
+ throw new Error("Missing active GPT model for OpenAI image generation");
990
+ }
991
+
992
+ const parsed = await generateOpenAIHostedImage(
993
+ apiKey.apiKey,
994
+ apiKey.model,
995
+ params,
996
+ resolvedImages,
997
+ requestSignal,
998
+ sessionId,
999
+ );
1000
+
1001
+ if (parsed.images.length === 0) {
1002
+ const messageText = parsed.responseText ? `\n\n${parsed.responseText}` : "";
1003
+ return {
1004
+ content: [{ type: "text", text: `No image data returned.${messageText}` }],
1005
+ details: {
1006
+ provider,
1007
+ model,
1008
+ imageCount: 0,
1009
+ imagePaths: [],
1010
+ images: [],
1011
+ responseText: parsed.responseText,
1012
+ revisedPrompt: parsed.revisedPrompt,
1013
+ usage: parsed.usage,
1014
+ },
1015
+ };
1016
+ }
1017
+
1018
+ const imagePaths = await saveImagesToTemp(parsed.images);
1019
+
1020
+ return {
1021
+ content: [
1022
+ { type: "text", text: buildResponseSummary(provider, model, imagePaths, parsed.responseText) },
1023
+ ],
1024
+ details: {
1025
+ provider,
1026
+ model,
1027
+ imageCount: parsed.images.length,
1028
+ imagePaths,
1029
+ images: parsed.images,
1030
+ responseText: parsed.responseText,
1031
+ revisedPrompt: parsed.revisedPrompt,
1032
+ usage: parsed.usage,
1033
+ },
1034
+ };
1035
+ }
1036
+
640
1037
  if (provider === "antigravity") {
641
1038
  if (!apiKey.projectId) {
642
1039
  throw new Error("Missing projectId in antigravity credentials");
@@ -880,18 +1277,20 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
880
1277
  },
881
1278
  };
882
1279
 
883
- export async function getGeminiImageTools(): Promise<
884
- Array<CustomTool<typeof geminiImageSchema, GeminiImageToolDetails>>
885
- > {
886
- const apiKey = await findImageApiKey();
1280
+ export async function getImageGenTools(
1281
+ modelRegistry?: ModelRegistry,
1282
+ activeModel?: Model,
1283
+ ): Promise<Array<CustomTool<typeof imageGenSchema, ImageGenToolDetails>>> {
1284
+ const apiKey = await findImageApiKey(modelRegistry, activeModel);
887
1285
  if (!apiKey) return [];
888
- return [geminiImageTool];
1286
+ return [imageGenTool];
889
1287
  }
890
1288
 
891
- export async function getGeminiImageToolsWithRegistry(
1289
+ export async function getImageGenToolsWithRegistry(
892
1290
  modelRegistry: ModelRegistry,
893
- ): Promise<Array<CustomTool<typeof geminiImageSchema, GeminiImageToolDetails>>> {
894
- const apiKey = await findImageApiKey(modelRegistry);
1291
+ activeModel?: Model,
1292
+ ): Promise<Array<CustomTool<typeof imageGenSchema, ImageGenToolDetails>>> {
1293
+ const apiKey = await findImageApiKey(modelRegistry, activeModel);
895
1294
  if (!apiKey) return [];
896
- return [geminiImageTool];
1295
+ return [imageGenTool];
897
1296
  }