ai-sdk-provider-codex-cli 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/dist/index.cjs +145 -29
- package/dist/index.js +145 -29
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -104,6 +104,48 @@ console.log(object);
|
|
|
104
104
|
- Safe defaults for non‑interactive automation (`on-failure`, `workspace-write`, `--skip-git-repo-check`)
|
|
105
105
|
- Fallback to `npx @openai/codex` when not on PATH (`allowNpx`)
|
|
106
106
|
- Usage tracking from experimental JSON event format
|
|
107
|
+
- **Image support** - Pass images to vision-capable models via `--image` flag
|
|
108
|
+
|
|
109
|
+
### Image Support
|
|
110
|
+
|
|
111
|
+
The provider supports multimodal (image) inputs for vision-capable models:
|
|
112
|
+
|
|
113
|
+
```js
|
|
114
|
+
import { generateText } from 'ai';
|
|
115
|
+
import { codexCli } from 'ai-sdk-provider-codex-cli';
|
|
116
|
+
import { readFileSync } from 'fs';
|
|
117
|
+
|
|
118
|
+
const model = codexCli('gpt-5.1-codex', { allowNpx: true, skipGitRepoCheck: true });
|
|
119
|
+
const imageBuffer = readFileSync('./screenshot.png');
|
|
120
|
+
|
|
121
|
+
const { text } = await generateText({
|
|
122
|
+
model,
|
|
123
|
+
messages: [
|
|
124
|
+
{
|
|
125
|
+
role: 'user',
|
|
126
|
+
content: [
|
|
127
|
+
{ type: 'text', text: 'What do you see in this image?' },
|
|
128
|
+
{ type: 'image', image: imageBuffer, mimeType: 'image/png' },
|
|
129
|
+
],
|
|
130
|
+
},
|
|
131
|
+
],
|
|
132
|
+
});
|
|
133
|
+
console.log(text);
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
**Supported image formats:**
|
|
137
|
+
|
|
138
|
+
- Base64 data URL (`data:image/png;base64,...`)
|
|
139
|
+
- Base64 string (without data URL prefix)
|
|
140
|
+
- `Buffer` / `Uint8Array` / `ArrayBuffer`
|
|
141
|
+
|
|
142
|
+
**Not supported:**
|
|
143
|
+
|
|
144
|
+
- HTTP/HTTPS URLs (remote images are not fetched; pass the image as a base64 string, a base64 data URL, or binary data instead)
|
|
145
|
+
|
|
146
|
+
Images are written to temporary files and passed to Codex CLI via the `--image` flag. Temp files are automatically cleaned up after the request completes.
|
|
147
|
+
|
|
148
|
+
See [examples/image-support.mjs](examples/image-support.mjs) for a complete working example.
|
|
107
149
|
|
|
108
150
|
### Tool Streaming (v0.3.0+)
|
|
109
151
|
|
package/dist/index.cjs
CHANGED
|
@@ -167,6 +167,94 @@ function validateModelId(modelId) {
|
|
|
167
167
|
if (!modelId || modelId.trim() === "") return "Model ID cannot be empty";
|
|
168
168
|
return void 0;
|
|
169
169
|
}
|
|
170
|
+
/**
 * Converts one image-like message part into `{ data, mimeType }`, where `data`
 * is a base64 data URL, or returns `null` when the part cannot be used.
 *
 * The image source is looked up under `part.image`, then `part.data`, then
 * `part.url`. The first of those holding a recognised value type decides the
 * result:
 *   - string: delegated to `extractFromString` (base64 text or data URL;
 *     HTTP(S) URLs are rejected there);
 *   - `URL`: only `data:` URLs are accepted, and they go through the same
 *     `extractFromString` validation as strings, so a non-base64 data URL is
 *     rejected here instead of failing later when the temp file is written;
 *   - `Buffer` / typed array / `DataView` / `ArrayBuffer`: base64-encoded.
 *     Views are encoded from their own byte window (offset and length
 *     respected). Zero-length input yields `null` because an empty payload
 *     cannot be written as an image.
 *
 * @param {unknown} part - Candidate message part.
 * @returns {{ data: string, mimeType: string } | null} Normalised image data,
 *   or `null` for unsupported or empty input.
 */
function extractImageData(part) {
  if (typeof part !== "object" || part === null) return null;

  // `mimeType` wins; `mediaType` is accepted as an alias.
  // NOTE(review): `mediaType` is presumably the field name used by newer AI SDK
  // prompt parts; confirm against isImagePart. Wildcards such as "image/*"
  // carry no concrete type, so they fall back to the PNG default.
  const declared = part.mimeType || part.mediaType;
  const mimeType =
    typeof declared === "string" && declared !== "" && !declared.includes("*")
      ? declared
      : "image/png";

  for (const source of [part.image, part.data, part.url]) {
    if (typeof source === "string") {
      return extractFromString(source, mimeType);
    }
    if (source instanceof URL) {
      return source.protocol === "data:"
        ? extractFromString(source.toString(), mimeType)
        : null;
    }

    let bytes = null;
    if (Buffer.isBuffer(source)) {
      bytes = source;
    } else if (ArrayBuffer.isView(source)) {
      // Wrap exactly the viewed bytes rather than the whole backing buffer.
      bytes = Buffer.from(source.buffer, source.byteOffset, source.byteLength);
    } else if (source instanceof ArrayBuffer) {
      bytes = Buffer.from(source);
    }
    if (bytes !== null) {
      if (bytes.length === 0) return null;
      return { data: `data:${mimeType};base64,${bytes.toString("base64")}`, mimeType };
    }
    // Unrecognised value under this key: try the next candidate key.
  }
  return null;
}
|
|
203
|
+
/**
 * Normalises a string image reference into a canonical base64 data URL.
 *
 * Accepted inputs:
 *   - a base64 data URL (`data:<type>[;param...];base64,<payload>`); the scheme
 *     and the `base64` marker are matched case-insensitively;
 *   - a bare base64 string, which is wrapped using `fallbackMimeType`.
 *
 * Rejected inputs (returns `null`):
 *   - empty / whitespace-only strings;
 *   - `http://` and `https://` URLs;
 *   - data URLs that are not base64-encoded or carry an empty payload.
 *
 * The returned `data` is always rebuilt as `data:<mimeType>;base64,<payload>`
 * with extra header parameters dropped and whitespace removed from the
 * payload, so it has one predictable shape for downstream parsing.
 *
 * @param {string} value - Raw string taken from the message part.
 * @param {string} fallbackMimeType - MIME type used when the string does not
 *   declare one itself.
 * @returns {{ data: string, mimeType: string } | null}
 */
function extractFromString(value, fallbackMimeType) {
  const trimmed = value.trim();
  if (trimmed === "") return null;
  if (/^https?:\/\//i.test(trimmed)) return null;

  if (/^data:/i.test(trimmed)) {
    // The header ends at the FIRST comma; looking for ";base64," anywhere in
    // the string would wrongly accept e.g. "data:text/plain,a;base64,b".
    const comma = trimmed.indexOf(",");
    if (comma === -1) return null;
    const fields = trimmed.slice("data:".length, comma).split(";");
    const isBase64 =
      fields.length >= 2 && fields[fields.length - 1].trim().toLowerCase() === "base64";
    if (!isBase64) return null;

    const payload = trimmed.slice(comma + 1).replace(/\s+/g, "");
    if (payload === "") return null;

    const mimeType = fields[0].trim() || fallbackMimeType;
    return { data: `data:${mimeType};base64,${payload}`, mimeType };
  }

  // Bare base64 text; line-wrapped input is tolerated by stripping whitespace.
  const payload = trimmed.replace(/\s+/g, "");
  return {
    data: `data:${fallbackMimeType};base64,${payload}`,
    mimeType: fallbackMimeType
  };
}
|
|
221
|
+
/**
 * Decodes a base64 data URL and writes it to `image.<ext>` inside a freshly
 * created `codex-img-*` directory under the OS temp dir.
 *
 * Validation and decoding happen BEFORE the directory is created, and the
 * directory is removed again if the write fails, so a rejected image never
 * leaves an empty temp directory behind.
 *
 * @param {{ data: string, mimeType?: string }} imageData - `data` must be a
 *   base64 data URL; `mimeType` selects the file extension.
 * @returns {string} Absolute path of the written image file.
 * @throws {Error} If `data` is not a base64 data URL, if the payload decodes
 *   to zero bytes, or if the file cannot be written.
 */
function writeImageToTempFile(imageData) {
  // Header may carry extra parameters and the payload may be line-wrapped.
  const base64Match = /^data:[^,]*;base64,([\s\S]+)$/i.exec(String(imageData.data));
  if (!base64Match) {
    throw new Error("Invalid data URL format: expected data:[type];base64,[data]");
  }
  const buffer = Buffer.from(base64Match[1].replace(/\s+/g, ""), "base64");
  if (buffer.length === 0) {
    throw new Error("Invalid image data: base64 payload decoded to zero bytes");
  }

  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "codex-img-"));
  try {
    const ext = getExtensionFromMimeType(imageData.mimeType);
    const filePath = path.join(dir, `image.${ext}`);
    fs.writeFileSync(filePath, buffer);
    return filePath;
  } catch (error) {
    // Best-effort rollback; the original failure is what the caller needs.
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {
    }
    throw error;
  }
}
|
|
233
|
+
/**
 * Best-effort removal of temp image files and their `codex-img-*` directories.
 *
 * For each path the file itself is deleted, then its parent directory is
 * removed recursively — but only when the parent's OWN name starts with
 * `codex-img-`. Checking the directory name (rather than searching the whole
 * path for the marker) ensures an unrelated directory is never removed just
 * because some ancestor happens to contain "codex-img-".
 *
 * Failures are ignored on purpose: cleanup must never mask the outcome of the
 * request that produced the files. File and directory removal are attempted
 * independently so one failure does not skip the other.
 *
 * @param {Iterable<string> | null | undefined} paths - Image file paths.
 * @returns {void}
 */
function cleanupTempImages(paths) {
  for (const filePath of paths ?? []) {
    if (typeof filePath !== "string" || filePath === "") continue;
    try {
      fs.rmSync(filePath, { force: true });
    } catch {
      // Ignored: best-effort cleanup.
    }
    const dir = path.dirname(filePath);
    if (!path.basename(dir).startsWith("codex-img-")) continue;
    try {
      fs.rmSync(dir, { force: true, recursive: true });
    } catch {
      // Ignored: best-effort cleanup.
    }
  }
}
|
|
245
|
+
/**
 * Picks a file extension (without the dot) for an image MIME type.
 *
 * Media-type parameters (`; charset=...`) and letter case are ignored. Known
 * image types use a fixed table; unknown types fall back to their subtype when
 * it is a plain token, and to `"png"` otherwise (missing or non-string input,
 * no subtype, wildcards such as `image/*`, or characters that do not belong in
 * a file extension).
 *
 * @param {string | null | undefined} mimeType - e.g. `"image/jpeg"`.
 * @returns {string} Extension such as `"png"`, `"jpg"`, `"webp"`.
 */
function getExtensionFromMimeType(mimeType) {
  if (typeof mimeType !== "string" || mimeType === "") return "png";

  // A Map avoids accidental hits on Object.prototype keys ("constructor", ...).
  const mapping = new Map([
    ["image/png", "png"],
    ["image/jpeg", "jpg"],
    ["image/jpg", "jpg"],
    ["image/gif", "gif"],
    ["image/webp", "webp"],
    ["image/bmp", "bmp"],
    ["image/svg+xml", "svg"]
  ]);

  const essence = mimeType.split(";")[0].trim().toLowerCase();
  const known = mapping.get(essence);
  if (known !== undefined) return known;

  const subtype = essence.split("/")[1] ?? "";
  return /^[a-z0-9][a-z0-9.+-]*$/.test(subtype) ? subtype : "png";
}
|
|
170
258
|
|
|
171
259
|
// src/message-mapper.ts
|
|
172
260
|
function isTextPart(p) {
|
|
@@ -187,6 +275,7 @@ function isToolItem(p) {
|
|
|
187
275
|
function mapMessagesToPrompt(prompt) {
|
|
188
276
|
const warnings = [];
|
|
189
277
|
const parts = [];
|
|
278
|
+
const images = [];
|
|
190
279
|
let systemText;
|
|
191
280
|
for (const msg of prompt) {
|
|
192
281
|
if (msg.role === "system") {
|
|
@@ -199,8 +288,14 @@ function mapMessagesToPrompt(prompt) {
|
|
|
199
288
|
} else if (Array.isArray(msg.content)) {
|
|
200
289
|
const text = msg.content.filter(isTextPart).map((p) => p.text).join("\n");
|
|
201
290
|
if (text) parts.push(`Human: ${text}`);
|
|
202
|
-
const
|
|
203
|
-
|
|
291
|
+
for (const part of msg.content.filter(isImagePart)) {
|
|
292
|
+
const imageData = extractImageData(part);
|
|
293
|
+
if (imageData) {
|
|
294
|
+
images.push(imageData);
|
|
295
|
+
} else {
|
|
296
|
+
warnings.push("Unsupported image format in message (HTTP URLs not supported)");
|
|
297
|
+
}
|
|
298
|
+
}
|
|
204
299
|
}
|
|
205
300
|
continue;
|
|
206
301
|
}
|
|
@@ -227,7 +322,7 @@ function mapMessagesToPrompt(prompt) {
|
|
|
227
322
|
let promptText = "";
|
|
228
323
|
if (systemText) promptText += systemText + "\n\n";
|
|
229
324
|
promptText += parts.join("\n\n");
|
|
230
|
-
return { promptText, ...warnings.length ? { warnings } : {} };
|
|
325
|
+
return { promptText, images, ...warnings.length ? { warnings } : {} };
|
|
231
326
|
}
|
|
232
327
|
function createAPICallError({
|
|
233
328
|
message,
|
|
@@ -404,7 +499,7 @@ var CodexCliLanguageModel = class {
|
|
|
404
499
|
const current = typeof data.type === "string" ? data.type : void 0;
|
|
405
500
|
return legacy ?? current;
|
|
406
501
|
}
|
|
407
|
-
buildArgs(promptText, responseFormat, settings = this.settings) {
|
|
502
|
+
buildArgs(promptText, images = [], responseFormat, settings = this.settings) {
|
|
408
503
|
const base = resolveCodexPath(settings.codexPath, settings.allowNpx);
|
|
409
504
|
const args = [...base.args, "exec", "--experimental-json"];
|
|
410
505
|
if (settings.fullAuto) {
|
|
@@ -479,6 +574,16 @@ var CodexCliLanguageModel = class {
|
|
|
479
574
|
args.push("--output-schema", schemaPath);
|
|
480
575
|
}
|
|
481
576
|
}
|
|
577
|
+
const tempImagePaths = [];
|
|
578
|
+
for (const img of images) {
|
|
579
|
+
try {
|
|
580
|
+
const tempPath = writeImageToTempFile(img);
|
|
581
|
+
tempImagePaths.push(tempPath);
|
|
582
|
+
args.push("--image", tempPath);
|
|
583
|
+
} catch (e) {
|
|
584
|
+
this.logger.warn(`[codex-cli] Failed to write image to temp file: ${String(e)}`);
|
|
585
|
+
}
|
|
586
|
+
}
|
|
482
587
|
args.push(promptText);
|
|
483
588
|
const env = {
|
|
484
589
|
...process.env,
|
|
@@ -500,7 +605,8 @@ var CodexCliLanguageModel = class {
|
|
|
500
605
|
cwd: settings.cwd,
|
|
501
606
|
lastMessagePath,
|
|
502
607
|
lastMessageIsTemp,
|
|
503
|
-
schemaPath
|
|
608
|
+
schemaPath,
|
|
609
|
+
tempImagePaths: tempImagePaths.length > 0 ? tempImagePaths : void 0
|
|
504
610
|
};
|
|
505
611
|
}
|
|
506
612
|
applyMcpSettings(args, settings) {
|
|
@@ -830,14 +936,14 @@ var CodexCliLanguageModel = class {
|
|
|
830
936
|
}
|
|
831
937
|
async doGenerate(options) {
|
|
832
938
|
this.logger.debug(`[codex-cli] Starting doGenerate request with model: ${this.modelId}`);
|
|
833
|
-
const { promptText, warnings: mappingWarnings } = mapMessagesToPrompt(options.prompt);
|
|
939
|
+
const { promptText, images, warnings: mappingWarnings } = mapMessagesToPrompt(options.prompt);
|
|
834
940
|
const promptExcerpt = promptText.slice(0, 200);
|
|
835
941
|
const warnings = [
|
|
836
942
|
...this.mapWarnings(options),
|
|
837
943
|
...mappingWarnings?.map((m) => ({ type: "other", message: m })) || []
|
|
838
944
|
];
|
|
839
945
|
this.logger.debug(
|
|
840
|
-
`[codex-cli] Converted ${options.prompt.length} messages, response format: ${options.responseFormat?.type ?? "none"}`
|
|
946
|
+
`[codex-cli] Converted ${options.prompt.length} messages (${images.length} images), response format: ${options.responseFormat?.type ?? "none"}`
|
|
841
947
|
);
|
|
842
948
|
const providerOptions = await providerUtils.parseProviderOptions({
|
|
843
949
|
provider: this.provider,
|
|
@@ -846,11 +952,7 @@ var CodexCliLanguageModel = class {
|
|
|
846
952
|
});
|
|
847
953
|
const effectiveSettings = this.mergeSettings(providerOptions);
|
|
848
954
|
const responseFormat = options.responseFormat?.type === "json" ? { type: "json", schema: options.responseFormat.schema } : void 0;
|
|
849
|
-
const { cmd, args, env, cwd, lastMessagePath, lastMessageIsTemp, schemaPath } = this.buildArgs(
|
|
850
|
-
promptText,
|
|
851
|
-
responseFormat,
|
|
852
|
-
effectiveSettings
|
|
853
|
-
);
|
|
955
|
+
const { cmd, args, env, cwd, lastMessagePath, lastMessageIsTemp, schemaPath, tempImagePaths } = this.buildArgs(promptText, images, responseFormat, effectiveSettings);
|
|
854
956
|
this.logger.debug(
|
|
855
957
|
`[codex-cli] Executing Codex CLI: ${cmd} with ${args.length} arguments, cwd: ${cwd ?? "default"}`
|
|
856
958
|
);
|
|
@@ -863,6 +965,16 @@ var CodexCliLanguageModel = class {
|
|
|
863
965
|
if (options.abortSignal) {
|
|
864
966
|
if (options.abortSignal.aborted) {
|
|
865
967
|
child.kill("SIGTERM");
|
|
968
|
+
if (schemaPath) {
|
|
969
|
+
try {
|
|
970
|
+
const schemaDir = path.dirname(schemaPath);
|
|
971
|
+
fs.rmSync(schemaDir, { recursive: true, force: true });
|
|
972
|
+
} catch {
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
if (tempImagePaths?.length) {
|
|
976
|
+
cleanupTempImages(tempImagePaths);
|
|
977
|
+
}
|
|
866
978
|
throw options.abortSignal.reason ?? new Error("Request aborted");
|
|
867
979
|
}
|
|
868
980
|
onAbort = () => child.kill("SIGTERM");
|
|
@@ -957,6 +1069,9 @@ var CodexCliLanguageModel = class {
|
|
|
957
1069
|
} catch {
|
|
958
1070
|
}
|
|
959
1071
|
}
|
|
1072
|
+
if (tempImagePaths?.length) {
|
|
1073
|
+
cleanupTempImages(tempImagePaths);
|
|
1074
|
+
}
|
|
960
1075
|
}
|
|
961
1076
|
if (!text && lastMessagePath) {
|
|
962
1077
|
try {
|
|
@@ -988,14 +1103,14 @@ var CodexCliLanguageModel = class {
|
|
|
988
1103
|
}
|
|
989
1104
|
async doStream(options) {
|
|
990
1105
|
this.logger.debug(`[codex-cli] Starting doStream request with model: ${this.modelId}`);
|
|
991
|
-
const { promptText, warnings: mappingWarnings } = mapMessagesToPrompt(options.prompt);
|
|
1106
|
+
const { promptText, images, warnings: mappingWarnings } = mapMessagesToPrompt(options.prompt);
|
|
992
1107
|
const promptExcerpt = promptText.slice(0, 200);
|
|
993
1108
|
const warnings = [
|
|
994
1109
|
...this.mapWarnings(options),
|
|
995
1110
|
...mappingWarnings?.map((m) => ({ type: "other", message: m })) || []
|
|
996
1111
|
];
|
|
997
1112
|
this.logger.debug(
|
|
998
|
-
`[codex-cli] Converted ${options.prompt.length} messages for streaming, response format: ${options.responseFormat?.type ?? "none"}`
|
|
1113
|
+
`[codex-cli] Converted ${options.prompt.length} messages (${images.length} images) for streaming, response format: ${options.responseFormat?.type ?? "none"}`
|
|
999
1114
|
);
|
|
1000
1115
|
const providerOptions = await providerUtils.parseProviderOptions({
|
|
1001
1116
|
provider: this.provider,
|
|
@@ -1004,11 +1119,7 @@ var CodexCliLanguageModel = class {
|
|
|
1004
1119
|
});
|
|
1005
1120
|
const effectiveSettings = this.mergeSettings(providerOptions);
|
|
1006
1121
|
const responseFormat = options.responseFormat?.type === "json" ? { type: "json", schema: options.responseFormat.schema } : void 0;
|
|
1007
|
-
const { cmd, args, env, cwd, lastMessagePath, lastMessageIsTemp, schemaPath } = this.buildArgs(
|
|
1008
|
-
promptText,
|
|
1009
|
-
responseFormat,
|
|
1010
|
-
effectiveSettings
|
|
1011
|
-
);
|
|
1122
|
+
const { cmd, args, env, cwd, lastMessagePath, lastMessageIsTemp, schemaPath, tempImagePaths } = this.buildArgs(promptText, images, responseFormat, effectiveSettings);
|
|
1012
1123
|
this.logger.debug(
|
|
1013
1124
|
`[codex-cli] Executing Codex CLI for streaming: ${cmd} with ${args.length} arguments`
|
|
1014
1125
|
);
|
|
@@ -1023,6 +1134,18 @@ var CodexCliLanguageModel = class {
|
|
|
1023
1134
|
let responseMetadataSent = false;
|
|
1024
1135
|
let lastUsage;
|
|
1025
1136
|
let turnFailureMessage;
|
|
1137
|
+
const cleanupTempFiles = () => {
|
|
1138
|
+
if (schemaPath) {
|
|
1139
|
+
try {
|
|
1140
|
+
const schemaDir = path.dirname(schemaPath);
|
|
1141
|
+
fs.rmSync(schemaDir, { recursive: true, force: true });
|
|
1142
|
+
} catch {
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
if (tempImagePaths?.length) {
|
|
1146
|
+
cleanupTempImages(tempImagePaths);
|
|
1147
|
+
}
|
|
1148
|
+
};
|
|
1026
1149
|
const sendMetadata = (meta = {}) => {
|
|
1027
1150
|
controller.enqueue({
|
|
1028
1151
|
type: "response-metadata",
|
|
@@ -1096,6 +1219,7 @@ var CodexCliLanguageModel = class {
|
|
|
1096
1219
|
if (options.abortSignal) {
|
|
1097
1220
|
if (options.abortSignal.aborted) {
|
|
1098
1221
|
child.kill("SIGTERM");
|
|
1222
|
+
cleanupTempFiles();
|
|
1099
1223
|
controller.error(options.abortSignal.reason ?? new Error("Request aborted"));
|
|
1100
1224
|
return;
|
|
1101
1225
|
}
|
|
@@ -1215,23 +1339,15 @@ var CodexCliLanguageModel = class {
|
|
|
1215
1339
|
}
|
|
1216
1340
|
}
|
|
1217
1341
|
});
|
|
1218
|
-
const cleanupSchema = () => {
|
|
1219
|
-
if (!schemaPath) return;
|
|
1220
|
-
try {
|
|
1221
|
-
const schemaDir = path.dirname(schemaPath);
|
|
1222
|
-
fs.rmSync(schemaDir, { recursive: true, force: true });
|
|
1223
|
-
} catch {
|
|
1224
|
-
}
|
|
1225
|
-
};
|
|
1226
1342
|
child.on("error", (e) => {
|
|
1227
1343
|
this.logger.error(`[codex-cli] Stream spawn error: ${String(e)}`);
|
|
1228
1344
|
if (options.abortSignal) options.abortSignal.removeEventListener("abort", onAbort);
|
|
1229
|
-
|
|
1345
|
+
cleanupTempFiles();
|
|
1230
1346
|
controller.error(this.handleSpawnError(e, promptExcerpt));
|
|
1231
1347
|
});
|
|
1232
1348
|
child.on("close", (code) => {
|
|
1233
1349
|
if (options.abortSignal) options.abortSignal.removeEventListener("abort", onAbort);
|
|
1234
|
-
|
|
1350
|
+
cleanupTempFiles();
|
|
1235
1351
|
setImmediate(() => finishStream(code));
|
|
1236
1352
|
});
|
|
1237
1353
|
},
|
package/dist/index.js
CHANGED
|
@@ -164,6 +164,94 @@ function validateModelId(modelId) {
|
|
|
164
164
|
if (!modelId || modelId.trim() === "") return "Model ID cannot be empty";
|
|
165
165
|
return void 0;
|
|
166
166
|
}
|
|
167
|
+
/**
 * Converts one image-like message part into `{ data, mimeType }`, where `data`
 * is a base64 data URL, or returns `null` when the part cannot be used.
 *
 * The image source is looked up under `part.image`, then `part.data`, then
 * `part.url`. The first of those holding a recognised value type decides the
 * result:
 *   - string: delegated to `extractFromString` (base64 text or data URL;
 *     HTTP(S) URLs are rejected there);
 *   - `URL`: only `data:` URLs are accepted, and they go through the same
 *     `extractFromString` validation as strings, so a non-base64 data URL is
 *     rejected here instead of failing later when the temp file is written;
 *   - `Buffer` / typed array / `DataView` / `ArrayBuffer`: base64-encoded.
 *     Views are encoded from their own byte window (offset and length
 *     respected). Zero-length input yields `null` because an empty payload
 *     cannot be written as an image.
 *
 * @param {unknown} part - Candidate message part.
 * @returns {{ data: string, mimeType: string } | null} Normalised image data,
 *   or `null` for unsupported or empty input.
 */
function extractImageData(part) {
  if (typeof part !== "object" || part === null) return null;

  // `mimeType` wins; `mediaType` is accepted as an alias.
  // NOTE(review): `mediaType` is presumably the field name used by newer AI SDK
  // prompt parts; confirm against isImagePart. Wildcards such as "image/*"
  // carry no concrete type, so they fall back to the PNG default.
  const declared = part.mimeType || part.mediaType;
  const mimeType =
    typeof declared === "string" && declared !== "" && !declared.includes("*")
      ? declared
      : "image/png";

  for (const source of [part.image, part.data, part.url]) {
    if (typeof source === "string") {
      return extractFromString(source, mimeType);
    }
    if (source instanceof URL) {
      return source.protocol === "data:"
        ? extractFromString(source.toString(), mimeType)
        : null;
    }

    let bytes = null;
    if (Buffer.isBuffer(source)) {
      bytes = source;
    } else if (ArrayBuffer.isView(source)) {
      // Wrap exactly the viewed bytes rather than the whole backing buffer.
      bytes = Buffer.from(source.buffer, source.byteOffset, source.byteLength);
    } else if (source instanceof ArrayBuffer) {
      bytes = Buffer.from(source);
    }
    if (bytes !== null) {
      if (bytes.length === 0) return null;
      return { data: `data:${mimeType};base64,${bytes.toString("base64")}`, mimeType };
    }
    // Unrecognised value under this key: try the next candidate key.
  }
  return null;
}
|
|
200
|
+
/**
 * Normalises a string image reference into a canonical base64 data URL.
 *
 * Accepted inputs:
 *   - a base64 data URL (`data:<type>[;param...];base64,<payload>`); the scheme
 *     and the `base64` marker are matched case-insensitively;
 *   - a bare base64 string, which is wrapped using `fallbackMimeType`.
 *
 * Rejected inputs (returns `null`):
 *   - empty / whitespace-only strings;
 *   - `http://` and `https://` URLs;
 *   - data URLs that are not base64-encoded or carry an empty payload.
 *
 * The returned `data` is always rebuilt as `data:<mimeType>;base64,<payload>`
 * with extra header parameters dropped and whitespace removed from the
 * payload, so it has one predictable shape for downstream parsing.
 *
 * @param {string} value - Raw string taken from the message part.
 * @param {string} fallbackMimeType - MIME type used when the string does not
 *   declare one itself.
 * @returns {{ data: string, mimeType: string } | null}
 */
function extractFromString(value, fallbackMimeType) {
  const trimmed = value.trim();
  if (trimmed === "") return null;
  if (/^https?:\/\//i.test(trimmed)) return null;

  if (/^data:/i.test(trimmed)) {
    // The header ends at the FIRST comma; looking for ";base64," anywhere in
    // the string would wrongly accept e.g. "data:text/plain,a;base64,b".
    const comma = trimmed.indexOf(",");
    if (comma === -1) return null;
    const fields = trimmed.slice("data:".length, comma).split(";");
    const isBase64 =
      fields.length >= 2 && fields[fields.length - 1].trim().toLowerCase() === "base64";
    if (!isBase64) return null;

    const payload = trimmed.slice(comma + 1).replace(/\s+/g, "");
    if (payload === "") return null;

    const mimeType = fields[0].trim() || fallbackMimeType;
    return { data: `data:${mimeType};base64,${payload}`, mimeType };
  }

  // Bare base64 text; line-wrapped input is tolerated by stripping whitespace.
  const payload = trimmed.replace(/\s+/g, "");
  return {
    data: `data:${fallbackMimeType};base64,${payload}`,
    mimeType: fallbackMimeType
  };
}
|
|
218
|
+
/**
 * Decodes a base64 data URL and writes it to `image.<ext>` inside a freshly
 * created `codex-img-*` directory under the OS temp dir.
 *
 * Validation and decoding happen BEFORE the directory is created, and the
 * directory is removed again if the write fails, so a rejected image never
 * leaves an empty temp directory behind.
 *
 * @param {{ data: string, mimeType?: string }} imageData - `data` must be a
 *   base64 data URL; `mimeType` selects the file extension.
 * @returns {string} Absolute path of the written image file.
 * @throws {Error} If `data` is not a base64 data URL, if the payload decodes
 *   to zero bytes, or if the file cannot be written.
 */
function writeImageToTempFile(imageData) {
  // Header may carry extra parameters and the payload may be line-wrapped.
  const base64Match = /^data:[^,]*;base64,([\s\S]+)$/i.exec(String(imageData.data));
  if (!base64Match) {
    throw new Error("Invalid data URL format: expected data:[type];base64,[data]");
  }
  const buffer = Buffer.from(base64Match[1].replace(/\s+/g, ""), "base64");
  if (buffer.length === 0) {
    throw new Error("Invalid image data: base64 payload decoded to zero bytes");
  }

  const dir = mkdtempSync(join(tmpdir(), "codex-img-"));
  try {
    const ext = getExtensionFromMimeType(imageData.mimeType);
    const filePath = join(dir, `image.${ext}`);
    writeFileSync(filePath, buffer);
    return filePath;
  } catch (error) {
    // Best-effort rollback; the original failure is what the caller needs.
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
    }
    throw error;
  }
}
|
|
230
|
+
/**
 * Best-effort removal of temp image files and their `codex-img-*` directories.
 *
 * For each path the file itself is deleted, then its parent directory is
 * removed recursively — but only when the parent's OWN name starts with
 * `codex-img-`. Checking the directory name (rather than searching the whole
 * path for the marker) ensures an unrelated directory is never removed just
 * because some ancestor happens to contain "codex-img-".
 *
 * Failures are ignored on purpose: cleanup must never mask the outcome of the
 * request that produced the files. File and directory removal are attempted
 * independently so one failure does not skip the other.
 *
 * @param {Iterable<string> | null | undefined} paths - Image file paths.
 * @returns {void}
 */
function cleanupTempImages(paths) {
  // Index of the last path separator; both "/" and "\" are recognised.
  const lastSeparator = (text) => Math.max(text.lastIndexOf("/"), text.lastIndexOf("\\"));

  for (const filePath of paths ?? []) {
    if (typeof filePath !== "string" || filePath === "") continue;
    try {
      rmSync(filePath, { force: true });
    } catch {
      // Ignored: best-effort cleanup.
    }
    const cut = lastSeparator(filePath);
    if (cut <= 0) continue;
    const dir = filePath.slice(0, cut);
    const dirName = dir.slice(lastSeparator(dir) + 1);
    if (!dirName.startsWith("codex-img-")) continue;
    try {
      rmSync(dir, { force: true, recursive: true });
    } catch {
      // Ignored: best-effort cleanup.
    }
  }
}
|
|
242
|
+
/**
 * Picks a file extension (without the dot) for an image MIME type.
 *
 * Media-type parameters (`; charset=...`) and letter case are ignored. Known
 * image types use a fixed table; unknown types fall back to their subtype when
 * it is a plain token, and to `"png"` otherwise (missing or non-string input,
 * no subtype, wildcards such as `image/*`, or characters that do not belong in
 * a file extension).
 *
 * @param {string | null | undefined} mimeType - e.g. `"image/jpeg"`.
 * @returns {string} Extension such as `"png"`, `"jpg"`, `"webp"`.
 */
function getExtensionFromMimeType(mimeType) {
  if (typeof mimeType !== "string" || mimeType === "") return "png";

  // A Map avoids accidental hits on Object.prototype keys ("constructor", ...).
  const mapping = new Map([
    ["image/png", "png"],
    ["image/jpeg", "jpg"],
    ["image/jpg", "jpg"],
    ["image/gif", "gif"],
    ["image/webp", "webp"],
    ["image/bmp", "bmp"],
    ["image/svg+xml", "svg"]
  ]);

  const essence = mimeType.split(";")[0].trim().toLowerCase();
  const known = mapping.get(essence);
  if (known !== undefined) return known;

  const subtype = essence.split("/")[1] ?? "";
  return /^[a-z0-9][a-z0-9.+-]*$/.test(subtype) ? subtype : "png";
}
|
|
167
255
|
|
|
168
256
|
// src/message-mapper.ts
|
|
169
257
|
function isTextPart(p) {
|
|
@@ -184,6 +272,7 @@ function isToolItem(p) {
|
|
|
184
272
|
function mapMessagesToPrompt(prompt) {
|
|
185
273
|
const warnings = [];
|
|
186
274
|
const parts = [];
|
|
275
|
+
const images = [];
|
|
187
276
|
let systemText;
|
|
188
277
|
for (const msg of prompt) {
|
|
189
278
|
if (msg.role === "system") {
|
|
@@ -196,8 +285,14 @@ function mapMessagesToPrompt(prompt) {
|
|
|
196
285
|
} else if (Array.isArray(msg.content)) {
|
|
197
286
|
const text = msg.content.filter(isTextPart).map((p) => p.text).join("\n");
|
|
198
287
|
if (text) parts.push(`Human: ${text}`);
|
|
199
|
-
const
|
|
200
|
-
|
|
288
|
+
for (const part of msg.content.filter(isImagePart)) {
|
|
289
|
+
const imageData = extractImageData(part);
|
|
290
|
+
if (imageData) {
|
|
291
|
+
images.push(imageData);
|
|
292
|
+
} else {
|
|
293
|
+
warnings.push("Unsupported image format in message (HTTP URLs not supported)");
|
|
294
|
+
}
|
|
295
|
+
}
|
|
201
296
|
}
|
|
202
297
|
continue;
|
|
203
298
|
}
|
|
@@ -224,7 +319,7 @@ function mapMessagesToPrompt(prompt) {
|
|
|
224
319
|
let promptText = "";
|
|
225
320
|
if (systemText) promptText += systemText + "\n\n";
|
|
226
321
|
promptText += parts.join("\n\n");
|
|
227
|
-
return { promptText, ...warnings.length ? { warnings } : {} };
|
|
322
|
+
return { promptText, images, ...warnings.length ? { warnings } : {} };
|
|
228
323
|
}
|
|
229
324
|
function createAPICallError({
|
|
230
325
|
message,
|
|
@@ -401,7 +496,7 @@ var CodexCliLanguageModel = class {
|
|
|
401
496
|
const current = typeof data.type === "string" ? data.type : void 0;
|
|
402
497
|
return legacy ?? current;
|
|
403
498
|
}
|
|
404
|
-
buildArgs(promptText, responseFormat, settings = this.settings) {
|
|
499
|
+
buildArgs(promptText, images = [], responseFormat, settings = this.settings) {
|
|
405
500
|
const base = resolveCodexPath(settings.codexPath, settings.allowNpx);
|
|
406
501
|
const args = [...base.args, "exec", "--experimental-json"];
|
|
407
502
|
if (settings.fullAuto) {
|
|
@@ -476,6 +571,16 @@ var CodexCliLanguageModel = class {
|
|
|
476
571
|
args.push("--output-schema", schemaPath);
|
|
477
572
|
}
|
|
478
573
|
}
|
|
574
|
+
const tempImagePaths = [];
|
|
575
|
+
for (const img of images) {
|
|
576
|
+
try {
|
|
577
|
+
const tempPath = writeImageToTempFile(img);
|
|
578
|
+
tempImagePaths.push(tempPath);
|
|
579
|
+
args.push("--image", tempPath);
|
|
580
|
+
} catch (e) {
|
|
581
|
+
this.logger.warn(`[codex-cli] Failed to write image to temp file: ${String(e)}`);
|
|
582
|
+
}
|
|
583
|
+
}
|
|
479
584
|
args.push(promptText);
|
|
480
585
|
const env = {
|
|
481
586
|
...process.env,
|
|
@@ -497,7 +602,8 @@ var CodexCliLanguageModel = class {
|
|
|
497
602
|
cwd: settings.cwd,
|
|
498
603
|
lastMessagePath,
|
|
499
604
|
lastMessageIsTemp,
|
|
500
|
-
schemaPath
|
|
605
|
+
schemaPath,
|
|
606
|
+
tempImagePaths: tempImagePaths.length > 0 ? tempImagePaths : void 0
|
|
501
607
|
};
|
|
502
608
|
}
|
|
503
609
|
applyMcpSettings(args, settings) {
|
|
@@ -827,14 +933,14 @@ var CodexCliLanguageModel = class {
|
|
|
827
933
|
}
|
|
828
934
|
async doGenerate(options) {
|
|
829
935
|
this.logger.debug(`[codex-cli] Starting doGenerate request with model: ${this.modelId}`);
|
|
830
|
-
const { promptText, warnings: mappingWarnings } = mapMessagesToPrompt(options.prompt);
|
|
936
|
+
const { promptText, images, warnings: mappingWarnings } = mapMessagesToPrompt(options.prompt);
|
|
831
937
|
const promptExcerpt = promptText.slice(0, 200);
|
|
832
938
|
const warnings = [
|
|
833
939
|
...this.mapWarnings(options),
|
|
834
940
|
...mappingWarnings?.map((m) => ({ type: "other", message: m })) || []
|
|
835
941
|
];
|
|
836
942
|
this.logger.debug(
|
|
837
|
-
`[codex-cli] Converted ${options.prompt.length} messages, response format: ${options.responseFormat?.type ?? "none"}`
|
|
943
|
+
`[codex-cli] Converted ${options.prompt.length} messages (${images.length} images), response format: ${options.responseFormat?.type ?? "none"}`
|
|
838
944
|
);
|
|
839
945
|
const providerOptions = await parseProviderOptions({
|
|
840
946
|
provider: this.provider,
|
|
@@ -843,11 +949,7 @@ var CodexCliLanguageModel = class {
|
|
|
843
949
|
});
|
|
844
950
|
const effectiveSettings = this.mergeSettings(providerOptions);
|
|
845
951
|
const responseFormat = options.responseFormat?.type === "json" ? { type: "json", schema: options.responseFormat.schema } : void 0;
|
|
846
|
-
const { cmd, args, env, cwd, lastMessagePath, lastMessageIsTemp, schemaPath } = this.buildArgs(
|
|
847
|
-
promptText,
|
|
848
|
-
responseFormat,
|
|
849
|
-
effectiveSettings
|
|
850
|
-
);
|
|
952
|
+
const { cmd, args, env, cwd, lastMessagePath, lastMessageIsTemp, schemaPath, tempImagePaths } = this.buildArgs(promptText, images, responseFormat, effectiveSettings);
|
|
851
953
|
this.logger.debug(
|
|
852
954
|
`[codex-cli] Executing Codex CLI: ${cmd} with ${args.length} arguments, cwd: ${cwd ?? "default"}`
|
|
853
955
|
);
|
|
@@ -860,6 +962,16 @@ var CodexCliLanguageModel = class {
|
|
|
860
962
|
if (options.abortSignal) {
|
|
861
963
|
if (options.abortSignal.aborted) {
|
|
862
964
|
child.kill("SIGTERM");
|
|
965
|
+
if (schemaPath) {
|
|
966
|
+
try {
|
|
967
|
+
const schemaDir = dirname(schemaPath);
|
|
968
|
+
rmSync(schemaDir, { recursive: true, force: true });
|
|
969
|
+
} catch {
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
if (tempImagePaths?.length) {
|
|
973
|
+
cleanupTempImages(tempImagePaths);
|
|
974
|
+
}
|
|
863
975
|
throw options.abortSignal.reason ?? new Error("Request aborted");
|
|
864
976
|
}
|
|
865
977
|
onAbort = () => child.kill("SIGTERM");
|
|
@@ -954,6 +1066,9 @@ var CodexCliLanguageModel = class {
|
|
|
954
1066
|
} catch {
|
|
955
1067
|
}
|
|
956
1068
|
}
|
|
1069
|
+
if (tempImagePaths?.length) {
|
|
1070
|
+
cleanupTempImages(tempImagePaths);
|
|
1071
|
+
}
|
|
957
1072
|
}
|
|
958
1073
|
if (!text && lastMessagePath) {
|
|
959
1074
|
try {
|
|
@@ -985,14 +1100,14 @@ var CodexCliLanguageModel = class {
|
|
|
985
1100
|
}
|
|
986
1101
|
async doStream(options) {
|
|
987
1102
|
this.logger.debug(`[codex-cli] Starting doStream request with model: ${this.modelId}`);
|
|
988
|
-
const { promptText, warnings: mappingWarnings } = mapMessagesToPrompt(options.prompt);
|
|
1103
|
+
const { promptText, images, warnings: mappingWarnings } = mapMessagesToPrompt(options.prompt);
|
|
989
1104
|
const promptExcerpt = promptText.slice(0, 200);
|
|
990
1105
|
const warnings = [
|
|
991
1106
|
...this.mapWarnings(options),
|
|
992
1107
|
...mappingWarnings?.map((m) => ({ type: "other", message: m })) || []
|
|
993
1108
|
];
|
|
994
1109
|
this.logger.debug(
|
|
995
|
-
`[codex-cli] Converted ${options.prompt.length} messages for streaming, response format: ${options.responseFormat?.type ?? "none"}`
|
|
1110
|
+
`[codex-cli] Converted ${options.prompt.length} messages (${images.length} images) for streaming, response format: ${options.responseFormat?.type ?? "none"}`
|
|
996
1111
|
);
|
|
997
1112
|
const providerOptions = await parseProviderOptions({
|
|
998
1113
|
provider: this.provider,
|
|
@@ -1001,11 +1116,7 @@ var CodexCliLanguageModel = class {
|
|
|
1001
1116
|
});
|
|
1002
1117
|
const effectiveSettings = this.mergeSettings(providerOptions);
|
|
1003
1118
|
const responseFormat = options.responseFormat?.type === "json" ? { type: "json", schema: options.responseFormat.schema } : void 0;
|
|
1004
|
-
const { cmd, args, env, cwd, lastMessagePath, lastMessageIsTemp, schemaPath } = this.buildArgs(
|
|
1005
|
-
promptText,
|
|
1006
|
-
responseFormat,
|
|
1007
|
-
effectiveSettings
|
|
1008
|
-
);
|
|
1119
|
+
const { cmd, args, env, cwd, lastMessagePath, lastMessageIsTemp, schemaPath, tempImagePaths } = this.buildArgs(promptText, images, responseFormat, effectiveSettings);
|
|
1009
1120
|
this.logger.debug(
|
|
1010
1121
|
`[codex-cli] Executing Codex CLI for streaming: ${cmd} with ${args.length} arguments`
|
|
1011
1122
|
);
|
|
@@ -1020,6 +1131,18 @@ var CodexCliLanguageModel = class {
|
|
|
1020
1131
|
let responseMetadataSent = false;
|
|
1021
1132
|
let lastUsage;
|
|
1022
1133
|
let turnFailureMessage;
|
|
1134
|
+
const cleanupTempFiles = () => {
|
|
1135
|
+
if (schemaPath) {
|
|
1136
|
+
try {
|
|
1137
|
+
const schemaDir = dirname(schemaPath);
|
|
1138
|
+
rmSync(schemaDir, { recursive: true, force: true });
|
|
1139
|
+
} catch {
|
|
1140
|
+
}
|
|
1141
|
+
}
|
|
1142
|
+
if (tempImagePaths?.length) {
|
|
1143
|
+
cleanupTempImages(tempImagePaths);
|
|
1144
|
+
}
|
|
1145
|
+
};
|
|
1023
1146
|
const sendMetadata = (meta = {}) => {
|
|
1024
1147
|
controller.enqueue({
|
|
1025
1148
|
type: "response-metadata",
|
|
@@ -1093,6 +1216,7 @@ var CodexCliLanguageModel = class {
|
|
|
1093
1216
|
if (options.abortSignal) {
|
|
1094
1217
|
if (options.abortSignal.aborted) {
|
|
1095
1218
|
child.kill("SIGTERM");
|
|
1219
|
+
cleanupTempFiles();
|
|
1096
1220
|
controller.error(options.abortSignal.reason ?? new Error("Request aborted"));
|
|
1097
1221
|
return;
|
|
1098
1222
|
}
|
|
@@ -1212,23 +1336,15 @@ var CodexCliLanguageModel = class {
|
|
|
1212
1336
|
}
|
|
1213
1337
|
}
|
|
1214
1338
|
});
|
|
1215
|
-
const cleanupSchema = () => {
|
|
1216
|
-
if (!schemaPath) return;
|
|
1217
|
-
try {
|
|
1218
|
-
const schemaDir = dirname(schemaPath);
|
|
1219
|
-
rmSync(schemaDir, { recursive: true, force: true });
|
|
1220
|
-
} catch {
|
|
1221
|
-
}
|
|
1222
|
-
};
|
|
1223
1339
|
child.on("error", (e) => {
|
|
1224
1340
|
this.logger.error(`[codex-cli] Stream spawn error: ${String(e)}`);
|
|
1225
1341
|
if (options.abortSignal) options.abortSignal.removeEventListener("abort", onAbort);
|
|
1226
|
-
|
|
1342
|
+
cleanupTempFiles();
|
|
1227
1343
|
controller.error(this.handleSpawnError(e, promptExcerpt));
|
|
1228
1344
|
});
|
|
1229
1345
|
child.on("close", (code) => {
|
|
1230
1346
|
if (options.abortSignal) options.abortSignal.removeEventListener("abort", onAbort);
|
|
1231
|
-
|
|
1347
|
+
cleanupTempFiles();
|
|
1232
1348
|
setImmediate(() => finishStream(code));
|
|
1233
1349
|
});
|
|
1234
1350
|
},
|