@f5xc-salesdemos/xcsh 15.2.0 → 15.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/src/config/settings-schema.ts +36 -2
- package/src/ipy/kernel.ts +10 -10
- package/src/ipy/prelude.py +7 -7
- package/src/modes/components/settings-defs.ts +12 -1
- package/src/prompts/system/system-prompt.md +6 -0
- package/src/sdk.ts +41 -8
- package/src/tools/gemini-image.ts +111 -4
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@f5xc-salesdemos/xcsh",
|
|
4
|
-
"version": "15.2.0",
|
|
4
|
+
"version": "15.3.2",
|
|
5
5
|
"description": "Coding agent CLI with read, bash, edit, write tools and session management",
|
|
6
6
|
"homepage": "https://github.com/f5xc-salesdemos/xcsh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -46,12 +46,12 @@
|
|
|
46
46
|
"dependencies": {
|
|
47
47
|
"@agentclientprotocol/sdk": "0.16.1",
|
|
48
48
|
"@mozilla/readability": "^0.6",
|
|
49
|
-
"@f5xc-salesdemos/xcsh-stats": "15.2.0",
|
|
50
|
-
"@f5xc-salesdemos/pi-agent-core": "15.2.0",
|
|
51
|
-
"@f5xc-salesdemos/pi-ai": "15.2.0",
|
|
52
|
-
"@f5xc-salesdemos/pi-natives": "15.2.0",
|
|
53
|
-
"@f5xc-salesdemos/pi-tui": "15.2.0",
|
|
54
|
-
"@f5xc-salesdemos/pi-utils": "15.2.0",
|
|
49
|
+
"@f5xc-salesdemos/xcsh-stats": "15.3.2",
|
|
50
|
+
"@f5xc-salesdemos/pi-agent-core": "15.3.2",
|
|
51
|
+
"@f5xc-salesdemos/pi-ai": "15.3.2",
|
|
52
|
+
"@f5xc-salesdemos/pi-natives": "15.3.2",
|
|
53
|
+
"@f5xc-salesdemos/pi-tui": "15.3.2",
|
|
54
|
+
"@f5xc-salesdemos/pi-utils": "15.3.2",
|
|
55
55
|
"@sinclair/typebox": "^0.34",
|
|
56
56
|
"@xterm/headless": "^6.0",
|
|
57
57
|
"ajv": "^8.18",
|
|
@@ -1287,6 +1287,16 @@ export const SETTINGS_SCHEMA = {
|
|
|
1287
1287
|
},
|
|
1288
1288
|
},
|
|
1289
1289
|
|
|
1290
|
+
"generate_image.enabled": {
|
|
1291
|
+
type: "boolean",
|
|
1292
|
+
default: true,
|
|
1293
|
+
ui: {
|
|
1294
|
+
tab: "tools",
|
|
1295
|
+
label: "Generate Image",
|
|
1296
|
+
description: "Enable the generate_image tool for AI-powered image and diagram generation",
|
|
1297
|
+
},
|
|
1298
|
+
},
|
|
1299
|
+
|
|
1290
1300
|
"checkpoint.enabled": {
|
|
1291
1301
|
type: "boolean",
|
|
1292
1302
|
default: false,
|
|
@@ -1615,12 +1625,36 @@ export const SETTINGS_SCHEMA = {
|
|
|
1615
1625
|
},
|
|
1616
1626
|
"providers.image": {
|
|
1617
1627
|
type: "enum",
|
|
1618
|
-
values: ["auto", "gemini", "openrouter"] as const,
|
|
1628
|
+
values: ["auto", "gemini", "openrouter", "openai"] as const,
|
|
1619
1629
|
default: "auto",
|
|
1620
1630
|
ui: {
|
|
1621
1631
|
tab: "providers",
|
|
1622
1632
|
label: "Image Provider",
|
|
1623
|
-
description: "Provider for image generation tool",
|
|
1633
|
+
description: "Provider for image generation tool (auto detects from available API keys)",
|
|
1634
|
+
submenu: true,
|
|
1635
|
+
},
|
|
1636
|
+
},
|
|
1637
|
+
|
|
1638
|
+
"providers.imageSize": {
|
|
1639
|
+
type: "enum",
|
|
1640
|
+
values: ["1024x1024", "1536x1024", "1024x1536"] as const,
|
|
1641
|
+
default: "1536x1024",
|
|
1642
|
+
ui: {
|
|
1643
|
+
tab: "providers",
|
|
1644
|
+
label: "Image Size",
|
|
1645
|
+
description: "Default image dimensions for generation (landscape, square, or portrait)",
|
|
1646
|
+
submenu: true,
|
|
1647
|
+
},
|
|
1648
|
+
},
|
|
1649
|
+
|
|
1650
|
+
"providers.imageQuality": {
|
|
1651
|
+
type: "enum",
|
|
1652
|
+
values: ["low", "medium", "high"] as const,
|
|
1653
|
+
default: "high",
|
|
1654
|
+
ui: {
|
|
1655
|
+
tab: "providers",
|
|
1656
|
+
label: "Image Quality",
|
|
1657
|
+
description: "Rendering quality for generated images (higher = slower but more detailed)",
|
|
1624
1658
|
submenu: true,
|
|
1625
1659
|
},
|
|
1626
1660
|
},
|
package/src/ipy/kernel.ts
CHANGED
|
@@ -11,7 +11,7 @@ import { filterEnv, resolvePythonRuntime } from "./runtime";
|
|
|
11
11
|
const TEXT_ENCODER = new TextEncoder();
|
|
12
12
|
const TEXT_DECODER = new TextDecoder();
|
|
13
13
|
const TRACE_IPC = $flag("PI_PYTHON_IPC_TRACE");
|
|
14
|
-
const PRELUDE_INTROSPECTION_SNIPPET = "import json\nprint(json.dumps(
|
|
14
|
+
const PRELUDE_INTROSPECTION_SNIPPET = "import json\nprint(json.dumps(__xcsh_prelude_docs__()))";
|
|
15
15
|
|
|
16
16
|
class SharedGatewayCreateError extends Error {
|
|
17
17
|
constructor(
|
|
@@ -285,8 +285,8 @@ export function renderKernelDisplay(content: Record<string, unknown>): {
|
|
|
285
285
|
const outputs: KernelDisplayOutput[] = [];
|
|
286
286
|
|
|
287
287
|
// Handle status events (custom MIME type from prelude helpers)
|
|
288
|
-
if (data["application/x-
|
|
289
|
-
const statusData = data["application/x-
|
|
288
|
+
if (data["application/x-xcsh-status"] !== undefined) {
|
|
289
|
+
const statusData = data["application/x-xcsh-status"];
|
|
290
290
|
if (statusData && typeof statusData === "object" && "op" in statusData) {
|
|
291
291
|
outputs.push({ type: "status", event: statusData as PythonStatusEvent });
|
|
292
292
|
}
|
|
@@ -512,7 +512,7 @@ export class PythonKernel {
|
|
|
512
512
|
kernelId,
|
|
513
513
|
config.url,
|
|
514
514
|
Snowflake.next(),
|
|
515
|
-
"
|
|
515
|
+
"xcsh",
|
|
516
516
|
false,
|
|
517
517
|
config.token,
|
|
518
518
|
);
|
|
@@ -575,7 +575,7 @@ export class PythonKernel {
|
|
|
575
575
|
)) as { id: string };
|
|
576
576
|
const kernelId = kernelInfo.id;
|
|
577
577
|
|
|
578
|
-
const kernel = new PythonKernel(Snowflake.next(), kernelId, gatewayUrl, Snowflake.next(), "
|
|
578
|
+
const kernel = new PythonKernel(Snowflake.next(), kernelId, gatewayUrl, Snowflake.next(), "xcsh", true);
|
|
579
579
|
|
|
580
580
|
try {
|
|
581
581
|
await logger.time("startWithSharedGateway:connectWS", kernel.#connectWebSocket.bind(kernel), startup);
|
|
@@ -723,11 +723,11 @@ export class PythonKernel {
|
|
|
723
723
|
const envPayload = Object.fromEntries(envEntries);
|
|
724
724
|
const initScript = [
|
|
725
725
|
"import os, sys",
|
|
726
|
-
`
|
|
727
|
-
"os.chdir(
|
|
728
|
-
`
|
|
729
|
-
"for
|
|
730
|
-
"if
|
|
726
|
+
`__xcsh_cwd = ${JSON.stringify(cwd)}`,
|
|
727
|
+
"os.chdir(__xcsh_cwd)",
|
|
728
|
+
`__xcsh_env = ${JSON.stringify(envPayload)}`,
|
|
729
|
+
"for __xcsh_key, __xcsh_val in __xcsh_env.items():\n os.environ[__xcsh_key] = __xcsh_val",
|
|
730
|
+
"if __xcsh_cwd not in sys.path:\n sys.path.insert(0, __xcsh_cwd)",
|
|
731
731
|
].join("\n");
|
|
732
732
|
const executeOptions = getStartupExecuteOptions(options);
|
|
733
733
|
const result = await this.execute(initScript, {
|
package/src/ipy/prelude.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
# OMP IPython prelude helpers
|
|
3
|
-
if "__omp_prelude_loaded__" not in globals():
|
|
4
|
-
|
|
3
|
+
if "__xcsh_prelude_loaded__" not in globals():
|
|
4
|
+
__xcsh_prelude_loaded__ = True
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
import os, re, json, shutil, subprocess, inspect
|
|
7
7
|
from datetime import datetime
|
|
@@ -14,7 +14,7 @@ if "__omp_prelude_loaded__" not in globals():
|
|
|
14
14
|
def _category(cat: str):
|
|
15
15
|
"""Decorator to tag a prelude function with its category."""
|
|
16
16
|
def decorator(fn):
|
|
17
|
-
fn.
|
|
17
|
+
fn._xcsh_category = cat
|
|
18
18
|
return fn
|
|
19
19
|
return decorator
|
|
20
20
|
|
|
@@ -831,11 +831,11 @@ if "__omp_prelude_loaded__" not in globals():
|
|
|
831
831
|
|
|
832
832
|
return current
|
|
833
833
|
|
|
834
|
-
def
|
|
835
|
-
"""Return prelude helper docs for templating. Discovers functions by
|
|
834
|
+
def __xcsh_prelude_docs__() -> list[dict[str, str]]:
|
|
835
|
+
"""Return prelude helper docs for templating. Discovers functions by _xcsh_category attribute."""
|
|
836
836
|
helpers: list[dict[str, str]] = []
|
|
837
837
|
for name, obj in globals().items():
|
|
838
|
-
if not callable(obj) or not hasattr(obj, "
|
|
838
|
+
if not callable(obj) or not hasattr(obj, "_xcsh_category"):
|
|
839
839
|
continue
|
|
840
840
|
signature = str(inspect.signature(obj))
|
|
841
841
|
doc = inspect.getdoc(obj) or ""
|
|
@@ -844,6 +844,6 @@ if "__omp_prelude_loaded__" not in globals():
|
|
|
844
844
|
"name": name,
|
|
845
845
|
"signature": signature,
|
|
846
846
|
"docstring": docline,
|
|
847
|
-
"category": obj.
|
|
847
|
+
"category": obj._xcsh_category,
|
|
848
848
|
})
|
|
849
849
|
return sorted(helpers, key=lambda h: (h["category"], h["name"]))
|
|
@@ -337,10 +337,21 @@ const OPTION_PROVIDERS: Partial<Record<SettingPath, OptionProvider>> = {
|
|
|
337
337
|
{ value: "parallel", label: "Parallel", description: "Requires PARALLEL_API_KEY" },
|
|
338
338
|
],
|
|
339
339
|
"providers.image": [
|
|
340
|
-
{ value: "auto", label: "Auto", description: "
|
|
340
|
+
{ value: "auto", label: "Auto", description: "Auto-detect from available API keys" },
|
|
341
|
+
{ value: "openai", label: "OpenAI", description: "gpt-image-1 via LITELLM_API_KEY or OPENAI_API_KEY" },
|
|
341
342
|
{ value: "gemini", label: "Gemini", description: "Requires GEMINI_API_KEY" },
|
|
342
343
|
{ value: "openrouter", label: "OpenRouter", description: "Requires OPENROUTER_API_KEY" },
|
|
343
344
|
],
|
|
345
|
+
"providers.imageSize": [
|
|
346
|
+
{ value: "1024x1024", label: "1024x1024", description: "Square" },
|
|
347
|
+
{ value: "1536x1024", label: "1536x1024", description: "Landscape (default)" },
|
|
348
|
+
{ value: "1024x1536", label: "1024x1536", description: "Portrait" },
|
|
349
|
+
],
|
|
350
|
+
"providers.imageQuality": [
|
|
351
|
+
{ value: "low", label: "Low", description: "Fastest generation, lower detail" },
|
|
352
|
+
{ value: "medium", label: "Medium", description: "Balanced speed and quality" },
|
|
353
|
+
{ value: "high", label: "High", description: "Best quality, slower generation (default)" },
|
|
354
|
+
],
|
|
344
355
|
"providers.kimiApiFormat": [
|
|
345
356
|
{ value: "openai", label: "OpenAI", description: "api.kimi.com" },
|
|
346
357
|
{ value: "anthropic", label: "Anthropic", description: "api.moonshot.ai" },
|
|
@@ -281,7 +281,13 @@ Don't open a file hoping. Hope is not a strategy.
|
|
|
281
281
|
### Image inspection
|
|
282
282
|
- For image understanding tasks: **MUST** use `inspect_image` over `read` to avoid overloading main session context.
|
|
283
283
|
- Write a specific `question` for `inspect_image`: what to inspect, constraints (for example verbatim OCR), and desired output format.
|
|
284
|
+
- If you encounter `[Image content detected but current model does not support vision]` in a message, use `inspect_image` with the image file path to analyze it. Do not ask the user to describe the image — analyze it yourself via the tool.
|
|
284
285
|
{{/if}}
|
|
286
|
+
{{#ifAll (includes tools "inspect_image") (includes tools "generate_image")}}
|
|
287
|
+
### Image generation and analysis
|
|
288
|
+
- After using `generate_image`, the result includes saved file paths (e.g. `/tmp/xcsh-image-*.png`). To analyze or describe the generated image, chain `inspect_image` using that file path.
|
|
289
|
+
- Example workflow: user asks "create a diagram and check if it follows brand guidelines" → call `generate_image`, then call `inspect_image` on the resulting file path with the brand compliance question.
|
|
290
|
+
{{/ifAll}}
|
|
285
291
|
|
|
286
292
|
{{SECTION_SEPERATOR "Rules"}}
|
|
287
293
|
|
package/src/sdk.ts
CHANGED
|
@@ -672,7 +672,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
672
672
|
}
|
|
673
673
|
|
|
674
674
|
const imageProvider = settings.get("providers.image");
|
|
675
|
-
if (
|
|
675
|
+
if (
|
|
676
|
+
imageProvider === "auto" ||
|
|
677
|
+
imageProvider === "gemini" ||
|
|
678
|
+
imageProvider === "openrouter" ||
|
|
679
|
+
imageProvider === "openai"
|
|
680
|
+
) {
|
|
676
681
|
setPreferredImageProvider(imageProvider);
|
|
677
682
|
}
|
|
678
683
|
|
|
@@ -1034,10 +1039,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
1034
1039
|
}
|
|
1035
1040
|
}
|
|
1036
1041
|
|
|
1037
|
-
// Add
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1042
|
+
// Add image generation tools if an image API key is available and the tool is enabled
|
|
1043
|
+
if (settings.get("generate_image.enabled")) {
|
|
1044
|
+
const geminiImageTools = await logger.time("getGeminiImageTools", getGeminiImageTools);
|
|
1045
|
+
if (geminiImageTools.length > 0) {
|
|
1046
|
+
customTools.push(...(geminiImageTools as unknown as CustomTool[]));
|
|
1047
|
+
}
|
|
1041
1048
|
}
|
|
1042
1049
|
|
|
1043
1050
|
// Add web search tools
|
|
@@ -1435,11 +1442,37 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
1435
1442
|
});
|
|
1436
1443
|
};
|
|
1437
1444
|
|
|
1438
|
-
//
|
|
1445
|
+
// Replace unsupported image content with actionable warnings when model lacks vision
|
|
1446
|
+
const convertToLlmWithImageRouting = (messages: Message[]): Message[] => {
|
|
1447
|
+
const currentModel = agent?.state?.model;
|
|
1448
|
+
if (!currentModel || currentModel.input.includes("image")) return messages;
|
|
1449
|
+
|
|
1450
|
+
return messages.map(msg => {
|
|
1451
|
+
if (msg.role !== "user" && msg.role !== "toolResult") return msg;
|
|
1452
|
+
const content = msg.content;
|
|
1453
|
+
if (!Array.isArray(content)) return msg;
|
|
1454
|
+
|
|
1455
|
+
const hasImages = content.some(c => c.type === "image");
|
|
1456
|
+
if (!hasImages) return msg;
|
|
1457
|
+
|
|
1458
|
+
const filtered = content.map(c =>
|
|
1459
|
+
c.type === "image"
|
|
1460
|
+
? {
|
|
1461
|
+
type: "text" as const,
|
|
1462
|
+
text: "[Image content detected but current model does not support vision. Use the inspect_image tool to analyze this image, or ask the user to switch to a vision-capable model.]",
|
|
1463
|
+
}
|
|
1464
|
+
: c,
|
|
1465
|
+
);
|
|
1466
|
+
return { ...msg, content: filtered };
|
|
1467
|
+
});
|
|
1468
|
+
};
|
|
1469
|
+
|
|
1470
|
+
// Final convertToLlm: chain block-images filter → image routing warnings → secret obfuscation
|
|
1439
1471
|
const convertToLlmFinal = (messages: AgentMessage[]): Message[] => {
|
|
1440
1472
|
const converted = convertToLlmWithBlockImages(messages);
|
|
1441
|
-
|
|
1442
|
-
|
|
1473
|
+
const routed = convertToLlmWithImageRouting(converted);
|
|
1474
|
+
if (!obfuscator?.hasSecrets()) return routed;
|
|
1475
|
+
return obfuscateMessages(obfuscator, routed);
|
|
1443
1476
|
};
|
|
1444
1477
|
const transformContext = extensionRunner
|
|
1445
1478
|
? async (messages: AgentMessage[], _signal?: AbortSignal) => {
|
|
@@ -20,6 +20,9 @@ import { resolveReadPath } from "./path-utils";
|
|
|
20
20
|
const DEFAULT_MODEL = "gemini-3-pro-image-preview";
|
|
21
21
|
const DEFAULT_OPENROUTER_MODEL = "google/gemini-3-pro-image-preview";
|
|
22
22
|
const DEFAULT_ANTIGRAVITY_MODEL = "gemini-3-pro-image";
|
|
23
|
+
const DEFAULT_OPENAI_IMAGE_MODEL = "gpt-image-1";
|
|
24
|
+
const DEFAULT_OPENAI_IMAGE_SIZE = "1536x1024";
|
|
25
|
+
const DEFAULT_OPENAI_IMAGE_QUALITY = "high";
|
|
23
26
|
const IMAGE_TIMEOUT = 3 * 60 * 1000; // 3 minutes
|
|
24
27
|
const MAX_IMAGE_SIZE = 35 * 1024 * 1024;
|
|
25
28
|
|
|
@@ -27,7 +30,7 @@ const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com"
|
|
|
27
30
|
const IMAGE_SYSTEM_INSTRUCTION =
|
|
28
31
|
"You are an AI image generator. Generate images based on user descriptions. Focus on creating high-quality, visually appealing images that match the user's request.";
|
|
29
32
|
|
|
30
|
-
type ImageProvider = "antigravity" | "gemini" | "openrouter";
|
|
33
|
+
type ImageProvider = "antigravity" | "gemini" | "openrouter" | "openai";
|
|
31
34
|
interface ImageApiKey {
|
|
32
35
|
provider: ImageProvider;
|
|
33
36
|
apiKey: string;
|
|
@@ -207,6 +210,21 @@ interface OpenRouterResponse {
|
|
|
207
210
|
choices?: OpenRouterChoice[];
|
|
208
211
|
}
|
|
209
212
|
|
|
213
|
+
interface OpenAIImageResponseData {
|
|
214
|
+
b64_json: string;
|
|
215
|
+
revised_prompt?: string | null;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
interface OpenAIImageResponse {
|
|
219
|
+
created: number;
|
|
220
|
+
data: OpenAIImageResponseData[];
|
|
221
|
+
usage?: {
|
|
222
|
+
total_tokens: number;
|
|
223
|
+
input_tokens: number;
|
|
224
|
+
output_tokens: number;
|
|
225
|
+
};
|
|
226
|
+
}
|
|
227
|
+
|
|
210
228
|
interface AntigravityRequest {
|
|
211
229
|
project: string;
|
|
212
230
|
model: string;
|
|
@@ -396,9 +414,13 @@ async function findImageApiKey(modelRegistry?: ModelRegistry): Promise<ImageApiK
|
|
|
396
414
|
const openRouterKey = getEnvApiKey("openrouter");
|
|
397
415
|
if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
|
|
398
416
|
// Fall through to auto-detect if preferred provider key not found
|
|
417
|
+
} else if (preferredImageProvider === "openai") {
|
|
418
|
+
const openaiKey = getEnvApiKey("litellm") ?? getEnvApiKey("openai");
|
|
419
|
+
if (openaiKey) return { provider: "openai", apiKey: openaiKey };
|
|
420
|
+
// Fall through to auto-detect if preferred provider key not found
|
|
399
421
|
}
|
|
400
422
|
|
|
401
|
-
// Auto-detect: Antigravity takes priority, then OpenRouter, then Gemini
|
|
423
|
+
// Auto-detect: Antigravity takes priority, then OpenRouter, then OpenAI, then Gemini
|
|
402
424
|
if (modelRegistry) {
|
|
403
425
|
const antigravity = await findAntigravityCredentials(modelRegistry);
|
|
404
426
|
if (antigravity) return antigravity;
|
|
@@ -407,6 +429,9 @@ async function findImageApiKey(modelRegistry?: ModelRegistry): Promise<ImageApiK
|
|
|
407
429
|
const openRouterKey = getEnvApiKey("openrouter");
|
|
408
430
|
if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
|
|
409
431
|
|
|
432
|
+
const openaiKey = getEnvApiKey("litellm") ?? getEnvApiKey("openai");
|
|
433
|
+
if (openaiKey) return { provider: "openai", apiKey: openaiKey };
|
|
434
|
+
|
|
410
435
|
const geminiKey = getEnvApiKey("google");
|
|
411
436
|
if (geminiKey) return { provider: "gemini", apiKey: geminiKey };
|
|
412
437
|
|
|
@@ -614,7 +639,7 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
|
|
|
614
639
|
const apiKey = await findImageApiKey(ctx.modelRegistry);
|
|
615
640
|
if (!apiKey) {
|
|
616
641
|
throw new Error(
|
|
617
|
-
"No image API credentials found.
|
|
642
|
+
"No image API credentials found. Set LITELLM_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
|
|
618
643
|
);
|
|
619
644
|
}
|
|
620
645
|
|
|
@@ -624,7 +649,9 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
|
|
|
624
649
|
? DEFAULT_ANTIGRAVITY_MODEL
|
|
625
650
|
: provider === "openrouter"
|
|
626
651
|
? DEFAULT_OPENROUTER_MODEL
|
|
627
|
-
:
|
|
652
|
+
: provider === "openai"
|
|
653
|
+
? DEFAULT_OPENAI_IMAGE_MODEL
|
|
654
|
+
: DEFAULT_MODEL;
|
|
628
655
|
const resolvedModel = provider === "openrouter" ? resolveOpenRouterModel(model) : model;
|
|
629
656
|
const cwd = ctx.sessionManager.getCwd();
|
|
630
657
|
|
|
@@ -786,6 +813,86 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
|
|
|
786
813
|
};
|
|
787
814
|
}
|
|
788
815
|
|
|
816
|
+
if (provider === "openai") {
|
|
817
|
+
const openaiPrompt = assemblePrompt(params);
|
|
818
|
+
const size = params.image_size ?? ctx.settings?.get("providers.imageSize") ?? DEFAULT_OPENAI_IMAGE_SIZE;
|
|
819
|
+
const quality = ctx.settings?.get("providers.imageQuality") ?? DEFAULT_OPENAI_IMAGE_QUALITY;
|
|
820
|
+
const baseUrl = $env.LITELLM_BASE_URL ?? $env.OPENAI_BASE_URL ?? "https://api.openai.com";
|
|
821
|
+
|
|
822
|
+
const requestBody = {
|
|
823
|
+
model: DEFAULT_OPENAI_IMAGE_MODEL,
|
|
824
|
+
prompt: openaiPrompt,
|
|
825
|
+
n: 1,
|
|
826
|
+
size,
|
|
827
|
+
quality,
|
|
828
|
+
};
|
|
829
|
+
|
|
830
|
+
const response = await fetch(`${baseUrl}/openai/v1/images/generations`, {
|
|
831
|
+
method: "POST",
|
|
832
|
+
headers: {
|
|
833
|
+
Authorization: `Bearer ${apiKey.apiKey}`,
|
|
834
|
+
"Content-Type": "application/json",
|
|
835
|
+
},
|
|
836
|
+
body: JSON.stringify(requestBody),
|
|
837
|
+
signal: requestSignal,
|
|
838
|
+
});
|
|
839
|
+
|
|
840
|
+
const rawText = await response.text();
|
|
841
|
+
if (!response.ok) {
|
|
842
|
+
let message = rawText;
|
|
843
|
+
try {
|
|
844
|
+
const parsed = JSON.parse(rawText) as { error?: { message?: string } };
|
|
845
|
+
message = parsed.error?.message ?? message;
|
|
846
|
+
} catch {
|
|
847
|
+
// Keep raw text.
|
|
848
|
+
}
|
|
849
|
+
throw new Error(`OpenAI image request failed (${response.status}): ${message}`);
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
const data = JSON.parse(rawText) as OpenAIImageResponse;
|
|
853
|
+
const b64 = data.data?.[0]?.b64_json;
|
|
854
|
+
if (!b64) {
|
|
855
|
+
return {
|
|
856
|
+
content: [{ type: "text", text: "No image data returned from OpenAI." }],
|
|
857
|
+
details: {
|
|
858
|
+
provider,
|
|
859
|
+
model: DEFAULT_OPENAI_IMAGE_MODEL,
|
|
860
|
+
imageCount: 0,
|
|
861
|
+
imagePaths: [],
|
|
862
|
+
images: [],
|
|
863
|
+
},
|
|
864
|
+
};
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
const image: InlineImageData = { data: b64, mimeType: "image/png" };
|
|
868
|
+
const imagePaths = await saveImagesToTemp([image]);
|
|
869
|
+
const revisedPrompt = data.data[0]?.revised_prompt ?? undefined;
|
|
870
|
+
|
|
871
|
+
return {
|
|
872
|
+
content: [
|
|
873
|
+
{
|
|
874
|
+
type: "text",
|
|
875
|
+
text: buildResponseSummary(provider, DEFAULT_OPENAI_IMAGE_MODEL, imagePaths, revisedPrompt),
|
|
876
|
+
},
|
|
877
|
+
],
|
|
878
|
+
details: {
|
|
879
|
+
provider,
|
|
880
|
+
model: DEFAULT_OPENAI_IMAGE_MODEL,
|
|
881
|
+
imageCount: 1,
|
|
882
|
+
imagePaths,
|
|
883
|
+
images: [image],
|
|
884
|
+
responseText: revisedPrompt,
|
|
885
|
+
usage: data.usage
|
|
886
|
+
? {
|
|
887
|
+
promptTokenCount: data.usage.input_tokens,
|
|
888
|
+
candidatesTokenCount: data.usage.output_tokens,
|
|
889
|
+
totalTokenCount: data.usage.total_tokens,
|
|
890
|
+
}
|
|
891
|
+
: undefined,
|
|
892
|
+
},
|
|
893
|
+
};
|
|
894
|
+
}
|
|
895
|
+
|
|
789
896
|
const parts = [] as Array<{ text?: string; inlineData?: InlineImageData }>;
|
|
790
897
|
for (const image of resolvedImages) {
|
|
791
898
|
parts.push({ inlineData: image });
|