@f5xc-salesdemos/xcsh 15.2.0 → 15.3.0

This diff shows the changes between two package versions as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@f5xc-salesdemos/xcsh",
4
- "version": "15.2.0",
4
+ "version": "15.3.0",
5
5
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://github.com/f5xc-salesdemos/xcsh",
7
7
  "author": "Can Boluk",
@@ -46,12 +46,12 @@
46
46
  "dependencies": {
47
47
  "@agentclientprotocol/sdk": "0.16.1",
48
48
  "@mozilla/readability": "^0.6",
49
- "@f5xc-salesdemos/xcsh-stats": "15.2.0",
50
- "@f5xc-salesdemos/pi-agent-core": "15.2.0",
51
- "@f5xc-salesdemos/pi-ai": "15.2.0",
52
- "@f5xc-salesdemos/pi-natives": "15.2.0",
53
- "@f5xc-salesdemos/pi-tui": "15.2.0",
54
- "@f5xc-salesdemos/pi-utils": "15.2.0",
49
+ "@f5xc-salesdemos/xcsh-stats": "15.3.0",
50
+ "@f5xc-salesdemos/pi-agent-core": "15.3.0",
51
+ "@f5xc-salesdemos/pi-ai": "15.3.0",
52
+ "@f5xc-salesdemos/pi-natives": "15.3.0",
53
+ "@f5xc-salesdemos/pi-tui": "15.3.0",
54
+ "@f5xc-salesdemos/pi-utils": "15.3.0",
55
55
  "@sinclair/typebox": "^0.34",
56
56
  "@xterm/headless": "^6.0",
57
57
  "ajv": "^8.18",
@@ -1287,6 +1287,16 @@ export const SETTINGS_SCHEMA = {
1287
1287
  },
1288
1288
  },
1289
1289
 
1290
+ "generate_image.enabled": {
1291
+ type: "boolean",
1292
+ default: true,
1293
+ ui: {
1294
+ tab: "tools",
1295
+ label: "Generate Image",
1296
+ description: "Enable the generate_image tool for AI-powered image and diagram generation",
1297
+ },
1298
+ },
1299
+
1290
1300
  "checkpoint.enabled": {
1291
1301
  type: "boolean",
1292
1302
  default: false,
@@ -1615,12 +1625,36 @@ export const SETTINGS_SCHEMA = {
1615
1625
  },
1616
1626
  "providers.image": {
1617
1627
  type: "enum",
1618
- values: ["auto", "gemini", "openrouter"] as const,
1628
+ values: ["auto", "gemini", "openrouter", "openai"] as const,
1619
1629
  default: "auto",
1620
1630
  ui: {
1621
1631
  tab: "providers",
1622
1632
  label: "Image Provider",
1623
- description: "Provider for image generation tool",
1633
+ description: "Provider for image generation tool (auto detects from available API keys)",
1634
+ submenu: true,
1635
+ },
1636
+ },
1637
+
1638
+ "providers.imageSize": {
1639
+ type: "enum",
1640
+ values: ["1024x1024", "1536x1024", "1024x1536"] as const,
1641
+ default: "1536x1024",
1642
+ ui: {
1643
+ tab: "providers",
1644
+ label: "Image Size",
1645
+ description: "Default image dimensions for generation (landscape, square, or portrait)",
1646
+ submenu: true,
1647
+ },
1648
+ },
1649
+
1650
+ "providers.imageQuality": {
1651
+ type: "enum",
1652
+ values: ["low", "medium", "high"] as const,
1653
+ default: "high",
1654
+ ui: {
1655
+ tab: "providers",
1656
+ label: "Image Quality",
1657
+ description: "Rendering quality for generated images (higher = slower but more detailed)",
1624
1658
  submenu: true,
1625
1659
  },
1626
1660
  },
@@ -337,10 +337,21 @@ const OPTION_PROVIDERS: Partial<Record<SettingPath, OptionProvider>> = {
337
337
  { value: "parallel", label: "Parallel", description: "Requires PARALLEL_API_KEY" },
338
338
  ],
339
339
  "providers.image": [
340
- { value: "auto", label: "Auto", description: "Priority: OpenRouter > Gemini" },
340
+ { value: "auto", label: "Auto", description: "Auto-detect from available API keys" },
341
+ { value: "openai", label: "OpenAI", description: "gpt-image-1 via LITELLM_API_KEY or OPENAI_API_KEY" },
341
342
  { value: "gemini", label: "Gemini", description: "Requires GEMINI_API_KEY" },
342
343
  { value: "openrouter", label: "OpenRouter", description: "Requires OPENROUTER_API_KEY" },
343
344
  ],
345
+ "providers.imageSize": [
346
+ { value: "1024x1024", label: "1024x1024", description: "Square" },
347
+ { value: "1536x1024", label: "1536x1024", description: "Landscape (default)" },
348
+ { value: "1024x1536", label: "1024x1536", description: "Portrait" },
349
+ ],
350
+ "providers.imageQuality": [
351
+ { value: "low", label: "Low", description: "Fastest generation, lower detail" },
352
+ { value: "medium", label: "Medium", description: "Balanced speed and quality" },
353
+ { value: "high", label: "High", description: "Best quality, slower generation (default)" },
354
+ ],
344
355
  "providers.kimiApiFormat": [
345
356
  { value: "openai", label: "OpenAI", description: "api.kimi.com" },
346
357
  { value: "anthropic", label: "Anthropic", description: "api.moonshot.ai" },
@@ -281,7 +281,13 @@ Don't open a file hoping. Hope is not a strategy.
281
281
  ### Image inspection
282
282
  - For image understanding tasks: **MUST** use `inspect_image` over `read` to avoid overloading main session context.
283
283
  - Write a specific `question` for `inspect_image`: what to inspect, constraints (for example verbatim OCR), and desired output format.
284
+ - If you encounter `[Image content detected but current model does not support vision]` in a message, use `inspect_image` with the image file path to analyze it. Do not ask the user to describe the image — analyze it yourself via the tool.
284
285
  {{/if}}
286
+ {{#ifAll (includes tools "inspect_image") (includes tools "generate_image")}}
287
+ ### Image generation and analysis
288
+ - After using `generate_image`, the result includes saved file paths (e.g. `/tmp/xcsh-image-*.png`). To analyze or describe the generated image, chain `inspect_image` using that file path.
289
+ - Example workflow: user asks "create a diagram and check if it follows brand guidelines" → call `generate_image`, then call `inspect_image` on the resulting file path with the brand compliance question.
290
+ {{/ifAll}}
285
291
 
286
292
  {{SECTION_SEPERATOR "Rules"}}
287
293
 
package/src/sdk.ts CHANGED
@@ -672,7 +672,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
672
672
  }
673
673
 
674
674
  const imageProvider = settings.get("providers.image");
675
- if (imageProvider === "auto" || imageProvider === "gemini" || imageProvider === "openrouter") {
675
+ if (
676
+ imageProvider === "auto" ||
677
+ imageProvider === "gemini" ||
678
+ imageProvider === "openrouter" ||
679
+ imageProvider === "openai"
680
+ ) {
676
681
  setPreferredImageProvider(imageProvider);
677
682
  }
678
683
 
@@ -1034,10 +1039,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1034
1039
  }
1035
1040
  }
1036
1041
 
1037
- // Add Gemini image tools if GEMINI_API_KEY (or GOOGLE_API_KEY) is available
1038
- const geminiImageTools = await logger.time("getGeminiImageTools", getGeminiImageTools);
1039
- if (geminiImageTools.length > 0) {
1040
- customTools.push(...(geminiImageTools as unknown as CustomTool[]));
1042
+ // Add image generation tools if an image API key is available and the tool is enabled
1043
+ if (settings.get("generate_image.enabled")) {
1044
+ const geminiImageTools = await logger.time("getGeminiImageTools", getGeminiImageTools);
1045
+ if (geminiImageTools.length > 0) {
1046
+ customTools.push(...(geminiImageTools as unknown as CustomTool[]));
1047
+ }
1041
1048
  }
1042
1049
 
1043
1050
  // Add web search tools
@@ -1435,11 +1442,37 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1435
1442
  });
1436
1443
  };
1437
1444
 
1438
- // Final convertToLlm: chain block-images filter with secret obfuscation
1445
+ // Replace unsupported image content with actionable warnings when model lacks vision
1446
+ const convertToLlmWithImageRouting = (messages: Message[]): Message[] => {
1447
+ const currentModel = agent?.state?.model;
1448
+ if (!currentModel || currentModel.input.includes("image")) return messages;
1449
+
1450
+ return messages.map(msg => {
1451
+ if (msg.role !== "user" && msg.role !== "toolResult") return msg;
1452
+ const content = msg.content;
1453
+ if (!Array.isArray(content)) return msg;
1454
+
1455
+ const hasImages = content.some(c => c.type === "image");
1456
+ if (!hasImages) return msg;
1457
+
1458
+ const filtered = content.map(c =>
1459
+ c.type === "image"
1460
+ ? {
1461
+ type: "text" as const,
1462
+ text: "[Image content detected but current model does not support vision. Use the inspect_image tool to analyze this image, or ask the user to switch to a vision-capable model.]",
1463
+ }
1464
+ : c,
1465
+ );
1466
+ return { ...msg, content: filtered };
1467
+ });
1468
+ };
1469
+
1470
+ // Final convertToLlm: chain block-images filter → image routing warnings → secret obfuscation
1439
1471
  const convertToLlmFinal = (messages: AgentMessage[]): Message[] => {
1440
1472
  const converted = convertToLlmWithBlockImages(messages);
1441
- if (!obfuscator?.hasSecrets()) return converted;
1442
- return obfuscateMessages(obfuscator, converted);
1473
+ const routed = convertToLlmWithImageRouting(converted);
1474
+ if (!obfuscator?.hasSecrets()) return routed;
1475
+ return obfuscateMessages(obfuscator, routed);
1443
1476
  };
1444
1477
  const transformContext = extensionRunner
1445
1478
  ? async (messages: AgentMessage[], _signal?: AbortSignal) => {
@@ -20,6 +20,9 @@ import { resolveReadPath } from "./path-utils";
20
20
  const DEFAULT_MODEL = "gemini-3-pro-image-preview";
21
21
  const DEFAULT_OPENROUTER_MODEL = "google/gemini-3-pro-image-preview";
22
22
  const DEFAULT_ANTIGRAVITY_MODEL = "gemini-3-pro-image";
23
+ const DEFAULT_OPENAI_IMAGE_MODEL = "gpt-image-1";
24
+ const DEFAULT_OPENAI_IMAGE_SIZE = "1536x1024";
25
+ const DEFAULT_OPENAI_IMAGE_QUALITY = "high";
23
26
  const IMAGE_TIMEOUT = 3 * 60 * 1000; // 3 minutes
24
27
  const MAX_IMAGE_SIZE = 35 * 1024 * 1024;
25
28
 
@@ -27,7 +30,7 @@ const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com"
27
30
  const IMAGE_SYSTEM_INSTRUCTION =
28
31
  "You are an AI image generator. Generate images based on user descriptions. Focus on creating high-quality, visually appealing images that match the user's request.";
29
32
 
30
- type ImageProvider = "antigravity" | "gemini" | "openrouter";
33
+ type ImageProvider = "antigravity" | "gemini" | "openrouter" | "openai";
31
34
  interface ImageApiKey {
32
35
  provider: ImageProvider;
33
36
  apiKey: string;
@@ -207,6 +210,21 @@ interface OpenRouterResponse {
207
210
  choices?: OpenRouterChoice[];
208
211
  }
209
212
 
213
+ interface OpenAIImageResponseData {
214
+ b64_json: string;
215
+ revised_prompt?: string | null;
216
+ }
217
+
218
+ interface OpenAIImageResponse {
219
+ created: number;
220
+ data: OpenAIImageResponseData[];
221
+ usage?: {
222
+ total_tokens: number;
223
+ input_tokens: number;
224
+ output_tokens: number;
225
+ };
226
+ }
227
+
210
228
  interface AntigravityRequest {
211
229
  project: string;
212
230
  model: string;
@@ -396,9 +414,13 @@ async function findImageApiKey(modelRegistry?: ModelRegistry): Promise<ImageApiK
396
414
  const openRouterKey = getEnvApiKey("openrouter");
397
415
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
398
416
  // Fall through to auto-detect if preferred provider key not found
417
+ } else if (preferredImageProvider === "openai") {
418
+ const openaiKey = getEnvApiKey("litellm") ?? getEnvApiKey("openai");
419
+ if (openaiKey) return { provider: "openai", apiKey: openaiKey };
420
+ // Fall through to auto-detect if preferred provider key not found
399
421
  }
400
422
 
401
- // Auto-detect: Antigravity takes priority, then OpenRouter, then Gemini
423
+ // Auto-detect: Antigravity takes priority, then OpenRouter, then OpenAI, then Gemini
402
424
  if (modelRegistry) {
403
425
  const antigravity = await findAntigravityCredentials(modelRegistry);
404
426
  if (antigravity) return antigravity;
@@ -407,6 +429,9 @@ async function findImageApiKey(modelRegistry?: ModelRegistry): Promise<ImageApiK
407
429
  const openRouterKey = getEnvApiKey("openrouter");
408
430
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
409
431
 
432
+ const openaiKey = getEnvApiKey("litellm") ?? getEnvApiKey("openai");
433
+ if (openaiKey) return { provider: "openai", apiKey: openaiKey };
434
+
410
435
  const geminiKey = getEnvApiKey("google");
411
436
  if (geminiKey) return { provider: "gemini", apiKey: geminiKey };
412
437
 
@@ -614,7 +639,7 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
614
639
  const apiKey = await findImageApiKey(ctx.modelRegistry);
615
640
  if (!apiKey) {
616
641
  throw new Error(
617
- "No image API credentials found. Login with google-antigravity, or set OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
642
+ "No image API credentials found. Set LITELLM_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
618
643
  );
619
644
  }
620
645
 
@@ -624,7 +649,9 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
624
649
  ? DEFAULT_ANTIGRAVITY_MODEL
625
650
  : provider === "openrouter"
626
651
  ? DEFAULT_OPENROUTER_MODEL
627
- : DEFAULT_MODEL;
652
+ : provider === "openai"
653
+ ? DEFAULT_OPENAI_IMAGE_MODEL
654
+ : DEFAULT_MODEL;
628
655
  const resolvedModel = provider === "openrouter" ? resolveOpenRouterModel(model) : model;
629
656
  const cwd = ctx.sessionManager.getCwd();
630
657
 
@@ -786,6 +813,86 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
786
813
  };
787
814
  }
788
815
 
816
+ if (provider === "openai") {
817
+ const openaiPrompt = assemblePrompt(params);
818
+ const size = params.image_size ?? ctx.settings?.get("providers.imageSize") ?? DEFAULT_OPENAI_IMAGE_SIZE;
819
+ const quality = ctx.settings?.get("providers.imageQuality") ?? DEFAULT_OPENAI_IMAGE_QUALITY;
820
+ const baseUrl = $env.LITELLM_BASE_URL ?? $env.OPENAI_BASE_URL ?? "https://api.openai.com";
821
+
822
+ const requestBody = {
823
+ model: DEFAULT_OPENAI_IMAGE_MODEL,
824
+ prompt: openaiPrompt,
825
+ n: 1,
826
+ size,
827
+ quality,
828
+ };
829
+
830
+ const response = await fetch(`${baseUrl}/openai/v1/images/generations`, {
831
+ method: "POST",
832
+ headers: {
833
+ Authorization: `Bearer ${apiKey.apiKey}`,
834
+ "Content-Type": "application/json",
835
+ },
836
+ body: JSON.stringify(requestBody),
837
+ signal: requestSignal,
838
+ });
839
+
840
+ const rawText = await response.text();
841
+ if (!response.ok) {
842
+ let message = rawText;
843
+ try {
844
+ const parsed = JSON.parse(rawText) as { error?: { message?: string } };
845
+ message = parsed.error?.message ?? message;
846
+ } catch {
847
+ // Keep raw text.
848
+ }
849
+ throw new Error(`OpenAI image request failed (${response.status}): ${message}`);
850
+ }
851
+
852
+ const data = JSON.parse(rawText) as OpenAIImageResponse;
853
+ const b64 = data.data?.[0]?.b64_json;
854
+ if (!b64) {
855
+ return {
856
+ content: [{ type: "text", text: "No image data returned from OpenAI." }],
857
+ details: {
858
+ provider,
859
+ model: DEFAULT_OPENAI_IMAGE_MODEL,
860
+ imageCount: 0,
861
+ imagePaths: [],
862
+ images: [],
863
+ },
864
+ };
865
+ }
866
+
867
+ const image: InlineImageData = { data: b64, mimeType: "image/png" };
868
+ const imagePaths = await saveImagesToTemp([image]);
869
+ const revisedPrompt = data.data[0]?.revised_prompt ?? undefined;
870
+
871
+ return {
872
+ content: [
873
+ {
874
+ type: "text",
875
+ text: buildResponseSummary(provider, DEFAULT_OPENAI_IMAGE_MODEL, imagePaths, revisedPrompt),
876
+ },
877
+ ],
878
+ details: {
879
+ provider,
880
+ model: DEFAULT_OPENAI_IMAGE_MODEL,
881
+ imageCount: 1,
882
+ imagePaths,
883
+ images: [image],
884
+ responseText: revisedPrompt,
885
+ usage: data.usage
886
+ ? {
887
+ promptTokenCount: data.usage.input_tokens,
888
+ candidatesTokenCount: data.usage.output_tokens,
889
+ totalTokenCount: data.usage.total_tokens,
890
+ }
891
+ : undefined,
892
+ },
893
+ };
894
+ }
895
+
789
896
  const parts = [] as Array<{ text?: string; inlineData?: InlineImageData }>;
790
897
  for (const image of resolvedImages) {
791
898
  parts.push({ inlineData: image });