@f5xc-salesdemos/xcsh 15.1.3 → 15.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@f5xc-salesdemos/xcsh",
4
- "version": "15.1.3",
4
+ "version": "15.3.0",
5
5
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://github.com/f5xc-salesdemos/xcsh",
7
7
  "author": "Can Boluk",
@@ -46,12 +46,12 @@
46
46
  "dependencies": {
47
47
  "@agentclientprotocol/sdk": "0.16.1",
48
48
  "@mozilla/readability": "^0.6",
49
- "@f5xc-salesdemos/xcsh-stats": "15.1.3",
50
- "@f5xc-salesdemos/pi-agent-core": "15.1.3",
51
- "@f5xc-salesdemos/pi-ai": "15.1.3",
52
- "@f5xc-salesdemos/pi-natives": "15.1.3",
53
- "@f5xc-salesdemos/pi-tui": "15.1.3",
54
- "@f5xc-salesdemos/pi-utils": "15.1.3",
49
+ "@f5xc-salesdemos/xcsh-stats": "15.3.0",
50
+ "@f5xc-salesdemos/pi-agent-core": "15.3.0",
51
+ "@f5xc-salesdemos/pi-ai": "15.3.0",
52
+ "@f5xc-salesdemos/pi-natives": "15.3.0",
53
+ "@f5xc-salesdemos/pi-tui": "15.3.0",
54
+ "@f5xc-salesdemos/pi-utils": "15.3.0",
55
55
  "@sinclair/typebox": "^0.34",
56
56
  "@xterm/headless": "^6.0",
57
57
  "ajv": "^8.18",
@@ -262,7 +262,7 @@ export const SETTINGS_SCHEMA = {
262
262
  symbolPreset: {
263
263
  type: "enum",
264
264
  values: ["unicode", "nerd", "ascii"] as const,
265
- default: "unicode",
265
+ default: "nerd",
266
266
  ui: { tab: "appearance", label: "Symbol Preset", description: "Icon/symbol style", submenu: true },
267
267
  },
268
268
 
@@ -292,7 +292,7 @@ export const SETTINGS_SCHEMA = {
292
292
  "statusLine.separator": {
293
293
  type: "enum",
294
294
  values: ["powerline", "powerline-thin", "slash", "pipe", "block", "none", "ascii"] as const,
295
- default: "powerline-thin",
295
+ default: "powerline",
296
296
  ui: {
297
297
  tab: "appearance",
298
298
  label: "Status Line Separator",
@@ -670,7 +670,7 @@ export const SETTINGS_SCHEMA = {
670
670
 
671
671
  collapseChangelog: {
672
672
  type: "boolean",
673
- default: false,
673
+ default: true,
674
674
  ui: { tab: "interaction", label: "Collapse Changelog", description: "Show condensed changelog after updates" },
675
675
  },
676
676
 
@@ -703,7 +703,7 @@ export const SETTINGS_SCHEMA = {
703
703
  // Speech-to-text
704
704
  "stt.enabled": {
705
705
  type: "boolean",
706
- default: false,
706
+ default: true,
707
707
  ui: { tab: "interaction", label: "Speech-to-Text", description: "Enable speech-to-text input via microphone" },
708
708
  },
709
709
 
@@ -861,7 +861,7 @@ export const SETTINGS_SCHEMA = {
861
861
  // Memories
862
862
  "memories.enabled": {
863
863
  type: "boolean",
864
- default: false,
864
+ default: true,
865
865
  ui: {
866
866
  tab: "context",
867
867
  label: "Memories",
@@ -1249,7 +1249,7 @@ export const SETTINGS_SCHEMA = {
1249
1249
 
1250
1250
  "renderMermaid.enabled": {
1251
1251
  type: "boolean",
1252
- default: false,
1252
+ default: true,
1253
1253
  ui: {
1254
1254
  tab: "tools",
1255
1255
  label: "Render Mermaid",
@@ -1269,7 +1269,7 @@ export const SETTINGS_SCHEMA = {
1269
1269
 
1270
1270
  "calc.enabled": {
1271
1271
  type: "boolean",
1272
- default: false,
1272
+ default: true,
1273
1273
  ui: {
1274
1274
  tab: "tools",
1275
1275
  label: "Calculator",
@@ -1279,7 +1279,7 @@ export const SETTINGS_SCHEMA = {
1279
1279
 
1280
1280
  "inspect_image.enabled": {
1281
1281
  type: "boolean",
1282
- default: false,
1282
+ default: true,
1283
1283
  ui: {
1284
1284
  tab: "tools",
1285
1285
  label: "Inspect Image",
@@ -1287,6 +1287,16 @@ export const SETTINGS_SCHEMA = {
1287
1287
  },
1288
1288
  },
1289
1289
 
1290
+ "generate_image.enabled": {
1291
+ type: "boolean",
1292
+ default: true,
1293
+ ui: {
1294
+ tab: "tools",
1295
+ label: "Generate Image",
1296
+ description: "Enable the generate_image tool for AI-powered image and diagram generation",
1297
+ },
1298
+ },
1299
+
1290
1300
  "checkpoint.enabled": {
1291
1301
  type: "boolean",
1292
1302
  default: false,
@@ -1306,7 +1316,7 @@ export const SETTINGS_SCHEMA = {
1306
1316
 
1307
1317
  "github.enabled": {
1308
1318
  type: "boolean",
1309
- default: false,
1319
+ default: true,
1310
1320
  ui: {
1311
1321
  tab: "tools",
1312
1322
  label: "GitHub CLI",
@@ -1615,12 +1625,36 @@ export const SETTINGS_SCHEMA = {
1615
1625
  },
1616
1626
  "providers.image": {
1617
1627
  type: "enum",
1618
- values: ["auto", "gemini", "openrouter"] as const,
1628
+ values: ["auto", "gemini", "openrouter", "openai"] as const,
1619
1629
  default: "auto",
1620
1630
  ui: {
1621
1631
  tab: "providers",
1622
1632
  label: "Image Provider",
1623
- description: "Provider for image generation tool",
1633
+ description: "Provider for image generation tool (auto detects from available API keys)",
1634
+ submenu: true,
1635
+ },
1636
+ },
1637
+
1638
+ "providers.imageSize": {
1639
+ type: "enum",
1640
+ values: ["1024x1024", "1536x1024", "1024x1536"] as const,
1641
+ default: "1536x1024",
1642
+ ui: {
1643
+ tab: "providers",
1644
+ label: "Image Size",
1645
+ description: "Default image dimensions for generation (landscape, square, or portrait)",
1646
+ submenu: true,
1647
+ },
1648
+ },
1649
+
1650
+ "providers.imageQuality": {
1651
+ type: "enum",
1652
+ values: ["low", "medium", "high"] as const,
1653
+ default: "high",
1654
+ ui: {
1655
+ tab: "providers",
1656
+ label: "Image Quality",
1657
+ description: "Rendering quality for generated images (higher = slower but more detailed)",
1624
1658
  submenu: true,
1625
1659
  },
1626
1660
  },
@@ -1662,13 +1696,13 @@ export const SETTINGS_SCHEMA = {
1662
1696
  // Exa
1663
1697
  "exa.enabled": {
1664
1698
  type: "boolean",
1665
- default: true,
1699
+ default: false,
1666
1700
  ui: { tab: "providers", label: "Exa", description: "Master toggle for all Exa search tools" },
1667
1701
  },
1668
1702
 
1669
1703
  "exa.enableSearch": {
1670
1704
  type: "boolean",
1671
- default: true,
1705
+ default: false,
1672
1706
  ui: { tab: "providers", label: "Exa Search", description: "Basic search, deep search, code search, crawl" },
1673
1707
  },
1674
1708
 
@@ -337,10 +337,21 @@ const OPTION_PROVIDERS: Partial<Record<SettingPath, OptionProvider>> = {
337
337
  { value: "parallel", label: "Parallel", description: "Requires PARALLEL_API_KEY" },
338
338
  ],
339
339
  "providers.image": [
340
- { value: "auto", label: "Auto", description: "Priority: OpenRouter > Gemini" },
340
+ { value: "auto", label: "Auto", description: "Auto-detect from available API keys" },
341
+ { value: "openai", label: "OpenAI", description: "gpt-image-1 via LITELLM_API_KEY or OPENAI_API_KEY" },
341
342
  { value: "gemini", label: "Gemini", description: "Requires GEMINI_API_KEY" },
342
343
  { value: "openrouter", label: "OpenRouter", description: "Requires OPENROUTER_API_KEY" },
343
344
  ],
345
+ "providers.imageSize": [
346
+ { value: "1024x1024", label: "1024x1024", description: "Square" },
347
+ { value: "1536x1024", label: "1536x1024", description: "Landscape (default)" },
348
+ { value: "1024x1536", label: "1024x1536", description: "Portrait" },
349
+ ],
350
+ "providers.imageQuality": [
351
+ { value: "low", label: "Low", description: "Fastest generation, lower detail" },
352
+ { value: "medium", label: "Medium", description: "Balanced speed and quality" },
353
+ { value: "high", label: "High", description: "Best quality, slower generation (default)" },
354
+ ],
344
355
  "providers.kimiApiFormat": [
345
356
  { value: "openai", label: "OpenAI", description: "api.kimi.com" },
346
357
  { value: "anthropic", label: "Anthropic", description: "api.moonshot.ai" },
@@ -281,7 +281,13 @@ Don't open a file hoping. Hope is not a strategy.
281
281
  ### Image inspection
282
282
  - For image understanding tasks: **MUST** use `inspect_image` over `read` to avoid overloading main session context.
283
283
  - Write a specific `question` for `inspect_image`: what to inspect, constraints (for example verbatim OCR), and desired output format.
284
+ - If you encounter `[Image content detected but current model does not support vision]` in a message, use `inspect_image` with the image file path to analyze it. Do not ask the user to describe the image — analyze it yourself via the tool.
284
285
  {{/if}}
286
+ {{#ifAll (includes tools "inspect_image") (includes tools "generate_image")}}
287
+ ### Image generation and analysis
288
+ - After using `generate_image`, the result includes saved file paths (e.g. `/tmp/xcsh-image-*.png`). To analyze or describe the generated image, chain `inspect_image` using that file path.
289
+ - Example workflow: user asks "create a diagram and check if it follows brand guidelines" → call `generate_image`, then call `inspect_image` on the resulting file path with the brand compliance question.
290
+ {{/ifAll}}
285
291
 
286
292
  {{SECTION_SEPERATOR "Rules"}}
287
293
 
package/src/sdk.ts CHANGED
@@ -672,7 +672,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
672
672
  }
673
673
 
674
674
  const imageProvider = settings.get("providers.image");
675
- if (imageProvider === "auto" || imageProvider === "gemini" || imageProvider === "openrouter") {
675
+ if (
676
+ imageProvider === "auto" ||
677
+ imageProvider === "gemini" ||
678
+ imageProvider === "openrouter" ||
679
+ imageProvider === "openai"
680
+ ) {
676
681
  setPreferredImageProvider(imageProvider);
677
682
  }
678
683
 
@@ -1034,10 +1039,12 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1034
1039
  }
1035
1040
  }
1036
1041
 
1037
- // Add Gemini image tools if GEMINI_API_KEY (or GOOGLE_API_KEY) is available
1038
- const geminiImageTools = await logger.time("getGeminiImageTools", getGeminiImageTools);
1039
- if (geminiImageTools.length > 0) {
1040
- customTools.push(...(geminiImageTools as unknown as CustomTool[]));
1042
+ // Add image generation tools if an image API key is available and the tool is enabled
1043
+ if (settings.get("generate_image.enabled")) {
1044
+ const geminiImageTools = await logger.time("getGeminiImageTools", getGeminiImageTools);
1045
+ if (geminiImageTools.length > 0) {
1046
+ customTools.push(...(geminiImageTools as unknown as CustomTool[]));
1047
+ }
1041
1048
  }
1042
1049
 
1043
1050
  // Add web search tools
@@ -1435,11 +1442,37 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1435
1442
  });
1436
1443
  };
1437
1444
 
1438
- // Final convertToLlm: chain block-images filter with secret obfuscation
1445
+ // Replace unsupported image content with actionable warnings when model lacks vision
1446
+ const convertToLlmWithImageRouting = (messages: Message[]): Message[] => {
1447
+ const currentModel = agent?.state?.model;
1448
+ if (!currentModel || currentModel.input.includes("image")) return messages;
1449
+
1450
+ return messages.map(msg => {
1451
+ if (msg.role !== "user" && msg.role !== "toolResult") return msg;
1452
+ const content = msg.content;
1453
+ if (!Array.isArray(content)) return msg;
1454
+
1455
+ const hasImages = content.some(c => c.type === "image");
1456
+ if (!hasImages) return msg;
1457
+
1458
+ const filtered = content.map(c =>
1459
+ c.type === "image"
1460
+ ? {
1461
+ type: "text" as const,
1462
+ text: "[Image content detected but current model does not support vision. Use the inspect_image tool to analyze this image, or ask the user to switch to a vision-capable model.]",
1463
+ }
1464
+ : c,
1465
+ );
1466
+ return { ...msg, content: filtered };
1467
+ });
1468
+ };
1469
+
1470
+ // Final convertToLlm: chain block-images filter → image routing warnings → secret obfuscation
1439
1471
  const convertToLlmFinal = (messages: AgentMessage[]): Message[] => {
1440
1472
  const converted = convertToLlmWithBlockImages(messages);
1441
- if (!obfuscator?.hasSecrets()) return converted;
1442
- return obfuscateMessages(obfuscator, converted);
1473
+ const routed = convertToLlmWithImageRouting(converted);
1474
+ if (!obfuscator?.hasSecrets()) return routed;
1475
+ return obfuscateMessages(obfuscator, routed);
1443
1476
  };
1444
1477
  const transformContext = extensionRunner
1445
1478
  ? async (messages: AgentMessage[], _signal?: AbortSignal) => {
@@ -20,6 +20,9 @@ import { resolveReadPath } from "./path-utils";
20
20
  const DEFAULT_MODEL = "gemini-3-pro-image-preview";
21
21
  const DEFAULT_OPENROUTER_MODEL = "google/gemini-3-pro-image-preview";
22
22
  const DEFAULT_ANTIGRAVITY_MODEL = "gemini-3-pro-image";
23
+ const DEFAULT_OPENAI_IMAGE_MODEL = "gpt-image-1";
24
+ const DEFAULT_OPENAI_IMAGE_SIZE = "1536x1024";
25
+ const DEFAULT_OPENAI_IMAGE_QUALITY = "high";
23
26
  const IMAGE_TIMEOUT = 3 * 60 * 1000; // 3 minutes
24
27
  const MAX_IMAGE_SIZE = 35 * 1024 * 1024;
25
28
 
@@ -27,7 +30,7 @@ const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com"
27
30
  const IMAGE_SYSTEM_INSTRUCTION =
28
31
  "You are an AI image generator. Generate images based on user descriptions. Focus on creating high-quality, visually appealing images that match the user's request.";
29
32
 
30
- type ImageProvider = "antigravity" | "gemini" | "openrouter";
33
+ type ImageProvider = "antigravity" | "gemini" | "openrouter" | "openai";
31
34
  interface ImageApiKey {
32
35
  provider: ImageProvider;
33
36
  apiKey: string;
@@ -207,6 +210,21 @@ interface OpenRouterResponse {
207
210
  choices?: OpenRouterChoice[];
208
211
  }
209
212
 
213
+ interface OpenAIImageResponseData {
214
+ b64_json: string;
215
+ revised_prompt?: string | null;
216
+ }
217
+
218
+ interface OpenAIImageResponse {
219
+ created: number;
220
+ data: OpenAIImageResponseData[];
221
+ usage?: {
222
+ total_tokens: number;
223
+ input_tokens: number;
224
+ output_tokens: number;
225
+ };
226
+ }
227
+
210
228
  interface AntigravityRequest {
211
229
  project: string;
212
230
  model: string;
@@ -396,9 +414,13 @@ async function findImageApiKey(modelRegistry?: ModelRegistry): Promise<ImageApiK
396
414
  const openRouterKey = getEnvApiKey("openrouter");
397
415
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
398
416
  // Fall through to auto-detect if preferred provider key not found
417
+ } else if (preferredImageProvider === "openai") {
418
+ const openaiKey = getEnvApiKey("litellm") ?? getEnvApiKey("openai");
419
+ if (openaiKey) return { provider: "openai", apiKey: openaiKey };
420
+ // Fall through to auto-detect if preferred provider key not found
399
421
  }
400
422
 
401
- // Auto-detect: Antigravity takes priority, then OpenRouter, then Gemini
423
+ // Auto-detect: Antigravity takes priority, then OpenRouter, then OpenAI, then Gemini
402
424
  if (modelRegistry) {
403
425
  const antigravity = await findAntigravityCredentials(modelRegistry);
404
426
  if (antigravity) return antigravity;
@@ -407,6 +429,9 @@ async function findImageApiKey(modelRegistry?: ModelRegistry): Promise<ImageApiK
407
429
  const openRouterKey = getEnvApiKey("openrouter");
408
430
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
409
431
 
432
+ const openaiKey = getEnvApiKey("litellm") ?? getEnvApiKey("openai");
433
+ if (openaiKey) return { provider: "openai", apiKey: openaiKey };
434
+
410
435
  const geminiKey = getEnvApiKey("google");
411
436
  if (geminiKey) return { provider: "gemini", apiKey: geminiKey };
412
437
 
@@ -614,7 +639,7 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
614
639
  const apiKey = await findImageApiKey(ctx.modelRegistry);
615
640
  if (!apiKey) {
616
641
  throw new Error(
617
- "No image API credentials found. Login with google-antigravity, or set OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
642
+ "No image API credentials found. Set LITELLM_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
618
643
  );
619
644
  }
620
645
 
@@ -624,7 +649,9 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
624
649
  ? DEFAULT_ANTIGRAVITY_MODEL
625
650
  : provider === "openrouter"
626
651
  ? DEFAULT_OPENROUTER_MODEL
627
- : DEFAULT_MODEL;
652
+ : provider === "openai"
653
+ ? DEFAULT_OPENAI_IMAGE_MODEL
654
+ : DEFAULT_MODEL;
628
655
  const resolvedModel = provider === "openrouter" ? resolveOpenRouterModel(model) : model;
629
656
  const cwd = ctx.sessionManager.getCwd();
630
657
 
@@ -786,6 +813,86 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
786
813
  };
787
814
  }
788
815
 
816
+ if (provider === "openai") {
817
+ const openaiPrompt = assemblePrompt(params);
818
+ const size = params.image_size ?? ctx.settings?.get("providers.imageSize") ?? DEFAULT_OPENAI_IMAGE_SIZE;
819
+ const quality = ctx.settings?.get("providers.imageQuality") ?? DEFAULT_OPENAI_IMAGE_QUALITY;
820
+ const baseUrl = $env.LITELLM_BASE_URL ?? $env.OPENAI_BASE_URL ?? "https://api.openai.com";
821
+
822
+ const requestBody = {
823
+ model: DEFAULT_OPENAI_IMAGE_MODEL,
824
+ prompt: openaiPrompt,
825
+ n: 1,
826
+ size,
827
+ quality,
828
+ };
829
+
830
+ const response = await fetch(`${baseUrl}/openai/v1/images/generations`, {
831
+ method: "POST",
832
+ headers: {
833
+ Authorization: `Bearer ${apiKey.apiKey}`,
834
+ "Content-Type": "application/json",
835
+ },
836
+ body: JSON.stringify(requestBody),
837
+ signal: requestSignal,
838
+ });
839
+
840
+ const rawText = await response.text();
841
+ if (!response.ok) {
842
+ let message = rawText;
843
+ try {
844
+ const parsed = JSON.parse(rawText) as { error?: { message?: string } };
845
+ message = parsed.error?.message ?? message;
846
+ } catch {
847
+ // Keep raw text.
848
+ }
849
+ throw new Error(`OpenAI image request failed (${response.status}): ${message}`);
850
+ }
851
+
852
+ const data = JSON.parse(rawText) as OpenAIImageResponse;
853
+ const b64 = data.data?.[0]?.b64_json;
854
+ if (!b64) {
855
+ return {
856
+ content: [{ type: "text", text: "No image data returned from OpenAI." }],
857
+ details: {
858
+ provider,
859
+ model: DEFAULT_OPENAI_IMAGE_MODEL,
860
+ imageCount: 0,
861
+ imagePaths: [],
862
+ images: [],
863
+ },
864
+ };
865
+ }
866
+
867
+ const image: InlineImageData = { data: b64, mimeType: "image/png" };
868
+ const imagePaths = await saveImagesToTemp([image]);
869
+ const revisedPrompt = data.data[0]?.revised_prompt ?? undefined;
870
+
871
+ return {
872
+ content: [
873
+ {
874
+ type: "text",
875
+ text: buildResponseSummary(provider, DEFAULT_OPENAI_IMAGE_MODEL, imagePaths, revisedPrompt),
876
+ },
877
+ ],
878
+ details: {
879
+ provider,
880
+ model: DEFAULT_OPENAI_IMAGE_MODEL,
881
+ imageCount: 1,
882
+ imagePaths,
883
+ images: [image],
884
+ responseText: revisedPrompt,
885
+ usage: data.usage
886
+ ? {
887
+ promptTokenCount: data.usage.input_tokens,
888
+ candidatesTokenCount: data.usage.output_tokens,
889
+ totalTokenCount: data.usage.total_tokens,
890
+ }
891
+ : undefined,
892
+ },
893
+ };
894
+ }
895
+
789
896
  const parts = [] as Array<{ text?: string; inlineData?: InlineImageData }>;
790
897
  for (const image of resolvedImages) {
791
898
  parts.push({ inlineData: image });