@oh-my-pi/pi-coding-agent 15.5.6 → 15.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,7 +29,12 @@ import {
29
29
  import type { InteractiveModeContext } from "../../modes/types";
30
30
  import { type SessionInfo, SessionManager } from "../../session/session-manager";
31
31
  import { FileSessionStorage } from "../../session/session-storage";
32
- import { isSearchProviderPreference, setPreferredImageProvider, setPreferredSearchProvider } from "../../tools";
32
+ import {
33
+ isImageProviderPreference,
34
+ isSearchProviderPreference,
35
+ setPreferredImageProvider,
36
+ setPreferredSearchProvider,
37
+ } from "../../tools";
33
38
  import { setSessionTerminalTitle } from "../../utils/title-generator";
34
39
  import { AgentDashboard } from "../components/agent-dashboard";
35
40
  import { AssistantMessageComponent } from "../components/assistant-message";
@@ -374,7 +379,7 @@ export class SelectorController {
374
379
  }
375
380
  break;
376
381
  case "providers.image":
377
- if (value === "auto" || value === "openai" || value === "gemini" || value === "openrouter") {
382
+ if (isImageProviderPreference(value)) {
378
383
  setPreferredImageProvider(value);
379
384
  }
380
385
  break;
@@ -1012,7 +1012,7 @@ export class InteractiveMode implements InteractiveModeContext {
1012
1012
  }
1013
1013
 
1014
1014
  async #getPlanFilePath(): Promise<string> {
1015
- return "local://PLAN.md";
1015
+ return this.session.getPlanReferencePath() || "local://PLAN.md";
1016
1016
  }
1017
1017
 
1018
1018
  #resolvePlanFilePath(planFilePath: string): string {
package/src/sdk.ts CHANGED
@@ -129,6 +129,7 @@ import {
129
129
  FindTool,
130
130
  getSearchTools,
131
131
  HIDDEN_TOOLS,
132
+ isImageProviderPreference,
132
133
  isSearchProviderPreference,
133
134
  type LspStartupServerInfo,
134
135
  loadSshTool,
@@ -148,6 +149,7 @@ import { ToolContextStore } from "./tools/context";
148
149
  import { getImageGenTools } from "./tools/image-gen";
149
150
  import { wrapToolWithMetaNotice } from "./tools/output-meta";
150
151
  import { queueResolveHandler } from "./tools/resolve";
152
+ import { ttsTool } from "./tools/tts";
151
153
  import { EventBus } from "./utils/event-bus";
152
154
  import { buildNamedToolChoice } from "./utils/tool-choice";
153
155
  import { buildWorkspaceTree, type WorkspaceTree } from "./workspace-tree";
@@ -893,12 +895,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
893
895
  }
894
896
 
895
897
  const imageProvider = settings.get("providers.image");
896
- if (
897
- imageProvider === "auto" ||
898
- imageProvider === "openai" ||
899
- imageProvider === "gemini" ||
900
- imageProvider === "openrouter"
901
- ) {
898
+ if (isImageProviderPreference(imageProvider)) {
902
899
  setPreferredImageProvider(imageProvider);
903
900
  }
904
901
 
@@ -1319,6 +1316,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1319
1316
  customTools.push(...(imageGenTools as unknown as CustomTool[]));
1320
1317
  }
1321
1318
 
1319
+ if (settings.get("tts.enabled")) {
1320
+ customTools.push(ttsTool as unknown as CustomTool);
1321
+ }
1322
+
1322
1323
  // Add web search tools
1323
1324
  if (options.toolNames?.includes("web_search")) {
1324
1325
  customTools.push(...getSearchTools());
@@ -1876,9 +1877,13 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1876
1877
  }
1877
1878
  return key;
1878
1879
  },
1879
- streamFn: (streamModel, context, streamOptions) =>
1880
- streamSimple(streamModel, context, {
1880
+ streamFn: (streamModel, context, streamOptions) => {
1881
+ const openrouterRoutingPreset = settings.get("providers.openrouterVariant");
1882
+ const openrouterVariant =
1883
+ openrouterRoutingPreset && openrouterRoutingPreset !== "default" ? openrouterRoutingPreset : undefined;
1884
+ return streamSimple(streamModel, context, {
1881
1885
  ...streamOptions,
1886
+ openrouterVariant: streamOptions?.openrouterVariant ?? openrouterVariant,
1882
1887
  onAuthError: async (provider, oldKey, error) => {
1883
1888
  await modelRegistry.authStorage.invalidateCredentialMatching(provider, oldKey, {
1884
1889
  signal: streamOptions?.signal,
@@ -1890,7 +1895,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1890
1895
  });
1891
1896
  return modelRegistry.getApiKeyForProvider(provider, agent.sessionId);
1892
1897
  },
1893
- }),
1898
+ });
1899
+ },
1894
1900
  cursorExecHandlers,
1895
1901
  transformToolCallArguments: (args, _toolName) => {
1896
1902
  let result = args;
@@ -3631,9 +3631,17 @@ export class AgentSession {
3631
3631
  const sessionOnResponse = this.#onResponse;
3632
3632
  const sessionMetadata = this.agent.metadataForProvider(provider);
3633
3633
  const sessionOnSseEvent = this.#onSseEvent;
3634
- if (!sessionOnPayload && !sessionOnResponse && !sessionMetadata && !sessionOnSseEvent) return options;
3635
-
3636
- const preparedOptions: SimpleStreamOptions = { ...options };
3634
+ const openrouterRoutingPreset =
3635
+ provider === "openrouter" ? this.settings.get("providers.openrouterVariant") : "default";
3636
+ const openrouterVariant =
3637
+ openrouterRoutingPreset !== "default" && options.openrouterVariant === undefined
3638
+ ? openrouterRoutingPreset
3639
+ : undefined;
3640
+ if (!sessionOnPayload && !sessionOnResponse && !sessionMetadata && !sessionOnSseEvent && !openrouterVariant)
3641
+ return options;
3642
+
3643
+ const preparedOptions: SimpleStreamOptions =
3644
+ openrouterVariant === undefined ? { ...options } : { ...options, openrouterVariant };
3637
3645
 
3638
3646
  // Stamp session metadata (e.g. user_id={session_id}) onto direct-call requests so
3639
3647
  // they share the same session bucket as Agent.prompt-routed requests on Anthropic
@@ -3758,6 +3766,10 @@ export class AgentSession {
3758
3766
  this.#planReferencePath = path;
3759
3767
  }
3760
3768
 
3769
+ getPlanReferencePath(): string {
3770
+ return this.#planReferencePath;
3771
+ }
3772
+
3761
3773
  get clientBridge(): ClientBridge | undefined {
3762
3774
  return this.#clientBridge;
3763
3775
  }
@@ -5575,6 +5587,11 @@ export class AgentSession {
5575
5587
  initiatorOverride: "agent",
5576
5588
  metadata: this.agent.metadataForProvider(model.provider),
5577
5589
  telemetry: resolveTelemetry(this.agent.telemetry, this.sessionId),
5590
+ // Honor the user's /model thinking selection on the handoff
5591
+ // path. Clamped per-model inside generateHandoff via
5592
+ // resolveCompactionEffort so unsupported-effort models don't
5593
+ // trip requireSupportedEffort.
5594
+ thinkingLevel: this.thinkingLevel,
5578
5595
  },
5579
5596
  handoffSignal,
5580
5597
  );
@@ -6345,6 +6362,11 @@ export class AgentSession {
6345
6362
  metadata: this.agent.metadataForProvider(candidate.provider),
6346
6363
  convertToLlm,
6347
6364
  telemetry,
6365
+ // Honor the user's /model thinking selection (incl. `off`) on
6366
+ // the manual `/compact` path. Clamped per-model inside compact()
6367
+ // via resolveCompactionEffort so unsupported-effort models
6368
+ // (xai-oauth/grok-build) don't trip requireSupportedEffort.
6369
+ thinkingLevel: this.thinkingLevel,
6348
6370
  });
6349
6371
  } catch (error) {
6350
6372
  if (!this.#isCompactionAuthFailure(error)) {
@@ -6617,6 +6639,11 @@ export class AgentSession {
6617
6639
  initiatorOverride: "agent",
6618
6640
  convertToLlm,
6619
6641
  telemetry,
6642
+ // Honor the user's /model thinking selection on the
6643
+ // auto-compaction path — the most-fired compaction
6644
+ // site. Clamped per-model inside compact() via
6645
+ // resolveCompactionEffort.
6646
+ thinkingLevel: this.thinkingLevel,
6620
6647
  });
6621
6648
  break;
6622
6649
  } catch (error) {
@@ -562,9 +562,22 @@ function parseFeedToMarkdown(content: string, maxItems = 10): string {
562
562
  }
563
563
 
564
564
  /**
565
- * Render HTML to markdown using Parallel, jina, trafilatura, lynx (in order of preference)
565
+ * Cap on any single remote reader-mode request (Parallel, Jina) so a stalled
566
+ * remote endpoint cannot consume the whole reader-mode budget and starve the
567
+ * local fallback renderers (trafilatura, lynx, native). See #1449.
566
568
  */
567
- async function renderHtmlToText(
569
+ const REMOTE_READER_MAX_MS = 10_000;
570
+
571
+ /**
572
+ * Render HTML to markdown using Parallel, jina, trafilatura, lynx, then the
573
+ * in-process native converter. The overall `timeout` budget bounds the call,
574
+ * but remote reader requests are additionally capped at `REMOTE_READER_MAX_MS`
575
+ * so that a hung remote endpoint cannot prevent local fallbacks from running.
576
+ * Only a real `userSignal` cancellation aborts the chain — remote per-attempt
577
+ * timeouts and the overall reader-mode timeout still allow later renderers
578
+ * (especially the purely-local native converter) to be tried.
579
+ */
580
+ export async function renderHtmlToText(
568
581
  url: string,
569
582
  html: string,
570
583
  timeout: number,
@@ -572,14 +585,15 @@ async function renderHtmlToText(
572
585
  userSignal: AbortSignal | undefined,
573
586
  storage: AgentStorage | null,
574
587
  ): Promise<{ content: string; ok: boolean; method: string }> {
575
- const signal = ptree.combineSignals(userSignal, timeout * 1000);
588
+ const overallSignal = ptree.combineSignals(userSignal, timeout * 1000);
576
589
  const execOptions = {
577
590
  mode: "group" as const,
578
591
  allowNonZero: true,
579
592
  allowAbort: true,
580
593
  stderr: "full" as const,
581
- signal,
594
+ signal: overallSignal,
582
595
  };
596
+ const remoteBudgetMs = Math.min(timeout * 1000, REMOTE_READER_MAX_MS);
583
597
 
584
598
  // Try Parallel extract first when credentials are configured
585
599
  if (settings.get("providers.parallelFetch") && findParallelApiKey(storage)) {
@@ -590,7 +604,7 @@ async function renderHtmlToText(
590
604
  objective: "Extract the main content",
591
605
  excerpts: true,
592
606
  fullContent: false,
593
- signal,
607
+ signal: ptree.combineSignals(userSignal, remoteBudgetMs),
594
608
  },
595
609
  storage,
596
610
  );
@@ -602,17 +616,18 @@ async function renderHtmlToText(
602
616
  }
603
617
  }
604
618
  } catch {
605
- // Parallel extract failed, continue to next method
606
- signal?.throwIfAborted();
619
+ // Parallel extract failed or stalled; honour real cancellation only.
620
+ userSignal?.throwIfAborted();
607
621
  }
608
622
  }
609
623
 
610
- // Try jina first (reader API)
624
+ // Try jina reader API with its own sub-budget so a stall cannot starve
625
+ // later fallbacks (#1449).
611
626
  try {
612
627
  const jinaUrl = `https://r.jina.ai/${url}`;
613
628
  const response = await fetch(jinaUrl, {
614
629
  headers: { Accept: "text/markdown" },
615
- signal,
630
+ signal: ptree.combineSignals(userSignal, remoteBudgetMs),
616
631
  });
617
632
  if (response.ok) {
618
633
  const content = await response.text();
@@ -621,37 +636,50 @@ async function renderHtmlToText(
621
636
  }
622
637
  }
623
638
  } catch {
624
- // Jina failed, continue to next method
625
- signal?.throwIfAborted();
639
+ // Jina failed or stalled; honour real cancellation only.
640
+ userSignal?.throwIfAborted();
626
641
  }
627
642
 
628
643
  // Try trafilatura (auto-install via uv/pip)
629
- const trafilatura = await ensureTool("trafilatura", { signal, silent: true });
630
- if (trafilatura) {
631
- const result = await ptree.exec([trafilatura, "-u", url, "--output-format", "markdown"], execOptions);
632
- if (result.ok && result.stdout.trim().length > 100) {
633
- return { content: result.stdout, ok: true, method: "trafilatura" };
644
+ try {
645
+ const trafilatura = await ensureTool("trafilatura", { signal: overallSignal, silent: true });
646
+ if (trafilatura) {
647
+ const result = await ptree.exec([trafilatura, "-u", url, "--output-format", "markdown"], execOptions);
648
+ if (result.ok && result.stdout.trim().length > 100) {
649
+ return { content: result.stdout, ok: true, method: "trafilatura" };
650
+ }
634
651
  }
652
+ } catch {
653
+ // trafilatura unavailable or stalled; continue to next method.
654
+ userSignal?.throwIfAborted();
635
655
  }
636
656
 
637
657
  // Try lynx (can't auto-install, system package)
638
- const lynx = hasCommand("lynx");
639
- if (lynx) {
640
- const result = await ptree.exec(["lynx", "-dump", "-nolist", "-width", "250", url], execOptions);
641
- if (result.ok) {
642
- return { content: result.stdout, ok: true, method: "lynx" };
658
+ try {
659
+ const lynx = hasCommand("lynx");
660
+ if (lynx) {
661
+ const result = await ptree.exec(["lynx", "-dump", "-nolist", "-width", "250", url], execOptions);
662
+ if (result.ok) {
663
+ return { content: result.stdout, ok: true, method: "lynx" };
664
+ }
643
665
  }
666
+ } catch {
667
+ // lynx failed or stalled; continue to native converter.
668
+ userSignal?.throwIfAborted();
644
669
  }
645
670
 
646
- // Fall back to native converter (fastest, no network/subprocess)
671
+ // Fall back to native converter (purely local, no network/subprocess).
672
+ // Always attempted: even if remote renderers and subprocesses were aborted
673
+ // by the overall reader-mode timeout, this still works on already-loaded
674
+ // HTML (#1449).
647
675
  try {
648
676
  const content = await htmlToMarkdown(html, { cleanContent: true });
649
677
  if (content.trim().length > 100 && !isLowQualityOutput(content)) {
650
678
  return { content, ok: true, method: "native" };
651
679
  }
652
680
  } catch {
653
- // Native converter failed, continue to next method
654
- signal?.throwIfAborted();
681
+ // Native converter failed; nothing else to try.
682
+ userSignal?.throwIfAborted();
655
683
  }
656
684
  return { content: "", ok: false, method: "none" };
657
685
  }
@@ -22,12 +22,14 @@ import * as z from "zod/v4";
22
22
  import packageJson from "../../package.json" with { type: "json" };
23
23
  import { isAuthenticated, type ModelRegistry } from "../config/model-registry";
24
24
  import type { CustomTool } from "../extensibility/custom-tools/types";
25
+ import { ohMyPiXAIUserAgent, resolveXAIHttpCredentials } from "../lib/xai-http";
25
26
  import imageGenDescription from "../prompts/tools/image-gen.md" with { type: "text" };
26
27
  import { resolveReadPath } from "./path-utils";
27
28
 
28
29
  const DEFAULT_MODEL = "gemini-3-pro-image-preview";
29
30
  const DEFAULT_OPENROUTER_MODEL = "google/gemini-3-pro-image-preview";
30
31
  const DEFAULT_ANTIGRAVITY_MODEL = "gemini-3-pro-image";
32
+ const DEFAULT_XAI_IMAGE_MODEL = "grok-imagine-image";
31
33
  const IMAGE_TIMEOUT = 3 * 60 * 1000; // 3 minutes
32
34
  const MAX_IMAGE_SIZE = 35 * 1024 * 1024;
33
35
  const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
@@ -38,7 +40,9 @@ const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com"
38
40
  const IMAGE_SYSTEM_INSTRUCTION =
39
41
  "You are an AI image generator. Generate images based on user descriptions. Focus on creating high-quality, visually appealing images that match the user's request.";
40
42
 
41
- type ImageProvider = "antigravity" | "gemini" | "openai" | "openai-codex" | "openrouter";
43
+ export type ImageProvider = "antigravity" | "gemini" | "openai" | "openai-codex" | "openrouter" | "xai";
44
+ export type ImageProviderPreference = Exclude<ImageProvider, "openai-codex"> | "auto";
45
+
42
46
  interface ImageApiKey {
43
47
  provider: ImageProvider;
44
48
  apiKey: string;
@@ -46,8 +50,13 @@ interface ImageApiKey {
46
50
  model?: Model;
47
51
  }
48
52
 
53
+ const COMMON_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"] as const;
54
+ const XAI_IMAGE_ASPECT_RATIOS = [...COMMON_IMAGE_ASPECT_RATIOS, "3:2", "2:3"] as const;
55
+ const COMMON_IMAGE_ASPECT_RATIO_SET = new Set<string>(COMMON_IMAGE_ASPECT_RATIOS);
56
+ const IMAGE_PROVIDER_PREFERENCES = new Set<string>(["auto", "antigravity", "gemini", "openai", "openrouter", "xai"]);
57
+
49
58
  const responseModalitySchema = z.enum(["IMAGE", "TEXT"] as const);
50
- const aspectRatioSchema = z.enum(["1:1", "3:4", "4:3", "9:16", "16:9"] as const).describe("aspect ratio");
59
+ const aspectRatioSchema = z.enum(XAI_IMAGE_ASPECT_RATIOS).describe("aspect ratio");
51
60
  const imageSizeSchema = z.enum(["1024x1024", "1536x1024", "1024x1536"] as const).describe("image size");
52
61
 
53
62
  const inputImageSchema = z
@@ -274,6 +283,36 @@ interface AntigravityRequest {
274
283
  requestId?: string;
275
284
  }
276
285
 
286
+ interface XAIImageReference {
287
+ // OpenAI-compat discriminator. Every code example at
288
+ // docs.x.ai/developers/rest-api-reference/inference/images sends this
289
+ // alongside `url`; the schema text doesn't strictly require it, but
290
+ // matching the documented wire format avoids relying on schema-vs-example.
291
+ readonly type: "image_url";
292
+ readonly url: string;
293
+ }
294
+
295
+ interface XAIImageRequestBase {
296
+ readonly model: string;
297
+ readonly prompt: string;
298
+ readonly aspect_ratio: string;
299
+ readonly resolution: "1k" | "2k";
300
+ readonly n: number;
301
+ readonly response_format: "b64_json" | "url";
302
+ }
303
+
304
+ // xAI image request body. Three shapes:
305
+ // 1. text-only generation → POST /v1/images/generations
306
+ // 2. single-source edit (image field) → POST /v1/images/edits
307
+ // 3. multi-reference edit (images field) → POST /v1/images/edits
308
+ // `image` and `images` are mutually exclusive per docs.x.ai; the discriminated
309
+ // union enforces that statically. The runtime cap (XAI_MAX_EDIT_IMAGES) bounds
310
+ // the array length, which TypeScript cannot encode without lossy tuple unions.
311
+ type XAIImageRequestBody =
312
+ | (XAIImageRequestBase & { readonly image?: never; readonly images?: never })
313
+ | (XAIImageRequestBase & { readonly image: XAIImageReference; readonly images?: never })
314
+ | (XAIImageRequestBase & { readonly images: readonly XAIImageReference[]; readonly image?: never });
315
+
277
316
  interface AntigravityResponseChunk {
278
317
  response?: {
279
318
  candidates?: Array<{
@@ -391,12 +430,24 @@ function extractOpenRouterImageUrls(message: OpenRouterMessage | undefined): str
391
430
  }
392
431
 
393
432
  /** Preferred provider set via settings (default: auto) */
394
- let preferredImageProvider: ImageProvider | "auto" = "auto";
433
+ let preferredImageProvider: ImageProviderPreference = "auto";
434
+
435
+ export function isImageProviderPreference(value: unknown): value is ImageProviderPreference {
436
+ return typeof value === "string" && IMAGE_PROVIDER_PREFERENCES.has(value);
437
+ }
395
438
 
396
439
  /** Set the preferred image provider from settings */
397
- export function setPreferredImageProvider(provider: ImageProvider | "auto"): void {
440
+ export function setPreferredImageProvider(provider: ImageProviderPreference): void {
398
441
  preferredImageProvider = provider;
399
442
  }
443
+ function assertImageAspectRatioSupported(provider: ImageProvider, aspectRatio: ImageGenParams["aspect_ratio"]): void {
444
+ if (!aspectRatio || provider === "xai" || COMMON_IMAGE_ASPECT_RATIO_SET.has(aspectRatio)) {
445
+ return;
446
+ }
447
+ throw new Error(
448
+ `Aspect ratio ${aspectRatio} is only supported by xAI image generation. Set providers.image to xai or use one of ${COMMON_IMAGE_ASPECT_RATIOS.join(", ")}.`,
449
+ );
450
+ }
400
451
 
401
452
  interface ParsedAntigravityCredentials {
402
453
  accessToken: string;
@@ -429,6 +480,17 @@ async function findAntigravityCredentials(modelRegistry: ModelRegistry): Promise
429
480
  };
430
481
  }
431
482
 
483
+ async function findXAIImageCredentials(modelRegistry?: ModelRegistry): Promise<ImageApiKey | null> {
484
+ if (modelRegistry) {
485
+ const creds = await resolveXAIHttpCredentials(modelRegistry);
486
+ if (creds) return { provider: "xai", apiKey: creds.apiKey };
487
+ return null;
488
+ }
489
+ const apiKey = $env.XAI_API_KEY;
490
+ if (apiKey) return { provider: "xai", apiKey };
491
+ return null;
492
+ }
493
+
432
494
  async function findOpenAIHostedImageCredentials(
433
495
  modelRegistry: ModelRegistry | undefined,
434
496
  activeModel: Model | undefined,
@@ -468,9 +530,13 @@ async function findImageApiKey(
468
530
  const openRouterKey = getEnvApiKey("openrouter");
469
531
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
470
532
  // Fall through to auto-detect if preferred provider key not found.
533
+ } else if (preferredImageProvider === "xai") {
534
+ const xai = await findXAIImageCredentials(modelRegistry);
535
+ if (xai) return xai;
536
+ // Fall through to auto-detect if preferred provider key not found.
471
537
  }
472
538
 
473
- // Auto-detect: GPT hosted image generation, then Antigravity, OpenRouter, Gemini.
539
+ // Auto-detect: GPT hosted image generation, then Antigravity, xAI, OpenRouter, Gemini.
474
540
  const openAI = await findOpenAIHostedImageCredentials(modelRegistry, activeModel, sessionId);
475
541
  if (openAI) return openAI;
476
542
 
@@ -479,6 +545,9 @@ async function findImageApiKey(
479
545
  if (antigravity) return antigravity;
480
546
  }
481
547
 
548
+ const xai = await findXAIImageCredentials(modelRegistry);
549
+ if (xai) return xai;
550
+
482
551
  const openRouterKey = getEnvApiKey("openrouter");
483
552
  if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
484
553
 
@@ -857,6 +926,31 @@ function buildAntigravityRequest(
857
926
  };
858
927
  }
859
928
 
929
+ // xAI image-edit cap per docs.x.ai (POST /v1/images/edits supports up to 3
930
+ // source images for multi-reference editing).
931
+ const XAI_MAX_EDIT_IMAGES = 3;
932
+
933
+ // Map the OpenAI-style pixel-size enum (image_size) to xAI's discrete tier.
934
+ // "1024x1024" → "1k"; any other size (1536x1024 or 1024x1536) → "2k". Absent
935
+ // image_size defaults to "1k", matching hermes-agent's DEFAULT_RESOLUTION
936
+ // (plugins/image_gen/xai/__init__.py:71).
937
+ function resolveXAIResolution(imageSize: string | undefined): "1k" | "2k" {
938
+ if (!imageSize || imageSize === "1024x1024") return "1k";
939
+ return "2k";
940
+ }
941
+
942
+ // Build the discriminated edit body. Caller must ensure images.length is in
943
+ // [1, XAI_MAX_EDIT_IMAGES]; the bound check fires earlier in execute().
944
+ function buildXAIEditPayload(base: XAIImageRequestBase, images: readonly InlineImageData[]): XAIImageRequestBody {
945
+ const refs: readonly XAIImageReference[] = images.map(img => ({
946
+ type: "image_url",
947
+ url: toDataUrl(img),
948
+ }));
949
+ const [first, ...rest] = refs;
950
+ if (first === undefined) return base; // unreachable: caller checked images.length > 0
951
+ return rest.length === 0 ? { ...base, image: first } : { ...base, images: refs };
952
+ }
953
+
860
954
  interface AntigravitySseResult {
861
955
  images: InlineImageData[];
862
956
  text: string[];
@@ -910,7 +1004,7 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
910
1004
  const apiKey = await findImageApiKey(ctx.modelRegistry, ctx.model, sessionId);
911
1005
  if (!apiKey) {
912
1006
  throw new Error(
913
- "No image API credentials found. Use a GPT Responses/Codex model with OpenAI credentials, login with google-antigravity, or set OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
1007
+ "No image API credentials found. Use a GPT Responses/Codex model with OpenAI credentials, login with google-antigravity or xAI Grok OAuth, or set XAI_API_KEY, OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
914
1008
  );
915
1009
  }
916
1010
 
@@ -922,8 +1016,11 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
922
1016
  ? DEFAULT_ANTIGRAVITY_MODEL
923
1017
  : provider === "openrouter"
924
1018
  ? DEFAULT_OPENROUTER_MODEL
925
- : DEFAULT_MODEL;
1019
+ : provider === "xai"
1020
+ ? DEFAULT_XAI_IMAGE_MODEL
1021
+ : DEFAULT_MODEL;
926
1022
  const resolvedModel = provider === "openrouter" ? resolveOpenRouterModel(model) : model;
1023
+ assertImageAspectRatioSupported(provider, params.aspect_ratio);
927
1024
  const cwd = ctx.sessionManager.getCwd();
928
1025
 
929
1026
  const resolvedImages: InlineImageData[] = [];
@@ -1059,6 +1156,107 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
1059
1156
  };
1060
1157
  }
1061
1158
 
1159
+ if (provider === "xai") {
1160
+ if (!ctx.modelRegistry) {
1161
+ throw new Error("Missing modelRegistry for xAI image generation");
1162
+ }
1163
+ const xaiCreds = await resolveXAIHttpCredentials(ctx.modelRegistry, resolvedModel);
1164
+ if (!xaiCreds) {
1165
+ throw new Error(
1166
+ "No xAI credentials. Run /login → xAI Grok OAuth (SuperGrok Subscription) or set XAI_API_KEY.",
1167
+ );
1168
+ }
1169
+
1170
+ const prompt = assemblePrompt(params);
1171
+ const aspectRatio = params.aspect_ratio ?? "1:1";
1172
+ const xaiResolution = resolveXAIResolution(params.image_size);
1173
+
1174
+ const isEdit = resolvedImages.length > 0;
1175
+ if (isEdit && resolvedImages.length > XAI_MAX_EDIT_IMAGES) {
1176
+ throw new Error(
1177
+ `xAI image edits accept up to ${XAI_MAX_EDIT_IMAGES} reference images; got ${resolvedImages.length}.`,
1178
+ );
1179
+ }
1180
+
1181
+ const xaiBaseBody: XAIImageRequestBase = {
1182
+ model: resolvedModel,
1183
+ prompt,
1184
+ aspect_ratio: aspectRatio,
1185
+ resolution: xaiResolution,
1186
+ n: 1,
1187
+ response_format: "b64_json",
1188
+ };
1189
+ const xaiBody: XAIImageRequestBody = isEdit
1190
+ ? buildXAIEditPayload(xaiBaseBody, resolvedImages)
1191
+ : xaiBaseBody;
1192
+ const xaiEndpoint = isEdit ? "/images/edits" : "/images/generations";
1193
+
1194
+ const xaiResponse = await fetch(`${xaiCreds.baseURL}${xaiEndpoint}`, {
1195
+ method: "POST",
1196
+ headers: {
1197
+ Authorization: `Bearer ${xaiCreds.apiKey}`,
1198
+ "Content-Type": "application/json",
1199
+ "User-Agent": ohMyPiXAIUserAgent(),
1200
+ },
1201
+ body: JSON.stringify(xaiBody),
1202
+ signal: requestSignal,
1203
+ });
1204
+
1205
+ const xaiRawText = await xaiResponse.text();
1206
+ if (!xaiResponse.ok) {
1207
+ let message = xaiRawText;
1208
+ try {
1209
+ const parsedErr = JSON.parse(xaiRawText) as { error?: { message?: string } };
1210
+ message = parsedErr.error?.message ?? message;
1211
+ } catch {
1212
+ // Keep raw text.
1213
+ }
1214
+ throw new Error(`xAI image request failed (${xaiResponse.status}): ${message}`);
1215
+ }
1216
+
1217
+ const xaiData = JSON.parse(xaiRawText) as {
1218
+ data?: Array<{ b64_json?: string; url?: string }>;
1219
+ };
1220
+ const xaiInlineImages: InlineImageData[] = [];
1221
+ for (const entry of xaiData.data ?? []) {
1222
+ if (entry.b64_json) {
1223
+ const bytes = Buffer.from(entry.b64_json, "base64");
1224
+ const mimeType = parseImageMetadata(bytes)?.mimeType ?? "image/png";
1225
+ xaiInlineImages.push({ data: entry.b64_json, mimeType });
1226
+ } else if (entry.url) {
1227
+ xaiInlineImages.push(await loadImageFromUrl(entry.url, requestSignal));
1228
+ }
1229
+ }
1230
+
1231
+ if (xaiInlineImages.length === 0) {
1232
+ return {
1233
+ content: [{ type: "text", text: "No image data returned." }],
1234
+ details: {
1235
+ provider,
1236
+ model: resolvedModel,
1237
+ imageCount: 0,
1238
+ imagePaths: [],
1239
+ images: [],
1240
+ },
1241
+ };
1242
+ }
1243
+
1244
+ const xaiImagePaths = await saveImagesToTemp(xaiInlineImages);
1245
+
1246
+ return {
1247
+ content: [
1248
+ { type: "text", text: buildResponseSummary(provider, resolvedModel, xaiImagePaths, undefined) },
1249
+ ],
1250
+ details: {
1251
+ provider,
1252
+ model: resolvedModel,
1253
+ imageCount: xaiInlineImages.length,
1254
+ imagePaths: xaiImagePaths,
1255
+ images: xaiInlineImages,
1256
+ },
1257
+ };
1258
+ }
1259
+
1062
1260
  if (provider === "openrouter") {
1063
1261
  const prompt = assemblePrompt(params);
1064
1262
  const contentParts: OpenRouterContentPart[] = [{ type: "text", text: prompt }];
@@ -92,6 +92,7 @@ export * from "./search";
92
92
  export * from "./search-tool-bm25";
93
93
  export * from "./ssh";
94
94
  export * from "./todo-write";
95
+ export * from "./tts";
95
96
  export * from "./write";
96
97
  export * from "./yield";
97
98
 
@@ -5,6 +5,8 @@ import { normalizeLocalScheme, resolveToCwd } from "./path-utils";
5
5
  import { ToolError } from "./tool-errors";
6
6
 
7
7
  const LOCAL_SCHEME_PREFIX = "local:";
8
+ const PLAN_ALIAS_FILE = "PLAN.md";
9
+ const LOCAL_PLAN_ALIAS = "local://PLAN.md";
8
10
 
9
11
  function resolveRawPath(session: ToolSession, targetPath: string): string {
10
12
  const normalized = normalizeLocalScheme(targetPath);
@@ -18,15 +20,20 @@ function resolveRawPath(session: ToolSession, targetPath: string): string {
18
20
  return resolveToCwd(normalized, session.cwd);
19
21
  }
20
22
 
23
+ function isPlanAliasTarget(session: ToolSession, targetPath: string, resolved: string): boolean {
24
+ const normalized = normalizeLocalScheme(targetPath);
25
+ if (normalized === LOCAL_PLAN_ALIAS) return true;
26
+ return resolved === resolveToCwd(PLAN_ALIAS_FILE, session.cwd);
27
+ }
28
+
21
29
  /**
22
30
  * Resolve a write/edit target to its absolute filesystem path.
23
31
  *
24
- * In plan mode, transparently redirects targets whose basename matches the
25
- * plan file's basename (e.g. a bare `PLAN.md` or `./PLAN.md`) to the canonical
26
- * plan file location at `state.planFilePath`. This lets `write` and `edit`
27
- * accept the unqualified plan filename and have the change land at the
28
- * session-scoped `local://PLAN.md` artifact instead of a stray cwd-relative
29
- * file the plan-mode guard would otherwise reject.
32
+ * In plan mode, transparently redirects `PLAN.md` aliases and targets whose
33
+ * basename matches the plan file's basename to the canonical plan file
34
+ * location at `state.planFilePath`. This lets `write` and `edit` accept the
35
+ * habitual plan filename after approval even when the active artifact has a
36
+ * titled path such as `local://APPROVED.md`.
30
37
  *
31
38
  * Outside plan mode (or when the target is neither a `PLAN.md` alias nor a basename match) this is a no-op.
32
39
  */
@@ -38,6 +45,7 @@ export function resolvePlanPath(session: ToolSession, targetPath: string): strin
38
45
 
39
46
  const planResolved = resolveRawPath(session, state.planFilePath);
40
47
  if (resolved === planResolved) return resolved;
48
+ if (isPlanAliasTarget(session, targetPath, resolved)) return planResolved;
41
49
  if (path.basename(resolved) !== path.basename(planResolved)) return resolved;
42
50
 
43
51
  return planResolved;