@oh-my-pi/pi-coding-agent 15.5.6 → 15.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -1
- package/dist/types/config/settings-schema.d.ts +50 -2
- package/dist/types/lib/xai-http.d.ts +40 -0
- package/dist/types/session/agent-session.d.ts +1 -0
- package/dist/types/tools/fetch.d.ts +16 -0
- package/dist/types/tools/image-gen.d.ts +6 -2
- package/dist/types/tools/index.d.ts +1 -0
- package/dist/types/tools/plan-mode-guard.d.ts +5 -6
- package/dist/types/tools/tts.d.ts +18 -0
- package/package.json +8 -8
- package/src/config/model-registry.ts +41 -9
- package/src/config/settings-schema.ts +43 -2
- package/src/lib/xai-http.ts +124 -0
- package/src/modes/controllers/selector-controller.ts +7 -2
- package/src/modes/interactive-mode.ts +1 -1
- package/src/sdk.ts +15 -9
- package/src/session/agent-session.ts +30 -3
- package/src/tools/fetch.ts +52 -24
- package/src/tools/image-gen.ts +205 -7
- package/src/tools/index.ts +1 -0
- package/src/tools/plan-mode-guard.ts +14 -6
- package/src/tools/search.ts +2 -2
- package/src/tools/tts.ts +133 -0
|
@@ -29,7 +29,12 @@ import {
|
|
|
29
29
|
import type { InteractiveModeContext } from "../../modes/types";
|
|
30
30
|
import { type SessionInfo, SessionManager } from "../../session/session-manager";
|
|
31
31
|
import { FileSessionStorage } from "../../session/session-storage";
|
|
32
|
-
import {
|
|
32
|
+
import {
|
|
33
|
+
isImageProviderPreference,
|
|
34
|
+
isSearchProviderPreference,
|
|
35
|
+
setPreferredImageProvider,
|
|
36
|
+
setPreferredSearchProvider,
|
|
37
|
+
} from "../../tools";
|
|
33
38
|
import { setSessionTerminalTitle } from "../../utils/title-generator";
|
|
34
39
|
import { AgentDashboard } from "../components/agent-dashboard";
|
|
35
40
|
import { AssistantMessageComponent } from "../components/assistant-message";
|
|
@@ -374,7 +379,7 @@ export class SelectorController {
|
|
|
374
379
|
}
|
|
375
380
|
break;
|
|
376
381
|
case "providers.image":
|
|
377
|
-
if (value
|
|
382
|
+
if (isImageProviderPreference(value)) {
|
|
378
383
|
setPreferredImageProvider(value);
|
|
379
384
|
}
|
|
380
385
|
break;
|
|
@@ -1012,7 +1012,7 @@ export class InteractiveMode implements InteractiveModeContext {
|
|
|
1012
1012
|
}
|
|
1013
1013
|
|
|
1014
1014
|
async #getPlanFilePath(): Promise<string> {
|
|
1015
|
-
return "local://PLAN.md";
|
|
1015
|
+
return this.session.getPlanReferencePath() || "local://PLAN.md";
|
|
1016
1016
|
}
|
|
1017
1017
|
|
|
1018
1018
|
#resolvePlanFilePath(planFilePath: string): string {
|
package/src/sdk.ts
CHANGED
|
@@ -129,6 +129,7 @@ import {
|
|
|
129
129
|
FindTool,
|
|
130
130
|
getSearchTools,
|
|
131
131
|
HIDDEN_TOOLS,
|
|
132
|
+
isImageProviderPreference,
|
|
132
133
|
isSearchProviderPreference,
|
|
133
134
|
type LspStartupServerInfo,
|
|
134
135
|
loadSshTool,
|
|
@@ -148,6 +149,7 @@ import { ToolContextStore } from "./tools/context";
|
|
|
148
149
|
import { getImageGenTools } from "./tools/image-gen";
|
|
149
150
|
import { wrapToolWithMetaNotice } from "./tools/output-meta";
|
|
150
151
|
import { queueResolveHandler } from "./tools/resolve";
|
|
152
|
+
import { ttsTool } from "./tools/tts";
|
|
151
153
|
import { EventBus } from "./utils/event-bus";
|
|
152
154
|
import { buildNamedToolChoice } from "./utils/tool-choice";
|
|
153
155
|
import { buildWorkspaceTree, type WorkspaceTree } from "./workspace-tree";
|
|
@@ -893,12 +895,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
893
895
|
}
|
|
894
896
|
|
|
895
897
|
const imageProvider = settings.get("providers.image");
|
|
896
|
-
if (
|
|
897
|
-
imageProvider === "auto" ||
|
|
898
|
-
imageProvider === "openai" ||
|
|
899
|
-
imageProvider === "gemini" ||
|
|
900
|
-
imageProvider === "openrouter"
|
|
901
|
-
) {
|
|
898
|
+
if (isImageProviderPreference(imageProvider)) {
|
|
902
899
|
setPreferredImageProvider(imageProvider);
|
|
903
900
|
}
|
|
904
901
|
|
|
@@ -1319,6 +1316,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
1319
1316
|
customTools.push(...(imageGenTools as unknown as CustomTool[]));
|
|
1320
1317
|
}
|
|
1321
1318
|
|
|
1319
|
+
if (settings.get("tts.enabled")) {
|
|
1320
|
+
customTools.push(ttsTool as unknown as CustomTool);
|
|
1321
|
+
}
|
|
1322
|
+
|
|
1322
1323
|
// Add web search tools
|
|
1323
1324
|
if (options.toolNames?.includes("web_search")) {
|
|
1324
1325
|
customTools.push(...getSearchTools());
|
|
@@ -1876,9 +1877,13 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
1876
1877
|
}
|
|
1877
1878
|
return key;
|
|
1878
1879
|
},
|
|
1879
|
-
streamFn: (streamModel, context, streamOptions) =>
|
|
1880
|
-
|
|
1880
|
+
streamFn: (streamModel, context, streamOptions) => {
|
|
1881
|
+
const openrouterRoutingPreset = settings.get("providers.openrouterVariant");
|
|
1882
|
+
const openrouterVariant =
|
|
1883
|
+
openrouterRoutingPreset && openrouterRoutingPreset !== "default" ? openrouterRoutingPreset : undefined;
|
|
1884
|
+
return streamSimple(streamModel, context, {
|
|
1881
1885
|
...streamOptions,
|
|
1886
|
+
openrouterVariant: streamOptions?.openrouterVariant ?? openrouterVariant,
|
|
1882
1887
|
onAuthError: async (provider, oldKey, error) => {
|
|
1883
1888
|
await modelRegistry.authStorage.invalidateCredentialMatching(provider, oldKey, {
|
|
1884
1889
|
signal: streamOptions?.signal,
|
|
@@ -1890,7 +1895,8 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
|
|
|
1890
1895
|
});
|
|
1891
1896
|
return modelRegistry.getApiKeyForProvider(provider, agent.sessionId);
|
|
1892
1897
|
},
|
|
1893
|
-
})
|
|
1898
|
+
});
|
|
1899
|
+
},
|
|
1894
1900
|
cursorExecHandlers,
|
|
1895
1901
|
transformToolCallArguments: (args, _toolName) => {
|
|
1896
1902
|
let result = args;
|
|
@@ -3631,9 +3631,17 @@ export class AgentSession {
|
|
|
3631
3631
|
const sessionOnResponse = this.#onResponse;
|
|
3632
3632
|
const sessionMetadata = this.agent.metadataForProvider(provider);
|
|
3633
3633
|
const sessionOnSseEvent = this.#onSseEvent;
|
|
3634
|
-
|
|
3635
|
-
|
|
3636
|
-
const
|
|
3634
|
+
const openrouterRoutingPreset =
|
|
3635
|
+
provider === "openrouter" ? this.settings.get("providers.openrouterVariant") : "default";
|
|
3636
|
+
const openrouterVariant =
|
|
3637
|
+
openrouterRoutingPreset !== "default" && options.openrouterVariant === undefined
|
|
3638
|
+
? openrouterRoutingPreset
|
|
3639
|
+
: undefined;
|
|
3640
|
+
if (!sessionOnPayload && !sessionOnResponse && !sessionMetadata && !sessionOnSseEvent && !openrouterVariant)
|
|
3641
|
+
return options;
|
|
3642
|
+
|
|
3643
|
+
const preparedOptions: SimpleStreamOptions =
|
|
3644
|
+
openrouterVariant === undefined ? { ...options } : { ...options, openrouterVariant };
|
|
3637
3645
|
|
|
3638
3646
|
// Stamp session metadata (e.g. user_id={session_id}) onto direct-call requests so
|
|
3639
3647
|
// they share the same session bucket as Agent.prompt-routed requests on Anthropic
|
|
@@ -3758,6 +3766,10 @@ export class AgentSession {
|
|
|
3758
3766
|
this.#planReferencePath = path;
|
|
3759
3767
|
}
|
|
3760
3768
|
|
|
3769
|
+
getPlanReferencePath(): string {
|
|
3770
|
+
return this.#planReferencePath;
|
|
3771
|
+
}
|
|
3772
|
+
|
|
3761
3773
|
get clientBridge(): ClientBridge | undefined {
|
|
3762
3774
|
return this.#clientBridge;
|
|
3763
3775
|
}
|
|
@@ -5575,6 +5587,11 @@ export class AgentSession {
|
|
|
5575
5587
|
initiatorOverride: "agent",
|
|
5576
5588
|
metadata: this.agent.metadataForProvider(model.provider),
|
|
5577
5589
|
telemetry: resolveTelemetry(this.agent.telemetry, this.sessionId),
|
|
5590
|
+
// Honor the user's /model thinking selection on the handoff
|
|
5591
|
+
// path. Clamped per-model inside generateHandoff via
|
|
5592
|
+
// resolveCompactionEffort so unsupported-effort models don't
|
|
5593
|
+
// trip requireSupportedEffort.
|
|
5594
|
+
thinkingLevel: this.thinkingLevel,
|
|
5578
5595
|
},
|
|
5579
5596
|
handoffSignal,
|
|
5580
5597
|
);
|
|
@@ -6345,6 +6362,11 @@ export class AgentSession {
|
|
|
6345
6362
|
metadata: this.agent.metadataForProvider(candidate.provider),
|
|
6346
6363
|
convertToLlm,
|
|
6347
6364
|
telemetry,
|
|
6365
|
+
// Honor the user's /model thinking selection (incl. `off`) on
|
|
6366
|
+
// the manual `/compact` path. Clamped per-model inside compact()
|
|
6367
|
+
// via resolveCompactionEffort so unsupported-effort models
|
|
6368
|
+
// (xai-oauth/grok-build) don't trip requireSupportedEffort.
|
|
6369
|
+
thinkingLevel: this.thinkingLevel,
|
|
6348
6370
|
});
|
|
6349
6371
|
} catch (error) {
|
|
6350
6372
|
if (!this.#isCompactionAuthFailure(error)) {
|
|
@@ -6617,6 +6639,11 @@ export class AgentSession {
|
|
|
6617
6639
|
initiatorOverride: "agent",
|
|
6618
6640
|
convertToLlm,
|
|
6619
6641
|
telemetry,
|
|
6642
|
+
// Honor the user's /model thinking selection on the
|
|
6643
|
+
// auto-compaction path — the most-fired compaction
|
|
6644
|
+
// site. Clamped per-model inside compact() via
|
|
6645
|
+
// resolveCompactionEffort.
|
|
6646
|
+
thinkingLevel: this.thinkingLevel,
|
|
6620
6647
|
});
|
|
6621
6648
|
break;
|
|
6622
6649
|
} catch (error) {
|
package/src/tools/fetch.ts
CHANGED
|
@@ -562,9 +562,22 @@ function parseFeedToMarkdown(content: string, maxItems = 10): string {
|
|
|
562
562
|
}
|
|
563
563
|
|
|
564
564
|
/**
|
|
565
|
-
*
|
|
565
|
+
* Cap on any single remote reader-mode request (Parallel, Jina) so a stalled
|
|
566
|
+
* remote endpoint cannot consume the whole reader-mode budget and starve the
|
|
567
|
+
* local fallback renderers (trafilatura, lynx, native). See #1449.
|
|
566
568
|
*/
|
|
567
|
-
|
|
569
|
+
const REMOTE_READER_MAX_MS = 10_000;
|
|
570
|
+
|
|
571
|
+
/**
|
|
572
|
+
* Render HTML to markdown using Parallel, jina, trafilatura, lynx, then the
|
|
573
|
+
* in-process native converter. The overall `timeout` budget bounds the call,
|
|
574
|
+
* but remote reader requests are additionally capped at `REMOTE_READER_MAX_MS`
|
|
575
|
+
* so that a hung remote endpoint cannot prevent local fallbacks from running.
|
|
576
|
+
* Only a real `userSignal` cancellation aborts the chain — remote per-attempt
|
|
577
|
+
* timeouts and the overall reader-mode timeout still allow later renderers
|
|
578
|
+
* (especially the purely-local native converter) to be tried.
|
|
579
|
+
*/
|
|
580
|
+
export async function renderHtmlToText(
|
|
568
581
|
url: string,
|
|
569
582
|
html: string,
|
|
570
583
|
timeout: number,
|
|
@@ -572,14 +585,15 @@ async function renderHtmlToText(
|
|
|
572
585
|
userSignal: AbortSignal | undefined,
|
|
573
586
|
storage: AgentStorage | null,
|
|
574
587
|
): Promise<{ content: string; ok: boolean; method: string }> {
|
|
575
|
-
const
|
|
588
|
+
const overallSignal = ptree.combineSignals(userSignal, timeout * 1000);
|
|
576
589
|
const execOptions = {
|
|
577
590
|
mode: "group" as const,
|
|
578
591
|
allowNonZero: true,
|
|
579
592
|
allowAbort: true,
|
|
580
593
|
stderr: "full" as const,
|
|
581
|
-
signal,
|
|
594
|
+
signal: overallSignal,
|
|
582
595
|
};
|
|
596
|
+
const remoteBudgetMs = Math.min(timeout * 1000, REMOTE_READER_MAX_MS);
|
|
583
597
|
|
|
584
598
|
// Try Parallel extract first when credentials are configured
|
|
585
599
|
if (settings.get("providers.parallelFetch") && findParallelApiKey(storage)) {
|
|
@@ -590,7 +604,7 @@ async function renderHtmlToText(
|
|
|
590
604
|
objective: "Extract the main content",
|
|
591
605
|
excerpts: true,
|
|
592
606
|
fullContent: false,
|
|
593
|
-
signal,
|
|
607
|
+
signal: ptree.combineSignals(userSignal, remoteBudgetMs),
|
|
594
608
|
},
|
|
595
609
|
storage,
|
|
596
610
|
);
|
|
@@ -602,17 +616,18 @@ async function renderHtmlToText(
|
|
|
602
616
|
}
|
|
603
617
|
}
|
|
604
618
|
} catch {
|
|
605
|
-
// Parallel extract failed
|
|
606
|
-
|
|
619
|
+
// Parallel extract failed or stalled; honour real cancellation only.
|
|
620
|
+
userSignal?.throwIfAborted();
|
|
607
621
|
}
|
|
608
622
|
}
|
|
609
623
|
|
|
610
|
-
// Try jina
|
|
624
|
+
// Try jina reader API with its own sub-budget so a stall cannot starve
|
|
625
|
+
// later fallbacks (#1449).
|
|
611
626
|
try {
|
|
612
627
|
const jinaUrl = `https://r.jina.ai/${url}`;
|
|
613
628
|
const response = await fetch(jinaUrl, {
|
|
614
629
|
headers: { Accept: "text/markdown" },
|
|
615
|
-
signal,
|
|
630
|
+
signal: ptree.combineSignals(userSignal, remoteBudgetMs),
|
|
616
631
|
});
|
|
617
632
|
if (response.ok) {
|
|
618
633
|
const content = await response.text();
|
|
@@ -621,37 +636,50 @@ async function renderHtmlToText(
|
|
|
621
636
|
}
|
|
622
637
|
}
|
|
623
638
|
} catch {
|
|
624
|
-
// Jina failed
|
|
625
|
-
|
|
639
|
+
// Jina failed or stalled; honour real cancellation only.
|
|
640
|
+
userSignal?.throwIfAborted();
|
|
626
641
|
}
|
|
627
642
|
|
|
628
643
|
// Try trafilatura (auto-install via uv/pip)
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
644
|
+
try {
|
|
645
|
+
const trafilatura = await ensureTool("trafilatura", { signal: overallSignal, silent: true });
|
|
646
|
+
if (trafilatura) {
|
|
647
|
+
const result = await ptree.exec([trafilatura, "-u", url, "--output-format", "markdown"], execOptions);
|
|
648
|
+
if (result.ok && result.stdout.trim().length > 100) {
|
|
649
|
+
return { content: result.stdout, ok: true, method: "trafilatura" };
|
|
650
|
+
}
|
|
634
651
|
}
|
|
652
|
+
} catch {
|
|
653
|
+
// trafilatura unavailable or stalled; continue to next method.
|
|
654
|
+
userSignal?.throwIfAborted();
|
|
635
655
|
}
|
|
636
656
|
|
|
637
657
|
// Try lynx (can't auto-install, system package)
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
658
|
+
try {
|
|
659
|
+
const lynx = hasCommand("lynx");
|
|
660
|
+
if (lynx) {
|
|
661
|
+
const result = await ptree.exec(["lynx", "-dump", "-nolist", "-width", "250", url], execOptions);
|
|
662
|
+
if (result.ok) {
|
|
663
|
+
return { content: result.stdout, ok: true, method: "lynx" };
|
|
664
|
+
}
|
|
643
665
|
}
|
|
666
|
+
} catch {
|
|
667
|
+
// lynx failed or stalled; continue to native converter.
|
|
668
|
+
userSignal?.throwIfAborted();
|
|
644
669
|
}
|
|
645
670
|
|
|
646
|
-
// Fall back to native converter (
|
|
671
|
+
// Fall back to native converter (purely local, no network/subprocess).
|
|
672
|
+
// Always attempted: even if remote renderers and subprocesses were aborted
|
|
673
|
+
// by the overall reader-mode timeout, this still works on already-loaded
|
|
674
|
+
// HTML (#1449).
|
|
647
675
|
try {
|
|
648
676
|
const content = await htmlToMarkdown(html, { cleanContent: true });
|
|
649
677
|
if (content.trim().length > 100 && !isLowQualityOutput(content)) {
|
|
650
678
|
return { content, ok: true, method: "native" };
|
|
651
679
|
}
|
|
652
680
|
} catch {
|
|
653
|
-
// Native converter failed
|
|
654
|
-
|
|
681
|
+
// Native converter failed; nothing else to try.
|
|
682
|
+
userSignal?.throwIfAborted();
|
|
655
683
|
}
|
|
656
684
|
return { content: "", ok: false, method: "none" };
|
|
657
685
|
}
|
package/src/tools/image-gen.ts
CHANGED
|
@@ -22,12 +22,14 @@ import * as z from "zod/v4";
|
|
|
22
22
|
import packageJson from "../../package.json" with { type: "json" };
|
|
23
23
|
import { isAuthenticated, type ModelRegistry } from "../config/model-registry";
|
|
24
24
|
import type { CustomTool } from "../extensibility/custom-tools/types";
|
|
25
|
+
import { ohMyPiXAIUserAgent, resolveXAIHttpCredentials } from "../lib/xai-http";
|
|
25
26
|
import imageGenDescription from "../prompts/tools/image-gen.md" with { type: "text" };
|
|
26
27
|
import { resolveReadPath } from "./path-utils";
|
|
27
28
|
|
|
28
29
|
const DEFAULT_MODEL = "gemini-3-pro-image-preview";
|
|
29
30
|
const DEFAULT_OPENROUTER_MODEL = "google/gemini-3-pro-image-preview";
|
|
30
31
|
const DEFAULT_ANTIGRAVITY_MODEL = "gemini-3-pro-image";
|
|
32
|
+
const DEFAULT_XAI_IMAGE_MODEL = "grok-imagine-image";
|
|
31
33
|
const IMAGE_TIMEOUT = 3 * 60 * 1000; // 3 minutes
|
|
32
34
|
const MAX_IMAGE_SIZE = 35 * 1024 * 1024;
|
|
33
35
|
const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
@@ -38,7 +40,9 @@ const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com"
|
|
|
38
40
|
const IMAGE_SYSTEM_INSTRUCTION =
|
|
39
41
|
"You are an AI image generator. Generate images based on user descriptions. Focus on creating high-quality, visually appealing images that match the user's request.";
|
|
40
42
|
|
|
41
|
-
type ImageProvider = "antigravity" | "gemini" | "openai" | "openai-codex" | "openrouter";
|
|
43
|
+
export type ImageProvider = "antigravity" | "gemini" | "openai" | "openai-codex" | "openrouter" | "xai";
|
|
44
|
+
export type ImageProviderPreference = Exclude<ImageProvider, "openai-codex"> | "auto";
|
|
45
|
+
|
|
42
46
|
interface ImageApiKey {
|
|
43
47
|
provider: ImageProvider;
|
|
44
48
|
apiKey: string;
|
|
@@ -46,8 +50,13 @@ interface ImageApiKey {
|
|
|
46
50
|
model?: Model;
|
|
47
51
|
}
|
|
48
52
|
|
|
53
|
+
const COMMON_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"] as const;
|
|
54
|
+
const XAI_IMAGE_ASPECT_RATIOS = [...COMMON_IMAGE_ASPECT_RATIOS, "3:2", "2:3"] as const;
|
|
55
|
+
const COMMON_IMAGE_ASPECT_RATIO_SET = new Set<string>(COMMON_IMAGE_ASPECT_RATIOS);
|
|
56
|
+
const IMAGE_PROVIDER_PREFERENCES = new Set<string>(["auto", "antigravity", "gemini", "openai", "openrouter", "xai"]);
|
|
57
|
+
|
|
49
58
|
const responseModalitySchema = z.enum(["IMAGE", "TEXT"] as const);
|
|
50
|
-
const aspectRatioSchema = z.enum(
|
|
59
|
+
const aspectRatioSchema = z.enum(XAI_IMAGE_ASPECT_RATIOS).describe("aspect ratio");
|
|
51
60
|
const imageSizeSchema = z.enum(["1024x1024", "1536x1024", "1024x1536"] as const).describe("image size");
|
|
52
61
|
|
|
53
62
|
const inputImageSchema = z
|
|
@@ -274,6 +283,36 @@ interface AntigravityRequest {
|
|
|
274
283
|
requestId?: string;
|
|
275
284
|
}
|
|
276
285
|
|
|
286
|
+
interface XAIImageReference {
|
|
287
|
+
// OpenAI-compat discriminator. Every code example at
|
|
288
|
+
// docs.x.ai/developers/rest-api-reference/inference/images sends this
|
|
289
|
+
// alongside `url`; the schema text doesn't strictly require it, but
|
|
290
|
+
// matching the documented wire format avoids relying on schema-vs-example.
|
|
291
|
+
readonly type: "image_url";
|
|
292
|
+
readonly url: string;
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
interface XAIImageRequestBase {
|
|
296
|
+
readonly model: string;
|
|
297
|
+
readonly prompt: string;
|
|
298
|
+
readonly aspect_ratio: string;
|
|
299
|
+
readonly resolution: "1k" | "2k";
|
|
300
|
+
readonly n: number;
|
|
301
|
+
readonly response_format: "b64_json" | "url";
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
// xAI image request body. Three shapes:
|
|
305
|
+
// 1. text-only generation → POST /v1/images/generations
|
|
306
|
+
// 2. single-source edit (image field) → POST /v1/images/edits
|
|
307
|
+
// 3. multi-reference edit (images field) → POST /v1/images/edits
|
|
308
|
+
// `image` and `images` are mutually exclusive per docs.x.ai; the discriminated
|
|
309
|
+
// union enforces that statically. The runtime cap (XAI_MAX_EDIT_IMAGES) bounds
|
|
310
|
+
// the array length, which TypeScript cannot encode without lossy tuple unions.
|
|
311
|
+
type XAIImageRequestBody =
|
|
312
|
+
| (XAIImageRequestBase & { readonly image?: never; readonly images?: never })
|
|
313
|
+
| (XAIImageRequestBase & { readonly image: XAIImageReference; readonly images?: never })
|
|
314
|
+
| (XAIImageRequestBase & { readonly images: readonly XAIImageReference[]; readonly image?: never });
|
|
315
|
+
|
|
277
316
|
interface AntigravityResponseChunk {
|
|
278
317
|
response?: {
|
|
279
318
|
candidates?: Array<{
|
|
@@ -391,12 +430,24 @@ function extractOpenRouterImageUrls(message: OpenRouterMessage | undefined): str
|
|
|
391
430
|
}
|
|
392
431
|
|
|
393
432
|
/** Preferred provider set via settings (default: auto) */
|
|
394
|
-
let preferredImageProvider:
|
|
433
|
+
let preferredImageProvider: ImageProviderPreference = "auto";
|
|
434
|
+
|
|
435
|
+
export function isImageProviderPreference(value: unknown): value is ImageProviderPreference {
|
|
436
|
+
return typeof value === "string" && IMAGE_PROVIDER_PREFERENCES.has(value);
|
|
437
|
+
}
|
|
395
438
|
|
|
396
439
|
/** Set the preferred image provider from settings */
|
|
397
|
-
export function setPreferredImageProvider(provider:
|
|
440
|
+
export function setPreferredImageProvider(provider: ImageProviderPreference): void {
|
|
398
441
|
preferredImageProvider = provider;
|
|
399
442
|
}
|
|
443
|
+
function assertImageAspectRatioSupported(provider: ImageProvider, aspectRatio: ImageGenParams["aspect_ratio"]): void {
|
|
444
|
+
if (!aspectRatio || provider === "xai" || COMMON_IMAGE_ASPECT_RATIO_SET.has(aspectRatio)) {
|
|
445
|
+
return;
|
|
446
|
+
}
|
|
447
|
+
throw new Error(
|
|
448
|
+
`Aspect ratio ${aspectRatio} is only supported by xAI image generation. Set providers.image to xai or use one of ${COMMON_IMAGE_ASPECT_RATIOS.join(", ")}.`,
|
|
449
|
+
);
|
|
450
|
+
}
|
|
400
451
|
|
|
401
452
|
interface ParsedAntigravityCredentials {
|
|
402
453
|
accessToken: string;
|
|
@@ -429,6 +480,17 @@ async function findAntigravityCredentials(modelRegistry: ModelRegistry): Promise
|
|
|
429
480
|
};
|
|
430
481
|
}
|
|
431
482
|
|
|
483
|
+
async function findXAIImageCredentials(modelRegistry?: ModelRegistry): Promise<ImageApiKey | null> {
|
|
484
|
+
if (modelRegistry) {
|
|
485
|
+
const creds = await resolveXAIHttpCredentials(modelRegistry);
|
|
486
|
+
if (creds) return { provider: "xai", apiKey: creds.apiKey };
|
|
487
|
+
return null;
|
|
488
|
+
}
|
|
489
|
+
const apiKey = $env.XAI_API_KEY;
|
|
490
|
+
if (apiKey) return { provider: "xai", apiKey };
|
|
491
|
+
return null;
|
|
492
|
+
}
|
|
493
|
+
|
|
432
494
|
async function findOpenAIHostedImageCredentials(
|
|
433
495
|
modelRegistry: ModelRegistry | undefined,
|
|
434
496
|
activeModel: Model | undefined,
|
|
@@ -468,9 +530,13 @@ async function findImageApiKey(
|
|
|
468
530
|
const openRouterKey = getEnvApiKey("openrouter");
|
|
469
531
|
if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
|
|
470
532
|
// Fall through to auto-detect if preferred provider key not found.
|
|
533
|
+
} else if (preferredImageProvider === "xai") {
|
|
534
|
+
const xai = await findXAIImageCredentials(modelRegistry);
|
|
535
|
+
if (xai) return xai;
|
|
536
|
+
// Fall through to auto-detect if preferred provider key not found.
|
|
471
537
|
}
|
|
472
538
|
|
|
473
|
-
// Auto-detect: GPT hosted image generation, then Antigravity, OpenRouter, Gemini.
|
|
539
|
+
// Auto-detect: GPT hosted image generation, then Antigravity, xAI, OpenRouter, Gemini.
|
|
474
540
|
const openAI = await findOpenAIHostedImageCredentials(modelRegistry, activeModel, sessionId);
|
|
475
541
|
if (openAI) return openAI;
|
|
476
542
|
|
|
@@ -479,6 +545,9 @@ async function findImageApiKey(
|
|
|
479
545
|
if (antigravity) return antigravity;
|
|
480
546
|
}
|
|
481
547
|
|
|
548
|
+
const xai = await findXAIImageCredentials(modelRegistry);
|
|
549
|
+
if (xai) return xai;
|
|
550
|
+
|
|
482
551
|
const openRouterKey = getEnvApiKey("openrouter");
|
|
483
552
|
if (openRouterKey) return { provider: "openrouter", apiKey: openRouterKey };
|
|
484
553
|
|
|
@@ -857,6 +926,31 @@ function buildAntigravityRequest(
|
|
|
857
926
|
};
|
|
858
927
|
}
|
|
859
928
|
|
|
929
|
+
// xAI image-edit cap per docs.x.ai (POST /v1/images/edits supports up to 3
|
|
930
|
+
// source images for multi-reference editing).
|
|
931
|
+
const XAI_MAX_EDIT_IMAGES = 3;
|
|
932
|
+
|
|
933
|
+
// Map the OpenAI-style pixel-size enum (image_size) to xAI's discrete tier.
|
|
934
|
+
// "1024x1024" → "1k"; either larger size (1536x1024 or 1024x1536) → "2k". Absent
|
|
935
|
+
// image_size defaults to "1k", matching hermes-agent's DEFAULT_RESOLUTION
|
|
936
|
+
// (plugins/image_gen/xai/__init__.py:71).
|
|
937
|
+
function resolveXAIResolution(imageSize: string | undefined): "1k" | "2k" {
|
|
938
|
+
if (!imageSize || imageSize === "1024x1024") return "1k";
|
|
939
|
+
return "2k";
|
|
940
|
+
}
|
|
941
|
+
|
|
942
|
+
// Build the discriminated edit body. Caller must ensure images.length is in
|
|
943
|
+
// [1, XAI_MAX_EDIT_IMAGES]; the bound check fires earlier in execute().
|
|
944
|
+
function buildXAIEditPayload(base: XAIImageRequestBase, images: readonly InlineImageData[]): XAIImageRequestBody {
|
|
945
|
+
const refs: readonly XAIImageReference[] = images.map(img => ({
|
|
946
|
+
type: "image_url",
|
|
947
|
+
url: toDataUrl(img),
|
|
948
|
+
}));
|
|
949
|
+
const [first, ...rest] = refs;
|
|
950
|
+
if (first === undefined) return base; // unreachable: caller checked images.length > 0
|
|
951
|
+
return rest.length === 0 ? { ...base, image: first } : { ...base, images: refs };
|
|
952
|
+
}
|
|
953
|
+
|
|
860
954
|
interface AntigravitySseResult {
|
|
861
955
|
images: InlineImageData[];
|
|
862
956
|
text: string[];
|
|
@@ -910,7 +1004,7 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
|
|
|
910
1004
|
const apiKey = await findImageApiKey(ctx.modelRegistry, ctx.model, sessionId);
|
|
911
1005
|
if (!apiKey) {
|
|
912
1006
|
throw new Error(
|
|
913
|
-
"No image API credentials found. Use a GPT Responses/Codex model with OpenAI credentials, login with google-antigravity, or set OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
|
|
1007
|
+
"No image API credentials found. Use a GPT Responses/Codex model with OpenAI credentials, login with google-antigravity or xAI Grok OAuth, or set XAI_API_KEY, OPENROUTER_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.",
|
|
914
1008
|
);
|
|
915
1009
|
}
|
|
916
1010
|
|
|
@@ -922,8 +1016,11 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
|
|
|
922
1016
|
? DEFAULT_ANTIGRAVITY_MODEL
|
|
923
1017
|
: provider === "openrouter"
|
|
924
1018
|
? DEFAULT_OPENROUTER_MODEL
|
|
925
|
-
:
|
|
1019
|
+
: provider === "xai"
|
|
1020
|
+
? DEFAULT_XAI_IMAGE_MODEL
|
|
1021
|
+
: DEFAULT_MODEL;
|
|
926
1022
|
const resolvedModel = provider === "openrouter" ? resolveOpenRouterModel(model) : model;
|
|
1023
|
+
assertImageAspectRatioSupported(provider, params.aspect_ratio);
|
|
927
1024
|
const cwd = ctx.sessionManager.getCwd();
|
|
928
1025
|
|
|
929
1026
|
const resolvedImages: InlineImageData[] = [];
|
|
@@ -1059,6 +1156,107 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
|
|
|
1059
1156
|
};
|
|
1060
1157
|
}
|
|
1061
1158
|
|
|
1159
|
+
if (provider === "xai") {
|
|
1160
|
+
if (!ctx.modelRegistry) {
|
|
1161
|
+
throw new Error("Missing modelRegistry for xAI image generation");
|
|
1162
|
+
}
|
|
1163
|
+
const xaiCreds = await resolveXAIHttpCredentials(ctx.modelRegistry, resolvedModel);
|
|
1164
|
+
if (!xaiCreds) {
|
|
1165
|
+
throw new Error(
|
|
1166
|
+
"No xAI credentials. Run /login → xAI Grok OAuth (SuperGrok Subscription) or set XAI_API_KEY.",
|
|
1167
|
+
);
|
|
1168
|
+
}
|
|
1169
|
+
|
|
1170
|
+
const prompt = assemblePrompt(params);
|
|
1171
|
+
const aspectRatio = params.aspect_ratio ?? "1:1";
|
|
1172
|
+
const xaiResolution = resolveXAIResolution(params.image_size);
|
|
1173
|
+
|
|
1174
|
+
const isEdit = resolvedImages.length > 0;
|
|
1175
|
+
if (isEdit && resolvedImages.length > XAI_MAX_EDIT_IMAGES) {
|
|
1176
|
+
throw new Error(
|
|
1177
|
+
`xAI image edits accept up to ${XAI_MAX_EDIT_IMAGES} reference images; got ${resolvedImages.length}.`,
|
|
1178
|
+
);
|
|
1179
|
+
}
|
|
1180
|
+
|
|
1181
|
+
const xaiBaseBody: XAIImageRequestBase = {
|
|
1182
|
+
model: resolvedModel,
|
|
1183
|
+
prompt,
|
|
1184
|
+
aspect_ratio: aspectRatio,
|
|
1185
|
+
resolution: xaiResolution,
|
|
1186
|
+
n: 1,
|
|
1187
|
+
response_format: "b64_json",
|
|
1188
|
+
};
|
|
1189
|
+
const xaiBody: XAIImageRequestBody = isEdit
|
|
1190
|
+
? buildXAIEditPayload(xaiBaseBody, resolvedImages)
|
|
1191
|
+
: xaiBaseBody;
|
|
1192
|
+
const xaiEndpoint = isEdit ? "/images/edits" : "/images/generations";
|
|
1193
|
+
|
|
1194
|
+
const xaiResponse = await fetch(`${xaiCreds.baseURL}${xaiEndpoint}`, {
|
|
1195
|
+
method: "POST",
|
|
1196
|
+
headers: {
|
|
1197
|
+
Authorization: `Bearer ${xaiCreds.apiKey}`,
|
|
1198
|
+
"Content-Type": "application/json",
|
|
1199
|
+
"User-Agent": ohMyPiXAIUserAgent(),
|
|
1200
|
+
},
|
|
1201
|
+
body: JSON.stringify(xaiBody),
|
|
1202
|
+
signal: requestSignal,
|
|
1203
|
+
});
|
|
1204
|
+
|
|
1205
|
+
const xaiRawText = await xaiResponse.text();
|
|
1206
|
+
if (!xaiResponse.ok) {
|
|
1207
|
+
let message = xaiRawText;
|
|
1208
|
+
try {
|
|
1209
|
+
const parsedErr = JSON.parse(xaiRawText) as { error?: { message?: string } };
|
|
1210
|
+
message = parsedErr.error?.message ?? message;
|
|
1211
|
+
} catch {
|
|
1212
|
+
// Keep raw text.
|
|
1213
|
+
}
|
|
1214
|
+
throw new Error(`xAI image request failed (${xaiResponse.status}): ${message}`);
|
|
1215
|
+
}
|
|
1216
|
+
|
|
1217
|
+
const xaiData = JSON.parse(xaiRawText) as {
|
|
1218
|
+
data?: Array<{ b64_json?: string; url?: string }>;
|
|
1219
|
+
};
|
|
1220
|
+
const xaiInlineImages: InlineImageData[] = [];
|
|
1221
|
+
for (const entry of xaiData.data ?? []) {
|
|
1222
|
+
if (entry.b64_json) {
|
|
1223
|
+
const bytes = Buffer.from(entry.b64_json, "base64");
|
|
1224
|
+
const mimeType = parseImageMetadata(bytes)?.mimeType ?? "image/png";
|
|
1225
|
+
xaiInlineImages.push({ data: entry.b64_json, mimeType });
|
|
1226
|
+
} else if (entry.url) {
|
|
1227
|
+
xaiInlineImages.push(await loadImageFromUrl(entry.url, requestSignal));
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
|
|
1231
|
+
if (xaiInlineImages.length === 0) {
|
|
1232
|
+
return {
|
|
1233
|
+
content: [{ type: "text", text: "No image data returned." }],
|
|
1234
|
+
details: {
|
|
1235
|
+
provider,
|
|
1236
|
+
model: resolvedModel,
|
|
1237
|
+
imageCount: 0,
|
|
1238
|
+
imagePaths: [],
|
|
1239
|
+
images: [],
|
|
1240
|
+
},
|
|
1241
|
+
};
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1244
|
+
const xaiImagePaths = await saveImagesToTemp(xaiInlineImages);
|
|
1245
|
+
|
|
1246
|
+
return {
|
|
1247
|
+
content: [
|
|
1248
|
+
{ type: "text", text: buildResponseSummary(provider, resolvedModel, xaiImagePaths, undefined) },
|
|
1249
|
+
],
|
|
1250
|
+
details: {
|
|
1251
|
+
provider,
|
|
1252
|
+
model: resolvedModel,
|
|
1253
|
+
imageCount: xaiInlineImages.length,
|
|
1254
|
+
imagePaths: xaiImagePaths,
|
|
1255
|
+
images: xaiInlineImages,
|
|
1256
|
+
},
|
|
1257
|
+
};
|
|
1258
|
+
}
|
|
1259
|
+
|
|
1062
1260
|
if (provider === "openrouter") {
|
|
1063
1261
|
const prompt = assemblePrompt(params);
|
|
1064
1262
|
const contentParts: OpenRouterContentPart[] = [{ type: "text", text: prompt }];
|
package/src/tools/index.ts
CHANGED
|
@@ -5,6 +5,8 @@ import { normalizeLocalScheme, resolveToCwd } from "./path-utils";
|
|
|
5
5
|
import { ToolError } from "./tool-errors";
|
|
6
6
|
|
|
7
7
|
const LOCAL_SCHEME_PREFIX = "local:";
|
|
8
|
+
const PLAN_ALIAS_FILE = "PLAN.md";
|
|
9
|
+
const LOCAL_PLAN_ALIAS = "local://PLAN.md";
|
|
8
10
|
|
|
9
11
|
function resolveRawPath(session: ToolSession, targetPath: string): string {
|
|
10
12
|
const normalized = normalizeLocalScheme(targetPath);
|
|
@@ -18,15 +20,20 @@ function resolveRawPath(session: ToolSession, targetPath: string): string {
|
|
|
18
20
|
return resolveToCwd(normalized, session.cwd);
|
|
19
21
|
}
|
|
20
22
|
|
|
23
|
+
function isPlanAliasTarget(session: ToolSession, targetPath: string, resolved: string): boolean {
|
|
24
|
+
const normalized = normalizeLocalScheme(targetPath);
|
|
25
|
+
if (normalized === LOCAL_PLAN_ALIAS) return true;
|
|
26
|
+
return resolved === resolveToCwd(PLAN_ALIAS_FILE, session.cwd);
|
|
27
|
+
}
|
|
28
|
+
|
|
21
29
|
/**
|
|
22
30
|
* Resolve a write/edit target to its absolute filesystem path.
|
|
23
31
|
*
|
|
24
|
-
* In plan mode, transparently redirects
|
|
25
|
-
* plan file's basename
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* file the plan-mode guard would otherwise reject.
|
|
32
|
+
* In plan mode, transparently redirects `PLAN.md` aliases and targets whose
|
|
33
|
+
* basename matches the plan file's basename to the canonical plan file
|
|
34
|
+
* location at `state.planFilePath`. This lets `write` and `edit` accept the
|
|
35
|
+
* habitual plan filename after approval even when the active artifact has a
|
|
36
|
+
* titled path such as `local://APPROVED.md`.
|
|
30
37
|
*
|
|
31
38
|
* Outside plan mode, or when the target is neither a `PLAN.md` alias nor a basename match, this is a no-op.
|
|
32
39
|
*/
|
|
@@ -38,6 +45,7 @@ export function resolvePlanPath(session: ToolSession, targetPath: string): strin
|
|
|
38
45
|
|
|
39
46
|
const planResolved = resolveRawPath(session, state.planFilePath);
|
|
40
47
|
if (resolved === planResolved) return resolved;
|
|
48
|
+
if (isPlanAliasTarget(session, targetPath, resolved)) return planResolved;
|
|
41
49
|
if (path.basename(resolved) !== path.basename(planResolved)) return resolved;
|
|
42
50
|
|
|
43
51
|
return planResolved;
|