ima2-gen 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +2 -11
  2. package/bin/commands/backfillThumbs.js +18 -0
  3. package/bin/commands/edit.js +7 -6
  4. package/bin/commands/gen.js +7 -6
  5. package/bin/commands/multimode.js +5 -4
  6. package/bin/commands/node.js +4 -4
  7. package/bin/ima2.js +7 -1
  8. package/bin/lib/config-store.js +1 -1
  9. package/docs/API.md +55 -4
  10. package/docs/CLI.md +9 -3
  11. package/docs/PROMPT_STUDIO.md +3 -1
  12. package/docs/migration/runtime-test-inventory.md +3 -1
  13. package/lib/agentRuntime.js +22 -16
  14. package/lib/agentSettings.js +1 -1
  15. package/lib/agyImageAdapter.js +232 -0
  16. package/lib/capabilities.js +2 -1
  17. package/lib/configKeys.js +1 -1
  18. package/lib/geminiApiImageAdapter.js +183 -0
  19. package/lib/grokImageAdapter.js +16 -9
  20. package/lib/grokMultimodeAdapter.js +2 -1
  21. package/lib/grokRuntime.js +3 -0
  22. package/lib/grokSizeMapper.js +13 -1
  23. package/lib/grokVideoAdapter.js +14 -7
  24. package/lib/historyList.js +18 -2
  25. package/lib/imageModels.js +15 -0
  26. package/lib/imageThumb.js +38 -0
  27. package/lib/providerOptions.js +36 -1
  28. package/lib/responsesFallback.js +52 -44
  29. package/lib/runtimeContext.js +27 -0
  30. package/lib/storageMigration.js +1 -1
  31. package/lib/thumbBackfill.js +59 -0
  32. package/lib/vertexAuth.js +44 -0
  33. package/lib/videoThumb.js +60 -0
  34. package/package.json +4 -2
  35. package/routes/auth.js +238 -0
  36. package/routes/edit.js +41 -7
  37. package/routes/generate.js +40 -12
  38. package/routes/history.js +13 -0
  39. package/routes/index.js +4 -0
  40. package/routes/keys.js +254 -0
  41. package/routes/multimode.js +39 -6
  42. package/routes/nodes.js +57 -35
  43. package/routes/quota.js +58 -7
  44. package/routes/video.js +7 -3
  45. package/server.js +123 -0
  46. package/ui/dist/.vite/manifest.json +12 -12
  47. package/ui/dist/assets/AgentWorkspace-CYv84Rus.js +3 -0
  48. package/ui/dist/assets/{CardNewsWorkspace-BN-ga1lG.js → CardNewsWorkspace-Dqyc1WZ1.js} +2 -2
  49. package/ui/dist/assets/{NodeCanvas-BbMa4IhI.js → NodeCanvas-ChEXzQbb.js} +2 -2
  50. package/ui/dist/assets/{PromptBuilderPanel-DRwBJRDQ.js → PromptBuilderPanel-B95ZufnR.js} +1 -1
  51. package/ui/dist/assets/{PromptImportDialog-Dp85kHCq.js → PromptImportDialog-DGOwFQET.js} +2 -2
  52. package/ui/dist/assets/{PromptImportDiscoverySection-BE8Q8MLD.js → PromptImportDiscoverySection-CgvdnR49.js} +1 -1
  53. package/ui/dist/assets/{PromptImportFolderSection-PtH5x0sc.js → PromptImportFolderSection-CfUye9J8.js} +1 -1
  54. package/ui/dist/assets/{PromptLibraryPanel-FnM9tHI9.js → PromptLibraryPanel-B9kndPw1.js} +2 -2
  55. package/ui/dist/assets/SettingsWorkspace-B3tgLrmF.js +1 -0
  56. package/ui/dist/assets/index-BhcvL0g-.js +1 -0
  57. package/ui/dist/assets/index-BtK3YhJc.js +39 -0
  58. package/ui/dist/assets/index-ClOLOjnA.css +1 -0
  59. package/ui/dist/index.html +2 -2
  60. package/ui/dist/assets/AgentWorkspace-C21zqdTZ.js +0 -3
  61. package/ui/dist/assets/SettingsWorkspace-MARPGyBL.js +0 -1
  62. package/ui/dist/assets/index-BAFI6htx.js +0 -42
  63. package/ui/dist/assets/index-BSXxr_Bt.js +0 -1
  64. package/ui/dist/assets/index-DS-ADE7U.css +0 -1
@@ -0,0 +1,232 @@
1
+ import { spawn } from "node:child_process";
2
+ import { readFile, rm, stat, writeFile, mkdir } from "node:fs/promises";
3
+ import { extname, join, resolve } from "node:path";
4
+ import { homedir, tmpdir } from "node:os";
5
+ import { randomBytes } from "node:crypto";
6
+ import { logEvent } from "./logger.js";
7
+ import { detectImageMimeFromB64 } from "./refs.js";
8
+ const AGY_TIMEOUT_MS = 360_000;
9
+ const AGY_OUTPUT_RESOLUTION = "1024x1024";
10
+ const AGY_MAX_OUTPUT_BYTES = 1024 * 1024;
11
/**
 * Create an Error carrying an HTTP-style status and a machine-readable code.
 *
 * The error is returned, not thrown, so call sites read `throw agyError(...)`.
 *
 * @param {string} message - Human-readable description.
 * @param {number} status - Value stored on `err.status`.
 * @param {string} code - Value stored on `err.code`.
 * @returns {Error} The decorated error instance.
 */
function agyError(message, status, code) {
  return Object.assign(new Error(message), { status, code });
}
17
/**
 * Compose the instruction text that is piped to the `agy` CLI.
 *
 * The text asks for exactly one `default_api:generate_image` tool call and a
 * single `RESULT|...` or `ERROR|...` line afterwards, which is what the output
 * parser in this module looks for.
 *
 * @param {string} userPrompt - The user's prompt; embedded JSON-encoded.
 * @param {string[]} referencePaths - Paths of reference images (may be empty).
 * @returns {string} The newline-joined prompt.
 */
function buildAgyPrompt(userPrompt, referencePaths) {
  const referenceCount = referencePaths.length;
  const serializedPaths = referenceCount > 0 ? JSON.stringify(referencePaths) : "[]";

  const instructions = [
    "Please generate one image by calling the tool default_api:generate_image once.",
    "After the tool finishes, print one machine-readable result line so ima2-gen can copy the artifact.",
  ];
  const toolParameters = [
    "Tool parameters:",
    ` Prompt: ${JSON.stringify(userPrompt)}`,
    ' ImageName: "ima2_generated"',
    ` ImagePaths: ${serializedPaths}`,
    ' toolSummary: "ima2 pipeline generation"',
    ' toolAction: "Generating ima2 image"',
  ];
  const resultProtocol = [
    `Reference count: ${referenceCount}. The output resolution is fixed at ${AGY_OUTPUT_RESOLUTION}.`,
    "If generation succeeds, print: RESULT|<absolute_artifact_path>|<file_extension>",
    "If generation fails, print: ERROR|<concise error message>",
  ];

  return [...instructions, "", ...toolParameters, "", ...resultProtocol].join("\n");
}
37
/**
 * Extract the generated artifact's path from the `agy` CLI's stdout.
 *
 * Recognised forms, in priority order:
 *   1. `RESULT|<path>|<ext>` line
 *   2. `ERROR|<message>` line (always throws)
 *   3. quota wording anywhere in the output (throws 429)
 *   4. `SAVED_PATH=<path>` line
 *   5. any path containing `/brain/` or `/artifacts/` ending in an image extension
 *
 * Lines are split on LF or CRLF and trimmed, so output produced on Windows
 * (or indented by the CLI) does not leave stray `\r`/spaces in the result.
 *
 * @param {string} stdout - Captured standard output of the agy process.
 * @returns {{ artifactPath: string, ext: string }} Parsed path and extension
 *   (extension has no leading dot; falls back to "png").
 * @throws {Error} Error with `status`/`code` set: AGY_MALFORMED_RESULT,
 *   AGY_QUOTA_EXHAUSTED (429), AGY_GENERATION_FAILED or AGY_PARSE_FAILED.
 */
function parseAgyOutput(stdout) {
  const lines = stdout
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);

  const resultLine = lines.find((l) => l.startsWith("RESULT|"));
  if (resultLine) {
    const parts = resultLine.split("|").map((part) => part.trim());
    // An empty path would later resolve to the working directory, so treat it as malformed.
    if (parts.length >= 3 && parts[1].length > 0) {
      const artifactPath = parts[1];
      const ext = parts[2].replace(/^\./, "") || extname(artifactPath).slice(1) || "png";
      return { artifactPath, ext };
    }
    throw agyError(`Malformed RESULT line: ${resultLine}`, 502, "AGY_MALFORMED_RESULT");
  }

  const errorLine = lines.find((l) => l.startsWith("ERROR|"));
  if (errorLine) {
    const msg = errorLine.slice("ERROR|".length).trim() || "Unknown agy error";
    const lower = msg.toLowerCase();
    if (lower.includes("resource exhausted") || lower.includes("exhausted your capacity") || lower.includes("quota will reset")) {
      throw agyError(`Agy generation failed: ${msg}`, 429, "AGY_QUOTA_EXHAUSTED");
    }
    throw agyError(`Agy generation failed: ${msg}`, 502, "AGY_GENERATION_FAILED");
  }

  // No protocol line at all: quota messages may still appear as free text.
  const fullLower = stdout.toLowerCase();
  if (fullLower.includes("resource exhausted") || fullLower.includes("exhausted your capacity")) {
    throw agyError(`Agy quota exhausted: ${stdout.trim().slice(0, 200)}`, 429, "AGY_QUOTA_EXHAUSTED");
  }

  const savedPathLine = lines.find((l) => l.startsWith("SAVED_PATH="));
  if (savedPathLine) {
    const p = savedPathLine.slice("SAVED_PATH=".length).trim();
    // extname() yields "" for extension-less paths instead of echoing the whole path back.
    const ext = extname(p).slice(1) || "png";
    return { artifactPath: p, ext };
  }

  // Last resort: scan for something that looks like an artifact path.
  const normalizedStdout = stdout.replace(/\\/g, "/");
  const pathMatch = normalizedStdout.match(/\/[^\s"']+\/(brain|artifacts)\/[^\s"']+\.(png|jpg|jpeg|webp)/i);
  if (pathMatch) {
    const artifactPath = process.platform === "win32" ? pathMatch[0].replace(/\//g, "\\") : pathMatch[0];
    const ext = extname(artifactPath).slice(1) || "png";
    return { artifactPath, ext };
  }

  throw agyError(`Could not parse artifact path from agy output (${stdout.length} chars): ${stdout.slice(0, 200)}`, 502, "AGY_PARSE_FAILED");
}
75
/**
 * Run `agy -p -`, feed it the prompt on stdin and collect its output.
 *
 * The child receives only a small whitelist of environment variables.
 * Output is decoded as UTF-8 by the stream itself, so multi-byte characters
 * that straddle a chunk boundary are not corrupted. Collection stops growing
 * once a buffer reaches AGY_MAX_OUTPUT_BYTES characters.
 *
 * @param {string} prompt - Text written to the child's stdin.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<{ stdout: string, stderr: string }>} Captured output.
 *   Resolves even on a non-zero exit code as long as stdout is non-empty.
 * @throws {Error} Rejects with `status`/`code` set: GENERATION_CANCELED (499),
 *   AGY_TIMEOUT (504) or AGY_PROCESS_ERROR (502).
 */
function spawnAgy(prompt, signal) {
  return new Promise((fulfill, reject) => {
    // Do not start a process for a request that is already canceled.
    if (signal?.aborted) {
      reject(agyError("Generation canceled", 499, "GENERATION_CANCELED"));
      return;
    }

    const child = spawn("agy", ["-p", "-"], {
      stdio: ["pipe", "pipe", "pipe"],
      env: {
        PATH: process.env.PATH,
        HOME: process.env.HOME,
        USERPROFILE: process.env.USERPROFILE,
        TMPDIR: process.env.TMPDIR,
        TEMP: process.env.TEMP,
        LANG: process.env.LANG,
        GEMINI_API_KEY: process.env.GEMINI_API_KEY,
      },
    });
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";
    let settled = false;
    let timer;

    // Single exit point: whichever outcome comes first wins, and the timer and
    // abort listener are always released (also when "close" never fires).
    const settle = (callback, value) => {
      if (settled) {
        return false;
      }
      settled = true;
      clearTimeout(timer);
      signal?.removeEventListener("abort", onAbort);
      callback(value);
      return true;
    };

    const onAbort = () => {
      if (settle(reject, agyError("Generation canceled", 499, "GENERATION_CANCELED"))) {
        child.kill("SIGTERM");
      }
    };

    timer = setTimeout(() => {
      if (settle(reject, agyError("Agy generation timed out", 504, "AGY_TIMEOUT"))) {
        child.kill("SIGTERM");
      }
    }, AGY_TIMEOUT_MS);

    child.stdout.on("data", (chunk) => {
      if (stdout.length < AGY_MAX_OUTPUT_BYTES) {
        stdout += chunk;
      }
    });
    child.stderr.on("data", (chunk) => {
      if (stderr.length < AGY_MAX_OUTPUT_BYTES) {
        stderr += chunk;
      }
    });

    child.on("error", (err) => {
      settle(reject, agyError(`Agy process error: ${err.message}`, 502, "AGY_PROCESS_ERROR"));
    });
    child.on("close", (code) => {
      if (code !== 0 && !stdout.trim()) {
        settle(reject, agyError(`Agy exited with code ${code}: ${stderr.slice(0, 200)}`, 502, "AGY_PROCESS_ERROR"));
        return;
      }
      settle(fulfill, { stdout, stderr });
    });

    signal?.addEventListener("abort", onAbort, { once: true });

    // Ignore EPIPE and similar: a child that dies early is reported via "error"/"close".
    child.stdin.on("error", () => { });
    child.stdin.write(prompt);
    child.stdin.end();
  });
}
144
/** File extension used for each supported reference-image MIME type. */
const MIME_TO_EXT = {
  "image/png": "png",
  "image/jpeg": "jpg",
  "image/webp": "webp",
};

/**
 * Materialise base64 reference images as files in a fresh temp directory.
 *
 * Each file is named `ref_<index>.<ext>`, with the extension chosen from the
 * reference's detected/declared/sniffed MIME type (unknown types use "png").
 * If anything fails while creating the directory or writing a file, the
 * directory is removed before the error propagates, so a failed call leaves
 * nothing behind.
 *
 * @param {Array<{ b64: string, detectedMime?: string, declaredMime?: string }>} refs
 *   Reference images; `b64` is decoded as base64.
 * @returns {Promise<{ paths: string[], cleanup: () => Promise<void> }>}
 *   Written file paths plus a best-effort cleanup that never rejects.
 */
async function writeRefsToTempFiles(refs) {
  if (refs.length === 0) {
    return { paths: [], cleanup: async () => { } };
  }

  const dir = join(tmpdir(), `ima2-agy-refs-${randomBytes(6).toString("hex")}`);
  // Cleanup is deliberately best-effort: a stale temp dir must not fail a generation.
  const cleanup = async () => {
    await rm(dir, { recursive: true, force: true }).catch(() => { });
  };

  const paths = [];
  try {
    // Owner-only permissions: reference images are user content.
    await mkdir(dir, { recursive: true, mode: 0o700 });
    for (let i = 0; i < refs.length; i++) {
      const ref = refs[i];
      const mime = ref.detectedMime || ref.declaredMime || detectImageMimeFromB64(ref.b64) || "image/png";
      // hasOwn keeps inherited keys such as "constructor" from being used as an extension.
      const ext = Object.hasOwn(MIME_TO_EXT, mime) ? MIME_TO_EXT[mime] : "png";
      const p = join(dir, `ref_${i}.${ext}`);
      await writeFile(p, Buffer.from(ref.b64, "base64"));
      paths.push(p);
    }
  } catch (err) {
    await cleanup();
    throw err;
  }

  return { paths, cleanup };
}
170
/**
 * Generate one image through the `agy` CLI and return it base64-encoded.
 *
 * Up to three reference images are written to temporary files and passed to
 * the CLI by path. The artifact path reported by the CLI is only read when it
 * resolves inside `~/.gemini`, `~/.cache` or the OS temp directory and points
 * at a regular file. The temporary reference files are removed on every exit
 * path, including failures while building the prompt or logging.
 *
 * @param {string} prompt - User prompt.
 * @param {object} [options]
 * @param {Array<object>} [options.references] - Reference images (first 3 used).
 * @param {string} [options.requestId] - Correlation id included in log events.
 * @param {AbortSignal} [options.signal] - Cancellation signal for the child process.
 * @returns {Promise<{ b64: string, revisedPrompt: string, usage: { agy_artifact_bytes: number }, webSearchCalls: number, mime: string }>}
 * @throws {Error} Errors with `status`/`code` set, e.g. AGY_PATH_REJECTED or
 *   AGY_ARTIFACT_NOT_FOUND, plus anything raised while spawning or parsing.
 */
export async function generateViaAgy(prompt, options = {}) {
  const refDetails = (options.references || []).slice(0, 3);
  const { paths: refPaths, cleanup } = await writeRefsToTempFiles(refDetails);
  try {
    // Everything after the temp files exist lives inside try/finally so they are never leaked.
    const agyPrompt = buildAgyPrompt(prompt, refPaths);
    logEvent("agy", "generate:start", {
      requestId: options.requestId,
      promptChars: prompt.length,
      agyPromptChars: agyPrompt.length,
      refs: refPaths.length,
    });

    const { stdout, stderr } = await spawnAgy(agyPrompt, options.signal);
    if (stderr && stderr.trim().length > 0) {
      logEvent("agy", "generate:stderr", {
        requestId: options.requestId,
        stderrChars: stderr.length,
        stderrPreview: stderr.slice(0, 200),
      });
    }

    const { artifactPath } = parseAgyOutput(stdout);

    // Validate artifact path is within allowed directories
    const resolvedPath = resolve(artifactPath);
    const allowedPrefixes = [
      join(homedir(), ".gemini"),
      join(homedir(), ".cache"),
      tmpdir(),
    ];
    const normalizedResolved = resolvedPath.replace(/\\/g, "/");
    const isSafePath = allowedPrefixes.some((prefix) => {
      const normalizedPrefix = prefix.replace(/\\/g, "/");
      return normalizedResolved.startsWith(normalizedPrefix + "/") || normalizedResolved === normalizedPrefix;
    });
    if (!isSafePath) {
      throw agyError(`Agy artifact path outside allowed directories: ${resolvedPath}`, 502, "AGY_PATH_REJECTED");
    }

    let artifactInfo;
    try {
      artifactInfo = await stat(resolvedPath);
    } catch {
      throw agyError(`Agy artifact not found at parsed path: ${resolvedPath}`, 502, "AGY_ARTIFACT_NOT_FOUND");
    }
    // A directory (e.g. the bare temp dir) passes the prefix check but cannot be read as an image.
    if (!artifactInfo.isFile()) {
      throw agyError(`Agy artifact is not a regular file: ${resolvedPath}`, 502, "AGY_ARTIFACT_NOT_FOUND");
    }

    const buffer = await readFile(resolvedPath);
    const b64 = buffer.toString("base64");
    const mime = detectImageMimeFromB64(b64) || "image/png";
    logEvent("agy", "generate:done", {
      requestId: options.requestId,
      artifactPath,
      b64Len: b64.length,
      mime,
      fileBytes: buffer.length,
    });
    return {
      b64,
      revisedPrompt: prompt,
      usage: { agy_artifact_bytes: buffer.length },
      webSearchCalls: 0,
      mime,
    };
  } finally {
    await cleanup();
  }
}
@@ -3,7 +3,7 @@ import { KEY_TO_ENV, WRITABLE_CONFIG_KEYS } from "./configKeys.js";
3
3
  import { DEFAULT_IMAGE_QUALITY, VALID_IMAGE_QUALITIES } from "./oauthNormalize.js";
4
4
  const MAX_GENERATED_IMAGES = 8;
5
5
  const VALID_MODES = ["auto", "direct"];
6
- const VALID_PROVIDERS = ["auto", "oauth", "api", "grok"];
6
+ const VALID_PROVIDERS = ["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"];
7
7
  const AGENT_COMMANDS = [
8
8
  "skill",
9
9
  "capabilities",
@@ -55,6 +55,7 @@ export function buildIma2Capabilities({ appConfig = runtimeConfigDefault, packag
55
55
  supported: toArray(appConfig.imageModels.valid),
56
56
  unsupported: toArray(appConfig.imageModels.unsupported),
57
57
  grokSupported: ["grok-imagine-image", "grok-imagine-image-quality"],
58
+ geminiSupported: ["nano-banana-2", "nano-banana-pro"],
58
59
  },
59
60
  videoModels: {
60
61
  supported: ["grok-imagine-video", "grok-imagine-video-1.5-preview"],
package/lib/configKeys.js CHANGED
@@ -52,7 +52,7 @@ export const KEY_TO_ENV = {
52
52
  "history.defaultPageSize": "IMA2_HISTORY_PAGE_SIZE",
53
53
  };
54
54
  const REDACT_PATTERN = /token|secret|apikey|password/i;
55
- const ALWAYS_REDACT = new Set(["provider", "apiKey", "oauth.token", "oauth.refreshToken"]);
55
+ const ALWAYS_REDACT = new Set(["provider", "apiKey", "oauth.token", "oauth.refreshToken", "vertexServiceAccountJson"]);
56
56
  export function isSensitiveConfigKey(key) {
57
57
  return ALWAYS_REDACT.has(key) || REDACT_PATTERN.test(key);
58
58
  }
@@ -0,0 +1,183 @@
1
+ import { logEvent } from "./logger.js";
2
+ import { detectImageMimeFromB64 } from "./refs.js";
3
+ import { getVertexAccessToken, getVertexProjectId, isVertexInitialized } from "./vertexAuth.js";
4
+ const MODEL_ID_MAP = {
5
+ "nano-banana-2": "gemini-3.1-flash-image",
6
+ "nano-banana-pro": "gemini-3-pro-image",
7
+ };
8
+ const GEMINI_TIMEOUT_MS = 120_000;
9
/**
 * Translate a "<width>x<height>" size string into the numeric aspect-ratio and
 * image-size codes used in the request body.
 *
 * A missing size, "auto", "1024x1024" or any string that is not
 * "<digits>x<digits>" yields `{ aspectRatio: 1, imageSize: 0 }`. Otherwise the
 * aspect code is the table entry whose ratio is numerically closest to
 * width/height (first entry wins ties) and the size code is a bucket of the
 * longer edge: <=512 -> 1, <=1024 -> 2, <=2048 -> 3, else 4.
 *
 * @param {string} [size] - Requested size, e.g. "1920x1080".
 * @returns {{ aspectRatio: number, imageSize: number }}
 */
function parseGeminiImageParams(size) {
  const defaults = () => ({ aspectRatio: 1, imageSize: 0 });
  if (!size || size === "auto" || size === "1024x1024") {
    return defaults();
  }
  const dims = size.match(/^(\d+)x(\d+)$/);
  if (!dims) {
    return defaults();
  }

  const width = Number(dims[1]);
  const height = Number(dims[2]);
  const target = width / height;

  // [code, ratio] pairs; order matters because the first closest entry is kept.
  const candidates = [
    [1, 1], [2, 2 / 3], [3, 3 / 2], [4, 3 / 4], [5, 4 / 3],
    [6, 4 / 5], [7, 5 / 4], [8, 9 / 16], [9, 16 / 9], [10, 21 / 9],
    [11, 1 / 8], [12, 8], [13, 1 / 4], [14, 4],
  ];
  let closest = { code: 1, gap: Infinity };
  candidates.forEach(([code, value]) => {
    const gap = Math.abs(target - value);
    if (gap < closest.gap) {
      closest = { code, gap };
    }
  });

  const longestEdge = Math.max(width, height);
  let imageSize;
  if (longestEdge <= 512) {
    imageSize = 1;
  } else if (longestEdge <= 1024) {
    imageSize = 2;
  } else if (longestEdge <= 2048) {
    imageSize = 3;
  } else {
    imageSize = 4;
  }

  return { aspectRatio: closest.code, imageSize };
}
36
/**
 * Create an Error annotated with a status number and a machine-readable code.
 *
 * @param {string} message - Human-readable description.
 * @param {number} status - Value stored on `err.status`.
 * @param {string} code - Value stored on `err.code`.
 * @returns {Error} The decorated error (returned, not thrown).
 */
function geminiApiError(message, status, code) {
  const failure = new Error(message);
  return Object.assign(failure, { status, code });
}
42
/**
 * Map a product-facing model name to the id sent to the API.
 *
 * Names without an entry in MODEL_ID_MAP are passed through unchanged. Only
 * the map's own keys are consulted, so names that happen to match inherited
 * object properties ("constructor", "toString", ...) are passed through
 * instead of resolving to a function.
 *
 * @param {string} model - Requested model name.
 * @returns {string} The mapped id, or `model` itself when unmapped.
 */
function resolveGeminiModelId(model) {
  return Object.hasOwn(MODEL_ID_MAP, model) ? MODEL_ID_MAP[model] : model;
}
45
/**
 * Build the `contents` array for a generateContent request.
 *
 * Produces a single user turn whose parts are up to three reference images
 * (as inline base64 data) followed by the text prompt.
 *
 * @param {string} prompt - Text prompt, added as the last part.
 * @param {Array<{ b64: string, declaredMime?: string, detectedMime?: string }>} [references]
 *   Reference images; only the first three are used. May be omitted or null.
 * @returns {Array<{ role: string, parts: object[] }>} One-element contents array.
 */
function buildContents(prompt, references = []) {
  // Reference images go first, then the prompt text.
  const imageParts = (references ?? []).slice(0, 3).map((ref) => ({
    inlineData: {
      mimeType: ref.declaredMime || ref.detectedMime || detectImageMimeFromB64(ref.b64) || "image/png",
      data: ref.b64,
    },
  }));
  return [{ role: "user", parts: [...imageParts, { text: prompt }] }];
}
61
/**
 * Generate one image through the Gemini API (API key) or Vertex AI (bearer token).
 *
 * Vertex is used when `ctx.geminiAuthMode === "vertex"` and Vertex is ready, or
 * when no API key is configured but Vertex is ready; otherwise the API key is used.
 * The request is aborted after GEMINI_TIMEOUT_MS or when `options.signal` aborts.
 *
 * @param {string} prompt - Text prompt.
 * @param {object} ctx - Runtime context; reads `geminiApiKey`, `hasVertexKey`, `geminiAuthMode`.
 * @param {object} [options]
 * @param {string} [options.model] - Model name (default "nano-banana-2").
 * @param {Array<object>} [options.references] - Reference images (first 3 used).
 * @param {string} [options.size] - Requested size, e.g. "1920x1080".
 * @param {string} [options.requestId] - Correlation id included in log events.
 * @param {AbortSignal} [options.signal] - Caller cancellation signal.
 * @returns {Promise<{ b64: string, revisedPrompt: string, usage: object, webSearchCalls: number, mime: string }>}
 * @throws {Error} Errors with `status`/`code` set: GEMINI_API_KEY_MISSING (401),
 *   GEMINI_API_RATE_LIMITED (429), GEMINI_API_BAD_REQUEST (400/403),
 *   GEMINI_API_UPSTREAM_ERROR (502), GEMINI_API_SAFETY_BLOCKED (400),
 *   GEMINI_API_NO_IMAGE (502), GENERATION_CANCELED (499),
 *   GENERATION_TIMEOUT (504), GEMINI_API_NETWORK_FAILED (502).
 */
export async function generateViaGeminiApi(prompt, ctx, options = {}) {
  const apiKey = ctx.geminiApiKey;
  const vertexReady = ctx.hasVertexKey && isVertexInitialized();
  const authMode = ctx.geminiAuthMode;
  const useVertex = authMode === "vertex" ? vertexReady : (!apiKey && vertexReady);
  if (!apiKey && !useVertex) {
    throw geminiApiError("Gemini API key or Vertex AI credentials not configured", 401, "GEMINI_API_KEY_MISSING");
  }

  const model = options.model || "nano-banana-2";
  const apiModelId = resolveGeminiModelId(model);
  // Unmapped model names are passed through, so encode them before they become a URL path segment.
  const modelSegment = encodeURIComponent(apiModelId);
  const references = (options.references || []).slice(0, 3);

  let url;
  let authHeaders;
  if (useVertex) {
    const token = await getVertexAccessToken();
    const projectId = getVertexProjectId();
    url = `https://aiplatform.googleapis.com/v1/projects/${encodeURIComponent(projectId)}/locations/global/publishers/google/models/${modelSegment}:generateContent`;
    authHeaders = { "Content-Type": "application/json", "Authorization": `Bearer ${token}` };
  } else {
    url = `https://generativelanguage.googleapis.com/v1beta/models/${modelSegment}:generateContent`;
    authHeaders = { "Content-Type": "application/json", "x-goog-api-key": apiKey };
  }

  const imageParams = parseGeminiImageParams(options.size);
  // NOTE: Vertex (aiplatform.googleapis.com) rejects the response_format field that the
  // direct Gemini API accepts, so the Vertex path can only request modalities — output
  // defaults to 1K/1:1 regardless of requested size. Direct API path honors aspect/size.
  const generationConfig = useVertex
    ? { responseModalities: ["TEXT", "IMAGE"] }
    : {
      response_modalities: ["TEXT", "IMAGE"],
      response_format: {
        image: {
          aspect_ratio: imageParams.aspectRatio,
          image_size: imageParams.imageSize,
        },
      },
    };
  const configKey = useVertex ? "generationConfig" : "generation_config";
  const body = { contents: buildContents(prompt, references), [configKey]: generationConfig };

  logEvent("gemini-api", "generate:start", {
    requestId: options.requestId,
    model,
    apiModelId,
    promptChars: prompt.length,
    refs: references.length,
  });

  const timeoutSignal = AbortSignal.timeout(GEMINI_TIMEOUT_MS);
  const combinedSignal = options.signal
    ? AbortSignal.any([options.signal, timeoutSignal])
    : timeoutSignal;

  try {
    const res = await fetch(url, {
      method: "POST",
      headers: authHeaders,
      body: JSON.stringify(body),
      signal: combinedSignal,
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      if (res.status === 429) {
        throw geminiApiError(`Gemini API rate limited: ${text.slice(0, 200)}`, 429, "GEMINI_API_RATE_LIMITED");
      }
      if (res.status === 400 || res.status === 403) {
        throw geminiApiError(`Gemini API error: ${text.slice(0, 200)}`, res.status, "GEMINI_API_BAD_REQUEST");
      }
      throw geminiApiError(`Gemini API error (${res.status}): ${text.slice(0, 200)}`, 502, "GEMINI_API_UPSTREAM_ERROR");
    }

    const json = await res.json();
    // Extract image from candidates[0].content.parts[]; if several image parts exist the last one wins.
    const parts = json?.candidates?.[0]?.content?.parts || [];
    let b64 = null;
    let textResponse = "";
    let mime = "image/png";
    for (const part of parts) {
      if (part.inlineData?.data) {
        b64 = part.inlineData.data;
        mime = part.inlineData.mimeType || "image/png";
      }
      if (part.text) {
        textResponse += part.text;
      }
    }

    if (!b64) {
      // Content blocks are the caller's problem (400), not an upstream failure (502).
      // A blocked prompt is reported via promptFeedback with no candidates at all.
      const finishReason = json?.candidates?.[0]?.finishReason;
      const blockedFinishReasons = ["SAFETY", "IMAGE_SAFETY", "PROHIBITED_CONTENT"];
      if (blockedFinishReasons.includes(finishReason) || json?.promptFeedback?.blockReason) {
        throw geminiApiError("Gemini API: generation blocked by safety filter", 400, "GEMINI_API_SAFETY_BLOCKED");
      }
      throw geminiApiError(`Gemini API: no image in response (finishReason: ${finishReason || "unknown"})`, 502, "GEMINI_API_NO_IMAGE");
    }

    const usageMetadata = json?.usageMetadata || {};
    logEvent("gemini-api", "generate:done", {
      requestId: options.requestId,
      model,
      b64Len: b64.length,
      mime,
      textResponseLen: textResponse.length,
    });
    return {
      b64,
      revisedPrompt: textResponse || prompt,
      usage: {
        promptTokens: usageMetadata.promptTokenCount || 0,
        candidatesTokens: usageMetadata.candidatesTokenCount || 0,
        totalTokens: usageMetadata.totalTokenCount || 0,
      },
      webSearchCalls: 0,
      mime,
    };
  } catch (e) {
    // Errors raised above already carry status + code; pass them through untouched.
    if (e?.code && e?.status) {
      throw e;
    }
    // fetch rejects with the signal's abort reason, which is not always an "AbortError":
    // AbortSignal.timeout() uses a "TimeoutError" and callers may abort with any value.
    // Decide from the signals themselves rather than from the error name alone.
    if (options.signal?.aborted) {
      throw geminiApiError("Generation canceled", 499, "GENERATION_CANCELED");
    }
    if (timeoutSignal.aborted || e?.name === "TimeoutError" || e?.name === "AbortError") {
      throw geminiApiError("Gemini API generation timed out", 504, "GENERATION_TIMEOUT");
    }
    throw geminiApiError(`Gemini API request failed: ${e?.message ?? String(e)}`, 502, "GEMINI_API_NETWORK_FAILED");
  }
}
@@ -2,7 +2,14 @@ import { logEvent } from "./logger.js";
2
2
  import { mapSizeToGrokImageParams } from "./grokSizeMapper.js";
3
3
  import { detectImageMimeFromB64 } from "./refs.js";
4
4
  import { getGrokProxyUrl } from "./grokRuntime.js";
5
- function getGrokEndpoint(ctx, path = "/v1/images/generations") {
5
+ function getGrokEndpoint(ctx, path = "/v1/images/generations", directApiKey) {
6
+ if (directApiKey) {
7
+ const normalizedPath = path.startsWith("/") ? path : `/${path}`;
8
+ return {
9
+ url: `https://api.x.ai${normalizedPath}`,
10
+ headers: { "Content-Type": "application/json", Authorization: `Bearer ${directApiKey}` },
11
+ };
12
+ }
6
13
  return {
7
14
  url: getGrokProxyUrl(ctx, path),
8
15
  headers: { "Content-Type": "application/json", Authorization: "Bearer dummy" },
@@ -63,8 +70,8 @@ function extractResponsesText(response) {
63
70
  }
64
71
  return chunks.join("\n\n").trim();
65
72
  }
66
- export async function postGrokImages(ctx, payload, signal, path = "/v1/images/generations") {
67
- const { url, headers } = getGrokEndpoint(ctx, path);
73
+ export async function postGrokImages(ctx, payload, signal, path = "/v1/images/generations", directApiKey) {
74
+ const { url, headers } = getGrokEndpoint(ctx, path, directApiKey);
68
75
  const timeoutMs = getGrokTimeout(ctx);
69
76
  const { combinedSignal, timer } = withTimeoutSignal(signal, timeoutMs);
70
77
  try {
@@ -244,7 +251,7 @@ export function buildGrokSearchPayload(prompt, plannerModel = "grok-4.3") {
244
251
  export async function searchGrokVisualContext(prompt, ctx, options = {}) {
245
252
  const planner = getPlannerConfig(ctx);
246
253
  const payload = buildGrokSearchPayload(prompt, planner.model);
247
- const { url, headers } = getGrokEndpoint(ctx, "/v1/responses");
254
+ const { url, headers } = getGrokEndpoint(ctx, "/v1/responses", options.directApiKey);
248
255
  const { combinedSignal, timer } = withTimeoutSignal(options.signal, planner.timeoutMs);
249
256
  logEvent("grok", "search:start", { requestId: options.requestId, plannerModel: planner.model, promptChars: prompt.length });
250
257
  try {
@@ -305,9 +312,9 @@ export async function planGrokImage(prompt, ctx, options = {}) {
305
312
  const imageModel = options.model || ctx.config.grokProvider?.defaultImageModel || "grok-imagine-image";
306
313
  const planner = getPlannerConfig(ctx);
307
314
  const sizeParams = mapSizeToGrokImageParams(options.size);
308
- const search = await searchGrokVisualContext(prompt, ctx, { signal: options.signal, requestId: options.requestId });
315
+ const search = await searchGrokVisualContext(prompt, ctx, { signal: options.signal, requestId: options.requestId, directApiKey: options.directApiKey });
309
316
  const payload = buildGrokPlannerPayload(prompt, imageModel, options.size, sizeParams, planner.model, search.summary, options.references || options.referenceCount || 0);
310
- const { url, headers } = getGrokEndpoint(ctx, "/v1/chat/completions");
317
+ const { url, headers } = getGrokEndpoint(ctx, "/v1/chat/completions", options.directApiKey);
311
318
  const { combinedSignal, timer } = withTimeoutSignal(options.signal, planner.timeoutMs);
312
319
  logEvent("grok", "planner:start", { requestId: options.requestId, plannerModel: planner.model, imageModel, size: options.size });
313
320
  try {
@@ -356,7 +363,7 @@ export async function generateViaGrok(prompt, ctx, options = {}) {
356
363
  const references = options.references || [];
357
364
  const plan = options.plannedPrompt
358
365
  ? { prompt: options.plannedPrompt, model, webSearchCalls: options.webSearchCalls ?? 1 }
359
- : await planGrokImage(prompt, ctx, { ...options, referenceCount: references.length });
366
+ : await planGrokImage(prompt, ctx, { ...options, referenceCount: references.length, directApiKey: options.directApiKey });
360
367
  const hasReferences = references.length > 0;
361
368
  const payload = hasReferences
362
369
  ? imageEditPayload(model, plan.prompt, references, options.size)
@@ -370,7 +377,7 @@ export async function generateViaGrok(prompt, ctx, options = {}) {
370
377
  size: options.size,
371
378
  refs: references.length,
372
379
  });
373
- const result = await postGrokImages(ctx, payload, options.signal, endpoint);
380
+ const result = await postGrokImages(ctx, payload, options.signal, endpoint, options.directApiKey);
374
381
  if (!result.data?.[0]?.b64_json) {
375
382
  throw grokError("Grok returned empty image data", 502, "GROK_EMPTY_RESPONSE");
376
383
  }
@@ -390,7 +397,7 @@ export async function editViaGrok(prompt, imageB64, ctx, options = {}) {
390
397
  const imageUrl = imageB64.startsWith("data:") ? imageB64 : `data:${detectedInputMime};base64,${imageB64}`;
391
398
  const payload = { model, prompt, n: 1, response_format: "b64_json", image: { type: "image_url", url: imageUrl }, ...mapSizeToGrokImageParams(options.size) };
392
399
  logEvent("grok", "edit:start", { requestId: options.requestId, model, promptChars: prompt.length });
393
- const result = await postGrokImages(ctx, payload, options.signal, "/v1/images/edits");
400
+ const result = await postGrokImages(ctx, payload, options.signal, "/v1/images/edits", options.directApiKey);
394
401
  if (!result.data?.[0]?.b64_json) {
395
402
  throw grokError("Grok edit returned empty image data", 502, "GROK_EMPTY_RESPONSE");
396
403
  }
@@ -19,6 +19,7 @@ export async function generateMultimodeViaGrok(prompt, ctx, options = {}) {
19
19
  signal: options.signal,
20
20
  requestId: options.requestId,
21
21
  references,
22
+ directApiKey: options.directApiKey,
22
23
  });
23
24
  totalWebSearchCalls += plan.webSearchCalls;
24
25
  const endpoint = references.length > 0 ? "/v1/images/edits" : "/v1/images/generations";
@@ -33,7 +34,7 @@ export async function generateMultimodeViaGrok(prompt, ctx, options = {}) {
33
34
  refs: references.length,
34
35
  promptChars: plan.prompt.length,
35
36
  });
36
- const result = await postGrokImages(ctx, payload, options.signal, endpoint);
37
+ const result = await postGrokImages(ctx, payload, options.signal, endpoint, options.directApiKey);
37
38
  if (result.data?.[0]?.b64_json) {
38
39
  const img = { b64: result.data[0].b64_json, mime: result.data[0].mime_type, revisedPrompt: plan.prompt };
39
40
  images.push(img);
@@ -16,3 +16,6 @@ export function getGrokProxyUrl(ctx = {}, path = "/v1") {
16
16
  const normalizedPath = path.startsWith("/") ? path : `/${path}`;
17
17
  return `${getGrokProxyBaseUrl(ctx)}${normalizedPath}`;
18
18
  }
19
/**
 * Base URL of the xAI API, used when requests bypass the local Grok proxy.
 *
 * @returns {string} Always "https://api.x.ai" (no trailing slash).
 */
export function getGrokDirectBaseUrl() {
  const directBaseUrl = "https://api.x.ai";
  return directBaseUrl;
}
@@ -37,7 +37,7 @@ function parseSize(size) {
37
37
  }
38
38
  function aspectValue(aspect) {
39
39
  const [w, h] = aspect.split(":").map(Number);
40
- return w / h;
40
+ return Number.isFinite(h) && h !== 0 ? w / h : 1;
41
41
  }
42
42
  function closestAspect(w, h) {
43
43
  const target = w / h;
@@ -50,6 +50,18 @@ function closestAspect(w, h) {
50
50
  export function mapSizeToGrokImageParams(size) {
51
51
  if (!size || size === "auto")
52
52
  return { aspect_ratio: "auto" };
53
+ // Native format from GrokSizePicker: "grok:<aspect_ratio>:<resolution>"
54
+ if (size.startsWith("grok:")) {
55
+ const parts = size.split(":");
56
+ if (parts.length < 3)
57
+ return { aspect_ratio: "auto" };
58
+ const res = parts[parts.length - 1];
59
+ const aspect = parts.slice(1, -1).join(":");
60
+ return {
61
+ aspect_ratio: SUPPORTED_ASPECTS.includes(aspect) ? aspect : "auto",
62
+ resolution: res === "2k" ? "2k" : "1k",
63
+ };
64
+ }
53
65
  const preset = PRESET_MAP[size];
54
66
  if (preset)
55
67
  return preset;
@@ -20,7 +20,14 @@ function videoConfig(ctx) {
20
20
  plannerTimeoutMs: g.plannerTimeoutMs || 60_000,
21
21
  };
22
22
  }
23
- function videoEndpoint(ctx, path) {
23
+ function videoEndpoint(ctx, path, directApiKey) {
24
+ if (directApiKey) {
25
+ const normalizedPath = path.startsWith("/") ? path : `/${path}`;
26
+ return {
27
+ url: `https://api.x.ai${normalizedPath}`,
28
+ headers: { "Content-Type": "application/json", Authorization: `Bearer ${directApiKey}` },
29
+ };
30
+ }
24
31
  return {
25
32
  url: getGrokProxyUrl(ctx, path),
26
33
  headers: { "Content-Type": "application/json", Authorization: "Bearer dummy" },
@@ -148,7 +155,7 @@ export async function planGrokVideo(prompt, ctx, options = {}) {
148
155
  const duration = options.duration ?? 5;
149
156
  const resolution = options.resolution || "480p";
150
157
  const aspectRatio = options.aspectRatio || "auto";
151
- const search = await searchGrokVisualContext(prompt, ctx, { signal: options.signal, requestId: options.requestId });
158
+ const search = await searchGrokVisualContext(prompt, ctx, { signal: options.signal, requestId: options.requestId, directApiKey: options.directApiKey });
152
159
  const referenceImageUrls = (options.referenceImages ?? []).map((img) => sourceImageUrl(img, undefined));
153
160
  const payload = buildGrokVideoPlannerPayload(prompt, {
154
161
  model: cfg.model,
@@ -162,7 +169,7 @@ export async function planGrokVideo(prompt, ctx, options = {}) {
162
169
  referenceImageUrls,
163
170
  continuityLineage: options.continuityLineage,
164
171
  });
165
- const { url, headers } = videoEndpoint(ctx, "/v1/chat/completions");
172
+ const { url, headers } = videoEndpoint(ctx, "/v1/chat/completions", options.directApiKey);
166
173
  const { combinedSignal, timer } = withTimeoutSignal(options.signal, cfg.plannerTimeoutMs);
167
174
  logEvent("grok", "video:planner:start", { requestId: options.requestId, mode, duration, resolution });
168
175
  try {
@@ -212,7 +219,7 @@ export function buildVideoGenerationPayload(plan, opts) {
212
219
  }
213
220
  export async function startVideoRequest(ctx, payload, options) {
214
221
  const cfg = videoConfig(ctx);
215
- const { url, headers } = videoEndpoint(ctx, "/v1/videos/generations");
222
+ const { url, headers } = videoEndpoint(ctx, "/v1/videos/generations", options.directApiKey);
216
223
  const { combinedSignal, timer } = withTimeoutSignal(options.signal, cfg.startTimeoutMs);
217
224
  try {
218
225
  const res = await fetch(url, { method: "POST", headers, body: JSON.stringify(payload), signal: combinedSignal });
@@ -251,9 +258,9 @@ export function normalizeVideoPoll(data) {
251
258
  failedCode: data?.error?.code,
252
259
  };
253
260
  }
254
- export async function pollVideoOnce(ctx, requestId, signal) {
261
+ export async function pollVideoOnce(ctx, requestId, signal, directApiKey) {
255
262
  const cfg = videoConfig(ctx);
256
- const { url, headers } = videoEndpoint(ctx, `/v1/videos/${requestId}`);
263
+ const { url, headers } = videoEndpoint(ctx, `/v1/videos/${requestId}`, directApiKey);
257
264
  const { combinedSignal, timer } = withTimeoutSignal(signal, cfg.startTimeoutMs);
258
265
  try {
259
266
  const res = await fetch(url, { method: "GET", headers, signal: combinedSignal });
@@ -293,7 +300,7 @@ export async function pollVideoUntilDone(ctx, requestId, options) {
293
300
  for (;;) {
294
301
  if (Date.now() > deadline)
295
302
  throw grokError("Grok video poll budget exceeded", 504, "GROK_VIDEO_TIMEOUT");
296
- const poll = await pollVideoOnce(ctx, requestId, options.signal);
303
+ const poll = await pollVideoOnce(ctx, requestId, options.signal, options.directApiKey);
297
304
  if (poll.status === "done")
298
305
  return poll;
299
306
  if (poll.status === "failed" || poll.status === "expired")