ima2-gen 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -11
- package/bin/commands/backfillThumbs.js +18 -0
- package/bin/commands/edit.js +7 -6
- package/bin/commands/gen.js +7 -6
- package/bin/commands/multimode.js +5 -4
- package/bin/commands/node.js +4 -4
- package/bin/ima2.js +7 -1
- package/bin/lib/config-store.js +1 -1
- package/docs/API.md +55 -4
- package/docs/CLI.md +9 -3
- package/docs/PROMPT_STUDIO.md +3 -1
- package/docs/migration/runtime-test-inventory.md +3 -1
- package/lib/agentRuntime.js +22 -16
- package/lib/agentSettings.js +1 -1
- package/lib/agyImageAdapter.js +232 -0
- package/lib/capabilities.js +2 -1
- package/lib/configKeys.js +1 -1
- package/lib/geminiApiImageAdapter.js +183 -0
- package/lib/grokImageAdapter.js +16 -9
- package/lib/grokMultimodeAdapter.js +2 -1
- package/lib/grokRuntime.js +3 -0
- package/lib/grokSizeMapper.js +13 -1
- package/lib/grokVideoAdapter.js +14 -7
- package/lib/historyList.js +18 -2
- package/lib/imageModels.js +15 -0
- package/lib/imageThumb.js +38 -0
- package/lib/providerOptions.js +36 -1
- package/lib/responsesFallback.js +52 -44
- package/lib/runtimeContext.js +27 -0
- package/lib/storageMigration.js +1 -1
- package/lib/thumbBackfill.js +59 -0
- package/lib/vertexAuth.js +44 -0
- package/lib/videoThumb.js +60 -0
- package/package.json +4 -2
- package/routes/auth.js +238 -0
- package/routes/edit.js +41 -7
- package/routes/generate.js +40 -12
- package/routes/history.js +13 -0
- package/routes/index.js +4 -0
- package/routes/keys.js +254 -0
- package/routes/multimode.js +39 -6
- package/routes/nodes.js +57 -35
- package/routes/quota.js +58 -7
- package/routes/video.js +7 -3
- package/server.js +123 -0
- package/ui/dist/.vite/manifest.json +12 -12
- package/ui/dist/assets/AgentWorkspace-CYv84Rus.js +3 -0
- package/ui/dist/assets/{CardNewsWorkspace-BN-ga1lG.js → CardNewsWorkspace-Dqyc1WZ1.js} +2 -2
- package/ui/dist/assets/{NodeCanvas-BbMa4IhI.js → NodeCanvas-ChEXzQbb.js} +2 -2
- package/ui/dist/assets/{PromptBuilderPanel-DRwBJRDQ.js → PromptBuilderPanel-B95ZufnR.js} +1 -1
- package/ui/dist/assets/{PromptImportDialog-Dp85kHCq.js → PromptImportDialog-DGOwFQET.js} +2 -2
- package/ui/dist/assets/{PromptImportDiscoverySection-BE8Q8MLD.js → PromptImportDiscoverySection-CgvdnR49.js} +1 -1
- package/ui/dist/assets/{PromptImportFolderSection-PtH5x0sc.js → PromptImportFolderSection-CfUye9J8.js} +1 -1
- package/ui/dist/assets/{PromptLibraryPanel-FnM9tHI9.js → PromptLibraryPanel-B9kndPw1.js} +2 -2
- package/ui/dist/assets/SettingsWorkspace-B3tgLrmF.js +1 -0
- package/ui/dist/assets/index-BhcvL0g-.js +1 -0
- package/ui/dist/assets/index-BtK3YhJc.js +39 -0
- package/ui/dist/assets/index-ClOLOjnA.css +1 -0
- package/ui/dist/index.html +2 -2
- package/ui/dist/assets/AgentWorkspace-C21zqdTZ.js +0 -3
- package/ui/dist/assets/SettingsWorkspace-MARPGyBL.js +0 -1
- package/ui/dist/assets/index-BAFI6htx.js +0 -42
- package/ui/dist/assets/index-BSXxr_Bt.js +0 -1
- package/ui/dist/assets/index-DS-ADE7U.css +0 -1
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { readFile, rm, stat, writeFile, mkdir } from "node:fs/promises";
|
|
3
|
+
import { extname, join, resolve } from "node:path";
|
|
4
|
+
import { homedir, tmpdir } from "node:os";
|
|
5
|
+
import { randomBytes } from "node:crypto";
|
|
6
|
+
import { logEvent } from "./logger.js";
|
|
7
|
+
import { detectImageMimeFromB64 } from "./refs.js";
|
|
8
|
+
// Maximum wall-clock time (ms) a single agy run may take before it is terminated.
const AGY_TIMEOUT_MS = 360_000;
// Resolution string quoted verbatim in the prompt sent to agy.
const AGY_OUTPUT_RESOLUTION = "1024x1024";
// Soft cap on how much stdout/stderr text is buffered from the agy process.
const AGY_MAX_OUTPUT_BYTES = 1024 * 1024;

/**
 * Build an Error decorated with an HTTP-style status and a machine-readable code.
 *
 * @param {string} message - Human-readable description.
 * @param {number} status - Status value stored on `err.status`.
 * @param {string} code - Identifier stored on `err.code`.
 * @returns {Error} The decorated error (not thrown here).
 */
function agyError(message, status, code) {
  return Object.assign(new Error(message), { status, code });
}
|
|
17
|
+
/**
 * Compose the instruction text that is piped to the agy CLI.
 *
 * The text asks for exactly one `default_api:generate_image` call and for a
 * single `RESULT|...` or `ERROR|...` line that the output parser can read.
 *
 * @param {string} userPrompt - The caller's image prompt (embedded JSON-quoted).
 * @param {string[]} referencePaths - Paths of reference images on disk.
 * @returns {string} Newline-joined prompt text.
 */
function buildAgyPrompt(userPrompt, referencePaths) {
  const refCount = referencePaths.length;
  const imagePathsJson = refCount === 0 ? "[]" : JSON.stringify(referencePaths);

  const intro = [
    "Please generate one image by calling the tool default_api:generate_image once.",
    "After the tool finishes, print one machine-readable result line so ima2-gen can copy the artifact.",
  ];
  const toolParameters = [
    "Tool parameters:",
    ` Prompt: ${JSON.stringify(userPrompt)}`,
    ' ImageName: "ima2_generated"',
    ` ImagePaths: ${imagePathsJson}`,
    ' toolSummary: "ima2 pipeline generation"',
    ' toolAction: "Generating ima2 image"',
  ];
  const outputContract = [
    `Reference count: ${refCount}. The output resolution is fixed at ${AGY_OUTPUT_RESOLUTION}.`,
    "If generation succeeds, print: RESULT|<absolute_artifact_path>|<file_extension>",
    "If generation fails, print: ERROR|<concise error message>",
  ];

  // Sections are separated by a single blank line.
  return [...intro, "", ...toolParameters, "", ...outputContract].join("\n");
}
|
|
37
|
+
/**
 * Extract the generated artifact location from agy's stdout.
 *
 * Recognised forms, in priority order:
 *   1. `RESULT|<path>|<ext>` line
 *   2. `ERROR|<message>` line (always throws)
 *   3. quota phrases anywhere in the output (throws 429)
 *   4. `SAVED_PATH=<path>` line
 *   5. any path containing `/brain/` or `/artifacts/` that ends in an image extension
 *
 * Lines are split on LF or CRLF and trimmed, so indented marker lines and
 * Windows line endings are handled and no stray "\r" ends up in the result.
 *
 * @param {string} stdout - Raw standard output captured from agy.
 * @returns {{artifactPath: string, ext: string}} Parsed path and extension (no leading dot).
 * @throws {Error} agyError with status 429 (quota) or 502 (malformed / failed / unparseable).
 */
function parseAgyOutput(stdout) {
  const text = typeof stdout === "string" ? stdout : String(stdout ?? "");
  const lines = text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0);

  const resultLine = lines.find((l) => l.startsWith("RESULT|"));
  if (resultLine) {
    const parts = resultLine.split("|").map((part) => part.trim());
    // An empty path is useless to the caller, so treat it as malformed.
    if (parts.length >= 3 && parts[1]) {
      return { artifactPath: parts[1], ext: parts[2].replace(/^\./, "") };
    }
    throw agyError(`Malformed RESULT line: ${resultLine}`, 502, "AGY_MALFORMED_RESULT");
  }

  const errorLine = lines.find((l) => l.startsWith("ERROR|"));
  if (errorLine) {
    const msg = errorLine.slice("ERROR|".length).trim() || "Unknown agy error";
    const lower = msg.toLowerCase();
    if (lower.includes("resource exhausted") || lower.includes("exhausted your capacity") || lower.includes("quota will reset")) {
      throw agyError(`Agy generation failed: ${msg}`, 429, "AGY_QUOTA_EXHAUSTED");
    }
    throw agyError(`Agy generation failed: ${msg}`, 502, "AGY_GENERATION_FAILED");
  }

  // Quota messages may be printed without the ERROR| prefix.
  const fullLower = text.toLowerCase();
  if (fullLower.includes("resource exhausted") || fullLower.includes("exhausted your capacity")) {
    throw agyError(`Agy quota exhausted: ${text.trim().slice(0, 200)}`, 429, "AGY_QUOTA_EXHAUSTED");
  }

  const savedPathLine = lines.find((l) => l.startsWith("SAVED_PATH="));
  if (savedPathLine) {
    const p = savedPathLine.slice("SAVED_PATH=".length).trim();
    // extname only looks at the final path segment, unlike splitting on ".".
    const ext = extname(p).slice(1) || "png";
    return { artifactPath: p, ext };
  }

  // Last resort: scan for an artifact-looking path, tolerating backslash separators.
  const normalizedStdout = text.replace(/\\/g, "/");
  const pathMatch = normalizedStdout.match(/\/[^\s"']+\/(brain|artifacts)\/[^\s"']+\.(png|jpg|jpeg|webp)/i);
  if (pathMatch) {
    const artifactPath = process.platform === "win32" ? pathMatch[0].replace(/\//g, "\\") : pathMatch[0];
    const ext = extname(artifactPath).slice(1) || "png";
    return { artifactPath, ext };
  }

  throw agyError(`Could not parse artifact path from agy output (${text.length} chars): ${text.slice(0, 200)}`, 502, "AGY_PARSE_FAILED");
}
|
|
75
|
+
/**
 * Run the `agy` CLI with the prompt on stdin and collect its output.
 *
 * The child gets a minimal, explicitly listed environment. The promise settles
 * exactly once: on process close, spawn error, timeout (AGY_TIMEOUT_MS) or
 * abort of the supplied signal. On timeout/abort the child is sent SIGTERM.
 *
 * @param {string} prompt - Text written to the child's stdin.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<{stdout: string, stderr: string}>} Captured output.
 * @throws {Error} agyError: 499 GENERATION_CANCELED, 504 AGY_TIMEOUT, 502 AGY_PROCESS_ERROR.
 */
function spawnAgy(prompt, signal) {
  return new Promise((fulfill, fail) => {
    // Do not start a process for a request that is already canceled.
    if (signal?.aborted) {
      fail(agyError("Generation canceled", 499, "GENERATION_CANCELED"));
      return;
    }

    const child = spawn("agy", ["-p", "-"], {
      stdio: ["pipe", "pipe", "pipe"],
      env: {
        PATH: process.env.PATH,
        HOME: process.env.HOME,
        USERPROFILE: process.env.USERPROFILE,
        TMPDIR: process.env.TMPDIR,
        TEMP: process.env.TEMP,
        LANG: process.env.LANG,
        GEMINI_API_KEY: process.env.GEMINI_API_KEY,
      },
    });

    let stdout = "";
    let stderr = "";
    let settled = false;
    let timer;

    // Single exit point: guarantees one settlement and releases timer + listener.
    const settle = (action) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      signal?.removeEventListener("abort", onAbort);
      action();
    };

    function onAbort() {
      settle(() => {
        child.kill("SIGTERM");
        fail(agyError("Generation canceled", 499, "GENERATION_CANCELED"));
      });
    }

    timer = setTimeout(() => {
      settle(() => {
        child.kill("SIGTERM");
        fail(agyError("Agy generation timed out", 504, "AGY_TIMEOUT"));
      });
    }, AGY_TIMEOUT_MS);

    signal?.addEventListener("abort", onAbort, { once: true });

    // Decode at the stream level so multi-byte UTF-8 sequences split across
    // chunks are reassembled instead of being corrupted by per-chunk toString().
    // The streams may be missing if the process could not be spawned.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", (chunk) => {
      if (stdout.length < AGY_MAX_OUTPUT_BYTES) {
        stdout += chunk;
      }
    });
    child.stderr?.on("data", (chunk) => {
      if (stderr.length < AGY_MAX_OUTPUT_BYTES) {
        stderr += chunk;
      }
    });

    child.on("error", (err) => {
      settle(() => fail(agyError(`Agy process error: ${err.message}`, 502, "AGY_PROCESS_ERROR")));
    });

    child.on("close", (code) => {
      settle(() => {
        // A non-zero exit is only fatal when nothing usable was printed.
        if (code !== 0 && !stdout.trim()) {
          fail(agyError(`Agy exited with code ${code}: ${stderr.slice(0, 200)}`, 502, "AGY_PROCESS_ERROR"));
          return;
        }
        fulfill({ stdout, stderr });
      });
    });

    // Write errors on stdin (e.g. the child exiting early) are reported via "close"/"error".
    child.stdin?.on("error", () => { });
    child.stdin?.end(prompt);
  });
}
|
|
144
|
+
// File extension used for each reference-image MIME type; anything else falls back to "png".
const MIME_TO_EXT = {
  "image/png": "png",
  "image/jpeg": "jpg",
  "image/webp": "webp",
};

/**
 * Materialise base64 reference images as files in a fresh temp directory.
 *
 * @param {Array<{b64: string, detectedMime?: string, declaredMime?: string}>} refs
 *   Reference images; the MIME type is taken from `detectedMime`, then
 *   `declaredMime`, then sniffed from the data, then defaults to PNG.
 * @returns {Promise<{paths: string[], cleanup: () => Promise<void>}>}
 *   Written file paths (in input order) and a best-effort cleanup function
 *   that removes the whole directory and never rejects.
 * @throws Propagates directory-creation / write / decode errors, after removing
 *   any partially written directory.
 */
async function writeRefsToTempFiles(refs) {
  if (refs.length === 0) {
    return { paths: [], cleanup: async () => { } };
  }

  const dir = join(tmpdir(), `ima2-agy-refs-${randomBytes(6).toString("hex")}`);
  const cleanup = async () => {
    await rm(dir, { recursive: true, force: true }).catch(() => { });
  };

  try {
    // Reference images are user data: keep the directory private to this user.
    await mkdir(dir, { recursive: true, mode: 0o700 });
    const paths = [];
    for (const [i, ref] of refs.entries()) {
      const mime = ref.detectedMime || ref.declaredMime || detectImageMimeFromB64(ref.b64) || "image/png";
      const ext = MIME_TO_EXT[mime] || "png";
      const p = join(dir, `ref_${i}.${ext}`);
      await writeFile(p, Buffer.from(ref.b64, "base64"));
      paths.push(p);
    }
    return { paths, cleanup };
  } catch (err) {
    // The caller never receives `cleanup` on failure, so remove the directory here.
    await cleanup();
    throw err;
  }
}
|
|
170
|
+
/**
 * Generate one image through the agy CLI and return it base64-encoded.
 *
 * Flow: write up to three reference images to temp files, build the agy
 * prompt, run agy, parse the artifact path from its output, check that the
 * path lies under an allowed directory, then read the file.
 * Temp reference files are removed in every outcome.
 *
 * @param {string} prompt - User prompt.
 * @param {object} [options]
 * @param {Array<object>} [options.references] - Reference images (first three are used).
 * @param {AbortSignal} [options.signal] - Cancellation signal forwarded to the process runner.
 * @param {string} [options.requestId] - Correlation id included in log events.
 * @returns {Promise<{b64: string, revisedPrompt: string, usage: {agy_artifact_bytes: number}, webSearchCalls: number, mime: string}>}
 * @throws {Error} agyError with 502 AGY_PATH_REJECTED / AGY_ARTIFACT_NOT_FOUND, plus
 *   whatever the process runner and output parser throw.
 */
export async function generateViaAgy(prompt, options = {}) {
  const refDetails = (options.references || []).slice(0, 3);
  const { paths: refPaths, cleanup } = await writeRefsToTempFiles(refDetails);
  try {
    // Built inside the try block so a failure here still removes the temp refs.
    const agyPrompt = buildAgyPrompt(prompt, refPaths);
    logEvent("agy", "generate:start", {
      requestId: options.requestId,
      promptChars: prompt.length,
      agyPromptChars: agyPrompt.length,
      refs: refPaths.length,
    });

    const { stdout, stderr } = await spawnAgy(agyPrompt, options.signal);
    if (stderr && stderr.trim().length > 0) {
      logEvent("agy", "generate:stderr", {
        requestId: options.requestId,
        stderrChars: stderr.length,
        stderrPreview: stderr.slice(0, 200),
      });
    }

    const { artifactPath } = parseAgyOutput(stdout);

    // The path comes from model-produced text: only read from known locations.
    const resolvedPath = resolve(artifactPath);
    const allowedPrefixes = [
      join(homedir(), ".gemini"),
      join(homedir(), ".cache"),
      tmpdir(),
    ];
    const normalizedResolved = resolvedPath.replace(/\\/g, "/");
    const isSafePath = allowedPrefixes.some((prefix) => {
      const normalizedPrefix = prefix.replace(/\\/g, "/");
      return normalizedResolved.startsWith(normalizedPrefix + "/") || normalizedResolved === normalizedPrefix;
    });
    if (!isSafePath) {
      throw agyError(`Agy artifact path outside allowed directories: ${resolvedPath}`, 502, "AGY_PATH_REJECTED");
    }

    let info;
    try {
      info = await stat(resolvedPath);
    }
    catch {
      throw agyError(`Agy artifact not found at parsed path: ${resolvedPath}`, 502, "AGY_ARTIFACT_NOT_FOUND");
    }
    // A directory (e.g. an allowed prefix itself) would otherwise fail in readFile
    // with a raw EISDIR error that carries no status/code.
    if (!info.isFile()) {
      throw agyError(`Agy artifact is not a regular file: ${resolvedPath}`, 502, "AGY_ARTIFACT_NOT_FOUND");
    }

    const buffer = await readFile(resolvedPath);
    const b64 = buffer.toString("base64");
    const mime = detectImageMimeFromB64(b64) || "image/png";
    logEvent("agy", "generate:done", {
      requestId: options.requestId,
      artifactPath,
      b64Len: b64.length,
      mime,
      fileBytes: buffer.length,
    });
    return {
      b64,
      revisedPrompt: prompt,
      usage: { agy_artifact_bytes: buffer.length },
      webSearchCalls: 0,
      mime,
    };
  }
  finally {
    await cleanup();
  }
}
|
package/lib/capabilities.js
CHANGED
|
@@ -3,7 +3,7 @@ import { KEY_TO_ENV, WRITABLE_CONFIG_KEYS } from "./configKeys.js";
|
|
|
3
3
|
import { DEFAULT_IMAGE_QUALITY, VALID_IMAGE_QUALITIES } from "./oauthNormalize.js";
|
|
4
4
|
const MAX_GENERATED_IMAGES = 8;
|
|
5
5
|
const VALID_MODES = ["auto", "direct"];
|
|
6
|
-
const VALID_PROVIDERS = ["auto", "oauth", "api", "grok"];
|
|
6
|
+
const VALID_PROVIDERS = ["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"];
|
|
7
7
|
const AGENT_COMMANDS = [
|
|
8
8
|
"skill",
|
|
9
9
|
"capabilities",
|
|
@@ -55,6 +55,7 @@ export function buildIma2Capabilities({ appConfig = runtimeConfigDefault, packag
|
|
|
55
55
|
supported: toArray(appConfig.imageModels.valid),
|
|
56
56
|
unsupported: toArray(appConfig.imageModels.unsupported),
|
|
57
57
|
grokSupported: ["grok-imagine-image", "grok-imagine-image-quality"],
|
|
58
|
+
geminiSupported: ["nano-banana-2", "nano-banana-pro"],
|
|
58
59
|
},
|
|
59
60
|
videoModels: {
|
|
60
61
|
supported: ["grok-imagine-video", "grok-imagine-video-1.5-preview"],
|
package/lib/configKeys.js
CHANGED
|
@@ -52,7 +52,7 @@ export const KEY_TO_ENV = {
|
|
|
52
52
|
"history.defaultPageSize": "IMA2_HISTORY_PAGE_SIZE",
|
|
53
53
|
};
|
|
54
54
|
const REDACT_PATTERN = /token|secret|apikey|password/i;
|
|
55
|
-
const ALWAYS_REDACT = new Set(["provider", "apiKey", "oauth.token", "oauth.refreshToken"]);
|
|
55
|
+
const ALWAYS_REDACT = new Set(["provider", "apiKey", "oauth.token", "oauth.refreshToken", "vertexServiceAccountJson"]);
|
|
56
56
|
export function isSensitiveConfigKey(key) {
|
|
57
57
|
return ALWAYS_REDACT.has(key) || REDACT_PATTERN.test(key);
|
|
58
58
|
}
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import { logEvent } from "./logger.js";
|
|
2
|
+
import { detectImageMimeFromB64 } from "./refs.js";
|
|
3
|
+
import { getVertexAccessToken, getVertexProjectId, isVertexInitialized } from "./vertexAuth.js";
|
|
4
|
+
// Maps the product-facing model names to the model ids placed in the request URL.
const MODEL_ID_MAP = {
  "nano-banana-2": "gemini-3.1-flash-image",
  "nano-banana-pro": "gemini-3-pro-image",
};
// Per-request timeout (ms) applied to the generateContent call.
const GEMINI_TIMEOUT_MS = 120_000;

/**
 * Translate a "<width>x<height>" size string into numeric aspect-ratio and
 * image-size codes for the request's `response_format.image` section.
 *
 * The aspect code is that of the table entry whose ratio is numerically
 * closest to width/height; the size code is a bucket of the longer side
 * (<=512 -> 1, <=1024 -> 2, <=2048 -> 3, else 4).
 * NOTE(review): the meaning of each numeric code is defined by the upstream
 * API and is not visible here — confirm against the API's enum definitions.
 *
 * @param {string} [size] - e.g. "1920x1080", "auto", or undefined.
 * @returns {{aspectRatio: number, imageSize: number}} `{1, 0}` for missing,
 *   "auto", "1024x1024", unparseable or zero-dimension input.
 */
function parseGeminiImageParams(size) {
  const defaults = () => ({ aspectRatio: 1, imageSize: 0 });
  if (typeof size !== "string" || size === "auto" || size === "1024x1024") {
    return defaults();
  }
  const match = size.match(/^(\d+)x(\d+)$/);
  if (!match) {
    return defaults();
  }
  const w = Number(match[1]);
  const h = Number(match[2]);
  const ratio = w / h;
  // A zero (or overflowing) dimension has no meaningful ratio; without this
  // guard the search below would compare against Infinity/NaN.
  if (!(w > 0 && h > 0) || !Number.isFinite(ratio)) {
    return defaults();
  }

  // [code, width / height]
  const ratioMap = [
    [1, 1], [2, 2 / 3], [3, 3 / 2], [4, 3 / 4], [5, 4 / 3],
    [6, 4 / 5], [7, 5 / 4], [8, 9 / 16], [9, 16 / 9], [10, 21 / 9],
    [11, 1 / 8], [12, 8], [13, 1 / 4], [14, 4],
  ];
  let bestEnum = 1;
  let bestDist = Infinity;
  for (const [enumVal, val] of ratioMap) {
    const dist = Math.abs(ratio - val);
    if (dist < bestDist) {
      bestDist = dist;
      bestEnum = enumVal;
    }
  }

  const maxDim = Math.max(w, h);
  const imageSize = maxDim <= 512 ? 1 : maxDim <= 1024 ? 2 : maxDim <= 2048 ? 3 : 4;
  return { aspectRatio: bestEnum, imageSize };
}
|
|
36
|
+
/**
 * Build an Error carrying an HTTP-style status and a machine-readable code.
 *
 * @param {string} message - Human-readable description.
 * @param {number} status - Value stored on `err.status`.
 * @param {string} code - Value stored on `err.code`.
 * @returns {Error} The decorated error (not thrown here).
 */
function geminiApiError(message, status, code) {
  return Object.assign(new Error(message), { status, code });
}
|
|
42
|
+
/**
 * Resolve a model name to the id used in the request URL.
 *
 * Names listed in MODEL_ID_MAP are translated; any other value is returned
 * unchanged. Only own keys are consulted, so names such as "constructor" are
 * passed through instead of resolving to inherited Object members.
 *
 * @param {string} model - Requested model name.
 * @returns {string} Mapped id, or `model` itself when there is no mapping.
 */
function resolveGeminiModelId(model) {
  return Object.hasOwn(MODEL_ID_MAP, model) ? MODEL_ID_MAP[model] : model;
}
|
|
45
|
+
/**
 * Build the `contents` array for a generateContent request.
 *
 * Up to three reference images are emitted first as `inlineData` parts,
 * followed by a single text part holding the prompt.
 *
 * MIME precedence is detected -> declared -> sniffed -> "image/png", matching
 * the agy adapter: the type detected from the bytes is preferred over the
 * caller-declared one so the label agrees with the data that is sent.
 *
 * @param {string} prompt - Text prompt.
 * @param {Array<{b64: string, detectedMime?: string, declaredMime?: string}>} [references=[]]
 * @returns {Array<{role: string, parts: object[]}>} One user turn.
 */
function buildContents(prompt, references = []) {
  const imageParts = references.slice(0, 3).map((ref) => ({
    inlineData: {
      mimeType: ref.detectedMime || ref.declaredMime || detectImageMimeFromB64(ref.b64) || "image/png",
      data: ref.b64,
    },
  }));
  return [{ role: "user", parts: [...imageParts, { text: prompt }] }];
}
|
|
61
|
+
/**
 * Generate one image via the Gemini generateContent endpoint.
 *
 * Authentication: Vertex AI (bearer token) is used when `ctx.geminiAuthMode`
 * is "vertex" and Vertex is ready, or when no API key is configured but
 * Vertex is ready; otherwise the API key is sent in `x-goog-api-key`.
 *
 * @param {string} prompt - Text prompt.
 * @param {object} ctx - Runtime context; reads `geminiApiKey`, `hasVertexKey`, `geminiAuthMode`.
 * @param {object} [options]
 * @param {string} [options.model="nano-banana-2"] - Model name (mapped to an API id).
 * @param {Array<object>} [options.references] - Reference images (first three are used).
 * @param {string} [options.size] - Requested size; only honoured on the API-key path.
 * @param {AbortSignal} [options.signal] - Caller cancellation signal.
 * @param {string} [options.requestId] - Correlation id for log events.
 * @returns {Promise<{b64: string, revisedPrompt: string, usage: object, webSearchCalls: number, mime: string}>}
 * @throws {Error} geminiApiError: 401 missing credentials, 429 rate limited,
 *   400/403 request rejected, 400 safety block, 499 canceled, 504 timeout,
 *   502 for other upstream/network failures or a response without an image.
 */
export async function generateViaGeminiApi(prompt, ctx, options = {}) {
  const apiKey = ctx.geminiApiKey;
  const vertexReady = ctx.hasVertexKey && isVertexInitialized();
  const authMode = ctx.geminiAuthMode;
  const useVertex = authMode === "vertex" ? vertexReady : (!apiKey && vertexReady);
  if (!apiKey && !useVertex) {
    throw geminiApiError("Gemini API key or Vertex AI credentials not configured", 401, "GEMINI_API_KEY_MISSING");
  }

  const model = options.model || "nano-banana-2";
  const apiModelId = resolveGeminiModelId(model);
  const references = (options.references || []).slice(0, 3);

  let url;
  let authHeaders;
  if (useVertex) {
    const token = await getVertexAccessToken();
    const projectId = getVertexProjectId();
    url = `https://aiplatform.googleapis.com/v1/projects/${projectId}/locations/global/publishers/google/models/${apiModelId}:generateContent`;
    authHeaders = { "Content-Type": "application/json", "Authorization": `Bearer ${token}` };
  }
  else {
    url = `https://generativelanguage.googleapis.com/v1beta/models/${apiModelId}:generateContent`;
    authHeaders = { "Content-Type": "application/json", "x-goog-api-key": apiKey };
  }

  const imageParams = parseGeminiImageParams(options.size);
  // NOTE: Vertex (aiplatform.googleapis.com) rejects the response_format field that the
  // direct Gemini API accepts, so the Vertex path can only request modalities — output
  // defaults to 1K/1:1 regardless of requested size. Direct API path honors aspect/size.
  const generationConfig = useVertex
    ? { responseModalities: ["TEXT", "IMAGE"] }
    : {
      response_modalities: ["TEXT", "IMAGE"],
      response_format: {
        image: {
          aspect_ratio: imageParams.aspectRatio,
          image_size: imageParams.imageSize,
        },
      },
    };
  const configKey = useVertex ? "generationConfig" : "generation_config";
  const body = { contents: buildContents(prompt, references), [configKey]: generationConfig };

  logEvent("gemini-api", "generate:start", {
    requestId: options.requestId,
    model,
    apiModelId,
    promptChars: prompt.length,
    refs: references.length,
  });

  const timeoutSignal = AbortSignal.timeout(GEMINI_TIMEOUT_MS);
  const combinedSignal = options.signal
    ? AbortSignal.any([options.signal, timeoutSignal])
    : timeoutSignal;

  try {
    const res = await fetch(url, {
      method: "POST",
      headers: authHeaders,
      body: JSON.stringify(body),
      signal: combinedSignal,
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      if (res.status === 429) {
        throw geminiApiError(`Gemini API rate limited: ${text.slice(0, 200)}`, 429, "GEMINI_API_RATE_LIMITED");
      }
      if (res.status === 400 || res.status === 403) {
        throw geminiApiError(`Gemini API error: ${text.slice(0, 200)}`, res.status, "GEMINI_API_BAD_REQUEST");
      }
      throw geminiApiError(`Gemini API error (${res.status}): ${text.slice(0, 200)}`, 502, "GEMINI_API_UPSTREAM_ERROR");
    }

    const json = await res.json();
    // Extract image from candidates[0].content.parts[]; the last inline image wins.
    const parts = json?.candidates?.[0]?.content?.parts || [];
    let b64 = null;
    let textResponse = "";
    let mime = "image/png";
    for (const part of parts) {
      if (part.inlineData?.data) {
        b64 = part.inlineData.data;
        mime = part.inlineData.mimeType || "image/png";
      }
      if (part.text) {
        textResponse += part.text;
      }
    }

    if (!b64) {
      // Check for safety block
      const finishReason = json?.candidates?.[0]?.finishReason;
      if (finishReason === "SAFETY") {
        throw geminiApiError("Gemini API: generation blocked by safety filter", 400, "GEMINI_API_SAFETY_BLOCKED");
      }
      throw geminiApiError(`Gemini API: no image in response (finishReason: ${finishReason || "unknown"})`, 502, "GEMINI_API_NO_IMAGE");
    }

    const usageMetadata = json?.usageMetadata || {};
    logEvent("gemini-api", "generate:done", {
      requestId: options.requestId,
      model,
      b64Len: b64.length,
      mime,
      textResponseLen: textResponse.length,
    });
    return {
      b64,
      revisedPrompt: textResponse || prompt,
      usage: {
        promptTokens: usageMetadata.promptTokenCount || 0,
        candidatesTokens: usageMetadata.candidatesTokenCount || 0,
        totalTokens: usageMetadata.totalTokenCount || 0,
      },
      webSearchCalls: 0,
      mime,
    };
  }
  catch (e) {
    // Errors raised above already carry status + code: pass them through untouched.
    if (e?.code && e?.status) {
      throw e;
    }
    if (options.signal?.aborted) {
      throw geminiApiError("Generation canceled", 499, "GENERATION_CANCELED");
    }
    // AbortSignal.timeout() aborts with a "TimeoutError" DOMException (not
    // "AbortError"), so the signal state and both names are checked.
    if (timeoutSignal.aborted || e?.name === "TimeoutError" || e?.name === "AbortError") {
      throw geminiApiError("Gemini API generation timed out", 504, "GENERATION_TIMEOUT");
    }
    const failure = geminiApiError(`Gemini API request failed: ${e?.message}`, 502, "GEMINI_API_NETWORK_FAILED");
    failure.cause = e;
    throw failure;
  }
}
|
package/lib/grokImageAdapter.js
CHANGED
|
@@ -2,7 +2,14 @@ import { logEvent } from "./logger.js";
|
|
|
2
2
|
import { mapSizeToGrokImageParams } from "./grokSizeMapper.js";
|
|
3
3
|
import { detectImageMimeFromB64 } from "./refs.js";
|
|
4
4
|
import { getGrokProxyUrl } from "./grokRuntime.js";
|
|
5
|
-
function getGrokEndpoint(ctx, path = "/v1/images/generations") {
|
|
5
|
+
function getGrokEndpoint(ctx, path = "/v1/images/generations", directApiKey) {
|
|
6
|
+
if (directApiKey) {
|
|
7
|
+
const normalizedPath = path.startsWith("/") ? path : `/${path}`;
|
|
8
|
+
return {
|
|
9
|
+
url: `https://api.x.ai${normalizedPath}`,
|
|
10
|
+
headers: { "Content-Type": "application/json", Authorization: `Bearer ${directApiKey}` },
|
|
11
|
+
};
|
|
12
|
+
}
|
|
6
13
|
return {
|
|
7
14
|
url: getGrokProxyUrl(ctx, path),
|
|
8
15
|
headers: { "Content-Type": "application/json", Authorization: "Bearer dummy" },
|
|
@@ -63,8 +70,8 @@ function extractResponsesText(response) {
|
|
|
63
70
|
}
|
|
64
71
|
return chunks.join("\n\n").trim();
|
|
65
72
|
}
|
|
66
|
-
export async function postGrokImages(ctx, payload, signal, path = "/v1/images/generations") {
|
|
67
|
-
const { url, headers } = getGrokEndpoint(ctx, path);
|
|
73
|
+
export async function postGrokImages(ctx, payload, signal, path = "/v1/images/generations", directApiKey) {
|
|
74
|
+
const { url, headers } = getGrokEndpoint(ctx, path, directApiKey);
|
|
68
75
|
const timeoutMs = getGrokTimeout(ctx);
|
|
69
76
|
const { combinedSignal, timer } = withTimeoutSignal(signal, timeoutMs);
|
|
70
77
|
try {
|
|
@@ -244,7 +251,7 @@ export function buildGrokSearchPayload(prompt, plannerModel = "grok-4.3") {
|
|
|
244
251
|
export async function searchGrokVisualContext(prompt, ctx, options = {}) {
|
|
245
252
|
const planner = getPlannerConfig(ctx);
|
|
246
253
|
const payload = buildGrokSearchPayload(prompt, planner.model);
|
|
247
|
-
const { url, headers } = getGrokEndpoint(ctx, "/v1/responses");
|
|
254
|
+
const { url, headers } = getGrokEndpoint(ctx, "/v1/responses", options.directApiKey);
|
|
248
255
|
const { combinedSignal, timer } = withTimeoutSignal(options.signal, planner.timeoutMs);
|
|
249
256
|
logEvent("grok", "search:start", { requestId: options.requestId, plannerModel: planner.model, promptChars: prompt.length });
|
|
250
257
|
try {
|
|
@@ -305,9 +312,9 @@ export async function planGrokImage(prompt, ctx, options = {}) {
|
|
|
305
312
|
const imageModel = options.model || ctx.config.grokProvider?.defaultImageModel || "grok-imagine-image";
|
|
306
313
|
const planner = getPlannerConfig(ctx);
|
|
307
314
|
const sizeParams = mapSizeToGrokImageParams(options.size);
|
|
308
|
-
const search = await searchGrokVisualContext(prompt, ctx, { signal: options.signal, requestId: options.requestId });
|
|
315
|
+
const search = await searchGrokVisualContext(prompt, ctx, { signal: options.signal, requestId: options.requestId, directApiKey: options.directApiKey });
|
|
309
316
|
const payload = buildGrokPlannerPayload(prompt, imageModel, options.size, sizeParams, planner.model, search.summary, options.references || options.referenceCount || 0);
|
|
310
|
-
const { url, headers } = getGrokEndpoint(ctx, "/v1/chat/completions");
|
|
317
|
+
const { url, headers } = getGrokEndpoint(ctx, "/v1/chat/completions", options.directApiKey);
|
|
311
318
|
const { combinedSignal, timer } = withTimeoutSignal(options.signal, planner.timeoutMs);
|
|
312
319
|
logEvent("grok", "planner:start", { requestId: options.requestId, plannerModel: planner.model, imageModel, size: options.size });
|
|
313
320
|
try {
|
|
@@ -356,7 +363,7 @@ export async function generateViaGrok(prompt, ctx, options = {}) {
|
|
|
356
363
|
const references = options.references || [];
|
|
357
364
|
const plan = options.plannedPrompt
|
|
358
365
|
? { prompt: options.plannedPrompt, model, webSearchCalls: options.webSearchCalls ?? 1 }
|
|
359
|
-
: await planGrokImage(prompt, ctx, { ...options, referenceCount: references.length });
|
|
366
|
+
: await planGrokImage(prompt, ctx, { ...options, referenceCount: references.length, directApiKey: options.directApiKey });
|
|
360
367
|
const hasReferences = references.length > 0;
|
|
361
368
|
const payload = hasReferences
|
|
362
369
|
? imageEditPayload(model, plan.prompt, references, options.size)
|
|
@@ -370,7 +377,7 @@ export async function generateViaGrok(prompt, ctx, options = {}) {
|
|
|
370
377
|
size: options.size,
|
|
371
378
|
refs: references.length,
|
|
372
379
|
});
|
|
373
|
-
const result = await postGrokImages(ctx, payload, options.signal, endpoint);
|
|
380
|
+
const result = await postGrokImages(ctx, payload, options.signal, endpoint, options.directApiKey);
|
|
374
381
|
if (!result.data?.[0]?.b64_json) {
|
|
375
382
|
throw grokError("Grok returned empty image data", 502, "GROK_EMPTY_RESPONSE");
|
|
376
383
|
}
|
|
@@ -390,7 +397,7 @@ export async function editViaGrok(prompt, imageB64, ctx, options = {}) {
|
|
|
390
397
|
const imageUrl = imageB64.startsWith("data:") ? imageB64 : `data:${detectedInputMime};base64,${imageB64}`;
|
|
391
398
|
const payload = { model, prompt, n: 1, response_format: "b64_json", image: { type: "image_url", url: imageUrl }, ...mapSizeToGrokImageParams(options.size) };
|
|
392
399
|
logEvent("grok", "edit:start", { requestId: options.requestId, model, promptChars: prompt.length });
|
|
393
|
-
const result = await postGrokImages(ctx, payload, options.signal, "/v1/images/edits");
|
|
400
|
+
const result = await postGrokImages(ctx, payload, options.signal, "/v1/images/edits", options.directApiKey);
|
|
394
401
|
if (!result.data?.[0]?.b64_json) {
|
|
395
402
|
throw grokError("Grok edit returned empty image data", 502, "GROK_EMPTY_RESPONSE");
|
|
396
403
|
}
|
|
@@ -19,6 +19,7 @@ export async function generateMultimodeViaGrok(prompt, ctx, options = {}) {
|
|
|
19
19
|
signal: options.signal,
|
|
20
20
|
requestId: options.requestId,
|
|
21
21
|
references,
|
|
22
|
+
directApiKey: options.directApiKey,
|
|
22
23
|
});
|
|
23
24
|
totalWebSearchCalls += plan.webSearchCalls;
|
|
24
25
|
const endpoint = references.length > 0 ? "/v1/images/edits" : "/v1/images/generations";
|
|
@@ -33,7 +34,7 @@ export async function generateMultimodeViaGrok(prompt, ctx, options = {}) {
|
|
|
33
34
|
refs: references.length,
|
|
34
35
|
promptChars: plan.prompt.length,
|
|
35
36
|
});
|
|
36
|
-
const result = await postGrokImages(ctx, payload, options.signal, endpoint);
|
|
37
|
+
const result = await postGrokImages(ctx, payload, options.signal, endpoint, options.directApiKey);
|
|
37
38
|
if (result.data?.[0]?.b64_json) {
|
|
38
39
|
const img = { b64: result.data[0].b64_json, mime: result.data[0].mime_type, revisedPrompt: plan.prompt };
|
|
39
40
|
images.push(img);
|
package/lib/grokRuntime.js
CHANGED
|
@@ -16,3 +16,6 @@ export function getGrokProxyUrl(ctx = {}, path = "/v1") {
|
|
|
16
16
|
const normalizedPath = path.startsWith("/") ? path : `/${path}`;
|
|
17
17
|
return `${getGrokProxyBaseUrl(ctx)}${normalizedPath}`;
|
|
18
18
|
}
|
|
19
|
+
/**
 * Return the hard-coded xAI API origin (scheme + host, no trailing slash).
 *
 * @returns {string} Always "https://api.x.ai".
 */
export function getGrokDirectBaseUrl() {
  const directBaseUrl = "https://api.x.ai";
  return directBaseUrl;
}
|
package/lib/grokSizeMapper.js
CHANGED
|
@@ -37,7 +37,7 @@ function parseSize(size) {
|
|
|
37
37
|
}
|
|
38
38
|
function aspectValue(aspect) {
|
|
39
39
|
const [w, h] = aspect.split(":").map(Number);
|
|
40
|
-
return w / h;
|
|
40
|
+
return Number.isFinite(h) && h !== 0 ? w / h : 1;
|
|
41
41
|
}
|
|
42
42
|
function closestAspect(w, h) {
|
|
43
43
|
const target = w / h;
|
|
@@ -50,6 +50,18 @@ function closestAspect(w, h) {
|
|
|
50
50
|
export function mapSizeToGrokImageParams(size) {
|
|
51
51
|
if (!size || size === "auto")
|
|
52
52
|
return { aspect_ratio: "auto" };
|
|
53
|
+
// Native format from GrokSizePicker: "grok:<aspect_ratio>:<resolution>"
|
|
54
|
+
if (size.startsWith("grok:")) {
|
|
55
|
+
const parts = size.split(":");
|
|
56
|
+
if (parts.length < 3)
|
|
57
|
+
return { aspect_ratio: "auto" };
|
|
58
|
+
const res = parts[parts.length - 1];
|
|
59
|
+
const aspect = parts.slice(1, -1).join(":");
|
|
60
|
+
return {
|
|
61
|
+
aspect_ratio: SUPPORTED_ASPECTS.includes(aspect) ? aspect : "auto",
|
|
62
|
+
resolution: res === "2k" ? "2k" : "1k",
|
|
63
|
+
};
|
|
64
|
+
}
|
|
53
65
|
const preset = PRESET_MAP[size];
|
|
54
66
|
if (preset)
|
|
55
67
|
return preset;
|
package/lib/grokVideoAdapter.js
CHANGED
|
@@ -20,7 +20,14 @@ function videoConfig(ctx) {
|
|
|
20
20
|
plannerTimeoutMs: g.plannerTimeoutMs || 60_000,
|
|
21
21
|
};
|
|
22
22
|
}
|
|
23
|
-
function videoEndpoint(ctx, path) {
|
|
23
|
+
function videoEndpoint(ctx, path, directApiKey) {
|
|
24
|
+
if (directApiKey) {
|
|
25
|
+
const normalizedPath = path.startsWith("/") ? path : `/${path}`;
|
|
26
|
+
return {
|
|
27
|
+
url: `https://api.x.ai${normalizedPath}`,
|
|
28
|
+
headers: { "Content-Type": "application/json", Authorization: `Bearer ${directApiKey}` },
|
|
29
|
+
};
|
|
30
|
+
}
|
|
24
31
|
return {
|
|
25
32
|
url: getGrokProxyUrl(ctx, path),
|
|
26
33
|
headers: { "Content-Type": "application/json", Authorization: "Bearer dummy" },
|
|
@@ -148,7 +155,7 @@ export async function planGrokVideo(prompt, ctx, options = {}) {
|
|
|
148
155
|
const duration = options.duration ?? 5;
|
|
149
156
|
const resolution = options.resolution || "480p";
|
|
150
157
|
const aspectRatio = options.aspectRatio || "auto";
|
|
151
|
-
const search = await searchGrokVisualContext(prompt, ctx, { signal: options.signal, requestId: options.requestId });
|
|
158
|
+
const search = await searchGrokVisualContext(prompt, ctx, { signal: options.signal, requestId: options.requestId, directApiKey: options.directApiKey });
|
|
152
159
|
const referenceImageUrls = (options.referenceImages ?? []).map((img) => sourceImageUrl(img, undefined));
|
|
153
160
|
const payload = buildGrokVideoPlannerPayload(prompt, {
|
|
154
161
|
model: cfg.model,
|
|
@@ -162,7 +169,7 @@ export async function planGrokVideo(prompt, ctx, options = {}) {
|
|
|
162
169
|
referenceImageUrls,
|
|
163
170
|
continuityLineage: options.continuityLineage,
|
|
164
171
|
});
|
|
165
|
-
const { url, headers } = videoEndpoint(ctx, "/v1/chat/completions");
|
|
172
|
+
const { url, headers } = videoEndpoint(ctx, "/v1/chat/completions", options.directApiKey);
|
|
166
173
|
const { combinedSignal, timer } = withTimeoutSignal(options.signal, cfg.plannerTimeoutMs);
|
|
167
174
|
logEvent("grok", "video:planner:start", { requestId: options.requestId, mode, duration, resolution });
|
|
168
175
|
try {
|
|
@@ -212,7 +219,7 @@ export function buildVideoGenerationPayload(plan, opts) {
|
|
|
212
219
|
}
|
|
213
220
|
export async function startVideoRequest(ctx, payload, options) {
|
|
214
221
|
const cfg = videoConfig(ctx);
|
|
215
|
-
const { url, headers } = videoEndpoint(ctx, "/v1/videos/generations");
|
|
222
|
+
const { url, headers } = videoEndpoint(ctx, "/v1/videos/generations", options.directApiKey);
|
|
216
223
|
const { combinedSignal, timer } = withTimeoutSignal(options.signal, cfg.startTimeoutMs);
|
|
217
224
|
try {
|
|
218
225
|
const res = await fetch(url, { method: "POST", headers, body: JSON.stringify(payload), signal: combinedSignal });
|
|
@@ -251,9 +258,9 @@ export function normalizeVideoPoll(data) {
|
|
|
251
258
|
failedCode: data?.error?.code,
|
|
252
259
|
};
|
|
253
260
|
}
|
|
254
|
-
export async function pollVideoOnce(ctx, requestId, signal) {
|
|
261
|
+
export async function pollVideoOnce(ctx, requestId, signal, directApiKey) {
|
|
255
262
|
const cfg = videoConfig(ctx);
|
|
256
|
-
const { url, headers } = videoEndpoint(ctx, `/v1/videos/${requestId}`);
|
|
263
|
+
const { url, headers } = videoEndpoint(ctx, `/v1/videos/${requestId}`, directApiKey);
|
|
257
264
|
const { combinedSignal, timer } = withTimeoutSignal(signal, cfg.startTimeoutMs);
|
|
258
265
|
try {
|
|
259
266
|
const res = await fetch(url, { method: "GET", headers, signal: combinedSignal });
|
|
@@ -293,7 +300,7 @@ export async function pollVideoUntilDone(ctx, requestId, options) {
|
|
|
293
300
|
for (;;) {
|
|
294
301
|
if (Date.now() > deadline)
|
|
295
302
|
throw grokError("Grok video poll budget exceeded", 504, "GROK_VIDEO_TIMEOUT");
|
|
296
|
-
const poll = await pollVideoOnce(ctx, requestId, options.signal);
|
|
303
|
+
const poll = await pollVideoOnce(ctx, requestId, options.signal, options.directApiKey);
|
|
297
304
|
if (poll.status === "done")
|
|
298
305
|
return poll;
|
|
299
306
|
if (poll.status === "failed" || poll.status === "expired")
|