ima2-gen 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -11
- package/bin/commands/backfillThumbs.js +18 -0
- package/bin/commands/edit.js +7 -6
- package/bin/commands/gen.js +7 -6
- package/bin/commands/multimode.js +5 -4
- package/bin/commands/node.js +4 -4
- package/bin/ima2.js +7 -1
- package/bin/lib/config-store.js +1 -1
- package/docs/API.md +55 -4
- package/docs/CLI.md +9 -3
- package/docs/PROMPT_STUDIO.md +3 -1
- package/docs/migration/runtime-test-inventory.md +3 -1
- package/lib/agentRuntime.js +22 -16
- package/lib/agentSettings.js +1 -1
- package/lib/agyImageAdapter.js +232 -0
- package/lib/capabilities.js +2 -1
- package/lib/configKeys.js +1 -1
- package/lib/geminiApiImageAdapter.js +183 -0
- package/lib/grokImageAdapter.js +16 -9
- package/lib/grokMultimodeAdapter.js +2 -1
- package/lib/grokRuntime.js +3 -0
- package/lib/grokSizeMapper.js +13 -1
- package/lib/grokVideoAdapter.js +14 -7
- package/lib/historyList.js +18 -2
- package/lib/imageModels.js +15 -0
- package/lib/imageThumb.js +38 -0
- package/lib/providerOptions.js +36 -1
- package/lib/responsesFallback.js +52 -44
- package/lib/runtimeContext.js +27 -0
- package/lib/storageMigration.js +1 -1
- package/lib/thumbBackfill.js +59 -0
- package/lib/vertexAuth.js +44 -0
- package/lib/videoThumb.js +60 -0
- package/package.json +4 -2
- package/routes/auth.js +238 -0
- package/routes/edit.js +41 -7
- package/routes/generate.js +40 -12
- package/routes/history.js +13 -0
- package/routes/index.js +4 -0
- package/routes/keys.js +254 -0
- package/routes/multimode.js +39 -6
- package/routes/nodes.js +57 -35
- package/routes/quota.js +58 -7
- package/routes/video.js +7 -3
- package/server.js +123 -0
- package/ui/dist/.vite/manifest.json +12 -12
- package/ui/dist/assets/AgentWorkspace-CYv84Rus.js +3 -0
- package/ui/dist/assets/{CardNewsWorkspace-BN-ga1lG.js → CardNewsWorkspace-Dqyc1WZ1.js} +2 -2
- package/ui/dist/assets/{NodeCanvas-BbMa4IhI.js → NodeCanvas-ChEXzQbb.js} +2 -2
- package/ui/dist/assets/{PromptBuilderPanel-DRwBJRDQ.js → PromptBuilderPanel-B95ZufnR.js} +1 -1
- package/ui/dist/assets/{PromptImportDialog-Dp85kHCq.js → PromptImportDialog-DGOwFQET.js} +2 -2
- package/ui/dist/assets/{PromptImportDiscoverySection-BE8Q8MLD.js → PromptImportDiscoverySection-CgvdnR49.js} +1 -1
- package/ui/dist/assets/{PromptImportFolderSection-PtH5x0sc.js → PromptImportFolderSection-CfUye9J8.js} +1 -1
- package/ui/dist/assets/{PromptLibraryPanel-FnM9tHI9.js → PromptLibraryPanel-B9kndPw1.js} +2 -2
- package/ui/dist/assets/SettingsWorkspace-B3tgLrmF.js +1 -0
- package/ui/dist/assets/index-BhcvL0g-.js +1 -0
- package/ui/dist/assets/index-BtK3YhJc.js +39 -0
- package/ui/dist/assets/index-ClOLOjnA.css +1 -0
- package/ui/dist/index.html +2 -2
- package/ui/dist/assets/AgentWorkspace-C21zqdTZ.js +0 -3
- package/ui/dist/assets/SettingsWorkspace-MARPGyBL.js +0 -1
- package/ui/dist/assets/index-BAFI6htx.js +0 -42
- package/ui/dist/assets/index-BSXxr_Bt.js +0 -1
- package/ui/dist/assets/index-DS-ADE7U.css +0 -1
package/README.md
CHANGED
|
@@ -83,16 +83,6 @@ npm install -g ima2-gen@latest
|
|
|
83
83
|
|
|
84
84
|
Ctrl+C now performs a clean shutdown — closing the database, stopping child processes, and releasing file locks. On older versions (< 1.1.22) or if you see `EBUSY` on Windows, use the install script which handles stale process cleanup automatically.
|
|
85
85
|
|
|
86
|
-
## What's New in v1.1.22
|
|
87
|
-
|
|
88
|
-
- **Storyboard mode**: composer toggle for maintaining character/scene continuity across sequential frames. Works in both image and video pipelines.
|
|
89
|
-
- **Planner model selection**: choose the Grok planner model (grok-4.3 default) from video settings or via `--planner-model` CLI flag.
|
|
90
|
-
- **Video frame copy**: First/Mid/Last frame extraction buttons on video results for easy keyframe copying.
|
|
91
|
-
- **Multi-character dialogue**: video/image planners now identify characters by visual appearance (clothing + physique + props) instead of names, improving dialogue attribution.
|
|
92
|
-
- **Graceful shutdown**: Ctrl+C now properly closes DB, server sockets, and child processes — fixes Windows EBUSY on npm update.
|
|
93
|
-
- **Cross-platform install scripts**: one-click install for macOS, Windows, and Linux (auto-detects nvm/fnm/brew/winget).
|
|
94
|
-
- **Atomic sidecar writes**: metadata files now use temp+rename to prevent corruption on crash.
|
|
95
|
-
|
|
96
86
|
## What It Does
|
|
97
87
|
|
|
98
88
|
- **Classic mode**: generate, edit, reuse the current image, paste references, and continue from history.
|
|
@@ -109,11 +99,12 @@ Ctrl+C now performs a clean shutdown — closing the database, stopping child pr
|
|
|
109
99
|
|
|
110
100
|
## Provider Paths
|
|
111
101
|
|
|
112
|
-
Image generation can run through the local Codex/ChatGPT OAuth path, a configured OpenAI API key,
|
|
102
|
+
Image generation can run through the local Codex/ChatGPT OAuth path, a configured OpenAI API key, the bundled Grok provider, or the Gemini provider via Antigravity CLI.
|
|
113
103
|
|
|
114
104
|
- `provider: "oauth"` uses the local Codex OAuth proxy.
|
|
115
105
|
- `provider: "api"` calls the OpenAI Responses API with the hosted `image_generation` tool.
|
|
116
106
|
- `provider: "grok"` starts bundled `progrok` on `127.0.0.1:18645`, runs mandatory xAI Web Search plus a planner pass (default: `grok-4.3`, configurable in settings or via `--planner-model`), then calls xAI Images API through the local proxy.
|
|
107
|
+
- `provider: "agy"` spawns the Antigravity CLI (`agy -p`) to generate images via Google Gemini's `default_api:generate_image` tool (model: `nano-banana-2`). Output is fixed at 1024×1024 JPEG, max 3 reference images. No web search, quality, or size controls.
|
|
117
108
|
- API-key generation supports classic generate, edit, mask-guided edit, multimode, and node generation.
|
|
118
109
|
- Grok generation supports Classic, Node, and Agent flows. If a Classic reference, Node parent image, or Agent current image is present, ima2 switches the final Grok call to xAI image edit so image-to-image context is preserved.
|
|
119
110
|
|
package/bin/commands/backfillThumbs.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { config } from "../../config.js";
|
|
2
|
+
import { backfillThumbnails } from "../../lib/thumbBackfill.js";
|
|
3
|
+
import { invalidateHistoryIndex } from "../../lib/historyIndex.js";
|
|
4
|
+
export async function backfillThumbs() {
|
|
5
|
+
const dir = config.storage.generatedDir;
|
|
6
|
+
console.log(`[thumbs] Scanning ${dir} (recursive) for missing thumbnails...`);
|
|
7
|
+
let r;
|
|
8
|
+
try {
|
|
9
|
+
r = await backfillThumbnails(dir);
|
|
10
|
+
}
|
|
11
|
+
catch (e) {
|
|
12
|
+
console.error("[thumbs] Backfill failed:", e instanceof Error ? e.message : e);
|
|
13
|
+
return;
|
|
14
|
+
}
|
|
15
|
+
if (r.created > 0)
|
|
16
|
+
invalidateHistoryIndex();
|
|
17
|
+
console.log(`[thumbs] Done: ${r.created} created, ${r.skipped} skipped (already exist), ${r.failed} failed out of ${r.total} media files.`);
|
|
18
|
+
}
|
package/bin/commands/edit.js
CHANGED
|
@@ -7,8 +7,8 @@ import { createCliRequestId, recoverGeneratedOutputs, formatRecoveryHint } from
|
|
|
7
7
|
import { errInfo } from "../../lib/errInfo.js";
|
|
8
8
|
const VALID_MODES = new Set(["auto", "direct"]);
|
|
9
9
|
const VALID_MODERATION = new Set(["auto", "low"]);
|
|
10
|
-
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
|
|
11
|
-
const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality"]);
|
|
10
|
+
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
11
|
+
const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality", "nano-banana-2", "nano-banana-pro"]);
|
|
12
12
|
const SPEC = {
|
|
13
13
|
flags: {
|
|
14
14
|
prompt: { short: "p", type: "string" },
|
|
@@ -40,8 +40,9 @@ const HELP = `
|
|
|
40
40
|
-s, --size <WxH>
|
|
41
41
|
-o, --out <file>
|
|
42
42
|
--json
|
|
43
|
-
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality>
|
|
44
|
-
--provider <auto|oauth|api|grok>
|
|
43
|
+
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
|
|
44
|
+
--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
|
|
45
|
+
Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
|
|
45
46
|
--mode <auto|direct> Prompt handling mode. Default: auto
|
|
46
47
|
--moderation <auto|low> Default: low
|
|
47
48
|
--session <id> Apply session style sheet if enabled
|
|
@@ -64,10 +65,10 @@ export default async function editCmd(argv) {
|
|
|
64
65
|
if (!VALID_MODERATION.has(String(args.moderation)))
|
|
65
66
|
die(2, "--moderation must be one of: auto, low");
|
|
66
67
|
if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
|
|
67
|
-
die(2, "--provider must be one of: auto, oauth, api, grok");
|
|
68
|
+
die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
|
|
68
69
|
}
|
|
69
70
|
if (args.model && !KNOWN_IMAGE_MODELS.has(String(args.model))) {
|
|
70
|
-
die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality");
|
|
71
|
+
die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality, nano-banana-2, nano-banana-pro");
|
|
71
72
|
}
|
|
72
73
|
const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
|
|
73
74
|
if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {
|
package/bin/commands/gen.js
CHANGED
|
@@ -7,8 +7,8 @@ import { createCliRequestId, recoverGeneratedOutputs, formatRecoveryHint } from
|
|
|
7
7
|
import { errInfo } from "../../lib/errInfo.js";
|
|
8
8
|
const VALID_MODES = new Set(["auto", "direct"]);
|
|
9
9
|
const VALID_MODERATION = new Set(["auto", "low"]);
|
|
10
|
-
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
|
|
11
|
-
const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality"]);
|
|
10
|
+
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
11
|
+
const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality", "nano-banana-2", "nano-banana-pro"]);
|
|
12
12
|
const SPEC = {
|
|
13
13
|
flags: {
|
|
14
14
|
quality: { short: "q", type: "string", default: "low" },
|
|
@@ -51,8 +51,9 @@ const HELP = `
|
|
|
51
51
|
--stdin Read prompt from stdin
|
|
52
52
|
--timeout <sec> Default: 180
|
|
53
53
|
--server <url> Override server URL
|
|
54
|
-
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality>
|
|
55
|
-
--provider <auto|oauth|api|grok>
|
|
54
|
+
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
|
|
55
|
+
--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
|
|
56
|
+
Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
|
|
56
57
|
--mode <auto|direct> Prompt handling mode. Default: auto
|
|
57
58
|
--moderation <auto|low> Default: low
|
|
58
59
|
--session <id> Apply session style sheet if enabled
|
|
@@ -88,10 +89,10 @@ export default async function genCmd(argv) {
|
|
|
88
89
|
if (!VALID_MODERATION.has(String(args.moderation)))
|
|
89
90
|
die(2, "--moderation must be one of: auto, low");
|
|
90
91
|
if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
|
|
91
|
-
die(2, "--provider must be one of: auto, oauth, api, grok");
|
|
92
|
+
die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
|
|
92
93
|
}
|
|
93
94
|
if (args.model && !KNOWN_IMAGE_MODELS.has(String(args.model))) {
|
|
94
|
-
die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality");
|
|
95
|
+
die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality, nano-banana-2, nano-banana-pro");
|
|
95
96
|
}
|
|
96
97
|
const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
|
|
97
98
|
if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {
|
package/bin/commands/multimode.js
CHANGED
|
@@ -40,8 +40,9 @@ const HELP = `
|
|
|
40
40
|
-o, --out <file> First image (implies --max-images 1)
|
|
41
41
|
-d, --out-dir <dir> Output dir for multiple images
|
|
42
42
|
--json
|
|
43
|
-
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini>
|
|
44
|
-
--provider <auto|oauth|api|grok>
|
|
43
|
+
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
|
|
44
|
+
--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
|
|
45
|
+
Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
|
|
45
46
|
--mode <auto|direct> Prompt handling mode. Default: auto
|
|
46
47
|
--ref <file> Attach reference image (repeatable, max 5)
|
|
47
48
|
--reasoning-effort <none|low|medium|high|xhigh>
|
|
@@ -60,11 +61,11 @@ export default async function multimodeCmd(argv) {
|
|
|
60
61
|
const prompt = args.positional.join(" ");
|
|
61
62
|
if (!prompt)
|
|
62
63
|
die(2, "prompt required");
|
|
63
|
-
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
|
|
64
|
+
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
64
65
|
const VALID_MODES = new Set(["auto", "direct"]);
|
|
65
66
|
const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
|
|
66
67
|
if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
|
|
67
|
-
die(2, "--provider must be one of: auto, oauth, api, grok");
|
|
68
|
+
die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
|
|
68
69
|
}
|
|
69
70
|
if (!VALID_MODES.has(String(args.mode)))
|
|
70
71
|
die(2, "--mode must be one of: auto, direct");
|
package/bin/commands/node.js
CHANGED
|
@@ -8,11 +8,11 @@ const HELP = `
|
|
|
8
8
|
ima2 node <subcommand> [options]
|
|
9
9
|
|
|
10
10
|
Subcommands:
|
|
11
|
-
generate <prompt...> [--parent <nodeId>] [--ref <file>...] [--provider <auto|oauth|api|grok>] [--no-stream] [...gen-style flags]
|
|
11
|
+
generate <prompt...> [--parent <nodeId>] [--ref <file>...] [--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>] [--no-stream] [...gen-style flags]
|
|
12
12
|
show <nodeId> [--json]
|
|
13
13
|
|
|
14
14
|
Generate options:
|
|
15
|
-
--provider <auto|oauth|api|grok> Provider for this request
|
|
15
|
+
--provider <auto|oauth|api|grok|grok-api|agy|gemini-api> Provider for this request
|
|
16
16
|
`;
|
|
17
17
|
const GEN_FLAGS = {
|
|
18
18
|
quality: { short: "q", type: "string", default: "low" },
|
|
@@ -58,10 +58,10 @@ async function generateSub(argv) {
|
|
|
58
58
|
if (!prompt)
|
|
59
59
|
die(2, "prompt required");
|
|
60
60
|
const refs = (Array.isArray(args.ref) ? args.ref : []);
|
|
61
|
-
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
|
|
61
|
+
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
62
62
|
const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
|
|
63
63
|
if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
|
|
64
|
-
die(2, "--provider must be one of: auto, oauth, api, grok");
|
|
64
|
+
die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
|
|
65
65
|
}
|
|
66
66
|
if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {
|
|
67
67
|
die(2, "--reasoning-effort must be one of: none, low, medium, high, xhigh");
|
package/bin/ima2.js
CHANGED
|
@@ -288,6 +288,7 @@ function showHelp() {
|
|
|
288
288
|
cancel <id> Mark an in-flight job canceled (ima2 cancel --help)
|
|
289
289
|
inflight <sub> Inflight jobs (ls / rm) (ima2 inflight --help)
|
|
290
290
|
storage <sub> Storage status / open-dir (ima2 storage --help)
|
|
291
|
+
backfill-thumbs Generate missing thumbnails for gallery performance
|
|
291
292
|
billing API usage / quota
|
|
292
293
|
providers Configured providers
|
|
293
294
|
oauth <sub> GPT OAuth proxy status (ima2 oauth --help)
|
|
@@ -332,7 +333,7 @@ if (args.includes("-v") || args.includes("--version")) {
|
|
|
332
333
|
process.exit(0);
|
|
333
334
|
}
|
|
334
335
|
if ((!command || args.includes("-h") || args.includes("--help"))
|
|
335
|
-
&& !["doctor", "gen", "video", "edit", "ls", "show", "ps", "cancel", "session", "history", "prompt", "multimode", "node", "annotate", "canvas-versions", "metadata", "comfy", "cardnews", "inflight", "storage", "billing", "providers", "oauth", "grok", "config", "defaults", "capabilities", "skill", "ping"].includes(command)) {
|
|
336
|
+
&& !["doctor", "gen", "video", "edit", "ls", "show", "ps", "cancel", "session", "history", "prompt", "multimode", "node", "annotate", "canvas-versions", "metadata", "comfy", "cardnews", "inflight", "storage", "billing", "providers", "oauth", "grok", "config", "defaults", "capabilities", "skill", "ping", "backfill-thumbs"].includes(command)) {
|
|
336
337
|
showHelp();
|
|
337
338
|
process.exit(command ? 0 : 1);
|
|
338
339
|
}
|
|
@@ -406,6 +407,11 @@ switch (command) {
|
|
|
406
407
|
await mod.default(args.slice(1));
|
|
407
408
|
break;
|
|
408
409
|
}
|
|
410
|
+
case "backfill-thumbs": {
|
|
411
|
+
const { backfillThumbs } = await import("./commands/backfillThumbs.js");
|
|
412
|
+
await backfillThumbs();
|
|
413
|
+
break;
|
|
414
|
+
}
|
|
409
415
|
case "storage":
|
|
410
416
|
case "billing":
|
|
411
417
|
case "providers":
|
package/bin/lib/config-store.js
CHANGED
|
@@ -102,7 +102,7 @@ export function envOverrideForKey(key) {
|
|
|
102
102
|
return { envVar, value: String(process.env[envVar]) };
|
|
103
103
|
}
|
|
104
104
|
export function displayPath(p) {
|
|
105
|
-
const home = process.env.HOME || "";
|
|
105
|
+
const home = process.env.HOME || process.env.USERPROFILE || "";
|
|
106
106
|
return home && p.startsWith(home) ? p.replace(home, "~") : p;
|
|
107
107
|
}
|
|
108
108
|
export function restartNotice() {
|
package/docs/API.md
CHANGED
|
@@ -10,11 +10,14 @@ http://localhost:3333
|
|
|
10
10
|
|
|
11
11
|
## Provider Policy
|
|
12
12
|
|
|
13
|
-
Image generation supports OAuth, API-key, and
|
|
13
|
+
Image generation supports OAuth, API-key, Grok, and Gemini (agy) providers.
|
|
14
14
|
|
|
15
15
|
- `provider: "oauth"` uses the local Codex OAuth proxy.
|
|
16
16
|
- `provider: "api"` uses the OpenAI Responses API with the hosted `image_generation` tool.
|
|
17
17
|
- `provider: "grok"` uses the bundled progrok xAI proxy. Classic, Node, and Agent generation run mandatory xAI Web Search through `/v1/responses`, then run a `grok-4.3` planner call with a forced local `generate_image` function, then ima2 executes xAI `/v1/images/generations`. If reference images, a Node parent image, or an Agent current image are attached, the final step switches to xAI `/v1/images/edits` so image-to-image context is preserved.
|
|
18
|
+
- `provider: "agy"` spawns the Antigravity CLI (`agy -p`) to generate images via Google Gemini's `default_api:generate_image` tool. Model is `nano-banana-2`. Output is fixed at 1024×1024 JPEG. Max 3 reference images (i2i). No web search, quality, size, or mask controls. Multimode returns a single image. Video is unsupported (`AGY_VIDEO_UNSUPPORTED`).
|
|
19
|
+
- `provider: "grok-api"` uses a direct xAI API key instead of the bundled progrok OAuth proxy. Same pipeline as `grok` (Web Search → planner → `/v1/images/generations`), same aspect ratio and resolution options. Requires an xAI API key configured via the web UI key management or `XAI_API_KEY` env var. Also supports video generation.
|
|
20
|
+
- `provider: "gemini-api"` calls the Google Generative Language API directly (or Vertex AI with a service account JSON). Supports models `nano-banana-2` (Gemini 3.1 Flash Image) and `nano-banana-pro` (Gemini 3 Pro Image). Supports variable aspect ratios and resolutions (512px–4K). Requires a `GEMINI_API_KEY` env var, web UI key management, or a Vertex AI service account JSON. No web search or mask controls.
|
|
18
21
|
- API-key generation covers classic generate, edit, mask-guided edit, multimode, and node generation.
|
|
19
22
|
- If `provider: "api"` is requested without an API key, routes fail before upstream with `401` and `API_KEY_REQUIRED`.
|
|
20
23
|
- Grok generation maps `size` to xAI `aspect_ratio` and `resolution`; it does not send an OpenAI-style `size` field upstream. Grok edit uses xAI `/v1/images/edits`; Grok mask edit remains unsupported and returns `GROK_MASK_UNSUPPORTED`.
|
|
@@ -100,7 +103,8 @@ Text-to-image and reference-guided root generation.
|
|
|
100
103
|
"provider": "oauth",
|
|
101
104
|
"model": "gpt-5.4",
|
|
102
105
|
"references": [],
|
|
103
|
-
"requestId": "optional-client-id"
|
|
106
|
+
"requestId": "optional-client-id",
|
|
107
|
+
"storyboard": false
|
|
104
108
|
}
|
|
105
109
|
```
|
|
106
110
|
|
|
@@ -108,6 +112,9 @@ Supported quality values: `low`, `medium`, `high`.
|
|
|
108
112
|
|
|
109
113
|
Supported moderation values: `auto`, `low`.
|
|
110
114
|
|
|
115
|
+
When `storyboard` is `true`, the server prepends storyboard keyframe instructions so image
|
|
116
|
+
generations maintain character and scene continuity for multi-shot video production.
|
|
117
|
+
|
|
111
118
|
Recommended model: `gpt-5.4`. Current app default: `gpt-5.4-mini`. `gpt-5.5` is the strongest quality option when supported, but callers should expect higher quota pressure and possible Codex CLI/backend capability requirements.
|
|
112
119
|
|
|
113
120
|
When `provider` is `"grok"`, supported models are `grok-imagine-image` and
|
|
@@ -267,13 +274,17 @@ Generate a video via the Grok video provider. Returns Server-Sent Events.
|
|
|
267
274
|
| `referenceFilenames` | string[] | — | Existing generated files for reference-to-video |
|
|
268
275
|
| `continueFromVideo` | string | — | Generated `.mp4` parent; server extracts its last frame and rebuilds lineage from sidecar |
|
|
269
276
|
| `continuityLineage` | object | — | Optional client hint; used only when `continueFromVideo` is absent |
|
|
277
|
+
| `plannerModel` | string | `grok-4.3` | Grok video planner model override (also via settings UI or `IMA2_GROK_PLANNER_MODEL`) |
|
|
278
|
+
| `storyboard` | boolean | `false` | Enable storyboard mode — maintains character/scene continuity across sequential clips |
|
|
270
279
|
|
|
271
280
|
Blank prompts return `PROMPT_REQUIRED` with a `guidance` string. The active
|
|
272
281
|
prompt should describe visual flow, motion flow, sound/music/no-music,
|
|
273
282
|
dialogue/no-dialogue, ending frame, and duration pacing. The video planner uses
|
|
274
283
|
the selected duration as the full clip runtime and expands short requests into a
|
|
275
284
|
production-level sequence with opening composition, connected motion/emotion
|
|
276
|
-
change, and a stable ending frame suitable for continuation.
|
|
285
|
+
change, and a stable ending frame suitable for continuation. For multi-character
|
|
286
|
+
scenes, the planner identifies speakers by visual appearance (clothing, physique,
|
|
287
|
+
position, props) rather than names, and attributes each dialogue line accordingly.
|
|
277
288
|
|
|
278
289
|
When `continueFromVideo` is present, the server treats the generated `.mp4`
|
|
279
290
|
sidecar as authoritative. Client `continuityLineage` cannot override it. The
|
|
@@ -313,7 +324,7 @@ Grok prompt surfaces used by video APIs:
|
|
|
313
324
|
|
|
314
325
|
| Surface | Model | Responsibility |
|
|
315
326
|
|---|---|---|
|
|
316
|
-
| Video planner | `grok-4.3` | Converts user prompt, search context, refs, and optional continuity lineage into the final English video prompt. It must structure core subject, action/motion, camera/composition, environment/style, dialogue/audio, ending-frame handoff, and constraints. |
|
|
327
|
+
| Video planner | `grok-4.3` (override via `plannerModel`) | Converts user prompt, search context, refs, and optional continuity lineage into the final English video prompt. It must structure core subject, action/motion, camera/composition, environment/style, dialogue/audio, ending-frame handoff, and constraints. Multi-character dialogue uses appearance-based speaker identification. |
|
|
317
328
|
| Video generation | xAI video model | Receives the planner prompt plus `sourceImage` or `referenceImages` when present. |
|
|
318
329
|
| Video analysis | `grok-4.3` | Reads first/last frame images from `/api/video/analyze` and returns recreation/continuation guidance. |
|
|
319
330
|
|
|
@@ -461,6 +472,44 @@ Style-sheet extraction can require an API key/openai client. Image generation al
|
|
|
461
472
|
| `GROK_RATE_LIMITED` | xAI returned a rate-limit response through progrok |
|
|
462
473
|
| `GROK_AUTH_FAILED` | progrok could not authenticate the xAI request |
|
|
463
474
|
| `GROK_SEARCH_TIMEOUT` / `GROK_PLANNER_TIMEOUT` / `GROK_IMAGE_TIMEOUT` | The Grok search, planner, or image API step exceeded its timeout budget |
|
|
475
|
+
| `AGY_GENERATION_FAILED` | Gemini (agy) image generation failed |
|
|
476
|
+
| `AGY_TIMEOUT` | Agy CLI process exceeded its 360-second timeout |
|
|
477
|
+
| `AGY_PROCESS_ERROR` | Agy CLI binary failed to start or crashed |
|
|
478
|
+
| `AGY_QUOTA_EXHAUSTED` | Gemini API quota exhausted (rate limit) |
|
|
479
|
+
| `AGY_PARSE_FAILED` | Could not parse artifact path from agy output |
|
|
480
|
+
| `AGY_ARTIFACT_NOT_FOUND` | Agy reported an artifact path that does not exist |
|
|
481
|
+
| `AGY_PATH_REJECTED` | Agy artifact path was outside allowed directories |
|
|
482
|
+
| `AGY_VIDEO_UNSUPPORTED` | Video generation is not supported by the Gemini (agy) provider |
|
|
483
|
+
| `AGY_MASK_UNSUPPORTED` | Mask-based editing is not supported by the Gemini (agy) provider |
|
|
484
|
+
| `AGY_REF_TOO_MANY` | Too many reference images for agy (max 3) |
|
|
485
|
+
| `GEMINI_API_KEY_MISSING` | Gemini API key or Vertex AI credentials not configured |
|
|
486
|
+
| `GEMINI_API_RATE_LIMITED` | Gemini API rate limited (429) |
|
|
487
|
+
| `GEMINI_API_BAD_REQUEST` | Gemini API bad request (400/403) |
|
|
488
|
+
| `GEMINI_API_SAFETY_BLOCKED` | Gemini API generation blocked by safety filter |
|
|
489
|
+
| `GEMINI_API_NO_IMAGE` | Gemini API returned no image in response |
|
|
490
|
+
| `VIDEO_PROVIDER_UNSUPPORTED` | Video generation requires provider `"grok"` or `"grok-api"` |
|
|
491
|
+
|
|
492
|
+
## Key Management
|
|
493
|
+
|
|
494
|
+
API key management endpoints for configuring provider credentials at runtime through the web UI or HTTP API.
|
|
495
|
+
|
|
496
|
+
| Endpoint | Method | Description |
|
|
497
|
+
|---|---|---|
|
|
498
|
+
| `/api/keys/status` | GET | Returns configured/valid/maskedKey status for all providers (openai, xai, gemini, vertex) |
|
|
499
|
+
| `/api/keys/:provider` | PUT | Save an API key. Body: `{ "apiKey": "..." }`. Validates key format and upstream before saving to config.json. Provider: `openai`, `xai`, or `gemini`. |
|
|
500
|
+
| `/api/keys/:provider` | DELETE | Remove a config-sourced API key. Env-sourced keys cannot be removed (`ENV_KEY_IMMUTABLE`). |
|
|
501
|
+
| `/api/keys/vertex` | PUT | Save a Vertex AI service account JSON. Body: `{ "serviceAccountJson": "..." }`. Validates JSON structure (`type: "service_account"`, `project_id` required). |
|
|
502
|
+
| `/api/keys/vertex` | DELETE | Remove a config-sourced Vertex AI service account. |
|
|
503
|
+
|
|
504
|
+
Keys saved via PUT are stored in `config.json` and hot-updated in the runtime context (no server restart required). Keys loaded from environment variables (`OPENAI_API_KEY`, `XAI_API_KEY`, `GEMINI_API_KEY`, `VERTEX_SERVICE_ACCOUNT_JSON`) take precedence and are immutable through the API.
|
|
505
|
+
|
|
506
|
+
## Thumbnail Backfill
|
|
507
|
+
|
|
508
|
+
| Endpoint | Method | Description |
|
|
509
|
+
|---|---|---|
|
|
510
|
+
| `/api/history/backfill-thumbnails` | POST | Generate missing `.thumb.jpg` thumbnails for all images and videos in the generated directory. Returns `{ ok, total, created, skipped, failed }`. Also available offline via `ima2 backfill-thumbs`. |
|
|
511
|
+
|
|
512
|
+
Thumbnails are also generated automatically on server startup for any media files that lack them.
|
|
464
513
|
|
|
465
514
|
## Endpoint → CLI Mapping
|
|
466
515
|
|
|
@@ -499,6 +548,8 @@ Most server routes under `/api/*` have a CLI wrapper. The exception is **Agent M
|
|
|
499
548
|
| `GET /api/billing` / `GET /api/providers` / `GET /api/oauth/status` / `GET /api/grok/status` | `ima2 billing` / `ima2 providers` / `ima2 oauth status` / `ima2 grok status` |
|
|
500
549
|
| `GET /api/health` | `ima2 ping` |
|
|
501
550
|
| `GET /api/capabilities` | `ima2 capabilities` |
|
|
551
|
+
| `POST /api/history/backfill-thumbnails` | `ima2 backfill-thumbs` |
|
|
552
|
+
| `GET /api/keys/status`, `PUT/DELETE /api/keys/:provider`, `PUT/DELETE /api/keys/vertex` | Web UI only (Settings > API Keys) |
|
|
502
553
|
| `GET/POST/PATCH/DELETE /api/agent/*` (sessions, turns, queue) | — (Agent Mode; web UI only, no CLI) |
|
|
503
554
|
| `POST /api/prompt-builder/chat` | `ima2 prompt build` |
|
|
504
555
|
|
package/docs/CLI.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# CLI Reference
|
|
2
2
|
|
|
3
|
-
Most server routes under `/api/*` have a CLI wrapper; Agent Mode (`/api/agent/*`) is web-UI-only and has no `ima2` subcommand. The prompt builder HTTP route (`POST /api/prompt-builder/chat`) is available through `ima2 prompt build`. The CLI is a thin shell over the local server, so most commands require a running `ima2 serve` (the few exceptions — `serve`, `setup`, `doctor`, `status`, `open`, `reset`, `config`, `grok`, `skill`, `capabilities`, and local `defaults` inspection — work without a live server).
|
|
3
|
+
Most server routes under `/api/*` have a CLI wrapper; Agent Mode (`/api/agent/*`) is web-UI-only and has no `ima2` subcommand. The prompt builder HTTP route (`POST /api/prompt-builder/chat`) is available through `ima2 prompt build`. The CLI is a thin shell over the local server, so most commands require a running `ima2 serve` (the few exceptions — `serve`, `setup`, `doctor`, `status`, `open`, `reset`, `config`, `grok`, `skill`, `capabilities`, `backfill-thumbs`, and local `defaults` inspection — work without a live server).
|
|
4
4
|
|
|
5
5
|
For a quick start, see the [main README](../README.md). For endpoint mapping, see [API.md](API.md).
|
|
6
6
|
|
|
@@ -16,6 +16,7 @@ For a quick start, see the [main README](../README.md). For endpoint mapping, se
|
|
|
16
16
|
| `ima2 open` | Open the web UI in a browser |
|
|
17
17
|
| `ima2 grok login/status/models/proxy` | Manage the bundled progrok runtime used by the Grok provider |
|
|
18
18
|
| `ima2 reset` | Remove saved config |
|
|
19
|
+
| `ima2 backfill-thumbs` | Generate missing gallery thumbnails for images and videos (offline, no running server needed) |
|
|
19
20
|
|
|
20
21
|
## Common flags
|
|
21
22
|
|
|
@@ -53,13 +54,14 @@ Agents should start from the packaged skill and capability commands instead of g
|
|
|
53
54
|
| `ima2 node generate` | Node-mode generate (SSE; supports `--no-stream`) |
|
|
54
55
|
| `ima2 node show <nodeId>` | Read node metadata |
|
|
55
56
|
|
|
56
|
-
Generation flags include `--provider <auto|oauth|api|grok>`, `--reasoning-effort {none\|low\|medium\|high\|xhigh}`, `--web-search` / `--no-web-search`, `--model`, `--mode`, `--moderation`, `--ref <file>` (repeatable, up to 5 where supported), `-q low|medium|high`, `-n <count>`, `-o <file>`.
|
|
57
|
+
Generation flags include `--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>`, `--reasoning-effort {none\|low\|medium\|high\|xhigh}`, `--web-search` / `--no-web-search`, `--model`, `--mode`, `--moderation`, `--ref <file>` (repeatable, up to 5 where supported), `-q low|medium|high`, `-n <count>`, `-o <file>`.
|
|
57
58
|
|
|
58
59
|
Provider override semantics:
|
|
59
60
|
|
|
60
61
|
- `api` forces the API-key Responses path and requires a configured API key.
|
|
61
62
|
- `oauth` forces the local OAuth proxy path.
|
|
62
63
|
- `grok` uses the bundled progrok xAI proxy (`127.0.0.1:18645`). Classic generation first runs mandatory xAI Web Search through Responses API, then asks `grok-4.3` to call ima2's local `generate_image` tool, then ima2 executes xAI `/v1/images/generations`. If `--ref` images are attached, the final step uses xAI `/v1/images/edits` instead so image-to-image/reference context is preserved. Models: `grok-imagine-image`, `grok-imagine-image-quality`. Size is mapped to xAI `aspect_ratio` and `resolution`; the UI web-search toggle is OpenAI-provider-only because Grok search is always on in this path.
|
|
64
|
+
- `agy` spawns the Antigravity CLI to generate via Google Gemini (`nano-banana-2`). Fixed 1024×1024 JPEG output, max 3 refs. No web search, quality, size, or mask controls.
|
|
63
65
|
- `auto` preserves route default behavior and currently resolves to GPT OAuth unless server routing changes.
|
|
64
66
|
|
|
65
67
|
`ima2 serve` starts the bundled Grok proxy automatically. No separate `progrok`
|
|
@@ -105,7 +107,7 @@ mockup`.
|
|
|
105
107
|
For dense or critical text, keep the text large and explicit. Exact placement,
|
|
106
108
|
small text, and pixel-perfect typography can still need iteration or post-editing.
|
|
107
109
|
|
|
108
|
-
Multimode-specific flags include `--max-images <1..8>`, `--ref <file>` (repeatable, max 5), `--mode <auto|direct>`, `--provider <auto|oauth|api|grok>`, and `--show-partial`. `ima2 edit --mask` remains intentionally deferred to #31 because current mask plumbing is guided edit rather than guaranteed true masked/inpaint semantics.
|
|
110
|
+
Multimode-specific flags include `--max-images <1..8>`, `--ref <file>` (repeatable, max 5), `--mode <auto|direct>`, `--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>`, and `--show-partial`. `ima2 edit --mask` remains intentionally deferred to #31 because current mask plumbing is guided edit rather than guaranteed true masked/inpaint semantics.
|
|
109
111
|
|
|
110
112
|
## Video
|
|
111
113
|
|
|
@@ -126,6 +128,8 @@ Video generate flags:
|
|
|
126
128
|
| `--resolution <480p\|720p>` | Video resolution (default: 480p) |
|
|
127
129
|
| `--aspect-ratio <ratio\|auto>` | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto (default: auto) |
|
|
128
130
|
| `--model <name>` | `grok-imagine-video` or `grok-imagine-video-1.5-preview` |
|
|
131
|
+
| `--planner-model <name>` | Grok planner override (default: `grok-4.3`; also in settings UI and `IMA2_GROK_PLANNER_MODEL`) |
|
|
132
|
+
| `--storyboard` | Enable storyboard mode — maintains character/scene continuity across sequential clips |
|
|
129
133
|
| `--ref <file>` | Attach source/reference image (repeatable, max 7) |
|
|
130
134
|
| `-o, --out <file>` | Output file path |
|
|
131
135
|
| `-d, --out-dir <dir>` | Output directory |
|
|
@@ -160,6 +164,8 @@ Video continue flags:
|
|
|
160
164
|
| `--aspect-ratio <ratio\|auto>` | New clip aspect ratio |
|
|
161
165
|
| `--model <name>` | Optional video generation model |
|
|
162
166
|
|
|
167
|
+
Video continue also accepts `--planner-model` and `--storyboard`.
|
|
168
|
+
|
|
163
169
|
Video mode is auto-detected from `--ref` count:
|
|
164
170
|
|
|
165
171
|
| Refs | Mode |
|
package/docs/PROMPT_STUDIO.md
CHANGED
|
@@ -12,10 +12,12 @@ you want a reproducible way to report a workspace issue.
|
|
|
12
12
|
| Area | What it does | Notes |
|
|
13
13
|
|---|---|---|
|
|
14
14
|
| Composer | Holds the prompt for the next request. | Selecting an existing image is view-only. It should not overwrite the composer. |
|
|
15
|
+
| Storyboard | Maintains character and scene continuity across sequential frames. | Toggle in the composer. Works for image and video generation; image keyframes are composed for video production. |
|
|
15
16
|
| Multimode | Starts several separate image requests from the current prompt. | Each slot is a candidate output, not a collage panel or a guaranteed scene sequence. |
|
|
16
17
|
| 1:1 Direct | Sends the prompt through with less rewriting by the app. | Use it for exact wording, strict prompt experiments, or provider-side prompt syntax. |
|
|
17
18
|
| Model quick menu | Changes the image model and reasoning effort from the sidebar header. | The full Settings workspace remains the detailed configuration page. |
|
|
18
|
-
| Recent generations | Shows the visible Prompt Studio history domain. | Arrow keys move inside the same visible recent domain instead of hidden older rows. Video items render as video thumbnails. Drag any thumbnail to the composer to add it as a reference image. |
|
|
19
|
+
| Recent generations | Shows the visible Prompt Studio history domain. | Arrow keys move inside the same visible recent domain instead of hidden older rows. Video items render as video thumbnails. Drag any thumbnail to the composer to add it as a reference image. Video results expose First, Mid, and Last frame buttons to copy keyframes. |
|
|
20
|
+
| Video settings | Controls Grok video duration, resolution, aspect ratio, and planner model. | Default planner model is `grok-4.3`; override per request when needed. |
|
|
19
21
|
| Gallery | Browses saved local images, All/Favorites tabs, and folders. | Favorite toggles should preserve the gallery viewport you were browsing. |
|
|
20
22
|
| Prompt library | Imports saved prompt text into the composer intentionally. | Library insert/continue actions are explicit prompt imports; passive image selection is not. |
|
|
21
23
|
|
|
package/docs/migration/runtime-test-inventory.md
CHANGED
|
@@ -4,7 +4,7 @@ Generated by `npm run test:inventory` (script: `scripts/classify-tests.mjs`).
|
|
|
4
4
|
|
|
5
5
|
_Tests considered "runtime-importing" if they import from `../lib/`, `../routes/`, `../bin/`, `../server`, or `../config`._
|
|
6
6
|
|
|
7
|
-
Total: 175 (runtime: 60, contract: 115)
|
|
7
|
+
Total: 177 (runtime: 61, contract: 116)
|
|
8
8
|
|
|
9
9
|
## Runtime-importing tests
|
|
10
10
|
- `tests/agent-mode-auto-planner-contract.test.ts`
|
|
@@ -64,6 +64,7 @@ Total: 175 (runtime: 60, contract: 115)
|
|
|
64
64
|
- `tests/star-prompt.test.ts`
|
|
65
65
|
- `tests/storage-migration.test.ts`
|
|
66
66
|
- `tests/style-sheet.test.ts`
|
|
67
|
+
- `tests/thumb-backfill.test.ts`
|
|
67
68
|
- `tests/videoContinuity.test.ts`
|
|
68
69
|
- `tests/videoExtendedRoute.test.ts`
|
|
69
70
|
- `tests/videoRoute.test.ts`
|
|
@@ -155,6 +156,7 @@ Total: 175 (runtime: 60, contract: 115)
|
|
|
155
156
|
- `tests/node-layout-contract.test.js`
|
|
156
157
|
- `tests/node-pending-recovery-contract.test.js`
|
|
157
158
|
- `tests/node-regen-actions-contract.test.js`
|
|
159
|
+
- `tests/node-session-evaporation-contract.test.js`
|
|
158
160
|
- `tests/node-ui-contract.test.js`
|
|
159
161
|
- `tests/oauth-masked-edit-contract.test.js`
|
|
160
162
|
- `tests/oauth-proxy-edit-mask-contract.test.js`
|
package/lib/agentRuntime.js
CHANGED
|
@@ -10,6 +10,7 @@ import { detectImageMimeFromB64 } from "./refs.js";
|
|
|
10
10
|
import { resolveProviderOptions } from "./providerOptions.js";
|
|
11
11
|
import { generateViaResponses } from "./responsesImageAdapter.js";
|
|
12
12
|
import { generateViaGrok } from "./grokImageAdapter.js";
|
|
13
|
+
import { generateViaAgy } from "./agyImageAdapter.js";
|
|
13
14
|
import { generateVideoViaGrok } from "./grokVideoAdapter.js";
|
|
14
15
|
import { parseVideoParams } from "./agentGenerationPlanner.js";
|
|
15
16
|
import { appendAgentTurn, buildImageContextManifest, getAgentImages, getAgentSession, importAgentImage, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
|
|
@@ -46,7 +47,7 @@ export async function runAgentGenerationPlan(ctx, sessionId, prompt, plan, optio
|
|
|
46
47
|
const session = getAgentSession(sessionId);
|
|
47
48
|
if (!session)
|
|
48
49
|
throw notFound(sessionId);
|
|
49
|
-
const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
|
|
50
|
+
const webSearchEnabled = options.provider === "agy" ? false : options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
|
|
50
51
|
const enabledTools = webSearchEnabled
|
|
51
52
|
? [...AGENT_ALLOWED_TOOLS]
|
|
52
53
|
: ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
|
|
@@ -234,21 +235,26 @@ async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEna
|
|
|
234
235
|
const effectiveModel = activeProvider === "grok" && options.quality === "high"
|
|
235
236
|
? "grok-imagine-image-quality"
|
|
236
237
|
: providerOptions.model;
|
|
237
|
-
const response = activeProvider === "grok"
|
|
238
|
-
? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
|
|
239
|
-
model: effectiveModel,
|
|
240
|
-
size: providerOptions.size,
|
|
238
|
+
const response = activeProvider === "agy"
|
|
239
|
+
? await generateViaAgy(`${manifest}\n\nUser request:\n${prompt}`, {
|
|
241
240
|
requestId,
|
|
242
241
|
signal: options.signal ?? undefined,
|
|
243
|
-
references: await loadAgentCurrentImageReferences(ctx, sessionId),
|
|
244
242
|
})
|
|
245
|
-
:
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
243
|
+
: activeProvider === "grok"
|
|
244
|
+
? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
|
|
245
|
+
model: effectiveModel,
|
|
246
|
+
size: providerOptions.size,
|
|
247
|
+
requestId,
|
|
248
|
+
signal: options.signal ?? undefined,
|
|
249
|
+
references: await loadAgentCurrentImageReferences(ctx, sessionId),
|
|
250
|
+
})
|
|
251
|
+
: await generateViaResponses(activeProvider, `${manifest}\n\nUser request:\n${prompt}`, options.quality ?? "medium", providerOptions.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
|
|
252
|
+
model: providerOptions.model,
|
|
253
|
+
reasoningEffort: providerOptions.reasoningEffort,
|
|
254
|
+
webSearchEnabled,
|
|
255
|
+
signal: options.signal,
|
|
256
|
+
});
|
|
257
|
+
const format = activeProvider === "grok" || activeProvider === "agy"
|
|
252
258
|
? imageFormatFromMime(("mime" in response ? response.mime : undefined) || detectImageMimeFromB64(response.b64) || "image/jpeg")
|
|
253
259
|
: options.format ?? "png";
|
|
254
260
|
const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
|
|
@@ -430,13 +436,13 @@ async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
|
|
|
430
436
|
function recordSearchFindings(sessionId, prompt, count, provider) {
|
|
431
437
|
if (!count)
|
|
432
438
|
return [];
|
|
433
|
-
const
|
|
439
|
+
const providerLabel = provider === "grok" ? "Grok" : provider === "agy" ? "Gemini" : "Responses";
|
|
434
440
|
return [
|
|
435
441
|
recordAgentWebFinding({
|
|
436
442
|
sessionId,
|
|
437
443
|
query: prompt,
|
|
438
|
-
title:
|
|
439
|
-
snippet: `${
|
|
444
|
+
title: `${providerLabel} visual research`,
|
|
445
|
+
snippet: `${providerLabel} reported ${count} web search call${count === 1 ? "" : "s"}.`,
|
|
440
446
|
}),
|
|
441
447
|
];
|
|
442
448
|
}
|
package/lib/agentSettings.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const PROVIDERS = new Set(["oauth", "api", "grok"]);
|
|
1
|
+
const PROVIDERS = new Set(["oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
2
2
|
const QUALITIES = new Set(["low", "medium", "high"]);
|
|
3
3
|
const FORMATS = new Set(["png", "jpeg", "webp"]);
|
|
4
4
|
const MODERATIONS = new Set(["auto", "low"]);
|