npm - ima2-gen - Versions diffs - 2.0.0 → 2.0.1 - Mend

ima2-gen 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

package/README.md +2 -11
package/bin/commands/backfillThumbs.js +18 -0
package/bin/commands/edit.js +7 -6
package/bin/commands/gen.js +7 -6
package/bin/commands/multimode.js +5 -4
package/bin/commands/node.js +4 -4
package/bin/ima2.js +7 -1
package/bin/lib/config-store.js +1 -1
package/docs/API.md +55 -4
package/docs/CLI.md +9 -3
package/docs/PROMPT_STUDIO.md +3 -1
package/docs/migration/runtime-test-inventory.md +3 -1
package/lib/agentRuntime.js +22 -16
package/lib/agentSettings.js +1 -1
package/lib/agyImageAdapter.js +232 -0
package/lib/capabilities.js +2 -1
package/lib/configKeys.js +1 -1
package/lib/geminiApiImageAdapter.js +183 -0
package/lib/grokImageAdapter.js +16 -9
package/lib/grokMultimodeAdapter.js +2 -1
package/lib/grokRuntime.js +3 -0
package/lib/grokSizeMapper.js +13 -1
package/lib/grokVideoAdapter.js +14 -7
package/lib/historyList.js +18 -2
package/lib/imageModels.js +15 -0
package/lib/imageThumb.js +38 -0
package/lib/providerOptions.js +36 -1
package/lib/responsesFallback.js +52 -44
package/lib/runtimeContext.js +27 -0
package/lib/storageMigration.js +1 -1
package/lib/thumbBackfill.js +59 -0
package/lib/vertexAuth.js +44 -0
package/lib/videoThumb.js +60 -0
package/package.json +4 -2
package/routes/auth.js +238 -0
package/routes/edit.js +41 -7
package/routes/generate.js +40 -12
package/routes/history.js +13 -0
package/routes/index.js +4 -0
package/routes/keys.js +254 -0
package/routes/multimode.js +39 -6
package/routes/nodes.js +57 -35
package/routes/quota.js +58 -7
package/routes/video.js +7 -3
package/server.js +123 -0
package/ui/dist/.vite/manifest.json +12 -12
package/ui/dist/assets/AgentWorkspace-CYv84Rus.js +3 -0
package/ui/dist/assets/{CardNewsWorkspace-BN-ga1lG.js → CardNewsWorkspace-Dqyc1WZ1.js} +2 -2
package/ui/dist/assets/{NodeCanvas-BbMa4IhI.js → NodeCanvas-ChEXzQbb.js} +2 -2
package/ui/dist/assets/{PromptBuilderPanel-DRwBJRDQ.js → PromptBuilderPanel-B95ZufnR.js} +1 -1
package/ui/dist/assets/{PromptImportDialog-Dp85kHCq.js → PromptImportDialog-DGOwFQET.js} +2 -2
package/ui/dist/assets/{PromptImportDiscoverySection-BE8Q8MLD.js → PromptImportDiscoverySection-CgvdnR49.js} +1 -1
package/ui/dist/assets/{PromptImportFolderSection-PtH5x0sc.js → PromptImportFolderSection-CfUye9J8.js} +1 -1
package/ui/dist/assets/{PromptLibraryPanel-FnM9tHI9.js → PromptLibraryPanel-B9kndPw1.js} +2 -2
package/ui/dist/assets/SettingsWorkspace-B3tgLrmF.js +1 -0
package/ui/dist/assets/index-BhcvL0g-.js +1 -0
package/ui/dist/assets/index-BtK3YhJc.js +39 -0
package/ui/dist/assets/index-ClOLOjnA.css +1 -0
package/ui/dist/index.html +2 -2
package/ui/dist/assets/AgentWorkspace-C21zqdTZ.js +0 -3
package/ui/dist/assets/SettingsWorkspace-MARPGyBL.js +0 -1
package/ui/dist/assets/index-BAFI6htx.js +0 -42
package/ui/dist/assets/index-BSXxr_Bt.js +0 -1
package/ui/dist/assets/index-DS-ADE7U.css +0 -1

package/README.md CHANGED Viewed

@@ -83,16 +83,6 @@ npm install -g ima2-gen@latest
 Ctrl+C now performs a clean shutdown — closing the database, stopping child processes, and releasing file locks. On older versions (< 1.1.22) or if you see `EBUSY` on Windows, use the install script which handles stale process cleanup automatically.
-## What's New in v1.1.22
-- **Storyboard mode**: composer toggle for maintaining character/scene continuity across sequential frames. Works in both image and video pipelines.
-- **Planner model selection**: choose the Grok planner model (grok-4.3 default) from video settings or via `--planner-model` CLI flag.
-- **Video frame copy**: First/Mid/Last frame extraction buttons on video results for easy keyframe copying.
-- **Multi-character dialogue**: video/image planners now identify characters by visual appearance (clothing + physique + props) instead of names, improving dialogue attribution.
-- **Graceful shutdown**: Ctrl+C now properly closes DB, server sockets, and child processes — fixes Windows EBUSY on npm update.
-- **Cross-platform install scripts**: one-click install for macOS, Windows, and Linux (auto-detects nvm/fnm/brew/winget).
-- **Atomic sidecar writes**: metadata files now use temp+rename to prevent corruption on crash.
 ## What It Does
 - **Classic mode**: generate, edit, reuse the current image, paste references, and continue from history.
@@ -109,11 +99,12 @@ Ctrl+C now performs a clean shutdown — closing the database, stopping child pr
 ## Provider Paths
-Image generation can run through the local Codex/ChatGPT OAuth path, a configured OpenAI API key, or the bundled Grok provider.
+Image generation can run through the local Codex/ChatGPT OAuth path, a configured OpenAI API key, the bundled Grok provider, or the Gemini provider via Antigravity CLI.
 - `provider: "oauth"` uses the local Codex OAuth proxy.
 - `provider: "api"` calls the OpenAI Responses API with the hosted `image_generation` tool.
 - `provider: "grok"` starts bundled `progrok` on `127.0.0.1:18645`, runs mandatory xAI Web Search plus a planner pass (default: `grok-4.3`, configurable in settings or via `--planner-model`), then calls xAI Images API through the local proxy.
+- `provider: "agy"` spawns the Antigravity CLI (`agy -p`) to generate images via Google Gemini's `default_api:generate_image` tool (model: `nano-banana-2`). Output is fixed at 1024×1024 JPEG, max 3 reference images. No web search, quality, or size controls.
 - API-key generation supports classic generate, edit, mask-guided edit, multimode, and node generation.
 - Grok generation supports Classic, Node, and Agent flows. If a Classic reference, Node parent image, or Agent current image is present, ima2 switches the final Grok call to xAI image edit so image-to-image context is preserved.

package/bin/commands/backfillThumbs.js ADDED Viewed

@@ -0,0 +1,18 @@
+import { config } from "../../config.js";
+import { backfillThumbnails } from "../../lib/thumbBackfill.js";
+import { invalidateHistoryIndex } from "../../lib/historyIndex.js";
+export async function backfillThumbs() {
+    const dir = config.storage.generatedDir;
+    console.log(`[thumbs] Scanning ${dir} (recursive) for missing thumbnails...`);
+    let r;
+    try {
+        r = await backfillThumbnails(dir);
+    }
+    catch (e) {
+        console.error("[thumbs] Backfill failed:", e instanceof Error ? e.message : e);
+        return;
+    }
+    if (r.created > 0)
+        invalidateHistoryIndex();
+    console.log(`[thumbs] Done: ${r.created} created, ${r.skipped} skipped (already exist), ${r.failed} failed out of ${r.total} media files.`);
+}

package/bin/commands/edit.js CHANGED Viewed

@@ -7,8 +7,8 @@ import { createCliRequestId, recoverGeneratedOutputs, formatRecoveryHint } from
 import { errInfo } from "../../lib/errInfo.js";
 const VALID_MODES = new Set(["auto", "direct"]);
 const VALID_MODERATION = new Set(["auto", "low"]);
-const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
-const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality"]);
+const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
+const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality", "nano-banana-2", "nano-banana-pro"]);
 const SPEC = {
     flags: {
         prompt: { short: "p", type: "string" },
@@ -40,8 +40,9 @@ const HELP = `
     -s, --size <WxH>
     -o, --out <file>
         --json
-        --model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality>
-        --provider <auto|oauth|api|grok>  Provider (oauth = GPT OAuth; grok = xAI Grok)
+        --model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
+        --provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
+                                      Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
         --mode <auto|direct>       Prompt handling mode. Default: auto
         --moderation <auto|low>    Default: low
         --session <id>             Apply session style sheet if enabled
@@ -64,10 +65,10 @@ export default async function editCmd(argv) {
     if (!VALID_MODERATION.has(String(args.moderation)))
         die(2, "--moderation must be one of: auto, low");
     if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
-        die(2, "--provider must be one of: auto, oauth, api, grok");
+        die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
     }
     if (args.model && !KNOWN_IMAGE_MODELS.has(String(args.model))) {
-        die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality");
+        die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality, nano-banana-2, nano-banana-pro");
     }
     const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
     if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {

package/bin/commands/gen.js CHANGED Viewed

@@ -7,8 +7,8 @@ import { createCliRequestId, recoverGeneratedOutputs, formatRecoveryHint } from
 import { errInfo } from "../../lib/errInfo.js";
 const VALID_MODES = new Set(["auto", "direct"]);
 const VALID_MODERATION = new Set(["auto", "low"]);
-const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
-const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality"]);
+const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
+const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality", "nano-banana-2", "nano-banana-pro"]);
 const SPEC = {
     flags: {
         quality: { short: "q", type: "string", default: "low" },
@@ -51,8 +51,9 @@ const HELP = `
         --stdin                              Read prompt from stdin
         --timeout <sec>                     Default: 180
         --server <url>                      Override server URL
-        --model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality>
-        --provider <auto|oauth|api|grok>    Provider (oauth = GPT OAuth; grok = xAI Grok)
+        --model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
+        --provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
+                                            Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
         --mode <auto|direct>                Prompt handling mode. Default: auto
         --moderation <auto|low>             Default: low
         --session <id>                      Apply session style sheet if enabled
@@ -88,10 +89,10 @@ export default async function genCmd(argv) {
     if (!VALID_MODERATION.has(String(args.moderation)))
         die(2, "--moderation must be one of: auto, low");
     if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
-        die(2, "--provider must be one of: auto, oauth, api, grok");
+        die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
     }
     if (args.model && !KNOWN_IMAGE_MODELS.has(String(args.model))) {
-        die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality");
+        die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality, nano-banana-2, nano-banana-pro");
     }
     const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
     if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {

package/bin/commands/multimode.js CHANGED Viewed

@@ -40,8 +40,9 @@ const HELP = `
     -o, --out <file>                    First image (implies --max-images 1)
     -d, --out-dir <dir>                 Output dir for multiple images
         --json
-        --model <gpt-5.5|gpt-5.4|gpt-5.4-mini>
-        --provider <auto|oauth|api|grok>  Provider (oauth = GPT OAuth; grok = xAI Grok)
+        --model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
+        --provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
+                                      Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
         --mode <auto|direct>            Prompt handling mode. Default: auto
         --ref <file>                    Attach reference image (repeatable, max 5)
         --reasoning-effort <none|low|medium|high|xhigh>
@@ -60,11 +61,11 @@ export default async function multimodeCmd(argv) {
     const prompt = args.positional.join(" ");
     if (!prompt)
         die(2, "prompt required");
-    const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
+    const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
     const VALID_MODES = new Set(["auto", "direct"]);
     const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
     if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
-        die(2, "--provider must be one of: auto, oauth, api, grok");
+        die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
     }
     if (!VALID_MODES.has(String(args.mode)))
         die(2, "--mode must be one of: auto, direct");

package/bin/commands/node.js CHANGED Viewed

@@ -8,11 +8,11 @@ const HELP = `
   ima2 node <subcommand> [options]
   Subcommands:
-    generate <prompt...> [--parent <nodeId>] [--ref <file>...] [--provider <auto|oauth|api|grok>] [--no-stream] [...gen-style flags]
+    generate <prompt...> [--parent <nodeId>] [--ref <file>...] [--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>] [--no-stream] [...gen-style flags]
     show <nodeId> [--json]
   Generate options:
-        --provider <auto|oauth|api|grok>  Provider for this request; grok uses progrok proxy
+        --provider <auto|oauth|api|grok|grok-api|agy|gemini-api>  Provider for this request
 `;
 const GEN_FLAGS = {
     quality: { short: "q", type: "string", default: "low" },
@@ -58,10 +58,10 @@ async function generateSub(argv) {
     if (!prompt)
         die(2, "prompt required");
     const refs = (Array.isArray(args.ref) ? args.ref : []);
-    const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
+    const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
     const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
     if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
-        die(2, "--provider must be one of: auto, oauth, api, grok");
+        die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
     }
     if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {
         die(2, "--reasoning-effort must be one of: none, low, medium, high, xhigh");

package/bin/ima2.js CHANGED Viewed

@@ -288,6 +288,7 @@ function showHelp() {
     cancel <id>    Mark an in-flight job canceled (ima2 cancel --help)
     inflight <sub> Inflight jobs (ls / rm)         (ima2 inflight --help)
     storage <sub>  Storage status / open-dir       (ima2 storage --help)
+    backfill-thumbs  Generate missing thumbnails for gallery performance
     billing        API usage / quota
     providers      Configured providers
     oauth <sub>    GPT OAuth proxy status              (ima2 oauth --help)
@@ -332,7 +333,7 @@ if (args.includes("-v") || args.includes("--version")) {
     process.exit(0);
 }
 if ((!command || args.includes("-h") || args.includes("--help"))
-    && !["doctor", "gen", "video", "edit", "ls", "show", "ps", "cancel", "session", "history", "prompt", "multimode", "node", "annotate", "canvas-versions", "metadata", "comfy", "cardnews", "inflight", "storage", "billing", "providers", "oauth", "grok", "config", "defaults", "capabilities", "skill", "ping"].includes(command)) {
+    && !["doctor", "gen", "video", "edit", "ls", "show", "ps", "cancel", "session", "history", "prompt", "multimode", "node", "annotate", "canvas-versions", "metadata", "comfy", "cardnews", "inflight", "storage", "billing", "providers", "oauth", "grok", "config", "defaults", "capabilities", "skill", "ping", "backfill-thumbs"].includes(command)) {
     showHelp();
     process.exit(command ? 0 : 1);
 }
@@ -406,6 +407,11 @@ switch (command) {
         await mod.default(args.slice(1));
         break;
     }
+    case "backfill-thumbs": {
+        const { backfillThumbs } = await import("./commands/backfillThumbs.js");
+        await backfillThumbs();
+        break;
+    }
     case "storage":
     case "billing":
     case "providers":

package/bin/lib/config-store.js CHANGED Viewed

@@ -102,7 +102,7 @@ export function envOverrideForKey(key) {
     return { envVar, value: String(process.env[envVar]) };
 }
 export function displayPath(p) {
-    const home = process.env.HOME || "";
+    const home = process.env.HOME || process.env.USERPROFILE || "";
     return home && p.startsWith(home) ? p.replace(home, "~") : p;
 }
 export function restartNotice() {

package/docs/API.md CHANGED Viewed

@@ -10,11 +10,14 @@ http://localhost:3333
 ## Provider Policy
-Image generation supports OAuth, API-key, and Grok providers.
+Image generation supports OAuth, API-key, Grok, and Gemini (agy) providers.
 - `provider: "oauth"` uses the local Codex OAuth proxy.
 - `provider: "api"` uses the OpenAI Responses API with the hosted `image_generation` tool.
 - `provider: "grok"` uses the bundled progrok xAI proxy. Classic, Node, and Agent generation run mandatory xAI Web Search through `/v1/responses`, then run a `grok-4.3` planner call with a forced local `generate_image` function, then ima2 executes xAI `/v1/images/generations`. If reference images, a Node parent image, or an Agent current image are attached, the final step switches to xAI `/v1/images/edits` so image-to-image context is preserved.
+- `provider: "agy"` spawns the Antigravity CLI (`agy -p`) to generate images via Google Gemini's `default_api:generate_image` tool. Model is `nano-banana-2`. Output is fixed at 1024×1024 JPEG. Max 3 reference images (i2i). No web search, quality, size, or mask controls. Multimode returns a single image. Video is unsupported (`AGY_VIDEO_UNSUPPORTED`).
+- `provider: "grok-api"` uses a direct xAI API key instead of the bundled progrok OAuth proxy. Same pipeline as `grok` (Web Search → planner → `/v1/images/generations`), same aspect ratio and resolution options. Requires an xAI API key configured via the web UI key management or `XAI_API_KEY` env var. Also supports video generation.
+- `provider: "gemini-api"` calls the Google Generative Language API directly (or Vertex AI with a service account JSON). Supports models `nano-banana-2` (Gemini 3.1 Flash Image) and `nano-banana-pro` (Gemini 3 Pro Image). Supports variable aspect ratios and resolutions (512px–4K). Requires a `GEMINI_API_KEY` env var, web UI key management, or a Vertex AI service account JSON. No web search or mask controls.
 - API-key generation covers classic generate, edit, mask-guided edit, multimode, and node generation.
 - If `provider: "api"` is requested without an API key, routes fail before upstream with `401` and `API_KEY_REQUIRED`.
 - Grok generation maps `size` to xAI `aspect_ratio` and `resolution`; it does not send an OpenAI-style `size` field upstream. Grok edit uses xAI `/v1/images/edits`; Grok mask edit remains unsupported and returns `GROK_MASK_UNSUPPORTED`.
@@ -100,7 +103,8 @@ Text-to-image and reference-guided root generation.
   "provider": "oauth",
   "model": "gpt-5.4",
   "references": [],
-  "requestId": "optional-client-id"
+  "requestId": "optional-client-id",
+  "storyboard": false
 }
 ```
@@ -108,6 +112,9 @@ Supported quality values: `low`, `medium`, `high`.
 Supported moderation values: `auto`, `low`.
+When `storyboard` is `true`, the server prepends storyboard keyframe instructions so image
+generations maintain character and scene continuity for multi-shot video production.
 Recommended model: `gpt-5.4`. Current app default: `gpt-5.4-mini`. `gpt-5.5` is the strongest quality option when supported, but callers should expect higher quota pressure and possible Codex CLI/backend capability requirements.
 When `provider` is `"grok"`, supported models are `grok-imagine-image` and
@@ -267,13 +274,17 @@ Generate a video via the Grok video provider. Returns Server-Sent Events.
 | `referenceFilenames` | string[] | — | Existing generated files for reference-to-video |
 | `continueFromVideo` | string | — | Generated `.mp4` parent; server extracts its last frame and rebuilds lineage from sidecar |
 | `continuityLineage` | object | — | Optional client hint; used only when `continueFromVideo` is absent |
+| `plannerModel` | string | `grok-4.3` | Grok video planner model override (also via settings UI or `IMA2_GROK_PLANNER_MODEL`) |
+| `storyboard` | boolean | `false` | Enable storyboard mode — maintains character/scene continuity across sequential clips |
 Blank prompts return `PROMPT_REQUIRED` with a `guidance` string. The active
 prompt should describe visual flow, motion flow, sound/music/no-music,
 dialogue/no-dialogue, ending frame, and duration pacing. The video planner uses
 the selected duration as the full clip runtime and expands short requests into a
 production-level sequence with opening composition, connected motion/emotion
-change, and a stable ending frame suitable for continuation.
+change, and a stable ending frame suitable for continuation. For multi-character
+scenes, the planner identifies speakers by visual appearance (clothing, physique,
+position, props) rather than names, and attributes each dialogue line accordingly.
 When `continueFromVideo` is present, the server treats the generated `.mp4`
 sidecar as authoritative. Client `continuityLineage` cannot override it. The
@@ -313,7 +324,7 @@ Grok prompt surfaces used by video APIs:
 | Surface | Model | Responsibility |
 |---|---|---|
-| Video planner | `grok-4.3` | Converts user prompt, search context, refs, and optional continuity lineage into the final English video prompt. It must structure core subject, action/motion, camera/composition, environment/style, dialogue/audio, ending-frame handoff, and constraints. |
+| Video planner | `grok-4.3` (override via `plannerModel`) | Converts user prompt, search context, refs, and optional continuity lineage into the final English video prompt. It must structure core subject, action/motion, camera/composition, environment/style, dialogue/audio, ending-frame handoff, and constraints. Multi-character dialogue uses appearance-based speaker identification. |
 | Video generation | xAI video model | Receives the planner prompt plus `sourceImage` or `referenceImages` when present. |
 | Video analysis | `grok-4.3` | Reads first/last frame images from `/api/video/analyze` and returns recreation/continuation guidance. |
@@ -461,6 +472,44 @@ Style-sheet extraction can require an API key/openai client. Image generation al
 | `GROK_RATE_LIMITED` | xAI returned a rate-limit response through progrok |
 | `GROK_AUTH_FAILED` | progrok could not authenticate the xAI request |
 | `GROK_SEARCH_TIMEOUT` / `GROK_PLANNER_TIMEOUT` / `GROK_IMAGE_TIMEOUT` | The Grok search, planner, or image API step exceeded its timeout budget |
+| `AGY_GENERATION_FAILED` | Gemini (agy) image generation failed |
+| `AGY_TIMEOUT` | Agy CLI process exceeded its 360-second timeout |
+| `AGY_PROCESS_ERROR` | Agy CLI binary failed to start or crashed |
+| `AGY_QUOTA_EXHAUSTED` | Gemini API quota exhausted (rate limit) |
+| `AGY_PARSE_FAILED` | Could not parse artifact path from agy output |
+| `AGY_ARTIFACT_NOT_FOUND` | Agy reported an artifact path that does not exist |
+| `AGY_PATH_REJECTED` | Agy artifact path was outside allowed directories |
+| `AGY_VIDEO_UNSUPPORTED` | Video generation is not supported by the Gemini (agy) provider |
+| `AGY_MASK_UNSUPPORTED` | Mask-based editing is not supported by the Gemini (agy) provider |
+| `AGY_REF_TOO_MANY` | Too many reference images for agy (max 3) |
+| `GEMINI_API_KEY_MISSING` | Gemini API key or Vertex AI credentials not configured |
+| `GEMINI_API_RATE_LIMITED` | Gemini API rate limited (429) |
+| `GEMINI_API_BAD_REQUEST` | Gemini API bad request (400/403) |
+| `GEMINI_API_SAFETY_BLOCKED` | Gemini API generation blocked by safety filter |
+| `GEMINI_API_NO_IMAGE` | Gemini API returned no image in response |
+| `VIDEO_PROVIDER_UNSUPPORTED` | Video generation requires provider `"grok"` or `"grok-api"` |
+## Key Management
+API key management endpoints for configuring provider credentials at runtime through the web UI or HTTP API.
+| Endpoint | Method | Description |
+|---|---|---|
+| `/api/keys/status` | GET | Returns configured/valid/maskedKey status for all providers (openai, xai, gemini, vertex) |
+| `/api/keys/:provider` | PUT | Save an API key. Body: `{ "apiKey": "..." }`. Validates key format and upstream before saving to config.json. Provider: `openai`, `xai`, or `gemini`. |
+| `/api/keys/:provider` | DELETE | Remove a config-sourced API key. Env-sourced keys cannot be removed (`ENV_KEY_IMMUTABLE`). |
+| `/api/keys/vertex` | PUT | Save a Vertex AI service account JSON. Body: `{ "serviceAccountJson": "..." }`. Validates JSON structure (`type: "service_account"`, `project_id` required). |
+| `/api/keys/vertex` | DELETE | Remove a config-sourced Vertex AI service account. |
+Keys saved via PUT are stored in `config.json` and hot-updated in the runtime context (no server restart required). Keys loaded from environment variables (`OPENAI_API_KEY`, `XAI_API_KEY`, `GEMINI_API_KEY`, `VERTEX_SERVICE_ACCOUNT_JSON`) take precedence and are immutable through the API.
+## Thumbnail Backfill
+| Endpoint | Method | Description |
+|---|---|---|
+| `/api/history/backfill-thumbnails` | POST | Generate missing `.thumb.jpg` thumbnails for all images and videos in the generated directory. Returns `{ ok, total, created, skipped, failed }`. Also available offline via `ima2 backfill-thumbs`. |
+Thumbnails are also generated automatically on server startup for any media files that lack them.
 ## Endpoint → CLI Mapping
@@ -499,6 +548,8 @@ Most server routes under `/api/*` have a CLI wrapper. The exception is **Agent M
 | `GET /api/billing` / `GET /api/providers` / `GET /api/oauth/status` / `GET /api/grok/status` | `ima2 billing` / `ima2 providers` / `ima2 oauth status` / `ima2 grok status` |
 | `GET /api/health` | `ima2 ping` |
 | `GET /api/capabilities` | `ima2 capabilities` |
+| `POST /api/history/backfill-thumbnails` | `ima2 backfill-thumbs` |
+| `GET /api/keys/status`, `PUT/DELETE /api/keys/:provider`, `PUT/DELETE /api/keys/vertex` | Web UI only (Settings > API Keys) |
 | `GET/POST/PATCH/DELETE /api/agent/*` (sessions, turns, queue) | — (Agent Mode; web UI only, no CLI) |
 | `POST /api/prompt-builder/chat` | `ima2 prompt build` |

package/docs/CLI.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # CLI Reference
-Most server routes under `/api/*` have a CLI wrapper; Agent Mode (`/api/agent/*`) is web-UI-only and has no `ima2` subcommand. The prompt builder HTTP route (`POST /api/prompt-builder/chat`) is available through `ima2 prompt build`. The CLI is a thin shell over the local server, so most commands require a running `ima2 serve` (the few exceptions — `serve`, `setup`, `doctor`, `status`, `open`, `reset`, `config`, `grok`, `skill`, `capabilities`, and local `defaults` inspection — work without a live server).
+Most server routes under `/api/*` have a CLI wrapper; Agent Mode (`/api/agent/*`) is web-UI-only and has no `ima2` subcommand. The prompt builder HTTP route (`POST /api/prompt-builder/chat`) is available through `ima2 prompt build`. The CLI is a thin shell over the local server, so most commands require a running `ima2 serve` (the few exceptions — `serve`, `setup`, `doctor`, `status`, `open`, `reset`, `config`, `grok`, `skill`, `capabilities`, `backfill-thumbs`, and local `defaults` inspection — work without a live server).
 For a quick start, see the [main README](../README.md). For endpoint mapping, see [API.md](API.md).
@@ -16,6 +16,7 @@ For a quick start, see the [main README](../README.md). For endpoint mapping, se
 | `ima2 open` | Open the web UI in a browser |
 | `ima2 grok login/status/models/proxy` | Manage the bundled progrok runtime used by the Grok provider |
 | `ima2 reset` | Remove saved config |
+| `ima2 backfill-thumbs` | Generate missing gallery thumbnails for images and videos (offline, no running server needed) |
 ## Common flags
@@ -53,13 +54,14 @@ Agents should start from the packaged skill and capability commands instead of g
 | `ima2 node generate` | Node-mode generate (SSE; supports `--no-stream`) |
 | `ima2 node show <nodeId>` | Read node metadata |
-Generation flags include `--provider <auto|oauth|api|grok>`, `--reasoning-effort {none\|low\|medium\|high\|xhigh}`, `--web-search` / `--no-web-search`, `--model`, `--mode`, `--moderation`, `--ref <file>` (repeatable, up to 5 where supported), `-q low|medium|high`, `-n <count>`, `-o <file>`.
+Generation flags include `--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>`, `--reasoning-effort {none\|low\|medium\|high\|xhigh}`, `--web-search` / `--no-web-search`, `--model`, `--mode`, `--moderation`, `--ref <file>` (repeatable, up to 5 where supported), `-q low|medium|high`, `-n <count>`, `-o <file>`.
 Provider override semantics:
 - `api` forces the API-key Responses path and requires a configured API key.
 - `oauth` forces the local OAuth proxy path.
 - `grok` uses the bundled progrok xAI proxy (`127.0.0.1:18645`). Classic generation first runs mandatory xAI Web Search through Responses API, then asks `grok-4.3` to call ima2's local `generate_image` tool, then ima2 executes xAI `/v1/images/generations`. If `--ref` images are attached, the final step uses xAI `/v1/images/edits` instead so image-to-image/reference context is preserved. Models: `grok-imagine-image`, `grok-imagine-image-quality`. Size is mapped to xAI `aspect_ratio` and `resolution`; the UI web-search toggle is OpenAI-provider-only because Grok search is always on in this path.
+- `agy` spawns the Antigravity CLI to generate via Google Gemini (`nano-banana-2`). Fixed 1024×1024 JPEG output, max 3 refs. No web search, quality, size, or mask controls.
 - `auto` preserves route default behavior and currently resolves to GPT OAuth unless server routing changes.
 `ima2 serve` starts the bundled Grok proxy automatically. No separate `progrok`
@@ -105,7 +107,7 @@ mockup`.
 For dense or critical text, keep the text large and explicit. Exact placement,
 small text, and pixel-perfect typography can still need iteration or post-editing.
-Multimode-specific flags include `--max-images <1..8>`, `--ref <file>` (repeatable, max 5), `--mode <auto|direct>`, `--provider <auto|oauth|api|grok>`, and `--show-partial`. `ima2 edit --mask` remains intentionally deferred to #31 because current mask plumbing is guided edit rather than guaranteed true masked/inpaint semantics.
+Multimode-specific flags include `--max-images <1..8>`, `--ref <file>` (repeatable, max 5), `--mode <auto|direct>`, `--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>`, and `--show-partial`. `ima2 edit --mask` remains intentionally deferred to #31 because current mask plumbing is guided edit rather than guaranteed true masked/inpaint semantics.
 ## Video
@@ -126,6 +128,8 @@ Video generate flags:
 | `--resolution <480p\|720p>` | Video resolution (default: 480p) |
 | `--aspect-ratio <ratio\|auto>` | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto (default: auto) |
 | `--model <name>` | `grok-imagine-video` or `grok-imagine-video-1.5-preview` |
+| `--planner-model <name>` | Grok planner override (default: `grok-4.3`; also in settings UI and `IMA2_GROK_PLANNER_MODEL`) |
+| `--storyboard` | Enable storyboard mode — maintains character/scene continuity across sequential clips |
 | `--ref <file>` | Attach source/reference image (repeatable, max 7) |
 | `-o, --out <file>` | Output file path |
 | `-d, --out-dir <dir>` | Output directory |
@@ -160,6 +164,8 @@ Video continue flags:
 | `--aspect-ratio <ratio\|auto>` | New clip aspect ratio |
 | `--model <name>` | Optional video generation model |
+Video continue also accepts `--planner-model` and `--storyboard`.
 Video mode is auto-detected from `--ref` count:
 | Refs | Mode |

package/docs/PROMPT_STUDIO.md CHANGED Viewed

@@ -12,10 +12,12 @@ you want a reproducible way to report a workspace issue.
 | Area | What it does | Notes |
 |---|---|---|
 | Composer | Holds the prompt for the next request. | Selecting an existing image is view-only. It should not overwrite the composer. |
+| Storyboard | Maintains character and scene continuity across sequential frames. | Toggle in the composer. Works for image and video generation; image keyframes are composed for video production. |
 | Multimode | Starts several separate image requests from the current prompt. | Each slot is a candidate output, not a collage panel or a guaranteed scene sequence. |
 | 1:1 Direct | Sends the prompt through with less rewriting by the app. | Use it for exact wording, strict prompt experiments, or provider-side prompt syntax. |
 | Model quick menu | Changes the image model and reasoning effort from the sidebar header. | The full Settings workspace remains the detailed configuration page. |
-| Recent generations | Shows the visible Prompt Studio history domain. | Arrow keys move inside the same visible recent domain instead of hidden older rows. Video items render as video thumbnails. Drag any thumbnail to the composer to add it as a reference image. |
+| Recent generations | Shows the visible Prompt Studio history domain. | Arrow keys move inside the same visible recent domain instead of hidden older rows. Video items render as video thumbnails. Drag any thumbnail to the composer to add it as a reference image. Video results expose First, Mid, and Last frame buttons to copy keyframes. |
+| Video settings | Controls Grok video duration, resolution, aspect ratio, and planner model. | Default planner model is `grok-4.3`; override per request when needed. |
 | Gallery | Browses saved local images, All/Favorites tabs, and folders. | Favorite toggles should preserve the gallery viewport you were browsing. |
 | Prompt library | Imports saved prompt text into the composer intentionally. | Library insert/continue actions are explicit prompt imports; passive image selection is not. |

package/docs/migration/runtime-test-inventory.md CHANGED Viewed

@@ -4,7 +4,7 @@ Generated by `npm run test:inventory` (script: `scripts/classify-tests.mjs`).
 _Tests considered "runtime-importing" if they import from `../lib/`, `../routes/`, `../bin/`, `../server`, or `../config`._
-Total: 175 (runtime: 60, contract: 115)
+Total: 177 (runtime: 61, contract: 116)
 ## Runtime-importing tests
 - `tests/agent-mode-auto-planner-contract.test.ts`
@@ -64,6 +64,7 @@ Total: 175 (runtime: 60, contract: 115)
 - `tests/star-prompt.test.ts`
 - `tests/storage-migration.test.ts`
 - `tests/style-sheet.test.ts`
+- `tests/thumb-backfill.test.ts`
 - `tests/videoContinuity.test.ts`
 - `tests/videoExtendedRoute.test.ts`
 - `tests/videoRoute.test.ts`
@@ -155,6 +156,7 @@ Total: 175 (runtime: 60, contract: 115)
 - `tests/node-layout-contract.test.js`
 - `tests/node-pending-recovery-contract.test.js`
 - `tests/node-regen-actions-contract.test.js`
+- `tests/node-session-evaporation-contract.test.js`
 - `tests/node-ui-contract.test.js`
 - `tests/oauth-masked-edit-contract.test.js`
 - `tests/oauth-proxy-edit-mask-contract.test.js`

package/lib/agentRuntime.js CHANGED Viewed

@@ -10,6 +10,7 @@ import { detectImageMimeFromB64 } from "./refs.js";
 import { resolveProviderOptions } from "./providerOptions.js";
 import { generateViaResponses } from "./responsesImageAdapter.js";
 import { generateViaGrok } from "./grokImageAdapter.js";
+import { generateViaAgy } from "./agyImageAdapter.js";
 import { generateVideoViaGrok } from "./grokVideoAdapter.js";
 import { parseVideoParams } from "./agentGenerationPlanner.js";
 import { appendAgentTurn, buildImageContextManifest, getAgentImages, getAgentSession, importAgentImage, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
@@ -46,7 +47,7 @@ export async function runAgentGenerationPlan(ctx, sessionId, prompt, plan, optio
     const session = getAgentSession(sessionId);
     if (!session)
         throw notFound(sessionId);
-    const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
+    const webSearchEnabled = options.provider === "agy" ? false : options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
     const enabledTools = webSearchEnabled
         ? [...AGENT_ALLOWED_TOOLS]
         : ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
@@ -234,21 +235,26 @@ async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEna
     const effectiveModel = activeProvider === "grok" && options.quality === "high"
         ? "grok-imagine-image-quality"
         : providerOptions.model;
-    const response = activeProvider === "grok"
-        ? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
-            model: effectiveModel,
-            size: providerOptions.size,
+    const response = activeProvider === "agy"
+        ? await generateViaAgy(`${manifest}\n\nUser request:\n${prompt}`, {
             requestId,
             signal: options.signal ?? undefined,
-            references: await loadAgentCurrentImageReferences(ctx, sessionId),
         })
-        : await generateViaResponses(activeProvider, `${manifest}\n\nUser request:\n${prompt}`, options.quality ?? "medium", providerOptions.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
-            model: providerOptions.model,
-            reasoningEffort: providerOptions.reasoningEffort,
-            webSearchEnabled,
-            signal: options.signal,
-        });
-    const format = activeProvider === "grok"
+        : activeProvider === "grok"
+            ? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
+                model: effectiveModel,
+                size: providerOptions.size,
+                requestId,
+                signal: options.signal ?? undefined,
+                references: await loadAgentCurrentImageReferences(ctx, sessionId),
+            })
+            : await generateViaResponses(activeProvider, `${manifest}\n\nUser request:\n${prompt}`, options.quality ?? "medium", providerOptions.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
+                model: providerOptions.model,
+                reasoningEffort: providerOptions.reasoningEffort,
+                webSearchEnabled,
+                signal: options.signal,
+            });
+    const format = activeProvider === "grok" || activeProvider === "agy"
         ? imageFormatFromMime(("mime" in response ? response.mime : undefined) || detectImageMimeFromB64(response.b64) || "image/jpeg")
         : options.format ?? "png";
     const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
@@ -430,13 +436,13 @@ async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
 function recordSearchFindings(sessionId, prompt, count, provider) {
     if (!count)
         return [];
-    const isGrok = provider === "grok";
+    const providerLabel = provider === "grok" ? "Grok" : provider === "agy" ? "Gemini" : "Responses";
     return [
         recordAgentWebFinding({
             sessionId,
             query: prompt,
-            title: isGrok ? "Grok visual research" : "Responses web_search",
-            snippet: `${isGrok ? "Grok" : "Responses"} reported ${count} web search call${count === 1 ? "" : "s"}.`,
+            title: `${providerLabel} visual research`,
+            snippet: `${providerLabel} reported ${count} web search call${count === 1 ? "" : "s"}.`,
         }),
     ];
 }

package/lib/agentSettings.js CHANGED Viewed

@@ -1,4 +1,4 @@
-const PROVIDERS = new Set(["oauth", "api", "grok"]);
+const PROVIDERS = new Set(["oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
 const QUALITIES = new Set(["low", "medium", "high"]);
 const FORMATS = new Set(["png", "jpeg", "webp"]);
 const MODERATIONS = new Set(["auto", "low"]);