ima2-gen 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -0
- package/README.md +12 -12
- package/bin/commands/backfillThumbs.js +24 -0
- package/bin/commands/edit.js +7 -6
- package/bin/commands/gen.js +13 -6
- package/bin/commands/multimode.js +5 -4
- package/bin/commands/node.js +4 -4
- package/bin/ima2.js +21 -11
- package/bin/lib/config-store.js +1 -1
- package/docs/API.md +184 -10
- package/docs/CLI.md +11 -4
- package/docs/FAQ.ko.md +16 -0
- package/docs/FAQ.md +30 -0
- package/docs/PROMPT_STUDIO.md +3 -1
- package/docs/README.ko.md +7 -3
- package/docs/migration/runtime-test-inventory.md +17 -1
- package/lib/agentImageVideoGen.js +261 -0
- package/lib/agentRuntime.js +11 -260
- package/lib/agentSettings.js +1 -1
- package/lib/agyImageAdapter.js +259 -0
- package/lib/capabilities.js +2 -1
- package/lib/configKeys.js +1 -1
- package/lib/errorClassify.js +8 -7
- package/lib/eventBus.js +71 -0
- package/lib/geminiApiImageAdapter.js +179 -0
- package/lib/generationErrors.js +3 -1
- package/lib/grokImageAdapter.js +74 -128
- package/lib/grokImageCore.js +153 -0
- package/lib/grokMultimodeAdapter.js +7 -4
- package/lib/grokRuntime.js +3 -0
- package/lib/grokSizeMapper.js +13 -1
- package/lib/grokVideoAdapter.js +14 -7
- package/lib/grokVideoCanvas.js +13 -0
- package/lib/grokVideoPlannerPrompt.js +53 -6
- package/lib/historyList.js +19 -2
- package/lib/imageModels.js +15 -0
- package/lib/imageThumb.js +38 -0
- package/lib/inflight.js +54 -17
- package/lib/multimodeHelpers.js +10 -0
- package/lib/nodeHelpers.js +59 -0
- package/lib/oauthProxy/prompts.js +30 -36
- package/lib/promptBuilder/systemPrompt.js +2 -5
- package/lib/promptSafetyPolicy.js +1 -5
- package/lib/providerOptions.js +36 -1
- package/lib/responsesFallback.js +53 -44
- package/lib/routeHelpers.js +44 -0
- package/lib/runtimeContext.js +27 -0
- package/lib/ssePublish.js +12 -0
- package/lib/storageMigration.js +1 -1
- package/lib/storyboardPrefix.js +28 -0
- package/lib/thumbBackfill.js +70 -0
- package/lib/vertexAuth.js +44 -0
- package/lib/videoThumb.js +60 -0
- package/package.json +7 -2
- package/routes/agy.js +44 -0
- package/routes/auth.js +242 -0
- package/routes/edit.js +48 -8
- package/routes/events.js +78 -0
- package/routes/generate.js +135 -135
- package/routes/history.js +13 -0
- package/routes/index.js +8 -0
- package/routes/keys.js +254 -0
- package/routes/multimode.js +138 -62
- package/routes/nodes.js +107 -129
- package/routes/quota.js +58 -7
- package/routes/video.js +107 -20
- package/server.js +123 -0
- package/skills/ima2/SKILL.md +98 -21
- package/ui/dist/.vite/manifest.json +12 -12
- package/ui/dist/assets/AgentWorkspace-Dth6YijN.js +3 -0
- package/ui/dist/assets/{CardNewsWorkspace-BN-ga1lG.js → CardNewsWorkspace-Dav3K5CT.js} +2 -2
- package/ui/dist/assets/{NodeCanvas-BbMa4IhI.js → NodeCanvas-C4ifFzB1.js} +2 -2
- package/ui/dist/assets/{PromptBuilderPanel-DRwBJRDQ.js → PromptBuilderPanel-CEcyU9PL.js} +1 -1
- package/ui/dist/assets/{PromptImportDialog-Dp85kHCq.js → PromptImportDialog-CgQ94Gth.js} +2 -2
- package/ui/dist/assets/{PromptImportDiscoverySection-BE8Q8MLD.js → PromptImportDiscoverySection-CuzyzbNI.js} +1 -1
- package/ui/dist/assets/{PromptImportFolderSection-PtH5x0sc.js → PromptImportFolderSection-DHLGlO6l.js} +1 -1
- package/ui/dist/assets/{PromptLibraryPanel-FnM9tHI9.js → PromptLibraryPanel-BOe18we8.js} +2 -2
- package/ui/dist/assets/SettingsWorkspace-Cdgnm4Wa.js +1 -0
- package/ui/dist/assets/index-C5PSahkr.js +1 -0
- package/ui/dist/assets/index-Dn2AhL6d.css +1 -0
- package/ui/dist/assets/index-Tjqx6wUV.js +23 -0
- package/ui/dist/index.html +2 -2
- package/ui/dist/assets/AgentWorkspace-C21zqdTZ.js +0 -3
- package/ui/dist/assets/SettingsWorkspace-MARPGyBL.js +0 -1
- package/ui/dist/assets/index-BAFI6htx.js +0 -42
- package/ui/dist/assets/index-BSXxr_Bt.js +0 -1
- package/ui/dist/assets/index-DS-ADE7U.css +0 -1
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **SSE multiplexing** — shared `GET /api/events` endpoint with ring-buffer replay and `Last-Event-ID` reconnect support (`lib/eventBus.ts`, `routes/events.ts`).
|
|
13
|
+
- **Async POST generation mode** — multimode, node, and video routes accept async POST and dual-emit progress on both per-request SSE and the shared event bus.
|
|
14
|
+
- **Frontend event channel** — singleton `EventSource` client (`ui/src/lib/eventChannel.ts`) replaces per-request SSE streams for UI generation flows.
|
|
15
|
+
- **Subscribe-before-fetch contract** — `tests/async-stream-subscribe-order.test.js` locks the race where ultra-fast server publish could arrive before client handler registration.
|
|
16
|
+
- Store modularization — split monolithic `useAppStore` into focused impl modules (`storeGenImpl`, `storeNodeGenImpl`, `storeVideoImpl`, `storeInflightImpl`, etc.).
|
|
17
|
+
- Frontend/API barrel splits — `ui/src/lib/api.ts` and `ui/src/index.css` decomposed into ≤500-line modules.
|
|
18
|
+
- Storyboard workflow — 9-panel grid with black Panel 1 lead-in for image and video generation.
|
|
19
|
+
- Gallery hang fix — video decoder/connection exhaustion on focus change (RCA 01).
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
|
|
23
|
+
- UI clients migrated from per-request SSE to `eventChannel` + async POST for multimode, node, and video generation.
|
|
24
|
+
- Multimode concurrency tracking uses `activeFlightIds` Set instead of `multimodeAbortControllers`.
|
|
25
|
+
- Test suite grew to **968** cases across **186** files (65 runtime-importing, 121 contract-only).
|
|
26
|
+
|
|
27
|
+
### Fixed
|
|
28
|
+
|
|
29
|
+
- SSE multiplexing hardening — inflight cancel/done race guards, replay-gap handling, subscribe/timeout/requestId races, and frontend reconnect/error parsing (`sseStreamError.ts`).
|
|
30
|
+
- Node route validation order — `startJob`/202 response moved after request validation.
|
|
31
|
+
- CI typecheck — unused imports in card-news tests and store split type mismatches.
|
|
32
|
+
- Thumbnail backfill failure reporting (#94).
|
|
33
|
+
- AGY Windows pipe handling, Gemini API aspect ratio string values, multimode same-prompt batching.
|
|
34
|
+
- Moderation over-filtering — removed safety tags and added error enrichment.
|
|
35
|
+
|
|
36
|
+
## [2.0.1] - 2026-06-03
|
|
37
|
+
|
|
38
|
+
### Added
|
|
39
|
+
|
|
40
|
+
- **Gemini API provider** (`provider: "gemini-api"`) — direct Generative Language API and Vertex AI paths with `nano-banana-2` / `nano-banana-pro` model picker, aspect ratio, and resolution controls.
|
|
41
|
+
- **Grok billing quota bar** — `$used/$limit` on QuotaCard via `GET /api/quota`.
|
|
42
|
+
- **Switch Account** — device-code OAuth re-auth for Grok and Codex without leaving the app.
|
|
43
|
+
- **Grok video model picker** — V / V1.5 selection in video controls.
|
|
44
|
+
- Image/video thumbnails and history sidebar cards.
|
|
45
|
+
- Centralized recursive thumbnail backfill.
|
|
46
|
+
- Gemini/Vertex API key management routes and web UI.
|
|
47
|
+
|
|
48
|
+
### Changed
|
|
49
|
+
|
|
50
|
+
- Provider plumbing and CLI parity for gemini-api, grok-api, and vertex paths.
|
|
51
|
+
- Grok model/size pickers and adapter updates.
|
|
52
|
+
- Pages and developer docs reorganized to be feature-centric.
|
|
53
|
+
|
|
54
|
+
### Fixed
|
|
55
|
+
|
|
56
|
+
- Preserve video metadata in sequence history and thumbnail fallbacks in history UI.
|
|
57
|
+
- Vertex AI integration — auth mode persistence, skip unsupported `response_format`, prefer Vertex over API key when both configured.
|
|
58
|
+
- Gemini image cost corrected to official pricing; aspect ratio/resolution UI layout polish.
|
|
59
|
+
- Skip GPT pixel-limit size confirm for Grok/Gemini providers.
|
|
60
|
+
- Reap orphaned codex device-auth child on abandoned Switch Account flow.
|
|
61
|
+
- Document Gemini providers in CLI help; harden provider paths and CLI metadata.
|
|
62
|
+
|
|
63
|
+
### Security
|
|
64
|
+
|
|
65
|
+
- Atomic `config.json` writes in keys routes; atomic token write with codex env scrubbing.
|
|
66
|
+
- Cap sharp input pixels to prevent decompression bombs.
|
|
67
|
+
- Audit fixes — crypto session IDs, session cap, double-click guard, API key in header only.
|
|
68
|
+
|
|
69
|
+
## [2.0.0] - 2026-06-02
|
|
70
|
+
|
|
71
|
+
### Added
|
|
72
|
+
|
|
73
|
+
- Major version bump packaging the Gemini API, Grok API key, Vertex AI, and expanded provider surface shipped in the 1.1.x preview line.
|
|
74
|
+
|
|
75
|
+
## [1.1.23] - 2026-06-02
|
|
76
|
+
|
|
77
|
+
### Added
|
|
78
|
+
|
|
79
|
+
- Gallery skeleton shimmer and F5 refresh fix (#93).
|
|
80
|
+
- Hero one-click install scripts on the documentation site.
|
|
81
|
+
|
|
82
|
+
## [1.1.22] - 2026-06-02
|
|
83
|
+
|
|
84
|
+
### Fixed
|
|
85
|
+
|
|
86
|
+
- Graceful shutdown releases file handles on Windows (EBUSY fix).
|
|
87
|
+
- Ctrl+C clean shutdown — database close, child process stop, file lock release.
|
|
88
|
+
|
|
89
|
+
## [1.1.21] - 2026-05-31
|
|
90
|
+
|
|
91
|
+
### Changed
|
|
92
|
+
|
|
93
|
+
- Bump bundled progrok 0.1.1 → 0.2.0 (video edit + extend commands).
|
|
94
|
+
|
|
95
|
+
## [1.1.15] - 2026-05-31
|
|
96
|
+
|
|
97
|
+
### Added
|
|
98
|
+
|
|
99
|
+
- **Agent Mode** — conversational image workspace with sessions, turns, durable queue, slash commands (`/api/agent/*`).
|
|
100
|
+
- **Grok provider** — bundled progrok, Classic/Node/Agent through search + planner + xAI Images API.
|
|
101
|
+
- **Video generation** — text/image/reference-to-video via Grok, edit/extend/frame/analyze routes, branch-local last-frame continuation.
|
|
102
|
+
- `GET /api/capabilities` discovery endpoint (#62).
|
|
103
|
+
- `POST /api/prompt-builder/chat` assistant and `ima2 prompt build` CLI wrapper.
|
|
104
|
+
- Grok model/size pickers, billing API, and `ima2 grok` helpers.
|
|
105
|
+
|
|
106
|
+
### Fixed
|
|
107
|
+
|
|
108
|
+
- Prompt Studio regression (#75), long-prompt preview (#77), prompt autofill perf (#78).
|
|
109
|
+
- Per-image metadata persistence (#79), batch comparison matrix (#80).
|
|
110
|
+
|
|
111
|
+
## [1.1.10] - 2026-05-06
|
|
112
|
+
|
|
113
|
+
### Added
|
|
114
|
+
|
|
115
|
+
- API-key provider Responses parity for generate/edit/multimode/node (#49).
|
|
116
|
+
- Masked-edit feature flag groundwork (`IMA2_OAUTH_MASKED_EDIT_ENABLED`, #31).
|
|
117
|
+
- Gallery default-to-current-session with All Images toggle (#42).
|
|
118
|
+
- Centralized `persistenceRegistry` for `ima2.*` localStorage keys (#43).
|
|
119
|
+
- `typecheck:tests` and `test:inventory` quality gates.
|
|
120
|
+
|
|
121
|
+
### Changed
|
|
122
|
+
|
|
123
|
+
- Split `lib/oauthProxy.ts` into `lib/oauthProxy/*` subtree.
|
|
124
|
+
- Added `lib/runtimeContext.ts`, `lib/responsesImageAdapter.ts`, `lib/providerOptions.ts`, `lib/errInfo.ts`, `lib/promptSafetyPolicy.ts`.
|
|
125
|
+
|
|
126
|
+
## [1.1.0] - 2026-04-25
|
|
127
|
+
|
|
128
|
+
### Added
|
|
129
|
+
|
|
130
|
+
- TypeScript migration complete — route, lib, server, config, and bin sources are `*.ts` with committed build artifacts.
|
|
131
|
+
- CLI feature parity with server API (#45).
|
|
132
|
+
- Canvas Mode workspace split and dual-mask cleanup.
|
|
133
|
+
- OS-trash soft-delete for history.
|
|
134
|
+
|
|
135
|
+
## [1.0.3] - 2026-04-23
|
|
136
|
+
|
|
137
|
+
### Added
|
|
138
|
+
|
|
139
|
+
- Initial npm publish of `ima2-gen` — local OAuth image generation studio with Classic mode, Node mode, Canvas Mode, and CLI.
|
|
140
|
+
|
|
141
|
+
[Unreleased]: https://github.com/lidge-jun/ima2-gen/compare/v2.0.1...HEAD
|
|
142
|
+
[2.0.1]: https://github.com/lidge-jun/ima2-gen/compare/v2.0.0...v2.0.1
|
|
143
|
+
[2.0.0]: https://github.com/lidge-jun/ima2-gen/compare/v1.1.23...v2.0.0
|
|
144
|
+
[1.1.23]: https://github.com/lidge-jun/ima2-gen/compare/v1.1.22...v1.1.23
|
|
145
|
+
[1.1.22]: https://github.com/lidge-jun/ima2-gen/compare/v1.1.21...v1.1.22
|
|
146
|
+
[1.1.21]: https://github.com/lidge-jun/ima2-gen/compare/v1.1.20...v1.1.21
|
|
147
|
+
[1.1.15]: https://github.com/lidge-jun/ima2-gen/compare/v1.1.14...v1.1.15
|
|
148
|
+
[1.1.10]: https://github.com/lidge-jun/ima2-gen/compare/v1.1.9...v1.1.10
|
|
149
|
+
[1.1.0]: https://github.com/lidge-jun/ima2-gen/compare/v1.0.11...v1.1.0
|
|
150
|
+
[1.0.3]: https://github.com/lidge-jun/ima2-gen/releases/tag/v1.0.3
|
package/README.md
CHANGED
|
@@ -69,7 +69,7 @@ Each script checks for nvm/fnm/brew/winget, installs Node LTS through the best a
|
|
|
69
69
|
1. **GPT OAuth** — login with ChatGPT account (free, images only)
|
|
70
70
|
2. **Grok OAuth** — login with xAI/Grok account (images + video)
|
|
71
71
|
3. **Both** — GPT OAuth + Grok OAuth (full feature access)
|
|
72
|
-
4. **
|
|
72
|
+
4. **Web setup** — configure everything in the web UI
|
|
73
73
|
|
|
74
74
|
Video generation requires Grok OAuth (option 2 or 3). Run `ima2 grok login` separately if you already have GPT OAuth configured and want to add video support; it defaults to the manual-paste flow.
|
|
75
75
|
|
|
@@ -83,16 +83,6 @@ npm install -g ima2-gen@latest
|
|
|
83
83
|
|
|
84
84
|
Ctrl+C now performs a clean shutdown — closing the database, stopping child processes, and releasing file locks. On older versions (< 1.1.22) or if you see `EBUSY` on Windows, use the install script which handles stale process cleanup automatically.
|
|
85
85
|
|
|
86
|
-
## What's New in v1.1.22
|
|
87
|
-
|
|
88
|
-
- **Storyboard mode**: composer toggle for maintaining character/scene continuity across sequential frames. Works in both image and video pipelines.
|
|
89
|
-
- **Planner model selection**: choose the Grok planner model (grok-4.3 default) from video settings or via `--planner-model` CLI flag.
|
|
90
|
-
- **Video frame copy**: First/Mid/Last frame extraction buttons on video results for easy keyframe copying.
|
|
91
|
-
- **Multi-character dialogue**: video/image planners now identify characters by visual appearance (clothing + physique + props) instead of names, improving dialogue attribution.
|
|
92
|
-
- **Graceful shutdown**: Ctrl+C now properly closes DB, server sockets, and child processes — fixes Windows EBUSY on npm update.
|
|
93
|
-
- **Cross-platform install scripts**: one-click install for macOS, Windows, and Linux (auto-detects nvm/fnm/brew/winget).
|
|
94
|
-
- **Atomic sidecar writes**: metadata files now use temp+rename to prevent corruption on crash.
|
|
95
|
-
|
|
96
86
|
## What It Does
|
|
97
87
|
|
|
98
88
|
- **Classic mode**: generate, edit, reuse the current image, paste references, and continue from history.
|
|
@@ -107,18 +97,26 @@ Ctrl+C now performs a clean shutdown — closing the database, stopping child pr
|
|
|
107
97
|
- **Mobile shell**: use the app bar, compose sheet, and compact settings toggle on smaller screens.
|
|
108
98
|
- **Observable jobs**: active and recent jobs are tracked with safe logs and request IDs.
|
|
109
99
|
|
|
100
|
+
### SSE Multiplexing
|
|
101
|
+
|
|
102
|
+
The web UI uses a single `GET /api/events` Server-Sent Events connection for all generation progress. Multimode, node, and video requests are submitted as async POST (`202 { requestId }`) and progress events are multiplexed through a shared event bus. This eliminates the browser 6-connection limit that previously caused gallery hangs during concurrent generation. CLI clients that do not send `async: true` still receive per-request SSE streams for backward compatibility.
|
|
103
|
+
|
|
110
104
|
## Provider Paths
|
|
111
105
|
|
|
112
|
-
Image generation can run through the local Codex/ChatGPT OAuth path, a configured OpenAI API key,
|
|
106
|
+
Image generation can run through the local Codex/ChatGPT OAuth path, a configured OpenAI API key, the bundled Grok provider, or the Gemini provider via Antigravity CLI.
|
|
113
107
|
|
|
114
108
|
- `provider: "oauth"` uses the local Codex OAuth proxy.
|
|
115
109
|
- `provider: "api"` calls the OpenAI Responses API with the hosted `image_generation` tool.
|
|
116
110
|
- `provider: "grok"` starts bundled `progrok` on `127.0.0.1:18645`, runs mandatory xAI Web Search plus a planner pass (default: `grok-4.3`, configurable in settings or via `--planner-model`), then calls xAI Images API through the local proxy.
|
|
111
|
+
- `provider: "agy"` spawns the Antigravity CLI (`agy -p`) to generate images via Google Gemini's `default_api:generate_image` tool (model: `nano-banana-2`). Output is fixed at 1024×1024 JPEG, max 3 reference images. No web search, quality, or size controls.
|
|
112
|
+
- `provider: "gemini-api"` calls the Google Generative Language API directly. Supports two models: `nano-banana-2` (Gemini 3.1 Flash Image) and `nano-banana-pro` (Gemini 3 Pro Image). Auth is via `GEMINI_API_KEY` env var, web UI key management, or a Vertex AI service account JSON (`VERTEX_SERVICE_ACCOUNT_JSON`). When both an API key and Vertex credentials are configured, Vertex takes priority. Supports variable aspect ratios (1:1 through 21:9) and four resolution tiers (512px, 1K, 2K, 4K); these controls are only honored on the direct API path — the Vertex AI endpoint ignores aspect/size because it does not accept the `response_format` field. Per-model cost differs: `nano-banana-2` (Flash): 512=$0.001, 1K=$0.003, 2K=$0.004, 4K=$0.006; `nano-banana-pro`: 1K=$0.007, 2K=$0.007, 4K=$0.013. No web search or mask controls.
|
|
117
113
|
- API-key generation supports classic generate, edit, mask-guided edit, multimode, and node generation.
|
|
118
114
|
- Grok generation supports Classic, Node, and Agent flows. If a Classic reference, Node parent image, or Agent current image is present, ima2 switches the final Grok call to xAI image edit so image-to-image context is preserved.
|
|
119
115
|
|
|
120
116
|
If no provider is specified, the app keeps the current GPT OAuth/default behavior. API-key generation defaults to `gpt-5.4-mini`, `low` reasoning, and `1024x1024` unless the request passes validated model, reasoning, size, or web-search options. Grok defaults to `grok-imagine-image`; `quality: "high"` promotes the final image call to `grok-imagine-image-quality`.
|
|
121
117
|
|
|
118
|
+
Grok image generation exposes a model picker (`grok-imagine-image` / `grok-imagine-image-quality`) and a size picker (aspect ratio + 1k/2k resolution). The Settings page shows a billing/quota bar with `$used/$limit` drawn from the Grok billing API, and a **Switch Account** button that starts a device-code OAuth flow (`POST /api/auth/switch`) for re-authenticating without leaving the app.
|
|
119
|
+
|
|
122
120
|
Grok video generation uses `grok-imagine-video` (default) or `grok-imagine-video-1.5-preview`. Three modes are auto-detected from reference count: text-to-video (0 refs), image-to-video (1 ref), and reference-to-video (2–7 refs, max 10s duration). `grok-imagine-video-1.5-preview` supports image-to-video but not `reference_images` Ref2V, so 2+ refs use `grok-imagine-video` as the effective model. Video edit and extension are also base-model only. Video controls include duration (1–15s), resolution (480p, 720p), and aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto).
|
|
123
121
|
|
|
124
122
|

|
|
@@ -269,6 +267,8 @@ environment variables > ~/.ima2/config.json > built-in defaults
|
|
|
269
267
|
| `IMA2_GROK_IMAGE_MODEL_DEFAULT` | `grok-imagine-image` | Default final Grok image model |
|
|
270
268
|
| `IMA2_GROK_GENERATION_TIMEOUT_MS` | `120000` | Timeout for the final Grok Images API call |
|
|
271
269
|
| `IMA2_OAUTH_MASKED_EDIT_ENABLED` | `false` | Opt-in feature flag for masked-edit requests on the OAuth path (#31, groundwork only) |
|
|
270
|
+
| `GEMINI_API_KEY` | — | API key for `provider: "gemini-api"` direct Generative Language API path |
|
|
271
|
+
| `VERTEX_SERVICE_ACCOUNT_JSON` | — | Google service account JSON for Vertex AI auth with `provider: "gemini-api"`; takes priority over `GEMINI_API_KEY` when both are set |
|
|
272
272
|
|
|
273
273
|
### Logging modes
|
|
274
274
|
|
package/bin/commands/backfillThumbs.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { config } from "../../config.js";
|
|
2
|
+
import { backfillThumbnails } from "../../lib/thumbBackfill.js";
|
|
3
|
+
import { invalidateHistoryIndex } from "../../lib/historyIndex.js";
|
|
4
|
+
export async function backfillThumbs() {
|
|
5
|
+
const dir = config.storage.generatedDir;
|
|
6
|
+
console.log(`[thumbs] Scanning ${dir} (recursive) for missing thumbnails...`);
|
|
7
|
+
let r;
|
|
8
|
+
try {
|
|
9
|
+
r = await backfillThumbnails(dir);
|
|
10
|
+
}
|
|
11
|
+
catch (e) {
|
|
12
|
+
console.error("[thumbs] Backfill failed:", e instanceof Error ? e.message : e);
|
|
13
|
+
return;
|
|
14
|
+
}
|
|
15
|
+
if (r.created > 0)
|
|
16
|
+
invalidateHistoryIndex();
|
|
17
|
+
console.log(`[thumbs] Done: ${r.created} created, ${r.skipped} skipped (already exist), ${r.failed} failed out of ${r.total} media files.`);
|
|
18
|
+
if (r.failures.length > 0) {
|
|
19
|
+
console.log(`[thumbs] Showing ${r.failures.length} thumbnail failure(s):`);
|
|
20
|
+
for (const failure of r.failures) {
|
|
21
|
+
console.log(` - ${failure.kind}: ${failure.file} (${failure.reason})`);
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
package/bin/commands/edit.js
CHANGED
|
@@ -7,8 +7,8 @@ import { createCliRequestId, recoverGeneratedOutputs, formatRecoveryHint } from
|
|
|
7
7
|
import { errInfo } from "../../lib/errInfo.js";
|
|
8
8
|
const VALID_MODES = new Set(["auto", "direct"]);
|
|
9
9
|
const VALID_MODERATION = new Set(["auto", "low"]);
|
|
10
|
-
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
|
|
11
|
-
const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality"]);
|
|
10
|
+
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
11
|
+
const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality", "nano-banana-2", "nano-banana-pro"]);
|
|
12
12
|
const SPEC = {
|
|
13
13
|
flags: {
|
|
14
14
|
prompt: { short: "p", type: "string" },
|
|
@@ -40,8 +40,9 @@ const HELP = `
|
|
|
40
40
|
-s, --size <WxH>
|
|
41
41
|
-o, --out <file>
|
|
42
42
|
--json
|
|
43
|
-
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality>
|
|
44
|
-
--provider <auto|oauth|api|grok>
|
|
43
|
+
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
|
|
44
|
+
--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
|
|
45
|
+
Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
|
|
45
46
|
--mode <auto|direct> Prompt handling mode. Default: auto
|
|
46
47
|
--moderation <auto|low> Default: low
|
|
47
48
|
--session <id> Apply session style sheet if enabled
|
|
@@ -64,10 +65,10 @@ export default async function editCmd(argv) {
|
|
|
64
65
|
if (!VALID_MODERATION.has(String(args.moderation)))
|
|
65
66
|
die(2, "--moderation must be one of: auto, low");
|
|
66
67
|
if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
|
|
67
|
-
die(2, "--provider must be one of: auto, oauth, api, grok");
|
|
68
|
+
die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
|
|
68
69
|
}
|
|
69
70
|
if (args.model && !KNOWN_IMAGE_MODELS.has(String(args.model))) {
|
|
70
|
-
die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality");
|
|
71
|
+
die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality, nano-banana-2, nano-banana-pro");
|
|
71
72
|
}
|
|
72
73
|
const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
|
|
73
74
|
if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {
|
package/bin/commands/gen.js
CHANGED
|
@@ -7,8 +7,8 @@ import { createCliRequestId, recoverGeneratedOutputs, formatRecoveryHint } from
|
|
|
7
7
|
import { errInfo } from "../../lib/errInfo.js";
|
|
8
8
|
const VALID_MODES = new Set(["auto", "direct"]);
|
|
9
9
|
const VALID_MODERATION = new Set(["auto", "low"]);
|
|
10
|
-
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
|
|
11
|
-
const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality"]);
|
|
10
|
+
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
11
|
+
const KNOWN_IMAGE_MODELS = new Set(["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex-spark", "grok-imagine-image", "grok-imagine-image-quality", "nano-banana-2", "nano-banana-pro"]);
|
|
12
12
|
const SPEC = {
|
|
13
13
|
flags: {
|
|
14
14
|
quality: { short: "q", type: "string", default: "low" },
|
|
@@ -39,6 +39,11 @@ const HELP = `
|
|
|
39
39
|
|
|
40
40
|
Generate image(s) via the running ima2 server.
|
|
41
41
|
|
|
42
|
+
Batch/async note:
|
|
43
|
+
Use -n <N> for multiple candidates in one request. Independent CLI
|
|
44
|
+
commands can run concurrently against the server; monitor active requestIds
|
|
45
|
+
with 'ima2 ps --json' and stop one with 'ima2 cancel <requestId>'.
|
|
46
|
+
|
|
42
47
|
Options:
|
|
43
48
|
-q, --quality <low|medium|high> Default: low
|
|
44
49
|
-s, --size <WxH | auto> Default: 1024x1024
|
|
@@ -51,8 +56,9 @@ const HELP = `
|
|
|
51
56
|
--stdin Read prompt from stdin
|
|
52
57
|
--timeout <sec> Default: 180
|
|
53
58
|
--server <url> Override server URL
|
|
54
|
-
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality>
|
|
55
|
-
--provider <auto|oauth|api|grok>
|
|
59
|
+
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
|
|
60
|
+
--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
|
|
61
|
+
Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
|
|
56
62
|
--mode <auto|direct> Prompt handling mode. Default: auto
|
|
57
63
|
--moderation <auto|low> Default: low
|
|
58
64
|
--session <id> Apply session style sheet if enabled
|
|
@@ -62,6 +68,7 @@ const HELP = `
|
|
|
62
68
|
|
|
63
69
|
Examples:
|
|
64
70
|
ima2 gen "a shiba in space"
|
|
71
|
+
ima2 gen "a shiba in space" -n 4 -d ./out
|
|
65
72
|
ima2 gen "poster" --model gpt-5.4 --mode direct --moderation low
|
|
66
73
|
ima2 gen "merge" --ref a.png --ref b.png -q high -o out.png
|
|
67
74
|
cat prompt.txt | ima2 gen --stdin -n 2 -d ./out
|
|
@@ -88,10 +95,10 @@ export default async function genCmd(argv) {
|
|
|
88
95
|
if (!VALID_MODERATION.has(String(args.moderation)))
|
|
89
96
|
die(2, "--moderation must be one of: auto, low");
|
|
90
97
|
if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
|
|
91
|
-
die(2, "--provider must be one of: auto, oauth, api, grok");
|
|
98
|
+
die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
|
|
92
99
|
}
|
|
93
100
|
if (args.model && !KNOWN_IMAGE_MODELS.has(String(args.model))) {
|
|
94
|
-
die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality");
|
|
101
|
+
die(2, "--model must be one of: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex-spark, grok-imagine-image, grok-imagine-image-quality, nano-banana-2, nano-banana-pro");
|
|
95
102
|
}
|
|
96
103
|
const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
|
|
97
104
|
if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {
|
package/bin/commands/multimode.js
CHANGED
|
@@ -40,8 +40,9 @@ const HELP = `
|
|
|
40
40
|
-o, --out <file> First image (implies --max-images 1)
|
|
41
41
|
-d, --out-dir <dir> Output dir for multiple images
|
|
42
42
|
--json
|
|
43
|
-
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini>
|
|
44
|
-
--provider <auto|oauth|api|grok>
|
|
43
|
+
--model <gpt-5.5|gpt-5.4|gpt-5.4-mini|grok-imagine-image|grok-imagine-image-quality|nano-banana-2|nano-banana-pro>
|
|
44
|
+
--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>
|
|
45
|
+
Provider (oauth = GPT OAuth; grok = xAI Grok; agy/gemini-api = Gemini)
|
|
45
46
|
--mode <auto|direct> Prompt handling mode. Default: auto
|
|
46
47
|
--ref <file> Attach reference image (repeatable, max 5)
|
|
47
48
|
--reasoning-effort <none|low|medium|high|xhigh>
|
|
@@ -60,11 +61,11 @@ export default async function multimodeCmd(argv) {
|
|
|
60
61
|
const prompt = args.positional.join(" ");
|
|
61
62
|
if (!prompt)
|
|
62
63
|
die(2, "prompt required");
|
|
63
|
-
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
|
|
64
|
+
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
64
65
|
const VALID_MODES = new Set(["auto", "direct"]);
|
|
65
66
|
const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
|
|
66
67
|
if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
|
|
67
|
-
die(2, "--provider must be one of: auto, oauth, api, grok");
|
|
68
|
+
die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
|
|
68
69
|
}
|
|
69
70
|
if (!VALID_MODES.has(String(args.mode)))
|
|
70
71
|
die(2, "--mode must be one of: auto, direct");
|
package/bin/commands/node.js
CHANGED
|
@@ -8,11 +8,11 @@ const HELP = `
|
|
|
8
8
|
ima2 node <subcommand> [options]
|
|
9
9
|
|
|
10
10
|
Subcommands:
|
|
11
|
-
generate <prompt...> [--parent <nodeId>] [--ref <file>...] [--provider <auto|oauth|api|grok>] [--no-stream] [...gen-style flags]
|
|
11
|
+
generate <prompt...> [--parent <nodeId>] [--ref <file>...] [--provider <auto|oauth|api|grok|grok-api|agy|gemini-api>] [--no-stream] [...gen-style flags]
|
|
12
12
|
show <nodeId> [--json]
|
|
13
13
|
|
|
14
14
|
Generate options:
|
|
15
|
-
--provider <auto|oauth|api|grok> Provider for this request
|
|
15
|
+
--provider <auto|oauth|api|grok|grok-api|agy|gemini-api> Provider for this request
|
|
16
16
|
`;
|
|
17
17
|
const GEN_FLAGS = {
|
|
18
18
|
quality: { short: "q", type: "string", default: "low" },
|
|
@@ -58,10 +58,10 @@ async function generateSub(argv) {
|
|
|
58
58
|
if (!prompt)
|
|
59
59
|
die(2, "prompt required");
|
|
60
60
|
const refs = (Array.isArray(args.ref) ? args.ref : []);
|
|
61
|
-
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok"]);
|
|
61
|
+
const VALID_PROVIDERS = new Set(["auto", "oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
62
62
|
const VALID_REASONING = new Set(["none", "low", "medium", "high", "xhigh"]);
|
|
63
63
|
if (args.provider && !VALID_PROVIDERS.has(String(args.provider))) {
|
|
64
|
-
die(2, "--provider must be one of: auto, oauth, api, grok");
|
|
64
|
+
die(2, "--provider must be one of: auto, oauth, api, grok, grok-api, agy, gemini-api");
|
|
65
65
|
}
|
|
66
66
|
if (args["reasoning-effort"] && !VALID_REASONING.has(String(args["reasoning-effort"]))) {
|
|
67
67
|
die(2, "--reasoning-effort must be one of: none, low, medium, high, xhigh");
|
package/bin/ima2.js
CHANGED
|
@@ -65,20 +65,15 @@ async function setup() {
|
|
|
65
65
|
console.log(" 1) GPT OAuth — login with ChatGPT account (free, images only)");
|
|
66
66
|
console.log(" 2) Grok OAuth — login with xAI/Grok account (images + video)");
|
|
67
67
|
console.log(" 3) Both — GPT OAuth + Grok OAuth");
|
|
68
|
-
console.log(" 4)
|
|
68
|
+
console.log(" 4) Web setup — configure everything in the web UI\n");
|
|
69
69
|
const choice = await rl.question(" Enter 1-4: ");
|
|
70
70
|
const config = loadConfig();
|
|
71
71
|
if (choice.trim() === "4") {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
console.log(" Invalid API key format. Expected sk-...");
|
|
75
|
-
rl.close();
|
|
76
|
-
process.exit(1);
|
|
77
|
-
}
|
|
78
|
-
config.provider = "api";
|
|
79
|
-
config.apiKey = key.trim();
|
|
72
|
+
config.provider = "oauth";
|
|
73
|
+
delete config.apiKey;
|
|
80
74
|
saveConfig(config);
|
|
81
|
-
console.log("\n
|
|
75
|
+
console.log("\n You can set up everything from the web UI.");
|
|
76
|
+
console.log(" Run 'ima2 serve', then open Settings in the browser to sign in or add API keys.\n");
|
|
82
77
|
}
|
|
83
78
|
else if (choice.trim() === "2") {
|
|
84
79
|
config.provider = "grok";
|
|
@@ -260,6 +255,12 @@ function showHelp() {
|
|
|
260
255
|
|
|
261
256
|
Usage: ima2 <command> [options]
|
|
262
257
|
|
|
258
|
+
Generation workflow:
|
|
259
|
+
Image/video jobs run on the server. For multiple candidates, prefer
|
|
260
|
+
'ima2 gen -n <N>' or 'ima2 multimode <prompt>' instead of repeating
|
|
261
|
+
one-image prompts. Start independent CLI jobs concurrently when needed;
|
|
262
|
+
use 'ima2 ps --json' to monitor requestIds and 'ima2 cancel <id>' to stop.
|
|
263
|
+
|
|
263
264
|
Server commands:
|
|
264
265
|
serve [--dev] Start the image generation server
|
|
265
266
|
setup, login Configure API key or GPT OAuth (interactive)
|
|
@@ -288,6 +289,7 @@ function showHelp() {
|
|
|
288
289
|
cancel <id> Mark an in-flight job canceled (ima2 cancel --help)
|
|
289
290
|
inflight <sub> Inflight jobs (ls / rm) (ima2 inflight --help)
|
|
290
291
|
storage <sub> Storage status / open-dir (ima2 storage --help)
|
|
292
|
+
backfill-thumbs Generate missing thumbnails for gallery performance
|
|
291
293
|
billing API usage / quota
|
|
292
294
|
providers Configured providers
|
|
293
295
|
oauth <sub> GPT OAuth proxy status (ima2 oauth --help)
|
|
@@ -315,6 +317,9 @@ function showHelp() {
|
|
|
315
317
|
ima2 serve Start server
|
|
316
318
|
ima2 serve --dev Start with verbose server diagnostics
|
|
317
319
|
ima2 gen "a shiba in space" Generate from CLI
|
|
320
|
+
ima2 gen "a shiba in space" -n 4 -d ./out
|
|
321
|
+
Generate 4 candidates in one request
|
|
322
|
+
ima2 ps --json Watch active async generation jobs
|
|
318
323
|
ima2 gen "merge" --ref a.png --ref b.png -q high -o out.png
|
|
319
324
|
ima2 video "a cat playing piano" --duration 10
|
|
320
325
|
ima2 ls -n 10 Last 10 generations
|
|
@@ -332,7 +337,7 @@ if (args.includes("-v") || args.includes("--version")) {
|
|
|
332
337
|
process.exit(0);
|
|
333
338
|
}
|
|
334
339
|
if ((!command || args.includes("-h") || args.includes("--help"))
|
|
335
|
-
&& !["doctor", "gen", "video", "edit", "ls", "show", "ps", "cancel", "session", "history", "prompt", "multimode", "node", "annotate", "canvas-versions", "metadata", "comfy", "cardnews", "inflight", "storage", "billing", "providers", "oauth", "grok", "config", "defaults", "capabilities", "skill", "ping"].includes(command)) {
|
|
340
|
+
&& !["doctor", "gen", "video", "edit", "ls", "show", "ps", "cancel", "session", "history", "prompt", "multimode", "node", "annotate", "canvas-versions", "metadata", "comfy", "cardnews", "inflight", "storage", "billing", "providers", "oauth", "grok", "config", "defaults", "capabilities", "skill", "ping", "backfill-thumbs"].includes(command)) {
|
|
336
341
|
showHelp();
|
|
337
342
|
process.exit(command ? 0 : 1);
|
|
338
343
|
}
|
|
@@ -406,6 +411,11 @@ switch (command) {
|
|
|
406
411
|
await mod.default(args.slice(1));
|
|
407
412
|
break;
|
|
408
413
|
}
|
|
414
|
+
case "backfill-thumbs": {
|
|
415
|
+
const { backfillThumbs } = await import("./commands/backfillThumbs.js");
|
|
416
|
+
await backfillThumbs();
|
|
417
|
+
break;
|
|
418
|
+
}
|
|
409
419
|
case "storage":
|
|
410
420
|
case "billing":
|
|
411
421
|
case "providers":
|
package/bin/lib/config-store.js
CHANGED
|
@@ -102,7 +102,7 @@ export function envOverrideForKey(key) {
|
|
|
102
102
|
return { envVar, value: String(process.env[envVar]) };
|
|
103
103
|
}
|
|
104
104
|
export function displayPath(p) {
|
|
105
|
-
const home = process.env.HOME || "";
|
|
105
|
+
const home = process.env.HOME || process.env.USERPROFILE || "";
|
|
106
106
|
return home && p.startsWith(home) ? p.replace(home, "~") : p;
|
|
107
107
|
}
|
|
108
108
|
export function restartNotice() {
|