ima2-gen 1.1.16 → 1.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +24 -2
  2. package/bin/commands/capabilities.js +6 -0
  3. package/bin/commands/capabilities.ts +6 -0
  4. package/bin/commands/grok.js +39 -19
  5. package/bin/commands/grok.ts +39 -20
  6. package/bin/commands/video.js +211 -0
  7. package/bin/commands/video.ts +202 -0
  8. package/bin/ima2.js +61 -6
  9. package/bin/ima2.ts +54 -6
  10. package/docs/API.md +73 -4
  11. package/docs/CLI.md +38 -0
  12. package/lib/capabilities.js +9 -0
  13. package/lib/capabilities.ts +9 -0
  14. package/lib/grokImageAdapter.js +37 -7
  15. package/lib/grokImageAdapter.ts +37 -7
  16. package/lib/grokProxyLauncher.js +9 -8
  17. package/lib/grokProxyLauncher.ts +9 -9
  18. package/lib/grokVideoAdapter.js +56 -7
  19. package/lib/grokVideoAdapter.ts +54 -7
  20. package/lib/imageModels.js +1 -1
  21. package/lib/imageModels.ts +2 -2
  22. package/lib/oauthLauncher.js +16 -2
  23. package/lib/oauthLauncher.ts +16 -3
  24. package/package.json +1 -1
  25. package/routes/video.js +10 -5
  26. package/routes/video.ts +10 -4
  27. package/ui/dist/.vite/manifest.json +12 -12
  28. package/ui/dist/assets/{AgentWorkspace-c1_kEfFN.js → AgentWorkspace-BTuPjlDH.js} +1 -1
  29. package/ui/dist/assets/{CardNewsWorkspace-CTBT3MbP.js → CardNewsWorkspace-DmqCMnIx.js} +1 -1
  30. package/ui/dist/assets/{NodeCanvas-D3ecSAEi.js → NodeCanvas-jr9WXfNm.js} +1 -1
  31. package/ui/dist/assets/{PromptBuilderPanel-CqepukCN.js → PromptBuilderPanel-CoWjqQZS.js} +1 -1
  32. package/ui/dist/assets/{PromptImportDialog-Bvr8Q8P2.js → PromptImportDialog-C2zGZkyK.js} +2 -2
  33. package/ui/dist/assets/{PromptImportDiscoverySection-CyZEXyWP.js → PromptImportDiscoverySection-N0ZxHLYs.js} +1 -1
  34. package/ui/dist/assets/{PromptImportFolderSection-CIl-_pyV.js → PromptImportFolderSection-BC3dCASZ.js} +1 -1
  35. package/ui/dist/assets/{PromptLibraryPanel-Bj23Q6l9.js → PromptLibraryPanel-CcVliYnF.js} +2 -2
  36. package/ui/dist/assets/{SettingsWorkspace-D_GqtEsP.js → SettingsWorkspace-CiB4ux7E.js} +1 -1
  37. package/ui/dist/assets/{index-DtSBvfgp.js → index-C93CfR9P.js} +1 -1
  38. package/ui/dist/assets/index-CIhB_ia7.css +1 -0
  39. package/ui/dist/assets/index-uBEJn5jz.js +32 -0
  40. package/ui/dist/index.html +2 -2
  41. package/ui/dist/assets/index-DMjgFXdO.css +0 -1
  42. package/ui/dist/assets/index-DQ6jg4Ui.js +0 -32
package/README.md CHANGED
@@ -35,6 +35,13 @@ npx @openai/codex login
35
35
  npx ima2-gen serve
36
36
  ```
37
37
 
38
+ To generate a video from the CLI:
39
+
40
+ ```bash
41
+ ima2 video "a cat playing piano" --duration 5 --resolution 720p
42
+ ima2 video "animate this scene" --ref photo.png --duration 10
43
+ ```
44
+
38
45
  If `3333` is already occupied, `ima2-gen` binds the next available port and writes the actual URL to `~/.ima2/server.json`. Use `ima2 open` or the URL printed in the terminal instead of assuming the port.
39
46
 
40
47
  You can also install it globally:
@@ -44,6 +51,17 @@ npm install -g ima2-gen
44
51
  ima2 serve
45
52
  ```
46
53
 
54
+ ### Setup
55
+
56
+ `ima2 setup` offers four authentication choices:
57
+
58
+ 1. **GPT OAuth** — log in with a ChatGPT account (free, images only)
59
+ 2. **Grok OAuth** — log in with an xAI/Grok account (images + video)
60
+ 3. **Both** — GPT OAuth + Grok OAuth (full feature access)
61
+ 4. **API Key** — paste your OpenAI API key (paid)
62
+
63
+ Video generation requires Grok OAuth (option 2 or 3). Run `ima2 grok login` separately if you already have GPT OAuth configured and want to add video support.
64
+
47
65
  Before updating a global install on Windows, stop any running `ima2 serve`
48
66
  process. If npm reports `EBUSY` or `resource busy or locked`, close ima2
49
67
  terminals, end stale `node.exe` processes if needed, and retry. If the lock
@@ -54,9 +72,10 @@ persists, reboot and run the update before starting ima2 again.
54
72
  - **Classic mode**: generate, edit, reuse the current image, paste references, and continue from history.
55
73
  - **Node mode**: branch a good image into multiple directions without losing the original.
56
74
  - **Multimode batches**: launch several Classic outputs from one prompt, watch slot-by-slot progress, and continue from the best result.
75
+ - **Video generation**: create short videos from text, a single image, or multiple reference images via Grok video models. SSE streaming shows planning → submitted → progress % → done.
57
76
  - **Canvas Mode**: zoom, pan, annotate, erase, clean backgrounds, keep transparent previews, and export either alpha or matte-backed versions.
58
77
  - **Local gallery**: keep generated assets on your machine with session-aware history. By default the gallery shows the current session and an All Images toggle reveals the full history; the default scope is sticky across sessions. Each image records its generation time and reasoning effort in the result metadata, so they persist across reloads.
59
- - **Reference images**: drag, drop, paste, and attach up to 5 references; large images are compressed before upload.
78
+ - **Reference images**: drag, drop, paste, and attach up to 5 references (images) or up to 7 references (video); large images are compressed before upload.
60
79
  - **Prompt library imports**: import local prompt packs, GitHub folders, and curated GPT-image prompt hints into the built-in prompt library.
61
80
  - **Mobile shell**: use the app bar, compose sheet, and compact settings toggle on smaller screens.
62
81
  - **Observable jobs**: active and recent jobs are tracked with safe logs and request IDs.
@@ -73,7 +92,7 @@ Image generation can run through the local Codex/ChatGPT OAuth path, a configure
73
92
 
74
93
  If no provider is specified, the app keeps the current OAuth/default behavior. API-key generation defaults to `gpt-5.4-mini`, `low` reasoning, and `1024x1024` unless the request passes validated model, reasoning, size, or web-search options. Grok defaults to `grok-imagine-image`; `quality: "high"` promotes the final image call to `grok-imagine-image-quality`.
75
94
 
76
- Grok video generation (T2V/I2V) is not shipped in `1.1.15`. The video files in `docs/grok-video-i2v-plan.md` and `docs/grok-video-i2v-research.md` are implementation planning and research notes only; the published runtime remains image-only.
95
+ Grok video generation uses `grok-imagine-video` (default) or `grok-imagine-video-1.5-preview`. Three modes are auto-detected from reference count: text-to-video (0 refs), image-to-video (1 ref), and reference-to-video (2–7 refs, max 10s duration). Video controls include duration (1–15s), resolution (480p, 720p), and aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto).
77
96
 
78
97
  ![Settings workspace showing OAuth active and API key provider available.](assets/screenshots/settings-oauth-generation.png)
79
98
 
@@ -166,6 +185,7 @@ These require a running `ima2 serve`. The CLI covers every server route. The mos
166
185
  | `ima2 gen <prompt>` | Generate from the CLI |
167
186
  | `ima2 edit <file> --prompt <text>` | Edit an existing image |
168
187
  | `ima2 multimode <prompt>` | Multi-image SSE generation |
188
+ | `ima2 video <prompt>` | Video generation via Grok (SSE streaming with progress) |
169
189
  | `ima2 ls [--session <id>] [--favorites]` | List recent history |
170
190
  | `ima2 show <name> [--metadata]` | Reveal a generated asset |
171
191
  | `ima2 prompt ls -q <search>` | Search the prompt library |
@@ -179,6 +199,8 @@ The server advertises its actual port at `~/.ima2/server.json`. If `3333` is bus
179
199
  ima2 gen "poster" --model gpt-5.4 --reasoning-effort high
180
200
  ima2 edit input.png --prompt "make it rainy" --web-search
181
201
  ima2 multimode "two cats playing" -n 2
202
+ ima2 video "a cat playing piano" --duration 5 --resolution 720p
203
+ ima2 video "animate this" --ref photo.png --aspect-ratio 16:9
182
204
  ima2 inflight ls --terminal
183
205
  ima2 config set imageModels.reasoningEffort high
184
206
  ```
@@ -70,6 +70,12 @@ function printText(capabilities) {
70
70
  if (capabilities.valid?.imageModels?.grokSupported?.length) {
71
71
  out(` grok models: ${capabilities.valid.imageModels.grokSupported.join(", ")}`);
72
72
  }
73
+ if (capabilities.valid?.videoModels?.supported?.length) {
74
+ out(` video models: ${capabilities.valid.videoModels.supported.join(", ")}`);
75
+ out(` video resolutions: ${capabilities.valid.videoModels.resolutions?.join(", ")}`);
76
+ out(` video aspect ratios: ${capabilities.valid.videoModels.aspectRatios?.join(", ")}`);
77
+ out(` video duration: ${capabilities.valid.videoModels.durationRange?.[0]}-${capabilities.valid.videoModels.durationRange?.[1]}s`);
78
+ }
73
79
  out(` reasoning: ${capabilities.valid?.reasoningEfforts?.join(", ")}`);
74
80
  out(` quality: ${capabilities.valid?.quality?.join(", ")}`);
75
81
  out(` modes: ${capabilities.valid?.modes?.join(", ")}`);
@@ -74,6 +74,12 @@ function printText(capabilities: any): void {
74
74
  if (capabilities.valid?.imageModels?.grokSupported?.length) {
75
75
  out(` grok models: ${capabilities.valid.imageModels.grokSupported.join(", ")}`);
76
76
  }
77
+ if (capabilities.valid?.videoModels?.supported?.length) {
78
+ out(` video models: ${capabilities.valid.videoModels.supported.join(", ")}`);
79
+ out(` video resolutions: ${capabilities.valid.videoModels.resolutions?.join(", ")}`);
80
+ out(` video aspect ratios: ${capabilities.valid.videoModels.aspectRatios?.join(", ")}`);
81
+ out(` video duration: ${capabilities.valid.videoModels.durationRange?.[0]}-${capabilities.valid.videoModels.durationRange?.[1]}s`);
82
+ }
77
83
  out(` reasoning: ${capabilities.valid?.reasoningEfforts?.join(", ")}`);
78
84
  out(` quality: ${capabilities.valid?.quality?.join(", ")}`);
79
85
  out(` modes: ${capabilities.valid?.modes?.join(", ")}`);
@@ -2,7 +2,7 @@ import { spawn } from "node:child_process";
2
2
  import { dirname, join, delimiter } from "node:path";
3
3
  import { fileURLToPath } from "node:url";
4
4
  import { color, die, out } from "../lib/output.js";
5
- import { resolveBin, isWin } from "../lib/platform.js";
5
+ import { isWin } from "../lib/platform.js";
6
6
  const __dirname = dirname(fileURLToPath(import.meta.url));
7
7
  const ROOT = join(__dirname, "..", "..");
8
8
  const HELP = `
@@ -25,6 +25,27 @@ const HELP = `
25
25
  function localBinPath() {
26
26
  return join(ROOT, "node_modules", ".bin");
27
27
  }
28
/**
 * Launch the bundled `progrok` binary from the package-local `node_modules/.bin`
 * and wait for it to finish.
 *
 * @param argv Arguments forwarded to progrok.
 * @param env  Environment for the child process.
 * @returns Promise that resolves with the child's exit code (null when the
 *          child was terminated by a signal) and rejects when the process
 *          could not be started.
 */
function spawnProgrok(argv, env) {
    return new Promise((resolve, reject) => {
        const progrokBin = join(localBinPath(), isWin ? "progrok.cmd" : "progrok");
        const options = { cwd: ROOT, env, stdio: "inherit", windowsHide: true };
        let child;
        if (isWin) {
            // A .cmd shim has to go through the shell. With `shell: true` Node joins
            // the command and its args with plain spaces and does NOT quote them, so
            // an install path or argument containing a space would be split. Build
            // the command line ourselves with Windows (MSVCRT-style) quoting.
            const quote = (value) => `"${String(value)
                .replace(/(\\*)"/g, '$1$1\\"')
                .replace(/(\\+)$/, "$1$1")}"`;
            const commandLine = [progrokBin, ...argv].map(quote).join(" ");
            child = spawn(commandLine, { ...options, shell: true });
        }
        else {
            child = spawn(progrokBin, argv, options);
        }
        child.on("error", reject);
        // "close" passes (code, signal); only the exit code is surfaced to callers.
        child.on("close", (code) => resolve(code));
    });
}
28
49
  export default async function grokCmd(argv) {
29
50
  const sub = argv[0];
30
51
  if (!sub || sub === "--help" || sub === "-h") {
@@ -35,25 +56,24 @@ export default async function grokCmd(argv) {
35
56
  ...process.env,
36
57
  PATH: `${localBinPath()}${delimiter}${process.env.PATH || ""}`,
37
58
  };
38
- const child = isWin
39
- ? spawn("cmd.exe", ["/d", "/s", "/c", `progrok ${argv.map((arg) => JSON.stringify(arg)).join(" ")}`], {
40
- cwd: ROOT,
41
- env,
42
- stdio: "inherit",
43
- windowsHide: true,
44
- })
45
- : spawn(resolveBin("progrok"), argv, {
46
- cwd: ROOT,
47
- env,
48
- stdio: "inherit",
49
- windowsHide: true,
50
- });
51
- child.on("error", (err) => {
59
+ try {
60
+ const code = await spawnProgrok(argv, env);
61
+ if (code && code !== 0) {
62
+ // Auto-fallback: if login (without --device-code) failed, retry with device-code
63
+ if (sub === "login" && !argv.includes("--device-code")) {
64
+ out(color.yellow("⚠ ") + "Browser login failed. Retrying with device-code flow...\n");
65
+ const fallbackCode = await spawnProgrok(["login", "--device-code"], env);
66
+ if (fallbackCode && fallbackCode !== 0) {
67
+ die(fallbackCode, "bundled progrok device-code login also failed");
68
+ }
69
+ }
70
+ else {
71
+ die(code, `bundled progrok exited with code ${code}`);
72
+ }
73
+ }
74
+ }
75
+ catch (err) {
52
76
  die(1, `bundled progrok failed to start: ${err.message}`);
53
- });
54
- const code = await new Promise((resolve) => child.on("close", resolve));
55
- if (code && code !== 0) {
56
- die(code, `bundled progrok exited with code ${code}`);
57
77
  }
58
78
  if (sub === "login") {
59
79
  out(color.green("✓ ") + "Grok OAuth is ready for ima2 serve");
@@ -2,7 +2,7 @@ import { spawn } from "node:child_process";
2
2
  import { dirname, join, delimiter } from "node:path";
3
3
  import { fileURLToPath } from "node:url";
4
4
  import { color, die, out } from "../lib/output.js";
5
- import { resolveBin, isWin } from "../lib/platform.js";
5
+ import { isWin } from "../lib/platform.js";
6
6
 
7
7
  const __dirname = dirname(fileURLToPath(import.meta.url));
8
8
  const ROOT = join(__dirname, "..", "..");
@@ -28,6 +28,28 @@ function localBinPath() {
28
28
  return join(ROOT, "node_modules", ".bin");
29
29
  }
30
30
 
31
/**
 * Launch the bundled `progrok` binary from the package-local `node_modules/.bin`
 * and wait for it to finish.
 *
 * @param argv Arguments forwarded to progrok.
 * @param env  Environment for the child process.
 * @returns Promise that resolves with the child's exit code (`null` when the
 *          child was terminated by a signal) and rejects when the process
 *          could not be started.
 */
function spawnProgrok(argv: string[], env: NodeJS.ProcessEnv): Promise<number | null> {
  return new Promise((resolve, reject) => {
    const progrokBin = join(localBinPath(), isWin ? "progrok.cmd" : "progrok");
    const options = { cwd: ROOT, env, stdio: "inherit" as const, windowsHide: true };

    // A .cmd shim has to go through the shell. With `shell: true` Node joins the
    // command and its args with plain spaces and does NOT quote them, so an
    // install path or argument containing a space would be split. Build the
    // command line ourselves with Windows (MSVCRT-style) quoting.
    const quote = (value: string): string =>
      `"${value.replace(/(\\*)"/g, '$1$1\\"').replace(/(\\+)$/, "$1$1")}"`;

    const child = isWin
      ? spawn([progrokBin, ...argv].map(quote).join(" "), { ...options, shell: true })
      : spawn(progrokBin, argv, options);

    child.on("error", reject);
    // "close" passes (code, signal); only the exit code is surfaced to callers.
    child.on("close", (code) => resolve(code));
  });
}
52
+
31
53
  export default async function grokCmd(argv: string[]) {
32
54
  const sub = argv[0];
33
55
  if (!sub || sub === "--help" || sub === "-h") {
@@ -39,28 +61,25 @@ export default async function grokCmd(argv: string[]) {
39
61
  ...process.env,
40
62
  PATH: `${localBinPath()}${delimiter}${process.env.PATH || ""}`,
41
63
  };
42
- const child = isWin
43
- ? spawn("cmd.exe", ["/d", "/s", "/c", `progrok ${argv.map((arg) => JSON.stringify(arg)).join(" ")}`], {
44
- cwd: ROOT,
45
- env,
46
- stdio: "inherit",
47
- windowsHide: true,
48
- })
49
- : spawn(resolveBin("progrok"), argv, {
50
- cwd: ROOT,
51
- env,
52
- stdio: "inherit",
53
- windowsHide: true,
54
- });
55
64
 
56
- child.on("error", (err) => {
65
+ try {
66
+ const code = await spawnProgrok(argv, env);
67
+ if (code && code !== 0) {
68
+ // Auto-fallback: if login (without --device-code) failed, retry with device-code
69
+ if (sub === "login" && !argv.includes("--device-code")) {
70
+ out(color.yellow("⚠ ") + "Browser login failed. Retrying with device-code flow...\n");
71
+ const fallbackCode = await spawnProgrok(["login", "--device-code"], env);
72
+ if (fallbackCode && fallbackCode !== 0) {
73
+ die(fallbackCode, "bundled progrok device-code login also failed");
74
+ }
75
+ } else {
76
+ die(code, `bundled progrok exited with code ${code}`);
77
+ }
78
+ }
79
+ } catch (err: any) {
57
80
  die(1, `bundled progrok failed to start: ${err.message}`);
58
- });
59
-
60
- const code = await new Promise<number | null>((resolve) => child.on("close", resolve));
61
- if (code && code !== 0) {
62
- die(code, `bundled progrok exited with code ${code}`);
63
81
  }
82
+
64
83
  if (sub === "login") {
65
84
  out(color.green("✓ ") + "Grok OAuth is ready for ima2 serve");
66
85
  }
@@ -0,0 +1,211 @@
1
+ import { parseArgs } from "../lib/args.js";
2
+ import { resolveServer } from "../lib/client.js";
3
+ import { streamSse } from "../lib/sse.js";
4
+ import { out, die, color, json, exitCodeForError } from "../lib/output.js";
5
+ import { config } from "../../config.js";
6
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
7
+ import { dirname, join } from "node:path";
8
/** Values accepted by `--resolution`. */
const VALID_RESOLUTIONS = new Set([
    "480p",
    "720p",
]);

/** Values accepted by `--aspect-ratio`. */
const VALID_ASPECT_RATIOS = new Set([
    "1:1",
    "16:9",
    "9:16",
    "4:3",
    "3:4",
    "3:2",
    "2:3",
    "auto",
]);

/** Values accepted by `--model`. */
const VALID_MODELS = new Set([
    "grok-imagine-video",
    "grok-imagine-video-1.5-preview",
]);

/** Flag specification handed to `parseArgs` for `ima2 video`. */
const SPEC = {
    flags: {
        // Generation controls
        duration: { type: "string", default: "5" },
        resolution: { type: "string", default: "480p" },
        "aspect-ratio": { type: "string", default: "auto" },
        model: { type: "string" },
        ref: { type: "string", repeatable: true },
        // Output controls
        out: { short: "o", type: "string" },
        "out-dir": { short: "d", type: "string" },
        json: { type: "boolean" },
        // Connection / session
        timeout: { type: "string", default: "600" },
        server: { type: "string" },
        session: { type: "string" },
        help: { short: "h", type: "boolean" },
    },
};
27
+ const HELP = `
28
+ ima2 video <prompt...> [options]
29
+
30
+ Generate a video via the Grok video provider (SSE streaming).
31
+
32
+ Options:
33
+ --duration <1..15> Duration in seconds. Default: 5
34
+ --resolution <480p|720p> Default: 480p
35
+ --aspect-ratio <ratio|auto> 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto. Default: auto
36
+ --model <name> grok-imagine-video, grok-imagine-video-1.5-preview
37
+ --ref <file> Attach source/reference image (repeatable, max 7)
38
+ -o, --out <file> Output file path
39
+ -d, --out-dir <dir> Output directory
40
+ --json Print JSON result to stdout
41
+ --timeout <sec> Default: 600
42
+ --server <url> Override server URL
43
+ --session <id> Session ID
44
+
45
+ Modes (auto-detected from --ref count):
46
+ 0 refs → text-to-video
47
+ 1 ref → image-to-video
48
+ 2-7 refs → reference-to-video (max 10s duration)
49
+
50
+ Examples:
51
+ ima2 video "a cat playing piano"
52
+ ima2 video "animate this" --ref photo.png --duration 10
53
+ ima2 video "cinematic" --resolution 720p --aspect-ratio 16:9 -o out.mp4
54
+ `;
55
/**
 * Parse a CLI flag value as a non-negative whole number.
 * Returns NaN for anything else so callers can reject it explicitly instead of
 * silently falling back to a default.
 */
function parseIntegerFlag(raw) {
    const text = String(raw).trim();
    return /^\d+$/.test(text) ? Number(text) : NaN;
}
/** Turn the payload of an SSE `error` event into a readable one-line message. */
function describeVideoError(data) {
    if (data && typeof data === "object") {
        const message = data.error ? String(data.error) : JSON.stringify(data);
        return data.code ? `${message} (${data.code})` : message;
    }
    return String(data);
}
/**
 * `ima2 video <prompt...>`: request a video from the running server, stream
 * progress events to the terminal, then download the finished file.
 *
 * Validates the flags locally, posts the request to `/api/video/generate`,
 * handles the `planning` / `submitted` / `progress` / `done` / `error` events,
 * and writes the downloaded video to `--out`, `--out-dir`, or the configured
 * generated directory.
 *
 * @param argv Raw CLI arguments following the `video` sub-command.
 */
export default async function videoCmd(argv) {
    const args = parseArgs(argv, SPEC);
    if (args.help) {
        out(HELP);
        return;
    }
    const prompt = args.positional.join(" ");
    if (!prompt)
        die(2, "prompt is required");
    // Reject invalid numbers outright; `parseInt(x) || 5` used to turn "0" or
    // "abc" into the default without telling the user.
    const duration = parseIntegerFlag(args.duration);
    if (!(duration >= 1 && duration <= 15))
        die(2, "--duration must be between 1 and 15");
    const resolution = String(args.resolution);
    if (!VALID_RESOLUTIONS.has(resolution))
        die(2, "--resolution must be one of: 480p, 720p");
    const aspectRatio = String(args["aspect-ratio"]);
    if (!VALID_ASPECT_RATIOS.has(aspectRatio))
        die(2, "--aspect-ratio must be one of: 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto");
    if (args.model && !VALID_MODELS.has(String(args.model))) {
        die(2, "--model must be one of: grok-imagine-video, grok-imagine-video-1.5-preview");
    }
    const refs = (Array.isArray(args.ref) ? args.ref : []);
    if (refs.length > 7)
        die(2, "max 7 --ref attachments for video");
    // Upper bound keeps the millisecond value inside setTimeout's 32-bit range.
    const timeoutSec = parseIntegerFlag(args.timeout);
    if (!(timeoutSec >= 1 && timeoutSec <= 2147483))
        die(2, "--timeout must be a whole number of seconds between 1 and 2147483");
    const timeoutMs = timeoutSec * 1000;
    let server;
    try {
        server = await resolveServer({ serverFlag: args.server });
    }
    catch (e) {
        die(exitCodeForError(e), e.message);
        throw e;
    }
    let referenceImages;
    try {
        referenceImages = await Promise.all(refs.map(async (p) => (await readFile(p)).toString("base64")));
    }
    catch (e) {
        // A missing or unreadable --ref is a usage problem, not a crash.
        die(2, `cannot read --ref file: ${e.message}`);
        throw e;
    }
    const requestId = `req_cli_video_${Date.now().toString(36)}`;
    const body = {
        prompt,
        provider: "grok",
        duration,
        resolution,
        aspectRatio,
        requestId,
    };
    if (args.model)
        body.model = args.model;
    if (args.session)
        body.sessionId = args.session;
    // One image is sent as the source frame; several are sent as references.
    if (referenceImages.length === 1) {
        body.sourceImage = referenceImages[0];
    }
    else if (referenceImages.length > 1) {
        body.referenceImages = referenceImages;
    }
    const ac = new AbortController();
    let timedOut = false;
    const timeoutTimer = setTimeout(() => { timedOut = true; ac.abort(); }, timeoutMs);
    const onSig = () => { ac.abort(); process.exit(130); };
    process.once("SIGINT", onSig);
    process.once("SIGTERM", onSig);
    const url = `${server.base}/api/video/generate`;
    let doneData = null;
    let lastProgress = -1;
    try {
        for await (const ev of streamSse(url, { body, signal: ac.signal, headers: { "X-Request-Id": requestId } })) {
            switch (ev.event) {
                case "planning":
                    if (!args.json)
                        out(color.dim("[planning] preparing video generation..."));
                    break;
                case "submitted":
                    if (!args.json)
                        out(color.dim(`[submitted] xai request: ${ev.data.xaiVideoRequestId || "..."}`));
                    break;
                case "progress": {
                    const raw = ev.data.progress;
                    // Clamp so an out-of-range fraction cannot break the bar rendering.
                    const pct = typeof raw === "number" && Number.isFinite(raw)
                        ? Math.min(100, Math.max(0, Math.round(raw * 100)))
                        : null;
                    if (pct !== null && pct !== lastProgress && !args.json) {
                        const bar = renderBar(pct);
                        process.stdout.write(`\r ${bar} ${pct}%`);
                        lastProgress = pct;
                    }
                    break;
                }
                case "done":
                    if (!args.json && lastProgress >= 0)
                        process.stdout.write("\n");
                    doneData = ev.data;
                    break;
                case "error":
                    if (!args.json && lastProgress >= 0)
                        process.stdout.write("\n");
                    die(1, `video error: ${describeVideoError(ev.data)}`);
            }
        }
    }
    catch (e) {
        if (e.name === "AbortError" && !timedOut)
            return;
        if (!args.json && lastProgress >= 0)
            process.stdout.write("\n");
        // The raw abort message does not say why the request stopped.
        if (timedOut)
            die(exitCodeForError(e), `video generation timed out after ${timeoutSec}s`);
        die(exitCodeForError(e), e.message);
    }
    finally {
        clearTimeout(timeoutTimer);
        process.off("SIGINT", onSig);
        process.off("SIGTERM", onSig);
    }
    if (!doneData?.filename)
        die(1, "server did not return a video filename");
    // Determine output path: explicit file > output directory > configured default.
    const filename = String(doneData.filename);
    const explicitOut = args.out ? String(args.out) : null;
    const outDir = args["out-dir"] ? String(args["out-dir"]) : null;
    let target;
    if (explicitOut) {
        target = explicitOut;
    }
    else if (outDir) {
        target = join(outDir, filename);
    }
    else {
        target = join(config.storage.generatedDir, filename);
    }
    // Download the video file from server
    const videoUrl = `${server.base}${doneData.url || `/generated/${encodeURIComponent(filename)}`}`;
    let videoBuf;
    try {
        const dlRes = await fetch(videoUrl, { signal: AbortSignal.timeout(30_000) });
        if (!dlRes.ok)
            throw new Error(`HTTP ${dlRes.status}`);
        videoBuf = Buffer.from(await dlRes.arrayBuffer());
    }
    catch (e) {
        die(1, `failed to download video: ${e.message}`);
        throw e;
    }
    try {
        // Directory creation stays best-effort; writeFile reports the real failure.
        await mkdir(dirname(target), { recursive: true }).catch(() => { });
        await writeFile(target, videoBuf);
    }
    catch (e) {
        die(1, `failed to write ${target}: ${e.message}`);
        throw e;
    }
    if (args.json) {
        json({
            ok: true,
            requestId: doneData.requestId,
            path: target,
            filename,
            elapsed: doneData.elapsed,
            video: doneData.video,
            revisedPrompt: doneData.revisedPrompt,
        });
    }
    else {
        out(color.green("✓ ") + target);
        if (doneData.elapsed)
            out(color.dim(`elapsed ${doneData.elapsed}s`));
        if (doneData.revisedPrompt)
            out(color.dim(`revised: ${String(doneData.revisedPrompt).slice(0, 80)}`));
    }
}
207
/**
 * Render a 20-cell text progress bar for a percentage.
 *
 * @param pct Percentage complete; values outside 0–100 (or non-finite values)
 *            are clamped so `String.prototype.repeat` never receives a
 *            negative count, which would throw a RangeError.
 * @returns The filled cells (green) followed by the remaining cells (dim).
 */
function renderBar(pct) {
    const width = 20;
    const clamped = Number.isFinite(pct) ? Math.min(100, Math.max(0, pct)) : 0;
    const filled = Math.round((clamped / 100) * width);
    return color.green("█".repeat(filled)) + color.dim("░".repeat(width - filled));
}
@@ -0,0 +1,202 @@
1
+ import { parseArgs } from "../lib/args.js";
2
+ import { resolveServer } from "../lib/client.js";
3
+ import { streamSse } from "../lib/sse.js";
4
+ import { out, die, color, json, exitCodeForError } from "../lib/output.js";
5
+ import { config } from "../../config.js";
6
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
7
+ import { dirname, join } from "node:path";
8
+
9
/** Values accepted by `--resolution`. */
const VALID_RESOLUTIONS = new Set([
  "480p",
  "720p",
]);

/** Values accepted by `--aspect-ratio`. */
const VALID_ASPECT_RATIOS = new Set([
  "1:1",
  "16:9",
  "9:16",
  "4:3",
  "3:4",
  "3:2",
  "2:3",
  "auto",
]);

/** Values accepted by `--model`. */
const VALID_MODELS = new Set([
  "grok-imagine-video",
  "grok-imagine-video-1.5-preview",
]);

/** Flag specification handed to `parseArgs` for `ima2 video`. */
const SPEC = {
  flags: {
    // Generation controls
    duration: { type: "string", default: "5" },
    resolution: { type: "string", default: "480p" },
    "aspect-ratio": { type: "string", default: "auto" },
    model: { type: "string" },
    ref: { type: "string", repeatable: true },
    // Output controls
    out: { short: "o", type: "string" },
    "out-dir": { short: "d", type: "string" },
    json: { type: "boolean" },
    // Connection / session
    timeout: { type: "string", default: "600" },
    server: { type: "string" },
    session: { type: "string" },
    help: { short: "h", type: "boolean" },
  },
};
29
+
30
+ const HELP = `
31
+ ima2 video <prompt...> [options]
32
+
33
+ Generate a video via the Grok video provider (SSE streaming).
34
+
35
+ Options:
36
+ --duration <1..15> Duration in seconds. Default: 5
37
+ --resolution <480p|720p> Default: 480p
38
+ --aspect-ratio <ratio|auto> 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto. Default: auto
39
+ --model <name> grok-imagine-video, grok-imagine-video-1.5-preview
40
+ --ref <file> Attach source/reference image (repeatable, max 7)
41
+ -o, --out <file> Output file path
42
+ -d, --out-dir <dir> Output directory
43
+ --json Print JSON result to stdout
44
+ --timeout <sec> Default: 600
45
+ --server <url> Override server URL
46
+ --session <id> Session ID
47
+
48
+ Modes (auto-detected from --ref count):
49
+ 0 refs → text-to-video
50
+ 1 ref → image-to-video
51
+ 2-7 refs → reference-to-video (max 10s duration)
52
+
53
+ Examples:
54
+ ima2 video "a cat playing piano"
55
+ ima2 video "animate this" --ref photo.png --duration 10
56
+ ima2 video "cinematic" --resolution 720p --aspect-ratio 16:9 -o out.mp4
57
+ `;
58
+
59
/**
 * Parse a CLI flag value as a non-negative whole number.
 * Returns NaN for anything else so callers can reject it explicitly instead of
 * silently falling back to a default.
 */
function parseIntegerFlag(raw: unknown): number {
  const text = String(raw).trim();
  return /^\d+$/.test(text) ? Number(text) : NaN;
}

/** Turn the payload of an SSE `error` event into a readable one-line message. */
function describeVideoError(data: unknown): string {
  if (data && typeof data === "object") {
    const { error, code } = data as { error?: unknown; code?: unknown };
    const message = error ? String(error) : JSON.stringify(data);
    return code ? `${message} (${String(code)})` : message;
  }
  return String(data);
}

/**
 * `ima2 video <prompt...>`: request a video from the running server, stream
 * progress events to the terminal, then download the finished file.
 *
 * Validates the flags locally, posts the request to `/api/video/generate`,
 * handles the `planning` / `submitted` / `progress` / `done` / `error` events,
 * and writes the downloaded video to `--out`, `--out-dir`, or the configured
 * generated directory.
 *
 * @param argv Raw CLI arguments following the `video` sub-command.
 */
export default async function videoCmd(argv: string[]) {
  const args = parseArgs(argv, SPEC);
  if (args.help) { out(HELP); return; }

  const prompt = args.positional.join(" ");
  if (!prompt) die(2, "prompt is required");

  // Reject invalid numbers outright; `parseInt(x) || 5` used to turn "0" or
  // "abc" into the default without telling the user.
  const duration = parseIntegerFlag(args.duration);
  if (!(duration >= 1 && duration <= 15)) die(2, "--duration must be between 1 and 15");

  const resolution = String(args.resolution);
  if (!VALID_RESOLUTIONS.has(resolution)) die(2, "--resolution must be one of: 480p, 720p");

  const aspectRatio = String(args["aspect-ratio"]);
  if (!VALID_ASPECT_RATIOS.has(aspectRatio)) die(2, "--aspect-ratio must be one of: 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto");

  if (args.model && !VALID_MODELS.has(String(args.model))) {
    die(2, "--model must be one of: grok-imagine-video, grok-imagine-video-1.5-preview");
  }

  const refs = (Array.isArray(args.ref) ? args.ref : []) as string[];
  if (refs.length > 7) die(2, "max 7 --ref attachments for video");

  // Upper bound keeps the millisecond value inside setTimeout's 32-bit range.
  const timeoutSec = parseIntegerFlag(args.timeout);
  if (!(timeoutSec >= 1 && timeoutSec <= 2147483)) {
    die(2, "--timeout must be a whole number of seconds between 1 and 2147483");
  }
  const timeoutMs = timeoutSec * 1000;

  let server;
  try { server = await resolveServer({ serverFlag: args.server }); }
  catch (e: unknown) { die(exitCodeForError(e), (e as Error).message); throw e; }

  let referenceImages: string[];
  try {
    referenceImages = await Promise.all(
      refs.map(async (p: string) => (await readFile(p)).toString("base64")),
    );
  } catch (e: unknown) {
    // A missing or unreadable --ref is a usage problem, not a crash.
    die(2, `cannot read --ref file: ${(e as Error).message}`);
    throw e;
  }

  const requestId = `req_cli_video_${Date.now().toString(36)}`;

  const body: Record<string, unknown> = {
    prompt,
    provider: "grok",
    duration,
    resolution,
    aspectRatio,
    requestId,
  };
  if (args.model) body.model = args.model;
  if (args.session) body.sessionId = args.session;
  // One image is sent as the source frame; several are sent as references.
  if (referenceImages.length === 1) {
    body.sourceImage = referenceImages[0];
  } else if (referenceImages.length > 1) {
    body.referenceImages = referenceImages;
  }

  const ac = new AbortController();
  let timedOut = false;
  const timeoutTimer = setTimeout(() => { timedOut = true; ac.abort(); }, timeoutMs);
  const onSig = () => { ac.abort(); process.exit(130); };
  process.once("SIGINT", onSig);
  process.once("SIGTERM", onSig);

  const url = `${server.base}/api/video/generate`;
  let doneData: Record<string, unknown> | null = null;
  let lastProgress = -1;

  try {
    for await (const ev of streamSse(url, { body, signal: ac.signal, headers: { "X-Request-Id": requestId } })) {
      switch (ev.event) {
        case "planning":
          if (!args.json) out(color.dim("[planning] preparing video generation..."));
          break;
        case "submitted":
          if (!args.json) out(color.dim(`[submitted] xai request: ${ev.data.xaiVideoRequestId || "..."}`));
          break;
        case "progress": {
          const raw = ev.data.progress;
          // Clamp so an out-of-range fraction cannot break the bar rendering.
          const pct = typeof raw === "number" && Number.isFinite(raw)
            ? Math.min(100, Math.max(0, Math.round(raw * 100)))
            : null;
          if (pct !== null && pct !== lastProgress && !args.json) {
            const bar = renderBar(pct);
            process.stdout.write(`\r ${bar} ${pct}%`);
            lastProgress = pct;
          }
          break;
        }
        case "done":
          if (!args.json && lastProgress >= 0) process.stdout.write("\n");
          doneData = ev.data;
          break;
        case "error":
          if (!args.json && lastProgress >= 0) process.stdout.write("\n");
          die(1, `video error: ${describeVideoError(ev.data)}`);
      }
    }
  } catch (e: unknown) {
    if ((e as Error).name === "AbortError" && !timedOut) return;
    if (!args.json && lastProgress >= 0) process.stdout.write("\n");
    // The raw abort message does not say why the request stopped.
    if (timedOut) die(exitCodeForError(e), `video generation timed out after ${timeoutSec}s`);
    die(exitCodeForError(e), (e as Error).message);
  } finally {
    clearTimeout(timeoutTimer);
    process.off("SIGINT", onSig);
    process.off("SIGTERM", onSig);
  }

  if (!doneData?.filename) die(1, "server did not return a video filename");

  // Determine output path: explicit file > output directory > configured default.
  const filename = String(doneData.filename);
  const explicitOut = args.out ? String(args.out) : null;
  const outDir = args["out-dir"] ? String(args["out-dir"]) : null;
  let target: string;
  if (explicitOut) {
    target = explicitOut;
  } else if (outDir) {
    target = join(outDir, filename);
  } else {
    target = join(config.storage.generatedDir, filename);
  }

  // Download the video file from server
  const videoUrl = `${server.base}${doneData.url || `/generated/${encodeURIComponent(filename)}`}`;
  let videoBuf: Buffer;
  try {
    const dlRes = await fetch(videoUrl, { signal: AbortSignal.timeout(30_000) });
    if (!dlRes.ok) throw new Error(`HTTP ${dlRes.status}`);
    videoBuf = Buffer.from(await dlRes.arrayBuffer());
  } catch (e: unknown) {
    die(1, `failed to download video: ${(e as Error).message}`);
    throw e;
  }

  try {
    // Directory creation stays best-effort; writeFile reports the real failure.
    await mkdir(dirname(target), { recursive: true }).catch(() => {});
    await writeFile(target, videoBuf);
  } catch (e: unknown) {
    die(1, `failed to write ${target}: ${(e as Error).message}`);
    throw e;
  }

  if (args.json) {
    json({
      ok: true,
      requestId: doneData.requestId,
      path: target,
      filename,
      elapsed: doneData.elapsed,
      video: doneData.video,
      revisedPrompt: doneData.revisedPrompt,
    });
  } else {
    out(color.green("✓ ") + target);
    if (doneData.elapsed) out(color.dim(`elapsed ${doneData.elapsed}s`));
    if (doneData.revisedPrompt) out(color.dim(`revised: ${String(doneData.revisedPrompt).slice(0, 80)}`));
  }
}
197
+
198
/**
 * Render a 20-cell text progress bar for a percentage.
 *
 * @param pct Percentage complete; values outside 0–100 (or non-finite values)
 *            are clamped so `String.prototype.repeat` never receives a
 *            negative count, which would throw a RangeError.
 * @returns The filled cells (green) followed by the remaining cells (dim).
 */
function renderBar(pct: number): string {
  const width = 20;
  const clamped = Number.isFinite(pct) ? Math.min(100, Math.max(0, pct)) : 0;
  const filled = Math.round((clamped / 100) * width);
  return color.green("█".repeat(filled)) + color.dim("░".repeat(width - filled));
}