ima2-gen 1.1.20 → 1.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -25
- package/bin/commands/capabilities.js +2 -2
- package/bin/commands/capabilities.ts +2 -2
- package/bin/commands/defaults.js +2 -2
- package/bin/commands/defaults.ts +2 -2
- package/bin/commands/doctor.js +3 -3
- package/bin/commands/doctor.ts +3 -3
- package/bin/commands/edit.js +1 -1
- package/bin/commands/edit.ts +1 -1
- package/bin/commands/gen.js +1 -1
- package/bin/commands/gen.ts +1 -1
- package/bin/commands/grok.js +16 -11
- package/bin/commands/grok.ts +16 -11
- package/bin/commands/multimode.js +1 -1
- package/bin/commands/multimode.ts +1 -1
- package/bin/commands/observability.js +2 -2
- package/bin/commands/observability.ts +2 -2
- package/bin/commands/video.js +335 -13
- package/bin/commands/video.ts +249 -12
- package/bin/ima2.js +9 -9
- package/bin/ima2.ts +9 -9
- package/bin/lib/error-hints.js +2 -2
- package/bin/lib/error-hints.ts +2 -2
- package/docs/API.md +112 -3
- package/docs/CLI.md +61 -7
- package/docs/FAQ.ko.md +15 -20
- package/docs/FAQ.md +14 -19
- package/docs/NPX_QUICKSTART.md +40 -0
- package/docs/PROMPT_STUDIO.ko.md +1 -1
- package/docs/PROMPT_STUDIO.md +1 -1
- package/docs/README.ja.md +6 -16
- package/docs/README.ko.md +10 -20
- package/docs/README.zh-CN.md +7 -17
- package/docs/migration/runtime-test-inventory.md +8 -1
- package/lib/agentRuntime.js +19 -5
- package/lib/agentRuntime.ts +17 -5
- package/lib/capabilities.js +1 -1
- package/lib/capabilities.ts +1 -1
- package/lib/generationErrors.js +1 -1
- package/lib/generationErrors.ts +1 -1
- package/lib/grokProxyLauncher.js +26 -3
- package/lib/grokProxyLauncher.ts +27 -3
- package/lib/grokVideoAdapter.js +18 -89
- package/lib/grokVideoAdapter.ts +27 -88
- package/lib/grokVideoCanvas.js +25 -0
- package/lib/grokVideoCanvas.ts +26 -0
- package/lib/grokVideoDownload.js +58 -0
- package/lib/grokVideoDownload.ts +59 -0
- package/lib/grokVideoPlannerPrompt.js +64 -0
- package/lib/grokVideoPlannerPrompt.ts +67 -0
- package/lib/historyList.js +7 -1
- package/lib/historyList.ts +5 -1
- package/lib/oauthLauncher.js +21 -6
- package/lib/oauthLauncher.ts +22 -6
- package/lib/videoContinuity.js +149 -0
- package/lib/videoContinuity.ts +180 -0
- package/lib/videoFrameExtract.js +80 -0
- package/lib/videoFrameExtract.ts +78 -0
- package/node_modules/progrok/dist/index.js +187 -88
- package/node_modules/progrok/dist/index.js.map +1 -1
- package/node_modules/progrok/package.json +1 -1
- package/node_modules/progrok/skills/progrok/SKILL.md +33 -4
- package/package.json +2 -2
- package/routes/index.js +4 -0
- package/routes/index.ts +4 -0
- package/routes/quota.js +66 -0
- package/routes/quota.ts +89 -0
- package/routes/video.js +77 -15
- package/routes/video.ts +82 -14
- package/routes/videoExtended.js +293 -0
- package/routes/videoExtended.ts +284 -0
- package/server.js +6 -2
- package/server.ts +5 -2
- package/skills/ima2/SKILL.md +320 -7
- package/ui/dist/.vite/manifest.json +12 -12
- package/ui/dist/assets/{AgentWorkspace-DS8uvoLI.js → AgentWorkspace-B_hq9CLg.js} +2 -2
- package/ui/dist/assets/{CardNewsWorkspace-CYxMsE67.js → CardNewsWorkspace-wD12J7qk.js} +1 -1
- package/ui/dist/assets/{NodeCanvas-DccIc347.js → NodeCanvas-CI_wuPMf.js} +1 -1
- package/ui/dist/assets/{PromptBuilderPanel-BvxxwSJp.js → PromptBuilderPanel-CUTujJUV.js} +1 -1
- package/ui/dist/assets/{PromptImportDialog-u1_BFDRd.js → PromptImportDialog-CUi66jPK.js} +2 -2
- package/ui/dist/assets/{PromptImportDiscoverySection-C5uvkVSz.js → PromptImportDiscoverySection-Cm3vrjY4.js} +1 -1
- package/ui/dist/assets/{PromptImportFolderSection-D3E_O1SD.js → PromptImportFolderSection-DOtWTD9n.js} +1 -1
- package/ui/dist/assets/{PromptLibraryPanel-4gyf9CB9.js → PromptLibraryPanel-BMjQegRa.js} +2 -2
- package/ui/dist/assets/SettingsWorkspace-PiaVnsdA.js +1 -0
- package/ui/dist/assets/{index-DoKtXbod.js → index-31uVIdt4.js} +1 -1
- package/ui/dist/assets/index-CjgnNtgt.css +1 -0
- package/ui/dist/assets/index-Da2s4_-5.js +36 -0
- package/ui/dist/index.html +2 -2
- package/vendor/progrok-0.2.0.tgz +0 -0
- package/ui/dist/assets/SettingsWorkspace-F3eNu3mJ.js +0 -1
- package/ui/dist/assets/index-B6tcw_UF.css +0 -1
- package/ui/dist/assets/index-DYOh6gQD.js +0 -32
- package/vendor/progrok-0.1.1.tgz +0 -0
package/skills/ima2/SKILL.md
CHANGED
|
@@ -9,7 +9,7 @@ Use this skill when an agent needs to operate `ima2-gen` from an installed packa
|
|
|
9
9
|
|
|
10
10
|
Prefer this package skill for ima2 work instead of a generic OpenAI image-generation
|
|
11
11
|
skill. The generic skill can describe the OpenAI API, but this skill knows ima2's
|
|
12
|
-
local server, OAuth/API provider split, history, in-flight jobs, packaged defaults,
|
|
12
|
+
local server, GPT OAuth/API provider split, history, in-flight jobs, packaged defaults,
|
|
13
13
|
and CLI command surface.
|
|
14
14
|
|
|
15
15
|
## First Commands
|
|
@@ -31,7 +31,7 @@ ima2 serve
|
|
|
31
31
|
ima2 open
|
|
32
32
|
```
|
|
33
33
|
|
|
34
|
-
Use `ima2 doctor` when setup, OAuth, storage, or package integrity is unclear.
|
|
34
|
+
Use `ima2 doctor` when setup, GPT OAuth, storage, or package integrity is unclear.
|
|
35
35
|
|
|
36
36
|
## Generate Images
|
|
37
37
|
|
|
@@ -68,6 +68,8 @@ ima2 grok status
|
|
|
68
68
|
ima2 gen "cinematic neon city" --provider grok --model grok-imagine-image-quality
|
|
69
69
|
```
|
|
70
70
|
|
|
71
|
+
`ima2 grok login` defaults to the manual-paste flow.
|
|
72
|
+
|
|
71
73
|
Grok requests with reference images use the edit/image-to-image path so the
|
|
72
74
|
references remain attached after planning. Keep Grok references to three total
|
|
73
75
|
input images.
|
|
@@ -214,7 +216,7 @@ Inspect local effective defaults without contacting a server:
|
|
|
214
216
|
ima2 defaults --local --json
|
|
215
217
|
```
|
|
216
218
|
|
|
217
|
-
Persist the default model for OAuth and API provider paths:
|
|
219
|
+
Persist the default model for GPT OAuth and API provider paths:
|
|
218
220
|
|
|
219
221
|
```bash
|
|
220
222
|
ima2 defaults set model gpt-5.5
|
|
@@ -293,6 +295,8 @@ ima2 video "cinematic" --ref a.png --ref b.png # reference-to-video (max 7)
|
|
|
293
295
|
| 1 | image-to-video | 15s |
|
|
294
296
|
| 2-7 | reference-to-video | 10s |
|
|
295
297
|
|
|
298
|
+
`grok-imagine-video-1.5-preview` supports image-to-video but does not support `reference_images` Ref2V. For 2+ references, use `grok-imagine-video` and keep duration at 10s or less. Prompt-only 1.5 text-to-video is implemented as an internal white-canvas image-to-video anchor so the 1.5 endpoint receives an image input. ima2 may auto-retry a rejected 1.5 Ref2V request with the base model; read `effectiveModel` and `modelFallback` from the final result before naming or reporting the output.
|
|
299
|
+
|
|
296
300
|
### Parameters
|
|
297
301
|
|
|
298
302
|
| Flag | Values | Default |
|
|
@@ -303,12 +307,15 @@ ima2 video "cinematic" --ref a.png --ref b.png # reference-to-video (max 7)
|
|
|
303
307
|
| `--model` | grok-imagine-video, grok-imagine-video-1.5-preview | grok-imagine-video |
|
|
304
308
|
| `--topic` | any string | (none) |
|
|
305
309
|
| `--session` | session ID | (none) |
|
|
306
|
-
| `-o, --out` | output file path |
|
|
310
|
+
| `-o, --out` | output file path | saved under configured generated dir |
|
|
307
311
|
| `--json` | (flag) | false |
|
|
308
312
|
|
|
309
313
|
### Series Continuity (--topic)
|
|
310
314
|
|
|
311
|
-
|
|
315
|
+
`--topic` is legacy/best-effort series context. Prefer branch-local artifact
|
|
316
|
+
continuity with `ima2 video continue`, Classic "Continue here", gallery video
|
|
317
|
+
drag, or Node parent-video generation. Those flows use the previous generated
|
|
318
|
+
video's last frame plus its stored `revisedPrompt` lineage.
|
|
312
319
|
|
|
313
320
|
```bash
|
|
314
321
|
ima2 video "episode 1: morning routine" --topic "daily-vlog"
|
|
@@ -319,10 +326,52 @@ ima2 video "episode 2: commute" --topic "daily-vlog"
|
|
|
319
326
|
|
|
320
327
|
Prompts are NOT sent directly to the video model. A Grok planner (grok-4.3) rewrites your prompt with web search context for better results. The `revisedPrompt` in the response shows what was actually sent.
|
|
321
328
|
|
|
329
|
+
### Grok 4.3 Prompt Surfaces
|
|
330
|
+
|
|
331
|
+
| Surface | Files | Responsibility |
|
|
332
|
+
|---------|-------|----------------|
|
|
333
|
+
| Image search/planner | `lib/grokImageAdapter.ts` | Web-search context and final image prompt for Grok image generation/editing. |
|
|
334
|
+
| Video planner | `lib/grokVideoAdapter.ts`, `lib/grokVideoPlannerPrompt.ts` | Final video prompt for T2V/I2V/Ref2V, duration pacing, and continuity lineage when present. |
|
|
335
|
+
| Video analyzer | `routes/videoExtended.ts` | First/last-frame analysis prompt for recreating or continuing an existing generated video. |
|
|
336
|
+
| Agent/runtime prompt use | `lib/agentRuntime.ts`, card/template planner modules | Higher-level orchestration surfaces that may create image/video prompt inputs but do not replace the video planner contract. |
|
|
337
|
+
|
|
338
|
+
For video, the Grok 4.3 planner must produce one focused English prompt with:
|
|
339
|
+
core subject, expected action/motion, camera/composition, environment/style,
|
|
340
|
+
dialogue/audio intent, ending frame/continuity handoff, and constraints. If
|
|
341
|
+
`videoContinuity` exists, the lineage is authoritative context: continue from
|
|
342
|
+
the latest clip's final frame and final audio/dialogue state without restarting
|
|
343
|
+
the scene. The planner also applies duration pacing: use the selected seconds as
|
|
344
|
+
the full clip runtime, expand even short requests into a production-level
|
|
345
|
+
sequence, and make the clip feel complete through composition, blocking, camera
|
|
346
|
+
movement, motion rhythm, sound/dialogue timing, and an ending hold.
|
|
347
|
+
|
|
348
|
+
### Active Video Prompt Requirement
|
|
349
|
+
|
|
350
|
+
Blank video prompts are blocked. Weak natural-language prompts are allowed, but
|
|
351
|
+
agents should always write an active prompt that includes:
|
|
352
|
+
|
|
353
|
+
- visual flow: what changes on screen
|
|
354
|
+
- motion flow: subject and camera motion
|
|
355
|
+
- sound flow: music style, no music, room tone, or sound-effects-only
|
|
356
|
+
- dialogue flow: exact line or explicit no-dialogue
|
|
357
|
+
- ending frame: final pose, camera state, last spoken words, and final sound cue
|
|
358
|
+
- duration pacing: make the selected seconds feel naturally filled with a
|
|
359
|
+
production-level sequence from opening composition to connected change to
|
|
360
|
+
stable ending frame
|
|
361
|
+
|
|
362
|
+
Template:
|
|
363
|
+
|
|
364
|
+
```text
|
|
365
|
+
From the attached last frame, <subject/action> moves from A to B while the
|
|
366
|
+
camera <movement>. Sound: <music/no music/SFX/room tone>. Dialogue: <line or no
|
|
367
|
+
dialogue>. Pace the selected duration with a complete visual sequence. End on
|
|
368
|
+
<final frame and final audio state>.
|
|
369
|
+
```
|
|
370
|
+
|
|
322
371
|
### Prerequisites
|
|
323
372
|
|
|
324
373
|
```bash
|
|
325
|
-
ima2 grok login # authenticate (
|
|
374
|
+
ima2 grok login # authenticate (manual-paste flow)
|
|
326
375
|
ima2 grok status # verify connection
|
|
327
376
|
ima2 serve # server must be running
|
|
328
377
|
```
|
|
@@ -330,10 +379,274 @@ ima2 serve # server must be running
|
|
|
330
379
|
### Output
|
|
331
380
|
|
|
332
381
|
SSE streaming events: `planning` → `submitted` → `progress` (0-100%) → `done`.
|
|
333
|
-
|
|
382
|
+
The `submitted` and `done` payloads include `requestedModel`, `effectiveModel`, and `modelFallback` so agents can report when a requested 1.5-preview Ref2V job actually ran on `grok-imagine-video`. CLI `--json` prints `video.requestedModel`, `video.effectiveModel`, and `video.modelFallback`; use `path`/`filename` for local chaining.
|
|
334
383
|
|
|
335
384
|
### Discover Valid Parameters
|
|
336
385
|
|
|
337
386
|
```bash
|
|
338
387
|
ima2 capabilities --json | jq '.valid.videoModels'
|
|
339
388
|
```
|
|
389
|
+
|
|
390
|
+
### Advanced Workflows
|
|
391
|
+
|
|
392
|
+
#### Image-First Video (best quality)
|
|
393
|
+
|
|
394
|
+
Generate a high-quality still image first, then animate it. This produces better results than text-to-video alone because the video model has a concrete visual anchor.
|
|
395
|
+
|
|
396
|
+
```bash
|
|
397
|
+
# Step 1: Generate the key frame
|
|
398
|
+
ima2 gen "cinematic wide shot of a mountain lake at sunset, 16:9" --size 1792x1024 -o keyframe.png
|
|
399
|
+
|
|
400
|
+
# Step 2: Animate from that frame
|
|
401
|
+
ima2 video "gentle water ripples, clouds drifting slowly, birds flying in distance" --ref keyframe.png --duration 10 --aspect-ratio 16:9
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
#### Multi-Shot Video (connected scenes)
|
|
405
|
+
|
|
406
|
+
Create a sequence of connected clips using `--topic` for narrative continuity. Each generation receives context from previous clips in the same topic.
|
|
407
|
+
|
|
408
|
+
```bash
|
|
409
|
+
# Scene 1: Establishing shot
|
|
410
|
+
ima2 video "wide establishing shot of a busy Tokyo street at night, neon signs" \
|
|
411
|
+
--topic "tokyo-night" --duration 5
|
|
412
|
+
|
|
413
|
+
# Scene 2: Medium shot (planner sees Scene 1's revised prompt)
|
|
414
|
+
ima2 video "medium shot following a person walking through the crowd" \
|
|
415
|
+
--topic "tokyo-night" --duration 5
|
|
416
|
+
|
|
417
|
+
# Scene 3: Close-up (planner sees Scenes 1+2)
|
|
418
|
+
ima2 video "close-up of rain drops on a neon sign reflection" \
|
|
419
|
+
--topic "tokyo-night" --duration 5
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
The planner receives previous prompts from the same topic as continuity context. This is best-effort prompt guidance, not a guarantee that subjects, palette, or style will remain identical. For branch-local continuation, use `ima2 video continue` instead.
|
|
423
|
+
|
|
424
|
+
#### Video Continuation (extend/sequel)
|
|
425
|
+
|
|
426
|
+
To continue from an existing video's last frame:
|
|
427
|
+
|
|
428
|
+
```bash
|
|
429
|
+
# Get the last generated video filename
|
|
430
|
+
LAST=$(ima2 ls -n 1 --json | jq -r '.items[0].filename')
|
|
431
|
+
|
|
432
|
+
# True extension keeps the original clip and appends new motion
|
|
433
|
+
ima2 video extend "the camera slowly pulls back revealing the full scene" --video "$LAST" --duration 6
|
|
434
|
+
|
|
435
|
+
# Branch-local sequel keeps revisedPrompt lineage and starts from the last frame
|
|
436
|
+
ima2 video continue "from the last frame, the camera slowly pulls back, no music, footsteps echo, end on a still wide shot" --video "$LAST"
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
Or in the UI: use "Continue here" on a video, drag a video from gallery/history
|
|
440
|
+
to the prompt composer, or create a child from a video node. These flows attach
|
|
441
|
+
the previous video's last frame and carry a branch-local `videoContinuity`
|
|
442
|
+
lineage stack. The stack stores up to 4 revised prompts using
|
|
443
|
+
`keep-start-plus-latest-3`: the start clip is preserved, and the newest three clips
|
|
444
|
+
stay in context.
|
|
445
|
+
|
|
446
|
+
`ima2 video extend` is xAI's native extension: it returns the original plus the extension as a
|
|
447
|
+
combined artifact. `ima2 video continue` is ima2's branch continuation: it creates
|
|
448
|
+
a new clip from the generated video's last frame and persists lineage metadata.
|
|
449
|
+
|
|
450
|
+
#### Marketing/Product Video
|
|
451
|
+
|
|
452
|
+
Generate a product showcase video from a product image:
|
|
453
|
+
|
|
454
|
+
```bash
|
|
455
|
+
# Step 1: Generate or provide product image
|
|
456
|
+
ima2 gen "clean product photo of wireless earbuds on white background" -o product.png
|
|
457
|
+
|
|
458
|
+
# Step 2: Create dynamic product video
|
|
459
|
+
ima2 video "sleek product reveal with rotating camera, premium feel, studio lighting" \
|
|
460
|
+
--ref product.png --duration 10 --resolution 720p --aspect-ratio 16:9
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
#### Style-Consistent Series
|
|
464
|
+
|
|
465
|
+
For maintaining visual style across multiple videos (e.g., social media series):
|
|
466
|
+
|
|
467
|
+
```bash
|
|
468
|
+
# First video establishes the style
|
|
469
|
+
ima2 video "minimalist animation of a coffee cup, flat design, pastel colors" \
|
|
470
|
+
--topic "coffee-series" --duration 5
|
|
471
|
+
|
|
472
|
+
# Subsequent videos inherit style via planner context
|
|
473
|
+
ima2 video "same style, now showing latte art being poured" \
|
|
474
|
+
--topic "coffee-series" --duration 5
|
|
475
|
+
|
|
476
|
+
ima2 video "same style, steam rising from the cup" \
|
|
477
|
+
--topic "coffee-series" --duration 5
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
#### Batch Generation (scripting)
|
|
481
|
+
|
|
482
|
+
```bash
|
|
483
|
+
#!/bin/bash
|
|
484
|
+
PROMPTS=("sunrise over ocean" "waves crashing" "seagulls flying" "sunset colors")
|
|
485
|
+
TOPIC="ocean-day"
|
|
486
|
+
|
|
487
|
+
for prompt in "${PROMPTS[@]}"; do
|
|
488
|
+
ima2 video "$prompt" --topic "$TOPIC" --duration 5 --json >> results.jsonl
|
|
489
|
+
sleep 2 # rate limiting
|
|
490
|
+
done
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
### Limitations
|
|
494
|
+
|
|
495
|
+
- Max 15 seconds per clip (extend adds 2-10s more)
|
|
496
|
+
- Reference-to-video (2+ refs): max 10 seconds, max 7 refs, `grok-imagine-video` effective model
|
|
497
|
+
- Max 720p resolution (no upscale API available)
|
|
498
|
+
- Video edit/extend: grok-imagine-video only (1.5-preview not supported)
|
|
499
|
+
- Video edit input: max 8.7 seconds
|
|
500
|
+
- Video extend input: 2-15 seconds; extension duration: 2-10 seconds
|
|
501
|
+
|
|
502
|
+
### Video Editing (V2V)
|
|
503
|
+
|
|
504
|
+
Edit an existing video with a text prompt. This uses xAI's real video edit endpoint and saves the result as a generated video artifact.
|
|
505
|
+
|
|
506
|
+
```bash
|
|
507
|
+
# Get the local video file from a previous generation
|
|
508
|
+
VIDEO_FILE=$(ima2 video "ocean waves" --json | jq -r '.path')
|
|
509
|
+
|
|
510
|
+
# Edit: change style
|
|
511
|
+
ima2 video edit "Make the water glow neon blue, bioluminescent" --video "$VIDEO_FILE"
|
|
512
|
+
|
|
513
|
+
# Edit: add object
|
|
514
|
+
ima2 video edit "Add a sailboat in the distance" --video "$VIDEO_FILE"
|
|
515
|
+
|
|
516
|
+
# Edit: change mood
|
|
517
|
+
ima2 video edit "Make it stormy with dark clouds" --video "$VIDEO_FILE"
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
Constraints: grok-imagine-video only, input mp4 <=8.7s. Use `-o/--out` if you also need a local copy outside the generated directory.
|
|
521
|
+
|
|
522
|
+
### Video Extension (Continue from Last Frame)
|
|
523
|
+
|
|
524
|
+
Extend a video from its last frame using xAI's video extension endpoint. The output combines the source video and extension, but continuity quality is provider-dependent.
|
|
525
|
+
|
|
526
|
+
Constraints: grok-imagine-video only, extension duration 2-10s. 1.5-preview is not supported for extension.
|
|
527
|
+
|
|
528
|
+
```bash
|
|
529
|
+
# Generate initial clip
|
|
530
|
+
VIDEO_FILE=$(ima2 video "a bird takes flight from a branch" --duration 5 --json | jq -r '.path')
|
|
531
|
+
|
|
532
|
+
# Extend: add 5 more seconds
|
|
533
|
+
ima2 video extend "the bird soars higher into the clouds" --video "$VIDEO_FILE" --duration 5
|
|
534
|
+
|
|
535
|
+
# Chain extensions for longer videos
|
|
536
|
+
EXTENDED=$(ima2 video extend "camera follows the bird" --video "$VIDEO_FILE" --duration 5 --json | jq -r '.filename')
|
|
537
|
+
ima2 video extend "bird lands on a distant tree" --video "$EXTENDED" --duration 5
|
|
538
|
+
```
|
|
539
|
+
|
|
540
|
+
### Video Frame Extraction
|
|
541
|
+
|
|
542
|
+
Extract frames from generated videos for use as references or analysis.
|
|
543
|
+
|
|
544
|
+
```bash
|
|
545
|
+
# Extract last frame
|
|
546
|
+
ima2 video frame 1780226256355_50252101.mp4 --last -o lastframe.png
|
|
547
|
+
|
|
548
|
+
# Extract frame at specific timestamp
|
|
549
|
+
ima2 video frame 1780226256355_50252101.mp4 --position 2.5 -o frame_2s.png
|
|
550
|
+
|
|
551
|
+
# Use extracted frame as reference for new generation
|
|
552
|
+
ima2 video "continue this scene" --ref lastframe.png
|
|
553
|
+
```
|
|
554
|
+
|
|
555
|
+
### Video Analysis (Recreation Prompt)
|
|
556
|
+
|
|
557
|
+
Analyze first and last video frames with Grok 4.3 image understanding to get a structured recreation prompt. This infers motion from frames; it is not full temporal video understanding.
|
|
558
|
+
|
|
559
|
+
```bash
|
|
560
|
+
# Analyze a generated filename
|
|
561
|
+
ima2 video analyze 1780226256355_50252101.mp4
|
|
562
|
+
|
|
563
|
+
# Output: structured prompt with shot type, inferred camera movement, lighting, color, motion, mood
|
|
564
|
+
|
|
565
|
+
# Use the analysis to recreate with variations
|
|
566
|
+
ANALYSIS=$(ima2 video analyze 1780226256355_50252101.mp4 --json | jq -r '.analysis')
|
|
567
|
+
ima2 video "$ANALYSIS but in anime style" --ref reference.png
|
|
568
|
+
```
|
|
569
|
+
|
|
570
|
+
### Audio in Video (Prompt-Controlled)
|
|
571
|
+
|
|
572
|
+
The API does not expose a separate audio on/off or audio-track control. Treat audio as prompt-compiled: describe dialogue, music, no-music, room tone, or sound-effects-only behavior in the video prompt. Output is provider-dependent, but the prompt must be explicit when audio matters.
|
|
573
|
+
|
|
574
|
+
```bash
|
|
575
|
+
# Explicit sound direction
|
|
576
|
+
ima2 video "ocean waves crashing on rocks with seagull calls and distant thunder"
|
|
577
|
+
|
|
578
|
+
# Music direction
|
|
579
|
+
ima2 video "timelapse of city at night, lo-fi hip hop background music"
|
|
580
|
+
|
|
581
|
+
# Dialogue
|
|
582
|
+
ima2 video "person speaking to camera: Hello world, welcome to my channel"
|
|
583
|
+
|
|
584
|
+
# No music / room tone
|
|
585
|
+
ima2 video "quiet forest scene, no background music, only subtle wind and leaves rustling"
|
|
586
|
+
|
|
587
|
+
# Sound effects only
|
|
588
|
+
ima2 video "no music, only footsteps, cloth movement, rain hits, and one radio click"
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
For continuity clips, always define the final audio state: whether dialogue finishes before the cut, music resolves or continues, or a sound effect carries into the next clip.
|
|
592
|
+
|
|
593
|
+
### Structured Video Prompt Template
|
|
594
|
+
|
|
595
|
+
Use this structure for serious video generation, Ref2V, extension prompts, and multi-shot continuity. A static visual description is not enough.
|
|
596
|
+
|
|
597
|
+
```text
|
|
598
|
+
Scene Start: what the first frame already contains.
|
|
599
|
+
Expected Motion: the exact A or B motion that must happen.
|
|
600
|
+
Camera: pan, dolly, tracking, crane, handheld, static, or Shot Switch.
|
|
601
|
+
Dialogue: speaker, exact line, timing, or "no dialogue".
|
|
602
|
+
Music: style, swell/cut/resolve behavior, or "no background music".
|
|
603
|
+
Sound Effects: room tone, footsteps, rain, machine hum, impact, etc.
|
|
604
|
+
Ending Frame: final pose, composition, camera state.
|
|
605
|
+
Continuity Handoff: final spoken line, music state, or sound cue for the next clip.
|
|
606
|
+
Negative Constraints: no visible subtitles/text unless requested, preserve identity/style.
|
|
607
|
+
```
|
|
608
|
+
|
|
609
|
+
When creating a sequence, write both motions explicitly: "A motion" for the first clip and "B motion" for the continuation. For last-frame Ref2V, use ref 1 as identity/style and ref 2 as current state/last frame.
|
|
610
|
+
|
|
611
|
+
### End Frame Guidance (via Ref2V)
|
|
612
|
+
|
|
613
|
+
Guide the video toward a desired final scene using reference images:
|
|
614
|
+
|
|
615
|
+
```bash
|
|
616
|
+
# Start frame + end frame concept
|
|
617
|
+
ima2 video "smooth transition from day to night" \
|
|
618
|
+
--ref sunrise.png --ref nightsky.png
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
The planner treats reference images as subject/style/composition guidance. This is best-effort guidance, not a guaranteed final-frame constraint.
|
|
622
|
+
|
|
623
|
+
### Soul Character / Face Consistency (via Ref2V)
|
|
624
|
+
|
|
625
|
+
Guide character identity across multiple videos using reference photos:
|
|
626
|
+
|
|
627
|
+
```bash
|
|
628
|
+
# Provide face references for consistency
|
|
629
|
+
ima2 video "person walking through a park, smiling" \
|
|
630
|
+
--ref face_front.png --ref face_side.png --ref face_smile.png
|
|
631
|
+
|
|
632
|
+
# Same character in different scenes
|
|
633
|
+
ima2 video "same person now sitting at a cafe" \
|
|
634
|
+
--ref face_front.png --ref face_side.png --topic "character-series"
|
|
635
|
+
```
|
|
636
|
+
|
|
637
|
+
### Marketing / Product Video
|
|
638
|
+
|
|
639
|
+
Turn a product image into a dynamic showcase video:
|
|
640
|
+
|
|
641
|
+
```bash
|
|
642
|
+
# Step 1: Generate or provide product image
|
|
643
|
+
ima2 gen "clean product photo of wireless earbuds on white background" -o product.png
|
|
644
|
+
|
|
645
|
+
# Step 2: Create product video
|
|
646
|
+
ima2 video "sleek product reveal, rotating camera, premium studio lighting" \
|
|
647
|
+
--ref product.png --duration 10 --aspect-ratio 16:9
|
|
648
|
+
|
|
649
|
+
# Step 3: Extend with lifestyle shot
|
|
650
|
+
PRODUCT_VID=$(ima2 video "product reveal" --ref product.png --json | jq -r '.path')
|
|
651
|
+
ima2 video extend "person puts on the earbuds and smiles" --video "$PRODUCT_VID" --duration 5
|
|
652
|
+
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"index.html": {
|
|
3
|
-
"file": "assets/index-DYOh6gQD.js",
|
|
3
|
+
"file": "assets/index-Da2s4_-5.js",
|
|
4
4
|
"name": "index",
|
|
5
5
|
"src": "index.html",
|
|
6
6
|
"isEntry": true,
|
|
@@ -16,11 +16,11 @@
|
|
|
16
16
|
"src/components/PromptLibraryPanel.tsx"
|
|
17
17
|
],
|
|
18
18
|
"css": [
|
|
19
|
-
"assets/index-B6tcw_UF.css"
|
|
19
|
+
"assets/index-CjgnNtgt.css"
|
|
20
20
|
]
|
|
21
21
|
},
|
|
22
22
|
"src/components/NodeCanvas.tsx": {
|
|
23
|
-
"file": "assets/NodeCanvas-DccIc347.js",
|
|
23
|
+
"file": "assets/NodeCanvas-CI_wuPMf.js",
|
|
24
24
|
"name": "NodeCanvas",
|
|
25
25
|
"src": "src/components/NodeCanvas.tsx",
|
|
26
26
|
"isDynamicEntry": true,
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
]
|
|
33
33
|
},
|
|
34
34
|
"src/components/PromptImportDialog.tsx": {
|
|
35
|
-
"file": "assets/PromptImportDialog-u1_BFDRd.js",
|
|
35
|
+
"file": "assets/PromptImportDialog-CUi66jPK.js",
|
|
36
36
|
"name": "PromptImportDialog",
|
|
37
37
|
"src": "src/components/PromptImportDialog.tsx",
|
|
38
38
|
"isDynamicEntry": true,
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
]
|
|
46
46
|
},
|
|
47
47
|
"src/components/PromptImportDiscoverySection.tsx": {
|
|
48
|
-
"file": "assets/PromptImportDiscoverySection-C5uvkVSz.js",
|
|
48
|
+
"file": "assets/PromptImportDiscoverySection-Cm3vrjY4.js",
|
|
49
49
|
"name": "PromptImportDiscoverySection",
|
|
50
50
|
"src": "src/components/PromptImportDiscoverySection.tsx",
|
|
51
51
|
"isDynamicEntry": true,
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
]
|
|
55
55
|
},
|
|
56
56
|
"src/components/PromptImportFolderSection.tsx": {
|
|
57
|
-
"file": "assets/PromptImportFolderSection-D3E_O1SD.js",
|
|
57
|
+
"file": "assets/PromptImportFolderSection-DOtWTD9n.js",
|
|
58
58
|
"name": "PromptImportFolderSection",
|
|
59
59
|
"src": "src/components/PromptImportFolderSection.tsx",
|
|
60
60
|
"isDynamicEntry": true,
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
]
|
|
64
64
|
},
|
|
65
65
|
"src/components/PromptLibraryPanel.tsx": {
|
|
66
|
-
"file": "assets/PromptLibraryPanel-4gyf9CB9.js",
|
|
66
|
+
"file": "assets/PromptLibraryPanel-BMjQegRa.js",
|
|
67
67
|
"name": "PromptLibraryPanel",
|
|
68
68
|
"src": "src/components/PromptLibraryPanel.tsx",
|
|
69
69
|
"isDynamicEntry": true,
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
]
|
|
76
76
|
},
|
|
77
77
|
"src/components/SettingsWorkspace.tsx": {
|
|
78
|
-
"file": "assets/SettingsWorkspace-F3eNu3mJ.js",
|
|
78
|
+
"file": "assets/SettingsWorkspace-PiaVnsdA.js",
|
|
79
79
|
"name": "SettingsWorkspace",
|
|
80
80
|
"src": "src/components/SettingsWorkspace.tsx",
|
|
81
81
|
"isDynamicEntry": true,
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
]
|
|
85
85
|
},
|
|
86
86
|
"src/components/agent/AgentWorkspace.tsx": {
|
|
87
|
-
"file": "assets/AgentWorkspace-DS8uvoLI.js",
|
|
87
|
+
"file": "assets/AgentWorkspace-B_hq9CLg.js",
|
|
88
88
|
"name": "AgentWorkspace",
|
|
89
89
|
"src": "src/components/agent/AgentWorkspace.tsx",
|
|
90
90
|
"isDynamicEntry": true,
|
|
@@ -93,7 +93,7 @@
|
|
|
93
93
|
]
|
|
94
94
|
},
|
|
95
95
|
"src/components/canvas-mode/index.ts": {
|
|
96
|
-
"file": "assets/index-DoKtXbod.js",
|
|
96
|
+
"file": "assets/index-31uVIdt4.js",
|
|
97
97
|
"name": "index",
|
|
98
98
|
"src": "src/components/canvas-mode/index.ts",
|
|
99
99
|
"isDynamicEntry": true,
|
|
@@ -102,7 +102,7 @@
|
|
|
102
102
|
]
|
|
103
103
|
},
|
|
104
104
|
"src/components/card-news/CardNewsWorkspace.tsx": {
|
|
105
|
-
"file": "assets/CardNewsWorkspace-CYxMsE67.js",
|
|
105
|
+
"file": "assets/CardNewsWorkspace-wD12J7qk.js",
|
|
106
106
|
"name": "CardNewsWorkspace",
|
|
107
107
|
"src": "src/components/card-news/CardNewsWorkspace.tsx",
|
|
108
108
|
"isDynamicEntry": true,
|
|
@@ -111,7 +111,7 @@
|
|
|
111
111
|
]
|
|
112
112
|
},
|
|
113
113
|
"src/components/prompt-builder/PromptBuilderPanel.tsx": {
|
|
114
|
-
"file": "assets/PromptBuilderPanel-BvxxwSJp.js",
|
|
114
|
+
"file": "assets/PromptBuilderPanel-CUTujJUV.js",
|
|
115
115
|
"name": "PromptBuilderPanel",
|
|
116
116
|
"src": "src/components/prompt-builder/PromptBuilderPanel.tsx",
|
|
117
117
|
"isDynamicEntry": true,
|