ima2-gen 1.1.18 → 1.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +2 -2
  2. package/bin/commands/video.js +4 -0
  3. package/bin/commands/video.ts +3 -0
  4. package/docs/README.ja.md +2 -2
  5. package/docs/README.ko.md +15 -3
  6. package/docs/README.zh-CN.md +2 -2
  7. package/lib/agentGenerationPlanner.js +18 -1
  8. package/lib/agentGenerationPlanner.ts +21 -1
  9. package/lib/agentRuntime.js +105 -1
  10. package/lib/agentRuntime.ts +118 -1
  11. package/lib/agentTypes.js +1 -0
  12. package/lib/agentTypes.ts +2 -1
  13. package/lib/assetLifecycle.js +12 -8
  14. package/lib/assetLifecycle.ts +12 -8
  15. package/lib/capabilities.js +1 -1
  16. package/lib/capabilities.ts +1 -1
  17. package/lib/grokVideoAdapter.js +30 -2
  18. package/lib/grokVideoAdapter.ts +36 -2
  19. package/lib/historyList.js +1 -0
  20. package/lib/historyList.ts +1 -0
  21. package/lib/videoSeriesChain.js +24 -0
  22. package/lib/videoSeriesChain.ts +29 -0
  23. package/node_modules/progrok/README.md +300 -22
  24. package/node_modules/progrok/dist/index.js +558 -173
  25. package/node_modules/progrok/dist/index.js.map +1 -1
  26. package/node_modules/progrok/package.json +3 -3
  27. package/node_modules/progrok/skills/progrok/SKILL.md +145 -109
  28. package/package.json +2 -2
  29. package/routes/video.js +10 -1
  30. package/routes/video.ts +11 -1
  31. package/ui/dist/.vite/manifest.json +12 -12
  32. package/ui/dist/assets/AgentWorkspace-DE_wg90f.js +3 -0
  33. package/ui/dist/assets/{CardNewsWorkspace-DmqCMnIx.js → CardNewsWorkspace--Myc5pAp.js} +1 -1
  34. package/ui/dist/assets/NodeCanvas-4U5oOT2y.js +7 -0
  35. package/ui/dist/assets/{PromptBuilderPanel-CoWjqQZS.js → PromptBuilderPanel-DNW1U8zI.js} +2 -2
  36. package/ui/dist/assets/{PromptImportDialog-C2zGZkyK.js → PromptImportDialog-o-4Sqki1.js} +2 -2
  37. package/ui/dist/assets/{PromptImportDiscoverySection-N0ZxHLYs.js → PromptImportDiscoverySection-BAbrRP8B.js} +1 -1
  38. package/ui/dist/assets/{PromptImportFolderSection-BC3dCASZ.js → PromptImportFolderSection-L-XI2noz.js} +1 -1
  39. package/ui/dist/assets/{PromptLibraryPanel-CcVliYnF.js → PromptLibraryPanel-CrW9LYGD.js} +2 -2
  40. package/ui/dist/assets/{SettingsWorkspace-CiB4ux7E.js → SettingsWorkspace-Dn4SYTyZ.js} +1 -1
  41. package/ui/dist/assets/index-B6tcw_UF.css +1 -0
  42. package/ui/dist/assets/{index-C93CfR9P.js → index-BONbNNIi.js} +1 -1
  43. package/ui/dist/assets/index-CeSZ2L3-.js +32 -0
  44. package/ui/dist/index.html +2 -2
  45. package/vendor/progrok-0.1.1.tgz +0 -0
  46. package/ui/dist/assets/AgentWorkspace-BTuPjlDH.js +0 -3
  47. package/ui/dist/assets/NodeCanvas-jr9WXfNm.js +0 -7
  48. package/ui/dist/assets/index-CIhB_ia7.css +0 -1
  49. package/ui/dist/assets/index-uBEJn5jz.js +0 -32
  50. package/vendor/progrok-0.1.0.tgz +0 -0
package/README.md CHANGED
@@ -16,9 +16,9 @@
16
16
 
17
17
  `ima2-gen` is a local image generation studio for people who want the ChatGPT/Codex image workflow in a small desktop-like web app.
18
18
 
19
- Run it with `npx`, sign in with Codex OAuth, type a prompt, and keep iterating with history, references, node branches, multimode batches, and Canvas Mode cleanup. No OpenAI API key is required for the default path, but API-key generation and bundled Grok generation are also supported when configured.
19
+ Run it with `npx`, sign in with ChatGPT OAuth or Grok OAuth, and start generating images and videos. Iterate with history, references, node branches, multimode batches, Canvas Mode cleanup, and Grok Video generation. No API key required — free ChatGPT OAuth and SuperGrok subscription cover everything.
20
20
 
21
- ![ima2-gen classic generation screen with prompt composer, generated image, compact model label, and result metadata.](assets/screenshots/classic-generate-light.png)
21
+ ![ima2-gen video playback with gallery sidebar showing generated images and videos.](assets/screenshots/classic-generate-light.png)
22
22
 
23
23
  ## Quick Start
24
24
 
@@ -14,6 +14,7 @@ const SPEC = {
14
14
  resolution: { type: "string", default: "480p" },
15
15
  "aspect-ratio": { type: "string", default: "auto" },
16
16
  model: { type: "string" },
17
+ topic: { type: "string" },
17
18
  ref: { type: "string", repeatable: true },
18
19
  out: { short: "o", type: "string" },
19
20
  "out-dir": { short: "d", type: "string" },
@@ -34,6 +35,7 @@ const HELP = `
34
35
  --resolution <480p|720p> Default: 480p
35
36
  --aspect-ratio <ratio|auto> 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto. Default: auto
36
37
  --model <name> grok-imagine-video, grok-imagine-video-1.5-preview
38
+ --topic <text> Series topic for prompt chain continuity
37
39
  --ref <file> Attach source/reference image (repeatable, max 7)
38
40
  -o, --out <file> Output file path
39
41
  -d, --out-dir <dir> Output directory
@@ -102,6 +104,8 @@ export default async function videoCmd(argv) {
102
104
  body.model = args.model;
103
105
  if (args.session)
104
106
  body.sessionId = args.session;
107
+ if (args.topic)
108
+ body.topic = args.topic;
105
109
  if (referenceImages.length === 1) {
106
110
  body.sourceImage = referenceImages[0];
107
111
  }
@@ -16,6 +16,7 @@ const SPEC = {
16
16
  resolution: { type: "string", default: "480p" },
17
17
  "aspect-ratio": { type: "string", default: "auto" },
18
18
  model: { type: "string" },
19
+ topic: { type: "string" },
19
20
  ref: { type: "string", repeatable: true },
20
21
  out: { short: "o", type: "string" },
21
22
  "out-dir": { short: "d", type: "string" },
@@ -37,6 +38,7 @@ const HELP = `
37
38
  --resolution <480p|720p> Default: 480p
38
39
  --aspect-ratio <ratio|auto> 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto. Default: auto
39
40
  --model <name> grok-imagine-video, grok-imagine-video-1.5-preview
41
+ --topic <text> Series topic for prompt chain continuity
40
42
  --ref <file> Attach source/reference image (repeatable, max 7)
41
43
  -o, --out <file> Output file path
42
44
  -d, --out-dir <dir> Output directory
@@ -101,6 +103,7 @@ export default async function videoCmd(argv: string[]) {
101
103
  };
102
104
  if (args.model) body.model = args.model;
103
105
  if (args.session) body.sessionId = args.session;
106
+ if (args.topic) body.topic = args.topic;
104
107
  if (referenceImages.length === 1) {
105
108
  body.sourceImage = referenceImages[0];
106
109
  } else if (referenceImages.length > 1) {
package/docs/README.ja.md CHANGED
@@ -8,9 +8,9 @@
8
8
  >
9
9
  > **他の言語で読む**: [English](../README.md) · [한국어](README.ko.md) · [简体中文](README.zh-CN.md)
10
10
 
11
- `ima2-gen` は、ChatGPT/Codex OAuth の画像生成ワークフローをローカルの小さなデスクトップアプリのように使える画像生成スタジオです。
11
+ `ima2-gen` は、無料の ChatGPT + SuperGrok だけで画像と動画を作れるローカル AI スタジオです。
12
12
 
13
- `npx` で起動し、Codex OAuth でログインして、プロンプトを書きながら履歴や参照画像、ノード分岐、multimode batch、Canvas Mode cleanup を活用して試行錯誤(イテレーション)が可能です。通常の画像生成には OpenAI API key は不要です。
13
+ `npx` で起動し、ChatGPT または Grok OAuth でログインすれば、すぐに画像・動画生成を始められます。API キー不要で、ノード分岐、multimode batch、Grok Video、Canvas Mode まで全機能が使えます。
14
14
 
15
15
  ![プロンプト入力、生成画像、モデル表示、結果メタデータが見える ima2-gen classic 画面](../assets/screenshots/classic-generate-light.png)
16
16
 
package/docs/README.ko.md CHANGED
@@ -10,9 +10,9 @@
10
10
  >
11
11
  > **다른 언어로 읽기**: [English](../README.md) · [日本語](README.ja.md) · [简体中文](README.zh-CN.md)
12
12
 
13
- `ima2-gen`은 ChatGPT/Codex OAuth 이미지 생성 흐름을 로컬 웹앱처럼 쓸 수 있게 만든 이미지 생성 스튜디오입니다.
13
+ `ima2-gen`은 무료 ChatGPT + SuperGrok만으로 이미지와 영상을 만드는 로컬 AI 스튜디오입니다.
14
14
 
15
- `npx`로 실행하고, Codex OAuth로 로그인한 뒤, 프롬프트를 입력하면서 히스토리, 레퍼런스, 노드 브랜치, 멀티모드 배치, Canvas Mode 정리 작업으로 계속 이어갈 수 있습니다. 기본 이미지 생성 경로에서는 OpenAI API 키가 필요하지 않으며, 설정된 경우 OpenAI API-key 경로와 번들 Grok 경로도 사용할 수 있습니다.
15
+ `npx` 한 줄로 실행하고, ChatGPT 또는 Grok OAuth로 로그인하면 바로 시작됩니다. API 키 없이 이미지 생성, 비디오 생성, 노드 분기, 멀티모드 배치, Canvas 정리까지 전부 가능합니다.
16
16
 
17
17
  ![프롬프트 작성창, 생성 이미지, 모델 표시, 결과 메타데이터가 보이는 ima2-gen 클래식 생성 화면](../assets/screenshots/classic-generate-light.png)
18
18
 
@@ -40,12 +40,24 @@ npm install -g ima2-gen
40
40
  ima2 serve
41
41
  ```
42
42
 
43
+ ### 설정
44
+
45
+ `ima2 setup`으로 인증 방식을 선택합니다:
46
+
47
+ 1. **GPT OAuth** — ChatGPT 계정으로 로그인 (무료, 이미지만)
48
+ 2. **Grok OAuth** — xAI/Grok 계정으로 로그인 (이미지 + 영상)
49
+ 3. **Both** — GPT + Grok 둘 다 (전체 기능)
50
+ 4. **API Key** — OpenAI API 키 입력 (유료)
51
+
52
+ 영상 생성은 Grok OAuth(2번 또는 3번)가 필요합니다.
53
+
43
54
  ## 무엇을 할 수 있나요?
44
55
 
45
56
  - **Classic mode**: 빠르게 이미지를 만들고, 수정하고, 현재 결과를 다시 레퍼런스로 사용합니다.
46
57
  - **Node mode**: 마음에 드는 이미지를 여러 방향으로 분기해 실험합니다.
47
58
  - **Multimode batches**: 하나의 프롬프트에서 여러 후보 슬롯을 동시에 만들고, 가장 좋은 결과에서 이어갑니다.
48
59
  - **Canvas Mode**: 확대/이동, 주석, 지우개, 배경 정리, 투명 체크보드 미리보기, alpha/matte export를 지원합니다.
60
+ - **Video 생성**: 텍스트, 이미지, 또는 여러 레퍼런스에서 짧은 영상을 만듭니다. 기획→제출→진행률→완료까지 실시간으로 보여줍니다.
49
61
  - **Local gallery**: 생성물을 내 컴퓨터에 저장하고 세션별 히스토리로 봅니다.
50
62
  - **Reference images**: 레퍼런스를 드래그, 붙여넣기, 파일 선택으로 추가합니다. 큰 이미지는 업로드 전에 자동 압축됩니다.
51
63
  - **Prompt library imports**: 로컬 prompt pack, GitHub folder, curated GPT-image hint를 내장 prompt library로 가져옵니다.
@@ -62,7 +74,7 @@ ima2 serve
62
74
 
63
75
  Grok은 Classic, Node, Agent 흐름을 지원합니다. Classic 레퍼런스, Node 부모 이미지, Agent 현재 이미지가 있으면 최종 Grok 호출은 xAI image edit 경로로 전환되어 image-to-image 맥락을 유지합니다. 기본 모델은 `grok-imagine-image`이고, `quality: "high"`에서는 `grok-imagine-image-quality`를 사용합니다.
64
76
 
65
- Grok video 생성(T2V/I2V)은 `1.1.15`에 포함되지 않았습니다. `docs/grok-video-i2v-plan.md`와 `docs/grok-video-i2v-research.md`는 구현 계획과 조사 문서이며, 공개 런타임은 아직 이미지 생성만 지원합니다.
77
+ Grok video 생성(T2V/I2V/ref2v)은 v1.1.16부터 사용 가능합니다. 텍스트 프롬프트, 단일 이미지, 또는 최대 7장의 레퍼런스에서 짧은 영상을 만들 수 있으며, 실시간 진행률 스트리밍을 지원합니다.
66
78
 
67
79
  설정 화면에 **API key provider available**이나 **Grok provider available**이 보이면 해당 공급자가 감지됐고 생성 요청에 사용할 수 있다는 뜻입니다.
68
80
 
@@ -8,9 +8,9 @@
8
8
  >
9
9
  > **其他语言**: [English](../README.md) · [한국어](README.ko.md) · [日本語](README.ja.md)
10
10
 
11
- `ima2-gen` 是一个本地图像生成工作室,为你提供类似小型桌面应用的 ChatGPT/Codex OAuth 图像生成体验。
11
+ `ima2-gen` 是一个本地 AI 工作室,只需免费 ChatGPT + SuperGrok 即可生成图像和视频。
12
12
 
13
- 用 `npx` 启动,登录 Codex OAuth,输入 prompt,然后通过历史记录、参考图、节点分支、multimode 批量候选和 Canvas Mode 清理持续迭代。默认图像生成路径不需要 OpenAI API key。
13
+ 用 `npx` 启动,通过 ChatGPT 或 Grok OAuth 登录即可开始生成图像和视频。无需 API 密钥,节点分支、multimode 批量、Grok Video、Canvas Mode 全部可用。
14
14
 
15
15
  ![显示 prompt 输入区、生成图片、模型标签和结果元数据的 ima2-gen classic 界面](../assets/screenshots/classic-generate-light.png)
16
16
 
@@ -34,6 +34,19 @@ export function deriveAgentGenerationPlan({ prompt, settings, command = null })
34
34
  assistantText: null,
35
35
  };
36
36
  }
37
+ if (isVideoIntent(prompt)) {
38
+ return {
39
+ mode: "video",
40
+ prompts: [prompt],
41
+ requestedVariants: 1,
42
+ plannedVariants: 1,
43
+ plannedParallelism: 1,
44
+ source: "auto-request",
45
+ reason: "Video generation detected from prompt keywords.",
46
+ command: command?.name ?? null,
47
+ assistantText: null,
48
+ };
49
+ }
37
50
  const variantDecision = decideVariantCount(prompt, settings, command);
38
51
  const plannedParallelism = resolvePlannedParallelism(settings, variantDecision.count, command);
39
52
  const prompts = buildGenerationPrompts(prompt, variantDecision.count);
@@ -58,7 +71,7 @@ export function normalizeAgentGenerationPlan(prompt, value, settings) {
58
71
  const requestedParallelism = cleanCount(input.plannedParallelism, settings.parallelism, 1, HARD_MAX_VARIANTS);
59
72
  const plannedParallelism = resolvePlannedParallelism({ ...settings, parallelism: requestedParallelism }, plannedVariants, null);
60
73
  return {
61
- mode: input.mode === "question" ? "question" : prompts.length > 1 ? "fanout" : "single",
74
+ mode: input.mode === "question" ? "question" : input.mode === "video" ? "video" : prompts.length > 1 ? "fanout" : "single",
62
75
  prompts,
63
76
  requestedVariants: cleanCount(input.requestedVariants, plannedVariants, 0, HARD_MAX_VARIANTS),
64
77
  plannedVariants,
@@ -183,3 +196,7 @@ function cleanCount(value, fallback, min, max) {
183
196
  function clampCount(value, max) {
184
197
  return Math.max(1, Math.min(max, Math.round(value)));
185
198
  }
199
+ const VIDEO_INTENT_PATTERN = /\b(?:video|animate|animation|동영상|비디오|영상|애니메이트|움직이|클립)\b/iu;
200
+ function isVideoIntent(prompt) {
201
+ return VIDEO_INTENT_PATTERN.test(prompt);
202
+ }
@@ -56,6 +56,20 @@ export function deriveAgentGenerationPlan({ prompt, settings, command = null }:
56
56
  };
57
57
  }
58
58
 
59
+ if (isVideoIntent(prompt)) {
60
+ return {
61
+ mode: "video",
62
+ prompts: [prompt],
63
+ requestedVariants: 1,
64
+ plannedVariants: 1,
65
+ plannedParallelism: 1,
66
+ source: "auto-request",
67
+ reason: "Video generation detected from prompt keywords.",
68
+ command: command?.name ?? null,
69
+ assistantText: null,
70
+ };
71
+ }
72
+
59
73
  const variantDecision = decideVariantCount(prompt, settings, command);
60
74
  const plannedParallelism = resolvePlannedParallelism(settings, variantDecision.count, command);
61
75
  const prompts = buildGenerationPrompts(prompt, variantDecision.count);
@@ -85,7 +99,7 @@ export function normalizeAgentGenerationPlan(
85
99
  const requestedParallelism = cleanCount(input.plannedParallelism, settings.parallelism, 1, HARD_MAX_VARIANTS);
86
100
  const plannedParallelism = resolvePlannedParallelism({ ...settings, parallelism: requestedParallelism }, plannedVariants, null);
87
101
  return {
88
- mode: input.mode === "question" ? "question" : prompts.length > 1 ? "fanout" : "single",
102
+ mode: input.mode === "question" ? "question" : input.mode === "video" ? "video" : prompts.length > 1 ? "fanout" : "single",
89
103
  prompts,
90
104
  requestedVariants: cleanCount(input.requestedVariants, plannedVariants, 0, HARD_MAX_VARIANTS),
91
105
  plannedVariants,
@@ -227,3 +241,9 @@ function cleanCount(value: unknown, fallback: number, min: number, max: number):
227
241
  function clampCount(value: number, max: number): number {
228
242
  return Math.max(1, Math.min(max, Math.round(value)));
229
243
  }
244
+
245
+ const VIDEO_INTENT_PATTERN = /\b(?:video|animate|animation|동영상|비디오|영상|애니메이트|움직이|클립)\b/iu;
246
+
247
+ function isVideoIntent(prompt: string): boolean {
248
+ return VIDEO_INTENT_PATTERN.test(prompt);
249
+ }
@@ -9,6 +9,7 @@ import { detectImageMimeFromB64 } from "./refs.js";
9
9
  import { resolveProviderOptions } from "./providerOptions.js";
10
10
  import { generateViaResponses } from "./responsesImageAdapter.js";
11
11
  import { generateViaGrok } from "./grokImageAdapter.js";
12
+ import { generateVideoViaGrok } from "./grokVideoAdapter.js";
12
13
  import { appendAgentTurn, buildImageContextManifest, getAgentImages, getAgentSession, importAgentImage, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
13
14
  import { AGENT_ALLOWED_TOOLS, } from "./agentTypes.js";
14
15
  import { errInfo } from "./errInfo.js";
@@ -46,7 +47,7 @@ export async function runAgentGenerationPlan(ctx, sessionId, prompt, plan, optio
46
47
  const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
47
48
  const enabledTools = webSearchEnabled
48
49
  ? [...AGENT_ALLOWED_TOOLS]
49
- : ["ima2.get_image_context", "ima2.generate_image"];
50
+ : ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
50
51
  assertAgentAllowedTools(enabledTools);
51
52
  if (behavior.appendUserTurn !== false) {
52
53
  appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
@@ -62,6 +63,13 @@ export async function runAgentGenerationPlan(ctx, sessionId, prompt, plan, optio
62
63
  });
63
64
  return { assistantTurn, imageIds: [], webFindingIds: [] };
64
65
  }
66
+ if (plan.mode === "video") {
67
+ return runAgentVideoGeneration(ctx, sessionId, prompt, {
68
+ ...options,
69
+ requestId: options.requestId ?? `agent_video_${ulid()}`,
70
+ skipUserTurn: true,
71
+ });
72
+ }
65
73
  const manifest = buildImageContextManifest(sessionId);
66
74
  const contextStartedAt = Date.now();
67
75
  appendAgentTurn({
@@ -306,6 +314,102 @@ async function persistAgentImage(ctx, sessionId, prompt, format, requestId, resp
306
314
  createdAt: Date.now(),
307
315
  });
308
316
  }
317
+ export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
318
+ const session = getAgentSession(sessionId);
319
+ if (!session)
320
+ throw notFound(sessionId);
321
+ if (!options.skipUserTurn) {
322
+ appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
323
+ }
324
+ const requestId = options.requestId ?? `agent_video_${ulid()}`;
325
+ const startedAt = Date.now();
326
+ // Auto I2V: if session has a last image, use it as source
327
+ let sourceImage;
328
+ let mode = "text-to-video";
329
+ if (session.lastImageId) {
330
+ const images = getAgentImages(sessionId);
331
+ const lastImage = images.find((img) => img.id === session.lastImageId);
332
+ if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
333
+ try {
334
+ const { loadAssetB64 } = await import("./nodeStore.js");
335
+ sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
336
+ mode = "image-to-video";
337
+ }
338
+ catch { /* fallback to T2V */ }
339
+ }
340
+ }
341
+ const result = await generateVideoViaGrok(prompt, ctx, {
342
+ model: "grok-imagine-video",
343
+ mode,
344
+ sourceImage,
345
+ duration: 5,
346
+ resolution: "480p",
347
+ aspectRatio: "auto",
348
+ requestId,
349
+ signal: options.signal ?? undefined,
350
+ });
351
+ const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
352
+ const finishedAt = Date.now();
353
+ const toolCall = {
354
+ id: `tc_video_${ulid()}`,
355
+ name: "ima2.generate_video",
356
+ status: "complete",
357
+ startedAt,
358
+ finishedAt,
359
+ durationMs: finishedAt - startedAt,
360
+ requestId,
361
+ inputSummary: prompt,
362
+ outputSummary: `Generated video ${video.filename}.`,
363
+ imageIds: [video.id],
364
+ };
365
+ appendAgentTurn({
366
+ sessionId,
367
+ role: "tool",
368
+ text: "ima2.generate_video",
369
+ imageIds: [video.id],
370
+ status: "complete",
371
+ raw: { toolCalls: [toolCall] },
372
+ });
373
+ const assistantTurn = appendAgentTurn({
374
+ sessionId,
375
+ role: "assistant",
376
+ text: `Generated 1 video artifact. ${result.revisedPrompt}`,
377
+ imageIds: [video.id],
378
+ status: "complete",
379
+ });
380
+ return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
381
+ }
382
+ async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
383
+ await mkdir(ctx.config.storage.generatedDir, { recursive: true });
384
+ const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
385
+ const filename = `${Date.now()}_${rand}_agent.mp4`;
386
+ const meta = {
387
+ kind: "agent",
388
+ mediaType: "video",
389
+ requestId,
390
+ sessionId,
391
+ prompt,
392
+ userPrompt: prompt,
393
+ revisedPrompt: result.revisedPrompt,
394
+ provider: "grok",
395
+ model: "grok-imagine-video",
396
+ createdAt: Date.now(),
397
+ usage: result.usage,
398
+ webSearchCalls: result.webSearchCalls,
399
+ };
400
+ await writeFile(join(ctx.config.storage.generatedDir, filename), result.videoBuffer);
401
+ await writeFile(join(ctx.config.storage.generatedDir, `${filename}.json`), JSON.stringify(meta)).catch(() => { });
402
+ invalidateHistoryIndex();
403
+ logEvent("agent", "video_saved", { requestId, sessionId, filename });
404
+ return importAgentImage(sessionId, {
405
+ id: `ai_${ulid()}`,
406
+ filename,
407
+ url: `/generated/${filename}`,
408
+ prompt,
409
+ revisedPrompt: result.revisedPrompt,
410
+ createdAt: Date.now(),
411
+ });
412
+ }
309
413
  function recordSearchFindings(sessionId, prompt, count, provider) {
310
414
  if (!count)
311
415
  return [];
@@ -9,6 +9,7 @@ import { detectImageMimeFromB64 } from "./refs.js";
9
9
  import { resolveProviderOptions } from "./providerOptions.js";
10
10
  import { generateViaResponses } from "./responsesImageAdapter.js";
11
11
  import { generateViaGrok, type GrokReferenceImage } from "./grokImageAdapter.js";
12
+ import { generateVideoViaGrok } from "./grokVideoAdapter.js";
12
13
  import {
13
14
  appendAgentTurn,
14
15
  buildImageContextManifest,
@@ -95,7 +96,7 @@ export async function runAgentGenerationPlan(
95
96
  const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
96
97
  const enabledTools: AgentToolName[] = webSearchEnabled
97
98
  ? [...AGENT_ALLOWED_TOOLS]
98
- : ["ima2.get_image_context", "ima2.generate_image"];
99
+ : ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
99
100
  assertAgentAllowedTools(enabledTools);
100
101
  if (behavior.appendUserTurn !== false) {
101
102
  appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
@@ -111,6 +112,13 @@ export async function runAgentGenerationPlan(
111
112
  });
112
113
  return { assistantTurn, imageIds: [], webFindingIds: [] };
113
114
  }
115
+ if (plan.mode === "video") {
116
+ return runAgentVideoGeneration(ctx, sessionId, prompt, {
117
+ ...options,
118
+ requestId: options.requestId ?? `agent_video_${ulid()}`,
119
+ skipUserTurn: true,
120
+ });
121
+ }
114
122
  const manifest = buildImageContextManifest(sessionId);
115
123
  const contextStartedAt = Date.now();
116
124
  appendAgentTurn({
@@ -397,6 +405,115 @@ async function persistAgentImage(
397
405
  });
398
406
  }
399
407
 
408
+ export async function runAgentVideoGeneration(
409
+ ctx: RuntimeContext,
410
+ sessionId: string,
411
+ prompt: string,
412
+ options: AgentRunOptions & { skipUserTurn?: boolean } = {},
413
+ ) {
414
+ const session = getAgentSession(sessionId);
415
+ if (!session) throw notFound(sessionId);
416
+ if (!options.skipUserTurn) {
417
+ appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
418
+ }
419
+ const requestId = options.requestId ?? `agent_video_${ulid()}`;
420
+ const startedAt = Date.now();
421
+
422
+ // Auto I2V: if session has a last image, use it as source
423
+ let sourceImage: string | undefined;
424
+ let mode: "text-to-video" | "image-to-video" = "text-to-video";
425
+ if (session.lastImageId) {
426
+ const images = getAgentImages(sessionId);
427
+ const lastImage = images.find((img) => img.id === session.lastImageId);
428
+ if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
429
+ try {
430
+ const { loadAssetB64 } = await import("./nodeStore.js");
431
+ sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
432
+ mode = "image-to-video";
433
+ } catch { /* fallback to T2V */ }
434
+ }
435
+ }
436
+
437
+ const result = await generateVideoViaGrok(prompt, ctx, {
438
+ model: "grok-imagine-video",
439
+ mode,
440
+ sourceImage,
441
+ duration: 5,
442
+ resolution: "480p",
443
+ aspectRatio: "auto",
444
+ requestId,
445
+ signal: options.signal ?? undefined,
446
+ });
447
+ const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
448
+ const finishedAt = Date.now();
449
+ const toolCall: AgentToolCallSummary = {
450
+ id: `tc_video_${ulid()}`,
451
+ name: "ima2.generate_video",
452
+ status: "complete",
453
+ startedAt,
454
+ finishedAt,
455
+ durationMs: finishedAt - startedAt,
456
+ requestId,
457
+ inputSummary: prompt,
458
+ outputSummary: `Generated video ${video.filename}.`,
459
+ imageIds: [video.id],
460
+ };
461
+ appendAgentTurn({
462
+ sessionId,
463
+ role: "tool",
464
+ text: "ima2.generate_video",
465
+ imageIds: [video.id],
466
+ status: "complete",
467
+ raw: { toolCalls: [toolCall] },
468
+ });
469
+ const assistantTurn = appendAgentTurn({
470
+ sessionId,
471
+ role: "assistant",
472
+ text: `Generated 1 video artifact. ${result.revisedPrompt}`,
473
+ imageIds: [video.id],
474
+ status: "complete",
475
+ });
476
+ return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
477
+ }
478
+
479
+ async function persistAgentVideo(
480
+ ctx: RuntimeContext,
481
+ sessionId: string,
482
+ prompt: string,
483
+ requestId: string,
484
+ result: { videoBuffer: Buffer; revisedPrompt: string; usage: Record<string, number> | null; webSearchCalls: number },
485
+ ) {
486
+ await mkdir(ctx.config.storage.generatedDir, { recursive: true });
487
+ const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
488
+ const filename = `${Date.now()}_${rand}_agent.mp4`;
489
+ const meta = {
490
+ kind: "agent",
491
+ mediaType: "video",
492
+ requestId,
493
+ sessionId,
494
+ prompt,
495
+ userPrompt: prompt,
496
+ revisedPrompt: result.revisedPrompt,
497
+ provider: "grok",
498
+ model: "grok-imagine-video",
499
+ createdAt: Date.now(),
500
+ usage: result.usage,
501
+ webSearchCalls: result.webSearchCalls,
502
+ };
503
+ await writeFile(join(ctx.config.storage.generatedDir, filename), result.videoBuffer);
504
+ await writeFile(join(ctx.config.storage.generatedDir, `${filename}.json`), JSON.stringify(meta)).catch(() => {});
505
+ invalidateHistoryIndex();
506
+ logEvent("agent", "video_saved", { requestId, sessionId, filename });
507
+ return importAgentImage(sessionId, {
508
+ id: `ai_${ulid()}`,
509
+ filename,
510
+ url: `/generated/${filename}`,
511
+ prompt,
512
+ revisedPrompt: result.revisedPrompt,
513
+ createdAt: Date.now(),
514
+ });
515
+ }
516
+
400
517
  function recordSearchFindings(sessionId: string, prompt: string, count: number, provider: string) {
401
518
  if (!count) return [];
402
519
  const isGrok = provider === "grok";
package/lib/agentTypes.js CHANGED
@@ -2,4 +2,5 @@ export const AGENT_ALLOWED_TOOLS = [
2
2
  "ima2.get_image_context",
3
3
  "ima2.web_search",
4
4
  "ima2.generate_image",
5
+ "ima2.generate_video",
5
6
  ];
package/lib/agentTypes.ts CHANGED
@@ -2,6 +2,7 @@ export const AGENT_ALLOWED_TOOLS = [
2
2
  "ima2.get_image_context",
3
3
  "ima2.web_search",
4
4
  "ima2.generate_image",
5
+ "ima2.generate_video",
5
6
  ] as const;
6
7
 
7
8
  export type AgentToolName = typeof AGENT_ALLOWED_TOOLS[number];
@@ -11,7 +12,7 @@ export type AgentToolCallStatus = "queued" | "running" | "complete" | "error";
11
12
  export type AgentQueueStatus = "queued" | "running" | "succeeded" | "failed" | "canceled";
12
13
  export type AgentSessionRunStatus = "idle" | "queued" | "running" | "error";
13
14
  export type AgentGenerationStrategy = "auto" | "manual";
14
- export type AgentGenerationPlanMode = "single" | "fanout" | "question";
15
+ export type AgentGenerationPlanMode = "single" | "fanout" | "question" | "video";
15
16
  export type AgentGenerationPlanSource = "auto-default" | "auto-request" | "manual-settings" | "slash-command" | "question-command";
16
17
  export type AgentSlashCommandName = "question" | "help" | "variants" | "generate" | "parallelism";
17
18
 
@@ -1,5 +1,5 @@
1
1
  import { getDb } from "./db.js";
2
- import { rename, unlink, access } from "fs/promises";
2
+ import { mkdir, rename, unlink, access } from "fs/promises";
3
3
  import { resolve, sep } from "path";
4
4
  import { moveToSystemTrash } from "./systemTrash.js";
5
5
  import { config } from "../config.js";
@@ -87,21 +87,25 @@ export async function trashAsset(rootDir, filename) {
87
87
  paths.push(sidecar);
88
88
  }
89
89
  catch { }
90
+ let trashMethod = "system";
90
91
  try {
91
92
  await moveToSystemTrash(paths);
92
93
  }
93
- catch (cause) {
94
- const err = new Error("Could not move asset to system trash");
95
- err.status = 500;
96
- err.code = "SYSTEM_TRASH_FAILED";
97
- err.cause = cause;
98
- throw err;
94
+ catch {
95
+ trashMethod = "internal";
96
+ const trashDir = resolve(config.storage.trashDir);
97
+ await mkdir(trashDir, { recursive: true });
98
+ const trashId = `${Date.now()}_${filename}`;
99
+ for (const p of paths) {
100
+ const dest = resolve(trashDir, p.endsWith(".json") ? `${trashId}.json` : trashId);
101
+ await rename(p, dest);
102
+ }
99
103
  }
100
104
  const summary = markNodesAssetMissing(filename);
101
105
  return {
102
106
  ok: true,
103
107
  filename,
104
- trash: "system",
108
+ trash: trashMethod,
105
109
  undoableInApp: false,
106
110
  sessionsTouched: summary.sessionsTouched,
107
111
  nodesTouched: summary.nodesTouched,
@@ -1,5 +1,5 @@
1
1
  import { getDb } from "./db.js";
2
- import { rename, unlink, access } from "fs/promises";
2
+ import { mkdir, rename, unlink, access } from "fs/promises";
3
3
  import { resolve, sep } from "path";
4
4
  import { moveToSystemTrash } from "./systemTrash.js";
5
5
  import { config } from "../config.js";
@@ -84,21 +84,25 @@ export async function trashAsset(rootDir: string, filename: string) {
84
84
  paths.push(sidecar);
85
85
  } catch {}
86
86
 
87
+ let trashMethod: "system" | "internal" = "system";
87
88
  try {
88
89
  await moveToSystemTrash(paths);
89
- } catch (cause) {
90
- const err: any = new Error("Could not move asset to system trash");
91
- err.status = 500;
92
- err.code = "SYSTEM_TRASH_FAILED";
93
- err.cause = cause;
94
- throw err;
90
+ } catch {
91
+ trashMethod = "internal";
92
+ const trashDir = resolve(config.storage.trashDir);
93
+ await mkdir(trashDir, { recursive: true });
94
+ const trashId = `${Date.now()}_${filename}`;
95
+ for (const p of paths) {
96
+ const dest = resolve(trashDir, p.endsWith(".json") ? `${trashId}.json` : trashId);
97
+ await rename(p, dest);
98
+ }
95
99
  }
96
100
 
97
101
  const summary = markNodesAssetMissing(filename);
98
102
  return {
99
103
  ok: true,
100
104
  filename,
101
- trash: "system",
105
+ trash: trashMethod,
102
106
  undoableInApp: false,
103
107
  sessionsTouched: summary.sessionsTouched,
104
108
  nodesTouched: summary.nodesTouched,
@@ -106,7 +106,7 @@ export function buildIma2Capabilities({ appConfig = runtimeConfigDefault, packag
106
106
  i2i: "Use --ref for reference generation, or ima2 edit <file> --prompt \"<text>\" for image edits.",
107
107
  defaults: "Use ima2 defaults set model/reasoning for persistent defaults; request flags remain per-call overrides.",
108
108
  promptBuilder: "Use ima2 prompt build --message \"...\" to refine prompt intent. Use ima2 gen / ima2 multimode to generate images. Workspace profile settings are UI-only.",
109
- video: "Use ima2 video \"<prompt>\" to generate video. Supports --ref for image-to-video and reference-to-video modes.",
109
+ video: "Use ima2 video \"<prompt>\" to generate video. Supports --ref for image-to-video and reference-to-video modes. Use --topic for series continuity across multiple generations.",
110
110
  },
111
111
  };
112
112
  }
@@ -120,7 +120,7 @@ export function buildIma2Capabilities({
120
120
  i2i: "Use --ref for reference generation, or ima2 edit <file> --prompt \"<text>\" for image edits.",
121
121
  defaults: "Use ima2 defaults set model/reasoning for persistent defaults; request flags remain per-call overrides.",
122
122
  promptBuilder: "Use ima2 prompt build --message \"...\" to refine prompt intent. Use ima2 gen / ima2 multimode to generate images. Workspace profile settings are UI-only.",
123
- video: "Use ima2 video \"<prompt>\" to generate video. Supports --ref for image-to-video and reference-to-video modes.",
123
+ video: "Use ima2 video \"<prompt>\" to generate video. Supports --ref for image-to-video and reference-to-video modes. Use --topic for series continuity across multiple generations.",
124
124
  },
125
125
  };
126
126
  }