ima2-gen 1.1.18 → 1.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -2
- package/bin/commands/grok.js +19 -21
- package/bin/commands/grok.ts +20 -21
- package/bin/commands/video.js +4 -0
- package/bin/commands/video.ts +3 -0
- package/docs/README.ja.md +2 -2
- package/docs/README.ko.md +15 -3
- package/docs/README.zh-CN.md +2 -2
- package/docs/migration/runtime-test-inventory.md +2 -1
- package/lib/agentGenerationPlanner.js +37 -1
- package/lib/agentGenerationPlanner.ts +45 -1
- package/lib/agentRuntime.js +107 -1
- package/lib/agentRuntime.ts +121 -1
- package/lib/agentTypes.js +1 -0
- package/lib/agentTypes.ts +2 -1
- package/lib/assetLifecycle.js +12 -8
- package/lib/assetLifecycle.ts +12 -8
- package/lib/capabilities.js +1 -1
- package/lib/capabilities.ts +1 -1
- package/lib/grokVideoAdapter.js +30 -2
- package/lib/grokVideoAdapter.ts +36 -2
- package/lib/historyList.js +1 -0
- package/lib/historyList.ts +1 -0
- package/lib/videoSeriesChain.js +24 -0
- package/lib/videoSeriesChain.ts +29 -0
- package/node_modules/progrok/README.md +300 -22
- package/node_modules/progrok/dist/index.js +558 -173
- package/node_modules/progrok/dist/index.js.map +1 -1
- package/node_modules/progrok/package.json +3 -3
- package/node_modules/progrok/skills/progrok/SKILL.md +145 -109
- package/package.json +2 -2
- package/routes/video.js +10 -1
- package/routes/video.ts +11 -1
- package/skills/ima2/SKILL.md +65 -0
- package/ui/dist/.vite/manifest.json +12 -12
- package/ui/dist/assets/AgentWorkspace-DS8uvoLI.js +3 -0
- package/ui/dist/assets/{CardNewsWorkspace-DmqCMnIx.js → CardNewsWorkspace-CYxMsE67.js} +1 -1
- package/ui/dist/assets/NodeCanvas-DccIc347.js +7 -0
- package/ui/dist/assets/{PromptBuilderPanel-CoWjqQZS.js → PromptBuilderPanel-BvxxwSJp.js} +2 -2
- package/ui/dist/assets/{PromptImportDialog-C2zGZkyK.js → PromptImportDialog-u1_BFDRd.js} +2 -2
- package/ui/dist/assets/{PromptImportDiscoverySection-N0ZxHLYs.js → PromptImportDiscoverySection-C5uvkVSz.js} +1 -1
- package/ui/dist/assets/{PromptImportFolderSection-BC3dCASZ.js → PromptImportFolderSection-D3E_O1SD.js} +1 -1
- package/ui/dist/assets/{PromptLibraryPanel-CcVliYnF.js → PromptLibraryPanel-4gyf9CB9.js} +2 -2
- package/ui/dist/assets/{SettingsWorkspace-CiB4ux7E.js → SettingsWorkspace-F3eNu3mJ.js} +1 -1
- package/ui/dist/assets/index-B6tcw_UF.css +1 -0
- package/ui/dist/assets/index-DYOh6gQD.js +32 -0
- package/ui/dist/assets/{index-C93CfR9P.js → index-DoKtXbod.js} +1 -1
- package/ui/dist/index.html +2 -2
- package/vendor/progrok-0.1.1.tgz +0 -0
- package/ui/dist/assets/AgentWorkspace-BTuPjlDH.js +0 -3
- package/ui/dist/assets/NodeCanvas-jr9WXfNm.js +0 -7
- package/ui/dist/assets/index-CIhB_ia7.css +0 -1
- package/ui/dist/assets/index-uBEJn5jz.js +0 -32
- package/vendor/progrok-0.1.0.tgz +0 -0
package/README.md
CHANGED
|
@@ -16,9 +16,9 @@
|
|
|
16
16
|
|
|
17
17
|
`ima2-gen` is a local image generation studio for people who want the ChatGPT/Codex image workflow in a small desktop-like web app.
|
|
18
18
|
|
|
19
|
-
Run it with `npx`, sign in with
|
|
19
|
+
Run it with `npx`, sign in with ChatGPT OAuth or Grok OAuth, and start generating images and videos. Iterate with history, references, node branches, multimode batches, Canvas Mode cleanup, and Grok Video generation. No API key required — a free ChatGPT account (via OAuth) and a SuperGrok subscription cover everything.
|
|
20
20
|
|
|
21
|
-

|
|
22
22
|
|
|
23
23
|
## Quick Start
|
|
24
24
|
|
|
@@ -313,6 +313,15 @@ npm run build
|
|
|
313
313
|
|
|
314
314
|
`npm run dev` builds the UI and starts the TypeScript server entry with `--watch` and verbose server diagnostics. `npm run typecheck`, `npm run build:server`, and `npm run build:cli` verify the TypeScript migration and package emit path. Node mode and Canvas Mode are part of the packaged UI by default.
|
|
315
315
|
|
|
316
|
+
## Contributors
|
|
317
|
+
|
|
318
|
+
- [@lidge-jun](https://github.com/lidge-jun) — maintainer
|
|
319
|
+
- [@ree9622](https://github.com/ree9622) — moderation controls, Windows fixes, structured logging
|
|
320
|
+
- [@Charley-Peng](https://github.com/Charley-Peng) — API cache fix (#74)
|
|
321
|
+
- [@philiptaron](https://github.com/philiptaron) — Nix flake (#81)
|
|
322
|
+
- [@aorying](https://github.com/aorying) — upstream validation error surfacing (informed TS migration direction)
|
|
323
|
+
- [@PARKJONGMlN](https://github.com/PARKJONGMlN) — batch comparison matrix design (#80)
|
|
324
|
+
|
|
316
325
|
## License
|
|
317
326
|
|
|
318
327
|
MIT
|
package/bin/commands/grok.js
CHANGED
|
@@ -12,7 +12,7 @@ const HELP = `
|
|
|
12
12
|
No separate progrok install is required.
|
|
13
13
|
|
|
14
14
|
Subcommands:
|
|
15
|
-
login [
|
|
15
|
+
login [options] Log in to xAI OAuth (default: --manual-paste)
|
|
16
16
|
logout Remove stored xAI credentials
|
|
17
17
|
status Show bundled progrok authentication status
|
|
18
18
|
models List available Grok models
|
|
@@ -28,20 +28,13 @@ function localBinPath() {
|
|
|
28
28
|
function spawnProgrok(argv, env) {
|
|
29
29
|
return new Promise((resolve, reject) => {
|
|
30
30
|
const progrokBin = join(localBinPath(), isWin ? "progrok.cmd" : "progrok");
|
|
31
|
-
const child =
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
})
|
|
39
|
-
: spawn(progrokBin, argv, {
|
|
40
|
-
cwd: ROOT,
|
|
41
|
-
env,
|
|
42
|
-
stdio: "inherit",
|
|
43
|
-
windowsHide: true,
|
|
44
|
-
});
|
|
31
|
+
const child = spawn(progrokBin, argv, {
|
|
32
|
+
cwd: ROOT,
|
|
33
|
+
env,
|
|
34
|
+
stdio: "inherit",
|
|
35
|
+
shell: isWin,
|
|
36
|
+
windowsHide: true,
|
|
37
|
+
});
|
|
45
38
|
child.on("error", (err) => reject(err));
|
|
46
39
|
child.on("close", resolve);
|
|
47
40
|
});
|
|
@@ -57,15 +50,20 @@ export default async function grokCmd(argv) {
|
|
|
57
50
|
PATH: `${localBinPath()}${delimiter}${process.env.PATH || ""}`,
|
|
58
51
|
};
|
|
59
52
|
try {
|
|
53
|
+
// Default to --manual-paste for login (most reliable across platforms).
|
|
54
|
+
// Users can still pass --device-code or --browser explicitly.
|
|
55
|
+
if (sub === "login" && !argv.includes("--device-code") && !argv.includes("--browser") && !argv.includes("--manual-paste")) {
|
|
56
|
+
argv = [...argv, "--manual-paste"];
|
|
57
|
+
}
|
|
60
58
|
const code = await spawnProgrok(argv, env);
|
|
61
59
|
if (code && code !== 0) {
|
|
62
|
-
//
|
|
60
|
+
// progrok 0.1.1+ itself defaults to device-code flow, but this wrapper appends --manual-paste above unless a flow flag was passed.
|
|
61
|
+
// Do NOT auto-retry with --device-code — it issues a NEW code that
|
|
62
|
+
// invalidates the one the user may already be looking at in their browser.
|
|
63
63
|
if (sub === "login" && !argv.includes("--device-code")) {
|
|
64
|
-
out(color.yellow("⚠ ") + "
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
die(fallbackCode, "bundled progrok device-code login also failed");
|
|
68
|
-
}
|
|
64
|
+
out(color.yellow("⚠ ") + "Login failed. Try again with:\n");
|
|
65
|
+
out(" ima2 grok login --device-code\n");
|
|
66
|
+
die(code, "bundled progrok login failed");
|
|
69
67
|
}
|
|
70
68
|
else {
|
|
71
69
|
die(code, `bundled progrok exited with code ${code}`);
|
package/bin/commands/grok.ts
CHANGED
|
@@ -13,7 +13,7 @@ const HELP = `
|
|
|
13
13
|
No separate progrok install is required.
|
|
14
14
|
|
|
15
15
|
Subcommands:
|
|
16
|
-
login [
|
|
16
|
+
login [options] Log in to xAI OAuth (default: --manual-paste)
|
|
17
17
|
logout Remove stored xAI credentials
|
|
18
18
|
status Show bundled progrok authentication status
|
|
19
19
|
models List available Grok models
|
|
@@ -31,20 +31,13 @@ function localBinPath() {
|
|
|
31
31
|
function spawnProgrok(argv: string[], env: NodeJS.ProcessEnv): Promise<number | null> {
|
|
32
32
|
return new Promise((resolve, reject) => {
|
|
33
33
|
const progrokBin = join(localBinPath(), isWin ? "progrok.cmd" : "progrok");
|
|
34
|
-
const child =
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
})
|
|
42
|
-
: spawn(progrokBin, argv, {
|
|
43
|
-
cwd: ROOT,
|
|
44
|
-
env,
|
|
45
|
-
stdio: "inherit",
|
|
46
|
-
windowsHide: true,
|
|
47
|
-
});
|
|
34
|
+
const child = spawn(progrokBin, argv, {
|
|
35
|
+
cwd: ROOT,
|
|
36
|
+
env,
|
|
37
|
+
stdio: "inherit",
|
|
38
|
+
shell: isWin,
|
|
39
|
+
windowsHide: true,
|
|
40
|
+
});
|
|
48
41
|
child.on("error", (err) => reject(err));
|
|
49
42
|
child.on("close", resolve);
|
|
50
43
|
});
|
|
@@ -63,15 +56,21 @@ export default async function grokCmd(argv: string[]) {
|
|
|
63
56
|
};
|
|
64
57
|
|
|
65
58
|
try {
|
|
59
|
+
// Default to --manual-paste for login (most reliable across platforms).
|
|
60
|
+
// Users can still pass --device-code or --browser explicitly.
|
|
61
|
+
if (sub === "login" && !argv.includes("--device-code") && !argv.includes("--browser") && !argv.includes("--manual-paste")) {
|
|
62
|
+
argv = [...argv, "--manual-paste"];
|
|
63
|
+
}
|
|
64
|
+
|
|
66
65
|
const code = await spawnProgrok(argv, env);
|
|
67
66
|
if (code && code !== 0) {
|
|
68
|
-
//
|
|
67
|
+
// progrok 0.1.1+ itself defaults to device-code flow, but this wrapper appends --manual-paste above unless a flow flag was passed.
|
|
68
|
+
// Do NOT auto-retry with --device-code — it issues a NEW code that
|
|
69
|
+
// invalidates the one the user may already be looking at in their browser.
|
|
69
70
|
if (sub === "login" && !argv.includes("--device-code")) {
|
|
70
|
-
out(color.yellow("⚠ ") + "
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
die(fallbackCode, "bundled progrok device-code login also failed");
|
|
74
|
-
}
|
|
71
|
+
out(color.yellow("⚠ ") + "Login failed. Try again with:\n");
|
|
72
|
+
out(" ima2 grok login --device-code\n");
|
|
73
|
+
die(code, "bundled progrok login failed");
|
|
75
74
|
} else {
|
|
76
75
|
die(code, `bundled progrok exited with code ${code}`);
|
|
77
76
|
}
|
package/bin/commands/video.js
CHANGED
|
@@ -14,6 +14,7 @@ const SPEC = {
|
|
|
14
14
|
resolution: { type: "string", default: "480p" },
|
|
15
15
|
"aspect-ratio": { type: "string", default: "auto" },
|
|
16
16
|
model: { type: "string" },
|
|
17
|
+
topic: { type: "string" },
|
|
17
18
|
ref: { type: "string", repeatable: true },
|
|
18
19
|
out: { short: "o", type: "string" },
|
|
19
20
|
"out-dir": { short: "d", type: "string" },
|
|
@@ -34,6 +35,7 @@ const HELP = `
|
|
|
34
35
|
--resolution <480p|720p> Default: 480p
|
|
35
36
|
--aspect-ratio <ratio|auto> 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto. Default: auto
|
|
36
37
|
--model <name> grok-imagine-video, grok-imagine-video-1.5-preview
|
|
38
|
+
--topic <text> Series topic for prompt chain continuity
|
|
37
39
|
--ref <file> Attach source/reference image (repeatable, max 7)
|
|
38
40
|
-o, --out <file> Output file path
|
|
39
41
|
-d, --out-dir <dir> Output directory
|
|
@@ -102,6 +104,8 @@ export default async function videoCmd(argv) {
|
|
|
102
104
|
body.model = args.model;
|
|
103
105
|
if (args.session)
|
|
104
106
|
body.sessionId = args.session;
|
|
107
|
+
if (args.topic)
|
|
108
|
+
body.topic = args.topic;
|
|
105
109
|
if (referenceImages.length === 1) {
|
|
106
110
|
body.sourceImage = referenceImages[0];
|
|
107
111
|
}
|
package/bin/commands/video.ts
CHANGED
|
@@ -16,6 +16,7 @@ const SPEC = {
|
|
|
16
16
|
resolution: { type: "string", default: "480p" },
|
|
17
17
|
"aspect-ratio": { type: "string", default: "auto" },
|
|
18
18
|
model: { type: "string" },
|
|
19
|
+
topic: { type: "string" },
|
|
19
20
|
ref: { type: "string", repeatable: true },
|
|
20
21
|
out: { short: "o", type: "string" },
|
|
21
22
|
"out-dir": { short: "d", type: "string" },
|
|
@@ -37,6 +38,7 @@ const HELP = `
|
|
|
37
38
|
--resolution <480p|720p> Default: 480p
|
|
38
39
|
--aspect-ratio <ratio|auto> 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, auto. Default: auto
|
|
39
40
|
--model <name> grok-imagine-video, grok-imagine-video-1.5-preview
|
|
41
|
+
--topic <text> Series topic for prompt chain continuity
|
|
40
42
|
--ref <file> Attach source/reference image (repeatable, max 7)
|
|
41
43
|
-o, --out <file> Output file path
|
|
42
44
|
-d, --out-dir <dir> Output directory
|
|
@@ -101,6 +103,7 @@ export default async function videoCmd(argv: string[]) {
|
|
|
101
103
|
};
|
|
102
104
|
if (args.model) body.model = args.model;
|
|
103
105
|
if (args.session) body.sessionId = args.session;
|
|
106
|
+
if (args.topic) body.topic = args.topic;
|
|
104
107
|
if (referenceImages.length === 1) {
|
|
105
108
|
body.sourceImage = referenceImages[0];
|
|
106
109
|
} else if (referenceImages.length > 1) {
|
package/docs/README.ja.md
CHANGED
|
@@ -8,9 +8,9 @@
|
|
|
8
8
|
>
|
|
9
9
|
> **他の言語で読む**: [English](../README.md) · [한국어](README.ko.md) · [简体中文](README.zh-CN.md)
|
|
10
10
|
|
|
11
|
-
`ima2-gen`
|
|
11
|
+
`ima2-gen` は、無料の ChatGPT と SuperGrok だけで画像と動画を作れるローカル AI スタジオです。
|
|
12
12
|
|
|
13
|
-
`npx` で起動し、
|
|
13
|
+
`npx` で起動し、ChatGPT または Grok OAuth でログインすれば、すぐに画像・動画生成を始められます。API キー不要で、ノード分岐、multimode batch、Grok Video、Canvas Mode まで全機能が使えます。
|
|
14
14
|
|
|
15
15
|

|
|
16
16
|
|
package/docs/README.ko.md
CHANGED
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
>
|
|
11
11
|
> **다른 언어로 읽기**: [English](../README.md) · [日本語](README.ja.md) · [简体中文](README.zh-CN.md)
|
|
12
12
|
|
|
13
|
-
`ima2-gen`은 ChatGPT
|
|
13
|
+
`ima2-gen`은 무료 ChatGPT와 SuperGrok만으로 이미지와 영상을 만드는 로컬 AI 스튜디오입니다.
|
|
14
14
|
|
|
15
|
-
`npx
|
|
15
|
+
`npx` 한 줄로 실행하고, ChatGPT 또는 Grok OAuth로 로그인하면 바로 시작됩니다. API 키 없이 이미지 생성, 비디오 생성, 노드 분기, 멀티모드 배치, Canvas 정리까지 전부 가능합니다.
|
|
16
16
|
|
|
17
17
|

|
|
18
18
|
|
|
@@ -40,12 +40,24 @@ npm install -g ima2-gen
|
|
|
40
40
|
ima2 serve
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
+
### 설정
|
|
44
|
+
|
|
45
|
+
`ima2 setup`으로 인증 방식을 선택합니다:
|
|
46
|
+
|
|
47
|
+
1. **GPT OAuth** — ChatGPT 계정으로 로그인 (무료, 이미지만)
|
|
48
|
+
2. **Grok OAuth** — xAI/Grok 계정으로 로그인 (이미지 + 영상)
|
|
49
|
+
3. **Both** — GPT + Grok 둘 다 (전체 기능)
|
|
50
|
+
4. **API Key** — OpenAI API 키 입력 (유료)
|
|
51
|
+
|
|
52
|
+
영상 생성은 Grok OAuth(2번 또는 3번)가 필요합니다.
|
|
53
|
+
|
|
43
54
|
## 무엇을 할 수 있나요?
|
|
44
55
|
|
|
45
56
|
- **Classic mode**: 빠르게 이미지를 만들고, 수정하고, 현재 결과를 다시 레퍼런스로 사용합니다.
|
|
46
57
|
- **Node mode**: 마음에 드는 이미지를 여러 방향으로 분기해 실험합니다.
|
|
47
58
|
- **Multimode batches**: 하나의 프롬프트에서 여러 후보 슬롯을 동시에 만들고, 가장 좋은 결과에서 이어갑니다.
|
|
48
59
|
- **Canvas Mode**: 확대/이동, 주석, 지우개, 배경 정리, 투명 체크보드 미리보기, alpha/matte export를 지원합니다.
|
|
60
|
+
- **Video 생성**: 텍스트, 이미지, 또는 여러 레퍼런스에서 짧은 영상을 만듭니다. 기획→제출→진행률→완료까지 실시간으로 보여줍니다.
|
|
49
61
|
- **Local gallery**: 생성물을 내 컴퓨터에 저장하고 세션별 히스토리로 봅니다.
|
|
50
62
|
- **Reference images**: 레퍼런스를 드래그, 붙여넣기, 파일 선택으로 추가합니다. 큰 이미지는 업로드 전에 자동 압축됩니다.
|
|
51
63
|
- **Prompt library imports**: 로컬 prompt pack, GitHub folder, curated GPT-image hint를 내장 prompt library로 가져옵니다.
|
|
@@ -62,7 +74,7 @@ ima2 serve
|
|
|
62
74
|
|
|
63
75
|
Grok은 Classic, Node, Agent 흐름을 지원합니다. Classic 레퍼런스, Node 부모 이미지, Agent 현재 이미지가 있으면 최종 Grok 호출은 xAI image edit 경로로 전환되어 image-to-image 맥락을 유지합니다. 기본 모델은 `grok-imagine-image`이고, `quality: "high"`에서는 `grok-imagine-image-quality`를 사용합니다.
|
|
64
76
|
|
|
65
|
-
Grok video 생성(T2V/I2V)은
|
|
77
|
+
Grok video 생성(T2V/I2V/ref2v)은 v1.1.16부터 사용 가능합니다. 텍스트 프롬프트, 단일 이미지, 또는 최대 7장의 레퍼런스에서 짧은 영상을 만들 수 있으며, 실시간 진행률 스트리밍을 지원합니다.
|
|
66
78
|
|
|
67
79
|
설정 화면에 **API key provider available**이나 **Grok provider available**이 보이면 해당 공급자가 감지됐고 생성 요청에 사용할 수 있다는 뜻입니다.
|
|
68
80
|
|
package/docs/README.zh-CN.md
CHANGED
|
@@ -8,9 +8,9 @@
|
|
|
8
8
|
>
|
|
9
9
|
> **其他语言**: [English](../README.md) · [한국어](README.ko.md) · [日本語](README.ja.md)
|
|
10
10
|
|
|
11
|
-
`ima2-gen`
|
|
11
|
+
`ima2-gen` 是一个本地 AI 工作室,只需免费 ChatGPT 和 SuperGrok 即可生成图像和视频。
|
|
12
12
|
|
|
13
|
-
用 `npx`
|
|
13
|
+
用 `npx` 启动,通过 ChatGPT 或 Grok OAuth 登录即可开始生成图像和视频。无需 API 密钥,节点分支、multimode 批量、Grok Video、Canvas Mode 全部可用。
|
|
14
14
|
|
|
15
15
|

|
|
16
16
|
|
|
@@ -4,7 +4,7 @@ Generated by `npm run test:inventory` (script: `scripts/classify-tests.mjs`).
|
|
|
4
4
|
|
|
5
5
|
_Tests considered "runtime-importing" if they import from `../lib/`, `../routes/`, `../bin/`, `../server`, or `../config`._
|
|
6
6
|
|
|
7
|
-
Total:
|
|
7
|
+
Total: 168 (runtime: 55, contract: 113)
|
|
8
8
|
|
|
9
9
|
## Runtime-importing tests
|
|
10
10
|
- `tests/agent-mode-auto-planner-contract.test.ts`
|
|
@@ -13,6 +13,7 @@ Total: 167 (runtime: 54, contract: 113)
|
|
|
13
13
|
- `tests/agent-mode-queue-migration-contract.test.ts`
|
|
14
14
|
- `tests/agent-mode-runtime-contract.test.ts`
|
|
15
15
|
- `tests/agent-mode-slash-command-contract.test.ts`
|
|
16
|
+
- `tests/agent-video-intent.test.ts`
|
|
16
17
|
- `tests/api-cache-policy.test.ts`
|
|
17
18
|
- `tests/api-provider-parity.test.ts`
|
|
18
19
|
- `tests/billing-source.test.ts`
|
|
@@ -34,6 +34,19 @@ export function deriveAgentGenerationPlan({ prompt, settings, command = null })
|
|
|
34
34
|
assistantText: null,
|
|
35
35
|
};
|
|
36
36
|
}
|
|
37
|
+
if (isVideoIntent(prompt)) {
|
|
38
|
+
return {
|
|
39
|
+
mode: "video",
|
|
40
|
+
prompts: [prompt],
|
|
41
|
+
requestedVariants: 1,
|
|
42
|
+
plannedVariants: 1,
|
|
43
|
+
plannedParallelism: 1,
|
|
44
|
+
source: "auto-request",
|
|
45
|
+
reason: "Video generation detected from prompt keywords.",
|
|
46
|
+
command: command?.name ?? null,
|
|
47
|
+
assistantText: null,
|
|
48
|
+
};
|
|
49
|
+
}
|
|
37
50
|
const variantDecision = decideVariantCount(prompt, settings, command);
|
|
38
51
|
const plannedParallelism = resolvePlannedParallelism(settings, variantDecision.count, command);
|
|
39
52
|
const prompts = buildGenerationPrompts(prompt, variantDecision.count);
|
|
@@ -58,7 +71,7 @@ export function normalizeAgentGenerationPlan(prompt, value, settings) {
|
|
|
58
71
|
const requestedParallelism = cleanCount(input.plannedParallelism, settings.parallelism, 1, HARD_MAX_VARIANTS);
|
|
59
72
|
const plannedParallelism = resolvePlannedParallelism({ ...settings, parallelism: requestedParallelism }, plannedVariants, null);
|
|
60
73
|
return {
|
|
61
|
-
mode: input.mode === "question" ? "question" : prompts.length > 1 ? "fanout" : "single",
|
|
74
|
+
mode: input.mode === "question" ? "question" : input.mode === "video" ? "video" : prompts.length > 1 ? "fanout" : "single",
|
|
62
75
|
prompts,
|
|
63
76
|
requestedVariants: cleanCount(input.requestedVariants, plannedVariants, 0, HARD_MAX_VARIANTS),
|
|
64
77
|
plannedVariants,
|
|
@@ -183,3 +196,26 @@ function cleanCount(value, fallback, min, max) {
|
|
|
183
196
|
function clampCount(value, max) {
|
|
184
197
|
return Math.max(1, Math.min(max, Math.round(value)));
|
|
185
198
|
}
|
|
199
|
+
/**
 * Keyword heuristic for "the user wants a video".
 *
 * - English keywords (video / animate / animation) must stand alone: the
 *   lookarounds reject a Unicode letter on either side, so "videos" or
 *   "animated" do not match.
 * - Korean keywords match as substrings because particles attach directly to
 *   the noun. "클립" is excluded when it is the start of "클립아트" (clip art),
 *   which names a still-image style rather than a video clip.
 */
const VIDEO_INTENT_PATTERN = /(?<!\p{L})(?:video|animate|animation)(?!\p{L})|(?:동영상|비디오|영상|애니메이트|움직이|클립(?!아트))/iu;

/**
 * Report whether a prompt contains one of the video-intent keywords.
 *
 * @param {string} prompt - Raw user prompt.
 * @returns {boolean} True when a video keyword is present.
 */
function isVideoIntent(prompt) {
    // The pattern has no `g`/`y` flag, so `.test` keeps no state between calls.
    return VIDEO_INTENT_PATTERN.test(prompt);
}
|
|
203
|
+
// One or two digits followed by a seconds unit. The lookbehind stops the match
// from starting inside a longer number ("105s", "1.5s"); the lookahead stops a
// bare "s" from matching the first letter of an unrelated word ("3 sheep").
const DURATION_PATTERN = /(?<![\d.])(\d{1,2})\s*(?:seconds?|secs?|s|초)(?![a-z])/i;
// Supported resolutions; must not be the tail of a longer number ("1720p").
const RESOLUTION_PATTERN = /(?<!\d)(720p|480p)/i;
// Supported aspect ratios; must not sit inside a time or a longer number
// ("4:30", "19:16", "11:11").
const ASPECT_PATTERN = /(?<![\d:])(16:9|9:16|4:3|3:4|3:2|2:3|1:1)(?![\d:])/;

/**
 * Extract optional video settings that the user wrote inline in the prompt.
 *
 * Only the first occurrence of each setting is considered. A duration is kept
 * only when it lies in the 1–15 second range; anything else is ignored so the
 * caller can apply its own default.
 *
 * @param {string} prompt - Raw user prompt.
 * @returns {{duration?: number, resolution?: string, aspectRatio?: string}}
 *   Object containing only the settings that were found.
 */
export function parseVideoParams(prompt) {
    const params = {};
    const durMatch = DURATION_PATTERN.exec(prompt);
    if (durMatch) {
        const d = Number.parseInt(durMatch[1], 10);
        if (d >= 1 && d <= 15) {
            params.duration = d;
        }
    }
    const resMatch = RESOLUTION_PATTERN.exec(prompt);
    if (resMatch) {
        // Normalize "720P" to "720p".
        params.resolution = resMatch[1].toLowerCase();
    }
    const aspMatch = ASPECT_PATTERN.exec(prompt);
    if (aspMatch) {
        params.aspectRatio = aspMatch[1];
    }
    return params;
}
|
|
@@ -56,6 +56,20 @@ export function deriveAgentGenerationPlan({ prompt, settings, command = null }:
|
|
|
56
56
|
};
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
+
if (isVideoIntent(prompt)) {
|
|
60
|
+
return {
|
|
61
|
+
mode: "video",
|
|
62
|
+
prompts: [prompt],
|
|
63
|
+
requestedVariants: 1,
|
|
64
|
+
plannedVariants: 1,
|
|
65
|
+
plannedParallelism: 1,
|
|
66
|
+
source: "auto-request",
|
|
67
|
+
reason: "Video generation detected from prompt keywords.",
|
|
68
|
+
command: command?.name ?? null,
|
|
69
|
+
assistantText: null,
|
|
70
|
+
};
|
|
71
|
+
}
|
|
72
|
+
|
|
59
73
|
const variantDecision = decideVariantCount(prompt, settings, command);
|
|
60
74
|
const plannedParallelism = resolvePlannedParallelism(settings, variantDecision.count, command);
|
|
61
75
|
const prompts = buildGenerationPrompts(prompt, variantDecision.count);
|
|
@@ -85,7 +99,7 @@ export function normalizeAgentGenerationPlan(
|
|
|
85
99
|
const requestedParallelism = cleanCount(input.plannedParallelism, settings.parallelism, 1, HARD_MAX_VARIANTS);
|
|
86
100
|
const plannedParallelism = resolvePlannedParallelism({ ...settings, parallelism: requestedParallelism }, plannedVariants, null);
|
|
87
101
|
return {
|
|
88
|
-
mode: input.mode === "question" ? "question" : prompts.length > 1 ? "fanout" : "single",
|
|
102
|
+
mode: input.mode === "question" ? "question" : input.mode === "video" ? "video" : prompts.length > 1 ? "fanout" : "single",
|
|
89
103
|
prompts,
|
|
90
104
|
requestedVariants: cleanCount(input.requestedVariants, plannedVariants, 0, HARD_MAX_VARIANTS),
|
|
91
105
|
plannedVariants,
|
|
@@ -227,3 +241,33 @@ function cleanCount(value: unknown, fallback: number, min: number, max: number):
|
|
|
227
241
|
function clampCount(value: number, max: number): number {
|
|
228
242
|
return Math.max(1, Math.min(max, Math.round(value)));
|
|
229
243
|
}
|
|
244
|
+
|
|
245
|
+
/**
 * Keyword heuristic for "the user wants a video".
 *
 * - English keywords (video / animate / animation) must stand alone: the
 *   lookarounds reject a Unicode letter on either side, so "videos" or
 *   "animated" do not match.
 * - Korean keywords match as substrings because particles attach directly to
 *   the noun. "클립" is excluded when it is the start of "클립아트" (clip art),
 *   which names a still-image style rather than a video clip.
 */
const VIDEO_INTENT_PATTERN = /(?<!\p{L})(?:video|animate|animation)(?!\p{L})|(?:동영상|비디오|영상|애니메이트|움직이|클립(?!아트))/iu;

/**
 * Report whether a prompt contains one of the video-intent keywords.
 *
 * @param prompt Raw user prompt.
 * @returns True when a video keyword is present.
 */
function isVideoIntent(prompt: string): boolean {
  // The pattern has no `g`/`y` flag, so `.test` keeps no state between calls.
  return VIDEO_INTENT_PATTERN.test(prompt);
}
|
|
250
|
+
|
|
251
|
+
/** Video settings a user may embed directly in the prompt text. */
export interface VideoParamsFromPrompt {
  /** Clip length in seconds; only set when the prompt names 1–15 seconds. */
  duration?: number;
  /** Output resolution, lower-cased. */
  resolution?: "480p" | "720p";
  /** One of the ratios listed in ASPECT_PATTERN, e.g. "16:9". */
  aspectRatio?: string;
}

// One or two digits followed by a seconds unit. The lookbehind stops the match
// from starting inside a longer number ("105s", "1.5s"); the lookahead stops a
// bare "s" from matching the first letter of an unrelated word ("3 sheep").
const DURATION_PATTERN = /(?<![\d.])(\d{1,2})\s*(?:seconds?|secs?|s|초)(?![a-z])/i;
// Supported resolutions; must not be the tail of a longer number ("1720p").
const RESOLUTION_PATTERN = /(?<!\d)(720p|480p)/i;
// Supported aspect ratios; must not sit inside a time or a longer number
// ("4:30", "19:16", "11:11").
const ASPECT_PATTERN = /(?<![\d:])(16:9|9:16|4:3|3:4|3:2|2:3|1:1)(?![\d:])/;

/**
 * Extract optional video settings that the user wrote inline in the prompt.
 *
 * Only the first occurrence of each setting is considered. A duration is kept
 * only when it lies in the 1–15 second range; anything else is ignored so the
 * caller can apply its own default.
 *
 * @param prompt Raw user prompt.
 * @returns Object containing only the settings that were found.
 */
export function parseVideoParams(prompt: string): VideoParamsFromPrompt {
  const params: VideoParamsFromPrompt = {};
  const durMatch = DURATION_PATTERN.exec(prompt);
  if (durMatch) {
    const d = Number.parseInt(durMatch[1], 10);
    if (d >= 1 && d <= 15) {
      params.duration = d;
    }
  }
  const resMatch = RESOLUTION_PATTERN.exec(prompt);
  if (resMatch) {
    // Normalize "720P" to "720p".
    params.resolution = resMatch[1].toLowerCase() as "480p" | "720p";
  }
  const aspMatch = ASPECT_PATTERN.exec(prompt);
  if (aspMatch) {
    params.aspectRatio = aspMatch[1];
  }
  return params;
}
|
package/lib/agentRuntime.js
CHANGED
|
@@ -9,6 +9,8 @@ import { detectImageMimeFromB64 } from "./refs.js";
|
|
|
9
9
|
import { resolveProviderOptions } from "./providerOptions.js";
|
|
10
10
|
import { generateViaResponses } from "./responsesImageAdapter.js";
|
|
11
11
|
import { generateViaGrok } from "./grokImageAdapter.js";
|
|
12
|
+
import { generateVideoViaGrok } from "./grokVideoAdapter.js";
|
|
13
|
+
import { parseVideoParams } from "./agentGenerationPlanner.js";
|
|
12
14
|
import { appendAgentTurn, buildImageContextManifest, getAgentImages, getAgentSession, importAgentImage, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
|
|
13
15
|
import { AGENT_ALLOWED_TOOLS, } from "./agentTypes.js";
|
|
14
16
|
import { errInfo } from "./errInfo.js";
|
|
@@ -46,7 +48,7 @@ export async function runAgentGenerationPlan(ctx, sessionId, prompt, plan, optio
|
|
|
46
48
|
const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
|
|
47
49
|
const enabledTools = webSearchEnabled
|
|
48
50
|
? [...AGENT_ALLOWED_TOOLS]
|
|
49
|
-
: ["ima2.get_image_context", "ima2.generate_image"];
|
|
51
|
+
: ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
|
|
50
52
|
assertAgentAllowedTools(enabledTools);
|
|
51
53
|
if (behavior.appendUserTurn !== false) {
|
|
52
54
|
appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
|
|
@@ -62,6 +64,13 @@ export async function runAgentGenerationPlan(ctx, sessionId, prompt, plan, optio
|
|
|
62
64
|
});
|
|
63
65
|
return { assistantTurn, imageIds: [], webFindingIds: [] };
|
|
64
66
|
}
|
|
67
|
+
if (plan.mode === "video") {
|
|
68
|
+
return runAgentVideoGeneration(ctx, sessionId, prompt, {
|
|
69
|
+
...options,
|
|
70
|
+
requestId: options.requestId ?? `agent_video_${ulid()}`,
|
|
71
|
+
skipUserTurn: true,
|
|
72
|
+
});
|
|
73
|
+
}
|
|
65
74
|
const manifest = buildImageContextManifest(sessionId);
|
|
66
75
|
const contextStartedAt = Date.now();
|
|
67
76
|
appendAgentTurn({
|
|
@@ -306,6 +315,103 @@ async function persistAgentImage(ctx, sessionId, prompt, format, requestId, resp
|
|
|
306
315
|
createdAt: Date.now(),
|
|
307
316
|
});
|
|
308
317
|
}
|
|
318
|
+
/**
 * Generate one Grok video for an agent session and record it in the transcript.
 *
 * Steps, in order:
 *  1. Look up the session; throw `notFound(sessionId)` if it does not exist.
 *  2. Append the user turn unless `options.skipUserTurn` is set.
 *  3. If the session's last image is not an `.mp4`, try to load it as the
 *     source frame and switch to image-to-video; otherwise stay text-to-video.
 *  4. Call `generateVideoViaGrok` with settings parsed from the prompt
 *     (falling back to 5 s / 480p / auto aspect ratio).
 *  5. Persist the video, then append a tool turn and an assistant turn.
 *
 * @param {object} ctx - Server context; `ctx.rootDir` and
 *   `ctx.config.storage.generatedDir` are read here.
 * @param {string} sessionId - Agent session to generate for.
 * @param {string} prompt - Raw user prompt.
 * @param {object} [options] - `requestId`, `signal` and `skipUserTurn` are read.
 * @returns {Promise<{assistantTurn: object, imageIds: string[], webFindingIds: string[]}>}
 * @throws Whatever `notFound(sessionId)` produces when the session is unknown;
 *   errors from video generation or persistence propagate unchanged.
 */
export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
    const session = getAgentSession(sessionId);
    if (!session) {
        throw notFound(sessionId);
    }
    if (!options.skipUserTurn) {
        appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
    }
    const requestId = options.requestId ?? `agent_video_${ulid()}`;
    const startedAt = Date.now();

    // Auto I2V: if the session has a last image, use it as the source frame.
    let sourceImage;
    let mode = "text-to-video";
    if (session.lastImageId) {
        const images = getAgentImages(sessionId);
        const lastImage = images.find((img) => img.id === session.lastImageId);
        // Videos are stored through the same image records, so skip .mp4 entries.
        if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
            try {
                const { loadAssetB64 } = await import("./nodeStore.js");
                const loaded = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
                // NOTE(review): loadAssetB64's return contract is not visible here;
                // only switch modes when it actually produced data.
                if (loaded) {
                    sourceImage = loaded;
                    mode = "image-to-video";
                }
            }
            catch (err) {
                // Best effort: fall back to text-to-video, but leave a trace so a
                // missing or unreadable source image is diagnosable.
                logEvent("agent", "video_source_image_unavailable", {
                    requestId,
                    sessionId,
                    filename: lastImage.filename,
                    error: err instanceof Error ? err.message : String(err),
                });
            }
        }
    }

    const videoParams = parseVideoParams(prompt);
    const result = await generateVideoViaGrok(prompt, ctx, {
        model: "grok-imagine-video",
        mode,
        sourceImage,
        duration: videoParams.duration ?? 5,
        resolution: videoParams.resolution ?? "480p",
        aspectRatio: (videoParams.aspectRatio ?? "auto"),
        requestId,
        signal: options.signal ?? undefined,
    });
    const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
    const finishedAt = Date.now();

    const toolCall = {
        id: `tc_video_${ulid()}`,
        name: "ima2.generate_video",
        status: "complete",
        startedAt,
        finishedAt,
        durationMs: finishedAt - startedAt,
        requestId,
        inputSummary: prompt,
        outputSummary: `Generated video ${video.filename}.`,
        imageIds: [video.id],
    };
    appendAgentTurn({
        sessionId,
        role: "tool",
        text: "ima2.generate_video",
        imageIds: [video.id],
        status: "complete",
        raw: { toolCalls: [toolCall] },
    });

    // Avoid rendering the literal word "undefined" when no revised prompt came back.
    const revised = typeof result.revisedPrompt === "string" ? result.revisedPrompt : "";
    const assistantTurn = appendAgentTurn({
        sessionId,
        role: "assistant",
        text: revised ? `Generated 1 video artifact. ${revised}` : "Generated 1 video artifact.",
        imageIds: [video.id],
        status: "complete",
    });
    return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
}
|
|
384
|
+
/**
 * Write a generated video and its JSON sidecar to the generated-assets
 * directory, then register it with the agent session.
 *
 * One timestamp is taken up front and reused for the filename, the sidecar and
 * the session record so the three always agree.
 *
 * @param {object} ctx - Server context; `ctx.config.storage.generatedDir` and
 *   `ctx.config.ids.generatedHexBytes` are read here.
 * @param {string} sessionId - Agent session that owns the video.
 * @param {string} prompt - Prompt the video was generated from.
 * @param {string} requestId - Request id recorded in the sidecar and the log.
 * @param {object} result - Generation result; `videoBuffer`, `revisedPrompt`,
 *   `usage` and `webSearchCalls` are read.
 * @returns {Promise<object>} Whatever `importAgentImage` returns for the new record.
 * @throws If the directory cannot be created or the video file cannot be
 *   written. A failed sidecar write is logged and otherwise ignored.
 */
async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
    const generatedDir = ctx.config.storage.generatedDir;
    await mkdir(generatedDir, { recursive: true });

    const createdAt = Date.now();
    const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
    const filename = `${createdAt}_${rand}_agent.mp4`;
    const meta = {
        kind: "agent",
        mediaType: "video",
        requestId,
        sessionId,
        prompt,
        userPrompt: prompt,
        revisedPrompt: result.revisedPrompt,
        provider: "grok",
        model: "grok-imagine-video",
        createdAt,
        usage: result.usage,
        webSearchCalls: result.webSearchCalls,
    };

    await writeFile(join(generatedDir, filename), result.videoBuffer);
    // The sidecar is best effort: the video itself is already on disk, so a
    // metadata write failure must not fail the request. Log it instead.
    await writeFile(join(generatedDir, `${filename}.json`), JSON.stringify(meta)).catch((err) => {
        logEvent("agent", "video_sidecar_write_failed", {
            requestId,
            sessionId,
            filename,
            error: err instanceof Error ? err.message : String(err),
        });
    });

    invalidateHistoryIndex();
    logEvent("agent", "video_saved", { requestId, sessionId, filename });
    return importAgentImage(sessionId, {
        id: `ai_${ulid()}`,
        filename,
        url: `/generated/${filename}`,
        prompt,
        revisedPrompt: result.revisedPrompt,
        createdAt,
    });
}
|
|
309
415
|
function recordSearchFindings(sessionId, prompt, count, provider) {
|
|
310
416
|
if (!count)
|
|
311
417
|
return [];
|