ima2-gen 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -0
- package/README.md +10 -1
- package/bin/commands/backfillThumbs.js +6 -0
- package/bin/commands/gen.js +6 -0
- package/bin/ima2.js +14 -10
- package/docs/API.md +131 -8
- package/docs/CLI.md +2 -1
- package/docs/FAQ.ko.md +16 -0
- package/docs/FAQ.md +30 -0
- package/docs/README.ko.md +7 -3
- package/docs/migration/runtime-test-inventory.md +15 -1
- package/lib/agentImageVideoGen.js +261 -0
- package/lib/agentRuntime.js +7 -262
- package/lib/agyImageAdapter.js +35 -8
- package/lib/errorClassify.js +8 -7
- package/lib/eventBus.js +71 -0
- package/lib/geminiApiImageAdapter.js +16 -20
- package/lib/generationErrors.js +3 -1
- package/lib/grokImageAdapter.js +68 -129
- package/lib/grokImageCore.js +153 -0
- package/lib/grokMultimodeAdapter.js +5 -3
- package/lib/grokVideoCanvas.js +13 -0
- package/lib/grokVideoPlannerPrompt.js +53 -6
- package/lib/historyList.js +1 -0
- package/lib/inflight.js +54 -17
- package/lib/multimodeHelpers.js +10 -0
- package/lib/nodeHelpers.js +59 -0
- package/lib/oauthProxy/prompts.js +30 -36
- package/lib/promptBuilder/systemPrompt.js +2 -5
- package/lib/promptSafetyPolicy.js +1 -5
- package/lib/responsesFallback.js +2 -1
- package/lib/routeHelpers.js +44 -0
- package/lib/ssePublish.js +12 -0
- package/lib/storyboardPrefix.js +28 -0
- package/lib/thumbBackfill.js +16 -5
- package/package.json +4 -1
- package/routes/agy.js +44 -0
- package/routes/auth.js +6 -2
- package/routes/edit.js +7 -1
- package/routes/events.js +78 -0
- package/routes/generate.js +99 -127
- package/routes/index.js +4 -0
- package/routes/multimode.js +99 -56
- package/routes/nodes.js +59 -103
- package/routes/video.js +100 -17
- package/skills/ima2/SKILL.md +98 -21
- package/ui/dist/.vite/manifest.json +12 -12
- package/ui/dist/assets/{AgentWorkspace-CYv84Rus.js → AgentWorkspace-Dth6YijN.js} +1 -1
- package/ui/dist/assets/{CardNewsWorkspace-Dqyc1WZ1.js → CardNewsWorkspace-Dav3K5CT.js} +1 -1
- package/ui/dist/assets/{NodeCanvas-ChEXzQbb.js → NodeCanvas-C4ifFzB1.js} +1 -1
- package/ui/dist/assets/{PromptBuilderPanel-B95ZufnR.js → PromptBuilderPanel-CEcyU9PL.js} +1 -1
- package/ui/dist/assets/{PromptImportDialog-DGOwFQET.js → PromptImportDialog-CgQ94Gth.js} +2 -2
- package/ui/dist/assets/{PromptImportDiscoverySection-CgvdnR49.js → PromptImportDiscoverySection-CuzyzbNI.js} +1 -1
- package/ui/dist/assets/{PromptImportFolderSection-CfUye9J8.js → PromptImportFolderSection-DHLGlO6l.js} +1 -1
- package/ui/dist/assets/{PromptLibraryPanel-B9kndPw1.js → PromptLibraryPanel-BOe18we8.js} +2 -2
- package/ui/dist/assets/SettingsWorkspace-Cdgnm4Wa.js +1 -0
- package/ui/dist/assets/{index-BhcvL0g-.js → index-C5PSahkr.js} +1 -1
- package/ui/dist/assets/index-Dn2AhL6d.css +1 -0
- package/ui/dist/assets/index-Tjqx6wUV.js +23 -0
- package/ui/dist/index.html +2 -2
- package/ui/dist/assets/SettingsWorkspace-B3tgLrmF.js +0 -1
- package/ui/dist/assets/index-BtK3YhJc.js +0 -39
- package/ui/dist/assets/index-ClOLOjnA.css +0 -1
|
@@ -4,7 +4,7 @@ Generated by `npm run test:inventory` (script: `scripts/classify-tests.mjs`).
|
|
|
4
4
|
|
|
5
5
|
_Tests considered "runtime-importing" if they import from `../lib/`, `../routes/`, `../bin/`, `../server`, or `../config`._
|
|
6
6
|
|
|
7
|
-
Total:
|
|
7
|
+
Total: 191 (runtime: 66, contract: 125)
|
|
8
8
|
|
|
9
9
|
## Runtime-importing tests
|
|
10
10
|
- `tests/agent-mode-auto-planner-contract.test.ts`
|
|
@@ -18,10 +18,14 @@ Total: 177 (runtime: 61, contract: 116)
|
|
|
18
18
|
- `tests/api-provider-parity.test.ts`
|
|
19
19
|
- `tests/billing-source.test.ts`
|
|
20
20
|
- `tests/card-news-contract.test.ts`
|
|
21
|
+
- `tests/card-news-template.test.ts`
|
|
22
|
+
- `tests/classic-generate-async.test.ts`
|
|
21
23
|
- `tests/cli-error-hints.test.ts`
|
|
22
24
|
- `tests/cli-lib.test.ts`
|
|
23
25
|
- `tests/comfy-bridge-contract.test.ts`
|
|
24
26
|
- `tests/error-classify.test.ts`
|
|
27
|
+
- `tests/event-bus.test.ts`
|
|
28
|
+
- `tests/events-channel-contract.test.ts`
|
|
25
29
|
- `tests/generate-route-validation-error.test.ts`
|
|
26
30
|
- `tests/generated-static-privacy.test.ts`
|
|
27
31
|
- `tests/generation-errors.test.ts`
|
|
@@ -36,6 +40,7 @@ Total: 177 (runtime: 61, contract: 116)
|
|
|
36
40
|
- `tests/image-metadata-route.test.ts`
|
|
37
41
|
- `tests/image-metadata-xmp.test.ts`
|
|
38
42
|
- `tests/image-model.test.ts`
|
|
43
|
+
- `tests/inflight-guard-contract.test.ts`
|
|
39
44
|
- `tests/inflight-persistence.test.ts`
|
|
40
45
|
- `tests/inflight.test.ts`
|
|
41
46
|
- `tests/local-import-contract.test.ts`
|
|
@@ -75,6 +80,9 @@ Total: 177 (runtime: 61, contract: 116)
|
|
|
75
80
|
- `tests/agent-mode-right-sidebar-contract.test.js`
|
|
76
81
|
- `tests/agent-mode-tool-folding-contract.test.js`
|
|
77
82
|
- `tests/app-weight-splitting-contract.test.js`
|
|
83
|
+
- `tests/async-capacity-retry-behavior.test.ts`
|
|
84
|
+
- `tests/async-capacity-retry-contract.test.js`
|
|
85
|
+
- `tests/async-stream-subscribe-order.test.js`
|
|
78
86
|
- `tests/background-cleanup-brush-rasterize.test.js`
|
|
79
87
|
- `tests/background-cleanup-mask-compose.test.js`
|
|
80
88
|
- `tests/bin.test.js`
|
|
@@ -123,8 +131,12 @@ Total: 177 (runtime: 61, contract: 116)
|
|
|
123
131
|
- `tests/current-image-actions-readiness-contract.test.js`
|
|
124
132
|
- `tests/direct-mode-visual-contract.test.js`
|
|
125
133
|
- `tests/edit-mask-api-contract.test.js`
|
|
134
|
+
- `tests/frontend-connection-state-contract.test.js`
|
|
135
|
+
- `tests/frontend-sse-risk-contract.test.js`
|
|
136
|
+
- `tests/gallery-hang-regression-contract.test.ts`
|
|
126
137
|
- `tests/gallery-load-older-contract.test.js`
|
|
127
138
|
- `tests/gallery-navigation-ux-contract.test.js`
|
|
139
|
+
- `tests/gallery-selection-during-generation-contract.test.js`
|
|
128
140
|
- `tests/gallery-session-scope-contract.test.js`
|
|
129
141
|
- `tests/gallery-shortcuts-behavior.test.js`
|
|
130
142
|
- `tests/gallery-shortcuts-visible-domain-contract.test.js`
|
|
@@ -145,6 +157,7 @@ Total: 177 (runtime: 61, contract: 116)
|
|
|
145
157
|
- `tests/multimode-backend-contract.test.js`
|
|
146
158
|
- `tests/multimode-concurrent-store-contract.test.js`
|
|
147
159
|
- `tests/multimode-ui-contract.test.js`
|
|
160
|
+
- `tests/node-async-eventbus-contract.test.js`
|
|
148
161
|
- `tests/node-batch-contract.test.js`
|
|
149
162
|
- `tests/node-child-refs-contract.test.js`
|
|
150
163
|
- `tests/node-child-refs-payload.test.js`
|
|
@@ -184,5 +197,6 @@ Total: 177 (runtime: 61, contract: 116)
|
|
|
184
197
|
- `tests/toast-stack-contract.test.js`
|
|
185
198
|
- `tests/ui-error-code-contract.test.js`
|
|
186
199
|
- `tests/video-continuity-ui-contract.test.js`
|
|
200
|
+
- `tests/video-gallery-refresh-contract.test.ts`
|
|
187
201
|
- `tests/vite-dev-port-contract.test.js`
|
|
188
202
|
- `tests/web-search-toggle-contract.test.js`
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import { randomBytes } from "node:crypto";
|
|
2
|
+
import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
|
|
3
|
+
import { atomicWriteJson } from "./atomicWrite.js";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { ulid } from "ulid";
|
|
6
|
+
import { embedImageMetadataBestEffort } from "./imageMetadataStore.js";
|
|
7
|
+
import { invalidateHistoryIndex } from "./historyIndex.js";
|
|
8
|
+
import { logEvent } from "./logger.js";
|
|
9
|
+
import { detectImageMimeFromB64 } from "./refs.js";
|
|
10
|
+
import { resolveProviderOptions } from "./providerOptions.js";
|
|
11
|
+
import { generateViaResponses } from "./responsesImageAdapter.js";
|
|
12
|
+
import { generateViaGrok } from "./grokImageAdapter.js";
|
|
13
|
+
import { generateViaAgy } from "./agyImageAdapter.js";
|
|
14
|
+
import { generateVideoViaGrok } from "./grokVideoAdapter.js";
|
|
15
|
+
import { parseVideoParams } from "./agentGenerationPlanner.js";
|
|
16
|
+
import { appendAgentTurn, getAgentImages, getAgentSession, importAgentImage, } from "./agentStore.js";
|
|
17
|
+
import { errInfo } from "./errInfo.js";
|
|
18
|
+
import { forceImagePrompt, isTextOnlyResult, textOnlyError, notFound } from "./agentRuntime.js";
|
|
19
|
+
/**
 * Generate an agent image, retrying once with a forcing prompt when the
 * first attempt fails with a "text-only" style error.
 *
 * The first attempt uses `prompt` as-is; the second wraps it with
 * `forceImagePrompt`. A result is returned only when it carries an `image`.
 *
 * @param {object} ctx - Application context forwarded to the generator.
 * @param {string} sessionId - Agent session the image belongs to.
 * @param {string} prompt - User request text.
 * @param {string} manifest - Image-context manifest prepended to the prompt.
 * @param {boolean} webSearchEnabled - Whether web search is enabled.
 * @param {object} options - Generation options forwarded unchanged.
 * @returns {Promise<object>} The first generation result that has an `image`.
 * @throws Rethrows any error that `isTextOnlyResult` does not recognise;
 *   otherwise throws `textOnlyError(lastError)` once both attempts are spent
 *   (`lastError` stays `null` if the attempts returned without an image
 *   instead of throwing).
 */
export async function generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
    const MAX_ATTEMPTS = 2;
    let lastError = null;
    for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt += 1) {
        const isRetry = attempt > 0;
        try {
            const attemptPrompt = isRetry ? forceImagePrompt(prompt) : prompt;
            const outcome = await generateAgentImage(ctx, sessionId, attemptPrompt, manifest, webSearchEnabled, options);
            if (outcome.image) {
                return outcome;
            }
        }
        catch (error) {
            lastError = error;
            if (!isTextOnlyResult(error)) {
                throw error;
            }
            // Only the first failure is recorded as a tool turn; a failure on
            // the retry falls through to the final throw below.
            if (!isRetry) {
                appendAgentTurn({
                    sessionId,
                    role: "tool",
                    text: "ima2.generate_image retry: text-only result rejected",
                    status: "error",
                });
            }
        }
    }
    throw textOnlyError(lastError);
}
|
|
44
|
+
/**
 * Run a single image generation for an agent session and persist the result.
 *
 * Resolves provider options, dispatches to the AGY, Grok or Responses
 * adapter depending on the resolved provider, picks an output format and
 * hands the response to `persistAgentImage`.
 *
 * @param {object} ctx - Application context.
 * @param {string} sessionId - Agent session id.
 * @param {string} prompt - Prompt for this attempt.
 * @param {string} manifest - Context manifest placed before the user request.
 * @param {boolean} webSearchEnabled - Whether web search is enabled.
 * @param {object} options - Reads requestId, provider, model, reasoningEffort,
 *   size, quality, moderation, format and signal.
 * @returns {Promise<{image: object, webSearchCalls: number, text: (string|null), provider: *}>}
 * @throws {Error} With `code`/`status` copied from the provider-option
 *   resolution when it reports an error; adapter errors propagate unchanged.
 */
async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
    const requestId = options.requestId ?? `agent_${ulid()}`;
    const resolved = resolveProviderOptions(ctx, {
        provider: options.provider ?? "oauth",
        rawModel: options.model,
        rawReasoningEffort: options.reasoningEffort,
        rawSize: options.size ?? "1024x1024",
        rawWebSearchEnabled: webSearchEnabled,
        searchMode: webSearchEnabled ? "on" : "off",
    });
    if (resolved.error) {
        throw Object.assign(new Error(resolved.error), {
            code: resolved.code,
            status: resolved.status,
        });
    }

    const activeProvider = resolved.provider;
    const isGrok = activeProvider === "grok";
    const isAgy = activeProvider === "agy";
    // Grok switches to its quality model when high quality is requested.
    const effectiveModel = isGrok && options.quality === "high"
        ? "grok-imagine-image-quality"
        : resolved.model;
    const composedPrompt = `${manifest}\n\nUser request:\n${prompt}`;
    const abortSignal = options.signal ?? undefined;

    let response;
    if (isAgy) {
        response = await generateViaAgy(composedPrompt, {
            requestId,
            signal: abortSignal,
        });
    }
    else if (isGrok) {
        response = await generateViaGrok(composedPrompt, ctx, {
            model: effectiveModel,
            size: resolved.size,
            requestId,
            signal: abortSignal,
            references: await loadAgentCurrentImageReferences(ctx, sessionId),
        });
    }
    else {
        response = await generateViaResponses(activeProvider, composedPrompt, options.quality ?? "medium", resolved.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
            model: resolved.model,
            reasoningEffort: resolved.reasoningEffort,
            webSearchEnabled,
            signal: options.signal,
        });
    }

    // Grok/AGY output format follows the returned bytes; the Responses path
    // uses the requested format instead.
    let format;
    if (isGrok || isAgy) {
        const declaredMime = "mime" in response ? response.mime : undefined;
        format = imageFormatFromMime(declaredMime || detectImageMimeFromB64(response.b64) || "image/jpeg");
    }
    else {
        format = options.format ?? "png";
    }

    const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
        provider: String(activeProvider),
        model: String(effectiveModel),
    });
    const hasText = "text" in response && typeof response.text === "string";
    return {
        image,
        webSearchCalls: response.webSearchCalls || 0,
        text: hasText ? response.text : null,
        provider: activeProvider,
    };
}
|
|
93
|
+
/**
 * Load the session's current (last) image as a single reference entry.
 *
 * Looks up the session's `lastImageId` among the session images, reads the
 * file from `ctx.config.storage.generatedDir` and returns it base64-encoded.
 * Best effort: any read failure is logged and yields an empty list.
 *
 * @param {object} ctx - Application context (reads `config.storage.generatedDir`).
 * @param {string} sessionId - Agent session id.
 * @returns {Promise<Array<{b64: string, declaredMime: *, detectedMime: *}>>}
 *   Zero or one reference entries.
 */
async function loadAgentCurrentImageReferences(ctx, sessionId) {
    const session = getAgentSession(sessionId);
    const currentImage = session?.lastImageId
        ? getAgentImages(sessionId).find((image) => image.id === session.lastImageId)
        : null;
    if (!currentImage?.filename) {
        return [];
    }
    // Videos are stored through the same image store with an ".mp4" filename
    // (see runAgentVideoGeneration, which applies the same guard). A video
    // must never be sent as an image reference.
    if (currentImage.filename.endsWith(".mp4")) {
        return [];
    }
    try {
        const b64 = (await readFile(join(ctx.config.storage.generatedDir, currentImage.filename))).toString("base64");
        const mime = detectImageMimeFromB64(b64);
        return [{ b64, declaredMime: mime, detectedMime: mime }];
    }
    catch (error) {
        const err = errInfo(error);
        logEvent("agent", "grok_ref_missing", { sessionId, filename: currentImage.filename, code: err.code, message: err.message });
        return [];
    }
}
|
|
111
|
+
/**
 * Map an image MIME type to the file format/extension used for storage.
 *
 * Matching is done on the MIME essence: parameters after ";" are ignored,
 * surrounding whitespace is trimmed and case is folded, so values such as
 * "IMAGE/JPEG" or "image/webp; q=1" resolve correctly. The non-standard
 * alias "image/jpg" is treated as JPEG.
 *
 * @param {string} [mime] - MIME type; non-string values are treated as unknown.
 * @returns {"jpeg"|"webp"|"png"} "png" for anything unrecognised.
 */
function imageFormatFromMime(mime) {
    const essence = typeof mime === "string"
        ? mime.split(";", 1)[0].trim().toLowerCase()
        : "";
    switch (essence) {
        case "image/jpeg":
        case "image/jpg":
            return "jpeg";
        case "image/webp":
            return "webp";
        default:
            return "png";
    }
}
|
|
118
|
+
/**
 * Write a generated image and its JSON sidecar to the generated directory
 * and register it with the agent session.
 *
 * Steps: ensure the directory exists, embed metadata into the image bytes
 * (best effort), write the image, write `<file>.json` atomically, invalidate
 * the history index, log, and import the image into the session store. If
 * the sidecar write fails the image file is removed again and the error is
 * rethrown.
 *
 * A single timestamp is used for the filename, the sidecar `createdAt` and
 * the session-store `createdAt` so the three records always agree.
 *
 * @param {object} ctx - Application context (reads `config.storage.generatedDir`,
 *   `config.ids.generatedHexBytes`, `packageVersion`).
 * @param {string} sessionId - Agent session id.
 * @param {string} prompt - Prompt stored as both `prompt` and `userPrompt`.
 * @param {string} format - File extension / image format.
 * @param {string} requestId - Request identifier stored in the metadata.
 * @param {object} response - Adapter response (reads `b64`, `revisedPrompt`,
 *   `usage`, `webSearchCalls`).
 * @param {{provider: string, model: string}} generation - Provider and model labels.
 * @returns {Promise<*>} Whatever `importAgentImage` returns for the new record.
 */
async function persistAgentImage(ctx, sessionId, prompt, format, requestId, response, generation) {
    const { generatedDir } = ctx.config.storage;
    await mkdir(generatedDir, { recursive: true });
    const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
    const createdAt = Date.now();
    const filename = `${createdAt}_${rand}_agent.${format}`;
    const revisedPrompt = response.revisedPrompt ?? null;
    const meta = {
        kind: "agent",
        requestId,
        sessionId,
        prompt,
        userPrompt: prompt,
        revisedPrompt,
        provider: generation.provider,
        model: generation.model,
        createdAt,
        usage: response.usage ?? null,
        webSearchCalls: response.webSearchCalls ?? 0,
    };
    const embedded = await embedImageMetadataBestEffort(Buffer.from(response.b64, "base64"), format, meta, {
        version: ctx.packageVersion,
    });
    const filePath = join(generatedDir, filename);
    await writeFile(filePath, embedded.buffer);
    try {
        await atomicWriteJson(`${filePath}.json`, meta);
    }
    catch (err) {
        // Do not leave an image behind without its sidecar; the cleanup itself
        // is best effort and must not mask the original failure.
        await unlink(filePath).catch(() => { });
        throw err;
    }
    invalidateHistoryIndex();
    logEvent("agent", "saved", { requestId, sessionId, filename });
    return importAgentImage(sessionId, {
        id: `ai_${ulid()}`,
        filename,
        url: `/generated/${filename}`,
        prompt,
        revisedPrompt,
        createdAt,
    });
}
|
|
158
|
+
/**
 * Generate a video for an agent session via Grok and record the resulting
 * turns.
 *
 * When the session's last image is not itself a video, it is loaded and used
 * as the source for image-to-video; if it cannot be loaded the failure is
 * logged and generation falls back to text-to-video.
 *
 * @param {object} ctx - Application context.
 * @param {string} sessionId - Agent session id.
 * @param {string} prompt - User request; also parsed for video parameters.
 * @param {object} [options] - Reads `skipUserTurn`, `requestId`, `signal`.
 * @returns {Promise<{assistantTurn: *, imageIds: string[], webFindingIds: Array}>}
 * @throws The error built by `notFound` when the session does not exist;
 *   generation and persistence errors propagate unchanged.
 */
export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
    const session = getAgentSession(sessionId);
    if (!session) {
        throw notFound(sessionId);
    }
    if (!options.skipUserTurn) {
        appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
    }
    const requestId = options.requestId ?? `agent_video_${ulid()}`;
    const startedAt = Date.now();
    // Auto I2V: if session has a last image, use it as source
    let sourceImage;
    let mode = "text-to-video";
    if (session.lastImageId) {
        const images = getAgentImages(sessionId);
        const lastImage = images.find((img) => img.id === session.lastImageId);
        if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
            try {
                const { loadAssetB64 } = await import("./nodeStore.js");
                sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
                mode = "image-to-video";
            }
            catch (error) {
                // Deliberate best effort: fall back to text-to-video, but leave
                // a trace so a missing or unreadable source image can be diagnosed.
                const err = errInfo(error);
                logEvent("agent", "video_source_missing", {
                    sessionId,
                    requestId,
                    filename: lastImage.filename,
                    code: err.code,
                    message: err.message,
                });
            }
        }
    }
    const videoParams = parseVideoParams(prompt);
    const result = await generateVideoViaGrok(prompt, ctx, {
        model: "grok-imagine-video",
        mode,
        sourceImage,
        duration: videoParams.duration ?? 5,
        resolution: videoParams.resolution ?? "480p",
        aspectRatio: (videoParams.aspectRatio ?? "auto"),
        requestId,
        signal: options.signal ?? undefined,
    });
    const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
    const finishedAt = Date.now();
    const toolCall = {
        id: `tc_video_${ulid()}`,
        name: "ima2.generate_video",
        status: "complete",
        startedAt,
        finishedAt,
        durationMs: finishedAt - startedAt,
        requestId,
        inputSummary: prompt,
        outputSummary: `Generated video ${video.filename}.`,
        imageIds: [video.id],
    };
    appendAgentTurn({
        sessionId,
        role: "tool",
        text: "ima2.generate_video",
        imageIds: [video.id],
        status: "complete",
        raw: { toolCalls: [toolCall] },
    });
    // Only append the revised prompt when the adapter actually returned one,
    // so the transcript never shows a literal "undefined" or "null".
    const hasRevisedPrompt = typeof result.revisedPrompt === "string" && result.revisedPrompt.trim() !== "";
    const assistantText = hasRevisedPrompt
        ? `Generated 1 video artifact. ${result.revisedPrompt}`
        : "Generated 1 video artifact.";
    const assistantTurn = appendAgentTurn({
        sessionId,
        role: "assistant",
        text: assistantText,
        imageIds: [video.id],
        status: "complete",
    });
    return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
}
|
|
224
|
+
/**
 * Write a generated video and its JSON sidecar to the generated directory
 * and register it with the agent session.
 *
 * Mirrors `persistAgentImage`: the video is written first, then
 * `<file>.json` atomically; if the sidecar write fails the video file is
 * removed and the error rethrown. Optional adapter fields are normalised
 * (`revisedPrompt`/`usage` to `null`, `webSearchCalls` to `0`) so the sidecar
 * always carries the same keys as image sidecars, and one timestamp is
 * shared by the filename, the sidecar and the session-store record.
 *
 * @param {object} ctx - Application context (reads `config.storage.generatedDir`,
 *   `config.ids.generatedHexBytes`).
 * @param {string} sessionId - Agent session id.
 * @param {string} prompt - Prompt stored as both `prompt` and `userPrompt`.
 * @param {string} requestId - Request identifier stored in the metadata.
 * @param {object} result - Adapter result (reads `videoBuffer`,
 *   `revisedPrompt`, `usage`, `webSearchCalls`).
 * @returns {Promise<*>} Whatever `importAgentImage` returns for the new record.
 */
async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
    const { generatedDir } = ctx.config.storage;
    await mkdir(generatedDir, { recursive: true });
    const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
    const createdAt = Date.now();
    const filename = `${createdAt}_${rand}_agent.mp4`;
    const revisedPrompt = result.revisedPrompt ?? null;
    const meta = {
        kind: "agent",
        mediaType: "video",
        requestId,
        sessionId,
        prompt,
        userPrompt: prompt,
        revisedPrompt,
        provider: "grok",
        model: "grok-imagine-video",
        createdAt,
        usage: result.usage ?? null,
        webSearchCalls: result.webSearchCalls ?? 0,
    };
    const filePath = join(generatedDir, filename);
    await writeFile(filePath, result.videoBuffer);
    try {
        await atomicWriteJson(`${filePath}.json`, meta);
    }
    catch (err) {
        // Do not leave a video behind without its sidecar; the cleanup itself
        // is best effort and must not mask the original failure.
        await unlink(filePath).catch(() => { });
        throw err;
    }
    invalidateHistoryIndex();
    logEvent("agent", "video_saved", { requestId, sessionId, filename });
    return importAgentImage(sessionId, {
        id: `ai_${ulid()}`,
        filename,
        url: `/generated/${filename}`,
        prompt,
        revisedPrompt,
        createdAt,
    });
}
|
package/lib/agentRuntime.js
CHANGED
|
@@ -1,19 +1,7 @@
|
|
|
1
|
-
import { randomBytes } from "node:crypto";
|
|
2
|
-
import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
|
|
3
|
-
import { atomicWriteJson } from "./atomicWrite.js";
|
|
4
|
-
import { join } from "node:path";
|
|
5
1
|
import { ulid } from "ulid";
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { detectImageMimeFromB64 } from "./refs.js";
|
|
10
|
-
import { resolveProviderOptions } from "./providerOptions.js";
|
|
11
|
-
import { generateViaResponses } from "./responsesImageAdapter.js";
|
|
12
|
-
import { generateViaGrok } from "./grokImageAdapter.js";
|
|
13
|
-
import { generateViaAgy } from "./agyImageAdapter.js";
|
|
14
|
-
import { generateVideoViaGrok } from "./grokVideoAdapter.js";
|
|
15
|
-
import { parseVideoParams } from "./agentGenerationPlanner.js";
|
|
16
|
-
import { appendAgentTurn, buildImageContextManifest, getAgentImages, getAgentSession, importAgentImage, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
|
|
2
|
+
import { generateAgentImageWithRetry } from "./agentImageVideoGen.js";
|
|
3
|
+
import { runAgentVideoGeneration } from "./agentImageVideoGen.js";
|
|
4
|
+
import { appendAgentTurn, buildImageContextManifest, getAgentSession, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
|
|
17
5
|
import { AGENT_ALLOWED_TOOLS, } from "./agentTypes.js";
|
|
18
6
|
import { errInfo } from "./errInfo.js";
|
|
19
7
|
export function assertAgentAllowedTools(tools) {
|
|
@@ -190,249 +178,6 @@ export function isRuntimeRestartableError(error) {
|
|
|
190
178
|
code.includes("PROTOCOL") ||
|
|
191
179
|
err.message.toLowerCase().includes("protocol wedge"));
|
|
192
180
|
}
|
|
193
|
-
async function generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
|
|
194
|
-
let lastError = null;
|
|
195
|
-
for (let attempt = 0; attempt < 2; attempt++) {
|
|
196
|
-
try {
|
|
197
|
-
const forcedPrompt = attempt === 0 ? prompt : forceImagePrompt(prompt);
|
|
198
|
-
const result = await generateAgentImage(ctx, sessionId, forcedPrompt, manifest, webSearchEnabled, options);
|
|
199
|
-
if (result.image)
|
|
200
|
-
return result;
|
|
201
|
-
}
|
|
202
|
-
catch (error) {
|
|
203
|
-
lastError = error;
|
|
204
|
-
if (!isTextOnlyResult(error))
|
|
205
|
-
throw error;
|
|
206
|
-
if (attempt === 1)
|
|
207
|
-
break;
|
|
208
|
-
appendAgentTurn({
|
|
209
|
-
sessionId,
|
|
210
|
-
role: "tool",
|
|
211
|
-
text: "ima2.generate_image retry: text-only result rejected",
|
|
212
|
-
status: "error",
|
|
213
|
-
});
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
throw textOnlyError(lastError);
|
|
217
|
-
}
|
|
218
|
-
async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
|
|
219
|
-
const requestId = options.requestId ?? `agent_${ulid()}`;
|
|
220
|
-
const providerOptions = resolveProviderOptions(ctx, {
|
|
221
|
-
provider: options.provider ?? "oauth",
|
|
222
|
-
rawModel: options.model,
|
|
223
|
-
rawReasoningEffort: options.reasoningEffort,
|
|
224
|
-
rawSize: options.size ?? "1024x1024",
|
|
225
|
-
rawWebSearchEnabled: webSearchEnabled,
|
|
226
|
-
searchMode: webSearchEnabled ? "on" : "off",
|
|
227
|
-
});
|
|
228
|
-
if (providerOptions.error) {
|
|
229
|
-
const err = new Error(providerOptions.error);
|
|
230
|
-
err.code = providerOptions.code;
|
|
231
|
-
err.status = providerOptions.status;
|
|
232
|
-
throw err;
|
|
233
|
-
}
|
|
234
|
-
const activeProvider = providerOptions.provider;
|
|
235
|
-
const effectiveModel = activeProvider === "grok" && options.quality === "high"
|
|
236
|
-
? "grok-imagine-image-quality"
|
|
237
|
-
: providerOptions.model;
|
|
238
|
-
const response = activeProvider === "agy"
|
|
239
|
-
? await generateViaAgy(`${manifest}\n\nUser request:\n${prompt}`, {
|
|
240
|
-
requestId,
|
|
241
|
-
signal: options.signal ?? undefined,
|
|
242
|
-
})
|
|
243
|
-
: activeProvider === "grok"
|
|
244
|
-
? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
|
|
245
|
-
model: effectiveModel,
|
|
246
|
-
size: providerOptions.size,
|
|
247
|
-
requestId,
|
|
248
|
-
signal: options.signal ?? undefined,
|
|
249
|
-
references: await loadAgentCurrentImageReferences(ctx, sessionId),
|
|
250
|
-
})
|
|
251
|
-
: await generateViaResponses(activeProvider, `${manifest}\n\nUser request:\n${prompt}`, options.quality ?? "medium", providerOptions.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
|
|
252
|
-
model: providerOptions.model,
|
|
253
|
-
reasoningEffort: providerOptions.reasoningEffort,
|
|
254
|
-
webSearchEnabled,
|
|
255
|
-
signal: options.signal,
|
|
256
|
-
});
|
|
257
|
-
const format = activeProvider === "grok" || activeProvider === "agy"
|
|
258
|
-
? imageFormatFromMime(("mime" in response ? response.mime : undefined) || detectImageMimeFromB64(response.b64) || "image/jpeg")
|
|
259
|
-
: options.format ?? "png";
|
|
260
|
-
const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
|
|
261
|
-
provider: String(activeProvider),
|
|
262
|
-
model: String(effectiveModel),
|
|
263
|
-
});
|
|
264
|
-
const responseText = "text" in response && typeof response.text === "string" ? response.text : null;
|
|
265
|
-
return { image, webSearchCalls: response.webSearchCalls || 0, text: responseText, provider: activeProvider };
|
|
266
|
-
}
|
|
267
|
-
async function loadAgentCurrentImageReferences(ctx, sessionId) {
|
|
268
|
-
const session = getAgentSession(sessionId);
|
|
269
|
-
const currentImage = session?.lastImageId
|
|
270
|
-
? getAgentImages(sessionId).find((image) => image.id === session.lastImageId)
|
|
271
|
-
: null;
|
|
272
|
-
if (!currentImage?.filename)
|
|
273
|
-
return [];
|
|
274
|
-
try {
|
|
275
|
-
const b64 = (await readFile(join(ctx.config.storage.generatedDir, currentImage.filename))).toString("base64");
|
|
276
|
-
const mime = detectImageMimeFromB64(b64);
|
|
277
|
-
return [{ b64, declaredMime: mime, detectedMime: mime }];
|
|
278
|
-
}
|
|
279
|
-
catch (error) {
|
|
280
|
-
const err = errInfo(error);
|
|
281
|
-
logEvent("agent", "grok_ref_missing", { sessionId, filename: currentImage.filename, code: err.code, message: err.message });
|
|
282
|
-
return [];
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
function imageFormatFromMime(mime) {
|
|
286
|
-
if (mime === "image/jpeg")
|
|
287
|
-
return "jpeg";
|
|
288
|
-
if (mime === "image/webp")
|
|
289
|
-
return "webp";
|
|
290
|
-
return "png";
|
|
291
|
-
}
|
|
292
|
-
async function persistAgentImage(ctx, sessionId, prompt, format, requestId, response, generation) {
|
|
293
|
-
await mkdir(ctx.config.storage.generatedDir, { recursive: true });
|
|
294
|
-
const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
|
|
295
|
-
const filename = `${Date.now()}_${rand}_agent.${format}`;
|
|
296
|
-
const meta = {
|
|
297
|
-
kind: "agent",
|
|
298
|
-
requestId,
|
|
299
|
-
sessionId,
|
|
300
|
-
prompt,
|
|
301
|
-
userPrompt: prompt,
|
|
302
|
-
revisedPrompt: response.revisedPrompt ?? null,
|
|
303
|
-
provider: generation.provider,
|
|
304
|
-
model: generation.model,
|
|
305
|
-
createdAt: Date.now(),
|
|
306
|
-
usage: response.usage ?? null,
|
|
307
|
-
webSearchCalls: response.webSearchCalls ?? 0,
|
|
308
|
-
};
|
|
309
|
-
const embedded = await embedImageMetadataBestEffort(Buffer.from(response.b64, "base64"), format, meta, {
|
|
310
|
-
version: ctx.packageVersion,
|
|
311
|
-
});
|
|
312
|
-
const filePath = join(ctx.config.storage.generatedDir, filename);
|
|
313
|
-
await writeFile(filePath, embedded.buffer);
|
|
314
|
-
try {
|
|
315
|
-
await atomicWriteJson(`${filePath}.json`, meta);
|
|
316
|
-
}
|
|
317
|
-
catch (err) {
|
|
318
|
-
await unlink(filePath).catch(() => { });
|
|
319
|
-
throw err;
|
|
320
|
-
}
|
|
321
|
-
invalidateHistoryIndex();
|
|
322
|
-
logEvent("agent", "saved", { requestId, sessionId, filename });
|
|
323
|
-
return importAgentImage(sessionId, {
|
|
324
|
-
id: `ai_${ulid()}`,
|
|
325
|
-
filename,
|
|
326
|
-
url: `/generated/${filename}`,
|
|
327
|
-
prompt,
|
|
328
|
-
revisedPrompt: response.revisedPrompt ?? null,
|
|
329
|
-
createdAt: Date.now(),
|
|
330
|
-
});
|
|
331
|
-
}
|
|
332
|
-
export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
|
|
333
|
-
const session = getAgentSession(sessionId);
|
|
334
|
-
if (!session)
|
|
335
|
-
throw notFound(sessionId);
|
|
336
|
-
if (!options.skipUserTurn) {
|
|
337
|
-
appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
|
|
338
|
-
}
|
|
339
|
-
const requestId = options.requestId ?? `agent_video_${ulid()}`;
|
|
340
|
-
const startedAt = Date.now();
|
|
341
|
-
// Auto I2V: if session has a last image, use it as source
|
|
342
|
-
let sourceImage;
|
|
343
|
-
let mode = "text-to-video";
|
|
344
|
-
if (session.lastImageId) {
|
|
345
|
-
const images = getAgentImages(sessionId);
|
|
346
|
-
const lastImage = images.find((img) => img.id === session.lastImageId);
|
|
347
|
-
if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
|
|
348
|
-
try {
|
|
349
|
-
const { loadAssetB64 } = await import("./nodeStore.js");
|
|
350
|
-
sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
|
|
351
|
-
mode = "image-to-video";
|
|
352
|
-
}
|
|
353
|
-
catch { /* fallback to T2V */ }
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
const videoParams = parseVideoParams(prompt);
|
|
357
|
-
const result = await generateVideoViaGrok(prompt, ctx, {
|
|
358
|
-
model: "grok-imagine-video",
|
|
359
|
-
mode,
|
|
360
|
-
sourceImage,
|
|
361
|
-
duration: videoParams.duration ?? 5,
|
|
362
|
-
resolution: videoParams.resolution ?? "480p",
|
|
363
|
-
aspectRatio: (videoParams.aspectRatio ?? "auto"),
|
|
364
|
-
requestId,
|
|
365
|
-
signal: options.signal ?? undefined,
|
|
366
|
-
});
|
|
367
|
-
const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
|
|
368
|
-
const finishedAt = Date.now();
|
|
369
|
-
const toolCall = {
|
|
370
|
-
id: `tc_video_${ulid()}`,
|
|
371
|
-
name: "ima2.generate_video",
|
|
372
|
-
status: "complete",
|
|
373
|
-
startedAt,
|
|
374
|
-
finishedAt,
|
|
375
|
-
durationMs: finishedAt - startedAt,
|
|
376
|
-
requestId,
|
|
377
|
-
inputSummary: prompt,
|
|
378
|
-
outputSummary: `Generated video ${video.filename}.`,
|
|
379
|
-
imageIds: [video.id],
|
|
380
|
-
};
|
|
381
|
-
appendAgentTurn({
|
|
382
|
-
sessionId,
|
|
383
|
-
role: "tool",
|
|
384
|
-
text: "ima2.generate_video",
|
|
385
|
-
imageIds: [video.id],
|
|
386
|
-
status: "complete",
|
|
387
|
-
raw: { toolCalls: [toolCall] },
|
|
388
|
-
});
|
|
389
|
-
const assistantTurn = appendAgentTurn({
|
|
390
|
-
sessionId,
|
|
391
|
-
role: "assistant",
|
|
392
|
-
text: `Generated 1 video artifact. ${result.revisedPrompt}`,
|
|
393
|
-
imageIds: [video.id],
|
|
394
|
-
status: "complete",
|
|
395
|
-
});
|
|
396
|
-
return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
|
|
397
|
-
}
|
|
398
|
-
async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
|
|
399
|
-
await mkdir(ctx.config.storage.generatedDir, { recursive: true });
|
|
400
|
-
const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
|
|
401
|
-
const filename = `${Date.now()}_${rand}_agent.mp4`;
|
|
402
|
-
const meta = {
|
|
403
|
-
kind: "agent",
|
|
404
|
-
mediaType: "video",
|
|
405
|
-
requestId,
|
|
406
|
-
sessionId,
|
|
407
|
-
prompt,
|
|
408
|
-
userPrompt: prompt,
|
|
409
|
-
revisedPrompt: result.revisedPrompt,
|
|
410
|
-
provider: "grok",
|
|
411
|
-
model: "grok-imagine-video",
|
|
412
|
-
createdAt: Date.now(),
|
|
413
|
-
usage: result.usage,
|
|
414
|
-
webSearchCalls: result.webSearchCalls,
|
|
415
|
-
};
|
|
416
|
-
const filePath = join(ctx.config.storage.generatedDir, filename);
|
|
417
|
-
await writeFile(filePath, result.videoBuffer);
|
|
418
|
-
try {
|
|
419
|
-
await atomicWriteJson(`${filePath}.json`, meta);
|
|
420
|
-
}
|
|
421
|
-
catch (err) {
|
|
422
|
-
await unlink(filePath).catch(() => { });
|
|
423
|
-
throw err;
|
|
424
|
-
}
|
|
425
|
-
invalidateHistoryIndex();
|
|
426
|
-
logEvent("agent", "video_saved", { requestId, sessionId, filename });
|
|
427
|
-
return importAgentImage(sessionId, {
|
|
428
|
-
id: `ai_${ulid()}`,
|
|
429
|
-
filename,
|
|
430
|
-
url: `/generated/${filename}`,
|
|
431
|
-
prompt,
|
|
432
|
-
revisedPrompt: result.revisedPrompt,
|
|
433
|
-
createdAt: Date.now(),
|
|
434
|
-
});
|
|
435
|
-
}
|
|
436
181
|
function recordSearchFindings(sessionId, prompt, count, provider) {
|
|
437
182
|
if (!count)
|
|
438
183
|
return [];
|
|
@@ -446,14 +191,14 @@ function recordSearchFindings(sessionId, prompt, count, provider) {
|
|
|
446
191
|
}),
|
|
447
192
|
];
|
|
448
193
|
}
|
|
449
|
-
function forceImagePrompt(prompt) {
|
|
194
|
+
/**
 * Wrap a user request with instructions that insist on an image result.
 *
 * @param {string} prompt - The original user request.
 * @returns {string} Three newline-separated lines: a notice that no image
 *   was returned, the directive to produce one, and the user request.
 */
export function forceImagePrompt(prompt) {
    const notice = "The previous turn did not return an image artifact.";
    const directive = "Return a final image using ima2.generate_image/image_generation now.";
    const request = `User request: ${prompt}`;
    return `${notice}\n${directive}\n${request}`;
}
|
|
456
|
-
function isTextOnlyResult(error) {
|
|
201
|
+
export function isTextOnlyResult(error) {
|
|
457
202
|
const err = errInfo(error);
|
|
458
203
|
return [
|
|
459
204
|
"EMPTY_RESPONSE",
|
|
@@ -462,7 +207,7 @@ function isTextOnlyResult(error) {
|
|
|
462
207
|
"IMAGE_TOOL_COMPLETED_WITHOUT_RESULT",
|
|
463
208
|
].includes(err.code || "") || err.message.includes("No image data");
|
|
464
209
|
}
|
|
465
|
-
function textOnlyError(cause) {
|
|
210
|
+
export function textOnlyError(cause) {
|
|
466
211
|
const err = new Error("Agent result did not include an image artifact.");
|
|
467
212
|
err.code = "AGENT_TEXT_ONLY_RESULT";
|
|
468
213
|
err.status = 422;
|
|
@@ -488,7 +233,7 @@ function cleanParallelism(value) {
|
|
|
488
233
|
return 2;
|
|
489
234
|
return Math.max(1, Math.min(8, Math.round(numeric)));
|
|
490
235
|
}
|
|
491
|
-
function notFound(sessionId) {
|
|
236
|
+
export function notFound(sessionId) {
|
|
492
237
|
const err = new Error(`Agent session not found: ${sessionId}`);
|
|
493
238
|
err.code = "AGENT_SESSION_NOT_FOUND";
|
|
494
239
|
err.status = 404;
|