ima2-gen 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/CHANGELOG.md +150 -0
  2. package/README.md +10 -1
  3. package/bin/commands/backfillThumbs.js +6 -0
  4. package/bin/commands/gen.js +6 -0
  5. package/bin/ima2.js +14 -10
  6. package/docs/API.md +131 -8
  7. package/docs/CLI.md +2 -1
  8. package/docs/FAQ.ko.md +16 -0
  9. package/docs/FAQ.md +30 -0
  10. package/docs/README.ko.md +7 -3
  11. package/docs/migration/runtime-test-inventory.md +15 -1
  12. package/lib/agentImageVideoGen.js +261 -0
  13. package/lib/agentRuntime.js +7 -262
  14. package/lib/agyImageAdapter.js +35 -8
  15. package/lib/errorClassify.js +8 -7
  16. package/lib/eventBus.js +71 -0
  17. package/lib/geminiApiImageAdapter.js +16 -20
  18. package/lib/generationErrors.js +3 -1
  19. package/lib/grokImageAdapter.js +68 -129
  20. package/lib/grokImageCore.js +153 -0
  21. package/lib/grokMultimodeAdapter.js +5 -3
  22. package/lib/grokVideoCanvas.js +13 -0
  23. package/lib/grokVideoPlannerPrompt.js +53 -6
  24. package/lib/historyList.js +1 -0
  25. package/lib/inflight.js +54 -17
  26. package/lib/multimodeHelpers.js +10 -0
  27. package/lib/nodeHelpers.js +59 -0
  28. package/lib/oauthProxy/prompts.js +30 -36
  29. package/lib/promptBuilder/systemPrompt.js +2 -5
  30. package/lib/promptSafetyPolicy.js +1 -5
  31. package/lib/responsesFallback.js +2 -1
  32. package/lib/routeHelpers.js +44 -0
  33. package/lib/ssePublish.js +12 -0
  34. package/lib/storyboardPrefix.js +28 -0
  35. package/lib/thumbBackfill.js +16 -5
  36. package/package.json +4 -1
  37. package/routes/agy.js +44 -0
  38. package/routes/auth.js +6 -2
  39. package/routes/edit.js +7 -1
  40. package/routes/events.js +78 -0
  41. package/routes/generate.js +99 -127
  42. package/routes/index.js +4 -0
  43. package/routes/multimode.js +99 -56
  44. package/routes/nodes.js +59 -103
  45. package/routes/video.js +100 -17
  46. package/skills/ima2/SKILL.md +98 -21
  47. package/ui/dist/.vite/manifest.json +12 -12
  48. package/ui/dist/assets/{AgentWorkspace-CYv84Rus.js → AgentWorkspace-Dth6YijN.js} +1 -1
  49. package/ui/dist/assets/{CardNewsWorkspace-Dqyc1WZ1.js → CardNewsWorkspace-Dav3K5CT.js} +1 -1
  50. package/ui/dist/assets/{NodeCanvas-ChEXzQbb.js → NodeCanvas-C4ifFzB1.js} +1 -1
  51. package/ui/dist/assets/{PromptBuilderPanel-B95ZufnR.js → PromptBuilderPanel-CEcyU9PL.js} +1 -1
  52. package/ui/dist/assets/{PromptImportDialog-DGOwFQET.js → PromptImportDialog-CgQ94Gth.js} +2 -2
  53. package/ui/dist/assets/{PromptImportDiscoverySection-CgvdnR49.js → PromptImportDiscoverySection-CuzyzbNI.js} +1 -1
  54. package/ui/dist/assets/{PromptImportFolderSection-CfUye9J8.js → PromptImportFolderSection-DHLGlO6l.js} +1 -1
  55. package/ui/dist/assets/{PromptLibraryPanel-B9kndPw1.js → PromptLibraryPanel-BOe18we8.js} +2 -2
  56. package/ui/dist/assets/SettingsWorkspace-Cdgnm4Wa.js +1 -0
  57. package/ui/dist/assets/{index-BhcvL0g-.js → index-C5PSahkr.js} +1 -1
  58. package/ui/dist/assets/index-Dn2AhL6d.css +1 -0
  59. package/ui/dist/assets/index-Tjqx6wUV.js +23 -0
  60. package/ui/dist/index.html +2 -2
  61. package/ui/dist/assets/SettingsWorkspace-B3tgLrmF.js +0 -1
  62. package/ui/dist/assets/index-BtK3YhJc.js +0 -39
  63. package/ui/dist/assets/index-ClOLOjnA.css +0 -1
@@ -4,7 +4,7 @@ Generated by `npm run test:inventory` (script: `scripts/classify-tests.mjs`).
4
4
 
5
5
  _Tests considered "runtime-importing" if they import from `../lib/`, `../routes/`, `../bin/`, `../server`, or `../config`._
6
6
 
7
- Total: 177 (runtime: 61, contract: 116)
7
+ Total: 191 (runtime: 66, contract: 125)
8
8
 
9
9
  ## Runtime-importing tests
10
10
  - `tests/agent-mode-auto-planner-contract.test.ts`
@@ -18,10 +18,14 @@ Total: 177 (runtime: 61, contract: 116)
18
18
  - `tests/api-provider-parity.test.ts`
19
19
  - `tests/billing-source.test.ts`
20
20
  - `tests/card-news-contract.test.ts`
21
+ - `tests/card-news-template.test.ts`
22
+ - `tests/classic-generate-async.test.ts`
21
23
  - `tests/cli-error-hints.test.ts`
22
24
  - `tests/cli-lib.test.ts`
23
25
  - `tests/comfy-bridge-contract.test.ts`
24
26
  - `tests/error-classify.test.ts`
27
+ - `tests/event-bus.test.ts`
28
+ - `tests/events-channel-contract.test.ts`
25
29
  - `tests/generate-route-validation-error.test.ts`
26
30
  - `tests/generated-static-privacy.test.ts`
27
31
  - `tests/generation-errors.test.ts`
@@ -36,6 +40,7 @@ Total: 177 (runtime: 61, contract: 116)
36
40
  - `tests/image-metadata-route.test.ts`
37
41
  - `tests/image-metadata-xmp.test.ts`
38
42
  - `tests/image-model.test.ts`
43
+ - `tests/inflight-guard-contract.test.ts`
39
44
  - `tests/inflight-persistence.test.ts`
40
45
  - `tests/inflight.test.ts`
41
46
  - `tests/local-import-contract.test.ts`
@@ -75,6 +80,9 @@ Total: 177 (runtime: 61, contract: 116)
75
80
  - `tests/agent-mode-right-sidebar-contract.test.js`
76
81
  - `tests/agent-mode-tool-folding-contract.test.js`
77
82
  - `tests/app-weight-splitting-contract.test.js`
83
+ - `tests/async-capacity-retry-behavior.test.ts`
84
+ - `tests/async-capacity-retry-contract.test.js`
85
+ - `tests/async-stream-subscribe-order.test.js`
78
86
  - `tests/background-cleanup-brush-rasterize.test.js`
79
87
  - `tests/background-cleanup-mask-compose.test.js`
80
88
  - `tests/bin.test.js`
@@ -123,8 +131,12 @@ Total: 177 (runtime: 61, contract: 116)
123
131
  - `tests/current-image-actions-readiness-contract.test.js`
124
132
  - `tests/direct-mode-visual-contract.test.js`
125
133
  - `tests/edit-mask-api-contract.test.js`
134
+ - `tests/frontend-connection-state-contract.test.js`
135
+ - `tests/frontend-sse-risk-contract.test.js`
136
+ - `tests/gallery-hang-regression-contract.test.ts`
126
137
  - `tests/gallery-load-older-contract.test.js`
127
138
  - `tests/gallery-navigation-ux-contract.test.js`
139
+ - `tests/gallery-selection-during-generation-contract.test.js`
128
140
  - `tests/gallery-session-scope-contract.test.js`
129
141
  - `tests/gallery-shortcuts-behavior.test.js`
130
142
  - `tests/gallery-shortcuts-visible-domain-contract.test.js`
@@ -145,6 +157,7 @@ Total: 177 (runtime: 61, contract: 116)
145
157
  - `tests/multimode-backend-contract.test.js`
146
158
  - `tests/multimode-concurrent-store-contract.test.js`
147
159
  - `tests/multimode-ui-contract.test.js`
160
+ - `tests/node-async-eventbus-contract.test.js`
148
161
  - `tests/node-batch-contract.test.js`
149
162
  - `tests/node-child-refs-contract.test.js`
150
163
  - `tests/node-child-refs-payload.test.js`
@@ -184,5 +197,6 @@ Total: 177 (runtime: 61, contract: 116)
184
197
  - `tests/toast-stack-contract.test.js`
185
198
  - `tests/ui-error-code-contract.test.js`
186
199
  - `tests/video-continuity-ui-contract.test.js`
200
+ - `tests/video-gallery-refresh-contract.test.ts`
187
201
  - `tests/vite-dev-port-contract.test.js`
188
202
  - `tests/web-search-toggle-contract.test.js`
@@ -0,0 +1,261 @@
1
+ import { randomBytes } from "node:crypto";
2
+ import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
3
+ import { atomicWriteJson } from "./atomicWrite.js";
4
+ import { join } from "node:path";
5
+ import { ulid } from "ulid";
6
+ import { embedImageMetadataBestEffort } from "./imageMetadataStore.js";
7
+ import { invalidateHistoryIndex } from "./historyIndex.js";
8
+ import { logEvent } from "./logger.js";
9
+ import { detectImageMimeFromB64 } from "./refs.js";
10
+ import { resolveProviderOptions } from "./providerOptions.js";
11
+ import { generateViaResponses } from "./responsesImageAdapter.js";
12
+ import { generateViaGrok } from "./grokImageAdapter.js";
13
+ import { generateViaAgy } from "./agyImageAdapter.js";
14
+ import { generateVideoViaGrok } from "./grokVideoAdapter.js";
15
+ import { parseVideoParams } from "./agentGenerationPlanner.js";
16
+ import { appendAgentTurn, getAgentImages, getAgentSession, importAgentImage, } from "./agentStore.js";
17
+ import { errInfo } from "./errInfo.js";
18
+ import { forceImagePrompt, isTextOnlyResult, textOnlyError, notFound } from "./agentRuntime.js";
19
+ export async function generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
20
+ let lastError = null;
21
+ for (let attempt = 0; attempt < 2; attempt++) {
22
+ try {
23
+ const forcedPrompt = attempt === 0 ? prompt : forceImagePrompt(prompt);
24
+ const result = await generateAgentImage(ctx, sessionId, forcedPrompt, manifest, webSearchEnabled, options);
25
+ if (result.image)
26
+ return result;
27
+ }
28
+ catch (error) {
29
+ lastError = error;
30
+ if (!isTextOnlyResult(error))
31
+ throw error;
32
+ if (attempt === 1)
33
+ break;
34
+ appendAgentTurn({
35
+ sessionId,
36
+ role: "tool",
37
+ text: "ima2.generate_image retry: text-only result rejected",
38
+ status: "error",
39
+ });
40
+ }
41
+ }
42
+ throw textOnlyError(lastError);
43
+ }
44
+ async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
45
+ const requestId = options.requestId ?? `agent_${ulid()}`;
46
+ const providerOptions = resolveProviderOptions(ctx, {
47
+ provider: options.provider ?? "oauth",
48
+ rawModel: options.model,
49
+ rawReasoningEffort: options.reasoningEffort,
50
+ rawSize: options.size ?? "1024x1024",
51
+ rawWebSearchEnabled: webSearchEnabled,
52
+ searchMode: webSearchEnabled ? "on" : "off",
53
+ });
54
+ if (providerOptions.error) {
55
+ const err = new Error(providerOptions.error);
56
+ err.code = providerOptions.code;
57
+ err.status = providerOptions.status;
58
+ throw err;
59
+ }
60
+ const activeProvider = providerOptions.provider;
61
+ const effectiveModel = activeProvider === "grok" && options.quality === "high"
62
+ ? "grok-imagine-image-quality"
63
+ : providerOptions.model;
64
+ const response = activeProvider === "agy"
65
+ ? await generateViaAgy(`${manifest}\n\nUser request:\n${prompt}`, {
66
+ requestId,
67
+ signal: options.signal ?? undefined,
68
+ })
69
+ : activeProvider === "grok"
70
+ ? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
71
+ model: effectiveModel,
72
+ size: providerOptions.size,
73
+ requestId,
74
+ signal: options.signal ?? undefined,
75
+ references: await loadAgentCurrentImageReferences(ctx, sessionId),
76
+ })
77
+ : await generateViaResponses(activeProvider, `${manifest}\n\nUser request:\n${prompt}`, options.quality ?? "medium", providerOptions.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
78
+ model: providerOptions.model,
79
+ reasoningEffort: providerOptions.reasoningEffort,
80
+ webSearchEnabled,
81
+ signal: options.signal,
82
+ });
83
+ const format = activeProvider === "grok" || activeProvider === "agy"
84
+ ? imageFormatFromMime(("mime" in response ? response.mime : undefined) || detectImageMimeFromB64(response.b64) || "image/jpeg")
85
+ : options.format ?? "png";
86
+ const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
87
+ provider: String(activeProvider),
88
+ model: String(effectiveModel),
89
+ });
90
+ const responseText = "text" in response && typeof response.text === "string" ? response.text : null;
91
+ return { image, webSearchCalls: response.webSearchCalls || 0, text: responseText, provider: activeProvider };
92
+ }
93
+ async function loadAgentCurrentImageReferences(ctx, sessionId) {
94
+ const session = getAgentSession(sessionId);
95
+ const currentImage = session?.lastImageId
96
+ ? getAgentImages(sessionId).find((image) => image.id === session.lastImageId)
97
+ : null;
98
+ if (!currentImage?.filename)
99
+ return [];
100
+ try {
101
+ const b64 = (await readFile(join(ctx.config.storage.generatedDir, currentImage.filename))).toString("base64");
102
+ const mime = detectImageMimeFromB64(b64);
103
+ return [{ b64, declaredMime: mime, detectedMime: mime }];
104
+ }
105
+ catch (error) {
106
+ const err = errInfo(error);
107
+ logEvent("agent", "grok_ref_missing", { sessionId, filename: currentImage.filename, code: err.code, message: err.message });
108
+ return [];
109
+ }
110
+ }
111
+ function imageFormatFromMime(mime) {
112
+ if (mime === "image/jpeg")
113
+ return "jpeg";
114
+ if (mime === "image/webp")
115
+ return "webp";
116
+ return "png";
117
+ }
118
+ async function persistAgentImage(ctx, sessionId, prompt, format, requestId, response, generation) {
119
+ await mkdir(ctx.config.storage.generatedDir, { recursive: true });
120
+ const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
121
+ const filename = `${Date.now()}_${rand}_agent.${format}`;
122
+ const meta = {
123
+ kind: "agent",
124
+ requestId,
125
+ sessionId,
126
+ prompt,
127
+ userPrompt: prompt,
128
+ revisedPrompt: response.revisedPrompt ?? null,
129
+ provider: generation.provider,
130
+ model: generation.model,
131
+ createdAt: Date.now(),
132
+ usage: response.usage ?? null,
133
+ webSearchCalls: response.webSearchCalls ?? 0,
134
+ };
135
+ const embedded = await embedImageMetadataBestEffort(Buffer.from(response.b64, "base64"), format, meta, {
136
+ version: ctx.packageVersion,
137
+ });
138
+ const filePath = join(ctx.config.storage.generatedDir, filename);
139
+ await writeFile(filePath, embedded.buffer);
140
+ try {
141
+ await atomicWriteJson(`${filePath}.json`, meta);
142
+ }
143
+ catch (err) {
144
+ await unlink(filePath).catch(() => { });
145
+ throw err;
146
+ }
147
+ invalidateHistoryIndex();
148
+ logEvent("agent", "saved", { requestId, sessionId, filename });
149
+ return importAgentImage(sessionId, {
150
+ id: `ai_${ulid()}`,
151
+ filename,
152
+ url: `/generated/${filename}`,
153
+ prompt,
154
+ revisedPrompt: response.revisedPrompt ?? null,
155
+ createdAt: Date.now(),
156
+ });
157
+ }
158
+ export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
159
+ const session = getAgentSession(sessionId);
160
+ if (!session)
161
+ throw notFound(sessionId);
162
+ if (!options.skipUserTurn) {
163
+ appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
164
+ }
165
+ const requestId = options.requestId ?? `agent_video_${ulid()}`;
166
+ const startedAt = Date.now();
167
+ // Auto I2V: if session has a last image, use it as source
168
+ let sourceImage;
169
+ let mode = "text-to-video";
170
+ if (session.lastImageId) {
171
+ const images = getAgentImages(sessionId);
172
+ const lastImage = images.find((img) => img.id === session.lastImageId);
173
+ if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
174
+ try {
175
+ const { loadAssetB64 } = await import("./nodeStore.js");
176
+ sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
177
+ mode = "image-to-video";
178
+ }
179
+ catch { /* fallback to T2V */ }
180
+ }
181
+ }
182
+ const videoParams = parseVideoParams(prompt);
183
+ const result = await generateVideoViaGrok(prompt, ctx, {
184
+ model: "grok-imagine-video",
185
+ mode,
186
+ sourceImage,
187
+ duration: videoParams.duration ?? 5,
188
+ resolution: videoParams.resolution ?? "480p",
189
+ aspectRatio: (videoParams.aspectRatio ?? "auto"),
190
+ requestId,
191
+ signal: options.signal ?? undefined,
192
+ });
193
+ const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
194
+ const finishedAt = Date.now();
195
+ const toolCall = {
196
+ id: `tc_video_${ulid()}`,
197
+ name: "ima2.generate_video",
198
+ status: "complete",
199
+ startedAt,
200
+ finishedAt,
201
+ durationMs: finishedAt - startedAt,
202
+ requestId,
203
+ inputSummary: prompt,
204
+ outputSummary: `Generated video ${video.filename}.`,
205
+ imageIds: [video.id],
206
+ };
207
+ appendAgentTurn({
208
+ sessionId,
209
+ role: "tool",
210
+ text: "ima2.generate_video",
211
+ imageIds: [video.id],
212
+ status: "complete",
213
+ raw: { toolCalls: [toolCall] },
214
+ });
215
+ const assistantTurn = appendAgentTurn({
216
+ sessionId,
217
+ role: "assistant",
218
+ text: `Generated 1 video artifact. ${result.revisedPrompt}`,
219
+ imageIds: [video.id],
220
+ status: "complete",
221
+ });
222
+ return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
223
+ }
224
+ async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
225
+ await mkdir(ctx.config.storage.generatedDir, { recursive: true });
226
+ const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
227
+ const filename = `${Date.now()}_${rand}_agent.mp4`;
228
+ const meta = {
229
+ kind: "agent",
230
+ mediaType: "video",
231
+ requestId,
232
+ sessionId,
233
+ prompt,
234
+ userPrompt: prompt,
235
+ revisedPrompt: result.revisedPrompt,
236
+ provider: "grok",
237
+ model: "grok-imagine-video",
238
+ createdAt: Date.now(),
239
+ usage: result.usage,
240
+ webSearchCalls: result.webSearchCalls,
241
+ };
242
+ const filePath = join(ctx.config.storage.generatedDir, filename);
243
+ await writeFile(filePath, result.videoBuffer);
244
+ try {
245
+ await atomicWriteJson(`${filePath}.json`, meta);
246
+ }
247
+ catch (err) {
248
+ await unlink(filePath).catch(() => { });
249
+ throw err;
250
+ }
251
+ invalidateHistoryIndex();
252
+ logEvent("agent", "video_saved", { requestId, sessionId, filename });
253
+ return importAgentImage(sessionId, {
254
+ id: `ai_${ulid()}`,
255
+ filename,
256
+ url: `/generated/${filename}`,
257
+ prompt,
258
+ revisedPrompt: result.revisedPrompt,
259
+ createdAt: Date.now(),
260
+ });
261
+ }
@@ -1,19 +1,7 @@
1
- import { randomBytes } from "node:crypto";
2
- import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
3
- import { atomicWriteJson } from "./atomicWrite.js";
4
- import { join } from "node:path";
5
1
  import { ulid } from "ulid";
6
- import { embedImageMetadataBestEffort } from "./imageMetadataStore.js";
7
- import { invalidateHistoryIndex } from "./historyIndex.js";
8
- import { logEvent } from "./logger.js";
9
- import { detectImageMimeFromB64 } from "./refs.js";
10
- import { resolveProviderOptions } from "./providerOptions.js";
11
- import { generateViaResponses } from "./responsesImageAdapter.js";
12
- import { generateViaGrok } from "./grokImageAdapter.js";
13
- import { generateViaAgy } from "./agyImageAdapter.js";
14
- import { generateVideoViaGrok } from "./grokVideoAdapter.js";
15
- import { parseVideoParams } from "./agentGenerationPlanner.js";
16
- import { appendAgentTurn, buildImageContextManifest, getAgentImages, getAgentSession, importAgentImage, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
2
+ import { generateAgentImageWithRetry } from "./agentImageVideoGen.js";
3
+ import { runAgentVideoGeneration } from "./agentImageVideoGen.js";
4
+ import { appendAgentTurn, buildImageContextManifest, getAgentSession, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
17
5
  import { AGENT_ALLOWED_TOOLS, } from "./agentTypes.js";
18
6
  import { errInfo } from "./errInfo.js";
19
7
  export function assertAgentAllowedTools(tools) {
@@ -190,249 +178,6 @@ export function isRuntimeRestartableError(error) {
190
178
  code.includes("PROTOCOL") ||
191
179
  err.message.toLowerCase().includes("protocol wedge"));
192
180
  }
193
- async function generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
194
- let lastError = null;
195
- for (let attempt = 0; attempt < 2; attempt++) {
196
- try {
197
- const forcedPrompt = attempt === 0 ? prompt : forceImagePrompt(prompt);
198
- const result = await generateAgentImage(ctx, sessionId, forcedPrompt, manifest, webSearchEnabled, options);
199
- if (result.image)
200
- return result;
201
- }
202
- catch (error) {
203
- lastError = error;
204
- if (!isTextOnlyResult(error))
205
- throw error;
206
- if (attempt === 1)
207
- break;
208
- appendAgentTurn({
209
- sessionId,
210
- role: "tool",
211
- text: "ima2.generate_image retry: text-only result rejected",
212
- status: "error",
213
- });
214
- }
215
- }
216
- throw textOnlyError(lastError);
217
- }
218
- async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
219
- const requestId = options.requestId ?? `agent_${ulid()}`;
220
- const providerOptions = resolveProviderOptions(ctx, {
221
- provider: options.provider ?? "oauth",
222
- rawModel: options.model,
223
- rawReasoningEffort: options.reasoningEffort,
224
- rawSize: options.size ?? "1024x1024",
225
- rawWebSearchEnabled: webSearchEnabled,
226
- searchMode: webSearchEnabled ? "on" : "off",
227
- });
228
- if (providerOptions.error) {
229
- const err = new Error(providerOptions.error);
230
- err.code = providerOptions.code;
231
- err.status = providerOptions.status;
232
- throw err;
233
- }
234
- const activeProvider = providerOptions.provider;
235
- const effectiveModel = activeProvider === "grok" && options.quality === "high"
236
- ? "grok-imagine-image-quality"
237
- : providerOptions.model;
238
- const response = activeProvider === "agy"
239
- ? await generateViaAgy(`${manifest}\n\nUser request:\n${prompt}`, {
240
- requestId,
241
- signal: options.signal ?? undefined,
242
- })
243
- : activeProvider === "grok"
244
- ? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
245
- model: effectiveModel,
246
- size: providerOptions.size,
247
- requestId,
248
- signal: options.signal ?? undefined,
249
- references: await loadAgentCurrentImageReferences(ctx, sessionId),
250
- })
251
- : await generateViaResponses(activeProvider, `${manifest}\n\nUser request:\n${prompt}`, options.quality ?? "medium", providerOptions.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
252
- model: providerOptions.model,
253
- reasoningEffort: providerOptions.reasoningEffort,
254
- webSearchEnabled,
255
- signal: options.signal,
256
- });
257
- const format = activeProvider === "grok" || activeProvider === "agy"
258
- ? imageFormatFromMime(("mime" in response ? response.mime : undefined) || detectImageMimeFromB64(response.b64) || "image/jpeg")
259
- : options.format ?? "png";
260
- const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
261
- provider: String(activeProvider),
262
- model: String(effectiveModel),
263
- });
264
- const responseText = "text" in response && typeof response.text === "string" ? response.text : null;
265
- return { image, webSearchCalls: response.webSearchCalls || 0, text: responseText, provider: activeProvider };
266
- }
267
- async function loadAgentCurrentImageReferences(ctx, sessionId) {
268
- const session = getAgentSession(sessionId);
269
- const currentImage = session?.lastImageId
270
- ? getAgentImages(sessionId).find((image) => image.id === session.lastImageId)
271
- : null;
272
- if (!currentImage?.filename)
273
- return [];
274
- try {
275
- const b64 = (await readFile(join(ctx.config.storage.generatedDir, currentImage.filename))).toString("base64");
276
- const mime = detectImageMimeFromB64(b64);
277
- return [{ b64, declaredMime: mime, detectedMime: mime }];
278
- }
279
- catch (error) {
280
- const err = errInfo(error);
281
- logEvent("agent", "grok_ref_missing", { sessionId, filename: currentImage.filename, code: err.code, message: err.message });
282
- return [];
283
- }
284
- }
285
- function imageFormatFromMime(mime) {
286
- if (mime === "image/jpeg")
287
- return "jpeg";
288
- if (mime === "image/webp")
289
- return "webp";
290
- return "png";
291
- }
292
- async function persistAgentImage(ctx, sessionId, prompt, format, requestId, response, generation) {
293
- await mkdir(ctx.config.storage.generatedDir, { recursive: true });
294
- const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
295
- const filename = `${Date.now()}_${rand}_agent.${format}`;
296
- const meta = {
297
- kind: "agent",
298
- requestId,
299
- sessionId,
300
- prompt,
301
- userPrompt: prompt,
302
- revisedPrompt: response.revisedPrompt ?? null,
303
- provider: generation.provider,
304
- model: generation.model,
305
- createdAt: Date.now(),
306
- usage: response.usage ?? null,
307
- webSearchCalls: response.webSearchCalls ?? 0,
308
- };
309
- const embedded = await embedImageMetadataBestEffort(Buffer.from(response.b64, "base64"), format, meta, {
310
- version: ctx.packageVersion,
311
- });
312
- const filePath = join(ctx.config.storage.generatedDir, filename);
313
- await writeFile(filePath, embedded.buffer);
314
- try {
315
- await atomicWriteJson(`${filePath}.json`, meta);
316
- }
317
- catch (err) {
318
- await unlink(filePath).catch(() => { });
319
- throw err;
320
- }
321
- invalidateHistoryIndex();
322
- logEvent("agent", "saved", { requestId, sessionId, filename });
323
- return importAgentImage(sessionId, {
324
- id: `ai_${ulid()}`,
325
- filename,
326
- url: `/generated/${filename}`,
327
- prompt,
328
- revisedPrompt: response.revisedPrompt ?? null,
329
- createdAt: Date.now(),
330
- });
331
- }
332
- export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
333
- const session = getAgentSession(sessionId);
334
- if (!session)
335
- throw notFound(sessionId);
336
- if (!options.skipUserTurn) {
337
- appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
338
- }
339
- const requestId = options.requestId ?? `agent_video_${ulid()}`;
340
- const startedAt = Date.now();
341
- // Auto I2V: if session has a last image, use it as source
342
- let sourceImage;
343
- let mode = "text-to-video";
344
- if (session.lastImageId) {
345
- const images = getAgentImages(sessionId);
346
- const lastImage = images.find((img) => img.id === session.lastImageId);
347
- if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
348
- try {
349
- const { loadAssetB64 } = await import("./nodeStore.js");
350
- sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
351
- mode = "image-to-video";
352
- }
353
- catch { /* fallback to T2V */ }
354
- }
355
- }
356
- const videoParams = parseVideoParams(prompt);
357
- const result = await generateVideoViaGrok(prompt, ctx, {
358
- model: "grok-imagine-video",
359
- mode,
360
- sourceImage,
361
- duration: videoParams.duration ?? 5,
362
- resolution: videoParams.resolution ?? "480p",
363
- aspectRatio: (videoParams.aspectRatio ?? "auto"),
364
- requestId,
365
- signal: options.signal ?? undefined,
366
- });
367
- const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
368
- const finishedAt = Date.now();
369
- const toolCall = {
370
- id: `tc_video_${ulid()}`,
371
- name: "ima2.generate_video",
372
- status: "complete",
373
- startedAt,
374
- finishedAt,
375
- durationMs: finishedAt - startedAt,
376
- requestId,
377
- inputSummary: prompt,
378
- outputSummary: `Generated video ${video.filename}.`,
379
- imageIds: [video.id],
380
- };
381
- appendAgentTurn({
382
- sessionId,
383
- role: "tool",
384
- text: "ima2.generate_video",
385
- imageIds: [video.id],
386
- status: "complete",
387
- raw: { toolCalls: [toolCall] },
388
- });
389
- const assistantTurn = appendAgentTurn({
390
- sessionId,
391
- role: "assistant",
392
- text: `Generated 1 video artifact. ${result.revisedPrompt}`,
393
- imageIds: [video.id],
394
- status: "complete",
395
- });
396
- return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
397
- }
398
- async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
399
- await mkdir(ctx.config.storage.generatedDir, { recursive: true });
400
- const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
401
- const filename = `${Date.now()}_${rand}_agent.mp4`;
402
- const meta = {
403
- kind: "agent",
404
- mediaType: "video",
405
- requestId,
406
- sessionId,
407
- prompt,
408
- userPrompt: prompt,
409
- revisedPrompt: result.revisedPrompt,
410
- provider: "grok",
411
- model: "grok-imagine-video",
412
- createdAt: Date.now(),
413
- usage: result.usage,
414
- webSearchCalls: result.webSearchCalls,
415
- };
416
- const filePath = join(ctx.config.storage.generatedDir, filename);
417
- await writeFile(filePath, result.videoBuffer);
418
- try {
419
- await atomicWriteJson(`${filePath}.json`, meta);
420
- }
421
- catch (err) {
422
- await unlink(filePath).catch(() => { });
423
- throw err;
424
- }
425
- invalidateHistoryIndex();
426
- logEvent("agent", "video_saved", { requestId, sessionId, filename });
427
- return importAgentImage(sessionId, {
428
- id: `ai_${ulid()}`,
429
- filename,
430
- url: `/generated/${filename}`,
431
- prompt,
432
- revisedPrompt: result.revisedPrompt,
433
- createdAt: Date.now(),
434
- });
435
- }
436
181
  function recordSearchFindings(sessionId, prompt, count, provider) {
437
182
  if (!count)
438
183
  return [];
@@ -446,14 +191,14 @@ function recordSearchFindings(sessionId, prompt, count, provider) {
446
191
  }),
447
192
  ];
448
193
  }
449
- function forceImagePrompt(prompt) {
194
+ export function forceImagePrompt(prompt) {
450
195
  return [
451
196
  "The previous turn did not return an image artifact.",
452
197
  "Return a final image using ima2.generate_image/image_generation now.",
453
198
  `User request: ${prompt}`,
454
199
  ].join("\n");
455
200
  }
456
- function isTextOnlyResult(error) {
201
+ export function isTextOnlyResult(error) {
457
202
  const err = errInfo(error);
458
203
  return [
459
204
  "EMPTY_RESPONSE",
@@ -462,7 +207,7 @@ function isTextOnlyResult(error) {
462
207
  "IMAGE_TOOL_COMPLETED_WITHOUT_RESULT",
463
208
  ].includes(err.code || "") || err.message.includes("No image data");
464
209
  }
465
- function textOnlyError(cause) {
210
+ export function textOnlyError(cause) {
466
211
  const err = new Error("Agent result did not include an image artifact.");
467
212
  err.code = "AGENT_TEXT_ONLY_RESULT";
468
213
  err.status = 422;
@@ -488,7 +233,7 @@ function cleanParallelism(value) {
488
233
  return 2;
489
234
  return Math.max(1, Math.min(8, Math.round(numeric)));
490
235
  }
491
- function notFound(sessionId) {
236
+ export function notFound(sessionId) {
492
237
  const err = new Error(`Agent session not found: ${sessionId}`);
493
238
  err.code = "AGENT_SESSION_NOT_FOUND";
494
239
  err.status = 404;