ima2-gen 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/CHANGELOG.md +150 -0
  2. package/README.md +12 -12
  3. package/bin/commands/backfillThumbs.js +24 -0
  4. package/bin/commands/edit.js +7 -6
  5. package/bin/commands/gen.js +13 -6
  6. package/bin/commands/multimode.js +5 -4
  7. package/bin/commands/node.js +4 -4
  8. package/bin/ima2.js +21 -11
  9. package/bin/lib/config-store.js +1 -1
  10. package/docs/API.md +184 -10
  11. package/docs/CLI.md +11 -4
  12. package/docs/FAQ.ko.md +16 -0
  13. package/docs/FAQ.md +30 -0
  14. package/docs/PROMPT_STUDIO.md +3 -1
  15. package/docs/README.ko.md +7 -3
  16. package/docs/migration/runtime-test-inventory.md +17 -1
  17. package/lib/agentImageVideoGen.js +261 -0
  18. package/lib/agentRuntime.js +11 -260
  19. package/lib/agentSettings.js +1 -1
  20. package/lib/agyImageAdapter.js +259 -0
  21. package/lib/capabilities.js +2 -1
  22. package/lib/configKeys.js +1 -1
  23. package/lib/errorClassify.js +8 -7
  24. package/lib/eventBus.js +71 -0
  25. package/lib/geminiApiImageAdapter.js +179 -0
  26. package/lib/generationErrors.js +3 -1
  27. package/lib/grokImageAdapter.js +74 -128
  28. package/lib/grokImageCore.js +153 -0
  29. package/lib/grokMultimodeAdapter.js +7 -4
  30. package/lib/grokRuntime.js +3 -0
  31. package/lib/grokSizeMapper.js +13 -1
  32. package/lib/grokVideoAdapter.js +14 -7
  33. package/lib/grokVideoCanvas.js +13 -0
  34. package/lib/grokVideoPlannerPrompt.js +53 -6
  35. package/lib/historyList.js +19 -2
  36. package/lib/imageModels.js +15 -0
  37. package/lib/imageThumb.js +38 -0
  38. package/lib/inflight.js +54 -17
  39. package/lib/multimodeHelpers.js +10 -0
  40. package/lib/nodeHelpers.js +59 -0
  41. package/lib/oauthProxy/prompts.js +30 -36
  42. package/lib/promptBuilder/systemPrompt.js +2 -5
  43. package/lib/promptSafetyPolicy.js +1 -5
  44. package/lib/providerOptions.js +36 -1
  45. package/lib/responsesFallback.js +53 -44
  46. package/lib/routeHelpers.js +44 -0
  47. package/lib/runtimeContext.js +27 -0
  48. package/lib/ssePublish.js +12 -0
  49. package/lib/storageMigration.js +1 -1
  50. package/lib/storyboardPrefix.js +28 -0
  51. package/lib/thumbBackfill.js +70 -0
  52. package/lib/vertexAuth.js +44 -0
  53. package/lib/videoThumb.js +60 -0
  54. package/package.json +7 -2
  55. package/routes/agy.js +44 -0
  56. package/routes/auth.js +242 -0
  57. package/routes/edit.js +48 -8
  58. package/routes/events.js +78 -0
  59. package/routes/generate.js +135 -135
  60. package/routes/history.js +13 -0
  61. package/routes/index.js +8 -0
  62. package/routes/keys.js +254 -0
  63. package/routes/multimode.js +138 -62
  64. package/routes/nodes.js +107 -129
  65. package/routes/quota.js +58 -7
  66. package/routes/video.js +107 -20
  67. package/server.js +123 -0
  68. package/skills/ima2/SKILL.md +98 -21
  69. package/ui/dist/.vite/manifest.json +12 -12
  70. package/ui/dist/assets/AgentWorkspace-Dth6YijN.js +3 -0
  71. package/ui/dist/assets/{CardNewsWorkspace-BN-ga1lG.js → CardNewsWorkspace-Dav3K5CT.js} +2 -2
  72. package/ui/dist/assets/{NodeCanvas-BbMa4IhI.js → NodeCanvas-C4ifFzB1.js} +2 -2
  73. package/ui/dist/assets/{PromptBuilderPanel-DRwBJRDQ.js → PromptBuilderPanel-CEcyU9PL.js} +1 -1
  74. package/ui/dist/assets/{PromptImportDialog-Dp85kHCq.js → PromptImportDialog-CgQ94Gth.js} +2 -2
  75. package/ui/dist/assets/{PromptImportDiscoverySection-BE8Q8MLD.js → PromptImportDiscoverySection-CuzyzbNI.js} +1 -1
  76. package/ui/dist/assets/{PromptImportFolderSection-PtH5x0sc.js → PromptImportFolderSection-DHLGlO6l.js} +1 -1
  77. package/ui/dist/assets/{PromptLibraryPanel-FnM9tHI9.js → PromptLibraryPanel-BOe18we8.js} +2 -2
  78. package/ui/dist/assets/SettingsWorkspace-Cdgnm4Wa.js +1 -0
  79. package/ui/dist/assets/index-C5PSahkr.js +1 -0
  80. package/ui/dist/assets/index-Dn2AhL6d.css +1 -0
  81. package/ui/dist/assets/index-Tjqx6wUV.js +23 -0
  82. package/ui/dist/index.html +2 -2
  83. package/ui/dist/assets/AgentWorkspace-C21zqdTZ.js +0 -3
  84. package/ui/dist/assets/SettingsWorkspace-MARPGyBL.js +0 -1
  85. package/ui/dist/assets/index-BAFI6htx.js +0 -42
  86. package/ui/dist/assets/index-BSXxr_Bt.js +0 -1
  87. package/ui/dist/assets/index-DS-ADE7U.css +0 -1
@@ -0,0 +1,261 @@
1
+ import { randomBytes } from "node:crypto";
2
+ import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
3
+ import { atomicWriteJson } from "./atomicWrite.js";
4
+ import { join } from "node:path";
5
+ import { ulid } from "ulid";
6
+ import { embedImageMetadataBestEffort } from "./imageMetadataStore.js";
7
+ import { invalidateHistoryIndex } from "./historyIndex.js";
8
+ import { logEvent } from "./logger.js";
9
+ import { detectImageMimeFromB64 } from "./refs.js";
10
+ import { resolveProviderOptions } from "./providerOptions.js";
11
+ import { generateViaResponses } from "./responsesImageAdapter.js";
12
+ import { generateViaGrok } from "./grokImageAdapter.js";
13
+ import { generateViaAgy } from "./agyImageAdapter.js";
14
+ import { generateVideoViaGrok } from "./grokVideoAdapter.js";
15
+ import { parseVideoParams } from "./agentGenerationPlanner.js";
16
+ import { appendAgentTurn, getAgentImages, getAgentSession, importAgentImage, } from "./agentStore.js";
17
+ import { errInfo } from "./errInfo.js";
18
+ import { forceImagePrompt, isTextOnlyResult, textOnlyError, notFound } from "./agentRuntime.js";
19
/**
 * Generates an agent image, retrying once with a forced-image prompt when
 * the first attempt fails with a "text-only" error.
 *
 * Attempt 1 uses `prompt` as given; attempt 2 wraps it with
 * `forceImagePrompt`. Errors that `isTextOnlyResult` does not recognise are
 * rethrown immediately. A rejected first attempt is recorded in the session
 * as an error-status tool turn before retrying.
 *
 * @param {object} ctx - Runtime context forwarded to `generateAgentImage`.
 * @param {string} sessionId - Agent session receiving the turns and image.
 * @param {string} prompt - User request text.
 * @param {string} manifest - Context text placed ahead of the user request.
 * @param {boolean} webSearchEnabled - Forwarded to `generateAgentImage`.
 * @param {object} options - Generation options forwarded unchanged.
 * @returns {Promise<object>} The first result whose `image` is truthy.
 * @throws The original error when it is not a text-only failure, otherwise
 *   the error built by `textOnlyError` from the last caught failure (or from
 *   `null` when no attempt threw).
 */
export async function generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
    const maxAttempts = 2;
    let failure = null;
    for (let n = 0; n < maxAttempts; n += 1) {
        const isRetry = n > 0;
        try {
            const attemptPrompt = isRetry ? forceImagePrompt(prompt) : prompt;
            const outcome = await generateAgentImage(ctx, sessionId, attemptPrompt, manifest, webSearchEnabled, options);
            if (outcome.image) {
                return outcome;
            }
            // No image and no exception: fall through to the next attempt.
        }
        catch (error) {
            failure = error;
            if (!isTextOnlyResult(error)) {
                throw error;
            }
            if (isRetry) {
                // The retry itself was text-only; give up below.
                break;
            }
            appendAgentTurn({
                sessionId,
                role: "tool",
                text: "ima2.generate_image retry: text-only result rejected",
                status: "error",
            });
        }
    }
    throw textOnlyError(failure);
}
44
/**
 * Runs a single image generation for an agent session and persists the
 * result.
 *
 * Provider options are resolved first; a resolution error is rethrown as an
 * `Error` carrying the resolver's `code` and `status`. The request is then
 * dispatched by provider:
 *   - "agy"  -> `generateViaAgy`
 *   - "grok" -> `generateViaGrok`, with the session's current image attached
 *               as a reference and the quality model substituted when
 *               `options.quality === "high"`
 *   - other  -> `generateViaResponses`
 * All three receive the same prompt: the manifest followed by the user
 * request.
 *
 * @param {object} ctx - Runtime context.
 * @param {string} sessionId - Agent session the image belongs to.
 * @param {string} prompt - User request text (also stored as the image prompt).
 * @param {string} manifest - Context text placed ahead of the user request.
 * @param {boolean} webSearchEnabled - Whether web search is requested.
 * @param {object} options - Reads `requestId`, `provider`, `model`,
 *   `reasoningEffort`, `size`, `quality`, `moderation`, `format`, `signal`.
 * @returns {Promise<{image: object, webSearchCalls: number, text: (string|null), provider: string}>}
 */
async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
    const requestId = options.requestId ?? `agent_${ulid()}`;
    const resolved = resolveProviderOptions(ctx, {
        provider: options.provider ?? "oauth",
        rawModel: options.model,
        rawReasoningEffort: options.reasoningEffort,
        rawSize: options.size ?? "1024x1024",
        rawWebSearchEnabled: webSearchEnabled,
        searchMode: webSearchEnabled ? "on" : "off",
    });
    if (resolved.error) {
        const failure = new Error(resolved.error);
        failure.code = resolved.code;
        failure.status = resolved.status;
        throw failure;
    }

    const activeProvider = resolved.provider;
    const isGrok = activeProvider === "grok";
    const isAgy = activeProvider === "agy";
    let effectiveModel = resolved.model;
    if (isGrok && options.quality === "high") {
        effectiveModel = "grok-imagine-image-quality";
    }

    const fullPrompt = `${manifest}\n\nUser request:\n${prompt}`;
    let response;
    if (isAgy) {
        response = await generateViaAgy(fullPrompt, {
            requestId,
            signal: options.signal ?? undefined,
        });
    }
    else if (isGrok) {
        const references = await loadAgentCurrentImageReferences(ctx, sessionId);
        response = await generateViaGrok(fullPrompt, ctx, {
            model: effectiveModel,
            size: resolved.size,
            requestId,
            signal: options.signal ?? undefined,
            references,
        });
    }
    else {
        // NOTE(review): the positional `[]` and "auto" arguments are passed
        // through as in the adapter's signature; their meaning is defined in
        // responsesImageAdapter.js, not here.
        response = await generateViaResponses(activeProvider, fullPrompt, options.quality ?? "medium", resolved.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
            model: resolved.model,
            reasoningEffort: resolved.reasoningEffort,
            webSearchEnabled,
            signal: options.signal,
        });
    }

    // Grok/Agy decide the encoding themselves, so the file extension follows
    // the returned (or sniffed) MIME type; other providers use the requested
    // format.
    let format;
    if (isGrok || isAgy) {
        const reportedMime = "mime" in response ? response.mime : undefined;
        format = imageFormatFromMime(reportedMime || detectImageMimeFromB64(response.b64) || "image/jpeg");
    }
    else {
        format = options.format ?? "png";
    }

    const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
        provider: String(activeProvider),
        model: String(effectiveModel),
    });
    const hasText = "text" in response && typeof response.text === "string";
    return {
        image,
        webSearchCalls: response.webSearchCalls || 0,
        text: hasText ? response.text : null,
        provider: activeProvider,
    };
}
93
/**
 * Loads the session's current artifact (the one `session.lastImageId` points
 * at) as a single-element reference list for image generation.
 *
 * Returns an empty list when the session has no current artifact, when the
 * artifact has no filename, when it is a video, or when the file cannot be
 * read (the read failure is logged as `grok_ref_missing`).
 *
 * @param {object} ctx - Runtime context; reads `ctx.config.storage.generatedDir`.
 * @param {string} sessionId - Agent session to inspect.
 * @returns {Promise<Array<{b64: string, declaredMime: *, detectedMime: *}>>}
 */
async function loadAgentCurrentImageReferences(ctx, sessionId) {
    const session = getAgentSession(sessionId);
    const currentImage = session?.lastImageId
        ? getAgentImages(sessionId).find((image) => image.id === session.lastImageId)
        : null;
    if (!currentImage?.filename)
        return [];
    // persistAgentVideo registers its .mp4 output through importAgentImage,
    // and runAgentVideoGeneration already guards against the current artifact
    // being an .mp4. Apply the same guard here so a video is never base64'd
    // and sent as an image reference.
    if (typeof currentImage.filename === "string" && currentImage.filename.endsWith(".mp4"))
        return [];
    try {
        const b64 = (await readFile(join(ctx.config.storage.generatedDir, currentImage.filename))).toString("base64");
        // The MIME type is sniffed from the bytes and reported as both the
        // declared and the detected type.
        const mime = detectImageMimeFromB64(b64);
        return [{ b64, declaredMime: mime, detectedMime: mime }];
    }
    catch (error) {
        // Best effort: a missing or unreadable file must not fail generation.
        const err = errInfo(error);
        logEvent("agent", "grok_ref_missing", { sessionId, filename: currentImage.filename, code: err.code, message: err.message });
        return [];
    }
}
111
/**
 * Maps an image MIME type to the file-format/extension token used when
 * saving generated images.
 *
 * Matching is done on the media type only: parameters after `;` are ignored,
 * surrounding whitespace is trimmed and case is folded, because media types
 * are case-insensitive. The non-standard but common `image/jpg` spelling is
 * treated as JPEG.
 *
 * @param {*} mime - MIME type string, e.g. "image/jpeg".
 * @returns {"jpeg"|"webp"|"png"} "png" for anything unrecognised, including
 *   non-string input.
 */
function imageFormatFromMime(mime) {
    if (typeof mime !== "string")
        return "png";
    const mediaType = mime.split(";", 1)[0].trim().toLowerCase();
    switch (mediaType) {
        case "image/jpeg":
        case "image/jpg":
            return "jpeg";
        case "image/webp":
            return "webp";
        default:
            return "png";
    }
}
118
/**
 * Writes a generated image and its JSON metadata file into the generated
 * directory, then registers the image with the agent session.
 *
 * Steps, in order: ensure the directory exists, build a
 * `<timestamp>_<random hex>_agent.<format>` filename, embed the metadata into
 * the image bytes via `embedImageMetadataBestEffort`, write the image, write
 * `<image path>.json`, invalidate the history index, log `saved`, and call
 * `importAgentImage`. If the JSON write fails the image file is removed
 * again (ignoring removal errors) and the error is rethrown.
 *
 * @param {object} ctx - Reads `config.storage.generatedDir`,
 *   `config.ids.generatedHexBytes` and `packageVersion`.
 * @param {string} sessionId - Agent session that owns the image.
 * @param {string} prompt - Stored as both `prompt` and `userPrompt`.
 * @param {string} format - File extension / format token.
 * @param {string} requestId - Request identifier stored in the metadata.
 * @param {object} response - Adapter response; reads `b64`, `revisedPrompt`,
 *   `usage`, `webSearchCalls`.
 * @param {{provider: string, model: string}} generation - Provider and model
 *   recorded in the metadata.
 * @returns {Promise<object>} Whatever `importAgentImage` returns.
 */
async function persistAgentImage(ctx, sessionId, prompt, format, requestId, response, generation) {
    const { generatedDir } = ctx.config.storage;
    await mkdir(generatedDir, { recursive: true });

    const suffix = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
    const filename = `${Date.now()}_${suffix}_agent.${format}`;
    const revisedPrompt = response.revisedPrompt ?? null;
    const meta = {
        kind: "agent",
        requestId,
        sessionId,
        prompt,
        userPrompt: prompt,
        revisedPrompt,
        provider: generation.provider,
        model: generation.model,
        createdAt: Date.now(),
        usage: response.usage ?? null,
        webSearchCalls: response.webSearchCalls ?? 0,
    };

    const rawBytes = Buffer.from(response.b64, "base64");
    const embedded = await embedImageMetadataBestEffort(rawBytes, format, meta, {
        version: ctx.packageVersion,
    });

    const filePath = join(generatedDir, filename);
    const sidecarPath = `${filePath}.json`;
    await writeFile(filePath, embedded.buffer);
    try {
        await atomicWriteJson(sidecarPath, meta);
    }
    catch (err) {
        // Do not leave an image behind without its metadata file.
        await unlink(filePath).catch(() => { });
        throw err;
    }

    invalidateHistoryIndex();
    logEvent("agent", "saved", { requestId, sessionId, filename });

    const record = {
        id: `ai_${ulid()}`,
        filename,
        url: `/generated/${filename}`,
        prompt,
        revisedPrompt,
        createdAt: Date.now(),
    };
    return importAgentImage(sessionId, record);
}
158
/**
 * Generates a video for an agent session with the Grok video adapter,
 * persists it, and records the tool and assistant turns.
 *
 * When the session's current artifact is a non-`.mp4` file that can be
 * loaded, it is used as the source image and the request runs as
 * "image-to-video"; otherwise the request runs as "text-to-video". Duration,
 * resolution and aspect ratio come from `parseVideoParams(prompt)` with
 * defaults of 5, "480p" and "auto".
 *
 * @param {object} ctx - Runtime context; reads `rootDir` and
 *   `config.storage.generatedDir`.
 * @param {string} sessionId - Agent session to generate for.
 * @param {string} prompt - User request text.
 * @param {object} [options] - Reads `skipUserTurn`, `requestId`, `signal`.
 * @returns {Promise<{assistantTurn: object, imageIds: string[], webFindingIds: Array}>}
 * @throws The error from `notFound` when the session does not exist; adapter
 *   and persistence errors propagate unchanged.
 */
export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
    const session = getAgentSession(sessionId);
    if (!session)
        throw notFound(sessionId);
    if (!options.skipUserTurn) {
        appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
    }
    const requestId = options.requestId ?? `agent_video_${ulid()}`;
    const startedAt = Date.now();
    // Auto I2V: if session has a last image, use it as source
    let sourceImage;
    let mode = "text-to-video";
    if (session.lastImageId) {
        const images = getAgentImages(sessionId);
        const lastImage = images.find((img) => img.id === session.lastImageId);
        if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
            try {
                const { loadAssetB64 } = await import("./nodeStore.js");
                sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
                mode = "image-to-video";
            }
            catch (error) {
                // Still fall back to text-to-video, but leave a trace so a
                // missing source image is diagnosable instead of silent.
                const err = errInfo(error);
                logEvent("agent", "video_source_missing", { sessionId, requestId, filename: lastImage.filename, code: err.code, message: err.message });
            }
        }
    }
    const videoParams = parseVideoParams(prompt);
    const result = await generateVideoViaGrok(prompt, ctx, {
        model: "grok-imagine-video",
        mode,
        sourceImage,
        duration: videoParams.duration ?? 5,
        resolution: videoParams.resolution ?? "480p",
        aspectRatio: (videoParams.aspectRatio ?? "auto"),
        requestId,
        signal: options.signal ?? undefined,
    });
    const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
    const finishedAt = Date.now();
    const toolCall = {
        id: `tc_video_${ulid()}`,
        name: "ima2.generate_video",
        status: "complete",
        startedAt,
        finishedAt,
        durationMs: finishedAt - startedAt,
        requestId,
        inputSummary: prompt,
        outputSummary: `Generated video ${video.filename}.`,
        imageIds: [video.id],
    };
    appendAgentTurn({
        sessionId,
        role: "tool",
        text: "ima2.generate_video",
        imageIds: [video.id],
        status: "complete",
        raw: { toolCalls: [toolCall] },
    });
    // Only append the revised prompt when the adapter returned one, so the
    // user never sees a literal "undefined" in the assistant message.
    const summary = result.revisedPrompt
        ? `Generated 1 video artifact. ${result.revisedPrompt}`
        : "Generated 1 video artifact.";
    const assistantTurn = appendAgentTurn({
        sessionId,
        role: "assistant",
        text: summary,
        imageIds: [video.id],
        status: "complete",
    });
    return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
}
224
/**
 * Writes a generated video and its JSON metadata file into the generated
 * directory, then registers the video with the agent session through
 * `importAgentImage`.
 *
 * The file is named `<timestamp>_<random hex>_agent.mp4`. If writing
 * `<video path>.json` fails, the video file is removed again (ignoring
 * removal errors) and the error is rethrown. Optional adapter fields are
 * normalised the same way `persistAgentImage` does (`revisedPrompt` and
 * `usage` to `null`, `webSearchCalls` to `0`) so video and image metadata
 * always carry the same keys.
 *
 * @param {object} ctx - Reads `config.storage.generatedDir` and
 *   `config.ids.generatedHexBytes`.
 * @param {string} sessionId - Agent session that owns the video.
 * @param {string} prompt - Stored as both `prompt` and `userPrompt`.
 * @param {string} requestId - Request identifier stored in the metadata.
 * @param {object} result - Adapter result; reads `videoBuffer`,
 *   `revisedPrompt`, `usage`, `webSearchCalls`.
 * @returns {Promise<object>} Whatever `importAgentImage` returns.
 */
async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
    await mkdir(ctx.config.storage.generatedDir, { recursive: true });
    const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
    const filename = `${Date.now()}_${rand}_agent.mp4`;
    const revisedPrompt = result.revisedPrompt ?? null;
    const meta = {
        kind: "agent",
        mediaType: "video",
        requestId,
        sessionId,
        prompt,
        userPrompt: prompt,
        revisedPrompt,
        provider: "grok",
        model: "grok-imagine-video",
        createdAt: Date.now(),
        usage: result.usage ?? null,
        webSearchCalls: result.webSearchCalls ?? 0,
    };
    const filePath = join(ctx.config.storage.generatedDir, filename);
    await writeFile(filePath, result.videoBuffer);
    try {
        await atomicWriteJson(`${filePath}.json`, meta);
    }
    catch (err) {
        // Do not leave a video behind without its metadata file.
        await unlink(filePath).catch(() => { });
        throw err;
    }
    invalidateHistoryIndex();
    logEvent("agent", "video_saved", { requestId, sessionId, filename });
    return importAgentImage(sessionId, {
        id: `ai_${ulid()}`,
        filename,
        url: `/generated/${filename}`,
        prompt,
        revisedPrompt,
        createdAt: Date.now(),
    });
}
@@ -1,18 +1,7 @@
1
- import { randomBytes } from "node:crypto";
2
- import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
3
- import { atomicWriteJson } from "./atomicWrite.js";
4
- import { join } from "node:path";
5
1
  import { ulid } from "ulid";
6
- import { embedImageMetadataBestEffort } from "./imageMetadataStore.js";
7
- import { invalidateHistoryIndex } from "./historyIndex.js";
8
- import { logEvent } from "./logger.js";
9
- import { detectImageMimeFromB64 } from "./refs.js";
10
- import { resolveProviderOptions } from "./providerOptions.js";
11
- import { generateViaResponses } from "./responsesImageAdapter.js";
12
- import { generateViaGrok } from "./grokImageAdapter.js";
13
- import { generateVideoViaGrok } from "./grokVideoAdapter.js";
14
- import { parseVideoParams } from "./agentGenerationPlanner.js";
15
- import { appendAgentTurn, buildImageContextManifest, getAgentImages, getAgentSession, importAgentImage, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
2
+ import { generateAgentImageWithRetry } from "./agentImageVideoGen.js";
3
+ import { runAgentVideoGeneration } from "./agentImageVideoGen.js";
4
+ import { appendAgentTurn, buildImageContextManifest, getAgentSession, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
16
5
  import { AGENT_ALLOWED_TOOLS, } from "./agentTypes.js";
17
6
  import { errInfo } from "./errInfo.js";
18
7
  export function assertAgentAllowedTools(tools) {
@@ -46,7 +35,7 @@ export async function runAgentGenerationPlan(ctx, sessionId, prompt, plan, optio
46
35
  const session = getAgentSession(sessionId);
47
36
  if (!session)
48
37
  throw notFound(sessionId);
49
- const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
38
+ const webSearchEnabled = options.provider === "agy" ? false : options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
50
39
  const enabledTools = webSearchEnabled
51
40
  ? [...AGENT_ALLOWED_TOOLS]
52
41
  : ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
@@ -189,265 +178,27 @@ export function isRuntimeRestartableError(error) {
189
178
  code.includes("PROTOCOL") ||
190
179
  err.message.toLowerCase().includes("protocol wedge"));
191
180
  }
192
- async function generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
193
- let lastError = null;
194
- for (let attempt = 0; attempt < 2; attempt++) {
195
- try {
196
- const forcedPrompt = attempt === 0 ? prompt : forceImagePrompt(prompt);
197
- const result = await generateAgentImage(ctx, sessionId, forcedPrompt, manifest, webSearchEnabled, options);
198
- if (result.image)
199
- return result;
200
- }
201
- catch (error) {
202
- lastError = error;
203
- if (!isTextOnlyResult(error))
204
- throw error;
205
- if (attempt === 1)
206
- break;
207
- appendAgentTurn({
208
- sessionId,
209
- role: "tool",
210
- text: "ima2.generate_image retry: text-only result rejected",
211
- status: "error",
212
- });
213
- }
214
- }
215
- throw textOnlyError(lastError);
216
- }
217
- async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
218
- const requestId = options.requestId ?? `agent_${ulid()}`;
219
- const providerOptions = resolveProviderOptions(ctx, {
220
- provider: options.provider ?? "oauth",
221
- rawModel: options.model,
222
- rawReasoningEffort: options.reasoningEffort,
223
- rawSize: options.size ?? "1024x1024",
224
- rawWebSearchEnabled: webSearchEnabled,
225
- searchMode: webSearchEnabled ? "on" : "off",
226
- });
227
- if (providerOptions.error) {
228
- const err = new Error(providerOptions.error);
229
- err.code = providerOptions.code;
230
- err.status = providerOptions.status;
231
- throw err;
232
- }
233
- const activeProvider = providerOptions.provider;
234
- const effectiveModel = activeProvider === "grok" && options.quality === "high"
235
- ? "grok-imagine-image-quality"
236
- : providerOptions.model;
237
- const response = activeProvider === "grok"
238
- ? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
239
- model: effectiveModel,
240
- size: providerOptions.size,
241
- requestId,
242
- signal: options.signal ?? undefined,
243
- references: await loadAgentCurrentImageReferences(ctx, sessionId),
244
- })
245
- : await generateViaResponses(activeProvider, `${manifest}\n\nUser request:\n${prompt}`, options.quality ?? "medium", providerOptions.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
246
- model: providerOptions.model,
247
- reasoningEffort: providerOptions.reasoningEffort,
248
- webSearchEnabled,
249
- signal: options.signal,
250
- });
251
- const format = activeProvider === "grok"
252
- ? imageFormatFromMime(("mime" in response ? response.mime : undefined) || detectImageMimeFromB64(response.b64) || "image/jpeg")
253
- : options.format ?? "png";
254
- const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
255
- provider: String(activeProvider),
256
- model: String(effectiveModel),
257
- });
258
- const responseText = "text" in response && typeof response.text === "string" ? response.text : null;
259
- return { image, webSearchCalls: response.webSearchCalls || 0, text: responseText, provider: activeProvider };
260
- }
261
- async function loadAgentCurrentImageReferences(ctx, sessionId) {
262
- const session = getAgentSession(sessionId);
263
- const currentImage = session?.lastImageId
264
- ? getAgentImages(sessionId).find((image) => image.id === session.lastImageId)
265
- : null;
266
- if (!currentImage?.filename)
267
- return [];
268
- try {
269
- const b64 = (await readFile(join(ctx.config.storage.generatedDir, currentImage.filename))).toString("base64");
270
- const mime = detectImageMimeFromB64(b64);
271
- return [{ b64, declaredMime: mime, detectedMime: mime }];
272
- }
273
- catch (error) {
274
- const err = errInfo(error);
275
- logEvent("agent", "grok_ref_missing", { sessionId, filename: currentImage.filename, code: err.code, message: err.message });
276
- return [];
277
- }
278
- }
279
- function imageFormatFromMime(mime) {
280
- if (mime === "image/jpeg")
281
- return "jpeg";
282
- if (mime === "image/webp")
283
- return "webp";
284
- return "png";
285
- }
286
- async function persistAgentImage(ctx, sessionId, prompt, format, requestId, response, generation) {
287
- await mkdir(ctx.config.storage.generatedDir, { recursive: true });
288
- const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
289
- const filename = `${Date.now()}_${rand}_agent.${format}`;
290
- const meta = {
291
- kind: "agent",
292
- requestId,
293
- sessionId,
294
- prompt,
295
- userPrompt: prompt,
296
- revisedPrompt: response.revisedPrompt ?? null,
297
- provider: generation.provider,
298
- model: generation.model,
299
- createdAt: Date.now(),
300
- usage: response.usage ?? null,
301
- webSearchCalls: response.webSearchCalls ?? 0,
302
- };
303
- const embedded = await embedImageMetadataBestEffort(Buffer.from(response.b64, "base64"), format, meta, {
304
- version: ctx.packageVersion,
305
- });
306
- const filePath = join(ctx.config.storage.generatedDir, filename);
307
- await writeFile(filePath, embedded.buffer);
308
- try {
309
- await atomicWriteJson(`${filePath}.json`, meta);
310
- }
311
- catch (err) {
312
- await unlink(filePath).catch(() => { });
313
- throw err;
314
- }
315
- invalidateHistoryIndex();
316
- logEvent("agent", "saved", { requestId, sessionId, filename });
317
- return importAgentImage(sessionId, {
318
- id: `ai_${ulid()}`,
319
- filename,
320
- url: `/generated/${filename}`,
321
- prompt,
322
- revisedPrompt: response.revisedPrompt ?? null,
323
- createdAt: Date.now(),
324
- });
325
- }
326
- export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
327
- const session = getAgentSession(sessionId);
328
- if (!session)
329
- throw notFound(sessionId);
330
- if (!options.skipUserTurn) {
331
- appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
332
- }
333
- const requestId = options.requestId ?? `agent_video_${ulid()}`;
334
- const startedAt = Date.now();
335
- // Auto I2V: if session has a last image, use it as source
336
- let sourceImage;
337
- let mode = "text-to-video";
338
- if (session.lastImageId) {
339
- const images = getAgentImages(sessionId);
340
- const lastImage = images.find((img) => img.id === session.lastImageId);
341
- if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
342
- try {
343
- const { loadAssetB64 } = await import("./nodeStore.js");
344
- sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
345
- mode = "image-to-video";
346
- }
347
- catch { /* fallback to T2V */ }
348
- }
349
- }
350
- const videoParams = parseVideoParams(prompt);
351
- const result = await generateVideoViaGrok(prompt, ctx, {
352
- model: "grok-imagine-video",
353
- mode,
354
- sourceImage,
355
- duration: videoParams.duration ?? 5,
356
- resolution: videoParams.resolution ?? "480p",
357
- aspectRatio: (videoParams.aspectRatio ?? "auto"),
358
- requestId,
359
- signal: options.signal ?? undefined,
360
- });
361
- const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
362
- const finishedAt = Date.now();
363
- const toolCall = {
364
- id: `tc_video_${ulid()}`,
365
- name: "ima2.generate_video",
366
- status: "complete",
367
- startedAt,
368
- finishedAt,
369
- durationMs: finishedAt - startedAt,
370
- requestId,
371
- inputSummary: prompt,
372
- outputSummary: `Generated video ${video.filename}.`,
373
- imageIds: [video.id],
374
- };
375
- appendAgentTurn({
376
- sessionId,
377
- role: "tool",
378
- text: "ima2.generate_video",
379
- imageIds: [video.id],
380
- status: "complete",
381
- raw: { toolCalls: [toolCall] },
382
- });
383
- const assistantTurn = appendAgentTurn({
384
- sessionId,
385
- role: "assistant",
386
- text: `Generated 1 video artifact. ${result.revisedPrompt}`,
387
- imageIds: [video.id],
388
- status: "complete",
389
- });
390
- return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
391
- }
392
- async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
393
- await mkdir(ctx.config.storage.generatedDir, { recursive: true });
394
- const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
395
- const filename = `${Date.now()}_${rand}_agent.mp4`;
396
- const meta = {
397
- kind: "agent",
398
- mediaType: "video",
399
- requestId,
400
- sessionId,
401
- prompt,
402
- userPrompt: prompt,
403
- revisedPrompt: result.revisedPrompt,
404
- provider: "grok",
405
- model: "grok-imagine-video",
406
- createdAt: Date.now(),
407
- usage: result.usage,
408
- webSearchCalls: result.webSearchCalls,
409
- };
410
- const filePath = join(ctx.config.storage.generatedDir, filename);
411
- await writeFile(filePath, result.videoBuffer);
412
- try {
413
- await atomicWriteJson(`${filePath}.json`, meta);
414
- }
415
- catch (err) {
416
- await unlink(filePath).catch(() => { });
417
- throw err;
418
- }
419
- invalidateHistoryIndex();
420
- logEvent("agent", "video_saved", { requestId, sessionId, filename });
421
- return importAgentImage(sessionId, {
422
- id: `ai_${ulid()}`,
423
- filename,
424
- url: `/generated/${filename}`,
425
- prompt,
426
- revisedPrompt: result.revisedPrompt,
427
- createdAt: Date.now(),
428
- });
429
- }
430
181
  function recordSearchFindings(sessionId, prompt, count, provider) {
431
182
  if (!count)
432
183
  return [];
433
- const isGrok = provider === "grok";
184
+ const providerLabel = provider === "grok" ? "Grok" : provider === "agy" ? "Gemini" : "Responses";
434
185
  return [
435
186
  recordAgentWebFinding({
436
187
  sessionId,
437
188
  query: prompt,
438
- title: isGrok ? "Grok visual research" : "Responses web_search",
439
- snippet: `${isGrok ? "Grok" : "Responses"} reported ${count} web search call${count === 1 ? "" : "s"}.`,
189
+ title: `${providerLabel} visual research`,
190
+ snippet: `${providerLabel} reported ${count} web search call${count === 1 ? "" : "s"}.`,
440
191
  }),
441
192
  ];
442
193
  }
443
- function forceImagePrompt(prompt) {
194
+ export function forceImagePrompt(prompt) {
444
195
  return [
445
196
  "The previous turn did not return an image artifact.",
446
197
  "Return a final image using ima2.generate_image/image_generation now.",
447
198
  `User request: ${prompt}`,
448
199
  ].join("\n");
449
200
  }
450
- function isTextOnlyResult(error) {
201
+ export function isTextOnlyResult(error) {
451
202
  const err = errInfo(error);
452
203
  return [
453
204
  "EMPTY_RESPONSE",
@@ -456,7 +207,7 @@ function isTextOnlyResult(error) {
456
207
  "IMAGE_TOOL_COMPLETED_WITHOUT_RESULT",
457
208
  ].includes(err.code || "") || err.message.includes("No image data");
458
209
  }
459
- function textOnlyError(cause) {
210
+ export function textOnlyError(cause) {
460
211
  const err = new Error("Agent result did not include an image artifact.");
461
212
  err.code = "AGENT_TEXT_ONLY_RESULT";
462
213
  err.status = 422;
@@ -482,7 +233,7 @@ function cleanParallelism(value) {
482
233
  return 2;
483
234
  return Math.max(1, Math.min(8, Math.round(numeric)));
484
235
  }
485
- function notFound(sessionId) {
236
+ export function notFound(sessionId) {
486
237
  const err = new Error(`Agent session not found: ${sessionId}`);
487
238
  err.code = "AGENT_SESSION_NOT_FOUND";
488
239
  err.status = 404;
@@ -1,4 +1,4 @@
1
- const PROVIDERS = new Set(["oauth", "api", "grok"]);
1
+ const PROVIDERS = new Set(["oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
2
2
  const QUALITIES = new Set(["low", "medium", "high"]);
3
3
  const FORMATS = new Set(["png", "jpeg", "webp"]);
4
4
  const MODERATIONS = new Set(["auto", "low"]);