ima2-gen 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -0
- package/README.md +12 -12
- package/bin/commands/backfillThumbs.js +24 -0
- package/bin/commands/edit.js +7 -6
- package/bin/commands/gen.js +13 -6
- package/bin/commands/multimode.js +5 -4
- package/bin/commands/node.js +4 -4
- package/bin/ima2.js +21 -11
- package/bin/lib/config-store.js +1 -1
- package/docs/API.md +184 -10
- package/docs/CLI.md +11 -4
- package/docs/FAQ.ko.md +16 -0
- package/docs/FAQ.md +30 -0
- package/docs/PROMPT_STUDIO.md +3 -1
- package/docs/README.ko.md +7 -3
- package/docs/migration/runtime-test-inventory.md +17 -1
- package/lib/agentImageVideoGen.js +261 -0
- package/lib/agentRuntime.js +11 -260
- package/lib/agentSettings.js +1 -1
- package/lib/agyImageAdapter.js +259 -0
- package/lib/capabilities.js +2 -1
- package/lib/configKeys.js +1 -1
- package/lib/errorClassify.js +8 -7
- package/lib/eventBus.js +71 -0
- package/lib/geminiApiImageAdapter.js +179 -0
- package/lib/generationErrors.js +3 -1
- package/lib/grokImageAdapter.js +74 -128
- package/lib/grokImageCore.js +153 -0
- package/lib/grokMultimodeAdapter.js +7 -4
- package/lib/grokRuntime.js +3 -0
- package/lib/grokSizeMapper.js +13 -1
- package/lib/grokVideoAdapter.js +14 -7
- package/lib/grokVideoCanvas.js +13 -0
- package/lib/grokVideoPlannerPrompt.js +53 -6
- package/lib/historyList.js +19 -2
- package/lib/imageModels.js +15 -0
- package/lib/imageThumb.js +38 -0
- package/lib/inflight.js +54 -17
- package/lib/multimodeHelpers.js +10 -0
- package/lib/nodeHelpers.js +59 -0
- package/lib/oauthProxy/prompts.js +30 -36
- package/lib/promptBuilder/systemPrompt.js +2 -5
- package/lib/promptSafetyPolicy.js +1 -5
- package/lib/providerOptions.js +36 -1
- package/lib/responsesFallback.js +53 -44
- package/lib/routeHelpers.js +44 -0
- package/lib/runtimeContext.js +27 -0
- package/lib/ssePublish.js +12 -0
- package/lib/storageMigration.js +1 -1
- package/lib/storyboardPrefix.js +28 -0
- package/lib/thumbBackfill.js +70 -0
- package/lib/vertexAuth.js +44 -0
- package/lib/videoThumb.js +60 -0
- package/package.json +7 -2
- package/routes/agy.js +44 -0
- package/routes/auth.js +242 -0
- package/routes/edit.js +48 -8
- package/routes/events.js +78 -0
- package/routes/generate.js +135 -135
- package/routes/history.js +13 -0
- package/routes/index.js +8 -0
- package/routes/keys.js +254 -0
- package/routes/multimode.js +138 -62
- package/routes/nodes.js +107 -129
- package/routes/quota.js +58 -7
- package/routes/video.js +107 -20
- package/server.js +123 -0
- package/skills/ima2/SKILL.md +98 -21
- package/ui/dist/.vite/manifest.json +12 -12
- package/ui/dist/assets/AgentWorkspace-Dth6YijN.js +3 -0
- package/ui/dist/assets/{CardNewsWorkspace-BN-ga1lG.js → CardNewsWorkspace-Dav3K5CT.js} +2 -2
- package/ui/dist/assets/{NodeCanvas-BbMa4IhI.js → NodeCanvas-C4ifFzB1.js} +2 -2
- package/ui/dist/assets/{PromptBuilderPanel-DRwBJRDQ.js → PromptBuilderPanel-CEcyU9PL.js} +1 -1
- package/ui/dist/assets/{PromptImportDialog-Dp85kHCq.js → PromptImportDialog-CgQ94Gth.js} +2 -2
- package/ui/dist/assets/{PromptImportDiscoverySection-BE8Q8MLD.js → PromptImportDiscoverySection-CuzyzbNI.js} +1 -1
- package/ui/dist/assets/{PromptImportFolderSection-PtH5x0sc.js → PromptImportFolderSection-DHLGlO6l.js} +1 -1
- package/ui/dist/assets/{PromptLibraryPanel-FnM9tHI9.js → PromptLibraryPanel-BOe18we8.js} +2 -2
- package/ui/dist/assets/SettingsWorkspace-Cdgnm4Wa.js +1 -0
- package/ui/dist/assets/index-C5PSahkr.js +1 -0
- package/ui/dist/assets/index-Dn2AhL6d.css +1 -0
- package/ui/dist/assets/index-Tjqx6wUV.js +23 -0
- package/ui/dist/index.html +2 -2
- package/ui/dist/assets/AgentWorkspace-C21zqdTZ.js +0 -3
- package/ui/dist/assets/SettingsWorkspace-MARPGyBL.js +0 -1
- package/ui/dist/assets/index-BAFI6htx.js +0 -42
- package/ui/dist/assets/index-BSXxr_Bt.js +0 -1
- package/ui/dist/assets/index-DS-ADE7U.css +0 -1
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import { randomBytes } from "node:crypto";
|
|
2
|
+
import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
|
|
3
|
+
import { atomicWriteJson } from "./atomicWrite.js";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { ulid } from "ulid";
|
|
6
|
+
import { embedImageMetadataBestEffort } from "./imageMetadataStore.js";
|
|
7
|
+
import { invalidateHistoryIndex } from "./historyIndex.js";
|
|
8
|
+
import { logEvent } from "./logger.js";
|
|
9
|
+
import { detectImageMimeFromB64 } from "./refs.js";
|
|
10
|
+
import { resolveProviderOptions } from "./providerOptions.js";
|
|
11
|
+
import { generateViaResponses } from "./responsesImageAdapter.js";
|
|
12
|
+
import { generateViaGrok } from "./grokImageAdapter.js";
|
|
13
|
+
import { generateViaAgy } from "./agyImageAdapter.js";
|
|
14
|
+
import { generateVideoViaGrok } from "./grokVideoAdapter.js";
|
|
15
|
+
import { parseVideoParams } from "./agentGenerationPlanner.js";
|
|
16
|
+
import { appendAgentTurn, getAgentImages, getAgentSession, importAgentImage, } from "./agentStore.js";
|
|
17
|
+
import { errInfo } from "./errInfo.js";
|
|
18
|
+
import { forceImagePrompt, isTextOnlyResult, textOnlyError, notFound } from "./agentRuntime.js";
|
|
19
|
+
/**
 * Generate an agent image, allowing one retry with a more insistent prompt.
 *
 * Attempt 0 sends `prompt` unchanged; attempt 1 sends `forceImagePrompt(prompt)`.
 * An error is only eligible for retry when `isTextOnlyResult(error)` is true;
 * every other error is rethrown to the caller immediately. When a retry is
 * triggered by such an error, a "tool" turn with status "error" is appended to
 * the session first so the rejection is visible in the transcript.
 *
 * @returns the first generation result whose `image` is truthy.
 * @throws the original error when it is not a text-only result, otherwise
 *   `textOnlyError(lastError)` after both attempts are used up.
 */
export async function generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
    // Builders are lazy so forceImagePrompt() only runs if the retry actually happens,
    // and any exception it raises is handled by the same try/catch as the generation call.
    const promptBuilders = [
        () => prompt,
        () => forceImagePrompt(prompt),
    ];
    const finalAttempt = promptBuilders.length - 1;
    let mostRecentFailure = null;
    for (const [attemptIndex, buildPrompt] of promptBuilders.entries()) {
        try {
            const outcome = await generateAgentImage(ctx, sessionId, buildPrompt(), manifest, webSearchEnabled, options);
            if (outcome.image) {
                return outcome;
            }
        }
        catch (failure) {
            mostRecentFailure = failure;
            if (!isTextOnlyResult(failure)) {
                throw failure;
            }
            if (attemptIndex === finalAttempt) {
                break;
            }
            appendAgentTurn({
                sessionId,
                role: "tool",
                text: "ima2.generate_image retry: text-only result rejected",
                status: "error",
            });
        }
    }
    throw textOnlyError(mostRecentFailure);
}
|
|
44
|
+
/**
 * Run a single image generation for an agent session and persist the result.
 *
 * Provider options are resolved through `resolveProviderOptions`; a resolved
 * `error` is converted into a thrown Error carrying `code` and `status`.
 * The request is then dispatched by provider:
 *   - "agy"  -> generateViaAgy
 *   - "grok" -> generateViaGrok (with the session's current image as reference)
 *   - other  -> generateViaResponses
 *
 * @returns `{ image, webSearchCalls, text, provider }` where `image` is the
 *   record returned by `persistAgentImage`.
 */
async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
    const requestId = options.requestId ?? `agent_${ulid()}`;
    const resolved = resolveProviderOptions(ctx, {
        provider: options.provider ?? "oauth",
        rawModel: options.model,
        rawReasoningEffort: options.reasoningEffort,
        rawSize: options.size ?? "1024x1024",
        rawWebSearchEnabled: webSearchEnabled,
        searchMode: webSearchEnabled ? "on" : "off",
    });
    if (resolved.error) {
        throw Object.assign(new Error(resolved.error), {
            code: resolved.code,
            status: resolved.status,
        });
    }

    const activeProvider = resolved.provider;
    const usesGrok = activeProvider === "grok";
    const usesAgy = activeProvider === "agy";
    // Grok switches to its dedicated quality model when "high" quality is requested.
    const effectiveModel = usesGrok && options.quality === "high"
        ? "grok-imagine-image-quality"
        : resolved.model;
    // Every provider receives the same manifest + user request text.
    const fullPrompt = `${manifest}\n\nUser request:\n${prompt}`;

    let response;
    if (usesAgy) {
        response = await generateViaAgy(fullPrompt, {
            requestId,
            signal: options.signal ?? undefined,
        });
    }
    else if (usesGrok) {
        response = await generateViaGrok(fullPrompt, ctx, {
            model: effectiveModel,
            size: resolved.size,
            requestId,
            signal: options.signal ?? undefined,
            references: await loadAgentCurrentImageReferences(ctx, sessionId),
        });
    }
    else {
        response = await generateViaResponses(activeProvider, fullPrompt, options.quality ?? "medium", resolved.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
            model: resolved.model,
            reasoningEffort: resolved.reasoningEffort,
            webSearchEnabled,
            signal: options.signal,
        });
    }

    // Grok/agy output format follows the returned bytes; other providers use the requested format.
    let format;
    if (usesGrok || usesAgy) {
        const reportedMime = "mime" in response ? response.mime : undefined;
        format = imageFormatFromMime(reportedMime || detectImageMimeFromB64(response.b64) || "image/jpeg");
    }
    else {
        format = options.format ?? "png";
    }

    const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
        provider: String(activeProvider),
        model: String(effectiveModel),
    });
    const hasText = "text" in response && typeof response.text === "string";
    return {
        image,
        webSearchCalls: response.webSearchCalls || 0,
        text: hasText ? response.text : null,
        provider: activeProvider,
    };
}
|
|
93
|
+
/**
 * Load the session's current image (the one matching `session.lastImageId`)
 * as a single-element reference list for the Grok adapter.
 *
 * Returns an empty list when the session has no last image, when the image
 * record has no filename, or when the file cannot be read. A read failure is
 * logged as the "grok_ref_missing" agent event rather than thrown.
 *
 * @returns an array with zero or one `{ b64, declaredMime, detectedMime }` entry.
 */
async function loadAgentCurrentImageReferences(ctx, sessionId) {
    const session = getAgentSession(sessionId);
    if (!session?.lastImageId) {
        return [];
    }
    const current = getAgentImages(sessionId).find((candidate) => candidate.id === session.lastImageId);
    if (!current?.filename) {
        return [];
    }
    try {
        const sourcePath = join(ctx.config.storage.generatedDir, current.filename);
        const bytes = await readFile(sourcePath);
        const b64 = bytes.toString("base64");
        // The MIME is sniffed from the bytes, so declared and detected are the same value.
        const mime = detectImageMimeFromB64(b64);
        return [{ b64, declaredMime: mime, detectedMime: mime }];
    }
    catch (error) {
        const { code, message } = errInfo(error);
        logEvent("agent", "grok_ref_missing", { sessionId, filename: current.filename, code, message });
        return [];
    }
}
|
|
111
|
+
/**
 * Map an image MIME type to the file-format/extension token used for saved files.
 *
 * Media types are case-insensitive and may carry parameters
 * (e.g. "image/jpeg; charset=binary"), so the value is normalized before
 * matching: parameters are dropped, surrounding whitespace is trimmed and the
 * type is lower-cased. Anything that is not recognized — including non-string
 * input — falls back to "png".
 *
 * @param {unknown} mime MIME type string, possibly with parameters.
 * @returns {"jpeg" | "webp" | "png"} format token.
 */
function imageFormatFromMime(mime) {
    if (typeof mime !== "string") {
        return "png";
    }
    // Keep only the "type/subtype" part; parameters after ";" do not affect the format.
    const essence = mime.split(";", 1)[0].trim().toLowerCase();
    switch (essence) {
        case "image/jpeg":
            return "jpeg";
        case "image/webp":
            return "webp";
        default:
            return "png";
    }
}
|
|
118
|
+
/**
 * Write a generated image and its JSON sidecar into the generated directory
 * and register the image with the agent session.
 *
 * Steps, in order: ensure the directory exists, embed metadata into the image
 * bytes (best effort), write the image file, write `<file>.json` atomically.
 * If the sidecar write fails the image file is unlinked (errors from the
 * unlink are ignored) and the sidecar error is rethrown. On success the
 * history index is invalidated and a "saved" agent event is logged.
 *
 * @param generation `{ provider, model }` recorded in the metadata.
 * @returns whatever `importAgentImage` returns for the new image record.
 */
async function persistAgentImage(ctx, sessionId, prompt, format, requestId, response, generation) {
    const targetDir = ctx.config.storage.generatedDir;
    await mkdir(targetDir, { recursive: true });

    const suffix = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
    const filename = `${Date.now()}_${suffix}_agent.${format}`;
    const revisedPrompt = response.revisedPrompt ?? null;
    const meta = {
        kind: "agent",
        requestId,
        sessionId,
        prompt,
        userPrompt: prompt,
        revisedPrompt,
        provider: generation.provider,
        model: generation.model,
        createdAt: Date.now(),
        usage: response.usage ?? null,
        webSearchCalls: response.webSearchCalls ?? 0,
    };

    const rawBytes = Buffer.from(response.b64, "base64");
    const embedded = await embedImageMetadataBestEffort(rawBytes, format, meta, {
        version: ctx.packageVersion,
    });

    const filePath = join(targetDir, filename);
    await writeFile(filePath, embedded.buffer);
    try {
        await atomicWriteJson(`${filePath}.json`, meta);
    }
    catch (sidecarError) {
        // Do not leave an image on disk without its sidecar; cleanup itself is best effort.
        await unlink(filePath).catch(() => { });
        throw sidecarError;
    }

    invalidateHistoryIndex();
    logEvent("agent", "saved", { requestId, sessionId, filename });

    const record = {
        id: `ai_${ulid()}`,
        filename,
        url: `/generated/${filename}`,
        prompt,
        revisedPrompt,
        createdAt: Date.now(),
    };
    return importAgentImage(sessionId, record);
}
|
|
158
|
+
/**
 * Generate a video for an agent session via the Grok video adapter, persist
 * it, and append the matching "tool" and "assistant" turns.
 *
 * If the session has a last image whose filename does not end in ".mp4", that
 * image is loaded and used as the source (image-to-video). If loading fails
 * the failure is logged as the "video_ref_missing" agent event and generation
 * falls back to text-to-video instead of aborting.
 *
 * @param options.skipUserTurn when truthy, the user turn is not appended.
 * @param options.requestId optional request id; one is generated when absent.
 * @param options.signal optional abort signal forwarded to the adapter.
 * @returns `{ assistantTurn, imageIds, webFindingIds }`; `imageIds` holds the
 *   id of the stored video record and `webFindingIds` is always empty.
 * @throws `notFound(sessionId)` when the session does not exist.
 */
export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
    const session = getAgentSession(sessionId);
    if (!session)
        throw notFound(sessionId);
    if (!options.skipUserTurn) {
        appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
    }
    const requestId = options.requestId ?? `agent_video_${ulid()}`;
    const startedAt = Date.now();
    // Auto I2V: if session has a last image, use it as source
    let sourceImage;
    let mode = "text-to-video";
    if (session.lastImageId) {
        const images = getAgentImages(sessionId);
        const lastImage = images.find((img) => img.id === session.lastImageId);
        if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
            try {
                const { loadAssetB64 } = await import("./nodeStore.js");
                sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
                mode = "image-to-video";
            }
            catch (error) {
                // Still fall back to T2V, but record why the source image was skipped
                // (mirrors the "grok_ref_missing" logging used for image references).
                const err = errInfo(error);
                logEvent("agent", "video_ref_missing", { sessionId, filename: lastImage.filename, code: err.code, message: err.message });
            }
        }
    }
    const videoParams = parseVideoParams(prompt);
    const result = await generateVideoViaGrok(prompt, ctx, {
        model: "grok-imagine-video",
        mode,
        sourceImage,
        duration: videoParams.duration ?? 5,
        resolution: videoParams.resolution ?? "480p",
        aspectRatio: (videoParams.aspectRatio ?? "auto"),
        requestId,
        signal: options.signal ?? undefined,
    });
    const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
    const finishedAt = Date.now();
    const toolCall = {
        id: `tc_video_${ulid()}`,
        name: "ima2.generate_video",
        status: "complete",
        startedAt,
        finishedAt,
        durationMs: finishedAt - startedAt,
        requestId,
        inputSummary: prompt,
        outputSummary: `Generated video ${video.filename}.`,
        imageIds: [video.id],
    };
    appendAgentTurn({
        sessionId,
        role: "tool",
        text: "ima2.generate_video",
        imageIds: [video.id],
        status: "complete",
        raw: { toolCalls: [toolCall] },
    });
    // Only append the revised prompt when the adapter returned one; interpolating
    // a missing value would put the literal text "undefined" into the transcript.
    const revisedSuffix = result.revisedPrompt ? ` ${result.revisedPrompt}` : "";
    const assistantTurn = appendAgentTurn({
        sessionId,
        role: "assistant",
        text: `Generated 1 video artifact.${revisedSuffix}`,
        imageIds: [video.id],
        status: "complete",
    });
    return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
}
|
|
224
|
+
/**
 * Write a generated video (`result.videoBuffer`) and its JSON sidecar into the
 * generated directory and register it with the agent session.
 *
 * The video file is written first, then `<file>.json` atomically. If the
 * sidecar write fails, the video file is unlinked (unlink errors ignored) and
 * the sidecar error is rethrown. On success the history index is invalidated
 * and a "video_saved" agent event is logged.
 *
 * @returns whatever `importAgentImage` returns for the new record.
 */
async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
    const targetDir = ctx.config.storage.generatedDir;
    await mkdir(targetDir, { recursive: true });

    const suffix = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
    const filename = `${Date.now()}_${suffix}_agent.mp4`;
    const { revisedPrompt, usage, webSearchCalls } = result;
    const meta = {
        kind: "agent",
        mediaType: "video",
        requestId,
        sessionId,
        prompt,
        userPrompt: prompt,
        revisedPrompt,
        provider: "grok",
        model: "grok-imagine-video",
        createdAt: Date.now(),
        usage,
        webSearchCalls,
    };

    const filePath = join(targetDir, filename);
    await writeFile(filePath, result.videoBuffer);
    try {
        await atomicWriteJson(`${filePath}.json`, meta);
    }
    catch (sidecarError) {
        // Do not leave a video on disk without its sidecar; cleanup itself is best effort.
        await unlink(filePath).catch(() => { });
        throw sidecarError;
    }

    invalidateHistoryIndex();
    logEvent("agent", "video_saved", { requestId, sessionId, filename });

    const record = {
        id: `ai_${ulid()}`,
        filename,
        url: `/generated/${filename}`,
        prompt,
        revisedPrompt: result.revisedPrompt,
        createdAt: Date.now(),
    };
    return importAgentImage(sessionId, record);
}
|
package/lib/agentRuntime.js
CHANGED
|
@@ -1,18 +1,7 @@
|
|
|
1
|
-
import { randomBytes } from "node:crypto";
|
|
2
|
-
import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
|
|
3
|
-
import { atomicWriteJson } from "./atomicWrite.js";
|
|
4
|
-
import { join } from "node:path";
|
|
5
1
|
import { ulid } from "ulid";
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { detectImageMimeFromB64 } from "./refs.js";
|
|
10
|
-
import { resolveProviderOptions } from "./providerOptions.js";
|
|
11
|
-
import { generateViaResponses } from "./responsesImageAdapter.js";
|
|
12
|
-
import { generateViaGrok } from "./grokImageAdapter.js";
|
|
13
|
-
import { generateVideoViaGrok } from "./grokVideoAdapter.js";
|
|
14
|
-
import { parseVideoParams } from "./agentGenerationPlanner.js";
|
|
15
|
-
import { appendAgentTurn, buildImageContextManifest, getAgentImages, getAgentSession, importAgentImage, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
|
|
2
|
+
import { generateAgentImageWithRetry } from "./agentImageVideoGen.js";
|
|
3
|
+
import { runAgentVideoGeneration } from "./agentImageVideoGen.js";
|
|
4
|
+
import { appendAgentTurn, buildImageContextManifest, getAgentSession, recordAgentWebFinding, restartAgentRuntimeSession, } from "./agentStore.js";
|
|
16
5
|
import { AGENT_ALLOWED_TOOLS, } from "./agentTypes.js";
|
|
17
6
|
import { errInfo } from "./errInfo.js";
|
|
18
7
|
export function assertAgentAllowedTools(tools) {
|
|
@@ -46,7 +35,7 @@ export async function runAgentGenerationPlan(ctx, sessionId, prompt, plan, optio
|
|
|
46
35
|
const session = getAgentSession(sessionId);
|
|
47
36
|
if (!session)
|
|
48
37
|
throw notFound(sessionId);
|
|
49
|
-
const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
|
|
38
|
+
const webSearchEnabled = options.provider === "agy" ? false : options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
|
|
50
39
|
const enabledTools = webSearchEnabled
|
|
51
40
|
? [...AGENT_ALLOWED_TOOLS]
|
|
52
41
|
: ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
|
|
@@ -189,265 +178,27 @@ export function isRuntimeRestartableError(error) {
|
|
|
189
178
|
code.includes("PROTOCOL") ||
|
|
190
179
|
err.message.toLowerCase().includes("protocol wedge"));
|
|
191
180
|
}
|
|
192
|
-
async function generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
|
|
193
|
-
let lastError = null;
|
|
194
|
-
for (let attempt = 0; attempt < 2; attempt++) {
|
|
195
|
-
try {
|
|
196
|
-
const forcedPrompt = attempt === 0 ? prompt : forceImagePrompt(prompt);
|
|
197
|
-
const result = await generateAgentImage(ctx, sessionId, forcedPrompt, manifest, webSearchEnabled, options);
|
|
198
|
-
if (result.image)
|
|
199
|
-
return result;
|
|
200
|
-
}
|
|
201
|
-
catch (error) {
|
|
202
|
-
lastError = error;
|
|
203
|
-
if (!isTextOnlyResult(error))
|
|
204
|
-
throw error;
|
|
205
|
-
if (attempt === 1)
|
|
206
|
-
break;
|
|
207
|
-
appendAgentTurn({
|
|
208
|
-
sessionId,
|
|
209
|
-
role: "tool",
|
|
210
|
-
text: "ima2.generate_image retry: text-only result rejected",
|
|
211
|
-
status: "error",
|
|
212
|
-
});
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
throw textOnlyError(lastError);
|
|
216
|
-
}
|
|
217
|
-
async function generateAgentImage(ctx, sessionId, prompt, manifest, webSearchEnabled, options) {
|
|
218
|
-
const requestId = options.requestId ?? `agent_${ulid()}`;
|
|
219
|
-
const providerOptions = resolveProviderOptions(ctx, {
|
|
220
|
-
provider: options.provider ?? "oauth",
|
|
221
|
-
rawModel: options.model,
|
|
222
|
-
rawReasoningEffort: options.reasoningEffort,
|
|
223
|
-
rawSize: options.size ?? "1024x1024",
|
|
224
|
-
rawWebSearchEnabled: webSearchEnabled,
|
|
225
|
-
searchMode: webSearchEnabled ? "on" : "off",
|
|
226
|
-
});
|
|
227
|
-
if (providerOptions.error) {
|
|
228
|
-
const err = new Error(providerOptions.error);
|
|
229
|
-
err.code = providerOptions.code;
|
|
230
|
-
err.status = providerOptions.status;
|
|
231
|
-
throw err;
|
|
232
|
-
}
|
|
233
|
-
const activeProvider = providerOptions.provider;
|
|
234
|
-
const effectiveModel = activeProvider === "grok" && options.quality === "high"
|
|
235
|
-
? "grok-imagine-image-quality"
|
|
236
|
-
: providerOptions.model;
|
|
237
|
-
const response = activeProvider === "grok"
|
|
238
|
-
? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
|
|
239
|
-
model: effectiveModel,
|
|
240
|
-
size: providerOptions.size,
|
|
241
|
-
requestId,
|
|
242
|
-
signal: options.signal ?? undefined,
|
|
243
|
-
references: await loadAgentCurrentImageReferences(ctx, sessionId),
|
|
244
|
-
})
|
|
245
|
-
: await generateViaResponses(activeProvider, `${manifest}\n\nUser request:\n${prompt}`, options.quality ?? "medium", providerOptions.size, options.moderation ?? "low", [], requestId, "auto", ctx, {
|
|
246
|
-
model: providerOptions.model,
|
|
247
|
-
reasoningEffort: providerOptions.reasoningEffort,
|
|
248
|
-
webSearchEnabled,
|
|
249
|
-
signal: options.signal,
|
|
250
|
-
});
|
|
251
|
-
const format = activeProvider === "grok"
|
|
252
|
-
? imageFormatFromMime(("mime" in response ? response.mime : undefined) || detectImageMimeFromB64(response.b64) || "image/jpeg")
|
|
253
|
-
: options.format ?? "png";
|
|
254
|
-
const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
|
|
255
|
-
provider: String(activeProvider),
|
|
256
|
-
model: String(effectiveModel),
|
|
257
|
-
});
|
|
258
|
-
const responseText = "text" in response && typeof response.text === "string" ? response.text : null;
|
|
259
|
-
return { image, webSearchCalls: response.webSearchCalls || 0, text: responseText, provider: activeProvider };
|
|
260
|
-
}
|
|
261
|
-
async function loadAgentCurrentImageReferences(ctx, sessionId) {
|
|
262
|
-
const session = getAgentSession(sessionId);
|
|
263
|
-
const currentImage = session?.lastImageId
|
|
264
|
-
? getAgentImages(sessionId).find((image) => image.id === session.lastImageId)
|
|
265
|
-
: null;
|
|
266
|
-
if (!currentImage?.filename)
|
|
267
|
-
return [];
|
|
268
|
-
try {
|
|
269
|
-
const b64 = (await readFile(join(ctx.config.storage.generatedDir, currentImage.filename))).toString("base64");
|
|
270
|
-
const mime = detectImageMimeFromB64(b64);
|
|
271
|
-
return [{ b64, declaredMime: mime, detectedMime: mime }];
|
|
272
|
-
}
|
|
273
|
-
catch (error) {
|
|
274
|
-
const err = errInfo(error);
|
|
275
|
-
logEvent("agent", "grok_ref_missing", { sessionId, filename: currentImage.filename, code: err.code, message: err.message });
|
|
276
|
-
return [];
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
function imageFormatFromMime(mime) {
|
|
280
|
-
if (mime === "image/jpeg")
|
|
281
|
-
return "jpeg";
|
|
282
|
-
if (mime === "image/webp")
|
|
283
|
-
return "webp";
|
|
284
|
-
return "png";
|
|
285
|
-
}
|
|
286
|
-
async function persistAgentImage(ctx, sessionId, prompt, format, requestId, response, generation) {
|
|
287
|
-
await mkdir(ctx.config.storage.generatedDir, { recursive: true });
|
|
288
|
-
const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
|
|
289
|
-
const filename = `${Date.now()}_${rand}_agent.${format}`;
|
|
290
|
-
const meta = {
|
|
291
|
-
kind: "agent",
|
|
292
|
-
requestId,
|
|
293
|
-
sessionId,
|
|
294
|
-
prompt,
|
|
295
|
-
userPrompt: prompt,
|
|
296
|
-
revisedPrompt: response.revisedPrompt ?? null,
|
|
297
|
-
provider: generation.provider,
|
|
298
|
-
model: generation.model,
|
|
299
|
-
createdAt: Date.now(),
|
|
300
|
-
usage: response.usage ?? null,
|
|
301
|
-
webSearchCalls: response.webSearchCalls ?? 0,
|
|
302
|
-
};
|
|
303
|
-
const embedded = await embedImageMetadataBestEffort(Buffer.from(response.b64, "base64"), format, meta, {
|
|
304
|
-
version: ctx.packageVersion,
|
|
305
|
-
});
|
|
306
|
-
const filePath = join(ctx.config.storage.generatedDir, filename);
|
|
307
|
-
await writeFile(filePath, embedded.buffer);
|
|
308
|
-
try {
|
|
309
|
-
await atomicWriteJson(`${filePath}.json`, meta);
|
|
310
|
-
}
|
|
311
|
-
catch (err) {
|
|
312
|
-
await unlink(filePath).catch(() => { });
|
|
313
|
-
throw err;
|
|
314
|
-
}
|
|
315
|
-
invalidateHistoryIndex();
|
|
316
|
-
logEvent("agent", "saved", { requestId, sessionId, filename });
|
|
317
|
-
return importAgentImage(sessionId, {
|
|
318
|
-
id: `ai_${ulid()}`,
|
|
319
|
-
filename,
|
|
320
|
-
url: `/generated/${filename}`,
|
|
321
|
-
prompt,
|
|
322
|
-
revisedPrompt: response.revisedPrompt ?? null,
|
|
323
|
-
createdAt: Date.now(),
|
|
324
|
-
});
|
|
325
|
-
}
|
|
326
|
-
export async function runAgentVideoGeneration(ctx, sessionId, prompt, options = {}) {
|
|
327
|
-
const session = getAgentSession(sessionId);
|
|
328
|
-
if (!session)
|
|
329
|
-
throw notFound(sessionId);
|
|
330
|
-
if (!options.skipUserTurn) {
|
|
331
|
-
appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
|
|
332
|
-
}
|
|
333
|
-
const requestId = options.requestId ?? `agent_video_${ulid()}`;
|
|
334
|
-
const startedAt = Date.now();
|
|
335
|
-
// Auto I2V: if session has a last image, use it as source
|
|
336
|
-
let sourceImage;
|
|
337
|
-
let mode = "text-to-video";
|
|
338
|
-
if (session.lastImageId) {
|
|
339
|
-
const images = getAgentImages(sessionId);
|
|
340
|
-
const lastImage = images.find((img) => img.id === session.lastImageId);
|
|
341
|
-
if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
|
|
342
|
-
try {
|
|
343
|
-
const { loadAssetB64 } = await import("./nodeStore.js");
|
|
344
|
-
sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
|
|
345
|
-
mode = "image-to-video";
|
|
346
|
-
}
|
|
347
|
-
catch { /* fallback to T2V */ }
|
|
348
|
-
}
|
|
349
|
-
}
|
|
350
|
-
const videoParams = parseVideoParams(prompt);
|
|
351
|
-
const result = await generateVideoViaGrok(prompt, ctx, {
|
|
352
|
-
model: "grok-imagine-video",
|
|
353
|
-
mode,
|
|
354
|
-
sourceImage,
|
|
355
|
-
duration: videoParams.duration ?? 5,
|
|
356
|
-
resolution: videoParams.resolution ?? "480p",
|
|
357
|
-
aspectRatio: (videoParams.aspectRatio ?? "auto"),
|
|
358
|
-
requestId,
|
|
359
|
-
signal: options.signal ?? undefined,
|
|
360
|
-
});
|
|
361
|
-
const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
|
|
362
|
-
const finishedAt = Date.now();
|
|
363
|
-
const toolCall = {
|
|
364
|
-
id: `tc_video_${ulid()}`,
|
|
365
|
-
name: "ima2.generate_video",
|
|
366
|
-
status: "complete",
|
|
367
|
-
startedAt,
|
|
368
|
-
finishedAt,
|
|
369
|
-
durationMs: finishedAt - startedAt,
|
|
370
|
-
requestId,
|
|
371
|
-
inputSummary: prompt,
|
|
372
|
-
outputSummary: `Generated video ${video.filename}.`,
|
|
373
|
-
imageIds: [video.id],
|
|
374
|
-
};
|
|
375
|
-
appendAgentTurn({
|
|
376
|
-
sessionId,
|
|
377
|
-
role: "tool",
|
|
378
|
-
text: "ima2.generate_video",
|
|
379
|
-
imageIds: [video.id],
|
|
380
|
-
status: "complete",
|
|
381
|
-
raw: { toolCalls: [toolCall] },
|
|
382
|
-
});
|
|
383
|
-
const assistantTurn = appendAgentTurn({
|
|
384
|
-
sessionId,
|
|
385
|
-
role: "assistant",
|
|
386
|
-
text: `Generated 1 video artifact. ${result.revisedPrompt}`,
|
|
387
|
-
imageIds: [video.id],
|
|
388
|
-
status: "complete",
|
|
389
|
-
});
|
|
390
|
-
return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
|
|
391
|
-
}
|
|
392
|
-
async function persistAgentVideo(ctx, sessionId, prompt, requestId, result) {
|
|
393
|
-
await mkdir(ctx.config.storage.generatedDir, { recursive: true });
|
|
394
|
-
const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
|
|
395
|
-
const filename = `${Date.now()}_${rand}_agent.mp4`;
|
|
396
|
-
const meta = {
|
|
397
|
-
kind: "agent",
|
|
398
|
-
mediaType: "video",
|
|
399
|
-
requestId,
|
|
400
|
-
sessionId,
|
|
401
|
-
prompt,
|
|
402
|
-
userPrompt: prompt,
|
|
403
|
-
revisedPrompt: result.revisedPrompt,
|
|
404
|
-
provider: "grok",
|
|
405
|
-
model: "grok-imagine-video",
|
|
406
|
-
createdAt: Date.now(),
|
|
407
|
-
usage: result.usage,
|
|
408
|
-
webSearchCalls: result.webSearchCalls,
|
|
409
|
-
};
|
|
410
|
-
const filePath = join(ctx.config.storage.generatedDir, filename);
|
|
411
|
-
await writeFile(filePath, result.videoBuffer);
|
|
412
|
-
try {
|
|
413
|
-
await atomicWriteJson(`${filePath}.json`, meta);
|
|
414
|
-
}
|
|
415
|
-
catch (err) {
|
|
416
|
-
await unlink(filePath).catch(() => { });
|
|
417
|
-
throw err;
|
|
418
|
-
}
|
|
419
|
-
invalidateHistoryIndex();
|
|
420
|
-
logEvent("agent", "video_saved", { requestId, sessionId, filename });
|
|
421
|
-
return importAgentImage(sessionId, {
|
|
422
|
-
id: `ai_${ulid()}`,
|
|
423
|
-
filename,
|
|
424
|
-
url: `/generated/${filename}`,
|
|
425
|
-
prompt,
|
|
426
|
-
revisedPrompt: result.revisedPrompt,
|
|
427
|
-
createdAt: Date.now(),
|
|
428
|
-
});
|
|
429
|
-
}
|
|
430
181
|
/**
 * Record a single summary web finding for a generation that reported web
 * search calls.
 *
 * @param count number of web search calls reported; a falsy count records nothing.
 * @param provider provider id; "grok" is labelled "Grok", "agy" is labelled
 *   "Gemini", anything else "Responses".
 * @returns an empty array when `count` is falsy, otherwise a one-element array
 *   holding the value returned by `recordAgentWebFinding`.
 */
function recordSearchFindings(sessionId, prompt, count, provider) {
    if (!count) {
        return [];
    }
    let providerLabel;
    switch (provider) {
        case "grok":
            providerLabel = "Grok";
            break;
        case "agy":
            providerLabel = "Gemini";
            break;
        default:
            providerLabel = "Responses";
    }
    const pluralSuffix = count === 1 ? "" : "s";
    const finding = recordAgentWebFinding({
        sessionId,
        query: prompt,
        title: `${providerLabel} visual research`,
        snippet: `${providerLabel} reported ${count} web search call${pluralSuffix}.`,
    });
    return [finding];
}
|
|
443
|
-
function forceImagePrompt(prompt) {
|
|
194
|
+
/**
 * Wrap a user prompt with instructions telling the model that the previous
 * turn produced no image and that it must return one now.
 *
 * @param prompt the original user request.
 * @returns a three-line string: notice, instruction, then "User request: <prompt>".
 */
export function forceImagePrompt(prompt) {
    const notice = "The previous turn did not return an image artifact.";
    const instruction = "Return a final image using ima2.generate_image/image_generation now.";
    return `${notice}\n${instruction}\nUser request: ${prompt}`;
}
|
|
450
|
-
function isTextOnlyResult(error) {
|
|
201
|
+
export function isTextOnlyResult(error) {
|
|
451
202
|
const err = errInfo(error);
|
|
452
203
|
return [
|
|
453
204
|
"EMPTY_RESPONSE",
|
|
@@ -456,7 +207,7 @@ function isTextOnlyResult(error) {
|
|
|
456
207
|
"IMAGE_TOOL_COMPLETED_WITHOUT_RESULT",
|
|
457
208
|
].includes(err.code || "") || err.message.includes("No image data");
|
|
458
209
|
}
|
|
459
|
-
function textOnlyError(cause) {
|
|
210
|
+
export function textOnlyError(cause) {
|
|
460
211
|
const err = new Error("Agent result did not include an image artifact.");
|
|
461
212
|
err.code = "AGENT_TEXT_ONLY_RESULT";
|
|
462
213
|
err.status = 422;
|
|
@@ -482,7 +233,7 @@ function cleanParallelism(value) {
|
|
|
482
233
|
return 2;
|
|
483
234
|
return Math.max(1, Math.min(8, Math.round(numeric)));
|
|
484
235
|
}
|
|
485
|
-
function notFound(sessionId) {
|
|
236
|
+
export function notFound(sessionId) {
|
|
486
237
|
const err = new Error(`Agent session not found: ${sessionId}`);
|
|
487
238
|
err.code = "AGENT_SESSION_NOT_FOUND";
|
|
488
239
|
err.status = 404;
|
package/lib/agentSettings.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const PROVIDERS = new Set(["oauth", "api", "grok"]);
|
|
1
|
+
const PROVIDERS = new Set(["oauth", "api", "grok", "grok-api", "agy", "gemini-api"]);
|
|
2
2
|
const QUALITIES = new Set(["low", "medium", "high"]);
|
|
3
3
|
const FORMATS = new Set(["png", "jpeg", "webp"]);
|
|
4
4
|
const MODERATIONS = new Set(["auto", "low"]);
|