ima2-gen 1.1.21 → 1.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/README.md +44 -7
  2. package/bin/commands/video.js +14 -0
  3. package/bin/ima2.js +14 -4
  4. package/bin/lib/platform.js +34 -5
  5. package/docs/README.ko.md +43 -2
  6. package/lib/agentQueueWorker.js +6 -0
  7. package/lib/agentRuntime.js +3 -2
  8. package/lib/atomicWrite.js +14 -0
  9. package/lib/grokImageAdapter.js +6 -0
  10. package/lib/grokProxyLauncher.js +5 -3
  11. package/lib/grokVideoAdapter.js +1 -1
  12. package/lib/grokVideoPlannerPrompt.js +10 -0
  13. package/lib/inflight.js +1 -1
  14. package/lib/oauthLauncher.js +5 -0
  15. package/lib/videoFrameExtract.js +3 -3
  16. package/package.json +5 -7
  17. package/routes/capabilities.js +13 -0
  18. package/routes/edit.js +2 -1
  19. package/routes/generate.js +32 -6
  20. package/routes/health.js +4 -3
  21. package/routes/multimode.js +2 -1
  22. package/routes/video.js +35 -3
  23. package/server.js +29 -2
  24. package/skills/ima2/SKILL.md +48 -6
  25. package/ui/dist/.vite/manifest.json +12 -12
  26. package/ui/dist/assets/{AgentWorkspace-B_hq9CLg.js → AgentWorkspace-C21zqdTZ.js} +1 -1
  27. package/ui/dist/assets/{CardNewsWorkspace-wD12J7qk.js → CardNewsWorkspace-BN-ga1lG.js} +1 -1
  28. package/ui/dist/assets/{NodeCanvas-CI_wuPMf.js → NodeCanvas-BbMa4IhI.js} +1 -1
  29. package/ui/dist/assets/{PromptBuilderPanel-CUTujJUV.js → PromptBuilderPanel-DRwBJRDQ.js} +1 -1
  30. package/ui/dist/assets/{PromptImportDialog-CUi66jPK.js → PromptImportDialog-Dp85kHCq.js} +2 -2
  31. package/ui/dist/assets/{PromptImportDiscoverySection-Cm3vrjY4.js → PromptImportDiscoverySection-BE8Q8MLD.js} +1 -1
  32. package/ui/dist/assets/{PromptImportFolderSection-DOtWTD9n.js → PromptImportFolderSection-PtH5x0sc.js} +1 -1
  33. package/ui/dist/assets/{PromptLibraryPanel-BMjQegRa.js → PromptLibraryPanel-FnM9tHI9.js} +2 -2
  34. package/ui/dist/assets/SettingsWorkspace-MARPGyBL.js +1 -0
  35. package/ui/dist/assets/index-BAFI6htx.js +42 -0
  36. package/ui/dist/assets/{index-31uVIdt4.js → index-BSXxr_Bt.js} +1 -1
  37. package/ui/dist/assets/index-DS-ADE7U.css +1 -0
  38. package/ui/dist/index.html +2 -2
  39. package/bin/commands/annotate.ts +0 -119
  40. package/bin/commands/cancel.ts +0 -48
  41. package/bin/commands/canvas-versions.ts +0 -80
  42. package/bin/commands/capabilities.ts +0 -110
  43. package/bin/commands/cardnews.ts +0 -249
  44. package/bin/commands/comfy.ts +0 -54
  45. package/bin/commands/config.ts +0 -186
  46. package/bin/commands/defaults.ts +0 -192
  47. package/bin/commands/doctor.ts +0 -202
  48. package/bin/commands/edit.ts +0 -150
  49. package/bin/commands/gen.ts +0 -214
  50. package/bin/commands/grok.ts +0 -90
  51. package/bin/commands/history.ts +0 -146
  52. package/bin/commands/ls.ts +0 -64
  53. package/bin/commands/metadata.ts +0 -39
  54. package/bin/commands/multimode.ts +0 -196
  55. package/bin/commands/node.ts +0 -166
  56. package/bin/commands/observability.ts +0 -176
  57. package/bin/commands/ping.ts +0 -31
  58. package/bin/commands/prompt-sub/build.ts +0 -101
  59. package/bin/commands/prompt.ts +0 -492
  60. package/bin/commands/ps.ts +0 -81
  61. package/bin/commands/session.ts +0 -266
  62. package/bin/commands/show.ts +0 -72
  63. package/bin/commands/skill.ts +0 -70
  64. package/bin/commands/video.ts +0 -442
  65. package/bin/ima2.ts +0 -430
  66. package/bin/lib/args.ts +0 -92
  67. package/bin/lib/browser-id.ts +0 -16
  68. package/bin/lib/client.ts +0 -122
  69. package/bin/lib/config-store.ts +0 -120
  70. package/bin/lib/destructive-confirm.ts +0 -19
  71. package/bin/lib/doctor-checks.ts +0 -91
  72. package/bin/lib/error-hints.ts +0 -23
  73. package/bin/lib/files.ts +0 -39
  74. package/bin/lib/output.ts +0 -73
  75. package/bin/lib/platform.ts +0 -99
  76. package/bin/lib/recover-output.ts +0 -139
  77. package/bin/lib/sse.ts +0 -73
  78. package/bin/lib/star-prompt.ts +0 -97
  79. package/bin/lib/storage-doctor.ts +0 -39
  80. package/bin/lib/ui-build.ts +0 -85
  81. package/config.ts +0 -354
  82. package/lib/agentCommandParser.ts +0 -69
  83. package/lib/agentGenerationPlanner.ts +0 -273
  84. package/lib/agentQuestionResponder.ts +0 -266
  85. package/lib/agentQueueStore.ts +0 -270
  86. package/lib/agentQueueWorker.ts +0 -89
  87. package/lib/agentRuntime.ts +0 -604
  88. package/lib/agentSettings.ts +0 -72
  89. package/lib/agentStore.ts +0 -422
  90. package/lib/agentStoreRows.ts +0 -136
  91. package/lib/agentTypes.ts +0 -154
  92. package/lib/apiCachePolicy.ts +0 -11
  93. package/lib/assetLifecycle.ts +0 -146
  94. package/lib/canvasVersionStore.ts +0 -223
  95. package/lib/capabilities.ts +0 -126
  96. package/lib/cardNewsGenerator.ts +0 -271
  97. package/lib/cardNewsJobStore.ts +0 -142
  98. package/lib/cardNewsManifestStore.ts +0 -154
  99. package/lib/cardNewsPlanner.ts +0 -236
  100. package/lib/cardNewsPlannerClient.ts +0 -155
  101. package/lib/cardNewsPlannerPrompt.ts +0 -62
  102. package/lib/cardNewsPlannerSchema.ts +0 -321
  103. package/lib/cardNewsRoleTemplateStore.ts +0 -47
  104. package/lib/cardNewsTemplateStore.ts +0 -252
  105. package/lib/codexDetect.ts +0 -71
  106. package/lib/comfyBridge.ts +0 -235
  107. package/lib/composerSnapshot.ts +0 -33
  108. package/lib/configKeys.ts +0 -62
  109. package/lib/db.ts +0 -295
  110. package/lib/errInfo.ts +0 -43
  111. package/lib/errorClassify.ts +0 -100
  112. package/lib/generationCancel.ts +0 -28
  113. package/lib/generationErrors.ts +0 -238
  114. package/lib/grokImageAdapter.ts +0 -513
  115. package/lib/grokMultimodeAdapter.ts +0 -84
  116. package/lib/grokProxyLauncher.ts +0 -153
  117. package/lib/grokRuntime.ts +0 -23
  118. package/lib/grokSizeMapper.ts +0 -71
  119. package/lib/grokVideoAdapter.ts +0 -458
  120. package/lib/grokVideoCanvas.ts +0 -26
  121. package/lib/grokVideoDownload.ts +0 -59
  122. package/lib/grokVideoPlannerPrompt.ts +0 -67
  123. package/lib/historyIndex.ts +0 -51
  124. package/lib/historyList.ts +0 -181
  125. package/lib/imageMetadata.ts +0 -113
  126. package/lib/imageMetadataStore.ts +0 -67
  127. package/lib/imageModels.ts +0 -165
  128. package/lib/inflight.ts +0 -281
  129. package/lib/localImportStore.ts +0 -114
  130. package/lib/logger.ts +0 -161
  131. package/lib/nodeStore.ts +0 -91
  132. package/lib/oauthLauncher.ts +0 -94
  133. package/lib/oauthNormalize.ts +0 -30
  134. package/lib/oauthProxy/errors.ts +0 -128
  135. package/lib/oauthProxy/generators.ts +0 -494
  136. package/lib/oauthProxy/index.ts +0 -28
  137. package/lib/oauthProxy/prompts.ts +0 -123
  138. package/lib/oauthProxy/references.ts +0 -45
  139. package/lib/oauthProxy/runtime.ts +0 -115
  140. package/lib/oauthProxy/streams.ts +0 -232
  141. package/lib/oauthProxy/types.ts +0 -9
  142. package/lib/oauthProxy.ts +0 -3
  143. package/lib/openDirectory.ts +0 -47
  144. package/lib/pngInfo.ts +0 -26
  145. package/lib/promptBuilder/attachments.ts +0 -74
  146. package/lib/promptBuilder/client.ts +0 -130
  147. package/lib/promptBuilder/constants.ts +0 -9
  148. package/lib/promptBuilder/context.ts +0 -36
  149. package/lib/promptBuilder/errors.ts +0 -12
  150. package/lib/promptBuilder/requestSchema.ts +0 -56
  151. package/lib/promptBuilder/responseParser.ts +0 -219
  152. package/lib/promptBuilder/systemPrompt.ts +0 -135
  153. package/lib/promptBuilder/transport.ts +0 -94
  154. package/lib/promptBuilder/types.ts +0 -109
  155. package/lib/promptImport/curatedSources.ts +0 -141
  156. package/lib/promptImport/discoveryRegistry.ts +0 -329
  157. package/lib/promptImport/errors.ts +0 -18
  158. package/lib/promptImport/githubDiscovery.ts +0 -309
  159. package/lib/promptImport/githubFolder.ts +0 -397
  160. package/lib/promptImport/githubSource.ts +0 -257
  161. package/lib/promptImport/gptImageHints.ts +0 -70
  162. package/lib/promptImport/parsePromptCandidates.ts +0 -179
  163. package/lib/promptImport/promptIndex.ts +0 -326
  164. package/lib/promptImport/rankPromptCandidates.ts +0 -65
  165. package/lib/promptImport/types.ts +0 -103
  166. package/lib/promptSafetyPolicy.ts +0 -5
  167. package/lib/providerOptions.ts +0 -56
  168. package/lib/referenceImageCompress.ts +0 -84
  169. package/lib/refs.ts +0 -133
  170. package/lib/requestLogger.ts +0 -49
  171. package/lib/responsesDoctor.ts +0 -456
  172. package/lib/responsesErrors.ts +0 -83
  173. package/lib/responsesFallback.ts +0 -114
  174. package/lib/responsesImageAdapter.ts +0 -466
  175. package/lib/responsesParse.ts +0 -452
  176. package/lib/responsesTools.ts +0 -28
  177. package/lib/runtimeContext.ts +0 -146
  178. package/lib/runtimePorts.ts +0 -105
  179. package/lib/sessionStore.ts +0 -308
  180. package/lib/storageMigration.ts +0 -310
  181. package/lib/styleSheet.ts +0 -139
  182. package/lib/systemTrash.ts +0 -20
  183. package/lib/videoContinuity.ts +0 -180
  184. package/lib/videoFrameExtract.ts +0 -78
  185. package/lib/videoSeriesChain.ts +0 -29
  186. package/lib/visibleTextLanguagePolicy.ts +0 -7
  187. package/routes/agent.ts +0 -308
  188. package/routes/annotations.ts +0 -118
  189. package/routes/canvasVersions.ts +0 -69
  190. package/routes/capabilities.ts +0 -18
  191. package/routes/cardNews.ts +0 -211
  192. package/routes/comfy.ts +0 -43
  193. package/routes/edit.ts +0 -352
  194. package/routes/generate.ts +0 -492
  195. package/routes/grok.ts +0 -24
  196. package/routes/health.ts +0 -123
  197. package/routes/history.ts +0 -221
  198. package/routes/imageImport.ts +0 -37
  199. package/routes/index.ts +0 -52
  200. package/routes/metadata.ts +0 -77
  201. package/routes/multimode.ts +0 -499
  202. package/routes/nodes.ts +0 -578
  203. package/routes/promptBuilder.ts +0 -37
  204. package/routes/promptImport.ts +0 -379
  205. package/routes/prompts.ts +0 -428
  206. package/routes/quota.ts +0 -89
  207. package/routes/sessions.ts +0 -317
  208. package/routes/storage.ts +0 -47
  209. package/routes/video.ts +0 -300
  210. package/routes/videoExtended.ts +0 -284
  211. package/server.ts +0 -293
  212. package/ui/dist/assets/SettingsWorkspace-PiaVnsdA.js +0 -1
  213. package/ui/dist/assets/index-CjgnNtgt.css +0 -1
  214. package/ui/dist/assets/index-Da2s4_-5.js +0 -36
@@ -1,604 +0,0 @@
1
- import { randomBytes } from "node:crypto";
2
- import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
3
- import { join } from "node:path";
4
- import { ulid } from "ulid";
5
- import { embedImageMetadataBestEffort } from "./imageMetadataStore.js";
6
- import { invalidateHistoryIndex } from "./historyIndex.js";
7
- import { logEvent } from "./logger.js";
8
- import { detectImageMimeFromB64 } from "./refs.js";
9
- import { resolveProviderOptions } from "./providerOptions.js";
10
- import { generateViaResponses } from "./responsesImageAdapter.js";
11
- import { generateViaGrok, type GrokReferenceImage } from "./grokImageAdapter.js";
12
- import { generateVideoViaGrok } from "./grokVideoAdapter.js";
13
- import { parseVideoParams } from "./agentGenerationPlanner.js";
14
- import {
15
- appendAgentTurn,
16
- buildImageContextManifest,
17
- getAgentImages,
18
- getAgentSession,
19
- importAgentImage,
20
- recordAgentWebFinding,
21
- restartAgentRuntimeSession,
22
- } from "./agentStore.js";
23
- import {
24
- AGENT_ALLOWED_TOOLS,
25
- type AgentGenerationPlan,
26
- type AgentToolCallSummary,
27
- type AgentToolName,
28
- } from "./agentTypes.js";
29
- import { errInfo } from "./errInfo.js";
30
- import { type RuntimeContext } from "./runtimeContext.js";
31
-
32
- type AgentRunOptions = {
33
- provider?: string;
34
- quality?: string;
35
- size?: string;
36
- format?: string;
37
- moderation?: string;
38
- model?: string;
39
- reasoningEffort?: string;
40
- requestId?: string;
41
- webSearchEnabled?: boolean;
42
- parallelism?: number;
43
- signal?: AbortSignal | null;
44
- };
45
-
46
- export function assertAgentAllowedTools(tools: readonly string[]) {
47
- const allowed = new Set<string>(AGENT_ALLOWED_TOOLS);
48
- const denied = tools.filter((tool) => !allowed.has(tool));
49
- if (denied.length > 0) {
50
- const err = new Error(`Agent tool is not allowed: ${denied.join(", ")}`) as Error & {
51
- code?: string;
52
- status?: number;
53
- deniedTools?: string[];
54
- };
55
- err.code = "AGENT_TOOL_NOT_ALLOWED";
56
- err.status = 403;
57
- err.deniedTools = denied;
58
- throw err;
59
- }
60
- }
61
-
62
- export function agentAllowedToolPayload() {
63
- return { tools: [...AGENT_ALLOWED_TOOLS] };
64
- }
65
-
66
- export async function runAgentTurn(ctx: RuntimeContext, sessionId: string, prompt: string, options: AgentRunOptions = {}) {
67
- return runAgentGenerationPlan(
68
- ctx,
69
- sessionId,
70
- prompt,
71
- {
72
- mode: "single",
73
- prompts: [prompt],
74
- requestedVariants: 1,
75
- plannedVariants: 1,
76
- plannedParallelism: cleanParallelism(options.parallelism),
77
- source: "auto-default",
78
- reason: "Direct turn endpoint defaults to one image.",
79
- command: null,
80
- assistantText: null,
81
- },
82
- options,
83
- { appendUserTurn: true },
84
- );
85
- }
86
-
87
- export async function runAgentGenerationPlan(
88
- ctx: RuntimeContext,
89
- sessionId: string,
90
- prompt: string,
91
- plan: AgentGenerationPlan,
92
- options: AgentRunOptions = {},
93
- behavior: { appendUserTurn?: boolean } = {},
94
- ) {
95
- const session = getAgentSession(sessionId);
96
- if (!session) throw notFound(sessionId);
97
- const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
98
- const enabledTools: AgentToolName[] = webSearchEnabled
99
- ? [...AGENT_ALLOWED_TOOLS]
100
- : ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
101
- assertAgentAllowedTools(enabledTools);
102
- if (behavior.appendUserTurn !== false) {
103
- appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
104
- }
105
- if (plan.mode === "question") {
106
- const assistantTurn = appendAgentTurn({
107
- sessionId,
108
- role: "assistant",
109
- text: plan.assistantText || plan.reason || "What would you like to clarify before generating images?",
110
- imageIds: [],
111
- webFindingIds: [],
112
- status: "complete",
113
- });
114
- return { assistantTurn, imageIds: [], webFindingIds: [] };
115
- }
116
- if (plan.mode === "video") {
117
- return runAgentVideoGeneration(ctx, sessionId, prompt, {
118
- ...options,
119
- requestId: options.requestId ?? `agent_video_${ulid()}`,
120
- skipUserTurn: true,
121
- });
122
- }
123
- const manifest = buildImageContextManifest(sessionId);
124
- const contextStartedAt = Date.now();
125
- appendAgentTurn({
126
- sessionId,
127
- role: "tool",
128
- text: "ima2.get_image_context",
129
- status: "complete",
130
- raw: {
131
- toolCalls: [{
132
- id: `tc_context_${ulid()}`,
133
- name: "ima2.get_image_context",
134
- status: "complete",
135
- startedAt: contextStartedAt,
136
- finishedAt: Date.now(),
137
- durationMs: Date.now() - contextStartedAt,
138
- outputSummary: "Loaded current image context manifest.",
139
- } satisfies AgentToolCallSummary],
140
- },
141
- });
142
- const generationPrompts = plan.prompts.length > 0 ? plan.prompts : [prompt];
143
- const baseRequestId = options.requestId ?? `agent_${ulid()}`;
144
- const generationResults = await mapWithLimit(generationPrompts, cleanParallelism(plan.plannedParallelism ?? options.parallelism), async (generationPrompt, index) => {
145
- const requestId = generationPrompts.length > 1 ? `${baseRequestId}_${index + 1}` : baseRequestId;
146
- const startedAt = Date.now();
147
- const result = await runGeneratorWithRuntimeRecovery(ctx, sessionId, generationPrompt, manifest, webSearchEnabled, {
148
- ...options,
149
- requestId,
150
- });
151
- const findingIds = recordSearchFindings(sessionId, generationPrompt, result.webSearchCalls, result.provider ?? "oauth");
152
- const finishedAt = Date.now();
153
- return {
154
- prompt: generationPrompt,
155
- imageId: result.image.id,
156
- text: result.text,
157
- findingIds,
158
- toolCall: {
159
- id: `tc_generate_${ulid()}`,
160
- name: "ima2.generate_image",
161
- status: "complete",
162
- startedAt,
163
- finishedAt,
164
- durationMs: finishedAt - startedAt,
165
- requestId,
166
- inputSummary: generationPrompt,
167
- outputSummary: `Generated ${result.image.filename}. ${plan.reason}`,
168
- imageIds: [result.image.id],
169
- webFindingIds: findingIds,
170
- } satisfies AgentToolCallSummary,
171
- };
172
- });
173
- const imageIds = generationResults.map((result) => result.imageId);
174
- const responseTexts = generationResults
175
- .map((result) => result.text)
176
- .filter((text): text is string => typeof text === "string" && text.trim().length > 0);
177
- const findingIds = generationResults.flatMap((result) => result.findingIds);
178
- const webToolCall: AgentToolCallSummary | null = webSearchEnabled ? {
179
- id: `tc_web_${ulid()}`,
180
- name: "ima2.web_search",
181
- status: "complete",
182
- outputSummary: findingIds.length > 0
183
- ? `Recorded ${findingIds.length} web finding${findingIds.length === 1 ? "" : "s"}.`
184
- : "Web search enabled; no findings were reported.",
185
- webFindingIds: findingIds,
186
- } : null;
187
- appendAgentTurn({
188
- sessionId,
189
- role: "tool",
190
- text: webSearchEnabled ? "ima2.web_search + ima2.generate_image" : "ima2.generate_image",
191
- imageIds,
192
- webFindingIds: findingIds,
193
- status: "complete",
194
- raw: {
195
- toolCalls: [
196
- ...(webToolCall ? [webToolCall] : []),
197
- ...generationResults.map((result) => result.toolCall),
198
- ],
199
- },
200
- });
201
- const assistantTurn = appendAgentTurn({
202
- sessionId,
203
- role: "assistant",
204
- text: formatAgentAssistantText(plan, imageIds.length, responseTexts),
205
- imageIds,
206
- webFindingIds: findingIds,
207
- status: "complete",
208
- });
209
- return { assistantTurn, imageIds, webFindingIds: findingIds };
210
- }
211
-
212
- function formatAgentAssistantText(plan: AgentGenerationPlan, imageCount: number, responseTexts: readonly string[]): string {
213
- const countText = imageCount === 1 ? "Generated 1 image artifact." : `Generated ${imageCount} image artifacts.`;
214
- const modeText = plan.mode === "fanout"
215
- ? `Fanout used ${plan.plannedParallelism} concurrent tool call${plan.plannedParallelism === 1 ? "" : "s"}.`
216
- : "Single-image plan completed.";
217
- const modelText = responseTexts.length > 0 ? `${responseTexts.join("\n\n")}\n\n` : "";
218
- return `${modelText}${countText} ${modeText} ${plan.reason}`.trim();
219
- }
220
-
221
- async function runGeneratorWithRuntimeRecovery(
222
- ctx: RuntimeContext,
223
- sessionId: string,
224
- prompt: string,
225
- manifest: string,
226
- webSearchEnabled: boolean,
227
- options: AgentRunOptions,
228
- ) {
229
- try {
230
- return await generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options);
231
- } catch (error) {
232
- const err = errInfo(error);
233
- if (isRuntimeRestartableError(error)) {
234
- restartAgentRuntimeSession(sessionId, err.code || err.message);
235
- }
236
- appendAgentTurn({ sessionId, role: "assistant", text: err.message, status: "error" });
237
- throw error;
238
- }
239
- }
240
-
241
- export function isRuntimeRestartableError(error: unknown) {
242
- const err = errInfo(error);
243
- const code = err.code || "";
244
- return (
245
- code.includes("AUTH") ||
246
- code.includes("TIMEOUT") ||
247
- code.includes("PROTOCOL") ||
248
- err.message.toLowerCase().includes("protocol wedge")
249
- );
250
- }
251
-
252
- async function generateAgentImageWithRetry(
253
- ctx: RuntimeContext,
254
- sessionId: string,
255
- prompt: string,
256
- manifest: string,
257
- webSearchEnabled: boolean,
258
- options: AgentRunOptions,
259
- ) {
260
- let lastError: unknown = null;
261
- for (let attempt = 0; attempt < 2; attempt++) {
262
- try {
263
- const forcedPrompt = attempt === 0 ? prompt : forceImagePrompt(prompt);
264
- const result = await generateAgentImage(ctx, sessionId, forcedPrompt, manifest, webSearchEnabled, options);
265
- if (result.image) return result;
266
- } catch (error) {
267
- lastError = error;
268
- if (!isTextOnlyResult(error)) throw error;
269
- if (attempt === 1) break;
270
- appendAgentTurn({
271
- sessionId,
272
- role: "tool",
273
- text: "ima2.generate_image retry: text-only result rejected",
274
- status: "error",
275
- });
276
- }
277
- }
278
- throw textOnlyError(lastError);
279
- }
280
-
281
- async function generateAgentImage(
282
- ctx: RuntimeContext,
283
- sessionId: string,
284
- prompt: string,
285
- manifest: string,
286
- webSearchEnabled: boolean,
287
- options: AgentRunOptions,
288
- ) {
289
- const requestId = options.requestId ?? `agent_${ulid()}`;
290
- const providerOptions = resolveProviderOptions(ctx, {
291
- provider: options.provider ?? "oauth",
292
- rawModel: options.model,
293
- rawReasoningEffort: options.reasoningEffort,
294
- rawSize: options.size ?? "1024x1024",
295
- rawWebSearchEnabled: webSearchEnabled,
296
- searchMode: webSearchEnabled ? "on" : "off",
297
- });
298
- if (providerOptions.error) {
299
- const err = new Error(providerOptions.error) as Error & { code?: string; status?: number };
300
- err.code = providerOptions.code;
301
- err.status = providerOptions.status;
302
- throw err;
303
- }
304
- const activeProvider = providerOptions.provider;
305
- const effectiveModel = activeProvider === "grok" && options.quality === "high"
306
- ? "grok-imagine-image-quality"
307
- : providerOptions.model;
308
- const response = activeProvider === "grok"
309
- ? await generateViaGrok(`${manifest}\n\nUser request:\n${prompt}`, ctx, {
310
- model: effectiveModel,
311
- size: providerOptions.size,
312
- requestId,
313
- signal: options.signal ?? undefined,
314
- references: await loadAgentCurrentImageReferences(ctx, sessionId),
315
- })
316
- : await generateViaResponses(
317
- activeProvider,
318
- `${manifest}\n\nUser request:\n${prompt}`,
319
- options.quality ?? "medium",
320
- providerOptions.size,
321
- options.moderation ?? "low",
322
- [],
323
- requestId,
324
- "auto",
325
- ctx,
326
- {
327
- model: providerOptions.model,
328
- reasoningEffort: providerOptions.reasoningEffort,
329
- webSearchEnabled,
330
- signal: options.signal,
331
- },
332
- );
333
- const format = activeProvider === "grok"
334
- ? imageFormatFromMime(("mime" in response ? response.mime : undefined) || detectImageMimeFromB64(response.b64) || "image/jpeg")
335
- : options.format ?? "png";
336
- const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
337
- provider: String(activeProvider),
338
- model: String(effectiveModel),
339
- });
340
- const responseText = "text" in response && typeof response.text === "string" ? response.text : null;
341
- return { image, webSearchCalls: response.webSearchCalls || 0, text: responseText, provider: activeProvider };
342
- }
343
-
344
- async function loadAgentCurrentImageReferences(ctx: RuntimeContext, sessionId: string): Promise<GrokReferenceImage[]> {
345
- const session = getAgentSession(sessionId);
346
- const currentImage = session?.lastImageId
347
- ? getAgentImages(sessionId).find((image) => image.id === session.lastImageId)
348
- : null;
349
- if (!currentImage?.filename) return [];
350
- try {
351
- const b64 = (await readFile(join(ctx.config.storage.generatedDir, currentImage.filename))).toString("base64");
352
- const mime = detectImageMimeFromB64(b64);
353
- return [{ b64, declaredMime: mime, detectedMime: mime }];
354
- } catch (error) {
355
- const err = errInfo(error);
356
- logEvent("agent", "grok_ref_missing", { sessionId, filename: currentImage.filename, code: err.code, message: err.message });
357
- return [];
358
- }
359
- }
360
-
361
- function imageFormatFromMime(mime: string | null | undefined): "png" | "jpeg" | "webp" {
362
- if (mime === "image/jpeg") return "jpeg";
363
- if (mime === "image/webp") return "webp";
364
- return "png";
365
- }
366
-
367
- async function persistAgentImage(
368
- ctx: RuntimeContext,
369
- sessionId: string,
370
- prompt: string,
371
- format: string,
372
- requestId: string,
373
- response: { b64: string; revisedPrompt?: string | null; usage?: unknown; webSearchCalls?: number },
374
- generation: { provider: string; model: string },
375
- ) {
376
- await mkdir(ctx.config.storage.generatedDir, { recursive: true });
377
- const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
378
- const filename = `${Date.now()}_${rand}_agent.${format}`;
379
- const meta = {
380
- kind: "agent",
381
- requestId,
382
- sessionId,
383
- prompt,
384
- userPrompt: prompt,
385
- revisedPrompt: response.revisedPrompt ?? null,
386
- provider: generation.provider,
387
- model: generation.model,
388
- createdAt: Date.now(),
389
- usage: response.usage ?? null,
390
- webSearchCalls: response.webSearchCalls ?? 0,
391
- };
392
- const embedded = await embedImageMetadataBestEffort(Buffer.from(response.b64, "base64"), format, meta, {
393
- version: ctx.packageVersion,
394
- });
395
- const filePath = join(ctx.config.storage.generatedDir, filename);
396
- await writeFile(filePath, embedded.buffer);
397
- try {
398
- await writeFile(`${filePath}.json`, JSON.stringify(meta));
399
- } catch (err) {
400
- await unlink(filePath).catch(() => {});
401
- throw err;
402
- }
403
- invalidateHistoryIndex();
404
- logEvent("agent", "saved", { requestId, sessionId, filename });
405
- return importAgentImage(sessionId, {
406
- id: `ai_${ulid()}`,
407
- filename,
408
- url: `/generated/${filename}`,
409
- prompt,
410
- revisedPrompt: response.revisedPrompt ?? null,
411
- createdAt: Date.now(),
412
- });
413
- }
414
-
415
- export async function runAgentVideoGeneration(
416
- ctx: RuntimeContext,
417
- sessionId: string,
418
- prompt: string,
419
- options: AgentRunOptions & { skipUserTurn?: boolean } = {},
420
- ) {
421
- const session = getAgentSession(sessionId);
422
- if (!session) throw notFound(sessionId);
423
- if (!options.skipUserTurn) {
424
- appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
425
- }
426
- const requestId = options.requestId ?? `agent_video_${ulid()}`;
427
- const startedAt = Date.now();
428
-
429
- // Auto I2V: if session has a last image, use it as source
430
- let sourceImage: string | undefined;
431
- let mode: "text-to-video" | "image-to-video" = "text-to-video";
432
- if (session.lastImageId) {
433
- const images = getAgentImages(sessionId);
434
- const lastImage = images.find((img) => img.id === session.lastImageId);
435
- if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
436
- try {
437
- const { loadAssetB64 } = await import("./nodeStore.js");
438
- sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
439
- mode = "image-to-video";
440
- } catch { /* fallback to T2V */ }
441
- }
442
- }
443
-
444
- const videoParams = parseVideoParams(prompt);
445
-
446
- const result = await generateVideoViaGrok(prompt, ctx, {
447
- model: "grok-imagine-video",
448
- mode,
449
- sourceImage,
450
- duration: videoParams.duration ?? 5,
451
- resolution: videoParams.resolution ?? "480p",
452
- aspectRatio: (videoParams.aspectRatio ?? "auto") as "auto" | "1:1" | "16:9" | "9:16" | "4:3" | "3:4" | "3:2" | "2:3",
453
- requestId,
454
- signal: options.signal ?? undefined,
455
- });
456
- const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
457
- const finishedAt = Date.now();
458
- const toolCall: AgentToolCallSummary = {
459
- id: `tc_video_${ulid()}`,
460
- name: "ima2.generate_video",
461
- status: "complete",
462
- startedAt,
463
- finishedAt,
464
- durationMs: finishedAt - startedAt,
465
- requestId,
466
- inputSummary: prompt,
467
- outputSummary: `Generated video ${video.filename}.`,
468
- imageIds: [video.id],
469
- };
470
- appendAgentTurn({
471
- sessionId,
472
- role: "tool",
473
- text: "ima2.generate_video",
474
- imageIds: [video.id],
475
- status: "complete",
476
- raw: { toolCalls: [toolCall] },
477
- });
478
- const assistantTurn = appendAgentTurn({
479
- sessionId,
480
- role: "assistant",
481
- text: `Generated 1 video artifact. ${result.revisedPrompt}`,
482
- imageIds: [video.id],
483
- status: "complete",
484
- });
485
- return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
486
- }
487
-
488
- async function persistAgentVideo(
489
- ctx: RuntimeContext,
490
- sessionId: string,
491
- prompt: string,
492
- requestId: string,
493
- result: { videoBuffer: Buffer; revisedPrompt: string; usage: Record<string, number> | null; webSearchCalls: number },
494
- ) {
495
- await mkdir(ctx.config.storage.generatedDir, { recursive: true });
496
- const rand = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
497
- const filename = `${Date.now()}_${rand}_agent.mp4`;
498
- const meta = {
499
- kind: "agent",
500
- mediaType: "video",
501
- requestId,
502
- sessionId,
503
- prompt,
504
- userPrompt: prompt,
505
- revisedPrompt: result.revisedPrompt,
506
- provider: "grok",
507
- model: "grok-imagine-video",
508
- createdAt: Date.now(),
509
- usage: result.usage,
510
- webSearchCalls: result.webSearchCalls,
511
- };
512
- const filePath = join(ctx.config.storage.generatedDir, filename);
513
- await writeFile(filePath, result.videoBuffer);
514
- try {
515
- await writeFile(`${filePath}.json`, JSON.stringify(meta));
516
- } catch (err) {
517
- await unlink(filePath).catch(() => {});
518
- throw err;
519
- }
520
- invalidateHistoryIndex();
521
- logEvent("agent", "video_saved", { requestId, sessionId, filename });
522
- return importAgentImage(sessionId, {
523
- id: `ai_${ulid()}`,
524
- filename,
525
- url: `/generated/${filename}`,
526
- prompt,
527
- revisedPrompt: result.revisedPrompt,
528
- createdAt: Date.now(),
529
- });
530
- }
531
-
532
- function recordSearchFindings(sessionId: string, prompt: string, count: number, provider: string) {
533
- if (!count) return [];
534
- const isGrok = provider === "grok";
535
- return [
536
- recordAgentWebFinding({
537
- sessionId,
538
- query: prompt,
539
- title: isGrok ? "Grok visual research" : "Responses web_search",
540
- snippet: `${isGrok ? "Grok" : "Responses"} reported ${count} web search call${count === 1 ? "" : "s"}.`,
541
- }),
542
- ];
543
- }
544
-
545
- function forceImagePrompt(prompt: string) {
546
- return [
547
- "The previous turn did not return an image artifact.",
548
- "Return a final image using ima2.generate_image/image_generation now.",
549
- `User request: ${prompt}`,
550
- ].join("\n");
551
- }
552
-
553
- function isTextOnlyResult(error: unknown) {
554
- const err = errInfo(error);
555
- return [
556
- "EMPTY_RESPONSE",
557
- "IMAGE_TOOL_NOT_CALLED",
558
- "WEB_SEARCH_ONLY_RESPONSE",
559
- "IMAGE_TOOL_COMPLETED_WITHOUT_RESULT",
560
- ].includes(err.code || "") || err.message.includes("No image data");
561
- }
562
-
563
- function textOnlyError(cause: unknown) {
564
- const err = new Error("Agent result did not include an image artifact.") as Error & {
565
- code?: string;
566
- status?: number;
567
- cause?: unknown;
568
- };
569
- err.code = "AGENT_TEXT_ONLY_RESULT";
570
- err.status = 422;
571
- err.cause = cause;
572
- return err;
573
- }
574
-
575
- async function mapWithLimit<T, R>(
576
- items: readonly T[],
577
- limit: number,
578
- mapper: (item: T, index: number) => Promise<R>,
579
- ): Promise<R[]> {
580
- const results: R[] = [];
581
- let nextIndex = 0;
582
- const workerCount = Math.min(limit, items.length);
583
- await Promise.all(Array.from({ length: workerCount }, async () => {
584
- while (nextIndex < items.length) {
585
- const currentIndex = nextIndex;
586
- nextIndex += 1;
587
- results[currentIndex] = await mapper(items[currentIndex], currentIndex);
588
- }
589
- }));
590
- return results;
591
- }
592
-
593
- function cleanParallelism(value: unknown) {
594
- const numeric = typeof value === "number" ? value : Number(value);
595
- if (!Number.isFinite(numeric)) return 2;
596
- return Math.max(1, Math.min(8, Math.round(numeric)));
597
- }
598
-
599
- function notFound(sessionId: string) {
600
- const err = new Error(`Agent session not found: ${sessionId}`) as Error & { code?: string; status?: number };
601
- err.code = "AGENT_SESSION_NOT_FOUND";
602
- err.status = 404;
603
- return err;
604
- }
@@ -1,72 +0,0 @@
1
- import type { AgentGenerationSettings } from "./agentTypes.js";
2
-
3
// Allow-lists consulted by normalizeAgentGenerationSettings: a setting whose
// value is not in its set is replaced by the fallback.

/** Accepted values for the `provider` setting. */
const PROVIDERS = new Set<string>(["oauth", "api", "grok"]);

/** Accepted values for the `quality` setting. */
const QUALITIES = new Set<string>(["low", "medium", "high"]);

/** Accepted values for the `format` setting. */
const FORMATS = new Set<string>(["png", "jpeg", "webp"]);

/** Accepted values for the `moderation` setting. */
const MODERATIONS = new Set<string>(["auto", "low"]);

/** Accepted values for the `reasoningEffort` setting. */
const REASONING_EFFORTS = new Set<string>(["low", "medium", "high", "xhigh"]);

/** Accepted values for the `generationStrategy` setting. */
const GENERATION_STRATEGIES = new Set<string>(["auto", "manual"]);
9
-
10
/**
 * Baseline generation settings.
 *
 * Used as the default `fallback` argument of
 * `normalizeAgentGenerationSettings`, so any field that is missing or invalid
 * in the input resolves to the value listed here.
 */
export const DEFAULT_AGENT_GENERATION_SETTINGS: AgentGenerationSettings = {
  // Backend and model selection.
  provider: "oauth",
  model: "gpt-5.4-mini",

  // Output characteristics.
  quality: "medium",
  size: "1024x1024",
  format: "png",
  moderation: "low",

  // Model behaviour.
  reasoningEffort: "medium",
  webSearchEnabled: true,

  // Variant planning; the three counts are each clamped to 1..8 on normalisation.
  generationStrategy: "auto",
  variants: 1,
  maxAutoVariants: 8,
  parallelism: 2,
};
24
-
25
/**
 * Builds a complete, validated settings object from untrusted input.
 *
 * Each field is validated on its own; a field that is missing or fails its
 * check takes the corresponding value from `fallback`. Non-object input is
 * treated as an empty object, so the result is then a copy of `fallback`'s
 * fields passed through the same cleaners.
 *
 * @param value - Untyped candidate settings (for example, a parsed request body).
 * @param fallback - Source of replacement values; defaults to
 *   `DEFAULT_AGENT_GENERATION_SETTINGS`.
 * @returns A new settings object containing exactly the twelve known fields.
 */
export function normalizeAgentGenerationSettings(
  value: unknown,
  fallback: AgentGenerationSettings = DEFAULT_AGENT_GENERATION_SETTINGS,
): AgentGenerationSettings {
  const raw: Record<string, unknown> =
    typeof value === "object" && value !== null ? (value as Record<string, unknown>) : {};

  const provider = cleanEnum(raw.provider, PROVIDERS, fallback.provider);
  const model = cleanString(raw.model, fallback.model);
  const quality = cleanEnum(raw.quality, QUALITIES, fallback.quality);
  const size = cleanSize(raw.size, fallback.size);
  const format = cleanEnum(raw.format, FORMATS, fallback.format);
  const moderation = cleanEnum(raw.moderation, MODERATIONS, fallback.moderation);
  const reasoningEffort = cleanEnum(raw.reasoningEffort, REASONING_EFFORTS, fallback.reasoningEffort);
  // Only a real boolean is accepted; truthy/falsy stand-ins use the fallback.
  const webSearchEnabled =
    typeof raw.webSearchEnabled === "boolean" ? raw.webSearchEnabled : fallback.webSearchEnabled;
  const generationStrategy = cleanEnum(
    raw.generationStrategy,
    GENERATION_STRATEGIES,
    fallback.generationStrategy,
  );
  const variants = cleanPositiveInt(raw.variants, fallback.variants, 1, 8);
  const maxAutoVariants = cleanPositiveInt(raw.maxAutoVariants, fallback.maxAutoVariants, 1, 8);
  const parallelism = cleanPositiveInt(raw.parallelism, fallback.parallelism, 1, 8);

  return {
    provider,
    model,
    quality,
    size,
    format,
    moderation,
    reasoningEffort,
    webSearchEnabled,
    generationStrategy,
    variants,
    maxAutoVariants,
    parallelism,
  };
}
45
-
46
/**
 * Applies a partial, untrusted patch on top of existing settings.
 *
 * Patch fields override `current`, and the combined object is then
 * normalised with `current` as the fallback, so an invalid patch value leaves
 * that field as it was. A non-object patch changes nothing.
 *
 * @param current - The settings currently in effect.
 * @param patch - Untyped overrides.
 * @returns A new normalised settings object.
 */
export function mergeAgentGenerationSettings(
  current: AgentGenerationSettings,
  patch: unknown,
): AgentGenerationSettings {
  const overrides = typeof patch === "object" && patch !== null ? patch : {};
  const combined = { ...current, ...overrides };
  return normalizeAgentGenerationSettings(combined, current);
}
52
-
53
/**
 * Accepts `value` only when it is a string contained in `allowed`.
 *
 * @param value - Untyped candidate.
 * @param allowed - Set of permitted string values.
 * @param fallback - Returned when the candidate is rejected.
 * @returns The candidate (cast to `T`) or `fallback`.
 */
function cleanEnum<T extends string>(value: unknown, allowed: Set<string>, fallback: T): T {
  if (typeof value !== "string") return fallback;
  if (!allowed.has(value)) return fallback;
  return value as T;
}
56
-
57
/**
 * Returns a trimmed copy of `value`, truncated to 120 characters.
 *
 * @param value - Untyped candidate.
 * @param fallback - Returned when the candidate is not a string or is blank
 *   after trimming.
 * @returns The trimmed, length-capped string, or `fallback`.
 */
function cleanString(value: unknown, fallback: string): string {
  if (typeof value !== "string") return fallback;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed.slice(0, 120) : fallback;
}
60
-
61
/**
 * Validates an image size setting.
 *
 * After trimming, the value is accepted when it is the keyword `auto` or a
 * `<width>x<height>` pair where each side is 3 or 4 digits and the separator
 * is a lowercase `x`.
 *
 * @param value - Untyped candidate.
 * @param fallback - Returned when the candidate is rejected.
 * @returns The trimmed size string, or `fallback`.
 */
function cleanSize(value: unknown, fallback: string): string {
  if (typeof value !== "string") return fallback;
  const candidate = value.trim();
  const accepted = candidate === "auto" || /^\d{3,4}x\d{3,4}$/.test(candidate);
  return accepted ? candidate : fallback;
}
67
-
68
/**
 * Coerces an arbitrary value into an integer within `[min, max]`.
 *
 * Only numbers and non-blank strings are considered. `null`, `undefined`,
 * booleans, blank strings and objects yield `fallback`. Passing those through
 * `Number()` would turn them into 0 or 1, and an unset field would then be
 * clamped to `min` instead of keeping its previous value.
 *
 * @param value - Untyped candidate.
 * @param fallback - Returned when the candidate is not a finite number.
 * @param min - Inclusive lower bound applied after rounding.
 * @param max - Inclusive upper bound applied after rounding.
 * @returns The rounded, clamped integer, or `fallback`.
 */
function cleanPositiveInt(value: unknown, fallback: number, min: number, max: number): number {
  let numeric: number;
  if (typeof value === "number") {
    numeric = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    numeric = Number(value);
  } else {
    return fallback;
  }
  if (!Number.isFinite(numeric)) return fallback;
  return Math.max(min, Math.min(max, Math.round(numeric)));
}