ima2-gen 1.1.21 → 1.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208):
  1. package/README.md +30 -4
  2. package/bin/ima2.js +14 -4
  3. package/bin/lib/platform.js +34 -5
  4. package/docs/README.ko.md +31 -0
  5. package/lib/agentQueueWorker.js +6 -0
  6. package/lib/agentRuntime.js +3 -2
  7. package/lib/atomicWrite.js +14 -0
  8. package/lib/grokProxyLauncher.js +5 -3
  9. package/lib/inflight.js +1 -1
  10. package/lib/oauthLauncher.js +5 -0
  11. package/lib/videoFrameExtract.js +3 -3
  12. package/package.json +5 -7
  13. package/routes/edit.js +2 -1
  14. package/routes/generate.js +4 -3
  15. package/routes/health.js +4 -3
  16. package/routes/multimode.js +2 -1
  17. package/routes/video.js +4 -2
  18. package/server.js +29 -2
  19. package/ui/dist/.vite/manifest.json +12 -12
  20. package/ui/dist/assets/{AgentWorkspace-B_hq9CLg.js → AgentWorkspace-COxQ5TjU.js} +1 -1
  21. package/ui/dist/assets/{CardNewsWorkspace-wD12J7qk.js → CardNewsWorkspace-B0OkcuVz.js} +1 -1
  22. package/ui/dist/assets/{NodeCanvas-CI_wuPMf.js → NodeCanvas-BSsclEBh.js} +1 -1
  23. package/ui/dist/assets/{PromptBuilderPanel-CUTujJUV.js → PromptBuilderPanel-DpC9A5Rz.js} +1 -1
  24. package/ui/dist/assets/{PromptImportDialog-CUi66jPK.js → PromptImportDialog-CVwT0rLd.js} +2 -2
  25. package/ui/dist/assets/{PromptImportDiscoverySection-Cm3vrjY4.js → PromptImportDiscoverySection-BDCkRCRs.js} +1 -1
  26. package/ui/dist/assets/{PromptImportFolderSection-DOtWTD9n.js → PromptImportFolderSection-QoKbZD83.js} +1 -1
  27. package/ui/dist/assets/{PromptLibraryPanel-BMjQegRa.js → PromptLibraryPanel-BhFgeKnY.js} +2 -2
  28. package/ui/dist/assets/SettingsWorkspace-CfjrlH5R.js +1 -0
  29. package/ui/dist/assets/index-C-mur7pa.css +1 -0
  30. package/ui/dist/assets/index-CCP5nUOj.js +42 -0
  31. package/ui/dist/assets/{index-31uVIdt4.js → index-Cxhzi3bs.js} +1 -1
  32. package/ui/dist/index.html +2 -2
  33. package/bin/commands/annotate.ts +0 -119
  34. package/bin/commands/cancel.ts +0 -48
  35. package/bin/commands/canvas-versions.ts +0 -80
  36. package/bin/commands/capabilities.ts +0 -110
  37. package/bin/commands/cardnews.ts +0 -249
  38. package/bin/commands/comfy.ts +0 -54
  39. package/bin/commands/config.ts +0 -186
  40. package/bin/commands/defaults.ts +0 -192
  41. package/bin/commands/doctor.ts +0 -202
  42. package/bin/commands/edit.ts +0 -150
  43. package/bin/commands/gen.ts +0 -214
  44. package/bin/commands/grok.ts +0 -90
  45. package/bin/commands/history.ts +0 -146
  46. package/bin/commands/ls.ts +0 -64
  47. package/bin/commands/metadata.ts +0 -39
  48. package/bin/commands/multimode.ts +0 -196
  49. package/bin/commands/node.ts +0 -166
  50. package/bin/commands/observability.ts +0 -176
  51. package/bin/commands/ping.ts +0 -31
  52. package/bin/commands/prompt-sub/build.ts +0 -101
  53. package/bin/commands/prompt.ts +0 -492
  54. package/bin/commands/ps.ts +0 -81
  55. package/bin/commands/session.ts +0 -266
  56. package/bin/commands/show.ts +0 -72
  57. package/bin/commands/skill.ts +0 -70
  58. package/bin/commands/video.ts +0 -442
  59. package/bin/ima2.ts +0 -430
  60. package/bin/lib/args.ts +0 -92
  61. package/bin/lib/browser-id.ts +0 -16
  62. package/bin/lib/client.ts +0 -122
  63. package/bin/lib/config-store.ts +0 -120
  64. package/bin/lib/destructive-confirm.ts +0 -19
  65. package/bin/lib/doctor-checks.ts +0 -91
  66. package/bin/lib/error-hints.ts +0 -23
  67. package/bin/lib/files.ts +0 -39
  68. package/bin/lib/output.ts +0 -73
  69. package/bin/lib/platform.ts +0 -99
  70. package/bin/lib/recover-output.ts +0 -139
  71. package/bin/lib/sse.ts +0 -73
  72. package/bin/lib/star-prompt.ts +0 -97
  73. package/bin/lib/storage-doctor.ts +0 -39
  74. package/bin/lib/ui-build.ts +0 -85
  75. package/config.ts +0 -354
  76. package/lib/agentCommandParser.ts +0 -69
  77. package/lib/agentGenerationPlanner.ts +0 -273
  78. package/lib/agentQuestionResponder.ts +0 -266
  79. package/lib/agentQueueStore.ts +0 -270
  80. package/lib/agentQueueWorker.ts +0 -89
  81. package/lib/agentRuntime.ts +0 -604
  82. package/lib/agentSettings.ts +0 -72
  83. package/lib/agentStore.ts +0 -422
  84. package/lib/agentStoreRows.ts +0 -136
  85. package/lib/agentTypes.ts +0 -154
  86. package/lib/apiCachePolicy.ts +0 -11
  87. package/lib/assetLifecycle.ts +0 -146
  88. package/lib/canvasVersionStore.ts +0 -223
  89. package/lib/capabilities.ts +0 -126
  90. package/lib/cardNewsGenerator.ts +0 -271
  91. package/lib/cardNewsJobStore.ts +0 -142
  92. package/lib/cardNewsManifestStore.ts +0 -154
  93. package/lib/cardNewsPlanner.ts +0 -236
  94. package/lib/cardNewsPlannerClient.ts +0 -155
  95. package/lib/cardNewsPlannerPrompt.ts +0 -62
  96. package/lib/cardNewsPlannerSchema.ts +0 -321
  97. package/lib/cardNewsRoleTemplateStore.ts +0 -47
  98. package/lib/cardNewsTemplateStore.ts +0 -252
  99. package/lib/codexDetect.ts +0 -71
  100. package/lib/comfyBridge.ts +0 -235
  101. package/lib/composerSnapshot.ts +0 -33
  102. package/lib/configKeys.ts +0 -62
  103. package/lib/db.ts +0 -295
  104. package/lib/errInfo.ts +0 -43
  105. package/lib/errorClassify.ts +0 -100
  106. package/lib/generationCancel.ts +0 -28
  107. package/lib/generationErrors.ts +0 -238
  108. package/lib/grokImageAdapter.ts +0 -513
  109. package/lib/grokMultimodeAdapter.ts +0 -84
  110. package/lib/grokProxyLauncher.ts +0 -153
  111. package/lib/grokRuntime.ts +0 -23
  112. package/lib/grokSizeMapper.ts +0 -71
  113. package/lib/grokVideoAdapter.ts +0 -458
  114. package/lib/grokVideoCanvas.ts +0 -26
  115. package/lib/grokVideoDownload.ts +0 -59
  116. package/lib/grokVideoPlannerPrompt.ts +0 -67
  117. package/lib/historyIndex.ts +0 -51
  118. package/lib/historyList.ts +0 -181
  119. package/lib/imageMetadata.ts +0 -113
  120. package/lib/imageMetadataStore.ts +0 -67
  121. package/lib/imageModels.ts +0 -165
  122. package/lib/inflight.ts +0 -281
  123. package/lib/localImportStore.ts +0 -114
  124. package/lib/logger.ts +0 -161
  125. package/lib/nodeStore.ts +0 -91
  126. package/lib/oauthLauncher.ts +0 -94
  127. package/lib/oauthNormalize.ts +0 -30
  128. package/lib/oauthProxy/errors.ts +0 -128
  129. package/lib/oauthProxy/generators.ts +0 -494
  130. package/lib/oauthProxy/index.ts +0 -28
  131. package/lib/oauthProxy/prompts.ts +0 -123
  132. package/lib/oauthProxy/references.ts +0 -45
  133. package/lib/oauthProxy/runtime.ts +0 -115
  134. package/lib/oauthProxy/streams.ts +0 -232
  135. package/lib/oauthProxy/types.ts +0 -9
  136. package/lib/oauthProxy.ts +0 -3
  137. package/lib/openDirectory.ts +0 -47
  138. package/lib/pngInfo.ts +0 -26
  139. package/lib/promptBuilder/attachments.ts +0 -74
  140. package/lib/promptBuilder/client.ts +0 -130
  141. package/lib/promptBuilder/constants.ts +0 -9
  142. package/lib/promptBuilder/context.ts +0 -36
  143. package/lib/promptBuilder/errors.ts +0 -12
  144. package/lib/promptBuilder/requestSchema.ts +0 -56
  145. package/lib/promptBuilder/responseParser.ts +0 -219
  146. package/lib/promptBuilder/systemPrompt.ts +0 -135
  147. package/lib/promptBuilder/transport.ts +0 -94
  148. package/lib/promptBuilder/types.ts +0 -109
  149. package/lib/promptImport/curatedSources.ts +0 -141
  150. package/lib/promptImport/discoveryRegistry.ts +0 -329
  151. package/lib/promptImport/errors.ts +0 -18
  152. package/lib/promptImport/githubDiscovery.ts +0 -309
  153. package/lib/promptImport/githubFolder.ts +0 -397
  154. package/lib/promptImport/githubSource.ts +0 -257
  155. package/lib/promptImport/gptImageHints.ts +0 -70
  156. package/lib/promptImport/parsePromptCandidates.ts +0 -179
  157. package/lib/promptImport/promptIndex.ts +0 -326
  158. package/lib/promptImport/rankPromptCandidates.ts +0 -65
  159. package/lib/promptImport/types.ts +0 -103
  160. package/lib/promptSafetyPolicy.ts +0 -5
  161. package/lib/providerOptions.ts +0 -56
  162. package/lib/referenceImageCompress.ts +0 -84
  163. package/lib/refs.ts +0 -133
  164. package/lib/requestLogger.ts +0 -49
  165. package/lib/responsesDoctor.ts +0 -456
  166. package/lib/responsesErrors.ts +0 -83
  167. package/lib/responsesFallback.ts +0 -114
  168. package/lib/responsesImageAdapter.ts +0 -466
  169. package/lib/responsesParse.ts +0 -452
  170. package/lib/responsesTools.ts +0 -28
  171. package/lib/runtimeContext.ts +0 -146
  172. package/lib/runtimePorts.ts +0 -105
  173. package/lib/sessionStore.ts +0 -308
  174. package/lib/storageMigration.ts +0 -310
  175. package/lib/styleSheet.ts +0 -139
  176. package/lib/systemTrash.ts +0 -20
  177. package/lib/videoContinuity.ts +0 -180
  178. package/lib/videoFrameExtract.ts +0 -78
  179. package/lib/videoSeriesChain.ts +0 -29
  180. package/lib/visibleTextLanguagePolicy.ts +0 -7
  181. package/routes/agent.ts +0 -308
  182. package/routes/annotations.ts +0 -118
  183. package/routes/canvasVersions.ts +0 -69
  184. package/routes/capabilities.ts +0 -18
  185. package/routes/cardNews.ts +0 -211
  186. package/routes/comfy.ts +0 -43
  187. package/routes/edit.ts +0 -352
  188. package/routes/generate.ts +0 -492
  189. package/routes/grok.ts +0 -24
  190. package/routes/health.ts +0 -123
  191. package/routes/history.ts +0 -221
  192. package/routes/imageImport.ts +0 -37
  193. package/routes/index.ts +0 -52
  194. package/routes/metadata.ts +0 -77
  195. package/routes/multimode.ts +0 -499
  196. package/routes/nodes.ts +0 -578
  197. package/routes/promptBuilder.ts +0 -37
  198. package/routes/promptImport.ts +0 -379
  199. package/routes/prompts.ts +0 -428
  200. package/routes/quota.ts +0 -89
  201. package/routes/sessions.ts +0 -317
  202. package/routes/storage.ts +0 -47
  203. package/routes/video.ts +0 -300
  204. package/routes/videoExtended.ts +0 -284
  205. package/server.ts +0 -293
  206. package/ui/dist/assets/SettingsWorkspace-PiaVnsdA.js +0 -1
  207. package/ui/dist/assets/index-CjgnNtgt.css +0 -1
  208. package/ui/dist/assets/index-Da2s4_-5.js +0 -36
@@ -1,604 +0,0 @@
1
- import { randomBytes } from "node:crypto";
2
- import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
3
- import { join } from "node:path";
4
- import { ulid } from "ulid";
5
- import { embedImageMetadataBestEffort } from "./imageMetadataStore.js";
6
- import { invalidateHistoryIndex } from "./historyIndex.js";
7
- import { logEvent } from "./logger.js";
8
- import { detectImageMimeFromB64 } from "./refs.js";
9
- import { resolveProviderOptions } from "./providerOptions.js";
10
- import { generateViaResponses } from "./responsesImageAdapter.js";
11
- import { generateViaGrok, type GrokReferenceImage } from "./grokImageAdapter.js";
12
- import { generateVideoViaGrok } from "./grokVideoAdapter.js";
13
- import { parseVideoParams } from "./agentGenerationPlanner.js";
14
- import {
15
- appendAgentTurn,
16
- buildImageContextManifest,
17
- getAgentImages,
18
- getAgentSession,
19
- importAgentImage,
20
- recordAgentWebFinding,
21
- restartAgentRuntimeSession,
22
- } from "./agentStore.js";
23
- import {
24
- AGENT_ALLOWED_TOOLS,
25
- type AgentGenerationPlan,
26
- type AgentToolCallSummary,
27
- type AgentToolName,
28
- } from "./agentTypes.js";
29
- import { errInfo } from "./errInfo.js";
30
- import { type RuntimeContext } from "./runtimeContext.js";
31
-
32
/**
 * Per-request overrides accepted by the agent run entry points in this module.
 * Every field is optional; the functions that read a field supply their own default.
 */
type AgentRunOptions = {
  /** Provider key; image generation falls back to "oauth" when omitted. */
  provider?: string;
  /** Image quality; "medium" is used for the Responses path when omitted. */
  quality?: string;
  /** Requested size; image generation falls back to "1024x1024". */
  size?: string;
  /** Output format for non-Grok images; "png" when omitted. */
  format?: string;
  /** Moderation level forwarded to the Responses path; "low" when omitted. */
  moderation?: string;
  /** Raw model name handed to provider option resolution. */
  model?: string;
  /** Raw reasoning effort handed to provider option resolution. */
  reasoningEffort?: string;
  /** Correlation id; an id is generated with `ulid()` when omitted. */
  requestId?: string;
  /** Overrides the session's web-search flag (always on for the "grok" provider). */
  webSearchEnabled?: boolean;
  /** Requested concurrency; normalized by `cleanParallelism`. */
  parallelism?: number;
  /** Abort signal forwarded to the provider adapters. */
  signal?: AbortSignal | null;
};
45
-
46
/**
 * Rejects any tool name that is not listed in `AGENT_ALLOWED_TOOLS`.
 *
 * @param tools Tool names the caller intends to enable.
 * @throws Error with `code` "AGENT_TOOL_NOT_ALLOWED", `status` 403 and `deniedTools`
 *   (the offending names) when at least one name is not allow-listed.
 */
export function assertAgentAllowedTools(tools: readonly string[]) {
  const allowList = new Set<string>(AGENT_ALLOWED_TOOLS);
  const rejected: string[] = [];
  for (const name of tools) {
    if (!allowList.has(name)) rejected.push(name);
  }
  if (rejected.length === 0) return;
  throw Object.assign(new Error(`Agent tool is not allowed: ${rejected.join(", ")}`), {
    code: "AGENT_TOOL_NOT_ALLOWED",
    status: 403,
    deniedTools: rejected,
  });
}
61
-
62
/**
 * Builds the payload that advertises the agent's allow-listed tools.
 *
 * @returns An object whose `tools` array is a fresh copy of `AGENT_ALLOWED_TOOLS`.
 */
export function agentAllowedToolPayload() {
  const tools = Array.from(AGENT_ALLOWED_TOOLS);
  return { tools };
}
65
-
66
/**
 * Runs a direct agent turn: wraps the prompt in a one-image "single" plan and executes it,
 * recording the user turn as part of the run.
 *
 * @param ctx Runtime context forwarded to the plan runner.
 * @param sessionId Agent session to run against.
 * @param prompt User prompt; becomes the plan's only generation prompt.
 * @param options Per-request overrides; `parallelism` is normalized via `cleanParallelism`.
 * @returns Whatever `runAgentGenerationPlan` resolves to.
 */
export async function runAgentTurn(ctx: RuntimeContext, sessionId: string, prompt: string, options: AgentRunOptions = {}) {
  const directPlan: AgentGenerationPlan = {
    mode: "single",
    prompts: [prompt],
    requestedVariants: 1,
    plannedVariants: 1,
    plannedParallelism: cleanParallelism(options.parallelism),
    source: "auto-default",
    reason: "Direct turn endpoint defaults to one image.",
    command: null,
    assistantText: null,
  };
  return runAgentGenerationPlan(ctx, sessionId, prompt, directPlan, options, { appendUserTurn: true });
}
86
-
87
/**
 * Executes a prepared generation plan for an agent session and records the resulting turns.
 *
 * Flow, as implemented below:
 *  - "question" plans append a single assistant turn and return without generating;
 *  - "video" plans are delegated to `runAgentVideoGeneration`;
 *  - every other plan loads the image context manifest, runs one generation per plan prompt
 *    (bounded by the cleaned parallelism), then appends a tool turn and an assistant turn.
 *
 * @param ctx Runtime context forwarded to the generators.
 * @param sessionId Agent session to run against.
 * @param prompt Original user prompt; also the generation prompt when `plan.prompts` is empty.
 * @param plan Generation plan to execute.
 * @param options Per-request overrides.
 * @param behavior Pass `appendUserTurn: false` to skip recording the user turn.
 * @returns The final assistant turn plus the ids of generated images and recorded web findings.
 * @throws Error with code "AGENT_SESSION_NOT_FOUND" when the session does not exist; errors
 *   from `assertAgentAllowedTools` and from the generators propagate unchanged.
 */
export async function runAgentGenerationPlan(
  ctx: RuntimeContext,
  sessionId: string,
  prompt: string,
  plan: AgentGenerationPlan,
  options: AgentRunOptions = {},
  behavior: { appendUserTurn?: boolean } = {},
) {
  const session = getAgentSession(sessionId);
  if (!session) throw notFound(sessionId);
  // The "grok" provider always runs with web search on; otherwise the request flag wins over the session flag.
  const webSearchEnabled = options.provider === "grok" ? true : options.webSearchEnabled ?? session.webSearchEnabled;
  const enabledTools: AgentToolName[] = webSearchEnabled
    ? [...AGENT_ALLOWED_TOOLS]
    : ["ima2.get_image_context", "ima2.generate_image", "ima2.generate_video"];
  assertAgentAllowedTools(enabledTools);
  if (behavior.appendUserTurn !== false) {
    appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
  }
  if (plan.mode === "question") {
    const assistantTurn = appendAgentTurn({
      sessionId,
      role: "assistant",
      text: plan.assistantText || plan.reason || "What would you like to clarify before generating images?",
      imageIds: [],
      webFindingIds: [],
      status: "complete",
    });
    return { assistantTurn, imageIds: [], webFindingIds: [] };
  }
  if (plan.mode === "video") {
    return runAgentVideoGeneration(ctx, sessionId, prompt, {
      ...options,
      requestId: options.requestId ?? `agent_video_${ulid()}`,
      // The user turn (if any) was already recorded above.
      skipUserTurn: true,
    });
  }
  // Start the clock before the manifest is built so the recorded duration covers the actual work,
  // and read the finish time once so finishedAt and durationMs always agree.
  const contextStartedAt = Date.now();
  const manifest = buildImageContextManifest(sessionId);
  const contextFinishedAt = Date.now();
  appendAgentTurn({
    sessionId,
    role: "tool",
    text: "ima2.get_image_context",
    status: "complete",
    raw: {
      toolCalls: [{
        id: `tc_context_${ulid()}`,
        name: "ima2.get_image_context",
        status: "complete",
        startedAt: contextStartedAt,
        finishedAt: contextFinishedAt,
        durationMs: contextFinishedAt - contextStartedAt,
        outputSummary: "Loaded current image context manifest.",
      } satisfies AgentToolCallSummary],
    },
  });
  const generationPrompts = plan.prompts.length > 0 ? plan.prompts : [prompt];
  const baseRequestId = options.requestId ?? `agent_${ulid()}`;
  const generationResults = await mapWithLimit(generationPrompts, cleanParallelism(plan.plannedParallelism ?? options.parallelism), async (generationPrompt, index) => {
    // Fan-out runs get a numeric suffix so each generation has its own request id.
    const requestId = generationPrompts.length > 1 ? `${baseRequestId}_${index + 1}` : baseRequestId;
    const startedAt = Date.now();
    const result = await runGeneratorWithRuntimeRecovery(ctx, sessionId, generationPrompt, manifest, webSearchEnabled, {
      ...options,
      requestId,
    });
    const findingIds = recordSearchFindings(sessionId, generationPrompt, result.webSearchCalls, result.provider ?? "oauth");
    const finishedAt = Date.now();
    return {
      prompt: generationPrompt,
      imageId: result.image.id,
      text: result.text,
      findingIds,
      toolCall: {
        id: `tc_generate_${ulid()}`,
        name: "ima2.generate_image",
        status: "complete",
        startedAt,
        finishedAt,
        durationMs: finishedAt - startedAt,
        requestId,
        inputSummary: generationPrompt,
        outputSummary: `Generated ${result.image.filename}. ${plan.reason}`,
        imageIds: [result.image.id],
        webFindingIds: findingIds,
      } satisfies AgentToolCallSummary,
    };
  });
  const imageIds = generationResults.map((result) => result.imageId);
  const responseTexts = generationResults
    .map((result) => result.text)
    .filter((text): text is string => typeof text === "string" && text.trim().length > 0);
  const findingIds = generationResults.flatMap((result) => result.findingIds);
  const webToolCall: AgentToolCallSummary | null = webSearchEnabled ? {
    id: `tc_web_${ulid()}`,
    name: "ima2.web_search",
    status: "complete",
    outputSummary: findingIds.length > 0
      ? `Recorded ${findingIds.length} web finding${findingIds.length === 1 ? "" : "s"}.`
      : "Web search enabled; no findings were reported.",
    webFindingIds: findingIds,
  } : null;
  appendAgentTurn({
    sessionId,
    role: "tool",
    text: webSearchEnabled ? "ima2.web_search + ima2.generate_image" : "ima2.generate_image",
    imageIds,
    webFindingIds: findingIds,
    status: "complete",
    raw: {
      toolCalls: [
        ...(webToolCall ? [webToolCall] : []),
        ...generationResults.map((result) => result.toolCall),
      ],
    },
  });
  const assistantTurn = appendAgentTurn({
    sessionId,
    role: "assistant",
    text: formatAgentAssistantText(plan, imageIds.length, responseTexts),
    imageIds,
    webFindingIds: findingIds,
    status: "complete",
  });
  return { assistantTurn, imageIds, webFindingIds: findingIds };
}
211
-
212
/**
 * Composes the assistant's closing message for an image generation run.
 *
 * @param plan Executed plan; `mode`, `plannedParallelism` and `reason` are read.
 * @param imageCount Number of image artifacts produced.
 * @param responseTexts Model-provided texts, placed before the summary when present.
 * @returns Optional model text, then a count sentence, a mode sentence and the plan reason, trimmed.
 */
function formatAgentAssistantText(plan: AgentGenerationPlan, imageCount: number, responseTexts: readonly string[]): string {
  const summary = imageCount === 1 ? "Generated 1 image artifact." : `Generated ${imageCount} image artifacts.`;

  let modeSentence = "Single-image plan completed.";
  if (plan.mode === "fanout") {
    const plural = plan.plannedParallelism === 1 ? "" : "s";
    modeSentence = `Fanout used ${plan.plannedParallelism} concurrent tool call${plural}.`;
  }

  const preface = responseTexts.length === 0 ? "" : `${responseTexts.join("\n\n")}\n\n`;
  return `${preface}${summary} ${modeSentence} ${plan.reason}`.trim();
}
220
-
221
/**
 * Runs the retrying image generator and, on failure, records the error on the session.
 *
 * On any failure an assistant turn with status "error" is appended; when the failure is
 * classified by `isRuntimeRestartableError`, the agent runtime session is restarted first.
 * The original error is always rethrown.
 */
async function runGeneratorWithRuntimeRecovery(
  ctx: RuntimeContext,
  sessionId: string,
  prompt: string,
  manifest: string,
  webSearchEnabled: boolean,
  options: AgentRunOptions,
) {
  try {
    return await generateAgentImageWithRetry(ctx, sessionId, prompt, manifest, webSearchEnabled, options);
  } catch (failure) {
    const info = errInfo(failure);
    const shouldRestart = isRuntimeRestartableError(failure);
    if (shouldRestart) {
      // The restart reason prefers the error code and falls back to the message.
      restartAgentRuntimeSession(sessionId, info.code || info.message);
    }
    appendAgentTurn({ sessionId, role: "assistant", text: info.message, status: "error" });
    throw failure;
  }
}
240
-
241
/**
 * Decides whether an error warrants restarting the agent runtime session.
 *
 * @param error Any thrown value; normalized through `errInfo`.
 * @returns `true` when the error code contains "AUTH", "TIMEOUT" or "PROTOCOL", or when the
 *   lower-cased message contains "protocol wedge".
 */
export function isRuntimeRestartableError(error: unknown) {
  const info = errInfo(error);
  const code = info.code || "";
  const restartMarkers = ["AUTH", "TIMEOUT", "PROTOCOL"];
  if (restartMarkers.some((marker) => code.includes(marker))) return true;
  return info.message.toLowerCase().includes("protocol wedge");
}
251
-
252
/**
 * Generates an image, retrying once with a forcing prompt when the first attempt yields
 * no image artifact.
 *
 * Only failures recognized by `isTextOnlyResult` are retried; any other error is rethrown
 * immediately. A rejected first attempt is recorded as a tool turn with status "error".
 *
 * @throws The "AGENT_TEXT_ONLY_RESULT" error from `textOnlyError` when both attempts end
 *   without an image.
 */
async function generateAgentImageWithRetry(
  ctx: RuntimeContext,
  sessionId: string,
  prompt: string,
  manifest: string,
  webSearchEnabled: boolean,
  options: AgentRunOptions,
) {
  const maxAttempts = 2;
  let lastFailure: unknown = null;
  let attempt = 0;
  while (attempt < maxAttempts) {
    const isRetry = attempt > 0;
    try {
      // The retry wraps the prompt with an explicit instruction to return an image.
      const attemptPrompt = isRetry ? forceImagePrompt(prompt) : prompt;
      const outcome = await generateAgentImage(ctx, sessionId, attemptPrompt, manifest, webSearchEnabled, options);
      if (outcome.image) return outcome;
    } catch (failure) {
      lastFailure = failure;
      if (!isTextOnlyResult(failure)) throw failure;
      if (isRetry) break;
      appendAgentTurn({
        sessionId,
        role: "tool",
        text: "ima2.generate_image retry: text-only result rejected",
        status: "error",
      });
    }
    attempt += 1;
  }
  throw textOnlyError(lastFailure);
}
280
-
281
/**
 * Performs one image generation for an agent session and persists the result.
 *
 * Provider options are resolved first; the prompt sent to the provider is the context
 * manifest followed by the user request. The "grok" provider goes through `generateViaGrok`
 * (with the session's current image as reference); every other provider goes through
 * `generateViaResponses`.
 *
 * @returns The imported image record, the web-search call count (0 when absent), the
 *   response text (or `null`) and the provider that was used.
 * @throws Error carrying the resolver's `code`/`status` when provider options fail to resolve.
 */
async function generateAgentImage(
  ctx: RuntimeContext,
  sessionId: string,
  prompt: string,
  manifest: string,
  webSearchEnabled: boolean,
  options: AgentRunOptions,
) {
  const requestId = options.requestId ?? `agent_${ulid()}`;
  const resolved = resolveProviderOptions(ctx, {
    provider: options.provider ?? "oauth",
    rawModel: options.model,
    rawReasoningEffort: options.reasoningEffort,
    rawSize: options.size ?? "1024x1024",
    rawWebSearchEnabled: webSearchEnabled,
    searchMode: webSearchEnabled ? "on" : "off",
  });
  if (resolved.error) {
    const failure = new Error(resolved.error) as Error & { code?: string; status?: number };
    failure.code = resolved.code;
    failure.status = resolved.status;
    throw failure;
  }

  const activeProvider = resolved.provider;
  // High quality on Grok is expressed by switching to the dedicated quality model.
  const effectiveModel = activeProvider === "grok" && options.quality === "high"
    ? "grok-imagine-image-quality"
    : resolved.model;
  const providerPrompt = `${manifest}\n\nUser request:\n${prompt}`;

  const response = activeProvider === "grok"
    ? await generateViaGrok(providerPrompt, ctx, {
      model: effectiveModel,
      size: resolved.size,
      requestId,
      signal: options.signal ?? undefined,
      references: await loadAgentCurrentImageReferences(ctx, sessionId),
    })
    : await generateViaResponses(
      activeProvider,
      providerPrompt,
      options.quality ?? "medium",
      resolved.size,
      options.moderation ?? "low",
      [],
      requestId,
      "auto",
      ctx,
      {
        model: resolved.model,
        reasoningEffort: resolved.reasoningEffort,
        webSearchEnabled,
        signal: options.signal,
      },
    );

  // Grok decides the encoding itself, so the file extension follows the reported or detected MIME type.
  let format: string;
  if (activeProvider === "grok") {
    const reportedMime = "mime" in response ? response.mime : undefined;
    format = imageFormatFromMime(reportedMime || detectImageMimeFromB64(response.b64) || "image/jpeg");
  } else {
    format = options.format ?? "png";
  }

  const image = await persistAgentImage(ctx, sessionId, prompt, format, requestId, response, {
    provider: String(activeProvider),
    model: String(effectiveModel),
  });
  const text = "text" in response && typeof response.text === "string" ? response.text : null;
  return { image, webSearchCalls: response.webSearchCalls || 0, text, provider: activeProvider };
}
343
-
344
/**
 * Loads the session's last image from the generated directory as a Grok reference image.
 *
 * @returns A single-element array with the base64 payload and its detected MIME type, or an
 *   empty array when the session has no last image, the image has no filename, or the file
 *   cannot be read (the read failure is logged as "grok_ref_missing").
 */
async function loadAgentCurrentImageReferences(ctx: RuntimeContext, sessionId: string): Promise<GrokReferenceImage[]> {
  const lastImageId = getAgentSession(sessionId)?.lastImageId;
  if (!lastImageId) return [];

  const currentImage = getAgentImages(sessionId).find((candidate) => candidate.id === lastImageId);
  if (!currentImage?.filename) return [];

  try {
    const bytes = await readFile(join(ctx.config.storage.generatedDir, currentImage.filename));
    const b64 = bytes.toString("base64");
    const mime = detectImageMimeFromB64(b64);
    return [{ b64, declaredMime: mime, detectedMime: mime }];
  } catch (failure) {
    const info = errInfo(failure);
    logEvent("agent", "grok_ref_missing", {
      sessionId,
      filename: currentImage.filename,
      code: info.code,
      message: info.message,
    });
    return [];
  }
}
360
-
361
/**
 * Maps an image MIME type to the file format used for the saved file.
 *
 * @param mime MIME type, possibly missing.
 * @returns "jpeg" for "image/jpeg", "webp" for "image/webp", and "png" for anything else.
 */
function imageFormatFromMime(mime: string | null | undefined): "png" | "jpeg" | "webp" {
  switch (mime) {
    case "image/jpeg":
      return "jpeg";
    case "image/webp":
      return "webp";
    default:
      return "png";
  }
}
366
-
367
/**
 * Writes a generated image and its JSON sidecar to the generated directory, then registers
 * the image on the agent session.
 *
 * The image bytes are first passed through `embedImageMetadataBestEffort`. If writing the
 * `.json` sidecar fails, the image file is removed again and the error is rethrown.
 *
 * @param format File extension used for the saved image.
 * @param response Provider response; `b64` holds the base64-encoded image.
 * @param generation Provider and model names stored in the metadata.
 * @returns The result of `importAgentImage` for the saved file.
 */
async function persistAgentImage(
  ctx: RuntimeContext,
  sessionId: string,
  prompt: string,
  format: string,
  requestId: string,
  response: { b64: string; revisedPrompt?: string | null; usage?: unknown; webSearchCalls?: number },
  generation: { provider: string; model: string },
) {
  const outputDir = ctx.config.storage.generatedDir;
  await mkdir(outputDir, { recursive: true });

  const randomSuffix = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
  const filename = `${Date.now()}_${randomSuffix}_agent.${format}`;
  const meta = {
    kind: "agent",
    requestId,
    sessionId,
    prompt,
    userPrompt: prompt,
    revisedPrompt: response.revisedPrompt ?? null,
    provider: generation.provider,
    model: generation.model,
    createdAt: Date.now(),
    usage: response.usage ?? null,
    webSearchCalls: response.webSearchCalls ?? 0,
  };

  const rawBytes = Buffer.from(response.b64, "base64");
  const embedded = await embedImageMetadataBestEffort(rawBytes, format, meta, {
    version: ctx.packageVersion,
  });

  const imagePath = join(outputDir, filename);
  const sidecarPath = `${imagePath}.json`;
  await writeFile(imagePath, embedded.buffer);
  try {
    await writeFile(sidecarPath, JSON.stringify(meta));
  } catch (sidecarFailure) {
    // Do not leave an image behind without its sidecar; cleanup failures are ignored.
    await unlink(imagePath).catch(() => {});
    throw sidecarFailure;
  }

  invalidateHistoryIndex();
  logEvent("agent", "saved", { requestId, sessionId, filename });
  return importAgentImage(sessionId, {
    id: `ai_${ulid()}`,
    filename,
    url: `/generated/${filename}`,
    prompt,
    revisedPrompt: response.revisedPrompt ?? null,
    createdAt: Date.now(),
  });
}
414
-
415
/**
 * Generates one video for an agent session via Grok and records the tool and assistant turns.
 *
 * When the session has a last image that is not an `.mp4`, that image is loaded and used as
 * the source for image-to-video; if loading fails the run falls back to text-to-video and the
 * failure is logged. Duration, resolution and aspect ratio come from `parseVideoParams(prompt)`
 * with defaults of 5, "480p" and "auto".
 *
 * @param ctx Runtime context forwarded to the video adapter.
 * @param sessionId Agent session to run against.
 * @param prompt User prompt for the video.
 * @param options Per-request overrides; `skipUserTurn` suppresses recording the user turn.
 * @returns The final assistant turn, the video's id in `imageIds`, and an empty `webFindingIds`.
 * @throws Error with code "AGENT_SESSION_NOT_FOUND" when the session does not exist; adapter
 *   and persistence errors propagate unchanged.
 */
export async function runAgentVideoGeneration(
  ctx: RuntimeContext,
  sessionId: string,
  prompt: string,
  options: AgentRunOptions & { skipUserTurn?: boolean } = {},
) {
  const session = getAgentSession(sessionId);
  if (!session) throw notFound(sessionId);
  if (!options.skipUserTurn) {
    appendAgentTurn({ sessionId, role: "user", text: prompt, status: "complete" });
  }
  const requestId = options.requestId ?? `agent_video_${ulid()}`;
  const startedAt = Date.now();

  // Auto I2V: if session has a last image, use it as source
  let sourceImage: string | undefined;
  let mode: "text-to-video" | "image-to-video" = "text-to-video";
  if (session.lastImageId) {
    const images = getAgentImages(sessionId);
    const lastImage = images.find((img) => img.id === session.lastImageId);
    if (lastImage?.filename && !lastImage.filename.endsWith(".mp4")) {
      try {
        const { loadAssetB64 } = await import("./nodeStore.js");
        sourceImage = await loadAssetB64(ctx.rootDir, lastImage.filename, ctx.config.storage.generatedDir);
        mode = "image-to-video";
      } catch (error) {
        // Best effort: fall back to text-to-video, but leave a trace of why the source was skipped.
        const info = errInfo(error);
        logEvent("agent", "video_source_missing", {
          sessionId,
          requestId,
          filename: lastImage.filename,
          code: info.code,
          message: info.message,
        });
      }
    }
  }

  const videoParams = parseVideoParams(prompt);

  const result = await generateVideoViaGrok(prompt, ctx, {
    model: "grok-imagine-video",
    mode,
    sourceImage,
    duration: videoParams.duration ?? 5,
    resolution: videoParams.resolution ?? "480p",
    aspectRatio: (videoParams.aspectRatio ?? "auto") as "auto" | "1:1" | "16:9" | "9:16" | "4:3" | "3:4" | "3:2" | "2:3",
    requestId,
    signal: options.signal ?? undefined,
  });
  const video = await persistAgentVideo(ctx, sessionId, prompt, requestId, result);
  const finishedAt = Date.now();
  const toolCall: AgentToolCallSummary = {
    id: `tc_video_${ulid()}`,
    name: "ima2.generate_video",
    status: "complete",
    startedAt,
    finishedAt,
    durationMs: finishedAt - startedAt,
    requestId,
    inputSummary: prompt,
    outputSummary: `Generated video ${video.filename}.`,
    imageIds: [video.id],
  };
  appendAgentTurn({
    sessionId,
    role: "tool",
    text: "ima2.generate_video",
    imageIds: [video.id],
    status: "complete",
    raw: { toolCalls: [toolCall] },
  });
  const assistantTurn = appendAgentTurn({
    sessionId,
    role: "assistant",
    text: `Generated 1 video artifact. ${result.revisedPrompt}`,
    imageIds: [video.id],
    status: "complete",
  });
  return { assistantTurn, imageIds: [video.id], webFindingIds: [] };
}
487
-
488
/**
 * Writes a generated video and its JSON sidecar to the generated directory, then registers
 * the video on the agent session through `importAgentImage`.
 *
 * If writing the `.json` sidecar fails, the video file is removed again and the error is rethrown.
 *
 * @param result Adapter result; `videoBuffer` holds the bytes written to the `.mp4` file.
 * @returns The result of `importAgentImage` for the saved file.
 */
async function persistAgentVideo(
  ctx: RuntimeContext,
  sessionId: string,
  prompt: string,
  requestId: string,
  result: { videoBuffer: Buffer; revisedPrompt: string; usage: Record<string, number> | null; webSearchCalls: number },
) {
  const outputDir = ctx.config.storage.generatedDir;
  await mkdir(outputDir, { recursive: true });

  const randomSuffix = randomBytes(ctx.config.ids.generatedHexBytes).toString("hex");
  const filename = `${Date.now()}_${randomSuffix}_agent.mp4`;
  const { revisedPrompt, usage, webSearchCalls } = result;
  const meta = {
    kind: "agent",
    mediaType: "video",
    requestId,
    sessionId,
    prompt,
    userPrompt: prompt,
    revisedPrompt,
    provider: "grok",
    model: "grok-imagine-video",
    createdAt: Date.now(),
    usage,
    webSearchCalls,
  };

  const videoPath = join(outputDir, filename);
  const sidecarPath = `${videoPath}.json`;
  await writeFile(videoPath, result.videoBuffer);
  try {
    await writeFile(sidecarPath, JSON.stringify(meta));
  } catch (sidecarFailure) {
    // Do not leave a video behind without its sidecar; cleanup failures are ignored.
    await unlink(videoPath).catch(() => {});
    throw sidecarFailure;
  }

  invalidateHistoryIndex();
  logEvent("agent", "video_saved", { requestId, sessionId, filename });
  return importAgentImage(sessionId, {
    id: `ai_${ulid()}`,
    filename,
    url: `/generated/${filename}`,
    prompt,
    revisedPrompt,
    createdAt: Date.now(),
  });
}
531
-
532
/**
 * Records a single summary web finding when the provider reported web search calls.
 *
 * @param sessionId Session the finding belongs to.
 * @param prompt Stored as the finding's query.
 * @param count Number of web search calls reported; a falsy count records nothing.
 * @param provider Provider name; "grok" selects the Grok wording, anything else the Responses wording.
 * @returns An array with the value returned by `recordAgentWebFinding`, or an empty array.
 */
function recordSearchFindings(sessionId: string, prompt: string, count: number, provider: string) {
  if (!count) return [];
  const fromGrok = provider === "grok";
  const sourceLabel = fromGrok ? "Grok" : "Responses";
  const plural = count === 1 ? "" : "s";
  const finding = recordAgentWebFinding({
    sessionId,
    query: prompt,
    title: fromGrok ? "Grok visual research" : "Responses web_search",
    snippet: `${sourceLabel} reported ${count} web search call${plural}.`,
  });
  return [finding];
}
544
-
545
/**
 * Wraps a prompt with instructions telling the model that the previous turn produced no
 * image and that it must return one now.
 *
 * @param prompt Original user request.
 * @returns Three newline-separated lines ending with the user request.
 */
function forceImagePrompt(prompt: string) {
  const preamble =
    "The previous turn did not return an image artifact.\n" +
    "Return a final image using ima2.generate_image/image_generation now.\n";
  return `${preamble}User request: ${prompt}`;
}
552
-
553
/**
 * Tells whether an error means the provider finished without producing image data.
 *
 * @param error Any thrown value; normalized through `errInfo`.
 * @returns `true` for the codes "EMPTY_RESPONSE", "IMAGE_TOOL_NOT_CALLED",
 *   "WEB_SEARCH_ONLY_RESPONSE" and "IMAGE_TOOL_COMPLETED_WITHOUT_RESULT", or when the
 *   message contains "No image data".
 */
function isTextOnlyResult(error: unknown) {
  const info = errInfo(error);
  switch (info.code || "") {
    case "EMPTY_RESPONSE":
    case "IMAGE_TOOL_NOT_CALLED":
    case "WEB_SEARCH_ONLY_RESPONSE":
    case "IMAGE_TOOL_COMPLETED_WITHOUT_RESULT":
      return true;
    default:
      return info.message.includes("No image data");
  }
}
562
-
563
/**
 * Builds the error raised when an agent run ends without an image artifact.
 *
 * @param cause Underlying failure (may be `null`), stored on the error's `cause` property.
 * @returns An Error with `code` "AGENT_TEXT_ONLY_RESULT" and `status` 422.
 */
function textOnlyError(cause: unknown): Error & { code?: string; status?: number; cause?: unknown } {
  return Object.assign(new Error("Agent result did not include an image artifact."), {
    code: "AGENT_TEXT_ONLY_RESULT",
    status: 422,
    cause,
  });
}
574
-
575
/**
 * Maps `items` through an async `mapper` with at most `limit` calls in flight, preserving
 * input order in the result.
 *
 * @param items Inputs to map.
 * @param limit Maximum concurrency. Values below 1 or NaN are treated as 1 so that work is
 *   never silently dropped; fractional values are floored.
 * @param mapper Async transform receiving the item and its index.
 * @returns Results in the same order as `items`.
 * @throws The first mapper rejection. Once a mapper has failed, idle workers stop claiming
 *   new items; calls already in flight are left to settle.
 */
async function mapWithLimit<T, R>(
  items: readonly T[],
  limit: number,
  mapper: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);
  if (items.length === 0) return results;

  // `Math.floor(limit) || 1` turns 0 and NaN into 1; the outer max covers negative limits.
  const workerCount = Math.max(1, Math.min(Math.floor(limit) || 1, items.length));
  let nextIndex = 0;
  let failed = false;

  const worker = async (): Promise<void> => {
    while (!failed && nextIndex < items.length) {
      const currentIndex = nextIndex;
      nextIndex += 1;
      try {
        results[currentIndex] = await mapper(items[currentIndex], currentIndex);
      } catch (error) {
        // Stop the other workers from starting new work after a failure.
        failed = true;
        throw error;
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
592
-
593
/**
 * Normalizes a requested parallelism to an integer between 1 and 8.
 *
 * Only numbers and non-blank numeric strings are interpreted. Everything else (including
 * `null`, `""`, booleans and arrays, which `Number()` would coerce to 0 or 1) yields the
 * default of 2, as do NaN and infinite values.
 *
 * @param value Raw parallelism value.
 * @returns The rounded value clamped to [1, 8], or 2 when the input is not a usable number.
 */
function cleanParallelism(value: unknown) {
  const fallback = 2;
  let numeric: number;
  if (typeof value === "number") {
    numeric = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    numeric = Number(value);
  } else {
    return fallback;
  }
  if (!Number.isFinite(numeric)) return fallback;
  return Math.max(1, Math.min(8, Math.round(numeric)));
}
598
-
599
/**
 * Builds the error used when an agent session id cannot be resolved.
 *
 * @param sessionId The id that was looked up.
 * @returns An Error with `code` "AGENT_SESSION_NOT_FOUND" and `status` 404.
 */
function notFound(sessionId: string): Error & { code?: string; status?: number } {
  return Object.assign(new Error(`Agent session not found: ${sessionId}`), {
    code: "AGENT_SESSION_NOT_FOUND",
    status: 404,
  });
}
@@ -1,72 +0,0 @@
1
- import type { AgentGenerationSettings } from "./agentTypes.js";
2
-
3
// Allowed values for the enum-like settings; anything else falls back during normalization.
const PROVIDERS = new Set<string>(["oauth", "api", "grok"]);
const QUALITIES = new Set<string>(["low", "medium", "high"]);
const FORMATS = new Set<string>(["png", "jpeg", "webp"]);
const MODERATIONS = new Set<string>(["auto", "low"]);
const REASONING_EFFORTS = new Set<string>(["low", "medium", "high", "xhigh"]);
const GENERATION_STRATEGIES = new Set<string>(["auto", "manual"]);

/**
 * Baseline agent generation settings; also the default fallback for
 * `normalizeAgentGenerationSettings`.
 */
export const DEFAULT_AGENT_GENERATION_SETTINGS: AgentGenerationSettings = {
  provider: "oauth",
  model: "gpt-5.4-mini",
  quality: "medium",
  size: "1024x1024",
  format: "png",
  moderation: "low",
  reasoningEffort: "medium",
  webSearchEnabled: true,
  generationStrategy: "auto",
  variants: 1,
  maxAutoVariants: 8,
  parallelism: 2,
};
24
-
25
/**
 * Produces a complete, validated settings object from arbitrary input.
 *
 * Each field is taken from `value` when it passes that field's check and from `fallback`
 * otherwise. Non-object input is treated as an empty object, so the result equals `fallback`
 * field by field.
 *
 * @param value Untrusted settings candidate.
 * @param fallback Source of replacement values; defaults to `DEFAULT_AGENT_GENERATION_SETTINGS`.
 * @returns A new settings object.
 */
export function normalizeAgentGenerationSettings(
  value: unknown,
  fallback: AgentGenerationSettings = DEFAULT_AGENT_GENERATION_SETTINGS,
): AgentGenerationSettings {
  const input: Record<string, unknown> = value && typeof value === "object"
    ? (value as Record<string, unknown>)
    : {};
  const webSearchEnabled = typeof input.webSearchEnabled === "boolean"
    ? input.webSearchEnabled
    : fallback.webSearchEnabled;
  // Variant counts and parallelism share the same 1..8 integer range.
  const boundedInt = (raw: unknown, otherwise: number) => cleanPositiveInt(raw, otherwise, 1, 8);

  return {
    provider: cleanEnum(input.provider, PROVIDERS, fallback.provider),
    model: cleanString(input.model, fallback.model),
    quality: cleanEnum(input.quality, QUALITIES, fallback.quality),
    size: cleanSize(input.size, fallback.size),
    format: cleanEnum(input.format, FORMATS, fallback.format),
    moderation: cleanEnum(input.moderation, MODERATIONS, fallback.moderation),
    reasoningEffort: cleanEnum(input.reasoningEffort, REASONING_EFFORTS, fallback.reasoningEffort),
    webSearchEnabled,
    generationStrategy: cleanEnum(input.generationStrategy, GENERATION_STRATEGIES, fallback.generationStrategy),
    variants: boundedInt(input.variants, fallback.variants),
    maxAutoVariants: boundedInt(input.maxAutoVariants, fallback.maxAutoVariants),
    parallelism: boundedInt(input.parallelism, fallback.parallelism),
  };
}
45
-
46
/**
 * Applies a partial update on top of existing settings and re-validates the result.
 *
 * @param current Settings in effect; also the fallback for any patched field that fails validation.
 * @param patch Untrusted partial settings; ignored unless it is a non-null object.
 * @returns A new normalized settings object.
 */
export function mergeAgentGenerationSettings(
  current: AgentGenerationSettings,
  patch: unknown,
): AgentGenerationSettings {
  const overrides = patch && typeof patch === "object" ? patch : {};
  const merged = { ...current, ...overrides };
  return normalizeAgentGenerationSettings(merged, current);
}
52
-
53
/**
 * Accepts `value` only if it is a string contained in `allowed`.
 *
 * @param value Untrusted candidate.
 * @param allowed Set of permitted strings.
 * @param fallback Returned when the candidate is rejected.
 */
function cleanEnum<T extends string>(value: unknown, allowed: Set<string>, fallback: T): T {
  if (typeof value !== "string") return fallback;
  if (!allowed.has(value)) return fallback;
  return value as T;
}
56
-
57
/**
 * Trims a string setting and caps it at 120 characters.
 *
 * @param value Untrusted candidate.
 * @param fallback Returned when the candidate is not a string or is blank after trimming.
 */
function cleanString(value: unknown, fallback: string): string {
  if (typeof value !== "string") return fallback;
  const trimmed = value.trim();
  if (!trimmed) return fallback;
  return trimmed.slice(0, 120);
}
60
-
61
/**
 * Validates an image size setting.
 *
 * @param value Untrusted candidate.
 * @param fallback Returned when the candidate is rejected.
 * @returns The trimmed candidate when it is "auto" or looks like `WIDTHxHEIGHT` with three or
 *   four digits on each side; otherwise `fallback`.
 */
function cleanSize(value: unknown, fallback: string): string {
  const candidate = typeof value === "string" ? value.trim() : null;
  if (candidate === null) return fallback;
  const accepted = candidate === "auto" || /^\d{3,4}x\d{3,4}$/.test(candidate);
  return accepted ? candidate : fallback;
}
67
-
68
/**
 * Normalizes a numeric setting to an integer within `[min, max]`.
 *
 * Only numbers and non-blank numeric strings are interpreted. Everything else (including
 * `null`, `""`, booleans and arrays, which `Number()` would coerce to 0 or 1 and which would
 * then be clamped to `min`) yields `fallback`, as do NaN and infinite values.
 *
 * @param value Untrusted candidate.
 * @param fallback Returned when the candidate is not a usable number.
 * @param min Lower bound (inclusive).
 * @param max Upper bound (inclusive).
 * @returns The rounded, clamped value, or `fallback`.
 */
function cleanPositiveInt(value: unknown, fallback: number, min: number, max: number): number {
  let numeric: number;
  if (typeof value === "number") {
    numeric = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    numeric = Number(value);
  } else {
    return fallback;
  }
  if (!Number.isFinite(numeric)) return fallback;
  return Math.max(min, Math.min(max, Math.round(numeric)));
}