ima2-gen 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/CHANGELOG.md +150 -0
  2. package/README.md +10 -1
  3. package/bin/commands/backfillThumbs.js +6 -0
  4. package/bin/commands/gen.js +6 -0
  5. package/bin/ima2.js +14 -10
  6. package/docs/API.md +131 -8
  7. package/docs/CLI.md +2 -1
  8. package/docs/FAQ.ko.md +16 -0
  9. package/docs/FAQ.md +30 -0
  10. package/docs/README.ko.md +7 -3
  11. package/docs/migration/runtime-test-inventory.md +15 -1
  12. package/lib/agentImageVideoGen.js +261 -0
  13. package/lib/agentRuntime.js +7 -262
  14. package/lib/agyImageAdapter.js +35 -8
  15. package/lib/errorClassify.js +8 -7
  16. package/lib/eventBus.js +71 -0
  17. package/lib/geminiApiImageAdapter.js +16 -20
  18. package/lib/generationErrors.js +3 -1
  19. package/lib/grokImageAdapter.js +68 -129
  20. package/lib/grokImageCore.js +153 -0
  21. package/lib/grokMultimodeAdapter.js +5 -3
  22. package/lib/grokVideoCanvas.js +13 -0
  23. package/lib/grokVideoPlannerPrompt.js +53 -6
  24. package/lib/historyList.js +1 -0
  25. package/lib/inflight.js +54 -17
  26. package/lib/multimodeHelpers.js +10 -0
  27. package/lib/nodeHelpers.js +59 -0
  28. package/lib/oauthProxy/prompts.js +30 -36
  29. package/lib/promptBuilder/systemPrompt.js +2 -5
  30. package/lib/promptSafetyPolicy.js +1 -5
  31. package/lib/responsesFallback.js +2 -1
  32. package/lib/routeHelpers.js +44 -0
  33. package/lib/ssePublish.js +12 -0
  34. package/lib/storyboardPrefix.js +28 -0
  35. package/lib/thumbBackfill.js +16 -5
  36. package/package.json +4 -1
  37. package/routes/agy.js +44 -0
  38. package/routes/auth.js +6 -2
  39. package/routes/edit.js +7 -1
  40. package/routes/events.js +78 -0
  41. package/routes/generate.js +99 -127
  42. package/routes/index.js +4 -0
  43. package/routes/multimode.js +99 -56
  44. package/routes/nodes.js +59 -103
  45. package/routes/video.js +100 -17
  46. package/skills/ima2/SKILL.md +98 -21
  47. package/ui/dist/.vite/manifest.json +12 -12
  48. package/ui/dist/assets/{AgentWorkspace-CYv84Rus.js → AgentWorkspace-Dth6YijN.js} +1 -1
  49. package/ui/dist/assets/{CardNewsWorkspace-Dqyc1WZ1.js → CardNewsWorkspace-Dav3K5CT.js} +1 -1
  50. package/ui/dist/assets/{NodeCanvas-ChEXzQbb.js → NodeCanvas-C4ifFzB1.js} +1 -1
  51. package/ui/dist/assets/{PromptBuilderPanel-B95ZufnR.js → PromptBuilderPanel-CEcyU9PL.js} +1 -1
  52. package/ui/dist/assets/{PromptImportDialog-DGOwFQET.js → PromptImportDialog-CgQ94Gth.js} +2 -2
  53. package/ui/dist/assets/{PromptImportDiscoverySection-CgvdnR49.js → PromptImportDiscoverySection-CuzyzbNI.js} +1 -1
  54. package/ui/dist/assets/{PromptImportFolderSection-CfUye9J8.js → PromptImportFolderSection-DHLGlO6l.js} +1 -1
  55. package/ui/dist/assets/{PromptLibraryPanel-B9kndPw1.js → PromptLibraryPanel-BOe18we8.js} +2 -2
  56. package/ui/dist/assets/SettingsWorkspace-Cdgnm4Wa.js +1 -0
  57. package/ui/dist/assets/{index-BhcvL0g-.js → index-C5PSahkr.js} +1 -1
  58. package/ui/dist/assets/index-Dn2AhL6d.css +1 -0
  59. package/ui/dist/assets/index-Tjqx6wUV.js +23 -0
  60. package/ui/dist/index.html +2 -2
  61. package/ui/dist/assets/SettingsWorkspace-B3tgLrmF.js +0 -1
  62. package/ui/dist/assets/index-BtK3YhJc.js +0 -39
  63. package/ui/dist/assets/index-ClOLOjnA.css +0 -1
@@ -117,12 +117,9 @@ For variations:
117
117
  - Vary only the requested axis, such as style, mood, color palette, composition, camera angle, background, medium, or lighting.
118
118
  - Do not generate images unless explicitly asked.
119
119
 
120
- Safety and style limits:
121
- - Do not create prompts that violate safety policy.
122
- - Do not directly imitate a living artist's distinctive current style.
120
+ Style limits:
123
121
  - When a living artist style is requested, replace it with broader visual characteristics such as medium, color, lighting, era, composition, texture, or mood.
124
- - Avoid prompts that enable deception, impersonation, or harmful misuse.
125
- - For real people, avoid defamatory, sexualized, misleading, or dignity-violating framing.
122
+ - For real people, preserve dignity and avoid misleading framing.
126
123
 
127
124
  Before finalizing, check:
128
125
  - Does the prompt preserve the user's original idea?
@@ -1,5 +1 @@
1
- export const SAFETY_INTENT_POLICY = [
2
- "* [Level of Intent to Sexual Exploitation]: Zero",
3
- "* [Level of Intent to Sexual Commodification]: Zero",
4
- "* [Judged by Appearance Alone]: Forbidden",
5
- ].join("\n");
1
+ export const SAFETY_INTENT_POLICY = "";
@@ -38,7 +38,8 @@ export async function retryPromptOnlyJsonImage({ postResponses, ctx, provider, p
38
38
  tools: tools(false, { quality, size, moderation }),
39
39
  tool_choice: imageToolChoice(true),
40
40
  reasoning: { effort: reasoningEffort || "low" },
41
- stream: false,
41
+ // OAuth/Codex proxy returns empty output[] for non-stream image requests; SSE required.
42
+ stream: true,
42
43
  },
43
44
  });
44
45
  }
@@ -0,0 +1,44 @@
1
/**
 * Check a requested moderation level against the configured allow-list.
 *
 * @param {object} ctx - Runtime context; `ctx.config.oauth.validModeration`
 *   is queried with `.has()` and is only touched when `moderation` is a string.
 * @param {unknown} moderation - Candidate moderation value from the request.
 * @returns {{ moderation: string } | { error: string }} The accepted value, or
 *   an error message when the value is not a string or not in the allow-list.
 */
export function validateModeration(ctx, moderation) {
    const isAllowed = typeof moderation === "string"
        && ctx.config.oauth.validModeration.has(moderation);
    return isAllowed
        ? { moderation }
        : { error: "moderation must be one of: auto, low" };
}
7
/**
 * Map an image MIME type to a short format name.
 *
 * @param {string} mime - MIME type such as "image/jpeg".
 * @returns {"jpeg" | "webp" | "png"} "jpeg" or "webp" for the matching MIME
 *   types; every other input falls back to "png".
 */
export function imageFormatFromMime(mime) {
    switch (mime) {
        case "image/jpeg":
            return "jpeg";
        case "image/webp":
            return "webp";
        default:
            return "png";
    }
}
14
/**
 * Write one Server-Sent Events frame (an `event:` line followed by a JSON
 * `data:` line and a blank line) to a response.
 *
 * @param {{ write: (chunk: string) => unknown }} res - Response-like object.
 * @param {string} event - Event name placed on the `event:` line.
 * @param {unknown} data - Payload; serialized with JSON.stringify.
 */
export function writeSse(res, event, data) {
    const eventLine = `event: ${event}\n`;
    res.write(eventLine);
    // Serialize only after the event line has been written, as two separate writes.
    const payload = JSON.stringify(data);
    res.write(`data: ${payload}\n\n`);
}
18
/**
 * Build a base64 `data:` URL for an image.
 *
 * The "jpg" alias is normalized to "jpeg" so the URL carries the registered
 * `image/jpeg` media type; every other format is used as given.
 *
 * @param {string} format - Image format / MIME subtype (e.g. "png", "jpeg", "webp").
 * @param {string} b64 - Base64-encoded image bytes.
 * @returns {string} A `data:image/<subtype>;base64,<b64>` URL.
 */
export function dataUrlFromB64(format, b64) {
    const subtype = format === "jpg" ? "jpeg" : format;
    return `data:image/${subtype};base64,${b64}`;
}
21
/**
 * Project the diagnostic fields of an upstream error (or similar record) onto
 * a flat object in which every missing field is an explicit `null`.
 *
 * Fields read through `truthyOrNull` collapse any falsy value to `null`;
 * fields read through `definedOrNull` keep `0` and `false` and only replace
 * `null`/`undefined`. Note that `errorEventCount` is sourced from
 * `src.eventCount`.
 *
 * @param {object} src - Object carrying the optional diagnostic properties.
 * @returns {object} Object with the twenty diagnostic keys, always present.
 */
export function upstreamErrorFields(src) {
    const truthyOrNull = (key) => src[key] || null;
    const definedOrNull = (key) => src[key] ?? null;
    return {
        upstreamCode: truthyOrNull("upstreamCode"),
        upstreamType: truthyOrNull("upstreamType"),
        upstreamParam: truthyOrNull("upstreamParam"),
        diagnosticReason: truthyOrNull("diagnosticReason"),
        retryKind: truthyOrNull("retryKind"),
        initialEventCount: definedOrNull("initialEventCount"),
        initialEventTypes: truthyOrNull("initialEventTypes"),
        referencesDroppedOnRetry: definedOrNull("referencesDroppedOnRetry"),
        developerPromptDroppedOnRetry: definedOrNull("developerPromptDroppedOnRetry"),
        webSearchDroppedOnRetry: definedOrNull("webSearchDroppedOnRetry"),
        fallbackEventCount: definedOrNull("fallbackEventCount"),
        fallbackEventTypes: truthyOrNull("fallbackEventTypes"),
        fallbackImageCallSeen: definedOrNull("fallbackImageCallSeen"),
        fallbackImageResultCount: definedOrNull("fallbackImageResultCount"),
        errorEventCount: definedOrNull("eventCount"),
        eventTypes: truthyOrNull("eventTypes"),
        webSearchCalls: definedOrNull("webSearchCalls"),
        responseDiagnostics: truthyOrNull("responseDiagnostics"),
        toolTypes: truthyOrNull("toolTypes"),
        toolChoiceKind: truthyOrNull("toolChoiceKind"),
    };
}
@@ -0,0 +1,12 @@
1
+ import { publish } from "./eventBus.js";
2
+ import { isJobCanceled } from "./inflight.js";
3
/**
 * Publish a multiplexed job event on the event bus.
 *
 * A terminal `done` event is dropped when the job is already marked canceled,
 * so a client does not see success after a cancellation. The cancel check is
 * only performed for `done` events.
 *
 * @param {string} requestId - Job/request identifier passed to the bus.
 * @param {string} event - Event name.
 * @param {unknown} data - Event payload.
 * @returns {boolean} `true` when the event was published, `false` when it was
 *   suppressed.
 */
export function publishJobEvent(requestId, event, data) {
    const suppressed = event === "done" && isJobCanceled(requestId);
    if (!suppressed) {
        publish(requestId, event, data);
    }
    return !suppressed;
}
@@ -0,0 +1,28 @@
1
/**
 * Prompt prefix text for storyboard mode.
 *
 * The text is organised as sections; each section is followed by one blank
 * line and every line (including the blank ones) ends with "\n", so the
 * string finishes with an empty line.
 */
export const STORYBOARD_PREFIX = [
    [
        "[STORYBOARD MODE — Video Production Keyframe / Storyboard Grid]",
        "This image will be used for video production. It may be a single keyframe OR a 3x3 storyboard grid.",
        "The prompt and all injected instructions MUST be in English.",
    ],
    [
        "IF GENERATING A 3x3 STORYBOARD GRID:",
        "- Panel 1 (top-left) MUST be COMPLETELY SOLID BLACK — no image, no text, just pure black.",
        "- Panels 2-9 contain the action sequence (8 key moments).",
        "- Do NOT add timestamp labels or text overlays to any panel — they burn into the video.",
        "- Maintain identical character designs across all panels.",
        "- Each panel should look like a cinematic film still, not a sketch.",
    ],
    [
        "CHARACTER LOCK:",
        "- Identify each character by 2-3 VISUAL identifiers (clothing color + physique + position/props). Never by name alone.",
        "- Copy character descriptions VERBATIM from the reference/prior frame. Do NOT rephrase or drift.",
    ],
    [
        "SCENE CONTINUITY:",
        "- Lock lighting direction, color palette, environment, and art style to prior frames.",
        "- Change ONLY: action, shot scale, camera angle, or expression.",
        "- Reference image = canonical anchor. Preserve it faithfully.",
    ],
    [
        "VIDEO-READY COMPOSITION:",
        "- Frame for animation: leave space for motion, avoid static-only poses.",
        "- Use descriptive caption format: shot type + subject action + environment + technical (lens, lighting) + mood.",
        "- Specify intended camera movement for the video phase (e.g. 'slow dolly-in', 'static wide').",
        "- End pose must be stable and suitable for video continuation.",
    ],
]
    .flatMap((section) => [...section, ""])
    .map((line) => `${line}\n`)
    .join("");
@@ -2,6 +2,10 @@ import { readdir } from "node:fs/promises";
2
2
  import { join } from "node:path";
3
3
  import { ensureVideoThumbnail, videoThumbExists } from "./videoThumb.js";
4
4
  import { generateImageThumbnail, imageThumbExists } from "./imageThumb.js";
5
+ const FAILURE_DETAIL_LIMIT = 20;
6
+ function errorReason(error) {
7
+ return error instanceof Error ? error.message : String(error);
8
+ }
5
9
  /**
6
10
  * Recursively scan `dir` (up to `maxDepth` levels, matching historyList's walk
7
11
  * depth) and generate missing `.thumb.jpg` thumbnails for every image and video.
@@ -11,7 +15,13 @@ import { generateImageThumbnail, imageThumbExists } from "./imageThumb.js";
11
15
  * shows a thumbless media tile.
12
16
  */
13
17
  export async function backfillThumbnails(dir, maxDepth = 2) {
14
- const result = { total: 0, created: 0, skipped: 0, failed: 0 };
18
+ const result = { total: 0, created: 0, skipped: 0, failed: 0, failures: [] };
19
+ function recordFailure(file, kind, reason) {
20
+ result.failed++;
21
+ if (result.failures.length >= FAILURE_DETAIL_LIMIT)
22
+ return;
23
+ result.failures.push({ file, kind, reason });
24
+ }
15
25
  async function walk(current, depth) {
16
26
  const entries = await readdir(current, { withFileTypes: true }).catch(() => []);
17
27
  for (const entry of entries) {
@@ -28,8 +38,9 @@ export async function backfillThumbnails(dir, maxDepth = 2) {
28
38
  if (!/\.(png|jpe?g|webp|mp4)$/i.test(entry.name))
29
39
  continue;
30
40
  result.total++;
41
+ const kind = /\.mp4$/i.test(entry.name) ? "video" : "image";
31
42
  try {
32
- if (/\.mp4$/i.test(entry.name)) {
43
+ if (kind === "video") {
33
44
  if (await videoThumbExists(full)) {
34
45
  result.skipped++;
35
46
  continue;
@@ -38,7 +49,7 @@ export async function backfillThumbnails(dir, maxDepth = 2) {
38
49
  if (ok)
39
50
  result.created++;
40
51
  else
41
- result.failed++;
52
+ recordFailure(full, kind, "thumbnail generation returned false");
42
53
  }
43
54
  else {
44
55
  if (await imageThumbExists(full)) {
@@ -49,8 +60,8 @@ export async function backfillThumbnails(dir, maxDepth = 2) {
49
60
  result.created++;
50
61
  }
51
62
  }
52
- catch {
53
- result.failed++;
63
+ catch (error) {
64
+ recordFailure(full, kind, errorReason(error));
54
65
  }
55
66
  }
56
67
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ima2-gen",
3
- "version": "2.0.1",
3
+ "version": "2.0.2",
4
4
  "description": "Local OAuth image generation studio with classic and node workflows",
5
5
  "type": "module",
6
6
  "bin": {
@@ -39,6 +39,7 @@
39
39
  "cli"
40
40
  ],
41
41
  "license": "MIT",
42
+ "homepage": "https://lidge-jun.github.io/ima2-gen/",
42
43
  "repository": {
43
44
  "type": "git",
44
45
  "url": "git+https://github.com/lidge-jun/ima2-gen.git"
@@ -57,6 +58,7 @@
57
58
  "assets/card-news/templates/",
58
59
  ".env.example",
59
60
  "README.md",
61
+ "CHANGELOG.md",
60
62
  "LICENSE",
61
63
  "server.js",
62
64
  "config.js"
@@ -65,6 +67,7 @@
65
67
  "node": ">=20"
66
68
  },
67
69
  "dependencies": {
70
+ "@openai/codex": "latest",
68
71
  "better-sqlite3": "^12.9.0",
69
72
  "dotenv": "^17.4.2",
70
73
  "express": "^5.1.0",
package/routes/agy.js ADDED
@@ -0,0 +1,44 @@
1
+ import { spawn } from "node:child_process";
2
// Detect whether the Antigravity CLI (`agy`) is installed, using the same
// spawn-and-catch style as lib/agyImageAdapter.ts (no shell `which`/`where`).
// Login state cannot be probed — agy has no status command — so we only
// report installation here.
/**
 * Probe for the `agy` binary by running `agy --version`.
 *
 * @returns {Promise<boolean>} Resolves `true` when the process exits with
 *   code 0; `false` on spawn failure, non-zero exit, or when the probe does
 *   not finish within 3 seconds. The promise never rejects.
 */
function isAgyInstalled() {
    return new Promise((resolve) => {
        let settled = false;
        let timer;
        const done = (value) => {
            if (settled)
                return;
            settled = true;
            // Drop the safety timeout as soon as the probe has an answer so it
            // does not linger and later try to kill an already-finished child.
            clearTimeout(timer);
            resolve(value);
        };
        try {
            const child = spawn("agy", ["--version"], { stdio: "ignore" });
            child.on("error", () => done(false)); // ENOENT when not on PATH
            child.on("exit", (code) => done(code === 0));
            // Safety timeout so a hung binary never blocks the request.
            timer = setTimeout(() => {
                try {
                    if (!child.killed)
                        child.kill();
                }
                catch { /* ignore */ }
                done(false);
            }, 3000);
            // Do not let the pending timeout keep the process alive on its own.
            timer.unref?.();
        }
        catch {
            done(false);
        }
    });
}
34
/**
 * Register the Antigravity CLI status endpoint.
 *
 * `GET /api/agy/status` responds with `{ installed: boolean }`. Any failure
 * while probing or while sending the first response is answered with
 * `{ installed: false }`.
 *
 * @param {{ get: Function }} app - Express-style application exposing `get()`.
 */
export function registerAgyRoutes(app) {
    async function reportStatus(_req, res) {
        try {
            const payload = { installed: await isAgyInstalled() };
            res.json(payload);
        }
        catch {
            res.json({ installed: false });
        }
    }
    app.get("/api/agy/status", reportStatus);
}
package/routes/auth.js CHANGED
@@ -2,11 +2,15 @@ import { spawn } from "node:child_process";
2
2
  import { randomBytes } from "node:crypto";
3
3
  import { writeFileSync, renameSync, mkdirSync, existsSync } from "node:fs";
4
4
  import { homedir } from "node:os";
5
- import { join } from "node:path";
5
+ import { join, dirname } from "node:path";
6
+ import { fileURLToPath } from "node:url";
6
7
  const GROK_CLIENT_ID = "b1a00492-073a-47ea-816f-4c329264a828";
7
8
  const GROK_SCOPE = "openid profile email offline_access grok-cli:access api:access";
8
9
  const GROK_TOKEN_URL = "https://auth.x.ai/oauth2/token";
9
10
  const CODEX_DEVICE_CODE_GRANT = "urn:ietf:params:oauth:grant-type:device_code";
11
+ // Bundled @openai/codex binary (npm dependency), resolved relative to this
12
+ // module so device-auth login works even when `codex` is not on the user's PATH.
13
+ const CODEX_BIN = join(dirname(fileURLToPath(import.meta.url)), "..", "node_modules", ".bin", process.platform === "win32" ? "codex.cmd" : "codex");
10
14
  const MAX_CONCURRENT_SESSIONS = 20;
11
15
  const sessions = new Map();
12
16
  function sid() {
@@ -125,7 +129,7 @@ function startCodexDeviceCode() {
125
129
  for (const k of ["OPENAI_API_KEY", "XAI_API_KEY", "GEMINI_API_KEY", "ANTHROPIC_API_KEY", "VERTEX_SERVICE_ACCOUNT_JSON"]) {
126
130
  delete childEnv[k];
127
131
  }
128
- const child = spawn("codex", ["login", "--device-auth"], {
132
+ const child = spawn(CODEX_BIN, ["login", "--device-auth"], {
129
133
  stdio: ["ignore", "pipe", "pipe"],
130
134
  env: childEnv,
131
135
  });
package/routes/edit.js CHANGED
@@ -169,6 +169,7 @@ export function registerEditRoutes(app, ctxRaw) {
169
169
  let revisedPrompt;
170
170
  let webSearchCalls = 0;
171
171
  let resultMimeFromProvider;
172
+ let providerUrl = null;
172
173
  if (activeProvider === "gemini-api") {
173
174
  const r = await generateViaGeminiApi(`Edit this image: ${prompt}`, requireRuntimeContext(ctx), {
174
175
  model: imageModel,
@@ -206,6 +207,7 @@ export function registerEditRoutes(app, ctxRaw) {
206
207
  directApiKey,
207
208
  });
208
209
  resultB64 = r.b64;
210
+ providerUrl = r.providerUrl ?? null;
209
211
  usage = r.usage;
210
212
  revisedPrompt = r.revisedPrompt;
211
213
  webSearchCalls = r.webSearchCalls;
@@ -237,6 +239,7 @@ export function registerEditRoutes(app, ctxRaw) {
237
239
  const editFilePath = join(ctx.config.storage.generatedDir, filename);
238
240
  await writeFile(editFilePath, editBuffer);
239
241
  generateImageThumbnailFromBuffer(editBuffer, editFilePath).catch(() => { });
242
+ const createdAt = Date.now();
240
243
  const meta = {
241
244
  prompt,
242
245
  userPrompt: prompt,
@@ -252,10 +255,11 @@ export function registerEditRoutes(app, ctxRaw) {
252
255
  provider: activeProvider,
253
256
  kind: "edit",
254
257
  requestId,
255
- createdAt: Date.now(),
258
+ createdAt,
256
259
  usage: usage || null,
257
260
  webSearchCalls,
258
261
  webSearchEnabled,
262
+ ...(providerUrl ? { providerUrl } : {}),
259
263
  };
260
264
  await safeWriteSidecar(join(ctx.config.storage.generatedDir, filename + ".json"), meta);
261
265
  invalidateHistoryIndex();
@@ -281,6 +285,8 @@ export function registerEditRoutes(app, ctxRaw) {
281
285
  promptMode: normalizedPromptMode,
282
286
  webSearchCalls,
283
287
  webSearchEnabled,
288
+ providerUrl,
289
+ createdAt,
284
290
  });
285
291
  }
286
292
  catch (e) {
@@ -0,0 +1,78 @@
1
+ import { subscribe, replaySince, hasReplayGap, replayOldestId, MAX_SSE_LISTENERS } from "../lib/eventBus.js";
2
+ let activeConnections = 0;
3
/**
 * Write a chunk to a response without throwing.
 *
 * @param {object} res - Response-like object with `writableEnded`,
 *   `destroyed` and `write()`.
 * @param {string} chunk - Data to write.
 * @returns {boolean} `false` when the response is already ended/destroyed or
 *   when `write()` throws; `true` otherwise.
 */
function safeWrite(res, chunk) {
    const closed = res.writableEnded || res.destroyed;
    if (closed) {
        return false;
    }
    let written = true;
    try {
        res.write(chunk);
    }
    catch {
        written = false;
    }
    return written;
}
14
/**
 * Serialize a bus event as one Server-Sent Events frame.
 *
 * The `data:` line is the JSON of the event's `data` fields with `jobId`
 * merged in (the event's own `jobId` wins over any `data.jobId`).
 *
 * @param {{ id: unknown, event: string, data?: object, jobId: unknown }} ev
 * @returns {string} Frame of `id:`, `event:` and `data:` lines ending in a blank line.
 */
function formatSse(ev) {
    const lines = [
        `id: ${ev.id}`,
        `event: ${ev.event}`,
        `data: ${JSON.stringify({ ...ev.data, jobId: ev.jobId })}`,
    ];
    return `${lines.join("\n")}\n\n`;
}
17
/**
 * Register `GET /api/events`, a Server-Sent Events stream of bus events.
 *
 * Per connection the handler:
 *  1. rejects with 503 `SSE_CAPACITY` once `activeConnections` reaches
 *     `MAX_SSE_LISTENERS`;
 *  2. sends the SSE headers and counts the connection;
 *  3. if a last event id is supplied (the `Last-Event-ID` header wins over the
 *     `lastEventId` query parameter), emits a `replay-gap` event when
 *     `hasReplayGap` reports one and then replays the events from `replaySince`;
 *  4. subscribes to live events and sends a `: ping` comment every 15 seconds;
 *  5. releases everything exactly once on close/error or on a failed write.
 *
 * @param {{ get: Function }} app - Express-style application.
 * @param {unknown} _ctx - Unused.
 */
export function registerEventsRoute(app, _ctx) {
    app.get("/api/events", (req, res) => {
        if (activeConnections >= MAX_SSE_LISTENERS) {
            return res.status(503).json({
                error: { code: "SSE_CAPACITY", message: "Too many event stream connections" },
            });
        }

        const sseHeaders = [
            ["Content-Type", "text/event-stream; charset=utf-8"],
            ["Cache-Control", "no-cache, no-transform"],
            ["Connection", "keep-alive"],
            ["X-Accel-Buffering", "no"],
        ];
        for (const [name, value] of sseHeaders) {
            res.setHeader(name, value);
        }
        res.flushHeaders?.();
        activeConnections += 1;

        // Resume position: both sources are parsed, the header takes precedence.
        const fromHeader = parseInt(req.headers["last-event-id"], 10);
        const fromQuery = parseInt(String(req.query.lastEventId ?? ""), 10);
        const lastId = Number.isNaN(fromHeader) ? fromQuery : fromHeader;
        const resuming = !Number.isNaN(lastId);

        if (resuming && hasReplayGap(lastId)) {
            const gapPayload = JSON.stringify({
                lastEventId: lastId,
                oldestAvailableId: replayOldestId(),
            });
            const announced = safeWrite(res, `event: replay-gap\ndata: ${gapPayload}\n\n`);
            if (!announced) {
                // Nothing has been subscribed yet, so only the counter is undone.
                activeConnections = Math.max(0, activeConnections - 1);
                return;
            }
        }
        if (resuming) {
            for (const ev of replaySince(lastId)) {
                const delivered = safeWrite(res, formatSse(ev));
                if (!delivered) {
                    break;
                }
            }
        }

        let cleaned = false;
        // Idempotent teardown: unsubscribe, stop the heartbeat, release the
        // connection slot, and end the response if it is still open.
        function cleanup() {
            if (cleaned) {
                return;
            }
            cleaned = true;
            unsub();
            clearInterval(heartbeat);
            activeConnections = Math.max(0, activeConnections - 1);
            const stillOpen = !res.writableEnded && !res.destroyed;
            if (stillOpen) {
                try {
                    res.end();
                }
                catch {
                    /* socket already torn down */
                }
            }
        }

        const unsub = subscribe((ev) => {
            const delivered = safeWrite(res, formatSse(ev));
            if (!delivered) {
                cleanup();
            }
        });
        const heartbeat = setInterval(() => {
            const alive = safeWrite(res, ": ping\n\n");
            if (!alive) {
                cleanup();
            }
        }, 15000);

        const teardownTriggers = [
            [req, "close"],
            [res, "close"],
            [res, "error"],
        ];
        for (const [emitter, eventName] of teardownTriggers) {
            emitter.on(eventName, cleanup);
        }
    });
}