ima2-gen 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,14 +1,22 @@
1
1
  # ima2-gen
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/ima2-gen)](https://www.npmjs.com/package/ima2-gen)
4
+
3
5
  Minimal CLI + web UI for OpenAI `gpt-image-2` image generation.
4
6
 
5
- ## Quick Start
7
+ ![ima2-gen screenshot](assets/screenshot.png)
8
+
9
+ ## Install & Run
6
10
 
7
11
  ```bash
8
- git clone https://github.com/lidge-jun/ima2-gen.git
9
- cd ima2-gen
10
- npm install
11
- npm start
12
+ npx ima2-gen serve
13
+ ```
14
+
15
+ Or install globally:
16
+
17
+ ```bash
18
+ npm install -g ima2-gen
19
+ ima2 serve
12
20
  ```
13
21
 
14
22
  First run prompts you to choose:
@@ -23,16 +31,9 @@ Then opens `http://localhost:3333`.
23
31
  ## CLI
24
32
 
25
33
  ```bash
26
- npx ima2 serve # start server (auto-setup on first run)
27
- npx ima2 setup # reconfigure auth
28
- npx ima2 reset # clear saved config
29
- ```
30
-
31
- Or install globally:
32
-
33
- ```bash
34
- npm install -g ima2-gen
35
- ima2 serve
34
+ ima2 serve # start server (auto-setup on first run)
35
+ ima2 setup # reconfigure auth
36
+ ima2 reset # clear saved config
36
37
  ```
37
38
 
38
39
  ## Features
@@ -44,7 +45,8 @@ ima2 serve
44
45
  - **Size** — presets (1024 ~ 4K) + custom (any 16px-aligned ratio)
45
46
  - **Format** — PNG / JPEG / WebP
46
47
  - **Moderation** — auto (standard) / low (less restrictive)
47
- - **History** — session thumbnail strip
48
+ - **Prompt display** — shown under image, click to copy
49
+ - **History** — persisted across page refreshes (localStorage)
48
50
  - **Download / Copy** — save or clipboard
49
51
 
50
52
  ## Architecture
@@ -75,3 +77,7 @@ OAUTH_PORT=10531
75
77
  | High | $0.211 | $0.165 | $0.165 |
76
78
 
77
79
  OAuth mode is free (uses your ChatGPT Plus/Pro subscription).
80
+
81
+ ## License
82
+
83
+ MIT
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ima2-gen",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "description": "GPT Image 2 generator with OAuth & API key support",
5
5
  "type": "module",
6
6
  "bin": {
@@ -16,7 +16,13 @@
16
16
  "release:minor": "npm version minor && npm publish && git push origin main --tags",
17
17
  "release:major": "npm version major && npm publish && git push origin main --tags"
18
18
  },
19
- "keywords": ["openai", "gpt-image-2", "image-generation", "oauth", "cli"],
19
+ "keywords": [
20
+ "openai",
21
+ "gpt-image-2",
22
+ "image-generation",
23
+ "oauth",
24
+ "cli"
25
+ ],
20
26
  "license": "MIT",
21
27
  "repository": {
22
28
  "type": "git",
@@ -25,6 +31,7 @@
25
31
  "files": [
26
32
  "bin/",
27
33
  "public/",
34
+ "assets/",
28
35
  "server.js",
29
36
  ".env.example",
30
37
  "README.md"
package/public/index.html CHANGED
@@ -581,10 +581,7 @@
581
581
 
582
582
  <main class="canvas">
583
583
  <div class="progress-bar" id="progressBar"></div>
584
- <div class="canvas-empty" id="emptyState">
585
- GPT-IMAGE-2
586
- <span>Enter a prompt and hit generate</span>
587
- </div>
584
+ <div class="canvas-empty" id="emptyState" style="display:none"></div>
588
585
  <div class="result-container" id="resultContainer">
589
586
  <img class="result-img" id="resultImg">
590
587
  <div class="result-prompt" id="resultPrompt"></div>
@@ -807,7 +804,6 @@
807
804
  btn.classList.add("loading");
808
805
  btn.textContent = "Generating...";
809
806
  $("#progressBar").classList.add("active");
810
- $("#emptyState").style.display = "none";
811
807
 
812
808
  try {
813
809
  const isEdit = state.mode === "i2i" && state.sourceImageB64;
@@ -838,7 +834,6 @@
838
834
  toast(`Generated in ${data.elapsed}s`);
839
835
  } catch (err) {
840
836
  toast(err.message, true);
841
- $("#emptyState").style.display = "";
842
837
  } finally {
843
838
  state.generating = false;
844
839
  btn.disabled = false;
package/server.js CHANGED
@@ -41,8 +41,12 @@ async function generateViaOAuth(prompt, quality, size) {
41
41
  headers: { "Content-Type": "application/json", Accept: "text/event-stream" },
42
42
  body: JSON.stringify({
43
43
  model: "gpt-5.4",
44
- input: [{ role: "user", content: prompt }],
44
+ input: [
45
+ { role: "developer", content: "You are an image generator. Always use the image_generation tool to create the image. Never respond with text only." },
46
+ { role: "user", content: `Generate an image: ${prompt}` },
47
+ ],
45
48
  tools: [{ type: "image_generation", quality, size }],
49
+ tool_choice: "required",
46
50
  stream: true,
47
51
  }),
48
52
  });
@@ -241,32 +245,120 @@ app.post("/api/generate", async (req, res) => {
241
245
  }
242
246
  });
243
247
 
248
+ // ── OAuth edit: send image as input to Responses API ──
249
/**
 * Edit an image through the OAuth proxy by sending it as input to the
 * Responses API (`POST ${OAUTH_URL}/v1/responses`) and reading the streamed
 * server-sent events.
 *
 * The base64 result is taken from the `image_generation_call` item of a
 * `response.output_item.done` event; usage is taken from `response.completed`.
 *
 * @param {string} prompt - Edit instruction; sent as `Edit this image: <prompt>`.
 * @param {string} imageB64 - Base64 image data without a data-URL prefix; it is
 *   wrapped in a `data:image/png;base64,` URL before sending.
 * @param {string} quality - Passed through to the `image_generation` tool.
 * @param {string} size - Passed through to the `image_generation` tool.
 * @returns {Promise<{ b64: string, usage: object | null }>} Base64 of the edited
 *   image and the usage object reported by the stream (or `null`).
 * @throws {Error} When the HTTP status is not OK, when the stream carries an
 *   `error` event, or when the stream ends without any image data.
 */
async function editViaOAuth(prompt, imageB64, quality, size) {
  const res = await fetch(`${OAUTH_URL}/v1/responses`, {
    method: "POST",
    headers: { "Content-Type": "application/json", Accept: "text/event-stream" },
    body: JSON.stringify({
      model: "gpt-5.4",
      input: [
        { role: "developer", content: "You are an image editor. Always use the image_generation tool to edit the provided image. Never respond with text only." },
        {
          role: "user",
          content: [
            { type: "input_image", image_url: `data:image/png;base64,${imageB64}` },
            { type: "input_text", text: `Edit this image: ${prompt}` },
          ],
        },
      ],
      tools: [{ type: "image_generation", quality, size }],
      tool_choice: "required",
      stream: true,
    }),
  });

  if (!res.ok) {
    const text = await res.text();
    let msg;
    try {
      msg = JSON.parse(text).error?.message;
    } catch {
      // Error body is not JSON: fall back to the status-based message below.
    }
    throw new Error(msg || `OAuth edit returned ${res.status}`);
  }
  if (!res.body) throw new Error("No image data received from OAuth edit");

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let resultB64 = null;
  let usage = null;

  // Handles one SSE event block (the text between two blank lines).
  const handleEvent = (block) => {
    // The space after "data:" is optional; multiple data lines are joined
    // with a newline, as the SSE format prescribes.
    const payload = block
      .split("\n")
      .filter((line) => line.startsWith("data:"))
      .map((line) => (line.startsWith("data: ") ? line.slice(6) : line.slice(5)))
      .join("\n");
    if (!payload || payload === "[DONE]") return;

    // Only the parse step is allowed to fail quietly. Keeping it separate
    // from the event handling means an upstream error is never mistaken for
    // a parse failure (or the other way round) based on its message text.
    let data;
    try {
      data = JSON.parse(payload);
    } catch {
      return; // malformed or partial payload: skip this event
    }
    if (data === null || typeof data !== "object") return;

    if (data.type === "response.output_item.done" && data.item?.type === "image_generation_call" && data.item.result) {
      resultB64 = data.item.result;
      console.log("[oauth-edit] got image, b64 length:", resultB64.length);
    }
    if (data.type === "response.completed") usage = data.response?.usage || null;
    if (data.type === "error") throw new Error(data.error?.message || JSON.stringify(data));
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // Normalise CRLF so "\r\n\r\n"-delimited streams are split too. The
      // whole buffer is re-scanned so a CRLF cut across two chunks is caught.
      buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, "\n");

      let boundary;
      while ((boundary = buffer.indexOf("\n\n")) !== -1) {
        handleEvent(buffer.slice(0, boundary));
        buffer = buffer.slice(boundary + 2);
      }
    }

    // Deliberately lenient: accept a final event that was not terminated by
    // a blank line. An incomplete payload fails JSON.parse and is skipped.
    buffer += decoder.decode();
    if (buffer.trim()) handleEvent(buffer);
  } finally {
    // Best-effort cleanup: frees the connection when bailing out early on an
    // error event. A rejection here must not mask the original error.
    await reader.cancel().catch(() => {});
  }

  if (resultB64) return { b64: resultB64, usage };
  throw new Error("No image data received from OAuth edit");
}
317
+
244
318
  // ── Edit image (inpainting) ──
245
319
  app.post("/api/edit", async (req, res) => {
246
320
  try {
247
- const { prompt, image: imageB64, mask: maskB64, quality = "low", size = "1024x1024", moderation = "low" } =
321
+ const { prompt, image: imageB64, mask: maskB64, quality = "low", size = "1024x1024", moderation = "low", provider = "auto" } =
248
322
  req.body;
249
323
 
250
324
  if (!prompt || !imageB64)
251
325
  return res.status(400).json({ error: "Prompt and image are required" });
252
- if (!openai)
253
- return res.status(400).json({ error: "Image editing requires an API key" });
254
326
 
327
+ const useOAuth = provider === "oauth" || (provider === "auto" && !HAS_API_KEY);
328
+ console.log(`[edit] provider=${useOAuth ? "oauth" : "api"} quality=${quality} size=${size}`);
255
329
  const startTime = Date.now();
256
330
 
257
- const imageFile = new File([Buffer.from(imageB64, "base64")], "image.png", { type: "image/png" });
258
- const params = { model: "gpt-image-2", prompt, image: imageFile, quality, size, moderation };
259
- if (maskB64) {
260
- params.mask = new File([Buffer.from(maskB64, "base64")], "mask.png", { type: "image/png" });
331
+ let resultB64, usage;
332
+
333
+ if (useOAuth) {
334
+ const result = await editViaOAuth(prompt, imageB64, quality, size);
335
+ resultB64 = result.b64;
336
+ usage = result.usage;
337
+ } else if (openai) {
338
+ const imageFile = new File([Buffer.from(imageB64, "base64")], "image.png", { type: "image/png" });
339
+ const params = { model: "gpt-image-2", prompt, image: imageFile, quality, size, moderation };
340
+ if (maskB64) {
341
+ params.mask = new File([Buffer.from(maskB64, "base64")], "mask.png", { type: "image/png" });
342
+ }
343
+ const response = await openai.images.edit(params);
344
+ resultB64 = response.data[0].b64_json;
345
+ usage = response.usage;
346
+ } else {
347
+ return res.status(400).json({ error: "No API key configured and OAuth not selected" });
261
348
  }
262
349
 
263
- const response = await openai.images.edit(params);
264
350
  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
265
351
 
352
+ await mkdir(join(__dirname, "generated"), { recursive: true });
353
+ const filename = `${Date.now()}.png`;
354
+ await writeFile(join(__dirname, "generated", filename), Buffer.from(resultB64, "base64"));
355
+
266
356
  res.json({
267
- image: `data:image/png;base64,${response.data[0].b64_json}`,
357
+ image: `data:image/png;base64,${resultB64}`,
268
358
  elapsed,
269
- usage: response.usage,
359
+ filename,
360
+ usage,
361
+ provider: useOAuth ? "oauth" : "api",
270
362
  });
271
363
  } catch (err) {
272
364
  console.error("Edit error:", err.message);