npm - heysnap-image-gen - Versions diffs - 0.1.0 - Mend

heysnap-image-gen 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md ADDED Viewed

@@ -0,0 +1,56 @@
+# image-gen
+Opinionated GPT Image 2 CLI for VM agents.
+`image-gen` sends image generation and image edit requests through the HeySnap
+gateway, reads the VM machine token automatically, and writes the resulting
+image files locally.
+## Install
+```sh
+npm install -g heysnap-image-gen
+```
+## Usage
+```sh
+image-gen "A polished product photo of a matte black coffee mug" -o mug.png
+```
+Use a reference image:
+```sh
+image-gen "Restyle this screenshot as a polished SaaS dashboard" \
+  --image screenshot.png \
+  -o dashboard.png
+```
+Generate multiple images:
+```sh
+image-gen "Three calm finance app logo concepts" --n 3 --out-dir logos
+```
+Read a long prompt from a file:
+```sh
+image-gen --prompt-file prompt.txt -o result.png
+```
+## Auth
+The CLI checks token sources in this order:
+1. `ANK1015_IMAGE_GATEWAY_TOKEN`
+2. `ANK1015_CODEX_GATEWAY_TOKEN`
+3. `ANK1015_MACHINE_TOKEN_FILE`
+4. `/opt/ank1015/machine-token`
+The default gateway base URL (override it with `ANK1015_IMAGE_GATEWAY_URL`) is:
+```text
+https://api.heysnap.xyz/llm/openai/v1
+```
+See [docs/usage.md](docs/usage.md) for the full CLI contract.

package/dist/cli.js ADDED Viewed

@@ -0,0 +1,426 @@
+import { mkdir, readFile, stat, writeFile } from "node:fs/promises";
+import { basename, dirname, extname, join, resolve } from "node:path";
+// Image model identifier sent as `model` in every request payload.
+const MODEL = "gpt-image-2";
+// Gateway base URL used when no environment override is set.
+const DEFAULT_GATEWAY_BASE_URL = "https://api.heysnap.xyz/llm/openai/v1";
+// Token file read when ANK1015_MACHINE_TOKEN_FILE is not set.
+const DEFAULT_MACHINE_TOKEN_FILE = "/opt/ank1015/machine-token";
+// Upper bound on repeated --image flags.
+const MAX_INPUT_IMAGES = 16;
+// Upper bound accepted for --n.
+const MAX_OUTPUT_IMAGES = 10;
+// Inclusive bounds on width * height for an explicit --size.
+const MIN_PIXELS = 655_360;
+const MAX_PIXELS = 8_294_400;
+// Longest allowed edge, in pixels, for an explicit --size.
+const MAX_EDGE = 3840;
+class CliError extends Error {
+}
+export const runCli = async (argv, runtime = {}) => {
+    const stdout = runtime.stdout ?? process.stdout;
+    const stderr = runtime.stderr ?? process.stderr;
+    try {
+        const parsed = await parseArgs(argv, runtime);
+        if (parsed.help === true) {
+            stdout.write(`${helpText()}\n`);
+            return 0;
+        }
+        if (parsed.options === undefined) {
+            throw new CliError("Missing prompt.");
+        }
+        const result = await generateImages(parsed.options, runtime);
+        stdout.write(`${result.paths.join("\n")}\n`);
+        return 0;
+    }
+    catch (error) {
+        stderr.write(`image-gen: ${error instanceof Error ? error.message : String(error)}\n`);
+        return 1;
+    }
+};
+/**
+ * Parses CLI arguments into a validated options object.
+ *
+ * Tokens that do not start with "-" are collected as prompt words, and
+ * everything after a bare "--" is treated as prompt text too. Every
+ * value-taking option consumes the following argv entry.
+ *
+ * @param argv Arguments without the node/script prefix.
+ * @param runtime Optional overrides: `env` (defaults to process.env) and `cwd`
+ *     (defaults to process.cwd(), used to resolve --prompt-file).
+ * @returns `{ help: true }` when -h/--help is seen, otherwise `{ options }`.
+ * @throws CliError for unknown options, missing or invalid values, or options
+ *     rejected by validateOptions. A failed --prompt-file read rejects with the
+ *     underlying fs error.
+ */
+export const parseArgs = async (argv, runtime = {}) => {
+    const env = runtime.env ?? process.env;
+    const cwd = runtime.cwd ?? process.cwd();
+    const promptParts = [];
+    const images = [];
+    let output;
+    let outDir;
+    let mask;
+    let size = "auto";
+    let quality = "auto";
+    let format;
+    let compression;
+    let n = 1;
+    let moderation = "auto";
+    let promptFile;
+    for (let index = 0; index < argv.length; index += 1) {
+        const arg = argv[index] ?? "";
+        // Help short-circuits parsing: nothing after it is inspected or validated.
+        if (arg === "--help" || arg === "-h") {
+            return { help: true };
+        }
+        // A bare "--" ends option parsing; the remainder is prompt text even if it starts with "-".
+        if (arg === "--") {
+            promptParts.push(...argv.slice(index + 1));
+            break;
+        }
+        if (!arg.startsWith("-")) {
+            promptParts.push(arg);
+            continue;
+        }
+        // In every case below, `index += 1` skips the value that requireValue just read.
+        switch (arg) {
+            case "-o":
+            case "--output":
+                output = requireValue(argv, index, arg);
+                index += 1;
+                break;
+            case "--out-dir":
+                outDir = requireValue(argv, index, arg);
+                index += 1;
+                break;
+            case "-i":
+            case "--image":
+                images.push(requireValue(argv, index, arg));
+                index += 1;
+                break;
+            case "--mask":
+                mask = requireValue(argv, index, arg);
+                index += 1;
+                break;
+            case "--size":
+                // Kept as a raw string here; validateOptions checks it via validateSize.
+                size = requireValue(argv, index, arg);
+                index += 1;
+                break;
+            case "--quality":
+                quality = parseEnum(requireValue(argv, index, arg), ["auto", "low", "medium", "high"], "--quality");
+                index += 1;
+                break;
+            case "--format":
+                format = parseEnum(requireValue(argv, index, arg), ["png", "jpeg"], "--format");
+                index += 1;
+                break;
+            case "--compression":
+                compression = parseInteger(requireValue(argv, index, arg), "--compression", 0, 100);
+                index += 1;
+                break;
+            case "--n":
+                n = parseInteger(requireValue(argv, index, arg), "--n", 1, MAX_OUTPUT_IMAGES);
+                index += 1;
+                break;
+            case "--moderation":
+                moderation = parseEnum(requireValue(argv, index, arg), ["auto", "low"], "--moderation");
+                index += 1;
+                break;
+            case "--prompt-file":
+                promptFile = requireValue(argv, index, arg);
+                index += 1;
+                break;
+            default:
+                throw new CliError(`Unknown option: ${arg}`);
+        }
+    }
+    if (promptFile !== undefined && promptParts.length > 0) {
+        throw new CliError("Use either a prompt argument or --prompt-file, not both.");
+    }
+    // Prompt words are joined with single spaces; a prompt file is read relative to cwd.
+    const prompt = promptFile === undefined
+        ? promptParts.join(" ").trim()
+        : (await readFile(resolve(cwd, promptFile), "utf8")).trim();
+    // Throws if the --output extension is unsupported or conflicts with --format.
+    const resolvedFormat = resolveOutputFormat({ explicitFormat: format, output });
+    const options = {
+        prompt,
+        output,
+        outDir,
+        images,
+        mask,
+        size,
+        quality,
+        format: resolvedFormat,
+        compression,
+        n,
+        moderation,
+    };
+    validateOptions(options, env);
+    return { options };
+};
+export const generateImages = async (options, runtime = {}) => {
+    const env = runtime.env ?? process.env;
+    const cwd = runtime.cwd ?? process.cwd();
+    const fetchImpl = runtime.fetch ?? fetch;
+    const token = await readMachineToken(env);
+    const baseUrl = resolveGatewayBaseUrl(env);
+    const request = options.images.length === 0
+        ? await buildGenerationRequest(options, baseUrl, token)
+        : await buildEditRequest(options, baseUrl, token, cwd);
+    const response = await fetchImpl(request.url, request.init);
+    const body = await readResponseJson(response);
+    if (!response.ok) {
+        throw new CliError(readApiError(response.status, body));
+    }
+    const imageResponse = body;
+    const base64Images = imageResponse.data?.map((entry) => entry.b64_json).filter(isString) ?? [];
+    if (base64Images.length === 0) {
+        throw new CliError("Gateway response did not include any image data.");
+    }
+    const outputPaths = buildOutputPaths(options, base64Images.length, cwd, runtime.now?.() ?? new Date());
+    await Promise.all(outputPaths.map(async (outputPath, index) => {
+        await mkdir(dirname(outputPath), { recursive: true });
+        await writeFile(outputPath, Buffer.from(base64Images[index] ?? "", "base64"));
+    }));
+    return { paths: outputPaths };
+};
+const buildGenerationRequest = async (options, baseUrl, token) => {
+    const body = commonPayload(options);
+    return {
+        url: `${baseUrl}/images/generations`,
+        init: {
+            method: "POST",
+            headers: {
+                "api-key": token,
+                "content-type": "application/json",
+            },
+            body: JSON.stringify(body),
+        },
+    };
+};
+const buildEditRequest = async (options, baseUrl, token, cwd) => {
+    const form = new FormData();
+    const payload = commonPayload(options);
+    for (const [key, value] of Object.entries(payload)) {
+        form.set(key, String(value));
+    }
+    for (const imagePath of options.images) {
+        form.append("image[]", await pathToBlob(resolve(cwd, imagePath)), basename(imagePath));
+    }
+    if (options.mask !== undefined) {
+        form.set("mask", await pathToBlob(resolve(cwd, options.mask)), basename(options.mask));
+    }
+    return {
+        url: `${baseUrl}/images/edits`,
+        init: {
+            method: "POST",
+            headers: {
+                "api-key": token,
+            },
+            body: form,
+        },
+    };
+};
+const commonPayload = (options) => {
+    const payload = {
+        model: MODEL,
+        prompt: options.prompt,
+        size: options.size,
+        quality: options.quality,
+        output_format: options.format ?? "png",
+        n: options.n,
+        moderation: options.moderation,
+    };
+    if (options.compression !== undefined) {
+        payload["output_compression"] = options.compression;
+    }
+    return payload;
+};
+const validateOptions = (options, env) => {
+    if (options.prompt.length === 0) {
+        throw new CliError("Prompt is required.");
+    }
+    if (options.prompt.length > 32_000) {
+        throw new CliError("Prompt must be 32,000 characters or fewer for GPT image models.");
+    }
+    if (options.output !== undefined && options.outDir !== undefined) {
+        throw new CliError("Use either --output or --out-dir, not both.");
+    }
+    if (options.n > 1 && options.output !== undefined) {
+        throw new CliError("Use --out-dir instead of --output when --n is greater than 1.");
+    }
+    if (options.images.length > MAX_INPUT_IMAGES) {
+        throw new CliError(`--image can be repeated up to ${String(MAX_INPUT_IMAGES)} times.`);
+    }
+    if (options.mask !== undefined && options.images.length === 0) {
+        throw new CliError("--mask requires at least one --image.");
+    }
+    validateSize(options.size);
+    if (options.compression !== undefined && (options.format ?? "png") === "png") {
+        throw new CliError("--compression is only valid with --format jpeg.");
+    }
+    const tokenSources = [
+        env["ANK1015_IMAGE_GATEWAY_TOKEN"],
+        env["ANK1015_CODEX_GATEWAY_TOKEN"],
+        env["ANK1015_MACHINE_TOKEN_FILE"],
+    ];
+    if (tokenSources.every((value) => value === undefined || value.trim().length === 0)) {
+        // The default VM token path may still exist; defer this to runtime so local tests can inject env cleanly.
+        return;
+    }
+};
+const validateSize = (size) => {
+    if (size === "auto") {
+        return;
+    }
+    const match = /^(\d+)x(\d+)$/.exec(size);
+    if (match === null) {
+        throw new CliError("--size must be auto or WIDTHxHEIGHT, for example 1024x1024.");
+    }
+    const width = Number(match[1]);
+    const height = Number(match[2]);
+    const longEdge = Math.max(width, height);
+    const shortEdge = Math.min(width, height);
+    const pixels = width * height;
+    if (width % 16 !== 0 || height % 16 !== 0) {
+        throw new CliError("--size width and height must both be multiples of 16.");
+    }
+    if (longEdge > MAX_EDGE) {
+        throw new CliError(`--size maximum edge length is ${String(MAX_EDGE)}px.`);
+    }
+    if (longEdge / shortEdge > 3) {
+        throw new CliError("--size long edge to short edge ratio must not exceed 3:1.");
+    }
+    if (pixels < MIN_PIXELS || pixels > MAX_PIXELS) {
+        throw new CliError(`--size total pixels must be between ${String(MIN_PIXELS)} and ${String(MAX_PIXELS)}.`);
+    }
+};
+const buildOutputPaths = (options, count, cwd, now) => {
+    if (options.output !== undefined) {
+        return [resolve(cwd, options.output)];
+    }
+    const outputDir = resolve(cwd, options.outDir ?? ".");
+    const stamp = toFileTimestamp(now);
+    const extension = options.format ?? "png";
+    return Array.from({ length: count }, (_, index) => (join(outputDir, `image-${stamp}-${String(index + 1).padStart(2, "0")}.${extension}`)));
+};
+const resolveOutputFormat = (input) => {
+    const outputExtension = input.output === undefined ? undefined : extname(input.output);
+    const extension = outputExtension === undefined ? undefined : extensionToFormat(outputExtension);
+    if (outputExtension !== undefined && outputExtension.length > 0 && extension === undefined) {
+        throw new CliError(`Unsupported output extension ${outputExtension}. Use .png, .jpg, or .jpeg.`);
+    }
+    if (input.explicitFormat !== undefined && extension !== undefined && input.explicitFormat !== extension) {
+        throw new CliError(`--format ${input.explicitFormat} does not match output extension ${outputExtension}.`);
+    }
+    return input.explicitFormat ?? extension ?? "png";
+};
+const extensionToFormat = (extension) => {
+    switch (extension.toLowerCase()) {
+        case ".png":
+            return "png";
+        case ".jpg":
+        case ".jpeg":
+            return "jpeg";
+        default:
+            return undefined;
+    }
+};
+const pathToBlob = async (path) => {
+    const fileStat = await stat(path);
+    if (fileStat.size > 50 * 1024 * 1024) {
+        throw new CliError(`Image files must be 50MB or smaller: ${path}`);
+    }
+    return new Blob([await readFile(path)], { type: contentTypeForPath(path) });
+};
+const contentTypeForPath = (path) => {
+    switch (extname(path).toLowerCase()) {
+        case ".png":
+            return "image/png";
+        case ".jpg":
+        case ".jpeg":
+            return "image/jpeg";
+        case ".webp":
+            return "image/webp";
+        default:
+            return "application/octet-stream";
+    }
+};
+const readMachineToken = async (env) => {
+    const inlineToken = env["ANK1015_IMAGE_GATEWAY_TOKEN"]?.trim()
+        || env["ANK1015_CODEX_GATEWAY_TOKEN"]?.trim();
+    if (inlineToken !== undefined && inlineToken.length > 0) {
+        return inlineToken;
+    }
+    const tokenFile = env["ANK1015_MACHINE_TOKEN_FILE"]?.trim() || DEFAULT_MACHINE_TOKEN_FILE;
+    try {
+        const token = (await readFile(tokenFile, "utf8")).trim();
+        if (token.length > 0) {
+            return token;
+        }
+    }
+    catch {
+        // The specific error below is clearer for agents than the fs failure.
+    }
+    throw new CliError(`Machine token not found. Set ANK1015_IMAGE_GATEWAY_TOKEN or ANK1015_MACHINE_TOKEN_FILE.`);
+};
+const resolveGatewayBaseUrl = (env) => {
+    const explicit = env["ANK1015_IMAGE_GATEWAY_URL"]?.trim();
+    if (explicit !== undefined && explicit.length > 0) {
+        return explicit.replace(/\/+$/, "");
+    }
+    const cloudServer = env["CLOUD_SERVER_PUBLIC_URL"]?.trim();
+    if (cloudServer !== undefined && cloudServer.length > 0) {
+        return `${cloudServer.replace(/\/+$/, "")}/llm/openai/v1`;
+    }
+    return DEFAULT_GATEWAY_BASE_URL;
+};
+const readResponseJson = async (response) => {
+    const text = await response.text();
+    if (text.trim().length === 0) {
+        return null;
+    }
+    try {
+        return JSON.parse(text);
+    }
+    catch {
+        throw new CliError(`Gateway returned invalid JSON with HTTP ${String(response.status)}.`);
+    }
+};
+const readApiError = (status, body) => {
+    if (typeof body === "object" && body !== null && "error" in body) {
+        const error = body.error;
+        const message = typeof error?.message === "string" ? error.message : undefined;
+        const code = typeof error?.code === "string" ? error.code : undefined;
+        if (message !== undefined && code !== undefined) {
+            return `Gateway request failed (${String(status)} ${code}): ${message}`;
+        }
+        if (message !== undefined) {
+            return `Gateway request failed (${String(status)}): ${message}`;
+        }
+    }
+    return `Gateway request failed with HTTP ${String(status)}.`;
+};
+const requireValue = (argv, index, option) => {
+    const value = argv[index + 1];
+    if (value === undefined || value.length === 0 || value.startsWith("-")) {
+        throw new CliError(`${option} requires a value.`);
+    }
+    return value;
+};
+const parseInteger = (value, option, min, max) => {
+    if (!/^\d+$/.test(value)) {
+        throw new CliError(`${option} must be an integer.`);
+    }
+    const parsed = Number(value);
+    if (parsed < min || parsed > max) {
+        throw new CliError(`${option} must be between ${String(min)} and ${String(max)}.`);
+    }
+    return parsed;
+};
+const parseEnum = (value, allowed, option) => {
+    if (!allowed.includes(value)) {
+        throw new CliError(`${option} must be one of: ${allowed.join(", ")}.`);
+    }
+    return value;
+};
+const toFileTimestamp = (date) => date.toISOString().replace(/\.\d{3}Z$/, "Z").replace(/[:-]/g, "").replace("T", "-");
+const isString = (value) => typeof value === "string" && value.length > 0;
+const helpText = () => `Usage:
+  image-gen <prompt> [options]
+  image-gen --prompt-file prompt.txt [options]
+Examples:
+  image-gen "A polished product photo of a matte black coffee mug" -o mug.png
+  image-gen "Restyle this screenshot as a clean SaaS dashboard" -i screenshot.png -o dashboard.png
+  image-gen "Replace the masked area with a small indoor pool" -i room.png --mask mask.png -o room-edit.png
+  image-gen "Three calm finance app logo concepts" --n 3 --out-dir logos
+Options:
+  -o, --output <path>          Output file for one image
+      --out-dir <dir>          Output directory, required when --n > 1
+  -i, --image <path>           Reference/input image, repeatable up to 16 times
+      --mask <path>            Mask image for editing the first input image
+      --size <size>            auto or WIDTHxHEIGHT, default: auto
+      --quality <quality>      auto | low | medium | high, default: auto
+      --format <format>        png | jpeg, default: output extension or png
+      --compression <0-100>    jpeg only
+      --n <count>              Number of images, 1-10, default: 1
+      --moderation <value>     auto | low, default: auto
+      --prompt-file <path>     Read prompt from file
+  -h, --help                   Show this help`;

package/dist/index.js ADDED Viewed

@@ -0,0 +1,3 @@
+#!/usr/bin/env node
+// CLI entry point: forwards the user-supplied arguments to runCli and uses its
+// resolved return value as the process exit code.
+import { runCli } from "./cli.js";
+// argv.slice(2) drops the node executable and script path.
+process.exitCode = await runCli(process.argv.slice(2));

package/docs/usage.md ADDED Viewed

@@ -0,0 +1,214 @@
+# image-gen CLI
+`image-gen` is an opinionated image generation CLI for VM agents. It always uses
+`gpt-image-2`, sends requests through the HeySnap gateway, handles machine-token
+auth automatically, and writes image files locally.
+The core contract is:
+```sh
+image-gen <prompt> [options]
+```
+It returns the generated image path or paths on `stdout`, one path per line. Use
+those paths directly in scripts or agent workflows.
+```sh
+image_path="$(image-gen "A clean product photo of a matte black coffee mug" -o mug.png)"
+```
+## Common Commands
+Generate one image from a text prompt:
+```sh
+image-gen "A polished product photo of a matte black coffee mug on a white desk" \
+  -o mug.png
+```
+Generate to an automatically named file in the current directory:
+```sh
+image-gen "A minimal app icon for a calendar app"
+```
+Generate multiple images:
+```sh
+image-gen "Three calm finance app logo concepts, premium and simple" \
+  --n 3 \
+  --out-dir logos
+```
+Use one reference image:
+```sh
+image-gen "Restyle this screenshot as a polished SaaS dashboard" \
+  --image screenshot.png \
+  -o dashboard.png
+```
+Use multiple reference images:
+```sh
+image-gen "Create a gift basket containing the items from these references" \
+  --image lotion.png \
+  --image candle.png \
+  --image soap.png \
+  --image ribbon.png \
+  -o basket.png
+```
+Edit an image with a mask:
+```sh
+image-gen "Replace the masked area with a small indoor pool" \
+  --image room.png \
+  --mask mask.png \
+  -o room-edit.png
+```
+Read a long prompt from a file:
+```sh
+image-gen --prompt-file prompt.txt -o result.png
+```
+Request JPEG output:
+```sh
+image-gen "A fast-loading article hero image of a modern workspace" \
+  --format jpeg \
+  --compression 65 \
+  -o hero.jpg
+```
+## Options
+```text
+-o, --output <path>          Output file for one image
+    --out-dir <dir>          Output directory, required when --n > 1
+-i, --image <path>           Reference/input image, repeatable up to 16 times
+    --mask <path>            Mask image for editing the first input image
+    --size <size>            auto or WIDTHxHEIGHT, default: auto
+    --quality <quality>      auto | low | medium | high, default: auto
+    --format <format>        png | jpeg, default: output extension or png
+    --compression <0-100>    jpeg only
+    --n <count>              Number of images, 1-10, default: 1
+    --moderation <value>     auto | low, default: auto
+    --prompt-file <path>     Read prompt from file
+-h, --help                   Show help
+```
+## Behavior
+If no `--image` is passed, `image-gen` creates a new image from the text prompt.
+If one or more `--image` flags are passed, `image-gen` sends an edit/reference
+image request. The prompt should describe the desired output, not just the input.
+If `--mask` is passed, the mask applies to the first `--image`. The mask and
+first image should have the same dimensions. Use a PNG mask with an alpha
+channel.
+If `--output` is omitted, files are named like:
+```text
+image-20260509-101112Z-01.png
+```
+If `--n` is greater than `1`, use `--out-dir` instead of `--output`.
+## Size
+`--size` defaults to `auto`.
+You can also pass explicit dimensions:
+```sh
+image-gen "A cinematic landscape poster" --size 1536x1024 -o poster.png
+image-gen "A square app icon" --size 1024x1024 -o icon.png
+image-gen "A portrait poster" --size 1024x1536 -o portrait.png
+```
+Explicit sizes must follow GPT Image 2 constraints:
+- Width and height must be multiples of `16`.
+- Maximum edge length is `3840px`.
+- Long edge to short edge ratio must not exceed `3:1`.
+- Total pixels must be between `655360` and `8294400`.
+## Quality
+`--quality` defaults to `auto`.
+Use `low` for fast drafts, `medium` for balanced output, and `high` for final
+assets:
+```sh
+image-gen "A rough thumbnail concept for a travel app" --quality low -o draft.png
+image-gen "A final polished app-store hero graphic" --quality high -o final.png
+```
+## Format And Compression
+Supported formats:
+- `png`
+- `jpeg`
+If `--format` is omitted and `--output` has a known extension, the extension is
+used. For example, `-o hero.jpg` requests JPEG.
+`--compression` is only valid with `jpeg`:
+```sh
+image-gen "A blog cover image" --format jpeg --compression 70 -o cover.jpg
+```
+## Moderation
+`--moderation` controls GPT Image moderation strictness:
+- `auto`: default filtering
+- `low`: less restrictive filtering
+```sh
+image-gen "A dramatic editorial fashion image" --moderation auto -o fashion.png
+```
+## Limits
+- Prompt length: up to `32000` characters.
+- Reference images: up to `16` `--image` flags.
+- Output count: `--n` supports `1` through `10`.
+- Input image files: `50MB` or smaller.
+- Transparent backgrounds are not exposed because this CLI is scoped to
+  `gpt-image-2` only.
+- The model is fixed to `gpt-image-2`; there is no `--model` flag.
+## Script Usage
+Use `stdout` for paths:
+```sh
+result="$(image-gen "A small pixel-art save icon" -o save-icon.png)"
+echo "Generated: $result"
+```
+Multiple outputs:
+```sh
+image-gen "Four onboarding illustrations for a productivity app" \
+  --n 4 \
+  --out-dir onboarding \
+  > generated-images.txt
+```
+Then read each path:
+```sh
+while read -r path; do
+  echo "Generated $path"
+done < generated-images.txt
+```

package/package.json ADDED Viewed

@@ -0,0 +1,27 @@
+{
+  "name": "heysnap-image-gen",
+  "version": "0.1.0",
+  "description": "Opinionated GPT Image 2 CLI for VM agents.",
+  "type": "module",
+  "bin": {
+    "image-gen": "dist/index.js"
+  },
+  "files": [
+    "dist",
+    "docs",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "tsc -p tsconfig.json",
+    "dev": "node --import tsx src/index.ts",
+    "start": "node dist/index.js",
+    "test": "node --import tsx --test src/**/*.test.ts",
+    "prepack": "npm run build",
+    "prepublishOnly": "npm run build && npm test"
+  },
+  "devDependencies": {
+    "@types/node": "^25.6.2",
+    "tsx": "^4.21.0",
+    "typescript": "^6.0.3"
+  }
+}