npm - ai-cli - Versions diffs - 0.2.1 → 0.3.0 - Mend

ai-cli 0.2.1 → 0.3.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -19,10 +19,14 @@ ai text "explain quantum computing"
 ai models                          # list available models
 ```
-### Piping
+### Piping and References
 ```bash
 ai image "a dragon" | ai video "animate this"
+ai image --image reference.png "make a sticker in this style"
+ai image -i sketch.png -i palette.jpg "render this product concept"
+ai text --image screenshot.png "what is broken in this UI?"
+cat photo.png | ai text "describe this image"
 cat notes.txt | ai text "summarize this"
 git diff | ai text "explain these changes"
 ```
@@ -50,6 +54,7 @@ ai image -m flux-2-pro "a sunset"   # resolves to bfl/flux-2-pro
 ### image
 ```
+-i, --image <path-or-url> Reference image path or URL (repeatable)
 --size <WxH>             Image size (e.g. 1024x1024)
 --aspect-ratio <W:H>     Aspect ratio (e.g. 16:9)
 --quality <level>        Quality (standard, hd)
@@ -57,6 +62,14 @@ ai image -m flux-2-pro "a sunset"   # resolves to bfl/flux-2-pro
 --no-preview             Disable inline image preview
 ```
+Reference images can be local paths, `file://` URLs, `http(s)://` URLs or data URLs. You can repeat `--image` to pass multiple references, and you can still pipe one image through stdin:
+```bash
+cat input.png | ai image -i style.png "combine the subject with this style"
+```
+Reference-image support is model-dependent; unsupported models may reject image inputs.
 ### video
 ```
@@ -69,11 +82,19 @@ ai image -m flux-2-pro "a sunset"   # resolves to bfl/flux-2-pro
 ```
 -f, --format <fmt>       Output format: md, txt (default: md)
+-i, --image <path-or-url> Image input path or URL for vision (repeatable)
 -s, --system <prompt>    System prompt
 --max-tokens <n>         Maximum tokens to generate
 -t, --temperature <n>    Temperature (0-2)
 ```
+For vision-capable text models, `ai text` accepts images from `--image` or piped stdin:
+```bash
+ai text -i chart.png -i table.jpg "summarize the data"
+cat screenshot.png | ai text "list the visible errors"
+```
 ### models
 ```

package/dist/index.js CHANGED Viewed

@@ -4617,7 +4617,7 @@ var {
 // package.json
 var package_default = {
   name: "ai-cli",
-  version: "0.2.1",
+  version: "0.3.0",
   description: "A tiny, agent-native CLI for generating images, video and text with dead-simple commands, stdin support and predictable artifact outputs",
   type: "module",
   license: "Apache-2.0",
@@ -28463,6 +28463,112 @@ var _a20;
 _a20 = symbol20;
 var defaultDownload2 = createDownload();
+// src/lib/image-references.ts
+import { readFile } from "node:fs/promises";
+import { fileURLToPath } from "node:url";
+function collectImageReference(value, previous = []) {
+  return [...previous, value];
+}
+async function loadImageReferences(references) {
+  return Promise.all(references.map(loadImageReference));
+}
+function isLikelyImage(data) {
+  if (hasPrefix(data, [137, 80, 78, 71, 13, 10, 26, 10])) {
+    return true;
+  }
+  if (hasPrefix(data, [255, 216, 255]))
+    return true;
+  if (startsWithAscii(data, "GIF87a") || startsWithAscii(data, "GIF89a")) {
+    return true;
+  }
+  if (startsWithAscii(data, "RIFF") && data.length >= 12 && asciiAt(data, 8, 4) === "WEBP") {
+    return true;
+  }
+  if (startsWithAscii(data, "BM"))
+    return true;
+  if (hasPrefix(data, [73, 73, 42, 0]))
+    return true;
+  if (hasPrefix(data, [77, 77, 0, 42]))
+    return true;
+  if (hasIsoImageBrand(data))
+    return true;
+  if (looksLikeSvg(data))
+    return true;
+  return false;
+}
+async function loadImageReference(reference) {
+  const trimmed = reference.trim();
+  if (!trimmed) {
+    throw new Error("--image cannot be empty");
+  }
+  const url2 = parseReferenceUrl(trimmed);
+  if (url2) {
+    if (url2.protocol === "http:" || url2.protocol === "https:")
+      return url2.toString();
+    if (url2.protocol === "data:")
+      return url2.toString();
+    if (url2.protocol === "file:")
+      return readReferenceFile(fileURLToPath(url2));
+    throw new Error(`unsupported reference image URL scheme "${url2.protocol}"; use a file path, file:// URL, http(s) URL, or data URL`);
+  }
+  return readReferenceFile(trimmed);
+}
+async function readReferenceFile(path) {
+  try {
+    return new Uint8Array(await readFile(path));
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    throw new Error(`could not read reference image "${path}": ${message}`);
+  }
+}
+function parseReferenceUrl(input) {
+  if (/^[a-zA-Z]:[\\/]/.test(input))
+    return null;
+  const lowerInput = input.toLowerCase();
+  if (!lowerInput.startsWith("data:") && !/^[a-zA-Z][a-zA-Z\d+.-]*:\/\//.test(input)) {
+    return null;
+  }
+  try {
+    return new URL(input);
+  } catch {
+    return null;
+  }
+}
+function hasPrefix(data, prefix) {
+  return prefix.every((byte, index) => data[index] === byte);
+}
+function startsWithAscii(data, value) {
+  return asciiAt(data, 0, value.length) === value;
+}
+function asciiAt(data, offset, length) {
+  if (data.length < offset + length)
+    return "";
+  return String.fromCharCode(...data.slice(offset, offset + length));
+}
+function hasIsoImageBrand(data) {
+  if (data.length < 12 || asciiAt(data, 4, 4) !== "ftyp")
+    return false;
+  const brands = new Set([
+    "avif",
+    "avis",
+    "heic",
+    "heix",
+    "hevc",
+    "hevx",
+    "mif1",
+    "msf1"
+  ]);
+  for (let offset = 8;offset + 4 <= Math.min(data.length, 64); offset += 4) {
+    if (brands.has(asciiAt(data, offset, 4)))
+      return true;
+  }
+  return false;
+}
+function looksLikeSvg(data) {
+  const prefix = new TextDecoder().decode(data.slice(0, Math.min(data.length, 512))).trimStart().toLowerCase();
+  return prefix.startsWith("<svg") || prefix.startsWith("<?xml") && prefix.includes("<svg");
+}
 // src/lib/h264-wasm.ts
 var START_CODE = new Uint8Array([0, 0, 0, 1]);
 function buildAnnexB(sps, pps, idr) {
@@ -29701,17 +29807,31 @@ function stdinAsText(buf) {
 var DEFAULT_CONCURRENCY = 4;
 var DEFAULT_TIMEOUT_MS = 120000;
 function registerImageCommand(program2) {
-  program2.command("image").description("Generate an image from a prompt").argument("[prompt]", "The prompt to generate an image from").option("-m, --model <model>", "Model ID (creator/model-name), comma-separated for multi-model").option("-o, --output <path>", "Output file path or directory").option("-n, --count <n>", "Number of images per model (default: 1)").option("--size <WxH>", "Image size (e.g. 1024x1024)").option("--aspect-ratio <W:H>", "Aspect ratio (e.g. 16:9)").option("--quality <level>", "Quality (standard, hd)").option("--style <style>", "Style (e.g. vivid, natural)").option("-q, --quiet", "Suppress progress output").option("--json", "Output metadata as JSON").option("--no-preview", "Disable inline image preview in supported terminals").option("-p, --concurrency <n>", `Max parallel generations (default: ${DEFAULT_CONCURRENCY})`).action(async (rawPrompt, opts) => {
+  program2.command("image").description("Generate an image from a prompt").argument("[prompt]", "The prompt to generate an image from").option("-m, --model <model>", "Model ID (creator/model-name), comma-separated for multi-model").option("-o, --output <path>", "Output file path or directory").option("-i, --image <path-or-url>", "Reference image path or URL (repeatable)", collectImageReference, []).option("-n, --count <n>", "Number of images per model (default: 1)").option("--size <WxH>", "Image size (e.g. 1024x1024)").option("--aspect-ratio <W:H>", "Aspect ratio (e.g. 16:9)").option("--quality <level>", "Quality (standard, hd)").option("--style <style>", "Style (e.g. vivid, natural)").option("-q, --quiet", "Suppress progress output").option("--json", "Output metadata as JSON").option("--no-preview", "Disable inline image preview in supported terminals").option("-p, --concurrency <n>", `Max parallel generations (default: ${DEFAULT_CONCURRENCY})`).action(async (rawPrompt, opts) => {
     const prompt = rawPrompt?.trim() || undefined;
     const stdin = await readStdin();
-    if (!prompt && !stdin) {
-      process.stderr.write(`Error: prompt is required (provide as argument or pipe via stdin)
+    const imageReferenceInputs = opts.image ?? [];
+    if (!prompt && !stdin && imageReferenceInputs.length === 0) {
+      process.stderr.write(`Error: prompt or reference image is required (provide a prompt, --image, or pipe an image via stdin)
 `);
       process.exit(1);
     }
+    let referenceImages = [];
+    try {
+      referenceImages = await loadImageReferences(imageReferenceInputs);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      process.stderr.write(`Error: ${message}
+`);
+      process.exit(1);
+    }
+    const images = [
+      ...stdin ? [new Uint8Array(stdin)] : [],
+      ...referenceImages
+    ];
     let imagePrompt;
-    if (stdin) {
-      imagePrompt = prompt ? { images: [new Uint8Array(stdin)], text: prompt } : { images: [new Uint8Array(stdin)] };
+    if (images.length > 0) {
+      imagePrompt = prompt ? { images, text: prompt } : { images };
     } else {
       imagePrompt = prompt;
     }
@@ -29903,26 +30023,32 @@ function resolveFormat(fmt) {
   throw new Error(`--format must be one of: md, txt (got "${fmt}")`);
 }
 function registerTextCommand(program2) {
-  program2.command("text").description("Generate text from a prompt").argument("[prompt]", "The prompt to generate text from").option("-m, --model <model>", "Model ID (creator/model-name), comma-separated for multi-model").option("-o, --output <path>", "Output file path or directory").option("-f, --format <fmt>", "Output format: md, txt (default: md)").option("-n, --count <n>", "Number of generations (default: 1)").option("-p, --concurrency <n>", `Max parallel generations (default: ${DEFAULT_CONCURRENCY2})`).option("-s, --system <prompt>", "System prompt").option("--max-tokens <n>", "Maximum tokens to generate").option("-t, --temperature <n>", "Temperature (0-2)").option("-q, --quiet", "Suppress progress output").option("--json", "Output metadata as JSON").action(async (rawPrompt, opts) => {
+  program2.command("text").description("Generate text from a prompt").argument("[prompt]", "The prompt to generate text from").option("-m, --model <model>", "Model ID (creator/model-name), comma-separated for multi-model").option("-o, --output <path>", "Output file path or directory").option("-f, --format <fmt>", "Output format: md, txt (default: md)").option("-i, --image <path-or-url>", "Image input path or URL for vision (repeatable)", collectImageReference, []).option("-n, --count <n>", "Number of generations (default: 1)").option("-p, --concurrency <n>", `Max parallel generations (default: ${DEFAULT_CONCURRENCY2})`).option("-s, --system <prompt>", "System prompt").option("--max-tokens <n>", "Maximum tokens to generate").option("-t, --temperature <n>", "Temperature (0-2)").option("-q, --quiet", "Suppress progress output").option("--json", "Output metadata as JSON").action(async (rawPrompt, opts) => {
     const prompt = rawPrompt?.trim() || undefined;
     const stdin = await readStdin();
-    if (!prompt && !stdin) {
-      process.stderr.write(`Error: prompt is required (provide as argument or pipe via stdin)
+    const imageReferenceInputs = opts.image ?? [];
+    if (!prompt && !stdin && imageReferenceInputs.length === 0) {
+      process.stderr.write(`Error: prompt, stdin, or image is required (provide a prompt, --image, or pipe text/image via stdin)
 `);
       process.exit(1);
     }
-    let fullPrompt;
-    if (stdin && prompt) {
-      fullPrompt = `${stdinAsText(stdin)}
----
-${prompt}`;
-    } else if (stdin) {
-      fullPrompt = stdinAsText(stdin);
-    } else {
-      fullPrompt = prompt;
+    let referenceImages = [];
+    try {
+      referenceImages = await loadImageReferences(imageReferenceInputs);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      process.stderr.write(`Error: ${message}
+`);
+      process.exit(1);
     }
+    const stdinBytes = stdin ? new Uint8Array(stdin) : undefined;
+    const stdinIsImage = stdinBytes ? isLikelyImage(stdinBytes) : false;
+    const images = [
+      ...stdinBytes && stdinIsImage ? [stdinBytes] : [],
+      ...referenceImages
+    ];
+    const stdinText = stdin && !stdinIsImage ? stdinAsText(stdin) : undefined;
+    const textPrompt = buildTextPrompt({ prompt, stdinText, images });
     const format = resolveFormat(opts.format);
     const gatewayModels = await fetchGatewayModels();
     const models = resolveModels("text", opts.model, gatewayModels.text);
@@ -29938,7 +30064,7 @@ ${prompt}`;
           "x-title": "ai-cli"
         },
         model: gateway(modelId),
-        prompt: fullPrompt,
+        prompt: textPrompt,
         system: opts.system,
         maxOutputTokens: maxTokens,
         temperature,
@@ -29959,6 +30085,37 @@ ${prompt}`;
       process.exit(2);
   });
 }
+function buildTextPrompt({
+  prompt,
+  stdinText,
+  images
+}) {
+  if (images.length === 0) {
+    if (stdinText && prompt)
+      return `${stdinText}
+---
+${prompt}`;
+    if (stdinText)
+      return stdinText;
+    return prompt;
+  }
+  const content = [];
+  if (stdinText)
+    content.push({ type: "text", text: stdinText });
+  for (const image of images)
+    content.push({ type: "image", image });
+  if (prompt) {
+    content.push({ type: "text", text: prompt });
+  } else if (!stdinText) {
+    content.push({
+      type: "text",
+      text: images.length === 1 ? "Describe this image." : "Describe these images."
+    });
+  }
+  return [{ role: "user", content }];
+}
 // src/commands/video.ts
 var DEFAULT_CONCURRENCY3 = 2;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-cli",
-  "version": "0.2.1",
+  "version": "0.3.0",
   "description": "A tiny, agent-native CLI for generating images, video and text with dead-simple commands, stdin support and predictable artifact outputs",
   "type": "module",
   "license": "Apache-2.0",