ai-cli 0.2.1 → 0.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +22 -1
  2. package/dist/index.js +178 -21
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -19,10 +19,14 @@ ai text "explain quantum computing"
19
19
  ai models # list available models
20
20
  ```
21
21
 
22
- ### Piping
22
+ ### Piping and References
23
23
 
24
24
  ```bash
25
25
  ai image "a dragon" | ai video "animate this"
26
+ ai image --image reference.png "make a sticker in this style"
27
+ ai image -i sketch.png -i palette.jpg "render this product concept"
28
+ ai text --image screenshot.png "what is broken in this UI?"
29
+ cat photo.png | ai text "describe this image"
26
30
  cat notes.txt | ai text "summarize this"
27
31
  git diff | ai text "explain these changes"
28
32
  ```
@@ -50,6 +54,7 @@ ai image -m flux-2-pro "a sunset" # resolves to bfl/flux-2-pro
50
54
  ### image
51
55
 
52
56
  ```
57
+ -i, --image <path-or-url> Reference image path or URL (repeatable)
53
58
  --size <WxH> Image size (e.g. 1024x1024)
54
59
  --aspect-ratio <W:H> Aspect ratio (e.g. 16:9)
55
60
  --quality <level> Quality (standard, hd)
@@ -57,6 +62,14 @@ ai image -m flux-2-pro "a sunset" # resolves to bfl/flux-2-pro
57
62
  --no-preview Disable inline image preview
58
63
  ```
59
64
 
65
+ Reference images can be local paths, `file://` URLs, `http(s)://` URLs or data URLs. You can repeat `--image` to pass multiple references, and you can still pipe one image through stdin:
66
+
67
+ ```bash
68
+ cat input.png | ai image -i style.png "combine the subject with this style"
69
+ ```
70
+
71
+ Reference-image support is model-dependent; unsupported models may reject image inputs.
72
+
60
73
  ### video
61
74
 
62
75
  ```
@@ -69,11 +82,19 @@ ai image -m flux-2-pro "a sunset" # resolves to bfl/flux-2-pro
69
82
 
70
83
  ```
71
84
  -f, --format <fmt> Output format: md, txt (default: md)
85
+ -i, --image <path-or-url> Image input path or URL for vision (repeatable)
72
86
  -s, --system <prompt> System prompt
73
87
  --max-tokens <n> Maximum tokens to generate
74
88
  -t, --temperature <n> Temperature (0-2)
75
89
  ```
76
90
 
91
+ For vision-capable text models, `ai text` accepts images from `--image` or piped stdin:
92
+
93
+ ```bash
94
+ ai text -i chart.png -i table.jpg "summarize the data"
95
+ cat screenshot.png | ai text "list the visible errors"
96
+ ```
97
+
77
98
  ### models
78
99
 
79
100
  ```
package/dist/index.js CHANGED
@@ -4617,7 +4617,7 @@ var {
4617
4617
  // package.json
4618
4618
  var package_default = {
4619
4619
  name: "ai-cli",
4620
- version: "0.2.1",
4620
+ version: "0.3.0",
4621
4621
  description: "A tiny, agent-native CLI for generating images, video and text with dead-simple commands, stdin support and predictable artifact outputs",
4622
4622
  type: "module",
4623
4623
  license: "Apache-2.0",
@@ -28463,6 +28463,112 @@ var _a20;
28463
28463
  _a20 = symbol20;
28464
28464
  var defaultDownload2 = createDownload();
28465
28465
 
28466
+ // src/lib/image-references.ts
28467
+ import { readFile } from "node:fs/promises";
28468
+ import { fileURLToPath } from "node:url";
28469
+ function collectImageReference(value, previous = []) {
28470
+ return [...previous, value];
28471
+ }
28472
+ async function loadImageReferences(references) {
28473
+ return Promise.all(references.map(loadImageReference));
28474
+ }
28475
+ function isLikelyImage(data) {
28476
+ if (hasPrefix(data, [137, 80, 78, 71, 13, 10, 26, 10])) {
28477
+ return true;
28478
+ }
28479
+ if (hasPrefix(data, [255, 216, 255]))
28480
+ return true;
28481
+ if (startsWithAscii(data, "GIF87a") || startsWithAscii(data, "GIF89a")) {
28482
+ return true;
28483
+ }
28484
+ if (startsWithAscii(data, "RIFF") && data.length >= 12 && asciiAt(data, 8, 4) === "WEBP") {
28485
+ return true;
28486
+ }
28487
+ if (startsWithAscii(data, "BM"))
28488
+ return true;
28489
+ if (hasPrefix(data, [73, 73, 42, 0]))
28490
+ return true;
28491
+ if (hasPrefix(data, [77, 77, 0, 42]))
28492
+ return true;
28493
+ if (hasIsoImageBrand(data))
28494
+ return true;
28495
+ if (looksLikeSvg(data))
28496
+ return true;
28497
+ return false;
28498
+ }
28499
+ async function loadImageReference(reference) {
28500
+ const trimmed = reference.trim();
28501
+ if (!trimmed) {
28502
+ throw new Error("--image cannot be empty");
28503
+ }
28504
+ const url2 = parseReferenceUrl(trimmed);
28505
+ if (url2) {
28506
+ if (url2.protocol === "http:" || url2.protocol === "https:")
28507
+ return url2.toString();
28508
+ if (url2.protocol === "data:")
28509
+ return url2.toString();
28510
+ if (url2.protocol === "file:")
28511
+ return readReferenceFile(fileURLToPath(url2));
28512
+ throw new Error(`unsupported reference image URL scheme "${url2.protocol}"; use a file path, file:// URL, http(s) URL, or data URL`);
28513
+ }
28514
+ return readReferenceFile(trimmed);
28515
+ }
28516
+ async function readReferenceFile(path) {
28517
+ try {
28518
+ return new Uint8Array(await readFile(path));
28519
+ } catch (err) {
28520
+ const message = err instanceof Error ? err.message : String(err);
28521
+ throw new Error(`could not read reference image "${path}": ${message}`);
28522
+ }
28523
+ }
28524
+ function parseReferenceUrl(input) {
28525
+ if (/^[a-zA-Z]:[\\/]/.test(input))
28526
+ return null;
28527
+ const lowerInput = input.toLowerCase();
28528
+ if (!lowerInput.startsWith("data:") && !/^[a-zA-Z][a-zA-Z\d+.-]*:\/\//.test(input)) {
28529
+ return null;
28530
+ }
28531
+ try {
28532
+ return new URL(input);
28533
+ } catch {
28534
+ return null;
28535
+ }
28536
+ }
28537
+ function hasPrefix(data, prefix) {
28538
+ return prefix.every((byte, index) => data[index] === byte);
28539
+ }
28540
+ function startsWithAscii(data, value) {
28541
+ return asciiAt(data, 0, value.length) === value;
28542
+ }
28543
+ function asciiAt(data, offset, length) {
28544
+ if (data.length < offset + length)
28545
+ return "";
28546
+ return String.fromCharCode(...data.slice(offset, offset + length));
28547
+ }
28548
+ function hasIsoImageBrand(data) {
28549
+ if (data.length < 12 || asciiAt(data, 4, 4) !== "ftyp")
28550
+ return false;
28551
+ const brands = new Set([
28552
+ "avif",
28553
+ "avis",
28554
+ "heic",
28555
+ "heix",
28556
+ "hevc",
28557
+ "hevx",
28558
+ "mif1",
28559
+ "msf1"
28560
+ ]);
28561
+ for (let offset = 8;offset + 4 <= Math.min(data.length, 64); offset += 4) {
28562
+ if (brands.has(asciiAt(data, offset, 4)))
28563
+ return true;
28564
+ }
28565
+ return false;
28566
+ }
28567
+ function looksLikeSvg(data) {
28568
+ const prefix = new TextDecoder().decode(data.slice(0, Math.min(data.length, 512))).trimStart().toLowerCase();
28569
+ return prefix.startsWith("<svg") || prefix.startsWith("<?xml") && prefix.includes("<svg");
28570
+ }
28571
+
28466
28572
  // src/lib/h264-wasm.ts
28467
28573
  var START_CODE = new Uint8Array([0, 0, 0, 1]);
28468
28574
  function buildAnnexB(sps, pps, idr) {
@@ -29701,17 +29807,31 @@ function stdinAsText(buf) {
29701
29807
  var DEFAULT_CONCURRENCY = 4;
29702
29808
  var DEFAULT_TIMEOUT_MS = 120000;
29703
29809
  function registerImageCommand(program2) {
29704
- program2.command("image").description("Generate an image from a prompt").argument("[prompt]", "The prompt to generate an image from").option("-m, --model <model>", "Model ID (creator/model-name), comma-separated for multi-model").option("-o, --output <path>", "Output file path or directory").option("-n, --count <n>", "Number of images per model (default: 1)").option("--size <WxH>", "Image size (e.g. 1024x1024)").option("--aspect-ratio <W:H>", "Aspect ratio (e.g. 16:9)").option("--quality <level>", "Quality (standard, hd)").option("--style <style>", "Style (e.g. vivid, natural)").option("-q, --quiet", "Suppress progress output").option("--json", "Output metadata as JSON").option("--no-preview", "Disable inline image preview in supported terminals").option("-p, --concurrency <n>", `Max parallel generations (default: ${DEFAULT_CONCURRENCY})`).action(async (rawPrompt, opts) => {
29810
+ program2.command("image").description("Generate an image from a prompt").argument("[prompt]", "The prompt to generate an image from").option("-m, --model <model>", "Model ID (creator/model-name), comma-separated for multi-model").option("-o, --output <path>", "Output file path or directory").option("-i, --image <path-or-url>", "Reference image path or URL (repeatable)", collectImageReference, []).option("-n, --count <n>", "Number of images per model (default: 1)").option("--size <WxH>", "Image size (e.g. 1024x1024)").option("--aspect-ratio <W:H>", "Aspect ratio (e.g. 16:9)").option("--quality <level>", "Quality (standard, hd)").option("--style <style>", "Style (e.g. vivid, natural)").option("-q, --quiet", "Suppress progress output").option("--json", "Output metadata as JSON").option("--no-preview", "Disable inline image preview in supported terminals").option("-p, --concurrency <n>", `Max parallel generations (default: ${DEFAULT_CONCURRENCY})`).action(async (rawPrompt, opts) => {
29705
29811
  const prompt = rawPrompt?.trim() || undefined;
29706
29812
  const stdin = await readStdin();
29707
- if (!prompt && !stdin) {
29708
- process.stderr.write(`Error: prompt is required (provide as argument or pipe via stdin)
29813
+ const imageReferenceInputs = opts.image ?? [];
29814
+ if (!prompt && !stdin && imageReferenceInputs.length === 0) {
29815
+ process.stderr.write(`Error: prompt or reference image is required (provide a prompt, --image, or pipe an image via stdin)
29709
29816
  `);
29710
29817
  process.exit(1);
29711
29818
  }
29819
+ let referenceImages = [];
29820
+ try {
29821
+ referenceImages = await loadImageReferences(imageReferenceInputs);
29822
+ } catch (err) {
29823
+ const message = err instanceof Error ? err.message : String(err);
29824
+ process.stderr.write(`Error: ${message}
29825
+ `);
29826
+ process.exit(1);
29827
+ }
29828
+ const images = [
29829
+ ...stdin ? [new Uint8Array(stdin)] : [],
29830
+ ...referenceImages
29831
+ ];
29712
29832
  let imagePrompt;
29713
- if (stdin) {
29714
- imagePrompt = prompt ? { images: [new Uint8Array(stdin)], text: prompt } : { images: [new Uint8Array(stdin)] };
29833
+ if (images.length > 0) {
29834
+ imagePrompt = prompt ? { images, text: prompt } : { images };
29715
29835
  } else {
29716
29836
  imagePrompt = prompt;
29717
29837
  }
@@ -29903,26 +30023,32 @@ function resolveFormat(fmt) {
29903
30023
  throw new Error(`--format must be one of: md, txt (got "${fmt}")`);
29904
30024
  }
29905
30025
  function registerTextCommand(program2) {
29906
- program2.command("text").description("Generate text from a prompt").argument("[prompt]", "The prompt to generate text from").option("-m, --model <model>", "Model ID (creator/model-name), comma-separated for multi-model").option("-o, --output <path>", "Output file path or directory").option("-f, --format <fmt>", "Output format: md, txt (default: md)").option("-n, --count <n>", "Number of generations (default: 1)").option("-p, --concurrency <n>", `Max parallel generations (default: ${DEFAULT_CONCURRENCY2})`).option("-s, --system <prompt>", "System prompt").option("--max-tokens <n>", "Maximum tokens to generate").option("-t, --temperature <n>", "Temperature (0-2)").option("-q, --quiet", "Suppress progress output").option("--json", "Output metadata as JSON").action(async (rawPrompt, opts) => {
30026
+ program2.command("text").description("Generate text from a prompt").argument("[prompt]", "The prompt to generate text from").option("-m, --model <model>", "Model ID (creator/model-name), comma-separated for multi-model").option("-o, --output <path>", "Output file path or directory").option("-f, --format <fmt>", "Output format: md, txt (default: md)").option("-i, --image <path-or-url>", "Image input path or URL for vision (repeatable)", collectImageReference, []).option("-n, --count <n>", "Number of generations (default: 1)").option("-p, --concurrency <n>", `Max parallel generations (default: ${DEFAULT_CONCURRENCY2})`).option("-s, --system <prompt>", "System prompt").option("--max-tokens <n>", "Maximum tokens to generate").option("-t, --temperature <n>", "Temperature (0-2)").option("-q, --quiet", "Suppress progress output").option("--json", "Output metadata as JSON").action(async (rawPrompt, opts) => {
29907
30027
  const prompt = rawPrompt?.trim() || undefined;
29908
30028
  const stdin = await readStdin();
29909
- if (!prompt && !stdin) {
29910
- process.stderr.write(`Error: prompt is required (provide as argument or pipe via stdin)
30029
+ const imageReferenceInputs = opts.image ?? [];
30030
+ if (!prompt && !stdin && imageReferenceInputs.length === 0) {
30031
+ process.stderr.write(`Error: prompt, stdin, or image is required (provide a prompt, --image, or pipe text/image via stdin)
29911
30032
  `);
29912
30033
  process.exit(1);
29913
30034
  }
29914
- let fullPrompt;
29915
- if (stdin && prompt) {
29916
- fullPrompt = `${stdinAsText(stdin)}
29917
-
29918
- ---
29919
-
29920
- ${prompt}`;
29921
- } else if (stdin) {
29922
- fullPrompt = stdinAsText(stdin);
29923
- } else {
29924
- fullPrompt = prompt;
30035
+ let referenceImages = [];
30036
+ try {
30037
+ referenceImages = await loadImageReferences(imageReferenceInputs);
30038
+ } catch (err) {
30039
+ const message = err instanceof Error ? err.message : String(err);
30040
+ process.stderr.write(`Error: ${message}
30041
+ `);
30042
+ process.exit(1);
29925
30043
  }
30044
+ const stdinBytes = stdin ? new Uint8Array(stdin) : undefined;
30045
+ const stdinIsImage = stdinBytes ? isLikelyImage(stdinBytes) : false;
30046
+ const images = [
30047
+ ...stdinBytes && stdinIsImage ? [stdinBytes] : [],
30048
+ ...referenceImages
30049
+ ];
30050
+ const stdinText = stdin && !stdinIsImage ? stdinAsText(stdin) : undefined;
30051
+ const textPrompt = buildTextPrompt({ prompt, stdinText, images });
29926
30052
  const format = resolveFormat(opts.format);
29927
30053
  const gatewayModels = await fetchGatewayModels();
29928
30054
  const models = resolveModels("text", opts.model, gatewayModels.text);
@@ -29938,7 +30064,7 @@ ${prompt}`;
29938
30064
  "x-title": "ai-cli"
29939
30065
  },
29940
30066
  model: gateway(modelId),
29941
- prompt: fullPrompt,
30067
+ prompt: textPrompt,
29942
30068
  system: opts.system,
29943
30069
  maxOutputTokens: maxTokens,
29944
30070
  temperature,
@@ -29959,6 +30085,37 @@ ${prompt}`;
29959
30085
  process.exit(2);
29960
30086
  });
29961
30087
  }
30088
+ function buildTextPrompt({
30089
+ prompt,
30090
+ stdinText,
30091
+ images
30092
+ }) {
30093
+ if (images.length === 0) {
30094
+ if (stdinText && prompt)
30095
+ return `${stdinText}
30096
+
30097
+ ---
30098
+
30099
+ ${prompt}`;
30100
+ if (stdinText)
30101
+ return stdinText;
30102
+ return prompt;
30103
+ }
30104
+ const content = [];
30105
+ if (stdinText)
30106
+ content.push({ type: "text", text: stdinText });
30107
+ for (const image of images)
30108
+ content.push({ type: "image", image });
30109
+ if (prompt) {
30110
+ content.push({ type: "text", text: prompt });
30111
+ } else if (!stdinText) {
30112
+ content.push({
30113
+ type: "text",
30114
+ text: images.length === 1 ? "Describe this image." : "Describe these images."
30115
+ });
30116
+ }
30117
+ return [{ role: "user", content }];
30118
+ }
29962
30119
 
29963
30120
  // src/commands/video.ts
29964
30121
  var DEFAULT_CONCURRENCY3 = 2;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-cli",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "A tiny, agent-native CLI for generating images, video and text with dead-simple commands, stdin support and predictable artifact outputs",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",