llmist 2.3.0 → 2.4.0

This diff covers publicly released package versions as published to a supported registry, and is provided for informational purposes only.
package/dist/cli.js CHANGED
@@ -1,5 +1,5 @@
  #!/usr/bin/env node
- import "./chunk-ZDNV7DDO.js";
+ import "./chunk-QFRVTS5F.js";
  import {
    AgentBuilder,
    BaseGadget,
@@ -27,7 +27,7 @@ import {
    resolveModel,
    schemaToJSONSchema,
    validateGadgetSchema
- } from "./chunk-GANXNBIZ.js";
+ } from "./chunk-6ZDUWO6N.js";

  // src/cli/constants.ts
  var CLI_NAME = "llmist";
@@ -36,7 +36,9 @@ var COMMANDS = {
    complete: "complete",
    agent: "agent",
    models: "models",
-   gadget: "gadget"
+   gadget: "gadget",
+   image: "image",
+   speech: "speech"
  };
  var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
  var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -57,7 +59,17 @@ var OPTION_FLAGS = {
    docker: "--docker",
    dockerRo: "--docker-ro",
    noDocker: "--no-docker",
-   dockerDev: "--docker-dev"
+   dockerDev: "--docker-dev",
+   // Image generation options
+   imageSize: "--size <size>",
+   imageQuality: "--quality <quality>",
+   imageCount: "-n, --count <number>",
+   imageOutput: "-o, --output <path>",
+   // Speech generation options
+   voice: "--voice <name>",
+   speechFormat: "--format <format>",
+   speechSpeed: "--speed <value>",
+   speechOutput: "-o, --output <path>"
  };
  var OPTION_DESCRIPTIONS = {
    model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -76,7 +88,17 @@ var OPTION_DESCRIPTIONS = {
    docker: "Run agent in a Docker sandbox container for security isolation.",
    dockerRo: "Run in Docker with current directory mounted read-only.",
    noDocker: "Disable Docker sandboxing (override config).",
-   dockerDev: "Run in Docker dev mode (mount local source instead of npm install)."
+   dockerDev: "Run in Docker dev mode (mount local source instead of npm install).",
+   // Image generation descriptions
+   imageSize: "Image size/aspect ratio, e.g. '1024x1024', '1:1', '16:9'.",
+   imageQuality: "Image quality: 'standard', 'hd', 'low', 'medium', 'high'.",
+   imageCount: "Number of images to generate (model dependent, usually 1-4).",
+   imageOutput: "Output path for the generated image. Defaults to stdout if not specified.",
+   // Speech generation descriptions
+   voice: "Voice name for speech generation, e.g. 'nova', 'alloy', 'Zephyr'.",
+   speechFormat: "Audio format: 'mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'.",
+   speechSpeed: "Speech speed multiplier (0.25 to 4.0, default 1.0).",
+   speechOutput: "Output path for audio file. Defaults to stdout if not specified."
  };
  var SUMMARY_PREFIX = "[llmist]";

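Together with the COMMANDS entries added above, these flags and descriptions define the surface of the two new subcommands. Illustrative invocations (flag names come from OPTION_FLAGS; the prompts, sizes, voice, and file names are examples only):

    llmist image "a lighthouse at dawn" --size 1024x1024 --quality hd -n 2 -o dawn.png
    llmist speech "Hello, world" --voice nova --format mp3 --speed 1.25 -o hello.mp3

Note that imageOutput and speechOutput both claim -o, --output <path>; this is safe because each flag set is registered on its own subcommand.
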
@@ -86,7 +108,7 @@ import { Command, InvalidArgumentError as InvalidArgumentError2 } from "commande
  // package.json
  var package_default = {
    name: "llmist",
-   version: "2.2.0",
+   version: "2.4.0",
    description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
    type: "module",
    main: "dist/index.cjs",
@@ -2108,6 +2130,22 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
    "docker-cwd-permission"
    // Override CWD mount permission for this profile
  ]);
+ var IMAGE_CONFIG_KEYS = /* @__PURE__ */ new Set([
+   "model",
+   "size",
+   "quality",
+   "count",
+   "output",
+   "quiet"
+ ]);
+ var SPEECH_CONFIG_KEYS = /* @__PURE__ */ new Set([
+   "model",
+   "voice",
+   "format",
+   "speed",
+   "output",
+   "quiet"
+ ]);
  var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
    ...COMPLETE_CONFIG_KEYS,
    ...AGENT_CONFIG_KEYS,
@@ -2368,6 +2406,75 @@ function validateAgentConfig(raw, section) {
    }
    return result;
  }
+ function validateImageConfig(raw, section) {
+   if (typeof raw !== "object" || raw === null) {
+     throw new ConfigError(`[${section}] must be a table`);
+   }
+   const rawObj = raw;
+   for (const key of Object.keys(rawObj)) {
+     if (!IMAGE_CONFIG_KEYS.has(key)) {
+       throw new ConfigError(`[${section}].${key} is not a valid option`);
+     }
+   }
+   const result = {};
+   if ("model" in rawObj) {
+     result.model = validateString(rawObj.model, "model", section);
+   }
+   if ("size" in rawObj) {
+     result.size = validateString(rawObj.size, "size", section);
+   }
+   if ("quality" in rawObj) {
+     result.quality = validateString(rawObj.quality, "quality", section);
+   }
+   if ("count" in rawObj) {
+     result.count = validateNumber(rawObj.count, "count", section, {
+       integer: true,
+       min: 1,
+       max: 10
+     });
+   }
+   if ("output" in rawObj) {
+     result.output = validateString(rawObj.output, "output", section);
+   }
+   if ("quiet" in rawObj) {
+     result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+   }
+   return result;
+ }
+ function validateSpeechConfig(raw, section) {
+   if (typeof raw !== "object" || raw === null) {
+     throw new ConfigError(`[${section}] must be a table`);
+   }
+   const rawObj = raw;
+   for (const key of Object.keys(rawObj)) {
+     if (!SPEECH_CONFIG_KEYS.has(key)) {
+       throw new ConfigError(`[${section}].${key} is not a valid option`);
+     }
+   }
+   const result = {};
+   if ("model" in rawObj) {
+     result.model = validateString(rawObj.model, "model", section);
+   }
+   if ("voice" in rawObj) {
+     result.voice = validateString(rawObj.voice, "voice", section);
+   }
+   if ("format" in rawObj) {
+     result.format = validateString(rawObj.format, "format", section);
+   }
+   if ("speed" in rawObj) {
+     result.speed = validateNumber(rawObj.speed, "speed", section, {
+       min: 0.25,
+       max: 4
+     });
+   }
+   if ("output" in rawObj) {
+     result.output = validateString(rawObj.output, "output", section);
+   }
+   if ("quiet" in rawObj) {
+     result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+   }
+   return result;
+ }
  function validateStringOrBoolean(value, field, section) {
    if (typeof value === "string" || typeof value === "boolean") {
      return value;
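A minimal sketch of the accepted shapes (hypothetical direct calls — both validators are bundle-internal and not exported; the values are illustrative):

    // Valid [image] table: unknown keys are rejected, count must be an integer in 1..10.
    validateImageConfig({ model: "dall-e-3", size: "1024x1024", count: 2 }, "image");
    // → { model: "dall-e-3", size: "1024x1024", count: 2 }

    // Unknown key:
    validateImageConfig({ resolution: "4k" }, "image");
    // → throws ConfigError: [image].resolution is not a valid option

    // Out-of-range speed (validateNumber enforces the 0.25–4 bounds):
    validateSpeechConfig({ voice: "nova", speed: 9 }, "speech");
    // → throws ConfigError
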
@@ -2490,6 +2597,10 @@ function validateConfig(raw, configPath) {
        result.complete = validateCompleteConfig(value, key);
      } else if (key === "agent") {
        result.agent = validateAgentConfig(value, key);
+     } else if (key === "image") {
+       result.image = validateImageConfig(value, key);
+     } else if (key === "speech") {
+       result.speech = validateSpeechConfig(value, key);
      } else if (key === "prompts") {
        result.prompts = validatePromptsConfig(value, key);
      } else if (key === "docker") {
@@ -2534,7 +2645,7 @@ function loadConfig() {
    return resolveTemplatesInConfig(inherited, configPath);
  }
  function getCustomCommandNames(config) {
-   const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts", "docker"]);
+   const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "image", "speech", "prompts", "docker"]);
    return Object.keys(config).filter((key) => !reserved.has(key));
  }
  function resolveTemplatesInConfig(config, configPath) {
@@ -4129,19 +4240,118 @@ function registerGadgetCommand(program, env) {
    );
  }

+ // src/cli/image-command.ts
+ import { writeFileSync as writeFileSync2 } from "node:fs";
+ var DEFAULT_IMAGE_MODEL = "dall-e-3";
+ async function executeImage(promptArg, options, env) {
+   const prompt = await resolvePrompt(promptArg, env);
+   const client = env.createClient();
+   const model = options.model;
+   const n = options.count ? Number.parseInt(options.count, 10) : 1;
+   const stderrTTY = env.stderr.isTTY === true;
+   if (!options.quiet && stderrTTY) {
+     env.stderr.write(`${SUMMARY_PREFIX} Generating image with ${model}...
+ `);
+   }
+   const result = await client.image.generate({
+     model,
+     prompt,
+     size: options.size,
+     quality: options.quality,
+     n,
+     responseFormat: options.output ? "b64_json" : "url"
+   });
+   if (options.output) {
+     const imageData = result.images[0];
+     if (imageData.b64Json) {
+       const buffer = Buffer.from(imageData.b64Json, "base64");
+       writeFileSync2(options.output, buffer);
+       if (!options.quiet) {
+         env.stderr.write(`${SUMMARY_PREFIX} Image saved to ${options.output}
+ `);
+       }
+     } else if (imageData.url) {
+       env.stdout.write(`${imageData.url}
+ `);
+     }
+   } else {
+     for (const image of result.images) {
+       if (image.url) {
+         env.stdout.write(`${image.url}
+ `);
+       } else if (image.b64Json) {
+         env.stdout.write(image.b64Json);
+       }
+     }
+   }
+   if (!options.quiet && stderrTTY) {
+     const parts = [
+       `${result.images.length} image(s)`,
+       `size: ${result.usage.size}`,
+       `quality: ${result.usage.quality}`
+     ];
+     if (result.cost !== void 0) {
+       parts.push(`cost: ${formatCost(result.cost)}`);
+     }
+     env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+ `);
+   }
+ }
+ function registerImageCommand(program, env, config) {
+   program.command(COMMANDS.image).description("Generate images from a text prompt.").argument("[prompt]", "Image generation prompt. If omitted, stdin is used when available.").option(
+     OPTION_FLAGS.model,
+     OPTION_DESCRIPTIONS.model,
+     config?.model ?? DEFAULT_IMAGE_MODEL
+   ).option(OPTION_FLAGS.imageSize, OPTION_DESCRIPTIONS.imageSize, config?.size).option(OPTION_FLAGS.imageQuality, OPTION_DESCRIPTIONS.imageQuality, config?.quality).option(OPTION_FLAGS.imageCount, OPTION_DESCRIPTIONS.imageCount, config?.count?.toString()).option(OPTION_FLAGS.imageOutput, OPTION_DESCRIPTIONS.imageOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+     (prompt, options) => executeAction(() => executeImage(prompt, options, env), env)
+   );
+ }
+
  // src/cli/models-command.ts
  import chalk8 from "chalk";
  init_model_shortcuts();
  async function handleModelsCommand(options, env) {
    const client = env.createClient();
-   const models = client.modelRegistry.listModels(options.provider);
+   const showText = options.all || options.text || !options.image && !options.speech;
+   const showImage = options.all || options.image;
+   const showSpeech = options.all || options.speech;
+   const textModels = showText ? client.modelRegistry.listModels(options.provider) : [];
+   const imageModels = showImage ? client.image.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
+   const speechModels = showSpeech ? client.speech.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
    if (options.format === "json") {
-     renderJSON(models, env.stdout);
+     renderJSON(textModels, imageModels, speechModels, env.stdout);
    } else {
-     renderTable(models, options.verbose || false, env.stdout);
+     renderAllTables(textModels, imageModels, speechModels, options.verbose || false, env.stdout);
    }
  }
- function renderTable(models, verbose, stream) {
+ function renderAllTables(textModels, imageModels, speechModels, verbose, stream) {
+   const hasAnyModels = textModels.length > 0 || imageModels.length > 0 || speechModels.length > 0;
+   if (!hasAnyModels) {
+     stream.write(chalk8.yellow("\nNo models found matching the specified criteria.\n\n"));
+     return;
+   }
+   stream.write(chalk8.bold.cyan("\nAvailable Models\n"));
+   stream.write(chalk8.cyan("=".repeat(80)) + "\n\n");
+   if (textModels.length > 0) {
+     renderTextTable(textModels, verbose, stream);
+   }
+   if (imageModels.length > 0) {
+     renderImageTable(imageModels, verbose, stream);
+   }
+   if (speechModels.length > 0) {
+     renderSpeechTable(speechModels, verbose, stream);
+   }
+   if (textModels.length > 0) {
+     stream.write(chalk8.bold.magenta("Model Shortcuts\n"));
+     stream.write(chalk8.dim("\u2500".repeat(80)) + "\n");
+     const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
+     for (const [shortcut, fullName] of shortcuts) {
+       stream.write(chalk8.cyan(` ${shortcut.padEnd(15)}`) + chalk8.dim(" \u2192 ") + chalk8.white(fullName) + "\n");
+     }
+     stream.write("\n");
+   }
+ }
+ function renderTextTable(models, verbose, stream) {
    const grouped = /* @__PURE__ */ new Map();
    for (const model of models) {
      const provider = model.provider;
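The executeImage flow above picks responseFormat from the presence of -o: with an output path it requests b64_json and writes the decoded buffer to disk; without one it requests URLs and prints them to stdout, one per line. A programmatic sketch of the same call (shapes mirrored from the bundled code above; client would come from env.createClient() as in the CLI, and all values are illustrative):

    const result = await client.image.generate({
      model: "dall-e-3",
      prompt: "a lighthouse at dawn",
      size: "1024x1024",
      n: 1,
      responseFormat: "url" // the CLI switches to "b64_json" when writing a file
    });
    for (const image of result.images) {
      console.log(image.url ?? image.b64Json);
    }
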
@@ -4150,13 +4360,13 @@ function renderTable(models, verbose, stream) {
      }
      grouped.get(provider).push(model);
    }
-   stream.write(chalk8.bold.cyan("\nAvailable Models\n"));
-   stream.write(chalk8.cyan("=".repeat(80)) + "\n\n");
+   stream.write(chalk8.bold.blue("\u{1F4DD} Text/LLM Models\n"));
+   stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
    const providers = Array.from(grouped.keys()).sort();
    for (const provider of providers) {
      const providerModels = grouped.get(provider);
      const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
-     stream.write(chalk8.bold.yellow(`${providerName} Models
+     stream.write(chalk8.bold.yellow(`${providerName}
  `));
      if (verbose) {
        renderVerboseTable(providerModels, stream);
@@ -4165,13 +4375,6 @@ function renderTable(models, verbose, stream) {
      }
      stream.write("\n");
    }
-   stream.write(chalk8.bold.magenta("Model Shortcuts\n"));
-   stream.write(chalk8.dim("\u2500".repeat(80)) + "\n");
-   const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
-   for (const [shortcut, fullName] of shortcuts) {
-     stream.write(chalk8.cyan(` ${shortcut.padEnd(15)}`) + chalk8.dim(" \u2192 ") + chalk8.white(fullName) + "\n");
-   }
-   stream.write("\n");
  }
  function renderCompactTable(models, stream) {
    const idWidth = 25;
@@ -4248,9 +4451,171 @@ function renderVerboseTable(models, stream) {
    }
    stream.write("\n");
  }
- function renderJSON(models, stream) {
-   const output = {
-     models: models.map((model) => ({
+ function renderImageTable(models, verbose, stream) {
+   stream.write(chalk8.bold.green("\u{1F3A8} Image Generation Models\n"));
+   stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
+   const grouped = /* @__PURE__ */ new Map();
+   for (const model of models) {
+     if (!grouped.has(model.provider)) {
+       grouped.set(model.provider, []);
+     }
+     grouped.get(model.provider).push(model);
+   }
+   for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+     const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+     stream.write(chalk8.bold.yellow(`${providerName}
+ `));
+     if (verbose) {
+       for (const model of providerModels) {
+         stream.write(chalk8.bold.green(`
+ ${model.modelId}
+ `));
+         stream.write(chalk8.dim(" " + "\u2500".repeat(60)) + "\n");
+         stream.write(` ${chalk8.dim("Name:")} ${chalk8.white(model.displayName)}
+ `);
+         stream.write(` ${chalk8.dim("Sizes:")} ${chalk8.yellow(model.supportedSizes.join(", "))}
+ `);
+         if (model.supportedQualities) {
+           stream.write(` ${chalk8.dim("Qualities:")} ${chalk8.yellow(model.supportedQualities.join(", "))}
+ `);
+         }
+         stream.write(` ${chalk8.dim("Max Images:")} ${chalk8.yellow(model.maxImages.toString())}
+ `);
+         stream.write(` ${chalk8.dim("Pricing:")} ${chalk8.cyan(formatImagePrice(model))}
+ `);
+         if (model.features) {
+           const features = [];
+           if (model.features.textRendering) features.push("text-rendering");
+           if (model.features.transparency) features.push("transparency");
+           if (model.features.conversational) features.push("conversational");
+           if (features.length > 0) {
+             stream.write(` ${chalk8.dim("Features:")} ${chalk8.blue(features.join(", "))}
+ `);
+           }
+         }
+       }
+     } else {
+       const idWidth = 32;
+       const nameWidth = 25;
+       const sizesWidth = 20;
+       const priceWidth = 15;
+       stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+       stream.write(
+         chalk8.bold(
+           "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Sizes".padEnd(sizesWidth) + " " + "Price".padEnd(priceWidth)
+         ) + "\n"
+       );
+       stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+       for (const model of providerModels) {
+         const sizes = model.supportedSizes.length > 2 ? model.supportedSizes.slice(0, 2).join(", ") + "..." : model.supportedSizes.join(", ");
+         stream.write(
+           chalk8.green(model.modelId.padEnd(idWidth)) + " " + chalk8.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + chalk8.yellow(sizes.padEnd(sizesWidth)) + " " + chalk8.cyan(formatImagePrice(model).padEnd(priceWidth)) + "\n"
+         );
+       }
+       stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+     }
+     stream.write("\n");
+   }
+ }
+ function renderSpeechTable(models, verbose, stream) {
+   stream.write(chalk8.bold.magenta("\u{1F3A4} Speech (TTS) Models\n"));
+   stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
+   const grouped = /* @__PURE__ */ new Map();
+   for (const model of models) {
+     if (!grouped.has(model.provider)) {
+       grouped.set(model.provider, []);
+     }
+     grouped.get(model.provider).push(model);
+   }
+   for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+     const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+     stream.write(chalk8.bold.yellow(`${providerName}
+ `));
+     if (verbose) {
+       for (const model of providerModels) {
+         stream.write(chalk8.bold.green(`
+ ${model.modelId}
+ `));
+         stream.write(chalk8.dim(" " + "\u2500".repeat(60)) + "\n");
+         stream.write(` ${chalk8.dim("Name:")} ${chalk8.white(model.displayName)}
+ `);
+         stream.write(` ${chalk8.dim("Voices:")} ${chalk8.yellow(model.voices.length.toString())} voices
+ `);
+         if (model.voices.length <= 6) {
+           stream.write(` ${chalk8.dim(model.voices.join(", "))}
+ `);
+         } else {
+           stream.write(` ${chalk8.dim(model.voices.slice(0, 6).join(", ") + "...")}
+ `);
+         }
+         stream.write(` ${chalk8.dim("Formats:")} ${chalk8.yellow(model.formats.join(", "))}
+ `);
+         stream.write(` ${chalk8.dim("Max Input:")} ${chalk8.yellow(model.maxInputLength.toString())} chars
+ `);
+         stream.write(` ${chalk8.dim("Pricing:")} ${chalk8.cyan(formatSpeechPrice(model))}
+ `);
+         if (model.features) {
+           const features = [];
+           if (model.features.multiSpeaker) features.push("multi-speaker");
+           if (model.features.voiceInstructions) features.push("voice-instructions");
+           if (model.features.languages) features.push(`${model.features.languages} languages`);
+           if (features.length > 0) {
+             stream.write(` ${chalk8.dim("Features:")} ${chalk8.blue(features.join(", "))}
+ `);
+           }
+         }
+       }
+     } else {
+       const idWidth = 30;
+       const nameWidth = 28;
+       const voicesWidth = 12;
+       const priceWidth = 18;
+       stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+       stream.write(
+         chalk8.bold(
+           "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Voices".padEnd(voicesWidth) + " " + "Price".padEnd(priceWidth)
+         ) + "\n"
+       );
+       stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+       for (const model of providerModels) {
+         stream.write(
+           chalk8.green(model.modelId.padEnd(idWidth)) + " " + chalk8.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + chalk8.yellow(`${model.voices.length} voices`.padEnd(voicesWidth)) + " " + chalk8.cyan(formatSpeechPrice(model).padEnd(priceWidth)) + "\n"
+         );
+       }
+       stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+     }
+     stream.write("\n");
+   }
+ }
+ function formatImagePrice(model) {
+   if (model.pricing.perImage !== void 0) {
+     return `$${model.pricing.perImage.toFixed(2)}/img`;
+   }
+   if (model.pricing.bySize) {
+     const prices = Object.values(model.pricing.bySize);
+     const minPrice = Math.min(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+     const maxPrice = Math.max(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+     if (minPrice === maxPrice) {
+       return `$${minPrice.toFixed(2)}/img`;
+     }
+     return `$${minPrice.toFixed(2)}-${maxPrice.toFixed(2)}`;
+   }
+   return "varies";
+ }
+ function formatSpeechPrice(model) {
+   if (model.pricing.perCharacter !== void 0) {
+     const perMillion = model.pricing.perCharacter * 1e6;
+     return `$${perMillion.toFixed(0)}/1M chars`;
+   }
+   if (model.pricing.perMinute !== void 0) {
+     return `~$${model.pricing.perMinute.toFixed(2)}/min`;
+   }
+   return "varies";
+ }
+ function renderJSON(textModels, imageModels, speechModels, stream) {
+   const output = {};
+   if (textModels.length > 0) {
+     output.textModels = textModels.map((model) => ({
        provider: model.provider,
        modelId: model.modelId,
        displayName: model.displayName,
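Worked examples of the two price formatters introduced above (inputs are illustrative; the arithmetic follows the code as written):

    // perCharacter is scaled to a per-million-characters figure:
    formatSpeechPrice({ pricing: { perCharacter: 0.000015 } });
    // → "$15/1M chars" (0.000015 * 1e6 = 15)

    // bySize pricing collapses to a min-max range across all sizes:
    formatImagePrice({ pricing: { bySize: { "1024x1024": 0.04, "1792x1024": 0.08 } } });
    // → "$0.04-0.08"
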
@@ -4266,9 +4631,33 @@ function renderJSON(models, stream) {
        knowledgeCutoff: model.knowledgeCutoff,
        features: model.features,
        metadata: model.metadata
-     })),
-     shortcuts: MODEL_ALIASES
-   };
+     }));
+     output.shortcuts = MODEL_ALIASES;
+   }
+   if (imageModels.length > 0) {
+     output.imageModels = imageModels.map((model) => ({
+       provider: model.provider,
+       modelId: model.modelId,
+       displayName: model.displayName,
+       supportedSizes: model.supportedSizes,
+       supportedQualities: model.supportedQualities,
+       maxImages: model.maxImages,
+       pricing: model.pricing,
+       features: model.features
+     }));
+   }
+   if (speechModels.length > 0) {
+     output.speechModels = speechModels.map((model) => ({
+       provider: model.provider,
+       modelId: model.modelId,
+       displayName: model.displayName,
+       voices: model.voices,
+       formats: model.formats,
+       maxInputLength: model.maxInputLength,
+       pricing: model.pricing,
+       features: model.features
+     }));
+   }
    stream.write(JSON.stringify(output, null, 2) + "\n");
  }
  function formatTokens2(count) {
@@ -4281,7 +4670,7 @@ function formatTokens2(count) {
    }
  }
  function registerModelsCommand(program, env) {
-   program.command(COMMANDS.models).description("List all available LLM models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).action(
+   program.command(COMMANDS.models).description("List available models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).option("--text", "Show text/LLM models (default if no type specified)").option("--image", "Show image generation models").option("--speech", "Show speech/TTS models").option("--all", "Show all model types (text, image, speech)").action(
      (options) => executeAction(
        () => handleModelsCommand(options, env),
        env
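The models command keeps its old default (text models only), while --text, --image, --speech, and --all opt into specific tables; with --format json, renderJSON emits textModels, imageModels, and speechModels keys, each present only when the corresponding list is non-empty. For example, llmist models --all --format json lists every model type in one JSON document.
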
@@ -4289,6 +4678,60 @@ function registerModelsCommand(program, env) {
    );
  }

+ // src/cli/speech-command.ts
+ import { writeFileSync as writeFileSync3 } from "node:fs";
+ var DEFAULT_SPEECH_MODEL = "tts-1";
+ var DEFAULT_VOICE = "nova";
+ async function executeSpeech(textArg, options, env) {
+   const text = await resolvePrompt(textArg, env);
+   const client = env.createClient();
+   const model = options.model;
+   const voice = options.voice ?? DEFAULT_VOICE;
+   const speed = options.speed ? Number.parseFloat(options.speed) : void 0;
+   const stderrTTY = env.stderr.isTTY === true;
+   if (!options.quiet && stderrTTY) {
+     env.stderr.write(`${SUMMARY_PREFIX} Generating speech with ${model} (voice: ${voice})...
+ `);
+   }
+   const result = await client.speech.generate({
+     model,
+     input: text,
+     voice,
+     responseFormat: options.format,
+     speed
+   });
+   const audioBuffer = Buffer.from(result.audio);
+   if (options.output) {
+     writeFileSync3(options.output, audioBuffer);
+     if (!options.quiet) {
+       env.stderr.write(`${SUMMARY_PREFIX} Audio saved to ${options.output}
+ `);
+     }
+   } else {
+     env.stdout.write(audioBuffer);
+   }
+   if (!options.quiet && stderrTTY) {
+     const parts = [
+       `${result.usage.characterCount} characters`,
+       `format: ${result.format}`
+     ];
+     if (result.cost !== void 0) {
+       parts.push(`cost: ${formatCost(result.cost)}`);
+     }
+     env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+ `);
+   }
+ }
+ function registerSpeechCommand(program, env, config) {
+   program.command(COMMANDS.speech).description("Generate speech audio from text.").argument("[text]", "Text to convert to speech. If omitted, stdin is used when available.").option(
+     OPTION_FLAGS.model,
+     OPTION_DESCRIPTIONS.model,
+     config?.model ?? DEFAULT_SPEECH_MODEL
+   ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+     (text, options) => executeAction(() => executeSpeech(text, options, env), env)
+   );
+ }
+
  // src/cli/environment.ts
  init_client();
  init_logger();
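As with images, a programmatic sketch of the speech call (shapes mirrored from executeSpeech above; client would come from env.createClient(), and the file name and values are illustrative):

    import { writeFileSync } from "node:fs";

    const result = await client.speech.generate({
      model: "tts-1",
      input: "Hello from llmist",
      voice: "nova",
      responseFormat: "mp3",
      speed: 1.25
    });
    // result.audio is raw audio data; the CLI wraps it in a Buffer before writing.
    writeFileSync("hello.mp3", Buffer.from(result.audio));
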
@@ -4440,6 +4883,8 @@ function createProgram(env, config) {
    });
    registerCompleteCommand(program, env, config?.complete);
    registerAgentCommand(program, env, config?.agent);
+   registerImageCommand(program, env, config?.image);
+   registerSpeechCommand(program, env, config?.speech);
    registerModelsCommand(program, env);
    registerGadgetCommand(program, env);
    if (config) {