llmist 2.2.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-import "./chunk-ZDNV7DDO.js";
+import "./chunk-QFRVTS5F.js";
 import {
   AgentBuilder,
   BaseGadget,
@@ -27,7 +27,7 @@ import {
   resolveModel,
   schemaToJSONSchema,
   validateGadgetSchema
-} from "./chunk-GANXNBIZ.js";
+} from "./chunk-6ZDUWO6N.js";
 
 // src/cli/constants.ts
 var CLI_NAME = "llmist";
@@ -36,7 +36,9 @@ var COMMANDS = {
   complete: "complete",
   agent: "agent",
   models: "models",
-  gadget: "gadget"
+  gadget: "gadget",
+  image: "image",
+  speech: "speech"
 };
 var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
 var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -57,7 +59,17 @@ var OPTION_FLAGS = {
   docker: "--docker",
   dockerRo: "--docker-ro",
   noDocker: "--no-docker",
-  dockerDev: "--docker-dev"
+  dockerDev: "--docker-dev",
+  // Image generation options
+  imageSize: "--size <size>",
+  imageQuality: "--quality <quality>",
+  imageCount: "-n, --count <number>",
+  imageOutput: "-o, --output <path>",
+  // Speech generation options
+  voice: "--voice <name>",
+  speechFormat: "--format <format>",
+  speechSpeed: "--speed <value>",
+  speechOutput: "-o, --output <path>"
 };
 var OPTION_DESCRIPTIONS = {
   model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -76,7 +88,17 @@ var OPTION_DESCRIPTIONS = {
   docker: "Run agent in a Docker sandbox container for security isolation.",
   dockerRo: "Run in Docker with current directory mounted read-only.",
   noDocker: "Disable Docker sandboxing (override config).",
-  dockerDev: "Run in Docker dev mode (mount local source instead of npm install)."
+  dockerDev: "Run in Docker dev mode (mount local source instead of npm install).",
+  // Image generation descriptions
+  imageSize: "Image size/aspect ratio, e.g. '1024x1024', '1:1', '16:9'.",
+  imageQuality: "Image quality: 'standard', 'hd', 'low', 'medium', 'high'.",
+  imageCount: "Number of images to generate (model dependent, usually 1-4).",
+  imageOutput: "Output path for the generated image. Defaults to stdout if not specified.",
+  // Speech generation descriptions
+  voice: "Voice name for speech generation, e.g. 'nova', 'alloy', 'Zephyr'.",
+  speechFormat: "Audio format: 'mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'.",
+  speechSpeed: "Speech speed multiplier (0.25 to 4.0, default 1.0).",
+  speechOutput: "Output path for audio file. Defaults to stdout if not specified."
 };
 var SUMMARY_PREFIX = "[llmist]";
 
@@ -86,8 +108,8 @@ import { Command, InvalidArgumentError as InvalidArgumentError2 } from "commande
 // package.json
 var package_default = {
   name: "llmist",
-  version: "2.1.0",
-  description: "Universal TypeScript LLM client with streaming-first agent framework. Works with any model - no structured outputs or native tool calling required. Implements its own flexible grammar for function calling.",
+  version: "2.4.0",
+  description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
   type: "module",
   main: "dist/index.cjs",
   module: "dist/index.js",
@@ -161,9 +183,16 @@ var package_default = {
     "universal-client",
     "multi-provider",
     "hooks",
-    "gadgets"
+    "gadgets",
+    "chatbot",
+    "chatgpt",
+    "agentic",
+    "language-model",
+    "generative-ai",
+    "bun",
+    "nodejs"
   ],
-  author: "",
+  author: "Zbigniew Sobiecki <zbigniew@sobiecki.name>",
   license: "MIT",
   dependencies: {
     "@anthropic-ai/sdk": "^0.69.0",
@@ -2101,6 +2130,22 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
   "docker-cwd-permission"
   // Override CWD mount permission for this profile
 ]);
+var IMAGE_CONFIG_KEYS = /* @__PURE__ */ new Set([
+  "model",
+  "size",
+  "quality",
+  "count",
+  "output",
+  "quiet"
+]);
+var SPEECH_CONFIG_KEYS = /* @__PURE__ */ new Set([
+  "model",
+  "voice",
+  "format",
+  "speed",
+  "output",
+  "quiet"
+]);
 var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
   ...COMPLETE_CONFIG_KEYS,
   ...AGENT_CONFIG_KEYS,
@@ -2361,6 +2406,75 @@ function validateAgentConfig(raw, section) {
   }
   return result;
 }
+function validateImageConfig(raw, section) {
+  if (typeof raw !== "object" || raw === null) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const rawObj = raw;
+  for (const key of Object.keys(rawObj)) {
+    if (!IMAGE_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}].${key} is not a valid option`);
+    }
+  }
+  const result = {};
+  if ("model" in rawObj) {
+    result.model = validateString(rawObj.model, "model", section);
+  }
+  if ("size" in rawObj) {
+    result.size = validateString(rawObj.size, "size", section);
+  }
+  if ("quality" in rawObj) {
+    result.quality = validateString(rawObj.quality, "quality", section);
+  }
+  if ("count" in rawObj) {
+    result.count = validateNumber(rawObj.count, "count", section, {
+      integer: true,
+      min: 1,
+      max: 10
+    });
+  }
+  if ("output" in rawObj) {
+    result.output = validateString(rawObj.output, "output", section);
+  }
+  if ("quiet" in rawObj) {
+    result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+  }
+  return result;
+}
+function validateSpeechConfig(raw, section) {
+  if (typeof raw !== "object" || raw === null) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const rawObj = raw;
+  for (const key of Object.keys(rawObj)) {
+    if (!SPEECH_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}].${key} is not a valid option`);
+    }
+  }
+  const result = {};
+  if ("model" in rawObj) {
+    result.model = validateString(rawObj.model, "model", section);
+  }
+  if ("voice" in rawObj) {
+    result.voice = validateString(rawObj.voice, "voice", section);
+  }
+  if ("format" in rawObj) {
+    result.format = validateString(rawObj.format, "format", section);
+  }
+  if ("speed" in rawObj) {
+    result.speed = validateNumber(rawObj.speed, "speed", section, {
+      min: 0.25,
+      max: 4
+    });
+  }
+  if ("output" in rawObj) {
+    result.output = validateString(rawObj.output, "output", section);
+  }
+  if ("quiet" in rawObj) {
+    result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+  }
+  return result;
+}
 function validateStringOrBoolean(value, field, section) {
   if (typeof value === "string" || typeof value === "boolean") {
     return value;
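
For reference, a minimal TypeScript sketch of section objects that would pass the two validators above. Key names and numeric bounds are taken from IMAGE_CONFIG_KEYS, SPEECH_CONFIG_KEYS, and the validateNumber calls; the concrete values are illustrative:

    // Every key is optional; unknown keys raise ConfigError.
    validateImageConfig({
      model: "dall-e-3",
      size: "1024x1024",
      quality: "hd",
      count: 2,          // must be an integer in 1-10
      output: "out.png",
      quiet: false
    }, "image");

    // speed must lie within 0.25-4.
    validateSpeechConfig({ model: "tts-1", voice: "nova", speed: 1.25 }, "speech");

    // Throws ConfigError: "[image].foo is not a valid option"
    validateImageConfig({ foo: 1 }, "image");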
@@ -2483,6 +2597,10 @@ function validateConfig(raw, configPath) {
     result.complete = validateCompleteConfig(value, key);
   } else if (key === "agent") {
     result.agent = validateAgentConfig(value, key);
+  } else if (key === "image") {
+    result.image = validateImageConfig(value, key);
+  } else if (key === "speech") {
+    result.speech = validateSpeechConfig(value, key);
   } else if (key === "prompts") {
     result.prompts = validatePromptsConfig(value, key);
   } else if (key === "docker") {
@@ -2527,7 +2645,7 @@ function loadConfig() {
   return resolveTemplatesInConfig(inherited, configPath);
 }
 function getCustomCommandNames(config) {
-  const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts", "docker"]);
+  const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "image", "speech", "prompts", "docker"]);
   return Object.keys(config).filter((key) => !reserved.has(key));
 }
 function resolveTemplatesInConfig(config, configPath) {
@@ -4122,19 +4240,118 @@ function registerGadgetCommand(program, env) {
   );
 }
 
+// src/cli/image-command.ts
+import { writeFileSync as writeFileSync2 } from "node:fs";
+var DEFAULT_IMAGE_MODEL = "dall-e-3";
+async function executeImage(promptArg, options, env) {
+  const prompt = await resolvePrompt(promptArg, env);
+  const client = env.createClient();
+  const model = options.model;
+  const n = options.count ? Number.parseInt(options.count, 10) : 1;
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Generating image with ${model}...
+`);
+  }
+  const result = await client.image.generate({
+    model,
+    prompt,
+    size: options.size,
+    quality: options.quality,
+    n,
+    responseFormat: options.output ? "b64_json" : "url"
+  });
+  if (options.output) {
+    const imageData = result.images[0];
+    if (imageData.b64Json) {
+      const buffer = Buffer.from(imageData.b64Json, "base64");
+      writeFileSync2(options.output, buffer);
+      if (!options.quiet) {
+        env.stderr.write(`${SUMMARY_PREFIX} Image saved to ${options.output}
+`);
+      }
+    } else if (imageData.url) {
+      env.stdout.write(`${imageData.url}
+`);
+    }
+  } else {
+    for (const image of result.images) {
+      if (image.url) {
+        env.stdout.write(`${image.url}
+`);
+      } else if (image.b64Json) {
+        env.stdout.write(image.b64Json);
+      }
+    }
+  }
+  if (!options.quiet && stderrTTY) {
+    const parts = [
+      `${result.images.length} image(s)`,
+      `size: ${result.usage.size}`,
+      `quality: ${result.usage.quality}`
+    ];
+    if (result.cost !== void 0) {
+      parts.push(`cost: ${formatCost(result.cost)}`);
+    }
+    env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+`);
+  }
+}
+function registerImageCommand(program, env, config) {
+  program.command(COMMANDS.image).description("Generate images from a text prompt.").argument("[prompt]", "Image generation prompt. If omitted, stdin is used when available.").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    config?.model ?? DEFAULT_IMAGE_MODEL
+  ).option(OPTION_FLAGS.imageSize, OPTION_DESCRIPTIONS.imageSize, config?.size).option(OPTION_FLAGS.imageQuality, OPTION_DESCRIPTIONS.imageQuality, config?.quality).option(OPTION_FLAGS.imageCount, OPTION_DESCRIPTIONS.imageCount, config?.count?.toString()).option(OPTION_FLAGS.imageOutput, OPTION_DESCRIPTIONS.imageOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+    (prompt, options) => executeAction(() => executeImage(prompt, options, env), env)
+  );
+}
+
 // src/cli/models-command.ts
 import chalk8 from "chalk";
 init_model_shortcuts();
 async function handleModelsCommand(options, env) {
   const client = env.createClient();
-  const models = client.modelRegistry.listModels(options.provider);
+  const showText = options.all || options.text || !options.image && !options.speech;
+  const showImage = options.all || options.image;
+  const showSpeech = options.all || options.speech;
+  const textModels = showText ? client.modelRegistry.listModels(options.provider) : [];
+  const imageModels = showImage ? client.image.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
+  const speechModels = showSpeech ? client.speech.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
   if (options.format === "json") {
-    renderJSON(models, env.stdout);
+    renderJSON(textModels, imageModels, speechModels, env.stdout);
   } else {
-    renderTable(models, options.verbose || false, env.stdout);
+    renderAllTables(textModels, imageModels, speechModels, options.verbose || false, env.stdout);
   }
 }
-function renderTable(models, verbose, stream) {
+function renderAllTables(textModels, imageModels, speechModels, verbose, stream) {
+  const hasAnyModels = textModels.length > 0 || imageModels.length > 0 || speechModels.length > 0;
+  if (!hasAnyModels) {
+    stream.write(chalk8.yellow("\nNo models found matching the specified criteria.\n\n"));
+    return;
+  }
+  stream.write(chalk8.bold.cyan("\nAvailable Models\n"));
+  stream.write(chalk8.cyan("=".repeat(80)) + "\n\n");
+  if (textModels.length > 0) {
+    renderTextTable(textModels, verbose, stream);
+  }
+  if (imageModels.length > 0) {
+    renderImageTable(imageModels, verbose, stream);
+  }
+  if (speechModels.length > 0) {
+    renderSpeechTable(speechModels, verbose, stream);
+  }
+  if (textModels.length > 0) {
+    stream.write(chalk8.bold.magenta("Model Shortcuts\n"));
+    stream.write(chalk8.dim("\u2500".repeat(80)) + "\n");
+    const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
+    for (const [shortcut, fullName] of shortcuts) {
+      stream.write(chalk8.cyan(` ${shortcut.padEnd(15)}`) + chalk8.dim(" \u2192 ") + chalk8.white(fullName) + "\n");
+    }
+    stream.write("\n");
+  }
+}
+function renderTextTable(models, verbose, stream) {
   const grouped = /* @__PURE__ */ new Map();
   for (const model of models) {
     const provider = model.provider;
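
The image command is a thin CLI wrapper over the client's image API. A minimal sketch of the same flow outside Commander, with call and result shapes copied from executeImage above (the prompt and file name are illustrative, and client stands in for whatever env.createClient() returns):

    import { writeFileSync } from "node:fs";

    const result = await client.image.generate({
      model: "dall-e-3",
      prompt: "a watercolor lighthouse at dusk",
      size: "1024x1024",
      quality: "hd",
      n: 1,
      responseFormat: "b64_json" // "url" when printing links instead of saving
    });
    // Each entry in result.images carries either b64Json or url.
    writeFileSync("lighthouse.png", Buffer.from(result.images[0].b64Json, "base64"));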
@@ -4143,13 +4360,13 @@ function renderTable(models, verbose, stream) {
     }
     grouped.get(provider).push(model);
   }
-  stream.write(chalk8.bold.cyan("\nAvailable Models\n"));
-  stream.write(chalk8.cyan("=".repeat(80)) + "\n\n");
+  stream.write(chalk8.bold.blue("\u{1F4DD} Text/LLM Models\n"));
+  stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
   const providers = Array.from(grouped.keys()).sort();
   for (const provider of providers) {
     const providerModels = grouped.get(provider);
     const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
-    stream.write(chalk8.bold.yellow(`${providerName} Models
+    stream.write(chalk8.bold.yellow(`${providerName}
 `));
     if (verbose) {
       renderVerboseTable(providerModels, stream);
@@ -4158,13 +4375,6 @@ function renderTable(models, verbose, stream) {
     }
     stream.write("\n");
   }
-  stream.write(chalk8.bold.magenta("Model Shortcuts\n"));
-  stream.write(chalk8.dim("\u2500".repeat(80)) + "\n");
-  const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
-  for (const [shortcut, fullName] of shortcuts) {
-    stream.write(chalk8.cyan(` ${shortcut.padEnd(15)}`) + chalk8.dim(" \u2192 ") + chalk8.white(fullName) + "\n");
-  }
-  stream.write("\n");
 }
 function renderCompactTable(models, stream) {
   const idWidth = 25;
@@ -4241,9 +4451,171 @@ function renderVerboseTable(models, stream) {
   }
   stream.write("\n");
 }
-function renderJSON(models, stream) {
-  const output = {
-    models: models.map((model) => ({
+function renderImageTable(models, verbose, stream) {
+  stream.write(chalk8.bold.green("\u{1F3A8} Image Generation Models\n"));
+  stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
+  const grouped = /* @__PURE__ */ new Map();
+  for (const model of models) {
+    if (!grouped.has(model.provider)) {
+      grouped.set(model.provider, []);
+    }
+    grouped.get(model.provider).push(model);
+  }
+  for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+    const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+    stream.write(chalk8.bold.yellow(`${providerName}
+`));
+    if (verbose) {
+      for (const model of providerModels) {
+        stream.write(chalk8.bold.green(`
+  ${model.modelId}
+`));
+        stream.write(chalk8.dim(" " + "\u2500".repeat(60)) + "\n");
+        stream.write(` ${chalk8.dim("Name:")} ${chalk8.white(model.displayName)}
+`);
+        stream.write(` ${chalk8.dim("Sizes:")} ${chalk8.yellow(model.supportedSizes.join(", "))}
+`);
+        if (model.supportedQualities) {
+          stream.write(` ${chalk8.dim("Qualities:")} ${chalk8.yellow(model.supportedQualities.join(", "))}
+`);
+        }
+        stream.write(` ${chalk8.dim("Max Images:")} ${chalk8.yellow(model.maxImages.toString())}
+`);
+        stream.write(` ${chalk8.dim("Pricing:")} ${chalk8.cyan(formatImagePrice(model))}
+`);
+        if (model.features) {
+          const features = [];
+          if (model.features.textRendering) features.push("text-rendering");
+          if (model.features.transparency) features.push("transparency");
+          if (model.features.conversational) features.push("conversational");
+          if (features.length > 0) {
+            stream.write(` ${chalk8.dim("Features:")} ${chalk8.blue(features.join(", "))}
+`);
+          }
+        }
+      }
+    } else {
+      const idWidth = 32;
+      const nameWidth = 25;
+      const sizesWidth = 20;
+      const priceWidth = 15;
+      stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+      stream.write(
+        chalk8.bold(
+          "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Sizes".padEnd(sizesWidth) + " " + "Price".padEnd(priceWidth)
+        ) + "\n"
+      );
+      stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+      for (const model of providerModels) {
+        const sizes = model.supportedSizes.length > 2 ? model.supportedSizes.slice(0, 2).join(", ") + "..." : model.supportedSizes.join(", ");
+        stream.write(
+          chalk8.green(model.modelId.padEnd(idWidth)) + " " + chalk8.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + chalk8.yellow(sizes.padEnd(sizesWidth)) + " " + chalk8.cyan(formatImagePrice(model).padEnd(priceWidth)) + "\n"
+        );
+      }
+      stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+    }
+    stream.write("\n");
+  }
+}
+function renderSpeechTable(models, verbose, stream) {
+  stream.write(chalk8.bold.magenta("\u{1F3A4} Speech (TTS) Models\n"));
+  stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
+  const grouped = /* @__PURE__ */ new Map();
+  for (const model of models) {
+    if (!grouped.has(model.provider)) {
+      grouped.set(model.provider, []);
+    }
+    grouped.get(model.provider).push(model);
+  }
+  for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+    const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+    stream.write(chalk8.bold.yellow(`${providerName}
+`));
+    if (verbose) {
+      for (const model of providerModels) {
+        stream.write(chalk8.bold.green(`
+  ${model.modelId}
+`));
+        stream.write(chalk8.dim(" " + "\u2500".repeat(60)) + "\n");
+        stream.write(` ${chalk8.dim("Name:")} ${chalk8.white(model.displayName)}
+`);
+        stream.write(` ${chalk8.dim("Voices:")} ${chalk8.yellow(model.voices.length.toString())} voices
+`);
+        if (model.voices.length <= 6) {
+          stream.write(` ${chalk8.dim(model.voices.join(", "))}
+`);
+        } else {
+          stream.write(` ${chalk8.dim(model.voices.slice(0, 6).join(", ") + "...")}
+`);
+        }
+        stream.write(` ${chalk8.dim("Formats:")} ${chalk8.yellow(model.formats.join(", "))}
+`);
+        stream.write(` ${chalk8.dim("Max Input:")} ${chalk8.yellow(model.maxInputLength.toString())} chars
+`);
+        stream.write(` ${chalk8.dim("Pricing:")} ${chalk8.cyan(formatSpeechPrice(model))}
+`);
+        if (model.features) {
+          const features = [];
+          if (model.features.multiSpeaker) features.push("multi-speaker");
+          if (model.features.voiceInstructions) features.push("voice-instructions");
+          if (model.features.languages) features.push(`${model.features.languages} languages`);
+          if (features.length > 0) {
+            stream.write(` ${chalk8.dim("Features:")} ${chalk8.blue(features.join(", "))}
+`);
+          }
+        }
+      }
+    } else {
+      const idWidth = 30;
+      const nameWidth = 28;
+      const voicesWidth = 12;
+      const priceWidth = 18;
+      stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+      stream.write(
+        chalk8.bold(
+          "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Voices".padEnd(voicesWidth) + " " + "Price".padEnd(priceWidth)
+        ) + "\n"
+      );
+      stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+      for (const model of providerModels) {
+        stream.write(
+          chalk8.green(model.modelId.padEnd(idWidth)) + " " + chalk8.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + chalk8.yellow(`${model.voices.length} voices`.padEnd(voicesWidth)) + " " + chalk8.cyan(formatSpeechPrice(model).padEnd(priceWidth)) + "\n"
+        );
+      }
+      stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+    }
+    stream.write("\n");
+  }
+}
+function formatImagePrice(model) {
+  if (model.pricing.perImage !== void 0) {
+    return `$${model.pricing.perImage.toFixed(2)}/img`;
+  }
+  if (model.pricing.bySize) {
+    const prices = Object.values(model.pricing.bySize);
+    const minPrice = Math.min(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+    const maxPrice = Math.max(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+    if (minPrice === maxPrice) {
+      return `$${minPrice.toFixed(2)}/img`;
+    }
+    return `$${minPrice.toFixed(2)}-${maxPrice.toFixed(2)}`;
+  }
+  return "varies";
+}
+function formatSpeechPrice(model) {
+  if (model.pricing.perCharacter !== void 0) {
+    const perMillion = model.pricing.perCharacter * 1e6;
+    return `$${perMillion.toFixed(0)}/1M chars`;
+  }
+  if (model.pricing.perMinute !== void 0) {
+    return `~$${model.pricing.perMinute.toFixed(2)}/min`;
+  }
+  return "varies";
+}
+function renderJSON(textModels, imageModels, speechModels, stream) {
+  const output = {};
+  if (textModels.length > 0) {
+    output.textModels = textModels.map((model) => ({
       provider: model.provider,
       modelId: model.modelId,
       displayName: model.displayName,
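
Worked examples for the two price formatters above (the pricing objects are hypothetical, not values from the model registry):

    formatImagePrice({ pricing: { perImage: 0.04 } });
    // "$0.04/img"
    formatImagePrice({ pricing: { bySize: { "1024x1024": { standard: 0.04, hd: 0.08 } } } });
    // "$0.04-0.08" (min and max across all size/quality combinations)
    formatSpeechPrice({ pricing: { perCharacter: 15e-6 } });
    // "$15/1M chars" (per-character price scaled to dollars per million characters)
    formatSpeechPrice({ pricing: { perMinute: 0.3 } });
    // "~$0.30/min"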
@@ -4259,9 +4631,33 @@ function renderJSON(models, stream) {
       knowledgeCutoff: model.knowledgeCutoff,
       features: model.features,
       metadata: model.metadata
-    })),
-    shortcuts: MODEL_ALIASES
-  };
+    }));
+    output.shortcuts = MODEL_ALIASES;
+  }
+  if (imageModels.length > 0) {
+    output.imageModels = imageModels.map((model) => ({
+      provider: model.provider,
+      modelId: model.modelId,
+      displayName: model.displayName,
+      supportedSizes: model.supportedSizes,
+      supportedQualities: model.supportedQualities,
+      maxImages: model.maxImages,
+      pricing: model.pricing,
+      features: model.features
+    }));
+  }
+  if (speechModels.length > 0) {
+    output.speechModels = speechModels.map((model) => ({
+      provider: model.provider,
+      modelId: model.modelId,
+      displayName: model.displayName,
+      voices: model.voices,
+      formats: model.formats,
+      maxInputLength: model.maxInputLength,
+      pricing: model.pricing,
+      features: model.features
+    }));
+  }
   stream.write(JSON.stringify(output, null, 2) + "\n");
 }
 function formatTokens2(count) {
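
The JSON output is now keyed by model type, and each key appears only when that type was requested and returned at least one model. A rough TypeScript shape inferred from renderJSON above (field lists abridged):

    type ModelsJson = {
      textModels?: Array<{ provider: string; modelId: string; displayName: string /* ... */ }>;
      shortcuts?: Record<string, string>; // MODEL_ALIASES; emitted only alongside textModels
      imageModels?: Array<{ provider: string; modelId: string; supportedSizes: string[] /* ... */ }>;
      speechModels?: Array<{ provider: string; modelId: string; voices: string[] /* ... */ }>;
    };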
@@ -4274,7 +4670,7 @@ function formatTokens2(count) {
   }
 }
 function registerModelsCommand(program, env) {
-  program.command(COMMANDS.models).description("List all available LLM models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).action(
+  program.command(COMMANDS.models).description("List available models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).option("--text", "Show text/LLM models (default if no type specified)").option("--image", "Show image generation models").option("--speech", "Show speech/TTS models").option("--all", "Show all model types (text, image, speech)").action(
     (options) => executeAction(
       () => handleModelsCommand(options, env),
       env
@@ -4282,6 +4678,60 @@ function registerModelsCommand(program, env) {
   );
 }
 
+// src/cli/speech-command.ts
+import { writeFileSync as writeFileSync3 } from "node:fs";
+var DEFAULT_SPEECH_MODEL = "tts-1";
+var DEFAULT_VOICE = "nova";
+async function executeSpeech(textArg, options, env) {
+  const text = await resolvePrompt(textArg, env);
+  const client = env.createClient();
+  const model = options.model;
+  const voice = options.voice ?? DEFAULT_VOICE;
+  const speed = options.speed ? Number.parseFloat(options.speed) : void 0;
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Generating speech with ${model} (voice: ${voice})...
+`);
+  }
+  const result = await client.speech.generate({
+    model,
+    input: text,
+    voice,
+    responseFormat: options.format,
+    speed
+  });
+  const audioBuffer = Buffer.from(result.audio);
+  if (options.output) {
+    writeFileSync3(options.output, audioBuffer);
+    if (!options.quiet) {
+      env.stderr.write(`${SUMMARY_PREFIX} Audio saved to ${options.output}
+`);
+    }
+  } else {
+    env.stdout.write(audioBuffer);
+  }
+  if (!options.quiet && stderrTTY) {
+    const parts = [
+      `${result.usage.characterCount} characters`,
+      `format: ${result.format}`
+    ];
+    if (result.cost !== void 0) {
+      parts.push(`cost: ${formatCost(result.cost)}`);
+    }
+    env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+`);
+  }
+}
+function registerSpeechCommand(program, env, config) {
+  program.command(COMMANDS.speech).description("Generate speech audio from text.").argument("[text]", "Text to convert to speech. If omitted, stdin is used when available.").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    config?.model ?? DEFAULT_SPEECH_MODEL
+  ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+    (text, options) => executeAction(() => executeSpeech(text, options, env), env)
+  );
+}
+
 // src/cli/environment.ts
 init_client();
 init_logger();
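
The speech command mirrors the image one. A minimal sketch of the underlying call, with shapes copied from executeSpeech above (the input text and file name are illustrative, and client again stands in for env.createClient()):

    import { writeFileSync } from "node:fs";

    const result = await client.speech.generate({
      model: "tts-1",
      input: "Hello from llmist.",
      voice: "nova",
      responseFormat: "mp3", // optional; result.format reports what was produced
      speed: 1.5             // parsed from --speed; config bounds it to 0.25-4
    });
    writeFileSync("hello.mp3", Buffer.from(result.audio));
    // result.usage.characterCount and result.cost feed the CLI summary line.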
@@ -4433,6 +4883,8 @@ function createProgram(env, config) {
   });
   registerCompleteCommand(program, env, config?.complete);
   registerAgentCommand(program, env, config?.agent);
+  registerImageCommand(program, env, config?.image);
+  registerSpeechCommand(program, env, config?.speech);
   registerModelsCommand(program, env);
   registerGadgetCommand(program, env);
   if (config) {
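
With these two register calls wired into createProgram, the published CLI gains image and speech subcommands alongside complete, agent, models, and gadget. Judging from the flags defined earlier, typical invocations would look like `llmist image "..." --size 1024x1024 -o out.png` and `llmist speech "..." --voice nova -o out.mp3`, with optional `[image]` and `[speech]` config sections supplying defaults.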