llmist 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/{chunk-QFRVTS5F.js → chunk-IHSZUAYN.js} +4 -2
- package/dist/chunk-IHSZUAYN.js.map +1 -0
- package/dist/{chunk-6ZDUWO6N.js → chunk-YHS2DYXP.js} +1781 -528
- package/dist/chunk-YHS2DYXP.js.map +1 -0
- package/dist/cli.cjs +1218 -151
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +172 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1393 -124
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +93 -20
- package/dist/index.d.ts +93 -20
- package/dist/index.js +34 -2
- package/dist/{mock-stream-BQcC2VCP.d.cts → mock-stream-ga4KIiwX.d.cts} +714 -12
- package/dist/{mock-stream-BQcC2VCP.d.ts → mock-stream-ga4KIiwX.d.ts} +714 -12
- package/dist/testing/index.cjs +1713 -508
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-6ZDUWO6N.js.map +0 -1
- package/dist/chunk-QFRVTS5F.js.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import "./chunk-QFRVTS5F.js";
|
|
2
|
+
import "./chunk-IHSZUAYN.js";
|
|
3
3
|
import {
|
|
4
4
|
AgentBuilder,
|
|
5
5
|
BaseGadget,
|
|
@@ -10,14 +10,20 @@ import {
|
|
|
10
10
|
LLMMessageBuilder,
|
|
11
11
|
LLMist,
|
|
12
12
|
MODEL_ALIASES,
|
|
13
|
+
audioFromBuffer,
|
|
13
14
|
createGadget,
|
|
14
15
|
createLogger,
|
|
16
|
+
detectAudioMimeType,
|
|
17
|
+
detectImageMimeType,
|
|
18
|
+
extractText,
|
|
19
|
+
imageFromBuffer,
|
|
15
20
|
init_builder,
|
|
16
21
|
init_client,
|
|
17
22
|
init_constants,
|
|
18
23
|
init_create_gadget,
|
|
19
24
|
init_exceptions,
|
|
20
25
|
init_gadget,
|
|
26
|
+
init_input_content,
|
|
21
27
|
init_logger,
|
|
22
28
|
init_messages,
|
|
23
29
|
init_model_shortcuts,
|
|
@@ -26,8 +32,9 @@ import {
|
|
|
26
32
|
init_schema_validator,
|
|
27
33
|
resolveModel,
|
|
28
34
|
schemaToJSONSchema,
|
|
35
|
+
text,
|
|
29
36
|
validateGadgetSchema
|
|
30
|
-
} from "./chunk-6ZDUWO6N.js";
|
|
37
|
+
} from "./chunk-YHS2DYXP.js";
|
|
31
38
|
|
|
32
39
|
// src/cli/constants.ts
|
|
33
40
|
var CLI_NAME = "llmist";
|
|
@@ -38,7 +45,8 @@ var COMMANDS = {
|
|
|
38
45
|
models: "models",
|
|
39
46
|
gadget: "gadget",
|
|
40
47
|
image: "image",
|
|
41
|
-
speech: "speech"
|
|
48
|
+
speech: "speech",
|
|
49
|
+
vision: "vision"
|
|
42
50
|
};
|
|
43
51
|
var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
|
|
44
52
|
var DEFAULT_MODEL = "openai:gpt-5-nano";
|
|
@@ -60,6 +68,9 @@ var OPTION_FLAGS = {
|
|
|
60
68
|
dockerRo: "--docker-ro",
|
|
61
69
|
noDocker: "--no-docker",
|
|
62
70
|
dockerDev: "--docker-dev",
|
|
71
|
+
// Multimodal input options
|
|
72
|
+
inputImage: "--image <path>",
|
|
73
|
+
inputAudio: "--audio <path>",
|
|
63
74
|
// Image generation options
|
|
64
75
|
imageSize: "--size <size>",
|
|
65
76
|
imageQuality: "--quality <quality>",
|
|
@@ -85,6 +96,9 @@ var OPTION_DESCRIPTIONS = {
|
|
|
85
96
|
noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
|
|
86
97
|
noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
|
|
87
98
|
quiet: "Suppress all output except content (text and TellUser messages).",
|
|
99
|
+
// Multimodal input descriptions
|
|
100
|
+
inputImage: "Image file to include with the prompt (vision models).",
|
|
101
|
+
inputAudio: "Audio file to include with the prompt (Gemini only).",
|
|
88
102
|
docker: "Run agent in a Docker sandbox container for security isolation.",
|
|
89
103
|
dockerRo: "Run in Docker with current directory mounted read-only.",
|
|
90
104
|
noDocker: "Disable Docker sandboxing (override config).",
|
|
@@ -108,7 +122,7 @@ import { Command, InvalidArgumentError as InvalidArgumentError2 } from "commande
|
|
|
108
122
|
// package.json
|
|
109
123
|
var package_default = {
|
|
110
124
|
name: "llmist",
|
|
111
|
-
version: "2.4.0",
|
|
125
|
+
version: "2.5.0",
|
|
112
126
|
description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
|
|
113
127
|
type: "module",
|
|
114
128
|
main: "dist/index.cjs",
|
|
@@ -248,6 +262,7 @@ function isAbortError(error) {
|
|
|
248
262
|
}
|
|
249
263
|
|
|
250
264
|
// src/cli/agent-command.ts
|
|
265
|
+
init_input_content();
|
|
251
266
|
init_registry();
|
|
252
267
|
init_constants();
|
|
253
268
|
|
|
@@ -572,6 +587,75 @@ var finish = createGadget({
|
|
|
572
587
|
});
|
|
573
588
|
var builtinGadgets = [askUser, tellUser, finish];
|
|
574
589
|
|
|
590
|
+
// src/cli/file-utils.ts
|
|
591
|
+
init_input_content();
|
|
592
|
+
import { readFile, stat } from "node:fs/promises";
|
|
593
|
+
import { resolve as resolve2 } from "node:path";
|
|
594
|
+
var DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024;
|
|
595
|
+
function formatFileSize(bytes) {
|
|
596
|
+
if (bytes < 1024) return `${bytes} bytes`;
|
|
597
|
+
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
|
|
598
|
+
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
|
599
|
+
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
|
|
600
|
+
}
|
|
601
|
+
async function checkFileSize(absolutePath, filePath, maxSize) {
|
|
602
|
+
const stats = await stat(absolutePath);
|
|
603
|
+
if (stats.size > maxSize) {
|
|
604
|
+
throw new Error(
|
|
605
|
+
`File "${filePath}" is too large (${formatFileSize(stats.size)}). Maximum allowed size is ${formatFileSize(maxSize)}. Consider compressing the file or using a smaller version.`
|
|
606
|
+
);
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
async function readImageFile(filePath, options = {}) {
|
|
610
|
+
const absolutePath = resolve2(filePath);
|
|
611
|
+
const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
|
|
612
|
+
let buffer;
|
|
613
|
+
try {
|
|
614
|
+
await checkFileSize(absolutePath, filePath, maxFileSize);
|
|
615
|
+
buffer = await readFile(absolutePath);
|
|
616
|
+
} catch (error) {
|
|
617
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
618
|
+
throw new Error(`Failed to read image file "${filePath}": ${message}`);
|
|
619
|
+
}
|
|
620
|
+
const mimeType = detectImageMimeType(buffer);
|
|
621
|
+
if (!mimeType) {
|
|
622
|
+
throw new Error(
|
|
623
|
+
`File "${filePath}" is not a supported image format. Supported formats: JPEG, PNG, GIF, WebP`
|
|
624
|
+
);
|
|
625
|
+
}
|
|
626
|
+
return imageFromBuffer(buffer, mimeType);
|
|
627
|
+
}
|
|
628
|
+
async function readAudioFile(filePath, options = {}) {
|
|
629
|
+
const absolutePath = resolve2(filePath);
|
|
630
|
+
const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
|
|
631
|
+
let buffer;
|
|
632
|
+
try {
|
|
633
|
+
await checkFileSize(absolutePath, filePath, maxFileSize);
|
|
634
|
+
buffer = await readFile(absolutePath);
|
|
635
|
+
} catch (error) {
|
|
636
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
637
|
+
throw new Error(`Failed to read audio file "${filePath}": ${message}`);
|
|
638
|
+
}
|
|
639
|
+
const mimeType = detectAudioMimeType(buffer);
|
|
640
|
+
if (!mimeType) {
|
|
641
|
+
throw new Error(
|
|
642
|
+
`File "${filePath}" is not a supported audio format. Supported formats: MP3, WAV, OGG, WebM`
|
|
643
|
+
);
|
|
644
|
+
}
|
|
645
|
+
return audioFromBuffer(buffer, mimeType);
|
|
646
|
+
}
|
|
647
|
+
async function readFileBuffer(filePath, options = {}) {
|
|
648
|
+
const absolutePath = resolve2(filePath);
|
|
649
|
+
const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
|
|
650
|
+
try {
|
|
651
|
+
await checkFileSize(absolutePath, filePath, maxFileSize);
|
|
652
|
+
return await readFile(absolutePath);
|
|
653
|
+
} catch (error) {
|
|
654
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
655
|
+
throw new Error(`Failed to read file "${filePath}": ${message}`);
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
|
|
575
659
|
// src/cli/gadgets.ts
|
|
576
660
|
init_gadget();
|
|
577
661
|
import fs5 from "node:fs";
|
|
@@ -735,7 +819,7 @@ ${formattedList}`;
|
|
|
735
819
|
// src/cli/builtins/filesystem/read-file.ts
|
|
736
820
|
import fs3 from "node:fs";
|
|
737
821
|
import { z as z3 } from "zod";
|
|
738
|
-
var
|
|
822
|
+
var readFile2 = createGadget({
|
|
739
823
|
name: "ReadFile",
|
|
740
824
|
description: "Read the entire content of a file and return it as text. The file path must be within the current working directory or its subdirectories.",
|
|
741
825
|
schema: z3.object({
|
|
@@ -1006,7 +1090,7 @@ error: ${message}`;
|
|
|
1006
1090
|
// src/cli/builtins/index.ts
|
|
1007
1091
|
var builtinGadgetRegistry = {
|
|
1008
1092
|
ListDirectory: listDirectory,
|
|
1009
|
-
ReadFile:
|
|
1093
|
+
ReadFile: readFile2,
|
|
1010
1094
|
WriteFile: writeFile,
|
|
1011
1095
|
EditFile: editFile,
|
|
1012
1096
|
RunCommand: runCommand
|
|
@@ -1141,6 +1225,7 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
|
|
|
1141
1225
|
}
|
|
1142
1226
|
|
|
1143
1227
|
// src/cli/llm-logging.ts
|
|
1228
|
+
init_messages();
|
|
1144
1229
|
import { mkdir, writeFile as writeFile2 } from "node:fs/promises";
|
|
1145
1230
|
import { homedir } from "node:os";
|
|
1146
1231
|
import { join } from "node:path";
|
|
@@ -1158,7 +1243,7 @@ function formatLlmRequest(messages) {
|
|
|
1158
1243
|
const lines = [];
|
|
1159
1244
|
for (const msg of messages) {
|
|
1160
1245
|
lines.push(`=== ${msg.role.toUpperCase()} ===`);
|
|
1161
|
-
lines.push(msg.content
|
|
1246
|
+
lines.push(msg.content ? extractText(msg.content) : "");
|
|
1162
1247
|
lines.push("");
|
|
1163
1248
|
}
|
|
1164
1249
|
return lines.join("\n");
|
|
@@ -1232,9 +1317,9 @@ function ensureMarkedConfigured() {
|
|
|
1232
1317
|
markedConfigured = true;
|
|
1233
1318
|
}
|
|
1234
1319
|
}
|
|
1235
|
-
function renderMarkdown(
|
|
1320
|
+
function renderMarkdown(text2) {
|
|
1236
1321
|
ensureMarkedConfigured();
|
|
1237
|
-
let rendered = marked.parse(
|
|
1322
|
+
let rendered = marked.parse(text2);
|
|
1238
1323
|
rendered = rendered.replace(/\*\*(.+?)\*\*/g, (_, content) => chalk3.bold(content)).replace(/(?<!\*)\*(\S[^*]*)\*(?!\*)/g, (_, content) => chalk3.italic(content));
|
|
1239
1324
|
return rendered.trimEnd();
|
|
1240
1325
|
}
|
|
@@ -1248,8 +1333,8 @@ function createRainbowSeparator() {
|
|
|
1248
1333
|
}
|
|
1249
1334
|
return result;
|
|
1250
1335
|
}
|
|
1251
|
-
function renderMarkdownWithSeparators(
|
|
1252
|
-
const rendered = renderMarkdown(
|
|
1336
|
+
function renderMarkdownWithSeparators(text2) {
|
|
1337
|
+
const rendered = renderMarkdown(text2);
|
|
1253
1338
|
const separator = createRainbowSeparator();
|
|
1254
1339
|
return `
|
|
1255
1340
|
${separator}
|
|
@@ -1417,12 +1502,12 @@ var StreamPrinter = class {
|
|
|
1417
1502
|
*
|
|
1418
1503
|
* @param text - Text to write
|
|
1419
1504
|
*/
|
|
1420
|
-
write(
|
|
1421
|
-
if (!
|
|
1505
|
+
write(text2) {
|
|
1506
|
+
if (!text2) {
|
|
1422
1507
|
return;
|
|
1423
1508
|
}
|
|
1424
|
-
this.target.write(
|
|
1425
|
-
this.endedWithNewline =
|
|
1509
|
+
this.target.write(text2);
|
|
1510
|
+
this.endedWithNewline = text2.endsWith("\n");
|
|
1426
1511
|
}
|
|
1427
1512
|
/**
|
|
1428
1513
|
* Ensures output ends with a newline by writing one if needed.
|
|
@@ -1901,7 +1986,7 @@ function addCompleteOptions(cmd, defaults) {
|
|
|
1901
1986
|
OPTION_DESCRIPTIONS.maxTokens,
|
|
1902
1987
|
createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
|
|
1903
1988
|
defaults?.["max-tokens"]
|
|
1904
|
-
).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]);
|
|
1989
|
+
).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio);
|
|
1905
1990
|
}
|
|
1906
1991
|
function addAgentOptions(cmd, defaults) {
|
|
1907
1992
|
const gadgetAccumulator = (value, previous = []) => [
|
|
@@ -1925,7 +2010,7 @@ function addAgentOptions(cmd, defaults) {
|
|
|
1925
2010
|
OPTION_FLAGS.noBuiltinInteraction,
|
|
1926
2011
|
OPTION_DESCRIPTIONS.noBuiltinInteraction,
|
|
1927
2012
|
defaults?.["builtin-interaction"] !== false
|
|
1928
|
-
).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
|
|
2013
|
+
).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
|
|
1929
2014
|
}
|
|
1930
2015
|
function configToCompleteOptions(config) {
|
|
1931
2016
|
const result = {};
|
|
@@ -3639,8 +3724,8 @@ Denied: ${result.reason ?? "by user"}`
|
|
|
3639
3724
|
builder.withTextOnlyHandler("acknowledge");
|
|
3640
3725
|
builder.withTextWithGadgetsHandler({
|
|
3641
3726
|
gadgetName: "TellUser",
|
|
3642
|
-
parameterMapping: (
|
|
3643
|
-
resultMapping: (
|
|
3727
|
+
parameterMapping: (text2) => ({ message: text2, done: false, type: "info" }),
|
|
3728
|
+
resultMapping: (text2) => `\u2139\uFE0F ${text2}`
|
|
3644
3729
|
});
|
|
3645
3730
|
builder.withTrailingMessage(
|
|
3646
3731
|
(ctx) => [
|
|
@@ -3649,7 +3734,19 @@ Denied: ${result.reason ?? "by user"}`
|
|
|
3649
3734
|
"Maximize efficiency by batching independent operations in a single response."
|
|
3650
3735
|
].join(" ")
|
|
3651
3736
|
);
|
|
3652
|
-
|
|
3737
|
+
let agent;
|
|
3738
|
+
if (options.image || options.audio) {
|
|
3739
|
+
const parts = [text(prompt)];
|
|
3740
|
+
if (options.image) {
|
|
3741
|
+
parts.push(await readImageFile(options.image));
|
|
3742
|
+
}
|
|
3743
|
+
if (options.audio) {
|
|
3744
|
+
parts.push(await readAudioFile(options.audio));
|
|
3745
|
+
}
|
|
3746
|
+
agent = builder.askWithContent(parts);
|
|
3747
|
+
} else {
|
|
3748
|
+
agent = builder.ask(prompt);
|
|
3749
|
+
}
|
|
3653
3750
|
let textBuffer = "";
|
|
3654
3751
|
const flushTextBuffer = () => {
|
|
3655
3752
|
if (textBuffer) {
|
|
@@ -3724,6 +3821,7 @@ function registerAgentCommand(program, env, config) {
|
|
|
3724
3821
|
}
|
|
3725
3822
|
|
|
3726
3823
|
// src/cli/complete-command.ts
|
|
3824
|
+
init_input_content();
|
|
3727
3825
|
init_messages();
|
|
3728
3826
|
init_model_shortcuts();
|
|
3729
3827
|
init_constants();
|
|
@@ -3735,7 +3833,18 @@ async function executeComplete(promptArg, options, env) {
|
|
|
3735
3833
|
if (options.system) {
|
|
3736
3834
|
builder.addSystem(options.system);
|
|
3737
3835
|
}
|
|
3738
|
-
|
|
3836
|
+
if (options.image || options.audio) {
|
|
3837
|
+
const parts = [text(prompt)];
|
|
3838
|
+
if (options.image) {
|
|
3839
|
+
parts.push(await readImageFile(options.image));
|
|
3840
|
+
}
|
|
3841
|
+
if (options.audio) {
|
|
3842
|
+
parts.push(await readAudioFile(options.audio));
|
|
3843
|
+
}
|
|
3844
|
+
builder.addUserMultimodal(parts);
|
|
3845
|
+
} else {
|
|
3846
|
+
builder.addUser(prompt);
|
|
3847
|
+
}
|
|
3739
3848
|
const messages = builder.build();
|
|
3740
3849
|
const llmLogsBaseDir = resolveLogDir(options.logLlmRequests, "requests");
|
|
3741
3850
|
let llmSessionDir;
|
|
@@ -4683,7 +4792,7 @@ import { writeFileSync as writeFileSync3 } from "node:fs";
|
|
|
4683
4792
|
var DEFAULT_SPEECH_MODEL = "tts-1";
|
|
4684
4793
|
var DEFAULT_VOICE = "nova";
|
|
4685
4794
|
async function executeSpeech(textArg, options, env) {
|
|
4686
|
-
const
|
|
4795
|
+
const text2 = await resolvePrompt(textArg, env);
|
|
4687
4796
|
const client = env.createClient();
|
|
4688
4797
|
const model = options.model;
|
|
4689
4798
|
const voice = options.voice ?? DEFAULT_VOICE;
|
|
@@ -4695,7 +4804,7 @@ async function executeSpeech(textArg, options, env) {
|
|
|
4695
4804
|
}
|
|
4696
4805
|
const result = await client.speech.generate({
|
|
4697
4806
|
model,
|
|
4698
|
-
input:
|
|
4807
|
+
input: text2,
|
|
4699
4808
|
voice,
|
|
4700
4809
|
responseFormat: options.format,
|
|
4701
4810
|
speed
|
|
@@ -4728,7 +4837,43 @@ function registerSpeechCommand(program, env, config) {
|
|
|
4728
4837
|
OPTION_DESCRIPTIONS.model,
|
|
4729
4838
|
config?.model ?? DEFAULT_SPEECH_MODEL
|
|
4730
4839
|
).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
|
|
4731
|
-
(
|
|
4840
|
+
(text2, options) => executeAction(() => executeSpeech(text2, options, env), env)
|
|
4841
|
+
);
|
|
4842
|
+
}
|
|
4843
|
+
|
|
4844
|
+
// src/cli/vision-command.ts
|
|
4845
|
+
init_model_shortcuts();
|
|
4846
|
+
async function executeVision(imagePath, options, env) {
|
|
4847
|
+
const client = env.createClient();
|
|
4848
|
+
const model = resolveModel(options.model);
|
|
4849
|
+
const imageBuffer = await readFileBuffer(imagePath);
|
|
4850
|
+
const prompt = options.prompt ?? "Describe this image in detail.";
|
|
4851
|
+
const stderrTTY = env.stderr.isTTY === true;
|
|
4852
|
+
if (!options.quiet && stderrTTY) {
|
|
4853
|
+
env.stderr.write(`${SUMMARY_PREFIX} Analyzing image with ${model}...
|
|
4854
|
+
`);
|
|
4855
|
+
}
|
|
4856
|
+
const result = await client.vision.analyze({
|
|
4857
|
+
model,
|
|
4858
|
+
image: imageBuffer,
|
|
4859
|
+
prompt,
|
|
4860
|
+
maxTokens: options.maxTokens
|
|
4861
|
+
});
|
|
4862
|
+
env.stdout.write(result);
|
|
4863
|
+
env.stdout.write("\n");
|
|
4864
|
+
}
|
|
4865
|
+
function registerVisionCommand(program, env) {
|
|
4866
|
+
program.command(COMMANDS.vision ?? "vision").description("Analyze an image using vision-capable models").argument("<image>", "Path to image file to analyze").option(
|
|
4867
|
+
OPTION_FLAGS.model,
|
|
4868
|
+
OPTION_DESCRIPTIONS.model,
|
|
4869
|
+
"gpt-4o"
|
|
4870
|
+
// Default to a vision-capable model
|
|
4871
|
+
).option("-p, --prompt <prompt>", "Analysis prompt describing what to extract or describe").option(
|
|
4872
|
+
OPTION_FLAGS.maxTokens,
|
|
4873
|
+
OPTION_DESCRIPTIONS.maxTokens,
|
|
4874
|
+
createNumericParser({ label: "Max tokens", integer: true, min: 1 })
|
|
4875
|
+
).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet).action(
|
|
4876
|
+
(imagePath, options) => executeAction(() => executeVision(imagePath, options, env), env)
|
|
4732
4877
|
);
|
|
4733
4878
|
}
|
|
4734
4879
|
|
|
@@ -4777,7 +4922,7 @@ function createLoggerFactory(config) {
|
|
|
4777
4922
|
}
|
|
4778
4923
|
function createPromptFunction(stdin, stdout) {
|
|
4779
4924
|
return (question) => {
|
|
4780
|
-
return new Promise((
|
|
4925
|
+
return new Promise((resolve3) => {
|
|
4781
4926
|
const rl = readline.createInterface({
|
|
4782
4927
|
input: stdin,
|
|
4783
4928
|
output: stdout
|
|
@@ -4792,7 +4937,7 @@ function createPromptFunction(stdin, stdout) {
|
|
|
4792
4937
|
`);
|
|
4793
4938
|
rl.question(chalk9.green.bold("You: "), (answer) => {
|
|
4794
4939
|
rl.close();
|
|
4795
|
-
|
|
4940
|
+
resolve3(answer);
|
|
4796
4941
|
});
|
|
4797
4942
|
});
|
|
4798
4943
|
};
|
|
@@ -4885,6 +5030,7 @@ function createProgram(env, config) {
|
|
|
4885
5030
|
registerAgentCommand(program, env, config?.agent);
|
|
4886
5031
|
registerImageCommand(program, env, config?.image);
|
|
4887
5032
|
registerSpeechCommand(program, env, config?.speech);
|
|
5033
|
+
registerVisionCommand(program, env);
|
|
4888
5034
|
registerModelsCommand(program, env);
|
|
4889
5035
|
registerGadgetCommand(program, env);
|
|
4890
5036
|
if (config) {
|