llmist 2.3.0 → 2.5.0
This diff compares the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
- package/README.md +7 -0
- package/dist/{chunk-ZDNV7DDO.js → chunk-IHSZUAYN.js} +4 -2
- package/dist/chunk-IHSZUAYN.js.map +1 -0
- package/dist/{chunk-GANXNBIZ.js → chunk-YHS2DYXP.js} +2839 -579
- package/dist/chunk-YHS2DYXP.js.map +1 -0
- package/dist/cli.cjs +2717 -198
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +638 -47
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +2496 -220
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +109 -20
- package/dist/index.d.ts +109 -20
- package/dist/index.js +34 -2
- package/dist/{mock-stream-wRfUqXx4.d.cts → mock-stream-ga4KIiwX.d.cts} +1121 -12
- package/dist/{mock-stream-wRfUqXx4.d.ts → mock-stream-ga4KIiwX.d.ts} +1121 -12
- package/dist/testing/index.cjs +2771 -559
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-GANXNBIZ.js.map +0 -1
- package/dist/chunk-ZDNV7DDO.js.map +0 -1
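Two features dominate the hunks below: a new `src/core/input-content.ts` module (multimodal content parts with magic-byte MIME detection) and a gadget dependency DAG (a `:invocation_id:dep1,dep2` invocation syntax with matching parser and stream-processor support). A sketch of the new message-builder surface, assembled from the `@example` JSDoc blocks visible in the bundled output; the import path and surrounding scaffolding are assumptions, since the diff only shows `dist/`:

```typescript
import { promises as fs } from "node:fs";
// Assumed entry point; the diff shows only the bundled dist output.
import { LLMMessageBuilder, text, imageFromBuffer } from "llmist";

async function demo() {
  const builder = new LLMMessageBuilder();
  builder.addUser("Hello!"); // plain text still works
  // Multimodal content parts (from the addUser JSDoc):
  builder.addUser([
    text("What's in this image?"),
    imageFromBuffer(await fs.readFile("photo.jpg")), // MIME type detected from magic bytes
  ]);
  // Convenience helper (from the addUserWithImage JSDoc):
  builder.addUserWithImage(
    "What's in this image?",
    await fs.readFile("photo.jpg"),
    "image/jpeg" // optional, auto-detected when omitted
  );
  return builder.build();
}
```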
````diff
@@ -113,375 +113,229 @@ var init_constants = __esm({
 }
 });
 
-// src/core/
-function
-
-if (MODEL_ALIASES[normalized]) {
-return true;
-}
-return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
+// src/core/input-content.ts
+function isTextPart(part) {
+return part.type === "text";
 }
-function
-
-
-
-
-
-
-}
-
-
-
+function isImagePart(part) {
+return part.type === "image";
+}
+function isAudioPart(part) {
+return part.type === "audio";
+}
+function text(content) {
+return { type: "text", text: content };
+}
+function imageFromBase64(data, mediaType) {
+return {
+type: "image",
+source: { type: "base64", mediaType, data }
+};
+}
+function imageFromUrl(url) {
+return {
+type: "image",
+source: { type: "url", url }
+};
+}
+function detectImageMimeType(data) {
+const bytes = data instanceof Buffer ? data : Buffer.from(data);
+for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
+if (bytes.length >= magic.length) {
+let matches = true;
+for (let i = 0; i < magic.length; i++) {
+if (bytes[i] !== magic[i]) {
+matches = false;
+break;
+}
+}
+if (matches) {
+if (mimeType === "image/webp") {
+if (bytes.length >= 12) {
+const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
+if (!webpMarker) continue;
+}
+}
+return mimeType;
+}
+}
 }
-
-
+return null;
+}
+function detectAudioMimeType(data) {
+const bytes = data instanceof Buffer ? data : Buffer.from(data);
+for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
+if (bytes.length >= magic.length) {
+let matches = true;
+for (let i = 0; i < magic.length; i++) {
+if (bytes[i] !== magic[i]) {
+matches = false;
+break;
+}
+}
+if (matches) {
+if (mimeType === "audio/wav") {
+if (bytes.length >= 12) {
+const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
+if (!waveMarker) continue;
+}
+}
+return mimeType;
+}
+}
 }
-
-
+return null;
+}
+function toBase64(data) {
+if (typeof data === "string") {
+return data;
 }
-
-
+return Buffer.from(data).toString("base64");
+}
+function imageFromBuffer(buffer, mediaType) {
+const detectedType = mediaType ?? detectImageMimeType(buffer);
+if (!detectedType) {
+throw new Error(
+"Could not detect image MIME type. Please provide the mediaType parameter explicitly."
+);
 }
-
-
-
-
-
-
-if (!options.silent) {
-console.warn(
-`\u26A0\uFE0F Unknown model '${model}', falling back to 'openai:${model}'. This might be a typo. Did you mean: gpt4, gpt5, gpt5-nano, sonnet, haiku, flash? Use { strict: true } to error on unknown models, or { silent: true } to suppress this warning.`
-);
+return {
+type: "image",
+source: {
+type: "base64",
+mediaType: detectedType,
+data: toBase64(buffer)
 }
-}
-return `openai:${model}`;
+};
 }
-function
-return
+function audioFromBase64(data, mediaType) {
+return {
+type: "audio",
+source: { type: "base64", mediaType, data }
+};
 }
-function
-const
-if (
-
+function audioFromBuffer(buffer, mediaType) {
+const detectedType = mediaType ?? detectAudioMimeType(buffer);
+if (!detectedType) {
+throw new Error(
+"Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
+);
 }
-return
+return {
+type: "audio",
+source: {
+type: "base64",
+mediaType: detectedType,
+data: toBase64(buffer)
+}
+};
 }
-function
-
-if (separatorIndex === -1) {
-return model;
-}
-return model.slice(separatorIndex + 1);
+function isDataUrl(input) {
+return input.startsWith("data:");
 }
-
-
-
+function parseDataUrl(url) {
+const match = url.match(/^data:([^;]+);base64,(.+)$/);
+if (!match) return null;
+return { mimeType: match[1], data: match[2] };
+}
+var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
+var init_input_content = __esm({
+"src/core/input-content.ts"() {
 "use strict";
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-};
-KNOWN_MODEL_PATTERNS = [
-/^gpt-?\d/i,
-// gpt-4, gpt-3.5, gpt4, etc.
-/^claude-?\d/i,
-// claude-3, claude-2, etc.
-/^gemini-?(\d|pro|flash)/i,
-// gemini-2.0, gemini-pro, gemini-flash, etc.
-/^o\d/i
-// OpenAI o1, o3, etc.
+IMAGE_MAGIC_BYTES = [
+{ bytes: [255, 216, 255], mimeType: "image/jpeg" },
+{ bytes: [137, 80, 78, 71], mimeType: "image/png" },
+{ bytes: [71, 73, 70, 56], mimeType: "image/gif" },
+// WebP starts with RIFF....WEBP
+{ bytes: [82, 73, 70, 70], mimeType: "image/webp" }
+];
+AUDIO_MAGIC_BYTES = [
+// MP3 frame sync
+{ bytes: [255, 251], mimeType: "audio/mp3" },
+{ bytes: [255, 250], mimeType: "audio/mp3" },
+// ID3 tag (MP3)
+{ bytes: [73, 68, 51], mimeType: "audio/mp3" },
+// OGG
+{ bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
+// WAV (RIFF)
+{ bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
+// WebM
+{ bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
 ];
 }
 });
 
-// src/
-
-
-
-try {
-jsonSchema = z.toJSONSchema(schema, { target: "draft-7" });
-} catch (error) {
-const errorMessage = error instanceof Error ? error.message : String(error);
-throw new Error(
-`Gadget "${gadgetName}" has a schema that cannot be serialized to JSON Schema.
-This usually happens with unsupported patterns like:
-- z.record() - use z.object({}).passthrough() instead
-- Complex transforms or custom refinements
-- Circular references
-
-Original error: ${errorMessage}
-
-Only use schema patterns that Zod v4's native toJSONSchema() supports.`
-);
-}
-const issues = findUnknownTypes(jsonSchema);
-if (issues.length > 0) {
-const fieldList = issues.join(", ");
-throw new Error(
-`Gadget "${gadgetName}" uses z.unknown() which produces incomplete schemas.
-Problematic fields: ${fieldList}
-
-z.unknown() doesn't generate type information in JSON Schema, making it unclear
-to the LLM what data structure to provide.
-
-Suggestions:
-- Use z.object({}).passthrough() for flexible objects
-- Use z.record(z.string()) for key-value objects with string values
-- Define specific structure if possible
-
-Example fixes:
-// \u274C Bad
-content: z.unknown()
-
-// \u2705 Good
-content: z.object({}).passthrough() // for flexible objects
-content: z.record(z.string()) // for key-value objects
-content: z.array(z.string()) // for arrays of strings
-`
-);
-}
+// src/core/prompt-config.ts
+function resolvePromptTemplate(template, defaultValue, context) {
+const resolved = template ?? defaultValue;
+return typeof resolved === "function" ? resolved(context) : resolved;
 }
-function
-const
-if (
-return
-}
-if (schema.definitions) {
-for (const defSchema of Object.values(schema.definitions)) {
-issues.push(...findUnknownTypes(defSchema, []));
-}
-}
-if (schema.properties) {
-for (const [propName, propSchema] of Object.entries(schema.properties)) {
-const propPath = [...path, propName];
-if (hasNoType(propSchema)) {
-issues.push(propPath.join(".") || propName);
-}
-issues.push(...findUnknownTypes(propSchema, propPath));
-}
+function resolveRulesTemplate(rules, context) {
+const resolved = rules ?? DEFAULT_PROMPTS.rules;
+if (Array.isArray(resolved)) {
+return resolved;
 }
-if (
-const
-
-issues.push(itemPath.join("."));
-}
-issues.push(...findUnknownTypes(schema.items, itemPath));
+if (typeof resolved === "function") {
+const result = resolved(context);
+return Array.isArray(result) ? result : [result];
 }
-
-
-
-
+return [resolved];
+}
+function resolveHintTemplate(template, defaultValue, context) {
+const resolved = template ?? defaultValue;
+if (typeof resolved === "function") {
+return resolved(context);
 }
-
-
-
-
+return resolved.replace(/\{iteration\}/g, String(context.iteration)).replace(/\{maxIterations\}/g, String(context.maxIterations)).replace(/\{remaining\}/g, String(context.remaining));
+}
+var DEFAULT_HINTS, DEFAULT_PROMPTS;
+var init_prompt_config = __esm({
+"src/core/prompt-config.ts"() {
+"use strict";
+DEFAULT_HINTS = {
+parallelGadgetsHint: "Tip: You can call multiple gadgets in a single response for efficiency.",
+iterationProgressHint: "[Iteration {iteration}/{maxIterations}] Plan your actions accordingly."
+};
+DEFAULT_PROMPTS = {
+mainInstruction: [
+"\u26A0\uFE0F CRITICAL: RESPOND ONLY WITH GADGET INVOCATIONS",
+"DO NOT use function calling or tool calling",
+"You must output the exact text markers shown below in plain text.",
+"EACH MARKER MUST START WITH A NEWLINE."
+].join("\n"),
+criticalUsage: "INVOKE gadgets using the markers - do not describe what you want to do.",
+formatDescription: (ctx) => `Parameters using ${ctx.argPrefix}name markers (value on next line(s), no escaping needed)`,
+rules: () => [
+"Output ONLY plain text with the exact markers - never use function/tool calling",
+"You can invoke multiple gadgets in a single response",
+"Gadgets without dependencies execute immediately (in parallel if multiple)",
+"Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
+"If any dependency fails, dependent gadgets are automatically skipped"
+],
+customExamples: null
+};
 }
-
-
-
-
+});
+
+// src/core/messages.ts
+function normalizeContent(content) {
+if (typeof content === "string") {
+return [{ type: "text", text: content }];
 }
-return
+return content;
 }
-function
-if (
-return
+function extractText(content) {
+if (typeof content === "string") {
+return content;
 }
-
-const hasRef = prop.$ref !== void 0;
-const hasUnion = prop.anyOf !== void 0 || prop.oneOf !== void 0 || prop.allOf !== void 0;
-if (hasType || hasRef || hasUnion) {
-return false;
-}
-const keys = Object.keys(prop);
-const metadataKeys = ["description", "title", "default", "examples"];
-const hasOnlyMetadata = keys.every((key) => metadataKeys.includes(key));
-return hasOnlyMetadata || keys.length === 0;
-}
-var init_schema_validator = __esm({
-"src/gadgets/schema-validator.ts"() {
-"use strict";
-}
-});
-
-// src/gadgets/registry.ts
-var GadgetRegistry;
-var init_registry = __esm({
-"src/gadgets/registry.ts"() {
-"use strict";
-init_schema_validator();
-GadgetRegistry = class _GadgetRegistry {
-gadgets = /* @__PURE__ */ new Map();
-/**
-* Creates a registry from an array of gadget classes or instances,
-* or an object mapping names to gadgets.
-*
-* @param gadgets - Array of gadgets/classes or object with custom names
-* @returns New GadgetRegistry with all gadgets registered
-*
-* @example
-* ```typescript
-* // From array of classes
-* const registry = GadgetRegistry.from([Calculator, Weather]);
-*
-* // From array of instances
-* const registry = GadgetRegistry.from([new Calculator(), new Weather()]);
-*
-* // From object with custom names
-* const registry = GadgetRegistry.from({
-* calc: Calculator,
-* weather: new Weather({ apiKey: "..." })
-* });
-* ```
-*/
-static from(gadgets) {
-const registry = new _GadgetRegistry();
-if (Array.isArray(gadgets)) {
-registry.registerMany(gadgets);
-} else {
-for (const [name, gadget] of Object.entries(gadgets)) {
-const instance = typeof gadget === "function" ? new gadget() : gadget;
-registry.register(name, instance);
-}
-}
-return registry;
-}
-/**
-* Registers multiple gadgets at once from an array.
-*
-* @param gadgets - Array of gadget instances or classes
-* @returns This registry for chaining
-*
-* @example
-* ```typescript
-* registry.registerMany([Calculator, Weather, Email]);
-* registry.registerMany([new Calculator(), new Weather()]);
-* ```
-*/
-registerMany(gadgets) {
-for (const gadget of gadgets) {
-const instance = typeof gadget === "function" ? new gadget() : gadget;
-this.registerByClass(instance);
-}
-return this;
-}
-// Register a gadget by name
-register(name, gadget) {
-const normalizedName = name.toLowerCase();
-if (this.gadgets.has(normalizedName)) {
-throw new Error(`Gadget '${name}' is already registered`);
-}
-if (gadget.parameterSchema) {
-validateGadgetSchema(gadget.parameterSchema, name);
-}
-this.gadgets.set(normalizedName, gadget);
-}
-// Register a gadget using its name property or class name
-registerByClass(gadget) {
-const name = gadget.name ?? gadget.constructor.name;
-this.register(name, gadget);
-}
-// Get gadget by name (case-insensitive)
-get(name) {
-return this.gadgets.get(name.toLowerCase());
-}
-// Check if gadget exists (case-insensitive)
-has(name) {
-return this.gadgets.has(name.toLowerCase());
-}
-// Get all registered gadget names
-getNames() {
-return Array.from(this.gadgets.keys());
-}
-// Get all gadgets for instruction generation
-getAll() {
-return Array.from(this.gadgets.values());
-}
-// Unregister gadget (useful for testing, case-insensitive)
-unregister(name) {
-return this.gadgets.delete(name.toLowerCase());
-}
-// Clear all gadgets (useful for testing)
-clear() {
-this.gadgets.clear();
-}
-};
-}
-});
-
-// src/core/prompt-config.ts
-function resolvePromptTemplate(template, defaultValue, context) {
-const resolved = template ?? defaultValue;
-return typeof resolved === "function" ? resolved(context) : resolved;
-}
-function resolveRulesTemplate(rules, context) {
-const resolved = rules ?? DEFAULT_PROMPTS.rules;
-if (Array.isArray(resolved)) {
-return resolved;
-}
-if (typeof resolved === "function") {
-const result = resolved(context);
-return Array.isArray(result) ? result : [result];
-}
-return [resolved];
+return content.filter((part) => part.type === "text").map((part) => part.text).join("");
 }
-function resolveHintTemplate(template, defaultValue, context) {
-const resolved = template ?? defaultValue;
-if (typeof resolved === "function") {
-return resolved(context);
-}
-return resolved.replace(/\{iteration\}/g, String(context.iteration)).replace(/\{maxIterations\}/g, String(context.maxIterations)).replace(/\{remaining\}/g, String(context.remaining));
-}
-var DEFAULT_HINTS, DEFAULT_PROMPTS;
-var init_prompt_config = __esm({
-"src/core/prompt-config.ts"() {
-"use strict";
-DEFAULT_HINTS = {
-parallelGadgetsHint: "Tip: You can call multiple gadgets in a single response for efficiency.",
-iterationProgressHint: "[Iteration {iteration}/{maxIterations}] Plan your actions accordingly."
-};
-DEFAULT_PROMPTS = {
-mainInstruction: [
-"\u26A0\uFE0F CRITICAL: RESPOND ONLY WITH GADGET INVOCATIONS",
-"DO NOT use function calling or tool calling",
-"You must output the exact text markers shown below in plain text.",
-"EACH MARKER MUST START WITH A NEWLINE."
-].join("\n"),
-criticalUsage: "INVOKE gadgets using the markers - do not describe what you want to do.",
-formatDescription: (ctx) => `Parameters using ${ctx.argPrefix}name markers (value on next line(s), no escaping needed)`,
-rules: () => [
-"Output ONLY plain text with the exact markers - never use function/tool calling",
-"You can invoke multiple gadgets in a single response",
-"For dependent gadgets, invoke the first one and wait for the result"
-],
-customExamples: null
-};
-}
-});
-
-// src/core/messages.ts
 var LLMMessageBuilder;
 var init_messages = __esm({
 "src/core/messages.ts"() {
 "use strict";
 init_constants();
+init_input_content();
 init_prompt_config();
 LLMMessageBuilder = class {
 messages = [];
@@ -583,6 +437,10 @@ CRITICAL: ${criticalUsage}
 parts.push(`
 1. Start marker: ${this.startPrefix}gadget_name`);
 parts.push(`
+With ID: ${this.startPrefix}gadget_name:my_id`);
+parts.push(`
+With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
+parts.push(`
 2. ${formatDescription}`);
 parts.push(`
 3. End marker: ${this.endPrefix}`);
@@ -632,6 +490,25 @@ ${this.endPrefix}`;
 EXAMPLE (Multiple Gadgets):
 
 ${multipleExample}`);
+const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
+${this.argPrefix}url
+https://api.example.com/users
+${this.endPrefix}
+${this.startPrefix}fetch_data:fetch_2
+${this.argPrefix}url
+https://api.example.com/orders
+${this.endPrefix}
+${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
+${this.argPrefix}format
+json
+${this.endPrefix}`;
+parts.push(`
+
+EXAMPLE (With Dependencies):
+merge_1 waits for fetch_1 AND fetch_2 to complete.
+If either fails, merge_1 is automatically skipped.
+
+${dependencyExample}`);
 parts.push(`
 
 BLOCK FORMAT SYNTAX:
````
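The hunk above teaches the model the dependency-aware invocation format. Restated outside the template literal: the marker constants below are placeholders for the builder's configured `startPrefix`/`argPrefix`/`endPrefix`, whose literal values this diff never shows.

```typescript
const START = "<<START>>"; // placeholder for this.startPrefix
const ARG = "<<ARG>>";     // placeholder for this.argPrefix
const END = "<<END>>";     // placeholder for this.endPrefix

// fetch_1 and fetch_2 declare no dependencies, so they run immediately;
// merge_1 declares ":merge_1:fetch_1,fetch_2" and waits for both.
const dependencyExample = [
  `${START}fetch_data:fetch_1`, `${ARG}url`, "https://api.example.com/users", END,
  `${START}fetch_data:fetch_2`, `${ARG}url`, "https://api.example.com/orders", END,
  `${START}merge_data:merge_1:fetch_1,fetch_2`, `${ARG}format`, "json", END,
].join("\n");
```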
````diff
@@ -650,89 +527,519 @@ class Calculator {
 }
 }
 
-BLOCK FORMAT RULES:
-- Each parameter starts with ${this.argPrefix}parameterName on its own line
-- The value starts on the NEXT line after the marker
-- Value ends when the next ${this.argPrefix} or ${this.endPrefix} appears
-- NO escaping needed - write values exactly as they should appear
-- Perfect for code, JSON, markdown, or any content with special characters
-
-NESTED OBJECTS (use / separator):
-${this.argPrefix}config/timeout
-30
-${this.argPrefix}config/retries
-3
-Produces: { "config": { "timeout": "30", "retries": "3" } }
-
-ARRAYS (use numeric indices):
-${this.argPrefix}items/0
-first
-${this.argPrefix}items/1
-second
-Produces: { "items": ["first", "second"] }`);
-return parts.join("");
-}
-buildRulesSection(context) {
-const parts = [];
-parts.push("\n\nRULES:");
-const rules = resolveRulesTemplate(this.promptConfig.rules, context);
-for (const rule of rules) {
-parts.push(`
-- ${rule}`);
+BLOCK FORMAT RULES:
+- Each parameter starts with ${this.argPrefix}parameterName on its own line
+- The value starts on the NEXT line after the marker
+- Value ends when the next ${this.argPrefix} or ${this.endPrefix} appears
+- NO escaping needed - write values exactly as they should appear
+- Perfect for code, JSON, markdown, or any content with special characters
+
+NESTED OBJECTS (use / separator):
+${this.argPrefix}config/timeout
+30
+${this.argPrefix}config/retries
+3
+Produces: { "config": { "timeout": "30", "retries": "3" } }
+
+ARRAYS (use numeric indices):
+${this.argPrefix}items/0
+first
+${this.argPrefix}items/1
+second
+Produces: { "items": ["first", "second"] }`);
+return parts.join("");
+}
+buildRulesSection(context) {
+const parts = [];
+parts.push("\n\nRULES:");
+const rules = resolveRulesTemplate(this.promptConfig.rules, context);
+for (const rule of rules) {
+parts.push(`
+- ${rule}`);
+}
+return parts.join("");
+}
+/**
+* Add a user message.
+* Content can be a string (text only) or an array of content parts (multimodal).
+*
+* @param content - Message content
+* @param metadata - Optional metadata
+*
+* @example
+* ```typescript
+* // Text only
+* builder.addUser("Hello!");
+*
+* // Multimodal
+* builder.addUser([
+* text("What's in this image?"),
+* imageFromBuffer(imageData),
+* ]);
+* ```
+*/
+addUser(content, metadata) {
+this.messages.push({ role: "user", content, metadata });
+return this;
+}
+addAssistant(content, metadata) {
+this.messages.push({ role: "assistant", content, metadata });
+return this;
+}
+/**
+* Add a user message with an image attachment.
+*
+* @param textContent - Text prompt
+* @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+* @param mimeType - Optional MIME type (auto-detected if not provided)
+*
+* @example
+* ```typescript
+* builder.addUserWithImage(
+* "What's in this image?",
+* await fs.readFile("photo.jpg"),
+* "image/jpeg" // Optional - auto-detected
+* );
+* ```
+*/
+addUserWithImage(textContent, imageData, mimeType) {
+const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+if (!detectedMime) {
+throw new Error(
+"Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+);
+}
+const content = [
+text(textContent),
+{
+type: "image",
+source: {
+type: "base64",
+mediaType: detectedMime,
+data: toBase64(imageBuffer)
+}
+}
+];
+this.messages.push({ role: "user", content });
+return this;
+}
+/**
+* Add a user message with an image URL (OpenAI only).
+*
+* @param textContent - Text prompt
+* @param imageUrl - URL to the image
+*
+* @example
+* ```typescript
+* builder.addUserWithImageUrl(
+* "What's in this image?",
+* "https://example.com/image.jpg"
+* );
+* ```
+*/
+addUserWithImageUrl(textContent, imageUrl) {
+const content = [text(textContent), imageFromUrl(imageUrl)];
+this.messages.push({ role: "user", content });
+return this;
+}
+/**
+* Add a user message with an audio attachment (Gemini only).
+*
+* @param textContent - Text prompt
+* @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
+* @param mimeType - Optional MIME type (auto-detected if not provided)
+*
+* @example
+* ```typescript
+* builder.addUserWithAudio(
+* "Transcribe this audio",
+* await fs.readFile("recording.mp3"),
+* "audio/mp3" // Optional - auto-detected
+* );
+* ```
+*/
+addUserWithAudio(textContent, audioData, mimeType) {
+const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
+const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
+this.messages.push({ role: "user", content });
+return this;
+}
+/**
+* Add a user message with multiple content parts.
+* Provides full flexibility for complex multimodal messages.
+*
+* @param parts - Array of content parts
+*
+* @example
+* ```typescript
+* builder.addUserMultimodal([
+* text("Compare these images:"),
+* imageFromBuffer(image1),
+* imageFromBuffer(image2),
+* ]);
+* ```
+*/
+addUserMultimodal(parts) {
+this.messages.push({ role: "user", content: parts });
+return this;
+}
+addGadgetCall(gadget, parameters, result) {
+const paramStr = this.formatBlockParameters(parameters, "");
+this.messages.push({
+role: "assistant",
+content: `${this.startPrefix}${gadget}
+${paramStr}
+${this.endPrefix}`
+});
+this.messages.push({
+role: "user",
+content: `Result: ${result}`
+});
+return this;
+}
+/**
+* Format parameters as Block format with JSON Pointer paths.
+* Uses the configured argPrefix for consistency with system prompt.
+*/
+formatBlockParameters(params, prefix) {
+const lines = [];
+for (const [key, value] of Object.entries(params)) {
+const fullPath = prefix ? `${prefix}/${key}` : key;
+if (Array.isArray(value)) {
+value.forEach((item, index) => {
+const itemPath = `${fullPath}/${index}`;
+if (typeof item === "object" && item !== null) {
+lines.push(this.formatBlockParameters(item, itemPath));
+} else {
+lines.push(`${this.argPrefix}${itemPath}`);
+lines.push(String(item));
+}
+});
+} else if (typeof value === "object" && value !== null) {
+lines.push(this.formatBlockParameters(value, fullPath));
+} else {
+lines.push(`${this.argPrefix}${fullPath}`);
+lines.push(String(value));
+}
+}
+return lines.join("\n");
+}
+build() {
+return [...this.messages];
+}
+};
+}
+});
+
+// src/core/model-shortcuts.ts
+function isKnownModelPattern(model) {
+const normalized = model.toLowerCase();
+if (MODEL_ALIASES[normalized]) {
+return true;
+}
+return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
+}
+function resolveModel(model, options = {}) {
+if (model.includes(":")) {
+return model;
+}
+const normalized = model.toLowerCase();
+if (MODEL_ALIASES[normalized]) {
+return MODEL_ALIASES[normalized];
+}
+const modelLower = model.toLowerCase();
+if (modelLower.startsWith("gpt")) {
+return `openai:${model}`;
+}
+if (modelLower.startsWith("claude")) {
+return `anthropic:${model}`;
+}
+if (modelLower.startsWith("gemini")) {
+return `gemini:${model}`;
+}
+if (modelLower.match(/^o\d/)) {
+return `openai:${model}`;
+}
+if (!isKnownModelPattern(model)) {
+if (options.strict) {
+throw new Error(
+`Unknown model '${model}'. Did you mean one of: gpt4, sonnet, haiku, flash? Use explicit provider prefix like 'openai:${model}' to bypass this check.`
+);
+}
+if (!options.silent) {
+console.warn(
+`\u26A0\uFE0F Unknown model '${model}', falling back to 'openai:${model}'. This might be a typo. Did you mean: gpt4, gpt5, gpt5-nano, sonnet, haiku, flash? Use { strict: true } to error on unknown models, or { silent: true } to suppress this warning.`
+);
+}
+}
+return `openai:${model}`;
+}
+function hasProviderPrefix(model) {
+return model.includes(":");
+}
+function getProvider(model) {
+const separatorIndex = model.indexOf(":");
+if (separatorIndex === -1) {
+return void 0;
+}
+return model.slice(0, separatorIndex);
+}
+function getModelId(model) {
+const separatorIndex = model.indexOf(":");
+if (separatorIndex === -1) {
+return model;
+}
+return model.slice(separatorIndex + 1);
+}
+var MODEL_ALIASES, KNOWN_MODEL_PATTERNS;
+var init_model_shortcuts = __esm({
+"src/core/model-shortcuts.ts"() {
+"use strict";
+MODEL_ALIASES = {
+// OpenAI aliases
+gpt4: "openai:gpt-4o",
+gpt4o: "openai:gpt-4o",
+gpt5: "openai:gpt-5",
+"gpt5-mini": "openai:gpt-5-mini",
+"gpt5-nano": "openai:gpt-5-nano",
+// Anthropic aliases
+sonnet: "anthropic:claude-sonnet-4-5",
+"claude-sonnet": "anthropic:claude-sonnet-4-5",
+haiku: "anthropic:claude-haiku-4-5",
+"claude-haiku": "anthropic:claude-haiku-4-5",
+opus: "anthropic:claude-opus-4-5",
+"claude-opus": "anthropic:claude-opus-4-5",
+// Gemini aliases
+flash: "gemini:gemini-2.0-flash",
+"gemini-flash": "gemini:gemini-2.0-flash",
+"gemini-pro": "gemini:gemini-2.5-pro",
+pro: "gemini:gemini-2.5-pro"
+};
+KNOWN_MODEL_PATTERNS = [
+/^gpt-?\d/i,
+// gpt-4, gpt-3.5, gpt4, etc.
+/^claude-?\d/i,
+// claude-3, claude-2, etc.
+/^gemini-?(\d|pro|flash)/i,
+// gemini-2.0, gemini-pro, gemini-flash, etc.
+/^o\d/i
+// OpenAI o1, o3, etc.
+];
+}
+});
+
+// src/gadgets/schema-validator.ts
+import * as z from "zod";
+function validateGadgetSchema(schema, gadgetName) {
+let jsonSchema;
+try {
+jsonSchema = z.toJSONSchema(schema, { target: "draft-7" });
+} catch (error) {
+const errorMessage = error instanceof Error ? error.message : String(error);
+throw new Error(
+`Gadget "${gadgetName}" has a schema that cannot be serialized to JSON Schema.
+This usually happens with unsupported patterns like:
+- z.record() - use z.object({}).passthrough() instead
+- Complex transforms or custom refinements
+- Circular references
+
+Original error: ${errorMessage}
+
+Only use schema patterns that Zod v4's native toJSONSchema() supports.`
+);
+}
+const issues = findUnknownTypes(jsonSchema);
+if (issues.length > 0) {
+const fieldList = issues.join(", ");
+throw new Error(
+`Gadget "${gadgetName}" uses z.unknown() which produces incomplete schemas.
+Problematic fields: ${fieldList}
+
+z.unknown() doesn't generate type information in JSON Schema, making it unclear
+to the LLM what data structure to provide.
+
+Suggestions:
+- Use z.object({}).passthrough() for flexible objects
+- Use z.record(z.string()) for key-value objects with string values
+- Define specific structure if possible
+
+Example fixes:
+// \u274C Bad
+content: z.unknown()
+
+// \u2705 Good
+content: z.object({}).passthrough() // for flexible objects
+content: z.record(z.string()) // for key-value objects
+content: z.array(z.string()) // for arrays of strings
+`
+);
+}
+}
+function findUnknownTypes(schema, path = []) {
+const issues = [];
+if (!schema || typeof schema !== "object") {
+return issues;
+}
+if (schema.definitions) {
+for (const defSchema of Object.values(schema.definitions)) {
+issues.push(...findUnknownTypes(defSchema, []));
+}
+}
+if (schema.properties) {
+for (const [propName, propSchema] of Object.entries(schema.properties)) {
+const propPath = [...path, propName];
+if (hasNoType(propSchema)) {
+issues.push(propPath.join(".") || propName);
+}
+issues.push(...findUnknownTypes(propSchema, propPath));
+}
+}
+if (schema.items) {
+const itemPath = [...path, "[]"];
+if (hasNoType(schema.items)) {
+issues.push(itemPath.join("."));
+}
+issues.push(...findUnknownTypes(schema.items, itemPath));
+}
+if (schema.anyOf) {
+schema.anyOf.forEach((subSchema, index) => {
+issues.push(...findUnknownTypes(subSchema, [...path, `anyOf[${index}]`]));
+});
+}
+if (schema.oneOf) {
+schema.oneOf.forEach((subSchema, index) => {
+issues.push(...findUnknownTypes(subSchema, [...path, `oneOf[${index}]`]));
+});
+}
+if (schema.allOf) {
+schema.allOf.forEach((subSchema, index) => {
+issues.push(...findUnknownTypes(subSchema, [...path, `allOf[${index}]`]));
+});
+}
+return issues;
+}
+function hasNoType(prop) {
+if (!prop || typeof prop !== "object") {
+return false;
+}
+const hasType = prop.type !== void 0;
+const hasRef = prop.$ref !== void 0;
+const hasUnion = prop.anyOf !== void 0 || prop.oneOf !== void 0 || prop.allOf !== void 0;
+if (hasType || hasRef || hasUnion) {
+return false;
+}
+const keys = Object.keys(prop);
+const metadataKeys = ["description", "title", "default", "examples"];
+const hasOnlyMetadata = keys.every((key) => metadataKeys.includes(key));
+return hasOnlyMetadata || keys.length === 0;
+}
+var init_schema_validator = __esm({
+"src/gadgets/schema-validator.ts"() {
+"use strict";
+}
+});
+
+// src/gadgets/registry.ts
+var GadgetRegistry;
+var init_registry = __esm({
+"src/gadgets/registry.ts"() {
+"use strict";
+init_schema_validator();
+GadgetRegistry = class _GadgetRegistry {
+gadgets = /* @__PURE__ */ new Map();
+/**
+* Creates a registry from an array of gadget classes or instances,
+* or an object mapping names to gadgets.
+*
+* @param gadgets - Array of gadgets/classes or object with custom names
+* @returns New GadgetRegistry with all gadgets registered
+*
+* @example
+* ```typescript
+* // From array of classes
+* const registry = GadgetRegistry.from([Calculator, Weather]);
+*
+* // From array of instances
+* const registry = GadgetRegistry.from([new Calculator(), new Weather()]);
+*
+* // From object with custom names
+* const registry = GadgetRegistry.from({
+* calc: Calculator,
+* weather: new Weather({ apiKey: "..." })
+* });
+* ```
+*/
+static from(gadgets) {
+const registry = new _GadgetRegistry();
+if (Array.isArray(gadgets)) {
+registry.registerMany(gadgets);
+} else {
+for (const [name, gadget] of Object.entries(gadgets)) {
+const instance = typeof gadget === "function" ? new gadget() : gadget;
+registry.register(name, instance);
+}
 }
-return
+return registry;
 }
-
-
+/**
+* Registers multiple gadgets at once from an array.
+*
+* @param gadgets - Array of gadget instances or classes
+* @returns This registry for chaining
+*
+* @example
+* ```typescript
+* registry.registerMany([Calculator, Weather, Email]);
+* registry.registerMany([new Calculator(), new Weather()]);
+* ```
+*/
+registerMany(gadgets) {
+for (const gadget of gadgets) {
+const instance = typeof gadget === "function" ? new gadget() : gadget;
+this.registerByClass(instance);
+}
 return this;
 }
-
-
-
+// Register a gadget by name
+register(name, gadget) {
+const normalizedName = name.toLowerCase();
+if (this.gadgets.has(normalizedName)) {
+throw new Error(`Gadget '${name}' is already registered`);
+}
+if (gadget.parameterSchema) {
+validateGadgetSchema(gadget.parameterSchema, name);
+}
+this.gadgets.set(normalizedName, gadget);
 }
-
-
-
-
-content: `${this.startPrefix}${gadget}
-${paramStr}
-${this.endPrefix}`
-});
-this.messages.push({
-role: "user",
-content: `Result: ${result}`
-});
-return this;
+// Register a gadget using its name property or class name
+registerByClass(gadget) {
+const name = gadget.name ?? gadget.constructor.name;
+this.register(name, gadget);
 }
-
-
-
-*/
-formatBlockParameters(params, prefix) {
-const lines = [];
-for (const [key, value] of Object.entries(params)) {
-const fullPath = prefix ? `${prefix}/${key}` : key;
-if (Array.isArray(value)) {
-value.forEach((item, index) => {
-const itemPath = `${fullPath}/${index}`;
-if (typeof item === "object" && item !== null) {
-lines.push(this.formatBlockParameters(item, itemPath));
-} else {
-lines.push(`${this.argPrefix}${itemPath}`);
-lines.push(String(item));
-}
-});
-} else if (typeof value === "object" && value !== null) {
-lines.push(this.formatBlockParameters(value, fullPath));
-} else {
-lines.push(`${this.argPrefix}${fullPath}`);
-lines.push(String(value));
-}
-}
-return lines.join("\n");
+// Get gadget by name (case-insensitive)
+get(name) {
+return this.gadgets.get(name.toLowerCase());
 }
-
-
+// Check if gadget exists (case-insensitive)
+has(name) {
+return this.gadgets.has(name.toLowerCase());
+}
+// Get all registered gadget names
+getNames() {
+return Array.from(this.gadgets.keys());
+}
+// Get all gadgets for instruction generation
+getAll() {
+return Array.from(this.gadgets.values());
+}
+// Unregister gadget (useful for testing, case-insensitive)
+unregister(name) {
+return this.gadgets.delete(name.toLowerCase());
+}
+// Clear all gadgets (useful for testing)
+clear() {
+this.gadgets.clear();
 }
 };
 }
````
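The relocated `model-shortcuts` chunk also gains the gpt5, opus, and pro alias rows. Its resolution order, restated as illustrative calls (the `resolveModel` name and options come from the code above; the arguments are examples):

```typescript
resolveModel("anthropic:claude-opus-4-5"); // already prefixed, returned unchanged
resolveModel("sonnet");                    // "anthropic:claude-sonnet-4-5" via MODEL_ALIASES
resolveModel("gpt-4o");                    // "openai:gpt-4o" via the gpt* prefix rule
resolveModel("o3-mini");                   // "openai:o3-mini" via the /^o\d/ rule
resolveModel("typo-model");                // warns, falls back to "openai:typo-model"
resolveModel("typo-model", { strict: true }); // throws instead of warning
```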
````diff
@@ -1928,7 +2235,7 @@ var init_conversation_manager = __esm({
 if (msg.role === "user") {
 this.historyBuilder.addUser(msg.content);
 } else if (msg.role === "assistant") {
-this.historyBuilder.addAssistant(msg.content);
+this.historyBuilder.addAssistant(extractText(msg.content));
 }
 }
 }
@@ -1949,8 +2256,10 @@ async function runWithHandlers(agentGenerator, handlers) {
 if (handlers.onGadgetCall) {
 await handlers.onGadgetCall({
 gadgetName: event.call.gadgetName,
+invocationId: event.call.invocationId,
 parameters: event.call.parameters,
-parametersRaw: event.call.parametersRaw
+parametersRaw: event.call.parametersRaw,
+dependencies: event.call.dependencies
 });
 }
 break;
````
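`onGadgetCall` handlers now also receive the invocation ID and declared dependencies. A sketch of a consuming handler; the field names come from the hunk, while `agent` and the surrounding call are illustrative:

```typescript
await runWithHandlers(agent, {
  async onGadgetCall({ gadgetName, invocationId, parameters, parametersRaw, dependencies }) {
    const waits = dependencies.length > 0 ? ` (waits on ${dependencies.join(", ")})` : "";
    console.log(`[${invocationId}] ${gadgetName}${waits}`, parameters);
  },
});
```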
````diff
@@ -2542,7 +2851,27 @@ var init_cost_reporting_client = __esm({
 constructor(client, reportCost) {
 this.client = client;
 this.reportCost = reportCost;
+this.image = {
+generate: async (options) => {
+const result = await this.client.image.generate(options);
+if (result.cost !== void 0 && result.cost > 0) {
+this.reportCost(result.cost);
+}
+return result;
+}
+};
+this.speech = {
+generate: async (options) => {
+const result = await this.client.speech.generate(options);
+if (result.cost !== void 0 && result.cost > 0) {
+this.reportCost(result.cost);
+}
+return result;
+}
+};
 }
+image;
+speech;
 /**
 * Access to model registry for cost estimation.
 */
````
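The cost-reporting wrapper now proxies `image.generate` and `speech.generate` as well, forwarding any positive `result.cost` to `reportCost` before returning. Illustrative call; the options shape is an assumption, only the `generate` methods and `cost` field appear in the hunk:

```typescript
const result = await client.image.generate({ prompt: "a red fox" }); // assumed options shape
// If the wrapped client returned result.cost > 0, reportCost(result.cost)
// has already fired by the time this resolves; speech.generate behaves the same way.
```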
````diff
@@ -2807,15 +3136,37 @@ var init_parser = __esm({
 return segment.trim().length > 0 ? segment : void 0;
 }
 /**
-* Parse gadget name
-*
+* Parse gadget name with optional invocation ID and dependencies.
+*
+* Supported formats:
+* - `GadgetName` - Auto-generate ID, no dependencies
+* - `GadgetName:my_id` - Explicit ID, no dependencies
+* - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
+*
+* Dependencies must be comma-separated invocation IDs.
 */
 parseGadgetName(gadgetName) {
-
-
-return {
+const parts = gadgetName.split(":");
+if (parts.length === 1) {
+return {
+actualName: parts[0],
+invocationId: `gadget_${++globalInvocationCounter}`,
+dependencies: []
+};
+} else if (parts.length === 2) {
+return {
+actualName: parts[0],
+invocationId: parts[1].trim(),
+dependencies: []
+};
+} else {
+const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
+return {
+actualName: parts[0],
+invocationId: parts[1].trim(),
+dependencies: deps
+};
 }
-return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
 }
 /**
 * Extract the error message from a parse error.
@@ -2851,39 +3202,20 @@ var init_parser = __esm({
 const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
 if (metadataEndIndex === -1) break;
 const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
 const contentStartIndex = metadataEndIndex + 1;
 let partEndIndex;
 let endMarkerLength = 0;
-
-
-
-
-endMarkerLength =
+const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
+const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
+if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
+partEndIndex = nextStartPos;
+endMarkerLength = 0;
+} else if (endPos !== -1) {
+partEndIndex = endPos;
+endMarkerLength = this.endPrefix.length;
 } else {
-
-let validEndPos = -1;
-let searchPos = contentStartIndex;
-while (true) {
-const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
-if (endPos === -1) break;
-const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
-if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
-validEndPos = endPos;
-break;
-} else {
-searchPos = endPos + this.endPrefix.length;
-}
-}
-if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
-partEndIndex = nextStartPos;
-endMarkerLength = 0;
-} else if (validEndPos !== -1) {
-partEndIndex = validEndPos;
-endMarkerLength = this.endPrefix.length;
-} else {
-break;
-}
+break;
 }
 const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
 const { parameters, parseError } = this.parseParameters(parametersRaw);
````
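The parser hunks above extend the gadget-name metadata grammar. The three accepted shapes, restated as illustrative calls against the new `parseGadgetName` (auto-generated IDs use the `gadget_<n>` counter):

```typescript
parser.parseGadgetName("fetch_data");
// { actualName: "fetch_data", invocationId: "gadget_1", dependencies: [] }

parser.parseGadgetName("fetch_data:fetch_1");
// { actualName: "fetch_data", invocationId: "fetch_1", dependencies: [] }

parser.parseGadgetName("merge_data:merge_1:fetch_1, fetch_2");
// { actualName: "merge_data", invocationId: "merge_1",
//   dependencies: ["fetch_1", "fetch_2"] } (entries are trimmed; empties dropped)
```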
````diff
@@ -2894,7 +3226,8 @@ var init_parser = __esm({
 invocationId,
 parametersRaw,
 parameters,
-parseError
+parseError,
+dependencies
 }
 };
 startIndex = partEndIndex + endMarkerLength;
@@ -2917,7 +3250,7 @@ var init_parser = __esm({
 const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
 if (metadataEndIndex !== -1) {
 const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
 const contentStartIndex = metadataEndIndex + 1;
 const parametersRaw = this.buffer.substring(contentStartIndex).trim();
 const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2928,7 +3261,8 @@ var init_parser = __esm({
 invocationId,
 parametersRaw,
 parameters,
-parseError
+parseError,
+dependencies
 }
 };
 return;
@@ -3298,6 +3632,13 @@ var init_stream_processor = __esm({
 accumulatedText = "";
 shouldStopExecution = false;
 observerFailureCount = 0;
+// Dependency tracking for gadget execution DAG
+/** Gadgets waiting for their dependencies to complete */
+pendingGadgets = /* @__PURE__ */ new Map();
+/** Completed gadget results, keyed by invocation ID */
+completedResults = /* @__PURE__ */ new Map();
+/** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
+failedInvocations = /* @__PURE__ */ new Set();
 constructor(options) {
 this.iteration = options.iteration;
 this.registry = options.registry;
@@ -3398,6 +3739,16 @@ var init_stream_processor = __esm({
 }
 }
 }
+const finalPendingEvents = await this.processPendingGadgets();
+outputs.push(...finalPendingEvents);
+if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
+didExecuteGadgets = true;
+}
+for (const evt of finalPendingEvents) {
+if (evt.type === "gadget_result" && evt.result.breaksLoop) {
+shouldBreakLoop = true;
+}
+}
 }
 let finalMessage = this.accumulatedText;
 if (this.hooks.interceptors?.interceptAssistantMessage) {
@@ -3449,7 +3800,11 @@ var init_stream_processor = __esm({
 return [{ type: "text", content }];
 }
 /**
-* Process a gadget call through the full lifecycle.
+* Process a gadget call through the full lifecycle, handling dependencies.
+*
+* Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
+* Gadgets with unsatisfied dependencies are queued for later execution.
+* After each execution, pending gadgets are checked to see if they can now run.
 */
 async processGadgetCall(call) {
 if (this.shouldStopExecution) {
@@ -3459,7 +3814,54 @@ var init_stream_processor = __esm({
 return [];
 }
 const events = [];
-events.push({ type: "gadget_call", call });
+events.push({ type: "gadget_call", call });
+if (call.dependencies.length > 0) {
+if (call.dependencies.includes(call.invocationId)) {
+this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
+gadgetName: call.gadgetName,
+invocationId: call.invocationId
+});
+this.failedInvocations.add(call.invocationId);
+const skipEvent = {
+type: "gadget_skipped",
+gadgetName: call.gadgetName,
+invocationId: call.invocationId,
+parameters: call.parameters ?? {},
+failedDependency: call.invocationId,
+failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
+};
+events.push(skipEvent);
+return events;
+}
+const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+if (failedDep) {
+const skipEvents = await this.handleFailedDependency(call, failedDep);
+events.push(...skipEvents);
+return events;
+}
+const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+if (unsatisfied.length > 0) {
+this.logger.debug("Queueing gadget for later - waiting on dependencies", {
+gadgetName: call.gadgetName,
+invocationId: call.invocationId,
+waitingOn: unsatisfied
+});
+this.pendingGadgets.set(call.invocationId, call);
+return events;
+}
+}
+const executeEvents = await this.executeGadgetWithHooks(call);
+events.push(...executeEvents);
+const triggeredEvents = await this.processPendingGadgets();
+events.push(...triggeredEvents);
+return events;
+}
+/**
+* Execute a gadget through the full hook lifecycle.
+* This is the core execution logic, extracted from processGadgetCall.
+*/
+async executeGadgetWithHooks(call) {
+const events = [];
 if (call.parseError) {
 this.logger.warn("Gadget has parse error", {
 gadgetName: call.gadgetName,
````
@@ -3590,6 +3992,10 @@ var init_stream_processor = __esm({
|
|
|
3590
3992
|
});
|
|
3591
3993
|
}
|
|
3592
3994
|
await this.runObserversInParallel(completeObservers);
|
|
3995
|
+
this.completedResults.set(result.invocationId, result);
|
|
3996
|
+
if (result.error) {
|
|
3997
|
+
this.failedInvocations.add(result.invocationId);
|
|
3998
|
+
}
|
|
3593
3999
|
events.push({ type: "gadget_result", result });
|
|
3594
4000
|
if (result.error) {
|
|
3595
4001
|
const errorType = this.determineErrorType(call, result);
|
|
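
A sketch (not part of the package) of how a dependency-skip controller could plug into the hooks this diff adds. The hook name, action strings, and context fields mirror the hunks above and below; the type shape and the surrounding agent wiring are assumptions:

```typescript
// Hypothetical context type; the real one lives inside llmist.
type SkipCtx = {
  gadgetName: string;
  invocationId: string;
  failedDependency: string;
  failedDependencyError: string;
  logger: { warn(msg: string): void };
};

const hooks = {
  controllers: {
    async onDependencySkipped(ctx: SkipCtx) {
      ctx.logger.warn(`${ctx.invocationId}: dependency ${ctx.failedDependency} failed`);
      // Let a summarizer degrade gracefully instead of dropping the branch.
      if (ctx.gadgetName === "summarize") {
        return { action: "use_fallback", fallbackResult: "(no upstream data)" } as const;
      }
      return { action: "skip" } as const;
    }
  }
};
```

Returning `"execute_anyway"` is the third option handled by `handleFailedDependency` below; it re-enters the normal `executeGadgetWithHooks` path despite the failed upstream.
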
@@ -3605,6 +4011,162 @@ var init_stream_processor = __esm({
        }
        return events;
      }
+      /**
+       * Handle a gadget that cannot execute because a dependency failed.
+       * Calls the onDependencySkipped controller to allow customization.
+       */
+      async handleFailedDependency(call, failedDep) {
+        const events = [];
+        const depResult = this.completedResults.get(failedDep);
+        const depError = depResult?.error ?? "Dependency failed";
+        let action = { action: "skip" };
+        if (this.hooks.controllers?.onDependencySkipped) {
+          const context = {
+            iteration: this.iteration,
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError,
+            logger: this.logger
+          };
+          action = await this.hooks.controllers.onDependencySkipped(context);
+        }
+        if (action.action === "skip") {
+          this.failedInvocations.add(call.invocationId);
+          const skipEvent = {
+            type: "gadget_skipped",
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError
+          };
+          events.push(skipEvent);
+          if (this.hooks.observers?.onGadgetSkipped) {
+            const observeContext = {
+              iteration: this.iteration,
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: failedDep,
+              failedDependencyError: depError,
+              logger: this.logger
+            };
+            await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
+          }
+          this.logger.info("Gadget skipped due to failed dependency", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+        } else if (action.action === "execute_anyway") {
+          this.logger.info("Executing gadget despite failed dependency (controller override)", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+          const executeEvents = await this.executeGadgetWithHooks(call);
+          events.push(...executeEvents);
+        } else if (action.action === "use_fallback") {
+          const fallbackResult = {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            result: action.fallbackResult,
+            executionTimeMs: 0
+          };
+          this.completedResults.set(call.invocationId, fallbackResult);
+          events.push({ type: "gadget_result", result: fallbackResult });
+          this.logger.info("Using fallback result for gadget with failed dependency", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+        }
+        return events;
+      }
+      /**
+       * Process pending gadgets whose dependencies are now satisfied.
+       * Executes ready gadgets in parallel and continues until no more can be triggered.
+       */
+      async processPendingGadgets() {
+        const events = [];
+        let progress = true;
+        while (progress && this.pendingGadgets.size > 0) {
+          progress = false;
+          const readyToExecute = [];
+          const readyToSkip = [];
+          for (const [invocationId, call] of this.pendingGadgets) {
+            const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+            if (failedDep) {
+              readyToSkip.push({ call, failedDep });
+              continue;
+            }
+            const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
+            if (allSatisfied) {
+              readyToExecute.push(call);
+            }
+          }
+          for (const { call, failedDep } of readyToSkip) {
+            this.pendingGadgets.delete(call.invocationId);
+            const skipEvents = await this.handleFailedDependency(call, failedDep);
+            events.push(...skipEvents);
+            progress = true;
+          }
+          if (readyToExecute.length > 0) {
+            this.logger.debug("Executing ready gadgets in parallel", {
+              count: readyToExecute.length,
+              invocationIds: readyToExecute.map((c) => c.invocationId)
+            });
+            for (const call of readyToExecute) {
+              this.pendingGadgets.delete(call.invocationId);
+            }
+            const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
+            const results = await Promise.all(executePromises);
+            for (const executeEvents of results) {
+              events.push(...executeEvents);
+            }
+            progress = true;
+          }
+        }
+        if (this.pendingGadgets.size > 0) {
+          const pendingIds = new Set(this.pendingGadgets.keys());
+          for (const [invocationId, call] of this.pendingGadgets) {
+            const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+            const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
+            const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
+            let errorMessage;
+            let logLevel = "warn";
+            if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
+              errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
+              logLevel = "error";
+            } else if (circularDeps.length > 0) {
+              errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
+            } else {
+              errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
+            }
+            this.logger[logLevel]("Gadget has unresolvable dependencies", {
+              gadgetName: call.gadgetName,
+              invocationId,
+              circularDependencies: circularDeps,
+              missingDependencies: trulyMissingDeps
+            });
+            this.failedInvocations.add(invocationId);
+            const skipEvent = {
+              type: "gadget_skipped",
+              gadgetName: call.gadgetName,
+              invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: missingDeps[0],
+              failedDependencyError: errorMessage
+            };
+            events.push(skipEvent);
+          }
+          this.pendingGadgets.clear();
+        }
+        return events;
+      }
      /**
       * Safely execute an observer, catching and logging any errors.
       * Observers are non-critical, so errors are logged but don't crash the system.
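
The scheduler above is a fixed-point loop: each pass executes every pending call whose dependencies are complete, and stops when a pass makes no progress, so whatever remains is circular or references an unknown ID. A standalone toy illustration of that algorithm (simplified types, no hooks or events — assumptions, not the package's API):

```typescript
type Call = { id: string; deps: string[] };

// Returns the IDs that could never run (circular or missing dependencies).
async function runAll(calls: Call[], exec: (c: Call) => Promise<void>): Promise<string[]> {
  const pending = new Map(calls.map((c) => [c.id, c]));
  const done = new Set<string>();
  let progress = true;
  while (progress && pending.size > 0) {
    // Everything whose dependencies are all done can run in parallel.
    const ready = [...pending.values()].filter((c) => c.deps.every((d) => done.has(d)));
    await Promise.all(
      ready.map(async (c) => {
        pending.delete(c.id);
        await exec(c);
        done.add(c.id);
      })
    );
    progress = ready.length > 0;
  }
  return [...pending.keys()];
}
```

For example, `runAll([{ id: "b", deps: ["a"] }, { id: "a", deps: [] }], ...)` runs `a` in the first pass and `b` in the second; a pair that depend on each other is returned unexecuted, matching the circular-dependency skip events emitted above.
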
@@ -4042,9 +4604,9 @@ var init_agent = __esm({
        if (msg.role === "user") {
          this.conversation.addUserMessage(msg.content);
        } else if (msg.role === "assistant") {
-          this.conversation.addAssistantMessage(msg.content);
+          this.conversation.addAssistantMessage(extractText(msg.content));
        } else if (msg.role === "system") {
-          this.conversation.addUserMessage(`[System] ${msg.content}`);
+          this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
        }
      }
    }
@@ -4264,6 +4826,7 @@ var init_builder = __esm({
  "src/agent/builder.ts"() {
    "use strict";
    init_constants();
+    init_input_content();
    init_model_shortcuts();
    init_registry();
    init_agent();
@@ -4911,13 +5474,17 @@ ${endPrefix}`
   * }
   * ```
   */
-
+  /**
+   * Build AgentOptions with the given user prompt.
+   * Centralizes options construction for ask(), askWithImage(), and askWithContent().
+   */
+  buildAgentOptions(userPrompt) {
    if (!this.client) {
      const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
      this.client = new LLMistClass();
    }
    const registry = GadgetRegistry.from(this.gadgets);
-
+    return {
      client: this.client,
      model: this.model ?? "openai:gpt-5-nano",
      systemPrompt: this.systemPrompt,
@@ -4943,6 +5510,83 @@ ${endPrefix}`
      compactionConfig: this.compactionConfig,
      signal: this.signal
    };
+  }
+  ask(userPrompt) {
+    const options = this.buildAgentOptions(userPrompt);
+    return new Agent(AGENT_INTERNAL_KEY, options);
+  }
+  /**
+   * Build and create the agent with a multimodal user prompt (text + image).
+   * Returns the Agent instance ready to run.
+   *
+   * @param textPrompt - Text prompt describing what to do with the image
+   * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+   * @param mimeType - Optional MIME type (auto-detected if not provided)
+   * @returns Configured Agent instance
+   *
+   * @example
+   * ```typescript
+   * const agent = LLMist.createAgent()
+   *   .withModel("gpt-4o")
+   *   .withSystem("You analyze images")
+   *   .askWithImage(
+   *     "What's in this image?",
+   *     await fs.readFile("photo.jpg")
+   *   );
+   *
+   * for await (const event of agent.run()) {
+   *   // handle events
+   * }
+   * ```
+   */
+  askWithImage(textPrompt, imageData, mimeType) {
+    const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+    const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+    if (!detectedMime) {
+      throw new Error(
+        "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+      );
+    }
+    const userContent = [
+      text(textPrompt),
+      {
+        type: "image",
+        source: {
+          type: "base64",
+          mediaType: detectedMime,
+          data: toBase64(imageBuffer)
+        }
+      }
+    ];
+    const options = this.buildAgentOptions(userContent);
+    return new Agent(AGENT_INTERNAL_KEY, options);
+  }
+  /**
+   * Build and return an Agent configured with multimodal content.
+   * More flexible than askWithImage - accepts any combination of content parts.
+   *
+   * @param content - Array of content parts (text, images, audio)
+   * @returns A configured Agent ready for execution
+   *
+   * @example
+   * ```typescript
+   * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+   *
+   * const agent = LLMist.createAgent()
+   *   .withModel("gemini:gemini-2.5-flash")
+   *   .askWithContent([
+   *     text("Describe this image and transcribe the audio:"),
+   *     imageFromBuffer(imageData),
+   *     audioFromBuffer(audioData),
+   *   ]);
+   *
+   * for await (const event of agent.run()) {
+   *   // handle events
+   * }
+   * ```
+   */
+  askWithContent(content) {
+    const options = this.buildAgentOptions(content);
    return new Agent(AGENT_INTERNAL_KEY, options);
  }
  /**
@@ -5418,6 +6062,7 @@ var AnthropicMessagesProvider;
var init_anthropic = __esm({
  "src/providers/anthropic.ts"() {
    "use strict";
+    init_messages();
    init_anthropic_models();
    init_base_provider();
    init_constants2();
@@ -5430,11 +6075,33 @@ var init_anthropic = __esm({
      getModelSpecs() {
        return ANTHROPIC_MODELS;
      }
+      // =========================================================================
+      // Image Generation (Not Supported)
+      // =========================================================================
+      supportsImageGeneration(_modelId) {
+        return false;
+      }
+      async generateImage() {
+        throw new Error(
+          "Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
+        );
+      }
+      // =========================================================================
+      // Speech Generation (Not Supported)
+      // =========================================================================
+      supportsSpeechGeneration(_modelId) {
+        return false;
+      }
+      async generateSpeech() {
+        throw new Error(
+          "Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
+        );
+      }
      buildRequestPayload(options, descriptor, spec, messages) {
        const systemMessages = messages.filter((message) => message.role === "system");
        const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
          type: "text",
-          text: m.content,
+          text: extractText(m.content),
          // Add cache_control to the LAST system message block
          ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
        })) : void 0;
@@ -5447,14 +6114,10 @@ var init_anthropic = __esm({
        );
        const conversation = nonSystemMessages.map((message, index) => ({
          role: message.role,
-          content:
-
-
-
-            // Add cache_control to the LAST user message
-            ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
-          }
-        ]
+          content: this.convertToAnthropicContent(
+            message.content,
+            message.role === "user" && index === lastUserIndex
+          )
        }));
        const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
        const payload = {
@@ -5470,6 +6133,52 @@ var init_anthropic = __esm({
        };
        return payload;
      }
+      /**
+       * Convert llmist content to Anthropic's content block format.
+       * Handles text, images (base64 only), and applies cache_control.
+       */
+      convertToAnthropicContent(content, addCacheControl) {
+        const parts = normalizeContent(content);
+        return parts.map((part, index) => {
+          const isLastPart = index === parts.length - 1;
+          const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
+          if (part.type === "text") {
+            return {
+              type: "text",
+              text: part.text,
+              ...cacheControl
+            };
+          }
+          if (part.type === "image") {
+            return this.convertImagePart(part, cacheControl);
+          }
+          if (part.type === "audio") {
+            throw new Error(
+              "Anthropic does not support audio input. Use Google Gemini for audio processing."
+            );
+          }
+          throw new Error(`Unsupported content type: ${part.type}`);
+        });
+      }
+      /**
+       * Convert an image content part to Anthropic's image block format.
+       */
+      convertImagePart(part, cacheControl) {
+        if (part.source.type === "url") {
+          throw new Error(
+            "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
+          );
+        }
+        return {
+          type: "image",
+          source: {
+            type: "base64",
+            media_type: part.source.mediaType,
+            data: part.source.data
+          },
+          ...cacheControl
+        };
+      }
      async executeStreamRequest(payload, signal) {
        const client = this.client;
        const stream2 = await client.messages.create(payload, signal ? { signal } : void 0);
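
Illustrative shapes only, taken from the converter in the hunk above: a text+image llmist message becomes an array of Anthropic content blocks, with `cache_control` attached to the last block when requested. Note the `mediaType` to `media_type` rename — llmist uses camelCase internally and converts at the provider boundary:

```typescript
// llmist-side parts (input):
const input = [
  { type: "text", text: "Describe this chart" },
  { type: "image", source: { type: "base64", mediaType: "image/png", data: "iVBOR..." } }
];
// Anthropic-side blocks (output of convertToAnthropicContent(input, true)):
const anthropicBlocks = [
  { type: "text", text: "Describe this chart" },
  {
    type: "image",
    source: { type: "base64", media_type: "image/png", data: "iVBOR..." },
    cache_control: { type: "ephemeral" }
  }
];
```
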
@@ -5552,17 +6261,12 @@ var init_anthropic = __esm({
      async countTokens(messages, descriptor, _spec) {
        const client = this.client;
        const systemMessages = messages.filter((message) => message.role === "system");
-        const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
+        const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
        const conversation = messages.filter(
          (message) => message.role !== "system"
        ).map((message) => ({
          role: message.role,
-          content:
-            {
-              type: "text",
-              text: message.content
-            }
-          ]
+          content: this.convertToAnthropicContent(message.content, false)
        }));
        try {
          const response = await client.messages.countTokens({
@@ -5576,14 +6280,201 @@ var init_anthropic = __esm({
            `Token counting failed for ${descriptor.name}, using fallback estimation:`,
            error
          );
-
-
+          let totalChars = 0;
+          let imageCount = 0;
+          for (const msg of messages) {
+            const parts = normalizeContent(msg.content);
+            for (const part of parts) {
+              if (part.type === "text") {
+                totalChars += part.text.length;
+              } else if (part.type === "image") {
+                imageCount++;
+              }
+            }
+          }
+          return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
        }
      }
    };
  }
});

+// src/providers/gemini-image-models.ts
+function getGeminiImageModelSpec(modelId) {
+  return geminiImageModels.find((m) => m.modelId === modelId);
+}
+function isGeminiImageModel(modelId) {
+  return geminiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiImageCost(modelId, size = "1:1", n = 1) {
+  const spec = getGeminiImageModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perImage !== void 0) {
+    return spec.pricing.perImage * n;
+  }
+  if (spec.pricing.bySize) {
+    const sizePrice = spec.pricing.bySize[size];
+    if (typeof sizePrice === "number") {
+      return sizePrice * n;
+    }
+  }
+  return void 0;
+}
+var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
+var init_gemini_image_models = __esm({
+  "src/providers/gemini-image-models.ts"() {
+    "use strict";
+    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    geminiImageModels = [
+      // Imagen 4 Family (standalone image generation)
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-fast-generate-001",
+        displayName: "Imagen 4 Fast",
+        pricing: {
+          perImage: 0.02
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-001",
+        displayName: "Imagen 4",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-001",
+        displayName: "Imagen 4 Ultra",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Preview versions
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-preview-06-06",
+        displayName: "Imagen 4 (Preview)",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-preview-06-06",
+        displayName: "Imagen 4 Ultra (Preview)",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Gemini Native Image Generation (multimodal models)
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image",
+        displayName: "Gemini 2.5 Flash Image",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image-preview",
+        displayName: "Gemini 2.5 Flash Image (Preview)",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-3-pro-image-preview",
+        displayName: "Gemini 3 Pro Image (Preview)",
+        pricing: {
+          // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K
+          // Using 2K as default
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
+      {
+        provider: "gemini",
+        modelId: "nano-banana-pro-preview",
+        displayName: "Nano Banana Pro (Gemini 3 Pro Image)",
+        pricing: {
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini-models.ts
 var GEMINI_MODELS;
 var init_gemini_models = __esm({
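
Two quick checks against the pricing table above (hypothetical calls, assuming `calculateGeminiImageCost` is reachable from your code):

```typescript
// Imagen 4 bills a flat rate per image, regardless of aspect ratio.
console.log(calculateGeminiImageCost("imagen-4.0-generate-001", "16:9", 4)); // 0.04 * 4 = 0.16
// Gemini 3 Pro Image is priced by output resolution tier instead.
console.log(calculateGeminiImageCost("gemini-3-pro-image-preview", "4K", 1)); // 0.24
// Unknown model IDs yield undefined rather than throwing.
console.log(calculateGeminiImageCost("not-a-model", "1:1", 2)); // undefined
```
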
@@ -5741,16 +6632,155 @@ var init_gemini_models = __esm({
          output: 0.3
          // No context caching available for 2.0-flash-lite
        },
-        knowledgeCutoff: "2024-08",
+        knowledgeCutoff: "2024-08",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "Gemini 2.0",
+          notes: "Smallest and most cost effective 2.0 model for at scale usage."
+        }
+      }
+    ];
+  }
+});
+
+// src/providers/gemini-speech-models.ts
+function getGeminiSpeechModelSpec(modelId) {
+  return geminiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isGeminiSpeechModel(modelId) {
+  return geminiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getGeminiSpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perMinute !== void 0) {
+    if (estimatedMinutes !== void 0) {
+      return estimatedMinutes * spec.pricing.perMinute;
+    }
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
+var init_gemini_speech_models = __esm({
+  "src/providers/gemini-speech-models.ts"() {
+    "use strict";
+    GEMINI_TTS_VOICES = [
+      "Zephyr",
+      // Bright
+      "Puck",
+      // Upbeat
+      "Charon",
+      // Informative
+      "Kore",
+      // Firm
+      "Fenrir",
+      // Excitable
+      "Leda",
+      // Youthful
+      "Orus",
+      // Firm
+      "Aoede",
+      // Breezy
+      "Callirrhoe",
+      // Easy-going
+      "Autonoe",
+      // Bright
+      "Enceladus",
+      // Breathy
+      "Iapetus",
+      // Clear
+      "Umbriel",
+      // Easy-going
+      "Algieba",
+      // Smooth
+      "Despina",
+      // Smooth
+      "Erinome",
+      // Clear
+      "Algenib",
+      // Gravelly
+      "Rasalgethi",
+      // Informative
+      "Laomedeia",
+      // Upbeat
+      "Achernar",
+      // Soft
+      "Alnilam",
+      // Firm
+      "Schedar",
+      // Even
+      "Gacrux",
+      // Mature
+      "Pulcherrima",
+      // Forward
+      "Achird",
+      // Friendly
+      "Zubenelgenubi",
+      // Casual
+      "Vindemiatrix",
+      // Gentle
+      "Sadachbia",
+      // Lively
+      "Sadaltager",
+      // Knowledgeable
+      "Sulafat"
+      // Warm
+    ];
+    GEMINI_TTS_FORMATS = ["pcm", "wav"];
+    geminiSpeechModels = [
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-preview-tts",
+        displayName: "Gemini 2.5 Flash TTS (Preview)",
+        pricing: {
+          // $0.50 per 1M input tokens = $0.0000005 per token
+          perInputToken: 5e-7,
+          // $10.00 per 1M audio output tokens = $0.00001 per token
+          perAudioOutputToken: 1e-5,
+          // Rough estimate: ~$0.01 per minute of audio
+          perMinute: 0.01
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes (text + prompt combined)
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-pro-preview-tts",
+        displayName: "Gemini 2.5 Pro TTS (Preview)",
+        pricing: {
+          // $1.00 per 1M input tokens = $0.000001 per token
+          perInputToken: 1e-6,
+          // $20.00 per 1M audio output tokens = $0.00002 per token
+          perAudioOutputToken: 2e-5,
+          // Rough estimate: ~$0.02 per minute of audio
+          perMinute: 0.02
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
        features: {
-
-
-
-          structuredOutputs: true
-        },
-        metadata: {
-          family: "Gemini 2.0",
-          notes: "Smallest and most cost effective 2.0 model for at scale usage."
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
        }
      }
    ];
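
The speech cost helper uses the same shape: with no measured duration it approximates minutes as characters / 750, and a supplied duration takes precedence. A quick check against the per-minute rates above (hypothetical calls, assuming the helper is reachable):

```typescript
// 1500 characters on Flash TTS: (1500 / 750) minutes * $0.01/min.
console.log(calculateGeminiSpeechCost("gemini-2.5-flash-preview-tts", 1500)); // 0.02
// An explicit 3-minute duration on Pro TTS overrides the character heuristic.
console.log(calculateGeminiSpeechCost("gemini-2.5-pro-preview-tts", 1500, 3)); // 0.06
```
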
@@ -5758,7 +6788,32 @@ var init_gemini_models = __esm({
});

// src/providers/gemini.ts
-import { FunctionCallingConfigMode, GoogleGenAI } from "@google/genai";
+import { FunctionCallingConfigMode, GoogleGenAI, Modality } from "@google/genai";
+function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
+  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
+  const blockAlign = numChannels * bitsPerSample / 8;
+  const dataSize = pcmData.length;
+  const headerSize = 44;
+  const fileSize = headerSize + dataSize - 8;
+  const buffer = new ArrayBuffer(headerSize + dataSize);
+  const view = new DataView(buffer);
+  const uint8 = new Uint8Array(buffer);
+  view.setUint32(0, 1380533830, false);
+  view.setUint32(4, fileSize, true);
+  view.setUint32(8, 1463899717, false);
+  view.setUint32(12, 1718449184, false);
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, numChannels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  view.setUint32(36, 1684108385, false);
+  view.setUint32(40, dataSize, true);
+  uint8.set(pcmData, headerSize);
+  return buffer;
+}
function createGeminiProviderFromEnv() {
  return createProviderFromEnv("GEMINI_API_KEY", GoogleGenAI, GeminiGenerativeProvider);
}
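
The four big-endian constants in `wrapPcmInWav` are the ASCII chunk tags of the RIFF/WAVE container, which makes the 44-byte header layout easy to verify:

```typescript
// Decode a 32-bit big-endian integer back into its four ASCII bytes.
const tag = (n: number): string =>
  String.fromCharCode((n >>> 24) & 0xff, (n >>> 16) & 0xff, (n >>> 8) & 0xff, n & 0xff);

console.log(tag(1380533830)); // "RIFF"  (header offset 0)
console.log(tag(1463899717)); // "WAVE"  (offset 8)
console.log(tag(1718449184)); // "fmt "  (offset 12)
console.log(tag(1684108385)); // "data"  (offset 36)
```

Everything else in the header (sizes, sample rate, block alignment) is little-endian, which is why those `setUint32`/`setUint16` calls pass `true` for the littleEndian argument while the chunk tags pass `false`.
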
@@ -5766,9 +6821,12 @@ var GEMINI_ROLE_MAP, GeminiGenerativeProvider;
var init_gemini = __esm({
  "src/providers/gemini.ts"() {
    "use strict";
+    init_messages();
    init_base_provider();
    init_constants2();
+    init_gemini_image_models();
    init_gemini_models();
+    init_gemini_speech_models();
    init_utils();
    GEMINI_ROLE_MAP = {
      system: "user",
@@ -5783,6 +6841,139 @@ var init_gemini = __esm({
      getModelSpecs() {
        return GEMINI_MODELS;
      }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return geminiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isGeminiImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getGeminiImageModelSpec(options.model);
+        const isImagenModel = options.model.startsWith("imagen");
+        const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
+        const n = options.n ?? 1;
+        if (isImagenModel) {
+          const response2 = await client.models.generateImages({
+            model: options.model,
+            prompt: options.prompt,
+            config: {
+              numberOfImages: n,
+              aspectRatio,
+              outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
+            }
+          });
+          const images2 = response2.generatedImages ?? [];
+          const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
+          return {
+            // Gemini's imageBytes is already base64 encoded, so use it directly
+            images: images2.map((img) => ({
+              b64Json: img.image?.imageBytes ?? void 0
+            })),
+            model: options.model,
+            usage: {
+              imagesGenerated: images2.length,
+              size: aspectRatio,
+              quality: "standard"
+            },
+            cost: cost2
+          };
+        }
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [{ role: "user", parts: [{ text: options.prompt }] }],
+          config: {
+            responseModalities: [Modality.IMAGE, Modality.TEXT]
+          }
+        });
+        const images = [];
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData) {
+              images.push({
+                b64Json: part.inlineData.data
+              });
+            }
+          }
+        }
+        const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
+        return {
+          images,
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size: aspectRatio,
+            quality: "standard"
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return geminiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isGeminiSpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getGeminiSpeechModelSpec(options.model);
+        const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [
+            {
+              role: "user",
+              parts: [{ text: options.input }]
+            }
+          ],
+          config: {
+            responseModalities: [Modality.AUDIO],
+            speechConfig: {
+              voiceConfig: {
+                prebuiltVoiceConfig: {
+                  voiceName: voice
+                }
+              }
+            }
+          }
+        });
+        let pcmData;
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData?.data) {
+              const base64 = part.inlineData.data;
+              const binary = atob(base64);
+              pcmData = new Uint8Array(binary.length);
+              for (let i = 0; i < binary.length; i++) {
+                pcmData[i] = binary.charCodeAt(i);
+              }
+              break;
+            }
+          }
+        }
+        if (!pcmData) {
+          throw new Error("No audio data in Gemini TTS response");
+        }
+        const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
+        const cost = calculateGeminiSpeechCost(options.model, options.input.length);
+        return {
+          audio: audioData,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format: spec?.defaultFormat ?? "wav"
+        };
+      }
      buildRequestPayload(options, descriptor, _spec, messages) {
        const contents = this.convertMessagesToContents(messages);
        const generationConfig = this.buildGenerationConfig(options);
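
A minimal sketch (provider construction assumed, not shown in this diff) of saving Gemini TTS output to disk. The result's `audio` is a ready-to-play WAV `ArrayBuffer` because the raw PCM from the API is wrapped via `wrapPcmInWav(pcm, 24000, 16, 1)` in the code above:

```typescript
import { writeFile } from "node:fs/promises";

// `provider` is an already-constructed GeminiGenerativeProvider (assumption).
const result = await provider.generateSpeech({
  model: "gemini-2.5-flash-preview-tts",
  input: "Hello from llmist",
  voice: "Kore"
});
await writeFile("hello.wav", Buffer.from(result.audio)); // result.format === "wav"
```
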
@@ -5800,7 +6991,7 @@ var init_gemini = __esm({
        };
        return {
          model: descriptor.name,
-          contents
+          contents,
          config
        };
      }
@@ -5835,18 +7026,25 @@ var init_gemini = __esm({
        if (message.role === "system") {
          expandedMessages.push({
            role: "user",
-            content: message.content
+            content: extractText(message.content)
          });
          expandedMessages.push({
            role: "assistant",
            content: "Understood."
          });
        } else {
-          expandedMessages.push(
+          expandedMessages.push({
+            role: message.role,
+            content: message.content
+          });
        }
      }
      return this.mergeConsecutiveMessages(expandedMessages);
      }
+      /**
+       * Merge consecutive messages with the same role (required by Gemini).
+       * Handles multimodal content by converting to Gemini's part format.
+       */
      mergeConsecutiveMessages(messages) {
        if (messages.length === 0) {
          return [];
@@ -5855,15 +7053,16 @@ var init_gemini = __esm({
        let currentGroup = null;
        for (const message of messages) {
          const geminiRole = GEMINI_ROLE_MAP[message.role];
+          const geminiParts = this.convertToGeminiParts(message.content);
          if (currentGroup && currentGroup.role === geminiRole) {
-            currentGroup.parts.push(
+            currentGroup.parts.push(...geminiParts);
          } else {
            if (currentGroup) {
              result.push(currentGroup);
            }
            currentGroup = {
              role: geminiRole,
-              parts:
+              parts: geminiParts
            };
          }
        }
@@ -5872,11 +7071,39 @@ var init_gemini = __esm({
        }
        return result;
      }
-
-
-
-
-
+      /**
+       * Convert llmist content to Gemini's part format.
+       * Handles text, images, and audio (Gemini supports all three).
+       */
+      convertToGeminiParts(content) {
+        const parts = normalizeContent(content);
+        return parts.map((part) => {
+          if (part.type === "text") {
+            return { text: part.text };
+          }
+          if (part.type === "image") {
+            if (part.source.type === "url") {
+              throw new Error(
+                "Gemini does not support image URLs directly. Please provide base64-encoded image data."
+              );
+            }
+            return {
+              inlineData: {
+                mimeType: part.source.mediaType,
+                data: part.source.data
+              }
+            };
+          }
+          if (part.type === "audio") {
+            return {
+              inlineData: {
+                mimeType: part.source.mediaType,
+                data: part.source.data
+              }
+            };
+          }
+          throw new Error(`Unsupported content type: ${part.type}`);
+        });
      }
      buildGenerationConfig(options) {
        const config = {};
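
Illustrative shapes only, taken from the two converters in this diff: the same llmist audio part is accepted here but rejected by the Anthropic converter earlier (which throws "Anthropic does not support audio input"). Gemini folds both images and audio into `inlineData`, distinguished only by MIME type:

```typescript
// llmist-side audio part (input):
const audioPart = {
  type: "audio",
  source: { type: "base64", mediaType: "audio/mp3", data: "<base64>" }
};
// Gemini-side part produced by convertToGeminiParts:
const geminiPart = { inlineData: { mimeType: "audio/mp3", data: "<base64>" } };
```
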
@@ -5897,9 +7124,9 @@ var init_gemini = __esm({
      async *wrapStream(iterable) {
        const stream2 = iterable;
        for await (const chunk of stream2) {
-          const
-          if (
-          yield { text, rawEvent: chunk };
+          const text3 = this.extractText(chunk);
+          if (text3) {
+            yield { text: text3, rawEvent: chunk };
          }
          const finishReason = this.extractFinishReason(chunk);
          const usage = this.extractUsage(chunk);
@@ -5960,7 +7187,7 @@ var init_gemini = __esm({
        try {
          const response = await client.models.countTokens({
            model: descriptor.name,
-            contents
+            contents
            // Note: systemInstruction not used - it's not supported by countTokens()
            // and would cause a 2100% token counting error
          });
@@ -5970,14 +7197,140 @@ var init_gemini = __esm({
            `Token counting failed for ${descriptor.name}, using fallback estimation:`,
            error
          );
-
-
+          let totalChars = 0;
+          let mediaCount = 0;
+          for (const msg of messages) {
+            const parts = normalizeContent(msg.content);
+            for (const part of parts) {
+              if (part.type === "text") {
+                totalChars += part.text.length;
+              } else if (part.type === "image" || part.type === "audio") {
+                mediaCount++;
+              }
+            }
+          }
+          return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
        }
      }
    };
  }
});

+// src/providers/openai-image-models.ts
+function getOpenAIImageModelSpec(modelId) {
+  return openaiImageModels.find((m) => m.modelId === modelId);
+}
+function isOpenAIImageModel(modelId) {
+  return openaiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAIImageCost(modelId, size, quality = "standard", n = 1) {
+  const spec = getOpenAIImageModelSpec(modelId);
+  if (!spec) return void 0;
+  const sizePrice = spec.pricing.bySize?.[size];
+  if (sizePrice === void 0) return void 0;
+  let pricePerImage;
+  if (typeof sizePrice === "number") {
+    pricePerImage = sizePrice;
+  } else {
+    pricePerImage = sizePrice[quality];
+    if (pricePerImage === void 0) return void 0;
+  }
+  return pricePerImage * n;
+}
+var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
+var init_openai_image_models = __esm({
+  "src/providers/openai-image-models.ts"() {
+    "use strict";
+    GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
+    GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
+    DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
+    DALLE3_QUALITIES = ["standard", "hd"];
+    DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
+    openaiImageModels = [
+      // GPT Image 1 Family (flagship)
+      {
+        provider: "openai",
+        modelId: "gpt-image-1",
+        displayName: "GPT Image 1",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
+            "1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
+            "1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-image-1-mini",
+        displayName: "GPT Image 1 Mini",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 5e-3, medium: 0.02, high: 0.052 },
+            "1024x1536": { low: 75e-4, medium: 0.03, high: 0.078 },
+            "1536x1024": { low: 75e-4, medium: 0.03, high: 0.078 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      // DALL-E Family
+      {
+        provider: "openai",
+        modelId: "dall-e-3",
+        displayName: "DALL-E 3",
+        pricing: {
+          bySize: {
+            "1024x1024": { standard: 0.04, hd: 0.08 },
+            "1024x1792": { standard: 0.08, hd: 0.12 },
+            "1792x1024": { standard: 0.08, hd: 0.12 }
+          }
+        },
+        supportedSizes: [...DALLE3_SIZES],
+        supportedQualities: [...DALLE3_QUALITIES],
+        maxImages: 1,
+        // DALL-E 3 only supports n=1
+        defaultSize: "1024x1024",
+        defaultQuality: "standard",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "dall-e-2",
+        displayName: "DALL-E 2 (Legacy)",
+        pricing: {
+          bySize: {
+            "256x256": 0.016,
+            "512x512": 0.018,
+            "1024x1024": 0.02
+          }
+        },
+        supportedSizes: [...DALLE2_SIZES],
+        maxImages: 10,
+        defaultSize: "1024x1024"
+      }
+    ];
+  }
+});
+
 // src/providers/openai-models.ts
 var OPENAI_MODELS;
 var init_openai_models = __esm({
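
The OpenAI cost helper resolves through a nested size-then-quality table, falling back to a flat number where a size maps directly to a price. Checks against the table above (hypothetical calls, assuming `calculateOpenAIImageCost` is reachable):

```typescript
// Quality-tiered pricing: dall-e-3 at hd quality, portrait size.
console.log(calculateOpenAIImageCost("dall-e-3", "1024x1792", "hd", 1));        // 0.12
// Two medium-quality gpt-image-1 squares.
console.log(calculateOpenAIImageCost("gpt-image-1", "1024x1024", "medium", 2)); // 0.08
// DALL-E 2 sizes map to flat numbers, so the quality argument is ignored.
console.log(calculateOpenAIImageCost("dall-e-2", "512x512", "standard", 1));    // 0.018
```
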
@@ -6342,6 +7695,144 @@ var init_openai_models = __esm({
  }
});

+// src/providers/openai-speech-models.ts
+function getOpenAISpeechModelSpec(modelId) {
+  return openaiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isOpenAISpeechModel(modelId) {
+  return openaiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getOpenAISpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perCharacter !== void 0) {
+    return characterCount * spec.pricing.perCharacter;
+  }
+  if (spec.pricing.perMinute !== void 0 && estimatedMinutes !== void 0) {
+    return estimatedMinutes * spec.pricing.perMinute;
+  }
+  if (spec.pricing.perMinute !== void 0) {
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
+var init_openai_speech_models = __esm({
+  "src/providers/openai-speech-models.ts"() {
+    "use strict";
+    OPENAI_TTS_VOICES = [
+      "alloy",
+      "echo",
+      "fable",
+      "onyx",
+      "nova",
+      "shimmer"
+    ];
+    OPENAI_TTS_EXTENDED_VOICES = [
+      ...OPENAI_TTS_VOICES,
+      "ash",
+      "ballad",
+      "coral",
+      "sage",
+      "verse"
+    ];
+    OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
+    openaiSpeechModels = [
+      // Standard TTS models (character-based pricing)
+      {
+        provider: "openai",
+        modelId: "tts-1",
+        displayName: "TTS-1",
+        pricing: {
+          // $15 per 1M characters = $0.000015 per character
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-1106",
+        displayName: "TTS-1 (Nov 2023)",
+        pricing: {
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd",
+        displayName: "TTS-1 HD",
+        pricing: {
+          // $30 per 1M characters = $0.00003 per character
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd-1106",
+        displayName: "TTS-1 HD (Nov 2023)",
+        pricing: {
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      // Token-based TTS model with voice instructions support
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini-tts",
+        displayName: "GPT-4o Mini TTS",
+        pricing: {
+          // $0.60 per 1M input tokens = $0.0000006 per token
+          perInputToken: 6e-7,
+          // $12 per 1M audio output tokens = $0.000012 per token
+          perAudioOutputToken: 12e-6,
+          // ~$0.015 per minute of audio
+          perMinute: 0.015
+        },
+        voices: [...OPENAI_TTS_EXTENDED_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 2e3,
+        // tokens, not characters
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/openai.ts
 import OpenAI from "openai";
 import { encoding_for_model } from "tiktoken";
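
The branch order in `calculateOpenAISpeechCost` means character pricing wins whenever it is defined, and per-minute pricing (with the characters / 750 heuristic) covers the token-based model. Checks against the table above (hypothetical calls, assuming the helper is reachable):

```typescript
// tts-1 is character-priced: 10,000 chars at $15 per 1M characters.
console.log(calculateOpenAISpeechCost("tts-1", 10_000));         // 10000 * 0.000015 = 0.15
// gpt-4o-mini-tts has no perCharacter rate, so it falls through to
// per-minute pricing, approximating minutes as characters / 750.
console.log(calculateOpenAISpeechCost("gpt-4o-mini-tts", 1500)); // (1500 / 750) * 0.015 = 0.03
```
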
@@ -6361,9 +7852,12 @@ var ROLE_MAP, OpenAIChatProvider;
var init_openai = __esm({
  "src/providers/openai.ts"() {
    "use strict";
+    init_messages();
    init_base_provider();
    init_constants2();
+    init_openai_image_models();
    init_openai_models();
+    init_openai_speech_models();
    init_utils();
    ROLE_MAP = {
      system: "system",
@@ -6378,6 +7872,87 @@ var init_openai = __esm({
      getModelSpecs() {
        return OPENAI_MODELS;
      }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return openaiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isOpenAIImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getOpenAIImageModelSpec(options.model);
+        const size = options.size ?? spec?.defaultSize ?? "1024x1024";
+        const quality = options.quality ?? spec?.defaultQuality ?? "standard";
+        const n = options.n ?? 1;
+        const isDallE2 = options.model === "dall-e-2";
+        const isGptImage = options.model.startsWith("gpt-image");
+        const requestParams = {
+          model: options.model,
+          prompt: options.prompt,
+          size,
+          n
+        };
+        if (!isDallE2 && !isGptImage) {
+          requestParams.quality = quality;
+        }
+        if (isGptImage) {
+        } else if (!isDallE2) {
+          requestParams.response_format = options.responseFormat ?? "url";
+        }
+        const response = await client.images.generate(requestParams);
+        const cost = calculateOpenAIImageCost(options.model, size, quality, n);
+        const images = response.data ?? [];
+        return {
+          images: images.map((img) => ({
+            url: img.url,
+            b64Json: img.b64_json,
+            revisedPrompt: img.revised_prompt
+          })),
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size,
+            quality
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return openaiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isOpenAISpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getOpenAISpeechModelSpec(options.model);
+        const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
+        const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
+        const response = await client.audio.speech.create({
+          model: options.model,
+          input: options.input,
+          voice,
+          response_format: format,
+          speed: options.speed ?? 1
+        });
+        const audioBuffer = await response.arrayBuffer();
+        const cost = calculateOpenAISpeechCost(options.model, options.input.length);
+        return {
+          audio: audioBuffer,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format
+        };
+      }
      buildRequestPayload(options, descriptor, spec, messages) {
        const { maxTokens, temperature, topP, stopSequences, extra } = options;
        const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
@@ -6385,11 +7960,7 @@
         const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
         return {
           model: descriptor.name,
-          messages: messages.map((message) => ({
-            role: ROLE_MAP[message.role],
-            content: message.content,
-            name: message.name
-          })),
+          messages: messages.map((message) => this.convertToOpenAIMessage(message)),
           // Only set max_completion_tokens if explicitly provided
           // Otherwise let the API use "as much as fits" in the context window
           ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -6401,6 +7972,77 @@
           ...shouldIncludeTemperature ? { temperature } : {}
         };
       }
+      /**
+       * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
+       * Handles role-specific content type requirements:
+       * - system/assistant: string content only
+       * - user: string or multimodal array content
+       */
+      convertToOpenAIMessage(message) {
+        const role = ROLE_MAP[message.role];
+        if (role === "user") {
+          const content = this.convertToOpenAIContent(message.content);
+          return {
+            role: "user",
+            content,
+            ...message.name ? { name: message.name } : {}
+          };
+        }
+        const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
+        if (role === "system") {
+          return {
+            role: "system",
+            content: textContent,
+            ...message.name ? { name: message.name } : {}
+          };
+        }
+        return {
+          role: "assistant",
+          content: textContent,
+          ...message.name ? { name: message.name } : {}
+        };
+      }
+      /**
+       * Convert llmist content to OpenAI's content format.
+       * Optimizes by returning string for text-only content, array for multimodal.
+       */
+      convertToOpenAIContent(content) {
+        if (typeof content === "string") {
+          return content;
+        }
+        return content.map((part) => {
+          if (part.type === "text") {
+            return { type: "text", text: part.text };
+          }
+          if (part.type === "image") {
+            return this.convertImagePart(part);
+          }
+          if (part.type === "audio") {
+            throw new Error(
+              "OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
+            );
+          }
+          throw new Error(`Unsupported content type: ${part.type}`);
+        });
+      }
+      /**
+       * Convert an image content part to OpenAI's image_url format.
+       * Supports both URLs and base64 data URLs.
+       */
+      convertImagePart(part) {
+        if (part.source.type === "url") {
+          return {
+            type: "image_url",
+            image_url: { url: part.source.url }
+          };
+        }
+        return {
+          type: "image_url",
+          image_url: {
+            url: `data:${part.source.mediaType};base64,${part.source.data}`
+          }
+        };
+      }
       async executeStreamRequest(payload, signal) {
         const client = this.client;
         const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
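To make the new conversion path concrete, here is a minimal sketch of the shapes involved, assuming llmist content parts as defined earlier in this chunk (`text` parts plus `image` parts with `base64` or `url` sources); the expected output follows from `convertToOpenAIMessage` and `convertImagePart` above:

```typescript
// A multimodal llmist user message (input shape assumed from this diff):
const message = {
  role: "user" as const,
  content: [
    { type: "text", text: "What is in this picture?" },
    { type: "image", source: { type: "base64", mediaType: "image/png", data: "iVBORw0..." } },
  ],
};

// convertToOpenAIMessage(message) should then produce:
// {
//   role: "user",
//   content: [
//     { type: "text", text: "What is in this picture?" },
//     { type: "image_url", image_url: { url: "data:image/png;base64,iVBORw0..." } },
//   ],
// }
```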
@@ -6409,9 +8051,9 @@
       async *wrapStream(iterable) {
         const stream2 = iterable;
         for await (const chunk of stream2) {
-          const text = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
-          if (text) {
-            yield { text, rawEvent: chunk };
+          const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
+          if (text3) {
+            yield { text: text3, rawEvent: chunk };
           }
           const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
           const usage = chunk.usage ? {
@@ -6459,17 +8101,26 @@
         }
         try {
           let tokenCount = 0;
+          let imageCount = 0;
           for (const message of messages) {
             tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
             const roleText = ROLE_MAP[message.role];
             tokenCount += encoding.encode(roleText).length;
-            tokenCount += encoding.encode(message.content).length;
+            const textContent = extractText(message.content);
+            tokenCount += encoding.encode(textContent).length;
+            const parts = normalizeContent(message.content);
+            for (const part of parts) {
+              if (part.type === "image") {
+                imageCount++;
+              }
+            }
             if (message.name) {
               tokenCount += encoding.encode(message.name).length;
               tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
             }
           }
           tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
+          tokenCount += imageCount * 765;
           return tokenCount;
         } finally {
           encoding.free();
@@ -6479,8 +8130,19 @@
           `Token counting failed for ${descriptor.name}, using fallback estimation:`,
           error
         );
-        …
-        …
+        let totalChars = 0;
+        let imageCount = 0;
+        for (const msg of messages) {
+          const parts = normalizeContent(msg.content);
+          for (const part of parts) {
+            if (part.type === "text") {
+              totalChars += part.text.length;
+            } else if (part.type === "image") {
+              imageCount++;
+            }
+          }
+        }
+        return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
       }
     }
   };
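Both the tiktoken path and the fallback path now add a flat 765 tokens per image. A quick sanity check of the fallback arithmetic, assuming `FALLBACK_CHARS_PER_TOKEN` is 4 (that constant is defined outside this hunk, so the value is an assumption):

```typescript
// Fallback estimate for 1,000 characters of text plus 2 images:
const FALLBACK_CHARS_PER_TOKEN = 4; // assumed; defined elsewhere in the bundle
const estimated = Math.ceil(1000 / FALLBACK_CHARS_PER_TOKEN) + 2 * 765;
// 250 + 1530 = 1780 tokens
```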
@@ -6718,30 +8380,109 @@ var init_model_registry = __esm({
   }
 });
 
-// src/core/options.ts
-var ModelIdentifierParser;
-var init_options = __esm({
-  "src/core/options.ts"() {
+// src/core/namespaces/image.ts
+var ImageNamespace;
+var init_image = __esm({
+  "src/core/namespaces/image.ts"() {
     "use strict";
-    ModelIdentifierParser = class {
-      constructor(defaultProvider = "openai") {
+    ImageNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
         this.defaultProvider = defaultProvider;
       }
-      parse(identifier) {
-        const trimmed = identifier.trim();
-        if (!trimmed) {
-          throw new Error("Model identifier cannot be empty");
+      /**
+       * Generate images from a text prompt.
+       *
+       * @param options - Image generation options
+       * @returns Promise resolving to the generation result with images and cost
+       * @throws Error if the provider doesn't support image generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findImageAdapter(modelId);
+        if (!adapter || !adapter.generateImage) {
+          throw new Error(
+            `No provider supports image generation for model "${modelId}". Available image models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-        const [maybeProvider, ...rest] = trimmed.split(":");
-        if (rest.length === 0) {
-          return { provider: this.defaultProvider, name: maybeProvider };
+        return adapter.generateImage(options);
+      }
+      /**
+       * List all available image generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getImageModelSpecs) {
+            models.push(...adapter.getImageModelSpecs());
+          }
+        }
+        return models;
+      }
+      /**
+       * Check if a model is supported for image generation.
+       */
+      supportsModel(modelId) {
+        return this.findImageAdapter(modelId) !== void 0;
+      }
+      findImageAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsImageGeneration?.(modelId) ?? false
+        );
+      }
+    };
+  }
+});
+
+// src/core/namespaces/speech.ts
+var SpeechNamespace;
+var init_speech = __esm({
+  "src/core/namespaces/speech.ts"() {
+    "use strict";
+    SpeechNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
+        this.defaultProvider = defaultProvider;
+      }
+      /**
+       * Generate speech audio from text.
+       *
+       * @param options - Speech generation options
+       * @returns Promise resolving to the generation result with audio and cost
+       * @throws Error if the provider doesn't support speech generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findSpeechAdapter(modelId);
+        if (!adapter || !adapter.generateSpeech) {
+          throw new Error(
+            `No provider supports speech generation for model "${modelId}". Available speech models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-        const provider = maybeProvider;
-        const name = rest.join(":");
-        if (!name) {
-          throw new Error("Model name cannot be empty");
+        return adapter.generateSpeech(options);
+      }
+      /**
+       * List all available speech generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getSpeechModelSpecs) {
+            models.push(...adapter.getSpeechModelSpecs());
+          }
         }
-        return { provider, name };
+        return models;
+      }
+      /**
+       * Check if a model is supported for speech generation.
+       */
+      supportsModel(modelId) {
+        return this.findSpeechAdapter(modelId) !== void 0;
+      }
+      findSpeechAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsSpeechGeneration?.(modelId) ?? false
+        );
       }
     };
   }
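A minimal usage sketch for the two new namespaces, assuming they are exposed on the client exactly as wired up in the `client.ts` hunks further down (model IDs come from the specs in this diff):

```typescript
import { LLMist } from "llmist";

const llmist = new LLMist();

// Routed to the first adapter whose supportsImageGeneration(modelId) returns true.
const imageResult = await llmist.image.generate({
  model: "dall-e-3",
  prompt: "A lighthouse at dusk",
  size: "1024x1024",
});

// Same dispatch pattern via supportsSpeechGeneration(modelId).
const speechResult = await llmist.speech.generate({
  model: "tts-1-hd",
  input: "Hello from llmist!",
  voice: "alloy",
});
```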
@@ -6790,6 +8531,201 @@ var init_quick_methods = __esm({
   }
 });
 
+// src/core/namespaces/text.ts
+var TextNamespace;
+var init_text = __esm({
+  "src/core/namespaces/text.ts"() {
+    "use strict";
+    init_quick_methods();
+    TextNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Generate a complete text response.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Complete text response
+       */
+      async complete(prompt, options) {
+        return complete(this.client, prompt, options);
+      }
+      /**
+       * Stream text chunks.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Async generator yielding text chunks
+       */
+      stream(prompt, options) {
+        return stream(this.client, prompt, options);
+      }
+    };
+  }
+});
+
+// src/core/namespaces/vision.ts
+var VisionNamespace;
+var init_vision = __esm({
+  "src/core/namespaces/vision.ts"() {
+    "use strict";
+    init_input_content();
+    init_messages();
+    VisionNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Build a message builder with the image content attached.
+       * Handles URLs, data URLs, base64 strings, and binary buffers.
+       */
+      buildImageMessage(options) {
+        const builder = new LLMMessageBuilder();
+        if (options.systemPrompt) {
+          builder.addSystem(options.systemPrompt);
+        }
+        if (typeof options.image === "string") {
+          if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
+            builder.addUserWithImageUrl(options.prompt, options.image);
+          } else if (isDataUrl(options.image)) {
+            const parsed = parseDataUrl(options.image);
+            if (!parsed) {
+              throw new Error("Invalid data URL format");
+            }
+            builder.addUserWithImage(
+              options.prompt,
+              parsed.data,
+              parsed.mimeType
+            );
+          } else {
+            const buffer = Buffer.from(options.image, "base64");
+            builder.addUserWithImage(options.prompt, buffer, options.mimeType);
+          }
+        } else {
+          builder.addUserWithImage(options.prompt, options.image, options.mimeType);
+        }
+        return builder;
+      }
+      /**
+       * Stream the response and collect text and usage information.
+       */
+      async streamAndCollect(options, builder) {
+        let response = "";
+        let finalUsage;
+        for await (const chunk of this.client.stream({
+          model: options.model,
+          messages: builder.build(),
+          maxTokens: options.maxTokens,
+          temperature: options.temperature
+        })) {
+          response += chunk.text;
+          if (chunk.usage) {
+            finalUsage = {
+              inputTokens: chunk.usage.inputTokens,
+              outputTokens: chunk.usage.outputTokens,
+              totalTokens: chunk.usage.totalTokens
+            };
+          }
+        }
+        return { text: response.trim(), usage: finalUsage };
+      }
+      /**
+       * Analyze an image with a vision-capable model.
+       * Returns the analysis as a string.
+       *
+       * @param options - Vision analysis options
+       * @returns Promise resolving to the analysis text
+       * @throws Error if the image format is unsupported or model doesn't support vision
+       *
+       * @example
+       * ```typescript
+       * // From file
+       * const result = await llmist.vision.analyze({
+       *   model: "gpt-4o",
+       *   image: await fs.readFile("photo.jpg"),
+       *   prompt: "What's in this image?",
+       * });
+       *
+       * // From URL (OpenAI only)
+       * const result = await llmist.vision.analyze({
+       *   model: "gpt-4o",
+       *   image: "https://example.com/image.jpg",
+       *   prompt: "Describe this image",
+       * });
+       * ```
+       */
+      async analyze(options) {
+        const builder = this.buildImageMessage(options);
+        const { text: text3 } = await this.streamAndCollect(options, builder);
+        return text3;
+      }
+      /**
+       * Analyze an image and return detailed result with usage info.
+       *
+       * @param options - Vision analysis options
+       * @returns Promise resolving to the analysis result with usage info
+       */
+      async analyzeWithUsage(options) {
+        const builder = this.buildImageMessage(options);
+        const { text: text3, usage } = await this.streamAndCollect(options, builder);
+        return {
+          text: text3,
+          model: options.model,
+          usage
+        };
+      }
+      /**
+       * Check if a model supports vision/image input.
+       *
+       * @param modelId - Model ID to check
+       * @returns True if the model supports vision
+       */
+      supportsModel(modelId) {
+        const spec = this.client.modelRegistry.getModelSpec(modelId);
+        return spec?.features?.vision === true;
+      }
+      /**
+       * List all models that support vision.
+       *
+       * @returns Array of model IDs that support vision
+       */
+      listModels() {
+        return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
+      }
+    };
+  }
+});
+
+// src/core/options.ts
+var ModelIdentifierParser;
+var init_options = __esm({
+  "src/core/options.ts"() {
+    "use strict";
+    ModelIdentifierParser = class {
+      constructor(defaultProvider = "openai") {
+        this.defaultProvider = defaultProvider;
+      }
+      parse(identifier) {
+        const trimmed = identifier.trim();
+        if (!trimmed) {
+          throw new Error("Model identifier cannot be empty");
+        }
+        const [maybeProvider, ...rest] = trimmed.split(":");
+        if (rest.length === 0) {
+          return { provider: this.defaultProvider, name: maybeProvider };
+        }
+        const provider = maybeProvider;
+        const name = rest.join(":");
+        if (!name) {
+          throw new Error("Model name cannot be empty");
+        }
+        return { provider, name };
+      }
+    };
+  }
+});
+
 // src/core/client.ts
 var client_exports = {};
 __export(client_exports, {
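The relocated `ModelIdentifierParser` keeps a simple contract: an optional `provider:` prefix, with everything after the first colon kept as the model name. A sketch of the results implied by `parse` above (the `anthropic` prefix is just an illustrative provider id):

```typescript
const parser = new ModelIdentifierParser(); // defaultProvider: "openai"

parser.parse("gpt-4o");                      // { provider: "openai", name: "gpt-4o" }
parser.parse("anthropic:claude-3-5-sonnet"); // { provider: "anthropic", name: "claude-3-5-sonnet" }
parser.parse("openai:ft:gpt-4o:acme");       // { provider: "openai", name: "ft:gpt-4o:acme" }
parser.parse("   ");                         // throws: "Model identifier cannot be empty"
```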
@@ -6802,12 +8738,22 @@ var init_client = __esm({
     init_builder();
     init_discovery();
     init_model_registry();
+    init_image();
+    init_speech();
+    init_text();
+    init_vision();
     init_options();
     init_quick_methods();
     LLMist = class _LLMist {
       parser;
+      defaultProvider;
       modelRegistry;
       adapters;
+      // Namespaces for different generation types
+      text;
+      image;
+      speech;
+      vision;
       constructor(...args) {
         let adapters = [];
         let defaultProvider;
@@ -6846,6 +8792,7 @@
           const priorityB = b.priority ?? 0;
           return priorityB - priorityA;
         });
+        this.defaultProvider = resolvedDefaultProvider;
        this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
        this.modelRegistry = new ModelRegistry();
        for (const adapter of this.adapters) {
@@ -6854,6 +8801,10 @@
         if (customModels.length > 0) {
           this.modelRegistry.registerModels(customModels);
         }
+        this.text = new TextNamespace(this);
+        this.image = new ImageNamespace(this.adapters, this.defaultProvider);
+        this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
+        this.vision = new VisionNamespace(this);
       }
       stream(options) {
         const descriptor = this.parser.parse(options.model);
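With the namespaces attached in the constructor, the facades are reachable straight off the client. A hedged sketch (`vision.analyze` follows the JSDoc example earlier in this diff; the bare `text.complete(prompt)` call is an assumption based on the quick-method signatures):

```typescript
import { LLMist } from "llmist";

const llmist = new LLMist();

const summary = await llmist.text.complete("Summarize multimodal support in one sentence.");

const description = await llmist.vision.analyze({
  model: "gpt-4o",
  image: "https://example.com/image.jpg",
  prompt: "Describe this image",
});
```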
@@ -7275,9 +9226,9 @@ function sleep(ms) {
 function generateInvocationId() {
   return `inv-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
 }
-function splitIntoChunks(text, minChunkSize = 5, maxChunkSize = 30) {
+function splitIntoChunks(text3, minChunkSize = 5, maxChunkSize = 30) {
   const chunks = [];
-  let remaining = text;
+  let remaining = text3;
   while (remaining.length > 0) {
     const chunkSize = Math.min(
       Math.floor(Math.random() * (maxChunkSize - minChunkSize + 1)) + minChunkSize,
|
@@ -7336,17 +9287,17 @@ ${String(value)}
|
|
|
7336
9287
|
return result;
|
|
7337
9288
|
}
|
|
7338
9289
|
function formatGadgetCalls(gadgetCalls) {
|
|
7339
|
-
let
|
|
9290
|
+
let text3 = "";
|
|
7340
9291
|
const calls = [];
|
|
7341
9292
|
for (const call of gadgetCalls) {
|
|
7342
9293
|
const invocationId = call.invocationId ?? generateInvocationId();
|
|
7343
9294
|
calls.push({ name: call.gadgetName, invocationId });
|
|
7344
9295
|
const blockParams = serializeToBlockFormat(call.parameters);
|
|
7345
|
-
|
|
9296
|
+
text3 += `
|
|
7346
9297
|
${GADGET_START_PREFIX}${call.gadgetName}
|
|
7347
9298
|
${blockParams}${GADGET_END_PREFIX}`;
|
|
7348
9299
|
}
|
|
7349
|
-
return { text, calls };
|
|
9300
|
+
return { text: text3, calls };
|
|
7350
9301
|
}
|
|
7351
9302
|
async function* createMockStream(response) {
|
|
7352
9303
|
if (response.delayMs) {
|
|
@@ -7386,9 +9337,9 @@ async function* createMockStream(response) {
     };
   }
 }
-function createTextMockStream(text, options) {
+function createTextMockStream(text3, options) {
   return createMockStream({
-    text,
+    text: text3,
     delayMs: options?.delayMs,
     streamDelayMs: options?.streamDelayMs,
     usage: options?.usage,
@@ -7405,10 +9356,10 @@ var MockProviderAdapter = class {
   constructor(options) {
     this.mockManager = getMockManager(options);
   }
-  supports(descriptor) {
+  supports(_descriptor) {
     return true;
   }
-  stream(options, descriptor, spec) {
+  stream(options, descriptor, _spec) {
     const context = {
       model: options.model,
       provider: descriptor.provider,
@@ -7419,20 +9370,154 @@
     return this.createMockStreamFromContext(context);
   }
   async *createMockStreamFromContext(context) {
-    …
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse) {
+      yield {
+        text: "",
+        finishReason: "stop",
+        usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
+      };
+      return;
+    }
+    yield* createMockStream(mockResponse);
+  }
+  // ==========================================================================
+  // Image Generation Support
+  // ==========================================================================
+  /**
+   * Check if this adapter supports image generation for a given model.
+   * Returns true if there's a registered mock with images for this model.
+   */
+  supportsImageGeneration(_modelId) {
+    return true;
+  }
+  /**
+   * Generate mock images based on registered mocks.
+   *
+   * @param options - Image generation options
+   * @returns Mock image generation result
+   */
+  async generateImage(options) {
+    const context = {
+      model: options.model,
+      provider: "mock",
+      modelName: options.model,
+      options: {
+        model: options.model,
+        messages: [{ role: "user", content: options.prompt }]
+      },
+      messages: [{ role: "user", content: options.prompt }]
+    };
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse?.images || mockResponse.images.length === 0) {
+      throw new Error(
+        `No mock registered for image generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsImage(...).register() to add one.`
+      );
+    }
+    return this.createImageResult(options, mockResponse);
+  }
+  /**
+   * Transform mock response into ImageGenerationResult format.
+   *
+   * @param options - Original image generation options
+   * @param mockResponse - Mock response containing image data
+   * @returns ImageGenerationResult with mock data and zero cost
+   */
+  createImageResult(options, mockResponse) {
+    const images = mockResponse.images ?? [];
+    return {
+      images: images.map((img) => ({
+        b64Json: img.data,
+        revisedPrompt: img.revisedPrompt
+      })),
+      model: options.model,
+      usage: {
+        imagesGenerated: images.length,
+        size: options.size ?? "1024x1024",
+        quality: options.quality ?? "standard"
+      },
+      cost: 0
+      // Mock cost is always 0
+    };
+  }
+  // ==========================================================================
+  // Speech Generation Support
+  // ==========================================================================
+  /**
+   * Check if this adapter supports speech generation for a given model.
+   * Returns true if there's a registered mock with audio for this model.
+   */
+  supportsSpeechGeneration(_modelId) {
+    return true;
+  }
+  /**
+   * Generate mock speech based on registered mocks.
+   *
+   * @param options - Speech generation options
+   * @returns Mock speech generation result
+   */
+  async generateSpeech(options) {
+    const context = {
+      model: options.model,
+      provider: "mock",
+      modelName: options.model,
+      options: {
+        model: options.model,
+        messages: [{ role: "user", content: options.input }]
+      },
+      messages: [{ role: "user", content: options.input }]
+    };
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse?.audio) {
+      throw new Error(
+        `No mock registered for speech generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsAudio(...).register() to add one.`
+      );
+    }
+    return this.createSpeechResult(options, mockResponse);
+  }
+  /**
+   * Transform mock response into SpeechGenerationResult format.
+   * Converts base64 audio data to ArrayBuffer.
+   *
+   * @param options - Original speech generation options
+   * @param mockResponse - Mock response containing audio data
+   * @returns SpeechGenerationResult with mock data and zero cost
+   */
+  createSpeechResult(options, mockResponse) {
+    const audio = mockResponse.audio;
+    const binaryString = atob(audio.data);
+    const bytes = new Uint8Array(binaryString.length);
+    for (let i = 0; i < binaryString.length; i++) {
+      bytes[i] = binaryString.charCodeAt(i);
     }
+    const format = this.mimeTypeToAudioFormat(audio.mimeType);
+    return {
+      audio: bytes.buffer,
+      model: options.model,
+      usage: {
+        characterCount: options.input.length
+      },
+      cost: 0,
+      // Mock cost is always 0
+      format
+    };
+  }
+  /**
+   * Map MIME type to audio format for SpeechGenerationResult.
+   * Defaults to "mp3" for unknown MIME types.
+   *
+   * @param mimeType - Audio MIME type string
+   * @returns Audio format identifier
+   */
+  mimeTypeToAudioFormat(mimeType) {
+    const mapping = {
+      "audio/mp3": "mp3",
+      "audio/mpeg": "mp3",
+      "audio/wav": "wav",
+      "audio/webm": "opus",
+      "audio/ogg": "opus"
+    };
+    return mapping[mimeType] ?? "mp3";
   }
 };
 function createMockAdapter(options) {
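The error messages above spell out the registration pattern for multimodal mocks. A sketch of wiring both paths in a test, assuming `mockLLM` is importable from the package's testing entry point (the import path is an assumption):

```typescript
import { promises as fs } from "node:fs";
import { mockLLM } from "llmist/testing"; // import path assumed

const pngBuffer = await fs.readFile("fixtures/lighthouse.png");
const mp3Buffer = await fs.readFile("fixtures/hello.mp3");

// MIME types are sniffed from the buffers' magic bytes (see detectImageMimeType).
mockLLM().forModel("dall-e-3").returnsImage(pngBuffer).register();
mockLLM().forModel("tts-1").returnsAudio(mp3Buffer).register();

// generateImage/generateSpeech on the mock adapter now resolve with cost: 0.
```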
@@ -7440,6 +9525,20 @@ function createMockAdapter(options) {
 }
 
 // src/testing/mock-builder.ts
+init_input_content();
+init_messages();
+function hasImageContent(content) {
+  if (typeof content === "string") return false;
+  return content.some((part) => isImagePart(part));
+}
+function hasAudioContent(content) {
+  if (typeof content === "string") return false;
+  return content.some((part) => isAudioPart(part));
+}
+function countImages(content) {
+  if (typeof content === "string") return 0;
+  return content.filter((part) => isImagePart(part)).length;
+}
 var MockBuilder = class {
   matchers = [];
   response = {};
@@ -7502,9 +9601,9 @@
    * @example
    * mockLLM().whenMessageContains('hello')
    */
-  whenMessageContains(text) {
+  whenMessageContains(text3) {
     this.matchers.push(
-      (ctx) => ctx.messages.some((msg) => msg.content.toLowerCase().includes(text.toLowerCase()))
+      (ctx) => ctx.messages.some((msg) => extractText(msg.content).toLowerCase().includes(text3.toLowerCase()))
     );
     return this;
   }
@@ -7514,10 +9613,11 @@
    * @example
    * mockLLM().whenLastMessageContains('goodbye')
    */
-  whenLastMessageContains(text) {
+  whenLastMessageContains(text3) {
     this.matchers.push((ctx) => {
       const lastMsg = ctx.messages[ctx.messages.length - 1];
-      …
+      if (!lastMsg) return false;
+      return extractText(lastMsg.content).toLowerCase().includes(text3.toLowerCase());
     });
     return this;
   }
@@ -7528,7 +9628,7 @@
    * mockLLM().whenMessageMatches(/calculate \d+/)
    */
   whenMessageMatches(regex) {
-    this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(msg.content)));
+    this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(extractText(msg.content))));
     return this;
   }
   /**
@@ -7537,10 +9637,10 @@
    * @example
    * mockLLM().whenRoleContains('system', 'You are a helpful assistant')
    */
-  whenRoleContains(role, text) {
+  whenRoleContains(role, text3) {
     this.matchers.push(
       (ctx) => ctx.messages.some(
-        (msg) => msg.role === role && msg.content.toLowerCase().includes(text.toLowerCase())
+        (msg) => msg.role === role && extractText(msg.content).toLowerCase().includes(text3.toLowerCase())
       )
     );
     return this;
@@ -7568,6 +9668,43 @@
     this.matchers.push(matcher);
     return this;
   }
+  // ==========================================================================
+  // Multimodal Matchers
+  // ==========================================================================
+  /**
+   * Match when any message contains an image.
+   *
+   * @example
+   * mockLLM().whenMessageHasImage().returns("I see an image of a sunset.")
+   */
+  whenMessageHasImage() {
+    this.matchers.push((ctx) => ctx.messages.some((msg) => hasImageContent(msg.content)));
+    return this;
+  }
+  /**
+   * Match when any message contains audio.
+   *
+   * @example
+   * mockLLM().whenMessageHasAudio().returns("I hear music playing.")
+   */
+  whenMessageHasAudio() {
+    this.matchers.push((ctx) => ctx.messages.some((msg) => hasAudioContent(msg.content)));
+    return this;
+  }
+  /**
+   * Match based on the number of images in the last message.
+   *
+   * @example
+   * mockLLM().whenImageCount((n) => n >= 2).returns("Comparing multiple images...")
+   */
+  whenImageCount(predicate) {
+    this.matchers.push((ctx) => {
+      const lastMsg = ctx.messages[ctx.messages.length - 1];
+      if (!lastMsg) return false;
+      return predicate(countImages(lastMsg.content));
+    });
+    return this;
+  }
   /**
    * Set the text response to return.
    * Can be a static string or a function that returns a string dynamically.
@@ -7577,17 +9714,17 @@
    * mockLLM().returns(() => `Response at ${Date.now()}`)
    * mockLLM().returns((ctx) => `You said: ${ctx.messages[0]?.content}`)
    */
-  returns(text) {
-    if (typeof text === "function") {
+  returns(text3) {
+    if (typeof text3 === "function") {
       this.response = async (ctx) => {
-        const resolvedText = await Promise.resolve().then(() => text(ctx));
+        const resolvedText = await Promise.resolve().then(() => text3(ctx));
         return { text: resolvedText };
       };
     } else {
       if (typeof this.response === "function") {
         throw new Error("Cannot use returns() after withResponse() with a function");
       }
-      this.response.text = text;
+      this.response.text = text3;
     }
     return this;
   }
@@ -7624,6 +9761,112 @@
     this.response.gadgetCalls.push({ gadgetName, parameters });
     return this;
   }
+  // ==========================================================================
+  // Multimodal Response Helpers
+  // ==========================================================================
+  /**
+   * Return a single image in the response.
+   * Useful for mocking image generation endpoints.
+   *
+   * @param data - Image data (base64 string or Buffer)
+   * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('dall-e-3')
+   *   .returnsImage(pngBuffer)
+   *   .register();
+   */
+  returnsImage(data, mimeType) {
+    if (typeof this.response === "function") {
+      throw new Error("Cannot use returnsImage() after withResponse() with a function");
+    }
+    let imageData;
+    let imageMime;
+    if (typeof data === "string") {
+      imageData = data;
+      if (!mimeType) {
+        throw new Error("MIME type is required when providing base64 string data");
+      }
+      imageMime = mimeType;
+    } else {
+      imageData = toBase64(data);
+      const detected = mimeType ?? detectImageMimeType(data);
+      if (!detected) {
+        throw new Error(
+          "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+        );
+      }
+      imageMime = detected;
+    }
+    if (!this.response.images) {
+      this.response.images = [];
+    }
+    this.response.images.push({ data: imageData, mimeType: imageMime });
+    return this;
+  }
+  /**
+   * Return multiple images in the response.
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('dall-e-3')
+   *   .returnsImages([
+   *     { data: pngBuffer1 },
+   *     { data: pngBuffer2 },
+   *   ])
+   *   .register();
+   */
+  returnsImages(images) {
+    for (const img of images) {
+      this.returnsImage(img.data, img.mimeType);
+      if (img.revisedPrompt && this.response && typeof this.response !== "function") {
+        const lastImage = this.response.images?.[this.response.images.length - 1];
+        if (lastImage) {
+          lastImage.revisedPrompt = img.revisedPrompt;
+        }
+      }
+    }
+    return this;
+  }
+  /**
+   * Return audio data in the response.
+   * Useful for mocking speech synthesis endpoints.
+   *
+   * @param data - Audio data (base64 string or Buffer)
+   * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('tts-1')
+   *   .returnsAudio(mp3Buffer)
+   *   .register();
+   */
+  returnsAudio(data, mimeType) {
+    if (typeof this.response === "function") {
+      throw new Error("Cannot use returnsAudio() after withResponse() with a function");
+    }
+    let audioData;
+    let audioMime;
+    if (typeof data === "string") {
+      audioData = data;
+      if (!mimeType) {
+        throw new Error("MIME type is required when providing base64 string data");
+      }
+      audioMime = mimeType;
+    } else {
+      audioData = toBase64(data);
+      const detected = mimeType ?? detectAudioMimeType(data);
+      if (!detected) {
+        throw new Error(
+          "Could not detect audio MIME type. Please provide the mimeType parameter explicitly."
+        );
+      }
+      audioMime = detected;
+    }
+    this.response.audio = { data: audioData, mimeType: audioMime };
+    return this;
+  }
   /**
    * Set the complete mock response object.
    * This allows full control over all response properties.
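One wrinkle worth noting: a base64 string carries no magic bytes, so the builder can only sniff MIME types from binary data. Per the guards above (`pngBuffer` and `base64Png` are placeholder fixtures):

```typescript
// Buffer: mimeType is optional and detected from magic bytes.
mockLLM().forModel("dall-e-3").returnsImage(pngBuffer).register();

// Base64 string: the mimeType argument is mandatory, or the builder throws.
mockLLM().forModel("dall-e-3").returnsImage(base64Png, "image/png").register();
```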
@@ -7954,23 +10197,23 @@ function createTestStream(chunks) {
     }
   }();
 }
-function createTextStream(text, options) {
+function createTextStream(text3, options) {
   return async function* () {
     if (options?.delayMs) {
       await sleep2(options.delayMs);
     }
-    const chunkSize = options?.chunkSize ?? text.length;
+    const chunkSize = options?.chunkSize ?? text3.length;
     const chunks = [];
-    for (let i = 0; i < text.length; i += chunkSize) {
-      chunks.push(text.slice(i, i + chunkSize));
+    for (let i = 0; i < text3.length; i += chunkSize) {
+      chunks.push(text3.slice(i, i + chunkSize));
     }
     for (let i = 0; i < chunks.length; i++) {
       const isLast = i === chunks.length - 1;
       const chunk = { text: chunks[i] };
       if (isLast) {
         chunk.finishReason = options?.finishReason ?? "stop";
-        const inputTokens = Math.ceil(text.length / 4);
-        const outputTokens = Math.ceil(text.length / 4);
+        const inputTokens = Math.ceil(text3.length / 4);
+        const outputTokens = Math.ceil(text3.length / 4);
         chunk.usage = options?.usage ?? {
           inputTokens,
           outputTokens,
@@ -7992,11 +10235,11 @@ async function collectStream(stream2) {
   return chunks;
 }
 async function collectStreamText(stream2) {
-  let text = "";
+  let text3 = "";
   for await (const chunk of stream2) {
-    text += chunk.text ?? "";
+    text3 += chunk.text ?? "";
   }
-  return text;
+  return text3;
 }
 async function getStreamFinalChunk(stream2) {
   let lastChunk;
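These renamed helpers compose naturally in tests. A minimal sketch, assuming both are exported from the testing entry point and that `createTextStream` returns an async iterable of `{ text }` chunks, as the loop above suggests:

```typescript
import { createTextStream, collectStreamText } from "llmist/testing"; // path assumed

// Split "Hello, world!" into 5-character chunks, then reassemble it.
const stream = createTextStream("Hello, world!", { chunkSize: 5 });
const text = await collectStreamText(stream);
// text === "Hello, world!"
```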
@@ -8378,6 +10621,21 @@ function filterDefinedEnv(env) {
 }
 
 export {
+  isTextPart,
+  isImagePart,
+  isAudioPart,
+  text,
+  imageFromBase64,
+  imageFromUrl,
+  detectImageMimeType,
+  detectAudioMimeType,
+  toBase64,
+  imageFromBuffer,
+  audioFromBase64,
+  audioFromBuffer,
+  isDataUrl,
+  parseDataUrl,
+  init_input_content,
   MODEL_ALIASES,
   resolveModel,
   hasProviderPrefix,
@@ -8394,6 +10652,8 @@ export {
   resolveRulesTemplate,
   resolveHintTemplate,
   init_prompt_config,
+  normalizeContent,
+  extractText,
   LLMMessageBuilder,
   init_messages,
   BreakLoopException,
@@ -8450,11 +10710,11 @@ export {
   init_discovery,
   ModelRegistry,
   init_model_registry,
-  ModelIdentifierParser,
-  init_options,
   complete,
   stream,
   init_quick_methods,
+  ModelIdentifierParser,
+  init_options,
   LLMist,
   init_client,
   AgentBuilder,
@@ -8501,4 +10761,4 @@ export {
   MockPromptRecorder,
   waitFor
 };
-//# sourceMappingURL=chunk-GANXNBIZ.js.map
+//# sourceMappingURL=chunk-YHS2DYXP.js.map