llmist 2.3.0 → 2.5.0
This diff shows the contents of publicly released package versions as published to one of the supported registries. It is provided for informational purposes only.
- package/README.md +7 -0
- package/dist/{chunk-ZDNV7DDO.js → chunk-IHSZUAYN.js} +4 -2
- package/dist/chunk-IHSZUAYN.js.map +1 -0
- package/dist/{chunk-GANXNBIZ.js → chunk-YHS2DYXP.js} +2839 -579
- package/dist/chunk-YHS2DYXP.js.map +1 -0
- package/dist/cli.cjs +2717 -198
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +638 -47
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +2496 -220
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +109 -20
- package/dist/index.d.ts +109 -20
- package/dist/index.js +34 -2
- package/dist/{mock-stream-wRfUqXx4.d.cts → mock-stream-ga4KIiwX.d.cts} +1121 -12
- package/dist/{mock-stream-wRfUqXx4.d.ts → mock-stream-ga4KIiwX.d.ts} +1121 -12
- package/dist/testing/index.cjs +2771 -559
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-GANXNBIZ.js.map +0 -1
- package/dist/chunk-ZDNV7DDO.js.map +0 -1
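
Before the per-file diff, one orientation note: the headline change in this range is dependency-aware gadget invocations. A start marker can now carry an explicit invocation ID and a comma-separated dependency list (`gadget_name:my_id:dep1,dep2`), and gadgets whose dependencies fail are automatically skipped. A minimal sketch of a chained response; the `!!GADGET!!`-style prefixes are hypothetical stand-ins for the configured start/arg/end markers:

```typescript
// Hypothetical marker prefixes; llmist reads them from its configured
// startPrefix/argPrefix/endPrefix, so the literal strings here are stand-ins.
const assistantResponse = [
  "!!GADGET!!fetch_data:fetch_1", // name:invocation_id (no dependencies)
  "!!ARG!!url",
  "https://api.example.com/users",
  "!!END!!",
  "!!GADGET!!merge_data:merge_1:fetch_1", // name:id:deps - runs after fetch_1
  "!!ARG!!format",
  "json",
  "!!END!!",
].join("\n");
```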
package/dist/testing/index.cjs
CHANGED
@@ -124,348 +124,187 @@ var init_constants = __esm({
 }
 });
 
-// src/core/
-function
-
-if (MODEL_ALIASES[normalized]) {
-return true;
-}
-return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
+// src/core/input-content.ts
+function isImagePart(part) {
+return part.type === "image";
 }
-function
-
-
-
-
-
-
-
-
-
+function isAudioPart(part) {
+return part.type === "audio";
+}
+function text(content) {
+return { type: "text", text: content };
+}
+function imageFromUrl(url) {
+return {
+type: "image",
+source: { type: "url", url }
+};
+}
+function detectImageMimeType(data) {
+const bytes = data instanceof Buffer ? data : Buffer.from(data);
+for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
+if (bytes.length >= magic.length) {
+let matches = true;
+for (let i = 0; i < magic.length; i++) {
+if (bytes[i] !== magic[i]) {
+matches = false;
+break;
+}
+}
+if (matches) {
+if (mimeType === "image/webp") {
+if (bytes.length >= 12) {
+const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
+if (!webpMarker) continue;
+}
+}
+return mimeType;
+}
+}
 }
-
-
+return null;
+}
+function detectAudioMimeType(data) {
+const bytes = data instanceof Buffer ? data : Buffer.from(data);
+for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
+if (bytes.length >= magic.length) {
+let matches = true;
+for (let i = 0; i < magic.length; i++) {
+if (bytes[i] !== magic[i]) {
+matches = false;
+break;
+}
+}
+if (matches) {
+if (mimeType === "audio/wav") {
+if (bytes.length >= 12) {
+const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
+if (!waveMarker) continue;
+}
+}
+return mimeType;
+}
+}
 }
-
-
+return null;
+}
+function toBase64(data) {
+if (typeof data === "string") {
+return data;
 }
-
-
+return Buffer.from(data).toString("base64");
+}
+function audioFromBuffer(buffer, mediaType) {
+const detectedType = mediaType ?? detectAudioMimeType(buffer);
+if (!detectedType) {
+throw new Error(
+"Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
+);
 }
-
-
-
-
-
-
-if (!options.silent) {
-console.warn(
-`\u26A0\uFE0F Unknown model '${model}', falling back to 'openai:${model}'. This might be a typo. Did you mean: gpt4, gpt5, gpt5-nano, sonnet, haiku, flash? Use { strict: true } to error on unknown models, or { silent: true } to suppress this warning.`
-);
+return {
+type: "audio",
+source: {
+type: "base64",
+mediaType: detectedType,
+data: toBase64(buffer)
 }
-}
-return `openai:${model}`;
+};
 }
-
-
-
+function isDataUrl(input) {
+return input.startsWith("data:");
+}
+function parseDataUrl(url) {
+const match = url.match(/^data:([^;]+);base64,(.+)$/);
+if (!match) return null;
+return { mimeType: match[1], data: match[2] };
+}
+var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
+var init_input_content = __esm({
+"src/core/input-content.ts"() {
 "use strict";
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-};
-KNOWN_MODEL_PATTERNS = [
-/^gpt-?\d/i,
-// gpt-4, gpt-3.5, gpt4, etc.
-/^claude-?\d/i,
-// claude-3, claude-2, etc.
-/^gemini-?(\d|pro|flash)/i,
-// gemini-2.0, gemini-pro, gemini-flash, etc.
-/^o\d/i
-// OpenAI o1, o3, etc.
+IMAGE_MAGIC_BYTES = [
+{ bytes: [255, 216, 255], mimeType: "image/jpeg" },
+{ bytes: [137, 80, 78, 71], mimeType: "image/png" },
+{ bytes: [71, 73, 70, 56], mimeType: "image/gif" },
+// WebP starts with RIFF....WEBP
+{ bytes: [82, 73, 70, 70], mimeType: "image/webp" }
+];
+AUDIO_MAGIC_BYTES = [
+// MP3 frame sync
+{ bytes: [255, 251], mimeType: "audio/mp3" },
+{ bytes: [255, 250], mimeType: "audio/mp3" },
+// ID3 tag (MP3)
+{ bytes: [73, 68, 51], mimeType: "audio/mp3" },
+// OGG
+{ bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
+// WAV (RIFF)
+{ bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
+// WebM
+{ bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
 ];
 }
 });
 
-// src/
-function
-
-
-jsonSchema = z.toJSONSchema(schema, { target: "draft-7" });
-} catch (error) {
-const errorMessage = error instanceof Error ? error.message : String(error);
-throw new Error(
-`Gadget "${gadgetName}" has a schema that cannot be serialized to JSON Schema.
-This usually happens with unsupported patterns like:
-- z.record() - use z.object({}).passthrough() instead
-- Complex transforms or custom refinements
-- Circular references
-
-Original error: ${errorMessage}
-
-Only use schema patterns that Zod v4's native toJSONSchema() supports.`
-);
-}
-const issues = findUnknownTypes(jsonSchema);
-if (issues.length > 0) {
-const fieldList = issues.join(", ");
-throw new Error(
-`Gadget "${gadgetName}" uses z.unknown() which produces incomplete schemas.
-Problematic fields: ${fieldList}
-
-z.unknown() doesn't generate type information in JSON Schema, making it unclear
-to the LLM what data structure to provide.
-
-Suggestions:
-- Use z.object({}).passthrough() for flexible objects
-- Use z.record(z.string()) for key-value objects with string values
-- Define specific structure if possible
-
-Example fixes:
-// \u274C Bad
-content: z.unknown()
-
-// \u2705 Good
-content: z.object({}).passthrough() // for flexible objects
-content: z.record(z.string()) // for key-value objects
-content: z.array(z.string()) // for arrays of strings
-`
-);
-}
-}
-function findUnknownTypes(schema, path = []) {
-const issues = [];
-if (!schema || typeof schema !== "object") {
-return issues;
-}
-if (schema.definitions) {
-for (const defSchema of Object.values(schema.definitions)) {
-issues.push(...findUnknownTypes(defSchema, []));
-}
-}
-if (schema.properties) {
-for (const [propName, propSchema] of Object.entries(schema.properties)) {
-const propPath = [...path, propName];
-if (hasNoType(propSchema)) {
-issues.push(propPath.join(".") || propName);
-}
-issues.push(...findUnknownTypes(propSchema, propPath));
-}
-}
-if (schema.items) {
-const itemPath = [...path, "[]"];
-if (hasNoType(schema.items)) {
-issues.push(itemPath.join("."));
-}
-issues.push(...findUnknownTypes(schema.items, itemPath));
-}
-if (schema.anyOf) {
-schema.anyOf.forEach((subSchema, index) => {
-issues.push(...findUnknownTypes(subSchema, [...path, `anyOf[${index}]`]));
-});
-}
-if (schema.oneOf) {
-schema.oneOf.forEach((subSchema, index) => {
-issues.push(...findUnknownTypes(subSchema, [...path, `oneOf[${index}]`]));
-});
-}
-if (schema.allOf) {
-schema.allOf.forEach((subSchema, index) => {
-issues.push(...findUnknownTypes(subSchema, [...path, `allOf[${index}]`]));
-});
-}
-return issues;
+// src/core/prompt-config.ts
+function resolvePromptTemplate(template, defaultValue, context) {
+const resolved = template ?? defaultValue;
+return typeof resolved === "function" ? resolved(context) : resolved;
 }
-function
-
-
+function resolveRulesTemplate(rules, context) {
+const resolved = rules ?? DEFAULT_PROMPTS.rules;
+if (Array.isArray(resolved)) {
+return resolved;
 }
-
-
-
-if (hasType || hasRef || hasUnion) {
-return false;
+if (typeof resolved === "function") {
+const result = resolved(context);
+return Array.isArray(result) ? result : [result];
 }
-
-const metadataKeys = ["description", "title", "default", "examples"];
-const hasOnlyMetadata = keys.every((key) => metadataKeys.includes(key));
-return hasOnlyMetadata || keys.length === 0;
+return [resolved];
 }
-var
-var
-"src/
+var DEFAULT_PROMPTS;
+var init_prompt_config = __esm({
+"src/core/prompt-config.ts"() {
 "use strict";
-
+DEFAULT_PROMPTS = {
+mainInstruction: [
+"\u26A0\uFE0F CRITICAL: RESPOND ONLY WITH GADGET INVOCATIONS",
+"DO NOT use function calling or tool calling",
+"You must output the exact text markers shown below in plain text.",
+"EACH MARKER MUST START WITH A NEWLINE."
+].join("\n"),
+criticalUsage: "INVOKE gadgets using the markers - do not describe what you want to do.",
+formatDescription: (ctx) => `Parameters using ${ctx.argPrefix}name markers (value on next line(s), no escaping needed)`,
+rules: () => [
+"Output ONLY plain text with the exact markers - never use function/tool calling",
+"You can invoke multiple gadgets in a single response",
+"Gadgets without dependencies execute immediately (in parallel if multiple)",
+"Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
+"If any dependency fails, dependent gadgets are automatically skipped"
+],
+customExamples: null
+};
 }
 });
 
-// src/
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-* ```typescript
-* // From array of classes
-* const registry = GadgetRegistry.from([Calculator, Weather]);
-*
-* // From array of instances
-* const registry = GadgetRegistry.from([new Calculator(), new Weather()]);
-*
-* // From object with custom names
-* const registry = GadgetRegistry.from({
-* calc: Calculator,
-* weather: new Weather({ apiKey: "..." })
-* });
-* ```
-*/
-static from(gadgets) {
-const registry = new _GadgetRegistry();
-if (Array.isArray(gadgets)) {
-registry.registerMany(gadgets);
-} else {
-for (const [name, gadget] of Object.entries(gadgets)) {
-const instance = typeof gadget === "function" ? new gadget() : gadget;
-registry.register(name, instance);
-}
-}
-return registry;
-}
-/**
-* Registers multiple gadgets at once from an array.
-*
-* @param gadgets - Array of gadget instances or classes
-* @returns This registry for chaining
-*
-* @example
-* ```typescript
-* registry.registerMany([Calculator, Weather, Email]);
-* registry.registerMany([new Calculator(), new Weather()]);
-* ```
-*/
-registerMany(gadgets) {
-for (const gadget of gadgets) {
-const instance = typeof gadget === "function" ? new gadget() : gadget;
-this.registerByClass(instance);
-}
-return this;
-}
-// Register a gadget by name
-register(name, gadget) {
-const normalizedName = name.toLowerCase();
-if (this.gadgets.has(normalizedName)) {
-throw new Error(`Gadget '${name}' is already registered`);
-}
-if (gadget.parameterSchema) {
-validateGadgetSchema(gadget.parameterSchema, name);
-}
-this.gadgets.set(normalizedName, gadget);
-}
-// Register a gadget using its name property or class name
-registerByClass(gadget) {
-const name = gadget.name ?? gadget.constructor.name;
-this.register(name, gadget);
-}
-// Get gadget by name (case-insensitive)
-get(name) {
-return this.gadgets.get(name.toLowerCase());
-}
-// Check if gadget exists (case-insensitive)
-has(name) {
-return this.gadgets.has(name.toLowerCase());
-}
-// Get all registered gadget names
-getNames() {
-return Array.from(this.gadgets.keys());
-}
-// Get all gadgets for instruction generation
-getAll() {
-return Array.from(this.gadgets.values());
-}
-// Unregister gadget (useful for testing, case-insensitive)
-unregister(name) {
-return this.gadgets.delete(name.toLowerCase());
-}
-// Clear all gadgets (useful for testing)
-clear() {
-this.gadgets.clear();
-}
-};
-}
-});
-
-// src/core/prompt-config.ts
-function resolvePromptTemplate(template, defaultValue, context) {
-const resolved = template ?? defaultValue;
-return typeof resolved === "function" ? resolved(context) : resolved;
-}
-function resolveRulesTemplate(rules, context) {
-const resolved = rules ?? DEFAULT_PROMPTS.rules;
-if (Array.isArray(resolved)) {
-return resolved;
-}
-if (typeof resolved === "function") {
-const result = resolved(context);
-return Array.isArray(result) ? result : [result];
-}
-return [resolved];
-}
-var DEFAULT_PROMPTS;
-var init_prompt_config = __esm({
-"src/core/prompt-config.ts"() {
-"use strict";
-DEFAULT_PROMPTS = {
-mainInstruction: [
-"\u26A0\uFE0F CRITICAL: RESPOND ONLY WITH GADGET INVOCATIONS",
-"DO NOT use function calling or tool calling",
-"You must output the exact text markers shown below in plain text.",
-"EACH MARKER MUST START WITH A NEWLINE."
-].join("\n"),
-criticalUsage: "INVOKE gadgets using the markers - do not describe what you want to do.",
-formatDescription: (ctx) => `Parameters using ${ctx.argPrefix}name markers (value on next line(s), no escaping needed)`,
-rules: () => [
-"Output ONLY plain text with the exact markers - never use function/tool calling",
-"You can invoke multiple gadgets in a single response",
-"For dependent gadgets, invoke the first one and wait for the result"
-],
-customExamples: null
-};
-}
-});
-
-// src/core/messages.ts
-var LLMMessageBuilder;
-var init_messages = __esm({
-"src/core/messages.ts"() {
+// src/core/messages.ts
+function normalizeContent(content) {
+if (typeof content === "string") {
+return [{ type: "text", text: content }];
+}
+return content;
+}
+function extractText(content) {
+if (typeof content === "string") {
+return content;
+}
+return content.filter((part) => part.type === "text").map((part) => part.text).join("");
+}
+var LLMMessageBuilder;
+var init_messages = __esm({
+"src/core/messages.ts"() {
 "use strict";
 init_constants();
+init_input_content();
 init_prompt_config();
 LLMMessageBuilder = class {
 messages = [];
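
The new input-content module identifies media by magic bytes: JPEG (`FF D8 FF`), PNG (`89 50 4E 47`), GIF, and RIFF, with a second check for the `WEBP` tag at offset 8 because WAV shares the RIFF prefix. A self-contained restatement of that check, assuming Node's `Buffer`:

```typescript
// Minimal re-statement of the magic-byte check added above (Node Buffer assumed).
const IMAGE_MAGIC: Array<{ bytes: number[]; mimeType: string }> = [
  { bytes: [0xff, 0xd8, 0xff], mimeType: "image/jpeg" },
  { bytes: [0x89, 0x50, 0x4e, 0x47], mimeType: "image/png" },
  { bytes: [0x47, 0x49, 0x46, 0x38], mimeType: "image/gif" },
  { bytes: [0x52, 0x49, 0x46, 0x46], mimeType: "image/webp" }, // RIFF container
];

function detectImageMimeType(data: Buffer): string | null {
  for (const { bytes, mimeType } of IMAGE_MAGIC) {
    if (data.length < bytes.length) continue;
    if (!bytes.every((b, i) => data[i] === b)) continue;
    // RIFF is shared by WebP and WAV; require the "WEBP" tag at offset 8.
    if (mimeType === "image/webp" && data.length >= 12 &&
        data.toString("latin1", 8, 12) !== "WEBP") continue;
    return mimeType;
  }
  return null;
}

// detectImageMimeType(Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) === "image/png"
```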
@@ -567,6 +406,10 @@ CRITICAL: ${criticalUsage}
 parts.push(`
 1. Start marker: ${this.startPrefix}gadget_name`);
 parts.push(`
+With ID: ${this.startPrefix}gadget_name:my_id`);
+parts.push(`
+With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
+parts.push(`
 2. ${formatDescription}`);
 parts.push(`
 3. End marker: ${this.endPrefix}`);
@@ -616,6 +459,25 @@ ${this.endPrefix}`;
 EXAMPLE (Multiple Gadgets):
 
 ${multipleExample}`);
+const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
+${this.argPrefix}url
+https://api.example.com/users
+${this.endPrefix}
+${this.startPrefix}fetch_data:fetch_2
+${this.argPrefix}url
+https://api.example.com/orders
+${this.endPrefix}
+${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
+${this.argPrefix}format
+json
+${this.endPrefix}`;
+parts.push(`
+
+EXAMPLE (With Dependencies):
+merge_1 waits for fetch_1 AND fetch_2 to complete.
+If either fails, merge_1 is automatically skipped.
+
+${dependencyExample}`);
 parts.push(`
 
 BLOCK FORMAT SYNTAX:
@@ -634,89 +496,503 @@ class Calculator {
 }
 }
 
-BLOCK FORMAT RULES:
-- Each parameter starts with ${this.argPrefix}parameterName on its own line
-- The value starts on the NEXT line after the marker
-- Value ends when the next ${this.argPrefix} or ${this.endPrefix} appears
-- NO escaping needed - write values exactly as they should appear
-- Perfect for code, JSON, markdown, or any content with special characters
-
-NESTED OBJECTS (use / separator):
-${this.argPrefix}config/timeout
-30
-${this.argPrefix}config/retries
-3
-Produces: { "config": { "timeout": "30", "retries": "3" } }
-
-ARRAYS (use numeric indices):
-${this.argPrefix}items/0
-first
-${this.argPrefix}items/1
-second
-Produces: { "items": ["first", "second"] }`);
-return parts.join("");
-}
-buildRulesSection(context) {
-const parts = [];
-parts.push("\n\nRULES:");
-const rules = resolveRulesTemplate(this.promptConfig.rules, context);
-for (const rule of rules) {
-parts.push(`
-- ${rule}`);
+BLOCK FORMAT RULES:
+- Each parameter starts with ${this.argPrefix}parameterName on its own line
+- The value starts on the NEXT line after the marker
+- Value ends when the next ${this.argPrefix} or ${this.endPrefix} appears
+- NO escaping needed - write values exactly as they should appear
+- Perfect for code, JSON, markdown, or any content with special characters
+
+NESTED OBJECTS (use / separator):
+${this.argPrefix}config/timeout
+30
+${this.argPrefix}config/retries
+3
+Produces: { "config": { "timeout": "30", "retries": "3" } }
+
+ARRAYS (use numeric indices):
+${this.argPrefix}items/0
+first
+${this.argPrefix}items/1
+second
+Produces: { "items": ["first", "second"] }`);
+return parts.join("");
+}
+buildRulesSection(context) {
+const parts = [];
+parts.push("\n\nRULES:");
+const rules = resolveRulesTemplate(this.promptConfig.rules, context);
+for (const rule of rules) {
+parts.push(`
+- ${rule}`);
+}
+return parts.join("");
+}
+/**
+* Add a user message.
+* Content can be a string (text only) or an array of content parts (multimodal).
+*
+* @param content - Message content
+* @param metadata - Optional metadata
+*
+* @example
+* ```typescript
+* // Text only
+* builder.addUser("Hello!");
+*
+* // Multimodal
+* builder.addUser([
+* text("What's in this image?"),
+* imageFromBuffer(imageData),
+* ]);
+* ```
+*/
+addUser(content, metadata) {
+this.messages.push({ role: "user", content, metadata });
+return this;
+}
+addAssistant(content, metadata) {
+this.messages.push({ role: "assistant", content, metadata });
+return this;
+}
+/**
+* Add a user message with an image attachment.
+*
+* @param textContent - Text prompt
+* @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+* @param mimeType - Optional MIME type (auto-detected if not provided)
+*
+* @example
+* ```typescript
+* builder.addUserWithImage(
+* "What's in this image?",
+* await fs.readFile("photo.jpg"),
+* "image/jpeg" // Optional - auto-detected
+* );
+* ```
+*/
+addUserWithImage(textContent, imageData, mimeType) {
+const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+if (!detectedMime) {
+throw new Error(
+"Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+);
+}
+const content = [
+text(textContent),
+{
+type: "image",
+source: {
+type: "base64",
+mediaType: detectedMime,
+data: toBase64(imageBuffer)
+}
+}
+];
+this.messages.push({ role: "user", content });
+return this;
+}
+/**
+* Add a user message with an image URL (OpenAI only).
+*
+* @param textContent - Text prompt
+* @param imageUrl - URL to the image
+*
+* @example
+* ```typescript
+* builder.addUserWithImageUrl(
+* "What's in this image?",
+* "https://example.com/image.jpg"
+* );
+* ```
+*/
+addUserWithImageUrl(textContent, imageUrl) {
+const content = [text(textContent), imageFromUrl(imageUrl)];
+this.messages.push({ role: "user", content });
+return this;
+}
+/**
+* Add a user message with an audio attachment (Gemini only).
+*
+* @param textContent - Text prompt
+* @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
+* @param mimeType - Optional MIME type (auto-detected if not provided)
+*
+* @example
+* ```typescript
+* builder.addUserWithAudio(
+* "Transcribe this audio",
+* await fs.readFile("recording.mp3"),
+* "audio/mp3" // Optional - auto-detected
+* );
+* ```
+*/
+addUserWithAudio(textContent, audioData, mimeType) {
+const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
+const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
+this.messages.push({ role: "user", content });
+return this;
+}
+/**
+* Add a user message with multiple content parts.
+* Provides full flexibility for complex multimodal messages.
+*
+* @param parts - Array of content parts
+*
+* @example
+* ```typescript
+* builder.addUserMultimodal([
+* text("Compare these images:"),
+* imageFromBuffer(image1),
+* imageFromBuffer(image2),
+* ]);
+* ```
+*/
+addUserMultimodal(parts) {
+this.messages.push({ role: "user", content: parts });
+return this;
+}
+addGadgetCall(gadget, parameters, result) {
+const paramStr = this.formatBlockParameters(parameters, "");
+this.messages.push({
+role: "assistant",
+content: `${this.startPrefix}${gadget}
+${paramStr}
+${this.endPrefix}`
+});
+this.messages.push({
+role: "user",
+content: `Result: ${result}`
+});
+return this;
+}
+/**
+* Format parameters as Block format with JSON Pointer paths.
+* Uses the configured argPrefix for consistency with system prompt.
+*/
+formatBlockParameters(params, prefix) {
+const lines = [];
+for (const [key, value] of Object.entries(params)) {
+const fullPath = prefix ? `${prefix}/${key}` : key;
+if (Array.isArray(value)) {
+value.forEach((item, index) => {
+const itemPath = `${fullPath}/${index}`;
+if (typeof item === "object" && item !== null) {
+lines.push(this.formatBlockParameters(item, itemPath));
+} else {
+lines.push(`${this.argPrefix}${itemPath}`);
+lines.push(String(item));
+}
+});
+} else if (typeof value === "object" && value !== null) {
+lines.push(this.formatBlockParameters(value, fullPath));
+} else {
+lines.push(`${this.argPrefix}${fullPath}`);
+lines.push(String(value));
+}
+}
+return lines.join("\n");
+}
+build() {
+return [...this.messages];
+}
+};
+}
+});
+
+// src/core/model-shortcuts.ts
+function isKnownModelPattern(model) {
+const normalized = model.toLowerCase();
+if (MODEL_ALIASES[normalized]) {
+return true;
+}
+return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
+}
+function resolveModel(model, options = {}) {
+if (model.includes(":")) {
+return model;
+}
+const normalized = model.toLowerCase();
+if (MODEL_ALIASES[normalized]) {
+return MODEL_ALIASES[normalized];
+}
+const modelLower = model.toLowerCase();
+if (modelLower.startsWith("gpt")) {
+return `openai:${model}`;
+}
+if (modelLower.startsWith("claude")) {
+return `anthropic:${model}`;
+}
+if (modelLower.startsWith("gemini")) {
+return `gemini:${model}`;
+}
+if (modelLower.match(/^o\d/)) {
+return `openai:${model}`;
+}
+if (!isKnownModelPattern(model)) {
+if (options.strict) {
+throw new Error(
+`Unknown model '${model}'. Did you mean one of: gpt4, sonnet, haiku, flash? Use explicit provider prefix like 'openai:${model}' to bypass this check.`
+);
+}
+if (!options.silent) {
+console.warn(
+`\u26A0\uFE0F Unknown model '${model}', falling back to 'openai:${model}'. This might be a typo. Did you mean: gpt4, gpt5, gpt5-nano, sonnet, haiku, flash? Use { strict: true } to error on unknown models, or { silent: true } to suppress this warning.`
+);
+}
+}
+return `openai:${model}`;
+}
+var MODEL_ALIASES, KNOWN_MODEL_PATTERNS;
+var init_model_shortcuts = __esm({
+"src/core/model-shortcuts.ts"() {
+"use strict";
+MODEL_ALIASES = {
+// OpenAI aliases
+gpt4: "openai:gpt-4o",
+gpt4o: "openai:gpt-4o",
+gpt5: "openai:gpt-5",
+"gpt5-mini": "openai:gpt-5-mini",
+"gpt5-nano": "openai:gpt-5-nano",
+// Anthropic aliases
+sonnet: "anthropic:claude-sonnet-4-5",
+"claude-sonnet": "anthropic:claude-sonnet-4-5",
+haiku: "anthropic:claude-haiku-4-5",
+"claude-haiku": "anthropic:claude-haiku-4-5",
+opus: "anthropic:claude-opus-4-5",
+"claude-opus": "anthropic:claude-opus-4-5",
+// Gemini aliases
+flash: "gemini:gemini-2.0-flash",
+"gemini-flash": "gemini:gemini-2.0-flash",
+"gemini-pro": "gemini:gemini-2.5-pro",
+pro: "gemini:gemini-2.5-pro"
+};
+KNOWN_MODEL_PATTERNS = [
+/^gpt-?\d/i,
+// gpt-4, gpt-3.5, gpt4, etc.
+/^claude-?\d/i,
+// claude-3, claude-2, etc.
+/^gemini-?(\d|pro|flash)/i,
+// gemini-2.0, gemini-pro, gemini-flash, etc.
+/^o\d/i
+// OpenAI o1, o3, etc.
+];
+}
+});
+
+// src/gadgets/schema-validator.ts
+function validateGadgetSchema(schema, gadgetName) {
+let jsonSchema;
+try {
+jsonSchema = z.toJSONSchema(schema, { target: "draft-7" });
+} catch (error) {
+const errorMessage = error instanceof Error ? error.message : String(error);
+throw new Error(
+`Gadget "${gadgetName}" has a schema that cannot be serialized to JSON Schema.
+This usually happens with unsupported patterns like:
+- z.record() - use z.object({}).passthrough() instead
+- Complex transforms or custom refinements
+- Circular references
+
+Original error: ${errorMessage}
+
+Only use schema patterns that Zod v4's native toJSONSchema() supports.`
+);
+}
+const issues = findUnknownTypes(jsonSchema);
+if (issues.length > 0) {
+const fieldList = issues.join(", ");
+throw new Error(
+`Gadget "${gadgetName}" uses z.unknown() which produces incomplete schemas.
+Problematic fields: ${fieldList}
+
+z.unknown() doesn't generate type information in JSON Schema, making it unclear
+to the LLM what data structure to provide.
+
+Suggestions:
+- Use z.object({}).passthrough() for flexible objects
+- Use z.record(z.string()) for key-value objects with string values
+- Define specific structure if possible
+
+Example fixes:
+// \u274C Bad
+content: z.unknown()
+
+// \u2705 Good
+content: z.object({}).passthrough() // for flexible objects
+content: z.record(z.string()) // for key-value objects
+content: z.array(z.string()) // for arrays of strings
+`
+);
+}
+}
+function findUnknownTypes(schema, path = []) {
+const issues = [];
+if (!schema || typeof schema !== "object") {
+return issues;
+}
+if (schema.definitions) {
+for (const defSchema of Object.values(schema.definitions)) {
+issues.push(...findUnknownTypes(defSchema, []));
+}
+}
+if (schema.properties) {
+for (const [propName, propSchema] of Object.entries(schema.properties)) {
+const propPath = [...path, propName];
+if (hasNoType(propSchema)) {
+issues.push(propPath.join(".") || propName);
+}
+issues.push(...findUnknownTypes(propSchema, propPath));
+}
+}
+if (schema.items) {
+const itemPath = [...path, "[]"];
+if (hasNoType(schema.items)) {
+issues.push(itemPath.join("."));
+}
+issues.push(...findUnknownTypes(schema.items, itemPath));
+}
+if (schema.anyOf) {
+schema.anyOf.forEach((subSchema, index) => {
+issues.push(...findUnknownTypes(subSchema, [...path, `anyOf[${index}]`]));
+});
+}
+if (schema.oneOf) {
+schema.oneOf.forEach((subSchema, index) => {
+issues.push(...findUnknownTypes(subSchema, [...path, `oneOf[${index}]`]));
+});
+}
+if (schema.allOf) {
+schema.allOf.forEach((subSchema, index) => {
+issues.push(...findUnknownTypes(subSchema, [...path, `allOf[${index}]`]));
+});
+}
+return issues;
+}
+function hasNoType(prop) {
+if (!prop || typeof prop !== "object") {
+return false;
+}
+const hasType = prop.type !== void 0;
+const hasRef = prop.$ref !== void 0;
+const hasUnion = prop.anyOf !== void 0 || prop.oneOf !== void 0 || prop.allOf !== void 0;
+if (hasType || hasRef || hasUnion) {
+return false;
+}
+const keys = Object.keys(prop);
+const metadataKeys = ["description", "title", "default", "examples"];
+const hasOnlyMetadata = keys.every((key) => metadataKeys.includes(key));
+return hasOnlyMetadata || keys.length === 0;
+}
+var z;
+var init_schema_validator = __esm({
+"src/gadgets/schema-validator.ts"() {
+"use strict";
+z = __toESM(require("zod"), 1);
+}
+});
+
+// src/gadgets/registry.ts
+var GadgetRegistry;
+var init_registry = __esm({
+"src/gadgets/registry.ts"() {
+"use strict";
+init_schema_validator();
+GadgetRegistry = class _GadgetRegistry {
+gadgets = /* @__PURE__ */ new Map();
+/**
+* Creates a registry from an array of gadget classes or instances,
+* or an object mapping names to gadgets.
+*
+* @param gadgets - Array of gadgets/classes or object with custom names
+* @returns New GadgetRegistry with all gadgets registered
+*
+* @example
+* ```typescript
+* // From array of classes
+* const registry = GadgetRegistry.from([Calculator, Weather]);
+*
+* // From array of instances
+* const registry = GadgetRegistry.from([new Calculator(), new Weather()]);
+*
+* // From object with custom names
+* const registry = GadgetRegistry.from({
+* calc: Calculator,
+* weather: new Weather({ apiKey: "..." })
+* });
+* ```
+*/
+static from(gadgets) {
+const registry = new _GadgetRegistry();
+if (Array.isArray(gadgets)) {
+registry.registerMany(gadgets);
+} else {
+for (const [name, gadget] of Object.entries(gadgets)) {
+const instance = typeof gadget === "function" ? new gadget() : gadget;
+registry.register(name, instance);
+}
 }
-return
+return registry;
 }
-
-
+/**
+* Registers multiple gadgets at once from an array.
+*
+* @param gadgets - Array of gadget instances or classes
+* @returns This registry for chaining
+*
+* @example
+* ```typescript
+* registry.registerMany([Calculator, Weather, Email]);
+* registry.registerMany([new Calculator(), new Weather()]);
+* ```
+*/
+registerMany(gadgets) {
+for (const gadget of gadgets) {
+const instance = typeof gadget === "function" ? new gadget() : gadget;
+this.registerByClass(instance);
+}
 return this;
 }
-
-
-
+// Register a gadget by name
+register(name, gadget) {
+const normalizedName = name.toLowerCase();
+if (this.gadgets.has(normalizedName)) {
+throw new Error(`Gadget '${name}' is already registered`);
+}
+if (gadget.parameterSchema) {
+validateGadgetSchema(gadget.parameterSchema, name);
+}
+this.gadgets.set(normalizedName, gadget);
 }
-
-
-
-
-content: `${this.startPrefix}${gadget}
-${paramStr}
-${this.endPrefix}`
-});
-this.messages.push({
-role: "user",
-content: `Result: ${result}`
-});
-return this;
+// Register a gadget using its name property or class name
+registerByClass(gadget) {
+const name = gadget.name ?? gadget.constructor.name;
+this.register(name, gadget);
 }
-
-
-
-*/
-formatBlockParameters(params, prefix) {
-const lines = [];
-for (const [key, value] of Object.entries(params)) {
-const fullPath = prefix ? `${prefix}/${key}` : key;
-if (Array.isArray(value)) {
-value.forEach((item, index) => {
-const itemPath = `${fullPath}/${index}`;
-if (typeof item === "object" && item !== null) {
-lines.push(this.formatBlockParameters(item, itemPath));
-} else {
-lines.push(`${this.argPrefix}${itemPath}`);
-lines.push(String(item));
-}
-});
-} else if (typeof value === "object" && value !== null) {
-lines.push(this.formatBlockParameters(value, fullPath));
-} else {
-lines.push(`${this.argPrefix}${fullPath}`);
-lines.push(String(value));
-}
-}
-return lines.join("\n");
+// Get gadget by name (case-insensitive)
+get(name) {
+return this.gadgets.get(name.toLowerCase());
 }
-
-
+// Check if gadget exists (case-insensitive)
+has(name) {
+return this.gadgets.has(name.toLowerCase());
+}
+// Get all registered gadget names
+getNames() {
+return Array.from(this.gadgets.keys());
+}
+// Get all gadgets for instruction generation
+getAll() {
+return Array.from(this.gadgets.values());
+}
+// Unregister gadget (useful for testing, case-insensitive)
+unregister(name) {
+return this.gadgets.delete(name.toLowerCase());
+}
+// Clear all gadgets (useful for testing)
+clear() {
+this.gadgets.clear();
 }
 };
 }
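
Taken together, the new builder surface covers text, image, and audio parts. The JSDoc examples above compile to roughly this usage; the structural interface below is ours, since the diff only guarantees the method names:

```typescript
import { promises as fs } from "node:fs";

// Structural type for the builder methods documented above; the concrete
// LLMMessageBuilder export path is an assumption.
interface MessageBuilderLike {
  addUser(content: string): this;
  addUserWithImage(text: string, image: Buffer, mimeType?: string): this;
  addUserWithImageUrl(text: string, url: string): this;
  addUserWithAudio(text: string, audio: Buffer, mimeType?: string): this;
  build(): unknown[];
}

async function buildHistory(builder: MessageBuilderLike) {
  builder.addUser("Hello!");
  builder.addUserWithImage("What's in this image?", await fs.readFile("photo.jpg")); // MIME auto-detected
  builder.addUserWithImageUrl("And this one?", "https://example.com/image.jpg"); // OpenAI only
  builder.addUserWithAudio("Transcribe this audio", await fs.readFile("recording.mp3")); // Gemini only
  return builder.build(); // copy of the accumulated messages
}
```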
@@ -1913,7 +2189,7 @@ var init_conversation_manager = __esm({
 if (msg.role === "user") {
 this.historyBuilder.addUser(msg.content);
 } else if (msg.role === "assistant") {
-this.historyBuilder.addAssistant(msg.content);
+this.historyBuilder.addAssistant(extractText(msg.content));
 }
 }
 }
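
The conversation manager now funnels assistant content through `extractText`, so multimodal histories are flattened to their text parts before being re-added. In isolation:

```typescript
type ContentPart =
  | { type: "text"; text: string }
  | { type: "image" | "audio"; source: unknown };

// Same behavior as the extractText helper introduced in this version.
function extractText(content: string | ContentPart[]): string {
  if (typeof content === "string") return content;
  return content
    .filter((p): p is { type: "text"; text: string } => p.type === "text")
    .map((p) => p.text)
    .join("");
}

// extractText([{ type: "text", text: "a" }, { type: "image", source: {} },
//              { type: "text", text: "b" }]) === "ab"
```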
@@ -1934,8 +2210,10 @@ async function runWithHandlers(agentGenerator, handlers) {
 if (handlers.onGadgetCall) {
 await handlers.onGadgetCall({
 gadgetName: event.call.gadgetName,
+invocationId: event.call.invocationId,
 parameters: event.call.parameters,
-parametersRaw: event.call.parametersRaw
+parametersRaw: event.call.parametersRaw,
+dependencies: event.call.dependencies
 });
 }
 break;
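
The `onGadgetCall` payload gains `invocationId` and `dependencies` alongside the existing fields, so handlers can attribute results to specific invocations. A sketch, with the payload shape taken from the fields forwarded above:

```typescript
// Field names match the payload built in the diff above; the surrounding
// handlers type is abbreviated for illustration.
const handlers = {
  onGadgetCall: async (call: {
    gadgetName: string;
    invocationId: string;
    parameters: Record<string, unknown>;
    parametersRaw: string;
    dependencies: string[];
  }) => {
    const deps = call.dependencies.length ? ` after [${call.dependencies.join(", ")}]` : "";
    console.log(`${call.invocationId}: ${call.gadgetName}${deps}`);
  },
};
```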
@@ -2497,7 +2775,27 @@ var init_cost_reporting_client = __esm({
 constructor(client, reportCost) {
 this.client = client;
 this.reportCost = reportCost;
+this.image = {
+generate: async (options) => {
+const result = await this.client.image.generate(options);
+if (result.cost !== void 0 && result.cost > 0) {
+this.reportCost(result.cost);
+}
+return result;
+}
+};
+this.speech = {
+generate: async (options) => {
+const result = await this.client.speech.generate(options);
+if (result.cost !== void 0 && result.cost > 0) {
+this.reportCost(result.cost);
+}
+return result;
+}
+};
 }
+image;
+speech;
 /**
 * Access to model registry for cost estimation.
 */
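
The cost-reporting client now wraps `image.generate` and `speech.generate` the same way: delegate, then forward any positive `result.cost` to the callback. The decorator pattern in miniature; the interface names below are ours, not llmist exports:

```typescript
// Illustrative types; the real client interfaces live inside llmist.
interface GenResult { cost?: number }
interface MediaApi { generate(options: unknown): Promise<GenResult> }

function withCostReporting(api: MediaApi, reportCost: (cost: number) => void): MediaApi {
  return {
    generate: async (options) => {
      const result = await api.generate(options);
      if (result.cost !== undefined && result.cost > 0) {
        reportCost(result.cost); // mirrors the guard in the wrapped client above
      }
      return result;
    },
  };
}
```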
@@ -2762,15 +3060,37 @@ var init_parser = __esm({
 return segment.trim().length > 0 ? segment : void 0;
 }
 /**
-* Parse gadget name
-*
+* Parse gadget name with optional invocation ID and dependencies.
+*
+* Supported formats:
+* - `GadgetName` - Auto-generate ID, no dependencies
+* - `GadgetName:my_id` - Explicit ID, no dependencies
+* - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
+*
+* Dependencies must be comma-separated invocation IDs.
 */
 parseGadgetName(gadgetName) {
-
-
-return {
+const parts = gadgetName.split(":");
+if (parts.length === 1) {
+return {
+actualName: parts[0],
+invocationId: `gadget_${++globalInvocationCounter}`,
+dependencies: []
+};
+} else if (parts.length === 2) {
+return {
+actualName: parts[0],
+invocationId: parts[1].trim(),
+dependencies: []
+};
+} else {
+const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
+return {
+actualName: parts[0],
+invocationId: parts[1].trim(),
+dependencies: deps
+};
 }
-return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
 }
 /**
 * Extract the error message from a parse error.
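
For reference, the three header formats behave like this; a free-function transcription of the method above, with worked assertions:

```typescript
import assert from "node:assert";

// Transcribed from the compiled parseGadgetName above, as a free function.
let counter = 0;
function parseGadgetName(raw: string) {
  const parts = raw.split(":");
  if (parts.length === 1) {
    return { actualName: parts[0], invocationId: `gadget_${++counter}`, dependencies: [] as string[] };
  }
  if (parts.length === 2) {
    return { actualName: parts[0], invocationId: parts[1].trim(), dependencies: [] as string[] };
  }
  const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
  return { actualName: parts[0], invocationId: parts[1].trim(), dependencies: deps };
}

assert.deepStrictEqual(parseGadgetName("MergeData:merge_1:fetch_1, fetch_2"), {
  actualName: "MergeData", invocationId: "merge_1", dependencies: ["fetch_1", "fetch_2"],
});
assert.deepStrictEqual(parseGadgetName("FetchData"), {
  actualName: "FetchData", invocationId: "gadget_1", dependencies: [], // auto ID from the counter
});
```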
@@ -2806,39 +3126,20 @@ var init_parser = __esm({
 const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
 if (metadataEndIndex === -1) break;
 const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
 const contentStartIndex = metadataEndIndex + 1;
 let partEndIndex;
 let endMarkerLength = 0;
-
-
-
-
-endMarkerLength =
+const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
+const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
+if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
+partEndIndex = nextStartPos;
+endMarkerLength = 0;
+} else if (endPos !== -1) {
+partEndIndex = endPos;
+endMarkerLength = this.endPrefix.length;
 } else {
-
-let validEndPos = -1;
-let searchPos = contentStartIndex;
-while (true) {
-const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
-if (endPos === -1) break;
-const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
-if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
-validEndPos = endPos;
-break;
-} else {
-searchPos = endPos + this.endPrefix.length;
-}
-}
-if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
-partEndIndex = nextStartPos;
-endMarkerLength = 0;
-} else if (validEndPos !== -1) {
-partEndIndex = validEndPos;
-endMarkerLength = this.endPrefix.length;
-} else {
-break;
-}
+break;
 }
 const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
 const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2849,7 +3150,8 @@ var init_parser = __esm({
 invocationId,
 parametersRaw,
 parameters,
-parseError
+parseError,
+dependencies
 }
 };
 startIndex = partEndIndex + endMarkerLength;
@@ -2872,7 +3174,7 @@ var init_parser = __esm({
 const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
 if (metadataEndIndex !== -1) {
 const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
 const contentStartIndex = metadataEndIndex + 1;
 const parametersRaw = this.buffer.substring(contentStartIndex).trim();
 const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2883,7 +3185,8 @@ var init_parser = __esm({
 invocationId,
 parametersRaw,
 parameters,
-parseError
+parseError,
+dependencies
 }
 };
 return;
@@ -3253,6 +3556,13 @@ var init_stream_processor = __esm({
 accumulatedText = "";
 shouldStopExecution = false;
 observerFailureCount = 0;
+// Dependency tracking for gadget execution DAG
+/** Gadgets waiting for their dependencies to complete */
+pendingGadgets = /* @__PURE__ */ new Map();
+/** Completed gadget results, keyed by invocation ID */
+completedResults = /* @__PURE__ */ new Map();
+/** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
+failedInvocations = /* @__PURE__ */ new Set();
 constructor(options) {
 this.iteration = options.iteration;
 this.registry = options.registry;
@@ -3353,6 +3663,16 @@ var init_stream_processor = __esm({
 }
 }
 }
+const finalPendingEvents = await this.processPendingGadgets();
+outputs.push(...finalPendingEvents);
+if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
+didExecuteGadgets = true;
+}
+for (const evt of finalPendingEvents) {
+if (evt.type === "gadget_result" && evt.result.breaksLoop) {
+shouldBreakLoop = true;
+}
+}
 }
 let finalMessage = this.accumulatedText;
 if (this.hooks.interceptors?.interceptAssistantMessage) {
@@ -3404,7 +3724,11 @@ var init_stream_processor = __esm({
 return [{ type: "text", content }];
 }
 /**
-* Process a gadget call through the full lifecycle.
+* Process a gadget call through the full lifecycle, handling dependencies.
+*
+* Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
+* Gadgets with unsatisfied dependencies are queued for later execution.
+* After each execution, pending gadgets are checked to see if they can now run.
 */
 async processGadgetCall(call) {
 if (this.shouldStopExecution) {
@@ -3415,6 +3739,53 @@ var init_stream_processor = __esm({
 }
 const events = [];
 events.push({ type: "gadget_call", call });
+if (call.dependencies.length > 0) {
+if (call.dependencies.includes(call.invocationId)) {
+this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
+gadgetName: call.gadgetName,
+invocationId: call.invocationId
+});
+this.failedInvocations.add(call.invocationId);
+const skipEvent = {
+type: "gadget_skipped",
+gadgetName: call.gadgetName,
+invocationId: call.invocationId,
+parameters: call.parameters ?? {},
+failedDependency: call.invocationId,
+failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
+};
+events.push(skipEvent);
+return events;
+}
+const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+if (failedDep) {
+const skipEvents = await this.handleFailedDependency(call, failedDep);
+events.push(...skipEvents);
+return events;
+}
+const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+if (unsatisfied.length > 0) {
+this.logger.debug("Queueing gadget for later - waiting on dependencies", {
+gadgetName: call.gadgetName,
+invocationId: call.invocationId,
+waitingOn: unsatisfied
+});
+this.pendingGadgets.set(call.invocationId, call);
+return events;
+}
+}
+const executeEvents = await this.executeGadgetWithHooks(call);
+events.push(...executeEvents);
+const triggeredEvents = await this.processPendingGadgets();
+events.push(...triggeredEvents);
+return events;
+}
+/**
+* Execute a gadget through the full hook lifecycle.
+* This is the core execution logic, extracted from processGadgetCall.
+*/
+async executeGadgetWithHooks(call) {
+const events = [];
 if (call.parseError) {
 this.logger.warn("Gadget has parse error", {
 gadgetName: call.gadgetName,
@@ -3545,6 +3916,10 @@ var init_stream_processor = __esm({
 });
 }
 await this.runObserversInParallel(completeObservers);
+this.completedResults.set(result.invocationId, result);
+if (result.error) {
+this.failedInvocations.add(result.invocationId);
+}
 events.push({ type: "gadget_result", result });
 if (result.error) {
 const errorType = this.determineErrorType(call, result);
@@ -3560,6 +3935,162 @@ var init_stream_processor = __esm({
       }
       return events;
     }
+    /**
+     * Handle a gadget that cannot execute because a dependency failed.
+     * Calls the onDependencySkipped controller to allow customization.
+     */
+    async handleFailedDependency(call, failedDep) {
+      const events = [];
+      const depResult = this.completedResults.get(failedDep);
+      const depError = depResult?.error ?? "Dependency failed";
+      let action = { action: "skip" };
+      if (this.hooks.controllers?.onDependencySkipped) {
+        const context = {
+          iteration: this.iteration,
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          parameters: call.parameters ?? {},
+          failedDependency: failedDep,
+          failedDependencyError: depError,
+          logger: this.logger
+        };
+        action = await this.hooks.controllers.onDependencySkipped(context);
+      }
+      if (action.action === "skip") {
+        this.failedInvocations.add(call.invocationId);
+        const skipEvent = {
+          type: "gadget_skipped",
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          parameters: call.parameters ?? {},
+          failedDependency: failedDep,
+          failedDependencyError: depError
+        };
+        events.push(skipEvent);
+        if (this.hooks.observers?.onGadgetSkipped) {
+          const observeContext = {
+            iteration: this.iteration,
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError,
+            logger: this.logger
+          };
+          await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
+        }
+        this.logger.info("Gadget skipped due to failed dependency", {
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          failedDependency: failedDep
+        });
+      } else if (action.action === "execute_anyway") {
+        this.logger.info("Executing gadget despite failed dependency (controller override)", {
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          failedDependency: failedDep
+        });
+        const executeEvents = await this.executeGadgetWithHooks(call);
+        events.push(...executeEvents);
+      } else if (action.action === "use_fallback") {
+        const fallbackResult = {
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          parameters: call.parameters ?? {},
+          result: action.fallbackResult,
+          executionTimeMs: 0
+        };
+        this.completedResults.set(call.invocationId, fallbackResult);
+        events.push({ type: "gadget_result", result: fallbackResult });
+        this.logger.info("Using fallback result for gadget with failed dependency", {
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          failedDependency: failedDep
+        });
+      }
+      return events;
+    }
+    /**
+     * Process pending gadgets whose dependencies are now satisfied.
+     * Executes ready gadgets in parallel and continues until no more can be triggered.
+     */
+    async processPendingGadgets() {
+      const events = [];
+      let progress = true;
+      while (progress && this.pendingGadgets.size > 0) {
+        progress = false;
+        const readyToExecute = [];
+        const readyToSkip = [];
+        for (const [invocationId, call] of this.pendingGadgets) {
+          const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+          if (failedDep) {
+            readyToSkip.push({ call, failedDep });
+            continue;
+          }
+          const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
+          if (allSatisfied) {
+            readyToExecute.push(call);
+          }
+        }
+        for (const { call, failedDep } of readyToSkip) {
+          this.pendingGadgets.delete(call.invocationId);
+          const skipEvents = await this.handleFailedDependency(call, failedDep);
+          events.push(...skipEvents);
+          progress = true;
+        }
+        if (readyToExecute.length > 0) {
+          this.logger.debug("Executing ready gadgets in parallel", {
+            count: readyToExecute.length,
+            invocationIds: readyToExecute.map((c) => c.invocationId)
+          });
+          for (const call of readyToExecute) {
+            this.pendingGadgets.delete(call.invocationId);
+          }
+          const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
+          const results = await Promise.all(executePromises);
+          for (const executeEvents of results) {
+            events.push(...executeEvents);
+          }
+          progress = true;
+        }
+      }
+      if (this.pendingGadgets.size > 0) {
+        const pendingIds = new Set(this.pendingGadgets.keys());
+        for (const [invocationId, call] of this.pendingGadgets) {
+          const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+          const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
+          const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
+          let errorMessage;
+          let logLevel = "warn";
+          if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
+            errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
+            logLevel = "error";
+          } else if (circularDeps.length > 0) {
+            errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
+          } else {
+            errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
+          }
+          this.logger[logLevel]("Gadget has unresolvable dependencies", {
+            gadgetName: call.gadgetName,
+            invocationId,
+            circularDependencies: circularDeps,
+            missingDependencies: trulyMissingDeps
+          });
+          this.failedInvocations.add(invocationId);
+          const skipEvent = {
+            type: "gadget_skipped",
+            gadgetName: call.gadgetName,
+            invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: missingDeps[0],
+            failedDependencyError: errorMessage
+          };
+          events.push(skipEvent);
+        }
+        this.pendingGadgets.clear();
+      }
+      return events;
+    }
     /**
      * Safely execute an observer, catching and logging any errors.
      * Observers are non-critical, so errors are logged but don't crash the system.
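
Note: the onDependencySkipped controller above chooses between the three branches
handled by handleFailedDependency ("skip", "execute_anyway", "use_fallback"). A
minimal TypeScript sketch, assuming the hooks object is supplied when the stream
processor is configured; the gadget name "fetchWeather" and the fallback value are
illustrative only:

    const hooks = {
      controllers: {
        // Invoked once per gadget whose dependency has already failed.
        onDependencySkipped: async (ctx) => {
          if (ctx.gadgetName === "fetchWeather") {
            // Matches the "use_fallback" branch above: recorded as a synthetic
            // gadget_result with executionTimeMs: 0.
            return { action: "use_fallback", fallbackResult: "sunny, 22C" };
          }
          // Default branch: emits gadget_skipped and marks the invocation failed.
          return { action: "skip" };
        }
      }
    };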
@@ -3997,9 +4528,9 @@ var init_agent = __esm({
         if (msg.role === "user") {
           this.conversation.addUserMessage(msg.content);
         } else if (msg.role === "assistant") {
-          this.conversation.addAssistantMessage(msg.content);
+          this.conversation.addAssistantMessage(extractText(msg.content));
         } else if (msg.role === "system") {
-          this.conversation.addUserMessage(`[System] ${msg.content}`);
+          this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
         }
       }
     }
@@ -4219,6 +4750,7 @@ var init_builder = __esm({
   "src/agent/builder.ts"() {
     "use strict";
     init_constants();
+    init_input_content();
     init_model_shortcuts();
     init_registry();
     init_agent();
@@ -4866,13 +5398,17 @@ ${endPrefix}`
      * }
      * ```
      */
-    ask(userPrompt) {
+    /**
+     * Build AgentOptions with the given user prompt.
+     * Centralizes options construction for ask(), askWithImage(), and askWithContent().
+     */
+    buildAgentOptions(userPrompt) {
       if (!this.client) {
         const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
         this.client = new LLMistClass();
       }
       const registry = GadgetRegistry.from(this.gadgets);
-      const options = {
+      return {
         client: this.client,
         model: this.model ?? "openai:gpt-5-nano",
         systemPrompt: this.systemPrompt,
@@ -4898,6 +5434,83 @@ ${endPrefix}`
         compactionConfig: this.compactionConfig,
         signal: this.signal
       };
+    }
+    ask(userPrompt) {
+      const options = this.buildAgentOptions(userPrompt);
+      return new Agent(AGENT_INTERNAL_KEY, options);
+    }
+    /**
+     * Build and create the agent with a multimodal user prompt (text + image).
+     * Returns the Agent instance ready to run.
+     *
+     * @param textPrompt - Text prompt describing what to do with the image
+     * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+     * @param mimeType - Optional MIME type (auto-detected if not provided)
+     * @returns Configured Agent instance
+     *
+     * @example
+     * ```typescript
+     * const agent = LLMist.createAgent()
+     *   .withModel("gpt-4o")
+     *   .withSystem("You analyze images")
+     *   .askWithImage(
+     *     "What's in this image?",
+     *     await fs.readFile("photo.jpg")
+     *   );
+     *
+     * for await (const event of agent.run()) {
+     *   // handle events
+     * }
+     * ```
+     */
+    askWithImage(textPrompt, imageData, mimeType) {
+      const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+      const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+      if (!detectedMime) {
+        throw new Error(
+          "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+        );
+      }
+      const userContent = [
+        text(textPrompt),
+        {
+          type: "image",
+          source: {
+            type: "base64",
+            mediaType: detectedMime,
+            data: toBase64(imageBuffer)
+          }
+        }
+      ];
+      const options = this.buildAgentOptions(userContent);
+      return new Agent(AGENT_INTERNAL_KEY, options);
+    }
+    /**
+     * Build and return an Agent configured with multimodal content.
+     * More flexible than askWithImage - accepts any combination of content parts.
+     *
+     * @param content - Array of content parts (text, images, audio)
+     * @returns A configured Agent ready for execution
+     *
+     * @example
+     * ```typescript
+     * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+     *
+     * const agent = LLMist.createAgent()
+     *   .withModel("gemini:gemini-2.5-flash")
+     *   .askWithContent([
+     *     text("Describe this image and transcribe the audio:"),
+     *     imageFromBuffer(imageData),
+     *     audioFromBuffer(audioData),
+     *   ]);
+     *
+     * for await (const event of agent.run()) {
+     *   // handle events
+     * }
+     * ```
+     */
+    askWithContent(content) {
+      const options = this.buildAgentOptions(content);
       return new Agent(AGENT_INTERNAL_KEY, options);
     }
     /**
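
Note: a short usage sketch for the builder methods above. The fetch URL is
hypothetical, and passing the third argument bypasses detectImageMimeType
entirely, which is the documented escape hatch when detection fails:

    const res = await fetch("https://example.com/pic.webp"); // hypothetical source
    const bytes = new Uint8Array(await res.arrayBuffer());
    const agent = LLMist.createAgent()
      .withModel("gpt-4o")
      .askWithImage("Describe this image", bytes, "image/webp"); // explicit MIME type
    for await (const event of agent.run()) {
      // handle events
    }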
@@ -5373,6 +5986,7 @@ var init_anthropic = __esm({
   "src/providers/anthropic.ts"() {
     "use strict";
     import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
+    init_messages();
     init_anthropic_models();
     init_base_provider();
     init_constants2();
@@ -5385,11 +5999,33 @@ var init_anthropic = __esm({
     getModelSpecs() {
       return ANTHROPIC_MODELS;
     }
+    // =========================================================================
+    // Image Generation (Not Supported)
+    // =========================================================================
+    supportsImageGeneration(_modelId) {
+      return false;
+    }
+    async generateImage() {
+      throw new Error(
+        "Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
+      );
+    }
+    // =========================================================================
+    // Speech Generation (Not Supported)
+    // =========================================================================
+    supportsSpeechGeneration(_modelId) {
+      return false;
+    }
+    async generateSpeech() {
+      throw new Error(
+        "Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
+      );
+    }
     buildRequestPayload(options, descriptor, spec, messages) {
       const systemMessages = messages.filter((message) => message.role === "system");
       const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
         type: "text",
-        text: m.content,
+        text: extractText(m.content),
         // Add cache_control to the LAST system message block
         ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
       })) : void 0;
@@ -5402,14 +6038,10 @@ var init_anthropic = __esm({
       );
       const conversation = nonSystemMessages.map((message, index) => ({
         role: message.role,
-        content: [
-          {
-            type: "text",
-            text: message.content,
-            // Add cache_control to the LAST user message
-            ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
-          }
-        ]
+        content: this.convertToAnthropicContent(
+          message.content,
+          message.role === "user" && index === lastUserIndex
+        )
       }));
       const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
       const payload = {
@@ -5425,6 +6057,52 @@ var init_anthropic = __esm({
       };
       return payload;
     }
+    /**
+     * Convert llmist content to Anthropic's content block format.
+     * Handles text, images (base64 only), and applies cache_control.
+     */
+    convertToAnthropicContent(content, addCacheControl) {
+      const parts = normalizeContent(content);
+      return parts.map((part, index) => {
+        const isLastPart = index === parts.length - 1;
+        const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
+        if (part.type === "text") {
+          return {
+            type: "text",
+            text: part.text,
+            ...cacheControl
+          };
+        }
+        if (part.type === "image") {
+          return this.convertImagePart(part, cacheControl);
+        }
+        if (part.type === "audio") {
+          throw new Error(
+            "Anthropic does not support audio input. Use Google Gemini for audio processing."
+          );
+        }
+        throw new Error(`Unsupported content type: ${part.type}`);
+      });
+    }
+    /**
+     * Convert an image content part to Anthropic's image block format.
+     */
+    convertImagePart(part, cacheControl) {
+      if (part.source.type === "url") {
+        throw new Error(
+          "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
+        );
+      }
+      return {
+        type: "image",
+        source: {
+          type: "base64",
+          media_type: part.source.mediaType,
+          data: part.source.data
+        },
+        ...cacheControl
+      };
+    }
     async executeStreamRequest(payload, signal) {
       const client = this.client;
       const stream2 = await client.messages.create(payload, signal ? { signal } : void 0);
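
Note: for orientation, convertToAnthropicContent with addCacheControl = true marks
only the final block, so a two-part user message comes out roughly as (shape
inferred from the code above, values illustrative):

    // [
    //   { type: "text", text: "Compare these:" },
    //   {
    //     type: "image",
    //     source: { type: "base64", media_type: "image/png", data: "<base64>" },
    //     cache_control: { type: "ephemeral" }
    //   }
    // ]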
@@ -5507,17 +6185,12 @@ var init_anthropic = __esm({
     async countTokens(messages, descriptor, _spec) {
       const client = this.client;
       const systemMessages = messages.filter((message) => message.role === "system");
-      const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
+      const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
       const conversation = messages.filter(
         (message) => message.role !== "system"
       ).map((message) => ({
         role: message.role,
-        content: [
-          {
-            type: "text",
-            text: message.content
-          }
-        ]
+        content: this.convertToAnthropicContent(message.content, false)
       }));
       try {
         const response = await client.messages.countTokens({
@@ -5531,14 +6204,201 @@ var init_anthropic = __esm({
         `Token counting failed for ${descriptor.name}, using fallback estimation:`,
         error
       );
+      let totalChars = 0;
+      let imageCount = 0;
+      for (const msg of messages) {
+        const parts = normalizeContent(msg.content);
+        for (const part of parts) {
+          if (part.type === "text") {
+            totalChars += part.text.length;
+          } else if (part.type === "image") {
+            imageCount++;
+          }
+        }
+      }
+      return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
     }
   }
 };
 }
 });
 
+// src/providers/gemini-image-models.ts
+function getGeminiImageModelSpec(modelId) {
+  return geminiImageModels.find((m) => m.modelId === modelId);
+}
+function isGeminiImageModel(modelId) {
+  return geminiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiImageCost(modelId, size = "1:1", n = 1) {
+  const spec = getGeminiImageModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perImage !== void 0) {
+    return spec.pricing.perImage * n;
+  }
+  if (spec.pricing.bySize) {
+    const sizePrice = spec.pricing.bySize[size];
+    if (typeof sizePrice === "number") {
+      return sizePrice * n;
+    }
+  }
+  return void 0;
+}
+var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
+var init_gemini_image_models = __esm({
+  "src/providers/gemini-image-models.ts"() {
+    "use strict";
+    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    geminiImageModels = [
+      // Imagen 4 Family (standalone image generation)
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-fast-generate-001",
+        displayName: "Imagen 4 Fast",
+        pricing: {
+          perImage: 0.02
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-001",
+        displayName: "Imagen 4",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-001",
+        displayName: "Imagen 4 Ultra",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Preview versions
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-preview-06-06",
+        displayName: "Imagen 4 (Preview)",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-preview-06-06",
+        displayName: "Imagen 4 Ultra (Preview)",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Gemini Native Image Generation (multimodal models)
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image",
+        displayName: "Gemini 2.5 Flash Image",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image-preview",
+        displayName: "Gemini 2.5 Flash Image (Preview)",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-3-pro-image-preview",
+        displayName: "Gemini 3 Pro Image (Preview)",
+        pricing: {
+          // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K
+          // Using 2K as default
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
+      {
+        provider: "gemini",
+        modelId: "nano-banana-pro-preview",
+        displayName: "Nano Banana Pro (Gemini 3 Pro Image)",
+        pricing: {
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini-models.ts
 var GEMINI_MODELS;
 var init_gemini_models = __esm({
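
Note: two worked examples of calculateGeminiImageCost against the pricing tables
above:

    calculateGeminiImageCost("imagen-4.0-generate-001", "1:1", 4); // => 0.16 (4 * $0.04 perImage)
    calculateGeminiImageCost("gemini-3-pro-image-preview", "4K"); // => 0.24 (bySize lookup, n defaults to 1)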
@@ -5692,20 +6552,159 @@ var init_gemini_models = __esm({
         contextWindow: 1048576,
         maxOutputTokens: 8192,
         pricing: {
-          input: 0.075,
-          output: 0.3
-          // No context caching available for 2.0-flash-lite
+          input: 0.075,
+          output: 0.3
+          // No context caching available for 2.0-flash-lite
+        },
+        knowledgeCutoff: "2024-08",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "Gemini 2.0",
+          notes: "Smallest and most cost effective 2.0 model for at scale usage."
+        }
+      }
+    ];
+  }
+});
+
+// src/providers/gemini-speech-models.ts
+function getGeminiSpeechModelSpec(modelId) {
+  return geminiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isGeminiSpeechModel(modelId) {
+  return geminiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getGeminiSpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perMinute !== void 0) {
+    if (estimatedMinutes !== void 0) {
+      return estimatedMinutes * spec.pricing.perMinute;
+    }
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
+var init_gemini_speech_models = __esm({
+  "src/providers/gemini-speech-models.ts"() {
+    "use strict";
+    GEMINI_TTS_VOICES = [
+      "Zephyr",
+      // Bright
+      "Puck",
+      // Upbeat
+      "Charon",
+      // Informative
+      "Kore",
+      // Firm
+      "Fenrir",
+      // Excitable
+      "Leda",
+      // Youthful
+      "Orus",
+      // Firm
+      "Aoede",
+      // Breezy
+      "Callirrhoe",
+      // Easy-going
+      "Autonoe",
+      // Bright
+      "Enceladus",
+      // Breathy
+      "Iapetus",
+      // Clear
+      "Umbriel",
+      // Easy-going
+      "Algieba",
+      // Smooth
+      "Despina",
+      // Smooth
+      "Erinome",
+      // Clear
+      "Algenib",
+      // Gravelly
+      "Rasalgethi",
+      // Informative
+      "Laomedeia",
+      // Upbeat
+      "Achernar",
+      // Soft
+      "Alnilam",
+      // Firm
+      "Schedar",
+      // Even
+      "Gacrux",
+      // Mature
+      "Pulcherrima",
+      // Forward
+      "Achird",
+      // Friendly
+      "Zubenelgenubi",
+      // Casual
+      "Vindemiatrix",
+      // Gentle
+      "Sadachbia",
+      // Lively
+      "Sadaltager",
+      // Knowledgeable
+      "Sulafat"
+      // Warm
+    ];
+    GEMINI_TTS_FORMATS = ["pcm", "wav"];
+    geminiSpeechModels = [
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-preview-tts",
+        displayName: "Gemini 2.5 Flash TTS (Preview)",
+        pricing: {
+          // $0.50 per 1M input tokens = $0.0000005 per token
+          perInputToken: 5e-7,
+          // $10.00 per 1M audio output tokens = $0.00001 per token
+          perAudioOutputToken: 1e-5,
+          // Rough estimate: ~$0.01 per minute of audio
+          perMinute: 0.01
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes (text + prompt combined)
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-pro-preview-tts",
+        displayName: "Gemini 2.5 Pro TTS (Preview)",
+        pricing: {
+          // $1.00 per 1M input tokens = $0.000001 per token
+          perInputToken: 1e-6,
+          // $20.00 per 1M audio output tokens = $0.00002 per token
+          perAudioOutputToken: 2e-5,
+          // Rough estimate: ~$0.02 per minute of audio
+          perMinute: 0.02
         },
-        knowledgeCutoff: "2024-08",
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
         features: {
-          streaming: true,
-          functionCalling: true,
-          vision: true,
-          structuredOutputs: true
-        },
-        metadata: {
-          family: "Gemini 2.0",
-          notes: "Smallest and most cost effective 2.0 model for at scale usage."
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
         }
       }
     ];
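
Note: calculateGeminiSpeechCost falls back to a ~750 characters-per-minute
heuristic when no duration estimate is supplied:

    calculateGeminiSpeechCost("gemini-2.5-flash-preview-tts", 1500); // => 0.02 (1500/750 = 2 min * $0.01)
    calculateGeminiSpeechCost("gemini-2.5-pro-preview-tts", 750, 3); // => 0.06 (explicit 3 min * $0.02)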
@@ -5713,6 +6712,31 @@ var init_gemini_models = __esm({
 });
 
 // src/providers/gemini.ts
+function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
+  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
+  const blockAlign = numChannels * bitsPerSample / 8;
+  const dataSize = pcmData.length;
+  const headerSize = 44;
+  const fileSize = headerSize + dataSize - 8;
+  const buffer = new ArrayBuffer(headerSize + dataSize);
+  const view = new DataView(buffer);
+  const uint8 = new Uint8Array(buffer);
+  view.setUint32(0, 1380533830, false);
+  view.setUint32(4, fileSize, true);
+  view.setUint32(8, 1463899717, false);
+  view.setUint32(12, 1718449184, false);
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, numChannels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  view.setUint32(36, 1684108385, false);
+  view.setUint32(40, dataSize, true);
+  uint8.set(pcmData, headerSize);
+  return buffer;
+}
 function createGeminiProviderFromEnv() {
   return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
 }
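
Note: the magic numbers in wrapPcmInWav are big-endian ASCII chunk tags:
1380533830 = "RIFF", 1463899717 = "WAVE", 1718449184 = "fmt ", 1684108385 = "data".
A quick size check for the 24 kHz, 16-bit, mono PCM that generateSpeech feeds in
later in this diff:

    const oneSecond = new Uint8Array(48000); // byteRate = 24000 * 1 * 16 / 8 = 48000 bytes/s
    const wav = wrapPcmInWav(oneSecond, 24e3, 16, 1);
    console.log(wav.byteLength); // 48044 = 44-byte header + 48000 data bytes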
@@ -5721,9 +6745,12 @@ var init_gemini = __esm({
   "src/providers/gemini.ts"() {
     "use strict";
     import_genai = require("@google/genai");
+    init_messages();
     init_base_provider();
     init_constants2();
+    init_gemini_image_models();
     init_gemini_models();
+    init_gemini_speech_models();
     init_utils();
     GEMINI_ROLE_MAP = {
       system: "user",
@@ -5738,6 +6765,139 @@ var init_gemini = __esm({
     getModelSpecs() {
       return GEMINI_MODELS;
     }
+    // =========================================================================
+    // Image Generation
+    // =========================================================================
+    getImageModelSpecs() {
+      return geminiImageModels;
+    }
+    supportsImageGeneration(modelId) {
+      return isGeminiImageModel(modelId);
+    }
+    async generateImage(options) {
+      const client = this.client;
+      const spec = getGeminiImageModelSpec(options.model);
+      const isImagenModel = options.model.startsWith("imagen");
+      const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
+      const n = options.n ?? 1;
+      if (isImagenModel) {
+        const response2 = await client.models.generateImages({
+          model: options.model,
+          prompt: options.prompt,
+          config: {
+            numberOfImages: n,
+            aspectRatio,
+            outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
+          }
+        });
+        const images2 = response2.generatedImages ?? [];
+        const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
+        return {
+          // Gemini's imageBytes is already base64 encoded, so use it directly
+          images: images2.map((img) => ({
+            b64Json: img.image?.imageBytes ?? void 0
+          })),
+          model: options.model,
+          usage: {
+            imagesGenerated: images2.length,
+            size: aspectRatio,
+            quality: "standard"
+          },
+          cost: cost2
+        };
+      }
+      const response = await client.models.generateContent({
+        model: options.model,
+        contents: [{ role: "user", parts: [{ text: options.prompt }] }],
+        config: {
+          responseModalities: [import_genai.Modality.IMAGE, import_genai.Modality.TEXT]
+        }
+      });
+      const images = [];
+      const candidate = response.candidates?.[0];
+      if (candidate?.content?.parts) {
+        for (const part of candidate.content.parts) {
+          if ("inlineData" in part && part.inlineData) {
+            images.push({
+              b64Json: part.inlineData.data
+            });
+          }
+        }
+      }
+      const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
+      return {
+        images,
+        model: options.model,
+        usage: {
+          imagesGenerated: images.length,
+          size: aspectRatio,
+          quality: "standard"
+        },
+        cost
+      };
+    }
+    // =========================================================================
+    // Speech Generation
+    // =========================================================================
+    getSpeechModelSpecs() {
+      return geminiSpeechModels;
+    }
+    supportsSpeechGeneration(modelId) {
+      return isGeminiSpeechModel(modelId);
+    }
+    async generateSpeech(options) {
+      const client = this.client;
+      const spec = getGeminiSpeechModelSpec(options.model);
+      const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
+      const response = await client.models.generateContent({
+        model: options.model,
+        contents: [
+          {
+            role: "user",
+            parts: [{ text: options.input }]
+          }
+        ],
+        config: {
+          responseModalities: [import_genai.Modality.AUDIO],
+          speechConfig: {
+            voiceConfig: {
+              prebuiltVoiceConfig: {
+                voiceName: voice
+              }
+            }
+          }
+        }
+      });
+      let pcmData;
+      const candidate = response.candidates?.[0];
+      if (candidate?.content?.parts) {
+        for (const part of candidate.content.parts) {
+          if ("inlineData" in part && part.inlineData?.data) {
+            const base64 = part.inlineData.data;
+            const binary = atob(base64);
+            pcmData = new Uint8Array(binary.length);
+            for (let i = 0; i < binary.length; i++) {
+              pcmData[i] = binary.charCodeAt(i);
+            }
+            break;
+          }
+        }
+      }
+      if (!pcmData) {
+        throw new Error("No audio data in Gemini TTS response");
+      }
+      const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
+      const cost = calculateGeminiSpeechCost(options.model, options.input.length);
+      return {
+        audio: audioData,
+        model: options.model,
+        usage: {
+          characterCount: options.input.length
+        },
+        cost,
+        format: spec?.defaultFormat ?? "wav"
+      };
+    }
     buildRequestPayload(options, descriptor, _spec, messages) {
       const contents = this.convertMessagesToContents(messages);
       const generationConfig = this.buildGenerationConfig(options);
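
Note: a hedged end-to-end sketch of the speech path above; provider stands for a
configured GeminiGenerativeProvider instance (an assumption, how it is obtained
is outside this diff):

    import { writeFile } from "node:fs/promises";

    const speech = await provider.generateSpeech({
      model: "gemini-2.5-flash-preview-tts",
      input: "Hello from llmist!",
      voice: "Kore"
    });
    // speech.audio is an ArrayBuffer already wrapped as WAV by wrapPcmInWav.
    await writeFile(`hello.${speech.format}`, Buffer.from(speech.audio));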
@@ -5755,7 +6915,7 @@ var init_gemini = __esm({
       };
       return {
         model: descriptor.name,
-        contents
+        contents,
         config
       };
     }
@@ -5790,18 +6950,25 @@ var init_gemini = __esm({
       if (message.role === "system") {
         expandedMessages.push({
           role: "user",
-          content: message.content
+          content: extractText(message.content)
         });
         expandedMessages.push({
           role: "assistant",
           content: "Understood."
         });
       } else {
-        expandedMessages.push(message);
+        expandedMessages.push({
+          role: message.role,
+          content: message.content
+        });
       }
     }
     return this.mergeConsecutiveMessages(expandedMessages);
   }
+    /**
+     * Merge consecutive messages with the same role (required by Gemini).
+     * Handles multimodal content by converting to Gemini's part format.
+     */
     mergeConsecutiveMessages(messages) {
       if (messages.length === 0) {
         return [];
@@ -5810,15 +6977,16 @@ var init_gemini = __esm({
       let currentGroup = null;
       for (const message of messages) {
         const geminiRole = GEMINI_ROLE_MAP[message.role];
+        const geminiParts = this.convertToGeminiParts(message.content);
         if (currentGroup && currentGroup.role === geminiRole) {
-          currentGroup.parts.push({ text: message.content });
+          currentGroup.parts.push(...geminiParts);
         } else {
           if (currentGroup) {
             result.push(currentGroup);
           }
           currentGroup = {
             role: geminiRole,
-            parts: [{ text: message.content }]
+            parts: geminiParts
           };
         }
       }
@@ -5827,11 +6995,39 @@ var init_gemini = __esm({
       }
       return result;
     }
+    /**
+     * Convert llmist content to Gemini's part format.
+     * Handles text, images, and audio (Gemini supports all three).
+     */
+    convertToGeminiParts(content) {
+      const parts = normalizeContent(content);
+      return parts.map((part) => {
+        if (part.type === "text") {
+          return { text: part.text };
+        }
+        if (part.type === "image") {
+          if (part.source.type === "url") {
+            throw new Error(
+              "Gemini does not support image URLs directly. Please provide base64-encoded image data."
+            );
+          }
+          return {
+            inlineData: {
+              mimeType: part.source.mediaType,
+              data: part.source.data
+            }
+          };
+        }
+        if (part.type === "audio") {
+          return {
+            inlineData: {
+              mimeType: part.source.mediaType,
+              data: part.source.data
+            }
+          };
+        }
+        throw new Error(`Unsupported content type: ${part.type}`);
+      });
     }
     buildGenerationConfig(options) {
       const config = {};
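
Note: convertToGeminiParts maps a mixed message onto Gemini's inlineData parts; a
two-part text + image message (values illustrative) comes out as:

    // [{ text: "Describe:" },
    //  { inlineData: { mimeType: "image/png", data: "<base64>" } }]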
@@ -5852,9 +7048,9 @@ var init_gemini = __esm({
     async *wrapStream(iterable) {
       const stream2 = iterable;
       for await (const chunk of stream2) {
-        const text = this.extractText(chunk);
-        if (text) {
-          yield { text, rawEvent: chunk };
+        const text3 = this.extractText(chunk);
+        if (text3) {
+          yield { text: text3, rawEvent: chunk };
         }
         const finishReason = this.extractFinishReason(chunk);
         const usage = this.extractUsage(chunk);
@@ -5915,7 +7111,7 @@ var init_gemini = __esm({
       try {
         const response = await client.models.countTokens({
           model: descriptor.name,
-          contents
+          contents
           // Note: systemInstruction not used - it's not supported by countTokens()
           // and would cause a 2100% token counting error
         });
@@ -5925,14 +7121,140 @@ var init_gemini = __esm({
         `Token counting failed for ${descriptor.name}, using fallback estimation:`,
         error
       );
+      let totalChars = 0;
+      let mediaCount = 0;
+      for (const msg of messages) {
+        const parts = normalizeContent(msg.content);
+        for (const part of parts) {
+          if (part.type === "text") {
+            totalChars += part.text.length;
+          } else if (part.type === "image" || part.type === "audio") {
+            mediaCount++;
+          }
+        }
+      }
+      return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
     }
   }
 };
 }
 });
 
+// src/providers/openai-image-models.ts
+function getOpenAIImageModelSpec(modelId) {
+  return openaiImageModels.find((m) => m.modelId === modelId);
+}
+function isOpenAIImageModel(modelId) {
+  return openaiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAIImageCost(modelId, size, quality = "standard", n = 1) {
+  const spec = getOpenAIImageModelSpec(modelId);
+  if (!spec) return void 0;
+  const sizePrice = spec.pricing.bySize?.[size];
+  if (sizePrice === void 0) return void 0;
+  let pricePerImage;
+  if (typeof sizePrice === "number") {
+    pricePerImage = sizePrice;
+  } else {
+    pricePerImage = sizePrice[quality];
+    if (pricePerImage === void 0) return void 0;
+  }
+  return pricePerImage * n;
+}
+var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
+var init_openai_image_models = __esm({
+  "src/providers/openai-image-models.ts"() {
+    "use strict";
+    GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
+    GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
+    DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
+    DALLE3_QUALITIES = ["standard", "hd"];
+    DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
+    openaiImageModels = [
+      // GPT Image 1 Family (flagship)
+      {
+        provider: "openai",
+        modelId: "gpt-image-1",
+        displayName: "GPT Image 1",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
+            "1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
+            "1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-image-1-mini",
+        displayName: "GPT Image 1 Mini",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 5e-3, medium: 0.02, high: 0.052 },
+            "1024x1536": { low: 75e-4, medium: 0.03, high: 0.078 },
+            "1536x1024": { low: 75e-4, medium: 0.03, high: 0.078 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      // DALL-E Family
+      {
+        provider: "openai",
+        modelId: "dall-e-3",
+        displayName: "DALL-E 3",
+        pricing: {
+          bySize: {
+            "1024x1024": { standard: 0.04, hd: 0.08 },
+            "1024x1792": { standard: 0.08, hd: 0.12 },
+            "1792x1024": { standard: 0.08, hd: 0.12 }
+          }
+        },
+        supportedSizes: [...DALLE3_SIZES],
+        supportedQualities: [...DALLE3_QUALITIES],
+        maxImages: 1,
+        // DALL-E 3 only supports n=1
+        defaultSize: "1024x1024",
+        defaultQuality: "standard",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "dall-e-2",
+        displayName: "DALL-E 2 (Legacy)",
+        pricing: {
+          bySize: {
+            "256x256": 0.016,
+            "512x512": 0.018,
+            "1024x1024": 0.02
+          }
+        },
+        supportedSizes: [...DALLE2_SIZES],
+        maxImages: 10,
+        defaultSize: "1024x1024"
+      }
+    ];
+  }
+});
+
 // src/providers/openai-models.ts
 var OPENAI_MODELS;
 var init_openai_models = __esm({
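
Note: worked examples of calculateOpenAIImageCost. For DALL-E 2 the per-size price
is a flat number, so the quality argument is ignored:

    calculateOpenAIImageCost("gpt-image-1", "1024x1536", "high", 2); // => 0.5 (2 * $0.25)
    calculateOpenAIImageCost("dall-e-2", "512x512", "standard", 3); // => 0.054 (3 * $0.018)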
@@ -6297,6 +7619,144 @@ var init_openai_models = __esm({
   }
 });
 
+// src/providers/openai-speech-models.ts
+function getOpenAISpeechModelSpec(modelId) {
+  return openaiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isOpenAISpeechModel(modelId) {
+  return openaiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getOpenAISpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perCharacter !== void 0) {
+    return characterCount * spec.pricing.perCharacter;
+  }
+  if (spec.pricing.perMinute !== void 0 && estimatedMinutes !== void 0) {
+    return estimatedMinutes * spec.pricing.perMinute;
+  }
+  if (spec.pricing.perMinute !== void 0) {
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
+var init_openai_speech_models = __esm({
+  "src/providers/openai-speech-models.ts"() {
+    "use strict";
+    OPENAI_TTS_VOICES = [
+      "alloy",
+      "echo",
+      "fable",
+      "onyx",
+      "nova",
+      "shimmer"
+    ];
+    OPENAI_TTS_EXTENDED_VOICES = [
+      ...OPENAI_TTS_VOICES,
+      "ash",
+      "ballad",
+      "coral",
+      "sage",
+      "verse"
+    ];
+    OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
+    openaiSpeechModels = [
+      // Standard TTS models (character-based pricing)
+      {
+        provider: "openai",
+        modelId: "tts-1",
+        displayName: "TTS-1",
+        pricing: {
+          // $15 per 1M characters = $0.000015 per character
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-1106",
+        displayName: "TTS-1 (Nov 2023)",
+        pricing: {
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd",
+        displayName: "TTS-1 HD",
+        pricing: {
+          // $30 per 1M characters = $0.00003 per character
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd-1106",
+        displayName: "TTS-1 HD (Nov 2023)",
+        pricing: {
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      // Token-based TTS model with voice instructions support
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini-tts",
+        displayName: "GPT-4o Mini TTS",
+        pricing: {
+          // $0.60 per 1M input tokens = $0.0000006 per token
+          perInputToken: 6e-7,
+          // $12 per 1M audio output tokens = $0.000012 per token
+          perAudioOutputToken: 12e-6,
+          // ~$0.015 per minute of audio
+          perMinute: 0.015
+        },
+        voices: [...OPENAI_TTS_EXTENDED_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 2e3,
+        // tokens, not characters
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/openai.ts
 function sanitizeExtra(extra, allowTemperature) {
   if (!extra) {
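
Note: calculateOpenAISpeechCost prefers per-character pricing, then an explicit
minutes estimate, then the same 750 characters-per-minute heuristic used on the
Gemini side:

    calculateOpenAISpeechCost("tts-1-hd", 1000); // => 0.03 (1000 * $0.00003 per character)
    calculateOpenAISpeechCost("gpt-4o-mini-tts", 1500); // => 0.03 (1500/750 = 2 min * $0.015)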
@@ -6316,9 +7776,12 @@ var init_openai = __esm({
     "use strict";
     import_openai = __toESM(require("openai"), 1);
     import_tiktoken = require("tiktoken");
+    init_messages();
     init_base_provider();
     init_constants2();
+    init_openai_image_models();
     init_openai_models();
+    init_openai_speech_models();
     init_utils();
     ROLE_MAP = {
       system: "system",
@@ -6333,6 +7796,87 @@ var init_openai = __esm({
|
|
|
6333
   getModelSpecs() {
     return OPENAI_MODELS;
   }
+  // =========================================================================
+  // Image Generation
+  // =========================================================================
+  getImageModelSpecs() {
+    return openaiImageModels;
+  }
+  supportsImageGeneration(modelId) {
+    return isOpenAIImageModel(modelId);
+  }
+  async generateImage(options) {
+    const client = this.client;
+    const spec = getOpenAIImageModelSpec(options.model);
+    const size = options.size ?? spec?.defaultSize ?? "1024x1024";
+    const quality = options.quality ?? spec?.defaultQuality ?? "standard";
+    const n = options.n ?? 1;
+    const isDallE2 = options.model === "dall-e-2";
+    const isGptImage = options.model.startsWith("gpt-image");
+    const requestParams = {
+      model: options.model,
+      prompt: options.prompt,
+      size,
+      n
+    };
+    if (!isDallE2 && !isGptImage) {
+      requestParams.quality = quality;
+    }
+    if (isGptImage) {
+    } else if (!isDallE2) {
+      requestParams.response_format = options.responseFormat ?? "url";
+    }
+    const response = await client.images.generate(requestParams);
+    const cost = calculateOpenAIImageCost(options.model, size, quality, n);
+    const images = response.data ?? [];
+    return {
+      images: images.map((img) => ({
+        url: img.url,
+        b64Json: img.b64_json,
+        revisedPrompt: img.revised_prompt
+      })),
+      model: options.model,
+      usage: {
+        imagesGenerated: images.length,
+        size,
+        quality
+      },
+      cost
+    };
+  }
+  // =========================================================================
+  // Speech Generation
+  // =========================================================================
+  getSpeechModelSpecs() {
+    return openaiSpeechModels;
+  }
+  supportsSpeechGeneration(modelId) {
+    return isOpenAISpeechModel(modelId);
+  }
+  async generateSpeech(options) {
+    const client = this.client;
+    const spec = getOpenAISpeechModelSpec(options.model);
+    const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
+    const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
+    const response = await client.audio.speech.create({
+      model: options.model,
+      input: options.input,
+      voice,
+      response_format: format,
+      speed: options.speed ?? 1
+    });
+    const audioBuffer = await response.arrayBuffer();
+    const cost = calculateOpenAISpeechCost(options.model, options.input.length);
+    return {
+      audio: audioBuffer,
+      model: options.model,
+      usage: {
+        characterCount: options.input.length
+      },
+      cost,
+      format
+    };
+  }
   buildRequestPayload(options, descriptor, spec, messages) {
     const { maxTokens, temperature, topP, stopSequences, extra } = options;
     const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
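The two new methods above are thin wrappers over the official `openai` SDK. Below is a minimal sketch of the underlying calls they make; the client setup, model names, prompt, and voice values are illustrative, not part of this package's API:

    import OpenAI from "openai";

    const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

    // Image generation. Per the branches above, quality and response_format are
    // only sent for DALL-E 3 style models; "dall-e-2" and "gpt-image*" models
    // get neither extra parameter.
    const img = await openai.images.generate({
      model: "dall-e-3",
      prompt: "a lighthouse at dusk",
      size: "1024x1024",
      quality: "standard",
      response_format: "url",
      n: 1,
    });
    console.log(img.data?.[0]?.url);

    // Speech generation; the adapter reads the response body as an ArrayBuffer.
    const speech = await openai.audio.speech.create({
      model: "tts-1",
      input: "Hello from llmist",
      voice: "alloy",
      response_format: "mp3",
      speed: 1,
    });
    const audio = await speech.arrayBuffer();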
@@ -6340,11 +7884,7 @@ var init_openai = __esm({
     const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
     return {
       model: descriptor.name,
-      messages: messages.map((message) => ({
-        role: ROLE_MAP[message.role],
-        content: message.content,
-        name: message.name
-      })),
+      messages: messages.map((message) => this.convertToOpenAIMessage(message)),
       // Only set max_completion_tokens if explicitly provided
       // Otherwise let the API use "as much as fits" in the context window
       ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -6356,6 +7896,77 @@ var init_openai = __esm({
       ...shouldIncludeTemperature ? { temperature } : {}
     };
   }
+  /**
+   * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
+   * Handles role-specific content type requirements:
+   * - system/assistant: string content only
+   * - user: string or multimodal array content
+   */
+  convertToOpenAIMessage(message) {
+    const role = ROLE_MAP[message.role];
+    if (role === "user") {
+      const content = this.convertToOpenAIContent(message.content);
+      return {
+        role: "user",
+        content,
+        ...message.name ? { name: message.name } : {}
+      };
+    }
+    const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
+    if (role === "system") {
+      return {
+        role: "system",
+        content: textContent,
+        ...message.name ? { name: message.name } : {}
+      };
+    }
+    return {
+      role: "assistant",
+      content: textContent,
+      ...message.name ? { name: message.name } : {}
+    };
+  }
+  /**
+   * Convert llmist content to OpenAI's content format.
+   * Optimizes by returning string for text-only content, array for multimodal.
+   */
+  convertToOpenAIContent(content) {
+    if (typeof content === "string") {
+      return content;
+    }
+    return content.map((part) => {
+      if (part.type === "text") {
+        return { type: "text", text: part.text };
+      }
+      if (part.type === "image") {
+        return this.convertImagePart(part);
+      }
+      if (part.type === "audio") {
+        throw new Error(
+          "OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
+        );
+      }
+      throw new Error(`Unsupported content type: ${part.type}`);
+    });
+  }
+  /**
+   * Convert an image content part to OpenAI's image_url format.
+   * Supports both URLs and base64 data URLs.
+   */
+  convertImagePart(part) {
+    if (part.source.type === "url") {
+      return {
+        type: "image_url",
+        image_url: { url: part.source.url }
+      };
+    }
+    return {
+      type: "image_url",
+      image_url: {
+        url: `data:${part.source.mediaType};base64,${part.source.data}`
+      }
+    };
+  }
   async executeStreamRequest(payload, signal) {
     const client = this.client;
     const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
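For orientation, the shape transformation the three converters above perform on a multimodal user message looks like this; the object literals use the part shapes visible in this file and are illustrative, not a public type definition:

    // llmist-side content parts...
    const llmistContent = [
      { type: "text", text: "What is in this picture?" },
      { type: "image", source: { type: "base64", mediaType: "image/png", data: "iVBORw0KGgo..." } },
    ];

    // ...become OpenAI chat-completions content parts:
    const openaiContent = [
      { type: "text", text: "What is in this picture?" },
      { type: "image_url", image_url: { url: "data:image/png;base64,iVBORw0KGgo..." } },
    ];

URL-sourced images pass through as { type: "image_url", image_url: { url } }, audio parts throw, and system/assistant messages are flattened to plain text via extractText.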
@@ -6364,9 +7975,9 @@
   async *wrapStream(iterable) {
     const stream2 = iterable;
     for await (const chunk of stream2) {
-      const text = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
-      if (text) {
-        yield { text, rawEvent: chunk };
+      const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
+      if (text3) {
+        yield { text: text3, rawEvent: chunk };
       }
       const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
       const usage = chunk.usage ? {
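A note that applies to this and most of the remaining hunks in this file: local `text` bindings are renamed to `text3` throughout. This appears to be the bundler's automatic identifier de-duplication after a new top-level helper named `text` entered the bundle; none of the renamed functions change behavior. The chunk-joining line above, for instance, is identical apart from the identifier:

    const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");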
@@ -6414,17 +8025,26 @@ var init_openai = __esm({
     }
     try {
       let tokenCount = 0;
+      let imageCount = 0;
       for (const message of messages) {
         tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
         const roleText = ROLE_MAP[message.role];
         tokenCount += encoding.encode(roleText).length;
-        tokenCount += encoding.encode(message.content).length;
+        const textContent = extractText(message.content);
+        tokenCount += encoding.encode(textContent).length;
+        const parts = normalizeContent(message.content);
+        for (const part of parts) {
+          if (part.type === "image") {
+            imageCount++;
+          }
+        }
         if (message.name) {
           tokenCount += encoding.encode(message.name).length;
           tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
         }
       }
       tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
+      tokenCount += imageCount * 765;
       return tokenCount;
     } finally {
       encoding.free();
@@ -6434,8 +8054,19 @@ var init_openai = __esm({
         `Token counting failed for ${descriptor.name}, using fallback estimation:`,
         error
       );
-      const totalChars = messages.reduce((sum, msg) => sum + msg.content.length, 0);
-      return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
+      let totalChars = 0;
+      let imageCount = 0;
+      for (const msg of messages) {
+        const parts = normalizeContent(msg.content);
+        for (const part of parts) {
+          if (part.type === "text") {
+            totalChars += part.text.length;
+          } else if (part.type === "image") {
+            imageCount++;
+          }
+        }
+      }
+      return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
     }
   }
 };
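Both the tiktoken path and the character-count fallback above now charge a flat 765 tokens per attached image, a coarse estimate that ignores image size and detail level (OpenAI's actual vision token cost varies with both). The fallback arithmetic as a standalone sketch; the charsPerToken default of 4 is an assumption standing in for FALLBACK_CHARS_PER_TOKEN:

    const estimateTokens = (chars: number, images: number, charsPerToken = 4): number =>
      Math.ceil(chars / charsPerToken) + images * 765;

    estimateTokens(400, 2); // 100 text tokens + 1530 image tokens = 1630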
@@ -6673,30 +8304,109 @@ var init_model_registry = __esm({
   }
 });
 
-// src/core/options.ts
-var ModelIdentifierParser;
-var init_options = __esm({
-  "src/core/options.ts"() {
+// src/core/namespaces/image.ts
+var ImageNamespace;
+var init_image = __esm({
+  "src/core/namespaces/image.ts"() {
+    "use strict";
+    ImageNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
+        this.defaultProvider = defaultProvider;
+      }
+      /**
+       * Generate images from a text prompt.
+       *
+       * @param options - Image generation options
+       * @returns Promise resolving to the generation result with images and cost
+       * @throws Error if the provider doesn't support image generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findImageAdapter(modelId);
+        if (!adapter || !adapter.generateImage) {
+          throw new Error(
+            `No provider supports image generation for model "${modelId}". Available image models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
+        }
+        return adapter.generateImage(options);
+      }
+      /**
+       * List all available image generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getImageModelSpecs) {
+            models.push(...adapter.getImageModelSpecs());
+          }
+        }
+        return models;
+      }
+      /**
+       * Check if a model is supported for image generation.
+       */
+      supportsModel(modelId) {
+        return this.findImageAdapter(modelId) !== void 0;
+      }
+      findImageAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsImageGeneration?.(modelId) ?? false
+        );
+      }
+    };
+  }
+});
+
+// src/core/namespaces/speech.ts
+var SpeechNamespace;
+var init_speech = __esm({
+  "src/core/namespaces/speech.ts"() {
     "use strict";
-    ModelIdentifierParser = class {
-      constructor(defaultProvider = "openai") {
+    SpeechNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
         this.defaultProvider = defaultProvider;
       }
-      parse(identifier) {
-        const trimmed = identifier.trim();
-        if (!trimmed) {
-          throw new Error("Model identifier cannot be empty");
-        }
-        const [maybeProvider, ...rest] = trimmed.split(":");
-        if (rest.length === 0) {
-          return { provider: this.defaultProvider, name: maybeProvider };
+      /**
+       * Generate speech audio from text.
+       *
+       * @param options - Speech generation options
+       * @returns Promise resolving to the generation result with audio and cost
+       * @throws Error if the provider doesn't support speech generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findSpeechAdapter(modelId);
+        if (!adapter || !adapter.generateSpeech) {
+          throw new Error(
+            `No provider supports speech generation for model "${modelId}". Available speech models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-        const provider = maybeProvider;
-        const name = rest.join(":");
-        if (!name) {
-          throw new Error("Model name cannot be empty");
+        return adapter.generateSpeech(options);
+      }
+      /**
+       * List all available speech generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getSpeechModelSpecs) {
+            models.push(...adapter.getSpeechModelSpecs());
+          }
         }
-        return { provider, name };
+        return models;
+      }
+      /**
+       * Check if a model is supported for speech generation.
+       */
+      supportsModel(modelId) {
+        return this.findSpeechAdapter(modelId) !== void 0;
+      }
+      findSpeechAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsSpeechGeneration?.(modelId) ?? false
+        );
       }
     };
   }
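Both namespaces dispatch by asking each registered adapter whether it claims the requested model. A sketch of driving them, assuming an LLMist instance exposes them as client.image and client.speech (the wiring appears in the client.ts hunk further down); the model IDs are examples:

    const imageResult = await client.image.generate({
      model: "dall-e-3",
      prompt: "a red door in a stone wall",
    });
    console.log(imageResult.usage.imagesGenerated, imageResult.cost);

    const speechResult = await client.speech.generate({
      model: "tts-1",
      input: "Hello!",
      voice: "alloy",
    });
    // speechResult.audio is an ArrayBuffer; speechResult.format is e.g. "mp3"

    client.image.supportsModel("dall-e-3"); // true once some adapter claims it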
@@ -6745,6 +8455,201 @@ var init_quick_methods = __esm({
   }
 });
 
+// src/core/namespaces/text.ts
+var TextNamespace;
+var init_text = __esm({
+  "src/core/namespaces/text.ts"() {
+    "use strict";
+    init_quick_methods();
+    TextNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Generate a complete text response.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Complete text response
+       */
+      async complete(prompt, options) {
+        return complete(this.client, prompt, options);
+      }
+      /**
+       * Stream text chunks.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Async generator yielding text chunks
+       */
+      stream(prompt, options) {
+        return stream(this.client, prompt, options);
+      }
+    };
+  }
+});
+
+// src/core/namespaces/vision.ts
+var VisionNamespace;
+var init_vision = __esm({
+  "src/core/namespaces/vision.ts"() {
+    "use strict";
+    init_input_content();
+    init_messages();
+    VisionNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Build a message builder with the image content attached.
+       * Handles URLs, data URLs, base64 strings, and binary buffers.
+       */
+      buildImageMessage(options) {
+        const builder = new LLMMessageBuilder();
+        if (options.systemPrompt) {
+          builder.addSystem(options.systemPrompt);
+        }
+        if (typeof options.image === "string") {
+          if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
+            builder.addUserWithImageUrl(options.prompt, options.image);
+          } else if (isDataUrl(options.image)) {
+            const parsed = parseDataUrl(options.image);
+            if (!parsed) {
+              throw new Error("Invalid data URL format");
+            }
+            builder.addUserWithImage(
+              options.prompt,
+              parsed.data,
+              parsed.mimeType
+            );
+          } else {
+            const buffer = Buffer.from(options.image, "base64");
+            builder.addUserWithImage(options.prompt, buffer, options.mimeType);
+          }
+        } else {
+          builder.addUserWithImage(options.prompt, options.image, options.mimeType);
+        }
+        return builder;
+      }
+      /**
+       * Stream the response and collect text and usage information.
+       */
+      async streamAndCollect(options, builder) {
+        let response = "";
+        let finalUsage;
+        for await (const chunk of this.client.stream({
+          model: options.model,
+          messages: builder.build(),
+          maxTokens: options.maxTokens,
+          temperature: options.temperature
+        })) {
+          response += chunk.text;
+          if (chunk.usage) {
+            finalUsage = {
+              inputTokens: chunk.usage.inputTokens,
+              outputTokens: chunk.usage.outputTokens,
+              totalTokens: chunk.usage.totalTokens
+            };
+          }
+        }
+        return { text: response.trim(), usage: finalUsage };
+      }
+      /**
+       * Analyze an image with a vision-capable model.
+       * Returns the analysis as a string.
+       *
+       * @param options - Vision analysis options
+       * @returns Promise resolving to the analysis text
+       * @throws Error if the image format is unsupported or model doesn't support vision
+       *
+       * @example
+       * ```typescript
+       * // From file
+       * const result = await llmist.vision.analyze({
+       *   model: "gpt-4o",
+       *   image: await fs.readFile("photo.jpg"),
+       *   prompt: "What's in this image?",
+       * });
+       *
+       * // From URL (OpenAI only)
+       * const result = await llmist.vision.analyze({
+       *   model: "gpt-4o",
+       *   image: "https://example.com/image.jpg",
+       *   prompt: "Describe this image",
+       * });
+       * ```
+       */
+      async analyze(options) {
+        const builder = this.buildImageMessage(options);
+        const { text: text3 } = await this.streamAndCollect(options, builder);
+        return text3;
+      }
+      /**
+       * Analyze an image and return detailed result with usage info.
+       *
+       * @param options - Vision analysis options
+       * @returns Promise resolving to the analysis result with usage info
+       */
+      async analyzeWithUsage(options) {
+        const builder = this.buildImageMessage(options);
+        const { text: text3, usage } = await this.streamAndCollect(options, builder);
+        return {
+          text: text3,
+          model: options.model,
+          usage
+        };
+      }
+      /**
+       * Check if a model supports vision/image input.
+       *
+       * @param modelId - Model ID to check
+       * @returns True if the model supports vision
+       */
+      supportsModel(modelId) {
+        const spec = this.client.modelRegistry.getModelSpec(modelId);
+        return spec?.features?.vision === true;
+      }
+      /**
+       * List all models that support vision.
+       *
+       * @returns Array of model IDs that support vision
+       */
+      listModels() {
+        return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
+      }
+    };
+  }
+});
+
+// src/core/options.ts
+var ModelIdentifierParser;
+var init_options = __esm({
+  "src/core/options.ts"() {
+    "use strict";
+    ModelIdentifierParser = class {
+      constructor(defaultProvider = "openai") {
+        this.defaultProvider = defaultProvider;
+      }
+      parse(identifier) {
+        const trimmed = identifier.trim();
+        if (!trimmed) {
+          throw new Error("Model identifier cannot be empty");
+        }
+        const [maybeProvider, ...rest] = trimmed.split(":");
+        if (rest.length === 0) {
+          return { provider: this.defaultProvider, name: maybeProvider };
+        }
+        const provider = maybeProvider;
+        const name = rest.join(":");
+        if (!name) {
+          throw new Error("Model name cannot be empty");
+        }
+        return { provider, name };
+      }
+    };
+  }
+});
+
 // src/core/client.ts
 var client_exports = {};
 __export(client_exports, {
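ModelIdentifierParser, re-emitted above after the module reordering, splits on the first colon and keeps any further colons in the model name. Expected behavior per parse():

    const parser = new ModelIdentifierParser(); // default provider "openai"
    parser.parse("gpt-4o");               // { provider: "openai", name: "gpt-4o" }
    parser.parse("anthropic:claude-3");   // { provider: "anthropic", name: "claude-3" }
    parser.parse("openai:ft:gpt-4o:org"); // { provider: "openai", name: "ft:gpt-4o:org" }
    parser.parse("openai:");              // throws: "Model name cannot be empty"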
@@ -6757,12 +8662,22 @@ var init_client = __esm({
     init_builder();
     init_discovery();
     init_model_registry();
+    init_image();
+    init_speech();
+    init_text();
+    init_vision();
     init_options();
     init_quick_methods();
     LLMist = class _LLMist {
       parser;
+      defaultProvider;
       modelRegistry;
       adapters;
+      // Namespaces for different generation types
+      text;
+      image;
+      speech;
+      vision;
       constructor(...args) {
         let adapters = [];
         let defaultProvider;
@@ -6801,6 +8716,7 @@ var init_client = __esm({
           const priorityB = b.priority ?? 0;
           return priorityB - priorityA;
         });
+        this.defaultProvider = resolvedDefaultProvider;
         this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
         this.modelRegistry = new ModelRegistry();
         for (const adapter of this.adapters) {
@@ -6809,6 +8725,10 @@ var init_client = __esm({
         if (customModels.length > 0) {
           this.modelRegistry.registerModels(customModels);
         }
+        this.text = new TextNamespace(this);
+        this.image = new ImageNamespace(this.adapters, this.defaultProvider);
+        this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
+        this.vision = new VisionNamespace(this);
       }
       stream(options) {
         const descriptor = this.parser.parse(options.model);
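With this wiring, every LLMist instance carries the four generation namespaces. A sketch of the resulting surface; construction details (adapter registration, API keys) are elided and assumed:

    const client = new LLMist();
    const greeting = await client.text.complete("Say hi in five words");
    const hasVision = client.vision.supportsModel("gpt-4o"); // registry-driven feature check
    // client.image and client.speech dispatch across adapters, as sketched earlier.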
@@ -7275,9 +9195,9 @@ function sleep(ms) {
 function generateInvocationId() {
   return `inv-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
 }
-function splitIntoChunks(text, minChunkSize = 5, maxChunkSize = 30) {
+function splitIntoChunks(text3, minChunkSize = 5, maxChunkSize = 30) {
   const chunks = [];
-  let remaining = text;
+  let remaining = text3;
   while (remaining.length > 0) {
     const chunkSize = Math.min(
       Math.floor(Math.random() * (maxChunkSize - minChunkSize + 1)) + minChunkSize,
@@ -7336,17 +9256,17 @@ ${String(value)}
   return result;
 }
 function formatGadgetCalls(gadgetCalls) {
-  let text = "";
+  let text3 = "";
   const calls = [];
   for (const call of gadgetCalls) {
     const invocationId = call.invocationId ?? generateInvocationId();
     calls.push({ name: call.gadgetName, invocationId });
     const blockParams = serializeToBlockFormat(call.parameters);
-    text += `
+    text3 += `
 ${GADGET_START_PREFIX}${call.gadgetName}
 ${blockParams}${GADGET_END_PREFIX}`;
   }
-  return { text, calls };
+  return { text: text3, calls };
 }
 async function* createMockStream(response) {
   if (response.delayMs) {
@@ -7386,9 +9306,9 @@ async function* createMockStream(response) {
     };
   }
 }
-function createTextMockStream(text, options) {
+function createTextMockStream(text3, options) {
   return createMockStream({
-    text,
+    text: text3,
     delayMs: options?.delayMs,
     streamDelayMs: options?.streamDelayMs,
     usage: options?.usage,
@@ -7405,10 +9325,10 @@ var MockProviderAdapter = class {
   constructor(options) {
     this.mockManager = getMockManager(options);
   }
-  supports(descriptor) {
+  supports(_descriptor) {
     return true;
   }
-  stream(options, descriptor, spec) {
+  stream(options, descriptor, _spec) {
     const context = {
       model: options.model,
       provider: descriptor.provider,
@@ -7419,20 +9339,154 @@ var MockProviderAdapter = class {
     return this.createMockStreamFromContext(context);
   }
   async *createMockStreamFromContext(context) {
-    … (the previous 13-line body was not captured in this diff view)
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse) {
+      yield {
+        text: "",
+        finishReason: "stop",
+        usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
+      };
+      return;
+    }
+    yield* createMockStream(mockResponse);
+  }
+  // ==========================================================================
+  // Image Generation Support
+  // ==========================================================================
+  /**
+   * Check if this adapter supports image generation for a given model.
+   * Returns true if there's a registered mock with images for this model.
+   */
+  supportsImageGeneration(_modelId) {
+    return true;
+  }
+  /**
+   * Generate mock images based on registered mocks.
+   *
+   * @param options - Image generation options
+   * @returns Mock image generation result
+   */
+  async generateImage(options) {
+    const context = {
+      model: options.model,
+      provider: "mock",
+      modelName: options.model,
+      options: {
+        model: options.model,
+        messages: [{ role: "user", content: options.prompt }]
+      },
+      messages: [{ role: "user", content: options.prompt }]
+    };
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse?.images || mockResponse.images.length === 0) {
+      throw new Error(
+        `No mock registered for image generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsImage(...).register() to add one.`
+      );
+    }
+    return this.createImageResult(options, mockResponse);
+  }
+  /**
+   * Transform mock response into ImageGenerationResult format.
+   *
+   * @param options - Original image generation options
+   * @param mockResponse - Mock response containing image data
+   * @returns ImageGenerationResult with mock data and zero cost
+   */
+  createImageResult(options, mockResponse) {
+    const images = mockResponse.images ?? [];
+    return {
+      images: images.map((img) => ({
+        b64Json: img.data,
+        revisedPrompt: img.revisedPrompt
+      })),
+      model: options.model,
+      usage: {
+        imagesGenerated: images.length,
+        size: options.size ?? "1024x1024",
+        quality: options.quality ?? "standard"
+      },
+      cost: 0
+      // Mock cost is always 0
+    };
+  }
+  // ==========================================================================
+  // Speech Generation Support
+  // ==========================================================================
+  /**
+   * Check if this adapter supports speech generation for a given model.
+   * Returns true if there's a registered mock with audio for this model.
+   */
+  supportsSpeechGeneration(_modelId) {
+    return true;
+  }
+  /**
+   * Generate mock speech based on registered mocks.
+   *
+   * @param options - Speech generation options
+   * @returns Mock speech generation result
+   */
+  async generateSpeech(options) {
+    const context = {
+      model: options.model,
+      provider: "mock",
+      modelName: options.model,
+      options: {
+        model: options.model,
+        messages: [{ role: "user", content: options.input }]
+      },
+      messages: [{ role: "user", content: options.input }]
+    };
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse?.audio) {
+      throw new Error(
+        `No mock registered for speech generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsAudio(...).register() to add one.`
+      );
+    }
+    return this.createSpeechResult(options, mockResponse);
+  }
+  /**
+   * Transform mock response into SpeechGenerationResult format.
+   * Converts base64 audio data to ArrayBuffer.
+   *
+   * @param options - Original speech generation options
+   * @param mockResponse - Mock response containing audio data
+   * @returns SpeechGenerationResult with mock data and zero cost
+   */
+  createSpeechResult(options, mockResponse) {
+    const audio = mockResponse.audio;
+    const binaryString = atob(audio.data);
+    const bytes = new Uint8Array(binaryString.length);
+    for (let i = 0; i < binaryString.length; i++) {
+      bytes[i] = binaryString.charCodeAt(i);
     }
+    const format = this.mimeTypeToAudioFormat(audio.mimeType);
+    return {
+      audio: bytes.buffer,
+      model: options.model,
+      usage: {
+        characterCount: options.input.length
+      },
+      cost: 0,
+      // Mock cost is always 0
+      format
+    };
+  }
+  /**
+   * Map MIME type to audio format for SpeechGenerationResult.
+   * Defaults to "mp3" for unknown MIME types.
+   *
+   * @param mimeType - Audio MIME type string
+   * @returns Audio format identifier
+   */
+  mimeTypeToAudioFormat(mimeType) {
+    const mapping = {
+      "audio/mp3": "mp3",
+      "audio/mpeg": "mp3",
+      "audio/wav": "wav",
+      "audio/webm": "opus",
+      "audio/ogg": "opus"
+    };
+    return mapping[mimeType] ?? "mp3";
   }
 };
 function createMockAdapter(options) {
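A sketch of exercising the mock adapter's new surface from a test. mockLLM(), returnsImage(), and returnsAudio() are defined later in this file; pngBuffer and mp3Buffer are assumed fixtures, and mockAdapter stands for an adapter instance reached through a test-configured client:

    mockLLM()
      .forModel("dall-e-3")
      .returnsImage(pngBuffer) // MIME type auto-detected from magic bytes
      .register();

    const result = await mockAdapter.generateImage({ model: "dall-e-3", prompt: "a cat" });
    // result.cost === 0; result.images[0].b64Json holds the registered data

    mockLLM().forModel("tts-1").returnsAudio(mp3Buffer).register();
    const speech = await mockAdapter.generateSpeech({ model: "tts-1", input: "hi" });
    // speech.audio is an ArrayBuffer decoded from the registered base64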
@@ -7440,6 +9494,20 @@ function createMockAdapter(options) {
 }
 
 // src/testing/mock-builder.ts
+init_input_content();
+init_messages();
+function hasImageContent(content) {
+  if (typeof content === "string") return false;
+  return content.some((part) => isImagePart(part));
+}
+function hasAudioContent(content) {
+  if (typeof content === "string") return false;
+  return content.some((part) => isAudioPart(part));
+}
+function countImages(content) {
+  if (typeof content === "string") return 0;
+  return content.filter((part) => isImagePart(part)).length;
+}
 var MockBuilder = class {
   matchers = [];
   response = {};
@@ -7502,9 +9570,9 @@ var MockBuilder = class {
    * @example
    * mockLLM().whenMessageContains('hello')
    */
-  whenMessageContains(text) {
+  whenMessageContains(text3) {
     this.matchers.push(
-      (ctx) => ctx.messages.some((msg) => msg.content.toLowerCase().includes(text.toLowerCase()))
+      (ctx) => ctx.messages.some((msg) => extractText(msg.content).toLowerCase().includes(text3.toLowerCase()))
     );
     return this;
   }
@@ -7514,10 +9582,11 @@ var MockBuilder = class {
    * @example
    * mockLLM().whenLastMessageContains('goodbye')
    */
-  whenLastMessageContains(text) {
+  whenLastMessageContains(text3) {
     this.matchers.push((ctx) => {
       const lastMsg = ctx.messages[ctx.messages.length - 1];
-      return lastMsg?.content.toLowerCase().includes(text.toLowerCase()) ?? false;
+      if (!lastMsg) return false;
+      return extractText(lastMsg.content).toLowerCase().includes(text3.toLowerCase());
     });
     return this;
   }
@@ -7528,7 +9597,7 @@ var MockBuilder = class {
    * mockLLM().whenMessageMatches(/calculate \d+/)
    */
   whenMessageMatches(regex) {
-    this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(msg.content)));
+    this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(extractText(msg.content))));
     return this;
   }
   /**
@@ -7537,10 +9606,10 @@ var MockBuilder = class {
    * @example
    * mockLLM().whenRoleContains('system', 'You are a helpful assistant')
    */
-  whenRoleContains(role, text) {
+  whenRoleContains(role, text3) {
     this.matchers.push(
       (ctx) => ctx.messages.some(
-        (msg) => msg.role === role && msg.content.toLowerCase().includes(text.toLowerCase())
+        (msg) => msg.role === role && extractText(msg.content).toLowerCase().includes(text3.toLowerCase())
       )
     );
     return this;
@@ -7568,6 +9637,43 @@ var MockBuilder = class {
     this.matchers.push(matcher);
     return this;
   }
+  // ==========================================================================
+  // Multimodal Matchers
+  // ==========================================================================
+  /**
+   * Match when any message contains an image.
+   *
+   * @example
+   * mockLLM().whenMessageHasImage().returns("I see an image of a sunset.")
+   */
+  whenMessageHasImage() {
+    this.matchers.push((ctx) => ctx.messages.some((msg) => hasImageContent(msg.content)));
+    return this;
+  }
+  /**
+   * Match when any message contains audio.
+   *
+   * @example
+   * mockLLM().whenMessageHasAudio().returns("I hear music playing.")
+   */
+  whenMessageHasAudio() {
+    this.matchers.push((ctx) => ctx.messages.some((msg) => hasAudioContent(msg.content)));
+    return this;
+  }
+  /**
+   * Match based on the number of images in the last message.
+   *
+   * @example
+   * mockLLM().whenImageCount((n) => n >= 2).returns("Comparing multiple images...")
+   */
+  whenImageCount(predicate) {
+    this.matchers.push((ctx) => {
+      const lastMsg = ctx.messages[ctx.messages.length - 1];
+      if (!lastMsg) return false;
+      return predicate(countImages(lastMsg.content));
+    });
+    return this;
+  }
   /**
    * Set the text response to return.
    * Can be a static string or a function that returns a string dynamically.
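The new matchers compose with the existing text matchers, so a mock can key on modality as well as content. An illustrative combination, using only names from the hunk above:

    mockLLM()
      .whenMessageHasImage()
      .whenImageCount((n) => n >= 2)
      .returns("Both images show the same scene from different angles.")
      .register();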
@@ -7577,17 +9683,17 @@ var MockBuilder = class {
    * mockLLM().returns(() => `Response at ${Date.now()}`)
    * mockLLM().returns((ctx) => `You said: ${ctx.messages[0]?.content}`)
    */
-  returns(text) {
-    if (typeof text === "function") {
+  returns(text3) {
+    if (typeof text3 === "function") {
       this.response = async (ctx) => {
-        const resolvedText = await Promise.resolve().then(() => text(ctx));
+        const resolvedText = await Promise.resolve().then(() => text3(ctx));
         return { text: resolvedText };
       };
     } else {
       if (typeof this.response === "function") {
         throw new Error("Cannot use returns() after withResponse() with a function");
       }
-      this.response.text = text;
+      this.response.text = text3;
     }
     return this;
   }
@@ -7624,6 +9730,112 @@ var MockBuilder = class {
     this.response.gadgetCalls.push({ gadgetName, parameters });
     return this;
   }
+  // ==========================================================================
+  // Multimodal Response Helpers
+  // ==========================================================================
+  /**
+   * Return a single image in the response.
+   * Useful for mocking image generation endpoints.
+   *
+   * @param data - Image data (base64 string or Buffer)
+   * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('dall-e-3')
+   *   .returnsImage(pngBuffer)
+   *   .register();
+   */
+  returnsImage(data, mimeType) {
+    if (typeof this.response === "function") {
+      throw new Error("Cannot use returnsImage() after withResponse() with a function");
+    }
+    let imageData;
+    let imageMime;
+    if (typeof data === "string") {
+      imageData = data;
+      if (!mimeType) {
+        throw new Error("MIME type is required when providing base64 string data");
+      }
+      imageMime = mimeType;
+    } else {
+      imageData = toBase64(data);
+      const detected = mimeType ?? detectImageMimeType(data);
+      if (!detected) {
+        throw new Error(
+          "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+        );
+      }
+      imageMime = detected;
+    }
+    if (!this.response.images) {
+      this.response.images = [];
+    }
+    this.response.images.push({ data: imageData, mimeType: imageMime });
+    return this;
+  }
+  /**
+   * Return multiple images in the response.
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('dall-e-3')
+   *   .returnsImages([
+   *     { data: pngBuffer1 },
+   *     { data: pngBuffer2 },
+   *   ])
+   *   .register();
+   */
+  returnsImages(images) {
+    for (const img of images) {
+      this.returnsImage(img.data, img.mimeType);
+      if (img.revisedPrompt && this.response && typeof this.response !== "function") {
+        const lastImage = this.response.images?.[this.response.images.length - 1];
+        if (lastImage) {
+          lastImage.revisedPrompt = img.revisedPrompt;
+        }
+      }
+    }
+    return this;
+  }
+  /**
+   * Return audio data in the response.
+   * Useful for mocking speech synthesis endpoints.
+   *
+   * @param data - Audio data (base64 string or Buffer)
+   * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('tts-1')
+   *   .returnsAudio(mp3Buffer)
+   *   .register();
+   */
+  returnsAudio(data, mimeType) {
+    if (typeof this.response === "function") {
+      throw new Error("Cannot use returnsAudio() after withResponse() with a function");
+    }
+    let audioData;
+    let audioMime;
+    if (typeof data === "string") {
+      audioData = data;
+      if (!mimeType) {
+        throw new Error("MIME type is required when providing base64 string data");
+      }
+      audioMime = mimeType;
+    } else {
+      audioData = toBase64(data);
+      const detected = mimeType ?? detectAudioMimeType(data);
+      if (!detected) {
+        throw new Error(
+          "Could not detect audio MIME type. Please provide the mimeType parameter explicitly."
+        );
+      }
+      audioMime = detected;
+    }
+    this.response.audio = { data: audioData, mimeType: audioMime };
+    return this;
+  }
   /**
    * Set the complete mock response object.
    * This allows full control over all response properties.
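Note the asymmetry the guards above enforce: base64 strings need an explicit MIME type, while Buffers can rely on magic-byte detection. Illustrative calls; the buffers and strings are assumed fixtures:

    mockLLM().forModel("tts-1").returnsAudio(mp3Base64, "audio/mpeg").register(); // string: type required
    mockLLM().forModel("tts-1").returnsAudio(mp3Buffer).register();               // Buffer: auto-detected
    mockLLM().forModel("dall-e-3").returnsImages([
      { data: pngBuffer1, revisedPrompt: "a red door, photorealistic" },
      { data: pngBuffer2 },
    ]).register();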
@@ -7954,23 +10166,23 @@ function createTestStream(chunks) {
     }
   }();
 }
-function createTextStream(text, options) {
+function createTextStream(text3, options) {
   return async function* () {
     if (options?.delayMs) {
       await sleep2(options.delayMs);
     }
-    const chunkSize = options?.chunkSize ?? text.length;
+    const chunkSize = options?.chunkSize ?? text3.length;
     const chunks = [];
-    for (let i = 0; i < text.length; i += chunkSize) {
-      chunks.push(text.slice(i, i + chunkSize));
+    for (let i = 0; i < text3.length; i += chunkSize) {
+      chunks.push(text3.slice(i, i + chunkSize));
     }
     for (let i = 0; i < chunks.length; i++) {
      const isLast = i === chunks.length - 1;
      const chunk = { text: chunks[i] };
      if (isLast) {
        chunk.finishReason = options?.finishReason ?? "stop";
-        const inputTokens = Math.ceil(text.length / 4);
-        const outputTokens = Math.ceil(text.length / 4);
+        const inputTokens = Math.ceil(text3.length / 4);
+        const outputTokens = Math.ceil(text3.length / 4);
        chunk.usage = options?.usage ?? {
          inputTokens,
          outputTokens,
@@ -7992,11 +10204,11 @@ async function collectStream(stream2) {
   return chunks;
 }
 async function collectStreamText(stream2) {
-  let text = "";
+  let text3 = "";
   for await (const chunk of stream2) {
-    text += chunk.text ?? "";
+    text3 += chunk.text ?? "";
   }
-  return text;
+  return text3;
 }
 async function getStreamFinalChunk(stream2) {
   let lastChunk;
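Typical use of the two helpers above in a test. Behavior is unchanged by the rename, and createTextStream is assumed, consistently with collectStreamText's for-await loop, to produce an async-iterable stream:

    const stream = createTextStream("Hello world", { chunkSize: 5 });
    const full = await collectStreamText(stream);
    // full === "Hello world"; the final chunk also carries finishReason and usage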