llmist 2.4.0 → 2.5.0

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -124,348 +124,187 @@ var init_constants = __esm({
124
124
  }
125
125
  });
126
126
 
127
- // src/core/model-shortcuts.ts
128
- function isKnownModelPattern(model) {
129
- const normalized = model.toLowerCase();
130
- if (MODEL_ALIASES[normalized]) {
131
- return true;
132
- }
133
- return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
127
+ // src/core/input-content.ts
128
+ function isImagePart(part) {
129
+ return part.type === "image";
134
130
  }
135
- function resolveModel(model, options = {}) {
136
- if (model.includes(":")) {
137
- return model;
138
- }
139
- const normalized = model.toLowerCase();
140
- if (MODEL_ALIASES[normalized]) {
141
- return MODEL_ALIASES[normalized];
142
- }
143
- const modelLower = model.toLowerCase();
144
- if (modelLower.startsWith("gpt")) {
145
- return `openai:${model}`;
131
+ function isAudioPart(part) {
132
+ return part.type === "audio";
133
+ }
134
+ function text(content) {
135
+ return { type: "text", text: content };
136
+ }
137
+ function imageFromUrl(url) {
138
+ return {
139
+ type: "image",
140
+ source: { type: "url", url }
141
+ };
142
+ }
143
+ function detectImageMimeType(data) {
144
+ const bytes = data instanceof Buffer ? data : Buffer.from(data);
145
+ for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
146
+ if (bytes.length >= magic.length) {
147
+ let matches = true;
148
+ for (let i = 0; i < magic.length; i++) {
149
+ if (bytes[i] !== magic[i]) {
150
+ matches = false;
151
+ break;
152
+ }
153
+ }
154
+ if (matches) {
155
+ if (mimeType === "image/webp") {
156
+ if (bytes.length >= 12) {
157
+ const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
158
+ if (!webpMarker) continue;
159
+ }
160
+ }
161
+ return mimeType;
162
+ }
163
+ }
146
164
  }
147
- if (modelLower.startsWith("claude")) {
148
- return `anthropic:${model}`;
165
+ return null;
166
+ }
167
+ function detectAudioMimeType(data) {
168
+ const bytes = data instanceof Buffer ? data : Buffer.from(data);
169
+ for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
170
+ if (bytes.length >= magic.length) {
171
+ let matches = true;
172
+ for (let i = 0; i < magic.length; i++) {
173
+ if (bytes[i] !== magic[i]) {
174
+ matches = false;
175
+ break;
176
+ }
177
+ }
178
+ if (matches) {
179
+ if (mimeType === "audio/wav") {
180
+ if (bytes.length >= 12) {
181
+ const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
182
+ if (!waveMarker) continue;
183
+ }
184
+ }
185
+ return mimeType;
186
+ }
187
+ }
149
188
  }
150
- if (modelLower.startsWith("gemini")) {
151
- return `gemini:${model}`;
189
+ return null;
190
+ }
191
+ function toBase64(data) {
192
+ if (typeof data === "string") {
193
+ return data;
152
194
  }
153
- if (modelLower.match(/^o\d/)) {
154
- return `openai:${model}`;
195
+ return Buffer.from(data).toString("base64");
196
+ }
197
+ function audioFromBuffer(buffer, mediaType) {
198
+ const detectedType = mediaType ?? detectAudioMimeType(buffer);
199
+ if (!detectedType) {
200
+ throw new Error(
201
+ "Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
202
+ );
155
203
  }
156
- if (!isKnownModelPattern(model)) {
157
- if (options.strict) {
158
- throw new Error(
159
- `Unknown model '${model}'. Did you mean one of: gpt4, sonnet, haiku, flash? Use explicit provider prefix like 'openai:${model}' to bypass this check.`
160
- );
161
- }
162
- if (!options.silent) {
163
- console.warn(
164
- `\u26A0\uFE0F Unknown model '${model}', falling back to 'openai:${model}'. This might be a typo. Did you mean: gpt4, gpt5, gpt5-nano, sonnet, haiku, flash? Use { strict: true } to error on unknown models, or { silent: true } to suppress this warning.`
165
- );
204
+ return {
205
+ type: "audio",
206
+ source: {
207
+ type: "base64",
208
+ mediaType: detectedType,
209
+ data: toBase64(buffer)
166
210
  }
167
- }
168
- return `openai:${model}`;
211
+ };
169
212
  }
170
- var MODEL_ALIASES, KNOWN_MODEL_PATTERNS;
171
- var init_model_shortcuts = __esm({
172
- "src/core/model-shortcuts.ts"() {
213
+ function isDataUrl(input) {
214
+ return input.startsWith("data:");
215
+ }
216
+ function parseDataUrl(url) {
217
+ const match = url.match(/^data:([^;]+);base64,(.+)$/);
218
+ if (!match) return null;
219
+ return { mimeType: match[1], data: match[2] };
220
+ }
221
+ var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
222
+ var init_input_content = __esm({
223
+ "src/core/input-content.ts"() {
173
224
  "use strict";
174
- MODEL_ALIASES = {
175
- // OpenAI aliases
176
- gpt4: "openai:gpt-4o",
177
- gpt4o: "openai:gpt-4o",
178
- gpt5: "openai:gpt-5",
179
- "gpt5-mini": "openai:gpt-5-mini",
180
- "gpt5-nano": "openai:gpt-5-nano",
181
- // Anthropic aliases
182
- sonnet: "anthropic:claude-sonnet-4-5",
183
- "claude-sonnet": "anthropic:claude-sonnet-4-5",
184
- haiku: "anthropic:claude-haiku-4-5",
185
- "claude-haiku": "anthropic:claude-haiku-4-5",
186
- opus: "anthropic:claude-opus-4-5",
187
- "claude-opus": "anthropic:claude-opus-4-5",
188
- // Gemini aliases
189
- flash: "gemini:gemini-2.0-flash",
190
- "gemini-flash": "gemini:gemini-2.0-flash",
191
- "gemini-pro": "gemini:gemini-2.5-pro",
192
- pro: "gemini:gemini-2.5-pro"
193
- };
194
- KNOWN_MODEL_PATTERNS = [
195
- /^gpt-?\d/i,
196
- // gpt-4, gpt-3.5, gpt4, etc.
197
- /^claude-?\d/i,
198
- // claude-3, claude-2, etc.
199
- /^gemini-?(\d|pro|flash)/i,
200
- // gemini-2.0, gemini-pro, gemini-flash, etc.
201
- /^o\d/i
202
- // OpenAI o1, o3, etc.
225
+ IMAGE_MAGIC_BYTES = [
226
+ { bytes: [255, 216, 255], mimeType: "image/jpeg" },
227
+ { bytes: [137, 80, 78, 71], mimeType: "image/png" },
228
+ { bytes: [71, 73, 70, 56], mimeType: "image/gif" },
229
+ // WebP starts with RIFF....WEBP
230
+ { bytes: [82, 73, 70, 70], mimeType: "image/webp" }
231
+ ];
232
+ AUDIO_MAGIC_BYTES = [
233
+ // MP3 frame sync
234
+ { bytes: [255, 251], mimeType: "audio/mp3" },
235
+ { bytes: [255, 250], mimeType: "audio/mp3" },
236
+ // ID3 tag (MP3)
237
+ { bytes: [73, 68, 51], mimeType: "audio/mp3" },
238
+ // OGG
239
+ { bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
240
+ // WAV (RIFF)
241
+ { bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
242
+ // WebM
243
+ { bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
203
244
  ];
204
245
  }
205
246
  });
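A minimal usage sketch of the new `src/core/input-content.ts` helpers. Only `text`, `imageFromUrl`, `audioFromBuffer`, and the magic-byte MIME sniffing are taken from the code above; the package-root import path (and the `imageFromUrl` export specifically) are assumptions.

```typescript
import { readFile } from "node:fs/promises";
// Assumption: these helpers are re-exported from the package root, as the
// JSDoc elsewhere in this diff suggests for text()/audioFromBuffer().
import { text, imageFromUrl, audioFromBuffer } from "llmist";

async function buildParts() {
  // Plain text part: { type: "text", text: "..." }
  const prompt = text("Describe the attachments below.");

  // URL-backed image part: { type: "image", source: { type: "url", url } }
  const remoteImage = imageFromUrl("https://example.com/cat.png");

  // Audio from a Buffer: the MIME type is sniffed from magic bytes
  // (ID3 / MP3 frame sync, OggS, RIFF/WAVE, WebM) and an error is thrown
  // if detection fails, so pass mediaType explicitly for uncommon formats.
  const recording = await readFile("recording.mp3");
  const audio = audioFromBuffer(recording); // or audioFromBuffer(recording, "audio/mp3")

  return [prompt, remoteImage, audio];
}
```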
206
247
 
207
- // src/gadgets/schema-validator.ts
208
- function validateGadgetSchema(schema, gadgetName) {
209
- let jsonSchema;
210
- try {
211
- jsonSchema = z.toJSONSchema(schema, { target: "draft-7" });
212
- } catch (error) {
213
- const errorMessage = error instanceof Error ? error.message : String(error);
214
- throw new Error(
215
- `Gadget "${gadgetName}" has a schema that cannot be serialized to JSON Schema.
216
- This usually happens with unsupported patterns like:
217
- - z.record() - use z.object({}).passthrough() instead
218
- - Complex transforms or custom refinements
219
- - Circular references
220
-
221
- Original error: ${errorMessage}
222
-
223
- Only use schema patterns that Zod v4's native toJSONSchema() supports.`
224
- );
225
- }
226
- const issues = findUnknownTypes(jsonSchema);
227
- if (issues.length > 0) {
228
- const fieldList = issues.join(", ");
229
- throw new Error(
230
- `Gadget "${gadgetName}" uses z.unknown() which produces incomplete schemas.
231
- Problematic fields: ${fieldList}
232
-
233
- z.unknown() doesn't generate type information in JSON Schema, making it unclear
234
- to the LLM what data structure to provide.
235
-
236
- Suggestions:
237
- - Use z.object({}).passthrough() for flexible objects
238
- - Use z.record(z.string()) for key-value objects with string values
239
- - Define specific structure if possible
240
-
241
- Example fixes:
242
- // \u274C Bad
243
- content: z.unknown()
244
-
245
- // \u2705 Good
246
- content: z.object({}).passthrough() // for flexible objects
247
- content: z.record(z.string()) // for key-value objects
248
- content: z.array(z.string()) // for arrays of strings
249
- `
250
- );
251
- }
252
- }
253
- function findUnknownTypes(schema, path = []) {
254
- const issues = [];
255
- if (!schema || typeof schema !== "object") {
256
- return issues;
257
- }
258
- if (schema.definitions) {
259
- for (const defSchema of Object.values(schema.definitions)) {
260
- issues.push(...findUnknownTypes(defSchema, []));
261
- }
262
- }
263
- if (schema.properties) {
264
- for (const [propName, propSchema] of Object.entries(schema.properties)) {
265
- const propPath = [...path, propName];
266
- if (hasNoType(propSchema)) {
267
- issues.push(propPath.join(".") || propName);
268
- }
269
- issues.push(...findUnknownTypes(propSchema, propPath));
270
- }
271
- }
272
- if (schema.items) {
273
- const itemPath = [...path, "[]"];
274
- if (hasNoType(schema.items)) {
275
- issues.push(itemPath.join("."));
276
- }
277
- issues.push(...findUnknownTypes(schema.items, itemPath));
278
- }
279
- if (schema.anyOf) {
280
- schema.anyOf.forEach((subSchema, index) => {
281
- issues.push(...findUnknownTypes(subSchema, [...path, `anyOf[${index}]`]));
282
- });
283
- }
284
- if (schema.oneOf) {
285
- schema.oneOf.forEach((subSchema, index) => {
286
- issues.push(...findUnknownTypes(subSchema, [...path, `oneOf[${index}]`]));
287
- });
288
- }
289
- if (schema.allOf) {
290
- schema.allOf.forEach((subSchema, index) => {
291
- issues.push(...findUnknownTypes(subSchema, [...path, `allOf[${index}]`]));
292
- });
293
- }
294
- return issues;
248
+ // src/core/prompt-config.ts
249
+ function resolvePromptTemplate(template, defaultValue, context) {
250
+ const resolved = template ?? defaultValue;
251
+ return typeof resolved === "function" ? resolved(context) : resolved;
295
252
  }
296
- function hasNoType(prop) {
297
- if (!prop || typeof prop !== "object") {
298
- return false;
253
+ function resolveRulesTemplate(rules, context) {
254
+ const resolved = rules ?? DEFAULT_PROMPTS.rules;
255
+ if (Array.isArray(resolved)) {
256
+ return resolved;
299
257
  }
300
- const hasType = prop.type !== void 0;
301
- const hasRef = prop.$ref !== void 0;
302
- const hasUnion = prop.anyOf !== void 0 || prop.oneOf !== void 0 || prop.allOf !== void 0;
303
- if (hasType || hasRef || hasUnion) {
304
- return false;
258
+ if (typeof resolved === "function") {
259
+ const result = resolved(context);
260
+ return Array.isArray(result) ? result : [result];
305
261
  }
306
- const keys = Object.keys(prop);
307
- const metadataKeys = ["description", "title", "default", "examples"];
308
- const hasOnlyMetadata = keys.every((key) => metadataKeys.includes(key));
309
- return hasOnlyMetadata || keys.length === 0;
262
+ return [resolved];
310
263
  }
311
- var z;
312
- var init_schema_validator = __esm({
313
- "src/gadgets/schema-validator.ts"() {
264
+ var DEFAULT_PROMPTS;
265
+ var init_prompt_config = __esm({
266
+ "src/core/prompt-config.ts"() {
314
267
  "use strict";
315
- z = __toESM(require("zod"), 1);
268
+ DEFAULT_PROMPTS = {
269
+ mainInstruction: [
270
+ "\u26A0\uFE0F CRITICAL: RESPOND ONLY WITH GADGET INVOCATIONS",
271
+ "DO NOT use function calling or tool calling",
272
+ "You must output the exact text markers shown below in plain text.",
273
+ "EACH MARKER MUST START WITH A NEWLINE."
274
+ ].join("\n"),
275
+ criticalUsage: "INVOKE gadgets using the markers - do not describe what you want to do.",
276
+ formatDescription: (ctx) => `Parameters using ${ctx.argPrefix}name markers (value on next line(s), no escaping needed)`,
277
+ rules: () => [
278
+ "Output ONLY plain text with the exact markers - never use function/tool calling",
279
+ "You can invoke multiple gadgets in a single response",
280
+ "Gadgets without dependencies execute immediately (in parallel if multiple)",
281
+ "Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
282
+ "If any dependency fails, dependent gadgets are automatically skipped"
283
+ ],
284
+ customExamples: null
285
+ };
316
286
  }
317
287
  });
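The prompt-config resolvers accept either static values or context-dependent functions, and `resolveRulesTemplate` always normalizes its result to a string array. A small illustration of the accepted shapes; the `PromptContext` type name and the idea of passing such an override to a builder are assumptions, only the resolver behaviour comes from the code above.

```typescript
// Hypothetical override object; the resolvers above accept any of these forms.
type PromptContext = { argPrefix: string };

const customPrompts = {
  // Static string: resolvePromptTemplate returns it unchanged.
  criticalUsage: "Invoke gadgets with the markers only.",

  // Function of context: resolvePromptTemplate calls it with the context.
  formatDescription: (ctx: PromptContext) =>
    `Parameters use ${ctx.argPrefix}name markers`,

  // rules may be a string, a string[], or a function returning either;
  // resolveRulesTemplate normalizes all of them to a string[].
  rules: (ctx: PromptContext) => [
    "Output only plain text with the exact markers",
    `Use ${ctx.argPrefix}name followed by the value on the next line`,
  ],
};
```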
318
288
 
319
- // src/gadgets/registry.ts
320
- var GadgetRegistry;
321
- var init_registry = __esm({
322
- "src/gadgets/registry.ts"() {
323
- "use strict";
324
- init_schema_validator();
325
- GadgetRegistry = class _GadgetRegistry {
326
- gadgets = /* @__PURE__ */ new Map();
327
- /**
328
- * Creates a registry from an array of gadget classes or instances,
329
- * or an object mapping names to gadgets.
330
- *
331
- * @param gadgets - Array of gadgets/classes or object with custom names
332
- * @returns New GadgetRegistry with all gadgets registered
333
- *
334
- * @example
335
- * ```typescript
336
- * // From array of classes
337
- * const registry = GadgetRegistry.from([Calculator, Weather]);
338
- *
339
- * // From array of instances
340
- * const registry = GadgetRegistry.from([new Calculator(), new Weather()]);
341
- *
342
- * // From object with custom names
343
- * const registry = GadgetRegistry.from({
344
- * calc: Calculator,
345
- * weather: new Weather({ apiKey: "..." })
346
- * });
347
- * ```
348
- */
349
- static from(gadgets) {
350
- const registry = new _GadgetRegistry();
351
- if (Array.isArray(gadgets)) {
352
- registry.registerMany(gadgets);
353
- } else {
354
- for (const [name, gadget] of Object.entries(gadgets)) {
355
- const instance = typeof gadget === "function" ? new gadget() : gadget;
356
- registry.register(name, instance);
357
- }
358
- }
359
- return registry;
360
- }
361
- /**
362
- * Registers multiple gadgets at once from an array.
363
- *
364
- * @param gadgets - Array of gadget instances or classes
365
- * @returns This registry for chaining
366
- *
367
- * @example
368
- * ```typescript
369
- * registry.registerMany([Calculator, Weather, Email]);
370
- * registry.registerMany([new Calculator(), new Weather()]);
371
- * ```
372
- */
373
- registerMany(gadgets) {
374
- for (const gadget of gadgets) {
375
- const instance = typeof gadget === "function" ? new gadget() : gadget;
376
- this.registerByClass(instance);
377
- }
378
- return this;
379
- }
380
- // Register a gadget by name
381
- register(name, gadget) {
382
- const normalizedName = name.toLowerCase();
383
- if (this.gadgets.has(normalizedName)) {
384
- throw new Error(`Gadget '${name}' is already registered`);
385
- }
386
- if (gadget.parameterSchema) {
387
- validateGadgetSchema(gadget.parameterSchema, name);
388
- }
389
- this.gadgets.set(normalizedName, gadget);
390
- }
391
- // Register a gadget using its name property or class name
392
- registerByClass(gadget) {
393
- const name = gadget.name ?? gadget.constructor.name;
394
- this.register(name, gadget);
395
- }
396
- // Get gadget by name (case-insensitive)
397
- get(name) {
398
- return this.gadgets.get(name.toLowerCase());
399
- }
400
- // Check if gadget exists (case-insensitive)
401
- has(name) {
402
- return this.gadgets.has(name.toLowerCase());
403
- }
404
- // Get all registered gadget names
405
- getNames() {
406
- return Array.from(this.gadgets.keys());
407
- }
408
- // Get all gadgets for instruction generation
409
- getAll() {
410
- return Array.from(this.gadgets.values());
411
- }
412
- // Unregister gadget (useful for testing, case-insensitive)
413
- unregister(name) {
414
- return this.gadgets.delete(name.toLowerCase());
415
- }
416
- // Clear all gadgets (useful for testing)
417
- clear() {
418
- this.gadgets.clear();
419
- }
420
- };
421
- }
422
- });
423
-
424
- // src/core/prompt-config.ts
425
- function resolvePromptTemplate(template, defaultValue, context) {
426
- const resolved = template ?? defaultValue;
427
- return typeof resolved === "function" ? resolved(context) : resolved;
428
- }
429
- function resolveRulesTemplate(rules, context) {
430
- const resolved = rules ?? DEFAULT_PROMPTS.rules;
431
- if (Array.isArray(resolved)) {
432
- return resolved;
433
- }
434
- if (typeof resolved === "function") {
435
- const result = resolved(context);
436
- return Array.isArray(result) ? result : [result];
437
- }
438
- return [resolved];
439
- }
440
- var DEFAULT_PROMPTS;
441
- var init_prompt_config = __esm({
442
- "src/core/prompt-config.ts"() {
443
- "use strict";
444
- DEFAULT_PROMPTS = {
445
- mainInstruction: [
446
- "\u26A0\uFE0F CRITICAL: RESPOND ONLY WITH GADGET INVOCATIONS",
447
- "DO NOT use function calling or tool calling",
448
- "You must output the exact text markers shown below in plain text.",
449
- "EACH MARKER MUST START WITH A NEWLINE."
450
- ].join("\n"),
451
- criticalUsage: "INVOKE gadgets using the markers - do not describe what you want to do.",
452
- formatDescription: (ctx) => `Parameters using ${ctx.argPrefix}name markers (value on next line(s), no escaping needed)`,
453
- rules: () => [
454
- "Output ONLY plain text with the exact markers - never use function/tool calling",
455
- "You can invoke multiple gadgets in a single response",
456
- "For dependent gadgets, invoke the first one and wait for the result"
457
- ],
458
- customExamples: null
459
- };
460
- }
461
- });
462
-
463
- // src/core/messages.ts
464
- var LLMMessageBuilder;
465
- var init_messages = __esm({
466
- "src/core/messages.ts"() {
289
+ // src/core/messages.ts
290
+ function normalizeContent(content) {
291
+ if (typeof content === "string") {
292
+ return [{ type: "text", text: content }];
293
+ }
294
+ return content;
295
+ }
296
+ function extractText(content) {
297
+ if (typeof content === "string") {
298
+ return content;
299
+ }
300
+ return content.filter((part) => part.type === "text").map((part) => part.text).join("");
301
+ }
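These two helpers bridge the widened message content type: a plain string becomes a single text part, and `extractText` flattens multimodal content back to plain text, dropping non-text parts. An illustration of that behaviour; the functions are module-internal in the bundle, so they are declared here only for the sake of the example.

```typescript
// Declarations mirror the module-internal helpers above; for illustration only.
type ContentPart =
  | { type: "text"; text: string }
  | { type: "image"; source: { type: "url"; url: string } };

declare function normalizeContent(content: string | ContentPart[]): ContentPart[];
declare function extractText(content: string | ContentPart[]): string;

normalizeContent("hello");
// => [{ type: "text", text: "hello" }]

extractText([
  { type: "text", text: "What is in " },
  { type: "image", source: { type: "url", url: "https://example.com/a.png" } },
  { type: "text", text: "this image?" },
]);
// => "What is in this image?"  (the image part is ignored)
```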
302
+ var LLMMessageBuilder;
303
+ var init_messages = __esm({
304
+ "src/core/messages.ts"() {
467
305
  "use strict";
468
306
  init_constants();
307
+ init_input_content();
469
308
  init_prompt_config();
470
309
  LLMMessageBuilder = class {
471
310
  messages = [];
@@ -567,6 +406,10 @@ CRITICAL: ${criticalUsage}
567
406
  parts.push(`
568
407
  1. Start marker: ${this.startPrefix}gadget_name`);
569
408
  parts.push(`
409
+ With ID: ${this.startPrefix}gadget_name:my_id`);
410
+ parts.push(`
411
+ With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
412
+ parts.push(`
570
413
  2. ${formatDescription}`);
571
414
  parts.push(`
572
415
  3. End marker: ${this.endPrefix}`);
@@ -616,6 +459,25 @@ ${this.endPrefix}`;
616
459
  EXAMPLE (Multiple Gadgets):
617
460
 
618
461
  ${multipleExample}`);
462
+ const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
463
+ ${this.argPrefix}url
464
+ https://api.example.com/users
465
+ ${this.endPrefix}
466
+ ${this.startPrefix}fetch_data:fetch_2
467
+ ${this.argPrefix}url
468
+ https://api.example.com/orders
469
+ ${this.endPrefix}
470
+ ${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
471
+ ${this.argPrefix}format
472
+ json
473
+ ${this.endPrefix}`;
474
+ parts.push(`
475
+
476
+ EXAMPLE (With Dependencies):
477
+ merge_1 waits for fetch_1 AND fetch_2 to complete.
478
+ If either fails, merge_1 is automatically skipped.
479
+
480
+ ${dependencyExample}`);
619
481
  parts.push(`
620
482
 
621
483
  BLOCK FORMAT SYNTAX:
@@ -656,67 +518,481 @@ second
656
518
  Produces: { "items": ["first", "second"] }`);
657
519
  return parts.join("");
658
520
  }
659
- buildRulesSection(context) {
660
- const parts = [];
661
- parts.push("\n\nRULES:");
662
- const rules = resolveRulesTemplate(this.promptConfig.rules, context);
663
- for (const rule of rules) {
664
- parts.push(`
665
- - ${rule}`);
521
+ buildRulesSection(context) {
522
+ const parts = [];
523
+ parts.push("\n\nRULES:");
524
+ const rules = resolveRulesTemplate(this.promptConfig.rules, context);
525
+ for (const rule of rules) {
526
+ parts.push(`
527
+ - ${rule}`);
528
+ }
529
+ return parts.join("");
530
+ }
531
+ /**
532
+ * Add a user message.
533
+ * Content can be a string (text only) or an array of content parts (multimodal).
534
+ *
535
+ * @param content - Message content
536
+ * @param metadata - Optional metadata
537
+ *
538
+ * @example
539
+ * ```typescript
540
+ * // Text only
541
+ * builder.addUser("Hello!");
542
+ *
543
+ * // Multimodal
544
+ * builder.addUser([
545
+ * text("What's in this image?"),
546
+ * imageFromBuffer(imageData),
547
+ * ]);
548
+ * ```
549
+ */
550
+ addUser(content, metadata) {
551
+ this.messages.push({ role: "user", content, metadata });
552
+ return this;
553
+ }
554
+ addAssistant(content, metadata) {
555
+ this.messages.push({ role: "assistant", content, metadata });
556
+ return this;
557
+ }
558
+ /**
559
+ * Add a user message with an image attachment.
560
+ *
561
+ * @param textContent - Text prompt
562
+ * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
563
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
564
+ *
565
+ * @example
566
+ * ```typescript
567
+ * builder.addUserWithImage(
568
+ * "What's in this image?",
569
+ * await fs.readFile("photo.jpg"),
570
+ * "image/jpeg" // Optional - auto-detected
571
+ * );
572
+ * ```
573
+ */
574
+ addUserWithImage(textContent, imageData, mimeType) {
575
+ const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
576
+ const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
577
+ if (!detectedMime) {
578
+ throw new Error(
579
+ "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
580
+ );
581
+ }
582
+ const content = [
583
+ text(textContent),
584
+ {
585
+ type: "image",
586
+ source: {
587
+ type: "base64",
588
+ mediaType: detectedMime,
589
+ data: toBase64(imageBuffer)
590
+ }
591
+ }
592
+ ];
593
+ this.messages.push({ role: "user", content });
594
+ return this;
595
+ }
596
+ /**
597
+ * Add a user message with an image URL (OpenAI only).
598
+ *
599
+ * @param textContent - Text prompt
600
+ * @param imageUrl - URL to the image
601
+ *
602
+ * @example
603
+ * ```typescript
604
+ * builder.addUserWithImageUrl(
605
+ * "What's in this image?",
606
+ * "https://example.com/image.jpg"
607
+ * );
608
+ * ```
609
+ */
610
+ addUserWithImageUrl(textContent, imageUrl) {
611
+ const content = [text(textContent), imageFromUrl(imageUrl)];
612
+ this.messages.push({ role: "user", content });
613
+ return this;
614
+ }
615
+ /**
616
+ * Add a user message with an audio attachment (Gemini only).
617
+ *
618
+ * @param textContent - Text prompt
619
+ * @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
620
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
621
+ *
622
+ * @example
623
+ * ```typescript
624
+ * builder.addUserWithAudio(
625
+ * "Transcribe this audio",
626
+ * await fs.readFile("recording.mp3"),
627
+ * "audio/mp3" // Optional - auto-detected
628
+ * );
629
+ * ```
630
+ */
631
+ addUserWithAudio(textContent, audioData, mimeType) {
632
+ const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
633
+ const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
634
+ this.messages.push({ role: "user", content });
635
+ return this;
636
+ }
637
+ /**
638
+ * Add a user message with multiple content parts.
639
+ * Provides full flexibility for complex multimodal messages.
640
+ *
641
+ * @param parts - Array of content parts
642
+ *
643
+ * @example
644
+ * ```typescript
645
+ * builder.addUserMultimodal([
646
+ * text("Compare these images:"),
647
+ * imageFromBuffer(image1),
648
+ * imageFromBuffer(image2),
649
+ * ]);
650
+ * ```
651
+ */
652
+ addUserMultimodal(parts) {
653
+ this.messages.push({ role: "user", content: parts });
654
+ return this;
655
+ }
656
+ addGadgetCall(gadget, parameters, result) {
657
+ const paramStr = this.formatBlockParameters(parameters, "");
658
+ this.messages.push({
659
+ role: "assistant",
660
+ content: `${this.startPrefix}${gadget}
661
+ ${paramStr}
662
+ ${this.endPrefix}`
663
+ });
664
+ this.messages.push({
665
+ role: "user",
666
+ content: `Result: ${result}`
667
+ });
668
+ return this;
669
+ }
670
+ /**
671
+ * Format parameters as Block format with JSON Pointer paths.
672
+ * Uses the configured argPrefix for consistency with system prompt.
673
+ */
674
+ formatBlockParameters(params, prefix) {
675
+ const lines = [];
676
+ for (const [key, value] of Object.entries(params)) {
677
+ const fullPath = prefix ? `${prefix}/${key}` : key;
678
+ if (Array.isArray(value)) {
679
+ value.forEach((item, index) => {
680
+ const itemPath = `${fullPath}/${index}`;
681
+ if (typeof item === "object" && item !== null) {
682
+ lines.push(this.formatBlockParameters(item, itemPath));
683
+ } else {
684
+ lines.push(`${this.argPrefix}${itemPath}`);
685
+ lines.push(String(item));
686
+ }
687
+ });
688
+ } else if (typeof value === "object" && value !== null) {
689
+ lines.push(this.formatBlockParameters(value, fullPath));
690
+ } else {
691
+ lines.push(`${this.argPrefix}${fullPath}`);
692
+ lines.push(String(value));
693
+ }
694
+ }
695
+ return lines.join("\n");
696
+ }
697
+ build() {
698
+ return [...this.messages];
699
+ }
700
+ };
701
+ }
702
+ });
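How the new multimodal builder methods compose, based on the JSDoc examples above. It is an assumption that `LLMMessageBuilder` and `text()` are exported from the package root and that the builder can be constructed with its defaults.

```typescript
import { readFile } from "node:fs/promises";
// Assumption: exported from the package root; constructor defaults assumed.
import { LLMMessageBuilder, text } from "llmist";

async function buildConversation() {
  const builder = new LLMMessageBuilder();

  builder
    .addUser("Hello!") // plain string content
    .addUserWithImageUrl("What's in this image?", "https://example.com/photo.jpg") // URL image (OpenAI only)
    .addUserWithImage("And in this one?", await readFile("photo.png")) // MIME type sniffed from magic bytes
    .addUserWithAudio("Transcribe this clip", await readFile("clip.mp3")) // audio attachment (Gemini only)
    .addUserMultimodal([text("Summarize everything above.")]); // arbitrary content parts

  return builder.build(); // returns a copy of the accumulated messages
}
```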
703
+
704
+ // src/core/model-shortcuts.ts
705
+ function isKnownModelPattern(model) {
706
+ const normalized = model.toLowerCase();
707
+ if (MODEL_ALIASES[normalized]) {
708
+ return true;
709
+ }
710
+ return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
711
+ }
712
+ function resolveModel(model, options = {}) {
713
+ if (model.includes(":")) {
714
+ return model;
715
+ }
716
+ const normalized = model.toLowerCase();
717
+ if (MODEL_ALIASES[normalized]) {
718
+ return MODEL_ALIASES[normalized];
719
+ }
720
+ const modelLower = model.toLowerCase();
721
+ if (modelLower.startsWith("gpt")) {
722
+ return `openai:${model}`;
723
+ }
724
+ if (modelLower.startsWith("claude")) {
725
+ return `anthropic:${model}`;
726
+ }
727
+ if (modelLower.startsWith("gemini")) {
728
+ return `gemini:${model}`;
729
+ }
730
+ if (modelLower.match(/^o\d/)) {
731
+ return `openai:${model}`;
732
+ }
733
+ if (!isKnownModelPattern(model)) {
734
+ if (options.strict) {
735
+ throw new Error(
736
+ `Unknown model '${model}'. Did you mean one of: gpt4, sonnet, haiku, flash? Use explicit provider prefix like 'openai:${model}' to bypass this check.`
737
+ );
738
+ }
739
+ if (!options.silent) {
740
+ console.warn(
741
+ `\u26A0\uFE0F Unknown model '${model}', falling back to 'openai:${model}'. This might be a typo. Did you mean: gpt4, gpt5, gpt5-nano, sonnet, haiku, flash? Use { strict: true } to error on unknown models, or { silent: true } to suppress this warning.`
742
+ );
743
+ }
744
+ }
745
+ return `openai:${model}`;
746
+ }
747
+ var MODEL_ALIASES, KNOWN_MODEL_PATTERNS;
748
+ var init_model_shortcuts = __esm({
749
+ "src/core/model-shortcuts.ts"() {
750
+ "use strict";
751
+ MODEL_ALIASES = {
752
+ // OpenAI aliases
753
+ gpt4: "openai:gpt-4o",
754
+ gpt4o: "openai:gpt-4o",
755
+ gpt5: "openai:gpt-5",
756
+ "gpt5-mini": "openai:gpt-5-mini",
757
+ "gpt5-nano": "openai:gpt-5-nano",
758
+ // Anthropic aliases
759
+ sonnet: "anthropic:claude-sonnet-4-5",
760
+ "claude-sonnet": "anthropic:claude-sonnet-4-5",
761
+ haiku: "anthropic:claude-haiku-4-5",
762
+ "claude-haiku": "anthropic:claude-haiku-4-5",
763
+ opus: "anthropic:claude-opus-4-5",
764
+ "claude-opus": "anthropic:claude-opus-4-5",
765
+ // Gemini aliases
766
+ flash: "gemini:gemini-2.0-flash",
767
+ "gemini-flash": "gemini:gemini-2.0-flash",
768
+ "gemini-pro": "gemini:gemini-2.5-pro",
769
+ pro: "gemini:gemini-2.5-pro"
770
+ };
771
+ KNOWN_MODEL_PATTERNS = [
772
+ /^gpt-?\d/i,
773
+ // gpt-4, gpt-3.5, gpt4, etc.
774
+ /^claude-?\d/i,
775
+ // claude-3, claude-2, etc.
776
+ /^gemini-?(\d|pro|flash)/i,
777
+ // gemini-2.0, gemini-pro, gemini-flash, etc.
778
+ /^o\d/i
779
+ // OpenAI o1, o3, etc.
780
+ ];
781
+ }
782
+ });
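Concrete cases for `resolveModel`, taken directly from the aliases and patterns above; whether the function is exported from the package root is an assumption.

```typescript
// Assumption: resolveModel is importable from the package root.
import { resolveModel } from "llmist";

resolveModel("sonnet");             // => "anthropic:claude-sonnet-4-5"  (alias)
resolveModel("gpt-4o-mini");        // => "openai:gpt-4o-mini"           (gpt* prefix)
resolveModel("claude-3-5-haiku");   // => "anthropic:claude-3-5-haiku"   (claude* prefix)
resolveModel("o3-mini");            // => "openai:o3-mini"               (/^o\d/ pattern)
resolveModel("anthropic:claude-x"); // => "anthropic:claude-x"           (explicit prefix passes through)

// Unknown names fall back to openai: with a console warning;
// { strict: true } turns the warning into an error, { silent: true } suppresses it.
resolveModel("mystery-model");                   // warns, => "openai:mystery-model"
resolveModel("mystery-model", { strict: true }); // throws
```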
783
+
784
+ // src/gadgets/schema-validator.ts
785
+ function validateGadgetSchema(schema, gadgetName) {
786
+ let jsonSchema;
787
+ try {
788
+ jsonSchema = z.toJSONSchema(schema, { target: "draft-7" });
789
+ } catch (error) {
790
+ const errorMessage = error instanceof Error ? error.message : String(error);
791
+ throw new Error(
792
+ `Gadget "${gadgetName}" has a schema that cannot be serialized to JSON Schema.
793
+ This usually happens with unsupported patterns like:
794
+ - z.record() - use z.object({}).passthrough() instead
795
+ - Complex transforms or custom refinements
796
+ - Circular references
797
+
798
+ Original error: ${errorMessage}
799
+
800
+ Only use schema patterns that Zod v4's native toJSONSchema() supports.`
801
+ );
802
+ }
803
+ const issues = findUnknownTypes(jsonSchema);
804
+ if (issues.length > 0) {
805
+ const fieldList = issues.join(", ");
806
+ throw new Error(
807
+ `Gadget "${gadgetName}" uses z.unknown() which produces incomplete schemas.
808
+ Problematic fields: ${fieldList}
809
+
810
+ z.unknown() doesn't generate type information in JSON Schema, making it unclear
811
+ to the LLM what data structure to provide.
812
+
813
+ Suggestions:
814
+ - Use z.object({}).passthrough() for flexible objects
815
+ - Use z.record(z.string()) for key-value objects with string values
816
+ - Define specific structure if possible
817
+
818
+ Example fixes:
819
+ // \u274C Bad
820
+ content: z.unknown()
821
+
822
+ // \u2705 Good
823
+ content: z.object({}).passthrough() // for flexible objects
824
+ content: z.record(z.string()) // for key-value objects
825
+ content: z.array(z.string()) // for arrays of strings
826
+ `
827
+ );
828
+ }
829
+ }
830
+ function findUnknownTypes(schema, path = []) {
831
+ const issues = [];
832
+ if (!schema || typeof schema !== "object") {
833
+ return issues;
834
+ }
835
+ if (schema.definitions) {
836
+ for (const defSchema of Object.values(schema.definitions)) {
837
+ issues.push(...findUnknownTypes(defSchema, []));
838
+ }
839
+ }
840
+ if (schema.properties) {
841
+ for (const [propName, propSchema] of Object.entries(schema.properties)) {
842
+ const propPath = [...path, propName];
843
+ if (hasNoType(propSchema)) {
844
+ issues.push(propPath.join(".") || propName);
845
+ }
846
+ issues.push(...findUnknownTypes(propSchema, propPath));
847
+ }
848
+ }
849
+ if (schema.items) {
850
+ const itemPath = [...path, "[]"];
851
+ if (hasNoType(schema.items)) {
852
+ issues.push(itemPath.join("."));
853
+ }
854
+ issues.push(...findUnknownTypes(schema.items, itemPath));
855
+ }
856
+ if (schema.anyOf) {
857
+ schema.anyOf.forEach((subSchema, index) => {
858
+ issues.push(...findUnknownTypes(subSchema, [...path, `anyOf[${index}]`]));
859
+ });
860
+ }
861
+ if (schema.oneOf) {
862
+ schema.oneOf.forEach((subSchema, index) => {
863
+ issues.push(...findUnknownTypes(subSchema, [...path, `oneOf[${index}]`]));
864
+ });
865
+ }
866
+ if (schema.allOf) {
867
+ schema.allOf.forEach((subSchema, index) => {
868
+ issues.push(...findUnknownTypes(subSchema, [...path, `allOf[${index}]`]));
869
+ });
870
+ }
871
+ return issues;
872
+ }
873
+ function hasNoType(prop) {
874
+ if (!prop || typeof prop !== "object") {
875
+ return false;
876
+ }
877
+ const hasType = prop.type !== void 0;
878
+ const hasRef = prop.$ref !== void 0;
879
+ const hasUnion = prop.anyOf !== void 0 || prop.oneOf !== void 0 || prop.allOf !== void 0;
880
+ if (hasType || hasRef || hasUnion) {
881
+ return false;
882
+ }
883
+ const keys = Object.keys(prop);
884
+ const metadataKeys = ["description", "title", "default", "examples"];
885
+ const hasOnlyMetadata = keys.every((key) => metadataKeys.includes(key));
886
+ return hasOnlyMetadata || keys.length === 0;
887
+ }
888
+ var z;
889
+ var init_schema_validator = __esm({
890
+ "src/gadgets/schema-validator.ts"() {
891
+ "use strict";
892
+ z = __toESM(require("zod"), 1);
893
+ }
894
+ });
895
+
896
+ // src/gadgets/registry.ts
897
+ var GadgetRegistry;
898
+ var init_registry = __esm({
899
+ "src/gadgets/registry.ts"() {
900
+ "use strict";
901
+ init_schema_validator();
902
+ GadgetRegistry = class _GadgetRegistry {
903
+ gadgets = /* @__PURE__ */ new Map();
904
+ /**
905
+ * Creates a registry from an array of gadget classes or instances,
906
+ * or an object mapping names to gadgets.
907
+ *
908
+ * @param gadgets - Array of gadgets/classes or object with custom names
909
+ * @returns New GadgetRegistry with all gadgets registered
910
+ *
911
+ * @example
912
+ * ```typescript
913
+ * // From array of classes
914
+ * const registry = GadgetRegistry.from([Calculator, Weather]);
915
+ *
916
+ * // From array of instances
917
+ * const registry = GadgetRegistry.from([new Calculator(), new Weather()]);
918
+ *
919
+ * // From object with custom names
920
+ * const registry = GadgetRegistry.from({
921
+ * calc: Calculator,
922
+ * weather: new Weather({ apiKey: "..." })
923
+ * });
924
+ * ```
925
+ */
926
+ static from(gadgets) {
927
+ const registry = new _GadgetRegistry();
928
+ if (Array.isArray(gadgets)) {
929
+ registry.registerMany(gadgets);
930
+ } else {
931
+ for (const [name, gadget] of Object.entries(gadgets)) {
932
+ const instance = typeof gadget === "function" ? new gadget() : gadget;
933
+ registry.register(name, instance);
934
+ }
935
+ }
936
+ return registry;
937
+ }
938
+ /**
939
+ * Registers multiple gadgets at once from an array.
940
+ *
941
+ * @param gadgets - Array of gadget instances or classes
942
+ * @returns This registry for chaining
943
+ *
944
+ * @example
945
+ * ```typescript
946
+ * registry.registerMany([Calculator, Weather, Email]);
947
+ * registry.registerMany([new Calculator(), new Weather()]);
948
+ * ```
949
+ */
950
+ registerMany(gadgets) {
951
+ for (const gadget of gadgets) {
952
+ const instance = typeof gadget === "function" ? new gadget() : gadget;
953
+ this.registerByClass(instance);
954
+ }
955
+ return this;
956
+ }
957
+ // Register a gadget by name
958
+ register(name, gadget) {
959
+ const normalizedName = name.toLowerCase();
960
+ if (this.gadgets.has(normalizedName)) {
961
+ throw new Error(`Gadget '${name}' is already registered`);
666
962
  }
667
- return parts.join("");
963
+ if (gadget.parameterSchema) {
964
+ validateGadgetSchema(gadget.parameterSchema, name);
965
+ }
966
+ this.gadgets.set(normalizedName, gadget);
668
967
  }
669
- addUser(content, metadata) {
670
- this.messages.push({ role: "user", content, metadata });
671
- return this;
968
+ // Register a gadget using its name property or class name
969
+ registerByClass(gadget) {
970
+ const name = gadget.name ?? gadget.constructor.name;
971
+ this.register(name, gadget);
672
972
  }
673
- addAssistant(content, metadata) {
674
- this.messages.push({ role: "assistant", content, metadata });
675
- return this;
973
+ // Get gadget by name (case-insensitive)
974
+ get(name) {
975
+ return this.gadgets.get(name.toLowerCase());
676
976
  }
677
- addGadgetCall(gadget, parameters, result) {
678
- const paramStr = this.formatBlockParameters(parameters, "");
679
- this.messages.push({
680
- role: "assistant",
681
- content: `${this.startPrefix}${gadget}
682
- ${paramStr}
683
- ${this.endPrefix}`
684
- });
685
- this.messages.push({
686
- role: "user",
687
- content: `Result: ${result}`
688
- });
689
- return this;
977
+ // Check if gadget exists (case-insensitive)
978
+ has(name) {
979
+ return this.gadgets.has(name.toLowerCase());
690
980
  }
691
- /**
692
- * Format parameters as Block format with JSON Pointer paths.
693
- * Uses the configured argPrefix for consistency with system prompt.
694
- */
695
- formatBlockParameters(params, prefix) {
696
- const lines = [];
697
- for (const [key, value] of Object.entries(params)) {
698
- const fullPath = prefix ? `${prefix}/${key}` : key;
699
- if (Array.isArray(value)) {
700
- value.forEach((item, index) => {
701
- const itemPath = `${fullPath}/${index}`;
702
- if (typeof item === "object" && item !== null) {
703
- lines.push(this.formatBlockParameters(item, itemPath));
704
- } else {
705
- lines.push(`${this.argPrefix}${itemPath}`);
706
- lines.push(String(item));
707
- }
708
- });
709
- } else if (typeof value === "object" && value !== null) {
710
- lines.push(this.formatBlockParameters(value, fullPath));
711
- } else {
712
- lines.push(`${this.argPrefix}${fullPath}`);
713
- lines.push(String(value));
714
- }
715
- }
716
- return lines.join("\n");
981
+ // Get all registered gadget names
982
+ getNames() {
983
+ return Array.from(this.gadgets.keys());
717
984
  }
718
- build() {
719
- return [...this.messages];
985
+ // Get all gadgets for instruction generation
986
+ getAll() {
987
+ return Array.from(this.gadgets.values());
988
+ }
989
+ // Unregister gadget (useful for testing, case-insensitive)
990
+ unregister(name) {
991
+ return this.gadgets.delete(name.toLowerCase());
992
+ }
993
+ // Clear all gadgets (useful for testing)
994
+ clear() {
995
+ this.gadgets.clear();
720
996
  }
721
997
  };
722
998
  }
@@ -1913,7 +2189,7 @@ var init_conversation_manager = __esm({
1913
2189
  if (msg.role === "user") {
1914
2190
  this.historyBuilder.addUser(msg.content);
1915
2191
  } else if (msg.role === "assistant") {
1916
- this.historyBuilder.addAssistant(msg.content);
2192
+ this.historyBuilder.addAssistant(extractText(msg.content));
1917
2193
  }
1918
2194
  }
1919
2195
  }
@@ -1934,8 +2210,10 @@ async function runWithHandlers(agentGenerator, handlers) {
1934
2210
  if (handlers.onGadgetCall) {
1935
2211
  await handlers.onGadgetCall({
1936
2212
  gadgetName: event.call.gadgetName,
2213
+ invocationId: event.call.invocationId,
1937
2214
  parameters: event.call.parameters,
1938
- parametersRaw: event.call.parametersRaw
2215
+ parametersRaw: event.call.parametersRaw,
2216
+ dependencies: event.call.dependencies
1939
2217
  });
1940
2218
  }
1941
2219
  break;
@@ -2782,15 +3060,37 @@ var init_parser = __esm({
2782
3060
  return segment.trim().length > 0 ? segment : void 0;
2783
3061
  }
2784
3062
  /**
2785
- * Parse gadget name, handling both old format (name:invocationId) and new format (just name).
2786
- * For new format, generates a unique invocation ID.
3063
+ * Parse gadget name with optional invocation ID and dependencies.
3064
+ *
3065
+ * Supported formats:
3066
+ * - `GadgetName` - Auto-generate ID, no dependencies
3067
+ * - `GadgetName:my_id` - Explicit ID, no dependencies
3068
+ * - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
3069
+ *
3070
+ * Dependencies must be comma-separated invocation IDs.
2787
3071
  */
2788
3072
  parseGadgetName(gadgetName) {
2789
- if (gadgetName.includes(":")) {
2790
- const parts = gadgetName.split(":");
2791
- return { actualName: parts[0], invocationId: parts[1] };
3073
+ const parts = gadgetName.split(":");
3074
+ if (parts.length === 1) {
3075
+ return {
3076
+ actualName: parts[0],
3077
+ invocationId: `gadget_${++globalInvocationCounter}`,
3078
+ dependencies: []
3079
+ };
3080
+ } else if (parts.length === 2) {
3081
+ return {
3082
+ actualName: parts[0],
3083
+ invocationId: parts[1].trim(),
3084
+ dependencies: []
3085
+ };
3086
+ } else {
3087
+ const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
3088
+ return {
3089
+ actualName: parts[0],
3090
+ invocationId: parts[1].trim(),
3091
+ dependencies: deps
3092
+ };
2792
3093
  }
2793
- return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
2794
3094
  }
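The header after the start marker now carries an optional invocation ID and dependency list. The three accepted formats, shown as free-standing calls for brevity (`parseGadgetName` is a method on the streaming parser):

```typescript
// Declared here only to illustrate the formats handled by the parser method above.
declare function parseGadgetName(header: string): {
  actualName: string;
  invocationId: string;
  dependencies: string[];
};

parseGadgetName("fetch_data");
// => { actualName: "fetch_data", invocationId: "gadget_1", dependencies: [] }
//    (the ID is auto-generated from a global counter)

parseGadgetName("fetch_data:fetch_1");
// => { actualName: "fetch_data", invocationId: "fetch_1", dependencies: [] }

parseGadgetName("merge_data:merge_1:fetch_1,fetch_2");
// => { actualName: "merge_data", invocationId: "merge_1", dependencies: ["fetch_1", "fetch_2"] }
```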
2795
3095
  /**
2796
3096
  * Extract the error message from a parse error.
@@ -2826,39 +3126,20 @@ var init_parser = __esm({
2826
3126
  const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
2827
3127
  if (metadataEndIndex === -1) break;
2828
3128
  const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
2829
- const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
3129
+ const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
2830
3130
  const contentStartIndex = metadataEndIndex + 1;
2831
3131
  let partEndIndex;
2832
3132
  let endMarkerLength = 0;
2833
- if (gadgetName.includes(":")) {
2834
- const oldEndMarker = `${this.endPrefix + actualGadgetName}:${invocationId}`;
2835
- partEndIndex = this.buffer.indexOf(oldEndMarker, contentStartIndex);
2836
- if (partEndIndex === -1) break;
2837
- endMarkerLength = oldEndMarker.length;
3133
+ const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
3134
+ const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
3135
+ if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
3136
+ partEndIndex = nextStartPos;
3137
+ endMarkerLength = 0;
3138
+ } else if (endPos !== -1) {
3139
+ partEndIndex = endPos;
3140
+ endMarkerLength = this.endPrefix.length;
2838
3141
  } else {
2839
- const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
2840
- let validEndPos = -1;
2841
- let searchPos = contentStartIndex;
2842
- while (true) {
2843
- const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
2844
- if (endPos === -1) break;
2845
- const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
2846
- if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
2847
- validEndPos = endPos;
2848
- break;
2849
- } else {
2850
- searchPos = endPos + this.endPrefix.length;
2851
- }
2852
- }
2853
- if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
2854
- partEndIndex = nextStartPos;
2855
- endMarkerLength = 0;
2856
- } else if (validEndPos !== -1) {
2857
- partEndIndex = validEndPos;
2858
- endMarkerLength = this.endPrefix.length;
2859
- } else {
2860
- break;
2861
- }
3142
+ break;
2862
3143
  }
2863
3144
  const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
2864
3145
  const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2869,7 +3150,8 @@ var init_parser = __esm({
2869
3150
  invocationId,
2870
3151
  parametersRaw,
2871
3152
  parameters,
2872
- parseError
3153
+ parseError,
3154
+ dependencies
2873
3155
  }
2874
3156
  };
2875
3157
  startIndex = partEndIndex + endMarkerLength;
@@ -2892,7 +3174,7 @@ var init_parser = __esm({
2892
3174
  const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
2893
3175
  if (metadataEndIndex !== -1) {
2894
3176
  const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
2895
- const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
3177
+ const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
2896
3178
  const contentStartIndex = metadataEndIndex + 1;
2897
3179
  const parametersRaw = this.buffer.substring(contentStartIndex).trim();
2898
3180
  const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2903,7 +3185,8 @@ var init_parser = __esm({
2903
3185
  invocationId,
2904
3186
  parametersRaw,
2905
3187
  parameters,
2906
- parseError
3188
+ parseError,
3189
+ dependencies
2907
3190
  }
2908
3191
  };
2909
3192
  return;
@@ -3273,6 +3556,13 @@ var init_stream_processor = __esm({
3273
3556
  accumulatedText = "";
3274
3557
  shouldStopExecution = false;
3275
3558
  observerFailureCount = 0;
3559
+ // Dependency tracking for gadget execution DAG
3560
+ /** Gadgets waiting for their dependencies to complete */
3561
+ pendingGadgets = /* @__PURE__ */ new Map();
3562
+ /** Completed gadget results, keyed by invocation ID */
3563
+ completedResults = /* @__PURE__ */ new Map();
3564
+ /** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
3565
+ failedInvocations = /* @__PURE__ */ new Set();
3276
3566
  constructor(options) {
3277
3567
  this.iteration = options.iteration;
3278
3568
  this.registry = options.registry;
@@ -3373,6 +3663,16 @@ var init_stream_processor = __esm({
3373
3663
  }
3374
3664
  }
3375
3665
  }
3666
+ const finalPendingEvents = await this.processPendingGadgets();
3667
+ outputs.push(...finalPendingEvents);
3668
+ if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
3669
+ didExecuteGadgets = true;
3670
+ }
3671
+ for (const evt of finalPendingEvents) {
3672
+ if (evt.type === "gadget_result" && evt.result.breaksLoop) {
3673
+ shouldBreakLoop = true;
3674
+ }
3675
+ }
3376
3676
  }
3377
3677
  let finalMessage = this.accumulatedText;
3378
3678
  if (this.hooks.interceptors?.interceptAssistantMessage) {
@@ -3424,7 +3724,11 @@ var init_stream_processor = __esm({
3424
3724
  return [{ type: "text", content }];
3425
3725
  }
3426
3726
  /**
3427
- * Process a gadget call through the full lifecycle.
3727
+ * Process a gadget call through the full lifecycle, handling dependencies.
3728
+ *
3729
+ * Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
3730
+ * Gadgets with unsatisfied dependencies are queued for later execution.
3731
+ * After each execution, pending gadgets are checked to see if they can now run.
3428
3732
  */
3429
3733
  async processGadgetCall(call) {
3430
3734
  if (this.shouldStopExecution) {
@@ -3435,6 +3739,53 @@ var init_stream_processor = __esm({
3435
3739
  }
3436
3740
  const events = [];
3437
3741
  events.push({ type: "gadget_call", call });
3742
+ if (call.dependencies.length > 0) {
3743
+ if (call.dependencies.includes(call.invocationId)) {
3744
+ this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
3745
+ gadgetName: call.gadgetName,
3746
+ invocationId: call.invocationId
3747
+ });
3748
+ this.failedInvocations.add(call.invocationId);
3749
+ const skipEvent = {
3750
+ type: "gadget_skipped",
3751
+ gadgetName: call.gadgetName,
3752
+ invocationId: call.invocationId,
3753
+ parameters: call.parameters ?? {},
3754
+ failedDependency: call.invocationId,
3755
+ failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
3756
+ };
3757
+ events.push(skipEvent);
3758
+ return events;
3759
+ }
3760
+ const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
3761
+ if (failedDep) {
3762
+ const skipEvents = await this.handleFailedDependency(call, failedDep);
3763
+ events.push(...skipEvents);
3764
+ return events;
3765
+ }
3766
+ const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
3767
+ if (unsatisfied.length > 0) {
3768
+ this.logger.debug("Queueing gadget for later - waiting on dependencies", {
3769
+ gadgetName: call.gadgetName,
3770
+ invocationId: call.invocationId,
3771
+ waitingOn: unsatisfied
3772
+ });
3773
+ this.pendingGadgets.set(call.invocationId, call);
3774
+ return events;
3775
+ }
3776
+ }
3777
+ const executeEvents = await this.executeGadgetWithHooks(call);
3778
+ events.push(...executeEvents);
3779
+ const triggeredEvents = await this.processPendingGadgets();
3780
+ events.push(...triggeredEvents);
3781
+ return events;
3782
+ }
3783
+ /**
3784
+ * Execute a gadget through the full hook lifecycle.
3785
+ * This is the core execution logic, extracted from processGadgetCall.
3786
+ */
3787
+ async executeGadgetWithHooks(call) {
3788
+ const events = [];
3438
3789
  if (call.parseError) {
3439
3790
  this.logger.warn("Gadget has parse error", {
3440
3791
  gadgetName: call.gadgetName,
@@ -3565,6 +3916,10 @@ var init_stream_processor = __esm({
3565
3916
  });
3566
3917
  }
3567
3918
  await this.runObserversInParallel(completeObservers);
3919
+ this.completedResults.set(result.invocationId, result);
3920
+ if (result.error) {
3921
+ this.failedInvocations.add(result.invocationId);
3922
+ }
3568
3923
  events.push({ type: "gadget_result", result });
3569
3924
  if (result.error) {
3570
3925
  const errorType = this.determineErrorType(call, result);
@@ -3580,6 +3935,162 @@ var init_stream_processor = __esm({
3580
3935
  }
3581
3936
  return events;
3582
3937
  }
3938
+ /**
3939
+ * Handle a gadget that cannot execute because a dependency failed.
3940
+ * Calls the onDependencySkipped controller to allow customization.
3941
+ */
3942
+ async handleFailedDependency(call, failedDep) {
3943
+ const events = [];
3944
+ const depResult = this.completedResults.get(failedDep);
3945
+ const depError = depResult?.error ?? "Dependency failed";
3946
+ let action = { action: "skip" };
3947
+ if (this.hooks.controllers?.onDependencySkipped) {
3948
+ const context = {
3949
+ iteration: this.iteration,
3950
+ gadgetName: call.gadgetName,
3951
+ invocationId: call.invocationId,
3952
+ parameters: call.parameters ?? {},
3953
+ failedDependency: failedDep,
3954
+ failedDependencyError: depError,
3955
+ logger: this.logger
3956
+ };
3957
+ action = await this.hooks.controllers.onDependencySkipped(context);
3958
+ }
3959
+ if (action.action === "skip") {
3960
+ this.failedInvocations.add(call.invocationId);
3961
+ const skipEvent = {
3962
+ type: "gadget_skipped",
3963
+ gadgetName: call.gadgetName,
3964
+ invocationId: call.invocationId,
3965
+ parameters: call.parameters ?? {},
3966
+ failedDependency: failedDep,
3967
+ failedDependencyError: depError
3968
+ };
3969
+ events.push(skipEvent);
3970
+ if (this.hooks.observers?.onGadgetSkipped) {
3971
+ const observeContext = {
3972
+ iteration: this.iteration,
3973
+ gadgetName: call.gadgetName,
3974
+ invocationId: call.invocationId,
3975
+ parameters: call.parameters ?? {},
3976
+ failedDependency: failedDep,
3977
+ failedDependencyError: depError,
3978
+ logger: this.logger
3979
+ };
3980
+ await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
3981
+ }
3982
+ this.logger.info("Gadget skipped due to failed dependency", {
3983
+ gadgetName: call.gadgetName,
3984
+ invocationId: call.invocationId,
3985
+ failedDependency: failedDep
3986
+ });
3987
+ } else if (action.action === "execute_anyway") {
3988
+ this.logger.info("Executing gadget despite failed dependency (controller override)", {
3989
+ gadgetName: call.gadgetName,
3990
+ invocationId: call.invocationId,
3991
+ failedDependency: failedDep
3992
+ });
3993
+ const executeEvents = await this.executeGadgetWithHooks(call);
3994
+ events.push(...executeEvents);
3995
+ } else if (action.action === "use_fallback") {
3996
+ const fallbackResult = {
3997
+ gadgetName: call.gadgetName,
3998
+ invocationId: call.invocationId,
3999
+ parameters: call.parameters ?? {},
4000
+ result: action.fallbackResult,
4001
+ executionTimeMs: 0
4002
+ };
4003
+ this.completedResults.set(call.invocationId, fallbackResult);
4004
+ events.push({ type: "gadget_result", result: fallbackResult });
4005
+ this.logger.info("Using fallback result for gadget with failed dependency", {
4006
+ gadgetName: call.gadgetName,
4007
+ invocationId: call.invocationId,
4008
+ failedDependency: failedDep
4009
+ });
4010
+ }
4011
+ return events;
4012
+ }
4013
+ /**
4014
+ * Process pending gadgets whose dependencies are now satisfied.
4015
+ * Executes ready gadgets in parallel and continues until no more can be triggered.
4016
+ */
4017
+ async processPendingGadgets() {
4018
+ const events = [];
4019
+ let progress = true;
4020
+ while (progress && this.pendingGadgets.size > 0) {
4021
+ progress = false;
4022
+ const readyToExecute = [];
4023
+ const readyToSkip = [];
4024
+ for (const [invocationId, call] of this.pendingGadgets) {
4025
+ const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
4026
+ if (failedDep) {
4027
+ readyToSkip.push({ call, failedDep });
4028
+ continue;
4029
+ }
4030
+ const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
4031
+ if (allSatisfied) {
4032
+ readyToExecute.push(call);
4033
+ }
4034
+ }
4035
+ for (const { call, failedDep } of readyToSkip) {
4036
+ this.pendingGadgets.delete(call.invocationId);
4037
+ const skipEvents = await this.handleFailedDependency(call, failedDep);
4038
+ events.push(...skipEvents);
4039
+ progress = true;
4040
+ }
4041
+ if (readyToExecute.length > 0) {
4042
+ this.logger.debug("Executing ready gadgets in parallel", {
4043
+ count: readyToExecute.length,
4044
+ invocationIds: readyToExecute.map((c) => c.invocationId)
4045
+ });
4046
+ for (const call of readyToExecute) {
4047
+ this.pendingGadgets.delete(call.invocationId);
4048
+ }
4049
+ const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
4050
+ const results = await Promise.all(executePromises);
4051
+ for (const executeEvents of results) {
4052
+ events.push(...executeEvents);
4053
+ }
4054
+ progress = true;
4055
+ }
4056
+ }
4057
+ if (this.pendingGadgets.size > 0) {
4058
+ const pendingIds = new Set(this.pendingGadgets.keys());
4059
+ for (const [invocationId, call] of this.pendingGadgets) {
4060
+ const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
4061
+ const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
4062
+ const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
4063
+ let errorMessage;
4064
+ let logLevel = "warn";
4065
+ if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
4066
+ errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
4067
+ logLevel = "error";
4068
+ } else if (circularDeps.length > 0) {
4069
+ errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
4070
+ } else {
4071
+ errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
4072
+ }
4073
+ this.logger[logLevel]("Gadget has unresolvable dependencies", {
4074
+ gadgetName: call.gadgetName,
4075
+ invocationId,
4076
+ circularDependencies: circularDeps,
4077
+ missingDependencies: trulyMissingDeps
4078
+ });
4079
+ this.failedInvocations.add(invocationId);
4080
+ const skipEvent = {
4081
+ type: "gadget_skipped",
4082
+ gadgetName: call.gadgetName,
4083
+ invocationId,
4084
+ parameters: call.parameters ?? {},
4085
+ failedDependency: missingDeps[0],
4086
+ failedDependencyError: errorMessage
4087
+ };
4088
+ events.push(skipEvent);
4089
+ }
4090
+ this.pendingGadgets.clear();
4091
+ }
4092
+ return events;
4093
+ }
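When a dependency fails, `handleFailedDependency` consults the optional `controllers.onDependencySkipped` hook, which can return one of three actions. A hedged sketch of such a controller; the action shapes come from the code above, while how the hook is registered on the agent/builder side is not shown in this diff and is therefore an assumption.

```typescript
// Sketch of a dependency-skip controller; the three actions mirror the
// branches of handleFailedDependency above.
const onDependencySkipped = async (ctx: {
  gadgetName: string;
  invocationId: string;
  failedDependency: string;
  failedDependencyError: string;
}) => {
  // Default behaviour: skip the dependent gadget (emits a gadget_skipped event).
  if (ctx.gadgetName === "send_report") {
    return { action: "skip" as const };
  }

  // Run the gadget anyway, despite the failed dependency.
  if (ctx.gadgetName === "cleanup") {
    return { action: "execute_anyway" as const };
  }

  // Substitute a canned result; it is recorded as a normal gadget_result with
  // executionTimeMs: 0 and satisfies any downstream dependencies.
  return {
    action: "use_fallback" as const,
    fallbackResult: `Skipped: ${ctx.failedDependency} failed (${ctx.failedDependencyError})`,
  };
};
```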
3583
4094
  /**
3584
4095
  * Safely execute an observer, catching and logging any errors.
3585
4096
  * Observers are non-critical, so errors are logged but don't crash the system.
@@ -4017,9 +4528,9 @@ var init_agent = __esm({
4017
4528
  if (msg.role === "user") {
4018
4529
  this.conversation.addUserMessage(msg.content);
4019
4530
  } else if (msg.role === "assistant") {
4020
- this.conversation.addAssistantMessage(msg.content);
4531
+ this.conversation.addAssistantMessage(extractText(msg.content));
4021
4532
  } else if (msg.role === "system") {
4022
- this.conversation.addUserMessage(`[System] ${msg.content}`);
4533
+ this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
4023
4534
  }
4024
4535
  }
4025
4536
  }
@@ -4239,6 +4750,7 @@ var init_builder = __esm({
4239
4750
  "src/agent/builder.ts"() {
4240
4751
  "use strict";
4241
4752
  init_constants();
4753
+ init_input_content();
4242
4754
  init_model_shortcuts();
4243
4755
  init_registry();
4244
4756
  init_agent();
@@ -4886,13 +5398,17 @@ ${endPrefix}`
4886
5398
  * }
4887
5399
  * ```
4888
5400
  */
4889
- ask(userPrompt) {
5401
+ /**
5402
+ * Build AgentOptions with the given user prompt.
5403
+ * Centralizes options construction for ask(), askWithImage(), and askWithContent().
5404
+ */
5405
+ buildAgentOptions(userPrompt) {
4890
5406
  if (!this.client) {
4891
5407
  const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
4892
5408
  this.client = new LLMistClass();
4893
5409
  }
4894
5410
  const registry = GadgetRegistry.from(this.gadgets);
4895
- const options = {
5411
+ return {
4896
5412
  client: this.client,
4897
5413
  model: this.model ?? "openai:gpt-5-nano",
4898
5414
  systemPrompt: this.systemPrompt,
@@ -4918,6 +5434,83 @@ ${endPrefix}`
4918
5434
  compactionConfig: this.compactionConfig,
4919
5435
  signal: this.signal
4920
5436
  };
5437
+ }
5438
+ ask(userPrompt) {
5439
+ const options = this.buildAgentOptions(userPrompt);
5440
+ return new Agent(AGENT_INTERNAL_KEY, options);
5441
+ }
5442
+ /**
5443
+ * Build and create the agent with a multimodal user prompt (text + image).
5444
+ * Returns the Agent instance ready to run.
5445
+ *
5446
+ * @param textPrompt - Text prompt describing what to do with the image
5447
+ * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
5448
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
5449
+ * @returns Configured Agent instance
5450
+ *
5451
+ * @example
5452
+ * ```typescript
5453
+ * const agent = LLMist.createAgent()
5454
+ * .withModel("gpt-4o")
5455
+ * .withSystem("You analyze images")
5456
+ * .askWithImage(
5457
+ * "What's in this image?",
5458
+ * await fs.readFile("photo.jpg")
5459
+ * );
5460
+ *
5461
+ * for await (const event of agent.run()) {
5462
+ * // handle events
5463
+ * }
5464
+ * ```
5465
+ */
5466
+ askWithImage(textPrompt, imageData, mimeType) {
5467
+ const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
5468
+ const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
5469
+ if (!detectedMime) {
5470
+ throw new Error(
5471
+ "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
5472
+ );
5473
+ }
5474
+ const userContent = [
5475
+ text(textPrompt),
5476
+ {
5477
+ type: "image",
5478
+ source: {
5479
+ type: "base64",
5480
+ mediaType: detectedMime,
5481
+ data: toBase64(imageBuffer)
5482
+ }
5483
+ }
5484
+ ];
5485
+ const options = this.buildAgentOptions(userContent);
5486
+ return new Agent(AGENT_INTERNAL_KEY, options);
5487
+ }
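Where magic-byte detection cannot be relied on, the optional third argument avoids the "Could not detect image MIME type" error thrown above. A minimal sketch (the base64 variable is a placeholder, not from the package):

```typescript
const base64FromUpstreamApi = "<base64-encoded JPEG>"; // placeholder
const agent = LLMist.createAgent()
  .withModel("gpt-4o")
  .askWithImage("Describe this image", base64FromUpstreamApi, "image/jpeg");
```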
5488
+ /**
5489
+ * Build and return an Agent configured with multimodal content.
5490
+ * More flexible than askWithImage - accepts any combination of content parts.
5491
+ *
5492
+ * @param content - Array of content parts (text, images, audio)
5493
+ * @returns A configured Agent ready for execution
5494
+ *
5495
+ * @example
5496
+ * ```typescript
5497
+ * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
5498
+ *
5499
+ * const agent = LLMist.createAgent()
5500
+ * .withModel("gemini:gemini-2.5-flash")
5501
+ * .askWithContent([
5502
+ * text("Describe this image and transcribe the audio:"),
5503
+ * imageFromBuffer(imageData),
5504
+ * audioFromBuffer(audioData),
5505
+ * ]);
5506
+ *
5507
+ * for await (const event of agent.run()) {
5508
+ * // handle events
5509
+ * }
5510
+ * ```
5511
+ */
5512
+ askWithContent(content) {
5513
+ const options = this.buildAgentOptions(content);
4921
5514
  return new Agent(AGENT_INTERNAL_KEY, options);
4922
5515
  }
4923
5516
  /**
@@ -5393,6 +5986,7 @@ var init_anthropic = __esm({
5393
5986
  "src/providers/anthropic.ts"() {
5394
5987
  "use strict";
5395
5988
  import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
5989
+ init_messages();
5396
5990
  init_anthropic_models();
5397
5991
  init_base_provider();
5398
5992
  init_constants2();
@@ -5431,7 +6025,7 @@ var init_anthropic = __esm({
5431
6025
  const systemMessages = messages.filter((message) => message.role === "system");
5432
6026
  const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
5433
6027
  type: "text",
5434
- text: m.content,
6028
+ text: extractText(m.content),
5435
6029
  // Add cache_control to the LAST system message block
5436
6030
  ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
5437
6031
  })) : void 0;
@@ -5444,14 +6038,10 @@ var init_anthropic = __esm({
5444
6038
  );
5445
6039
  const conversation = nonSystemMessages.map((message, index) => ({
5446
6040
  role: message.role,
5447
- content: [
5448
- {
5449
- type: "text",
5450
- text: message.content,
5451
- // Add cache_control to the LAST user message
5452
- ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
5453
- }
5454
- ]
6041
+ content: this.convertToAnthropicContent(
6042
+ message.content,
6043
+ message.role === "user" && index === lastUserIndex
6044
+ )
5455
6045
  }));
5456
6046
  const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
5457
6047
  const payload = {
@@ -5465,7 +6055,53 @@ var init_anthropic = __esm({
5465
6055
  stream: true,
5466
6056
  ...options.extra
5467
6057
  };
5468
- return payload;
6058
+ return payload;
6059
+ }
6060
+ /**
6061
+ * Convert llmist content to Anthropic's content block format.
6062
+ * Handles text, images (base64 only), and applies cache_control.
6063
+ */
6064
+ convertToAnthropicContent(content, addCacheControl) {
6065
+ const parts = normalizeContent(content);
6066
+ return parts.map((part, index) => {
6067
+ const isLastPart = index === parts.length - 1;
6068
+ const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
6069
+ if (part.type === "text") {
6070
+ return {
6071
+ type: "text",
6072
+ text: part.text,
6073
+ ...cacheControl
6074
+ };
6075
+ }
6076
+ if (part.type === "image") {
6077
+ return this.convertImagePart(part, cacheControl);
6078
+ }
6079
+ if (part.type === "audio") {
6080
+ throw new Error(
6081
+ "Anthropic does not support audio input. Use Google Gemini for audio processing."
6082
+ );
6083
+ }
6084
+ throw new Error(`Unsupported content type: ${part.type}`);
6085
+ });
6086
+ }
6087
+ /**
6088
+ * Convert an image content part to Anthropic's image block format.
6089
+ */
6090
+ convertImagePart(part, cacheControl) {
6091
+ if (part.source.type === "url") {
6092
+ throw new Error(
6093
+ "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
6094
+ );
6095
+ }
6096
+ return {
6097
+ type: "image",
6098
+ source: {
6099
+ type: "base64",
6100
+ media_type: part.source.mediaType,
6101
+ data: part.source.data
6102
+ },
6103
+ ...cacheControl
6104
+ };
5469
6105
  }
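For reference, a sketch of the mapping this hunk introduces: a trailing user message with text plus a base64 image becomes two Anthropic content blocks, with `mediaType` renamed to `media_type` and `cache_control` attached to the last block. The input shape follows src/core/input-content.ts; the variable names are illustrative.

```typescript
const base64Png = "<base64-encoded PNG>"; // placeholder
const input = [
  { type: "text", text: "What is in this picture?" },
  { type: "image", source: { type: "base64", mediaType: "image/png", data: base64Png } },
];
// convertToAnthropicContent(input, true) yields roughly:
// [
//   { type: "text", text: "What is in this picture?" },
//   {
//     type: "image",
//     source: { type: "base64", media_type: "image/png", data: base64Png },
//     cache_control: { type: "ephemeral" }
//   }
// ]
```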
5470
6106
  async executeStreamRequest(payload, signal) {
5471
6107
  const client = this.client;
@@ -5549,17 +6185,12 @@ var init_anthropic = __esm({
5549
6185
  async countTokens(messages, descriptor, _spec) {
5550
6186
  const client = this.client;
5551
6187
  const systemMessages = messages.filter((message) => message.role === "system");
5552
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
6188
+ const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
5553
6189
  const conversation = messages.filter(
5554
6190
  (message) => message.role !== "system"
5555
6191
  ).map((message) => ({
5556
6192
  role: message.role,
5557
- content: [
5558
- {
5559
- type: "text",
5560
- text: message.content
5561
- }
5562
- ]
6193
+ content: this.convertToAnthropicContent(message.content, false)
5563
6194
  }));
5564
6195
  try {
5565
6196
  const response = await client.messages.countTokens({
@@ -5573,8 +6204,19 @@ var init_anthropic = __esm({
5573
6204
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
5574
6205
  error
5575
6206
  );
5576
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
5577
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
6207
+ let totalChars = 0;
6208
+ let imageCount = 0;
6209
+ for (const msg of messages) {
6210
+ const parts = normalizeContent(msg.content);
6211
+ for (const part of parts) {
6212
+ if (part.type === "text") {
6213
+ totalChars += part.text.length;
6214
+ } else if (part.type === "image") {
6215
+ imageCount++;
6216
+ }
6217
+ }
6218
+ }
6219
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
5578
6220
  }
5579
6221
  }
5580
6222
  };
@@ -6103,6 +6745,7 @@ var init_gemini = __esm({
6103
6745
  "src/providers/gemini.ts"() {
6104
6746
  "use strict";
6105
6747
  import_genai = require("@google/genai");
6748
+ init_messages();
6106
6749
  init_base_provider();
6107
6750
  init_constants2();
6108
6751
  init_gemini_image_models();
@@ -6272,7 +6915,7 @@ var init_gemini = __esm({
6272
6915
  };
6273
6916
  return {
6274
6917
  model: descriptor.name,
6275
- contents: this.convertContentsForNewSDK(contents),
6918
+ contents,
6276
6919
  config
6277
6920
  };
6278
6921
  }
@@ -6307,18 +6950,25 @@ var init_gemini = __esm({
6307
6950
  if (message.role === "system") {
6308
6951
  expandedMessages.push({
6309
6952
  role: "user",
6310
- content: message.content
6953
+ content: extractText(message.content)
6311
6954
  });
6312
6955
  expandedMessages.push({
6313
6956
  role: "assistant",
6314
6957
  content: "Understood."
6315
6958
  });
6316
6959
  } else {
6317
- expandedMessages.push(message);
6960
+ expandedMessages.push({
6961
+ role: message.role,
6962
+ content: message.content
6963
+ });
6318
6964
  }
6319
6965
  }
6320
6966
  return this.mergeConsecutiveMessages(expandedMessages);
6321
6967
  }
6968
+ /**
6969
+ * Merge consecutive messages with the same role (required by Gemini).
6970
+ * Handles multimodal content by converting to Gemini's part format.
6971
+ */
6322
6972
  mergeConsecutiveMessages(messages) {
6323
6973
  if (messages.length === 0) {
6324
6974
  return [];
@@ -6327,15 +6977,16 @@ var init_gemini = __esm({
6327
6977
  let currentGroup = null;
6328
6978
  for (const message of messages) {
6329
6979
  const geminiRole = GEMINI_ROLE_MAP[message.role];
6980
+ const geminiParts = this.convertToGeminiParts(message.content);
6330
6981
  if (currentGroup && currentGroup.role === geminiRole) {
6331
- currentGroup.parts.push({ text: message.content });
6982
+ currentGroup.parts.push(...geminiParts);
6332
6983
  } else {
6333
6984
  if (currentGroup) {
6334
6985
  result.push(currentGroup);
6335
6986
  }
6336
6987
  currentGroup = {
6337
6988
  role: geminiRole,
6338
- parts: [{ text: message.content }]
6989
+ parts: geminiParts
6339
6990
  };
6340
6991
  }
6341
6992
  }
@@ -6344,11 +6995,39 @@ var init_gemini = __esm({
6344
6995
  }
6345
6996
  return result;
6346
6997
  }
6347
- convertContentsForNewSDK(contents) {
6348
- return contents.map((content) => ({
6349
- role: content.role,
6350
- parts: content.parts.map((part) => ({ text: part.text }))
6351
- }));
6998
+ /**
6999
+ * Convert llmist content to Gemini's part format.
7000
+ * Handles text, images, and audio (Gemini supports all three).
7001
+ */
7002
+ convertToGeminiParts(content) {
7003
+ const parts = normalizeContent(content);
7004
+ return parts.map((part) => {
7005
+ if (part.type === "text") {
7006
+ return { text: part.text };
7007
+ }
7008
+ if (part.type === "image") {
7009
+ if (part.source.type === "url") {
7010
+ throw new Error(
7011
+ "Gemini does not support image URLs directly. Please provide base64-encoded image data."
7012
+ );
7013
+ }
7014
+ return {
7015
+ inlineData: {
7016
+ mimeType: part.source.mediaType,
7017
+ data: part.source.data
7018
+ }
7019
+ };
7020
+ }
7021
+ if (part.type === "audio") {
7022
+ return {
7023
+ inlineData: {
7024
+ mimeType: part.source.mediaType,
7025
+ data: part.source.data
7026
+ }
7027
+ };
7028
+ }
7029
+ throw new Error(`Unsupported content type: ${part.type}`);
7030
+ });
6352
7031
  }
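The same content converted for Gemini lands in `inlineData` parts instead of provider-specific blocks. A sketch of the expected shapes (variable names are illustrative):

```typescript
const base64Wav = "<base64-encoded WAV>"; // placeholder
const content = [
  { type: "text", text: "Transcribe this clip:" },
  { type: "audio", source: { type: "base64", mediaType: "audio/wav", data: base64Wav } },
];
// convertToGeminiParts(content) yields roughly:
// [
//   { text: "Transcribe this clip:" },
//   { inlineData: { mimeType: "audio/wav", data: base64Wav } }
// ]
```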
6353
7032
  buildGenerationConfig(options) {
6354
7033
  const config = {};
@@ -6369,9 +7048,9 @@ var init_gemini = __esm({
6369
7048
  async *wrapStream(iterable) {
6370
7049
  const stream2 = iterable;
6371
7050
  for await (const chunk of stream2) {
6372
- const text = this.extractText(chunk);
6373
- if (text) {
6374
- yield { text, rawEvent: chunk };
7051
+ const text3 = this.extractText(chunk);
7052
+ if (text3) {
7053
+ yield { text: text3, rawEvent: chunk };
6375
7054
  }
6376
7055
  const finishReason = this.extractFinishReason(chunk);
6377
7056
  const usage = this.extractUsage(chunk);
@@ -6432,7 +7111,7 @@ var init_gemini = __esm({
6432
7111
  try {
6433
7112
  const response = await client.models.countTokens({
6434
7113
  model: descriptor.name,
6435
- contents: this.convertContentsForNewSDK(contents)
7114
+ contents
6436
7115
  // Note: systemInstruction not used - it's not supported by countTokens()
6437
7116
  // and would cause a 2100% token counting error
6438
7117
  });
@@ -6442,8 +7121,19 @@ var init_gemini = __esm({
6442
7121
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
6443
7122
  error
6444
7123
  );
6445
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
6446
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
7124
+ let totalChars = 0;
7125
+ let mediaCount = 0;
7126
+ for (const msg of messages) {
7127
+ const parts = normalizeContent(msg.content);
7128
+ for (const part of parts) {
7129
+ if (part.type === "text") {
7130
+ totalChars += part.text.length;
7131
+ } else if (part.type === "image" || part.type === "audio") {
7132
+ mediaCount++;
7133
+ }
7134
+ }
7135
+ }
7136
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
6447
7137
  }
6448
7138
  }
6449
7139
  };
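A worked example of the fallback estimator above; the value of FALLBACK_CHARS_PER_TOKEN is defined elsewhere in the bundle and not shown in this hunk, so 4 is an assumption here.

```typescript
// Assuming FALLBACK_CHARS_PER_TOKEN === 4: a conversation with 1,200 characters
// of text plus two image/audio parts estimates to
// Math.ceil(1200 / 4) + 2 * 258 === 816 tokens.
```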
@@ -7086,6 +7776,7 @@ var init_openai = __esm({
7086
7776
  "use strict";
7087
7777
  import_openai = __toESM(require("openai"), 1);
7088
7778
  import_tiktoken = require("tiktoken");
7779
+ init_messages();
7089
7780
  init_base_provider();
7090
7781
  init_constants2();
7091
7782
  init_openai_image_models();
@@ -7193,11 +7884,7 @@ var init_openai = __esm({
7193
7884
  const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
7194
7885
  return {
7195
7886
  model: descriptor.name,
7196
- messages: messages.map((message) => ({
7197
- role: ROLE_MAP[message.role],
7198
- content: message.content,
7199
- name: message.name
7200
- })),
7887
+ messages: messages.map((message) => this.convertToOpenAIMessage(message)),
7201
7888
  // Only set max_completion_tokens if explicitly provided
7202
7889
  // Otherwise let the API use "as much as fits" in the context window
7203
7890
  ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -7209,6 +7896,77 @@ var init_openai = __esm({
7209
7896
  ...shouldIncludeTemperature ? { temperature } : {}
7210
7897
  };
7211
7898
  }
7899
+ /**
7900
+ * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
7901
+ * Handles role-specific content type requirements:
7902
+ * - system/assistant: string content only
7903
+ * - user: string or multimodal array content
7904
+ */
7905
+ convertToOpenAIMessage(message) {
7906
+ const role = ROLE_MAP[message.role];
7907
+ if (role === "user") {
7908
+ const content = this.convertToOpenAIContent(message.content);
7909
+ return {
7910
+ role: "user",
7911
+ content,
7912
+ ...message.name ? { name: message.name } : {}
7913
+ };
7914
+ }
7915
+ const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
7916
+ if (role === "system") {
7917
+ return {
7918
+ role: "system",
7919
+ content: textContent,
7920
+ ...message.name ? { name: message.name } : {}
7921
+ };
7922
+ }
7923
+ return {
7924
+ role: "assistant",
7925
+ content: textContent,
7926
+ ...message.name ? { name: message.name } : {}
7927
+ };
7928
+ }
7929
+ /**
7930
+ * Convert llmist content to OpenAI's content format.
7931
+ * Optimizes by returning string for text-only content, array for multimodal.
7932
+ */
7933
+ convertToOpenAIContent(content) {
7934
+ if (typeof content === "string") {
7935
+ return content;
7936
+ }
7937
+ return content.map((part) => {
7938
+ if (part.type === "text") {
7939
+ return { type: "text", text: part.text };
7940
+ }
7941
+ if (part.type === "image") {
7942
+ return this.convertImagePart(part);
7943
+ }
7944
+ if (part.type === "audio") {
7945
+ throw new Error(
7946
+ "OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
7947
+ );
7948
+ }
7949
+ throw new Error(`Unsupported content type: ${part.type}`);
7950
+ });
7951
+ }
7952
+ /**
7953
+ * Convert an image content part to OpenAI's image_url format.
7954
+ * Supports both URLs and base64 data URLs.
7955
+ */
7956
+ convertImagePart(part) {
7957
+ if (part.source.type === "url") {
7958
+ return {
7959
+ type: "image_url",
7960
+ image_url: { url: part.source.url }
7961
+ };
7962
+ }
7963
+ return {
7964
+ type: "image_url",
7965
+ image_url: {
7966
+ url: `data:${part.source.mediaType};base64,${part.source.data}`
7967
+ }
7968
+ };
7969
+ }
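A sketch of the user-message mapping introduced above: URL images pass through as `image_url` entries, while base64 images become data URLs. The message literal is illustrative.

```typescript
const message = {
  role: "user",
  content: [
    { type: "text", text: "Describe this photo" },
    { type: "image", source: { type: "url", url: "https://example.com/photo.jpg" } },
  ],
};
// convertToOpenAIMessage(message) yields roughly:
// {
//   role: "user",
//   content: [
//     { type: "text", text: "Describe this photo" },
//     { type: "image_url", image_url: { url: "https://example.com/photo.jpg" } }
//   ]
// }
// A base64 image part would instead become a data: URL:
// { type: "image_url", image_url: { url: "data:image/png;base64,..." } }
```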
7212
7970
  async executeStreamRequest(payload, signal) {
7213
7971
  const client = this.client;
7214
7972
  const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
@@ -7217,9 +7975,9 @@ var init_openai = __esm({
7217
7975
  async *wrapStream(iterable) {
7218
7976
  const stream2 = iterable;
7219
7977
  for await (const chunk of stream2) {
7220
- const text = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
7221
- if (text) {
7222
- yield { text, rawEvent: chunk };
7978
+ const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
7979
+ if (text3) {
7980
+ yield { text: text3, rawEvent: chunk };
7223
7981
  }
7224
7982
  const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
7225
7983
  const usage = chunk.usage ? {
@@ -7267,17 +8025,26 @@ var init_openai = __esm({
7267
8025
  }
7268
8026
  try {
7269
8027
  let tokenCount = 0;
8028
+ let imageCount = 0;
7270
8029
  for (const message of messages) {
7271
8030
  tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
7272
8031
  const roleText = ROLE_MAP[message.role];
7273
8032
  tokenCount += encoding.encode(roleText).length;
7274
- tokenCount += encoding.encode(message.content ?? "").length;
8033
+ const textContent = extractText(message.content);
8034
+ tokenCount += encoding.encode(textContent).length;
8035
+ const parts = normalizeContent(message.content);
8036
+ for (const part of parts) {
8037
+ if (part.type === "image") {
8038
+ imageCount++;
8039
+ }
8040
+ }
7275
8041
  if (message.name) {
7276
8042
  tokenCount += encoding.encode(message.name).length;
7277
8043
  tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
7278
8044
  }
7279
8045
  }
7280
8046
  tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
8047
+ tokenCount += imageCount * 765;
7281
8048
  return tokenCount;
7282
8049
  } finally {
7283
8050
  encoding.free();
@@ -7287,8 +8054,19 @@ var init_openai = __esm({
7287
8054
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
7288
8055
  error
7289
8056
  );
7290
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
7291
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
8057
+ let totalChars = 0;
8058
+ let imageCount = 0;
8059
+ for (const msg of messages) {
8060
+ const parts = normalizeContent(msg.content);
8061
+ for (const part of parts) {
8062
+ if (part.type === "text") {
8063
+ totalChars += part.text.length;
8064
+ } else if (part.type === "image") {
8065
+ imageCount++;
8066
+ }
8067
+ }
8068
+ }
8069
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
7292
8070
  }
7293
8071
  }
7294
8072
  };
@@ -7711,6 +8489,138 @@ var init_text = __esm({
7711
8489
  }
7712
8490
  });
7713
8491
 
8492
+ // src/core/namespaces/vision.ts
8493
+ var VisionNamespace;
8494
+ var init_vision = __esm({
8495
+ "src/core/namespaces/vision.ts"() {
8496
+ "use strict";
8497
+ init_input_content();
8498
+ init_messages();
8499
+ VisionNamespace = class {
8500
+ constructor(client) {
8501
+ this.client = client;
8502
+ }
8503
+ /**
8504
+ * Build a message builder with the image content attached.
8505
+ * Handles URLs, data URLs, base64 strings, and binary buffers.
8506
+ */
8507
+ buildImageMessage(options) {
8508
+ const builder = new LLMMessageBuilder();
8509
+ if (options.systemPrompt) {
8510
+ builder.addSystem(options.systemPrompt);
8511
+ }
8512
+ if (typeof options.image === "string") {
8513
+ if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
8514
+ builder.addUserWithImageUrl(options.prompt, options.image);
8515
+ } else if (isDataUrl(options.image)) {
8516
+ const parsed = parseDataUrl(options.image);
8517
+ if (!parsed) {
8518
+ throw new Error("Invalid data URL format");
8519
+ }
8520
+ builder.addUserWithImage(
8521
+ options.prompt,
8522
+ parsed.data,
8523
+ parsed.mimeType
8524
+ );
8525
+ } else {
8526
+ const buffer = Buffer.from(options.image, "base64");
8527
+ builder.addUserWithImage(options.prompt, buffer, options.mimeType);
8528
+ }
8529
+ } else {
8530
+ builder.addUserWithImage(options.prompt, options.image, options.mimeType);
8531
+ }
8532
+ return builder;
8533
+ }
8534
+ /**
8535
+ * Stream the response and collect text and usage information.
8536
+ */
8537
+ async streamAndCollect(options, builder) {
8538
+ let response = "";
8539
+ let finalUsage;
8540
+ for await (const chunk of this.client.stream({
8541
+ model: options.model,
8542
+ messages: builder.build(),
8543
+ maxTokens: options.maxTokens,
8544
+ temperature: options.temperature
8545
+ })) {
8546
+ response += chunk.text;
8547
+ if (chunk.usage) {
8548
+ finalUsage = {
8549
+ inputTokens: chunk.usage.inputTokens,
8550
+ outputTokens: chunk.usage.outputTokens,
8551
+ totalTokens: chunk.usage.totalTokens
8552
+ };
8553
+ }
8554
+ }
8555
+ return { text: response.trim(), usage: finalUsage };
8556
+ }
8557
+ /**
8558
+ * Analyze an image with a vision-capable model.
8559
+ * Returns the analysis as a string.
8560
+ *
8561
+ * @param options - Vision analysis options
8562
+ * @returns Promise resolving to the analysis text
8563
+ * @throws Error if the image format is unsupported or model doesn't support vision
8564
+ *
8565
+ * @example
8566
+ * ```typescript
8567
+ * // From file
8568
+ * const result = await llmist.vision.analyze({
8569
+ * model: "gpt-4o",
8570
+ * image: await fs.readFile("photo.jpg"),
8571
+ * prompt: "What's in this image?",
8572
+ * });
8573
+ *
8574
+ * // From URL (OpenAI only)
8575
+ * const result = await llmist.vision.analyze({
8576
+ * model: "gpt-4o",
8577
+ * image: "https://example.com/image.jpg",
8578
+ * prompt: "Describe this image",
8579
+ * });
8580
+ * ```
8581
+ */
8582
+ async analyze(options) {
8583
+ const builder = this.buildImageMessage(options);
8584
+ const { text: text3 } = await this.streamAndCollect(options, builder);
8585
+ return text3;
8586
+ }
8587
+ /**
8588
+ * Analyze an image and return detailed result with usage info.
8589
+ *
8590
+ * @param options - Vision analysis options
8591
+ * @returns Promise resolving to the analysis result with usage info
8592
+ */
8593
+ async analyzeWithUsage(options) {
8594
+ const builder = this.buildImageMessage(options);
8595
+ const { text: text3, usage } = await this.streamAndCollect(options, builder);
8596
+ return {
8597
+ text: text3,
8598
+ model: options.model,
8599
+ usage
8600
+ };
8601
+ }
8602
+ /**
8603
+ * Check if a model supports vision/image input.
8604
+ *
8605
+ * @param modelId - Model ID to check
8606
+ * @returns True if the model supports vision
8607
+ */
8608
+ supportsModel(modelId) {
8609
+ const spec = this.client.modelRegistry.getModelSpec(modelId);
8610
+ return spec?.features?.vision === true;
8611
+ }
8612
+ /**
8613
+ * List all models that support vision.
8614
+ *
8615
+ * @returns Array of model IDs that support vision
8616
+ */
8617
+ listModels() {
8618
+ return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
8619
+ }
8620
+ };
8621
+ }
8622
+ });
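A usage sketch for the new namespace, assuming `llmist` is an LLMist client instance; `analyzeWithUsage`, `supportsModel`, and `listModels` are the methods defined above, and the `fs.readFile` call mirrors the JSDoc examples.

```typescript
if (llmist.vision.supportsModel("gpt-4o")) {
  const result = await llmist.vision.analyzeWithUsage({
    model: "gpt-4o",
    image: await fs.readFile("chart.png"),
    prompt: "Summarize this chart",
  });
  console.log(result.text, result.usage?.totalTokens);
} else {
  console.log("Vision-capable models:", llmist.vision.listModels());
}
```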
8623
+
7714
8624
  // src/core/options.ts
7715
8625
  var ModelIdentifierParser;
7716
8626
  var init_options = __esm({
@@ -7755,6 +8665,7 @@ var init_client = __esm({
7755
8665
  init_image();
7756
8666
  init_speech();
7757
8667
  init_text();
8668
+ init_vision();
7758
8669
  init_options();
7759
8670
  init_quick_methods();
7760
8671
  LLMist = class _LLMist {
@@ -7766,6 +8677,7 @@ var init_client = __esm({
7766
8677
  text;
7767
8678
  image;
7768
8679
  speech;
8680
+ vision;
7769
8681
  constructor(...args) {
7770
8682
  let adapters = [];
7771
8683
  let defaultProvider;
@@ -7816,6 +8728,7 @@ var init_client = __esm({
7816
8728
  this.text = new TextNamespace(this);
7817
8729
  this.image = new ImageNamespace(this.adapters, this.defaultProvider);
7818
8730
  this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
8731
+ this.vision = new VisionNamespace(this);
7819
8732
  }
7820
8733
  stream(options) {
7821
8734
  const descriptor = this.parser.parse(options.model);
@@ -8282,9 +9195,9 @@ function sleep(ms) {
8282
9195
  function generateInvocationId() {
8283
9196
  return `inv-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
8284
9197
  }
8285
- function splitIntoChunks(text, minChunkSize = 5, maxChunkSize = 30) {
9198
+ function splitIntoChunks(text3, minChunkSize = 5, maxChunkSize = 30) {
8286
9199
  const chunks = [];
8287
- let remaining = text;
9200
+ let remaining = text3;
8288
9201
  while (remaining.length > 0) {
8289
9202
  const chunkSize = Math.min(
8290
9203
  Math.floor(Math.random() * (maxChunkSize - minChunkSize + 1)) + minChunkSize,
@@ -8343,17 +9256,17 @@ ${String(value)}
8343
9256
  return result;
8344
9257
  }
8345
9258
  function formatGadgetCalls(gadgetCalls) {
8346
- let text = "";
9259
+ let text3 = "";
8347
9260
  const calls = [];
8348
9261
  for (const call of gadgetCalls) {
8349
9262
  const invocationId = call.invocationId ?? generateInvocationId();
8350
9263
  calls.push({ name: call.gadgetName, invocationId });
8351
9264
  const blockParams = serializeToBlockFormat(call.parameters);
8352
- text += `
9265
+ text3 += `
8353
9266
  ${GADGET_START_PREFIX}${call.gadgetName}
8354
9267
  ${blockParams}${GADGET_END_PREFIX}`;
8355
9268
  }
8356
- return { text, calls };
9269
+ return { text: text3, calls };
8357
9270
  }
8358
9271
  async function* createMockStream(response) {
8359
9272
  if (response.delayMs) {
@@ -8393,9 +9306,9 @@ async function* createMockStream(response) {
8393
9306
  };
8394
9307
  }
8395
9308
  }
8396
- function createTextMockStream(text, options) {
9309
+ function createTextMockStream(text3, options) {
8397
9310
  return createMockStream({
8398
- text,
9311
+ text: text3,
8399
9312
  delayMs: options?.delayMs,
8400
9313
  streamDelayMs: options?.streamDelayMs,
8401
9314
  usage: options?.usage,
@@ -8412,10 +9325,10 @@ var MockProviderAdapter = class {
8412
9325
  constructor(options) {
8413
9326
  this.mockManager = getMockManager(options);
8414
9327
  }
8415
- supports(descriptor) {
9328
+ supports(_descriptor) {
8416
9329
  return true;
8417
9330
  }
8418
- stream(options, descriptor, spec) {
9331
+ stream(options, descriptor, _spec) {
8419
9332
  const context = {
8420
9333
  model: options.model,
8421
9334
  provider: descriptor.provider,
@@ -8426,20 +9339,154 @@ var MockProviderAdapter = class {
8426
9339
  return this.createMockStreamFromContext(context);
8427
9340
  }
8428
9341
  async *createMockStreamFromContext(context) {
8429
- try {
8430
- const mockResponse = await this.mockManager.findMatch(context);
8431
- if (!mockResponse) {
8432
- yield {
8433
- text: "",
8434
- finishReason: "stop",
8435
- usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
8436
- };
8437
- return;
8438
- }
8439
- yield* createMockStream(mockResponse);
8440
- } catch (error) {
8441
- throw error;
9342
+ const mockResponse = await this.mockManager.findMatch(context);
9343
+ if (!mockResponse) {
9344
+ yield {
9345
+ text: "",
9346
+ finishReason: "stop",
9347
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
9348
+ };
9349
+ return;
9350
+ }
9351
+ yield* createMockStream(mockResponse);
9352
+ }
9353
+ // ==========================================================================
9354
+ // Image Generation Support
9355
+ // ==========================================================================
9356
+ /**
9357
+ * Check if this adapter supports image generation for a given model.
9358
+ * Returns true if there's a registered mock with images for this model.
9359
+ */
9360
+ supportsImageGeneration(_modelId) {
9361
+ return true;
9362
+ }
9363
+ /**
9364
+ * Generate mock images based on registered mocks.
9365
+ *
9366
+ * @param options - Image generation options
9367
+ * @returns Mock image generation result
9368
+ */
9369
+ async generateImage(options) {
9370
+ const context = {
9371
+ model: options.model,
9372
+ provider: "mock",
9373
+ modelName: options.model,
9374
+ options: {
9375
+ model: options.model,
9376
+ messages: [{ role: "user", content: options.prompt }]
9377
+ },
9378
+ messages: [{ role: "user", content: options.prompt }]
9379
+ };
9380
+ const mockResponse = await this.mockManager.findMatch(context);
9381
+ if (!mockResponse?.images || mockResponse.images.length === 0) {
9382
+ throw new Error(
9383
+ `No mock registered for image generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsImage(...).register() to add one.`
9384
+ );
9385
+ }
9386
+ return this.createImageResult(options, mockResponse);
9387
+ }
9388
+ /**
9389
+ * Transform mock response into ImageGenerationResult format.
9390
+ *
9391
+ * @param options - Original image generation options
9392
+ * @param mockResponse - Mock response containing image data
9393
+ * @returns ImageGenerationResult with mock data and zero cost
9394
+ */
9395
+ createImageResult(options, mockResponse) {
9396
+ const images = mockResponse.images ?? [];
9397
+ return {
9398
+ images: images.map((img) => ({
9399
+ b64Json: img.data,
9400
+ revisedPrompt: img.revisedPrompt
9401
+ })),
9402
+ model: options.model,
9403
+ usage: {
9404
+ imagesGenerated: images.length,
9405
+ size: options.size ?? "1024x1024",
9406
+ quality: options.quality ?? "standard"
9407
+ },
9408
+ cost: 0
9409
+ // Mock cost is always 0
9410
+ };
9411
+ }
9412
+ // ==========================================================================
9413
+ // Speech Generation Support
9414
+ // ==========================================================================
9415
+ /**
9416
+ * Check if this adapter supports speech generation for a given model.
9417
+ * Returns true if there's a registered mock with audio for this model.
9418
+ */
9419
+ supportsSpeechGeneration(_modelId) {
9420
+ return true;
9421
+ }
9422
+ /**
9423
+ * Generate mock speech based on registered mocks.
9424
+ *
9425
+ * @param options - Speech generation options
9426
+ * @returns Mock speech generation result
9427
+ */
9428
+ async generateSpeech(options) {
9429
+ const context = {
9430
+ model: options.model,
9431
+ provider: "mock",
9432
+ modelName: options.model,
9433
+ options: {
9434
+ model: options.model,
9435
+ messages: [{ role: "user", content: options.input }]
9436
+ },
9437
+ messages: [{ role: "user", content: options.input }]
9438
+ };
9439
+ const mockResponse = await this.mockManager.findMatch(context);
9440
+ if (!mockResponse?.audio) {
9441
+ throw new Error(
9442
+ `No mock registered for speech generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsAudio(...).register() to add one.`
9443
+ );
9444
+ }
9445
+ return this.createSpeechResult(options, mockResponse);
9446
+ }
9447
+ /**
9448
+ * Transform mock response into SpeechGenerationResult format.
9449
+ * Converts base64 audio data to ArrayBuffer.
9450
+ *
9451
+ * @param options - Original speech generation options
9452
+ * @param mockResponse - Mock response containing audio data
9453
+ * @returns SpeechGenerationResult with mock data and zero cost
9454
+ */
9455
+ createSpeechResult(options, mockResponse) {
9456
+ const audio = mockResponse.audio;
9457
+ const binaryString = atob(audio.data);
9458
+ const bytes = new Uint8Array(binaryString.length);
9459
+ for (let i = 0; i < binaryString.length; i++) {
9460
+ bytes[i] = binaryString.charCodeAt(i);
8442
9461
  }
9462
+ const format = this.mimeTypeToAudioFormat(audio.mimeType);
9463
+ return {
9464
+ audio: bytes.buffer,
9465
+ model: options.model,
9466
+ usage: {
9467
+ characterCount: options.input.length
9468
+ },
9469
+ cost: 0,
9470
+ // Mock cost is always 0
9471
+ format
9472
+ };
9473
+ }
9474
+ /**
9475
+ * Map MIME type to audio format for SpeechGenerationResult.
9476
+ * Defaults to "mp3" for unknown MIME types.
9477
+ *
9478
+ * @param mimeType - Audio MIME type string
9479
+ * @returns Audio format identifier
9480
+ */
9481
+ mimeTypeToAudioFormat(mimeType) {
9482
+ const mapping = {
9483
+ "audio/mp3": "mp3",
9484
+ "audio/mpeg": "mp3",
9485
+ "audio/wav": "wav",
9486
+ "audio/webm": "opus",
9487
+ "audio/ogg": "opus"
9488
+ };
9489
+ return mapping[mimeType] ?? "mp3";
8443
9490
  }
8444
9491
  };
8445
9492
  function createMockAdapter(options) {
@@ -8447,6 +9494,20 @@ function createMockAdapter(options) {
8447
9494
  }
8448
9495
 
8449
9496
  // src/testing/mock-builder.ts
9497
+ init_input_content();
9498
+ init_messages();
9499
+ function hasImageContent(content) {
9500
+ if (typeof content === "string") return false;
9501
+ return content.some((part) => isImagePart(part));
9502
+ }
9503
+ function hasAudioContent(content) {
9504
+ if (typeof content === "string") return false;
9505
+ return content.some((part) => isAudioPart(part));
9506
+ }
9507
+ function countImages(content) {
9508
+ if (typeof content === "string") return 0;
9509
+ return content.filter((part) => isImagePart(part)).length;
9510
+ }
8450
9511
  var MockBuilder = class {
8451
9512
  matchers = [];
8452
9513
  response = {};
@@ -8509,9 +9570,9 @@ var MockBuilder = class {
8509
9570
  * @example
8510
9571
  * mockLLM().whenMessageContains('hello')
8511
9572
  */
8512
- whenMessageContains(text) {
9573
+ whenMessageContains(text3) {
8513
9574
  this.matchers.push(
8514
- (ctx) => ctx.messages.some((msg) => msg.content?.toLowerCase().includes(text.toLowerCase()))
9575
+ (ctx) => ctx.messages.some((msg) => extractText(msg.content).toLowerCase().includes(text3.toLowerCase()))
8515
9576
  );
8516
9577
  return this;
8517
9578
  }
@@ -8521,10 +9582,11 @@ var MockBuilder = class {
8521
9582
  * @example
8522
9583
  * mockLLM().whenLastMessageContains('goodbye')
8523
9584
  */
8524
- whenLastMessageContains(text) {
9585
+ whenLastMessageContains(text3) {
8525
9586
  this.matchers.push((ctx) => {
8526
9587
  const lastMsg = ctx.messages[ctx.messages.length - 1];
8527
- return lastMsg?.content?.toLowerCase().includes(text.toLowerCase()) ?? false;
9588
+ if (!lastMsg) return false;
9589
+ return extractText(lastMsg.content).toLowerCase().includes(text3.toLowerCase());
8528
9590
  });
8529
9591
  return this;
8530
9592
  }
@@ -8535,7 +9597,7 @@ var MockBuilder = class {
8535
9597
  * mockLLM().whenMessageMatches(/calculate \d+/)
8536
9598
  */
8537
9599
  whenMessageMatches(regex) {
8538
- this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(msg.content ?? "")));
9600
+ this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(extractText(msg.content))));
8539
9601
  return this;
8540
9602
  }
8541
9603
  /**
@@ -8544,10 +9606,10 @@ var MockBuilder = class {
8544
9606
  * @example
8545
9607
  * mockLLM().whenRoleContains('system', 'You are a helpful assistant')
8546
9608
  */
8547
- whenRoleContains(role, text) {
9609
+ whenRoleContains(role, text3) {
8548
9610
  this.matchers.push(
8549
9611
  (ctx) => ctx.messages.some(
8550
- (msg) => msg.role === role && msg.content?.toLowerCase().includes(text.toLowerCase())
9612
+ (msg) => msg.role === role && extractText(msg.content).toLowerCase().includes(text3.toLowerCase())
8551
9613
  )
8552
9614
  );
8553
9615
  return this;
@@ -8575,6 +9637,43 @@ var MockBuilder = class {
8575
9637
  this.matchers.push(matcher);
8576
9638
  return this;
8577
9639
  }
9640
+ // ==========================================================================
9641
+ // Multimodal Matchers
9642
+ // ==========================================================================
9643
+ /**
9644
+ * Match when any message contains an image.
9645
+ *
9646
+ * @example
9647
+ * mockLLM().whenMessageHasImage().returns("I see an image of a sunset.")
9648
+ */
9649
+ whenMessageHasImage() {
9650
+ this.matchers.push((ctx) => ctx.messages.some((msg) => hasImageContent(msg.content)));
9651
+ return this;
9652
+ }
9653
+ /**
9654
+ * Match when any message contains audio.
9655
+ *
9656
+ * @example
9657
+ * mockLLM().whenMessageHasAudio().returns("I hear music playing.")
9658
+ */
9659
+ whenMessageHasAudio() {
9660
+ this.matchers.push((ctx) => ctx.messages.some((msg) => hasAudioContent(msg.content)));
9661
+ return this;
9662
+ }
9663
+ /**
9664
+ * Match based on the number of images in the last message.
9665
+ *
9666
+ * @example
9667
+ * mockLLM().whenImageCount((n) => n >= 2).returns("Comparing multiple images...")
9668
+ */
9669
+ whenImageCount(predicate) {
9670
+ this.matchers.push((ctx) => {
9671
+ const lastMsg = ctx.messages[ctx.messages.length - 1];
9672
+ if (!lastMsg) return false;
9673
+ return predicate(countImages(lastMsg.content));
9674
+ });
9675
+ return this;
9676
+ }
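A test sketch chaining the new matchers with the existing builder methods (`returns()` and `register()` appear elsewhere in this diff). Whether several matchers must all pass for a mock to match is not shown in this hunk, so treat the combination as an assumption.

```typescript
mockLLM()
  .whenMessageHasImage()
  .whenImageCount((n) => n === 1)
  .returns("A single photo of a red bicycle.")
  .register();
```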
8578
9677
  /**
8579
9678
  * Set the text response to return.
8580
9679
  * Can be a static string or a function that returns a string dynamically.
@@ -8584,17 +9683,17 @@ var MockBuilder = class {
8584
9683
  * mockLLM().returns(() => `Response at ${Date.now()}`)
8585
9684
  * mockLLM().returns((ctx) => `You said: ${ctx.messages[0]?.content}`)
8586
9685
  */
8587
- returns(text) {
8588
- if (typeof text === "function") {
9686
+ returns(text3) {
9687
+ if (typeof text3 === "function") {
8589
9688
  this.response = async (ctx) => {
8590
- const resolvedText = await Promise.resolve().then(() => text(ctx));
9689
+ const resolvedText = await Promise.resolve().then(() => text3(ctx));
8591
9690
  return { text: resolvedText };
8592
9691
  };
8593
9692
  } else {
8594
9693
  if (typeof this.response === "function") {
8595
9694
  throw new Error("Cannot use returns() after withResponse() with a function");
8596
9695
  }
8597
- this.response.text = text;
9696
+ this.response.text = text3;
8598
9697
  }
8599
9698
  return this;
8600
9699
  }
@@ -8631,6 +9730,112 @@ var MockBuilder = class {
8631
9730
  this.response.gadgetCalls.push({ gadgetName, parameters });
8632
9731
  return this;
8633
9732
  }
9733
+ // ==========================================================================
9734
+ // Multimodal Response Helpers
9735
+ // ==========================================================================
9736
+ /**
9737
+ * Return a single image in the response.
9738
+ * Useful for mocking image generation endpoints.
9739
+ *
9740
+ * @param data - Image data (base64 string or Buffer)
9741
+ * @param mimeType - MIME type (auto-detected if Buffer provided without type)
9742
+ *
9743
+ * @example
9744
+ * mockLLM()
9745
+ * .forModel('dall-e-3')
9746
+ * .returnsImage(pngBuffer)
9747
+ * .register();
9748
+ */
9749
+ returnsImage(data, mimeType) {
9750
+ if (typeof this.response === "function") {
9751
+ throw new Error("Cannot use returnsImage() after withResponse() with a function");
9752
+ }
9753
+ let imageData;
9754
+ let imageMime;
9755
+ if (typeof data === "string") {
9756
+ imageData = data;
9757
+ if (!mimeType) {
9758
+ throw new Error("MIME type is required when providing base64 string data");
9759
+ }
9760
+ imageMime = mimeType;
9761
+ } else {
9762
+ imageData = toBase64(data);
9763
+ const detected = mimeType ?? detectImageMimeType(data);
9764
+ if (!detected) {
9765
+ throw new Error(
9766
+ "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
9767
+ );
9768
+ }
9769
+ imageMime = detected;
9770
+ }
9771
+ if (!this.response.images) {
9772
+ this.response.images = [];
9773
+ }
9774
+ this.response.images.push({ data: imageData, mimeType: imageMime });
9775
+ return this;
9776
+ }
9777
+ /**
9778
+ * Return multiple images in the response.
9779
+ *
9780
+ * @example
9781
+ * mockLLM()
9782
+ * .forModel('dall-e-3')
9783
+ * .returnsImages([
9784
+ * { data: pngBuffer1 },
9785
+ * { data: pngBuffer2 },
9786
+ * ])
9787
+ * .register();
9788
+ */
9789
+ returnsImages(images) {
9790
+ for (const img of images) {
9791
+ this.returnsImage(img.data, img.mimeType);
9792
+ if (img.revisedPrompt && this.response && typeof this.response !== "function") {
9793
+ const lastImage = this.response.images?.[this.response.images.length - 1];
9794
+ if (lastImage) {
9795
+ lastImage.revisedPrompt = img.revisedPrompt;
9796
+ }
9797
+ }
9798
+ }
9799
+ return this;
9800
+ }
9801
+ /**
9802
+ * Return audio data in the response.
9803
+ * Useful for mocking speech synthesis endpoints.
9804
+ *
9805
+ * @param data - Audio data (base64 string or Buffer)
9806
+ * @param mimeType - MIME type (auto-detected if Buffer provided without type)
9807
+ *
9808
+ * @example
9809
+ * mockLLM()
9810
+ * .forModel('tts-1')
9811
+ * .returnsAudio(mp3Buffer)
9812
+ * .register();
9813
+ */
9814
+ returnsAudio(data, mimeType) {
9815
+ if (typeof this.response === "function") {
9816
+ throw new Error("Cannot use returnsAudio() after withResponse() with a function");
9817
+ }
9818
+ let audioData;
9819
+ let audioMime;
9820
+ if (typeof data === "string") {
9821
+ audioData = data;
9822
+ if (!mimeType) {
9823
+ throw new Error("MIME type is required when providing base64 string data");
9824
+ }
9825
+ audioMime = mimeType;
9826
+ } else {
9827
+ audioData = toBase64(data);
9828
+ const detected = mimeType ?? detectAudioMimeType(data);
9829
+ if (!detected) {
9830
+ throw new Error(
9831
+ "Could not detect audio MIME type. Please provide the mimeType parameter explicitly."
9832
+ );
9833
+ }
9834
+ audioMime = detected;
9835
+ }
9836
+ this.response.audio = { data: audioData, mimeType: audioMime };
9837
+ return this;
9838
+ }
8634
9839
  /**
8635
9840
  * Set the complete mock response object.
8636
9841
  * This allows full control over all response properties.
@@ -8961,23 +10166,23 @@ function createTestStream(chunks) {
8961
10166
  }
8962
10167
  }();
8963
10168
  }
8964
- function createTextStream(text, options) {
10169
+ function createTextStream(text3, options) {
8965
10170
  return async function* () {
8966
10171
  if (options?.delayMs) {
8967
10172
  await sleep2(options.delayMs);
8968
10173
  }
8969
- const chunkSize = options?.chunkSize ?? text.length;
10174
+ const chunkSize = options?.chunkSize ?? text3.length;
8970
10175
  const chunks = [];
8971
- for (let i = 0; i < text.length; i += chunkSize) {
8972
- chunks.push(text.slice(i, i + chunkSize));
10176
+ for (let i = 0; i < text3.length; i += chunkSize) {
10177
+ chunks.push(text3.slice(i, i + chunkSize));
8973
10178
  }
8974
10179
  for (let i = 0; i < chunks.length; i++) {
8975
10180
  const isLast = i === chunks.length - 1;
8976
10181
  const chunk = { text: chunks[i] };
8977
10182
  if (isLast) {
8978
10183
  chunk.finishReason = options?.finishReason ?? "stop";
8979
- const inputTokens = Math.ceil(text.length / 4);
8980
- const outputTokens = Math.ceil(text.length / 4);
10184
+ const inputTokens = Math.ceil(text3.length / 4);
10185
+ const outputTokens = Math.ceil(text3.length / 4);
8981
10186
  chunk.usage = options?.usage ?? {
8982
10187
  inputTokens,
8983
10188
  outputTokens,
@@ -8999,11 +10204,11 @@ async function collectStream(stream2) {
8999
10204
  return chunks;
9000
10205
  }
9001
10206
  async function collectStreamText(stream2) {
9002
- let text = "";
10207
+ let text3 = "";
9003
10208
  for await (const chunk of stream2) {
9004
- text += chunk.text ?? "";
10209
+ text3 += chunk.text ?? "";
9005
10210
  }
9006
- return text;
10211
+ return text3;
9007
10212
  }
9008
10213
  async function getStreamFinalChunk(stream2) {
9009
10214
  let lastChunk;