llmist 2.4.0 → 2.5.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
@@ -113,375 +113,229 @@ var init_constants = __esm({
113
113
  }
114
114
  });
115
115
 
116
- // src/core/model-shortcuts.ts
117
- function isKnownModelPattern(model) {
118
- const normalized = model.toLowerCase();
119
- if (MODEL_ALIASES[normalized]) {
120
- return true;
121
- }
122
- return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
116
+ // src/core/input-content.ts
117
+ function isTextPart(part) {
118
+ return part.type === "text";
123
119
  }
124
- function resolveModel(model, options = {}) {
125
- if (model.includes(":")) {
126
- return model;
127
- }
128
- const normalized = model.toLowerCase();
129
- if (MODEL_ALIASES[normalized]) {
130
- return MODEL_ALIASES[normalized];
131
- }
132
- const modelLower = model.toLowerCase();
133
- if (modelLower.startsWith("gpt")) {
134
- return `openai:${model}`;
120
+ function isImagePart(part) {
121
+ return part.type === "image";
122
+ }
123
+ function isAudioPart(part) {
124
+ return part.type === "audio";
125
+ }
126
+ function text(content) {
127
+ return { type: "text", text: content };
128
+ }
129
+ function imageFromBase64(data, mediaType) {
130
+ return {
131
+ type: "image",
132
+ source: { type: "base64", mediaType, data }
133
+ };
134
+ }
135
+ function imageFromUrl(url) {
136
+ return {
137
+ type: "image",
138
+ source: { type: "url", url }
139
+ };
140
+ }
141
+ function detectImageMimeType(data) {
142
+ const bytes = data instanceof Buffer ? data : Buffer.from(data);
143
+ for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
144
+ if (bytes.length >= magic.length) {
145
+ let matches = true;
146
+ for (let i = 0; i < magic.length; i++) {
147
+ if (bytes[i] !== magic[i]) {
148
+ matches = false;
149
+ break;
150
+ }
151
+ }
152
+ if (matches) {
153
+ if (mimeType === "image/webp") {
154
+ if (bytes.length >= 12) {
155
+ const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
156
+ if (!webpMarker) continue;
157
+ }
158
+ }
159
+ return mimeType;
160
+ }
161
+ }
135
162
  }
136
- if (modelLower.startsWith("claude")) {
137
- return `anthropic:${model}`;
163
+ return null;
164
+ }
165
+ function detectAudioMimeType(data) {
166
+ const bytes = data instanceof Buffer ? data : Buffer.from(data);
167
+ for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
168
+ if (bytes.length >= magic.length) {
169
+ let matches = true;
170
+ for (let i = 0; i < magic.length; i++) {
171
+ if (bytes[i] !== magic[i]) {
172
+ matches = false;
173
+ break;
174
+ }
175
+ }
176
+ if (matches) {
177
+ if (mimeType === "audio/wav") {
178
+ if (bytes.length >= 12) {
179
+ const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
180
+ if (!waveMarker) continue;
181
+ }
182
+ }
183
+ return mimeType;
184
+ }
185
+ }
138
186
  }
139
- if (modelLower.startsWith("gemini")) {
140
- return `gemini:${model}`;
187
+ return null;
188
+ }
189
+ function toBase64(data) {
190
+ if (typeof data === "string") {
191
+ return data;
141
192
  }
142
- if (modelLower.match(/^o\d/)) {
143
- return `openai:${model}`;
193
+ return Buffer.from(data).toString("base64");
194
+ }
195
+ function imageFromBuffer(buffer, mediaType) {
196
+ const detectedType = mediaType ?? detectImageMimeType(buffer);
197
+ if (!detectedType) {
198
+ throw new Error(
199
+ "Could not detect image MIME type. Please provide the mediaType parameter explicitly."
200
+ );
144
201
  }
145
- if (!isKnownModelPattern(model)) {
146
- if (options.strict) {
147
- throw new Error(
148
- `Unknown model '${model}'. Did you mean one of: gpt4, sonnet, haiku, flash? Use explicit provider prefix like 'openai:${model}' to bypass this check.`
149
- );
150
- }
151
- if (!options.silent) {
152
- console.warn(
153
- `\u26A0\uFE0F Unknown model '${model}', falling back to 'openai:${model}'. This might be a typo. Did you mean: gpt4, gpt5, gpt5-nano, sonnet, haiku, flash? Use { strict: true } to error on unknown models, or { silent: true } to suppress this warning.`
154
- );
202
+ return {
203
+ type: "image",
204
+ source: {
205
+ type: "base64",
206
+ mediaType: detectedType,
207
+ data: toBase64(buffer)
155
208
  }
156
- }
157
- return `openai:${model}`;
209
+ };
158
210
  }
159
- function hasProviderPrefix(model) {
160
- return model.includes(":");
211
+ function audioFromBase64(data, mediaType) {
212
+ return {
213
+ type: "audio",
214
+ source: { type: "base64", mediaType, data }
215
+ };
161
216
  }
162
- function getProvider(model) {
163
- const separatorIndex = model.indexOf(":");
164
- if (separatorIndex === -1) {
165
- return void 0;
217
+ function audioFromBuffer(buffer, mediaType) {
218
+ const detectedType = mediaType ?? detectAudioMimeType(buffer);
219
+ if (!detectedType) {
220
+ throw new Error(
221
+ "Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
222
+ );
166
223
  }
167
- return model.slice(0, separatorIndex);
224
+ return {
225
+ type: "audio",
226
+ source: {
227
+ type: "base64",
228
+ mediaType: detectedType,
229
+ data: toBase64(buffer)
230
+ }
231
+ };
168
232
  }
169
- function getModelId(model) {
170
- const separatorIndex = model.indexOf(":");
171
- if (separatorIndex === -1) {
172
- return model;
173
- }
174
- return model.slice(separatorIndex + 1);
233
+ function isDataUrl(input) {
234
+ return input.startsWith("data:");
175
235
  }
176
- var MODEL_ALIASES, KNOWN_MODEL_PATTERNS;
177
- var init_model_shortcuts = __esm({
178
- "src/core/model-shortcuts.ts"() {
236
+ function parseDataUrl(url) {
237
+ const match = url.match(/^data:([^;]+);base64,(.+)$/);
238
+ if (!match) return null;
239
+ return { mimeType: match[1], data: match[2] };
240
+ }
241
+ var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
242
+ var init_input_content = __esm({
243
+ "src/core/input-content.ts"() {
179
244
  "use strict";
180
- MODEL_ALIASES = {
181
- // OpenAI aliases
182
- gpt4: "openai:gpt-4o",
183
- gpt4o: "openai:gpt-4o",
184
- gpt5: "openai:gpt-5",
185
- "gpt5-mini": "openai:gpt-5-mini",
186
- "gpt5-nano": "openai:gpt-5-nano",
187
- // Anthropic aliases
188
- sonnet: "anthropic:claude-sonnet-4-5",
189
- "claude-sonnet": "anthropic:claude-sonnet-4-5",
190
- haiku: "anthropic:claude-haiku-4-5",
191
- "claude-haiku": "anthropic:claude-haiku-4-5",
192
- opus: "anthropic:claude-opus-4-5",
193
- "claude-opus": "anthropic:claude-opus-4-5",
194
- // Gemini aliases
195
- flash: "gemini:gemini-2.0-flash",
196
- "gemini-flash": "gemini:gemini-2.0-flash",
197
- "gemini-pro": "gemini:gemini-2.5-pro",
198
- pro: "gemini:gemini-2.5-pro"
199
- };
200
- KNOWN_MODEL_PATTERNS = [
201
- /^gpt-?\d/i,
202
- // gpt-4, gpt-3.5, gpt4, etc.
203
- /^claude-?\d/i,
204
- // claude-3, claude-2, etc.
205
- /^gemini-?(\d|pro|flash)/i,
206
- // gemini-2.0, gemini-pro, gemini-flash, etc.
207
- /^o\d/i
208
- // OpenAI o1, o3, etc.
245
+ IMAGE_MAGIC_BYTES = [
246
+ { bytes: [255, 216, 255], mimeType: "image/jpeg" },
247
+ { bytes: [137, 80, 78, 71], mimeType: "image/png" },
248
+ { bytes: [71, 73, 70, 56], mimeType: "image/gif" },
249
+ // WebP starts with RIFF....WEBP
250
+ { bytes: [82, 73, 70, 70], mimeType: "image/webp" }
251
+ ];
252
+ AUDIO_MAGIC_BYTES = [
253
+ // MP3 frame sync
254
+ { bytes: [255, 251], mimeType: "audio/mp3" },
255
+ { bytes: [255, 250], mimeType: "audio/mp3" },
256
+ // ID3 tag (MP3)
257
+ { bytes: [73, 68, 51], mimeType: "audio/mp3" },
258
+ // OGG
259
+ { bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
260
+ // WAV (RIFF)
261
+ { bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
262
+ // WebM
263
+ { bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
209
264
  ];
210
265
  }
211
266
  });
212
267
 
213
- // src/gadgets/schema-validator.ts
214
- import * as z from "zod";
215
- function validateGadgetSchema(schema, gadgetName) {
216
- let jsonSchema;
217
- try {
218
- jsonSchema = z.toJSONSchema(schema, { target: "draft-7" });
219
- } catch (error) {
220
- const errorMessage = error instanceof Error ? error.message : String(error);
221
- throw new Error(
222
- `Gadget "${gadgetName}" has a schema that cannot be serialized to JSON Schema.
223
- This usually happens with unsupported patterns like:
224
- - z.record() - use z.object({}).passthrough() instead
225
- - Complex transforms or custom refinements
226
- - Circular references
227
-
228
- Original error: ${errorMessage}
229
-
230
- Only use schema patterns that Zod v4's native toJSONSchema() supports.`
231
- );
232
- }
233
- const issues = findUnknownTypes(jsonSchema);
234
- if (issues.length > 0) {
235
- const fieldList = issues.join(", ");
236
- throw new Error(
237
- `Gadget "${gadgetName}" uses z.unknown() which produces incomplete schemas.
238
- Problematic fields: ${fieldList}
239
-
240
- z.unknown() doesn't generate type information in JSON Schema, making it unclear
241
- to the LLM what data structure to provide.
242
-
243
- Suggestions:
244
- - Use z.object({}).passthrough() for flexible objects
245
- - Use z.record(z.string()) for key-value objects with string values
246
- - Define specific structure if possible
247
-
248
- Example fixes:
249
- // \u274C Bad
250
- content: z.unknown()
251
-
252
- // \u2705 Good
253
- content: z.object({}).passthrough() // for flexible objects
254
- content: z.record(z.string()) // for key-value objects
255
- content: z.array(z.string()) // for arrays of strings
256
- `
257
- );
258
- }
268
+ // src/core/prompt-config.ts
269
+ function resolvePromptTemplate(template, defaultValue, context) {
270
+ const resolved = template ?? defaultValue;
271
+ return typeof resolved === "function" ? resolved(context) : resolved;
259
272
  }
260
- function findUnknownTypes(schema, path = []) {
261
- const issues = [];
262
- if (!schema || typeof schema !== "object") {
263
- return issues;
264
- }
265
- if (schema.definitions) {
266
- for (const defSchema of Object.values(schema.definitions)) {
267
- issues.push(...findUnknownTypes(defSchema, []));
268
- }
269
- }
270
- if (schema.properties) {
271
- for (const [propName, propSchema] of Object.entries(schema.properties)) {
272
- const propPath = [...path, propName];
273
- if (hasNoType(propSchema)) {
274
- issues.push(propPath.join(".") || propName);
275
- }
276
- issues.push(...findUnknownTypes(propSchema, propPath));
277
- }
273
+ function resolveRulesTemplate(rules, context) {
274
+ const resolved = rules ?? DEFAULT_PROMPTS.rules;
275
+ if (Array.isArray(resolved)) {
276
+ return resolved;
278
277
  }
279
- if (schema.items) {
280
- const itemPath = [...path, "[]"];
281
- if (hasNoType(schema.items)) {
282
- issues.push(itemPath.join("."));
283
- }
284
- issues.push(...findUnknownTypes(schema.items, itemPath));
278
+ if (typeof resolved === "function") {
279
+ const result = resolved(context);
280
+ return Array.isArray(result) ? result : [result];
285
281
  }
286
- if (schema.anyOf) {
287
- schema.anyOf.forEach((subSchema, index) => {
288
- issues.push(...findUnknownTypes(subSchema, [...path, `anyOf[${index}]`]));
289
- });
282
+ return [resolved];
283
+ }
284
+ function resolveHintTemplate(template, defaultValue, context) {
285
+ const resolved = template ?? defaultValue;
286
+ if (typeof resolved === "function") {
287
+ return resolved(context);
290
288
  }
291
- if (schema.oneOf) {
292
- schema.oneOf.forEach((subSchema, index) => {
293
- issues.push(...findUnknownTypes(subSchema, [...path, `oneOf[${index}]`]));
294
- });
289
+ return resolved.replace(/\{iteration\}/g, String(context.iteration)).replace(/\{maxIterations\}/g, String(context.maxIterations)).replace(/\{remaining\}/g, String(context.remaining));
290
+ }
291
+ var DEFAULT_HINTS, DEFAULT_PROMPTS;
292
+ var init_prompt_config = __esm({
293
+ "src/core/prompt-config.ts"() {
294
+ "use strict";
295
+ DEFAULT_HINTS = {
296
+ parallelGadgetsHint: "Tip: You can call multiple gadgets in a single response for efficiency.",
297
+ iterationProgressHint: "[Iteration {iteration}/{maxIterations}] Plan your actions accordingly."
298
+ };
299
+ DEFAULT_PROMPTS = {
300
+ mainInstruction: [
301
+ "\u26A0\uFE0F CRITICAL: RESPOND ONLY WITH GADGET INVOCATIONS",
302
+ "DO NOT use function calling or tool calling",
303
+ "You must output the exact text markers shown below in plain text.",
304
+ "EACH MARKER MUST START WITH A NEWLINE."
305
+ ].join("\n"),
306
+ criticalUsage: "INVOKE gadgets using the markers - do not describe what you want to do.",
307
+ formatDescription: (ctx) => `Parameters using ${ctx.argPrefix}name markers (value on next line(s), no escaping needed)`,
308
+ rules: () => [
309
+ "Output ONLY plain text with the exact markers - never use function/tool calling",
310
+ "You can invoke multiple gadgets in a single response",
311
+ "Gadgets without dependencies execute immediately (in parallel if multiple)",
312
+ "Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
313
+ "If any dependency fails, dependent gadgets are automatically skipped"
314
+ ],
315
+ customExamples: null
316
+ };
295
317
  }
296
- if (schema.allOf) {
297
- schema.allOf.forEach((subSchema, index) => {
298
- issues.push(...findUnknownTypes(subSchema, [...path, `allOf[${index}]`]));
299
- });
318
+ });
319
+
320
+ // src/core/messages.ts
321
+ function normalizeContent(content) {
322
+ if (typeof content === "string") {
323
+ return [{ type: "text", text: content }];
300
324
  }
301
- return issues;
325
+ return content;
302
326
  }
303
- function hasNoType(prop) {
304
- if (!prop || typeof prop !== "object") {
305
- return false;
327
+ function extractText(content) {
328
+ if (typeof content === "string") {
329
+ return content;
306
330
  }
307
- const hasType = prop.type !== void 0;
308
- const hasRef = prop.$ref !== void 0;
309
- const hasUnion = prop.anyOf !== void 0 || prop.oneOf !== void 0 || prop.allOf !== void 0;
310
- if (hasType || hasRef || hasUnion) {
311
- return false;
312
- }
313
- const keys = Object.keys(prop);
314
- const metadataKeys = ["description", "title", "default", "examples"];
315
- const hasOnlyMetadata = keys.every((key) => metadataKeys.includes(key));
316
- return hasOnlyMetadata || keys.length === 0;
317
- }
318
- var init_schema_validator = __esm({
319
- "src/gadgets/schema-validator.ts"() {
320
- "use strict";
321
- }
322
- });
323
-
324
- // src/gadgets/registry.ts
325
- var GadgetRegistry;
326
- var init_registry = __esm({
327
- "src/gadgets/registry.ts"() {
328
- "use strict";
329
- init_schema_validator();
330
- GadgetRegistry = class _GadgetRegistry {
331
- gadgets = /* @__PURE__ */ new Map();
332
- /**
333
- * Creates a registry from an array of gadget classes or instances,
334
- * or an object mapping names to gadgets.
335
- *
336
- * @param gadgets - Array of gadgets/classes or object with custom names
337
- * @returns New GadgetRegistry with all gadgets registered
338
- *
339
- * @example
340
- * ```typescript
341
- * // From array of classes
342
- * const registry = GadgetRegistry.from([Calculator, Weather]);
343
- *
344
- * // From array of instances
345
- * const registry = GadgetRegistry.from([new Calculator(), new Weather()]);
346
- *
347
- * // From object with custom names
348
- * const registry = GadgetRegistry.from({
349
- * calc: Calculator,
350
- * weather: new Weather({ apiKey: "..." })
351
- * });
352
- * ```
353
- */
354
- static from(gadgets) {
355
- const registry = new _GadgetRegistry();
356
- if (Array.isArray(gadgets)) {
357
- registry.registerMany(gadgets);
358
- } else {
359
- for (const [name, gadget] of Object.entries(gadgets)) {
360
- const instance = typeof gadget === "function" ? new gadget() : gadget;
361
- registry.register(name, instance);
362
- }
363
- }
364
- return registry;
365
- }
366
- /**
367
- * Registers multiple gadgets at once from an array.
368
- *
369
- * @param gadgets - Array of gadget instances or classes
370
- * @returns This registry for chaining
371
- *
372
- * @example
373
- * ```typescript
374
- * registry.registerMany([Calculator, Weather, Email]);
375
- * registry.registerMany([new Calculator(), new Weather()]);
376
- * ```
377
- */
378
- registerMany(gadgets) {
379
- for (const gadget of gadgets) {
380
- const instance = typeof gadget === "function" ? new gadget() : gadget;
381
- this.registerByClass(instance);
382
- }
383
- return this;
384
- }
385
- // Register a gadget by name
386
- register(name, gadget) {
387
- const normalizedName = name.toLowerCase();
388
- if (this.gadgets.has(normalizedName)) {
389
- throw new Error(`Gadget '${name}' is already registered`);
390
- }
391
- if (gadget.parameterSchema) {
392
- validateGadgetSchema(gadget.parameterSchema, name);
393
- }
394
- this.gadgets.set(normalizedName, gadget);
395
- }
396
- // Register a gadget using its name property or class name
397
- registerByClass(gadget) {
398
- const name = gadget.name ?? gadget.constructor.name;
399
- this.register(name, gadget);
400
- }
401
- // Get gadget by name (case-insensitive)
402
- get(name) {
403
- return this.gadgets.get(name.toLowerCase());
404
- }
405
- // Check if gadget exists (case-insensitive)
406
- has(name) {
407
- return this.gadgets.has(name.toLowerCase());
408
- }
409
- // Get all registered gadget names
410
- getNames() {
411
- return Array.from(this.gadgets.keys());
412
- }
413
- // Get all gadgets for instruction generation
414
- getAll() {
415
- return Array.from(this.gadgets.values());
416
- }
417
- // Unregister gadget (useful for testing, case-insensitive)
418
- unregister(name) {
419
- return this.gadgets.delete(name.toLowerCase());
420
- }
421
- // Clear all gadgets (useful for testing)
422
- clear() {
423
- this.gadgets.clear();
424
- }
425
- };
426
- }
427
- });
428
-
429
- // src/core/prompt-config.ts
430
- function resolvePromptTemplate(template, defaultValue, context) {
431
- const resolved = template ?? defaultValue;
432
- return typeof resolved === "function" ? resolved(context) : resolved;
433
- }
434
- function resolveRulesTemplate(rules, context) {
435
- const resolved = rules ?? DEFAULT_PROMPTS.rules;
436
- if (Array.isArray(resolved)) {
437
- return resolved;
438
- }
439
- if (typeof resolved === "function") {
440
- const result = resolved(context);
441
- return Array.isArray(result) ? result : [result];
442
- }
443
- return [resolved];
444
- }
445
- function resolveHintTemplate(template, defaultValue, context) {
446
- const resolved = template ?? defaultValue;
447
- if (typeof resolved === "function") {
448
- return resolved(context);
449
- }
450
- return resolved.replace(/\{iteration\}/g, String(context.iteration)).replace(/\{maxIterations\}/g, String(context.maxIterations)).replace(/\{remaining\}/g, String(context.remaining));
331
+ return content.filter((part) => part.type === "text").map((part) => part.text).join("");
451
332
  }
452
- var DEFAULT_HINTS, DEFAULT_PROMPTS;
453
- var init_prompt_config = __esm({
454
- "src/core/prompt-config.ts"() {
455
- "use strict";
456
- DEFAULT_HINTS = {
457
- parallelGadgetsHint: "Tip: You can call multiple gadgets in a single response for efficiency.",
458
- iterationProgressHint: "[Iteration {iteration}/{maxIterations}] Plan your actions accordingly."
459
- };
460
- DEFAULT_PROMPTS = {
461
- mainInstruction: [
462
- "\u26A0\uFE0F CRITICAL: RESPOND ONLY WITH GADGET INVOCATIONS",
463
- "DO NOT use function calling or tool calling",
464
- "You must output the exact text markers shown below in plain text.",
465
- "EACH MARKER MUST START WITH A NEWLINE."
466
- ].join("\n"),
467
- criticalUsage: "INVOKE gadgets using the markers - do not describe what you want to do.",
468
- formatDescription: (ctx) => `Parameters using ${ctx.argPrefix}name markers (value on next line(s), no escaping needed)`,
469
- rules: () => [
470
- "Output ONLY plain text with the exact markers - never use function/tool calling",
471
- "You can invoke multiple gadgets in a single response",
472
- "For dependent gadgets, invoke the first one and wait for the result"
473
- ],
474
- customExamples: null
475
- };
476
- }
477
- });
478
-
479
- // src/core/messages.ts
480
333
  var LLMMessageBuilder;
481
334
  var init_messages = __esm({
482
335
  "src/core/messages.ts"() {
483
336
  "use strict";
484
337
  init_constants();
338
+ init_input_content();
485
339
  init_prompt_config();
486
340
  LLMMessageBuilder = class {
487
341
  messages = [];
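
The hunk above adds a new `src/core/input-content.ts` module of multimodal content helpers (`text`, `imageFromBase64`, `imageFromUrl`, `imageFromBuffer`, `audioFromBase64`, `audioFromBuffer`) plus magic-byte MIME detection for JPEG/PNG/GIF/WebP images and MP3/OGG/WAV/WebM audio. A minimal sketch of how the helpers compose into content parts, assuming they are re-exported from the package root (the diff shows only the bundled internals, not the export surface):

```typescript
// Sketch only - the "llmist" import path is an assumption; the helper shapes
// below come from the src/core/input-content.ts code shown above.
import { readFile } from "node:fs/promises";
import { text, imageFromBuffer, imageFromUrl } from "llmist";

const photo = await readFile("photo.jpg");

const parts = [
  text("Describe this photo and the referenced diagram."),
  // MIME type is detected from JPEG/PNG/GIF/WebP magic bytes; imageFromBuffer
  // throws if detection fails and no explicit mediaType is passed.
  imageFromBuffer(photo),
  // URL-sourced part: { type: "image", source: { type: "url", url } }
  imageFromUrl("https://example.com/diagram.png"),
];
```

`audioFromBuffer` and `audioFromBase64` follow the same pattern for audio attachments.
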
@@ -583,6 +437,10 @@ CRITICAL: ${criticalUsage}
583
437
  parts.push(`
584
438
  1. Start marker: ${this.startPrefix}gadget_name`);
585
439
  parts.push(`
440
+ With ID: ${this.startPrefix}gadget_name:my_id`);
441
+ parts.push(`
442
+ With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
443
+ parts.push(`
586
444
  2. ${formatDescription}`);
587
445
  parts.push(`
588
446
  3. End marker: ${this.endPrefix}`);
@@ -632,6 +490,25 @@ ${this.endPrefix}`;
632
490
  EXAMPLE (Multiple Gadgets):
633
491
 
634
492
  ${multipleExample}`);
493
+ const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
494
+ ${this.argPrefix}url
495
+ https://api.example.com/users
496
+ ${this.endPrefix}
497
+ ${this.startPrefix}fetch_data:fetch_2
498
+ ${this.argPrefix}url
499
+ https://api.example.com/orders
500
+ ${this.endPrefix}
501
+ ${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
502
+ ${this.argPrefix}format
503
+ json
504
+ ${this.endPrefix}`;
505
+ parts.push(`
506
+
507
+ EXAMPLE (With Dependencies):
508
+ merge_1 waits for fetch_1 AND fetch_2 to complete.
509
+ If either fails, merge_1 is automatically skipped.
510
+
511
+ ${dependencyExample}`);
635
512
  parts.push(`
636
513
 
637
514
  BLOCK FORMAT SYNTAX:
@@ -672,67 +549,497 @@ second
672
549
  Produces: { "items": ["first", "second"] }`);
673
550
  return parts.join("");
674
551
  }
675
- buildRulesSection(context) {
676
- const parts = [];
677
- parts.push("\n\nRULES:");
678
- const rules = resolveRulesTemplate(this.promptConfig.rules, context);
679
- for (const rule of rules) {
680
- parts.push(`
681
- - ${rule}`);
552
+ buildRulesSection(context) {
553
+ const parts = [];
554
+ parts.push("\n\nRULES:");
555
+ const rules = resolveRulesTemplate(this.promptConfig.rules, context);
556
+ for (const rule of rules) {
557
+ parts.push(`
558
+ - ${rule}`);
559
+ }
560
+ return parts.join("");
561
+ }
562
+ /**
563
+ * Add a user message.
564
+ * Content can be a string (text only) or an array of content parts (multimodal).
565
+ *
566
+ * @param content - Message content
567
+ * @param metadata - Optional metadata
568
+ *
569
+ * @example
570
+ * ```typescript
571
+ * // Text only
572
+ * builder.addUser("Hello!");
573
+ *
574
+ * // Multimodal
575
+ * builder.addUser([
576
+ * text("What's in this image?"),
577
+ * imageFromBuffer(imageData),
578
+ * ]);
579
+ * ```
580
+ */
581
+ addUser(content, metadata) {
582
+ this.messages.push({ role: "user", content, metadata });
583
+ return this;
584
+ }
585
+ addAssistant(content, metadata) {
586
+ this.messages.push({ role: "assistant", content, metadata });
587
+ return this;
588
+ }
589
+ /**
590
+ * Add a user message with an image attachment.
591
+ *
592
+ * @param textContent - Text prompt
593
+ * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
594
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
595
+ *
596
+ * @example
597
+ * ```typescript
598
+ * builder.addUserWithImage(
599
+ * "What's in this image?",
600
+ * await fs.readFile("photo.jpg"),
601
+ * "image/jpeg" // Optional - auto-detected
602
+ * );
603
+ * ```
604
+ */
605
+ addUserWithImage(textContent, imageData, mimeType) {
606
+ const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
607
+ const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
608
+ if (!detectedMime) {
609
+ throw new Error(
610
+ "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
611
+ );
612
+ }
613
+ const content = [
614
+ text(textContent),
615
+ {
616
+ type: "image",
617
+ source: {
618
+ type: "base64",
619
+ mediaType: detectedMime,
620
+ data: toBase64(imageBuffer)
621
+ }
622
+ }
623
+ ];
624
+ this.messages.push({ role: "user", content });
625
+ return this;
626
+ }
627
+ /**
628
+ * Add a user message with an image URL (OpenAI only).
629
+ *
630
+ * @param textContent - Text prompt
631
+ * @param imageUrl - URL to the image
632
+ *
633
+ * @example
634
+ * ```typescript
635
+ * builder.addUserWithImageUrl(
636
+ * "What's in this image?",
637
+ * "https://example.com/image.jpg"
638
+ * );
639
+ * ```
640
+ */
641
+ addUserWithImageUrl(textContent, imageUrl) {
642
+ const content = [text(textContent), imageFromUrl(imageUrl)];
643
+ this.messages.push({ role: "user", content });
644
+ return this;
645
+ }
646
+ /**
647
+ * Add a user message with an audio attachment (Gemini only).
648
+ *
649
+ * @param textContent - Text prompt
650
+ * @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
651
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
652
+ *
653
+ * @example
654
+ * ```typescript
655
+ * builder.addUserWithAudio(
656
+ * "Transcribe this audio",
657
+ * await fs.readFile("recording.mp3"),
658
+ * "audio/mp3" // Optional - auto-detected
659
+ * );
660
+ * ```
661
+ */
662
+ addUserWithAudio(textContent, audioData, mimeType) {
663
+ const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
664
+ const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
665
+ this.messages.push({ role: "user", content });
666
+ return this;
667
+ }
668
+ /**
669
+ * Add a user message with multiple content parts.
670
+ * Provides full flexibility for complex multimodal messages.
671
+ *
672
+ * @param parts - Array of content parts
673
+ *
674
+ * @example
675
+ * ```typescript
676
+ * builder.addUserMultimodal([
677
+ * text("Compare these images:"),
678
+ * imageFromBuffer(image1),
679
+ * imageFromBuffer(image2),
680
+ * ]);
681
+ * ```
682
+ */
683
+ addUserMultimodal(parts) {
684
+ this.messages.push({ role: "user", content: parts });
685
+ return this;
686
+ }
687
+ addGadgetCall(gadget, parameters, result) {
688
+ const paramStr = this.formatBlockParameters(parameters, "");
689
+ this.messages.push({
690
+ role: "assistant",
691
+ content: `${this.startPrefix}${gadget}
692
+ ${paramStr}
693
+ ${this.endPrefix}`
694
+ });
695
+ this.messages.push({
696
+ role: "user",
697
+ content: `Result: ${result}`
698
+ });
699
+ return this;
700
+ }
701
+ /**
702
+ * Format parameters as Block format with JSON Pointer paths.
703
+ * Uses the configured argPrefix for consistency with system prompt.
704
+ */
705
+ formatBlockParameters(params, prefix) {
706
+ const lines = [];
707
+ for (const [key, value] of Object.entries(params)) {
708
+ const fullPath = prefix ? `${prefix}/${key}` : key;
709
+ if (Array.isArray(value)) {
710
+ value.forEach((item, index) => {
711
+ const itemPath = `${fullPath}/${index}`;
712
+ if (typeof item === "object" && item !== null) {
713
+ lines.push(this.formatBlockParameters(item, itemPath));
714
+ } else {
715
+ lines.push(`${this.argPrefix}${itemPath}`);
716
+ lines.push(String(item));
717
+ }
718
+ });
719
+ } else if (typeof value === "object" && value !== null) {
720
+ lines.push(this.formatBlockParameters(value, fullPath));
721
+ } else {
722
+ lines.push(`${this.argPrefix}${fullPath}`);
723
+ lines.push(String(value));
724
+ }
725
+ }
726
+ return lines.join("\n");
727
+ }
728
+ build() {
729
+ return [...this.messages];
730
+ }
731
+ };
732
+ }
733
+ });
734
+
735
+ // src/core/model-shortcuts.ts
736
+ function isKnownModelPattern(model) {
737
+ const normalized = model.toLowerCase();
738
+ if (MODEL_ALIASES[normalized]) {
739
+ return true;
740
+ }
741
+ return KNOWN_MODEL_PATTERNS.some((pattern) => pattern.test(model));
742
+ }
743
+ function resolveModel(model, options = {}) {
744
+ if (model.includes(":")) {
745
+ return model;
746
+ }
747
+ const normalized = model.toLowerCase();
748
+ if (MODEL_ALIASES[normalized]) {
749
+ return MODEL_ALIASES[normalized];
750
+ }
751
+ const modelLower = model.toLowerCase();
752
+ if (modelLower.startsWith("gpt")) {
753
+ return `openai:${model}`;
754
+ }
755
+ if (modelLower.startsWith("claude")) {
756
+ return `anthropic:${model}`;
757
+ }
758
+ if (modelLower.startsWith("gemini")) {
759
+ return `gemini:${model}`;
760
+ }
761
+ if (modelLower.match(/^o\d/)) {
762
+ return `openai:${model}`;
763
+ }
764
+ if (!isKnownModelPattern(model)) {
765
+ if (options.strict) {
766
+ throw new Error(
767
+ `Unknown model '${model}'. Did you mean one of: gpt4, sonnet, haiku, flash? Use explicit provider prefix like 'openai:${model}' to bypass this check.`
768
+ );
769
+ }
770
+ if (!options.silent) {
771
+ console.warn(
772
+ `\u26A0\uFE0F Unknown model '${model}', falling back to 'openai:${model}'. This might be a typo. Did you mean: gpt4, gpt5, gpt5-nano, sonnet, haiku, flash? Use { strict: true } to error on unknown models, or { silent: true } to suppress this warning.`
773
+ );
774
+ }
775
+ }
776
+ return `openai:${model}`;
777
+ }
778
+ function hasProviderPrefix(model) {
779
+ return model.includes(":");
780
+ }
781
+ function getProvider(model) {
782
+ const separatorIndex = model.indexOf(":");
783
+ if (separatorIndex === -1) {
784
+ return void 0;
785
+ }
786
+ return model.slice(0, separatorIndex);
787
+ }
788
+ function getModelId(model) {
789
+ const separatorIndex = model.indexOf(":");
790
+ if (separatorIndex === -1) {
791
+ return model;
792
+ }
793
+ return model.slice(separatorIndex + 1);
794
+ }
795
+ var MODEL_ALIASES, KNOWN_MODEL_PATTERNS;
796
+ var init_model_shortcuts = __esm({
797
+ "src/core/model-shortcuts.ts"() {
798
+ "use strict";
799
+ MODEL_ALIASES = {
800
+ // OpenAI aliases
801
+ gpt4: "openai:gpt-4o",
802
+ gpt4o: "openai:gpt-4o",
803
+ gpt5: "openai:gpt-5",
804
+ "gpt5-mini": "openai:gpt-5-mini",
805
+ "gpt5-nano": "openai:gpt-5-nano",
806
+ // Anthropic aliases
807
+ sonnet: "anthropic:claude-sonnet-4-5",
808
+ "claude-sonnet": "anthropic:claude-sonnet-4-5",
809
+ haiku: "anthropic:claude-haiku-4-5",
810
+ "claude-haiku": "anthropic:claude-haiku-4-5",
811
+ opus: "anthropic:claude-opus-4-5",
812
+ "claude-opus": "anthropic:claude-opus-4-5",
813
+ // Gemini aliases
814
+ flash: "gemini:gemini-2.0-flash",
815
+ "gemini-flash": "gemini:gemini-2.0-flash",
816
+ "gemini-pro": "gemini:gemini-2.5-pro",
817
+ pro: "gemini:gemini-2.5-pro"
818
+ };
819
+ KNOWN_MODEL_PATTERNS = [
820
+ /^gpt-?\d/i,
821
+ // gpt-4, gpt-3.5, gpt4, etc.
822
+ /^claude-?\d/i,
823
+ // claude-3, claude-2, etc.
824
+ /^gemini-?(\d|pro|flash)/i,
825
+ // gemini-2.0, gemini-pro, gemini-flash, etc.
826
+ /^o\d/i
827
+ // OpenAI o1, o3, etc.
828
+ ];
829
+ }
830
+ });
831
+
832
+ // src/gadgets/schema-validator.ts
833
+ import * as z from "zod";
834
+ function validateGadgetSchema(schema, gadgetName) {
835
+ let jsonSchema;
836
+ try {
837
+ jsonSchema = z.toJSONSchema(schema, { target: "draft-7" });
838
+ } catch (error) {
839
+ const errorMessage = error instanceof Error ? error.message : String(error);
840
+ throw new Error(
841
+ `Gadget "${gadgetName}" has a schema that cannot be serialized to JSON Schema.
842
+ This usually happens with unsupported patterns like:
843
+ - z.record() - use z.object({}).passthrough() instead
844
+ - Complex transforms or custom refinements
845
+ - Circular references
846
+
847
+ Original error: ${errorMessage}
848
+
849
+ Only use schema patterns that Zod v4's native toJSONSchema() supports.`
850
+ );
851
+ }
852
+ const issues = findUnknownTypes(jsonSchema);
853
+ if (issues.length > 0) {
854
+ const fieldList = issues.join(", ");
855
+ throw new Error(
856
+ `Gadget "${gadgetName}" uses z.unknown() which produces incomplete schemas.
857
+ Problematic fields: ${fieldList}
858
+
859
+ z.unknown() doesn't generate type information in JSON Schema, making it unclear
860
+ to the LLM what data structure to provide.
861
+
862
+ Suggestions:
863
+ - Use z.object({}).passthrough() for flexible objects
864
+ - Use z.record(z.string()) for key-value objects with string values
865
+ - Define specific structure if possible
866
+
867
+ Example fixes:
868
+ // \u274C Bad
869
+ content: z.unknown()
870
+
871
+ // \u2705 Good
872
+ content: z.object({}).passthrough() // for flexible objects
873
+ content: z.record(z.string()) // for key-value objects
874
+ content: z.array(z.string()) // for arrays of strings
875
+ `
876
+ );
877
+ }
878
+ }
879
+ function findUnknownTypes(schema, path = []) {
880
+ const issues = [];
881
+ if (!schema || typeof schema !== "object") {
882
+ return issues;
883
+ }
884
+ if (schema.definitions) {
885
+ for (const defSchema of Object.values(schema.definitions)) {
886
+ issues.push(...findUnknownTypes(defSchema, []));
887
+ }
888
+ }
889
+ if (schema.properties) {
890
+ for (const [propName, propSchema] of Object.entries(schema.properties)) {
891
+ const propPath = [...path, propName];
892
+ if (hasNoType(propSchema)) {
893
+ issues.push(propPath.join(".") || propName);
894
+ }
895
+ issues.push(...findUnknownTypes(propSchema, propPath));
896
+ }
897
+ }
898
+ if (schema.items) {
899
+ const itemPath = [...path, "[]"];
900
+ if (hasNoType(schema.items)) {
901
+ issues.push(itemPath.join("."));
902
+ }
903
+ issues.push(...findUnknownTypes(schema.items, itemPath));
904
+ }
905
+ if (schema.anyOf) {
906
+ schema.anyOf.forEach((subSchema, index) => {
907
+ issues.push(...findUnknownTypes(subSchema, [...path, `anyOf[${index}]`]));
908
+ });
909
+ }
910
+ if (schema.oneOf) {
911
+ schema.oneOf.forEach((subSchema, index) => {
912
+ issues.push(...findUnknownTypes(subSchema, [...path, `oneOf[${index}]`]));
913
+ });
914
+ }
915
+ if (schema.allOf) {
916
+ schema.allOf.forEach((subSchema, index) => {
917
+ issues.push(...findUnknownTypes(subSchema, [...path, `allOf[${index}]`]));
918
+ });
919
+ }
920
+ return issues;
921
+ }
922
+ function hasNoType(prop) {
923
+ if (!prop || typeof prop !== "object") {
924
+ return false;
925
+ }
926
+ const hasType = prop.type !== void 0;
927
+ const hasRef = prop.$ref !== void 0;
928
+ const hasUnion = prop.anyOf !== void 0 || prop.oneOf !== void 0 || prop.allOf !== void 0;
929
+ if (hasType || hasRef || hasUnion) {
930
+ return false;
931
+ }
932
+ const keys = Object.keys(prop);
933
+ const metadataKeys = ["description", "title", "default", "examples"];
934
+ const hasOnlyMetadata = keys.every((key) => metadataKeys.includes(key));
935
+ return hasOnlyMetadata || keys.length === 0;
936
+ }
937
+ var init_schema_validator = __esm({
938
+ "src/gadgets/schema-validator.ts"() {
939
+ "use strict";
940
+ }
941
+ });
942
+
943
+ // src/gadgets/registry.ts
944
+ var GadgetRegistry;
945
+ var init_registry = __esm({
946
+ "src/gadgets/registry.ts"() {
947
+ "use strict";
948
+ init_schema_validator();
949
+ GadgetRegistry = class _GadgetRegistry {
950
+ gadgets = /* @__PURE__ */ new Map();
951
+ /**
952
+ * Creates a registry from an array of gadget classes or instances,
953
+ * or an object mapping names to gadgets.
954
+ *
955
+ * @param gadgets - Array of gadgets/classes or object with custom names
956
+ * @returns New GadgetRegistry with all gadgets registered
957
+ *
958
+ * @example
959
+ * ```typescript
960
+ * // From array of classes
961
+ * const registry = GadgetRegistry.from([Calculator, Weather]);
962
+ *
963
+ * // From array of instances
964
+ * const registry = GadgetRegistry.from([new Calculator(), new Weather()]);
965
+ *
966
+ * // From object with custom names
967
+ * const registry = GadgetRegistry.from({
968
+ * calc: Calculator,
969
+ * weather: new Weather({ apiKey: "..." })
970
+ * });
971
+ * ```
972
+ */
973
+ static from(gadgets) {
974
+ const registry = new _GadgetRegistry();
975
+ if (Array.isArray(gadgets)) {
976
+ registry.registerMany(gadgets);
977
+ } else {
978
+ for (const [name, gadget] of Object.entries(gadgets)) {
979
+ const instance = typeof gadget === "function" ? new gadget() : gadget;
980
+ registry.register(name, instance);
981
+ }
982
+ }
983
+ return registry;
984
+ }
985
+ /**
986
+ * Registers multiple gadgets at once from an array.
987
+ *
988
+ * @param gadgets - Array of gadget instances or classes
989
+ * @returns This registry for chaining
990
+ *
991
+ * @example
992
+ * ```typescript
993
+ * registry.registerMany([Calculator, Weather, Email]);
994
+ * registry.registerMany([new Calculator(), new Weather()]);
995
+ * ```
996
+ */
997
+ registerMany(gadgets) {
998
+ for (const gadget of gadgets) {
999
+ const instance = typeof gadget === "function" ? new gadget() : gadget;
1000
+ this.registerByClass(instance);
1001
+ }
1002
+ return this;
1003
+ }
1004
+ // Register a gadget by name
1005
+ register(name, gadget) {
1006
+ const normalizedName = name.toLowerCase();
1007
+ if (this.gadgets.has(normalizedName)) {
1008
+ throw new Error(`Gadget '${name}' is already registered`);
682
1009
  }
683
- return parts.join("");
1010
+ if (gadget.parameterSchema) {
1011
+ validateGadgetSchema(gadget.parameterSchema, name);
1012
+ }
1013
+ this.gadgets.set(normalizedName, gadget);
684
1014
  }
685
- addUser(content, metadata) {
686
- this.messages.push({ role: "user", content, metadata });
687
- return this;
1015
+ // Register a gadget using its name property or class name
1016
+ registerByClass(gadget) {
1017
+ const name = gadget.name ?? gadget.constructor.name;
1018
+ this.register(name, gadget);
688
1019
  }
689
- addAssistant(content, metadata) {
690
- this.messages.push({ role: "assistant", content, metadata });
691
- return this;
1020
+ // Get gadget by name (case-insensitive)
1021
+ get(name) {
1022
+ return this.gadgets.get(name.toLowerCase());
692
1023
  }
693
- addGadgetCall(gadget, parameters, result) {
694
- const paramStr = this.formatBlockParameters(parameters, "");
695
- this.messages.push({
696
- role: "assistant",
697
- content: `${this.startPrefix}${gadget}
698
- ${paramStr}
699
- ${this.endPrefix}`
700
- });
701
- this.messages.push({
702
- role: "user",
703
- content: `Result: ${result}`
704
- });
705
- return this;
1024
+ // Check if gadget exists (case-insensitive)
1025
+ has(name) {
1026
+ return this.gadgets.has(name.toLowerCase());
706
1027
  }
707
- /**
708
- * Format parameters as Block format with JSON Pointer paths.
709
- * Uses the configured argPrefix for consistency with system prompt.
710
- */
711
- formatBlockParameters(params, prefix) {
712
- const lines = [];
713
- for (const [key, value] of Object.entries(params)) {
714
- const fullPath = prefix ? `${prefix}/${key}` : key;
715
- if (Array.isArray(value)) {
716
- value.forEach((item, index) => {
717
- const itemPath = `${fullPath}/${index}`;
718
- if (typeof item === "object" && item !== null) {
719
- lines.push(this.formatBlockParameters(item, itemPath));
720
- } else {
721
- lines.push(`${this.argPrefix}${itemPath}`);
722
- lines.push(String(item));
723
- }
724
- });
725
- } else if (typeof value === "object" && value !== null) {
726
- lines.push(this.formatBlockParameters(value, fullPath));
727
- } else {
728
- lines.push(`${this.argPrefix}${fullPath}`);
729
- lines.push(String(value));
730
- }
731
- }
732
- return lines.join("\n");
1028
+ // Get all registered gadget names
1029
+ getNames() {
1030
+ return Array.from(this.gadgets.keys());
733
1031
  }
734
- build() {
735
- return [...this.messages];
1032
+ // Get all gadgets for instruction generation
1033
+ getAll() {
1034
+ return Array.from(this.gadgets.values());
1035
+ }
1036
+ // Unregister gadget (useful for testing, case-insensitive)
1037
+ unregister(name) {
1038
+ return this.gadgets.delete(name.toLowerCase());
1039
+ }
1040
+ // Clear all gadgets (useful for testing)
1041
+ clear() {
1042
+ this.gadgets.clear();
736
1043
  }
737
1044
  };
738
1045
  }
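
The rewritten `LLMMessageBuilder` above now accepts either a plain string or an array of content parts, and gains `addUserWithImage`, `addUserWithImageUrl`, `addUserWithAudio`, and `addUserMultimodal` convenience methods. A usage sketch that mirrors the JSDoc examples embedded above; the bare `new LLMMessageBuilder()` construction and the import path are assumptions, since neither appears in this hunk:

```typescript
// Sketch - method names and signatures match the JSDoc examples above;
// the import path and default construction are assumptions.
import { readFile } from "node:fs/promises";
import { LLMMessageBuilder, text, imageFromBuffer } from "llmist";

const builder = new LLMMessageBuilder();

builder
  .addUser("Hello!") // plain text content
  .addUserWithImage("What's in this image?", await readFile("photo.jpg")) // MIME auto-detected
  .addUserWithAudio("Transcribe this clip", await readFile("clip.mp3"), "audio/mp3")
  .addUserMultimodal([
    text("Compare these two screenshots:"),
    imageFromBuffer(await readFile("before.png")),
    imageFromBuffer(await readFile("after.png")),
  ]);

const messages = builder.build(); // returns a copy of the accumulated message list
```
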
@@ -1928,7 +2235,7 @@ var init_conversation_manager = __esm({
1928
2235
  if (msg.role === "user") {
1929
2236
  this.historyBuilder.addUser(msg.content);
1930
2237
  } else if (msg.role === "assistant") {
1931
- this.historyBuilder.addAssistant(msg.content);
2238
+ this.historyBuilder.addAssistant(extractText(msg.content));
1932
2239
  }
1933
2240
  }
1934
2241
  }
@@ -1949,8 +2256,10 @@ async function runWithHandlers(agentGenerator, handlers) {
1949
2256
  if (handlers.onGadgetCall) {
1950
2257
  await handlers.onGadgetCall({
1951
2258
  gadgetName: event.call.gadgetName,
2259
+ invocationId: event.call.invocationId,
1952
2260
  parameters: event.call.parameters,
1953
- parametersRaw: event.call.parametersRaw
2261
+ parametersRaw: event.call.parametersRaw,
2262
+ dependencies: event.call.dependencies
1954
2263
  });
1955
2264
  }
1956
2265
  break;
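
With the parser changes in the hunks that follow, each parsed gadget call carries an invocation ID and a dependency list, and the `onGadgetCall` handler payload above gains `invocationId` and `dependencies`. A hedged sketch of a handler consuming the new fields; whether `runWithHandlers` is exported under that name is an assumption, while `LLMist.createAgent()`, `ask()`, and `agent.run()` are taken from examples elsewhere in this diff:

```typescript
// Sketch - runWithHandlers' export path is an assumption; the handler payload
// fields match the object passed to handlers.onGadgetCall above.
import { LLMist, runWithHandlers } from "llmist";

const agent = LLMist.createAgent()
  .withModel("gpt-5-nano")
  .ask("Fetch the users and orders feeds, then merge them");

await runWithHandlers(agent.run(), {
  async onGadgetCall({ gadgetName, invocationId, dependencies }) {
    // invocationId and dependencies are the fields added in this hunk.
    console.log(`${gadgetName} [${invocationId}] waits on:`, dependencies);
  },
});
```
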
@@ -2827,15 +3136,37 @@ var init_parser = __esm({
2827
3136
  return segment.trim().length > 0 ? segment : void 0;
2828
3137
  }
2829
3138
  /**
2830
- * Parse gadget name, handling both old format (name:invocationId) and new format (just name).
2831
- * For new format, generates a unique invocation ID.
3139
+ * Parse gadget name with optional invocation ID and dependencies.
3140
+ *
3141
+ * Supported formats:
3142
+ * - `GadgetName` - Auto-generate ID, no dependencies
3143
+ * - `GadgetName:my_id` - Explicit ID, no dependencies
3144
+ * - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
3145
+ *
3146
+ * Dependencies must be comma-separated invocation IDs.
2832
3147
  */
2833
3148
  parseGadgetName(gadgetName) {
2834
- if (gadgetName.includes(":")) {
2835
- const parts = gadgetName.split(":");
2836
- return { actualName: parts[0], invocationId: parts[1] };
3149
+ const parts = gadgetName.split(":");
3150
+ if (parts.length === 1) {
3151
+ return {
3152
+ actualName: parts[0],
3153
+ invocationId: `gadget_${++globalInvocationCounter}`,
3154
+ dependencies: []
3155
+ };
3156
+ } else if (parts.length === 2) {
3157
+ return {
3158
+ actualName: parts[0],
3159
+ invocationId: parts[1].trim(),
3160
+ dependencies: []
3161
+ };
3162
+ } else {
3163
+ const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
3164
+ return {
3165
+ actualName: parts[0],
3166
+ invocationId: parts[1].trim(),
3167
+ dependencies: deps
3168
+ };
2837
3169
  }
2838
- return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
2839
3170
  }
2840
3171
  /**
2841
3172
  * Extract the error message from a parse error.
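
For reference, the three header formats handled by `parseGadgetName` above map to the following results (the `gadget_N` fallback ID comes from the module-level invocation counter; the parser itself is internal, so this is illustration rather than public API):

```typescript
// Illustration of the accepted gadget headers and the parse results produced
// by the implementation above.
parseGadgetName("fetch_data");
// -> { actualName: "fetch_data", invocationId: "gadget_1", dependencies: [] }  // ID auto-generated

parseGadgetName("fetch_data:fetch_1");
// -> { actualName: "fetch_data", invocationId: "fetch_1", dependencies: [] }

parseGadgetName("merge_data:merge_1:fetch_1,fetch_2");
// -> { actualName: "merge_data", invocationId: "merge_1", dependencies: ["fetch_1", "fetch_2"] }
```
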
@@ -2871,39 +3202,20 @@ var init_parser = __esm({
2871
3202
  const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
2872
3203
  if (metadataEndIndex === -1) break;
2873
3204
  const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
2874
- const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
3205
+ const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
2875
3206
  const contentStartIndex = metadataEndIndex + 1;
2876
3207
  let partEndIndex;
2877
3208
  let endMarkerLength = 0;
2878
- if (gadgetName.includes(":")) {
2879
- const oldEndMarker = `${this.endPrefix + actualGadgetName}:${invocationId}`;
2880
- partEndIndex = this.buffer.indexOf(oldEndMarker, contentStartIndex);
2881
- if (partEndIndex === -1) break;
2882
- endMarkerLength = oldEndMarker.length;
3209
+ const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
3210
+ const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
3211
+ if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
3212
+ partEndIndex = nextStartPos;
3213
+ endMarkerLength = 0;
3214
+ } else if (endPos !== -1) {
3215
+ partEndIndex = endPos;
3216
+ endMarkerLength = this.endPrefix.length;
2883
3217
  } else {
2884
- const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
2885
- let validEndPos = -1;
2886
- let searchPos = contentStartIndex;
2887
- while (true) {
2888
- const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
2889
- if (endPos === -1) break;
2890
- const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
2891
- if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
2892
- validEndPos = endPos;
2893
- break;
2894
- } else {
2895
- searchPos = endPos + this.endPrefix.length;
2896
- }
2897
- }
2898
- if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
2899
- partEndIndex = nextStartPos;
2900
- endMarkerLength = 0;
2901
- } else if (validEndPos !== -1) {
2902
- partEndIndex = validEndPos;
2903
- endMarkerLength = this.endPrefix.length;
2904
- } else {
2905
- break;
2906
- }
3218
+ break;
2907
3219
  }
2908
3220
  const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
2909
3221
  const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2914,7 +3226,8 @@ var init_parser = __esm({
2914
3226
  invocationId,
2915
3227
  parametersRaw,
2916
3228
  parameters,
2917
- parseError
3229
+ parseError,
3230
+ dependencies
2918
3231
  }
2919
3232
  };
2920
3233
  startIndex = partEndIndex + endMarkerLength;
@@ -2937,7 +3250,7 @@ var init_parser = __esm({
2937
3250
  const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
2938
3251
  if (metadataEndIndex !== -1) {
2939
3252
  const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
2940
- const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
3253
+ const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
2941
3254
  const contentStartIndex = metadataEndIndex + 1;
2942
3255
  const parametersRaw = this.buffer.substring(contentStartIndex).trim();
2943
3256
  const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2948,7 +3261,8 @@ var init_parser = __esm({
2948
3261
  invocationId,
2949
3262
  parametersRaw,
2950
3263
  parameters,
2951
- parseError
3264
+ parseError,
3265
+ dependencies
2952
3266
  }
2953
3267
  };
2954
3268
  return;
@@ -3318,6 +3632,13 @@ var init_stream_processor = __esm({
3318
3632
  accumulatedText = "";
3319
3633
  shouldStopExecution = false;
3320
3634
  observerFailureCount = 0;
3635
+ // Dependency tracking for gadget execution DAG
3636
+ /** Gadgets waiting for their dependencies to complete */
3637
+ pendingGadgets = /* @__PURE__ */ new Map();
3638
+ /** Completed gadget results, keyed by invocation ID */
3639
+ completedResults = /* @__PURE__ */ new Map();
3640
+ /** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
3641
+ failedInvocations = /* @__PURE__ */ new Set();
3321
3642
  constructor(options) {
3322
3643
  this.iteration = options.iteration;
3323
3644
  this.registry = options.registry;
@@ -3418,6 +3739,16 @@ var init_stream_processor = __esm({
3418
3739
  }
3419
3740
  }
3420
3741
  }
3742
+ const finalPendingEvents = await this.processPendingGadgets();
3743
+ outputs.push(...finalPendingEvents);
3744
+ if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
3745
+ didExecuteGadgets = true;
3746
+ }
3747
+ for (const evt of finalPendingEvents) {
3748
+ if (evt.type === "gadget_result" && evt.result.breaksLoop) {
3749
+ shouldBreakLoop = true;
3750
+ }
3751
+ }
3421
3752
  }
3422
3753
  let finalMessage = this.accumulatedText;
3423
3754
  if (this.hooks.interceptors?.interceptAssistantMessage) {
@@ -3469,7 +3800,11 @@ var init_stream_processor = __esm({
3469
3800
  return [{ type: "text", content }];
3470
3801
  }
3471
3802
  /**
3472
- * Process a gadget call through the full lifecycle.
3803
+ * Process a gadget call through the full lifecycle, handling dependencies.
3804
+ *
3805
+ * Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
3806
+ * Gadgets with unsatisfied dependencies are queued for later execution.
3807
+ * After each execution, pending gadgets are checked to see if they can now run.
3473
3808
  */
3474
3809
  async processGadgetCall(call) {
3475
3810
  if (this.shouldStopExecution) {
@@ -3480,6 +3815,53 @@ var init_stream_processor = __esm({
3480
3815
  }
3481
3816
  const events = [];
3482
3817
  events.push({ type: "gadget_call", call });
3818
+ if (call.dependencies.length > 0) {
3819
+ if (call.dependencies.includes(call.invocationId)) {
3820
+ this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
3821
+ gadgetName: call.gadgetName,
3822
+ invocationId: call.invocationId
3823
+ });
3824
+ this.failedInvocations.add(call.invocationId);
3825
+ const skipEvent = {
3826
+ type: "gadget_skipped",
3827
+ gadgetName: call.gadgetName,
3828
+ invocationId: call.invocationId,
3829
+ parameters: call.parameters ?? {},
3830
+ failedDependency: call.invocationId,
3831
+ failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
3832
+ };
3833
+ events.push(skipEvent);
3834
+ return events;
3835
+ }
3836
+ const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
3837
+ if (failedDep) {
3838
+ const skipEvents = await this.handleFailedDependency(call, failedDep);
3839
+ events.push(...skipEvents);
3840
+ return events;
3841
+ }
3842
+ const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
3843
+ if (unsatisfied.length > 0) {
3844
+ this.logger.debug("Queueing gadget for later - waiting on dependencies", {
3845
+ gadgetName: call.gadgetName,
3846
+ invocationId: call.invocationId,
3847
+ waitingOn: unsatisfied
3848
+ });
3849
+ this.pendingGadgets.set(call.invocationId, call);
3850
+ return events;
3851
+ }
3852
+ }
3853
+ const executeEvents = await this.executeGadgetWithHooks(call);
3854
+ events.push(...executeEvents);
3855
+ const triggeredEvents = await this.processPendingGadgets();
3856
+ events.push(...triggeredEvents);
3857
+ return events;
3858
+ }
3859
+ /**
3860
+ * Execute a gadget through the full hook lifecycle.
3861
+ * This is the core execution logic, extracted from processGadgetCall.
3862
+ */
3863
+ async executeGadgetWithHooks(call) {
3864
+ const events = [];
3483
3865
  if (call.parseError) {
3484
3866
  this.logger.warn("Gadget has parse error", {
3485
3867
  gadgetName: call.gadgetName,
@@ -3610,6 +3992,10 @@ var init_stream_processor = __esm({
3610
3992
  });
3611
3993
  }
3612
3994
  await this.runObserversInParallel(completeObservers);
3995
+ this.completedResults.set(result.invocationId, result);
3996
+ if (result.error) {
3997
+ this.failedInvocations.add(result.invocationId);
3998
+ }
3613
3999
  events.push({ type: "gadget_result", result });
3614
4000
  if (result.error) {
3615
4001
  const errorType = this.determineErrorType(call, result);
@@ -3625,6 +4011,162 @@ var init_stream_processor = __esm({
3625
4011
  }
3626
4012
  return events;
3627
4013
  }
4014
+ /**
4015
+ * Handle a gadget that cannot execute because a dependency failed.
4016
+ * Calls the onDependencySkipped controller to allow customization.
4017
+ */
4018
+ async handleFailedDependency(call, failedDep) {
4019
+ const events = [];
4020
+ const depResult = this.completedResults.get(failedDep);
4021
+ const depError = depResult?.error ?? "Dependency failed";
4022
+ let action = { action: "skip" };
4023
+ if (this.hooks.controllers?.onDependencySkipped) {
4024
+ const context = {
4025
+ iteration: this.iteration,
4026
+ gadgetName: call.gadgetName,
4027
+ invocationId: call.invocationId,
4028
+ parameters: call.parameters ?? {},
4029
+ failedDependency: failedDep,
4030
+ failedDependencyError: depError,
4031
+ logger: this.logger
4032
+ };
4033
+ action = await this.hooks.controllers.onDependencySkipped(context);
4034
+ }
4035
+ if (action.action === "skip") {
4036
+ this.failedInvocations.add(call.invocationId);
4037
+ const skipEvent = {
4038
+ type: "gadget_skipped",
4039
+ gadgetName: call.gadgetName,
4040
+ invocationId: call.invocationId,
4041
+ parameters: call.parameters ?? {},
4042
+ failedDependency: failedDep,
4043
+ failedDependencyError: depError
4044
+ };
4045
+ events.push(skipEvent);
4046
+ if (this.hooks.observers?.onGadgetSkipped) {
4047
+ const observeContext = {
4048
+ iteration: this.iteration,
4049
+ gadgetName: call.gadgetName,
4050
+ invocationId: call.invocationId,
4051
+ parameters: call.parameters ?? {},
4052
+ failedDependency: failedDep,
4053
+ failedDependencyError: depError,
4054
+ logger: this.logger
4055
+ };
4056
+ await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
4057
+ }
4058
+ this.logger.info("Gadget skipped due to failed dependency", {
4059
+ gadgetName: call.gadgetName,
4060
+ invocationId: call.invocationId,
4061
+ failedDependency: failedDep
4062
+ });
4063
+ } else if (action.action === "execute_anyway") {
4064
+ this.logger.info("Executing gadget despite failed dependency (controller override)", {
4065
+ gadgetName: call.gadgetName,
4066
+ invocationId: call.invocationId,
4067
+ failedDependency: failedDep
4068
+ });
4069
+ const executeEvents = await this.executeGadgetWithHooks(call);
4070
+ events.push(...executeEvents);
4071
+ } else if (action.action === "use_fallback") {
4072
+ const fallbackResult = {
4073
+ gadgetName: call.gadgetName,
4074
+ invocationId: call.invocationId,
4075
+ parameters: call.parameters ?? {},
4076
+ result: action.fallbackResult,
4077
+ executionTimeMs: 0
4078
+ };
4079
+ this.completedResults.set(call.invocationId, fallbackResult);
4080
+ events.push({ type: "gadget_result", result: fallbackResult });
4081
+ this.logger.info("Using fallback result for gadget with failed dependency", {
4082
+ gadgetName: call.gadgetName,
4083
+ invocationId: call.invocationId,
4084
+ failedDependency: failedDep
4085
+ });
4086
+ }
4087
+ return events;
4088
+ }
4089
+ /**
4090
+ * Process pending gadgets whose dependencies are now satisfied.
4091
+ * Executes ready gadgets in parallel and continues until no more can be triggered.
4092
+ */
4093
+ async processPendingGadgets() {
4094
+ const events = [];
4095
+ let progress = true;
4096
+ while (progress && this.pendingGadgets.size > 0) {
4097
+ progress = false;
4098
+ const readyToExecute = [];
4099
+ const readyToSkip = [];
4100
+ for (const [invocationId, call] of this.pendingGadgets) {
4101
+ const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
4102
+ if (failedDep) {
4103
+ readyToSkip.push({ call, failedDep });
4104
+ continue;
4105
+ }
4106
+ const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
4107
+ if (allSatisfied) {
4108
+ readyToExecute.push(call);
4109
+ }
4110
+ }
4111
+ for (const { call, failedDep } of readyToSkip) {
4112
+ this.pendingGadgets.delete(call.invocationId);
4113
+ const skipEvents = await this.handleFailedDependency(call, failedDep);
4114
+ events.push(...skipEvents);
4115
+ progress = true;
4116
+ }
4117
+ if (readyToExecute.length > 0) {
4118
+ this.logger.debug("Executing ready gadgets in parallel", {
4119
+ count: readyToExecute.length,
4120
+ invocationIds: readyToExecute.map((c) => c.invocationId)
4121
+ });
4122
+ for (const call of readyToExecute) {
4123
+ this.pendingGadgets.delete(call.invocationId);
4124
+ }
4125
+ const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
4126
+ const results = await Promise.all(executePromises);
4127
+ for (const executeEvents of results) {
4128
+ events.push(...executeEvents);
4129
+ }
4130
+ progress = true;
4131
+ }
4132
+ }
4133
+ if (this.pendingGadgets.size > 0) {
4134
+ const pendingIds = new Set(this.pendingGadgets.keys());
4135
+ for (const [invocationId, call] of this.pendingGadgets) {
4136
+ const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
4137
+ const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
4138
+ const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
4139
+ let errorMessage;
4140
+ let logLevel = "warn";
4141
+ if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
4142
+ errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
4143
+ logLevel = "error";
4144
+ } else if (circularDeps.length > 0) {
4145
+ errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
4146
+ } else {
4147
+ errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
4148
+ }
4149
+ this.logger[logLevel]("Gadget has unresolvable dependencies", {
4150
+ gadgetName: call.gadgetName,
4151
+ invocationId,
4152
+ circularDependencies: circularDeps,
4153
+ missingDependencies: trulyMissingDeps
4154
+ });
4155
+ this.failedInvocations.add(invocationId);
4156
+ const skipEvent = {
4157
+ type: "gadget_skipped",
4158
+ gadgetName: call.gadgetName,
4159
+ invocationId,
4160
+ parameters: call.parameters ?? {},
4161
+ failedDependency: missingDeps[0],
4162
+ failedDependencyError: errorMessage
4163
+ };
4164
+ events.push(skipEvent);
4165
+ }
4166
+ this.pendingGadgets.clear();
4167
+ }
4168
+ return events;
4169
+ }
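The readiness rules implemented above are easier to see in isolation. The following sketch is a hypothetical standalone helper (not part of llmist's API) that mirrors the same classification: skip a pending call as soon as any dependency has failed, execute it once every dependency has a completed result, and treat anything left over as circular or missing.

```typescript
// Minimal sketch of the dependency-readiness rules used by processPendingGadgets().
// The names PendingCall and classifyPending are illustrative, not llmist exports.
interface PendingCall {
  invocationId: string;
  dependencies: string[];
}

function classifyPending(
  pending: Map<string, PendingCall>,
  completed: Set<string>,
  failed: Set<string>
): { execute: PendingCall[]; skip: PendingCall[]; stuck: PendingCall[] } {
  const execute: PendingCall[] = [];
  const skip: PendingCall[] = [];
  const stuck: PendingCall[] = [];
  for (const call of pending.values()) {
    if (call.dependencies.some((dep) => failed.has(dep))) {
      skip.push(call);    // a dependency failed: fall back or skip
    } else if (call.dependencies.every((dep) => completed.has(dep))) {
      execute.push(call); // every input is ready: run in parallel
    } else {
      stuck.push(call);   // still waiting: circular or missing dependency
    }
  }
  return { execute, skip, stuck };
}
```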
3628
4170
  /**
3629
4171
  * Safely execute an observer, catching and logging any errors.
3630
4172
  * Observers are non-critical, so errors are logged but don't crash the system.
@@ -4062,9 +4604,9 @@ var init_agent = __esm({
4062
4604
  if (msg.role === "user") {
4063
4605
  this.conversation.addUserMessage(msg.content);
4064
4606
  } else if (msg.role === "assistant") {
4065
- this.conversation.addAssistantMessage(msg.content);
4607
+ this.conversation.addAssistantMessage(extractText(msg.content));
4066
4608
  } else if (msg.role === "system") {
4067
- this.conversation.addUserMessage(`[System] ${msg.content}`);
4609
+ this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
4068
4610
  }
4069
4611
  }
4070
4612
  }
@@ -4284,6 +4826,7 @@ var init_builder = __esm({
4284
4826
  "src/agent/builder.ts"() {
4285
4827
  "use strict";
4286
4828
  init_constants();
4829
+ init_input_content();
4287
4830
  init_model_shortcuts();
4288
4831
  init_registry();
4289
4832
  init_agent();
@@ -4931,13 +5474,17 @@ ${endPrefix}`
4931
5474
  * }
4932
5475
  * ```
4933
5476
  */
4934
- ask(userPrompt) {
5477
+ /**
5478
+ * Build AgentOptions with the given user prompt.
5479
+ * Centralizes options construction for ask(), askWithImage(), and askWithContent().
5480
+ */
5481
+ buildAgentOptions(userPrompt) {
4935
5482
  if (!this.client) {
4936
5483
  const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
4937
5484
  this.client = new LLMistClass();
4938
5485
  }
4939
5486
  const registry = GadgetRegistry.from(this.gadgets);
4940
- const options = {
5487
+ return {
4941
5488
  client: this.client,
4942
5489
  model: this.model ?? "openai:gpt-5-nano",
4943
5490
  systemPrompt: this.systemPrompt,
@@ -4963,6 +5510,83 @@ ${endPrefix}`
4963
5510
  compactionConfig: this.compactionConfig,
4964
5511
  signal: this.signal
4965
5512
  };
5513
+ }
5514
+ ask(userPrompt) {
5515
+ const options = this.buildAgentOptions(userPrompt);
5516
+ return new Agent(AGENT_INTERNAL_KEY, options);
5517
+ }
5518
+ /**
5519
+ * Build the Agent with a multimodal user prompt (text + image).
5520
+ * Returns the Agent instance ready to run.
5521
+ *
5522
+ * @param textPrompt - Text prompt describing what to do with the image
5523
+ * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
5524
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
5525
+ * @returns Configured Agent instance
5526
+ *
5527
+ * @example
5528
+ * ```typescript
5529
+ * const agent = LLMist.createAgent()
5530
+ * .withModel("gpt-4o")
5531
+ * .withSystem("You analyze images")
5532
+ * .askWithImage(
5533
+ * "What's in this image?",
5534
+ * await fs.readFile("photo.jpg")
5535
+ * );
5536
+ *
5537
+ * for await (const event of agent.run()) {
5538
+ * // handle events
5539
+ * }
5540
+ * ```
5541
+ */
5542
+ askWithImage(textPrompt, imageData, mimeType) {
5543
+ const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
5544
+ const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
5545
+ if (!detectedMime) {
5546
+ throw new Error(
5547
+ "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
5548
+ );
5549
+ }
5550
+ const userContent = [
5551
+ text(textPrompt),
5552
+ {
5553
+ type: "image",
5554
+ source: {
5555
+ type: "base64",
5556
+ mediaType: detectedMime,
5557
+ data: toBase64(imageBuffer)
5558
+ }
5559
+ }
5560
+ ];
5561
+ const options = this.buildAgentOptions(userContent);
5562
+ return new Agent(AGENT_INTERNAL_KEY, options);
5563
+ }
5564
+ /**
5565
+ * Build and return an Agent configured with multimodal content.
5566
+ * More flexible than askWithImage - accepts any combination of content parts.
5567
+ *
5568
+ * @param content - Array of content parts (text, images, audio)
5569
+ * @returns A configured Agent ready for execution
5570
+ *
5571
+ * @example
5572
+ * ```typescript
5573
+ * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
5574
+ *
5575
+ * const agent = LLMist.createAgent()
5576
+ * .withModel("gemini:gemini-2.5-flash")
5577
+ * .askWithContent([
5578
+ * text("Describe this image and transcribe the audio:"),
5579
+ * imageFromBuffer(imageData),
5580
+ * audioFromBuffer(audioData),
5581
+ * ]);
5582
+ *
5583
+ * for await (const event of agent.run()) {
5584
+ * // handle events
5585
+ * }
5586
+ * ```
5587
+ */
5588
+ askWithContent(content) {
5589
+ const options = this.buildAgentOptions(content);
4966
5590
  return new Agent(AGENT_INTERNAL_KEY, options);
4967
5591
  }
4968
5592
  /**
@@ -5438,6 +6062,7 @@ var AnthropicMessagesProvider;
5438
6062
  var init_anthropic = __esm({
5439
6063
  "src/providers/anthropic.ts"() {
5440
6064
  "use strict";
6065
+ init_messages();
5441
6066
  init_anthropic_models();
5442
6067
  init_base_provider();
5443
6068
  init_constants2();
@@ -5476,7 +6101,7 @@ var init_anthropic = __esm({
5476
6101
  const systemMessages = messages.filter((message) => message.role === "system");
5477
6102
  const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
5478
6103
  type: "text",
5479
- text: m.content,
6104
+ text: extractText(m.content),
5480
6105
  // Add cache_control to the LAST system message block
5481
6106
  ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
5482
6107
  })) : void 0;
@@ -5489,14 +6114,10 @@ var init_anthropic = __esm({
5489
6114
  );
5490
6115
  const conversation = nonSystemMessages.map((message, index) => ({
5491
6116
  role: message.role,
5492
- content: [
5493
- {
5494
- type: "text",
5495
- text: message.content,
5496
- // Add cache_control to the LAST user message
5497
- ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
5498
- }
5499
- ]
6117
+ content: this.convertToAnthropicContent(
6118
+ message.content,
6119
+ message.role === "user" && index === lastUserIndex
6120
+ )
5500
6121
  }));
5501
6122
  const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
5502
6123
  const payload = {
@@ -5510,7 +6131,53 @@ var init_anthropic = __esm({
5510
6131
  stream: true,
5511
6132
  ...options.extra
5512
6133
  };
5513
- return payload;
6134
+ return payload;
6135
+ }
6136
+ /**
6137
+ * Convert llmist content to Anthropic's content block format.
6138
+ * Handles text, images (base64 only), and applies cache_control.
6139
+ */
6140
+ convertToAnthropicContent(content, addCacheControl) {
6141
+ const parts = normalizeContent(content);
6142
+ return parts.map((part, index) => {
6143
+ const isLastPart = index === parts.length - 1;
6144
+ const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
6145
+ if (part.type === "text") {
6146
+ return {
6147
+ type: "text",
6148
+ text: part.text,
6149
+ ...cacheControl
6150
+ };
6151
+ }
6152
+ if (part.type === "image") {
6153
+ return this.convertImagePart(part, cacheControl);
6154
+ }
6155
+ if (part.type === "audio") {
6156
+ throw new Error(
6157
+ "Anthropic does not support audio input. Use Google Gemini for audio processing."
6158
+ );
6159
+ }
6160
+ throw new Error(`Unsupported content type: ${part.type}`);
6161
+ });
6162
+ }
6163
+ /**
6164
+ * Convert an image content part to Anthropic's image block format.
6165
+ */
6166
+ convertImagePart(part, cacheControl) {
6167
+ if (part.source.type === "url") {
6168
+ throw new Error(
6169
+ "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
6170
+ );
6171
+ }
6172
+ return {
6173
+ type: "image",
6174
+ source: {
6175
+ type: "base64",
6176
+ media_type: part.source.mediaType,
6177
+ data: part.source.data
6178
+ },
6179
+ ...cacheControl
6180
+ };
5514
6181
  }
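For reference, the two helpers above produce content blocks roughly like the following for a user turn carrying text plus a base64 image. The literal is a hand-written illustration with placeholder values, not captured provider output; cache_control is attached to the final block only, and the image source uses Anthropic's snake_case media_type.

```typescript
// Illustrative result of convertToAnthropicContent() for the last user message.
const anthropicContent = [
  { type: "text", text: "What's in this image?" },
  {
    type: "image",
    source: { type: "base64", media_type: "image/png", data: "<base64 bytes>" },
    cache_control: { type: "ephemeral" },
  },
];
```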
5515
6182
  async executeStreamRequest(payload, signal) {
5516
6183
  const client = this.client;
@@ -5594,17 +6261,12 @@ var init_anthropic = __esm({
5594
6261
  async countTokens(messages, descriptor, _spec) {
5595
6262
  const client = this.client;
5596
6263
  const systemMessages = messages.filter((message) => message.role === "system");
5597
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
6264
+ const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
5598
6265
  const conversation = messages.filter(
5599
6266
  (message) => message.role !== "system"
5600
6267
  ).map((message) => ({
5601
6268
  role: message.role,
5602
- content: [
5603
- {
5604
- type: "text",
5605
- text: message.content
5606
- }
5607
- ]
6269
+ content: this.convertToAnthropicContent(message.content, false)
5608
6270
  }));
5609
6271
  try {
5610
6272
  const response = await client.messages.countTokens({
@@ -5618,8 +6280,19 @@ var init_anthropic = __esm({
5618
6280
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
5619
6281
  error
5620
6282
  );
5621
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
5622
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
6283
+ let totalChars = 0;
6284
+ let imageCount = 0;
6285
+ for (const msg of messages) {
6286
+ const parts = normalizeContent(msg.content);
6287
+ for (const part of parts) {
6288
+ if (part.type === "text") {
6289
+ totalChars += part.text.length;
6290
+ } else if (part.type === "image") {
6291
+ imageCount++;
6292
+ }
6293
+ }
6294
+ }
6295
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
5623
6296
  }
5624
6297
  }
5625
6298
  };
@@ -6148,6 +6821,7 @@ var GEMINI_ROLE_MAP, GeminiGenerativeProvider;
6148
6821
  var init_gemini = __esm({
6149
6822
  "src/providers/gemini.ts"() {
6150
6823
  "use strict";
6824
+ init_messages();
6151
6825
  init_base_provider();
6152
6826
  init_constants2();
6153
6827
  init_gemini_image_models();
@@ -6317,7 +6991,7 @@ var init_gemini = __esm({
6317
6991
  };
6318
6992
  return {
6319
6993
  model: descriptor.name,
6320
- contents: this.convertContentsForNewSDK(contents),
6994
+ contents,
6321
6995
  config
6322
6996
  };
6323
6997
  }
@@ -6352,18 +7026,25 @@ var init_gemini = __esm({
6352
7026
  if (message.role === "system") {
6353
7027
  expandedMessages.push({
6354
7028
  role: "user",
6355
- content: message.content
7029
+ content: extractText(message.content)
6356
7030
  });
6357
7031
  expandedMessages.push({
6358
7032
  role: "assistant",
6359
7033
  content: "Understood."
6360
7034
  });
6361
7035
  } else {
6362
- expandedMessages.push(message);
7036
+ expandedMessages.push({
7037
+ role: message.role,
7038
+ content: message.content
7039
+ });
6363
7040
  }
6364
7041
  }
6365
7042
  return this.mergeConsecutiveMessages(expandedMessages);
6366
7043
  }
7044
+ /**
7045
+ * Merge consecutive messages with the same role (required by Gemini).
7046
+ * Handles multimodal content by converting to Gemini's part format.
7047
+ */
6367
7048
  mergeConsecutiveMessages(messages) {
6368
7049
  if (messages.length === 0) {
6369
7050
  return [];
@@ -6372,15 +7053,16 @@ var init_gemini = __esm({
6372
7053
  let currentGroup = null;
6373
7054
  for (const message of messages) {
6374
7055
  const geminiRole = GEMINI_ROLE_MAP[message.role];
7056
+ const geminiParts = this.convertToGeminiParts(message.content);
6375
7057
  if (currentGroup && currentGroup.role === geminiRole) {
6376
- currentGroup.parts.push({ text: message.content });
7058
+ currentGroup.parts.push(...geminiParts);
6377
7059
  } else {
6378
7060
  if (currentGroup) {
6379
7061
  result.push(currentGroup);
6380
7062
  }
6381
7063
  currentGroup = {
6382
7064
  role: geminiRole,
6383
- parts: [{ text: message.content }]
7065
+ parts: geminiParts
6384
7066
  };
6385
7067
  }
6386
7068
  }
@@ -6389,11 +7071,39 @@ var init_gemini = __esm({
6389
7071
  }
6390
7072
  return result;
6391
7073
  }
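To make the merge concrete, here is a hand-written illustration (placeholder values, and assuming the user role maps straight through to Gemini's user role): two consecutive user-role messages collapse into one Gemini content whose parts are concatenated, with image parts rendered as inlineData.

```typescript
// Two consecutive user-role messages going into mergeConsecutiveMessages()...
const messages = [
  { role: "user", content: "Here is the photo:" },
  {
    role: "user",
    content: [
      { type: "image", source: { type: "base64", mediaType: "image/png", data: "<base64 bytes>" } },
    ],
  },
];

// ...come out as a single Gemini content with both parts concatenated.
const merged = [
  {
    role: "user",
    parts: [
      { text: "Here is the photo:" },
      { inlineData: { mimeType: "image/png", data: "<base64 bytes>" } },
    ],
  },
];
```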
6392
- convertContentsForNewSDK(contents) {
6393
- return contents.map((content) => ({
6394
- role: content.role,
6395
- parts: content.parts.map((part) => ({ text: part.text }))
6396
- }));
7074
+ /**
7075
+ * Convert llmist content to Gemini's part format.
7076
+ * Handles text, images, and audio (Gemini supports all three).
7077
+ */
7078
+ convertToGeminiParts(content) {
7079
+ const parts = normalizeContent(content);
7080
+ return parts.map((part) => {
7081
+ if (part.type === "text") {
7082
+ return { text: part.text };
7083
+ }
7084
+ if (part.type === "image") {
7085
+ if (part.source.type === "url") {
7086
+ throw new Error(
7087
+ "Gemini does not support image URLs directly. Please provide base64-encoded image data."
7088
+ );
7089
+ }
7090
+ return {
7091
+ inlineData: {
7092
+ mimeType: part.source.mediaType,
7093
+ data: part.source.data
7094
+ }
7095
+ };
7096
+ }
7097
+ if (part.type === "audio") {
7098
+ return {
7099
+ inlineData: {
7100
+ mimeType: part.source.mediaType,
7101
+ data: part.source.data
7102
+ }
7103
+ };
7104
+ }
7105
+ throw new Error(`Unsupported content type: ${part.type}`);
7106
+ });
6397
7107
  }
6398
7108
  buildGenerationConfig(options) {
6399
7109
  const config = {};
@@ -6414,9 +7124,9 @@ var init_gemini = __esm({
6414
7124
  async *wrapStream(iterable) {
6415
7125
  const stream2 = iterable;
6416
7126
  for await (const chunk of stream2) {
6417
- const text = this.extractText(chunk);
6418
- if (text) {
6419
- yield { text, rawEvent: chunk };
7127
+ const text3 = this.extractText(chunk);
7128
+ if (text3) {
7129
+ yield { text: text3, rawEvent: chunk };
6420
7130
  }
6421
7131
  const finishReason = this.extractFinishReason(chunk);
6422
7132
  const usage = this.extractUsage(chunk);
@@ -6477,7 +7187,7 @@ var init_gemini = __esm({
6477
7187
  try {
6478
7188
  const response = await client.models.countTokens({
6479
7189
  model: descriptor.name,
6480
- contents: this.convertContentsForNewSDK(contents)
7190
+ contents
6481
7191
  // Note: systemInstruction not used - it's not supported by countTokens()
6482
7192
  // and would cause a 2100% token counting error
6483
7193
  });
@@ -6487,8 +7197,19 @@ var init_gemini = __esm({
6487
7197
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
6488
7198
  error
6489
7199
  );
6490
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
6491
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
7200
+ let totalChars = 0;
7201
+ let mediaCount = 0;
7202
+ for (const msg of messages) {
7203
+ const parts = normalizeContent(msg.content);
7204
+ for (const part of parts) {
7205
+ if (part.type === "text") {
7206
+ totalChars += part.text.length;
7207
+ } else if (part.type === "image" || part.type === "audio") {
7208
+ mediaCount++;
7209
+ }
7210
+ }
7211
+ }
7212
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
6492
7213
  }
6493
7214
  }
6494
7215
  };
@@ -7131,6 +7852,7 @@ var ROLE_MAP, OpenAIChatProvider;
7131
7852
  var init_openai = __esm({
7132
7853
  "src/providers/openai.ts"() {
7133
7854
  "use strict";
7855
+ init_messages();
7134
7856
  init_base_provider();
7135
7857
  init_constants2();
7136
7858
  init_openai_image_models();
@@ -7238,11 +7960,7 @@ var init_openai = __esm({
7238
7960
  const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
7239
7961
  return {
7240
7962
  model: descriptor.name,
7241
- messages: messages.map((message) => ({
7242
- role: ROLE_MAP[message.role],
7243
- content: message.content,
7244
- name: message.name
7245
- })),
7963
+ messages: messages.map((message) => this.convertToOpenAIMessage(message)),
7246
7964
  // Only set max_completion_tokens if explicitly provided
7247
7965
  // Otherwise let the API use "as much as fits" in the context window
7248
7966
  ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -7254,6 +7972,77 @@ var init_openai = __esm({
7254
7972
  ...shouldIncludeTemperature ? { temperature } : {}
7255
7973
  };
7256
7974
  }
7975
+ /**
7976
+ * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
7977
+ * Handles role-specific content type requirements:
7978
+ * - system/assistant: string content only
7979
+ * - user: string or multimodal array content
7980
+ */
7981
+ convertToOpenAIMessage(message) {
7982
+ const role = ROLE_MAP[message.role];
7983
+ if (role === "user") {
7984
+ const content = this.convertToOpenAIContent(message.content);
7985
+ return {
7986
+ role: "user",
7987
+ content,
7988
+ ...message.name ? { name: message.name } : {}
7989
+ };
7990
+ }
7991
+ const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
7992
+ if (role === "system") {
7993
+ return {
7994
+ role: "system",
7995
+ content: textContent,
7996
+ ...message.name ? { name: message.name } : {}
7997
+ };
7998
+ }
7999
+ return {
8000
+ role: "assistant",
8001
+ content: textContent,
8002
+ ...message.name ? { name: message.name } : {}
8003
+ };
8004
+ }
8005
+ /**
8006
+ * Convert llmist content to OpenAI's content format.
8007
+ * Plain string content is passed through unchanged; part arrays are converted to OpenAI content blocks.
8008
+ */
8009
+ convertToOpenAIContent(content) {
8010
+ if (typeof content === "string") {
8011
+ return content;
8012
+ }
8013
+ return content.map((part) => {
8014
+ if (part.type === "text") {
8015
+ return { type: "text", text: part.text };
8016
+ }
8017
+ if (part.type === "image") {
8018
+ return this.convertImagePart(part);
8019
+ }
8020
+ if (part.type === "audio") {
8021
+ throw new Error(
8022
+ "OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
8023
+ );
8024
+ }
8025
+ throw new Error(`Unsupported content type: ${part.type}`);
8026
+ });
8027
+ }
8028
+ /**
8029
+ * Convert an image content part to OpenAI's image_url format.
8030
+ * Supports both URLs and base64 data URLs.
8031
+ */
8032
+ convertImagePart(part) {
8033
+ if (part.source.type === "url") {
8034
+ return {
8035
+ type: "image_url",
8036
+ image_url: { url: part.source.url }
8037
+ };
8038
+ }
8039
+ return {
8040
+ type: "image_url",
8041
+ image_url: {
8042
+ url: `data:${part.source.mediaType};base64,${part.source.data}`
8043
+ }
8044
+ };
8045
+ }
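The OpenAI mapping can be summarized with a small illustration (placeholder values, not captured output): URL image sources are forwarded as image_url entries, while base64 sources are wrapped in a data: URL.

```typescript
// Illustrative output of convertToOpenAIContent() for a user turn that mixes
// text, a URL image, and a base64 image.
const openaiContent = [
  { type: "text", text: "Describe this image" },
  { type: "image_url", image_url: { url: "https://example.com/image.jpg" } },
  {
    type: "image_url",
    image_url: { url: "data:image/jpeg;base64,<base64 bytes>" },
  },
];
```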
7257
8046
  async executeStreamRequest(payload, signal) {
7258
8047
  const client = this.client;
7259
8048
  const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
@@ -7262,9 +8051,9 @@ var init_openai = __esm({
7262
8051
  async *wrapStream(iterable) {
7263
8052
  const stream2 = iterable;
7264
8053
  for await (const chunk of stream2) {
7265
- const text = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
7266
- if (text) {
7267
- yield { text, rawEvent: chunk };
8054
+ const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
8055
+ if (text3) {
8056
+ yield { text: text3, rawEvent: chunk };
7268
8057
  }
7269
8058
  const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
7270
8059
  const usage = chunk.usage ? {
@@ -7312,17 +8101,26 @@ var init_openai = __esm({
7312
8101
  }
7313
8102
  try {
7314
8103
  let tokenCount = 0;
8104
+ let imageCount = 0;
7315
8105
  for (const message of messages) {
7316
8106
  tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
7317
8107
  const roleText = ROLE_MAP[message.role];
7318
8108
  tokenCount += encoding.encode(roleText).length;
7319
- tokenCount += encoding.encode(message.content ?? "").length;
8109
+ const textContent = extractText(message.content);
8110
+ tokenCount += encoding.encode(textContent).length;
8111
+ const parts = normalizeContent(message.content);
8112
+ for (const part of parts) {
8113
+ if (part.type === "image") {
8114
+ imageCount++;
8115
+ }
8116
+ }
7320
8117
  if (message.name) {
7321
8118
  tokenCount += encoding.encode(message.name).length;
7322
8119
  tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
7323
8120
  }
7324
8121
  }
7325
8122
  tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
8123
+ tokenCount += imageCount * 765;
7326
8124
  return tokenCount;
7327
8125
  } finally {
7328
8126
  encoding.free();
@@ -7332,8 +8130,19 @@ var init_openai = __esm({
7332
8130
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
7333
8131
  error
7334
8132
  );
7335
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
7336
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
8133
+ let totalChars = 0;
8134
+ let imageCount = 0;
8135
+ for (const msg of messages) {
8136
+ const parts = normalizeContent(msg.content);
8137
+ for (const part of parts) {
8138
+ if (part.type === "text") {
8139
+ totalChars += part.text.length;
8140
+ } else if (part.type === "image") {
8141
+ imageCount++;
8142
+ }
8143
+ }
8144
+ }
8145
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
7337
8146
  }
7338
8147
  }
7339
8148
  };
@@ -7756,6 +8565,138 @@ var init_text = __esm({
7756
8565
  }
7757
8566
  });
7758
8567
 
8568
+ // src/core/namespaces/vision.ts
8569
+ var VisionNamespace;
8570
+ var init_vision = __esm({
8571
+ "src/core/namespaces/vision.ts"() {
8572
+ "use strict";
8573
+ init_input_content();
8574
+ init_messages();
8575
+ VisionNamespace = class {
8576
+ constructor(client) {
8577
+ this.client = client;
8578
+ }
8579
+ /**
8580
+ * Build a message builder with the image content attached.
8581
+ * Handles URLs, data URLs, base64 strings, and binary buffers.
8582
+ */
8583
+ buildImageMessage(options) {
8584
+ const builder = new LLMMessageBuilder();
8585
+ if (options.systemPrompt) {
8586
+ builder.addSystem(options.systemPrompt);
8587
+ }
8588
+ if (typeof options.image === "string") {
8589
+ if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
8590
+ builder.addUserWithImageUrl(options.prompt, options.image);
8591
+ } else if (isDataUrl(options.image)) {
8592
+ const parsed = parseDataUrl(options.image);
8593
+ if (!parsed) {
8594
+ throw new Error("Invalid data URL format");
8595
+ }
8596
+ builder.addUserWithImage(
8597
+ options.prompt,
8598
+ parsed.data,
8599
+ parsed.mimeType
8600
+ );
8601
+ } else {
8602
+ const buffer = Buffer.from(options.image, "base64");
8603
+ builder.addUserWithImage(options.prompt, buffer, options.mimeType);
8604
+ }
8605
+ } else {
8606
+ builder.addUserWithImage(options.prompt, options.image, options.mimeType);
8607
+ }
8608
+ return builder;
8609
+ }
8610
+ /**
8611
+ * Stream the response and collect text and usage information.
8612
+ */
8613
+ async streamAndCollect(options, builder) {
8614
+ let response = "";
8615
+ let finalUsage;
8616
+ for await (const chunk of this.client.stream({
8617
+ model: options.model,
8618
+ messages: builder.build(),
8619
+ maxTokens: options.maxTokens,
8620
+ temperature: options.temperature
8621
+ })) {
8622
+ response += chunk.text;
8623
+ if (chunk.usage) {
8624
+ finalUsage = {
8625
+ inputTokens: chunk.usage.inputTokens,
8626
+ outputTokens: chunk.usage.outputTokens,
8627
+ totalTokens: chunk.usage.totalTokens
8628
+ };
8629
+ }
8630
+ }
8631
+ return { text: response.trim(), usage: finalUsage };
8632
+ }
8633
+ /**
8634
+ * Analyze an image with a vision-capable model.
8635
+ * Returns the analysis as a string.
8636
+ *
8637
+ * @param options - Vision analysis options
8638
+ * @returns Promise resolving to the analysis text
8639
+ * @throws Error if the image format is unsupported or model doesn't support vision
8640
+ *
8641
+ * @example
8642
+ * ```typescript
8643
+ * // From file
8644
+ * const result = await llmist.vision.analyze({
8645
+ * model: "gpt-4o",
8646
+ * image: await fs.readFile("photo.jpg"),
8647
+ * prompt: "What's in this image?",
8648
+ * });
8649
+ *
8650
+ * // From URL (OpenAI only)
8651
+ * const result = await llmist.vision.analyze({
8652
+ * model: "gpt-4o",
8653
+ * image: "https://example.com/image.jpg",
8654
+ * prompt: "Describe this image",
8655
+ * });
8656
+ * ```
8657
+ */
8658
+ async analyze(options) {
8659
+ const builder = this.buildImageMessage(options);
8660
+ const { text: text3 } = await this.streamAndCollect(options, builder);
8661
+ return text3;
8662
+ }
8663
+ /**
8664
+ * Analyze an image and return detailed result with usage info.
8665
+ *
8666
+ * @param options - Vision analysis options
8667
+ * @returns Promise resolving to the analysis result with usage info
8668
+ */
8669
+ async analyzeWithUsage(options) {
8670
+ const builder = this.buildImageMessage(options);
8671
+ const { text: text3, usage } = await this.streamAndCollect(options, builder);
8672
+ return {
8673
+ text: text3,
8674
+ model: options.model,
8675
+ usage
8676
+ };
8677
+ }
8678
+ /**
8679
+ * Check if a model supports vision/image input.
8680
+ *
8681
+ * @param modelId - Model ID to check
8682
+ * @returns True if the model supports vision
8683
+ */
8684
+ supportsModel(modelId) {
8685
+ const spec = this.client.modelRegistry.getModelSpec(modelId);
8686
+ return spec?.features?.vision === true;
8687
+ }
8688
+ /**
8689
+ * List all models that support vision.
8690
+ *
8691
+ * @returns Array of model IDs that support vision
8692
+ */
8693
+ listModels() {
8694
+ return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
8695
+ }
8696
+ };
8697
+ }
8698
+ });
8699
+
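supportsModel() and listModels() ship without an @example block; a hedged usage sketch follows. The model ID is a placeholder, and the zero-argument client construction mirrors the one used internally by the builder.

```typescript
// Hedged usage sketch for the vision namespace helpers above.
import { LLMist } from "llmist";

const llmist = new LLMist();

if (llmist.vision.supportsModel("openai:gpt-4o")) {
  // Lists every registered model whose spec advertises vision support.
  console.log("Vision-capable models:", llmist.vision.listModels());
}
```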
7759
8700
  // src/core/options.ts
7760
8701
  var ModelIdentifierParser;
7761
8702
  var init_options = __esm({
@@ -7800,6 +8741,7 @@ var init_client = __esm({
7800
8741
  init_image();
7801
8742
  init_speech();
7802
8743
  init_text();
8744
+ init_vision();
7803
8745
  init_options();
7804
8746
  init_quick_methods();
7805
8747
  LLMist = class _LLMist {
@@ -7811,6 +8753,7 @@ var init_client = __esm({
7811
8753
  text;
7812
8754
  image;
7813
8755
  speech;
8756
+ vision;
7814
8757
  constructor(...args) {
7815
8758
  let adapters = [];
7816
8759
  let defaultProvider;
@@ -7861,6 +8804,7 @@ var init_client = __esm({
7861
8804
  this.text = new TextNamespace(this);
7862
8805
  this.image = new ImageNamespace(this.adapters, this.defaultProvider);
7863
8806
  this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
8807
+ this.vision = new VisionNamespace(this);
7864
8808
  }
7865
8809
  stream(options) {
7866
8810
  const descriptor = this.parser.parse(options.model);
@@ -8282,9 +9226,9 @@ function sleep(ms) {
8282
9226
  function generateInvocationId() {
8283
9227
  return `inv-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
8284
9228
  }
8285
- function splitIntoChunks(text, minChunkSize = 5, maxChunkSize = 30) {
9229
+ function splitIntoChunks(text3, minChunkSize = 5, maxChunkSize = 30) {
8286
9230
  const chunks = [];
8287
- let remaining = text;
9231
+ let remaining = text3;
8288
9232
  while (remaining.length > 0) {
8289
9233
  const chunkSize = Math.min(
8290
9234
  Math.floor(Math.random() * (maxChunkSize - minChunkSize + 1)) + minChunkSize,
@@ -8343,17 +9287,17 @@ ${String(value)}
8343
9287
  return result;
8344
9288
  }
8345
9289
  function formatGadgetCalls(gadgetCalls) {
8346
- let text = "";
9290
+ let text3 = "";
8347
9291
  const calls = [];
8348
9292
  for (const call of gadgetCalls) {
8349
9293
  const invocationId = call.invocationId ?? generateInvocationId();
8350
9294
  calls.push({ name: call.gadgetName, invocationId });
8351
9295
  const blockParams = serializeToBlockFormat(call.parameters);
8352
- text += `
9296
+ text3 += `
8353
9297
  ${GADGET_START_PREFIX}${call.gadgetName}
8354
9298
  ${blockParams}${GADGET_END_PREFIX}`;
8355
9299
  }
8356
- return { text, calls };
9300
+ return { text: text3, calls };
8357
9301
  }
8358
9302
  async function* createMockStream(response) {
8359
9303
  if (response.delayMs) {
@@ -8393,9 +9337,9 @@ async function* createMockStream(response) {
8393
9337
  };
8394
9338
  }
8395
9339
  }
8396
- function createTextMockStream(text, options) {
9340
+ function createTextMockStream(text3, options) {
8397
9341
  return createMockStream({
8398
- text,
9342
+ text: text3,
8399
9343
  delayMs: options?.delayMs,
8400
9344
  streamDelayMs: options?.streamDelayMs,
8401
9345
  usage: options?.usage,
@@ -8412,10 +9356,10 @@ var MockProviderAdapter = class {
8412
9356
  constructor(options) {
8413
9357
  this.mockManager = getMockManager(options);
8414
9358
  }
8415
- supports(descriptor) {
9359
+ supports(_descriptor) {
8416
9360
  return true;
8417
9361
  }
8418
- stream(options, descriptor, spec) {
9362
+ stream(options, descriptor, _spec) {
8419
9363
  const context = {
8420
9364
  model: options.model,
8421
9365
  provider: descriptor.provider,
@@ -8426,20 +9370,154 @@ var MockProviderAdapter = class {
8426
9370
  return this.createMockStreamFromContext(context);
8427
9371
  }
8428
9372
  async *createMockStreamFromContext(context) {
8429
- try {
8430
- const mockResponse = await this.mockManager.findMatch(context);
8431
- if (!mockResponse) {
8432
- yield {
8433
- text: "",
8434
- finishReason: "stop",
8435
- usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
8436
- };
8437
- return;
8438
- }
8439
- yield* createMockStream(mockResponse);
8440
- } catch (error) {
8441
- throw error;
9373
+ const mockResponse = await this.mockManager.findMatch(context);
9374
+ if (!mockResponse) {
9375
+ yield {
9376
+ text: "",
9377
+ finishReason: "stop",
9378
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
9379
+ };
9380
+ return;
9381
+ }
9382
+ yield* createMockStream(mockResponse);
9383
+ }
9384
+ // ==========================================================================
9385
+ // Image Generation Support
9386
+ // ==========================================================================
9387
+ /**
9388
+ * Check if this adapter supports image generation for a given model.
9389
+ * Always returns true; whether a matching mock with images exists is only checked when generateImage() runs.
9390
+ */
9391
+ supportsImageGeneration(_modelId) {
9392
+ return true;
9393
+ }
9394
+ /**
9395
+ * Generate mock images based on registered mocks.
9396
+ *
9397
+ * @param options - Image generation options
9398
+ * @returns Mock image generation result
9399
+ */
9400
+ async generateImage(options) {
9401
+ const context = {
9402
+ model: options.model,
9403
+ provider: "mock",
9404
+ modelName: options.model,
9405
+ options: {
9406
+ model: options.model,
9407
+ messages: [{ role: "user", content: options.prompt }]
9408
+ },
9409
+ messages: [{ role: "user", content: options.prompt }]
9410
+ };
9411
+ const mockResponse = await this.mockManager.findMatch(context);
9412
+ if (!mockResponse?.images || mockResponse.images.length === 0) {
9413
+ throw new Error(
9414
+ `No mock registered for image generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsImage(...).register() to add one.`
9415
+ );
9416
+ }
9417
+ return this.createImageResult(options, mockResponse);
9418
+ }
9419
+ /**
9420
+ * Transform mock response into ImageGenerationResult format.
9421
+ *
9422
+ * @param options - Original image generation options
9423
+ * @param mockResponse - Mock response containing image data
9424
+ * @returns ImageGenerationResult with mock data and zero cost
9425
+ */
9426
+ createImageResult(options, mockResponse) {
9427
+ const images = mockResponse.images ?? [];
9428
+ return {
9429
+ images: images.map((img) => ({
9430
+ b64Json: img.data,
9431
+ revisedPrompt: img.revisedPrompt
9432
+ })),
9433
+ model: options.model,
9434
+ usage: {
9435
+ imagesGenerated: images.length,
9436
+ size: options.size ?? "1024x1024",
9437
+ quality: options.quality ?? "standard"
9438
+ },
9439
+ cost: 0
9440
+ // Mock cost is always 0
9441
+ };
9442
+ }
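A possible round trip for the mock image path, following the registration hint in the error message above. The import path llmist/testing, the fixture path, and top-level await are assumptions made for the sketch.

```typescript
// Hypothetical test setup; "llmist/testing" and the fixture path are assumptions.
import { readFile } from "node:fs/promises";
import { mockLLM, createMockAdapter } from "llmist/testing";

const pngBuffer = await readFile("fixtures/pixel.png");
mockLLM().forModel("dall-e-3").returnsImage(pngBuffer).register();

const adapter = createMockAdapter();
const result = await adapter.generateImage({ model: "dall-e-3", prompt: "a sunset" });
// result.images[0].b64Json holds the base64 data; result.cost is always 0.
```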
9443
+ // ==========================================================================
9444
+ // Speech Generation Support
9445
+ // ==========================================================================
9446
+ /**
9447
+ * Check if this adapter supports speech generation for a given model.
9448
+ * Always returns true; whether a matching mock with audio exists is only checked when generateSpeech() runs.
9449
+ */
9450
+ supportsSpeechGeneration(_modelId) {
9451
+ return true;
9452
+ }
9453
+ /**
9454
+ * Generate mock speech based on registered mocks.
9455
+ *
9456
+ * @param options - Speech generation options
9457
+ * @returns Mock speech generation result
9458
+ */
9459
+ async generateSpeech(options) {
9460
+ const context = {
9461
+ model: options.model,
9462
+ provider: "mock",
9463
+ modelName: options.model,
9464
+ options: {
9465
+ model: options.model,
9466
+ messages: [{ role: "user", content: options.input }]
9467
+ },
9468
+ messages: [{ role: "user", content: options.input }]
9469
+ };
9470
+ const mockResponse = await this.mockManager.findMatch(context);
9471
+ if (!mockResponse?.audio) {
9472
+ throw new Error(
9473
+ `No mock registered for speech generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsAudio(...).register() to add one.`
9474
+ );
9475
+ }
9476
+ return this.createSpeechResult(options, mockResponse);
9477
+ }
9478
+ /**
9479
+ * Transform mock response into SpeechGenerationResult format.
9480
+ * Converts base64 audio data to ArrayBuffer.
9481
+ *
9482
+ * @param options - Original speech generation options
9483
+ * @param mockResponse - Mock response containing audio data
9484
+ * @returns SpeechGenerationResult with mock data and zero cost
9485
+ */
9486
+ createSpeechResult(options, mockResponse) {
9487
+ const audio = mockResponse.audio;
9488
+ const binaryString = atob(audio.data);
9489
+ const bytes = new Uint8Array(binaryString.length);
9490
+ for (let i = 0; i < binaryString.length; i++) {
9491
+ bytes[i] = binaryString.charCodeAt(i);
8442
9492
  }
9493
+ const format = this.mimeTypeToAudioFormat(audio.mimeType);
9494
+ return {
9495
+ audio: bytes.buffer,
9496
+ model: options.model,
9497
+ usage: {
9498
+ characterCount: options.input.length
9499
+ },
9500
+ cost: 0,
9501
+ // Mock cost is always 0
9502
+ format
9503
+ };
9504
+ }
9505
+ /**
9506
+ * Map MIME type to audio format for SpeechGenerationResult.
9507
+ * Defaults to "mp3" for unknown MIME types.
9508
+ *
9509
+ * @param mimeType - Audio MIME type string
9510
+ * @returns Audio format identifier
9511
+ */
9512
+ mimeTypeToAudioFormat(mimeType) {
9513
+ const mapping = {
9514
+ "audio/mp3": "mp3",
9515
+ "audio/mpeg": "mp3",
9516
+ "audio/wav": "wav",
9517
+ "audio/webm": "opus",
9518
+ "audio/ogg": "opus"
9519
+ };
9520
+ return mapping[mimeType] ?? "mp3";
8443
9521
  }
8444
9522
  };
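A companion sketch for the speech path: the registered audio mock is decoded from base64 into an ArrayBuffer and tagged with a format derived from its MIME type (for example, audio/wav maps to "wav"). Import path and fixture name are assumptions, as above.

```typescript
// Hypothetical test setup; import path and fixture path are assumptions.
import { readFile } from "node:fs/promises";
import { mockLLM, createMockAdapter } from "llmist/testing";

const wavBuffer = await readFile("fixtures/chime.wav");
mockLLM().forModel("tts-1").returnsAudio(wavBuffer).register();

const adapter = createMockAdapter();
const speech = await adapter.generateSpeech({ model: "tts-1", input: "Hello there" });
// speech.audio is an ArrayBuffer, speech.format is "wav", speech.cost is 0.
```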
8445
9523
  function createMockAdapter(options) {
@@ -8447,6 +9525,20 @@ function createMockAdapter(options) {
8447
9525
  }
8448
9526
 
8449
9527
  // src/testing/mock-builder.ts
9528
+ init_input_content();
9529
+ init_messages();
9530
+ function hasImageContent(content) {
9531
+ if (typeof content === "string") return false;
9532
+ return content.some((part) => isImagePart(part));
9533
+ }
9534
+ function hasAudioContent(content) {
9535
+ if (typeof content === "string") return false;
9536
+ return content.some((part) => isAudioPart(part));
9537
+ }
9538
+ function countImages(content) {
9539
+ if (typeof content === "string") return 0;
9540
+ return content.filter((part) => isImagePart(part)).length;
9541
+ }
8450
9542
  var MockBuilder = class {
8451
9543
  matchers = [];
8452
9544
  response = {};
@@ -8509,9 +9601,9 @@ var MockBuilder = class {
8509
9601
  * @example
8510
9602
  * mockLLM().whenMessageContains('hello')
8511
9603
  */
8512
- whenMessageContains(text) {
9604
+ whenMessageContains(text3) {
8513
9605
  this.matchers.push(
8514
- (ctx) => ctx.messages.some((msg) => msg.content?.toLowerCase().includes(text.toLowerCase()))
9606
+ (ctx) => ctx.messages.some((msg) => extractText(msg.content).toLowerCase().includes(text3.toLowerCase()))
8515
9607
  );
8516
9608
  return this;
8517
9609
  }
@@ -8521,10 +9613,11 @@ var MockBuilder = class {
8521
9613
  * @example
8522
9614
  * mockLLM().whenLastMessageContains('goodbye')
8523
9615
  */
8524
- whenLastMessageContains(text) {
9616
+ whenLastMessageContains(text3) {
8525
9617
  this.matchers.push((ctx) => {
8526
9618
  const lastMsg = ctx.messages[ctx.messages.length - 1];
8527
- return lastMsg?.content?.toLowerCase().includes(text.toLowerCase()) ?? false;
9619
+ if (!lastMsg) return false;
9620
+ return extractText(lastMsg.content).toLowerCase().includes(text3.toLowerCase());
8528
9621
  });
8529
9622
  return this;
8530
9623
  }
@@ -8535,7 +9628,7 @@ var MockBuilder = class {
8535
9628
  * mockLLM().whenMessageMatches(/calculate \d+/)
8536
9629
  */
8537
9630
  whenMessageMatches(regex) {
8538
- this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(msg.content ?? "")));
9631
+ this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(extractText(msg.content))));
8539
9632
  return this;
8540
9633
  }
8541
9634
  /**
@@ -8544,10 +9637,10 @@ var MockBuilder = class {
8544
9637
  * @example
8545
9638
  * mockLLM().whenRoleContains('system', 'You are a helpful assistant')
8546
9639
  */
8547
- whenRoleContains(role, text) {
9640
+ whenRoleContains(role, text3) {
8548
9641
  this.matchers.push(
8549
9642
  (ctx) => ctx.messages.some(
8550
- (msg) => msg.role === role && msg.content?.toLowerCase().includes(text.toLowerCase())
9643
+ (msg) => msg.role === role && extractText(msg.content).toLowerCase().includes(text3.toLowerCase())
8551
9644
  )
8552
9645
  );
8553
9646
  return this;
@@ -8575,6 +9668,43 @@ var MockBuilder = class {
8575
9668
  this.matchers.push(matcher);
8576
9669
  return this;
8577
9670
  }
9671
+ // ==========================================================================
9672
+ // Multimodal Matchers
9673
+ // ==========================================================================
9674
+ /**
9675
+ * Match when any message contains an image.
9676
+ *
9677
+ * @example
9678
+ * mockLLM().whenMessageHasImage().returns("I see an image of a sunset.")
9679
+ */
9680
+ whenMessageHasImage() {
9681
+ this.matchers.push((ctx) => ctx.messages.some((msg) => hasImageContent(msg.content)));
9682
+ return this;
9683
+ }
9684
+ /**
9685
+ * Match when any message contains audio.
9686
+ *
9687
+ * @example
9688
+ * mockLLM().whenMessageHasAudio().returns("I hear music playing.")
9689
+ */
9690
+ whenMessageHasAudio() {
9691
+ this.matchers.push((ctx) => ctx.messages.some((msg) => hasAudioContent(msg.content)));
9692
+ return this;
9693
+ }
9694
+ /**
9695
+ * Match based on the number of images in the last message.
9696
+ *
9697
+ * @example
9698
+ * mockLLM().whenImageCount((n) => n >= 2).returns("Comparing multiple images...")
9699
+ */
9700
+ whenImageCount(predicate) {
9701
+ this.matchers.push((ctx) => {
9702
+ const lastMsg = ctx.messages[ctx.messages.length - 1];
9703
+ if (!lastMsg) return false;
9704
+ return predicate(countImages(lastMsg.content));
9705
+ });
9706
+ return this;
9707
+ }
8578
9708
  /**
8579
9709
  * Set the text response to return.
8580
9710
  * Can be a static string or a function that returns a string dynamically.
@@ -8584,17 +9714,17 @@ var MockBuilder = class {
8584
9714
  * mockLLM().returns(() => `Response at ${Date.now()}`)
8585
9715
  * mockLLM().returns((ctx) => `You said: ${ctx.messages[0]?.content}`)
8586
9716
  */
8587
- returns(text) {
8588
- if (typeof text === "function") {
9717
+ returns(text3) {
9718
+ if (typeof text3 === "function") {
8589
9719
  this.response = async (ctx) => {
8590
- const resolvedText = await Promise.resolve().then(() => text(ctx));
9720
+ const resolvedText = await Promise.resolve().then(() => text3(ctx));
8591
9721
  return { text: resolvedText };
8592
9722
  };
8593
9723
  } else {
8594
9724
  if (typeof this.response === "function") {
8595
9725
  throw new Error("Cannot use returns() after withResponse() with a function");
8596
9726
  }
8597
- this.response.text = text;
9727
+ this.response.text = text3;
8598
9728
  }
8599
9729
  return this;
8600
9730
  }
@@ -8631,6 +9761,112 @@ var MockBuilder = class {
8631
9761
  this.response.gadgetCalls.push({ gadgetName, parameters });
8632
9762
  return this;
8633
9763
  }
9764
+ // ==========================================================================
9765
+ // Multimodal Response Helpers
9766
+ // ==========================================================================
9767
+ /**
9768
+ * Return a single image in the response.
9769
+ * Useful for mocking image generation endpoints.
9770
+ *
9771
+ * @param data - Image data (base64 string or Buffer)
9772
+ * @param mimeType - MIME type (auto-detected if Buffer provided without type)
9773
+ *
9774
+ * @example
9775
+ * mockLLM()
9776
+ * .forModel('dall-e-3')
9777
+ * .returnsImage(pngBuffer)
9778
+ * .register();
9779
+ */
9780
+ returnsImage(data, mimeType) {
9781
+ if (typeof this.response === "function") {
9782
+ throw new Error("Cannot use returnsImage() after withResponse() with a function");
9783
+ }
9784
+ let imageData;
9785
+ let imageMime;
9786
+ if (typeof data === "string") {
9787
+ imageData = data;
9788
+ if (!mimeType) {
9789
+ throw new Error("MIME type is required when providing base64 string data");
9790
+ }
9791
+ imageMime = mimeType;
9792
+ } else {
9793
+ imageData = toBase64(data);
9794
+ const detected = mimeType ?? detectImageMimeType(data);
9795
+ if (!detected) {
9796
+ throw new Error(
9797
+ "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
9798
+ );
9799
+ }
9800
+ imageMime = detected;
9801
+ }
9802
+ if (!this.response.images) {
9803
+ this.response.images = [];
9804
+ }
9805
+ this.response.images.push({ data: imageData, mimeType: imageMime });
9806
+ return this;
9807
+ }
9808
+ /**
9809
+ * Return multiple images in the response.
9810
+ *
9811
+ * @example
9812
+ * mockLLM()
9813
+ * .forModel('dall-e-3')
9814
+ * .returnsImages([
9815
+ * { data: pngBuffer1 },
9816
+ * { data: pngBuffer2 },
9817
+ * ])
9818
+ * .register();
9819
+ */
9820
+ returnsImages(images) {
9821
+ for (const img of images) {
9822
+ this.returnsImage(img.data, img.mimeType);
9823
+ if (img.revisedPrompt && this.response && typeof this.response !== "function") {
9824
+ const lastImage = this.response.images?.[this.response.images.length - 1];
9825
+ if (lastImage) {
9826
+ lastImage.revisedPrompt = img.revisedPrompt;
9827
+ }
9828
+ }
9829
+ }
9830
+ return this;
9831
+ }
9832
+ /**
9833
+ * Return audio data in the response.
9834
+ * Useful for mocking speech synthesis endpoints.
9835
+ *
9836
+ * @param data - Audio data (base64 string or Buffer)
9837
+ * @param mimeType - MIME type (auto-detected if Buffer provided without type)
9838
+ *
9839
+ * @example
9840
+ * mockLLM()
9841
+ * .forModel('tts-1')
9842
+ * .returnsAudio(mp3Buffer)
9843
+ * .register();
9844
+ */
9845
+ returnsAudio(data, mimeType) {
9846
+ if (typeof this.response === "function") {
9847
+ throw new Error("Cannot use returnsAudio() after withResponse() with a function");
9848
+ }
9849
+ let audioData;
9850
+ let audioMime;
9851
+ if (typeof data === "string") {
9852
+ audioData = data;
9853
+ if (!mimeType) {
9854
+ throw new Error("MIME type is required when providing base64 string data");
9855
+ }
9856
+ audioMime = mimeType;
9857
+ } else {
9858
+ audioData = toBase64(data);
9859
+ const detected = mimeType ?? detectAudioMimeType(data);
9860
+ if (!detected) {
9861
+ throw new Error(
9862
+ "Could not detect audio MIME type. Please provide the mimeType parameter explicitly."
9863
+ );
9864
+ }
9865
+ audioMime = detected;
9866
+ }
9867
+ this.response.audio = { data: audioData, mimeType: audioMime };
9868
+ return this;
9869
+ }
8634
9870
  /**
8635
9871
  * Set the complete mock response object.
8636
9872
  * This allows full control over all response properties.
@@ -8961,23 +10197,23 @@ function createTestStream(chunks) {
8961
10197
  }
8962
10198
  }();
8963
10199
  }
8964
- function createTextStream(text, options) {
10200
+ function createTextStream(text3, options) {
8965
10201
  return async function* () {
8966
10202
  if (options?.delayMs) {
8967
10203
  await sleep2(options.delayMs);
8968
10204
  }
8969
- const chunkSize = options?.chunkSize ?? text.length;
10205
+ const chunkSize = options?.chunkSize ?? text3.length;
8970
10206
  const chunks = [];
8971
- for (let i = 0; i < text.length; i += chunkSize) {
8972
- chunks.push(text.slice(i, i + chunkSize));
10207
+ for (let i = 0; i < text3.length; i += chunkSize) {
10208
+ chunks.push(text3.slice(i, i + chunkSize));
8973
10209
  }
8974
10210
  for (let i = 0; i < chunks.length; i++) {
8975
10211
  const isLast = i === chunks.length - 1;
8976
10212
  const chunk = { text: chunks[i] };
8977
10213
  if (isLast) {
8978
10214
  chunk.finishReason = options?.finishReason ?? "stop";
8979
- const inputTokens = Math.ceil(text.length / 4);
8980
- const outputTokens = Math.ceil(text.length / 4);
10215
+ const inputTokens = Math.ceil(text3.length / 4);
10216
+ const outputTokens = Math.ceil(text3.length / 4);
8981
10217
  chunk.usage = options?.usage ?? {
8982
10218
  inputTokens,
8983
10219
  outputTokens,
@@ -8999,11 +10235,11 @@ async function collectStream(stream2) {
8999
10235
  return chunks;
9000
10236
  }
9001
10237
  async function collectStreamText(stream2) {
9002
- let text = "";
10238
+ let text3 = "";
9003
10239
  for await (const chunk of stream2) {
9004
- text += chunk.text ?? "";
10240
+ text3 += chunk.text ?? "";
9005
10241
  }
9006
- return text;
10242
+ return text3;
9007
10243
  }
9008
10244
  async function getStreamFinalChunk(stream2) {
9009
10245
  let lastChunk;
@@ -9385,6 +10621,21 @@ function filterDefinedEnv(env) {
9385
10621
  }
9386
10622
 
9387
10623
  export {
10624
+ isTextPart,
10625
+ isImagePart,
10626
+ isAudioPart,
10627
+ text,
10628
+ imageFromBase64,
10629
+ imageFromUrl,
10630
+ detectImageMimeType,
10631
+ detectAudioMimeType,
10632
+ toBase64,
10633
+ imageFromBuffer,
10634
+ audioFromBase64,
10635
+ audioFromBuffer,
10636
+ isDataUrl,
10637
+ parseDataUrl,
10638
+ init_input_content,
9388
10639
  MODEL_ALIASES,
9389
10640
  resolveModel,
9390
10641
  hasProviderPrefix,
@@ -9401,6 +10652,8 @@ export {
9401
10652
  resolveRulesTemplate,
9402
10653
  resolveHintTemplate,
9403
10654
  init_prompt_config,
10655
+ normalizeContent,
10656
+ extractText,
9404
10657
  LLMMessageBuilder,
9405
10658
  init_messages,
9406
10659
  BreakLoopException,
@@ -9508,4 +10761,4 @@ export {
9508
10761
  MockPromptRecorder,
9509
10762
  waitFor
9510
10763
  };
9511
- //# sourceMappingURL=chunk-6ZDUWO6N.js.map
10764
+ //# sourceMappingURL=chunk-YHS2DYXP.js.map