@agentionai/agents 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -73,6 +73,7 @@ import { ClaudeAgent, OpenAiAgent } from '@agentionai/agents';
73
73
 
74
74
  - **Multi-Provider, No Lock-in** - Claude, OpenAI, Gemini, Mistral—same interface. Switch models with one line.
75
75
  - **Composable, Not Magical** - Agents are objects. Pipelines are arrays. No hidden state, no surprises.
76
+ - **Multimodal / Vision** - Send images alongside text with a unified `MessageContent[]` API across all providers.
76
77
  - **Full Observability** - Per-call token counts, execution timing, pipeline structure visualization.
77
78
  - **TypeScript-Native** - Strict typing, interfaces, and generics from the ground up.
78
79
  - **RAG Ready** - LanceDB vector store, token-aware chunking, ingestion pipeline out of the box.
@@ -175,6 +176,43 @@ const researcher = new ClaudeAgent({
175
176
  const result = await researcher.execute('Latest developments in quantum computing');
176
177
  ```
177
178
 
179
+ ### Multimodal / Vision
180
+
181
+ Send images alongside text using `imageUrl()` or `imageBase64()`. The same `MessageContent[]` interface works across all providers:
182
+
183
+ ```typescript
184
+ import { ClaudeAgent } from '@agentionai/agents/claude';
185
+ import { imageUrl, imageBase64 } from '@agentionai/agents/core';
186
+ import * as fs from 'fs';
187
+
188
+ const agent = new ClaudeAgent({
189
+ apiKey: process.env.ANTHROPIC_API_KEY,
190
+ model: 'claude-opus-4-6',
191
+ name: 'VisionAgent',
192
+ description: 'You analyze images.',
193
+ });
194
+
195
+ // Remote image by URL
196
+ const response = await agent.execute([
197
+ imageUrl('https://example.com/chart.png'),
198
+ { type: 'text', text: 'Summarize this chart in one sentence.' },
199
+ ]);
200
+
201
+ // Local image as base64
202
+ const data = fs.readFileSync('./photo.jpg').toString('base64');
203
+ const response2 = await agent.execute([
204
+ imageBase64(data, 'image/jpeg'),
205
+ { type: 'text', text: 'What plant is this?' },
206
+ ]);
207
+ ```
208
+
209
+ | Provider | URL | Base64 |
210
+ |----------|:---:|:------:|
211
+ | Claude | ✅ | ✅ |
212
+ | OpenAI | ✅ | ✅ |
213
+ | Gemini | ✅ | ✅ |
214
+ | Mistral | ✅ | ❌ |
215
+
178
216
  ## Core Concepts
179
217
 
180
218
  ### Agents
@@ -187,6 +225,11 @@ JSON Schema + handler pattern. Unique capability: wrap any agent as a tool for d
187
225
 
188
226
  [Learn more →](https://docs.agention.ai/guide/tools)
189
227
 
228
+ ### Multimodal / Vision
229
+ Unified `MessageContent[]` interface for images across all providers. It supports both URL and base64 images (MistralAgent accepts URL images only), and text and images can be mixed freely in a single call.
230
+
231
+ [Learn more →](https://docs.agention.ai/guide/multimodal)
232
+
190
233
  ### History
191
234
  Provider-agnostic, persistent (Redis, file, custom), shareable across agents of different providers.
192
235
 
@@ -213,6 +256,7 @@ Per-call and per-node token counts, duration metrics, full execution visibility.
213
256
  - **[Quick Start](https://docs.agention.ai/guide/quickstart)** - Build a weather assistant in 5 minutes
214
257
  - **[Agents](https://docs.agention.ai/guide/agents)** - Agent configuration and providers
215
258
  - **[Tools](https://docs.agention.ai/guide/tools)** - Adding capabilities and agent delegation
259
+ - **[Multimodal / Vision](https://docs.agention.ai/guide/multimodal)** - Sending images across all providers
216
260
  - **[Graph Pipelines](https://docs.agention.ai/guide/graph-pipelines)** - Multi-agent workflows
217
261
  - **[Vector Stores](https://docs.agention.ai/guide/vector-stores)** - RAG and semantic search
218
262
  - **[Examples](https://docs.agention.ai/guide/examples)** - Real-world implementations
@@ -1,8 +1,8 @@
1
1
  import EventEmitter from "events";
2
2
  import { Tool } from "../tools/Tool";
3
- import { History, HistoryEntry, MessageRole, MessageContent } from "../history/History";
3
+ import { History, HistoryEntry, MessageRole, MessageContent, ImageMimeType } from "../history/History";
4
4
  import { AgentVendor, CommonAgentConfig, VendorSpecificConfig } from "./AgentConfig";
5
- export type { HistoryEntry, MessageRole, MessageContent };
5
+ export type { HistoryEntry, MessageRole, MessageContent, ImageMimeType };
6
6
  export type { AgentVendor };
7
7
  /**
8
8
  * Agent config as used across all agents
@@ -1,7 +1,7 @@
1
1
  import { Message, Usage } from "@anthropic-ai/sdk/resources";
2
2
  import { type ToolDefinition } from "../../tools/Tool";
3
3
  import { BaseAgent, BaseAgentConfig, TokenUsage } from "../BaseAgent";
4
- import { History } from "../../history/History";
4
+ import { History, MessageContent } from "../../history/History";
5
5
  import { ClaudeModel } from "../model-types";
6
6
  type AgentConfig = BaseAgentConfig & {
7
7
  apiKey: string;
@@ -37,7 +37,7 @@ export declare class ClaudeAgent extends BaseAgent {
37
37
  constructor(config: Omit<AgentConfig, "vendor">, history?: History);
38
38
  protected getToolDefinitions(): ToolDefinition[];
39
39
  protected process(_input: string): Promise<string>;
40
- execute(input: string): Promise<string>;
40
+ execute(input: string | MessageContent[]): Promise<string>;
41
41
  protected handleResponse(response: Message): Promise<string>;
42
42
  private handleToolUse;
43
43
  protected parseUsage(input: Usage): TokenUsage;
@@ -64,16 +64,23 @@ class ClaudeAgent extends BaseAgent_1.BaseAgent {
64
64
  // Reset token usage for this execution
65
65
  this.lastTokenUsage = undefined;
66
66
  this.currentToolCallCount = 0;
67
+ // Normalise input to a display string for viz reporting
68
+ const inputPreview = typeof input === "string" ? input : JSON.stringify(input);
67
69
  // Start visualization reporting
68
70
  if (VizConfig_1.vizConfig.isEnabled()) {
69
- this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "anthropic", input);
71
+ this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "anthropic", inputPreview);
70
72
  }
71
73
  if (this.history.transient) {
72
74
  this.history.clear();
73
75
  // Re-add system message after clear
74
76
  this.addSystemMessage(this.getSystemMessage());
75
77
  }
76
- this.addTextToHistory("user", input);
78
+ if (typeof input === "string") {
79
+ this.addTextToHistory("user", input);
80
+ }
81
+ else {
82
+ this.addMessageToHistory("user", input);
83
+ }
77
84
  try {
78
85
  const messages = transformers_1.anthropicTransformer.toProvider(this.history.getEntries());
79
86
  const systemMessage = this.history.getSystemMessage();
@@ -180,6 +187,7 @@ class ClaudeAgent extends BaseAgent_1.BaseAgent {
180
187
  const messages = transformers_1.anthropicTransformer.toProvider(this.history.getEntries());
181
188
  const newResponse = await this.client.messages.create({
182
189
  model: this.config.model,
190
+ system: this.history.getSystemMessage(),
183
191
  max_tokens: this.config.maxTokens,
184
192
  messages,
185
193
  tools: this.getToolDefinitions(),
@@ -1,6 +1,6 @@
1
1
  import { FunctionDeclarationsTool, GenerateContentResult, Schema } from "@google/generative-ai";
2
2
  import { BaseAgent, BaseAgentConfig, TokenUsage } from "../BaseAgent";
3
- import { History } from "../../history/History";
3
+ import { History, MessageContent } from "../../history/History";
4
4
  import { GeminiModel } from "../model-types";
5
5
  type AgentConfig = BaseAgentConfig & {
6
6
  apiKey: string;
@@ -50,7 +50,7 @@ export declare class GeminiAgent extends BaseAgent {
50
50
  */
51
51
  private mapJsonSchemaTypeToGemini;
52
52
  protected process(_input: string): Promise<string>;
53
- execute(input: string): Promise<string>;
53
+ execute(input: string | MessageContent[]): Promise<string>;
54
54
  protected handleResponse(response: GenerateContentResult): Promise<string>;
55
55
  private handleFunctionCalls;
56
56
  protected parseUsage(input: {
@@ -165,16 +165,22 @@ class GeminiAgent extends BaseAgent_1.BaseAgent {
165
165
  // Reset token usage for this execution
166
166
  this.lastTokenUsage = undefined;
167
167
  this.currentToolCallCount = 0;
168
+ const inputPreview = typeof input === "string" ? input : JSON.stringify(input);
168
169
  // Start visualization reporting
169
170
  if (VizConfig_1.vizConfig.isEnabled()) {
170
- this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "gemini", input);
171
+ this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "gemini", inputPreview);
171
172
  }
172
173
  if (this.history.transient) {
173
174
  this.history.clear();
174
175
  // Re-add system message after clear
175
176
  this.addSystemMessage(this.getSystemMessage());
176
177
  }
177
- this.addTextToHistory("user", input);
178
+ if (typeof input === "string") {
179
+ this.addTextToHistory("user", input);
180
+ }
181
+ else {
182
+ this.addMessageToHistory("user", input);
183
+ }
178
184
  try {
179
185
  const contents = transformers_1.geminiTransformer.toProvider(this.history.getEntries());
180
186
  const systemMessage = this.history.getSystemMessage();
@@ -1,5 +1,5 @@
1
1
  import { BaseAgent, BaseAgentConfig, TokenUsage } from "../BaseAgent";
2
- import { History } from "../../history/History";
2
+ import { History, MessageContent } from "../../history/History";
3
3
  import { ChatCompletionResponse, Tool, UsageInfo } from "@mistralai/mistralai/models/components";
4
4
  import { MistralModel } from "../model-types";
5
5
  type AgentConfig = BaseAgentConfig & {
@@ -38,7 +38,7 @@ export declare class MistralAgent extends BaseAgent {
38
38
  constructor(config: Omit<AgentConfig, "vendor">, history?: History);
39
39
  protected getToolDefinitions(): Tool[];
40
40
  protected process(_input: string): Promise<string>;
41
- execute(input: string): Promise<string>;
41
+ execute(input: string | MessageContent[]): Promise<string>;
42
42
  protected handleResponse(response: ChatCompletionResponse): Promise<string>;
43
43
  private handleToolCalls;
44
44
  protected parseUsage(input: UsageInfo): TokenUsage;
@@ -75,16 +75,22 @@ class MistralAgent extends BaseAgent_1.BaseAgent {
75
75
  // Reset token usage for this execution
76
76
  this.lastTokenUsage = undefined;
77
77
  this.currentToolCallCount = 0;
78
+ const inputPreview = typeof input === "string" ? input : JSON.stringify(input);
78
79
  // Start visualization reporting
79
80
  if (VizConfig_1.vizConfig.isEnabled()) {
80
- this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "mistral", input);
81
+ this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "mistral", inputPreview);
81
82
  }
82
83
  if (this.history.transient) {
83
84
  this.history.clear();
84
85
  // Re-add system message after clear
85
86
  this.addSystemMessage(this.getSystemMessage());
86
87
  }
87
- this.addTextToHistory("user", input);
88
+ if (typeof input === "string") {
89
+ this.addTextToHistory("user", input);
90
+ }
91
+ else {
92
+ this.addMessageToHistory("user", input);
93
+ }
88
94
  try {
89
95
  const messages = transformers_1.mistralTransformer.toProvider(this.history.getEntries());
90
96
  const response = await this.client.chat.complete({
@@ -1,5 +1,5 @@
1
1
  import { BaseAgent, BaseAgentConfig, TokenUsage } from "../BaseAgent";
2
- import { History } from "../../history/History";
2
+ import { History, MessageContent } from "../../history/History";
3
3
  import { Tool, Response, ResponseUsage } from "openai/resources/responses/responses";
4
4
  import { OpenAIModel } from "../model-types";
5
5
  type AgentConfig = BaseAgentConfig & {
@@ -39,7 +39,7 @@ export declare class OpenAiAgent extends BaseAgent {
39
39
  constructor(config: Omit<AgentConfig, "vendor">, history?: History);
40
40
  protected getToolDefinitions(): Tool[];
41
41
  protected process(_input: string): Promise<string>;
42
- execute(input: string): Promise<string>;
42
+ execute(input: string | MessageContent[]): Promise<string>;
43
43
  protected handleResponse(response: Response): Promise<string>;
44
44
  private handleToolUse;
45
45
  protected parseUsage(input: ResponseUsage): TokenUsage;
@@ -86,16 +86,22 @@ class OpenAiAgent extends BaseAgent_1.BaseAgent {
86
86
  // Reset token usage for this execution
87
87
  this.lastTokenUsage = undefined;
88
88
  this.currentToolCallCount = 0;
89
+ const inputPreview = typeof input === "string" ? input : JSON.stringify(input);
89
90
  // Start visualization reporting
90
91
  if (VizConfig_1.vizConfig.isEnabled()) {
91
- this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "openai", input);
92
+ this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "openai", inputPreview);
92
93
  }
93
94
  if (this.history.transient) {
94
95
  this.history.clear();
95
96
  // Re-add system message after clear
96
97
  this.addSystemMessage(this.getSystemMessage());
97
98
  }
98
- this.addTextToHistory("user", input);
99
+ if (typeof input === "string") {
100
+ this.addTextToHistory("user", input);
101
+ }
102
+ else {
103
+ this.addMessageToHistory("user", input);
104
+ }
99
105
  try {
100
106
  const inputMessages = transformers_1.openAiTransformer.toProvider(this.history.getEntries());
101
107
  const response = await this.client.responses.create({
@@ -3,8 +3,8 @@ import { HistoryEntry, MessageRole, MessageContent } from "./types";
3
3
  import type { ReduceOptions } from "./types";
4
4
  /** @internal — exposed for test teardown only */
5
5
  export declare function resetTokenxCache(): void;
6
- export type { HistoryEntry, MessageRole, MessageContent, ReduceOptions } from "./types";
7
- export { text, toolUse, toolResult, textMessage, isTextContent, isToolUseContent, isToolResultContent, } from "./types";
6
+ export type { HistoryEntry, MessageRole, MessageContent, ReduceOptions, ImageMimeType, ImageUrlContent, ImageBase64Content, } from "./types";
7
+ export { text, toolUse, toolResult, textMessage, imageUrl, imageBase64, isTextContent, isToolUseContent, isToolResultContent, isImageUrlContent, isImageBase64Content, isImageContent, } from "./types";
8
8
  /**
9
9
  * Metadata stored alongside each history entry.
10
10
  * Extended with summary tracking fields for the compression plugin.
@@ -36,7 +36,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
36
36
  return (mod && mod.__esModule) ? mod : { "default": mod };
37
37
  };
38
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.History = exports.isToolResultContent = exports.isToolUseContent = exports.isTextContent = exports.textMessage = exports.toolResult = exports.toolUse = exports.text = void 0;
39
+ exports.History = exports.isImageContent = exports.isImageBase64Content = exports.isImageUrlContent = exports.isToolResultContent = exports.isToolUseContent = exports.isTextContent = exports.imageBase64 = exports.imageUrl = exports.textMessage = exports.toolResult = exports.toolUse = exports.text = void 0;
40
40
  exports.resetTokenxCache = resetTokenxCache;
41
41
  const events_1 = __importDefault(require("events"));
42
42
  const types_1 = require("./types");
@@ -53,14 +53,32 @@ void Promise.resolve().then(() => __importStar(require("tokenx"))).then((mod) =>
53
53
  function resetTokenxCache() {
54
54
  _estimateTokenCount = (t) => Math.ceil(t.length / 4);
55
55
  }
56
+ /**
57
+ * Estimate token count for a content block array.
58
+ * Image blocks use a flat 1000-token estimate (resolution-independent conservative value).
59
+ * Text and tool blocks fall through to the tokenx estimator.
60
+ */
61
+ function estimateContentTokens(content) {
62
+ return content.reduce((sum, block) => {
63
+ if ((0, types_1.isImageContent)(block)) {
64
+ return sum + 1000;
65
+ }
66
+ return sum + _estimateTokenCount(JSON.stringify(block));
67
+ }, 0);
68
+ }
56
69
  var types_2 = require("./types");
57
70
  Object.defineProperty(exports, "text", { enumerable: true, get: function () { return types_2.text; } });
58
71
  Object.defineProperty(exports, "toolUse", { enumerable: true, get: function () { return types_2.toolUse; } });
59
72
  Object.defineProperty(exports, "toolResult", { enumerable: true, get: function () { return types_2.toolResult; } });
60
73
  Object.defineProperty(exports, "textMessage", { enumerable: true, get: function () { return types_2.textMessage; } });
74
+ Object.defineProperty(exports, "imageUrl", { enumerable: true, get: function () { return types_2.imageUrl; } });
75
+ Object.defineProperty(exports, "imageBase64", { enumerable: true, get: function () { return types_2.imageBase64; } });
61
76
  Object.defineProperty(exports, "isTextContent", { enumerable: true, get: function () { return types_2.isTextContent; } });
62
77
  Object.defineProperty(exports, "isToolUseContent", { enumerable: true, get: function () { return types_2.isToolUseContent; } });
63
78
  Object.defineProperty(exports, "isToolResultContent", { enumerable: true, get: function () { return types_2.isToolResultContent; } });
79
+ Object.defineProperty(exports, "isImageUrlContent", { enumerable: true, get: function () { return types_2.isImageUrlContent; } });
80
+ Object.defineProperty(exports, "isImageBase64Content", { enumerable: true, get: function () { return types_2.isImageBase64Content; } });
81
+ Object.defineProperty(exports, "isImageContent", { enumerable: true, get: function () { return types_2.isImageContent; } });
64
82
  /**
65
83
  * Manages conversation history in a provider-agnostic format.
66
84
  *
@@ -148,7 +166,7 @@ class History extends events_1.default {
148
166
  const __metadata = {
149
167
  date: new Date().toISOString(),
150
168
  contentLength,
151
- estimatedTokens: _estimateTokenCount(serialized),
169
+ estimatedTokens: estimateContentTokens(entry.content),
152
170
  };
153
171
  this._entries.push({
154
172
  ...entry,
@@ -40,6 +40,22 @@ exports.anthropicTransformer = {
40
40
  is_error: block.is_error,
41
41
  };
42
42
  }
43
+ if ((0, types_1.isImageUrlContent)(block)) {
44
+ return {
45
+ type: "image",
46
+ source: { type: "url", url: block.url },
47
+ };
48
+ }
49
+ if ((0, types_1.isImageBase64Content)(block)) {
50
+ return {
51
+ type: "image",
52
+ source: {
53
+ type: "base64",
54
+ media_type: block.mimeType,
55
+ data: block.data,
56
+ },
57
+ };
58
+ }
43
59
  throw new Error(`Unknown content type: ${block.type}`);
44
60
  });
45
61
  return { role, content };
@@ -122,11 +138,14 @@ exports.openAiTransformer = {
122
138
  });
123
139
  continue;
124
140
  }
125
- // Separate tool_use from other content for OpenAI format
141
+ // Separate content blocks by type for OpenAI format
126
142
  const textBlocks = entry.content.filter(types_1.isTextContent);
127
143
  const toolUseBlocks = entry.content.filter(types_1.isToolUseContent);
128
144
  const toolResultBlocks = entry.content.filter(types_1.isToolResultContent);
129
- // Add text message if present
145
+ const imageUrlBlocks = entry.content.filter(types_1.isImageUrlContent);
146
+ const imageBase64Blocks = entry.content.filter(types_1.isImageBase64Content);
147
+ const hasImages = imageUrlBlocks.length > 0 || imageBase64Blocks.length > 0;
148
+ // Add text/image message if present
130
149
  if (textBlocks.length > 0 && entry.role !== "user") {
131
150
  items.push({
132
151
  type: "message",
@@ -135,13 +154,42 @@ exports.openAiTransformer = {
135
154
  });
136
155
  }
137
156
  else if (entry.role === "user" &&
138
- textBlocks.length > 0 &&
157
+ (textBlocks.length > 0 || hasImages) &&
139
158
  toolResultBlocks.length === 0) {
140
- items.push({
141
- type: "message",
142
- role: "user",
143
- content: textBlocks.map((c) => c.text).join("\n"),
144
- });
159
+ if (hasImages) {
160
+ // Mixed content: build an array of content parts
161
+ const parts = [];
162
+ for (const block of entry.content) {
163
+ if ((0, types_1.isTextContent)(block)) {
164
+ parts.push({ type: "input_text", text: block.text });
165
+ }
166
+ else if ((0, types_1.isImageUrlContent)(block)) {
167
+ parts.push({
168
+ type: "input_image",
169
+ image_url: block.url,
170
+ ...(block.detail ? { detail: block.detail } : {}),
171
+ });
172
+ }
173
+ else if ((0, types_1.isImageBase64Content)(block)) {
174
+ parts.push({
175
+ type: "input_image",
176
+ image_url: `data:${block.mimeType};base64,${block.data}`,
177
+ });
178
+ }
179
+ }
180
+ items.push({
181
+ type: "message",
182
+ role: "user",
183
+ content: parts,
184
+ });
185
+ }
186
+ else {
187
+ items.push({
188
+ type: "message",
189
+ role: "user",
190
+ content: textBlocks.map((c) => c.text).join("\n"),
191
+ });
192
+ }
145
193
  }
146
194
  // Add tool calls as separate function_call items (OpenAI format)
147
195
  // Convert IDs to OpenAI format if they came from another provider
@@ -243,7 +291,7 @@ exports.mistralTransformer = {
243
291
  messages.push(msg);
244
292
  continue;
245
293
  }
246
- // User role - could be text or tool results
294
+ // User role - could be text, images, or tool results
247
295
  if (toolResultBlocks.length > 0) {
248
296
  // Mistral uses separate "tool" role messages for each result
249
297
  // We need to find the corresponding tool name from the assistant's tool_calls
@@ -258,11 +306,31 @@ exports.mistralTransformer = {
258
306
  });
259
307
  }
260
308
  }
261
- else if (textBlocks.length > 0) {
262
- messages.push({
263
- role: "user",
264
- content: textBlocks.map((c) => c.text).join("\n"),
265
- });
309
+ else {
310
+ const imageUrlBlocks = entry.content.filter(types_1.isImageUrlContent);
311
+ const imageBase64Blocks = entry.content.filter(types_1.isImageBase64Content);
312
+ if (imageBase64Blocks.length > 0) {
313
+ throw new Error("Mistral does not support base64 image inputs. Convert images to URLs before using with MistralAgent.");
314
+ }
315
+ if (imageUrlBlocks.length > 0) {
316
+ // Mistral vision: array content with text + image_url parts
317
+ const parts = [];
318
+ for (const block of entry.content) {
319
+ if ((0, types_1.isTextContent)(block)) {
320
+ parts.push({ type: "text", text: block.text });
321
+ }
322
+ else if ((0, types_1.isImageUrlContent)(block)) {
323
+ parts.push({ type: "image_url", image_url: block.url });
324
+ }
325
+ }
326
+ messages.push({ role: "user", content: parts });
327
+ }
328
+ else if (textBlocks.length > 0) {
329
+ messages.push({
330
+ role: "user",
331
+ content: textBlocks.map((c) => c.text).join("\n"),
332
+ });
333
+ }
266
334
  }
267
335
  }
268
336
  return messages;
@@ -336,6 +404,25 @@ exports.geminiTransformer = {
336
404
  for (const block of textBlocks) {
337
405
  parts.push({ text: block.text });
338
406
  }
407
+ // Add image parts
408
+ for (const block of entry.content) {
409
+ if ((0, types_1.isImageUrlContent)(block)) {
410
+ parts.push({
411
+ fileData: {
412
+ mimeType: block.mimeType ?? "image/jpeg",
413
+ fileUri: block.url,
414
+ },
415
+ });
416
+ }
417
+ else if ((0, types_1.isImageBase64Content)(block)) {
418
+ parts.push({
419
+ inlineData: {
420
+ mimeType: block.mimeType,
421
+ data: block.data,
422
+ },
423
+ });
424
+ }
425
+ }
339
426
  // Add function call parts (for assistant/model messages)
340
427
  for (const block of toolUseBlocks) {
341
428
  parts.push({
@@ -29,10 +29,34 @@ export type ToolResultContent = {
29
29
  content: string;
30
30
  is_error?: boolean;
31
31
  };
32
+ /**
33
+ * Supported image MIME types across all providers
34
+ */
35
+ export type ImageMimeType = "image/jpeg" | "image/png" | "image/gif" | "image/webp";
36
+ /**
37
+ * Image referenced by URL
38
+ */
39
+ export type ImageUrlContent = {
40
+ type: "image_url";
41
+ url: string;
42
+ /** MIME type hint consumed by Gemini (fileData), which falls back to "image/jpeg" when omitted; ignored by other providers */
43
+ mimeType?: ImageMimeType;
44
+ /** OpenAI detail level hint — ignored by other providers */
45
+ detail?: "low" | "high" | "auto";
46
+ };
47
+ /**
48
+ * Image provided as raw base64-encoded data (no data: URI prefix)
49
+ */
50
+ export type ImageBase64Content = {
51
+ type: "image_base64";
52
+ /** Raw base64 string — do not include the `data:<mime>;base64,` prefix */
53
+ data: string;
54
+ mimeType: ImageMimeType;
55
+ };
32
56
  /**
33
57
  * Union of all content types
34
58
  */
35
- export type MessageContent = TextContent | ToolUseContent | ToolResultContent;
59
+ export type MessageContent = TextContent | ToolUseContent | ToolResultContent | ImageUrlContent | ImageBase64Content;
36
60
  /**
37
61
  * Anthropic-specific metadata
38
62
  */
@@ -111,6 +135,9 @@ export type HistoryEntry = {
111
135
  export declare function isTextContent(content: MessageContent): content is TextContent;
112
136
  export declare function isToolUseContent(content: MessageContent): content is ToolUseContent;
113
137
  export declare function isToolResultContent(content: MessageContent): content is ToolResultContent;
138
+ export declare function isImageUrlContent(content: MessageContent): content is ImageUrlContent;
139
+ export declare function isImageBase64Content(content: MessageContent): content is ImageBase64Content;
140
+ export declare function isImageContent(content: MessageContent): content is ImageUrlContent | ImageBase64Content;
114
141
  /**
115
142
  * Create a text content block
116
143
  */
@@ -127,6 +154,17 @@ export declare function toolResult(tool_use_id: string, content: string, is_erro
127
154
  * Create a simple text message entry
128
155
  */
129
156
  export declare function textMessage(role: MessageRole, value: string): HistoryEntry;
157
+ /**
158
+ * Create an image URL content block
159
+ */
160
+ export declare function imageUrl(url: string, options?: {
161
+ mimeType?: ImageMimeType;
162
+ detail?: "low" | "high" | "auto";
163
+ }): ImageUrlContent;
164
+ /**
165
+ * Create a base64 image content block
166
+ */
167
+ export declare function imageBase64(data: string, mimeType: ImageMimeType): ImageBase64Content;
130
168
  /**
131
169
  * Options controlling how history.reduce() compacts stored entries.
132
170
  * All fields are optional — supply whichever constraints apply.
@@ -9,10 +9,15 @@ Object.defineProperty(exports, "__esModule", { value: true });
9
9
  exports.isTextContent = isTextContent;
10
10
  exports.isToolUseContent = isToolUseContent;
11
11
  exports.isToolResultContent = isToolResultContent;
12
+ exports.isImageUrlContent = isImageUrlContent;
13
+ exports.isImageBase64Content = isImageBase64Content;
14
+ exports.isImageContent = isImageContent;
12
15
  exports.text = text;
13
16
  exports.toolUse = toolUse;
14
17
  exports.toolResult = toolResult;
15
18
  exports.textMessage = textMessage;
19
+ exports.imageUrl = imageUrl;
20
+ exports.imageBase64 = imageBase64;
16
21
  // =============================================================================
17
22
  // Helper Type Guards
18
23
  // =============================================================================
@@ -25,6 +30,15 @@ function isToolUseContent(content) {
25
30
  function isToolResultContent(content) {
26
31
  return content.type === "tool_result";
27
32
  }
33
+ function isImageUrlContent(content) {
34
+ return content.type === "image_url";
35
+ }
36
+ function isImageBase64Content(content) {
37
+ return content.type === "image_base64";
38
+ }
39
+ function isImageContent(content) {
40
+ return content.type === "image_url" || content.type === "image_base64";
41
+ }
28
42
  // =============================================================================
29
43
  // Utility Functions
30
44
  // =============================================================================
@@ -52,4 +66,16 @@ function toolResult(tool_use_id, content, is_error) {
52
66
  function textMessage(role, value) {
53
67
  return { role, content: [text(value)] };
54
68
  }
69
+ /**
70
+ * Create an image URL content block
71
+ */
72
+ function imageUrl(url, options) {
73
+ return { type: "image_url", url, ...options };
74
+ }
75
+ /**
76
+ * Create a base64 image content block
77
+ */
78
+ function imageBase64(data, mimeType) {
79
+ return { type: "image_base64", data, mimeType };
80
+ }
55
81
  //# sourceMappingURL=types.js.map
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agentionai/agents",
3
3
  "author": "Laurent Zuijdwijk",
4
- "version": "0.9.0",
4
+ "version": "0.10.0",
5
5
  "description": "Agent Library",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",