@sisu-ai/adapter-ollama 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +73 -6
  2. package/dist/index.js +138 -23
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -16,8 +16,6 @@ npm i @sisu-ai/adapter-ollama
16
16
  - Start Ollama locally: `ollama serve`
17
17
  - Pull a tools-capable model: `ollama pull llama3.1:latest`
18
18
 
19
- ## Documentation
20
- Discover what you can do through examples or documentation. Check it out at https://github.com/finger-gun/sisu
21
19
 
22
20
  ## Usage
23
21
  ```ts
@@ -25,14 +23,83 @@ import { ollamaAdapter } from '@sisu-ai/adapter-ollama';
25
23
 
26
24
  const model = ollamaAdapter({ model: 'llama3.1' });
27
25
  // or with custom base URL: { baseUrl: 'http://localhost:11435' }
26
+ ```
27
+
28
+ ## Images (Vision)
29
+ - Accepts multi-part `content` arrays with `type: 'text' | 'image_url'` and convenience fields like `images`/`image_url`.
30
+ - The adapter maps these to Ollama's expected shape by sending `content` as a string and `images` as a string array on the message.
31
+ - If an image value is an `http(s)` URL, the adapter fetches it and inlines it as base64 automatically. Data URLs are supported; raw base64 strings pass through.
28
32
 
29
- // Works with @sisu-ai/mw-tool-calling tools are passed via GenerateOptions.tools
33
+ Content parts (adapter maps to `images[]` under the hood and auto-fetches URLs):
34
+ ```ts
35
+ const messages: any[] = [
36
+ { role: 'user', content: [
37
+ { type: 'text', text: 'What is in this image?' },
38
+ { type: 'image_url', image_url: { url: 'https://example.com/pic.jpg' } },
39
+ ] }
40
+ ];
41
+ const res = await model.generate(messages, { toolChoice: 'none' });
30
42
  ```
31
43
 
44
+ Convenience shape:
45
+ ```ts
46
+ const messages: any[] = [
47
+ { role: 'user', content: 'Describe the image.', images: ['https://example.com/pic.jpg'] },
48
+ ];
49
+ const res = await model.generate(messages, { toolChoice: 'none' });
50
+ ```
51
+
52
+ ### Normalizing Ollama API
53
+ - Providers such as OpenAI vision models accept `image_url` parts with `url` pointing to a remote image; the provider dereferences the URL.
54
+ - Ollama expects each message to include `images: string[]` of base64-encoded image data; it does not dereference remote URLs.
55
+ - This adapter keeps the authoring experience consistent by accepting OpenAI-style parts and convenience URLs, and performs URL→base64 conversion for you.
56
+
57
+ ### Accepted image formats
58
+ - Base64 string: `images: ["<base64>"]` (preferred/default for Ollama)
59
+ - Data URL: `images: ["data:image/png;base64,<base64>"]` or in parts via `{ type: 'image_url', image_url: { url: 'data:...' } }`
60
+ - Remote URL (convenience): `{ type: 'image_url', image_url: { url: 'https://...' } }` or `images: ['https://...']` — adapter fetches and inlines as base64.
61
+
62
+ Note: URL fetching happens from your runtime. If your environment blocks outbound HTTP, either provide base64 directly or host images where your runtime can reach them.
63
+
32
64
  ## Tools
33
- - Accepts `GenerateOptions.tools` and sends them to Ollama under `tools`.
34
- - Parses `message.tool_calls` into `{ id, name, arguments }` for the tool loop.
35
- - Sends assistant `tool_calls` and `tool` messages back to Ollama for follow-up.
65
+ - Define tools as small, named functions with a zod schema.
66
+ - Register them on your agent and add the tool-calling middleware — the adapter handles the wire format to/from Ollama.
67
+ - Under the hood, the adapter sends your tool schemas to the model, maps model “function calls” back to your handlers, and includes tool results for follow-up turns.
68
+
69
+ Quick start with tools
70
+ ```ts
71
+ import { Agent, InMemoryKV, NullStream, SimpleTools, createConsoleLogger, type Ctx, type Tool } from '@sisu-ai/core';
72
+ import { registerTools } from '@sisu-ai/mw-register-tools';
73
+ import { toolCalling } from '@sisu-ai/mw-tool-calling';
74
+ import { z } from 'zod';
75
+ import { ollamaAdapter } from '@sisu-ai/adapter-ollama';
76
+
77
+ const sum: Tool<{ a: number; b: number }> = {
78
+ name: 'sum',
79
+ description: 'Add two numbers',
80
+ schema: z.object({ a: z.number(), b: z.number() }),
81
+ handler: async ({ a, b }) => ({ result: a + b }),
82
+ };
83
+
84
+ const model = ollamaAdapter({ model: 'llama3.1' });
85
+ const ctx: Ctx = {
86
+ input: 'Use the sum tool to add 3 and 7, then explain.',
87
+ messages: [{ role: 'system', content: 'You are helpful.' }],
88
+ model,
89
+ tools: new SimpleTools(),
90
+ memory: new InMemoryKV(),
91
+ stream: new NullStream(),
92
+ state: {},
93
+ signal: new AbortController().signal,
94
+ log: createConsoleLogger(),
95
+ };
96
+
97
+ const app = new Agent()
98
+ .use(registerTools([sum])) // make tools available
99
+ .use(toolCalling); // let the model pick tools, run them, and finalize
100
+
101
+ await app.handler()(ctx);
102
+ ```
36
103
 
37
104
  ## Notes
38
105
  - Tool choice forcing is model-dependent; current loop asks for tools on first turn and plain completion on second.
package/dist/index.js CHANGED
@@ -5,30 +5,44 @@ export function ollamaAdapter(opts) {
5
5
  const modelName = `ollama:${opts.model}`;
6
6
  function generate(messages, genOpts) {
7
7
  // Map messages to Ollama format; include assistant tool_calls and tool messages
8
- const mapped = messages.map((m) => {
9
- const base = { role: m.role };
10
- if (m.role === 'assistant' && Array.isArray(m.tool_calls)) {
11
- base.tool_calls = m.tool_calls.map((tc) => ({ id: tc.id, type: 'function', function: { name: tc.name, arguments: (tc.arguments ?? {}) } }));
12
- base.content = m.content ? String(m.content) : null;
13
- }
14
- else if (m.role === 'tool') {
15
- base.content = String(m.content ?? '');
16
- if (m.tool_call_id)
17
- base.tool_call_id = m.tool_call_id;
18
- if (m.name && !m.tool_call_id)
19
- base.name = m.name;
20
- }
21
- else {
22
- base.content = String(m.content ?? '');
8
+ async function mapMessagesWithImages() {
9
+ const out = [];
10
+ for (const m of messages) {
11
+ const base = { role: m.role };
12
+ const anyM = m;
13
+ if (m.role === 'assistant' && Array.isArray(anyM.tool_calls)) {
14
+ base.tool_calls = anyM.tool_calls.map((tc) => ({ id: tc.id, type: 'function', function: { name: tc.name, arguments: (tc.arguments ?? {}) } }));
15
+ const ti = buildTextAndImages(anyM);
16
+ base.content = ti.content ?? (m.content !== undefined ? m.content : null);
17
+ if (ti.images?.length)
18
+ base.images = await toBase64Images(ti.images);
19
+ }
20
+ else if (m.role === 'tool') {
21
+ base.content = String(m.content ?? '');
22
+ if (m.tool_call_id)
23
+ base.tool_call_id = m.tool_call_id;
24
+ if (m.name && !m.tool_call_id)
25
+ base.name = m.name;
26
+ }
27
+ else {
28
+ const ti = buildTextAndImages(anyM);
29
+ base.content = ti.content ?? (m.content ?? '');
30
+ if (ti.images?.length)
31
+ base.images = await toBase64Images(ti.images);
32
+ if (m.name)
33
+ base.name = m.name;
34
+ }
35
+ out.push(base);
23
36
  }
24
- return base;
25
- });
26
- const toolsParam = (genOpts?.tools ?? []).map(toOllamaTool);
27
- const baseBody = { model: opts.model, messages: mapped };
28
- if (toolsParam.length)
29
- baseBody.tools = toolsParam;
37
+ return out;
38
+ }
30
39
  if (genOpts?.stream === true) {
31
40
  return (async function* () {
41
+ const toolsParam = (genOpts?.tools ?? []).map(toOllamaTool);
42
+ const mapped = await mapMessagesWithImages();
43
+ const baseBody = { model: opts.model, messages: mapped };
44
+ if (toolsParam.length)
45
+ baseBody.tools = toolsParam;
32
46
  const res = await fetch(`${baseUrl}/api/chat`, {
33
47
  method: 'POST',
34
48
  headers: {
@@ -74,6 +88,11 @@ export function ollamaAdapter(opts) {
74
88
  }
75
89
  // Non-stream path
76
90
  return (async () => {
91
+ const toolsParam = (genOpts?.tools ?? []).map(toOllamaTool);
92
+ const mapped = await mapMessagesWithImages();
93
+ const baseBody = { model: opts.model, messages: mapped };
94
+ if (toolsParam.length)
95
+ baseBody.tools = toolsParam;
77
96
  const res = await fetch(`${baseUrl}/api/chat`, {
78
97
  method: 'POST',
79
98
  headers: {
@@ -97,11 +116,11 @@ export function ollamaAdapter(opts) {
97
116
  }
98
117
  const data = raw ? JSON.parse(raw) : {};
99
118
  const choice = data?.message ?? {};
100
- const content = choice?.content ?? '';
119
+ const content = choice?.content;
101
120
  const tcs = Array.isArray(choice?.tool_calls)
102
121
  ? choice.tool_calls.map((tc) => ({ id: tc.id, name: tc.function?.name, arguments: safeJson(tc.function?.arguments) }))
103
122
  : undefined;
104
- const out = { role: 'assistant', content: String(content ?? '') };
123
+ const out = { role: 'assistant', content: content ?? '' };
105
124
  if (tcs)
106
125
  out.tool_calls = tcs;
107
126
  return { message: out };
@@ -161,3 +180,99 @@ function safeJson(s) {
161
180
  return s;
162
181
  }
163
182
  }
183
// Normalize OpenAI-style message content (string or parts array) plus
// convenience image fields into Ollama's expected shape:
// { content: string, images?: string[] }.
// Returns {} for non-object input; `images` is omitted (undefined) when empty.
function buildTextAndImages(m) {
    if (!m || typeof m !== 'object')
        return {};
    const msg = m;
    // Prefer `content` as a parts array, falling back to `contentParts`.
    const partList = Array.isArray(msg.content)
        ? msg.content
        : (Array.isArray(msg.contentParts) ? msg.contentParts : undefined);
    if (partList) {
        const textChunks = [];
        const imageRefs = [];
        for (const part of partList) {
            if (typeof part === 'string') {
                textChunks.push(part);
            }
            else if (part && typeof part === 'object') {
                const rec = part;
                if (rec.type === 'text' && typeof rec.text === 'string') {
                    textChunks.push(rec.text);
                }
                else if (rec.type === 'image_url') {
                    // OpenAI-style image part: image_url may be a bare string or { url }.
                    const ref = rec.image_url;
                    if (typeof ref === 'string')
                        imageRefs.push(ref);
                    else if (ref && typeof ref === 'object' && typeof ref.url === 'string')
                        imageRefs.push(String(ref.url));
                }
                else if (rec.type === 'image' && typeof rec.url === 'string') {
                    imageRefs.push(String(rec.url));
                }
                else if (typeof rec.image_url === 'string') {
                    imageRefs.push(String(rec.image_url));
                }
                else if (typeof rec.image === 'string') {
                    imageRefs.push(String(rec.image));
                }
            }
        }
        return { content: textChunks.join('\n\n'), images: imageRefs.length ? imageRefs : undefined };
    }
    // No parts array: keep a string `content` (if any) and gather the
    // convenience image fields (`images`, `image_urls`, `image_url`, `image`).
    const imageRefs = [];
    if (Array.isArray(msg.images))
        imageRefs.push(...msg.images);
    if (Array.isArray(msg.image_urls))
        imageRefs.push(...msg.image_urls);
    if (typeof msg.image_url === 'string')
        imageRefs.push(msg.image_url);
    if (typeof msg.image === 'string')
        imageRefs.push(msg.image);
    return {
        content: typeof msg.content === 'string' ? msg.content : undefined,
        images: imageRefs.length ? imageRefs : undefined,
    };
}
243
// Resolve every image reference (remote URL, data URL, or raw base64) to a
// base64 string via toBase64. Remote fetches run in parallel instead of the
// previous one-at-a-time loop; the result order still matches the input order.
// Rejects if any single conversion fails.
async function toBase64Images(images) {
    return Promise.all(images.map((src) => toBase64(src)));
}
249
// True when the string starts with an http:// or https:// scheme
// (case-insensitive), i.e. it is a remote URL we should fetch.
function isHttpUrl(s) {
    const head = s.slice(0, 8).toLowerCase();
    return head.startsWith('http://') || head.startsWith('https://');
}
252
// True when the string is a data: URL (case-insensitive scheme check).
function isDataUrl(s) {
    return s.slice(0, 5).toLowerCase() === 'data:';
}
255
// Extract the payload that follows the first comma of a data: URL
// (e.g. "data:image/png;base64,AAAA" -> "AAAA").
// Returns '' when no comma is present (malformed data URL).
function fromDataUrl(s) {
    const comma = s.indexOf(',');
    if (comma < 0)
        return '';
    return s.slice(comma + 1);
}
259
// Heuristic check for a raw base64 payload.
// Excludes anything containing ':' — http(s) URLs and data URLs always carry a
// scheme separator — then validates the base64 alphabet and padding.
// Fix: the previous guard /[:\/]/ also rejected '/', but '/' is a legal
// base64-alphabet character (the alphabet regex below even allows it), so most
// real base64 payloads were misclassified as "not base64".
function isProbablyBase64(s) {
    if (!s || s.includes(':'))
        return false; // exclude URLs and data URLs
    // Basic base64 shape: 4-char groups, at most two '=' padding chars.
    if (s.length % 4 !== 0)
        return false;
    return /^[A-Za-z0-9+/]+={0,2}$/.test(s);
}
267
// Convert a single image reference to a base64 string:
// - data: URL   -> strip the header and return the embedded payload
// - http(s) URL -> fetch it and base64-encode the response body
// - anything else -> returned unchanged (presumed to already be base64)
// Throws when a remote fetch responds with a non-OK status.
async function toBase64(src) {
    if (isDataUrl(src))
        return fromDataUrl(src);
    if (isHttpUrl(src)) {
        const res = await fetch(src);
        if (!res.ok)
            throw new Error(`Failed to fetch image: ${res.status} ${res.statusText}`);
        const buf = Buffer.from(await res.arrayBuffer());
        return buf.toString('base64');
    }
    // Fix: was `isProbablyBase64(src) ? src : src` — both branches returned
    // src, so the conditional was dead code. Pass the value through directly.
    return src;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sisu-ai/adapter-ollama",
3
- "version": "4.0.2",
3
+ "version": "4.1.0",
4
4
  "license": "Apache-2.0",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",