@warlock.js/ai-ollama 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cjs/index.cjs +705 -0
- package/cjs/index.cjs.map +1 -0
- package/esm/config.type.d.mts +80 -0
- package/esm/config.type.d.mts.map +1 -0
- package/esm/embedder.mjs +101 -0
- package/esm/embedder.mjs.map +1 -0
- package/esm/index.d.mts +3 -0
- package/esm/index.mjs +3 -0
- package/esm/known-vision-models.mjs +44 -0
- package/esm/known-vision-models.mjs.map +1 -0
- package/esm/model.mjs +251 -0
- package/esm/model.mjs.map +1 -0
- package/esm/sdk.d.mts +62 -0
- package/esm/sdk.d.mts.map +1 -0
- package/esm/sdk.mjs +78 -0
- package/esm/sdk.mjs.map +1 -0
- package/esm/utils/index.mjs +6 -0
- package/esm/utils/map-done-reason.mjs +31 -0
- package/esm/utils/map-done-reason.mjs.map +1 -0
- package/esm/utils/to-ollama-messages.mjs +87 -0
- package/esm/utils/to-ollama-messages.mjs.map +1 -0
- package/esm/utils/to-ollama-tools.mjs +41 -0
- package/esm/utils/to-ollama-tools.mjs.map +1 -0
- package/esm/utils/wrap-ollama-error.mjs +104 -0
- package/esm/utils/wrap-ollama-error.mjs.map +1 -0
- package/llms-full.txt +122 -0
- package/llms.txt +9 -0
- package/package.json +38 -0
- package/skills/README.md +9 -0
- package/skills/setup-ollama/SKILL.md +112 -0
package/cjs/index.cjs
ADDED
|
@@ -0,0 +1,705 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
let ollama = require("ollama");
|
|
3
|
+
let _warlock_js_ai = require("@warlock.js/ai");
|
|
4
|
+
let _warlock_js_logger = require("@warlock.js/logger");
|
|
5
|
+
|
|
6
|
+
//#region ../../@warlock.js/ai-ollama/src/utils/map-done-reason.ts
|
|
7
|
+
/**
 * Ollama `done_reason` values that have a direct normalized equivalent.
 *
 * Built on a null prototype so that only the keys listed here can ever
 * match: a plain object literal would also resolve inherited names such
 * as `"constructor"` or `"toString"` and hand back a function instead of
 * a finish reason.
 */
const doneReasonMap = Object.freeze(Object.assign(Object.create(null), {
	stop: "stop",
	length: "length"
}));
/**
 * Map Ollama's `done_reason` to the normalized `FinishReason` union.
 *
 * `stop` is the natural terminal; `length` means the `num_predict`
 * cap was hit. Anything else — `load` (model load only, no
 * generation), an empty string, a missing value, or any future /
 * unexpected value — falls through to `"error"`.
 *
 * Note: Ollama has no tool-use done reason — it sets `done_reason:
 * "stop"` and populates `message.tool_calls`. `OllamaModel` derives
 * `"tool_calls"` from tool-call presence; this map stays purely about
 * the raw signal.
 *
 * @param raw - The raw `done_reason` from an Ollama response, possibly
 *   `undefined` / `null`.
 * @returns `"stop"`, `"length"`, or `"error"`.
 *
 * @example
 * mapDoneReason("stop"); // "stop"
 * mapDoneReason("length"); // "length"
 * mapDoneReason("load"); // "error"
 * mapDoneReason(undefined); // "error"
 * mapDoneReason("constructor"); // "error" (never an inherited member)
 */
function mapDoneReason(raw) {
	return doneReasonMap[raw ?? ""] ?? "error";
}
|
|
33
|
+
|
|
34
|
+
//#endregion
|
|
35
|
+
//#region ../../@warlock.js/ai-ollama/src/utils/to-ollama-messages.ts
|
|
36
|
+
/**
 * Translate the vendor-neutral `Message[]` into Ollama chat messages.
 *
 * Ollama keeps `system` as an ordinary role inside `messages`, so roles
 * are forwarded as-is and nothing is hoisted out of the list. Three
 * shapes get special treatment:
 *
 * - **Tool results** (`role: "tool"`): emitted as a `tool` message whose
 *   `tool_name` is taken from `toolCallId` (empty string when absent).
 * - **Assistant tool calls** (`role: "assistant"` with a non-empty
 *   `toolCalls`): emitted with `tool_calls` in Ollama's
 *   `{ function: { name, arguments } }` shape; a missing `input`
 *   becomes `{}`.
 * - **Multipart user content** (`role: "user"` with array content):
 *   delegated to `toMultipartMessage`.
 *
 * Every other message is reduced to `{ role, content }` with the
 * content flattened to a string by `stringifyContent`.
 *
 * @param messages - Neutral messages, in conversation order.
 * @returns One Ollama message per input message, in the same order.
 *
 * @example
 * const messages = toOllamaMessages([
 *   { role: "system", content: "Be concise." },
 *   { role: "user", content: "Hi" },
 * ]);
 */
function toOllamaMessages(messages) {
	return messages.map((message) => {
		const { role } = message;
		if (role === "tool") {
			const content = stringifyContent(message.content);
			return {
				role: "tool",
				content,
				tool_name: message.toolCallId ?? ""
			};
		}
		const carriesToolCalls = role === "assistant" && (message.toolCalls?.length ?? 0) > 0;
		if (carriesToolCalls) {
			const content = stringifyContent(message.content);
			const tool_calls = message.toolCalls.map((toolCall) => {
				return {
					function: {
						name: toolCall.name,
						arguments: toolCall.input ?? {}
					}
				};
			});
			return {
				role: "assistant",
				content,
				tool_calls
			};
		}
		if (role === "user" && Array.isArray(message.content)) {
			return toMultipartMessage(message.content);
		}
		return {
			role,
			content: stringifyContent(message.content)
		};
	});
}
|
|
83
|
+
/**
 * Flatten a multipart (`ContentPart[]`) user message into the shape
 * Ollama expects: one `content` string plus an optional `images` array
 * of base64 payloads.
 *
 * Text parts are concatenated in order with no separator. Every
 * non-text part is treated as an image: its `source.base64` is
 * collected, while a source carrying a `url` key is rejected with an
 * `InvalidRequestError`, because Ollama does not fetch remote images.
 * Nothing is fetched or decoded here.
 *
 * @param parts - The content parts of a single user message.
 * @returns A `user` message; `images` is present only when at least one
 *   image was collected.
 * @throws InvalidRequestError when an image part references a URL.
 */
function toMultipartMessage(parts) {
	const textChunks = [];
	const images = [];
	for (const part of parts) {
		if (part.type !== "text") {
			const { source } = part;
			if ("url" in source) {
				throw new _warlock_js_ai.InvalidRequestError("Ollama does not fetch remote-URL images; supply base64 image bytes instead.");
			}
			images.push(source.base64);
		} else {
			textChunks.push(part.text);
		}
	}
	const message = {
		role: "user",
		content: textChunks.join("")
	};
	if (images.length > 0) message.images = images;
	return message;
}
|
|
108
|
+
/**
 * Reduce message content to the single string Ollama expects.
 *
 * - A plain string passes through unchanged.
 * - `null` / `undefined` (for example a message that carries no text at
 *   all) becomes an empty string instead of throwing on `.filter`.
 * - Multipart content collapses to its text parts concatenated in
 *   order with no separator; non-text parts are dropped.
 *
 * @param content - A string, a `ContentPart[]`, or a nullish value.
 * @returns The textual content as one string.
 */
function stringifyContent(content) {
	if (typeof content === "string") return content;
	if (content === null || content === void 0) return "";
	return content.filter((part) => part.type === "text").map((part) => part.text).join("");
}
|
|
116
|
+
|
|
117
|
+
//#endregion
|
|
118
|
+
//#region ../../@warlock.js/ai-ollama/src/utils/to-ollama-tools.ts
|
|
119
|
+
/**
 * Build Ollama's `tools` array from the vendor-neutral `ToolConfig[]`.
 *
 * Every tool is emitted as
 * `{ type: "function", function: { name, description, parameters } }`,
 * where `parameters` comes from `toParameters(tool.input)`.
 *
 * @param tools - Neutral tool definitions; may be missing or empty.
 * @returns The mapped array, or `undefined` when there is nothing to
 *   send so the caller can leave `tools` out of the request.
 *
 * @example
 * const tools = toOllamaTools([weatherTool]);
 * await ollama.chat({ model, messages, tools });
 */
function toOllamaTools(tools) {
	if (!tools) return void 0;
	if (tools.length === 0) return void 0;
	return tools.map((tool) => {
		const definition = {
			name: tool.name,
			description: tool.description,
			parameters: toParameters(tool.input)
		};
		return {
			type: "function",
			function: definition
		};
	});
}
|
|
143
|
+
/**
 * Turn a tool's input schema into the JSON-Schema object used as the
 * function's `parameters`.
 *
 * The schema is obtained through `extractJsonSchema` from
 * `@warlock.js/ai`. It is used only when it has an object root
 * (`type === "object"`); any other result, including a falsy one,
 * is replaced by the parameterless `{ type: "object" }`.
 *
 * @param input - The tool's declared input schema.
 * @returns A JSON-Schema object with an object root.
 */
function toParameters(input) {
	const extracted = (0, _warlock_js_ai.extractJsonSchema)(input);
	const hasObjectRoot = Boolean(extracted) && extracted.type === "object";
	if (hasObjectRoot) {
		return extracted;
	}
	return { type: "object" };
}
|
|
153
|
+
|
|
154
|
+
//#endregion
|
|
155
|
+
//#region ../../@warlock.js/ai-ollama/src/utils/wrap-ollama-error.ts
|
|
156
|
+
/**
 * Convert any value thrown inside the Ollama adapter into the matching
 * `@warlock.js/ai` `AIError` subclass.
 *
 * Values that already are an `AIError` are returned untouched, so a
 * `catch → throw wrapOllamaError(e)` pipeline never wraps twice.
 *
 * Classification runs in this order; the first match wins:
 *
 * 1. Timeout signals (see `isTimeout`) → `ProviderTimeoutError`.
 * 2. Unreachable daemon (see `isConnectionRefused`) → `ProviderError`.
 * 3. Status `401` / `403` → `ProviderAuthError`.
 * 4. Status `429` → `ProviderRateLimitError`.
 * 5. Any other 4xx status → `ContextLengthExceededError` when the
 *    message matches the context-length phrasing, otherwise
 *    `InvalidRequestError`.
 * 6. Everything else → `ProviderError`.
 *
 * The wrapped error always receives the original value as `cause` and
 * the diagnostic fields from `buildContext` as `context`.
 *
 * @param thrown - Whatever was caught; not necessarily an `Error`.
 * @returns An `AIError` instance (never throws it).
 *
 * @example
 * try {
 *   return await this.client.chat({ ... });
 * } catch (thrown) {
 *   throw wrapOllamaError(thrown);
 * }
 */
function wrapOllamaError(thrown) {
	if (thrown instanceof _warlock_js_ai.AIError) return thrown;
	const shape = toShape(thrown);
	const context = buildContext(shape);
	const message = shape.message ?? (thrown instanceof Error ? thrown.message : String(thrown));
	const { statusCode } = shape;
	// Resolve the error class first, then construct it once at the end.
	const resolveErrorClass = () => {
		if (isTimeout(shape)) return _warlock_js_ai.ProviderTimeoutError;
		if (isConnectionRefused(shape, message)) return _warlock_js_ai.ProviderError;
		if (statusCode === 401 || statusCode === 403) return _warlock_js_ai.ProviderAuthError;
		if (statusCode === 429) return _warlock_js_ai.ProviderRateLimitError;
		if (!isClientStatus(statusCode)) return _warlock_js_ai.ProviderError;
		const mentionsContextLimit = /context length|too long|exceeds|maximum context/i.test(message);
		return mentionsContextLimit ? _warlock_js_ai.ContextLengthExceededError : _warlock_js_ai.InvalidRequestError;
	};
	const ErrorClass = resolveErrorClass();
	return new ErrorClass(message, {
		cause: thrown,
		context
	});
}
|
|
213
|
+
/**
 * Extract the fields the error classifier cares about from a thrown
 * value.
 *
 * Non-objects (and `null`) yield an empty object. For objects, each
 * field is kept only when it has the expected primitive type:
 * `name` / `message` as strings, `status_code` as a number (exposed as
 * `statusCode`), and `code` as a string. When the value itself has no
 * string `code`, the `code` of its `cause` is used instead — that is
 * where fetch-layer errors carry the socket-level error code.
 *
 * @param thrown - Whatever was caught.
 * @returns `{}` or `{ name, message, statusCode, code }` with
 *   `undefined` for fields that are missing or of the wrong type.
 */
function toShape(thrown) {
	if (thrown === null || typeof thrown !== "object") return {};
	const asString = (value) => typeof value === "string" ? value : void 0;
	const { cause } = thrown;
	const ownCode = asString(thrown.code);
	const causeCode = cause ? asString(cause.code) : void 0;
	return {
		name: asString(thrown.name),
		message: asString(thrown.message),
		statusCode: typeof thrown.status_code === "number" ? thrown.status_code : void 0,
		code: ownCode ?? causeCode
	};
}
|
|
229
|
+
/**
 * Whether an error shape carries a transport-level timeout signal:
 * an `AbortError` / `TimeoutError` name, or an `ETIMEDOUT` /
 * `ECONNABORTED` code.
 *
 * @param shape - Shape produced by `toShape`.
 * @returns `true` when any of those signals is present.
 */
function isTimeout(shape) {
	const { name, code } = shape;
	const timedOutByName = name === "AbortError" || name === "TimeoutError";
	const timedOutByCode = code === "ETIMEDOUT" || code === "ECONNABORTED";
	return timedOutByName || timedOutByCode;
}
|
|
234
|
+
/**
 * Whether the failure looks like the Ollama daemon being unreachable:
 * either the shape's code is `ECONNREFUSED`, or the message contains
 * `fetch failed` / `econnrefused` (case-insensitive) — the text the
 * client surfaces when the host is down.
 *
 * @param shape - Shape produced by `toShape`.
 * @param message - The resolved error message.
 * @returns `true` when either signal is present.
 */
function isConnectionRefused(shape, message) {
	if (shape.code === "ECONNREFUSED") return true;
	return /fetch failed|econnrefused/i.test(message);
}
|
|
242
|
+
/**
 * Whether `status` is an HTTP 4xx code, i.e. a number in `[400, 500)`.
 * Non-numbers (including `undefined`) are never client statuses.
 *
 * @param status - HTTP status code, if any.
 * @returns `true` for 4xx, `false` otherwise.
 */
function isClientStatus(status) {
	if (typeof status !== "number") return false;
	return 400 <= status && status < 500;
}
|
|
246
|
+
/**
 * Collect the diagnostic fields that go on `error.context`.
 *
 * `status` is included whenever the shape has a `statusCode` (any
 * value other than `undefined`); `code` is included only when it is
 * truthy, so an empty string is left out.
 *
 * @param shape - Shape produced by `toShape`.
 * @returns An object with zero, one, or both of `status` and `code`.
 */
function buildContext(shape) {
	const { statusCode, code } = shape;
	return {
		...statusCode !== void 0 ? { status: statusCode } : {},
		...code ? { code } : {}
	};
}
|
|
253
|
+
|
|
254
|
+
//#endregion
|
|
255
|
+
//#region ../../@warlock.js/ai-ollama/src/embedder.ts
|
|
256
|
+
const LOG_MODULE$1 = "ai.ollama";
/**
 * `EmbedderContract` implementation backed by Ollama's `client.embed`
 * (for models such as `nomic-embed-text` or `mxbai-embed-large`).
 *
 * **What it does.** Turns text into floating-point vectors. It is a
 * standalone primitive with no connection to chat, tools, or the agent
 * loop.
 *
 * **Batching.** `embed` and `embedMany` both go through `request`,
 * which sends the whole input array in a single `client.embed` call
 * and returns the `embeddings` array from the response as-is.
 *
 * **Usage.** The response's `prompt_eval_count` (or `0` when absent)
 * is reported as both `promptTokens` and `totalTokens`.
 *
 * **Dimensions.** With no `dimensions` in the config,
 * `this.dimensions` starts at `0` and is filled in from the length of
 * the first vector of the first non-empty response, then kept. A
 * configured `dimensions` is used as the initial value and is also
 * forwarded on every request.
 *
 * @example
 * const embedder = new OllamaEmbedder(client, { name: "nomic-embed-text" });
 * const { vector } = await embedder.embed("Hello world");
 * const { vectors } = await embedder.embedMany(["doc 1", "doc 2"]);
 */
var OllamaEmbedder = class {
	/**
	 * @param client - Ollama client whose `embed` method is called.
	 * @param config - `{ name, dimensions? }` for the embedding model.
	 * @param provider - Provider label; defaults to `"ollama"`.
	 */
	constructor(client, config, provider = "ollama") {
		const { name, dimensions } = config;
		this.logger = _warlock_js_logger.log;
		this.client = client;
		this.name = name;
		this.provider = provider;
		this.configuredDimensions = dimensions;
		this.dimensions = dimensions ?? 0;
	}
	/**
	 * Embed a single text.
	 *
	 * @param input - The text to embed.
	 * @returns `{ vector, dimensions, usage }`; `vector` is `[]` when the
	 *   response contained no embedding.
	 */
	async embed(input) {
		const result = await this.request([input]);
		const vector = result.embeddings[0] ?? [];
		return {
			vector,
			dimensions: this.dimensions,
			usage: result.usage
		};
	}
	/**
	 * Embed several texts with one request.
	 *
	 * @param inputs - The texts to embed.
	 * @returns `{ vectors, dimensions, usage }`.
	 */
	async embedMany(inputs) {
		const result = await this.request(inputs);
		return {
			vectors: result.embeddings,
			dimensions: this.dimensions,
			usage: result.usage
		};
	}
	/**
	 * Transport shared by `embed` and `embedMany`: issues one
	 * `client.embed` call for the batch, logs and rethrows failures as
	 * wrapped `AIError`s, records `dimensions` from the first vector when
	 * still unknown, and returns the vectors with a neutral usage object.
	 *
	 * @param inputs - Texts for this batch.
	 * @returns `{ embeddings, usage }`.
	 */
	async request(inputs) {
		this.logger.debug(LOG_MODULE$1, "embedder.request", "embed", {
			model: this.name,
			count: inputs.length
		});
		const payload = {
			model: this.name,
			input: inputs
		};
		// Only forward `dimensions` when the caller configured one.
		if (this.configuredDimensions !== void 0) payload.dimensions = this.configuredDimensions;
		let response;
		try {
			response = await this.client.embed(payload);
		} catch (thrown) {
			const wrapped = wrapOllamaError(thrown);
			this.logger.error(LOG_MODULE$1, "embedder.error", wrapped.message, {
				code: wrapped.code,
				context: wrapped.context
			});
			throw wrapped;
		}
		const embeddings = response.embeddings ?? [];
		const firstVector = embeddings[0];
		// `0` means "not known yet"; learn the size from the first vector seen.
		if (firstVector && this.dimensions === 0) this.dimensions = firstVector.length;
		const promptTokens = response.prompt_eval_count ?? 0;
		const usage = {
			promptTokens,
			totalTokens: promptTokens
		};
		this.logger.debug(LOG_MODULE$1, "embedder.response", "embed returned", {
			count: embeddings.length,
			dimensions: this.dimensions
		});
		return {
			embeddings,
			usage
		};
	}
};
|
|
348
|
+
|
|
349
|
+
//#endregion
|
|
350
|
+
//#region ../../@warlock.js/ai-ollama/src/known-vision-models.ts
|
|
351
|
+
/**
 * Fragments that mark an Ollama model tag as belonging to a family
 * that accepts image input.
 *
 * Ollama tags are named after the family and may carry size or
 * quantization suffixes (`llama3.2-vision:11b`, `llava:13b-v1.6`,
 * `qwen2.5-vl:7b`), which is why matching is by substring. Tags that
 * contain none of these fragments (`llama3.1`, `mistral`, `phi3`,
 * `nomic-embed-text`, …) are treated as text-only. A per-model override
 * is available via `ollama.model({ name, vision: true | false })`.
 */
const VISION_CAPABLE_SUBSTRINGS = [
	"llava",
	"vision",
	"bakllava",
	"moondream",
	"minicpm-v",
	"qwen2-vl",
	"qwen2.5-vl",
	"llama4",
	"gemma3"
];
/**
 * Guess whether a model tag supports vision by checking it,
 * case-insensitively, against `VISION_CAPABLE_SUBSTRINGS`.
 *
 * Tags that match nothing report `false`.
 *
 * @param modelName - The Ollama model tag.
 * @returns `true` when the lowercased tag contains a known fragment.
 *
 * @example
 * inferVisionCapability("llama3.2-vision:11b"); // → true
 * inferVisionCapability("llava:13b"); // → true
 * inferVisionCapability("llama3.1"); // → false
 * inferVisionCapability("nomic-embed-text"); // → false
 */
function inferVisionCapability(modelName) {
	const tag = modelName.toLowerCase();
	for (const fragment of VISION_CAPABLE_SUBSTRINGS) {
		if (tag.includes(fragment)) return true;
	}
	return false;
}
|
|
390
|
+
|
|
391
|
+
//#endregion
|
|
392
|
+
//#region ../../@warlock.js/ai-ollama/src/model.ts
|
|
393
|
+
const LOG_MODULE = "ai.ollama";
/**
 * Ollama-backed implementation of `ModelContract`.
 *
 * **Role.** The provider-facing bridge between the vendor-neutral
 * `@warlock.js/ai` agent runtime and a local (or self-hosted) Ollama
 * server via the official `ollama` client.
 *
 * **Responsibility.**
 * - Owns: a long-lived `Ollama` client + `ModelConfig` (model tag,
 *   temperature, maxTokens) used as per-call defaults.
 * - Owns: translating vendor-neutral `Message[]` / `ToolConfig[]` into
 *   Ollama's chat shapes (system stays a real role, `tool_calls` /
 *   `tool_name`, base64 `images`) and Ollama's response (content, tool
 *   calls, done reason, eval-count usage) back into neutral shapes.
 * - Does NOT own: tool dispatch, looping, history, retries — agent
 *   concerns. The model is a per-call protocol adapter.
 *
 * **Tool-call ids.** Ollama has no tool-call id concept — a `tool_call`
 * is `{ function: { name, arguments } }`. The adapter synthesizes the
 * neutral `id` from the tool name so the agent's tool-result round-trip
 * (which keys on `toolCallId`) maps back to Ollama's name-based
 * matching. Parallel calls to the *same* tool in one turn therefore
 * share an id — a documented v1 limitation inherent to Ollama's wire
 * format, not this adapter.
 *
 * @example
 * import { Ollama } from "ollama";
 * const client = new Ollama({ host: "http://127.0.0.1:11434" });
 * const model = new OllamaModel(client, { name: "llama3.1" });
 *
 * const myAgent = agent({ model, tools: [searchTool] });
 * const result = await myAgent.execute("Summarize today's news.");
 */
var OllamaModel = class {
	/**
	 * @param client - `Ollama` client used for every chat call.
	 * @param config - Model defaults: `name`, plus optional `temperature`,
	 *   `maxTokens`, `pricing`, `structuredOutput`, and `vision`.
	 * @param provider - Provider label; defaults to `"ollama"`.
	 */
	constructor(client, config, provider = "ollama") {
		this.logger = _warlock_js_logger.log;
		this.client = client;
		this.config = config;
		this.name = config.name;
		this.provider = provider;
		this.pricing = config.pricing;
		this.capabilities = {
			// Structured output is assumed available unless explicitly disabled.
			structuredOutput: config.structuredOutput ?? true,
			// Vision falls back to the tag-based heuristic when not configured.
			vision: config.vision ?? inferVisionCapability(config.name)
		};
	}
	/**
	 * Single-shot completion. Sends the full message list to
	 * `client.chat`, waits for the terminal response, and reshapes it
	 * into a vendor-neutral `ModelResponse`. Per-call `options` override
	 * the instance defaults for this call only.
	 *
	 * @param messages - Neutral conversation messages.
	 * @param options - Optional per-call overrides (`temperature`,
	 *   `maxTokens`, `tools`, `responseSchema`).
	 * @returns `{ content, finishReason, usage, toolCalls }`;
	 *   `finishReason` is `"tool_calls"` whenever the response carries
	 *   tool calls, otherwise the mapped `done_reason`.
	 * @throws AIError (via `logAndWrap`) when the chat call fails.
	 */
	async complete(messages, options) {
		this.logger.debug(LOG_MODULE, "request", "Starting chat call", {
			model: this.name,
			messageCount: messages.length,
			streaming: false,
			toolCount: options?.tools?.length ?? 0
		});
		let response;
		try {
			response = await this.client.chat({
				...this.buildRequest(messages, options),
				stream: false
			});
		} catch (thrown) {
			throw this.logAndWrap(thrown);
		}
		const toolCalls = this.extractToolCalls(response.message);
		const finishReason = toolCalls ? "tool_calls" : mapDoneReason(response.done_reason);
		const usage = this.extractUsage(response);
		this.logger.debug(LOG_MODULE, "response", "chat call succeeded", {
			finishReason,
			usage
		});
		return {
			content: response.message?.content ?? "",
			finishReason,
			usage,
			toolCalls
		};
	}
	/**
	 * Incremental streaming completion. Yields neutral
	 * `ModelStreamChunk`s — `delta` for content, `tool-call` per
	 * function call (Ollama streams a fully-formed call, not partial
	 * JSON), and a terminal `done` with the final finish reason + usage.
	 *
	 * Honors `options.signal` by aborting the underlying stream. The
	 * abort listener is detached again as soon as iteration ends (normal
	 * completion, failure, or the consumer stopping early), so a signal
	 * that outlives the call does not accumulate listeners or keep
	 * finished streams reachable.
	 *
	 * @param messages - Neutral conversation messages.
	 * @param options - Optional per-call overrides, including `signal`.
	 * @throws AIError (via `logAndWrap`) when opening or reading the
	 *   stream fails.
	 */
	async *stream(messages, options) {
		this.logger.debug(LOG_MODULE, "request", "Starting streaming chat call", {
			model: this.name,
			messageCount: messages.length,
			streaming: true,
			toolCount: options?.tools?.length ?? 0
		});
		let stream;
		try {
			stream = await this.client.chat({
				...this.buildRequest(messages, options),
				stream: true
			});
		} catch (thrown) {
			throw this.logAndWrap(thrown);
		}
		const signal = options?.signal;
		// Named handler so the exact same reference can be removed later.
		const abortStream = () => stream.abort();
		if (signal) {
			if (signal.aborted) abortStream();
			else signal.addEventListener("abort", abortStream, { once: true });
		}
		let rawDoneReason;
		let sawToolCall = false;
		const usage = {
			input: 0,
			output: 0,
			total: 0
		};
		try {
			for await (const chunk of stream) {
				const content = chunk.message?.content;
				if (content) yield {
					type: "delta",
					content
				};
				for (const call of chunk.message?.tool_calls ?? []) {
					sawToolCall = true;
					yield {
						type: "tool-call",
						// Ollama has no call id; the tool name doubles as the id.
						id: call.function.name,
						name: call.function.name,
						input: call.function.arguments ?? {}
					};
				}
				if (chunk.done_reason) rawDoneReason = chunk.done_reason;
				if (chunk.done) {
					// Eval counts are read from the chunk flagged `done`.
					usage.input = chunk.prompt_eval_count ?? usage.input;
					usage.output = chunk.eval_count ?? usage.output;
					usage.total = usage.input + usage.output;
				}
			}
		} catch (thrown) {
			throw this.logAndWrap(thrown);
		} finally {
			// No-op when the listener was never added or already fired.
			signal?.removeEventListener("abort", abortStream);
		}
		const finishReason = sawToolCall ? "tool_calls" : mapDoneReason(rawDoneReason);
		this.logger.debug(LOG_MODULE, "response", "streaming chat call succeeded", {
			finishReason,
			usage
		});
		yield {
			type: "done",
			finishReason,
			usage
		};
	}
	/**
	 * Assemble the Ollama chat request shared by `complete()` and
	 * `stream()` (each adds its own `stream` literal). Maps inference
	 * params into Ollama `options` — `temperature` as-is, `maxTokens` as
	 * `num_predict` — and conditionally attaches tools + native
	 * structured output. `options` is omitted when neither param is set.
	 *
	 * @param messages - Neutral conversation messages.
	 * @param options - Optional per-call overrides; each falls back to
	 *   the instance config when `undefined`.
	 * @returns The request object without the `stream` flag.
	 */
	buildRequest(messages, options) {
		const temperature = options?.temperature ?? this.config.temperature;
		const maxTokens = options?.maxTokens ?? this.config.maxTokens;
		const ollamaOptions = {
			...temperature !== void 0 ? { temperature } : {},
			...maxTokens !== void 0 ? { num_predict: maxTokens } : {}
		};
		return {
			model: this.name,
			messages: toOllamaMessages(messages),
			...Object.keys(ollamaOptions).length > 0 ? { options: ollamaOptions } : {},
			...this.buildTools(options?.tools),
			...this.buildFormat(options?.responseSchema)
		};
	}
	/**
	 * Spread-friendly tools fragment. Empty object when no tools were
	 * supplied so the caller can unconditionally spread it.
	 *
	 * @param tools - Neutral tool definitions, if any.
	 * @returns `{ tools }` or `{}`.
	 */
	buildTools(tools) {
		const mapped = toOllamaTools(tools);
		return mapped ? { tools: mapped } : {};
	}
	/**
	 * Translate the neutral `responseSchema` into Ollama's native
	 * structured output (`format` accepts a JSON Schema object).
	 * Emitted only when the model is `structuredOutput`-capable and the
	 * schema is an object root with a `properties` object — otherwise
	 * nothing is added to the request.
	 *
	 * @param responseSchema - JSON Schema for the desired response.
	 * @returns `{ format }` or `{}`.
	 */
	buildFormat(responseSchema) {
		if (!responseSchema || !this.capabilities.structuredOutput) return {};
		if (responseSchema.type !== "object" || typeof responseSchema.properties !== "object") return {};
		return { format: responseSchema };
	}
	/**
	 * Reshape Ollama's `message.tool_calls` into the neutral
	 * `ModelToolCallRequest[]`. Ollama has no tool-call id, so the
	 * neutral `id` is synthesized from the tool name; missing
	 * `arguments` become `{}`.
	 *
	 * @param message - The response message, possibly `undefined`.
	 * @returns The mapped calls, or `undefined` when the message carries
	 *   no tool calls.
	 */
	extractToolCalls(message) {
		const calls = message?.tool_calls;
		if (!calls || calls.length === 0) return;
		return calls.map((call) => ({
			id: call.function.name,
			name: call.function.name,
			input: call.function.arguments ?? {}
		}));
	}
	/**
	 * Normalize Ollama's eval counts into the neutral `Usage` shape:
	 * `prompt_eval_count` → `input`, `eval_count` → `output` (each `0`
	 * when absent), and `total` as their sum. No `cachedTokens` field is
	 * produced.
	 *
	 * @param response - A non-streaming chat response.
	 * @returns `{ input, output, total }`.
	 */
	extractUsage(response) {
		const input = response.prompt_eval_count ?? 0;
		const output = response.eval_count ?? 0;
		return {
			input,
			output,
			total: input + output
		};
	}
	/**
	 * Wrap a thrown provider error into the typed `AIError` hierarchy
	 * and emit the standard error log line. The wrapped error is
	 * returned, not thrown, so call sites read `throw this.logAndWrap(e)`.
	 *
	 * @param thrown - Whatever was caught.
	 * @returns The wrapped `AIError`.
	 */
	logAndWrap(thrown) {
		const wrapped = wrapOllamaError(thrown);
		this.logger.error(LOG_MODULE, "error", wrapped.message, {
			code: wrapped.code,
			context: wrapped.context
		});
		return wrapped;
	}
};
|
|
631
|
+
|
|
632
|
+
//#endregion
|
|
633
|
+
//#region ../../@warlock.js/ai-ollama/src/sdk.ts
|
|
634
|
+
/**
 * `SDKAdapterContract` implementation for Ollama.
 *
 * **Role.** Package entry point for local / self-hosted models served
 * by an Ollama daemon through the official `ollama` client. An
 * `OllamaSDK` creates one `Ollama` client in its constructor and hands
 * that same client to every model and embedder it builds.
 *
 * **Responsibility.**
 * - Owns: the `Ollama` client (constructed from every config key other
 *   than `provider` and `pricing`), the provider label, and the
 *   optional SDK-level pricing registry.
 * - Does NOT own: anything per-call — that lives in `OllamaModel` /
 *   `OllamaEmbedder` and the agent runtime.
 *
 * @example
 * const ollama = new OllamaSDK({}); // local default host
 * const model = ollama.model({ name: "llama3.1", temperature: 0.7 });
 * const embedder = ollama.embedder({ name: "nomic-embed-text" });
 */
var OllamaSDK = class {
	/**
	 * @param config - Optional settings. `provider` (label, defaults to
	 *   `"ollama"`) and `pricing` (registry keyed by model name) are kept
	 *   on the SDK; all remaining keys are passed to the `Ollama` client
	 *   constructor.
	 */
	constructor(config = {}) {
		const { provider, pricing, ...clientConfig } = config;
		this.client = new ollama.Ollama(clientConfig);
		this.provider = provider ?? "ollama";
		this.pricing = pricing;
	}
	/**
	 * Create an `OllamaModel` that uses this SDK's client and provider
	 * label. A new instance is returned on every call.
	 *
	 * Pricing resolution: `config.pricing` when set; otherwise the
	 * SDK-level registry entry for `config.name`; otherwise `undefined`.
	 * The config object is passed through untouched when the resolved
	 * pricing is the one it already carries, and copied with the
	 * resolved pricing otherwise.
	 *
	 * @param config - Model config; `name` is used for the registry lookup.
	 * @returns A new `OllamaModel`.
	 */
	model(config) {
		const ownPricing = config.pricing;
		const pricing = ownPricing ?? this.pricing?.[config.name];
		if (pricing === ownPricing) {
			return new OllamaModel(this.client, config, this.provider);
		}
		const withPricing = {
			...config,
			pricing
		};
		return new OllamaModel(this.client, withPricing, this.provider);
	}
	/**
	 * Estimate the token count of `text` using `approximateTokenCount`
	 * from the core package. The second argument is accepted but not
	 * used.
	 *
	 * @param text - Text to measure.
	 * @param _model - Ignored.
	 * @returns The estimate produced by `approximateTokenCount`.
	 */
	async count(text, _model) {
		const estimate = (0, _warlock_js_ai.approximateTokenCount)(text);
		return estimate;
	}
	/**
	 * Create an `OllamaEmbedder` that uses this SDK's client and
	 * provider label.
	 *
	 * @param config - Embedder config (`name`, optional `dimensions`).
	 * @returns A new `OllamaEmbedder`.
	 *
	 * @example
	 * const embedder = ollama.embedder({ name: "nomic-embed-text" });
	 * const { vector } = await embedder.embed("Hello world");
	 */
	embedder(config) {
		const { client, provider } = this;
		return new OllamaEmbedder(client, config, provider);
	}
};
|
|
702
|
+
|
|
703
|
+
//#endregion
|
|
704
|
+
exports.OllamaSDK = OllamaSDK;
|
|
705
|
+
//# sourceMappingURL=index.cjs.map
|