@warlock.js/ai-ollama 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cjs/index.cjs ADDED
@@ -0,0 +1,705 @@
1
+ Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
2
+ let ollama = require("ollama");
3
+ let _warlock_js_ai = require("@warlock.js/ai");
4
+ let _warlock_js_logger = require("@warlock.js/logger");
5
+
6
+ //#region ../../@warlock.js/ai-ollama/src/utils/map-done-reason.ts
7
/**
 * Raw Ollama `done_reason` values that have a direct normalized
 * counterpart. Stored in a `Map` (not an object literal) so a lookup
 * can never resolve to something inherited from `Object.prototype`
 * such as `constructor` or `toString`.
 */
const doneReasonMap = new Map([
  ["stop", "stop"],
  ["length", "length"],
]);
/**
 * Map Ollama's `done_reason` to the normalized `FinishReason` union.
 *
 * `stop` is the natural terminal; `length` means the `num_predict`
 * cap was hit. Anything else — `load` (model load only, no
 * generation), an empty string, `undefined`, or any future value —
 * falls through to `"error"`.
 *
 * Note: Ollama has no tool-use done reason — it sets `done_reason:
 * "stop"` and populates `message.tool_calls`. `OllamaModel` derives
 * `"tool_calls"` from tool-call presence; this map stays purely about
 * the raw signal.
 *
 * @param {string | undefined | null} raw - The raw `done_reason` from an Ollama response.
 * @returns {"stop" | "length" | "error"} The normalized finish reason.
 *
 * @example
 * mapDoneReason("stop"); // "stop"
 * mapDoneReason("length"); // "length"
 * mapDoneReason("load"); // "error"
 * mapDoneReason(undefined); // "error"
 * mapDoneReason("constructor"); // "error" (no prototype leakage)
 */
function mapDoneReason(raw) {
  // `Map#get` returns `undefined` for every unknown key, including
  // names that exist on `Object.prototype`.
  return doneReasonMap.get(raw) ?? "error";
}
33
+
34
+ //#endregion
35
+ //#region ../../@warlock.js/ai-ollama/src/utils/to-ollama-messages.ts
36
/**
 * Translate a vendor-neutral `Message[]` into Ollama chat messages.
 *
 * Ollama keeps `system` as a regular role inside `messages`, so roles
 * are passed through as-is. Three shapes get special treatment:
 *
 * - `tool` messages become `{ role: "tool", content, tool_name }`,
 *   where `tool_name` is taken from `toolCallId` (empty string when
 *   absent).
 * - `assistant` messages with a non-empty `toolCalls` array gain a
 *   `tool_calls` list in the `{ function: { name, arguments } }`
 *   shape; a missing `input` becomes `{}`.
 * - `user` messages whose content is an array are delegated to
 *   `toMultipartMessage` (text + base64 `images`).
 *
 * Everything else becomes `{ role, content }` with the content
 * flattened to a string.
 *
 * @example
 * const messages = toOllamaMessages([
 *   { role: "system", content: "Be concise." },
 *   { role: "user", content: "Hi" },
 * ]);
 */
function toOllamaMessages(messages) {
  const convert = (message) => {
    switch (message.role) {
      case "tool":
        return {
          role: "tool",
          content: stringifyContent(message.content),
          tool_name: message.toolCallId ?? "",
        };
      case "assistant":
        if (message.toolCalls && message.toolCalls.length > 0) {
          return {
            role: "assistant",
            content: stringifyContent(message.content),
            tool_calls: message.toolCalls.map((toolCall) => ({
              function: {
                name: toolCall.name,
                arguments: toolCall.input ?? {},
              },
            })),
          };
        }
        break;
      case "user":
        if (Array.isArray(message.content)) {
          return toMultipartMessage(message.content);
        }
        break;
      default:
        break;
    }
    // Plain pass-through for every role/shape not handled above.
    return {
      role: message.role,
      content: stringifyContent(message.content),
    };
  };
  return messages.map((message) => convert(message));
}
83
/**
 * Flatten a `ContentPart[]` user message into the Ollama shape: one
 * `content` string (all text parts concatenated with no separator)
 * plus an optional `images` array of base64 strings.
 *
 * A non-text part whose `source` carries a `url` is rejected with an
 * `InvalidRequestError`; nothing is fetched here. The `images` key is
 * only present when at least one image was collected.
 */
function toMultipartMessage(parts) {
  const texts = [];
  const base64Images = [];
  for (const part of parts) {
    if (part.type === "text") {
      texts.push(part.text);
    } else {
      if ("url" in part.source) {
        throw new _warlock_js_ai.InvalidRequestError("Ollama does not fetch remote-URL images; supply base64 image bytes instead.");
      }
      base64Images.push(part.source.base64);
    }
  }
  const result = {
    role: "user",
    content: texts.join(""),
  };
  if (base64Images.length > 0) {
    result.images = base64Images;
  }
  return result;
}
108
/**
 * Flatten message content to a single string.
 *
 * - A string passes through unchanged.
 * - `null` / `undefined` yields `""` instead of throwing, so a message
 *   without text content (for example an assistant turn that only
 *   carries tool calls) still converts cleanly.
 * - A `ContentPart[]` collapses to the concatenation of its `text`
 *   parts; non-text parts are dropped.
 *
 * @param {string | Array<{ type: string, text?: string }> | null | undefined} content
 * @returns {string} The flattened text.
 */
function stringifyContent(content) {
  if (typeof content === "string") return content;
  // Missing content previously crashed on `.filter`; treat it as empty text.
  if (content == null) return "";
  return content
    .filter((part) => part.type === "text")
    .map((part) => part.text)
    .join("");
}
116
+
117
+ //#endregion
118
+ //#region ../../@warlock.js/ai-ollama/src/utils/to-ollama-tools.ts
119
/**
 * Build Ollama's `tools` array from vendor-neutral `ToolConfig[]`.
 *
 * Every tool is emitted as
 * `{ type: "function", function: { name, description, parameters } }`,
 * with `parameters` resolved by `toParameters(tool.input)`.
 *
 * Yields `undefined` for a missing or empty tool list so the caller
 * can leave `tools` out of the request entirely.
 *
 * @example
 * const tools = toOllamaTools([weatherTool]);
 * await ollama.chat({ model, messages, tools });
 */
function toOllamaTools(tools) {
  if (!tools) return undefined;
  if (tools.length === 0) return undefined;
  const toDeclaration = (tool) => {
    const { name, description, input } = tool;
    return {
      type: "function",
      function: {
        name,
        description,
        parameters: toParameters(input),
      },
    };
  };
  return tools.map((tool) => toDeclaration(tool));
}
143
/**
 * Turn a tool's input definition into the JSON-Schema object used as
 * the function `parameters`. The schema returned by
 * `extractJsonSchema` is used only when its root `type` is `"object"`;
 * any other result (including a falsy one) is replaced by the
 * parameterless `{ type: "object" }`.
 */
function toParameters(input) {
  // Destructure first so the helper is invoked without a receiver.
  const { extractJsonSchema } = _warlock_js_ai;
  const extracted = extractJsonSchema(input);
  const hasObjectRoot = extracted?.type === "object";
  return hasObjectRoot ? extracted : { type: "object" };
}
153
+
154
+ //#endregion
155
+ //#region ../../@warlock.js/ai-ollama/src/utils/wrap-ollama-error.ts
156
/**
 * Convert any value thrown inside the Ollama adapter into the matching
 * `@warlock.js/ai` `AIError` subclass.
 *
 * Existing `AIError` instances are returned untouched, so
 * `catch/throw wrap(e)` pipelines never double-wrap. Otherwise the
 * first matching rule wins, in this order:
 *
 * 1. timeout signal (`isTimeout`)                → `ProviderTimeoutError`
 * 2. daemon unreachable (`isConnectionRefused`) → `ProviderError`
 * 3. HTTP 401 / 403                              → `ProviderAuthError`
 * 4. HTTP 429                                    → `ProviderRateLimitError`
 * 5. any other HTTP 4xx                          → `ContextLengthExceededError`
 *    when the message has context-length phrasing, else
 *    `InvalidRequestError`
 * 6. everything else                             → `ProviderError`
 *
 * The original value is attached as `cause`, and the status / socket
 * code (when present) as `context`.
 *
 * @example
 * try {
 *   return await this.client.chat({ ... });
 * } catch (thrown) {
 *   throw wrapOllamaError(thrown);
 * }
 */
function wrapOllamaError(thrown) {
  if (thrown instanceof _warlock_js_ai.AIError) return thrown;

  const shape = toShape(thrown);
  const context = buildContext(shape);
  const fallbackMessage = () => (thrown instanceof Error ? thrown.message : String(thrown));
  const message = shape.message ?? fallbackMessage();
  const { statusCode } = shape;

  let ErrorType;
  if (isTimeout(shape)) {
    ErrorType = _warlock_js_ai.ProviderTimeoutError;
  } else if (isConnectionRefused(shape, message)) {
    ErrorType = _warlock_js_ai.ProviderError;
  } else if (statusCode === 401 || statusCode === 403) {
    ErrorType = _warlock_js_ai.ProviderAuthError;
  } else if (statusCode === 429) {
    ErrorType = _warlock_js_ai.ProviderRateLimitError;
  } else if (isClientStatus(statusCode)) {
    const mentionsContextLimit = /context length|too long|exceeds|maximum context/i.test(message);
    ErrorType = mentionsContextLimit
      ? _warlock_js_ai.ContextLengthExceededError
      : _warlock_js_ai.InvalidRequestError;
  } else {
    ErrorType = _warlock_js_ai.ProviderError;
  }

  return new ErrorType(message, {
    cause: thrown,
    context,
  });
}
213
/**
 * Extract the fields the error dispatcher cares about from an arbitrary
 * thrown value. Non-objects (and `null`) produce `{}`.
 *
 * For objects, each field is kept only when it has the expected type:
 * `name` / `message` as strings, `status_code` as a number (exposed as
 * `statusCode`), and `code` as a string — read from the value itself
 * first, then from its `cause` (where fetch-layer errors carry the
 * OS-level socket code).
 */
function toShape(thrown) {
  const isObject = typeof thrown === "object" && thrown !== null;
  if (!isObject) return {};

  const asString = (value) => (typeof value === "string" ? value : undefined);
  const { cause } = thrown;

  let code = asString(thrown.code);
  if (code === undefined && cause) {
    code = asString(cause.code);
  }

  const status = thrown.status_code;
  return {
    name: asString(thrown.name),
    message: asString(thrown.message),
    statusCode: typeof status === "number" ? status : undefined,
    code,
  };
}
229
/**
 * Whether the error shape carries a transport-level timeout signal:
 * an `AbortError` / `TimeoutError` name, or an `ETIMEDOUT` /
 * `ECONNABORTED` code.
 */
function isTimeout(shape) {
  const timeoutNames = ["AbortError", "TimeoutError"];
  const timeoutCodes = ["ETIMEDOUT", "ECONNABORTED"];
  return timeoutNames.includes(shape.name) || timeoutCodes.includes(shape.code);
}
234
/**
 * Whether the failure looks like the Ollama daemon being unreachable:
 * either the socket-level `ECONNREFUSED` code, or a message containing
 * `fetch failed` / `econnrefused` (case-insensitive).
 */
function isConnectionRefused(shape, message) {
  if (shape.code === "ECONNREFUSED") {
    return true;
  }
  const unreachablePattern = /fetch failed|econnrefused/i;
  return unreachablePattern.test(message);
}
242
/**
 * Whether `status` is a number in the HTTP 4xx range (400 inclusive to
 * 500 exclusive) — a client-side request problem rather than a server
 * fault. Non-numbers are never client statuses.
 */
function isClientStatus(status) {
  if (typeof status !== "number") {
    return false;
  }
  const atLeast400 = status >= 400;
  const below500 = status < 500;
  return atLeast400 && below500;
}
246
/**
 * Assemble the diagnostic `error.context` object from an error shape:
 * `status` when a status code is defined, `code` when the socket code
 * is a non-empty value. Absent fields are omitted rather than set to
 * `undefined`.
 */
function buildContext(shape) {
  const { statusCode, code } = shape;
  return {
    ...(statusCode !== undefined ? { status: statusCode } : {}),
    ...(code ? { code } : {}),
  };
}
253
+
254
+ //#endregion
255
+ //#region ../../@warlock.js/ai-ollama/src/embedder.ts
256
const LOG_MODULE$1 = "ai.ollama";
/**
 * `EmbedderContract` implementation backed by Ollama's `client.embed`
 * (`nomic-embed-text`, `mxbai-embed-large`, …).
 *
 * - **Batching.** `embed` and `embedMany` both go through one shared
 *   `request`, which sends the whole input array in a single call and
 *   returns the vectors as the response lists them.
 * - **Usage.** The response's `prompt_eval_count` (0 when absent) is
 *   reported as both `promptTokens` and `totalTokens`.
 * - **Dimensions.** Without a `dimensions` override, `this.dimensions`
 *   starts at `0` and is filled from the first returned vector's
 *   length, then kept. With an override, the value is forwarded as the
 *   request's `dimensions` field and used as the initial value.
 *
 * @example
 * const embedder = new OllamaEmbedder(client, { name: "nomic-embed-text" });
 * const { vector } = await embedder.embed("Hello world");
 * const { vectors } = await embedder.embedMany(["doc 1", "doc 2"]);
 */
class OllamaEmbedder {
  constructor(client, config, provider = "ollama") {
    const { name, dimensions } = config;
    this.logger = _warlock_js_logger.log;
    this.client = client;
    this.name = name;
    this.provider = provider;
    this.configuredDimensions = dimensions;
    this.dimensions = dimensions ?? 0;
  }

  /** Embed a single string; resolves to `{ vector, dimensions, usage }`. */
  async embed(input) {
    const result = await this.request([input]);
    const [first] = result.embeddings;
    return {
      vector: first ?? [],
      dimensions: this.dimensions,
      usage: result.usage,
    };
  }

  /** Embed a batch in one request; resolves to `{ vectors, dimensions, usage }`. */
  async embedMany(inputs) {
    const result = await this.request(inputs);
    return {
      vectors: result.embeddings,
      dimensions: this.dimensions,
      usage: result.usage,
    };
  }

  /**
   * Shared transport for both public methods: issue one `embed` call
   * for the batch, wrap and log provider failures, remember the vector
   * length on first success, and return the vectors with a neutral
   * usage object.
   */
  async request(inputs) {
    this.logger.debug(LOG_MODULE$1, "embedder.request", "embed", {
      model: this.name,
      count: inputs.length,
    });

    const payload = {
      model: this.name,
      input: inputs,
    };
    if (this.configuredDimensions !== undefined) {
      payload.dimensions = this.configuredDimensions;
    }

    let response;
    try {
      response = await this.client.embed(payload);
    } catch (thrown) {
      const wrapped = wrapOllamaError(thrown);
      this.logger.error(LOG_MODULE$1, "embedder.error", wrapped.message, {
        code: wrapped.code,
        context: wrapped.context,
      });
      throw wrapped;
    }

    const embeddings = response.embeddings ?? [];
    // Learn the dimensionality once, from the first vector ever seen.
    const dimensionsUnknown = this.dimensions === 0;
    if (dimensionsUnknown && embeddings[0]) {
      this.dimensions = embeddings[0].length;
    }

    const tokenCount = response.prompt_eval_count ?? 0;
    this.logger.debug(LOG_MODULE$1, "embedder.response", "embed returned", {
      count: embeddings.length,
      dimensions: this.dimensions,
    });

    return {
      embeddings,
      usage: {
        promptTokens: tokenCount,
        totalTokens: tokenCount,
      },
    };
  }
}
348
+
349
+ //#endregion
350
+ //#region ../../@warlock.js/ai-ollama/src/known-vision-models.ts
351
/**
 * Lower-case fragments that mark an Ollama model tag as belonging to
 * an image-capable (vision) family.
 *
 * Tags are family names with optional size/quant suffixes
 * (`llama3.2-vision:11b`, `llava:13b-v1.6`, `qwen2.5-vl:7b`), so the
 * match is by substring. Text-only families (`llama3.1`, `mistral`,
 * `phi3`, `nomic-embed-text`) are intentionally absent. A per-model
 * override is available via
 * `ollama.model({ name, vision: true | false })`.
 */
const VISION_CAPABLE_SUBSTRINGS = [
  "llava",
  "vision",
  "bakllava",
  "moondream",
  "minicpm-v",
  "qwen2-vl",
  "qwen2.5-vl",
  "llama4",
  "gemma3",
];
/**
 * Guess whether a model tag supports image input by checking it,
 * case-insensitively, against the known vision-family fragments.
 * Tags that match nothing are treated as text-only (`false`).
 *
 * @example
 * inferVisionCapability("llama3.2-vision:11b"); // → true
 * inferVisionCapability("llava:13b"); // → true
 * inferVisionCapability("llama3.1"); // → false
 * inferVisionCapability("nomic-embed-text"); // → false
 */
function inferVisionCapability(modelName) {
  const tag = modelName.toLowerCase();
  for (const fragment of VISION_CAPABLE_SUBSTRINGS) {
    if (tag.includes(fragment)) {
      return true;
    }
  }
  return false;
}
390
+
391
+ //#endregion
392
+ //#region ../../@warlock.js/ai-ollama/src/model.ts
393
const LOG_MODULE = "ai.ollama";
/**
 * Ollama-backed implementation of `ModelContract`.
 *
 * **Role.** The provider-facing bridge between the vendor-neutral
 * `@warlock.js/ai` agent runtime and a local (or self-hosted) Ollama
 * server via the official `ollama` client.
 *
 * **Responsibility.**
 * - Owns: a long-lived `Ollama` client + `ModelConfig` (model tag,
 *   temperature, maxTokens) used as per-call defaults.
 * - Owns: translating vendor-neutral `Message[]` / `ToolConfig[]` into
 *   Ollama's chat shapes (system stays a real role, `tool_calls` /
 *   `tool_name`, base64 `images`) and Ollama's response (content, tool
 *   calls, done reason, eval-count usage) back into neutral shapes.
 * - Does NOT own: tool dispatch, looping, history, retries — agent
 *   concerns. The model is a per-call protocol adapter.
 *
 * **Tool-call ids.** Ollama has no tool-call id concept — a `tool_call`
 * is `{ function: { name, arguments } }`. The adapter synthesizes the
 * neutral `id` from the tool name so the agent's tool-result round-trip
 * (which keys on `toolCallId`) maps back to Ollama's name-based
 * matching. Parallel calls to the *same* tool in one turn therefore
 * share an id — a documented v1 limitation inherent to Ollama's wire
 * format, not this adapter.
 *
 * Modeled as a class (see §4.2 of code-style.md — "long-lived state
 * across calls").
 *
 * @example
 * import { Ollama } from "ollama";
 * const client = new Ollama({ host: "http://127.0.0.1:11434" });
 * const model = new OllamaModel(client, { name: "llama3.1" });
 *
 * const myAgent = agent({ model, tools: [searchTool] });
 * const result = await myAgent.execute("Summarize today's news.");
 */
var OllamaModel = class {
  /**
   * @param client - The shared `Ollama` client used for every call.
   * @param config - Model config: `name` plus optional `temperature`,
   *   `maxTokens`, `pricing`, `structuredOutput`, `vision`.
   * @param provider - Provider label; defaults to `"ollama"`.
   */
  constructor(client, config, provider = "ollama") {
    this.logger = _warlock_js_logger.log;
    this.client = client;
    this.config = config;
    this.name = config.name;
    this.provider = provider;
    this.pricing = config.pricing;
    this.capabilities = {
      structuredOutput: config.structuredOutput ?? true,
      // Explicit config wins; otherwise infer from the model tag.
      vision: config.vision ?? inferVisionCapability(config.name)
    };
  }
  /**
   * Single-shot completion. Sends the full message list to
   * `client.chat`, waits for the terminal response, and reshapes it
   * into a vendor-neutral `ModelResponse`. Per-call `options` override
   * the instance defaults for this call only.
   *
   * @returns `{ content, finishReason, usage, toolCalls }`; `toolCalls`
   *   is `undefined` when the response carried none.
   * @throws The `AIError` produced by `wrapOllamaError` when the call fails.
   */
  async complete(messages, options) {
    this.logger.debug(LOG_MODULE, "request", "Starting chat call", {
      model: this.name,
      messageCount: messages.length,
      streaming: false,
      toolCount: options?.tools?.length ?? 0
    });
    let response;
    try {
      response = await this.client.chat({
        ...this.buildRequest(messages, options),
        stream: false
      });
    } catch (thrown) {
      throw this.logAndWrap(thrown);
    }
    const toolCalls = this.extractToolCalls(response.message);
    // Ollama reports `done_reason: "stop"` for tool use, so tool-call
    // presence takes precedence over the raw done reason.
    const finishReason = toolCalls ? "tool_calls" : mapDoneReason(response.done_reason);
    const usage = this.extractUsage(response);
    this.logger.debug(LOG_MODULE, "response", "chat call succeeded", {
      finishReason,
      usage
    });
    return {
      content: response.message?.content ?? "",
      finishReason,
      usage,
      toolCalls
    };
  }
  /**
   * Incremental streaming completion. Yields neutral
   * `ModelStreamChunk`s — `delta` for content, `tool-call` per
   * function call (Ollama streams a fully-formed call, not partial
   * JSON), and a terminal `done` with the final finish reason + usage.
   *
   * Honors `options.signal` by aborting the underlying stream. The
   * abort listener is always detached once iteration ends, so a signal
   * reused across calls does not accumulate listeners, and the
   * underlying stream is aborted whenever iteration stops before its
   * natural end (consumer `break` / `return`, or an error).
   *
   * @throws The `AIError` produced by `wrapOllamaError` when the call
   *   or the stream fails.
   */
  async *stream(messages, options) {
    this.logger.debug(LOG_MODULE, "request", "Starting streaming chat call", {
      model: this.name,
      messageCount: messages.length,
      streaming: true,
      toolCount: options?.tools?.length ?? 0
    });
    let stream;
    try {
      stream = await this.client.chat({
        ...this.buildRequest(messages, options),
        stream: true
      });
    } catch (thrown) {
      throw this.logAndWrap(thrown);
    }
    const signal = options?.signal;
    // Kept as a named function so it can be detached in `finally`.
    const onAbort = () => stream.abort();
    if (signal) {
      if (signal.aborted) stream.abort();
      else signal.addEventListener("abort", onAbort, { once: true });
    }
    let rawDoneReason;
    let sawToolCall = false;
    // Set only when the provider stream ran to its natural end.
    let finished = false;
    const usage = {
      input: 0,
      output: 0,
      total: 0
    };
    try {
      for await (const chunk of stream) {
        const content = chunk.message?.content;
        if (content) yield {
          type: "delta",
          content
        };
        for (const call of chunk.message?.tool_calls ?? []) {
          sawToolCall = true;
          yield {
            type: "tool-call",
            // Ollama has no call id; the tool name doubles as the id.
            id: call.function.name,
            name: call.function.name,
            input: call.function.arguments ?? {}
          };
        }
        if (chunk.done_reason) rawDoneReason = chunk.done_reason;
        // Eval counts are read from the chunk flagged `done`.
        if (chunk.done) {
          usage.input = chunk.prompt_eval_count ?? usage.input;
          usage.output = chunk.eval_count ?? usage.output;
          usage.total = usage.input + usage.output;
        }
      }
      finished = true;
    } catch (thrown) {
      throw this.logAndWrap(thrown);
    } finally {
      // Runs on normal completion, on error, and when the consumer
      // stops iterating early (generator `return()`).
      signal?.removeEventListener("abort", onAbort);
      if (!finished) stream.abort();
    }
    const finishReason = sawToolCall ? "tool_calls" : mapDoneReason(rawDoneReason);
    this.logger.debug(LOG_MODULE, "response", "streaming chat call succeeded", {
      finishReason,
      usage
    });
    yield {
      type: "done",
      finishReason,
      usage
    };
  }
  /**
   * Assemble the Ollama chat request shared by `complete()` and
   * `stream()` (each adds its own `stream` literal so the client's
   * overload resolves). Maps inference params into Ollama `options`
   * and conditionally attaches tools + native structured output.
   * Per-call values win over the instance config; `options` is omitted
   * when neither temperature nor max tokens is set.
   */
  buildRequest(messages, options) {
    const temperature = options?.temperature ?? this.config.temperature;
    const maxTokens = options?.maxTokens ?? this.config.maxTokens;
    const ollamaOptions = {
      ...temperature !== void 0 ? { temperature } : {},
      ...maxTokens !== void 0 ? { num_predict: maxTokens } : {}
    };
    return {
      model: this.name,
      messages: toOllamaMessages(messages),
      ...Object.keys(ollamaOptions).length > 0 ? { options: ollamaOptions } : {},
      ...this.buildTools(options?.tools),
      ...this.buildFormat(options?.responseSchema)
    };
  }
  /**
   * Spread-friendly tools fragment. Empty object when no tools were
   * supplied so the caller can unconditionally spread it.
   */
  buildTools(tools) {
    const mapped = toOllamaTools(tools);
    return mapped ? { tools: mapped } : {};
  }
  /**
   * Translate the neutral `responseSchema` into Ollama's native
   * structured output (`format` accepts a JSON Schema object).
   * Emitted only when the model is `structuredOutput`-capable and the
   * schema is an object root with a real (non-null) `properties`
   * object — otherwise the agent's soft prompt hint + client-side
   * `validate()` carry shape.
   */
  buildFormat(responseSchema) {
    if (!responseSchema || !this.capabilities.structuredOutput) return {};
    const { type, properties } = responseSchema;
    // `typeof null === "object"`, so null must be excluded explicitly.
    if (type !== "object" || typeof properties !== "object" || properties === null) return {};
    return { format: responseSchema };
  }
  /**
   * Reshape Ollama's `message.tool_calls` into the neutral
   * `ModelToolCallRequest[]`. Ollama has no tool-call id, so the
   * neutral `id` is synthesized from the tool name. Returns
   * `undefined` when the message carries no tool calls.
   */
  extractToolCalls(message) {
    const calls = message?.tool_calls;
    if (!calls || calls.length === 0) return;
    return calls.map((call) => ({
      id: call.function.name,
      name: call.function.name,
      input: call.function.arguments ?? {}
    }));
  }
  /**
   * Normalize Ollama's eval counts into the neutral `Usage` shape:
   * `prompt_eval_count` → `input`, `eval_count` → `output` (each 0
   * when absent), and `total` computed as their sum.
   */
  extractUsage(response) {
    const input = response.prompt_eval_count ?? 0;
    const output = response.eval_count ?? 0;
    return {
      input,
      output,
      total: input + output
    };
  }
  /**
   * Wrap a thrown provider error into the typed `AIError` hierarchy
   * and emit the standard error log line. The wrapped error is
   * returned (not thrown) so call sites can write
   * `throw this.logAndWrap(e)`.
   */
  logAndWrap(thrown) {
    const wrapped = wrapOllamaError(thrown);
    this.logger.error(LOG_MODULE, "error", wrapped.message, {
      code: wrapped.code,
      context: wrapped.context
    });
    return wrapped;
  }
};
631
+
632
+ //#endregion
633
+ //#region ../../@warlock.js/ai-ollama/src/sdk.ts
634
/**
 * `SDKAdapterContract` implementation for Ollama — the package entry
 * point for local / self-hosted models served by an Ollama daemon via
 * the official `ollama` client.
 *
 * One `OllamaSDK` creates and holds a single `Ollama` client, built
 * from every config field except `provider` and `pricing`, and acts as
 * the factory for `OllamaModel` / `OllamaEmbedder` instances sharing
 * it. Per-call concerns live in those classes and the agent runtime.
 *
 * Modeled as a class (see §4.2 of code-style.md — "long-lived state
 * across many calls"), fronted by FP usage like the other adapters.
 *
 * @example
 * const ollama = new OllamaSDK({}); // local default host
 * const model = ollama.model({ name: "llama3.1", temperature: 0.7 });
 * const embedder = ollama.embedder({ name: "nomic-embed-text" });
 */
class OllamaSDK {
  constructor(config = {}) {
    // `provider` / `pricing` belong to this adapter; the rest is client config.
    const { provider, pricing, ...clientConfig } = config;
    this.client = new ollama.Ollama(clientConfig);
    this.provider = provider ?? "ollama";
    this.pricing = pricing;
  }

  /**
   * Create a fresh `OllamaModel` on the shared client, forwarding this
   * SDK's `provider` label.
   *
   * Pricing resolution: the model's own `config.pricing` wins; next is
   * the SDK-level registry entry keyed by `config.name`; otherwise it
   * stays `undefined`. The caller's config object is reused as-is when
   * resolution changes nothing, and shallow-copied otherwise.
   */
  model(config) {
    const pricing = config.pricing ?? this.pricing?.[config.name];
    let modelConfig = config;
    if (pricing !== config.pricing) {
      modelConfig = { ...config, pricing };
    }
    return new OllamaModel(this.client, modelConfig, this.provider);
  }

  /**
   * Rough token-count estimate via the core package's
   * `approximateTokenCount`. The second argument is accepted but
   * currently ignored.
   */
  async count(text, _model) {
    // Destructure first so the helper is invoked without a receiver.
    const { approximateTokenCount } = _warlock_js_ai;
    return approximateTokenCount(text);
  }

  /**
   * Create an `OllamaEmbedder` on the shared client, forwarding this
   * SDK's `provider` label.
   *
   * @example
   * const embedder = ollama.embedder({ name: "nomic-embed-text" });
   * const { vector } = await embedder.embed("Hello world");
   */
  embedder(config) {
    const { client, provider } = this;
    return new OllamaEmbedder(client, config, provider);
  }
}
702
+
703
+ //#endregion
704
+ exports.OllamaSDK = OllamaSDK;
705
+ //# sourceMappingURL=index.cjs.map