@strav/brain 1.0.0-alpha.15 → 1.0.0-alpha.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ /**
2
+ * `OllamaProvider` — `OpenAICompatProvider` pointed at a local
3
+ * Ollama server's OpenAI-compatible `/v1` endpoint.
4
+ *
5
+ * Why this matters: Ollama (and the wider local-LLM ecosystem —
6
+ * LM Studio, llama.cpp's server, vLLM, TGI) lets apps run
7
+ * inference on-device or on-prem. Two real use cases:
8
+ *
9
+ * - **Privacy.** Data never leaves the machine / the customer's
10
+ * network — table stakes for regulated workloads.
11
+ * - **Dev / test.** Build agents without burning API credits or
12
+ * needing a cloud key at all. Run the test suite against a
13
+ * local `llama3.2:1b` for free; ship to a hosted provider in
14
+ * prod.
15
+ *
16
+ * Inherits the OpenAI-compat overrides (strip `reasoning_effort`,
17
+ * `json_object`-mode generate with schema-in-system-prompt,
18
+ * throws on combined tools + schema) from the base. Only adds:
19
+ *
20
+ * - Constructor with Ollama defaults — base URL
21
+ * `http://localhost:11434/v1`, placeholder API key `'ollama'`
22
+ * (the SDK demands a non-empty string; Ollama ignores it).
23
+ *
24
+ * `defaultModel` is required because Ollama models are
25
+ * user-installed via `ollama pull <model>` — no universal default
26
+ * exists. Tool calling depends on the underlying model; pick a
27
+ * function-calling-tuned model (`llama3.2`, `qwen2.5`, `mistral`)
28
+ * for `runWithTools` to behave.
29
+ *
30
+ * The same provider works against any OpenAI-compatible local
31
+ * server by overriding `baseUrl` — LM Studio (`:1234/v1`),
32
+ * llama.cpp's server (`:8080/v1`), vLLM, TGI, remote Ollama on
33
+ * another host. The driver name is `ollama` for ergonomic reasons;
34
+ * the implementation is "any OpenAI-compatible local server."
35
+ *
36
+ * Local inference has no upstream prompt cache, so the default
37
+ * `mapUsage` (cache fields fall back to 0) is correct without
38
+ * override. `countTokens` is not implemented (Ollama doesn't
39
+ * expose a count endpoint).
40
+ */
41
+
42
+ import type OpenAI from 'openai'
43
+ import type { OllamaProviderConfig } from '../brain_config.ts'
44
+ import type { ResolveMcpToolsOptions } from '../mcp/resolve_mcp_tools.ts'
45
+ import { OpenAICompatProvider } from './openai_compat_provider.ts'
46
+
47
/** Base URL used when the provider config does not supply `baseUrl`. */
const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434/v1'

/** Placeholder API key used when the provider config does not supply `apiKey`. */
const DEFAULT_OLLAMA_API_KEY = 'ollama'
49
+
50
/** Optional third constructor argument of `OllamaProvider`. */
export interface OllamaProviderOptions {
  /**
   * An OpenAI SDK client instance.
   * NOTE(review): presumably used in place of a client built from the
   * config — confirm against the base provider's constructor.
   */
  client?: OpenAI
  /**
   * Internal test seam: lets tests supply a stub MCP client factory so
   * that MCP tool resolution never touches the network. Production
   * code leaves this unset and gets the default `MCPClient`.
   */
  mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
}
59
+
60
/**
 * Provider for an Ollama server reached through its OpenAI-compatible
 * endpoint. Adds nothing but a constructor that fills in the Ollama
 * defaults; all request handling comes from `OpenAICompatProvider`.
 */
export class OllamaProvider extends OpenAICompatProvider {
  /**
   * @param name - Provider name, forwarded unchanged to the base class.
   * @param config - Ollama settings. A nullish `apiKey` / `baseUrl`
   *   falls back to the module-level Ollama defaults.
   * @param options - Optional overrides, forwarded unchanged to the
   *   base class.
   */
  constructor(name: string, config: OllamaProviderConfig, options: OllamaProviderOptions = {}) {
    const {
      apiKey,
      baseUrl,
      defaultModel,
      defaultMaxTokens,
      defaultEmbedModel,
      defaultTranscribeModel,
    } = config

    super(
      name,
      {
        // The base class is configured with the `openai` driver tag.
        driver: 'openai',
        apiKey: apiKey ?? DEFAULT_OLLAMA_API_KEY,
        baseUrl: baseUrl ?? DEFAULT_OLLAMA_BASE_URL,
        defaultModel,
        // Optional settings are spread in only when defined, so the
        // forwarded config never carries a key whose value is `undefined`.
        ...(defaultMaxTokens === undefined ? {} : { defaultMaxTokens }),
        ...(defaultEmbedModel === undefined ? {} : { defaultEmbedModel }),
        ...(defaultTranscribeModel === undefined ? {} : { defaultTranscribeModel }),
      },
      options,
    )
  }
}
@@ -0,0 +1,187 @@
1
+ /**
2
+ * `OpenAICompatProvider` — abstract intermediate that captures the
3
+ * "OpenAI-compatible local / third-party endpoint" pattern shared by
4
+ * `DeepSeekProvider`, `OllamaProvider`, and anything else (Groq,
5
+ * Together, Fireworks, vLLM, llama.cpp's server, …) that exposes a
6
+ * `/v1/chat/completions` surface that is request-/response-shape-
7
+ * identical to OpenAI's.
8
+ *
9
+ * What it does, factored out of OpenAIProvider:
10
+ *
11
+ * - **Strips `reasoning_effort`.** Compat endpoints typically
12
+ * reject unknown fields. `buildParams` removes it on every
13
+ * request. Subclasses that target a vendor which DOES support
14
+ * `reasoning_effort` re-add it in their own `buildParams`
15
+ * override.
16
+ *
17
+ * - **`generate` via `json_object` + schema-in-system-prompt.**
18
+ * The OpenAI provider uses `response_format.json_schema` with
19
+ * `strict: true` — most OpenAI-compat vendors don't support
20
+ * that. The safe default is `json_object` mode + a
21
+ * "Respond with JSON matching this schema" instruction
22
+ * injected into the system prompt, then client-side
23
+ * `parseGenerated` validates.
24
+ *
25
+ * - **Throws on combined tools + schema.** No reliable per-turn
26
+ * schema enforcement on compat endpoints in V1.
27
+ * `runWithToolsAndSchema` / `streamWithToolsAndSchema` throw
28
+ * `BrainError` with a clear "run as two calls or switch
29
+ * providers" message.
30
+ *
31
+ * - **`mapUsage` hook.** Default maps OpenAI's `prompt_tokens` /
32
+ * `completion_tokens` / `prompt_tokens_details.cached_tokens`
33
+ * straight across. Vendors that report cache hits on a custom
34
+ * field (DeepSeek does; `prompt_cache_hit_tokens`) override.
35
+ *
36
+ * Subclasses provide just the constructor + (sometimes) a
37
+ * `mapUsage` override. Most named compat providers in this
38
+ * codebase are now <40 lines.
39
+ */
40
+
41
+ import type OpenAI from 'openai'
42
+ import type { AgentGenerateResult } from '../agent_generate_result.ts'
43
+ import type { AgentStreamEvent } from '../agent_stream_event.ts'
44
+ import { BrainError } from '../brain_error.ts'
45
+ import { parseGenerated, type OutputSchema } from '../output_schema.ts'
46
+ import type { RunWithToolsOptions } from '../provider.ts'
47
+ import type { Tool } from '../tool.ts'
48
+ import type {
49
+ ChatOptions,
50
+ ChatUsage,
51
+ GenerateResult,
52
+ Message,
53
+ SystemPrompt,
54
+ } from '../types.ts'
55
+ import { OpenAIProvider } from './openai_provider.ts'
56
+
57
/**
 * Abstract base for providers that speak an OpenAI-shaped chat
 * completions API. Adjusts request building, structured generation,
 * and usage mapping on top of `OpenAIProvider`, and rejects the
 * combined tools + schema entry points.
 */
export abstract class OpenAICompatProvider extends OpenAIProvider {
  /**
   * Builds the request exactly as the parent does, then removes
   * `reasoning_effort` from it if present. A subclass whose vendor
   * accepts that field can override this method to keep or restore it.
   *
   * @returns The parent's params object, mutated in place.
   */
  protected override buildParams(
    messages: readonly Message[],
    options: ChatOptions,
    tools: readonly Tool[],
  ): OpenAI.Chat.ChatCompletionCreateParamsNonStreaming {
    const built = super.buildParams(messages, options, tools)
    if (!('reasoning_effort' in built)) return built
    delete (built as { reasoning_effort?: unknown }).reasoning_effort
    return built
  }

  /**
   * Structured generation without server-side schema enforcement.
   *
   * The schema instruction is appended to the caller's system prompt,
   * the request is sent with `response_format: { type: 'json_object' }`
   * and no tools, and the returned text is handed to `parseGenerated`
   * together with the schema.
   *
   * Because the endpoint itself is not given the schema, conformance
   * rests entirely on the model following the instruction and on
   * whatever checking `parseGenerated` performs.
   *
   * @param messages - Conversation to send.
   * @param schema - Output schema describing the expected JSON value.
   * @param options - Chat options; `options.signal`, when set, is
   *   forwarded as the request's abort signal.
   * @returns The parsed value plus raw text, model, stop reason
   *   (`null` when the first choice has none), usage, and raw response.
   */
  override async generate<T>(
    messages: readonly Message[],
    schema: OutputSchema<T>,
    options: ChatOptions = {},
  ): Promise<GenerateResult<T>> {
    const system = combineSystem(options.system, schemaInstruction(schema))
    const request = this.buildParams(messages, { ...options, system }, [])
    request.response_format = { type: 'json_object' }

    const { signal } = options
    const completion = await this.client.chat.completions.create(
      request,
      signal === undefined ? undefined : { signal },
    )

    // A missing first choice or null content is treated as empty text.
    const first = completion.choices[0]
    const text = first?.message?.content ?? ''

    return {
      value: parseGenerated(text, schema),
      text,
      model: completion.model,
      stopReason: first?.finish_reason ?? null,
      usage: this.mapUsage(completion.usage),
      raw: completion,
    }
  }

  /**
   * Always rejects: a tool loop combined with structured output is not
   * offered by this provider family in V1.
   *
   * @throws BrainError on every call, with the provider name in its
   *   context.
   */
  override async runWithToolsAndSchema<T>(
    _messages: readonly Message[],
    _tools: readonly Tool[],
    _schema: OutputSchema<T>,
    _options?: RunWithToolsOptions,
  ): Promise<AgentGenerateResult<T>> {
    const message = `${this.name}.runWithToolsAndSchema: combined tool use + structured output is not supported on OpenAI-compat providers in V1. Run \`brain.runTools(...)\` and \`brain.generate(...)\` as two separate calls, or switch to OpenAI / Anthropic / Gemini for this combination.`
    throw new BrainError(message, { context: { provider: this.name } })
  }

  /**
   * Streaming counterpart of `runWithToolsAndSchema`; likewise always
   * fails. As an async generator, the error surfaces when the returned
   * iterable is first advanced rather than at call time.
   *
   * @throws BrainError on first iteration, with the provider name in
   *   its context.
   */
  override async *streamWithToolsAndSchema<T>(
    _messages: readonly Message[],
    _tools: readonly Tool[],
    _schema: OutputSchema<T>,
    _options?: RunWithToolsOptions,
  ): AsyncIterable<AgentStreamEvent<T>> {
    const message = `${this.name}.streamWithToolsAndSchema: combined streaming + tool use + structured output is not supported on OpenAI-compat providers in V1. Use \`brain.streamTools(...)\` and \`brain.generate(...)\` separately, or switch to OpenAI / Anthropic / Gemini for this combination.`
    throw new BrainError(message, { context: { provider: this.name } })
  }

  /**
   * Converts an OpenAI-style usage record into `ChatUsage`.
   * Subclasses override this when their vendor reports usage in a
   * non-standard shape.
   *
   * @param u - Usage block from the response; may be absent.
   * @returns Token counts, each defaulting to 0 when the source field
   *   is missing. `cacheCreationTokens` is always 0 here.
   */
  protected mapUsage(u: OpenAI.CompletionUsage | undefined): ChatUsage {
    const inputTokens = u?.prompt_tokens ?? 0
    const outputTokens = u?.completion_tokens ?? 0
    const cacheReadTokens = u?.prompt_tokens_details?.cached_tokens ?? 0
    return { inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens: 0 }
  }
}
167
+
168
+ // ─── Shared helpers ──────────────────────────────────────────────────────
169
+
170
/**
 * Merge an additional instruction into an existing system prompt.
 *
 * - No existing prompt (`undefined` or the empty string): the
 *   addition becomes the whole prompt. Treating `''` like `undefined`
 *   avoids a result that starts with a stray blank-line separator.
 * - Non-empty string: the addition is appended after a blank line.
 * - Array: a new array is returned with a `{ text }` entry appended;
 *   the input array is not mutated.
 * - Any other single value: it is wrapped in a two-element array
 *   followed by the `{ text }` entry.
 *
 * @param existing - The caller's system prompt, if any.
 * @param addition - Instruction text to add.
 * @returns The combined system prompt.
 */
function combineSystem(existing: SystemPrompt | undefined, addition: string): SystemPrompt {
  if (existing === undefined) return addition
  if (typeof existing === 'string') {
    return existing === '' ? addition : `${existing}\n\n${addition}`
  }
  if (Array.isArray(existing)) return [...existing, { text: addition }]
  return [existing, { text: addition }]
}
177
+
178
/**
 * Produce the system-prompt text that asks the model to answer with
 * JSON conforming to `schema`.
 *
 * The text is newline-joined and consists of a fixed instruction line,
 * an optional description line (only when `schema.description` is
 * truthy), a line naming the schema, and the pretty-printed
 * (2-space indent) JSON Schema.
 *
 * @param schema - Output schema whose `name`, `description`, and
 *   `jsonSchema` are rendered into the instruction.
 * @returns The instruction text.
 */
function schemaInstruction(schema: OutputSchema<unknown>): string {
  const parts: Array<string | undefined> = [
    `Respond with a JSON object that matches the following JSON Schema. Output ONLY the JSON object — no prose, no markdown fences.`,
  ]
  if (schema.description) {
    parts.push(`Schema description: ${schema.description}`)
  }
  parts.push(`Schema (name: ${schema.name}):`)
  parts.push(JSON.stringify(schema.jsonSchema, null, 2))
  // Drop any undefined entry before joining.
  return parts.filter((part): part is string => part !== undefined).join('\n')
}