@strav/brain 1.0.0-alpha.15 → 1.0.0-alpha.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/agent.ts +34 -5
- package/src/agent_generate_result.ts +30 -0
- package/src/agent_runner.ts +140 -14
- package/src/agent_stream_event.ts +100 -0
- package/src/brain_config.ts +91 -1
- package/src/brain_manager.ts +168 -4
- package/src/brain_provider.ts +25 -1
- package/src/index.ts +19 -1
- package/src/mcp/client.ts +82 -13
- package/src/mcp/index.ts +6 -0
- package/src/mcp/oauth.ts +227 -0
- package/src/mcp/resolve_mcp_tools.ts +6 -2
- package/src/mcp_server.ts +16 -0
- package/src/provider.ts +109 -0
- package/src/providers/anthropic_provider.ts +596 -28
- package/src/providers/deepseek_provider.ts +117 -0
- package/src/providers/gemini_provider.ts +590 -21
- package/src/providers/ollama_provider.ts +86 -0
- package/src/providers/openai_compat_provider.ts +187 -0
- package/src/providers/openai_provider.ts +735 -32
- package/src/providers/openai_responses_provider.ts +700 -0
- package/src/tool.ts +7 -0
- package/src/tool_runner.ts +81 -0
- package/src/types.ts +233 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `OllamaProvider` — `OpenAICompatProvider` pointed at a local
|
|
3
|
+
* Ollama server's OpenAI-compatible `/v1` endpoint.
|
|
4
|
+
*
|
|
5
|
+
* Why this matters: Ollama (and the wider local-LLM ecosystem —
|
|
6
|
+
* LM Studio, llama.cpp's server, vLLM, TGI) lets apps run
|
|
7
|
+
* inference on-device or on-prem. Two real use cases:
|
|
8
|
+
*
|
|
9
|
+
* - **Privacy.** Data never leaves the machine / the customer's
|
|
10
|
+
* network — table stakes for regulated workloads.
|
|
11
|
+
* - **Dev / test.** Build agents without burning API credits or
|
|
12
|
+
* needing a cloud key at all. Run the test suite against a
|
|
13
|
+
* local `llama3.2:1b` for free; ship to a hosted provider in
|
|
14
|
+
* prod.
|
|
15
|
+
*
|
|
16
|
+
* Inherits the OpenAI-compat overrides (strip `reasoning_effort`,
|
|
17
|
+
* `json_object`-mode generate with schema-in-system-prompt,
|
|
18
|
+
* throws on combined tools + schema) from the base. Only adds:
|
|
19
|
+
*
|
|
20
|
+
* - Constructor with Ollama defaults — base URL
|
|
21
|
+
* `http://localhost:11434/v1`, placeholder API key `'ollama'`
|
|
22
|
+
* (the SDK demands a non-empty string; Ollama ignores it).
|
|
23
|
+
*
|
|
24
|
+
* `defaultModel` is required because Ollama models are
|
|
25
|
+
* user-installed via `ollama pull <model>` — no universal default
|
|
26
|
+
* exists. Tool calling depends on the underlying model; pick a
|
|
27
|
+
* function-calling-tuned model (`llama3.2`, `qwen2.5`, `mistral`)
|
|
28
|
+
* for `runWithTools` to behave.
|
|
29
|
+
*
|
|
30
|
+
* The same provider works against any OpenAI-compatible local
|
|
31
|
+
* server by overriding `baseUrl` — LM Studio (`:1234/v1`),
|
|
32
|
+
* llama.cpp's server (`:8080/v1`), vLLM, TGI, remote Ollama on
|
|
33
|
+
* another host. The driver name is `ollama` for ergonomic reasons;
|
|
34
|
+
* the implementation is "any OpenAI-compatible local server."
|
|
35
|
+
*
|
|
36
|
+
* Local inference has no upstream prompt cache, so the default
|
|
37
|
+
* `mapUsage` (cache fields → 0) is correct without override.
|
|
38
|
+
* `countTokens` not implemented (Ollama doesn't expose a count
|
|
39
|
+
* endpoint).
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
import type OpenAI from 'openai'
|
|
43
|
+
import type { OllamaProviderConfig } from '../brain_config.ts'
|
|
44
|
+
import type { ResolveMcpToolsOptions } from '../mcp/resolve_mcp_tools.ts'
|
|
45
|
+
import { OpenAICompatProvider } from './openai_compat_provider.ts'
|
|
46
|
+
|
|
47
|
+
/** Base URL used when the config supplies none: the local Ollama server's `/v1` endpoint. */
const DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434/v1'

/** Placeholder API key used when the config supplies none. */
const DEFAULT_OLLAMA_API_KEY = 'ollama'
|
|
49
|
+
|
|
50
|
+
/**
 * Optional collaborators accepted by the `OllamaProvider` constructor.
 * The whole object is forwarded unchanged to the base provider.
 */
export interface OllamaProviderOptions {
  /**
   * Pre-built OpenAI SDK client.
   * NOTE(review): presumably used by the base provider instead of
   * constructing its own client — confirm against `OpenAIProvider`.
   */
  client?: OpenAI

  /**
   * Test seam: lets tests supply a stub MCP client factory so that MCP
   * tool resolution never touches the network. Production code leaves
   * this unset and the provider falls back to the default `MCPClient`.
   */
  mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
}
|
|
59
|
+
|
|
60
|
+
/**
 * Translate an Ollama config into the OpenAI-shaped config object that
 * is handed to the base-class constructor.
 *
 * - `driver` is always `'openai'`.
 * - `apiKey` / `baseUrl` fall back to the Ollama defaults when absent.
 * - The three optional `default*` settings are copied only when they are
 *   defined, so the resulting object never carries an explicit
 *   `undefined` value for them.
 */
function toOpenAIShapedConfig(config: OllamaProviderConfig) {
  const {
    apiKey,
    baseUrl,
    defaultModel,
    defaultMaxTokens,
    defaultEmbedModel,
    defaultTranscribeModel,
  } = config

  return {
    driver: 'openai' as const,
    apiKey: apiKey ?? DEFAULT_OLLAMA_API_KEY,
    baseUrl: baseUrl ?? DEFAULT_OLLAMA_BASE_URL,
    defaultModel,
    ...(defaultMaxTokens === undefined ? {} : { defaultMaxTokens }),
    ...(defaultEmbedModel === undefined ? {} : { defaultEmbedModel }),
    ...(defaultTranscribeModel === undefined ? {} : { defaultTranscribeModel }),
  }
}

/**
 * `OpenAICompatProvider` preconfigured with Ollama defaults. The class
 * adds nothing beyond its constructor; all request behaviour comes from
 * the base class.
 */
export class OllamaProvider extends OpenAICompatProvider {
  /**
   * @param name - Provider name, forwarded unchanged to the base class.
   * @param config - Ollama settings; a missing `apiKey` / `baseUrl` is
   *   replaced by the module defaults before being passed on.
   * @param options - Optional collaborators, forwarded unchanged to the
   *   base class. Defaults to an empty object.
   */
  constructor(name: string, config: OllamaProviderConfig, options: OllamaProviderOptions = {}) {
    super(name, toOpenAIShapedConfig(config), options)
  }
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `OpenAICompatProvider` — abstract intermediate that captures the
|
|
3
|
+
* "OpenAI-compatible local / third-party endpoint" pattern shared by
|
|
4
|
+
* `DeepSeekProvider`, `OllamaProvider`, and anything else (Groq,
|
|
5
|
+
* Together, Fireworks, vLLM, llama.cpp's server, …) that exposes a
|
|
6
|
+
* `/v1/chat/completions` surface that is request-/response-shape-
|
|
7
|
+
* identical to OpenAI's.
|
|
8
|
+
*
|
|
9
|
+
* What it does, factored out of OpenAIProvider:
|
|
10
|
+
*
|
|
11
|
+
* - **Strips `reasoning_effort`.** Compat endpoints typically
|
|
12
|
+
* reject unknown fields. `buildParams` removes it on every
|
|
13
|
+
* request. Subclasses that target a vendor which DOES support
|
|
14
|
+
* `reasoning_effort` re-add it in their own `buildParams`
|
|
15
|
+
* override.
|
|
16
|
+
*
|
|
17
|
+
* - **`generate` via `json_object` + schema-in-system-prompt.**
|
|
18
|
+
* The OpenAI provider uses `response_format.json_schema` with
|
|
19
|
+
* `strict: true` — most OpenAI-compat vendors don't support
|
|
20
|
+
* that. The safe default is `json_object` mode + a
|
|
21
|
+
* "Respond with JSON matching this schema" instruction
|
|
22
|
+
* injected into the system prompt, then client-side
|
|
23
|
+
* `parseGenerated` validates.
|
|
24
|
+
*
|
|
25
|
+
* - **Throws on combined tools + schema.** No reliable per-turn
|
|
26
|
+
* schema enforcement on compat endpoints in V1.
|
|
27
|
+
* `runWithToolsAndSchema` / `streamWithToolsAndSchema` throw
|
|
28
|
+
* `BrainError` with a clear "run as two calls or switch
|
|
29
|
+
* providers" message.
|
|
30
|
+
*
|
|
31
|
+
* - **`mapUsage` hook.** Default just maps OpenAI's
|
|
32
|
+
* `prompt_tokens` / `completion_tokens` straight across. Vendors
|
|
33
|
+
* that report cache hits on a custom field (DeepSeek does;
|
|
34
|
+
* `prompt_cache_hit_tokens`) override.
|
|
35
|
+
*
|
|
36
|
+
* Subclasses provide just the constructor + (sometimes) a
|
|
37
|
+
* `mapUsage` override. Most named compat providers in this
|
|
38
|
+
* codebase are now <40 lines.
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
import type OpenAI from 'openai'
|
|
42
|
+
import type { AgentGenerateResult } from '../agent_generate_result.ts'
|
|
43
|
+
import type { AgentStreamEvent } from '../agent_stream_event.ts'
|
|
44
|
+
import { BrainError } from '../brain_error.ts'
|
|
45
|
+
import { parseGenerated, type OutputSchema } from '../output_schema.ts'
|
|
46
|
+
import type { RunWithToolsOptions } from '../provider.ts'
|
|
47
|
+
import type { Tool } from '../tool.ts'
|
|
48
|
+
import type {
|
|
49
|
+
ChatOptions,
|
|
50
|
+
ChatUsage,
|
|
51
|
+
GenerateResult,
|
|
52
|
+
Message,
|
|
53
|
+
SystemPrompt,
|
|
54
|
+
} from '../types.ts'
|
|
55
|
+
import { OpenAIProvider } from './openai_provider.ts'
|
|
56
|
+
|
|
57
|
+
/**
 * Abstract base for providers that talk to an OpenAI-compatible chat
 * completions endpoint. It adjusts four behaviours of `OpenAIProvider`:
 * request params drop `reasoning_effort`, `generate` uses `json_object`
 * mode with the schema embedded in the system prompt, the combined
 * tools + schema entry points throw, and usage mapping goes through an
 * overridable `mapUsage` hook.
 */
export abstract class OpenAICompatProvider extends OpenAIProvider {
  /**
   * Builds the request params via the parent implementation and then
   * removes `reasoning_effort` from the result, since most compat
   * endpoints reject fields they don't know.
   *
   * Subclasses whose vendor accepts `reasoning_effort` can override
   * this method to keep or re-add the field.
   *
   * @returns The parent's params object, mutated in place.
   */
  protected override buildParams(
    messages: readonly Message[],
    options: ChatOptions,
    tools: readonly Tool[],
  ): OpenAI.Chat.ChatCompletionCreateParamsNonStreaming {
    const params = super.buildParams(messages, options, tools)
    // Deleting an absent property is a no-op, so no presence check is needed.
    Reflect.deleteProperty(params, 'reasoning_effort')
    return params
  }

  /**
   * Structured generation without server-side schema enforcement.
   *
   * Steps:
   *  1. Append a "respond with JSON matching this schema" instruction
   *     to the caller's system prompt.
   *  2. Build params with no tools and force
   *     `response_format: { type: 'json_object' }`.
   *  3. Send the request, passing `options.signal` through when set.
   *  4. Hand the first choice's text (or `''` when there is none) to
   *     `parseGenerated` together with the schema.
   *
   * The upstream API does not enforce the schema in this mode, so
   * `parseGenerated` is the only check on the returned value.
   *
   * @param messages - Conversation to send.
   * @param schema - Output schema embedded in the prompt and passed to `parseGenerated`.
   * @param options - Chat options; defaults to `{}`.
   * @returns The parsed value plus raw text, model, stop reason, usage and raw response.
   */
  override async generate<T>(
    messages: readonly Message[],
    schema: OutputSchema<T>,
    options: ChatOptions = {},
  ): Promise<GenerateResult<T>> {
    const system = combineSystem(options.system, schemaInstruction(schema))
    const params = this.buildParams(messages, { ...options, system }, [])
    params.response_format = { type: 'json_object' }

    // Only pass request options when there is an abort signal to forward.
    const requestOptions = options.signal === undefined ? undefined : { signal: options.signal }
    const response = await this.client.chat.completions.create(params, requestOptions)

    const firstChoice = response.choices[0]
    const text = firstChoice?.message?.content ?? ''

    return {
      value: parseGenerated(text, schema),
      text,
      model: response.model,
      stopReason: firstChoice?.finish_reason ?? null,
      usage: this.mapUsage(response.usage),
      raw: response,
    }
  }

  /**
   * Always rejects: a tool loop combined with structured output is not
   * supported on OpenAI-compat providers in V1. `json_object` mode
   * carries no schema enforcement, and injecting the schema instruction
   * into every turn of a tool loop would surprise apps.
   *
   * @throws BrainError on every call (as a rejected promise); the error
   *   context carries the provider name.
   */
  override async runWithToolsAndSchema<T>(
    _messages: readonly Message[],
    _tools: readonly Tool[],
    _schema: OutputSchema<T>,
    _options?: RunWithToolsOptions,
  ): Promise<AgentGenerateResult<T>> {
    throw new BrainError(
      `${this.name}.runWithToolsAndSchema: combined tool use + structured output is not supported on OpenAI-compat providers in V1. Run \`brain.runTools(...)\` and \`brain.generate(...)\` as two separate calls, or switch to OpenAI / Anthropic / Gemini for this combination.`,
      { context: { provider: this.name } },
    )
  }

  /**
   * Streaming counterpart of `runWithToolsAndSchema`; equally unsupported.
   *
   * This is an async generator, so the error is raised when the consumer
   * first pulls from the returned iterator, not when the method is called.
   *
   * @throws BrainError on first iteration; the error context carries the
   *   provider name.
   */
  override async *streamWithToolsAndSchema<T>(
    _messages: readonly Message[],
    _tools: readonly Tool[],
    _schema: OutputSchema<T>,
    _options?: RunWithToolsOptions,
  ): AsyncIterable<AgentStreamEvent<T>> {
    throw new BrainError(
      `${this.name}.streamWithToolsAndSchema: combined streaming + tool use + structured output is not supported on OpenAI-compat providers in V1. Use \`brain.streamTools(...)\` and \`brain.generate(...)\` separately, or switch to OpenAI / Anthropic / Gemini for this combination.`,
      { context: { provider: this.name } },
    )
  }

  /**
   * Usage-mapping hook. The default reads the standard OpenAI fields
   * (`prompt_tokens`, `completion_tokens`,
   * `prompt_tokens_details.cached_tokens`) and substitutes `0` for any
   * that are missing, including when no usage object is present at all.
   * `cacheCreationTokens` is always `0` here.
   *
   * Subclasses override this when their vendor reports usage (for
   * example cache hits) on non-standard fields.
   */
  protected mapUsage(u: OpenAI.CompletionUsage | undefined): ChatUsage {
    const inputTokens = u?.prompt_tokens ?? 0
    const outputTokens = u?.completion_tokens ?? 0
    const cacheReadTokens = u?.prompt_tokens_details?.cached_tokens ?? 0
    return { inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens: 0 }
  }
}
|
|
167
|
+
|
|
168
|
+
// ─── Shared helpers ──────────────────────────────────────────────────────
|
|
169
|
+
|
|
170
|
+
/**
 * Merge an additional instruction into an existing system prompt.
 *
 * The result keeps the shape of `existing`:
 *  - `undefined` or `''` → the addition alone, as a plain string. An
 *    empty prompt is treated as absent so the result does not start
 *    with two blank lines.
 *  - non-empty string    → `existing`, a blank line, then the addition.
 *  - array of blocks     → a new array with `{ text: addition }` appended;
 *    the input array is not mutated.
 *  - single block        → a two-element array: the block, then
 *    `{ text: addition }`.
 *
 * @param existing - The caller's system prompt, if any.
 * @param addition - Instruction text to append.
 * @returns The combined system prompt.
 */
function combineSystem(existing: SystemPrompt | undefined, addition: string): SystemPrompt {
  if (existing === undefined || existing === '') return addition
  if (typeof existing === 'string') return `${existing}\n\n${addition}`
  if (Array.isArray(existing)) return [...existing, { text: addition }]
  return [existing, { text: addition }]
}
|
|
177
|
+
|
|
178
|
+
/**
 * Build the system-prompt fragment that pins the model to the supplied
 * JSON schema.
 *
 * The fragment is newline-joined and consists of, in order:
 *  1. a fixed "respond with JSON only" instruction,
 *  2. `Schema description: …` — only when `schema.description` is truthy,
 *  3. `Schema (name: …):`,
 *  4. `schema.jsonSchema` pretty-printed with two-space indentation.
 *
 * @param schema - Output schema whose name, description and JSON Schema are rendered.
 * @returns The instruction text to append to a system prompt.
 */
function schemaInstruction(schema: OutputSchema<unknown>): string {
  const parts: string[] = [
    `Respond with a JSON object that matches the following JSON Schema. Output ONLY the JSON object — no prose, no markdown fences.`,
  ]

  if (schema.description) {
    parts.push(`Schema description: ${schema.description}`)
  }

  parts.push(`Schema (name: ${schema.name}):`)

  // JSON.stringify yields undefined for non-serialisable input (e.g. an
  // undefined schema); in that case the line is skipped.
  const rendered = JSON.stringify(schema.jsonSchema, null, 2) as string | undefined
  if (rendered !== undefined) {
    parts.push(rendered)
  }

  return parts.join('\n')
}
|