@strav/brain 1.0.0-alpha.12 → 1.0.0-alpha.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@strav/brain",
3
- "version": "1.0.0-alpha.12",
3
+ "version": "1.0.0-alpha.13",
4
4
  "description": "Strav AI module — unified Provider interface, BrainManager, threads, prompt caching, tools / agents / MCP. Anthropic + OpenAI providers; Gemini / DeepSeek follow.",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -21,8 +21,9 @@
21
21
  },
22
22
  "dependencies": {
23
23
  "@anthropic-ai/sdk": "^0.100.0",
24
+ "@google/genai": "^2.7.0",
24
25
  "@modelcontextprotocol/sdk": "^1.29.0",
25
- "@strav/kernel": "1.0.0-alpha.12",
26
+ "@strav/kernel": "1.0.0-alpha.13",
26
27
  "openai": "^6.0.0"
27
28
  },
28
29
  "peerDependencies": {
@@ -49,7 +49,25 @@ export interface OpenAIProviderConfig {
49
49
  defaultMaxTokens?: number
50
50
  }
51
51
 
52
- export type ProviderConfig = AnthropicProviderConfig | OpenAIProviderConfig // | GoogleProviderConfig | DeepSeekProviderConfig (later slices)
52
+ /** Config for the `'google'` driver: the Gemini provider backed by the `@google/genai` SDK. */
53
+ export interface GeminiProviderConfig {
54
+ /** Driver tag; the literal `'google'` selects the Gemini provider. */ driver: 'google'
55
+ /** API key. Required. Most apps source from `env('GOOGLE_API_KEY')` or `env('GEMINI_API_KEY')`. */
56
+ apiKey: string
57
+ /** Optional override of the SDK's base URL, forwarded through the SDK's `httpOptions` — useful for proxies or test doubles. */
58
+ baseUrl?: string
59
+ /** Default model when neither `options.model` nor `options.tier` is passed. Defaults to `gemini-2.5-flash`. */
60
+ defaultModel?: string
61
+ /** Default output-token cap for calls that don't pass `maxTokens` (sent to Gemini as `maxOutputTokens`). Falls back to 4096 when unset. */
62
+ defaultMaxTokens?: number
63
+ /** Optional API version pin (`v1` / `v1beta`), forwarded through the SDK's `httpOptions`. */
64
+ apiVersion?: string
65
+ }
66
+
67
+ /** Union of every supported provider config; presumably discriminated by each member's `driver` literal — confirm against the other config interfaces. */ export type ProviderConfig =
68
+ | AnthropicProviderConfig
69
+ | OpenAIProviderConfig
70
+ | GeminiProviderConfig // DeepSeekProviderConfig is planned to join this union in a later slice.
53
71
 
54
72
  /** Cache-shape defaults applied when `ChatOptions.cache` is omitted. */
55
73
  export interface BrainCacheConfig {
@@ -28,6 +28,7 @@ import { type Application, ConfigError, ConfigRepository, ServiceProvider } from
28
28
  import { BrainManager } from './brain_manager.ts'
29
29
  import type { BrainConfigShape, ProviderConfig } from './brain_config.ts'
30
30
  import { AnthropicProvider } from './providers/anthropic_provider.ts'
31
+ import { GeminiProvider } from './providers/gemini_provider.ts'
31
32
  import { OpenAIProvider } from './providers/openai_provider.ts'
32
33
  import type { Provider } from './provider.ts'
33
34
 
@@ -101,10 +102,17 @@ function buildProvider(name: string, config: ProviderConfig): Provider {
101
102
  )
102
103
  }
103
104
  return new OpenAIProvider(name, config)
105
+ case 'google':
106
+ if (!config.apiKey) {
107
+ throw new ConfigError(
108
+ `BrainProvider: google provider "${name}" is missing apiKey. Source from env('GOOGLE_API_KEY').`,
109
+ )
110
+ }
111
+ return new GeminiProvider(name, config)
104
112
  default: {
105
113
  const exhaustiveCheck: never = config
106
114
  throw new ConfigError(
107
- `BrainProvider: unknown driver for provider "${name}". Known drivers: anthropic, openai.`,
115
+ `BrainProvider: unknown driver for provider "${name}". Known drivers: anthropic, openai, google.`,
108
116
  )
109
117
  // (unreachable — kept for the exhaustive check to fire when a new driver lands)
110
118
  // biome-ignore lint/correctness/noUnreachable: kept for the exhaustive-check above
package/src/index.ts CHANGED
@@ -16,6 +16,7 @@ export {
16
16
  type BrainConfigShape,
17
17
  DEFAULT_MODEL,
18
18
  DEFAULT_TIERS,
19
+ type GeminiProviderConfig,
19
20
  type OpenAIProviderConfig,
20
21
  type ProviderConfig,
21
22
  } from './brain_config.ts'
@@ -29,6 +30,7 @@ export { BrainProvider } from './brain_provider.ts'
29
30
  export { defineTool, type DefineToolSpec } from './define_tool.ts'
30
31
  export type { MCPServer, MCPServerToolConfig } from './mcp_server.ts'
31
32
  export { AnthropicProvider } from './providers/anthropic_provider.ts'
33
+ export { GeminiProvider } from './providers/gemini_provider.ts'
32
34
  export { OpenAIProvider } from './providers/openai_provider.ts'
33
35
  export type { Provider, RunWithToolsOptions } from './provider.ts'
34
36
  export { Thread, type ThreadOptions, type ThreadState } from './thread.ts'
@@ -0,0 +1,445 @@
1
+ /**
2
+ * `GeminiProvider` — implementation of `Provider` backed by the
3
+ * official `@google/genai` SDK (Gemini Developer API / Vertex AI).
4
+ *
5
+ * Maps framework shapes to Gemini's wire format:
6
+ *
7
+ * - `system` → `config.systemInstruction` (string-joined when
8
+ * multi-block). Cache flags on the system prompt are ignored —
9
+ * Gemini's prompt caching uses an explicit Caches API rather
10
+ * than per-block flags, so `cache: true` becomes a no-op
11
+ * consistent with the OpenAI provider.
12
+ *
13
+ * - `Message[]` → `Content[]`. Framework `role: 'user' | 'assistant'`
14
+ * maps to Gemini's `role: 'user' | 'model'`. String content
15
+ * becomes a single `{text}` part; `ContentBlock[]` content fans
16
+ * out:
17
+ * - `TextBlock` → `{text}`
18
+ * - `ToolUseBlock` → `{functionCall: {id, name, args}}`
19
+ * - `ToolResultBlock` → `{functionResponse: {id, name,
20
+ * response: {result | error}}}`
21
+ * - `MCP*` blocks → silently dropped (Anthropic-only).
22
+ *
23
+ * - `Tool[]` → `[{functionDeclarations: [{name, description,
24
+ * parametersJsonSchema: inputSchema}]}]`. We use
25
+ * `parametersJsonSchema` (not `parameters`) so JSON-Schema-shaped
26
+ * tool inputs pass through verbatim without translation to
27
+ * Gemini's `Schema` form.
28
+ *
29
+ * - `MCPServer[]` → resolved via the local MCP client
30
+ * (`@strav/brain/mcp`). Discovered tools are namespaced
31
+ * `<server>__<tool>` and merged with caller-supplied tools.
32
+ * Transports are closed in a `finally` once the loop exits.
33
+ * Gemini has no first-party server-side MCP equivalent to
34
+ * Anthropic's connector.
35
+ *
36
+ * - `thinking: 'adaptive'` → `thinkingConfig: { thinkingBudget: -1 }`
37
+ * (auto). `'disabled'` → `thinkingConfig: { thinkingBudget: 0 }`.
38
+ * Explicit `effort` (`low`/`medium`/`high`/`xhigh`/`max`) maps to
39
+ * `thinkingConfig.thinkingLevel` (`xhigh` and `max` clamp to `HIGH`).
40
+ * With neither option set, no `thinkingConfig` is sent. When one is set it
41
+ * is emitted without a model check — non-thinking models either ignore it or are rejected upstream.
42
+ *
43
+ * - `cache: true` → no-op. Gemini's prompt cache lives behind the
44
+ * `Caches` API; same accepted-silently behavior as OpenAI.
45
+ *
46
+ * - `countTokens` IS implemented — `ai.models.countTokens` exists
47
+ * and is cheap. Returns `totalTokens`.
48
+ */
49
+
50
+ import { GoogleGenAI, ThinkingLevel } from '@google/genai'
51
+ import type {
52
+ Content,
53
+ FunctionDeclaration,
54
+ GenerateContentConfig,
55
+ GenerateContentParameters,
56
+ GenerateContentResponse,
57
+ Part,
58
+ } from '@google/genai'
59
+ import type { AgentResult } from '../agent_result.ts'
60
+ import { BrainError } from '../brain_error.ts'
61
+ import type { GeminiProviderConfig } from '../brain_config.ts'
62
+ import type { MCPServer } from '../mcp_server.ts'
63
+ import { resolveMcpTools, type ResolveMcpToolsOptions } from '../mcp/resolve_mcp_tools.ts'
64
+ import type { Provider, RunWithToolsOptions } from '../provider.ts'
65
+ import type { Tool } from '../tool.ts'
66
+ import { ToolExecutionError } from '../tool_execution_error.ts'
67
+ import type {
68
+ ChatOptions,
69
+ ChatResult,
70
+ ChatUsage,
71
+ ContentBlock,
72
+ Message,
73
+ StreamEvent,
74
+ SystemPrompt,
75
+ TextBlock,
76
+ ToolResultBlock,
77
+ ToolUseBlock,
78
+ } from '../types.ts'
79
+
80
+ const DEFAULT_GEMINI_MODEL = 'gemini-2.5-flash'
81
+
82
+ /**
83
+ * The slice of `GoogleGenAI`'s `models` API that `GeminiProvider` calls: unary
84
+ * generation, streaming generation and token counting. Kept narrow so tests can inject a stub without satisfying the full SDK surface.
85
+ */
86
+ export interface GeminiModelsClient {
87
+ generateContent(params: GenerateContentParameters): Promise<GenerateContentResponse>
88
+ generateContentStream(
89
+ params: GenerateContentParameters,
90
+ ): Promise<AsyncIterable<GenerateContentResponse>>
91
+ countTokens(params: { model: string; contents: Content[] }): Promise<{ totalTokens?: number }>
92
+ }
93
+
94
+ /** Optional construction seams for `GeminiProvider`, separate from the driver config. */ export interface GeminiProviderOptions {
95
+ /** Pre-built client; when set, the constructor uses its `models` and does not instantiate `GoogleGenAI`. */ client?: { models: GeminiModelsClient }
96
+ /** Internal seam — tests inject a stub MCP client factory; forwarded to `resolveMcpTools` as `clientFactory`. */
97
+ mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
98
+ }
99
+
100
/**
 * `Provider` implementation that talks to Gemini through the
 * `@google/genai` SDK, or through an injected stub exposing the same
 * `models` slice.
 *
 * Offers one-shot `chat`, text streaming, token counting and a
 * tool-calling loop (`runWithTools`) that also resolves MCP servers via
 * the local MCP client.
 */
export class GeminiProvider implements Provider {
  readonly name: string
  private readonly models: GeminiModelsClient
  private readonly defaultModel: string
  private readonly defaultMaxTokens: number
  private readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']

  /**
   * @param name Name this provider instance is registered under.
   * @param config Driver config. `apiKey`, `baseUrl` and `apiVersion` are
   *   only read when no client is injected through `options`.
   * @param options Test seams: a pre-built client and/or an MCP client factory.
   */
  constructor(name: string, config: GeminiProviderConfig, options: GeminiProviderOptions = {}) {
    this.name = name
    this.defaultModel = config.defaultModel ?? DEFAULT_GEMINI_MODEL
    this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
    this.mcpClientFactory = options.mcpClientFactory
    if (options.client) {
      this.models = options.client.models
    } else {
      // Only build `httpOptions` when at least one override is present, and
      // only include the keys that were actually configured.
      const httpOpts =
        config.baseUrl !== undefined || config.apiVersion !== undefined
          ? {
              ...(config.baseUrl !== undefined ? { baseUrl: config.baseUrl } : {}),
              ...(config.apiVersion !== undefined ? { apiVersion: config.apiVersion } : {}),
            }
          : undefined
      const sdk = new GoogleGenAI({
        apiKey: config.apiKey,
        ...(httpOpts ? { httpOptions: httpOpts } : {}),
      })
      this.models = sdk.models as unknown as GeminiModelsClient
    }
  }

  /**
   * Sends the conversation once, without tools, and returns the first
   * candidate's text together with usage and the raw SDK response.
   */
  async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
    const params = this.buildParams(messages, options, [])
    const response = await this.models.generateContent(params)
    return this.toChatResult(response, params.model)
  }

  /**
   * Streams the reply. Yields a `text` event for every chunk whose first
   * candidate carries non-empty text, then exactly one `stop` event with
   * the last finish reason seen (or `null`) and the last usage reported
   * (all zeros when no chunk reported usage). No tools are sent.
   */
  async *stream(
    messages: readonly Message[],
    options: ChatOptions = {},
  ): AsyncIterable<StreamEvent> {
    const params = this.buildParams(messages, options, [])
    const stream = await this.models.generateContentStream(params)
    let finishReason: string | null = null
    let lastUsage: ChatUsage | undefined
    for await (const chunk of stream) {
      const candidate = chunk.candidates?.[0]
      const text = candidateText(candidate)
      if (text.length > 0) yield { type: 'text', delta: text }
      if (candidate?.finishReason) finishReason = String(candidate.finishReason)
      if (chunk.usageMetadata) lastUsage = toUsage(chunk.usageMetadata)
    }
    yield {
      type: 'stop',
      stopReason: finishReason,
      usage: lastUsage ?? {
        inputTokens: 0,
        outputTokens: 0,
        cacheReadTokens: 0,
        cacheCreationTokens: 0,
      },
    }
  }

  /**
   * Counts tokens for `messages` using `options.model` or the default
   * model. Only the message contents are sent; the system prompt and
   * tools are not included in the count.
   *
   * @returns The reported `totalTokens`, or 0 when the response omits it.
   */
  async countTokens(messages: readonly Message[], options: ChatOptions = {}): Promise<number> {
    const contents = this.toContents(messages)
    const model = options.model ?? this.defaultModel
    const response = await this.models.countTokens({ model, contents })
    return response.totalTokens ?? 0
  }

  /**
   * Runs the tool-calling loop with the caller's tools plus any tools
   * discovered from `options.mcpServers`. MCP transports are closed in a
   * `finally`, whether the loop returns or throws.
   *
   * @throws BrainError when a response has no candidates.
   * @throws ToolExecutionError when the model calls an unregistered tool
   *   or a tool's `execute` throws.
   */
  async runWithTools(
    messages: readonly Message[],
    tools: readonly Tool[],
    options: RunWithToolsOptions = {},
  ): Promise<AgentResult> {
    const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
    const resolved =
      mcpServers.length > 0
        ? await resolveMcpTools(mcpServers, {
            ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
          })
        : { tools: [] as Tool[], close: async () => {} }
    try {
      return await this._runLoop(messages, [...tools, ...resolved.tools], options)
    } finally {
      await resolved.close()
    }
  }

  /**
   * Core agent loop: call the model, execute every requested tool in
   * order, append the results as a user turn, and repeat until the model
   * answers without tool calls or `maxIterations` (default 10) tool
   * rounds have completed. Usage is summed across all model calls.
   */
  private async _runLoop(
    messages: readonly Message[],
    tools: readonly Tool[],
    options: RunWithToolsOptions,
  ): Promise<AgentResult> {
    const maxIterations = options.maxIterations ?? 10
    const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
    // Copy so the caller's array is never mutated.
    const workingMessages: Message[] = [...messages]
    const aggregated: ChatUsage = {
      inputTokens: 0,
      outputTokens: 0,
      cacheReadTokens: 0,
      cacheCreationTokens: 0,
    }
    let iterations = 0

    while (true) {
      const params = this.buildParams(workingMessages, options, tools)
      const response = await this.models.generateContent(params)
      addUsage(aggregated, response.usageMetadata)

      const candidate = response.candidates?.[0]
      if (!candidate) {
        throw new BrainError('GeminiProvider: response had no candidates.')
      }
      const parts = candidate.content?.parts ?? []
      const assistantContent = fromGeminiParts(parts)
      workingMessages.push({ role: 'assistant', content: assistantContent })

      // A plain-string reply cannot contain tool calls.
      const toolUses = (Array.isArray(assistantContent) ? assistantContent : []).filter(
        (b): b is ToolUseBlock => b.type === 'tool_use',
      )

      if (toolUses.length === 0) {
        return {
          text: typeof assistantContent === 'string'
            ? assistantContent
            : candidateText(candidate),
          messages: workingMessages,
          iterations,
          stopReason: candidate.finishReason ? String(candidate.finishReason) : 'stop',
          usage: aggregated,
        }
      }

      const resultBlocks: ContentBlock[] = []
      for (const call of toolUses) {
        const tool = toolMap.get(call.name)
        if (!tool) {
          throw new ToolExecutionError(
            call.name,
            call.id,
            new Error(`Tool "${call.name}" is not registered.`),
          )
        }
        let output: unknown
        try {
          output = await tool.execute(call.input, {
            callId: call.id,
            context: options.context ?? {},
          })
        } catch (cause) {
          throw new ToolExecutionError(call.name, call.id, cause)
        }
        const resultBlock: ToolResultBlock = {
          type: 'tool_result',
          toolUseId: call.id,
          content: typeof output === 'string' ? output : JSON.stringify(output),
        }
        resultBlocks.push(resultBlock)
      }
      workingMessages.push({ role: 'user', content: resultBlocks })

      // One iteration = one completed round of tool execution.
      iterations++
      if (iterations >= maxIterations) {
        return {
          text: candidateText(candidate),
          messages: workingMessages,
          iterations,
          stopReason: 'max_iterations',
          usage: aggregated,
        }
      }
    }
  }

  // ─── Param translation ──────────────────────────────────────────────────

  /**
   * Builds the `generateContent` request: model, converted contents, and a
   * config holding the output-token cap plus, when applicable, the system
   * instruction, function declarations and thinking config.
   */
  private buildParams(
    messages: readonly Message[],
    options: ChatOptions,
    tools: readonly Tool[],
  ): GenerateContentParameters {
    const model = options.model ?? this.defaultModel
    const contents = this.toContents(messages)
    const config: GenerateContentConfig = {
      maxOutputTokens: options.maxTokens ?? this.defaultMaxTokens,
    }

    const systemText = systemPromptText(options.system)
    if (systemText.length > 0) {
      config.systemInstruction = systemText
    }

    if (tools.length > 0) {
      // `parametersJsonSchema` takes the tool's JSON Schema as-is.
      const functionDeclarations: FunctionDeclaration[] = tools.map((t) => ({
        name: t.name,
        description: t.description,
        parametersJsonSchema: t.inputSchema,
      }))
      config.tools = [{ functionDeclarations }]
    }

    const thinking = buildThinkingConfig(options)
    if (thinking !== undefined) config.thinkingConfig = thinking

    return { model, contents, config }
  }

  /**
   * Converts framework messages to Gemini `Content[]` (`assistant` becomes
   * `model`, everything else `user`).
   *
   * A `tool_result` block only carries the id of the call it answers, so
   * the converted `functionResponse` arrives with an empty `name`. Gemini
   * pairs function responses with calls by name, so the name is back-filled
   * here from the `tool_use` block with the same id found anywhere in
   * `messages`. Responses with no matching call are left untouched.
   */
  private toContents(messages: readonly Message[]): Content[] {
    const toolNamesById = new Map<string, string>()
    for (const message of messages) {
      if (typeof message.content === 'string') continue
      for (const block of message.content) {
        if (block.type === 'tool_use') toolNamesById.set(block.id, block.name)
      }
    }

    return messages.map((m) => ({
      role: m.role === 'assistant' ? 'model' : 'user',
      parts: toGeminiParts(m.content).map((part) => {
        const fr = part.functionResponse
        // Leave non-response parts and already-named responses alone.
        if (!fr || fr.name || fr.id === undefined) return part
        const toolName = toolNamesById.get(fr.id)
        return toolName === undefined
          ? part
          : { ...part, functionResponse: { ...fr, name: toolName } }
      }),
    }))
  }

  /**
   * Maps an SDK response to `ChatResult`, using the first candidate only.
   * `model` falls back to the requested model when the response does not
   * report `modelVersion`; `stopReason` is `null` without a finish reason.
   */
  private toChatResult(
    response: GenerateContentResponse,
    requestedModel: string,
  ): ChatResult<GenerateContentResponse> {
    const candidate = response.candidates?.[0]
    return {
      text: candidateText(candidate),
      model: response.modelVersion ?? requestedModel,
      stopReason: candidate?.finishReason ? String(candidate.finishReason) : null,
      usage: toUsage(response.usageMetadata),
      raw: response,
    }
  }
}
329
+
330
+ // ─── Shape converters ─────────────────────────────────────────────────────
331
+
332
/**
 * Flattens a framework system prompt into one string.
 *
 * - `undefined` gives `''`.
 * - A string is returned unchanged.
 * - An array of blocks gives the blocks' `text` joined with `'\n'`.
 * - A single block gives its `text`.
 *
 * Only `text` is read; any other fields on the blocks are ignored.
 */
function systemPromptText(system: SystemPrompt | undefined): string {
  if (typeof system === 'string') return system
  if (system === undefined) return ''
  return Array.isArray(system)
    ? system.map(({ text }) => text).join('\n')
    : system.text
}
338
+
339
/**
 * Converts one message's content into Gemini `Part`s.
 *
 * A string becomes a single `{ text }` part. For block content:
 * - `text` becomes `{ text }`.
 * - `tool_use` becomes `{ functionCall: { id, name, args } }`; a nullish
 *   `input` is sent as `{}`.
 * - `tool_result` becomes `{ functionResponse }` keyed by the tool-use id,
 *   with the payload under `error` when `isError` is set and under
 *   `result` otherwise. Array content is concatenated with no separator.
 * - Any other block type (e.g. MCP blocks) is dropped.
 *
 * NOTE(review): `functionResponse.name` is emitted as `''` because a
 * `tool_result` block carries only the id of the call it answers. Gemini
 * pairs function responses with calls by name — confirm the name is
 * supplied before these parts are sent.
 */
function toGeminiParts(content: string | ContentBlock[]): Part[] {
  if (typeof content === 'string') return [{ text: content }]

  return content.flatMap((block): Part[] => {
    switch (block.type) {
      case 'text':
        return [{ text: block.text }]
      case 'tool_use':
        return [
          {
            functionCall: {
              id: block.id,
              name: block.name,
              args: (block.input ?? {}) as Record<string, unknown>,
            },
          },
        ]
      case 'tool_result': {
        const payload =
          typeof block.content === 'string'
            ? block.content
            : block.content.map((piece) => piece.text).join('')
        return [
          {
            functionResponse: {
              id: block.toolUseId,
              name: '',
              response: block.isError ? { error: payload } : { result: payload },
            },
          },
        ]
      }
      default:
        // MCP blocks (Anthropic-only) silently dropped.
        return []
    }
  })
}
369
+
370
/**
 * Converts the parts of a Gemini candidate into framework content.
 *
 * - A part with non-empty string `text` becomes a `text` block.
 * - Otherwise, a part with `functionCall` becomes a `tool_use` block. A
 *   missing id is replaced by a synthesized `gemini_…` id, a missing name
 *   by `''`, and missing args by `{}`.
 * - Every other part is dropped.
 *
 * When the result is exactly one `text` block, its text is returned as a
 * plain string; otherwise the (possibly empty) block array is returned.
 */
function fromGeminiParts(parts: readonly Part[]): string | ContentBlock[] {
  const converted = parts.flatMap((part): ContentBlock[] => {
    if (typeof part.text === 'string' && part.text.length > 0) {
      return [{ type: 'text', text: part.text }]
    }
    const call = part.functionCall
    if (!call) return []
    const toolUse: ToolUseBlock = {
      type: 'tool_use',
      id: call.id ?? `gemini_${cryptoRandomId()}`,
      name: call.name ?? '',
      input: call.args ?? {},
    }
    return [toolUse]
  })

  const [only] = converted
  if (converted.length === 1 && only?.type === 'text') return only.text
  return converted
}
388
+
389
/**
 * Concatenates, in order and with no separator, every non-empty string
 * `text` found on the candidate's parts. A missing candidate, content or
 * parts list yields `''`.
 */
function candidateText(candidate: { content?: { parts?: Part[] } } | undefined): string {
  let joined = ''
  for (const part of candidate?.content?.parts ?? []) {
    if (typeof part.text === 'string' && part.text.length > 0) {
      joined += part.text
    }
  }
  return joined
}
396
+
397
/**
 * Derives Gemini's `thinkingConfig` from the chat options.
 *
 * An explicit `effort` takes precedence over `thinking`: it maps to a
 * `thinkingLevel`, or to the automatic budget (`-1`) if no level is
 * returned for it. Without `effort`, `'adaptive'` selects the automatic
 * budget, `'disabled'` a zero budget, and anything else yields
 * `undefined` so that no thinking config is sent.
 */
function buildThinkingConfig(options: ChatOptions): GenerateContentConfig['thinkingConfig'] {
  const { effort, thinking } = options

  if (effort === undefined) {
    switch (thinking) {
      case 'adaptive':
        return { thinkingBudget: -1 }
      case 'disabled':
        return { thinkingBudget: 0 }
      default:
        return undefined
    }
  }

  const level = effortToThinkingLevel(effort)
  if (level === undefined) return { thinkingBudget: -1 }
  return { thinkingLevel: level }
}
406
+
407
/**
 * Maps the framework's effort scale onto Gemini's three thinking levels.
 * `low` and `medium` map one-to-one; `high`, `xhigh` and `max` all map to
 * `HIGH`. Any other value yields `undefined`.
 */
function effortToThinkingLevel(
  effort: NonNullable<ChatOptions['effort']>,
): ThinkingLevel | undefined {
  if (effort === 'low') return ThinkingLevel.LOW
  if (effort === 'medium') return ThinkingLevel.MEDIUM
  if (effort === 'high' || effort === 'xhigh' || effort === 'max') {
    return ThinkingLevel.HIGH
  }
  return undefined
}
419
+
420
/**
 * Converts Gemini `usageMetadata` into the framework's `ChatUsage`.
 *
 * Gemini reports thinking tokens (`thoughtsTokenCount`) separately from
 * `candidatesTokenCount`, so both are summed into `outputTokens`;
 * otherwise thinking models would under-report generated tokens.
 * `cacheCreationTokens` is always 0 because none of the fields read here
 * report cache writes. Missing metadata or fields count as 0.
 *
 * @param u Usage metadata from a response or stream chunk, if any.
 * @returns A fresh `ChatUsage` object.
 */
function toUsage(
  u:
    | {
        promptTokenCount?: number
        candidatesTokenCount?: number
        cachedContentTokenCount?: number
        thoughtsTokenCount?: number
      }
    | undefined,
): ChatUsage {
  return {
    inputTokens: u?.promptTokenCount ?? 0,
    outputTokens: (u?.candidatesTokenCount ?? 0) + (u?.thoughtsTokenCount ?? 0),
    cacheReadTokens: u?.cachedContentTokenCount ?? 0,
    cacheCreationTokens: 0,
  }
}

/**
 * Adds one response's usage to the running total `acc`, in place.
 * Delegates to `toUsage` so single-call and aggregated usage always
 * follow the same counting rules. Does nothing when `u` is missing.
 *
 * @param acc Accumulator to mutate.
 * @param u Usage metadata from one response, if any.
 */
function addUsage(
  acc: ChatUsage,
  u:
    | {
        promptTokenCount?: number
        candidatesTokenCount?: number
        cachedContentTokenCount?: number
        thoughtsTokenCount?: number
      }
    | undefined,
): void {
  if (!u) return
  const delta = toUsage(u)
  acc.inputTokens += delta.inputTokens
  acc.outputTokens += delta.outputTokens
  acc.cacheReadTokens += delta.cacheReadTokens
}
438
+
439
/**
 * Returns a 10-character base-36 id (`[0-9a-z]`), used to synthesize
 * tool-use ids when Gemini omits them.
 *
 * Despite the name this is NOT cryptographically secure: it is built from
 * `Math.random()`, which is sufficient for telling apart the tool calls
 * of one conversation. The id is stored on the tool-use block and
 * therefore appears in any message list handed back to the caller.
 *
 * A single `Math.random().toString(36)` can produce fewer than ten
 * fractional digits (none at all for `0`), so digits are drawn until the
 * id is full length.
 */
function cryptoRandomId(): string {
  const ID_LENGTH = 10
  let id = ''
  while (id.length < ID_LENGTH) {
    // slice(2) drops the leading "0." of the base-36 fraction.
    id += Math.random().toString(36).slice(2)
  }
  return id.slice(0, ID_LENGTH)
}