@strav/brain 1.0.0-alpha.9 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/package.json +23 -7
  2. package/src/agent.ts +43 -5
  3. package/src/agent_generate_result.ts +32 -0
  4. package/src/agent_result.ts +7 -0
  5. package/src/agent_runner.ts +218 -14
  6. package/src/agent_stream_event.ts +100 -0
  7. package/src/brain_config.ts +218 -1
  8. package/src/brain_driver.ts +247 -0
  9. package/src/brain_error.ts +86 -10
  10. package/src/brain_manager.ts +359 -11
  11. package/src/brain_provider.ts +79 -9
  12. package/src/drivers/anthropic/anthropic_brain_driver.ts +641 -0
  13. package/src/drivers/anthropic/anthropic_helpers.ts +65 -0
  14. package/src/drivers/anthropic/anthropic_message_builder.ts +258 -0
  15. package/src/drivers/anthropic/anthropic_response_mapper.ts +123 -0
  16. package/src/drivers/anthropic/anthropic_tool_loop.ts +246 -0
  17. package/src/drivers/anthropic/index.ts +1 -0
  18. package/src/drivers/deepseek/deepseek_brain_driver.ts +117 -0
  19. package/src/drivers/deepseek/index.ts +1 -0
  20. package/src/drivers/gemini/gemini_brain_driver.ts +1064 -0
  21. package/src/drivers/gemini/index.ts +1 -0
  22. package/src/drivers/minimax/index.ts +1 -0
  23. package/src/drivers/minimax/minimax_brain_driver.ts +84 -0
  24. package/src/drivers/ollama/index.ts +1 -0
  25. package/src/drivers/ollama/ollama_brain_driver.ts +86 -0
  26. package/src/drivers/openai/index.ts +1 -0
  27. package/src/drivers/openai/openai_brain_driver.ts +796 -0
  28. package/src/drivers/openai/openai_helpers.ts +58 -0
  29. package/src/drivers/openai/openai_message_builder.ts +187 -0
  30. package/src/drivers/openai/openai_response_mapper.ts +70 -0
  31. package/src/drivers/openai/openai_tool_dispatch.ts +127 -0
  32. package/src/drivers/openai/openai_tool_loop.ts +191 -0
  33. package/src/drivers/openai_compat/index.ts +1 -0
  34. package/src/drivers/openai_compat/openai_compat_brain_driver.ts +616 -0
  35. package/src/drivers/openai_responses/index.ts +1 -0
  36. package/src/drivers/openai_responses/openai_responses_brain_driver.ts +1015 -0
  37. package/src/drivers/openrouter/index.ts +1 -0
  38. package/src/drivers/openrouter/openrouter_brain_driver.ts +137 -0
  39. package/src/drivers/qwen/index.ts +1 -0
  40. package/src/drivers/qwen/qwen_brain_driver.ts +103 -0
  41. package/src/index.ts +75 -11
  42. package/src/mcp/client.ts +243 -0
  43. package/src/mcp/index.ts +23 -0
  44. package/src/mcp/oauth.ts +227 -0
  45. package/src/mcp/pool.ts +106 -0
  46. package/src/mcp/resolve_mcp_tools.ts +108 -0
  47. package/src/mcp_server.ts +63 -0
  48. package/src/output_schema.ts +72 -0
  49. package/src/persistence/brain_message.ts +34 -0
  50. package/src/persistence/brain_message_repository.ts +98 -0
  51. package/src/persistence/brain_store.ts +166 -0
  52. package/src/persistence/brain_suspended_run.ts +30 -0
  53. package/src/persistence/brain_suspended_run_repository.ts +59 -0
  54. package/src/persistence/brain_thread.ts +30 -0
  55. package/src/persistence/brain_thread_repository.ts +56 -0
  56. package/src/persistence/database_brain_store.ts +190 -0
  57. package/src/persistence/index.ts +48 -0
  58. package/src/persistence/schemas/brain_message_schema.ts +61 -0
  59. package/src/persistence/schemas/brain_suspended_run_schema.ts +58 -0
  60. package/src/persistence/schemas/brain_thread_schema.ts +50 -0
  61. package/src/persistence/schemas/index.ts +3 -0
  62. package/src/suspended_run.ts +153 -0
  63. package/src/thread.ts +40 -1
  64. package/src/tool.ts +7 -0
  65. package/src/tool_runner.ts +81 -0
  66. package/src/translate/index.ts +19 -0
  67. package/src/translate/translate_cache.ts +78 -0
  68. package/src/translate/translate_provider.ts +46 -0
  69. package/src/translate/translator.ts +271 -0
  70. package/src/types.ts +398 -1
  71. package/src/zod/index.ts +121 -0
  72. package/src/provider.ts +0 -74
  73. package/src/providers/anthropic_provider.ts +0 -397
@@ -0,0 +1,1064 @@
1
+ /**
2
+ * `GeminiBrainDriver` — implementation of `BrainDriver` backed by the
3
+ * official `@google/genai` SDK (Gemini Developer API / Vertex AI).
4
+ *
5
+ * Maps framework shapes to Gemini's wire format:
6
+ *
7
+ * - `system` → `config.systemInstruction` (string-joined when
8
+ * multi-block). Cache flags on the system prompt are ignored —
9
+ * Gemini's prompt caching uses an explicit Caches API rather
10
+ * than per-block flags, so `cache: true` becomes a no-op
11
+ * consistent with the OpenAI provider.
12
+ *
13
+ * - `Message[]` → `Content[]`. Framework `role: 'user' | 'assistant'`
14
+ * maps to Gemini's `role: 'user' | 'model'`. String content
15
+ * becomes a single `{text}` part; `ContentBlock[]` content fans
16
+ * out:
17
+ * - `TextBlock` → `{text}`
18
+ * - `ToolUseBlock` → `{functionCall: {id, name, args}}`
19
+ * - `ToolResultBlock` → `{functionResponse: {id, name,
20
+ * response: {result | error}}}`
21
+ * - `MCP*` blocks → silently dropped (Anthropic-only).
22
+ *
23
+ * - `Tool[]` → `[{functionDeclarations: [{name, description,
24
+ * parametersJsonSchema: inputSchema}]}]`. We use
25
+ * `parametersJsonSchema` (not `parameters`) so JSON-Schema-shaped
26
+ * tool inputs pass through verbatim without translation to
27
+ * Gemini's `Schema` form.
28
+ *
29
+ * - `MCPServer[]` → resolved via the local MCP client
30
+ * (`@strav/brain/mcp`). Discovered tools are namespaced
31
+ * `<server>__<tool>` and merged with caller-supplied tools.
32
+ * Transports are closed in a `finally` once the loop exits.
33
+ * Gemini has no first-party server-side MCP equivalent to
34
+ * Anthropic's connector.
35
+ *
36
+ * - `thinking: 'adaptive'` → `thinkingConfig: { thinkingBudget: -1 }`
37
+ * (auto). `'disabled'` → `thinkingConfig: { thinkingBudget: 0 }`.
38
+ * Explicit `effort` (`low`/`medium`/`high`/`xhigh`/`max`) maps to
39
+ * `thinkingConfig.thinkingLevel`. Non-thinking models ignore the
40
+ * field upstream — we always emit, the SDK rejects only for
41
+ * models that don't support it.
42
+ *
43
+ * - `cache: true` → no-op. Gemini's prompt cache lives behind the
44
+ * `Caches` API; same accepted-silently behavior as OpenAI.
45
+ *
46
+ * - `countTokens` IS implemented — `ai.models.countTokens` exists
47
+ * and is cheap. Returns `totalTokens`.
48
+ */
49
+
50
+ import { GoogleGenAI, ThinkingLevel } from '@google/genai'
51
+ import type {
52
+ Content,
53
+ FunctionDeclaration,
54
+ GenerateContentConfig,
55
+ GenerateContentParameters,
56
+ GenerateContentResponse,
57
+ Part,
58
+ } from '@google/genai'
59
+ import type { AgentResult } from '../../agent_result.ts'
60
+ import { BrainError } from '../../brain_error.ts'
61
+ import type { GeminiProviderConfig } from '../../brain_config.ts'
62
+ import type { MCPServer } from '../../mcp_server.ts'
63
+ import type { AgentGenerateResult } from '../../agent_generate_result.ts'
64
+ import type { AgentStreamEvent } from '../../agent_stream_event.ts'
65
+ import type {
66
+ AudioSource,
67
+ EmbedOptions,
68
+ EmbedResult,
69
+ ServerTool,
70
+ TranscribeOptions,
71
+ TranscribeResult,
72
+ } from '../../types.ts'
73
+ import { resolveMcpTools, type ResolveMcpToolsOptions } from '../../mcp/resolve_mcp_tools.ts'
74
+ import { parseGenerated, type OutputSchema } from '../../output_schema.ts'
75
+ import { runToolWithRecovery } from '../../tool_runner.ts'
76
+ import type {
77
+ BrainDriver,
78
+ RunWithToolsOptions,
79
+ RunWithToolsOptionsWithSuspend,
80
+ } from '../../brain_driver.ts'
81
+ import type { SuspendedRun } from '../../suspended_run.ts'
82
+ import type { Tool } from '../../tool.ts'
83
+ import type {
84
+ ChatOptions,
85
+ ChatResult,
86
+ ChatUsage,
87
+ ContentBlock,
88
+ GenerateResult,
89
+ Message,
90
+ StreamEvent,
91
+ SystemPrompt,
92
+ TextBlock,
93
+ ToolResultBlock,
94
+ ToolUseBlock,
95
+ } from '../../types.ts'
96
+
97
/** Chat model used when neither the call options nor the provider config name one. */
const DEFAULT_GEMINI_MODEL = 'gemini-2.5-flash'

/** Embedding model used when neither the call options nor the provider config name one. */
const DEFAULT_GEMINI_EMBED_MODEL = 'text-embedding-004'
99
+
100
/**
 * The slice of `GoogleGenAI` the provider exercises. Narrowed so
 * tests can inject a stub without satisfying the full SDK surface.
 */
export interface GeminiModelsClient {
  /** One-shot generation; used by `chat`, `generate` and the non-streaming tool loops. */
  generateContent(params: GenerateContentParameters): Promise<GenerateContentResponse>
  /** Streaming generation; resolves to an async iterable of response chunks. */
  generateContentStream(
    params: GenerateContentParameters,
  ): Promise<AsyncIterable<GenerateContentResponse>>
  /** Token count for `contents` under `model`; `totalTokens` may be absent from the response. */
  countTokens(params: { model: string; contents: Content[] }): Promise<{ totalTokens?: number }>
  /**
   * Optional on the test seam — the real SDK always provides it,
   * but tests that don't exercise embed don't need to stub it.
   * `embed()` checks for it before calling and throws a
   * `BrainError` (not a bare `TypeError`) when it is missing.
   */
  embedContent?(params: {
    model: string
    contents: string[]
    config?: { outputDimensionality?: number; abortSignal?: AbortSignal }
  }): Promise<{ embeddings?: Array<{ values?: number[] }> }>
}
122
+
123
/**
 * Construction-time extras for `GeminiBrainDriver`. Every field is
 * optional; the driver works with an empty object.
 */
export interface GeminiProviderOptions {
  /**
   * Pre-built client. When supplied, its `models` object is used
   * as-is and no `GoogleGenAI` SDK instance is constructed.
   */
  client?: { models: GeminiModelsClient }
  /** Internal seam — tests inject a stub MCP client factory. */
  mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
  /** See `OpenAIProviderOptions.mcpPool` — same semantics. */
  mcpPool?: ResolveMcpToolsOptions['pool']
}
130
+
131
+ export class GeminiBrainDriver implements BrainDriver {
132
+ readonly name: string
133
+ private readonly models: GeminiModelsClient
134
+ private readonly defaultModel: string
135
+ private readonly defaultMaxTokens: number
136
+ private readonly defaultEmbedModel: string
137
+ private readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
138
+ private readonly mcpPool?: ResolveMcpToolsOptions['pool']
139
+
140
/**
 * Builds a driver instance.
 *
 * @param name     Identifier stored on `this.name` (also attached to
 *                 error context by `embed`).
 * @param config   Provider config: API key, optional default models,
 *                 default max tokens (falls back to 4096), and optional
 *                 `baseUrl` / `apiVersion` HTTP overrides.
 * @param options  Test seams and MCP wiring. When `options.client` is
 *                 given, the SDK is not instantiated at all.
 */
constructor(name: string, config: GeminiProviderConfig, options: GeminiProviderOptions = {}) {
  this.name = name
  this.defaultModel = config.defaultModel ?? DEFAULT_GEMINI_MODEL
  this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
  this.defaultEmbedModel = config.defaultEmbedModel ?? DEFAULT_GEMINI_EMBED_MODEL
  this.mcpClientFactory = options.mcpClientFactory
  this.mcpPool = options.mcpPool

  if (options.client) {
    this.models = options.client.models
  } else {
    const { baseUrl, apiVersion } = config
    // Only attach `httpOptions` when at least one override is present,
    // and only include the keys that were actually configured.
    const hasHttpOverride = baseUrl !== undefined || apiVersion !== undefined
    const httpOptions = hasHttpOverride
      ? {
          ...(baseUrl !== undefined ? { baseUrl } : {}),
          ...(apiVersion !== undefined ? { apiVersion } : {}),
        }
      : undefined
    const sdk = new GoogleGenAI({
      apiKey: config.apiKey,
      ...(httpOptions ? { httpOptions } : {}),
    })
    // The SDK's `models` surface is wider than the narrowed seam.
    this.models = sdk.models as unknown as GeminiModelsClient
  }
}
164
+
165
/**
 * Single-turn, non-streaming completion with no tools attached.
 *
 * @param messages  Conversation so far.
 * @param options   Per-call overrides (model, system prompt, max tokens, …).
 * @returns The mapped `ChatResult` for the first candidate.
 */
async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
  const request = this.buildParams(messages, options, [])
  return this.toChatResult(await this.models.generateContent(request), request.model)
}
170
+
171
/**
 * Streaming completion with no tools attached. Emits one `text` event
 * per chunk that carries non-empty text, then exactly one `stop` event.
 *
 * The `stop` event reports the last finish reason and the last usage
 * metadata seen on the stream; usage is all zeros when no chunk
 * carried usage metadata.
 */
async *stream(
  messages: readonly Message[],
  options: ChatOptions = {},
): AsyncIterable<StreamEvent> {
  const request = this.buildParams(messages, options, [])
  const chunks = await this.models.generateContentStream(request)

  let stopReason: string | null = null
  let usage: ChatUsage | undefined

  for await (const chunk of chunks) {
    const first = chunk.candidates?.[0]
    const delta = candidateText(first)
    if (delta.length > 0) {
      yield { type: 'text', delta }
    }
    if (first?.finishReason) {
      stopReason = String(first.finishReason)
    }
    if (chunk.usageMetadata) {
      usage = toUsage(chunk.usageMetadata)
    }
  }

  yield {
    type: 'stop',
    stopReason,
    usage: usage ?? {
      inputTokens: 0,
      outputTokens: 0,
      cacheReadTokens: 0,
      cacheCreationTokens: 0,
    },
  }
}
197
+
198
/**
 * Asks the API how many tokens `messages` occupy under the chosen
 * model. Only the converted message contents are sent.
 *
 * @returns `totalTokens` from the response, or `0` when it is absent.
 */
async countTokens(messages: readonly Message[], options: ChatOptions = {}): Promise<number> {
  const contents = this.toContents(messages)
  const { totalTokens } = await this.models.countTokens({
    model: options.model ?? this.defaultModel,
    contents,
  })
  return totalTokens ?? 0
}
204
+
205
/**
 * Gemini has no dedicated transcription endpoint, so we wrap a
 * chat call: an AudioBlock + a system message that tells the
 * model to transcribe verbatim. Apps that want OpenAI-style
 * Whisper transcription with `language` / `duration` metadata
 * route to OpenAI (or local Whisper via Ollama).
 *
 * `options.prompt` threads into the system instruction —
 * useful for style/vocabulary hints. `options.language` is
 * surfaced to the model in the system prompt (Gemini doesn't
 * have a dedicated language field).
 *
 * @param audio    Audio payload, sent as the sole block of a user message.
 * @param options  Optional `model`, `signal`, `language` and `prompt`.
 * @returns The chat result's `text`, `model` and `raw` response.
 */
async transcribe(
  audio: AudioSource,
  options: TranscribeOptions = {},
): Promise<TranscribeResult> {
  // Optional hints are appended only when provided.
  const lines = [
    'Transcribe the attached audio verbatim. Output ONLY the transcribed text — no preamble, no quotes, no commentary.',
    options.language ? `Audio language: ${options.language}.` : undefined,
    options.prompt ? `Style / vocabulary hints: ${options.prompt}` : undefined,
  ].filter((s): s is string => s !== undefined)
  const system = lines.join(' ')
  const chatResult = await this.chat(
    [
      {
        role: 'user',
        content: [{ type: 'audio', source: audio }],
      },
    ],
    {
      system,
      ...(options.model !== undefined ? { model: options.model } : {}),
      ...(options.signal !== undefined ? { signal: options.signal } : {}),
    },
  )
  return {
    text: chatResult.text,
    model: chatResult.model,
    raw: chatResult.raw,
  }
}

/**
 * Gemini embeddings via `ai.models.embedContent`. Returns one
 * vector per input text. `usage.inputTokens` is `0` — Gemini's
 * embed endpoint doesn't surface token counts in the response
 * for the Gemini Developer API tier (Vertex's request-level
 * metadata exposes billable characters, but that's a different
 * accounting unit and not the framework's contract). Apps that
 * need exact embed-token usage call `countTokens` separately
 * before the call.
 *
 * @param texts    Strings to embed.
 * @param options  Optional `model`, `dimensions` (sent as
 *                 `outputDimensionality`) and `signal`.
 * @throws BrainError when the injected client has no `embedContent`.
 */
async embed(
  texts: readonly string[],
  options: EmbedOptions = {},
): Promise<EmbedResult<{ embeddings?: Array<{ values?: number[] }> }>> {
  if (!this.models.embedContent) {
    throw new BrainError(
      `GeminiBrainDriver.embed: underlying SDK does not implement embedContent. This usually means a test stub omitted it.`,
      { context: { provider: this.name } },
    )
  }
  const model = options.model ?? this.defaultEmbedModel
  const config: { outputDimensionality?: number; abortSignal?: AbortSignal } = {}
  if (options.dimensions !== undefined) config.outputDimensionality = options.dimensions
  if (options.signal !== undefined) config.abortSignal = options.signal
  const response = await this.models.embedContent({
    model,
    // Copy rather than cast: the caller's readonly array is never handed to the SDK.
    contents: [...texts],
    // Omit `config` entirely when no option was set.
    ...(Object.keys(config).length > 0 ? { config } : {}),
  })
  // A missing `embeddings` list yields `[]`; a missing `values` yields an empty vector.
  const embeddings = (response.embeddings ?? []).map((e) => e.values ?? [])
  return {
    embeddings,
    model,
    usage: { inputTokens: 0 },
    raw: response,
  }
}
284
+
285
/**
 * Turns `servers` into tool descriptors via `resolveMcpTools`,
 * forwarding the driver's optional `clientFactory` (test seam) and
 * `mcpPool` (long-lived connections) when they are set. With no
 * servers it short-circuits to an empty tool list and a no-op
 * `close`. Callers invoke `close()` in a `finally` — a no-op when
 * the pool owns lifetimes.
 */
private resolveMcp(servers: readonly MCPServer[]): Promise<{
  tools: Tool[]
  close: () => Promise<void>
}> {
  if (servers.length > 0) {
    const clientFactory = this.mcpClientFactory
    const pool = this.mcpPool
    return resolveMcpTools(servers, {
      ...(clientFactory ? { clientFactory } : {}),
      ...(pool ? { pool } : {}),
    })
  }
  return Promise.resolve({ tools: [], close: async () => {} })
}
304
+
305
/**
 * Runs the non-streaming tool loop. MCP servers listed in
 * `options.mcpServers` are resolved first and their tools appended
 * after the caller-supplied ones; the MCP handle is closed once the
 * loop settles, whether it returned or threw.
 *
 * The first overload (suspend-capable options) may resolve to a
 * `SuspendedRun`; the second always resolves to an `AgentResult`.
 */
runWithTools(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptionsWithSuspend,
): Promise<AgentResult | SuspendedRun>
runWithTools(
  messages: readonly Message[],
  tools: readonly Tool[],
  options?: RunWithToolsOptions,
): Promise<AgentResult>
async runWithTools(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptions = {},
): Promise<AgentResult | SuspendedRun> {
  const mcp = await this.resolveMcp(options.mcpServers ?? [])
  try {
    const allTools = [...tools, ...mcp.tools]
    return await this._runLoop(messages, allTools, options)
  } finally {
    await mcp.close()
  }
}
327
+
328
/**
 * Non-streaming agent loop: call the model, execute any requested
 * tools, feed the results back, repeat.
 *
 * Exits when:
 *  - the model's turn contains no tool calls (normal completion);
 *  - `options.shouldSuspend` returns truthy for a call, which yields
 *    a suspended-run object carrying that call and all calls after it;
 *  - the number of completed tool rounds reaches `maxIterations`
 *    (default 10), reported as `stopReason: 'max_iterations'`.
 *
 * @throws BrainError when a response has no candidates.
 */
private async _runLoop(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptions,
): Promise<AgentResult | SuspendedRun> {
  const iterationCap = options.maxIterations ?? 10
  const toolsByName = new Map<string, Tool>()
  for (const tool of tools) toolsByName.set(tool.name, tool)
  const transcript: Message[] = [...messages]
  const totals: ChatUsage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
  }
  let rounds = 0

  for (;;) {
    checkAborted(options.signal)
    const request = this.buildParams(transcript, options, tools)
    const response = await this.models.generateContent(request)
    addUsage(totals, response.usageMetadata)

    const first = response.candidates?.[0]
    if (!first) {
      throw new BrainError('GeminiBrainDriver: response had no candidates.')
    }

    const assistantContent = fromGeminiParts(first.content?.parts ?? [])
    transcript.push({ role: 'assistant', content: assistantContent })

    const blocks = Array.isArray(assistantContent) ? assistantContent : []
    const requested = blocks.filter((b): b is ToolUseBlock => b.type === 'tool_use')

    if (requested.length === 0) {
      const text =
        typeof assistantContent === 'string' ? assistantContent : candidateText(first)
      return {
        text,
        messages: transcript,
        iterations: rounds,
        stopReason: first.finishReason ? String(first.finishReason) : 'stop',
        usage: totals,
      }
    }

    const toolResults: ContentBlock[] = []
    for (const [index, call] of requested.entries()) {
      if (options.shouldSuspend) {
        const suspend = await options.shouldSuspend(call, options.context)
        if (suspend) {
          return {
            status: 'suspended',
            pendingToolCalls: requested.slice(index),
            state: { messages: transcript, iterations: rounds, usage: totals },
          }
        }
      }
      const outcome = await runToolWithRecovery(
        toolsByName.get(call.name),
        call.name,
        call.id,
        call.input,
        options,
      )
      toolResults.push({
        type: 'tool_result',
        toolUseId: call.id,
        content: outcome.content,
        ...(outcome.isError ? { isError: true } : {}),
      } satisfies ToolResultBlock)
    }
    transcript.push({ role: 'user', content: toolResults })

    rounds += 1
    if (rounds >= iterationCap) {
      return {
        text: candidateText(first),
        messages: transcript,
        iterations: rounds,
        stopReason: 'max_iterations',
        usage: totals,
      }
    }
  }
}
412
+
413
/**
 * Tool loop whose final answer is parsed against `schema`. MCP
 * servers from `options.mcpServers` are resolved up front, their
 * tools appended after the caller's, and the MCP handle is closed
 * once the loop settles.
 */
async runWithToolsAndSchema<T>(
  messages: readonly Message[],
  tools: readonly Tool[],
  schema: OutputSchema<T>,
  options: RunWithToolsOptions = {},
): Promise<AgentGenerateResult<T>> {
  const mcp = await this.resolveMcp(options.mcpServers ?? [])
  try {
    const allTools = [...tools, ...mcp.tools]
    return await this._runLoopWithSchema(allTools, messages, schema, options)
  } finally {
    await mcp.close()
  }
}
426
+
427
/**
 * Non-streaming agent loop with structured output. Every request asks
 * for `application/json` constrained by `schema.jsonSchema`; the text
 * of the terminating turn is parsed with `parseGenerated`.
 *
 * Exits when the model's turn contains no tool calls, or when the
 * number of completed tool rounds reaches `maxIterations` (default
 * 10, reported as `stopReason: 'max_iterations'`).
 *
 * @param tools     Caller tools plus any resolved MCP tools.
 * @param messages  Starting conversation; copied, never mutated.
 * @param schema    Output schema used for both the request and parsing.
 * @param options   Loop options; `signal` is checked before each model call.
 * @throws BrainError when a response has no candidates.
 */
private async _runLoopWithSchema<T>(
  tools: readonly Tool[],
  messages: readonly Message[],
  schema: OutputSchema<T>,
  options: RunWithToolsOptions,
): Promise<AgentGenerateResult<T>> {
  const maxIterations = options.maxIterations ?? 10
  const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
  const workingMessages: Message[] = [...messages]
  const aggregated: ChatUsage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
  }
  let iterations = 0

  while (true) {
    // Honour cancellation between iterations, matching the other three
    // loops in this driver.
    checkAborted(options.signal)
    const params = this.buildParams(workingMessages, options, tools)
    // Layer the structured-output settings on top of the base config.
    params.config = {
      ...(params.config ?? {}),
      responseMimeType: 'application/json',
      responseJsonSchema: schema.jsonSchema,
    }
    const response = await this.models.generateContent(params)
    addUsage(aggregated, response.usageMetadata)

    const candidate = response.candidates?.[0]
    if (!candidate) {
      throw new BrainError('GeminiBrainDriver: response had no candidates.')
    }
    const parts = candidate.content?.parts ?? []
    const assistantContent = fromGeminiParts(parts)
    workingMessages.push({ role: 'assistant', content: assistantContent })

    const toolUses = (Array.isArray(assistantContent) ? assistantContent : []).filter(
      (b): b is ToolUseBlock => b.type === 'tool_use',
    )

    // No tool calls: this turn is the final answer.
    if (toolUses.length === 0) {
      const text = typeof assistantContent === 'string'
        ? assistantContent
        : candidateText(candidate)
      return {
        value: parseGenerated(text, schema),
        text,
        messages: workingMessages,
        iterations,
        stopReason: candidate.finishReason ? String(candidate.finishReason) : 'stop',
        usage: aggregated,
      }
    }

    // Run the requested tools in order and collect their results
    // into a single user turn.
    const resultBlocks: ContentBlock[] = []
    for (const call of toolUses) {
      const { content, isError } = await runToolWithRecovery(
        toolMap.get(call.name),
        call.name,
        call.id,
        call.input,
        options,
      )
      resultBlocks.push({
        type: 'tool_result',
        toolUseId: call.id,
        content,
        ...(isError ? { isError: true } : {}),
      } satisfies ToolResultBlock)
    }
    workingMessages.push({ role: 'user', content: resultBlocks })

    iterations++
    if (iterations >= maxIterations) {
      // Budget exhausted: parse whatever text the last model turn carried.
      const text = candidateText(candidate)
      return {
        value: parseGenerated(text, schema),
        text,
        messages: workingMessages,
        iterations,
        stopReason: 'max_iterations',
        usage: aggregated,
      }
    }
  }
}
512
+
513
/**
 * Streaming tool loop. MCP servers from `options.mcpServers` are
 * resolved before the first event, their tools appended after the
 * caller's, and the MCP handle is closed when the generator finishes
 * or is torn down.
 */
async *streamWithTools(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptions = {},
): AsyncIterable<AgentStreamEvent> {
  const mcp = await this.resolveMcp(options.mcpServers ?? [])
  try {
    const allTools = [...tools, ...mcp.tools]
    yield* this._streamLoop(messages, allTools, options)
  } finally {
    await mcp.close()
  }
}
525
+
526
/**
 * Streaming agent loop. Per iteration it emits `iteration_start`,
 * a `text` event for every non-empty text part, `iteration_end`,
 * then a `tool_use` / `tool_result` pair for each requested tool.
 * A single `stop` event ends the stream, either when a turn has no
 * tool calls or when completed tool rounds reach `maxIterations`
 * (default 10).
 *
 * Usage is accumulated from the last usage metadata seen in each
 * iteration's stream (input, output and cache-read counters).
 */
private async *_streamLoop(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptions,
): AsyncIterable<AgentStreamEvent> {
  const iterationCap = options.maxIterations ?? 10
  const toolsByName = new Map<string, Tool>()
  for (const tool of tools) toolsByName.set(tool.name, tool)
  const transcript: Message[] = [...messages]
  const totals: ChatUsage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
  }
  let rounds = 0

  for (;;) {
    checkAborted(options.signal)
    yield { type: 'iteration_start', iteration: rounds }

    const request = this.buildParams(transcript, options, tools)
    const chunks = await this.models.generateContentStream(request)

    const collected: Part[] = []
    let stopReason: string | null = null
    let turnUsage: ChatUsage | undefined

    for await (const chunk of chunks) {
      const first = chunk.candidates?.[0]
      const parts = first?.content?.parts ?? []
      for (const part of parts) {
        const delta = part.text
        if (typeof delta === 'string' && delta.length > 0) {
          yield { type: 'text', delta }
        }
      }
      collected.push(...parts)
      if (first?.finishReason) {
        stopReason = String(first.finishReason)
      }
      if (chunk.usageMetadata) {
        turnUsage = toUsage(chunk.usageMetadata)
      }
    }

    if (turnUsage) {
      totals.inputTokens += turnUsage.inputTokens
      totals.outputTokens += turnUsage.outputTokens
      totals.cacheReadTokens += turnUsage.cacheReadTokens
    }

    yield { type: 'iteration_end', iteration: rounds, stopReason }

    const assistantContent = fromGeminiParts(collected)
    transcript.push({ role: 'assistant', content: assistantContent })

    const blocks = Array.isArray(assistantContent) ? assistantContent : []
    const requested = blocks.filter((b): b is ToolUseBlock => b.type === 'tool_use')

    if (requested.length === 0) {
      yield {
        type: 'stop',
        stopReason: stopReason ?? 'stop',
        iterations: rounds,
        usage: totals,
        messages: transcript,
      }
      return
    }

    const toolResults: ContentBlock[] = []
    for (const call of requested) {
      yield { type: 'tool_use', id: call.id, name: call.name, input: call.input }
      const outcome = await runToolWithRecovery(
        toolsByName.get(call.name),
        call.name,
        call.id,
        call.input,
        options,
      )
      toolResults.push({
        type: 'tool_result',
        toolUseId: call.id,
        content: outcome.content,
        ...(outcome.isError ? { isError: true } : {}),
      } satisfies ToolResultBlock)
      yield {
        type: 'tool_result',
        id: call.id,
        name: call.name,
        content: outcome.content,
        isError: outcome.isError,
      }
    }
    transcript.push({ role: 'user', content: toolResults })

    rounds += 1
    if (rounds >= iterationCap) {
      yield {
        type: 'stop',
        stopReason: 'max_iterations',
        iterations: rounds,
        usage: totals,
        messages: transcript,
      }
      return
    }
  }
}
630
+
631
/**
 * Streaming tool loop with structured output. MCP servers from
 * `options.mcpServers` are resolved before the first event, their
 * tools appended after the caller's, and the MCP handle is closed
 * when the generator finishes or is torn down.
 */
async *streamWithToolsAndSchema<T>(
  messages: readonly Message[],
  tools: readonly Tool[],
  schema: OutputSchema<T>,
  options: RunWithToolsOptions = {},
): AsyncIterable<AgentStreamEvent<T>> {
  const mcp = await this.resolveMcp(options.mcpServers ?? [])
  try {
    const allTools = [...tools, ...mcp.tools]
    yield* this._streamLoopWithSchema(allTools, messages, schema, options)
  } finally {
    await mcp.close()
  }
}
649
+
650
/**
 * Streaming agent loop with structured output. Every request asks for
 * `application/json` constrained by `schema.jsonSchema`. Event order
 * per iteration is `iteration_start`, `text` deltas, `iteration_end`,
 * then a `tool_use` / `tool_result` pair per requested tool.
 *
 * The closing `stop` event additionally carries `text` (the text
 * streamed during the current iteration only) and `value` (that text
 * parsed with `parseGenerated`).
 */
private async *_streamLoopWithSchema<T>(
  tools: readonly Tool[],
  messages: readonly Message[],
  schema: OutputSchema<T>,
  options: RunWithToolsOptions,
): AsyncIterable<AgentStreamEvent<T>> {
  const iterationCap = options.maxIterations ?? 10
  const toolsByName = new Map<string, Tool>()
  for (const tool of tools) toolsByName.set(tool.name, tool)
  const transcript: Message[] = [...messages]
  const totals: ChatUsage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
  }
  let rounds = 0

  for (;;) {
    checkAborted(options.signal)
    yield { type: 'iteration_start', iteration: rounds }

    const request = this.buildParams(transcript, options, tools)
    request.config = {
      ...(request.config ?? {}),
      responseMimeType: 'application/json',
      responseJsonSchema: schema.jsonSchema,
    }
    const chunks = await this.models.generateContentStream(request)

    const collected: Part[] = []
    let streamedText = ''
    let stopReason: string | null = null
    let turnUsage: ChatUsage | undefined

    for await (const chunk of chunks) {
      const first = chunk.candidates?.[0]
      const parts = first?.content?.parts ?? []
      for (const part of parts) {
        const delta = part.text
        if (typeof delta === 'string' && delta.length > 0) {
          streamedText += delta
          yield { type: 'text', delta }
        }
      }
      collected.push(...parts)
      if (first?.finishReason) {
        stopReason = String(first.finishReason)
      }
      if (chunk.usageMetadata) {
        turnUsage = toUsage(chunk.usageMetadata)
      }
    }

    if (turnUsage) {
      totals.inputTokens += turnUsage.inputTokens
      totals.outputTokens += turnUsage.outputTokens
      totals.cacheReadTokens += turnUsage.cacheReadTokens
    }

    yield { type: 'iteration_end', iteration: rounds, stopReason }

    const assistantContent = fromGeminiParts(collected)
    transcript.push({ role: 'assistant', content: assistantContent })

    const blocks = Array.isArray(assistantContent) ? assistantContent : []
    const requested = blocks.filter((b): b is ToolUseBlock => b.type === 'tool_use')

    if (requested.length === 0) {
      const text = streamedText
      const value = parseGenerated(text, schema)
      yield {
        type: 'stop',
        stopReason: stopReason ?? 'stop',
        iterations: rounds,
        usage: totals,
        messages: transcript,
        value,
        text,
      } as AgentStreamEvent<T>
      return
    }

    const toolResults: ContentBlock[] = []
    for (const call of requested) {
      yield { type: 'tool_use', id: call.id, name: call.name, input: call.input }
      const outcome = await runToolWithRecovery(
        toolsByName.get(call.name),
        call.name,
        call.id,
        call.input,
        options,
      )
      toolResults.push({
        type: 'tool_result',
        toolUseId: call.id,
        content: outcome.content,
        ...(outcome.isError ? { isError: true } : {}),
      } satisfies ToolResultBlock)
      yield {
        type: 'tool_result',
        id: call.id,
        name: call.name,
        content: outcome.content,
        isError: outcome.isError,
      }
    }
    transcript.push({ role: 'user', content: toolResults })

    rounds += 1
    if (rounds >= iterationCap) {
      const text = streamedText
      const value = parseGenerated(text, schema)
      yield {
        type: 'stop',
        stopReason: 'max_iterations',
        iterations: rounds,
        usage: totals,
        messages: transcript,
        value,
        text,
      } as AgentStreamEvent<T>
      return
    }
  }
}
770
+
771
/**
 * Single-turn structured generation with no tools attached. Requests
 * `application/json` constrained by `schema.jsonSchema` and parses
 * the first candidate's text with `parseGenerated`.
 *
 * @returns Parsed `value`, the raw `text`, the model (response
 *          `modelVersion`, else the requested model), the stop reason
 *          (`null` when absent), usage and the raw response.
 */
async generate<T>(
  messages: readonly Message[],
  schema: OutputSchema<T>,
  options: ChatOptions = {},
): Promise<GenerateResult<T>> {
  const request = this.buildParams(messages, options, [])
  request.config = {
    ...(request.config ?? {}),
    responseMimeType: 'application/json',
    responseJsonSchema: schema.jsonSchema,
  }

  const response = await this.models.generateContent(request)
  const first = response.candidates?.[0]
  const text = candidateText(first)

  return {
    value: parseGenerated(text, schema),
    text,
    model: response.modelVersion ?? request.model,
    stopReason: first?.finishReason ? String(first.finishReason) : null,
    usage: toUsage(response.usageMetadata),
    raw: response,
  }
}
795
+
796
+ // ─── Param translation ──────────────────────────────────────────────────
797
+
798
/**
 * Translate framework-level inputs into a Gemini `generateContent`
 * request.
 *
 * Optional config fields are only set when there is something to put
 * in them: a non-empty system prompt, at least one function or server
 * tool, a thinking config, an abort signal.
 *
 * @param messages Conversation history to convert to Gemini contents.
 * @param options  Chat options (model, token cap, system prompt, server
 *                 tools, thinking / effort, abort signal).
 * @param tools    Function tools to declare; may be empty.
 * @returns The `{ model, contents, config }` request object.
 */
private buildParams(
  messages: readonly Message[],
  options: ChatOptions,
  tools: readonly Tool[],
): GenerateContentParameters {
  const model = options.model ?? this.defaultModel
  const contents = this.toContents(messages)

  const config: GenerateContentConfig = {
    maxOutputTokens: options.maxTokens ?? this.defaultMaxTokens,
  }

  const systemInstruction = systemPromptText(options.system)
  if (systemInstruction.length > 0) config.systemInstruction = systemInstruction

  // Function declarations go first (as one entry), server tools after.
  const toolEntries: NonNullable<GenerateContentConfig['tools']> = []
  if (tools.length > 0) {
    const functionDeclarations: FunctionDeclaration[] = []
    for (const tool of tools) {
      functionDeclarations.push({
        name: tool.name,
        description: tool.description,
        parametersJsonSchema: tool.inputSchema,
      })
    }
    toolEntries.push({ functionDeclarations })
  }
  const requestedServerTools = options.serverTools
  if (requestedServerTools && requestedServerTools.length > 0) {
    toolEntries.push(...geminiServerTools(requestedServerTools))
  }
  if (toolEntries.length > 0) config.tools = toolEntries

  const thinkingConfig = buildThinkingConfig(options)
  if (thinkingConfig !== undefined) config.thinkingConfig = thinkingConfig

  const { signal } = options
  if (signal !== undefined) config.abortSignal = signal

  return { model, contents, config }
}
837
+
838
/**
 * Convert framework messages into Gemini `Content[]`.
 *
 * `assistant` messages become role `model`; every other role becomes
 * `user`. Parts are produced by `toGeminiParts`.
 *
 * `toGeminiParts` converts one message at a time and therefore emits
 * each `functionResponse` with an empty `name` (a tool-result block only
 * carries the tool-use id). Gemini documents `functionResponse.name` as
 * the name of the function that was called, so the name is back-filled
 * here from the `functionCall` part with the same id seen earlier in
 * the conversation. Responses whose id has no earlier call are left
 * untouched.
 *
 * @param messages Conversation history, oldest first.
 * @returns One Gemini content entry per message.
 */
private toContents(messages: readonly Message[]): Content[] {
  // tool-use id → function name, learned from functionCall parts so far.
  const calledNames = new Map<string, string>()
  return messages.map((m) => {
    const parts = toGeminiParts(m.content)
    for (const part of parts) {
      const call = part.functionCall
      if (call?.id && call.name) calledNames.set(call.id, call.name)

      const reply = part.functionResponse
      if (reply?.id && !reply.name) {
        const name = calledNames.get(reply.id)
        if (name !== undefined) reply.name = name
      }
    }
    return {
      role: m.role === 'assistant' ? 'model' : 'user',
      parts,
    }
  })
}
844
+
845
/**
 * Map a Gemini response onto the framework's `ChatResult`.
 *
 * Only the first candidate is consulted. The reported model is the
 * response's `modelVersion` when present, otherwise `requestedModel`.
 *
 * @param response       Raw `generateContent` response.
 * @param requestedModel Model name that was sent with the request.
 * @returns Text, model, stringified finish reason (or `null`), usage
 *          counters and the raw response.
 */
private toChatResult(
  response: GenerateContentResponse,
  requestedModel: string,
): ChatResult<GenerateContentResponse> {
  const first = response.candidates?.[0]
  const finish = first?.finishReason
  const stopReason = finish ? String(finish) : null
  const model = response.modelVersion ?? requestedModel
  return {
    text: candidateText(first),
    model,
    stopReason,
    usage: toUsage(response.usageMetadata),
    raw: response,
  }
}
858
+ }
859
+
860
+ // ─── Shape converters ─────────────────────────────────────────────────────
861
+
862
/**
 * Bail out if `signal` has already fired.
 *
 * Throws the signal's own `reason` when it has one; otherwise a
 * `DOMException` named `AbortError`. Does nothing for a missing or
 * still-live signal.
 */
function checkAborted(signal: AbortSignal | undefined): void {
  if (!signal?.aborted) return
  throw signal.reason ?? new DOMException('Aborted', 'AbortError')
}
868
+
869
/**
 * Flatten a system prompt to plain text.
 *
 * - `undefined` yields `''`
 * - a string is returned as-is
 * - an array of blocks yields their `text` fields joined with `'\n'`
 * - a single block yields its `text`
 */
function systemPromptText(system: SystemPrompt | undefined): string {
  switch (typeof system) {
    case 'undefined':
      return ''
    case 'string':
      return system
    default:
      return Array.isArray(system)
        ? system.map((block) => block.text).join('\n')
        : system.text
  }
}
875
+
876
/**
 * Convert one message's content into Gemini `Part[]`.
 *
 * A plain string becomes a single text part. Content blocks map as:
 * - `text` to `{ text }`
 * - `image` / `document` / `audio` to `inlineData` for base64 sources,
 *   or `fileData` for URL sources with the MIME guessed from the URL
 * - `tool_use` to `functionCall` (missing input becomes `{}`)
 * - `tool_result` to `functionResponse`, wrapped as `{ error }` when
 *   the block is flagged `isError`, else `{ result }`
 *
 * Any other block type is skipped.
 */
function toGeminiParts(content: string | ContentBlock[]): Part[] {
  if (typeof content === 'string') return [{ text: content }]

  const out: Part[] = []
  for (const block of content) {
    switch (block.type) {
      case 'text':
        out.push({ text: block.text })
        break

      case 'image':
      case 'document':
      case 'audio': {
        // The three media kinds share one wire shape; only the MIME
        // differs. Gemini's fileData accepts public HTTPS and gs://
        // URIs; arbitrary private URLs need to be fetched and converted
        // to base64 by the app.
        const src = block.source
        if (src.type === 'base64') {
          out.push({ inlineData: { mimeType: src.mediaType, data: src.data } })
        } else {
          out.push({
            fileData: {
              fileUri: src.url,
              mimeType: guessMimeFromUrl(src.url, block.type),
            },
          })
        }
        break
      }

      case 'tool_use':
        out.push({
          functionCall: {
            id: block.id,
            name: block.name,
            args: (block.input ?? {}) as Record<string, unknown>,
          },
        })
        break

      case 'tool_result': {
        const body = block.content
        const text = typeof body === 'string' ? body : body.map((t) => t.text).join('')
        // NOTE(review): the result block carries only the tool-use id,
        // so `name` is sent empty. Gemini documents this field as the
        // called function's name. Confirm the API accepts id-only
        // matching, or that a caller fills the name in.
        out.push({
          functionResponse: {
            id: block.toolUseId,
            name: '',
            response: block.isError ? { error: text } : { result: text },
          },
        })
        break
      }

      default:
        // MCP blocks (Anthropic-only) silently dropped.
        break
    }
  }
  return out
}
925
+
926
/*
 * Note on `guessMimeFromUrl` (declared below, after `geminiServerTools`):
 * Gemini's `fileData.mimeType` is required, but our media-block
 * URL-source variants don't carry one (the app may not know it).
 * The MIME type is therefore guessed, best-effort, from the file
 * extension. When the extension is not recognised, the guess falls
 * back to the block type's most common MIME (jpeg for images, mp3 for
 * audio); documents are always reported as pdf.
 */
933
/**
 * Map framework `ServerTool[]` onto Gemini's typed tool entries:
 * `web_search` to `googleSearch`, `code_execution` to `codeExecution`,
 * `url_context` to `urlContext`.
 *
 * Each entry is emitted as an empty `{}` object, so per-tool settings
 * on the framework side (domain allowlists, max_uses, blocked_domains
 * on `web_search`) are not forwarded. Tool types not listed here are
 * skipped.
 *
 * @throws BrainError for `web_fetch`, which is Anthropic-only.
 */
function geminiServerTools(
  serverTools: readonly ServerTool[],
): NonNullable<GenerateContentConfig['tools']> {
  const entries: NonNullable<GenerateContentConfig['tools']> = []
  for (const tool of serverTools) {
    switch (tool.type) {
      case 'web_search':
        entries.push({ googleSearch: {} })
        break
      case 'code_execution':
        entries.push({ codeExecution: {} })
        break
      case 'url_context':
        entries.push({ urlContext: {} })
        break
      case 'web_fetch':
        throw new BrainError(
          'GeminiBrainDriver: server tool `web_fetch` is Anthropic-only. Use `url_context` for Gemini or route the call to Anthropic.',
          { context: { provider: 'google' } },
        )
      default:
        break
    }
  }
  return entries
}
963
+
964
/**
 * Best-effort MIME type for a URL-sourced media block.
 *
 * The guess is made from the file extension of the URL, compared
 * case-insensitively after removing any `?query` and `#fragment`
 * suffix (so `…/scan.png#page=2` is still recognised as PNG).
 *
 * @param url  The media URL.
 * @param kind The block type the URL belongs to.
 * @returns For images: png / webp / gif / jpeg, defaulting to
 *          `image/jpeg`. For documents: always `application/pdf`.
 *          For audio: mp3 / wav / ogg / flac / webm / aac (also used
 *          for `.m4a`), defaulting to `audio/mp3`.
 */
function guessMimeFromUrl(
  url: string,
  kind: 'image' | 'document' | 'audio',
): string {
  // Cut at the first `?` or `#`: neither the query nor the fragment is
  // part of the path, and either would hide the extension from endsWith.
  const lower = url.toLowerCase().split(/[?#]/, 1)[0] ?? ''
  if (kind === 'image') {
    if (lower.endsWith('.png')) return 'image/png'
    if (lower.endsWith('.webp')) return 'image/webp'
    if (lower.endsWith('.gif')) return 'image/gif'
    if (lower.endsWith('.jpg') || lower.endsWith('.jpeg')) return 'image/jpeg'
    return 'image/jpeg'
  }
  if (kind === 'document') {
    return 'application/pdf'
  }
  // audio
  if (lower.endsWith('.mp3')) return 'audio/mp3'
  if (lower.endsWith('.wav')) return 'audio/wav'
  if (lower.endsWith('.ogg')) return 'audio/ogg'
  if (lower.endsWith('.flac')) return 'audio/flac'
  if (lower.endsWith('.webm')) return 'audio/webm'
  if (lower.endsWith('.aac') || lower.endsWith('.m4a')) return 'audio/aac'
  return 'audio/mp3'
}
988
+
989
/**
 * Convert Gemini response parts back into framework content.
 *
 * Non-empty text parts become `text` blocks; `functionCall` parts become
 * `tool_use` blocks (a missing id is synthesized as `gemini_<random>`,
 * a missing name becomes `''`, missing args become `{}`). Any other
 * part is ignored.
 *
 * @returns The bare string when the result is exactly one text block,
 *          otherwise the block array (possibly empty).
 */
function fromGeminiParts(parts: readonly Part[]): string | ContentBlock[] {
  const out: ContentBlock[] = []
  for (const { text, functionCall } of parts) {
    if (typeof text === 'string' && text.length > 0) {
      out.push({ type: 'text', text })
      continue
    }
    if (!functionCall) continue
    out.push({
      type: 'tool_use',
      id: functionCall.id ?? `gemini_${cryptoRandomId()}`,
      name: functionCall.name ?? '',
      input: functionCall.args ?? {},
    } satisfies ToolUseBlock)
  }

  const [only] = out
  if (out.length === 1 && only?.type === 'text') return only.text
  return out
}
1007
+
1008
/**
 * Concatenate the text of every text-bearing part of a candidate.
 *
 * Parts without a string `text` are skipped. A missing candidate,
 * content or parts list yields `''`.
 */
function candidateText(candidate: { content?: { parts?: Part[] } } | undefined): string {
  let joined = ''
  for (const part of candidate?.content?.parts ?? []) {
    // Empty strings contribute nothing, so only the type needs checking.
    if (typeof part.text === 'string') joined += part.text
  }
  return joined
}
1015
+
1016
/**
 * Derive Gemini's `thinkingConfig` from chat options.
 *
 * `effort` takes precedence over `thinking`: when set, it is mapped to
 * a thinking level, or to `thinkingBudget: -1` if no level corresponds.
 * Otherwise `thinking: 'adaptive'` yields `thinkingBudget: -1` and
 * `thinking: 'disabled'` yields `thinkingBudget: 0`.
 *
 * @returns The config, or `undefined` when neither option applies.
 */
function buildThinkingConfig(options: ChatOptions): GenerateContentConfig['thinkingConfig'] {
  const { effort } = options
  if (effort !== undefined) {
    const level = effortToThinkingLevel(effort)
    if (level === undefined) return { thinkingBudget: -1 }
    return { thinkingLevel: level }
  }

  switch (options.thinking) {
    case 'adaptive':
      return { thinkingBudget: -1 }
    case 'disabled':
      return { thinkingBudget: 0 }
    default:
      return undefined
  }
}
1025
+
1026
/**
 * Map the framework's effort setting onto a Gemini `ThinkingLevel`.
 *
 * `low` maps to LOW and `medium` to MEDIUM; `high`, `xhigh` and `max`
 * all map to HIGH. Any other value yields `undefined`.
 */
function effortToThinkingLevel(
  effort: NonNullable<ChatOptions['effort']>,
): ThinkingLevel | undefined {
  if (effort === 'low') return ThinkingLevel.LOW
  if (effort === 'medium') return ThinkingLevel.MEDIUM
  if (effort === 'high' || effort === 'xhigh' || effort === 'max') {
    return ThinkingLevel.HIGH
  }
  return undefined
}
1038
+
1039
/**
 * Convert Gemini usage metadata into the framework's `ChatUsage`.
 *
 * Missing metadata or missing counters read as 0.
 * `cacheCreationTokens` is always 0.
 */
function toUsage(u: { promptTokenCount?: number; candidatesTokenCount?: number; cachedContentTokenCount?: number } | undefined): ChatUsage {
  const meta: NonNullable<typeof u> = u ?? {}
  const inputTokens = meta.promptTokenCount ?? 0
  const outputTokens = meta.candidatesTokenCount ?? 0
  const cacheReadTokens = meta.cachedContentTokenCount ?? 0
  return { inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens: 0 }
}
1047
+
1048
/**
 * Add one response's usage metadata to a running total, in place.
 *
 * Input, output and cache-read counters are incremented (missing
 * counters count as 0); `cacheCreationTokens` is left untouched.
 * Does nothing when `u` is absent.
 *
 * @param acc Accumulator to mutate.
 * @param u   Usage metadata of the latest response, if any.
 */
function addUsage(
  acc: ChatUsage,
  u: { promptTokenCount?: number; candidatesTokenCount?: number; cachedContentTokenCount?: number } | undefined,
): void {
  if (u) {
    acc.inputTokens = acc.inputTokens + (u.promptTokenCount ?? 0)
    acc.outputTokens = acc.outputTokens + (u.candidatesTokenCount ?? 0)
    acc.cacheReadTokens = acc.cacheReadTokens + (u.cachedContentTokenCount ?? 0)
  }
}
1057
+
1058
/**
 * Produce a short random base-36 token (at most 10 characters, digits
 * and lowercase letters) from `Math.random()`.
 *
 * Despite the name this is not cryptographically strong. It is a
 * low-entropy fallback used to synthesize tool-use ids when Gemini
 * omits them.
 */
function cryptoRandomId(): string {
  const base36 = Math.random().toString(36)
  // Skip the leading "0." and keep up to ten digits.
  return base36.slice(2, 12)
}