@strav/brain 0.4.31 → 1.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,101 +0,0 @@
1
- import type { Message, ContentBlock } from '../types.ts'
2
-
3
/** Known context window sizes (in tokens) by model identifier. */
const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
  // Anthropic
  'claude-opus-4-20250514': 200_000,
  'claude-sonnet-4-20250514': 200_000,
  'claude-sonnet-4-5-20250514': 200_000,
  'claude-haiku-3-5-20241022': 200_000,

  // OpenAI
  'gpt-4o': 128_000,
  'gpt-4o-mini': 128_000,
  'gpt-4-turbo': 128_000,
  'gpt-4.1': 1_000_000,
  'gpt-4.1-mini': 1_000_000,
  'gpt-4.1-nano': 1_000_000,
  o3: 200_000,
  'o3-mini': 200_000,
  'o4-mini': 200_000,

  // DeepSeek
  'deepseek-chat': 64_000,
  'deepseek-reasoner': 64_000,
}

/** Context window returned for models that match no entry in the table. */
const DEFAULT_CONTEXT_WINDOW = 128_000

/** Overhead tokens per message (role, formatting, separators). */
const MESSAGE_OVERHEAD = 4

/** Average characters per token for estimation. */
const CHARS_PER_TOKEN = 4

/**
 * Approximate token counting without external dependencies.
 *
 * Uses character-based estimation (~4 chars per token) which is
 * conservative enough for budget management. Exact counts are not
 * needed — we just need to know when we're approaching the limit.
 */
export class TokenCounter {
  /**
   * Estimate token count for a string.
   *
   * @param text - Text to measure; an empty (or otherwise falsy) value counts as 0.
   * @returns `ceil(text.length / CHARS_PER_TOKEN)`.
   */
  static estimate(text: string): number {
    if (!text) return 0
    return Math.ceil(text.length / CHARS_PER_TOKEN)
  }

  /**
   * Estimate token count for a Message array.
   *
   * Each message costs a fixed overhead plus its content estimate. Each tool
   * call adds its name, its JSON-serialized arguments and one more overhead.
   *
   * @param messages - Messages to measure.
   * @returns Estimated total token count (0 for an empty array).
   */
  static estimateMessages(messages: Message[]): number {
    let total = 0

    for (const msg of messages) {
      total += MESSAGE_OVERHEAD
      total += TokenCounter.estimateContent(msg.content)

      if (msg.toolCalls) {
        for (const call of msg.toolCalls) {
          total += TokenCounter.estimate(call.name)
          // JSON.stringify yields undefined for undefined input; count that as empty.
          total += TokenCounter.estimate(JSON.stringify(call.arguments) ?? '')
          total += MESSAGE_OVERHEAD
        }
      }
    }

    return total
  }

  /**
   * Get the context window size for a model, or the default fallback.
   *
   * Resolution order:
   *  1. exact table entry;
   *  2. the longest table key that is a prefix of `model`
   *     (e.g. 'gpt-4o-2024-08-06' resolves through 'gpt-4o');
   *  3. the first table key that `model` is a prefix of
   *     (e.g. 'claude-sonnet-4' resolves through 'claude-sonnet-4-20250514');
   *  4. `DEFAULT_CONTEXT_WINDOW`.
   *
   * @param model - Model identifier; an empty string yields the default.
   * @returns Context window size in tokens.
   */
  static contextWindow(model: string): number {
    // An empty id is a prefix of every key, so it must not reach the prefix scan.
    if (!model) return DEFAULT_CONTEXT_WINDOW

    // Own-property check: plain indexing would also find inherited members
    // such as 'constructor' or 'toString' and return a non-number.
    if (Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_WINDOWS, model)) {
      return MODEL_CONTEXT_WINDOWS[model]!
    }

    const entries = Object.entries(MODEL_CONTEXT_WINDOWS)

    // Most specific known key that the requested id extends.
    let bestKey = ''
    let bestSize: number | undefined
    for (const [key, size] of entries) {
      if (model.startsWith(key) && key.length > bestKey.length) {
        bestKey = key
        bestSize = size
      }
    }
    if (bestSize !== undefined) return bestSize

    // Shortened alias of a known key; first match in declaration order.
    for (const [key, size] of entries) {
      if (key.startsWith(model)) return size
    }

    return DEFAULT_CONTEXT_WINDOW
  }

  /**
   * Estimate tokens for message content (string or ContentBlock[]).
   *
   * For block content, sums the estimates of each block's `text`, `content`,
   * JSON-serialized `input` and `name`, skipping fields that are falsy.
   */
  private static estimateContent(content: string | ContentBlock[]): number {
    if (typeof content === 'string') {
      return TokenCounter.estimate(content)
    }

    let total = 0
    for (const block of content) {
      if (block.text) total += TokenCounter.estimate(block.text)
      if (block.content) total += TokenCounter.estimate(block.content)
      if (block.input) total += TokenCounter.estimate(JSON.stringify(block.input))
      if (block.name) total += TokenCounter.estimate(block.name)
    }
    return total
  }
}
@@ -1,68 +0,0 @@
1
- import type { Message } from '../types.ts'
2
-
3
- // ── Configuration ───────────────────────────────────────────────────────────
4
-
5
/** Tuning knobs for conversation memory; every field is optional. */
export interface MemoryConfig {
  /**
   * Upper bound, in tokens, for the entire context window.
   * Default: auto-detected from the model.
   */
  maxContextTokens?: number

  /**
   * Compaction strategy name: 'sliding_window' or 'summarize'.
   * Default: 'summarize'.
   */
  strategy?: string

  /**
   * Fraction of the context held back for the response.
   * Default: 0.25.
   */
  responseReserve?: number

  /**
   * Minimum number of messages kept in working memory before compacting.
   * Default: 4.
   */
  minWorkingMessages?: number

  /**
   * How many of the oldest messages are compacted per cycle.
   * Default: 10.
   */
  compactionBatchSize?: number

  /**
   * Whether semantic facts are extracted during compaction.
   * Default: true.
   */
  extractFacts?: boolean
}
19
-
20
- // ── Compaction Strategy ─────────────────────────────────────────────────────
21
-
22
/** Outcome of compacting a batch of messages. */
export interface CompactionResult {
  /** Text that stands in for the messages that were compacted. */
  summary: string

  /** Facts pulled out while compacting, when extraction is enabled. */
  facts?: Fact[]

  /** Number of tokens the summary occupies. */
  summaryTokens: number
}
30
-
31
/** A named strategy that turns a batch of messages into a CompactionResult. */
export interface CompactionStrategy {
  /** Identifier of the strategy. */
  readonly name: string

  /**
   * Compact the given messages.
   *
   * @param messages - Messages to compact.
   * @param options - Provider and model names, plus an optional existing
   *   summary and an optional fact-extraction flag.
   * @returns A promise for the compaction result.
   */
  compact(
    messages: Message[],
    options: {
      provider: string
      model: string
      existingSummary?: string
      extractFacts?: boolean
    }
  ): Promise<CompactionResult>
}
38
-
39
- // ── Thread Store ────────────────────────────────────────────────────────────
40
-
41
/** Serialized form of a memory thread, as exchanged with a ThreadStore. */
export interface SerializedMemoryThread {
  /** Thread identifier. */
  id: string

  /** Messages held by the thread. */
  messages: Message[]

  /** Optional system text. */
  system?: string

  /** Optional summary text. */
  summary?: string

  /** Optional list of facts. */
  facts?: Fact[]

  /** Optional free-form metadata. */
  metadata?: Record<string, unknown>

  /** Creation timestamp, as a string. */
  createdAt: string

  /** Last-update timestamp, as a string. */
  updatedAt: string
}
51
-
52
/** Asynchronous persistence contract for serialized memory threads. */
export interface ThreadStore {
  /** Store the given thread. */
  save(thread: SerializedMemoryThread): Promise<void>

  /** Fetch a thread by id; resolves to null when there is none. */
  load(id: string): Promise<SerializedMemoryThread | null>

  /** Remove the thread with the given id. */
  delete(id: string): Promise<void>

  /** List threads, optionally paged through `limit` and `offset`. */
  list(options?: {
    limit?: number
    offset?: number
  }): Promise<SerializedMemoryThread[]>
}
58
-
59
- // ── Semantic Memory ─────────────────────────────────────────────────────────
60
-
61
/** A single key/value fact held in semantic memory. */
export interface Fact {
  /** Fact key. */
  key: string

  /** Fact value. */
  value: string

  /** Origin of the fact: 'extracted' or 'explicit'. */
  source: 'extracted' | 'explicit'

  /** Confidence score for the fact. */
  confidence: number

  /** Creation timestamp, as a string. */
  createdAt: string

  /** Last-update timestamp, as a string. */
  updatedAt: string
}
@@ -1,496 +0,0 @@
1
- import { parseSSE } from '../utils/sse_parser.ts'
2
- import { retryableFetch, type RetryOptions } from '../utils/retry.ts'
3
- import { ExternalServiceError } from '@strav/kernel'
4
- import type {
5
- AIProvider,
6
- CompletionRequest,
7
- CompletionResponse,
8
- StreamChunk,
9
- EmbeddingResponse,
10
- ProviderConfig,
11
- Message,
12
- ToolCall,
13
- TranscribeRequest,
14
- TranscriptionResponse,
15
- Usage,
16
- } from '../types.ts'
17
-
18
/**
 * Google Gemini API provider.
 *
 * Translates the framework's normalized CompletionRequest/Response
 * to/from the Google Generative Language API wire format. Uses raw `fetch()`
 * through `retryableFetch`.
 */
export class GoogleProvider implements AIProvider {
  readonly name: string
  private apiKey: string
  private baseUrl: string
  private defaultModel: string
  private defaultMaxTokens?: number
  private retryOptions: RetryOptions
  /** Tool-call id → function name, recorded from assistant messages in mapMessages(). */
  private toolCallIdToNameMap: Map<string, string> = new Map()

  /**
   * @param config - Provider settings. `baseUrl` defaults to the public
   *   v1beta endpoint (one trailing slash is stripped), `model` to
   *   'gemini-2.0-flash', `maxRetries` to 3 and `retryBaseDelay` to 1000.
   */
  constructor(config: ProviderConfig) {
    this.name = 'google'
    this.apiKey = config.apiKey
    this.baseUrl = (config.baseUrl ?? 'https://generativelanguage.googleapis.com/v1beta').replace(/\/$/, '')
    this.defaultModel = config.model || 'gemini-2.0-flash'
    this.defaultMaxTokens = config.maxTokens
    this.retryOptions = {
      maxRetries: config.maxRetries ?? 3,
      baseDelay: config.retryBaseDelay ?? 1000,
    }
  }

  /**
   * Run a non-streaming completion via `:generateContent`.
   *
   * @param request - Normalized completion request; `request.model` overrides the default model.
   * @returns The normalized completion response.
   * @throws ExternalServiceError when the response contains no candidates.
   */
  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const model = request.model ?? this.defaultModel
    const body = this.buildRequestBody(request, false)

    const response = await retryableFetch(
      'Google',
      `${this.baseUrl}/models/${model}:generateContent`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    const data: any = await response.json()
    return this.parseResponse(data)
  }

  /**
   * Run a streaming completion via `:streamGenerateContent`.
   *
   * Yields `text` chunks for text parts, a `tool_start`/`tool_end` pair for
   * every function-call part, and on the chunk carrying a `finishReason` an
   * optional `usage` chunk followed by `done`.
   *
   * @param request - Normalized completion request; `request.model` overrides the default model.
   * @throws ExternalServiceError when the HTTP response has no body.
   */
  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const model = request.model ?? this.defaultModel
    const body = this.buildRequestBody(request, true)

    const response = await retryableFetch(
      'Google',
      // `alt=sse` asks for server-sent events; without it the endpoint
      // answers with a JSON array that parseSSE cannot consume.
      `${this.baseUrl}/models/${model}:streamGenerateContent?alt=sse`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    if (!response.body) {
      throw new ExternalServiceError('Google', undefined, 'No stream body returned')
    }

    let toolIndex = -1

    for await (const sse of parseSSE(response.body)) {
      if (sse.data === '[DONE]') {
        yield { type: 'done' }
        break
      }

      let parsed: any
      try {
        parsed = JSON.parse(sse.data)
      } catch {
        // Skip events whose payload is not valid JSON.
        continue
      }

      const candidate = parsed.candidates?.[0]
      if (!candidate) continue

      // Process content parts if they exist
      if (candidate.content?.parts) {
        for (const part of candidate.content.parts) {
          if (part.text) {
            yield { type: 'text', text: part.text }
          } else if (part.functionCall) {
            // Every functionCall part is handled as one complete call, so it
            // is opened and closed immediately. Calls without `args` are
            // therefore closed too and cannot swallow the calls after them.
            toolIndex++
            const toolCall: ToolCall = {
              id: part.functionCall.id || this.generateToolCallId(),
              name: part.functionCall.name,
              arguments: part.functionCall.args || {},
            }

            // NOTE(review): StreamChunk's definition is not visible here. The
            // parsed arguments ride on `tool_start`; if consumers rebuild
            // arguments from a separate delta chunk type, confirm whether one
            // should be emitted as well.
            yield { type: 'tool_start', toolCall, toolIndex }
            yield { type: 'tool_end', toolIndex }
          }
        }
      }

      // Check if this is the final chunk
      if (candidate.finishReason) {
        // Handle usage information in the final chunk
        if (parsed.usageMetadata) {
          const usage: Usage = {
            inputTokens: parsed.usageMetadata.promptTokenCount ?? 0,
            outputTokens: parsed.usageMetadata.candidatesTokenCount ?? 0,
            totalTokens: parsed.usageMetadata.totalTokenCount ?? 0,
          }
          yield { type: 'usage', usage }
        }

        yield { type: 'done' }
        break
      }
    }
  }

  /**
   * Embed one or more texts via `:embedContent`, one request per input.
   *
   * @param input - A single text or a list of texts.
   * @param model - Embedding model; defaults to 'text-embedding-004'.
   * @returns One embedding per input, in input order. `usage.totalTokens` is
   *   a rough estimate of 10 tokens per input, not a measured count.
   * @throws ExternalServiceError when a response carries no embedding values,
   *   since dropping it would misalign embeddings with their inputs.
   */
  async embed(input: string | string[], model?: string): Promise<EmbeddingResponse> {
    const embeddingModel = model ?? 'text-embedding-004'
    const inputs = Array.isArray(input) ? input : [input]
    const embeddings: number[][] = []

    // Process each input separately as Google's batch API might not be available
    for (const [index, text] of inputs.entries()) {
      const payload = {
        model: `models/${embeddingModel}`,
        content: {
          parts: [{ text }],
        },
      }

      const response = await retryableFetch(
        'Google',
        `${this.baseUrl}/models/${embeddingModel}:embedContent`,
        { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(payload) },
        this.retryOptions
      )

      const data: any = await response.json()
      if (!data.embedding?.values) {
        throw new ExternalServiceError(
          'Google',
          undefined,
          `No embedding returned for input at index ${index}`
        )
      }
      embeddings.push(data.embedding.values)
    }

    return {
      embeddings,
      model: embeddingModel,
      usage: { totalTokens: inputs.length * 10 }, // Rough estimate, Google doesn't provide token count for embeddings
    }
  }

  /**
   * Speech-to-text via Gemini's multimodal generateContent endpoint.
   *
   * Gemini doesn't have a dedicated STT endpoint; instead, audio is
   * passed as an inline `audio/*` part alongside a text prompt asking
   * for a transcription. We default to `gemini-2.5-flash` (fast, cheap,
   * Thai-capable). Override `model` for `gemini-2.5-pro` when accuracy
   * matters more than latency.
   *
   * Inline audio is capped at ~20MB across the whole request. Chunk
   * longer recordings, or use Gemini's Files API (upload + reference)
   * which isn't covered here — out of scope for the typical SME
   * voice-note flow (<=60s clips).
   *
   * @param request - Audio (Blob or bytes) plus optional model, content type
   *   (default 'audio/mpeg'), language and vocabulary prompt.
   * @returns The transcript text, the requested language echoed back, and the raw response.
   */
  async transcribe(request: TranscribeRequest): Promise<TranscriptionResponse> {
    const model = request.model ?? 'gemini-2.5-flash'
    const contentType = request.contentType ?? 'audio/mpeg'

    const bytes =
      request.audio instanceof Blob
        ? new Uint8Array(await request.audio.arrayBuffer())
        : request.audio
    const base64 = encodeBase64(bytes)

    const instruction = buildTranscriptionInstruction(request)

    const body = {
      contents: [
        {
          role: 'user',
          parts: [
            { text: instruction },
            { inline_data: { mime_type: contentType, data: base64 } },
          ],
        },
      ],
      generationConfig: {
        // Deterministic output for a transcription task.
        temperature: 0,
      },
    }

    const response = await retryableFetch(
      'Google',
      `${this.baseUrl}/models/${model}:generateContent`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    const data: any = await response.json()
    const text = extractTranscript(data)

    return {
      text,
      language: request.language,
      raw: data,
    }
  }

  // ── Private helpers ──────────────────────────────────────────────────────

  /** Headers sent with every request: JSON content type plus the API key. */
  private buildHeaders(): Record<string, string> {
    return {
      'content-type': 'application/json',
      'x-goog-api-key': this.apiKey,
    }
  }

  /**
   * Translate a normalized request into a generateContent request body.
   *
   * @param request - Normalized completion request.
   * @param stream - Currently unused; the body is the same for both modes.
   * @returns Body with `contents` and, when applicable, `systemInstruction`,
   *   `generationConfig`, `tools` and `toolConfig`.
   */
  private buildRequestBody(request: CompletionRequest, stream: boolean): Record<string, unknown> {
    const body: Record<string, unknown> = {
      contents: this.mapMessages(request.messages),
    }

    // Add system instruction if present
    if (request.system) {
      body.systemInstruction = {
        parts: [{ text: request.system }],
      }
    }

    // Generation config
    const generationConfig: Record<string, unknown> = {}

    if (request.maxTokens !== undefined) {
      generationConfig.maxOutputTokens = request.maxTokens
    } else if (this.defaultMaxTokens !== undefined) {
      generationConfig.maxOutputTokens = this.defaultMaxTokens
    }

    if (request.temperature !== undefined) {
      generationConfig.temperature = request.temperature
    }

    if (request.stopSequences?.length) {
      generationConfig.stopSequences = request.stopSequences
    }

    // Structured output
    if (request.schema) {
      generationConfig.responseMimeType = 'application/json'
      generationConfig.responseSchema = request.schema
    }

    if (Object.keys(generationConfig).length > 0) {
      body.generationConfig = generationConfig
    }

    // Tools (function declarations)
    if (request.tools?.length) {
      body.tools = [{
        functionDeclarations: request.tools.map(t => ({
          name: t.name,
          description: t.description,
          parameters: t.parameters,
        })),
      }]

      // Tool choice configuration (only meaningful when tools are declared)
      if (request.toolChoice) {
        const toolConfig: Record<string, unknown> = {}

        if (request.toolChoice === 'auto') {
          toolConfig.functionCallingConfig = { mode: 'AUTO' }
        } else if (request.toolChoice === 'required') {
          toolConfig.functionCallingConfig = { mode: 'ANY' }
        } else if (typeof request.toolChoice === 'object' && request.toolChoice.name) {
          // Forcing one specific tool: ANY mode restricted to that name.
          toolConfig.functionCallingConfig = {
            mode: 'ANY',
            allowedFunctionNames: [request.toolChoice.name],
          }
        }

        if (Object.keys(toolConfig).length > 0) {
          body.toolConfig = toolConfig
        }
      }
    }

    return body
  }

  /**
   * Convert normalized messages into Gemini `contents` entries.
   *
   * Assistant messages become `model` turns and register their tool-call ids
   * in `toolCallIdToNameMap`. Tool messages become `user` turns holding a
   * `functionResponse` named via that map. Every other role becomes a `user`
   * text turn.
   *
   * @throws ExternalServiceError when a tool message's id has no recorded function name.
   */
  private mapMessages(messages: Message[]): any[] {
    const result: any[] = []

    for (const msg of messages) {
      if (msg.role === 'tool') {
        // Tool results go as user messages with function response parts
        // Get the function name from our mapping
        const functionName = msg.toolCallId ? this.toolCallIdToNameMap.get(msg.toolCallId) : undefined

        if (!functionName) {
          throw new ExternalServiceError('Google', undefined, `No function name found for tool call ID: ${msg.toolCallId}`)
        }

        result.push({
          role: 'user',
          parts: [
            {
              functionResponse: {
                name: functionName,
                response: {
                  content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
                },
              },
            },
          ],
        })
      } else if (msg.role === 'assistant') {
        const parts: any[] = []

        // Add text content if present (non-string content contributes no text part)
        const text = typeof msg.content === 'string' ? msg.content : ''
        if (text) {
          parts.push({ text })
        }

        // Add function call parts and track their IDs
        if (msg.toolCalls?.length) {
          for (const tc of msg.toolCalls) {
            // Store the mapping for later use
            this.toolCallIdToNameMap.set(tc.id, tc.name)

            parts.push({
              functionCall: {
                name: tc.name,
                args: tc.arguments,
              },
            })
          }
        }

        result.push({
          role: 'model', // Gemini uses 'model' instead of 'assistant'
          parts,
        })
      } else {
        // User messages
        result.push({
          role: 'user',
          parts: [{ text: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) }],
        })
      }
    }

    return result
  }

  /**
   * Convert a generateContent response into a normalized CompletionResponse.
   *
   * Only the first candidate is read. Text parts are concatenated and
   * function-call parts become tool calls (with a generated id when the API
   * supplies none).
   *
   * @throws ExternalServiceError when the response has no candidates.
   */
  private parseResponse(data: any): CompletionResponse {
    const candidate = data.candidates?.[0]
    if (!candidate) {
      throw new ExternalServiceError('Google', undefined, 'No candidates in response')
    }

    let content = ''
    const toolCalls: ToolCall[] = []

    // Extract content from parts
    if (Array.isArray(candidate.content?.parts)) {
      for (const part of candidate.content.parts) {
        if (part.text) {
          content += part.text
        } else if (part.functionCall) {
          toolCalls.push({
            id: part.functionCall.id || this.generateToolCallId(),
            name: part.functionCall.name,
            arguments: part.functionCall.args || {},
          })
        }
      }
    }

    const usage: Usage = {
      inputTokens: data.usageMetadata?.promptTokenCount ?? 0,
      outputTokens: data.usageMetadata?.candidatesTokenCount ?? 0,
      totalTokens: data.usageMetadata?.totalTokenCount ?? 0,
    }

    let stopReason: CompletionResponse['stopReason'] = 'end'

    // Check tool calls first, as Google may return STOP even with tool calls
    if (toolCalls.length > 0) {
      stopReason = 'tool_use'
    } else {
      switch (candidate.finishReason) {
        case 'STOP':
          stopReason = 'end'
          break
        case 'MAX_TOKENS':
          stopReason = 'max_tokens'
          break
        case 'SAFETY':
        case 'RECITATION':
          stopReason = 'stop_sequence'
          break
      }
    }

    return {
      id: data.candidates?.[0]?.id || this.generateResponseId(),
      content,
      toolCalls,
      stopReason,
      usage,
      raw: data,
    }
  }

  /** Generate a local `tool_…` id for function calls that arrive without one. */
  private generateToolCallId(): string {
    return `tool_${Math.random().toString(36).substring(2, 15)}`
  }

  /** Generate a local `resp_…` id for responses that carry no candidate id. */
  private generateResponseId(): string {
    return `resp_${Math.random().toString(36).substring(2, 15)}`
  }
}
460
-
461
/**
 * Compose the text instruction that accompanies the audio part.
 *
 * Always begins with the base transcription instruction. A language sentence
 * is appended when `request.language` is set, then a vocabulary hint when
 * `request.prompt` is set. The sentences are joined with single spaces.
 */
function buildTranscriptionInstruction(request: TranscribeRequest): string {
  let instruction =
    'Transcribe the audio to text. Return only the transcription, without commentary, timestamps, or speaker labels.'

  if (request.language) {
    instruction += ` The audio is in ${request.language}. Preserve the original language in the output.`
  }

  if (request.prompt) {
    // The priming hint biases vocabulary (proper nouns, menu items, dialect
    // markers). It stays in the same instruction text rather than a separate
    // system field.
    instruction += ` Context to help with vocabulary: ${request.prompt}`
  }

  return instruction
}
477
-
478
/**
 * Pull the transcript text out of a generateContent response.
 *
 * Reads only the first candidate, concatenates the `text` of every part that
 * has a string `text` and trims the result.
 *
 * @param data - Parsed response JSON (may be null/undefined or malformed).
 * @returns The trimmed transcript, or '' when the first candidate has no
 *   array of parts.
 */
function extractTranscript(data: any): string {
  const parts = data?.candidates?.[0]?.content?.parts
  // Guard with Array.isArray so a malformed, non-array `parts` yields an
  // empty transcript instead of a TypeError on `.map`.
  if (!Array.isArray(parts)) return ''
  return parts
    .map((part: any) => (typeof part?.text === 'string' ? part.text : ''))
    .join('')
    .trim()
}
486
-
487
/**
 * Base64-encode raw bytes.
 *
 * Uses `Buffer` when it is defined (Node / Bun). Otherwise maps every byte to
 * a character code and passes the resulting binary string to `btoa`.
 */
function encodeBase64(bytes: Uint8Array): string {
  const hasBuffer = typeof Buffer !== 'undefined'
  if (hasBuffer) {
    return Buffer.from(bytes).toString('base64')
  }

  // Fallback for environments without Buffer.
  const chars: string[] = []
  bytes.forEach((byte) => {
    chars.push(String.fromCharCode(byte))
  })
  return btoa(chars.join(''))
}