@strav/brain 0.4.30 → 1.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +17 -20
- package/src/brain_config.ts +72 -0
- package/src/brain_error.ts +29 -0
- package/src/brain_manager.ts +113 -132
- package/src/brain_provider.ts +81 -6
- package/src/index.ts +27 -43
- package/src/provider.ts +48 -0
- package/src/providers/anthropic_provider.ts +192 -246
- package/src/thread.ts +99 -0
- package/src/types.ts +101 -246
- package/CHANGELOG.md +0 -44
- package/README.md +0 -121
- package/src/agent.ts +0 -93
- package/src/helpers.ts +0 -1082
- package/src/mcp_toolbox.ts +0 -62
- package/src/memory/context_budget.ts +0 -120
- package/src/memory/index.ts +0 -17
- package/src/memory/memory_manager.ts +0 -168
- package/src/memory/semantic_memory.ts +0 -89
- package/src/memory/strategies/sliding_window.ts +0 -20
- package/src/memory/strategies/summarize.ts +0 -157
- package/src/memory/thread_store.ts +0 -56
- package/src/memory/token_counter.ts +0 -101
- package/src/memory/types.ts +0 -68
- package/src/providers/google_provider.ts +0 -496
- package/src/providers/openai_provider.ts +0 -569
- package/src/providers/openai_responses_provider.ts +0 -321
- package/src/tool.ts +0 -51
- package/src/utils/error_scrub.ts +0 -5
- package/src/utils/prompt.ts +0 -65
- package/src/utils/retry.ts +0 -104
- package/src/utils/schema.ts +0 -27
- package/src/utils/sse_parser.ts +0 -62
- package/src/workflow.ts +0 -199
- package/tsconfig.json +0 -5
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
import type { Message, ContentBlock } from '../types.ts'
|
|
2
|
-
|
|
3
|
-
/** Known context window sizes (in tokens) by model identifier. */
const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
  // Anthropic
  'claude-opus-4-20250514': 200_000,
  'claude-sonnet-4-20250514': 200_000,
  'claude-sonnet-4-5-20250514': 200_000,
  'claude-haiku-3-5-20241022': 200_000,

  // OpenAI
  'gpt-4o': 128_000,
  'gpt-4o-mini': 128_000,
  'gpt-4-turbo': 128_000,
  'gpt-4.1': 1_000_000,
  'gpt-4.1-mini': 1_000_000,
  'gpt-4.1-nano': 1_000_000,
  o3: 200_000,
  'o3-mini': 200_000,
  'o4-mini': 200_000,

  // DeepSeek
  'deepseek-chat': 64_000,
  'deepseek-reasoner': 64_000,
}

/** Context window assumed for models that are not in the table above. */
const DEFAULT_CONTEXT_WINDOW = 128_000

/** Overhead tokens per message (role, formatting, separators). */
const MESSAGE_OVERHEAD = 4

/** Average characters per token for estimation. */
const CHARS_PER_TOKEN = 4

/**
 * Approximate token counting without external dependencies.
 *
 * Uses character-based estimation (~4 chars per token), which is good
 * enough for budget management: exact counts are not needed, only a
 * signal that the conversation is approaching the context limit.
 */
export class TokenCounter {
  /**
   * Estimate the token count of a string.
   *
   * @param text Text to measure; empty or missing text counts as 0.
   * @returns `ceil(text.length / 4)`.
   */
  static estimate(text: string): number {
    if (!text) return 0
    return Math.ceil(text.length / CHARS_PER_TOKEN)
  }

  /**
   * Estimate the token count of a message array.
   *
   * Every message costs a fixed overhead plus its content; every tool
   * call costs its name, its JSON-serialized arguments and another
   * fixed overhead.
   */
  static estimateMessages(messages: Message[]): number {
    let total = 0

    for (const msg of messages) {
      total += MESSAGE_OVERHEAD
      total += TokenCounter.estimateContent(msg.content)

      if (msg.toolCalls) {
        for (const call of msg.toolCalls) {
          total += TokenCounter.estimate(call.name)
          total += TokenCounter.estimate(JSON.stringify(call.arguments))
          total += MESSAGE_OVERHEAD
        }
      }
    }

    return total
  }

  /**
   * Get the context window size for a model, or the default fallback.
   *
   * Resolution order:
   *  1. exact match on the model identifier;
   *  2. prefix match in either direction (so the alias 'claude-sonnet-4'
   *     resolves through 'claude-sonnet-4-20250514', and a dated variant
   *     such as 'gpt-4o-2024-08-06' resolves through 'gpt-4o'), choosing
   *     the candidate that shares the longest prefix with `model`;
   *  3. `DEFAULT_CONTEXT_WINDOW`.
   *
   * @param model Model identifier; an empty string yields the default.
   */
  static contextWindow(model: string): number {
    // An empty identifier is a prefix of every key; without this guard it
    // would silently resolve to whichever entry happens to be listed first.
    if (!model) return DEFAULT_CONTEXT_WINDOW

    // Own-property check: a plain object lookup would also "find"
    // inherited members such as 'toString' or 'constructor'.
    if (Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_WINDOWS, model)) {
      return MODEL_CONTEXT_WINDOWS[model]!
    }

    let bestSize: number | undefined
    let bestOverlap = -1

    for (const [key, size] of Object.entries(MODEL_CONTEXT_WINDOWS)) {
      if (!model.startsWith(key) && !key.startsWith(model)) continue

      // Prefer the most specific candidate ('o3-mini' over 'o3'); on a tie
      // the earliest table entry wins, as before.
      const overlap = Math.min(key.length, model.length)
      if (overlap > bestOverlap) {
        bestOverlap = overlap
        bestSize = size
      }
    }

    return bestSize ?? DEFAULT_CONTEXT_WINDOW
  }

  /**
   * Estimate tokens for message content (string or ContentBlock[]).
   *
   * For block content, the `text`, `content`, JSON-serialized `input`
   * and `name` fields of each block are counted when present.
   */
  private static estimateContent(content: string | ContentBlock[]): number {
    if (typeof content === 'string') {
      return TokenCounter.estimate(content)
    }

    let total = 0
    for (const block of content) {
      if (block.text) total += TokenCounter.estimate(block.text)
      if (block.content) total += TokenCounter.estimate(block.content)
      if (block.input) total += TokenCounter.estimate(JSON.stringify(block.input))
      if (block.name) total += TokenCounter.estimate(block.name)
    }
    return total
  }
}
|
package/src/memory/types.ts
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
import type { Message } from '../types.ts'
|
|
2
|
-
|
|
3
|
-
// ── Configuration ───────────────────────────────────────────────────────────
|
|
4
|
-
|
|
5
|
-
/** Tuning options for conversation memory management. Every field is optional. */
export interface MemoryConfig {
  /** Token ceiling for the entire context window. Default: auto-detected from the model. */
  maxContextTokens?: number

  /** Compaction strategy name: 'sliding_window' or 'summarize'. Default: 'summarize'. */
  strategy?: string

  /** Fraction of the context kept free for the model's response. Default: 0.25. */
  responseReserve?: number

  /** Minimum number of messages kept in working memory before compacting. Default: 4. */
  minWorkingMessages?: number

  /** How many of the oldest messages are compacted per cycle. Default: 10. */
  compactionBatchSize?: number

  /** Whether semantic facts are extracted during compaction. Default: true. */
  extractFacts?: boolean
}
|
|
19
|
-
|
|
20
|
-
// ── Compaction Strategy ─────────────────────────────────────────────────────
|
|
21
|
-
|
|
22
|
-
/** Outcome of one compaction pass over a batch of messages. */
export interface CompactionResult {
  /** Summary text that stands in for the compacted messages. */
  summary: string

  /** Facts extracted while compacting; only present when extraction is enabled. */
  facts?: Fact[]

  /** Token count of `summary`. */
  summaryTokens: number
}
|
|
30
|
-
|
|
31
|
-
/** Contract implemented by every compaction strategy. */
export interface CompactionStrategy {
  /** Identifier of the strategy. */
  readonly name: string

  /**
   * Compact `messages` into a summary.
   *
   * @param messages Messages to compact.
   * @param options Provider and model identifiers, plus an optional
   *   existing summary and an optional fact-extraction flag.
   */
  compact(
    messages: Message[],
    options: {
      provider: string
      model: string
      existingSummary?: string
      extractFacts?: boolean
    }
  ): Promise<CompactionResult>
}
|
|
38
|
-
|
|
39
|
-
// ── Thread Store ────────────────────────────────────────────────────────────
|
|
40
|
-
|
|
41
|
-
/** Serialized form of a memory thread, as exchanged with a `ThreadStore`. */
export interface SerializedMemoryThread {
  /** Thread identifier. */
  id: string

  /** Messages held by the thread. */
  messages: Message[]

  /** Optional system text. */
  system?: string

  /** Optional summary text. */
  summary?: string

  /** Optional facts attached to the thread. */
  facts?: Fact[]

  /** Optional free-form metadata. */
  metadata?: Record<string, unknown>

  /** Creation timestamp, stored as a string. */
  createdAt: string

  /** Last-update timestamp, stored as a string. */
  updatedAt: string
}
|
|
51
|
-
|
|
52
|
-
/** Asynchronous persistence interface for serialized memory threads. */
export interface ThreadStore {
  /** Persist a thread. */
  save(thread: SerializedMemoryThread): Promise<void>

  /** Load the thread with the given id; resolves to `null` when there is none. */
  load(id: string): Promise<SerializedMemoryThread | null>

  /** Delete the thread with the given id. */
  delete(id: string): Promise<void>

  /** List stored threads, optionally paginated with `limit` / `offset`. */
  list(options?: {
    limit?: number
    offset?: number
  }): Promise<SerializedMemoryThread[]>
}
|
|
58
|
-
|
|
59
|
-
// ── Semantic Memory ─────────────────────────────────────────────────────────
|
|
60
|
-
|
|
61
|
-
/** A single key/value fact held in semantic memory. */
export interface Fact {
  /** Fact key. */
  key: string

  /** Fact value. */
  value: string

  /** Origin of the fact: 'extracted' or 'explicit'. */
  source: 'extracted' | 'explicit'

  /** Numeric confidence score. */
  confidence: number

  /** Creation timestamp, stored as a string. */
  createdAt: string

  /** Last-update timestamp, stored as a string. */
  updatedAt: string
}
|
|
@@ -1,496 +0,0 @@
|
|
|
1
|
-
import { parseSSE } from '../utils/sse_parser.ts'
|
|
2
|
-
import { retryableFetch, type RetryOptions } from '../utils/retry.ts'
|
|
3
|
-
import { ExternalServiceError } from '@strav/kernel'
|
|
4
|
-
import type {
|
|
5
|
-
AIProvider,
|
|
6
|
-
CompletionRequest,
|
|
7
|
-
CompletionResponse,
|
|
8
|
-
StreamChunk,
|
|
9
|
-
EmbeddingResponse,
|
|
10
|
-
ProviderConfig,
|
|
11
|
-
Message,
|
|
12
|
-
ToolCall,
|
|
13
|
-
TranscribeRequest,
|
|
14
|
-
TranscriptionResponse,
|
|
15
|
-
Usage,
|
|
16
|
-
} from '../types.ts'
|
|
17
|
-
|
|
18
|
-
/**
 * Google Gemini API provider.
 *
 * Translates the framework's normalized CompletionRequest/Response
 * to/from the Google Generative Language API wire format. Uses raw `fetch()`
 * (through `retryableFetch`).
 */
export class GoogleProvider implements AIProvider {
  readonly name: string
  private apiKey: string
  private baseUrl: string
  private defaultModel: string
  private defaultMaxTokens?: number
  private retryOptions: RetryOptions

  /**
   * Tool-call id -> function name. Gemini's `functionResponse` part is keyed
   * by function name while the framework's tool messages carry only a call
   * id, so the name has to be remembered between turns.
   * NOTE(review): entries are never evicted, so the map grows for the
   * lifetime of the provider instance.
   */
  private toolCallIdToNameMap: Map<string, string> = new Map()

  /**
   * @param config Provider settings. `baseUrl` defaults to the public
   *   v1beta endpoint (one trailing slash is stripped), `model` to
   *   'gemini-2.0-flash', `maxRetries` to 3 and `retryBaseDelay` to 1000.
   */
  constructor(config: ProviderConfig) {
    this.name = 'google'
    this.apiKey = config.apiKey
    this.baseUrl = (config.baseUrl ?? 'https://generativelanguage.googleapis.com/v1beta').replace(/\/$/, '')
    this.defaultModel = config.model || 'gemini-2.0-flash'
    this.defaultMaxTokens = config.maxTokens
    this.retryOptions = {
      maxRetries: config.maxRetries ?? 3,
      baseDelay: config.retryBaseDelay ?? 1000,
    }
  }

  /**
   * Run a non-streaming completion.
   *
   * @throws ExternalServiceError when the response contains no candidates.
   */
  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const model = request.model ?? this.defaultModel
    const body = this.buildRequestBody(request, false)

    const response = await retryableFetch(
      'Google',
      `${this.baseUrl}/models/${model}:generateContent`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    const data: any = await response.json()
    return this.parseResponse(data)
  }

  /**
   * Run a streaming completion.
   *
   * Yields `text` chunks as they arrive, a `tool_start` / `tool_end` pair
   * for every function call, an optional `usage` chunk and a final `done`
   * once the candidate reports a finish reason.
   *
   * @throws ExternalServiceError when the HTTP response has no body.
   */
  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const model = request.model ?? this.defaultModel
    const body = this.buildRequestBody(request, true)

    const response = await retryableFetch(
      'Google',
      // `alt=sse` is required: without it Gemini streams one JSON array
      // instead of Server-Sent Events, which the SSE parser cannot read.
      `${this.baseUrl}/models/${model}:streamGenerateContent?alt=sse`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    if (!response.body) {
      throw new ExternalServiceError('Google', undefined, 'No stream body returned')
    }

    let toolIndex = -1

    for await (const sse of parseSSE(response.body)) {
      if (sse.data === '[DONE]') {
        yield { type: 'done' }
        break
      }

      let parsed: any
      try {
        parsed = JSON.parse(sse.data)
      } catch {
        // Skip events whose payload is not valid JSON.
        continue
      }

      const candidate = parsed.candidates?.[0]
      if (!candidate) continue

      if (candidate.content?.parts) {
        for (const part of candidate.content.parts) {
          if (part.text) {
            yield { type: 'text', text: part.text }
          } else if (part.functionCall) {
            // Gemini delivers each function call as one complete part, so
            // every call is opened and closed right here. Closing only when
            // `args` was present left argument-less calls open forever and
            // swallowed the `tool_start` of the call that followed.
            toolIndex++
            const toolCall = this.toToolCall(part.functionCall)
            yield { type: 'tool_start', toolCall, toolIndex }
            yield { type: 'tool_end', toolIndex }
          }
        }
      }

      // A finish reason marks the final chunk.
      if (candidate.finishReason) {
        if (parsed.usageMetadata) {
          const usage: Usage = {
            inputTokens: parsed.usageMetadata.promptTokenCount ?? 0,
            outputTokens: parsed.usageMetadata.candidatesTokenCount ?? 0,
            totalTokens: parsed.usageMetadata.totalTokenCount ?? 0,
          }
          yield { type: 'usage', usage }
        }

        yield { type: 'done' }
        break
      }
    }
  }

  /**
   * Embed one or more texts.
   *
   * Inputs are sent one at a time, in order, to `:embedContent`.
   *
   * @param input A single text or an array of texts.
   * @param model Embedding model; defaults to 'text-embedding-004'.
   * @returns One vector per input, in input order. `usage.totalTokens` is a
   *   rough estimate (10 per input), not a value reported by the API.
   * @throws ExternalServiceError when a response carries no embedding
   *   values; skipping it would silently misalign vectors and inputs.
   */
  async embed(input: string | string[], model?: string): Promise<EmbeddingResponse> {
    const embeddingModel = model ?? 'text-embedding-004'
    const inputs = Array.isArray(input) ? input : [input]

    const requests = inputs.map(text => ({
      model: `models/${embeddingModel}`,
      content: {
        parts: [{ text }]
      }
    }))

    const embeddings: number[][] = []

    // Process each input separately as Google's batch API might not be available
    for (const [index, request] of requests.entries()) {
      const response = await retryableFetch(
        'Google',
        `${this.baseUrl}/models/${embeddingModel}:embedContent`,
        { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(request) },
        this.retryOptions
      )

      const data: any = await response.json()
      if (!data.embedding?.values) {
        throw new ExternalServiceError('Google', undefined, `No embedding returned for input at index ${index}`)
      }
      embeddings.push(data.embedding.values)
    }

    return {
      embeddings,
      model: embeddingModel,
      usage: { totalTokens: inputs.length * 10 } // Rough estimate, Google doesn't provide token count for embeddings
    }
  }

  /**
   * Speech-to-text via Gemini's multimodal generateContent endpoint.
   *
   * Gemini doesn't have a dedicated STT endpoint; instead, audio is
   * passed as an inline `audio/*` part alongside a text prompt asking
   * for a transcription. We default to `gemini-2.5-flash` (fast, cheap,
   * Thai-capable). Override `model` for `gemini-2.5-pro` when accuracy
   * matters more than latency.
   *
   * Inline audio is capped at ~20MB across the whole request. Chunk
   * longer recordings, or use Gemini's Files API (upload + reference)
   * which isn't covered here — out of scope for the typical SME
   * voice-note flow (<=60s clips).
   *
   * `contentType` defaults to 'audio/mpeg'. The returned `language` simply
   * echoes `request.language`; it is not detected from the audio.
   */
  async transcribe(request: TranscribeRequest): Promise<TranscriptionResponse> {
    const model = request.model ?? 'gemini-2.5-flash'
    const contentType = request.contentType ?? 'audio/mpeg'

    const bytes =
      request.audio instanceof Blob
        ? new Uint8Array(await request.audio.arrayBuffer())
        : request.audio
    const base64 = encodeBase64(bytes)

    const instruction = buildTranscriptionInstruction(request)

    const body = {
      contents: [
        {
          role: 'user',
          parts: [
            { text: instruction },
            { inline_data: { mime_type: contentType, data: base64 } },
          ],
        },
      ],
      generationConfig: {
        // Deterministic output for a transcription task.
        temperature: 0,
      },
    }

    const response = await retryableFetch(
      'Google',
      `${this.baseUrl}/models/${model}:generateContent`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    const data: any = await response.json()
    const text = extractTranscript(data)

    return {
      text,
      language: request.language,
      raw: data,
    }
  }

  // ── Private helpers ──────────────────────────────────────────────────────

  /** JSON content type plus the API key header. */
  private buildHeaders(): Record<string, string> {
    return {
      'content-type': 'application/json',
      'x-goog-api-key': this.apiKey,
    }
  }

  /**
   * Build the Gemini request body from a normalized request.
   *
   * @param stream Not used when building the body; streaming is selected
   *   by the endpoint. Kept so existing call sites stay unchanged.
   */
  private buildRequestBody(request: CompletionRequest, stream: boolean): Record<string, unknown> {
    const body: Record<string, unknown> = {
      contents: this.mapMessages(request.messages),
    }

    // Add system instruction if present
    if (request.system) {
      body.systemInstruction = {
        parts: [{ text: request.system }]
      }
    }

    // Generation config
    const generationConfig: Record<string, unknown> = {}

    // A per-request limit wins over the provider-level default.
    if (request.maxTokens !== undefined) {
      generationConfig.maxOutputTokens = request.maxTokens
    } else if (this.defaultMaxTokens !== undefined) {
      generationConfig.maxOutputTokens = this.defaultMaxTokens
    }

    if (request.temperature !== undefined) {
      generationConfig.temperature = request.temperature
    }

    if (request.stopSequences?.length) {
      generationConfig.stopSequences = request.stopSequences
    }

    // Structured output
    if (request.schema) {
      generationConfig.responseMimeType = 'application/json'
      generationConfig.responseSchema = request.schema
    }

    if (Object.keys(generationConfig).length > 0) {
      body.generationConfig = generationConfig
    }

    // Tools (function declarations)
    if (request.tools?.length) {
      body.tools = [{
        functionDeclarations: request.tools.map(t => ({
          name: t.name,
          description: t.description,
          parameters: t.parameters,
        }))
      }]

      // Tool choice configuration
      if (request.toolChoice) {
        const toolConfig: Record<string, unknown> = {}

        if (request.toolChoice === 'auto') {
          toolConfig.functionCallingConfig = { mode: 'AUTO' }
        } else if (request.toolChoice === 'required') {
          toolConfig.functionCallingConfig = { mode: 'ANY' }
        } else if (typeof request.toolChoice === 'object' && request.toolChoice.name) {
          // Forcing one specific tool: mode ANY restricted to that name.
          toolConfig.functionCallingConfig = {
            mode: 'ANY',
            allowedFunctionNames: [request.toolChoice.name]
          }
        }

        if (Object.keys(toolConfig).length > 0) {
          body.toolConfig = toolConfig
        }
      }
    }

    return body
  }

  /**
   * Convert normalized messages into Gemini `contents`.
   *
   * - tool messages become 'user' turns carrying a `functionResponse` part;
   * - assistant messages become 'model' turns (text part + functionCall parts);
   * - everything else becomes a 'user' text turn (non-string content is
   *   JSON-serialized).
   *
   * @throws ExternalServiceError when a tool message references a call id
   *   whose function name is unknown to this provider instance.
   */
  private mapMessages(messages: Message[]): any[] {
    const result: any[] = []

    for (const msg of messages) {
      if (msg.role === 'tool') {
        // Tool results go as user messages with function response parts
        // Get the function name from our mapping
        const functionName = msg.toolCallId ? this.toolCallIdToNameMap.get(msg.toolCallId) : undefined

        if (!functionName) {
          throw new ExternalServiceError('Google', undefined, `No function name found for tool call ID: ${msg.toolCallId}`)
        }

        result.push({
          role: 'user',
          parts: [
            {
              functionResponse: {
                name: functionName,
                response: {
                  content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
                }
              }
            }
          ]
        })
      } else if (msg.role === 'assistant') {
        const parts: any[] = []

        // Add text content if present (non-string content is not forwarded)
        const text = typeof msg.content === 'string' ? msg.content : ''
        if (text) {
          parts.push({ text })
        }

        // Add function call parts and track their IDs
        if (msg.toolCalls?.length) {
          for (const tc of msg.toolCalls) {
            // Store the mapping for later use
            this.toolCallIdToNameMap.set(tc.id, tc.name)

            parts.push({
              functionCall: {
                name: tc.name,
                args: tc.arguments,
              }
            })
          }
        }

        result.push({
          role: 'model', // Gemini uses 'model' instead of 'assistant'
          parts
        })
      } else {
        // User messages
        result.push({
          role: 'user',
          parts: [{ text: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) }]
        })
      }
    }

    return result
  }

  /**
   * Convert a raw generateContent payload into a normalized response.
   *
   * @throws ExternalServiceError when the payload has no candidates.
   */
  private parseResponse(data: any): CompletionResponse {
    const candidate = data.candidates?.[0]
    if (!candidate) {
      throw new ExternalServiceError('Google', undefined, 'No candidates in response')
    }

    let content = ''
    const toolCalls: ToolCall[] = []

    // Extract content from parts
    if (Array.isArray(candidate.content?.parts)) {
      for (const part of candidate.content.parts) {
        if (part.text) {
          content += part.text
        } else if (part.functionCall) {
          toolCalls.push(this.toToolCall(part.functionCall))
        }
      }
    }

    const usage: Usage = {
      inputTokens: data.usageMetadata?.promptTokenCount ?? 0,
      outputTokens: data.usageMetadata?.candidatesTokenCount ?? 0,
      totalTokens: data.usageMetadata?.totalTokenCount ?? 0,
    }

    let stopReason: CompletionResponse['stopReason'] = 'end'

    // Check tool calls first, as Google may return STOP even with tool calls
    if (toolCalls.length > 0) {
      stopReason = 'tool_use'
    } else {
      switch (candidate.finishReason) {
        case 'STOP':
          stopReason = 'end'
          break
        case 'MAX_TOKENS':
          stopReason = 'max_tokens'
          break
        case 'SAFETY':
        case 'RECITATION':
          stopReason = 'stop_sequence'
          break
      }
    }

    return {
      // Gemini reports the response identifier at the top level; a random
      // id is minted only when the API supplied none.
      id: data.responseId || candidate.id || this.generateResponseId(),
      content,
      toolCalls,
      stopReason,
      usage,
      raw: data,
    }
  }

  /**
   * Normalize a Gemini `functionCall` part into a ToolCall.
   *
   * Gemini may omit the call id, in which case one is minted here. The
   * id -> name pair is recorded immediately so the matching tool result can
   * be mapped back even if the caller does not replay this assistant turn.
   */
  private toToolCall(functionCall: any): ToolCall {
    const toolCall: ToolCall = {
      id: functionCall.id || this.generateToolCallId(),
      name: functionCall.name,
      arguments: functionCall.args || {},
    }
    this.toolCallIdToNameMap.set(toolCall.id, toolCall.name)
    return toolCall
  }

  /** Random, non-cryptographic identifier for a tool call. */
  private generateToolCallId(): string {
    return `tool_${Math.random().toString(36).substring(2, 15)}`
  }

  /** Random, non-cryptographic identifier for a response. */
  private generateResponseId(): string {
    return `resp_${Math.random().toString(36).substring(2, 15)}`
  }
}
|
|
460
|
-
|
|
461
|
-
/**
 * Compose the text instruction that accompanies the audio part of a
 * transcription request.
 *
 * The base instruction is always present. A language hint is appended when
 * `request.language` is set, and a vocabulary hint when `request.prompt`
 * is set. Sentences are joined with single spaces.
 */
function buildTranscriptionInstruction(request: TranscribeRequest): string {
  const { language, prompt } = request

  const sentences = [
    'Transcribe the audio to text. Return only the transcription, without commentary, timestamps, or speaker labels.',
    language ? `The audio is in ${language}. Preserve the original language in the output.` : '',
    // The priming hint biases vocabulary (proper nouns, menu items, dialect
    // markers). It travels in the same text turn as the instruction.
    prompt ? `Context to help with vocabulary: ${prompt}` : '',
  ]

  return sentences.filter(sentence => sentence !== '').join(' ')
}
|
|
477
|
-
|
|
478
|
-
/**
 * Pull the transcript text out of a generateContent payload.
 *
 * Concatenates the string `text` of every part of the first candidate and
 * trims the result. Returns '' when the payload has no candidate parts.
 */
function extractTranscript(data: any): string {
  const parts = data?.candidates?.[0]?.content?.parts
  if (!parts) return ''

  const transcript: string = parts.reduce(
    (joined: string, part: any) => joined + (typeof part?.text === 'string' ? part.text : ''),
    ''
  )
  return transcript.trim()
}
|
|
486
|
-
|
|
487
|
-
/**
 * Base64-encode raw bytes.
 *
 * Uses `Buffer` where it exists (Node / Bun). Otherwise it builds a binary
 * string and encodes it with `btoa`.
 */
function encodeBase64(bytes: Uint8Array): string {
  if (typeof Buffer === 'undefined') {
    // No Buffer available: map each byte to the character with the same
    // code unit, then let btoa do the encoding.
    const chars: string[] = []
    bytes.forEach(byte => {
      chars.push(String.fromCharCode(byte))
    })
    return btoa(chars.join(''))
  }

  return Buffer.from(bytes).toString('base64')
}
|