@strav/brain 0.4.30 → 1.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,569 +0,0 @@
1
- import { parseSSE } from '../utils/sse_parser.ts'
2
- import { retryableFetch, type RetryOptions } from '../utils/retry.ts'
3
- import { ExternalServiceError } from '@strav/kernel'
4
- import type {
5
- AIProvider,
6
- CompletionRequest,
7
- CompletionResponse,
8
- StreamChunk,
9
- EmbeddingResponse,
10
- ProviderConfig,
11
- Message,
12
- ToolCall,
13
- TranscribeRequest,
14
- TranscriptionResponse,
15
- Usage,
16
- } from '../types.ts'
17
-
18
/**
 * OpenAI Chat Completions API provider.
 *
 * Also serves DeepSeek and any OpenAI-compatible API by setting `baseUrl`
 * in the provider config. All HTTP calls go through `retryableFetch`.
 */
export class OpenAIProvider implements AIProvider {
  /**
   * JSON Schema keywords that are stripped from a schema before it is sent
   * in strict `json_schema` mode.
   */
  private static UNSUPPORTED_KEYWORDS = new Set([
    'propertyNames',
    'patternProperties',
    'if',
    'then',
    'else',
    'not',
    'contains',
    'minItems',
    'maxItems',
    'minProperties',
    'maxProperties',
    'minLength',
    'maxLength',
    'minimum',
    'maximum',
    'exclusiveMinimum',
    'exclusiveMaximum',
    'multipleOf',
    'pattern',
    'format',
    'contentEncoding',
    'contentMediaType',
    'unevaluatedProperties',
    '$schema',
  ])

  readonly name: string
  private apiKey: string
  private baseUrl: string
  private defaultModel: string
  private defaultMaxTokens?: number
  private retryOptions: RetryOptions

  /**
   * @param config Provider settings. `baseUrl` defaults to
   *   `https://api.openai.com` (one trailing slash is removed), `maxRetries`
   *   defaults to 3 and `retryBaseDelay` to 1000.
   * @param name Provider name exposed as `this.name`; defaults to `'openai'`.
   */
  constructor(config: ProviderConfig, name?: string) {
    this.name = name ?? 'openai'
    this.apiKey = config.apiKey
    this.baseUrl = (config.baseUrl ?? 'https://api.openai.com').replace(/\/$/, '')
    this.defaultModel = config.model
    this.defaultMaxTokens = config.maxTokens
    this.retryOptions = {
      maxRetries: config.maxRetries ?? 3,
      baseDelay: config.retryBaseDelay ?? 1000,
    }
  }

  /** Whether this provider supports OpenAI's native json_schema response format. */
  private get supportsJsonSchema(): boolean {
    return this.baseUrl === 'https://api.openai.com'
  }

  /**
   * Run a non-streaming chat completion.
   *
   * @param request Normalized completion request.
   * @returns The normalized response built by `parseResponse`.
   */
  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const body = this.buildRequestBody(request, false)

    const response = await retryableFetch(
      'OpenAI',
      `${this.baseUrl}/v1/chat/completions`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    const data: any = await response.json()
    return this.parseResponse(data)
  }

  /**
   * Run a streaming chat completion and translate the SSE events into
   * normalized chunks: `text`, `tool_start`, `tool_delta`, `tool_end`,
   * `usage` and a final `done`.
   *
   * @param request Normalized completion request.
   * @throws ExternalServiceError when the HTTP response has no body.
   */
  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const body = this.buildRequestBody(request, true)

    const response = await retryableFetch(
      'OpenAI',
      `${this.baseUrl}/v1/chat/completions`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    if (!response.body) {
      throw new ExternalServiceError('OpenAI', undefined, 'No stream body returned')
    }

    // Tool-call indexes that received a tool_start but no tool_end yet.
    const openTools = new Set<number>()
    let usageSent = false

    for await (const sse of parseSSE(response.body)) {
      if (sse.data === '[DONE]') break

      let parsed: any
      try {
        parsed = JSON.parse(sse.data)
      } catch {
        // Not JSON (keep-alive, comment, ...) — nothing to translate.
        continue
      }
      // JSON.parse can legally return null or a primitive; skip those.
      if (parsed == null || typeof parsed !== 'object') continue

      const choice = parsed.choices?.[0]
      const delta = choice?.delta

      // Text content
      if (delta?.content) {
        yield { type: 'text', text: delta.content }
      }

      // Tool calls
      if (delta?.tool_calls) {
        for (const tc of delta.tool_calls) {
          const index: number = tc.index ?? 0

          if (!openTools.has(index)) {
            // First chunk for this tool — emit tool_start
            openTools.add(index)
            yield {
              type: 'tool_start',
              toolCall: { id: tc.id, name: tc.function?.name },
              toolIndex: index,
            }
          }

          // Argument fragments
          if (tc.function?.arguments) {
            yield {
              type: 'tool_delta',
              text: tc.function.arguments,
              toolIndex: index,
            }
          }
        }
      }

      const finishReason = choice?.finish_reason

      // Close every started tool call. Besides 'tool_calls', a completed call
      // can also be reported with 'stop' (forced tool_choice, some compatible
      // servers). 'length' is left out: the arguments are truncated there.
      if ((finishReason === 'tool_calls' || finishReason === 'stop') && openTools.size > 0) {
        for (const idx of openTools) {
          yield { type: 'tool_end', toolIndex: idx }
        }
        openTools.clear()
      }

      // Usage arrives either on the finishing chunk or — with
      // stream_options.include_usage — on a trailing chunk whose `choices`
      // array is empty. Accept both, but report it only once.
      if (parsed.usage && !usageSent && (!choice || finishReason)) {
        usageSent = true
        yield {
          type: 'usage',
          usage: {
            inputTokens: parsed.usage.prompt_tokens ?? 0,
            outputTokens: parsed.usage.completion_tokens ?? 0,
            totalTokens: parsed.usage.total_tokens ?? 0,
          },
        }
      }
    }

    // Emitted for the `[DONE]` sentinel and also when the server simply
    // closes the stream without sending one.
    yield { type: 'done' }
  }

  /**
   * Create embeddings via `/v1/embeddings`.
   *
   * @param input One string or a list of strings; a single string is wrapped
   *   in an array before sending.
   * @param model Embedding model; defaults to `text-embedding-3-small`.
   * @returns One embedding per input, the model name reported by the API,
   *   and the total token usage (0 when the API omits it).
   */
  async embed(input: string | string[], model?: string): Promise<EmbeddingResponse> {
    const body = {
      input: Array.isArray(input) ? input : [input],
      model: model ?? 'text-embedding-3-small',
    }

    const response = await retryableFetch(
      'OpenAI',
      `${this.baseUrl}/v1/embeddings`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    const data: any = await response.json()

    return {
      embeddings: data.data.map((d: any) => d.embedding),
      model: data.model,
      usage: { totalTokens: data.usage?.total_tokens ?? 0 },
    }
  }

  /**
   * Speech-to-text via the OpenAI Whisper API (/v1/audio/transcriptions).
   *
   * Defaults to `whisper-1` — the long-standing, broadly supported model.
   * Override with `gpt-4o-transcribe` or `gpt-4o-mini-transcribe` for the
   * newer architecture (better noise/accent robustness, similar pricing).
   *
   * Requests `verbose_json` so we can surface `language` and `duration`
   * on the normalized response without a second round-trip.
   *
   * @param request Audio payload plus optional filename, content type,
   *   model, language and prompt.
   * @returns Transcribed text, optional `language` / `duration`, and the
   *   raw API payload.
   */
  async transcribe(request: TranscribeRequest): Promise<TranscriptionResponse> {
    const audio = request.audio
    // When the caller hands us a typed Blob but no explicit content type,
    // use the Blob's own type to pick a matching default filename.
    const filenameType =
      request.contentType ?? (audio instanceof Blob && audio.type ? audio.type : undefined)
    const filename = request.filename ?? defaultFilename(filenameType)
    const contentType = request.contentType ?? 'application/octet-stream'
    const blob = audio instanceof Blob ? audio : new Blob([audio], { type: contentType })

    const form = new FormData()
    form.append('file', blob, filename)
    form.append('model', request.model ?? 'whisper-1')
    form.append('response_format', 'verbose_json')
    if (request.language) form.append('language', request.language)
    if (request.prompt) form.append('prompt', request.prompt)

    const response = await retryableFetch(
      'OpenAI',
      `${this.baseUrl}/v1/audio/transcriptions`,
      {
        method: 'POST',
        // Don't set Content-Type — the runtime sets it with the
        // multipart boundary derived from the FormData body.
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: form,
      },
      this.retryOptions
    )

    const data: any = await response.json()
    return {
      text: String(data.text ?? ''),
      language: typeof data.language === 'string' ? data.language : undefined,
      duration: typeof data.duration === 'number' ? data.duration : undefined,
      raw: data,
    }
  }

  // ── Private helpers ──────────────────────────────────────────────────────

  /** True for model names starting with `o1`…`o9` or `gpt-5`. */
  private isReasoningModel(model: string): boolean {
    return /^(o[1-9]|gpt-5)/.test(model)
  }

  /**
   * Whether the token limit must be sent as `max_completion_tokens`
   * instead of `max_tokens` for the given model name.
   */
  private usesMaxCompletionTokens(model: string): boolean {
    return this.isReasoningModel(model) || /^gpt-4\.1|gpt-4o-mini-2024/.test(model)
  }

  /** JSON request headers carrying the bearer token. */
  private buildHeaders(): Record<string, string> {
    return {
      'content-type': 'application/json',
      authorization: `Bearer ${this.apiKey}`,
    }
  }

  /**
   * Translate a normalized request into a Chat Completions request body.
   *
   * @param request Normalized completion request.
   * @param stream When true, `stream: true` is added to the body.
   * @returns A plain object ready for `JSON.stringify`.
   */
  private buildRequestBody(request: CompletionRequest, stream: boolean): Record<string, unknown> {
    const model = request.model ?? this.defaultModel
    const modelName: string = model ?? ''
    const messages = this.mapMessages(request.messages, request.system)
    const body: Record<string, unknown> = { model, messages }

    if (stream) body.stream = true

    // A limit of 0 / undefined means "do not send a limit".
    const tokens = request.maxTokens ?? this.defaultMaxTokens
    if (tokens) {
      if (this.usesMaxCompletionTokens(modelName)) {
        body.max_completion_tokens = tokens
      } else {
        body.max_tokens = tokens
      }
    }

    // Temperature is not forwarded for reasoning models.
    if (request.temperature !== undefined && !this.isReasoningModel(modelName)) {
      body.temperature = request.temperature
    }
    if (request.stopSequences?.length) body.stop = request.stopSequences

    // Tools
    if (request.tools?.length) {
      const tools: unknown[] = []
      for (const tool of request.tools) {
        tools.push({
          type: 'function',
          function: {
            name: tool.name,
            description: tool.description,
            parameters: tool.parameters,
          },
        })
      }
      body.tools = tools
    }

    // Tool choice: strings pass through, objects select one function by name.
    if (request.toolChoice) {
      body.tool_choice =
        typeof request.toolChoice === 'string'
          ? request.toolChoice
          : { type: 'function', function: { name: request.toolChoice.name } }
    }

    // Structured output
    if (request.schema) {
      const useStrict = this.supportsJsonSchema && this.isStrictCompatible(request.schema)

      if (useStrict) {
        body.response_format = {
          type: 'json_schema',
          json_schema: {
            name: 'response',
            schema: this.normalizeSchemaForOpenAI(request.schema),
            strict: true,
          },
        }
      } else {
        // Fallback: json_object mode with schema injected into system prompt
        body.response_format = { type: 'json_object' }
        const schemaHint = `\n\nYou MUST respond with valid JSON matching this schema:\n${JSON.stringify(request.schema, null, 2)}`
        // `messages` was freshly built by mapMessages, so mutating it here
        // does not touch the caller's request.
        if (messages[0]?.role === 'system') {
          messages[0].content += schemaHint
        } else {
          messages.unshift({ role: 'system', content: `Respond with valid JSON.${schemaHint}` })
        }
      }
    }

    return body
  }

  /**
   * Convert normalized messages into Chat Completions messages.
   *
   * `tool` messages carry `tool_call_id` and stringified content,
   * `assistant` messages carry string content (or null) plus any tool calls,
   * and every other role is sent as a `user` message with its content
   * unchanged.
   *
   * @param messages Conversation messages.
   * @param system Optional system prompt, emitted as the first message.
   */
  private mapMessages(messages: Message[], system?: string): any[] {
    const result: any[] = []

    // System prompt as first message
    if (system) {
      result.push({ role: 'system', content: system })
    }

    for (const msg of messages) {
      if (msg.role === 'tool') {
        result.push({
          role: 'tool',
          tool_call_id: msg.toolCallId,
          content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
        })
        continue
      }

      if (msg.role === 'assistant') {
        const mapped: any = {
          role: 'assistant',
          content: typeof msg.content === 'string' ? msg.content : null,
        }

        if (msg.toolCalls?.length) {
          mapped.tool_calls = msg.toolCalls.map((tc: ToolCall) => ({
            id: tc.id,
            type: 'function',
            function: {
              name: tc.name,
              arguments: JSON.stringify(tc.arguments),
            },
          }))
        }

        result.push(mapped)
        continue
      }

      result.push({ role: 'user', content: msg.content })
    }

    return result
  }

  /**
   * Convert a raw Chat Completions payload into a normalized response.
   *
   * `finish_reason` maps `tool_calls` → `tool_use` and `length` →
   * `max_tokens`; anything else becomes `end`. Missing usage counters
   * default to 0.
   */
  private parseResponse(data: any): CompletionResponse {
    const choice = data.choices?.[0]
    const message = choice?.message

    const content: string = message?.content ?? ''
    const toolCalls: ToolCall[] = []

    if (message?.tool_calls) {
      for (const tc of message.tool_calls) {
        toolCalls.push({
          id: tc.id,
          name: tc.function.name,
          arguments: this.parseToolArguments(tc.function.arguments),
        })
      }
    }

    const usage: Usage = {
      inputTokens: data.usage?.prompt_tokens ?? 0,
      outputTokens: data.usage?.completion_tokens ?? 0,
      totalTokens: data.usage?.total_tokens ?? 0,
    }

    let stopReason: CompletionResponse['stopReason'] = 'end'
    if (choice?.finish_reason === 'tool_calls') {
      stopReason = 'tool_use'
    } else if (choice?.finish_reason === 'length') {
      stopReason = 'max_tokens'
    }

    return {
      id: data.id ?? '',
      content,
      toolCalls,
      stopReason,
      usage,
      raw: data,
    }
  }

  /**
   * Decode the `arguments` field of a tool call into a plain object.
   *
   * - an empty / whitespace-only string means "no arguments" → `{}`
   * - a value that is already a plain object is returned unchanged
   * - JSON that decodes to a plain object is returned as that object
   * - anything else (invalid JSON, `null`, arrays, primitives) is passed
   *   through untouched in a `{ _raw }` wrapper
   */
  private parseToolArguments(raw: unknown): Record<string, unknown> {
    const isPlainObject = (v: unknown): v is Record<string, unknown> =>
      v !== null && typeof v === 'object' && !Array.isArray(v)

    if (isPlainObject(raw)) return raw
    if (typeof raw === 'string') {
      if (raw.trim() === '') return {}
      try {
        const value: unknown = JSON.parse(raw)
        if (isPlainObject(value)) return value
      } catch {
        // Invalid JSON from the model — fall through to the wrapper
      }
    }
    return { _raw: raw }
  }

  /**
   * Check if a schema is compatible with OpenAI's strict structured output.
   * Record types (object with additionalProperties != false) are not supported.
   * Nested `properties`, `items`, `anyOf` and `oneOf` are checked recursively.
   */
  private isStrictCompatible(schema: Record<string, unknown>): boolean {
    if (schema == null || typeof schema !== 'object') return true

    // Record type: object with additionalProperties that isn't false
    const isRecord =
      schema.type === 'object' &&
      schema.additionalProperties !== undefined &&
      schema.additionalProperties !== false
    if (isRecord) return false

    // Check nested properties
    if (schema.properties) {
      for (const prop of Object.values(schema.properties as Record<string, any>)) {
        if (!this.isStrictCompatible(prop)) return false
      }
    }

    // Check array items
    if (schema.items && !this.isStrictCompatible(schema.items as Record<string, unknown>)) {
      return false
    }

    // Check anyOf / oneOf
    for (const key of ['anyOf', 'oneOf'] as const) {
      const variants = schema[key]
      if (Array.isArray(variants)) {
        for (const variant of variants as any[]) {
          if (!this.isStrictCompatible(variant)) return false
        }
      }
    }

    return true
  }

  /**
   * Rewrite a schema for OpenAI's strict structured output, which requires:
   * - All properties listed in `required`
   * - Optional properties use nullable types instead
   * - `additionalProperties: false` on every object
   *
   * Keywords in `UNSUPPORTED_KEYWORDS` are dropped. The input is not
   * mutated; non-object inputs are returned as-is.
   */
  private normalizeSchemaForOpenAI(schema: Record<string, unknown>): Record<string, unknown> {
    if (schema == null || typeof schema !== 'object') return schema

    // Strip unsupported keywords
    const result: Record<string, unknown> = {}
    for (const [k, v] of Object.entries(schema)) {
      if (!OpenAIProvider.UNSUPPORTED_KEYWORDS.has(k)) {
        result[k] = v
      }
    }

    // Handle object types with explicit properties
    if (result.type === 'object' && result.properties) {
      const props = result.properties as Record<string, any>
      const currentRequired = new Set(
        Array.isArray(result.required) ? (result.required as string[]) : []
      )

      const normalizedProps: Record<string, any> = {}
      for (const [key, prop] of Object.entries(props)) {
        const normalizedProp = this.normalizeSchemaForOpenAI(prop)
        // Originally-optional properties become "required but nullable".
        normalizedProps[key] = currentRequired.has(key)
          ? normalizedProp
          : this.makeNullable(normalizedProp)
      }

      result.properties = normalizedProps
      result.required = Object.keys(normalizedProps)
      result.additionalProperties = false
    }

    // Handle arrays
    if (result.type === 'array' && result.items) {
      result.items = this.normalizeSchemaForOpenAI(result.items as Record<string, unknown>)
    }

    // Handle anyOf / oneOf
    for (const key of ['anyOf', 'oneOf'] as const) {
      if (Array.isArray(result[key])) {
        result[key] = (result[key] as any[]).map((s: any) => this.normalizeSchemaForOpenAI(s))
      }
    }

    return result
  }

  /**
   * Return a schema that additionally accepts `null`.
   *
   * Schemas that already allow null are returned unchanged. An existing
   * `anyOf` gets a `{ type: 'null' }` variant appended. Every other schema
   * — including ones without a `type`, such as `$ref`, bare `enum` or
   * `oneOf` — is wrapped in `anyOf: [schema, { type: 'null' }]`, so a
   * property that was optional never turns into a mandatory non-null one.
   */
  private makeNullable(schema: Record<string, unknown>): Record<string, unknown> {
    // Boolean schemas and other non-objects cannot be wrapped meaningfully.
    if (schema == null || typeof schema !== 'object') return schema

    // Already nullable
    if (schema.type === 'null') return schema
    if (Array.isArray(schema.type) && schema.type.includes('null')) return schema

    // Has anyOf — add null variant
    if (Array.isArray(schema.anyOf)) {
      const hasNull = schema.anyOf.some((s: any) => s?.type === 'null')
      return hasNull ? schema : { ...schema, anyOf: [...schema.anyOf, { type: 'null' }] }
    }

    // Everything else — wrap in anyOf with null
    return { anyOf: [{ ...schema }, { type: 'null' }] }
  }
}
559
-
560
/**
 * Choose a multipart filename for Whisper based on the content type.
 * Whisper sniffs the extension when no MIME is supplied; sending a name
 * that matches the actual format avoids "unsupported file" 400s.
 *
 * The extension is the MIME subtype, lower-cased, with parameters
 * (`; codecs=…`) removed, a leading `x-` stripped, and a few common
 * aliases mapped to their usual extension (e.g. `audio/x-wav` and
 * `audio/wave` both give `audio.wav`).
 *
 * @param contentType MIME type of the audio, if known.
 * @returns `audio.<ext>`, or `audio.bin` when no subtype can be derived.
 */
function defaultFilename(contentType?: string): string {
  if (!contentType) return 'audio.bin'

  const subtype = contentType.split('/')[1]?.split(';')[0]?.trim().toLowerCase()
  if (!subtype) return 'audio.bin'

  // Subtypes (after removing `x-`) whose name is not a usable extension.
  const aliases: Record<string, string> = {
    wave: 'wav',
    'vnd.wave': 'wav',
    'pn-wav': 'wav',
    mpeg3: 'mp3',
    'mp4a-latm': 'm4a',
  }

  const bare = subtype.replace(/^x-/, '')
  if (!bare) return 'audio.bin'

  return `audio.${aliases[bare] ?? bare}`
}