@strav/brain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
+ import type { Message, ContentBlock } from '../types.ts'
2
+
3
/** Known context window sizes (in tokens), keyed by model identifier. */
const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
  // Anthropic
  'claude-opus-4-20250514': 200_000,
  'claude-sonnet-4-20250514': 200_000,
  'claude-sonnet-4-5-20250514': 200_000,
  'claude-haiku-3-5-20241022': 200_000,

  // OpenAI
  'gpt-4o': 128_000,
  'gpt-4o-mini': 128_000,
  'gpt-4-turbo': 128_000,
  'gpt-4.1': 1_000_000,
  'gpt-4.1-mini': 1_000_000,
  'gpt-4.1-nano': 1_000_000,
  o3: 200_000,
  'o3-mini': 200_000,
  'o4-mini': 200_000,

  // DeepSeek
  'deepseek-chat': 64_000,
  'deepseek-reasoner': 64_000,
}

/** Context window assumed when a model cannot be matched against the table. */
const DEFAULT_CONTEXT_WINDOW = 128_000

/** Overhead tokens per message (role, formatting, separators). */
const MESSAGE_OVERHEAD = 4

/** Average characters per token for estimation. */
const CHARS_PER_TOKEN = 4

/**
 * Approximate token counting without external dependencies.
 *
 * Uses character-based estimation (~4 chars per token) which is
 * conservative enough for budget management. Exact counts are not
 * needed — we just need to know when we're approaching the limit.
 */
export class TokenCounter {
  /**
   * Estimate token count for a string.
   *
   * @param text - Text to measure; empty (or otherwise falsy) input counts as 0.
   * @returns `text.length / CHARS_PER_TOKEN`, rounded up.
   */
  static estimate(text: string): number {
    if (!text) return 0
    return Math.ceil(text.length / CHARS_PER_TOKEN)
  }

  /**
   * Estimate token count for a Message array.
   *
   * Every message costs `MESSAGE_OVERHEAD` plus the estimate of its content.
   * Every tool call on a message additionally costs the estimate of its name,
   * the estimate of its JSON-serialized arguments, and another `MESSAGE_OVERHEAD`.
   *
   * @param messages - Messages to measure.
   * @returns The summed estimate; 0 for an empty array.
   */
  static estimateMessages(messages: Message[]): number {
    let total = 0

    for (const msg of messages) {
      total += MESSAGE_OVERHEAD
      total += TokenCounter.estimateContent(msg.content)

      if (msg.toolCalls) {
        for (const call of msg.toolCalls) {
          total += TokenCounter.estimate(call.name)
          // JSON.stringify yields undefined for undefined arguments; estimate() treats that as 0.
          total += TokenCounter.estimate(JSON.stringify(call.arguments))
          total += MESSAGE_OVERHEAD
        }
      }
    }

    return total
  }

  /**
   * Get the context window size for a model, or the default fallback.
   *
   * Resolution order:
   * 1. exact match on a table key;
   * 2. the longest table key that is a prefix of `model`
   *    (e.g. 'gpt-4o-mini-2024-07-18' matches 'gpt-4o-mini');
   * 3. the first table key that `model` is a prefix of
   *    (e.g. 'claude-sonnet-4' matches 'claude-sonnet-4-20250514');
   * 4. `DEFAULT_CONTEXT_WINDOW`.
   *
   * @param model - Model identifier; an empty string resolves to the default.
   * @returns Context window size in tokens.
   */
  static contextWindow(model: string): number {
    // An empty identifier is a prefix of every key; never let it match an arbitrary entry.
    if (!model) return DEFAULT_CONTEXT_WINDOW

    // Own-property check: a plain index lookup would also see inherited members
    // such as 'constructor' or 'toString' and return a function instead of a number.
    if (Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_WINDOWS, model)) {
      return MODEL_CONTEXT_WINDOWS[model]!
    }

    const entries = Object.entries(MODEL_CONTEXT_WINDOWS)

    // Versioned/suffixed identifier: prefer the most specific (longest) known prefix.
    let bestKeyLength = 0
    let bestSize: number | undefined
    for (const [key, size] of entries) {
      if (key.length > bestKeyLength && model.startsWith(key)) {
        bestKeyLength = key.length
        bestSize = size
      }
    }
    if (bestSize !== undefined) return bestSize

    // Abbreviated identifier: the model is a prefix of a known key.
    for (const [key, size] of entries) {
      if (key.startsWith(model)) return size
    }

    return DEFAULT_CONTEXT_WINDOW
  }

  /**
   * Estimate tokens for message content (string or ContentBlock[]).
   *
   * For block arrays, the `text`, `content`, JSON-serialized `input` and `name`
   * fields of each block are estimated and summed; absent fields contribute 0.
   */
  private static estimateContent(content: string | ContentBlock[]): number {
    if (typeof content === 'string') {
      return TokenCounter.estimate(content)
    }

    let total = 0
    for (const block of content) {
      if (block.text) total += TokenCounter.estimate(block.text)
      if (block.content) total += TokenCounter.estimate(block.content)
      if (block.input) total += TokenCounter.estimate(JSON.stringify(block.input))
      if (block.name) total += TokenCounter.estimate(block.name)
    }
    return total
  }
}
@@ -0,0 +1,68 @@
1
+ import type { Message } from '../types.ts'
2
+
3
+ // ── Configuration ───────────────────────────────────────────────────────────
4
+
5
/**
 * Tuning knobs for conversation memory management.
 *
 * Every field is optional; the defaults quoted below are the documented
 * ones and are applied by whatever consumes this configuration.
 */
export interface MemoryConfig {
  /** Compaction strategy name, 'sliding_window' or 'summarize' (default: 'summarize'). */
  strategy?: string

  /** Token budget for the whole context window (default: auto-detected from the model). */
  maxContextTokens?: number

  /** Fraction of the context kept free for the model's response (default: 0.25). */
  responseReserve?: number

  /** Minimum number of messages kept in working memory before compacting (default: 4). */
  minWorkingMessages?: number

  /** How many of the oldest messages are compacted per cycle (default: 10). */
  compactionBatchSize?: number

  /** Whether semantic facts are extracted while compacting (default: true). */
  extractFacts?: boolean
}
19
+
20
+ // ── Compaction Strategy ─────────────────────────────────────────────────────
21
+
22
/** Outcome of compacting a batch of messages. */
export interface CompactionResult {
  /** Text that stands in for the messages that were compacted. */
  summary: string

  /** Token count of `summary`. */
  summaryTokens: number

  /** Facts pulled out while compacting; only present when extraction is enabled. */
  facts?: Array<Fact>
}
30
+
31
/** Contract for a pluggable strategy that condenses messages into a summary. */
export interface CompactionStrategy {
  /** Identifier of the strategy. */
  readonly name: string

  /**
   * Compact the given messages.
   *
   * @param messages - Messages to condense.
   * @param options - Provider and model to use, an optional existing summary,
   *   and an optional flag requesting fact extraction.
   * @returns A promise of the resulting summary (and facts, if any).
   */
  compact(
    messages: Message[],
    options: {
      provider: string
      model: string
      existingSummary?: string
      extractFacts?: boolean
    }
  ): Promise<CompactionResult>
}
38
+
39
+ // ── Thread Store ────────────────────────────────────────────────────────────
40
+
41
/** Plain-data form of a memory thread, as handed to and returned by a ThreadStore. */
export interface SerializedMemoryThread {
  /** Identifier of the thread. */
  id: string

  /** Creation timestamp, stored as a string. */
  createdAt: string

  /** Last-update timestamp, stored as a string. */
  updatedAt: string

  /** Messages held by the thread. */
  messages: Array<Message>

  /** Optional system prompt. */
  system?: string

  /** Optional summary text for the thread. */
  summary?: string

  /** Optional facts associated with the thread. */
  facts?: Array<Fact>

  /** Optional free-form metadata. */
  metadata?: Record<string, unknown>
}
51
+
52
/** Asynchronous persistence contract for serialized memory threads. */
export interface ThreadStore {
  /**
   * Look up a thread by id.
   *
   * @returns The stored thread, or `null`.
   */
  load(id: string): Promise<SerializedMemoryThread | null>

  /** Persist the given thread. */
  save(thread: SerializedMemoryThread): Promise<void>

  /** Remove the thread with the given id. */
  delete(id: string): Promise<void>

  /**
   * Enumerate stored threads.
   *
   * @param options - Optional `limit` / `offset` paging values.
   */
  list(options?: { limit?: number; offset?: number }): Promise<Array<SerializedMemoryThread>>
}
58
+
59
+ // ── Semantic Memory ─────────────────────────────────────────────────────────
60
+
61
/** A single key/value fact held in semantic memory. */
export interface Fact {
  /** Name under which the fact is stored. */
  key: string

  /** The fact's content. */
  value: string

  /** Origin of the fact: 'extracted' or 'explicit'. */
  source: 'explicit' | 'extracted'

  /** Numeric confidence attached to the fact. */
  confidence: number

  /** Creation timestamp, stored as a string. */
  createdAt: string

  /** Last-update timestamp, stored as a string. */
  updatedAt: string
}
@@ -0,0 +1,276 @@
1
+ import { parseSSE } from '../utils/sse_parser.ts'
2
+ import { retryableFetch, type RetryOptions } from '../utils/retry.ts'
3
+ import { ExternalServiceError } from '@stravigor/kernel'
4
+ import type {
5
+ AIProvider,
6
+ CompletionRequest,
7
+ CompletionResponse,
8
+ StreamChunk,
9
+ ProviderConfig,
10
+ Message,
11
+ ToolCall,
12
+ Usage,
13
+ } from '../types.ts'
14
+
15
/**
 * Anthropic Messages API provider.
 *
 * Translates the framework's normalized CompletionRequest/Response
 * to/from the Anthropic wire format. Uses raw `fetch()` (through
 * `retryableFetch`) instead of an SDK.
 */
export class AnthropicProvider implements AIProvider {
  readonly name: string
  private apiKey: string
  private baseUrl: string
  private defaultModel: string
  private defaultMaxTokens: number
  private retryOptions: RetryOptions

  /**
   * @param config - Provider configuration. `baseUrl` defaults to
   *   'https://api.anthropic.com', `maxTokens` to 4096, `maxRetries` to 3
   *   and `retryBaseDelay` to 1000.
   */
  constructor(config: ProviderConfig) {
    this.name = 'anthropic'
    this.apiKey = config.apiKey
    // Strip every trailing slash so `${baseUrl}/v1/messages` never contains '//'.
    this.baseUrl = (config.baseUrl ?? 'https://api.anthropic.com').replace(/\/+$/, '')
    this.defaultModel = config.model
    this.defaultMaxTokens = config.maxTokens ?? 4096
    this.retryOptions = {
      maxRetries: config.maxRetries ?? 3,
      baseDelay: config.retryBaseDelay ?? 1000,
    }
  }

  /**
   * Send a non-streaming completion request.
   *
   * @param request - Normalized completion request.
   * @returns The normalized response parsed from the JSON body.
   */
  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const body = this.buildRequestBody(request, false)

    const response = await retryableFetch(
      'Anthropic',
      `${this.baseUrl}/v1/messages`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    const data: any = await response.json()
    return this.parseResponse(data)
  }

  /**
   * Send a streaming completion request and translate the SSE events into
   * normalized stream chunks.
   *
   * Emits `tool_start` / `tool_delta` / `tool_end` for tool_use blocks, `text`
   * for text deltas, `usage` when a message_delta carries usage, and `done`
   * on message_stop. Events whose payload is not valid JSON are skipped.
   *
   * @param request - Normalized completion request.
   * @throws ExternalServiceError when the response has no body or the stream
   *   delivers an `error` event.
   */
  async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
    const body = this.buildRequestBody(request, true)

    const response = await retryableFetch(
      'Anthropic',
      `${this.baseUrl}/v1/messages`,
      { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
      this.retryOptions
    )

    if (!response.body) {
      throw new ExternalServiceError('Anthropic', undefined, 'No stream body returned')
    }

    let currentBlockIndex = -1
    // Indices of tool_use blocks that have started but not yet stopped.
    const openToolBlocks = new Set<number>()
    // Input tokens are reported on message_start; message_delta usually carries only output tokens.
    let inputTokens = 0

    for await (const sse of parseSSE(response.body)) {
      if (sse.data === '[DONE]') break

      let parsed: any
      try {
        parsed = JSON.parse(sse.data)
      } catch {
        continue
      }
      // Valid JSON that is not an object (e.g. `null`) carries no event fields.
      if (parsed === null || typeof parsed !== 'object') continue

      const type = parsed.type ?? sse.event

      if (type === 'message_start') {
        inputTokens = parsed.message?.usage?.input_tokens ?? inputTokens
      } else if (type === 'content_block_start') {
        currentBlockIndex = parsed.index ?? currentBlockIndex + 1
        const block = parsed.content_block
        if (block?.type === 'tool_use') {
          openToolBlocks.add(currentBlockIndex)
          yield {
            type: 'tool_start',
            toolCall: { id: block.id, name: block.name },
            toolIndex: currentBlockIndex,
          }
        }
      } else if (type === 'content_block_delta') {
        const delta = parsed.delta
        if (delta?.type === 'text_delta') {
          yield { type: 'text', text: delta.text }
        } else if (delta?.type === 'input_json_delta') {
          yield {
            type: 'tool_delta',
            text: delta.partial_json,
            toolIndex: parsed.index ?? currentBlockIndex,
          }
        }
      } else if (type === 'content_block_stop') {
        // Only signal the end of blocks that were announced with tool_start;
        // text blocks also produce content_block_stop and must not emit tool_end.
        const stoppedIndex: number = parsed.index ?? currentBlockIndex
        if (openToolBlocks.delete(stoppedIndex)) {
          yield { type: 'tool_end', toolIndex: stoppedIndex }
        }
      } else if (type === 'message_delta') {
        const usage = parsed.usage
        if (usage) {
          const input = usage.input_tokens ?? inputTokens
          const output = usage.output_tokens ?? 0
          yield {
            type: 'usage',
            usage: {
              inputTokens: input,
              outputTokens: output,
              totalTokens: input + output,
            },
          }
        }
      } else if (type === 'message_stop') {
        yield { type: 'done' }
      } else if (type === 'error') {
        // Surface mid-stream failures instead of ending the stream silently.
        throw new ExternalServiceError(
          'Anthropic',
          undefined,
          parsed.error?.message ?? 'Stream error'
        )
      }
    }
  }

  // ── Private helpers ──────────────────────────────────────────────────────

  /** Build the HTTP headers sent with every request. */
  private buildHeaders(): Record<string, string> {
    return {
      'content-type': 'application/json',
      'x-api-key': this.apiKey,
      'anthropic-version': '2023-06-01',
    }
  }

  /**
   * Translate a normalized request into the Anthropic request body.
   *
   * @param request - Normalized completion request.
   * @param stream - When true, `stream: true` is set on the body.
   */
  private buildRequestBody(request: CompletionRequest, stream: boolean): Record<string, unknown> {
    const body: Record<string, unknown> = {
      model: request.model ?? this.defaultModel,
      max_tokens: request.maxTokens ?? this.defaultMaxTokens,
      messages: this.mapMessages(request.messages),
    }

    if (stream) body.stream = true
    if (request.system) body.system = request.system
    if (request.temperature !== undefined) body.temperature = request.temperature
    if (request.stopSequences?.length) body.stop_sequences = request.stopSequences

    // Tools
    if (request.tools?.length) {
      body.tools = request.tools.map(t => ({
        name: t.name,
        description: t.description,
        input_schema: t.parameters,
      }))
    }

    // Tool choice: 'auto' → auto, 'required' → any, otherwise a specific named tool.
    if (request.toolChoice) {
      if (request.toolChoice === 'auto') {
        body.tool_choice = { type: 'auto' }
      } else if (request.toolChoice === 'required') {
        body.tool_choice = { type: 'any' }
      } else {
        body.tool_choice = { type: 'tool', name: request.toolChoice.name }
      }
    }

    // Structured output
    if (request.schema) {
      body.output_format = { type: 'json_schema', schema: request.schema }
    }

    return body
  }

  /**
   * Translate normalized messages into Anthropic message objects.
   *
   * - `tool` messages become user messages holding one tool_result block.
   * - `assistant` messages become text and/or tool_use blocks; a lone text
   *   block is collapsed to a plain string.
   * - every other role is sent as a user message with its content unchanged.
   */
  private mapMessages(messages: Message[]): any[] {
    const result: any[] = []

    for (const msg of messages) {
      if (msg.role === 'tool') {
        // Tool results go as user messages with tool_result content blocks
        result.push({
          role: 'user',
          content: [
            {
              type: 'tool_result',
              tool_use_id: msg.toolCallId,
              content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
            },
          ],
        })
      } else if (msg.role === 'assistant') {
        const content: any[] = []
        const seenToolUseIds = new Set<unknown>()

        if (typeof msg.content === 'string') {
          // Add text content if present
          if (msg.content) content.push({ type: 'text', text: msg.content })
        } else if (Array.isArray(msg.content)) {
          // Block content is forwarded as-is (as the user branch does) rather than dropped.
          // NOTE(review): assumes ContentBlock is wire-compatible with Anthropic blocks — confirm.
          for (const block of msg.content as any[]) {
            content.push(block)
            if (block?.type === 'tool_use') seenToolUseIds.add(block.id)
          }
        }

        // Add tool use blocks, skipping calls already present as forwarded tool_use blocks.
        if (msg.toolCalls?.length) {
          for (const tc of msg.toolCalls) {
            if (seenToolUseIds.has(tc.id)) continue
            content.push({
              type: 'tool_use',
              id: tc.id,
              name: tc.name,
              input: tc.arguments,
            })
          }
        }

        result.push({
          role: 'assistant',
          content: content.length === 1 && content[0]?.type === 'text' ? content[0].text : content,
        })
      } else {
        // User messages
        result.push({
          role: 'user',
          content: msg.content,
        })
      }
    }

    return result
  }

  /**
   * Translate an Anthropic response body into a normalized CompletionResponse.
   *
   * Text blocks are concatenated into `content`, tool_use blocks become
   * `toolCalls`, and any stop reason other than 'tool_use', 'max_tokens' or
   * 'stop_sequence' maps to 'end'. The untouched body is kept in `raw`.
   */
  private parseResponse(data: any): CompletionResponse {
    let content = ''
    const toolCalls: ToolCall[] = []

    if (Array.isArray(data.content)) {
      for (const block of data.content) {
        if (block?.type === 'text') {
          // Guard against a missing text field so "undefined" is never appended.
          content += block.text ?? ''
        } else if (block?.type === 'tool_use') {
          toolCalls.push({
            id: block.id,
            name: block.name,
            arguments: block.input ?? {},
          })
        }
      }
    }

    const inputTokens: number = data.usage?.input_tokens ?? 0
    const outputTokens: number = data.usage?.output_tokens ?? 0
    const usage: Usage = {
      inputTokens,
      outputTokens,
      totalTokens: inputTokens + outputTokens,
    }

    let stopReason: CompletionResponse['stopReason'] = 'end'
    switch (data.stop_reason) {
      case 'tool_use':
        stopReason = 'tool_use'
        break
      case 'max_tokens':
        stopReason = 'max_tokens'
        break
      case 'stop_sequence':
        stopReason = 'stop_sequence'
        break
    }

    return {
      id: data.id ?? '',
      content,
      toolCalls,
      stopReason,
      usage,
      raw: data,
    }
  }
}