@strav/brain 1.0.0-alpha.17 → 1.0.0-alpha.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/src/agent_generate_result.ts +2 -0
- package/src/agent_result.ts +7 -0
- package/src/agent_runner.ts +80 -4
- package/src/brain_manager.ts +119 -2
- package/src/index.ts +20 -2
- package/src/mcp/client.ts +17 -0
- package/src/mcp/index.ts +1 -0
- package/src/mcp/pool.ts +106 -0
- package/src/mcp/resolve_mcp_tools.ts +25 -7
- package/src/persistence/brain_message.ts +34 -0
- package/src/persistence/brain_message_repository.ts +106 -0
- package/src/persistence/brain_store.ts +166 -0
- package/src/persistence/brain_suspended_run.ts +30 -0
- package/src/persistence/brain_suspended_run_repository.ts +68 -0
- package/src/persistence/brain_thread.ts +30 -0
- package/src/persistence/brain_thread_repository.ts +65 -0
- package/src/persistence/database_brain_store.ts +190 -0
- package/src/persistence/index.ts +48 -0
- package/src/persistence/schema/brain_message_schema.ts +61 -0
- package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
- package/src/persistence/schema/brain_thread_schema.ts +50 -0
- package/src/persistence/schema/index.ts +3 -0
- package/src/provider.ts +36 -1
- package/src/providers/anthropic_provider.ts +140 -23
- package/src/providers/gemini_provider.ts +55 -32
- package/src/providers/openai_compat_provider.ts +452 -23
- package/src/providers/openai_provider.ts +87 -32
- package/src/providers/openai_responses_provider.ts +365 -50
- package/src/suspended_run.ts +153 -0
- package/src/thread.ts +40 -1
- package/src/types.ts +110 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@strav/brain",
|
|
3
|
-
"version": "1.0.0-alpha.
|
|
3
|
+
"version": "1.0.0-alpha.19",
|
|
4
4
|
"description": "Strav AI module — unified Provider interface, BrainManager, threads, prompt caching, tools / agents / MCP. Anthropic + OpenAI providers; Gemini / DeepSeek follow.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"exports": {
|
|
9
9
|
".": "./src/index.ts",
|
|
10
10
|
"./mcp": "./src/mcp/index.ts",
|
|
11
|
+
"./persistence": "./src/persistence/index.ts",
|
|
11
12
|
"./zod": "./src/zod/index.ts"
|
|
12
13
|
},
|
|
13
14
|
"files": [
|
|
@@ -24,7 +25,8 @@
|
|
|
24
25
|
"@anthropic-ai/sdk": "^0.100.0",
|
|
25
26
|
"@google/genai": "^2.7.0",
|
|
26
27
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
27
|
-
"@strav/
|
|
28
|
+
"@strav/database": "1.0.0-alpha.19",
|
|
29
|
+
"@strav/kernel": "1.0.0-alpha.19",
|
|
28
30
|
"openai": "^6.0.0"
|
|
29
31
|
},
|
|
30
32
|
"peerDependencies": {
|
package/src/agent_result.ts
CHANGED
|
@@ -29,4 +29,11 @@ export interface AgentResult {
|
|
|
29
29
|
stopReason: string
|
|
30
30
|
/** Token usage summed across every model call in the loop. */
|
|
31
31
|
usage: ChatUsage
|
|
32
|
+
/**
|
|
33
|
+
* Final provider response id when the provider exposes stateful
|
|
34
|
+
* conversations (OpenAI Responses API). Captured from the last
|
|
35
|
+
* model turn so apps that persist the conversation can resume
|
|
36
|
+
* via `ChatOptions.previousResponseId`. Undefined elsewhere.
|
|
37
|
+
*/
|
|
38
|
+
responseId?: string
|
|
32
39
|
}
|
package/src/agent_runner.ts
CHANGED
|
@@ -26,8 +26,17 @@ import type { AgentStreamEvent } from './agent_stream_event.ts'
|
|
|
26
26
|
import type { BrainManager } from './brain_manager.ts'
|
|
27
27
|
import { BrainError } from './brain_error.ts'
|
|
28
28
|
import type { OutputSchema } from './output_schema.ts'
|
|
29
|
-
import type {
|
|
29
|
+
import type {
|
|
30
|
+
ChatOptions,
|
|
31
|
+
Message,
|
|
32
|
+
ToolUseBlock,
|
|
33
|
+
} from './types.ts'
|
|
30
34
|
import type { RunWithToolsOptions } from './provider.ts'
|
|
35
|
+
import type {
|
|
36
|
+
SuspendedRun,
|
|
37
|
+
SuspendedState,
|
|
38
|
+
ToolResultInput,
|
|
39
|
+
} from './suspended_run.ts'
|
|
31
40
|
|
|
32
41
|
/**
|
|
33
42
|
* Conditional return shape for `AgentRunner.run()`. With the default
|
|
@@ -42,16 +51,47 @@ import type { RunWithToolsOptions } from './provider.ts'
|
|
|
42
51
|
*/
|
|
43
52
|
export type AgentRunResult<T> = [T] extends [never] ? AgentResult : AgentGenerateResult<T>
|
|
44
53
|
|
|
45
|
-
|
|
54
|
+
/**
|
|
55
|
+
* Conditional return shape that flips when the runner has opted in
|
|
56
|
+
* to suspension via `.suspend(gate)`. The phantom `S` generic on
|
|
57
|
+
* `AgentRunner<T, S>` carries the bit; `S extends true` widens the
|
|
58
|
+
* union so callers must narrow with `isSuspended(...)` before
|
|
59
|
+
* touching `result.value` / `result.text`.
|
|
60
|
+
*/
|
|
61
|
+
export type AgentRunMaybeSuspended<T, S extends boolean> = [S] extends [true]
|
|
62
|
+
? AgentRunResult<T> | SuspendedRun
|
|
63
|
+
: AgentRunResult<T>
|
|
64
|
+
|
|
65
|
+
export class AgentRunner<T = never, S extends boolean = false> {
|
|
46
66
|
private prompt: string | undefined
|
|
47
67
|
private contextBag: Record<string, unknown> = {}
|
|
48
68
|
private schema: OutputSchema<T> | undefined
|
|
69
|
+
private suspendGate:
|
|
70
|
+
| ((call: ToolUseBlock, context?: Record<string, unknown>) => boolean | Promise<boolean>)
|
|
71
|
+
| undefined
|
|
49
72
|
|
|
50
73
|
constructor(
|
|
51
74
|
private readonly brain: BrainManager,
|
|
52
75
|
private readonly agent: Agent<unknown>,
|
|
53
76
|
) {}
|
|
54
77
|
|
|
78
|
+
/**
|
|
79
|
+
* Install a human-in-the-loop gate. Called before each tool
|
|
80
|
+
* execution inside the agent loop; when it returns `true`, the
|
|
81
|
+
* run pauses and `.run()` resolves with a `SuspendedRun` instead
|
|
82
|
+
* of `AgentResult`. Apps obtain results out-of-band and call
|
|
83
|
+
* `.resume(state, results)` to continue.
|
|
84
|
+
*
|
|
85
|
+
* Throws `BrainError` if the runner is also in structured-output
|
|
86
|
+
* mode (`.output(schema)`) — schema + suspend is a deferred slice.
|
|
87
|
+
*/
|
|
88
|
+
suspend(
|
|
89
|
+
gate: (call: ToolUseBlock, context?: Record<string, unknown>) => boolean | Promise<boolean>,
|
|
90
|
+
): AgentRunner<T, true> {
|
|
91
|
+
this.suspendGate = gate
|
|
92
|
+
return this as unknown as AgentRunner<T, true>
|
|
93
|
+
}
|
|
94
|
+
|
|
55
95
|
/** Set the user input. Required before `run()`. */
|
|
56
96
|
input(text: string): this {
|
|
57
97
|
this.prompt = text
|
|
@@ -127,10 +167,15 @@ export class AgentRunner<T = never> {
|
|
|
127
167
|
>
|
|
128
168
|
}
|
|
129
169
|
|
|
130
|
-
async run(): Promise<
|
|
170
|
+
async run(): Promise<AgentRunMaybeSuspended<T, S>> {
|
|
131
171
|
if (this.prompt === undefined) {
|
|
132
172
|
throw new BrainError('AgentRunner.run: input() must be called before run().')
|
|
133
173
|
}
|
|
174
|
+
if (this.suspendGate !== undefined && this.schema !== undefined) {
|
|
175
|
+
throw new BrainError(
|
|
176
|
+
'AgentRunner.run: `.suspend(...)` and `.output(schema)` cannot be combined in V1 — the schema variants don\'t yet model pause/resume. Run tools first with suspension, then call brain.generate(...) on the result for the structured summary.',
|
|
177
|
+
)
|
|
178
|
+
}
|
|
134
179
|
const messages: Message[] = [{ role: 'user', content: this.prompt }]
|
|
135
180
|
|
|
136
181
|
if (this.schema !== undefined) {
|
|
@@ -172,8 +217,39 @@ export class AgentRunner<T = never> {
|
|
|
172
217
|
context: this.contextBag,
|
|
173
218
|
}
|
|
174
219
|
if (this.agent.mcpServers.length > 0) options.mcpServers = this.agent.mcpServers
|
|
220
|
+
if (this.suspendGate !== undefined) options.shouldSuspend = this.suspendGate
|
|
175
221
|
const result = await this.brain.runTools(messages, this.agent.tools, options)
|
|
176
|
-
return result as
|
|
222
|
+
return result as AgentRunMaybeSuspended<T, S>
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Resume a previously-suspended run. Takes the `SuspendedRun.state`
|
|
227
|
+
* snapshot and the results gathered for each `pendingToolCalls`
|
|
228
|
+
* entry; the loop continues from where it paused.
|
|
229
|
+
*
|
|
230
|
+
* The runner's `suspend()` gate carries over so the same
|
|
231
|
+
* approval logic applies to any further tool calls — pass a
|
|
232
|
+
* fresh gate via `suspend()` before `resume()` to change the
|
|
233
|
+
* policy.
|
|
234
|
+
*/
|
|
235
|
+
async resume(
|
|
236
|
+
state: SuspendedState,
|
|
237
|
+
results: readonly ToolResultInput[],
|
|
238
|
+
): Promise<AgentRunMaybeSuspended<T, true>> {
|
|
239
|
+
if (this.schema !== undefined) {
|
|
240
|
+
throw new BrainError(
|
|
241
|
+
'AgentRunner.resume: structured-output runners cannot be resumed in V1 — `.output(schema)` is incompatible with pause/resume.',
|
|
242
|
+
)
|
|
243
|
+
}
|
|
244
|
+
const options: RunWithToolsOptions = {
|
|
245
|
+
...this.buildChatOptions(),
|
|
246
|
+
maxIterations: this.agent.maxIterations,
|
|
247
|
+
context: this.contextBag,
|
|
248
|
+
}
|
|
249
|
+
if (this.agent.mcpServers.length > 0) options.mcpServers = this.agent.mcpServers
|
|
250
|
+
if (this.suspendGate !== undefined) options.shouldSuspend = this.suspendGate
|
|
251
|
+
const result = await this.brain.resumeTools(state, results, this.agent.tools, options)
|
|
252
|
+
return result as AgentRunMaybeSuspended<T, true>
|
|
177
253
|
}
|
|
178
254
|
|
|
179
255
|
private buildChatOptions(): ChatOptions {
|
package/src/brain_manager.ts
CHANGED
|
@@ -40,7 +40,12 @@ import type {
|
|
|
40
40
|
TranscribeOptions,
|
|
41
41
|
TranscribeResult,
|
|
42
42
|
} from './types.ts'
|
|
43
|
-
import type {
|
|
43
|
+
import type {
|
|
44
|
+
Provider,
|
|
45
|
+
RunWithToolsOptions,
|
|
46
|
+
RunWithToolsOptionsWithSuspend,
|
|
47
|
+
} from './provider.ts'
|
|
48
|
+
import { appendResumeResults, type SuspendedRun, type SuspendedState, type ToolResultInput } from './suspended_run.ts'
|
|
44
49
|
import type { Tool } from './tool.ts'
|
|
45
50
|
import { DEFAULT_TIERS } from './brain_config.ts'
|
|
46
51
|
|
|
@@ -152,11 +157,21 @@ export class BrainManager {
|
|
|
152
157
|
* implement `runWithTools` (V1: OpenAI / Gemini / DeepSeek providers
|
|
153
158
|
* don't yet — only `AnthropicProvider`).
|
|
154
159
|
*/
|
|
160
|
+
runTools(
|
|
161
|
+
input: string | readonly Message[],
|
|
162
|
+
tools: readonly Tool[],
|
|
163
|
+
options: RunWithToolsOptionsWithSuspend,
|
|
164
|
+
): Promise<AgentResult | SuspendedRun>
|
|
165
|
+
runTools(
|
|
166
|
+
input: string | readonly Message[],
|
|
167
|
+
tools: readonly Tool[],
|
|
168
|
+
options?: RunWithToolsOptions,
|
|
169
|
+
): Promise<AgentResult>
|
|
155
170
|
async runTools(
|
|
156
171
|
input: string | readonly Message[],
|
|
157
172
|
tools: readonly Tool[],
|
|
158
173
|
options: RunWithToolsOptions = {},
|
|
159
|
-
): Promise<AgentResult> {
|
|
174
|
+
): Promise<AgentResult | SuspendedRun> {
|
|
160
175
|
const provider = this.provider(options.provider)
|
|
161
176
|
if (!provider.runWithTools) {
|
|
162
177
|
throw new BrainError(
|
|
@@ -175,6 +190,42 @@ export class BrainManager {
|
|
|
175
190
|
return provider.runWithTools(messages, tools, resolved)
|
|
176
191
|
}
|
|
177
192
|
|
|
193
|
+
/**
|
|
194
|
+
* Resume a previously-suspended tool-use loop. Takes the
|
|
195
|
+
* `SuspendedRun.state` snapshot plus the results the integrator
|
|
196
|
+
* gathered for each `pendingToolCalls` entry; appends a `tool_result`
|
|
197
|
+
* block per entry; re-enters `runTools` so the model can continue
|
|
198
|
+
* (potentially suspending again on the next tool).
|
|
199
|
+
*
|
|
200
|
+
* Mid-batch invariant: every pending call MUST get a result —
|
|
201
|
+
* otherwise the provider rejects the next request because the
|
|
202
|
+
* assistant turn's `tool_use` blocks are no longer balanced.
|
|
203
|
+
* `resumeTools` throws `BrainError` when results are missing.
|
|
204
|
+
*
|
|
205
|
+
* The `previousResponseId` carried on the snapshot (when the
|
|
206
|
+
* provider supports stateful conversations) is threaded back via
|
|
207
|
+
* `options.previousResponseId` automatically — per-call
|
|
208
|
+
* `options.previousResponseId` wins if supplied explicitly.
|
|
209
|
+
*/
|
|
210
|
+
async resumeTools(
|
|
211
|
+
state: SuspendedState,
|
|
212
|
+
results: readonly ToolResultInput[],
|
|
213
|
+
tools: readonly Tool[],
|
|
214
|
+
options: RunWithToolsOptions = {},
|
|
215
|
+
): Promise<AgentResult | SuspendedRun> {
|
|
216
|
+
const resumed = appendResumeResults(state, results)
|
|
217
|
+
const merged: RunWithToolsOptions = { ...options }
|
|
218
|
+
if (merged.previousResponseId === undefined && state.responseId !== undefined) {
|
|
219
|
+
merged.previousResponseId = state.responseId
|
|
220
|
+
}
|
|
221
|
+
const out = await this.runTools(
|
|
222
|
+
resumed,
|
|
223
|
+
tools,
|
|
224
|
+
merged as RunWithToolsOptionsWithSuspend,
|
|
225
|
+
)
|
|
226
|
+
return mergeResumeCounters(out, state)
|
|
227
|
+
}
|
|
228
|
+
|
|
178
229
|
/**
|
|
179
230
|
* Streaming variant of `generateWithTools`. Yields
|
|
180
231
|
* `AgentStreamEvent<T>`s as the loop progresses; the terminal
|
|
@@ -189,6 +240,7 @@ export class BrainManager {
|
|
|
189
240
|
tools: readonly Tool[],
|
|
190
241
|
options: RunWithToolsOptions = {},
|
|
191
242
|
): AsyncIterable<AgentStreamEvent<T>> {
|
|
243
|
+
rejectShouldSuspend(options, 'streamGenerateWithTools')
|
|
192
244
|
const provider = this.provider(options.provider)
|
|
193
245
|
if (!provider.streamWithToolsAndSchema) {
|
|
194
246
|
throw new BrainError(
|
|
@@ -220,6 +272,7 @@ export class BrainManager {
|
|
|
220
272
|
tools: readonly Tool[],
|
|
221
273
|
options: RunWithToolsOptions = {},
|
|
222
274
|
): Promise<AgentGenerateResult<T>> {
|
|
275
|
+
rejectShouldSuspend(options, 'generateWithTools')
|
|
223
276
|
const provider = this.provider(options.provider)
|
|
224
277
|
if (!provider.runWithToolsAndSchema) {
|
|
225
278
|
throw new BrainError(
|
|
@@ -250,6 +303,7 @@ export class BrainManager {
|
|
|
250
303
|
tools: readonly Tool[],
|
|
251
304
|
options: RunWithToolsOptions = {},
|
|
252
305
|
): AsyncIterable<AgentStreamEvent> {
|
|
306
|
+
rejectShouldSuspend(options, 'streamTools')
|
|
253
307
|
const provider = this.provider(options.provider)
|
|
254
308
|
if (!provider.streamWithTools) {
|
|
255
309
|
throw new BrainError(
|
|
@@ -411,3 +465,66 @@ function normalizeInput(input: string | readonly Message[]): readonly Message[]
|
|
|
411
465
|
}
|
|
412
466
|
return input
|
|
413
467
|
}
|
|
468
|
+
|
|
469
|
+
/**
|
|
470
|
+
* V1 scope guard. `shouldSuspend` is wired only into the non-
|
|
471
|
+
* streaming `runWithTools` loop; the streaming and schema variants
|
|
472
|
+
* don't yet model pause / resume, so silently ignoring would be
|
|
473
|
+
* worse than throwing. Apps that need both should run tools first
|
|
474
|
+
* (suspending as needed), then call `generate` for the structured
|
|
475
|
+
* summary in a separate step.
|
|
476
|
+
*/
|
|
477
|
+
/**
|
|
478
|
+
* Carry forward the pre-suspension iteration count + token usage so
|
|
479
|
+
* `result.iterations` / `result.usage` reflect the full run, not
|
|
480
|
+
* just the post-resume portion. When the resumed call suspends
|
|
481
|
+
* again, the new state's iterations + usage also get the carry-
|
|
482
|
+
* forward so apps see a running total across an arbitrary number
|
|
483
|
+
* of suspension cycles.
|
|
484
|
+
*/
|
|
485
|
+
function mergeResumeCounters(
|
|
486
|
+
out: AgentResult | SuspendedRun,
|
|
487
|
+
state: SuspendedState,
|
|
488
|
+
): AgentResult | SuspendedRun {
|
|
489
|
+
// +1 accounts for the suspended round itself — at suspension time
|
|
490
|
+
// the loop hadn't yet incremented `iterations` (we paused mid-
|
|
491
|
+
// batch, before tool execution). Supplying results to resume
|
|
492
|
+
// effectively completes that round.
|
|
493
|
+
const carryIter = state.iterations + 1
|
|
494
|
+
if ('status' in out) {
|
|
495
|
+
return {
|
|
496
|
+
...out,
|
|
497
|
+
state: {
|
|
498
|
+
...out.state,
|
|
499
|
+
iterations: out.state.iterations + carryIter,
|
|
500
|
+
usage: addUsage(out.state.usage, state.usage),
|
|
501
|
+
},
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
return {
|
|
505
|
+
...out,
|
|
506
|
+
iterations: out.iterations + carryIter,
|
|
507
|
+
usage: addUsage(out.usage, state.usage),
|
|
508
|
+
}
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
function addUsage(
|
|
512
|
+
a: SuspendedState['usage'],
|
|
513
|
+
b: SuspendedState['usage'],
|
|
514
|
+
): SuspendedState['usage'] {
|
|
515
|
+
return {
|
|
516
|
+
inputTokens: a.inputTokens + b.inputTokens,
|
|
517
|
+
outputTokens: a.outputTokens + b.outputTokens,
|
|
518
|
+
cacheReadTokens: a.cacheReadTokens + b.cacheReadTokens,
|
|
519
|
+
cacheCreationTokens: a.cacheCreationTokens + b.cacheCreationTokens,
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
function rejectShouldSuspend(options: RunWithToolsOptions, entry: string): void {
|
|
524
|
+
if (options.shouldSuspend !== undefined) {
|
|
525
|
+
throw new BrainError(
|
|
526
|
+
`BrainManager.${entry}: \`shouldSuspend\` is only supported on \`runTools\` (the non-streaming + no-schema entrypoint) in V1. Run tools first with suspension, then call \`generate\` for the structured summary as a separate step.`,
|
|
527
|
+
{ context: { entry } },
|
|
528
|
+
)
|
|
529
|
+
}
|
|
530
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -10,7 +10,11 @@
|
|
|
10
10
|
export { Agent } from './agent.ts'
|
|
11
11
|
export type { AgentGenerateResult } from './agent_generate_result.ts'
|
|
12
12
|
export type { AgentResult } from './agent_result.ts'
|
|
13
|
-
export {
|
|
13
|
+
export {
|
|
14
|
+
AgentRunner,
|
|
15
|
+
type AgentRunMaybeSuspended,
|
|
16
|
+
type AgentRunResult,
|
|
17
|
+
} from './agent_runner.ts'
|
|
14
18
|
export type { AgentStreamEvent } from './agent_stream_event.ts'
|
|
15
19
|
export {
|
|
16
20
|
type AnthropicProviderConfig,
|
|
@@ -33,6 +37,7 @@ export {
|
|
|
33
37
|
} from './brain_manager.ts'
|
|
34
38
|
export { BrainProvider } from './brain_provider.ts'
|
|
35
39
|
export { defineTool, type DefineToolSpec } from './define_tool.ts'
|
|
40
|
+
export { MCPClientPool, type MCPClientFactory } from './mcp/pool.ts'
|
|
36
41
|
export type { MCPServer, MCPServerToolConfig } from './mcp_server.ts'
|
|
37
42
|
export type { OutputSchema } from './output_schema.ts'
|
|
38
43
|
export { AnthropicProvider } from './providers/anthropic_provider.ts'
|
|
@@ -42,7 +47,18 @@ export { OllamaProvider } from './providers/ollama_provider.ts'
|
|
|
42
47
|
export { OpenAICompatProvider } from './providers/openai_compat_provider.ts'
|
|
43
48
|
export { OpenAIProvider } from './providers/openai_provider.ts'
|
|
44
49
|
export { OpenAIResponsesProvider } from './providers/openai_responses_provider.ts'
|
|
45
|
-
export type {
|
|
50
|
+
export type {
|
|
51
|
+
Provider,
|
|
52
|
+
RunWithToolsOptions,
|
|
53
|
+
RunWithToolsOptionsWithSuspend,
|
|
54
|
+
} from './provider.ts'
|
|
55
|
+
export {
|
|
56
|
+
appendResumeResults,
|
|
57
|
+
isSuspended,
|
|
58
|
+
type SuspendedRun,
|
|
59
|
+
type SuspendedState,
|
|
60
|
+
type ToolResultInput,
|
|
61
|
+
} from './suspended_run.ts'
|
|
46
62
|
export { Thread, type ThreadOptions, type ThreadState } from './thread.ts'
|
|
47
63
|
export type { Tool, ToolContext } from './tool.ts'
|
|
48
64
|
export { ToolExecutionError } from './tool_execution_error.ts'
|
|
@@ -50,6 +66,8 @@ export type {
|
|
|
50
66
|
ChatOptions,
|
|
51
67
|
ChatResult,
|
|
52
68
|
ChatUsage,
|
|
69
|
+
CompactConfig,
|
|
70
|
+
CompactionBlock,
|
|
53
71
|
ContentBlock,
|
|
54
72
|
AudioBlock,
|
|
55
73
|
AudioSource,
|
package/src/mcp/client.ts
CHANGED
|
@@ -66,6 +66,15 @@ export class MCPClient {
|
|
|
66
66
|
readonly server: MCPServer
|
|
67
67
|
private readonly _client: Client
|
|
68
68
|
private _connected = false
|
|
69
|
+
/**
|
|
70
|
+
* In-flight connect promise — set on the first concurrent
|
|
71
|
+
* `connect()` and cleared on settle. Subsequent callers that
|
|
72
|
+
* race against the first one await the same promise instead of
|
|
73
|
+
* each kicking off their own transport handshake. Necessary for
|
|
74
|
+
* pooled clients: a fresh `borrow()` followed by parallel
|
|
75
|
+
* `listTools()` + `callTool()` calls both hit the same connect.
|
|
76
|
+
*/
|
|
77
|
+
private _connecting: Promise<void> | undefined
|
|
69
78
|
private _transport: StreamableHTTPClientTransport | undefined
|
|
70
79
|
private _authProvider: StoreBackedOAuthProvider | undefined
|
|
71
80
|
|
|
@@ -87,6 +96,14 @@ export class MCPClient {
|
|
|
87
96
|
|
|
88
97
|
async connect(): Promise<void> {
|
|
89
98
|
if (this._connected) return
|
|
99
|
+
if (this._connecting) return this._connecting
|
|
100
|
+
this._connecting = this._doConnect().finally(() => {
|
|
101
|
+
this._connecting = undefined
|
|
102
|
+
})
|
|
103
|
+
return this._connecting
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
private async _doConnect(): Promise<void> {
|
|
90
107
|
const transport = this._buildTransport()
|
|
91
108
|
this._transport = transport
|
|
92
109
|
try {
|
package/src/mcp/index.ts
CHANGED
package/src/mcp/pool.ts
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `MCPClientPool` — long-lived, per-server `MCPClient` cache.
|
|
3
|
+
*
|
|
4
|
+
* Default `resolveMcpTools` flow constructs a fresh `MCPClient` per
|
|
5
|
+
* call to `runTools` / `runWithTools` / etc., handshakes the
|
|
6
|
+
* Streamable HTTP transport, lists tools, executes them, then
|
|
7
|
+
* closes the transport in a `finally`. For one-shot calls that's
|
|
8
|
+
* fine. For long-running agent workers — chat servers, background
|
|
9
|
+
* job processors — the per-call handshake adds noticeable
|
|
10
|
+
* latency and burns connection slots upstream.
|
|
11
|
+
*
|
|
12
|
+
* The pool keeps one connected `MCPClient` per `(server.name,
|
|
13
|
+
* server.url)` pair for the lifetime of the pool. `borrow(server)`
|
|
14
|
+
* returns the pooled client (lazily creating + connecting on
|
|
15
|
+
* first use). When the pool is in play, `resolveMcpTools` skips
|
|
16
|
+
* the per-call `close()` — the pool owns the lifetime — so
|
|
17
|
+
* subsequent calls reuse the existing transport.
|
|
18
|
+
*
|
|
19
|
+
* Apps own the pool's lifetime. Construct one at app boot, hand it
|
|
20
|
+
* to every provider (or to `BrainProvider` if using the DI
|
|
21
|
+
* helper), and call `pool.close()` on shutdown.
|
|
22
|
+
*
|
|
23
|
+
* ```ts
|
|
24
|
+
* const pool = new MCPClientPool()
|
|
25
|
+
*
|
|
26
|
+
* const openai = new OpenAIProvider(
|
|
27
|
+
* 'openai',
|
|
28
|
+
* { driver: 'openai', apiKey: ... },
|
|
29
|
+
* { mcpPool: pool },
|
|
30
|
+
* )
|
|
31
|
+
*
|
|
32
|
+
* // ... many runTools calls later, on graceful shutdown:
|
|
33
|
+
* await pool.close()
|
|
34
|
+
* ```
|
|
35
|
+
*
|
|
36
|
+
* Concurrency: `borrow()` is synchronous; `MCPClient.connect()`
|
|
37
|
+
* itself dedupes concurrent calls. Two parallel `runTools` calls
|
|
38
|
+
* sharing the same pooled client both await one handshake.
|
|
39
|
+
*
|
|
40
|
+
* Re-auth: when a borrowed client throws `MCPAuthRequiredError`,
|
|
41
|
+
* the pool keeps the (still un-authorized) client. Apps call
|
|
42
|
+
* `pool.evict(server)` after running `completeAuthorization` on
|
|
43
|
+
* a fresh client so subsequent borrows see the renewed state —
|
|
44
|
+
* or just reuse the same client the app authorized via the
|
|
45
|
+
* standard `MCPClient.completeAuthorization` flow.
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
import type { MCPServer } from '../mcp_server.ts'
|
|
49
|
+
import { MCPClient } from './client.ts'
|
|
50
|
+
|
|
51
|
+
/** Internal — factory injection for tests. Defaults to `new MCPClient(server)`. */
|
|
52
|
+
export type MCPClientFactory = (server: MCPServer) => MCPClient
|
|
53
|
+
|
|
54
|
+
export class MCPClientPool {
|
|
55
|
+
private readonly clients: Map<string, MCPClient> = new Map()
|
|
56
|
+
private readonly factory: MCPClientFactory
|
|
57
|
+
|
|
58
|
+
constructor(factory: MCPClientFactory = (s) => new MCPClient(s)) {
|
|
59
|
+
this.factory = factory
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Return the pooled client for `server`, constructing + caching it on
|
|
64
|
+
* first call. The client is NOT eagerly connected — the first
|
|
65
|
+
* `listTools` / `callTool` invocation triggers `connect()` once.
|
|
66
|
+
*/
|
|
67
|
+
borrow(server: MCPServer): MCPClient {
|
|
68
|
+
const key = poolKey(server)
|
|
69
|
+
const existing = this.clients.get(key)
|
|
70
|
+
if (existing) return existing
|
|
71
|
+
const client = this.factory(server)
|
|
72
|
+
this.clients.set(key, client)
|
|
73
|
+
return client
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Drop the cached client for `server` and close its transport.
|
|
78
|
+
* Useful after the app re-authorizes an OAuth server, or after a
|
|
79
|
+
* transient failure where the connection state is suspect and a
|
|
80
|
+
* fresh handshake on next borrow is preferable.
|
|
81
|
+
*/
|
|
82
|
+
async evict(server: MCPServer): Promise<void> {
|
|
83
|
+
const key = poolKey(server)
|
|
84
|
+
const client = this.clients.get(key)
|
|
85
|
+
if (!client) return
|
|
86
|
+
this.clients.delete(key)
|
|
87
|
+
await client.close()
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/** Close every pooled client. Call on app shutdown. */
|
|
91
|
+
async close(): Promise<void> {
|
|
92
|
+
const all = [...this.clients.values()]
|
|
93
|
+
this.clients.clear()
|
|
94
|
+
await Promise.all(all.map((c) => c.close()))
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/** Whether the pool currently holds a client for `server`. Used by tests. */
|
|
98
|
+
has(server: MCPServer): boolean {
|
|
99
|
+
return this.clients.has(poolKey(server))
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/** Pool key: name + url, so two `MCPServer`s with the same name but different URLs don't collide. */
|
|
104
|
+
function poolKey(server: MCPServer): string {
|
|
105
|
+
return `${server.name}|${server.url}`
|
|
106
|
+
}
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
import type { MCPServer } from '../mcp_server.ts'
|
|
22
22
|
import type { Tool, ToolContext } from '../tool.ts'
|
|
23
23
|
import { MCPClient } from './client.ts'
|
|
24
|
+
import type { MCPClientPool } from './pool.ts'
|
|
24
25
|
|
|
25
26
|
export interface ResolvedMcpTools {
|
|
26
27
|
tools: Tool[]
|
|
@@ -30,6 +31,14 @@ export interface ResolvedMcpTools {
|
|
|
30
31
|
export interface ResolveMcpToolsOptions {
|
|
31
32
|
/** Override the client factory — tests inject mock clients per server here. */
|
|
32
33
|
clientFactory?(server: MCPServer): MCPClient
|
|
34
|
+
/**
|
|
35
|
+
* When set, clients are borrowed from the pool instead of being
|
|
36
|
+
* constructed fresh per call, and the returned `close` becomes a
|
|
37
|
+
* no-op — the pool owns the lifetime, and apps call
|
|
38
|
+
* `pool.close()` on shutdown. Mutually beneficial with
|
|
39
|
+
* `clientFactory` (tests pass a factory to the pool itself).
|
|
40
|
+
*/
|
|
41
|
+
pool?: MCPClientPool
|
|
33
42
|
}
|
|
34
43
|
|
|
35
44
|
const NAME_SEPARATOR = '__'
|
|
@@ -40,13 +49,16 @@ export async function resolveMcpTools(
|
|
|
40
49
|
): Promise<ResolvedMcpTools> {
|
|
41
50
|
const clients: MCPClient[] = []
|
|
42
51
|
const tools: Tool[] = []
|
|
52
|
+
const pooled = options.pool !== undefined
|
|
43
53
|
|
|
44
54
|
for (const server of servers) {
|
|
45
55
|
if (server.tools?.enabled === false) continue
|
|
46
|
-
const client = options.
|
|
47
|
-
? options.
|
|
48
|
-
:
|
|
49
|
-
|
|
56
|
+
const client = options.pool
|
|
57
|
+
? options.pool.borrow(server)
|
|
58
|
+
: options.clientFactory
|
|
59
|
+
? options.clientFactory(server)
|
|
60
|
+
: new MCPClient(server)
|
|
61
|
+
if (!pooled) clients.push(client)
|
|
50
62
|
|
|
51
63
|
const allowed = server.tools?.allowedTools
|
|
52
64
|
const allowedSet = allowed ? new Set(allowed) : null
|
|
@@ -60,9 +72,15 @@ export async function resolveMcpTools(
|
|
|
60
72
|
|
|
61
73
|
return {
|
|
62
74
|
tools,
|
|
63
|
-
close
|
|
64
|
-
|
|
65
|
-
|
|
75
|
+
// Pooled clients live across calls — `close` becomes a no-op
|
|
76
|
+
// and the pool owns the lifetime. Non-pooled clients close
|
|
77
|
+
// here so each `runWithTools` invocation cleans up its own
|
|
78
|
+
// transports.
|
|
79
|
+
close: pooled
|
|
80
|
+
? async () => {}
|
|
81
|
+
: async () => {
|
|
82
|
+
await Promise.all(clients.map((c) => c.close()))
|
|
83
|
+
},
|
|
66
84
|
}
|
|
67
85
|
}
|
|
68
86
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `BrainMessage` — the typed row of `brain_message`. One per turn.
|
|
3
|
+
*
|
|
4
|
+
* `content` mirrors `Message.content` — string for plain text or
|
|
5
|
+
* `ContentBlock[]` when the turn carries structured blocks
|
|
6
|
+
* (tool_use, tool_result, image, compaction, ...). JSONB hydration
|
|
7
|
+
* is automatic.
|
|
8
|
+
*
|
|
9
|
+
* Assistant turns carry `model` / `usage` / `stop_reason` /
|
|
10
|
+
* `response_id`; user turns leave them NULL. The repository's
|
|
11
|
+
* `appendTurn` helper writes the right shape per role.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { Model } from '@strav/database'
|
|
15
|
+
import type { ChatUsage, ContentBlock } from '../types.ts'
|
|
16
|
+
import { brainMessageSchema } from './schema/brain_message_schema.ts'
|
|
17
|
+
|
|
18
|
+
export type BrainMessageRole = 'user' | 'assistant'
|
|
19
|
+
|
|
20
|
+
export class BrainMessage extends Model {
|
|
21
|
+
static override readonly schema = brainMessageSchema
|
|
22
|
+
|
|
23
|
+
id!: string
|
|
24
|
+
tenant_id!: string
|
|
25
|
+
thread_id!: string
|
|
26
|
+
turn_index!: number
|
|
27
|
+
role!: BrainMessageRole
|
|
28
|
+
content!: string | ContentBlock[]
|
|
29
|
+
model!: string | null
|
|
30
|
+
usage!: ChatUsage | null
|
|
31
|
+
stop_reason!: string | null
|
|
32
|
+
response_id!: string | null
|
|
33
|
+
created_at!: Date
|
|
34
|
+
}
|