@strav/brain 1.0.0-alpha.9 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +23 -7
- package/src/agent.ts +43 -5
- package/src/agent_generate_result.ts +32 -0
- package/src/agent_result.ts +7 -0
- package/src/agent_runner.ts +218 -14
- package/src/agent_stream_event.ts +100 -0
- package/src/brain_config.ts +218 -1
- package/src/brain_driver.ts +247 -0
- package/src/brain_error.ts +86 -10
- package/src/brain_manager.ts +359 -11
- package/src/brain_provider.ts +79 -9
- package/src/drivers/anthropic/anthropic_brain_driver.ts +641 -0
- package/src/drivers/anthropic/anthropic_helpers.ts +65 -0
- package/src/drivers/anthropic/anthropic_message_builder.ts +258 -0
- package/src/drivers/anthropic/anthropic_response_mapper.ts +123 -0
- package/src/drivers/anthropic/anthropic_tool_loop.ts +246 -0
- package/src/drivers/anthropic/index.ts +1 -0
- package/src/drivers/deepseek/deepseek_brain_driver.ts +117 -0
- package/src/drivers/deepseek/index.ts +1 -0
- package/src/drivers/gemini/gemini_brain_driver.ts +1064 -0
- package/src/drivers/gemini/index.ts +1 -0
- package/src/drivers/minimax/index.ts +1 -0
- package/src/drivers/minimax/minimax_brain_driver.ts +84 -0
- package/src/drivers/ollama/index.ts +1 -0
- package/src/drivers/ollama/ollama_brain_driver.ts +86 -0
- package/src/drivers/openai/index.ts +1 -0
- package/src/drivers/openai/openai_brain_driver.ts +796 -0
- package/src/drivers/openai/openai_helpers.ts +58 -0
- package/src/drivers/openai/openai_message_builder.ts +187 -0
- package/src/drivers/openai/openai_response_mapper.ts +70 -0
- package/src/drivers/openai/openai_tool_dispatch.ts +127 -0
- package/src/drivers/openai/openai_tool_loop.ts +191 -0
- package/src/drivers/openai_compat/index.ts +1 -0
- package/src/drivers/openai_compat/openai_compat_brain_driver.ts +616 -0
- package/src/drivers/openai_responses/index.ts +1 -0
- package/src/drivers/openai_responses/openai_responses_brain_driver.ts +1015 -0
- package/src/drivers/openrouter/index.ts +1 -0
- package/src/drivers/openrouter/openrouter_brain_driver.ts +137 -0
- package/src/drivers/qwen/index.ts +1 -0
- package/src/drivers/qwen/qwen_brain_driver.ts +103 -0
- package/src/index.ts +75 -11
- package/src/mcp/client.ts +243 -0
- package/src/mcp/index.ts +23 -0
- package/src/mcp/oauth.ts +227 -0
- package/src/mcp/pool.ts +106 -0
- package/src/mcp/resolve_mcp_tools.ts +108 -0
- package/src/mcp_server.ts +63 -0
- package/src/output_schema.ts +72 -0
- package/src/persistence/brain_message.ts +34 -0
- package/src/persistence/brain_message_repository.ts +98 -0
- package/src/persistence/brain_store.ts +166 -0
- package/src/persistence/brain_suspended_run.ts +30 -0
- package/src/persistence/brain_suspended_run_repository.ts +59 -0
- package/src/persistence/brain_thread.ts +30 -0
- package/src/persistence/brain_thread_repository.ts +56 -0
- package/src/persistence/database_brain_store.ts +190 -0
- package/src/persistence/index.ts +48 -0
- package/src/persistence/schemas/brain_message_schema.ts +61 -0
- package/src/persistence/schemas/brain_suspended_run_schema.ts +58 -0
- package/src/persistence/schemas/brain_thread_schema.ts +50 -0
- package/src/persistence/schemas/index.ts +3 -0
- package/src/suspended_run.ts +153 -0
- package/src/thread.ts +40 -1
- package/src/tool.ts +7 -0
- package/src/tool_runner.ts +81 -0
- package/src/translate/index.ts +19 -0
- package/src/translate/translate_cache.ts +78 -0
- package/src/translate/translate_provider.ts +46 -0
- package/src/translate/translator.ts +271 -0
- package/src/types.ts +398 -1
- package/src/zod/index.ts +121 -0
- package/src/provider.ts +0 -74
- package/src/providers/anthropic_provider.ts +0 -397
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `brainMessageSchema` — one row per assistant or user turn within
|
|
3
|
+
* a thread. Append-only; rows are inserted in `turn_index` order
|
|
4
|
+
* and never updated (compaction blocks live as a regular assistant
|
|
5
|
+
* row whose `content` includes a `CompactionBlock`).
|
|
6
|
+
*
|
|
7
|
+
* Why per-turn rather than a JSONB blob on `brain_thread`:
|
|
8
|
+
*
|
|
9
|
+
* - **Pagination.** UIs render the latest N turns; queries select
|
|
10
|
+
* by `(thread_id, turn_index)` instead of parsing a JSON array.
|
|
11
|
+
* - **Per-turn metadata.** `model` / `usage` / `stop_reason` /
|
|
12
|
+
* `response_id` are indexed and queryable for cost analytics,
|
|
13
|
+
* audit, and routing (e.g., "which threads used gpt-5?").
|
|
14
|
+
* - **Append cost.** Each `send()` is a single INSERT, not a
|
|
15
|
+
* rewrite of the entire array.
|
|
16
|
+
*
|
|
17
|
+
* Columns:
|
|
18
|
+
*
|
|
19
|
+
* - `id` ULID primary key.
|
|
20
|
+
* - `thread_id` FK → `brain_thread`. `onDelete: cascade` —
|
|
21
|
+
* deleting a thread drops its history.
|
|
22
|
+
* - `turn_index` 0-based ordinal. Unique with `thread_id` (app
|
|
23
|
+
* migration adds the index).
|
|
24
|
+
* - `role` `user` or `assistant`. The framework's
|
|
25
|
+
* `Message.role` union; tool_result blocks land
|
|
26
|
+
* on user turns per the assistant ↔ user
|
|
27
|
+
* handshake, so `role` reflects that.
|
|
28
|
+
* - `content` JSONB — `string | ContentBlock[]`. Carries
|
|
29
|
+
* every typed block: text, image, document,
|
|
30
|
+
* audio, tool_use, tool_result, mcp_*, compaction.
|
|
31
|
+
* - `model` Model identifier used for assistant turns
|
|
32
|
+
* (NULL for user turns).
|
|
33
|
+
* - `usage` JSONB — `ChatUsage` for assistant turns.
|
|
34
|
+
* - `stop_reason` Provider terminal reason (`end_turn`, etc.).
|
|
35
|
+
* - `response_id` OpenAI Responses API id when surfaced. Indexed
|
|
36
|
+
* via partial index in the recommended migration.
|
|
37
|
+
* - `created_at` Timestamp.
|
|
38
|
+
*
|
|
39
|
+
* Archetype.Event — append-only semantics; no `updated_at`.
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
import { Archetype, defineSchema } from '@strav/database'
|
|
43
|
+
import { brainThreadSchema } from './brain_thread_schema.ts'
|
|
44
|
+
|
|
45
|
+
/**
 * Schema for `brain_message`: one append-only row per user or
 * assistant turn of a thread (Event archetype, tenanted).
 */
export const brainMessageSchema = defineSchema(
  'brain_message',
  Archetype.Event,
  (table) => {
    // Primary key.
    table.id()
    // Owning thread; deleting the thread cascades to its messages.
    table.foreign('thread_id').to(brainThreadSchema).onDelete('cascade').notNull()
    // Ordinal of the turn within the thread.
    table.integer('turn_index').notNull()
    table.enum('role', ['user', 'assistant']).notNull()
    // Message payload stored as JSON.
    table.json('content').notNull()
    // Per-turn metadata; every column below is optional.
    table.string('model').max(128).nullable()
    table.json('usage').nullable()
    table.string('stop_reason').max(64).nullable()
    table.string('response_id').max(128).nullable()
    // Only a creation timestamp is declared — there is no `updated_at`.
    table.timestamp('created_at').notNull()
  },
  { tenanted: true },
)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `brainSuspendedRunSchema` — a paused agentic loop awaiting
|
|
3
|
+
* human-in-the-loop tool approval.
|
|
4
|
+
*
|
|
5
|
+
* Two real use cases drive the shape:
|
|
6
|
+
*
|
|
7
|
+
* 1. **Linked to a thread** — the suspending run was part of a
|
|
8
|
+
* conversational thread; the app wants the suspended state to
|
|
9
|
+
* reference its thread so the UI can show "thread X is paused
|
|
10
|
+
* waiting on Y." `thread_id` is the FK, nullable so detached
|
|
11
|
+
* runs are fine.
|
|
12
|
+
* 2. **Standalone** — the run came from a one-shot `runTools(...)`
|
|
13
|
+
* call (cron job, queued worker, ...). No thread context;
|
|
14
|
+
* `thread_id` stays NULL.
|
|
15
|
+
*
|
|
16
|
+
* Columns:
|
|
17
|
+
*
|
|
18
|
+
* - `id` ULID primary key. The id apps reference
|
|
19
|
+
* when resuming.
|
|
20
|
+
* - `thread_id` FK → `brain_thread`, NULLABLE,
|
|
21
|
+
* `onDelete: set null` — if the thread
|
|
22
|
+
* gets deleted, the suspended run keeps
|
|
23
|
+
* its data so the human approver can
|
|
24
|
+
* still inspect it.
|
|
25
|
+
* - `user_id` App-defined approver / owner.
|
|
26
|
+
* - `pending_tool_calls` JSONB — `ToolUseBlock[]` the model
|
|
27
|
+
* wants executed. Multi-call batches are
|
|
28
|
+
* captured together (mid-batch invariant).
|
|
29
|
+
* - `state` JSONB — `SuspendedState` snapshot. The
|
|
30
|
+
* framework's `brain.resumeTools(state,
|
|
31
|
+
* ...)` takes this as its first arg.
|
|
32
|
+
* - `status` `pending | resumed | cancelled`. Apps
|
|
33
|
+
* bulk-list pending runs and walk through
|
|
34
|
+
* an approval queue.
|
|
35
|
+
* - `timestamps` `created_at` for "how long pending?"
|
|
36
|
+
* sorts, `updated_at` for transition
|
|
37
|
+
* tracking.
|
|
38
|
+
*
|
|
39
|
+
* Tenanted: standard `tenant_id` + RLS.
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
import { Archetype, defineSchema } from '@strav/database'
|
|
43
|
+
import { brainThreadSchema } from './brain_thread_schema.ts'
|
|
44
|
+
|
|
45
|
+
/**
 * Schema for `brain_suspended_run`: a paused tool loop waiting for
 * results to be supplied before it can resume (Entity archetype,
 * tenanted).
 */
export const brainSuspendedRunSchema = defineSchema(
  'brain_suspended_run',
  Archetype.Entity,
  (table) => {
    // Primary key.
    table.id()
    // Optional link to a thread; the link is nulled (not cascaded)
    // when the thread is deleted, so the run row survives.
    table.foreign('thread_id').to(brainThreadSchema).onDelete('set null').nullable()
    table.string('user_id').max(64).nullable()
    // JSON payloads: the pending tool calls and the loop-state snapshot.
    table.json('pending_tool_calls').notNull()
    table.json('state').notNull()
    // Lifecycle marker; new rows start as `pending`.
    table.enum('status', ['pending', 'resumed', 'cancelled']).notNull().default('pending')
    table.timestamps()
  },
  { tenanted: true },
)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `brainThreadSchema` — one row per conversation.
|
|
3
|
+
*
|
|
4
|
+
* Carries the per-thread defaults that `Thread` already serializes
|
|
5
|
+
* (`system`, `options`, `lastResponseId`) plus a few framework-side
|
|
6
|
+
* fields apps want to filter / sort on:
|
|
7
|
+
*
|
|
8
|
+
* - `id` ULID primary key. Hand the same value back to
|
|
9
|
+
* `BrainStore.loadThread(id)` to rehydrate.
|
|
10
|
+
* - `user_id` App-defined owner. Stored as `text` (no FK) —
|
|
11
|
+
* user table shape varies per app. Indexed in
|
|
12
|
+
* the recommended migration so "list threads
|
|
13
|
+
* for user X" stays fast.
|
|
14
|
+
* - `title` Human label. Apps set it from the first user
|
|
15
|
+
* turn or via an explicit "rename" UI.
|
|
16
|
+
* - `system` Thread-owned system prompt. Mirrors
|
|
17
|
+
* `ThreadState.system`. JSONB so the structured
|
|
18
|
+
* form (text + cache flag) round-trips.
|
|
19
|
+
* - `options` Thread defaults applied to every `send()`.
|
|
20
|
+
* Mirrors `ThreadState.options`.
|
|
21
|
+
* - `last_response_id` OpenAI Responses API stateful pointer.
|
|
22
|
+
* Mirrors `ThreadState.lastResponseId`. NULL for
|
|
23
|
+
* non-Responses providers.
|
|
24
|
+
* - `timestamps` `created_at` + `updated_at` for sort / audit.
|
|
25
|
+
*
|
|
26
|
+
* Tenanted: `tenant_id` FK + RLS policies auto-injected by
|
|
27
|
+
* `@strav/database`. Apps wrap calls in `tenants.withTenant(...)`
|
|
28
|
+
* and the database enforces isolation — no app-level filter needed.
|
|
29
|
+
*
|
|
30
|
+
* The per-turn message history lives in `brain_message`, joined by
|
|
31
|
+
* `thread_id`. This keeps every send to an O(1) INSERT and makes
|
|
32
|
+
* pagination / per-turn analytics cheap.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
import { Archetype, defineSchema } from '@strav/database'
|
|
36
|
+
|
|
37
|
+
/**
 * Schema for `brain_thread`: one row per conversation (Entity
 * archetype, tenanted). All descriptive columns are nullable.
 */
export const brainThreadSchema = defineSchema(
  'brain_thread',
  Archetype.Entity,
  (table) => {
    // Primary key.
    table.id()
    // App-defined owner, stored as a plain string (no foreign key).
    table.string('user_id').max(64).nullable()
    table.string('title').max(255).nullable()
    // JSON columns for the thread's system prompt and default options.
    table.json('system').nullable()
    table.json('options').nullable()
    table.string('last_response_id').max(128).nullable()
    table.timestamps()
  },
  { tenanted: true },
)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `SuspendedRun` — what `runWithTools` (and `runner.run()`) returns
|
|
3
|
+
* when the agentic loop pauses because `shouldSuspend(call)` returned
|
|
4
|
+
* `true` for a tool the model wants to call.
|
|
5
|
+
*
|
|
6
|
+
* Use case: human-in-the-loop gating. The integrator inspects
|
|
7
|
+
* `pendingToolCalls`, obtains results out-of-band (human approval,
|
|
8
|
+
* external worker, queued job, ...), and calls
|
|
9
|
+
* `brain.resumeTools(state, results, ...)` or
|
|
10
|
+
* `runner.resume(state, results)` to continue the conversation.
|
|
11
|
+
*
|
|
12
|
+
* State model:
|
|
13
|
+
* - `state.messages` contains every message exchanged up to and
|
|
14
|
+
* including the assistant turn that requested the pending tool
|
|
15
|
+
* calls. Resume picks up by appending tool_result blocks for
|
|
16
|
+
* each pending call and re-entering the loop — no special
|
|
17
|
+
* provider-level resume hook is needed.
|
|
18
|
+
* - `state` is plain JSON — apps persist it across process
|
|
19
|
+
* boundaries (e.g., one row per pending agent run in Postgres).
|
|
20
|
+
*
|
|
21
|
+
* Mid-batch invariant: when a tool call in a multi-call batch
|
|
22
|
+
* triggers suspension, ALL remaining calls in that same batch are
|
|
23
|
+
* captured together in `pendingToolCalls`. Apps MUST supply results
|
|
24
|
+
* for every entry on resume; otherwise the provider's
|
|
25
|
+
* tool_use / tool_result pairing becomes unbalanced and the next
|
|
26
|
+
* model call rejects.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { BrainError } from './brain_error.ts'
|
|
30
|
+
import type {
|
|
31
|
+
ChatUsage,
|
|
32
|
+
ContentBlock,
|
|
33
|
+
Message,
|
|
34
|
+
ToolResultBlock,
|
|
35
|
+
ToolUseBlock,
|
|
36
|
+
} from './types.ts'
|
|
37
|
+
|
|
38
|
+
/**
 * Value describing a tool loop that paused instead of completing.
 * Discriminated by the literal `status: 'suspended'`.
 */
export interface SuspendedRun {
  /** Literal tag; `isSuspended` checks exactly this field. */
  status: 'suspended'
  /**
   * Tool calls still awaiting results: the call that caused the
   * pause plus any unexecuted calls from the same assistant turn.
   * Results supplied on resume are matched to these by `id`.
   */
  pendingToolCalls: ToolUseBlock[]
  /** Snapshot of the loop at the pause point; meant to be JSON-serializable. */
  state: SuspendedState
}
|
|
49
|
+
|
|
50
|
+
/** Loop state captured when a run suspends. */
export interface SuspendedState {
  /** Every message so far, ending with the assistant turn that requested the pending calls. */
  messages: Message[]
  /** Number of loop iterations completed at the pause point; carried over on resume. */
  iterations: number
  /** Token usage summed over the iterations completed so far. */
  usage: ChatUsage
  /**
   * Response id reported by the provider at the pause point, when it
   * reports one (OpenAI Responses API). Resume passes it back as
   * `previousResponseId` so the conversation continues from there.
   */
  responseId?: string
}
|
|
65
|
+
|
|
66
|
+
/**
 * Caller-supplied outcome for one pending tool call on resume. It
 * has the fields of a `tool_result` block without the `type` tag,
 * which is added when the block is built.
 *
 * To report a failure, put a description of the error in `content`
 * and set `isError: true`.
 */
export interface ToolResultInput {
  /** Id of the pending tool call this result answers. */
  toolUseId: string
  /** Result text, or an error description when `isError` is set. */
  content: string
  /** Marks the result as a failure; omitted or `false` means success. */
  isError?: boolean
}
|
|
80
|
+
|
|
81
|
+
/**
 * Type guard telling a `SuspendedRun` apart from any other value.
 * Returns `true` only for a non-null object whose `status` property
 * is exactly `'suspended'`; no other field is inspected.
 *
 * ```ts
 * const out = await brain.runTools(prompt, tools, { shouldSuspend })
 * if (isSuspended(out)) {
 *   await persistForLater(out.pendingToolCalls, out.state)
 *   return
 * }
 * render(out.text)
 * ```
 */
export function isSuspended(value: unknown): value is SuspendedRun {
  // Primitives and null can never carry the tag.
  if (typeof value !== 'object' || value === null) return false
  const { status } = value as { status?: unknown }
  return status === 'suspended'
}
|
|
101
|
+
|
|
102
|
+
/**
 * Build the message list used to resume a suspended run: a copy of
 * `state.messages` followed by one user-role message holding a
 * `tool_result` block per supplied result. `state` is not mutated.
 *
 * Before building, every `tool_use` id found in the latest assistant
 * turn must have a matching entry in `results`. Results with ids
 * that are not pending are passed through without a check.
 *
 * @param state   Snapshot captured when the run suspended.
 * @param results Outcomes for the pending tool calls.
 * @returns A new array: the prior messages plus the tool-result message.
 * @throws BrainError when a pending tool call has no supplied result.
 */
export function appendResumeResults(
  state: SuspendedState,
  results: readonly ToolResultInput[],
): Message[] {
  const pendingIds = collectPendingIds(state.messages)
  const suppliedIds = new Set(results.map((result) => result.toolUseId))

  for (const id of pendingIds) {
    if (suppliedIds.has(id)) continue
    throw new BrainError(
      `resumeTools: missing result for pending tool call id "${id}". Every pending tool_use in the suspending assistant turn must be answered on resume.`,
      { context: { pendingIds: [...pendingIds], suppliedIds: results.map((r) => r.toolUseId) } },
    )
  }

  // `isError` is only written onto the block when it is truthy.
  const toolResults: ContentBlock[] = results.map(
    (result): ToolResultBlock =>
      result.isError
        ? {
            type: 'tool_result',
            toolUseId: result.toolUseId,
            content: result.content,
            isError: true,
          }
        : { type: 'tool_result', toolUseId: result.toolUseId, content: result.content },
  )

  return [...state.messages, { role: 'user', content: toolResults }]
}

/**
 * Return the ids of all `tool_use` blocks in the most recent
 * assistant message. Yields an empty list when there is no assistant
 * message or when that message's content is a plain string.
 */
function collectPendingIds(messages: readonly Message[]): string[] {
  // Walk backwards to the last assistant turn.
  let index = messages.length - 1
  while (index >= 0 && messages[index]!.role !== 'assistant') index--
  if (index < 0) return []

  const { content } = messages[index]!
  if (typeof content === 'string') return []

  const ids: string[] = []
  for (const block of content) {
    if (block.type === 'tool_use') ids.push((block as ToolUseBlock).id)
  }
  return ids
}
|
package/src/thread.ts
CHANGED
|
@@ -35,6 +35,14 @@ export interface ThreadState {
|
|
|
35
35
|
messages: Message[]
|
|
36
36
|
system?: SystemPrompt
|
|
37
37
|
options?: ChatOptions
|
|
38
|
+
/**
|
|
39
|
+
* Last provider response id captured by `send(...)` — restored on
|
|
40
|
+
* `fromJSON` so subsequent sends thread it via
|
|
41
|
+
* `ChatOptions.previousResponseId` automatically. Only ever set
|
|
42
|
+
* when the underlying provider surfaces `responseId` (OpenAI
|
|
43
|
+
* Responses API today).
|
|
44
|
+
*/
|
|
45
|
+
lastResponseId?: string
|
|
38
46
|
}
|
|
39
47
|
|
|
40
48
|
export class Thread {
|
|
@@ -42,6 +50,13 @@ export class Thread {
|
|
|
42
50
|
readonly messages: Message[] = []
|
|
43
51
|
readonly system?: SystemPrompt
|
|
44
52
|
readonly options?: ChatOptions
|
|
53
|
+
/**
|
|
54
|
+
* Last response id returned by the provider on this thread. Used to
|
|
55
|
+
* thread stateful-conversation hints (OpenAI Responses API) into
|
|
56
|
+
* the next `send(...)` so apps don't have to manage it manually.
|
|
57
|
+
* `undefined` for providers that don't surface a response id.
|
|
58
|
+
*/
|
|
59
|
+
lastResponseId?: string
|
|
45
60
|
private readonly brain: BrainManager
|
|
46
61
|
|
|
47
62
|
constructor(brain: BrainManager, opts: ThreadOptions = {}) {
|
|
@@ -54,6 +69,11 @@ export class Thread {
|
|
|
54
69
|
* Append a user turn, call the model, append the assistant reply,
|
|
55
70
|
* and return the reply text. Per-call options override the
|
|
56
71
|
* thread's defaults; `system` always comes from the thread.
|
|
72
|
+
*
|
|
73
|
+
* When the underlying provider supports stateful conversations
|
|
74
|
+
* (OpenAI Responses API), `previousResponseId` is auto-threaded
|
|
75
|
+
* from the prior turn — apps don't need to manage it. Per-call
|
|
76
|
+
* `options.previousResponseId` wins if supplied explicitly.
|
|
57
77
|
*/
|
|
58
78
|
async send(text: string, options: ChatOptions = {}): Promise<string> {
|
|
59
79
|
this.messages.push({ role: 'user', content: text })
|
|
@@ -65,8 +85,25 @@ export class Thread {
|
|
|
65
85
|
// mid-thread by changing the system prompt every turn.
|
|
66
86
|
...(this.system !== undefined ? { system: this.system } : {}),
|
|
67
87
|
}
|
|
88
|
+
if (
|
|
89
|
+
merged.previousResponseId === undefined &&
|
|
90
|
+
this.lastResponseId !== undefined
|
|
91
|
+
) {
|
|
92
|
+
merged.previousResponseId = this.lastResponseId
|
|
93
|
+
}
|
|
68
94
|
const result = await this.brain.chat(this.messages, merged)
|
|
69
|
-
|
|
95
|
+
// Preserve structured assistant content when present (compaction
|
|
96
|
+
// blocks today; reasoning blocks later). Round-tripping these
|
|
97
|
+
// back to the provider on subsequent sends is what makes
|
|
98
|
+
// server-side compaction actually save tokens — once a turn
|
|
99
|
+
// carries a `compaction` block, the older raw turns drop out
|
|
100
|
+
// and the model only re-reads the summary.
|
|
101
|
+
if (result.content !== undefined && result.content.length > 0) {
|
|
102
|
+
this.messages.push({ role: 'assistant', content: result.content })
|
|
103
|
+
} else {
|
|
104
|
+
this.messages.push({ role: 'assistant', content: result.text })
|
|
105
|
+
}
|
|
106
|
+
if (result.responseId !== undefined) this.lastResponseId = result.responseId
|
|
70
107
|
return result.text
|
|
71
108
|
}
|
|
72
109
|
|
|
@@ -80,6 +117,7 @@ export class Thread {
|
|
|
80
117
|
const state: ThreadState = { messages: [...this.messages] }
|
|
81
118
|
if (this.system !== undefined) state.system = this.system
|
|
82
119
|
if (this.options !== undefined) state.options = this.options
|
|
120
|
+
if (this.lastResponseId !== undefined) state.lastResponseId = this.lastResponseId
|
|
83
121
|
return state
|
|
84
122
|
}
|
|
85
123
|
|
|
@@ -94,6 +132,7 @@ export class Thread {
|
|
|
94
132
|
if (state.options !== undefined) options.options = state.options
|
|
95
133
|
const thread = new Thread(brain, options)
|
|
96
134
|
for (const m of state.messages) thread.messages.push(m)
|
|
135
|
+
if (state.lastResponseId !== undefined) thread.lastResponseId = state.lastResponseId
|
|
97
136
|
return thread
|
|
98
137
|
}
|
|
99
138
|
}
|
package/src/tool.ts
CHANGED
|
@@ -23,6 +23,13 @@ export interface ToolContext {
|
|
|
23
23
|
readonly callId: string
|
|
24
24
|
/** Per-run free-form context bag passed by the caller. Optional. */
|
|
25
25
|
readonly context: Readonly<Record<string, unknown>>
|
|
26
|
+
/**
|
|
27
|
+
* Cancellation signal forwarded from the run's `options.signal`.
|
|
28
|
+
* Tools that wrap network calls (HTTP fetches, MCP servers, child
|
|
29
|
+
* processes) should pass this through so cancellation actually
|
|
30
|
+
* unwinds in-flight work.
|
|
31
|
+
*/
|
|
32
|
+
readonly signal?: AbortSignal
|
|
26
33
|
}
|
|
27
34
|
|
|
28
35
|
export interface Tool<TInput = unknown, TOutput = unknown> {
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `runToolWithRecovery` — shared helper used by every provider's
|
|
3
|
+
* agentic loop to execute one tool call.
|
|
4
|
+
*
|
|
5
|
+
* Encapsulates two error paths and the optional `onToolError`
|
|
6
|
+
* recovery callback:
|
|
7
|
+
*
|
|
8
|
+
* 1. **Tool not registered** — the model called a name that
|
|
9
|
+
* isn't in `toolMap`. Without recovery, throw
|
|
10
|
+
* `ToolExecutionError`. With recovery, the callback's return
|
|
11
|
+
* string becomes the `tool_result.content` (with `isError:
|
|
12
|
+
* true`) and the loop continues — the model sees "unknown
|
|
13
|
+
* tool" and adapts.
|
|
14
|
+
*
|
|
15
|
+
* 2. **`execute()` throws** — the tool's body raised. Same
|
|
16
|
+
* pattern: either rethrow as `ToolExecutionError` or feed
|
|
17
|
+
* back as an error result.
|
|
18
|
+
*
|
|
19
|
+
* The returned shape is the framework-agnostic `{ content, isError }`
|
|
20
|
+
* pair each provider then wraps into its own `tool_result` block
|
|
21
|
+
* shape (Anthropic `tool_result` with `is_error`; OpenAI tool-role
|
|
22
|
+
* message content; Gemini `functionResponse` with `{ error }`).
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import type { RunWithToolsOptions } from './brain_driver.ts'
|
|
26
|
+
import type { Tool, ToolContext } from './tool.ts'
|
|
27
|
+
import { ToolExecutionError } from './tool_execution_error.ts'
|
|
28
|
+
|
|
29
|
+
/**
 * Provider-neutral outcome of one tool call. Each provider wraps
 * this pair into its own tool-result representation.
 */
export interface ToolRunResult {
  /** Text handed back to the model: the tool output, or the recovery message on failure. */
  content: string
  /** `true` when `content` describes a failure rather than a tool output. */
  isError: boolean
}
|
|
33
|
+
|
|
34
|
+
/**
 * Execute one tool call and normalise the outcome to a
 * `{ content, isError }` pair.
 *
 * Failure paths — both are routed through `recoverOrThrow`, so they
 * either become an error result (when `options.onToolError` returns
 * a string) or throw a `ToolExecutionError`:
 *   - `tool` is `undefined`: the requested name is not registered.
 *   - `tool.execute(...)` throws or rejects.
 *
 * On success a string output is passed through unchanged and any
 * other value is JSON-serialised.
 *
 * @param tool     The resolved tool, or `undefined` when lookup failed.
 * @param toolName Name the model asked for; used in error reporting.
 * @param callId   Id of this tool call; exposed to the tool via its context.
 * @param input    Arguments passed to `tool.execute`.
 * @param options  Run options; `context`, `signal` and `onToolError` are read here.
 * @returns The tool output (or recovery message) with its error flag.
 * @throws ToolExecutionError when the call fails and no recovery string is produced.
 */
export async function runToolWithRecovery(
  tool: Tool | undefined,
  toolName: string,
  callId: string,
  input: unknown,
  options: RunWithToolsOptions,
): Promise<ToolRunResult> {
  if (!tool) {
    return recoverOrThrow(
      new ToolExecutionError(
        toolName,
        callId,
        new Error(`Tool "${toolName}" is not registered.`),
      ),
      options,
    )
  }

  const ctx: ToolContext = {
    callId,
    context: options.context ?? {},
    // Only attach `signal` when one was supplied, so the key is absent otherwise.
    ...(options.signal !== undefined ? { signal: options.signal } : {}),
  }

  let output: unknown
  try {
    output = await tool.execute(input, ctx)
  } catch (cause) {
    return recoverOrThrow(new ToolExecutionError(toolName, callId, cause), options)
  }

  if (typeof output === 'string') return { content: output, isError: false }

  // `JSON.stringify` yields `undefined` (not a string) for `undefined`,
  // functions and symbols — e.g. a tool that returns nothing. Fall back
  // to the JSON literal `null` so `content` is always a string.
  const serialized = JSON.stringify(output) as string | undefined
  return { content: serialized ?? 'null', isError: false }
}
|
|
68
|
+
|
|
69
|
+
/**
 * Turn a `ToolExecutionError` into an error result, or rethrow it.
 *
 * `options.onToolError` is invoked with the error when present. If
 * it returns a string, that string becomes the result content with
 * `isError: true`; for any other return value (or when no callback
 * is set) the original error is thrown.
 *
 * @throws ToolExecutionError the `error` argument itself, when no recovery string is produced.
 */
export function recoverOrThrow(
  error: ToolExecutionError,
  options: RunWithToolsOptions,
): ToolRunResult {
  const replacement = options.onToolError?.(error)
  if (typeof replacement === 'string') {
    return { content: replacement, isError: true }
  }
  throw error
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
// Public API of `@strav/brain/translate`.
|
|
2
|
+
//
|
|
3
|
+
// LLM-backed translation primitive on top of `BrainManager`. Sonnet-
|
|
4
|
+
// uniform by default (tier='balanced'), with parallel fan-out across
|
|
5
|
+
// target languages, JSON-schema constrained output, prompt caching on
|
|
6
|
+
// the system prompt, and a process-local LRU for repeat strings.
|
|
7
|
+
|
|
8
|
+
export { TranslateCache, cacheKey } from './translate_cache.ts'
|
|
9
|
+
export {
|
|
10
|
+
type TranslateConfig,
|
|
11
|
+
TranslatorProvider,
|
|
12
|
+
} from './translate_provider.ts'
|
|
13
|
+
export {
|
|
14
|
+
type BatchTranslateOptions,
|
|
15
|
+
DEFAULT_SYSTEM_PROMPT,
|
|
16
|
+
type TranslateOptions,
|
|
17
|
+
Translator,
|
|
18
|
+
type TranslatorOptions,
|
|
19
|
+
} from './translator.ts'
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
 * `TranslateCache` — small in-memory string → string cache with
 * least-recently-used eviction.
 *
 * Recency is tracked through `Map` insertion order: a hit in `get`
 * and every `set` move the key to the end, and when the store grows
 * past `capacity` the first (least recently used) key is dropped.
 * A `capacity` of `0` turns the cache off: `get` always misses and
 * `set` stores nothing.
 *
 * The store lives only in this object; nothing is persisted.
 */

export class TranslateCache {
  private readonly store = new Map<string, string>()

  /** @param capacity Maximum number of entries kept; `0` disables caching. */
  constructor(readonly capacity: number) {}

  /** Look up `key`; a hit also marks the entry as most recently used. */
  get(key: string): string | undefined {
    if (this.capacity === 0) return undefined
    const cached = this.store.get(key)
    if (cached !== undefined) {
      // Re-insert so the key moves to the end of the insertion order.
      this.store.delete(key)
      this.store.set(key, cached)
    }
    return cached
  }

  /** Store `value` under `key` as the most recent entry, evicting the oldest on overflow. */
  set(key: string, value: string): void {
    if (this.capacity === 0) return
    // Deleting first guarantees an existing key is moved to the end.
    this.store.delete(key)
    this.store.set(key, value)
    if (this.store.size <= this.capacity) return
    // Drop exactly one entry: the first key in insertion order.
    for (const oldestKey of this.store.keys()) {
      this.store.delete(oldestKey)
      break
    }
  }

  /** Remove every entry. */
  clear(): void {
    this.store.clear()
  }

  /** Number of entries currently held. */
  get size(): number {
    return this.store.size
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Build the cache key for a single (text → language) translation.
 *
 * The key has the form `model|from|to|hash`: a missing `from` is
 * written as `auto`, and `hash` is the 8-hex-digit FNV-1a 32-bit
 * digest of `text`, so keys stay short whatever the text length.
 *
 * NOTE(review): the key carries only the hash of `text`, so two
 * different texts whose hashes collide (with the same model / from /
 * to) produce the same key. Whether that can surface as a wrong
 * cache hit depends on how callers use the cache — confirm against
 * `Translator`.
 */
export function cacheKey(input: {
  model: string
  from: string | undefined
  to: string
  text: string
}): string {
  const { model, from, to, text } = input
  return [model, from ?? 'auto', to, fnv1a32(text)].join('|')
}

/**
 * FNV-1a 32-bit hash of `text`, rendered as 8 lowercase hex digits.
 * Each UTF-16 code unit is mixed in as a whole (not byte by byte).
 */
function fnv1a32(text: string): string {
  const FNV_PRIME = 0x01000193
  let digest = 0x811c9dc5 // FNV offset basis
  for (let pos = 0; pos < text.length; pos++) {
    digest ^= text.charCodeAt(pos)
    // 32-bit wrapping multiply by the FNV prime, kept unsigned.
    digest = Math.imul(digest, FNV_PRIME) >>> 0
  }
  return digest.toString(16).padStart(8, '0')
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `TranslatorProvider` — `ServiceProvider` that binds a default
|
|
3
|
+
* `Translator` singleton resolved against the registered
|
|
4
|
+
* `BrainManager`.
|
|
5
|
+
*
|
|
6
|
+
* Reads `config.brain.translate` (optional) for defaults — provider,
|
|
7
|
+
* tier, model, cacheSize. Apps that need multiple translators with
|
|
8
|
+
* different defaults (e.g. one for headlines, one for body) skip the
|
|
9
|
+
* provider and construct `new Translator({ brain, ... })` directly.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { type Application, ConfigRepository, ServiceProvider } from '@strav/kernel'
|
|
13
|
+
import { BrainManager } from '../brain_manager.ts'
|
|
14
|
+
import type { ModelTier } from '../types.ts'
|
|
15
|
+
import { Translator } from './translator.ts'
|
|
16
|
+
|
|
17
|
+
/**
 * Shape of the optional `brain.translate` configuration entry.
 * Every field is optional; `TranslatorProvider` forwards each one to
 * the `Translator` constructor only when it is defined.
 */
export interface TranslateConfig {
  /** Forwarded as the translator's `provider` option. */
  provider?: string
  /** Forwarded as the translator's `tier` option. */
  tier?: ModelTier
  /** Forwarded as the translator's `model` option. */
  model?: string
  /** Forwarded as the translator's `systemPrompt` option. */
  systemPrompt?: string
  /** Forwarded as the translator's `cacheSize` option. */
  cacheSize?: number
  /** Forwarded as the translator's `cache` option. */
  cache?: boolean
}
|
|
25
|
+
|
|
26
|
+
/**
 * Service provider that registers a `Translator` singleton.
 *
 * The factory resolves `BrainManager`, reads the optional
 * `brain.translate` config entry, and passes only the settings that
 * are actually defined to the `Translator` constructor.
 */
export class TranslatorProvider extends ServiceProvider {
  override readonly name = 'brain.translate'
  override readonly dependencies = ['brain']

  /** Bind the `Translator` singleton factory on the application. */
  override register(app: Application): void {
    app.singleton(Translator, (container) => {
      const brain = container.resolve(BrainManager)
      const configured = container.resolve(ConfigRepository).get('brain.translate') as
        | TranslateConfig
        | undefined
      // A missing config entry behaves like an empty one.
      const settings: TranslateConfig = configured ?? {}
      const { provider, tier, model, systemPrompt, cacheSize, cache } = settings

      // Undefined settings are left out entirely rather than passed as `undefined`.
      return new Translator({
        brain,
        ...(provider !== undefined ? { provider } : {}),
        ...(tier !== undefined ? { tier } : {}),
        ...(model !== undefined ? { model } : {}),
        ...(systemPrompt !== undefined ? { systemPrompt } : {}),
        ...(cacheSize !== undefined ? { cacheSize } : {}),
        ...(cache !== undefined ? { cache } : {}),
      })
    })
  }
}
|