npm - @strav/brain - Versions diffs - 1.0.0-alpha.16 → 1.0.0-alpha.18 - Mend

@strav/brain 1.0.0-alpha.16 → 1.0.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/package.json +4 -2
package/src/agent.ts +34 -5
package/src/agent_generate_result.ts +2 -0
package/src/agent_result.ts +7 -0
package/src/agent_runner.ts +134 -15
package/src/agent_stream_event.ts +100 -0
package/src/brain_config.ts +91 -1
package/src/brain_manager.ts +287 -6
package/src/brain_provider.ts +25 -1
package/src/index.ts +37 -2
package/src/mcp/client.ts +99 -13
package/src/mcp/index.ts +7 -0
package/src/mcp/oauth.ts +227 -0
package/src/mcp/pool.ts +106 -0
package/src/mcp/resolve_mcp_tools.ts +31 -9
package/src/mcp_server.ts +16 -0
package/src/persistence/brain_message.ts +34 -0
package/src/persistence/brain_message_repository.ts +106 -0
package/src/persistence/brain_store.ts +166 -0
package/src/persistence/brain_suspended_run.ts +30 -0
package/src/persistence/brain_suspended_run_repository.ts +68 -0
package/src/persistence/brain_thread.ts +30 -0
package/src/persistence/brain_thread_repository.ts +65 -0
package/src/persistence/database_brain_store.ts +190 -0
package/src/persistence/index.ts +48 -0
package/src/persistence/schema/brain_message_schema.ts +61 -0
package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
package/src/persistence/schema/brain_thread_schema.ts +50 -0
package/src/persistence/schema/index.ts +3 -0
package/src/provider.ts +145 -1
package/src/providers/anthropic_provider.ts +723 -38
package/src/providers/deepseek_provider.ts +117 -0
package/src/providers/gemini_provider.ts +625 -33
package/src/providers/ollama_provider.ts +86 -0
package/src/providers/openai_compat_provider.ts +616 -0
package/src/providers/openai_provider.ts +801 -43
package/src/providers/openai_responses_provider.ts +1015 -0
package/src/suspended_run.ts +153 -0
package/src/thread.ts +40 -1
package/src/tool.ts +7 -0
package/src/tool_runner.ts +81 -0
package/src/types.ts +343 -0

package/src/suspended_run.ts ADDED Viewed

@@ -0,0 +1,153 @@
+/**
+ * `SuspendedRun` — what `runWithTools` (and `runner.run()`) returns
+ * when the agentic loop pauses because `shouldSuspend(call)` returned
+ * `true` for a tool the model wants to call.
+ *
+ * Use case: human-in-the-loop gating. The integrator inspects
+ * `pendingToolCalls`, obtains results out-of-band (human approval,
+ * external worker, queued job, ...), and calls
+ * `brain.resumeTools(state, results, ...)` or
+ * `runner.resume(state, results)` to continue the conversation.
+ *
+ * State model:
+ *   - `state.messages` contains every message exchanged up to and
+ *     including the assistant turn that requested the pending tool
+ *     calls. Resume picks up by appending tool_result blocks for
+ *     each pending call and re-entering the loop — no special
+ *     provider-level resume hook is needed.
+ *   - `state` is plain JSON — apps persist it across process
+ *     boundaries (e.g., one row per pending agent run in Postgres).
+ *
+ * Mid-batch invariant: when a tool call in a multi-call batch
+ * triggers suspension, ALL remaining calls in that same batch are
+ * captured together in `pendingToolCalls`. Apps MUST supply results
+ * for every entry on resume; otherwise the provider's
+ * tool_use / tool_result pairing becomes unbalanced and the next
+ * model call rejects.
+ */
+import { BrainError } from './brain_error.ts'
+import type {
+  ChatUsage,
+  ContentBlock,
+  Message,
+  ToolResultBlock,
+  ToolUseBlock,
+} from './types.ts'
+/**
+ * Outcome of a run that paused instead of finishing. Carries the tool
+ * calls still awaiting an answer plus the snapshot needed to continue.
+ */
+export interface SuspendedRun {
+  /** Literal tag; `isSuspended` tests for exactly this value. */
+  status: 'suspended'
+  /**
+   * Tool calls the model is still waiting on: the call that caused
+   * the pause together with any siblings from the same assistant
+   * turn that were not executed. Pair each supplied result with its
+   * call through `id`.
+   */
+  pendingToolCalls: ToolUseBlock[]
+  /** Loop state captured at the pause; JSON-serializable. */
+  state: SuspendedState
+}
+/**
+ * Snapshot of the agentic loop taken at the moment it suspended.
+ */
+export interface SuspendedState {
+  /**
+   * Conversation so far, ending with (and including) the assistant
+   * turn whose tool calls caused the suspension.
+   */
+  messages: Message[]
+  /** Number of loop iterations at the pause; carried over on resume. */
+  iterations: number
+  /** Token usage summed over the iterations that have already run. */
+  usage: ChatUsage
+  /**
+   * Response id reported by the provider when the run paused. For
+   * providers with stateful conversations (OpenAI Responses API),
+   * resume passes it back as `previousResponseId` so the model
+   * continues from the same point. Absent otherwise.
+   */
+  responseId?: string
+}
+/**
+ * Caller-supplied answer for a single pending tool call, as passed
+ * to `resumeTools`. It has the fields of a `ToolResultBlock` without
+ * the `type` discriminator; the framework adds that when it builds
+ * the block during resume.
+ *
+ * Failures are reported in-band: put a description of the error in
+ * `content` and set `isError: true`, so the model can adapt instead
+ * of the loop crashing.
+ */
+export interface ToolResultInput {
+  /** Id of the pending tool call this result answers. */
+  toolUseId: string
+  /** Result text, or the error description when `isError` is set. */
+  content: string
+  /** Marks the result as a failure when `true`. */
+  isError?: boolean
+}
+/**
+ * Runtime type guard separating a `SuspendedRun` from a completed
+ * `AgentResult`. Only the `status` tag is inspected: any non-null
+ * object whose `status` is exactly `'suspended'` passes.
+ *
+ * ```ts
+ * const out = await brain.runTools(prompt, tools, { shouldSuspend })
+ * if (isSuspended(out)) {
+ *   await persistForLater(out.pendingToolCalls, out.state)
+ *   return
+ * }
+ * render(out.text)
+ * ```
+ */
+export function isSuspended(value: unknown): value is SuspendedRun {
+  // Primitives and null cannot carry a status tag.
+  if (typeof value !== 'object' || value === null) return false
+  const candidate = value as { status?: unknown }
+  return candidate.status === 'suspended'
+}
+/**
+ * Produce the message list used to re-enter the loop after a
+ * suspension: a new array holding every entry of `state.messages`
+ * followed by one user-role message with a `tool_result` block per
+ * supplied result, in the order given. `state` is left untouched.
+ *
+ * Coverage is checked first. Every tool_use id found in the latest
+ * assistant turn must have a matching `toolUseId` in `results`; the
+ * first one that does not raises `BrainError`, which is clearer than
+ * the opaque "tool_use without tool_result" error the provider would
+ * otherwise return on the next call.
+ *
+ * Exported for `BrainManager.resumeTools` / `AgentRunner.resume`.
+ * Tests may call it directly to exercise resume mechanics without a
+ * provider round-trip.
+ */
+export function appendResumeResults(
+  state: SuspendedState,
+  results: readonly ToolResultInput[],
+): Message[] {
+  const pending = collectPendingIds(state.messages)
+  const suppliedIds = results.map((r) => r.toolUseId)
+  const answered = new Set(suppliedIds)
+  for (const id of pending) {
+    if (answered.has(id)) continue
+    throw new BrainError(
+      `resumeTools: missing result for pending tool call id "${id}". Every pending tool_use in the suspending assistant turn must be answered on resume.`,
+      { context: { pendingIds: [...pending], suppliedIds } },
+    )
+  }
+  const resultBlocks: ContentBlock[] = results.map((r) => {
+    // `isError` is only written when truthy, so successful results
+    // carry no `isError` key at all.
+    const block: ToolResultBlock = r.isError
+      ? { type: 'tool_result', toolUseId: r.toolUseId, content: r.content, isError: true }
+      : { type: 'tool_result', toolUseId: r.toolUseId, content: r.content }
+    return block
+  })
+  const resumeTurn: Message = { role: 'user', content: resultBlocks }
+  return [...state.messages, resumeTurn]
+}
+/**
+ * Return the ids of all tool_use blocks in the most recent assistant
+ * message of `messages`. Yields an empty list when there is no
+ * assistant message or when that message's content is a plain
+ * string. Used to validate resume coverage.
+ */
+function collectPendingIds(messages: readonly Message[]): string[] {
+  // Walk backwards: only the newest assistant turn matters.
+  for (let idx = messages.length; idx-- > 0; ) {
+    const turn = messages[idx]!
+    if (turn.role !== 'assistant') continue
+    const body = turn.content
+    if (typeof body === 'string') return []
+    const toolUses = body.filter((b): b is ToolUseBlock => b.type === 'tool_use')
+    return toolUses.map((b) => b.id)
+  }
+  return []
+}

package/src/thread.ts CHANGED Viewed

@@ -35,6 +35,14 @@ export interface ThreadState {
   messages: Message[]
   system?: SystemPrompt
   options?: ChatOptions
+  /**
+   * Last provider response id captured by `send(...)` — restored on
+   * `fromJSON` so subsequent sends thread it via
+   * `ChatOptions.previousResponseId` automatically. Only ever set
+   * when the underlying provider surfaces `responseId` (OpenAI
+   * Responses API today).
+   */
+  lastResponseId?: string
 }
 export class Thread {
@@ -42,6 +50,13 @@ export class Thread {
   readonly messages: Message[] = []
   readonly system?: SystemPrompt
   readonly options?: ChatOptions
+  /**
+   * Last response id returned by the provider on this thread. Used to
+   * thread stateful-conversation hints (OpenAI Responses API) into
+   * the next `send(...)` so apps don't have to manage it manually.
+   * `undefined` for providers that don't surface a response id.
+   */
+  lastResponseId?: string
   private readonly brain: BrainManager
   constructor(brain: BrainManager, opts: ThreadOptions = {}) {
@@ -54,6 +69,11 @@ export class Thread {
    * Append a user turn, call the model, append the assistant reply,
    * and return the reply text. Per-call options override the
    * thread's defaults; `system` always comes from the thread.
+   *
+   * When the underlying provider supports stateful conversations
+   * (OpenAI Responses API), `previousResponseId` is auto-threaded
+   * from the prior turn — apps don't need to manage it. Per-call
+   * `options.previousResponseId` wins if supplied explicitly.
    */
   async send(text: string, options: ChatOptions = {}): Promise<string> {
     this.messages.push({ role: 'user', content: text })
@@ -65,8 +85,25 @@ export class Thread {
       // mid-thread by changing the system prompt every turn.
       ...(this.system !== undefined ? { system: this.system } : {}),
     }
+    if (
+      merged.previousResponseId === undefined &&
+      this.lastResponseId !== undefined
+    ) {
+      merged.previousResponseId = this.lastResponseId
+    }
     const result = await this.brain.chat(this.messages, merged)
-    this.messages.push({ role: 'assistant', content: result.text })
+    // Preserve structured assistant content when present (compaction
+    // blocks today; reasoning blocks later). Round-tripping these
+    // back to the provider on subsequent sends is what makes
+    // server-side compaction actually save tokens — once a turn
+    // carries a `compaction` block, the older raw turns drop out
+    // and the model only re-reads the summary.
+    if (result.content !== undefined && result.content.length > 0) {
+      this.messages.push({ role: 'assistant', content: result.content })
+    } else {
+      this.messages.push({ role: 'assistant', content: result.text })
+    }
+    if (result.responseId !== undefined) this.lastResponseId = result.responseId
     return result.text
   }
@@ -80,6 +117,7 @@ export class Thread {
     const state: ThreadState = { messages: [...this.messages] }
     if (this.system !== undefined) state.system = this.system
     if (this.options !== undefined) state.options = this.options
+    if (this.lastResponseId !== undefined) state.lastResponseId = this.lastResponseId
     return state
   }
@@ -94,6 +132,7 @@ export class Thread {
     if (state.options !== undefined) options.options = state.options
     const thread = new Thread(brain, options)
     for (const m of state.messages) thread.messages.push(m)
+    if (state.lastResponseId !== undefined) thread.lastResponseId = state.lastResponseId
     return thread
   }
 }

package/src/tool.ts CHANGED Viewed

@@ -23,6 +23,13 @@ export interface ToolContext {
   readonly callId: string
   /** Per-run free-form context bag passed by the caller. Optional. */
   readonly context: Readonly<Record<string, unknown>>
+  /**
+   * Cancellation signal forwarded from the run's `options.signal`.
+   * Tools that wrap network calls (HTTP fetches, MCP servers, child
+   * processes) should pass this through so cancellation actually
+   * unwinds in-flight work.
+   */
+  readonly signal?: AbortSignal
 }
 export interface Tool<TInput = unknown, TOutput = unknown> {

package/src/tool_runner.ts ADDED Viewed

@@ -0,0 +1,81 @@
+/**
+ * `runToolWithRecovery` — shared helper used by every provider's
+ * agentic loop to execute one tool call.
+ *
+ * Encapsulates two error paths and the optional `onToolError`
+ * recovery callback:
+ *
+ *   1. **Tool not registered** — the model called a name that
+ *      isn't in `toolMap`. Without recovery, throw
+ *      `ToolExecutionError`. With recovery, the callback's return
+ *      string becomes the `tool_result.content` (with `isError:
+ *      true`) and the loop continues — the model sees "unknown
+ *      tool" and adapts.
+ *
+ *   2. **`execute()` throws** — the tool's body raised. Same
+ *      pattern: either rethrow as `ToolExecutionError` or feed
+ *      back as an error result.
+ *
+ * The returned shape is the framework-agnostic `{ content, isError }`
+ * pair each provider then wraps into its own `tool_result` block
+ * shape (Anthropic `tool_result` with `is_error`; OpenAI tool-role
+ * message content; Gemini `functionResponse` with `{ error }`).
+ */
+import type { RunWithToolsOptions } from './provider.ts'
+import type { Tool, ToolContext } from './tool.ts'
+import { ToolExecutionError } from './tool_execution_error.ts'
+/**
+ * Provider-neutral outcome of executing one tool call. Each provider
+ * wraps this pair into its own tool-result shape.
+ */
+export interface ToolRunResult {
+  /** Text fed back to the model: tool output or a recovery message. */
+  content: string
+  /** `true` when `content` describes a failure rather than output. */
+  isError: boolean
+}
+/**
+ * Execute one tool call and turn the outcome into a `ToolRunResult`.
+ *
+ * Failure handling, all routed through `recoverOrThrow`:
+ *   - `tool` is `undefined` (the model named an unregistered tool):
+ *     a `ToolExecutionError` wrapping "Tool ... is not registered."
+ *   - `tool.execute` throws, or its return value cannot be
+ *     serialized (e.g. circular structure, BigInt): a
+ *     `ToolExecutionError` wrapping the thrown value.
+ * With `options.onToolError` returning a string, that string becomes
+ * an error result; otherwise the `ToolExecutionError` is thrown.
+ *
+ * On success, string output is passed through unchanged and anything
+ * else is JSON-encoded. Output with no JSON representation
+ * (`undefined`, functions, symbols) is reported as `'null'` so that
+ * `content` is always a string.
+ *
+ * The tool receives a `ToolContext` holding `callId`, the caller's
+ * `options.context` (or `{}`), and `options.signal` when one is set.
+ */
+export async function runToolWithRecovery(
+  tool: Tool | undefined,
+  toolName: string,
+  callId: string,
+  input: unknown,
+  options: RunWithToolsOptions,
+): Promise<ToolRunResult> {
+  if (!tool) {
+    return recoverOrThrow(
+      new ToolExecutionError(
+        toolName,
+        callId,
+        new Error(`Tool "${toolName}" is not registered.`),
+      ),
+      options,
+    )
+  }
+  const ctx: ToolContext = {
+    callId,
+    context: options.context ?? {},
+    // Only add the key when a signal exists, so `signal` is never
+    // present-but-undefined on the context object.
+    ...(options.signal !== undefined ? { signal: options.signal } : {}),
+  }
+  try {
+    const output: unknown = await tool.execute(input, ctx)
+    // Serialization stays inside the try so an unserializable return
+    // value takes the same recovery path as a throwing tool body.
+    return { content: serializeToolOutput(output), isError: false }
+  } catch (cause) {
+    return recoverOrThrow(new ToolExecutionError(toolName, callId, cause), options)
+  }
+}
+/** Convert a tool's return value to the string sent back to the model. */
+function serializeToolOutput(output: unknown): string {
+  if (typeof output === 'string') return output
+  // JSON.stringify yields `undefined` (not a string) for `undefined`,
+  // functions and symbols; fall back to the JSON literal `null`.
+  const encoded: string | undefined = JSON.stringify(output)
+  return encoded ?? 'null'
+}
+/**
+ * Give `options.onToolError` a chance to convert a
+ * `ToolExecutionError` into an error result. When the callback is
+ * set and returns a string, that string becomes the result content
+ * with `isError: true`. In every other case (no callback, or a
+ * non-string return) the original error is thrown.
+ *
+ * Providers also call this for failures that occur outside
+ * `tool.execute` — e.g., OpenAI's JSON-parse-arguments path.
+ */
+export function recoverOrThrow(
+  error: ToolExecutionError,
+  options: RunWithToolsOptions,
+): ToolRunResult {
+  const replacement = options.onToolError?.(error)
+  if (typeof replacement === 'string') {
+    return { content: replacement, isError: true }
+  }
+  throw error
+}