npm - @strav/brain - Versions diffs - 1.0.0-alpha.17 → 1.0.0-alpha.19 - Mend

@strav/brain 1.0.0-alpha.17 → 1.0.0-alpha.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/package.json +4 -2
package/src/agent_generate_result.ts +2 -0
package/src/agent_result.ts +7 -0
package/src/agent_runner.ts +80 -4
package/src/brain_manager.ts +119 -2
package/src/index.ts +20 -2
package/src/mcp/client.ts +17 -0
package/src/mcp/index.ts +1 -0
package/src/mcp/pool.ts +106 -0
package/src/mcp/resolve_mcp_tools.ts +25 -7
package/src/persistence/brain_message.ts +34 -0
package/src/persistence/brain_message_repository.ts +106 -0
package/src/persistence/brain_store.ts +166 -0
package/src/persistence/brain_suspended_run.ts +30 -0
package/src/persistence/brain_suspended_run_repository.ts +68 -0
package/src/persistence/brain_thread.ts +30 -0
package/src/persistence/brain_thread_repository.ts +65 -0
package/src/persistence/database_brain_store.ts +190 -0
package/src/persistence/index.ts +48 -0
package/src/persistence/schema/brain_message_schema.ts +61 -0
package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
package/src/persistence/schema/brain_thread_schema.ts +50 -0
package/src/persistence/schema/index.ts +3 -0
package/src/provider.ts +36 -1
package/src/providers/anthropic_provider.ts +140 -23
package/src/providers/gemini_provider.ts +55 -32
package/src/providers/openai_compat_provider.ts +452 -23
package/src/providers/openai_provider.ts +87 -32
package/src/providers/openai_responses_provider.ts +365 -50
package/src/suspended_run.ts +153 -0
package/src/thread.ts +40 -1
package/src/types.ts +110 -0

package/src/thread.ts CHANGED Viewed

@@ -35,6 +35,14 @@ export interface ThreadState {
   messages: Message[]
   system?: SystemPrompt
   options?: ChatOptions
+  /**
+   * Last provider response id captured by `send(...)` — restored on
+   * `fromJSON` so subsequent sends thread it via
+   * `ChatOptions.previousResponseId` automatically. Only ever set
+   * when the underlying provider surfaces `responseId` (OpenAI
+   * Responses API today).
+   */
+  lastResponseId?: string
 }
 export class Thread {
@@ -42,6 +50,13 @@ export class Thread {
   readonly messages: Message[] = []
   readonly system?: SystemPrompt
   readonly options?: ChatOptions
+  /**
+   * Last response id returned by the provider on this thread. Used to
+   * thread stateful-conversation hints (OpenAI Responses API) into
+   * the next `send(...)` so apps don't have to manage it manually.
+   * `undefined` for providers that don't surface a response id.
+   */
+  lastResponseId?: string
   private readonly brain: BrainManager
   constructor(brain: BrainManager, opts: ThreadOptions = {}) {
@@ -54,6 +69,11 @@ export class Thread {
    * Append a user turn, call the model, append the assistant reply,
    * and return the reply text. Per-call options override the
    * thread's defaults; `system` always comes from the thread.
+   *
+   * When the underlying provider supports stateful conversations
+   * (OpenAI Responses API), `previousResponseId` is auto-threaded
+   * from the prior turn — apps don't need to manage it. Per-call
+   * `options.previousResponseId` wins if supplied explicitly.
    */
   async send(text: string, options: ChatOptions = {}): Promise<string> {
     this.messages.push({ role: 'user', content: text })
@@ -65,8 +85,25 @@ export class Thread {
       // mid-thread by changing the system prompt every turn.
       ...(this.system !== undefined ? { system: this.system } : {}),
     }
+    if (
+      merged.previousResponseId === undefined &&
+      this.lastResponseId !== undefined
+    ) {
+      merged.previousResponseId = this.lastResponseId
+    }
     const result = await this.brain.chat(this.messages, merged)
-    this.messages.push({ role: 'assistant', content: result.text })
+    // Preserve structured assistant content when present (compaction
+    // blocks today; reasoning blocks later). Round-tripping these
+    // back to the provider on subsequent sends is what makes
+    // server-side compaction actually save tokens — once a turn
+    // carries a `compaction` block, the older raw turns drop out
+    // and the model only re-reads the summary.
+    if (result.content !== undefined && result.content.length > 0) {
+      this.messages.push({ role: 'assistant', content: result.content })
+    } else {
+      this.messages.push({ role: 'assistant', content: result.text })
+    }
+    if (result.responseId !== undefined) this.lastResponseId = result.responseId
     return result.text
   }
@@ -80,6 +117,7 @@ export class Thread {
     const state: ThreadState = { messages: [...this.messages] }
     if (this.system !== undefined) state.system = this.system
     if (this.options !== undefined) state.options = this.options
+    if (this.lastResponseId !== undefined) state.lastResponseId = this.lastResponseId
     return state
   }
@@ -94,6 +132,7 @@ export class Thread {
     if (state.options !== undefined) options.options = state.options
     const thread = new Thread(brain, options)
     for (const m of state.messages) thread.messages.push(m)
+    if (state.lastResponseId !== undefined) thread.lastResponseId = state.lastResponseId
     return thread
   }
 }

package/src/types.ts CHANGED Viewed

@@ -173,6 +173,35 @@ export interface AudioBlock {
     | { type: 'url'; url: string }
 }
+/**
+ * Server-side compaction block. Anthropic's `compact-2026-01-12`
+ * beta returns a `compaction` block when an auto-compaction trigger
+ * fires during a request. The framework surfaces it on
+ * `result.content` and Thread persists it on the assistant turn so
+ * subsequent requests echo it back verbatim — the model only sees
+ * the summary + opaque blob from then on, and the older raw turns
+ * stay out of context.
+ *
+ * V1 produces these on Anthropic only. Other providers ignore the
+ * `compact` option silently, and never emit a `CompactionBlock`.
+ *
+ * Round-trip invariant: pass the block back unchanged. The
+ * `encryptedContent` blob is opaque metadata the server uses to
+ * stitch the compaction history together; the framework never
+ * mutates it.
+ *
+ * `content === null` means a compaction attempt failed (e.g.,
+ * malformed model output). The server treats these as no-ops on
+ * the next request, so apps don't need to special-case them.
+ */
+export interface CompactionBlock {
+  type: 'compaction'
+  /** Summary of compacted content. Null when compaction failed. */
+  content: string | null
+  /** Opaque metadata round-tripped verbatim on subsequent requests. */
+  encryptedContent: string | null
+}
 export type ContentBlock =
   | TextBlock
   | ImageBlock
@@ -182,6 +211,7 @@ export type ContentBlock =
   | ToolResultBlock
   | MCPToolUseBlock
   | MCPToolResultBlock
+  | CompactionBlock
 /** A single conversation turn. `content` can be a bare string or a typed block list. */
 export interface Message {
@@ -254,6 +284,36 @@ export type ServerTool =
       /** Gemini fetches the URL and surfaces grounded answers from it. */
     }
+/**
+ * Per-call compaction configuration. Maps to Anthropic's
+ * `compact-2026-01-12` beta `edits[]` entry. All fields optional —
+ * omitting one falls back to the server's default (trigger:
+ * 150,000 input tokens; no extra instructions; no pause).
+ */
+export interface CompactConfig {
+  /**
+   * Trigger threshold in input tokens. Compaction fires once the
+   * conversation crosses this token count. Default 150,000 — same
+   * as the server-side default.
+   */
+  trigger?: number
+  /**
+   * Extra hint to the summarization model. Useful for biasing the
+   * compaction toward what your app actually cares to preserve
+   * ("keep all customer ids referenced", "preserve every diff
+   * hunk", ...).
+   */
+  instructions?: string
+  /**
+   * When `true`, the server returns the compaction block in-line
+   * but does NOT continue generation — the next assistant turn
+   * waits for an explicit re-prompt. Apps that want to inspect or
+   * gate compaction set this; default `false` (compaction is
+   * transparent).
+   */
+  pauseAfterCompaction?: boolean
+}
 export interface ChatOptions {
   /** Override the configured default model. Wins over `tier`. */
   model?: string
@@ -308,6 +368,36 @@ export interface ChatOptions {
    * route to Anthropic / Gemini).
    */
   serverTools?: readonly ServerTool[]
+  /**
+   * Server-side conversation compaction. When set, the provider
+   * auto-summarizes the older part of the message history once the
+   * `trigger` token threshold is reached; the summary lives on the
+   * response as a `CompactionBlock` that apps round-trip on
+   * subsequent requests (Thread does this automatically). Saves
+   * tokens on long threads without lossy client-side pruning.
+   *
+   * Only honored by `AnthropicProvider` (driver `'anthropic'`),
+   * via the `compact-2026-01-12` beta. Silently ignored by every
+   * other provider so apps targeting multiple providers with the
+   * same options object don't have to special-case.
+   */
+  compact?: CompactConfig
+  /**
+   * Stateful conversation pointer — OpenAI Responses API. When set,
+   * the provider sends only the new turn(s); the server picks up
+   * from the prior `Response` identified by this id and replays
+   * the conversation server-side. Saves tokens on long threads.
+   *
+   * Only honored by `OpenAIResponsesProvider` (driver
+   * `'openai-responses'`); silently ignored by every other provider
+   * — apps that target multiple providers with the same options
+   * object don't have to special-case.
+   *
+   * Pair with `ChatResult.responseId` (set only when the provider
+   * surfaces one) to thread the conversation forward. `Thread` does
+   * this automatically when its underlying provider supports it.
+   */
+  previousResponseId?: string
 }
 /** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
@@ -330,6 +420,24 @@ export interface ChatResult<Raw = unknown> {
   stopReason: string | null
   usage: ChatUsage
   raw: Raw
+  /**
+   * Structured assistant content blocks — populated when the model
+   * emitted more than plain text on this turn (compaction blocks
+   * today; reasoning blocks once those surface). Apps that
+   * persist the conversation (`Thread`, custom stores) push this
+   * onto the message history when present so round-trippable
+   * blocks survive subsequent requests. Undefined when the turn
+   * was plain text only.
+   */
+  content?: ContentBlock[]
+  /**
+   * Provider response id when the provider exposes stateful
+   * conversations (currently OpenAI Responses API). Apps thread
+   * this forward via `ChatOptions.previousResponseId` so the
+   * server replays prior turns without re-sending them.
+   * Undefined for providers that don't support the pattern.
+   */
+  responseId?: string
 }
 /**
@@ -447,4 +555,6 @@ export interface GenerateResult<T = unknown, Raw = unknown> {
   stopReason: string | null
   usage: ChatUsage
   raw: Raw
+  /** See `ChatResult.responseId`. */
+  responseId?: string
 }